libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::AaStringCodec Class Reference

code and decode amino acid string sequence to unique integer More...

#include <aastringcodec.h>

Public Member Functions

 AaStringCodec (const AaCode &aaCode)
 AaStringCodec (const AaStringCodec &other)
 ~AaStringCodec ()
std::size_t getLimitMax (std::size_t size) const
 get the maximum code number for a given peptide size
uint32_t code (const QString &aa_str) const
 get the integer code of an amino acid sequence string
uint32_t codeLlc (const QString &aa_str) const
 get the lowest common denominator integer from an amino acid sequence string
uint32_t codeLlc (std::vector< uint8_t >::const_iterator it_begin, std::size_t size) const
 get the lowest common denominator integer from an amino acid code vector
QString decode (uint32_t code) const
QStringList decode (const std::vector< uint32_t > &code_list) const
double getMass (uint32_t code) const
std::vector< CodeToMass > generateLlcCodeListUpToMaxPeptideSize (std::size_t size) const
 generates all possible combinations of llc code and mass (llc: the lowest common code denominator for a given aa formula)
std::vector< CodeToMass > generateLlcCodeListByMaxPeptideSize (std::size_t size) const
 generates all possible combinations of llc code and mass (llc: the lowest common code denominator for a given aa formula)
bool codeOnlyContains (uint32_t code, const std::vector< uint8_t > &aa_ok) const
 tell if a code contains only amino acids from the given list
bool uniqueCodeContainsAminoAcid (uint32_t code, uint8_t aa_ok, int times) const
 tell if a unique code contains the given amino acid at least n times
const AaCode & getAaCode () const

Private Member Functions

void recGenerateModel (std::vector< CodeToMass > &glist, std::vector< uint8_t > &model, std::size_t position) const
 recursive method to generate models
CodeToMass generateCodeMassFromModel (const std::vector< uint8_t > &model) const

Private Attributes

uint32_t m_base = 0
const AaCode & m_aaCode
std::vector< uint32_t > m_units

Detailed Description

code and decode amino acid string sequence to unique integer

Definition at line 51 of file aastringcodec.h.

Constructor & Destructor Documentation

◆ AaStringCodec() [1/2]

AaStringCodec::AaStringCodec ( const AaCode & aaCode)

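Constructor from an AaCode: sets the numeric base to the AaCode size + 1 and precomputes 10 positional units (successive powers of the base).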

Definition at line 33 of file aastringcodec.cpp.

33 : m_aaCode(aaCode)
34{
35
36 m_base = m_aaCode.getSize() + 1;
37 m_units.resize(10);
38 uint32_t unit = 1;
39 for(auto &this_unit : m_units)
40 {
41 this_unit = unit;
42 unit *= m_base;
43 }
44}
std::vector< uint32_t > m_units
const AaCode & m_aaCode

References m_aaCode, m_base, and m_units.

Referenced by AaStringCodec().

◆ AaStringCodec() [2/2]

AaStringCodec::AaStringCodec ( const AaStringCodec & other)

Copy constructor

Parameters
other: the AaStringCodec to copy (its AaCode reference, base and units)

Definition at line 46 of file aastringcodec.cpp.

46 : m_aaCode(other.m_aaCode)
47{
48 m_base = other.m_base;
49 m_units = other.m_units;
50}

References AaStringCodec(), m_aaCode, m_base, and m_units.

◆ ~AaStringCodec()

AaStringCodec::~AaStringCodec ( )

Destructor

Definition at line 52 of file aastringcodec.cpp.

53{
54}

Member Function Documentation

◆ code()

uint32_t pappso::AaStringCodec::code ( const QString & aa_str) const

get the integer code of an amino acid sequence string

Definition at line 58 of file aastringcodec.cpp.

59{
60
61 std::size_t pos = 0;
62 uint32_t code = 0;
63 for(auto &aa_char : aa_str)
64 {
65 code += m_aaCode.getAaCode(aa_char.toLatin1()) * m_units[pos];
66 pos++;
67 }
68 return code;
69}
uint32_t code(const QString &aa_str) const
get the integer code of an amino acid sequence string

References code(), m_aaCode, and m_units.

Referenced by code(), codeLlc(), codeLlc(), codeOnlyContains(), decode(), decode(), getLimitMax(), getMass(), and uniqueCodeContainsAminoAcid().

◆ codeLlc() [1/2]

uint32_t pappso::AaStringCodec::codeLlc ( const QString & aa_str) const

get the lowest common denominator integer from an amino acid sequence string

Definition at line 72 of file aastringcodec.cpp.

73{
74 std::vector<uint8_t> llc_vec;
75
76 for(auto &aa_char : aa_str)
77 {
78 llc_vec.push_back(m_aaCode.getAaCode(aa_char.toLatin1()));
79 }
80 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
81
82
83 std::size_t pos = 0;
84 uint32_t code = 0;
85 for(auto &aa_code : llc_vec)
86 {
87 code += (uint32_t)aa_code * m_units[pos];
88 pos++;
89 }
90 return code;
91}

References code(), m_aaCode, and m_units.

Referenced by pappso::ProteinIntegerCode::computePeptideCodeFragments().

◆ codeLlc() [2/2]

uint32_t pappso::AaStringCodec::codeLlc ( std::vector< uint8_t >::const_iterator it_begin,
std::size_t size ) const

get the lowest common denominator integer from an amino acid code vector

Definition at line 94 of file aastringcodec.cpp.

96{
97 std::vector<uint8_t> llc_vec;
98
99 for(std::size_t i = 0; i < size; i++)
100 {
101 llc_vec.push_back(*it_begin);
102 it_begin++;
103 }
104 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
105
106
107 std::size_t pos = 0;
108 uint32_t code = 0;
109 for(auto &aa_code : llc_vec)
110 {
111 code += (uint32_t)aa_code * m_units[pos];
112 pos++;
113 }
114 return code;
115}

References code(), and m_units.

◆ codeOnlyContains()

bool pappso::AaStringCodec::codeOnlyContains ( uint32_t code,
const std::vector< uint8_t > & aa_ok ) const

tell if a code contains only amino acids from the given list

Parameters
code: the code to validate
aa_ok: list of allowed amino acid codes

Definition at line 265 of file aastringcodec.cpp.

266{
267
268 while(code > 0)
269 {
270 if(std::find(aa_ok.begin(), aa_ok.end(), (uint8_t)(code % m_base)) == aa_ok.end())
271 return false;
272
273 code /= m_base;
274 }
275 return true;
276}

References code(), and m_base.

◆ decode() [1/2]

QStringList pappso::AaStringCodec::decode ( const std::vector< uint32_t > & code_list) const

Definition at line 135 of file aastringcodec.cpp.

136{
137 QStringList aa_string_list;
138 for(auto code : code_list)
139 {
140 aa_string_list << decode(code);
141 }
142 return aa_string_list;
143}
QString decode(uint32_t code) const

References code(), and decode().

◆ decode() [2/2]

QString pappso::AaStringCodec::decode ( uint32_t code) const

Definition at line 119 of file aastringcodec.cpp.

120{
121 QString aa_suite;
122
123 while(code > 0)
124 {
125 aa_suite.append(m_aaCode.getAa((uint8_t)(code % m_base)).getLetter());
126 code /= m_base;
127 }
128
129 // qDebug() << aa_suite;
130
131 return aa_suite;
132}

References code(), m_aaCode, and m_base.

Referenced by decode().

◆ generateCodeMassFromModel()

pappso::CodeToMass pappso::AaStringCodec::generateCodeMassFromModel ( const std::vector< uint8_t > & model) const
private

Definition at line 235 of file aastringcodec.cpp.

236{
237 CodeToMass code_mass;
238 std::size_t pos = 0;
239 for(auto aacode : model)
240 {
241 code_mass.mass += m_aaCode.getMass(aacode);
242
243 code_mass.code += (uint32_t)aacode * m_units[pos];
244 pos++;
245 }
246
247 // qDebug() << code_mass.code << " " << code_mass.mass;
248 return code_mass;
249}

References pappso::CodeToMass::code, m_aaCode, m_units, and pappso::CodeToMass::mass.

Referenced by generateLlcCodeListUpToMaxPeptideSize(), and recGenerateModel().

◆ generateLlcCodeListByMaxPeptideSize()

std::vector< CodeToMass > pappso::AaStringCodec::generateLlcCodeListByMaxPeptideSize ( std::size_t size) const

generates all possible combinations of llc code and mass (llc: the lowest common code denominator for a given aa formula)

generate only for this peptide size

Definition at line 190 of file aastringcodec.cpp.

191{
192 std::vector<CodeToMass> llc_list;
193 if(size == 0)
194 return llc_list;
195 std::vector<uint8_t> model;
196 model.resize(size, 0);
197
198 for(uint8_t i = 1; i < m_base; i++)
199 {
200 model[0] = i;
201 recGenerateModel(llc_list, model, 1);
202 }
203 return llc_list;
204}
void recGenerateModel(std::vector< CodeToMass > &glist, std::vector< uint8_t > &model, std::size_t position) const
recursive method to generate models

References m_base, and recGenerateModel().

◆ generateLlcCodeListUpToMaxPeptideSize()

std::vector< CodeToMass > pappso::AaStringCodec::generateLlcCodeListUpToMaxPeptideSize ( std::size_t size) const

generates all possible combinations of llc code and mass (llc: the lowest common code denominator for a given aa formula)

generate for every peptide size from 1 up to the given size

Definition at line 162 of file aastringcodec.cpp.

163{
164 std::vector<CodeToMass> llc_list;
165 if(size == 0)
166 return llc_list;
167 std::vector<uint8_t> model;
168 for(uint8_t p = 1; p <= size; p++)
169 {
170 model.resize(p, 0);
171
172 for(uint8_t i = 1; i < m_base; i++)
173 {
174 model[0] = i;
175 if(p == 1)
176 {
177 llc_list.push_back(generateCodeMassFromModel(model));
178 }
179 else
180 {
181 recGenerateModel(llc_list, model, 1);
182 }
183 }
184 }
185 return llc_list;
186}
CodeToMass generateCodeMassFromModel(const std::vector< uint8_t > &model) const

References generateCodeMassFromModel(), m_base, and recGenerateModel().

◆ getAaCode()

const pappso::AaCode & pappso::AaStringCodec::getAaCode ( ) const

Definition at line 299 of file aastringcodec.cpp.

300{
301 return m_aaCode;
302}

References m_aaCode.

Referenced by pappso::ProteinIntegerCode::ProteinIntegerCode().

◆ getLimitMax()

std::size_t pappso::AaStringCodec::getLimitMax ( std::size_t size) const

get the maximum code number for a given peptide size

Definition at line 253 of file aastringcodec.cpp.

254{
255
256 std::size_t code = 0;
257 for(std::size_t pos = 0; pos < size; pos++)
258 {
259 code += (std::size_t)(m_base - 1) * (std::size_t)m_units[pos];
260 }
261 return code;
262}

References code(), m_base, and m_units.

◆ getMass()

double pappso::AaStringCodec::getMass ( uint32_t code) const

Definition at line 147 of file aastringcodec.cpp.

148{
149 double mass = 0;
150
151 while(code > 0)
152 {
153 mass += m_aaCode.getMass((uint8_t)(code % m_base));
154 code /= m_base;
155 }
156
157 return mass;
158}

References code(), m_aaCode, and m_base.

◆ recGenerateModel()

void pappso::AaStringCodec::recGenerateModel ( std::vector< CodeToMass > & glist,
std::vector< uint8_t > & model,
std::size_t position ) const
private

recursive method to generate models

Definition at line 207 of file aastringcodec.cpp.

210{
211 if(position == model.size())
212 return;
213
214 if(position == model.size() - 1)
215 {
216 uint8_t max = model[position - 1];
217 for(uint8_t i = 1; i <= max; i++)
218 {
219 model[position] = i;
220 glist.push_back(generateCodeMassFromModel(model));
221 }
222 }
223 else
224 {
225 uint8_t max = model[position - 1];
226 for(uint8_t i = 1; i <= max; i++)
227 {
228 model[position] = i;
229 recGenerateModel(glist, model, position + 1);
230 }
231 }
232}
@ max
maximum of intensities
Definition types.h:280

References generateCodeMassFromModel(), and recGenerateModel().

Referenced by generateLlcCodeListByMaxPeptideSize(), generateLlcCodeListUpToMaxPeptideSize(), and recGenerateModel().

◆ uniqueCodeContainsAminoAcid()

bool pappso::AaStringCodec::uniqueCodeContainsAminoAcid ( uint32_t code,
uint8_t aa_ok,
int times ) const

tell if a unique code contains the given amino acid at least n times

Parameters
code: the code to validate
aa_ok: the amino acid code that the code must contain
times: the number of occurrences of aa_ok that must be reached in code

Definition at line 279 of file aastringcodec.cpp.

280{
281
282 int number = 0;
283 while(code > 0)
284 {
285 if(aa_ok == (uint8_t)(code % m_base))
286 {
287 number++;
288 if(number == times)
289 return true;
290 }
291
292 code /= m_base;
293 }
294 return false;
295}

References code(), and m_base.

Member Data Documentation

◆ m_aaCode

const AaCode& pappso::AaStringCodec::m_aaCode
private

◆ m_base

◆ m_units

std::vector<uint32_t> pappso::AaStringCodec::m_units
private

The documentation for this class was generated from the following files: