67 double sum_of_elems = std::accumulate(
shifts.begin(),
shifts.end(), 0.0); // init must be 0.0 (double): std::accumulate accumulates in the type of its init value, so an int 0 would truncate every partial sum of the double shifts
88 key_cell_init_first.
n_row = 0;
89 key_cell_init_first.
score = 0;
91 key_cell_init_first.
tree_id = 0;
95const std::vector<pappso::specpeptidoms::KeyCell> &
105 std::size_t sequence_length = protein_ptr->size();
109 for(std::size_t row_number = 1; row_number <= sequence_length; row_number++)
130 key_cell_init.
n_row = 0;
148 m_scenario.reserve(length2 + 1, spectrum.size());
161 for(std::size_t iter =
m_interest_cells.size(); iter < spectrum.size(); iter++)
171 const std::size_t beginning,
172 const std::size_t length)
177 const QString &protein_seq = protein_ptr->
getSequence();
179 if((qsizetype)(beginning + length) <= protein_seq.size())
185 length2 = protein_seq.size() - beginning;
189 QString sequence_str = protein_seq.sliced(protein_seq.size() - beginning - length2, length2);
194 std::vector<AaPosition> aa_positions;
199 for(std::size_t row_number = 1; row_number <= length2; row_number++)
202 qDebug() <<
"row_number - 1=" << row_number - 1 <<
" sequence.size()=" << sequence.size();
241 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
242 if(corrections.size() > 0)
248 for(
auto peaks_to_remove : corrections)
255 protein_seq.size() - beginning);
274 QObject::tr(
"SemiGlobalAlignment::preciseAlign failed :\n%1").arg(err.
qwhat()));
282 std::vector<std::size_t> &peaks_to_remove,
285 std::vector<AaPosition> aa_positions;
287 std::vector<std::size_t> final_peaks_to_remove;
291 key_cell_init.
n_row = 0;
301 for(qsizetype row_number = 1; row_number <= sequence.size(); row_number++)
303 qDebug() << row_number - 1 <<
" " << sequence.size();
304 qDebug() <<
"sequence[row_number - 1].aa" << (char)sequence[row_number - 1].aa;
306 aa_positions = spectrum.
getAaPositions(sequence[row_number - 1].code, peaks_to_remove);
343 std::vector<std::vector<std::size_t>> corrections = correction_tree.
getPeaks();
344 if(corrections.size() > 0)
346 for(
auto new_peaks_to_remove : corrections)
348 final_peaks_to_remove = std::vector<std::size_t>(new_peaks_to_remove);
349 final_peaks_to_remove.insert(
350 final_peaks_to_remove.end(), peaks_to_remove.begin(), peaks_to_remove.end());
351 correctAlign(sequence, protein_ptr, spectrum, final_peaks_to_remove, offset);
365 std::size_t beginning,
367 const std::vector<double> &shifts)
372 for(
double precursor_mass_error : shifts)
374 SpOMSSpectrum corrected_spectrum(spectrum, precursor_mass_error);
375 preciseAlign(corrected_spectrum, protein_ptr, beginning, length);
391 const std::size_t row_number,
392 const std::vector<AaPosition> &aa_positions,
394 const bool fast_align,
400 int score_found, score_shift, best_score, alt_score, tree_id;
402 std::size_t best_column,
shift, beginning, missing_aas, length, perfect_shift_origin;
403 KeyCell *current_cell_ptr, *tested_cell_ptr;
404 AlignType alignment_type, temp_align_type;
406 double smallest_aa_mass =
m_aaCode.getMass((std::uint8_t)1);
416 qDebug() << (char)sequence.at(row_number - 2).aa;
417 qDebug() <<
"condition" << condition <<
"aa" << (char)sequence.at(row_number - 2).aa
418 << sequence.at(row_number - 2).code;
419 condition += 2 << sequence.at(row_number - 2).code;
421 qDebug() <<
"condition" << condition;
425 for(std::vector<AaPosition>::const_iterator aa_position = aa_positions.begin();
426 aa_position != aa_positions.end();
429 qDebug() <<
"l_peak" << aa_position->l_peak <<
"r_peak" << aa_position->r_peak <<
"l_mass"
430 << aa_position->l_mass <<
"l_support" << aa_position->l_support <<
"condition"
431 << aa_position->condition;
434 if(((condition & aa_position->condition) != 0) ||
439 qDebug() <<
"threePeaks condition verified";
443 if(spectrum.
peakType(aa_position->r_peak) ==
448 qDebug() <<
"double peak";
454 qDebug() <<
"single peak";
460 best_column = aa_position->r_peak;
461 best_score = current_cell_ptr->
score + (row_number - current_cell_ptr->
n_row) *
464 tree_id = current_cell_ptr->
tree_id;
466 qDebug() <<
"not found" << best_score;
469 if(aa_position->l_support)
472 if(aa_position->l_peak == 0)
474 alt_score = tested_cell_ptr->
score + score_found;
478 if(tested_cell_ptr->
n_row == row_number - 1)
480 alt_score = tested_cell_ptr->
score +
481 (row_number - tested_cell_ptr->
n_row - 1) *
487 alt_score = tested_cell_ptr->
score +
488 (row_number - tested_cell_ptr->
n_row - 1) *
493 if(alt_score >= best_score)
497 best_score = alt_score;
498 best_column = aa_position->l_peak;
518 tree_id = tested_cell_ptr->
tree_id;
520 qDebug() <<
"found" << best_score <<
"from" << best_column << beginning
527 if(aa_position->l_support)
541 tested_cell_ptr->
n_row,
543 aa_position->l_peak -
shift,
544 aa_position->r_peak) &&
547 alt_score = tested_cell_ptr->
score +
548 (row_number - tested_cell_ptr->
n_row - 1) *
555 alt_score = tested_cell_ptr->
score +
556 (row_number - tested_cell_ptr->
n_row - 1) *
561 if(alt_score > best_score)
563 alignment_type = temp_align_type;
564 best_score = alt_score;
565 best_column = aa_position->l_peak -
shift;
567 tree_id = tested_cell_ptr->
tree_id;
568 qDebug() <<
"shift" << best_score <<
"from" << best_column;
579 perfect_shift_origin =
584 perfect_shift_origin = row_number;
587 if(perfect_shift_origin != row_number)
589 alt_score = tested_cell_ptr->
score + score_found;
594 alt_score = tested_cell_ptr->
score + score_shift;
599 if(alt_score > best_score)
601 qDebug() <<
"shift" << alt_score <<
"from 0";
602 alignment_type = temp_align_type;
603 best_score = alt_score;
606 std::floor((aa_position->l_mass -
609 qDebug() <<
"missing aas" << missing_aas;
628 if(best_column != aa_position->r_peak)
631 {aa_position->r_peak, {row_number, best_score, beginning, tree_id}});
638 row_number - beginning + 1 +
639 std::ceil(spectrum.
getMissingMass(aa_position->r_peak) / smallest_aa_mass) +
642 m_location_saver.addLocation(beginning, length, tree_id, best_score, protein_ptr);
652 perfect_shift_origin,
686 catch(
const std::exception &error)
689 QObject::tr(
"updateAlignmentMatrix failed std::exception :\n%1 %2")
696 QObject::tr(
"updateAlignmentMatrix failed :\n%1").arg(err.
qwhat()));
704 const std::size_t origin_row,
705 const std::size_t current_row,
706 const std::size_t l_peak,
707 const std::size_t r_peak)
const
711 double missing_mass = 0;
712 auto it_end = sequence.begin() + current_row;
713 for(
auto iter = sequence.begin() + origin_row; (iter != it_end) && (iter != sequence.end());
716 missing_mass += iter->mass;
728 catch(
const std::exception &error)
731 QObject::tr(
"perfectShiftPossible failed std exception:\n%1").arg(error.what()));
736 QObject::tr(
"perfectShiftPossible failed :\n%1").arg(err.
qwhat()));
744 const std::size_t current_row,
745 const std::size_t r_peak)
const
747 std::size_t perfect_shift_origin = current_row;
748 double missing_mass = spectrum.
getMZShift(0, r_peak);
751 while(aa_mass < missing_mass && perfect_shift_origin > 0 && !mz_range.
contains(aa_mass))
753 aa_mass += sequence.at(perfect_shift_origin - 1)
755 perfect_shift_origin--;
759 return perfect_shift_origin;
772 std::size_t end_peak)
const
776 std::size_t perfect_shift_end = end_row + 1;
780 while(aa_mass < missing_mass && perfect_shift_end < (std::size_t)sequence.size() &&
783 aa_mass += sequence.at(perfect_shift_end - 1)
789 return perfect_shift_end - 1;
799 QObject::tr(
"perfectShiftPossibleEnd failed :\n%1").arg(err.
qwhat()));
829 std::size_t previous_row;
830 std::size_t previous_column = 0;
831 std::size_t perfect_shift_end;
832 std::pair<std::vector<ScenarioCell>,
int> best_alignment =
m_scenario.getBestAlignment();
834 std::vector<SpOMSAa> skipped_aa;
837 if(best_alignment.first.front().previous_row > offset)
840 QString(
"best_alignment.first.front().previous_row > offset %1 %2")
842 .arg(best_alignment.first.front().previous_row));
844 if(best_alignment.first.back().previous_row > offset)
847 QString(
"best_alignment.first.back().previous_row > offset %1 %2")
849 .arg(best_alignment.first.back().previous_row));
851 m_best_alignment.beginning = offset - best_alignment.first.front().previous_row;
852 m_best_alignment.end = offset - best_alignment.first.back().previous_row - 1;
858 for(
auto cell : best_alignment.first)
860 switch(cell.alignment_type)
863 aa_model.
m_aminoAcid = sequence.at(previous_row - 1).aa;
867 if(previous_row > cell.previous_row + 1)
869 skipped_mass = sequence.at(previous_row - 1)
872 sequence.
sliced(cell.previous_row, previous_row - cell.previous_row - 1);
875 for(
auto aa : skipped_aa)
879 skipped_mass += aa.mass;
882 spectrum.
getMZShift(cell.previous_column, previous_column) - skipped_mass;
887 aa_model.
m_aminoAcid = sequence.at(previous_row - 1).aa;
894 aa_model.
m_aminoAcid = sequence.at(previous_row - 1).aa;
901 spectrum.
getMZShift(cell.previous_column, previous_column) -
902 sequence.at(previous_row - 1).mass);
906 skipped_aa = sequence.
sliced(cell.previous_row, previous_row - cell.previous_row);
907 std::reverse(skipped_aa.begin(), skipped_aa.end());
910 for(
auto aa : skipped_aa)
917 previous_row = cell.previous_row;
918 previous_column = cell.previous_column;
922 previous_row = cell.previous_row;
923 previous_column = cell.previous_column;
943 switch(spectrum.at(peak).type)
946 qDebug() << peak <<
"native";
950 qDebug() << peak <<
"both";
954 qDebug() << peak <<
"synthetic";
957 qDebug() << peak <<
"symmetric";
969 best_alignment.first.front().previous_row,
971 if(perfect_shift_end != best_alignment.first.front().previous_row)
974 sequence.
sliced(best_alignment.first.front().previous_row,
975 perfect_shift_end - best_alignment.first.front().previous_row);
978 for(
auto aa = skipped_aa.begin(); aa != skipped_aa.end(); aa++)
1018 const QString &protein_seq)
1021 if(alignment.
end > (std::size_t)protein_seq.size())
1025 .arg(protein_seq.size()));
1027 std::vector<double> potential_mass_errors(alignment.
shifts);
1033 while(
shift > 0 && index > 0)
1035 potential_mass_errors.push_back(
shift);
1039 protein_seq.at(index).toLatin1());
1046 index = alignment.
end + 1;
1047 while(
shift > 0 && index < (std::size_t)protein_seq.size())
1049 potential_mass_errors.push_back(
shift);
1050 qDebug() <<
" shift=" <<
shift <<
" index=" << index
1051 <<
" letter=" << protein_seq.at(index).toLatin1();
1053 protein_seq.at(index).toLatin1());
1057 return potential_mass_errors;
1063 std::size_t minimum_aa_diversity)
1065 qDebug() <<
"sequence=" << sequence <<
" window=" << window
1066 <<
" minimum_aa_diversity=" << minimum_aa_diversity;
1067 if(sequence.size() < window)
1069 auto it_begin = sequence.begin();
1070 auto it_end = sequence.begin() + window;
1071 QString window_copy(sequence.mid(0, window));
1072 while(it_end != sequence.end())
1074 std::partial_sort_copy(it_begin, it_end, window_copy.begin(), window_copy.end());
1076 qDebug() << window_copy;
1077 std::size_t uniqueCount =
1078 std::unique(window_copy.begin(), window_copy.end()) - window_copy.begin();
1080 qDebug() << uniqueCount;
1081 if(uniqueCount < minimum_aa_diversity)
1089const std::vector<pappso::specpeptidoms::KeyCell> &
1092 const std::size_t row_number,
1093 const std::vector<AaPosition> &aa_positions,
1095 const bool fast_align,
collection of integer codes for each amino acid: 0 => null, 1 to 20 => amino acids sorted by their mass (...
double getMass(uint8_t aa_code) const
get the mass of the amino acid given its integer code the amino acid can bear some modification (if a...
pappso_double getMass() const override
bool contains(pappso_double) const
virtual const QString & qwhat() const
std::vector< std::vector< std::size_t > > getPeaks() const
void addPeaks(std::size_t peak1, std::size_t peak2)
LocationSaver m_location_saver
void initpreciseAlign(const SpOMSSpectrum &spectrum, std::size_t length2)
function made for testing the preciseAlign process, initiate the variables for alignment
const Alignment & getBestAlignment() const
Returns a const ref to m_best_alignment.
Scenario getScenario() const
Returns a copy of m_scenario.
std::size_t perfectShiftPossibleEnd(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t end_row, std::size_t end_peak) const
indicates if a perfect shift is possible between the provided positions
void updateAlignmentMatrix(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
updates the scores of the alignment matrix for a given amino acid as well as the location heap/scenar...
void postProcessingAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, std::size_t beginning, std::size_t length, const std::vector< double > &shifts)
performs the post-processing : generates corrected spectra and align them
void preciseAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr, const std::size_t beginning, const std::size_t length)
performs the second alignment search between a protein subsequence and a spectrum.
Alignment m_best_alignment
void correctAlign(const SpOMSProtein &protein_subseq, const SpOMSProtein *protein_ptr, const SpOMSSpectrum &spectrum, std::vector< std::size_t > &peaks_to_remove, std::size_t offset)
Recursively performs the correction of the alignment.
const std::vector< KeyCell > & oneAlignStep(const pappso::specpeptidoms::SpOMSProtein &sequence, const std::size_t row_number, const std::vector< AaPosition > &aa_positions, const SpOMSSpectrum &spectrum, const bool fast_align, const pappso::specpeptidoms::SpOMSProtein *protein_ptr)
function made for testing the fastAlign process, process one line and return the alignment matrix
const ScoreValues & m_scorevalues
Alignment m_best_post_processed_alignment
const std::vector< KeyCell > & getInterestCells() const
convenient function for debug purposes
Alignment m_best_corrected_alignment
bool perfectShiftPossible(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t origin_row, const std::size_t current_row, const std::size_t l_peak, const std::size_t r_peak) const
indicates if a perfect shift is possible between the provided positions
std::vector< std::pair< std::size_t, KeyCell > > m_updated_cells
void fastAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr)
perform the first alignment search between a protein sequence and a spectrum. The member location hea...
static bool checkSequenceDiversity(const QString &sequence, std::size_t window, std::size_t minimum_aa_diversity)
check that the sequence has a minimum amino acid diversity (checkSequenceDiversity)
std::size_t perfectShiftPossibleFrom0(const pappso::specpeptidoms::SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, const std::size_t current_row, const std::size_t r_peak) const
indicates if a perfect shift is possible from the spectrum beginning to the provided peak....
std::vector< KeyCell > m_interest_cells
void initFastAlign(const SpOMSSpectrum &spectrum)
function made for testing the fastAlign process, initiate the variables for alignment
static std::vector< double > getPotentialMassErrors(const pappso::AaCode &aa_code, const Alignment &alignment, const QString &protein_seq)
Returns a list of the potential mass errors corresponding to the provided alignment in the provided p...
void saveBestAlignment(const SpOMSProtein &sequence, const SpOMSSpectrum &spectrum, std::size_t offset)
Stores the best alignment from m_scenario in m_best_alignment.
SemiGlobalAlignment(const ScoreValues &score_values, const pappso::PrecisionPtr precision_ptr, const AaCode &aaCode)
pappso::PrecisionPtr m_precision_ptr
LocationSaver getLocationSaver() const
Returns a copy of m_location_saver.
const QString & getSequence() const
std::vector< SpOMSAa > sliced(std::size_t position, std::size_t length) const
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::size_t getComplementaryPeak(std::size_t peak) const
const std::vector< AaPosition > & getAaPositions(std::uint8_t aa_code) const
Returns the list of aa_positions for a given amino acid code.
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
double getPrecursorMass() const
@ synthetic
does not correspond to existing peak, for computational purpose
@ both
both, the ion and the complement exists in the original spectrum
@ symmetric
new peak : computed symmetric mass from a corresponding native peak
const uint ALIGNMENT_SURPLUS(5)
const int MIN_ALIGNMENT_SCORE(15)
const uint TOL_PEAKS_MISSING(4)
const uint TOL_PEAKS_MISSING_FIRST_COLUMN(5)
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSOXYGEN(15.99491461956)
const PrecisionBase * PrecisionPtr
void reset()
reinitialize to default score_values
QString getPeptideString(const QString &protein_sequence) const
convenient function to get peptide sequence from location
double getNonAlignedMass() const
convenient function to get the remaining non explained mass shift
std::vector< double > shifts
std::vector< std::size_t > peaks
std::size_t getPositionStart() const
get position of start on the protein sequence
PeptideModel m_peptideModel