libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzidentmlreader.cpp
Go to the documentation of this file.
1/**
2 * \file src/input/mzidentml/mzidentmlreader.cpp
3 * \date 24/11/2022
4 * \author Olivier Langella
5 * \brief new method to read mzIdentML XML files
6 */
7
8
9/*******************************************************************************
10 * Copyright (c) 2022 Olivier Langella
11 *<Olivier.Langella@universite-paris-saclay.fr>.
12 *
13 * This file is part of i2MassChroQ.
14 *
15 * i2MassChroQ is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation, either version 3 of the License, or
18 * (at your option) any later version.
19 *
20 * i2MassChroQ is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
27 *
28 ******************************************************************************/
29
30#include "mzidentmlreader.h"
33
34
38#include <QDebug>
39#include <memory>
40
41
42namespace pappso
43{
44namespace cbor
45{
46namespace psm
47{
48QString
50{
51 return QString("%1 %2 %3 %4").arg(cvRef).arg(accession).arg(name).arg(value);
52}
53
54
55QString
57{
58 return QString("%1 %2").arg(name).arg(value);
59}
60
61
64 const QFileInfo &mzident_file)
65{
66
67 mp_monitor = p_monitor;
68 qDebug() << mzident_file.absoluteFilePath() << "'";
70 m_mzidentFile = mzident_file;
71
72
73 try
74 {
75
76 mp_cborWriter = p_output;
77
78 mp_cborWriter->startMap();
79 mp_cborWriter->writeInformations(
80 "mzidentml_file_reader", Utils::getVersion(), "psm", "mzidentml reader");
81
82
83 if(!readFile(mzident_file.absoluteFilePath()))
84 {
85
86 if(errorString() == "Not an MzIdentML file")
87 {
89 QObject::tr("Error reading %1 not mzIdentML file :\n %2")
90 .arg(mzident_file.absoluteFilePath())
91 .arg(errorString()));
92 }
93 else
94 {
95 throw pappso::PappsoException(QObject::tr("Error reading %1 mzIdentML file :\n %2")
96 .arg(mzident_file.absoluteFilePath())
97 .arg(errorString()));
98 }
99 }
100
101
102 mp_cborWriter->endMap();
103 }
105 {
106 throw err;
107 }
108 catch(pappso::PappsoException &other_err)
109 {
110 throw pappso::PappsoException(QObject::tr("Error reading mzIdentML file %1:\n%2")
111 .arg(mzident_file.absoluteFilePath())
112 .arg(other_err.qwhat()));
113 }
114}
115
119
120
121void
123{
124 qDebug();
125 mp_cborWriter->append("parameter_map");
126 mp_cborWriter->writeCborMap(m_cborParameterMap);
127
128 QStringList fasta_files;
129 for(auto &pair_searchdb : m_mzidSearchDatabaseIdMap)
130 {
131 fasta_files << pair_searchdb.second.file;
132 }
133 mp_cborWriter->append("target_fasta_files");
134 mp_cborWriter->writeArray(fasta_files);
135
136
137 for(std::pair<const QString, MzidDBSequence> &pair_sequence : m_MzidDBSequenceIdMap)
138 {
139 PsmProtein psm_protein;
140
141 psm_protein.isContaminant = false;
142 psm_protein.isTarget = !pair_sequence.second.is_decoy;
143 psm_protein.protein_sp = pair_sequence.second.protein_sp;
144
145 m_proteinMap.insert(psm_protein);
146 }
147
148 mp_cborWriter->append(QString("protein_map"));
149 m_proteinMap.writeMap(*mp_cborWriter);
150
151 mp_cborWriter->append("sample_list");
152 mp_cborWriter->startArray();
153 for(std::pair<const QString, std::vector<SpectrumIdentificationResult>> &pair_spectra :
155 {
156
157 mp_cborWriter->startMap();
158 mp_cborWriter->append("name");
159 mp_cborWriter->append(m_mzidSpectraDataIdMap.at(pair_spectra.first).name);
160
161 mp_cborWriter->append("identification_file_list");
162 mp_cborWriter->startMap();
163 mp_cborWriter->append("name");
164 mp_cborWriter->append(m_mzidentFile.absoluteFilePath());
165 mp_cborWriter->endMap();
166 // one_sample.cbor_core_sample.value("identification_file_list")
167 // .toCbor(m_sageReader.getCborStreamWriter());
168
169
170 mp_cborWriter->append("peaklist_file");
171 mp_cborWriter->startMap();
172 mp_cborWriter->append("name");
173 mp_cborWriter->append(m_mzidSpectraDataIdMap.at(pair_spectra.first).file);
174 mp_cborWriter->endMap();
175 //"scan_list": [
176
177 mp_cborWriter->append("scan_list");
178 mp_cborWriter->startArray(pair_spectra.second.size());
179 for(SpectrumIdentificationResult &it_spectrum_ident : pair_spectra.second)
180 {
181 writeSpectrumIdentificationResult(it_spectrum_ident);
182 }
183 mp_cborWriter->endArray();
184
185
186 mp_cborWriter->endMap();
187 }
188
189 mp_cborWriter->endArray();
190 qDebug();
191}
192
193void
195{
196 // mp_monitor->setStatus("reading X!Tandem result file");
197 if(m_qxmlStreamReader.readNextStartElement())
198 {
199
200 qDebug() << m_qxmlStreamReader.name().toString();
201 if(m_qxmlStreamReader.name().toString().toLower() == "mzidentml")
202 {
203 while(m_qxmlStreamReader.readNextStartElement())
204 {
205 // cvList
206 if(m_qxmlStreamReader.name().toString() == "cvList")
207 {
208 m_qxmlStreamReader.skipCurrentElement();
209 }
210 // AnalysisSoftwareList
211 else if(m_qxmlStreamReader.name().toString() == "AnalysisSoftwareList")
212 {
213 while(readAnalysisSoftware())
214 {
215 }
216 } // Provider
217 else if(m_qxmlStreamReader.name().toString() == "Provider")
218 {
219 m_qxmlStreamReader.skipCurrentElement();
220 }
221 // AuditCollection
222 else if(m_qxmlStreamReader.name().toString() == "AuditCollection")
223 {
224 m_qxmlStreamReader.skipCurrentElement();
225 }
226 // SequenceCollection
227 else if(m_qxmlStreamReader.name().toString() == "SequenceCollection")
228 {
230 {
231 }
232 }
233 // AnalysisCollection
234 else if(m_qxmlStreamReader.name().toString() == "AnalysisCollection")
235 {
237 {
238 }
239 }
240 // AnalysisProtocolCollection
241 else if(m_qxmlStreamReader.name().toString() == "AnalysisProtocolCollection")
242 {
244 }
245
246 // DataCollection
247 else if(m_qxmlStreamReader.name().toString() == "DataCollection")
248 {
250 {
251 }
252 }
253 else if(m_qxmlStreamReader.name().toString() == "BibliographicReference")
254 {
255 m_qxmlStreamReader.skipCurrentElement();
256 }
257
258 else
259 {
260 m_qxmlStreamReader.raiseError(
261 QObject::tr("element %1 not implemented").arg(m_qxmlStreamReader.name()));
262 m_qxmlStreamReader.skipCurrentElement();
263 }
264 }
265 }
266 else
267 {
268 m_qxmlStreamReader.raiseError(QObject::tr("Not an MzIdentML input file"));
269 m_qxmlStreamReader.skipCurrentElement();
270 }
271 }
272}
273
274
275void
277{
278 //<AnalysisProtocolCollection>
279
280 qDebug();
281 while(m_qxmlStreamReader.readNextStartElement())
282 {
283 qDebug() << m_qxmlStreamReader.name();
284 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentificationProtocol")
285 {
286 auto it_soft = m_IdentificationEngineMap.find(
287 m_qxmlStreamReader.attributes().value("analysisSoftware_ref").toString());
288 if(it_soft != m_IdentificationEngineMap.end())
289 {
290 IdentificationEngine identification_engine = it_soft->second;
291
292 if(identification_engine == IdentificationEngine::XTandem)
293 {
294 //<SpectrumIdentificationProtocol analysisSoftware_ref="ID_software"
295 // id="SearchProtocol_1">
296 QCborMap identification_engine_parameters;
297
298 while(m_qxmlStreamReader.readNextStartElement())
299 {
300 if(m_qxmlStreamReader.name().toString() == "SearchType")
301 {
302 m_qxmlStreamReader.skipCurrentElement();
303 }
304 else if(m_qxmlStreamReader.name().toString() == "ModificationParams")
305 {
306 m_qxmlStreamReader.skipCurrentElement();
307 }
308
309 else if(m_qxmlStreamReader.name().toString() == "Threshold")
310 {
311 m_qxmlStreamReader.skipCurrentElement();
312 }
313 else if(m_qxmlStreamReader.name().toString() == "ParentTolerance")
314 {
315 m_qxmlStreamReader.skipCurrentElement();
316 }
317 else if(m_qxmlStreamReader.name().toString() == "FragmentTolerance")
318 {
319 m_qxmlStreamReader.skipCurrentElement();
320 }
321 else if(m_qxmlStreamReader.name().toString() == "Enzymes")
322 {
323 m_qxmlStreamReader.skipCurrentElement();
324 }
325 else if(m_qxmlStreamReader.name().toString() == "AdditionalSearchParams")
326 {
327 while(m_qxmlStreamReader.readNextStartElement())
328 {
329 if(m_qxmlStreamReader.name().toString() == "userParam")
330 {
331 //<userParam name="list path, default parameters"
332 // value="/tmp/i2masschroq.ubFSuT/QExactive_analysis_FDR_nosemi.xml"/>
333 identification_engine_parameters.insert(
334 m_qxmlStreamReader.attributes().value("name").toString(),
335 m_qxmlStreamReader.attributes().value("value").toString());
336 m_qxmlStreamReader.skipCurrentElement();
337 }
338 else
339 {
340 m_qxmlStreamReader.skipCurrentElement();
341 }
342 }
343 }
344 else
345 {
346
347 m_qxmlStreamReader.raiseError(QObject::tr("element %1 not implemented")
348 .arg(m_qxmlStreamReader.name()));
349 m_qxmlStreamReader.skipCurrentElement();
350 }
351 }
352
353 m_cborParameterMap.insert(QString("xtandem"),
354 identification_engine_parameters.toCborValue());
355 // qWarning() << "coucou";
356 }
357 else
358 {
359 // other than tandem
360 m_qxmlStreamReader.skipCurrentElement();
361 }
362 }
363 else
364 {
365 m_qxmlStreamReader.skipCurrentElement();
366 }
367 }
368 else
369 {
370 m_qxmlStreamReader.raiseError(
371 QObject::tr("element %1 not implemented").arg(m_qxmlStreamReader.name()));
372 m_qxmlStreamReader.skipCurrentElement();
373 }
374 }
375 qDebug();
376}
377
378bool
380{
381
382
383 /** @brief stores the current analysis software id
384 */
385 QString analysisSoftwareId;
386
387
388 /** @brief tells if the software name has been found and is handled by the
389 * parser
390 */
391 IdentificationEngine analysisSotwareNameFound = IdentificationEngine::unknown;
392 if(m_qxmlStreamReader.readNextStartElement())
393 {
394 if(m_qxmlStreamReader.name().toString() == "AnalysisSoftware")
395 {
396
397 // <AnalysisSoftware version="0.0.9" name="DeepProt" id="as1">
398
399
400 analysisSoftwareId = m_qxmlStreamReader.attributes().value("id").toString();
401 QString software_name = m_qxmlStreamReader.attributes().value("name").toString();
402
403
404 m_analysisSoftwareVersion = m_qxmlStreamReader.attributes().value("version").toString();
405
406 if(software_name == "SpecOMS")
407 {
408 analysisSotwareNameFound = IdentificationEngine::SpecOMS;
409
410 m_qxmlStreamReader.skipCurrentElement();
411 }
412 else if(software_name == "DeepProt")
413 {
414 analysisSotwareNameFound = IdentificationEngine::SpecOMS;
415
416 m_qxmlStreamReader.skipCurrentElement();
417 }
418 else
419 {
420 while(m_qxmlStreamReader.readNextStartElement())
421 {
422 if(m_qxmlStreamReader.name().toString() == "SoftwareName")
423 {
424 while(m_qxmlStreamReader.readNextStartElement())
425 {
426 if(m_qxmlStreamReader.name().toString() == "cvParam")
427 {
428 CvParam cv_param = readCvParam();
429 if(cv_param.accession == "MS:1001476")
430 {
431 analysisSotwareNameFound = IdentificationEngine::XTandem;
432 }
433 else if(cv_param.accession == "MS:1002048")
434 {
435 analysisSotwareNameFound = IdentificationEngine::MSGFplus;
436 }
437 else if(cv_param.accession == "MS:1001946")
438 {
439 analysisSotwareNameFound = IdentificationEngine::PEAKS_Studio;
440 }
441 }
442 }
443 }
444 else
445 {
446 m_qxmlStreamReader.skipCurrentElement();
447 }
448 }
449 }
450
451
452 switch(analysisSotwareNameFound)
453 {
455 break;
457 break;
459 break;
461 break;
462
463 default:
464 m_qxmlStreamReader.raiseError(
465 QObject::tr("identification results from %1 are not supported yet, "
466 "Please contact "
467 "the PAPPSO team.")
468 .arg(software_name));
469 return false;
470 }
471
472
473 auto it = m_IdentificationEngineMap.insert(
474 std::pair<QString, IdentificationEngine>(analysisSoftwareId, analysisSotwareNameFound));
475
476 if(it.second == false)
477 {
478 it.first->second = analysisSotwareNameFound;
479 }
480 }
481 else
482 {
483 m_qxmlStreamReader.raiseError(QObject::tr("Not an MzIdentML input file"));
484 m_qxmlStreamReader.skipCurrentElement();
485 return false;
486 }
487 return true;
488 }
489 return false;
490}
491
492bool
494{
495 if(m_qxmlStreamReader.readNextStartElement())
496 {
497 if(m_qxmlStreamReader.name().toString() == "DBSequence")
498 {
500 }
501 else if(m_qxmlStreamReader.name().toString() == "Peptide")
502 {
503 readPeptide();
504 }
505 // PeptideEvidence
506 else if(m_qxmlStreamReader.name().toString() == "PeptideEvidence")
507 {
509 }
510 else
511 {
512 m_qxmlStreamReader.raiseError(
513 QObject::tr("Error in MzIdentML input file, %1 no DBSequence")
514 .arg(m_qxmlStreamReader.name()));
515 m_qxmlStreamReader.skipCurrentElement();
516 return false;
517 }
518 return true;
519 }
520 return false;
521}
522
523void
525{
526
527 // attributes.value("base_name")
528 // ProteinXtpSp sp_xtp_protein = _current_protein.makeProteinXtpSp();
529 MzidDBSequence dbsequence;
530 dbsequence.is_decoy = false;
531 dbsequence.accession = m_qxmlStreamReader.attributes().value("accession").toString();
532
533 QString accession_description = dbsequence.accession;
534 dbsequence.searchDatabase_ref =
535 m_qxmlStreamReader.attributes().value("searchDatabase_ref").toString();
536
537 QString id = m_qxmlStreamReader.attributes().value("id").toString();
538
539
540 while(m_qxmlStreamReader.readNextStartElement())
541 {
542 if(m_qxmlStreamReader.name().toString() == "cvParam")
543 {
544 CvParam cv_param = readCvParam();
545
546
547 if(cv_param.accession == "MS:1001088")
548 {
549 // protein description
550 // protein.get()->setDescription(cv_param.value);
551 dbsequence.description = cv_param.value;
552 accession_description.append(" ").append(dbsequence.description);
553 }
554 else if(cv_param.accession == "MS:1001195")
555 {
556 // PSI-MS MS:1001195 decoy DB type reverse
557 // protein.get()->setIsDecoy(true);
558 dbsequence.is_decoy = true;
559 }
560 else
561 {
562 dbsequence.cvParamList.push_back(cv_param);
563 }
564 }
565 else if(m_qxmlStreamReader.name().toString().toLower() == "seq")
566 {
567
568
569 dbsequence.sequence = m_qxmlStreamReader.readElementText();
570 }
571 else
572 {
573
574 m_qxmlStreamReader.raiseError(
575 QObject::tr("Error in MzIdentML/DBSequence unexpected %1 tag")
576 .arg(m_qxmlStreamReader.name()));
577 m_qxmlStreamReader.skipCurrentElement();
578 }
579 }
580
581
582 // qWarning() << accession_description;
583 dbsequence.protein_sp = std::make_shared<Protein>(accession_description, dbsequence.sequence);
584
585 m_MzidDBSequenceIdMap.insert({id, dbsequence});
586 // searchDatabase_ref="SearchDB_1"
587
588 // protein.get()->setFastaFileP(fastaFile.get());
589}
590
593{
594 CvParam cv_param;
595 cv_param.cvRef = m_qxmlStreamReader.attributes().value("cvRef").toString();
596 cv_param.accession = m_qxmlStreamReader.attributes().value("accession").toString();
597 cv_param.name = m_qxmlStreamReader.attributes().value("name").toString();
598 cv_param.value = m_qxmlStreamReader.attributes().value("value").toString();
599 cv_param.unitAccession = m_qxmlStreamReader.attributes().value("unitAccession").toString();
600 cv_param.unitName = m_qxmlStreamReader.attributes().value("unitName").toString();
601 cv_param.unitCvRef = m_qxmlStreamReader.attributes().value("unitCvRef").toString();
602 m_qxmlStreamReader.skipCurrentElement();
603 return cv_param;
604}
605
608{
609 UserParam user_param;
610
611 user_param.name = m_qxmlStreamReader.attributes().value("name").toString();
612 user_param.value = m_qxmlStreamReader.attributes().value("value").toString();
613 m_qxmlStreamReader.skipCurrentElement();
614
615 return user_param;
616}
617
618
619void
621{
622 std::shared_ptr<pappso::Peptide> peptide_sp;
623
624 QString xml_id = m_qxmlStreamReader.attributes().value("id").toString();
625
626 // PeptideSequence
627 if(m_qxmlStreamReader.readNextStartElement())
628 {
629 if(m_qxmlStreamReader.name().toString() == "PeptideSequence")
630 {
631 peptide_sp = std::make_shared<Peptide>(m_qxmlStreamReader.readElementText().simplified());
632 }
633 else
634 {
635
636 m_qxmlStreamReader.raiseError(
637 QObject::tr("Error in MzIdentML/Peptide no PeptideSequence"));
638 }
639 }
640
641 while(m_qxmlStreamReader.readNextStartElement())
642 {
643 if(m_qxmlStreamReader.name().toString() == "Modification")
644 {
645 // <Modification monoisotopicMassDelta="-0.02682025649"
646 // location="1">
647 // <cvParam accession="-0.0268203" cvRef="PSI-MOD" name=""/>
648 // </Modification>
649
650 Modification modification;
651 modification.monoisotopicMassDelta =
652 m_qxmlStreamReader.attributes().value("monoisotopicMassDelta").toDouble();
653 modification.location = m_qxmlStreamReader.attributes().value("location").toUInt();
654 bool is_cv_param = false;
655 while(m_qxmlStreamReader.readNextStartElement())
656 {
657 if(m_qxmlStreamReader.name().toString() == "cvParam")
658 {
659 is_cv_param = true;
660 modification.cvParam = readCvParam();
661
662
663 // qDebug() << "startElement_aa ";
664 pappso::AaModificationP modif = nullptr;
665
666
667 qDebug() << modification.monoisotopicMassDelta;
668 if(modification.cvParam.accession != "")
669 {
670 qDebug() << modification.cvParam.accession;
671 if(modification.cvParam.accession.startsWith("UNIMOD:"))
672 {
674 modification.cvParam.accession);
675 }
676 else
677 {
678 // hope it is psi mod:
679 if(modification.cvParam.accession.startsWith("MOD:"))
680 {
681 modif =
683 }
684 else if(modification.cvParam.accession == "MS:1001460")
685 {
686 //[Term]
687 // id: MS:1001460
688 // name: unknown modification
689 // def: "This term should be given if the
690 // modification was unknown." [PSI:PI] is_a:
691 // MS:1001471 ! peptide modification details
693 modification.monoisotopicMassDelta);
694 }
695 else
696 {
697 qInfo() << "MzIdentMlSaxHandler::endElement_"
698 "Modification unknown "
699 "modification "
700 << modification.cvParam.accession << " "
701 << modification.cvParam.name;
702 }
703 }
704 }
705
706 if(modif == nullptr)
707 {
708 m_qxmlStreamReader.raiseError(
709 QObject::tr("Error in MzIdentML/Peptide/Modification/cvParam "
710 "modification accession %1 not found")
711 .arg(modification.cvParam.accession));
712 return;
713 }
714
715 if(modification.location == 0)
716 {
717 peptide_sp.get()->addAaModification(modif, 0);
718 }
719 else
720 {
721 peptide_sp.get()->addAaModification(modif, modification.location - 1);
722 }
723 }
724 else
725 {
726
727 m_qxmlStreamReader.raiseError(
728 QObject::tr("Error in MzIdentML/Peptide/Modification "
729 "unexpected %1 tag")
730 .arg(m_qxmlStreamReader.name()));
731 }
732 }
733
734 if(is_cv_param == false)
735 {
736 pappso::AaModificationP modif_without_cvparam = nullptr;
737 // no cv param element
738
739 if(modification.location == 0)
740 {
741 modif_without_cvparam =
743 peptide_sp.get()->getAa(0).getAminoAcidChar(),
744 modification.monoisotopicMassDelta);
745 peptide_sp.get()->addAaModification(modif_without_cvparam, 0);
746 }
747 else
748 {
749 modif_without_cvparam =
751 peptide_sp.get()->getAa(modification.location - 1).getAminoAcidChar(),
752 modification.monoisotopicMassDelta);
753 peptide_sp.get()->addAaModification(modif_without_cvparam,
754 modification.location - 1);
755 }
756 }
757 }
758
759 else
760 {
761
762 m_qxmlStreamReader.raiseError(QObject::tr("Error in MzIdentML/Peptide unexpected %1 tag")
763 .arg(m_qxmlStreamReader.name()));
764 }
765 }
766
767
768 m_PeptideIdMap.insert(std::pair<QString, PeptideSp>(xml_id, peptide_sp));
769}
770
771bool
773{
774 qDebug();
776
777 auto itprot =
778 m_MzidDBSequenceIdMap.find(m_qxmlStreamReader.attributes().value("dBSequence_ref").toString());
779 if(itprot == m_MzidDBSequenceIdMap.end())
780 {
781 m_qxmlStreamReader.raiseError(
782 QObject::tr("dBSequence_ref %1 not defined")
783 .arg(m_qxmlStreamReader.attributes().value("dBSequence_ref").toString()));
784 return false;
785 }
786 qDebug();
787 pe.protein = itprot->second.protein_sp;
788
789
790 auto itpep = m_PeptideIdMap.find(m_qxmlStreamReader.attributes().value("peptide_ref").toString());
791 if(itpep == m_PeptideIdMap.end())
792 {
793 m_qxmlStreamReader.raiseError(
794 QObject::tr("peptide_ref %1 not defined")
795 .arg(m_qxmlStreamReader.attributes().value("peptide_ref").toString()));
796 return false;
797 }
798 qDebug();
799 pe.peptide = itpep->second;
800
801 pe.start = m_qxmlStreamReader.attributes().value("start").toUInt() - 1;
802 pe.end = m_qxmlStreamReader.attributes().value("end").toUInt() - 1;
803 pe.isDecoy = false;
804 if(m_qxmlStreamReader.attributes().value("isDecoy").toString() == "true")
805 {
806 pe.isDecoy = true;
807 }
808 qDebug();
809 m_MzidPeptideEvidenceIdMap.insert(std::pair<QString, MzidPeptideEvidence>(
810 m_qxmlStreamReader.attributes().value("id").toString(), pe));
811 m_qxmlStreamReader.skipCurrentElement();
812 qDebug();
813 return true;
814}
815
816bool
818{
819
820 qDebug();
821 if(m_qxmlStreamReader.readNextStartElement())
822 {
823 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentification")
824 {
826 qDebug();
827 }
828 else if(m_qxmlStreamReader.name().toString() == "ProteinDetection")
829 {
830 m_qxmlStreamReader.skipCurrentElement();
831 }
832 else
833 {
834 m_qxmlStreamReader.raiseError(
835 QObject::tr("Error in MzIdentML/AnalysisCollection, unexpected %1 t")
836 .arg(m_qxmlStreamReader.name()));
837 m_qxmlStreamReader.skipCurrentElement();
838 return false;
839 }
840 return true;
841 }
842 return false;
843}
844
845void
847{
848
849 m_qxmlStreamReader.skipCurrentElement();
850}
851
852bool
854{
855
856 qDebug();
857 if(m_qxmlStreamReader.readNextStartElement())
858 {
859 if(m_qxmlStreamReader.name().toString() == "Inputs")
860 {
861 readInputs();
862 }
863
864 // AnalysisData
865 else if(m_qxmlStreamReader.name().toString() == "AnalysisData")
866 {
867 qDebug();
869 }
870 else
871 {
872 m_qxmlStreamReader.raiseError(
873 QObject::tr("Error in MzIdentML/DataCollection, unexpected %1 tag")
874 .arg(m_qxmlStreamReader.name()));
875 m_qxmlStreamReader.skipCurrentElement();
876 return false;
877 }
878 return true;
879 }
880 return false;
881}
882
883void
885{
886 while(m_qxmlStreamReader.readNextStartElement())
887 {
888 if(m_qxmlStreamReader.name().toString() == "SearchDatabase")
889 {
891 }
892 else if(m_qxmlStreamReader.name().toString() == "SpectraData")
893 {
895 }
896 else if(m_qxmlStreamReader.name().toString() == "SourceFile")
897 {
898 m_qxmlStreamReader.skipCurrentElement();
899 }
900
901 else
902 {
903 m_qxmlStreamReader.raiseError(
904 QObject::tr("Error in MzIdentML/DataCollection/Inputs, unexpected %1 tag")
905 .arg(m_qxmlStreamReader.name()));
906 m_qxmlStreamReader.skipCurrentElement();
907 }
908 }
909}
910
911
912//<SearchDatabase numDatabaseSequences="136828"
913// location="/home/thierry/test/MS-GF+/Genome_Z_mays_v5a_conta.fasta"
914// id="SearchDB_1">
915// <FileFormat>
916// <cvParam cvRef="PSI-MS" accession="MS:1001348" name="FASTA format"/>
917// </FileFormat>
918// <DatabaseName>
919// <userParam name="Genome_Z_mays_v5a_conta.fasta"/>
920// </DatabaseName>
921// </SearchDatabase>
922bool
924{
925 /* <SearchDatabase
926 location="/gorgone/pappso/versions_logiciels_pappso/tandemng/database/Genome_Z_mays_5a.fasta"
927 id="SearchDB_0"> <FileFormat> <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA
928 format"/>
929 </FileFormat>
930 <DatabaseName>
931 <userParam name="DatabaseName" value="Genome_Z_mays_5a.fasta"/>
932 </DatabaseName>
933 <cvParam accession="MS:1001197" cvRef="PSI-MS" name="DB composition
934 target+decoy"/> <cvParam accession="MS: 1001283" cvRef="PSI-MS" name="decoy DB accession
935 regexp" value="^XXX"/> <cvParam accession="MS: 1001195" cvRef="PSI-MS" name="decoy DB type
936 reverse"/>
937 </SearchDatabase>
938 */
939 qDebug();
940 QString id = m_qxmlStreamReader.attributes().value("id").toString();
941 // auto itfasta = m_FastaFileIdMap.find(id);
942
943 MzidSearchDatabase search_database;
944
945 search_database.file = m_qxmlStreamReader.attributes().value("location").toString();
946
947
948 if(search_database.file.isEmpty())
949 {
950 m_qxmlStreamReader.raiseError(QObject::tr("SearchDatabase id %1 location is empty").arg(id));
951 return false;
952 }
953
954
955 m_mzidSearchDatabaseIdMap.insert({id, search_database});
956
957 m_qxmlStreamReader.skipCurrentElement();
958 qDebug();
959 return true;
960}
961
962void
964{
965
966 qDebug();
967 MzidSpectraData spectra_data;
968 spectra_data.file = m_qxmlStreamReader.attributes().value("location").toString();
969
970
971 // msrun.get()->setXmlId(attributes.value("id"));
972 spectra_data.name = m_qxmlStreamReader.attributes().value("name").toString();
973
974
975 m_mzidSpectraDataIdMap.insert(std::pair<QString, MzidSpectraData>(
976 m_qxmlStreamReader.attributes().value("id").toString(), spectra_data));
977 m_qxmlStreamReader.skipCurrentElement();
978}
979
980void
982{
983
984 qDebug() << m_qxmlStreamReader.name();
985 while(m_qxmlStreamReader.readNextStartElement())
986 {
987 qDebug() << m_qxmlStreamReader.name();
988 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentificationList")
989 {
990 while(m_qxmlStreamReader.readNextStartElement())
991 {
992 qDebug() << m_qxmlStreamReader.name();
993 if(m_qxmlStreamReader.name().toString() == "SpectrumIdentificationResult")
994 {
996 }
997 else if(m_qxmlStreamReader.name().toString() == "FragmentationTable")
998 {
999 m_qxmlStreamReader.skipCurrentElement();
1000 }
1001 else
1002 {
1003 m_qxmlStreamReader.raiseError(
1004 QObject::tr("Error in "
1005 "MzIdentML/DataCollection/AnalysisData/"
1006 "SpectrumIdentificationList, unexpected %1 tag")
1007 .arg(m_qxmlStreamReader.name()));
1008 m_qxmlStreamReader.skipCurrentElement();
1009 }
1010 }
1011 }
1012 else if(m_qxmlStreamReader.name().toString() == "ProteinDetectionList")
1013 {
1014 // ProteinDetectionList
1015 m_qxmlStreamReader.skipCurrentElement();
1016 }
1017 else
1018 {
1019 m_qxmlStreamReader.raiseError(
1020 QObject::tr("Error in MzIdentML/DataCollection/AnalysisData, "
1021 "unexpected %1 tag")
1022 .arg(m_qxmlStreamReader.name()));
1023 m_qxmlStreamReader.skipCurrentElement();
1024 }
1025 }
1026 qDebug();
1027 finalDebrief();
1028}
1029
1030void
1032{
1033 qDebug() << m_qxmlStreamReader.name();
1034
1035 QString spectra_id = m_qxmlStreamReader.attributes().value("spectraData_ref").toString();
1036 auto it_spectra_data = m_mzidSpectraDataIdMap.find(spectra_id);
1037
1038 if(it_spectra_data == m_mzidSpectraDataIdMap.end())
1039 {
1040 m_qxmlStreamReader.raiseError(QObject::tr("spectraData_ref %1 not defined in "
1041 "m_mzidSpectraDataIdMap")
1042 .arg(spectra_id));
1043 }
1044
1045 auto it_insert_ident = m_spectrumIdentificationResultBySpectraIdMap.insert({spectra_id, {}});
1046
1047 it_insert_ident.first->second.push_back({});
1048 SpectrumIdentificationResult &spectrum_identification_result =
1049 it_insert_ident.first->second.back();
1050 spectrum_identification_result.cvParamList.clear();
1051 spectrum_identification_result.userParamList.clear();
1052 spectrum_identification_result.spectrumIdentificationItemList.clear();
1053
1054 spectrum_identification_result.spectrumID =
1055 m_qxmlStreamReader.attributes().value("spectrumID").toString();
1056 spectrum_identification_result.id = m_qxmlStreamReader.attributes().value("id").toString();
1057
1058
1059 qDebug() << m_qxmlStreamReader.name();
1060
1061 while(m_qxmlStreamReader.readNextStartElement())
1062 {
1063 qDebug() << m_qxmlStreamReader.name();
1064 if(m_qxmlStreamReader.name() == QString("SpectrumIdentificationItem"))
1065 {
1066 qDebug();
1067 readSpectrumIdentificationItem(spectrum_identification_result);
1068 }
1069 else if(m_qxmlStreamReader.name() == QString("cvParam"))
1070 {
1071 CvParam cv_param = readCvParam();
1072 qDebug() << cv_param.toString();
1073 spectrum_identification_result.cvParamList.push_back(cv_param);
1074 }
1075
1076 else if(m_qxmlStreamReader.name() == QString("userParam"))
1077 {
1078 UserParam user_param = readUserParam();
1079 qDebug() << user_param.toString();
1080 }
1081 else
1082 {
1083 m_qxmlStreamReader.raiseError(
1084 QObject::tr("Error in "
1085 "MzIdentML/DataCollection/AnalysisData/"
1086 "SpectrumIdentificationList/"
1087 "SpectrumIdentificationResult, unexpected %1 tag")
1088 .arg(m_qxmlStreamReader.name()));
1089 // m_qxmlStreamReader.skipCurrentElement();
1090 }
1091 }
1092
1093 if(m_qxmlStreamReader.hasError())
1094 return;
1095 qDebug() << m_qxmlStreamReader.name();
1096
1097 // find scan number
1098 spectrum_identification_result.spectrumIndex = 0;
1099 spectrum_identification_result.isSpectrumIndex = false;
1100 spectrum_identification_result.scanNum = 0;
1101 spectrum_identification_result.retentionTime = 0;
1102
1103 // spectrumID="index=194"
1104 if(spectrum_identification_result.spectrumID.startsWith("index="))
1105 {
1106 bool is_ok = false;
1107 spectrum_identification_result.spectrumIndex =
1108 spectrum_identification_result.spectrumID.mid(6).toULongLong(&is_ok);
1109 spectrum_identification_result.isSpectrumIndex = true;
1110 if(!is_ok)
1111 {
1112 m_qxmlStreamReader.raiseError(QObject::tr("reading spectrum index failed in %1"
1113 "SpectrumIdentificationResult id %2")
1114 .arg(spectrum_identification_result.spectrumID)
1115 .arg(spectrum_identification_result.id));
1116 return;
1117 }
1118 }
1119
1120 // <cvParam cvRef="PSI-MS" accession="MS:1001115" name="scan number(s)"
1121 // value="16079"/>
1122 for(auto cvParam : spectrum_identification_result.cvParamList)
1123 {
1124
1125 qDebug() << cvParam.toString();
1126 if(cvParam.accession == "MS:1001115")
1127 {
1128 spectrum_identification_result.scanNum = cvParam.value.toUInt();
1129 }
1130 else if(cvParam.accession == "MS:1003062")
1131 {
1132 spectrum_identification_result.isSpectrumIndex = true;
1133 spectrum_identification_result.spectrumIndex = cvParam.value.toUInt();
1134 }
1135 else if((cvParam.accession == "MS:1000016") || (cvParam.accession == "MS:1000894"))
1136 {
1137 //[Term]
1138 // id: MS:1000894
1139 // name: retention time
1140 // def: "A time interval from the start of chromatography when an
1141 // analyte exits a chromatographic column." [PSI:MS]
1142
1143 // [Term]
1144 // id: MS:1000016
1145 // name: scan start time
1146 // def: "The time that an analyzer started a scan, relative to the
1147 // start of the MS run." [PSI:MS]
1148
1149
1150 spectrum_identification_result.retentionTime = cvParam.value.toDouble();
1151 }
1152 }
1153 if((spectrum_identification_result.scanNum == 0) &&
1154 (spectrum_identification_result.isSpectrumIndex == false))
1155 {
1156 m_qxmlStreamReader.raiseError(QObject::tr("scan number or spectrum index not found in "
1157 "SpectrumIdentificationResult id %1")
1158 .arg(spectrum_identification_result.id));
1159 }
1160
1161
1162 if(spectrum_identification_result.retentionTime == 0)
1163 {
1164 m_qxmlStreamReader.raiseError(
1165 QObject::tr("retention time not found in SpectrumIdentificationResult id %1")
1166 .arg(spectrum_identification_result.id));
1167 }
1168
1169
1170 for(auto spectrumIdentificationItem :
1171 spectrum_identification_result.spectrumIdentificationItemList)
1172 {
1173 // processSpectrumIdentificationItem(spectrum_identification_result,
1174 // spectrumIdentificationItem);
1175 }
1176}
1177
1178void
1180 MzIdentMlReader::SpectrumIdentificationResult &spectrum_identification_result)
1181{
1182
1183 qDebug();
1184 spectrum_identification_result.spectrumIdentificationItemList.push_back(
1186 spectrum_identification_result.spectrumIdentificationItemList.back()
1187 .mzidPeptideEvidenceList.clear();
1188 spectrum_identification_result.spectrumIdentificationItemList.back().cvParamList.clear();
1189 spectrum_identification_result.spectrumIdentificationItemList.back().userParamList.clear();
1190
1191 spectrum_identification_result.spectrumIdentificationItemList.back().chargeState =
1192 m_qxmlStreamReader.attributes().value("chargeState").toUInt();
1193
1194 spectrum_identification_result.spectrumIdentificationItemList.back().experimentalMassToCharge =
1195 m_qxmlStreamReader.attributes().value("experimentalMassToCharge").toDouble();
1196
1197
1198 auto itpeptide =
1199 m_PeptideIdMap.find(m_qxmlStreamReader.attributes().value("peptide_ref").toString());
1200
1201 if(itpeptide == m_PeptideIdMap.end())
1202 {
1203 m_qxmlStreamReader.raiseError(QObject::tr("peptide_ref %1 not defined")
1204 .arg(m_qxmlStreamReader.attributes().value("peptide_ref")));
1205 }
1206 spectrum_identification_result.spectrumIdentificationItemList.back().peptide = itpeptide->second;
1207
1208
1209 while(m_qxmlStreamReader.readNextStartElement())
1210 {
1211 qDebug() << m_qxmlStreamReader.name();
1212 if(m_qxmlStreamReader.name() == QString("PeptideEvidenceRef"))
1213 {
1214
1215 auto itpeptideEvidence = m_MzidPeptideEvidenceIdMap.find(
1216 m_qxmlStreamReader.attributes().value("peptideEvidence_ref").toString());
1217
1218 if(itpeptideEvidence == m_MzidPeptideEvidenceIdMap.end())
1219 {
1220 m_qxmlStreamReader.raiseError(
1221 QObject::tr("peptideEvidence_ref %1 not defined")
1222 .arg(m_qxmlStreamReader.attributes().value("peptideEvidence_ref")));
1223 }
1224
1225 spectrum_identification_result.spectrumIdentificationItemList.back()
1226 .mzidPeptideEvidenceList.push_back(itpeptideEvidence->second);
1227 m_qxmlStreamReader.skipCurrentElement();
1228 }
1229 else if(m_qxmlStreamReader.name() == QString("cvParam"))
1230 {
1231 spectrum_identification_result.spectrumIdentificationItemList.back()
1232 .cvParamList.push_back(readCvParam());
1233 qDebug() << spectrum_identification_result.spectrumIdentificationItemList.back()
1234 .cvParamList.back()
1235 .toString();
1236 }
1237 else if(m_qxmlStreamReader.name() == QString("userParam"))
1238 {
1239 UserParam user_param = readUserParam();
1240 spectrum_identification_result.spectrumIdentificationItemList.back()
1241 .userParamList.push_back(user_param);
1242 qDebug() << user_param.toString();
1243 }
1244 else
1245 {
1246 m_qxmlStreamReader.raiseError(QObject::tr("Error in "
1247 "MzIdentML/DataCollection/AnalysisData/"
1248 "SpectrumIdentificationList/"
1249 "SpectrumIdentificationResult/"
1250 "SpectrumIdentificationItem, unexpected %1 tag")
1251 .arg(m_qxmlStreamReader.name()));
1252 }
1253 }
1254 qDebug();
1255}
1256/*
1257void
1258MzIdentMlReader::processSpectrumIdentificationItem(
1259 SpectrumIdentificationResult &spectrum_identification_result,
1260 const SpectrumIdentificationItem &spectrumIdentificationItem)
1261{
1262
1263
1264 qDebug();
1265
1266 if(spectrum_identification_result.isSpectrumIndex)
1267 {
1268 spectrum_identification_result.scanNum = spectrum_identification_result.spectrumIndex;
1269 }
1270 PeptideEvidence peptide_evidence(
1271 spectrum_identification_result.mzident_source_sp.get()->getMsRunSp().get(),
1272 spectrum_identification_result.scanNum,
1273 spectrum_identification_result.isSpectrumIndex);
1274 peptide_evidence.setRetentionTime(spectrum_identification_result.retentionTime);
1275 peptide_evidence.setCharge(spectrumIdentificationItem.chargeState);
1276 peptide_evidence.setPeptideXtpSp(spectrumIdentificationItem.peptide);
1277 qDebug() << peptide_evidence.getPeptideXtpSp().get()->toAbsoluteString();
1278 peptide_evidence.setChecked(true);
1279 peptide_evidence.setIdentificationDataSource(
1280 spectrum_identification_result.mzident_source_sp.get());
1281 peptide_evidence.setIdentificationEngine(getIdentificationEngine());
1282
1283 peptide_evidence.setExperimentalMassToCharge(spectrumIdentificationItem.experimentalMassToCharge);
1284
1285 qDebug();
1286
1287 // <cvParam cvRef="PSI-MS" accession="MS:1002049"
1288 // name="MS-GF:RawScore" value="356"/> <cvParam cvRef="PSI-MS"
1289 // accession="MS:1002050" name="MS-GF:DeNovoScore" value="369"/>
1290 // <cvParam cvRef="PSI-MS" accession="MS:1002052"
1291 // name="MS-GF:SpecEValue" value="9.149361665076834E-40"/> <cvParam
1292 // cvRef="PSI-MS" accession="MS:1002053" name="MS-GF:EValue"
1293 // value="2.057944235338586E-32"/>
1294 // <userParam name="IsotopeError" value="0"/>
1295 // <userParam name="AssumedDissociationMethod" value="HCD"/>
1296 for(auto cvParam : spectrumIdentificationItem.cvParamList)
1297 {
1298
1299 //<cvParam accession="MS:1002258" cvRef="PSI-MS" value="7"
1300 // name="Comet:matched ions"/>
1301 if(cvParam.accession == "MS:1002049")
1302 {
1303 // PSI-MS MS:1002049 MS-GF:RawScore 356
1304 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_raw,
1305 QVariant(cvParam.value.toUInt()));
1306 }
1307 else if(cvParam.accession == "MS:1002050")
1308 {
1309 // msgfplus_denovo = 9, ///< MS:1002050 "MS-GF de novo score."
1310 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_denovo,
1311 QVariant(cvParam.value.toUInt()));
1312 }
1313
1314 else if(cvParam.accession == "MS:1002052")
1315 {
1316 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_SpecEValue,
1317 QVariant(cvParam.value.toDouble()));
1318 }
1319
1320 else if(cvParam.accession == "MS:1002053")
1321 {
1322 // PSI-MS MS:1002053 MS-GF:EValue
1323 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_EValue,
1324 QVariant(cvParam.value.toDouble()));
1325 }
1326
1327 else if(cvParam.accession == "MS:1002054")
1328 {
1329 // <cvParam cvRef="PSI-MS" accession="MS:1002054" name="MS-GF:QValue"
1330 // value="0.0"/>
1331 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_QValue,
1332 QVariant(cvParam.value.toDouble()));
1333 }
1334 else if(cvParam.accession == "MS:1002055")
1335 {
1336 // <cvParam cvRef="PSI-MS" accession="MS:1002055"
1337 // name="MS-GF:PepQValue" value="0.0"/>
1338 peptide_evidence.setParam(PeptideEvidenceParam::msgfplus_PepQValue,
1339 QVariant(cvParam.value.toDouble()));
1340 }
1341 else if(cvParam.accession == "MS:1001331")
1342 {
1343 // PSI-MS MS:1001331 tandem hyperscore
1344 peptide_evidence.setParam(PeptideEvidenceParam::tandem_hyperscore,
1345 QVariant(cvParam.value.toDouble()));
1346 }
1347 else if(cvParam.accession == "MS:1001330")
1348 {
1349 // PSI-MS MS:1001330 X!Tandem:expect
1350 peptide_evidence.setParam(PeptideEvidenceParam::tandem_expectation_value,
1351 QVariant(cvParam.value.toDouble()));
1352 peptide_evidence.setEvalue(cvParam.value.toDouble());
1353 }
1354 else if(cvParam.accession == "MS:1001950")
1355 {
1356 //<cvParam accession="MS:1001950" cvRef="PSI-MS" value="54.90"
1357 // name="PEAKS:peptideScore"/>
1358
1359 peptide_evidence.setParam(PeptideEvidenceParam::peaks_peptide_score,
1360 QVariant(cvParam.value.toDouble()));
1361 }
1362
1363 //msgfplus_energy = 10, ///< MS:1002051 "MS-GF energy score." [PSI:PI]
1364 //msgfplus_SpecEValue = 11, ///< MS:1002052 "MS-GF spectral E-value."
1365 //[PSI:PI] msgfplus_EValue = 12, ///< MS:1002053 "MS-GF E-value."
1366 //[PSI:PI] msgfplus_isotope_error = 13, ///< MS-GF isotope error
1367 //comet_xcorr = 14, ///< MS:1002252 "The Comet result 'XCorr'." [PSI:PI]
1368 //comet_deltacn = 15, ///< MS:1002253 "The Comet result 'DeltaCn'."
1369 //[PSI:PI] comet_deltacnstar = 16, ///< MS:1002254 "The Comet result
1370 //'DeltaCnStar'." [PSI:PI] comet_spscore = 17, ///< MS:1002255 "The Comet
1371 //result 'SpScore'." [PSI:PI] comet_sprank = 18, ///< MS:1002256 "The
1372 //Comet result 'SpRank'." [PSI:PI] comet_expectation_value = 19, ///<
1373 //MS:1002257 "The Comet result 'Expectation value'." [PSI:PI]
1374
1375 else
1376 {
1377 m_qxmlStreamReader.raiseError(
1378 QObject::tr("cvParam %1 is not taken into account").arg(cvParam.toString()));
1379 }
1380 }
1381
1382 qDebug();
1383 for(auto userParam : spectrumIdentificationItem.userParamList)
1384 {
1385 if(userParam.name == "DeepProt:original_count")
1386 {
1387 // <userParam name="DeepProt:original_count" value="7"/>
1388 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_original_count,
1389 QVariant(userParam.value.toUInt()));
1390 }
1391 else if(userParam.name == "DeepProt:fitted_count")
1392 {
1393 // <userParam name="DeepProt:fitted_count" value="7"/>
1394 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_fitted_count,
1395 QVariant(userParam.value.toUInt()));
1396 }
1397 else if(userParam.name == "DeepProt:match_type")
1398 {
1399 peptide_evidence.setParam(
1400 PeptideEvidenceParam::deepprot_match_type,
1401 (std::uint8_t)pappso::DeepProtEnumStr::DeepProtMatchTypeFromString(userParam.value));
1402 }
1403 else if(userParam.name == "DeepProt:status")
1404 {
1405 peptide_evidence.setParam(
1406 PeptideEvidenceParam::deepprot_peptide_candidate_status,
1407 (std::uint8_t)pappso::DeepProtEnumStr::DeepProtPeptideCandidateStatusFromString(
1408 userParam.value));
1409 }
1410 else if(userParam.name == "DeepProt:mass_delta")
1411 {
1412 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_mass_delta,
1413 QVariant(userParam.value.toDouble()));
1414 }
1415 else if(userParam.name == "DeepProt:delta_positions")
1416 {
1417 // DeepProt:delta_positions 4 5 6 7 8 9 10 11 12 13
1418 peptide_evidence.setParam(PeptideEvidenceParam::deepprot_delta_positions,
1419 userParam.value);
1420 }
1421 // <userParam name="IsotopeError" value="0"/>
1422 //<userParam name="AssumedDissociationMethod" value="HCD"/>
1423 }
1424
1425
1426 qDebug();
1427 for(auto mz_peptide_evidence : spectrumIdentificationItem.mzidPeptideEvidenceList)
1428 {
1429 PeptideMatch peptide_match;
1430 peptide_match.setStart(mz_peptide_evidence.start);
1431 peptide_match.setPeptideEvidenceSp(spectrum_identification_result.mzident_source_sp.get()
1432 ->getPeptideEvidenceStore()
1433 .getInstance(&peptide_evidence));
1434
1435
1436 ProteinMatch *p_protein_match =
1437 spectrum_identification_result.identification_group_p->getProteinMatchInstance(
1438 mz_peptide_evidence.protein.get()->getAccession());
1439
1440 p_protein_match->setChecked(true);
1441 // qDebug() << "startElement_protein p_protein_match 3 " <<
1442 // _p_protein_match;
1443 p_protein_match->setProteinXtpSp(mz_peptide_evidence.protein);
1444 p_protein_match->addPeptideMatch(peptide_match);
1445 }
1446 qDebug();
1447}*/
1448
1449void
1451 const SpectrumIdentificationResult &spectrum_identificatio_result)
1452{
1453
1454 mp_cborWriter->startMap(); // one scan
1455 mp_cborWriter->append("id");
1456 mp_cborWriter->startMap(); // id
1457 mp_cborWriter->append("index");
1458 mp_cborWriter->append((qint64)spectrum_identificatio_result.spectrumIndex);
1459 mp_cborWriter->append("native_id");
1460 mp_cborWriter->append(spectrum_identificatio_result.spectrumID);
1461 mp_cborWriter->endMap(); // end id
1462
1463 mp_cborWriter->append("ms2");
1464 mp_cborWriter->startMap(); // ms2
1465 mp_cborWriter->append("rt");
1466 mp_cborWriter->append(spectrum_identificatio_result.retentionTime);
1467 mp_cborWriter->endMap(); // end ms2
1468
1469
1470 uint charge = spectrum_identificatio_result.spectrumIdentificationItemList.front().chargeState;
1471 double exp_mz =
1472 spectrum_identificatio_result.spectrumIdentificationItemList.front().experimentalMassToCharge;
1473
1474 mp_cborWriter->append("precursor");
1475 mp_cborWriter->startMap(); // precursor
1476 mp_cborWriter->append("z");
1477 mp_cborWriter->append(charge);
1478 mp_cborWriter->append("mz");
1479 mp_cborWriter->append(exp_mz);
1480 mp_cborWriter->endMap(); // end precursor
1481
1482
1483 mp_cborWriter->append("psm_list");
1484 mp_cborWriter->startArray(spectrum_identificatio_result.spectrumIdentificationItemList.size());
1485 for(auto &spectrum_ident_item : spectrum_identificatio_result.spectrumIdentificationItemList)
1486 {
1487 if(spectrum_ident_item.chargeState != charge)
1488 { // error
1489 }
1490 if(spectrum_ident_item.experimentalMassToCharge != exp_mz)
1491 { // error
1492 }
1493 writeSpectrumIdentificationItem(spectrum_ident_item);
1494 }
1495 mp_cborWriter->endArray();
1496
1497 mp_cborWriter->endMap(); // end one scan
1498}
1499
1500void
1502 const SpectrumIdentificationItem &spectrum_identification_item)
1503{
1504 mp_cborWriter->startMap(); // psm
1505 mp_cborWriter->append("proforma");
1506 mp_cborWriter->append(spectrum_identification_item.peptide.get()->toProForma());
1507
1508 std::map<QString, std::vector<std::size_t>> map_protein_positions;
1509 for(auto &it_peptide_evidence : spectrum_identification_item.mzidPeptideEvidenceList)
1510 {
1511 auto it_insert =
1512 map_protein_positions.insert({it_peptide_evidence.protein.get()->getAccession(), {}});
1513 it_insert.first->second.push_back(it_peptide_evidence.start);
1514 }
1515 mp_cborWriter->append("protein_list");
1516 mp_cborWriter->startArray(map_protein_positions.size());
1517 for(auto &it_prot_pos : map_protein_positions)
1518 {
1519 mp_cborWriter->startMap();
1520 mp_cborWriter->append("accession");
1521 mp_cborWriter->append(it_prot_pos.first);
1522 mp_cborWriter->append("positions");
1523 mp_cborWriter->writeArray(it_prot_pos.second);
1524 mp_cborWriter->endMap();
1525 }
1526 mp_cborWriter->endArray();
1527
1528
1529 mp_cborWriter->append("eval");
1530 mp_cborWriter->startMap(); // start eval
1531 bool is_ok = false;
1532 is_ok = writeTandemEval(spectrum_identification_item.cvParamList);
1533 mp_cborWriter->endMap(); // end eval
1534
1535 if(!is_ok)
1536 {
1537 throw pappso::PappsoException("This identification engine is not taken into account");
1538 }
1539 mp_cborWriter->endMap(); // end psm
1540}
1541
1542
1543bool
1544MzIdentMlReader::writeTandemEval(const std::vector<CvParam> &cv_param_list)
1545{
1546 // <cvParam accession="MS:1001330" cvRef="PSI-MS" name="X!Tandem:expect"
1547 // value="0.0410883"/>
1548 // <cvParam accession="MS:1001331" cvRef="PSI-MS" name="X!Tandem:hyperscore" value="25.9"/>
1549 auto it_find =
1550 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](const CvParam &cv_param) {
1551 return cv_param.accession == "MS:1001330";
1552 });
1553 if(it_find != cv_param_list.end())
1554 {
1555 mp_cborWriter->append("xtandem");
1556 mp_cborWriter->startMap(); // start tandem
1557 mp_cborWriter->append("evalue");
1558 mp_cborWriter->append(it_find->value.toDouble());
1559 auto it_find =
1560 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](const CvParam &cv_param) {
1561 return cv_param.accession == "MS:1001331";
1562 });
1563 mp_cborWriter->append("hyperscore");
1564 mp_cborWriter->append(it_find->value.toDouble());
1565
1566 mp_cborWriter->endMap(); // end tandem
1567
1568 return true;
1569 }
1570 return false;
1571}
1572
1573} // namespace psm
1574} // namespace cbor
1575} // namespace pappso
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
exception to use when an item type is not recognized
virtual const QString & qwhat() const
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:658
static AaModificationP translateAaModificationFromUnimod(const QString &unimod_accession)
Definition utils.cpp:734
static QString getVersion()
Definition utils.cpp:650
virtual bool readFile(const QString &fileName)
overrides QCborStreamWriter base class to provide convenient functions
pappso::cbor::CborStreamWriter * mp_cborWriter
std::map< QString, PeptideSp > m_PeptideIdMap
store association between xml ID and peptide sequence
MzIdentMlReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const QFileInfo &mzident_file)
IdentificationEngine m_identificationEngine
@ MSGFplus
MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI].
@ XTandem
MS:1001476 X!Tandem was used to analyze the spectra.
bool writeTandemEval(const std::vector< CvParam > &cv_param_list)
void writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item)
std::map< QString, MzidPeptideEvidence > m_MzidPeptideEvidenceIdMap
store association between xml ID and peptide evidence
pappso::UiMonitorInterface * mp_monitor
void writeSpectrumIdentificationResult(const SpectrumIdentificationResult &spectrum_identificatio_result)
std::map< QString, IdentificationEngine > m_IdentificationEngineMap
store association between xml ID and an identification engine
std::map< QString, MzidSpectraData > m_mzidSpectraDataIdMap
store association between xml ID and SpectraData
std::map< QString, MzidSearchDatabase > m_mzidSearchDatabaseIdMap
store association between xml ID and fasta files
std::map< QString, std::vector< SpectrumIdentificationResult > > m_spectrumIdentificationResultBySpectraIdMap
store all identification results by spectra xml id
void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result)
std::map< QString, MzidDBSequence > m_MzidDBSequenceIdMap
store association between xml ID and DBSequence
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
const AaModification * AaModificationP
unsigned int uint
Definition types.h:67
std::vector< SpectrumIdentificationItem > spectrumIdentificationItemList
std::shared_ptr< Protein > protein_sp