libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
filterlowintensitysignalremoval.cpp
Go to the documentation of this file.
1/* BEGIN software license
2 *
3 * msXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2021 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the msXpertSuite project.
10 *
11 * The msXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34#include <qmath.h>
35
36#include <limits>
37#include <iterator>
38
39#include <QVector>
40#include <QDebug>
41
43
46
47
48namespace pappso
49{
50
51
53 double std_dev,
54 double threshold)
55{
56 // qDebug() << "Building with parameters: [ mean:" << mean << "- std_dev:" << std_dev
57 // << "- threshold" << threshold << "]";
58
59 m_noiseMean = mean;
60 m_noiseStdDev = std_dev;
61 m_threshold = threshold;
62}
63
64
66{
67 // qDebug() << "Building with parameters:" << parameters;
68
69 buildFilterFromString(parameters);
70}
71
72
80
81
85
86
89{
90 if(&other == this)
91 return *this;
92
96
97 return *this;
98}
99
100
101void
103{
104 // qDebug();
105
106 // Typical string: "FilterLowIntensitySignalRemoval|2000"
107 if(parameters.startsWith(QString("%1|").arg(name())))
108 {
109 QStringList params = parameters.split("|").back().split(";");
110
111 if(params.size() != 3)
113 QString("Building of FilterLowIntensitySignalRemoval from string '%1' failed. Expected "
114 "name and three parameters.")
115 .arg(parameters));
116
117 m_noiseMean = params.at(0).toDouble();
118 m_noiseStdDev = params.at(1).toDouble();
119 m_threshold = params.at(2).toDouble();
120 }
121 else
122 {
124 QString("Building of FilterLowIntensitySignalRemoval from string %1 failed")
125 .arg(parameters));
126 }
127}
128
129
130std::size_t
132{
133 // We want to iterate in the trace and check if we can get the apicess
134 // isolated one by one. An apex is nothing more than the top cusp of a
135 // triangle. We only store apices that have a value > m_threshold.
136
137 m_clusters.clear();
138
139 std::size_t trace_size = trace.size();
140 if(trace_size <= 2)
141 {
142 // qDebug() << "The original trace has less than 3 points. Returning it "
143 //"without modification.";
144 return m_clusters.size();
145 }
146 // else
147 // qDebug() << "The original trace has" << trace_size << "data points";
148
149 // Seed the system with the first point of the trace.
150 m_curIter = trace.cbegin();
151 m_prevIter = trace.cbegin();
152
153 // qDebug() << "First trace point: "
154 //<< "(" << m_prevIter->x << "," << m_prevIter->y << ");";
155
156 // We still have not found any apex!
157 m_prevApex = trace.cend();
158
159 // We will need to know if we were ascending to an apex.
160 bool was_ascending_to_apex = false;
161
162 // Prepare a first apex cluster.
163 ApicesSPtr cluster_apices = std::make_shared<ClusterApices>();
164
165 // Now that we have seeded the system with the first point of the original
166 // trace, go to the next one.
167 ++m_curIter;
168
169 // And now iterate in the original trace and make sure we detect all the
170 // apicess.
171
172 while(m_curIter != trace.cend())
173 {
174 // qDebug() << "Current trace point: "
175 //<< "(" << m_curIter->x << "," << m_curIter->y << ");";
176
177 // We monitor if we are going up or down a peak.
178
179 if(m_curIter->y > m_prevIter->y)
180 // We are ascending a peak. We do not know if we are at the apex.
181 {
182 // qDebug().noquote() << "We are ascending to an apex.\n";
183 was_ascending_to_apex = true;
184 }
185 else
186 // We are descending a peak.
187 {
188 // qDebug().noquote() << "Descending a peak. ";
189
190 // There are two situations:
191 //
192 // 1. Either we were ascending to an apex, and m_prev is that apex,
193 //
194 // 2. Or we were not ascending to an apex and in fact all we are doing
195 // is going down an apex that occurred more than one trace point ago.
196
197 if(!was_ascending_to_apex)
198 // We were not ascending to an apex.
199 {
200 // Do nothing here.
201
202 // qDebug().noquote()
203 //<< "But, we were not ascending to an apex, so do nothing.\n";
204 }
205 else
206 // We are effectively descending a peak right after the apex was
207 // reached at the previous iteration.
208 {
210
211 // qDebug().noquote()
212 //<< "And, we were ascending to an apex, so "
213 //"m_curApex has become m_prevIter: ("
214 //<< m_curApex->x << "," << m_curApex->y << ")\n";
215
216 // We might have two situations:
217
218 // 1. We had already encountered an apex.
219 // 2. We had not yet encountered an apex.
220
221 if(m_prevApex != trace.cend())
222 // We had already encountered an apex.
223 {
224 // Was that apex far on the left of the current apex ?
225
227 // The distance is not compatible with both apices to belong
228 // to the same apices cluster.
229 {
230 // We are creating a new isotopic apices.
231
232 // But, since another apex had been encountered already,
233 // that means that an isotopic apices was cooking
234 // already. We must store it.
235
236 if(!cluster_apices->size())
237 qFatal("Cannot be that the apices has no apex.");
238
239 // Store the crafted apices cluster.
240 m_clusters.push_back(cluster_apices);
241
242 // qDebug().noquote()
243 //<< "There was a previous apex already, BUT "
244 //"outside of apices range. "
245 //"Pushing the cooking apices that has size:"
246 //<< cluster_apices->size();
247
248 // Now create a brand new apices for later work.
249 cluster_apices = std::make_shared<ClusterApices>();
250
251 // qDebug() << "Created a brand new apices.";
252
253 // We only start the new apices with the current apex if
254 // that apex is above the threshold.
255
256 if(m_curApex->y > m_threshold)
257 {
258 // And start it with the current apex.
259 cluster_apices->push_back(m_curApex);
260
261 // qDebug()
262 //<< "Since the current apex is above the threshold, "
263 //"we PUSH it to the newly created apices: ("
264 //<< m_curApex->x << "," << m_curApex->y << ")";
265
267
268 // qDebug() << "Set prev apex to be cur apex.";
269 }
270 else
271 {
272 // qDebug()
273 //<< "Since the current apex is below the threshold, "
274 //"we DO NOT push it to the newly created "
275 //"apices: ("
276 //<< m_curApex->x << "," << m_curApex->y << ")";
277
278 // Since previous apex went to the closed apices, we
279 // need to reset it.
280
281 m_prevApex = trace.cend();
282
283 // qDebug() << "Since the previous apex went to the "
284 //"closed apices, and cur apex has too "
285 //"small an intensity, we reset prev apex "
286 //"to trace.cend().";
287 }
288 }
289 else
290 // The distance is compatible with both apices to belong to
291 // the same isotopic apices.
292 {
293
294 // But we only push back the current apex to the apices
295 // if its intensity is above the threshold.
296
297 if(m_curApex->y > m_threshold)
298 // The current apex was above the threshold
299 {
300 cluster_apices->push_back(m_curApex);
301
302 // qDebug().noquote()
303 //<< "There was an apex already inside of apices "
304 //"range. "
305 //"AND, since the current apex was above the "
306 //"threshold, we indeed push it to apices.\n";
307
308 // qDebug().noquote()
309 //<< "Current apex PUSHED: " << m_curApex->x << ", "
310 //<< m_curApex->y;
311
313
314 // qDebug() << "We set prev apex to be cur apex.";
315 }
316 else
317 {
318 // qDebug().noquote()
319 //<< "There was an apex already inside of apices "
320 //"range. "
321 //"BUT, since the current apex was below the "
322 //"threshold, we do not push it to apices.\n";
323
324 // qDebug().noquote()
325 //<< "Current apex NOT pushed: " << m_curApex->x
326 //<< ", " << m_curApex->y;
327 }
328 }
329 }
330 else
331 // No apex was previously found. We are fillin-up a new isotopic
332 // apices.
333 {
334 if(m_curApex->y > m_threshold)
335 // We can actually add that apex to a new isotopic apices.
336 {
337 if(cluster_apices->size())
338 qCritical(
339 "At this point, the apices should be new and "
340 "empty.");
341
342 cluster_apices->push_back(m_curApex);
343
344 // qDebug().noquote()
345 //<< "No previous apex was found. Since current apex' "
346 //"intensity is above threshold, we push it back to "
347 //"the "
348 //"apices.\n";
349
350 // Store current apex as previous apex for next rounds.
352
353 // qDebug().noquote() << "And thus we store the current "
354 //"apex as previous apex.\n";
355 }
356 else
357 {
358 // qDebug().noquote()
359 //<< "No previous apex was found. Since current apex' "
360 //"intensity is below threshold, we do nothing.\n";
361 }
362 }
363 }
364 // End of
365 // ! if(!was_ascending_to_apex)
366 // That is, we were ascending to an apex.
367
368 // Tell what it is: we were not ascending to an apex.
369 was_ascending_to_apex = false;
370 }
371 // End of
372 // ! if(m_curIter->y > m_prevIter->y)
373 // That is, we are descending a peak.
374
375 // At this point, prepare next round.
376
377 // qDebug().noquote()
378 //<< "Preparing next round, with m_prevIter = m_curIter and ++index.\n";
379
381
382 ++m_curIter;
383 }
384 // End of
385 // while(index < trace_size)
386
387 // At this point, if a apices had been cooking, add it.
388
389 if(cluster_apices->size())
390 m_clusters.push_back(cluster_apices);
391
392 return m_clusters.size();
393}
394
395
396Trace::const_iterator
398 Trace::const_iterator iter,
399 double distance_threshold)
400{
401 // We receive an iterator that points to an apex. We want iterate back in the
402 // trace working copy and look if there are apices that are distant less than
403 // 1.1~Th.
404
405 Trace::const_iterator init_iter = iter;
406
407 if(iter == trace.cbegin())
408 return iter;
409
410 // Seed the previous iter to iter, because we'll move from there right away.
411 Trace::const_iterator prev_iter = init_iter;
412
413 Trace::const_iterator last_apex_iter = init_iter;
414
415 // We will need to know if we were ascending to an apex.
416 bool was_ascending_to_apex = false;
417
418 // Now that we have seeded the system, we can move iter one data point:
419 --iter;
420
421 while(iter != trace.cbegin())
422 {
423 // If we are already outside of distance_threshold, return the last apex
424 // that was found (or initial iter if none was encountered)
425
426 if(abs(init_iter->x - iter->x) >= distance_threshold)
427 return last_apex_iter;
428
429 if(iter->y > prev_iter->y)
430 {
431 // New data point has greater intensity. Just record that fact.
432 was_ascending_to_apex = true;
433 }
434 else
435 {
436 // New data point has smaller intensity. We are descending a peak.
437
438 if(was_ascending_to_apex)
439 {
440 // We had an apex at previous iter. We are inside the distance
441 // threshold. That is good. But we still could find another apex
442 // while moving along the trace that is in the distance threshold.
443 // This is why we keep going, but we store the previous iter as
444 // the last encountered apex.
445
446 last_apex_iter = prev_iter;
447 }
448 }
449
450 prev_iter = iter;
451
452 // Move.
453 --iter;
454 }
455
456 // qDebug() << "Init m/z: " << init_iter->x
457 //<< "Returning m/z: " << last_apex_iter->x
458 //<< "at distance:" << std::distance(last_apex_iter, init_iter);
459
460 // At this point last_apex_iter is the same as the initial iter.
461 return last_apex_iter;
462}
463
464
465Trace::const_iterator
467 Trace::const_iterator iter,
468 double distance_threshold)
469{
470 // We receive an iterator that points to an apex. We want iterate back in the
471 // trace working copy and look if there are apices that are distant less than
472 // 1.1~Th.
473
474 Trace::const_iterator init_iter = iter;
475
476 if(iter == trace.cend())
477 return iter;
478
479 // Seed the previous iter to iter, because we'll move from there right away.
480 Trace::const_iterator prev_iter = init_iter;
481
482 Trace::const_iterator last_apex_iter = init_iter;
483
484 // We will need to know if we were ascending to an apex.
485 bool was_ascending_to_apex = false;
486
487 // Now that we have seeded the system, we can move iter one data point:
488 ++iter;
489
490 while(iter != trace.cend())
491 {
492 // If we are already outside of distance_threshold, return the last apex
493 // that was found (or initial iter if none was encountered)
494
495 // FIXME: maybe we should compare prev_iter->x with iter->x so that we
496 // continue moving if all the succcessive apices found are each one in the
497 // distance_threshold from the previous one ?
498
499 if(abs(init_iter->x - iter->x) >= distance_threshold)
500 return last_apex_iter;
501
502 if(iter->y > prev_iter->y)
503 {
504 // New data point has greater intensity. Just record that fact.
505 was_ascending_to_apex = true;
506 }
507 else
508 {
509 // New data point has smaller intensity. We are descending a peak.
510
511 if(was_ascending_to_apex)
512 {
513 // We had an apex at previous iter. We are inside the distance
514 // threshold. That is good. But we still could find another apex
515 // while moving along the trace that is in the distance threshold.
516 // This is why we keep going, but we store the previous iter as
517 // the last encountered apex.
518
519 last_apex_iter = prev_iter;
520 }
521 }
522
523 prev_iter = iter;
524
525 // Move.
526 ++iter;
527 }
528
529 // At this point last_apex_iter is the same as the initial iter.
530 return last_apex_iter;
531}
532
533
534Trace
536{
537 // We start from the vector of apices and try to remake a high resolution
538 // trace out of these apices.
539
540 MapTrace map_trace;
541
542 // The general idea is that apices were detected, and only apices having
543 // their intensity above the threshold were stored. That means that we need to
544 // add points to the trace to reconstruct a meaningful trace. Indeed, imagine
545 // a heavy peptide isotopic cluster: the first peak's apex is below threshold
546 // and was not stored. The second peak is also below. But the third isotopic
547 // cluster peak is above and was stored.
548 //
549 // How do we reconstruct the trace to have all these points that were
550 // preceding the first isotopic cluster apex that was detected.
551 //
552 // The same applies to the last peaks of the cluster that are below the
553 // threshold whil the preceeding ones were above!
554
555 // The general idea is to iterate in the vector of apices and for each apex
556 // that is encountered, ask if there were apices
557
558 // m_clusters is a vector that contains vectors of Trace::const_iter.
559 // Each vector in m_clusters should represent all the apices of a cluster.
560
561 // qDebug() << "Reconstructing trace with " << m_clusters.size() <<
562 // "clusters.";
563
564 Trace::const_iterator left_begin_iter = trace.cend();
565 Trace::const_iterator right_end_iter = trace.cend();
566
567 for(auto &&cluster_apices : m_clusters)
568 {
569 // cluster_apices is a vector of Trace::const_iterator. If we want to
570 // reconstruct the Trace, we need to iterate through all the DataPoint
571 // objects in between cluster_apices.begin() and cluster_apices.end().
572
573 Trace::const_iterator begin_iter = *(cluster_apices->begin());
574 Trace::const_iterator end_iter = *(std::prev(cluster_apices->end()));
575
576 // qDebug() << "Iterating in a new cluster apices with boundaries:"
577 //<< begin_iter->x << "-" << end_iter->x;
578
579 left_begin_iter = backwardFindApex(trace, begin_iter, INTRA_CLUSTER_INTER_PEAK_DISTANCE);
580
581 // qDebug() << "After backwardFindApex, left_begin_iter points to:"
582 //<< left_begin_iter->toString() << "with distance:"
583 //<< std::distance(left_begin_iter, begin_iter);
584
585 right_end_iter = forwardFindApex(trace, end_iter, 1.5 * INTRA_CLUSTER_INTER_PEAK_DISTANCE);
586
587 for(Trace::const_iterator iter = left_begin_iter; iter != right_end_iter; ++iter)
588 {
589 map_trace[iter->x] = iter->y;
590 }
591
592 // Now reset the left and right iters to intensity 0 to avoid having
593 // disgraceful oblique lines connnecting trace segments.
594
595 map_trace[left_begin_iter->x] = 0;
596 map_trace[std::prev(right_end_iter)->x] = 0;
597 }
598
599 return map_trace.toTrace();
600}
601
602
603Trace &
605{
606 // qDebug();
607
608 // Horrible hack to have a non const filtering process.
609 return const_cast<FilterLowIntensitySignalRemoval *>(this)->nonConstFilter(trace);
610}
611
612
613Trace &
615{
616 // qDebug();
617
618 if(trace.size() <= 2)
619 {
620 // qDebug() << "The original trace has less than 3 points. Returning it "
621 //"without modification.";
622 return trace;
623 }
624 // else
625 // qDebug() << "The original trace had" << trace.size() << "data points";
626
627 std::size_t cluster_count = detectClusterApices(trace);
628
629 // qDebug() << "Number of detected cluster apices: " << cluster_count;
630
631 if(!cluster_count)
632 return trace;
633
634 // At this point we want to work on the apices and reconstruct a full
635 // trace.
636
637 Trace reconstructed_trace = reconstructTrace(trace);
638
639 trace = std::move(reconstructed_trace);
640
641 return trace;
642}
643
644
645double
650
651
652//! Return a string with the textual representation of the configuration data.
653QString
655{
656 return QString("%1|%2;%3;%4")
657 .arg(name())
658 .arg(QString::number(m_noiseMean, 'f', 2))
659 .arg(QString::number(m_noiseStdDev, 'f', 2))
660 .arg(QString::number(m_threshold, 'f', 2));
661}
662
663
664QString
666{
667 return "FilterLowIntensitySignalRemoval";
668}
669
670} // namespace pappso
excetion to use when an item type is not recognized
Redefines the floor intensity of the Trace.
Trace::const_iterator backwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval(double mean, double std_dev, double threshold)
Trace & filter(Trace &data_points) const override
Trace::const_iterator forwardFindApex(const Trace &trace, Trace::const_iterator iter, double distance_threshold)
FilterLowIntensitySignalRemoval & operator=(const FilterLowIntensitySignalRemoval &other)
void buildFilterFromString(const QString &strBuildParams) override
build this filter using a string
QString toString() const override
Return a string with the textual representation of the configuration data.
Trace toTrace() const
Definition maptrace.cpp:214
A simple container of DataPoint instances.
Definition trace.h:152
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39