ProteoWizard
MinimumPepXML.hpp
Go to the documentation of this file.
1//
2// $Id$
3//
4//
5// Original author: Kate Hoff <katherine.hoff@proteowizard.org>
6//
7// Copyright 2009 Spielberg Family Center for Applied Proteomics
8// Cedars-Sinai Medical Center, Los Angeles, California 90048
9//
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13//
14// http://www.apache.org/licenses/LICENSE-2.0
15//
16// Unless required by applicable law or agreed to in writing, software
17// distributed under the License is distributed on an "AS IS" BASIS,
18// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19// See the License for the specific language governing permissions and
20// limitations under the License.
21//
22
23
24#ifndef _MINIMUMPEPXML_HPP_
25#define _MINIMUMPEPXML_HPP_
26
29#include "boost/shared_ptr.hpp"
30#include "boost/logic/tribool.hpp"
31
32#include <iostream>
33#include <stdexcept>
34
35using namespace pwiz::minimxml;
36using namespace pwiz::data::peakdata;
37
38namespace pwiz{
39namespace data{
40namespace pepxml{
41
/// Hands this module an output stream, by reference; judging by the name it
/// is used for log output. NOTE(review): the implementation is not visible in
/// this header -- confirm what is written to `os` and whether the referenced
/// stream must outlive later read/write calls.
42void setLogStream(std::ostream& os);
43
45{
46 Specificity() : minSpace(1) {}
47
48 /// One or more 1-letter residue codes. Enzyme cleaves on the
49 /// sense side of the residue(s) listed in cut unless one of the
50 /// residues listed in no_cut is adjacent to the potential
51 /// cleavage site.
52 std::string cut;
53
54 /// Zero or more 1-letter residue codes. Enzyme cleaves on the
55 /// sense side of the residue(s) listed in cut unless one of the
56 /// residues listed in no_cut is adjacent to the potential
57 /// cleavage site.
58 std::string noCut;
59
60 /// Defines whether cleavage occurs on the C-terminal or
61 /// N-terminal side of the residue(s) listed in cut (values "C" or
62 /// "N")
63 std::string sense;
64
65 /// minimum separation between adjacent cleavages. default 1.
66 size_t minSpace;
67
68 void write(XMLWriter& writer) const;
69 void read(std::istream& is);
70
71 bool operator==(const Specificity& that) const;
72 bool operator!=(const Specificity& that) const;
73
74};
75
77{
78 SampleEnzyme() : independent(boost::indeterminate) {}
79
80 /// Controlled code name for the enzyme that can be referred to by
81 /// applications.
82 std::string name;
83
84 /// Free text to describe alternative names, special conditions,
85 /// etc.
86 std::string description;
87
88 /// Semispecific means that at least one end of a peptide must
89 /// conform to the cleavage specificity, (unless the peptide was
90 /// at the terminus of the parent sequence). Nonspecific means
91 /// that neither end of a peptide must conform to the cleavage
92 /// specificity.
93 std::string fidelity;
94
95 /// If there are multiple specificities and independent is true,
96 /// then a single peptide cannot exhibit one specificity at one
97 /// terminus and a different specificity at the other. If
98 /// independent is false, then a single peptide can exhibit mixed
99 /// specificities.
100 boost::tribool independent;
101
103
104 void write(XMLWriter& writer) const;
105 void read(std::istream& is);
106
107 bool operator==(const SampleEnzyme& that) const;
108 bool operator!=(const SampleEnzyme& that) const;
109
110};
111
113{
115
116 std::string localPath;
117 std::string databaseName;
121 std::string type;
122
123 void write(XMLWriter& writer) const;
124 void read(std::istream& is);
125
126 bool operator==(const SearchDatabase& that) const;
127 bool operator!=(const SearchDatabase& that) const;
128
129};
130
132{
133 Q3RatioResult() : lightFirstScan(0), lightLastScan(0), lightMass(0), heavyFirstScan(0), heavyLastScan(0), heavyMass(0), lightArea(0), heavyArea(0), q2LightArea(0), q2HeavyArea(0), decimalRatio(0) {}
134
137 double lightMass;
140 double heavyMass;
141 double lightArea;
142 double heavyArea;
146
147 void write(XMLWriter& writer) const;
148 void read(std::istream& is);
149
150 bool operator==(const Q3RatioResult& that) const;
151 bool operator!=(const Q3RatioResult& that) const;
152
153};
154
156{
157 double min_prob;
159 double error;
162};
163
165{
166 double error;
167 double min_prob;
170};
171
185
187{
189 std::string comments;
194
195 // TODO Child tags go here... Don't forget to add the struct for
196 // them too
197};
198
200{
201 std::string version;
202 std::string author;
203 double min_prob;
204 std::string options;
206
207 std::vector<std::string> inputFile;
208 std::vector<RocDataPoint> roc_data_point;
209 std::vector<ErrorPoint> error_point;
210 std::vector<DistributionPoint> distribution_point;
211 std::vector<MixtureModel> mixture_model;
212};
213
215{
217
219 std::vector<double> allNttProb;
220 std::string analysis;
221
222 // TODO add search_score_summary and its parameter (2+ occurrences)
223
224 void write(XMLWriter& writer) const;
225 void read(std::istream& is);
226
227 bool operator==(const PeptideProphetResult& that) const;
228 bool operator!=(const PeptideProphetResult& that) const;
229
230};
231
233{
234 AnalysisResult() : analysis("peptideprophet_result") {}
235
236 std::string analysis;
239
240 void write(XMLWriter& writer) const;
241 void read(std::istream& is);
242
243 bool operator==(const AnalysisResult& that) const;
244 bool operator!=(const AnalysisResult& that) const;
245
246};
247
249{
251
252 std::string protein;
253 std::string proteinDescr;
254 std::string numTolTerm;
255
256 void write(XMLWriter& writer) const;
257 void read(std::istream& is);
258
259 bool operator==(const AlternativeProtein& that) const;
260 bool operator!=(const AlternativeProtein& that) const;
261
262};
263
265{
266 ModAminoAcidMass() : position(0), mass(0) {}
267
269 double mass;
270
271 void write(XMLWriter& writer) const;
272 void read(std::istream& is);
273
274 bool operator==(const ModAminoAcidMass& that) const;
275 bool operator!=(const ModAminoAcidMass& that) const;
276
277};
278
280{
282
283 std::string modifiedPeptide;
285
286 void write(XMLWriter& writer) const;
287 void read(std::istream& is);
288
289 bool operator==(const ModificationInfo& that) const;
290 bool operator!=(const ModificationInfo& that) const;
291
292};
293
295{
296 Parameter(const std::string& name = "", const std::string& value = "")
297 : name(name), value(value)
298 {}
299
300 std::string name;
301 std::string value;
302
303 void write(XMLWriter& writer) const;
304 void read(std::istream& is);
305
306 bool operator==(const Parameter& that) const;
307 bool operator!=(const Parameter& that) const;
308};
309
310typedef boost::shared_ptr<Parameter> ParameterPtr;
311
312
314{
315 SearchScore(const std::string& name = "", const std::string& value = "")
316 : Parameter(name, value)
317 {}
318
319 void write(XMLWriter& writer) const;
320 void read(std::istream& is);
321
322};
323
324typedef boost::shared_ptr<SearchScore> SearchScorePtr;
325
326
328{
329 SearchHit() : hitRank(0),numTotalProteins(0), numMatchedIons(0), totalNumIons(0), calcNeutralPepMass(0), massDiff(0), numTolTerm(0), numMissedCleavages(0), isRejected(0) {}
330
332 std::string peptide;
333 std::string peptidePrevAA;
334 std::string peptideNextAA;
335 std::string protein;
336 std::string proteinDescr;
341 double massDiff;
344 int isRejected; // bool?
346 std::vector<AlternativeProtein> alternativeProteins;
348
349 std::vector<SearchScorePtr> searchScore;
350
351 void write(XMLWriter& writer) const;
352 void read(std::istream& is);
353
354 bool operator==(const SearchHit& that) const;
355 bool operator!=(const SearchHit& that) const;
356
357};
358
359typedef boost::shared_ptr<SearchHit> SearchHitPtr;
360
361
/// Equality for two SearchHitPtr handles (boost::shared_ptr<SearchHit>), both
/// taken by value. NOTE(review): presumably compares the pointed-to SearchHit
/// objects rather than pointer identity -- confirm in the implementation,
/// including how null pointers are handled.
362PWIZ_API_DECL bool operator==(const SearchHitPtr left, const SearchHitPtr right);
363
365{
366 SearchResult(size_t searchId = 0) :searchId(searchId){}
367
368 /// Unique identifier to search summary
369 size_t searchId;
370
371 std::vector<SearchHitPtr> searchHit;
372
373 void write(XMLWriter& writer) const;
374 void read(std::istream& is);
375
376 bool operator==(const SearchResult& that) const;
377 bool operator!=(const SearchResult& that) const;
378
379};
380
381typedef boost::shared_ptr<SearchResult> SearchResultPtr;
382
384
385
387{
388 EnzymaticSearchConstraint() : maxNumInternalCleavages(0), minNumTermini(0){}
389
390 std::string enzyme;
393
394 void write(XMLWriter& writer) const;
395 void read(std::istream& is);
396
397 bool operator==(const EnzymaticSearchConstraint& that) const;
398 bool operator!=(const EnzymaticSearchConstraint& that) const;
399
400};
401
403{
404 AminoAcidModification() : massDiff(0), mass(0) {}
405
406 std::string aminoAcid;
407 double massDiff;
408 double mass;
409 std::string variable;
410 std::string peptideTerminus;
411 std::string binary;
412 std::string description;
413 std::string symbol;
414
415 void write(XMLWriter& writer) const;
416 void read(std::istream& is);
417
418 bool operator==(const AminoAcidModification& that) const;
419 bool operator!=(const AminoAcidModification& that) const;
420
421};
422
423/// Database search settings
425{
427
428 /// Full path location of mzXML file for this search run (without
429 /// the .mzXML extension)
430 std::string baseName;
431
432 /// SEQUEST, Mascot, COMET, etc
433 std::string searchEngine;
434
435 /// average or monoisotopic
436 std::string precursorMassType;
437
438 /// average or monoisotopic
439 std::string fragmentMassType;
440
441 /// Format of file storing the runner up peptides (if not present
442 /// in pepXML)
443 std::string searchID;
444
445 /// runner up search hit data type extension (e.g. .tgz)
447
448 /// matches id in search hit
449 size_t search_id;
450
452 std::vector<AminoAcidModification> aminoAcidModifications;
453
454 std::vector<ParameterPtr> parameters;
455
456 void write(XMLWriter& writer) const;
457 void read(std::istream& is);
458
459 bool operator==(const SearchSummary& that) const;
460 bool operator!=(const SearchSummary& that) const;
461
462};
463
464typedef boost::shared_ptr<SearchSummary> SearchSummaryPtr;
465
467
468
469/// Reference for analysis applied to current run (time corresponds
470/// with analysis_summary/@time, id corresponds with
471/// analysis_result/@id)
473{
474 /// Date of analysis
475 std::string time;
476
477 /// Analysis name
478 std::string analsysis;
479
480 /// Unique identifier for each type of analysis
481 size_t id;
482
483 // Evil ##any data goes here
484};
485
486
488{
489 SpectrumQuery() : startScan(0), endScan(0), precursorNeutralMass(0), assumedCharge(0), index(0), retentionTimeSec(0) {}
490
491 std::string spectrum;
492
493 /// first scan number integrated into MS/MS spectrum
495
496 /// last scan number integrated into MS/MS spectrum
498
500
501 /// Precursor ion charge used for search
503
504 /// Search constraint applied specifically to this query
505 int index;
506
507 /// Unique identifier
509
510 std::vector<SearchResultPtr> searchResult;
511
512 void write(XMLWriter& writer) const;
513 void read(std::istream& is);
514
515 bool operator==(const SpectrumQuery& that) const;
516 bool operator!=(const SpectrumQuery& that) const;
517
518};
519
520typedef boost::shared_ptr<SpectrumQuery> SpectrumQueryPtr;
521
523
525{
527
528 std::string base_name;
529 std::string raw_data_type;
530 std::string raw_data;
531 std::string msManufacturer;
532 std::string msModel;
533 std::string msIonization;
534 std::string msMassAnalyzer;
535 std::string msDetector;
536
538 std::vector<SearchSummaryPtr> searchSummary;
539 std::vector<SpectrumQueryPtr> spectrumQueries;
540
541 void write(XMLWriter& writer) const;
542 void read(std::istream& is);
543
544 bool operator==(const MSMSRunSummary& that) const;
545 bool operator!=(const MSMSRunSummary& that) const;
546
547};
548
550{
551 /// Time analysis complete (unique id)
552 std::string time;
553
554 /// Name of analysis program
555 std::string analysis;
556
557 /// Release
558 std::string version;
559
560 // All the unknown stuff goes here
561
562 // TODO deal with the results of
563 // <xs:any namespace="##any" processContents="lax" minOccurs="0">
564 std::vector<PeptideProphetSummary> peptideprophet_summary;
565};
566
567typedef boost::shared_ptr<AnalysisSummary> AnalysisSummaryPtr;
568
569
571{
572 size_t number;
573
574 /// File from which derived
575 std::string parent_file;
576
577 std::string windows_parent;
578
579 /// filtering criteria applied to data
580 std::string description;
581};
582
583typedef boost::shared_ptr<DataFilter> DataFilterPtr;
584
585
586/// Source and filtering criteria used to generate dataset
588{
589 /// number preceding filter generations
591
592 std::vector<DataFilterPtr> dataFilters;
593};
594
595typedef boost::shared_ptr<DatasetDerivation> DatasetDerivationPtr;
596
597
599{
601
602 std::string date;
603 std::string summaryXML;
604 std::string xmlns;
605 std::string xmlnsXSI;
606 std::string XSISchemaLocation;
607
608 /// full path file name of mzXML (minus the .mzXML)
609 std::string baseName;
610
611 /// raw data type extension (e.g. .mzXML)
612 std::string raw_data_type;
613
614 /// raw data type extension (e.g. .mzXML)
615 std::string raw_data;
616
617 /// Manufacturer of MS/MS instrument
618 std::string msManufacturer;
619
620 /// Instrument model (cf mzXML)
621 std::string msModel;
622
623 /// Ionization method (cf mzXML)
624 std::string msIonization;
625
626 /// Ion trap, etc (cf mzXML)
627 std::string msMassAnalyzer;
628
629 /// EMT, etc(cf mzXML)
630 std::string msDetector;
631
635
636 void write(XMLWriter& writer) const;
637 void read(std::istream& is);
638
639 bool operator==(const MSMSPipelineAnalysis& that) const;
640 bool operator!=(const MSMSPipelineAnalysis& that) const;
641
642};
643
645{
646 Match() : score(0), feature(new Feature()) {}
647 Match(const SpectrumQuery& _spectrumQuery, FeaturePtr _feature, double _score = 0) : score(_score), spectrumQuery(_spectrumQuery), feature(_feature) {}
648
649 double score;
651 double massDeviation; // ( feature mz - proton mass ) * charge - calculatedMass (not absolute val!)
652
655
656 void write(minimxml::XMLWriter& writer) const;
657 void read(std::istream& is);
658
659 bool operator==(const Match& that) const;
660 bool operator!=(const Match& that) const;
661
662private:
665
666};
667
668typedef boost::shared_ptr<Match> MatchPtr;
669
/// Equality for two MatchPtr handles (boost::shared_ptr<Match>), both taken
/// by value. NOTE(review): presumably compares the pointed-to Match objects
/// rather than pointer identity -- confirm in the implementation, including
/// how null pointers are handled.
670PWIZ_API_DECL bool operator==(const MatchPtr left, const MatchPtr right);
671
672
674{
676 MatchData(std::string wfc, std::string snc) : warpFunctionCalculator(wfc), searchNbhdCalculator(snc) {}
677 MatchData(std::vector<MatchPtr> _matches) : matches(_matches){}
678
681 std::vector<MatchPtr> matches;
682
683 void write(minimxml::XMLWriter& writer) const;
684 void read(std::istream& is);
685
686 bool operator==(const MatchData& that) const;
687 bool operator!=(const MatchData& that) const;
688
689};
690
691} // namespace pepxml
692} // namespace data
693} // namespace pwiz
694
695
696
697#endif // _MINIMUMPEPXML_HPP_
698
699// LocalWords: RatioResult
#define PWIZ_API_DECL
Definition Export.hpp:32
double probability(const vector< double > &p, const vector< int > &i)
The XMLWriter class provides simple, tag-level XML syntax writing.
Definition XMLWriter.hpp:48
boost::shared_ptr< Feature > FeaturePtr
Definition PeakData.hpp:292
boost::shared_ptr< AnalysisSummary > AnalysisSummaryPtr
boost::shared_ptr< SearchResult > SearchResultPtr
boost::shared_ptr< SearchSummary > SearchSummaryPtr
boost::shared_ptr< DatasetDerivation > DatasetDerivationPtr
boost::shared_ptr< Parameter > ParameterPtr
boost::shared_ptr< SearchScore > SearchScorePtr
boost::shared_ptr< Match > MatchPtr
boost::shared_ptr< DataFilter > DataFilterPtr
PWIZ_API_DECL bool operator==(const SearchHitPtr left, const SearchHitPtr right)
boost::shared_ptr< SpectrumQuery > SpectrumQueryPtr
void setLogStream(std::ostream &os)
boost::shared_ptr< SearchHit > SearchHitPtr
void write(XMLWriter &writer) const
bool operator==(const AlternativeProtein &that) const
bool operator!=(const AlternativeProtein &that) const
bool operator!=(const AminoAcidModification &that) const
bool operator==(const AminoAcidModification &that) const
void write(XMLWriter &writer) const
void write(XMLWriter &writer) const
void read(std::istream &is)
bool operator!=(const AnalysisResult &that) const
bool operator==(const AnalysisResult &that) const
PeptideProphetResult peptideProphetResult
std::string analysis
Name of analysis program.
std::string time
Time analysis complete (unique id)
std::vector< PeptideProphetSummary > peptideprophet_summary
Reference for analysis applied to current run (time corresponds with analysis_summary/@time,...
size_t id
Unique identifier for each type of analysis.
std::string time
Date of analysis.
std::string analsysis
Analysis name.
std::string parent_file
File from which derived.
std::string description
filtering criteria applied to data
Source and filtering criteria used to generate dataset.
size_t generation_no
number preceding filter generations
std::vector< DataFilterPtr > dataFilters
void write(XMLWriter &writer) const
bool operator!=(const EnzymaticSearchConstraint &that) const
bool operator==(const EnzymaticSearchConstraint &that) const
std::string msMassAnalyzer
Ion trap, etc (cf mzXML)
std::string msDetector
EMT, etc(cf mzXML)
bool operator==(const MSMSPipelineAnalysis &that) const
std::string msManufacturer
Manufacturer of MS/MS instrument.
std::string msModel
Instrument model (cf mzXML)
void write(XMLWriter &writer) const
bool operator!=(const MSMSPipelineAnalysis &that) const
std::string baseName
full path file name of mzXML (minus the .mzXML)
std::string msIonization
Ionization method (cf mzXML)
std::string raw_data_type
raw data type extension (e.g. .mzXML)
std::string raw_data
raw data type extension (e.g. .mzXML)
std::vector< SpectrumQueryPtr > spectrumQueries
bool operator!=(const MSMSRunSummary &that) const
void read(std::istream &is)
bool operator==(const MSMSRunSummary &that) const
std::vector< SearchSummaryPtr > searchSummary
void write(XMLWriter &writer) const
std::vector< MatchPtr > matches
bool operator!=(const MatchData &that) const
void write(minimxml::XMLWriter &writer) const
bool operator==(const MatchData &that) const
void read(std::istream &is)
MatchData(std::string wfc, std::string snc)
MatchData(std::vector< MatchPtr > _matches)
bool operator==(const Match &that) const
void write(minimxml::XMLWriter &writer) const
void read(std::istream &is)
Match operator=(Match &)
bool operator!=(const Match &that) const
Match(const SpectrumQuery &_spectrumQuery, FeaturePtr _feature, double _score=0)
bool operator==(const ModAminoAcidMass &that) const
void write(XMLWriter &writer) const
bool operator!=(const ModAminoAcidMass &that) const
bool operator!=(const ModificationInfo &that) const
void write(XMLWriter &writer) const
bool operator==(const ModificationInfo &that) const
bool operator==(const Parameter &that) const
void write(XMLWriter &writer) const
Parameter(const std::string &name="", const std::string &value="")
void read(std::istream &is)
bool operator!=(const Parameter &that) const
void write(XMLWriter &writer) const
bool operator==(const PeptideProphetResult &that) const
bool operator!=(const PeptideProphetResult &that) const
std::vector< RocDataPoint > roc_data_point
std::vector< MixtureModel > mixture_model
std::vector< DistributionPoint > distribution_point
void write(XMLWriter &writer) const
bool operator!=(const Q3RatioResult &that) const
void read(std::istream &is)
bool operator==(const Q3RatioResult &that) const
std::string fidelity
Semispecific means that at least one end of a peptide must conform to the cleavage specificity,...
void read(std::istream &is)
bool operator==(const SampleEnzyme &that) const
bool operator!=(const SampleEnzyme &that) const
void write(XMLWriter &writer) const
std::string name
Controlled code name for the enzyme that can be referred to by applications.
boost::tribool independent
If there are multiple specificities and independent is true, then a single peptide cannot exhibit one...
std::string description
Free text to describe alternative names, special conditions, etc.
void read(std::istream &is)
bool operator==(const SearchDatabase &that) const
bool operator!=(const SearchDatabase &that) const
void write(XMLWriter &writer) const
void read(std::istream &is)
std::vector< AlternativeProtein > alternativeProteins
bool operator!=(const SearchHit &that) const
bool operator==(const SearchHit &that) const
void write(XMLWriter &writer) const
std::vector< SearchScorePtr > searchScore
bool operator!=(const SearchResult &that) const
bool operator==(const SearchResult &that) const
void read(std::istream &is)
void write(XMLWriter &writer) const
std::vector< SearchHitPtr > searchHit
size_t searchId
Unique identifier to search summary.
void read(std::istream &is)
SearchScore(const std::string &name="", const std::string &value="")
void write(XMLWriter &writer) const
Database search settings.
size_t search_id
matches id in search hit
SearchDatabase searchDatabase
runner up search hit data type extension (e.g. .tgz)
std::vector< AminoAcidModification > aminoAcidModifications
bool operator==(const SearchSummary &that) const
std::string fragmentMassType
average or monoisotopic
std::string searchEngine
SEQUEST, Mascot, COMET, etc.
std::vector< ParameterPtr > parameters
std::string searchID
Format of file storing the runner up peptides (if not present in pepXML)
void write(XMLWriter &writer) const
void read(std::istream &is)
std::string baseName
Full path location of mzXML file for this search run (without the .mzXML extension)
std::string precursorMassType
average or monoisotopic
bool operator!=(const SearchSummary &that) const
EnzymaticSearchConstraint enzymaticSearchConstraint
std::string noCut
Zero or more 1-letter residue codes.
bool operator==(const Specificity &that) const
bool operator!=(const Specificity &that) const
size_t minSpace
minimum separation between adjacent cleavages. default 1.
void read(std::istream &is)
std::string cut
One or more 1-letter residue codes.
void write(XMLWriter &writer) const
std::string sense
Defines whether cleavage occurs on the C-terminal or N-terminal side of the residue(s) listed in cut ...
std::vector< SearchResultPtr > searchResult
double retentionTimeSec
Unique identifier.
int index
Search constraint applied specifically to this query.
bool operator!=(const SpectrumQuery &that) const
int startScan
first scan number integrated into MS/MS spectrum
void read(std::istream &is)
bool operator==(const SpectrumQuery &that) const
int endScan
last scan number integrated into MS/MS spectrum
void write(XMLWriter &writer) const
int assumedCharge
Precursor ion charge used for search.