ProteoWizard
Classes | Functions | Variables
MSDataFileTest.cpp File Reference
#include "MSDataFile.hpp"
#include "Diff.hpp"
#include "IO.hpp"
#include "SpectrumListBase.hpp"
#include "ChromatogramListBase.hpp"
#include "examples.hpp"
#include "pwiz/utility/misc/unit.hpp"
#include "pwiz/utility/misc/Filesystem.hpp"
#include "pwiz/utility/misc/Std.hpp"
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/device/file_descriptor.hpp>
#include <boost/iostreams/copy.hpp>

Go to the source code of this file.

Classes

class  TestReader
 

Functions

void hackInMemoryMSData (MSData &msd)
 
void validateMmgfMzxmlRoundTrip ()
 
void validateWriteRead (const MSDataFile::WriteConfig &writeConfig, const DiffConfig diffConfig)
 
void test ()
 
void demo ()
 
void testReader ()
 
void testSHA1 ()
 
int main (int argc, char *argv[])
 

Variables

ostream * os_ = 0
 
string filenameBase_ = "temp.MSDataFileTest"
 
const char rawHeader_ []
 

Function Documentation

◆ hackInMemoryMSData()

void hackInMemoryMSData ( MSData & msd)

Definition at line 51 of file MSDataFileTest.cpp.

52{
53 // remove metadata ptrs appended on read
54 vector<SourceFilePtr>& sfs = msd.fileDescription.sourceFilePtrs;
55 if (!sfs.empty()) sfs.erase(sfs.end()-1);
56 vector<SoftwarePtr>& sws = msd.softwarePtrs;
57 if (!sws.empty()) sws.erase(sws.end()-1);
58
59 // remove current DataProcessing created on read
60 SpectrumListBase* sl = dynamic_cast<SpectrumListBase*>(msd.run.spectrumListPtr.get());
64}
common functionality for base ChromatogramList implementations
virtual void setDataProcessingPtr(DataProcessingPtr dp)
set DataProcessing
common functionality for base SpectrumList implementations
virtual void setDataProcessingPtr(DataProcessingPtr dp)
set DataProcessing
boost::shared_ptr< DataProcessing > DataProcessingPtr
Definition MSData.hpp:288
std::vector< SourceFilePtr > sourceFilePtrs
list and descriptions of the source files this mzML document was generated or derived from.
Definition MSData.hpp:90
Run run
a run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument.
Definition MSData.hpp:886
std::vector< SoftwarePtr > softwarePtrs
list and descriptions of software used to acquire and/or process the data in this mzML file.
Definition MSData.hpp:871
FileDescription fileDescription
information pertaining to the entire mzML file (i.e. not specific to any part of the data set) is stored here.
Definition MSData.hpp:862
ChromatogramListPtr chromatogramListPtr
all chromatograms for this run.
Definition MSData.hpp:830
SpectrumListPtr spectrumListPtr
all mass spectra and the acquisitions underlying them are described and attached here....
Definition MSData.hpp:827

References pwiz::msdata::Run::chromatogramListPtr, pwiz::msdata::MSData::fileDescription, pwiz::msdata::MSData::run, pwiz::msdata::ChromatogramListBase::setDataProcessingPtr(), pwiz::msdata::SpectrumListBase::setDataProcessingPtr(), pwiz::msdata::MSData::softwarePtrs, pwiz::msdata::FileDescription::sourceFilePtrs, and pwiz::msdata::Run::spectrumListPtr.

Referenced by validateWriteRead().

◆ validateMmgfMzxmlRoundTrip()

void validateMmgfMzxmlRoundTrip ( )

Definition at line 66 of file MSDataFileTest.cpp.

67{
68 string filename1 = filenameBase_ + ".mgf";
69 string filename2 = filenameBase_ + ".mzXML";
70
71 ofstream ofs(filename1.c_str());
72 string mgf = "CHARGE=2+ and 3+\nBEGIN IONS\nPEPMASS=952.924194 145032.0000\nCHARGE=2+\nRTINSECONDS=301.48\n271.0874 2\n298.1747 4\nEND IONS\nBEGIN IONS\nPEPMASS=503.800000 67522.2000\nCHARGE=2+\nRTINSECONDS=302.51\n147.1840 3\n154.3668 3\n162.2118 2\n162.9007 1\n167.3297 1\n175.2387 2\n184.9460 3\nEND IONS\n";
73 ofs.write(mgf.c_str(), mgf.length());
74 ofs.close();
75
76 // make sure that round trip doesn't systematically increase converted scan numbers
77 for (int loop = 3; loop--; )
78 {
79 MSDataFile msd1(filename1); // read back the MGF
80 const SpectrumList& sl = *msd1.run.spectrumListPtr;
81 SpectrumPtr spectrum = sl.spectrum(0);
82 unit_assert(spectrum->id == "index=0");
83 MSDataFile::WriteConfig writeConfig;
84 writeConfig.format = MSDataFile::Format_mzXML;
85 MSDataFile::write(msd1, filename2, writeConfig); // write as mzXML
86 MSDataFile msd2(filename2); // read back the mzXML
87 const SpectrumList& sl2= *msd2.run.spectrumListPtr;
88 SpectrumPtr spectrum2 = sl2.spectrum(0);
89 unit_assert(spectrum2->id == "index=1"); // mzXML is 1-based
90 MSDataFile::WriteConfig writeConfig2;
91 writeConfig2.format = MSDataFile::Format_MGF;
92 MSDataFile::write(msd2, filename1, writeConfig2); // write as mgf
93 }
94
95 // remove temp files
96 boost::filesystem::remove(filename1);
97 boost::filesystem::remove(filename2);
98}
string filename1
string filenameBase_
Interface for accessing spectra, which may be stored in memory or backed by a data file (RAW,...
Definition MSData.hpp:661
virtual SpectrumPtr spectrum(size_t index, bool getBinaryData=false) const =0
retrieve a spectrum by index
boost::shared_ptr< Spectrum > SpectrumPtr
Definition MSData.hpp:573
configuration for write()
MSData object plus file I/O.
static void write(const MSData &msd, const std::string &filename, const WriteConfig &config=WriteConfig(), const pwiz::util::IterationListenerRegistry *iterationListenerRegistry=0)
static write function for any MSData object; iterationListenerRegistry may be used for progress updates.
#define unit_assert(x)
Definition unit.hpp:85

References filename1, filenameBase_, pwiz::msdata::MSDataFile::WriteConfig::format, pwiz::msdata::MSData::run, pwiz::msdata::SpectrumList::spectrum(), pwiz::msdata::Run::spectrumListPtr, unit_assert, and pwiz::msdata::MSDataFile::write().

Referenced by test().

◆ validateWriteRead()

void validateWriteRead ( const MSDataFile::WriteConfig & writeConfig,
const DiffConfig  diffConfig 
)

Definition at line 101 of file MSDataFileTest.cpp.

103{
104 if (os_) *os_ << "validateWriteRead()\n " << writeConfig << endl;
105
106 string filename1 = filenameBase_ + ".1";
107 string filename2 = filenameBase_ + ".2";
108 string filename3 = filenameBase_ + ".3";
109 string filename4 = filenameBase_ + ".\xE4\xB8\x80\xE4\xB8\xAA\xE8\xAF\x95.4";
110 // FIXME: 4-byte UTF-8 not working: string filename5 = filenameBase_ + ".\x01\x04\xA4\x01\x04\xA2.5";
111
112 {
113 // create MSData object in memory
114 MSData tiny;
116
117 if (writeConfig.format == MSDataFile::Format_mzXML)
118 {
119 // remove s22 since it is not written to mzXML
120 static_cast<SpectrumListSimple&>(*tiny.run.spectrumListPtr).spectra.pop_back();
121 }
122
123 // write to file #1 (static)
124 MSDataFile::write(tiny, filename1, writeConfig);
125
126 // simulate CLI garbage collect behavior, wherein delayed deletes stress
127 // memory and file handle usage
128 {
129 std::vector< boost::shared_ptr< MSDataFile > > msds;
130 for (int i=0;i<100;i++)
131 {
132 boost::shared_ptr<MSDataFile> msd1(new MSDataFile(filename1));
133 msds.push_back(msd1);
134 hackInMemoryMSData(*msd1);
135 Diff<MSData, DiffConfig> diff(tiny, *msd1, diffConfig);
136 }
137 }
138
139 // read back into an MSDataFile object
140 MSDataFile msd1(filename1);
141 hackInMemoryMSData(msd1);
142
143 // compare
144 Diff<MSData, DiffConfig> diff(tiny, msd1, diffConfig);
145 if (diff && os_) *os_ << diff << endl;
147
148 // write to file #2 (member)
149 msd1.write(filename2, writeConfig);
150
151 // read back into another MSDataFile object
152 MSDataFile msd2(filename2);
153 hackInMemoryMSData(msd2);
154
155 // compare
156 diff(tiny, msd2);
157 if (diff && os_) *os_ << diff << endl;
159
160 // now give the gzip read a workout
161 bio::filtering_istream tinyGZ(bio::gzip_compressor() | bio::file_descriptor_source(filename1));
162 bio::copy(tinyGZ, bio::file_descriptor_sink(filename1+".gz", ios::out|ios::binary));
163
164 MSDataFile msd3(filename1+".gz");
165 hackInMemoryMSData(msd3);
166
167 // compare
168 diff(tiny, msd3);
169 if (diff && os_) *os_ << diff << endl;
171
172 // test writing to a stream
173 ostringstream oss;
174 msd1.write(oss, writeConfig);
175 string ossStr = oss.str();
176 ofstream ofs(filename3.c_str());
177 ofs.write(ossStr.c_str(), ossStr.length());
178 ofs.close();
179
180 // read back into another MSDataFile object
181 MSDataFile msd4(filename3);
182 hackInMemoryMSData(msd4);
183
184 // compare
185 diff(tiny, msd4);
186 if (diff && os_) *os_ << diff << endl;
188
189
190 // write to file #4 (testing three byte UTF-8 code points)
191 msd1.write(filename4, writeConfig);
192
193 // read back into another MSDataFile object
194 MSDataFile msd5(filename4);
195 hackInMemoryMSData(msd5);
196
197 // compare
198 diff(tiny, msd5);
199 if (diff && os_) *os_ << diff << endl;
201
202
203 // write to file #5 (testing four byte UTF-8 code points)
204 /*msd1.write(filename5, writeConfig);
205
206 // read back into another MSDataFile object
207 MSDataFile msd6(filename5);
208 hackInMemoryMSData(msd6);
209
210 // compare
211 diff(tiny, msd6);
212 if (diff && os_) *os_ << diff << endl;
213 unit_assert(!diff);*/
214 }
215
216 // remove temp files
217 boost::filesystem::remove(filename1);
218 boost::filesystem::remove(filename2);
219 boost::filesystem::remove(filename1 + ".gz");
220 boost::filesystem::remove(filename3);
221 boost::filesystem::remove(filename4);
222 //boost::filesystem::remove(filename5);
223}
void diff(const string &filename1, const string &filename2)
void hackInMemoryMSData(MSData &msd)
ostream * os_
PWIZ_API_DECL void initializeTiny(MSData &msd)
Calculate diffs of objects in a ProteoWizard data model hierarchy.
Definition diff_std.hpp:143
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition MSData.hpp:850
Simple writeable in-memory implementation of SpectrumList.
Definition MSData.hpp:717

References diff(), filename1, filenameBase_, pwiz::msdata::MSDataFile::WriteConfig::format, hackInMemoryMSData(), pwiz::msdata::examples::initializeTiny(), os_, pwiz::msdata::MSData::run, pwiz::msdata::Run::spectrumListPtr, unit_assert, and pwiz::msdata::MSDataFile::write().

Referenced by test().

◆ test()

void test ( )

Definition at line 225 of file MSDataFileTest.cpp.

226{
227 MSDataFile::WriteConfig writeConfig;
228 DiffConfig diffConfig;
229
231
232 // mzML 64-bit, full diff
233 validateWriteRead(writeConfig, diffConfig);
234
235 writeConfig.indexed = false;
236 validateWriteRead(writeConfig, diffConfig); // no index
237 writeConfig.indexed = true;
238
239 // mzML 32-bit, full diff
241 validateWriteRead(writeConfig, diffConfig);
242
243 // mzXML 32-bit, diff ignoring metadata and chromatograms
244 writeConfig.format = MSDataFile::Format_mzXML;
245 diffConfig.ignoreMetadata = true;
246 diffConfig.ignoreChromatograms = true;
247 validateWriteRead(writeConfig, diffConfig);
248
249 // mzXML 64-bit, diff ignoring metadata and chromatograms
251 validateWriteRead(writeConfig, diffConfig);
252
253 writeConfig.indexed = false;
254 validateWriteRead(writeConfig, diffConfig); // no index
255 writeConfig.indexed = true;
256}
void validateWriteRead(const MSDataFile::WriteConfig &writeConfig, const DiffConfig diffConfig)
void validateMmgfMzxmlRoundTrip()
configuration struct for diffing MSData types
Definition Diff.hpp:206
bool ignoreMetadata
ignore all file level metadata, and most scan level metadata, i.e.
Definition Diff.hpp:214
BinaryDataEncoder::Config binaryDataEncoderConfig

References pwiz::msdata::MSDataFile::WriteConfig::binaryDataEncoderConfig, pwiz::msdata::MSDataFile::WriteConfig::format, pwiz::msdata::DiffConfig::ignoreChromatograms, pwiz::msdata::DiffConfig::ignoreMetadata, pwiz::msdata::MSDataFile::WriteConfig::indexed, pwiz::msdata::BinaryDataEncoder::Config::precision, pwiz::msdata::BinaryDataEncoder::Precision_32, pwiz::msdata::BinaryDataEncoder::Precision_64, validateMmgfMzxmlRoundTrip(), and validateWriteRead().

Referenced by main().

◆ demo()

void demo ( )

◆ testReader()

void testReader ( )

Definition at line 326 of file MSDataFileTest.cpp.

327{
328 // create a file
329 string filename = filenameBase_ + ".RAW";
330 ofstream os(filename.c_str());
331 os.write(rawHeader_, 18);
332 os.close();
333
334 // open the file with our Reader
335 TestReader reader;
336 MSDataFile msd(filename, &reader);
337
338 // verify that our reader got called properly
339 unit_assert(reader.count == 2);
340
341 // remove temp file
342 boost::filesystem::remove(filename);
343
344 if (os_) *os_ << endl;
345}
const char rawHeader_[]

References TestReader::count, filenameBase_, os_, rawHeader_, and unit_assert.

Referenced by main().

◆ testSHA1()

void testSHA1 ( )

Definition at line 348 of file MSDataFileTest.cpp.

349{
350 if (os_) *os_ << "testSHA1()\n";
351
352 // write out a test file
353
354 string filename = filenameBase_ + ".SHA1Test";
355 MSData tiny;
357 MSDataFile::write(tiny, filename);
358
359 {
360 // read in without SHA-1 calculation
361 MSDataFile msd(filename);
362
363 if (os_)
364 {
365 *os_ << "no SHA-1:\n";
367 IO::write(writer, *msd.fileDescription.sourceFilePtrs.back());
368 }
369
370 unit_assert(!msd.fileDescription.sourceFilePtrs.empty());
371 unit_assert(!msd.fileDescription.sourceFilePtrs.back()->hasCVParam(MS_SHA_1));
372
373 // read in with SHA-1 calculation
374
375 MSDataFile msd_sha1(filename, 0, true);
376
377 if (os_)
378 {
379 *os_ << "with SHA-1:\n";
381 IO::write(writer, *msd_sha1.fileDescription.sourceFilePtrs.back());
382 }
383
384 unit_assert(!msd_sha1.fileDescription.sourceFilePtrs.empty());
385 unit_assert(msd_sha1.fileDescription.sourceFilePtrs.back()->hasCVParam(MS_SHA_1));
386 }
387
388 // clean up
389
390 boost::filesystem::remove(filename);
391 if (os_) *os_ << endl;
392}
The XMLWriter class provides simple, tag-level XML syntax writing.
Definition XMLWriter.hpp:48
MS_SHA_1
SHA-1: SHA-1 (Secure Hash Algorithm-1) is a cryptographic hash function designed by the National Secu...
Definition cv.hpp:2316
PWIZ_API_DECL void write(minimxml::XMLWriter &writer, const CV &cv)

References pwiz::msdata::MSData::fileDescription, filenameBase_, pwiz::msdata::examples::initializeTiny(), MS_SHA_1, os_, pwiz::msdata::FileDescription::sourceFilePtrs, unit_assert, pwiz::msdata::MSDataFile::write(), and pwiz::msdata::IO::write().

Referenced by main().

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 395 of file MSDataFileTest.cpp.

396{
397 TEST_PROLOG(argc, argv)
398
399 try
400 {
401 if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
402 test();
403 //demo();
404 testReader();
405 testSHA1();
406 }
407 catch (exception& e)
408 {
409 TEST_FAILED(e.what())
410 }
411 catch (...)
412 {
413 TEST_FAILED("Caught unknown exception.")
414 }
415
417}
void testSHA1()
void testReader()
void test()
#define TEST_EPILOG
Definition unit.hpp:183
#define TEST_FAILED(x)
Definition unit.hpp:177
#define TEST_PROLOG(argc, argv)
Definition unit.hpp:175

References os_, test(), TEST_EPILOG, TEST_FAILED, TEST_PROLOG, testReader(), and testSHA1().

Variable Documentation

◆ os_

ostream* os_ = 0

Definition at line 45 of file MSDataFileTest.cpp.

Referenced by main(), testReader(), testSHA1(), and validateWriteRead().

◆ filenameBase_

string filenameBase_ = "temp.MSDataFileTest"

◆ rawHeader_

const char rawHeader_[]
Initial value:
= {'\x01', '\xA1',
'F', '\0', 'i', '\0', 'n', '\0', 'n', '\0',
'i', '\0', 'g', '\0', 'a', '\0', 'n', '\0'}

Definition at line 281 of file MSDataFileTest.cpp.

281 {'\x01', '\xA1',
282 'F', '\0', 'i', '\0', 'n', '\0', 'n', '\0',
283 'i', '\0', 'g', '\0', 'a', '\0', 'n', '\0'};

Referenced by TestReader::identify(), and testReader().