void test() { SpectrumListPtr sl = createSpectrumList(); for (size_t i=0; i<sl->size(); i++) verifySpectrumSize(*sl, i, 5); SpectrumList_MZWindow window(sl, 4.20, 6.66); if (os_) { *os_ << "filtered list:\n"; printSpectrumList(window, *os_); *os_ << endl; } unit_assert(window.size() == sl->size()); verifySpectrumSize(window, 0, 0); verifySpectrumSize(window, 1, 1); verifySpectrumSize(window, 2, 2); verifySpectrumSize(window, 3, 2); verifySpectrumSize(window, 4, 2); verifySpectrumSize(window, 5, 2); verifySpectrumSize(window, 6, 1); verifySpectrumSize(window, 7, 0); verifySpectrumSize(window, 8, 0); verifySpectrumSize(window, 9, 0); }
/// Rebuilds the spectra of an mzDB run and swaps the run's lists for cached ones.
///
/// For every spectrum of `mzdb` the matching spectrum (same index) of `raw` is
/// loaded. Scans whose encoding mode is FITTED get a new intensity array laid
/// out on the m/z grid of a BinnedSpectrum built from the raw spectrum: each
/// stored peak contributes mzMath::y(...) values to the bins on its left and on
/// its right. All other scans keep their own m/z and intensity vectors. Each
/// spectrum is then stored in an mzDataCache under its index.
///
/// Side effects:
///  - writes "ErrorPercentage.txt" (one relative integration error per fitted
///    spectrum), "ErrorNbPoints.txt" (grid / raw / rebuilt intensities of the
///    raw spectrum whose index is 1) and "ymax.txt" (m/z and apex intensity of
///    every fitted peak) in the current working directory;
///  - prints a progress bar through printProgBar();
///  - replaces mzdb->run.spectrumListPtr with an mzSpectrumListCache and
///    mzdb->run.chromatogramListPtr with an mzEmptyChromatogram.
///
/// @param f     mzDB file handle, forwarded to mzSpectrumListCache.
/// @param mzdb  data set whose spectra are rebuilt; modified in place.
/// @param raw   reference data set; NOTE(review): assumed to hold at least as
///              many spectra as `mzdb`, in the same order - confirm with callers.
void mzDBMSDataTomzMLMSData(MzDBFile* f, MSDataPtr mzdb, MSDataPtr raw)
{
    // Diagnostic outputs. They are best effort: a file that could not be
    // created is reported once and every later write to it is skipped, instead
    // of handing a null FILE* to fprintf()/fclose().
    ofstream fileHandle("ErrorPercentage.txt", ios::out | ios::trunc);
    FILE* fileHandle_2 = fopen("ErrorNbPoints.txt", "w");
    FILE* fileHandle_3 = fopen("ymax.txt", "w");
    if (!fileHandle || !fileHandle_2 || !fileHandle_3)
        printf("Failed to create ratiosPercentage file\n");

    // The cache pointer is handed to mzSpectrumListCache at the end of the function.
    mzDataCache* cache = new mzDataCache;
    cache->open();

    int lastPercent = 0;

    SpectrumListPtr spec = mzdb->run.spectrumListPtr;
    SpectrumListPtr rawSpec = raw->run.spectrumListPtr;

    for (size_t i = 0; i < spec->size(); ++i)
    {
        SpectrumPtr ptr = spec->spectrum(i, true);
        SpectrumPtr rawSpectrum = rawSpec->spectrum(i, true);

        // NOTE(review): unchecked downcast - assumes every spectrum of the mzDB
        // list is actually an mzScan.
        mzScan* scan = (mzScan*)(ptr.get());
        if (!scan)
        {
            printf("Empty pointer\n");
            exit(EXIT_FAILURE); // this is an error path: do not report success
        }

        vector<double>& mz = scan->mz;
        vector<double>& intens = scan->intensities;
        vector<float>& lwhm = scan->lwhm;
        vector<float>& rwhm = scan->rwhm;

        if (scan->encoding.mode == FITTED)
        {
            BinnedSpectrum binnedSpectrum(rawSpectrum);
            const size_t nbBins = binnedSpectrum.mzData.size();

            // Rebuilt intensities, one per bin of the m/z grid.
            vector<double> intData(nbBins, 0.0);

            for (size_t j = 0; j < mz.size(); ++j)
            {
                const double y_max = intens[j];
                const double x_zero = mz[j];

                if (fileHandle_3)
                    fprintf(fileHandle_3, "%.5f\t%f\n", x_zero, y_max);

                const size_t index = binnedSpectrum.getLeftIndex(x_zero);
                if (index >= nbBins)
                    continue; // no bin to start from (empty grid or peak outside of it)

                // Maximum distance from the apex, used only for peaks flagged
                // by makeBounds below.
                const double ppmLimit = 1.1 * sqrt(x_zero);
                const double maxDistFromTheApex = (x_zero * (ppmLimit / 2)) / 1e6;
                const double mzLeftLimit = x_zero - maxDistFromTheApex;
                const double mzRightLimit = x_zero + maxDistFromTheApex;

                // ---- left side: walk from the apex bin towards lower m/z ----
                // NOTE(review): SIGMA_FACTOR presumably converts a width at
                // half maximum into a sigma - confirm against its definition.
                const double sigmal = (2.0 * static_cast<double>(lwhm[j])) / SIGMA_FACTOR;
                const double sigmal_squared = sigmal * sigmal;
                const double min_x_left = x_zero - sigmal * 2;

                size_t index_left = index;
                double x_zero_pl = binnedSpectrum.mzData[index_left];
                double y_vall = mzMath::y(x_zero_pl, x_zero, y_max, sigmal_squared);

                // Peaks whose value in the apex bin is below 6e3 are also
                // clipped to the ppm window computed above.
                const bool makeBounds = y_vall < 6e3;

                while (x_zero_pl > min_x_left)
                {
                    if (x_zero_pl < mzLeftLimit && makeBounds)
                        break;

                    intData[index_left] += y_vall;

                    // index_left is unsigned: stop at the first bin instead of
                    // wrapping around and reading far outside of the grid.
                    if (index_left == 0)
                        break;
                    --index_left;

                    x_zero_pl = binnedSpectrum.mzData[index_left];
                    y_vall = mzMath::y(x_zero_pl, x_zero, y_max, sigmal_squared);
                }

                // ---- right side: walk from the next bin towards higher m/z ----
                const double sigma = (2.0 * static_cast<double>(rwhm[j])) / SIGMA_FACTOR;
                const double sigma_squared = sigma * sigma;
                const double min_x_right = x_zero + sigma * 2;

                // The loop condition keeps index_right inside the grid.
                for (size_t index_right = index + 1; index_right < nbBins; ++index_right)
                {
                    const double x_zero_p = binnedSpectrum.mzData[index_right];
                    if (!(x_zero_p < min_x_right))
                        break;
                    if (x_zero_p > mzRightLimit && makeBounds)
                        break;

                    intData[index_right] += mzMath::y(x_zero_p, x_zero, y_max, sigma_squared);
                }
            }

            ptr->setMZIntensityArrays(binnedSpectrum.mzData, intData, CVID_Unknown);

            // ---- compare raw and rebuilt intensities per integer m/z bin ----
            map<int, vector<pair<double, double> > > rawIntensitiesSumByBinIndex, mzdbIntensitiesSumByIndex;

            vector<MZIntensityPair> rawMzIntensities;
            rawSpectrum->getMZIntensityPairs(rawMzIntensities);

            for (size_t j = 0; j < rawMzIntensities.size(); ++j)
            {
                const MZIntensityPair& rawPoint = rawMzIntensities[j];
                const int idx = static_cast<int>(rawPoint.mz);
                rawIntensitiesSumByBinIndex[idx].push_back(make_pair(rawPoint.mz, rawPoint.intensity));
            }

            for (size_t j = 0; j < intData.size(); ++j)
            {
                const double binMz = binnedSpectrum.mzData[j];
                const int idx = static_cast<int>(binMz);
                mzdbIntensitiesSumByIndex[idx].push_back(make_pair(binMz, intData[j]));
            }

            double counter = 0;
            double sum = 0;
            for (auto it = rawIntensitiesSumByBinIndex.begin(); it != rawIntensitiesSumByBinIndex.end(); ++it)
            {
                const vector<pair<double, double> >& rawPoints = it->second;

                double integratedIntensity = 0;
                if (!rawPoints.empty())
                    integrate(rawPoints, integratedIntensity);

                if (integratedIntensity == 0)
                    continue; // nothing to compare against (would also divide by zero)

                auto rebuilt = mzdbIntensitiesSumByIndex.find(it->first);
                if (rebuilt == mzdbIntensitiesSumByIndex.end())
                    continue;

                double mzDBIntegratedIntensity = 0;
                integrate(rebuilt->second, mzDBIntegratedIntensity);

                sum += (integratedIntensity - mzDBIntegratedIntensity) / integratedIntensity;
                counter++;
            }

            // Mean relative error in percent. When no bin could be compared
            // (counter == 0) the division does not yield a finite number; the
            // value is still written so that the file keeps one line per
            // fitted spectrum.
            fileHandle << (sum / counter) * 100.0 << endl;

            if (rawSpectrum->index == 1 && fileHandle_2)
            {
                auto& rawData = rawSpectrum->getIntensityArray()->data;

                // Stop at the shorter of the two arrays: the raw array is not
                // guaranteed here to be as long as the grid.
                for (size_t kk = 0; kk < nbBins && kk < rawData.size(); ++kk)
                    fprintf(fileHandle_2, "%.4f\t%f\t%f\n", binnedSpectrum.mzData[kk], rawData[kk], intData[kk]);
            }
        }
        else
        {
            ptr->setMZIntensityArrays(mz, intens, CVID_Unknown);
        }

        cache->addKeyValue(boost::lexical_cast<string>(i), ptr);

        // Redraw the progress bar only when the integer percentage changes.
        const int newPercent = static_cast<int>(static_cast<float>(i) / spec->size() * 100);
        if (newPercent != lastPercent)
        {
            printProgBar(newPercent);
            lastPercent = newPercent;
        }
    } // end iteration over mzdb spectra

    printProgBar(100);

    fileHandle.close();
    if (fileHandle_2)
        fclose(fileHandle_2);
    if (fileHandle_3)
        fclose(fileHandle_3);

    SpectrumListPtr spectrumListCached(new mzSpectrumListCache(f, mzdb.get(), cache));
    mzdb->run.spectrumListPtr = spectrumListCached;

    ChromatogramListPtr chromListPtr(new mzEmptyChromatogram);
    mzdb->run.chromatogramListPtr = chromListPtr;
}
void test() { MSData msd; examples::initializeTiny(msd); SpectrumListPtr originalList = msd.run.spectrumListPtr; SpectrumListPtr defaultArrayLengthSortedList( new SpectrumList_Sorter(originalList, DefaultArrayLengthSorter())); SpectrumListPtr msLevelUnstableSortedList( new SpectrumList_Sorter(originalList, MSLevelSorter())); SpectrumListPtr msLevelStableSortedList( new SpectrumList_Sorter(originalList, MSLevelSorter(), true)); SpectrumListPtr sillySortedList( new SpectrumList_Sorter(msLevelStableSortedList, DefaultArrayLengthSorter())); if (os_) { *os_ << "Original spectrum list (" << originalList->size() << "):\n"; TextWriter write(*os_); write(*originalList); *os_ << endl; } if (os_) { *os_ << "Default array length sorted spectrum list (" << defaultArrayLengthSortedList->size() << "):\n"; TextWriter write(*os_); write(*defaultArrayLengthSortedList); *os_ << endl; } if (os_) { *os_ << "MS level unstable sorted spectrum list (" << msLevelUnstableSortedList->size() << "):\n"; TextWriter write(*os_); write(*msLevelUnstableSortedList); *os_ << endl; } if (os_) { *os_ << "MS level stable sorted spectrum list (" << msLevelStableSortedList->size() << "):\n"; TextWriter write(*os_); write(*msLevelStableSortedList); *os_ << endl; } if (os_) { *os_ << "Silly (nested) sorted spectrum list (" << sillySortedList->size() << "):\n"; TextWriter write(*os_); write(*sillySortedList); *os_ << endl; } unit_assert_operator_equal(originalList->size(), defaultArrayLengthSortedList->size()); unit_assert_operator_equal(originalList->size(), msLevelUnstableSortedList->size()); unit_assert_operator_equal(originalList->size(), msLevelStableSortedList->size()); unit_assert_operator_equal(originalList->size(), sillySortedList->size()); SpectrumPtr s; // assert that the original list is unmodified unit_assert_operator_equal("scan=19", originalList->spectrumIdentity(0).id); unit_assert_operator_equal(0, originalList->spectrumIdentity(0).index); unit_assert_operator_equal("scan=20", 
originalList->spectrumIdentity(1).id); unit_assert_operator_equal(1, originalList->spectrumIdentity(1).index); unit_assert_operator_equal("scan=21", originalList->spectrumIdentity(2).id); unit_assert_operator_equal(2, originalList->spectrumIdentity(2).index); unit_assert_operator_equal("scan=22", originalList->spectrumIdentity(3).id); unit_assert_operator_equal(3, originalList->spectrumIdentity(3).index); s = originalList->spectrum(0); unit_assert_operator_equal("scan=19", s->id); unit_assert_operator_equal(0, s->index); // validate the default array length sorted list (ascending order, scan=19 and scan=22 are interchangeable) unit_assert_operator_equal("scan=21", defaultArrayLengthSortedList->spectrumIdentity(0).id); unit_assert_operator_equal(0, defaultArrayLengthSortedList->spectrumIdentity(0).index); unit_assert_operator_equal("scan=20", defaultArrayLengthSortedList->spectrumIdentity(1).id); unit_assert_operator_equal(1, defaultArrayLengthSortedList->spectrumIdentity(1).index); unit_assert_operator_equal(2, defaultArrayLengthSortedList->spectrumIdentity(2).index); unit_assert_operator_equal(3, defaultArrayLengthSortedList->spectrumIdentity(3).index); s = defaultArrayLengthSortedList->spectrum(0); unit_assert_operator_equal("scan=21", s->id); unit_assert_operator_equal(0, s->index); s = defaultArrayLengthSortedList->spectrum(1); unit_assert_operator_equal("scan=20", s->id); unit_assert_operator_equal(1, s->index); s = defaultArrayLengthSortedList->spectrum(2); unit_assert_operator_equal(2, s->index); s = defaultArrayLengthSortedList->spectrum(3); unit_assert_operator_equal(3, s->index); for (size_t i=1, end=defaultArrayLengthSortedList->size(); i < end; ++i) unit_assert(defaultArrayLengthSortedList->spectrum(i)->defaultArrayLength >= defaultArrayLengthSortedList->spectrum(i-1)->defaultArrayLength); // validate the MS level unstable sorted list (scan=19, scan=21, and scan=22 are interchangeable) unit_assert_operator_equal(0, 
msLevelUnstableSortedList->spectrumIdentity(0).index); unit_assert_operator_equal(1, msLevelUnstableSortedList->spectrumIdentity(1).index); unit_assert_operator_equal(2, msLevelUnstableSortedList->spectrumIdentity(2).index); unit_assert_operator_equal("scan=20", msLevelUnstableSortedList->spectrumIdentity(3).id); unit_assert_operator_equal(3, msLevelUnstableSortedList->spectrumIdentity(3).index); s = msLevelUnstableSortedList->spectrum(0); unit_assert_operator_equal(0, s->index); s = msLevelUnstableSortedList->spectrum(1); unit_assert_operator_equal(1, s->index); s = msLevelUnstableSortedList->spectrum(2); unit_assert_operator_equal(2, s->index); s = msLevelUnstableSortedList->spectrum(3); unit_assert_operator_equal("scan=20", s->id); unit_assert_operator_equal(3, s->index); // validate the MS level stable sorted list (scan=19, scan=21, and scan=22 should stay in order) unit_assert_operator_equal("scan=19", msLevelStableSortedList->spectrumIdentity(0).id); unit_assert_operator_equal(0, msLevelStableSortedList->spectrumIdentity(0).index); unit_assert_operator_equal("scan=21", msLevelStableSortedList->spectrumIdentity(1).id); unit_assert_operator_equal(1, msLevelStableSortedList->spectrumIdentity(1).index); unit_assert_operator_equal("sample=1 period=1 cycle=23 experiment=1", msLevelStableSortedList->spectrumIdentity(2).id); unit_assert_operator_equal(2, msLevelStableSortedList->spectrumIdentity(2).index); unit_assert_operator_equal("scan=20", msLevelStableSortedList->spectrumIdentity(3).id); unit_assert_operator_equal(3, msLevelStableSortedList->spectrumIdentity(3).index); s = msLevelStableSortedList->spectrum(0); unit_assert_operator_equal("scan=19", s->id); unit_assert_operator_equal(0, s->index); s = msLevelStableSortedList->spectrum(1); unit_assert_operator_equal("scan=21", s->id); unit_assert_operator_equal(1, s->index); s = msLevelStableSortedList->spectrum(2); unit_assert_operator_equal("sample=1 period=1 cycle=23 experiment=1", s->id); 
unit_assert_operator_equal(2, s->index); s = msLevelStableSortedList->spectrum(3); unit_assert_operator_equal("scan=20", s->id); unit_assert_operator_equal(3, s->index); // validate the silly (nested) sorted list unit_assert_operator_equal("scan=21", sillySortedList->spectrumIdentity(0).id); unit_assert_operator_equal(0, sillySortedList->spectrumIdentity(0).index); unit_assert_operator_equal("scan=20", sillySortedList->spectrumIdentity(1).id); unit_assert_operator_equal(1, sillySortedList->spectrumIdentity(1).index); unit_assert_operator_equal(2, sillySortedList->spectrumIdentity(2).index); unit_assert_operator_equal(3, sillySortedList->spectrumIdentity(3).index); s = sillySortedList->spectrum(0); unit_assert_operator_equal("scan=21", s->id); unit_assert_operator_equal(0, s->index); s = sillySortedList->spectrum(1); unit_assert_operator_equal("scan=20", s->id); unit_assert_operator_equal(1, s->index); s = sillySortedList->spectrum(2); unit_assert_operator_equal(2, s->index); s = sillySortedList->spectrum(3); unit_assert_operator_equal(3, s->index); for (size_t i=1, end=sillySortedList->size(); i < end; ++i) unit_assert(sillySortedList->spectrum(i)->defaultArrayLength >= sillySortedList->spectrum(i-1)->defaultArrayLength); }
void test(bool indexed) { if (os_) *os_ << "test(): indexed=\"" << boolalpha << indexed << "\"\n"; MSData tiny; examples::initializeTiny(tiny); Serializer_mzML::Config config; config.indexed = indexed; Serializer_mzML serializer(config); ostringstream oss; serializer.write(oss, tiny); if (os_) *os_ << "oss:\n" << oss.str() << endl; shared_ptr<istream> is(new istringstream(oss.str())); // dummy would normally be read in from file MSData dummy; ParamGroupPtr pg1(new ParamGroup); pg1->id = "CommonMS1SpectrumParams"; pg1->cvParams.push_back(MS_positive_scan); pg1->cvParams.push_back(MS_full_scan); dummy.paramGroupPtrs.push_back(pg1); ParamGroupPtr pg2(new ParamGroup); pg2->id = "CommonMS2SpectrumParams"; pg2->cvParams.push_back(MS_positive_scan); pg2->cvParams.push_back(MS_full_scan); dummy.paramGroupPtrs.push_back(pg2); // so we don't have any dangling references dummy.instrumentConfigurationPtrs.push_back(InstrumentConfigurationPtr(new InstrumentConfiguration("LCQDeca"))); dummy.dataProcessingPtrs.push_back(DataProcessingPtr(new DataProcessing("XcaliburProcessing"))); SpectrumListPtr sl = SpectrumList_mzML::create(is, dummy, indexed); // check easy functions unit_assert(sl.get()); unit_assert(sl->size() == 4); unit_assert(sl->find ("S19") == 0); unit_assert(sl->findNative("19") == 0); unit_assert(sl->find("S20") == 1); unit_assert(sl->findNative("20") == 1); unit_assert(sl->find("S21") == 2); unit_assert(sl->findNative("21") == 2); unit_assert(sl->find("S22") == 3); unit_assert(sl->findNative("22") == 3); unit_assert(sl->findSpotID("A1").empty()); IndexList spotIndexList = sl->findSpotID("A1,42x42,4242x4242"); unit_assert(spotIndexList.size() == 1); unit_assert(spotIndexList[0] == 3); // check scan 19 SpectrumPtr s = sl->spectrum(0); // read without binary data unit_assert(s.get()); unit_assert(s->id == "S19"); unit_assert(s->nativeID == "19"); unit_assert(s->spotID.empty()); unit_assert(s->cvParam(MS_ms_level).valueAs<int>() == 1); 
unit_assert(s->binaryDataArrayPtrs.empty()); unit_assert(sl->spectrumIdentity(0).index == 0); unit_assert(sl->spectrumIdentity(0).id == "S19"); unit_assert(sl->spectrumIdentity(0).nativeID == "19"); unit_assert(sl->spectrumIdentity(0).spotID.empty()); SpectrumPtr s_cache = sl->spectrum(0); // cache read unit_assert(s_cache.get() == s.get()); s = sl->spectrum(0, true); // read with binary data unit_assert(s_cache.get() != s.get()); vector<MZIntensityPair> pairs; s->getMZIntensityPairs(pairs); unit_assert(pairs.size() == 15); for (int i=0; i<15; i++) unit_assert(pairs[i].mz==i && pairs[i].intensity==15-i); unit_assert(s->spectrumDescription.scan.paramGroupPtrs.size() == 1); unit_assert(s->spectrumDescription.scan.paramGroupPtrs.back()->id == "CommonMS1SpectrumParams"); unit_assert(s->spectrumDescription.scan.paramGroupPtrs.back()->cvParams.size() == 2); // check scan 20 s = sl->spectrum(1, true); unit_assert(s.get()); unit_assert(s->id == "S20"); unit_assert(s->nativeID == "20"); unit_assert(s->spotID.empty()); unit_assert(s->cvParam(MS_ms_level).valueAs<int>() == 2); unit_assert(sl->spectrumIdentity(1).index == 1); unit_assert(sl->spectrumIdentity(1).id == "S20"); unit_assert(sl->spectrumIdentity(1).nativeID == "20"); unit_assert(sl->spectrumIdentity(1).spotID.empty()); pairs.clear(); s->getMZIntensityPairs(pairs); unit_assert(pairs.size() == 10); for (int i=0; i<10; i++) unit_assert(pairs[i].mz==2*i && pairs[i].intensity==(10-i)*2); unit_assert(s->spectrumDescription.scan.paramGroupPtrs.size() == 1); unit_assert(s->spectrumDescription.scan.paramGroupPtrs.back()->id == "CommonMS2SpectrumParams"); unit_assert(s->spectrumDescription.scan.paramGroupPtrs.back()->cvParams.size() == 2); // check scan 22 (MALDI) s = sl->spectrum(3, true); unit_assert(s.get()); unit_assert(s->id == "S22"); unit_assert(s->nativeID == "22"); unit_assert(s->spotID == "A1,42x42,4242x4242"); unit_assert(s->cvParam(MS_ms_level).valueAs<int>() == 1); unit_assert(sl->spectrumIdentity(3).index == 
3); unit_assert(sl->spectrumIdentity(3).id == "S22"); unit_assert(sl->spectrumIdentity(3).nativeID == "22"); unit_assert(sl->spectrumIdentity(3).spotID == "A1,42x42,4242x4242"); }