/*
 * Fetch the product ID from the given device path.
 *
 * The path is matched against "VID_.*PID_<4 hex digits>" through searchOne()
 * and the result is parsed as a base-16 number.
 *
 * Returns 0 when searchOne() yields an empty result or when the result
 * cannot be converted (std::invalid_argument from std::stoi).
 */
static inline uint16_t pidFromDevPath(std::string const& devpath)
{
    try
    {
        auto const pidHex = searchOne(devpath, "VID_.*PID_([0-9a-fA-F]{4})");

        // An empty result is answered directly so that std::stoi is never
        // asked to parse it (which would only raise an avoidable exception).
        return pidHex.empty()
                   ? static_cast<uint16_t>(0)
                   : static_cast<uint16_t>(std::stoi(pidHex, nullptr, 16));
    }
    catch (std::invalid_argument const&)
    {
        return 0;
    }
}
void searchOneMaskTrim(struct dnaSeq *seq, boolean isProt,
                       struct genoFind *gf, FILE *outFile,
                       struct hash *maskHash,
                       long long *retTotalSize, int *retCount)
/* Search a single sequence against a single genoFind index.
 * The query mask bits are computed from the full sequence, the sequence is
 * trimmed into a local copy of the dnaSeq header, and that trimmed view is
 * handed to searchOne.  On return seq->size has been added to *retTotalSize
 * and *retCount has been incremented by one. */
{
struct dnaSeq trimmed;
boolean doMask = (qMask != NULL);
boolean lowerCaseMask = (doMask && sameWord(qMask, "lower"));
Bits *maskBits = maskQuerySeq(seq, isProt, doMask, lowerCaseMask);

ZeroVar(&trimmed);
trimSeq(seq, &trimmed);

/* RNA query types get the 'u'/'t' character swap applied to the trimmed
 * bases before searching. */
if (qType == gftRna || qType == gftRnaX)
    memSwapChar(trimmed.dna, trimmed.size, 'u', 't');

searchOne(&trimmed, gf, outFile, isProt, maskHash, maskBits);

/* The running total counts the untrimmed size of the query. */
*retTotalSize += seq->size;
*retCount += 1;

bitFree(&maskBits);
}
// search - runs the searches void SpectraSTMzXMLSearchTask::search() { if (!m_params.indexCacheAll) { // Not caching all entries. In this case, the queries have to be sorted by precursor m/z first, such that // the cached window slides from low to high precursor m/z only once. (Otherwise, the cached entries will need to be swapped // in and out repeatedly, defeating the purpose of caching.) // There is a catch however. To be able to search out of order, one has to keep many mzXML files open, and most systems // have a max file opened limit. In such case, we will need to divide the mzXML files into smaller batches. The library will // will need to be read numBatches times, but the tradeoff is we won't need to keep all entries cached in memory by selecting // the indexCacheAll option. // Divide the mzXML files into equal batches of at most MAX_NUM_OPEN_FILES files. unsigned int numBatches = ((unsigned int)m_searchFileNames.size() - 1) / MAX_NUM_OPEN_FILES + 1; unsigned int batchStart = 0; for (unsigned int b = 0; b < numBatches; b++) { m_batchBoundaries.push_back(batchStart); batchStart += (unsigned int)m_searchFileNames.size() / numBatches; } m_batchBoundaries.push_back((unsigned int)m_searchFileNames.size()); // For each batch, sort all spectra by precursor m/z, open the files, and set the search in motion for (unsigned int batch = 0; batch < (unsigned int)m_batchBoundaries.size() - 1; batch++) { // this will do the sorting. the vector m_scans will be populated with the sorted scans. prepareSortedSearch((unsigned int)batch); // open the output files and print the headers (e.g. 
the xml definitions, ms run info, etc) for (unsigned int n = m_batchBoundaries[batch]; n < m_batchBoundaries[batch + 1]; n++) { m_outputs[n]->openFile(); m_outputs[n]->printHeader(); } // tracking search progress ProgressCount pc(!g_quiet && !g_verbose, 1, (int)(m_scans.size())); string msg("Searching"); pc.start(msg); // create searches from the m_scans one-by-one, and search them for (vector<pair<unsigned int, rampScanInfo*> >::iterator i = m_scans.begin(); i != m_scans.end(); i++) { searchOne((*i).first, (*i).second); pc.increment(); // done. we can delete the rampScanInfo object now. delete (*i).second; } pc.done(); // log the search of the batch stringstream searchLogss; searchLogss << "Searched sorted scans "; searchLogss << "(Max " << m_numScansInFile << " scans; " << m_numSearchedInFile << " searched, "; searchLogss << m_numLikelyGoodInFile << " likely good; "; if (m_numNotSelectedInFile > 0) { searchLogss << m_numNotSelectedInFile << " not selected; "; } searchLogss << m_numFailedFilterInFile << " failed filter; " << m_numMissingInFile << " missing; " << m_numMS1InFile << " MS1)"; g_log->log("MZXML SEARCH", searchLogss.str()); // done with this batch. close the files so that we can open more files in the next batch for (unsigned int n = m_batchBoundaries[batch]; n < m_batchBoundaries[batch + 1]; n++) { delete (m_files[n].second); // the cramp objects, which will close the mzXML files m_files[n].second = NULL; m_outputs[n]->printFooter(); // the output files m_outputs[n]->closeFile(); } } } else { // This is the case where we're caching everything anyway. In this case, it is not necessary to sort // by precursor m/z before searching. We simply open the files one by one and search the queries // in the order they are read. 
for (unsigned int n = 0; n < (unsigned int)m_searchFileNames.size(); n++) { // open the file using cRamp cRamp* cramp = new cRamp(m_searchFileNames[n].c_str()); if (!cramp->OK()) { g_log->error("MZXML SEARCH", "Cannot open file \"" + m_searchFileNames[n] + "\". File skipped."); delete (cramp); continue; } // Read the run info to extract the number of scans rampRunInfo* runInfo = cramp->getRunInfo(); if (!runInfo) { // probably an empty file... g_log->error("MZXML SEARCH", "Cannot open file \"" + m_searchFileNames[n] + "\". File skipped."); delete (cramp); continue; } rampInstrumentInfo* instr = cramp->getInstrumentInfo(); if (instr) { m_outputs[n]->setInstrInfo(instr); // delete (instr); } // open the output file m_outputs[n]->openFile(); m_outputs[n]->printHeader(); int numScans = cramp->getLastScan(); delete (runInfo); // parse out the file name to determine the query prefix. Note that the query string // has the form <mzXML file name>.<scan num>.<scan num>.0 FileName fn; parseFileName(m_searchFileNames[n], fn); // m_files is a vector of (FileName, cRamp*) m_files[n].first = fn; m_files[n].second = cramp; ProgressCount pc(!g_quiet && !g_verbose, 1, numScans); stringstream msg; msg << "Searching \"" << m_searchFileNames[n] << "\" " << "(" << n + 1 << " of " << m_searchFileNames.size() << ")"; pc.start(msg.str()); m_numScansInFile = numScans; m_numNotSelectedInFile = 0; m_numMissingInFile = 0; m_numMS1InFile = 0; m_numFailedFilterInFile = 0; m_numSearchedInFile = 0; m_numLikelyGoodInFile = 0; for (int k = 1; k <= numScans; k++) { pc.increment(); // Filter out all scans not in selected list (in this case, // the selected list contains a list of scan numbers as strings if (!m_searchAll && !isInSelectedList(SpectraSTQuery::constructQueryName(fn.name, k, 0))) { m_numNotSelectedInFile++; continue; } // get the scan header (no peak list) first to check whether it's MS2. 
// it'd be a waste of time if we read all scans, including MS1 rampScanInfo* scanInfo = cramp->getScanHeaderInfo(k); // check to make sure the scan is good, and is not MS1 if (!scanInfo || (!m_isMzData && scanInfo->m_data.acquisitionNum != k)) { m_numMissingInFile++; if (scanInfo) delete (scanInfo); continue; } if (scanInfo->m_data.msLevel == 1) { m_numMS1InFile++; delete (scanInfo); continue; } // now we can search searchOne(n, scanInfo); // done, can delete scanInfo delete scanInfo; } pc.done(); // log the search of this file stringstream searchLogss; searchLogss << "Searched \"" << m_searchFileNames[n] + "\" "; searchLogss << "(Max " << m_numScansInFile << " scans; " << m_numSearchedInFile << " searched, "; searchLogss << m_numLikelyGoodInFile << " likely good; "; if (m_numNotSelectedInFile > 0) { searchLogss << m_numNotSelectedInFile << " not selected; "; } searchLogss << m_numFailedFilterInFile << " failed filter; " << m_numMissingInFile << " missing; " << m_numMS1InFile << " MS1)"; g_log->log("MZXML SEARCH", searchLogss.str()); // we can delete the cRamp object now that we're done with this file. // this is in contrast to the case where we're opening all the files at once for // sorting -- in that case the cRamp objects will be deleted at the end of all // searches delete (m_files[n].second); m_files[n].second = NULL; m_outputs[n]->printFooter(); m_outputs[n]->closeFile(); // just so we won't hit the File Open limit if there are too many files } } m_searchTaskStats.logStats(); }