//
// Open a CCS bas file and prepare this reader for reading both the
// unrolled reads and the circular consensus reads.
//
//   ccsBasFileName   path of the HDF file to open.
//   fileAccPropList  HDF5 file access properties, forwarded to the
//                    base reader's Initialize.
//
// Returns 1 on success and 0 when one of the required pass fields
// (AdapterHitAfter, AdapterHitBefore, NumPasses, PassDirection,
// PassNumBases, PassStartBase) cannot be initialized.  A file that
// cannot be opened, or that lacks the ConsensusBaseCalls or Passes
// groups, prints an error and terminates the process with exit(1).
//
int Initialize(string ccsBasFileName,
               const H5::FileAccPropList & fileAccPropList = H5::FileAccPropList::DEFAULT) {
    //
    // First, initialize for reading the unrolled bases from this file.
    //
    if (this->T_HDFBasReader<T_Sequence>::Initialize(ccsBasFileName, fileAccPropList) == 0) {
        cout << "ERROR, Could not initialize ccs file " << ccsBasFileName << endl;
        exit(1);
    }

    //
    // The consensus group is mandatory.  Fail both when it is absent
    // and when it is present but cannot be opened; otherwise the
    // checks below would run against an uninitialized ccsGroup.
    //
    if (this->pulseDataGroup.ContainsObject("ConsensusBaseCalls") == 0 or
        ccsGroup.Initialize(this->hdfBasFile, "PulseData/ConsensusBaseCalls") == 0) {
        cout << "ERROR, attempting to read cicular consensus data from '" << ccsBasFileName
             << "', which does not contain a ConsensusBaseCalls field." << endl;
        cout << "Check HDF file structure." << endl;
        exit(1);
    }

    curPassPos = 0;

    //
    // The Passes group must exist and must be openable.  The second
    // test is only evaluated when the group is present.
    //
    if (ccsGroup.ContainsObject("Passes") == 0 or
        passesGroup.Initialize(ccsGroup.group,"Passes") == 0) {
        cout <<"ERROR, attempting to read circular consensus group Passes but it does not exist. " << endl;
        cout <<"Check HDF file structure."<<endl;
        exit(1);
    }

    //
    // Initialize the bas reader to read ccs reads as normal bas reads.
    // The bases live in a non-standard group.
    //
    ccsBasReader.baseCallsGroupName = "ConsensusBaseCalls";

    //
    // Select the CCS fields that mirror the base fields but are stored
    // in the consensus group.
    //
    ccsBasReader.IncludeField("Basecall");
    ccsBasReader.IncludeField("InsertionQV");
    ccsBasReader.IncludeField("DeletionQV");
    ccsBasReader.IncludeField("DeletionTag");
    ccsBasReader.IncludeField("SubstitutionQV");
    ccsBasReader.IncludeField("SubstitutionTag");
    ccsBasReader.IncludeField("QualityValue");

    //
    // Initialize the ccs reader on the already opened root group, so no
    // second file handle is opened.
    //
    ccsBasReader.Initialize(&this->rootGroup);

    //
    // Required pass information: give up as soon as one field fails.
    //
    if (this->InitializeField(passesGroup, "AdapterHitAfter", adapterHitAfterArray,
                              this->includedFields["AdapterHitAfter"]) == 0) return 0;
    if (this->InitializeField(passesGroup, "AdapterHitBefore", adapterHitBeforeArray,
                              this->includedFields["AdapterHitBefore"]) == 0) return 0;
    if (this->InitializeField(passesGroup, "NumPasses", numPassesArray,
                              this->includedFields["NumPasses"]) == 0) return 0;
    if (this->InitializeField(passesGroup, "PassDirection", passDirectionArray,
                              this->includedFields["PassDirection"]) == 0) return 0;
    if (this->InitializeField(passesGroup, "PassNumBases", passNumBasesArray,
                              this->includedFields["PassNumBases"]) == 0) return 0;
    if (this->InitializeField(passesGroup, "PassStartBase", passStartBaseArray,
                              this->includedFields["PassStartBase"]) == 0) return 0;

    //
    // The following two fields are not critical, so their result is
    // deliberately not checked.
    //
    this->InitializeField(passesGroup, "PassStartPulse", passStartPulseArray,
                          this->includedFields["PassStartPulse"]);
    this->InitializeField(passesGroup, "PassNumPulses", passNumPulsesArray,
                          this->includedFields["PassNumPulses"]);

    //
    // The zmw reader is pointed at the consensus base calls group of
    // the ccs reader.
    //
    zmwReader.Initialize(&ccsBasReader.baseCallsGroup);
    return 1;
}
int main(int argc, char* argv[]) { string inFileName, outFileName; if (argc < 3) { PrintUsage(); exit(1); } inFileName = argv[1]; outFileName = argv[2]; vector<int> readIndices; int argi = 3; vector<string> patterns; vector<int> holeNumbers; string regionTableFileName = ""; int from = 0, to = 0; while (argi < argc) { if (strlen(argv[argi]) > 0 and argv[argi][0] == '-'){ if (strcmp(argv[argi], "-pat") == 0) { patterns.push_back(argv[++argi]); } else if (strcmp(argv[argi], "-holenumber") == 0) { holeNumbers.push_back(atoi(argv[++argi])); } else if (strcmp(argv[argi], "-regionTable") == 0) { regionTableFileName = argv[++argi]; } else if (strcmp(argv[argi], "-fromto") == 0) { from = atoi(argv[++argi]); to = atoi(argv[++argi]); if (from >= to) { cout <<"ERROR. From must be less than to." << endl; exit(0); } } else { cout <<"Error. Bad option " << argv[argi] << endl; PrintUsage(); exit(1); } } else { readIndices.push_back(atoi(argv[argi])); } ++argi; } int index; for (index = from; index < to; index++) { readIndices.push_back(index); } std::sort(readIndices.begin(), readIndices.end()); T_HDFBasReader<SMRTSequence> reader; HDFRegionTableReader regionReader; HDFBasWriter writer; HDFRegionTableWriter regionWriter; reader.InitializeDefaultIncludedFields(); writer.InitializeDefaultIncludedFields(); writer.IncludeField("HoleNumber"); writer.IncludeField("HoleXY"); vector<string> inFiles; FileOfFileNames::StoreFileOrFileList(inFileName, inFiles); inFileName = inFiles[0]; reader.Initialize(inFileName); RegionTable regionTable; if (regionTableFileName != "") { regionReader.Initialize(regionTableFileName); } else { regionReader.Initialize(inFileName); } regionReader.ReadTable(regionTable); string changeListID; reader.GetChangeListID(changeListID); if (reader.scanDataReader.GetPlatformId() == AstroPlatform) { writer.Initialize(outFileName, reader.GetMovieName(), reader.GetRunCode()); } else { writer.Initialize(outFileName, reader.GetMovieName(), changeListID); } 
regionWriter.Initialize(writer.pulseDataGroup); int ri; int curReadIndex = 0; SMRTSequence seq; bool printSeq = false; ri = 0; if (readIndices.size() > 0) { reader.PrepareForRandomAccess(); for (ri = 0; ri < readIndices.size(); ri++) { reader.GetReadAt(readIndices[ri], seq); writer.Write(seq); // // Write out region information for the read. // int low, high; FindRegionIndices(readIndices[ri], ®ionTable, low, high); int regionIndex; for (regionIndex = low; regionIndex < high; regionIndex++) { regionWriter.Write(regionTable.table[regionIndex]); } } regionWriter.Finalize(regionTable.columnNames, regionTable.regionTypes, regionTable.regionDescriptions, regionTable.regionSources ); } else if (patterns.size() > 0) { while (reader.GetNext(seq)) { printSeq = false; if (curReadIndex < readIndices.size() and ri == readIndices[curReadIndex]) { ++curReadIndex; printSeq = true; } int p; for (p = 0; p < patterns.size(); p++) { if (ExactPatternMatch(seq.title, patterns[p])) { printSeq = true; break; } } for (p = 0; p < holeNumbers.size(); p++) { if (seq.holeNumber == holeNumbers[p]) { printSeq = true; break; } } if (printSeq) { cout << "writing " << seq.title << endl; writer.Write(seq); } ++ri; } } writer.Flush(); }
//
// Read the next zmw: its ccs read (when one was called) together with
// all per-pass data, followed by the matching unrolled read.
//
//   ccsSequence  receives the result.  It is freed first, so any
//                previous content is discarded.
//
// Returns 1 when the unrolled read was read, 0 when there is nothing
// left to read or when either underlying reader reports failure.  An
// HDF5 dataset read error prints a message and terminates the process
// with exit(1).
//
int GetNext(T_Sequence &ccsSequence) {
    ccsSequence.Free();

    // Stop once every read or every base has been consumed.
    if (this->curRead == ccsBasReader.nReads) {
        return 0;
    }
    if (this->curBasePos == ccsBasReader.nBases) {
        return 0;
    }

    int retVal = 0;
    try {
        UInt numPasses;
        numPassesArray.Read(this->curRead, this->curRead+1, &numPasses);

        if (numPasses > 0) {
            //
            // A ccs read was called for this zmw: read its bases.
            // NOTE(review): the C-style cast is kept because the relation
            // between T_Sequence and SMRTSequence is not visible here.
            //
            if ((retVal = ccsBasReader.GetNext((SMRTSequence&)ccsSequence)) == 0) {
                return 0;
            }
            ccsSequence.numPasses = numPasses;

            //
            // Each included pass field holds numPasses consecutive
            // entries starting at curPassPos.
            //
            if (this->includedFields["AdapterHitAfter"]) {
                ccsSequence.adapterHitAfter.resize(ccsSequence.numPasses);
                adapterHitAfterArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                          &ccsSequence.adapterHitAfter[0]);
            }
            if (this->includedFields["AdapterHitBefore"]) {
                ccsSequence.adapterHitBefore.resize(ccsSequence.numPasses);
                adapterHitBeforeArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                           &ccsSequence.adapterHitBefore[0]);
            }
            if (this->includedFields["PassDirection"]) {
                ccsSequence.passDirection.resize(ccsSequence.numPasses);
                passDirectionArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                        &ccsSequence.passDirection[0]);
            }
            if (this->includedFields["PassNumBases"]) {
                ccsSequence.passNumBases.resize(ccsSequence.numPasses);
                passNumBasesArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                       &ccsSequence.passNumBases[0]);
            }
            if (this->includedFields["PassStartBase"]) {
                ccsSequence.passStartBase.resize(ccsSequence.numPasses);
                passStartBaseArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                        &ccsSequence.passStartBase[0]);
            }
            if (this->includedFields["PassStartPulse"]) {
                ccsSequence.passStartPulse.resize(ccsSequence.numPasses);
                passStartPulseArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                         &ccsSequence.passStartPulse[0]);
            }
            if (this->includedFields["PassNumPulses"]) {
                ccsSequence.passNumPulses.resize(ccsSequence.numPasses);
                passNumPulsesArray.Read(curPassPos, curPassPos + ccsSequence.numPasses,
                                        &ccsSequence.passNumPulses[0]);
            }
            curPassPos += ccsSequence.numPasses;
        }
        else {
            // Advance a read in the ccs sequence without advancing positions.
            ccsBasReader.curRead++;
        }

        //
        // Regardless whether or not a ccs read was called, read the next
        // unrolled read, since an unrolled read is called for each zmw.
        //
        retVal = ((T_HDFBasReader<SMRTSequence>*)this)->GetNext(ccsSequence.unrolledRead);
        ccsSequence.zmwData = ccsSequence.unrolledRead.zmwData;

        // The ccs title is the unrolled read's title with "/ccs" appended.
        ccsSequence.CopyTitle(ccsSequence.unrolledRead.title);
        string newTitle = string(ccsSequence.title) + string("/ccs");
        ccsSequence.CopyTitle(newTitle.c_str());
    }
    catch (const H5::DataSetIException &) {
        // Caught by const reference so the exception is neither copied nor sliced.
        cout << "ERROR, could not read ccs data for CCS Sequence "
             << ccsSequence.unrolledRead.title << endl;
        exit(1);
    }

    return (retVal == 0) ? 0 : 1;
}