Ejemplo n.º 1
0
	/*
	 * Open a CCS bas.h5 file and prepare this reader for reading circular
	 * consensus reads together with their unrolled reads.
	 *
	 * ccsBasFileName  - path of the HDF file to open.
	 * fileAccPropList - HDF5 file access properties forwarded to the base
	 *                   reader's Initialize.
	 *
	 * Returns 1 on success and 0 when one of the required pass datasets
	 * cannot be initialized.  A file that cannot be opened, or that lacks
	 * the ConsensusBaseCalls or Passes group, prints an error and
	 * terminates the process with exit(1).
	 */
	int Initialize(string ccsBasFileName,
			const H5::FileAccPropList & fileAccPropList = H5::FileAccPropList::DEFAULT) {
		//
		// First, initialize the base reader so that the unrolled bases
		// can be read from this file.
		//
		if (this->T_HDFBasReader<T_Sequence>::Initialize(ccsBasFileName, fileAccPropList) == 0) {
			cout << "ERROR, Could not initialize ccs file " << ccsBasFileName << endl;
			exit(1);
		}

		//
		// The consensus group must both exist and open successfully.  A
		// missing group is an error as well: everything below reads from
		// ccsGroup, so continuing without it is never valid.
		//
		if (this->pulseDataGroup.ContainsObject("ConsensusBaseCalls") == 0 or
				ccsGroup.Initialize(this->hdfBasFile, "PulseData/ConsensusBaseCalls") == 0) {
			cout << "ERROR, attempting to read cicular consensus data from '" << ccsBasFileName 
					 << "', which does not contain a ConsensusBaseCalls field." << endl;
			cout << "Check HDF file structure." << endl;
			exit(1);
		}
		curPassPos = 0;

		//
		// The Passes subgroup is mandatory too; short-circuiting keeps
		// passesGroup from being opened when the object is absent.
		//
		if (ccsGroup.ContainsObject("Passes") == 0 or
				passesGroup.Initialize(ccsGroup.group,"Passes") == 0) {
			cout <<"ERROR, attempting to read circular consensus group Passes but it does not exist. " << endl;
			cout <<"Check HDF file structure."<<endl;
			exit(1);
		}

		//
		// Initialize the bas reader to read ccs reads as normal bas reads.
		// The bases live in a non-standard group, so point the reader at it.
		//
		ccsBasReader.baseCallsGroupName = "ConsensusBaseCalls";

		//
		// Select the CCS fields that mirror the regular base fields but
		// are stored in the consensus group.
		//
		ccsBasReader.IncludeField("Basecall");
		ccsBasReader.IncludeField("InsertionQV");
		ccsBasReader.IncludeField("DeletionQV");
		ccsBasReader.IncludeField("DeletionTag");
		ccsBasReader.IncludeField("SubstitutionQV");
		ccsBasReader.IncludeField("SubstitutionTag");
		ccsBasReader.IncludeField("QualityValue");

		//
		// Initialize from the already-open root group instead of opening
		// the file a second time.
		//
		ccsBasReader.Initialize(&this->rootGroup);

		//
		// Required pass information: give up if any of these is missing.
		//
		if (this->InitializeField(passesGroup, "AdapterHitAfter", adapterHitAfterArray, this->includedFields["AdapterHitAfter"]) == 0) return 0;
		if (this->InitializeField(passesGroup, "AdapterHitBefore", adapterHitBeforeArray, this->includedFields["AdapterHitBefore"]) == 0) return 0;
		if (this->InitializeField(passesGroup, "NumPasses", numPassesArray, this->includedFields["NumPasses"]) == 0) return 0;
		if (this->InitializeField(passesGroup, "PassDirection", passDirectionArray, this->includedFields["PassDirection"]) == 0) return 0;
		if (this->InitializeField(passesGroup, "PassNumBases", passNumBasesArray, this->includedFields["PassNumBases"]) == 0) return 0;
		if (this->InitializeField(passesGroup, "PassStartBase", passStartBaseArray, this->includedFields["PassStartBase"]) == 0) return 0;

		//
		// The following two fields are not critical, so their result is
		// deliberately ignored.
		//
		this->InitializeField(passesGroup, "PassStartPulse", passStartPulseArray, this->includedFields["PassStartPulse"]);
		this->InitializeField(passesGroup, "PassNumPulses", passNumPulsesArray, this->includedFields["PassNumPulses"]);

		//
		// The zmw reader is initialized from the consensus base calls group.
		//
		zmwReader.Initialize(&ccsBasReader.baseCallsGroup);

		return 1;
	}
Ejemplo n.º 2
0
int main(int argc, char* argv[]) {
	
	string inFileName, outFileName;

	if (argc < 3) {
		PrintUsage();
		exit(1);
	}
	inFileName  = argv[1];
	outFileName = argv[2];

	vector<int> readIndices;
	int argi = 3;
	vector<string> patterns;
	vector<int> holeNumbers;
  string regionTableFileName = "";
  int from = 0, to = 0;
	while (argi < argc) {
		if (strlen(argv[argi]) > 0 and argv[argi][0] == '-'){ 
			if (strcmp(argv[argi], "-pat") == 0) {
				patterns.push_back(argv[++argi]);
			}
			else if (strcmp(argv[argi], "-holenumber") == 0) {
				holeNumbers.push_back(atoi(argv[++argi]));
			}
			else if (strcmp(argv[argi], "-regionTable") == 0) {
				regionTableFileName = argv[++argi];
			}
      else if (strcmp(argv[argi], "-fromto") == 0) {
        from = atoi(argv[++argi]);
        to   = atoi(argv[++argi]);
        if (from >= to) {
          cout <<"ERROR. From must be less than to." << endl;
          exit(0);
        }
      }
      else {
        cout <<"Error. Bad option " << argv[argi] << endl;
        PrintUsage();
        exit(1);
      }
		}
		else {
			readIndices.push_back(atoi(argv[argi]));
		}
		++argi;
	}
  int index;
  for (index = from; index < to; index++) {
    readIndices.push_back(index);
  }
	std::sort(readIndices.begin(), readIndices.end());
	T_HDFBasReader<SMRTSequence> reader;
  HDFRegionTableReader regionReader;

	HDFBasWriter writer;
  HDFRegionTableWriter regionWriter;
	reader.InitializeDefaultIncludedFields();
	writer.InitializeDefaultIncludedFields();
	writer.IncludeField("HoleNumber");
	writer.IncludeField("HoleXY");

  vector<string> inFiles;
  FileOfFileNames::StoreFileOrFileList(inFileName, inFiles);
  inFileName = inFiles[0];
	reader.Initialize(inFileName);
  RegionTable regionTable;
  if (regionTableFileName != "") {
    regionReader.Initialize(regionTableFileName);
  }
  else {
    regionReader.Initialize(inFileName);
  }
  regionReader.ReadTable(regionTable);
  
  string changeListID;
  reader.GetChangeListID(changeListID);
  
	if (reader.scanDataReader.GetPlatformId() == AstroPlatform) {
		writer.Initialize(outFileName, reader.GetMovieName(), reader.GetRunCode());
	}
	else {
		writer.Initialize(outFileName, reader.GetMovieName(), changeListID);
	}
  regionWriter.Initialize(writer.pulseDataGroup);
  

	int ri;
	int curReadIndex = 0;
	SMRTSequence seq;
	bool printSeq = false;
	ri = 0;
  if (readIndices.size() > 0) {
    reader.PrepareForRandomAccess();
    for (ri = 0; ri < readIndices.size(); ri++) {
      reader.GetReadAt(readIndices[ri], seq);
      writer.Write(seq);

      //
      // Write out region information for the read.
      //
      int low, high;
      FindRegionIndices(readIndices[ri], &regionTable, low, high);
      int regionIndex;
      for (regionIndex = low; regionIndex < high; regionIndex++) {
        regionWriter.Write(regionTable.table[regionIndex]);
      }
    }
    regionWriter.Finalize(regionTable.columnNames,
                          regionTable.regionTypes, 
                          regionTable.regionDescriptions, 
                          regionTable.regionSources
                          );
  }
  else if (patterns.size() > 0) {
    while (reader.GetNext(seq)) {
      printSeq = false;
      if (curReadIndex < readIndices.size() and ri == readIndices[curReadIndex]) {
        ++curReadIndex;
        printSeq = true;
      }
      int p;
      for (p = 0; p < patterns.size(); p++) {
        if (ExactPatternMatch(seq.title, patterns[p])) {
          printSeq = true;
          break;
        }
      }

      for (p = 0; p < holeNumbers.size(); p++) {
        if (seq.holeNumber == holeNumbers[p]) {
          printSeq = true;
          break;
        }
      }

      if (printSeq) {
        cout << "writing " << seq.title << endl;
        writer.Write(seq);
      }
      ++ri;
    }
  }
  
	writer.Flush();

}
Ejemplo n.º 3
0
	/*
	 * Read the next CCS record into ccsSequence.
	 *
	 * The consensus read and its per-pass data are loaded only when the
	 * record reports at least one pass; the unrolled read is loaded in
	 * every case and its title, suffixed with "/ccs", becomes the title of
	 * ccsSequence.
	 *
	 * Returns 1 when the unrolled read was read, 0 when the reader is
	 * exhausted or one of the underlying GetNext calls reports failure.
	 * An H5::DataSetIException prints an error and terminates the process
	 * with exit(1).
	 */
	int GetNext(T_Sequence &ccsSequence) {
		ccsSequence.Free();
		int retVal = 0;

		//
		// Nothing is left once the read cursor reaches ccsBasReader.nReads
		// or the base cursor reaches ccsBasReader.nBases.
		//
		if (this->curRead == ccsBasReader.nReads) {
			return 0;
		}
		if (this->curBasePos == ccsBasReader.nBases) {
			return 0;
		}

		try {
			UInt numPasses;
			numPassesArray.Read(this->curRead, this->curRead+1, &numPasses);
			if (numPasses > 0) {
				// Read in the ccs bases.
				if ((retVal = ccsBasReader.GetNext((SMRTSequence&)ccsSequence)) == 0)
					return 0;

				ccsSequence.numPasses = numPasses;

				//
				// Each included pass field holds numPasses entries
				// starting at curPassPos.
				//
				if (this->includedFields["AdapterHitAfter"]) {
					ccsSequence.adapterHitAfter.resize(ccsSequence.numPasses);
					adapterHitAfterArray.Read(curPassPos,  curPassPos + ccsSequence.numPasses, &ccsSequence.adapterHitAfter[0]);
				}
				if (this->includedFields["AdapterHitBefore"]) {
					ccsSequence.adapterHitBefore.resize(ccsSequence.numPasses);
					adapterHitBeforeArray.Read(curPassPos, curPassPos + ccsSequence.numPasses, &ccsSequence.adapterHitBefore[0]);
				}
				if (this->includedFields["PassDirection"]) {
					ccsSequence.passDirection.resize(ccsSequence.numPasses);
					passDirectionArray.Read(curPassPos,    curPassPos + ccsSequence.numPasses, &ccsSequence.passDirection[0]);
				}
				if (this->includedFields["PassNumBases"]) {
					ccsSequence.passNumBases.resize(ccsSequence.numPasses);
					passNumBasesArray.Read(curPassPos,     curPassPos + ccsSequence.numPasses, &ccsSequence.passNumBases[0]);
				}
				if (this->includedFields["PassStartBase"]) {
					ccsSequence.passStartBase.resize(ccsSequence.numPasses);
					passStartBaseArray.Read(curPassPos,    curPassPos + ccsSequence.numPasses, &ccsSequence.passStartBase[0]);
				}
				if (this->includedFields["PassStartPulse"]) {
					ccsSequence.passStartPulse.resize(ccsSequence.numPasses);
					passStartPulseArray.Read(curPassPos,   curPassPos + ccsSequence.numPasses, &ccsSequence.passStartPulse[0]);
				}
				if (this->includedFields["PassNumPulses"]) {
					ccsSequence.passNumPulses.resize(ccsSequence.numPasses);
					passNumPulsesArray.Read(curPassPos,    curPassPos + ccsSequence.numPasses, &ccsSequence.passNumPulses[0]);
				}
				curPassPos += ccsSequence.numPasses;
			}
			else {
				// Advance a read in the ccs sequence without advancing positions.
				ccsBasReader.curRead++;
			}

			//
			// Regardless whether or not a ccs read was called, read the next
			// unrolled read, since an unrolled read is called for each zmw.
			// The C-style cast is kept as written: the target type may be
			// unrelated to this reader's actual base class.
			//
			retVal = ((T_HDFBasReader<SMRTSequence>*)this)->GetNext(ccsSequence.unrolledRead);
			ccsSequence.zmwData = ccsSequence.unrolledRead.zmwData;

			// NOTE(review): the title is derived even when the unrolled
			// read failed (retVal == 0) -- confirm the title is valid then.
			ccsSequence.CopyTitle(ccsSequence.unrolledRead.title);
			string newTitle = string(ccsSequence.title) + string("/ccs");
			ccsSequence.CopyTitle(newTitle.c_str());
		} catch (const H5::DataSetIException &) {
			// Catch by const reference so the exception is neither copied
			// nor sliced.
			cout << "ERROR, could not read ccs data for CCS Sequence " 
			     << ccsSequence.unrolledRead.title << endl; 
			exit(1);
		}

		// Normalize the unrolled reader's result to 0 or 1.
		return (retVal == 0) ? 0 : 1;
	}