// Read a record from the currently opened file. bool SamFile::ReadRecord(SamFileHeader& header, SamRecord& record) { myStatus = SamStatus::SUCCESS; if(myIsOpenForRead == false) { // File is not open for read myStatus.setStatus(SamStatus::FAIL_ORDER, "Cannot read record since the file is not open for reading"); throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file.")); return(false); } if(myHasHeader == false) { // The header has not yet been read. // TODO - maybe just read the header. myStatus.setStatus(SamStatus::FAIL_ORDER, "Cannot read record since the header has not been read."); throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header.")); return(false); } // Check to see if a new region has been set. If so, determine the // chunks for that region. if(myNewSection) { if(!processNewSection(header)) { // Failed processing a new section. Could be an // order issue like the file not being open or the // indexed file not having been read. // processNewSection sets myStatus with the failure reason. return(false); } } // Read until a record is not successfully read or there are no more // requested records. while(myStatus == SamStatus::SUCCESS) { record.setReference(myRefPtr); record.setSequenceTranslation(myReadTranslation); // If reading by index, this method will setup to ensure it is in // the correct position for the next record (if not already there). // Sets myStatus if it could not move to a good section. // Just returns true if it is not setup to read by index. if(!ensureIndexedReadPosition()) { // Either there are no more records in the section // or it failed to move to the right section, so there // is nothing more to read, stop looping. break; } // File is open for reading and the header has been read, so read the // next record. myInterfacePtr->readRecord(myFilePtr, header, record, myStatus); if(myStatus != SamStatus::SUCCESS) { // Failed to read the record, so break out of the loop. break; } // Successfully read a record, so check if we should filter it. // First check if it is out of the section. Returns true // if not reading by sections, returns false if the record // is outside of the section. Sets status to NO_MORE_RECS if // there is nothing left ot read in the section. if(!checkRecordInSection(record)) { // The record is not in the section. // The while loop will detect if NO_MORE_RECS was set. continue; } // Check the flag for required/excluded flags. uint16_t flag = record.getFlag(); if((flag & myRequiredFlags) != myRequiredFlags) { // The record does not conatain all required flags, so // continue looking. continue; } if((flag & myExcludedFlags) != 0) { // The record contains an excluded flag, so continue looking. continue; } //increment the record count. myRecordCount++; if(myStatistics != NULL) { // Statistics should be updated. myStatistics->updateStatistics(record); } // Successfully read the record, so check the sort order. if(!validateSortOrder(record, header)) { // ValidateSortOrder sets the status on a failure. return(false); } return(true); } // End while loop that checks if a desired record is found or failure. // Return true if a record was found. return(myStatus == SamStatus::SUCCESS); }
bool VcfFileReader::readRecord(VcfRecord& record, VcfSubsetSamples* subset) { myStatus = StatGenStatus::SUCCESS; // Subset the read if there are subsets specified. VcfSubsetSamples* subsetPtr = subset; if((subsetPtr == NULL) && myUseSubset) { subsetPtr = &mySampleSubset; } // Check to see if a new region has been set. If so, setup for that region. bool searchChrom = false; if(myNewSection) { if(myVcfIndex != NULL) { // Have an index file so use if(!processNewSection()) { // processNewSection sets the status appropriately on failure. return(false); } } else if(myTotalRead == 0) { // ReadSection without an index only works if no records // have been read. searchChrom = true; myNewSection = false; } else { myNewSection = false; myStatus.setStatus(StatGenStatus::FAIL_ORDER, "Cannot set read section with no index after reading records"); return(false); } } // Keep looping until a desired record is found. bool recordFound = false; while(!recordFound) { if(!record.read(myFilePtr, mySiteOnly, myRecordDiscardRules, subsetPtr)) { myStatus = record.getStatus(); myTotalRead += myRecordDiscardRules.getNumDiscarded(); myNumRecords += myRecordDiscardRules.getNumDiscarded(); myRecordDiscardRules.clearNumDiscarded(); return(false); } ++myTotalRead; myTotalRead += myRecordDiscardRules.getNumDiscarded(); // Check to see if the record is in the section. // First check the chromosome. if(!mySectionChrom.empty() && (mySectionChrom != record.getChromStr())) { if(searchChrom) { // Still searching for the chromosome, so continue // to the next record. continue; } // Record is not within the correct chromosome, so return failure. myStatus = StatGenStatus::NO_MORE_RECS; return(false); } searchChrom = false; // Check if the record is after the section end if applicable. if((mySection1BasedEndPos != -1) && (record.get1BasedPosition() >= mySection1BasedEndPos)) { myStatus = StatGenStatus::NO_MORE_RECS; return(false); } // Check if the record is prior to the section start if applicable. // Determinine the VCF record end position. // If we are not requiring overlap, then we only need to check // the start position, but if overlap is required, then it needs // to incrment the start by the length-1. int numIncBases = 0; if(mySectionOverlap) { // The VCF record end position is the start position + length of the // reference string - 1. numIncBases = record.getNumRefBases() - 1; } if((mySection1BasedStartPos != -1) && ((record.get1BasedPosition() + numIncBases) < mySection1BasedStartPos)) { // This record is prior to the section, so keep reading. continue; } ++myNumRecords; myNumRecords += myRecordDiscardRules.getNumDiscarded(); myRecordDiscardRules.clearNumDiscarded(); // Record successfully read, so check to see if it is discarded. if((myDiscardRules & DISCARD_NON_PHASED) && !record.allPhased()) { // Not all samples are phased, so discard this record. continue; } if((myDiscardRules & DISCARD_MISSING_GT) && !record.hasAllGenotypeAlleles()) { // discard missing GTs and this record had missing alleles, // so keep reading. continue; } if((myDiscardRules & DISCARD_FILTERED) && !(record.getFilter().passedAllFilters())) { // Record was filtered, so discard it. continue; } if((myDiscardRules & DISCARD_MULTIPLE_ALTS) && (record.getNumAlts() > 1)) { // Record had multiple alternates, so discard. continue; } // Check allele counts for discarding. if(myMinAltAlleleCount != UNSET_MIN_ALT_ALLELE_COUNT) { // Count the number of alternates. int32_t altCount = 0; for(int sampleNum = 0; sampleNum < record.getNumSamples(); sampleNum++) { if((myAltAlleleCountSubset != NULL) && !(myAltAlleleCountSubset->keep(sampleNum))) { // Skip this sample. continue; } for(int gtNum = 0; gtNum < record.getNumGTs(sampleNum); gtNum++) { if(record.getGT(sampleNum, gtNum) > 0) { // Alternate, so increment the count. ++altCount; } } } if(altCount < myMinAltAlleleCount) { // Not enough alternates so continue to the next sample. continue; } } // Check to see if the minimum alternate allele count is met. if(myMinMinorAlleleCount != UNSET_MIN_MINOR_ALLELE_COUNT) { // Get the number of possible alternates. unsigned int numAlts = record.getNumAlts(); // Verify that each allele has the min count. bool failMinorAlleleCount = false; for(unsigned int i = 0; i <= numAlts; i++) { if(record.getAlleleCount(i, myMinorAlleleCountSubset) < myMinMinorAlleleCount) { // Not enough of one gt, so not ok. failMinorAlleleCount = true; break; } } if(failMinorAlleleCount) { // not enough alleles, so continue to the next record. continue; } } // Record was not discarded. recordFound = true; } // Increment the number of kept records. ++myNumKeptRecords; return(true); }