Exemple #1
0
// Read the next record that passes the configured filters from the
// currently opened file.
//
// Records are read one at a time until one is found that is inside the
// current section (per checkRecordInSection) and whose flag contains every
// bit of myRequiredFlags and no bit of myExcludedFlags.  For that record,
// myRecordCount is incremented, myStatistics (when set) is updated, and the
// sort order is validated.
//
// Parameters:
//   header - passed to the new-section processing, to the underlying record
//            reader, and to the sort order validation.
//   record - receives the reference pointer and sequence translation, then
//            is filled in by the underlying record reader.
//
// Returns true if a record was found and validateSortOrder accepted it.
// Returns false otherwise; myStatus then holds whatever the failing step
// stored in it.
//
// Throws std::runtime_error if the file is not open for reading or if the
// header has not been read yet.  myStatus is set to FAIL_ORDER before the
// exception is thrown.
bool SamFile::ReadRecord(SamFileHeader& header, 
                         SamRecord& record)
{
    myStatus = SamStatus::SUCCESS;

    if(myIsOpenForRead == false)
    {
        // File is not open for read.  This is a caller ordering bug, so
        // record the status and throw; nothing after the throw can run.
        myStatus.setStatus(SamStatus::FAIL_ORDER, 
                           "Cannot read record since the file is not open for reading");
        throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file."));
    }

    if(myHasHeader == false)
    {
        // The header has not yet been read.  This is also a caller ordering
        // bug, reported the same way.
        // TODO - maybe just read the header.
        myStatus.setStatus(SamStatus::FAIL_ORDER, 
                           "Cannot read record since the header has not been read.");
        throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header."));
    }

    // Check to see if a new region has been set.  If so, determine the
    // chunks for that region.
    if(myNewSection)
    {
        if(!processNewSection(header))
        {
            // Failed processing a new section.  Could be an 
            // order issue like the file not being open or the
            // indexed file not having been read.
            // processNewSection sets myStatus with the failure reason.
            return(false);
        }
    }

    // Read until a record is not successfully read or there are no more
    // requested records.
    while(myStatus == SamStatus::SUCCESS)
    {
        record.setReference(myRefPtr);
        record.setSequenceTranslation(myReadTranslation);

        // If reading by index, this method will setup to ensure it is in
        // the correct position for the next record (if not already there).
        // Sets myStatus if it could not move to a good section.
        // Just returns true if it is not setup to read by index.
        if(!ensureIndexedReadPosition())
        {
            // Either there are no more records in the section
            // or it failed to move to the right section, so there
            // is nothing more to read, stop looping.
            break;
        }
        
        // File is open for reading and the header has been read, so read the
        // next record.
        myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
        if(myStatus != SamStatus::SUCCESS)
        {
            // Failed to read the record, so break out of the loop.
            break;
        }

        // Successfully read a record, so check if we should filter it.
        // First check if it is out of the section.  Returns true
        // if not reading by sections, returns false if the record
        // is outside of the section.  Sets status to NO_MORE_RECS if
        // there is nothing left to read in the section.
        if(!checkRecordInSection(record))
        {
            // The record is not in the section.
            // The while loop will detect if NO_MORE_RECS was set.
            continue;
        }

        // Check the flag for required/excluded flags.
        uint16_t flag = record.getFlag();
        if((flag & myRequiredFlags) != myRequiredFlags)
        {
            // The record does not contain all required flags, so
            // continue looking.
            continue;
        }
        if((flag & myExcludedFlags) != 0)
        {
            // The record contains an excluded flag, so continue looking.
            continue;
        }

        // The record passed every filter, so count it.
        ++myRecordCount;
        
        if(myStatistics != NULL)
        {
            // Statistics should be updated.
            myStatistics->updateStatistics(record);
        }
        
        // Successfully read the record, so the result is whether the sort
        // order is valid.  validateSortOrder sets the status on a failure.
        return(validateSortOrder(record, header));

    } // End while loop that checks if a desired record is found or failure.

    // Only reached when the loop stopped without returning a record; the
    // result reflects whether myStatus is still SUCCESS at that point.
    return(myStatus == SamStatus::SUCCESS);
}
Exemple #2
0
// Read the next VCF record that lies within the current section (if one is
// set) and survives all of the configured discard rules.
//
// Parameters:
//   record - filled in by record.read(); holds the kept record on success.
//   subset - sample subset handed to record.read().  When NULL and
//            myUseSubset is set, mySampleSubset is used instead.
//
// Returns true when a record was kept (myNumKeptRecords is incremented).
// Returns false when reading failed, the section is exhausted
// (NO_MORE_RECS), or the section could not be set up; myStatus is updated
// on those paths.
bool VcfFileReader::readRecord(VcfRecord& record, VcfSubsetSamples* subset)
{
    myStatus = StatGenStatus::SUCCESS;

    // Fall back to the reader's own sample subset when the caller gave none.
    VcfSubsetSamples* activeSubset = subset;
    if(myUseSubset && (activeSubset == NULL))
    {
        activeSubset = &mySampleSubset;
    }

    // Handle a newly requested section before reading anything.
    bool seekingChrom = false;
    if(myNewSection)
    {
        if(myVcfIndex != NULL)
        {
            // An index is available, so let processNewSection position the
            // reader.  It sets the status appropriately on failure.
            if(!processNewSection())
            {
                return(false);
            }
        }
        else
        {
            // No index.  Either way the pending section request is consumed.
            myNewSection = false;
            if(myTotalRead != 0)
            {
                // Reading a section without an index only works if no
                // records have been read yet.
                myStatus.setStatus(StatGenStatus::FAIL_ORDER, 
                               "Cannot set read section with no index after reading records");
                return(false);
            }
            // Nothing read yet, so scan forward for the chromosome.
            seekingChrom = true;
        }
    }

    // Loop until a record is accepted (break) or reading must stop (return).
    for(;;)
    {
        if(!record.read(myFilePtr, mySiteOnly, myRecordDiscardRules,
                        activeSubset))
        {
            // Read failure: propagate the record's status and fold any
            // discarded records into both counters before giving up.
            myStatus = record.getStatus();
            myTotalRead += myRecordDiscardRules.getNumDiscarded();
            myNumRecords += myRecordDiscardRules.getNumDiscarded();
            myRecordDiscardRules.clearNumDiscarded();
            return(false);
        }

        // Count this record plus the ones the discard rules dropped.
        // NOTE(review): the discard counter is only cleared further down,
        // after the section checks.  If record.read() accumulates into that
        // counter, the 'continue' paths before the clear would add the same
        // discards to myTotalRead again on the next pass - confirm.
        ++myTotalRead;
        myTotalRead += myRecordDiscardRules.getNumDiscarded();

        // Section check 1: chromosome.
        const bool wrongChrom =
            !mySectionChrom.empty() && (mySectionChrom != record.getChromStr());
        if(wrongChrom)
        {
            if(!seekingChrom)
            {
                // Not scanning for the chromosome, so a different
                // chromosome ends the read.
                myStatus = StatGenStatus::NO_MORE_RECS;
                return(false);
            }
            // Still scanning for the section's chromosome.
            continue;
        }
        // On the right chromosome (or no chromosome filter): stop scanning.
        seekingChrom = false;

        // Section check 2: a record at or past the section end position
        // ends the read.
        if((mySection1BasedEndPos != -1) && 
           (record.get1BasedPosition() >= mySection1BasedEndPos))
        {
            myStatus = StatGenStatus::NO_MORE_RECS;
            return(false);
        }

        // Section check 3: record before the section start.
        // When overlap counts, compare using the record's last reference
        // base (start + reference length - 1) instead of its start.
        int extraBases = mySectionOverlap ? (record.getNumRefBases() - 1) : 0;
        if((mySection1BasedStartPos != -1) &&
           ((record.get1BasedPosition() + extraBases)
            < mySection1BasedStartPos))
        {
            // Entirely before the section, so try the next record.
            continue;
        }

        // The record is inside the section: count it together with the
        // discarded records, then reset the discard counter.
        ++myNumRecords;
        myNumRecords += myRecordDiscardRules.getNumDiscarded();
        myRecordDiscardRules.clearNumDiscarded();

        // Flag-driven discards, evaluated in order with short-circuiting:
        // not all phased, missing genotype alleles, failed a filter,
        // more than one alternate.
        const bool discardedByRule =
            ((myDiscardRules & DISCARD_NON_PHASED) && !record.allPhased()) ||
            ((myDiscardRules & DISCARD_MISSING_GT) &&
             !record.hasAllGenotypeAlleles()) ||
            ((myDiscardRules & DISCARD_FILTERED) && 
             !(record.getFilter().passedAllFilters())) ||
            ((myDiscardRules & DISCARD_MULTIPLE_ALTS) &&
             (record.getNumAlts() > 1));
        if(discardedByRule)
        {
            continue;
        }

        // Minimum alternate allele count, when configured.
        if(myMinAltAlleleCount != UNSET_MIN_ALT_ALLELE_COUNT)
        {
            // Tally genotype values > 0 over the samples being counted.
            int32_t numAltAlleles = 0;
            for(int sampleIdx = 0; sampleIdx < record.getNumSamples(); 
                ++sampleIdx)
            {
                // A sample counts unless a subset is set and rejects it.
                const bool countSample =
                    (myAltAlleleCountSubset == NULL) ||
                    myAltAlleleCountSubset->keep(sampleIdx);
                if(countSample)
                {
                    for(int gtIdx = 0; gtIdx < record.getNumGTs(sampleIdx);
                        ++gtIdx)
                    {
                        if(record.getGT(sampleIdx, gtIdx) > 0)
                        {
                            ++numAltAlleles;
                        }
                    }
                }
            }
            if(numAltAlleles < myMinAltAlleleCount)
            {
                // Too few alternate alleles, so move on to the next record.
                continue;
            }
        }

        // Minimum minor allele count, when configured.
        if(myMinMinorAlleleCount != UNSET_MIN_MINOR_ALLELE_COUNT)
        {
            // Allele indexes run from 0 through the number of alternates,
            // inclusive; each of them must reach the minimum count.
            unsigned int numAlts = record.getNumAlts();
            bool everyAlleleOk = true;
            for(unsigned int alleleIdx = 0;
                everyAlleleOk && (alleleIdx <= numAlts); ++alleleIdx)
            {
                everyAlleleOk =
                    !(record.getAlleleCount(alleleIdx, myMinorAlleleCountSubset) 
                      < myMinMinorAlleleCount);
            }
            if(!everyAlleleOk)
            {
                // Some allele is below the minimum, so try the next record.
                continue;
            }
        }

        // Nothing discarded this record, so it is the one to return.
        break;
    }

    // Count the kept record.
    ++myNumKeptRecords;
    return(true);
}