Esempio n. 1
0
//
// Picks the contig that a read aligned at location should be attributed to, and reports how many
// bases at the front of the read have to be soft-clipped for the read to fit in that contig.
//
//  location                - genome offset at which the read was aligned.
//  readLength              - length of the read in bases.
//  extraBasesClippedBefore - out parameter.  Set to 0 when the read fits in the contig found at location,
//                            otherwise to the distance from location to the start of the following contig.
//
// Returns the contig selected for the read.
//
const Genome::Contig *Genome::getContigForRead(unsigned location, unsigned readLength, unsigned *extraBasesClippedBefore) const
{
    const Contig *enclosing = getContigAtLocation(location);

    //
    // Ordinary case: there is a contig at location and the whole read ends at or before the end of it,
    // so nothing needs to be clipped.
    //
    if (NULL != enclosing && location + readLength <= enclosing->beginningOffset + enclosing->length) {
        *extraBasesClippedBefore = 0;
        return enclosing;
    }

    //
    // Otherwise the read was aligned a little before the beginning of a chromosome (imagine prepending a
    // few bases to the read): either it lies before the first contig, so no contig was found at all, or it
    // runs off the end of the contig at location.  Reads are never expected to hang over the end of a
    // chromosome, only over the beginning, so attribute the read to the following contig and clip the bases
    // that precede its start.
    //
    const Contig *following = getNextContigAfterLocation(location);
    _ASSERT(NULL != following);
    _ASSERT(location < following->beginningOffset && following->beginningOffset < location + readLength);

    *extraBasesClippedBefore = following->beginningOffset - location;
    return following;
}
Esempio n. 2
0
//
// Makes a copy of a Genome, optionally leaving out some of its contigs.
//
//  copyX - when false, the contig named "chrX" is left out of the copy.
//  copyY - when false, the contig named "chrY" is left out of the copy.
//  copyM - when false, the contig named "chrM" is left out of the copy.
//
// The three flags are independent of one another.  Contigs are recognized by an exact match on their
// name, so a reference that names them differently (for instance plain "X") is copied in full.
//
// Returns the new Genome, or NULL if the allocation of the copy is reported as failed.
//
    Genome *
Genome::copy(bool copyX, bool copyY, bool copyM) const
{
    Genome *newCopy = new Genome(getCountOfBases(),getCountOfBases(), chromosomePadding);

    //
    // NOTE(review): a plain new-expression normally throws on failure rather than returning NULL, so this
    // check only matters if the project replaces operator new with a non-throwing one -- confirm.
    //
    if (NULL == newCopy) {
        WriteErrorMessage("Genome::copy: failed to allocate space for copy.\n");
        return NULL;
    }

    //
    // Bases are moved across in chunks of at most maxCopySize, through a buffer with room for the
    // terminating NUL that is written before each chunk is passed to addData().
    //
    const size_t maxCopySize = 10000;
    char dataBuffer[maxCopySize + 1];

    const Genome::Contig *currentContig = NULL;
    const Genome::Contig *nextContig = getContigAtLocation(0);

    unsigned offsetInReference = 0;
    while (offsetInReference < getCountOfBases()) {
        if (NULL != nextContig && offsetInReference >= nextContig->beginningOffset) {
            //
            // Start of a new contig.  Look up the one that follows it just once; the result is needed both
            // for skipping this contig and for bounding the chunks copied out of it.
            //
            currentContig = nextContig;
            nextContig = getNextContigAfterLocation(offsetInReference + 1);

            if ((!copyX && !strcmp(currentContig->name,"chrX")) ||
                (!copyY && !strcmp(currentContig->name,"chrY")) ||
                (!copyM && !strcmp(currentContig->name,"chrM"))) {
                //
                // Yes, skip over this contig.
                //
                if (NULL == nextContig) {
                    //
                    // The chromosome that we're skipping was the last one, so we're done.
                    //
                    break;
                }

                offsetInReference = nextContig->beginningOffset;
                continue;
            } // If skipping this chromosome

            newCopy->startContig(currentContig->name);
        } // If new contig beginning

        //
        // Work out the size of this chunk: never more than the buffer holds, never across the start of the
        // next contig, and never beyond the end of the genome.  The limits are computed as distances from
        // offsetInReference rather than as offsetInReference + amountToCopy, because that sum can wrap
        // around for offsets close to the top of the unsigned range and would then leave the chunk
        // unclamped.  Neither subtraction can wrap: any nextContig that reaches this point begins after
        // offsetInReference (otherwise the branch above would have consumed it), and the loop condition
        // guarantees offsetInReference is less than getCountOfBases().
        //
        unsigned amountToCopy = maxCopySize;
        if (nextContig && nextContig->beginningOffset - offsetInReference < amountToCopy) {
            amountToCopy = nextContig->beginningOffset - offsetInReference;
        }

        if (getCountOfBases() - offsetInReference < amountToCopy) {
            amountToCopy = getCountOfBases() - offsetInReference;
        }

        memcpy(dataBuffer,getSubstring(offsetInReference,amountToCopy), amountToCopy);
        dataBuffer[amountToCopy] = '\0';

        newCopy->addData(dataBuffer);

        offsetInReference += amountToCopy;
    }

    newCopy->fillInContigLengths();
    newCopy->sortContigsByName();
    return newCopy;
}
Esempio n. 3
0
    int
Genome::getContigNumAtLocation(GenomeLocation location) const
{
    //
    // Returns the index, within the contigs array, of the contig that getContigAtLocation() reports for
    // location, or -1 when it reports no contig at all.
    //
    const Contig *contig = getContigAtLocation(location);

    if (NULL == contig) {
        //
        // Subtracting the array base from a null pointer is undefined behavior and would hand the caller
        // a meaningless index, so report "no contig" explicitly instead.
        //
        return -1;
    }

    return static_cast<int>(contig - contigs);
}