//
// Finds the contig that a read aligned at location should be reported against.
//
// Normally that is simply the contig containing location, and *extraBasesClippedBefore is set to 0.
// However, a read can align so that it starts before the beginning of a chromosome (as if a few extra
// bases had been prepended to it).  That shows up either as no contig at all at location (the read starts
// before the first contig) or as the read running past the end of the contig that contains location.  In
// both cases the read is attributed to the next contig after location, and *extraBasesClippedBefore is
// set to the number of leading bases that lie before that contig's beginning, so that the caller can
// soft-clip them.
//
// A read is never expected to run off the end of a chromosome without reaching the next one; the asserts
// check that the next contig exists and begins strictly inside the read.
//
    const Genome::Contig *
Genome::getContigForRead(unsigned location, unsigned readLength, unsigned *extraBasesClippedBefore) const
{
    const unsigned readEnd = location + readLength;
    const Contig *containing = getContigAtLocation(location);

    if (NULL != containing && readEnd <= containing->beginningOffset + containing->length) {
        //
        // The whole read fits inside the contig it starts in; nothing to clip.
        //
        *extraBasesClippedBefore = 0;
        return containing;
    }

    //
    // The read starts before a contig boundary and crosses it.  Attribute it to the contig it runs into.
    //
    const Contig *following = getNextContigAfterLocation(location);
    _ASSERT(NULL != following);
    _ASSERT(following->beginningOffset > location && following->beginningOffset < readEnd);

    *extraBasesClippedBefore = following->beginningOffset - location;
    return following;
}
// // Makes a copy of a Genome, but with only one of the sex chromosomes. // // The fate of the mitochondrion is that of the X chromosome. // Genome * Genome::copy(bool copyX, bool copyY, bool copyM) const { Genome *newCopy = new Genome(getCountOfBases(),getCountOfBases(), chromosomePadding); if (NULL == newCopy) { WriteErrorMessage("Genome::copy: failed to allocate space for copy.\n"); return NULL; } const Genome::Contig *currentContig = NULL; const Genome::Contig *nextContig = getContigAtLocation(0); unsigned offsetInReference = 0; while (offsetInReference < getCountOfBases()) { if (NULL != nextContig && offsetInReference >= nextContig->beginningOffset) { // // Start of a new contig. See if we want to skip it. // currentContig = nextContig; nextContig = getNextContigAfterLocation(offsetInReference + 1); if ((!copyX && !strcmp(currentContig->name,"chrX")) || (!copyY && !strcmp(currentContig->name,"chrY")) || (!copyM && !strcmp(currentContig->name,"chrM"))) { // // Yes, skip over this contig. // nextContig = getNextContigAfterLocation(offsetInReference + 1); if (NULL == nextContig) { // // The chromosome that we're skipping was the last one, so we're done. // break; } else { offsetInReference = nextContig->beginningOffset; continue; } } // If skipping this chromosome newCopy->startContig(currentContig->name); } // If new contig beginning const size_t maxCopySize = 10000; char dataBuffer[maxCopySize + 1]; unsigned amountToCopy = maxCopySize; if (nextContig && nextContig->beginningOffset < offsetInReference + amountToCopy) { amountToCopy = nextContig->beginningOffset - offsetInReference; } if (getCountOfBases() < offsetInReference + amountToCopy) { amountToCopy = getCountOfBases() - offsetInReference; } memcpy(dataBuffer,getSubstring(offsetInReference,amountToCopy), amountToCopy); dataBuffer[amountToCopy] = '\0'; newCopy->addData(dataBuffer); offsetInReference += amountToCopy; } newCopy->fillInContigLengths(); newCopy->sortContigsByName(); return newCopy; }
//
// Returns the index in the contigs array of the contig that getContigAtLocation() reports for location,
// or -1 if it reports no contig there.
//
    int
Genome::getContigNumAtLocation(GenomeLocation location) const
{
    const Contig *contig = getContigAtLocation(location);

    if (NULL == contig) {
        //
        // No contig at this location.  Subtracting the array base from a null pointer is undefined
        // behavior and would produce a meaningless index, so report "none" explicitly instead.
        //
        return -1;
    }

    return static_cast<int>(contig - contigs);
}