// Soft Clip from the beginning of the read to the specified reference position. int32_t CigarHelper::softClipBeginByRefPos(SamRecord& record, int32_t refPosition0Based, CigarRoller& newCigar, int32_t &new0BasedPosition) { newCigar.clear(); Cigar* cigar = record.getCigarInfo(); if(cigar == NULL) { // Failed to get the cigar. ErrorHandler::handleError("Soft clipping, but failed to read the cigar"); return(NO_CLIP); } // No cigar or position in the record, so return no clip. if((cigar->size() == 0) || (record.get0BasedPosition() == -1)) { return(NO_CLIP); } // Check to see if the reference position occurs before the record starts, // if it does, do no clipping. if(refPosition0Based < record.get0BasedPosition()) { // Not within this read, so nothing to clip. newCigar.Set(record.getCigar()); return(NO_CLIP); } // The position falls after the read starts, so loop through until the // position or the end of the read is found. int32_t readClipPosition = 0; bool clipWritten = false; new0BasedPosition = record.get0BasedPosition(); for(int i = 0; i < cigar->size(); i++) { const Cigar::CigarOperator* op = &(cigar->getOperator(i)); if(clipWritten) { // Clip point has been found, so just add everything. newCigar += *op; // Go to the next operation. continue; } // The clip point has not yet been found, so check to see if we found // it now. // Not a clip, check to see if the operation is found in the // reference. if(Cigar::foundInReference(*op)) { // match, mismatch, deletion, skip // increment the current reference position to just past this // operation. new0BasedPosition += op->count; // Check to see if this is also in the query, because otherwise // the operation is still being consumed. if(Cigar::foundInQuery(*op)) { // Also in the query, determine if the entire thing should // be clipped or just part of it. uint32_t numKeep = 0; // Check to see if we have hit our clip position. if(refPosition0Based < new0BasedPosition) { // The specified clip position is in this cigar operation. numKeep = new0BasedPosition - refPosition0Based - 1; if(numKeep > op->count) { // Keep the entire read. This happens because // we keep reading until the first match/mismatch // after the clip. numKeep = op->count; } } // Add the part of this operation that is being clipped // to the clip count. readClipPosition += (op->count - numKeep); // Only write the clip if we found a match/mismatch // to write. Otherwise we will keep accumulating clips // for the case of insertions. if(numKeep > 0) { new0BasedPosition -= numKeep; newCigar.Add(Cigar::softClip, readClipPosition); // Add the clipped part of this cigar to the clip // position. newCigar.Add(op->operation, numKeep); // Found a match after the clip point, so stop // consuming cigar operations. clipWritten = true; continue; } } } else { // Only add hard clips. The softclips will be added in // when the total number is found. if(op->operation == Cigar::hardClip) { // Check if this is the first operation, if so, just write it. if(i == 0) { newCigar += *op; } // Check if it is the last operation (otherwise skip it). else if(i == (cigar->size() - 1)) { // Check whether or not the clip was ever written, and if // not, write it. if(clipWritten == false) { newCigar.Add(Cigar::softClip, readClipPosition); // Since no match/mismatch was ever found, set // the new ref position to the original one. new0BasedPosition = record.get0BasedPosition(); clipWritten = true; } // Add the hard clip. newCigar += *op; } } // Not yet to the clip position, so do not add this operation. if(Cigar::foundInQuery(*op)) { // Found in the query, so update the read clip position. readClipPosition += op->count; } } } // End loop through cigar. // Check whether or not the clip was ever written, and if // not, write it. if(clipWritten == false) { newCigar.Add(Cigar::softClip, readClipPosition); // Since no match/mismatch was ever found, set // the new ref position to the original one. new0BasedPosition = record.get0BasedPosition(); } // Subtract 1 since readClipPosition atually contains the first 0based // position that is not clipped. return(readClipPosition - 1); }
// Soft clip the cigar from the front and/or the back, writing the value // into the new cigar. SamFilter::FilterStatus SamFilter::softClip(Cigar& oldCigar, int32_t numFrontClips, int32_t numBackClips, int32_t& startPos, CigarRoller& updatedCigar) { int32_t readLength = oldCigar.getExpectedQueryBaseCount(); int32_t endClipPos = readLength - numBackClips; FilterStatus status = NONE; if((numFrontClips != 0) || (numBackClips != 0)) { // Clipping from front and/or from the back. // Check to see if the entire read was clipped. int32_t totalClips = numFrontClips + numBackClips; if(totalClips >= readLength) { ///////////////////////////// // The entire read is clipped, so rather than clipping it, // filter it out. return(FILTERED); } // Part of the read was clipped. status = CLIPPED; // Loop through, creating an updated cigar. int origCigarOpIndex = 0; // Track how many read positions are covered up to this // point by the cigar to determine up to up to what // point in the cigar is affected by this clipping. int32_t numPositions = 0; // Track if any non-clips are in the new cigar. bool onlyClips = true; const Cigar::CigarOperator* op = NULL; ////////////////// // Clip from front while((origCigarOpIndex < oldCigar.size()) && (numPositions < numFrontClips)) { op = &(oldCigar.getOperator(origCigarOpIndex)); switch(op->operation) { case Cigar::hardClip: // Keep this operation as the new clips do not // affect other clips. updatedCigar += *op; break; case Cigar::del: case Cigar::skip: // Skip and delete are going to be dropped, and // are not in the read, so the read index doesn't // need to be updated break; case Cigar::insert: case Cigar::match: case Cigar::mismatch: case Cigar::softClip: // Update the read index as these types // are found in the read. numPositions += op->count; break; case Cigar::none: default: // Nothing to do for none. break; }; ++origCigarOpIndex; } // If bases were clipped from the front, add the clip and // any partial cigar operation as necessary. if(numFrontClips != 0) { // Add the softclip to the front of the read. updatedCigar.Add(Cigar::softClip, numFrontClips); // Add the rest of the last Cigar operation if // it is not entirely clipped. int32_t newCount = numPositions - numFrontClips; if(newCount > 0) { // Before adding it, check to see if the same // operation is clipped from the end. // numPositions greater than the endClipPos // means that it is equal or past that position, // so shorten the number of positions. if(numPositions > endClipPos) { newCount -= (numPositions - endClipPos); } if(newCount > 0) { updatedCigar.Add(op->operation, newCount); if(!Cigar::isClip(op->operation)) { onlyClips = false; } } } } // Add operations until the point of the end clip is reached. // For example... // 2M1D3M = MMDMMM readLength = 5 // readIndex: 01 234 // at cigarOpIndex 0 (2M), numPositions = 2. // at cigarOpIndex 1 (1D), numPositions = 2. // at cigarOpIndex 2 (3M), numPositions = 5. // if endClipPos = 2, we still want to consume the 1D, so // need to keep looping until numPositions > endClipPos while((origCigarOpIndex < oldCigar.size()) && (numPositions <= endClipPos)) { op = &(oldCigar.getOperator(origCigarOpIndex)); // Update the numPositions count if the operations indicates // bases within the read. if(!Cigar::foundInQuery(op->operation)) { // This operation is not in the query read sequence, // so it is not yet to the endClipPos, just add the // operation do not increment the number of positions. updatedCigar += *op; if(!Cigar::isClip(op->operation)) { onlyClips = false; } } else { // This operation appears in the query sequence, so // check to see if the clip occurs in this operation. // endClipPos is 0 based & numPositions is a count. // If endClipPos is 4, then it is the 5th position. // If 4 positions are covered so far (numPositions = 4), // then we are right at endCLipPos: 4-4 = 0, none of // this operation should be kept. // If only 3 positions were covered, then we are at offset // 3, so offset 3 should be added: 4-3 = 1. uint32_t numPosTilClip = endClipPos - numPositions; if(numPosTilClip < op->count) { // this operation is partially clipped, write the part // that was not clipped if it is not all clipped. if(numPosTilClip != 0) { updatedCigar.Add(op->operation, numPosTilClip); if(!Cigar::isClip(op->operation)) { onlyClips = false; } } } else { // This operation is not clipped, so add it updatedCigar += *op; if(!Cigar::isClip(op->operation)) { onlyClips = false; } } // This operation occurs in the query sequence, so // increment the number of positions covered. numPositions += op->count; } // Move to the next cigar position. ++origCigarOpIndex; } ////////////////// // Add the softclip to the back. if(numBackClips != 0) { // Add the softclip to the end updatedCigar.Add(Cigar::softClip, numBackClips); } ////////////////// // Add any hardclips remaining in the original cigar to the back. while(origCigarOpIndex < oldCigar.size()) { op = &(oldCigar.getOperator(origCigarOpIndex)); if(op->operation == Cigar::hardClip) { // Keep this operation as the new clips do not // affect other clips. updatedCigar += *op; } ++origCigarOpIndex; } // Check to see if the new cigar is only clips. if(onlyClips) { // Only clips in the new cigar, so mark the read as filtered // instead of updating the cigar. ///////////////////////////// // The entire read was clipped. status = FILTERED; } else { // Part of the read was clipped. // Update the starting position if a clip was added to // the front. if(numFrontClips > 0) { // Convert from query index to reference position (from the // old cigar) // Get the position for the last front clipped position by // getting the position associated with the clipped base on // the reference. Then add one to get to the first // non-clipped position. int32_t lastFrontClipPos = numFrontClips - 1; int32_t newStartPos = oldCigar.getRefPosition(lastFrontClipPos, startPos); if(newStartPos != Cigar::INDEX_NA) { // Add one to get first non-clipped position. startPos = newStartPos + 1; } } } } return(status); }
// Soft Clip from the end of the read at the specified reference position. int32_t CigarHelper::softClipEndByRefPos(SamRecord& record, int32_t refPosition0Based, CigarRoller& newCigar) { newCigar.clear(); Cigar* cigar = record.getCigarInfo(); if(cigar == NULL) { // Failed to get the cigar. ErrorHandler::handleError("Soft clipping, but failed to read the cigar"); return(NO_CLIP); } // No cigar or position in the record, so return no clip. if((cigar->size() == 0) || (record.get0BasedPosition() == -1)) { return(NO_CLIP); } // Check to see if the reference position occurs after the record ends, // if so, do no clipping. if(refPosition0Based > record.get0BasedAlignmentEnd()) { // Not within this read, so nothing to clip. newCigar.Set(record.getCigar()); return(NO_CLIP); } // The position falls before the read ends, so loop through until the // position is found. int32_t currentRefPosition = record.get0BasedPosition(); int32_t readClipPosition = 0; for(int i = 0; i < cigar->size(); i++) { const Cigar::CigarOperator* op = &(cigar->getOperator(i)); // If the operation is found in the reference, increase the // reference position. if(Cigar::foundInReference(*op)) { // match, mismatch, deletion, skip // increment the current reference position to just past // this operation. currentRefPosition += op->count; } // Check to see if we have hit our clip position. if(refPosition0Based < currentRefPosition) { // If this read is also in the query (match/mismatch), // write the partial op to the new cigar. int32_t numKeep = 0; if(Cigar::foundInQuery(*op)) { numKeep = op->count - (currentRefPosition - refPosition0Based); if(numKeep > 0) { newCigar.Add(op->operation, numKeep); readClipPosition += numKeep; } } else if(Cigar::isClip(*op)) { // This is a hard clip, so write it. newCigar.Add(op->operation, op->count); } else { // Not found in the query (skip/deletion), // so don't write any of the operation. } // Found the clip point, so break. break; } else if(refPosition0Based == currentRefPosition) { newCigar += *op; if(Cigar::foundInQuery(*op)) { readClipPosition += op->count; } } else { // Not yet to the clip position, so add this operation/size to // the new cigar. newCigar += *op; if(Cigar::foundInQuery(*op)) { // Found in the query, so update the read clip position. readClipPosition += op->count; } } } // End loop through cigar. // Before adding the softclip, read from the end of the cigar checking to // see if the operations are in the query, removing operations that are // not (pad/delete/skip) until a hardclip or an operation in the query is // found. We do not want a pad/delete/skip right before a softclip. for(int j = newCigar.size() - 1; j >= 0; j--) { const Cigar::CigarOperator* op = &(newCigar.getOperator(j)); if(!Cigar::foundInQuery(*op) && !Cigar::isClip(*op)) { // pad/delete/skip newCigar.Remove(j); } else if(Cigar::foundInQuery(*op) & Cigar::isClip(*op)) { // Soft clip, so increment the clip position for the return value. // Remove the softclip since the readClipPosition is used to // calculate teh size of the soft clip added. readClipPosition -= op->count; newCigar.Remove(j); } else { // Found a cigar operation that should not be deleted, so stop deleting. break; } } // Determine the number of soft clips. int32_t numSoftClips = record.getReadLength() - readClipPosition; // NOTE that if the previous operation is a softclip, the CigarRoller logic // will merge this with that one. newCigar.Add(Cigar::softClip, numSoftClips); // Check if an ending hard clip needs to be added. if(cigar->size() != 0) { const Cigar::CigarOperator* lastOp = &(cigar->getOperator(cigar->size() - 1)); if(lastOp->operation == Cigar::hardClip) { newCigar += *lastOp; } } return(readClipPosition); }