Esempio n. 1
0
// Soft clip the front of the read, from its first base through the base
// aligned at the specified 0-based reference position, building the
// resulting cigar in newCigar (which is always cleared first).
//
// Early exits, all returning NO_CLIP and leaving new0BasedPosition untouched:
//   * the record's cigar cannot be obtained (an error is also reported),
//   * the cigar is empty or the record position is -1,
//   * refPosition0Based is before the record's start position; in this case
//     newCigar is set from the record's unmodified cigar string.
//
// Otherwise new0BasedPosition is set to the reference position of the first
// base that is kept, or to the record's original position if no base is kept.
//
// Returns the 0-based read index of the last clipped base (number of clipped
// query bases minus one).
int32_t CigarHelper::softClipBeginByRefPos(SamRecord& record, 
                                           int32_t refPosition0Based,
                                           CigarRoller& newCigar,
                                           int32_t &new0BasedPosition)
{
    newCigar.clear();

    Cigar* origCigar = record.getCigarInfo();
    if(origCigar == NULL)
    {
        // Without the parsed cigar there is nothing that can be clipped.
        ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
        return(NO_CLIP);
    }

    // An empty cigar or a record without a position cannot be clipped.
    if((origCigar->size() == 0) || (record.get0BasedPosition() == -1))
    {
        return(NO_CLIP);
    }

    // The clip position is before the first aligned base: keep the cigar
    // exactly as it is.
    if(refPosition0Based < record.get0BasedPosition())
    {
        newCigar.Set(record.getCigar());
        return(NO_CLIP);
    }

    // Walk the cigar accumulating clipped query bases until the first
    // match/mismatch base past the clip position is reached.
    int32_t numClippedBases = 0;
    bool clipEmitted = false;
    new0BasedPosition = record.get0BasedPosition();

    for(int opIndex = 0; opIndex < origCigar->size(); ++opIndex)
    {
        const Cigar::CigarOperator& curOp = origCigar->getOperator(opIndex);

        if(clipEmitted)
        {
            // Past the clip point: everything else is copied unchanged.
            newCigar += curOp;
            continue;
        }

        if(!Cigar::foundInReference(curOp))
        {
            // Operation does not consume the reference.
            // Hard clips are only preserved at the two ends of the cigar;
            // a hard clip anywhere else is dropped.
            if(curOp.operation == Cigar::hardClip)
            {
                if(opIndex == 0)
                {
                    // Leading hard clip: stays in front of the soft clip.
                    newCigar += curOp;
                }
                else if(opIndex == (origCigar->size() - 1))
                {
                    // Trailing hard clip reached before any base was kept
                    // (the clip cannot have been emitted yet, or the
                    // check at the top of the loop would have fired).
                    // Emit the soft clip now so it precedes the hard clip,
                    // and restore the original start position since no
                    // match/mismatch was retained.
                    newCigar.Add(Cigar::softClip, numClippedBases);
                    new0BasedPosition = record.get0BasedPosition();
                    clipEmitted = true;
                    newCigar += curOp;
                }
            }

            // Query bases seen before the clip point (e.g. existing soft
            // clips, insertions) are folded into the new soft clip.
            if(Cigar::foundInQuery(curOp))
            {
                numClippedBases += curOp.count;
            }
            continue;
        }

        // Operation consumes the reference (match, mismatch, deletion,
        // skip): move the reference position to just past it.
        new0BasedPosition += curOp.count;

        if(!Cigar::foundInQuery(curOp))
        {
            // Deletion/skip before the clip point: consumed and dropped.
            continue;
        }

        // Match/mismatch: work out how many of its bases lie after the
        // clip position and are therefore kept.
        uint32_t basesKept = 0;
        if(refPosition0Based < new0BasedPosition)
        {
            basesKept = new0BasedPosition - refPosition0Based - 1;
            if(basesKept > curOp.count)
            {
                // The clip position was passed in an earlier operation,
                // so this whole operation is kept.
                basesKept = curOp.count;
            }
        }

        // Whatever is not kept from this operation is clipped.
        numClippedBases += (curOp.count - basesKept);

        if(basesKept == 0)
        {
            // Nothing to keep yet; keep accumulating clipped bases.
            continue;
        }

        // Back the position up to the first kept base, then write the
        // accumulated soft clip followed by the kept part of this operation.
        new0BasedPosition -= basesKept;
        newCigar.Add(Cigar::softClip, numClippedBases);
        newCigar.Add(curOp.operation, basesKept);
        clipEmitted = true;
    } // End loop through cigar.

    if(!clipEmitted)
    {
        // No match/mismatch survived: the soft clip is still pending and
        // the position reverts to the record's original one.
        newCigar.Add(Cigar::softClip, numClippedBases);
        new0BasedPosition = record.get0BasedPosition();
    }

    // numClippedBases is the 0-based index of the first base that is not
    // clipped, so the last clipped index is one less.
    return(numClippedBases - 1);
}
Esempio n. 2
0
// Soft clip numFrontClips query bases from the front and numBackClips query
// bases from the back of oldCigar, appending the resulting operations to
// updatedCigar.
//
// NOTE: updatedCigar is only appended to here, never cleared -- presumably
// callers pass in an empty roller (confirm against callers).
//
// Returns:
//   NONE     - both clip counts are 0; nothing is written.
//   FILTERED - the clips cover the whole read (returned before anything is
//              written), or the rebuilt cigar contains only clip operations.
//   CLIPPED  - otherwise.  If numFrontClips > 0, startPos is moved to one
//              past the reference position of the last front-clipped base,
//              provided oldCigar can map that base to the reference
//              (i.e. getRefPosition does not return Cigar::INDEX_NA).
SamFilter::FilterStatus SamFilter::softClip(Cigar& oldCigar, 
                                            int32_t numFrontClips,
                                            int32_t numBackClips,
                                            int32_t& startPos,
                                            CigarRoller& updatedCigar)
{
    const int32_t queryLength = oldCigar.getExpectedQueryBaseCount();
    // Query offset at which the back clip begins.
    const int32_t backClipStart = queryLength - numBackClips;

    // Nothing requested, nothing to do.
    if((numFrontClips == 0) && (numBackClips == 0))
    {
        return(NONE);
    }

    // Clipping would swallow the entire read: filter rather than clip.
    if((numFrontClips + numBackClips) >= queryLength)
    {
        return(FILTERED);
    }

    // Cursor into the original cigar, shared by all of the passes below.
    int opIndex = 0;
    // Number of query bases covered by the operations consumed so far.
    int32_t basesSeen = 0;
    // Becomes true once something other than a clip lands in updatedCigar.
    bool hasNonClipOp = false;
    // Most recently visited operation of the original cigar.
    const Cigar::CigarOperator* curOp = NULL;

    //////////////////
    // Front clip: consume operations until enough query bases are covered.
    for(; (opIndex < oldCigar.size()) && (basesSeen < numFrontClips);
        ++opIndex)
    {
        curOp = &(oldCigar.getOperator(opIndex));
        if(curOp->operation == Cigar::hardClip)
        {
            // Existing hard clips are unaffected by the new soft clip.
            updatedCigar += *curOp;
        }
        else if((curOp->operation == Cigar::insert) ||
                (curOp->operation == Cigar::match) ||
                (curOp->operation == Cigar::mismatch) ||
                (curOp->operation == Cigar::softClip))
        {
            // These appear in the read, so they count toward the clip.
            basesSeen += curOp->count;
        }
        // Anything else (deletion, skip, none, ...) inside the clipped
        // region is dropped without advancing the query count.
    }

    if(numFrontClips != 0)
    {
        updatedCigar.Add(Cigar::softClip, numFrontClips);

        // The last consumed operation may extend past the front clip;
        // the part beyond it is kept, unless the back clip also eats
        // into that same operation.
        int32_t leftover = basesSeen - numFrontClips;
        if((leftover > 0) && (basesSeen > backClipStart))
        {
            leftover -= (basesSeen - backClipStart);
        }
        if(leftover > 0)
        {
            updatedCigar.Add(curOp->operation, leftover);
            if(!Cigar::isClip(curOp->operation))
            {
                hasNonClipOp = true;
            }
        }
    }

    //////////////////
    // Middle: copy operations up to the start of the back clip.
    // The comparison is <= rather than < so that operations not in the
    // query (e.g. the 1D in 2M1D3M with backClipStart == 2) sitting right
    // at the boundary are still consumed.
    for(; (opIndex < oldCigar.size()) && (basesSeen <= backClipStart);
        ++opIndex)
    {
        curOp = &(oldCigar.getOperator(opIndex));
        bool opWritten = false;

        if(Cigar::foundInQuery(curOp->operation))
        {
            // backClipStart is a 0-based offset and basesSeen a count, so
            // their difference is how many more bases may be kept.
            const uint32_t basesBeforeClip = backClipStart - basesSeen;
            if(basesBeforeClip >= curOp->count)
            {
                // Entirely before the back clip: keep it whole.
                updatedCigar += *curOp;
                opWritten = true;
            }
            else if(basesBeforeClip != 0)
            {
                // Straddles the back clip: keep only the leading part.
                updatedCigar.Add(curOp->operation, basesBeforeClip);
                opWritten = true;
            }
            basesSeen += curOp->count;
        }
        else
        {
            // Not in the query, so it cannot reach the back clip: keep it.
            updatedCigar += *curOp;
            opWritten = true;
        }

        if(opWritten && !Cigar::isClip(curOp->operation))
        {
            hasNonClipOp = true;
        }
    }

    //////////////////
    // Back clip.
    if(numBackClips != 0)
    {
        updatedCigar.Add(Cigar::softClip, numBackClips);
    }

    //////////////////
    // Tail: of whatever remains, only hard clips are carried over.
    for(; opIndex < oldCigar.size(); ++opIndex)
    {
        const Cigar::CigarOperator& tailOp = oldCigar.getOperator(opIndex);
        if(tailOp.operation == Cigar::hardClip)
        {
            updatedCigar += tailOp;
        }
    }

    // A cigar made of nothing but clips means the read is effectively
    // fully clipped, so report it as filtered.
    if(!hasNonClipOp)
    {
        return(FILTERED);
    }

    if(numFrontClips > 0)
    {
        // Map the last front-clipped query index to its reference position
        // using the old cigar; the new start is the position right after.
        const int32_t refPosOfLastClip =
            oldCigar.getRefPosition(numFrontClips - 1, startPos);
        if(refPosOfLastClip != Cigar::INDEX_NA)
        {
            startPos = refPosOfLastClip + 1;
        }
    }
    return(CLIPPED);
}
Esempio n. 3
0
// Soft clip the end of the read, starting at the base aligned at the
// specified 0-based reference position, building the resulting cigar in
// newCigar (which is always cleared first).
//
// Early exits, all returning NO_CLIP:
//   * the record's cigar cannot be obtained (an error is also reported),
//   * the cigar is empty or the record position is -1,
//   * refPosition0Based is past the record's alignment end; in this case
//     newCigar is set from the record's unmodified cigar string.
//
// Otherwise returns the number of query bases that remain in front of the
// new soft clip, i.e. the 0-based read index of the first clipped base.
int32_t CigarHelper::softClipEndByRefPos(SamRecord& record, 
                                         int32_t refPosition0Based,
                                         CigarRoller& newCigar)
{
    newCigar.clear();
    Cigar* cigar = record.getCigarInfo();
    if(cigar == NULL)
    {
        // Failed to get the cigar.
        ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
        return(NO_CLIP);
    }

    // No cigar or position in the record, so return no clip.
    if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
    {
        return(NO_CLIP);
    }

    // Check to see if the reference position occurs after the record ends,
    // if so, do no clipping.
    if(refPosition0Based > record.get0BasedAlignmentEnd())
    {
        // Not within this read, so nothing to clip.
        newCigar.Set(record.getCigar());
        return(NO_CLIP);
    }

    // The position falls before the read ends, so copy operations until the
    // one containing the clip position is found.
    int32_t currentRefPosition = record.get0BasedPosition();
    int32_t readClipPosition = 0;
    for(int i = 0; i < cigar->size(); i++)
    {
        const Cigar::CigarOperator& op = cigar->getOperator(i);

        // If the operation is found in the reference (match, mismatch,
        // deletion, skip), advance the reference position to just past it.
        if(Cigar::foundInReference(op))
        {
            currentRefPosition += op.count;
        }

        if(refPosition0Based >= currentRefPosition)
        {
            // Not yet to the clip position, so the whole operation is kept.
            newCigar += op;
            if(Cigar::foundInQuery(op))
            {
                // Found in the query, so update the read clip position.
                readClipPosition += op.count;
            }
            continue;
        }

        // The clip position has been reached.
        if(Cigar::foundInQuery(op))
        {
            // The operation is in the query, so keep only the bases that
            // come before the clip position (if there are any).
            const int32_t numKeep =
                op.count - (currentRefPosition - refPosition0Based);
            if(numKeep > 0)
            {
                newCigar.Add(op.operation, numKeep);
                readClipPosition += numKeep;
            }
        }
        else if(Cigar::isClip(op))
        {
            // A clip that is not in the query (hard clip), so write it.
            newCigar.Add(op.operation, op.count);
        }
        // Anything else is not in the query (skip/deletion), so none of it
        // is written.

        // Found the clip point, so stop.
        break;
    } // End loop through cigar.

    // Before adding the softclip, read from the end of the new cigar removing
    // operations that are not in the query (pad/delete/skip) until a hardclip
    // or an operation in the query is found.  We do not want a
    // pad/delete/skip right before a softclip.
    for(int j = newCigar.size() - 1; j >= 0; j--)
    {
        const Cigar::CigarOperator* tailOp = &(newCigar.getOperator(j));
        if(!Cigar::foundInQuery(*tailOp) && !Cigar::isClip(*tailOp))
        {
            // pad/delete/skip
            newCigar.Remove(j);
        }
        else if(Cigar::foundInQuery(*tailOp) && Cigar::isClip(*tailOp))
        {
            // Existing soft clip: its bases were counted as kept, but they
            // belong to the clip.  Remove it and give the bases back so the
            // soft clip size computed below covers them.
            readClipPosition -= tailOp->count;
            newCigar.Remove(j);
        }
        else
        {
            // Found a cigar operation that should not be deleted, so stop.
            break;
        }
    } 

    // Everything in the read past the kept bases becomes the soft clip.
    int32_t numSoftClips = record.getReadLength() - readClipPosition;
    newCigar.Add(Cigar::softClip, numSoftClips);

    // Carry over a trailing hard clip from the original cigar.  The cigar is
    // known to be non-empty here because the empty case returned above.
    const Cigar::CigarOperator& lastOp = cigar->getOperator(cigar->size() - 1);
    if(lastOp.operation == Cigar::hardClip)
    {
        newCigar += lastOp;
    }

    return(readClipPosition);
}