Example #1
0
// Scores the forest that starts at node i and is j trees wide by replacing
// every label with itself through alg.replace().
//   alg  algebra supplying the replace() scoring function
//   i    index of the first node of the forest
//   j    number of trees still to be scored; 0 yields a score of 0
// NOTE(review): presumably rb(i) is the right brother of i and noc(i) its
// number of children, with the first child stored at i+1 -- confirm against
// the PPForest declaration.
R PPForest<L>::maxScore(const Algebra<R,L> &alg, size_type i, size_type j) const
{
	// Nothing left to score.
	if(j==0)
		return 0;

	if(!isLeave(i))
	{
		// Inner node: score the forest starting at i+1 first, then the
		// remaining j-1 trees starting at rb(i).
		R below=maxScore(alg,i+1,noc(i));
		R beside=maxScore(alg,rb(i),j-1);
		return alg.replace(label(i),below,label(i),beside);
	}

	// Leaf: there is nothing below i, so the "down" score is the literal 0.
	R beside=maxScore(alg,rb(i),j-1);
	return alg.replace(label(i),0,label(i),beside);
}
Example #2
0
// Scores the forest that starts at node i and is j trees wide by replacing
// every label with itself, using alg.replace() for leaves and
// alg.replacepair() for inner nodes.
//   alg  RNA algebra supplying replace() and replacepair()
//   i    index of the first node of the forest
//   j    number of trees still to be scored; 0 yields a score of 0
// NOTE(review): the inner-node branch skips node i+1 and subtracts 2 from
// noc(i), which looks like a pair node whose first and rightmost children
// are the paired bases -- confirm against the PPForest/RNA_Algebra docs.
R PPForest<L>::maxScore(const RNA_Algebra<R,L> &alg, Uint i, Uint j) const
{
	// Nothing left to score.
	if(j==0)
		return 0;

	if(!isLeave(i))
	{
		// Inner node: score what lies between the two outer children,
		// then the remaining j-1 trees starting at rb(i).
		R between=maxScore(alg,i+2,noc(i)-2);
		R beside=maxScore(alg,rb(i),j-1);
		return alg.replacepair(
			label(i+1),
			label(i+1),
			between,
			label(getRightmostBrotherIndex(i+1)),
			label(getRightmostBrotherIndex(i+1)),
			beside);
	}

	// Leaf: there is nothing below i, so the "down" score is the literal 0.
	R beside=maxScore(alg,rb(i),j-1);
	return alg.replace(label(i),0,label(i),beside);
}
Example #3
0
//------------------------------------------ Private Function Definitions ----//
bool _alignEngine
(const char * A0, long int Astart, long int & Aend,
 const char * B0, long int Bstart, long int & Bend,
 vector<long int> & Delta, unsigned int m_o)

//  Scores an alignment of A against B one edit-matrix diagonal at a time,
//  trimming nodes whose score falls too far below the best score seen or
//  that lie outside the hard band, and reports where the alignment finished.
//
//  A0 is a sequence such that A [1...\0]
//  B0 is a sequence such that B [1...\0]
//  The alignment should use bases A [Astart...Aend] (inclusive)
//  The alignment should use bases B [Bstart...Bend] (inclusive)
//     or [Aend...Astart] etc. if BACKWARD_SEARCH
//     Aend must never equal Astart, same goes for Bend and Bstart
//  Aend and Bend are in/out: on return they are overwritten with the
//     positions at which the alignment finished
//  Delta is an integer vector, not necessarily empty; it is only passed on
//     to generateDelta() when SEARCH_BIT is not set in m_o
//  m_o is the modus operandi of the function:
//    FORWARD_ALIGN, FORWARD_SEARCH, BACKWARD_SEARCH
//  Returns true on success (Aend & Bend reached) or false on failure

{
    Diagonal * Diag;       // the list of diagonals to make up edit matrix

    bool TargetReached;    // the target was reached

    const char * A, * B;     // the sequence pointers to be used by this func

    long int min_score = (-1 * LONG_MAX);       // minimum possible score
    long int high_score = min_score;        // global maximum score
    long int xhigh_score = min_score;         // non-optimal high score

    // max score difference
    long int max_diff = GOOD_SCORE [getMatrixType( )] * _break_len;

    long int CDi;        // conceptual diagonal index (not relating to mem)
    long int Dct, Di;      // diagonal counter, actual diagonal index
    long int PDct, PPDct;    // previous diagonal and prev prev diagonal
    long int PDi, PPDi;    // previous diagonal index and prev prev diag index
    long int Ds, PDs, PPDs;  // diagonal size, prev, prev prev diagonal size
    //   where 'size' = rbound - lbound + 1
    long int Ll = 100;     // capacity of the diagonal list
    long int Dl = 2;       // current conceptual diagonal length
    long int lbound = 0;     // current diagonal left(lower) node bound index
    long int rbound = 0;     // current diagonal right(upper) node bound index
    long int FinishCt = 0;   // diagonal containing the high_score
    long int FinishCDi = 0;  // conceptual index of the high_score on FinishCt
    long int xFinishCt = 0;  // non-optimal ...
    long int xFinishCDi = 0;   // non-optimal ...
    long int N, M, L;      // maximum matrix dimensions... N rows, M columns
    //   L is the smaller of N and M

    long int tlb, trb;     // hard band limits for lbound and rbound
    double Dmid = .5;      // diag midpoint
    double Dband = _banding/2.0; // diag banding

    int Iadj, Dadj, Madj;    // insert, delete and match adjust values

#ifdef _DEBUG_VERBOSE
    long int MaxL = 0;       // biggest diagonal seen
    long int TrimCt = 0;       // counter of nodes trimmed
    long int CalcCt = 0;       // counter of nodes calculated
#endif

    //-- Set up character pointers for the appropriate m_o
    //   DIRECTION_BIT set: Aend/Bend lie after Astart/Bstart;
    //   otherwise they lie before them and the lengths are mirrored
    if ( m_o & DIRECTION_BIT ) {
        A = A0 + ( Astart - 1 );
        B = B0 + ( Bstart - 1 );
        N = Aend - Astart + 1;
        M = Bend - Bstart + 1;
    } else {
        A = A0 + ( Astart + 1 );
        B = B0 + ( Bstart + 1 );
        N = Astart - Aend + 1;
        M = Bstart - Bend + 1;
    }

    //-- Initialize the diagonals list
    Diag = (Diagonal *) Safe_malloc ( Ll * sizeof(Diagonal) );

    //-- Initialize position 0,0 in the matrices
    //   post-increment: diagonal 0 keeps rbound 0, diagonal 1 starts at 1
    Diag[0] . lbound = lbound;
    Diag[0] . rbound = rbound ++;

    Diag[0] . I = (Node *) Safe_malloc ( 1 * sizeof(Node) );
    Diag[0] . I[0] . S[DELETE] . value = min_score;
    Diag[0] . I[0] . S[INSERT] . value = min_score;
    Diag[0] . I[0] . S[MATCH] . value = 0;
    Diag[0] . I[0] . max = Diag[0] . I[0] . S + MATCH;

    Diag[0] . I[0] . S[DELETE] . used = NONE;
    Diag[0] . I[0] . S[INSERT] . used = NONE;
    Diag[0] . I[0] . S[MATCH] . used = START;

    L = N < M ? N : M;

    //-- **START** of diagonal processing loop
    //-- Calculate the rest of the diagonals until goal reached or score worsens
    //   The loop ends when the last diagonal (N + M) has been processed,
    //   when more than _break_len diagonals have passed since the diagonal
    //   holding the high score, or when trimming left no nodes in bounds
    for ( Dct = 1; Dct <= N + M  &&
            (Dct - FinishCt) <= _break_len  &&
            lbound <= rbound; Dct++ ) {
        //-- If diagonals capacity exceeded, realloc
        if ( Dct >= Ll ) {
            Ll *= 2;
            Diag = (Diagonal *) Safe_realloc
                   ( Diag, sizeof(Diagonal) * Ll );
        }

        Diag[Dct] . lbound = lbound;
        Diag[Dct] . rbound = rbound;

        //-- malloc space for the edit char and score nodes
        Ds = rbound - lbound + 1;
        Diag[Dct] . I = (Node *) Safe_malloc
                        ( Ds * sizeof(Node) );

#ifdef _DEBUG_VERBOSE
        //-- Keep count of trimmed and calculated nodes
        CalcCt += Ds;
        TrimCt += Dl - Ds;
        if ( Ds > MaxL )
            MaxL = Ds;
#endif

        //-- Set diagonal index adjustment values
        //   the offsets into the parent and grandparent diagonals change
        //   once the diagonal counter passes N
        if ( Dct <= N ) {
            Iadj = 0;
            Madj = -1;
        } else {
            Iadj = 1;
            Madj = Dct == N + 1 ? 0 : 1;
        }
        Dadj = Iadj - 1;

        //-- Set parent diagonal values
        //   PDi is converted from a conceptual index to a memory index by
        //   subtracting the parent's lbound
        PDct = Dct - 1;
        PDs = Diag[PDct] . rbound - Diag[PDct] . lbound + 1;
        PDi = lbound + Dadj;
        PDi = PDi - Diag[PDct] . lbound;

        //-- Set grandparent diagonal values
        PPDct = Dct - 2;
        if ( PPDct >= 0 ) {
            PPDs = Diag[PPDct] . rbound - Diag[PPDct] . lbound + 1;
            PPDi = lbound + Madj;
            PPDi = PPDi - Diag[PPDct] . lbound;
        } else
            PPDi = PPDs = 0;   // no grandparent: size 0 fails every range check

        //-- If forced alignment, don't keep track of global max
        //   resetting here makes the comparison below start from min_score
        //   on every diagonal
        if ( m_o & FORCED_BIT )
            high_score = min_score;

        //-- **START** of internal node scoring loop
        //-- Calculate scores for every node (within bounds) for diagonal Dct
        for ( CDi = lbound; CDi <= rbound; CDi ++ ) {
            //-- Set the index (in memory) of current node and clear score
            Di = CDi - Diag[Dct] . lbound;

            //-- Calculate DELETE score
            //   a parent score whose 'used' is NONE is passed on without a
            //   gap penalty; continuing a DELETE costs CONT_GAP_SCORE,
            //   coming from INSERT or MATCH costs OPEN_GAP_SCORE
            if ( PDi >= 0  &&  PDi < PDs )
                scoreEdit
                (Diag[Dct] . I[Di] . S[DELETE],
                 Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
                 Diag[PDct] . I[PDi] . S[DELETE] . value :
                 Diag[PDct] . I[PDi] . S[DELETE] . value +
                 CONT_GAP_SCORE [_matrix_type],
                 Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
                 Diag[PDct] . I[PDi] . S[INSERT] . value :
                 Diag[PDct] . I[PDi] . S[INSERT] . value +
                 OPEN_GAP_SCORE [_matrix_type],
                 Diag[PDct] . I[PDi] . S[MATCH]  . used == NONE ?
                 Diag[PDct] . I[PDi] . S[MATCH]  . value :
                 Diag[PDct] . I[PDi] . S[MATCH]  . value +
                 OPEN_GAP_SCORE [_matrix_type]);
            else {
                Diag[Dct] . I[Di] . S[DELETE] . value = min_score;
                Diag[Dct] . I[Di] . S[DELETE] . used = NONE;
            }

            //-- The INSERT parent is the next node on the parent diagonal;
            //   this increment also advances PDi for the next CDi iteration
            PDi ++;

            //-- Calculate INSERT score
            //   same scheme as DELETE, with CONT_GAP_SCORE applied to a
            //   continued INSERT instead
            if ( PDi >= 0  &&  PDi < PDs )
                scoreEdit
                (Diag[Dct] . I[Di] . S[INSERT],
                 Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
                 Diag[PDct] . I[PDi] . S[DELETE] . value :
                 Diag[PDct] . I[PDi] . S[DELETE] . value +
                 OPEN_GAP_SCORE [_matrix_type],
                 Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
                 Diag[PDct] . I[PDi] . S[INSERT] . value :
                 Diag[PDct] . I[PDi] . S[INSERT] . value +
                 CONT_GAP_SCORE [_matrix_type],
                 Diag[PDct] . I[PDi] . S[MATCH]  . used == NONE ?
                 Diag[PDct] . I[PDi] . S[MATCH]  . value :
                 Diag[PDct] . I[PDi] . S[MATCH]  . value +
                 OPEN_GAP_SCORE [_matrix_type]);
            else {
                Diag[Dct] . I[Di] . S[INSERT] . value = min_score;
                Diag[Dct] . I[Di] . S[INSERT] . used = NONE;
            }

            //-- Calculate MATCH/MIS-MATCH score
            //   taken from the grandparent diagonal, then adjusted by the
            //   value scoreMatch() returns for this node
            if ( PPDi >= 0  &&  PPDi < PPDs ) {
                scoreEdit
                (Diag[Dct] . I[Di] . S[MATCH],
                 Diag[PPDct] . I[PPDi] . S[DELETE] . value,
                 Diag[PPDct] . I[PPDi] . S[INSERT] . value,
                 Diag[PPDct] . I[PPDi] . S[MATCH]  . value);
                Diag[Dct] . I[Di] . S[MATCH] . value +=
                    scoreMatch (Diag[Dct], Dct, CDi, A, B, N, m_o);
            } else {
                Diag[Dct] . I[Di] . S[MATCH] . value = min_score;
                Diag[Dct] . I[Di] . S[MATCH] . used = NONE;
            }

            PPDi ++;

            Diag[Dct] . I[Di] . max = maxScore (Diag[Dct] . I[Di] . S);

            //-- Reset high_score if new global max was found
            //   '>=' means a tie moves the finish point to the later node
            if ( Diag[Dct] . I[Di] . max->value >= high_score ) {
                high_score = Diag[Dct] . I[Di] . max->value;
                FinishCt = Dct;
                FinishCDi = CDi;
            }
        }
        //-- **END** of internal node scoring loop


        //-- Calculate max non-optimal score
        //   only examined from diagonal L onward, and only at conceptual
        //   index 0 (when L == N) or index M (otherwise) if still in bounds
        if ( m_o & SEQEND_BIT  &&  Dct >= L ) {
            if ( L == N ) {
                if ( lbound == 0 ) {
                    if ( Diag[Dct] . I[0] . max->value >= xhigh_score ) {
                        xhigh_score = Diag[Dct] . I[0] . max->value;
                        xFinishCt = Dct;
                        xFinishCDi = 0;
                    }
                }
            } else { // L == M
                if ( rbound == M ) {
                    if ( Diag[Dct] . I[M-Diag[Dct].lbound] .
                            max->value >= xhigh_score ) {
                        xhigh_score = Diag[Dct] . I[M-Diag[Dct].lbound] .
                                      max->value;
                        xFinishCt = Dct;
                        xFinishCDi = M;
                    }
                }
            }
        }


        //-- If in extender modus operandi, free soon to be greatgrandparent diag
        //   after this only diagonals Dct-1 and Dct still own node arrays
        if ( m_o & SEARCH_BIT  &&  Dct > 1 )
            free ( Diag[PPDct] . I );


        //-- Trim hopeless diagonal nodes
        //   shrink the bounds from each end while the node's best score is
        //   more than max_diff below high_score
        for ( Di = 0; Di < Ds; Di ++ ) {
            if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
                lbound ++;
            else
                break;
        }
        for ( Di = Ds - 1; Di >= 0; Di -- ) {
            if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
                rbound --;
            else
                break;
        }

        //-- Grow new diagonal and reset boundaries
        //   which bound moves depends on whether Dct has reached N and/or M
        if ( Dct < N && Dct < M ) {
            Dl ++;
            rbound ++;
            Dmid = (Dct+1)/2.0;
        } else if ( Dct >= N && Dct >= M ) {
            Dl --;
            lbound --;
            Dmid = N - (Dct+1)/2.0;
        } else if ( Dct >= N ) {
            lbound --;
            Dmid = N - (Dct+1)/2.0;
        } else {
            rbound ++;
            Dmid = (Dct+1)/2.0;
        }

        //-- Trim at hard band
        //   skipped entirely when Dband is not positive
        if ( Dband > 0 ) {
            tlb = (long int)ceil(Dmid - Dband);
            if ( lbound < tlb )
                lbound = tlb;
            trb = (long int)floor(Dmid + Dband);
            if ( rbound > trb )
                rbound = trb;
        }

        //-- Clamp the bounds to the next diagonal's valid index range
        if ( lbound < 0 )
            lbound = 0;
        if ( rbound >= Dl )
            rbound = Dl - 1;
    }
    //-- **END** of diagonal processing loop
    //   the loop leaves Dct one past the last diagonal processed
    Dct --;

    //-- Check if the target was reached
    //   If OPTIMAL, backtrack to last high_score to maximize alignment score
    TargetReached = false;
    if ( Dct == N + M ) {
        if ( ~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT ) {
            TargetReached = true;
            FinishCt = N + M;
            FinishCDi = 0;
        } else if ( FinishCt == Dct )
            TargetReached = true;
    } else if ( m_o & SEQEND_BIT  &&  xFinishCt != 0 ) {
        //-- non-optimal, extend alignment to end of shortest seq if possible
        FinishCt = xFinishCt;
        FinishCDi = xFinishCDi;
    }

    //-- Set A/Bend to finish positions
    //   the offsets are negated when DIRECTION_BIT is not set
    long int Aadj = FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
    long int Badj = FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
    if ( ~m_o & DIRECTION_BIT ) {
        Aadj *= -1;
        Badj *= -1;
    }
    Aend = Astart + Aadj;
    Bend = Bstart + Badj;

#ifdef _DEBUG_VERBOSE
    assert (FinishCt > 1);

    //-- Output calculation statistics
    if ( TargetReached )
        fprintf(stderr,"Finish score = %ld : %ld,%ld\n",
                Diag[FinishCt] . I[0] . max->value, N, M);
    else
        fprintf(stderr,"High score = %ld : %ld,%ld\n", high_score,
                labs(Aadj) + 1, labs(Badj) + 1);
    fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, TrimCt);
    if ( m_o & DIRECTION_BIT )
        fprintf(stderr, "%ld bytes used\n",
                (long int)sizeof(Diagonal) * Dct + (long int)sizeof(Node) * CalcCt);
    else
        fprintf(stderr, "%ld bytes used\n",
                ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * 2);
#endif

    //-- If in forward alignment m_o, create the Delta information
    if ( ~m_o & SEARCH_BIT )
        generateDelta (Diag, FinishCt, FinishCDi, N, Delta);

    //-- Free the scoring and edit spaces remaining
    //   with SEARCH_BIT set the older diagonals were already freed inside
    //   the loop, so only the last two are released here
    for ( Di = m_o & SEARCH_BIT ? Dct - 1 : 0; Di <= Dct; Di ++ )
        free ( Diag[Di] . I );
    free ( Diag );

    return TargetReached;
}