// Return the score of this model on string w double Score_Window (struct Fixed_Length_ICM_t fixed, char * w, int left) { static char * buff = NULL; static int buff_len = 0; double score = 0.0; int i; if (fixed.length > buff_len) { buff = (char *) Safe_realloc (buff, fixed.length+1, __FILE__, __LINE__); buff_len = fixed.length; } strncpy (buff, w, fixed.length); // strncpy (buff, w, left); // strncpy (buff+left, w+left+2, fixed.length-left); if (fixed.permutation != NULL) Permute_String (buff, fixed.permutation, fixed.length); for (i = 0; i < fixed.length; i ++) { if (buff [i] == '\0') { fprintf (stderr, "ERROR: String \"%s\" too short in Score_Window\n", buff); exit (-1); } score += Full_Window_Prob (fixed.sub_model[i], buff, 0); } return score; }
/* Record the pointer p in the trace list ptrace and return p unchanged.
   The list of recorded pointers is grown on demand. Both arguments
   must be non-NULL (checked with assert). */
void *
traceAlloc (allocTrace *ptrace, void *p)
{
  assert (ptrace);
  assert (p);

  /* List is full. This is also the state of a trace that has never
     been used (num == max == 0). */
  if (ptrace->max == ptrace->num)
    {
      /* Adding 2 before doubling makes the growth work from max == 0 */
      ptrace->max = 2 * (ptrace->max + 2);
      ptrace->palloced = Safe_realloc (ptrace->palloced,
                                       sizeof (*ptrace->palloced) * ptrace->max);
    }

  ptrace->palloced[ptrace->num] = p;
  ptrace->num++;

  return p;
}
// Permute the first  n  characters of  s  in place so that the new
// s [i]  is the old  s [perm [i]]  for  0 <= i < n .
//
// perm  must hold  n  indices that are valid positions in  s .
// Bytes beyond the first  n  are not touched.  Does nothing when
// n <= 0 .  Not reentrant: a static scratch buffer is reused.

void  Permute_String
    (char * s, int * perm, int n)

  {
   static char  * buff = NULL;
   static int  buff_len = 0;
   int  i;

   if  (n <= 0)
       return;

   if  (n > buff_len)
       {
        buff = (char *) Safe_realloc (buff, n, __FILE__, __LINE__);
        buff_len = n;
       }

   for  (i = 0;  i < n;  i ++)
     buff [i] = s [perm [i]];

   // The scratch buffer is raw bytes, not a C string, so copy with
   // memcpy: strncpy would stop at an embedded '\0' and zero-fill
   // the remainder, losing permuted characters.
   memcpy (s, buff, n);

   return;
  }
/* Add item (with its long key pkey) to the hash table *htab in the
   bucket numbered key. The table is created if *htab is NULL and its
   bucket array is enlarged if key lies beyond the current size. An
   item landing in an occupied bucket is appended to that bucket's
   chain. */
void
hTabAddItemLong (hTab ** htab, int key, void *pkey, void *item)
{
  hTab *tab;
  hashtItem *fresh;
  hashtItem *tail;

  if (*htab == NULL)
    *htab = newHashTable (DEFAULT_HTAB_SIZE);
  tab = *htab;

  /* grow the bucket array so that index key becomes valid */
  if (key > tab->size)
    {
      int newSize = key * 2 + 1;
      int slot;

      tab->table = Safe_realloc (tab->table,
                                 (newSize + 1) * sizeof (hashtItem *));
      /* clear every bucket the realloc added */
      for (slot = tab->size + 1; slot <= newSize; slot++)
        tab->table[slot] = NULL;
      tab->size = newSize;
    }

  /* keep track of the range of keys in use */
  if (tab->maxKey < key)
    tab->maxKey = key;
  if (tab->minKey > key)
    tab->minKey = key;

  fresh = _newHashtItem (key, pkey, item);

  tail = tab->table[key];
  if (tail == NULL)
    {
      /* empty bucket: the new item starts the chain */
      tab->table[key] = fresh;
    }
  else
    {
      /* clash: walk to the end of the chain and append */
      while (tail->next)
        tail = tail->next;
      tail->next = fresh;
    }

  tab->nItems++;
}
//------------------------------------------ Private Function Definitions ----//
bool _alignEngine
     (const char * A0, long int Astart, long int & Aend,
      const char * B0, long int Bstart, long int & Bend,
      vector<long int> & Delta, unsigned int m_o)

     //  Diagonal-by-diagonal dynamic programming alignment of A against B.
     //
     //  A0 is a sequence such that A [1...\0]
     //  B0 is a sequence such that B [1...\0]
     //  The alignment should use bases A [Astart...Aend] (inclusive)
     //  The alignment should use bases B [Bstart...Bend] (inclusive)
     //       of [Aend...Astart] etc. if BACKWARD_SEARCH
     //       Aend must never equal Astart, same goes for Bend and Bstart
     //  Delta is an integer vector, not necessarily empty
     //  m_o is the modus operandi of the function:
     //      FORWARD_ALIGN, FORWARD_SEARCH, BACKWARD_SEARCH
     //  Returns true on success (Aend & Bend reached) or false on failure
     //
     //  On return Aend and Bend are overwritten with the positions of the
     //  finishing node chosen below.  generateDelta is called on Delta only
     //  when SEARCH_BIT is clear in m_o.  All diagonal storage allocated
     //  here is freed before returning.  Also reads _break_len, _banding and
     //  _matrix_type, which are defined outside this function.

{
  Diagonal * Diag;      // the list of diagonals to make up edit matrix
  bool TargetReached;   // the target was reached

  const char * A, * B;  // the sequence pointers to be used by this func

  long int min_score = (-1 * LONG_MAX);   // minimum possible score
  long int high_score = min_score;        // global maximum score
  long int xhigh_score = min_score;       // non-optimal high score

  // max score difference
  long int max_diff = GOOD_SCORE [getMatrixType( )] * _break_len;

  long int CDi;           // conceptual diagonal index (not relating to mem)
  long int Dct, Di;       // diagonal counter, actual diagonal index
  long int PDct, PPDct;   // previous diagonal and prev prev diagonal
  long int PDi, PPDi;     // previous diagonal index and prev prev diag index
  long int Ds, PDs, PPDs; // diagonal size, prev, prev prev diagonal size
                          //   where 'size' = rbound - lbound + 1
  long int Ll = 100;      // capacity of the diagonal list
  long int Dl = 2;        // current conceptual diagonal length
  long int lbound = 0;    // current diagonal left(lower) node bound index
  long int rbound = 0;    // current diagonal right(upper) node bound index
  long int FinishCt = 0;  // diagonal containing the high_score
  long int FinishCDi = 0; // conceptual index of the high_score on FinishCt
  long int xFinishCt = 0;  // non-optimal ...
  long int xFinishCDi = 0; // non-optimal ...
  long int N, M, L;       // maximum matrix dimensions... N rows, M columns
  long int tlb, trb;      // temporary left/right bounds for the hard band
  double Dmid = .5;             // diag midpoint
  double Dband = _banding/2.0;  // diag banding

  int Iadj, Dadj, Madj;   // insert, delete and match adjust values

#ifdef _DEBUG_VERBOSE
  long int MaxL = 0;      // biggest diagonal seen
  long int TrimCt = 0;    // counter of nodes trimmed
  long int CalcCt = 0;    // counter of nodes calculated
#endif

  //-- Set up character pointers for the appropriate m_o
  //   (DIRECTION_BIT set: Aend/Bend lie after Astart/Bstart; clear: before)
  if ( m_o & DIRECTION_BIT )
    {
      A = A0 + ( Astart - 1 );
      B = B0 + ( Bstart - 1 );
      N = Aend - Astart + 1;
      M = Bend - Bstart + 1;
    }
  else
    {
      A = A0 + ( Astart + 1 );
      B = B0 + ( Bstart + 1 );
      N = Astart - Aend + 1;
      M = Bstart - Bend + 1;
    }

  //-- Initialize the diagonals list
  Diag = (Diagonal *) Safe_malloc ( Ll * sizeof(Diagonal) );

  //-- Initialize position 0,0 in the matrices
  //   (rbound is post-incremented, so diagonal 1 starts with bounds [0,1])
  Diag[0] . lbound = lbound;
  Diag[0] . rbound = rbound ++;

  Diag[0] . I = (Node *) Safe_malloc ( 1 * sizeof(Node) );
  Diag[0] . I[0] . S[DELETE] . value = min_score;
  Diag[0] . I[0] . S[INSERT] . value = min_score;
  Diag[0] . I[0] . S[MATCH] . value = 0;
  Diag[0] . I[0] . max = Diag[0] . I[0] . S + MATCH;

  Diag[0] . I[0] . S[DELETE] . used = NONE;
  Diag[0] . I[0] . S[INSERT] . used = NONE;
  Diag[0] . I[0] . S[MATCH] . used = START;

  L = N < M ? N : M;

  //-- **START** of diagonal processing loop

  //-- Calculate the rest of the diagonals until goal reached or score worsens
  //   (also stops when trimming has left no nodes, i.e. lbound > rbound)
  for ( Dct = 1; Dct <= N + M && (Dct - FinishCt) <= _break_len && lbound <= rbound; Dct++ )
    {
      //-- If diagonals capacity exceeded, realloc
      if ( Dct >= Ll )
        {
          Ll *= 2;
          Diag = (Diagonal *) Safe_realloc ( Diag, sizeof(Diagonal) * Ll );
        }

      Diag[Dct] . lbound = lbound;
      Diag[Dct] . rbound = rbound;

      //-- malloc space for the edit char and score nodes
      Ds = rbound - lbound + 1;
      Diag[Dct] . I = (Node *) Safe_malloc ( Ds * sizeof(Node) );

#ifdef _DEBUG_VERBOSE
      //-- Keep count of trimmed and calculated nodes
      CalcCt += Ds;
      TrimCt += Dl - Ds;
      if ( Ds > MaxL )
        MaxL = Ds;
#endif

      //-- Set diagonal index adjustment values
      if ( Dct <= N )
        {
          Iadj = 0;
          Madj = -1;
        }
      else
        {
          Iadj = 1;
          Madj = Dct == N + 1 ? 0 : 1;
        }
      Dadj = Iadj - 1;

      //-- Set parent diagonal values
      PDct = Dct - 1;
      PDs = Diag[PDct] . rbound - Diag[PDct] . lbound + 1;
      PDi = lbound + Dadj;
      PDi = PDi - Diag[PDct] . lbound;

      //-- Set grandparent diagonal values
      PPDct = Dct - 2;
      if ( PPDct >= 0 )
        {
          PPDs = Diag[PPDct] . rbound - Diag[PPDct] . lbound + 1;
          PPDi = lbound + Madj;
          PPDi = PPDi - Diag[PPDct] . lbound;
        }
      else
        PPDi = PPDs = 0;

      //-- If forced alignment, don't keep track of global max
      if ( m_o & FORCED_BIT )
        high_score = min_score;

      //-- **START** of internal node scoring loop

      //-- Calculate scores for every node (within bounds) for diagonal Dct
      for ( CDi = lbound; CDi <= rbound; CDi ++ )
        {
          //-- Set the index (in memory) of current node and clear score
          Di = CDi - Diag[Dct] . lbound;

          //-- Calculate DELETE score
          //   (a parent score whose 'used' is NONE is passed through
          //    without a gap penalty added)
          if ( PDi >= 0 && PDi < PDs )
            scoreEdit
              (Diag[Dct] . I[Di] . S[DELETE],
               Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
               Diag[PDct] . I[PDi] . S[DELETE] . value :
               Diag[PDct] . I[PDi] . S[DELETE] . value +
               CONT_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
               Diag[PDct] . I[PDi] . S[INSERT] . value :
               Diag[PDct] . I[PDi] . S[INSERT] . value +
               OPEN_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ?
               Diag[PDct] . I[PDi] . S[MATCH] . value :
               Diag[PDct] . I[PDi] . S[MATCH] . value +
               OPEN_GAP_SCORE [_matrix_type]);
          else
            {
              Diag[Dct] . I[Di] . S[DELETE] . value = min_score;
              Diag[Dct] . I[Di] . S[DELETE] . used = NONE;
            }

          //   the INSERT parent is the next node on the parent diagonal
          PDi ++;

          //-- Calculate INSERT score
          if ( PDi >= 0 && PDi < PDs )
            scoreEdit
              (Diag[Dct] . I[Di] . S[INSERT],
               Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
               Diag[PDct] . I[PDi] . S[DELETE] . value :
               Diag[PDct] . I[PDi] . S[DELETE] . value +
               OPEN_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
               Diag[PDct] . I[PDi] . S[INSERT] . value :
               Diag[PDct] . I[PDi] . S[INSERT] . value +
               CONT_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ?
               Diag[PDct] . I[PDi] . S[MATCH] . value :
               Diag[PDct] . I[PDi] . S[MATCH] . value +
               OPEN_GAP_SCORE [_matrix_type]);
          else
            {
              Diag[Dct] . I[Di] . S[INSERT] . value = min_score;
              Diag[Dct] . I[Di] . S[INSERT] . used = NONE;
            }

          //-- Calculate MATCH/MIS-MATCH score
          if ( PPDi >= 0 && PPDi < PPDs )
            {
              scoreEdit
                (Diag[Dct] . I[Di] . S[MATCH],
                 Diag[PPDct] . I[PPDi] . S[DELETE] . value,
                 Diag[PPDct] . I[PPDi] . S[INSERT] . value,
                 Diag[PPDct] . I[PPDi] . S[MATCH] . value);
              Diag[Dct] . I[Di] . S[MATCH] . value +=
                scoreMatch (Diag[Dct], Dct, CDi, A, B, N, m_o);
            }
          else
            {
              Diag[Dct] . I[Di] . S[MATCH] . value = min_score;
              Diag[Dct] . I[Di] . S[MATCH] . used = NONE;
            }

          PPDi ++;

          Diag[Dct] . I[Di] . max = maxScore (Diag[Dct] . I[Di] . S);

          //-- Reset high_score if new global max was found
          //   (>= means a later node wins a tie)
          if ( Diag[Dct] . I[Di] . max->value >= high_score )
            {
              high_score = Diag[Dct] . I[Di] . max->value;
              FinishCt = Dct;
              FinishCDi = CDi;
            }
        }

      //-- **END** of internal node scoring loop

      //-- Calculate max non-optimal score
      if ( m_o & SEQEND_BIT && Dct >= L )
        {
          if ( L == N )
            {
              if ( lbound == 0 )
                {
                  if ( Diag[Dct] . I[0] . max->value >= xhigh_score )
                    {
                      xhigh_score = Diag[Dct] . I[0] . max->value;
                      xFinishCt = Dct;
                      xFinishCDi = 0;
                    }
                }
            }
          else
            {
              // L == M
              if ( rbound == M )
                {
                  if ( Diag[Dct] . I[M-Diag[Dct].lbound] . max->value >= xhigh_score )
                    {
                      xhigh_score = Diag[Dct] . I[M-Diag[Dct].lbound] . max->value;
                      xFinishCt = Dct;
                      xFinishCDi = M;
                    }
                }
            }
        }

      //-- If in extender modus operandi, free soon to be greatgrandparent diag
      if ( m_o & SEARCH_BIT && Dct > 1 )
        free ( Diag[PPDct] . I );

      //-- Trim hopeless diagonal nodes
      //   (nodes at either end scoring more than max_diff below high_score)
      for ( Di = 0; Di < Ds; Di ++ )
        {
          if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
            lbound ++;
          else
            break;
        }
      for ( Di = Ds - 1; Di >= 0; Di -- )
        {
          if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
            rbound --;
          else
            break;
        }

      //-- Grow new diagonal and reset boundaries
      if ( Dct < N && Dct < M )
        {
          Dl ++;
          rbound ++;
          Dmid = (Dct+1)/2.0;
        }
      else if ( Dct >= N && Dct >= M )
        {
          Dl --;
          lbound --;
          Dmid = N - (Dct+1)/2.0;
        }
      else if ( Dct >= N )
        {
          lbound --;
          Dmid = N - (Dct+1)/2.0;
        }
      else
        {
          rbound ++;
          Dmid = (Dct+1)/2.0;
        }

      //-- Trim at hard band
      if ( Dband > 0 )
        {
          tlb = (long int)ceil(Dmid - Dband);
          if ( lbound < tlb )
            lbound = tlb;
          trb = (long int)floor(Dmid + Dband);
          if ( rbound > trb )
            rbound = trb;
        }

      //-- Clamp the bounds to the conceptual diagonal [0, Dl - 1]
      if ( lbound < 0 )
        lbound = 0;
      if ( rbound >= Dl )
        rbound = Dl - 1;
    }

  //-- **END** of diagonal processing loop
  //   Step back so that Dct is the last diagonal actually processed
  Dct --;

  //-- Check if the target was reached
  //   If OPTIMAL, backtrack to last high_score to maximize alignment score
  TargetReached = false;
  if ( Dct == N + M )
    {
      if ( ~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT )
        {
          TargetReached = true;
          FinishCt = N + M;
          FinishCDi = 0;
        }
      else if ( FinishCt == Dct )
        TargetReached = true;
    }
  else if ( m_o & SEQEND_BIT && xFinishCt != 0 )
    {
      //-- non-optimal, extend alignment to end of shortest seq if possible
      FinishCt = xFinishCt;
      FinishCDi = xFinishCDi;
    }

  //-- Set A/Bend to finish positions
  long int Aadj = FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
  long int Badj = FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
  if ( ~m_o & DIRECTION_BIT )
    {
      Aadj *= -1;
      Badj *= -1;
    }
  Aend = Astart + Aadj;
  Bend = Bstart + Badj;

#ifdef _DEBUG_VERBOSE
  assert (FinishCt > 1);

  //-- Output calculation statistics
  if ( TargetReached )
    fprintf(stderr,"Finish score = %ld : %ld,%ld\n",
            Diag[FinishCt] . I[0] . max->value, N, M);
  else
    fprintf(stderr,"High score = %ld : %ld,%ld\n", high_score,
            labs(Aadj) + 1, labs(Badj) + 1);
  fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, TrimCt);
  if ( m_o & DIRECTION_BIT )
    fprintf(stderr, "%ld bytes used\n",
            (long int)sizeof(Diagonal) * Dct + (long int)sizeof(Node) * CalcCt);
  else
    fprintf(stderr, "%ld bytes used\n",
            ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * 2);
#endif

  //-- If in forward alignment m_o, create the Delta information
  if ( ~m_o & SEARCH_BIT )
    generateDelta (Diag, FinishCt, FinishCDi, N, Delta);

  //-- Free the scoring and edit spaces remaining
  //   (with SEARCH_BIT set only the last two diagonals are still allocated,
  //    older ones were freed inside the loop)
  for ( Di = m_o & SEARCH_BIT ? Dct - 1 : 0; Di <= Dct; Di ++ )
    free ( Diag[Di] . I );
  free ( Diag );

  return TargetReached;
}
static void generateDelta (const Diagonal * Diag, long int FinishCt, long int FinishCDi, long int N, vector<long int> & Delta) // Diag is the list of diagonals that compose the edit matrix // FinishCt is the diagonal that contains the finishing node // FinishCDi is the conceptual finishing node, in FinishCt, for the align // N & M are the target positions for the alignment // Delta is the vector in which to store the alignment data, new data // will be appended onto any existing data. // NOTE: there will be no zero at the end of the data, end of data // is signaled by the end of the vector // Return is void { //-- Function pre-conditions #ifdef _DEBUG_ASSERT assert ( Diag != NULL ); assert ( FinishCt > 1 ); #endif long int Count; // delta counter long int Dct = FinishCt; // diagonal index long int CDi = FinishCDi; // conceptual node index long int Di = 0; // actual node index long int Pi = 0; // path index long int PSize = 100; // capacity of the path space char * Reverse_Path; // path space Score curr_score; int edit; //-- malloc space for the edit path Reverse_Path = (char *) Safe_malloc ( PSize * sizeof(char) ); //-- Which Score index is the maximum value in? Store in edit Di = CDi - Diag[Dct] . lbound; edit = Diag[Dct] . I[Di] . max - Diag[Dct] . I[Di] . S; //-- Walk the path backwards through the edit space while ( Dct >= 0 ) { //-- remalloc path space if n.cppessary if ( Pi >= PSize ) { PSize *= 2; Reverse_Path = (char *) Safe_realloc ( Reverse_Path, sizeof(char) * PSize ); } Di = CDi - Diag[Dct] . lbound; curr_score = Diag[Dct] . I[Di] . S[edit]; Reverse_Path[Pi ++] = edit; switch ( edit ) { case DELETE : CDi = Dct -- <= N ? CDi - 1 : CDi; break; case INSERT : CDi = Dct -- <= N ? CDi : CDi + 1; break; case MATCH : CDi = Dct <= N ? CDi - 1 : ( Dct == N + 1 ? 
CDi : CDi + 1 ); Dct -= 2; break; case START : Dct = -1; break; default : fprintf(stderr,"\nERROR: Invalid edit matrix entry,\n" " please file a bug report\n"); exit ( EXIT_FAILURE ); } edit = curr_score . used; } //-- Generate the delta information Count = 1; for (Pi -= 2; Pi >= 0; Pi --) { switch ( Reverse_Path[Pi] ) { case DELETE : Delta . push_back(-Count); Count = 1; break; case INSERT : Delta . push_back(Count); Count = 1; break; case MATCH : Count ++; break; case START : break; default : fprintf(stderr,"\nERROR: Invalid path matrix entry,\n" " please file a bug report\n"); exit ( EXIT_FAILURE ); } } free (Reverse_Path); return; }
int Read_Multi_String (FILE * fp, char * & T, long int & Size, char Name [], int Partial, int no) /* Read next string #no from fp (assuming MULTIFASTA format) into T [1 ..] * which has Size characters. Allocate extra memory if needed * and adjust Size accordingly. Return TRUE if successful, FALSE * otherwise (e.g., EOF). Partial indicates if first line has * numbers indicating a subrange of characters to read. If Partial is * true, then the first line must have 2 integers indicating positions * in the string and only those positions will be put into T . If * Partial is false, the entire string is put into T . Sets Name * to the first string after the starting '>' character. */ { char * P, Line [MAX_LINE]; long int Len, Lo, Hi; int Ch, Ct = FALSE; int i; i=0; while(i<no) { while ((Ch = fgetc (fp)) != EOF && Ch != '>') ; if (Ch == EOF) return FALSE; if (Ch == '>') i++; } fgets (Line, MAX_LINE, fp); Len = strlen (Line); assert (Len > 0 && Line [Len - 1] == '\n'); P = strtok (Line, " \t\n"); if (P != NULL) strcpy (Name, P); else Name [0] = '\0'; Lo = 0; Hi = LONG_MAX; if (Partial) { P = strtok (NULL, " \t\n"); if (P != NULL) { Lo = strtol (P, NULL, 10); P = strtok (NULL, " \t\n"); if (P != NULL) Hi = strtol (P, NULL, 10); } assert (Lo <= Hi); } Ct = 0; T [0] = '\0'; Len = 1; while ((Ch = fgetc (fp)) != EOF && Ch != '>') { if (isspace (Ch)) continue; Ct ++; if (Ct < Lo || Ct > Hi) continue; if (Len >= Size) { Size += INCR_SIZE; T = (char *) Safe_realloc (T, Size); } Ch = tolower (Ch); switch (Ch) { case 'a' : case 'c' : case 'g' : case 't' : case 's' : case 'w' : case 'r' : case 'y' : case 'm' : case 'k' : case 'b' : case 'd' : case 'h' : case 'v' : case 'n' : break; default : fprintf (stderr, "Unexpected character `%c\' in string %s\n", Ch, Name); Ch = 'n'; } T [Len ++] = Ch; } T [Len] = '\0'; if (Ch == '>') ungetc (Ch, fp); return TRUE; }
/*-----------------------------------------------------------------*/
/* iCodeBreakDown - split the iCode chain ic into extended basic   */
/*                  blocks and return an index of them. A block    */
/*                  that ends in GOTO or IFX and targets a block   */
/*                  already in the index may cause a preheader     */
/*                  block to be inserted in front of the target.   */
/*                  The block array is kept NULL terminated.       */
/*-----------------------------------------------------------------*/
ebbIndex *
iCodeBreakDown (iCode * ic)
{
  eBBlock **blocks = NULL;
  iCode *rest = ic;
  ebbIndex *index;

  index = Safe_alloc (sizeof (ebbIndex));
  index->count = 0;
  index->dfOrder = NULL;        /* no depth first order information yet */

  /* room for the first entry */
  blocks = Safe_alloc (sizeof (eBBlock *));
  index->bbOrder = blocks;

  while (rest)
    {
      /* carve the next block off the front of the chain */
      eBBlock *blk = iCode2eBBlock (rest);
      iCode *tail = blk->ech;
      symbol *target;
      eBBlock *dest;

      rest = tail->next;
      tail->next = NULL;        /* mark the end of this chain */
      if (rest)
        rest->prev = NULL;

      /* number the block and store it */
      blk->bbnum = index->count;
      blocks[index->count] = blk;
      index->count++;

      /* make room for the next one; realloc does not clear the new */
      /* slot, so terminate the array by hand                       */
      blocks = Safe_realloc (blocks, (index->count + 1) * sizeof (eBBlock *));
      index->bbOrder = blocks;
      blocks[index->count] = 0;

      /* only blocks ending in a goto or a conditional branch can */
      /* close a loop                                             */
      if (!tail || (tail->op != GOTO && tail->op != IFX))
        continue;

      if (tail->op == GOTO)
        target = IC_LABEL (tail);
      else
        {
          target = IC_TRUE (tail);
          if (!target)
            target = IC_FALSE (tail);
        }

      /* if the destination block already exists this could be a */
      /* loop: give the destination a preheader unless it has one */
      dest = eBBWithEntryLabel (index, target);
      if (dest && dest->preHeader == NULL && otherPathsPresent (blocks, dest))
        {
          symbol *preLabel = newiTempLoopHeaderLabel (1);
          eBBlock *pre;
          int pos, k;

          /* in every block ahead of the destination, replace the */
          /* entry label with the preheader label                 */
          for (pos = 0; pos < index->count; pos++)
            {
              if (blocks[pos] == dest)
                break;
              replaceLabel (blocks[pos], target, preLabel);
            }

          /* one more slot for the preheader */
          index->count++;
          blocks = Safe_realloc (blocks, (index->count + 1) * sizeof (eBBlock *));
          index->bbOrder = blocks;
          blocks[index->count] = 0;

          /* shift everything from pos onwards down by one and */
          /* renumber the moved blocks                         */
          for (k = index->count - 1; k > pos; k--)
            {
              blocks[k] = blocks[k - 1];
              blocks[k]->bbnum = k;
            }

          /* the freed slot receives the new preheader block */
          pre = neweBBlock ();
          blocks[pos] = pre;
          dest->preHeader = pre;
          blocks[pos]->bbnum = pos;
          blocks[pos]->entryLabel = preLabel;
          blocks[pos]->sch = blocks[pos]->ech = newiCodeLabelGoto (LABEL, preLabel);
          blocks[pos]->sch->filename = dest->sch->filename;
          blocks[pos]->sch->lineno = dest->sch->lineno;
        }
    }

  /* mark the end */
  blocks[index->count] = NULL;

  return index;
}