/*
 * Build a simple consensus sequence for an alignment: in every column the
 * most frequently occurring encoded character wins (ties go to the lowest
 * code, a column without any counted character yields code 0).
 *
 * AS       NULL-terminated array of aligned sequences. strlen(AS[0]) fixes
 *          the number of columns and every row is indexed up to that length
 *          (rows are assumed to be at least as long as AS[0] -- confirm
 *          against callers).
 * returns  a buffer obtained from space() holding strlen(AS[0]) characters
 *          plus a terminating NUL.
 */
char *consensus(const char *AS[]) {
  char *string;
  int i, n;

  n = strlen(AS[0]);
  string = (char *) space((n+1)*sizeof(char));
  for (i=0; i<n; i++) {
    int s, c, fm, freq[8] = {0,0,0,0,0,0,0,0};

    for (s=0; AS[s]!=NULL; s++) {
      int code = encode_char(AS[s][i]);
      /* ignore codes outside the table instead of writing past freq[] */
      if (code >= 0 && code < 8)
        freq[code]++;
    }

    /* find the most frequent char */
    for (s=c=fm=0; s<8; s++)
      if (freq[s]>fm) { c=s; fm=freq[s]; }

    /* skip T: this adjustment used to be applied to the loop counter `s`
     * (always 8 at this point and never read again), which made it a no-op.
     * It has to shift the winning code `c` instead.
     * NOTE(review): presumes encode_char() folds T onto U, so that codes
     * above 4 sit one slot further along in Law_and_Order -- confirm. */
    if (c>4) c++;
    string[i]=Law_and_Order[c];
  }
  string[n] = '\0';
  return string;
}
/*
 * Follow `word` through the double-array trie `dat`, starting at node 0.
 *
 * The walk stops at the first of: end of `word` (a zero element), `limit`
 * elements consumed (limit == 0 means no limit), the current node having
 * base == DATRIE_UNUSED, or a transition that leaves the table or whose
 * target does not name the current node as its parent.
 *
 * dat        trie table; indexed with values in [0, DATRIE_SIZE)
 *            (assumes DATRIE_SIZE is the number of entries -- confirm).
 * word       zero-terminated sequence to match.
 * match_pos  if non-NULL, receives the number of elements consumed.
 * id         if non-NULL, receives the index of the last node reached.
 * limit      maximum number of elements to consume, 0 for unlimited.
 */
void match_word(const DoubleArrayTrieItem *dat, const ucs4_t * word, int *match_pos, int *id, int limit)
{
  int i, j, p;

  for (i = 0, p = 0;
       word[p] && (limit == 0 || p < limit) && dat[i].base != DATRIE_UNUSED;
       p ++) {
    int k = encode_char(word[p]);

    j = dat[i].base + k;
    /* j == DATRIE_SIZE is already one past the last entry, so reject it
     * before dat[j] is read (the check used to be `j > DATRIE_SIZE`). */
    if (j < 0 || j >= DATRIE_SIZE || dat[j].parent != i)
      break;
    i = j;
  }

  if (match_pos)
    *match_pos = p;
  if (id)
    *id = i;
}
/*
 * Apply a Vigenere-style transformation to `src`, writing the result to `dst`.
 *
 * Character i of `src` is combined with character (i mod strlen(pass)) of
 * `pass` through encode_char() when `encode` is non-zero, and through
 * decode_char() otherwise.
 *
 * dst     output buffer; must have room for strlen(src) + 1 bytes. It is
 *         always NUL-terminated on return, so it need not be initialised.
 * src     NUL-terminated input text.
 * pass    NUL-terminated key. An empty key used to trigger a modulo by zero;
 *         it now means "no key" and `src` is copied through unchanged.
 * encode  non-zero to encode, zero to decode.
 *
 * The length of `src` is measured once up front, so the number of characters
 * processed does not depend on what is written to `dst` (relevant when the
 * two buffers are the same).
 */
void vigenere(char* dst, char* src, char* pass, int encode) {
  size_t i;
  size_t src_len = strlen(src);
  size_t pass_len = strlen(pass);

  if (pass_len == 0) {
    /* nothing to combine with: plain copy */
    for (i = 0; i < src_len; i++)
      dst[i] = src[i];
    dst[i] = '\0';
    return;
  }

  for (i = 0; i < src_len; i++) {
    char key = pass[i % pass_len];

    if (encode)
      dst[i] = encode_char(src[i], key);
    else
      dst[i] = decode_char(src[i], key);
  }

  /* Ensure trailing null character since `dst` may not have been initialized
   * yet. */
  dst[i] = '\0';
}
int find_saddle(char *sequence, char *struc1, char *struc2, int max) { int maxl, maxE, i; char *tmp; move_t *bestpath=NULL; int dir; maxE = INT_MAX - 1; seq = sequence; update_fold_params(); make_pair_matrix(); /* nummerically encode sequence */ S = (short *) space(sizeof(short)*(strlen(seq)+1)); S1 = (short *) space(sizeof(short)*(strlen(seq)+1)); S[0] = S1[0] = (short) strlen(seq); for (i=0; i< strlen(seq); i++) { S[i+1] = encode_char(seq[i]); S1[i+1] = alias[S[i+1]]; } maxl=1; do { int saddleE; path_fwd = !path_fwd; if (maxl>max) maxl=max; saddleE = find_path_once(struc1, struc2, maxE, maxl); if (saddleE<maxE) { maxE = saddleE; if (bestpath) free(bestpath); bestpath = path; dir = path_fwd; } else free(path); tmp=struc1; struc1=struc2; struc2=tmp; maxl *=2; } while (maxl<2*max); free(S); free(S1); path=bestpath; path_fwd = dir; return maxE; }
/*
 * Rebuild the list of symbol pairs that may pair with each other.
 *
 * After refreshing the pair matrix, `base` is set to the length of
 * `symbolset`. For every ordered combination of two symbols whose codes are
 * flagged in pair[][], the two symbols are appended to `pairset`. `npairs`
 * ends up as the number of such combinations; nrerror() is called when there
 * is none.
 */
PRIVATE void make_pairset(void)
{
  int sym[MAXALPHA];   /* numeric code of each symbol in symbolset */
  int a, b;

  make_pair_matrix();
  base = strlen(symbolset);

  a = 0;
  while (a < base) {
    sym[a] = encode_char(symbolset[a]);
    a++;
  }

  npairs = 0;
  for (a = 0; a < base; a++) {
    for (b = 0; b < base; b++) {
      if (!pair[sym[a]][sym[b]])
        continue;
      /* two characters are stored per pair */
      pairset[npairs] = symbolset[a];
      pairset[npairs + 1] = symbolset[b];
      npairs += 2;
    }
  }

  /* convert the character count into a pair count */
  npairs /= 2;
  if (npairs == 0)
    nrerror("No pairs in this alphabet!");
}
/*
 * Encode up to 5 input bytes into 8 output characters.
 *
 * For each of the 8 output positions, get_octet() names the input byte the
 * position starts in and get_offset() the shift to apply to it. When the
 * shift is negative and a following input byte exists, bits from that byte
 * are OR-ed in as well. As soon as a position would start beyond the input,
 * the remaining positions are handed to pad() and the function returns.
 *
 * plain  input bytes.
 * len    number of valid bytes in `plain`; must be within 0..5 (asserted).
 * coded  output buffer with room for 8 characters.
 */
static void encode_sequence(const char *plain, int len, char *coded)
{
    assert(len >= 0 && len <= 5);

    int pos = 0;
    while (pos < 8) {
        const int src_index = get_octet(pos);
        const int shift = get_offset(pos);

        if (src_index >= len) {
            /* input exhausted: fill everything from here on with padding */
            pad(&coded[pos], 8 - pos);
            return;
        }

        char bits = shift_right(plain[src_index], shift);
        if (shift < 0 && src_index < len - 1) {
            /* the group straddles two input bytes */
            bits |= shift_right(plain[src_index + 1], 8 + shift);
        }

        coded[pos] = encode_char(bits);
        pos++;
    }
}
PUBLIC float max_matching(char *string, char *structure) { int i,j,k,p,t,n1,n2,mm,n; int max=0, kmax; int nbp=0, npieces; int m,s; short *sq; /* init */ m = minimum_loopsize; s = minimum_stacksize; n = strlen(string); for ( i=0; i<=n; i++ ) { for ( j=0; j<=n; j++ ) { aa[i][j] = 0; for (t=0; t<=s; t++ ) { mu[i][j][t] = 0; } } } n = strlen(string); sq = (short *) space((unsigned) (n+1)*sizeof(short)); /* encode sequence numerically */ for ( i=1; i<=n; i++ ) sq[i] = encode_char(string[i-1]); /* interate */ if ( s > 1 ) { for ( p = m+1; p<=n; p++ ) { for ( i=1; i<= n-p; i++ ) { j=i+p; /* no terminal match */ max = 0; kmax = 0; for ( k=i; k<j-m; k++ ) { if( (mu[k][j][s] > 0) ) { if( (mu[i][k-1][0] + mu[k][j][s]) >= max){ max = mu[i][k-1][0] + mu[k][j][s]; kmax = k; } if( (mu[i][k-1][s] + mu[k][j][s]) >= max){ max = mu[i][k-1][s] + mu[k][j][s]; kmax = k; } } if ( mu[i][j-1][0] > max ){ max = mu[i][j-1][0]; kmax = 0; } if ( mu[i][j-1][s] > max ){ max = mu[i][j-1][s]; kmax = 0; } } mu[i][j][0] = max; aa[i][j] = kmax; /* s or more terminal base pairs */ max = 0; if ( pair[sq[i]][sq[j]] ) { if ( mu[i+1][j-1][s-1] > 0 ) { max = mu[i+1][j-1][s-1]; if ( mu[i+1][j-1][s] > max ) max = mu[i+1][j-1][s]; max = max + NJ_energy(sq[i], sq[j]); } } mu[i][j][s] = max; /* exactly one terminal base pair */ max = 0; if ( pair[sq[i]][sq[j]] ) { max = mu[i+1][j-1][0] + NJ_energy(sq[i], sq[j]); } mu[i][j][1] = max; /* any other number of base pairs */ for( t=2; t <= s-1; t++) { max = 0; if ( (pair[sq[i]][sq[j]]) && (mu[i+1][j-1][t-1] > 0) ) { max = mu[i+1][j-1][t-1] + NJ_energy(sq[i], sq[j]); } mu[i][j][t] = max; } } } max = mu[1][n][0]; if ( mu[1][n][s] >= max ) max = mu[1][n][s]; /* BACKTRACKING */ npieces = 1; nbp = 0; pieces[1][1] = 1; pieces[1][2] = n; base_pair[0].i = max; while ( npieces > 0) { n1 = pieces[npieces][1]; n2 = pieces[npieces][2]; npieces--; if ( mu[n1][n2][0] >= mu[n1][n2][s] ) { /* n1 ./. 
n2 */ k = aa[n1][n2]; if ( k > 0 ) { /* pb k...n2 */ if( ((k-1)-n1) > m ) { /*non-vanishing 1st part*/ npieces++ ; pieces[npieces][1] = n1; pieces[npieces][2] = k-1; } n1 = k; mm = mu[n1][n2][s]; while ( mu[n1][n2][s] == mm ) { nbp++; mm--; base_pair[nbp].i = n1; base_pair[nbp].j = n2; n1++; n2--; } for ( i=s-1; i >=1; i--) { nbp++; base_pair[nbp].i = n1; base_pair[nbp].j = n2; n1++; n2--; } if( (n2-n1) > m ) { npieces++; pieces[npieces][1] = n1; pieces[npieces][2] = n2; } } else { if( ((n2-1)-n1) > m ) { npieces++; pieces[npieces][1] = n1; pieces[npieces][2] = n2-1; } } } else { /* n1...n2 */ mm = mu[n1][n2][s]; while ( mu[n1][n2][s] == mm ) { nbp++; mm--; base_pair[nbp].i = n1; base_pair[nbp].j = n2; n1++; n2--; } for ( i=s-1; i >=1; i-- ) { nbp++; base_pair[nbp].i = n1; base_pair[nbp].j = n2; n1++; n2--; } if( (n2-n1) > m ) { npieces++; pieces[npieces][1] = n1; pieces[npieces][2] = n2; } } } /* REPEAT UNTIL npieces==0 */ } /* END OF THE s>1 BLOCK */ if ( s == 1 ) { /* iterate */ for ( p=m+1; p<=n; p++ ) { for( i=1; i<= n-p; i++ ) { j=i+p; max = mu[i][j-1][0]; kmax = 0; /* no additional bracket */ for ( k=i; k<j-m; k++ ) { if ( pair[sq[k]][sq[j]] ) { if( (mu[i][k-1][0] + mu[k+1][j-1][0] + NJ_energy(sq[k], sq[j])) > max ) { max = mu[i][k-1][0] + mu[k+1][j-1][0] + NJ_energy(sq[k], sq[j]); kmax = k; } } } mu[i][j][0] = max; aa[i][j] = kmax; } } max = mu[1][n][0]; /* mm_print_matrix(mu, string); */ /* BACKTRACKING */ base_pair[0].i = max; npieces = 1; nbp = 0; pieces[1][1] = 1; pieces[1][2] = n; while ( npieces > 0 ) { n1 = pieces[npieces][1]; n2 = pieces[npieces][2]; npieces--; k = aa[n1][n2]; if ( k>0 ) { /* stack von k...n2 */ nbp++; base_pair[nbp].i = k; base_pair[nbp].j = n2; if( (k-n1) > (m+1) ) { npieces++; pieces[npieces][1] = n1; pieces[npieces][2] = k-1; } if ( ((n2-1)-(k+1)) > m ) { npieces++; pieces[npieces][1] = k+1; pieces[npieces][2] = n2-1; } } else { if ( (n2-n1) > (m+1) ) { npieces++; pieces[npieces][1] = n1; /* eigentlich unnoetig */ 
pieces[npieces][2] = n2-1; } } } /* repeat until npieces = 0 */ } /* END OF s==1 */ for ( i=0; i<n; i++ ) structure[i]='.'; structure[n]='\0'; for ( i=1; i<= nbp; i++ ) { n1 = base_pair[i].i; n2 = base_pair[i].j; structure[n1-1] = '('; structure[n2-1] = ')'; } return (float)max; }
/* ------------------------------------------------------------------------- */ PUBLIC void mm_fill_arrays(char *string, char *structure) { int i,j,k,p,t,n; int max, kmax; int m,s; short *sq; /* init */ m = minimum_loopsize; s = minimum_stacksize; n = strlen(string); for ( i=0; i<=n; i++ ) { for ( j=0; j<=n; j++ ) { aa[i][j] = 0; for (t=0; t<=s; t++ ) { mu[i][j][t] = 0; } } } n = strlen(string); sq = (short *) space((unsigned) (n+1)*sizeof(short)); for ( i=1; i<=n; i++ ) sq[i] = encode_char(string[i-1]); if ( s > 1 ) { /* case: minimal stack size > 1 */ /* interate */ for ( p = m+1; p<=n; p++ ) { for ( i=1; i<= n-p; i++ ) { j = i + p; /* no terminal match */ max = 0; kmax = 0; for ( k=i; k<j-m; k++ ) { if( (mu[k][j][s] > 0) ) { if( (mu[i][k-1][0] + mu[k][j][s]) >= max){ max = mu[i][k-1][0] + mu[k][j][s]; kmax = k; } if( (mu[i][k-1][s] + mu[k][j][s]) >= max){ max = mu[i][k-1][s]+mu[k][j][s]; kmax = k; } } if ( mu[i][j-1][0] > max ){ max = mu[i][j-1][0]; kmax = 0; } if ( mu[i][j-1][s] > max ){ max = mu[i][j-1][s]; kmax = 0; } } mu[i][j][0] = max; aa[i][j] = kmax; /* s or more terminal base pairs */ max = 0; if ( pair[sq[i]][sq[j]] ) { if ( mu[i+1][j-1][s-1] > 0 ) { max = mu[i+1][j-1][s-1]; if ( mu[i+1][j-1][s] > max ) max = mu[i+1][j-1][s]; max = max + NJ_energy(sq[i], sq[j]); } } mu[i][j][s] = max; /* exactly one terminal base pair */ max = 0; if ( pair[sq[i]][sq[j]] ) { max = mu[i+1][j-1][0] + 1; } mu[i][j][1] = max; /* any other number of base pairs */ for( t=2; t <= s-1; t++) { max = 0; if ( (pair[sq[i]][sq[j]]) && (mu[i+1][j-1][t-1] > 0) ) { max = mu[i+1][j-1][t-1] + NJ_energy(sq[i], sq[j]); } mu[i][j][t] = max; } } } max = mu[1][n][0]; if ( mu[1][n][s] >= max ) max = mu[1][n][s]; } if ( s == 1 ) { /* case: minimal stack size == 1 */ /* iterate */ for ( p=m+1; p<=n; p++ ) { for( i=1; i<= n-p; i++ ) { j = i + p; max = mu[i][j-1][0]; kmax = 0; /* no additional bracket */ for ( k=i; k<j-m; k++ ) { if ( pair[sq[k]][sq[j]] ) { if( (mu[i][k-1][0] + mu[k+1][j-1][0]+1) 
> max ) { max = mu[i][k-1][0] + mu[k+1][j-1][0] + NJ_energy(sq[k], sq[j]); kmax = k; } } } mu[i][j][0] = max; aa[i][j] = kmax; } } max = mu[1][n][0]; } /* mm_print_matrix(mu, string); */ }
/* Return non-zero when c is one of the gap symbols '-', '_', '~' or '.'. */
static int ali_is_gap_symbol(char c) {
  return (c=='-')||(c=='_')||(c=='~')||(c=='.');
}

/*
 * Encode one aligned sequence numerically and derive its neighbour arrays.
 *
 * sequence  aligned sequence (NUL-terminated), length l.
 * S         out: S[0] = l, S[1..l] = encode_char() of the upper-cased
 *           characters. In the oldAliEn mode S[l+1] is written as well.
 * s5, s3    out: code of the neighbouring position on either side of each
 *           position. Indices up to l+1 of s5 are written in the gap-aware
 *           mode.
 * ss        out: in the oldAliEn mode a copy of `sequence` including its
 *           terminator; otherwise the sequence with gap symbols removed
 *           (no terminator is written in that mode).
 * as        out: in the oldAliEn mode as[i] = i; otherwise the number of
 *           non-gap characters among the first i columns.
 * circ      non-zero to treat the sequence as circular, so the first and
 *           last positions become neighbours.
 *
 * When the global oldAliEn is set, neighbours are taken directly from the
 * alignment columns. Otherwise gap symbols ('-', '_', '~', '.') are skipped,
 * so a neighbour is the nearest non-gap character on that side.
 */
PUBLIC void encode_ali_sequence(const char *sequence, short *S, short *s5, short *s3, char *ss, unsigned short *as, int circ){
  unsigned int i,l;
  unsigned short p;

  l = strlen(sequence);
  S[0] = (short) l;
  s5[0] = s5[1] = 0;

  /* make numerical encoding of sequence; toupper() is only defined for
   * values representable as unsigned char, so convert the plain char first */
  for(i=1; i<=l; i++)
    S[i] = (short) encode_char(toupper((unsigned char) sequence[i-1]));

  if (oldAliEn){
    /* use alignment sequences in all energy evaluations */
    ss[0]=sequence[0];
    for(i=1; i<l; i++){
      s5[i] = S[i-1];
      s3[i] = S[i+1];
      ss[i] = sequence[i];
      as[i] = i;
    }
    ss[l] = sequence[l];
    as[l] = l;
    /* l is unsigned: for an empty sequence l-1 would wrap around */
    s5[l] = (l > 0) ? S[l-1] : 0;
    s3[l] = 0;
    S[l+1] = S[1];
    s5[1] = 0;
    if (circ) {
      s5[1] = S[l];
      s3[l] = S[1];
      ss[l+1] = S[1];
    }
  }
  else{
    if(circ){
      /* 5' neighbour of the first position: last non-gap character */
      for(i=l; i>0; i--){
        if (ali_is_gap_symbol(sequence[i-1])) continue;
        s5[1] = S[i];
        break;
      }
      /* 3' neighbour of the last position: first non-gap character.
       * Default to 0 so an all-gap sequence does not leave s3[l] unset. */
      s3[l] = 0;
      for (i=1; i<=l; i++) {
        if (ali_is_gap_symbol(sequence[i-1])) continue;
        s3[l] = S[i];
        break;
      }
    }
    else s5[1]=s3[l]=0;

    for(i=1,p=0; i<=l; i++){
      if (ali_is_gap_symbol(sequence[i-1]))
        s5[i+1]=s5[i];            /* gap: carry the previous neighbour along */
      else { /* no gap */
        ss[p++]=sequence[i-1];    /* start at 0!! */
        s5[i+1]=S[i];
      }
      as[i]=p;
    }
    for (i=l; i>=1; i--) {
      if (ali_is_gap_symbol(sequence[i-1]))
        s3[i-1]=s3[i];
      else
        s3[i-1]=S[i];
    }
  }
}
static void ini_ringlist(void) { int i; /* needed by function energy_of_struct_pt() from Vienna-RNA-1.4 */ pairList = (short *)calloc(GSV.len + 2, sizeof(short)); assert(pairList != NULL); typeList = (short *)calloc(GSV.len + 2, sizeof(short)); assert(typeList != NULL); aliasList = (short *)calloc(GSV.len + 2, sizeof(short)); assert(aliasList != NULL); pairList[0] = typeList[0] = aliasList[0] = GSV.len; ptype = (char **)calloc(GSV.len + 2, sizeof(char *)); assert(ptype != NULL); for (i=0; i<=GSV.len; i++) { ptype[i] = (char*)calloc(GSV.len + 2, sizeof(char)); assert(ptype[i] != NULL); } /* allocate virtual root */ wurzl = (baum *)calloc(1, sizeof(baum)); assert(wurzl != NULL); /* allocate ringList */ rl = (baum *)calloc(GSV.len+1, sizeof(baum)); assert(rl != NULL); /* allocate PostOrderList */ /* initialize virtualroot */ wurzl->typ = 'r'; wurzl->nummer = -1; /* connect virtualroot to ringlist-tree in down direction */ wurzl->down = &rl[GSV.len]; /* initialize post-order list */ make_pair_matrix(); /* initialize rest of ringlist-tree */ for(i = 0; i < GSV.len; i++) { int c; GAV.currform[i] = '.'; GAV.prevform[i] = 'x'; pairList[i+1] = 0; rl[i].typ = 'u'; /* decode base to numeric value */ c = encode_char(GAV.farbe[i]); rl[i].base = typeList[i+1] = c; aliasList[i+1] = alias[typeList[i+1]]; /* astablish links for node of the ringlist-tree */ rl[i].nummer = i; rl[i].next = &rl[i+1]; rl[i].prev = ((i == 0) ? &rl[GSV.len] : &rl[i-1]); rl[i].up = rl[i].down = NULL; } GAV.currform[GSV.len] = GAV.prevform[GSV.len] = '\0'; make_ptypes(aliasList); rl[i].nummer = i; rl[i].base = 0; /* make ringlist circular in next, prev direction */ rl[i].next = &rl[0]; rl[i].prev = &rl[i-1]; /* make virtual basepair for virtualroot */ rl[i].up = wurzl; rl[i].typ = 'x'; }