/**
 * Trace back a single duplex interaction through the c3 matrix.
 *
 * Starting from the outermost pair (k,l), repeatedly looks for an inner pair
 * (p,q) with k < p <= i and j <= q < l whose c3 entry plus the connecting
 * interior-loop energy equals the c3 entry of the current pair, and moves to
 * it.  The walk stops as soon as no such inner pair exists.
 *
 * @param k   first position of the 5' side (pairs with l)
 * @param l   last position of the 3' side (pairs with k)
 * @param i   last position of the 5' side
 * @param j   first position of the 3' side
 * @param max_interaction_length  offset used to index the second c3 dimension
 * @return    newly allocated string: positions k..i in dot-bracket notation,
 *            the separator '&', then positions j..l; the caller must free it
 */
PRIVATE char *backtrack_XS(int k, int l, const int i, const int j, const int max_interaction_length) {
  int p, q, type, type2, E, traced;
  const int i0 = k;              /* outermost 5' position, fixed for indexing st1 */
  const int j0 = l;              /* outermost 3' position */
  const int len1 = i - i0 + 1;   /* number of characters on the 5' side */
  const int len2 = j0 - j + 1;   /* number of characters on the 3' side */
  char *st1, *st2, *struc;

  /* unpaired positions are detected below by a 0 byte, so this relies on
     vrna_alloc() handing out zeroed memory, as the original code did */
  st1 = (char *) vrna_alloc(sizeof(char)*(len1+1));
  st1[len1]='\0';
  st2 = (char *) vrna_alloc(sizeof(char)*(len2+1));
  st2[len2]='\0';

  while (k<=i && l>=j) {
    E = c3[j-11][max_interaction_length-i+k-1][l-j];
    traced = 0;
    st1[k-i0] = '(';
    st2[l-j] = ')';

    type=ptype[indx[l]+k];
    if (!type) vrna_message_error("backtrack failed in fold duplex bli");

    /* search the inner pair (p,q) that explains the energy of (k,l) */
    for (p=k+1; p<=i; p++) {
      for (q=l-1; q>=j; q--) {
        int LE;
        if (p-k+l-q-2>MAXLOOP) break;
        type2=ptype[indx[q]+p];
        if (!type2) continue;
        LE = E_IntLoop(p-k-1, l-q-1, type, rtype[type2], SS1[k+1], SS1[l-1], SS1[p-1], SS1[q+1], P);
        if (E == c3[j-11][max_interaction_length-i+p-1][q-j]+LE) {
          traced=1;
          k=p; l=q;
          break;
        }
      }
      if (traced) break;
    }

    /* No inner pair reproduces E: the trace ends here.  The original code
       called E_ExtLoop() with type2 at this point, although type2 is
       uninitialised when the search loops never execute (k == i), and then
       threw the result away behind an unconditional break.  That call is
       removed; termination remains unconditional as before. */
    if (!traced) break;
  }

  /* size the result from the fixed strand lengths, not from the loop
     variables k/l, so it always holds st1 + '&' + st2 + '\0' */
  struc = (char *) vrna_alloc(sizeof(char)*(len1+len2+2));

  for (p=0; p<len1; p++) {
    if (!st1[p]) st1[p] = '.';
  }

  for (p=0; p<len2; p++) {
    if (!st2[p]) st2[p] = '.';
  }

  strcpy(struc, st1);
  strcat(struc, "&");
  strcat(struc, st2);
  free(st1);
  free(st2);
  return struc;
}
Beispiel #2
0
/**
*** backtrack in the energy matrices to obtain a structure with MFE
***
*** Pops intervals from the global sector stack and, for each one, looks for
*** the decomposition whose energy reproduces the stored f5 / fML / c entry.
*** Every pair found is appended to the global base_pair list; on return
*** base_pair[0].i holds the number of pairs found.
***
*** strings  NULL-terminated array of sequences; strlen(strings[0]) is used
***          as the length, the number of entries as the sequence count
*** s        number of sectors already pushed onto the sector stack; with
***          s==0 a single sector covering [1,length] is pushed first
**/
void backtrack(const char **strings, int s) {
  /*------------------------------------------------------------------
    trace back through the "c", "f5" and "fML" arrays to get the
    base pairing list. No search for equivalent structures is done.
    This inverts the folding procedure, hence it's very fast.
    ------------------------------------------------------------------*/
   /* normally s=0.
     If s>0 then s items have been already pushed onto the sector stack */
  int   i, j, k, p, q, length, energy;
  int   type_2, tt, mm;
  int   b=0, cov_en = 0;   /* b: pairs found so far; cov_en: sum of their pscore values */
  int   n_seq;
  int *type;               /* per-sequence pair type of the current (i,j) */
  length = strlen(strings[0]);
  for (n_seq=0; strings[n_seq]!=NULL; n_seq++);
  type = (int *) space(n_seq*sizeof(int));
  if (s==0) {
    /* initial sector: whole sequence; backtrack_type 'M' -> ml=1,
       'C' -> ml=2, anything else -> ml=0 */
    sector[++s].i = 1;
    sector[s].j = length;
    sector[s].ml = (backtrack_type=='M') ? 1 : ((backtrack_type=='C')?2:0);
  }
  while (s>0) {
    int ss, ml, fij, fi, cij, traced, i1, j1, d3, d5, jj=0;
    int canonical = 1;     /* (i,j) closes a canonical structure */
    i  = sector[s].i;
    j  = sector[s].j;
    ml = sector[s--].ml;   /* ml is a flag indicating if backtracking is to
                              occur in the fML- (1) or in the f-array (0) */
    if (ml==2) {
      /* ml==2: (i,j) is recorded as a pair right away and its interior
         is resolved at repeat1 */
      base_pair[++b].i = i;
      base_pair[b].j   = j;
      cov_en += pscore[indx[j]+i];
      goto repeat1;
    }

    if (j < i+TURN+1) continue; /* no more pairs in this interval */

    fij = (ml)? fML[indx[j]+i] : f5[j];
    fi  = (ml)?(fML[indx[j-1]+i]+n_seq*P->MLbase):f5[j-1];

    if (fij == fi) {  /* 3' end is unpaired */
      sector[++s].i = i;
      sector[s].j   = j-1;
      sector[s].ml  = ml;
      continue;
    }

    if (ml == 0) { /* backtrack in f5 */
      /* j or j-1 is paired. Find pairing partner */
      for (i=j-TURN-1,traced=0; i>=1; i--) {
        int cc, en;
        jj = i-1;   /* end of the remaining 5' interval [1,jj] */
        if (c[indx[j]+i]<INF) {
          en = c[indx[j]+i] + f5[i-1];
          if(dangles){
            for(ss = 0; ss < n_seq; ss++){
              type[ss] = pair[S[ss][i]][S[ss][j]];
              if (type[ss]==0) type[ss] = 7;
              en += E_ExtLoop(type[ss], (i>1) ? S5[ss][i]: -1, (j < length) ? S3[ss][j] : -1, P);
            }
          }
          else{
            for(ss = 0; ss < n_seq; ss++){
              type[ss] = pair[S[ss][i]][S[ss][j]];
              if (type[ss]==0) type[ss] = 7;
              en += E_ExtLoop(type[ss], -1, -1, P);
            }
          }
          if (fij == en) traced=j;
        }
        if (traced) break;
      }

      if (!traced) nrerror("backtrack failed in f5");
      /* push the 5' remainder [1,jj] for later f5 backtracking */
      sector[++s].i = 1;
      sector[s].j   = jj;
      sector[s].ml  = ml;

      j=traced;
      base_pair[++b].i = i;
      base_pair[b].j   = j;
      cov_en += pscore[indx[j]+i];
      goto repeat1;
    }
    else { /* trace back in fML array */
      if (fML[indx[j]+i+1]+n_seq*P->MLbase == fij) { /* 5' end is unpaired */
        sector[++s].i = i+1;
        sector[s].j   = j;
        sector[s].ml  = ml;
        continue;
      }

      /* energy of (i,j) as a single multi-loop stem, summed over all sequences */
      cij = c[indx[j]+i];
      if(dangles){
        for(ss = 0; ss < n_seq; ss++){
          tt = pair[S[ss][i]][S[ss][j]];
          if(tt==0) tt=7;
          cij += E_MLstem(tt, S5[ss][i], S3[ss][j], P);
        }
      }
      else{
        for(ss = 0; ss < n_seq; ss++){
          tt = pair[S[ss][i]][S[ss][j]];
          if(tt==0) tt=7;
          cij += E_MLstem(tt, -1, -1, P);
        }
      }

      if (fij==cij){
        /* found a pair */
        base_pair[++b].i = i;
        base_pair[b].j   = j;
        cov_en += pscore[indx[j]+i];
        goto repeat1;
      }

      /* otherwise split [i,j] into two fML parts [i,k] and [k+1,j] */
      for (k = i+1+TURN; k <= j-2-TURN; k++)
        if (fij == (fML[indx[k]+i]+fML[indx[j]+k+1]))
          break;

      /* note: both parts are pushed before the failure check below */
      sector[++s].i = i;
      sector[s].j   = k;
      sector[s].ml  = ml;
      sector[++s].i = k+1;
      sector[s].j   = j;
      sector[s].ml  = ml;

      if (k>j-2-TURN) nrerror("backtrack failed in fML");
      continue;
    }

  repeat1:

    /*----- begin of "repeat:" -----*/
    /* cij is reloaded from c only when canonical is set; after a
       noLonelyPairs stacking step (canonical==0) the adjusted value
       computed below is carried over instead */
    if (canonical)  cij = c[indx[j]+i];

    for (ss=0; ss<n_seq; ss++) {
      type[ss] = pair[S[ss][i]][S[ss][j]];
      if (type[ss]==0) type[ss] = 7;
    }

    if (noLonelyPairs)
      if (cij == c[indx[j]+i]) {
        /* (i.j) closes canonical structures, thus
           (i+1.j-1) must be a pair                */
        for (ss=0; ss<n_seq; ss++) {
          type_2 = pair[S[ss][j-1]][S[ss][i+1]];  /* j,i not i,j */
          if (type_2==0) type_2 = 7;
          cij -= P->stack[type[ss]][type_2];
        }
        cij += pscore[indx[j]+i];
        base_pair[++b].i = i+1;
        base_pair[b].j   = j-1;
        cov_en += pscore[indx[j-1]+i+1];
        i++; j--;
        canonical=0;
        goto repeat1;
      }
    canonical = 1;
    cij += pscore[indx[j]+i];

    /* hairpin: per sequence, a loop shorter than 3 counts 600,
       otherwise E_Hairpin is used */
    {int cc=0;
    for (ss=0; ss<n_seq; ss++) {
        if ((a2s[ss][j-1]-a2s[ss][i])<3) cc+=600;
        else cc += E_Hairpin(a2s[ss][j-1]-a2s[ss][i], type[ss], S3[ss][i], S5[ss][j], Ss[ss]+a2s[ss][i-1], P);
      }
    if (cij == cc) /* found hairpin */
      continue;
    }
    /* interior loop: look for an enclosed pair (p,q) within MAXLOOP */
    for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1); p++) {
      int minq = j-i+p-MAXLOOP-2;
      if (minq<p+1+TURN) minq = p+1+TURN;
      for (q = j-1; q >= minq; q--) {

        if (c[indx[q]+p]>=INF) continue;

        for (ss=energy=0; ss<n_seq; ss++) {
          type_2 = pair[S[ss][q]][S[ss][p]];  /* q,p not p,q */
          if (type_2==0) type_2 = 7;
          energy += E_IntLoop(a2s[ss][p-1]-a2s[ss][i],a2s[ss][j-1]-a2s[ss][q],
                               type[ss], type_2,
                               S3[ss][i], S5[ss][j],
                               S5[ss][p], S3[ss][q], P);

        }
        traced = (cij == energy+c[indx[q]+p]);
        if (traced) {
          base_pair[++b].i = p;
          base_pair[b].j   = q;
          cov_en += pscore[indx[q]+p];
          i = p, j = q;
          goto repeat1;
        }
      }
    }

    /* end of repeat: --------------------------------------------------*/

    /* (i.j) must close a multi-loop */

    /* mm: closing penalty plus the stem contribution of (i,j) seen from
       inside the loop, summed over all sequences */
    mm = n_seq*P->MLclosing;
    if(dangles){
      for(ss = 0; ss < n_seq; ss++){
        tt = rtype[type[ss]];
        mm += E_MLstem(tt, S5[ss][j], S3[ss][i], P);
      }
    }
    else{
      for(ss = 0; ss < n_seq; ss++){
        tt = rtype[type[ss]];
        mm += E_MLstem(tt, -1, -1, P);
      }
    }
    i1 = i+1;
    j1 = j-1;
    /* both sectors pushed below are to be backtracked in fML */
    sector[s+1].ml  = sector[s+2].ml = 1;

    for (k = i1+TURN+1; k < j1-TURN-1; k++){
      if(cij == fML[indx[k]+i1] + fML[indx[j1]+k+1] + mm) break;
    }

    if (k<=j-3-TURN) { /* found the decomposition */
      sector[++s].i = i1;
      sector[s].j   = k;
      sector[++s].i = k+1;
      sector[s].j   = j1;
    } else {
        nrerror("backtracking failed in repeat");
    }

  }

  /* fprintf(stderr, "covariance energy %6.2f\n", cov_en/100.);  */

  base_pair[0].i = b;    /* save the total number of base pairs */
  free(type);
}
Beispiel #3
0
/**
*** the actual forward recursion to fill the energy arrays
**/
PRIVATE int fill_arrays(const char **strings) {
  int   i, j, k, p, q, length, energy, new_c;
  int   decomp, MLenergy, new_fML;
  int   s, n_seq, *type, type_2, tt;

  /* count number of sequences */
  for (n_seq=0; strings[n_seq]!=NULL; n_seq++);

  type = (int *) space(n_seq*sizeof(int));
  length = strlen(strings[0]);

  /* init energies */
  for (j=1; j<=length; j++){
    Fmi[j]=DMLi[j]=DMLi1[j]=DMLi2[j]=INF;
    for (i=(j>TURN?(j-TURN):1); i<j; i++) {
      c[indx[j]+i] = fML[indx[j]+i] = INF;
    }
  }

  /* begin recursions */
  for (i = length-TURN-1; i >= 1; i--) { /* i,j in [1..length] */
    for (j = i+TURN+1; j <= length; j++) {
      int ij, psc;
      ij = indx[j]+i;

      for (s=0; s<n_seq; s++) {
        type[s] = pair[S[s][i]][S[s][j]];
        if (type[s]==0) type[s]=7;
      }

      psc = pscore[indx[j]+i];
      if (psc>=MINPSCORE) {   /* a pair to consider */
        int stackEnergy = INF;
        /* hairpin ----------------------------------------------*/


        for (new_c=s=0; s<n_seq; s++) {
          if ((a2s[s][j-1]-a2s[s][i])<3) new_c+=600;
          else  new_c += E_Hairpin(a2s[s][j-1]-a2s[s][i],type[s],S3[s][i],S5[s][j],Ss[s]+(a2s[s][i-1]), P);
       }
        /*--------------------------------------------------------
          check for elementary structures involving more than one
          closing pair.
          --------------------------------------------------------*/

        for (p = i+1; p <= MIN2(j-2-TURN,i+MAXLOOP+1) ; p++) {
          int minq = j-i+p-MAXLOOP-2;
          if (minq<p+1+TURN) minq = p+1+TURN;
          for (q = minq; q < j; q++) {
            if (pscore[indx[q]+p]<MINPSCORE) continue;
            for (energy = s=0; s<n_seq; s++) {
              type_2 = pair[S[s][q]][S[s][p]]; /* q,p not p,q! */
              if (type_2 == 0) type_2 = 7;
              energy += E_IntLoop(a2s[s][p-1]-a2s[s][i], a2s[s][j-1]-a2s[s][q], type[s], type_2,
                                   S3[s][i], S5[s][j],
                                   S5[s][p], S3[s][q], P);
            }
            new_c = MIN2(new_c, energy + c[indx[q]+p]);
            if ((p==i+1)&&(j==q+1)) stackEnergy = energy; /* remember stack energy */
          } /* end q-loop */
        } /* end p-loop */

        /* multi-loop decomposition ------------------------*/
        decomp = DMLi1[j-1];
        if(dangles){
          for(s=0; s<n_seq; s++){
            tt = rtype[type[s]];
            decomp += E_MLstem(tt, S5[s][j], S3[s][i], P);
          }
        }
        else{
          for(s=0; s<n_seq; s++){
            tt = rtype[type[s]];
            decomp += E_MLstem(tt, -1, -1, P);
          }
        }
        MLenergy = decomp + n_seq*P->MLclosing;
        new_c = MIN2(new_c, MLenergy);

        new_c = MIN2(new_c, cc1[j-1]+stackEnergy);

        cc[j] = new_c - psc; /* add covariance bonnus/penalty */
        if (noLonelyPairs)
          c[ij] = cc1[j-1]+stackEnergy-psc;
        else
          c[ij] = cc[j];

      } /* end >> if (pair) << */

      else c[ij] = INF;
      /* done with c[i,j], now compute fML[i,j] */
      /* free ends ? -----------------------------------------*/

      new_fML = fML[ij+1]+n_seq*P->MLbase;
      new_fML = MIN2(fML[indx[j-1]+i]+n_seq*P->MLbase, new_fML);
      energy = c[ij];
      if(dangles){
        for (s=0; s<n_seq; s++) {
          energy += E_MLstem(type[s], S5[s][i], S3[s][j], P);
        }
      }
      else{
        for (s=0; s<n_seq; s++) {
          energy += E_MLstem(type[s], -1, -1, P);
        }
      }
      new_fML = MIN2(energy, new_fML);


      /* modular decomposition -------------------------------*/
      for (decomp = INF, k = i+1+TURN; k <= j-2-TURN; k++)
        decomp = MIN2(decomp, Fmi[k]+fML[indx[j]+k+1]);

      DMLi[j] = decomp;               /* store for use in ML decompositon */
      new_fML = MIN2(new_fML,decomp);

      /* coaxial stacking deleted */

      fML[ij] = Fmi[j] = new_fML;     /* substring energy */
    } /* END for j */

    {
      int *FF; /* rotate the auxilliary arrays */
      FF = DMLi2; DMLi2 = DMLi1; DMLi1 = DMLi; DMLi = FF;
      FF = cc1; cc1=cc; cc=FF;
      for (j=1; j<=length; j++) {cc[j]=Fmi[j]=DMLi[j]=INF; }
    }
  } /* END for i */
  /* calculate energies of 5' and 3' fragments */

  f5[TURN+1]=0;
  for (j=TURN+2; j<=length; j++) {
    f5[j] = f5[j-1];
    if (c[indx[j]+1]<INF) {
      energy = c[indx[j]+1];
      if(dangles){
        for(s = 0; s < n_seq; s++){
          tt = pair[S[s][1]][S[s][j]];
          if(tt==0) tt=7;
          energy += E_ExtLoop(tt, -1, (j<length) ? S3[s][j] : -1, P);
        }
      }
      else{
        for(s = 0; s < n_seq; s++){
          tt = pair[S[s][1]][S[s][j]];
          if(tt==0) tt=7;
          energy += E_ExtLoop(tt, -1, -1, P);
        }
      }
      f5[j] = MIN2(f5[j], energy);
    }
    for (i=j-TURN-1; i>1; i--) {
      if (c[indx[j]+i]<INF) {
        energy = f5[i-1] + c[indx[j]+i];
        if(dangles){
          for(s = 0; s < n_seq; s++){
            tt = pair[S[s][i]][S[s][j]];
            if(tt==0) tt=7;
            energy += E_ExtLoop(tt, S5[s][i], (j < length) ? S3[s][j] : -1, P);
          }
        }
        else{
          for(s = 0; s < n_seq; s++){
            tt = pair[S[s][i]][S[s][j]];
            if(tt==0) tt=7;
            energy += E_ExtLoop(tt, -1, -1, P);
          }
        }
        f5[j] = MIN2(f5[j], energy);
      }
    }
  }
  free(type);
  return(f5[length]);
}
/**
 * Search s1 for low-energy duplex interactions and record them in PlexHits.
 *
 * For every anchor position i (from strlen(s1)-10 down to 11) the global c3
 * matrix is reset and refilled: entries for innermost pairs (i,j) are seeded
 * with P->DuplexInit plus a hairpin term, then extended outwards to pairs
 * (k,l) with k < i and l > j through interior loops.  The extension with the
 * lowest total energy (c3 entry + the two access_s1 terms + two exterior
 * loop terms) is backtracked and stored as a hit when it passes both
 * threshold tests.
 *
 * @param s1          sequence; only its length is read here, the encoded
 *                    sequence and pair types come from the globals SS1/ptype
 * @param access_s1   table indexed as access_s1[region_length][region_end];
 *                    its entries are added to the interaction energy
 * @param threshold   energy cutoff; a hit needs Emin < threshold and
 *                    energy*100 < threshold
 * @param max_interaction_length  bound on the extent of each interaction
 *                    side; also the size of the 2nd and 3rd c3 dimension
 */
PRIVATE void  duplexfold_XS(const char *s1,
                            int **access_s1,
                            const int threshold,
                            const int max_interaction_length){

  int i, j, k, l, p, q, Emin=INF, l_min=0, k_min=0, j_min=0;
  int type, type2, type3, E, tempK;
  char *struc;
  int length = (int) strlen(s1);
  struc=NULL;

  c3 = (int ***) vrna_alloc(sizeof(int **) * (length));
  for (i=0; i<length; i++){
    c3[i] = (int **) vrna_alloc(sizeof(int *) * max_interaction_length);
    for (j=0; j<max_interaction_length; j++) {
      c3[i][j]=(int *) vrna_alloc(sizeof(int) * max_interaction_length);
    }
  }

  i = length - 9;

  while( i-- > 11 ){
    Emin=INF;
    j_min=0;
    l_min=0;
    k_min=0;

    /* init all matrix elements to INF */
    for (j=0; j < length; j++){
      for(k=0;k<max_interaction_length;k++){
        for(l=0;l<max_interaction_length;l++){
          c3[j][k][l]=INF;
        }
      }
    }
    char string[10] = {'\0'};   /* empty loop sequence passed to E_Hairpin */
    /* matrix starting values for (i,j)-basepairs */
    for(j=i+4; j<n1-10; j++) {
      type=ptype[indx[j]+i];
      if (type) {
        c3[j-11][max_interaction_length-1][0] = P->DuplexInit;
        c3[j-11][max_interaction_length-1][0] += E_Hairpin(j-i-1, type,  SS1[i+1], SS1[j-1], string, P);
      }
    }

    int i_pos_begin=MAX2(9, i-max_interaction_length); /* why 9 ??? */

    /* fill matrix: extend from inner pair (p,q) to outer pair (k,l) */
    for(k=i-1; k>i_pos_begin; k--) {
      tempK=max_interaction_length-i+k-1;
      for(l = i + 5; l < n1 - 9; l++) { /* again, why 9 less then the sequence length ? */
        type2 = ptype[indx[l] + k];
        if(!type2) continue;
        for(p = k + 1; (p <= i) && (p <= k + MAXLOOP + 1); p++){
          for(q = l - 1; (q>=i+4) && (q >= l - MAXLOOP - 1); q--){
            if (p - k + l - q - 2 > MAXLOOP) break;
            type3 = ptype[indx[q] + p];
            if(!type3) continue;
            E = E_IntLoop(p - k - 1, l - q - 1, type2, rtype[type3], SS1[k + 1], SS1[l - 1], SS1[p - 1], SS1[q + 1], P);
            for(j = MAX2(i + 4, l - max_interaction_length + 1); j <= q; j++){
              type = ptype[indx[j]+i];
              if (type){
                c3[j-11][tempK][l-j] = MIN2(c3[j-11][tempK][l-j], c3[j-11][max_interaction_length-i+p-1][q-j]+E);
              }
            }/* next j */
          }/* next q */
        }/* next p */
      }/* next l */
    }/* next k */

    /* read out matrix minimum */
    for(j=i+4; j<n1-10; j++) {
      type=ptype[indx[j]+i];
      if (!type) continue;
      int j_pos_end=MIN2(n1-9,j+max_interaction_length);
      for (k=i-1; k>i_pos_begin; k--) {
        for (l=j+1; l<j_pos_end; l++) {
          type2=ptype[indx[l]+k];
          if (!type2) continue;
          E = c3[j-11][max_interaction_length-i+k-1][l-j];
          E+=access_s1[i-k+1][i]+access_s1[l-j+1][l];
          E+=E_ExtLoop(type2,((k>i_pos_begin+1)? SS1[k-1]:-1),((l<j_pos_end-1)? SS1[l+1]:-1),P);
          E+=E_ExtLoop(rtype[type], SS1[j-1], SS1[i+1], P);
          if (E<Emin) {
            Emin=E; k_min=k; l_min=l;
            j_min=j;
          }
        }
      }
    }

    if(Emin  < threshold){
      struc = backtrack_XS(k_min, l_min, i, j_min, max_interaction_length);

      /* the two access_s1 terms of the chosen interaction */
      int dGx = access_s1[i-k_min+1][i];
      int dGy = access_s1[l_min-j_min+1][l_min];

      /* coordinates are reported shifted by 10
         (NOTE(review): presumably undoes padding added by the caller — confirm) */
      PlexHits[NumberOfHits].tb=k_min -10;
      PlexHits[NumberOfHits].te=i -10;
      PlexHits[NumberOfHits].qb=j_min -10;
      PlexHits[NumberOfHits].qe=l_min -10;
      PlexHits[NumberOfHits].ddG=(double) Emin * 0.01;
      PlexHits[NumberOfHits].dG1=(double) dGx*0.01 ;
      PlexHits[NumberOfHits].dG2=(double) dGy*0.01 ;
      PlexHits[NumberOfHits].energy = PlexHits[NumberOfHits].ddG - PlexHits[NumberOfHits].dG1 - PlexHits[NumberOfHits].dG2;
      PlexHits[NumberOfHits].structure = struc;

      /* output: */
      if(PlexHits[NumberOfHits].energy * 100 < threshold){
        if (verbose) printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", PlexHits[NumberOfHits].structure, PlexHits[NumberOfHits].tb, PlexHits[NumberOfHits].te, PlexHits[NumberOfHits].qb, PlexHits[NumberOfHits].qe, PlexHits[NumberOfHits].ddG, PlexHits[NumberOfHits].energy, PlexHits[NumberOfHits].dG1, PlexHits[NumberOfHits].dG2);
        NumberOfHits++;
        if(NumberOfHits==PlexHitsArrayLength-1){
          PlexHitsArrayLength*=2;
          PlexHits = (dupVar *) vrna_realloc(PlexHits,sizeof(dupVar)*PlexHitsArrayLength);
        }
      } else {
        /* hit rejected: the slot is reused by the next candidate, so the
           structure string must be released here or it would leak */
        free(struc);
        PlexHits[NumberOfHits].structure = NULL;
      }
    }
  }

  /* release every row that was allocated above: the allocation loop runs
     over `length` rows, so the release loop must do the same */
  for (i=0; i<length; i++) {
    for (j=0; j<max_interaction_length; j++) {
      free(c3[i][j]);
    }
    free(c3[i]);
  }
  free(c3);
}