Пример #1
0
PRIVATE char *backtrack_XS(int k, int l, const int i, const int j, const int max_interaction_length) {
  /* backtrack structure going backwards from i, and forwards from j
     return structure in bracket notation with & as separator */
  int p, q, type, type2, E, traced, i0, j0;
  char *st1, *st2, *struc;
  st1 = (char *) vrna_alloc(sizeof(char)*(i-k+2));
  st1[i-k+1]='\0';
  st2 = (char *) vrna_alloc(sizeof(char)*(l-j+2));
  st2[l-j+1]='\0';

  i0=k; j0=l;
  while (k<=i && l>=j) {
    E = c3[j-11][max_interaction_length-i+k-1][l-j]; traced=0;
    st1[k-i0] = '(';
    st2[l-j] = ')';

    type=ptype[indx[l]+k];
    if (!type) vrna_message_error("backtrack failed in fold duplex bli");
    for (p=k+1; p<=i; p++) {
      for (q=l-1; q>=j; q--) {
        int LE;
        if (p-k+l-q-2>MAXLOOP) break;
        type2=ptype[indx[q]+p];
        if (!type2) continue;
         LE = E_IntLoop(p-k-1, l-q-1, type, rtype[type2], SS1[k+1], SS1[l-1], SS1[p-1], SS1[q+1], P);
         if (E == c3[j-11][max_interaction_length-i+p-1][q-j]+LE) {
          traced=1;
           k=p; l=q;
          break;
        }
      }
      if (traced) break;
    }
    if (!traced) {
      E-=E_ExtLoop(type2, ((k<i)?SS1[k+1]:-1), ((l>j-1)? SS1[l-1]:-1), P);
      break;
      if (E != P->DuplexInit) {
        vrna_message_error("backtrack failed in fold duplex bal");
      } else break;
    }
  }
  struc = (char *) vrna_alloc(k-i0+1+j0-l+1+2);

  for (p=0; p<=i-i0; p++){
    if (!st1[p]) st1[p] = '.';
  }

  for (p=0; p<=j0-j; p++) {
    if (!st2[p]) {
      st2[p] = '.';
    }
  }

  strcpy(struc, st1);
  strcat(struc, "&");
  strcat(struc, st2);
  free(st1); free(st2);
  return struc;
}
Пример #2
0
PUBLIC vrna_plist_t *
stackProb(double cutoff){

  if(!(backward_compat_compound && backward_compat)){
    vrna_message_error("stackProb: run pf_fold() first!");
  } else if( !backward_compat_compound->exp_matrices->probs){
    vrna_message_error("stackProb: probs==NULL!");
  }

  return vrna_stack_prob(backward_compat_compound, cutoff);
}
Пример #3
0
PRIVATE int decode(char *id)
{
    int   n, quit, i;
    char  label[100], *code;

    n = 0;

    quit = 0;
    code = coding;

    while (!quit) {
        for (i = 0; code[i] != sep; i++) {
            if (code[i] == '\0') {
                quit = 1;
                break;
            }
            label[i] = code[i];
        }
        label[i] = '\0';
        if (strcmp(id, label) == 0) return (n);
        code += (i+1);
        n++;
    }

    vrna_message_error("Syntax error: node identifier \"%s\" not found "
                              "in coding string \"%s\"\n"
                              "Exiting",
                              id, coding);
    exit(0);
}
Пример #4
0
PUBLIC void *
vrna_alloc(unsigned size){

  void *pointer;

  if ( (pointer = (void *) calloc(1, (size_t) size)) == NULL) {
#ifdef EINVAL
    if (errno==EINVAL) {
      fprintf(stderr,"vrna_alloc: requested size: %d\n", size);
      vrna_message_error("Memory allocation failure -> EINVAL");
    }
    if (errno==ENOMEM)
#endif
      vrna_message_error("Memory allocation failure -> no memory");
  }
  return  pointer;
}
Пример #5
0
PUBLIC void *
vrna_realloc(void *p, unsigned size){

  if (p == NULL)
    return vrna_alloc(size);
  p = (void *) realloc(p, size);
  if (p == NULL) {
#ifdef EINVAL
    if (errno==EINVAL) {
      fprintf(stderr,"vrna_realloc: requested size: %d\n", size);
      vrna_message_error("vrna_realloc allocation failure -> EINVAL");
    }
    if (errno==ENOMEM)
#endif
      vrna_message_error("vrna_realloc allocation failure -> no memory");
  }
  return p;
}
Пример #6
0
PUBLIC char *
centroid( int length,
          double *dist) {

  if (pr==NULL)
    vrna_message_error("pr==NULL. You need to call pf_fold() before centroid()");

  return vrna_centroid_from_probs(length, dist, pr);
}
Пример #7
0
PUBLIC double
mean_bp_distance(int length){

  if(backward_compat_compound)
    if(backward_compat_compound->exp_matrices)
      if(backward_compat_compound->exp_matrices->probs)
        return vrna_mean_bp_distance(backward_compat_compound);

  vrna_message_error("mean_bp_distance: you need to call vrna_pf_fold first");
  return 0.; /* we will never get to this point */
}
Пример #8
0
PRIVATE char *get_array1(int *arr, int size)
{
  int    i, p, pos, pp, r, last;
  char  *line, buf[16];


  i = last = 0;
  while( i<size ) {
    line = get_line(fp);
    if (!line) vrna_message_error("unexpected end of file in get_array1");
    ignore_comment(line);
    pos=0;
    while ((i<size)&&(sscanf(line+pos,"%15s%n", buf, &pp)==1)) {
      pos += pp;
      if (buf[0]=='*') {i++; continue;}
      else if (buf[0]=='x') { /* should only be used for loop parameters */
        if (i==0) vrna_message_error("can't extrapolate first value");
        p = arr[last] + (int) (0.5+ lxc37*log(((double) i)/(double)(last)));
      }
      else if (strcmp(buf,"DEF") == 0) p = DEF;
      else if (strcmp(buf,"INF") == 0) p = INF;
      else if (strcmp(buf,"NST") == 0) p = NST;
      else {
        r=sscanf(buf,"%d", &p);
        if (r!=1) {
          return line+pos;
          fprintf(stderr, "can't interpret `%s' in get_array1\n", buf);
          exit(1);
        }
        last = i;
      }
      arr[i++]=p;
    }
    free(line);
  }

  return NULL;
}
Пример #9
0
PUBLIC void
alloc_sequence_arrays(const char **sequences,
                      short ***S,
                      short ***S5,
                      short ***S3,
                      unsigned short ***a2s,
                      char ***Ss,
                      int circ){

  unsigned int s, n_seq, length;
  if(sequences[0] != NULL){
    length = strlen(sequences[0]);
    for (s=0; sequences[s] != NULL; s++);
    n_seq = s;
    *S    = (short **)          vrna_alloc((n_seq+1) * sizeof(short *));
    *S5   = (short **)          vrna_alloc((n_seq+1) * sizeof(short *));
    *S3   = (short **)          vrna_alloc((n_seq+1) * sizeof(short *));
    *a2s  = (unsigned short **) vrna_alloc((n_seq+1) * sizeof(unsigned short *));
    *Ss   = (char **)           vrna_alloc((n_seq+1) * sizeof(char *));
    for (s=0; s<n_seq; s++) {
      if(strlen(sequences[s]) != length) vrna_message_error("uneqal seqence lengths");
      (*S5)[s]  = (short *)         vrna_alloc((length + 2) * sizeof(short));
      (*S3)[s]  = (short *)         vrna_alloc((length + 2) * sizeof(short));
      (*a2s)[s] = (unsigned short *)vrna_alloc((length + 2) * sizeof(unsigned short));
      (*Ss)[s]  = (char *)          vrna_alloc((length + 2) * sizeof(char));
      (*S)[s]   = (short *)         vrna_alloc((length + 2) * sizeof(short));
      encode_ali_sequence(sequences[s], (*S)[s], (*S5)[s], (*S3)[s], (*Ss)[s], (*a2s)[s], circ);
    }
    (*S5)[n_seq]  = NULL;
    (*S3)[n_seq]  = NULL;
    (*a2s)[n_seq] = NULL;
    (*Ss)[n_seq]  = NULL;
    (*S)[n_seq]   = NULL;
  }
  else vrna_message_error("alloc_sequence_arrays: no sequences in the alignment!");
}
Пример #10
0
PUBLIC double
mean_bp_distance_pr(int length,
                    FLT_OR_DBL *p){

  double d=0;
  int *index = vrna_idx_row_wise((unsigned int) length);

  if (p==NULL)
    vrna_message_error("p==NULL. You need to supply a valid probability matrix for mean_bp_distance_pr()");

  d = wrap_mean_bp_distance(p, length, index, TURN);

  free(index);
  return d;
}
Пример #11
0
/* get the free energy of a subsequence from the q[] array */
PUBLIC double
get_subseq_F( int i,
              int j){

  if(backward_compat_compound)
    if(backward_compat_compound->exp_matrices)
      if(backward_compat_compound->exp_matrices->q){
        int               *my_iindx   = backward_compat_compound->iindx;
        vrna_exp_param_t  *pf_params  = backward_compat_compound->exp_params;
        FLT_OR_DBL        *q          = backward_compat_compound->exp_matrices->q;
        return ((-log(q[my_iindx[i]-j])-(j-i+1)*log(pf_params->pf_scale))*pf_params->kT/1000.0);
      }

  vrna_message_error("call pf_fold() to fill q[] array before calling get_subseq_F()");
  return 0.; /* we will never get to this point */
}
Пример #12
0
PRIVATE void ignore_comment(char * line)
{
  /* excise C style comments */
  /* only one comment per line, no multiline comments */
  char *cp1, *cp2;

  if ((cp1=strstr(line, "/*"))) {
    cp2 = strstr(cp1, "*/");
    if (cp2==NULL)
      vrna_message_error("unclosed comment in parameter file");
    /* can't use strcpy for overlapping strings */
    for (cp2+=2; *cp2!='\0'; cp2++, cp1++)
      *cp1 = *cp2;
    *cp1 = '\0';
  }

  return;
}
Пример #13
0
PUBLIC char *settype(enum parset s){
  switch(s){
    case        S:  return "stack";
    case      S_H:  return "stack_enthalpies";
    case       HP:  return "hairpin";
    case     HP_H:  return "hairpin_enthalpies";
    case        B:  return "bulge";
    case      B_H:  return "bulge_enthalpies";
    case       IL:  return "interior";
    case     IL_H:  return "interior_enthalpies";
    case      MME:  return "mismatch_exterior";
    case    MME_H:  return "mismatch_exterior_enthalpies";
    case      MMH:  return "mismatch_hairpin";
    case    MMH_H:  return "mismatch_hairpin_enthalpies";
    case      MMI:  return "mismatch_interior";
    case    MMI_H:  return "mismatch_interior_enthalpies";
    case    MMI1N:  return "mismatch_interior_1n";
    case  MMI1N_H:  return "mismatch_interior_1n_enthalpies";
    case    MMI23:  return "mismatch_interior_23";
    case  MMI23_H:  return "mismatch_interior_23_enthalpies";
    case      MMM:  return "mismatch_multi";
    case    MMM_H:  return "mismatch_multi_enthalpies";
    case       D5:  return "dangle5";
    case     D5_H:  return "dangle5_enthalpies";
    case       D3:  return "dangle3";
    case     D3_H:  return "dangle3_enthalpies";
    case    INT11:  return "int11";
    case  INT11_H:  return "int11_enthalpies";
    case    INT21:  return "int21";
    case  INT21_H:  return "int21_enthalpies";
    case    INT22:  return "int22";
    case  INT22_H:  return "int22_enthalpies";
    case       ML:  return "ML_params";
    case      NIN:  return "NINIO";
    case      TRI:  return "Triloops";
    case       TL:  return "Tetraloops";
    case      HEX:  return "Hexaloops";
    case     QUIT:  return "END";
    case     MISC:  return "Misc";
    default: vrna_message_error("\nThe answer is: 42\n");
  }
  return "";
}
Пример #14
0
PRIVATE double PrfEditCost(int i, int j, const float *T1, const float *T2)
{
  double  dist;
  int    k, kmax;

  kmax = (int) T1[1];
  if ((int) T2[1] != kmax) vrna_message_error("inconsistent Profiles in PrfEditCost");

  if(i==0) {
    for(dist = 0. ,k=0 ; k<kmax ; k++)
      dist += T2[j*kmax+k];
  }
  if(j==0) {
    for(dist = 0. ,k=0 ; k<kmax ; k++)
      dist += T1[i*kmax+k];
  }
  if((i>0)&&(j>0)) {
    for(dist = 2.,k=0; k<kmax; k++)
      dist -= 2.*average(T1[i*kmax+k],T2[j*kmax+k]);
  }
  return dist;
}
Пример #15
0
PUBLIC double
mean_bp_dist(int length) {

  /* compute the mean base pair distance in the thermodynamic ensemble */
  /* <d> = \sum_{a,b} p_a p_b d(S_a,S_b)
     this can be computed from the pair probs p_ij as
     <d> = \sum_{ij} p_{ij}(1-p_{ij}) */

  int     i, j, *my_iindx;
  double  d = 0;

  if (pr==NULL)
    vrna_message_error("pr==NULL. You need to call pf_fold() before mean_bp_dist()");

  my_iindx = vrna_idx_row_wise(length);

  for (i=1; i<=length; i++)
    for (j=i+TURN+1; j<=length; j++)
      d += pr[my_iindx[i]-j] * (1-pr[my_iindx[i]-j]);

  free(my_iindx);
  return 2*d;
}
Пример #16
0
PRIVATE void usage(void)
{
   vrna_message_error("usage: AnalyseSeqs [-X[bswnm]] [-Q] [-M{mask}] \n"
   "                   [-D{H|A[,cost]|G[,cost1,cost2]}] [-d{D|B|H|S}]");
   exit(0);
}
Пример #17
0
PRIVATE void
pf_linear(vrna_fold_compound_t *vc){

  char                *hard_constraints;
  int                 n, i,j, k, ij, d, *my_iindx, *jindx, with_gquad, turn,
                      with_ud, hc_decompose;
  FLT_OR_DBL          temp, Qmax, qbt1, *q, *qb, *qm, *qm1, *q1k, *qln;
  double              max_real;
  vrna_ud_t           *domains_up;
  vrna_md_t           *md;
  vrna_hc_t           *hc;
  vrna_mx_pf_t        *matrices;
  vrna_mx_pf_aux_el_t *aux_mx_el;
  vrna_mx_pf_aux_ml_t *aux_mx_ml;
  vrna_exp_param_t    *pf_params;

  n                 = vc->length;
  my_iindx          = vc->iindx;
  jindx             = vc->jindx;
  matrices          = vc->exp_matrices;
  pf_params         = vc->exp_params;
  hc                = vc->hc;
  domains_up        = vc->domains_up;
  q                 = matrices->q;
  qb                = matrices->qb;
  qm                = matrices->qm;
  qm1               = matrices->qm1;
  q1k               = matrices->q1k;
  qln               = matrices->qln;
  md                = &(pf_params->model_details);
  with_gquad        = md->gquad;
  turn              = md->min_loop_size;
  hard_constraints  = hc->matrix;

  with_ud           = (domains_up && domains_up->exp_energy_cb);
  Qmax              = 0;

  max_real = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX;

  if (with_ud && domains_up->exp_prod_cb)
    domains_up->exp_prod_cb(vc, domains_up->data);

  if(with_gquad){
    free(vc->exp_matrices->G);
    vc->exp_matrices->G = get_gquad_pf_matrix(vc->sequence_encoding2, vc->exp_matrices->scale, vc->exp_params);
  }

  /* init auxiliary arrays for fast exterior/multibranch loops */
  aux_mx_el = vrna_exp_E_ext_fast_init(vc);
  aux_mx_ml = vrna_exp_E_ml_fast_init(vc);

  /*array initialization ; qb,qm,q
    qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */
  for (d=0; d<=turn; d++)
    for (i=1; i<=n-d; i++) {
      j=i+d;
      ij = my_iindx[i]-j;
      qb[ij] = 0.0;
    }

  for (j = turn + 2; j <= n; j++) {
    for (i = j - turn - 1; i >= 1; i--) {
      /* construction of partition function of segment i,j */
      /* firstly that given i binds j : qb(i,j) */
      ij            = my_iindx[i] - j;
      hc_decompose  = hard_constraints[jindx[j] + i];
      qbt1          = 0;

      if(hc_decompose){
        /* process hairpin loop(s) */
        qbt1 += vrna_exp_E_hp_loop(vc, i, j);
        /* process interior loop(s) */
        qbt1 += vrna_exp_E_int_loop(vc, i, j);
        /* process multibranch loop(s) */
        qbt1 += vrna_exp_E_mb_loop_fast(vc, i, j, aux_mx_ml->qqm1);
      }
      qb[ij] = qbt1;

      /* Multibranch loop */
      qm[ij] = vrna_exp_E_ml_fast(vc, i, j, aux_mx_ml);

      if (qm1)
        qm1[jindx[j] + i] = aux_mx_ml->qqm[i]; /* for stochastic backtracking and circfold */

      /* Exterior loop */
      q[ij] = temp = vrna_exp_E_ext_fast(vc, i, j, aux_mx_el);

      if (temp>Qmax) {
        Qmax = temp;
        if (Qmax>max_real/10.)
          vrna_message_warning("Q close to overflow: %d %d %g", i,j,temp);
      }
      if (temp>=max_real) {
        vrna_message_error("overflow in pf_fold while calculating q[%d,%d]\n"
                                  "use larger pf_scale", i,j);
      }
    }

    /* rotate auxiliary arrays */
    vrna_exp_E_ext_fast_rotate(vc, aux_mx_el);
    vrna_exp_E_ml_fast_rotate(vc, aux_mx_ml);

  }

  /* prefill linear qln, q1k arrays */
  if(q1k && qln){
    for (k=1; k<=n; k++) {
      q1k[k] = q[my_iindx[1] - k];
      qln[k] = q[my_iindx[k] - n];
    }
    q1k[0] = 1.0;
    qln[n+1] = 1.0;
  }

  /* free memory occupied by auxiliary arrays for fast exterior/multibranch loops */
  vrna_exp_E_ml_fast_free(vc, aux_mx_ml);
  vrna_exp_E_ext_fast_free(vc, aux_mx_el);
}
Пример #18
0
PRIVATE void
alipf_linear( vrna_fold_compound_t *vc){

  char                *hard_constraints;
  int                 i,j, ij, jij, d, turn, n, *my_iindx, *jindx, *pscore;
  FLT_OR_DBL          temp, Qmax, qbt1, *q, *qb, *qm, *qm1;
  double              kTn, max_real;
  vrna_exp_param_t    *pf_params;
  vrna_mx_pf_t        *matrices;
  vrna_mx_pf_aux_el_t *aux_mx_el;
  vrna_mx_pf_aux_ml_t *aux_mx_ml;
  vrna_md_t           *md;
  vrna_hc_t           *hc;

  n                 = vc->length;
  pf_params         = vc->exp_params;
  matrices          = vc->exp_matrices;
  hc                = vc->hc;
  my_iindx          = vc->iindx;
  jindx             = vc->jindx;
  pscore            = vc->pscore;     /* precomputed array of pair types */
  md                = &(pf_params->model_details);
  q                 = matrices->q;
  qb                = matrices->qb;
  qm                = matrices->qm;
  qm1               = matrices->qm1;
  hard_constraints  = hc->matrix;
  turn              = md->min_loop_size;
  kTn               = pf_params->kT/10.;   /* kT in cal/mol  */
  Qmax              = 0.;

  max_real          = (sizeof(FLT_OR_DBL) == sizeof(float)) ? FLT_MAX : DBL_MAX;

  /* init auxiliary arrays for fast exterior/multibranch loops */
  aux_mx_el = vrna_exp_E_ext_fast_init(vc);
  aux_mx_ml = vrna_exp_E_ml_fast_init(vc);

  /* array initialization ; qb,qm,q
     qb,qm,q (i,j) are stored as ((n+1-i)*(n-i) div 2 + n+1-j */

  for (d = 0; d <= turn; d++)
    for (i = 1; i <= n - d; i++) {
      j       = i + d;
      ij      = my_iindx[i]-j;
      qb[ij]  = 0.0;
    }

  for (j = turn + 2; j <= n; j++) {
    for (i = j - turn - 1; i >= 1; i--) {
      int psc;
      /* construction of partition function for segment i,j */
      /* calculate pf given that i and j pair: qb(i,j)      */
      ij  = my_iindx[i] - j;
      jij = jindx[j] + i;

      psc   = pscore[jij];
      qbt1  = 0.;

      if (hard_constraints[jij]) {
        /* process hairpin loop(s) */
        qbt1 += vrna_exp_E_hp_loop(vc, i, j);
        /* process interior loop(s) */
        qbt1 += vrna_exp_E_int_loop(vc, i, j);
        /* process multibranch loop(s) */
        qbt1 += vrna_exp_E_mb_loop_fast(vc, i, j, aux_mx_ml->qqm1);

        qbt1 *= exp(psc/kTn);
      }

      qb[ij] = qbt1;

      /* Multibranch loop */
      qm[ij] = vrna_exp_E_ml_fast(vc, i, j, aux_mx_ml);

      if (qm1)
        qm1[jindx[j] + i] = aux_mx_ml->qqm[i]; /* for stochastic backtracking and circfold */

      /* Exterior loop */
      q[ij] = temp = vrna_exp_E_ext_fast(vc, i, j, aux_mx_el);

      if (temp > Qmax) {
        Qmax = temp;
        if (Qmax > max_real/10.)
          vrna_message_warning("Q close to overflow: %d %d %g", i,j,temp);
      }
      if (temp >= max_real) {
        vrna_message_error("overflow in pf_fold while calculating q[%d,%d]\n"
                                  "use larger pf_scale", i,j);
      }
    }

    /* rotate auxiliary arrays */
    vrna_exp_E_ext_fast_rotate(vc, aux_mx_el);
    vrna_exp_E_ml_fast_rotate(vc, aux_mx_ml);
  }

  /* free memory occupied by auxiliary arrays for fast exterior/multibranch loops */
  vrna_exp_E_ml_fast_free(vc, aux_mx_ml);
  vrna_exp_E_ext_fast_free(vc, aux_mx_el);
}
Пример #19
0
PRIVATE void
make_pscores(vrna_fold_compound_t *vc){

  /* calculate co-variance bonus for each pair depending on  */
  /* compensatory/consistent mutations and incompatible seqs */
  /* should be 0 for conserved pairs, >0 for good pairs      */

#define NONE -10000 /* score for forbidden pairs */

  char *structure = NULL;
  int i,j,k,l,s, max_span, turn;
  float **dm;
  int olddm[7][7]={{0,0,0,0,0,0,0}, /* hamming distance between pairs */
                  {0,0,2,2,1,2,2} /* CG */,
                  {0,2,0,1,2,2,2} /* GC */,
                  {0,2,1,0,2,1,2} /* GU */,
                  {0,1,2,2,0,2,1} /* UG */,
                  {0,2,2,1,2,0,2} /* AU */,
                  {0,2,2,2,1,2,0} /* UA */};

  short           **S         = vc->S;
  char            **AS        = vc->sequences;
  int             n_seq       = vc->n_seq;
  vrna_md_t       *md         = (vc->params) ? &(vc->params->model_details) : &(vc->exp_params->model_details);
  int             *pscore     = vc->pscore;     /* precomputed array of pair types */             
  int             *indx       = vc->jindx;                                             
  int             *my_iindx   = vc->iindx;                                             
  int             n           = vc->length;                                            

  turn    = md->min_loop_size;

  if (md->ribo) {
    if (RibosumFile !=NULL) dm=readribosum(RibosumFile);
    else dm=get_ribosum((const char **)AS, n_seq, n);
  }
  else { /*use usual matrix*/
    dm = vrna_alloc(7*sizeof(float*));
    for (i=0; i<7;i++) {
      dm[i] = vrna_alloc(7*sizeof(float));
      for (j=0; j<7; j++)
        dm[i][j] = (float) olddm[i][j];
    }
  }

  max_span = md->max_bp_span;
  if((max_span < turn+2) || (max_span > n))
    max_span = n;
  for (i=1; i<n; i++) {
    for (j=i+1; (j<i+turn+1) && (j<=n); j++)
      pscore[indx[j]+i] = NONE;
    for (j=i+turn+1; j<=n; j++) {
      int pfreq[8]={0,0,0,0,0,0,0,0};
      double score;
      for (s=0; s<n_seq; s++) {
        int type;
        if (S[s][i]==0 && S[s][j]==0) type = 7; /* gap-gap  */
        else {
          if ((AS[s][i] == '~')||(AS[s][j] == '~')) type = 7;
          else type = md->pair[S[s][i]][S[s][j]];
        }
        pfreq[type]++;
      }
      if (pfreq[0]*2+pfreq[7]>n_seq) { pscore[indx[j]+i] = NONE; continue;}
      for (k=1,score=0; k<=6; k++) /* ignore pairtype 7 (gap-gap) */
        for (l=k; l<=6; l++)
          score += pfreq[k]*pfreq[l]*dm[k][l];
      /* counter examples score -1, gap-gap scores -0.25   */
      pscore[indx[j]+i] = md->cv_fact *
        ((UNIT*score)/n_seq - md->nc_fact*UNIT*(pfreq[0] + pfreq[7]*0.25));

      if((j - i + 1) > max_span){
        pscore[indx[j]+i] = NONE;
      }
    }
  }

  if (md->noLP) /* remove unwanted pairs */
    for (k=1; k<n-turn-1; k++)
      for (l=1; l<=2; l++) {
        int type,ntype=0,otype=0;
        i=k; j = i+turn+l;
        type = pscore[indx[j]+i];
        while ((i>=1)&&(j<=n)) {
          if ((i>1)&&(j<n)) ntype = pscore[indx[j+1]+i-1];
          if ((otype<md->cv_fact*MINPSCORE)&&(ntype<md->cv_fact*MINPSCORE))  /* too many counterexamples */
            pscore[indx[j]+i] = NONE; /* i.j can only form isolated pairs */
          otype =  type;
          type  = ntype;
          i--; j++;
        }
      }


  if (fold_constrained&&(structure!=NULL)) {
    int psij, hx, hx2, *stack, *stack2;
    stack = vrna_alloc(sizeof(int)*(n+1));
    stack2 = vrna_alloc(sizeof(int)*(n+1));

    for(hx=hx2=0, j=1; j<=n; j++) {
      switch (structure[j-1]) {
      case 'x': /* can't pair */
        for (l=1; l<j-turn; l++) pscore[indx[j]+l] = NONE;
        for (l=j+turn+1; l<=n; l++) pscore[indx[l]+j] = NONE;
        break;
      case '(':
        stack[hx++]=j;
        /* fallthrough */
      case '[':
        stack2[hx2++]=j;
        /* fallthrough */
      case '<': /* pairs upstream */
        for (l=1; l<j-turn; l++) pscore[indx[j]+l] = NONE;
        break;
      case ']':
        if (hx2<=0) {
          fprintf(stderr, "%s\n", structure);
          vrna_message_error("unbalanced brackets in constraints");
        }
        i = stack2[--hx2];
        pscore[indx[j]+i]=NONE;
        break;
      case ')':
        if (hx<=0) {
          fprintf(stderr, "%s\n", structure);
          vrna_message_error("unbalanced brackets in constraints");
        }
        i = stack[--hx];
        psij = pscore[indx[j]+i]; /* store for later */
        for (k=j; k<=n; k++)
          for (l=i; l<=j; l++)
            pscore[indx[k]+l] = NONE;
        for (l=i; l<=j; l++)
          for (k=1; k<=i; k++)
            pscore[indx[l]+k] = NONE;
        for (k=i+1; k<j; k++)
          pscore[indx[k]+i] = pscore[indx[j]+k] = NONE;
        pscore[indx[j]+i] = (psij>0) ? psij : 0;
        /* fallthrough */
      case '>': /* pairs downstream */
        for (l=j+turn+1; l<=n; l++) pscore[indx[l]+j] = NONE;
        break;
      }
    }
    if (hx!=0) {
      fprintf(stderr, "%s\n", structure);
      vrna_message_error("unbalanced brackets in constraint string");
    }
    free(stack); free(stack2);
  }
  /*free dm */
  for (i=0; i<7;i++) {
    free(dm[i]);
  }
  free(dm);

  /* copy over pscores for backward compatibility */
  if(vc->pscore_pf_compat){
    for(i = 1; i < n; i++)
      for(j = i; j <= n; j++){
        vc->pscore_pf_compat[my_iindx[i] - j] = (short)pscore[indx[j] + i];
      }
  }

}
Пример #20
0
PUBLIC vrna_fold_compound_t *
vrna_fold_compound_TwoD(const char *sequence,
                      const char *s1,
                      const char *s2,
                      vrna_md_t *md_p,
                      unsigned int options){

  int                 length, l, turn;
  vrna_fold_compound_t  *vc;
  vrna_md_t           md;


  if(sequence == NULL) return NULL;

  /* sanity check */
  length = strlen(sequence);
  if(length == 0)
    vrna_message_error("vrna_fold_compound_TwoD: sequence length must be greater 0");

  l = strlen(s1);
  if(l != length)
    vrna_message_error("vrna_fold_compound_TwoD: sequence and s1 differ in length");

  l = strlen(s2);
  if(l != length)
    vrna_message_error("vrna_fold_compound_TwoD: sequence and s2 differ in length");

  vc                = vrna_alloc(sizeof(vrna_fold_compound_t));
  vc->type          = VRNA_VC_TYPE_SINGLE;
  vc->length        = length;
  vc->sequence      = strdup(sequence);

  /* get a copy of the model details */
  if(md_p)
    md = *md_p;
  else /* this fallback relies on global parameters and thus is not threadsafe */
    vrna_md_set_default(&md);

  /* always make uniq ML decomposition ! */
  md.uniq_ML      = 1;
  md.compute_bpp  = 0;

  set_fold_compound(vc, &md, options, WITH_PTYPE | WITH_PTYPE_COMPAT);

  if(!(options & VRNA_OPTION_EVAL_ONLY)){
    vrna_hc_init(vc); /* add default hard constraints */

    /* add DP matrices */
    vrna_mx_add(vc, VRNA_MX_2DFOLD, options);
  }

  /* set all fields that are unique to Distance class partitioning... */
  turn  = vc->params->model_details.min_loop_size;
  vc->reference_pt1 = vrna_ptable(s1);
  vc->reference_pt2 = vrna_ptable(s2);
  vc->referenceBPs1 = vrna_refBPcnt_matrix(vc->reference_pt1, turn);
  vc->referenceBPs2 = vrna_refBPcnt_matrix(vc->reference_pt2, turn);
  vc->bpdist        = vrna_refBPdist_matrix(vc->reference_pt1, vc->reference_pt2, turn);
  /* compute maximum matching with reference structure 1 disallowed */
  vc->mm1           = maximumMatchingConstraint(vc->sequence, vc->reference_pt1);
  /* compute maximum matching with reference structure 2 disallowed */
  vc->mm2           = maximumMatchingConstraint(vc->sequence, vc->reference_pt2);

  vc->maxD1         = vc->mm1[vc->iindx[1]-length] + vc->referenceBPs1[vc->iindx[1]-length];
  vc->maxD2         = vc->mm2[vc->iindx[1]-length] + vc->referenceBPs2[vc->iindx[1]-length];

  return vc;
}
Пример #21
0
PUBLIC vrna_fold_compound_t *
vrna_fold_compound_comparative( const char **sequences,
                                vrna_md_t *md_p,
                                unsigned int options){

  int s, n_seq, length;
  vrna_fold_compound_t *vc;
  vrna_md_t           md;
  unsigned int        aux_options;

  aux_options = 0L;
 
  if(sequences == NULL) return NULL;

  for(s=0;sequences[s];s++); /* count the sequences */

  n_seq = s;

  length = strlen(sequences[0]);
  /* sanity check */
  if(length == 0)
    vrna_message_error("vrna_fold_compound_comparative: sequence length must be greater 0");
  for(s = 0; s < n_seq; s++)
    if(strlen(sequences[s]) != length)
      vrna_message_error("vrna_fold_compound_comparative: uneqal sequence lengths in alignment");

  vc            = vrna_alloc(sizeof(vrna_fold_compound_t));
  vc->type      = VRNA_VC_TYPE_ALIGNMENT;

  vc->n_seq     = n_seq;
  vc->length    = length;
  vc->sequences = vrna_alloc(sizeof(char *) * (vc->n_seq + 1));
  for(s = 0; sequences[s]; s++)
    vc->sequences[s] = strdup(sequences[s]);

  /* get a copy of the model details */
  if(md_p)
    md = *md_p;
  else /* this fallback relies on global parameters and thus is not threadsafe */
    vrna_md_set_default(&md);


  aux_options |= WITH_PTYPE;

  if(options & VRNA_OPTION_PF)
    aux_options |= WITH_PTYPE_COMPAT;

  set_fold_compound(vc, &md, options, aux_options);

  make_pscores(vc);

  if(!(options & VRNA_OPTION_EVAL_ONLY)){
    /* add default hard constraints */
    vrna_hc_init(vc);

    /* add DP matrices (if required) */
    vrna_mx_add(vc, VRNA_MX_DEFAULT, options);
  }

  return vc;
}
Пример #22
0
PUBLIC vrna_fold_compound_t *
vrna_fold_compound( const char *sequence,
                    vrna_md_t *md_p,
                    unsigned int options){

  unsigned int        i, length, aux_options;
  vrna_fold_compound_t  *vc;
  vrna_md_t           md;

  if(sequence == NULL) return NULL;

  /* sanity check */
  length = strlen(sequence);
  if(length == 0)
    vrna_message_error("vrna_fold_compound: sequence length must be greater 0");

  vc            = vrna_alloc(sizeof(vrna_fold_compound_t));
  vc->type      = VRNA_VC_TYPE_SINGLE;
  vc->length    = length;
  vc->sequence  = strdup(sequence);
  aux_options   = 0L;


  /* get a copy of the model details */
  if(md_p)
    md = *md_p;
  else
    vrna_md_set_default(&md);

  if(options & VRNA_OPTION_WINDOW){ /* sliding window structure prediction */
    if(md.window_size <= 0)
      md.window_size = (int)vc->length;
    else if(md.window_size > (int)vc->length)
      md.window_size = (int)vc->length;

    vc->window_size = md.window_size;

    if((md.max_bp_span <= 0) || (md.max_bp_span > md.window_size))
      md.max_bp_span = md.window_size;

    set_fold_compound(vc, &md, options, aux_options);

    vc->ptype_local = vrna_alloc(sizeof(char *)*(vc->length+1));
    for (i = vc->length; ( i > vc->length - vc->window_size - 5) && (i >= 0); i--){
      vc->ptype_local[i] = vrna_alloc(sizeof(char)*(vc->window_size+5));
    }

    if(!(options & VRNA_OPTION_EVAL_ONLY)){
      /* add default hard constraints */
      /* vrna_hc_init(vc); */ /* no hard constraints in Lfold, yet! */

      /* add DP matrices */
      vrna_mx_add(vc, VRNA_MX_WINDOW, options);
    }
  } else { /* regular global structure prediction */

    /* set window size to entire sequence */
    md.window_size = (int)vc->length;

    aux_options |= WITH_PTYPE;

    if(options & VRNA_OPTION_PF)
      aux_options |= WITH_PTYPE_COMPAT;

    set_fold_compound(vc, &md, options, aux_options);

    if(!(options & VRNA_OPTION_EVAL_ONLY)){
      /* add default hard constraints */
      vrna_hc_init(vc);

      /* add DP matrices (if required) */
      vrna_mx_add(vc, VRNA_MX_DEFAULT, options);
    }
  }

  return vc;
}
Пример #23
0
int main(int argc, char *argv[])
{
   int      n,i,j,l;
   int      intty;
   int      outtty;
   char    *mask, junk[20];
   char   **s;
   char   **ss[4];
   float   *B;
   float  **dm;
   Split   *S;
   Union   *U;
   char     DistAlgorithm='H';
   int      nn[4];
   short    Do_Split=0, Do_Wards=0, Do_Stg=1, Do_4_Stg=0, Do_Nj=0, Do_Mat=0;
   float    per_digit, per_gap;
   
   mask   = vrna_alloc(sizeof(char)*54);
   strcpy (mask,"%ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
   for (i=1; i<argc; i++) {
      if (argv[i][0]=='-') {
	 switch ( argv[i][1] ) {
	  case 'X':  
	     if (argv[i][2]=='\0') { Do_Stg = 1 ; break; }
             Do_Split = 0;
             Do_Wards = 0;
             Do_Stg   = 0;
             Do_Nj    = 0;
	     for(j=2;j<strlen(argv[i]);j++) {
                switch(argv[i][j]) {
                 case 's' :  Do_Split = 1; 
                    break;
                 case 'w' :  Do_Wards = 1;
                    break;
                 case 'b' :  Do_Stg   = 1;
                    break;
                 case 'n' :  Do_Nj    = 1;
                    break;
		 case 'm' :  Do_Mat   = 1;
		   break;
                 default :
                    usage();
                }
             }
             break;
          case 'Q': 
             Do_4_Stg = 1;
             break;
          case 'M':
             if(mask) { free(mask); mask = NULL; }
             switch (argv[i][2] ) {
              case '\0' :
                usage();
                break;
              case 'a' : 
                mask   = vrna_alloc(sizeof(char)*54);
                strcpy(mask,
		"%ABCDEFGHIJKLMNOPQRSTUVWabcdefghijklmnopqrstuvwxyz");
                if(argv[i][3]=='+') mask[0] = '~';  /* make case sensitive */
                break;
              case 'u' :
                mask   = vrna_alloc(sizeof(char)*28);
                strcpy(mask,"~ABCDEFGHIJKLMNOPQRSTUVW");
                break;
              case 'l' :
                mask   = vrna_alloc(sizeof(char)*28);
                strcpy(mask,"~abcdefghijklmnopqrstuvwxyz");
                break;
              case 'c' :
                mask   = vrna_alloc(sizeof(char)*12);
                strcpy(mask,"~1234567890");
                break;
              case 'n' :
                mask   = vrna_alloc(sizeof(char)*64);
                strcpy (mask,
                "%ABCDEFGHIJKLMNOPQRSTUVWabcdefghijklmnopqrstuvwxyz1234567890");
                if(argv[i][3]=='+') mask[0] = '~';  /* make case sensitive */
                break;
              case 'R' :     /* RNA */
                mask   = vrna_alloc(sizeof(char)*10);
                strcpy(mask,"%GCAUgcau");
                if(argv[i][3]=='+') mask[0] = '~';  /* make case sensitive */
                break;
              case 'D' :     /* DNA */
                mask   = vrna_alloc(sizeof(char)*10);
                strcpy (mask,"%GCATgcat");
                if(argv[i][3]=='+') mask[0] = '~';  /* make case sensitive */
                break;
              case 'A' :    /*  AMINOACIDS */
                mask   = vrna_alloc(sizeof(char)*42);
                strcpy(mask,"%ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy");
                if(argv[i][3]=='+') mask[0] = '~';  /* make case sensitive */
                break;
              case 'S' :    /* SECONDARY STRUCTURES */
                mask   = vrna_alloc(sizeof(char)*6);
                strcpy(mask,"~().^");
                break;
              case '%' :    /* ARBITRARY ALPHABETS */
                l = strlen(argv[i]);
                if(argv[i][l] == '+'){
                   mask =   vrna_alloc(sizeof(char)*(l-2));
                   mask[0] = '~';
                   for(j=1;j<=l-4;j++) mask[j] = argv[i][j+2];
		   mask[l-3]='\0';
                }                
                if(argv[i][l] == '!'){
                   mask =   vrna_alloc(sizeof(char)*(l-2));
                   mask[0] = '!';
                   for(j=1;j<=l-4;j++) mask[j] = argv[i][j+2];
		   mask[l-3]='\0';
                }
                else { 
                   mask =   vrna_alloc(sizeof(char)*(l-1));
                   mask[0] = '%';
                   for(j=1;j<=l-3;j++) mask[j] = argv[i][j+2];
		   mask[l-2]='\0';
                }
                break;
              default : 
                usage();
            }   
            break;
          case 'D' :               /* choose algorithm */
            switch(argv[i][2]) {
              case '\0' : 
                usage();
                break;
              case 'H' :
                DistAlgorithm = 'H';
                break;
              case 'A' :
                DistAlgorithm = 'A';
                if(argv[i][3]==',') {
                   per_digit=-1.;
                   sscanf(argv[i],"%[^,],%g",junk,&per_digit);
                   if(per_digit<0.) usage();
                   per_gap = per_digit;
                   Set_StrEdit_GapCosts(per_digit,per_gap);
                }
                break;
              case 'G' :
                DistAlgorithm = 'G';
                if(argv[i][3]==',') {
                   per_digit=-1.;
                   per_gap  =-1.;
                   sscanf(argv[i]+4,"%f,%f",&per_digit,&per_gap);
                   if((per_digit<0.)||(per_gap<0)) usage(); 
                   Set_StrEdit_GapCosts(per_digit,per_gap);
                }
                break;
              default:
                usage();
             }
             break;
           case 'd' :               /* choose distance matrix */  
             switch(argv[i][2]) {
               case 'D' :    /* Dayhoff Distances */
	       case 'A' :    /* Aminoacid Distance (Hofacker & Borstnik) */
               case 'B' :    /* RY distances for nucleotides */
               case 'H' :    /* Hogeweg's Distance for Secondary Structures */
               case 'S' :    /* Simple Distance (superfluous option) */
                 Set_StrEdit_CostMatrix(argv[i][2]);
                 break;
               default :
                 usage();
             }
             break;
          default : 
             usage();
         }
      }
   }

   /* END PARSING OF COMMAND LINE */

   intty = isatty(fileno(stdin));
   outtty= isatty(fileno(stdout));
   
   if(intty){
      if(outtty) {
         printf("Input sequences; @ to mark end of input\n");
         printf("%s%s\n", scale1, scale2);
      }
      else {
         fprintf(stderr,"Input sequences; @ to mark end of input\n");
         fprintf(stderr,"%s%s\n", scale1, scale2);
      }
   }
   
   while ((s=read_sequence_list(&n,mask))!=NULL) {
      dm = NULL;
      if(Do_4_Stg) {
         ss[0] = s;
         nn[0] = n;
         for(i=1;i<4;i++) {
            ss[i] = read_sequence_list(&n,mask);
            if(ss[i]==NULL) vrna_message_error("read_sequences: wrong or insufficient data.");
            nn[i] = n;
         }
         printf_taxa_list();
         B = statgeom4(ss,nn);
         printf_stg(B);
         SimplifiedBox(B,"box.ps");    /* This is preliminary !!! */ 
         free(B);
         
         for(i=0;i<4;i++){
	    for(j=0;j<nn[i];j++) free(ss[i][j]);
	    free(ss[i]);
         }
	 /* free(ss); */ /* attempt to free a non-heap object */
      }
      else {
         printf_taxa_list();
         if(Do_Stg) {
            B = statgeom(s,n);
	    if (B) {
	       printf_stg(B);
	       SimplifiedBox(B,"box.ps");
	       free(B);
	    }
         }
         if((Do_Split)||(Do_Wards)||(Do_Nj)||(Do_Mat)) {
            switch(DistAlgorithm) {
             case 'H' :
		dm = Hamming_Distance_Matrix(s,n);
		printf("> %s\n","H (Hamming Distance)");
		break;
	      case 'A' :
		dm = StrEdit_SimpleDistMatrix(s,n);
		printf("> %s\n","A (Needleman-Wunsch Distance)");
		break;
	      case 'G' :
		 dm = StrEdit_GotohDistMatrix(s,n);
		printf("> %s\n","G (Gotoh Distance)");
		break;
	      default:
		vrna_message_error("This can't happen.");
	     }
         }
         if(Do_Split) {
            S = split_decomposition(dm);
            sort_Split(S);
            print_Split(S);
            free_Split(S);
         }
         if(Do_Wards) {
            U = wards_cluster(dm);
            printf_phylogeny(U,"W");
            PSplot_phylogeny(U,"wards.ps","Ward's Method");
            free(U);
         }
         if(Do_Nj) {
            U = neighbour_joining(dm);
            printf_phylogeny(U,"Nj");
            PSplot_phylogeny(U,"nj.ps","Neighbor Joining");
            free(U);
         }
	 if(Do_Mat) printf_distance_matrix(dm);
      }

      if (dm!=NULL) free_distance_matrix(dm);
      for(i=0;i<n;i++) free(s[i]);
      free(s);
   }
   return 0;
}
Пример #24
0
PUBLIC void
nrerror(const char message[]){

  vrna_message_error(message);
}
Пример #25
0
int main(int argc, char *argv[]){
  struct        RNAplot_args_info args_info;
  char          *structure, *pre, *post;
  char          fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH];
  char          *rec_sequence, *rec_id, **rec_rest;
  int           i, length;
  int           istty;
  char          format[5]="ps";
  unsigned int  rec_type, read_opt;
  vrna_md_t     md;

  structure = pre = post = NULL;
  length = 0;
  vrna_md_set_default(&md);

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAplot_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

  if(args_info.layout_type_given) rna_plot_type = args_info.layout_type_arg;
  if(args_info.pre_given)         pre           = strdup(args_info.pre_arg);
  if(args_info.post_given)        post          = strdup(args_info.post_arg);
  if(args_info.output_format_given){
    strncpy(format, args_info.output_format_arg, 4); format[4] = '\0';
  }

  /* free allocated memory of command line data structure */
  RNAplot_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  rec_type      = read_opt = 0;
  rec_id        = rec_sequence = NULL;
  rec_rest      = NULL;
  istty         = isatty(fileno(stdout)) && isatty(fileno(stdin));

  /* set options we wanna pass to vrna_file_fasta_read_record() */
  if(istty){
    read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
    vrna_message_input_seq("Input sequence (upper or lower case) followed by structure");
  }

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = vrna_file_fasta_read_record(&rec_id, &rec_sequence, &rec_rest, NULL, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){

    if(rec_id){
      (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
    }
    else fname[0] = '\0';

    length = (int)strlen(rec_sequence);

    structure = vrna_extract_record_rest_structure((const char **)rec_rest, 0, (rec_id) ? VRNA_OPTION_MULTILINE : 0);

    if(!structure) vrna_message_error("structure missing");
    if((int)strlen(structure) != length) vrna_message_error("structure and sequence differ in length!");

    if (fname[0]!='\0'){
      strcpy(ffname, fname);
      strcat(ffname, "_ss");
    } else
      strcpy(ffname, "rna");

    structure = NULL;
    unsigned int struct_options = (rec_id) ? VRNA_OPTION_MULTILINE : 0;
    structure = vrna_extract_record_rest_structure((const char **)rec_rest, 0, struct_options);

    if(strlen(rec_sequence) != strlen(structure))
      vrna_message_error("sequence and structure have unequal length");

    switch (format[0]) {
      case 'p':
        strcat(ffname, ".ps");

        (void) vrna_file_PS_rnaplot_a(rec_sequence, structure, ffname, pre, post, &md);

        break;
      case 'g':
        strcat(ffname, ".gml");
        gmlRNA(rec_sequence, structure, ffname, 'x');
        break;
      case 'x':
        strcat(ffname, ".ss");
        xrna_plot(rec_sequence, structure, ffname);
        break;
      case 's':
        strcat(ffname, ".svg");
        svg_rna_plot(rec_sequence, structure, ffname);
        break;
      default:
        RNAplot_cmdline_parser_print_help(); exit(EXIT_FAILURE);
    }

    fflush(stdout);

    /* clean up */
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(structure);
    /* free the rest of current dataset */
    if(rec_rest){
      for(i=0;rec_rest[i];i++) free(rec_rest[i]);
      free(rec_rest);
    }
    rec_id = rec_sequence = structure = NULL;
    rec_rest = NULL;

    /* print user help for the next round if we get input from tty */
    if(istty){
      vrna_message_input_seq("Input sequence (upper or lower case) followed by structure");
    }
  }

  return EXIT_SUCCESS;
}