Ejemplo n.º 1
0
RNAProfileAlignment::RNAProfileAlignment(const string &baseStr, const string &name, const string &constraint, double t)
  : PPForestAli<RNA_Alphabet_Profile,RNA_Alphabet_Profile>(2*baseStr.length()),
    m_name(name),
    m_numStructures(1)
{
  char *viennaStr=NULL;
  
  // calculate partition function for the sequence
  do_backtrack=1;
  init_pf_fold(baseStr.length());

  //if(constraint.length()>0)
  //pf_fold((char*)baseStr.c_str(),(char*)constraint.c_str());    // expicit conversion to non-const value, but pf_fold does not alter baseStr
  //else
  pf_fold((char*)baseStr.c_str(),NULL);    // expicit conversion to non-const value, but pf_fold does not alter baseStr

  viennaStr=new char[baseStr.length()+1];
  dangles=2;
  fold((char*)baseStr.c_str(),viennaStr);

  setSize(RNAFuncs::treeSize(viennaStr));
  buildForest(baseStr,viennaStr,true);
  
  free_pf_arrays();
  delete[] viennaStr;
  
  //  hasSequence=true;
  addStrName(name);
}
Ejemplo n.º 2
0
char* seq_pf_fold(const char* sequence, float* gfe)
{
    char* structure = (char*)space(sizeof(char) * (strlen(sequence) + 1));
    *gfe = pf_fold(sequence, structure);
    free_pf_arrays();
    return structure;
}
Ejemplo n.º 3
0
/* Objective function */
double calculate_f(const gsl_vector *v, void *params) {

    double D;
    int i,j,length;
    minimizer_pars_struct *pars = (minimizer_pars_struct *)params;
    double q_tmp;
    double sigma_tmp;

    //fprintf(stderr, "=> Evaluating objective Function...\n");

    length = pars->length;

    for (i=0; i <= length; i++) {
        epsilon[i] = gsl_vector_get(v, i);
        p_unpaired[i] = 0.0;
        for (j=0; j <= length; j++) {
            p_pp[i][j] = 0.0;
        }
    }

    init_pf_fold(length);
    last_lnQ = pf_fold_pb(pars->seq, NULL);

    if (isnan(last_lnQ)) {
        return(NAN);
    }

    for (i = 1; i < length; i++) {
        for (j = i+1; j<= length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }

    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    free_pf_arrays();

    D = 0.0;

    for (i = 1; i <= length; i++) {
        D += 1 / pars->tau * epsilon[i] * epsilon[i];

        /* Ignore missing data. These have values of -1.0. To be on the
           safe side numerically test for < -0.5 */
        if (q_unpaired[i] < -0.5) {
            sigma_tmp = 10000; // Set very high sigma to ignore these positions
            q_tmp = 0.5; // Set to arbitrary value, does not matter
        } else {
            sigma_tmp = pars->sigma;
            q_tmp = q_unpaired[i];
        }

        D += 1 / sigma_tmp *
             ( p_unpaired[i] - q_tmp ) *
             ( p_unpaired[i] - q_tmp );
    }

    return D;
}
Ejemplo n.º 4
0
PRIVATE void heat_capacity(char *string, float T_min, float T_max,
                          float h, int m)
{
   int length, i;
   char *structure;
   float hc, kT, min_en;

   length = (int) strlen(string);

   do_backtrack = 0;

   temperature = T_min -m*h;
   /* initialize_fold(length); <- obsolete */
   structure = (char *) vrna_alloc((unsigned) length+1);
   min_en = fold(string, structure);
   free(structure); free_arrays();
   kT = (temperature+K0)*GASCONST/1000;    /* in kcal */
   pf_scale = exp(-(1.07*min_en)/kT/length );
   /* init_pf_fold(length); <- obsolete */
   vrna_exp_param_t  *pf_parameters = NULL;
   vrna_md_t         md;
   set_model_details(&md);
   pf_parameters = get_boltzmann_factors(temperature, 1.0, md, pf_scale);

   update_pf_params_par(length, pf_parameters);
   for (i=0; i<2*m+1; i++) {

      F[i] = pf_fold_par(string, NULL, pf_parameters, 0, 0, 0);   /* T_min -2h */
      md.temperature = temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      pf_scale=exp(-(F[i]/length +h*0.00727)/kT); /* try to extrapolate F */
      free(pf_parameters);
      pf_parameters = get_boltzmann_factors(temperature, 1.0, md, pf_scale);
      update_pf_params_par(length, pf_parameters);
}
   while (temperature <= (T_max+m*h+h)) {

      hc = - ddiff(F,h,m)* (temperature +K0 - m*h -h);
      printf("%g   %g\n", (temperature-m*h-h), hc);

      for (i=0; i<2*m; i++)
         F[i] = F[i+1];

      F[2*m] = pf_fold_par(string, NULL, pf_parameters, 0, 0, 0);
/*       printf("%g\n", F[2*m]);*/
      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      pf_scale=exp(-(F[i]/length +h*0.00727)/kT);
      free(pf_parameters);
      md.temperature = temperature;
      pf_parameters = get_boltzmann_factors(temperature, 1.0, md, pf_scale);
      update_pf_params_par(length, pf_parameters);
   }
   free_pf_arrays();
}
Ejemplo n.º 5
0
PRIVATE void heat_capacity(char *string, float T_min, float T_max,
                          float h, int m)
{
   int length, i;
   char *structure;
   float hc, kT, min_en;
   
   length = (int) strlen(string);
   
   do_backtrack = 0;   

   temperature = T_min -m*h;
   initialize_fold(length);
   structure = (char *) space((unsigned) length+1);
   min_en = fold(string, structure);
   free(structure); free_arrays();
   kT = (temperature+K0)*GASCONST/1000;    /* in kcal */
   pf_scale = exp(-(1.07*min_en)/kT/length );
   init_pf_fold(length);
   
   for (i=0; i<2*m+1; i++) {
      F[i] = pf_fold(string, NULL);   /* T_min -2h */
      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      pf_scale=exp(-(F[i]/length +h*0.00727)/kT); /* try to extrapolate F */
      update_pf_params(length); 
   }
   while (temperature <= (T_max+m*h+h)) {
      
      hc = - ddiff(F,h,m)* (temperature +K0 - m*h -h); 
      printf("%g   %g\n", (temperature-m*h-h), hc);  
      
      for (i=0; i<2*m; i++)
         F[i] = F[i+1];
      F[2*m] = pf_fold(string, NULL); 
      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      pf_scale=exp(-(F[i]/length +h*0.00727)/kT);
      update_pf_params(length); 
   }
   free_pf_arrays();
}
Ejemplo n.º 6
0
float
PFWrapper::
fold(std::string& structure)
{
#if !defined(HAVE_MPI) && defined(HAVE_BOOST_THREAD)
  static boost::mutex mtx;
  boost::mutex::scoped_lock lock(mtx);
#endif
  ::noGU = noGU_ ? 1 : 0;
  ::no_closingGU = noCloseGU_ ? 1 : 0;
  ::noLonelyPairs = noLP_ ? 1 : 0;
  float ret=0.0;
  init_pf_fold(sz_-1);
  char *s = new char[sz_];
  ret=::pf_fold(const_cast<char*>(seq_.c_str()), s);
  structure=s;
  delete[] s;
  std::copy(pr, pr+sz_*(sz_+1)/2, pr_.begin());
  std::copy(iindx, iindx+sz_, iindx_.begin());
  free_pf_arrays();
  return ret;
}
Ejemplo n.º 7
0
int main(int argc, char *argv[])
{
   char  *line;
   char *sequence;
   char *structure = NULL;
   char  fname[21];
   char  *ParamFile = NULL;
   char  *ns_bases = NULL, *c;
   int   i, length, l, sym, r;
   int   istty;
   double deltaf, deltap=0;
   int delta=100;
   int n_back = 0;
   int noconv = 0;
   int circ=0;
   int dos=0;
   int zuker=0;
   do_backtrack = 1;
   dangles = 2;
   for (i=1; i<argc; i++) {
      if (argv[i][0]=='-')
	switch ( argv[i][1] )
	  {
	  case 'T':  if (argv[i][2]!='\0') usage();
	    if(i==argc-1) usage();
	    r=sscanf(argv[++i], "%lf", &temperature);
	    if (r!=1) usage();
	    break;
	  case 'p':
	    if (argv[i][2]!='\0') usage();
	    if(i==argc-1) usage();
	    (void) sscanf(argv[++i], "%d", &n_back);
	    init_rand();
	    break;
	  case 'n':
	    if ( strcmp(argv[i], "-noGU" )==0) noGU=1;
	    if ( strcmp(argv[i], "-noCloseGU" ) ==0) no_closingGU=1;
	    if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
	    if ( strcmp(argv[i], "-nsp") ==0) {
	      if (i==argc-1) usage();
	      ns_bases = argv[++i];
	    }
	    if ( strcmp(argv[i], "-noconv")==0) noconv=1;
	    break;
	  case '4':
	    tetra_loop=0;
	    break;
	  case 'C':
	    fold_constrained=1;
	    break;
	  case 'D':
	    dos=1;
	    print_energy = -999999;
	    break;
	  case 'd': dangles=0;
	    if (argv[i][2]!='\0') {
	      r=sscanf(argv[i]+2, "%d", &dangles);
	      if (r!=1) usage();
	    }
	    break;
	  case 'P':
	    if (i==argc-1) usage();
	    ParamFile = argv[++i];
	    break;
	  case 's':
	    subopt_sorted=1;
	    break;
	  case 'l':
	    if (strcmp(argv[i],"-logML")==0) {
	      logML=1;
	      break;
	    }
	    else usage();
	    break;
	  case 'e':
	    if (i>=argc-1) usage();
	    if (strcmp(argv[i],"-ep")==0)
	      r=sscanf(argv[++i], "%lf", &deltap);
	    else {
	      r=sscanf(argv[++i], "%lf", &deltaf);
	      delta = (int) (0.1+deltaf*100);
	    }
	    if (r!=1) usage();
	    break;
	  case 'c':
	    if ( strcmp(argv[i], "-circ")==0) circ=1;
	    break;
	  case 'z':
	    zuker=1;
	    break;
	  default: usage();
	  }
   }

   if ((zuker)&&(circ)) {
     printf("Sorry, zuker subopts not yet implemented for circfold\n");
     usage();
   }
   if ((zuker)&&(n_back>0)) {
     printf("Cna't do zuker subopts and stochastic subopts at the same time\n");
     usage();
   }
   if (ParamFile != NULL)
     read_parameter_file(ParamFile);

   if (ns_bases != NULL) {
      nonstandards = space(33);
      c=ns_bases;
      i=sym=0;
      if (*c=='-') {
	 sym=1; c++;
      }
      while (*c) {
	if (*c!=',') {
	  nonstandards[i++]=*c++;
	  nonstandards[i++]=*c;
	  if ((sym)&&(*c!=*(c-1))) {
	    nonstandards[i++]=*c;
	    nonstandards[i++]=*(c-1);
	  }
	}
	c++;
      }
   }
   istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
   if ((fold_constrained)&&(istty)) {
     printf("Input constraints using the following notation:\n");
     /* printf("| : paired with another base\n"); */
     printf(". : no constraint at all\n");
     printf("x : base must not pair\n");
   }

   do {				/* main loop: continue until end of file */
     cut_point = -1;
     if (istty) {
       printf("\nInput string (upper or lower case); @ to quit\n");
       if (!zuker)printf("Use '&' to connect 2 sequences that shall form a complex.\n");
       printf("%s\n", scale);
     }
     fname[0]='\0';
     if ((line = get_line(stdin))==NULL) break;

     /* skip comment lines and get filenames */
     while ((*line=='*')||(*line=='\0')||(*line=='>')) {
       if (*line=='>')
	 (void) sscanf(line, ">%20s", fname);
       free(line);
       if ((line = get_line(stdin))==NULL) break;;
     }

     if ((line==NULL)||strcmp(line,"@")==0) break;

     sequence = tokenize(line); /* frees line */
     length = (int) strlen(sequence);
     structure = (char *) space((unsigned) length+1);

     if (fold_constrained) {
       char *cstruc;
       cstruc = tokenize(get_line(stdin));
       if (cstruc!=NULL) {
	 strncpy(structure, cstruc, length);
	 for (i=0; i<length; i++)
	   if (structure[i]=='|')
	     nrerror("constraints of type '|' not allowed");
	 free(cstruc);
       }
     }

     for (l = 0; l < length; l++) {
       sequence[l] = toupper(sequence[l]);
       if (!noconv && sequence[l] == 'T') sequence[l] = 'U';
     }
     if (istty) {
       if (cut_point == -1)
	 printf("length = %d\n", length);
       else
	 printf("length1 = %d\nlength2 = %d\n",
		cut_point-1, length-cut_point+1);
     }


     if ((logML!=0 || dangles==1 || dangles==3) && dos==0)
       if (deltap<=0) deltap=delta/100. +0.001;
     if (deltap>0)
       print_energy = deltap;

     /* first lines of output (suitable  for sort +1n) */
     if (fname[0] != '\0')
       printf("> %s [%d]\n", fname, delta);

     if (n_back>0) {  /* stochastic backtrack */
       double mfe, kT;
       char *ss;
       st_back=1;
       ss = (char *) space(strlen(sequence)+1);
       strncpy(ss, structure, length);
       mfe = fold(sequence, ss);
       kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
       pf_scale = exp(-(1.03*mfe)/kT/length);
       strncpy(ss, structure, length);
       /* ignore return value, we are not interested in the free energy */
       (circ) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss);
       free(ss);
       for (i=0; i<n_back; i++) {
	 char *s;
	 s =(circ) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
	 printf("%s\n", s);
	 free(s);
       }
       free_pf_arrays();
     } else if (!zuker) { /* normal subopt */
       (circ) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout);
       if (dos) {
	 int i;
	 for (i=0; i<= MAXDOS && i<=delta/10; i++) {
	   printf("%4d %6d\n", i, density_of_states[i]);
	 }
       }
     } else { /* Zuker suboptimals */
       SOLUTION *zr;
       int i;
       if (cut_point!=-1) {
	 printf("Sorry, zuker subopts not yet implemented for cofold\n");
	 usage();
       }
       zr = zukersubopt(sequence);
       putoutzuker(zr);
       (void)fflush(stdout);
       for (i=0; zr[i].structure; i++) {
	 free(zr[i].structure);
       }
       free(zr);
     }
     (void)fflush(stdout);
     free(sequence);
     free(structure);
   } while (1);
   return 0;
}
Ejemplo n.º 8
0
int main(int argc, char *argv[])
{
    char *string/*, *line*/;
    char *structure=NULL, *cstruc=NULL;
    /*char  fname[13], ffname[20], gfname[20];*/
    /*char  *ParamFile=NULL;*/
    char  *ns_bases=NULL, *c;
    int   i, length, l, sym/*, r*/;
    double energy, min_en;
    double kT, sfact=1.07;
    int   pf=0, noPS=0, istty;
    int noconv=0;
    int circ=0;

    AjPSeq  seq     = NULL;
    AjPFile confile = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;
    AjPFile essfile = NULL;
    AjPFile dotfilea = NULL;
    AjPFile dotfileb = NULL;
    

    AjPStr seqstring = NULL;
    AjPStr constring = NULL;
    AjPStr seqname   = NULL;
  
    float eT = 0.;
    AjBool eGU;
    AjBool ecirc = ajFalse;
  
    AjBool eclose;
    AjBool lonely;
    AjBool convert;
    AjPStr ensbases = NULL;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    float escale = 0.;
    AjPStr edangles = NULL;
    char edangle = '\0';

    ajint len;



    embInitPV("vrnafold",argc,argv,"VIENNA",VERSION);
    
    
    seqstring = ajStrNew();
    constring = ajStrNew();
    seqname   = ajStrNew();
    
    
    seq       = ajAcdGetSeq("sequence");
    confile   = ajAcdGetInfile("constraintfile");
    paramfile = ajAcdGetInfile("paramfile");
    eT        = ajAcdGetFloat("temperature");
    ecirc     = ajAcdGetBoolean("circular");
    eGU       = ajAcdGetBoolean("gu");
    eclose    = ajAcdGetBoolean("closegu");
    lonely    = ajAcdGetBoolean("lp");
    convert   = ajAcdGetBoolean("convert");
    ensbases  = ajAcdGetString("nsbases");
    etloop    = ajAcdGetBoolean("tetraloop");
    eenergy   = ajAcdGetListSingle("energy");
    escale    = ajAcdGetFloat("scale");
    edangles  = ajAcdGetListSingle("dangles");
    outf      = ajAcdGetOutfile("outfile");
    essfile   = ajAcdGetOutfile("ssoutfile");
    /*
      dotfilea  = ajAcdGetOutfile("adotoutfile");
      dotfileb  = ajAcdGetOutfile("bdotoutfile");
    */
    
    do_backtrack = 2; 
    pf = 0;
    string = NULL;
    istty = 0;

    temperature   = (double) eT;
    circ          = !!ecirc;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    noconv        = (convert) ? 0 : 1;
    ns_bases      = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL;
    tetra_loop    = !!etloop;
    
    ewt = *ajStrGetPtr(eenergy);
    if(ewt == '0')
	energy_set = 0;
    else if(ewt == '1')
	energy_set = 1;
    else if(ewt == '2')
	energy_set = 2;
    
    sfact = (double) escale;
    
    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
	dangles = 0;
    else if(edangle == '1')
	dangles = 1;
    else if(edangle == '2')
	dangles = 2;
    else if(edangle == '3')
	dangles = 3;


    if(circ && noLonelyPairs)
    {

        ajWarn("Depending on the origin of the circular sequence\n"
               "some structures may be missed when using -noLP\nTry "
               "rotating your sequence a few times\n");        
    }


    if(paramfile)
	read_parameter_file(paramfile);
   
    if (ns_bases != NULL)
    {
	nonstandards = space(33);
	c=ns_bases;
	i=sym=0;

	if (*c=='-')
	{
	    sym=1; c++;
	}

	while (*c!='\0')
	{
	    if (*c!=',')
	    {
		nonstandards[i++]=*c++;
		nonstandards[i++]=*c;
		if ((sym)&&(*c!=*(c-1)))
		{
		    nonstandards[i++]=*c;
		    nonstandards[i++]=*(c-1);
		}
	    }
	    c++;
	}
    }


    if(confile)
	vienna_GetConstraints(confile,&constring);
    
    string = NULL;
    structure = NULL;

    length = ajSeqGetLen(seq);
    string = (char *) space(length+1);
    strcpy(string,ajSeqGetSeqC(seq));

    len = ajStrGetLen(constring);
    structure = (char *) space(length+1);
    if(len)
    {
	fold_constrained = 1;
	strcpy(structure,ajStrGetPtr(constring));
    }
    

    for (l = 0; l < length; l++) {
        string[l] = toupper(string[l]);
        if (!noconv && string[l] == 'T') string[l] = 'U';
    }

    /* initialize_fold(length); */
    if (circ)
        min_en = circfold(string, structure);
    else
        min_en = fold(string, structure);

    ajFmtPrintF(outf,"%s\n%s", string, structure);
    if (istty)
        printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
        ajFmtPrintF(outf," (%6.2f)\n", min_en);

    if (!noPS)
    {
        if (length<2000)
            (void) PS_rna_plot(string, structure, essfile);
        else
            ajWarn("Structure too long, not doing xy_plot\n");
    }
    if (length>=2000) free_arrays(); 

    if (pf)
    {
        char *pf_struc;
        pf_struc = (char *) space((unsigned) length+1);
	if (dangles==1)
        {
            dangles=2;   /* recompute with dangles as in pf_fold() */
            min_en = (circ) ? energy_of_circ_struct(string, structure) :
                energy_of_struct(string, structure);
            dangles=1;
        }

        kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
        pf_scale = exp(-(sfact*min_en)/kT/length);

        if (length>2000)
            ajWarn("scaling factor %f\n", pf_scale);

        (circ) ? init_pf_circ_fold(length) : init_pf_fold(length);

        if (cstruc!=NULL)
            strncpy(pf_struc, cstruc, length+1);

        energy = (circ) ? pf_circ_fold(string, pf_struc) :
            pf_fold(string, pf_struc);

        if (do_backtrack)
        {
            ajFmtPrintF(outf,"%s", pf_struc);
            ajFmtPrintF(outf," [%6.2f]\n", energy);
        }

        if ((istty)||(!do_backtrack))
            ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n",
                        energy);

        if (do_backtrack)
        {
            plist *pl1,*pl2;
            char *cent;
            double dist, cent_en;
            cent = centroid(length, &dist);
            cent_en = (circ) ? energy_of_circ_struct(string, cent) :
                energy_of_struct(string, cent);
            ajFmtPrintF(outf,"%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
            free(cent);

            pl1 = make_plist(length, 1e-5);
            pl2 = b2plist(structure);
            (void) PS_dot_plot_list(string, dotfilea, pl1, pl2, "");
            free(pl2);
            if (do_backtrack==2)
            {
                pl2 = stackProb(1e-5);
                PS_dot_plot_list(string, dotfileb, pl1, pl2,
                                 "Probabilities for stacked pairs (i,j)(i+1,j-1)");
                free(pl2);
            }
            free(pl1);
            free(pf_struc);
        }

        ajFmtPrintF(outf," frequency of mfe structure in ensemble %g; ",
                    exp((energy-min_en)/kT));

        if (do_backtrack)
            ajFmtPrintF(outf,"ensemble diversity %-6.2f", mean_bp_dist(length));

        ajFmtPrintF(outf,"\n");
        free_pf_arrays();

    }

    if (cstruc!=NULL)
        free(cstruc);

    free(string);
    free(structure);

    ajStrDel(&seqstring);
    ajStrDel(&constring);
    ajStrDel(&seqname);

    ajStrDel(&ensbases);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);

    ajSeqDel(&seq);

    ajFileClose(&confile);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    ajFileClose(&essfile);

/*
  ajFileClose(&dotfilea);
  ajFileClose(&dotfileb);
*/  
    if (length<2000) free_arrays(); 
    embExit();
    
    return 0;
}
Ejemplo n.º 9
0
int main(int argc, char *argv[])
{
    char *sequence;
    char *structure = NULL;
    char  *ns_bases = NULL, *c;
    int   i, length, l, sym;
    int   istty;
    double deltap=0.;
    int delta=100;
    int n_back = 0;
    int noconv=0;
    int circ=0;
    int dos=0;
    
    AjPSeq  seq     = NULL;
    AjPFile confile = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;
    

    AjPStr constring = NULL;
  
    float eT = 0.;
    AjBool eGU;
  
    AjBool eclose;
    AjBool lonely;
    AjBool convert;
    AjPStr ensbases = NULL;
    AjBool etloop;
    AjPStr edangles = NULL;
    char edangle = '\0';

    ajint len;
    float erange;
    float prange;
   

    embInitPV("vrnasubopt",argc,argv,"VIENNA",VERSION);
    
    
    constring = ajStrNew();
    
    seq           = ajAcdGetSeq("sequence");
    confile       = ajAcdGetInfile("constraintfile");
    paramfile     = ajAcdGetInfile("paramfile");
    eT            = ajAcdGetFloat("temperature");
    circ          = !!ajAcdGetBoolean("circular");
    dos           = !!ajAcdGetBoolean("dos");
    eGU           = ajAcdGetBoolean("gu");
    eclose        = ajAcdGetBoolean("closegu");
    lonely        = ajAcdGetBoolean("lp");
    convert       = ajAcdGetBoolean("convert");
    ensbases      = ajAcdGetString("nsbases");
    etloop        = ajAcdGetBoolean("tetraloop");
    erange        = ajAcdGetFloat("erange");
    prange        = ajAcdGetFloat("prange");
    subopt_sorted = !!ajAcdGetBoolean("sort");
    logML         = !!ajAcdGetBoolean("logml");
    n_back        = ajAcdGetInt("nrandom");
   
    edangles      = ajAcdGetListSingle("dangles");
    outf      = ajAcdGetOutfile("outfile");

    if(dos)
        print_energy = -999999;

    do_backtrack = 1;
   
    istty = 0;

    temperature   = (double) eT;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    noconv        = (convert) ? 0 : 1;
    ns_bases      = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL;
    tetra_loop    = !!etloop;

    delta = (int) (0.1 + erange * 100);
    deltap = prange;
    
    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
	dangles = 0;
    else if(edangle == '1')
	dangles = 1;
    else if(edangle == '2')
	dangles = 2;
    else if(edangle == '3')
	dangles = 3;


    if(paramfile)
	read_parameter_file(paramfile);


   
    if (ns_bases != NULL)
    {
	nonstandards = space(33);
	c=ns_bases;
	i=sym=0;
	if (*c=='-')
	{
	    sym=1;
	    c++;
	}
	while (*c)
	{
	    if (*c!=',')
	    {
		nonstandards[i++]=*c++;
		nonstandards[i++]=*c;
		if ((sym)&&(*c!=*(c-1)))
		{
		    nonstandards[i++]=*c;
		    nonstandards[i++]=*(c-1);
		}
	    }
	    c++;
	}
    }


    if(confile)
	vienna_GetConstraints(confile,&constring);
    

    if(n_back)
        init_rand();

    
    sequence  = NULL;
    structure = NULL;

    length = ajSeqGetLen(seq);
    sequence = (char *) space(length+1);
    strcpy(sequence,ajSeqGetSeqC(seq));

    len = ajStrGetLen(constring);
    structure = (char *) space(length+1);
    if(len)
    {
	fold_constrained = 1;
	strcpy(structure,ajStrGetPtr(constring));
    }
    
    istty = 0;

    if (fold_constrained)
    {
	for (i=0; i<length; i++)
	    if (structure[i]=='|')
		ajFatal("Constraints of type '|' are not allowed\n");
    }      
      
    for (l = 0; l < length; l++)
    {
        sequence[l] = toupper(sequence[l]);
        if (!noconv && sequence[l] == 'T')
            sequence[l] = 'U';
    }
    
    if ((logML!=0 || dangles==1 || dangles==3) && dos==0)
	if (deltap<=0) deltap=delta/100. +0.001;
    if (deltap>0)
	print_energy = deltap;

    /* first lines of output (suitable  for sort +1n) */

    ajFmtPrintF(outf,"> %s [%d]\n", ajSeqGetNameC(seq), delta);

    if(n_back>0)
    {
	int i;
	double mfe, kT;
	char *ss;
	st_back=1;
	ss = (char *) space(strlen(sequence)+1);
	strncpy(ss, structure, length);
	mfe = (circ) ? circfold(sequence, ss) : fold(sequence, ss);
	kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
	pf_scale = exp(-(1.03*mfe)/kT/length);
	strncpy(ss, structure, length);
        /*
        ** we are not interested in the free energy but in the bppm, so we
        ** drop free energy into the void
        */
        (circ) ? (void) pf_circ_fold(sequence, ss) :
            (void) pf_fold(sequence, ss);
	free(ss);
	for (i=0; i<n_back; i++)
	{
	    char *s;
            s = (circ) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
	    ajFmtPrintF(outf,"%s\n", s);
	    free(s);
	}
	free_pf_arrays();
    }
    else
    {
	(circ) ? subopt_circ(sequence, structure, delta, ajFileGetFileptr(outf)) :
            subopt(sequence, structure, delta, ajFileGetFileptr(outf));
    }
      

    free(sequence);
    free(structure); 

    ajSeqDel(&seq);
    ajStrDel(&ensbases);
    ajStrDel(&edangles);

    ajFileClose(&confile);
    ajFileClose(&outf);
    ajFileClose(&paramfile);

    embExit();

    return 0;
}
Ejemplo n.º 10
0
int main(int argc, char *argv[])
{
    char *start, *structure, *rstart, *str2, *line;
    char  *ParamFile=NULL;
    int   i,j, length, l, hd;
    double energy=0., kT;
    int   pf, mfe, istty;
    int   repeat, found;

    do_backtrack = 0;
    pf = 0;
    mfe = 1;
    repeat = 0;
    init_rand();
    for (i=1; i<argc; i++) {
        if (argv[i][0]=='-')
            switch ( argv[i][1] )
            {
            case 'a':
                symbolset = argv[++i];
                /* symbolset should only have uppercase characters */
                for (l = 0; l < (int)strlen(symbolset); l++)
                    symbolset[l] = toupper(symbolset[l]);
                break;
            case 'T':
                if (argv[i][2]!='\0') usage();
                if (sscanf(argv[++i], "%lf", &temperature)==0)
                    usage();
                break;
            case 'F':
                mfe = 0;
                pf = 0;
                for(j=2; j<(int)strlen(argv[i]); j++) {
                    switch( argv[i][j] ) {
                    case 'm' :
                        mfe = 1;
                        break;
                    case 'p' :
                        pf = 1; /* old version had dangles=0 here */
                        break;
                    default :
                        usage();
                    }
                }
                break;
            case 'R':
                repeat = REPEAT_DEFAULT;
                if(++i<argc)
                    if (sscanf(argv[i], "%d", &repeat)==0)
                        usage();
                break;
            case 'n':
                if (strcmp(argv[i], "-noGU" )==0) noGU=1;
                else if (strcmp(argv[i], "-noLP" )==0) noLonelyPairs=1;
                else usage();
                break;
            case '4':
                tetra_loop=0;
                break;
            case 'e':
                if (sscanf(argv[++i],"%d", &energy_set)==0)
                    usage();
                break;
            case 'd':
                dangles=0;
                if (argv[i][2]!='\0')
                    if (sscanf(argv[i]+2, "%d", &dangles)==0)
                        usage();
                break;
            case 'f': /* when to stop RNAfold -p */
                if (sscanf(argv[++i],"%f", &final_cost)==0)
                    usage();
                break;
            case 'P':
                if (++i<argc)
                    ParamFile = argv[i];
                else
                    usage();
                break;
            case 'v':
                inv_verbose = 1;
                break;
            default:
                usage();
            }
    }

    kT = (temperature+273.15)*1.98717/1000.0;

    istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

    if (ParamFile!=NULL)
        read_parameter_file(ParamFile);

    give_up = (repeat<0);

    do {
        if (istty) {
            printf("\nInput structure & start string"
                   " (lower case letters for const positions)\n"
                   "    @ to quit, and 0 for random start string\n");
            printf("%s\n", scale);
        }

        if ((line = get_line(stdin))==NULL) break;

        /* read structure, skipping over comment lines */
        while ((*line=='*')||(*line=='\0')||(*line=='>')) {
            printf("%s\n", line);
            free(line);
            if ((line = get_line(stdin))==NULL) break;
        }
        /* stop at eof or '@' */
        if (line==NULL) break;
        if (strcmp(line, "@") == 0) {
            free(line);
            break;
        }

        structure = (char *) space(strlen(line)+1);
        (void) sscanf(line,"%s",structure); /* scanf gets rid of trailing junk */
        free(line);

        length = (int) strlen(structure);
        str2 = (char *) space((unsigned)length+1);

        if ((line = get_line(stdin))!=NULL)
            if (strcmp(line, "@") == 0) {
                free(line);
                break;
            }

        start = (char *) space((unsigned) length+1);
        if (line !=NULL) {
            (void) strncpy(start, line, length);
            free(line);
        }

        if (istty) printf("length = %d\n", length);

        if (repeat!=0) found = (repeat>0)? repeat : (-repeat);
        else found = 1;

        initialize_fold(length);

        rstart = (char *) space((unsigned)length+1);
        while(found>0) {
            char *string;
            string = (char *) space((unsigned)length+1);
            strcpy(string, start);
            for (i=0; i<length; i++) {
                /* lower case characters are kept fixed, any other character
                   not in symbolset is replaced by a random character */
                if (islower(string[i])) continue;

                if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
                    string[i]=symbolset[int_urn(0,strlen(symbolset)-1)];
            }
            strcpy(rstart, string); /* remember start string */

            if (mfe) {
                energy = inverse_fold(string, structure);
                if( (repeat>=0) || (energy<=0.0) ) {
                    found--;
                    hd = hamming(rstart, string);
                    printf("%s  %3d", string, hd);
                    if (energy>0) { /* no solution found */
                        printf("   d= %g\n", energy);
                        if(istty) {
                            energy = fold(string,str2);
                            printf("%s\n", str2);
                        }
                    } else printf("\n");
                }
            }
            if (pf) {
                if (!(mfe && give_up && (energy>0))) {
                    /* unless we gave up in the mfe part */
                    double prob, min_en, sfact=1.07;

                    /* get a reasonable pf_scale */
                    min_en = fold(string,str2);
                    pf_scale = exp(-(sfact*min_en)/kT/length);
                    init_pf_fold(length);

                    energy = inverse_pf_fold(string, structure);
                    prob = exp(-energy/kT);
                    hd = hamming(rstart, string);
                    printf("%s  %3d  (%g)\n", string, hd, prob);
                    free_pf_arrays();
                }
                if (!mfe) found--;
            }
            (void) fflush(stdout);
            free(string);
        }
        free(rstart);
        free_arrays();

        free(structure);
        free(str2);
        free(start);
        (void) fflush(stdout);
    } while (1);
    return 0;
}
Ejemplo n.º 11
0
int main(int argc, char *argv[]){
  struct        RNAsubopt_args_info args_info;
  unsigned int  input_type;
  char          fname[80], *cstruc, *sequence, *c, *input_string;
  char          *structure = NULL, *ParamFile = NULL, *ns_bases = NULL;
  int           i, length, l, sym, istty;
  double        deltaf, deltap;
  int           delta, n_back, noconv, circular, dos, zuker;

  do_backtrack  = 1;
  dangles       = 2;
  delta         = 100;
  deltap = n_back = noconv = circular = dos = zuker = 0;
  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAsubopt_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.constraint_given)  fold_constrained=1;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* energy range */
  if(args_info.deltaEnergy_given) delta = (int) (0.1+args_info.deltaEnergy_arg*100);
  /* energy range after post evaluation */
  if(args_info.deltaEnergyPost_given) deltap = args_info.deltaEnergyPost_arg;
  /* sorted output */
  if(args_info.sorted_given)      subopt_sorted = 1;
  /* assume RNA sequence to be circular */
  if(args_info.circ_given)        circular=1;
  /* stochastic backtracking */
  if(args_info.stochBT_given){
    n_back = args_info.stochBT_arg;
    init_rand();
  }
  /* density of states */
  if(args_info.dos_given){
    dos = 1;
    print_energy = -999999;
  }
  /* logarithmic multiloop energies */
  if(args_info.logML_given) logML = 1;
  /* zuker subopts */
  if(args_info.zuker_given) zuker = 1;

  if(zuker){
    if(circular){
      warn_user("Sorry, zuker subopts not yet implemented for circfold");
      RNAsubopt_cmdline_parser_print_help();
      exit(1);
    }
    else if(n_back>0){
      warn_user("Can't do zuker subopts and stochastic subopts at the same time");
      RNAsubopt_cmdline_parser_print_help();
      exit(1);
    }
  }

  /* free allocated memory of command line data structure */
  RNAsubopt_cmdline_parser_free(&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */

  if (ParamFile != NULL) read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  if(fold_constrained && istty) print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X);


  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  do {
    cut_point = -1;
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty){ 
      if (!zuker)
        printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      print_tty_input_seq();
    }
    /* extract filename from fasta header if available */
    fname[0] = '\0';
    while((input_type = get_input_line(&input_string, 0)) == VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string);
      (void) sscanf(input_string, "%42s", fname);
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      sequence  = tokenize(input_string); /* frees input_string */
      length    = (int) strlen(sequence);
    }
    structure = (char *) space((unsigned) length+1);

    if(noconv)  str_RNA2RNA(sequence);
    else        str_DNA2RNA(sequence);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
    }

    /* get structure constraint or break if necessary, entering an empty line results in a warning */
    if (fold_constrained) {
      input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
      if(input_type & VRNA_INPUT_QUIT){ break;}
      else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
        cstruc = tokenize(input_string);
        strncpy(structure, cstruc, length);
        for (i=0; i<length; i++)
          if (structure[i]=='|')
            nrerror("constraints of type '|' not allowed");
        free(cstruc);
      }
      else warn_user("constraints missing");
    }
    /*
    ########################################################
    # done with 'stdin' handling, now init everything properly
    ########################################################
    */

    if((logML != 0 || dangles==1 || dangles==3) && dos == 0)
      if(deltap<=0) deltap = delta/100. + 0.001;
    if (deltap>0)
      print_energy = deltap;

    /* first lines of output (suitable  for sort +1n) */
    if (fname[0] != '\0')
      printf("> %s [%d]\n", fname, delta);

    /* stochastic backtracking */
    if(n_back>0){
      double mfe, kT;
      char *ss;
      st_back=1;
      ss = (char *) space(strlen(sequence)+1);
      strncpy(ss, structure, length);
      mfe = fold(sequence, ss);
      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      pf_scale = exp(-(1.03*mfe)/kT/length);
      strncpy(ss, structure, length);
      /* ignore return value, we are not interested in the free energy */
      (circular) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss);
      free(ss);
      for (i=0; i<n_back; i++) {
        char *s;
        s =(circular) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
        printf("%s\n", s);
        free(s);
      }
      free_pf_arrays();
    }
    /* normal subopt */
    else if(!zuker){
      (circular) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout);
      if (dos) {
        int i;
        for (i=0; i<= MAXDOS && i<=delta/10; i++) {
          printf("%4d %6d\n", i, density_of_states[i]);
        }
      }
    }
    /* Zuker suboptimals */
    else{
      SOLUTION *zr;
      int i;
      if (cut_point!=-1) {
        nrerror("Sorry, zuker subopts not yet implemented for cofold\n");
      }
      zr = zukersubopt(sequence);
      putoutzuker(zr);
      (void)fflush(stdout);
      for (i=0; zr[i].structure; i++) {
        free(zr[i].structure);
      }
      free(zr);
    }
    (void)fflush(stdout);
    free(sequence);
    free(structure);
  } while (1);
  return 0;
}
Ejemplo n.º 12
0
/*--------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
  char *string1=NULL, *string2=NULL, *dummy=NULL, *temp=NULL, *line=NULL;
  char *structure=NULL, *cstruc=NULL, *cstruc_l=NULL, *cstruc_s=NULL;
  char fname[53], ffname[53], temp_name[201], first_name[53], my_contrib[10];
  char up_out[250], unstrs[201], name[400], cmd_line[500];
  char *ParamFile=NULL;
  char *ns_bases=NULL, *c,*head;
  int  i, length1,length2,length, l, sym, r, *u_vals, Switch, header,output;
  double energy, min_en;
  double sfact=1.07;
  int   istty;
  int noconv=0;
  /* variables for output */
  pu_contrib *unstr_out, *unstr_short;
  interact *inter_out;
  /* pu_out *longer; */
  char *title;
  /* commandline parameters */
  int w;       /* length of region of interaction */
  int incr3;   /* add x unpaired bases after 3'end of short RNA*/
  int incr5;   /* add x unpaired bases after 5'end of short RNA*/
  int unstr;   /* length of unpaired region for output*/
  int upmode ; /* 1 compute only pf_unpaired, >1 compute interactions 
		  2 compute intra-molecular structure only for long RNA, 3 both RNAs */
  int task;    /* input mode for calculation of interaction */
  /* default settings for RNAup */
  head = NULL;/* header text - if header wanted, see header */
  header = 1; /* if header is 0 print no header in output file: option -nh */
  output = 1; /* if output is 0 make no output file: option -o */
  Switch = 1; /* the longer sequence is selected as the target */
  task=0;
  upmode = 1; /* default is one sequence, option -X[p|f] has to be set
		 for the calculation of an interaction, if no "&" is in
		 the sequence string  */
  unstrs[0]='\0';
  default_u = 4;
  unstr=default_u;
  default_w = 25;
  w=default_w;
  u_vals=NULL;
  incr3=0;
  incr5=0;
  do_backtrack = 1;
  length1=length2=0;
  title=NULL;
  unstr_out=NULL;
  inter_out=NULL;
  my_contrib[0] = 'S';
  my_contrib[1] = '\0';
  first_name[0] = '\0';

  /* collect the command line  */
  sprintf(cmd_line,"RNAup ");
  length = 0;
  for (i=1; i<argc; i++) {
    r=sscanf(argv[i], "%100s", &temp_name);
    length+=r+1;
    if(length > 500) break;
    strcat(cmd_line, temp_name);
    strcat(cmd_line," ");
  }
  length = 0;
  
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-') 
      switch ( argv[i][1] )
	{
	case 'T':  if (argv[i][2]!='\0') usage();
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i], "%lf", &temperature);
	  if (!r) usage();
	  break;
	case 'w':
	  /* -w maximal length of unstructured region */  
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &w);
	  if (!r) usage();
	  break;
	case 't':
	  /* use the first sequence as the target */
	  if ( strcmp(argv[i], "-target")==0) {
	    Switch=0;
	  }
	  break;
	case 'o':
	  /* make no output file */
	  output=0;
	  break; 
	case 'n':
	  if ( strcmp(argv[i], "-nh")==0) {
	    header=0;
	  }
	  if ( strcmp(argv[i], "-noGU")==0) {
	    noGU=1;
	  }
	  if ( strcmp(argv[i], "-noCloseGU")==0) {
	    no_closingGU=1;
	  }
	  if ( strcmp(argv[i], "-noLP")==0) {
	    noLonelyPairs=1;
	  }
	  if ( strcmp(argv[i], "-nsp") ==0) {
	    if (i==argc-1) usage();
	    ns_bases = argv[++i];
	  }
	  if ( strcmp(argv[i], "-noconv")==0) {
	    noconv=1;
	  }
	  break;
	case '4':
	  tetra_loop=0;
	  break;
	case 'e':
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &energy_set);
	  if (!r) usage();
	  break;
	case 'C':
	  fold_constrained=1;
	  break;
	case 'S':
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i],"%lf", &sfact);
	  if (!r) usage();
	  break;
	case 'd': dangles=0;
	  if (argv[i][2]!='\0') {
	    r=sscanf(argv[i]+2, "%d", &dangles);
	    if (r!=1) usage();
	  }
	  break;
	case 'b': upmode=3;
	  break;
	case 'X':
	  /* interaction mode invoked */
	  if (upmode == 1) upmode=2;
	  switch (argv[i][2]) { /* now determine which sequences interact */
	  case 'p': task=1;
	    break; /* pairwise interaction */
	  case 'f': task=2;
	    break; /* first one interacts with all others */
	  }
	  break;
	case 'u':
	  /* -u length of unstructured region in pr_unpaired output */  
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i],"%200s", unstrs);
	  if (!r) usage();
	  if (!isdigit(unstrs[0])) usage();
	  break;
	  /* incr5 and incr3 are only for the longer (target) sequence */
	  /* increments w (length of the unpaired region) to incr5+w+incr3*/
	  /* the longer sequence is given in 5'(= position 1) to */
	  /* 3' (=position n) direction */
	  /* incr5 adds incr5 residues to the 5' end of w */
	case '5':
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &incr5);
	  if (!r) usage();
	  break; 
	  /* incr3 adds incr3 residues to the 3' end of w */
	case '3':
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &incr3);
	  if (!r) usage();
	  break;
	case 'P':
	  if (i==argc-1) usage();
	  ParamFile = argv[++i];
	  break;
	case 'c':  
	  if (i==argc-1) usage();
	  r=sscanf(argv[++i], "%6s", my_contrib);
	  if (!r) usage();
	  break;  
	default: usage();
	} 
  }
  cmd_line[strlen(cmd_line)] = '\0';
  if (dangles>0) dangles=2; /* only 0 or 2 allowed */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);
   
  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
	nonstandards[i++]=*c++;
	nonstandards[i++]=*c;
	if ((sym)&&(*c!=*(c-1))) {
	  nonstandards[i++]=*c;
	  nonstandards[i++]=*(c-1);
	}
      }
      c++;
	    
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");      
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("matching brackets ( ): base i pairs base j\n");
    printf("constraints for intramolecular folding only:\n"); 
    printf("< : base i is intramolecularly paired with a base j<i\n");
    printf("> : base i is intramolecularly paired with a base j>i\n");    
    printf("constraints for cofolding (intermolecular folding) only:\n");
    printf("| : paired with another base intermolecularly\n");        
  } 
 
  RT = ((temperature+K0)*GASCONST/1000.0);	
  do {	/* main loop: continue until end of file */
    cut_point=-1;
    if (istty) {
      if (upmode == 1) {
	printf("\nInput string (upper or lower case); @ to quit\n");
	printf("%s%s\n", scale1, scale2);
      }
      else if (upmode > 1) {
	if (task == 1 || (task == 0 && upmode == 3)) {
	  printf("\nUse either '&' to connect the 2 sequences or give each sequence on an extra line.\n"); 
	  printf("%s%s\n", scale1, scale2);
	}
	else if (task == 2) { /* option -Xf read the first two seqs */
	  printf("\nGive each sequence on an extra line. The first seq. is stored, every other seq. is compared to the first one.\n"); 
	  printf("%s%s\n", scale1, scale2);
	}
	else if (task == 3) {/* option -Xf read another sequence which
				will interact with the first one */
	  printf("\nEnter another sequence.\n"); 
	  printf("%s%s\n", scale1, scale2); 
	}
      }
    }
    fname[0]='\0';
    ffname[0]='\0';
    /* read the first sequence */
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
	(void) sscanf(line, ">%51s", fname);
      free(line);
      line=NULL;
      if ((line = get_line(stdin))==NULL) break;
    } 
    if ((line == NULL) || (strcmp(line, "@") == 0)) break;

    if (first_name[0] == '\0' && fname[0] !='\0' && task == 2) {
      strncpy(first_name,fname,30);
      first_name[30] = '\0';
    }
    /* if upmode == 2: check if the sequences are seperated via "&" (cut_point > -1) or given on extra lines */
    if (task < 3) {
      tokenize(line,&string1,&string2);
      if (task == 2 && cut_point != -1) task = 3;
      /* two sequences with & are given: calculate interaction */
      if (task == 0 && cut_point != -1) {
	task = 1;
	if (upmode == 1) upmode = 2;
      }
    }
    else if (task == 3) { /* option -Xf*/
      strncpy(ffname,fname,30);
      ffname[30] = '\0';
      strncpy(fname,first_name,30);  /* first_name: name of first seq */
      fname[30] = '\0';
      if (temp != NULL) { /*strings have been switched - write temp to string1*/
	string1 = (char *) xrealloc (string1,sizeof(char)*strlen(temp)+1);
	(void) sscanf(temp,"%s",string1);
	free(temp);temp=NULL;
	
      }	
      tokenize(line,&string2,&dummy); /*compare every seq to first one given */
      free(dummy);dummy=NULL;
      if (cut_point != -1) {
	nrerror(
	   "After the first sequence pair: Input a single sequence (no &)!\n"
	   "Each input seq. is compared to the first seq. given.\n");
      }
    }
    /* interaction mode -> get the second seq. if seq are on seperate lines*/
    if (upmode > 1){ /* interaction mode */
      if (cut_point == -1 && task < 3) { /* seqs are given on seperate lines */
	/* read the second sequence */
	if (task == 2) task = 3;
	if ((line = get_line(stdin))==NULL) {
	  nrerror("only one sequence - can not cofold one sequence!");
	}
	/* skip comment lines and get filenames */
	while ((*line=='*')||(*line=='\0')||(*line=='>')) {
	  if (*line=='>')
	    (void) sscanf(line, ">%51s", ffname); /* name of the 2nd seq */
	  free(line);
	  line=NULL;
	  if ((line = get_line(stdin))==NULL) break;
	} 
	if ((line ==NULL) || (strcmp(line, "@") == 0)) break;
	free(string2); /* string2 has been allocated in tokenize() */
    
	string2 = (char *) space(strlen(line)+1);
	(void) sscanf(line,"%s",string2); free(line);line=NULL;
      }
    } else { /* default mode pr_unpaired for ONE seq */
      /* if a second sequence is give, cofold the sequences*/
      if (cut_point != -1){
	upmode = 2;	
      }
    }

    if (string1 != NULL){length1 = (int) strlen(string1);}
    else {nrerror("sequence is NULL, check your input.");}
    if (upmode > 1) {
      if (string2 != NULL) {length2 = (int) strlen(string2);}
      else{nrerror("one of the sequences is NULL, check your input.");}

      /* write longer seq in string1 and and shorter one in string2 */ 
      if (length1 < length2 && Switch) {
	strncpy(temp_name,fname,30);
	strncpy(fname,ffname,30);
	strncpy(ffname,temp_name,30);
	  
	length=length1; length1=length2; length2=length;
	
	temp=(char *) space(sizeof(char)*strlen(string1)+1);
	(void) sscanf(string1,"%s",temp);
	string1 = (char *) xrealloc (string1,sizeof(char)*length1+1);
	(void) sscanf(string2,"%s",string1);
	string2 = (char *) xrealloc(string2,sizeof(char)*length2+1);
	(void) sscanf(temp,"%s",string2);
	if (task == 1) {
	  free(temp);
	  temp = NULL;
	}
      } 
    }
    /* parse cml parameters for output filename*/    
    /* create the name of the output file */
    if (fname[0]!='\0') {
      printf(">%s\n",fname);
      if(strlen(fname) < 30) {
	strcpy(up_out,fname);
      } else {
	strncpy(up_out,fname,30);
	up_out[30] = '\0';
      }
      
      if (upmode > 1 && ffname[0] != '\0') {
	 printf(">%s\n",ffname);
	if(strlen(fname) < 15) {
	  strcpy(up_out,fname);
	} else {
	  strncpy(up_out,fname,15);
	  up_out[15] = '\0';
	}
	strcat(up_out, "_");
	if(strlen(ffname) < 15) {
	  strcat(up_out,ffname);
	} else {
	  strncat(up_out,ffname,15);
	}
      }	
    } else {
      strcpy(up_out, "RNA");
    }
    if (upmode >1) {
      sprintf(temp_name,"_w%d",w);
      strncat(up_out, temp_name,10);
    }    
    /* do this only when -X[p|f] is used or if two sequences seperated by & are given */
    if (upmode > 1) {
      if (task == 3) {
	/* strncpy(temp_name,fname,30); */
	if(strlen(fname) < 30) {
	  strcpy(temp_name,fname);
	} else {
	  strncpy(temp_name,fname,30);
	  up_out[30] = '\0';
	}
      }
    }
    
    /* get values for -u */
    if ( ! get_u_values(unstrs,&u_vals,length1)) {
      nrerror("option -u: length value exceeds sequence length\n");
    }
      
    
    for (l = 0; l < length1; l++) {
      string1[l] = toupper(string1[l]);
      if (!noconv && string1[l] == 'T') string1[l] = 'U';
    }
    for (l = 0; l < length2; l++) {
      string2[l] = toupper(string2[l]);
      if (!noconv && string2[l] == 'T') string2[l] = 'U';
    }
    
    if (fold_constrained) {
      char *temp_cstruc=NULL;
      int old_cut;
      temp_cstruc = get_line(stdin);
      old_cut = cut_point;
      cut_point=-1;
      /* get contrained string without & */
      cstruc = tokenize_one(temp_cstruc);
      /* free(temp_cstruc); */
      /* only one seq, cstruc should not have an & */
      if (upmode == 1 && cut_point == -1) {
	if (strlen(cstruc) == length1) {
	  cstruc_l=(char*)space(sizeof(char)*(length1+1));
	  strncpy(cstruc_l,cstruc,length1);
	}else{
	  fprintf(stderr, "%s\n%s\n",string1,cstruc);
	  nrerror("RNAup -C: constrain string and structure have unequal length");
	}
      }	else if (upmode == 1 && cut_point != -1) {
	fprintf(stderr, "%s\n%s\n",string1,cstruc);
	nrerror("RNAup -C: only one sequence but constrain structure for cofolding");
      }
      /* constrain string is for both seqs */
      else if (upmode > 1 && cut_point != -1) {
	if (old_cut != cut_point) {
	  nrerror("RNAup -C: different cut points in sequence und constrain string");
	}
	seperate_bp(&cstruc,length1,&cstruc_l,&cstruc_s);
	if (strlen(cstruc) != (length1+length2)) {
	  fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc);
	  nrerror("RNAup -C: constrain string and structure have unequal length");
	}
	if (strlen(cstruc_l) != (length1)) {
	  fprintf(stderr, "%s\n%s\n",string1,cstruc_l);
	  nrerror("RNAup -C: constrain string and structure have unequal length");
	}
	if (strlen(cstruc_s) != (length2)) {
	  fprintf(stderr, "%s\n%s\n",string2,cstruc_s);
	  nrerror("RNAup -C: constrain string and structure have unequal length");
	} 
      } else {
	fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc);
	nrerror("RNAup -C: no cutpoint in constrain string");
      }      
    }
    if(length1 > length2) {
      structure = (char *) space(sizeof(char)*(length1+1));
    } else {
      structure = (char *) space(sizeof(char)*(length2+1));
    }
    update_fold_params();
    if (cstruc_s != NULL)
      strncpy(structure, cstruc_s, length2+1);
    min_en = fold(string1, structure);    
    (void) fflush(stdout);

    if (upmode != 0){
      int wplus,w_sh;
      if (upmode == 3) { /* calculate prob. unstruct. for shorter seq */  
	w_sh = w;
	/* len of unstructured region has to be <= len shorter seq. */
	if (w > length2) w_sh = length2;
	if (cstruc_s != NULL)
	  strncpy(structure, cstruc_s, length2+1);
	min_en = fold(string2, structure);	  
	pf_scale = exp(-(sfact*min_en)/RT/length2);
	if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
	init_pf_fold(length2);
	if (cstruc_s != NULL)
	  strncpy(structure, cstruc_s, length2+1);
	energy = pf_fold(string2, structure);
	unstr_short = pf_unstru(string2, w_sh);
	free_pf_arrays(); /* for arrays for pf_fold(...) */
      }
      
      /* calculate prob. unstructured for longer seq */
      wplus=w+incr3+incr5;
      /* calculate prob. unpaired for the maximal length of -u */
      if (u_vals[u_vals[0]] > wplus) wplus=u_vals[u_vals[0]];
      /* length of the unstructured region has to be <= len longer seq. */
      if (wplus > length1) wplus=length1;
      if (cstruc_l !=NULL)
	strncpy(structure, cstruc_l, length1+1);
      min_en = fold(string1, structure);
      pf_scale = exp(-(sfact*min_en)/RT/length1);
      if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
      init_pf_fold(length1);
      if (cstruc_l !=NULL)
	strncpy(structure, cstruc_l, length1+1);
      energy = pf_fold(string1, structure);
      if (upmode > 1) {
	unstr_out = pf_unstru(string1, wplus);
      } else {
	unstr_out = pf_unstru(string1, u_vals[u_vals[0]]);
      }
      free_pf_arrays(); /* for arrays for pf_fold(...) */
      /* now make output to stdout and to the output file */
      if (upmode > 1){/* calculate interaction between two sequences */
	int count;
	if (upmode == 2) {
	  inter_out = pf_interact(string1,string2,unstr_out,NULL,w,cstruc,incr3,incr5);
	  print_interaction(inter_out,string1,string2,unstr_out,NULL,w,incr3,incr5);
	} else if (upmode == 3){
	  inter_out = pf_interact(string1,string2,unstr_out,unstr_short,w,cstruc,incr3,incr5);
	  print_interaction(inter_out,string1,string2,unstr_out,unstr_short,w,incr3,incr5);
	}
	if(output) { /* make RNAup output to file */
	  printf("RNAup output in file: ");
	  /* plot for all -u values */
	  strcpy(name,up_out);
	  strcat(name, "_u");
	  if(u_vals[0] <= 20) {
	    for (count = 1; count <= u_vals[0]; count++) {
	      unstr = u_vals[count];
	      sprintf(temp_name,"%d",unstr);
	      if (count < u_vals[0]) {
		strcat(temp_name,"_");
		strncat(name, temp_name,5);
	      } else {
		strncat(name, temp_name,5);
		strcat(name, "_up.out");
		printf("%s\n",name);
	      }
	    }
	  } else {
	    sprintf(temp_name,"%d",u_vals[1]);
	    strcat(temp_name,"_to_");
	    strncat(name, temp_name,5);
	    sprintf(temp_name,"%d",u_vals[0]);
	    strncat(name, temp_name,5);
	    strcat(name, ".out");
	    printf("%s\n",name);
	  }
	  
	  if(header) {
	    char startl[3];
	    sprintf(startl,"# ");
	  
	    head = (char*)space(sizeof(char)*(length1+length2+1000));
	    /* mach kein \n als ende von head */
	    sprintf(head,"%s %s\n%s %d %s\n%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1, startl,length2,ffname, startl,string2);
	  
	  } else {
	    if(head != NULL) { nrerror("error with header\n"); }
	  }
	  Up_plot(unstr_out,NULL,inter_out,name,u_vals,my_contrib,head);
	
	  if(head != NULL) {
	    free(head);
	    head = NULL;
	  }
	
	  if (upmode == 3 ) {/* plot opening energy for boths RNAs */
	    if(head != NULL) { nrerror("error with header\n"); }
	    Up_plot(NULL,unstr_short,NULL,name,u_vals,my_contrib,head);
	  }
	}
      } else { /* one sequence:  plot only results for prob unstructured */
	int count;
	char collect_out[1000];
	collect_out[0]='\0';
	
	for (count = 1; count <= u_vals[0]; count++) {
	  unstr = u_vals[count];
	  print_unstru(unstr_out,unstr);
	}
	if(output) {/* make RNAup output to file */
	  printf("RNAup output in file: ");
	  strcpy(name,up_out);
	  strcat(name, "_u");
	  if(u_vals[0] <= 20) {
	    for (count = 1; count <= u_vals[0]; count++) {
	      unstr = u_vals[count];
	      sprintf(temp_name,"%d",unstr);
	      if (count < u_vals[0]) {
		strcat(temp_name,"_");
		strncat(name, temp_name,5);
	      } else {
		strncat(name, temp_name,5);
		strcat(name, ".out");
		printf("%s\n",name);
	      }
	    }
	  } else {
	    sprintf(temp_name,"%d",u_vals[1]);
	    strcat(temp_name,"_to_");
	    strncat(name, temp_name,5);
	    sprintf(temp_name,"%d",u_vals[0]);
	    strncat(name, temp_name,5);
	    strcat(name, ".out");
	    printf("%s\n",name);
	  }
	  
	  if(header) {
	    char startl[3];
	    sprintf(startl,"# ");
	    head = (char*)space(sizeof(char)*(length1+length2+1000));
	    /* mach kein \n als ende von head */
	    sprintf(head,"%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1);
	  } else { if(head != NULL) { nrerror("error with header\n"); }}
	
	  Up_plot(unstr_out,NULL,NULL,name,u_vals,my_contrib,head);
	
	  if(head != NULL) { free(head); head = NULL;}
	}
      }	
    } else {
      nrerror("no output format given\n");
    }
    
    
    if(structure != NULL) free(structure);
    structure = NULL;
    if (title != NULL) free(title);
    title=NULL;
    if (u_vals != NULL) free(u_vals);
    u_vals=NULL;
    if (upmode == 1) free_pu_contrib(unstr_out);
    if (upmode > 1) {
      free_pu_contrib(unstr_out);
      free_interact(inter_out);
    }
    if (upmode == 3)free_pu_contrib(unstr_short);
    free_arrays(); /* for arrays for fold(...) */   
    if (cstruc!=NULL) free(cstruc);
    cstruc=NULL;
    if (cstruc_l!=NULL) free(cstruc_l);
    cstruc_l=NULL;
    if (cstruc_s!=NULL) free(cstruc_s);
    cstruc_s=NULL;
    (void) fflush(stdout);
    if (string1!=NULL && task != 3) {
      free(string1);
      string1 = NULL;
    }
    if (string2!=NULL) free(string2);
    string2 = NULL;
    
  } while (1);
  if (line != NULL) free(line);
  if (string1!=NULL) free(string1);
  if (string2!=NULL) free(string2);
  if (cstruc!=NULL) free(cstruc);
  if (cstruc_l!=NULL) free(cstruc_l);
  if (cstruc_s!=NULL) free(cstruc_s);  
  
  return 0;
}
Ejemplo n.º 13
0
int main(int argc, char *argv[])
{
    char *start;
    char *structure;
    char *rstart;
    char *str2;
    char *line;
    int i;
    int length;
    int l;
    int hd;
    double energy = 0.;
    double kT;
    int   pf = 0;
    int   mfe = 0;
    int   istty;
    int   repeat; 
    int   found;
    
    AjPFile inf     = NULL;
    AjPSeq  seq = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;
    
    float eT = 0.;
    AjBool eGU;
    
    AjBool eclose;
    AjBool lonely;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    AjPStr edangles = NULL;
    AjPStr method   = NULL;
    AjPStr ealpha    = NULL;
    AjBool showfails = ajFalse;
    AjBool succeed = ajFalse;
    
    char edangle = '\0';
    
    ajint len;
    FILE *fp;
    
    
    
    embInitPV("vrnainverse",argc,argv,"VIENNA",VERSION);
    
    
    inf        = ajAcdGetInfile("structuresfile");
    seq        = ajAcdGetSeq("sequence");
    paramfile  = ajAcdGetInfile("paramfile");
    eT         = ajAcdGetFloat("temperature");
    eGU        = ajAcdGetBoolean("gu");
    eclose     = ajAcdGetBoolean("closegu");
    lonely     = ajAcdGetBoolean("lp");
    etloop     = ajAcdGetBoolean("tetraloop");
    eenergy    = ajAcdGetListSingle("energy");
    edangles   = ajAcdGetListSingle("dangles");
    method     = ajAcdGetListSingle("folding");
    ealpha     = ajAcdGetString("alphabet");
    final_cost = ajAcdGetFloat("final");
    repeat     = ajAcdGetInt("repeats");
    showfails  = ajAcdGetBoolean("showfails");
    succeed    = ajAcdGetBoolean("succeed");
    outf       = ajAcdGetOutfile("outfile");
    
    
    do_backtrack = 0; 
    structure = NULL;
    istty = 0;
    
    temperature   = (double) eT;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    tetra_loop    = !!etloop;
    
    ewt = *ajStrGetPtr(eenergy);
    if(ewt == '0')
	energy_set = 0;
    else if(ewt == '1')
	energy_set = 1;
    else if(ewt == '2')
	energy_set = 2;
    
    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
	dangles = 0;
    else if(edangle == '1')
	dangles = 1;
    else if(edangle == '2')
	dangles = 2;
    else if(edangle == '3')
	dangles = 3;
    
    if(ajStrMatchC(method,"mp"))
    {
	mfe = 1;
	pf  = 1;
    }
    else if(ajStrMatchC(method,"m"))
    {
	mfe = 1;
	pf  = 0;
    }
    else if(ajStrMatchC(method,"p"))
    {
	mfe = 0;
	pf  = 1;
    }
    
    len = ajStrGetLen(ealpha);
    symbolset = (char *) space(len + 1);
    strcpy(symbolset, ajStrGetPtr(ealpha));
    for (l = 0; l < len; l++)
	symbolset[l] = toupper(symbolset[l]);
    
    inv_verbose = !!showfails;
    fp = ajFileGetFileptr(inf);
    
    init_rand();
    kT = (temperature+273.15)*1.98717/1000.0;
    
    istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));
    
    if (paramfile)
	read_parameter_file(paramfile);
    
    give_up = succeed;
    
    do {
      
	if ((line = get_line(fp))==NULL) break;

	/* read structure, skipping over comment lines */
	while ((*line=='*')||(*line=='\0')||(*line=='>'))
	{
	    free(line);
	    if ((line = get_line(fp))==NULL)
		break;
	} 
	/* stop at eof or '@' */
	if (line==NULL) break;
	if (strcmp(line, "@") == 0)
	{
	    free(line);
	    break;
	}

	structure = (char *) space(strlen(line)+1);
	/* scanf gets rid of trailing junk */
	(void) sscanf(line,"%s",structure);
	free(line);
      
	length = (int) strlen(structure);
	str2 = (char *) space((unsigned)length+1);

/* now look for a sequence to match the structure */

/*
	if ((line = get_line(fp))!=NULL)
	    if (strcmp(line, "@") == 0)
	    {
		free(line);
		break;
	    }
*/

	start = (char *) space((unsigned) length+1);
	if(seq)
	    (void) strncpy(start, ajSeqGetSeqC(seq), length);

	if (repeat!=0)
	    found = repeat;
	else
	    found = 1;
      
	initialize_fold(length);

	rstart = (char *) space((unsigned)length+1);
	while(found>0)
	{
	    char *string;
	    string = (char *) space((unsigned)length+1);
	    strcpy(string, start);
	    for (i=0; i<length; i++)
	    {
		/* lower case characters are kept fixed, any other character
		   not in symbolset is replaced by a random character */
		if (islower(string[i]))
		    continue;

		if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
		    string[i]=symbolset[int_urn(0,strlen(symbolset)-1)];
	    }
	    strcpy(rstart, string);	/* remember start string */
	
	    if (mfe)
	    {
		energy = inverse_fold(string, structure);
		if( (!succeed) || (energy<=0.0) ) {
		    found--;
		    hd = hamming(rstart, string);
		    ajFmtPrintF(outf,"%s  %3d", string, hd);
		    if (energy>0)
		    {			/* no solution found */
			ajFmtPrintF(outf,"   d = %f\n", energy);
		    }
		    else
			ajFmtPrintF(outf,"\n");
		}
	    }

	    if (pf)
	    {
		if (!(mfe && give_up && (energy>0)))
		{
		    /* unless we gave up in the mfe part */
		    double prob, min_en, sfact=1.07;
	    
		    /* get a reasonable pf_scale */
		    min_en = fold(string,str2); 
		    pf_scale = exp(-(sfact*min_en)/kT/length);
		    init_pf_fold(length);
	    
		    energy = inverse_pf_fold(string, structure);
		    prob = exp(-energy/kT);
		    hd = hamming(rstart, string);
		    ajFmtPrintF(outf,"%s  %3d  (%f)\n", string, hd, prob);
		    free_pf_arrays();
		}
		if (!mfe)
		    found--;
	    }

	    free(string);
	}
	free(rstart);
	free_arrays();
      
	free(structure);
	free(str2);
	free(start);

    } while (1);

    ajSeqDel(&seq);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);
    ajStrDel(&method);
    ajStrDel(&ealpha);

    ajFileClose(&inf);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    AJFREE(symbolset);

    embExit();
    return 0;
}
Ejemplo n.º 14
0
/*--------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
  char *string1=NULL, *string2=NULL, *temp, *line;
  char *structure=NULL, *cstruc=NULL;
  char  fname[53], my_contrib[10], *up_out;
  char  *ParamFile=NULL;
  char  *ns_bases=NULL, *c;
  int   i, length1,length2,length, l, sym, r;
  double energy, min_en;
  double kT, sfact=1.07;
  int   pf, istty;
  int noconv=0;
  double Zu, Zup;
  /* variables for output */
  pu_contrib *unstr_out, *unstr_short;
  FLT_OR_DBL **inter_out;
  char *title;
  /* commandline parameters */
  int w;       /* length of region of interaction */
  int incr3;   /* add x unpaired bases after 3'end of short RNA*/
  int incr5;   /* add x unpaired bases after 5'end of short RNA*/
  int  unstr;  /* length of unpaired region for output*/
  int  upmode; /* output mode for pf_unpaired and pf_up()*/
  upmode = 0;
  unstr = 4; 
  incr3=0;
  incr5=0;
  w=25;
  do_backtrack = 1;
  pf=1; /* partition function has to be calculated */
  length1=length2=0;
  up_out=NULL;
  title=NULL;
  unstr_out=NULL;
  inter_out=NULL;
  my_contrib[0] = 'S';
  my_contrib[1] = '\0';
  
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-') 
      switch ( argv[i][1] )
	{
	case 'T':  if (argv[i][2]!='\0') usage();
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i], "%lf", &temperature);
	  if (!r) usage();
	  break;
	case 'w':
	  /* -w maximal length of unstructured region */  
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &w);
	  if (!r) usage();
	  break;
	case 'n':
	  if ( strcmp(argv[i], "-noGU")==0) noGU=1;
	  if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1;
	  if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
	  if ( strcmp(argv[i], "-nsp") ==0) {
	    if (i==argc-1) usage();
	    ns_bases = argv[++i];
	  }
	  if ( strcmp(argv[i], "-noconv")==0) noconv=1;
	  break;
	case '4':
	  tetra_loop=0;
	  break;
	case 'e':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &energy_set);
	  if (!r) usage();
	  break;
	case 'C':
	  fold_constrained=1;
	  break;
	case 'S':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%lf", &sfact);
	  if (!r) usage();
	  break;
	case 'd': dangles=0;
	  if (argv[i][2]!='\0') {
	    r=sscanf(argv[i]+2, "%d", &dangles);
	    if (r!=1) usage();
	  }
	  break;
	case 'o': upmode=1;
	  /* output mode 0: non, 1:only pr_unpaired, 2: pr_unpaired + pr_up */
	  if (argv[i][2]!='\0') {
	    r=sscanf(argv[i]+2, "%d", &upmode);
	    if (r!=1) usage();
	  }
	  break;
	case 'u':
	  /* -u length of unstructured region in pr_unpaired output
	     makes only sense in combination with -o1 or -o2 */  
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &unstr);
	  if (!r) usage();
	  break;
	  /* incr5 and incr3 are only for the longer (target) sequence */
	  /* increments w (length of the unpaired region) to incr5+w+incr3*/
	  /* the longer sequence is given in 5'(= position 1) to */
	  /* 3' (=position n) direction */
	  /* incr5 adds incr5 residues to the 5' end of w */
	case '5':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &incr5);
	  if (!r) usage();
	  break; 
	  /* incr3 adds incr3 residues to the 3' end of w */
	case '3':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &incr3);
	  if (!r) usage();
	  break;
	case 'P':
	  if (i==argc-1) usage();
	  ParamFile = argv[++i];
	  break;
	case 'x':  
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i], "%s", my_contrib);
	  if (!r) usage();
	  break;  
	default: usage();
	} 
  }
  
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);
   
  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
	nonstandards[i++]=*c++;
	nonstandards[i++]=*c;
	if ((sym)&&(*c!=*(c-1))) {
	  nonstandards[i++]=*c;
	  nonstandards[i++]=*(c-1);
	}
      }
      c++;
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf("| : paired with another base\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("< : base i is paired with a base j<i\n");
    printf("> : base i is paired with a base j>i\n");
    printf("matching brackets ( ): base i pairs base j\n");
  } 
	
  do {				/* main loop: continue until end of file */
    cut_point=-1;
    if (istty) {
      printf("\nInput string (upper or lower case); @ to quit\n");
      printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      printf("%s%s\n", scale1, scale2);
    }
    fname[0]='\0';
   
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
	
	(void) sscanf(line, ">%51s", fname);
      free(line);
      if ((line = get_line(stdin))==NULL) break;
    } 
    if ((line == NULL) || (strcmp(line, "@") == 0)) break;

    tokenize(line,&string1,&string2);
    
    if(upmode != 0){
      if(cut_point == -1 && upmode == 2) {
	  nrerror("only one sequence - can not cofold one sequence!");
      }
    } else {
      if(cut_point == -1){
	upmode=1;
      } else {
	upmode=2;
      }
    }
    
    if(string1 != NULL)
      length1 = (int) strlen(string1);
    if(string2 != NULL) 
      length2 = (int) strlen(string2);
    else
      length2=0;    

    /* write longer seq in string1 and and shorter one in string2 */ 
    if(length1 < length2)
      {
	length=length1; length1=length2; length2=length;
	
	temp=(char *) space(strlen(string1)+1);
	(void) sscanf(string1,"%s",temp);
	string1 = (char *) xrealloc (string1,sizeof(char)*length1+1);
	(void) sscanf(string2,"%s",string1);
	string2 = (char *) xrealloc(string2,sizeof(char)*length2+1);
	(void) sscanf(temp,"%s",string2);
	free(temp);
      }
   
    structure = (char *) space((unsigned) length1+1);
    if (fold_constrained) {
      cstruc = get_line(stdin);
      if (cstruc!=NULL) 
	strncpy(structure, cstruc, length1);
      else
	fprintf(stderr, "constraints missing\n");
    }
    for (l = 0; l < length1; l++) {
      string1[l] = toupper(string1[l]);
      if (!noconv && string1[l] == 'T') string1[l] = 'U';
    }
    for (l = 0; l < length2; l++) {
      string2[l] = toupper(string2[l]);
      if (!noconv && string2[l] == 'T') string2[l] = 'U';
    }

    if (istty)
      printf("length1 = %d\n", length1);
    
    /* initialize_fold(length); */
    update_fold_params();
    printf("\n%s", string1);
    min_en = fold(string1, structure);
    
    if (istty)
      {
	printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
      }
    else
      printf(" (%6.2f)\n", min_en);
    
    (void) fflush(stdout);
    
    /* parse cml parameters for the filename*/
    if(upmode > 0) {
      char wuadd[10];
      up_out = (char*) space(sizeof(char)*53);
      /* create the name of the output file */
      if(fname[0]!='\0' && up_out[0] =='\0' ){
	if(strlen(fname)< 30){
	  strcpy(up_out, fname);
	} else {  
	  strncpy(up_out, fname,30);
	}
      }
      else if(fname[0]=='\0' && up_out[0] == '\0'){
	char defaultn[10] = "RNA";
	sprintf(up_out,"%s",defaultn);
      }
	
      sprintf(wuadd,"%d",w);
      strcat(up_out, "_w");
      strcat(up_out, wuadd);
      strcat(up_out, "u");
      sprintf(wuadd,"%d",unstr);
      strcat(up_out, wuadd);
      strcat(up_out, "_up.out");
      printf("RNAup output in file: %s\n",up_out);
	    
      /* create the title for the output file */      
      if (title == NULL) {
	char wuadd[10];
	title = (char*) space(sizeof(char)*60);
	if(fname[0]!='\0'){
	  if(strlen(fname)< 30){
	    strcpy(title, fname);
	  } else {  
	    strncpy(title, fname,30);
	  }
	}
	else if (fname[0]=='\0'){
	  char defaultn[10]= "RNAup";
	  sprintf(title,"%s",defaultn);
	}
	sprintf(wuadd,"%d",unstr);
	strcat(title," u=");
	strcat(title, wuadd);
	sprintf(wuadd,"%d",w);
	strcat(title," w=");
	strcat(title, wuadd);
	sprintf(wuadd,"%d",length1);
	strcat(title," n=");
	strcat(title, wuadd);
      }
    } else {
      nrerror("no output format given: use [-o[1|2]] to select output format");
    }
    
    
    if (pf) {
      
      if (dangles==1) {
	dangles=2;   /* recompute with dangles as in pf_fold() */
	min_en = energy_of_struct(string1, structure);
	dangles=1;
      }
	 
      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      
      if(upmode != 0){
	int wplus;
	wplus=w+incr3+incr5;
	/* calculate prob. unstructured for the shorter seq */
	if(upmode == 3) {
	  min_en = fold(string2, structure);
	  pf_scale = exp(-(sfact*min_en)/kT/length2);
	  if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
	  init_pf_fold(length2);
	  if (cstruc!=NULL)
	    strncpy(structure, cstruc, length2+1);
	  energy = pf_fold(string2, structure);
	  if(wplus > length2){ wplus = length2;} /* for the shorter seq */
	  unstr_short = pf_unstru(string2, structure, wplus);
	  free_pf_unstru();
	  free_pf_arrays(); /* for arrays for pf_fold(...) */
	}

	/* calculate prob. unstructured for the longer seq */
	wplus=w+incr3+incr5; 
	min_en = fold(string1, structure);
	pf_scale = exp(-(sfact*min_en)/kT/length1);
	if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
	init_pf_fold(length1);
	if (cstruc!=NULL)
	  strncpy(structure, cstruc, length1+1);
	energy = pf_fold(string1, structure);
	unstr_out = pf_unstru(string1, structure, wplus);
	free_pf_unstru();
	free_pf_arrays(); /* for arrays for pf_fold(...) */
	/* calculate the interaction between the two sequences */
	if(upmode > 1 && cut_point > -1){
	  inter_out = pf_interact(string1,string2,unstr_out,w, incr3, incr5);
	  if(Up_plot(unstr_out,inter_out,length1,up_out,unstr,my_contrib)==0){
	    nrerror("Up_plot: no output values assigned");
	  }
	} else if(cut_point == -1 && upmode > 1) { /* no second seq given */
	  nrerror("only one sequence given - cannot cofold one sequence!");
	} else { /* plot only the results for prob unstructured */
	  if(Up_plot(unstr_out,NULL,length1,up_out,unstr,my_contrib)==0){
	    nrerror("Up_plot: no output values assigned");
	  }
	}	
      } else {
	nrerror("no output format given: use [-o[1|2]] to select output format");
      }
       
      if (do_backtrack) {
	printf("%s", structure);
	if (!istty) printf(" [%6.2f]\n", energy);
	else printf("\n");
      }
      if ((istty)||(!do_backtrack)) 
	printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
      energy = pf_fold(string1, structure);
      printf(" frequency of mfe structure in ensemble %g; "
	     "ensemble diversity %-6.2f\n", exp((energy-min_en)/kT),
	     mean_bp_dist(length1));
      free_pf_arrays();
    }
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    if (string1!=NULL) free(string1);
    if (string2!=NULL) free(string2);
    free(structure);
    if(up_out != NULL) free(up_out);
    up_out=NULL;
    if(title != NULL) free(title);
    title=NULL;
    if(upmode == 1) free_pf_two(unstr_out,NULL);
    if(upmode > 1) free_pf_two(unstr_out,inter_out);
    if(upmode == 3)free_pf_two(unstr_short,NULL);
    free_arrays(); /* for arrays for fold(...) */
    
  } while (1);
  return 0;
}
Ejemplo n.º 15
0
void main()
{
   char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
        *struct1,* struct2,* xstruc;
   float e1, e2, tree_dist, string_dist, profile_dist, kT;
   Tree *T1, *T2;
   swString *S1, *S2;
   float *pf1, *pf2;
   FLT_OR_DBL *bppm;
   /* fold at 30C instead of the default 37C */
   temperature = 30.;      /* must be set *before* initializing  */

   /* allocate memory for structure and fold */
   struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
   e1 =  fold(seq1, struct1);

   struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
   e2 =  fold(seq2, struct2);

   free_arrays();     /* free arrays used in fold() */

   /* produce tree and string representations for comparison */
   xstruc = expand_Full(struct1);
   T1 = make_tree(xstruc);
   S1 = Make_swString(xstruc);
   free(xstruc);

   xstruc = expand_Full(struct2);
   T2 = make_tree(xstruc);
   S2 = Make_swString(xstruc);
   free(xstruc);

   /* calculate tree edit distance and aligned structures with gaps */
   edit_backtrack = 1;
   tree_dist = tree_edit_distance(T1, T2);
   free_tree(T1); free_tree(T2);
   unexpand_aligned_F(aligned_line);
   printf("%s\n%s  %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);

   /* same thing using string edit (alignment) distance */
   string_dist = string_edit_distance(S1, S2);
   free(S1); free(S2);
   printf("%s  mfe=%5.2f\n%s  mfe=%5.2f  dist=%3.2f\n",
          aligned_line[0], e1, aligned_line[1], e2, string_dist);

   /* for longer sequences one should also set a scaling factor for
      partition function folding, e.g: */
   kT = (temperature+273.15)*1.98717/1000.;  /* kT in kcal/mol */
   pf_scale = exp(-e1/kT/strlen(seq1));

   /* calculate partition function and base pair probabilities */
   e1 = pf_fold(seq1, struct1);
   /* get the base pair probability matrix for the previous run of pf_fold() */
   bppm = export_bppm();
   pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));

   e2 = pf_fold(seq2, struct2);
   /* get the base pair probability matrix for the previous run of pf_fold() */
   bppm = export_bppm();
   pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));

   free_pf_arrays();  /* free space allocated for pf_fold() */

   profile_dist = profile_edit_distance(pf1, pf2);
   printf("%s  free energy=%5.2f\n%s  free energy=%5.2f  dist=%3.2f\n",
          aligned_line[0], e1, aligned_line[1], e2, profile_dist);

   free_profile(pf1); free_profile(pf2);
}
Ejemplo n.º 16
0
void print_stats(FILE* statsfile, char* seq, char* struc, int length, int iteration, int count_df_evaluations, double D, double prev_D, double norm, int printPS) {

    plist *pl, *pl1,*pl2;
    char fname[100];
    char title[100];
    char* ss;
    double MEAgamma, mea, mea_en;
    char* output;
    int i,j;
    static char timestamp[40];
    const struct tm *tm;
    time_t now;

    ss = (char *) space((unsigned) length+1);
    memset(ss,'.',length);

    init_pf_fold(length);
    pf_fold_pb(seq, NULL);

    for (i = 1; i < length; i++) {
        for (j = i+1; j<= length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }
    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    fprintf (stderr, "\nITERATION:   %i\n", iteration);
    fprintf(stderr,  "DISCREPANCY: %.4f\n", D);
    fprintf(stderr,  "NORM:        %.2f\n", norm);
    if (prev_D > -1.0) {
        fprintf(stderr,  "IMPROVEMENT: %.4f%%\n\n", (1-(D/prev_D))*100);
    }

    fprintf(statsfile, "%i\t%.4f\t%.4f\t%i\t", iteration, D, norm, count_df_evaluations);

    for (MEAgamma=1e-5; MEAgamma<1e+6; MEAgamma*=10 ) {
        pl = make_plist(length, 1e-4/(1+MEAgamma));
        mea = MEA(pl, ss, MEAgamma);
        mea_en = energy_of_struct(seq, ss);
        fprintf(statsfile,"%s,%.2e;", ss, MEAgamma);
        free(pl);
    }
    fprintf(statsfile, "\t");

    // Stochastic backtracking

    fprintf(stderr, "Sampling structures...\n");

    if (sample_structure) {

        char* best_structure;
        char* curr_structure;
        double x;

        double curr_energy = 0.0;
        double min_energy = +1.0;
        int curr_distance =  0;
        int min_distance = 999999;

        best_structure = (char *) space((unsigned) length+1);

        for (i=1; i<=10000; i++) {

            curr_structure = pbacktrack_pb(seq);
            curr_energy = energy_of_struct(seq, curr_structure);
            curr_distance = 0.0;

            //fprintf(stderr, "%s%.2f ", curr_structure, curr_energy);

            for (j = 1; j <= length; j++) {

                if (q_unpaired[j] > -0.5) {
                    x = (curr_structure[j-1] == '.') ? 1.0 : 0.0;
                    curr_distance += abs(x-q_unpaired[j]);
                }
            }

            if (curr_distance < min_distance) {
                min_distance = curr_distance;
                min_energy = curr_energy;
                strcpy(best_structure, curr_structure);
            }

            if (curr_distance == min_distance) {
                if (curr_energy < min_energy) {
                    min_energy = curr_energy;
                    strcpy(best_structure, curr_structure);
                }
            }

            //fprintf(stderr, "%i\n", curr_distance);
            free(curr_structure);
        }

        //fprintf(stderr, "\n%s %.2f %i\n", best_structure, min_energy, min_distance);
        fprintf(statsfile, "\t%s\t%.2f\t%i\t", best_structure, min_energy, min_distance);

    } else {
        fprintf(statsfile, "NA\tNA\tNA\t");
    }

    for (i = 1; i <= length; i++) {
        fprintf(statsfile, "%.4f", epsilon[i]);
        if (!(i==length)) {
            fprintf(statsfile, ",");
        }
    }

    now = time ( NULL );
    tm = localtime ( &now );

    strftime ( timestamp, 40, "%Y-%m-%d %X", tm );

    fprintf(statsfile, "\t%s\n", timestamp);

    /* Print dotplot only if not noPS is given and function call asks for it */
    if (!noPS && printPS) {

        /* Print dotplot */
        sprintf(fname,"%s/iteration%i.ps", psDir, iteration);
        pl1 = make_plist(length, 1e-5);

        if (struc != NULL) {
            pl2 = b2plist(struc);
        } else {
            pl2 = NULL;
        }
        sprintf(title,"Iteration %i, D = %.4f", iteration, D);
        (void) PS_dot_plot_list_epsilon(seq, fname, pl2, pl1, epsilon, title);
    }


    free_pf_arrays();


}
Ejemplo n.º 17
0
int main(int argc, char *argv[]) {

    struct        RNAfold_args_info args_info;
    char          *string, *input_string, *structure=NULL, *cstruc=NULL;
    char          fname[80], ffname[80], gfname[80], *ParamFile=NULL;
    char          *ns_bases=NULL, *c;
    int           i, j, ii, jj, mu, length, l, sym, r, pf=0, noconv=0;
    unsigned int  input_type;
    double        energy, min_en, kT, sfact=1.07;
    int           doMEA=0, circular = 0, N;
    char *pf_struc;
    double dist;
    plist *pl;

    FILE * filehandle;
    FILE * statsfile;
    char* line;

    double tau   = 0.01; /* Variance of energy parameters */
    double sigma = 0.01; /* Variance of experimental constraints */
    double *gradient;           /* Gradient for steepest descent search
                                 epsilon[i+1]= epsilon[i] - gradient *
                                 step_size */
    double initial_step_size = 0.5;  /* Initial step size for steepest
                                      descent search */
    double step_size;
    double D;                  /* Discrepancy (i.e. value of objective
                                function) for the current
                                prediction */
    int iteration, max_iteration = 2000; /* Current and maximum number of
                                         iterations after which
                                         algorithm stops */

    double precision = 0.1; /* cutoff used for stop conditions */
    double tolerance = 0.1;   /* Parameter used by various GSL minimizers */
    int method_id = 1;        /* Method to use for minimization, 0 and 1
                               are custom steepest descent, the rest
                               are GSL implementations (see below)*/

    int initial_guess_method = 0;

    int sample_N = 1000;

    double *prev_epsilon;
    double *prev_gradient;
    double DD, prev_D, sum, norm;
    int status;
    double* gradient_numeric;
    double* gradient_numeric_gsl;

    /* Minimizer vars */
    const gsl_multimin_fdfminimizer_type *T;
    gsl_multimin_fdfminimizer *minimizer;
    gsl_vector *minimizer_x;
    gsl_vector *minimizer_g;
    gsl_multimin_function_fdf minimizer_func;
    minimizer_pars_struct minimizer_pars;

    char *constraints;
    char outfile[256];
    char constraints_file[256];
    char epsilon_file[256];
    FILE* fh;

    double last_non_nan_lnQ;

    pf_overflow = 0;
    pf_underflow = 0;

    dangles=2;

    do_backtrack  = 1;
    string        = NULL;

    noPS = 0;
    outfile[0]='\0';
    epsilon_file[0]='\0';
    strcpy(psDir, "dotplots");

    if(RNAfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

    /* RNAbpfold specific options */

    if (args_info.tau_given) tau = args_info.tau_arg;
    if (args_info.sigma_given) sigma = args_info.sigma_arg;
    if (args_info.precision_given) precision = args_info.precision_arg;
    if (args_info.step_given) initial_step_size = args_info.step_arg;
    if (args_info.maxN_given) max_iteration = args_info.maxN_arg;
    if (args_info.minimization_given) method_id = args_info.minimization_arg;
    if (args_info.init_given) initial_guess_method = args_info.init_arg;
    if (args_info.tolerance_given) tolerance = args_info.tolerance_arg;
    if (args_info.outfile_given) strcpy(outfile, args_info.outfile_arg);
    if (args_info.constraints_given) strcpy(constraints_file, args_info.constraints_arg);
    if (args_info.epsilon_given) strcpy(epsilon_file, args_info.epsilon_arg);
    if (args_info.sampleGradient_given) sample_conditionals=1;
    if (args_info.hybridGradient_given) {
        sample_conditionals=1;
        hybrid_conditionals=1;
    }
    if (args_info.numericalGradient_given) numerical=1;
    if (args_info.sampleStructure_given) sample_structure=1;
    if (args_info.psDir_given) strcpy(psDir, args_info.psDir_arg);
    if (args_info.sparsePS_given) sparsePS=args_info.sparsePS_arg;
    if (args_info.gridSearch_given) grid_search = 1;


    /* Generic RNAfold options */

    if (args_info.temp_given)        temperature = args_info.temp_arg;
    if (args_info.reference_given)  fold_constrained=1;
    if (args_info.noTetra_given)     tetra_loop=0;
    if (args_info.dangles_given)     dangles = args_info.dangles_arg;
    if (args_info.noLP_given)        noLonelyPairs = 1;
    if (args_info.noGU_given)        noGU = 1;
    if (args_info.noClosingGU_given) no_closingGU = 1;
    if (args_info.noconv_given)      noconv = 1;
    if (args_info.energyModel_given) energy_set = args_info.energyModel_arg;
    if (args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
    if (args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
    if (args_info.pfScale_given)     sfact = args_info.pfScale_arg;
    if (args_info.noPS_given)        noPS=1;



    /* Create postscript directory */
    if (!noPS) {
        struct stat stat_p;
        if (stat (psDir, &stat_p) != 0) {
            if (mkdir(psDir, S_IRWXU|S_IROTH|S_IRGRP ) !=0) {
                fprintf(stderr, "WARNING: Could not create directory: %s", psDir);
            }
        }
    }

    if (ParamFile != NULL) {
        read_parameter_file(ParamFile);
    }

    if (ns_bases != NULL) {
        nonstandards = space(33);
        c=ns_bases;
        i=sym=0;
        if (*c=='-') {
            sym=1;
            c++;
        }
        while (*c!='\0') {
            if (*c!=',') {
                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1))) {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
    }

    /*Read sequence*/
    fname[0] = '\0';
    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER) {
        (void) sscanf(input_string, "%42s", fname);
        free(input_string);
    }

    length = (int)    strlen(input_string);
    string = strdup(input_string);
    free(input_string);
    structure = (char *) space((unsigned) length+1);

    /* For testing purpose pass dot bracket structure of reference structure via -C */
    if (fold_constrained) {
        input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
        if(input_type & VRNA_INPUT_QUIT) {
            exit(1);
        }
        else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)) {
            cstruc = strdup(input_string);
            free(input_string);
        }
        else warn_user("-C was given but reference structure is missing");
    }

    if(noconv) {
        str_RNA2RNA(string);
    } else {
        str_DNA2RNA(string);
    }

    /* Allocating space */

    epsilon =     (double *) space(sizeof(double)*(length+1));

    exp_pert =  (double **)space(sizeof(double *)*(length+1));
    perturbations =  (double **)space(sizeof(double *)*(length+1));
    prev_epsilon = (double *) space(sizeof(double)*(length+1));
    gradient =    (double *) space(sizeof(double)*(length+1));
    gradient_numeric =    (double *) space(sizeof(double)*(length+1));
    gradient_numeric_gsl =    (double *) space(sizeof(double)*(length+1));
    prev_gradient = (double *) space(sizeof(double)*(length+1));

    q_unpaired = (double *) space(sizeof(double)*(length+1));
    p_unpaired_cond = (double **)space(sizeof(double *)*(length+1));
    p_unpaired_cond_sampled = (double **)space(sizeof(double *)*(length+1));
    p_pp =  (double **)space(sizeof(double *)*(length+1));
    p_unpaired =  (double *) space(sizeof(double)*(length+1));
    p_unpaired_tmp = (double *) space(sizeof(double)*(length+1));

    for (i=0; i <= length; i++) {
        epsilon[i] = gradient[i] = q_unpaired[i] = 0.0;
        p_unpaired_cond[i] = (double *) space(sizeof(double)*(length+1));
        p_unpaired_cond_sampled[i] = (double *) space(sizeof(double)*(length+1));
        p_pp[i] = (double *) space(sizeof(double)*(length+1));
        exp_pert[i] = (double *) space(sizeof(double)*(length+1));
        perturbations[i] = (double *) space(sizeof(double)*(length+1));
        for (j=0; j <= length; j++) {
            p_pp[i][j]=p_unpaired_cond[i][j] = 0.0;
            p_unpaired_cond_sampled[i][j] = 0.0;
        }
    }


    /*** If file with perturbation vector epsilon is given we fold using
         this epsilon and are done ***/

    if (args_info.epsilon_given) {
        plist *pl, *pl1,*pl2;

        filehandle = fopen (epsilon_file,"r");

        if (filehandle == NULL) {
            nrerror("Could not open file with perturbation vector.");
        }

        i=1;
        while (1) {
            double t;
            line = get_line(filehandle);
            if (line == NULL) break;
            if (i>length) nrerror("Too many values in perturbation vector file.");
            if (sscanf(line, "%lf", &epsilon[i]) !=1) {
                nrerror("Error while reading perturbation vector file.");
            }
            i++;
        }

        if (i-1 != length) {
            nrerror("Too few values in perturbation vector file.");
        }

        init_pf_fold(length);
        pf_fold_pb(string, NULL);

        sprintf(fname,"%s/dot.ps", psDir);
        pl1 = make_plist(length, 1e-5);

        (void) PS_dot_plot_list_epsilon(string, fname, NULL, pl1, epsilon, "");

        exit(0);
    }



    /*** Get constraints from reference structure or from external file ***/

    /* Structure was given by -C */
    if (fold_constrained) {
        for (i=0; i<length; i++) {
            if (cstruc[i] == '(' || cstruc[i] == ')') {
                q_unpaired[i+1] = 0.0;
            } else {
                q_unpaired[i+1] = 1.0;
            }
        }

        /*Read constraints from file*/
    } else {

        filehandle = fopen (constraints_file,"r");

        if (filehandle == NULL) {
            nrerror("No constraints given as dot bracket or wrong file name");
        }

        i=1;
        while (1) {
            double t;
            line = get_line(filehandle);
            if (line == NULL) break;
            if (i>length) nrerror("Too many values in constraints.dat");
            if (sscanf(line, "%lf", &q_unpaired[i]) !=1) {
                nrerror("Error while reading constraints.dat");
            }
            i++;
        }

        if (i-1 != length) {
            nrerror("Too few values in constraints.dat");
        }
    }

    /* Create file handle */
    if (outfile[0] !='\0') {
        statsfile = fopen (outfile,"w");
    } else {
        statsfile = fopen ("stats.dat","w");
    }

    setvbuf(statsfile, NULL, _IONBF, 0);

    if (!grid_search) {
        fprintf(statsfile, "Iteration\tDiscrepancy\tNorm\tdfCount\tMEA\tSampled_structure\tSampled_energy\tSampled_distance\tEpsilon\ttimestamp\n");
    } else {
        /* If we do a grid search we have a different output. */
        fprintf(statsfile, "Dummy\tm\tb\tdummy\tMEA\tepsilon\n");
    }

    if (statsfile == NULL) {
        nrerror("Could not open stats.dat for writing.");
    }

    fprintf(stderr, "tau^2 = %.4f; sigma^2 = %.4f; precision = %.4f; tolerance = %.4f; step-size: %.4f\n\n",
            tau, sigma, precision, tolerance, initial_step_size);

    st_back=1;
    min_en = fold(string, structure);

    (void) fflush(stdout);

    if (length>2000) free_arrays();

    pf_struc = (char *) space((unsigned) length+1);

    kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
    pf_scale = exp(-(sfact*min_en)/kT/length);

    /* Set up minimizer */

    minimizer_x = gsl_vector_alloc (length+1);
    minimizer_g = gsl_vector_alloc (length+1);

    for (i=0; i <= length; i++) {
        epsilon[i] = 0.0;
        gsl_vector_set (minimizer_g, i, 0.0);
        gsl_vector_set (minimizer_x, i, epsilon[i]);
    }

    minimizer_pars.length=length;
    minimizer_pars.seq = string;
    minimizer_pars.tau=tau;
    minimizer_pars.sigma=sigma;
    minimizer_pars.kT=kT;

    minimizer_func.n = length+1;
    minimizer_func.f = calculate_f;
    minimizer_func.df = numerical ? calculate_df_numerically: calculate_df;
    minimizer_func.fdf = calculate_fdf;
    minimizer_func.params = &minimizer_pars;


    //min_en = fold_pb(string, structure);
    //fprintf(stderr, "%f", min_en);
    //exit(0);

    /* Calling test functions for debugging */
    for (i=1; i <= length; i++) {
        if (i%2==0) {
            epsilon[i] = +0.2*i;
        } else {
            epsilon[i] = -0.2*i;
        }
    }

    //test_folding(string, length);
    /* //test_stochastic_backtracking(string, length); */
    /* //test_gradient(minimizer_func, minimizer_pars); */
    /* //test_gradient_sampling(minimizer_func, minimizer_pars); */
    //exit(1);


    count_df_evaluations=0;

    /* Initial guess for epsilon */

    if (initial_guess_method !=0 && initial_guess_method !=3) {

        /* Vars for inital guess methods */
        double m,b;
        double* curr_epsilon;
        double* best_epsilon;
        double best_m, best_b, best_scale;
        double curr_D;
        double min_D = 999999999.0;
        double inc = +0.25;
        double cut;

        if (initial_guess_method == 1) fprintf(stderr, "Mathew's constant perturbations\n");
        if (initial_guess_method == 2) fprintf(stderr, "Perturbations proportional to q-p\n");

        curr_epsilon = (double *) space(sizeof(double)*(length+1));
        best_epsilon = (double *) space(sizeof(double)*(length+1));

        last_non_nan_lnQ = min_en;

        // Calculate p_unpaired for unperturbed state which we need later
        // for the proportinal method
        if (initial_guess_method == 2) {

            init_pf_fold(length);

            for (i=0; i <= length; i++) {
                epsilon[i] = 0.0;
            }

            pf_fold_pb(string, NULL);
            for (i = 1; i < length; i++) {
                for (j = i+1; j<= length; j++) {
                    p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
                }
            }
            get_pair_prob_vector(p_pp, p_unpaired_tmp, length, 1);
            free_pf_arrays();
        }

        /* We do the same grid search as in the Mathews paper Fig. 4*/
        for (m=0.25; m <=7.0; m+=0.25) {

            // Weird way of writing this inner loop for the grid search. We
            // traverse the grid without big jumps in the parameters to make
            // sure that the updated scaling factor is accurate all the time.
            inc*=-1;

            for (b = inc < 0.0 ? 0.0 : -3.0; inc < 0.0 ? b >= -3.0 : b<= 0.0 ; b+=inc) {

                // calculate cut point with x-axis and skip parameter pairs
                // which give a cut point outside the range of
                // q_unpaired (0 to 1). They gave frequently overflows and the
                // idea is that we both want positive and negative perturbations
                cut = exp( (-1) * b / m ) - 1;

                fprintf(stderr, "\nm = %.2f, b = %.2f, cut=%.2f\n", m, b, cut);

                if (cut > 1.0 || cut < 0.01) {
                    fprintf(stderr, "\nSkipping m = %.2f, b = %.2f\n", m, b);
                    continue;
                }

                /* Mathew's constant perturbations */
                if (initial_guess_method == 1) {
                    for (i=0; i <= length; i++) {

                        /* We add epsilon to unpaired regions (as opposed to
                           paired regions as in the Mathews paper) so we multiply
                           by -1; if missing data we set it to 0.0 */

                        if (q_unpaired[i] < -0.5) {
                            curr_epsilon[i] = 0.0;
                        } else {
                            curr_epsilon[i] = (m *(log(q_unpaired[i]+1))+b) *(-1);
                        }

                        gsl_vector_set (minimizer_x, i, curr_epsilon[i]);
                    }
                    /* Perturbations proportional to q-p */
                } else {

                    for (i=0; i <= length; i++) {
                        curr_epsilon[i] = (m *(log(q_unpaired[i]+1)-log(p_unpaired_tmp[i]+1))+ b ) * (-1);
                        gsl_vector_set (minimizer_x, i, curr_epsilon[i]);
                    }
                }

                // Repeat and adjust scaling factor until we get result without over-/underflows
                do {

                    // First we use default scaling factor
                    if (pf_underflow == 0 && pf_overflow == 0) {
                        sfact = 1.070;
                    }

                    if (pf_underflow) {
                        sfact *= 0.8;
                        fprintf(stderr,"Underflow, adjusting sfact to %.4f\n", sfact );
                    }

                    if (pf_overflow) {
                        sfact *= 1.2;
                        fprintf(stderr,"Overflow, adjusting sfact to %.4f\n", sfact );
                    }

                    pf_scale = exp(-(sfact*last_non_nan_lnQ)/kT/length);

                    //fprintf(stderr,"Scaling factor is now: %.4e\n", pf_scale);

                    curr_D = calculate_f(minimizer_x, (void*)&minimizer_pars);

                    if (!isnan(last_lnQ)) last_non_nan_lnQ = last_lnQ;

                    // Give up when even extreme scaling does not give results
                    // (for some reason I could not get rid of overflows even with high scaling factors)
                    if (sfact < 0.1 || sfact > 2.0) break;

                } while (pf_underflow == 1 || pf_overflow == 1);

                // We have not given up so everything is ok now
                if (!(sfact < 0.1 || sfact > 2.0)) {

                    if (curr_D < min_D) {
                        min_D = curr_D;
                        for (i=0; i <= length; i++) {
                            best_epsilon[i] = curr_epsilon[i];
                        }
                        best_m = m;
                        best_b = b;
                        best_scale = pf_scale;
                    }

                    /*If we are interested in the grid search we misuse the
                      print_stats function and report m and b together with MEA*/
                    if (grid_search) {
                        for (i=0; i <= length; i++) {
                            epsilon[i] = curr_epsilon[i];
                        }
                        print_stats(statsfile, string, cstruc, length, 0, 0, m, 0.0, b, 0);
                    }

                    fprintf(stderr, "curr D: %.2f, minimum D: %.2f\n", curr_D, min_D);

                    // Adjust pf_scale with default scaling factor but lnQ from
                    // previous step
                    sfact = 1.070;
                    pf_scale = exp(-(sfact*last_lnQ)/kT/length);

                } else {
                    sfact = 1.070;
                    fprintf(stderr, "Skipping m = %.2f, b = %.2f; did not get stable result.\n", m, b);
                }
            } // for b
        } // for m

        fprintf(stderr, "Minimum found: m=%.2f, b=%.2f: %.2f\n", best_m, best_b, min_D);

        for (i=0; i <= length; i++) {
            epsilon[i] = best_epsilon[i];
            gsl_vector_set (minimizer_x, i, best_epsilon[i]);
        }
        pf_scale = best_scale;
    }

    if (initial_guess_method == 3) {
        srand((unsigned)time(0));
        for (i=0; i <= length; i++) {
            double r = (double)rand()/(double)RAND_MAX * 4 - 2;
            epsilon[i] = r;
            gsl_vector_set (minimizer_x, i, epsilon[i]);
        }
    }

    /* If we just wanted a grid search we are done now. */
    if (grid_search) {
        exit(0);
    }

    prev_D = calculate_f(minimizer_x, (void*)&minimizer_pars);

    print_stats(statsfile, string, cstruc, length, 0 , count_df_evaluations , prev_D, -1.0, 0.0,1);

    /* GSL minimization */

    if (method_id !=0) {

        if (method_id > 2) {
            char name[100];
            // Available algorithms
            //  3  gsl_multimin_fdfminimizer_conjugate_fr
            //  4  gsl_multimin_fdfminimizer_conjugate_pr
            //  5  gsl_multimin_fdfminimizer_vector_bfgs
            //  6  gsl_multimin_fdfminimizer_vector_bfgs2
            //  7  gsl_multimin_fdfminimizer_steepest_descent

            //   http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html

            switch (method_id) {
            case 2:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_fr, length+1);
                strcpy(name, "Fletcher-Reeves conjugate gradient");
                break;
            case 3:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_pr, length+1);
                strcpy(name, "Polak-Ribiere conjugate gradient");
                break;
            case 4:
                minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs, length+1);
                strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno");
                break;
            case 5:
                minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs2, length+1);
                strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno (improved version)");
                break;
            case 6:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_steepest_descent, length+1);
                strcpy(name, "Gradient descent (GSL implmementation)");
                break;
            }

            fprintf(stderr, "Starting minimization via GSL implementation of %s...\n\n", name);

            // The last two parmeters are step size and tolerance (with
            // different meaning for different algorithms

            gsl_multimin_fdfminimizer_set (minimizer, &minimizer_func, minimizer_x, initial_step_size, tolerance);

            iteration = 1;

            do {

                status = gsl_multimin_fdfminimizer_iterate (minimizer);
                D = minimizer->f;
                norm = gsl_blas_dnrm2(minimizer->gradient);

                print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, D, prev_D, norm, iteration%sparsePS == 0);

                prev_D = D;

                if (status) {
                    fprintf(stderr, "An unexpected error has occured in the iteration (status:%i)\n", status);
                    break;
                }

                status = gsl_multimin_test_gradient (minimizer->gradient, precision);
                if (status == GSL_SUCCESS) fprintf(stderr, "Minimum found stopping.\n");

                iteration++;

            } while (status == GSL_CONTINUE && iteration < max_iteration);

            gsl_multimin_fdfminimizer_free (minimizer);
            gsl_vector_free (minimizer_x);

            /* Custom implementation of steepest descent */
        } else {

            if (method_id == 1) {
                fprintf(stderr, "Starting custom implemented steepest descent search...\n\n");
            } else {
                fprintf(stderr, "Starting custom implemented steepest descent search with Barzilai Borwein step size...\n\n");
            }

            iteration = 0;
            D = 0.0;

            while (iteration++ < max_iteration) {

                for (i=1; i <= length; i++) {
                    gsl_vector_set (minimizer_x, i, epsilon[i]);
                }

                D = calculate_f(minimizer_x, (void*)&minimizer_pars);

                if (numerical) {
                    calculate_df_numerically(minimizer_x, (void*)&minimizer_pars, minimizer_g);
                } else {
                    calculate_df(minimizer_x, (void*)&minimizer_pars, minimizer_g);
                }

                for (i=1; i <= length; i++) {
                    gradient[i] = gsl_vector_get (minimizer_g, i);
                }

                // Do line search

                fprintf(stderr, "\nLine search:\n");

                // After the first iteration, use Barzilai-Borwain (1988) step size (currently turned off)
                if (iteration>1 && method_id==2) {

                    double denominator=0.0;
                    double numerator=0.0;

                    for (i=1; i <= length; i++) {
                        numerator += (epsilon[i]-prev_epsilon[i]) * (gradient[i]-prev_gradient[i]);
                        denominator+=(gradient[i]-prev_gradient[i]) * (gradient[i]-prev_gradient[i]);
                    }

                    step_size = numerator / denominator;

                    norm =1.0;
                } else {
                    // Use step sized given by the user (normalize it first)
                    step_size = initial_step_size / calculate_norm(gradient, length);
                }

                for (i=1; i <= length; i++) {
                    prev_epsilon[i] = epsilon[i];
                    prev_gradient[i] = gradient[i];
                }

                do {

                    for (mu=1; mu <= length; mu++) {
                        epsilon[mu] = prev_epsilon[mu] - step_size * gradient[mu];
                    }

                    for (i=1; i <= length; i++) {
                        gsl_vector_set (minimizer_x, i, epsilon[i]);
                    }

                    DD = calculate_f(minimizer_x, (void*)&minimizer_pars);

                    if (step_size > 0.0001) {
                        fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4f\n", D, DD, step_size);
                    } else {
                        fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4e\n", D, DD, step_size);
                    }

                    step_size /= 2;
                } while (step_size > 1e-12 && DD > D);

                norm = calculate_norm(gradient,length);

                if (DD > D) {
                    fprintf(stderr, "Line search did not improve D in iteration %i. Stop.\n", iteration);

                    if (hybrid_conditionals) {
                        sample_conditionals=0;
                    } else {
                        break;
                    }
                }

                print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, iteration%sparsePS == 0);

                if (norm<precision && iteration>1) {
                    fprintf(stderr, "Minimum found stopping.\n");
                    break;
                }

                prev_D = DD;

            }
        }

        /* Force last dotplot to be printed */
        print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, 1);
    }

    free(pf_struc);
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    free(string);
    free(structure);
    RNAfold_cmdline_parser_free (&args_info);


    return 0;
}
Ejemplo n.º 18
0
int main(int argc, char *argv[]){
  struct  RNAinverse_args_info args_info;
  int     input_type;
  char    *input_string, *start, *structure, *rstart, *str2, *line;
  char    *ParamFile=NULL, *c, *ns_bases;
  int     i,j, length, l, hd, sym;
  double  energy=0., kT;
  int     pf, mfe, istty;
  int     repeat, found;

  do_backtrack = 0; pf = 0; mfe = 1;
  repeat = 0;
  input_type = 0;
  input_string = ns_bases = NULL;
  init_rand();

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAinverse_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* alter the alphabet */
  if(args_info.alphabet_given){
    symbolset=args_info.alphabet_arg;
    /* symbolset should only have uppercase characters */
    for (l = 0; l < (int)strlen(symbolset); l++)
      symbolset[l] = toupper(symbolset[l]);
  }
  /* set function for optimization */
  if(args_info.function_given){
    if(strlen(args_info.function_arg) > 2){
      RNAinverse_cmdline_parser_print_help(); exit(EXIT_FAILURE);
    }
    else{
      if((*args_info.function_arg == 'm') || (*(args_info.function_arg+1) == 'm')) mfe = 1;
      if((*args_info.function_arg == 'p') || (*(args_info.function_arg+1) == 'p')) pf = 1;
    }
  }
  /* set repeat */
  if(args_info.repeat_given)      repeat = args_info.repeat_arg;
  /* set final cost */
  if(args_info.final_given)       final_cost = args_info.final_arg;
  /* do we wannabe verbose */
  if(args_info.verbose_given)     inv_verbose = 1;

  /* free allocated memory of command line data structure */
  RNAinverse_cmdline_parser_free (&args_info);

  kT = (temperature+273.15)*1.98717/1000.0;

  istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

  if (ParamFile!=NULL)
    read_parameter_file(ParamFile);

  give_up = (repeat<0);

  do {
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty)
      print_tty_input_seq_str("Input structure & start string\n"
                              "(lower case letters for const positions) and 0 or empty line for random start string\n");

    input_type = get_multi_input_line(&input_string, 0);
    /* we are waiting for a structure (i.e. something like a constraint) so we skip all sequences, fasta-headers and misc lines */
    while(input_type & (VRNA_INPUT_SEQUENCE | VRNA_INPUT_MISC | VRNA_INPUT_FASTA_HEADER)){
      if(!istty && (input_type & VRNA_INPUT_FASTA_HEADER)) printf(">%s\n", input_string);
      free(input_string); input_string = NULL;
      input_type = get_multi_input_line(&input_string, 0);
    }
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)) break;

    if(input_type & (VRNA_INPUT_CONSTRAINT)){
      structure = (char *)space(sizeof(char) * (strlen(input_string) + 1));
      (void)sscanf(input_string, "%s", structure); /* scanf gets rid of trailing junk */
      length = (int)strlen(structure);
      free(input_string); input_string = NULL;
      input_type = get_multi_input_line(&input_string, VRNA_INPUT_NOSKIP_BLANK_LINES | VRNA_INPUT_NOSKIP_COMMENTS);
    }
    if(input_type & VRNA_INPUT_QUIT) break;

    start = (char *)space(sizeof(char) * (length+1));
    /* now we assume to get a sequence (input_string may be empty as well) */
    if(input_type & VRNA_INPUT_SEQUENCE){
      (void)strncpy(start, input_string, length);
      start[length] = '\0';
      free(input_string); input_string = NULL;
    }
    /* fallback to empty start sequence */
    else start[0] = '\0';

    /*
    ########################################################
    # done with 'stdin' handling
    ########################################################
    */

    if (ns_bases != NULL) {
      nonstandards = space(33);
      c=ns_bases;
      i=sym=0;
      if (*c=='-') {
        sym=1; c++;
      }
      while (*c!='\0') {
        if (*c!=',') {
          nonstandards[i++]=*c++;
          nonstandards[i++]=*c;
          if ((sym)&&(*c!=*(c-1))) {
            nonstandards[i++]=*c;
            nonstandards[i++]=*(c-1);
          }
        }
        c++;
      }
    }

    str2 = (char *) space((unsigned)length+1);
    if (istty) printf("length = %d\n", length);

    if (repeat!=0) found = (repeat>0)? repeat : (-repeat);
    else found = 1;

    /* initialize_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */

    rstart = (char *) space((unsigned)length+1);
    while(found>0) {
      char *string;
      string = (char *) space((unsigned)length+1);
      strcpy(string, start);
      for (i=0; i<length; i++) {
        /* lower case characters are kept fixed, any other character
           not in symbolset is replaced by a random character */
        if (islower(string[i])) continue;

        if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
          string[i]=symbolset[int_urn(0,strlen(symbolset)-1)];
      }
      strcpy(rstart, string); /* remember start string */

      if (mfe) {
        energy = inverse_fold(string, structure);
        if( (repeat>=0) || (energy<=0.0) ) {
          found--;
          hd = hamming(rstart, string);
          printf("%s  %3d", string, hd);
          if (energy>0) { /* no solution found */
            printf("   d= %g\n", energy);
            if(istty) {
              energy = fold(string,str2);
              printf("%s\n", str2);
            }
          } else printf("\n");
        }
      }
      if (pf) {
        if (!(mfe && give_up && (energy>0))) {
          /* unless we gave up in the mfe part */
          double prob, min_en, sfact=1.07;

          /* get a reasonable pf_scale */
          min_en = fold(string,str2);
          pf_scale = exp(-(sfact*min_en)/kT/length);
          /* init_pf_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */

          energy = inverse_pf_fold(string, structure);
          prob = exp(-energy/kT);
          hd = hamming(rstart, string);
          printf("%s  %3d  (%g)\n", string, hd, prob);
          free_pf_arrays();
        }
        if (!mfe) found--;
      }
      (void) fflush(stdout);
      free(string);
    }
    free(rstart);
    free_arrays();

    free(structure);
    free(str2);
    free(start);
    (void) fflush(stdout);
  } while (1);
  return 0;
}
Ejemplo n.º 19
0
/* Calculate the gradient analytically */
void calculate_df (const gsl_vector *v, void *params, gsl_vector *df) {

    double D, sum;
    int ii,jj,i,j,mu, length, N;
    minimizer_pars_struct *pars = (minimizer_pars_struct *)params;
    char *constraints;

    int* unpaired_count;
    int** unpaired_count_cond;

    double q_tmp;
    double sigma_tmp;

    length = pars->length;

    count_df_evaluations++;

    fprintf(stderr, "=> Evaluating gradient (analytical, %s)...\n",sample_conditionals == 1 ? "sampled conditionals" : "exact conditionals");

    constraints = (char *) space((unsigned) length+1);
    for (i=0; i <= length; i++) {
        epsilon[i] = gsl_vector_get(v, i);
        p_unpaired[i] = 0.0;
        for (j=0; j <= length; j++) {
            p_pp[i][j] = p_pp[j][i] = 0.0;
            p_unpaired_cond[i][j] = 0.0;
            p_unpaired_cond_sampled[i][j] = 0.0;
        }
    }

    init_pf_fold(length);
    pf_fold_pb(pars->seq, NULL);

    for (i=1; i<length; i++) {
        for (j=i+1; j<=length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }
    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    free_pf_arrays();


    if (!sample_conditionals) {
        // Calculate conditional probabilities

        fold_constrained=1;

        for (ii = 1; ii <= length; ii++) {

            // Set constraints strings like
            //   x............
            //   .x...........
            //   ..x..........

            memset(constraints,'.',length);
            constraints[ii-1]='x';

            fprintf(stderr, ".");

            init_pf_fold(length);
            pf_fold_pb(pars->seq, constraints);

            for (i=1; i<length; i++) {
                for (j=i+1; j<=length; j++) {
                    p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
                }
            }
            get_pair_prob_vector(p_pp, p_unpaired_cond[ii], length, 1);
            free_pf_arrays();
        }

        fprintf(stderr, "\n");

        fold_constrained = 0;

        // Sample gradient with stochastic backtracking
    } else {

        unpaired_count = (int *) space(sizeof(int)*(length+1));
        unpaired_count_cond = (int **)space(sizeof(int *)*(length+1));

        for (i=0; i <= length; i++) {
            unpaired_count[i] = 0;
            unpaired_count_cond[i] = (int *) space(sizeof(int)*(length+1));
            for (j=0; j <= length; j++) {
                unpaired_count_cond[i][j] = 0;
            }
        }

        fold_constrained = 0;
        init_pf_fold(length);

        pf_fold_pb(pars->seq, NULL);

        N=10000;

        for (i=1; i<=N; i++) {
            char *s;
            s = pbacktrack_pb(pars->seq);

            for (ii = 1; ii <= length; ii++) {
                if (s[ii-1]=='.') {
                    unpaired_count[ii]++;
                    for (jj = 1; jj <= length; jj++) {
                        if (s[jj-1]=='.') {
                            unpaired_count_cond[ii][jj]++;
                        }
                    }
                }
            }
            free(s);
        }

        for (i = 1; i <= length; i++) {
            for (ii = 1; ii <= length; ii++) {
                if (unpaired_count_cond[i][ii] > 0) {
                    p_unpaired_cond_sampled[i][ii] = (double)unpaired_count_cond[i][ii]/(double)unpaired_count[i];
                    p_unpaired_cond[i][ii] = (double)unpaired_count_cond[i][ii]/(double)unpaired_count[i];
                } else {
                    p_unpaired_cond_sampled[i][ii]= 0.0;
                    p_unpaired_cond[i][ii]= 0.0;
                }
            }
        }
    }

    for (mu=1; mu <= length; mu++) {
        sum = 0.0;
        for (i=1; i <= length; i++) {

            // Comments on handling missing data see the corresponding code in calcuate_f
            if (q_unpaired[i] < -0.5) {
                sigma_tmp = 10000;
                q_tmp = 0.5;
            } else {
                sigma_tmp = pars->sigma;
                q_tmp = q_unpaired[i];
            }
            sum += (1 / sigma_tmp) * p_unpaired[i] *
                   ( p_unpaired[i] - q_tmp ) *
                   ( p_unpaired[mu] - p_unpaired_cond[i][mu] );
        }
        gsl_vector_set(df, mu, (2 * epsilon[mu] /pars->tau ) + (2 /  pars->kT * sum));
    }
}
Ejemplo n.º 20
0
int main(int argc, char *argv[])
{
  char *string, *line;
  char *structure=NULL, *cstruc=NULL;
  char  fname[13], ffname[20], gfname[20];
  char  *ParamFile=NULL;
  char  *ns_bases=NULL, *c;
  int   i, length, l, sym, r;
  double energy, min_en;
  double kT, sfact=1.07;
  int   pf=0, noPS=0, istty;
  int noconv=0;
  int circ=0;

  do_backtrack = 1;
  string=NULL;
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-')
      switch ( argv[i][1] )
	{
	case 'T':  if (argv[i][2]!='\0') usage();
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i], "%lf", &temperature);
	  if (!r) usage();
	  break;
	case 'p':  pf=1;
	  if (argv[i][2]!='\0')
	    (void) sscanf(argv[i]+2, "%d", &do_backtrack);
	  break;
	case 'n':
	  if ( strcmp(argv[i], "-noGU")==0) noGU=1;
	  if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1;
	  if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
	  if ( strcmp(argv[i], "-noPS")==0) noPS=1;
	  if ( strcmp(argv[i], "-nsp") ==0) {
	    if (i==argc-1) usage();
	    ns_bases = argv[++i];
	  }
	  if ( strcmp(argv[i], "-noconv")==0) noconv=1;
	  break;
	case '4':
	  tetra_loop=0;
	  break;
	case 'e':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &energy_set);
	  if (!r) usage();
	  break;
	case 'C':
	  fold_constrained=1;
	  break;
	case 'c':
	  if ( strcmp(argv[i], "-circ")==0) circ=1;
	  break;
	case 'S':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%lf", &sfact);
	  if (!r) usage();
	  break;
	case 'd': dangles=0;
	  if (argv[i][2]!='\0') {
	    r=sscanf(argv[i]+2, "%d", &dangles);
	    if (r!=1) usage();
	  }
	  break;
	case 'P':
	  if (i==argc-1) usage();
	  ParamFile = argv[++i];
	  break;
	default: usage();
	}
  }

  if (circ && noLonelyPairs)
    fprintf(stderr, "warning, depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times\n");
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
	nonstandards[i++]=*c++;
	nonstandards[i++]=*c;
	if ((sym)&&(*c!=*(c-1))) {
	  nonstandards[i++]=*c;
	  nonstandards[i++]=*(c-1);
	}
      }
      c++;
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf("| : paired with another base\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("< : base i is paired with a base j<i\n");
    printf("> : base i is paired with a base j>i\n");
    printf("matching brackets ( ): base i pairs base j\n");
  }

  do {				/* main loop: continue until end of file */
    if (istty) {
      printf("\nInput string (upper or lower case); @ to quit\n");
      printf("%s%s\n", scale1, scale2);
    }
    fname[0]='\0';
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
	(void) sscanf(line, ">%12s", fname);
      printf("%s\n", line);
      free(line);
      if ((line = get_line(stdin))==NULL) break;
    }

    if ((line ==NULL) || (strcmp(line, "@") == 0)) break;

    string = (char *) space(strlen(line)+1);
    (void) sscanf(line,"%s",string);
    free(line);
    length = (int) strlen(string);

    structure = (char *) space((unsigned) length+1);
    if (fold_constrained) {
      cstruc = get_line(stdin);
      if (cstruc!=NULL)
	strncpy(structure, cstruc, length);
      else
	fprintf(stderr, "constraints missing\n");
    }
    for (l = 0; l < length; l++) {
      string[l] = toupper(string[l]);
      if (!noconv && string[l] == 'T') string[l] = 'U';
    }
    if (istty)
      printf("length = %d\n", length);

    /* initialize_fold(length); */
    if (circ)
      min_en = circfold(string, structure);
    else
      min_en = fold(string, structure);
    printf("%s\n%s", string, structure);
    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    (void) fflush(stdout);

    if (fname[0]!='\0') {
      strcpy(ffname, fname);
      strcat(ffname, "_ss.ps");
      strcpy(gfname, fname);
      strcat(gfname, "_ss.g");
    } else {
      strcpy(ffname, "rna.ps");
      strcpy(gfname, "rna.g");
    }
    if (!noPS) {
      if (length<2000)
	(void) PS_rna_plot(string, structure, ffname);
      else 
	fprintf(stderr,"INFO: structure too long, not doing xy_plot\n");
    }
    if (length>2000) free_arrays(); 
    if (pf) {
      char *pf_struc;
      pf_struc = (char *) space((unsigned) length+1);
	if (dangles==1) {
	  dangles=2;   /* recompute with dangles as in pf_fold() */
	  min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure);
	  dangles=1;
      }

      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      pf_scale = exp(-(sfact*min_en)/kT/length);
      if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);

      (circ) ? init_pf_circ_fold(length) : init_pf_fold(length);

      if (cstruc!=NULL)
	strncpy(pf_struc, cstruc, length+1);
      energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc);

      if (do_backtrack) {
	printf("%s", pf_struc);
	if (!istty) printf(" [%6.2f]\n", energy);
	else printf("\n");
      }
      if ((istty)||(!do_backtrack))
	printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
      if (do_backtrack) {
	plist *pl1,*pl2;
	char *cent;
	double dist, cent_en;
	cent = centroid(length, &dist);
	cent_en = (circ) ? energy_of_circ_struct(string, cent) :energy_of_struct(string, cent);
	printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
	free(cent);
	if (fname[0]!='\0') {
	  strcpy(ffname, fname);
	  strcat(ffname, "_dp.ps");
	} else strcpy(ffname, "dot.ps");
	pl1 = make_plist(length, 1e-5);
	pl2 = b2plist(structure);
	(void) PS_dot_plot_list(string, ffname, pl1, pl2, "");
	free(pl2);
	if (do_backtrack==2) {
	  pl2 = stackProb(1e-5);
	  if (fname[0]!='\0') {
	    strcpy(ffname, fname);
	    strcat(ffname, "_dp2.ps");
	  } else strcpy(ffname, "dot2.ps");
	  PS_dot_plot_list(string, ffname, pl1, pl2,
			   "Probabilities for stacked pairs (i,j)(i+1,j-1)");
	  free(pl2);
	}
	free(pl1);
	free(pf_struc);
      }
      printf(" frequency of mfe structure in ensemble %g; ",
	     exp((energy-min_en)/kT));
      if (do_backtrack)
	printf("ensemble diversity %-6.2f", mean_bp_dist(length));

      printf("\n");
      free_pf_arrays();

    }
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    free(string);
    free(structure);
  } while (1);
  return 0;
}
Ejemplo n.º 21
0
int main(int argc, char *argv[]){
  struct        RNA2Dfold_args_info args_info;
  unsigned int  input_type;
  char *string, *input_string, *orig_sequence;
  char *mfe_structure=NULL, *structure1=NULL, *structure2=NULL, *reference_struc1=NULL, *reference_struc2=NULL;
  char  *ParamFile=NULL;
  int   i, j, length, l;
  double min_en;
  double kT, sfact=1.07;
  int   pf=0,istty;
  int noconv=0;
  int circ=0;
  int maxDistance1 = -1;
  int maxDistance2 = -1;
  int do_backtrack = 1;
  int stBT = 0;
  int nstBT = 0;
  string=NULL;
  dangles = 2;
  struct nbhoods *neighborhoods = NULL;
  struct nbhoods *neighborhoods_cur = NULL;

  string = input_string = orig_sequence = NULL;
  /*
  #############################################
  # check the command line prameters
  #############################################
  */
  if(RNA2Dfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

  /* temperature */
  if(args_info.temp_given)
    temperature = args_info.temp_arg;

  /* max distance to 1st reference structure */
  if(args_info.maxDist1_given)
    maxDistance1 = args_info.maxDist1_arg;

  /* max distance to 2nd reference structure */
  if(args_info.maxDist2_given)
    maxDistance2 = args_info.maxDist2_arg;

  /* compute partition function and boltzmann probabilities */
  if(args_info.partfunc_given)
    pf = 1;

  /* do stachastic backtracking */
  if(args_info.stochBT_given){
    pf = 1;
    stBT = 1;
    nstBT = args_info.stochBT_arg;
  }

  if(args_info.noTetra_given)
    tetra_loop=0;

  /* assume RNA sequence to be circular */
  if(args_info.circ_given)
    circ=1;

  /* dangle options */
  if(args_info.dangles_given)
    dangles=args_info.dangles_arg;

  /* set number of threads for parallel computation */
  if(args_info.numThreads_given)
#ifdef _OPENMP
  omp_set_num_threads(args_info.numThreads_arg);
#else
  nrerror("\'j\' option is available only if compiled with OpenMP support!");
#endif

  /* get energy parameter file name */
  if(args_info.parameterFile_given)
    ParamFile = strdup(args_info.parameterFile_arg);

  /* do not allow GU pairs ? */
  if(args_info.noGU_given)
    noGU = 1;

  /* do not allow GU pairs at the end of helices? */
  if(args_info.noClosingGU_given)
    no_closingGU = 1;

  /* pf scaling factor */
  if(args_info.pfScale_given)
    sfact = args_info.pfScale_arg;

  /* do not backtrack structures ? */
  if(args_info.noBT_given)
    do_backtrack = 0;

  for (i = 0; i < args_info.neighborhood_given; i++){
    int kappa, lambda;
    kappa = lambda = 0;
    if(sscanf(args_info.neighborhood_arg[i], "%d:%d", &kappa, &lambda) == 2);
    if ((kappa>-2) && (lambda>-2)){
      if(neighborhoods_cur != NULL){
        neighborhoods_cur->next = (nbhoods *)space(sizeof(nbhoods));
        neighborhoods_cur = neighborhoods_cur->next;
      }
      else{
        neighborhoods = (nbhoods *)space(sizeof(nbhoods));
        neighborhoods_cur = neighborhoods;
      }
      neighborhoods_cur->k = kappa;
      neighborhoods_cur->l = lambda;
      neighborhoods_cur->next = NULL;
    }
  }
  /* free allocated memory of command line data structure */
  RNA2Dfold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin actual program code
  #############################################
  */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

 /*
  #############################################
  # main loop, continue until end of file
  #############################################
  */
  do {
    if (istty)
      print_tty_input_seq_str("Input strings\n1st line: sequence (upper or lower case)\n2nd + 3rd line: reference structures (dot bracket notation)\n@ to quit\n");

    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string); /* print fasta header if available */
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      length = (int)    strlen(input_string);
      string = strdup(input_string);
      free(input_string);
    }

    mfe_structure = (char *) space((unsigned) length+1);
    structure1 = (char *) space((unsigned) length+1);
    structure2 = (char *) space((unsigned) length+1);

    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ break;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      reference_struc1 = strdup(input_string);
      free(input_string);
      if(strlen(reference_struc1) != length)
        nrerror("sequence and 1st reference structure have unequal length");
    }
    else nrerror("1st reference structure missing\n");
    strncpy(structure1, reference_struc1, length);

    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ break;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      reference_struc2 = strdup(input_string);
      free(input_string);
      if(strlen(reference_struc2) != length)
        nrerror("sequence and 2nd reference structure have unequal length");
    }
    else nrerror("2nd reference structure missing\n");
    strncpy(structure2, reference_struc2, length);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(string);
    /* store case-unmodified sequence */
    orig_sequence = strdup(string);
    /* convert sequence to uppercase letters only */
    str_uppercase(string);

    if (istty)  printf("length = %d\n", length);

    min_en = (circ) ? circfold(string, mfe_structure) : fold(string, mfe_structure);

    printf("%s\n%s", orig_sequence, mfe_structure);

    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    printf("%s (%6.2f) <ref 1>\n", structure1, (circ) ? energy_of_circ_structure(string, structure1, 0) : energy_of_structure(string,structure1, 0));
    printf("%s (%6.2f) <ref 2>\n", structure2, (circ) ? energy_of_circ_structure(string, structure2, 0) : energy_of_structure(string,structure2, 0));

    /* get all variables need for the folding process (some memory will be preallocated here too) */
    TwoDfold_vars *mfe_vars = get_TwoDfold_variables(string, structure1, structure2, circ);
    mfe_vars->do_backtrack = do_backtrack;
    TwoDfold_solution *mfe_s = TwoDfoldList(mfe_vars, maxDistance1, maxDistance2);

    if(!pf){
#ifdef COUNT_STATES
      printf("k\tl\tn\tMFE\tMFE-structure\n");
      for(i = 0; mfe_s[i].k != INF; i++){
        printf("%d\t%d\t%lu\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_vars->N_F5[length][mfe_s[i].k][mfe_s[i].l/2], mfe_s[i].en, mfe_s[i].s);
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(mfe_s);
#else
      printf("k\tl\tMFE\tMFE-structure\n");
      for(i = 0; mfe_s[i].k != INF; i++){
        printf("%d\t%d\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_s[i].en, mfe_s[i].s);
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(mfe_s);
#endif
    }

    if(pf){
      int maxD1 = (int) mfe_vars->maxD1;
      int maxD2 = (int) mfe_vars->maxD2;
      float mmfe = INF;
      double Q;
      for(i = 0; mfe_s[i].k != INF; i++){
        if(mmfe > mfe_s[i].en) mmfe = mfe_s[i].en;
      }
      kT = (temperature+K0)*GASCONST/1000.0; /* in Kcal */
      pf_scale = exp(-(sfact*mmfe)/kT/length);
      if (length>2000)
        fprintf(stdout, "scaling factor %f\n", pf_scale);

      /* get all variables need for the folding process (some memory will be preallocated there too) */
      //TwoDpfold_vars *q_vars = get_TwoDpfold_variables_from_MFE(mfe_vars);
      /* we dont need the mfe vars and arrays anymore, so we can savely free their occupying memory */
      destroy_TwoDfold_variables(mfe_vars);
      TwoDpfold_vars *q_vars = get_TwoDpfold_variables(string, structure1, structure2, circ);

      TwoDpfold_solution *pf_s = TwoDpfoldList(q_vars, maxD1, maxD2);

      Q = 0.;
      
      for(i = 0; pf_s[i].k != INF; i++){
        Q += pf_s[i].q;
      }

      double fee = (-log(Q)-length*log(pf_scale))*kT;

      if(!stBT){
        printf("free energy of ensemble = %6.2f kcal/mol\n",fee);
        printf("k\tl\tP(neighborhood)\tP(MFE in neighborhood)\tP(MFE in ensemble)\tMFE\tE_gibbs\tMFE-structure\n");
        for(i=0; pf_s[i].k != INF;i++){
          float free_energy = (-log((float)pf_s[i].q)-length*log(pf_scale))*kT;
          if((pf_s[i].k != mfe_s[i].k) || (pf_s[i].l != mfe_s[i].l))
            nrerror("This should never happen!");
          fprintf(stdout,
                  "%d\t%d\t%2.8f\t%2.8f\t%2.8f\t%6.2f\t%6.2f\t%s\n",
                  pf_s[i].k,
                  pf_s[i].l,
                  (float)(pf_s[i].q)/(float)Q,
                  exp((free_energy-mfe_s[i].en)/kT),
                  exp((fee-mfe_s[i].en)/kT),
                  mfe_s[i].en,
                  free_energy,
                  mfe_s[i].s);
        }
      }
      else{
        init_rand();
        if(neighborhoods != NULL){
          nbhoods *tmp, *tmp2;
          for(tmp = neighborhoods; tmp != NULL; tmp = tmp->next){
            int k,l;
            k = tmp->k;
            l = tmp->l;
            for(i = 0; i < nstBT; i++){
              char *s = TwoDpfold_pbacktrack(q_vars, k, l);
              printf("%d\t%d\t%s\t%6.2f\n", k, l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0));
            }
          }
        }
        else{
          for(i=0; pf_s[i].k != INF;i++){
            for(l = 0; l < nstBT; l++){
              char *s = TwoDpfold_pbacktrack(q_vars, pf_s[i].k, pf_s[i].l);
              printf("%d\t%d\t%s\t%6.2f\n", pf_s[i].k, pf_s[i].l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0));
            }
          }
        }
      }
      free_pf_arrays();

      for(i=0; mfe_s[i].k != INF;i++){
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(pf_s);
      free(mfe_s);
      /* destroy the q_vars */
      destroy_TwoDpfold_variables(q_vars);
    }
    else
      destroy_TwoDfold_variables(mfe_vars);

    free_arrays();
    free(string);
    free(orig_sequence);
    free(mfe_structure);
    free(structure1);
    free(structure2);
    free(reference_struc1);
    free(reference_struc2);
    string = orig_sequence = mfe_structure = NULL;
  } while (1);
  return 0;
}
Ejemplo n.º 22
0
int main(int argc, char *argv[])

{
  float     *T[MAXSEQ];
  int        i,j, istty, n=0;
  int        type, length, taxa_list=0;
  float      dist;
  FILE      *somewhere=NULL;
  char      *structure;
  char      *line=NULL, fname[FILENAME_MAX_LENGTH], *list_title=NULL;
  plist     *pr_pl, *mfe_pl;

  pr_pl = mfe_pl = NULL;

  command_line(argc, argv);

  if((outfile[0]=='\0')&&(task=='m')&&(edit_backtrack))
    strcpy(outfile,"backtrack.file");
  if (outfile[0]!='\0') somewhere = fopen(outfile,"w");
  if (somewhere==NULL) somewhere = stdout;
  istty   = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

  while (1) {
    if ((istty)&&(n==0)) {
      printf("\nInput sequence;  @ to quit\n");
      printf("%s\n", ruler);
    }

    type = 0;
    do {  /* get sequence to fold */
      if (line!=NULL) free(line);
      *fname='\0';
      if ((line=get_line(stdin))==NULL) {type = 999; break;}
      if (line[0]=='@') type = 999;
      if (line[0]=='*') {
        if (taxa_list==0) {
          if (task=='m') taxa_list=1;
          printf("%s\n", line);
          type = 0;
        } else {
          list_title = strdup(line);
          type = 888;
        }
      }
      if (line[0]=='>') {
        if (sscanf(line,">%" XSTR(FILENAME_ID_LENGTH) "s", fname)!=0)
          strcat(fname, "_dp.ps");
        if (taxa_list)
          printf("%d : %s\n", n+1, line+1);
        else printf("%s\n",line);
        type = 0;
      }
      if (isalpha(line[0]))  {
        char *cp;
        cp =strchr(line,' ');
        if (cp) *cp='\0';
        type = 1;
      }
    } while(type==0);

    if( (task == 'm')&&(type>800) ) {
      if (taxa_list)
        printf("* END of taxa list\n");
      printf("> p %d (pdist)\n",n);
      for (i=1; i<n; i++) {
        for (j=0; j<i; j++) {
          printf("%g ",profile_edit_distance(T[i], T[j]));
          if(edit_backtrack) fprintf(somewhere,"> %d %d\n",i+1,j+1);
          print_aligned_lines(somewhere);
        }
        printf("\n");
      }
      if (type==888) {  /* do another distance matrix */
        n = 0;
        printf("%s\n", list_title);
        free(list_title);
      }
    }

    if(type>800) {
      for (i=0; i<n; i++)
        free_profile(T[i]);
      if (type == 888) continue;
      if (outfile[0]!='\0') (void) fclose(somewhere);
      if (line!= NULL) free(line);
      return 0; /* finito */
    }

    length = (int) strlen(line);
    for (i=0; i<length; i++) {
      line[i]=toupper(line[i]);
      if (!noconv && line[i] == 'T') line[i] = 'U';
    }

    /* init_pf_fold(length); <- obsolete */
    structure = (char *) space((length+1)*sizeof(char));
    (void) pf_fold(line,structure);

    if (*fname=='\0')
      sprintf(fname, "%d_dp.ps", n+1);

    /* PS_dot_plot(line, fname); <- NOT THREADSAFE and obsolete function! */

    /* get pairlist of probability matrix */
    assign_plist_from_pr(&pr_pl, pr, length, 1e-5);
    /* no previous mfe call thus no mfe structure information known */
    mfe_pl = (plist *)space(sizeof(plist));
    mfe_pl[0].i = mfe_pl[0].j = 0;

    /* call threadsafe dot plot printing function */
    PS_dot_plot_list(line, fname, pr_pl, mfe_pl, "");

    T[n] = Make_bp_profile_bppm(pr, length);
    if((istty)&&(task=='m')) printf("%s\n",structure);
    free(structure);
    free(mfe_pl);
    free(pr_pl);
    free_pf_arrays();

    n++;
    switch (task) {
    case 'p' :
      if (n==2) {
        dist = profile_edit_distance(T[0],T[1]);
        printf("%g\n",dist);
        print_aligned_lines(somewhere);
        free_profile(T[0]);
        free_profile(T[1]);
        n=0;
      }
      break;
    case 'f' :
      if (n>1) {
        dist = profile_edit_distance(T[1], T[0]);
        printf("%g\n",dist);
        print_aligned_lines(somewhere);
        free_profile(T[1]);
        n=1;
      }
      break;
    case 'c' :
      if (n>1) {
        dist = profile_edit_distance(T[1], T[0]);
        printf("%g\n",dist);
        print_aligned_lines(somewhere);
        free_profile(T[0]);
        T[0] = T[1];
        n=1;
      }
      break;

    case 'm' :
      break;

    default :
      nrerror("This can't happen.");
    }    /* END switch task */
    (void) fflush(stdout);
  }    /* END while */
  if (line !=NULL) free(line);
  return 0;
}