Ejemplo n.º 1
0
/**
 * Builds a single-structure profile alignment from an RNA sequence.
 *
 * The partition function of the sequence is computed with pf_fold()
 * (do_backtrack is set to 1), then a structure is predicted with fold()
 * using dangles=2, and the forest is built from the sequence together with
 * that structure.
 *
 * @param baseStr    sequence to fold
 * @param name       stored in m_name and registered through addStrName()
 * @param constraint currently unused: constrained folding is disabled here
 *                   and pf_fold() is always called without a constraint
 * @param t          currently unused
 */
RNAProfileAlignment::RNAProfileAlignment(const string &baseStr, const string &name, const string &constraint, double t)
  : PPForestAli<RNA_Alphabet_Profile,RNA_Alphabet_Profile>(2*baseStr.length()),
    m_name(name),
    m_numStructures(1)
{
  // The structure string written by fold() lives in a string-owned buffer,
  // so it is released automatically even if setSize()/buildForest() throws
  // (the former new[]/delete[] pair leaked in that case).
  string viennaBuf(baseStr.length()+1, '\0');
  char *viennaStr=&viennaBuf[0];

  // The folding routines take a non-const char*; per the original author's
  // note they do not alter the sequence, so casting away const is acceptable.
  char *seq=const_cast<char*>(baseStr.c_str());

  // calculate partition function for the sequence (no structure constraint)
  do_backtrack=1;
  init_pf_fold(baseStr.length());
  pf_fold(seq,NULL);

  // predict the structure that the forest is built from
  dangles=2;
  fold(seq,viennaStr);

  setSize(RNAFuncs::treeSize(viennaStr));
  // NOTE(review): the pf arrays are only released after buildForest(), so it
  // presumably reads the pair probabilities - confirm before reordering.
  buildForest(baseStr,viennaStr,true);

  free_pf_arrays();

  addStrName(name);
}
Ejemplo n.º 2
0
/*
 * Runs pf_fold() on `sequence` and returns the structure string it fills in.
 *
 * sequence: NUL-terminated input sequence
 * gfe:      receives the value returned by pf_fold()
 *
 * The result buffer is obtained from space() and handed to the caller; the
 * pf arrays are released with free_pf_arrays() before returning.
 */
char* seq_pf_fold(const char* sequence, float* gfe)
{
    /* one byte per base plus the terminating NUL */
    const size_t buffer_size = sizeof(char) * (strlen(sequence) + 1);
    char* result = (char*)space(buffer_size);

    *gfe = pf_fold(sequence, result);
    free_pf_arrays();

    return result;
}
Ejemplo n.º 3
0
/*
 * Prints "temperature  heat-capacity" pairs for `string`, stepping the
 * temperature by h while temperature <= T_max+m*h+h.
 *
 * The file-level array F holds a sliding window of 2*m+1 values returned by
 * pf_fold(); ddiff(F,h,m) is evaluated on that window for every output line.
 * The globals do_backtrack, temperature and pf_scale are modified.
 *
 * string: sequence to fold
 * T_min:  first temperature to report
 * T_max:  upper temperature bound
 * h:      temperature step
 * m:      half-width of the window (the window has 2*m+1 entries)
 */
PRIVATE void heat_capacity(char *string, float T_min, float T_max,
                          float h, int m)
{
   int   n, k;
   char *mfe_struct;
   float heat_cap, kT, mfe;

   n = (int) strlen(string);

   do_backtrack = 0;

   /* start m steps below T_min so the window is centred on T_min */
   temperature = T_min -m*h;

   /* one minimum free energy fold, used only to choose the initial pf_scale */
   initialize_fold(n);
   mfe_struct = (char *) space((unsigned) n+1);
   mfe = fold(string, mfe_struct);
   free(mfe_struct);
   free_arrays();

   kT = (temperature+K0)*GASCONST/1000;    /* in kcal */
   pf_scale = exp(-(1.07*mfe)/kT/n );
   init_pf_fold(n);

   /* fill the initial window of 2*m+1 values */
   k = 0;
   while (k < 2*m+1) {
      F[k] = pf_fold(string, NULL);
      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      pf_scale=exp(-(F[k]/n +h*0.00727)/kT); /* try to extrapolate F */
      update_pf_params(n);
      k++;
   }

   while (temperature <= (T_max+m*h+h)) {

      heat_cap = - ddiff(F,h,m)* (temperature +K0 - m*h -h);
      printf("%g   %g\n", (temperature-m*h-h), heat_cap);

      /* slide the window by one step and append a new value */
      for (k=0; k<2*m; k++)
         F[k] = F[k+1];
      F[2*m] = pf_fold(string, NULL);
      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      /* k is left at the loop's end value here, i.e. the newest entry */
      pf_scale=exp(-(F[k]/n +h*0.00727)/kT);
      update_pf_params(n);
   }
   free_pf_arrays();
}
Ejemplo n.º 4
0
/*
 * Cost function based on the partition function.
 *
 * With PF enabled: calls pf_fold(string, structure), evaluates
 * energy_of_struct(string, target) and returns
 * (target energy) - (pf_fold result) - final_cost.
 * Without PF: reports an error through nrerror() and returns 0.
 */
PRIVATE double pf_cost(const char *string, char *structure, const char *target)
{
#if PF
   double ensemble_energy = pf_fold(string, structure);
   double target_energy   = energy_of_struct(string, target);

   return (double) (target_energy - ensemble_energy - final_cost);
#else
   nrerror("this version not linked with pf_fold");
   return 0;
#endif
}
Ejemplo n.º 5
0
/*
 * Command line driver for suboptimal structure output.
 *
 * Options handled below (dispatch is on the character after '-'):
 *   -T <temp>       temperature
 *   -p <n>          print n stochastically backtracked structures
 *   -noGU -noCloseGU -noLP -noconv
 *   -nsp <pairs>    comma separated list of additional base pairs; a leading
 *                   '-' makes every pair symmetric
 *   -4              tetra_loop=0
 *   -C              read a structure constraint line after every sequence
 *   -D              density of states output
 *   -d[n]           dangles (0 when no number is attached)
 *   -P <file>       parameter file
 *   -s              sorted output
 *   -logML
 *   -e <range>      energy range (delta = range*100), -ep <range> sets deltap
 *   -circ           circular sequence
 *   -z              Zuker suboptimals
 *
 * Sequences are then read from stdin until end of file or a line "@".
 * Lines starting with '*' or empty lines are skipped; a line starting with
 * '>' supplies the name printed in front of the results.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
   char  *line;
   char  *sequence;
   char  *structure = NULL;
   char  fname[21];
   char  *ParamFile = NULL;
   char  *ns_bases = NULL, *c;
   int   i, length, l, sym, r;
   int   istty;
   double deltaf, deltap=0;
   int delta=100;
   int n_back = 0;
   int noconv = 0;
   int circ=0;
   int dos=0;
   int zuker=0;

   do_backtrack = 1;
   dangles = 2;

   /* ---- command line ---- */
   for (i=1; i<argc; i++) {
      if (argv[i][0]=='-')
        switch ( argv[i][1] )
          {
          case 'T':  if (argv[i][2]!='\0') usage();
            if(i==argc-1) usage();
            r=sscanf(argv[++i], "%lf", &temperature);
            if (r!=1) usage();
            break;
          case 'p':
            if (argv[i][2]!='\0') usage();
            if(i==argc-1) usage();
            (void) sscanf(argv[++i], "%d", &n_back);
            init_rand();
            break;
          case 'n':
            if ( strcmp(argv[i], "-noGU" )==0) noGU=1;
            if ( strcmp(argv[i], "-noCloseGU" ) ==0) no_closingGU=1;
            if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
            if ( strcmp(argv[i], "-nsp") ==0) {
              if (i==argc-1) usage();
              ns_bases = argv[++i];
            }
            if ( strcmp(argv[i], "-noconv")==0) noconv=1;
            break;
          case '4':
            tetra_loop=0;
            break;
          case 'C':
            fold_constrained=1;
            break;
          case 'D':
            dos=1;
            print_energy = -999999;
            break;
          case 'd': dangles=0;
            if (argv[i][2]!='\0') {
              r=sscanf(argv[i]+2, "%d", &dangles);
              if (r!=1) usage();
            }
            break;
          case 'P':
            if (i==argc-1) usage();
            ParamFile = argv[++i];
            break;
          case 's':
            subopt_sorted=1;
            break;
          case 'l':
            if (strcmp(argv[i],"-logML")==0) {
              logML=1;
              break;
            }
            else usage();
            break;
          case 'e':
            if (i>=argc-1) usage();
            if (strcmp(argv[i],"-ep")==0)
              r=sscanf(argv[++i], "%lf", &deltap);
            else {
              r=sscanf(argv[++i], "%lf", &deltaf);
              /* only use deltaf when it was actually parsed */
              if (r==1) delta = (int) (0.1+deltaf*100);
            }
            if (r!=1) usage();
            break;
          case 'c':
            if ( strcmp(argv[i], "-circ")==0) circ=1;
            break;
          case 'z':
            zuker=1;
            break;
          default: usage();
          }
   }

   if ((zuker)&&(circ)) {
     printf("Sorry, zuker subopts not yet implemented for circfold\n");
     usage();
   }
   if ((zuker)&&(n_back>0)) {
     printf("Cna't do zuker subopts and stochastic subopts at the same time\n");
     usage();
   }
   if (ParamFile != NULL)
     read_parameter_file(ParamFile);

   /* ---- expand the -nsp list into the global nonstandards string ---- */
   if (ns_bases != NULL) {
      /* room for 32 characters plus the terminating NUL */
      nonstandards = (char *) space(33);
      c=ns_bases;
      i=sym=0;
      if (*c=='-') {
         sym=1; c++;
      }
      while (*c) {
        if (*c!=',') {
          int need;
          /* a trailing single base cannot form a pair; stop instead of
             stepping over the end of the argument */
          if (c[1]=='\0') break;
          /* symmetric mode also stores the reversed pair */
          need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
          if (i+need > 32) {
            fprintf(stderr, "warning: too many non-standard base pairs given, ignoring the rest\n");
            break;
          }
          nonstandards[i++]=*c++;
          nonstandards[i++]=*c;
          if ((sym)&&(*c!=*(c-1))) {
            nonstandards[i++]=*c;
            nonstandards[i++]=*(c-1);
          }
        }
        c++;
      }
   }
   istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
   if ((fold_constrained)&&(istty)) {
     printf("Input constraints using the following notation:\n");
     /* printf("| : paired with another base\n"); */
     printf(". : no constraint at all\n");
     printf("x : base must not pair\n");
   }

   do {                         /* main loop: continue until end of file */
     cut_point = -1;
     if (istty) {
       printf("\nInput string (upper or lower case); @ to quit\n");
       if (!zuker)printf("Use '&' to connect 2 sequences that shall form a complex.\n");
       printf("%s\n", scale);
     }
     fname[0]='\0';
     if ((line = get_line(stdin))==NULL) break;

     /* skip comment lines and get filenames */
     while ((*line=='*')||(*line=='\0')||(*line=='>')) {
       if (*line=='>')
         (void) sscanf(line, ">%20s", fname);
       free(line);
       if ((line = get_line(stdin))==NULL) break;
     }

     if (line==NULL) break;
     if (strcmp(line,"@")==0) {
       free(line);
       break;
     }

     sequence = tokenize(line); /* frees line */
     length = (int) strlen(sequence);
     structure = (char *) space((unsigned) length+1);

     if (fold_constrained) {
       char *cline, *cstruc;
       /* the constraint line may be missing at end of input; do not hand
          a NULL pointer to tokenize() */
       cline = get_line(stdin);
       if (cline!=NULL) {
         cstruc = tokenize(cline); /* frees cline, as for the sequence line */
         if (cstruc!=NULL) {
           strncpy(structure, cstruc, length);
           for (i=0; i<length; i++)
             if (structure[i]=='|')
               nrerror("constraints of type '|' not allowed");
           free(cstruc);
         }
       } else
         fprintf(stderr, "constraints missing\n");
     }

     for (l = 0; l < length; l++) {
       /* toupper() requires an unsigned char value */
       sequence[l] = toupper((unsigned char) sequence[l]);
       if (!noconv && sequence[l] == 'T') sequence[l] = 'U';
     }
     if (istty) {
       if (cut_point == -1)
         printf("length = %d\n", length);
       else
         printf("length1 = %d\nlength2 = %d\n",
                cut_point-1, length-cut_point+1);
     }


     if ((logML!=0 || dangles==1 || dangles==3) && dos==0)
       if (deltap<=0) deltap=delta/100. +0.001;
     if (deltap>0)
       print_energy = deltap;

     /* first lines of output (suitable  for sort +1n) */
     if (fname[0] != '\0')
       printf("> %s [%d]\n", fname, delta);

     if (n_back>0) {  /* stochastic backtrack */
       double mfe, kT;
       char *ss;
       st_back=1;
       ss = (char *) space(strlen(sequence)+1);
       strncpy(ss, structure, length);
       mfe = fold(sequence, ss);
       kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
       pf_scale = exp(-(1.03*mfe)/kT/length);
       strncpy(ss, structure, length);
       /* ignore return value, we are not interested in the free energy */
       (circ) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss);
       free(ss);
       for (i=0; i<n_back; i++) {
         char *s;
         s =(circ) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
         printf("%s\n", s);
         free(s);
       }
       free_pf_arrays();
     } else if (!zuker) { /* normal subopt */
       (circ) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout);
       if (dos) {
         for (i=0; i<= MAXDOS && i<=delta/10; i++) {
           printf("%4d %6d\n", i, density_of_states[i]);
         }
       }
     } else { /* Zuker suboptimals */
       SOLUTION *zr;
       if (cut_point!=-1) {
         printf("Sorry, zuker subopts not yet implemented for cofold\n");
         usage();
       }
       zr = zukersubopt(sequence);
       putoutzuker(zr);
       (void)fflush(stdout);
       /* the list ends with an entry whose structure is NULL */
       for (i=0; zr[i].structure; i++) {
         free(zr[i].structure);
       }
       free(zr);
     }
     (void)fflush(stdout);
     free(sequence);
     free(structure);
   } while (1);
   return 0;
}
Ejemplo n.º 6
0
/*
 * "vrnafold": ACD-driven front end that folds one sequence.
 *
 * All settings are obtained through the ajAcdGet* calls below and copied
 * into the folding globals (temperature, noGU, dangles, ...). The sequence
 * is upper-cased (and T converted to U unless disabled), folded with
 * fold() or circfold(), and sequence, structure and energy are written to
 * the "outfile" output; a structure plot goes to "ssoutfile" for sequences
 * shorter than 2000.
 *
 * Note: pf, noPS, istty and cstruc are never changed from their initial
 * values in this function, so the partition function section is currently
 * inactive and the plot is always attempted.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    char *string;
    char *structure=NULL, *cstruc=NULL;
    char  *ns_bases=NULL, *c;
    int   i, length, l, sym;
    double energy, min_en;
    double kT, sfact=1.07;
    int   pf=0, noPS=0, istty;
    int noconv=0;
    int circ=0;

    AjPSeq  seq     = NULL;
    AjPFile confile = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;
    AjPFile essfile = NULL;
    AjPFile dotfilea = NULL;
    AjPFile dotfileb = NULL;


    AjPStr seqstring = NULL;
    AjPStr constring = NULL;
    AjPStr seqname   = NULL;

    float eT = 0.;
    AjBool eGU;
    AjBool ecirc = ajFalse;

    AjBool eclose;
    AjBool lonely;
    AjBool convert;
    AjPStr ensbases = NULL;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    float escale = 0.;
    AjPStr edangles = NULL;
    char edangle = '\0';

    ajint len;



    embInitPV("vrnafold",argc,argv,"VIENNA",VERSION);


    seqstring = ajStrNew();
    constring = ajStrNew();
    seqname   = ajStrNew();


    /* ---- fetch all ACD values ---- */
    seq       = ajAcdGetSeq("sequence");
    confile   = ajAcdGetInfile("constraintfile");
    paramfile = ajAcdGetInfile("paramfile");
    eT        = ajAcdGetFloat("temperature");
    ecirc     = ajAcdGetBoolean("circular");
    eGU       = ajAcdGetBoolean("gu");
    eclose    = ajAcdGetBoolean("closegu");
    lonely    = ajAcdGetBoolean("lp");
    convert   = ajAcdGetBoolean("convert");
    ensbases  = ajAcdGetString("nsbases");
    etloop    = ajAcdGetBoolean("tetraloop");
    eenergy   = ajAcdGetListSingle("energy");
    escale    = ajAcdGetFloat("scale");
    edangles  = ajAcdGetListSingle("dangles");
    outf      = ajAcdGetOutfile("outfile");
    essfile   = ajAcdGetOutfile("ssoutfile");
    /*
      dotfilea  = ajAcdGetOutfile("adotoutfile");
      dotfileb  = ajAcdGetOutfile("bdotoutfile");
    */

    do_backtrack = 2;
    pf = 0;
    string = NULL;
    istty = 0;

    /* ---- translate them into the folding globals ---- */
    /* the ACD booleans are phrased positively, the globals negatively */
    temperature   = (double) eT;
    circ          = !!ecirc;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    noconv        = (convert) ? 0 : 1;
    ns_bases      = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL;
    tetra_loop    = !!etloop;

    /* the list values arrive as strings; only the first character matters */
    ewt = *ajStrGetPtr(eenergy);
    if(ewt == '0')
        energy_set = 0;
    else if(ewt == '1')
        energy_set = 1;
    else if(ewt == '2')
        energy_set = 2;

    sfact = (double) escale;

    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
        dangles = 0;
    else if(edangle == '1')
        dangles = 1;
    else if(edangle == '2')
        dangles = 2;
    else if(edangle == '3')
        dangles = 3;


    if(circ && noLonelyPairs)
    {

        ajWarn("Depending on the origin of the circular sequence\n"
               "some structures may be missed when using -noLP\nTry "
               "rotating your sequence a few times\n");
    }


    if(paramfile)
        read_parameter_file(paramfile);

    /* ---- expand the nsbases list into the global nonstandards string ---- */
    if (ns_bases != NULL)
    {
        /* room for 32 characters plus the terminating NUL */
        nonstandards = (char *) space(33);
        c=ns_bases;
        i=sym=0;

        if (*c=='-')
        {
            sym=1; c++;
        }

        while (*c!='\0')
        {
            if (*c!=',')
            {
                int need;
                /* a trailing single base cannot form a pair; stop instead
                   of stepping over the end of the string */
                if (c[1]=='\0')
                    break;
                /* symmetric mode also stores the reversed pair */
                need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
                if (i+need > 32)
                {
                    ajWarn("Too many non-standard base pairs given, "
                           "ignoring the rest\n");
                    break;
                }
                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1)))
                {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
    }


    if(confile)
        vienna_GetConstraints(confile,&constring);

    string = NULL;
    structure = NULL;

    length = ajSeqGetLen(seq);
    string = (char *) space(length+1);
    strcpy(string,ajSeqGetSeqC(seq));

    len = ajStrGetLen(constring);
    structure = (char *) space(length+1);
    if(len)
    {
        fold_constrained = 1;
        /* structure only has room for length+1 bytes; a constraint longer
           than the sequence must not overflow it */
        strncpy(structure,ajStrGetPtr(constring),length);
        structure[length] = '\0';
    }


    for (l = 0; l < length; l++) {
        /* toupper() requires an unsigned char value */
        string[l] = toupper((unsigned char) string[l]);
        if (!noconv && string[l] == 'T') string[l] = 'U';
    }

    /* initialize_fold(length); */
    if (circ)
        min_en = circfold(string, structure);
    else
        min_en = fold(string, structure);

    ajFmtPrintF(outf,"%s\n%s", string, structure);
    if (istty)
        printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
        ajFmtPrintF(outf," (%6.2f)\n", min_en);

    if (!noPS)
    {
        if (length<2000)
            (void) PS_rna_plot(string, structure, essfile);
        else
            ajWarn("Structure too long, not doing xy_plot\n");
    }
    /* long sequences release the fold arrays early; short ones at the end */
    if (length>=2000) free_arrays();

    if (pf)
    {
        char *pf_struc;
        pf_struc = (char *) space((unsigned) length+1);
        if (dangles==1)
        {
            dangles=2;   /* recompute with dangles as in pf_fold() */
            min_en = (circ) ? energy_of_circ_struct(string, structure) :
                energy_of_struct(string, structure);
            dangles=1;
        }

        kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
        pf_scale = exp(-(sfact*min_en)/kT/length);

        if (length>2000)
            ajWarn("scaling factor %f\n", pf_scale);

        (circ) ? init_pf_circ_fold(length) : init_pf_fold(length);

        if (cstruc!=NULL)
        {
            /* copy at most length characters so the buffer stays terminated */
            strncpy(pf_struc, cstruc, length);
            pf_struc[length] = '\0';
        }

        energy = (circ) ? pf_circ_fold(string, pf_struc) :
            pf_fold(string, pf_struc);

        if (do_backtrack)
        {
            ajFmtPrintF(outf,"%s", pf_struc);
            ajFmtPrintF(outf," [%6.2f]\n", energy);
        }

        if ((istty)||(!do_backtrack))
            ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n",
                        energy);

        if (do_backtrack)
        {
            plist *pl1,*pl2;
            char *cent;
            double dist, cent_en;
            cent = centroid(length, &dist);
            cent_en = (circ) ? energy_of_circ_struct(string, cent) :
                energy_of_struct(string, cent);
            ajFmtPrintF(outf,"%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
            free(cent);

            pl1 = make_plist(length, 1e-5);
            pl2 = b2plist(structure);
            (void) PS_dot_plot_list(string, dotfilea, pl1, pl2, "");
            free(pl2);
            if (do_backtrack==2)
            {
                pl2 = stackProb(1e-5);
                PS_dot_plot_list(string, dotfileb, pl1, pl2,
                                 "Probabilities for stacked pairs (i,j)(i+1,j-1)");
                free(pl2);
            }
            free(pl1);
        }
        /* released unconditionally; it used to leak without backtracking */
        free(pf_struc);

        ajFmtPrintF(outf," frequency of mfe structure in ensemble %g; ",
                    exp((energy-min_en)/kT));

        if (do_backtrack)
            ajFmtPrintF(outf,"ensemble diversity %-6.2f", mean_bp_dist(length));

        ajFmtPrintF(outf,"\n");
        free_pf_arrays();

    }

    /* ---- clean up ---- */
    if (cstruc!=NULL)
        free(cstruc);

    free(string);
    free(structure);

    ajStrDel(&seqstring);
    ajStrDel(&constring);
    ajStrDel(&seqname);

    ajStrDel(&ensbases);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);

    ajSeqDel(&seq);

    ajFileClose(&confile);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    ajFileClose(&essfile);

/*
  ajFileClose(&dotfilea);
  ajFileClose(&dotfileb);
*/
    if (length<2000) free_arrays();
    embExit();

    return 0;
}
Ejemplo n.º 7
0
/*
 * "vrnasubopt": ACD-driven front end for suboptimal structure output.
 *
 * All settings are obtained through the ajAcdGet* calls below and copied
 * into the folding globals. One sequence is processed:
 *   - with nrandom > 0, that many structures are sampled with
 *     pbacktrack()/pbacktrack_circ() after a partition function fold;
 *   - otherwise subopt()/subopt_circ() writes the structures within the
 *     energy range delta to the output file.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    char *sequence;
    char *structure = NULL;
    char  *ns_bases = NULL, *c;
    int   i, length, l, sym;
    int   istty;
    double deltap=0.;
    int delta=100;
    int n_back = 0;
    int noconv=0;
    int circ=0;
    int dos=0;

    AjPSeq  seq     = NULL;
    AjPFile confile = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;


    AjPStr constring = NULL;

    float eT = 0.;
    AjBool eGU;

    AjBool eclose;
    AjBool lonely;
    AjBool convert;
    AjPStr ensbases = NULL;
    AjBool etloop;
    AjPStr edangles = NULL;
    char edangle = '\0';

    ajint len;
    float erange;
    float prange;


    embInitPV("vrnasubopt",argc,argv,"VIENNA",VERSION);


    constring = ajStrNew();

    /* ---- fetch all ACD values ---- */
    seq           = ajAcdGetSeq("sequence");
    confile       = ajAcdGetInfile("constraintfile");
    paramfile     = ajAcdGetInfile("paramfile");
    eT            = ajAcdGetFloat("temperature");
    circ          = !!ajAcdGetBoolean("circular");
    dos           = !!ajAcdGetBoolean("dos");
    eGU           = ajAcdGetBoolean("gu");
    eclose        = ajAcdGetBoolean("closegu");
    lonely        = ajAcdGetBoolean("lp");
    convert       = ajAcdGetBoolean("convert");
    ensbases      = ajAcdGetString("nsbases");
    etloop        = ajAcdGetBoolean("tetraloop");
    erange        = ajAcdGetFloat("erange");
    prange        = ajAcdGetFloat("prange");
    subopt_sorted = !!ajAcdGetBoolean("sort");
    logML         = !!ajAcdGetBoolean("logml");
    n_back        = ajAcdGetInt("nrandom");

    edangles      = ajAcdGetListSingle("dangles");
    outf      = ajAcdGetOutfile("outfile");

    if(dos)
        print_energy = -999999;

    do_backtrack = 1;

    istty = 0;

    /* ---- translate them into the folding globals ---- */
    /* the ACD booleans are phrased positively, the globals negatively */
    temperature   = (double) eT;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    noconv        = (convert) ? 0 : 1;
    ns_bases      = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL;
    tetra_loop    = !!etloop;

    delta = (int) (0.1 + erange * 100);
    deltap = prange;

    /* the list value arrives as a string; only the first character matters */
    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
        dangles = 0;
    else if(edangle == '1')
        dangles = 1;
    else if(edangle == '2')
        dangles = 2;
    else if(edangle == '3')
        dangles = 3;


    if(paramfile)
        read_parameter_file(paramfile);



    /* ---- expand the nsbases list into the global nonstandards string ---- */
    if (ns_bases != NULL)
    {
        /* room for 32 characters plus the terminating NUL */
        nonstandards = (char *) space(33);
        c=ns_bases;
        i=sym=0;
        if (*c=='-')
        {
            sym=1;
            c++;
        }
        while (*c)
        {
            if (*c!=',')
            {
                int need;
                /* a trailing single base cannot form a pair; stop instead
                   of stepping over the end of the string */
                if (c[1]=='\0')
                    break;
                /* symmetric mode also stores the reversed pair */
                need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
                if (i+need > 32)
                {
                    ajWarn("Too many non-standard base pairs given, "
                           "ignoring the rest\n");
                    break;
                }
                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1)))
                {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
    }


    if(confile)
        vienna_GetConstraints(confile,&constring);


    if(n_back)
        init_rand();


    sequence  = NULL;
    structure = NULL;

    length = ajSeqGetLen(seq);
    sequence = (char *) space(length+1);
    strcpy(sequence,ajSeqGetSeqC(seq));

    len = ajStrGetLen(constring);
    structure = (char *) space(length+1);
    if(len)
    {
        fold_constrained = 1;
        /* structure only has room for length+1 bytes; a constraint longer
           than the sequence must not overflow it */
        strncpy(structure,ajStrGetPtr(constring),length);
        structure[length] = '\0';
    }

    istty = 0;

    if (fold_constrained)
    {
        for (i=0; i<length; i++)
            if (structure[i]=='|')
                ajFatal("Constraints of type '|' are not allowed\n");
    }

    for (l = 0; l < length; l++)
    {
        /* toupper() requires an unsigned char value */
        sequence[l] = toupper((unsigned char) sequence[l]);
        if (!noconv && sequence[l] == 'T')
            sequence[l] = 'U';
    }

    if ((logML!=0 || dangles==1 || dangles==3) && dos==0)
        if (deltap<=0) deltap=delta/100. +0.001;
    if (deltap>0)
        print_energy = deltap;

    /* first lines of output (suitable  for sort +1n) */

    ajFmtPrintF(outf,"> %s [%d]\n", ajSeqGetNameC(seq), delta);

    if(n_back>0)
    {
        /* stochastic backtracking */
        double mfe, kT;
        char *ss;
        st_back=1;
        ss = (char *) space(strlen(sequence)+1);
        strncpy(ss, structure, length);
        mfe = (circ) ? circfold(sequence, ss) : fold(sequence, ss);
        kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
        pf_scale = exp(-(1.03*mfe)/kT/length);
        strncpy(ss, structure, length);
        /*
        ** we are not interested in the free energy but in the bppm, so we
        ** drop free energy into the void
        */
        (circ) ? (void) pf_circ_fold(sequence, ss) :
            (void) pf_fold(sequence, ss);
        free(ss);
        for (i=0; i<n_back; i++)
        {
            char *s;
            s = (circ) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
            ajFmtPrintF(outf,"%s\n", s);
            free(s);
        }
        free_pf_arrays();
    }
    else
    {
        (circ) ? subopt_circ(sequence, structure, delta, ajFileGetFileptr(outf)) :
            subopt(sequence, structure, delta, ajFileGetFileptr(outf));
    }


    /* ---- clean up ---- */
    free(sequence);
    free(structure);

    ajSeqDel(&seq);
    ajStrDel(&constring);
    ajStrDel(&ensbases);
    ajStrDel(&edangles);

    ajFileClose(&confile);
    ajFileClose(&outf);
    ajFileClose(&paramfile);

    embExit();

    return 0;
}
Ejemplo n.º 8
0
/*
 * Driver that compares base pair probability profiles of sequences read
 * from stdin.
 *
 * Input lines:
 *   "@..."  or end of file  finish (type 999)
 *   "*..."  title line; in matrix mode the first one starts a taxa list and
 *           a later one starts another distance matrix (type 888)
 *   ">id"   header; the id is used as prefix of the dot plot file name
 *   a line starting with a letter is a sequence (cut at the first blank)
 *
 * Every sequence is folded with pf_fold(), a dot plot is written and a
 * profile is stored in T[]. Depending on `task` the profiles are compared
 * pairwise ('p'), against the first ('f'), against the previous one ('c'),
 * or all against all once the list is complete ('m').
 *
 * Returns 0.
 */
int main(int argc, char *argv[])

{
  float     *T[MAXSEQ];
  int        i,j, istty, n=0;
  int        type, length, taxa_list=0;
  float      dist;
  FILE      *somewhere=NULL;
  char      *structure;
  char      *line=NULL, fname[FILENAME_MAX_LENGTH], *list_title=NULL;
  plist     *pr_pl, *mfe_pl;

  pr_pl = mfe_pl = NULL;

  command_line(argc, argv);

  if((outfile[0]=='\0')&&(task=='m')&&(edit_backtrack))
    strcpy(outfile,"backtrack.file");
  if (outfile[0]!='\0') somewhere = fopen(outfile,"w");
  /* fall back to stdout when no file was requested or it cannot be opened */
  if (somewhere==NULL) somewhere = stdout;
  istty   = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

  while (1) {
    if ((istty)&&(n==0)) {
      printf("\nInput sequence;  @ to quit\n");
      printf("%s\n", ruler);
    }

    type = 0;
    /* Reset the plot file name once per sequence. It used to be reset for
       every input line, which discarded the name taken from a '>' header
       before the sequence line was reached. */
    *fname='\0';
    do {  /* get sequence to fold */
      if (line!=NULL) free(line);
      if ((line=get_line(stdin))==NULL) {type = 999; break;}
      if (line[0]=='@') type = 999;
      if (line[0]=='*') {
        if (taxa_list==0) {
          if (task=='m') taxa_list=1;
          printf("%s\n", line);
          type = 0;
        } else {
          list_title = strdup(line);
          type = 888;
        }
      }
      if (line[0]=='>') {
        *fname='\0';
        /* sscanf returns EOF for a bare ">", so test for exactly one item */
        if (sscanf(line,">%" XSTR(FILENAME_ID_LENGTH) "s", fname)==1)
          strcat(fname, "_dp.ps");
        if (taxa_list)
          printf("%d : %s\n", n+1, line+1);
        else printf("%s\n",line);
        type = 0;
      }
      /* isalpha() requires an unsigned char value */
      if (isalpha((unsigned char) line[0]))  {
        char *cp;
        cp =strchr(line,' ');
        if (cp) *cp='\0';
        type = 1;
      }
    } while(type==0);

    if( (task == 'm')&&(type>800) ) {
      if (taxa_list)
        printf("* END of taxa list\n");
      printf("> p %d (pdist)\n",n);
      for (i=1; i<n; i++) {
        for (j=0; j<i; j++) {
          printf("%g ",profile_edit_distance(T[i], T[j]));
          if(edit_backtrack) fprintf(somewhere,"> %d %d\n",i+1,j+1);
          print_aligned_lines(somewhere);
        }
        printf("\n");
      }
      if (type==888) {  /* do another distance matrix */
        printf("%s\n", list_title);
        free(list_title);
        list_title = NULL;
      }
    }

    if(type>800) {
      /* release the stored profiles before n is reset; resetting n first
         (as was done for type 888) leaked all of them */
      for (i=0; i<n; i++)
        free_profile(T[i]);
      if (type == 888) {
        n = 0;
        continue;
      }
      /* never close stdout, which is used when fopen() failed */
      if (somewhere!=stdout) (void) fclose(somewhere);
      if (line!= NULL) free(line);
      return 0; /* finito */
    }

    /* T has MAXSEQ slots; refuse to write behind the last one
       (nrerror() is expected to terminate the program) */
    if (n>=MAXSEQ)
      nrerror("too many sequences");

    length = (int) strlen(line);
    for (i=0; i<length; i++) {
      /* toupper() requires an unsigned char value */
      line[i]=toupper((unsigned char) line[i]);
      if (!noconv && line[i] == 'T') line[i] = 'U';
    }

    /* init_pf_fold(length); <- obsolete */
    structure = (char *) space((length+1)*sizeof(char));
    (void) pf_fold(line,structure);

    if (*fname=='\0')
      sprintf(fname, "%d_dp.ps", n+1);

    /* PS_dot_plot(line, fname); <- NOT THREADSAFE and obsolete function! */

    /* get pairlist of probability matrix */
    assign_plist_from_pr(&pr_pl, pr, length, 1e-5);
    /* no previous mfe call thus no mfe structure information known */
    mfe_pl = (plist *)space(sizeof(plist));
    mfe_pl[0].i = mfe_pl[0].j = 0;

    /* call threadsafe dot plot printing function */
    PS_dot_plot_list(line, fname, pr_pl, mfe_pl, "");

    T[n] = Make_bp_profile_bppm(pr, length);
    if((istty)&&(task=='m')) printf("%s\n",structure);
    free(structure);
    free(mfe_pl);
    free(pr_pl);
    free_pf_arrays();

    n++;
    switch (task) {
    case 'p' :   /* pairwise: compare every two consecutive sequences */
      if (n==2) {
        dist = profile_edit_distance(T[0],T[1]);
        printf("%g\n",dist);
        print_aligned_lines(somewhere);
        free_profile(T[0]);
        free_profile(T[1]);
        n=0;
      }
      break;
    case 'f' :   /* compare every sequence to the first one */
      if (n>1) {
        dist = profile_edit_distance(T[1], T[0]);
        printf("%g\n",dist);
        print_aligned_lines(somewhere);
        free_profile(T[1]);
        n=1;
      }
      break;
    case 'c' :   /* compare every sequence to its predecessor */
      if (n>1) {
        dist = profile_edit_distance(T[1], T[0]);
        printf("%g\n",dist);
        print_aligned_lines(somewhere);
        free_profile(T[0]);
        T[0] = T[1];
        n=1;
      }
      break;

    case 'm' :   /* matrix: only collect, compared at the end of the list */
      break;

    default :
      nrerror("This can't happen.");
    }    /* END switch task */
    (void) fflush(stdout);
  }    /* END while */
  return 0; /* not reached: the loop only ends through the return above */
}
Ejemplo n.º 9
0
/*
 * Command line driver for minimum free energy folding, optionally followed
 * by a partition function computation.
 *
 * Options handled below (dispatch is on the character after '-'):
 *   -T <temp>       temperature
 *   -p[n]           also compute the partition function; an attached number
 *                   is stored in do_backtrack
 *   -noGU -noCloseGU -noLP -noPS -noconv
 *   -nsp <pairs>    comma separated list of additional base pairs; a leading
 *                   '-' makes every pair symmetric
 *   -4              tetra_loop=0
 *   -e <n>          energy_set
 *   -C              read a structure constraint line after every sequence
 *   -circ           circular sequence
 *   -S <factor>     factor used when computing pf_scale
 *   -d[n]           dangles (0 when no number is attached)
 *   -P <file>       parameter file
 *
 * Sequences are then read from stdin until end of file or a line "@".
 * Lines starting with '*' or '>' and empty lines are echoed; a '>' line
 * supplies the prefix for the output file names.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
  char *string, *line;
  char *structure=NULL, *cstruc=NULL;
  char  fname[13], ffname[20], gfname[20];
  char  *ParamFile=NULL;
  char  *ns_bases=NULL, *c;
  int   i, length, l, sym, r;
  double energy, min_en;
  double kT, sfact=1.07;
  int   pf=0, noPS=0, istty;
  int noconv=0;
  int circ=0;

  do_backtrack = 1;
  string=NULL;

  /* ---- command line ---- */
  /* sscanf() may return EOF (e.g. for an empty argument), so every
     conversion is checked with r!=1 rather than !r */
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-')
      switch ( argv[i][1] )
        {
        case 'T':  if (argv[i][2]!='\0') usage();
          if(i==argc-1) usage();
          r=sscanf(argv[++i], "%lf", &temperature);
          if (r!=1) usage();
          break;
        case 'p':  pf=1;
          if (argv[i][2]!='\0')
            (void) sscanf(argv[i]+2, "%d", &do_backtrack);
          break;
        case 'n':
          if ( strcmp(argv[i], "-noGU")==0) noGU=1;
          if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1;
          if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
          if ( strcmp(argv[i], "-noPS")==0) noPS=1;
          if ( strcmp(argv[i], "-nsp") ==0) {
            if (i==argc-1) usage();
            ns_bases = argv[++i];
          }
          if ( strcmp(argv[i], "-noconv")==0) noconv=1;
          break;
        case '4':
          tetra_loop=0;
          break;
        case 'e':
          if(i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &energy_set);
          if (r!=1) usage();
          break;
        case 'C':
          fold_constrained=1;
          break;
        case 'c':
          if ( strcmp(argv[i], "-circ")==0) circ=1;
          break;
        case 'S':
          if(i==argc-1) usage();
          r=sscanf(argv[++i],"%lf", &sfact);
          if (r!=1) usage();
          break;
        case 'd': dangles=0;
          if (argv[i][2]!='\0') {
            r=sscanf(argv[i]+2, "%d", &dangles);
            if (r!=1) usage();
          }
          break;
        case 'P':
          if (i==argc-1) usage();
          ParamFile = argv[++i];
          break;
        default: usage();
        }
  }

  if (circ && noLonelyPairs)
    fprintf(stderr, "warning, depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times\n");
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  /* ---- expand the -nsp list into the global nonstandards string ---- */
  if (ns_bases != NULL) {
    /* room for 32 characters plus the terminating NUL */
    nonstandards = (char *) space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        int need;
        /* a trailing single base cannot form a pair; stop instead of
           stepping over the end of the argument */
        if (c[1]=='\0') break;
        /* symmetric mode also stores the reversed pair */
        need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
        if (i+need > 32) {
          fprintf(stderr, "warning: too many non-standard base pairs given, ignoring the rest\n");
          break;
        }
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf("| : paired with another base\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("< : base i is paired with a base j<i\n");
    printf("> : base i is paired with a base j>i\n");
    printf("matching brackets ( ): base i pairs base j\n");
  }

  do {                          /* main loop: continue until end of file */
    if (istty) {
      printf("\nInput string (upper or lower case); @ to quit\n");
      printf("%s%s\n", scale1, scale2);
    }
    fname[0]='\0';
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
        (void) sscanf(line, ">%12s", fname);
      printf("%s\n", line);
      free(line);
      if ((line = get_line(stdin))==NULL) break;
    }

    if (line==NULL) break;
    if (strcmp(line, "@") == 0) {
      free(line);
      break;
    }

    string = (char *) space(strlen(line)+1);
    (void) sscanf(line,"%s",string);
    free(line);
    length = (int) strlen(string);

    structure = (char *) space((unsigned) length+1);
    if (fold_constrained) {
      cstruc = get_line(stdin);
      if (cstruc!=NULL)
        strncpy(structure, cstruc, length);
      else
        fprintf(stderr, "constraints missing\n");
    }
    for (l = 0; l < length; l++) {
      /* toupper() requires an unsigned char value */
      string[l] = toupper((unsigned char) string[l]);
      if (!noconv && string[l] == 'T') string[l] = 'U';
    }
    if (istty)
      printf("length = %d\n", length);

    /* initialize_fold(length); */
    if (circ)
      min_en = circfold(string, structure);
    else
      min_en = fold(string, structure);
    printf("%s\n%s", string, structure);
    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    (void) fflush(stdout);

    /* fname holds at most 12 characters, so the longest suffix used in this
       function ("_dp2.ps") still fits into the 20 byte name buffers */
    if (fname[0]!='\0') {
      strcpy(ffname, fname);
      strcat(ffname, "_ss.ps");
      strcpy(gfname, fname);
      strcat(gfname, "_ss.g");
    } else {
      strcpy(ffname, "rna.ps");
      strcpy(gfname, "rna.g");
    }
    if (!noPS) {
      if (length<2000)
        (void) PS_rna_plot(string, structure, ffname);
      else
        fprintf(stderr,"INFO: structure too long, not doing xy_plot\n");
    }
    if (length>2000) free_arrays();
    if (pf) {
      char *pf_struc;
      pf_struc = (char *) space((unsigned) length+1);
      if (dangles==1) {
        dangles=2;   /* recompute with dangles as in pf_fold() */
        min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure);
        dangles=1;
      }

      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      pf_scale = exp(-(sfact*min_en)/kT/length);
      if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);

      (circ) ? init_pf_circ_fold(length) : init_pf_fold(length);

      if (cstruc!=NULL) {
        /* copy at most length characters: a constraint line longer than
           the sequence used to leave pf_struc without a terminating NUL */
        strncpy(pf_struc, cstruc, length);
        pf_struc[length] = '\0';
      }
      energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc);

      if (do_backtrack) {
        printf("%s", pf_struc);
        if (!istty) printf(" [%6.2f]\n", energy);
        else printf("\n");
      }
      if ((istty)||(!do_backtrack))
        printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
      if (do_backtrack) {
        plist *pl1,*pl2;
        char *cent;
        double dist, cent_en;
        cent = centroid(length, &dist);
        cent_en = (circ) ? energy_of_circ_struct(string, cent) :energy_of_struct(string, cent);
        printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
        free(cent);
        if (fname[0]!='\0') {
          strcpy(ffname, fname);
          strcat(ffname, "_dp.ps");
        } else strcpy(ffname, "dot.ps");
        pl1 = make_plist(length, 1e-5);
        pl2 = b2plist(structure);
        (void) PS_dot_plot_list(string, ffname, pl1, pl2, "");
        free(pl2);
        if (do_backtrack==2) {
          pl2 = stackProb(1e-5);
          if (fname[0]!='\0') {
            strcpy(ffname, fname);
            strcat(ffname, "_dp2.ps");
          } else strcpy(ffname, "dot2.ps");
          PS_dot_plot_list(string, ffname, pl1, pl2,
                           "Probabilities for stacked pairs (i,j)(i+1,j-1)");
          free(pl2);
        }
        free(pl1);
      }
      /* released unconditionally; it used to leak when do_backtrack was 0 */
      free(pf_struc);

      printf(" frequency of mfe structure in ensemble %g; ",
             exp((energy-min_en)/kT));
      if (do_backtrack)
        printf("ensemble diversity %-6.2f", mean_bp_dist(length));

      printf("\n");
      free_pf_arrays();

    }
    if (cstruc!=NULL) {
      free(cstruc);
      cstruc = NULL;   /* do not keep a dangling pointer for the next round */
    }
    (void) fflush(stdout);
    free(string);
    free(structure);
  } while (1);
  return 0;
}
Ejemplo n.º 10
0
/*
 * Entry point of the RNAsubopt command line tool.
 *
 * Parses the command line with RNAsubopt_cmdline_parser(), sets the global
 * folding options accordingly and then processes input records in a loop
 * until get_input_line() reports a quit request or an error.  For every
 * sequence exactly one of three modes is run:
 *   - stochastic backtracking (n_back > 0): fold(), then pf_fold() or
 *     pf_circ_fold(), then n_back samples drawn with pbacktrack() or
 *     pbacktrack_circ();
 *   - Zuker suboptimals (zuker != 0): zukersubopt() + putoutzuker();
 *   - otherwise: subopt() or subopt_circ() within the energy range `delta`,
 *     optionally followed by the density-of-states table.
 *
 * Returns 0 once the input loop terminates; exits with status 1 when the
 * command line is invalid or contains conflicting options.
 */
int main(int argc, char *argv[]){
  struct        RNAsubopt_args_info args_info;
  unsigned int  input_type;
  char          fname[80], *cstruc, *sequence, *c, *input_string;
  char          *structure = NULL, *ParamFile = NULL, *ns_bases = NULL;
  int           i, length, sym, istty;
  double        deltap;
  int           delta, n_back, noconv, circular, dos, zuker;

  do_backtrack  = 1;
  dangles       = 2;
  delta         = 100;
  deltap = n_back = noconv = circular = dos = zuker = 0;
  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAsubopt_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.constraint_given)  fold_constrained=1;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* energy range; the argument is multiplied by 100 before truncation */
  if(args_info.deltaEnergy_given) delta = (int) (0.1+args_info.deltaEnergy_arg*100);
  /* energy range after post evaluation */
  if(args_info.deltaEnergyPost_given) deltap = args_info.deltaEnergyPost_arg;
  /* sorted output */
  if(args_info.sorted_given)      subopt_sorted = 1;
  /* assume RNA sequence to be circular */
  if(args_info.circ_given)        circular=1;
  /* stochastic backtracking */
  if(args_info.stochBT_given){
    n_back = args_info.stochBT_arg;
    init_rand();
  }
  /* density of states */
  if(args_info.dos_given){
    dos = 1;
    print_energy = -999999;
  }
  /* logarithmic multiloop energies */
  if(args_info.logML_given) logML = 1;
  /* zuker subopts */
  if(args_info.zuker_given) zuker = 1;

  /* zuker mode excludes both circular folding and stochastic backtracking */
  if(zuker){
    if(circular){
      warn_user("Sorry, zuker subopts not yet implemented for circfold");
      RNAsubopt_cmdline_parser_print_help();
      exit(1);
    }
    else if(n_back>0){
      warn_user("Can't do zuker subopts and stochastic subopts at the same time");
      RNAsubopt_cmdline_parser_print_help();
      exit(1);
    }
  }

  /* free allocated memory of command line data structure */
  RNAsubopt_cmdline_parser_free(&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */

  if (ParamFile != NULL) read_parameter_file(ParamFile);

  /*
   * Expand the comma separated list of extra base pairs into `nonstandards`.
   * A leading '-' makes every pair symmetric (XY also adds YX).
   */
  if (ns_bases != NULL) {
    /* 33 bytes are allocated: at most 32 pair characters, and the last byte
       is never written (presumably space() zero-fills, keeping the string
       terminated -- confirm) */
    const int max_ns = 32;
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        int need;
        /* a lone trailing base has no partner; stopping here also keeps the
           final c++ below from stepping over the string terminator */
        if (c[1]=='\0') {
          warn_user("incomplete non-standard base pair ignored");
          break;
        }
        /* never write beyond the fixed-size nonstandards buffer */
        need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
        if (i+need > max_ns) {
          warn_user("too many non-standard base pairs, ignoring the rest");
          break;
        }
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  /* interactive only when both stdin and stdout are terminals */
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  if(fold_constrained && istty) print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X);


  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  do {
    cut_point = -1;
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty){
      if (!zuker)
        printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      print_tty_input_seq();
    }
    /* extract filename from fasta header if available */
    fname[0] = '\0';
    while((input_type = get_input_line(&input_string, 0)) == VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string);
      (void) sscanf(input_string, "%42s", fname);
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      sequence  = tokenize(input_string); /* frees input_string */
      length    = (int) strlen(sequence);
    }
    structure = (char *) space((unsigned) length+1);

    if(noconv)  str_RNA2RNA(sequence);
    else        str_DNA2RNA(sequence);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
    }

    /* get structure constraint or break if necessary, entering an empty line results in a warning */
    if (fold_constrained) {
      input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
      if(input_type & VRNA_INPUT_QUIT){
        /* release the buffers of the current record before leaving the loop */
        free(sequence);
        free(structure);
        break;
      }
      else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
        cstruc = tokenize(input_string);
        strncpy(structure, cstruc, length);
        for (i=0; i<length; i++)
          if (structure[i]=='|')
            nrerror("constraints of type '|' not allowed");
        free(cstruc);
      }
      else warn_user("constraints missing");
    }
    /*
    ########################################################
    # done with 'stdin' handling, now init everything properly
    ########################################################
    */

    /* for logML or dangles 1/3 (and no density of states) default the
       post-evaluation range to delta (converted back by /100) plus 0.001 */
    if((logML != 0 || dangles==1 || dangles==3) && dos == 0)
      if(deltap<=0) deltap = delta/100. + 0.001;
    if (deltap>0)
      print_energy = deltap;

    /* first lines of output (suitable  for sort +1n) */
    if (fname[0] != '\0')
      printf("> %s [%d]\n", fname, delta);

    /* stochastic backtracking */
    if(n_back>0){
      double mfe, kT;
      char *ss;
      st_back=1;
      ss = (char *) space(strlen(sequence)+1);
      strncpy(ss, structure, length);
      mfe = fold(sequence, ss);
      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      /* scale the partition function using the mfe just computed */
      pf_scale = exp(-(1.03*mfe)/kT/length);
      /* fold() wrote into ss, so restore the constraint before pf folding */
      strncpy(ss, structure, length);
      /* ignore return value, we are not interested in the free energy */
      (circular) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss);
      free(ss);
      for (i=0; i<n_back; i++) {
        char *s;
        s =(circular) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
        printf("%s\n", s);
        free(s);
      }
      free_pf_arrays();
    }
    /* normal subopt */
    else if(!zuker){
      (circular) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout);
      if (dos) {
        int i;
        for (i=0; i<= MAXDOS && i<=delta/10; i++) {
          printf("%4d %6d\n", i, density_of_states[i]);
        }
      }
    }
    /* Zuker suboptimals */
    else{
      SOLUTION *zr;
      int i;
      if (cut_point!=-1) {
        nrerror("Sorry, zuker subopts not yet implemented for cofold\n");
      }
      zr = zukersubopt(sequence);
      putoutzuker(zr);
      (void)fflush(stdout);
      /* the solution list ends with an entry whose structure is NULL */
      for (i=0; zr[i].structure; i++) {
        free(zr[i].structure);
      }
      free(zr);
    }
    (void)fflush(stdout);
    free(sequence);
    free(structure);
  } while (1);
  return 0;
}
Ejemplo n.º 11
0
/*--------------------------------------------------------------------------*/
/*
 * Entry point of the RNAup command line tool.
 *
 * Parses the command line options by hand, then reads sequences from stdin
 * in a loop until end of input or a line consisting of "@".  Depending on
 * `upmode` it either computes the probability of being unstructured for a
 * single sequence (upmode 1) or additionally the interaction between two
 * sequences (upmode 2: intra-molecular structure only for the longer RNA,
 * upmode 3: for both RNAs).  Results are printed to stdout and, unless -o is
 * given, written to an output file through Up_plot().
 *
 * Returns 0 when the input loop ends.
 */
int main(int argc, char *argv[])
{
  char *string1=NULL, *string2=NULL, *dummy=NULL, *temp=NULL, *line=NULL;
  char *structure=NULL, *cstruc=NULL, *cstruc_l=NULL, *cstruc_s=NULL;
  char fname[53], ffname[53], temp_name[201], first_name[53], my_contrib[10];
  char up_out[250], unstrs[201], name[400], cmd_line[500];
  char *ParamFile=NULL;
  char *ns_bases=NULL, *c,*head;
  int  i, length1,length2,length, l, sym, r, *u_vals, Switch, header,output;
  double energy, min_en;
  double sfact=1.07;
  int   istty;
  int noconv=0;
  /* variables for output */
  pu_contrib *unstr_out, *unstr_short;
  interact *inter_out;
  /* pu_out *longer; */
  char *title;
  /* commandline parameters */
  int w;       /* length of region of interaction */
  int incr3;   /* add x unpaired bases after 3'end of short RNA*/
  int incr5;   /* add x unpaired bases after 5'end of short RNA*/
  int unstr;   /* length of unpaired region for output*/
  int upmode ; /* 1 compute only pf_unpaired, >1 compute interactions
                  2 compute intra-molecular structure only for long RNA, 3 both RNAs */
  int task;    /* input mode for calculation of interaction */
  /* default settings for RNAup */
  head = NULL;/* header text - if header wanted, see header */
  header = 1; /* if header is 0 print no header in output file: option -nh */
  output = 1; /* if output is 0 make no output file: option -o */
  Switch = 1; /* the longer sequence is selected as the target */
  task=0;
  upmode = 1; /* default is one sequence, option -X[p|f] has to be set
                 for the calculation of an interaction, if no "&" is in
                 the sequence string  */
  unstrs[0]='\0';
  default_u = 4;
  unstr=default_u;
  default_w = 25;
  w=default_w;
  u_vals=NULL;
  incr3=0;
  incr5=0;
  do_backtrack = 1;
  length1=length2=0;
  title=NULL;
  unstr_out=NULL;
  unstr_short=NULL;
  inter_out=NULL;
  my_contrib[0] = 'S';
  my_contrib[1] = '\0';
  first_name[0] = '\0';

  /* collect the command line; track the real string length so that the
     concatenation can never run past the end of cmd_line */
  strcpy(cmd_line,"RNAup ");
  length = (int) strlen(cmd_line);
  for (i=1; i<argc; i++) {
    /* skip arguments sscanf cannot convert (empty or whitespace only) */
    if (sscanf(argv[i], "%100s", temp_name) != 1) continue;
    length += (int) strlen(temp_name) + 1; /* argument plus separating blank */
    if (length >= (int) sizeof(cmd_line)) break;
    strcat(cmd_line, temp_name);
    strcat(cmd_line," ");
  }
  length = 0;

  /* parse the options; sscanf() results are compared against 1 because it
     returns EOF (not 0) for an empty argument */
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-')
      switch ( argv[i][1] )
        {
        case 'T':  if (argv[i][2]!='\0') usage();
          if (i==argc-1) usage();
          r=sscanf(argv[++i], "%lf", &temperature);
          if (r!=1) usage();
          break;
        case 'w':
          /* -w maximal length of unstructured region */
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &w);
          if (r!=1) usage();
          break;
        case 't':
          /* use the first sequence as the target */
          if ( strcmp(argv[i], "-target")==0) {
            Switch=0;
          }
          break;
        case 'o':
          /* make no output file */
          output=0;
          break;
        case 'n':
          if ( strcmp(argv[i], "-nh")==0) {
            header=0;
          }
          if ( strcmp(argv[i], "-noGU")==0) {
            noGU=1;
          }
          if ( strcmp(argv[i], "-noCloseGU")==0) {
            no_closingGU=1;
          }
          if ( strcmp(argv[i], "-noLP")==0) {
            noLonelyPairs=1;
          }
          if ( strcmp(argv[i], "-nsp") ==0) {
            if (i==argc-1) usage();
            ns_bases = argv[++i];
          }
          if ( strcmp(argv[i], "-noconv")==0) {
            noconv=1;
          }
          break;
        case '4':
          tetra_loop=0;
          break;
        case 'e':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &energy_set);
          if (r!=1) usage();
          break;
        case 'C':
          fold_constrained=1;
          break;
        case 'S':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%lf", &sfact);
          if (r!=1) usage();
          break;
        case 'd': dangles=0;
          if (argv[i][2]!='\0') {
            r=sscanf(argv[i]+2, "%d", &dangles);
            if (r!=1) usage();
          }
          break;
        case 'b': upmode=3;
          break;
        case 'X':
          /* interaction mode invoked */
          if (upmode == 1) upmode=2;
          switch (argv[i][2]) { /* now determine which sequences interact */
          case 'p': task=1;
            break; /* pairwise interaction */
          case 'f': task=2;
            break; /* first one interacts with all others */
          }
          break;
        case 'u':
          /* -u length of unstructured region in pr_unpaired output */
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%200s", unstrs);
          if (r!=1) usage();
          if (!isdigit((unsigned char) unstrs[0])) usage();
          break;
          /* incr5 and incr3 are only for the longer (target) sequence */
          /* increments w (length of the unpaired region) to incr5+w+incr3*/
          /* the longer sequence is given in 5'(= position 1) to */
          /* 3' (=position n) direction */
          /* incr5 adds incr5 residues to the 5' end of w */
        case '5':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &incr5);
          if (r!=1) usage();
          break;
          /* incr3 adds incr3 residues to the 3' end of w */
        case '3':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &incr3);
          if (r!=1) usage();
          break;
        case 'P':
          if (i==argc-1) usage();
          ParamFile = argv[++i];
          break;
        case 'c':
          if (i==argc-1) usage();
          r=sscanf(argv[++i], "%6s", my_contrib);
          if (r!=1) usage();
          break;
        default: usage();
        }
  }
  if (dangles>0) dangles=2; /* only 0 or 2 allowed */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  /*
   * Expand the comma separated list of extra base pairs into `nonstandards`.
   * A leading '-' makes every pair symmetric (XY also adds YX).
   */
  if (ns_bases != NULL) {
    /* 33 bytes are allocated: at most 32 pair characters, and the last byte
       is never written (presumably space() zero-fills, keeping the string
       terminated -- confirm) */
    const int max_ns = 32;
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        int need;
        /* a lone trailing base has no partner; stopping here also keeps the
           final c++ below from stepping over the string terminator */
        if (c[1]=='\0') {
          fprintf(stderr, "WARNING: incomplete non-standard base pair ignored\n");
          break;
        }
        /* never write beyond the fixed-size nonstandards buffer */
        need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
        if (i+need > max_ns) {
          fprintf(stderr, "WARNING: too many non-standard base pairs, ignoring the rest\n");
          break;
        }
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;

    }
  }
  /* interactive only when both stdin and stdout are terminals */
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("matching brackets ( ): base i pairs base j\n");
    printf("constraints for intramolecular folding only:\n");
    printf("< : base i is intramolecularly paired with a base j<i\n");
    printf("> : base i is intramolecularly paired with a base j>i\n");
    printf("constraints for cofolding (intermolecular folding) only:\n");
    printf("| : paired with another base intermolecularly\n");
  }

  RT = ((temperature+K0)*GASCONST/1000.0);
  do {  /* main loop: continue until end of file */
    cut_point=-1;
    if (istty) {
      if (upmode == 1) {
        printf("\nInput string (upper or lower case); @ to quit\n");
        printf("%s%s\n", scale1, scale2);
      }
      else if (upmode > 1) {
        if (task == 1 || (task == 0 && upmode == 3)) {
          printf("\nUse either '&' to connect the 2 sequences or give each sequence on an extra line.\n");
          printf("%s%s\n", scale1, scale2);
        }
        else if (task == 2) { /* option -Xf read the first two seqs */
          printf("\nGive each sequence on an extra line. The first seq. is stored, every other seq. is compared to the first one.\n");
          printf("%s%s\n", scale1, scale2);
        }
        else if (task == 3) {/* option -Xf read another sequence which
                                will interact with the first one */
          printf("\nEnter another sequence.\n");
          printf("%s%s\n", scale1, scale2);
        }
      }
    }
    fname[0]='\0';
    ffname[0]='\0';
    /* read the first sequence */
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
        (void) sscanf(line, ">%51s", fname);
      free(line);
      line=NULL;
      if ((line = get_line(stdin))==NULL) break;
    }
    if ((line == NULL) || (strcmp(line, "@") == 0)) break;

    /* with -Xf remember the name of the very first sequence */
    if (first_name[0] == '\0' && fname[0] !='\0' && task == 2) {
      strncpy(first_name,fname,30);
      first_name[30] = '\0';
    }
    /* if upmode == 2: check if the sequences are separated via "&" (cut_point > -1) or given on extra lines */
    if (task < 3) {
      tokenize(line,&string1,&string2);
      if (task == 2 && cut_point != -1) task = 3;
      /* two sequences with & are given: calculate interaction */
      if (task == 0 && cut_point != -1) {
        task = 1;
        if (upmode == 1) upmode = 2;
      }
    }
    else if (task == 3) { /* option -Xf*/
      strncpy(ffname,fname,30);
      ffname[30] = '\0';
      strncpy(fname,first_name,30);  /* first_name: name of first seq */
      fname[30] = '\0';
      if (temp != NULL) { /*strings have been switched - write temp to string1*/
        string1 = (char *) xrealloc (string1,sizeof(char)*strlen(temp)+1);
        (void) sscanf(temp,"%s",string1);
        free(temp);temp=NULL;

      }
      tokenize(line,&string2,&dummy); /*compare every seq to first one given */
      free(dummy);dummy=NULL;
      if (cut_point != -1) {
        nrerror(
           "After the first sequence pair: Input a single sequence (no &)!\n"
           "Each input seq. is compared to the first seq. given.\n");
      }
    }
    /* interaction mode -> get the second seq. if seq are on separate lines*/
    if (upmode > 1){ /* interaction mode */
      if (cut_point == -1 && task < 3) { /* seqs are given on separate lines */
        /* read the second sequence */
        if (task == 2) task = 3;
        if ((line = get_line(stdin))==NULL) {
          nrerror("only one sequence - can not cofold one sequence!");
        }
        /* skip comment lines and get filenames */
        while ((*line=='*')||(*line=='\0')||(*line=='>')) {
          if (*line=='>')
            (void) sscanf(line, ">%51s", ffname); /* name of the 2nd seq */
          free(line);
          line=NULL;
          if ((line = get_line(stdin))==NULL) break;
        }
        if ((line ==NULL) || (strcmp(line, "@") == 0)) break;
        free(string2); /* string2 has been allocated in tokenize() */

        string2 = (char *) space(strlen(line)+1);
        (void) sscanf(line,"%s",string2); free(line);line=NULL;
      }
    } else { /* default mode pr_unpaired for ONE seq */
      /* if a second sequence is given, cofold the sequences*/
      if (cut_point != -1){
        upmode = 2;
      }
    }

    if (string1 != NULL){length1 = (int) strlen(string1);}
    else {nrerror("sequence is NULL, check your input.");}
    if (upmode > 1) {
      if (string2 != NULL) {length2 = (int) strlen(string2);}
      else{nrerror("one of the sequences is NULL, check your input.");}

      /* write longer seq in string1 and shorter one in string2 */
      if (length1 < length2 && Switch) {
        /* swap the names as well; strncpy() does not terminate when the
           source has 30 or more characters, so terminate explicitly as the
           -Xf branch above does */
        strncpy(temp_name,fname,30);
        temp_name[30] = '\0';
        strncpy(fname,ffname,30);
        fname[30] = '\0';
        strncpy(ffname,temp_name,30);
        ffname[30] = '\0';

        length=length1; length1=length2; length2=length;

        temp=(char *) space(sizeof(char)*strlen(string1)+1);
        (void) sscanf(string1,"%s",temp);
        string1 = (char *) xrealloc (string1,sizeof(char)*length1+1);
        (void) sscanf(string2,"%s",string1);
        string2 = (char *) xrealloc(string2,sizeof(char)*length2+1);
        (void) sscanf(temp,"%s",string2);
        /* with -Xf (task 3) temp is kept: it is copied back into string1 at
           the start of the next round */
        if (task == 1) {
          free(temp);
          temp = NULL;
        }
      }
    }
    /* parse cml parameters for output filename*/
    /* create the name of the output file */
    if (fname[0]!='\0') {
      printf(">%s\n",fname);
      if(strlen(fname) < 30) {
        strcpy(up_out,fname);
      } else {
        strncpy(up_out,fname,30);
        up_out[30] = '\0';
      }

      /* two named sequences: at most 15 characters of each, joined by '_' */
      if (upmode > 1 && ffname[0] != '\0') {
        printf(">%s\n",ffname);
        if(strlen(fname) < 15) {
          strcpy(up_out,fname);
        } else {
          strncpy(up_out,fname,15);
          up_out[15] = '\0';
        }
        strcat(up_out, "_");
        if(strlen(ffname) < 15) {
          strcat(up_out,ffname);
        } else {
          strncat(up_out,ffname,15);
        }
      }
    } else {
      strcpy(up_out, "RNA");
    }
    if (upmode >1) {
      sprintf(temp_name,"_w%d",w);
      strncat(up_out, temp_name,10);
    }
    /* do this only when -X[p|f] is used or if two sequences separated by & are given */
    if (upmode > 1) {
      if (task == 3) {
        /* strncpy(temp_name,fname,30); */
        if(strlen(fname) < 30) {
          strcpy(temp_name,fname);
        } else {
          strncpy(temp_name,fname,30);
          /* terminate the buffer that was just filled; writing to up_out
             here would cut the output file name assembled above */
          temp_name[30] = '\0';
        }
      }
    }

    /* get values for -u */
    if ( ! get_u_values(unstrs,&u_vals,length1)) {
      nrerror("option -u: length value exceeds sequence length\n");
    }


    /* upper-case both sequences and, unless -noconv, turn T into U; the
       cast keeps toupper() defined for characters with negative values */
    for (l = 0; l < length1; l++) {
      string1[l] = toupper((unsigned char) string1[l]);
      if (!noconv && string1[l] == 'T') string1[l] = 'U';
    }
    for (l = 0; l < length2; l++) {
      string2[l] = toupper((unsigned char) string2[l]);
      if (!noconv && string2[l] == 'T') string2[l] = 'U';
    }

    if (fold_constrained) {
      char *temp_cstruc=NULL;
      int old_cut;
      temp_cstruc = get_line(stdin);
      /* get_line() yields NULL at end of input; do not tokenize that */
      if (temp_cstruc == NULL) {
        nrerror("RNAup -C: constraint string missing");
      }
      old_cut = cut_point;
      cut_point=-1;
      /* get constraint string without & */
      cstruc = tokenize_one(temp_cstruc);
      /* free(temp_cstruc); */
      /* NOTE(review): whether tokenize_one() releases temp_cstruc is not
         visible here -- confirm before re-enabling the free above */
      /* only one seq, cstruc should not have an & */
      if (upmode == 1 && cut_point == -1) {
        if (strlen(cstruc) == length1) {
          cstruc_l=(char*)space(sizeof(char)*(length1+1));
          strncpy(cstruc_l,cstruc,length1);
        }else{
          fprintf(stderr, "%s\n%s\n",string1,cstruc);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
      } else if (upmode == 1 && cut_point != -1) {
        fprintf(stderr, "%s\n%s\n",string1,cstruc);
        nrerror("RNAup -C: only one sequence but constrain structure for cofolding");
      }
      /* constrain string is for both seqs */
      else if (upmode > 1 && cut_point != -1) {
        if (old_cut != cut_point) {
          nrerror("RNAup -C: different cut points in sequence und constrain string");
        }
        seperate_bp(&cstruc,length1,&cstruc_l,&cstruc_s);
        if (strlen(cstruc) != (length1+length2)) {
          fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
        if (strlen(cstruc_l) != (length1)) {
          fprintf(stderr, "%s\n%s\n",string1,cstruc_l);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
        if (strlen(cstruc_s) != (length2)) {
          fprintf(stderr, "%s\n%s\n",string2,cstruc_s);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
      } else {
        fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc);
        nrerror("RNAup -C: no cutpoint in constrain string");
      }
    }
    /* one structure buffer, large enough for the longer of both sequences */
    if(length1 > length2) {
      structure = (char *) space(sizeof(char)*(length1+1));
    } else {
      structure = (char *) space(sizeof(char)*(length2+1));
    }
    update_fold_params();
    /* NOTE(review): string1 is folded here with the constraint of the
       shorter sequence (cstruc_s); cstruc_l looks like the intended one --
       confirm.  The fold of string1 is repeated below with cstruc_l. */
    if (cstruc_s != NULL)
      strncpy(structure, cstruc_s, length2+1);
    min_en = fold(string1, structure);
    (void) fflush(stdout);

    if (upmode != 0){
      int wplus,w_sh;
      if (upmode == 3) { /* calculate prob. unstruct. for shorter seq */
        w_sh = w;
        /* len of unstructured region has to be <= len shorter seq. */
        if (w > length2) w_sh = length2;
        if (cstruc_s != NULL)
          strncpy(structure, cstruc_s, length2+1);
        min_en = fold(string2, structure);
        pf_scale = exp(-(sfact*min_en)/RT/length2);
        if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
        init_pf_fold(length2);
        /* fold() wrote into structure, so restore the constraint first */
        if (cstruc_s != NULL)
          strncpy(structure, cstruc_s, length2+1);
        energy = pf_fold(string2, structure);
        unstr_short = pf_unstru(string2, w_sh);
        free_pf_arrays(); /* for arrays for pf_fold(...) */
      }

      /* calculate prob. unstructured for longer seq */
      wplus=w+incr3+incr5;
      /* calculate prob. unpaired for the maximal length of -u */
      if (u_vals[u_vals[0]] > wplus) wplus=u_vals[u_vals[0]];
      /* length of the unstructured region has to be <= len longer seq. */
      if (wplus > length1) wplus=length1;
      if (cstruc_l !=NULL)
        strncpy(structure, cstruc_l, length1+1);
      min_en = fold(string1, structure);
      pf_scale = exp(-(sfact*min_en)/RT/length1);
      if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
      init_pf_fold(length1);
      /* fold() wrote into structure, so restore the constraint first */
      if (cstruc_l !=NULL)
        strncpy(structure, cstruc_l, length1+1);
      energy = pf_fold(string1, structure);
      if (upmode > 1) {
        unstr_out = pf_unstru(string1, wplus);
      } else {
        unstr_out = pf_unstru(string1, u_vals[u_vals[0]]);
      }
      free_pf_arrays(); /* for arrays for pf_fold(...) */
      /* now make output to stdout and to the output file */
      if (upmode > 1){/* calculate interaction between two sequences */
        int count;
        if (upmode == 2) {
          inter_out = pf_interact(string1,string2,unstr_out,NULL,w,cstruc,incr3,incr5);
          print_interaction(inter_out,string1,string2,unstr_out,NULL,w,incr3,incr5);
        } else if (upmode == 3){
          inter_out = pf_interact(string1,string2,unstr_out,unstr_short,w,cstruc,incr3,incr5);
          print_interaction(inter_out,string1,string2,unstr_out,unstr_short,w,incr3,incr5);
        }
        if(output) { /* make RNAup output to file */
          printf("RNAup output in file: ");
          /* plot for all -u values */
          strcpy(name,up_out);
          strcat(name, "_u");
          if(u_vals[0] <= 20) {
            for (count = 1; count <= u_vals[0]; count++) {
              unstr = u_vals[count];
              sprintf(temp_name,"%d",unstr);
              if (count < u_vals[0]) {
                strcat(temp_name,"_");
                strncat(name, temp_name,5);
              } else {
                strncat(name, temp_name,5);
                strcat(name, "_up.out");
                printf("%s\n",name);
              }
            }
          } else {
            sprintf(temp_name,"%d",u_vals[1]);
            strcat(temp_name,"_to_");
            strncat(name, temp_name,5);
            sprintf(temp_name,"%d",u_vals[0]);
            strncat(name, temp_name,5);
            strcat(name, ".out");
            printf("%s\n",name);
          }

          if(header) {
            char startl[3];
            sprintf(startl,"# ");

            head = (char*)space(sizeof(char)*(length1+length2+1000));
            /* do not end head with a \n */
            sprintf(head,"%s %s\n%s %d %s\n%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1, startl,length2,ffname, startl,string2);

          } else {
            if(head != NULL) { nrerror("error with header\n"); }
          }
          Up_plot(unstr_out,NULL,inter_out,name,u_vals,my_contrib,head);

          if(head != NULL) {
            free(head);
            head = NULL;
          }

          if (upmode == 3 ) {/* plot opening energy for both RNAs */
            if(head != NULL) { nrerror("error with header\n"); }
            Up_plot(NULL,unstr_short,NULL,name,u_vals,my_contrib,head);
          }
        }
      } else { /* one sequence:  plot only results for prob unstructured */
        int count;
        char collect_out[1000];
        collect_out[0]='\0';

        for (count = 1; count <= u_vals[0]; count++) {
          unstr = u_vals[count];
          print_unstru(unstr_out,unstr);
        }
        if(output) {/* make RNAup output to file */
          printf("RNAup output in file: ");
          strcpy(name,up_out);
          strcat(name, "_u");
          if(u_vals[0] <= 20) {
            for (count = 1; count <= u_vals[0]; count++) {
              unstr = u_vals[count];
              sprintf(temp_name,"%d",unstr);
              if (count < u_vals[0]) {
                strcat(temp_name,"_");
                strncat(name, temp_name,5);
              } else {
                strncat(name, temp_name,5);
                strcat(name, ".out");
                printf("%s\n",name);
              }
            }
          } else {
            sprintf(temp_name,"%d",u_vals[1]);
            strcat(temp_name,"_to_");
            strncat(name, temp_name,5);
            sprintf(temp_name,"%d",u_vals[0]);
            strncat(name, temp_name,5);
            strcat(name, ".out");
            printf("%s\n",name);
          }

          if(header) {
            char startl[3];
            sprintf(startl,"# ");
            head = (char*)space(sizeof(char)*(length1+length2+1000));
            /* do not end head with a \n */
            sprintf(head,"%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1);
          } else { if(head != NULL) { nrerror("error with header\n"); }}

          Up_plot(unstr_out,NULL,NULL,name,u_vals,my_contrib,head);

          if(head != NULL) { free(head); head = NULL;}
        }
      }
    } else {
      nrerror("no output format given\n");
    }


    /* release everything that belongs to the record just processed */
    if(structure != NULL) free(structure);
    structure = NULL;
    if (title != NULL) free(title);
    title=NULL;
    if (u_vals != NULL) free(u_vals);
    u_vals=NULL;
    if (upmode == 1) free_pu_contrib(unstr_out);
    if (upmode > 1) {
      free_pu_contrib(unstr_out);
      free_interact(inter_out);
    }
    if (upmode == 3)free_pu_contrib(unstr_short);
    free_arrays(); /* for arrays for fold(...) */
    if (cstruc!=NULL) free(cstruc);
    cstruc=NULL;
    if (cstruc_l!=NULL) free(cstruc_l);
    cstruc_l=NULL;
    if (cstruc_s!=NULL) free(cstruc_s);
    cstruc_s=NULL;
    (void) fflush(stdout);
    /* with -Xf (task 3) string1 is kept as the fixed first sequence */
    if (string1!=NULL && task != 3) {
      free(string1);
      string1 = NULL;
    }
    if (string2!=NULL) free(string2);
    string2 = NULL;

  } while (1);
  if (line != NULL) free(line);
  if (string1!=NULL) free(string1);
  if (string2!=NULL) free(string2);
  if (cstruc!=NULL) free(cstruc);
  if (cstruc_l!=NULL) free(cstruc_l);
  if (cstruc_s!=NULL) free(cstruc_s);

  return 0;
}
Ejemplo n.º 12
0
/*--------------------------------------------------------------------------*/
/**
 * Command-line driver: parses options, then reads sequences from stdin in a
 * loop, folds each one, computes the probability of being unstructured via
 * pf_unstru() and (for two sequences joined by '&') the interaction via
 * pf_interact(), and hands the results to Up_plot().
 *
 * Options handled below:
 *   -T <temp>    temperature              -w <int>   w (length of interaction region)
 *   -noGU / -noCloseGU / -noLP / -noconv  flags
 *   -nsp <list>  non-standard base pairs  -4         disable tetra_loop
 *   -e <int>     energy_set               -C         constrained folding
 *   -S <float>   scaling factor sfact     -d[n]      dangles (0 if n omitted)
 *   -o[n]        output mode (1 if n omitted)
 *   -u <int>     unstr                    -5/-3 <int> incr5 / incr3
 *   -P <file>    parameter file           -x <str>   my_contrib (max 9 chars)
 *
 * The input loop ends on end of input or on a line consisting of "@".
 *
 * @return 0 on normal termination.
 */
int main(int argc, char *argv[])
{
  /* buffer sizes large enough for a 30-char name plus three full-width ints */
  enum { UP_OUT_SIZE = 64, TITLE_SIZE = 80 };

  char *string1=NULL, *string2=NULL, *temp, *line;
  char *structure=NULL, *cstruc=NULL;
  char  fname[53], my_contrib[10], *up_out;
  char  *ParamFile=NULL;
  char  *ns_bases=NULL, *c;
  int   i, length1, length2, length, l, sym, r;
  double energy, min_en;
  double kT, sfact=1.07;
  int   pf, istty;
  int   noconv=0;
  /* variables for output */
  pu_contrib *unstr_out, *unstr_short;
  FLT_OR_DBL **inter_out;
  char *title;
  /* commandline parameters */
  int w;       /* length of region of interaction */
  int incr3;   /* add x unpaired bases after 3'end of short RNA*/
  int incr5;   /* add x unpaired bases after 5'end of short RNA*/
  int unstr;   /* length of unpaired region for output*/
  int upmode;  /* output mode for pf_unpaired and pf_up()*/

  upmode = 0;
  unstr = 4;
  incr3 = 0;
  incr5 = 0;
  w = 25;
  do_backtrack = 1;
  pf = 1; /* partition function has to be calculated */
  length1 = length2 = 0;
  energy = 0.;
  up_out = NULL;
  title = NULL;
  unstr_out = NULL;
  unstr_short = NULL;
  inter_out = NULL;
  my_contrib[0] = 'S';
  my_contrib[1] = '\0';

  for (i=1; i<argc; i++) {
    if (argv[i][0] != '-') continue;   /* non-option arguments are ignored */
    switch (argv[i][1]) {
    case 'T':
      if (argv[i][2]!='\0') usage();
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%lf", &temperature);
      if (r != 1) usage();             /* also catches EOF (-1) on empty input */
      break;
    case 'w':
      /* -w maximal length of unstructured region */
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%d", &w);
      if (r != 1) usage();
      break;
    case 'n':
      /* else-if chain so that the argument consumed by -nsp is never
         re-interpreted as another -n... option */
      if (strcmp(argv[i], "-noGU")==0) noGU=1;
      else if (strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1;
      else if (strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
      else if (strcmp(argv[i], "-noconv")==0) noconv=1;
      else if (strcmp(argv[i], "-nsp")==0) {
        if (i==argc-1) usage();
        ns_bases = argv[++i];
      }
      break;
    case '4':
      tetra_loop=0;
      break;
    case 'e':
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%d", &energy_set);
      if (r != 1) usage();
      break;
    case 'C':
      fold_constrained=1;
      break;
    case 'S':
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%lf", &sfact);
      if (r != 1) usage();
      break;
    case 'd':
      dangles=0;
      if (argv[i][2]!='\0') {
        r = sscanf(argv[i]+2, "%d", &dangles);
        if (r != 1) usage();
      }
      break;
    case 'o':
      /* output mode 0: non, 1:only pr_unpaired, 2: pr_unpaired + pr_up */
      upmode=1;
      if (argv[i][2]!='\0') {
        r = sscanf(argv[i]+2, "%d", &upmode);
        if (r != 1) usage();
      }
      break;
    case 'u':
      /* -u length of unstructured region in pr_unpaired output
         makes only sense in combination with -o1 or -o2 */
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%d", &unstr);
      if (r != 1) usage();
      break;
      /* incr5 and incr3 are only for the longer (target) sequence */
      /* increments w (length of the unpaired region) to incr5+w+incr3*/
      /* the longer sequence is given in 5'(= position 1) to */
      /* 3' (=position n) direction */
      /* incr5 adds incr5 residues to the 5' end of w */
    case '5':
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%d", &incr5);
      if (r != 1) usage();
      break;
      /* incr3 adds incr3 residues to the 3' end of w */
    case '3':
      if (i==argc-1) usage();
      r = sscanf(argv[++i], "%d", &incr3);
      if (r != 1) usage();
      break;
    case 'P':
      if (i==argc-1) usage();
      ParamFile = argv[++i];
      break;
    case 'x':
      if (i==argc-1) usage();
      /* field width keeps the read inside my_contrib[10] */
      r = sscanf(argv[++i], "%9s", my_contrib);
      if (r != 1) usage();
      break;
    default:
      usage();
    }
  }

  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    /* every two-character pair expands to at most four output characters,
       so 2*strlen+1 bytes always suffice; never go below the original 33 */
    size_t ns_cap = 2*strlen(ns_bases) + 1;
    if (ns_cap < 33) ns_cap = 33;
    nonstandards = (char *) space((unsigned) ns_cap);
    c = ns_bases;
    i = sym = 0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        if (c[1]=='\0') {      /* dangling single base: malformed pair list */
          usage();
          break;
        }
        nonstandards[i++] = *c++;
        nonstandards[i++] = *c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++] = *c;
          nonstandards[i++] = *(c-1);
        }
      }
      c++;
    }
    nonstandards[i] = '\0';
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf("| : paired with another base\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("< : base i is paired with a base j<i\n");
    printf("> : base i is paired with a base j>i\n");
    printf("matching brackets ( ): base i pairs base j\n");
  }

  do {				/* main loop: continue until end of file */
    cut_point=-1;
    if (istty) {
      printf("\nInput string (upper or lower case); @ to quit\n");
      printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      printf("%s%s\n", scale1, scale2);
    }
    fname[0]='\0';

    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
        (void) sscanf(line, ">%51s", fname);
      free(line);
      if ((line = get_line(stdin))==NULL) break;
    }
    if (line == NULL) break;
    if (strcmp(line, "@") == 0) {
      free(line);
      break;
    }

    /* NOTE(review): line is not freed here; presumably tokenize() takes
       ownership of it - confirm against its definition */
    tokenize(line,&string1,&string2);

    if (upmode != 0) {
      /* any mode above 1 needs a second sequence; fail before touching it */
      if (cut_point == -1 && upmode > 1) {
        nrerror("only one sequence - can not cofold one sequence!");
      }
    } else {
      /* no -o given: pick the mode from the input */
      upmode = (cut_point == -1) ? 1 : 2;
    }

    length1 = (string1 != NULL) ? (int) strlen(string1) : 0;
    length2 = (string2 != NULL) ? (int) strlen(string2) : 0;

    /* write longer seq in string1 and shorter one in string2 */
    if (length1 < length2) {
      length=length1; length1=length2; length2=length;
      temp=string1; string1=string2; string2=temp;
    }

    structure = (char *) space((unsigned) length1+1);
    if (fold_constrained) {
      cstruc = get_line(stdin);
      if (cstruc!=NULL)
        strncpy(structure, cstruc, length1);
      else
        fprintf(stderr, "constraints missing\n");
    }
    /* cast to unsigned char: toupper() is undefined for negative char values */
    for (l = 0; l < length1; l++) {
      string1[l] = (char) toupper((unsigned char) string1[l]);
      if (!noconv && string1[l] == 'T') string1[l] = 'U';
    }
    for (l = 0; l < length2; l++) {
      string2[l] = (char) toupper((unsigned char) string2[l]);
      if (!noconv && string2[l] == 'T') string2[l] = 'U';
    }

    if (istty)
      printf("length1 = %d\n", length1);

    /* initialize_fold(length); */
    update_fold_params();
    printf("\n%s", string1);
    min_en = fold(string1, structure);

    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    (void) fflush(stdout);

    /* parse cml parameters for the filename*/
    if (upmode > 0) {
      const char *base;

      /* output file name: <name, max 30 chars>_w<w>u<unstr>_up.out */
      up_out = (char*) space(sizeof(char)*UP_OUT_SIZE);
      base = (fname[0]!='\0') ? fname : "RNA";
      snprintf(up_out, UP_OUT_SIZE, "%.30s_w%du%d_up.out", base, w, unstr);
      printf("RNAup output in file: %s\n",up_out);

      /* title for the output file: <name, max 30 chars> u=.. w=.. n=.. */
      if (title == NULL) {
        title = (char*) space(sizeof(char)*TITLE_SIZE);
        base = (fname[0]!='\0') ? fname : "RNAup";
        snprintf(title, TITLE_SIZE, "%.30s u=%d w=%d n=%d",
                 base, unstr, w, length1);
      }
    } else {
      nrerror("no output format given: use [-o[1|2]] to select output format");
    }

    if (pf) {

      if (dangles==1) {
        dangles=2;   /* recompute with dangles as in pf_fold() */
        min_en = energy_of_struct(string1, structure);
        dangles=1;
      }

      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */

      if (upmode != 0) {
        int wplus;
        wplus = w+incr3+incr5;
        /* calculate prob. unstructured for the shorter seq */
        if (upmode == 3) {
          min_en = fold(string2, structure);
          pf_scale = exp(-(sfact*min_en)/kT/length2);
          if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
          init_pf_fold(length2);
          if (cstruc!=NULL)
            strncpy(structure, cstruc, length2+1);
          energy = pf_fold(string2, structure);
          if (wplus > length2) { wplus = length2; } /* for the shorter seq */
          unstr_short = pf_unstru(string2, structure, wplus);
          free_pf_unstru();
          free_pf_arrays(); /* for arrays for pf_fold(...) */
        }

        /* calculate prob. unstructured for the longer seq */
        wplus = w+incr3+incr5;
        min_en = fold(string1, structure);
        pf_scale = exp(-(sfact*min_en)/kT/length1);
        if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
        init_pf_fold(length1);
        if (cstruc!=NULL)
          strncpy(structure, cstruc, length1+1);
        energy = pf_fold(string1, structure);
        unstr_out = pf_unstru(string1, structure, wplus);
        free_pf_unstru();
        free_pf_arrays(); /* for arrays for pf_fold(...) */
        /* calculate the interaction between the two sequences */
        if (upmode > 1 && cut_point > -1) {
          inter_out = pf_interact(string1,string2,unstr_out,w, incr3, incr5);
          if (Up_plot(unstr_out,inter_out,length1,up_out,unstr,my_contrib)==0) {
            nrerror("Up_plot: no output values assigned");
          }
        } else if (cut_point == -1 && upmode > 1) { /* no second seq given */
          nrerror("only one sequence given - cannot cofold one sequence!");
        } else { /* plot only the results for prob unstructured */
          if (Up_plot(unstr_out,NULL,length1,up_out,unstr,my_contrib)==0) {
            nrerror("Up_plot: no output values assigned");
          }
        }
      } else {
        nrerror("no output format given: use [-o[1|2]] to select output format");
      }

      if (do_backtrack) {
        printf("%s", structure);
        if (!istty) printf(" [%6.2f]\n", energy);
        else printf("\n");
      }
      if ((istty)||(!do_backtrack))
        printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
      energy = pf_fold(string1, structure);
      printf(" frequency of mfe structure in ensemble %g; "
             "ensemble diversity %-6.2f\n", exp((energy-min_en)/kT),
             mean_bp_dist(length1));
      free_pf_arrays();
    }

    /* release everything owned by this iteration and reset the pointers so
       that no stale address survives into the next round */
    if (cstruc!=NULL) free(cstruc);
    cstruc=NULL;
    (void) fflush(stdout);
    if (string1!=NULL) free(string1);
    string1=NULL;
    if (string2!=NULL) free(string2);
    string2=NULL;
    free(structure);
    structure=NULL;
    if (up_out != NULL) free(up_out);
    up_out=NULL;
    if (title != NULL) free(title);
    title=NULL;
    if (upmode == 1) free_pf_two(unstr_out,NULL);
    if (upmode > 1) free_pf_two(unstr_out,inter_out);
    if (upmode == 3) free_pf_two(unstr_short,NULL);
    unstr_out=NULL;
    unstr_short=NULL;
    inter_out=NULL;
    free_arrays(); /* for arrays for fold(...) */

  } while (1);
  return 0;
}
Ejemplo n.º 13
0
/**
 * Library usage example: folds two fixed sequences at 30 C, compares the
 * resulting structures by tree edit distance and string edit distance, then
 * runs partition function folding on both and compares their base pair
 * profiles. All results are printed to stdout.
 *
 * @return 0 on completion.
 */
int main(void)
{
   /* arrays rather than pointers to string literals: the buffers are
      writable, so they can be passed to either char* or const char* APIs */
   char seq1[]="CGCAGGGAUACCCGCG", seq2[]="GCGCCCAUAGGGACGC";
   char *struct1, *struct2, *xstruc;
   float e1, e2, tree_dist, string_dist, profile_dist, kT;
   Tree *T1, *T2;
   swString *S1, *S2;
   float *pf1, *pf2;
   FLT_OR_DBL *bppm;
   /* fold at 30C instead of the default 37C */
   temperature = 30.;      /* must be set *before* initializing  */

   /* allocate memory for structure and fold */
   struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
   e1 =  fold(seq1, struct1);

   struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
   e2 =  fold(seq2, struct2);

   free_arrays();     /* free arrays used in fold() */

   /* produce tree and string representations for comparison */
   xstruc = expand_Full(struct1);
   T1 = make_tree(xstruc);
   S1 = Make_swString(xstruc);
   free(xstruc);

   xstruc = expand_Full(struct2);
   T2 = make_tree(xstruc);
   S2 = Make_swString(xstruc);
   free(xstruc);

   /* calculate tree edit distance and aligned structures with gaps */
   edit_backtrack = 1;
   tree_dist = tree_edit_distance(T1, T2);
   free_tree(T1); free_tree(T2);
   unexpand_aligned_F(aligned_line);
   printf("%s\n%s  %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);

   /* same thing using string edit (alignment) distance */
   string_dist = string_edit_distance(S1, S2);
   free(S1); free(S2);
   printf("%s  mfe=%5.2f\n%s  mfe=%5.2f  dist=%3.2f\n",
          aligned_line[0], e1, aligned_line[1], e2, string_dist);

   /* for longer sequences one should also set a scaling factor for
      partition function folding, e.g: */
   kT = (temperature+273.15)*1.98717/1000.;  /* kT in kcal/mol */
   pf_scale = exp(-e1/kT/strlen(seq1));

   /* calculate partition function and base pair probabilities */
   e1 = pf_fold(seq1, struct1);
   /* get the base pair probability matrix for the previous run of pf_fold() */
   bppm = export_bppm();
   pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));

   e2 = pf_fold(seq2, struct2);
   /* get the base pair probability matrix for the previous run of pf_fold() */
   bppm = export_bppm();
   pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));

   free_pf_arrays();  /* free space allocated for pf_fold() */

   profile_dist = profile_edit_distance(pf1, pf2);
   printf("%s  free energy=%5.2f\n%s  free energy=%5.2f  dist=%3.2f\n",
          aligned_line[0], e1, aligned_line[1], e2, profile_dist);

   free_profile(pf1); free_profile(pf2);

   /* the structure buffers were allocated above and are no longer needed */
   free(struct1); free(struct2);
   return 0;
}