Example #1
0
/* Testing (conditional) folding; use with eval.pl */
void test_folding(char* seq, int length) {

    int i,j;
    char* constraints;

    fold_constrained = 1;
    constraints = (char *) space((unsigned) length+1);

    memset(constraints,'.',length);
    constraints[0]='x';

    //constraints=NULL;

    init_pf_fold(length);

    pf_fold_pb(seq, constraints);

    for (i = 1; i < length; i++) {
        for (j = i+1; j<= length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
            if (p_pp[i][j]>1e-50) {
                printf("%i %i %.13e \n", i, j, p_pp[i][j]);
            }
        }
    }

    free_arrays();
}
Example #2
0
RNAProfileAlignment::RNAProfileAlignment(const string &baseStr, const string &name, const string &constraint, double t)
  : PPForestAli<RNA_Alphabet_Profile,RNA_Alphabet_Profile>(2*baseStr.length()),
    m_name(name),
    m_numStructures(1)
{
  // The folding routines take a non-const char*, hence the explicit
  // conversion; they do not alter baseStr.
  char *const sequence=(char*)baseStr.c_str();

  // Step 1: partition function of the sequence. Constrained folding is
  // disabled here, so `constraint` (and `t`) are currently not used.
  do_backtrack=1;
  init_pf_fold(baseStr.length());
  pf_fold(sequence,NULL);

  // Step 2: fold with dangles=2; the result is written into a scratch buffer.
  char *foldedStr=new char[baseStr.length()+1];
  dangles=2;
  fold(sequence,foldedStr);

  // Step 3: size the forest from the folded structure and build it.
  setSize(RNAFuncs::treeSize(foldedStr));
  buildForest(baseStr,foldedStr,true);

  // Step 4: release the partition function arrays and the scratch buffer.
  free_pf_arrays();
  delete[] foldedStr;

  addStrName(name);
}
Example #3
0
/*
 * Objective function
 *
 * Copies the perturbation vector v into the global epsilon[], runs the
 * partition function on pars->seq and returns the discrepancy
 *
 *   D = sum_{i=1..length} ( epsilon[i]^2 / tau
 *                           + (p_unpaired[i] - q_unpaired[i])^2 / sigma )
 *
 *   v       vector with length+1 entries (index 0 is copied but not scored)
 *   params  pointer to a minimizer_pars_struct (length, seq, tau, sigma)
 *
 * Returns D, or NAN when pf_fold_pb() yields NaN. The raw result of
 * pf_fold_pb() is left in the global last_lnQ either way.
 *
 * Side effects: overwrites the globals epsilon, p_pp, p_unpaired, last_lnQ.
 */
double calculate_f(const gsl_vector *v, void *params) {

    double D;
    int i,j,length;
    minimizer_pars_struct *pars = (minimizer_pars_struct *)params;
    double q_tmp;
    double sigma_tmp;

    //fprintf(stderr, "=> Evaluating objective Function...\n");

    length = pars->length;

    /* Load the current perturbations and reset the probability buffers */
    for (i=0; i <= length; i++) {
        epsilon[i] = gsl_vector_get(v, i);
        p_unpaired[i] = 0.0;
        for (j=0; j <= length; j++) {
            p_pp[i][j] = 0.0;
        }
    }

    init_pf_fold(length);
    last_lnQ = pf_fold_pb(pars->seq, NULL);

    if (isnan(last_lnQ)) {
        /* Release the partition function arrays on the failure path too,
           exactly as the success path below does. */
        free_pf_arrays();
        return(NAN);
    }

    /* Mirror the linearly indexed pair probabilities into p_pp */
    for (i = 1; i < length; i++) {
        for (j = i+1; j<= length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }

    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    free_pf_arrays();

    D = 0.0;

    for (i = 1; i <= length; i++) {
        /* Penalty on the size of the perturbation */
        D += 1 / pars->tau * epsilon[i] * epsilon[i];

        /* Ignore missing data. These have values of -1.0. To be on the
           safe side numerically test for < -0.5 */
        if (q_unpaired[i] < -0.5) {
            sigma_tmp = 10000; // Set very high sigma to ignore these positions
            q_tmp = 0.5; // Set to arbitrary value, does not matter
        } else {
            sigma_tmp = pars->sigma;
            q_tmp = q_unpaired[i];
        }

        /* Penalty on the deviation of predicted from observed values */
        D += 1 / sigma_tmp *
             ( p_unpaired[i] - q_tmp ) *
             ( p_unpaired[i] - q_tmp );
    }

    return D;
}
Example #4
0
/*
 * Prints one "temperature   value" line per step of size h while the
 * running temperature stays within T_max+m*h+h. Each value comes from
 * ddiff() applied to the sliding window F[0..2*m] of pf_fold() results.
 */
PRIVATE void heat_capacity(char *string, float T_min, float T_max,
                          float h, int m)
{
   int n, k;
   char *mfe_struct;
   float cv, kT, mfe;

   n = (int) strlen(string);

   do_backtrack = 0;

   /* the first sample is taken m steps below T_min */
   temperature = T_min -m*h;

   /* one fold() call to obtain an energy for the initial pf_scale */
   initialize_fold(n);
   mfe_struct = (char *) space((unsigned) n+1);
   mfe = fold(string, mfe_struct);
   free(mfe_struct);
   free_arrays();

   kT = (temperature+K0)*GASCONST/1000;    /* in kcal */
   pf_scale = exp(-(1.07*mfe)/kT/n );
   init_pf_fold(n);

   /* fill the whole window F[0..2*m] before the first output line */
   k = 0;
   while (k < 2*m+1) {
      F[k] = pf_fold(string, NULL);
      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      pf_scale=exp(-(F[k]/n +h*0.00727)/kT); /* try to extrapolate F */
      update_pf_params(n);
      k++;
   }

   while (temperature <= (T_max+m*h+h)) {

      cv = - ddiff(F,h,m)* (temperature +K0 - m*h -h);
      printf("%g   %g\n", (temperature-m*h-h), cv);

      /* slide the window one step and append a fresh sample */
      for (k=0; k<2*m; k++)
         F[k] = F[k+1];
      F[2*m] = pf_fold(string, NULL);

      temperature += h;
      kT = (temperature+K0)*GASCONST/1000;
      /* k was left at the end of the shift loop above, i.e. at the newest sample */
      pf_scale=exp(-(F[k]/n +h*0.00727)/kT);
      update_pf_params(n);
   }

   free_pf_arrays();
}
Example #5
0
/*
 * Compares the unpaired values derived from the partition function with
 * frequencies estimated from 10000 structures drawn by pbacktrack_pb().
 *
 * For every position i = 1..length one line is printed:
 *   p_unpaired[i] <TAB> q_unpaired[i] <TAB> q_unpaired[i]-p_unpaired[i]
 *
 *   seq     sequence handed to pf_fold_pb() / pbacktrack_pb()
 *   length  number of positions in seq
 *
 * Side effects: overwrites the globals p_pp, p_unpaired and q_unpaired.
 */
void test_stochastic_backtracking(char* seq, int length) {

    int i, j, N;

    init_pf_fold(length);

    pf_fold_pb(seq, NULL);

    /* Mirror the linearly indexed pair probabilities into p_pp */
    for (i = 1; i < length; i++) {
        for (j = i+1; j<= length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }

    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    for (i=1; i <= length; i++) {
        q_unpaired[i] = 0.0;
    }

    N=10000;

    /* Each sampled structure adds 1/N to every position shown as '.' */
    for (i=1; i<=N; i++) {
        char *s;
        s = pbacktrack_pb(seq);
        for (j=1; j <= length; j++) {
            if (s[j-1]=='.') {
                q_unpaired[j]+=1.0/N;
            }
        }
        free(s);
    }

    for (i=1; i <= length; i++) {
        printf("%.4f\t%.4f\t%.4f\n",  p_unpaired[i], q_unpaired[i], q_unpaired[i]-p_unpaired[i]);
    }

    /* Release the arrays created by init_pf_fold(); they are still needed
       by pbacktrack_pb() above, so this must come last. */
    free_pf_arrays();

}
Example #6
0
// Runs pf_fold() on seq_ with this wrapper's noGU / noCloseGU / noLP
// settings, stores the structure string it produces in `structure`, copies
// the global pr and iindx arrays into pr_ and iindx_, and returns the value
// pf_fold() returned.
//
// When boost threads are available and MPI is not, the whole call is
// serialized with a function-local mutex.
float
PFWrapper::
fold(std::string& structure)
{
#if !defined(HAVE_MPI) && defined(HAVE_BOOST_THREAD)
  static boost::mutex mtx;
  boost::mutex::scoped_lock lock(mtx);
#endif
  ::noGU = noGU_ ? 1 : 0;
  ::no_closingGU = noCloseGU_ ? 1 : 0;
  ::noLonelyPairs = noLP_ ? 1 : 0;
  float ret=0.0;
  init_pf_fold(sz_-1);
  // Zero-filled, self-releasing scratch buffer: it is a valid C string even
  // if pf_fold() writes nothing into it, and it cannot leak when the
  // assignment to `structure` throws.
  std::string buffer(sz_, '\0');
  ret=::pf_fold(const_cast<char*>(seq_.c_str()), &buffer[0]);
  structure=buffer.c_str();
  std::copy(pr, pr+sz_*(sz_+1)/2, pr_.begin());
  std::copy(iindx, iindx+sz_, iindx_.begin());
  free_pf_arrays();
  return ret;
}
Example #7
0
/*
 * Command line driver for inverse folding.
 *
 * Options handled below:
 *   -a <symbols>   alphabet for the designed sequence (upper-cased in place)
 *   -T <temp>      temperature
 *   -F[m][p]       select inverse_fold (m) and/or inverse_pf_fold (p)
 *   -R [n]         repeat count; a negative n is stored as-is and sets give_up
 *   -noGU, -noLP   set the corresponding globals
 *   -4             tetra_loop = 0
 *   -e <n>         energy_set
 *   -d[n]          dangles (0 when no number is attached)
 *   -f <cost>      final_cost
 *   -P <file>      parameter file
 *   -v             inv_verbose = 1
 *
 * Input is read from stdin in pairs of lines (target structure, start
 * string) until EOF or a line consisting of "@".
 */
int main(int argc, char *argv[])
{
    char *start, *structure, *rstart, *str2, *line;
    char  *ParamFile=NULL;
    int   i,j, length, l, hd;
    double energy=0., kT;
    int   pf, mfe, istty;
    int   repeat, found;

    do_backtrack = 0;
    pf = 0;
    mfe = 1;
    repeat = 0;
    init_rand();
    for (i=1; i<argc; i++) {
        if (argv[i][0]=='-')
            switch ( argv[i][1] )
            {
            case 'a':
                /* the alphabet is a separate argument; it must exist and
                   must not be empty, it is indexed randomly later on */
                if (++i>=argc || argv[i][0]=='\0') {
                    usage();
                    break;
                }
                symbolset = argv[i];
                /* symbolset should only have uppercase characters */
                for (l = 0; symbolset[l]!='\0'; l++)
                    symbolset[l] = (char) toupper((unsigned char) symbolset[l]);
                break;
            case 'T':
                if (argv[i][2]!='\0') usage();
                if (++i>=argc || sscanf(argv[i], "%lf", &temperature)!=1)
                    usage();
                break;
            case 'F':
                mfe = 0;
                pf = 0;
                for(j=2; argv[i][j]!='\0'; j++) {
                    switch( argv[i][j] ) {
                    case 'm' :
                        mfe = 1;
                        break;
                    case 'p' :
                        pf = 1; /* old version had dangles=0 here */
                        break;
                    default :
                        usage();
                    }
                }
                /* with neither mode selected the design loop below could
                   never make progress */
                if (!mfe && !pf) usage();
                break;
            case 'R':
                repeat = REPEAT_DEFAULT;
                if(++i<argc)
                    if (sscanf(argv[i], "%d", &repeat)!=1)
                        usage();
                break;
            case 'n':
                if (strcmp(argv[i], "-noGU" )==0) noGU=1;
                else if (strcmp(argv[i], "-noLP" )==0) noLonelyPairs=1;
                else usage();
                break;
            case '4':
                tetra_loop=0;
                break;
            case 'e':
                if (++i>=argc || sscanf(argv[i],"%d", &energy_set)!=1)
                    usage();
                break;
            case 'd':
                dangles=0;
                if (argv[i][2]!='\0')
                    if (sscanf(argv[i]+2, "%d", &dangles)!=1)
                        usage();
                break;
            case 'f': /* when to stop RNAfold -p */
                if (++i>=argc || sscanf(argv[i],"%f", &final_cost)!=1)
                    usage();
                break;
            case 'P':
                if (++i<argc)
                    ParamFile = argv[i];
                else
                    usage();
                break;
            case 'v':
                inv_verbose = 1;
                break;
            default:
                usage();
            }
    }

    kT = (temperature+273.15)*1.98717/1000.0;

    istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

    if (ParamFile!=NULL)
        read_parameter_file(ParamFile);

    give_up = (repeat<0);

    do {
        if (istty) {
            printf("\nInput structure & start string"
                   " (lower case letters for const positions)\n"
                   "    @ to quit, and 0 for random start string\n");
            printf("%s\n", scale);
        }

        if ((line = get_line(stdin))==NULL) break;

        /* read structure, skipping over comment lines */
        while ((*line=='*')||(*line=='\0')||(*line=='>')) {
            printf("%s\n", line);
            free(line);
            if ((line = get_line(stdin))==NULL) break;
        }
        /* stop at eof or '@' */
        if (line==NULL) break;
        if (strcmp(line, "@") == 0) {
            free(line);
            break;
        }

        structure = (char *) space(strlen(line)+1);
        (void) sscanf(line,"%s",structure); /* scanf gets rid of trailing junk */
        free(line);

        length = (int) strlen(structure);
        str2 = (char *) space((unsigned)length+1);

        if ((line = get_line(stdin))!=NULL)
            if (strcmp(line, "@") == 0) {
                /* quitting in the middle of a record: drop what was
                   already allocated for it */
                free(line);
                free(structure);
                free(str2);
                break;
            }

        /* a missing start string leaves `start` empty, so every position
           is drawn at random below */
        start = (char *) space((unsigned) length+1);
        if (line !=NULL) {
            (void) strncpy(start, line, length);
            free(line);
        }

        if (istty) printf("length = %d\n", length);

        if (repeat!=0) found = (repeat>0)? repeat : (-repeat);
        else found = 1;

        initialize_fold(length);

        rstart = (char *) space((unsigned)length+1);
        while(found>0) {
            char *string;
            string = (char *) space((unsigned)length+1);
            strcpy(string, start);
            for (i=0; i<length; i++) {
                /* lower case characters are kept fixed, any other character
                   not in symbolset is replaced by a random character */
                if (islower(string[i])) continue;

                if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
                    string[i]=symbolset[int_urn(0,strlen(symbolset)-1)];
            }
            strcpy(rstart, string); /* remember start string */

            if (mfe) {
                energy = inverse_fold(string, structure);
                if( (repeat>=0) || (energy<=0.0) ) {
                    found--;
                    hd = hamming(rstart, string);
                    printf("%s  %3d", string, hd);
                    if (energy>0) { /* no solution found */
                        printf("   d= %g\n", energy);
                        if(istty) {
                            energy = fold(string,str2);
                            printf("%s\n", str2);
                        }
                    } else printf("\n");
                }
            }
            if (pf) {
                if (!(mfe && give_up && (energy>0))) {
                    /* unless we gave up in the mfe part */
                    double prob, min_en, sfact=1.07;

                    /* get a reasonable pf_scale */
                    min_en = fold(string,str2);
                    pf_scale = exp(-(sfact*min_en)/kT/length);
                    init_pf_fold(length);

                    energy = inverse_pf_fold(string, structure);
                    prob = exp(-energy/kT);
                    hd = hamming(rstart, string);
                    printf("%s  %3d  (%g)\n", string, hd, prob);
                    free_pf_arrays();
                }
                if (!mfe) found--;
            }
            (void) fflush(stdout);
            free(string);
        }
        free(rstart);
        free_arrays();

        free(structure);
        free(str2);
        free(start);
        (void) fflush(stdout);
    } while (1);
    return 0;
}
Example #8
0
/*
 * Calculate the gradient analytically
 *
 * Copies v into the global epsilon[], computes p_unpaired for the current
 * perturbation and fills p_unpaired_cond[i][mu] in one of two ways:
 *
 *   - sample_conditionals == 0: one constrained partition function run per
 *     position ii, with 'x' at position ii of the constraint string;
 *   - otherwise: relative frequencies over 10000 structures drawn with
 *     pbacktrack_pb() (also stored in p_unpaired_cond_sampled).
 *
 * The gradient component for mu = 1..length is then
 *
 *   2*epsilon[mu]/tau
 *     + 2/kT * sum_i (1/sigma) * p_unpaired[i] * (p_unpaired[i] - q_unpaired[i])
 *                              * (p_unpaired[mu] - p_unpaired_cond[i][mu])
 *
 *   v       vector with length+1 entries
 *   params  pointer to a minimizer_pars_struct (length, seq, tau, sigma, kT)
 *   df      output vector; entries 1..length are written, entry 0 is not
 *
 * Side effects: overwrites the globals epsilon, p_pp, p_unpaired,
 * p_unpaired_cond, p_unpaired_cond_sampled, increments count_df_evaluations
 * and leaves fold_constrained at 0. Progress messages go to stderr.
 */
void calculate_df (const gsl_vector *v, void *params, gsl_vector *df) {

    double sum;
    int ii,jj,i,j,mu, length, N;
    minimizer_pars_struct *pars = (minimizer_pars_struct *)params;

    double q_tmp;
    double sigma_tmp;

    length = pars->length;

    count_df_evaluations++;

    fprintf(stderr, "=> Evaluating gradient (analytical, %s)...\n",sample_conditionals == 1 ? "sampled conditionals" : "exact conditionals");

    /* Load the current perturbations and reset all probability buffers */
    for (i=0; i <= length; i++) {
        epsilon[i] = gsl_vector_get(v, i);
        p_unpaired[i] = 0.0;
        for (j=0; j <= length; j++) {
            p_pp[i][j] = p_pp[j][i] = 0.0;
            p_unpaired_cond[i][j] = 0.0;
            p_unpaired_cond_sampled[i][j] = 0.0;
        }
    }

    /* Unconditional run for the current perturbation */
    init_pf_fold(length);
    pf_fold_pb(pars->seq, NULL);

    for (i=1; i<length; i++) {
        for (j=i+1; j<=length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }
    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    free_pf_arrays();


    if (!sample_conditionals) {
        // Calculate conditional probabilities

        char *constraints = (char *) space((unsigned) length+1);

        fold_constrained=1;

        for (ii = 1; ii <= length; ii++) {

            // Set constraints strings like
            //   x............
            //   .x...........
            //   ..x..........

            memset(constraints,'.',length);
            constraints[ii-1]='x';

            fprintf(stderr, ".");

            init_pf_fold(length);
            pf_fold_pb(pars->seq, constraints);

            for (i=1; i<length; i++) {
                for (j=i+1; j<=length; j++) {
                    p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
                }
            }
            get_pair_prob_vector(p_pp, p_unpaired_cond[ii], length, 1);
            free_pf_arrays();
        }

        fprintf(stderr, "\n");

        fold_constrained = 0;

        /* This function runs once per gradient evaluation, so the scratch
           buffer has to be given back every time. */
        free(constraints);

        // Sample gradient with stochastic backtracking
    } else {

        int *unpaired_count = (int *) space(sizeof(int)*(length+1));
        int **unpaired_count_cond = (int **)space(sizeof(int *)*(length+1));

        for (i=0; i <= length; i++) {
            unpaired_count[i] = 0;
            unpaired_count_cond[i] = (int *) space(sizeof(int)*(length+1));
            for (j=0; j <= length; j++) {
                unpaired_count_cond[i][j] = 0;
            }
        }

        fold_constrained = 0;
        init_pf_fold(length);

        pf_fold_pb(pars->seq, NULL);

        N=10000;

        /* Count, per sampled structure, which positions show '.' and which
           pairs of positions show '.' together. */
        for (i=1; i<=N; i++) {
            char *s;
            s = pbacktrack_pb(pars->seq);

            for (ii = 1; ii <= length; ii++) {
                if (s[ii-1]=='.') {
                    unpaired_count[ii]++;
                    for (jj = 1; jj <= length; jj++) {
                        if (s[jj-1]=='.') {
                            unpaired_count_cond[ii][jj]++;
                        }
                    }
                }
            }
            free(s);
        }

        /* Sampling is finished; the partition function arrays are no
           longer needed. */
        free_pf_arrays();

        /* A non-zero joint count implies unpaired_count[i] > 0, so the
           division below is safe. */
        for (i = 1; i <= length; i++) {
            for (ii = 1; ii <= length; ii++) {
                if (unpaired_count_cond[i][ii] > 0) {
                    p_unpaired_cond_sampled[i][ii] = (double)unpaired_count_cond[i][ii]/(double)unpaired_count[i];
                    p_unpaired_cond[i][ii] = (double)unpaired_count_cond[i][ii]/(double)unpaired_count[i];
                } else {
                    p_unpaired_cond_sampled[i][ii]= 0.0;
                    p_unpaired_cond[i][ii]= 0.0;
                }
            }
        }

        /* Give the counters back; they are reallocated on every call. */
        for (i=0; i <= length; i++) {
            free(unpaired_count_cond[i]);
        }
        free(unpaired_count_cond);
        free(unpaired_count);
    }

    for (mu=1; mu <= length; mu++) {
        sum = 0.0;
        for (i=1; i <= length; i++) {

            // Missing data (values of -1.0) get a very high sigma so that
            // they are effectively ignored; tested as < -0.5 to be
            // numerically safe. q_tmp is arbitrary in that case.
            if (q_unpaired[i] < -0.5) {
                sigma_tmp = 10000;
                q_tmp = 0.5;
            } else {
                sigma_tmp = pars->sigma;
                q_tmp = q_unpaired[i];
            }
            sum += (1 / sigma_tmp) * p_unpaired[i] *
                   ( p_unpaired[i] - q_tmp ) *
                   ( p_unpaired[mu] - p_unpaired_cond[i][mu] );
        }
        gsl_vector_set(df, mu, (2 * epsilon[mu] /pars->tau ) + (2 /  pars->kT * sum));
    }
}
Example #9
0
int main(int argc, char *argv[]) {

    struct        RNAfold_args_info args_info;
    char          *string, *input_string, *structure=NULL, *cstruc=NULL;
    char          fname[80], ffname[80], gfname[80], *ParamFile=NULL;
    char          *ns_bases=NULL, *c;
    int           i, j, ii, jj, mu, length, l, sym, r, pf=0, noconv=0;
    unsigned int  input_type;
    double        energy, min_en, kT, sfact=1.07;
    int           doMEA=0, circular = 0, N;
    char *pf_struc;
    double dist;
    plist *pl;

    FILE * filehandle;
    FILE * statsfile;
    char* line;

    double tau   = 0.01; /* Variance of energy parameters */
    double sigma = 0.01; /* Variance of experimental constraints */
    double *gradient;           /* Gradient for steepest descent search
                                 epsilon[i+1]= epsilon[i] - gradient *
                                 step_size */
    double initial_step_size = 0.5;  /* Initial step size for steepest
                                      descent search */
    double step_size;
    double D;                  /* Discrepancy (i.e. value of objective
                                function) for the current
                                prediction */
    int iteration, max_iteration = 2000; /* Current and maximum number of
                                         iterations after which
                                         algorithm stops */

    double precision = 0.1; /* cutoff used for stop conditions */
    double tolerance = 0.1;   /* Parameter used by various GSL minimizers */
    int method_id = 1;        /* Method to use for minimization, 0 and 1
                               are custom steepest descent, the rest
                               are GSL implementations (see below)*/

    int initial_guess_method = 0;

    int sample_N = 1000;

    double *prev_epsilon;
    double *prev_gradient;
    double DD, prev_D, sum, norm;
    int status;
    double* gradient_numeric;
    double* gradient_numeric_gsl;

    /* Minimizer vars */
    const gsl_multimin_fdfminimizer_type *T;
    gsl_multimin_fdfminimizer *minimizer;
    gsl_vector *minimizer_x;
    gsl_vector *minimizer_g;
    gsl_multimin_function_fdf minimizer_func;
    minimizer_pars_struct minimizer_pars;

    char *constraints;
    char outfile[256];
    char constraints_file[256];
    char epsilon_file[256];
    FILE* fh;

    double last_non_nan_lnQ;

    pf_overflow = 0;
    pf_underflow = 0;

    dangles=2;

    do_backtrack  = 1;
    string        = NULL;

    noPS = 0;
    outfile[0]='\0';
    epsilon_file[0]='\0';
    strcpy(psDir, "dotplots");

    if(RNAfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

    /* RNAbpfold specific options */

    if (args_info.tau_given) tau = args_info.tau_arg;
    if (args_info.sigma_given) sigma = args_info.sigma_arg;
    if (args_info.precision_given) precision = args_info.precision_arg;
    if (args_info.step_given) initial_step_size = args_info.step_arg;
    if (args_info.maxN_given) max_iteration = args_info.maxN_arg;
    if (args_info.minimization_given) method_id = args_info.minimization_arg;
    if (args_info.init_given) initial_guess_method = args_info.init_arg;
    if (args_info.tolerance_given) tolerance = args_info.tolerance_arg;
    if (args_info.outfile_given) strcpy(outfile, args_info.outfile_arg);
    if (args_info.constraints_given) strcpy(constraints_file, args_info.constraints_arg);
    if (args_info.epsilon_given) strcpy(epsilon_file, args_info.epsilon_arg);
    if (args_info.sampleGradient_given) sample_conditionals=1;
    if (args_info.hybridGradient_given) {
        sample_conditionals=1;
        hybrid_conditionals=1;
    }
    if (args_info.numericalGradient_given) numerical=1;
    if (args_info.sampleStructure_given) sample_structure=1;
    if (args_info.psDir_given) strcpy(psDir, args_info.psDir_arg);
    if (args_info.sparsePS_given) sparsePS=args_info.sparsePS_arg;
    if (args_info.gridSearch_given) grid_search = 1;


    /* Generic RNAfold options */

    if (args_info.temp_given)        temperature = args_info.temp_arg;
    if (args_info.reference_given)  fold_constrained=1;
    if (args_info.noTetra_given)     tetra_loop=0;
    if (args_info.dangles_given)     dangles = args_info.dangles_arg;
    if (args_info.noLP_given)        noLonelyPairs = 1;
    if (args_info.noGU_given)        noGU = 1;
    if (args_info.noClosingGU_given) no_closingGU = 1;
    if (args_info.noconv_given)      noconv = 1;
    if (args_info.energyModel_given) energy_set = args_info.energyModel_arg;
    if (args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
    if (args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
    if (args_info.pfScale_given)     sfact = args_info.pfScale_arg;
    if (args_info.noPS_given)        noPS=1;



    /* Create postscript directory */
    if (!noPS) {
        struct stat stat_p;
        if (stat (psDir, &stat_p) != 0) {
            if (mkdir(psDir, S_IRWXU|S_IROTH|S_IRGRP ) !=0) {
                fprintf(stderr, "WARNING: Could not create directory: %s", psDir);
            }
        }
    }

    if (ParamFile != NULL) {
        read_parameter_file(ParamFile);
    }

    if (ns_bases != NULL) {
        nonstandards = space(33);
        c=ns_bases;
        i=sym=0;
        if (*c=='-') {
            sym=1;
            c++;
        }
        while (*c!='\0') {
            if (*c!=',') {
                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1))) {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
    }

    /*Read sequence*/
    fname[0] = '\0';
    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER) {
        (void) sscanf(input_string, "%42s", fname);
        free(input_string);
    }

    length = (int)    strlen(input_string);
    string = strdup(input_string);
    free(input_string);
    structure = (char *) space((unsigned) length+1);

    /* For testing purpose pass dot bracket structure of reference structure via -C */
    if (fold_constrained) {
        input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
        if(input_type & VRNA_INPUT_QUIT) {
            exit(1);
        }
        else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)) {
            cstruc = strdup(input_string);
            free(input_string);
        }
        else warn_user("-C was given but reference structure is missing");
    }

    if(noconv) {
        str_RNA2RNA(string);
    } else {
        str_DNA2RNA(string);
    }

    /* Allocating space */

    epsilon =     (double *) space(sizeof(double)*(length+1));

    exp_pert =  (double **)space(sizeof(double *)*(length+1));
    perturbations =  (double **)space(sizeof(double *)*(length+1));
    prev_epsilon = (double *) space(sizeof(double)*(length+1));
    gradient =    (double *) space(sizeof(double)*(length+1));
    gradient_numeric =    (double *) space(sizeof(double)*(length+1));
    gradient_numeric_gsl =    (double *) space(sizeof(double)*(length+1));
    prev_gradient = (double *) space(sizeof(double)*(length+1));

    q_unpaired = (double *) space(sizeof(double)*(length+1));
    p_unpaired_cond = (double **)space(sizeof(double *)*(length+1));
    p_unpaired_cond_sampled = (double **)space(sizeof(double *)*(length+1));
    p_pp =  (double **)space(sizeof(double *)*(length+1));
    p_unpaired =  (double *) space(sizeof(double)*(length+1));
    p_unpaired_tmp = (double *) space(sizeof(double)*(length+1));

    for (i=0; i <= length; i++) {
        epsilon[i] = gradient[i] = q_unpaired[i] = 0.0;
        p_unpaired_cond[i] = (double *) space(sizeof(double)*(length+1));
        p_unpaired_cond_sampled[i] = (double *) space(sizeof(double)*(length+1));
        p_pp[i] = (double *) space(sizeof(double)*(length+1));
        exp_pert[i] = (double *) space(sizeof(double)*(length+1));
        perturbations[i] = (double *) space(sizeof(double)*(length+1));
        for (j=0; j <= length; j++) {
            p_pp[i][j]=p_unpaired_cond[i][j] = 0.0;
            p_unpaired_cond_sampled[i][j] = 0.0;
        }
    }


    /*** If file with perturbation vector epsilon is given we fold using
         this epsilon and are done ***/

    if (args_info.epsilon_given) {
        plist *pl, *pl1,*pl2;

        filehandle = fopen (epsilon_file,"r");

        if (filehandle == NULL) {
            nrerror("Could not open file with perturbation vector.");
        }

        i=1;
        while (1) {
            double t;
            line = get_line(filehandle);
            if (line == NULL) break;
            if (i>length) nrerror("Too many values in perturbation vector file.");
            if (sscanf(line, "%lf", &epsilon[i]) !=1) {
                nrerror("Error while reading perturbation vector file.");
            }
            i++;
        }

        if (i-1 != length) {
            nrerror("Too few values in perturbation vector file.");
        }

        init_pf_fold(length);
        pf_fold_pb(string, NULL);

        sprintf(fname,"%s/dot.ps", psDir);
        pl1 = make_plist(length, 1e-5);

        (void) PS_dot_plot_list_epsilon(string, fname, NULL, pl1, epsilon, "");

        exit(0);
    }



    /*** Get constraints from reference structure or from external file ***/

    /* Structure was given by -C */
    if (fold_constrained) {
        for (i=0; i<length; i++) {
            if (cstruc[i] == '(' || cstruc[i] == ')') {
                q_unpaired[i+1] = 0.0;
            } else {
                q_unpaired[i+1] = 1.0;
            }
        }

        /*Read constraints from file*/
    } else {

        filehandle = fopen (constraints_file,"r");

        if (filehandle == NULL) {
            nrerror("No constraints given as dot bracket or wrong file name");
        }

        i=1;
        while (1) {
            double t;
            line = get_line(filehandle);
            if (line == NULL) break;
            if (i>length) nrerror("Too many values in constraints.dat");
            if (sscanf(line, "%lf", &q_unpaired[i]) !=1) {
                nrerror("Error while reading constraints.dat");
            }
            i++;
        }

        if (i-1 != length) {
            nrerror("Too few values in constraints.dat");
        }
    }

    /* Create file handle */
    if (outfile[0] !='\0') {
        statsfile = fopen (outfile,"w");
    } else {
        statsfile = fopen ("stats.dat","w");
    }

    setvbuf(statsfile, NULL, _IONBF, 0);

    if (!grid_search) {
        fprintf(statsfile, "Iteration\tDiscrepancy\tNorm\tdfCount\tMEA\tSampled_structure\tSampled_energy\tSampled_distance\tEpsilon\ttimestamp\n");
    } else {
        /* If we do a grid search we have a different output. */
        fprintf(statsfile, "Dummy\tm\tb\tdummy\tMEA\tepsilon\n");
    }

    if (statsfile == NULL) {
        nrerror("Could not open stats.dat for writing.");
    }

    fprintf(stderr, "tau^2 = %.4f; sigma^2 = %.4f; precision = %.4f; tolerance = %.4f; step-size: %.4f\n\n",
            tau, sigma, precision, tolerance, initial_step_size);

    st_back=1;
    min_en = fold(string, structure);

    (void) fflush(stdout);

    if (length>2000) free_arrays();

    pf_struc = (char *) space((unsigned) length+1);

    kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
    pf_scale = exp(-(sfact*min_en)/kT/length);

    /* Set up minimizer */

    minimizer_x = gsl_vector_alloc (length+1);
    minimizer_g = gsl_vector_alloc (length+1);

    for (i=0; i <= length; i++) {
        epsilon[i] = 0.0;
        gsl_vector_set (minimizer_g, i, 0.0);
        gsl_vector_set (minimizer_x, i, epsilon[i]);
    }

    minimizer_pars.length=length;
    minimizer_pars.seq = string;
    minimizer_pars.tau=tau;
    minimizer_pars.sigma=sigma;
    minimizer_pars.kT=kT;

    minimizer_func.n = length+1;
    minimizer_func.f = calculate_f;
    minimizer_func.df = numerical ? calculate_df_numerically: calculate_df;
    minimizer_func.fdf = calculate_fdf;
    minimizer_func.params = &minimizer_pars;


    //min_en = fold_pb(string, structure);
    //fprintf(stderr, "%f", min_en);
    //exit(0);

    /* Calling test functions for debugging */
    for (i=1; i <= length; i++) {
        if (i%2==0) {
            epsilon[i] = +0.2*i;
        } else {
            epsilon[i] = -0.2*i;
        }
    }

    //test_folding(string, length);
    /* //test_stochastic_backtracking(string, length); */
    /* //test_gradient(minimizer_func, minimizer_pars); */
    /* //test_gradient_sampling(minimizer_func, minimizer_pars); */
    //exit(1);


    count_df_evaluations=0;

    /* Initial guess for epsilon */

    if (initial_guess_method !=0 && initial_guess_method !=3) {

        /* Vars for inital guess methods */
        double m,b;
        double* curr_epsilon;
        double* best_epsilon;
        double best_m, best_b, best_scale;
        double curr_D;
        double min_D = 999999999.0;
        double inc = +0.25;
        double cut;

        if (initial_guess_method == 1) fprintf(stderr, "Mathew's constant perturbations\n");
        if (initial_guess_method == 2) fprintf(stderr, "Perturbations proportional to q-p\n");

        curr_epsilon = (double *) space(sizeof(double)*(length+1));
        best_epsilon = (double *) space(sizeof(double)*(length+1));

        last_non_nan_lnQ = min_en;

        // Calculate p_unpaired for unperturbed state which we need later
        // for the proportinal method
        if (initial_guess_method == 2) {

            init_pf_fold(length);

            for (i=0; i <= length; i++) {
                epsilon[i] = 0.0;
            }

            pf_fold_pb(string, NULL);
            for (i = 1; i < length; i++) {
                for (j = i+1; j<= length; j++) {
                    p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
                }
            }
            get_pair_prob_vector(p_pp, p_unpaired_tmp, length, 1);
            free_pf_arrays();
        }

        /* We do the same grid search as in the Mathews paper Fig. 4*/
        for (m=0.25; m <=7.0; m+=0.25) {

            // Weird way of writing this inner loop for the grid search. We
            // traverse the grid without big jumps in the parameters to make
            // sure that the updated scaling factor is accurate all the time.
            inc*=-1;

            for (b = inc < 0.0 ? 0.0 : -3.0; inc < 0.0 ? b >= -3.0 : b<= 0.0 ; b+=inc) {

                // calculate cut point with x-axis and skip parameter pairs
                // which give a cut point outside the range of
                // q_unpaired (0 to 1). They gave frequently overflows and the
                // idea is that we both want positive and negative perturbations
                cut = exp( (-1) * b / m ) - 1;

                fprintf(stderr, "\nm = %.2f, b = %.2f, cut=%.2f\n", m, b, cut);

                if (cut > 1.0 || cut < 0.01) {
                    fprintf(stderr, "\nSkipping m = %.2f, b = %.2f\n", m, b);
                    continue;
                }

                /* Mathew's constant perturbations */
                if (initial_guess_method == 1) {
                    for (i=0; i <= length; i++) {

                        /* We add epsilon to unpaired regions (as opposed to
                           paired regions as in the Mathews paper) so we multiply
                           by -1; if missing data we set it to 0.0 */

                        if (q_unpaired[i] < -0.5) {
                            curr_epsilon[i] = 0.0;
                        } else {
                            curr_epsilon[i] = (m *(log(q_unpaired[i]+1))+b) *(-1);
                        }

                        gsl_vector_set (minimizer_x, i, curr_epsilon[i]);
                    }
                    /* Perturbations proportional to q-p */
                } else {

                    for (i=0; i <= length; i++) {
                        curr_epsilon[i] = (m *(log(q_unpaired[i]+1)-log(p_unpaired_tmp[i]+1))+ b ) * (-1);
                        gsl_vector_set (minimizer_x, i, curr_epsilon[i]);
                    }
                }

                // Repeat and adjust scaling factor until we get result without over-/underflows
                do {

                    // First we use default scaling factor
                    if (pf_underflow == 0 && pf_overflow == 0) {
                        sfact = 1.070;
                    }

                    if (pf_underflow) {
                        sfact *= 0.8;
                        fprintf(stderr,"Underflow, adjusting sfact to %.4f\n", sfact );
                    }

                    if (pf_overflow) {
                        sfact *= 1.2;
                        fprintf(stderr,"Overflow, adjusting sfact to %.4f\n", sfact );
                    }

                    pf_scale = exp(-(sfact*last_non_nan_lnQ)/kT/length);

                    //fprintf(stderr,"Scaling factor is now: %.4e\n", pf_scale);

                    curr_D = calculate_f(minimizer_x, (void*)&minimizer_pars);

                    if (!isnan(last_lnQ)) last_non_nan_lnQ = last_lnQ;

                    // Give up when even extreme scaling does not give results
                    // (for some reason I could not get rid of overflows even with high scaling factors)
                    if (sfact < 0.1 || sfact > 2.0) break;

                } while (pf_underflow == 1 || pf_overflow == 1);

                // We have not given up so everything is ok now
                if (!(sfact < 0.1 || sfact > 2.0)) {

                    if (curr_D < min_D) {
                        min_D = curr_D;
                        for (i=0; i <= length; i++) {
                            best_epsilon[i] = curr_epsilon[i];
                        }
                        best_m = m;
                        best_b = b;
                        best_scale = pf_scale;
                    }

                    /*If we are interested in the grid search we misuse the
                      print_stats function and report m and b together with MEA*/
                    if (grid_search) {
                        for (i=0; i <= length; i++) {
                            epsilon[i] = curr_epsilon[i];
                        }
                        print_stats(statsfile, string, cstruc, length, 0, 0, m, 0.0, b, 0);
                    }

                    fprintf(stderr, "curr D: %.2f, minimum D: %.2f\n", curr_D, min_D);

                    // Adjust pf_scale with default scaling factor but lnQ from
                    // previous step
                    sfact = 1.070;
                    pf_scale = exp(-(sfact*last_lnQ)/kT/length);

                } else {
                    sfact = 1.070;
                    fprintf(stderr, "Skipping m = %.2f, b = %.2f; did not get stable result.\n", m, b);
                }
            } // for b
        } // for m

        fprintf(stderr, "Minimum found: m=%.2f, b=%.2f: %.2f\n", best_m, best_b, min_D);

        for (i=0; i <= length; i++) {
            epsilon[i] = best_epsilon[i];
            gsl_vector_set (minimizer_x, i, best_epsilon[i]);
        }
        pf_scale = best_scale;
    }

    if (initial_guess_method == 3) {
        srand((unsigned)time(0));
        for (i=0; i <= length; i++) {
            double r = (double)rand()/(double)RAND_MAX * 4 - 2;
            epsilon[i] = r;
            gsl_vector_set (minimizer_x, i, epsilon[i]);
        }
    }

    /* If we just wanted a grid search we are done now. */
    if (grid_search) {
        exit(0);
    }

    prev_D = calculate_f(minimizer_x, (void*)&minimizer_pars);

    print_stats(statsfile, string, cstruc, length, 0 , count_df_evaluations , prev_D, -1.0, 0.0,1);

    /* GSL minimization */

    if (method_id !=0) {

        if (method_id > 2) {
            char name[100];
            // Available algorithms
            //  3  gsl_multimin_fdfminimizer_conjugate_fr
            //  4  gsl_multimin_fdfminimizer_conjugate_pr
            //  5  gsl_multimin_fdfminimizer_vector_bfgs
            //  6  gsl_multimin_fdfminimizer_vector_bfgs2
            //  7  gsl_multimin_fdfminimizer_steepest_descent

            //   http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html

            switch (method_id) {
            case 2:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_fr, length+1);
                strcpy(name, "Fletcher-Reeves conjugate gradient");
                break;
            case 3:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_pr, length+1);
                strcpy(name, "Polak-Ribiere conjugate gradient");
                break;
            case 4:
                minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs, length+1);
                strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno");
                break;
            case 5:
                minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs2, length+1);
                strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno (improved version)");
                break;
            case 6:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_steepest_descent, length+1);
                strcpy(name, "Gradient descent (GSL implmementation)");
                break;
            }

            fprintf(stderr, "Starting minimization via GSL implementation of %s...\n\n", name);

            // The last two parmeters are step size and tolerance (with
            // different meaning for different algorithms

            gsl_multimin_fdfminimizer_set (minimizer, &minimizer_func, minimizer_x, initial_step_size, tolerance);

            iteration = 1;

            do {

                status = gsl_multimin_fdfminimizer_iterate (minimizer);
                D = minimizer->f;
                norm = gsl_blas_dnrm2(minimizer->gradient);

                print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, D, prev_D, norm, iteration%sparsePS == 0);

                prev_D = D;

                if (status) {
                    fprintf(stderr, "An unexpected error has occured in the iteration (status:%i)\n", status);
                    break;
                }

                status = gsl_multimin_test_gradient (minimizer->gradient, precision);
                if (status == GSL_SUCCESS) fprintf(stderr, "Minimum found stopping.\n");

                iteration++;

            } while (status == GSL_CONTINUE && iteration < max_iteration);

            gsl_multimin_fdfminimizer_free (minimizer);
            gsl_vector_free (minimizer_x);

            /* Custom implementation of steepest descent */
        } else {

            if (method_id == 1) {
                fprintf(stderr, "Starting custom implemented steepest descent search...\n\n");
            } else {
                fprintf(stderr, "Starting custom implemented steepest descent search with Barzilai Borwein step size...\n\n");
            }

            iteration = 0;
            D = 0.0;

            while (iteration++ < max_iteration) {

                for (i=1; i <= length; i++) {
                    gsl_vector_set (minimizer_x, i, epsilon[i]);
                }

                D = calculate_f(minimizer_x, (void*)&minimizer_pars);

                if (numerical) {
                    calculate_df_numerically(minimizer_x, (void*)&minimizer_pars, minimizer_g);
                } else {
                    calculate_df(minimizer_x, (void*)&minimizer_pars, minimizer_g);
                }

                for (i=1; i <= length; i++) {
                    gradient[i] = gsl_vector_get (minimizer_g, i);
                }

                // Do line search

                fprintf(stderr, "\nLine search:\n");

                // After the first iteration, use Barzilai-Borwain (1988) step size (currently turned off)
                if (iteration>1 && method_id==2) {

                    double denominator=0.0;
                    double numerator=0.0;

                    for (i=1; i <= length; i++) {
                        numerator += (epsilon[i]-prev_epsilon[i]) * (gradient[i]-prev_gradient[i]);
                        denominator+=(gradient[i]-prev_gradient[i]) * (gradient[i]-prev_gradient[i]);
                    }

                    step_size = numerator / denominator;

                    norm =1.0;
                } else {
                    // Use step sized given by the user (normalize it first)
                    step_size = initial_step_size / calculate_norm(gradient, length);
                }

                for (i=1; i <= length; i++) {
                    prev_epsilon[i] = epsilon[i];
                    prev_gradient[i] = gradient[i];
                }

                do {

                    for (mu=1; mu <= length; mu++) {
                        epsilon[mu] = prev_epsilon[mu] - step_size * gradient[mu];
                    }

                    for (i=1; i <= length; i++) {
                        gsl_vector_set (minimizer_x, i, epsilon[i]);
                    }

                    DD = calculate_f(minimizer_x, (void*)&minimizer_pars);

                    if (step_size > 0.0001) {
                        fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4f\n", D, DD, step_size);
                    } else {
                        fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4e\n", D, DD, step_size);
                    }

                    step_size /= 2;
                } while (step_size > 1e-12 && DD > D);

                norm = calculate_norm(gradient,length);

                if (DD > D) {
                    fprintf(stderr, "Line search did not improve D in iteration %i. Stop.\n", iteration);

                    if (hybrid_conditionals) {
                        sample_conditionals=0;
                    } else {
                        break;
                    }
                }

                print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, iteration%sparsePS == 0);

                if (norm<precision && iteration>1) {
                    fprintf(stderr, "Minimum found stopping.\n");
                    break;
                }

                prev_D = DD;

            }
        }

        /* Force last dotplot to be printed */
        print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, 1);
    }

    free(pf_struc);
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    free(string);
    free(structure);
    RNAfold_cmdline_parser_free (&args_info);


    return 0;
}
Example #10
0
/*
 * Report the state of the optimisation for one iteration.
 *
 * The function recomputes the partition function for seq (init_pf_fold /
 * pf_fold_pb), mirrors the pair probabilities from pr into the symmetric
 * matrix p_pp and refreshes p_unpaired via get_pair_prob_vector. It then
 *   - prints iteration, discrepancy, norm and (if prev_D > -1.0) the relative
 *     improvement to stderr,
 *   - appends one tab-separated record to statsfile: iteration, D, norm,
 *     count_df_evaluations, the MEA structures for gamma = 1e-5, 1e-4, ...
 *     (while gamma < 1e6), the result of the optional stochastic sampling
 *     (or NA/NA/NA), the epsilon vector and a time stamp,
 *   - writes a dot plot to "<psDir>/iteration<iteration>.ps" unless noPS is
 *     set or printPS is 0.
 *
 * statsfile            open stream that receives the statistics record
 * seq                  sequence that is folded
 * struc                reference structure for the dot plot, may be NULL
 * length               length of seq
 * iteration            iteration number, used for logging and the plot name
 * count_df_evaluations counter that is copied verbatim into the record
 * D                    current discrepancy
 * prev_D               previous discrepancy; values <= -1.0 suppress the
 *                      improvement line
 * norm                 gradient norm, reported only
 * printPS              non-zero to request the dot plot
 *
 * Reads the file-level variables sample_structure, q_unpaired, epsilon, noPS
 * and psDir, and writes p_pp and p_unpaired.
 */
void print_stats(FILE* statsfile, char* seq, char* struc, int length, int iteration, int count_df_evaluations, double D, double prev_D, double norm, int printPS) {

    plist *pl, *pl1,*pl2;
    char fname[100];
    char title[100];
    char* ss;
    double MEAgamma;
    int i,j;
    static char timestamp[40];
    const struct tm *tm;
    time_t now;

    ss = (char *) space((unsigned) length+1);
    memset(ss,'.',length);

    init_pf_fold(length);
    pf_fold_pb(seq, NULL);

    /* Copy the pair probabilities into a symmetric matrix */
    for (i = 1; i < length; i++) {
        for (j = i+1; j<= length; j++) {
            p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
        }
    }
    get_pair_prob_vector(p_pp, p_unpaired, length, 1);

    fprintf (stderr, "\nITERATION:   %i\n", iteration);
    fprintf(stderr,  "DISCREPANCY: %.4f\n", D);
    fprintf(stderr,  "NORM:        %.2f\n", norm);
    if (prev_D > -1.0) {
        fprintf(stderr,  "IMPROVEMENT: %.4f%%\n\n", (1-(D/prev_D))*100);
    }

    fprintf(statsfile, "%i\t%.4f\t%.4f\t%i\t", iteration, D, norm, count_df_evaluations);

    /* One MEA structure per gamma; only the structure string is reported. */
    for (MEAgamma=1e-5; MEAgamma<1e+6; MEAgamma*=10 ) {
        pl = make_plist(length, 1e-4/(1+MEAgamma));
        (void) MEA(pl, ss, MEAgamma);
        /* NOTE(review): the energy is not reported; the call is kept only in
           case energy_of_struct has side effects -- confirm and drop. */
        (void) energy_of_struct(seq, ss);
        fprintf(statsfile,"%s,%.2e;", ss, MEAgamma);
        free(pl);
    }
    fprintf(statsfile, "\t");

    // Stochastic backtracking

    if (sample_structure) {

        char* best_structure;
        char* curr_structure;
        double x;

        double curr_energy = 0.0;
        double min_energy = +1.0;
        /* The distance is a sum of |x - q| terms with fractional q, so it has
           to be accumulated in floating point; an int accumulator together
           with the integer abs() truncated every fractional term to 0. */
        double curr_distance = 0.0;
        double min_distance = 999999.0;

        fprintf(stderr, "Sampling structures...\n");

        best_structure = (char *) space((unsigned) length+1);

        for (i=1; i<=10000; i++) {

            curr_structure = pbacktrack_pb(seq);
            curr_energy = energy_of_struct(seq, curr_structure);
            curr_distance = 0.0;

            //fprintf(stderr, "%s%.2f ", curr_structure, curr_energy);

            for (j = 1; j <= length; j++) {

                /* Positions with q_unpaired <= -0.5 carry no data */
                if (q_unpaired[j] > -0.5) {
                    x = (curr_structure[j-1] == '.') ? 1.0 : 0.0;
                    curr_distance += fabs(x-q_unpaired[j]);
                }
            }

            /* Keep the sample closest to the data; ties go to lower energy */
            if (curr_distance < min_distance ||
                (curr_distance == min_distance && curr_energy < min_energy)) {
                min_distance = curr_distance;
                min_energy = curr_energy;
                strcpy(best_structure, curr_structure);
            }

            //fprintf(stderr, "%i\n", curr_distance);
            free(curr_structure);
        }

        //fprintf(stderr, "\n%s %.2f %i\n", best_structure, min_energy, min_distance);
        /* %g prints integer-valued distances exactly as the former %i did.
           NOTE(review): the leading tab yields an empty column that the NA
           branch below does not have -- left unchanged for the parser. */
        fprintf(statsfile, "\t%s\t%.2f\t%g\t", best_structure, min_energy, min_distance);

        free(best_structure);

    } else {
        fprintf(statsfile, "NA\tNA\tNA\t");
    }

    for (i = 1; i <= length; i++) {
        fprintf(statsfile, "%.4f", epsilon[i]);
        if (!(i==length)) {
            fprintf(statsfile, ",");
        }
    }

    now = time ( NULL );
    tm = localtime ( &now );

    strftime ( timestamp, 40, "%Y-%m-%d %X", tm );

    fprintf(statsfile, "\t%s\n", timestamp);

    /* Print dotplot only if not noPS is given and function call asks for it */
    if (!noPS && printPS) {

        /* Print dotplot; snprintf keeps a long psDir or a huge D from
           overrunning the fixed-size buffers */
        snprintf(fname, sizeof(fname), "%s/iteration%i.ps", psDir, iteration);
        pl1 = make_plist(length, 1e-5);

        if (struc != NULL) {
            pl2 = b2plist(struc);
        } else {
            pl2 = NULL;
        }
        snprintf(title, sizeof(title), "Iteration %i, D = %.4f", iteration, D);
        (void) PS_dot_plot_list_epsilon(seq, fname, pl2, pl1, epsilon, title);

        /* NOTE(review): assumes PS_dot_plot_list_epsilon does not take
           ownership of the lists -- confirm against its definition. */
        free(pl2);
        free(pl1);
    }


    free_pf_arrays();
    free(ss);

}
Example #11
0
/*
 * Entry point of the EMBOSS "vrnafold" program.
 *
 * Reads the sequence, the optional constraint and parameter files and all
 * folding options through the ACD interface, copies them into the folding
 * globals, computes the minimum free energy structure with fold() (or
 * circfold() for circular sequences) and writes sequence, structure and
 * energy to the output file. Unless noPS is set, a structure plot is written
 * for sequences shorter than 2000 bases.
 *
 * pf is hard-wired to 0 in this function, so the partition function branch
 * further down is never entered here.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    char *string/*, *line*/;
    char *structure=NULL, *cstruc=NULL;
    /*char  fname[13], ffname[20], gfname[20];*/
    /*char  *ParamFile=NULL;*/
    char  *ns_bases=NULL, *c;
    int   i, length, l, sym/*, r*/;
    double energy, min_en;
    double kT, sfact=1.07;
    int   pf=0, noPS=0, istty;
    int noconv=0;
    int circ=0;

    AjPSeq  seq     = NULL;
    AjPFile confile = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;
    AjPFile essfile = NULL;
    AjPFile dotfilea = NULL;
    AjPFile dotfileb = NULL;


    AjPStr seqstring = NULL;
    AjPStr constring = NULL;
    AjPStr seqname   = NULL;

    float eT = 0.;
    AjBool eGU;
    AjBool ecirc = ajFalse;

    AjBool eclose;
    AjBool lonely;
    AjBool convert;
    AjPStr ensbases = NULL;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    float escale = 0.;
    AjPStr edangles = NULL;
    char edangle = '\0';

    ajint len;



    embInitPV("vrnafold",argc,argv,"VIENNA",VERSION);


    seqstring = ajStrNew();
    constring = ajStrNew();
    seqname   = ajStrNew();


    /* Fetch all user supplied values from the ACD layer */
    seq       = ajAcdGetSeq("sequence");
    confile   = ajAcdGetInfile("constraintfile");
    paramfile = ajAcdGetInfile("paramfile");
    eT        = ajAcdGetFloat("temperature");
    ecirc     = ajAcdGetBoolean("circular");
    eGU       = ajAcdGetBoolean("gu");
    eclose    = ajAcdGetBoolean("closegu");
    lonely    = ajAcdGetBoolean("lp");
    convert   = ajAcdGetBoolean("convert");
    ensbases  = ajAcdGetString("nsbases");
    etloop    = ajAcdGetBoolean("tetraloop");
    eenergy   = ajAcdGetListSingle("energy");
    escale    = ajAcdGetFloat("scale");
    edangles  = ajAcdGetListSingle("dangles");
    outf      = ajAcdGetOutfile("outfile");
    essfile   = ajAcdGetOutfile("ssoutfile");
    /*
      dotfilea  = ajAcdGetOutfile("adotoutfile");
      dotfileb  = ajAcdGetOutfile("bdotoutfile");
    */

    do_backtrack = 2;
    pf = 0;
    string = NULL;
    istty = 0;

    /* The ACD switches are phrased positively ("allow GU"), the folding
       globals negatively ("noGU"), hence the inversions. */
    temperature   = (double) eT;
    circ          = !!ecirc;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    noconv        = (convert) ? 0 : 1;
    ns_bases      = (ajStrGetLen(ensbases)) ? MAJSTRGETPTR(ensbases) : NULL;
    tetra_loop    = !!etloop;

    ewt = *ajStrGetPtr(eenergy);
    if(ewt == '0')
        energy_set = 0;
    else if(ewt == '1')
        energy_set = 1;
    else if(ewt == '2')
        energy_set = 2;

    sfact = (double) escale;

    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
        dangles = 0;
    else if(edangle == '1')
        dangles = 1;
    else if(edangle == '2')
        dangles = 2;
    else if(edangle == '3')
        dangles = 3;


    if(circ && noLonelyPairs)
    {

        ajWarn("Depending on the origin of the circular sequence\n"
               "some structures may be missed when using -noLP\nTry "
               "rotating your sequence a few times\n");
    }


    if(paramfile)
        read_parameter_file(paramfile);

    if (ns_bases != NULL)
    {
        /* 33 bytes: at most 32 base characters plus the terminating NUL */
        nonstandards = space(33);
        c=ns_bases;
        i=sym=0;

        /* A leading '-' requests that each pair is also added reversed */
        if (*c=='-')
        {
            sym=1; c++;
        }

        while (*c!='\0')
        {
            if (*c!=',')
            {
                /* A pair needs two input characters and two free slots.
                   Stop on a dangling single base (the unguarded code stepped
                   past the terminating NUL) or when the buffer is full. */
                if (c[1]=='\0' || i+2 > 32)
                    break;

                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1))&&(i+2 <= 32))
                {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
        nonstandards[i]='\0';
    }


    if(confile)
        vienna_GetConstraints(confile,&constring);

    string = NULL;
    structure = NULL;

    length = ajSeqGetLen(seq);
    string = (char *) space(length+1);
    strcpy(string,ajSeqGetSeqC(seq));

    len = ajStrGetLen(constring);
    structure = (char *) space(length+1);
    if(len)
    {
        fold_constrained = 1;
        /* The constraint may be longer than the sequence; copy at most
           length characters so the length+1 buffer cannot overflow. */
        strncpy(structure,ajStrGetPtr(constring),length);
        structure[length] = '\0';
    }


    for (l = 0; l < length; l++) {
        /* toupper() is only defined for unsigned char values and EOF */
        string[l] = toupper((unsigned char) string[l]);
        if (!noconv && string[l] == 'T') string[l] = 'U';
    }

    /* initialize_fold(length); */
    if (circ)
        min_en = circfold(string, structure);
    else
        min_en = fold(string, structure);

    ajFmtPrintF(outf,"%s\n%s", string, structure);
    if (istty)
        printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
        ajFmtPrintF(outf," (%6.2f)\n", min_en);

    if (!noPS)
    {
        if (length<2000)
            (void) PS_rna_plot(string, structure, essfile);
        else
            ajWarn("Structure too long, not doing xy_plot\n");
    }
    /* Long sequences release the fold arrays early; shorter ones do so at
       the very end of this function. */
    if (length>=2000) free_arrays();

    if (pf)
    {
        char *pf_struc;
        pf_struc = (char *) space((unsigned) length+1);
        if (dangles==1)
        {
            dangles=2;   /* recompute with dangles as in pf_fold() */
            min_en = (circ) ? energy_of_circ_struct(string, structure) :
                energy_of_struct(string, structure);
            dangles=1;
        }

        kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
        pf_scale = exp(-(sfact*min_en)/kT/length);

        if (length>2000)
            ajWarn("scaling factor %f\n", pf_scale);

        (circ) ? init_pf_circ_fold(length) : init_pf_fold(length);

        if (cstruc!=NULL)
        {
            strncpy(pf_struc, cstruc, length+1);
            /* strncpy does not terminate when cstruc is longer than length */
            pf_struc[length] = '\0';
        }

        energy = (circ) ? pf_circ_fold(string, pf_struc) :
            pf_fold(string, pf_struc);

        if (do_backtrack)
        {
            ajFmtPrintF(outf,"%s", pf_struc);
            ajFmtPrintF(outf," [%6.2f]\n", energy);
        }

        if ((istty)||(!do_backtrack))
            ajFmtPrintF(outf," free energy of ensemble = %6.2f kcal/mol\n",
                        energy);

        if (do_backtrack)
        {
            plist *pl1,*pl2;
            char *cent;
            double dist, cent_en;
            cent = centroid(length, &dist);
            cent_en = (circ) ? energy_of_circ_struct(string, cent) :
                energy_of_struct(string, cent);
            ajFmtPrintF(outf,"%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
            free(cent);

            pl1 = make_plist(length, 1e-5);
            pl2 = b2plist(structure);
            (void) PS_dot_plot_list(string, dotfilea, pl1, pl2, "");
            free(pl2);
            if (do_backtrack==2)
            {
                pl2 = stackProb(1e-5);
                PS_dot_plot_list(string, dotfileb, pl1, pl2,
                                 "Probabilities for stacked pairs (i,j)(i+1,j-1)");
                free(pl2);
            }
            free(pl1);
        }

        ajFmtPrintF(outf," frequency of mfe structure in ensemble %g; ",
                    exp((energy-min_en)/kT));

        if (do_backtrack)
            ajFmtPrintF(outf,"ensemble diversity %-6.2f", mean_bp_dist(length));

        ajFmtPrintF(outf,"\n");
        free_pf_arrays();

        /* Released here rather than inside the do_backtrack branch so the
           buffer is not leaked when backtracking is switched off. */
        free(pf_struc);
    }

    if (cstruc!=NULL)
        free(cstruc);

    free(string);
    free(structure);

    ajStrDel(&seqstring);
    ajStrDel(&constring);
    ajStrDel(&seqname);

    ajStrDel(&ensbases);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);

    ajSeqDel(&seq);

    ajFileClose(&confile);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    ajFileClose(&essfile);

/*
  ajFileClose(&dotfilea);
  ajFileClose(&dotfileb);
*/
    if (length<2000) free_arrays();
    embExit();

    return 0;
}
Example #12
0
/*
 * Command line driver: folds every sequence read from stdin.
 *
 * Options handled below:
 *   -T <temp>     folding temperature
 *   -p[n]         also compute the partition function; n sets do_backtrack
 *   -noGU, -noCloseGU, -noLP, -noPS, -noconv
 *   -nsp <pairs>  comma separated list of additional (non-standard) pairs,
 *                 a leading '-' adds every pair in both orientations
 *   -4            switch tetra_loop off
 *   -e <n>        energy_set
 *   -C            read a constraint line after each sequence
 *   -circ         treat sequences as circular
 *   -S <factor>   multiplier used when computing pf_scale
 *   -d[n]         dangles (0 when no number is attached)
 *   -P <file>     parameter file
 * Any other option calls usage().
 *
 * For each input sequence the minimum free energy structure is printed and,
 * unless -noPS is given, plotted (sequences shorter than 2000 only). With -p
 * the ensemble energy, the centroid structure and dot plots are produced as
 * well. Lines starting with '>' supply a name (at most 12 characters) used
 * as prefix for the output files; '*' and empty lines are echoed and
 * skipped; a line consisting of "@" or end of input ends the loop.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
  char *string, *line;
  char *structure=NULL, *cstruc=NULL;
  char  fname[13], ffname[20], gfname[20];
  char  *ParamFile=NULL;
  char  *ns_bases=NULL, *c;
  int   i, length, l, sym, r;
  double energy, min_en;
  double kT, sfact=1.07;
  int   pf=0, noPS=0, istty;
  int noconv=0;
  int circ=0;

  do_backtrack = 1;
  string=NULL;
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-')
      switch ( argv[i][1] )
        {
        case 'T':  if (argv[i][2]!='\0') usage();
          if(i==argc-1) usage();
          r=sscanf(argv[++i], "%lf", &temperature);
          /* sscanf returns EOF (-1) on empty input, so test for exactly one
             conversion instead of for zero */
          if (r!=1) usage();
          break;
        case 'p':  pf=1;
          if (argv[i][2]!='\0')
            (void) sscanf(argv[i]+2, "%d", &do_backtrack);
          break;
        case 'n':
          if ( strcmp(argv[i], "-noGU")==0) noGU=1;
          if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1;
          if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
          if ( strcmp(argv[i], "-noPS")==0) noPS=1;
          if ( strcmp(argv[i], "-nsp") ==0) {
            if (i==argc-1) usage();
            ns_bases = argv[++i];
          }
          if ( strcmp(argv[i], "-noconv")==0) noconv=1;
          break;
        case '4':
          tetra_loop=0;
          break;
        case 'e':
          if(i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &energy_set);
          if (r!=1) usage();
          break;
        case 'C':
          fold_constrained=1;
          break;
        case 'c':
          if ( strcmp(argv[i], "-circ")==0) circ=1;
          break;
        case 'S':
          if(i==argc-1) usage();
          r=sscanf(argv[++i],"%lf", &sfact);
          if (r!=1) usage();
          break;
        case 'd': dangles=0;
          if (argv[i][2]!='\0') {
            r=sscanf(argv[i]+2, "%d", &dangles);
            if (r!=1) usage();
          }
          break;
        case 'P':
          if (i==argc-1) usage();
          ParamFile = argv[++i];
          break;
        default: usage();
        }
  }

  if (circ && noLonelyPairs)
    fprintf(stderr, "warning, depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times\n");
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    /* 33 bytes: at most 32 base characters plus the terminating NUL */
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    /* A leading '-' requests that each pair is also added reversed */
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        /* A pair needs two input characters and two free slots. Stop on a
           dangling single base (the unguarded code stepped past the
           terminating NUL) or when the buffer is full. */
        if (c[1]=='\0' || i+2 > 32)
          break;
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))&&(i+2 <= 32)) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
    nonstandards[i]='\0';
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf("| : paired with another base\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("< : base i is paired with a base j<i\n");
    printf("> : base i is paired with a base j>i\n");
    printf("matching brackets ( ): base i pairs base j\n");
  }

  do {				/* main loop: continue until end of file */
    if (istty) {
      printf("\nInput string (upper or lower case); @ to quit\n");
      printf("%s%s\n", scale1, scale2);
    }
    fname[0]='\0';
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
        (void) sscanf(line, ">%12s", fname);
      printf("%s\n", line);
      free(line);
      if ((line = get_line(stdin))==NULL) break;
    }

    if ((line ==NULL) || (strcmp(line, "@") == 0)) break;

    string = (char *) space(strlen(line)+1);
    (void) sscanf(line,"%s",string);
    free(line);
    length = (int) strlen(string);

    structure = (char *) space((unsigned) length+1);
    if (fold_constrained) {
      cstruc = get_line(stdin);
      if (cstruc!=NULL)
        strncpy(structure, cstruc, length);
      else
        fprintf(stderr, "constraints missing\n");
    }
    for (l = 0; l < length; l++) {
      /* toupper() is only defined for unsigned char values and EOF */
      string[l] = toupper((unsigned char) string[l]);
      if (!noconv && string[l] == 'T') string[l] = 'U';
    }
    if (istty)
      printf("length = %d\n", length);

    /* initialize_fold(length); */
    if (circ)
      min_en = circfold(string, structure);
    else
      min_en = fold(string, structure);
    printf("%s\n%s", string, structure);
    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    (void) fflush(stdout);

    /* fname holds at most 12 characters, so the longest suffix used in this
       function ("_dp2.ps") still fits into the 20 byte name buffers */
    if (fname[0]!='\0') {
      strcpy(ffname, fname);
      strcat(ffname, "_ss.ps");
      strcpy(gfname, fname);
      strcat(gfname, "_ss.g");
    } else {
      strcpy(ffname, "rna.ps");
      strcpy(gfname, "rna.g");
    }
    if (!noPS) {
      if (length<2000)
        (void) PS_rna_plot(string, structure, ffname);
      else
        fprintf(stderr,"INFO: structure too long, not doing xy_plot\n");
    }
    if (length>2000) free_arrays();
    if (pf) {
      char *pf_struc;
      pf_struc = (char *) space((unsigned) length+1);
      if (dangles==1) {
        dangles=2;   /* recompute with dangles as in pf_fold() */
        min_en = (circ) ? energy_of_circ_struct(string, structure) : energy_of_struct(string, structure);
        dangles=1;
      }

      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      pf_scale = exp(-(sfact*min_en)/kT/length);
      if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);

      (circ) ? init_pf_circ_fold(length) : init_pf_fold(length);

      if (cstruc!=NULL) {
        strncpy(pf_struc, cstruc, length+1);
        /* strncpy does not terminate when the constraint line is longer
           than the sequence */
        pf_struc[length] = '\0';
      }
      energy = (circ) ? pf_circ_fold(string, pf_struc) : pf_fold(string, pf_struc);

      if (do_backtrack) {
        printf("%s", pf_struc);
        if (!istty) printf(" [%6.2f]\n", energy);
        else printf("\n");
      }
      if ((istty)||(!do_backtrack))
        printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
      if (do_backtrack) {
        plist *pl1,*pl2;
        char *cent;
        double dist, cent_en;
        cent = centroid(length, &dist);
        cent_en = (circ) ? energy_of_circ_struct(string, cent) :energy_of_struct(string, cent);
        printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
        free(cent);
        if (fname[0]!='\0') {
          strcpy(ffname, fname);
          strcat(ffname, "_dp.ps");
        } else strcpy(ffname, "dot.ps");
        pl1 = make_plist(length, 1e-5);
        pl2 = b2plist(structure);
        (void) PS_dot_plot_list(string, ffname, pl1, pl2, "");
        free(pl2);
        if (do_backtrack==2) {
          pl2 = stackProb(1e-5);
          if (fname[0]!='\0') {
            strcpy(ffname, fname);
            strcat(ffname, "_dp2.ps");
          } else strcpy(ffname, "dot2.ps");
          PS_dot_plot_list(string, ffname, pl1, pl2,
                           "Probabilities for stacked pairs (i,j)(i+1,j-1)");
          free(pl2);
        }
        free(pl1);
      }
      printf(" frequency of mfe structure in ensemble %g; ",
             exp((energy-min_en)/kT));
      if (do_backtrack)
        printf("ensemble diversity %-6.2f", mean_bp_dist(length));

      printf("\n");
      free_pf_arrays();

      /* Released here rather than inside the do_backtrack branch so that
         "-p0" does not leak one buffer per sequence. */
      free(pf_struc);
    }
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    free(string);
    free(structure);
  } while (1);
  return 0;
}
Example #13
0
/*--------------------------------------------------------------------------*/
/*
 * RNAup command line driver.
 *
 * Parses the options in argv, then loops over stdin: each round reads one
 * or two sequences (and, with -C, one constraint line), computes the
 * probabilities of unpaired regions with pf_unstru() and, in interaction
 * mode (upmode > 1), the interaction with pf_interact().  Results are
 * printed to stdout and, unless -o was given, written with Up_plot().
 *
 * The loop ends at end of input or when a line consisting of "@" is read.
 * Returns 0.
 */
int main(int argc, char *argv[])
{
  char *string1=NULL, *string2=NULL, *dummy=NULL, *temp=NULL, *line=NULL;
  char *structure=NULL, *cstruc=NULL, *cstruc_l=NULL, *cstruc_s=NULL;
  char fname[53], ffname[53], temp_name[201], first_name[53], my_contrib[10];
  char up_out[250], unstrs[201], name[400], cmd_line[500];
  char *ParamFile=NULL;
  char *ns_bases=NULL, *c,*head;
  int  i, length1,length2,length, l, sym, r, *u_vals, Switch, header,output;
  double energy, min_en;
  double sfact=1.07;
  int   istty;
  int noconv=0;
  /* variables for output */
  pu_contrib *unstr_out, *unstr_short;
  interact *inter_out;
  /* pu_out *longer; */
  char *title;
  /* commandline parameters */
  int w;       /* length of region of interaction */
  int incr3;   /* add x unpaired bases after 3'end of short RNA*/
  int incr5;   /* add x unpaired bases after 5'end of short RNA*/
  int unstr;   /* length of unpaired region for output*/
  int upmode ; /* 1 compute only pf_unpaired, >1 compute interactions
                  2 compute intra-molecular structure only for long RNA, 3 both RNAs */
  int task;    /* input mode for calculation of interaction */
  /* default settings for RNAup */
  head = NULL;/* header text - if header wanted, see header */
  header = 1; /* if header is 0 print no header in output file: option -nh */
  output = 1; /* if output is 0 make no output file: option -o */
  Switch = 1; /* the longer sequence is selected as the target */
  task=0;
  upmode = 1; /* default is one sequence, option -X[p|f] has to be set
                 for the calculation of an interaction, if no "&" is in
                 the sequence string  */
  unstrs[0]='\0';
  default_u = 4;
  unstr=default_u;
  default_w = 25;
  w=default_w;
  u_vals=NULL;
  incr3=0;
  incr5=0;
  do_backtrack = 1;
  length1=length2=0;
  title=NULL;
  unstr_out=NULL;
  inter_out=NULL;
  my_contrib[0] = 'S';
  my_contrib[1] = '\0';
  first_name[0] = '\0';

  /* collect the command line (it is echoed in the output file header).
     Each argument is cut at its first whitespace and after 100 characters;
     collection stops as soon as the next argument plus its separating
     blank would no longer fit into cmd_line. */
  sprintf(cmd_line,"RNAup ");
  for (i=1; i<argc; i++) {
    r=sscanf(argv[i], "%100s", temp_name);
    if (r!=1) continue; /* empty argument: temp_name was not written */
    if (strlen(cmd_line)+strlen(temp_name)+2 > sizeof(cmd_line)) break;
    strcat(cmd_line, temp_name);
    strcat(cmd_line," ");
  }
  length = 0;

  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-')
      switch ( argv[i][1] )
        {
        case 'T':  if (argv[i][2]!='\0') usage();
          if (i==argc-1) usage();
          r=sscanf(argv[++i], "%lf", &temperature);
          if (!r) usage();
          break;
        case 'w':
          /* -w maximal length of unstructured region */
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &w);
          if (!r) usage();
          break;
        case 't':
          /* use the first sequence as the target */
          if ( strcmp(argv[i], "-target")==0) {
            Switch=0;
          }
          break;
        case 'o':
          /* make no output file */
          output=0;
          break;
        case 'n':
          if ( strcmp(argv[i], "-nh")==0) {
            header=0;
          }
          if ( strcmp(argv[i], "-noGU")==0) {
            noGU=1;
          }
          if ( strcmp(argv[i], "-noCloseGU")==0) {
            no_closingGU=1;
          }
          if ( strcmp(argv[i], "-noLP")==0) {
            noLonelyPairs=1;
          }
          if ( strcmp(argv[i], "-nsp") ==0) {
            if (i==argc-1) usage();
            ns_bases = argv[++i];
          }
          if ( strcmp(argv[i], "-noconv")==0) {
            noconv=1;
          }
          break;
        case '4':
          tetra_loop=0;
          break;
        case 'e':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &energy_set);
          if (!r) usage();
          break;
        case 'C':
          fold_constrained=1;
          break;
        case 'S':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%lf", &sfact);
          if (!r) usage();
          break;
        case 'd': dangles=0;
          if (argv[i][2]!='\0') {
            r=sscanf(argv[i]+2, "%d", &dangles);
            if (r!=1) usage();
          }
          break;
        case 'b': upmode=3;
          break;
        case 'X':
          /* interaction mode invoked */
          if (upmode == 1) upmode=2;
          switch (argv[i][2]) { /* now determine which sequences interact */
          case 'p': task=1;
            break; /* pairwise interaction */
          case 'f': task=2;
            break; /* first one interacts with all others */
          }
          break;
        case 'u':
          /* -u length of unstructured region in pr_unpaired output */
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%200s", unstrs);
          if (!r) usage();
          /* <ctype.h> functions need a value representable as unsigned char */
          if (!isdigit((unsigned char) unstrs[0])) usage();
          break;
          /* incr5 and incr3 are only for the longer (target) sequence */
          /* increments w (length of the unpaired region) to incr5+w+incr3*/
          /* the longer sequence is given in 5'(= position 1) to */
          /* 3' (=position n) direction */
          /* incr5 adds incr5 residues to the 5' end of w */
        case '5':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &incr5);
          if (!r) usage();
          break;
          /* incr3 adds incr3 residues to the 3' end of w */
        case '3':
          if (i==argc-1) usage();
          r=sscanf(argv[++i],"%d", &incr3);
          if (!r) usage();
          break;
        case 'P':
          if (i==argc-1) usage();
          ParamFile = argv[++i];
          break;
        case 'c':
          if (i==argc-1) usage();
          r=sscanf(argv[++i], "%6s", my_contrib);
          if (!r) usage();
          break;
        default: usage();
        }
  }
  if (dangles>0) dangles=2; /* only 0 or 2 allowed */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    /* -nsp takes a comma separated list of base pairs, e.g. "AU,GC"; a
       leading '-' requests that each pair is also stored reversed */
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        int need;
        /* a pair consists of two characters; never step over the end
           of the option string */
        if (c[1]=='\0') {
          nrerror("option -nsp: incomplete base pair");
          break;
        }
        /* use at most 32 of the 33 bytes so that the last byte of the
           buffer is never overwritten */
        need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
        if (i+need > 32) {
          nrerror("option -nsp: too many base pairs");
          break;
        }
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;

    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("matching brackets ( ): base i pairs base j\n");
    printf("constraints for intramolecular folding only:\n");
    printf("< : base i is intramolecularly paired with a base j<i\n");
    printf("> : base i is intramolecularly paired with a base j>i\n");
    printf("constraints for cofolding (intermolecular folding) only:\n");
    printf("| : paired with another base intermolecularly\n");
  }

  RT = ((temperature+K0)*GASCONST/1000.0);
  do {  /* main loop: continue until end of file */
    cut_point=-1;
    if (istty) {
      if (upmode == 1) {
        printf("\nInput string (upper or lower case); @ to quit\n");
        printf("%s%s\n", scale1, scale2);
      }
      else if (upmode > 1) {
        if (task == 1 || (task == 0 && upmode == 3)) {
          printf("\nUse either '&' to connect the 2 sequences or give each sequence on an extra line.\n");
          printf("%s%s\n", scale1, scale2);
        }
        else if (task == 2) { /* option -Xf read the first two seqs */
          printf("\nGive each sequence on an extra line. The first seq. is stored, every other seq. is compared to the first one.\n");
          printf("%s%s\n", scale1, scale2);
        }
        else if (task == 3) {/* option -Xf read another sequence which
                                will interact with the first one */
          printf("\nEnter another sequence.\n");
          printf("%s%s\n", scale1, scale2);
        }
      }
    }
    fname[0]='\0';
    ffname[0]='\0';
    /* read the first sequence */
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
        (void) sscanf(line, ">%51s", fname);
      free(line);
      line=NULL;
      if ((line = get_line(stdin))==NULL) break;
    }
    if ((line == NULL) || (strcmp(line, "@") == 0)) break;

    /* with -Xf remember the name of the very first sequence */
    if (first_name[0] == '\0' && fname[0] !='\0' && task == 2) {
      strncpy(first_name,fname,30);
      first_name[30] = '\0';
    }
    /* if upmode == 2: check if the sequences are separated via "&" (cut_point > -1) or given on extra lines */
    if (task < 3) {
      tokenize(line,&string1,&string2);
      if (task == 2 && cut_point != -1) task = 3;
      /* two sequences with & are given: calculate interaction */
      if (task == 0 && cut_point != -1) {
        task = 1;
        if (upmode == 1) upmode = 2;
      }
    }
    else if (task == 3) { /* option -Xf*/
      strncpy(ffname,fname,30);
      ffname[30] = '\0';
      strncpy(fname,first_name,30);  /* first_name: name of first seq */
      fname[30] = '\0';
      if (temp != NULL) { /*strings have been switched - write temp to string1*/
        string1 = (char *) xrealloc (string1,sizeof(char)*strlen(temp)+1);
        (void) sscanf(temp,"%s",string1);
        free(temp);temp=NULL;

      }
      tokenize(line,&string2,&dummy); /*compare every seq to first one given */
      free(dummy);dummy=NULL;
      if (cut_point != -1) {
        nrerror(
           "After the first sequence pair: Input a single sequence (no &)!\n"
           "Each input seq. is compared to the first seq. given.\n");
      }
    }
    /* interaction mode -> get the second seq. if seq are on separate lines*/
    if (upmode > 1){ /* interaction mode */
      if (cut_point == -1 && task < 3) { /* seqs are given on separate lines */
        /* read the second sequence */
        if (task == 2) task = 3;
        if ((line = get_line(stdin))==NULL) {
          nrerror("only one sequence - can not cofold one sequence!");
        }
        /* skip comment lines and get filenames */
        while ((*line=='*')||(*line=='\0')||(*line=='>')) {
          if (*line=='>')
            (void) sscanf(line, ">%51s", ffname); /* name of the 2nd seq */
          free(line);
          line=NULL;
          if ((line = get_line(stdin))==NULL) break;
        }
        if ((line ==NULL) || (strcmp(line, "@") == 0)) break;
        free(string2); /* string2 has been allocated in tokenize() */

        string2 = (char *) space(strlen(line)+1);
        (void) sscanf(line,"%s",string2); free(line);line=NULL;
      }
    } else { /* default mode pr_unpaired for ONE seq */
      /* if a second sequence is give, cofold the sequences*/
      if (cut_point != -1){
        upmode = 2;
      }
    }

    if (string1 != NULL){length1 = (int) strlen(string1);}
    else {nrerror("sequence is NULL, check your input.");}
    if (upmode > 1) {
      if (string2 != NULL) {length2 = (int) strlen(string2);}
      else{nrerror("one of the sequences is NULL, check your input.");}

      /* write longer seq in string1 and and shorter one in string2 */
      if (length1 < length2 && Switch) {
        /* swap the names too; strncpy() does not terminate the target
           when the source has 30 or more characters, so do it here */
        strncpy(temp_name,fname,30);
        temp_name[30] = '\0';
        strncpy(fname,ffname,30);
        fname[30] = '\0';
        strncpy(ffname,temp_name,30);
        ffname[30] = '\0';

        length=length1; length1=length2; length2=length;

        temp=(char *) space(sizeof(char)*strlen(string1)+1);
        (void) sscanf(string1,"%s",temp);
        string1 = (char *) xrealloc (string1,sizeof(char)*length1+1);
        (void) sscanf(string2,"%s",string1);
        string2 = (char *) xrealloc(string2,sizeof(char)*length2+1);
        (void) sscanf(temp,"%s",string2);
        /* only the -Xf branch above (task == 3) reads temp again in the
           next round; in every other mode release it now */
        if (task != 3) {
          free(temp);
          temp = NULL;
        }
      }
    }
    /* parse cml parameters for output filename*/
    /* create the name of the output file */
    if (fname[0]!='\0') {
      printf(">%s\n",fname);
      if(strlen(fname) < 30) {
        strcpy(up_out,fname);
      } else {
        strncpy(up_out,fname,30);
        up_out[30] = '\0';
      }

      if (upmode > 1 && ffname[0] != '\0') {
         printf(">%s\n",ffname);
        if(strlen(fname) < 15) {
          strcpy(up_out,fname);
        } else {
          strncpy(up_out,fname,15);
          up_out[15] = '\0';
        }
        strcat(up_out, "_");
        if(strlen(ffname) < 15) {
          strcat(up_out,ffname);
        } else {
          strncat(up_out,ffname,15);
        }
      }
    } else {
      strcpy(up_out, "RNA");
    }
    if (upmode >1) {
      sprintf(temp_name,"_w%d",w);
      strncat(up_out, temp_name,10);
    }
    /* do this only when -X[p|f] is used or if two sequences separated by & are given */
    if (upmode > 1) {
      if (task == 3) {
        /* strncpy(temp_name,fname,30); */
        if(strlen(fname) < 30) {
          strcpy(temp_name,fname);
        } else {
          strncpy(temp_name,fname,30);
          /* terminate the buffer that was just filled; up_out must not
             be touched here, it already carries the "_w<N>" suffix */
          temp_name[30] = '\0';
        }
      }
    }

    /* get values for -u */
    if ( ! get_u_values(unstrs,&u_vals,length1)) {
      nrerror("option -u: length value exceeds sequence length\n");
    }


    /* upper-case both sequences and, unless -noconv is set, turn T into U */
    for (l = 0; l < length1; l++) {
      string1[l] = toupper((unsigned char) string1[l]);
      if (!noconv && string1[l] == 'T') string1[l] = 'U';
    }
    for (l = 0; l < length2; l++) {
      string2[l] = toupper((unsigned char) string2[l]);
      if (!noconv && string2[l] == 'T') string2[l] = 'U';
    }

    if (fold_constrained) {
      char *temp_cstruc=NULL;
      int old_cut;
      /* NOTE(review): get_line() can return NULL at end of input (see the
         checks above); whether tokenize_one() copes with NULL is not
         visible here - confirm */
      temp_cstruc = get_line(stdin);
      old_cut = cut_point;
      cut_point=-1;
      /* get constraint string without & */
      cstruc = tokenize_one(temp_cstruc);
      /* free(temp_cstruc); */
      /* only one seq, cstruc should not have an & */
      if (upmode == 1 && cut_point == -1) {
        if (strlen(cstruc) == length1) {
          cstruc_l=(char*)space(sizeof(char)*(length1+1));
          strncpy(cstruc_l,cstruc,length1);
        }else{
          fprintf(stderr, "%s\n%s\n",string1,cstruc);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
      } else if (upmode == 1 && cut_point != -1) {
        fprintf(stderr, "%s\n%s\n",string1,cstruc);
        nrerror("RNAup -C: only one sequence but constrain structure for cofolding");
      }
      /* constrain string is for both seqs */
      else if (upmode > 1 && cut_point != -1) {
        if (old_cut != cut_point) {
          nrerror("RNAup -C: different cut points in sequence und constrain string");
        }
        seperate_bp(&cstruc,length1,&cstruc_l,&cstruc_s);
        if (strlen(cstruc) != (length1+length2)) {
          fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
        if (strlen(cstruc_l) != (length1)) {
          fprintf(stderr, "%s\n%s\n",string1,cstruc_l);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
        if (strlen(cstruc_s) != (length2)) {
          fprintf(stderr, "%s\n%s\n",string2,cstruc_s);
          nrerror("RNAup -C: constrain string and structure have unequal length");
        }
      } else {
        fprintf(stderr, "%s&%s\n%s\n",string1,string2,cstruc);
        nrerror("RNAup -C: no cutpoint in constrain string");
      }
    }
    /* one structure buffer, large enough for the longer sequence, is
       shared by all fold()/pf_fold() calls below */
    if(length1 > length2) {
      structure = (char *) space(sizeof(char)*(length1+1));
    } else {
      structure = (char *) space(sizeof(char)*(length2+1));
    }
    update_fold_params();
    if (cstruc_s != NULL)
      strncpy(structure, cstruc_s, length2+1);
    min_en = fold(string1, structure);
    (void) fflush(stdout);

    if (upmode != 0){
      int wplus,w_sh;
      if (upmode == 3) { /* calculate prob. unstruct. for shorter seq */
        w_sh = w;
        /* len of unstructured region has to be <= len shorter seq. */
        if (w > length2) w_sh = length2;
        if (cstruc_s != NULL)
          strncpy(structure, cstruc_s, length2+1);
        min_en = fold(string2, structure);
        pf_scale = exp(-(sfact*min_en)/RT/length2);
        if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
        init_pf_fold(length2);
        /* fold() wrote into structure, so copy the constraint again */
        if (cstruc_s != NULL)
          strncpy(structure, cstruc_s, length2+1);
        energy = pf_fold(string2, structure);
        unstr_short = pf_unstru(string2, w_sh);
        free_pf_arrays(); /* for arrays for pf_fold(...) */
      }

      /* calculate prob. unstructured for longer seq */
      wplus=w+incr3+incr5;
      /* calculate prob. unpaired for the maximal length of -u */
      if (u_vals[u_vals[0]] > wplus) wplus=u_vals[u_vals[0]];
      /* length of the unstructured region has to be <= len longer seq. */
      if (wplus > length1) wplus=length1;
      if (cstruc_l !=NULL)
        strncpy(structure, cstruc_l, length1+1);
      min_en = fold(string1, structure);
      pf_scale = exp(-(sfact*min_en)/RT/length1);
      if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
      init_pf_fold(length1);
      if (cstruc_l !=NULL)
        strncpy(structure, cstruc_l, length1+1);
      energy = pf_fold(string1, structure);
      if (upmode > 1) {
        unstr_out = pf_unstru(string1, wplus);
      } else {
        unstr_out = pf_unstru(string1, u_vals[u_vals[0]]);
      }
      free_pf_arrays(); /* for arrays for pf_fold(...) */
      /* now make output to stdout and to the output file */
      if (upmode > 1){/* calculate interaction between two sequences */
        int count;
        if (upmode == 2) {
          inter_out = pf_interact(string1,string2,unstr_out,NULL,w,cstruc,incr3,incr5);
          print_interaction(inter_out,string1,string2,unstr_out,NULL,w,incr3,incr5);
        } else if (upmode == 3){
          inter_out = pf_interact(string1,string2,unstr_out,unstr_short,w,cstruc,incr3,incr5);
          print_interaction(inter_out,string1,string2,unstr_out,unstr_short,w,incr3,incr5);
        }
        if(output) { /* make RNAup output to file */
          printf("RNAup output in file: ");
          /* plot for all -u values */
          strcpy(name,up_out);
          strcat(name, "_u");
          if(u_vals[0] <= 20) {
            for (count = 1; count <= u_vals[0]; count++) {
              unstr = u_vals[count];
              sprintf(temp_name,"%d",unstr);
              if (count < u_vals[0]) {
                strcat(temp_name,"_");
                strncat(name, temp_name,5);
              } else {
                strncat(name, temp_name,5);
                strcat(name, "_up.out");
                printf("%s\n",name);
              }
            }
          } else {
            sprintf(temp_name,"%d",u_vals[1]);
            strcat(temp_name,"_to_");
            strncat(name, temp_name,5);
            sprintf(temp_name,"%d",u_vals[0]);
            strncat(name, temp_name,5);
            strcat(name, ".out");
            printf("%s\n",name);
          }

          if(header) {
            char startl[3];
            sprintf(startl,"# ");

            head = (char*)space(sizeof(char)*(length1+length2+1000));
            /* do not put a \n at the end of head */
            sprintf(head,"%s %s\n%s %d %s\n%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1, startl,length2,ffname, startl,string2);

          } else {
            if(head != NULL) { nrerror("error with header\n"); }
          }
          Up_plot(unstr_out,NULL,inter_out,name,u_vals,my_contrib,head);

          if(head != NULL) {
            free(head);
            head = NULL;
          }

          if (upmode == 3 ) {/* plot opening energy for boths RNAs */
            if(head != NULL) { nrerror("error with header\n"); }
            Up_plot(NULL,unstr_short,NULL,name,u_vals,my_contrib,head);
          }
        }
      } else { /* one sequence:  plot only results for prob unstructured */
        int count;
        char collect_out[1000];
        collect_out[0]='\0';

        for (count = 1; count <= u_vals[0]; count++) {
          unstr = u_vals[count];
          print_unstru(unstr_out,unstr);
        }
        if(output) {/* make RNAup output to file */
          printf("RNAup output in file: ");
          strcpy(name,up_out);
          strcat(name, "_u");
          if(u_vals[0] <= 20) {
            for (count = 1; count <= u_vals[0]; count++) {
              unstr = u_vals[count];
              sprintf(temp_name,"%d",unstr);
              if (count < u_vals[0]) {
                strcat(temp_name,"_");
                strncat(name, temp_name,5);
              } else {
                strncat(name, temp_name,5);
                strcat(name, ".out");
                printf("%s\n",name);
              }
            }
          } else {
            sprintf(temp_name,"%d",u_vals[1]);
            strcat(temp_name,"_to_");
            strncat(name, temp_name,5);
            sprintf(temp_name,"%d",u_vals[0]);
            strncat(name, temp_name,5);
            strcat(name, ".out");
            printf("%s\n",name);
          }

          if(header) {
            char startl[3];
            sprintf(startl,"# ");
            head = (char*)space(sizeof(char)*(length1+length2+1000));
            /* do not put a \n at the end of head */
            sprintf(head,"%s %s\n%s %d %s\n%s %s",startl, cmd_line, startl,length1,fname, startl,string1);
          } else { if(head != NULL) { nrerror("error with header\n"); }}

          Up_plot(unstr_out,NULL,NULL,name,u_vals,my_contrib,head);

          if(head != NULL) { free(head); head = NULL;}
        }
      }
    } else {
      nrerror("no output format given\n");
    }


    /* release everything that belongs to this round only */
    if(structure != NULL) free(structure);
    structure = NULL;
    if (title != NULL) free(title);
    title=NULL;
    if (u_vals != NULL) free(u_vals);
    u_vals=NULL;
    if (upmode == 1) free_pu_contrib(unstr_out);
    if (upmode > 1) {
      free_pu_contrib(unstr_out);
      free_interact(inter_out);
    }
    if (upmode == 3)free_pu_contrib(unstr_short);
    free_arrays(); /* for arrays for fold(...) */
    if (cstruc!=NULL) free(cstruc);
    cstruc=NULL;
    if (cstruc_l!=NULL) free(cstruc_l);
    cstruc_l=NULL;
    if (cstruc_s!=NULL) free(cstruc_s);
    cstruc_s=NULL;
    (void) fflush(stdout);
    /* with -Xf (task == 3) string1 is kept for the next round */
    if (string1!=NULL && task != 3) {
      free(string1);
      string1 = NULL;
    }
    if (string2!=NULL) free(string2);
    string2 = NULL;

  } while (1);
  if (line != NULL) free(line);
  if (string1!=NULL) free(string1);
  if (string2!=NULL) free(string2);
  if (cstruc!=NULL) free(cstruc);
  if (cstruc_l!=NULL) free(cstruc_l);
  if (cstruc_s!=NULL) free(cstruc_s);

  return 0;
}
Example #14
0
/*
 * vrnainverse driver.
 *
 * Fetches all settings through the ACD interface, then reads target
 * structures line by line from the "structuresfile" input.  For every
 * structure it builds a start sequence (taken from the "sequence" input,
 * positions that are neither lower case nor in the alphabet are drawn at
 * random from symbolset) and calls inverse_fold() and/or
 * inverse_pf_fold() on it, writing one result line per attempt to the
 * output file.
 *
 * Reading stops at end of file or at a line consisting of "@".
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    char *start;
    char *structure;
    char *rstart;
    char *str2;
    char *line;
    int i;
    int length;
    int l;
    int hd;
    double energy = 0.;
    double kT;
    int   pf = 0;
    int   mfe = 0;
    int   repeat;
    int   found;

    AjPFile inf     = NULL;
    AjPSeq  seq = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;

    float eT = 0.;
    AjBool eGU;

    AjBool eclose;
    AjBool lonely;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    AjPStr edangles = NULL;
    AjPStr method   = NULL;
    AjPStr ealpha    = NULL;
    AjBool showfails = ajFalse;
    AjBool succeed = ajFalse;

    char edangle = '\0';

    ajint len;
    FILE *fp;



    embInitPV("vrnainverse",argc,argv,"VIENNA",VERSION);


    inf        = ajAcdGetInfile("structuresfile");
    seq        = ajAcdGetSeq("sequence");
    paramfile  = ajAcdGetInfile("paramfile");
    eT         = ajAcdGetFloat("temperature");
    eGU        = ajAcdGetBoolean("gu");
    eclose     = ajAcdGetBoolean("closegu");
    lonely     = ajAcdGetBoolean("lp");
    etloop     = ajAcdGetBoolean("tetraloop");
    eenergy    = ajAcdGetListSingle("energy");
    edangles   = ajAcdGetListSingle("dangles");
    method     = ajAcdGetListSingle("folding");
    ealpha     = ajAcdGetString("alphabet");
    final_cost = ajAcdGetFloat("final");
    repeat     = ajAcdGetInt("repeats");
    showfails  = ajAcdGetBoolean("showfails");
    succeed    = ajAcdGetBoolean("succeed");
    outf       = ajAcdGetOutfile("outfile");


    do_backtrack = 0;
    structure = NULL;

    /* the boolean ACD values "gu", "closegu" and "lp" are stored
       inverted in the no* globals */
    temperature   = (double) eT;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    tetra_loop    = !!etloop;

    /* only the first character of the list selection is inspected */
    ewt = *ajStrGetPtr(eenergy);
    if(ewt == '0')
        energy_set = 0;
    else if(ewt == '1')
        energy_set = 1;
    else if(ewt == '2')
        energy_set = 2;

    edangle = *ajStrGetPtr(edangles);
    if(edangle == '0')
        dangles = 0;
    else if(edangle == '1')
        dangles = 1;
    else if(edangle == '2')
        dangles = 2;
    else if(edangle == '3')
        dangles = 3;

    /* "m" selects inverse_fold(), "p" inverse_pf_fold(), "mp" both */
    if(ajStrMatchC(method,"mp"))
    {
        mfe = 1;
        pf  = 1;
    }
    else if(ajStrMatchC(method,"m"))
    {
        mfe = 1;
        pf  = 0;
    }
    else if(ajStrMatchC(method,"p"))
    {
        mfe = 0;
        pf  = 1;
    }

    /* symbolset is an upper-cased private copy of the alphabet string */
    len = ajStrGetLen(ealpha);
    symbolset = (char *) space(len + 1);
    strcpy(symbolset, ajStrGetPtr(ealpha));
    for (l = 0; l < len; l++)
        /* <ctype.h> functions need a value representable as unsigned char */
        symbolset[l] = toupper((unsigned char) symbolset[l]);

    inv_verbose = !!showfails;
    fp = ajFileGetFileptr(inf);

    init_rand();
    kT = (temperature+273.15)*1.98717/1000.0;

    if (paramfile)
        read_parameter_file(paramfile);

    give_up = succeed;

    do {

        if ((line = get_line(fp))==NULL) break;

        /* read structure, skipping over comment lines */
        while ((*line=='*')||(*line=='\0')||(*line=='>'))
        {
            free(line);
            if ((line = get_line(fp))==NULL)
                break;
        }
        /* stop at eof or '@' */
        if (line==NULL) break;
        if (strcmp(line, "@") == 0)
        {
            free(line);
            break;
        }

        structure = (char *) space(strlen(line)+1);
        /* scanf gets rid of trailing junk */
        (void) sscanf(line,"%s",structure);
        free(line);

        length = (int) strlen(structure);
        str2 = (char *) space((unsigned)length+1);

/* now look for a sequence to match the structure */

/*
        if ((line = get_line(fp))!=NULL)
            if (strcmp(line, "@") == 0)
            {
                free(line);
                break;
            }
*/

        /* the start sequence is cut to the length of the structure */
        start = (char *) space((unsigned) length+1);
        if(seq)
            (void) strncpy(start, ajSeqGetSeqC(seq), length);

        /* found counts the result lines that are still to be produced */
        if (repeat!=0)
            found = repeat;
        else
            found = 1;

        initialize_fold(length);

        rstart = (char *) space((unsigned)length+1);
        while(found>0)
        {
            char *string;
            string = (char *) space((unsigned)length+1);
            strcpy(string, start);
            for (i=0; i<length; i++)
            {
                /* lower case characters are kept fixed, any other character
                   not in symbolset is replaced by a random character */
                if (islower((unsigned char) string[i]))
                    continue;

                if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
                    string[i]=symbolset[int_urn(0,strlen(symbolset)-1)];
            }
            strcpy(rstart, string);     /* remember start string */

            if (mfe)
            {
                energy = inverse_fold(string, structure);
                /* with "succeed" set, an attempt only counts when
                   inverse_fold() returned a value <= 0 */
                if( (!succeed) || (energy<=0.0) ) {
                    found--;
                    hd = hamming(rstart, string);
                    ajFmtPrintF(outf,"%s  %3d", string, hd);
                    if (energy>0)
                    {                   /* no solution found */
                        ajFmtPrintF(outf,"   d = %f\n", energy);
                    }
                    else
                        ajFmtPrintF(outf,"\n");
                }
            }

            if (pf)
            {
                if (!(mfe && give_up && (energy>0)))
                {
                    /* unless we gave up in the mfe part */
                    double prob, min_en, sfact=1.07;

                    /* get a reasonable pf_scale */
                    min_en = fold(string,str2);
                    pf_scale = exp(-(sfact*min_en)/kT/length);
                    init_pf_fold(length);

                    energy = inverse_pf_fold(string, structure);
                    prob = exp(-energy/kT);
                    hd = hamming(rstart, string);
                    ajFmtPrintF(outf,"%s  %3d  (%f)\n", string, hd, prob);
                    free_pf_arrays();
                }
                /* when both methods run, the mfe part above already
                   did the counting */
                if (!mfe)
                    found--;
            }

            free(string);
        }
        free(rstart);
        free_arrays();

        free(structure);
        free(str2);
        free(start);

    } while (1);

    ajSeqDel(&seq);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);
    ajStrDel(&method);
    ajStrDel(&ealpha);

    ajFileClose(&inf);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    AJFREE(symbolset);

    embExit();
    return 0;
}
Example #15
0
/*--------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
  char *string1=NULL, *string2=NULL, *temp, *line;
  char *structure=NULL, *cstruc=NULL;
  char  fname[53], my_contrib[10], *up_out;
  char  *ParamFile=NULL;
  char  *ns_bases=NULL, *c;
  int   i, length1,length2,length, l, sym, r;
  double energy, min_en;
  double kT, sfact=1.07;
  int   pf, istty;
  int noconv=0;
  double Zu, Zup;
  /* variables for output */
  pu_contrib *unstr_out, *unstr_short;
  FLT_OR_DBL **inter_out;
  char *title;
  /* commandline parameters */
  int w;       /* length of region of interaction */
  int incr3;   /* add x unpaired bases after 3'end of short RNA*/
  int incr5;   /* add x unpaired bases after 5'end of short RNA*/
  int  unstr;  /* length of unpaired region for output*/
  int  upmode; /* output mode for pf_unpaired and pf_up()*/
  upmode = 0;
  unstr = 4; 
  incr3=0;
  incr5=0;
  w=25;
  do_backtrack = 1;
  pf=1; /* partition function has to be calculated */
  length1=length2=0;
  up_out=NULL;
  title=NULL;
  unstr_out=NULL;
  inter_out=NULL;
  my_contrib[0] = 'S';
  my_contrib[1] = '\0';
  
  for (i=1; i<argc; i++) {
    if (argv[i][0]=='-') 
      switch ( argv[i][1] )
	{
	case 'T':  if (argv[i][2]!='\0') usage();
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i], "%lf", &temperature);
	  if (!r) usage();
	  break;
	case 'w':
	  /* -w maximal length of unstructured region */  
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &w);
	  if (!r) usage();
	  break;
	case 'n':
	  if ( strcmp(argv[i], "-noGU")==0) noGU=1;
	  if ( strcmp(argv[i], "-noCloseGU")==0) no_closingGU=1;
	  if ( strcmp(argv[i], "-noLP")==0) noLonelyPairs=1;
	  if ( strcmp(argv[i], "-nsp") ==0) {
	    if (i==argc-1) usage();
	    ns_bases = argv[++i];
	  }
	  if ( strcmp(argv[i], "-noconv")==0) noconv=1;
	  break;
	case '4':
	  tetra_loop=0;
	  break;
	case 'e':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &energy_set);
	  if (!r) usage();
	  break;
	case 'C':
	  fold_constrained=1;
	  break;
	case 'S':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%lf", &sfact);
	  if (!r) usage();
	  break;
	case 'd': dangles=0;
	  if (argv[i][2]!='\0') {
	    r=sscanf(argv[i]+2, "%d", &dangles);
	    if (r!=1) usage();
	  }
	  break;
	case 'o': upmode=1;
	  /* output mode 0: non, 1:only pr_unpaired, 2: pr_unpaired + pr_up */
	  if (argv[i][2]!='\0') {
	    r=sscanf(argv[i]+2, "%d", &upmode);
	    if (r!=1) usage();
	  }
	  break;
	case 'u':
	  /* -u length of unstructured region in pr_unpaired output
	     makes only sense in combination with -o1 or -o2 */  
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &unstr);
	  if (!r) usage();
	  break;
	  /* incr5 and incr3 are only for the longer (target) sequence */
	  /* increments w (length of the unpaired region) to incr5+w+incr3*/
	  /* the longer sequence is given in 5'(= position 1) to */
	  /* 3' (=position n) direction */
	  /* incr5 adds incr5 residues to the 5' end of w */
	case '5':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &incr5);
	  if (!r) usage();
	  break; 
	  /* incr3 adds incr3 residues to the 3' end of w */
	case '3':
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i],"%d", &incr3);
	  if (!r) usage();
	  break;
	case 'P':
	  if (i==argc-1) usage();
	  ParamFile = argv[++i];
	  break;
	case 'x':  
	  if(i==argc-1) usage();
	  r=sscanf(argv[++i], "%s", my_contrib);
	  if (!r) usage();
	  break;  
	default: usage();
	} 
  }
  
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);
   
  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
	nonstandards[i++]=*c++;
	nonstandards[i++]=*c;
	if ((sym)&&(*c!=*(c-1))) {
	  nonstandards[i++]=*c;
	  nonstandards[i++]=*(c-1);
	}
      }
      c++;
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  if ((fold_constrained)&&(istty)) {
    printf("Input constraints using the following notation:\n");
    printf("| : paired with another base\n");
    printf(". : no constraint at all\n");
    printf("x : base must not pair\n");
    printf("< : base i is paired with a base j<i\n");
    printf("> : base i is paired with a base j>i\n");
    printf("matching brackets ( ): base i pairs base j\n");
  } 
	
  do {				/* main loop: continue until end of file */
    cut_point=-1;
    if (istty) {
      printf("\nInput string (upper or lower case); @ to quit\n");
      printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      printf("%s%s\n", scale1, scale2);
    }
    fname[0]='\0';
   
    if ((line = get_line(stdin))==NULL) break;

    /* skip comment lines and get filenames */
    while ((*line=='*')||(*line=='\0')||(*line=='>')) {
      if (*line=='>')
	
	(void) sscanf(line, ">%51s", fname);
      free(line);
      if ((line = get_line(stdin))==NULL) break;
    } 
    if ((line == NULL) || (strcmp(line, "@") == 0)) break;

    tokenize(line,&string1,&string2);
    
    if(upmode != 0){
      if(cut_point == -1 && upmode == 2) {
	  nrerror("only one sequence - can not cofold one sequence!");
      }
    } else {
      if(cut_point == -1){
	upmode=1;
      } else {
	upmode=2;
      }
    }
    
    if(string1 != NULL)
      length1 = (int) strlen(string1);
    if(string2 != NULL) 
      length2 = (int) strlen(string2);
    else
      length2=0;    

    /* write longer seq in string1 and and shorter one in string2 */ 
    if(length1 < length2)
      {
	length=length1; length1=length2; length2=length;
	
	temp=(char *) space(strlen(string1)+1);
	(void) sscanf(string1,"%s",temp);
	string1 = (char *) xrealloc (string1,sizeof(char)*length1+1);
	(void) sscanf(string2,"%s",string1);
	string2 = (char *) xrealloc(string2,sizeof(char)*length2+1);
	(void) sscanf(temp,"%s",string2);
	free(temp);
      }
   
    structure = (char *) space((unsigned) length1+1);
    if (fold_constrained) {
      cstruc = get_line(stdin);
      if (cstruc!=NULL) 
	strncpy(structure, cstruc, length1);
      else
	fprintf(stderr, "constraints missing\n");
    }
    for (l = 0; l < length1; l++) {
      string1[l] = toupper(string1[l]);
      if (!noconv && string1[l] == 'T') string1[l] = 'U';
    }
    for (l = 0; l < length2; l++) {
      string2[l] = toupper(string2[l]);
      if (!noconv && string2[l] == 'T') string2[l] = 'U';
    }

    if (istty)
      printf("length1 = %d\n", length1);
    
    /* initialize_fold(length); */
    update_fold_params();
    printf("\n%s", string1);
    min_en = fold(string1, structure);
    
    if (istty)
      {
	printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
      }
    else
      printf(" (%6.2f)\n", min_en);
    
    (void) fflush(stdout);
    
    /* parse cml parameters for the filename*/
    if(upmode > 0) {
      char wuadd[10];
      up_out = (char*) space(sizeof(char)*53);
      /* create the name of the output file */
      if(fname[0]!='\0' && up_out[0] =='\0' ){
	if(strlen(fname)< 30){
	  strcpy(up_out, fname);
	} else {  
	  strncpy(up_out, fname,30);
	}
      }
      else if(fname[0]=='\0' && up_out[0] == '\0'){
	char defaultn[10] = "RNA";
	sprintf(up_out,"%s",defaultn);
      }
	
      sprintf(wuadd,"%d",w);
      strcat(up_out, "_w");
      strcat(up_out, wuadd);
      strcat(up_out, "u");
      sprintf(wuadd,"%d",unstr);
      strcat(up_out, wuadd);
      strcat(up_out, "_up.out");
      printf("RNAup output in file: %s\n",up_out);
	    
      /* create the title for the output file */      
      if (title == NULL) {
	char wuadd[10];
	title = (char*) space(sizeof(char)*60);
	if(fname[0]!='\0'){
	  if(strlen(fname)< 30){
	    strcpy(title, fname);
	  } else {  
	    strncpy(title, fname,30);
	  }
	}
	else if (fname[0]=='\0'){
	  char defaultn[10]= "RNAup";
	  sprintf(title,"%s",defaultn);
	}
	sprintf(wuadd,"%d",unstr);
	strcat(title," u=");
	strcat(title, wuadd);
	sprintf(wuadd,"%d",w);
	strcat(title," w=");
	strcat(title, wuadd);
	sprintf(wuadd,"%d",length1);
	strcat(title," n=");
	strcat(title, wuadd);
      }
    } else {
      nrerror("no output format given: use [-o[1|2]] to select output format");
    }
    
    
    if (pf) {
      
      if (dangles==1) {
	dangles=2;   /* recompute with dangles as in pf_fold() */
	min_en = energy_of_struct(string1, structure);
	dangles=1;
      }
	 
      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      
      if(upmode != 0){
	int wplus;
	wplus=w+incr3+incr5;
	/* calculate prob. unstructured for the shorter seq */
	if(upmode == 3) {
	  min_en = fold(string2, structure);
	  pf_scale = exp(-(sfact*min_en)/kT/length2);
	  if (length2>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
	  init_pf_fold(length2);
	  if (cstruc!=NULL)
	    strncpy(structure, cstruc, length2+1);
	  energy = pf_fold(string2, structure);
	  if(wplus > length2){ wplus = length2;} /* for the shorter seq */
	  unstr_short = pf_unstru(string2, structure, wplus);
	  free_pf_unstru();
	  free_pf_arrays(); /* for arrays for pf_fold(...) */
	}

	/* calculate prob. unstructured for the longer seq */
	wplus=w+incr3+incr5; 
	min_en = fold(string1, structure);
	pf_scale = exp(-(sfact*min_en)/kT/length1);
	if (length1>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
	init_pf_fold(length1);
	if (cstruc!=NULL)
	  strncpy(structure, cstruc, length1+1);
	energy = pf_fold(string1, structure);
	unstr_out = pf_unstru(string1, structure, wplus);
	free_pf_unstru();
	free_pf_arrays(); /* for arrays for pf_fold(...) */
	/* calculate the interaction between the two sequences */
	if(upmode > 1 && cut_point > -1){
	  inter_out = pf_interact(string1,string2,unstr_out,w, incr3, incr5);
	  if(Up_plot(unstr_out,inter_out,length1,up_out,unstr,my_contrib)==0){
	    nrerror("Up_plot: no output values assigned");
	  }
	} else if(cut_point == -1 && upmode > 1) { /* no second seq given */
	  nrerror("only one sequence given - cannot cofold one sequence!");
	} else { /* plot only the results for prob unstructured */
	  if(Up_plot(unstr_out,NULL,length1,up_out,unstr,my_contrib)==0){
	    nrerror("Up_plot: no output values assigned");
	  }
	}	
      } else {
	nrerror("no output format given: use [-o[1|2]] to select output format");
      }
       
      if (do_backtrack) {
	printf("%s", structure);
	if (!istty) printf(" [%6.2f]\n", energy);
	else printf("\n");
      }
      if ((istty)||(!do_backtrack)) 
	printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
      energy = pf_fold(string1, structure);
      printf(" frequency of mfe structure in ensemble %g; "
	     "ensemble diversity %-6.2f\n", exp((energy-min_en)/kT),
	     mean_bp_dist(length1));
      free_pf_arrays();
    }
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    if (string1!=NULL) free(string1);
    if (string2!=NULL) free(string2);
    free(structure);
    if(up_out != NULL) free(up_out);
    up_out=NULL;
    if(title != NULL) free(title);
    title=NULL;
    if(upmode == 1) free_pf_two(unstr_out,NULL);
    if(upmode > 1) free_pf_two(unstr_out,inter_out);
    if(upmode == 3)free_pf_two(unstr_short,NULL);
    free_arrays(); /* for arrays for fold(...) */
    
  } while (1);
  return 0;
}