Exemplo n.º 1
0
int main(int argc, char *argv[]){
  struct        RNA2Dfold_args_info args_info;
  unsigned int  input_type;
  char *string, *input_string, *orig_sequence;
  char *mfe_structure=NULL, *structure1=NULL, *structure2=NULL, *reference_struc1=NULL, *reference_struc2=NULL;
  char  *ParamFile=NULL;
  int   i, j, length, l;
  double min_en;
  double kT, sfact=1.07;
  int   pf=0,istty;
  int noconv=0;
  int circ=0;
  int maxDistance1 = -1;
  int maxDistance2 = -1;
  int do_backtrack = 1;
  int stBT = 0;
  int nstBT = 0;
  string=NULL;
  dangles = 2;
  struct nbhoods *neighborhoods = NULL;
  struct nbhoods *neighborhoods_cur = NULL;

  string = input_string = orig_sequence = NULL;
  /*
  #############################################
  # check the command line prameters
  #############################################
  */
  if(RNA2Dfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

  /* temperature */
  if(args_info.temp_given)
    temperature = args_info.temp_arg;

  /* max distance to 1st reference structure */
  if(args_info.maxDist1_given)
    maxDistance1 = args_info.maxDist1_arg;

  /* max distance to 2nd reference structure */
  if(args_info.maxDist2_given)
    maxDistance2 = args_info.maxDist2_arg;

  /* compute partition function and boltzmann probabilities */
  if(args_info.partfunc_given)
    pf = 1;

  /* do stachastic backtracking */
  if(args_info.stochBT_given){
    pf = 1;
    stBT = 1;
    nstBT = args_info.stochBT_arg;
  }

  if(args_info.noTetra_given)
    tetra_loop=0;

  /* assume RNA sequence to be circular */
  if(args_info.circ_given)
    circ=1;

  /* dangle options */
  if(args_info.dangles_given)
    dangles=args_info.dangles_arg;

  /* set number of threads for parallel computation */
  if(args_info.numThreads_given)
#ifdef _OPENMP
  omp_set_num_threads(args_info.numThreads_arg);
#else
  nrerror("\'j\' option is available only if compiled with OpenMP support!");
#endif

  /* get energy parameter file name */
  if(args_info.parameterFile_given)
    ParamFile = strdup(args_info.parameterFile_arg);

  /* do not allow GU pairs ? */
  if(args_info.noGU_given)
    noGU = 1;

  /* do not allow GU pairs at the end of helices? */
  if(args_info.noClosingGU_given)
    no_closingGU = 1;

  /* pf scaling factor */
  if(args_info.pfScale_given)
    sfact = args_info.pfScale_arg;

  /* do not backtrack structures ? */
  if(args_info.noBT_given)
    do_backtrack = 0;

  for (i = 0; i < args_info.neighborhood_given; i++){
    int kappa, lambda;
    kappa = lambda = 0;
    if(sscanf(args_info.neighborhood_arg[i], "%d:%d", &kappa, &lambda) == 2);
    if ((kappa>-2) && (lambda>-2)){
      if(neighborhoods_cur != NULL){
        neighborhoods_cur->next = (nbhoods *)space(sizeof(nbhoods));
        neighborhoods_cur = neighborhoods_cur->next;
      }
      else{
        neighborhoods = (nbhoods *)space(sizeof(nbhoods));
        neighborhoods_cur = neighborhoods;
      }
      neighborhoods_cur->k = kappa;
      neighborhoods_cur->l = lambda;
      neighborhoods_cur->next = NULL;
    }
  }
  /* free allocated memory of command line data structure */
  RNA2Dfold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin actual program code
  #############################################
  */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

 /*
  #############################################
  # main loop, continue until end of file
  #############################################
  */
  do {
    if (istty)
      print_tty_input_seq_str("Input strings\n1st line: sequence (upper or lower case)\n2nd + 3rd line: reference structures (dot bracket notation)\n@ to quit\n");

    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string); /* print fasta header if available */
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      length = (int)    strlen(input_string);
      string = strdup(input_string);
      free(input_string);
    }

    mfe_structure = (char *) space((unsigned) length+1);
    structure1 = (char *) space((unsigned) length+1);
    structure2 = (char *) space((unsigned) length+1);

    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ break;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      reference_struc1 = strdup(input_string);
      free(input_string);
      if(strlen(reference_struc1) != length)
        nrerror("sequence and 1st reference structure have unequal length");
    }
    else nrerror("1st reference structure missing\n");
    strncpy(structure1, reference_struc1, length);

    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ break;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      reference_struc2 = strdup(input_string);
      free(input_string);
      if(strlen(reference_struc2) != length)
        nrerror("sequence and 2nd reference structure have unequal length");
    }
    else nrerror("2nd reference structure missing\n");
    strncpy(structure2, reference_struc2, length);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(string);
    /* store case-unmodified sequence */
    orig_sequence = strdup(string);
    /* convert sequence to uppercase letters only */
    str_uppercase(string);

    if (istty)  printf("length = %d\n", length);

    min_en = (circ) ? circfold(string, mfe_structure) : fold(string, mfe_structure);

    printf("%s\n%s", orig_sequence, mfe_structure);

    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    printf("%s (%6.2f) <ref 1>\n", structure1, (circ) ? energy_of_circ_structure(string, structure1, 0) : energy_of_structure(string,structure1, 0));
    printf("%s (%6.2f) <ref 2>\n", structure2, (circ) ? energy_of_circ_structure(string, structure2, 0) : energy_of_structure(string,structure2, 0));

    /* get all variables need for the folding process (some memory will be preallocated here too) */
    TwoDfold_vars *mfe_vars = get_TwoDfold_variables(string, structure1, structure2, circ);
    mfe_vars->do_backtrack = do_backtrack;
    TwoDfold_solution *mfe_s = TwoDfoldList(mfe_vars, maxDistance1, maxDistance2);

    if(!pf){
#ifdef COUNT_STATES
      printf("k\tl\tn\tMFE\tMFE-structure\n");
      for(i = 0; mfe_s[i].k != INF; i++){
        printf("%d\t%d\t%lu\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_vars->N_F5[length][mfe_s[i].k][mfe_s[i].l/2], mfe_s[i].en, mfe_s[i].s);
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(mfe_s);
#else
      printf("k\tl\tMFE\tMFE-structure\n");
      for(i = 0; mfe_s[i].k != INF; i++){
        printf("%d\t%d\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_s[i].en, mfe_s[i].s);
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(mfe_s);
#endif
    }

    if(pf){
      int maxD1 = (int) mfe_vars->maxD1;
      int maxD2 = (int) mfe_vars->maxD2;
      float mmfe = INF;
      double Q;
      for(i = 0; mfe_s[i].k != INF; i++){
        if(mmfe > mfe_s[i].en) mmfe = mfe_s[i].en;
      }
      kT = (temperature+K0)*GASCONST/1000.0; /* in Kcal */
      pf_scale = exp(-(sfact*mmfe)/kT/length);
      if (length>2000)
        fprintf(stdout, "scaling factor %f\n", pf_scale);

      /* get all variables need for the folding process (some memory will be preallocated there too) */
      //TwoDpfold_vars *q_vars = get_TwoDpfold_variables_from_MFE(mfe_vars);
      /* we dont need the mfe vars and arrays anymore, so we can savely free their occupying memory */
      destroy_TwoDfold_variables(mfe_vars);
      TwoDpfold_vars *q_vars = get_TwoDpfold_variables(string, structure1, structure2, circ);

      TwoDpfold_solution *pf_s = TwoDpfoldList(q_vars, maxD1, maxD2);

      Q = 0.;
      
      for(i = 0; pf_s[i].k != INF; i++){
        Q += pf_s[i].q;
      }

      double fee = (-log(Q)-length*log(pf_scale))*kT;

      if(!stBT){
        printf("free energy of ensemble = %6.2f kcal/mol\n",fee);
        printf("k\tl\tP(neighborhood)\tP(MFE in neighborhood)\tP(MFE in ensemble)\tMFE\tE_gibbs\tMFE-structure\n");
        for(i=0; pf_s[i].k != INF;i++){
          float free_energy = (-log((float)pf_s[i].q)-length*log(pf_scale))*kT;
          if((pf_s[i].k != mfe_s[i].k) || (pf_s[i].l != mfe_s[i].l))
            nrerror("This should never happen!");
          fprintf(stdout,
                  "%d\t%d\t%2.8f\t%2.8f\t%2.8f\t%6.2f\t%6.2f\t%s\n",
                  pf_s[i].k,
                  pf_s[i].l,
                  (float)(pf_s[i].q)/(float)Q,
                  exp((free_energy-mfe_s[i].en)/kT),
                  exp((fee-mfe_s[i].en)/kT),
                  mfe_s[i].en,
                  free_energy,
                  mfe_s[i].s);
        }
      }
      else{
        init_rand();
        if(neighborhoods != NULL){
          nbhoods *tmp, *tmp2;
          for(tmp = neighborhoods; tmp != NULL; tmp = tmp->next){
            int k,l;
            k = tmp->k;
            l = tmp->l;
            for(i = 0; i < nstBT; i++){
              char *s = TwoDpfold_pbacktrack(q_vars, k, l);
              printf("%d\t%d\t%s\t%6.2f\n", k, l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0));
            }
          }
        }
        else{
          for(i=0; pf_s[i].k != INF;i++){
            for(l = 0; l < nstBT; l++){
              char *s = TwoDpfold_pbacktrack(q_vars, pf_s[i].k, pf_s[i].l);
              printf("%d\t%d\t%s\t%6.2f\n", pf_s[i].k, pf_s[i].l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0));
            }
          }
        }
      }
      free_pf_arrays();

      for(i=0; mfe_s[i].k != INF;i++){
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(pf_s);
      free(mfe_s);
      /* destroy the q_vars */
      destroy_TwoDpfold_variables(q_vars);
    }
    else
      destroy_TwoDfold_variables(mfe_vars);

    free_arrays();
    free(string);
    free(orig_sequence);
    free(mfe_structure);
    free(structure1);
    free(structure2);
    free(reference_struc1);
    free(reference_struc2);
    string = orig_sequence = mfe_structure = NULL;
  } while (1);
  return 0;
}
Exemplo n.º 2
0
int main(int argc, char *argv[])
{
  struct        RNAcofold_args_info args_info;
  unsigned int  input_type;
  char          *string, *input_string;
  char    *structure, *cstruc, *rec_sequence, *orig_sequence, *rec_id, **rec_rest;
  char    fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH];
  char    *ParamFile;
  char    *ns_bases, *c;
  char    *Concfile;
  int     i, length, l, sym, r, cl;
  double  min_en;
  double  kT, sfact, betaScale;
  int     pf, istty;
  int     noconv, noPS;
  int     doT;    /*compute dimere free energies etc.*/
  int     doC;    /*toggle to compute concentrations*/
  int     doQ;    /*toggle to compute prob of base being paired*/
  int     cofi;   /*toggle concentrations stdin / file*/
  plist   *prAB;
  plist   *prAA;   /*pair probabilities of AA dimer*/
  plist   *prBB;
  plist   *prA;
  plist   *prB;
  plist   *mfAB;
  plist   *mfAA;   /*pair mfobabilities of AA dimer*/
  plist   *mfBB;
  plist   *mfA;
  plist   *mfB;
  double  *ConcAandB;
  unsigned int    rec_type, read_opt;
  pf_paramT       *pf_parameters;
  model_detailsT  md;


  /*
  #############################################
  # init variables and parameter options
  #############################################
  */
  dangles       = 2;
  sfact         = 1.07;
  bppmThreshold = 1e-5;
  noconv        = 0;
  noPS          = 0;
  do_backtrack  = 1;
  pf            = 0;
  doT           = 0;
  doC           = 0;
  doQ           = 0;
  cofi          = 0;
  betaScale     = 1.;
  gquad         = 0;
  ParamFile     = NULL;
  pf_parameters = NULL;
  string        = NULL;
  Concfile      = NULL;
  structure     = NULL;
  cstruc        = NULL;
  ns_bases      = NULL;
  rec_type      = read_opt = 0;
  rec_id        = rec_sequence = orig_sequence = NULL;
  rec_rest      = NULL;

  set_model_details(&md);
  /*
  #############################################
  # check the command line prameters
  #############################################
  */
  if(RNAcofold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)            temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.constraint_given)      fold_constrained=1;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)         md.special_hp = tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
     md.dangles = dangles = args_info.dangles_arg;
  }
  /* do not allow weak pairs */
  if(args_info.noLP_given)            md.noLP = noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)            md.noGU = noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given)     md.noGUclosure = no_closingGU = 1;
  /* gquadruplex support */
  if(args_info.gquad_given)           md.gquad = gquad = 1;
  /* enforce canonical base pairs in any case? */
  if(args_info.canonicalBPonly_given) md.canonicalBPonly = canonicalBPonly = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)          noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given)     energy_set = args_info.energyModel_arg;
  /*  */
  if(args_info.noPS_given)            noPS = 1;
  /* take another energy parameter set */
  if(args_info.paramFile_given)       ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)             ns_bases = strdup(args_info.nsp_arg);
  /* set pf scaling factor */
  if(args_info.pfScale_given)         sfact = args_info.pfScale_arg;

  if(args_info.all_pf_given)          doT = pf = 1;
  /* concentrations from stdin */
  if(args_info.concentrations_given)  doC = doT = pf = 1;
  /* set the bppm threshold for the dotplot */
  if(args_info.bppmThreshold_given)
    bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg));
  /* concentrations in file */
  if(args_info.betaScale_given)       betaScale = args_info.betaScale_arg;
  if(args_info.concfile_given){
    Concfile = strdup(args_info.concfile_arg);
    doC = cofi = doT = pf = 1;
  }
  /* partition function settings */
  if(args_info.partfunc_given){
    pf = 1;
    if(args_info.partfunc_arg != -1)
      do_backtrack = args_info.partfunc_arg;
  }
  /* free allocated memory of command line data structure */
  RNAcofold_cmdline_parser_free (&args_info);


  /*
  #############################################
  # begin initializing
  #############################################
  */
  if(pf && gquad){
    nrerror("G-Quadruplex support is currently not available for partition function computations");
  }

  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  /* print user help if we get input from tty */
  if(istty){
    printf("Use '&' to connect 2 sequences that shall form a complex.\n");
    if(fold_constrained){
      print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
      print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
    }
    else print_tty_input_seq();
  }

  /* set options we wanna pass to read_record */
  if(istty)             read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
  if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST;

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){

    /*
    ########################################################
    # init everything according to the data we've read
    ########################################################
    */
    if(rec_id){
      if(!istty) printf("%s\n", rec_id);
      (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
    }
    else fname[0] = '\0';

    cut_point = -1;

    rec_sequence  = tokenize(rec_sequence); /* frees input_string and sets cut_point */
    length    = (int) strlen(rec_sequence);
    structure = (char *) space((unsigned) length+1);

    /* parse the rest of the current dataset to obtain a structure constraint */
    if(fold_constrained){
      cstruc = NULL;
      int cp = cut_point;
      unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0;
      coptions |= VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK;
      getConstraint(&cstruc, (const char **)rec_rest, coptions);
      cstruc = tokenize(cstruc);
      if(cut_point != cp) nrerror("cut point in sequence and structure constraint differs");
      cl = (cstruc) ? (int)strlen(cstruc) : 0;

      if(cl == 0)           warn_user("structure constraint is missing");
      else if(cl < length)  warn_user("structure constraint is shorter than sequence");
      else if(cl > length)  nrerror("structure constraint is too long");

      if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1));
    }

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(rec_sequence);
    /* store case-unmodified sequence */
    orig_sequence = strdup(rec_sequence);
    /* convert sequence to uppercase letters only */
    str_uppercase(rec_sequence);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
    }

    /*
    ########################################################
    # begin actual computations
    ########################################################
    */

    if (doC) {
      FILE *fp;
      if (cofi) { /* read from file */
        fp = fopen(Concfile, "r");
        if (fp==NULL) {
          fprintf(stderr, "could not open concentration file %s", Concfile);
          nrerror("\n");
        }
        ConcAandB = read_concentrations(fp);
        fclose(fp);
      } else {
        printf("Please enter concentrations [mol/l]\n format: ConcA ConcB\n return to end\n");
        ConcAandB = read_concentrations(stdin);
      }
    }
    /*compute mfe of AB dimer*/
    min_en = cofold(rec_sequence, structure);
    assign_plist_from_db(&mfAB, structure, 0.95);

    {
      char *pstring, *pstruct;
      if (cut_point == -1) {
        pstring = strdup(orig_sequence);
        pstruct = strdup(structure);
      } else {
        pstring = costring(orig_sequence);
        pstruct = costring(structure);
      }
      printf("%s\n%s", pstring, pstruct);
      if (istty)
        printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
      else
        printf(" (%6.2f)\n", min_en);

      (void) fflush(stdout);

      if (!noPS) {
        char annot[512] = "";
        if (fname[0]!='\0') {
          strcpy(ffname, fname);
          strcat(ffname, "_ss.ps");
        } else {
          strcpy(ffname, "rna.ps");
        }
        if (cut_point >= 0)
          sprintf(annot,
                  "1 %d 9  0 0.9 0.2 omark\n%d %d 9  1 0.1 0.2 omark\n",
                  cut_point-1, cut_point+1, length+1);
        if(gquad){
          if (!noPS) (void) PS_rna_plot_a_gquad(pstring, pstruct, ffname, annot, NULL);
        } else {
          if (!noPS) (void) PS_rna_plot_a(pstring, pstruct, ffname, annot, NULL);
        }
      }
      free(pstring);
      free(pstruct);
    }

    if (length>2000)  free_co_arrays();

    /*compute partition function*/
    if (pf) {
      cofoldF AB, AA, BB;
      FLT_OR_DBL *probs;
      if (dangles==1) {
        dangles=2;   /* recompute with dangles as in pf_fold() */
        min_en = energy_of_structure(rec_sequence, structure, 0);
        dangles=1;
      }

      kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */
      pf_scale = exp(-(sfact*min_en)/kT/length);
      if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);

      pf_parameters = get_boltzmann_factors(temperature, betaScale, md, pf_scale);

      if (cstruc!=NULL)
        strncpy(structure, cstruc, length+1);
      AB = co_pf_fold_par(rec_sequence, structure, pf_parameters, do_backtrack, fold_constrained);

      if (do_backtrack) {
        char *costruc;
        costruc = (char *) space(sizeof(char)*(strlen(structure)+2));
        if (cut_point<0) printf("%s", structure);
        else {
          strncpy(costruc, structure, cut_point-1);
          strcat(costruc, "&");
          strcat(costruc, structure+cut_point-1);
          printf("%s", costruc);
        }
        if (!istty) printf(" [%6.2f]\n", AB.FAB);
        else printf("\n");/*8.6.04*/
      }
      if ((istty)||(!do_backtrack))
        printf(" free energy of ensemble = %6.2f kcal/mol\n", AB.FAB);
      printf(" frequency of mfe structure in ensemble %g",
             exp((AB.FAB-min_en)/kT));

      printf(" , delta G binding=%6.2f\n", AB.FcAB - AB.FA - AB.FB);

      probs = export_co_bppm();
      assign_plist_from_pr(&prAB, probs, length, bppmThreshold);

      /* if (doQ) make_probsum(length,fname); */ /*compute prob of base paired*/
      /* free_co_arrays(); */
      if (doT) { /* cofold of all dimers, monomers */
        int Blength, Alength;
        char  *Astring, *Bstring, *orig_Astring, *orig_Bstring;
        char *Newstring;
        char Newname[30];
        char comment[80];
        if (cut_point<0) {
          printf("Sorry, i cannot do that with only one molecule, please give me two or leave it\n");
          free(mfAB);
          free(prAB);
          continue;
        }
        if (dangles==1) dangles=2;
        Alength=cut_point-1;        /*length of first molecule*/
        Blength=length-cut_point+1; /*length of 2nd molecule*/

        Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/
        Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/
        strncat(Astring,rec_sequence,Alength);
        strncat(Bstring,rec_sequence+Alength,Blength);

        orig_Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/
        orig_Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/
        strncat(orig_Astring,orig_sequence,Alength);
        strncat(orig_Bstring,orig_sequence+Alength,Blength);

        /* compute AA dimer */
        AA=do_partfunc(Astring, Alength, 2, &prAA, &mfAA, pf_parameters);
        /* compute BB dimer */
        BB=do_partfunc(Bstring, Blength, 2, &prBB, &mfBB, pf_parameters);
        /*free_co_pf_arrays();*/

        /* compute A monomer */
        do_partfunc(Astring, Alength, 1, &prA, &mfA, pf_parameters);

        /* compute B monomer */
        do_partfunc(Bstring, Blength, 1, &prB, &mfB, pf_parameters);

        compute_probabilities(AB.F0AB, AB.FA, AB.FB, prAB, prA, prB, Alength);
        compute_probabilities(AA.F0AB, AA.FA, AA.FA, prAA, prA, prA, Alength);
        compute_probabilities(BB.F0AB, BB.FA, BB.FA, prBB, prA, prB, Blength);
        printf("Free Energies:\nAB\t\tAA\t\tBB\t\tA\t\tB\n%.6f\t%6f\t%6f\t%6f\t%6f\n",
               AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB);

        if (doC) {
          do_concentrations(AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB, ConcAandB);
          free(ConcAandB);/*freeen*/
        }

        if (fname[0]!='\0') {
          strcpy(ffname, fname);
          strcat(ffname, "_dp5.ps");
        } else strcpy(ffname, "dot5.ps");
        /*output of the 5 dot plots*/

        /*AB dot_plot*/
        /*write Free Energy into comment*/
        sprintf(comment,"\n%%Heterodimer AB FreeEnergy= %.9f\n", AB.FcAB);
        /*reset cut_point*/
        cut_point=Alength+1;
        /*write New name*/
        strcpy(Newname,"AB");
        strcat(Newname,ffname);
        (void)PS_dot_plot_list(orig_sequence, Newname, prAB, mfAB, comment);

        /*AA dot_plot*/
        sprintf(comment,"\n%%Homodimer AA FreeEnergy= %.9f\n",AA.FcAB);
        /*write New name*/
        strcpy(Newname,"AA");
        strcat(Newname,ffname);
        /*write AA sequence*/
        Newstring=(char*)space((2*Alength+1)*sizeof(char));
        strcpy(Newstring,orig_Astring);
        strcat(Newstring,orig_Astring);
        (void)PS_dot_plot_list(Newstring, Newname, prAA, mfAA, comment);
        free(Newstring);

        /*BB dot_plot*/
        sprintf(comment,"\n%%Homodimer BB FreeEnergy= %.9f\n",BB.FcAB);
        /*write New name*/
        strcpy(Newname,"BB");
        strcat(Newname,ffname);
        /*write BB sequence*/
        Newstring=(char*)space((2*Blength+1)*sizeof(char));
        strcpy(Newstring,orig_Bstring);
        strcat(Newstring,orig_Bstring);
        /*reset cut_point*/
        cut_point=Blength+1;
        (void)PS_dot_plot_list(Newstring, Newname, prBB, mfBB, comment);
        free(Newstring);

        /*A dot plot*/
        /*reset cut_point*/
        cut_point=-1;
        sprintf(comment,"\n%%Monomer A FreeEnergy= %.9f\n",AB.FA);
        /*write New name*/
        strcpy(Newname,"A");
        strcat(Newname,ffname);
        /*write BB sequence*/
        (void)PS_dot_plot_list(orig_Astring, Newname, prA, mfA, comment);

        /*B monomer dot plot*/
        sprintf(comment,"\n%%Monomer B FreeEnergy= %.9f\n",AB.FB);
        /*write New name*/
        strcpy(Newname,"B");
        strcat(Newname,ffname);
        /*write BB sequence*/
        (void)PS_dot_plot_list(orig_Bstring, Newname, prB, mfB, comment);
        free(Astring); free(Bstring); free(orig_Astring); free(orig_Bstring);
        free(prAB); free(prAA); free(prBB); free(prA); free(prB);
        free(mfAB); free(mfAA); free(mfBB); free(mfA); free(mfB);

      } /*end if(doT)*/

      free(pf_parameters);
    }/*end if(pf)*/


    if (do_backtrack) {
      if (fname[0]!='\0') {
        strcpy(ffname, fname);
        strcat(ffname, "_dp.ps");
      } else strcpy(ffname, "dot.ps");

      if (!doT) {
        if (pf) {          (void) PS_dot_plot_list(rec_sequence, ffname, prAB, mfAB, "doof");
        free(prAB);}
        free(mfAB);
      }
    }
    if (!doT) free_co_pf_arrays();

    (void) fflush(stdout);
    
    /* clean up */
    if(cstruc) free(cstruc);
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(orig_sequence);
    free(structure);
    /* free the rest of current dataset */
    if(rec_rest){
      for(i=0;rec_rest[i];i++) free(rec_rest[i]);
      free(rec_rest);
    }
    rec_id = rec_sequence = orig_sequence = structure = cstruc = NULL;
    rec_rest = NULL;

    /* print user help for the next round if we get input from tty */
    if(istty){
      printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      if(fold_constrained){
        print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
        print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
      }
      else print_tty_input_seq();
    }
  }
  return EXIT_SUCCESS;
}
Exemplo n.º 3
0
int main(int argc, char *argv[]){
  struct RNAeval_args_info  args_info;
  char                      *string, *structure, *orig_sequence, *tmp;
  char                      *rec_sequence, *rec_id, **rec_rest;
  char                      fname[FILENAME_MAX_LENGTH];
  char                      *ParamFile;
  int                       i, length1, length2;
  float                     energy;
  int                       istty;
  int                       circular=0;
  int                       noconv=0;
  int                       verbose = 0;
  unsigned int              rec_type, read_opt;

  string  = orig_sequence = ParamFile = NULL;
  gquad   = 0;
  dangles = 2;

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAeval_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
      dangles = args_info.dangles_arg;
  }
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* assume RNA sequence to be circular */
  if(args_info.circ_given)        circular=1;
  /* logarithmic multiloop energies */
  if(args_info.logML_given)       logML = 1;
  /* be verbose */
  if(args_info.verbose_given)     verbose = 1;
  /* gquadruplex support */
  if(args_info.gquad_given)       gquad = 1;

  /* free allocated memory of command line data structure */
  RNAeval_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */

  if (ParamFile!=NULL) read_parameter_file(ParamFile);

  rec_type      = read_opt = 0;
  rec_id        = rec_sequence = NULL;
  rec_rest      = NULL;
  istty         = isatty(fileno(stdout)) && isatty(fileno(stdin));

  if(circular && gquad){
    nrerror("G-Quadruplex support is currently not available for circular RNA structures");
  }

  /* set options we wanna pass to read_record */
  if(istty){
    read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
    print_tty_input_seq_str("Use '&' to connect 2 sequences that shall form a complex.\n"
                            "Input sequence (upper or lower case) followed by structure");
  }

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){

    if(rec_id){
      if(!istty) printf("%s\n", rec_id);
      (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
    }
    else fname[0] = '\0';

    cut_point = -1;

    string    = tokenize(rec_sequence);
    length2   = (int) strlen(string);
    tmp       = extract_record_rest_structure((const char **)rec_rest, 0, (rec_id) ? VRNA_OPTION_MULTILINE : 0);

    if(!tmp)
      nrerror("structure missing");

    structure = tokenize(tmp);
    length1   = (int) strlen(structure);
    if(length1 != length2)
      nrerror("structure and sequence differ in length!");

    free(tmp);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(string);
    /* store case-unmodified sequence */
    orig_sequence = strdup(string);
    /* convert sequence to uppercase letters only */
    str_uppercase(string);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length1);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length1-cut_point+1);
    }

    if(gquad)
      energy = energy_of_gquad_structure(string, structure, verbose);
    else
      energy = (circular) ? energy_of_circ_structure(string, structure, verbose) : energy_of_structure(string, structure, verbose);

    if (cut_point == -1)
      printf("%s\n%s", orig_sequence, structure);
    else {
      char *pstring, *pstruct;
      pstring = costring(orig_sequence);
      pstruct = costring(structure);
      printf("%s\n%s", pstring,  pstruct);
      free(pstring);
      free(pstruct);
    }
    if (istty)
      printf("\n energy = %6.2f\n", energy);
    else
      printf(" (%6.2f)\n", energy);

    /* clean up */
    (void) fflush(stdout);
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(structure);
    /* free the rest of current dataset */
    if(rec_rest){
      for(i=0;rec_rest[i];i++) free(rec_rest[i]);
      free(rec_rest);
    }
    rec_id = rec_sequence = structure = NULL;
    rec_rest = NULL;

    free(string);
    free(orig_sequence);
    string = orig_sequence = NULL;

    /* print user help for the next round if we get input from tty */
    if(istty){
      print_tty_input_seq_str("Use '&' to connect 2 sequences that shall form a complex.\n"
                              "Input sequence (upper or lower case) followed by structure");
    }
  }
  return EXIT_SUCCESS;
}
Exemplo n.º 4
0
int main(int argc, char *argv[]) {

    struct        RNAfold_args_info args_info;
    char          *string, *input_string, *structure=NULL, *cstruc=NULL;
    char          fname[80], ffname[80], gfname[80], *ParamFile=NULL;
    char          *ns_bases=NULL, *c;
    int           i, j, ii, jj, mu, length, l, sym, r, pf=0, noconv=0;
    unsigned int  input_type;
    double        energy, min_en, kT, sfact=1.07;
    int           doMEA=0, circular = 0, N;
    char *pf_struc;
    double dist;
    plist *pl;

    FILE * filehandle;
    FILE * statsfile;
    char* line;

    double tau   = 0.01; /* Variance of energy parameters */
    double sigma = 0.01; /* Variance of experimental constraints */
    double *gradient;           /* Gradient for steepest descent search
                                 epsilon[i+1]= epsilon[i] - gradient *
                                 step_size */
    double initial_step_size = 0.5;  /* Initial step size for steepest
                                      descent search */
    double step_size;
    double D;                  /* Discrepancy (i.e. value of objective
                                function) for the current
                                prediction */
    int iteration, max_iteration = 2000; /* Current and maximum number of
                                         iterations after which
                                         algorithm stops */

    double precision = 0.1; /* cutoff used for stop conditions */
    double tolerance = 0.1;   /* Parameter used by various GSL minimizers */
    int method_id = 1;        /* Method to use for minimization, 0 and 1
                               are custom steepest descent, the rest
                               are GSL implementations (see below)*/

    int initial_guess_method = 0;

    int sample_N = 1000;

    double *prev_epsilon;
    double *prev_gradient;
    double DD, prev_D, sum, norm;
    int status;
    double* gradient_numeric;
    double* gradient_numeric_gsl;

    /* Minimizer vars */
    const gsl_multimin_fdfminimizer_type *T;
    gsl_multimin_fdfminimizer *minimizer;
    gsl_vector *minimizer_x;
    gsl_vector *minimizer_g;
    gsl_multimin_function_fdf minimizer_func;
    minimizer_pars_struct minimizer_pars;

    char *constraints;
    char outfile[256];
    char constraints_file[256];
    char epsilon_file[256];
    FILE* fh;

    double last_non_nan_lnQ;

    pf_overflow = 0;
    pf_underflow = 0;

    dangles=2;

    do_backtrack  = 1;
    string        = NULL;

    noPS = 0;
    outfile[0]='\0';
    epsilon_file[0]='\0';
    strcpy(psDir, "dotplots");

    if(RNAfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

    /* RNAbpfold specific options */

    if (args_info.tau_given) tau = args_info.tau_arg;
    if (args_info.sigma_given) sigma = args_info.sigma_arg;
    if (args_info.precision_given) precision = args_info.precision_arg;
    if (args_info.step_given) initial_step_size = args_info.step_arg;
    if (args_info.maxN_given) max_iteration = args_info.maxN_arg;
    if (args_info.minimization_given) method_id = args_info.minimization_arg;
    if (args_info.init_given) initial_guess_method = args_info.init_arg;
    if (args_info.tolerance_given) tolerance = args_info.tolerance_arg;
    if (args_info.outfile_given) strcpy(outfile, args_info.outfile_arg);
    if (args_info.constraints_given) strcpy(constraints_file, args_info.constraints_arg);
    if (args_info.epsilon_given) strcpy(epsilon_file, args_info.epsilon_arg);
    if (args_info.sampleGradient_given) sample_conditionals=1;
    if (args_info.hybridGradient_given) {
        sample_conditionals=1;
        hybrid_conditionals=1;
    }
    if (args_info.numericalGradient_given) numerical=1;
    if (args_info.sampleStructure_given) sample_structure=1;
    if (args_info.psDir_given) strcpy(psDir, args_info.psDir_arg);
    if (args_info.sparsePS_given) sparsePS=args_info.sparsePS_arg;
    if (args_info.gridSearch_given) grid_search = 1;


    /* Generic RNAfold options */

    if (args_info.temp_given)        temperature = args_info.temp_arg;
    if (args_info.reference_given)  fold_constrained=1;
    if (args_info.noTetra_given)     tetra_loop=0;
    if (args_info.dangles_given)     dangles = args_info.dangles_arg;
    if (args_info.noLP_given)        noLonelyPairs = 1;
    if (args_info.noGU_given)        noGU = 1;
    if (args_info.noClosingGU_given) no_closingGU = 1;
    if (args_info.noconv_given)      noconv = 1;
    if (args_info.energyModel_given) energy_set = args_info.energyModel_arg;
    if (args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
    if (args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
    if (args_info.pfScale_given)     sfact = args_info.pfScale_arg;
    if (args_info.noPS_given)        noPS=1;



    /* Create postscript directory */
    if (!noPS) {
        struct stat stat_p;
        if (stat (psDir, &stat_p) != 0) {
            if (mkdir(psDir, S_IRWXU|S_IROTH|S_IRGRP ) !=0) {
                fprintf(stderr, "WARNING: Could not create directory: %s", psDir);
            }
        }
    }

    if (ParamFile != NULL) {
        read_parameter_file(ParamFile);
    }

    if (ns_bases != NULL) {
        nonstandards = space(33);
        c=ns_bases;
        i=sym=0;
        if (*c=='-') {
            sym=1;
            c++;
        }
        while (*c!='\0') {
            if (*c!=',') {
                nonstandards[i++]=*c++;
                nonstandards[i++]=*c;
                if ((sym)&&(*c!=*(c-1))) {
                    nonstandards[i++]=*c;
                    nonstandards[i++]=*(c-1);
                }
            }
            c++;
        }
    }

    /*Read sequence*/
    fname[0] = '\0';
    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER) {
        (void) sscanf(input_string, "%42s", fname);
        free(input_string);
    }

    length = (int)    strlen(input_string);
    string = strdup(input_string);
    free(input_string);
    structure = (char *) space((unsigned) length+1);

    /* For testing purpose pass dot bracket structure of reference structure via -C */
    if (fold_constrained) {
        input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
        if(input_type & VRNA_INPUT_QUIT) {
            exit(1);
        }
        else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)) {
            cstruc = strdup(input_string);
            free(input_string);
        }
        else warn_user("-C was given but reference structure is missing");
    }

    if(noconv) {
        str_RNA2RNA(string);
    } else {
        str_DNA2RNA(string);
    }

    /* Allocating space */

    epsilon =     (double *) space(sizeof(double)*(length+1));

    exp_pert =  (double **)space(sizeof(double *)*(length+1));
    perturbations =  (double **)space(sizeof(double *)*(length+1));
    prev_epsilon = (double *) space(sizeof(double)*(length+1));
    gradient =    (double *) space(sizeof(double)*(length+1));
    gradient_numeric =    (double *) space(sizeof(double)*(length+1));
    gradient_numeric_gsl =    (double *) space(sizeof(double)*(length+1));
    prev_gradient = (double *) space(sizeof(double)*(length+1));

    q_unpaired = (double *) space(sizeof(double)*(length+1));
    p_unpaired_cond = (double **)space(sizeof(double *)*(length+1));
    p_unpaired_cond_sampled = (double **)space(sizeof(double *)*(length+1));
    p_pp =  (double **)space(sizeof(double *)*(length+1));
    p_unpaired =  (double *) space(sizeof(double)*(length+1));
    p_unpaired_tmp = (double *) space(sizeof(double)*(length+1));

    for (i=0; i <= length; i++) {
        epsilon[i] = gradient[i] = q_unpaired[i] = 0.0;
        p_unpaired_cond[i] = (double *) space(sizeof(double)*(length+1));
        p_unpaired_cond_sampled[i] = (double *) space(sizeof(double)*(length+1));
        p_pp[i] = (double *) space(sizeof(double)*(length+1));
        exp_pert[i] = (double *) space(sizeof(double)*(length+1));
        perturbations[i] = (double *) space(sizeof(double)*(length+1));
        for (j=0; j <= length; j++) {
            p_pp[i][j]=p_unpaired_cond[i][j] = 0.0;
            p_unpaired_cond_sampled[i][j] = 0.0;
        }
    }


    /*** If file with perturbation vector epsilon is given we fold using
         this epsilon and are done ***/

    if (args_info.epsilon_given) {
        plist *pl, *pl1,*pl2;

        filehandle = fopen (epsilon_file,"r");

        if (filehandle == NULL) {
            nrerror("Could not open file with perturbation vector.");
        }

        i=1;
        while (1) {
            double t;
            line = get_line(filehandle);
            if (line == NULL) break;
            if (i>length) nrerror("Too many values in perturbation vector file.");
            if (sscanf(line, "%lf", &epsilon[i]) !=1) {
                nrerror("Error while reading perturbation vector file.");
            }
            i++;
        }

        if (i-1 != length) {
            nrerror("Too few values in perturbation vector file.");
        }

        init_pf_fold(length);
        pf_fold_pb(string, NULL);

        sprintf(fname,"%s/dot.ps", psDir);
        pl1 = make_plist(length, 1e-5);

        (void) PS_dot_plot_list_epsilon(string, fname, NULL, pl1, epsilon, "");

        exit(0);
    }



    /*** Get constraints from reference structure or from external file ***/

    /* Structure was given by -C */
    if (fold_constrained) {
        for (i=0; i<length; i++) {
            if (cstruc[i] == '(' || cstruc[i] == ')') {
                q_unpaired[i+1] = 0.0;
            } else {
                q_unpaired[i+1] = 1.0;
            }
        }

        /*Read constraints from file*/
    } else {

        filehandle = fopen (constraints_file,"r");

        if (filehandle == NULL) {
            nrerror("No constraints given as dot bracket or wrong file name");
        }

        i=1;
        while (1) {
            double t;
            line = get_line(filehandle);
            if (line == NULL) break;
            if (i>length) nrerror("Too many values in constraints.dat");
            if (sscanf(line, "%lf", &q_unpaired[i]) !=1) {
                nrerror("Error while reading constraints.dat");
            }
            i++;
        }

        if (i-1 != length) {
            nrerror("Too few values in constraints.dat");
        }
    }

    /* Create file handle */
    if (outfile[0] !='\0') {
        statsfile = fopen (outfile,"w");
    } else {
        statsfile = fopen ("stats.dat","w");
    }

    setvbuf(statsfile, NULL, _IONBF, 0);

    if (!grid_search) {
        fprintf(statsfile, "Iteration\tDiscrepancy\tNorm\tdfCount\tMEA\tSampled_structure\tSampled_energy\tSampled_distance\tEpsilon\ttimestamp\n");
    } else {
        /* If we do a grid search we have a different output. */
        fprintf(statsfile, "Dummy\tm\tb\tdummy\tMEA\tepsilon\n");
    }

    if (statsfile == NULL) {
        nrerror("Could not open stats.dat for writing.");
    }

    fprintf(stderr, "tau^2 = %.4f; sigma^2 = %.4f; precision = %.4f; tolerance = %.4f; step-size: %.4f\n\n",
            tau, sigma, precision, tolerance, initial_step_size);

    st_back=1;
    min_en = fold(string, structure);

    (void) fflush(stdout);

    if (length>2000) free_arrays();

    pf_struc = (char *) space((unsigned) length+1);

    kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
    pf_scale = exp(-(sfact*min_en)/kT/length);

    /* Set up minimizer */

    minimizer_x = gsl_vector_alloc (length+1);
    minimizer_g = gsl_vector_alloc (length+1);

    for (i=0; i <= length; i++) {
        epsilon[i] = 0.0;
        gsl_vector_set (minimizer_g, i, 0.0);
        gsl_vector_set (minimizer_x, i, epsilon[i]);
    }

    minimizer_pars.length=length;
    minimizer_pars.seq = string;
    minimizer_pars.tau=tau;
    minimizer_pars.sigma=sigma;
    minimizer_pars.kT=kT;

    minimizer_func.n = length+1;
    minimizer_func.f = calculate_f;
    minimizer_func.df = numerical ? calculate_df_numerically: calculate_df;
    minimizer_func.fdf = calculate_fdf;
    minimizer_func.params = &minimizer_pars;


    //min_en = fold_pb(string, structure);
    //fprintf(stderr, "%f", min_en);
    //exit(0);

    /* Calling test functions for debugging */
    for (i=1; i <= length; i++) {
        if (i%2==0) {
            epsilon[i] = +0.2*i;
        } else {
            epsilon[i] = -0.2*i;
        }
    }

    //test_folding(string, length);
    /* //test_stochastic_backtracking(string, length); */
    /* //test_gradient(minimizer_func, minimizer_pars); */
    /* //test_gradient_sampling(minimizer_func, minimizer_pars); */
    //exit(1);


    count_df_evaluations=0;

    /* Initial guess for epsilon */

    if (initial_guess_method !=0 && initial_guess_method !=3) {

        /* Vars for inital guess methods */
        double m,b;
        double* curr_epsilon;
        double* best_epsilon;
        double best_m, best_b, best_scale;
        double curr_D;
        double min_D = 999999999.0;
        double inc = +0.25;
        double cut;

        if (initial_guess_method == 1) fprintf(stderr, "Mathew's constant perturbations\n");
        if (initial_guess_method == 2) fprintf(stderr, "Perturbations proportional to q-p\n");

        curr_epsilon = (double *) space(sizeof(double)*(length+1));
        best_epsilon = (double *) space(sizeof(double)*(length+1));

        last_non_nan_lnQ = min_en;

        // Calculate p_unpaired for unperturbed state which we need later
        // for the proportinal method
        if (initial_guess_method == 2) {

            init_pf_fold(length);

            for (i=0; i <= length; i++) {
                epsilon[i] = 0.0;
            }

            pf_fold_pb(string, NULL);
            for (i = 1; i < length; i++) {
                for (j = i+1; j<= length; j++) {
                    p_pp[i][j]=p_pp[j][i]=pr[iindx[i]-j];
                }
            }
            get_pair_prob_vector(p_pp, p_unpaired_tmp, length, 1);
            free_pf_arrays();
        }

        /* We do the same grid search as in the Mathews paper Fig. 4*/
        for (m=0.25; m <=7.0; m+=0.25) {

            // Weird way of writing this inner loop for the grid search. We
            // traverse the grid without big jumps in the parameters to make
            // sure that the updated scaling factor is accurate all the time.
            inc*=-1;

            for (b = inc < 0.0 ? 0.0 : -3.0; inc < 0.0 ? b >= -3.0 : b<= 0.0 ; b+=inc) {

                // calculate cut point with x-axis and skip parameter pairs
                // which give a cut point outside the range of
                // q_unpaired (0 to 1). They gave frequently overflows and the
                // idea is that we both want positive and negative perturbations
                cut = exp( (-1) * b / m ) - 1;

                fprintf(stderr, "\nm = %.2f, b = %.2f, cut=%.2f\n", m, b, cut);

                if (cut > 1.0 || cut < 0.01) {
                    fprintf(stderr, "\nSkipping m = %.2f, b = %.2f\n", m, b);
                    continue;
                }

                /* Mathew's constant perturbations */
                if (initial_guess_method == 1) {
                    for (i=0; i <= length; i++) {

                        /* We add epsilon to unpaired regions (as opposed to
                           paired regions as in the Mathews paper) so we multiply
                           by -1; if missing data we set it to 0.0 */

                        if (q_unpaired[i] < -0.5) {
                            curr_epsilon[i] = 0.0;
                        } else {
                            curr_epsilon[i] = (m *(log(q_unpaired[i]+1))+b) *(-1);
                        }

                        gsl_vector_set (minimizer_x, i, curr_epsilon[i]);
                    }
                    /* Perturbations proportional to q-p */
                } else {

                    for (i=0; i <= length; i++) {
                        curr_epsilon[i] = (m *(log(q_unpaired[i]+1)-log(p_unpaired_tmp[i]+1))+ b ) * (-1);
                        gsl_vector_set (minimizer_x, i, curr_epsilon[i]);
                    }
                }

                // Repeat and adjust scaling factor until we get result without over-/underflows
                do {

                    // First we use default scaling factor
                    if (pf_underflow == 0 && pf_overflow == 0) {
                        sfact = 1.070;
                    }

                    if (pf_underflow) {
                        sfact *= 0.8;
                        fprintf(stderr,"Underflow, adjusting sfact to %.4f\n", sfact );
                    }

                    if (pf_overflow) {
                        sfact *= 1.2;
                        fprintf(stderr,"Overflow, adjusting sfact to %.4f\n", sfact );
                    }

                    pf_scale = exp(-(sfact*last_non_nan_lnQ)/kT/length);

                    //fprintf(stderr,"Scaling factor is now: %.4e\n", pf_scale);

                    curr_D = calculate_f(minimizer_x, (void*)&minimizer_pars);

                    if (!isnan(last_lnQ)) last_non_nan_lnQ = last_lnQ;

                    // Give up when even extreme scaling does not give results
                    // (for some reason I could not get rid of overflows even with high scaling factors)
                    if (sfact < 0.1 || sfact > 2.0) break;

                } while (pf_underflow == 1 || pf_overflow == 1);

                // We have not given up so everything is ok now
                if (!(sfact < 0.1 || sfact > 2.0)) {

                    if (curr_D < min_D) {
                        min_D = curr_D;
                        for (i=0; i <= length; i++) {
                            best_epsilon[i] = curr_epsilon[i];
                        }
                        best_m = m;
                        best_b = b;
                        best_scale = pf_scale;
                    }

                    /*If we are interested in the grid search we misuse the
                      print_stats function and report m and b together with MEA*/
                    if (grid_search) {
                        for (i=0; i <= length; i++) {
                            epsilon[i] = curr_epsilon[i];
                        }
                        print_stats(statsfile, string, cstruc, length, 0, 0, m, 0.0, b, 0);
                    }

                    fprintf(stderr, "curr D: %.2f, minimum D: %.2f\n", curr_D, min_D);

                    // Adjust pf_scale with default scaling factor but lnQ from
                    // previous step
                    sfact = 1.070;
                    pf_scale = exp(-(sfact*last_lnQ)/kT/length);

                } else {
                    sfact = 1.070;
                    fprintf(stderr, "Skipping m = %.2f, b = %.2f; did not get stable result.\n", m, b);
                }
            } // for b
        } // for m

        fprintf(stderr, "Minimum found: m=%.2f, b=%.2f: %.2f\n", best_m, best_b, min_D);

        for (i=0; i <= length; i++) {
            epsilon[i] = best_epsilon[i];
            gsl_vector_set (minimizer_x, i, best_epsilon[i]);
        }
        pf_scale = best_scale;
    }

    if (initial_guess_method == 3) {
        srand((unsigned)time(0));
        for (i=0; i <= length; i++) {
            double r = (double)rand()/(double)RAND_MAX * 4 - 2;
            epsilon[i] = r;
            gsl_vector_set (minimizer_x, i, epsilon[i]);
        }
    }

    /* If we just wanted a grid search we are done now. */
    if (grid_search) {
        exit(0);
    }

    prev_D = calculate_f(minimizer_x, (void*)&minimizer_pars);

    print_stats(statsfile, string, cstruc, length, 0 , count_df_evaluations , prev_D, -1.0, 0.0,1);

    /* GSL minimization */

    if (method_id !=0) {

        if (method_id > 2) {
            char name[100];
            // Available algorithms
            //  3  gsl_multimin_fdfminimizer_conjugate_fr
            //  4  gsl_multimin_fdfminimizer_conjugate_pr
            //  5  gsl_multimin_fdfminimizer_vector_bfgs
            //  6  gsl_multimin_fdfminimizer_vector_bfgs2
            //  7  gsl_multimin_fdfminimizer_steepest_descent

            //   http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html

            switch (method_id) {
            case 2:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_fr, length+1);
                strcpy(name, "Fletcher-Reeves conjugate gradient");
                break;
            case 3:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_conjugate_pr, length+1);
                strcpy(name, "Polak-Ribiere conjugate gradient");
                break;
            case 4:
                minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs, length+1);
                strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno");
                break;
            case 5:
                minimizer = gsl_multimin_fdfminimizer_alloc ( gsl_multimin_fdfminimizer_vector_bfgs2, length+1);
                strcpy(name, "Broyden-Fletcher-Goldfarb-Shanno (improved version)");
                break;
            case 6:
                minimizer = gsl_multimin_fdfminimizer_alloc (gsl_multimin_fdfminimizer_steepest_descent, length+1);
                strcpy(name, "Gradient descent (GSL implmementation)");
                break;
            }

            fprintf(stderr, "Starting minimization via GSL implementation of %s...\n\n", name);

            // The last two parmeters are step size and tolerance (with
            // different meaning for different algorithms

            gsl_multimin_fdfminimizer_set (minimizer, &minimizer_func, minimizer_x, initial_step_size, tolerance);

            iteration = 1;

            do {

                status = gsl_multimin_fdfminimizer_iterate (minimizer);
                D = minimizer->f;
                norm = gsl_blas_dnrm2(minimizer->gradient);

                print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, D, prev_D, norm, iteration%sparsePS == 0);

                prev_D = D;

                if (status) {
                    fprintf(stderr, "An unexpected error has occured in the iteration (status:%i)\n", status);
                    break;
                }

                status = gsl_multimin_test_gradient (minimizer->gradient, precision);
                if (status == GSL_SUCCESS) fprintf(stderr, "Minimum found stopping.\n");

                iteration++;

            } while (status == GSL_CONTINUE && iteration < max_iteration);

            gsl_multimin_fdfminimizer_free (minimizer);
            gsl_vector_free (minimizer_x);

            /* Custom implementation of steepest descent */
        } else {

            if (method_id == 1) {
                fprintf(stderr, "Starting custom implemented steepest descent search...\n\n");
            } else {
                fprintf(stderr, "Starting custom implemented steepest descent search with Barzilai Borwein step size...\n\n");
            }

            iteration = 0;
            D = 0.0;

            while (iteration++ < max_iteration) {

                for (i=1; i <= length; i++) {
                    gsl_vector_set (minimizer_x, i, epsilon[i]);
                }

                D = calculate_f(minimizer_x, (void*)&minimizer_pars);

                if (numerical) {
                    calculate_df_numerically(minimizer_x, (void*)&minimizer_pars, minimizer_g);
                } else {
                    calculate_df(minimizer_x, (void*)&minimizer_pars, minimizer_g);
                }

                for (i=1; i <= length; i++) {
                    gradient[i] = gsl_vector_get (minimizer_g, i);
                }

                // Do line search

                fprintf(stderr, "\nLine search:\n");

                // After the first iteration, use Barzilai-Borwain (1988) step size (currently turned off)
                if (iteration>1 && method_id==2) {

                    double denominator=0.0;
                    double numerator=0.0;

                    for (i=1; i <= length; i++) {
                        numerator += (epsilon[i]-prev_epsilon[i]) * (gradient[i]-prev_gradient[i]);
                        denominator+=(gradient[i]-prev_gradient[i]) * (gradient[i]-prev_gradient[i]);
                    }

                    step_size = numerator / denominator;

                    norm =1.0;
                } else {
                    // Use step sized given by the user (normalize it first)
                    step_size = initial_step_size / calculate_norm(gradient, length);
                }

                for (i=1; i <= length; i++) {
                    prev_epsilon[i] = epsilon[i];
                    prev_gradient[i] = gradient[i];
                }

                do {

                    for (mu=1; mu <= length; mu++) {
                        epsilon[mu] = prev_epsilon[mu] - step_size * gradient[mu];
                    }

                    for (i=1; i <= length; i++) {
                        gsl_vector_set (minimizer_x, i, epsilon[i]);
                    }

                    DD = calculate_f(minimizer_x, (void*)&minimizer_pars);

                    if (step_size > 0.0001) {
                        fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4f\n", D, DD, step_size);
                    } else {
                        fprintf(stderr, "Old D: %.4f; New D: %.4f; Step size: %.4e\n", D, DD, step_size);
                    }

                    step_size /= 2;
                } while (step_size > 1e-12 && DD > D);

                norm = calculate_norm(gradient,length);

                if (DD > D) {
                    fprintf(stderr, "Line search did not improve D in iteration %i. Stop.\n", iteration);

                    if (hybrid_conditionals) {
                        sample_conditionals=0;
                    } else {
                        break;
                    }
                }

                print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, iteration%sparsePS == 0);

                if (norm<precision && iteration>1) {
                    fprintf(stderr, "Minimum found stopping.\n");
                    break;
                }

                prev_D = DD;

            }
        }

        /* Force last dotplot to be printed */
        print_stats(statsfile, string, cstruc, length,iteration, count_df_evaluations, DD, prev_D, norm, 1);
    }

    free(pf_struc);
    if (cstruc!=NULL) free(cstruc);
    (void) fflush(stdout);
    free(string);
    free(structure);
    RNAfold_cmdline_parser_free (&args_info);


    return 0;
}
Exemplo n.º 5
0
/*--------------------------------------------------------------------------*/
int main(int argc, char *argv[]){
  struct        RNAplfold_args_info args_info;
  unsigned int  error = 0;
  char          fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH], *c, *structure, *ParamFile, *ns_bases, *rec_sequence, *rec_id, **rec_rest, *orig_sequence;
  unsigned int  input_type;
  int           i, length, l, sym, r, istty, winsize, pairdist;
  float         cutoff;
  int           tempwin, temppair, tempunpaired;
  FILE          *pUfp = NULL, *spup = NULL;
  double        **pup = NULL; /*prob of being unpaired, lengthwise*/
  int           noconv, plexoutput, simply_putout, openenergies, binaries;
  plist         *pl, *dpp = NULL;
  unsigned int  rec_type, read_opt;
  double        betaScale;
  pf_paramT     *pf_parameters;
  model_detailsT  md;

  dangles       = 2;
  cutoff        = 0.01;
  winsize       = 70;
  pairdist      = 0;
  unpaired      = 0;
  betaScale     = 1.;
  simply_putout = plexoutput = openenergies = noconv = 0;binaries=0;
  tempwin       = temppair = tempunpaired = 0;
  structure     = ParamFile = ns_bases = NULL;
  rec_type      = read_opt = 0;
  rec_id        = rec_sequence = orig_sequence = NULL;
  rec_rest      = NULL;
  pf_parameters = NULL;

  set_model_details(&md);

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAplfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)              temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)           md.special_hp = tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
      md.dangles = dangles = args_info.dangles_arg;
  }
  /* do not allow weak pairs */
  if(args_info.noLP_given)              md.noLP = noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)              md.noGU = noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given)       md.noGUclosure = no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)            noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given)       energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)         ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)               ns_bases = strdup(args_info.nsp_arg);
  /* set the maximum base pair span */
  if(args_info.span_given)              pairdist = args_info.span_arg;
  /* set the pair probability cutoff */
  if(args_info.cutoff_given)            cutoff = args_info.cutoff_arg;
  /* set the windowsize */
  if(args_info.winsize_given)           winsize = args_info.winsize_arg;
  /* set the length of unstructured region */
  if(args_info.ulength_given)           unpaired = args_info.ulength_arg;
  /* compute opening energies */
  if(args_info.opening_energies_given)  openenergies = 1;
  /* print output on the fly */
  if(args_info.print_onthefly_given)    simply_putout = 1;
  /* turn on RNAplex output */
  if(args_info.plex_output_given)       plexoutput = 1;
  /* turn on binary output*/
  if(args_info.binaries_given)          binaries = 1;

  if(args_info.betaScale_given)         betaScale = args_info.betaScale_arg;
    
  /* check for errorneous parameter options */
  if((pairdist < 0) || (cutoff < 0.) || (unpaired < 0) || (winsize < 0)){
    RNAplfold_cmdline_parser_print_help();
    exit(EXIT_FAILURE);
  }

  /* free allocated memory of command line data structure */
  RNAplfold_cmdline_parser_free(&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  /* check parameter options again and reset to reasonable values if needed */
  if(openenergies && !unpaired) unpaired  = 31;
  if(pairdist == 0)             pairdist  = winsize;
  if(pairdist > winsize){
    fprintf(stderr, "pairdist (-L %d) should be <= winsize (-W %d);"
            "Setting pairdist=winsize\n",pairdist, winsize);
    pairdist = winsize;
  }
  if(dangles % 2){
    warn_user("using default dangles = 2");
    dangles = 2;
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
  read_opt |= VRNA_INPUT_NO_REST;
  if(istty){
    print_tty_input_seq();
    read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
  }

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){

    /*
    ########################################################
    # init everything according to the data we've read
    ########################################################
    */
    if(rec_id){
      if(!istty) printf("%s\n", rec_id);
      (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
    }
    else fname[0] = '\0';

    length    = (int)strlen(rec_sequence);
    structure = (char *) space((unsigned) length+1);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(rec_sequence);
    /* store case-unmodified sequence */
    orig_sequence = strdup(rec_sequence);
    /* convert sequence to uppercase letters only */
    str_uppercase(rec_sequence);

    if(istty) printf("length = %d\n", length);

    /*
    ########################################################
    # done with 'stdin' handling
    ########################################################
    */

    if(length > 1000000){
      if(!simply_putout && !unpaired){
        printf("Switched to simple output mode!!!\n");
        simply_putout = 1;
      }
    }
    if(unpaired && simply_putout){
      printf("Output simplification not possible if unpaired is switched on\n");
      simply_putout = 0;
    }

    /* restore winsize if altered before */
    if(tempwin != 0){
      winsize = tempwin;
      tempwin = 0;
    }
    /* restore pairdist if altered before */
    if(temppair != 0){
      pairdist = temppair;
      temppair = 0;
    }
    /* restore ulength if altered before */
    if(tempunpaired != 0){
      unpaired      = tempunpaired;
      tempunpaired  = 0;
    }

    /* adjust winsize, pairdist and ulength if necessary */
    if(length < winsize){
      fprintf(stderr, "WARN: window size %d larger than sequence length %d\n", winsize, length);
      tempwin = winsize;
      winsize = length;
      if (pairdist>winsize) {
        temppair=pairdist;
        pairdist=winsize;
      }
      if (unpaired>winsize) {
        tempunpaired=unpaired;
        unpaired=winsize;
      }
    }

    /*
    ########################################################
    # begin actual computations
    ########################################################
    */

    if (length >= 5){
      /* construct output file names */
      char fname1[FILENAME_MAX_LENGTH], fname2[FILENAME_MAX_LENGTH], fname3[FILENAME_MAX_LENGTH], fname4[FILENAME_MAX_LENGTH], fname_t[FILENAME_MAX_LENGTH];

      strcpy(fname_t, (fname[0] != '\0') ? fname : "plfold");

      strcpy(fname1, fname_t);
      strcpy(fname2, fname_t);
      strcpy(fname3, fname_t);
      strcpy(fname4, fname_t);
      strcpy(ffname, fname_t);

      strcat(fname1, "_lunp");
      strcat(fname2, "_basepairs");
      strcat(fname3, "_uplex");
      if(binaries){
        strcat(fname4, "_openen_bin");
      }
      else{
        strcat(fname4, "_openen");
      }
      strcat(ffname, "_dp.ps");

      pf_parameters = get_boltzmann_factors(temperature, betaScale, md, -1);

      if(unpaired > 0){
        pup       =(double **)  space((length+1)*sizeof(double *));
        pup[0]    =(double *)   space(sizeof(double)); /*I only need entry 0*/
        pup[0][0] = unpaired;
      }

      pUfp = spup = NULL;
      if(simply_putout){
        spup = fopen(fname2, "w");
        pUfp = (unpaired > 0) ? fopen(fname1, "w") : NULL;

        pl = pfl_fold_par(rec_sequence, winsize, pairdist, cutoff, pup, &dpp, pUfp, spup, pf_parameters);

        if(pUfp != NULL)  fclose(pUfp);
        if(spup != NULL)  fclose(spup);
      }
      else{
        pl = pfl_fold_par(rec_sequence, winsize, pairdist, cutoff, pup, &dpp, pUfp, spup, pf_parameters);
        PS_dot_plot_turn(orig_sequence, pl, ffname, pairdist);
        if (unpaired > 0){
          if(plexoutput){
            pUfp = fopen(fname3, "w");
            putoutphakim_u(pup,length, unpaired, pUfp);
            fclose(pUfp);
          }
          pUfp = fopen(openenergies ? fname4 : fname1, "w");
          if(binaries){
            putoutpU_prob_bin_par(pup, length, unpaired, pUfp, openenergies, pf_parameters);
          }
          else{
            putoutpU_prob_par(pup, length, unpaired, pUfp, openenergies, pf_parameters);
          }
          fclose(pUfp);
        }
      }
      if(pl) free(pl);
      if(unpaired > 0){
        free(pup[0]);
        free(pup);
      }

      free(pf_parameters);
    }
    (void) fflush(stdout);

    /* clean up */
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(orig_sequence);
    free(structure);
    rec_id = rec_sequence = orig_sequence = NULL;
    rec_rest = NULL;
    /* print user help for the next round if we get input from tty */

    if(istty) print_tty_input_seq();
  }
  return EXIT_SUCCESS;
}
Exemplo n.º 6
0
int main(int argc, char *argv[]){
  struct        RNAsubopt_args_info args_info;
  unsigned int  input_type;
  char          fname[80], *cstruc, *sequence, *c, *input_string;
  char          *structure = NULL, *ParamFile = NULL, *ns_bases = NULL;
  int           i, length, l, sym, istty;
  double        deltaf, deltap;
  int           delta, n_back, noconv, circular, dos, zuker;

  do_backtrack  = 1;
  dangles       = 2;
  delta         = 100;
  deltap = n_back = noconv = circular = dos = zuker = 0;
  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAsubopt_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.constraint_given)  fold_constrained=1;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* energy range */
  if(args_info.deltaEnergy_given) delta = (int) (0.1+args_info.deltaEnergy_arg*100);
  /* energy range after post evaluation */
  if(args_info.deltaEnergyPost_given) deltap = args_info.deltaEnergyPost_arg;
  /* sorted output */
  if(args_info.sorted_given)      subopt_sorted = 1;
  /* assume RNA sequence to be circular */
  if(args_info.circ_given)        circular=1;
  /* stochastic backtracking */
  if(args_info.stochBT_given){
    n_back = args_info.stochBT_arg;
    init_rand();
  }
  /* density of states */
  if(args_info.dos_given){
    dos = 1;
    print_energy = -999999;
  }
  /* logarithmic multiloop energies */
  if(args_info.logML_given) logML = 1;
  /* zuker subopts */
  if(args_info.zuker_given) zuker = 1;

  if(zuker){
    if(circular){
      warn_user("Sorry, zuker subopts not yet implemented for circfold");
      RNAsubopt_cmdline_parser_print_help();
      exit(1);
    }
    else if(n_back>0){
      warn_user("Can't do zuker subopts and stochastic subopts at the same time");
      RNAsubopt_cmdline_parser_print_help();
      exit(1);
    }
  }

  /* free allocated memory of command line data structure */
  RNAsubopt_cmdline_parser_free(&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */

  if (ParamFile != NULL) read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  if(fold_constrained && istty) print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X);


  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  do {
    cut_point = -1;
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty){ 
      if (!zuker)
        printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      print_tty_input_seq();
    }
    /* extract filename from fasta header if available */
    fname[0] = '\0';
    while((input_type = get_input_line(&input_string, 0)) == VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string);
      (void) sscanf(input_string, "%42s", fname);
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      sequence  = tokenize(input_string); /* frees input_string */
      length    = (int) strlen(sequence);
    }
    structure = (char *) space((unsigned) length+1);

    if(noconv)  str_RNA2RNA(sequence);
    else        str_DNA2RNA(sequence);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
    }

    /* get structure constraint or break if necessary, entering an empty line results in a warning */
    if (fold_constrained) {
      input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
      if(input_type & VRNA_INPUT_QUIT){ break;}
      else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
        cstruc = tokenize(input_string);
        strncpy(structure, cstruc, length);
        for (i=0; i<length; i++)
          if (structure[i]=='|')
            nrerror("constraints of type '|' not allowed");
        free(cstruc);
      }
      else warn_user("constraints missing");
    }
    /*
    ########################################################
    # done with 'stdin' handling, now init everything properly
    ########################################################
    */

    if((logML != 0 || dangles==1 || dangles==3) && dos == 0)
      if(deltap<=0) deltap = delta/100. + 0.001;
    if (deltap>0)
      print_energy = deltap;

    /* first lines of output (suitable  for sort +1n) */
    if (fname[0] != '\0')
      printf("> %s [%d]\n", fname, delta);

    /* stochastic backtracking */
    if(n_back>0){
      double mfe, kT;
      char *ss;
      st_back=1;
      ss = (char *) space(strlen(sequence)+1);
      strncpy(ss, structure, length);
      mfe = fold(sequence, ss);
      kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
      pf_scale = exp(-(1.03*mfe)/kT/length);
      strncpy(ss, structure, length);
      /* ignore return value, we are not interested in the free energy */
      (circular) ? (void) pf_circ_fold(sequence, ss) : (void) pf_fold(sequence, ss);
      free(ss);
      for (i=0; i<n_back; i++) {
        char *s;
        s =(circular) ? pbacktrack_circ(sequence) : pbacktrack(sequence);
        printf("%s\n", s);
        free(s);
      }
      free_pf_arrays();
    }
    /* normal subopt */
    else if(!zuker){
      (circular) ? subopt_circ(sequence, structure, delta, stdout) : subopt(sequence, structure, delta, stdout);
      if (dos) {
        int i;
        for (i=0; i<= MAXDOS && i<=delta/10; i++) {
          printf("%4d %6d\n", i, density_of_states[i]);
        }
      }
    }
    /* Zuker suboptimals */
    else{
      SOLUTION *zr;
      int i;
      if (cut_point!=-1) {
        nrerror("Sorry, zuker subopts not yet implemented for cofold\n");
      }
      zr = zukersubopt(sequence);
      putoutzuker(zr);
      (void)fflush(stdout);
      for (i=0; zr[i].structure; i++) {
        free(zr[i].structure);
      }
      free(zr);
    }
    (void)fflush(stdout);
    free(sequence);
    free(structure);
  } while (1);
  return 0;
}
Exemplo n.º 7
0
int main(int argc, char *argv[]){
  struct  RNAheat_args_info args_info;
  char                      *string, *input_string, *ns_bases, *c, *ParamFile, *rec_sequence, *rec_id, **rec_rest, *orig_sequence;
  int                       i, length, l, sym;
  float                     T_min, T_max, h;
  int                       mpoints, istty, noconv = 0;
  unsigned int              input_type;
  unsigned int              rec_type, read_opt;

  string    = ParamFile = ns_bases = NULL;
  T_min     = 0.;
  T_max     = 100.;
  h         = 1;
  mpoints   = 2;
  dangles   = 2;   /* dangles can be 0 (no dangles) or 2, default is 2 */
  rec_type  = read_opt = 0;
  rec_id    = rec_sequence = orig_sequence = NULL;
  rec_rest  = NULL;

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAheat_cmdline_parser(argc, argv, &args_info) != 0) exit(1);

  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
      dangles = args_info.dangles_arg;
  }
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* Tmin */
  if(args_info.Tmin_given)        T_min = args_info.Tmin_arg;
  /* Tmax */
  if(args_info.Tmax_given)        T_max = args_info.Tmax_arg;
  /* step size */
  if(args_info.stepsize_given)    h = args_info.stepsize_arg;
  /* ipoints */
  if(args_info.ipoints_given){
    mpoints = args_info.ipoints_arg;
    if (mpoints < 1)    mpoints = 1;
    if (mpoints > 100)  mpoints = 100;
  }

  /* free allocated memory of command line data structure */
  RNAheat_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if (ParamFile!=NULL) read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  read_opt |= VRNA_INPUT_NO_REST;
  if(istty){
    print_tty_input_seq();
    read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
  }

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
    /*
    ########################################################
    # init everything according to the data we've read
    ########################################################
    */
    if(rec_id && !istty) printf("%s\n", rec_id);

    length = (int)strlen(rec_sequence);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(rec_sequence);
    /* store case-unmodified sequence */
    orig_sequence = strdup(rec_sequence);
    /* convert sequence to uppercase letters only */
    str_uppercase(rec_sequence);

    if(istty) printf("length = %d\n", length);
    /*
    ########################################################
    # done with 'stdin' handling
    ########################################################
    */

    heat_capacity(rec_sequence, T_min, T_max, h, mpoints);
    (void) fflush(stdout);

    /* clean up */
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(orig_sequence);
    rec_id = rec_sequence = orig_sequence = NULL;
    rec_rest = NULL;
    /* print user help for the next round if we get input from tty */

    if(istty) print_tty_input_seq();
  }
  return EXIT_SUCCESS;
}
Exemplo n.º 8
0
int main(int argc, char *argv[]){
  struct  RNAheat_args_info args_info;
  char                      *string, *input_string, *ns_bases, *c, *ParamFile;
  int                       i, length, l, sym;
  float                     T_min, T_max, h;
  int                       mpoints, istty, noconv = 0;
  unsigned int              input_type;

  string = ParamFile = ns_bases = NULL;
  T_min    = 0.;
  T_max    = 100.;
  h        = 1;
  mpoints  = 2;
  dangles  = 2;   /* dangles can be 0 (no dangles) or 2, default is 2 */

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAheat_cmdline_parser(argc, argv, &args_info) != 0) exit(1);

  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    dangles = args_info.dangles_arg;
    if(dangles % 2){
      warn_user("using default dangles = 2");
      dangles = 2;
    }
  }
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* Tmin */
  if(args_info.Tmin_given)        T_min = args_info.Tmin_arg;
  /* Tmax */
  if(args_info.Tmax_given)        T_max = args_info.Tmax_arg;
  /* step size */
  if(args_info.stepsize_given)    h = args_info.stepsize_arg;
  /* ipoints */
  if(args_info.ipoints_given){
    mpoints = args_info.ipoints_arg;
    if (mpoints < 1)    mpoints = 1;
    if (mpoints > 100)  mpoints = 100;
  }

  /* free allocated memory of command line data structure */
  RNAheat_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if (ParamFile!=NULL) read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  do {
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty) print_tty_input_seq();

    /* skip fasta headers and comments */
    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string);
      /* skip fasta headers and comments */
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      string = strdup(input_string);
      length = (int) strlen(string);
      free(input_string);
    }

    if(noconv)  str_RNA2RNA(string);
    else        str_DNA2RNA(string);

    if (istty) printf("length = %d\n", length);

    heat_capacity(string, T_min, T_max, h, mpoints);
    free(string);
    (void) fflush(stdout);
  } while (1);
  return 0;
}
Exemplo n.º 9
0
int main(int argc, char *argv[]){
  struct  RNALfold_args_info  args_info;
  char                        *input_string, *c, *string, *structure, *ParamFile, *ns_bases;
  int                         i, length, l, sym, r, istty, noconv, maxdist;
  double                      energy, min_en;
  unsigned int                input_type;

  string = structure = ParamFile = ns_bases = NULL;
  do_backtrack  = 1;
  noconv        = 0;
  maxdist       = 150;

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNALfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* set the maximum base pair span */
  if(args_info.span_given)        maxdist = args_info.span_arg;
  
  /* check for errorneous parameter options */
  if(maxdist < 0){
    RNALfold_cmdline_parser_print_help();
    exit(EXIT_FAILURE);
  }

  /* free allocated memory of command line data structure */
  RNALfold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);
   
  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
        
  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  do{
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty) print_tty_input_seq();

    /* skip fasta header and comment lines */
    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string);
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      length = (int)    strlen(input_string);
      string = strdup(input_string);
      free(input_string);
    }
    printf("%s\n", string);

    structure = (char *) space((unsigned) length+1);

    if(noconv)  str_RNA2RNA(string);
    else        str_DNA2RNA(string);

    if(istty) printf("length = %d\n", length);
    /*
    ########################################################
    # done with 'stdin' handling
    ########################################################
    */

    /* initialize_fold(length); */
    update_fold_params();
    min_en = Lfold((const char *)string, structure, maxdist);
    printf("%s", structure);

    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    (void) fflush(stdout);
    free(string);
    free(structure); 
  } while (1);
  return 0;
}