示例#1
0
int main(int argc, char *argv[])
{
  struct        RNAcofold_args_info args_info;
  unsigned int  input_type;
  char          *string, *input_string;
  char    *structure, *cstruc, *rec_sequence, *orig_sequence, *rec_id, **rec_rest;
  char    fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH];
  char    *ParamFile;
  char    *ns_bases, *c;
  char    *Concfile;
  int     i, length, l, sym, r, cl;
  double  min_en;
  double  kT, sfact, betaScale;
  int     pf, istty;
  int     noconv, noPS;
  int     doT;    /*compute dimere free energies etc.*/
  int     doC;    /*toggle to compute concentrations*/
  int     doQ;    /*toggle to compute prob of base being paired*/
  int     cofi;   /*toggle concentrations stdin / file*/
  plist   *prAB;
  plist   *prAA;   /*pair probabilities of AA dimer*/
  plist   *prBB;
  plist   *prA;
  plist   *prB;
  plist   *mfAB;
  plist   *mfAA;   /*pair mfobabilities of AA dimer*/
  plist   *mfBB;
  plist   *mfA;
  plist   *mfB;
  double  *ConcAandB;
  unsigned int    rec_type, read_opt;
  pf_paramT       *pf_parameters;
  model_detailsT  md;


  /*
  #############################################
  # init variables and parameter options
  #############################################
  */
  dangles       = 2;
  sfact         = 1.07;
  bppmThreshold = 1e-5;
  noconv        = 0;
  noPS          = 0;
  do_backtrack  = 1;
  pf            = 0;
  doT           = 0;
  doC           = 0;
  doQ           = 0;
  cofi          = 0;
  betaScale     = 1.;
  gquad         = 0;
  ParamFile     = NULL;
  pf_parameters = NULL;
  string        = NULL;
  Concfile      = NULL;
  structure     = NULL;
  cstruc        = NULL;
  ns_bases      = NULL;
  rec_type      = read_opt = 0;
  rec_id        = rec_sequence = orig_sequence = NULL;
  rec_rest      = NULL;

  set_model_details(&md);
  /*
  #############################################
  # check the command line prameters
  #############################################
  */
  if(RNAcofold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)            temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.constraint_given)      fold_constrained=1;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)         md.special_hp = tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
     md.dangles = dangles = args_info.dangles_arg;
  }
  /* do not allow weak pairs */
  if(args_info.noLP_given)            md.noLP = noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)            md.noGU = noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given)     md.noGUclosure = no_closingGU = 1;
  /* gquadruplex support */
  if(args_info.gquad_given)           md.gquad = gquad = 1;
  /* enforce canonical base pairs in any case? */
  if(args_info.canonicalBPonly_given) md.canonicalBPonly = canonicalBPonly = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)          noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given)     energy_set = args_info.energyModel_arg;
  /*  */
  if(args_info.noPS_given)            noPS = 1;
  /* take another energy parameter set */
  if(args_info.paramFile_given)       ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)             ns_bases = strdup(args_info.nsp_arg);
  /* set pf scaling factor */
  if(args_info.pfScale_given)         sfact = args_info.pfScale_arg;

  if(args_info.all_pf_given)          doT = pf = 1;
  /* concentrations from stdin */
  if(args_info.concentrations_given)  doC = doT = pf = 1;
  /* set the bppm threshold for the dotplot */
  if(args_info.bppmThreshold_given)
    bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg));
  /* concentrations in file */
  if(args_info.betaScale_given)       betaScale = args_info.betaScale_arg;
  if(args_info.concfile_given){
    Concfile = strdup(args_info.concfile_arg);
    doC = cofi = doT = pf = 1;
  }
  /* partition function settings */
  if(args_info.partfunc_given){
    pf = 1;
    if(args_info.partfunc_arg != -1)
      do_backtrack = args_info.partfunc_arg;
  }
  /* free allocated memory of command line data structure */
  RNAcofold_cmdline_parser_free (&args_info);


  /*
  #############################################
  # begin initializing
  #############################################
  */
  if(pf && gquad){
    nrerror("G-Quadruplex support is currently not available for partition function computations");
  }

  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }
  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  /* print user help if we get input from tty */
  if(istty){
    printf("Use '&' to connect 2 sequences that shall form a complex.\n");
    if(fold_constrained){
      print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
      print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
    }
    else print_tty_input_seq();
  }

  /* set options we wanna pass to read_record */
  if(istty)             read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
  if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST;

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){

    /*
    ########################################################
    # init everything according to the data we've read
    ########################################################
    */
    if(rec_id){
      if(!istty) printf("%s\n", rec_id);
      (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
    }
    else fname[0] = '\0';

    cut_point = -1;

    rec_sequence  = tokenize(rec_sequence); /* frees input_string and sets cut_point */
    length    = (int) strlen(rec_sequence);
    structure = (char *) space((unsigned) length+1);

    /* parse the rest of the current dataset to obtain a structure constraint */
    if(fold_constrained){
      cstruc = NULL;
      int cp = cut_point;
      unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0;
      coptions |= VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK;
      getConstraint(&cstruc, (const char **)rec_rest, coptions);
      cstruc = tokenize(cstruc);
      if(cut_point != cp) nrerror("cut point in sequence and structure constraint differs");
      cl = (cstruc) ? (int)strlen(cstruc) : 0;

      if(cl == 0)           warn_user("structure constraint is missing");
      else if(cl < length)  warn_user("structure constraint is shorter than sequence");
      else if(cl > length)  nrerror("structure constraint is too long");

      if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1));
    }

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(rec_sequence);
    /* store case-unmodified sequence */
    orig_sequence = strdup(rec_sequence);
    /* convert sequence to uppercase letters only */
    str_uppercase(rec_sequence);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
    }

    /*
    ########################################################
    # begin actual computations
    ########################################################
    */

    if (doC) {
      FILE *fp;
      if (cofi) { /* read from file */
        fp = fopen(Concfile, "r");
        if (fp==NULL) {
          fprintf(stderr, "could not open concentration file %s", Concfile);
          nrerror("\n");
        }
        ConcAandB = read_concentrations(fp);
        fclose(fp);
      } else {
        printf("Please enter concentrations [mol/l]\n format: ConcA ConcB\n return to end\n");
        ConcAandB = read_concentrations(stdin);
      }
    }
    /*compute mfe of AB dimer*/
    min_en = cofold(rec_sequence, structure);
    assign_plist_from_db(&mfAB, structure, 0.95);

    {
      char *pstring, *pstruct;
      if (cut_point == -1) {
        pstring = strdup(orig_sequence);
        pstruct = strdup(structure);
      } else {
        pstring = costring(orig_sequence);
        pstruct = costring(structure);
      }
      printf("%s\n%s", pstring, pstruct);
      if (istty)
        printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
      else
        printf(" (%6.2f)\n", min_en);

      (void) fflush(stdout);

      if (!noPS) {
        char annot[512] = "";
        if (fname[0]!='\0') {
          strcpy(ffname, fname);
          strcat(ffname, "_ss.ps");
        } else {
          strcpy(ffname, "rna.ps");
        }
        if (cut_point >= 0)
          sprintf(annot,
                  "1 %d 9  0 0.9 0.2 omark\n%d %d 9  1 0.1 0.2 omark\n",
                  cut_point-1, cut_point+1, length+1);
        if(gquad){
          if (!noPS) (void) PS_rna_plot_a_gquad(pstring, pstruct, ffname, annot, NULL);
        } else {
          if (!noPS) (void) PS_rna_plot_a(pstring, pstruct, ffname, annot, NULL);
        }
      }
      free(pstring);
      free(pstruct);
    }

    if (length>2000)  free_co_arrays();

    /*compute partition function*/
    if (pf) {
      cofoldF AB, AA, BB;
      FLT_OR_DBL *probs;
      if (dangles==1) {
        dangles=2;   /* recompute with dangles as in pf_fold() */
        min_en = energy_of_structure(rec_sequence, structure, 0);
        dangles=1;
      }

      kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */
      pf_scale = exp(-(sfact*min_en)/kT/length);
      if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);

      pf_parameters = get_boltzmann_factors(temperature, betaScale, md, pf_scale);

      if (cstruc!=NULL)
        strncpy(structure, cstruc, length+1);
      AB = co_pf_fold_par(rec_sequence, structure, pf_parameters, do_backtrack, fold_constrained);

      if (do_backtrack) {
        char *costruc;
        costruc = (char *) space(sizeof(char)*(strlen(structure)+2));
        if (cut_point<0) printf("%s", structure);
        else {
          strncpy(costruc, structure, cut_point-1);
          strcat(costruc, "&");
          strcat(costruc, structure+cut_point-1);
          printf("%s", costruc);
        }
        if (!istty) printf(" [%6.2f]\n", AB.FAB);
        else printf("\n");/*8.6.04*/
      }
      if ((istty)||(!do_backtrack))
        printf(" free energy of ensemble = %6.2f kcal/mol\n", AB.FAB);
      printf(" frequency of mfe structure in ensemble %g",
             exp((AB.FAB-min_en)/kT));

      printf(" , delta G binding=%6.2f\n", AB.FcAB - AB.FA - AB.FB);

      probs = export_co_bppm();
      assign_plist_from_pr(&prAB, probs, length, bppmThreshold);

      /* if (doQ) make_probsum(length,fname); */ /*compute prob of base paired*/
      /* free_co_arrays(); */
      if (doT) { /* cofold of all dimers, monomers */
        int Blength, Alength;
        char  *Astring, *Bstring, *orig_Astring, *orig_Bstring;
        char *Newstring;
        char Newname[30];
        char comment[80];
        if (cut_point<0) {
          printf("Sorry, i cannot do that with only one molecule, please give me two or leave it\n");
          free(mfAB);
          free(prAB);
          continue;
        }
        if (dangles==1) dangles=2;
        Alength=cut_point-1;        /*length of first molecule*/
        Blength=length-cut_point+1; /*length of 2nd molecule*/

        Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/
        Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/
        strncat(Astring,rec_sequence,Alength);
        strncat(Bstring,rec_sequence+Alength,Blength);

        orig_Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/
        orig_Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/
        strncat(orig_Astring,orig_sequence,Alength);
        strncat(orig_Bstring,orig_sequence+Alength,Blength);

        /* compute AA dimer */
        AA=do_partfunc(Astring, Alength, 2, &prAA, &mfAA, pf_parameters);
        /* compute BB dimer */
        BB=do_partfunc(Bstring, Blength, 2, &prBB, &mfBB, pf_parameters);
        /*free_co_pf_arrays();*/

        /* compute A monomer */
        do_partfunc(Astring, Alength, 1, &prA, &mfA, pf_parameters);

        /* compute B monomer */
        do_partfunc(Bstring, Blength, 1, &prB, &mfB, pf_parameters);

        compute_probabilities(AB.F0AB, AB.FA, AB.FB, prAB, prA, prB, Alength);
        compute_probabilities(AA.F0AB, AA.FA, AA.FA, prAA, prA, prA, Alength);
        compute_probabilities(BB.F0AB, BB.FA, BB.FA, prBB, prA, prB, Blength);
        printf("Free Energies:\nAB\t\tAA\t\tBB\t\tA\t\tB\n%.6f\t%6f\t%6f\t%6f\t%6f\n",
               AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB);

        if (doC) {
          do_concentrations(AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB, ConcAandB);
          free(ConcAandB);/*freeen*/
        }

        if (fname[0]!='\0') {
          strcpy(ffname, fname);
          strcat(ffname, "_dp5.ps");
        } else strcpy(ffname, "dot5.ps");
        /*output of the 5 dot plots*/

        /*AB dot_plot*/
        /*write Free Energy into comment*/
        sprintf(comment,"\n%%Heterodimer AB FreeEnergy= %.9f\n", AB.FcAB);
        /*reset cut_point*/
        cut_point=Alength+1;
        /*write New name*/
        strcpy(Newname,"AB");
        strcat(Newname,ffname);
        (void)PS_dot_plot_list(orig_sequence, Newname, prAB, mfAB, comment);

        /*AA dot_plot*/
        sprintf(comment,"\n%%Homodimer AA FreeEnergy= %.9f\n",AA.FcAB);
        /*write New name*/
        strcpy(Newname,"AA");
        strcat(Newname,ffname);
        /*write AA sequence*/
        Newstring=(char*)space((2*Alength+1)*sizeof(char));
        strcpy(Newstring,orig_Astring);
        strcat(Newstring,orig_Astring);
        (void)PS_dot_plot_list(Newstring, Newname, prAA, mfAA, comment);
        free(Newstring);

        /*BB dot_plot*/
        sprintf(comment,"\n%%Homodimer BB FreeEnergy= %.9f\n",BB.FcAB);
        /*write New name*/
        strcpy(Newname,"BB");
        strcat(Newname,ffname);
        /*write BB sequence*/
        Newstring=(char*)space((2*Blength+1)*sizeof(char));
        strcpy(Newstring,orig_Bstring);
        strcat(Newstring,orig_Bstring);
        /*reset cut_point*/
        cut_point=Blength+1;
        (void)PS_dot_plot_list(Newstring, Newname, prBB, mfBB, comment);
        free(Newstring);

        /*A dot plot*/
        /*reset cut_point*/
        cut_point=-1;
        sprintf(comment,"\n%%Monomer A FreeEnergy= %.9f\n",AB.FA);
        /*write New name*/
        strcpy(Newname,"A");
        strcat(Newname,ffname);
        /*write BB sequence*/
        (void)PS_dot_plot_list(orig_Astring, Newname, prA, mfA, comment);

        /*B monomer dot plot*/
        sprintf(comment,"\n%%Monomer B FreeEnergy= %.9f\n",AB.FB);
        /*write New name*/
        strcpy(Newname,"B");
        strcat(Newname,ffname);
        /*write BB sequence*/
        (void)PS_dot_plot_list(orig_Bstring, Newname, prB, mfB, comment);
        free(Astring); free(Bstring); free(orig_Astring); free(orig_Bstring);
        free(prAB); free(prAA); free(prBB); free(prA); free(prB);
        free(mfAB); free(mfAA); free(mfBB); free(mfA); free(mfB);

      } /*end if(doT)*/

      free(pf_parameters);
    }/*end if(pf)*/


    if (do_backtrack) {
      if (fname[0]!='\0') {
        strcpy(ffname, fname);
        strcat(ffname, "_dp.ps");
      } else strcpy(ffname, "dot.ps");

      if (!doT) {
        if (pf) {          (void) PS_dot_plot_list(rec_sequence, ffname, prAB, mfAB, "doof");
        free(prAB);}
        free(mfAB);
      }
    }
    if (!doT) free_co_pf_arrays();

    (void) fflush(stdout);
    
    /* clean up */
    if(cstruc) free(cstruc);
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(orig_sequence);
    free(structure);
    /* free the rest of current dataset */
    if(rec_rest){
      for(i=0;rec_rest[i];i++) free(rec_rest[i]);
      free(rec_rest);
    }
    rec_id = rec_sequence = orig_sequence = structure = cstruc = NULL;
    rec_rest = NULL;

    /* print user help for the next round if we get input from tty */
    if(istty){
      printf("Use '&' to connect 2 sequences that shall form a complex.\n");
      if(fold_constrained){
        print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
        print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
      }
      else print_tty_input_seq();
    }
  }
  return EXIT_SUCCESS;
}
示例#2
0
int main(int argc, char *argv[]){
  struct        RNA2Dfold_args_info args_info;
  unsigned int  input_type;
  char *string, *input_string, *orig_sequence;
  char *mfe_structure=NULL, *structure1=NULL, *structure2=NULL, *reference_struc1=NULL, *reference_struc2=NULL;
  char  *ParamFile=NULL;
  int   i, j, length, l;
  double min_en;
  double kT, sfact=1.07;
  int   pf=0,istty;
  int noconv=0;
  int circ=0;
  int maxDistance1 = -1;
  int maxDistance2 = -1;
  int do_backtrack = 1;
  int stBT = 0;
  int nstBT = 0;
  string=NULL;
  dangles = 2;
  struct nbhoods *neighborhoods = NULL;
  struct nbhoods *neighborhoods_cur = NULL;

  string = input_string = orig_sequence = NULL;
  /*
  #############################################
  # check the command line prameters
  #############################################
  */
  if(RNA2Dfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);

  /* temperature */
  if(args_info.temp_given)
    temperature = args_info.temp_arg;

  /* max distance to 1st reference structure */
  if(args_info.maxDist1_given)
    maxDistance1 = args_info.maxDist1_arg;

  /* max distance to 2nd reference structure */
  if(args_info.maxDist2_given)
    maxDistance2 = args_info.maxDist2_arg;

  /* compute partition function and boltzmann probabilities */
  if(args_info.partfunc_given)
    pf = 1;

  /* do stachastic backtracking */
  if(args_info.stochBT_given){
    pf = 1;
    stBT = 1;
    nstBT = args_info.stochBT_arg;
  }

  if(args_info.noTetra_given)
    tetra_loop=0;

  /* assume RNA sequence to be circular */
  if(args_info.circ_given)
    circ=1;

  /* dangle options */
  if(args_info.dangles_given)
    dangles=args_info.dangles_arg;

  /* set number of threads for parallel computation */
  if(args_info.numThreads_given)
#ifdef _OPENMP
  omp_set_num_threads(args_info.numThreads_arg);
#else
  nrerror("\'j\' option is available only if compiled with OpenMP support!");
#endif

  /* get energy parameter file name */
  if(args_info.parameterFile_given)
    ParamFile = strdup(args_info.parameterFile_arg);

  /* do not allow GU pairs ? */
  if(args_info.noGU_given)
    noGU = 1;

  /* do not allow GU pairs at the end of helices? */
  if(args_info.noClosingGU_given)
    no_closingGU = 1;

  /* pf scaling factor */
  if(args_info.pfScale_given)
    sfact = args_info.pfScale_arg;

  /* do not backtrack structures ? */
  if(args_info.noBT_given)
    do_backtrack = 0;

  for (i = 0; i < args_info.neighborhood_given; i++){
    int kappa, lambda;
    kappa = lambda = 0;
    if(sscanf(args_info.neighborhood_arg[i], "%d:%d", &kappa, &lambda) == 2);
    if ((kappa>-2) && (lambda>-2)){
      if(neighborhoods_cur != NULL){
        neighborhoods_cur->next = (nbhoods *)space(sizeof(nbhoods));
        neighborhoods_cur = neighborhoods_cur->next;
      }
      else{
        neighborhoods = (nbhoods *)space(sizeof(nbhoods));
        neighborhoods_cur = neighborhoods;
      }
      neighborhoods_cur->k = kappa;
      neighborhoods_cur->l = lambda;
      neighborhoods_cur->next = NULL;
    }
  }
  /* free allocated memory of command line data structure */
  RNA2Dfold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin actual program code
  #############################################
  */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

 /*
  #############################################
  # main loop, continue until end of file
  #############################################
  */
  do {
    if (istty)
      print_tty_input_seq_str("Input strings\n1st line: sequence (upper or lower case)\n2nd + 3rd line: reference structures (dot bracket notation)\n@ to quit\n");

    while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
      printf(">%s\n", input_string); /* print fasta header if available */
      free(input_string);
    }

    /* break on any error, EOF or quit request */
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    else{
      length = (int)    strlen(input_string);
      string = strdup(input_string);
      free(input_string);
    }

    mfe_structure = (char *) space((unsigned) length+1);
    structure1 = (char *) space((unsigned) length+1);
    structure2 = (char *) space((unsigned) length+1);

    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ break;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      reference_struc1 = strdup(input_string);
      free(input_string);
      if(strlen(reference_struc1) != length)
        nrerror("sequence and 1st reference structure have unequal length");
    }
    else nrerror("1st reference structure missing\n");
    strncpy(structure1, reference_struc1, length);

    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ break;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      reference_struc2 = strdup(input_string);
      free(input_string);
      if(strlen(reference_struc2) != length)
        nrerror("sequence and 2nd reference structure have unequal length");
    }
    else nrerror("2nd reference structure missing\n");
    strncpy(structure2, reference_struc2, length);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(string);
    /* store case-unmodified sequence */
    orig_sequence = strdup(string);
    /* convert sequence to uppercase letters only */
    str_uppercase(string);

    if (istty)  printf("length = %d\n", length);

    min_en = (circ) ? circfold(string, mfe_structure) : fold(string, mfe_structure);

    printf("%s\n%s", orig_sequence, mfe_structure);

    if (istty)
      printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
    else
      printf(" (%6.2f)\n", min_en);

    printf("%s (%6.2f) <ref 1>\n", structure1, (circ) ? energy_of_circ_structure(string, structure1, 0) : energy_of_structure(string,structure1, 0));
    printf("%s (%6.2f) <ref 2>\n", structure2, (circ) ? energy_of_circ_structure(string, structure2, 0) : energy_of_structure(string,structure2, 0));

    /* get all variables need for the folding process (some memory will be preallocated here too) */
    TwoDfold_vars *mfe_vars = get_TwoDfold_variables(string, structure1, structure2, circ);
    mfe_vars->do_backtrack = do_backtrack;
    TwoDfold_solution *mfe_s = TwoDfoldList(mfe_vars, maxDistance1, maxDistance2);

    if(!pf){
#ifdef COUNT_STATES
      printf("k\tl\tn\tMFE\tMFE-structure\n");
      for(i = 0; mfe_s[i].k != INF; i++){
        printf("%d\t%d\t%lu\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_vars->N_F5[length][mfe_s[i].k][mfe_s[i].l/2], mfe_s[i].en, mfe_s[i].s);
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(mfe_s);
#else
      printf("k\tl\tMFE\tMFE-structure\n");
      for(i = 0; mfe_s[i].k != INF; i++){
        printf("%d\t%d\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_s[i].en, mfe_s[i].s);
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(mfe_s);
#endif
    }

    if(pf){
      int maxD1 = (int) mfe_vars->maxD1;
      int maxD2 = (int) mfe_vars->maxD2;
      float mmfe = INF;
      double Q;
      for(i = 0; mfe_s[i].k != INF; i++){
        if(mmfe > mfe_s[i].en) mmfe = mfe_s[i].en;
      }
      kT = (temperature+K0)*GASCONST/1000.0; /* in Kcal */
      pf_scale = exp(-(sfact*mmfe)/kT/length);
      if (length>2000)
        fprintf(stdout, "scaling factor %f\n", pf_scale);

      /* get all variables need for the folding process (some memory will be preallocated there too) */
      //TwoDpfold_vars *q_vars = get_TwoDpfold_variables_from_MFE(mfe_vars);
      /* we dont need the mfe vars and arrays anymore, so we can savely free their occupying memory */
      destroy_TwoDfold_variables(mfe_vars);
      TwoDpfold_vars *q_vars = get_TwoDpfold_variables(string, structure1, structure2, circ);

      TwoDpfold_solution *pf_s = TwoDpfoldList(q_vars, maxD1, maxD2);

      Q = 0.;
      
      for(i = 0; pf_s[i].k != INF; i++){
        Q += pf_s[i].q;
      }

      double fee = (-log(Q)-length*log(pf_scale))*kT;

      if(!stBT){
        printf("free energy of ensemble = %6.2f kcal/mol\n",fee);
        printf("k\tl\tP(neighborhood)\tP(MFE in neighborhood)\tP(MFE in ensemble)\tMFE\tE_gibbs\tMFE-structure\n");
        for(i=0; pf_s[i].k != INF;i++){
          float free_energy = (-log((float)pf_s[i].q)-length*log(pf_scale))*kT;
          if((pf_s[i].k != mfe_s[i].k) || (pf_s[i].l != mfe_s[i].l))
            nrerror("This should never happen!");
          fprintf(stdout,
                  "%d\t%d\t%2.8f\t%2.8f\t%2.8f\t%6.2f\t%6.2f\t%s\n",
                  pf_s[i].k,
                  pf_s[i].l,
                  (float)(pf_s[i].q)/(float)Q,
                  exp((free_energy-mfe_s[i].en)/kT),
                  exp((fee-mfe_s[i].en)/kT),
                  mfe_s[i].en,
                  free_energy,
                  mfe_s[i].s);
        }
      }
      else{
        init_rand();
        if(neighborhoods != NULL){
          nbhoods *tmp, *tmp2;
          for(tmp = neighborhoods; tmp != NULL; tmp = tmp->next){
            int k,l;
            k = tmp->k;
            l = tmp->l;
            for(i = 0; i < nstBT; i++){
              char *s = TwoDpfold_pbacktrack(q_vars, k, l);
              printf("%d\t%d\t%s\t%6.2f\n", k, l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0));
            }
          }
        }
        else{
          for(i=0; pf_s[i].k != INF;i++){
            for(l = 0; l < nstBT; l++){
              char *s = TwoDpfold_pbacktrack(q_vars, pf_s[i].k, pf_s[i].l);
              printf("%d\t%d\t%s\t%6.2f\n", pf_s[i].k, pf_s[i].l, s, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0));
            }
          }
        }
      }
      free_pf_arrays();

      for(i=0; mfe_s[i].k != INF;i++){
        if(mfe_s[i].s) free(mfe_s[i].s);
      }
      free(pf_s);
      free(mfe_s);
      /* destroy the q_vars */
      destroy_TwoDpfold_variables(q_vars);
    }
    else
      destroy_TwoDfold_variables(mfe_vars);

    free_arrays();
    free(string);
    free(orig_sequence);
    free(mfe_structure);
    free(structure1);
    free(structure2);
    free(reference_struc1);
    free(reference_struc2);
    string = orig_sequence = mfe_structure = NULL;
  } while (1);
  return 0;
}
示例#3
0
int main(int argc, char *argv[]){
  struct        RNALalifold_args_info args_info;
  char          *string, *structure, *ParamFile, *ns_bases, *c;
  char          ffname[80], gfname[80], fname[80];
  int           n_seq, i, length, sym, r, maxdist;
  int           mis, pf, istty;
  float         cutoff;
  double        min_en, real_en, sfact;
  char          *AS[MAX_NUM_NAMES];          /* aligned sequences */
  char          *names[MAX_NUM_NAMES];       /* sequence names */
  FILE          *clust_file = stdin;

  string = structure = ParamFile = ns_bases = NULL;
  mis = pf      = 0;
  maxdist       = 70;
  do_backtrack  = 1;
  dangles       = 2;
  sfact         = 1.07;
  cutoff        = 0.0005;

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNALalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* set pf scaling factor */
  if(args_info.pfScale_given)     sfact = args_info.pfScale_arg;
  /* partition function settings */
  if(args_info.partfunc_given){
    pf = 1;
    if(args_info.partfunc_arg != -1)
      do_backtrack = args_info.partfunc_arg;
  }
  /* set cfactor */
  if(args_info.cfactor_given)     cv_fact = args_info.cfactor_arg;
  /* set nfactor */
  if(args_info.nfactor_given)     nc_fact = args_info.nfactor_arg;

  /* set the maximum base pair span */
  if(args_info.span_given)        maxdist = args_info.span_arg;
  /* set the pair probability cutoff */
  if(args_info.cutoff_given)      cutoff  = args_info.cutoff_arg;
  /* calculate most informative sequence */
  if(args_info.mis_given)         mis = 1;

  /* check unnamed options a.k.a. filename of input alignment */
  if(args_info.inputs_num == 1){
    clust_file = fopen(args_info.inputs[0], "r");
    if(clust_file == NULL){
      fprintf(stderr, "can't open %s\n", args_info.inputs[0]);
    }
  }
  else{
    RNALalifold_cmdline_parser_print_help();
    exit(1);
  }

  /* free allocated memory of command line data structure */
  RNALalifold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  if (istty && (clust_file == stdin)) {
    print_tty_input_seq_str("Input aligned sequences in clustalw format");
  }

  n_seq = read_clustal(clust_file, AS, names);
  if (clust_file != stdin) fclose(clust_file);
  if (n_seq==0)
    nrerror("no sequences found");

  length = (int) strlen(AS[0]);
  if (length<maxdist) {
    fprintf(stderr, "Alignment length < window size: setting L=%d\n",length);
    maxdist=length;
  }

  structure = (char *) space((unsigned) length+1);

  /*
  #############################################
  # begin calculations
  #############################################
  */
  update_fold_params();
  if(!pf)
    min_en = aliLfold(AS, structure, maxdist);
  {
    eos_debug=-1; /* shut off warnings about nonstandard pairs */
    /*   for (i=0; AS[i]!=NULL; i++)
    s += energy_of_struct(AS[i], structure);
    real_en = s/i;*/
  }
  string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS);
  printf("%s\n%s\n", string, structure);
  /*  if (istty)
    printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n",
           min_en, real_en, min_en - real_en);
  else
    printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en );
  */
  strcpy(ffname, "alirna.ps");
  strcpy(gfname, "alirna.g");

  /*  if (length<=2500) {
    char *A;
    A = annote(structure, (const char**) AS);
    (void) PS_rna_plot_a(string, structure, ffname, NULL, A);
    free(A);
  } else
    fprintf(stderr,"INFO: structure too long, not doing xy_plot\n");
  */
  /* {*/ /* free mfe arrays but preserve base_pair for PS_dot_plot */
  /*  struct bond  *bp;
    bp = base_pair; base_pair = space(16);
    free_alifold_arrays();  / * frees base_pair *  /
    base_pair = bp;
  }*/
  if (pf) {
    double energy, kT;
    plist *pl;
    char * mfe_struc;

    mfe_struc = strdup(structure);

    kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
    pf_scale = -1;/*exp(-(sfact*min_en)/kT/length);*/
    if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
    fflush(stdout);

    /* init_alipf_fold(length); */

    /* energy = alipfW_fold(AS, structure, &pl, maxdist, cutoff); */

    if (do_backtrack) {
      printf("%s", structure);
      /*if (!istty) printf(" [%6.2f]\n", energy);
        else */
      printf("\n");
    }
    /*if ((istty)||(!do_backtrack))
      printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
    useless!!*/
    /* printf(" frequency of mfe structure in ensemble %g\n",
       exp((energy-min_en)/kT));*/

    if (do_backtrack) {
      FILE *aliout;
      cpair *cp;
      strcpy(ffname, "alifold.out");
      aliout = fopen(ffname, "w");
      if (!aliout) {
        fprintf(stderr, "can't open %s    skipping output\n", ffname);
      } else {
        fprintf(aliout, "%d sequence; length of alignment %d\n",
                n_seq, length);
        fprintf(aliout, "alifold output\n");

        fprintf(aliout, "%s\n", structure);
      }
      strcpy(ffname, "alidotL.ps");
      cp = make_color_pinfo2(AS,pl,n_seq);
      (void) PS_color_dot_plot_turn(string, cp, ffname, maxdist);
      free(cp);
    }
    free(mfe_struc);
    free(pl);
  }
  free(base_pair);
  (void) fflush(stdout);
  free(string);
  free(structure);
  for (i=0; AS[i]; i++) {
    free(AS[i]); free(names[i]);
  }
  return 0;
}
示例#4
0
/*--------------------------------------------------------------------------*/
int main(int argc, char *argv[]){
  struct        RNAalifold_args_info args_info;
  unsigned int  input_type;
  char          ffname[FILENAME_MAX_LENGTH], gfname[FILENAME_MAX_LENGTH], fname[FILENAME_MAX_LENGTH];
  char          *input_string, *string, *structure, *cstruc, *ParamFile, *ns_bases, *c;
  int           n_seq, i, length, sym, r, noPS, with_sci;
  int           endgaps, mis, circular, doAlnPS, doColor, doMEA, n_back, eval_energy, pf, istty;
  double        min_en, real_en, sfact, MEAgamma, bppmThreshold, betaScale;
  char          *AS[MAX_NUM_NAMES];          /* aligned sequences */
  char          *names[MAX_NUM_NAMES];       /* sequence names */
  FILE          *clust_file = stdin;
  pf_paramT     *pf_parameters;
  model_detailsT  md;

  fname[0] = ffname[0] = gfname[0] = '\0';
  string = structure = cstruc = ParamFile = ns_bases = NULL;
  pf_parameters = NULL;
  endgaps = mis = pf = circular = doAlnPS = doColor = n_back = eval_energy = oldAliEn = doMEA = ribo = noPS = 0;
  do_backtrack  = 1;
  dangles       = 2;
  gquad         = 0;
  sfact         = 1.07;
  bppmThreshold = 1e-6;
  MEAgamma      = 1.0;
  betaScale     = 1.;
  with_sci      = 0;

  set_model_details(&md);

  /*
  #############################################
  # check the command line prameters
  #############################################
  */
  if(RNAalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.constraint_given)  fold_constrained=1;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     md.special_hp = tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
      md.dangles = dangles=args_info.dangles_arg;
  }
  /* do not allow weak pairs */
  if(args_info.noLP_given)        md.noLP = noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        md.noGU = noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1;
  /* gquadruplex support */
  if(args_info.gquad_given)       md.gquad = gquad = 1;
  /* sci computation */
  if(args_info.sci_given)         with_sci = 1;
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* set pf scaling factor */
  if(args_info.pfScale_given)     sfact = args_info.pfScale_arg;
  /* assume RNA sequence to be circular */
  if(args_info.circ_given)        circular=1;
  /* do not produce postscript output */
  if(args_info.noPS_given)        noPS = 1;
  /* partition function settings */
  if(args_info.partfunc_given){
    pf = 1;
    if(args_info.partfunc_arg != -1)
      do_backtrack = args_info.partfunc_arg;
  }
  /* MEA (maximum expected accuracy) settings */
  if(args_info.MEA_given){
    pf = doMEA = 1;
    if(args_info.MEA_arg != -1)
      MEAgamma = args_info.MEA_arg;
  }
  if(args_info.betaScale_given)   betaScale = args_info.betaScale_arg;
  /* set the bppm threshold for the dotplot */
  if(args_info.bppmThreshold_given)
    bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg));
  /* set cfactor */
  if(args_info.cfactor_given)     cv_fact = args_info.cfactor_arg;
  /* set nfactor */
  if(args_info.nfactor_given)     nc_fact = args_info.nfactor_arg;
  if(args_info.endgaps_given)     endgaps = 1;
  if(args_info.mis_given)         mis = 1;
  if(args_info.color_given)       doColor=1;
  if(args_info.aln_given)         doAlnPS=1;
  if(args_info.old_given)         oldAliEn = 1;
  if(args_info.stochBT_given){
    n_back = args_info.stochBT_arg;
    do_backtrack = 0;
    pf = 1;
    init_rand();
  }
  if(args_info.stochBT_en_given){
    n_back = args_info.stochBT_en_arg;
    do_backtrack = 0;
    pf = 1;
    eval_energy = 1;
    init_rand();
  }
  if(args_info.ribosum_file_given){
    RibosumFile = strdup(args_info.ribosum_file_arg);
    ribo = 1;
  }
  if(args_info.ribosum_scoring_given){
    RibosumFile = NULL;
    ribo = 1;
  }
  if(args_info.layout_type_given)
    rna_plot_type = args_info.layout_type_arg;

  /* alignment file name given as unnamed option? */
  if(args_info.inputs_num == 1){
    clust_file = fopen(args_info.inputs[0], "r");
    if (clust_file == NULL) {
      fprintf(stderr, "can't open %s\n", args_info.inputs[0]);
    }
  }

  /* free allocated memory of command line data structure */
  RNAalifold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if(circular && gquad){
    nrerror("G-Quadruplex support is currently not available for circular RNA structures");
  }

  make_pair_matrix();

  if (circular && noLonelyPairs)
    warn_user("depending on the origin of the circular sequence, "
            "some structures may be missed when using --noLP\n"
            "Try rotating your sequence a few times\n");

  if (ParamFile != NULL) read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  /*
  ########################################################
  # handle user input from 'stdin' if necessary
  ########################################################
  */
  if(fold_constrained){
    if(istty){
      print_tty_constraint_full();
      print_tty_input_seq_str("");
    }
    input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
    if(input_type & VRNA_INPUT_QUIT){ return 0;}
    else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
      cstruc = strdup(input_string);
      free(input_string);
    }
    else warn_user("constraints missing");
  }

  if (istty && (clust_file == stdin))
    print_tty_input_seq_str("Input aligned sequences in clustalw or stockholm format\n(enter a line starting with \"//\" to indicate the end of your input)");

  n_seq = read_clustal(clust_file, AS, names);
  if (n_seq==0) nrerror("no sequences found");

  if (clust_file != stdin) fclose(clust_file);
  /*
  ########################################################
  # done with 'stdin' handling, now init everything properly
  ########################################################
  */

  length    = (int)   strlen(AS[0]);
  structure = (char *)space((unsigned) length+1);

  if(fold_constrained && cstruc != NULL)
    strncpy(structure, cstruc, length);

  if (endgaps)
    for (i=0; i<n_seq; i++) mark_endgaps(AS[i], '~');

  /*
  ########################################################
  # begin actual calculations
  ########################################################
  */

  if (circular) {
    int     i;
    double  s = 0;
    min_en    = circalifold((const char **)AS, structure);
    for (i=0; AS[i]!=NULL; i++)
      s += energy_of_circ_structure(AS[i], structure, -1);
    real_en = s/i;
  } else {
    float *ens  = (float *)space(2*sizeof(float));
    min_en      = alifold((const char **)AS, structure);
    if(md.gquad)
      energy_of_ali_gquad_structure((const char **)AS, structure, n_seq, ens);
    else
      energy_of_alistruct((const char **)AS, structure, n_seq, ens);

    real_en     = ens[0];
    free(ens);
  }

  string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS);
  printf("%s\n%s", string, structure);

  if(istty){
    if(with_sci){
      float sci = min_en;
      float e_mean = 0;
      for (i=0; AS[i]!=NULL; i++){
        char *seq = get_ungapped_sequence(AS[i]);
        char *str = (char *)space(sizeof(char) * (strlen(seq) + 1));
        e_mean    += fold(seq, str);
        free(seq);
        free(str);
      }
      e_mean  /= i;
      sci     /= e_mean;

      printf( "\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)"
              "\n SCI = %2.4f\n",
              min_en, real_en, min_en-real_en, sci);
    } else
      printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n",
             min_en, real_en, min_en - real_en);
  } else {
    if(with_sci){
      float sci = min_en;
      float e_mean = 0;
      for (i=0; AS[i]!=NULL; i++){
        char *seq = get_ungapped_sequence(AS[i]);
        char *str = (char *)space(sizeof(char) * (strlen(seq) + 1));
        e_mean    += fold(seq, str);
        free(seq);
        free(str);
      }
      e_mean  /= i;
      sci     /= e_mean;

      printf(" (%6.2f = %6.2f + %6.2f) [%2.4f]\n", min_en, real_en, min_en-real_en, sci);
    }
    else
      printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en );
  }

  strcpy(ffname, "alirna.ps");
  strcpy(gfname, "alirna.g");

  if (!noPS) {
    char **A;
    A = annote(structure, (const char**) AS);

    if(md.gquad){
      if (doColor)
        (void) PS_rna_plot_a_gquad(string, structure, ffname, A[0], A[1]);
      else
        (void) PS_rna_plot_a_gquad(string, structure, ffname, NULL, A[1]);
    } else {
      if (doColor)
        (void) PS_rna_plot_a(string, structure, ffname, A[0], A[1]);
      else
        (void) PS_rna_plot_a(string, structure, ffname, NULL, A[1]);
    }
    free(A[0]); free(A[1]); free(A);
  }
  if (doAlnPS)
    PS_color_aln(structure, "aln.ps", (const char const **) AS, (const char const **) names);

  /* free mfe arrays */
  free_alifold_arrays();

  if (pf) {
    float energy, kT;
    char * mfe_struc;

    mfe_struc = strdup(structure);

    kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */
    pf_scale = exp(-(sfact*min_en)/kT/length);
    if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
    fflush(stdout);

    if (cstruc!=NULL) strncpy(structure, cstruc, length+1);

    pf_parameters = get_boltzmann_factors_ali(n_seq, temperature, betaScale, md, pf_scale);
    energy = alipf_fold_par((const char **)AS, structure, NULL, pf_parameters, do_backtrack, fold_constrained, circular);

    if (n_back>0) {
      /*stochastic sampling*/
      for (i=0; i<n_back; i++) {
        char *s;
        double prob=1.;
        s = alipbacktrack(&prob);
        printf("%s ", s);
        if (eval_energy ) printf("%6g %.2f ",prob, -1*(kT*log(prob)-energy));
        printf("\n");
         free(s);
      }

    }
    if (do_backtrack) {
      printf("%s", structure);
      if (!istty) printf(" [%6.2f]\n", energy);
      else printf("\n");
    }
    if ((istty)||(!do_backtrack))
      printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
    printf(" frequency of mfe structure in ensemble %g\n",
           exp((energy-min_en)/kT));

    if (do_backtrack) {
      FILE *aliout;
      cpair *cp;
      char *cent;
      double dist;
      FLT_OR_DBL *probs = export_ali_bppm();
      plist *pl, *mfel;

      assign_plist_from_pr(&pl, probs, length, bppmThreshold);
      assign_plist_from_db(&mfel, mfe_struc, 0.95*0.95);

      if (!circular){
        float *ens;
        cent = get_centroid_struct_pr(length, &dist, probs);
        ens=(float *)space(2*sizeof(float));
        energy_of_alistruct((const char **)AS, cent, n_seq, ens);
        /*cent_en = energy_of_struct(string, cent);*/ /*ali*/
        printf("%s %6.2f {%6.2f + %6.2f}\n",cent,ens[0]-ens[1],ens[0],(-1)*ens[1]);
        free(cent);
        free(ens);
      }
      if(doMEA){
        float mea, *ens;
        plist *pl2;
        assign_plist_from_pr(&pl2, probs, length, 1e-4/(1+MEAgamma));
        mea = MEA(pl2, structure, MEAgamma);
        ens = (float *)space(2*sizeof(float));
        if(circular)
          energy_of_alistruct((const char **)AS, structure, n_seq, ens);
        else
          ens[0] = energy_of_structure(string, structure, 0);
        printf("%s {%6.2f MEA=%.2f}\n", structure, ens[0], mea);
        free(ens);
        free(pl2);
      }

      if (fname[0]!='\0') {
        strcpy(ffname, fname);
        strcat(ffname, "_ali.out");
      } else strcpy(ffname, "alifold.out");
      aliout = fopen(ffname, "w");
      if (!aliout) {
        fprintf(stderr, "can't open %s    skipping output\n", ffname);
      } else {
        print_aliout(AS, pl, bppmThreshold, n_seq, mfe_struc, aliout);
      }
      fclose(aliout);
      if (fname[0]!='\0') {
        strcpy(ffname, fname);
        strcat(ffname, "_dp.ps");
      } else strcpy(ffname, "alidot.ps");
      cp = make_color_pinfo(AS,pl, bppmThreshold, n_seq, mfel);
      (void) PS_color_dot_plot(string, cp, ffname);
      free(cp);
      free(pl);
      free(mfel);
    }
    free(mfe_struc);
    free_alipf_arrays();
    free(pf_parameters);
  }
  if (cstruc!=NULL) free(cstruc);
  (void) fflush(stdout);
  free(string);
  free(structure);
  for (i=0; AS[i]; i++) {
    free(AS[i]); free(names[i]);
  }
  return 0;
}
示例#5
0
int main(int argc, char *argv[]){
  struct RNAeval_args_info  args_info;
  char                      *string, *structure, *orig_sequence, *tmp;
  char                      *rec_sequence, *rec_id, **rec_rest;
  char                      fname[FILENAME_MAX_LENGTH];
  char                      *ParamFile;
  int                       i, length1, length2;
  float                     energy;
  int                       istty;
  int                       circular=0;
  int                       noconv=0;
  int                       verbose = 0;
  unsigned int              rec_type, read_opt;

  string  = orig_sequence = ParamFile = NULL;
  gquad   = 0;
  dangles = 2;

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAeval_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given){
    if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
      warn_user("required dangle model not implemented, falling back to default dangles=2");
    else
      dangles = args_info.dangles_arg;
  }
  /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
  if(args_info.noconv_given)      noconv = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* assume RNA sequence to be circular */
  if(args_info.circ_given)        circular=1;
  /* logarithmic multiloop energies */
  if(args_info.logML_given)       logML = 1;
  /* be verbose */
  if(args_info.verbose_given)     verbose = 1;
  /* gquadruplex support */
  if(args_info.gquad_given)       gquad = 1;

  /* free allocated memory of command line data structure */
  RNAeval_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */

  if (ParamFile!=NULL) read_parameter_file(ParamFile);

  rec_type      = read_opt = 0;
  rec_id        = rec_sequence = NULL;
  rec_rest      = NULL;
  istty         = isatty(fileno(stdout)) && isatty(fileno(stdin));

  if(circular && gquad){
    nrerror("G-Quadruplex support is currently not available for circular RNA structures");
  }

  /* set options we wanna pass to read_record */
  if(istty){
    read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
    print_tty_input_seq_str("Use '&' to connect 2 sequences that shall form a complex.\n"
                            "Input sequence (upper or lower case) followed by structure");
  }

  /*
  #############################################
  # main loop: continue until end of file
  #############################################
  */
  while(
    !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
        & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){

    if(rec_id){
      if(!istty) printf("%s\n", rec_id);
      (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
    }
    else fname[0] = '\0';

    cut_point = -1;

    string    = tokenize(rec_sequence);
    length2   = (int) strlen(string);
    tmp       = extract_record_rest_structure((const char **)rec_rest, 0, (rec_id) ? VRNA_OPTION_MULTILINE : 0);

    if(!tmp)
      nrerror("structure missing");

    structure = tokenize(tmp);
    length1   = (int) strlen(structure);
    if(length1 != length2)
      nrerror("structure and sequence differ in length!");

    free(tmp);

    /* convert DNA alphabet to RNA if not explicitely switched off */
    if(!noconv) str_DNA2RNA(string);
    /* store case-unmodified sequence */
    orig_sequence = strdup(string);
    /* convert sequence to uppercase letters only */
    str_uppercase(string);

    if(istty){
      if (cut_point == -1)
        printf("length = %d\n", length1);
      else
        printf("length1 = %d\nlength2 = %d\n", cut_point-1, length1-cut_point+1);
    }

    if(gquad)
      energy = energy_of_gquad_structure(string, structure, verbose);
    else
      energy = (circular) ? energy_of_circ_structure(string, structure, verbose) : energy_of_structure(string, structure, verbose);

    if (cut_point == -1)
      printf("%s\n%s", orig_sequence, structure);
    else {
      char *pstring, *pstruct;
      pstring = costring(orig_sequence);
      pstruct = costring(structure);
      printf("%s\n%s", pstring,  pstruct);
      free(pstring);
      free(pstruct);
    }
    if (istty)
      printf("\n energy = %6.2f\n", energy);
    else
      printf(" (%6.2f)\n", energy);

    /* clean up */
    (void) fflush(stdout);
    if(rec_id) free(rec_id);
    free(rec_sequence);
    free(structure);
    /* free the rest of current dataset */
    if(rec_rest){
      for(i=0;rec_rest[i];i++) free(rec_rest[i]);
      free(rec_rest);
    }
    rec_id = rec_sequence = structure = NULL;
    rec_rest = NULL;

    free(string);
    free(orig_sequence);
    string = orig_sequence = NULL;

    /* print user help for the next round if we get input from tty */
    if(istty){
      print_tty_input_seq_str("Use '&' to connect 2 sequences that shall form a complex.\n"
                              "Input sequence (upper or lower case) followed by structure");
    }
  }
  return EXIT_SUCCESS;
}
示例#6
0
int main(int argc, char *argv[]){
  struct  RNAinverse_args_info args_info;
  int     input_type;
  char    *input_string, *start, *structure, *rstart, *str2, *line;
  char    *ParamFile=NULL, *c, *ns_bases;
  int     i,j, length, l, hd, sym;
  double  energy=0., kT;
  int     pf, mfe, istty;
  int     repeat, found;

  do_backtrack = 0; pf = 0; mfe = 1;
  repeat = 0;
  input_type = 0;
  input_string = ns_bases = NULL;
  init_rand();

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAinverse_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* alter the alphabet */
  if(args_info.alphabet_given){
    symbolset=args_info.alphabet_arg;
    /* symbolset should only have uppercase characters */
    for (l = 0; l < (int)strlen(symbolset); l++)
      symbolset[l] = toupper(symbolset[l]);
  }
  /* set function for optimization */
  if(args_info.function_given){
    if(strlen(args_info.function_arg) > 2){
      RNAinverse_cmdline_parser_print_help(); exit(EXIT_FAILURE);
    }
    else{
      if((*args_info.function_arg == 'm') || (*(args_info.function_arg+1) == 'm')) mfe = 1;
      if((*args_info.function_arg == 'p') || (*(args_info.function_arg+1) == 'p')) pf = 1;
    }
  }
  /* set repeat */
  if(args_info.repeat_given)      repeat = args_info.repeat_arg;
  /* set final cost */
  if(args_info.final_given)       final_cost = args_info.final_arg;
  /* do we wannabe verbose */
  if(args_info.verbose_given)     inv_verbose = 1;

  /* free allocated memory of command line data structure */
  RNAinverse_cmdline_parser_free (&args_info);

  kT = (temperature+273.15)*1.98717/1000.0;

  istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

  if (ParamFile!=NULL)
    read_parameter_file(ParamFile);

  give_up = (repeat<0);

  do {
    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty)
      print_tty_input_seq_str("Input structure & start string\n"
                              "(lower case letters for const positions) and 0 or empty line for random start string\n");

    input_type = get_multi_input_line(&input_string, 0);
    /* we are waiting for a structure (i.e. something like a constraint) so we skip all sequences, fasta-headers and misc lines */
    while(input_type & (VRNA_INPUT_SEQUENCE | VRNA_INPUT_MISC | VRNA_INPUT_FASTA_HEADER)){
      if(!istty && (input_type & VRNA_INPUT_FASTA_HEADER)) printf(">%s\n", input_string);
      free(input_string); input_string = NULL;
      input_type = get_multi_input_line(&input_string, 0);
    }
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)) break;

    if(input_type & (VRNA_INPUT_CONSTRAINT)){
      structure = (char *)space(sizeof(char) * (strlen(input_string) + 1));
      (void)sscanf(input_string, "%s", structure); /* scanf gets rid of trailing junk */
      length = (int)strlen(structure);
      free(input_string); input_string = NULL;
      input_type = get_multi_input_line(&input_string, VRNA_INPUT_NOSKIP_BLANK_LINES | VRNA_INPUT_NOSKIP_COMMENTS);
    }
    if(input_type & VRNA_INPUT_QUIT) break;

    start = (char *)space(sizeof(char) * (length+1));
    /* now we assume to get a sequence (input_string may be empty as well) */
    if(input_type & VRNA_INPUT_SEQUENCE){
      (void)strncpy(start, input_string, length);
      start[length] = '\0';
      free(input_string); input_string = NULL;
    }
    /* fallback to empty start sequence */
    else start[0] = '\0';

    /*
    ########################################################
    # done with 'stdin' handling
    ########################################################
    */

    if (ns_bases != NULL) {
      nonstandards = space(33);
      c=ns_bases;
      i=sym=0;
      if (*c=='-') {
        sym=1; c++;
      }
      while (*c!='\0') {
        if (*c!=',') {
          nonstandards[i++]=*c++;
          nonstandards[i++]=*c;
          if ((sym)&&(*c!=*(c-1))) {
            nonstandards[i++]=*c;
            nonstandards[i++]=*(c-1);
          }
        }
        c++;
      }
    }

    str2 = (char *) space((unsigned)length+1);
    if (istty) printf("length = %d\n", length);

    if (repeat!=0) found = (repeat>0)? repeat : (-repeat);
    else found = 1;

    /* initialize_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */

    rstart = (char *) space((unsigned)length+1);
    while(found>0) {
      char *string;
      string = (char *) space((unsigned)length+1);
      strcpy(string, start);
      for (i=0; i<length; i++) {
        /* lower case characters are kept fixed, any other character
           not in symbolset is replaced by a random character */
        if (islower(string[i])) continue;

        if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
          string[i]=symbolset[int_urn(0,strlen(symbolset)-1)];
      }
      strcpy(rstart, string); /* remember start string */

      if (mfe) {
        energy = inverse_fold(string, structure);
        if( (repeat>=0) || (energy<=0.0) ) {
          found--;
          hd = hamming(rstart, string);
          printf("%s  %3d", string, hd);
          if (energy>0) { /* no solution found */
            printf("   d= %g\n", energy);
            if(istty) {
              energy = fold(string,str2);
              printf("%s\n", str2);
            }
          } else printf("\n");
        }
      }
      if (pf) {
        if (!(mfe && give_up && (energy>0))) {
          /* unless we gave up in the mfe part */
          double prob, min_en, sfact=1.07;

          /* get a reasonable pf_scale */
          min_en = fold(string,str2);
          pf_scale = exp(-(sfact*min_en)/kT/length);
          /* init_pf_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */

          energy = inverse_pf_fold(string, structure);
          prob = exp(-energy/kT);
          hd = hamming(rstart, string);
          printf("%s  %3d  (%g)\n", string, hd, prob);
          free_pf_arrays();
        }
        if (!mfe) found--;
      }
      (void) fflush(stdout);
      free(string);
    }
    free(rstart);
    free_arrays();

    free(structure);
    free(str2);
    free(start);
    (void) fflush(stdout);
  } while (1);
  return 0;
}