示例#1
0
/*
 * Repair the start sequence in place so that, for every base pair found in
 * the pair table built from `structure`, the two positions hold symbols that
 * are allowed to pair according to pair[][].
 *
 * start     - NUL-terminated sequence, modified in place
 * structure - target structure, handed to make_ptable() to fill the table
 *
 * For each pair one end (i) is kept and the other end (j) is replaced by the
 * first symbol of the shuffled symbolset that pairs with i.  The end that is
 * kept is the partner when the partner is lower case; otherwise it is picked
 * by a coin flip, except that a non-upper-case start[k] is always kept.
 * If no symbol pairs with start[i], both ends are overwritten with a random
 * entry of pairset.
 *
 * NOTE(review): S[] is encoded once up front and is not refreshed after a
 * position has been mutated.
 */
PRIVATE void make_start(char* start, const char *structure)
{
   int i,j,k,l,r,length,nsym;
   int *table, *S, sym[MAXALPHA], ss;

   /* neither string changes length in here: measure once instead of calling
      strlen() in every loop condition */
   length = (int) strlen(start);
   nsym   = (int) strlen(symbolset);
   table = (int *) space(sizeof(int)*length);
   S = (int *) space(sizeof(int)*length);

   make_ptable(structure, table);
   /* <ctype.h> functions require a value representable as unsigned char */
   for (i=0; i<length; i++)
      S[i] = encode_char(toupper((unsigned char) start[i]));
   for (i=0; i<nsym; i++) sym[i] = i;

   for (k=0; k<length; k++) {
      /* presumably: unpaired position, or a pair already handled from its
         other end - confirm against make_ptable() */
      if (table[k]<k) continue;

      if (((urn()<0.5) && isupper((unsigned char) start[k])) ||
          islower((unsigned char) start[table[k]])) {
         i = table[k]; j = k;
      } else {
         i = k; j = table[k];
      }

      if (!pair[S[i]][S[j]]) {   /* make a valid pair by mutating j */
         shuffle(sym, (int) base);
         for (l=0; l<base; l++) {
            ss = encode_char(symbolset[sym[l]]);
            if (pair[S[i]][ss]) break;
         }
         if (l==base) { /* nothing pairs start[i] */
            r = 2*int_urn(0, npairs-1);
            start[i] = pairset[r];
            start[j] = pairset[r+1];
         } else start[j] = symbolset[sym[l]];
      }
   }
   free(table);
   free(S);
}
示例#2
0
文件: RNAinverse.c 项目: wash/probing
/*
 * Command line driver for inverse folding.
 *
 * Options handled here:
 *   -a <alphabet>  symbols to build sequences from (upper-cased in place and
 *                  stored in symbolset)
 *   -T <temp>      sets temperature
 *   -F[m][p]       selects the passes: m = inverse_fold(), p = inverse_pf_fold()
 *   -R [n]         sets repeat; REPEAT_DEFAULT when -R is the last argument.
 *                  A negative value sets give_up and makes the mfe pass count
 *                  only results with energy <= 0
 *   -noGU, -noLP   set noGU / noLonelyPairs
 *   -4             clears tetra_loop
 *   -e <n>         sets energy_set
 *   -d[n]          sets dangles (0 when no number is attached)
 *   -f <cost>      sets final_cost
 *   -P <file>      parameter file handed to read_parameter_file()
 *   -v             sets inv_verbose
 * Unknown options call usage(); words not starting with '-' are ignored.
 *
 * Afterwards records are read from stdin until EOF or a line "@": a structure
 * line (lines starting with '*', '>' or empty lines are echoed and skipped)
 * followed by a start-sequence line.  Results go to stdout.  Returns 0.
 */
int main(int argc, char *argv[])
{
    char *start, *structure, *rstart, *str2, *line;
    char  *ParamFile=NULL;
    int   i,j, length, l, hd, nsym;
    double energy=0., kT;
    int   pf, mfe, istty;
    int   repeat, found;

    do_backtrack = 0;
    pf = 0;
    mfe = 1;
    repeat = 0;
    init_rand();
    for (i=1; i<argc; i++) {
        if (argv[i][0]!='-') continue;

        switch ( argv[i][1] )
        {
        case 'a':
            /* the alphabet is a separate word: make sure it exists before
               touching argv[i] */
            if (++i>=argc) {
                usage();
                break;
            }
            symbolset = argv[i];
            /* symbolset should only have uppercase characters */
            for (l = 0; symbolset[l]!='\0'; l++)
                symbolset[l] = toupper((unsigned char) symbolset[l]);
            break;
        case 'T':
            if (argv[i][2]!='\0') usage();
            /* sscanf() yields EOF (not 0) on an empty word, so test for
               exactly one converted item */
            if ((++i>=argc) || (sscanf(argv[i], "%lf", &temperature)!=1))
                usage();
            break;
        case 'F':
            mfe = 0;
            pf = 0;
            for (j=2; argv[i][j]!='\0'; j++) {
                switch( argv[i][j] ) {
                case 'm' :
                    mfe = 1;
                    break;
                case 'p' :
                    pf = 1; /* old version had dangles=0 here */
                    break;
                default :
                    usage();
                }
            }
            break;
        case 'R':
            repeat = REPEAT_DEFAULT;
            if (++i<argc)
                if (sscanf(argv[i], "%d", &repeat)!=1)
                    usage();
            break;
        case 'n':
            if (strcmp(argv[i], "-noGU" )==0) noGU=1;
            else if (strcmp(argv[i], "-noLP" )==0) noLonelyPairs=1;
            else usage();
            break;
        case '4':
            tetra_loop=0;
            break;
        case 'e':
            if ((++i>=argc) || (sscanf(argv[i],"%d", &energy_set)!=1))
                usage();
            break;
        case 'd':
            dangles=0;
            if (argv[i][2]!='\0')
                if (sscanf(argv[i]+2, "%d", &dangles)!=1)
                    usage();
            break;
        case 'f': /* when to stop RNAfold -p */
            if ((++i>=argc) || (sscanf(argv[i],"%f", &final_cost)!=1))
                usage();
            break;
        case 'P':
            if (++i<argc)
                ParamFile = argv[i];
            else
                usage();
            break;
        case 'v':
            inv_verbose = 1;
            break;
        default:
            usage();
        }
    }

    kT = (temperature+273.15)*1.98717/1000.0;

    istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

    if (ParamFile!=NULL)
        read_parameter_file(ParamFile);

    give_up = (repeat<0);

    /* the alphabet is fixed from here on; measure it once */
    nsym = (int) strlen(symbolset);

    do {
        if (istty) {
            printf("\nInput structure & start string"
                   " (lower case letters for const positions)\n"
                   "    @ to quit, and 0 for random start string\n");
            printf("%s\n", scale);
        }

        if ((line = get_line(stdin))==NULL) break;

        /* read structure, skipping over comment lines */
        while ((*line=='*')||(*line=='\0')||(*line=='>')) {
            printf("%s\n", line);
            free(line);
            if ((line = get_line(stdin))==NULL) break;
        }
        /* stop at eof or '@' */
        if (line==NULL) break;
        if (strcmp(line, "@") == 0) {
            free(line);
            break;
        }

        structure = (char *) space(strlen(line)+1);
        (void) sscanf(line,"%s",structure); /* scanf gets rid of trailing junk */
        free(line);

        length = (int) strlen(structure);
        str2 = (char *) space((unsigned)length+1);

        if ((line = get_line(stdin))!=NULL)
            if (strcmp(line, "@") == 0) {
                /* quitting in the middle of a record: give back what this
                   record has allocated so far */
                free(line);
                free(str2);
                free(structure);
                break;
            }

        /* NOTE(review): an absent start line (EOF) leaves start empty only
           if space() returns zero-filled memory - confirm */
        start = (char *) space((unsigned) length+1);
        if (line !=NULL) {
            (void) strncpy(start, line, length);
            start[length] = '\0';   /* strncpy() does not terminate on truncation */
            free(line);
        }

        if (istty) printf("length = %d\n", length);

        if (repeat!=0) found = (repeat>0)? repeat : (-repeat);
        else found = 1;

        initialize_fold(length);

        rstart = (char *) space((unsigned)length+1);
        while(found>0) {
            char *string;
            /* NOTE(review): positions beyond the end of start are expected to
               read as '\0' here, i.e. space() is assumed to zero-fill */
            string = (char *) space((unsigned)length+1);
            strcpy(string, start);
            for (i=0; i<length; i++) {
                /* lower case characters are kept fixed, any other character
                   not in symbolset is replaced by a random character */
                if (islower((unsigned char) string[i])) continue;

                if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
                    string[i]=symbolset[int_urn(0,nsym-1)];
            }
            strcpy(rstart, string); /* remember start string */

            if (mfe) {
                energy = inverse_fold(string, structure);
                if( (repeat>=0) || (energy<=0.0) ) {
                    found--;
                    hd = hamming(rstart, string);
                    printf("%s  %3d", string, hd);
                    if (energy>0) { /* no solution found */
                        printf("   d= %g\n", energy);
                        if(istty) {
                            energy = fold(string,str2);
                            printf("%s\n", str2);
                        }
                    } else printf("\n");
                }
            }
            if (pf) {
                if (!(mfe && give_up && (energy>0))) {
                    /* unless we gave up in the mfe part */
                    double prob, min_en, sfact=1.07;

                    /* get a reasonable pf_scale */
                    min_en = fold(string,str2);
                    pf_scale = exp(-(sfact*min_en)/kT/length);
                    init_pf_fold(length);

                    energy = inverse_pf_fold(string, structure);
                    prob = exp(-energy/kT);
                    hd = hamming(rstart, string);
                    printf("%s  %3d  (%g)\n", string, hd, prob);
                    free_pf_arrays();
                }
                if (!mfe) found--;
            }
            (void) fflush(stdout);
            free(string);
        }
        free(rstart);
        free_arrays();

        free(structure);
        free(str2);
        free(start);
        (void) fflush(stdout);
    } while (1);
    return 0;
}
示例#3
0
/*
 * Command line driver for inverse folding (gengetopt based option parsing).
 *
 * Copies the parsed options into the corresponding globals (temperature,
 * tetra_loop, dangles, noGU, no_closingGU, energy_set, symbolset, final_cost,
 * inv_verbose) and locals, then reads records from stdin until a QUIT or
 * ERROR input type is reported: one structure line followed by one line that
 * may hold the start sequence.  Results are written to stdout.  Returns 0.
 */
int main(int argc, char *argv[]){
  struct  RNAinverse_args_info args_info;
  int     input_type;
  char    *input_string, *start, *structure, *rstart, *str2;
  char    *ParamFile=NULL, *c, *ns_bases;
  int     i, length, l, hd, sym, nsym;
  double  energy=0., kT;
  int     pf, mfe, istty;
  int     repeat, found;

  do_backtrack = 0; pf = 0; mfe = 1;
  repeat = 0;
  input_type = 0;
  input_string = ns_bases = NULL;
  init_rand();

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAinverse_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* do not take special tetra loop energies into account */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* alter the alphabet */
  if(args_info.alphabet_given){
    /* keep a private copy, like the other string options above: the
       argument strings are handed to RNAinverse_cmdline_parser_free()
       below, while symbolset is needed for the whole run (the copy is
       intentionally never freed) */
    symbolset = strdup(args_info.alphabet_arg);
    if(symbolset == NULL){
      fprintf(stderr, "RNAinverse: out of memory\n");
      exit(EXIT_FAILURE);
    }
    /* symbolset should only have uppercase characters */
    for (l = 0; symbolset[l] != '\0'; l++)
      symbolset[l] = toupper((unsigned char)symbolset[l]);
  }
  /* set function for optimization */
  /* NOTE(review): mfe starts out as 1 and is never cleared here, so asking
     for "p" alone still runs the mfe pass - confirm that this is intended */
  if(args_info.function_given){
    if(strlen(args_info.function_arg) > 2){
      RNAinverse_cmdline_parser_print_help(); exit(EXIT_FAILURE);
    }
    else{
      if((*args_info.function_arg == 'm') || (*(args_info.function_arg+1) == 'm')) mfe = 1;
      if((*args_info.function_arg == 'p') || (*(args_info.function_arg+1) == 'p')) pf = 1;
    }
  }
  /* set repeat */
  if(args_info.repeat_given)      repeat = args_info.repeat_arg;
  /* set final cost */
  if(args_info.final_given)       final_cost = args_info.final_arg;
  /* do we wannabe verbose */
  if(args_info.verbose_given)     inv_verbose = 1;

  /* free allocated memory of command line data structure */
  RNAinverse_cmdline_parser_free (&args_info);

  kT = (temperature+273.15)*1.98717/1000.0;

  istty = (isatty(fileno(stdout))&&isatty(fileno(stdin)));

  if (ParamFile!=NULL)
    read_parameter_file(ParamFile);

  give_up = (repeat<0);

  /* the alphabet is fixed from here on; measure it once */
  nsym = (int)strlen(symbolset);

  do {
    /* nothing read yet for this record */
    structure = NULL;
    length    = 0;

    /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
    */
    if(istty)
      print_tty_input_seq_str("Input structure & start string\n"
                              "(lower case letters for const positions) and 0 or empty line for random start string\n");

    input_type = get_multi_input_line(&input_string, 0);
    /* we are waiting for a structure (i.e. something like a constraint) so we skip all sequences, fasta-headers and misc lines */
    while(input_type & (VRNA_INPUT_SEQUENCE | VRNA_INPUT_MISC | VRNA_INPUT_FASTA_HEADER)){
      if(!istty && (input_type & VRNA_INPUT_FASTA_HEADER)) printf(">%s\n", input_string);
      free(input_string); input_string = NULL;
      input_type = get_multi_input_line(&input_string, 0);
    }
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)) break;

    if(input_type & (VRNA_INPUT_CONSTRAINT)){
      structure = (char *)space(sizeof(char) * (strlen(input_string) + 1));
      (void)sscanf(input_string, "%s", structure); /* scanf gets rid of trailing junk */
      length = (int)strlen(structure);
      free(input_string); input_string = NULL;
      input_type = get_multi_input_line(&input_string, VRNA_INPUT_NOSKIP_BLANK_LINES | VRNA_INPUT_NOSKIP_COMMENTS);
    }
    /* stop on quit, and also when no structure line was delivered at all:
       everything below needs a valid structure and length */
    if((structure == NULL) || (input_type & VRNA_INPUT_QUIT)){
      free(structure);
      break;
    }

    start = (char *)space(sizeof(char) * (length+1));
    /* now we assume to get a sequence (input_string may be empty as well) */
    if(input_type & VRNA_INPUT_SEQUENCE){
      (void)strncpy(start, input_string, length);
      start[length] = '\0';
    }
    /* fallback to empty start sequence */
    else start[0] = '\0';
    /* the line is consumed whatever its type was; reset the pointer so the
       next record starts from a clean buffer */
    free(input_string); input_string = NULL;

    /*
    ########################################################
    # done with 'stdin' handling
    ########################################################
    */

    if (ns_bases != NULL) {
      /* NOTE(review): a fresh table is allocated for every record; who owns
         the previous one is not visible from here */
      nonstandards = space(33);
      c=ns_bases;
      i=sym=0;
      if (*c=='-') {
        sym=1; c++;
      }
      while (*c!='\0') {
        if (*c!=',') {
          int need;
          /* a single trailing base is not a pair; stepping over it would
             walk past the end of ns_bases */
          if (c[1]=='\0') break;
          /* a symmetric entry with two different bases is stored both ways */
          need = ((sym)&&(c[0]!=c[1])) ? 4 : 2;
          /* 33 bytes were allocated: at most 32 characters plus terminator */
          if (i+need > 32) break;
          nonstandards[i++]=c[0];
          nonstandards[i++]=c[1];
          if (need==4) {
            nonstandards[i++]=c[1];
            nonstandards[i++]=c[0];
          }
          c++;
        }
        c++;
      }
      nonstandards[i]='\0';
    }

    str2 = (char *) space((unsigned)length+1);
    if (istty) printf("length = %d\n", length);

    if (repeat!=0) found = (repeat>0)? repeat : (-repeat);
    else found = 1;

    /* initialize_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */

    rstart = (char *) space((unsigned)length+1);
    while(found>0) {
      char *string;
      /* NOTE(review): positions beyond the end of start are expected to read
         as '\0' here, i.e. space() is assumed to zero-fill - confirm */
      string = (char *) space((unsigned)length+1);
      strcpy(string, start);
      for (i=0; i<length; i++) {
        /* lower case characters are kept fixed, any other character
           not in symbolset is replaced by a random character */
        if (islower((unsigned char)string[i])) continue;

        if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
          string[i]=symbolset[int_urn(0,nsym-1)];
      }
      strcpy(rstart, string); /* remember start string */

      if (mfe) {
        energy = inverse_fold(string, structure);
        if( (repeat>=0) || (energy<=0.0) ) {
          found--;
          hd = hamming(rstart, string);
          printf("%s  %3d", string, hd);
          if (energy>0) { /* no solution found */
            printf("   d= %g\n", energy);
            if(istty) {
              energy = fold(string,str2);
              printf("%s\n", str2);
            }
          } else printf("\n");
        }
      }
      if (pf) {
        if (!(mfe && give_up && (energy>0))) {
          /* unless we gave up in the mfe part */
          double prob, min_en, sfact=1.07;

          /* get a reasonable pf_scale */
          min_en = fold(string,str2);
          pf_scale = exp(-(sfact*min_en)/kT/length);
          /* init_pf_fold(length); <- obsolete (hopefully commenting this out does not affect anything crucial ;) */

          energy = inverse_pf_fold(string, structure);
          prob = exp(-energy/kT);
          hd = hamming(rstart, string);
          printf("%s  %3d  (%g)\n", string, hd, prob);
          free_pf_arrays();
        }
        if (!mfe) found--;
      }
      (void) fflush(stdout);
      free(string);
    }
    free(rstart);
    free_arrays();

    free(structure);
    free(str2);
    free(start);
    (void) fflush(stdout);
  } while (1);
  return 0;
}
示例#4
0
/*
 * EMBOSS front end "vrnainverse" for inverse folding.
 *
 * Reads its settings through the ACD qualifiers fetched below, copies them
 * into the folding globals, then processes the structures file: every line
 * that does not start with '*' or '>' and is not empty is taken as one
 * target structure, up to EOF or a line "@".  The start sequence comes from
 * the "sequence" qualifier; results are written to the "outfile" qualifier.
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    char *start;
    char *structure;
    char *rstart;
    char *str2;
    char *line;
    int i;
    int length;
    int l;
    int hd;
    int nsym;
    double energy = 0.;
    double kT;
    int   pf = 0;
    int   mfe = 0;
    int   repeat;
    int   found;

    AjPFile inf     = NULL;
    AjPSeq  seq = NULL;
    AjPFile paramfile = NULL;
    AjPFile outf = NULL;

    float eT = 0.;
    AjBool eGU;

    AjBool eclose;
    AjBool lonely;
    AjBool etloop;
    AjPStr eenergy = NULL;
    char ewt = '\0';
    AjPStr edangles = NULL;
    AjPStr method   = NULL;
    AjPStr ealpha    = NULL;
    AjBool showfails = ajFalse;
    AjBool succeed = ajFalse;

    char edangle = '\0';

    ajint len;
    FILE *fp;


    embInitPV("vrnainverse",argc,argv,"VIENNA",VERSION);


    inf        = ajAcdGetInfile("structuresfile");
    seq        = ajAcdGetSeq("sequence");
    paramfile  = ajAcdGetInfile("paramfile");
    eT         = ajAcdGetFloat("temperature");
    eGU        = ajAcdGetBoolean("gu");
    eclose     = ajAcdGetBoolean("closegu");
    lonely     = ajAcdGetBoolean("lp");
    etloop     = ajAcdGetBoolean("tetraloop");
    eenergy    = ajAcdGetListSingle("energy");
    edangles   = ajAcdGetListSingle("dangles");
    method     = ajAcdGetListSingle("folding");
    ealpha     = ajAcdGetString("alphabet");
    final_cost = ajAcdGetFloat("final");
    repeat     = ajAcdGetInt("repeats");
    showfails  = ajAcdGetBoolean("showfails");
    succeed    = ajAcdGetBoolean("succeed");
    outf       = ajAcdGetOutfile("outfile");


    do_backtrack = 0;
    structure = NULL;

    /* the qualifiers are phrased positively ("allow ..."), the globals
       negatively ("no ...") */
    temperature   = (double) eT;
    noGU          = (eGU) ? 0 : 1;
    no_closingGU  = (eclose) ? 0 : 1;
    noLonelyPairs = (lonely) ? 0 : 1;
    tetra_loop    = !!etloop;

    /* the list qualifiers deliver their choice as text; only the first
       character is looked at and any other value leaves the global as is */
    ewt = *ajStrGetPtr(eenergy);
    if(ewt >= '0' && ewt <= '2')
        energy_set = ewt - '0';

    edangle = *ajStrGetPtr(edangles);
    if(edangle >= '0' && edangle <= '3')
        dangles = edangle - '0';

    if(ajStrMatchC(method,"mp"))
    {
        mfe = 1;
        pf  = 1;
    }
    else if(ajStrMatchC(method,"m"))
    {
        mfe = 1;
        pf  = 0;
    }
    else if(ajStrMatchC(method,"p"))
    {
        mfe = 0;
        pf  = 1;
    }

    /* private, upper-cased copy of the alphabet; released at the end */
    len = ajStrGetLen(ealpha);
    symbolset = (char *) space(len + 1);
    strcpy(symbolset, ajStrGetPtr(ealpha));
    for (l = 0; l < len; l++)
        symbolset[l] = toupper((unsigned char) symbolset[l]);
    /* the alphabet is fixed from here on; measure it once */
    nsym = (int) strlen(symbolset);

    inv_verbose = !!showfails;
    fp = ajFileGetFileptr(inf);

    init_rand();
    kT = (temperature+273.15)*1.98717/1000.0;

    if (paramfile)
        read_parameter_file(paramfile);

    give_up = succeed;

    do {

        if ((line = get_line(fp))==NULL) break;

        /* read structure, skipping over comment lines */
        while ((*line=='*')||(*line=='\0')||(*line=='>'))
        {
            free(line);
            if ((line = get_line(fp))==NULL)
                break;
        }
        /* stop at eof or '@' */
        if (line==NULL) break;
        if (strcmp(line, "@") == 0)
        {
            free(line);
            break;
        }

        structure = (char *) space(strlen(line)+1);
        /* scanf gets rid of trailing junk */
        (void) sscanf(line,"%s",structure);
        free(line);

        length = (int) strlen(structure);
        str2 = (char *) space((unsigned)length+1);

        /* the start sequence is taken from the sequence qualifier; no
           further line is read from the structures file for it */
        start = (char *) space((unsigned) length+1);
        if(seq)
        {
            (void) strncpy(start, ajSeqGetSeqC(seq), length);
            start[length] = '\0';   /* strncpy() does not terminate on truncation */
        }

        if (repeat!=0)
            found = repeat;
        else
            found = 1;

        initialize_fold(length);

        rstart = (char *) space((unsigned)length+1);
        while(found>0)
        {
            char *string;
            /* NOTE(review): positions beyond the end of start are expected
               to read as '\0' here, i.e. space() is assumed to zero-fill */
            string = (char *) space((unsigned)length+1);
            strcpy(string, start);
            for (i=0; i<length; i++)
            {
                /* lower case characters are kept fixed, any other character
                   not in symbolset is replaced by a random character */
                if (islower((unsigned char) string[i]))
                    continue;

                if (string[i]=='\0' || (strchr(symbolset,string[i])==NULL))
                    string[i]=symbolset[int_urn(0,nsym-1)];
            }
            strcpy(rstart, string);	/* remember start string */

            if (mfe)
            {
                energy = inverse_fold(string, structure);
                /* with "succeed" set only results with energy <= 0 count */
                if( (!succeed) || (energy<=0.0) ) {
                    found--;
                    hd = hamming(rstart, string);
                    ajFmtPrintF(outf,"%s  %3d", string, hd);
                    if (energy>0)
                    {			/* no solution found */
                        ajFmtPrintF(outf,"   d = %f\n", energy);
                    }
                    else
                        ajFmtPrintF(outf,"\n");
                }
            }

            if (pf)
            {
                if (!(mfe && give_up && (energy>0)))
                {
                    /* unless we gave up in the mfe part */
                    double prob, min_en, sfact=1.07;

                    /* get a reasonable pf_scale */
                    min_en = fold(string,str2);
                    pf_scale = exp(-(sfact*min_en)/kT/length);
                    init_pf_fold(length);

                    energy = inverse_pf_fold(string, structure);
                    prob = exp(-energy/kT);
                    hd = hamming(rstart, string);
                    ajFmtPrintF(outf,"%s  %3d  (%f)\n", string, hd, prob);
                    free_pf_arrays();
                }
                if (!mfe)
                    found--;
            }

            free(string);
        }
        free(rstart);
        free_arrays();

        free(structure);
        free(str2);
        free(start);

    } while (1);

    ajSeqDel(&seq);
    ajStrDel(&eenergy);
    ajStrDel(&edangles);
    ajStrDel(&method);
    ajStrDel(&ealpha);

    ajFileClose(&inf);
    ajFileClose(&paramfile);
    ajFileClose(&outf);
    AJFREE(symbolset);

    embExit();
    return 0;
}