Пример #1
0
/*************************************************************************
 * Entry point for centrimo
 *
 * Processes the command line, loads the sequences and the motif databases,
 * scores every sequence against every motif, and writes the plain text
 * site counts, the HTML/JSON report (only when the HTML template could be
 * opened) and the text results. Fatal problems are reported through die();
 * otherwise the function returns 0.
 *************************************************************************/
int main(int argc, char *argv[]) {
  CENTRIMO_OPTIONS_T options;
  SEQ_SITES_T seq_sites;
  SITE_COUNTS_T counts;
  int seqN, motifN, seqlen, db_i, motif_i, i;
  double log_pvalue_thresh;
  SEQ_T** sequences = NULL;
  ARRAY_T* bg_freqs = NULL;
  ARRAYLST_T *stats_list;
  MOTIF_DB_T **dbs, *db;
  MREAD_T *mread;
  MOTIF_STATS_T *stats;
  MOTIF_T *motif, *rev_motif;
  PSSM_T *pos_pssm, *rev_pssm;
  char *sites_path, *desc;
  FILE *sites_file;
  HTMLWR_T *html;
  JSONWR_T *json;

  // COMMAND LINE PROCESSING
  process_command_line(argc, argv, &options);

  // load the sequences
  read_sequences(options.alphabet, options.seq_source, &sequences, &seqN);
  // the length of the first sequence is used for every sequence
  seqlen = (seqN ? get_seq_length(sequences[0]) : 0);
  // calculate a sequence background (unless other background is given)
  if (!options.bg_source) {
    bg_freqs = calc_bg_from_fastas(options.alphabet, seqN, sequences);
  }

  // load the motifs
  motifN = 0;
  dbs = mm_malloc(sizeof(MOTIF_DB_T*) * arraylst_size(options.motif_sources));
  for (i = 0; i < arraylst_size(options.motif_sources); i++) {
    char* db_source;
    db_source = (char*)arraylst_get(i, options.motif_sources);
    dbs[i] = read_motifs(i, db_source, options.bg_source, &bg_freqs,
        options.pseudocount, options.selected_motifs, options.alphabet);
    motifN += arraylst_size(dbs[i]->motifs);
  }
  // the E-value threshold divided by the number of motifs, in log space
  log_pvalue_thresh = log(options.evalue_thresh) - log(motifN);
  // Setup some things for double strand scanning
  if (options.scan_both_strands == TRUE) {
    // Set up hash tables for computing reverse complement
    setup_hash_alph(DNAB);
    setalph(0);
    // Correct background by averaging on freq. for both strands.
    average_freq_with_complement(options.alphabet, bg_freqs);
    normalize_subarray(0, alph_size(options.alphabet, ALPH_SIZE), 0.0, bg_freqs);
    calc_ambigs(options.alphabet, FALSE, bg_freqs);
  }
  // Create output directory
  if (create_output_directory(options.output_dirname, options.allow_clobber,
        (verbosity >= NORMAL_VERBOSE))) {
    die("Couldn't create output directory %s.\n", options.output_dirname);
  }
  // open output files
  sites_path = make_path_to_file(options.output_dirname, SITES_FILENAME);
  sites_file = fopen(sites_path, "w");
  // Without this check a failed open would hand a NULL stream to
  // output_site_counts() and fclose() further down.
  if (sites_file == NULL) {
    die("Couldn't open sites file %s for writing.\n", sites_path);
  }
  free(sites_path);
  // setup html monolith writer
  json = NULL;
  if ((html = htmlwr_create(get_meme_etc_dir(), TEMPLATE_FILENAME))) {
    htmlwr_set_dest_name(html, options.output_dirname, HTML_FILENAME);
    htmlwr_replace(html, "centrimo_data.js", "data");
    json = htmlwr_output(html);
    if (json == NULL) die("Template does not contain data section.\n");
  } else {
    // a missing template is not fatal; only the HTML/JSON output is skipped
    DEBUG_MSG(QUIET_VERBOSE, "Failed to open html template file.\n");
  }
  if (json) {
    // output some top level variables
    jsonwr_str_prop(json, "version", VERSION);
    jsonwr_str_prop(json, "revision", REVISION);
    jsonwr_str_prop(json, "release", ARCHIVE_DATE);
    jsonwr_str_array_prop(json, "cmd", argv, argc);
    jsonwr_property(json, "options");
    jsonwr_start_object_value(json);
    jsonwr_dbl_prop(json, "motif-pseudo", options.pseudocount);
    jsonwr_dbl_prop(json, "score", options.score_thresh);
    jsonwr_dbl_prop(json, "ethresh", options.evalue_thresh);
    jsonwr_lng_prop(json, "maxbin", options.max_window+1);
    jsonwr_bool_prop(json, "norc", !options.scan_both_strands);
    jsonwr_bool_prop(json, "noflip", options.no_flip);
    jsonwr_end_object_value(json);
    // output the description
    desc = prepare_description(&options);
    if (desc) {
      jsonwr_str_prop(json, "job_description", desc);
      free(desc);
    }
    // output size metrics
    jsonwr_lng_prop(json, "seqlen", seqlen);
    jsonwr_lng_prop(json, "tested", motifN);
    // output the fasta db
    jsonwr_property(json, "sequence_db");
    jsonwr_start_object_value(json);
    jsonwr_str_prop(json, "source", options.seq_source);
    jsonwr_lng_prop(json, "count", seqN);
    jsonwr_end_object_value(json);
    // output the motif dbs
    jsonwr_property(json, "motif_dbs");
    jsonwr_start_array_value(json);
    for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) {
      db = dbs[db_i];
      jsonwr_start_object_value(json);
      jsonwr_str_prop(json, "source", db->source);
      jsonwr_lng_prop(json, "count", arraylst_size(db->motifs));
      jsonwr_end_object_value(json);
    }
    jsonwr_end_array_value(json);
    // start the motif array (closed after the scanning loop below)
    jsonwr_property(json, "motifs");
    jsonwr_start_array_value(json);
  }
  /**************************************************************
   * Tally the positions of the best sites for each of the
   * selected motifs.
   **************************************************************/
  // prepare the sequence sites
  memset(&seq_sites, 0, sizeof(SEQ_SITES_T));
  // prepare the site counts
  // NOTE(review): with no sequences seqlen is 0 and this becomes -1;
  // confirm read_sequences() rejects an empty input before this point.
  counts.allocated = ((2 * seqlen) - 1);
  counts.sites = mm_malloc(sizeof(double) * counts.allocated);
  // prepare the motifs stats list
  stats_list = arraylst_create();
  // prepare the other vars
  motif = NULL; pos_pssm = NULL; rev_motif = NULL; rev_pssm = NULL;
  for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) {
    db = dbs[db_i];
    for (motif_i = 0; motif_i < arraylst_size(db->motifs); motif_i++) {
      motif = (MOTIF_T *) arraylst_get(motif_i, db->motifs);
      DEBUG_FMT(NORMAL_VERBOSE, "Using motif %s of width %d.\n",
          get_motif_id(motif), get_motif_length(motif));
      // reset the counts
      for (i = 0; i < counts.allocated; i++) counts.sites[i] = 0;
      counts.total_sites = 0;
      // create the pssm
      pos_pssm = make_pssm(bg_freqs, motif);
      // If required, do the same for the reverse complement motif.
      // Otherwise rev_motif and rev_pssm keep their initial NULL values.
      if (options.scan_both_strands) {
        rev_motif = dup_rc_motif(motif);
        rev_pssm = make_pssm(bg_freqs, rev_motif);
      }
      // scan the sequences
      for (i = 0; i < seqN; i++)
        score_sequence(&options, sequences[i], pos_pssm, rev_pssm,
            &seq_sites, &counts);
      // DEBUG check that the sum of the sites is close to the site count
      double sum_check = 0, sum_diff;
      for (i = 0; i < counts.allocated; i++) sum_check += counts.sites[i];
      sum_diff = counts.total_sites - sum_check;
      if (sum_diff < 0) sum_diff = -sum_diff;
      if (sum_diff > 0.1) {
        // newline added so consecutive warnings do not run together
        fprintf(stderr, "Warning: site counts don't sum to accurate value! "
            "%g != %ld\n", sum_check, counts.total_sites);
      }
      // output the plain text site counts
      output_site_counts(sites_file, seqlen, db, motif, &counts);
      // compute the best central window
      stats = compute_stats(options.max_window, seqlen, db, motif, &counts);
      // check if it passes the threshold; the stats are kept for the text
      // output even when no JSON writer is available, so a missing HTML
      // template no longer empties the text results
      if (stats->log_adj_pvalue <= log_pvalue_thresh) {
        if (json) output_motif_json(json, stats, &counts);
        arraylst_add(stats, stats_list);
      } else {
        free(stats);
      }
      // Free memory associated with this motif.
      free_pssm(pos_pssm);
      free_pssm(rev_pssm);
      destroy_motif(rev_motif);
    }
  }
  if (json) jsonwr_end_array_value(json);
  // finish writing sites
  fclose(sites_file);
  // finish writing html file
  if (html) {
    // the template is expected to hold exactly one JSON replacement
    if (htmlwr_output(html) != NULL) {
      die("Found another JSON replacement!\n");
    }
    htmlwr_destroy(html);
  }
  // write text file
  output_centrimo_text(&options, motifN, stats_list);
  // Clean up.
  for (i = 0; i < seqN; ++i) {
    free_seq(sequences[i]);
  }
  free(sequences);
  for (i = 0; i < arraylst_size(options.motif_sources); i++) {
    free_db(dbs[i]);
  }
  free(dbs);
  free_array(bg_freqs);
  free(counts.sites);
  free(seq_sites.sites);
  arraylst_destroy(free, stats_list);
  cleanup_options(&options);
  return 0;

}
Пример #2
0
/*
 * Entry point for the motif search program.
 *
 * Requires a sequence file (-s), an output file prefix (-o) and one data
 * source: expression data (-ex), a subset of sequence names (-su) or
 * sequence scores (-sc). When several data sources are given, the one
 * tested last wins (-sc over -su over -ex).
 *
 * Without -worker the program runs in archive mode: it optionally reloads
 * "<outfile>.ms" and then loops forever calling read_motifs()/output().
 * With -worker it performs the planned number of search restarts,
 * periodically refreshing its archive from "<outfile>.ms" while holding a
 * read lock on "<outfile>.lock".
 */
int main(int argc, char *argv[]) {
	set_new_handler(alloc_error);

	if(argc < 6) {
		print_usage(cout);
		exit(0);
	}
	
	string seqfile;                       // file with sequences
	string exprfile;                      // file with expression data
	string subsetfile;                    // file with subset of sequence names to search
	string scorefile;                     // file with scores

	if(! GetArg2(argc, argv, "-s", seqfile)) {
		cerr << "Please specify sequence file\n\n";
		print_usage(cout);
		exit(0);
	}
	if(! GetArg2(argc, argv, "-o", outfile)) {
		cerr << "Please specify output file\n\n";
		print_usage(cout);
		exit(0);
	}
	
	// Later options override earlier ones when more than one is supplied.
	int search_type = UNDEFINED;
	if(GetArg2(argc, argv, "-ex", exprfile)) {
		search_type = EXPRESSION;
	}
	if(GetArg2(argc, argv, "-su", subsetfile)) {
		search_type = SUBSET;
	}
	if(GetArg2(argc, argv, "-sc", scorefile)) {
		search_type = SCORE;
	}
	if(search_type == UNDEFINED) {
		cerr << "Please specify either an expression data file, a file with a subset of sequence names, or a file with sequence scores.\n\n";
		print_usage(cout);
		exit(0);
	}

	// Decide mode of running: no -worker argument means archive mode
	archive = false;
	if(! GetArg2(argc, argv, "-worker", worker)) {
		worker = -1;
		archive = true;
	}
	
	// Read parameters
	vector<string> seqs;
	cerr << "Reading sequence data from '" << seqfile << "'... ";
	get_fasta_fast(seqfile.c_str(), seqs, seq_nameset);
	cerr << "done.\n";
	ngenes = seq_nameset.size();
	
	npoints = 0;
	if(search_type == EXPRESSION) {
		cerr << "Reading expression data from '" << exprfile << "'... ";
		get_expr(exprfile.c_str(), expr, data_nameset);
		cerr << "done.\n";
		// expr[0] on an empty container is undefined behaviour, so bail
		// out explicitly when no expression rows were read.
		if(expr.empty()) {
			cerr << "No expression data found in '" << exprfile << "'\n";
			exit(1);
		}
		npoints = expr[0].size();
		nsubset = 0;
	} else if(search_type == SUBSET) {
		cerr << "Reading subset of sequences to search from '" << subsetfile << "'... ";
		get_list(subsetfile.c_str(), subset);
		cerr << "done.\n";
		npoints = 0;
		// one empty expression row per sequence
		for(int i = 0; i < ngenes; i++) {
			vector<float> row(0);
			expr.push_back(row);
		}
		nsubset = subset.size();
		sort(subset.begin(), subset.end());
	} else if(search_type == SCORE) {
		cerr << "Reading sequence scores from '" << scorefile << "'... ";
		get_scores(scorefile.c_str(), scores, data_nameset);
		cerr << "done.\n";
		npoints = 1;
		nsubset = 0;
	}

	vector<vector <float> > newexpr;
	vector<float> newscores;
	if(search_type == EXPRESSION) {
		order_data_expr(newexpr);
	}
	if(search_type == SCORE) {
		order_data_scores(newscores);
	}

	if(search_type == EXPRESSION) {
		cerr << "Successfully read input files -- dataset size is " << ngenes << " sequences X " << npoints << " timepoints\n";
	} else if(search_type == SUBSET) {
		cerr << "Successfully read input files -- dataset size is " << ngenes << " sequences, with " << nsubset << " to be searched\n";
	} else if(search_type == SCORE) {
		cerr << "Successfully read input files -- dataset size is " << ngenes << " scored sequences\n";
	}

	// Optional tuning parameters fall back to the defaults below.
	cerr << "Setting up MotifSearch... ";
	if(! GetArg2(argc, argv, "-numcols", ncol)) ncol = 10;
	if(! GetArg2(argc, argv, "-order", order)) order = 0;
	if(! GetArg2(argc, argv, "-simcut", simcut)) simcut = 0.8;
	if(! GetArg2(argc, argv, "-maxm", maxm)) maxm = 20;
	MotifSearch* ms;
	if(search_type == EXPRESSION) {
		ms = new MotifSearchExpr(seq_nameset, seqs, ncol, order, simcut, maxm, newexpr, npoints);
	} else if(search_type == SCORE) {
		ms = new MotifSearchScore(seq_nameset, seqs, ncol, order, simcut, maxm, newscores);
	} else {
		ms = new MotifSearchSubset(seq_nameset, seqs, ncol, order, simcut, maxm, subset);
	}
	ms->modify_params(argc, argv);
	ms->set_final_params();
	ms->ace_initialize();
	cerr << "done.\n";
	cerr << "Random seed: " << ms->get_params().seed << '\n';

	if(archive) {
		cerr << "Running in archive mode...\n";
		string archinstr(outfile);
		archinstr.append(".ms");
		ifstream archin(archinstr.c_str());
		if(archin) {
			cerr << "Refreshing from existing archive file " << archinstr << "... ";
			ms->get_archive().read(archin);
			cerr << "done.\n";
		}
		// This loop never exits; the process has to be stopped externally.
		while(true) {
			int found = read_motifs(ms);
			if(found > 0) output(ms);
			if(found < 50) sleep(10);
		}
	} else {
		cerr << "Running as worker " << worker << "...\n";
		int nruns = ms->positions_in_search_space()/(ms->get_params().expect * ncol);
		nruns *= ms->get_params().oversample;
		nruns /= ms->get_params().undersample;
		cerr << "Restarts planned: " << nruns << '\n';
		string archinstr(outfile);
		archinstr.append(".ms");
		string lockstr(outfile);
		lockstr.append(".lock");
		for(int j = 1; j <= nruns; j++) {
			// Refresh the archive on the first run, every 50th run, and on
			// every run for subset searches.
			if(j == 1 || j % 50 == 0 || search_type == SUBSET) {
				struct flock fl;
				int fd;
				fl.l_type   = F_RDLCK;
				fl.l_whence = SEEK_SET;
				fl.l_start  = 0;
				fl.l_len    = 0;
				fl.l_pid    = getpid();
				fd = open(lockstr.c_str(), O_RDONLY);
				if(fd == -1) {
					// a missing lock file is silently skipped
					if(errno != ENOENT)
						cerr << "\t\tUnable to read lock file, error was " << strerror(errno) << "\n";
				} else {
					// Retry only while the lock is held elsewhere (or the
					// call was interrupted). Any other failure will not
					// clear by waiting, so report it and skip this refresh
					// instead of spinning forever.
					bool locked = true;
					while(fcntl(fd, F_SETLK, &fl) == -1) {
						if(errno != EACCES && errno != EAGAIN && errno != EINTR) {
							cerr << "\t\tUnable to lock archive file, error was " << strerror(errno) << "\n";
							locked = false;
							break;
						}
						cerr << "\t\tWaiting for lock release on archive file... \n";
						sleep(10);
					}
					if(locked) {
						ifstream archin(archinstr.c_str());
						if(archin) {
							cerr << "\t\tRefreshing archive from " << archinstr << "...";
							ms->get_archive().clear();
							ms->get_archive().read(archin);
							archin.close();
							cerr << "done.\n";
						}
						fl.l_type = F_UNLCK;
						fcntl(fd, F_SETLK, &fl);
					}
					close(fd);
					cerr << "\t\tArchive now has " << ms->get_archive().nmots() << " motifs\n";
				}
			}
			cerr << "\t\tSearch restart #" << j << "/" << nruns << "\n";
			ms->search_for_motif(worker, j, outfile);
		}
	}
	delete ms;
	return 0;
}
Пример #3
0
/*
 * Entry point: optimizes seed motifs against an expression profile.
 *
 * Reads the seed motifs (-seedfile), the sequences (-rna_fastafile), the
 * icSHAPE values (-icshapefile) and the expression values (-expfile),
 * quantizes the expression vector and then, for every seed:
 *   - drops it when minCondInfoNormalized() against the motifs accepted so
 *     far is below -minr, or (with -doonlypositive 1) when its Pearson
 *     correlation with the expression vector is negative;
 *   - unless -dooptimize is set to something other than 1, tries the
 *     variants produced by modify_base() and elongate_motif() and keeps
 *     those with a higher CalculateMIbasic() value;
 *   - accepts the result when it passes the z-score test (-max_z) and
 *     teiser_jn_max_rank_test() returns at least -jn_t.
 * Accepted motifs are written as a table to -dataoutfile and through
 * write_motifs() to -motifoutfile. Returns 0 on completion.
 */
int main(int argc, char ** argv) {
    int      i ;
    
    char     *seedfile ;
    char     *rna_fastafile ;
    char     *expfile ;
    char     *dataoutfile ;
    char     *motifoutfile ;
    char     *location ;
    char     *icshape_file;  //new line
    
    int      dooptimize = 1 ;
    int      doonlypositive = 0 ;
    
    float    *E ;
    int      *E_q ;
    int      *M_q ;
    
    struct   my_hsearch_data *h_rna_ind ;
    ENTRY    e ;
    ENTRY*   ep ;
    int      hashret =0 ;
    
    int      quantized = 1 ;
    int      ebins = 0 ;
    int      mbins = 2 ;
    int      divbins = 50 ;
    float*   E_q_bins = 0 ;
    
    int      shuffle = 1000000 ;
    
    int      rnd_fasta = 0 ;
    float    max_p = 0.0000001 ;
    float    max_z = -100 ;
    
    int      mincount = 5 ;
    
    double   minr = 5.0 ;
    double   minratio = 0;
    int      midx;
    
    float    maxfreq = 0.5;
    float    myfreq;
    float    lastmyfreq;
    
    int      jn   = 10;
    int      jn_t = 6;
    int      jn_f = 3;
    
    s_sequence **sequences ;
    s_motif  **motifs ;
    s_motif  **opt_motifs ;
    
    char     **seq_names ;
    int      seq_count ;
    int      t_seq_count ;
    int      icshape_count;      //new line
    int      motif_count = 0 ;
    
    seedfile         = get_parameter(argc, argv, "-seedfile") ;
    rna_fastafile    = get_parameter(argc, argv, "-rna_fastafile") ;
    dataoutfile      = get_parameter(argc, argv, "-dataoutfile") ;
    motifoutfile     = get_parameter(argc, argv, "-motifoutfile") ;
    location         = get_parameter(argc, argv, "-location") ;
    icshape_file       = get_parameter(argc, argv, "-icshapefile");    //new line
    
    expfile          = get_parameter(argc, argv, "-expfile") ;
    // NOTE(review): unlike the options below, -quantized is read without an
    // exist_parameter() guard; what get_parameter() returns for a missing
    // option is not visible here, so this is left as is.
    quantized        = atoi(get_parameter(argc, argv, "-quantized"));
    
    // Optional parameters: each overrides its default only when present.
    if (exist_parameter(argc, argv, "-max_p")) {
        max_p          = atof(get_parameter(argc, argv, "-max_p"));
    }
    if (exist_parameter(argc, argv, "-max_z")) {
        max_z          = atof(get_parameter(argc, argv, "-max_z"));
    }
    if (exist_parameter(argc, argv, "-minr")) {
        minr           = atof(get_parameter(argc, argv, "-minr"));
    }
    if (exist_parameter(argc, argv, "-ebins")) {
        ebins          = atoi(get_parameter(argc, argv, "-ebins"));
    }
    if (exist_parameter(argc, argv, "-jn_t")) {
        jn_t           = atoi(get_parameter(argc, argv, "-jn_t"));
    }
    if (exist_parameter(argc, argv, "-shuffle")) {
        shuffle        = atoi(get_parameter(argc, argv, "-shuffle"));
    }
    if (exist_parameter(argc, argv, "-mincount")) {
        mincount       = atoi(get_parameter(argc, argv, "-mincount"));
    }
    if (exist_parameter(argc, argv, "-dooptimize")) {
        dooptimize     = atoi(get_parameter(argc, argv, "-dooptimize"));
    }
    
    if (exist_parameter(argc, argv, "-doonlypositive")) {
        doonlypositive = atoi(get_parameter(argc, argv, "-doonlypositive"));
    }
    
    FILE *f, *fmotif ;
    FILE *fptr = fopen ( seedfile, "rb") ;
    if (!fptr){
        printf("Could not open the seed file: %s\n", seedfile) ;
        exit(0) ;
    }
    
    motif_count = read_motifs( fptr, &motifs ) ;
    printf("%d seeds were loaded...\n", motif_count) ;
    fflush(stdout) ;
    fclose(fptr) ;
    
    /*Read Fasta here*/
    t_seq_count = read_FASTA ( rna_fastafile, &sequences, rnd_fasta) ;
    printf("%d sequences loaded...\n", t_seq_count) ;
    fflush(stdout) ;
    /*we should read icSHAPE Value here*/
    printf("Read icSHAPE Begin.....\n");    //new line
    icshape_count = read_icSHAPE(icshape_file,sequences,t_seq_count); //new line
    // newline added so the next progress message starts on its own line
    printf("%d icSHAPE Values loaded\n",icshape_count); //new line
    fflush(stdout); //new line
    
    E = read_expfile (expfile, sequences, t_seq_count, &seq_names, &seq_count) ;
    printf("Expfile loaded: %d values...\n", seq_count) ;
    fflush(stdout) ;
    // for continuous input with no explicit -ebins, derive the bin count
    // from the number of values
    if ((quantized == 0) && (ebins == 0)) {
        ebins = (int)(0.5 + (float)seq_count / ( divbins * mbins ));
    }
    
    if (quantized == 0) {
        printf("Adding small values...\n") ;
        add_small_values_to_identical_floats(E, seq_count);
    }
    
    printf("Quantizing the input vector...") ;
    E_q  = (int*)malloc((seq_count) * sizeof(int)) ;
    quantize_E(E, seq_count, quantized, &ebins, &E_q, &E_q_bins);
    printf("Done\n") ;
    fflush(stdout) ;
    
    // hash table mapping each sequence name to its index in the
    // expression vector
    h_rna_ind = (struct my_hsearch_data*)calloc(1,  sizeof(struct my_hsearch_data));
    if (h_rna_ind == NULL) {
        die("main: couldn't allocate the hashtable...\n");
    }
    hashret = my_hcreate_r(100000, h_rna_ind);
    if (hashret == 0) {
        printf("main: couldn't make the hashtable...\n");
        exit(0);
    }
    for (i=0 ; i<seq_count ; i++){
        e.key  = strdup(seq_names[i]) ;
        e.data = (char*) i ;   // the index itself is stored in the pointer value
        hashret = my_hsearch_r(e, ENTER, &ep, h_rna_ind);
        if (hashret == 0){
            printf("main: couldn't add the data to hashtable...\n");
            exit(0);
        }
    }
    
    int opt_count=0 ;
    int hits =0;
    float init_best_mymi = 0 ;
    opt_motifs = (s_motif**) malloc (motif_count*sizeof(s_motif*)) ;
    int *jnres = (int*) malloc (motif_count*sizeof(int)) ;
    // malloc(0) may legitimately return NULL, so only treat NULL as a
    // failure when there is something to store
    if (motif_count > 0 && (opt_motifs == NULL || jnres == NULL)) {
        die("main: couldn't allocate memory for the optimized motifs...\n");
    }
    for (i=0 ; i<motif_count ; i++){
        M_q = get_motif_profile (motifs[i], sequences, t_seq_count, h_rna_ind, &hits) ;
        
        //  check how much information it adds to the previous guys
        if (opt_count > 0){
            minratio = minCondInfoNormalized(opt_motifs, mbins, opt_count, M_q, mbins, E_q, ebins, seq_count, minr, &midx, sequences, t_seq_count, h_rna_ind) ;
        }else{
            // nothing accepted yet: force the seed past the minr test
            minratio = minr+1 ;
        }
        if (minratio < minr){
            free(M_q) ;
            printf("seed %d killed by motif %d.\n", i, midx) ;
            continue ;
        }else{
            printf("optimizing.\n") ;
        }
        
        if (doonlypositive==1){
            float r  = pearson_int (M_q, E_q, seq_count) ;
            if (r<0){
                free(M_q) ;
                printf("seed %d killed due to negative association (pearson=%4.3f)\n", i, r) ;
                continue ;
            }
        }
        
        lastmyfreq = (float)(hits) / seq_count ;
        
        s_motif *bestmotif = copy_motif(motifs[i]) ;
        if (dooptimize==1){
            //  initial mi value
            init_best_mymi = CalculateMIbasic(M_q, E_q, seq_count, mbins, ebins) ;
            printf("Initial MI = %3.4f\n", init_best_mymi) ;
            
            // create a random index
            // NOTE(review): k_inc, k_shu and the modified_motifs entries
            // are never released in this function; who owns them depends
            // on shuffleInt(), modify_base() and elongate_motif(), which
            // are not visible here.
            int *k_shu ;
            int *k_inc = (int*) malloc (sizeof(int)*motifs[i]->num_phrases) ;
            int k ;
            for (k=0; k<motifs[i]->num_phrases; k++) {
                k_inc[k] = k;
            }
            k_shu = shuffleInt(k_inc, motifs[i]->num_phrases);
            
            // optimize motif
            printf("Optimzing the sequence of motif %d/%d...\n", i+1, motif_count) ;
            float bestmi = init_best_mymi ;
            printf("initial motif (mi = %3.3f): %s\n", bestmi, print_motif_to_char(bestmotif)) ;
            for (k=0 ; k<motifs[i]->num_phrases ; k++){
                int pos = k_shu[k] ;
                s_motif *modified_motifs [15] ;
                modify_base( bestmotif, modified_motifs, pos ) ;
                int j=0 ;
                for (j=0 ; j<15 ; j++){
                    int h ;
                    int *M_q_t = get_motif_profile (modified_motifs[j], sequences, t_seq_count, h_rna_ind, &h) ;
                    myfreq = (float)(h) / seq_count ;
                    float tempmi = CalculateMIbasic(M_q_t, E_q, seq_count, mbins, ebins) ;
                    // accept only variants with higher MI, more than 10
                    // hits, and a frequency below maxfreq or below that of
                    // the last accepted variant
                    if (tempmi>bestmi && h>10 && (myfreq<maxfreq || myfreq<lastmyfreq)){
                        free(bestmotif->phrases) ;
                        free(bestmotif) ;
                        bestmotif = copy_motif(modified_motifs[j]) ;
                        bestmi = tempmi ;
                        lastmyfreq = myfreq ;
                        printf("new motif (mi = %3.3f): %s\n", bestmi, print_motif_to_char(bestmotif)) ;
                    }
                    free (M_q_t) ;
                }
            }
            
            
            /* Elongating the motif here */
            printf("Elongating the motif...\n") ;
            float premi = bestmi ;
            do{
                s_motif *modified_motifs [46] ;
                elongate_motif( bestmotif, modified_motifs) ;
                int j ;
                for (j=0 ; j<46 ; j++){
                    int h ;
                    int *M_q_t = get_motif_profile (modified_motifs[j], sequences, t_seq_count, h_rna_ind, &h) ;
                    float tempmi = CalculateMIbasic(M_q_t, E_q, seq_count, mbins, ebins) ;
                    if (tempmi>bestmi && h>10){
                        free(bestmotif->phrases) ;
                        free(bestmotif) ;
                        bestmotif = copy_motif(modified_motifs[j]) ;
                        bestmi = tempmi ;
                        printf("new motif (mi = %3.3f): %s\n", bestmi, print_motif_to_char(bestmotif)) ;
                    }
                    free(M_q_t) ;
                }
            } while (premi >= bestmi && bestmotif->num_phrases > motifs[i]->num_phrases) ;
        }
        // re-profile the (possibly optimized) motif before the final tests
        free(M_q) ;
        M_q = get_motif_profile (bestmotif, sequences, t_seq_count, h_rna_ind, &hits) ;
        float mi = CalculateMIbasic(M_q, E_q, seq_count, mbins, ebins) ;
        float z = teiser_z_score_test(mi, M_q, mbins, E_q, ebins, seq_count, 10000) ;
        printf("Final z-score = %4.3f (threshold=%3.3f)\n", z, max_z) ;
        if (z>max_z){
            printf("z-score passed the test.\n") ;
            int jn_test = teiser_jn_max_rank_test(M_q, mbins, E_q, ebins, seq_count, shuffle/10, max_p, jn, jn_f) ;
            if (jn_test>=jn_t){
                opt_motifs[opt_count] = copy_motif(bestmotif) ;
                jnres[opt_count] = jn_test ;
                opt_count++ ;
            }else{
                printf("robustness=%d/%d.\n", jn_test, jn_t) ;
            }
        }
        
        free(bestmotif->phrases) ;
        free(bestmotif) ;
        free(M_q) ;
    }
    
    f      = fopen(dataoutfile, "w") ;
    fmotif = fopen(motifoutfile, "wb") ;
    if (!f || !fmotif)
        die("Cannot open datafile\n");
    
    fprintf(f, "index\tlocation\tmotif-seq\tmotif_structure\tmi-value\tseq mi-value\tfrequency\tz-score\trobustness\tp-value\n") ;
    for (i=0 ; i<opt_count ; i++){
        int *M_qs = get_motif_profile_seq_only (opt_motifs[i], sequences, t_seq_count, h_rna_ind, &hits) ;
        float mis = CalculateMIbasic(M_qs, E_q, seq_count, mbins, ebins) ;
        
        // called after the seq-only profile so that `hits` ends up holding
        // the count from the full profile
        M_q = get_motif_profile (opt_motifs[i], sequences, t_seq_count, h_rna_ind, &hits) ;
        float mi = CalculateMIbasic(M_q, E_q, seq_count, mbins, ebins) ;
        double pass = evalSeed(M_q, seq_count, mi, mbins, E_q, ebins, shuffle) ;
        float z = teiser_z_score_test(mi, M_q, mbins, E_q, ebins, seq_count, 10000) ;
        
        char p_val[100] ;
        if (pass == 0){
            // a result of 0 is reported as below 1/shuffle
            sprintf(p_val, "<%.2e", 1.0/shuffle) ;
        }else{
            sprintf(p_val, "<%.6e", pass) ;
        }
        
        printf("motif %d/%d\t%s\tmi=%3.6f\thits=%d\t%3.4f\n", i+1, opt_count, print_motif_to_char(opt_motifs[i]), mi, hits, z) ;
        fprintf(f, "%d\t%s\t%s\t%3.6f\t%3.6f\t%3.3f\t%3.3f\t%d/10\t%s\n", i, location, print_motif_to_char(opt_motifs[i]), mi, mis, ((float)hits)/seq_count, z, jnres[i], p_val) ;
        free(M_q) ;
        free(M_qs) ;
    }
    
    write_motifs (fmotif, opt_motifs, opt_count) ;
    
    free(E) ;
    free(E_q) ;
    fclose(fmotif) ;
    fclose(f) ;
    return (0) ;
}