示例#1
0
/******************************************************************
 * Locate a file with the given name among the MEME binaries.
 *
 * The environment variable MEME_BIN_DIRS is consulted first and
 * MEME_BIN_DIR second. If either is set, the lookup is delegated to
 * get_meme_dirs_file() and its result is returned unchanged.
 * If neither is set, the compiled-in BIN_DIR is used: the path
 * BIN_DIR/file_name is built and returned when stat() succeeds on it.
 *
 * Returns an allocated path, or NULL (in the BIN_DIR case) when the
 * file does not exist. The caller is responsible for freeing memory.
 ******************************************************************/
char* get_meme_bin_file(const char* file_name) {
  const char *search_dirs = getenv("MEME_BIN_DIRS");
  if (search_dirs == NULL) {
    search_dirs = getenv("MEME_BIN_DIR");
  }
  if (search_dirs != NULL) {
    // an environment override is present, search there instead of BIN_DIR
    return get_meme_dirs_file(search_dirs, file_name);
  }

  // no override: fall back to the directory chosen at compile time
  char *candidate = make_path_to_file(BIN_DIR, file_name);
  struct stat info;
  if (stat(candidate, &info) != 0) {
    // nothing at that path, discard the candidate
    free(candidate);
    return NULL;
  }
  return candidate;
}
/**************************************************************************
 * Create an output file and dump the sequence matches to file.
 *
 * The file name is produced by make_pattern_file_name() from the prefix
 * "seqs", the extension "txt" and the two motifs, and the file is created
 * inside dir. All remaining arguments are passed through unchanged to
 * dump_sequence_matches().
 *
 * If the file can not be opened for writing a message is printed to
 * stderr and nothing is dumped.
 **************************************************************************/
void output_sequence_matches(char *dir, int margin, int bin, double sigthresh,
    BOOLEAN_T sig_only, RBTREE_T *sequences, MOTIF_T *primary_motif,
    SECONDARY_MOTIF_T *secondary_motif, ARRAY_T **matches) {
  FILE *out;
  char *file_path, *file_name;
  file_name = make_pattern_file_name("seqs", "txt", primary_motif, secondary_motif);
  file_path = make_path_to_file(dir, file_name);
  out = fopen(file_path, "w");
  if (out != NULL) {
    dump_sequence_matches(out, margin, bin, sigthresh, sig_only, sequences, primary_motif, secondary_motif, matches);
    fclose(out);
  } else {
    // fopen failed; writing to or closing a NULL stream would be undefined
    fprintf(stderr, "Can not open file \"%s\" for writing.\n", file_path);
  }
  free(file_path);
  free(file_name);
}
/*
 * Echo the file to the dest in base64 with the specified mime type.
 */
static void base64enc_file(HTMLWR_T* htmlwr, char *mimetype, char *binary_source) {
  char *src_path;
  FILE *src_file;
  int32_t bits24;
  int i, c, pad;
  src_path = make_path_to_file(htmlwr->search_dir, binary_source);
  src_file = fopen(src_path, "r");
  if (src_file) {
    fprintf(htmlwr->dest, "data:%s;base64,", mimetype);
    while (!feof(src_file) && !ferror(src_file)) {
      // create a number containing 3 bytes
      bits24 = 0;
      pad = 0;
      for (i = 0; i < 3; i++) {
        c = fgetc(src_file);
        if (c == EOF) {
          pad = 3 - i;
          bits24 = bits24 << (8 * pad);
          break;
        }
        bits24 = (bits24 << 8) | c;
      }
      switch (pad) {
        case 0:
          fputc(b64_enc[(bits24 >> 18) & 0x3F], htmlwr->dest);
          fputc(b64_enc[(bits24 >> 12) & 0x3F], htmlwr->dest);
          fputc(b64_enc[(bits24 >> 6) & 0x3F], htmlwr->dest);
          fputc(b64_enc[bits24 & 0x3F], htmlwr->dest);
          break;
        case 1:
          fputc(b64_enc[(bits24 >> 18) & 0x3F], htmlwr->dest);
          fputc(b64_enc[(bits24 >> 12) & 0x3F], htmlwr->dest);
          fputc(b64_enc[(bits24 >> 6) & 0x3F], htmlwr->dest);
          fputc('=', htmlwr->dest);
          break;
        case 2:
          fputc(b64_enc[(bits24 >> 18) & 0x3F], htmlwr->dest);
          fputc(b64_enc[(bits24 >> 12) & 0x3F], htmlwr->dest);
          fputc('=', htmlwr->dest);
          fputc('=', htmlwr->dest);
          break;
      }
    }
    if (ferror(src_file)) {
      fprintf(stderr, "Error reading included file \"%s\".\n", src_path);
    }
    fclose(src_file);
  } else {
/*
 * Copy the contents of a file into the destination stream, wrapped in
 * either the style or the script opening/closing markup.
 *
 * The file is looked up relative to htmlwr->search_dir. If it can not be
 * opened a warning is written to stderr and nothing is written to the
 * destination.
 */
static void echo_file(HTMLWR_T* htmlwr, bool is_style, char *secondary_source) {
  char *full_path = make_path_to_file(htmlwr->search_dir, secondary_source);
  FILE *in = fopen(full_path, "r");

  if (in == NULL) {
    // can't find it, issue a warning
    fprintf(stderr, "Can not open included file \"%s\".\n", full_path);
    free(full_path);
    return;
  }

  // opening markup, followed by the raw file content in CHUNK sized pieces
  fprintf(htmlwr->dest, "%s\n", (is_style ? STYLE_OPEN : SCRIPT_OPEN));
  char chunk[CHUNK];
  size_t got;
  for (;;) {
    got = fread(chunk, sizeof(char), CHUNK, in);
    if (got == 0) {
      break;
    }
    fwrite(chunk, sizeof(char), got, htmlwr->dest);
  }
  fclose(in);

  // closing markup
  fprintf(htmlwr->dest, "\n    %s", (is_style ? STYLE_CLOSE : SCRIPT_CLOSE));
  free(full_path);
}
示例#5
0
文件: centrimo.c 项目: CPFL/gmeme
/*************************************************************************
 * Output CentriMo text output
 *
 * Sorts stats_list in place with motif_stats_compar and writes one line
 * per entry to TEXT_FILENAME inside options->output_dirname.
 *
 * options    - supplies the output directory and the E-value threshold
 *              echoed in the header.
 * motifN     - number of motifs tested; log(motifN) is added to each
 *              log adjusted p-value to obtain the reported E-value.
 * stats_list - list of MOTIF_STATS_T* to report.
 *
 * If the text file can not be opened a message is printed to stderr and
 * nothing is written (the list is still sorted).
 *************************************************************************/
static void output_centrimo_text(CENTRIMO_OPTIONS_T *options, int motifN,
    ARRAYLST_T *stats_list) {
  MOTIF_STATS_T *stats;
  char *file_path;
  int i, pad;
  double log_motifN;
  FILE *text_file;
  MOTIF_T *motif;
  // find the evalue conversion factor
  log_motifN = log(motifN);
  // Sort and write centrimo.txt
  arraylst_qsort(motif_stats_compar, stats_list);
  // open centrimo text file
  file_path = make_path_to_file(options->output_dirname, TEXT_FILENAME);
  text_file = fopen(file_path, "w");
  if (text_file == NULL) {
    // writing to a NULL stream is undefined behaviour, so bail out early
    fprintf(stderr, "Couldn't open the file %s for writing.\n", file_path);
    free(file_path);
    return;
  }
  free(file_path);
  fputs("# motif             \tE-value\tadj_p-value\tlog_adj_p-value\t"
      "bin_width\ttotal_width\tsites_in_bin\ttotal_sites\tp_success\t"
      "p-value\tmult_tests\n", text_file);
  fprintf(text_file, "# Found %d motifs with E-values <= %g\n", 
      arraylst_size(stats_list), options->evalue_thresh);
  // write centrimo text output
  for (i = 0; i < arraylst_size(stats_list); i++) {
    stats = arraylst_get(i, stats_list);
    motif = stats->motif;
    // Pad the second id so that both ids together fill the 20 character
    // "# motif" column. A negative width passed through '*' is treated by
    // printf as left-justify with the absolute value, which would add more
    // padding the longer the id gets, so clamp it at zero.
    pad = 19 - (int) strlen(get_motif_id(motif));
    if (pad < 0) pad = 0;
    fprintf(text_file, "%s %-*s\t", get_motif_id(motif), 
        pad, get_motif_id2(motif));
    // E-value
    print_log_value(text_file, stats->log_adj_pvalue + log_motifN, 1);
    fputs("\t", text_file);
    // adjusted p-value
    print_log_value(text_file, stats->log_adj_pvalue, 1);
    fprintf(text_file, "\t%.2f\t%d\t%d\t%.0f\t%ld\t%.5f\t", 
        stats->log_adj_pvalue, stats->central_window+1, stats->all_window, 
        stats->central_sites, stats->total_sites, 
        stats->central_prob);
    // unadjusted p-value
    print_log_value(text_file, stats->log_pvalue, 1);
    fprintf(text_file, "\t%d\n", stats->n_win_tested);
  }
  fclose(text_file);
}
示例#6
0
/*
 * Entry point for mcast.
 *
 * Processes the command line, creates the output directory, converts the
 * motif file with transfac2meme when it is in TRANSFAC format, builds an
 * HMM with mhmm and finally scores the sequences with mhmmscan.
 * Returns 0; failure to create the output directory ends in die().
 */
int main(int argc, char* argv[]) {

  MCAST_OPTIONS_T options;
  process_command_line(argc, argv, &options);

  // Set up the output directory (warning messages are suppressed).
  if (create_output_directory(options.output_dirname, options.allow_clobber, FALSE) != 0) {
    die("Couldn't create output directory %s.\n", options.output_dirname);
  }

  // The base name of the motif file is the stem for all generated files.
  // This relies on GNU basename.
  char *stem = basename(options.motif_filename);

  // TRANSFAC input is first converted to a MEME format file that is
  // placed in the output directory.
  if (options.motif_format == TRANSFAC_FORMAT) {
    char *converted_name = concat_string(stem, ".meme");
    char *converted_path = make_path_to_file(options.output_dirname, converted_name);
    myfree(converted_name);
    run_transfactomeme(options.motif_filename, converted_path, options.bg_filename);
    // From here on the converted file is the motif file.
    options.motif_filename = converted_path;
  }

  // Build the HMM using mhmm.
  char *hmm_name = concat_string(stem, ".mhmm");
  char *hmm_path = make_path_to_file(options.output_dirname, hmm_name);
  myfree(hmm_name);
  run_mhmm(options.motif_filename, hmm_path);

  // Read and score the sequences using mhmmscan; the result file name
  // depends on whether text only output was requested.
  char *score_name;
  if (options.text_only == TRUE) {
    score_name = "mcast.txt";
  } else {
    score_name = "mcast.html";
  }
  char *score_path = make_path_to_file(options.output_dirname, score_name);
  run_mhmmscan(&options, hmm_path, options.seq_filename, score_path);

  // Clean up. The motif file name is only owned by this function when it
  // was replaced by the path of the converted file above.
  if (options.motif_format == TRANSFAC_FORMAT) {
    myfree(options.motif_filename);
  }
  myfree(hmm_path);
  myfree(score_path);

  return 0;

}
示例#7
0
/*************************************************************************
 * Entry point for ama
 *************************************************************************/
int main(int argc, char **argv) {
  AMA_OPTIONS_T options;
  ARRAYLST_T *motifs;
  clock_t c0, c1; // measuring cpu_time
  MOTIF_AND_PSSM_T *combo;
  CISML_T *cisml;
  PATTERN_T** patterns;
  PATTERN_T *pattern;
  FILE *fasta_file, *text_output, *cisml_output;
  int i, seq_loading_num, seq_counter, unique_seqs, seq_len, scan_len, x1, x2, y1, y2;
  char *seq_name, *path;
  bool need_postprocessing, created;
  SEQ_T *sequence;
  RBTREE_T *seq_ids;
  RBNODE_T *seq_node;
  double *logcumback;
  ALPH_T *alph;

  // process the command
  process_command_line(argc, argv, &options);

  // load DNA motifs
  motifs = load_motifs(&options);

  // get the alphabet
  if (arraylst_size(motifs) > 0) {
    combo = (MOTIF_AND_PSSM_T*)arraylst_get(0, motifs);
    alph = alph_hold(get_motif_alph(combo->motif));
  } else {
    alph = alph_dna();
  }

  // pick columns for GC operations
  x1 = -1; x2 = -1; y1 = -1; y2 = -1;
  if (alph_size_core(alph) == 4 && alph_size_pairs(alph) == 2) {
    x1 = 0; // A
    x2 = alph_complement(alph, x1); // T
    y1 = (x2 == 1 ? 2 : 1); // C
    y2 = alph_complement(alph, y1); // G
    assert(x1 != x2 && y1 != y2 && x1 != y1 && x2 != y2 && x1 != y2 && x2 != y1);
  }

  // record starting time
  c0 = clock();

  // Create cisml data structure for recording results
  cisml = allocate_cisml(PROGRAM_NAME, options.command_line, options.motif_filename, options.fasta_filename);
  set_cisml_background_file(cisml, options.bg_filename);

  // make a CISML pattern to hold scores for each motif
  for (i = 0; i < arraylst_size(motifs); i++) {
    combo = (MOTIF_AND_PSSM_T*)arraylst_get(i, motifs);
    add_cisml_pattern(cisml, allocate_pattern(get_motif_id(combo->motif), ""));
  }

  // Open the FASTA file for reading.
  fasta_file = NULL;
  if (!open_file(options.fasta_filename, "r", false, "FASTA", "sequences", &fasta_file)) {
    die("Couldn't open the file %s.\n", options.fasta_filename);
  }
  if (verbosity >= NORMAL_VERBOSE) {
    if (options.last == 0) {
      fprintf(stderr, "Using entire sequence\n");
    } else {
      fprintf(stderr, "Limiting sequence to last %d positions.\n", options.last);
    }
  }

  //
  // Read in all sequences and score with all motifs
  //
  seq_loading_num = 0;  // keeps track on the number of sequences read in total
  seq_counter = 0;      // holds the index to the seq in the pattern
  unique_seqs = 0;      // keeps track on the number of unique sequences
  need_postprocessing = false;
  sequence = NULL;
  logcumback = NULL;
  seq_ids = rbtree_create(rbtree_strcasecmp,rbtree_strcpy,free,rbtree_intcpy,free);
  while (read_one_fasta(alph, fasta_file, options.max_seq_length, &sequence)) {
    ++seq_loading_num;
    seq_name = get_seq_name(sequence);
    seq_len = get_seq_length(sequence);
    scan_len = (options.last != 0 ? options.last : seq_len);
    // red-black trees are only required if duplicates should be combined
    if (options.combine_duplicates){
      //lookup seq id and create new entry if required, return sequence index
      seq_node = rbtree_lookup(seq_ids, get_seq_name(sequence), true, &created);
      if (created) { // assign it a loading number
        rbtree_set(seq_ids, seq_node, &unique_seqs);
        seq_counter = unique_seqs;
        ++unique_seqs;
      } else {
        seq_counter = *((int*)rbnode_get(seq_node));
      }
    }
          
    //
    // Set up sequence-dependent background model and compute
    // log cumulative probability of sequence.
    // This needs the sequence in raw format.
    //
    if (options.sdbg_order >= 0)
      logcumback = log_cumulative_background(alph, options.sdbg_order, sequence);

    // Index the sequence, throwing away the raw format and ambiguous characters
    index_sequence(sequence, alph, SEQ_NOAMBIG);

    // Get the GC content of the sequence if binning p-values by GC
    // and store it in the sequence object.
    if (options.num_gc_bins > 1) {
      ARRAY_T *freqs = get_sequence_freqs(sequence, alph);
      set_total_gc_sequence(sequence, get_array_item(y1, freqs) + get_array_item(y2, freqs)); // f(C) + f(G)
      free_array(freqs);                        // clean up
    } else {
      set_total_gc_sequence(sequence, -1);      // flag ignore
    }

    // Scan with motifs.
    for (i = 0; i < arraylst_size(motifs); i++) {
      pattern = get_cisml_patterns(cisml)[i];
      combo = (MOTIF_AND_PSSM_T*)arraylst_get(i, motifs);
      if (verbosity >= HIGHER_VERBOSE) {
        fprintf(stderr, "Scanning %s sequence with length %d "
            "abbreviated to %d with motif %s with length %d.\n",
            seq_name, seq_len, scan_len, 
            get_motif_id(combo->motif), get_motif_length(combo->motif));
      }
      SCANNED_SEQUENCE_T* scanned_seq = NULL;
      if (!options.combine_duplicates || get_pattern_num_scanned_sequences(pattern) <= seq_counter) {
        // Create a scanned_sequence record and save it in the pattern.
        scanned_seq = allocate_scanned_sequence(seq_name, seq_name, pattern);
        set_scanned_sequence_length(scanned_seq, scan_len);
      } else {
        // get existing sequence record
        scanned_seq = get_pattern_scanned_sequences(pattern)[seq_counter];
        set_scanned_sequence_length(scanned_seq, max(scan_len, get_scanned_sequence_length(scanned_seq)));
      }
      
      // check if scanned component of sequence has sufficient length for the motif
      if (scan_len < get_motif_length(combo->motif)) {
        // set score to zero and p-value to 1 if not set yet
        if(!has_scanned_sequence_score(scanned_seq)){
          set_scanned_sequence_score(scanned_seq, 0.0);
        }
        if(options.pvalues && !has_scanned_sequence_pvalue(scanned_seq)){
          set_scanned_sequence_pvalue(scanned_seq, 1.0);
        } 
        add_scanned_sequence_scanned_position(scanned_seq); 
        if (get_scanned_sequence_num_scanned_positions(scanned_seq) > 0L) {
          need_postprocessing = true;
        }
        if (verbosity >= HIGH_VERBOSE) {
          fprintf(stderr, "%s too short for motif %s. Score set to 0.\n",
              seq_name, get_motif_id(combo->motif));
        }
      } else {
        // scan the sequence using average/maximum motif affinity
        ama_sequence_scan(alph, sequence, logcumback, combo->pssm_pair,
            options.scoring, options.pvalues, options.last, scanned_seq,
            &need_postprocessing);
      }
    } // All motifs scanned

    free_seq(sequence);
    if (options.sdbg_order >= 0) myfree(logcumback);

  } // read sequences

  fclose(fasta_file);
  if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "(%d) sequences read in.\n", seq_loading_num);
  if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "Finished          \n");

        
  // if any sequence identifier was multiple times in the sequence set  then
  // postprocess of the data is required
  if (need_postprocessing || options.normalize_scores) {
    post_process(cisml, motifs, options.normalize_scores);
  }
        
  // output results
  if (options.output_format == DIRECTORY_FORMAT) {
    if (create_output_directory(options.out_dir, options.clobber, verbosity > QUIET_VERBOSE)) {
      // only warn in higher verbose modes
      fprintf(stderr, "failed to create output directory `%s' or already exists\n", options.out_dir);
      exit(1);
    }
    path = make_path_to_file(options.out_dir, text_filename);
    //FIXME check for errors: MEME doesn't either and we at least know we have a good directory
    text_output = fopen(path, "w");
    free(path);
    path = make_path_to_file(options.out_dir, cisml_filename);
    //FIXME check for errors
    cisml_output = fopen(path, "w");
    free(path);
    print_cisml(cisml_output, cisml, true, NULL, false);
    print_score(cisml, text_output);
    fclose(cisml_output);
    fclose(text_output);
  } else if (options.output_format == GFF_FORMAT) {
    print_score(cisml, stdout);
  } else if (options.output_format == CISML_FORMAT) {
    print_cisml(stdout, cisml, true, NULL, false);
  } else {
    die("Output format invalid!\n");
  }

  //
  // Clean up.
  //
  rbtree_destroy(seq_ids);
  arraylst_destroy(motif_and_pssm_destroy, motifs);
  free_cisml(cisml);
  rbtree_destroy(options.selected_motifs);
  alph_release(alph);
        
  // measure time
  if (verbosity >= NORMAL_VERBOSE) { // starting time
    c1 = clock();
    fprintf(stderr, "cycles (CPU);            %ld cycles\n", (long) c1);
    fprintf(stderr, "elapsed CPU time:        %f seconds\n", (float) (c1-c0) / CLOCKS_PER_SEC);
  }
  return 0;
}
示例#8
0
/*
 * Entry point for glam2.
 *
 * Performs d.a.runs alignment runs, writes the text output to d.out and
 * then invokes the external programs glam2html and glam2psfm (found in
 * the directory returned by get_meme_bin_dir()) to convert the text
 * output to HTML and to a MEME format motif. A failing conversion only
 * produces a warning. Returns 0.
 */
int main(int argc, char **argv) {
  data d;
  glam2_aln *alns;
  int r;

  prog_name = "glam2";  /* for error messages */
  getargs(&d.a, argc, argv);
  init(&d);

  fputs("GLAM2: Gapped Local Alignment of Motifs\nVersion "
#include "glam2_version.h"
	"\n\n", d.out);
  printargs(d.out, argc, argv);
  print_misc_info(d.out, &d);
  putc('\n', d.out);
  XMALLOC(alns, d.a.runs);

  for (r = 0; r < d.a.runs; ++r) {
    glam2_aln *aln = &alns[r];
    if (!d.a.quiet) {
      fprintf(stderr, "Run %d... ", r+1);
      fflush(stderr);
    }
    aln_init(aln, d.seqs.seqnum, d.a.max_width, d.alph.size);
    d.sm.underflow_flag = 1;  /* do we care about underflow in start_aln? */
    start_aln(aln, &d);
    optimise_aln(aln, &d);
    if (d.sm.underflow_flag < (d.a.algorithm == 2 ? DBL_EPSILON : DBL_MIN))
      fprintf(stderr, "%s: accuracy loss due to numeric underflow (%g)\nIf the alignment looks suspect, try rerunning with higher -u, or maybe lower -b\n", prog_name, d.sm.underflow_flag);
    /* in profile mode every run is reported as soon as it finishes */
    if (d.a.profile)
      print_aln_info(d.out, aln, &d);
  }

  if (!d.a.quiet) putc('\n', stderr);

  /* otherwise the runs are sorted and reported together */
  SORT(alns, d.a.runs, aln_cmp);
  if (!d.a.profile)
    print_alns(d.out, alns, &d);

  xfclose(d.out);			// close text output file

  // Create the HTML output and MEME format output
  char *glam2html, *glam2psfm, *command;
  size_t html_cmd_length, psfm_cmd_length, command_length;
  int command_ret;
  // create the paths to the programs
  glam2html = make_path_to_file(get_meme_bin_dir(), "glam2html");
  glam2psfm = make_path_to_file(get_meme_bin_dir(), "glam2psfm");
  // allocate memory for the command; the one buffer is used for both
  // commands so it has to be large enough for the longer of the two
  html_cmd_length = strlen(glam2html) + strlen(d.txt_filename) + strlen(d.html_filename);
  psfm_cmd_length = strlen(glam2psfm) + strlen(d.txt_filename) + strlen(d.psfm_filename);
  command_length = (html_cmd_length > psfm_cmd_length ? html_cmd_length : psfm_cmd_length) + 50;
  command = xmalloc(command_length);
  // NOTE(review): the file names are placed in a shell command without any
  // quoting, so names containing spaces or shell metacharacters will
  // misbehave - consider quoting them or avoiding system().
  // run glam2html
  snprintf(command, command_length, "%s < %s > %s",  glam2html, d.txt_filename, d.html_filename);
  if ((command_ret = system(command)) != 0) {
    report_external_failure("glam2html", command_ret);
    fprintf(stderr, "Warning: failed to convert output to HTML!\n");
  }
  // run glam2psfm
  snprintf(command, command_length, "%s < %s > %s", glam2psfm, d.txt_filename, d.psfm_filename);
  if ((command_ret = system(command)) != 0) {
    report_external_failure("glam2psfm", command_ret);
    fprintf(stderr, "Warning: failed to convert output to MEME format motif!\n");
  }
  free(command);
  free(glam2psfm);
  free(glam2html);

  return 0;
}
示例#9
0
文件: centrimo.c 项目: CPFL/gmeme
/*************************************************************************
 * Entry point for centrimo
 *
 * Reads the sequences and the motif databases, scores every sequence
 * with every motif, tallies the site positions and writes
 *  - the site counts (SITES_FILENAME),
 *  - a HTML report when the template can be opened (HTML_FILENAME), and
 *  - a text summary of the motifs passing the threshold (TEXT_FILENAME)
 * into the output directory.
 *
 * Returns 0 on completion. Failure to create the output directory or to
 * open the site counts file ends in die().
 *************************************************************************/
int main(int argc, char *argv[]) {
  CENTRIMO_OPTIONS_T options;
  SEQ_SITES_T seq_sites;
  SITE_COUNTS_T counts;
  int seqN, motifN, seqlen, db_i, motif_i, i;
  double log_pvalue_thresh;
  SEQ_T** sequences = NULL;
  ARRAY_T* bg_freqs = NULL;
  ARRAYLST_T *stats_list;
  MOTIF_DB_T **dbs, *db;
  MREAD_T *mread;
  MOTIF_STATS_T *stats;
  MOTIF_T *motif, *rev_motif;
  PSSM_T *pos_pssm, *rev_pssm;
  char *sites_path, *desc;
  FILE *sites_file;
  HTMLWR_T *html;
  JSONWR_T *json;

  // COMMAND LINE PROCESSING
  process_command_line(argc, argv, &options);

  // load the sequences
  read_sequences(options.alphabet, options.seq_source, &sequences, &seqN);
  // the length of the first sequence is used for all of them
  seqlen = (seqN ? get_seq_length(sequences[0]) : 0);
  // calculate a sequence background (unless other background is given)
  if (!options.bg_source) {
    bg_freqs = calc_bg_from_fastas(options.alphabet, seqN, sequences);
  }

  // load the motifs
  motifN = 0;
  dbs = mm_malloc(sizeof(MOTIF_DB_T*) * arraylst_size(options.motif_sources));
  for (i = 0; i < arraylst_size(options.motif_sources); i++) {
    char* db_source;
    db_source = (char*)arraylst_get(i, options.motif_sources);
    dbs[i] = read_motifs(i, db_source, options.bg_source, &bg_freqs, 
        options.pseudocount, options.selected_motifs, options.alphabet);
    motifN += arraylst_size(dbs[i]->motifs);
  }
  // an E-value threshold is a p-value threshold multiplied by the number
  // of motifs, hence the subtraction in log space
  log_pvalue_thresh = log(options.evalue_thresh) - log(motifN);
  // Setup some things for double strand scanning
  if (options.scan_both_strands == TRUE) {
    // Set up hash tables for computing reverse complement
    setup_hash_alph(DNAB);
    setalph(0);
    // Correct background by averaging on freq. for both strands.
    average_freq_with_complement(options.alphabet, bg_freqs);
    normalize_subarray(0, alph_size(options.alphabet, ALPH_SIZE), 0.0, bg_freqs);
    calc_ambigs(options.alphabet, FALSE, bg_freqs);
  }
  // Create output directory
  if (create_output_directory(options.output_dirname, options.allow_clobber, 
        (verbosity >= NORMAL_VERBOSE))) {
    die("Couldn't create output directory %s.\n", options.output_dirname);
  }
  // open output files
  sites_path = make_path_to_file(options.output_dirname, SITES_FILENAME);
  sites_file = fopen(sites_path, "w");
  if (sites_file == NULL) {
    // the site counts are written for every motif, so there is no point
    // in continuing without this file
    die("Couldn't open the file %s for writing.\n", sites_path);
  }
  free(sites_path);
  // setup html monolith writer
  json = NULL;
  if ((html = htmlwr_create(get_meme_etc_dir(), TEMPLATE_FILENAME))) {
    htmlwr_set_dest_name(html, options.output_dirname, HTML_FILENAME);
    htmlwr_replace(html, "centrimo_data.js", "data");
    json = htmlwr_output(html);
    if (json == NULL) die("Template does not contain data section.\n");
  } else {
    DEBUG_MSG(QUIET_VERBOSE, "Failed to open html template file.\n");
  }
  if (json) {
    // output some top level variables
    jsonwr_str_prop(json, "version", VERSION);
    jsonwr_str_prop(json, "revision", REVISION);
    jsonwr_str_prop(json, "release", ARCHIVE_DATE);
    jsonwr_str_array_prop(json, "cmd", argv, argc);
    jsonwr_property(json, "options");
    jsonwr_start_object_value(json);
    jsonwr_dbl_prop(json, "motif-pseudo", options.pseudocount);
    jsonwr_dbl_prop(json, "score", options.score_thresh);
    jsonwr_dbl_prop(json, "ethresh", options.evalue_thresh);
    jsonwr_lng_prop(json, "maxbin", options.max_window+1);
    jsonwr_bool_prop(json, "norc", !options.scan_both_strands);
    jsonwr_bool_prop(json, "noflip", options.no_flip);
    jsonwr_end_object_value(json);
    // output the description
    desc = prepare_description(&options);
    if (desc) {
      jsonwr_str_prop(json, "job_description", desc);
      free(desc);
    }
    // output size metrics
    jsonwr_lng_prop(json, "seqlen", seqlen);
    jsonwr_lng_prop(json, "tested", motifN);
    // output the fasta db
    jsonwr_property(json, "sequence_db");
    jsonwr_start_object_value(json);
    jsonwr_str_prop(json, "source", options.seq_source);
    jsonwr_lng_prop(json, "count", seqN);
    jsonwr_end_object_value(json);
    // output the motif dbs
    jsonwr_property(json, "motif_dbs");
    jsonwr_start_array_value(json);
    for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) {
      db = dbs[db_i];
      jsonwr_start_object_value(json);
      jsonwr_str_prop(json, "source", db->source);
      jsonwr_lng_prop(json, "count", arraylst_size(db->motifs));
      jsonwr_end_object_value(json);
    }
    jsonwr_end_array_value(json);
    // start the motif array
    jsonwr_property(json, "motifs");
    jsonwr_start_array_value(json);
  }
  /**************************************************************
   * Tally the positions of the best sites for each of the 
   * selected motifs.
   **************************************************************/
  // prepare the sequence sites
  memset(&seq_sites, 0, sizeof(SEQ_SITES_T));
  // prepare the site counts
  // NOTE(review): with no sequences seqlen is 0 and this becomes -1 -
  // confirm that read_sequences never returns an empty set.
  counts.allocated = ((2 * seqlen) - 1);
  counts.sites = mm_malloc(sizeof(double) * counts.allocated);
  // prepare the motifs stats list
  stats_list = arraylst_create();
  // prepare the other vars
  motif = NULL; pos_pssm = NULL; rev_motif = NULL; rev_pssm = NULL;
  for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) {
    db = dbs[db_i];
    for (motif_i = 0; motif_i < arraylst_size(db->motifs); motif_i++) {
      motif = (MOTIF_T *) arraylst_get(motif_i, db->motifs);
      DEBUG_FMT(NORMAL_VERBOSE, "Using motif %s of width %d.\n",  
          get_motif_id(motif), get_motif_length(motif));
      // reset the counts
      for (i = 0; i < counts.allocated; i++) counts.sites[i] = 0;
      counts.total_sites = 0;
      // create the pssm 
      pos_pssm = make_pssm(bg_freqs, motif);
      // If required, do the same for the reverse complement motif.
      // (rev_motif and rev_pssm stay NULL when scanning a single strand)
      if (options.scan_both_strands) {
        rev_motif = dup_rc_motif(motif);
        rev_pssm = make_pssm(bg_freqs, rev_motif);
      }
      // scan the sequences
      for (i = 0; i < seqN; i++)
        score_sequence(&options, sequences[i], pos_pssm, rev_pssm, 
            &seq_sites, &counts);
      // DEBUG check that the sum of the sites is close to the site count
      double sum_check = 0, sum_diff;
      for (i = 0; i < counts.allocated; i++) sum_check += counts.sites[i];
      sum_diff = counts.total_sites - sum_check;
      if (sum_diff < 0) sum_diff = -sum_diff;
      if (sum_diff > 0.1) {
        fprintf(stderr, "Warning: site counts don't sum to accurate value! "
            "%g != %ld\n", sum_check, counts.total_sites);
      }
      // output the plain text site counts
      output_site_counts(sites_file, seqlen, db, motif, &counts);
      // compute the best central window
      stats = compute_stats(options.max_window, seqlen, db, motif, &counts);
      // check if it passes the threshold
      if (stats->log_adj_pvalue <= log_pvalue_thresh) {
        // The stats are kept for the text output even when no HTML
        // report is being written, otherwise a missing template would
        // leave the text output without any motifs.
        if (json) output_motif_json(json, stats, &counts);
        arraylst_add(stats, stats_list);
      } else {
        free(stats);
      }
      // Free memory associated with this motif.
      free_pssm(pos_pssm);
      free_pssm(rev_pssm);
      destroy_motif(rev_motif);
    }
  }
  if (json) jsonwr_end_array_value(json);
  // finish writing sites
  fclose(sites_file);
  // finish writing html file
  if (html) {
    if (htmlwr_output(html) != NULL) {
      die("Found another JSON replacement!\n");
    }
    htmlwr_destroy(html);
  }
  // write text file
  output_centrimo_text(&options, motifN, stats_list);
  // Clean up.
  for (i = 0; i < seqN; ++i) {
    free_seq(sequences[i]); 
  }
  free(sequences);
  for (i = 0; i < arraylst_size(options.motif_sources); i++) {
    free_db(dbs[i]);
  }
  free(dbs);
  free_array(bg_freqs);
  free(counts.sites);
  free(seq_sites.sites);
  arraylst_destroy(free, stats_list);
  cleanup_options(&options);
  return 0;

}