Esempi in C++ (Cpp) per calc_ambigs

Esempio n. 1

0

Mostra file

File: subst-matrix.c Progetto: CPFL/gmeme

/**************************************************************************
 * Build the pseudocount frequency vector.
 *
 * For every core (non-ambiguous) letter i the pseudocount is
 *     g[i] = sum over core letters j of (f[j] / b[j]) * target_freq[i][j]
 *
 * The target_freq matrix only has values for the core alphabet, so the
 * entries for the ambiguous letters are filled in afterwards by
 * calc_ambigs() from the values of the core letters.
 *
 * Returns an array obtained from allocate_array() that is sized for the
 * full alphabet (core letters plus ambiguous letters).
 **************************************************************************/
ARRAY_T *get_pseudocount_freqs(
   ALPH_T alph,
   ARRAY_T *f,			/* Foreground distribution. */
   ARRAY_T *b,			/* Background distribution. */
   MATRIX_T *target_freq	/* Target frequency matrix. */
)
{
  int row, col;
  const int core_size = alph_size(alph, ALPH_SIZE);	/* core letters only */
  ARRAY_T *pseudo = allocate_array(alph_size(alph, ALL_SIZE)); /* core + ambiguous */

  /* One pseudocount per core letter. */
  for (row = 0; row < core_size; row++) {
    double total = 0;

    col = 0;
    while (col < core_size) {
      double target = get_matrix_cell(row, col, target_freq);
      double fg = get_array_item(col, f);
      double bg = get_array_item(col, b);
      total += (fg/bg) * target;
      col++;
    }
    set_array_item(row, total, pseudo);

    if (SUBST_MATRIX_DEBUG) {
      printf("%g %g, ", get_array_item(row, f), total);
    }
  }

  /* Derive the ambiguous-letter entries from the core ones. */
  calc_ambigs(alph, FALSE, pseudo);

  if (SUBST_MATRIX_DEBUG) {
    printf("\n");
  }

  return pseudo;
} /* get_pseudocount_freqs */

Esempio n. 2

0

Mostra file

File: xml-util.c Progetto: brsaran/FuzzyApp

/***********************************************************************
 * Read the background letter frequencies from XML.
 *
 * The document behind xpath_ctxt is queried for the
 * background_frequencies/alphabet_array/value nodes. One value is read
 * per core alphabet letter, in alphabet order, the core values are
 * normalized and the ambiguous letters are then filled in by
 * calc_ambigs().
 *
 * Returns an array with alph_size(alph, ALL_SIZE) entries.
 * Caller is responsible for freeing the returned array.
 *
 * Structural problems in the XML (wrong number of values, a letter that
 * is missing or listed more than once) are reported through assert().
 ***********************************************************************/
ARRAY_T* read_bg_freqs_from_xml(xmlXPathContextPtr xpath_ctxt, ALPH_T alph) {

  // A true compile-time constant, so the buffer below is a plain array
  // rather than a variable length array.
  enum { MAX_XPATH_EXPRESSION = 200 };
  char xpath_expression[MAX_XPATH_EXPRESSION];

  xmlXPathObjectPtr xpathObj = NULL;
  xmlNodePtr currValueNode = NULL;
  ATYPE    value;
  ARRAY_T* bg_freqs;
  int num_values;
  int i_node = 0;

  int a_size = alph_size(alph, ALPH_SIZE);

  // Use XPATH to get the background frequencies from XML
  xpathObj = xpath_query(
    xpath_ctxt, 
    "//*/background_frequencies/alphabet_array/value"
  );
  assert(xpathObj != NULL);
  num_values = (xpathObj->nodesetval ? xpathObj->nodesetval->nodeNr : 0);
  xmlXPathFreeObject(xpathObj);

  // The number of background frequencies should match the alphabet size.
  assert(num_values == a_size);

  // Allocate the array (room for the ambiguous letters too).
  bg_freqs= allocate_array(alph_size(alph, ALL_SIZE));

  // XML doesn't enforce any order on the emission probability values,
  // so force reading bg frequency values in alphabet order.
  for (i_node = 0; i_node < a_size; i_node++) {
    // Build the XPATH expression to get bg freq for a character.
    snprintf(
      xpath_expression,
      MAX_XPATH_EXPRESSION,
      "//*/background_frequencies/"
      "alphabet_array/value[@letter_id='letter_%c']",
      alph_char(alph, i_node)
    );
    // Read the selected bg frequency.
    xpathObj = xpath_query(xpath_ctxt, xpath_expression);
    assert(xpathObj != NULL);
    // Should only find one node. The node set pointer is tested first
    // because it may be NULL when nothing matched (the count query above
    // already allows for that).
    assert(xpathObj->nodesetval != NULL);
    assert(xpathObj->nodesetval->nodeNr == 1);
    // Decode from node set to numeric value for bg freq.
    currValueNode = xpathObj->nodesetval->nodeTab[0];
    value = xmlXPathCastNodeToNumber(currValueNode);
    // Release the query result only after the node has been converted,
    // so the node set outlives every use of its members.
    xmlXPathFreeObject(xpathObj);
    set_array_item(i_node, value, bg_freqs);
  }

  // Make sure the frequencies add up to 1.0. 
  normalize_subarray(0, a_size, 0.0, bg_freqs);

  // Fill in ambiguous characters. 
  calc_ambigs(alph, FALSE, bg_freqs);

  return bg_freqs;

}

Esempio n. 3

0

Mostra file

File: motif.c Progetto: CPFL/gmeme

/***********************************************************************
 * Calculate the ambiguous letters from the concrete ones.
 *
 * The motif's frequency matrix is resized to the full alphabet width
 * (ALL_SIZE), the motif is flagged with MOTIF_HAS_AMBIGS and calc_ambigs()
 * is then applied to every row of the matrix.
 ***********************************************************************/
void calc_motif_ambigs
  (MOTIF_T *motif)
{
  int row = 0;

  // Make room for the ambiguous letter columns.
  resize_matrix(
    motif->length,
    alph_size(motif->alph, ALL_SIZE),
    0,
    motif->freqs
  );
  motif->flags |= MOTIF_HAS_AMBIGS;

  // Fill in the ambiguous columns one motif position at a time.
  while (row < motif->length) {
    calc_ambigs(motif->alph, FALSE, get_matrix_row(row, motif->freqs));
    ++row;
  }
}

Esempio n. 4

0

Mostra file

File: alphabet.c Progetto: BioinformaticsArchive/EXTREME

/*
 * Load uniform frequencies into the array.
 *
 * Each of the ALPH_ASIZE[alph] core letters is given the value 1/n and
 * the ambiguous letters are then filled in by calc_ambigs().
 *
 * If freqs is NULL a new array of alph_size(alph, ALL_SIZE) entries is
 * allocated; otherwise freqs must be at least that long (asserted).
 * Returns the array that was filled in.
 */
ARRAY_T* get_uniform_frequencies(ALPH_T alph, ARRAY_T *freqs) {
  const int core_size = ALPH_ASIZE[alph];
  int idx = 0;

  if (freqs == NULL) {
    freqs = allocate_array(alph_size(alph, ALL_SIZE));
  }
  assert(get_array_length(freqs) >= alph_size(alph, ALL_SIZE));

  // every core letter is equally likely
  while (idx < core_size) {
    set_array_item(idx, 1.0/core_size, freqs);
    idx++;
  }

  calc_ambigs(alph, FALSE, freqs);
  return freqs;
}

Esempio n. 5

0

Mostra file

File: motif.c Progetto: CPFL/gmeme

/***********************************************************************
 * Replace each DNA frequency by the average of itself and the frequency
 * of its complementary base, then recompute the ambiguous letters.
 *
 * The array is modified in place.
 *
 * Apparently no-one uses this.
 *
 * Assumes DNA alphabet in order ACGT.
 ***********************************************************************/
void balance_complementary_dna_freqs
  (ARRAY_T* source)
{
  // Positions of the bases in the assumed ACGT ordering.
  enum { BASE_A = 0, BASE_C = 1, BASE_G = 2, BASE_T = 3 };

  // Both averages are taken before anything is written back.
  const double at_mean =
    (get_array_item(BASE_A, source)+get_array_item(BASE_T, source))/2.0;
  const double cg_mean =
    (get_array_item(BASE_C, source)+get_array_item(BASE_G, source))/2.0;

  set_array_item(BASE_A, at_mean, source); // A paired with T
  set_array_item(BASE_C, cg_mean, source); // C paired with G
  set_array_item(BASE_G, cg_mean, source); // G paired with C
  set_array_item(BASE_T, at_mean, source); // T paired with A

  calc_ambigs(DNA_ALPH, FALSE, source);
}

Esempio n. 6

0

Mostra file

File: alphabet.c Progetto: BioinformaticsArchive/EXTREME

/*
 * Load the non-redundant database frequencies into the array.
 *
 * The ALPH_ASIZE[alph] core values are copied from the ALPH_NRDB[alph]
 * table, normalized, and the ambiguous letters are then filled in by
 * calc_ambigs().
 *
 * If freqs is NULL a new array of alph_size(alph, ALL_SIZE) entries is
 * allocated; otherwise freqs must be at least that long (asserted).
 * Returns the array that was filled in.
 */
ARRAY_T* get_nrdb_frequencies(ALPH_T alph, ARRAY_T *freqs) {
  const int core_size = ALPH_ASIZE[alph];
  const PROB_T *table = ALPH_NRDB[alph];
  int idx = 0;

  if (freqs == NULL) {
    freqs = allocate_array(alph_size(alph, ALL_SIZE));
  }
  assert(get_array_length(freqs) >= alph_size(alph, ALL_SIZE));

  // copy the tabulated values for the core letters
  while (idx < core_size) {
    set_array_item(idx, table[idx], freqs);
    ++idx;
  }

  normalize_subarray(0, core_size, 0.0, freqs);
  calc_ambigs(alph, FALSE, freqs);
  return freqs;
}

Esempio n. 7

0

Mostra file

File: motif.c Progetto: CPFL/gmeme

/***********************************************************************
 * Compute the complement of one DNA frequency distribution.
 *
 * dest receives the frequencies of source with A<->T and C<->G swapped.
 * All four source values are read before dest is written, so the
 * function also gives the correct result when source and dest are the
 * same array.
 *
 * The ambiguous letters of dest are recomputed only when source is
 * longer than the four core letters.
 * 
 * Assumes DNA alphabet in order ACGT.
 ***********************************************************************/
void complement_dna_freqs
  (ARRAY_T* source,
   ARRAY_T* dest)
{
  // Snapshot the inputs first: writing dest while still reading source
  // would corrupt the result if the two arrays alias each other.
  const double freq_a = get_array_item(0, source);
  const double freq_c = get_array_item(1, source);
  const double freq_g = get_array_item(2, source);
  const double freq_t = get_array_item(3, source);

  set_array_item(0, freq_t, dest); // A -> T
  set_array_item(1, freq_g, dest); // C -> G
  set_array_item(2, freq_c, dest); // G -> C
  set_array_item(3, freq_a, dest); // T -> A

  //check if the frequencies have ambiguous characters
  //for example meme does not use ambiguous characters
  if (get_array_length(source) > 4) {
    calc_ambigs(DNA_ALPH, FALSE, dest);
  }
}

Esempio n. 8

0

Mostra file

File: motif-in.c Progetto: a1aks/Haystack

/*
 * When the parser has been selected do some processing.
 *
 * Asks the selected format (mread->formats) for its alphabet and for a
 * background. When get_bg reports a background, it is normalized over
 * the core letters, resized to the full alphabet and its ambiguous
 * letters are computed; otherwise uniform frequencies are used.
 * Finally set_pseudo_bg() is called on the reader.
 */
static void parser_selected(MREAD_T *mread) {
  MFORMAT_T *chosen = mread->formats;
  // get the alphabet
  ALPH_T alph = chosen->get_alphabet(chosen->data);

  // get the background
  if (!chosen->get_bg(chosen->data, &(mread->motif_bg))) {
    // the format supplied no background, so fall back to uniform
    mread->motif_bg = get_uniform_frequencies(alph, mread->motif_bg);
  } else {
    normalize_subarray(0, alph_size(alph, ALPH_SIZE), 0.0, mread->motif_bg);
    resize_array(mread->motif_bg, alph_size(alph, ALL_SIZE));
    calc_ambigs(alph, FALSE, mread->motif_bg);
  }

  set_pseudo_bg(mread);
}

Esempio n. 9

0

Mostra file

File: seq.c Progetto: CPFL/gmeme

/****************************************************************************
 *  Return an array containing the frequencies in the sequences for each
 *  character of the alphabet. Characters not in the alphabet are not
 *  counted.
 *
 *  The function is called repeatedly. While seq is not NULL the letters of
 *  seq are accumulated into *bgcalc (which is allocated on the first call
 *  when *bgcalc is NULL) and NULL is returned. When seq is NULL the
 *  accumulated result in *bgcalc is converted into a background array,
 *  the accumulator is freed and *bgcalc is reset to NULL.
 *
 *  Letters are counted in chunks of BG_CALC_CHUNK. Each completed chunk is
 *  folded into a running average of per-chunk frequencies (calc->bg) and
 *  calc->weight holds the number of chunks folded in so far.
 *
 *  NULL is also returned, without producing a background or freeing the
 *  accumulator, once calc->weight has reached LONG_MAX.
 *
 *  Pseudocode example:
 *    ALPH_T alph = ...
 *    BGCALC_T *bgcalc = NULL;
 *    for each seq:
 *      calculate_background(alph, seq, &bgcalc);
 *    ARRAY_T *bg = calculate_background(alph, NULL, &bgcalc);
 ****************************************************************************/
ARRAY_T* calculate_background(
  ALPH_T alph,
  SEQ_T* seq,
  BGCALC_T** bgcalc
){
  BGCALC_T *calc;
  int a_size, i, j, a_index;
  char c;
  double freq, chunk_part, chunk_freq;
  ARRAY_T *background;
  assert(bgcalc != NULL);
  assert(seq != NULL || *bgcalc != NULL);
  // get the alphabet size (core letters only)
  a_size = alph_size(alph, ALPH_SIZE);
  if (*bgcalc == NULL) {
    //allocate and initialize calc
    calc = mm_malloc(sizeof(BGCALC_T));
    calc->alph = alph;
    calc->chunk_seen = 0;
    calc->weight = 0;
    calc->chunk_counts = mm_malloc(a_size * sizeof(long));
    calc->bg = mm_malloc(a_size * sizeof(double));
    for (i = 0; i < a_size; ++i) {
      calc->chunk_counts[i] = 0;
      calc->bg[i] = 0;
    }
    *bgcalc = calc;
  } else {
    calc = *bgcalc;
    assert(alph == calc->alph);
    if (calc->weight == LONG_MAX) return NULL;
  }
  if (seq == NULL) {
    // no sequence so calculate the final result
    background = allocate_array(alph_size(alph, ALL_SIZE));
    if (calc->weight == 0) {
      if (calc->chunk_seen > 0) {
        // when we haven't had to approximate yet
        // just do a normal background calculation
        for (i = 0; i < a_size; i++) {
          freq = (double) calc->chunk_counts[i] / (double) calc->chunk_seen;
          set_array_item(i, freq, background);
        }
      } else {
        fputs("Uniform\n", stdout);
        // when there are no counts then return uniform
        freq = (double) 1 / (double) a_size;
        for (i = 0; i < a_size; i++) {
          set_array_item(i, freq, background);
        }
      }
    } else {
      if (calc->chunk_seen > 0) {
        // combine the frequencies for the existing chunks with the counts
        // for the partially completed chunk
        chunk_part = (double) calc->chunk_seen / (double) BG_CALC_CHUNK;
        for (i = 0; i < a_size; i++) {
          chunk_freq = (double) calc->chunk_counts[i] / 
              (double) calc->chunk_seen;
          freq = ((calc->bg[i] * calc->weight) + (chunk_freq * chunk_part)) / 
              (calc->weight + chunk_part);
          set_array_item(i, freq, background);
        }
      } else {
        // in the odd case we get to an integer number of chunks
        for (i = 0; i < a_size; i++) {
          set_array_item(i, calc->bg[i], background);
        }
      }
    }
    calc_ambigs(alph, FALSE, background);
    // free bgcalc structure
    free(calc->bg);
    free(calc->chunk_counts);
    free(calc);
    *bgcalc = NULL;
    return background;
  }
  // we have a sequence to add to the background calculation
  // NOTE: i is the position in the sequence and must only be advanced by
  // this loop; the per-letter loops below use j so that completing a chunk
  // does not move the scan position.
  for (i = 0; i < seq->length; i++) {
    c = get_seq_char(i, seq);
    a_index = alph_index(alph, c);
    // skip anything that is not a core letter
    if (a_index == -1 || a_index >= a_size) continue;
    calc->chunk_counts[a_index]++;
    calc->chunk_seen++;
    if (calc->chunk_seen == BG_CALC_CHUNK) {
      // a chunk is complete: fold it into the running average
      if (calc->weight == 0) {
        for (j = 0; j < a_size; j++) {
          calc->bg[j] = (double) calc->chunk_counts[j] / (double) BG_CALC_CHUNK;
        }
      } else {
        for (j = 0; j < a_size; j++) {
          chunk_freq = (double) calc->chunk_counts[j] / (double) BG_CALC_CHUNK;
          calc->bg[j] = (calc->bg[j] * calc->weight + chunk_freq) / 
              (calc->weight + 1);
        }
      }
      calc->weight++;
      // reset the counts for the next chunk
      for (j = 0; j < a_size; j++) {
        calc->chunk_counts[j] = 0;
      }
      calc->chunk_seen = 0;
      // I don't think it is feasible to reach this limit
      // but I guess I'd better check anyway
      if (calc->weight == LONG_MAX) {
        fprintf(stderr, "Sequence data set is so large that even the "
            "approximation designed for large datasets can't handle it!");
        return NULL;
      }
    }
  }
  return NULL;
}

Esempio n. 10

0

Mostra file

File: alphabet.c Progetto: BioinformaticsArchive/EXTREME

/*
 * Load background file frequencies into the array.
 *
 * The file is read in chunks of BG_CHUNK_SIZE and split into lines
 * ("\n", "\r\n" and a leading "\r" are handled). Everything from a '#'
 * to the end of a line is ignored. Lines matching BGFREQ_RE (a single
 * letter followed by a number) contribute one letter/frequency pair.
 *
 * alph        in/out: when *alph is INVALID_ALPH it is set to PROTEIN_ALPH
 *             or DNA_ALPH depending on how many distinct letters were
 *             read (PROTEIN_ASIZE or DNA_ASIZE).
 * bg_filename name of the background file to read.
 * freqs       destination array; when NULL a new array of
 *             alph_size(*alph, ALL_SIZE) entries is allocated, otherwise
 *             it must be at least that long (asserted).
 *
 * Returns the filled array. The values for the core letters are stored
 * as read (normalization is disabled, see below) and the ambiguous
 * letters are filled in by calc_ambigs().
 *
 * Calls die() when the file cannot be opened or read, when a frequency is
 * outside (0, 1], when the letters do not match the alphabet, or when a
 * core letter is missing or effectively listed twice.
 */
ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) {
  regmatch_t matches[4];
  STR_T *line;
  char chunk[BG_CHUNK_SIZE+1], letter[2], *key;
  int size, terminate, offset, i;
  FILE *fp;
  regex_t bgfreq;
  double freq;
  RBTREE_T *letters;
  RBNODE_T *node;
  
  regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED);
  // letter -> frequency pairs collected from the file
  letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free);
  line = str_create(100);
  if (!(fp = fopen(bg_filename, "r"))) {
    die("Unable to open background file \"%s\" for reading.\n", bg_filename);
  }
  
  terminate = feof(fp);
  while (!terminate) {
    size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp);
    // A read error leaves the end-of-file indicator clear, so without this
    // check the loop would keep reading zero bytes forever.
    if (ferror(fp)) {
      die("Error reading background file \"%s\".\n", bg_filename);
    }
    chunk[size] = '\0';
    terminate = feof(fp);
    offset = 0;
    while (offset < size) {
      // skip mac newline
      if (str_len(line) == 0 && chunk[offset] == '\r') {
        offset++;
        continue;
      }
      // find next new line
      for (i = offset; i < size; ++i) {
        if (chunk[i] == '\n') break;
      }
      // append portion up to the new line or end of chunk
      str_append(line, chunk+offset, i - offset);
      // read more if we didn't find a new line
      // (the partial line stays in 'line' and is completed by the next chunk)
      if (i == size && !terminate) break;
      // move the offset past the new line
      offset = i + 1;
      // handle windows new line
      if (str_char(line, -1) == '\r') str_truncate(line, -1);
      // remove everything to the right of a comment character
      for (i = 0; i < str_len(line); ++i) {
        if (str_char(line, i) == '#') {
          str_truncate(line, i);
          break;
        }
      }
      // check the line for a single letter followed by a number
      if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) {
        // parse the letter and frequency value
        regex_strncpy(matches+1, str_internal(line), letter, 2);
        freq = regex_dbl(matches+2, str_internal(line));
        // check the frequency is acceptable
        if (freq < 0 || freq > 1) {
          die("The background file lists the illegal probability %g for "
            "the letter %s.\n", freq, letter);
        } else if (freq == 0) {
          die("The background file lists a probability of zero for the "
            "letter %s\n", letter);
        }
        if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq);
      }
      str_clear(line);
    }
  }
  // finished with the file so clean up file parsing stuff
  fclose(fp);
  str_destroy(line, FALSE);
  regfree(&bgfreq);
  // guess the alphabet
  if (*alph == INVALID_ALPH) {
    switch (rbtree_size(letters)) {
      case PROTEIN_ASIZE:
        *alph = PROTEIN_ALPH;
        break;
      case DNA_ASIZE:
        *alph = DNA_ALPH;
        break;
      default:
        die("Number of single character entries in background does not match "
            "an alphabet.\n");
    }
  }
  // make the background
  if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE));
  assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE));
  // -1 marks an entry that has not been set yet
  init_array(-1, freqs);
  for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) {
    key = (char*)rbtree_key(node);
    i = alph_index(*alph, key[0]);
    freq = *((double*)rbtree_value(node));
    if (i == -1) {
      die("Background contains letter %s which is not in the %s alphabet.\n", 
          key, alph_name(*alph));
    }
    if (get_array_item(i, freqs) != -1) {
      die("Background contains letter %s which has the same meaning as an "
          "already listed letter.\n", key);
    }
    set_array_item(i, freq, freqs);
  }
  // check that all items were set
  for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) {
    if (get_array_item(i, freqs) == -1) {
      die("Background is missing letter %c.\n", alph_char(*alph, i));
    }
  }
  // disabled for backwards compatibility (AMA test was failing)
  //normalize_subarray(0, ALPH_ASIZE[*alph], 0.0, freqs);
  // calculate the values of the ambiguous letters from the concrete ones
  calc_ambigs(*alph, FALSE, freqs);
  // cleanup
  rbtree_destroy(letters);
  // return result
  return freqs;
}

Esempio n. 11

0

Mostra file

File: centrimo.c Progetto: CPFL/gmeme

/*************************************************************************
 * Entry point for centrimo
 *
 * Steps, in order:
 *   1. process the command line;
 *   2. load the sequences and, unless a background source was given,
 *      calculate a background from them;
 *   3. load the motif databases;
 *   4. when both strands are scanned, average the background with its
 *      complement, renormalize it and recompute the ambiguous letters;
 *   5. create the output directory and open the site counts file and the
 *      HTML/JSON writer;
 *   6. for every motif: score all sequences, write the site counts,
 *      compute the statistics and report motifs passing the threshold;
 *   7. write the text output and release everything.
 *
 * Returns 0; failures are reported through die().
 *************************************************************************/
int main(int argc, char *argv[]) {
  CENTRIMO_OPTIONS_T options;
  SEQ_SITES_T seq_sites;
  SITE_COUNTS_T counts;
  int seqN, motifN, seqlen, db_i, motif_i, i;
  double log_pvalue_thresh;
  SEQ_T** sequences = NULL;
  ARRAY_T* bg_freqs = NULL;
  ARRAYLST_T *stats_list;
  MOTIF_DB_T **dbs, *db;
  MREAD_T *mread;
  MOTIF_STATS_T *stats;
  MOTIF_T *motif, *rev_motif;
  PSSM_T *pos_pssm, *rev_pssm;
  char *sites_path, *desc;
  FILE *sites_file;
  HTMLWR_T *html;
  JSONWR_T *json;

  // COMMAND LINE PROCESSING
  process_command_line(argc, argv, &options);

  // load the sequences
  read_sequences(options.alphabet, options.seq_source, &sequences, &seqN);
  // the length of the first sequence is used for every sequence
  seqlen = (seqN ? get_seq_length(sequences[0]) : 0);
  // calculate a sequence background (unless other background is given)
  if (!options.bg_source) {
    bg_freqs = calc_bg_from_fastas(options.alphabet, seqN, sequences);
  }

  // load the motifs
  motifN = 0;
  dbs = mm_malloc(sizeof(MOTIF_DB_T*) * arraylst_size(options.motif_sources));
  for (i = 0; i < arraylst_size(options.motif_sources); i++) {
    char* db_source;
    db_source = (char*)arraylst_get(i, options.motif_sources);
    dbs[i] = read_motifs(i, db_source, options.bg_source, &bg_freqs, 
        options.pseudocount, options.selected_motifs, options.alphabet);
    motifN += arraylst_size(dbs[i]->motifs);
  }
  // the E-value threshold divided by the number of motifs, in log space
  log_pvalue_thresh = log(options.evalue_thresh) - log(motifN);
  // Setup some things for double strand scanning
  if (options.scan_both_strands == TRUE) {
    // Set up hash tables for computing reverse complement
    setup_hash_alph(DNAB);
    setalph(0);
    // Correct background by averaging on freq. for both strands.
    average_freq_with_complement(options.alphabet, bg_freqs);
    normalize_subarray(0, alph_size(options.alphabet, ALPH_SIZE), 0.0, bg_freqs);
    calc_ambigs(options.alphabet, FALSE, bg_freqs);
  }
  // Create output directory
  if (create_output_directory(options.output_dirname, options.allow_clobber, 
        (verbosity >= NORMAL_VERBOSE))) {
    die("Couldn't create output directory %s.\n", options.output_dirname);
  }
  // open output files
  sites_path = make_path_to_file(options.output_dirname, SITES_FILENAME);
  sites_file = fopen(sites_path, "w");
  // The file is written for every motif and closed at the end, so a
  // failed open must be reported here rather than crash later on.
  if (sites_file == NULL) {
    die("Couldn't open sites file %s for writing.\n", sites_path);
  }
  free(sites_path);
  // setup html monolith writer
  json = NULL;
  if ((html = htmlwr_create(get_meme_etc_dir(), TEMPLATE_FILENAME))) {
    htmlwr_set_dest_name(html, options.output_dirname, HTML_FILENAME);
    htmlwr_replace(html, "centrimo_data.js", "data");
    json = htmlwr_output(html);
    if (json == NULL) die("Template does not contain data section.\n");
  } else {
    DEBUG_MSG(QUIET_VERBOSE, "Failed to open html template file.\n");
  }
  if (json) {
    // output some top level variables
    jsonwr_str_prop(json, "version", VERSION);
    jsonwr_str_prop(json, "revision", REVISION);
    jsonwr_str_prop(json, "release", ARCHIVE_DATE);
    jsonwr_str_array_prop(json, "cmd", argv, argc);
    jsonwr_property(json, "options");
    jsonwr_start_object_value(json);
    jsonwr_dbl_prop(json, "motif-pseudo", options.pseudocount);
    jsonwr_dbl_prop(json, "score", options.score_thresh);
    jsonwr_dbl_prop(json, "ethresh", options.evalue_thresh);
    jsonwr_lng_prop(json, "maxbin", options.max_window+1);
    jsonwr_bool_prop(json, "norc", !options.scan_both_strands);
    jsonwr_bool_prop(json, "noflip", options.no_flip);
    jsonwr_end_object_value(json);
    // output the description
    desc = prepare_description(&options);
    if (desc) {
      jsonwr_str_prop(json, "job_description", desc);
      free(desc);
    }
    // output size metrics
    jsonwr_lng_prop(json, "seqlen", seqlen);
    jsonwr_lng_prop(json, "tested", motifN);
    // output the fasta db
    jsonwr_property(json, "sequence_db");
    jsonwr_start_object_value(json);
    jsonwr_str_prop(json, "source", options.seq_source);
    jsonwr_lng_prop(json, "count", seqN);
    jsonwr_end_object_value(json);
    // output the motif dbs
    jsonwr_property(json, "motif_dbs");
    jsonwr_start_array_value(json);
    for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) {
      db = dbs[db_i];
      jsonwr_start_object_value(json);
      jsonwr_str_prop(json, "source", db->source);
      jsonwr_lng_prop(json, "count", arraylst_size(db->motifs));
      jsonwr_end_object_value(json);
    }
    jsonwr_end_array_value(json);
    // start the motif array
    jsonwr_property(json, "motifs");
    jsonwr_start_array_value(json);
  }
  /**************************************************************
   * Tally the positions of the best sites for each of the 
   * selected motifs.
   **************************************************************/
  // prepare the sequence sites
  memset(&seq_sites, 0, sizeof(SEQ_SITES_T));
  // prepare the site counts
  // NOTE(review): with no sequences seqlen is 0 and this becomes -1;
  // confirm that read_sequences never returns an empty set.
  counts.allocated = ((2 * seqlen) - 1);
  counts.sites = mm_malloc(sizeof(double) * counts.allocated);
  // prepare the motifs stats list
  stats_list = arraylst_create();
  // prepare the other vars
  motif = NULL; pos_pssm = NULL; rev_motif = NULL; rev_pssm = NULL;
  for (db_i = 0; db_i < arraylst_size(options.motif_sources); db_i++) {
    db = dbs[db_i];
    for (motif_i = 0; motif_i < arraylst_size(db->motifs); motif_i++) {
      motif = (MOTIF_T *) arraylst_get(motif_i, db->motifs);
      DEBUG_FMT(NORMAL_VERBOSE, "Using motif %s of width %d.\n",  
          get_motif_id(motif), get_motif_length(motif));
      // reset the counts
      for (i = 0; i < counts.allocated; i++) counts.sites[i] = 0;
      counts.total_sites = 0;
      // create the pssm 
      pos_pssm = make_pssm(bg_freqs, motif);
      // If required, do the same for the reverse complement motif.
      if (options.scan_both_strands) {
        rev_motif = dup_rc_motif(motif);
        rev_pssm = make_pssm(bg_freqs, rev_motif);
      }
      // scan the sequences
      for (i = 0; i < seqN; i++)
        score_sequence(&options, sequences[i], pos_pssm, rev_pssm, 
            &seq_sites, &counts);
      // DEBUG check that the sum of the sites is close to the site count
      double sum_check = 0, sum_diff;
      for (i = 0; i < counts.allocated; i++) sum_check += counts.sites[i];
      sum_diff = counts.total_sites - sum_check;
      if (sum_diff < 0) sum_diff = -sum_diff;
      if (sum_diff > 0.1) {
        fprintf(stderr, "Warning: site counts don't sum to accurate value! "
            "%g != %ld", sum_check, counts.total_sites);
      }
      // output the plain text site counts
      output_site_counts(sites_file, seqlen, db, motif, &counts);
      // compute the best central window
      stats = compute_stats(options.max_window, seqlen, db, motif, &counts);
      // check if it passes the threshold
      // NOTE(review): when the HTML template could not be opened json is
      // NULL, so no stats are kept and the text output below lists no
      // motifs; confirm whether that is intended.
      if (json && stats->log_adj_pvalue <= log_pvalue_thresh) {
        output_motif_json(json, stats, &counts);
        arraylst_add(stats, stats_list);
      } else {
        free(stats);
      }
      // Free memory associated with this motif.
      // (rev_pssm and rev_motif are still NULL when only one strand is
      // scanned.)
      free_pssm(pos_pssm);
      free_pssm(rev_pssm);
      destroy_motif(rev_motif);
    }
  }
  if (json) jsonwr_end_array_value(json);
  // finish writing sites
  fclose(sites_file);
  // finish writing html file
  if (html) {
    if (htmlwr_output(html) != NULL) {
      die("Found another JSON replacement!\n");
    }
    htmlwr_destroy(html);
  }
  // write text file
  output_centrimo_text(&options, motifN, stats_list);
  // Clean up.
  for (i = 0; i < seqN; ++i) {
    free_seq(sequences[i]); 
  }
  free(sequences);
  for (i = 0; i < arraylst_size(options.motif_sources); i++) {
    free_db(dbs[i]);
  }
  free(dbs);
  free_array(bg_freqs);
  free(counts.sites);
  free(seq_sites.sites);
  arraylst_destroy(free, stats_list);
  cleanup_options(&options);
  return 0;

}