/************************************************************************** * Prepare a sequence for recognition by * - making sure it is uppercase, * - making sure it doesn't contain illegal characters, * - adding flanking Xs to match START/END states, and * - converting it to an integer format * - computing cumulative GC counts * * In the integer form, each character in the sequence is replaced by * the index of that character in the alphabet array. Thus, if the * alphabet is 'ACGT', every occurence of the letter 'G' in the * sequence will be represented by the index 2. **************************************************************************/ void prepare_sequence (SEQ_T* sequence, ALPH_T alph) { int i_seq; // Index in the sequence. int badchar; // Number of characters converted. char wildcard; // Wildcard character wildcard = alph_wildcard(alph); badchar = 0; for (i_seq = 0; i_seq < get_seq_length(sequence); i_seq++) { // Make sure the sequence is uppercase. if (islower((int)(sequence->sequence)[i_seq])) { (sequence->sequence)[i_seq] = toupper((int)(sequence->sequence)[i_seq]); } // Convert non-alphabetic characters to ambiguous. if (alph_index(alph, (sequence->sequence)[i_seq]) == -1) { fprintf(stderr, "%c -> %c\n", (sequence->sequence)[i_seq], wildcard); (sequence->sequence)[i_seq] = wildcard; badchar++; } } // Tell the user about the conversions. if (badchar) { fprintf(stderr, "Warning: converted %d non-alphabetic ", badchar); fprintf(stderr, "characters to %c in sequence %s.\n", wildcard, get_seq_name(sequence)); } // Add flanking X's. add_flanking_xs(sequence, alph); // Make the integer sequence. sequence->intseq = (int *)mm_malloc(sizeof(int) * get_seq_length(sequence)); for (i_seq = 0; i_seq < get_seq_length(sequence); i_seq++) { (sequence->intseq)[i_seq] = alph_index(alph, (sequence->sequence)[i_seq]); } // // Get cumulative GC counts. // if (alph == DNA_ALPH) { int len = get_seq_length(sequence); char c = (sequence->sequence)[0]; // first character sequence->gc = (int *)mm_malloc(sizeof(int) * get_seq_length(sequence)); // set count at first position (sequence->gc)[0] = (c == 'G' || c == 'C') ? 1 : 0; // set cumulative counts at rest of postitions for (i_seq = 1; i_seq < len; i_seq++) { c = (sequence->sequence)[i_seq]; (sequence->gc)[i_seq] = (c == 'G' || c == 'C') ? (sequence->gc)[i_seq-1] + 1 : (sequence->gc)[i_seq-1]; } } }
/************************************************************************* * Calculate the log odds score for a single motif-sized window. *************************************************************************/ static inline BOOLEAN_T score_motif_site( ALPH_T alph, char *seq, PSSM_T *pssm, double *score // OUT ) { int asize = alph_size(alph, ALPH_SIZE); MATRIX_T* pssm_matrix = pssm->matrix; double scaled_log_odds = 0.0; // For each position in the site int motif_position; for (motif_position = 0; motif_position < pssm->w; motif_position++) { char c = seq[motif_position]; int aindex = alph_index(alph, c); // Check for gaps and ambiguity codes at this site if(aindex == -1 || aindex >= asize) return FALSE; scaled_log_odds += get_matrix_cell(motif_position, aindex, pssm_matrix); } *score = get_unscaled_pssm_score(scaled_log_odds, pssm); // Handle scores that are out of range if ((int) scaled_log_odds >= get_array_length(pssm->pv)) { scaled_log_odds = (float)(get_array_length(pssm->pv) - 1); *score = scaled_to_raw(scaled_log_odds, pssm->w, pssm->scale, pssm->offset); } return TRUE; }
/* * Make one position in an array the sum of a set of other positions. */ static void calc_ambig(ALPH_T alph, BOOLEAN_T log_space, char ambig, char *sources, ARRAY_T *array) { char *source; PROB_T sum; PROB_T value; sum = 0.0; for (source = sources; *source != '\0'; ++source) { value = get_array_item(alph_index(alph, *source), array); if (log_space) { sum = LOG_SUM(sum, value); } else { sum += value; } } set_array_item(alph_index(alph, ambig), sum, array); }
/* * Replace the elements an array of frequences with the average * over complementary bases. */ void average_freq_with_complement(ALPH_T alph, ARRAY_T *freqs) { int a_index, t_index, g_index, c_index; double at_freq, gc_freq; assert(alph == DNA_ALPH); a_index = alph_index(alph, 'A'); t_index = alph_index(alph, 'T'); g_index = alph_index(alph, 'G'); c_index = alph_index(alph, 'C'); at_freq = (get_array_item(a_index, freqs) + get_array_item(t_index, freqs)) / 2.0; gc_freq = (get_array_item(g_index, freqs) + get_array_item(c_index, freqs)) / 2.0; set_array_item(a_index, at_freq, freqs); set_array_item(t_index, at_freq, freqs); set_array_item(g_index, gc_freq, freqs); set_array_item(c_index, gc_freq, freqs); }
/* * Take the counts from an ambiguous character and evenly distribute * them among the corresponding concrete characters. * * This function operates in log space. */ static void dist_ambig(ALPH_T alph, char ambig, char *concrete_chars, ARRAY_T* freqs) { PROB_T ambig_count, concrete_count; int ambig_index, num_concretes, i, concrete_index; // Get the count to be distributed. ambig_index = alph_index(alph, ambig); ambig_count = get_array_item(ambig_index, freqs); // Divide it by the number of corresponding concrete characters. num_concretes = strlen(concrete_chars); ambig_count -= my_log2((PROB_T)num_concretes); // Distribute it in equal portions to the given concrete characters. for (i = 0; i < num_concretes; i++) { concrete_index = alph_index(alph, concrete_chars[i]); concrete_count = get_array_item(concrete_index, freqs); // Add the ambiguous counts. concrete_count = LOG_SUM(concrete_count, ambig_count); set_array_item(concrete_index, concrete_count, freqs); } // Set the ambiguous count to zero. set_array_item(ambig_index, LOG_ZERO, freqs); }
/**************************************************************************** * Return an array containing the frequencies in the alignment for each * character of the alphabet. Gaps and ambiguity characters other then * ANY_BASE are not counted. The freq. of ANY_BASE characters is stored * in the last element of the array. ****************************************************************************/ ARRAY_T* get_alignment_freqs(ALPH_T alph, ALIGNMENT_T* alignment) { char c = 0; int aindex = 0; int asize = 0; int i = 0; int s = 0; int total_bases = 0; int* num_bases = NULL; ARRAY_T* freqs = NULL; // Initialize counts for each character in the alphabet asize = alph_size(alph, ALPH_SIZE); num_bases = mm_malloc(asize * sizeof(int)); for (i = 0; i < asize; i++) { num_bases[i] = 0; } for (s = 0; s < alignment->num_sequences; s++) { for (i = 0; i < alignment->length; i++) { c = get_seq_char(i, alignment->sequences[s]); aindex = alph_index(alph, c); // c might be an ambiguity code. We don't count ambiguity codes. if (aindex != -1 && aindex < asize) { num_bases[aindex]++; total_bases++; } } } freqs = allocate_array(asize); for (i = 0; i < asize; i++) { set_array_item(i, (double) num_bases[i] / (double) total_bases, freqs); } // Clean up the count of characters myfree(num_bases); return freqs; }
/**************************************************************************** * Return an array containing the frequencies in the sequence for each * character of the alphabet. Characters not in the alphabet are not * counted. ****************************************************************************/ ARRAY_T* get_sequence_freqs (SEQ_T* seq, ALPH_T alph) { char c = 0; int a_index = 0; int a_size = 0; int i = 0; int total_bases = 0; int* num_bases = NULL; ARRAY_T* freqs = NULL; // Initialize counts for each character in the alphabet a_size = alph_size(alph, ALPH_SIZE); num_bases = mm_malloc(a_size * sizeof(int)); for (i = 0; i < a_size; i++) { num_bases[i] = 0; } for (i = 0; i < seq->length; i++) { c = get_seq_char(i, seq); if (c != '-' && c != '-') { a_index = alph_index(alph, c); if (a_index == -1 || a_index >= a_size) continue; num_bases[a_index]++; total_bases++; } } freqs = allocate_array(a_size); for (i = 0; i < a_size; i++) { set_array_item(i, (double) num_bases[i] / (double) total_bases, freqs); } // Clean up the count of characters myfree(num_bases); return freqs; }
/************************************************************************* * Convert the string representing an alignment column into an integer * which will be the column index for that alignment column in the PSSM. * If the alphabet has m characters, and the alignment columns have n entries, * the array of all alignment columns is conveniently numbered by the set of * consecutive n-digit base m numerals: * AAAA = 0000, AAAC = 0001, ..., TTTG = 3332, TTTT = 3333. *************************************************************************/ int hash_alignment_col(ALPH_T alph, char* alignment_col, int alignment_col_size) { // The alignment column must have at least // one character aside from the terminating null. assert(alignment_col_size >= 1); assert(alignment_col != NULL); int asize = alph_size(alph, ALPH_SIZE); int hash = 0; int place = 1; int i, j; for (i = alignment_col_size - 1; i >= 0; i--) { // Find alignment column character in alphabet j = alph_index(alph, alignment_col[i]); if (j == -1 || j >= asize) { die("Alignment character %c not found in alphabet\n", alignment_col[i]); } hash += j * place; place *= asize; } return hash; } // hash_alignment_col
/**************************************************************************** * Return an array containing the frequencies in the sequences for each * character of the alphabet. Characters not in the alphabet are not * counted. * * When seq is provided it returns null, otherwise it converts the accumulated * result in bgcalc into a background. * * * Pseudocode example: * ALPH_T alph = ... * BGCALC_T *bgcalc = NULL; * for each seq: * calculate_background(alph, seq, &bgcalc); * ARRAY_T *bg = calculate_background(NULL, &bgcalc); ****************************************************************************/ ARRAY_T* calculate_background( ALPH_T alph, SEQ_T* seq, BGCALC_T** bgcalc ){ BGCALC_T *calc; int a_size, i, a_index; char c; double freq, chunk_part, chunk_freq; ARRAY_T *background; assert(bgcalc != NULL); assert(seq != NULL || *bgcalc != NULL); // get the alphabet // get the alphabet size a_size = alph_size(alph, ALPH_SIZE); if (*bgcalc == NULL) { //allocate and initialize calc calc = mm_malloc(sizeof(BGCALC_T)); calc->alph = alph; calc->chunk_seen = 0; calc->weight = 0; calc->chunk_counts = mm_malloc(a_size * sizeof(long)); calc->bg = mm_malloc(a_size * sizeof(double)); for (i = 0; i < a_size; ++i) { calc->chunk_counts[i] = 0; calc->bg[i] = 0; } *bgcalc = calc; } else { calc = *bgcalc; assert(alph == calc->alph); if (calc->weight == LONG_MAX) return NULL; } if (seq == NULL) { // no sequence so calculate the final result background = allocate_array(alph_size(alph, ALL_SIZE)); if (calc->weight == 0) { if (calc->chunk_seen > 0) { // when we haven't had to approximate yet // just do a normal background calculation for (i = 0; i < a_size; i++) { freq = (double) calc->chunk_counts[i] / (double) calc->chunk_seen; set_array_item(i, freq, background); } } else { fputs("Uniform\n", stdout); // when there are no counts then return uniform freq = (double) 1 / (double) a_size; for (i = 0; i < a_size; i++) { set_array_item(i, freq, background); } } } else { if (calc->chunk_seen > 0) { // combine the frequencies for the existing chunks with the counts // for the partially completed chunk chunk_part = (double) calc->chunk_seen / (double) BG_CALC_CHUNK; for (i = 0; i < a_size; i++) { chunk_freq = (double) calc->chunk_counts[i] / (double) calc->chunk_seen; freq = ((calc->bg[i] * calc->weight) + (chunk_freq * chunk_part)) / (calc->weight + chunk_part); set_array_item(i, freq, background); } } else { // in the odd case we get to an integer number of chunks for (i = 0; i < a_size; i++) { set_array_item(i, calc->bg[i], background); } } } calc_ambigs(alph, FALSE, background); // free bgcalc structure free(calc->bg); free(calc->chunk_counts); free(calc); *bgcalc = NULL; return background; } // we have a sequence to add to the background calculation for (i = 0; i < seq->length; i++) { c = get_seq_char(i, seq); a_index = alph_index(alph, c); if (a_index == -1 || a_index >= a_size) continue; calc->chunk_counts[a_index]++; calc->chunk_seen++; if (calc->chunk_seen == BG_CALC_CHUNK) { if (calc->weight == 0) { for (i = 0; i < a_size; i++) { calc->bg[i] = (double) calc->chunk_counts[i] / (double) BG_CALC_CHUNK; } } else { for (i = 0; i < a_size; i++) { chunk_freq = (double) calc->chunk_counts[i] / (double) BG_CALC_CHUNK; calc->bg[i] = (calc->bg[i] * calc->weight + chunk_freq) / (calc->weight + 1); } } calc->weight++; // reset the counts for the next chunk for (i = 0; i < a_size; i++) { calc->chunk_counts[i] = 0; } calc->chunk_seen = 0; // I don't think it is feasible to reach this limit // but I guess I'd better check anyway if (calc->weight == LONG_MAX) { fprintf(stderr, "Sequence data set is so large that even the " "approximation designed for large datasets can't handle it!"); return NULL; } } } return NULL; }
/************************************************************************* * Calculate the odds score for each motif-sized window at each * site in the sequence using the given nucleotide frequencies. * * This function is a lightweight version based on the one contained in * motiph-scoring. Several calculations that are unnecessary for gomo * have been removed in order to speed up the process *************************************************************************/ static double score_sequence( SEQ_T *seq, // sequence to scan (IN) MOTIF_T *motif, // motif already converted to odds values (IN) PSSM_T *m_pssm, // motif pssm (IN) MATRIX_T *m_odds, // motif odds (IN) int method, // method used for scoring (IN) double threshold, // Threshold to use in TOTAL_HITS mode with a PWM ARRAY_T *bg_freqs //background model ) { assert(seq != NULL); assert(motif != NULL); assert((method == TOTAL_HITS && m_pssm) || (method != TOTAL_HITS && m_odds)); char* raw_seq = get_raw_sequence(seq); int seq_length = get_seq_length(seq); // Get the pv lookup table ARRAY_T* pv_lookup = NULL; if (NULL != m_pssm) { pv_lookup = m_pssm->pv; assert(get_array_length(pv_lookup) > 0); } // Prepare storage for the string representing the portion // of the reference sequence within the window. char* window_seq = (char *) mm_malloc(sizeof(char) * (get_motif_length(motif) + 1)); window_seq[get_motif_length(motif)] = '\0'; int max_index = seq_length - get_motif_length(motif); if (max_index < 0) max_index = 0; const int asize = alph_size(get_motif_alph(motif), ALPH_SIZE); double* odds = (double*) mm_malloc(sizeof(double)*max_index); double* scaled_log_odds = (double*) mm_malloc(sizeof(double)*max_index); // For each site in the sequence int seq_index; for (seq_index = 0; seq_index < max_index; seq_index++) { double odd = 1.0; scaled_log_odds[seq_index] = 0; // For each site in the motif window int motif_position; for (motif_position = 0; motif_position < get_motif_length(motif); motif_position++) { char c = raw_seq[seq_index + motif_position]; window_seq[motif_position] = c; // Check for gaps at this site if(c == '-' || c == '.') { break; } // Check for ambiguity codes at this site //TODO: This next call is very expensive - it takes up approx. 10% of a // programme's running time. It should be fixed up somehow. int aindex = alph_index(get_motif_alph(motif), c); if (aindex > asize) { break; } if (method == TOTAL_HITS) { //If we're in this mode, then we're using LOG ODDS. //scaled_log_odds[seq_index] += get_matrix_cell(motif_position, aindex, get_motif_freqs(motif)); scaled_log_odds[seq_index] += get_matrix_cell(motif_position, aindex, m_pssm->matrix); } else { odd *= get_matrix_cell(motif_position, aindex, m_odds); } } odds[seq_index] = odd; } // return odds as requested (MAX or AVG scoring) double requested_odds = 0.0; if (method == AVG_ODDS){ for (seq_index = 0; seq_index < max_index; seq_index++) { requested_odds += odds[seq_index]; } requested_odds /= max_index + 1; // Divide by 0 if max_index==0 } else if (method == MAX_ODDS){ for (seq_index = 0; seq_index < max_index; seq_index++) { if (odds[seq_index] > requested_odds){ requested_odds = odds[seq_index]; } } } else if (method == SUM_ODDS) { for (seq_index = 0; seq_index < max_index; seq_index++) { requested_odds += odds[seq_index]; } } else if (method == TOTAL_HITS) { for (seq_index = 0; seq_index < max_index; seq_index++) { if (scaled_log_odds[seq_index] >= (double)get_array_length(pv_lookup)) { scaled_log_odds[seq_index] = (double)(get_array_length(pv_lookup) - 1); } double pvalue = get_array_item((int) scaled_log_odds[seq_index], pv_lookup); //Figure out how to calculate the p-value of a hit //fprintf(stderr, "m: %s pv_l len: %i scaled_log_odds: %g seq index: %i pvalue: %g\n", // get_motif_id(motif), get_array_length(pv_lookup), scaled_log_odds[seq_index], seq_index, pvalue); if (pvalue < threshold) { requested_odds++; //Add another hit. } if (verbosity > HIGHER_VERBOSE) { fprintf(stderr, "Window Data: %s\t%s\t%i\t%g\t%g\t%g\n", get_seq_name(seq), get_motif_id(motif), seq_index, scaled_log_odds[seq_index], pvalue, threshold); } } } myfree(odds); myfree(scaled_log_odds); myfree(window_seq); return requested_odds; }
/* * Load background file frequencies into the array. */ ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) { regmatch_t matches[4]; STR_T *line; char chunk[BG_CHUNK_SIZE+1], letter[2], *key; int size, terminate, offset, i; FILE *fp; regex_t bgfreq; double freq; RBTREE_T *letters; RBNODE_T *node; regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED); letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free); line = str_create(100); if (!(fp = fopen(bg_filename, "r"))) { die("Unable to open background file \"%s\" for reading.\n", bg_filename); } terminate = feof(fp); while (!terminate) { size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp); chunk[size] = '\0'; terminate = feof(fp); offset = 0; while (offset < size) { // skip mac newline if (str_len(line) == 0 && chunk[offset] == '\r') { offset++; continue; } // find next new line for (i = offset; i < size; ++i) { if (chunk[i] == '\n') break; } // append portion up to the new line or end of chunk str_append(line, chunk+offset, i - offset); // read more if we didn't find a new line if (i == size && !terminate) break; // move the offset past the new line offset = i + 1; // handle windows new line if (str_char(line, -1) == '\r') str_truncate(line, -1); // remove everything to the right of a comment character for (i = 0; i < str_len(line); ++i) { if (str_char(line, i) == '#') { str_truncate(line, i); break; } } // check the line for a single letter followed by a number if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) { // parse the letter and frequency value regex_strncpy(matches+1, str_internal(line), letter, 2); freq = regex_dbl(matches+2, str_internal(line)); // check the frequency is acceptable if (freq < 0 || freq > 1) { die("The background file lists the illegal probability %g for " "the letter %s.\n", freq, letter); } else if (freq == 0) { die("The background file lists a probability of zero for the " "letter %s\n", letter); } if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq); } str_clear(line); } } // finished with the file so clean up file parsing stuff fclose(fp); str_destroy(line, FALSE); regfree(&bgfreq); // guess the alphabet if (*alph == INVALID_ALPH) { switch (rbtree_size(letters)) { case PROTEIN_ASIZE: *alph = PROTEIN_ALPH; break; case DNA_ASIZE: *alph = DNA_ALPH; break; default: die("Number of single character entries in background does not match " "an alphabet.\n"); } } // make the background if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE)); assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE)); init_array(-1, freqs); for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) { key = (char*)rbtree_key(node); i = alph_index(*alph, key[0]); freq = *((double*)rbtree_value(node)); if (i == -1) { die("Background contains letter %s which is not in the %s alphabet.\n", key, alph_name(*alph)); } if (get_array_item(i, freqs) != -1) { die("Background contains letter %s which has the same meaning as an " "already listed letter.\n", key); } set_array_item(i, freq, freqs); } // check that all items were set for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) { if (get_array_item(i, freqs) == -1) { die("Background is missing letter %c.\n", alph_char(*alph, i)); } } // disabled for backwards compatability (AMA test was failing) //normalize_subarray(0, ALPH_ASIZE[*alph], 0.0, freqs); // calculate the values of the ambiguous letters from the concrete ones calc_ambigs(*alph, FALSE, freqs); // cleanup rbtree_destroy(letters); // return result return freqs; }
/* * Determine if a letter is ambiguous */ BOOLEAN_T alph_is_ambiguous(ALPH_T alph, char letter) { int index; index = alph_index(alph, letter); return (index >= ALPH_ASIZE[alph]); }
/* * Determine if a letter is a concrete representation */ BOOLEAN_T alph_is_concrete(ALPH_T alph, char letter) { int index; index = alph_index(alph, letter); return (index != -1 && index < ALPH_ASIZE[alph]); }
/* * Determine if a letter is known in this alphabet */ BOOLEAN_T alph_is_known(ALPH_T alph, char letter) { int index; index = alph_index(alph, letter); return (index != -1); }
void ramen_scan_sequences() { FILE* seq_file = NULL; MOTIF_T* motif = NULL; MOTIF_T* rev_motif = NULL; SEQ_T* sequence = NULL; SCANNED_SEQUENCE_T* scanned_seq = NULL; PATTERN_T* pattern; int i; int j; SEQ_T** seq_list; int num_seqs; int seq_len; //For the bdb_bg mode: ARRAY_T* seq_bg_freqs; double atcontent; double roundatcontent; double avg_seq_length = 0; //Open the file. if (open_file(args.sequence_filename, "r", FALSE, "FASTA", "sequences", &seq_file) == 0) { fprintf(stderr, "Couldn't open the file %s.\n", args.sequence_filename); ramen_terminate(1); } //Start reading in the sequences read_many_fastas(ramen_alph, seq_file, MAX_SEQ_LENGTH, &num_seqs, &seq_list); seq_ids = new_string_list(); seq_fscores = allocate_array(num_seqs); //Allocate the required space for results results = malloc(sizeof(double*) * motifs.num); for (i=0;i<motifs.num;i++) { results[i] = malloc(sizeof(double)*num_seqs); } for (j=0;j<num_seqs;j++) { fprintf(stderr, "\rScanning %i of %i sequences...", j+1, num_seqs); //copy the pointer into our current object for clarity sequence = seq_list[j]; //Read the fluorescence data from the description field. add_string(get_seq_name(sequence),seq_ids); seq_len = get_seq_length(sequence); set_array_item(j,atof(get_seq_description(sequence)),seq_fscores); //Scan with each motif. for (i=0;i<motifs.num;i++) { int motifindex = i*2; results[i][j] = ramen_sequence_scan(sequence, motif_at(motifs.motifs, motifindex), motif_at(motifs.motifs, motifindex+1), NULL, NULL, //No need to pass PSSM. AVG_ODDS, 0, TRUE, 0, motifs.bg_freqs); if (TRUE == args.linreg_normalise) { int k; double maxscore = 1; motif = motif_at(motifs.motifs,motifindex); for (k=0;k<get_motif_length(motif);k++) { double maxprob = 0; if (maxprob < get_matrix_cell(k, alph_index(ramen_alph, 'A'), get_motif_freqs(motif))) maxprob = get_matrix_cell(k, alph_index(ramen_alph, 'A'), get_motif_freqs(motif)); if (maxprob < get_matrix_cell(k, alph_index(ramen_alph, 'C'), get_motif_freqs(motif))) maxprob = get_matrix_cell(k, alph_index(ramen_alph, 'C'), get_motif_freqs(motif)); if (maxprob < get_matrix_cell(k, alph_index(ramen_alph, 'G'), get_motif_freqs(motif))) maxprob = get_matrix_cell(k, alph_index(ramen_alph, 'G'), get_motif_freqs(motif)); if (maxprob < get_matrix_cell(k, alph_index(ramen_alph, 'T'), get_motif_freqs(motif))) maxprob = get_matrix_cell(k, alph_index(ramen_alph, 'T'), get_motif_freqs(motif)); maxscore *= maxprob; } results[i][j] /= maxscore; } } } }