/**
 * get_not_o
 *
 * For every sequence in the dataset and every possible site start j
 * (0 <= j <= length - w), set not_o[j] to the smallest of 1.0 and
 * weights[j .. j+w-1].  When get_log is true, log_not_o[j] is also set
 * to INT_LOG(not_o[j]).
 *
 * Positions after the last possible site start are given not_o = 1 and
 * (when get_log is true) log_not_o = 0.  For a sequence shorter than the
 * motif there are no site starts at all, so every position of that
 * sequence is treated as "beyond possible site starts".
 */
extern void get_not_o(
  DATASET *dataset,			/* the dataset */
  int w,				/* width of motif */
  BOOLEAN get_log			/* compute log_not_o if true */
)
{
  int i, j, k;
  int n_samples = dataset->n_samples;
  SAMPLE **samples = dataset->samples;

  for (i = 0; i < n_samples; i++) {	/* sequence */
    SAMPLE *s = samples[i];
    int lseq = s->length;
    double *weights = s->weights;	/* prb not in a previous site */
    double *not_o = s->not_o;		/* prb not overlapping a site */
    int *log_not_o = s->log_not_o;	/* log prb not overlapping a site */
    int tail_start = lseq - w + 1;	/* first index that cannot start a site */

    /*
     * If the sequence is shorter than the motif, lseq - w + 1 can be
     * negative; clamp it so the trailing loop never indexes before the
     * start of not_o / log_not_o.
     */
    if (tail_start < 0) {
      tail_start = 0;
    }

    for (j = 0; j <= lseq - w; j++) {	/* site start */
      not_o[j] = 1.0;			/* assume not overlapping */
      for (k = j; k < j + w; k++) {	/* position in sequence */
        if (weights[k] < not_o[j]) {
          not_o[j] = weights[k];
        }
      }
      if (get_log) {
        log_not_o[j] = INT_LOG(not_o[j]);
      }
    }

    for (j = tail_start; j < lseq; j++) {	/* beyond possible site starts */
      not_o[j] = 1;
      if (get_log) {
        log_not_o[j] = 0;
      }
    }
  }
} /* get_not_o */
/**
 * get_not_o
 *
 * For each sequence in the dataset that is at least w long, record for
 * every possible site start the minimum of 1.0 and the weights covered by
 * a width-w window beginning there (not_o), together with INT_LOG of that
 * minimum (log_not_o).  Positions after the last possible site start get
 * not_o = 1 and log_not_o = 0.  Sequences shorter than w are left
 * untouched.
 */
extern void get_not_o(
  DATASET *dataset,			/* the dataset */
  int w					/* motif width */
)
{
  SAMPLE **samples = dataset->samples;
  int n_samples = dataset->n_samples;
  int seq, start, pos;

  for (seq = 0; seq < n_samples; seq++) {
    SAMPLE *s = samples[seq];
    int lseq = s->length;
    double *weights = s->weights;	/* prb not in a previous site */
    double *not_o = s->not_o;		/* prb not overlapping a site */
    int *log_not_o = s->log_not_o;	/* log prb not overlapping a site */
    int last_start = lseq - w;		/* final index where a site fits */

    /* sequence too short for the motif: nothing to compute */
    if (last_start < 0) {
      continue;
    }

    /* every position where a full-width site can begin */
    for (start = 0; start <= last_start; start++) {
      double *slot = &not_o[start];
      *slot = 1.0;			/* assume not overlapping */
      for (pos = start; pos < start + w; pos++) {
        if (weights[pos] < *slot) {
          *slot = weights[pos];
        }
      }
      log_not_o[start] = INT_LOG(*slot);
    }

    /* positions too close to the end to start a site */
    for (start = last_start + 1; start < lseq; start++) {
      not_o[start] = 1;
      log_not_o[start] = 0;
    }
  }
} /* get_not_o */
/**
 * convert_to_lmap
 *
 * Fills the top-left (alength+1) x (alength+1) corner of lmap with int
 * logs:
 *   - for i, j < alength:  lmap[i][j] = INT_LOG(map[j][i])
 *     (note the transposed indexing of map);
 *   - row alength and column alength, which the original comments assign
 *     to the "match-anything" character X, are set to INT_LOG(1.0/alength)
 *     so that such matches are neither favored nor disfavored.
 *
 * Index alength is written in both dimensions, so alength must be
 * smaller than MAXALPH.
 */
extern void convert_to_lmap (
  THETA map,
  int lmap[MAXALPH][MAXALPH],
  int alength
)
{
  int row, col;

  /* rows belonging to the real letters of the alphabet */
  for (row = 0; row < alength; row++) {
    for (col = 0; col < alength; col++) {
      lmap[row][col] = INT_LOG(map[col][row]);
    }
    lmap[row][alength] = INT_LOG(1.0/alength);	/* X column */
  }

  /* the X row: uniform over every column, including the X column */
  for (col = 0; col <= alength; col++) {
    lmap[alength][col] = INT_LOG(1.0/alength);
  }
}
/**
 * convert_to_ltheta
 *
 * Converts the leading nrows x ncols entries of a matrix of doubles into
 * INT_LOG values, storing them at the same positions of the output matrix.
 *
 * Each converted value is also written to stderr followed by a space, and
 * a newline is written after every row.
 * NOTE(review): the stderr output looks like leftover debugging; confirm
 * whether callers rely on it before removing.
 */
extern void convert_to_ltheta (
  double matrix_ds[MAXSITE][MAXALPH],	///< The input matrix of doubles
  int matrix_il[MAXSITE][MAXALPH],	///< The output matrix of int logs
  int nrows,
  int ncols
)
{
  int r, c;

  for (r = 0; r < nrows; r++) {
    double *src = matrix_ds[r];
    int *dst = matrix_il[r];

    for (c = 0; c < ncols; c++) {
      dst[c] = INT_LOG(src[c]);
      fprintf(stderr, "%i ", dst[c]);
    }
    fprintf(stderr, "\n");
  }
}