Пример #1
0
/*
 * DREME XML "motif" start handler.
 *
 * Allocates a zero-filled MOTIF_T, stores it in the parser context
 * (ctx is cast to CTX_T*) and fills in the fields that are known from the
 * element's attributes.
 *
 *   ctx        parser context; receives the new motif in ->motif
 *   seq        motif sequence; also used as the motif's primary id
 *   length     motif width; number of rows of the frequency matrix
 *   num_sites  copied to motif->num_sites
 *   evalue     copied to motif->evalue
 *
 * id, p_hits, n_hits, pvalue and uevalue are accepted but not used here.
 */
void dxml_start_motif(void *ctx, char *id, char *seq, int length,
                      long num_sites, long p_hits, long n_hits,
                      double pvalue, double evalue, double uevalue) {
    CTX_T *state = (CTX_T*)ctx;
    MOTIF_T *m = (MOTIF_T*)mm_malloc(sizeof(MOTIF_T));

    // start from an all-zero motif and hand it to the context
    memset(m, 0, sizeof(MOTIF_T));
    state->motif = m;

    // identity: the sequence doubles as the id, there is no secondary id
    set_motif_id(seq, strlen(seq), m);
    set_motif_id2("", 0, m);
    set_motif_strand('+', m);

    // values taken straight from the attributes
    m->length = length;
    m->num_sites = num_sites;
    m->evalue = evalue;

    // the alphabet is taken from the context's fscope
    m->alph = state->fscope.alphabet;
    // DREME does not support the concept of single strand scanning (yet)
    m->flags = MOTIF_BOTH_STRANDS;

    // one frequency row per motif position; DREME xml carries no scores
    m->freqs = allocate_matrix(m->length, alph_size(m->alph, ALPH_SIZE));
    m->scores = NULL;

    // DREME has no url, so store an empty string
    m->url = strdup("");

    // placeholders; the complexity is expected to be set by postprocessing
    m->trim_left = 0;
    m->trim_right = 0;
    m->complexity = -1;
}
/*****************************************************************************
 * MEME > motifs > motif
 * Construct the skeleton of a motif.
 *
 * Allocates a zero-filled MOTIF_T, stores it in data->mscope.motif and fills
 * in the fields known from the element's attributes.
 *
 *   ctx           parser context (cast to CTX_T*)
 *   id            key under which the motif position is cached when the
 *                 SCANNED_SITES option is set
 *   name          primary motif id
 *   alt           alternate motif id; NULL is treated as an empty string
 *   width         motif width; number of rows of both matrices
 *   sites         copied to motif->num_sites
 *   log10_evalue  stored as-is and also converted to a plain E-value
 *   url           copied with strdup()
 *
 * llr, ic, re, bayes_threshold and elapsed_time are accepted but not used
 * here.
 ****************************************************************************/
void mxml_start_motif(void *ctx, char *id, char *name, char *alt, int width, double sites, 
    double llr, double ic, double re, double bayes_threshold,
    double log10_evalue, double elapsed_time, char *url) {
  CTX_T *data;
  MOTIF_T *motif;
  
  data = (CTX_T*)ctx;
  data->mscope.motif = mm_malloc(sizeof(MOTIF_T));
  motif = data->mscope.motif;
  memset(motif, 0, sizeof(MOTIF_T));
  set_motif_id(name, strlen(name), motif);
  // The length must be the string length: sizeof(alt) is the size of the
  // pointer, not of the text it points to.
  if (alt != NULL) {
    set_motif_id2(alt, strlen(alt), motif);
  } else {
    set_motif_id2("", 0, motif);
  }
  set_motif_strand('+', motif);
  motif->length = width;
  motif->num_sites = sites;
  motif->url = strdup(url);
  motif->log_evalue = log10_evalue;
  motif->evalue = pow(10.0, log10_evalue);
  // take a reference to the alphabet held by the context
  motif->alph = alph_hold(data->alph);
  motif->flags = (data->fscope.strands == 2 ? MOTIF_BOTH_STRANDS : 0);
  // allocate matrices; every cell starts at a sentinel (-1 / NO_SCORE)
  motif->freqs = allocate_matrix(motif->length, alph_size_core(motif->alph));
  init_matrix(-1, motif->freqs);
  motif->scores = allocate_matrix(motif->length, alph_size_core(motif->alph));
  init_matrix(NO_SCORE, motif->scores);
  // should be set by a post processing method
  motif->complexity = -1;
  motif->trim_left = 0;
  motif->trim_right = 0;
  // cache motif position
  if (data->options & SCANNED_SITES) {
    rbtree_put(data->motif_lookup, id, &(data->current_motif));
  }
}
Пример #3
0
/*
 * Map one upper-case nucleotide code to the bases it stands for.
 * Returns an empty string for any other character, so the caller leaves
 * that motif position untouched.
 */
static const char* regexp_code_bases(int code) {
	switch (code) {
	case 'A': return "A";
	case 'C': return "C";
	case 'G': return "G";
	case 'T': return "T";
	// NOTE(review): 'U' is looked up in an alphabet that was set to "ACGT";
	// confirm alphabet_index() copes with that.
	case 'U': return "U";
	case 'R': return "GA";   // purines
	case 'Y': return "TC";   // pyrimidines
	case 'K': return "GT";   // keto
	case 'M': return "AC";   // amino
	case 'S': return "GC";   // strong
	case 'W': return "AT";   // weak
	case 'B': return "GTC";
	case 'D': return "GAT";
	case 'H': return "ACT";
	case 'V': return "GCA";
	case 'N': return "ACGT";
	default:  return "";
	}
}

/*
 * Read motifs from a file of whitespace-separated "name regexp" pairs and
 * convert each regexp into a frequency matrix.
 *
 * For every position of the regexp, the cell of each base matched by the
 * code at that position is set to 1; ambiguous-character columns are then
 * filled in and the motif complexity is computed.
 *
 * The alphabet is set to "ACGT" (DNA only). The process exits with status 1
 * if open_file() reports failure.
 *
 * Tokens longer than the local buffers are split at the buffer limit rather
 * than overflowing it; the remainder is then read as the next token.
 *
 * The motifs array is supplied by the caller; this function does not
 * allocate it and performs no check against its capacity.
 */
void read_regexp_file(
   char*      filename,          // Name of the regexp file  IN
   int*       num_motifs,        // Number of motifs retrieved  OUT
   MOTIF_T*   motifs             // The retrieved motifs - NOT ALLOCATED!
) {
	FILE*      motif_file;         // File containing the motifs.
	char motif_name[MAX_MOTIF_ID_LENGTH+1];
	char motif_regexp[MAX_MOTIF_WIDTH];
	char scan_format[64];
	const char* bases;
	ARRAY_T* these_freqs;
	MOTIF_T* m;
	int i;

	//Set things to the defaults.
	*num_motifs = 0;

	// Open the given file.
	if (open_file(filename, "r", TRUE, "motif", "motifs", &motif_file) == 0) {
		exit(1);
	}

	//Set alphabet - ONLY supports dna.
	set_alphabet(verbosity, "ACGT");

	// Bound both %s conversions by the size of their buffers (leaving room
	// for the terminating NUL) so an overlong token cannot overflow them.
	snprintf(scan_format, sizeof(scan_format), "%%%ds\t%%%ds",
			(int)(MAX_MOTIF_ID_LENGTH), (int)(MAX_MOTIF_WIDTH) - 1);

	while (fscanf(motif_file, scan_format, motif_name, motif_regexp) == 2) {
		/*
		 * Now we:
		 * 1. Fill in new motif (preallocated)
		 * 2. Assign name
		 * 3. Convert regexp into frequency table.
		 */

		m = &(motifs[*num_motifs]);
		set_motif_id(motif_name, m);
		m->length = strlen(motif_regexp);
		/* Store the alphabet size in the motif. */
		m->alph_size = get_alph_size(ALPH_SIZE);
		m->ambigs = get_alph_size(AMBIG_SIZE);
		/* Allocate memory for the matrix. */
		m->freqs = allocate_matrix(m->length, get_alph_size(ALL_SIZE));

		//Set motif frequencies here: one row per regexp character.
		for (i = 0; i < m->length; i++) {
			// toupper() requires a value representable as unsigned char
			bases = regexp_code_bases(toupper((unsigned char)motif_regexp[i]));
			for (; *bases != '\0'; bases++) {
				set_matrix_cell(i, alphabet_index(*bases, get_alphabet(TRUE)), 1, m->freqs);
			}
		}

		/* Compute values for ambiguous characters. */
		for (i = 0; i < m->length; i++) {
			these_freqs = get_matrix_row(i, m->freqs);
			fill_in_ambiguous_chars(FALSE, these_freqs);
		}

		/* Compute and store the motif complexity. */
		m->complexity = compute_motif_complexity(m);

		//Move our pointer along to do the next motif.
		(*num_motifs)++;
	}

	// Release the file handle; the original never closed it.
	fclose(motif_file);
}