void dxml_start_motif(void *ctx, char *id, char *seq, int length, long num_sites, long p_hits, long n_hits, double pvalue, double evalue, double uevalue) { CTX_T *data; MOTIF_T *motif; data = (CTX_T*)ctx; data->motif = (MOTIF_T*)mm_malloc(sizeof(MOTIF_T)); motif = data->motif; memset(motif, 0, sizeof(MOTIF_T)); set_motif_id(seq, strlen(seq), motif); set_motif_id2("", 0, motif); set_motif_strand('+', motif); motif->length = length; motif->num_sites = num_sites; motif->evalue = evalue; // both DNA and RNA have 4 letters motif->alph = data->fscope.alphabet; motif->flags = MOTIF_BOTH_STRANDS; // DREME does not support the concept of single strand scanning (yet) // allocate the matrix motif->freqs = allocate_matrix(motif->length, alph_size(motif->alph, ALPH_SIZE)); motif->scores = NULL; // no scores in DREME xml // no url in DREME motif->url = strdup(""); // set by postprocessing motif->complexity = -1; motif->trim_left = 0; motif->trim_right = 0; }
/***************************************************************************** * MEME > motifs > motif * Construct the skeleton of a motif. ****************************************************************************/ void mxml_start_motif(void *ctx, char *id, char *name, char *alt, int width, double sites, double llr, double ic, double re, double bayes_threshold, double log10_evalue, double elapsed_time, char *url) { CTX_T *data; MOTIF_T *motif; data = (CTX_T*)ctx; data->mscope.motif = mm_malloc(sizeof(MOTIF_T)); motif = data->mscope.motif; memset(motif, 0, sizeof(MOTIF_T)); set_motif_id(name, strlen(name), motif); set_motif_id2(alt, sizeof(alt), motif); set_motif_strand('+', motif); motif->length = width; motif->num_sites = sites; motif->url = strdup(url); motif->log_evalue = log10_evalue; motif->evalue = pow(10.0, log10_evalue); // calculate alphabet size motif->alph = alph_hold(data->alph); motif->flags = (data->fscope.strands == 2 ? MOTIF_BOTH_STRANDS : 0); // allocate matricies motif->freqs = allocate_matrix(motif->length, alph_size_core(motif->alph)); init_matrix(-1, motif->freqs); motif->scores = allocate_matrix(motif->length, alph_size_core(motif->alph)); init_matrix(NO_SCORE, motif->scores); // should be set by a post processing method motif->complexity = -1; motif->trim_left = 0; motif->trim_right = 0; // cache motif position if (data->options & SCANNED_SITES) { rbtree_put(data->motif_lookup, id, &(data->current_motif)); } }
void read_regexp_file( char* filename, // Name of MEME file IN int* num_motifs, // Number of motifs retrieved OUT MOTIF_T* motifs // The retrieved motifs - NOT ALLOCATED! ) { FILE* motif_file; // MEME file containing the motifs. char motif_name[MAX_MOTIF_ID_LENGTH+1]; char motif_regexp[MAX_MOTIF_WIDTH]; ARRAY_T* these_freqs; MOTIF_T* m; int i; //Set things to the defaults. *num_motifs = 0; // Open the given MEME file. if (open_file(filename, "r", TRUE, "motif", "motifs", &motif_file) == 0) exit(1); //Set alphabet - ONLY supports dna. set_alphabet(verbosity, "ACGT"); while (fscanf(motif_file, "%s\t%s", motif_name, motif_regexp) == 2) { /* * Now we: * 1. Fill in new motif (preallocated) * 2. Assign name * 3. Convert regexp into frequency table. */ m = &(motifs[*num_motifs]); set_motif_id(motif_name, m); m->length = strlen(motif_regexp); /* Store the alphabet size in the motif. */ m->alph_size = get_alph_size(ALPH_SIZE); m->ambigs = get_alph_size(AMBIG_SIZE); /* Allocate memory for the matrix. */ m->freqs = allocate_matrix(m->length, get_alph_size(ALL_SIZE)); //Set motif frequencies here. for (i=0;i<strlen(motif_regexp);i++) { switch(toupper(motif_regexp[i])) { case 'A': set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); break; case 'C': set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); break; case 'G': set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); break; case 'T': set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); break; case 'U': set_matrix_cell(i,alphabet_index('U',get_alphabet(TRUE)),1,m->freqs); break; case 'R': //purines set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); break; case 'Y': //pyramidines set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); break; case 'K': //keto set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); break; case 'M': //amino set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); break; case 'S': //strong set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); break; case 'W': //weak set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); break; case 'B': set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); break; case 'D': set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); break; case 'H': set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); break; case 'V': set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); break; case 'N': set_matrix_cell(i,alphabet_index('A',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('C',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('G',get_alphabet(TRUE)),1,m->freqs); set_matrix_cell(i,alphabet_index('T',get_alphabet(TRUE)),1,m->freqs); break; } } /* Compute values for ambiguous characters. */ for (i = 0; i < m->length; i++) { these_freqs = get_matrix_row(i, m->freqs); fill_in_ambiguous_chars(FALSE, these_freqs); } /* Compute and store the motif complexity. */ m->complexity = compute_motif_complexity(m); //Move our pointer along to do the next motif. (*num_motifs)++; } }