/*
 * Score for seeing codon `diff` where the reference codon is `ref`,
 * under the positive-selection model.
 *
 * The score always contains the three per-base substitution terms read
 * from dm->prob[ref base][diff base].  When the two codons translate
 * (via ct) to different amino acids, the score of rm->aminoacid for the
 * new amino acid (indexed by its letter minus 'A') is added as well.
 *
 * Both ref and diff must point at at least three characters.
 */
Score logl_positive_selection(char * ref,char * diff,RandomModel * rm,CodonTable *ct,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;
  char aa_before;
  char aa_after;

  /* substitution cost of each of the three codon positions */
  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* only a change of amino acid contributes the extra amino acid term */
  if( aa_before != aa_after ) {
    total += Probability2Score(rm->aminoacid[aa_after-'A']);
  }

  return total;
}
/*
 * Score for seeing codon `diff` where the reference codon is `ref`,
 * using base substitution probabilities only: the sum over the three
 * codon positions of the score of dm->prob[ref base][diff base].
 *
 * Both ref and diff must point at at least three characters.
 */
Score logl_pseudogene(char * ref,char * diff,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;

  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  return total;
}
/*
 * Converts the first three characters of seq into a codon number.
 *
 * Each character is mapped through base_from_char and the three results
 * are combined as a three digit base-5 number, first character most
 * significant.  No check is made that seq holds three characters.
 */
codon codon_from_seq(char * seq)
{
  base first  = base_from_char(seq[0]);
  base second = base_from_char(seq[1]);
  base third  = base_from_char(seq[2]);

  return first*25 + second*5 + third;
}
/*
 * Converts the first nmer_size characters of str into a strand-independent
 * base-4 number.
 *
 * The word and its reverse complement are compared position by position;
 * whichever has the larger base at the first position where they differ
 * is the one that gets encoded (first base most significant).
 *
 * Result fields:
 *   flipped == 0  the forward word was encoded
 *   flipped == 1  the reverse complement was encoded
 *   flipped == 2  a BASE_N was found somewhere in the word; number is 0
 *
 * Two cases are handled explicitly:
 *   - a word equal to its own reverse complement (eg "ACGT") has no
 *     differing position; it is reported as unflipped, since both strands
 *     give the same number.
 *   - the whole word is scanned for BASE_N, not only the positions before
 *     the first strand difference, so an ambiguous base can never be
 *     folded into the number as a digit of 4.
 */
DnaNumber dna_number_from_string(char * str,int nmer_size)
{
  int i;
  int place_value = 1;
  DnaNumber out;
  int forward;
  int backward;

  out.flipped = 2;
  out.number  = 0;

  /* weight of the most significant digit: 4^(nmer_size-1) */
  for(i=0;i<nmer_size-1;i++)
    place_value *= 4;

  for(i=0;i<nmer_size;i++) {
    forward  = base_from_char(str[i]);
    backward = complement_base(base_from_char(str[nmer_size-1-i]));

    if( forward == BASE_N || backward == BASE_N ) {
      /* ambiguous word: report it with the flipped == 2 marker */
      out.flipped = 2;
      return out;
    }

    /* the orientation is decided by the first differing position only,
       but we keep looping so that later positions are still checked for N */
    if( out.flipped == 2 ) {
      if( forward > backward ) {
        out.flipped = 0;
      } else if( backward > forward ) {
        out.flipped = 1;
      }
    }
  }

  /* no differing position: word is its own reverse complement */
  if( out.flipped == 2 ) {
    out.flipped = 0;
  }

  if( out.flipped == 0 ) {
    for(i=0;i<nmer_size;i++) {
      out.number += place_value * base_from_char(str[i]);
      place_value = place_value / 4;
    }
  } else {
    for(i=0;i<nmer_size;i++) {
      out.number += place_value * complement_base(base_from_char(str[nmer_size-1-i]));
      place_value = place_value / 4;
    }
  }

  return out;
}
/*
 * Converts the first three characters of seq into a base-4 codon number
 * (first character most significant, so the range is 0..63).
 *
 * If any of the three characters maps to BASE_N the value 64 is returned
 * instead.  No check is made that seq holds three characters.
 */
int base4_codon_from_seq(char * seq)
{
  base first  = base_from_char(seq[0]);
  base second = base_from_char(seq[1]);
  base third  = base_from_char(seq[2]);

  if( first != BASE_N && second != BASE_N && third != BASE_N ) {
    return first*16 + second*4 + third;
  }

  return 64;
}
/*
 * Reads the "int22" parameter file into the eight-dimensional table *t.
 *
 * The table is first filled with NOT_A_NUMBER.  After skipping to the
 * first "5' ------> 3'" header, each non-blank line starts one record:
 *   - skip forward to a line containing "5' ------> 3'"
 *   - two lines, each giving two base characters (four in total, a[0..3])
 *   - a line containing "3' <------ 5'"
 *   - 16 groups of 16 values, read in (i, j) order; value 4*k+l of the
 *     group is stored at (*t)[b0][b1][b2][b3][i][k][j][l]
 *
 * Any formatting problem terminates through die().
 *
 * use_dna_params is passed straight to parfile() to select the file.
 */
static void read_int22(int use_dna_params, tab8_t *t)
{
    fill8(t, NOT_A_NUMBER);
    FILE *f = parfile("int22", use_dna_params);
    look_for_line_containing(f, "5' ------> 3'");
    char buf[MAXLINE+1];
    /* Read with the full buffer size: fgets stores at most size-1
       characters, so passing MAXLINE here could never produce a string of
       length MAXLINE and the length check below would be dead code. */
    while (fgets(buf, sizeof buf, f)) {
        if (strlen(buf) >= MAXLINE)
            die("read_int22: line too long");
        if (is_only_whitespace(buf))
            continue;
        look_for_line_containing(f, "5' ------> 3'");
        char a[4];
        if (!(fgets(buf, MAXLINE, f) && sscanf(buf, " %c \\/ \\_/ %c", &a[0], &a[1]) == 2))
            die("read_int22: couldn't read first line");
        if (!(fgets(buf, MAXLINE, f) && sscanf(buf, " %c /\\ | %c", &a[2], &a[3]) == 2))
            die("read_int22: couldn't read second line");
        expect_line_containing(f, "3' <------ 5'");
        int i, j;
        base_t b[4];
        for (i = 0; i < 4; i++)
            b[i] = base_from_char(a[i]);
        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++) {
                int_t val[16];
                read_next_values(f, val, 16);
                int k, l;
                /* note the index order: i, k, j, l */
                for (k = 0; k < 4; k++)
                    for (l = 0; l < 4; l++)
                        (*t)[b[0]][b[1]][b[2]][b[3]][i][k][j][l] = val[4*k+l];
            }
    }
    fclose(f);
}
/*
 * Tells whether the first three characters of seq are all unambiguous
 * bases, ie none of them maps to BASE_N through base_from_char.
 *
 * Returns FALSE (after issuing a warning) when the string ends before
 * three characters have been seen.
 */
boolean is_non_ambiguous_codon_seq(char * seq)
{
  int pos;

  if( seq[0] == '\0' || seq[1] == '\0' || seq[2] == '\0' ) {
    warn("Attempting to find a codon number is something less than 3 bases long!");
    return FALSE;
  }

  for(pos=0;pos<3;pos++) {
    if( base_from_char(seq[pos]) == BASE_N ) {
      return FALSE;
    }
  }

  return TRUE;
}
/*
 * Parses one line of the form "<bases> <value>".
 *
 * The first whitespace-delimited token of buf supplies n base characters,
 * converted with base_from_char into seq[0..n-1].  The second token is
 * converted with value_from_string and stored in *val.
 *
 * buf is modified (strtok).  Nothing here checks that both tokens exist
 * or that the first one has at least n characters; the caller has to
 * guarantee that.
 */
static void read_small_loop(char *buf, base_t seq[], int_t *val, int n)
{
    char *token = strtok(buf, whitespace);
    int k = 0;

    while (k < n) {
        seq[k] = base_from_char(token[k]);
        k++;
    }

    token = strtok(0, whitespace);
    *val = value_from_string(token);
}
/*
 * Extracts six base characters from a line in which each one is written
 * directly after a literal 'Y' ("Y? Y? Y? Y? Y? Y?", whitespace between
 * the groups is optional), and stores their base_from_char values in
 * b[0..5].  Calls die() if the line does not match.
 */
static void read_six_bases(const char *buf, base_t b[6])
{
    char letters[6];
    int matched = sscanf(buf, " Y%c Y%c Y%c Y%c Y%c Y%c",
                         &letters[0], &letters[1], &letters[2],
                         &letters[3], &letters[4], &letters[5]);

    if (matched != 6)
        die("read_six_bases: error");

    int k = 0;
    while (k < 6) {
        b[k] = base_from_char(letters[k]);
        k++;
    }
}
/*
 * Extracts four base characters from a line and stores their
 * base_from_char values in b[0..3].
 *
 * Two layouts are accepted: if the line contains an 'X' anywhere, every
 * base character must be directly followed by a literal 'X'
 * ("?X ?X ?X ?X"); otherwise the line is four plain characters.
 * Calls die() if the line does not match the selected layout.
 */
static void read_four_bases(const char *buf, base_t b[4])
{
    char letters[4];
    const char *layout;

    if (strchr(buf,'X'))
        layout = " %cX %cX %cX %cX";
    else
        layout = " %c %c %c %c";

    int matched = sscanf(buf, layout,
                         &letters[0], &letters[1], &letters[2], &letters[3]);
    if (matched != 4)
        die("read_four_bases: error");

    int k = 0;
    while (k < 4) {
        b[k] = base_from_char(letters[k]);
        k++;
    }
}
/*
 * Extracts twelve whitespace-separated base characters from a line and
 * stores their base_from_char values in b[0..11].
 * Calls die() if fewer than twelve characters could be read.
 */
static void read_twelve_bases(const char *buf, base_t b[12])
{
    char letters[12];
    int matched = sscanf(buf, " %c %c %c %c %c %c %c %c %c %c %c %c",
                         &letters[0], &letters[1], &letters[2], &letters[3],
                         &letters[4], &letters[5], &letters[6], &letters[7],
                         &letters[8], &letters[9], &letters[10], &letters[11]);

    if (matched != 12)
        die("read_twelve_bases: error");

    int k = 0;
    while (k < 12) {
        b[k] = base_from_char(letters[k]);
        k++;
    }
}
/*
 * Frequency of a possibly ambiguous codon, looked up in a 64 entry
 * table indexed as first*16 + second*4 + third.
 *
 * A '-' character is treated like 'N'.  Every position that maps to
 * BASE_N matches all four bases; the frequencies of all matching
 * concrete codons are summed and the sum is divided by 4 for each
 * ambiguous position (ie averaged over the matching codons).
 *
 * A result below 1e-16 triggers a warning and is raised to 1e-16.
 */
double nocds_from_ambiguous_codon(char * codon,double * codon_freq_array)
{
  const double floor_freq = 0.0000000000000001;
  int nt[3];
  int divisor = 1;
  int p;
  int i,j,k;
  double sum = 0.0;

  for(p=0;p<3;p++) {
    nt[p] = base_from_char(codon[p] == '-' ? 'N' : codon[p]);
  }

  for(p=0;p<3;p++) {
    if( nt[p] == BASE_N ) {
      divisor *= 4;
    }
  }

  /* walk every concrete codon in table order, skipping the ones that
     are incompatible with a non-ambiguous position */
  for(i=0;i<4;i++) {
    if( nt[0] != i && nt[0] != BASE_N )
      continue;
    for(j=0;j<4;j++) {
      if( nt[1] != j && nt[1] != BASE_N )
        continue;
      for(k=0;k<4;k++) {
        if( nt[2] != k && nt[2] != BASE_N )
          continue;
        sum += codon_freq_array[i*16+j*4+k];
      }
    }
  }

  sum = sum / divisor;

  if( sum < floor_freq ) {
    warn("For codon %c%c%c we have a frequency of %g",codon[0],codon[1],codon[2],sum);
    sum = floor_freq;
  }

  return sum;
}
/*
 * Score for seeing codon `diff` where the reference codon is `ref`,
 * under the negative-selection model.
 *
 * The score always contains the three per-base substitution terms read
 * from dm->prob[ref base][diff base].  When the two codons translate
 * (via ct) to different amino acids, the difference between the scores
 * of unit->match_emission for the new and for the reference amino acid
 * (each indexed by letter minus 'A') is added as well.
 *
 * Both ref and diff must point at at least three characters.
 */
Score logl_negative_selection(char * ref,char * diff,ThreeStateUnit * unit,CodonTable * ct,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;
  char aa_before;
  char aa_after;

  /* substitution cost of each of the three codon positions */
  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  aa_before = aminoacid_from_seq(ct,ref);
  aa_after  = aminoacid_from_seq(ct,diff);

  /* a changed amino acid is charged the emission score difference,
     new minus reference */
  if( aa_before != aa_after ) {
    total += Probability2Score(unit->match_emission[aa_after-'A']) - Probability2Score(unit->match_emission[aa_before-'A']);
  }

  return total;
}
boolean read_base_GeneConsensus(double * base_array,char* line,FILE * ifp) { boolean ret = TRUE; int b; char * base; char * number; if( strwhitestartcmp(line,"begin",spacestr) != 0 || strstr(line,"consensus") == NULL ) { warn("In reading base GeneConsensus line, got no 'begin consensus' tag [%s]",line); return FALSE; } while( fgets(line,MAXLINE,ifp) != NULL ) { if( line[0] == '#' ) continue; if( strwhitestartcmp(line,"end",spacestr) == 0 ) break; base = strtok(line,spacestr); number = strtok(NULL,spacestr); if( base == NULL ) { warn("Found an uncommented line in base consensus with no leading base word"); continue; } if( number == NULL ) { warn("For base %s, no number found",base); ret = FALSE; continue; } if( strlen(base) > 1 || (b=base_from_char(*base)) == BASE_N ) { warn("Could not interpret %s as an actual DNA base in read_base_GeneConsensus"); ret = FALSE; continue; } base_array[b]= atof(number); } return ret; }
/*
 * Scores a splice site at position seq.
 *
 * The score is the ComplexConsensi score of ssm->cc taken at
 * seq - offset - pre_splice_site, minus ssm->rmds->base[] for each of
 * the (start_random - stop_random + 1) bases starting at
 * seq - start_random + 1.  The result is never lower than
 * ssm->error_pos.
 *
 * If the end-of-string character is met while walking those bases a
 * warning is issued and NEGI is returned.
 *
 * seq has to point far enough into a larger buffer for the characters
 * before it to be readable; this is not checked here.
 */
Score SpliceSiteModel_score(SpliceSiteModel * ssm,char * seq)
{
  char * origin = seq;
  char * cursor;
  int remaining;
  int total;
  base nt;

  /* consensus part of the score */
  total = score_from_ComplexConsensi(seq - ssm->offset - ssm->pre_splice_site,ssm->cc);

  /* the random model is subtracted (ie divided out) over this window */
  remaining = ssm->start_random - ssm->stop_random + 1;
  cursor = seq - ssm->start_random + 1;

  while( remaining > 0 ) {
    if( *cursor == '\0' ) {
      warn("You are attempting to score an impossible base (%d from SS) [%s] in a splice site",(int)(cursor - origin),origin);
      return NEGI;
    }

    nt = base_from_char(*cursor);
    total -= ssm->rmds->base[nt];

    cursor++;
    remaining--;
  }

  /* floor, to allow for errors and non consensus splice sites */
  if( total < ssm->error_pos ) {
    total = ssm->error_pos;
  }

  return total;
}
/*
 * Builds a newly allocated DnaHmmProbUnit from the counts of one column.
 *
 * For each of A, T, G and C the match probability is
 * (count + simple_pseudocount) divided by the sum of those four terms;
 * counts are read from cc->count[letter - 'A'] and written to
 * match[base_from_char(letter)].  match[4] is set to 1.0.
 *
 * All transitions are set to 0.0 except DHMM_MATCH2MATCH, which is 1.0.
 *
 * The result of DnaHmmProbUnit_alloc is used without a NULL check.
 */
DnaHmmProbUnit * new_DnaHmmProbUnit_from_ColumnCount_ungapped(ColumnCount * cc,double simple_pseudocount)
{
  const char * letters = "ATGC";
  DnaHmmProbUnit * unit;
  double denominator = 0.0;
  int k;

  unit = DnaHmmProbUnit_alloc();

  for(k=0;k<4;k++) {
    denominator += (cc->count[letters[k]-'A'] + simple_pseudocount);
  }

  for(k=0;k<4;k++) {
    unit->match[base_from_char(letters[k])] = (cc->count[letters[k]-'A'] + simple_pseudocount) / denominator;
  }
  unit->match[4] = 1.0;

  k = 0;
  while( k < DHMM_TRANSITION_LEN ) {
    unit->transition[k] = 0.0;
    k++;
  }
  unit->transition[DHMM_MATCH2MATCH] = 1.0;

  return unit;
}
/*
 * Returns the base_from_char value of the first character of seq.
 * The type and data arguments are not used.
 */
int base_number_func(int type,void * data,char * seq)
{
  int number = base_from_char(seq[0]);

  return number;
}
/*
 * Complements a base given as a character: the character is converted
 * with base_from_char, complemented with complement_base, and converted
 * back with char_from_base.
 */
char char_complement_base(char c)
{
  char complemented = char_from_base(complement_base(base_from_char(c)));

  return complemented;
}