Ejemplo n.º 1
0
/*
 * Scores the change from codon `ref` to codon `diff`.
 *
 * The score always contains the three per-base substitution terms taken
 * from dm->prob. If the two codons translate (through ct) to different
 * amino acids, the score of rm's probability for the new amino acid is
 * added as well; if they translate to the same amino acid nothing more
 * is added.
 *
 * Both ref and diff must point at at least three readable characters.
 */
Score logl_positive_selection(char * ref,char * diff,RandomModel * rm,CodonTable *ct,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;
  char old_aa;
  char new_aa;

  /* base-by-base substitution terms across the codon */
  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  old_aa = aminoacid_from_seq(ct,ref);
  new_aa = aminoacid_from_seq(ct,diff);

  /* only a changed amino acid contributes the extra emission term */
  if( old_aa != new_aa ) {
    total += Probability2Score(rm->aminoacid[new_aa-'A']);
  }

  return total;
}
Ejemplo n.º 2
0
/*
 * Scores the change from codon `ref` to codon `diff` using nothing but
 * the per-base substitution probabilities in dm->prob: the result is the
 * sum of the three Probability2Score() terms, one per codon position.
 *
 * Both ref and diff must point at at least three readable characters.
 */
Score logl_pseudogene(char * ref,char * diff,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;

  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  return total;
}
Ejemplo n.º 3
0
/*
 * Converts the first three characters of seq into a codon number.
 *
 * Each character is mapped with base_from_char() and the three results
 * are combined as first*25 + second*5 + third.
 *
 * seq must point at at least three readable characters.
 */
codon codon_from_seq(char * seq)
{
  base digit[3];
  int pos;

  for(pos=0;pos<3;pos++)
    digit[pos] = base_from_char(seq[pos]);

  return digit[0]*25+digit[1]*5+digit[2];
}
Ejemplo n.º 4
0
/*
 * Encodes the first nmer_size characters of str as a base-4 number,
 * choosing between the string as given and its reverse complement.
 *
 * The two strands are compared base by base from the left. At the first
 * position where they differ, the strand with the larger base value is
 * the one that gets encoded:
 *   flipped == 0  the number is built from str as given
 *   flipped == 1  the number is built from the reverse complement
 * A k-mer that equals its own reverse complement has no differing
 * position; both encodings are identical, so it is reported with
 * flipped == 0.
 *
 * Returns flipped == 2 and number == 0 when the k-mer cannot be encoded:
 * nmer_size is not positive, or any of the nmer_size characters maps to
 * BASE_N. Every position is checked before encoding, so an N anywhere in
 * the k-mer is rejected rather than being folded into the number.
 */
DnaNumber dna_number_from_string(char * str,int nmer_size)
{
  int i;
  int base = 1;
  DnaNumber out;
  int forward;
  int backward;

  out.flipped = 2;
  out.number  = 0;

  if( nmer_size <= 0 ) {
    return out;
  }

  /* reject ambiguous bases up front, on either strand */
  for(i=0;i<nmer_size;i++) {
    forward = base_from_char(str[i]);
    if( forward == BASE_N || complement_base(forward) == BASE_N ) {
      return out;
    }
  }

  /* weight of the most significant digit: 4^(nmer_size-1) */
  for(i=0;i<nmer_size-1;i++)
    base *= 4;

  /* default covers the reverse-complement palindrome case */
  out.flipped = 0;

  for(i=0;i<nmer_size;i++) {
    forward = base_from_char(str[i]);
    backward = complement_base(base_from_char(str[nmer_size-1-i]));

    if( forward > backward ) {
      out.flipped = 0;
      break;
    }
    if( backward > forward ) {
      out.flipped = 1;
      break;
    }
  }

  if( out.flipped == 0 ) {
    for(i=0;i<nmer_size;i++) {
      out.number += base * base_from_char(str[i]);
      base = base / 4;
    }
  } else {
    /* walk the string backwards, complementing each base */
    for(i=0;i<nmer_size;i++) {
      out.number += base * complement_base(base_from_char(str[nmer_size-1-i]));
      base = base / 4;
    }
  }

  return out;

}
Ejemplo n.º 5
0
/*
 * Converts the first three characters of seq into a base-4 codon number
 * (first*16 + second*4 + third, each mapped with base_from_char()).
 *
 * Returns 64 if any of the three positions maps to BASE_N.
 *
 * seq must point at at least three readable characters.
 */
int base4_codon_from_seq(char * seq)
{
  base digit[3];
  int pos;

  for(pos=0;pos<3;pos++)
    digit[pos] = base_from_char(seq[pos]);

  for(pos=0;pos<3;pos++) {
    if( digit[pos] == BASE_N )
      return 64;
  }

  return digit[0]*16+digit[1]*4+digit[2];
}
Ejemplo n.º 6
0
/*
 * Reads the "int22" parameter file into the eight-dimensional table *t.
 *
 * The table is first filled with NOT_A_NUMBER. The file is then consumed
 * record by record. Each record supplies four base characters (two on
 * each of two header lines) followed by 16 groups of 16 values; the
 * values are stored at (*t)[b0][b1][b2][b3][i][k][j][l].
 *
 * Any parse failure on the header lines ends in die().
 */
static void read_int22(int use_dna_params, tab8_t *t)
{
  fill8(t, NOT_A_NUMBER);
  /* NOTE(review): f is used without a NULL check - presumably parfile()
     does not return on failure; confirm. */
  FILE *f = parfile("int22", use_dna_params);
  look_for_line_containing(f, "5' ------> 3'");
  char buf[MAXLINE+1];
  while (fgets(buf, MAXLINE, f)) {
    /* NOTE(review): fgets(buf, MAXLINE, f) stores at most MAXLINE-1
       characters, so strlen(buf) >= MAXLINE cannot be true and this
       check never fires as written. */
    if (strlen(buf) >= MAXLINE)
      die("read_int22: line too long");
    if (is_only_whitespace(buf))
      continue;
    /* a non-blank line starts a record; move on to its direction marker */
    look_for_line_containing(f, "5' ------> 3'");
    char a[4];
    /* first header line carries a[0] and a[1] */
    if (!(fgets(buf, MAXLINE, f) && sscanf(buf, " %c \\/ \\_/ %c", &a[0], &a[1]) == 2))
      die("read_int22: couldn't read first line");
    /* second header line carries a[2] and a[3] */
    if (!(fgets(buf, MAXLINE, f) && sscanf(buf, " %c /\\  |  %c", &a[2], &a[3]) == 2))
      die("read_int22: couldn't read second line");
    expect_line_containing(f, "3' <------ 5'");
    int i, j;
    base_t b[4];
    /* the four header characters select the leading four table indices */
    for (i = 0; i < 4; i++)
      b[i] = base_from_char(a[i]);
    /* 16 reads of 16 values each; note the index order i,k,j,l below */
    for (i = 0; i < 4; i++)
      for (j = 0; j < 4; j++) {
	int_t val[16];
	read_next_values(f, val, 16);
	int k, l;
	for (k = 0; k < 4; k++)
	  for (l = 0; l < 4; l++)
	    (*t)[b[0]][b[1]][b[2]][b[3]][i][k][j][l] = val[4*k+l];
      }
  }
  fclose(f);
}
Ejemplo n.º 7
0
/*
 * Tells whether the first three characters of seq form a codon with no
 * ambiguous base.
 *
 * Returns FALSE (after a warning) if the string ends before three
 * characters, FALSE if any of the three maps to BASE_N under
 * base_from_char(), and TRUE otherwise.
 */
boolean is_non_ambiguous_codon_seq(char * seq)
{
  int pos;

  /* short-circuit order matters: never read past the terminator */
  if( seq[0] == '\0' || seq[1] == '\0' || seq[2] == '\0') {
    warn("Attempting to find a codon number is something less than 3 bases long!");
    return FALSE;
  }

  for(pos=0;pos<3;pos++) {
    if( base_from_char(seq[pos]) == BASE_N )
      return FALSE;
  }

  return TRUE;
}
Ejemplo n.º 8
0
/*
 * Splits buf on `whitespace` and decodes its first two tokens: the first
 * n characters of token one are converted with base_from_char() into
 * seq[0..n-1], and token two is converted with value_from_string() into
 * *val.
 *
 * buf is modified in place by strtok(). There is no check that either
 * token exists or that the first token has at least n characters, so
 * the caller must guarantee both.
 */
static void read_small_loop(char *buf, base_t seq[], int_t *val, int n)
{
  char *tok = strtok(buf, whitespace);
  int k = 0;
  while (k < n) {
    seq[k] = base_from_char(tok[k]);
    k++;
  }
  tok = strtok(0, whitespace);
  *val = value_from_string(tok);
}
Ejemplo n.º 9
0
/*
 * Parses six characters from buf, each one following a literal 'Y'
 * and separated by optional whitespace, and converts them with
 * base_from_char() into b[0..5]. Calls die() unless all six are found.
 */
static void read_six_bases(const char *buf, base_t b[6]) 
{
  char letters[6];
  int n_read = sscanf(buf, " Y%c Y%c Y%c Y%c Y%c Y%c",
                      &letters[0], &letters[1], &letters[2],
                      &letters[3], &letters[4], &letters[5]);
  if (n_read != 6)
    die("read_six_bases: error");
  int k = 0;
  while (k < 6) {
    b[k] = base_from_char(letters[k]);
    k++;
  }
}
Ejemplo n.º 10
0
/*
 * Parses four whitespace-separated characters from buf and converts them
 * with base_from_char() into b[0..3]. If buf contains an 'X' anywhere,
 * each character is expected to be followed by a literal 'X'.
 * Calls die() unless all four are found.
 */
static void read_four_bases(const char *buf, base_t b[4])
{
  char letters[4];
  const char *fmt;
  if (strchr(buf,'X'))
    fmt = " %cX %cX %cX %cX";
  else
    fmt = " %c %c %c %c";
  int n_read = sscanf(buf, fmt, &letters[0], &letters[1], &letters[2], &letters[3]);
  if (n_read != 4)
    die("read_four_bases: error");
  int k = 0;
  while (k < 4) {
    b[k] = base_from_char(letters[k]);
    k++;
  }
}
Ejemplo n.º 11
0
/*
 * Parses twelve whitespace-separated characters from buf and converts
 * them with base_from_char() into b[0..11]. Calls die() unless all
 * twelve are found.
 */
static void read_twelve_bases(const char *buf, base_t b[12]) 
{
  char letters[12];
  int n_read = sscanf(buf, " %c %c %c %c %c %c %c %c %c %c %c %c",
                      &letters[0], &letters[1], &letters[2], &letters[3],
                      &letters[4], &letters[5], &letters[6], &letters[7],
                      &letters[8], &letters[9], &letters[10], &letters[11]);
  if (n_read != 12)
    die("read_twelve_bases: error");
  int k = 0;
  while (k < 12) {
    b[k] = base_from_char(letters[k]);
    k++;
  }
}
Ejemplo n.º 12
0
/*
 * Returns the average frequency of all unambiguous codons compatible
 * with a possibly ambiguous codon.
 *
 * Each of the three characters is mapped with base_from_char(), with '-'
 * treated as 'N'. A position that maps to BASE_N matches every one of
 * the four bases. The entries of codon_freq_array (indexed by
 * first*16 + second*4 + third) for all matching codons are summed and
 * the sum is divided by 4 for each BASE_N position.
 *
 * A result below 0.0000000000000001 triggers a warning and is raised to
 * that value.
 */
double nocds_from_ambiguous_codon(char * codon,double * codon_freq_array)
{
  int slot[3];
  int divisor = 1;
  int n;
  int a,b,c;
  double sum = 0.0;

  for(n=0;n<3;n++) {
    slot[n] = base_from_char(codon[n] == '-' ? 'N' : codon[n]);
    if( slot[n] == BASE_N )
      divisor *= 4;
  }

  /* same visiting order as a plain 4x4x4 sweep; non-matching codons skipped */
  for(a=0;a<4;a++) {
    if( slot[0] != a && slot[0] != BASE_N )
      continue;
    for(b=0;b<4;b++) {
      if( slot[1] != b && slot[1] != BASE_N )
        continue;
      for(c=0;c<4;c++) {
        if( slot[2] != c && slot[2] != BASE_N )
          continue;
        sum += codon_freq_array[a*16+b*4+c];
      }
    }
  }

  sum = sum / divisor;

  if( sum < 0.0000000000000001 ) {
    warn("For codon  %c%c%c we have a frequency of %g",codon[0],codon[1],codon[2],sum);
    sum = 0.0000000000000001;
  }

  return sum;
}
Ejemplo n.º 13
0
/*
 * Scores the change from codon `ref` to codon `diff`.
 *
 * The score always contains the three per-base substitution terms taken
 * from dm->prob. If the two codons translate (through ct) to different
 * amino acids, the difference between the match-emission scores of the
 * new and the old amino acid (new minus old, from unit->match_emission)
 * is added as well.
 *
 * Both ref and diff must point at at least three readable characters.
 */
Score logl_negative_selection(char * ref,char * diff,ThreeStateUnit * unit,CodonTable * ct,DnaProbMatrix * dm)
{
  Score total = 0;
  int pos = 0;
  char old_aa;
  char new_aa;

  /* base-by-base substitution terms across the codon */
  while( pos < 3 ) {
    total += Probability2Score(dm->prob[base_from_char(ref[pos])][base_from_char(diff[pos])]);
    pos++;
  }

  old_aa = aminoacid_from_seq(ct,ref);
  new_aa = aminoacid_from_seq(ct,diff);

  /* an unchanged amino acid adds nothing beyond the base terms */
  if( old_aa != new_aa ) {
    total += Probability2Score(unit->match_emission[new_aa-'A'])  - Probability2Score(unit->match_emission[old_aa-'A']);
  }

  return total;
}
Ejemplo n.º 14
0
/*
 * Reads a block of "<base> <number>" lines from ifp into base_array.
 *
 * line must already hold the opening line of the block: it has to start
 * with "begin" and contain "consensus", otherwise a warning is issued
 * and FALSE is returned without reading anything.
 *
 * Lines are then read into line (overwriting it, at most MAXLINE bytes
 * per read) until a line starting with "end" or end of file. Lines
 * starting with '#' are skipped. For every other line the first token
 * must be a single character that base_from_char() maps to something
 * other than BASE_N, and the second token is converted with atof() and
 * stored at base_array[<that base>].
 *
 * Returns TRUE if every data line was usable. A line with a missing
 * number or an unrecognised base produces a warning and makes the result
 * FALSE, but reading continues. A line with no tokens at all produces a
 * warning only.
 */
boolean read_base_GeneConsensus(double * base_array,char* line,FILE * ifp)
{
  boolean ret = TRUE;
  int b;
  char * base;
  char * number;


  if( strwhitestartcmp(line,"begin",spacestr) != 0 || strstr(line,"consensus") == NULL ) {
    warn("In reading base GeneConsensus line, got no 'begin consensus' tag [%s]",line);
    return FALSE;
  }


  while( fgets(line,MAXLINE,ifp) != NULL ) {
    if( line[0] == '#' )
      continue;

    if( strwhitestartcmp(line,"end",spacestr) == 0 )
      break;

    /* strtok cuts line in place: first token is the base, second the value */
    base = strtok(line,spacestr);
    number = strtok(NULL,spacestr);

    if( base == NULL ) {
      warn("Found an uncommented line in base consensus with no leading base word");
      continue;
    }

    if( number == NULL ) {
      warn("For base %s, no number found",base);
      ret = FALSE;
      continue;
    }

    if( strlen(base) > 1 || (b=base_from_char(*base)) == BASE_N ) {
      /* the offending token must be passed for the %s in the message */
      warn("Could not interpret %s as an actual DNA base in read_base_GeneConsensus",base);
      ret = FALSE;
      continue;
    }

    base_array[b]= atof(number);

  }

  return ret;
}
Ejemplo n.º 15
0
/*
 * Scores a candidate splice site located at seq.
 *
 * The score starts as the complex-consensus score of the window that
 * begins ssm->offset + ssm->pre_splice_site characters before seq. The
 * per-base values in ssm->rmds->base are then subtracted for
 * (start_random - stop_random + 1) consecutive characters, beginning at
 * seq - start_random + 1. Finally the score is raised to ssm->error_pos
 * if it fell below it.
 *
 * Returns NEGI (after a warning) if a '\0' is met while walking the
 * subtracted region. The caller must make sure the characters before
 * seq that are read here actually exist.
 */
Score SpliceSiteModel_score(SpliceSiteModel * ssm,char * seq)
{
  char * origin = seq;
  char * cursor;
  int remaining;
  int total;
  base b;

  /* consensus part of the score */
  total = score_from_ComplexConsensi(seq- ssm->offset - ssm->pre_splice_site,ssm->cc);

  /* take the random-model contribution back out, one base at a time */
  remaining = ssm->start_random - ssm->stop_random +1;
  cursor = seq - ssm->start_random+1;

  while( remaining > 0 ) {
    if( *cursor == '\0' ) {
      warn("You are attempting to score an impossible base (%d from SS) [%s] in a splice site",(int)(cursor - origin),origin);
      return NEGI;
    }

    b = base_from_char(*cursor);
    total -= ssm->rmds->base[b];

    cursor++;
    remaining--;
  }

  /* floor the result at the error score */
  if( total < ssm->error_pos )
    total = ssm->error_pos;

  return total;
}
Ejemplo n.º 16
0
/*
 * Builds a DnaHmmProbUnit from the A/T/G/C counts of one column.
 *
 * Each of the four counts (read from cc->count[letter - 'A']) has
 * simple_pseudocount added and is divided by the sum of the four
 * adjusted counts; the result is stored in out->match at the index
 * base_from_char() gives for that letter. out->match[4] is set to 1.0
 * (presumably the slot for an ambiguous base - confirm).
 *
 * All transitions are set to 0.0 except DHMM_MATCH2MATCH, which is 1.0.
 *
 * The unit comes from DnaHmmProbUnit_alloc() and is returned to the
 * caller; the allocation result is not checked here.
 */
DnaHmmProbUnit * new_DnaHmmProbUnit_from_ColumnCount_ungapped(ColumnCount * cc,double simple_pseudocount)
{
  const char letters[] = "ATGC";
  DnaHmmProbUnit * out;
  double total = 0.0;
  int k;

  out = DnaHmmProbUnit_alloc();

  /* normalising constant: sum of the pseudocount-adjusted counts */
  k = 0;
  while( k < 4 ) {
    total += (cc->count[letters[k]-'A'] + simple_pseudocount);
    k++;
  }

  k = 0;
  while( k < 4 ) {
    out->match[base_from_char(letters[k])] = (cc->count[letters[k]-'A'] + simple_pseudocount) / total;
    k++;
  }
  out->match[4] = 1.0;

  k = 0;
  while( k < DHMM_TRANSITION_LEN ) {
    out->transition[k] = 0.0;
    k++;
  }
  out->transition[DHMM_MATCH2MATCH] = 1.0;

  return out;
}
Ejemplo n.º 17
0
/*
 * Returns base_from_char() of the first character of seq.
 * The type and data arguments are not used.
 */
int base_number_func(int type,void * data,char * seq)
{
  (void)type;
  (void)data;

  return base_from_char(seq[0]);
}
Ejemplo n.º 18
0
/*
 * Returns the character for the complement of base character c:
 * c is mapped with base_from_char(), complemented with
 * complement_base(), and mapped back with char_from_base().
 */
char char_complement_base(char c)
{
  int complemented = complement_base(base_from_char(c));

  return char_from_base(complemented);
}