Пример #1
0
/*
 * Reports whether the codon beginning at position n of seq is accepted as a
 * start codon under the translation table stored in tinf->trans_table.
 * Returns 1 for a start codon and 0 otherwise.  Only ATG, GTG and TTG are
 * ever recognised.
 */
int is_start(unsigned char *seq, int n, struct _training *tinf) {

  /* ATG is accepted regardless of the translation table */
  if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    return 1;
  }

  /* These tables accept nothing but ATG */
  switch(tinf->trans_table) {
    case 6:
    case 10:
    case 14:
    case 15:
    case 16:
    case 22:
      return 0;
    default:
      break;
  }

  /* GTG (table 22 was already rejected above, so it needs no case here) */
  if(is_g(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    switch(tinf->trans_table) {
      case 1:
      case 3:
      case 12:
        return 0;
      default:
        return 1;
    }
  }

  /* TTG */
  if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    return (tinf->trans_table < 4 || tinf->trans_table == 9 ||
            tinf->trans_table >= 21) ? 0 : 1;
  }

  /* No other initiation codon is handled */
  return 0;
}
Пример #2
0
/*
 * Returns the fraction of positions in the inclusive range [a, b] of seq for
 * which is_g() or is_c() reports 1.
 *
 * An empty range (b < a) yields 0.0; dividing the two zero counters would
 * otherwise produce NaN.
 */
double gc_content(unsigned char *seq, int a, int b) {
  double sum = 0.0, gc = 0.0;
  int i;

  if(b < a) return 0.0;

  for(i = a; i <= b; i++) {
    if(is_g(seq, i) == 1 || is_c(seq, i) == 1) gc++;
    sum++;
  }
  return gc/sum;
}
Пример #3
0
/*
 * Reports whether the codon beginning at position n of seq is a stop codon
 * under the translation table stored in tinf->trans_table.
 * Returns 1 for a stop codon and 0 otherwise.
 */
int is_stop(unsigned char *seq, int n, struct _training *tinf) {

  /* TAG: a stop unless the table reassigns it */
  if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    switch(tinf->trans_table) {
      case 6:
      case 15:
      case 16:
      case 22:
        return 0;
      default:
        return 1;
    }
  }

  /* TGA: a stop unless the table reassigns it */
  if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    switch(tinf->trans_table) {
      case 2:
      case 3:
      case 4:
      case 5:
      case 9:
      case 10:
      case 13:
      case 14:
      case 21:
        return 0;
      default:
        return 1;
    }
  }

  /* TAA: a stop unless the table reassigns it */
  if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    switch(tinf->trans_table) {
      case 6:
      case 14:
        return 0;
      default:
        return 1;
    }
  }

  /* Extra stop codons that exist only in individual tables */
  switch(tinf->trans_table) {
    case 2:
      /* AGA and AGG */
      if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 &&
         (is_a(seq, n+2) == 1 || is_g(seq, n+2) == 1)) return 1;
      break;
    case 22:
      /* TCA */
      if(is_t(seq, n) == 1 && is_c(seq, n+1) == 1 &&
         is_a(seq, n+2) == 1) return 1;
      break;
    case 23:
      /* TTA */
      if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 &&
         is_a(seq, n+2) == 1) return 1;
      break;
    default:
      break;
  }

  return 0;
}
//======================================================================
bool sccan_point_pair_handler::
is_string_a_sccan_run_config(std::string line){
	// Checks whether 'line' has the shape of a sccan run configuration name
	// by running it through a deterministic finite automaton:
	//
	//     8 digits '_' 4 digits '_' "sccan" '_' digits* ".cfg"
	//
	// Returns true only when the whole string is consumed and the automaton
	// ends in its accepting state.
	// Characters are classified by is_c(), is_f(), is_g(), is_s(), is_a(),
	// is_n(), is_point() and is_underscore(), which are defined elsewhere;
	// presumably each matches exactly the character it is named after --
	// confirm against their definitions.
	
	if(verbosity){
		std::cout<<"sccan_point_pair_handler -> ";
		std::cout<<"is_string_a_sccan_run_config() -> line: ";
		std::cout<<line<<std::endl;
	}

	// Input alphabet of the automaton. ERR marks any character outside the
	// alphabet; it is never used as a column index, so its value doubles as
	// the number of table columns.
	enum TokenT{ 
		digit,
		point,
		underscore,
		char_c,
		char_f,
		char_g,
		char_s,
		char_a,
		char_n,
		ERR
	};

	const int rejected_state = -1;
	const int accepting_state = 24;

	// Transition table: GetState[current state][token] -> next state.
	// Row 24 (the accepting state) has no outgoing transitions, so any
	// character following a complete match rejects the string instead of
	// indexing past the end of the table.
	static const int GetState[25][ERR] ={
	//dig point under	c	f	g	s	a	n
	{  1,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//0		d
	{  2,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//1		d
	{  3,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//2		d
	{  4,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//3		d
	{  5,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//4		d
	{  6,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//5		d
	{  7,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//6		d
	{  8,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//7		d
	{  -1,	-1,  9,		-1,	-1,	-1, -1,	-1,	-1 },	//8		_
	{  10,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//9		d
	{  11,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//10	d
	{  12,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//11	d
	{  13,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//12	d
	{  -1,	-1,	14,		-1,	-1,	-1, -1,	-1,	-1 },	//13	_	
	{  -1,	-1,	-1,		-1,	-1,	-1, 15,	-1,	-1 },	//14	s
	{  -1,	-1,	-1,		16,	-1,	-1, -1,	-1,	-1 },	//15	c
	{  -1,	-1,	-1,		17,	-1,	-1, -1,	-1,	-1 },	//16	c
	{  -1,	-1,	-1,		-1,	-1,	-1, -1,	18,	-1 },	//17	a
	{  -1,	-1,	-1,		-1,	-1,	-1, -1,	-1,	19 },	//18	n
	{  -1,	-1,	20,		-1,	-1,	-1, -1,	-1,	-1 },	//19	_	
	{  20,	21,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//20	d	
	{  -1,	-1,	-1,		22,	-1,	-1, -1,	-1,	-1 },	//21	.
	{  -1,	-1,	-1,		-1,	23,	-1, -1,	-1,	-1 },	//22	c
	{  -1,	-1,	-1,		-1,	-1,	24, -1,	-1,	-1 },	//23	f
	{  -1,	-1,	-1,		-1,	-1,	-1, -1,	-1,	-1 },	//24	accepting
	};

	int state = 0;
	for(
		std::string::size_type char_number = 0;
		state != rejected_state && char_number < line.size();
		char_number++
	){
		char s = line[char_number];

		// Later checks deliberately override earlier ones, so the order of
		// these tests is kept as is.
		TokenT token = ERR;
		if(is_c(s)) token = char_c;
		if(is_f(s)) token = char_f;
		if(is_g(s)) token = char_g;
		if(is_s(s)) token = char_s;
		if(is_a(s)) token = char_a;
		if(is_n(s)) token = char_n;
		// isdigit() requires a value representable as unsigned char.
		if(isdigit(static_cast<unsigned char>(s))) token = digit; 
		if(is_point(s)) token = point;
		if(is_underscore(s)) token = underscore;

		// A character outside the alphabet rejects the whole string.
		state = (token == ERR) ? rejected_state : GetState[state][token];
	}

	return state == accepting_state;
}
Пример #5
0
/*
 * Scores the region of up to six bases starting at seq[pos] against the
 * motif AGGAGG, allowing exactly one mismatch, and returns the index into
 * rwt of the best motif found (0 when nothing qualifies).
 *
 *   seq    sequence, read through is_a()/is_g()
 *   pos    first position of the candidate region
 *   start  position of the start codon; the spacer length is measured
 *          relative to it
 *   rwt    weights indexed by motif number, used to rank candidates
 *
 * Only windows of 5 or 6 bases are considered.  The single mismatch is
 * penalised heavily when it falls within two bases of either window edge,
 * so in practice it has to sit in the interior.
 */
int shine_dalgarno_mm(unsigned char *seq, int pos, int start, double *rwt) {
  int i, j, k, mism, rdis, limit, dis_flag, max_val, cur_val = 0;
  double match[6], cur_ctr;

  /* Number of usable bases; never negative, so match[] is never indexed
     below zero. */
  limit = imin(6, start-4-pos);
  if(limit < 0) limit = 0;

  /* Default every slot to a strong mismatch.  This covers both the slots
     beyond limit and the ones skipped below because they lie before the
     beginning of the sequence. */
  for(i = 0; i < 6; i++) match[i] = -10.0;

  /* Compare the 6-base region to AGGAGG */
  for(i = 0; i < limit; i++) {
    if(pos+i < 0) continue;
    if(i % 3 == 0) {
      if(is_a(seq, pos+i) == 1) match[i] = 2.0;
      else match[i] = -3.0;
    }
    else {
      if(is_g(seq, pos+i) == 1) match[i] = 3.0;
      else match[i] = -2.0;
    }
  }

  /* Find the maximally scoring motif */
  max_val = 0;
  for(i = limit; i >= 5; i--) {          /* i: window length   */
    for(j = 0; j <= limit-i; j++) {      /* j: window offset   */
      cur_ctr = -2.0;
      mism = 0;
      for(k = j; k < j+i; k++) {
        cur_ctr += match[k];
        if(match[k] < 0.0) mism++;
        /* Mismatches close to either edge are penalised further */
        if(match[k] < 0.0 && (k <= j+1 || k >= j+i-2)) cur_ctr -= 10.0;
      }
      if(mism != 1) continue;

      /* Classify the distance between the window end and the start */
      rdis = start - (pos+j+i);
      if(rdis < 5) dis_flag = 1;
      else if(rdis > 10 && rdis <= 12) dis_flag = 2;
      else if(rdis >= 13) dis_flag = 3;
      else dis_flag = 0;
      if(rdis > 15 || cur_ctr < 6.0) continue;

      /* Single-Mismatch RBS Motifs */
      if(cur_ctr == 6.0 && dis_flag == 3) cur_val = 2;
      else if(cur_ctr == 7.0 && dis_flag == 3) cur_val = 2;
      else if(cur_ctr == 9.0 && dis_flag == 3) cur_val = 3;
      else if(cur_ctr == 6.0 && dis_flag == 2) cur_val = 4;
      else if(cur_ctr == 6.0 && dis_flag == 1) cur_val = 5;
      else if(cur_ctr == 6.0 && dis_flag == 0) cur_val = 9;
      else if(cur_ctr == 7.0 && dis_flag == 2) cur_val = 7;
      else if(cur_ctr == 7.0 && dis_flag == 1) cur_val = 8;
      else if(cur_ctr == 7.0 && dis_flag == 0) cur_val = 14;
      else if(cur_ctr == 9.0 && dis_flag == 2) cur_val = 17;
      else if(cur_ctr == 9.0 && dis_flag == 1) cur_val = 18;
      else if(cur_ctr == 9.0 && dis_flag == 0) cur_val = 19;

      /* Keep the candidate with the higher weight; ties go to the higher
         motif number. */
      if(rwt[cur_val] < rwt[max_val]) continue;
      if(rwt[cur_val] == rwt[max_val] && cur_val < max_val) continue;
      max_val = cur_val;
    }
  }

  return max_val;
}
Пример #6
0
/*
 * Scores the region of up to six bases starting at seq[pos] against the
 * motif AGGAGG, requiring an exact match, and returns the index into rwt of
 * the best motif found (0 when nothing qualifies).
 *
 *   seq    sequence, read through is_a()/is_g()
 *   pos    first position of the candidate region
 *   start  position of the start codon; the spacer length is measured
 *          relative to it
 *   rwt    weights indexed by motif number, used to rank candidates
 *
 * Windows of 3 to 6 bases are considered; a window containing any
 * mismatching base is discarded.
 */
int shine_dalgarno_exact(unsigned char *seq, int pos, int start, double *rwt) {
  int i, j, k, mism, rdis, limit, dis_flag, max_val, cur_val = 0;
  double match[6], cur_ctr;

  /* Number of usable bases; never negative, so match[] is never indexed
     below zero. */
  limit = imin(6, start-4-pos);
  if(limit < 0) limit = 0;

  /* Default every slot to a mismatch.  This covers both the slots beyond
     limit and the ones skipped below because they lie before the beginning
     of the sequence. */
  for(i = 0; i < 6; i++) match[i] = -10.0;

  /* Compare the 6-base region to AGGAGG */
  for(i = 0; i < limit; i++) {
    if(pos+i < 0) continue;
    if(i%3 == 0 && is_a(seq, pos+i) == 1) match[i] = 2.0;
    else if(i%3 != 0 && is_g(seq, pos+i) == 1) match[i] = 3.0;
    else match[i] = -10.0;
  }

  /* Find the maximally scoring motif */
  max_val = 0;
  for(i = limit; i >= 3; i--) {          /* i: window length   */
    for(j = 0; j <= limit-i; j++) {      /* j: window offset   */
      cur_ctr = -2.0;
      mism = 0;
      for(k = j; k < j+i; k++) {
        cur_ctr += match[k];
        if(match[k] < 0.0) mism++;
      }
      if(mism > 0) continue;

      /* Classify the distance between the window end and the start; the
         meaning of flags 1 and 2 depends on the window length. */
      rdis = start - (pos+j+i);
      if(rdis < 5 && i < 5) dis_flag = 2;
      else if(rdis < 5 && i >= 5) dis_flag = 1;
      else if(rdis > 10 && rdis <= 12 && i < 5) dis_flag = 1;
      else if(rdis > 10 && rdis <= 12 && i >= 5) dis_flag = 2;
      else if(rdis >= 13) dis_flag = 3;
      else dis_flag = 0;
      if(rdis > 15 || cur_ctr < 6.0) continue;

      /* Exact-Matching RBS Motifs */
      if(cur_ctr == 6.0 && dis_flag == 2) cur_val = 1;
      else if(cur_ctr == 6.0 && dis_flag == 3) cur_val = 2;
      else if(cur_ctr == 8.0 && dis_flag == 3) cur_val = 3;
      else if(cur_ctr == 9.0 && dis_flag == 3) cur_val = 3;
      else if(cur_ctr == 6.0 && dis_flag == 1) cur_val = 6;
      else if(cur_ctr == 11.0 && dis_flag == 3) cur_val = 10;
      else if(cur_ctr == 12.0 && dis_flag == 3) cur_val = 10;
      else if(cur_ctr == 14.0 && dis_flag == 3) cur_val = 10;
      else if(cur_ctr == 8.0 && dis_flag == 2) cur_val = 11;
      else if(cur_ctr == 9.0 && dis_flag == 2) cur_val = 11;
      else if(cur_ctr == 8.0 && dis_flag == 1) cur_val = 12;
      else if(cur_ctr == 9.0 && dis_flag == 1) cur_val = 12;
      else if(cur_ctr == 6.0 && dis_flag == 0) cur_val = 13;
      else if(cur_ctr == 8.0 && dis_flag == 0) cur_val = 15;
      else if(cur_ctr == 9.0 && dis_flag == 0) cur_val = 16;
      else if(cur_ctr == 11.0 && dis_flag == 2) cur_val = 20;
      else if(cur_ctr == 11.0 && dis_flag == 1) cur_val = 21;
      else if(cur_ctr == 11.0 && dis_flag == 0) cur_val = 22;
      else if(cur_ctr == 12.0 && dis_flag == 2) cur_val = 20;
      else if(cur_ctr == 12.0 && dis_flag == 1) cur_val = 23;
      else if(cur_ctr == 12.0 && dis_flag == 0) cur_val = 24;
      else if(cur_ctr == 14.0 && dis_flag == 2) cur_val = 25;
      else if(cur_ctr == 14.0 && dis_flag == 1) cur_val = 26;
      else if(cur_ctr == 14.0 && dis_flag == 0) cur_val = 27;

      /* Keep the candidate with the higher weight; ties go to the higher
         motif number. */
      if(rwt[cur_val] < rwt[max_val]) continue;
      if(rwt[cur_val] == rwt[max_val] && cur_val < max_val) continue;
      max_val = cur_val;
    }
  }

  return max_val;
}
Пример #7
0
/*
 * Returns the single-letter amino acid for the codon at positions n..n+2 of
 * seq under the translation table stored in tinf->trans_table.
 *
 *   '*'  the codon is a stop codon according to is_stop()
 *   'M'  the codon is a start codon according to is_start() and is_init is 1
 *   'X'  the codon matched none of the rules below
 *
 * Codons that are stops in some tables but sense codons in others (TAA,
 * TAG, TGA, AGA, AGG) only reach their rule here when is_stop() returned 0
 * for the current table.
 */
char amino(unsigned char *seq, int n, struct _training *tinf, int is_init) {
  if(is_stop(seq, n, tinf) == 1) return '*';
  if(is_start(seq, n, tinf) == 1 && is_init == 1) return 'M';

  /* TTN */
  if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'F';
  if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'F';
  if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1)
    return 'L';
  if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 'L';

  /* TCN */
  if(is_t(seq, n) == 1 && is_c(seq, n+1) == 1) return 'S';

  /* TAN */
  if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'Y';
  if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'Y';
  /* TAA is a sense codon only in tables 6 and 14 */
  if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    if(tinf->trans_table == 6) return 'Q';
    if(tinf->trans_table == 14) return 'Y';
  }
  /* TAG is a sense codon in tables 6, 15, 16 and 22 */
  if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    if(tinf->trans_table == 6 || tinf->trans_table == 15) return 'Q';
    if(tinf->trans_table == 16 || tinf->trans_table == 22) return 'L';
  }

  /* TGN */
  if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'C';
  if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'C';
  /* TGA is cysteine in table 10 and tryptophan in the other tables that do
     not use it as a stop */
  if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    if(tinf->trans_table == 10) return 'C';
    return 'W';
  }
  if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 'W';

  /* CTN: threonine in table 3; CTG is serine in table 12 */
  if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_t(seq, n+2) == 1) {
    if(tinf->trans_table == 3) return 'T';
    return 'L';
  }
  if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_c(seq, n+2) == 1) {
    if(tinf->trans_table == 3) return 'T';
    return 'L';
  }
  if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    if(tinf->trans_table == 3) return 'T';
    return 'L';
  }
  if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    if(tinf->trans_table == 3) return 'T';
    if(tinf->trans_table == 12) return 'S';
    return 'L';
  }

  /* CCN */
  if(is_c(seq, n) == 1 && is_c(seq, n+1) == 1) return 'P';

  /* CAN */
  if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'H';
  if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'H';
  if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1)
    return 'Q';
  if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 'Q';

  /* CGN */
  if(is_c(seq, n) == 1 && is_g(seq, n+1) == 1) return 'R';

  /* ATN */
  if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'I';
  if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'I';
  if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    if(tinf->trans_table == 2 || tinf->trans_table == 3 ||
       tinf->trans_table == 5 || tinf->trans_table == 13 ||
       tinf->trans_table == 21) return 'M';
    return 'I';
  }
  if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 'M';

  /* ACN */
  if(is_a(seq, n) == 1 && is_c(seq, n+1) == 1) return 'T';

  /* AAN */
  if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'N';
  if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'N';
  if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) {
    if(tinf->trans_table == 9 || tinf->trans_table == 14 ||
       tinf->trans_table == 21) return 'N';
    return 'K';
  }
  if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 'K';

  /* AGN */
  if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'S';
  if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'S';
  if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && (is_a(seq, n+2) == 1 ||
     is_g(seq, n+2) == 1)) {
    if(tinf->trans_table == 13) return 'G';
    if(tinf->trans_table == 5 || tinf->trans_table == 9 ||
       tinf->trans_table == 14 || tinf->trans_table == 21) return 'S';
    return 'R';
  }

  /* GTN, GCN */
  if(is_g(seq, n) == 1 && is_t(seq, n+1) == 1) return 'V';
  if(is_g(seq, n) == 1 && is_c(seq, n+1) == 1) return 'A';

  /* GAN */
  if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1)
    return 'D';
  if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1)
    return 'D';
  if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1)
    return 'E';
  if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 'E';

  /* GGN */
  if(is_g(seq, n) == 1 && is_g(seq, n+1) == 1) return 'G';

  /* Nothing matched */
  return 'X';
}
Пример #8
0
/*
 * Returns 1 when the three bases starting at position n of seq read TTG,
 * i.e. none of the per-base checks reports 0; returns 0 otherwise.
 */
int is_ttg(unsigned char *seq, int n) {
  return (is_t(seq, n) != 0 && is_t(seq, n+1) != 0 &&
          is_g(seq, n+2) != 0) ? 1 : 0;
}