/*
 * Returns 1 if the codon occupying positions n, n+1, n+2 of seq is treated
 * as a start codon under the translation table in tinf->trans_table, and 0
 * otherwise.  Only ATG, GTG and TTG are ever recognised.
 */
int is_start(unsigned char *seq, int n, struct _training *tinf) {
  int table;

  /* ATG is accepted regardless of the translation table. */
  if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1)
    return 1;

  table = tinf->trans_table;

  /* Tables that use ATG and nothing else. */
  switch(table) {
    case 6:
    case 10:
    case 14:
    case 15:
    case 16:
    case 22:
      return 0;
    default:
      break;
  }

  /* GTG: rejected for a handful of tables, accepted otherwise. */
  if(is_g(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    return (table == 1 || table == 3 || table == 12 || table == 22) ? 0 : 1;
  }

  /* TTG: rejected for tables below 4, table 9, and tables 21 and up. */
  if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) {
    return (table < 4 || table == 9 || table >= 21) ? 0 : 1;
  }

  /* No other initiation codons are handled. */
  return 0;
}
/*
 * Returns the fraction of positions in the inclusive range [a, b] of seq
 * for which is_g() or is_c() reports 1.
 *
 * An empty range (b < a) yields 0.0; without the guard the function would
 * compute 0.0/0.0 and hand a NaN back to the caller.
 */
double gc_content(unsigned char *seq, int a, int b) {
  double total = 0.0, gc = 0.0;
  int i;

  if(b < a) return 0.0;

  for(i = a; i <= b; i++) {
    if(is_g(seq, i) == 1 || is_c(seq, i) == 1) gc++;
    total++;
  }
  return gc/total;
}
int is_stop(unsigned char *seq, int n, struct _training *tinf) { /* TAG */ if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) { if(tinf->trans_table == 6 || tinf->trans_table == 15 || tinf->trans_table == 16 || tinf->trans_table == 22) return 0; return 1; } /* TGA */ if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_a(seq, n+2) == 1) { if((tinf->trans_table >= 2 && tinf->trans_table <= 5) || tinf->trans_table == 9 || tinf->trans_table == 10 || tinf->trans_table == 13 || tinf->trans_table == 14 || tinf->trans_table == 21) return 0; return 1; } /* TAA */ if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) { if(tinf->trans_table == 6 || tinf->trans_table == 14) return 0; return 1; } /* Code 2 */ if(tinf->trans_table == 2 && is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 1; if(tinf->trans_table == 2 && is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 1; /* Code 22 */ if(tinf->trans_table == 22 && is_t(seq, n) == 1 && is_c(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 1; /* Code 23 */ if(tinf->trans_table == 23 && is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 1; return 0; }
//====================================================================== bool sccan_point_pair_handler:: is_string_a_sccan_run_config(std::string line){ if(verbosity){ std::cout<<"sccan_point_pair_handler -> "; std::cout<<"is_string_a_sccan_run_config() -> line: "; std::cout<<line<<std::endl; } // use DEAs for the std::string control // find enum TokenT{ digit, point, underscore, char_c, char_f, char_g, char_s, char_a, char_n, ERR }; int GetState[][23] ={ //dig point under c f g s a n { 1, -1, -1, -1, -1, -1, -1, -1, -1 }, //0 d { 2, -1, -1, -1, -1, -1, -1, -1, -1 }, //1 d { 3, -1, -1, -1, -1, -1, -1, -1, -1 }, //2 d { 4, -1, -1, -1, -1, -1, -1, -1, -1 }, //3 d { 5, -1, -1, -1, -1, -1, -1, -1, -1 }, //4 d { 6, -1, -1, -1, -1, -1, -1, -1, -1 }, //5 d { 7, -1, -1, -1, -1, -1, -1, -1, -1 }, //6 d { 8, -1, -1, -1, -1, -1, -1, -1, -1 }, //7 d { -1, -1, 9, -1, -1, -1, -1, -1, -1 }, //8 _ { 10, -1, -1, -1, -1, -1, -1, -1, -1 }, //9 d { 11, -1, -1, -1, -1, -1, -1, -1, -1 }, //10 d { 12, -1, -1, -1, -1, -1, -1, -1, -1 }, //11 d { 13, -1, -1, -1, -1, -1, -1, -1, -1 }, //12 d { -1, -1, 14, -1, -1, -1, -1, -1, -1 }, //13 _ { -1, -1, -1, -1, -1, -1, 15, -1, -1 }, //14 s { -1, -1, -1, 16, -1, -1, -1, -1, -1 }, //15 c { -1, -1, -1, 17, -1, -1, -1, -1, -1 }, //16 c { -1, -1, -1, -1, -1, -1, -1, 18, -1 }, //17 a { -1, -1, -1, -1, -1, -1, -1, -1, 19 }, //18 n { -1, -1, 20, -1, -1, -1, -1, -1, -1 }, //19 _ { 20, 21, -1, -1, -1, -1, -1, -1, -1 }, //20 d { -1, -1, -1, 22, -1, -1, -1, -1, -1 }, //21 . 
{ -1, -1, -1, -1, 23, -1, -1, -1, -1 }, //22 c { -1, -1, -1, -1, -1, 24, -1, -1, -1 }, //23 f //24 g }; //for( int i=0; i<11;i++){std::cout<<"("<<i<<")"<<GetState[26][i]<<", ";} //std::cout<<std::endl; std::string ID; std::string tilt_x; std::string tilt_y; std::string tilt_z; std::string str_vec2D_x; std::string str_vec2D_y; int int_vec2D_count = 0; int state = 0; int char_number = 0; //std::cout<<"str laenge: "<<line.size()<<std::endl; while(state != -1 && char_number<line.size()){ TokenT token = ERR; char s = line.at( char_number); if(is_c(s)) token = char_c; if(is_f(s)) token = char_f; if(is_g(s)) token = char_g; if(is_s(s)) token = char_s; if(is_a(s)) token = char_a; if(is_n(s)) token = char_n; if(isdigit(s)) token = digit; //std::cout<<char(s)<<"==digit"<<std::endl;} if(is_point(s)) token = point; if(is_underscore(s)) token = underscore; //std::cout<<"check"<<char_number<<": "; //std::cout<<char(s)<<" state: "<<state<<" token: "<<token<<std::endl; state = (token == ERR) ? :GetState[state][token]; char_number ++; } if(state==24){return true;}else{return false;}; }
/*
 * Scores the window of up to six bases starting at pos against the motif
 * AGGAGG, considering only alignments of length 5 or 6 that contain exactly
 * one mismatch, and returns the index of the best single-mismatch RBS motif
 * found (0 if none qualifies).
 *
 *   seq   - sequence, queried through is_a()/is_g()
 *   pos   - first position of the candidate window
 *   start - position of the start codon; the window is cut so that it ends
 *           at least four bases before it
 *   rwt   - weights indexed by motif value; entries up to index 19 are read
 *
 * Candidates are ranked by rwt[]; on equal weight the larger index wins.
 *
 * Positions before the beginning of the sequence (pos+i < 0) and positions
 * beyond the usable window count as heavy mismatches (-10.0).  Every slot of
 * match[] is therefore initialised up front, and a negative window length is
 * clamped to zero so nothing is written in front of match[0].
 */
int shine_dalgarno_mm(unsigned char *seq, int pos, int start, double *rwt) {
  int i, j, k, mism, rdis, limit, dis_flag, max_val, cur_val = 0;
  double match[6], cur_ctr;

  limit = imin(6, start-4-pos);
  if(limit < 0) limit = 0;

  for(i = 0; i < 6; i++) match[i] = -10.0;

  /* Compare the 6-base region to AGGAGG */
  for(i = 0; i < limit; i++) {
    if(pos+i < 0) continue;
    if(i % 3 == 0) match[i] = (is_a(seq, pos+i) == 1) ? 2.0 : -3.0;
    else match[i] = (is_g(seq, pos+i) == 1) ? 3.0 : -2.0;
  }

  /* Find the maximally scoring motif */
  max_val = 0;
  for(i = limit; i >= 5; i--) {
    for(j = 0; j <= limit-i; j++) {
      cur_ctr = -2.0;
      mism = 0;
      for(k = j; k < j+i; k++) {
        cur_ctr += match[k];
        if(match[k] < 0.0) {
          mism++;
          /* A mismatch within two bases of either end is penalised. */
          if(k <= j+1 || k >= j+i-2) cur_ctr -= 10.0;
        }
      }
      if(mism != 1) continue;

      /* Classify the spacing between the motif end and the start codon. */
      rdis = start - (pos+j+i);
      if(rdis < 5) dis_flag = 1;
      else if(rdis > 10 && rdis <= 12) dis_flag = 2;
      else if(rdis >= 13) dis_flag = 3;
      else dis_flag = 0;
      if(rdis > 15 || cur_ctr < 6.0) continue;

      /* Single-Mismatch RBS Motifs (cur_ctr is at least 6.0 here) */
      if(cur_ctr == 6.0 && dis_flag == 3) cur_val = 2;
      else if(cur_ctr == 7.0 && dis_flag == 3) cur_val = 2;
      else if(cur_ctr == 9.0 && dis_flag == 3) cur_val = 3;
      else if(cur_ctr == 6.0 && dis_flag == 2) cur_val = 4;
      else if(cur_ctr == 6.0 && dis_flag == 1) cur_val = 5;
      else if(cur_ctr == 6.0 && dis_flag == 0) cur_val = 9;
      else if(cur_ctr == 7.0 && dis_flag == 2) cur_val = 7;
      else if(cur_ctr == 7.0 && dis_flag == 1) cur_val = 8;
      else if(cur_ctr == 7.0 && dis_flag == 0) cur_val = 14;
      else if(cur_ctr == 9.0 && dis_flag == 2) cur_val = 17;
      else if(cur_ctr == 9.0 && dis_flag == 1) cur_val = 18;
      else if(cur_ctr == 9.0 && dis_flag == 0) cur_val = 19;

      if(rwt[cur_val] < rwt[max_val]) continue;
      if(rwt[cur_val] == rwt[max_val] && cur_val < max_val) continue;
      max_val = cur_val;
    }
  }

  return max_val;
}
/*
 * Scores the window of up to six bases starting at pos against the motif
 * AGGAGG, considering only mismatch-free alignments of length 3 to 6, and
 * returns the index of the best exact-matching RBS motif found (0 if none
 * qualifies).
 *
 *   seq   - sequence, queried through is_a()/is_g()
 *   pos   - first position of the candidate window
 *   start - position of the start codon; the window is cut so that it ends
 *           at least four bases before it
 *   rwt   - weights indexed by motif value; entries up to index 27 are read
 *
 * Candidates are ranked by rwt[]; on equal weight the larger index wins.
 *
 * Every slot of match[] starts out as a mismatch (-10.0), which covers
 * positions before the beginning of the sequence (pos+i < 0) as well as
 * positions beyond the usable window.  A negative window length is clamped
 * to zero so nothing is written in front of match[0].
 */
int shine_dalgarno_exact(unsigned char *seq, int pos, int start, double *rwt) {
  int i, j, k, mism, rdis, limit, dis_flag, max_val, cur_val = 0;
  double match[6], cur_ctr;

  limit = imin(6, start-4-pos);
  if(limit < 0) limit = 0;

  for(i = 0; i < 6; i++) match[i] = -10.0;

  /* Compare the 6-base region to AGGAGG */
  for(i = 0; i < limit; i++) {
    if(pos+i < 0) continue;
    if(i%3 == 0 && is_a(seq, pos+i) == 1) match[i] = 2.0;
    else if(i%3 != 0 && is_g(seq, pos+i) == 1) match[i] = 3.0;
    else match[i] = -10.0;
  }

  /* Find the maximally scoring motif */
  max_val = 0;
  for(i = limit; i >= 3; i--) {
    for(j = 0; j <= limit-i; j++) {
      cur_ctr = -2.0;
      mism = 0;
      for(k = j; k < j+i; k++) {
        cur_ctr += match[k];
        if(match[k] < 0.0) mism++;
      }
      if(mism > 0) continue;

      /*
       * Classify the spacing between the motif end and the start codon.
       * Short motifs (fewer than 5 bases) and long ones swap flags 1 and 2.
       */
      rdis = start - (pos+j+i);
      if(rdis < 5 && i < 5) dis_flag = 2;
      else if(rdis < 5 && i >= 5) dis_flag = 1;
      else if(rdis > 10 && rdis <= 12 && i < 5) dis_flag = 1;
      else if(rdis > 10 && rdis <= 12 && i >= 5) dis_flag = 2;
      else if(rdis >= 13) dis_flag = 3;
      else dis_flag = 0;
      if(rdis > 15 || cur_ctr < 6.0) continue;

      /* Exact-Matching RBS Motifs (cur_ctr is at least 6.0 here) */
      if(cur_ctr == 6.0 && dis_flag == 2) cur_val = 1;
      else if(cur_ctr == 6.0 && dis_flag == 3) cur_val = 2;
      else if(cur_ctr == 8.0 && dis_flag == 3) cur_val = 3;
      else if(cur_ctr == 9.0 && dis_flag == 3) cur_val = 3;
      else if(cur_ctr == 6.0 && dis_flag == 1) cur_val = 6;
      else if(cur_ctr == 11.0 && dis_flag == 3) cur_val = 10;
      else if(cur_ctr == 12.0 && dis_flag == 3) cur_val = 10;
      else if(cur_ctr == 14.0 && dis_flag == 3) cur_val = 10;
      else if(cur_ctr == 8.0 && dis_flag == 2) cur_val = 11;
      else if(cur_ctr == 9.0 && dis_flag == 2) cur_val = 11;
      else if(cur_ctr == 8.0 && dis_flag == 1) cur_val = 12;
      else if(cur_ctr == 9.0 && dis_flag == 1) cur_val = 12;
      else if(cur_ctr == 6.0 && dis_flag == 0) cur_val = 13;
      else if(cur_ctr == 8.0 && dis_flag == 0) cur_val = 15;
      else if(cur_ctr == 9.0 && dis_flag == 0) cur_val = 16;
      else if(cur_ctr == 11.0 && dis_flag == 2) cur_val = 20;
      else if(cur_ctr == 11.0 && dis_flag == 1) cur_val = 21;
      else if(cur_ctr == 11.0 && dis_flag == 0) cur_val = 22;
      else if(cur_ctr == 12.0 && dis_flag == 2) cur_val = 20;
      else if(cur_ctr == 12.0 && dis_flag == 1) cur_val = 23;
      else if(cur_ctr == 12.0 && dis_flag == 0) cur_val = 24;
      else if(cur_ctr == 14.0 && dis_flag == 2) cur_val = 25;
      else if(cur_ctr == 14.0 && dis_flag == 1) cur_val = 26;
      else if(cur_ctr == 14.0 && dis_flag == 0) cur_val = 27;

      if(rwt[cur_val] < rwt[max_val]) continue;
      if(rwt[cur_val] == rwt[max_val] && cur_val < max_val) continue;
      max_val = cur_val;
    }
  }

  return max_val;
}
/* Returns a single amino acid for this position */ char amino(unsigned char *seq, int n, struct _training *tinf, int is_init) { if(is_stop(seq, n, tinf) == 1) return '*'; if(is_start(seq, n, tinf) == 1 && is_init == 1) return 'M'; if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'F'; if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'F'; if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 'L'; if(is_t(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 'L'; if(is_t(seq, n) == 1 && is_c(seq, n+1) == 1) return 'S'; if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'Y'; if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'Y'; if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) { if(tinf->trans_table == 6) return 'Q'; if(tinf->trans_table == 14) return 'Y'; } if(is_t(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) { if(tinf->trans_table == 6 || tinf->trans_table == 15) return 'Q'; if(tinf->trans_table == 22) return 'L'; } if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'C'; if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'C'; if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 'W'; if(is_t(seq, n) == 1 && is_g(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 'W'; if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_t(seq, n+2) == 1) { if(tinf->trans_table == 3) return 'T'; return 'L'; } if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_c(seq, n+2) == 1) { if(tinf->trans_table == 3) return 'T'; return 'L'; } if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1) { if(tinf->trans_table == 3) return 'T'; return 'L'; } if(is_c(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) { if(tinf->trans_table == 3) return 'T'; if(tinf->trans_table == 12) return 'S'; return 'L'; } if(is_c(seq, n) == 1 && is_c(seq, n+1) == 1) 
return 'P'; if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'H'; if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'H'; if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 'Q'; if(is_c(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 'Q'; if(is_c(seq, n) == 1 && is_g(seq, n+1) == 1) return 'R'; if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'I'; if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'I'; if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_a(seq, n+2) == 1) { if(tinf->trans_table == 2 || tinf->trans_table == 3 || tinf->trans_table == 5 || tinf->trans_table == 13 || tinf->trans_table == 21) return 'M'; return 'I'; } if(is_a(seq, n) == 1 && is_t(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 'M'; if(is_a(seq, n) == 1 && is_c(seq, n+1) == 1) return 'T'; if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'N'; if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'N'; if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) { if(tinf->trans_table == 9 || tinf->trans_table == 14 || tinf->trans_table == 21) return 'N'; return 'K'; } if(is_a(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 'K'; if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'S'; if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && is_c(seq, n+2) == 1) return 'S'; if(is_a(seq, n) == 1 && is_g(seq, n+1) == 1 && (is_a(seq, n+2) == 1 || is_g(seq, n+2) == 1)) { if(tinf->trans_table == 13) return 'G'; if(tinf->trans_table == 5 || tinf->trans_table == 9 || tinf->trans_table == 14 || tinf->trans_table == 21) return 'S'; return 'R'; } if(is_g(seq, n) == 1 && is_t(seq, n+1) == 1) return 'V'; if(is_g(seq, n) == 1 && is_c(seq, n+1) == 1) return 'A'; if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_t(seq, n+2) == 1) return 'D'; if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 
&& is_c(seq, n+2) == 1) return 'D'; if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_a(seq, n+2) == 1) return 'E'; if(is_g(seq, n) == 1 && is_a(seq, n+1) == 1 && is_g(seq, n+2) == 1) return 'E'; if(is_g(seq, n) == 1 && is_g(seq, n+1) == 1) return 'G'; return 'X'; }
/*
 * Returns 1 when positions n, n+1, n+2 of seq spell T-T-G, i.e. none of
 * is_t(seq, n), is_t(seq, n+1) and is_g(seq, n+2) reports 0; returns 0
 * otherwise.
 */
int is_ttg(unsigned char *seq, int n) {
  if(is_t(seq, n) != 0 && is_t(seq, n+1) != 0 && is_g(seq, n+2) != 0)
    return 1;
  return 0;
}