bool SeqPath::check_if_correct(const string& str, const Config *config) const { const vector<mass_t>& aa2mass = config->get_aa2mass(); const char *path_str = seq_str.c_str(); const char *corr_str = str.c_str(); int len_path_str = strlen(path_str); int len_corr_str = strlen(corr_str); if (len_path_str>len_corr_str) return false; int i; for (i=0; i<=len_corr_str-len_path_str; i++) { int j; bool correct_seq = true; for (j=0; j<len_path_str; j++) if (! (path_str[j] == corr_str[i+j] || (path_str[j] == 'I' && corr_str[i+j]== 'L') || (path_str[j] == 'L' && corr_str[i+j]== 'I') || (path_str[j] == 'Q' && corr_str[i+j]== 'K') || (path_str[j] == 'K' && corr_str[i+j]== 'Q') ) ) { correct_seq = false; break; } if (correct_seq) { // check prefix mass Peptide pep; pep.parseFromString(config,corr_str); const vector<int>& aas= pep.get_amino_acids(); mass_t mass=0; int j; if (n_term_mass == 0 && i==0) return true; for (j=0; j<aas.size(); j++) { mass+=aa2mass[aas[j]]; if (fabs(mass-this->n_term_mass)<6) return true; if (mass>n_term_mass) break; } } } return false; }
int SeqPath::get_num_correct_aas(const Peptide& pep, const Config *config) const { const vector<mass_t>& aa2mass = config->get_aa2mass(); const vector<int>& pep_aas = pep.get_amino_acids(); int num_correct=0; int i; vector<mass_t> pep_masses; vector<int> path_aas; get_amino_acids(path_aas); pep_masses.resize(pep_aas.size(),0); for (i=1; i<pep_aas.size(); i++) pep_masses[i]=pep_masses[i-1]+aa2mass[pep_aas[i-1]]; mass_t path_mass = n_term_mass; for (i=0; i<path_aas.size(); i++) { const int path_aa = path_aas[i]; int j; for (j=0; j<pep_aas.size(); j++) { const int pep_aa = pep_aas[j]; if (fabs(pep_masses[j]-path_mass)<1.0 && pep_aas[j] == path_aas[i]) { num_correct++; break; } } path_mass += aa2mass[path_aas[i]]; } return num_correct; }
// Counts the amino acids of this peptide that agree with `other`.
//
// The expected breakage masses of both peptides are computed and walked in
// step. When the two current breakage masses are within 1.0 of each other,
// the amino acids at those indices are compared (equal codes increment the
// count) and both indices advance. Otherwise only the index holding the
// smaller breakage mass advances. The walk ends when either peptide runs out
// of amino acids.
int Peptide::calc_number_of_correct_aas(Config *config,const Peptide& other) const
{
	const vector<int>& other_amino_acids = other.get_amino_acids();
	const int num_aas       = amino_acids.size();
	const int num_other_aas = other_amino_acids.size();

	vector<mass_t> this_breakages, other_breakages;
	calc_expected_breakage_masses(config, this_breakages);
	other.calc_expected_breakage_masses(config, other_breakages);

	int num_correct_aas = 0;
	int mine = 0;
	int theirs = 0;

	while (mine < num_aas && theirs < num_other_aas)
	{
		const mass_t my_mass    = this_breakages[mine];
		const mass_t their_mass = other_breakages[theirs];

		if (fabs(my_mass - their_mass) < 1.0)
		{
			// Same location on the mass axis: compare the residues.
			if (amino_acids[mine] == other_amino_acids[theirs])
				++num_correct_aas;

			++mine;
			++theirs;
		}
		else if (my_mass < their_mass)
		{
			++mine;
		}
		else
		{
			++theirs;
		}
	}

	return num_correct_aas;
}
// Scores every node of `prm` using only the edge combination that is
// consistent with the given peptide.
//
// Nodes are visited in index order while a cursor (aa_idx / p_mass) advances
// through the peptide until the accumulated mass is within 0.1 of the node
// mass. If the peptide is exhausted on any node other than the last one, the
// node masses and the peptide's expected breakage masses are printed to cout
// and the program exits with status 1.
//
// For each node, the first incoming and the first outgoing multi-edge that
// has a variant matching the peptide's amino acids around the cursor is
// selected (NEG_INF is kept when none matches). The resulting single
// breakage combo is scored, replaces the node's score_combos, and is written
// to node.score and node.breakage.score. Finally the graph is flagged as
// having node combo scores.
void AdvancedScoreModel::score_peptide_node_combos(PrmGraph *prm, const Peptide& peptide ) const
{
	// org_aas is not used below; the binding is retained from the original code.
	const vector<int>& org_aas = config.get_org_aa();
	const vector<mass_t>& aa2mass = config.get_aa2mass();
	const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
	const vector<int>& pep_aas = peptide.get_amino_acids();
	const int num_nodes   = prm->get_num_nodes();
	const int num_pep_aas = pep_aas.size();

	mass_t p_mass = 0;	// mass of the first aa_idx amino acids of the peptide
	int aa_idx = 0;

	for (int node_idx = 0; node_idx < num_nodes; ++node_idx)
	{
		Node& node = prm->get_non_const_node(node_idx);
		const RegionalScoreModel& score_model =
			regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];

		// Move the peptide cursor forward until it sits on this node's mass.
		while (aa_idx < num_pep_aas && fabs(p_mass - node.mass) > 0.1)
		{
			p_mass += aa2mass[pep_aas[aa_idx]];
			aa_idx++;
		}

		// Only the final node may coincide with the end of the peptide.
		if (aa_idx == num_pep_aas && node_idx != num_nodes-1)
		{
			for (int n = 0; n < num_nodes; ++n)
				cout << n << "\t" << prm->get_node(n).mass << endl;

			cout << endl << "PEP:" << endl;

			vector<mass_t> exp_masses;
			peptide.calc_expected_breakage_masses(const_cast<Config *>(&config), exp_masses);

			const int num_exp_masses = static_cast<int>(exp_masses.size());
			for (int n = 0; n < num_exp_masses; ++n)
				cout << n << "\t" << exp_masses[n] << endl;

			cout << "Error: mismatch between nodes and peptide!" << endl;
			exit(1);
		}

		// First incoming edge whose amino acids end at the cursor.
		int in_edge_idx = NEG_INF;
		int in_edge_variant = NEG_INF;
		const int num_in_edges = static_cast<int>(node.in_edge_idxs.size());
		for (int e = 0; e < num_in_edges; ++e)
		{
			const int edge_idx = node.in_edge_idxs[e];
			const MultiEdge& in_edge = multi_edges[edge_idx];
			const int num_aa = in_edge.num_aa;

			if (num_aa > aa_idx)
				continue;	// edge would start before the peptide does

			const int var_idx = in_edge.get_variant_idx(num_aa, &pep_aas[aa_idx-num_aa]);
			if (var_idx >= 0)
			{
				in_edge_idx = edge_idx;
				in_edge_variant = var_idx;
				break;
			}
		}

		// First outgoing edge whose amino acids start at the cursor.
		int out_edge_idx = NEG_INF;
		int out_edge_variant = NEG_INF;
		const int num_out_edges = static_cast<int>(node.out_edge_idxs.size());
		for (int e = 0; e < num_out_edges; ++e)
		{
			const int edge_idx = node.out_edge_idxs[e];
			const MultiEdge& out_edge = multi_edges[edge_idx];
			const int num_aa = out_edge.num_aa;

			if (num_aa + aa_idx > num_pep_aas)
				continue;	// edge would run past the end of the peptide

			const int var_idx = out_edge.get_variant_idx(num_aa, &pep_aas[aa_idx]);
			if (var_idx >= 0)
			{
				out_edge_idx = edge_idx;
				out_edge_variant = var_idx;
				break;
			}
		}

		BreakageInfo info;
		prm->fill_breakage_info(this, &info, node_idx,
								in_edge_idx, in_edge_variant,
								out_edge_idx, out_edge_variant);

		// The node keeps exactly one combo: the one dictated by the peptide.
		node.score_combos.clear();
		info.score = score_model.score_a_single_breakage_combo(prm, node, &node.breakage, info);
		node.score_combos[ScoreComboLoc(info)] = info.score;

		node.score = info.score;
		node.breakage.score = node.score;
	}

	prm->set_has_node_combo_scores(true);
}
/************************************************************************** Returns the global edit distance between two peptides. Gap cost = 1. d(I,L)=0 d(K,Q)=0 d(F/M*)=0 d(N,D)=0.5; d(X,X)=0 d(X,Y)=1 ***************************************************************************/ float Peptide::peptide_edit_distance(Config *config, Peptide& other_pep) const { int i; vector<int> other_aa = other_pep.get_amino_acids(); const int *pep1 = &amino_acids[0]; const int pep_len1 = amino_acids.size(); const int *pep2 = &other_aa[0]; const int pep_len2 = other_aa.size(); const int max_width = 5; const int ox_met_aa = config->get_aa_from_label(string("M+16")); float row1[max_width*2+1], row2[max_width*2+1]; float *old_row, *new_row; if (abs(pep_len1-pep_len2)>=max_width) return pep_len1; // check that no little switch of two aa can make them the same if (pep_len1 == pep_len2) { int i; int err_pos=-1; int errs=0; for (i=0; i<pep_len1; i++) { if ( (pep1[i] != pep2[i]) && ! (((pep1[i]==Ile ||pep1[i]==Leu) && (pep2[i]==Ile || pep2[i]==Leu)) || ((pep1[i]==Gln || pep1[i]==Lys) && (pep2[i]==Gln || pep2[i]==Lys)) ) ) { err_pos=i; errs++; } if (errs>2) break; } if (errs == 0) return 0; if (errs == 2) { if ( (pep1[err_pos] == pep2[err_pos-1]) && (pep1[err_pos-1] == pep2[err_pos])) return 1; } } old_row=row1; new_row=row2; for (i=0; i<max_width; i++) { old_row[i]=9999; old_row[i+max_width]=i; new_row[i]=9999; new_row[i+max_width]=9999; } old_row[2*max_width]=9999; new_row[2*max_width]=9999; int start_new_row = max_width-1; for (i=1; i<=pep_len1; i++) { int j; if (start_new_row>0) { new_row[start_new_row]=i; new_row[--start_new_row]=9999; } else new_row[0] = 9999; for (j=1; j<= 2*max_width; j++) { int p2_pos = i + j - max_width; if (p2_pos<1) continue; if (p2_pos>pep_len2) break; float v1,v2,v3,dxy=0; int p1=pep1[i-1],p2=pep2[p2_pos-1]; if (p1 != p2) { dxy=1; if ( ((p1==Ile || p1==Leu) && (p2==Ile || p2==Leu)) || ((p1==Gln || p1==Lys) && (p2==Gln || p2==Lys)) ) { dxy=0; } else if ( 
((p1==Asn && p2==Asp) || (p1==Asp && p2==Asn)) || ((pep1[i]==Gln && p2==Glu) || (p1==Glu && p2==Gln)) || ((pep1[i]==Lys && p2==Glu) || (p1==Glu && p2==Lys)) || ((pep1[i]==Ile && p2==Asn) || (p1==Asn && p2==Ile)) || ((pep1[i]==Leu && p2==Asn) || (p1==Asn && p2==Leu)) ) { dxy=0.5; } else if (ox_met_aa>0 && (( p1 == ox_met_aa && p2 == Phe) || ( p1 == Phe && p2 == ox_met_aa)) ) { dxy = 0; } } v1= old_row[j]+dxy; v2= new_row[j-1]+1; v3= (p2_pos<pep_len2 && j<2*max_width) ? old_row[j+1]+1 : 9999; new_row[j]=v1; if (new_row[j]>v2) new_row[j]=v2; if (new_row[j]>v3) new_row[j]=v3; } float *tmp; tmp = old_row; old_row = new_row; new_row = tmp; } return old_row[pep_len2-pep_len1+max_width]; }