bool SeqPath::check_if_correct(const string& str, const Config *config) const { const vector<mass_t>& aa2mass = config->get_aa2mass(); const char *path_str = seq_str.c_str(); const char *corr_str = str.c_str(); int len_path_str = strlen(path_str); int len_corr_str = strlen(corr_str); if (len_path_str>len_corr_str) return false; int i; for (i=0; i<=len_corr_str-len_path_str; i++) { int j; bool correct_seq = true; for (j=0; j<len_path_str; j++) if (! (path_str[j] == corr_str[i+j] || (path_str[j] == 'I' && corr_str[i+j]== 'L') || (path_str[j] == 'L' && corr_str[i+j]== 'I') || (path_str[j] == 'Q' && corr_str[i+j]== 'K') || (path_str[j] == 'K' && corr_str[i+j]== 'Q') ) ) { correct_seq = false; break; } if (correct_seq) { // check prefix mass Peptide pep; pep.parseFromString(config,corr_str); const vector<int>& aas= pep.get_amino_acids(); mass_t mass=0; int j; if (n_term_mass == 0 && i==0) return true; for (j=0; j<aas.size(); j++) { mass+=aa2mass[aas[j]]; if (fabs(mass-this->n_term_mass)<6) return true; if (mass>n_term_mass) break; } } } return false; }
// Builds a PepData list holding five named peptides, each paired with the
// Formula constructed from the numeric arguments below. The entries are
// appended in the order: Angiotensin II, Bombesin, Substance P, Neurotensin,
// Alpha1-6.
PepData create5PeptideData()
{
    using Peptide::Formula;

    const Formula angiotensin_ii(50, 71, 13, 12);
    const Formula bombesin(71, 110, 24, 18, 1);
    const Formula substance_p(63, 98, 18, 13, 1);
    const Formula neurotensin(78, 121, 21, 20);
    const Formula alpha1_6(45, 59, 11, 8);

    PepData peptides;
    peptides.push_back(make_pair("Angiotensin II", angiotensin_ii));
    peptides.push_back(make_pair("Bombesin", bombesin));
    peptides.push_back(make_pair("Substance P", substance_p));
    peptides.push_back(make_pair("Neurotensin", neurotensin));
    peptides.push_back(make_pair("Alpha1-6", alpha1_6));
    return peptides;
}
// Locates every occurrence of `peptide`'s sequence inside peptide_.sequence()
// and returns one DigestedPeptide per occurrence that satisfies the limits in
// config_:
//   - the peptide length must lie in [minimumLength, maximumLength];
//   - the number of termini that coincide with a cleavage site in sitesSet_
//     must be at least minimumSpecificity;
//   - the number of cleavage sites strictly inside the occurrence must not
//     exceed maximumMissedCleavages.
// Each result also carries the single residue before and after the occurrence
// (empty strings at the ends of the parent sequence).
//
// The visible definition stopped after the loop, with no return statement and
// no closing brace; both are supplied here.
inline vector<DigestedPeptide> find_all(const Peptide& peptide)
{
    typedef boost::iterator_range<string::const_iterator> const_string_iterator_range;

    digest(); // populate sites_ member if necessary

    const string& sequence_ = peptide_.sequence();
    vector<DigestedPeptide> result;

    if ((int) peptide.sequence().length() > config_.maximumLength ||
        (int) peptide.sequence().length() < config_.minimumLength)
        return result;

    vector<const_string_iterator_range> instances;
    bal::find_all(instances, sequence_, peptide.sequence());

    BOOST_FOREACH(const_string_iterator_range& range, instances)
    {
        // Offsets of the first and last residue of this occurrence.
        size_t beginOffset = range.begin() - sequence_.begin();
        size_t endOffset = beginOffset + peptide.sequence().length() - 1;

        // A terminus counts as specific when the matching offset is in sitesSet_
        // (beginOffset-1 for the N terminus, endOffset for the C terminus).
        bool NTerminusIsSpecific = sitesSet_.count(int(beginOffset) - 1) > 0;
        bool CTerminusIsSpecific = sitesSet_.count(int(endOffset)) > 0;

        if (((size_t) NTerminusIsSpecific + (size_t) CTerminusIsSpecific) < (size_t) config_.minimumSpecificity)
            continue;

        // Sites in [beginOffset, endOffset) are cleavages that were missed.
        size_t missedCleavages = 0;
        for (size_t i = beginOffset; i < endOffset; ++i)
            if (sitesSet_.count((int) i) > 0)
                ++missedCleavages;

        if (missedCleavages > (size_t) config_.maximumMissedCleavages)
            continue;

        string NTerminusPrefix, CTerminusSuffix;
        if (beginOffset > 0)
            NTerminusPrefix = sequence_.substr(beginOffset-1, 1);
        if (endOffset+1 < sequence_.length())
            CTerminusSuffix = sequence_.substr(endOffset+1, 1);

        result.push_back(DigestedPeptide(peptide,
                                         beginOffset,
                                         missedCleavages,
                                         NTerminusIsSpecific,
                                         CTerminusIsSpecific,
                                         NTerminusPrefix,
                                         CTerminusSuffix));
    }

    return result;
}
//count and return the total number inter- peptide strand hydrogen bonds in a snapshot int countInterpeptideHB(vector<Peptide*> &peptides, double box[3]){ int totalNumInter=0; int totalNumPep = peptides.size(); for(int i=0; i < totalNumPep; i++) { Peptide* pepA = peptides.at(i); for(int n=0; n < numRes; n++) { double NH_pepA[3], H_pepA[3], O_pepA[3], C_pepA[3]; pepA->getNH(n,NH_pepA); pepA->getHofNH(n,H_pepA); pepA->getO(n,O_pepA); pepA->getC(n,C_pepA); for(int j = i+1; j < totalNumPep; j++) { Peptide* pepB = peptides.at(j); for(int m=0; m < numRes; m++) { if(pepB->isHbondedNH(m,O_pepA, C_pepA, box)){ totalNumInter++; } if(pepB->isHbondedCO(m,NH_pepA, H_pepA, box)){ totalNumInter++; } } } } } return totalNumInter; }
int MultiPath::get_num_correct_aas(const PrmGraph& prm, const Peptide& p, Config *config) const { const mass_t tolerance = config->getTolerance() * 1.25; vector<mass_t> break_masses; int idx=0; int num_correct=0; int i; p.calc_expected_breakage_masses(config,break_masses); for (i=0; i<breakages.size(); i++) { const mass_t& mass = breakages[i]->mass; const mass_t max_mass = mass + tolerance; const mass_t min_mass = mass - tolerance; while (idx < break_masses.size() && break_masses[idx] < min_mass) idx++; if (break_masses[idx]>max_mass) continue; if (idx<breakages.size()-1 && edge_idxs[idx]>=0) num_correct += prm.get_multi_edge(edge_idxs[idx]).num_aa; } return num_correct; }
std::map<int, double> IonizationEfficiencySimulator::calc_charge_distribution(const Peptide &peptide) { std::map<int,double> charge_to_percentage; int num_basic = 1+peptide.num_basic_residues(); boost::math::binomial bd(num_basic, .7+(g_uniform_distribution(g_rng)*.3)); // success rate in [0.7,1.0] for (int i = 1; i <= num_basic; i++) { charge_to_percentage[i] = boost::math::pdf(bd,i); //std::cout << i << " " << num_basic << " " << boost::math::pdf(bd,i) << std::endl; } return charge_to_percentage; }
// Counts the amino acids of this peptide that also appear, at the same
// breakage mass, in `other`.
//
// Both peptides' expected breakage masses are computed and walked in parallel
// as two sorted lists: when the current masses differ by less than 1.0 the
// amino acids at those positions are compared (a match increments the count)
// and both cursors advance; otherwise only the cursor with the smaller mass
// advances. The walk ends when either cursor reaches its peptide's length.
int Peptide::calc_number_of_correct_aas(Config *config,const Peptide& other) const
{
    const vector<int>& other_amino_acids = other.get_amino_acids();
    const int num_aas       = amino_acids.size();
    const int num_other_aas = other_amino_acids.size();

    vector<mass_t> this_breakages,other_breakages;
    calc_expected_breakage_masses(config,this_breakages);
    other.calc_expected_breakage_masses(config,other_breakages);

    int num_correct_aas = 0;
    for (int mine = 0, theirs = 0; mine < num_aas && theirs < num_other_aas; )
    {
        const bool same_position =
            fabs(this_breakages[mine]-other_breakages[theirs]) < 1.0;

        if (same_position)
        {
            if (amino_acids[mine] == other_amino_acids[theirs])
                num_correct_aas++;
            mine++;
            theirs++;
            continue;
        }

        // Masses disagree: move whichever side is lagging behind.
        if (this_breakages[mine] < other_breakages[theirs])
            mine++;
        else
            theirs++;
    }

    return num_correct_aas;
}
int SeqPath::get_num_correct_aas(const Peptide& pep, const Config *config) const { const vector<mass_t>& aa2mass = config->get_aa2mass(); const vector<int>& pep_aas = pep.get_amino_acids(); int num_correct=0; int i; vector<mass_t> pep_masses; vector<int> path_aas; get_amino_acids(path_aas); pep_masses.resize(pep_aas.size(),0); for (i=1; i<pep_aas.size(); i++) pep_masses[i]=pep_masses[i-1]+aa2mass[pep_aas[i-1]]; mass_t path_mass = n_term_mass; for (i=0; i<path_aas.size(); i++) { const int path_aa = path_aas[i]; int j; for (j=0; j<pep_aas.size(); j++) { const int pep_aa = pep_aas[j]; if (fabs(pep_masses[j]-path_mass)<1.0 && pep_aas[j] == path_aas[i]) { num_correct++; break; } } path_mass += aa2mass[path_aas[i]]; } return num_correct; }
bool MultiPath::check_if_correct(const Peptide& p, Config *config) const { const mass_t tolerance = config->getTolerance() * 1.25; vector<mass_t> break_masses; int idx=0; int i; p.calc_expected_breakage_masses(config,break_masses); for (i=0; i<breakages.size(); i++) { const mass_t& mass = breakages[i]->mass; const mass_t max_mass = mass + tolerance; const mass_t min_mass = mass - tolerance; while (idx < break_masses.size() && break_masses[idx] < min_mass) idx++; if (break_masses[idx]>max_mass) return false; } return true; }
void AllScoreModels::predict_fragmentation( const char* input_file, size_t num_peaks) { FILE* stream = fopen(input_file,"r"); if (! stream) { cout << "Error: couldn't open file for reading: " << input_file << endl; exit(1); } PeptideRankScorer *dnv_rank = (PeptideRankScorer *)get_rank_model_ptr(1); PeakRankModel *prm = this->get_peak_prediction_model_ptr(3); char buffer[128]; char pep_str[128]; while (fgets(buffer,128,stream)) { int charge; if (sscanf(buffer,"%s %d",pep_str,&charge) != 2) continue; cout << ">> " << pep_str << "\t" << charge << endl; if (charge<1 || charge>=prm->get_size_thresholds().size()) { cout << "Invalid charge!" << endl; continue; } Peptide pep; pep.parseFromString(&config_,static_cast<string>(pep_str)); PeptideSolution sol; sol.pep = pep; sol.reaches_n_terminal=true; sol.reaches_c_terminal=true; sol.charge = charge; sol.pm_with_19 = pep.get_mass_with_19(); PeptidePeakPrediction ppp; prm->calc_peptide_predicted_scores(sol, ppp); const size_t num_frags = ppp.frag_idxs.size(); vector< vector<int> > predicted_ranks; calc_combined_peak_ranks(ppp.rank_scores, predicted_ranks); vector<PeakTuple> tuples; for (size_t f=0; f<num_frags; f++) for (size_t i=0; i<ppp.rank_scores[f].size(); i++) if (predicted_ranks[f][i]<999) { PeakTuple pt; pt.frag_idx = f; pt.pos =i; pt.rank = predicted_ranks[f][i]; pt.score = ppp.rank_scores[f][i]; tuples.push_back(pt); } sort(tuples.begin(),tuples.end()); if (tuples.size()<1) continue; const size_t num_aas = pep.get_num_aas(); vector<mass_t> breakage_masses; pep.calc_expected_breakage_masses(&config_, breakage_masses); cout << fixed << "Rank\tIon\tm/z\tScore" << endl; for (size_t i=0; i<num_peaks && i<tuples.size(); i++) { PeakTuple pt = tuples[i]; cout << i+1 << "\t"; const FragmentType& ft = config_.get_fragment(ppp.frag_idxs[pt.frag_idx]); cout << ft.label << ":" << (ft.orientation == PREFIX ? 
pt.pos : num_aas - pt.pos) << "\t"; mass_t mz = ft.calc_expected_mass(breakage_masses[pt.pos],pep.get_mass_with_19()); cout << setprecision(2); if (mz<100) cout << " "; if (mz<1000) cout << " "; cout << mz << "\t"; cout << setprecision(3) << pt.score << endl; } cout << endl; } fclose(stream); }
// Scores every node of `prm` using the single in-edge/out-edge combination
// that is consistent with `peptide`.
//
// The nodes are visited in order while a cursor (aa_idx, p_mass) advances
// through the peptide's amino acids until the accumulated mass is within 0.1
// of the node's mass. If the peptide is exhausted before the last node, the
// node masses and the peptide's expected breakage masses are printed and the
// program exits with status 1.
//
// For each node the first in-edge whose amino acids match the peptide residues
// immediately before the cursor, and the first out-edge matching the residues
// at the cursor, are selected (NEG_INF marks "none"). The node's score_combos
// is cleared and refilled with the single score for that combination, which is
// also stored in node.score and node.breakage.score. Finally the graph is
// flagged as having node combo scores.
void AdvancedScoreModel::score_peptide_node_combos(PrmGraph *prm, const Peptide& peptide ) const
{
    const vector<mass_t>& aa2mass = config.get_aa2mass();
    const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
    const int num_nodes = prm->get_num_nodes();
    const vector<int>& pep_aas = peptide.get_amino_acids();
    const int num_pep_aas = pep_aas.size();

    mass_t p_mass = 0;
    int aa_idx = 0;

    for (int node_idx = 0; node_idx < num_nodes; node_idx++)
    {
        Node& node = prm->get_non_const_node(node_idx);
        const RegionalScoreModel& score_model =
            regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];

        // Advance along the peptide until its prefix mass reaches this node.
        while (aa_idx < num_pep_aas && fabs(p_mass-node.mass) > 0.1)
        {
            p_mass += aa2mass[pep_aas[aa_idx]];
            aa_idx++;
        }

        const bool peptide_exhausted = (aa_idx == num_pep_aas);
        if (peptide_exhausted && node_idx != num_nodes-1)
        {
            for (int n = 0; n < num_nodes; n++)
                cout << n << "\t" << prm->get_node(n).mass << endl;

            cout << endl << "PEP:" << endl;
            vector<mass_t> exp_masses;
            peptide.calc_expected_breakage_masses((Config *)&config,exp_masses);
            for (size_t m = 0; m < exp_masses.size(); m++)
                cout << m << "\t" << exp_masses[m] << endl;

            cout << "Error: mismatch between nodes and peptide!" << endl;
            exit(1);
        }

        // First in-edge whose residues equal the peptide residues that end
        // at the cursor.
        int in_edge_idx = NEG_INF;
        int in_edge_variant = NEG_INF;
        for (size_t e = 0; e < node.in_edge_idxs.size(); e++)
        {
            const int edge_idx = node.in_edge_idxs[e];
            const MultiEdge& in_edge = multi_edges[edge_idx];
            const int num_aa = in_edge.num_aa;
            if (num_aa <= aa_idx)
            {
                const int var_idx = in_edge.get_variant_idx(num_aa,&pep_aas[aa_idx-num_aa]);
                if (var_idx >= 0)
                {
                    in_edge_idx = edge_idx;
                    in_edge_variant = var_idx;
                    break;
                }
            }
        }

        // First out-edge whose residues equal the peptide residues that start
        // at the cursor.
        int out_edge_idx = NEG_INF;
        int out_edge_variant = NEG_INF;
        for (size_t e = 0; e < node.out_edge_idxs.size(); e++)
        {
            const int edge_idx = node.out_edge_idxs[e];
            const MultiEdge& out_edge = multi_edges[edge_idx];
            const int num_aa = out_edge.num_aa;
            if (num_aa + aa_idx <= num_pep_aas)
            {
                const int var_idx = out_edge.get_variant_idx(num_aa,&pep_aas[aa_idx]);
                if (var_idx >= 0)
                {
                    out_edge_idx = edge_idx;
                    out_edge_variant = var_idx;
                    break;
                }
            }
        }

        BreakageInfo info;
        prm->fill_breakage_info(this,&info,node_idx,in_edge_idx,in_edge_variant,out_edge_idx,out_edge_variant);

        node.score_combos.clear();
        info.score = score_model.score_a_single_breakage_combo(prm, node, &node.breakage, info);

        node.score_combos[ScoreComboLoc(info)] = info.score;
        node.score = info.score;
        node.breakage.score = node.score;
    }

    prm->set_has_node_combo_scores(true);
}
// readFromCurFile - reads one .ms2 file void SpectraSTMs2LibImporter::readFromFile(string& impFileName) { ifstream fin; if (!myFileOpen(fin, impFileName)) { g_log->error("CREATE", "Cannot open .ms2 file \"" + impFileName + "\" for reading. File skipped."); return; } g_log->log("MS2 IMPORT", "Importing .ms2 file \"" + impFileName + "\"."); if (g_verbose) { cout << "\nImporting spectra from .ms2 library file..." << endl; } // start the progress count ProgressCount pc(!g_quiet && !g_verbose, 500, 0); pc.start("\nImporting spectra from .ms2 library file"); string line(""); unsigned int scan1 = 0; unsigned int scan2 = 0; double precursorMz = 0.0; int charge = 1; double mw = 0.0; string dummy(""); string seq(""); string modifiedSeq(""); SpectraSTPeakList* peakList = NULL; // looks like their cysteines are actually C[160]'s // Peptide::addModTokenToTables("C", "Carbamidomethyl"); while (nextLine(fin, line)) { if (line.empty()) continue; string::size_type pos = 0; if (line[0] == 'H') { continue; } else if (line[0] == 'S') { if (peakList) { if (peakList->getNumPeaks() == 0 || seq.empty()) { delete peakList; } else { pc.increment(); if (!(modifiedSeq.empty())) seq = modifiedSeq; Peptide* pep = new Peptide(seq, charge); // check legality of peptide and mod strings -- will not insert later if illegal (parsing will continue anyway) if (pep->hasUnknownMod) { g_log->error("MS2 IMPORT", "Peptide ID with unknown modification: \"" + seq + "\". Entry skipped."); } if (pep->illegalPeptideStr && !pep->hasUnknownMod) { g_log->error("MS2 IMPORT", "Illegal peptide ID string: \"" + seq + "\". Entry skipped."); } stringstream commentss; commentss << "Fullname=X." << seq << ".X/" << charge; commentss << " ScanNum=" << scan1 << '.' 
<< scan2; commentss << " Spec=Raw"; SpectraSTLibEntry* entry = new SpectraSTLibEntry(pep, commentss.str(), "Normal", peakList); if (g_verbose) { cout << "Importing record " << m_count << ": " << pep->interactStyleWithCharge() << endl; } m_count++; if (passAllFilters(entry)) { entry->annotatePeaks(); m_lib->insertEntry(entry); } delete (entry); } } seq = ""; modifiedSeq = ""; peakList = NULL; scan1 = atoi(nextToken(line, 1, pos, " \t\r\n", " \t\r\n").c_str()); scan2 = atoi(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str()); precursorMz = atof(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str()); } else if (line[0] == 'Z') { charge = atoi(nextToken(line, 1, pos, " \t\r\n", " \t\r\n").c_str()); mw = atof(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str()); } else if (line[0] == 'D') { dummy = nextToken(line, 1, pos, "\t\r\n", " \t\r\n"); if (dummy == "seq") { seq = nextToken(line, pos, pos, "\r\n", " \t\r\n"); } else if (dummy == "modified seq") { modifiedSeq = nextToken(line, pos, pos, "\r\n", " \t\r\n"); } } else { // should be a peak if (!peakList) { peakList = new SpectraSTPeakList(precursorMz, 0); } double mz = atof(nextToken(line, 0, pos, " \t\r\n", " \t\r\n").c_str()); float intensity = atof(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str()); peakList->insert(mz, intensity, "", ""); } } // finish last record if (peakList) { if (peakList->getNumPeaks() == 0 || seq.empty()) { delete peakList; } else { pc.increment(); if (!(modifiedSeq.empty())) seq = modifiedSeq; Peptide* pep = new Peptide(seq, charge); if (pep->hasUnknownMod) { g_log->error("MS2 IMPORT", "Peptide ID with unknown modification: \"" + seq + "\". Entry skipped."); } if (pep->illegalPeptideStr && !pep->hasUnknownMod) { g_log->error("MS2 IMPORT", "Illegal peptide ID string: \"" + seq + "\". Entry skipped."); } stringstream commentss; commentss << "Fullname=X." << seq << ".X/" << charge; commentss << " ScanNum=" << scan1 << '.' 
<< scan2; commentss << " Spec=Raw"; SpectraSTLibEntry* entry = new SpectraSTLibEntry(pep, commentss.str(), "Normal", peakList); if (g_verbose) { cout << "Importing record " << m_count << ": " << pep->interactStyleWithCharge() << endl; } m_count++; if (passAllFilters(entry)) { entry->annotatePeaks(); m_lib->insertEntry(entry); } delete (entry); } } pc.done(); }
bool InspectResultsLine::parse_from_fields(Config *config, const vector<string>& fields) { if (fields.size() < 20) { cout<< "Error: inspect results line has " << fields.size() << ", expecting 20-22" << endl; exit(1); } SpectrumFile = fields[0]; if (sscanf(fields[1].c_str(),"%d",&scan) != 1 || scan<0 || scan>100000000) error("scan"); Annotation = fields[2]; Protein = fields[3]; if (sscanf(fields[4].c_str(),"%d",&Charge) != 1 || Charge<0 || Charge>20) error("Charge"); if (sscanf(fields[5].c_str(),"%f",&MQScore) != 1 || MQScore<NEG_INF || MQScore>POS_INF) error("MQScore"); if (sscanf(fields[6].c_str(),"%d",&Length) != 1 || Length<1 || Length>POS_INF) error("Length"); if (sscanf(fields[7].c_str(),"%f",&TotalPRMScore) != 1 || TotalPRMScore<NEG_INF || TotalPRMScore>POS_INF) error("TotalPRMScore"); if (sscanf(fields[8].c_str(),"%f",&MedianPRMScore) != 1 || MedianPRMScore<NEG_INF || MedianPRMScore>POS_INF) error("MedianPRMScore"); if (sscanf(fields[9].c_str(),"%f",&FractionY) != 1 || FractionY<0 || FractionY>1000) error("FractionY"); if (sscanf(fields[10].c_str(),"%f",&FractionB) != 1 || FractionB<0 || FractionB>1000) error("FractionB"); if (sscanf(fields[11].c_str(),"%f",&Intensity) != 1 || Intensity<0) error("Intensity"); if (sscanf(fields[12].c_str(),"%d",&NTT) != 1 || NTT<0 || NTT>3) error("NTT"); if (sscanf(fields[13].c_str(),"%f",&p_value) != 1) error("p_value"); if (sscanf(fields[14].c_str(),"%f",&F_Score) != 1) error("F_Score"); if (sscanf(fields[15].c_str(),"%f",&DeltaScore) != 1) error("DeltaScore"); if (sscanf(fields[16].c_str(),"%f",&DeltaScoreOther) != 1) error("DeltaScoreOther"); if (sscanf(fields[17].c_str(),"%d",&RecordNumber) != 1) error("RecordNumber"); if (sscanf(fields[18].c_str(),"%d",&DBFilePos) != 1) error("DBFilePos"); if (sscanf(fields[19].c_str(),"%d",&SpecFilePos) != 1) error("SpecFilePos"); if (fields.size()>20 && sscanf(fields[20].c_str(),"%f",&PrecursorMz) != 1) error("SpecFilePos"); if (fields.size()>21 && 
sscanf(fields[21].c_str(),"%f",&PrecursorError) != 1) error("SpecFilePos"); Score = MQScore; const vector<int>& char2aa = config->get_char2aa(); const int ann_length = Annotation.length(); if ((Annotation[1] != '.') || (Annotation[ann_length-2] != '.')) { cout << "Error: bad annotation format: " << Annotation << endl; cout << "Expecting X.XXXXXXXXX.X" << endl; cout << "Ann1 : " << Annotation[1] << endl; cout << "Ann n-2: " << Annotation[ann_length-2] << endl; exit(1); } // cout << "|" << Annotation << "|" << endl; aaBefore = char2aa[Annotation[0]]; aaAfter = char2aa[Annotation[ann_length-1]]; pep.parseFromString(config,Annotation.substr(2,ann_length-4)); return true; }
int main(int argc, char* argv[]) { if(argc < 5){ cerr<<"usage: peptide_analysis_testing_simple <gro-file> <num-pep> <num-ins> <base>"<<endl; return 0; } ifstream gro(argv[1]); int numPeptides = atoi(argv[2]); //number of peptides in the snapshot int numIns = atoi(argv[3]); //number of inositols in the snapshot string base(argv[4]); vector<Peptide*> peptides; vector<Inositol*> inositols; vector<Water*> waters; int time = 0; double boxDims[3]; int total_bound_groups = numPeptides*16; //assumes that max number of HB that can be made by an inositol to be 6 vector<int> distr_inos_numHB_tot(7, 0); //distribution of number of inositols over #HB ofstream contactwat((base + "_water_contact").c_str()); //ofstream nonpolar((base + "_np_contact").c_str()); ofstream cmap((base + "_hb_contact_map").c_str()); ofstream inos((base + "_inos_contact").c_str()); //read each snapshot of the partial gro file //build vector of peptides, inositols, detect hydrogen bonds,collect statistics //delete memory allocated for the snapshots while(!readGroFile(gro, peptides, inositols, waters, numPeptides, numIns, boxDims)){ #ifdef DEBUG cout<<"################frame # "<<totalNumSnap<<" #####################"<<endl; #endif int* water_bound_res = new int[total_bound_groups]; //initialize all to 0 for(int i=0; i<total_bound_groups; i++){ water_bound_res[i]=0; } //calculate water contacts waterContacts(peptides, inositols, waters, water_bound_res, boxDims); //output to file for(int i=0; i < total_bound_groups; i++){ contactwat<<water_bound_res[i]<<" "; } contactwat<<endl; //de-allocate memory delete [] water_bound_res; Peptide* pep = peptides.at(0); // Contact maps calculations for(int n=0; n < numRes; n++){ double NH_pep[3], H_pep[3], O_pep[3], C_pep[3]; pep->getNH(n, NH_pep); pep->getHofNH(n, H_pep); pep->getO(n, O_pep); pep->getC(n, C_pep); for(int m=n+1; m < numRes; m++){ if(pep->isHbondedCO(m, NH_pep, H_pep, boxDims)){ cmap << n << " " << m <<endl; } if(pep->isHbondedNH(m, O_pep, C_pep, 
boxDims)){ cmap << m << " "<< n <<endl; } } } //inositol peptide contact calculations //we only have 1 peptide in the system for(int nres = 0; nres < numRes; nres++) { PepGroup* bgroupNH = new PepGroup; PepGroup* bgroupCO = new PepGroup; for(int nins = 0; nins < numIns; nins++) { Inositol* aInos = inositols.at(nins); for(int noh = 0; noh < 6; noh++) { double inosO[3], inosH[3]; aInos->getOCoords(noh,inosO); aInos->getHCoords(noh,inosH); if (pep->isHbondedNH(nres, inosO, inosH, boxDims)) { //cinos<<nres<<" has "<<nins*6+noh<<" NH bound"<<endl; bgroupNH->addPepGroup(nins,nins*6+noh,"OH"); } if (pep->isHbondedCO(nres, inosO, inosH, boxDims)) { //cinos<<nres<<" has "<<nins*6+noh<<" CO bound"<<endl; bgroupCO->addPepGroup(nins,nins*6+noh,"OH"); } } } pep->setBoundGroup(2*nres, bgroupNH); pep->setBoundGroup(2*nres+1, bgroupCO); //delete bgroupNH; //delete bgroupCO; } //inosput the data computed above for(int nbb = 0; nbb < 16; nbb++) { PepGroup* bgroup=pep->getBoundGroup(nbb); int numBoundGroups = bgroup->numGroups(); if(numBoundGroups){ for(int nbgroup=0; nbgroup<numBoundGroups; nbgroup++){ inos<<bgroup->getResId(nbgroup); if(nbgroup<numBoundGroups-1){ inos<<" "; } } }else{ inos<<"-"; } if(nbb%2 == 0 && nbb < 16){ inos<<";"; } if(nbb%2 && nbb < 15){ inos<<"|"; } //cinos<<nbb<<" has " << bgroup->numGroups()<< " bound groups"<<endl; } inos<<endl; time++; delete_vectors(peptides, inositols, waters); } contactwat.close(); cmap.close(); //nonpolar.close(); inos.close(); return 0; }
/************************************************************************** Returns the global edit distance between two peptides. Gap cost = 1. d(I,L)=0 d(K,Q)=0 d(F/M*)=0 d(N,D)=0.5; d(X,X)=0 d(X,Y)=1 ***************************************************************************/ float Peptide::peptide_edit_distance(Config *config, Peptide& other_pep) const { int i; vector<int> other_aa = other_pep.get_amino_acids(); const int *pep1 = &amino_acids[0]; const int pep_len1 = amino_acids.size(); const int *pep2 = &other_aa[0]; const int pep_len2 = other_aa.size(); const int max_width = 5; const int ox_met_aa = config->get_aa_from_label(string("M+16")); float row1[max_width*2+1], row2[max_width*2+1]; float *old_row, *new_row; if (abs(pep_len1-pep_len2)>=max_width) return pep_len1; // check that no little switch of two aa can make them the same if (pep_len1 == pep_len2) { int i; int err_pos=-1; int errs=0; for (i=0; i<pep_len1; i++) { if ( (pep1[i] != pep2[i]) && ! (((pep1[i]==Ile ||pep1[i]==Leu) && (pep2[i]==Ile || pep2[i]==Leu)) || ((pep1[i]==Gln || pep1[i]==Lys) && (pep2[i]==Gln || pep2[i]==Lys)) ) ) { err_pos=i; errs++; } if (errs>2) break; } if (errs == 0) return 0; if (errs == 2) { if ( (pep1[err_pos] == pep2[err_pos-1]) && (pep1[err_pos-1] == pep2[err_pos])) return 1; } } old_row=row1; new_row=row2; for (i=0; i<max_width; i++) { old_row[i]=9999; old_row[i+max_width]=i; new_row[i]=9999; new_row[i+max_width]=9999; } old_row[2*max_width]=9999; new_row[2*max_width]=9999; int start_new_row = max_width-1; for (i=1; i<=pep_len1; i++) { int j; if (start_new_row>0) { new_row[start_new_row]=i; new_row[--start_new_row]=9999; } else new_row[0] = 9999; for (j=1; j<= 2*max_width; j++) { int p2_pos = i + j - max_width; if (p2_pos<1) continue; if (p2_pos>pep_len2) break; float v1,v2,v3,dxy=0; int p1=pep1[i-1],p2=pep2[p2_pos-1]; if (p1 != p2) { dxy=1; if ( ((p1==Ile || p1==Leu) && (p2==Ile || p2==Leu)) || ((p1==Gln || p1==Lys) && (p2==Gln || p2==Lys)) ) { dxy=0; } else if ( 
((p1==Asn && p2==Asp) || (p1==Asp && p2==Asn)) || ((pep1[i]==Gln && p2==Glu) || (p1==Glu && p2==Gln)) || ((pep1[i]==Lys && p2==Glu) || (p1==Glu && p2==Lys)) || ((pep1[i]==Ile && p2==Asn) || (p1==Asn && p2==Ile)) || ((pep1[i]==Leu && p2==Asn) || (p1==Asn && p2==Leu)) ) { dxy=0.5; } else if (ox_met_aa>0 && (( p1 == ox_met_aa && p2 == Phe) || ( p1 == Phe && p2 == ox_met_aa)) ) { dxy = 0; } } v1= old_row[j]+dxy; v2= new_row[j-1]+1; v3= (p2_pos<pep_len2 && j<2*max_width) ? old_row[j+1]+1 : 9999; new_row[j]=v1; if (new_row[j]>v2) new_row[j]=v2; if (new_row[j]>v3) new_row[j]=v3; } float *tmp; tmp = old_row; old_row = new_row; new_row = tmp; } return old_row[pep_len2-pep_len1+max_width]; }