void AllScoreModels::predict_fragmentation( const char* input_file, size_t num_peaks) { FILE* stream = fopen(input_file,"r"); if (! stream) { cout << "Error: couldn't open file for reading: " << input_file << endl; exit(1); } PeptideRankScorer *dnv_rank = (PeptideRankScorer *)get_rank_model_ptr(1); PeakRankModel *prm = this->get_peak_prediction_model_ptr(3); char buffer[128]; char pep_str[128]; while (fgets(buffer,128,stream)) { int charge; if (sscanf(buffer,"%s %d",pep_str,&charge) != 2) continue; cout << ">> " << pep_str << "\t" << charge << endl; if (charge<1 || charge>=prm->get_size_thresholds().size()) { cout << "Invalid charge!" << endl; continue; } Peptide pep; pep.parseFromString(&config_,static_cast<string>(pep_str)); PeptideSolution sol; sol.pep = pep; sol.reaches_n_terminal=true; sol.reaches_c_terminal=true; sol.charge = charge; sol.pm_with_19 = pep.get_mass_with_19(); PeptidePeakPrediction ppp; prm->calc_peptide_predicted_scores(sol, ppp); const size_t num_frags = ppp.frag_idxs.size(); vector< vector<int> > predicted_ranks; calc_combined_peak_ranks(ppp.rank_scores, predicted_ranks); vector<PeakTuple> tuples; for (size_t f=0; f<num_frags; f++) for (size_t i=0; i<ppp.rank_scores[f].size(); i++) if (predicted_ranks[f][i]<999) { PeakTuple pt; pt.frag_idx = f; pt.pos =i; pt.rank = predicted_ranks[f][i]; pt.score = ppp.rank_scores[f][i]; tuples.push_back(pt); } sort(tuples.begin(),tuples.end()); if (tuples.size()<1) continue; const size_t num_aas = pep.get_num_aas(); vector<mass_t> breakage_masses; pep.calc_expected_breakage_masses(&config_, breakage_masses); cout << fixed << "Rank\tIon\tm/z\tScore" << endl; for (size_t i=0; i<num_peaks && i<tuples.size(); i++) { PeakTuple pt = tuples[i]; cout << i+1 << "\t"; const FragmentType& ft = config_.get_fragment(ppp.frag_idxs[pt.frag_idx]); cout << ft.label << ":" << (ft.orientation == PREFIX ? pt.pos : num_aas - pt.pos) << "\t"; mass_t mz = ft.calc_expected_mass(breakage_masses[pt.pos],pep.get_mass_with_19()); cout << setprecision(2); if (mz<100) cout << " "; if (mz<1000) cout << " "; cout << mz << "\t"; cout << setprecision(3) << pt.score << endl; } cout << endl; } fclose(stream); }
/*********************************************************************** makes tables listing features and final scores Only makes table if the predictions match ************************************************************************/ bool PeakRankModel::make_peak_prediction_table( const PeptideSolution& sol, const vector< vector<intensity_t> >& intens, int num_peaks) const { PeptidePeakPrediction ppp; calc_peptide_predicted_scores(sol, ppp); // the ppp includes a table of rank scores (rows are actual frag idxs, not relative // position in the frag_type_idxs). // reduce intensities to the same dimensionality const int num_frags = ppp.frag_idxs.size(); vector< vector< float> > observed_intens; observed_intens.resize(num_frags); int i,f; for (f=0; f<num_frags; f++) { const int frag_idx = ppp.frag_idxs[f]; observed_intens[f]=intens[frag_idx]; } // calculate the ranks and mapping between predicted and observed vector< vector<int> > observed_ranks, predicted_ranks; calc_combined_peak_ranks(observed_intens, observed_ranks); calc_combined_peak_ranks(ppp.rank_scores, predicted_ranks); vector<int> sel_frags, sel_idxs; vector< float > intensities; int rank; for (rank=0; rank<num_peaks; rank++) { bool good_pred=false; for (f=0; f<num_frags; f++) { int i; for (i=0; i<predicted_ranks[f].size(); i++) { if (predicted_ranks[f][i] == rank && observed_ranks[f][i] == rank) { good_pred=true; sel_frags.push_back(f); sel_idxs.push_back(i); intensities.push_back(intens[f][i]); break; } } } if (! good_pred) return false; } // cout << "#sel_frags: " << sel_frags.size() << endl; // calc specific peak vectors and collect data vector< vector< string> > feature_names; vector< vector< float > > feature_values; vector< vector< float > > feature_scores; vector< float > total_scores; feature_names.resize(num_peaks); feature_values.resize(num_peaks); feature_scores.resize(num_peaks); total_scores.resize(num_peaks,0); const Peptide& pep = sol.pep; const mass_t pm_with_19 = sol.pm_with_19; const int spec_charge = sol.charge; const int mobility = get_proton_mobility(pep,spec_charge); const int size_idx = get_size_group(spec_charge,pm_with_19); if (! partition_models[spec_charge][size_idx][mobility]) { cout << "Error: no rank partition model for " << spec_charge << " " << size_idx << " " << mobility << endl; exit(1); } if (size_idx != 1 || mobility != 1) return false; const mass_t min_detected_mass = calc_min_detected_mass(pm_with_19, spec_charge); const mass_t max_detected_mass = get_max_detected_mass(); const vector<int>& amino_acids = pep.get_amino_acids(); vector<mass_t> exp_cuts; pep.calc_expected_breakage_masses(config,exp_cuts); const mass_t n_mass = pep.get_n_gap(); // calculate a single set of ranks across the combined set of fragments const int start_cut_idx = (sol.reaches_n_terminal ? 1 : 0); const int last_cut_idx = (sol.reaches_c_terminal ? exp_cuts.size()-1 : exp_cuts.size()); const mass_t c_mass = exp_cuts[exp_cuts.size()-1]; int max_l=0; for (i=0; i<sel_frags.size(); i++) { const int frag_idx=sel_frags[i]; const int cut_idx = sel_idxs[i]; const FragmentType& fragment = config->get_fragment(frag_idx); const mass_t cut_mass = exp_cuts[cut_idx]; const mass_t peak_mass = fragment.calc_expected_mass(cut_mass,pm_with_19); RankBoostSample rbs; for (f=0; f<num_frags; f++) if (ppp.frag_idxs[f] == frag_idx) break; // cout << "Frag: " << fragment.label << " fi:" << frag_idx << " f:" << f << endl; if (f==num_frags) { cout << "Error: bad frag!!!!" << endl; exit(1); } partition_models[spec_charge][size_idx][mobility]->fill_combined_simple_peak_features( this, amino_acids, cut_idx, cut_mass, sol, fragment, f, rbs); // partition_models[spec_charge][size_idx][mobility]->fill_combined_peak_features( // this, amino_acids, cut_idx, cut_mass, sol, fragment, f, rbs); total_scores[i] = partition_models[spec_charge][size_idx][mobility]->combined_frag_boost_model.calc_rank_score_with_details( rbs,feature_names[i],feature_values[i],feature_scores[i]); if (feature_names[i].size()>max_l) max_l = feature_names[i].size(); } cout << "Size: " << size_idx << " Mobility: " << mobility << endl; // print results for (i=0; i<num_peaks; i++) { cout << config->get_fragment(sel_frags[i]).label << " " << sel_idxs[i]; if (i<num_peaks-1) { cout << " & "; } else cout << "\\\\" << endl; } cout << setprecision(2) << fixed; for (i=0; i<num_peaks; i++) { cout << total_scores[i]; if (i<num_peaks-1) { cout << " & "; } else cout << "\\\\" << endl; } for (i=0; i<num_peaks; i++) { cout << intensities[i]; if (i<num_peaks-1) { cout << " & "; } else cout << "\\\\" << endl; } for (i=0; i<max_l; i++) { int j; for (j=0; j<num_peaks; j++) { if (feature_names[j].size()<=i) { cout << " & "; } else { cout << feature_names[j][i] << " " << feature_values[j][i] << " & "; if (feature_scores[j][i]>0) { cout << "+"; } cout << feature_scores[j][i]; } if (j<num_peaks-1) { cout << " & "; } else cout << "\\\\" << endl; } } return true; }