Exemplo n.º 1
0
void AllScoreModels::predict_fragmentation( const char* input_file, size_t num_peaks)
{
	FILE* stream = fopen(input_file,"r");
	if (! stream)
	{
		cout << "Error: couldn't open file for reading: " << input_file << endl;
		exit(1);
	}

	PeptideRankScorer *dnv_rank = (PeptideRankScorer *)get_rank_model_ptr(1);
	PeakRankModel *prm = this->get_peak_prediction_model_ptr(3);

	char buffer[128];
	char pep_str[128];
	while (fgets(buffer,128,stream))
	{
		int charge;

		if (sscanf(buffer,"%s %d",pep_str,&charge) != 2)
			continue;

		cout << ">> " << pep_str << "\t" << charge << endl;
		if (charge<1 || charge>=prm->get_size_thresholds().size())
		{
			cout << "Invalid charge!" << endl;
			continue;
		}

		Peptide pep;
		pep.parseFromString(&config_,static_cast<string>(pep_str));


		PeptideSolution sol;
		sol.pep = pep;
		sol.reaches_n_terminal=true;
		sol.reaches_c_terminal=true;
		sol.charge = charge;
		sol.pm_with_19 = pep.get_mass_with_19();

		PeptidePeakPrediction ppp;
		prm->calc_peptide_predicted_scores(sol, ppp);

		const size_t num_frags = ppp.frag_idxs.size();
		vector< vector<int> > predicted_ranks;
		calc_combined_peak_ranks(ppp.rank_scores, predicted_ranks);

		vector<PeakTuple> tuples;
		for (size_t f=0; f<num_frags; f++)
			for (size_t i=0; i<ppp.rank_scores[f].size(); i++)
				if (predicted_ranks[f][i]<999)
				{
					PeakTuple pt;
					pt.frag_idx = f;
					pt.pos =i;
					pt.rank = predicted_ranks[f][i];
					pt.score = ppp.rank_scores[f][i];
					tuples.push_back(pt);
				}
		
		sort(tuples.begin(),tuples.end());

		if (tuples.size()<1)
			continue;

		const size_t num_aas = pep.get_num_aas();
		vector<mass_t> breakage_masses;
		pep.calc_expected_breakage_masses(&config_, breakage_masses);

		cout << fixed << "Rank\tIon\tm/z\tScore" << endl;
		for (size_t i=0; i<num_peaks && i<tuples.size(); i++)
		{
			PeakTuple pt = tuples[i];
			cout << i+1 << "\t";
			const FragmentType& ft = config_.get_fragment(ppp.frag_idxs[pt.frag_idx]);
			cout << ft.label << ":" << (ft.orientation == PREFIX ? pt.pos : num_aas - pt.pos) << "\t";

			mass_t mz =  ft.calc_expected_mass(breakage_masses[pt.pos],pep.get_mass_with_19());
			cout << setprecision(2);
			if (mz<100)
				cout << " ";
			if (mz<1000)
				cout << " ";

			cout << mz << "\t";
			cout << setprecision(3) << pt.score << endl;
		}
		cout << endl;
	}

	fclose(stream);
}
Exemplo n.º 2
0
/***********************************************************************
makes tables listing features and final scores
Only makes table if the predictions match
************************************************************************/
bool PeakRankModel::make_peak_prediction_table(
			const PeptideSolution& sol,
			const vector< vector<intensity_t> >& intens,
			int num_peaks) const
{
	PeptidePeakPrediction ppp;
	calc_peptide_predicted_scores(sol, ppp);

	// the ppp includes a table of rank scores (rows are actual frag idxs, not relative
	// position in the frag_type_idxs).

	// reduce intensities to the same dimensionality
	const int num_frags = ppp.frag_idxs.size();
	vector< vector< float> > observed_intens;
	observed_intens.resize(num_frags);

	int i,f;
	for (f=0; f<num_frags; f++)
	{
		const int frag_idx = ppp.frag_idxs[f];
		observed_intens[f]=intens[frag_idx]; 
	}

	// calculate the ranks and mapping between predicted and observed
	vector< vector<int> > observed_ranks, predicted_ranks;
	calc_combined_peak_ranks(observed_intens, observed_ranks);
	calc_combined_peak_ranks(ppp.rank_scores, predicted_ranks);

	vector<int> sel_frags, sel_idxs;
	vector< float > intensities;
	
	
	int rank;
	for (rank=0; rank<num_peaks; rank++)
	{
		bool good_pred=false;
		for (f=0; f<num_frags; f++)
		{
			int i;
			for (i=0; i<predicted_ranks[f].size(); i++)
			{
				if (predicted_ranks[f][i] == rank &&
					observed_ranks[f][i]  == rank)
				{
					good_pred=true;
					sel_frags.push_back(f);
					sel_idxs.push_back(i);
					intensities.push_back(intens[f][i]);
					break;
				}
			}
		}
		if (! good_pred)
			return false;
	}

//	cout << "#sel_frags: " << sel_frags.size() << endl;
	

	// calc specific peak vectors and collect data
	vector< vector< string> > feature_names;
	vector< vector< float > > feature_values;
	vector< vector< float > > feature_scores;
	vector< float > total_scores;


	feature_names.resize(num_peaks);
	feature_values.resize(num_peaks);
	feature_scores.resize(num_peaks);
	total_scores.resize(num_peaks,0);
	

	const Peptide& pep = sol.pep;
	const mass_t pm_with_19 = sol.pm_with_19;
	const int spec_charge = sol.charge;
	const int mobility = get_proton_mobility(pep,spec_charge);
	const int size_idx =  get_size_group(spec_charge,pm_with_19);
	
	if (! partition_models[spec_charge][size_idx][mobility])
	{
		cout << "Error: no rank partition model for " <<
			spec_charge << " " << size_idx << " " << mobility << endl;
		exit(1);
	}

	if (size_idx != 1 || mobility != 1)
		return false;

	const mass_t min_detected_mass = calc_min_detected_mass(pm_with_19, spec_charge);
	const mass_t max_detected_mass = get_max_detected_mass();


	const vector<int>& amino_acids = pep.get_amino_acids();
	vector<mass_t> exp_cuts;

	pep.calc_expected_breakage_masses(config,exp_cuts);

	const mass_t n_mass = pep.get_n_gap();

	// calculate a single set of ranks across the combined set of fragments
	const int start_cut_idx = (sol.reaches_n_terminal ? 1 : 0);
	const int last_cut_idx  = (sol.reaches_c_terminal ? exp_cuts.size()-1 : exp_cuts.size());
	const mass_t c_mass = exp_cuts[exp_cuts.size()-1];


	int max_l=0;
	for (i=0; i<sel_frags.size(); i++)
	{
		const int frag_idx=sel_frags[i];
		const int cut_idx = sel_idxs[i];
	
		const FragmentType& fragment = config->get_fragment(frag_idx);

		const mass_t cut_mass = exp_cuts[cut_idx];
		const mass_t peak_mass = fragment.calc_expected_mass(cut_mass,pm_with_19);
		
		RankBoostSample rbs;

		for (f=0; f<num_frags; f++)
			if (ppp.frag_idxs[f] == frag_idx)
				break;

	//	cout << "Frag: " << fragment.label << " fi:" << frag_idx << " f:" << f << endl;

		if (f==num_frags)
		{
			cout << "Error: bad frag!!!!" << endl;
			exit(1);
		}
		

		partition_models[spec_charge][size_idx][mobility]->fill_combined_simple_peak_features(
			this, amino_acids, cut_idx, cut_mass, sol, fragment, f, rbs);
				
//		partition_models[spec_charge][size_idx][mobility]->fill_combined_peak_features(	
//			this, amino_acids, cut_idx, cut_mass, sol, fragment, f, rbs);
			
		total_scores[i] = partition_models[spec_charge][size_idx][mobility]->combined_frag_boost_model.calc_rank_score_with_details(
									rbs,feature_names[i],feature_values[i],feature_scores[i]);
							
			
		if (feature_names[i].size()>max_l)
			max_l = feature_names[i].size();
	}


	cout << "Size: " << size_idx << " Mobility: " << mobility << endl;


	// print results
	for (i=0; i<num_peaks; i++)
	{
		cout << config->get_fragment(sel_frags[i]).label << " " <<
			sel_idxs[i];
		
		if (i<num_peaks-1)
		{
			cout << " & ";
		}
		else
			cout << "\\\\" << endl;
	}

	cout << setprecision(2) << fixed;
	for (i=0; i<num_peaks; i++)
	{
		cout << total_scores[i];
		if (i<num_peaks-1)
		{
			cout << " & ";
		}
		else
			cout << "\\\\" << endl;
	}

	for (i=0; i<num_peaks; i++)
	{
		cout << intensities[i];
		if (i<num_peaks-1)
		{
			cout << " & ";
		}
		else
			cout << "\\\\" << endl;
	}

	for (i=0; i<max_l; i++)
	{
		int j;
		for (j=0; j<num_peaks; j++)
		{
			if (feature_names[j].size()<=i)
			{
				cout << "           &  ";  
			}
			else
			{
				cout << feature_names[j][i] << " " << feature_values[j][i] << " & ";
				if (feature_scores[j][i]>0)
				{
					cout << "+";
				}
				cout << feature_scores[j][i];
			}

			if (j<num_peaks-1)
			{
				cout << " & ";
			}
			else
				cout << "\\\\" << endl;
		}
	}



	return true;
}