Example #1
0
/***************************************************************************************
This function touches up inspect search results by rescoring the sequences returned by
inspect. The function produces a new inspect results file with the scores (and delta scores)
replaced.
****************************************************************************************/
void PeptideRankScorer::recalibrate_inspect_delta_scores(char *spectra_file, 
											   char *inspect_res, 
											   char *new_res_file) const
{
	AllScoreModels* allScoreModels = static_cast<AllScoreModels*>(this->allScoreModelsPtr_);
	Config *config = allScoreModels->get_config();

	ifstream org_res(inspect_res);

	if (!  org_res.is_open() || ! org_res.good())
	{
		cout << "Error: couldn't open original inspect results file for reading:" << inspect_res << endl;
		exit(1);
	}

	ofstream new_res(new_res_file);
	if (! new_res.is_open() || ! new_res.good())
	{
			cout << "Error: couldn't open new inspect results file for writing:" << new_res << endl;
		exit(1);
	}

	char line_buff[1024];
	org_res.getline(line_buff,1024);

	bool read_line  = true;
	vector<string> field_names;
	if (line_buff[0] != '#')
	{
		read_line = false;
	}
	else
	{
		string header = string(line_buff);
		split_string(header,field_names);

		int i;
		for (i=0; i<field_names.size(); i++)
			cout << i << "\t" << field_names[i] << endl;
	}


	vector<ScanCandidateSet> cand_sets;
	vector<int> scan_mapping;
	cand_sets.clear();
	scan_mapping.resize(100000,-1);
	
	while (! org_res.eof())
	{
		vector<string> fields;

		if (read_line)
		{
			org_res.getline(line_buff,1024);
			if (org_res.gcount() < 5)
				continue;
		}
		else
		{
			read_line = true;
		}

		split_string(line_buff,fields);
		InspectResultsLine res;

		res.parse_from_fields(config,fields);

		if (cand_sets.size()==0 || ! cand_sets[cand_sets.size()-1].add_new_line(res))
		{
			ScanCandidateSet new_set;
			new_set.add_new_line(res);
			
			if (new_set.scan>=scan_mapping.size())
				scan_mapping.resize(2*scan_mapping.size(),-1);

			scan_mapping[new_set.scan]=cand_sets.size();
			cand_sets.push_back(new_set);
		}
	}
	org_res.close();

	cout << "Read results for " << cand_sets.size() << " scans..." << endl;

	FileManager fm;
	FileSet     fs;
	fm.init_from_file(config,spectra_file);
	fs.select_all_files(fm);
	const vector<SingleSpectrumFile *>& all_ssfs = fs.get_ssf_pointers();

	cout << "Read " <<  all_ssfs.size() << " spectra headers..." << endl;

	BasicSpecReader bsr;
	QCPeak *peaks = new QCPeak[5000];

	vector<bool> spectrum_indicators;
	spectrum_indicators.resize(cand_sets.size(),false);

	int num_found =0;
	int i;
	for (i=0; i<all_ssfs.size(); i++)
	{
		SingleSpectrumFile *ssf = all_ssfs[i];
		
		const int scan_number = ssf->get_scan();
		if (scan_mapping[scan_number]<0)
			continue;

		const int num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);

		spectrum_indicators[scan_mapping[scan_number]]=true;
		num_found++;

		ScanCandidateSet& cand_set = cand_sets[scan_mapping[scan_number]];
		
		cand_set.recalbirate_scores(config);

		cand_set.output_to_stream(new_res,10);
	}

	if (num_found<cand_sets.size())
	{
		cout << "Warning: found only " << num_found << "/" << cand_sets.size() << " of the scans scored by InsPecT!" << endl;
	}
	else
	{
		cout << "All scored scans found in spectrum file." << endl;
	}


	delete [] peaks;
}