Exemplo n.º 1
0
// Computes, for every peak, the log of the probability of seeing a peak of
// that intensity at random, based on the peaks found in a mass window around it.
//
// logIntensities         - log intensity per peak, indexed like peaks_. It is read
//                          at every index below numPeaks_ and at every index in the
//                          range reported by findPeaksInRange, so it must be at
//                          least that long. It is not modified.
// logRandomProbabilities - output; resized to numPeaks_ and filled with one value
//                          per peak. Left untouched when numPeaks_ < 2.
//
// There are fixed magic numbers in this function that should not be changed
// (even though actual tolerances or values in config file might differ)
// changing these numbers without retraining all models might give unanticipated results.
// For the same reason the arithmetic of the regular (non-degenerate) path is kept
// exactly as it was; only the two division-by-zero cases are handled explicitly.
void PeakList::calculateLogRandomProbabilities(vector<float>& logIntensities,
        vector<float>& logRandomProbabilities) const
{
    if (numPeaks_<2)
        return;

    const float oneOverSqrt2pi = 1.0 / sqrt(2.0*3.1415927);
    const float logBias = log(1.2); // to fine tune the zero probabilites
    const mass_t peakWindowSize = 0.6; // this is fixed and independent of the tolerance!!!
    const mass_t margin      = 25.0;   // fixed and independent of values in config file
    const mass_t windowSize = 100.0;   // fixed and independent of values in config file
    const mass_t minPeakMass = peaks_[0].mass;
    const mass_t maxPeakMass = peaks_[numPeaks_-1].mass;
    const mass_t visibleRange = (maxPeakMass - minPeakMass);

    logRandomProbabilities.resize(numPeaks_);

    // Scratch buffer for the log intensities of the current window; declared once
    // and cleared per peak so its storage is reused instead of reallocated.
    vector<float> windowLogIntensities;

    for (int i=0; i<numPeaks_; i++)
    {
        const mass_t peakMass = peaks_[i].mass;

        // Position of the peak inside the spectrum's mass span. When the first and
        // last peak share the same mass the span is zero; dividing would yield NaN
        // and poison the window bounds, so the position is pinned to 0 instead.
        const mass_t relativePosition = (visibleRange != 0 ?
            (peakMass-minPeakMass)/visibleRange : static_cast<mass_t>(0));

        // The window slides with the peak's relative position: the left part grows
        // from margin to margin+windowSize while the right part shrinks accordingly,
        // so the total width stays margin+windowSize (up to rounding).
        const mass_t leftWindow  = margin + relativePosition * windowSize;
        const mass_t rightWindow = margin + windowSize - leftWindow;
        const PeakRange pr = findPeaksInRange(peakMass - leftWindow, peakMass + rightWindow);
        const float peakWindowProb = peakWindowSize /(leftWindow + rightWindow);

        // some freak cases have 0 peak counts (only in unix)
        const int numPeaksInRange = (pr.num_peaks>0 ? pr.num_peaks : 1);

        // (1 - p)^n: under this model, the chance that none of the n peaks in range
        // falls into the small peak window.
        const float zeroProbability = pow((1.0 - peakWindowProb),numPeaksInRange);

        if (numPeaksInRange<5)
        {
            // too few neighbours for an intensity model; use the count-based value
            logRandomProbabilities[i] = log(1.0-zeroProbability) + logBias;
            continue;
        }

        // compute special probability based on peak density model
        windowLogIntensities.clear();
        for (int j=pr.low_idx; j<=pr.high_idx; j++)
            windowLogIntensities.push_back(logIntensities[j]);

        float mean=0,sd=1;
        calc_mean_sd(windowLogIntensities,&mean,&sd);

        if (sd <= 0)
        {
            // NOTE(review): calc_mean_sd is defined elsewhere; presumably it reports
            // sd == 0 when every intensity in the window is equal - confirm.
            // Dividing by such an sd gives NaN. With no spread the peak sits at the
            // mean, i.e. e == 0, for which the formula below reduces to
            // log(1 - zeroProbability).
            logRandomProbabilities[i] = log(1.0 - zeroProbability);
            continue;
        }

        // distance of this peak's log intensity from the window mean, in units of sd
        const float e = (logIntensities[i] - mean)/sd;
        if (e<0)
        {
            // below-average intensity: same value as the count-based case
            logRandomProbabilities[i] = log(1 - zeroProbability) + logBias;
        }
        else
        {
            // Gaussian density at e, rescaled so that e == 0 maps to (1 - zeroProbability)
            const float normalizedValue = (oneOverSqrt2pi/ sd) * exp(-0.5*e*e);
            const float normalizationConstant = (1.0 - zeroProbability) / (oneOverSqrt2pi / sd);
            logRandomProbabilities[i] = log(normalizedValue*normalizationConstant);
        }
    }
}
Exemplo n.º 2
0
/***********************************************************************
Calculates for each peak the log probability of observing it at random,
based on the distribution of its neighbors' log intensities (the log
intensities are assumed to follow a normal distribution).

Reads peaks_ and logIntensities_; resizes logRandomProbabilities_ to
numPeaks_ and writes one value per peak. Does nothing when numPeaks_ < 2.

The arithmetic of the regular path is unchanged; only the two
division-by-zero cases (zero mass span, zero standard deviation) are
handled explicitly so they no longer produce NaN.
************************************************************************/
void Spectrum::set_log_random_probs()
{
	if (numPeaks_<2)
		return;
	const float log_add = log(1.2);
	const float one_over_sqr_2pi = 1.0 / sqrt(2*3.1415927);
	const mass_t peak_window_size = 0.6; // this is fixed and independent of the tolerance!
	const mass_t margin      = 25.0;
	const mass_t window_size = 100.0;
	const mass_t min_mass = peaks_[0].mass;
	const mass_t max_mass = peaks_[numPeaks_-1].mass;
	const mass_t viz_range = (max_mass - min_mass);

	logRandomProbabilities_.resize(numPeaks_);

	// Scratch buffer for the log intensities of the current window; declared once
	// and cleared per peak so its storage is reused instead of reallocated.
	vector<float> log_intens;

	for (int i=0; i<numPeaks_; i++)
	{
		const mass_t peak_mass    = peaks_[i].mass;

		// Position of the peak inside the spectrum's mass span. When the first and
		// last peak share the same mass the span is zero; dividing would yield NaN
		// and poison the window bounds, so the position is pinned to 0 instead.
		const mass_t rel_position = (viz_range != 0 ?
			(peak_mass-min_mass)/viz_range : static_cast<mass_t>(0));

		// The window slides with the relative position; its total width stays
		// margin + window_size (up to rounding).
		const mass_t left_window  = margin + rel_position * window_size;
		const mass_t right_window = margin + window_size - left_window;
		const PeakRange pr = findPeaksInRange(peak_mass-left_window,peak_mass+right_window);
		const float peak_window_prob = peak_window_size /(left_window + right_window);

		// some freak cases have 0 peak counts (only in unix)
		const int num_peaks_in_range = (pr.num_peaks>0 ? pr.num_peaks : 1);

		// (1 - p)^n: under this model, the chance that none of the n peaks in range
		// falls into the small peak window.
		const float zero_prob = pow((1.0 - peak_window_prob),num_peaks_in_range); 

		if (pr.num_peaks<5)
		{
			// too few neighbors for an intensity model; use the count-based value
			logRandomProbabilities_[i] = log(1.0-zero_prob) + log_add;
			continue;
		}

		log_intens.clear();
		for (int j=pr.low_idx; j<=pr.high_idx; j++)
			log_intens.push_back(logIntensities_[j]);

		float mean=0,sd=1;
		calc_mean_sd(log_intens,&mean,&sd);

		if (sd <= 0)
		{
			// NOTE(review): calc_mean_sd is defined elsewhere; presumably it reports
			// sd == 0 when every intensity in the window is equal - confirm.
			// Dividing by such an sd gives NaN. With no spread the peak sits at the
			// mean, i.e. e == 0, for which the formula below reduces to
			// log(1 - zero_prob).
			logRandomProbabilities_[i] = log(1.0 - zero_prob);
			continue;
		}

		// distance of this peak's log intensity from the window mean, in units of sd
		const float e = (logIntensities_[i] - mean)/sd;
		if (e<0)
		{
			// below-average intensity: same value as the count-based case
			logRandomProbabilities_[i] = log(1 - zero_prob) + log_add;
		}
		else
		{
			// Gaussian density at e, rescaled so that e == 0 maps to (1 - zero_prob)
			const float norm = (one_over_sqr_2pi/ sd) * exp(-0.5*e*e);
			const float norm_const = (1 - zero_prob) / (one_over_sqr_2pi/ sd);
			logRandomProbabilities_[i] = log(norm*norm_const);
		}
	}
}
Exemplo n.º 3
0
void PMCSQS_Scorer::output_pmc_rank_results(const FileManager& fm,
											int charge,
											const vector<SingleSpectrumFile *>& test_ssfs) 
{
	BasicSpecReader bsr;
	static QCPeak peaks[5000];

	vector<int> org_offset_counts, new_offset_counts;
	org_offset_counts.resize(201,0);
	new_offset_counts.resize(201,0);

	vector<mass_t> org_offsets;
	vector<mass_t> corr_offsets;

	org_offsets.clear();
	corr_offsets.clear();

	int i;
	for (i=0; i<test_ssfs.size(); i++)
	{
		SingleSpectrumFile* ssf = test_ssfs[i];
		BasicSpectrum bs;
	
		bs.num_peaks = bsr.read_basic_spec(config,fm,ssf,peaks);
		bs.peaks = peaks;
		bs.ssf = ssf;

		init_for_current_spec(config,bs);
		calculate_curr_spec_pmc_values(bs, bin_increment);

		PmcSqsChargeRes res;
		find_best_mz_values_from_rank_model(bs, charge, config->get_pm_tolerance(),res);

		ssf->peptide.calc_mass(config);
		mass_t true_mz = (ssf->peptide.get_mass() + 18.01 + charge)/charge;

		org_offsets.push_back(true_mz - ssf->m_over_z);
		corr_offsets.push_back(true_mz - res.mz1);
	}

	mass_t m_org,sd_org,m_corr,sd_corr;
	calc_mean_sd(org_offsets,&m_org, &sd_org);
	calc_mean_sd(corr_offsets,&m_corr,&sd_corr);

	cout << "CHARGE: " << charge << endl;
	cout << "ORG:  mean " << m_org << " " << sd_org << endl;
	cout << "CORR: mean " << m_corr << " " << sd_corr << endl;

	for (i=0; i<org_offsets.size(); i++)
	{
		int org_idx = 100 + int(org_offsets[i] * 20);
		if (org_idx<0)
			org_idx = 0;
		if (org_idx>200)
			org_idx=200;
		org_offset_counts[org_idx]++;

		int new_idx = 100 + int(corr_offsets[i] * 20);
		if (new_idx<0)
			new_idx = 0;
		if (new_idx>200)
			new_idx=200;
		new_offset_counts[new_idx]++;
	}

	int cum_org=0;
	int cum_new=0;
	for (i=0; i<=200; i++)
	{

		if (org_offset_counts[i]==0 && new_offset_counts[i]==0)
			continue;
		
		cum_org+=org_offset_counts[i];
		cum_new+=new_offset_counts[i];
		cout << fixed << setprecision(3) << i*0.05 - 5.0 << "\t" <<
			org_offset_counts[i]/(float)org_offsets.size() << "\t" <<
			new_offset_counts[i]/(float)corr_offsets.size() << "\t" <<
			cum_org/(float)org_offsets.size() << "\t"<<
			cum_new/(float)corr_offsets.size() << endl;

	}


}