// There are fixed magic numbers in this function that should not be changed
// (even though the actual tolerances or values in the config file might differ);
// changing these numbers without retraining all models might give unanticipated results.
void PeakList::calculateLogRandomProbabilities(vector<float>& logIntensities,
                                               vector<float>& logRandomProbabilities) const
{
    if (numPeaks_ < 2)
        return;

    const float oneOverSqrt2pi  = 1.0 / sqrt(2.0 * 3.1415927);
    const float logBias         = log(1.2);  // to fine-tune the zero probabilities
    const mass_t peakWindowSize = 0.6;       // this is fixed and independent of the tolerance!!!
    const mass_t margin         = 25.0;      // fixed and independent of values in config file
    const mass_t windowSize     = 100.0;     // fixed and independent of values in config file

    const mass_t minPeakMass  = peaks_[0].mass;
    const mass_t maxPeakMass  = peaks_[numPeaks_-1].mass;
    const mass_t visibleRange = (maxPeakMass - minPeakMass);

    logRandomProbabilities.resize(numPeaks_);

    int i;
    for (i = 0; i < numPeaks_; i++)
    {
        const mass_t peakMass         = peaks_[i].mass;
        const mass_t relativePosition = (peakMass - minPeakMass) / visibleRange;
        const mass_t leftWindow       = margin + relativePosition * windowSize;
        const mass_t rightWindow      = margin + windowSize - leftWindow;
        const PeakRange pr = findPeaksInRange(peakMass - leftWindow, peakMass + rightWindow);
        const float peakWindowProb = peakWindowSize / (leftWindow + rightWindow);

        // some freak cases have 0 peak counts (only in unix)
        const int   numPeaksInRange = (pr.num_peaks > 0 ? pr.num_peaks : 1);
        const float zeroProbability = pow((1.0 - peakWindowProb), numPeaksInRange);

        if (numPeaksInRange < 5)
        {
            logRandomProbabilities[i] = log(1.0 - zeroProbability) + logBias;
        }
        else // compute special probability based on peak density model
        {
            vector<float> windowLogIntensities;
            int j;
            for (j = pr.low_idx; j <= pr.high_idx; j++)
                windowLogIntensities.push_back(logIntensities[j]);

            float mean = 0, sd = 1;
            calc_mean_sd(windowLogIntensities, &mean, &sd);

            const float e = (logIntensities[i] - mean) / sd;
            if (e < 0)
            {
                logRandomProbabilities[i] = log(1.0 - zeroProbability) + logBias;
            }
            else
            {
                const float normalizedValue       = (oneOverSqrt2pi / sd) * exp(-0.5 * e * e);
                const float normalizationConstant = (1.0 - zeroProbability) / (oneOverSqrt2pi / sd);
                logRandomProbabilities[i] = log(normalizedValue * normalizationConstant);
            }
        }
    }
}
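/***********************************************************************
   calc_mean_sd() is called above but defined elsewhere in the codebase.
   The sketch below is a hypothetical stand-in (hence the _sketch suffix),
   inferred only from the call sites: it is assumed to compute the sample
   mean and standard deviation, and is templated here because it is called
   with both vector<float> and vector<mass_t>. It assumes <vector> and
   <cmath> are in scope, as they are in this file. The real implementation
   may differ.
************************************************************************/
template<typename T>
void calc_mean_sd_sketch(const vector<T>& values, T* mean, T* sd)
{
    *mean = 0;
    *sd   = 1;
    if (values.empty())
        return;

    double sum = 0.0, sumSq = 0.0;
    size_t i;
    for (i = 0; i < values.size(); i++)
    {
        sum   += values[i];
        sumSq += values[i] * values[i];
    }

    const double m   = sum / values.size();
    const double var = sumSq / values.size() - m * m;  // population variance

    *mean = (T)m;
    *sd   = (var > 0.0 ? (T)sqrt(var) : (T)1);  // guard: callers divide by sd
}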
/***********************************************************************
   Calculates for each peak the probability of observing it at random
   (based on the neighbors' distribution). Assumes the log intensities
   are distributed according to a normal distribution.
************************************************************************/
void Spectrum::set_log_random_probs()
{
    if (numPeaks_ < 2)
        return;

    const float log_add          = log(1.2);
    const float one_over_sqr_2pi = 1.0 / sqrt(2*3.1415927);
    const mass_t peak_window_size = 0.6;   // this is fixed and independent of the tolerance!
    const mass_t margin           = 25.0;
    const mass_t window_size      = 100.0;
    const mass_t min_mass  = peaks_[0].mass;
    const mass_t max_mass  = peaks_[numPeaks_-1].mass;
    const mass_t viz_range = (max_mass - min_mass);

    logRandomProbabilities_.resize(numPeaks_);

    int i;
    for (i = 0; i < numPeaks_; i++)
    {
        const mass_t peak_mass    = peaks_[i].mass;
        const mass_t rel_position = (peak_mass - min_mass) / viz_range;
        const mass_t left_window  = margin + rel_position * window_size;
        const mass_t right_window = margin + window_size - left_window;
        const PeakRange pr = findPeaksInRange(peak_mass - left_window, peak_mass + right_window);
        const float peak_window_prob = peak_window_size / (left_window + right_window);

        // some freak cases have 0 peak counts (only in unix)
        const int   num_peaks_in_range = (pr.num_peaks > 0 ? pr.num_peaks : 1);
        const float zero_prob = pow((1.0 - peak_window_prob), num_peaks_in_range);

        if (pr.num_peaks < 5)
        {
            logRandomProbabilities_[i] = log(1.0 - zero_prob) + log_add;
        }
        else
        {
            vector<float> log_intens;
            int j;
            for (j = pr.low_idx; j <= pr.high_idx; j++)
                log_intens.push_back(logIntensities_[j]);

            float mean = 0, sd = 1;
            calc_mean_sd(log_intens, &mean, &sd);

            const float e = (logIntensities_[i] - mean) / sd;
            if (e < 0)
            {
                logRandomProbabilities_[i] = log(1.0 - zero_prob) + log_add;
            }
            else
            {
                const float norm       = (one_over_sqr_2pi / sd) * exp(-0.5*e*e);
                const float norm_const = (1.0 - zero_prob) / (one_over_sqr_2pi / sd);
                logRandomProbabilities_[i] = log(norm * norm_const);
            }
        }
    }
}
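/***********************************************************************
   A standalone illustration (not part of the original code) of the
   random-match model shared by both functions above. Note that the
   window span is constant: right_window = margin + window_size -
   left_window, so left_window + right_window always equals
   margin + window_size = 125 Da. With n neighboring peaks scattered
   uniformly over that span, the chance that a fixed 0.6 Da sub-window
   stays empty is (1 - 0.6/125)^n, so the probability of at least one
   random hit is 1 - (1 - 0.6/125)^n. The program below just tabulates
   that quantity for a few peak counts.
************************************************************************/
#include <cmath>
#include <cstdio>

int main()
{
    const double peakWindowSize = 0.6;    // same fixed constant as above
    const double windowSpan     = 125.0;  // margin + window_size = 25 + 100

    int n;
    for (n = 5; n <= 40; n += 5)
    {
        const double p        = peakWindowSize / windowSpan;  // chance one peak hits the window
        const double zeroProb = std::pow(1.0 - p, n);         // chance all n peaks miss it
        std::printf("n=%2d  P(at least one random hit)=%.4f  log=%.4f\n",
                    n, 1.0 - zeroProb, std::log(1.0 - zeroProb));
    }
    return 0;
}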
void PMCSQS_Scorer::output_pmc_rank_results(const FileManager& fm,
                                            int charge,
                                            const vector<SingleSpectrumFile *>& test_ssfs)
{
    BasicSpecReader bsr;
    static QCPeak peaks[5000];

    vector<int> org_offset_counts, new_offset_counts;
    org_offset_counts.resize(201, 0);
    new_offset_counts.resize(201, 0);

    vector<mass_t> org_offsets;
    vector<mass_t> corr_offsets;
    org_offsets.clear();
    corr_offsets.clear();

    int i;
    for (i = 0; i < test_ssfs.size(); i++)
    {
        SingleSpectrumFile* ssf = test_ssfs[i];
        BasicSpectrum bs;

        bs.num_peaks = bsr.read_basic_spec(config, fm, ssf, peaks);
        bs.peaks = peaks;
        bs.ssf   = ssf;

        init_for_current_spec(config, bs);
        calculate_curr_spec_pmc_values(bs, bin_increment);

        PmcSqsChargeRes res;
        find_best_mz_values_from_rank_model(bs, charge, config->get_pm_tolerance(), res);

        ssf->peptide.calc_mass(config);
        // true m/z = (residue mass + 18.01 for water + ~1 Da per proton) / charge
        mass_t true_mz = (ssf->peptide.get_mass() + 18.01 + charge) / charge;

        org_offsets.push_back(true_mz - ssf->m_over_z);  // offset of the recorded m/z
        corr_offsets.push_back(true_mz - res.mz1);       // offset of the corrected m/z
    }

    // mean and sd of the original vs. corrected offsets
    mass_t m_org, sd_org, m_corr, sd_corr;
    calc_mean_sd(org_offsets,  &m_org,  &sd_org);
    calc_mean_sd(corr_offsets, &m_corr, &sd_corr);

    cout << "CHARGE: " << charge << endl;
    cout << "ORG: mean "  << m_org  << " " << sd_org  << endl;
    cout << "CORR: mean " << m_corr << " " << sd_corr << endl;

    // histogram the offsets into 201 bins of 0.05 Da covering [-5.0, +5.0]
    for (i = 0; i < org_offsets.size(); i++)
    {
        int org_idx = 100 + int(org_offsets[i] * 20);
        if (org_idx < 0)
            org_idx = 0;
        if (org_idx > 200)
            org_idx = 200;
        org_offset_counts[org_idx]++;

        int new_idx = 100 + int(corr_offsets[i] * 20);
        if (new_idx < 0)
            new_idx = 0;
        if (new_idx > 200)
            new_idx = 200;
        new_offset_counts[new_idx]++;
    }

    // print per-bin and cumulative proportions for both offset histograms
    int cum_org = 0;
    int cum_new = 0;
    for (i = 0; i <= 200; i++)
    {
        if (org_offset_counts[i] == 0 && new_offset_counts[i] == 0)
            continue;

        cum_org += org_offset_counts[i];
        cum_new += new_offset_counts[i];

        cout << fixed << setprecision(3) << i*0.05 - 5.0 << "\t"
             << org_offset_counts[i]/(float)org_offsets.size()  << "\t"
             << new_offset_counts[i]/(float)corr_offsets.size() << "\t"
             << cum_org/(float)org_offsets.size() << "\t"
             << cum_new/(float)corr_offsets.size() << endl;
    }
}
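/***********************************************************************
   A standalone illustration (not part of the original code) of the bin
   mapping used in output_pmc_rank_results() above: an offset in Da is
   mapped to one of 201 bins of width 0.05 Da spanning [-5.0, +5.0],
   and bin i is printed with the label i*0.05 - 5.0. One subtlety worth
   knowing when reading the output: int() truncates toward zero, so all
   offsets in the open interval (-0.05, 0.05) land in bin 100, making
   the central bin effectively twice as wide as the others.
************************************************************************/
#include <cstdio>

static int offset_to_bin(double offset)
{
    int idx = 100 + int(offset * 20);  // same mapping as in the function above
    if (idx < 0)   idx = 0;            // clamp underflow to the first bin
    if (idx > 200) idx = 200;          // clamp overflow to the last bin
    return idx;
}

int main()
{
    const double offsets[] = { -5.2, -0.06, -0.04, 0.0, 0.04, 0.06, 5.2 };
    size_t i;
    for (i = 0; i < sizeof(offsets)/sizeof(offsets[0]); i++)
    {
        const int bin = offset_to_bin(offsets[i]);
        std::printf("offset % .3f -> bin %3d (label % .3f)\n",
                    offsets[i], bin, bin * 0.05 - 5.0);
    }
    return 0;
}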