void MapAlignmentAlgorithmPoseClustering::align(const ConsensusMap & map, TransformationDescription & trafo) { // TODO: move this to updateMembers_? (if consensusMap prevails) // TODO: why does superimposer work on consensus map??? const ConsensusMap & map_model = reference_; ConsensusMap map_scene = map; // run superimposer to find the global transformation TransformationDescription si_trafo; superimposer_.run(map_model, map_scene, si_trafo); // apply transformation to consensus features and contained feature // handles for (Size j = 0; j < map_scene.size(); ++j) { //Calculate new RT double rt = map_scene[j].getRT(); rt = si_trafo.apply(rt); //Set RT of consensus feature centroid map_scene[j].setRT(rt); //Set RT of consensus feature handles map_scene[j].begin()->asMutable().setRT(rt); } //run pairfinder to find pairs ConsensusMap result; //TODO: add another 2map interface to pairfinder? std::vector<ConsensusMap> input(2); input[0] = map_model; input[1] = map_scene; pairfinder_.run(input, result); // calculate the local transformation si_trafo.invert(); // to undo the transformation applied above TransformationDescription::DataPoints data; for (ConsensusMap::Iterator it = result.begin(); it != result.end(); ++it) { if (it->size() == 2) // two matching features { ConsensusFeature::iterator feat_it = it->begin(); double y = feat_it->getRT(); double x = si_trafo.apply((++feat_it)->getRT()); // one feature should be from the reference map: if (feat_it->getMapIndex() != 0) { data.push_back(make_pair(x, y)); } else { data.push_back(make_pair(y, x)); } } } trafo = TransformationDescription(data); trafo.fitModel("linear"); }
void InternalCalibration::makeLinearRegression_(std::vector<DoubleReal> & observed_masses, std::vector<DoubleReal> & theoretical_masses) { if (observed_masses.size() != theoretical_masses.size()) { throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Number of observed and theoretical masses must agree."); } #ifdef DEBUG_CALIBRATION std::ofstream out("calibration_regression.txt"); std::vector<DoubleReal> rel_errors(observed_masses.size(), 0.); // determine rel error in ppm for the two reference masses for (Size ref_peak = 0; ref_peak < observed_masses.size(); ++ref_peak) { rel_errors[ref_peak] = (theoretical_masses[ref_peak] - observed_masses[ref_peak]) / theoretical_masses[ref_peak] * 1e6; out << observed_masses[ref_peak] << "\t" << rel_errors[ref_peak] << "\n"; std::cout << observed_masses[ref_peak] << " " << theoretical_masses[ref_peak] << std::endl; // std::cout << observed_masses[ref_peak]<<"\t"<<rel_errors[ref_peak]<<std::endl; } #endif TransformationDescription::DataPoints data; for (Size i = 0; i < observed_masses.size(); ++i) { data.push_back(std::make_pair(observed_masses[i], theoretical_masses[i])); } trafo_ = TransformationDescription(data); trafo_.fitModel("linear", Param()); #ifdef DEBUG_CALIBRATION // std::cout <<"\n\n---------------------------------\n\n"<< "after calibration "<<std::endl; for (Size i = 0; i < observed_masses.size(); ++i) { DoubleReal new_mass = trafo_.apply(observed_masses[i]); DoubleReal rel_error = (theoretical_masses[i] - (new_mass)) / theoretical_masses[i] * 1e6; std::cout << observed_masses[i] << "\t" << rel_error << std::endl; } #endif }
/**
  @brief Samples a stored transformation at regular intervals.

  Loads the transformation from option "in", applies it to every value from
  "min" to "max" (inclusive) in increments of "step", and either prints the
  (value, transformed value) pairs tab-separated to stdout (if "out" is
  empty) or stores them as the data points of a copy of the transformation
  in the file given by "out".

  If "max" is not greater than "min", "max" is derived from the largest
  first coordinate among the data points of the loaded transformation,
  rounded up via Math::ceilDecimal().

  @return EXECUTION_OK on success

  @throws Exception::InvalidParameter if "step" is not positive, if "max"
          has to be derived but the transformation has no data points, or
          if "max" is still not greater than "min" after derivation
*/
ExitCodes main_(int, const char **)
{
  String in = getStringOption_("in"), out = getStringOption_("out");

  TransformationDescription trafo_in;
  TransformationXMLFile().load(in, trafo_in);

  TransformationDescription::DataPoints data;
  DoubleReal min = getDoubleOption_("min"), max = getDoubleOption_("max"),
             step = getDoubleOption_("step");

  // a non-positive step would never let the sampling loop below terminate
  if (step <= 0)
  {
    throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'step' must be positive");
  }

  if (max <= min)
  {
    // derive the upper bound from the data points of the transformation
    data = trafo_in.getDataPoints();
    if (data.empty())
    {
      // without data points there is nothing to derive 'max' from (and
      // calling back() on an empty container would be undefined behaviour)
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'max' cannot be derived automatically because the transformation contains no data points");
    }
    sort(data.begin(), data.end());
    max = data.back().first;
    DoubleReal magnitude = floor(log10(max));
    max = Math::ceilDecimal(max, magnitude - 1);
    if (max <= min)
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'min' must be lower than 'max'");
    }
  }

  // "data" is reused from here on to collect the sampled points
  data.clear();
  const bool print_to_stdout = out.empty();
  for (DoubleReal value = min; value <= max; value += step)
  {
    DoubleReal transformed = trafo_in.apply(value);
    if (print_to_stdout)
    {
      cout << value << '\t' << transformed << endl;
    }
    else
    {
      data.push_back(make_pair(value, transformed));
    }
  }

  if (!print_to_stdout)
  {
    // keep everything from the input transformation except its data points
    TransformationDescription trafo_out(trafo_in);
    trafo_out.setDataPoints(data);
    TransformationXMLFile().store(out, trafo_out);
  }

  return EXECUTION_OK;
}
void MapAlignmentAlgorithmIdentification::computeTransformations_( vector<SeqToList>& rt_data, vector<TransformationDescription>& transforms, bool sorted) { Int size = rt_data.size(); // not Size because we compare to Ints later transforms.clear(); // filter RT data (remove peptides that elute in several fractions): // TODO // compute RT medians: LOG_DEBUG << "Computing RT medians..." << endl; vector<SeqToValue> medians_per_run(size); for (Int i = 0; i < size; ++i) { computeMedians_(rt_data[i], medians_per_run[i], sorted); } SeqToList medians_per_seq; for (vector<SeqToValue>::iterator run_it = medians_per_run.begin(); run_it != medians_per_run.end(); ++run_it) { for (SeqToValue::iterator med_it = run_it->begin(); med_it != run_it->end(); ++med_it) { medians_per_seq[med_it->first].push_back(med_it->second); } } // get reference retention time scale: either directly from reference file, // or compute consensus time scale bool reference_given = !reference_.empty(); // reference file given if (reference_given) { // remove peptides that don't occur in enough runs: LOG_DEBUG << "Removing peptides that occur in too few runs..." << endl; SeqToValue temp; for (SeqToValue::iterator ref_it = reference_.begin(); ref_it != reference_.end(); ++ref_it) { SeqToList::iterator med_it = medians_per_seq.find(ref_it->first); if ((med_it != medians_per_seq.end()) && (med_it->second.size() + 1 >= min_run_occur_)) { temp.insert(temp.end(), *ref_it); // new items should go at the end } } LOG_DEBUG << "Removed " << reference_.size() - temp.size() << " of " << reference_.size() << " peptides." << endl; temp.swap(reference_); } else // compute overall RT median per sequence (median of medians per run) { LOG_DEBUG << "Computing overall RT medians per sequence..." << endl; // remove peptides that don't occur in enough runs (at least two): LOG_DEBUG << "Removing peptides that occur in too few runs..." 
<< endl; SeqToList temp; for (SeqToList::iterator med_it = medians_per_seq.begin(); med_it != medians_per_seq.end(); ++med_it) { if (med_it->second.size() >= min_run_occur_) { temp.insert(temp.end(), *med_it); } } LOG_DEBUG << "Removed " << medians_per_seq.size() - temp.size() << " of " << medians_per_seq.size() << " peptides." << endl; temp.swap(medians_per_seq); computeMedians_(medians_per_seq, reference_); } if (reference_.empty()) { throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No reference RT information left after filtering"); } double max_rt_shift = param_.getValue("max_rt_shift"); if (max_rt_shift <= 1) { // compute max. allowed shift from overall retention time range: double rt_min = numeric_limits<double>::infinity(), rt_max = -rt_min; for (SeqToValue::iterator it = reference_.begin(); it != reference_.end(); ++it) { rt_min = min(rt_min, it->second); rt_max = max(rt_max, it->second); } double rt_range = rt_max - rt_min; max_rt_shift *= rt_range; // in the degenerate case of only one reference point, "max_rt_shift" // should be zero (because "rt_range" is zero) - this is covered below } if (max_rt_shift == 0) { max_rt_shift = numeric_limits<double>::max(); } LOG_DEBUG << "Max. allowed RT shift (in seconds): " << max_rt_shift << endl; // generate RT transformations: LOG_DEBUG << "Generating RT transformations..." 
<< endl; LOG_INFO << "\nAlignment based on:" << endl; // diagnostic output Size offset = 0; // offset in case of internal reference for (Int i = 0; i < size + 1; ++i) { if (i == reference_index_) { // if one of the input maps was used as reference, it has been skipped // so far - now we have to consider it again: TransformationDescription trafo; trafo.fitModel("identity"); transforms.push_back(trafo); LOG_INFO << "- " << reference_.size() << " data points for sample " << i + 1 << " (reference)\n"; offset = 1; } if (i >= size) break; // to be useful for the alignment, a peptide sequence has to occur in the // current run ("medians_per_run[i]"), but also in at least one other run // ("medians_overall"): TransformationDescription::DataPoints data; Size n_outliers = 0; for (SeqToValue::iterator med_it = medians_per_run[i].begin(); med_it != medians_per_run[i].end(); ++med_it) { SeqToValue::const_iterator pos = reference_.find(med_it->first); if (pos != reference_.end()) { if (abs(med_it->second - pos->second) <= max_rt_shift) { // found, and satisfies "max_rt_shift" condition! TransformationDescription::DataPoint point(med_it->second, pos->second, pos->first); data.push_back(point); } else { n_outliers++; } } } transforms.push_back(TransformationDescription(data)); LOG_INFO << "- " << data.size() << " data points for sample " << i + offset + 1; if (n_outliers) LOG_INFO << " (" << n_outliers << " outliers removed)"; LOG_INFO << "\n"; } LOG_INFO << endl; // delete temporary reference if (!reference_given) reference_.clear(); }
void MapAlignmentAlgorithmIdentification::computeTransformations_( vector<SeqToList> & rt_data, vector<TransformationDescription> & transforms, bool sorted) { Size size = rt_data.size(); transforms.clear(); // filter RT data (remove peptides that elute in several fractions): // TODO // compute RT medians: LOG_DEBUG << "Computing RT medians..." << endl; vector<SeqToValue> medians_per_run(size); for (Size i = 0; i < size; ++i) { computeMedians_(rt_data[i], medians_per_run[i], sorted); } SeqToList medians_per_seq; for (vector<SeqToValue>::iterator run_it = medians_per_run.begin(); run_it != medians_per_run.end(); ++run_it) { for (SeqToValue::iterator med_it = run_it->begin(); med_it != run_it->end(); ++med_it) { medians_per_seq[med_it->first] << med_it->second; } } // get reference retention time scale: either directly from reference file, // or compute consensus time scale bool reference_given = !reference_.empty(); // reference file given if (reference_given) { // remove peptides that don't occur in enough runs: LOG_DEBUG << "Removing peptides that occur in too few runs..." << endl; SeqToValue temp; SeqToValue::iterator pos = temp.begin(); // to prevent segfault below for (SeqToValue::iterator ref_it = reference_.begin(); ref_it != reference_.end(); ++ref_it) { SeqToList::iterator med_it = medians_per_seq.find(ref_it->first); if ((med_it != medians_per_seq.end()) && (med_it->second.size() + 1 >= min_run_occur_)) { temp.insert(pos, *ref_it); pos = --temp.end(); // would cause segfault if "temp" was empty } } temp.swap(reference_); } else // compute overall RT median per sequence (median of medians per run) { LOG_DEBUG << "Computing overall RT medians per sequence..." << endl; // remove peptides that don't occur in enough runs (at least two): LOG_DEBUG << "Removing peptides that occur in too few runs..." 
<< endl; SeqToList temp; SeqToList::iterator pos = temp.begin(); // to prevent segfault below for (SeqToList::iterator med_it = medians_per_seq.begin(); med_it != medians_per_seq.end(); ++med_it) { if (med_it->second.size() >= min_run_occur_) { temp.insert(pos, *med_it); pos = --temp.end(); // would cause segfault if "temp" was empty } } temp.swap(medians_per_seq); computeMedians_(medians_per_seq, reference_); } DoubleReal max_rt_shift = param_.getValue("max_rt_shift"); if (max_rt_shift == 0) { max_rt_shift = numeric_limits<DoubleReal>::max(); } else if (max_rt_shift <= 1) // compute max. allowed shift from overall retention time range: { DoubleReal rt_range, rt_min = reference_.begin()->second, rt_max = rt_min; for (SeqToValue::iterator it = ++reference_.begin(); it != reference_.end(); ++it) { rt_min = min(rt_min, it->second); rt_max = max(rt_max, it->second); } rt_range = rt_max - rt_min; max_rt_shift *= rt_range; } LOG_DEBUG << "Max. allowed RT shift (in seconds): " << max_rt_shift << endl; // generate RT transformations: LOG_DEBUG << "Generating RT transformations..." 
<< endl; LOG_INFO << "\nAlignment based on:" << endl; // diagnostic output for (Size i = 0, offset = 0; i < size + 1; ++i) { if (i == reference_index_ - 1) { // if one of the input maps was used as reference, it has been skipped // so far - now we have to consider it again: TransformationDescription trafo; trafo.fitModel("identity"); transforms.push_back(trafo); LOG_INFO << "- 0 data points for sample " << i + 1 << " (reference)\n"; offset = 1; } if (i >= size) break; // to be useful for the alignment, a peptide sequence has to occur in the // current run ("medians_per_run[i]"), but also in at least one other run // ("medians_overall"): TransformationDescription::DataPoints data; for (SeqToValue::iterator med_it = medians_per_run[i].begin(); med_it != medians_per_run[i].end(); ++med_it) { SeqToValue::const_iterator pos = reference_.find(med_it->first); if ((pos != reference_.end()) && (fabs(med_it->second - pos->second) <= max_rt_shift)) { // found, and satisfies "max_rt_shift" condition! data.push_back(make_pair(med_it->second, pos->second)); } } transforms.push_back(TransformationDescription(data)); LOG_INFO << "- " << data.size() << " data points for sample " << i + offset + 1 << "\n"; } LOG_INFO << endl; // delete temporary reference if (!reference_given) reference_.clear(); }