void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec) { //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0) //{ //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl; //return; //} PeakSpectrum new_CID_spec(CID_spec); windowMower_(new_CID_spec, 0.3, 1); Param zhang_param; zhang_param = zhang_.getParameters(); zhang_param.setValue("tolerance", fragment_mass_tolerance_); zhang_param.setValue("use_gaussian_factor", "true"); zhang_param.setValue("use_linear_factor", "false"); zhang_.setParameters(zhang_param); Normalizer normalizer; Param n_param(normalizer.getParameters()); n_param.setValue("method", "to_one"); normalizer.setParameters(n_param); normalizer.filterSpectrum(new_CID_spec); Size charge(2); double precursor_weight(0); // [M+H]+ if (!CID_spec.getPrecursors().empty()) { // believe charge of spectrum? if (CID_spec.getPrecursors().begin()->getCharge() != 0) { charge = CID_spec.getPrecursors().begin()->getCharge(); } else { // TODO estimate charge state } precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U); } //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl; // now delete all peaks that are right of the estimated precursor weight Size peak_counter(0); for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter) { if (it->getPosition()[0] > precursor_weight) { break; } } if (peak_counter < new_CID_spec.size()) { new_CID_spec.resize(peak_counter); } static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); Peak1D p; p.setIntensity(1); p.setPosition(oxonium_mass); new_CID_spec.push_back(p); p.setPosition(precursor_weight); new_CID_spec.push_back(p); // add complement to spectrum /* for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1) { // get m/z of complement double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U; // search if peaks are available that have similar m/z values Size count(0); bool found(false); for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count) { if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance) { // add peak intensity to corresponding peak in new_CID_spec new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity()); } } if (!found) { // infer this peak Peak1D p; p.setIntensity(it1->getIntensity()); p.setPosition(mz_comp); new_CID_spec.push_back(p); } }*/ CompNovoIonScoringCID ion_scoring; Param ion_scoring_param(ion_scoring.getParameters()); ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_); ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_); ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_); ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold")); ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score")); ion_scoring_param.setValue("max_isotope", max_isotope_); ion_scoring.setParameters(ion_scoring_param); Map<double, IonScore> ion_scores; ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge); new_CID_spec.sortByPosition(); /* cerr << "Size of ion_scores " << ion_scores.size() << endl; for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { cerr << it->first << " " << it->second.score << endl; }*/ #ifdef WRITE_SCORED_SPEC PeakSpectrum filtered_spec(new_CID_spec); filtered_spec.clear(); for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { Peak1D p; p.setIntensity(it->second.score); p.setPosition(it->first); filtered_spec.push_back(p); } DTAFile().store("spec_scored.dta", filtered_spec); #endif set<String> sequences; getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores); #ifdef SPIKE_IN sequences.insert("AFCVDGEGR"); sequences.insert("APEFAAPWPDFVPR"); sequences.insert("AVKQFEESQGR"); sequences.insert("CCTESLVNR"); sequences.insert("DAFLGSFLYEYSR"); sequences.insert("DAIPENLPPLTADFAEDK"); sequences.insert("DDNKVEDIWSFLSK"); sequences.insert("DDPHACYSTVFDK"); sequences.insert("DEYELLCLDGSR"); sequences.insert("DGAESYKELSVLLPNR"); sequences.insert("DGASCWCVDADGR"); sequences.insert("DLFIPTCLETGEFAR"); sequences.insert("DTHKSEIAHR"); sequences.insert("DVCKNYQEAK"); sequences.insert("EACFAVEGPK"); sequences.insert("ECCHGDLLECADDR"); sequences.insert("EFLGDKFYTVISSLK"); sequences.insert("EFTPVLQADFQK"); sequences.insert("ELFLDSGIFQPMLQGR"); sequences.insert("ETYGDMADCCEK"); sequences.insert("EVGCPSSSVQEMVSCLR"); sequences.insert("EYEATLEECCAK"); sequences.insert("FADLIQSGTFQLHLDSK"); sequences.insert("FFSASCVPGATIEQK"); sequences.insert("FLANVSTVLTSK"); sequences.insert("FLSGSDYAIR"); sequences.insert("FTASCPPSIK"); sequences.insert("GAIEWEGIESGSVEQAVAK"); sequences.insert("GDVAFIQHSTVEENTGGK"); sequences.insert("GEPPSCAEDQSCPSER"); sequences.insert("GEYVPTSLTAR"); sequences.insert("GQEFTITGQKR"); sequences.insert("GTFAALSELHCDK"); sequences.insert("HLVDEPQNLIK"); sequences.insert("HQDCLVTTLQTQPGAVR"); sequences.insert("HTTVNENAPDQK"); sequences.insert("ILDCGSPDTEVR"); sequences.insert("KCPSPCQLQAER"); sequences.insert("KGTEFTVNDLQGK"); sequences.insert("KQTALVELLK"); sequences.insert("KVPQVSTPTLVEVSR"); sequences.insert("LALQFTTNAKR"); sequences.insert("LCVLHEKTPVSEK"); sequences.insert("LFTFHADICTLPDTEK"); sequences.insert("LGEYGFQNALIVR"); sequences.insert("LHVDPENFK"); sequences.insert("LKECCDKPLLEK"); sequences.insert("LKHLVDEPQNLIK"); sequences.insert("LKPDPNTLCDEFK"); sequences.insert("LLGNVLVVVLAR"); sequences.insert("LLVVYPWTQR"); sequences.insert("LRVDPVNFK"); sequences.insert("LTDEELAFPPLSPSR"); sequences.insert("LVNELTEFAK"); sequences.insert("MFLSFPTTK"); sequences.insert("MPCTEDYLSLILNR"); sequences.insert("NAPYSGYSGAFHCLK"); sequences.insert("NECFLSHKDDSPDLPK"); sequences.insert("NEPNKVPACPGSCEEVK"); sequences.insert("NLQMDDFELLCTDGR"); sequences.insert("QAGVQAEPSPK"); sequences.insert("RAPEFAAPWPDFVPR"); sequences.insert("RHPEYAVSVLLR"); sequences.insert("RPCFSALTPDETYVPK"); sequences.insert("RSLLLAPEEGPVSQR"); sequences.insert("SAFPPEPLLCSVQR"); sequences.insert("SAGWNIPIGTLLHR"); sequences.insert("SCWCVDEAGQK"); sequences.insert("SGNPNYPHEFSR"); sequences.insert("SHCIAEVEK"); sequences.insert("SISSGFFECER"); sequences.insert("SKYLASASTMDHAR"); sequences.insert("SLHTLFGDELCK"); sequences.insert("SLLLAPEEGPVSQR"); sequences.insert("SPPQCSPDGAFRPVQCK"); sequences.insert("SREGDPLAVYLK"); sequences.insert("SRQIPQCPTSCER"); sequences.insert("TAGTPVSIPVCDDSSVK"); sequences.insert("TCVADESHAGCEK"); sequences.insert("TQFGCLEGFGR"); sequences.insert("TVMENFVAFVDK"); sequences.insert("TYFPHFDLSHGSAQVK"); sequences.insert("TYMLAFDVNDEK"); sequences.insert("VDEVGGEALGR"); sequences.insert("VDLLIGSSQDDGLINR"); sequences.insert("VEDIWSFLSK"); sequences.insert("VGGHAAEYGAEALER"); sequences.insert("VGTRCCTKPESER"); sequences.insert("VKVDEVGGEALGR"); sequences.insert("VKVDLLIGSSQDDGLINR"); sequences.insert("VLDSFSNGMK"); sequences.insert("VLSAADKGNVK"); sequences.insert("VPQVSTPTLVEVSR"); sequences.insert("VTKCCTESLVNR"); sequences.insert("VVAASDASQDALGCVK"); sequences.insert("VVAGVANALAHR"); sequences.insert("YICDNQDTISSK"); sequences.insert("YLASASTMDHAR"); sequences.insert("YNGVFQECCQAEDK"); #endif SpectrumAlignmentScore spectra_zhang; spectra_zhang.setParameters(zhang_param); vector<PeptideHit> hits; Size missed_cleavages = param_.getValue("missed_cleavages"); for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it) { Size num_missed = countMissedCleavagesTryptic_(*it); if (missed_cleavages < num_missed) { //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl; continue; } PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, *it, charge); //normalizer.filterSpectrum(CID_sim_spec); double cid_score = zhang_(CID_sim_spec, CID_spec); PeptideHit hit; hit.setScore(cid_score); hit.setSequence(getModifiedAASequence_(*it)); hit.setCharge((Int)charge); //TODO unify charge interface: int or size? hits.push_back(hit); //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl; } // rescore the top hits id.setHits(hits); id.assignRanks(); hits = id.getHits(); SpectrumAlignmentScore alignment_score; Param align_param(alignment_score.getParameters()); align_param.setValue("tolerance", fragment_mass_tolerance_); align_param.setValue("use_linear_factor", "true"); alignment_score.setParameters(align_param); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits"); if (hits.size() > number_of_prescoring_hits) { hits.resize(number_of_prescoring_hits); } for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge); normalizer.filterSpectrum(CID_sim_spec); //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec); //double cid_score = spectra_zhang(CID_sim_spec, CID_spec); double cid_score = alignment_score(CID_sim_spec, CID_spec); //cerr << "Final: " << it->getSequence() << " " << cid_score << endl; it->setScore(cid_score); } id.setHits(hits); id.assignRanks(); hits = id.getHits(); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_hits = param_.getValue("number_of_hits"); if (id.getHits().size() > number_of_hits) { hits.resize(number_of_hits); } id.setHits(hits); id.assignRanks(); return; }
int single_map_align_backbone (Descr *descr1, Protein * protein1, Representation *rep1, Descr *descr2, Protein * protein2, Representation *rep2, Map * map) { /* for now, we will just postprocess the best map */ int no_res_1 = protein1->length, no_res_2= protein2->length; int resctr1, resctr2; int map_size; int *residue_map_i2j, *residue_map_j2i; int *type_1, *type_2; int longest_element_length = (protein1->length > protein2->length) ? protein1->length : protein2->length; double d0 = options.distance_tol_in_bb_almt; double aln_score, rmsd; double ** similarity; double ** sim_in_element; double **x, **y; double **R, T[3], q[4]; double total_score = 0; /* for the MC: */ int max_no_steps = 20, no_steps = 0; int done = 0, toggle = 0; double *current_q, *old_q, *current_T, *old_T; double *best_q, *best_T; double old_score = total_score, current_score = 0.0, d_mc = 0.5; double max_score; double t_mc, d_init; int alignment_size (int * residue_map_i2j, int no_res_1 ); int closeness_score_for_sse_almt (Descr *descr1, Representation *rep1, Representation *rep2, Map * map, Protein *protein1, Protein *protein2, double **R, double *T, double d0, double ** similarity, double * score_ptr); int following_loop (int *element_begin, int *element_end, int no_of_elements, int no_of_res, int element_ctr, int * first_res, int * last_res); int map2rotation (Protein *protein1, Protein *protein2, int *residue_map_i2j, double **x, double **y, double *q, double *T, double *rmsd); int out_of_order_alignment (Descr *descr1, Descr *descr2, Map *map, int *element_1_begin, int *element_1_end, int *element_2_begin, int *element_2_end, int longest_element_length, double ** similarity, double ** sim_in_element, int *residue_map_i2j, int *residue_map_j2i, double * score_ptr); int preceding_loop (int *element_begin, int *element_end, int element_ctr, int * first_res, int * last_res); if ( ! (R=dmatrix(3,3) ) ) return 1; /* compiler is bugging me otherwise */ construct_translation_vecs (rep1, rep2, map); /* make sure that we have all the info we might need */ /* define matrix, the size of nr of residues in set of SSEs x nr of residues in the other set of SSEs, and fill it with -1 */ similarity = dmatrix (no_res_1, no_res_2); if ( !similarity ) return 1; sim_in_element = dmatrix (no_res_1, no_res_2); if ( !similarity ) return 1; for (resctr1=0; resctr1<no_res_1; resctr1++) { for (resctr2=0; resctr2<no_res_2; resctr2++) { similarity[resctr1][resctr2] = -1; } } /* alloc */ type_1 = protein1->sse_sequence; type_2 = protein2->sse_sequence; if ( ! (residue_map_i2j = emalloc (no_res_1*sizeof(int))) ) return 1; if ( ! (residue_map_j2i = emalloc (no_res_2*sizeof(int))) ) return 2; if ( ! (x = dmatrix (3, no_res_1+no_res_2))) exit(1); if ( ! (y = dmatrix (3, no_res_1+no_res_2))) exit(1); /*********************************************************************/ /*********************************************************************/ /* aliases */ int *element_1_begin, *element_1_end; /* "element" here means SSE */ int *element_2_begin, *element_2_end; element_1_begin = protein1->element_begin; element_1_end = protein1->element_end; element_2_begin = protein2->element_begin; element_2_end = protein2->element_end; /************************************************************/ /************************************************************/ /* ALIGNMENT, round 1 */ /************************************************************/ /* for all mapped blocks calculate similarity as exp (-d/d0) */ total_score = 0.0; quat_to_R (map->q, R); closeness_score_for_sse_almt (descr1, rep1, rep2, map, protein1, protein2, R, NULL, d0, similarity, &total_score); /* run Smith-Waterman and use the mapped CA to find the transformation */ /* I have another copy of SW here (the first one is in struct_map.c) */ /* so I wouldn't fumble with parameters - the two should be joined eventually */ if ( options.search_algorithm == SEQUENTIAL ) { smith_waterman_2 (no_res_1, no_res_2, similarity, residue_map_i2j, residue_map_j2i, &aln_score); } else { out_of_order_alignment (descr1, descr2, map, element_1_begin, element_1_end, element_2_begin, element_2_end, longest_element_length, similarity, sim_in_element, residue_map_i2j, residue_map_j2i, &aln_score); } map2rotation (protein1, protein2, residue_map_i2j, x, y, q, T, &rmsd); quat_to_R (q, R); current_score = alignment_score (protein1, protein2, residue_map_i2j, R, T, d0); /*********************************************************/ /* fiddle iteratively with the transformation */ if ( ! (current_q = emalloc (4*sizeof(double)) )) return 1; if ( ! (old_q = emalloc (4*sizeof(double)) )) return 1; if ( ! (best_q = emalloc (4*sizeof(double)) )) return 1; if ( ! (current_T = emalloc (3*sizeof(double)) )) return 1; if ( ! (old_T = emalloc (3*sizeof(double)) )) return 1; if ( ! (best_T = emalloc (3*sizeof(double)) )) return 1; srand48 (time (0)); memcpy (current_q, q, 4*sizeof(double)); memcpy ( old_q, q, 4*sizeof(double)); memcpy ( best_q, q, 4*sizeof(double)); memcpy ( old_T, T, 3*sizeof(double)); memcpy ( best_T, T, 3*sizeof(double)); memcpy (current_T, T, 3*sizeof(double)); quat_to_R ( current_q, R); d_init = d0; /* t_mc = exp ( (1.0- (double)anneal_round)/10.0); */ d_mc = d_init; t_mc = 5; memcpy (current_q, best_q, 4*sizeof(double)); memcpy (current_T, best_T, 3*sizeof(double)); memcpy (old_q, best_q, 4*sizeof(double)); memcpy (old_T, best_T, 3*sizeof(double)); old_score = 0; max_score = 0; no_steps = 0; toggle = 1; done = 0; while (no_steps < max_no_steps && !done ) { closeness_score_for_sse_almt (descr1, NULL, NULL, map, protein1, protein2, R, current_T, d_mc, similarity, &total_score); if ( options.search_algorithm == SEQUENTIAL ) { smith_waterman_2 (no_res_1, no_res_2, similarity, residue_map_i2j, residue_map_j2i, &aln_score); } else { out_of_order_alignment (descr1, descr2, map, element_1_begin, element_1_end, element_2_begin, element_2_end, longest_element_length, similarity, sim_in_element, residue_map_i2j, residue_map_j2i, &aln_score); } map2rotation (protein1, protein2, residue_map_i2j, x, y, current_q, current_T, &rmsd); quat_to_R ( current_q, R); current_score = alignment_score (protein1, protein2, residue_map_i2j, R, current_T, d_mc); if ( current_score > max_score ) { max_score = current_score; memcpy (best_q, current_q, 4*sizeof(double)); memcpy (best_T, current_T, 3*sizeof(double)); } if (old_score) done = ( fabs(old_score-current_score)/old_score < 0.01); old_score = current_score; no_steps++; } memcpy (q, best_q, 4*sizeof(double)); memcpy (T, best_T, 3*sizeof(double)); free (current_q); free (old_q); free (best_q); free (current_T); free (old_T); free (best_T); /************************************************************/ /************************************************************/ /* ALIGNMENT, round 2 */ /************************************************************/ /************************************************************/ /* find the similarity matrix for this new rotation -- this*/ /* time extending to neighboring elements */ closeness_score_for_bb_almt (map, protein1, protein2, R, T, d0, similarity, &total_score); /************************************************************/ memset (residue_map_i2j, 0, no_res_1*sizeof(int)); memset (residue_map_j2i, 0, no_res_2*sizeof(int)); if ( options.search_algorithm == SEQUENTIAL ) { smith_waterman_2 (no_res_1, no_res_2, similarity, residue_map_i2j, residue_map_j2i, &aln_score); } else { out_of_order_alignment (descr1, descr2, map, element_1_begin, element_1_end, element_2_begin, element_2_end, longest_element_length, similarity, sim_in_element, residue_map_i2j, residue_map_j2i, &aln_score); } map2rotation (protein1, protein2, residue_map_i2j, x, y, q, T, &rmsd); quat_to_R (q, R); //aln_score = alignment_score (protein1, protein2, residue_map_i2j, R, T, d0); map_size = alignment_size (residue_map_i2j, protein1->length); memcpy (&(map->q[0]), &q[0], 4*sizeof(double)); memcpy (&(map->T[0]), &T[0], 3*sizeof(double)); map->x2y_residue_level = residue_map_i2j; map->y2x_residue_level = residue_map_j2i; map->x2y_residue_l_size = no_res_1; map->y2x_residue_l_size = no_res_2; /*************************************************************************/ map->res_almt_length = map_size; map->aln_score = aln_score; map->res_rmsd = rmsd; free_dmatrix(R); free_dmatrix (similarity); free_dmatrix (sim_in_element); free_dmatrix (x); free_dmatrix (y); return 0; }