Esempio n. 1
0
bool SeqPath::check_if_correct(const string& str, const Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const char *path_str = seq_str.c_str();
	const char *corr_str = str.c_str();

	int len_path_str = strlen(path_str);
	int len_corr_str = strlen(corr_str);

	if (len_path_str>len_corr_str)
		return false;

	
	int i;
	for (i=0; i<=len_corr_str-len_path_str; i++)
	{
		int j;
		bool correct_seq = true;
		for (j=0; j<len_path_str; j++)
			if (! (path_str[j] == corr_str[i+j] ||
				  (path_str[j] == 'I' && corr_str[i+j]== 'L') ||
				  (path_str[j] == 'L' && corr_str[i+j]== 'I') ||
				  (path_str[j] == 'Q' && corr_str[i+j]== 'K') ||
				  (path_str[j] == 'K' && corr_str[i+j]== 'Q') ) )
			{
				correct_seq = false;
				break;
			}



		if (correct_seq)
		{
			// check prefix mass
			Peptide pep;
			pep.parseFromString(config,corr_str);
			const vector<int>& aas= pep.get_amino_acids();
			mass_t mass=0;
			int j;

			if (n_term_mass == 0 && i==0)
				return true;

			for (j=0; j<aas.size(); j++)
			{
				mass+=aa2mass[aas[j]];
				if (fabs(mass-this->n_term_mass)<6)
					return true;

				if (mass>n_term_mass)
					break;
			}
		}
	}

	
	
	return false;
}
Esempio n. 2
0
// Builds the fixed list of five (name, formula) entries used as peptide data.
// Each Formula is constructed from the literal counts given below; the
// meaning of each positional argument is defined by Peptide::Formula.
PepData create5PeptideData()
{
    using Peptide::Formula;

    PepData fivePeptides;

    fivePeptides.push_back(make_pair("Angiotensin II", Formula(50, 71, 13, 12)));
    fivePeptides.push_back(make_pair("Bombesin",       Formula(71, 110, 24, 18, 1)));
    fivePeptides.push_back(make_pair("Substance P",    Formula(63, 98, 18, 13, 1)));
    fivePeptides.push_back(make_pair("Neurotensin",    Formula(78, 121, 21, 20)));
    fivePeptides.push_back(make_pair("Alpha1-6",       Formula(45, 59, 11, 8)));

    return fivePeptides;
}
Esempio n. 3
0
    // Collects every occurrence of `peptide`'s sequence inside peptide_'s
    // sequence that satisfies the limits in config_ (length, minimum
    // specificity, maximum missed cleavages) and appends one DigestedPeptide
    // per accepted occurrence to `result`.
    // NOTE(review): the visible snippet ends inside this function, before the
    // return statement and closing brace.
    inline vector<DigestedPeptide> find_all(const Peptide& peptide)
    {
        typedef boost::iterator_range<string::const_iterator> const_string_iterator_range;

        digest(); // populate sites_ member if necessary

        const string& sequence_ = peptide_.sequence();

        vector<DigestedPeptide> result;

        // Reject queries whose length is outside the configured bounds.
        if ((int) peptide.sequence().length() > config_.maximumLength ||
            (int) peptide.sequence().length() < config_.minimumLength)
            return result;

        // Find all matches of the query sequence in the full sequence.
        vector<const_string_iterator_range> instances;
        bal::find_all(instances, sequence_, peptide.sequence());

        BOOST_FOREACH(const_string_iterator_range& range, instances)
        {
            // Offsets of the first and last residue of this match (inclusive).
            size_t beginOffset = range.begin() - sequence_.begin();
            size_t endOffset = beginOffset + peptide.sequence().length() - 1;

            // A terminus is specific when sitesSet_ contains the position just
            // before the match (N-terminus) or its last position (C-terminus).
            bool NTerminusIsSpecific = sitesSet_.count(int(beginOffset) - 1) > 0;
            bool CTerminusIsSpecific = sitesSet_.count(int(endOffset)) > 0;

            if (((size_t) NTerminusIsSpecific + (size_t) CTerminusIsSpecific) < (size_t) config_.minimumSpecificity)
                continue;

            // Count sites in [beginOffset, endOffset) as missed cleavages.
            size_t missedCleavages = 0;
            for (size_t i = beginOffset; i < endOffset; ++i)
                if (sitesSet_.count((int) i) > 0)
                    ++missedCleavages;

            if (missedCleavages > (size_t) config_.maximumMissedCleavages)
                continue;

            // Single flanking residues, left empty at the ends of the sequence.
            string NTerminusPrefix, CTerminusSuffix;
            if (beginOffset > 0)
			    NTerminusPrefix = sequence_.substr(beginOffset-1, 1);
		    if (endOffset+1 < sequence_.length())
			    CTerminusSuffix = sequence_.substr(endOffset+1, 1);

            result.push_back(DigestedPeptide(peptide,
                                             beginOffset,
                                             missedCleavages,
                                             NTerminusIsSpecific,
                                             CTerminusIsSpecific,
                                             NTerminusPrefix,
                                             CTerminusSuffix));
        }
Esempio n. 4
0
// Counts the hydrogen bonds formed between different peptide strands in one
// snapshot and returns the total.
//
// Every unordered pair of peptides (a, b) with a < b is examined. For each
// residue of peptide a and each residue of peptide b, a bond is counted when
// b's NH is bonded to a's C=O (isHbondedNH) and another when b's C=O is bonded
// to a's N-H (isHbondedCO). `box` is passed through to the bond tests.
// The number of residues per peptide comes from the external `numRes`.
int countInterpeptideHB(vector<Peptide*> &peptides, double box[3]){
	const int peptideCount = peptides.size();
	int bondCount = 0;

	for(int a = 0; a < peptideCount; ++a) {
		Peptide* first = peptides.at(a);

		for(int resA = 0; resA < numRes; ++resA) {
			// Backbone atom coordinates of residue resA on the first peptide.
			double nCoord[3], hCoord[3], oCoord[3], cCoord[3];
			first->getNH(resA, nCoord);
			first->getHofNH(resA, hCoord);
			first->getO(resA, oCoord);
			first->getC(resA, cCoord);

			for(int b = a + 1; b < peptideCount; ++b) {
				Peptide* second = peptides.at(b);

				for(int resB = 0; resB < numRes; ++resB) {
					if(second->isHbondedNH(resB, oCoord, cCoord, box))
						++bondCount;

					if(second->isHbondedCO(resB, nCoord, hCoord, box))
						++bondCount;
				}
			}
		}
	}

	return bondCount;
}
Esempio n. 5
0
// Sums the num_aa of multi-edges on this path whose breakage mass matches an
// expected breakage mass of peptide `p`.
//
// A breakage matches when an expected mass lies within
// config->getTolerance() * 1.25 of it. The expected masses are scanned with a
// single forward-only cursor (idx), so both lists are assumed to be in
// ascending mass order — TODO confirm against calc_expected_breakage_masses.
//
// Returns the accumulated amino-acid count.
int  MultiPath::get_num_correct_aas(const PrmGraph& prm, const Peptide& p, Config *config) const
{
	const mass_t tolerance = config->getTolerance() * 1.25;
	vector<mass_t> break_masses;
	size_t idx=0;
	int num_correct=0;

	p.calc_expected_breakage_masses(config,break_masses);

	for (size_t i=0; i<breakages.size(); i++)
	{
		const mass_t& mass = breakages[i]->mass;
		const mass_t max_mass = mass + tolerance;
		const mass_t min_mass = mass - tolerance;

		// Skip expected masses that are too light to match this breakage.
		while (idx < break_masses.size() && break_masses[idx] < min_mass)
			idx++;

		// All expected masses consumed: break_masses[idx] would be out of
		// range, and since idx never decreases no later breakage can match.
		if (idx >= break_masses.size())
			break;

		if (break_masses[idx]>max_mass)
			continue;

		// NOTE(review): edge_idxs is indexed with the expected-mass cursor
		// (idx), not the breakage index (i); kept as in the original —
		// confirm which index is intended.
		if (idx<breakages.size()-1 && edge_idxs[idx]>=0)
			num_correct += prm.get_multi_edge(edge_idxs[idx]).num_aa;
	}
	return num_correct;
}
std::map<int, double> IonizationEfficiencySimulator::calc_charge_distribution(const Peptide &peptide) {
	std::map<int,double> charge_to_percentage;


	int num_basic = 1+peptide.num_basic_residues();
	boost::math::binomial bd(num_basic, .7+(g_uniform_distribution(g_rng)*.3)); // success rate in [0.7,1.0]

	for (int i = 1; i <= num_basic; i++) {
		charge_to_percentage[i] = boost::math::pdf(bd,i);
		//std::cout << i << " " << num_basic << " " << boost::math::pdf(bd,i) << std::endl;
	}

	return charge_to_percentage;
}
Esempio n. 7
0
// Counts positions at which this peptide and `other` carry the same amino
// acid at (nearly) the same breakage mass.
//
// Both peptides' expected breakage masses are computed, then walked in
// parallel: when the two current masses differ by less than 1.0 the amino
// acids at those indices are compared and both cursors advance; otherwise the
// cursor pointing at the smaller mass advances. Returns the number of equal
// amino acids found this way.
int Peptide::calc_number_of_correct_aas(Config *config,const Peptide& other) const
{
	const vector<int>& their_aas = other.get_amino_acids();
	const int my_len    = amino_acids.size();
	const int their_len = their_aas.size();

	vector<mass_t> my_masses, their_masses;
	calc_expected_breakage_masses(config, my_masses);
	other.calc_expected_breakage_masses(config, their_masses);

	int matched = 0;
	int mine = 0;
	int theirs = 0;

	while (mine < my_len && theirs < their_len)
	{
		const bool aligned = fabs(my_masses[mine] - their_masses[theirs]) < 1.0;

		if (! aligned)
		{
			// Advance whichever side is behind in mass.
			if (my_masses[mine] < their_masses[theirs])
				++mine;
			else
				++theirs;
			continue;
		}

		if (amino_acids[mine] == their_aas[theirs])
			++matched;

		++mine;
		++theirs;
	}

	return matched;
}
Esempio n. 8
0
int SeqPath::get_num_correct_aas(const Peptide& pep, const Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const vector<int>& pep_aas = pep.get_amino_acids();

	int num_correct=0;
	int i;

	vector<mass_t> pep_masses;
	vector<int> path_aas;
	
	get_amino_acids(path_aas);

	pep_masses.resize(pep_aas.size(),0);
	for (i=1; i<pep_aas.size(); i++)
		pep_masses[i]=pep_masses[i-1]+aa2mass[pep_aas[i-1]];

	mass_t path_mass = n_term_mass;
	for (i=0; i<path_aas.size(); i++)
	{
		const int path_aa = path_aas[i];
		int j;
		for (j=0; j<pep_aas.size(); j++)
		{
			const int pep_aa = pep_aas[j];
			if (fabs(pep_masses[j]-path_mass)<1.0 && pep_aas[j] == path_aas[i])
			{
				num_correct++;
				break;
			}
		}

		path_mass += aa2mass[path_aas[i]];
	}

	return num_correct;
}
Esempio n. 9
0
// Reports whether every breakage on this path matches an expected breakage
// mass of peptide `p`.
//
// A breakage matches when an expected mass lies within
// config->getTolerance() * 1.25 of it. The expected masses are scanned with a
// single forward-only cursor, so both lists are assumed to be in ascending
// mass order — TODO confirm against calc_expected_breakage_masses.
//
// Returns false as soon as one breakage has no matching expected mass,
// including the case where the expected masses run out; true otherwise
// (also when the path has no breakages).
bool MultiPath::check_if_correct(const Peptide& p, Config *config) const
{
	const mass_t tolerance = config->getTolerance() * 1.25;
	vector<mass_t> break_masses;
	size_t idx=0;

	p.calc_expected_breakage_masses(config,break_masses);

	for (size_t i=0; i<breakages.size(); i++)
	{
		const mass_t& mass = breakages[i]->mass;
		const mass_t max_mass = mass + tolerance;
		const mass_t min_mass = mass - tolerance;

		// Skip expected masses that are too light to match this breakage.
		while (idx < break_masses.size() && break_masses[idx] < min_mass)
			idx++;

		// Running off the end means nothing is heavy enough to match; the
		// bounds check must come first so break_masses[idx] is never read
		// out of range.
		if (idx >= break_masses.size() || break_masses[idx]>max_mass)
			return false;
	}

	return true;
}
Esempio n. 10
0
// Reads "<peptide> <charge>" lines from input_file and, for each one, prints
// the top predicted fragment peaks (at most num_peaks) to cout as a table of
// rank, ion label and position, m/z and score.
//
// Exits the process with status 1 if the file cannot be opened. Lines that do
// not parse into a string and an integer are skipped silently; lines with a
// charge outside the supported range are reported as "Invalid charge!".
void AllScoreModels::predict_fragmentation( const char* input_file, size_t num_peaks)
{
	FILE* stream = fopen(input_file,"r");
	if (! stream)
	{
		cout << "Error: couldn't open file for reading: " << input_file << endl;
		exit(1);
	}

	// NOTE(review): dnv_rank is fetched but not used anywhere in this function.
	PeptideRankScorer *dnv_rank = (PeptideRankScorer *)get_rank_model_ptr(1);
	PeakRankModel *prm = this->get_peak_prediction_model_ptr(3);

	// fgets stores at most 127 characters, so a "%s" token always fits in pep_str.
	char buffer[128];
	char pep_str[128];
	while (fgets(buffer,128,stream))
	{
		int charge;

		// Require both the peptide string and the charge.
		if (sscanf(buffer,"%s %d",pep_str,&charge) != 2)
			continue;

		cout << ">> " << pep_str << "\t" << charge << endl;
		// Valid charges are 1 .. get_size_thresholds().size()-1.
		if (charge<1 || charge>=prm->get_size_thresholds().size())
		{
			cout << "Invalid charge!" << endl;
			continue;
		}

		Peptide pep;
		pep.parseFromString(&config_,static_cast<string>(pep_str));


		// Describe the peptide as a complete solution with the given charge.
		PeptideSolution sol;
		sol.pep = pep;
		sol.reaches_n_terminal=true;
		sol.reaches_c_terminal=true;
		sol.charge = charge;
		sol.pm_with_19 = pep.get_mass_with_19();

		PeptidePeakPrediction ppp;
		prm->calc_peptide_predicted_scores(sol, ppp);

		const size_t num_frags = ppp.frag_idxs.size();
		vector< vector<int> > predicted_ranks;
		calc_combined_peak_ranks(ppp.rank_scores, predicted_ranks);

		// Keep every (fragment, position) pair whose combined rank is below 999.
		vector<PeakTuple> tuples;
		for (size_t f=0; f<num_frags; f++)
			for (size_t i=0; i<ppp.rank_scores[f].size(); i++)
				if (predicted_ranks[f][i]<999)
				{
					PeakTuple pt;
					pt.frag_idx = f;
					pt.pos =i;
					pt.rank = predicted_ranks[f][i];
					pt.score = ppp.rank_scores[f][i];
					tuples.push_back(pt);
				}
		
		// Ordering is defined by PeakTuple's operator<.
		sort(tuples.begin(),tuples.end());

		if (tuples.size()<1)
			continue;

		const size_t num_aas = pep.get_num_aas();
		vector<mass_t> breakage_masses;
		pep.calc_expected_breakage_masses(&config_, breakage_masses);

		cout << fixed << "Rank\tIon\tm/z\tScore" << endl;
		for (size_t i=0; i<num_peaks && i<tuples.size(); i++)
		{
			PeakTuple pt = tuples[i];
			cout << i+1 << "\t";
			const FragmentType& ft = config_.get_fragment(ppp.frag_idxs[pt.frag_idx]);
			// Prefix fragments are numbered by pos, others by num_aas - pos.
			cout << ft.label << ":" << (ft.orientation == PREFIX ? pt.pos : num_aas - pt.pos) << "\t";

			mass_t mz =  ft.calc_expected_mass(breakage_masses[pt.pos],pep.get_mass_with_19());
			cout << setprecision(2);
			// Pad with spaces so m/z values below 1000 and below 100 line up.
			if (mz<100)
				cout << " ";
			if (mz<1000)
				cout << " ";

			cout << mz << "\t";
			cout << setprecision(3) << pt.score << endl;
		}
		cout << endl;
	}

	fclose(stream);
}
Esempio n. 11
0
// Scores every node of `prm` using the single in-edge/out-edge combination
// that agrees with `peptide`, and stores that score as the node's only score
// combo.
//
// Nodes are visited in index order while a cursor (aa_idx, p_mass) advances
// through the peptide until the accumulated mass is within 0.1 of the node
// mass. If the peptide is exhausted before the last node, the node masses and
// the peptide's expected breakage masses are printed and the process exits
// with status 1.
void AdvancedScoreModel::score_peptide_node_combos(PrmGraph *prm, const Peptide& peptide ) const
{
	// NOTE(review): org_aas is not used in this function.
	const vector<int>& org_aas		= config.get_org_aa();
	const vector<mass_t>& aa2mass	= config.get_aa2mass();
	const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
	const int num_nodes		   = prm->get_num_nodes();
	const vector<int>& pep_aas = peptide.get_amino_acids();
	const int num_pep_aas = pep_aas.size();

	// Running prefix mass of the peptide and the number of residues consumed.
	mass_t p_mass=0;
	int aa_idx=0;

	int i;
	for (i=0; i<num_nodes; i++)
	{
		Node& node = prm->get_non_const_node(i);
		// Model selected by charge, size index and the node's region index.
		const RegionalScoreModel& score_model = 
			regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];

		// NEG_INF marks "no matching edge found".
		int in_edge_idx=NEG_INF,  in_edge_variant=NEG_INF;
		int out_edge_idx=NEG_INF, out_edge_variant=NEG_INF;

	//	cout << "N: " << node.mass << endl;
		// Consume residues until the prefix mass lines up with this node.
		while (aa_idx<pep_aas.size() && fabs(p_mass-node.mass)>0.1)
		{
			p_mass += aa2mass[pep_aas[aa_idx]];
			aa_idx++;
	//		cout << aa_idx << "\t" << p_mass << endl;
		}
		
		// Peptide used up before reaching the last node: dump diagnostics and abort.
		if (aa_idx == pep_aas.size() && i != num_nodes-1)
		{
			int j;
			for (j=0; j<num_nodes; j++)
				cout << j << "\t" << prm->get_node(j).mass << endl;

			cout << endl << "PEP:" << endl;
			vector<mass_t> exp_masses;
			peptide.calc_expected_breakage_masses((Config *)&config,exp_masses);
			for (j=0; j<exp_masses.size(); j++)
				cout << j << "\t" << exp_masses[j] << endl;

			cout << "Error: mismatch between nodes and peptide!" << endl;
			exit(1);
		}

		// First incoming edge with a variant equal to the num_aa residues
		// that end at the cursor.
		if (node.in_edge_idxs.size()>0)
		{
			int j;
			for (j=0; j<node.in_edge_idxs.size(); j++)
			{
				const int edge_idx = node.in_edge_idxs[j];
				const MultiEdge& in_edge = multi_edges[edge_idx];
				const int num_aa = in_edge.num_aa;

				// Edge spans more residues than have been consumed so far.
				if (num_aa>aa_idx)
					continue;

				const int var_idx = in_edge.get_variant_idx(num_aa,&pep_aas[aa_idx-num_aa]);
				if (var_idx<0)
					continue;

				in_edge_idx = edge_idx;
				in_edge_variant = var_idx;
				break;
			}
		}

		// First outgoing edge with a variant equal to the num_aa residues
		// that start at the cursor.
		if (node.out_edge_idxs.size()>0)
		{
			int j;
			for (j=0; j<node.out_edge_idxs.size(); j++)
			{
				const int edge_idx = node.out_edge_idxs[j];
				const MultiEdge& out_edge = multi_edges[edge_idx];
				const int num_aa = out_edge.num_aa;

				// Edge would extend past the end of the peptide.
				if (num_aa + aa_idx >num_pep_aas)
					continue;

				const int var_idx = out_edge.get_variant_idx(num_aa,&pep_aas[aa_idx]);
				if (var_idx<0)
					continue;

				out_edge_idx = edge_idx;
				out_edge_variant = var_idx;
				break;
			}
		}


		BreakageInfo info;
		prm->fill_breakage_info(this,&info,i,in_edge_idx,in_edge_variant,out_edge_idx,out_edge_variant);
	
		// Discard any previous combos; only the peptide-consistent one is kept.
		node.score_combos.clear();

	//	cout << in_edge_idx << " " << in_edge_variant << " " << out_edge_idx << " " << out_edge_variant << "\t";

		info.score = score_model.score_a_single_breakage_combo(prm, node, &node.breakage, info);
	
		node.score_combos[ScoreComboLoc(info)]=info.score;
		node.score = info.score; 
		node.breakage.score = node.score;

	//	cout << node.score << endl;
	}
	prm->set_has_node_combo_scores(true);
}
// readFromFile - reads one .ms2 file
//
// Parses the file line by line, dispatching on the first character:
//   'H'   - skipped
//   'S'   - starts a new spectrum; the previous one (if any) is finished and
//           possibly inserted into m_lib. Two scan numbers and the precursor
//           m/z are read from the line.
//   'Z'   - charge and mw
//   'D'   - "seq" or "modified seq" annotations
//   other - treated as a peak line "<mz> <intensity>"
// A spectrum is turned into a library entry only if it has at least one peak
// and a non-empty sequence; the modified sequence, when present, replaces the
// plain one. If the file cannot be opened an error is logged and the function
// returns without importing anything.
void SpectraSTMs2LibImporter::readFromFile(string& impFileName) {

  
  ifstream fin;
  if (!myFileOpen(fin, impFileName)) {
    g_log->error("CREATE", "Cannot open .ms2 file \"" + impFileName + "\" for reading. File skipped.");
    return;
  }
  
  g_log->log("MS2 IMPORT", "Importing .ms2 file \"" + impFileName + "\"."); 
  
  if (g_verbose) {
    cout << "\nImporting spectra from .ms2 library file..." << endl;
  } 
  
  // start the progress count
  ProgressCount pc(!g_quiet && !g_verbose, 500, 0);
  pc.start("\nImporting spectra from .ms2 library file");
  
  // State of the spectrum currently being read.
  string line("");
  unsigned int scan1 = 0;
  unsigned int scan2 = 0;
  double precursorMz = 0.0;
  int charge = 1;
  double mw = 0.0; // read from 'Z' lines; not used further in this function
  
  string dummy("");
  string seq("");
  string modifiedSeq("");
  
  // Peaks of the current spectrum; NULL until the first peak line is seen.
  SpectraSTPeakList* peakList = NULL;
   
  // looks like their cysteines are actually C[160]'s
  // Peptide::addModTokenToTables("C", "Carbamidomethyl");
  
  while (nextLine(fin, line)) {
  
    if (line.empty()) continue;
    
    string::size_type pos = 0; 
    
    if (line[0] == 'H') {
      continue;
    
    } else if (line[0] == 'S') {
      // A new spectrum begins: finish the one collected so far.
      if (peakList) {
        if (peakList->getNumPeaks() == 0 || seq.empty()) {
          delete peakList;
        } else {
          
          pc.increment();
	  if (!(modifiedSeq.empty())) seq = modifiedSeq;
          Peptide* pep = new Peptide(seq, charge);
	  
	  // check legality of peptide and mod strings -- will not insert later if illegal (parsing will continue anyway)
          if (pep->hasUnknownMod) {
	    g_log->error("MS2 IMPORT", "Peptide ID with unknown modification: \"" + seq + "\". Entry skipped.");
	  }
	  if (pep->illegalPeptideStr && !pep->hasUnknownMod) {
	    g_log->error("MS2 IMPORT", "Illegal peptide ID string: \"" + seq + "\". Entry skipped.");
          }
	  
          stringstream commentss;
          commentss << "Fullname=X." << seq << ".X/" << charge;
          commentss << " ScanNum=" << scan1 << '.' << scan2;
          commentss << " Spec=Raw";
          
          // pep and peakList are handed to the entry; only the entry is deleted
          // below (presumably it owns both — confirm in SpectraSTLibEntry).
          SpectraSTLibEntry* entry = new SpectraSTLibEntry(pep, commentss.str(), "Normal", peakList); 
          
          if (g_verbose) {
            cout << "Importing record " << m_count << ": " << pep->interactStyleWithCharge() << endl;
          }
          m_count++;

          if (passAllFilters(entry)) {
	    entry->annotatePeaks();
	    m_lib->insertEntry(entry);
          }
          delete (entry);
        }
        
      } 
      
      // Reset per-spectrum state and read the new header fields.
      seq = "";
      modifiedSeq = "";
      peakList = NULL;
      scan1 = atoi(nextToken(line, 1, pos, " \t\r\n", " \t\r\n").c_str());
      scan2 = atoi(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str());
      precursorMz = atof(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str());
      
    } else if (line[0] == 'Z') {
      
      charge = atoi(nextToken(line, 1, pos, " \t\r\n", " \t\r\n").c_str());
      mw = atof(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str());
  
    } else if (line[0] == 'D') {
      // The key is delimited by tab/CR/LF only, so "modified seq" (with a
      // space) is read as one token.
      dummy = nextToken(line, 1, pos, "\t\r\n", " \t\r\n");
      if (dummy == "seq") {
        seq = nextToken(line, pos, pos, "\r\n", " \t\r\n");
      } else if (dummy == "modified seq") {
        modifiedSeq = nextToken(line, pos, pos, "\r\n", " \t\r\n");
      }
 	
    } else {
      // should be a peak
      if (!peakList) {
        peakList = new SpectraSTPeakList(precursorMz, 0);
      }
      double mz = atof(nextToken(line, 0, pos, " \t\r\n", " \t\r\n").c_str());
      float intensity = atof(nextToken(line, pos, pos, " \t\r\n", " \t\r\n").c_str());
      peakList->insert(mz, intensity, "", "");
        
    }
    
    
  }
    	
  // finish last record
  // (same steps as in the 'S' branch above, for the spectrum still pending at EOF)
  if (peakList) {
    if (peakList->getNumPeaks() == 0 || seq.empty()) {
      delete peakList;
    } else {
          
      pc.increment();
      if (!(modifiedSeq.empty())) seq = modifiedSeq;
      Peptide* pep = new Peptide(seq, charge);
      if (pep->hasUnknownMod) {
	g_log->error("MS2 IMPORT", "Peptide ID with unknown modification: \"" + seq + "\". Entry skipped.");
      }
      if (pep->illegalPeptideStr && !pep->hasUnknownMod) {
	g_log->error("MS2 IMPORT", "Illegal peptide ID string: \"" + seq + "\". Entry skipped.");
      }
      stringstream commentss;
      commentss << "Fullname=X." << seq << ".X/" << charge;
      commentss << " ScanNum=" << scan1 << '.' << scan2;
      commentss << " Spec=Raw";
          
      SpectraSTLibEntry* entry = new SpectraSTLibEntry(pep, commentss.str(), "Normal", peakList); 
          
      if (g_verbose) {
        cout << "Importing record " << m_count << ": " << pep->interactStyleWithCharge() << endl;
      }
      m_count++;

      if (passAllFilters(entry)) {
	entry->annotatePeaks();
        m_lib->insertEntry(entry);
      }
      delete (entry);
    }
        
  } 
      
 
  pc.done();
}
Esempio n. 13
0
// Fills this InspectResultsLine from the already-split columns of one results
// line.
//
// `fields` must hold at least 20 columns; columns 20 (PrecursorMz) and 21
// (PrecursorError) are parsed only when present. Each numeric column is read
// with sscanf and, where the code states a range, range-checked; a failure is
// reported through error() with the name of the offending column.
//
// The annotation (column 2) must look like "X.XXXXXXXXX.X": one flanking
// character, a dot, the peptide, a dot, one flanking character. Annotations
// shorter than four characters or without the two dots print a diagnostic and
// terminate the process with status 1, as does a line with fewer than 20
// columns.
//
// Returns true when the function runs to completion.
bool InspectResultsLine::parse_from_fields(Config *config,
										   const vector<string>& fields)
{
	if (fields.size() < 20)
	{
		cout<< "Error: inspect results line has " << fields.size() << ", expecting 20-22" << endl;
		exit(1);
	}

	SpectrumFile = fields[0];

	if (sscanf(fields[1].c_str(),"%d",&scan) != 1 ||
		scan<0 || scan>100000000)
		error("scan");

	Annotation = fields[2];
	Protein	   = fields[3];

	if (sscanf(fields[4].c_str(),"%d",&Charge) != 1 ||
		Charge<0 || Charge>20)
		error("Charge");

	if (sscanf(fields[5].c_str(),"%f",&MQScore) != 1 ||
		MQScore<NEG_INF || MQScore>POS_INF)
		error("MQScore");
	
	if (sscanf(fields[6].c_str(),"%d",&Length) != 1 ||
		Length<1 || Length>POS_INF)
		error("Length");
	
	if (sscanf(fields[7].c_str(),"%f",&TotalPRMScore) != 1 ||
		TotalPRMScore<NEG_INF || TotalPRMScore>POS_INF)
		error("TotalPRMScore");

	if (sscanf(fields[8].c_str(),"%f",&MedianPRMScore) != 1 ||
		MedianPRMScore<NEG_INF || MedianPRMScore>POS_INF)
		error("MedianPRMScore");

	if (sscanf(fields[9].c_str(),"%f",&FractionY) != 1 ||
		FractionY<0 || FractionY>1000)
		error("FractionY");

	if (sscanf(fields[10].c_str(),"%f",&FractionB) != 1 ||
		FractionB<0 || FractionB>1000)
		error("FractionB");

	if (sscanf(fields[11].c_str(),"%f",&Intensity) != 1 ||
		Intensity<0)
		error("Intensity");

	if (sscanf(fields[12].c_str(),"%d",&NTT) != 1 ||
		NTT<0 || NTT>3)
		error("NTT");

	if (sscanf(fields[13].c_str(),"%f",&p_value) != 1)
		error("p_value");

	if (sscanf(fields[14].c_str(),"%f",&F_Score) != 1)
		error("F_Score");

	if (sscanf(fields[15].c_str(),"%f",&DeltaScore) != 1)
		error("DeltaScore");

	if (sscanf(fields[16].c_str(),"%f",&DeltaScoreOther) != 1)
		error("DeltaScoreOther");

	if (sscanf(fields[17].c_str(),"%d",&RecordNumber) != 1)
		error("RecordNumber");

	if (sscanf(fields[18].c_str(),"%d",&DBFilePos) != 1)
		error("DBFilePos");

	if (sscanf(fields[19].c_str(),"%d",&SpecFilePos) != 1)
		error("SpecFilePos");

	// Optional trailing columns. The labels name the column actually being
	// parsed (they used to repeat "SpecFilePos").
	if (fields.size()>20 && sscanf(fields[20].c_str(),"%f",&PrecursorMz) != 1)
		error("PrecursorMz");

	if (fields.size()>21 && sscanf(fields[21].c_str(),"%f",&PrecursorError) != 1)
		error("PrecursorError");

	Score = MQScore;

	const vector<int>& char2aa = config->get_char2aa();
	const int ann_length = Annotation.length();

	// "X..X" is the shortest string with both dots and both flanking
	// characters; anything shorter cannot be indexed at [1] and [n-2] safely
	// and would make the substr length below negative.
	const bool long_enough = (ann_length >= 4);

	if (! long_enough || (Annotation[1] != '.') || (Annotation[ann_length-2] != '.'))
	{
		cout << "Error: bad annotation format: " << Annotation << endl;
		cout << "Expecting X.XXXXXXXXX.X" << endl;
		if (long_enough)
		{
			cout << "Ann1   : " << Annotation[1] << endl;
			cout << "Ann n-2: " << Annotation[ann_length-2] << endl;
		}
		exit(1);
	}

//	cout << "|" << Annotation << "|" << endl;
	// Flanking residues before and after the peptide.
	aaBefore = char2aa[Annotation[0]];
	aaAfter	 = char2aa[Annotation[ann_length-1]];

	// Strip "X." from the front and ".X" from the back.
	pep.parseFromString(config,Annotation.substr(2,ann_length-4));
	
	return true;
}
Esempio n. 14
0
// Entry point: reads snapshots from a gro file one at a time and writes, per
// snapshot, water-contact counts, intra-peptide hydrogen-bond contacts and
// inositol-peptide contacts to three output files named <base>_water_contact,
// <base>_hb_contact_map and <base>_inos_contact.
//
// Arguments: <gro-file> <num-pep> <num-ins> <base>. With fewer arguments a
// usage line is printed to cerr and the program returns 0.
int main(int argc, char* argv[]) {
	
	if(argc < 5){
		cerr<<"usage: peptide_analysis_testing_simple <gro-file> <num-pep> <num-ins> <base>"<<endl;
		return 0;
	}
	
	// NOTE(review): the stream is not checked for a successful open here.
	ifstream gro(argv[1]);
	int numPeptides = atoi(argv[2]);	//number of peptides in the snapshot
	int numIns = atoi(argv[3]);		//number of inositols in the snapshot
	string base(argv[4]);

	vector<Peptide*> peptides;
	vector<Inositol*> inositols;
	vector<Water*> waters;

	int time = 0; // incremented once per snapshot; not otherwise read below
	double boxDims[3];
	
	// 16 bound-group slots per peptide (matches the 0..15 loop further down).
	int total_bound_groups = numPeptides*16;

	//assumes that max number of HB that can be made by an inositol to be 6
	vector<int> distr_inos_numHB_tot(7, 0); //distribution of number of inositols over #HB
	ofstream contactwat((base + "_water_contact").c_str());
	//ofstream nonpolar((base + "_np_contact").c_str());
	ofstream cmap((base + "_hb_contact_map").c_str());
	ofstream inos((base + "_inos_contact").c_str());
	//read each snapshot of the partial gro file
	//build vector of peptides, inositols, detect hydrogen bonds,collect statistics
	//delete memory allocated for the snapshots
	// The loop continues for as long as readGroFile returns 0/false.
	while(!readGroFile(gro, peptides, inositols, waters, numPeptides, numIns, boxDims)){
#ifdef DEBUG
		// NOTE(review): totalNumSnap is not declared in this function.
		cout<<"################frame # "<<totalNumSnap<<" #####################"<<endl;
#endif
		int* water_bound_res = new int[total_bound_groups];
		//initialize all to 0
		for(int i=0; i<total_bound_groups; i++){
			water_bound_res[i]=0;
		}
		//calculate water contacts
		waterContacts(peptides, inositols, waters, water_bound_res, boxDims);
		//output to file
		for(int i=0; i < total_bound_groups; i++){
			contactwat<<water_bound_res[i]<<" ";
		}
		contactwat<<endl;
		//de-allocate memory
		delete [] water_bound_res;

		// Only the first peptide is analysed from here on.
		Peptide* pep = peptides.at(0);
		// Contact maps calculations
		// For each residue pair n < m, write "n m" when m's CO is bonded to
		// n's NH and "m n" when m's NH is bonded to n's CO.
		for(int n=0; n < numRes; n++){
			double NH_pep[3], H_pep[3], O_pep[3], C_pep[3];
			pep->getNH(n, NH_pep);
			pep->getHofNH(n, H_pep);
			pep->getO(n, O_pep);
			pep->getC(n, C_pep);
			for(int m=n+1; m < numRes; m++){
				if(pep->isHbondedCO(m, NH_pep, H_pep, boxDims)){
					cmap << n << " " << m <<endl;
				}
				if(pep->isHbondedNH(m, O_pep, C_pep, boxDims)){
					cmap << m << " "<< n <<endl;
				}
			}
		}
		
		//inositol peptide contact calculations
		//we only have 1 peptide in the system
		for(int nres = 0; nres < numRes; nres++) {
			// One group per residue for NH contacts and one for CO contacts;
			// both are handed to the peptide via setBoundGroup and not deleted here.
			PepGroup* bgroupNH = new PepGroup;
			PepGroup* bgroupCO = new PepGroup;
			for(int nins = 0; nins < numIns; nins++) {
				Inositol* aInos = inositols.at(nins);
				// Six OH groups per inositol.
				for(int noh = 0; noh < 6; noh++) {
					double inosO[3], inosH[3];
					aInos->getOCoords(noh,inosO);
					aInos->getHCoords(noh,inosH);
					if (pep->isHbondedNH(nres, inosO, inosH, boxDims)) {
						//cinos<<nres<<" has "<<nins*6+noh<<" NH bound"<<endl;
						bgroupNH->addPepGroup(nins,nins*6+noh,"OH");
					}
			
					if (pep->isHbondedCO(nres, inosO, inosH, boxDims)) {
						//cinos<<nres<<" has "<<nins*6+noh<<" CO bound"<<endl;
						bgroupCO->addPepGroup(nins,nins*6+noh,"OH");
					}
				}
			}
			// Even slots hold NH groups, odd slots hold CO groups.
			pep->setBoundGroup(2*nres, bgroupNH);
			pep->setBoundGroup(2*nres+1, bgroupCO);
			//delete bgroupNH;
			//delete bgroupCO;
		}
	
		//output the data computed above
		// One line per snapshot: residue ids separated by spaces, "-" for an
		// empty slot, ";" after each even slot and "|" after each odd slot
		// except the last.
		for(int nbb = 0; nbb < 16; nbb++) {
			PepGroup* bgroup=pep->getBoundGroup(nbb);
			int numBoundGroups = bgroup->numGroups();
			if(numBoundGroups){
				for(int nbgroup=0; nbgroup<numBoundGroups; nbgroup++){
					inos<<bgroup->getResId(nbgroup);
					if(nbgroup<numBoundGroups-1){
						inos<<" ";
					}
				}
			}else{
				inos<<"-";
			}
			if(nbb%2 == 0 && nbb < 16){
				inos<<";";
			}
			if(nbb%2 && nbb < 15){
				inos<<"|";
			}
	
				//cinos<<nbb<<" has " << bgroup->numGroups()<< " bound groups"<<endl;
	
		}
		inos<<endl;
	
		time++;
		// Release the objects built for this snapshot before reading the next.
		delete_vectors(peptides, inositols, waters);
	}

	contactwat.close();
	cmap.close();
	//nonpolar.close();
	inos.close();
	return 0;
}
Esempio n. 15
0
/**************************************************************************
Returns the global edit distance between this peptide and other_pep,
computed with a banded dynamic program (band half-width max_width = 5).

Gap cost = 1.
d(I,L)=0
d(K,Q)=0
d(F,M+16)=0   (only when the config defines the "M+16" amino acid)
d(N,D)=d(Q,E)=d(K,E)=d(I,N)=d(L,N)=0.5
d(X,X)=0
d(X,Y)=1

Shortcuts taken before the dynamic program:
 - if the lengths differ by max_width or more, the length of this peptide is
   returned;
 - for equal lengths, 0 is returned when there are no mismatches (I/L and
   K/Q count as equal) and 1 when the only two mismatches are one swap of
   neighbouring residues.
***************************************************************************/
float Peptide::peptide_edit_distance(Config *config, Peptide& other_pep) const
{
	int i;
	const vector<int>& other_aa = other_pep.get_amino_acids();
	const int pep_len1 = amino_acids.size();
	const int pep_len2 = other_aa.size(); 
	// Taking &v[0] of an empty vector is undefined; the pointers are never
	// dereferenced when the corresponding length is 0.
	const int *pep1 = amino_acids.empty() ? 0 : &amino_acids[0];
	const int *pep2 = other_aa.empty() ? 0 : &other_aa[0];
	const int max_width = 5;
	const int ox_met_aa = config->get_aa_from_label(string("M+16"));

	float row1[max_width*2+1], row2[max_width*2+1];
	float *old_row, *new_row;

	if (abs(pep_len1-pep_len2)>=max_width)
		return pep_len1;

	// check that no little switch of two aa can make them the same
	if (pep_len1 == pep_len2)
	{
		int err_pos=-1;
		int errs=0;
		for (i=0; i<pep_len1; i++)
		{
			if ( (pep1[i] != pep2[i]) &&
			    ! (((pep1[i]==Ile ||pep1[i]==Leu) && (pep2[i]==Ile || pep2[i]==Leu)) ||
		          ((pep1[i]==Gln || pep1[i]==Lys) && (pep2[i]==Gln || pep2[i]==Lys)) ) )
			{
				err_pos=i;
				errs++;
			}
			if (errs>2)
				break;
		}

		if (errs == 0)
			return 0;

		// err_pos is the second mismatch, so err_pos-1 >= 0 here.
		if (errs == 2)
		{
			if ( (pep1[err_pos] == pep2[err_pos-1]) && (pep1[err_pos-1] == pep2[err_pos]))
				return 1;
		}
	}

	// Row layout: column j of row i corresponds to position i + j - max_width
	// in the other peptide; 9999 marks cells outside the band.
	old_row=row1;
	new_row=row2;
	for (i=0; i<max_width; i++)
	{
		old_row[i]=9999;
		old_row[i+max_width]=i;
		new_row[i]=9999;
		new_row[i+max_width]=9999;
	}
	old_row[2*max_width]=9999;
	new_row[2*max_width]=9999;

	int start_new_row = max_width-1;
	for (i=1; i<=pep_len1; i++)
	{	
		int j;

		// Left boundary of the band: aligning the first i residues of this
		// peptide against nothing costs i gaps.
		if (start_new_row>0)
		{
			new_row[start_new_row]=i;
			new_row[--start_new_row]=9999;
		}
		else
			new_row[0] = 9999;
	
		for (j=1; j<= 2*max_width; j++)
		{
			int p2_pos = i + j - max_width;
			if (p2_pos<1)
				continue;
			if (p2_pos>pep_len2)
				break;

			float v1,v2,v3,dxy=0;
			const int p1=pep1[i-1],p2=pep2[p2_pos-1];

			if (p1 != p2)
			{
				dxy=1;
				if (  ((p1==Ile || p1==Leu) && (p2==Ile || p2==Leu)) ||
					  ((p1==Gln || p1==Lys) && (p2==Gln || p2==Lys)) )
				{
					dxy=0;
				}
				// Both directions of each pair test p1, the residue being
				// aligned. The earlier code read pep1[i] here, i.e. the next
				// residue, which is past the end when i == pep_len1.
				else if ( ((p1==Asn && p2==Asp) || (p1==Asp && p2==Asn)) ||
						  ((p1==Gln && p2==Glu) || (p1==Glu && p2==Gln)) ||
						  ((p1==Lys && p2==Glu) || (p1==Glu && p2==Lys)) ||
						  ((p1==Ile && p2==Asn) || (p1==Asn && p2==Ile)) ||
						  ((p1==Leu && p2==Asn) || (p1==Asn && p2==Leu)) )
				{
					dxy=0.5;
				}
				else if (ox_met_aa>0 && (( p1 == ox_met_aa && p2 == Phe) ||
										 ( p1 == Phe  && p2 == ox_met_aa)) )
				{
					dxy = 0;
				}
			}

			// v1: substitution/match, v2: gap coming from the left cell,
			// v3: gap coming from the previous row (if inside the band).
			v1= old_row[j]+dxy;
			v2= new_row[j-1]+1;
			v3= (p2_pos<pep_len2 && j<2*max_width) ? old_row[j+1]+1 : 9999;

			new_row[j]=v1;
			if (new_row[j]>v2)
				new_row[j]=v2;
			if (new_row[j]>v3)
				new_row[j]=v3;
		}

		float *tmp;
		tmp = old_row;
		old_row = new_row;
		new_row = tmp;
	}

	// Cell for (pep_len1, pep_len2) in the last completed row.
	return old_row[pep_len2-pep_len1+max_width];
}