Esempio n. 1
0
bool SeqPath::check_if_correct(const string& str, const Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const char *path_str = seq_str.c_str();
	const char *corr_str = str.c_str();

	int len_path_str = strlen(path_str);
	int len_corr_str = strlen(corr_str);

	if (len_path_str>len_corr_str)
		return false;

	
	int i;
	for (i=0; i<=len_corr_str-len_path_str; i++)
	{
		int j;
		bool correct_seq = true;
		for (j=0; j<len_path_str; j++)
			if (! (path_str[j] == corr_str[i+j] ||
				  (path_str[j] == 'I' && corr_str[i+j]== 'L') ||
				  (path_str[j] == 'L' && corr_str[i+j]== 'I') ||
				  (path_str[j] == 'Q' && corr_str[i+j]== 'K') ||
				  (path_str[j] == 'K' && corr_str[i+j]== 'Q') ) )
			{
				correct_seq = false;
				break;
			}



		if (correct_seq)
		{
			// check prefix mass
			Peptide pep;
			pep.parseFromString(config,corr_str);
			const vector<int>& aas= pep.get_amino_acids();
			mass_t mass=0;
			int j;

			if (n_term_mass == 0 && i==0)
				return true;

			for (j=0; j<aas.size(); j++)
			{
				mass+=aa2mass[aas[j]];
				if (fabs(mass-this->n_term_mass)<6)
					return true;

				if (mass>n_term_mass)
					break;
			}
		}
	}

	
	
	return false;
}
Esempio n. 2
0
int SeqPath::get_num_correct_aas(const Peptide& pep, const Config *config) const
{
	const vector<mass_t>& aa2mass = config->get_aa2mass();
	const vector<int>& pep_aas = pep.get_amino_acids();

	int num_correct=0;
	int i;

	vector<mass_t> pep_masses;
	vector<int> path_aas;
	
	get_amino_acids(path_aas);

	pep_masses.resize(pep_aas.size(),0);
	for (i=1; i<pep_aas.size(); i++)
		pep_masses[i]=pep_masses[i-1]+aa2mass[pep_aas[i-1]];

	mass_t path_mass = n_term_mass;
	for (i=0; i<path_aas.size(); i++)
	{
		const int path_aa = path_aas[i];
		int j;
		for (j=0; j<pep_aas.size(); j++)
		{
			const int pep_aa = pep_aas[j];
			if (fabs(pep_masses[j]-path_mass)<1.0 && pep_aas[j] == path_aas[i])
			{
				num_correct++;
				break;
			}
		}

		path_mass += aa2mass[path_aas[i]];
	}

	return num_correct;
}
Esempio n. 3
0
int Peptide::calc_number_of_correct_aas(Config *config,const Peptide& other) const
{
	const vector<int>& other_amino_acids = other.get_amino_acids();
	const int num_aas = amino_acids.size();
	const int num_other_aas = other_amino_acids.size();

	vector<mass_t> this_breakages,other_breakages;

	calc_expected_breakage_masses(config,this_breakages);
	other.calc_expected_breakage_masses(config,other_breakages);

	int this_idx=0;
	int other_idx=0;
	int num_correct_aas=0;

	while (this_idx<num_aas && other_idx<num_other_aas)
	{
		if (fabs(this_breakages[this_idx]-other_breakages[other_idx])<1.0)
		{
			if (amino_acids[this_idx]==other_amino_acids[other_idx])
				num_correct_aas++;

			this_idx++;
			other_idx++;
		}
		else
		{
			if (this_breakages[this_idx]<other_breakages[other_idx])
			{
				this_idx++;
			}
			else
				other_idx++;
		}
	}
	return num_correct_aas;
}
Esempio n. 4
0
void AdvancedScoreModel::score_peptide_node_combos(PrmGraph *prm, const Peptide& peptide ) const
{
	const vector<int>& org_aas		= config.get_org_aa();
	const vector<mass_t>& aa2mass	= config.get_aa2mass();
	const vector<MultiEdge>& multi_edges = prm->get_multi_edges();
	const int num_nodes		   = prm->get_num_nodes();
	const vector<int>& pep_aas = peptide.get_amino_acids();
	const int num_pep_aas = pep_aas.size();

	mass_t p_mass=0;
	int aa_idx=0;

	int i;
	for (i=0; i<num_nodes; i++)
	{
		Node& node = prm->get_non_const_node(i);
		const RegionalScoreModel& score_model = 
			regional_breakage_score_models[prm->get_charge()][prm->get_size_idx()][node.breakage.region_idx];

		int in_edge_idx=NEG_INF,  in_edge_variant=NEG_INF;
		int out_edge_idx=NEG_INF, out_edge_variant=NEG_INF;

	//	cout << "N: " << node.mass << endl;
		while (aa_idx<pep_aas.size() && fabs(p_mass-node.mass)>0.1)
		{
			p_mass += aa2mass[pep_aas[aa_idx]];
			aa_idx++;
	//		cout << aa_idx << "\t" << p_mass << endl;
		}
		
		if (aa_idx == pep_aas.size() && i != num_nodes-1)
		{
			int j;
			for (j=0; j<num_nodes; j++)
				cout << j << "\t" << prm->get_node(j).mass << endl;

			cout << endl << "PEP:" << endl;
			vector<mass_t> exp_masses;
			peptide.calc_expected_breakage_masses((Config *)&config,exp_masses);
			for (j=0; j<exp_masses.size(); j++)
				cout << j << "\t" << exp_masses[j] << endl;

			cout << "Error: mismatch between nodes and peptide!" << endl;
			exit(1);
		}

		if (node.in_edge_idxs.size()>0)
		{
			int j;
			for (j=0; j<node.in_edge_idxs.size(); j++)
			{
				const int edge_idx = node.in_edge_idxs[j];
				const MultiEdge& in_edge = multi_edges[edge_idx];
				const int num_aa = in_edge.num_aa;

				if (num_aa>aa_idx)
					continue;

				const int var_idx = in_edge.get_variant_idx(num_aa,&pep_aas[aa_idx-num_aa]);
				if (var_idx<0)
					continue;

				in_edge_idx = edge_idx;
				in_edge_variant = var_idx;
				break;
			}
		}

		if (node.out_edge_idxs.size()>0)
		{
			int j;
			for (j=0; j<node.out_edge_idxs.size(); j++)
			{
				const int edge_idx = node.out_edge_idxs[j];
				const MultiEdge& out_edge = multi_edges[edge_idx];
				const int num_aa = out_edge.num_aa;

				if (num_aa + aa_idx >num_pep_aas)
					continue;

				const int var_idx = out_edge.get_variant_idx(num_aa,&pep_aas[aa_idx]);
				if (var_idx<0)
					continue;

				out_edge_idx = edge_idx;
				out_edge_variant = var_idx;
				break;
			}
		}


		BreakageInfo info;
		prm->fill_breakage_info(this,&info,i,in_edge_idx,in_edge_variant,out_edge_idx,out_edge_variant);
	
		node.score_combos.clear();

	//	cout << in_edge_idx << " " << in_edge_variant << " " << out_edge_idx << " " << out_edge_variant << "\t";

		info.score = score_model.score_a_single_breakage_combo(prm, node, &node.breakage, info);
	
		node.score_combos[ScoreComboLoc(info)]=info.score;
		node.score = info.score; 
		node.breakage.score = node.score;

	//	cout << node.score << endl;
	}
	prm->set_has_node_combo_scores(true);
}
Esempio n. 5
0
/**************************************************************************
Returns the global edit distance between two peptides.
Gap cost = 1.
d(I,L)=0
d(K,Q)=0
d(F/M*)=0
d(N,D)=0.5;
d(X,X)=0
d(X,Y)=1
***************************************************************************/
float Peptide::peptide_edit_distance(Config *config, Peptide& other_pep) const
{
	int i;
	vector<int> other_aa = other_pep.get_amino_acids();
	const int *pep1 = &amino_acids[0];
	const int pep_len1 = amino_acids.size();
	const int *pep2 = &other_aa[0];
	const int pep_len2 = other_aa.size(); 
	const int max_width = 5;
	const int ox_met_aa = config->get_aa_from_label(string("M+16"));

	float row1[max_width*2+1], row2[max_width*2+1];
	float *old_row, *new_row;

	if (abs(pep_len1-pep_len2)>=max_width)
		return pep_len1;

	// check that no little switch of two aa can make them the same
	if (pep_len1 == pep_len2)
	{
		int i;
		int err_pos=-1;
		int errs=0;
		for (i=0; i<pep_len1; i++)
		{
			if ( (pep1[i] != pep2[i]) &&
			    ! (((pep1[i]==Ile ||pep1[i]==Leu) && (pep2[i]==Ile || pep2[i]==Leu)) ||
		          ((pep1[i]==Gln || pep1[i]==Lys) && (pep2[i]==Gln || pep2[i]==Lys)) ) )
			{
				err_pos=i;
				errs++;
			}
			if (errs>2)
				break;
		}

		if (errs == 0)
			return 0;

		if (errs == 2)
		{
			if ( (pep1[err_pos] == pep2[err_pos-1]) && (pep1[err_pos-1] == pep2[err_pos]))
				return 1;
		}
	}

	
	old_row=row1;
	new_row=row2;
	for (i=0; i<max_width; i++)
	{
		old_row[i]=9999;
		old_row[i+max_width]=i;
		new_row[i]=9999;
		new_row[i+max_width]=9999;
	}
	old_row[2*max_width]=9999;
	new_row[2*max_width]=9999;

	int start_new_row = max_width-1;
	for (i=1; i<=pep_len1; i++)
	{	
		int j;

		if (start_new_row>0)
		{
			new_row[start_new_row]=i;
			new_row[--start_new_row]=9999;
		}
		else
			new_row[0] = 9999;
	
		for (j=1; j<= 2*max_width; j++)
		{
			int p2_pos = i + j - max_width;
			if (p2_pos<1)
				continue;
			if (p2_pos>pep_len2)
				break;

			float v1,v2,v3,dxy=0;
			int p1=pep1[i-1],p2=pep2[p2_pos-1];

			if (p1 != p2)
			{
				dxy=1;
				if (  ((p1==Ile || p1==Leu) && (p2==Ile || p2==Leu)) ||
					  ((p1==Gln || p1==Lys) && (p2==Gln || p2==Lys)) )
				{
					dxy=0;
				}
				else if ( ((p1==Asn && p2==Asp) || (p1==Asp && p2==Asn)) ||
						  ((pep1[i]==Gln && p2==Glu) || (p1==Glu && p2==Gln))  ||
					  ((pep1[i]==Lys && p2==Glu) || (p1==Glu && p2==Lys))  ||
					  ((pep1[i]==Ile && p2==Asn) || (p1==Asn && p2==Ile))  ||
					  ((pep1[i]==Leu && p2==Asn) || (p1==Asn && p2==Leu))  )
				{
					dxy=0.5;
				}
				else if (ox_met_aa>0 && (( p1 == ox_met_aa && p2 == Phe) ||
										 ( p1 == Phe  && p2 == ox_met_aa)) )
				{
					dxy = 0;
				}
			}

			v1= old_row[j]+dxy;
			v2= new_row[j-1]+1;
			v3= (p2_pos<pep_len2 && j<2*max_width) ? old_row[j+1]+1 : 9999;

			new_row[j]=v1;
			if (new_row[j]>v2)
				new_row[j]=v2;
			if (new_row[j]>v3)
				new_row[j]=v3;
		}

		float *tmp;
		tmp = old_row;
		old_row = new_row;
		new_row = tmp;
	}

	return old_row[pep_len2-pep_len1+max_width];
}