// Example no. 1
// 0
// USED FOR TESTING
//
// Builds the loop tree for `sequence` folded into `structure`, prints
// diagnostic information to stdout, and prints the free energy for every
// energy model whose flag (RE_FLAG, DP_FLAG) is set.
//
// sequence:  sequence (input parameter)
// structure: secondary structure in dot-bracket format () and [] (input parameter)
// counter:   array where counter[i] is the number of times the i-th feature
//            occurs (output parameter). It is only handed to the energy
//            routines when DP_FLAG is set, and it is NOT cleared here first.
//
// NOTE(review): R, s, B and L are allocated with `new` and never released.
// Which object owns which is not visible from this function, so they are
// left untouched here -- confirm ownership and add the matching deletes.
// NOTE(review): unlike the parameter-tuning variants, this function does not
// call L->countNumberOfChildren() after building the tree -- confirm whether
// that is intentional.
void get_feature_counts (char *sequence, char *structure, double *counter)
{
	// pairseq holds the pairing partner of every base, 1-based:
	//   pairseq[0] = 0 always;
	//   pairseq[j] = index of the base paired with base j, or 0 if j is unpaired.
	// Example for a structure of length 10 with pairs (1,6) and (5,10):
	//   pairseq = { 0, 6, 0, 0, 0, 10, 1, 0, 0, 0, 5 }
	int size = (int) strlen(structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	ReadInput *R = new ReadInput(size, sequence, pairseq);

	printf("DEBUG = After readinput\n");

	Stack *s = new Stack(R);
	Bands *B = new Bands(R, s);

	int printTrace = 0;  // by default, don't print energy trace

	printf("Seq: %s \n", R->CSequence);
	printf("Size: %d \n", R->Size);
	for (int i = 1; i <= R->Size; i++) {
		printf("%d ", R->Sequence[i]);
	}
	printf("\n-------------------------------\n Making the Loop Tree\n");
	Loop *L = new Loop(0, MaxN+1, R, B, s);

	// a and b receive the borders of a closed region found by Stack::Add.
	int a = 0;
	int b = 0;
	for (int i = 1; i <= R->Size; i++) {
		// Whenever a closed region is identified, add it to the tree.
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->Print(-1);
	printf("-------------------------------\n");

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L->RightChild != NULL) {
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
		}
	}

	// Dump index, sequence character and R->Sequence value (with -1 shown
	// as 0) for positions 0..Size. The values are printed directly; no
	// temporary heap buffers are needed for this.
	for (int i = 0; i <= R->Size; i++) {
		short code = (short)(R->Sequence[i]);
		if (code == -1) {
			code = 0;
		}
		char base = R->CSequence[i];
		printf("%d %c %d \n", i, base, code);
	}

	float totalEnergy = 0;

	cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
	printf("--------------------------------------------------------------\n");
	if (RE_FLAG)
	{
		// The value is divided by 1000 only when printed.
		totalEnergy = -L->Energy(RE);
		if (no_pk_dangling_ends == 0)
			cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << (totalEnergy - L->EnergyDangling())/1000 << setw(40) << left << totalEnergy/1000 << endl;
		else
			cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << totalEnergy/1000 << setw(40) << left << totalEnergy/1000 << endl;

		if (printTrace)
			L->printEnergyTrace();
		cout << endl;
	}
	if (DP_FLAG)  // energy returned in kcal
	{
		// This test driver supplies no quadratic matrix. Start from NULL so
		// the callee receives a well-defined value instead of an
		// indeterminate pointer.
		// NOTE(review): if Loop::Energy writes through this pointer for the
		// DP model, a real matrix must be allocated here -- confirm.
		double **quadratic_matrix = NULL;
		double f = 0;
		int reset_c = 0;
		int ignore_dangles = no_pk_dangling_ends;
		int ignore_AU = 0;  // 0 = do include AU penalties

		if (DEBUG)
			printf("Before call to Energy for DP model\n");

		totalEnergy = -L->Energy(DP, quadratic_matrix, counter, f, reset_c, ignore_dangles);

		if (DEBUG)
			printf("After call to Energy for DP model\n");

		cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy - L->EnergyDangling(DP, quadratic_matrix,counter,f,reset_c,ignore_dangles,ignore_AU) << setw(40) << left << totalEnergy << endl;

		if (printTrace)
			L->printEnergyTrace();
	}

	printf("\n");
	printf("PARAMETER TUNING\n");
	printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n");
	cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " <<
			g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " <<
			g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl;

	// The g_count_* globals are only printed; they are not copied into
	// counter[0..13] (an earlier version did so, in the order printed above).
}
// Example no. 2
// 0
// USED FOR PARAMETER TUNING (CC2006b energy model)
//
// Returns the free energy (in kcal/mol) of sequence folded into structure.
//
// sequence:  sequence (input parameter)
// structure: secondary structure in dot-bracket format () and [] (input parameter)
//
// The returned value is the sum of -Energy(CC2006b)/1000 and
// -EnergyDangling()/1000; the dangling contribution is always included
// (the no_pk_dangling_ends flag is not consulted here).
//
// NOTE(review): R, s, B and L are allocated with `new` and never released.
// Which object owns which is not visible from this function, so they are
// left untouched here -- confirm ownership and add the matching deletes.
double free_energy_PK_CC2006b(char *sequence, char *structure)
{
	// pairseq holds the pairing partner of every base, 1-based:
	//   pairseq[0] = 0 always;
	//   pairseq[j] = index of the base paired with base j, or 0 if j is unpaired.
	int size = (int) strlen(structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	ReadInput *R = new ReadInput(size, sequence, pairseq);
	Stack *s = new Stack(R);
	Bands *B = new Bands(R, s);

	if (DEBUG)
	{
		printf("Seq: %s \n", R->CSequence);
		printf("Size: %d \n", R->Size);
		for (int i = 1; i <= R->Size; i++) {
			printf("%d ", R->Sequence[i]);
		}
		printf("\n-------------------------------\n Making the Loop Tree\n");
	}

	Loop *L = new Loop(0, MaxN+1, R, B, s);

	// a and b receive the borders of a closed region found by Stack::Add.
	int a = 0;
	int b = 0;
	for (int i = 1; i <= R->Size; i++) {
		// Whenever a closed region is identified, add it to the tree.
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->countNumberOfChildren();  // set number of children for the top loop

	if (DEBUG)
	{
		L->Print(-1);
		printf("-------------------------------\n");
	}

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L->RightChild != NULL) {
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
		}
	}

	// Debug dump of index, sequence character and R->Sequence value (with -1
	// shown as 0) for positions 0..Size. Values are printed directly, so no
	// temporary heap buffers are allocated.
	if (DEBUG)
	{
		for (int i = 0; i <= R->Size; i++) {
			short code = (short)(R->Sequence[i]);
			if (code == -1) {
				code = 0;
			}
			char base = R->CSequence[i];
			printf("%d %c %d \n", i, base, code);
		}

		cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
		printf("--------------------------------------------------------------\n");
	}

	// Both terms are negated and scaled by 1/1000 before being summed.
	float totalEnergy = -L->Energy(CC2006b)/1000;
	float totalEnergyDang = -L->EnergyDangling()/1000;

	if (DEBUG)
		cout << setw(15) << left << "Cao&Chen(b)" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl;

	return totalEnergy + totalEnergyDang;
}
// Example no. 3
// 0
// USED FOR PARAMETER TUNING (DP energy model)
//
// Computes the free energy of sequence folded into structure and fills the
// terms of its quadratic form
//     deltaG = x' P x + c' x + f
// where x is the vector of parameters, P is a symmetric matrix of the
// coefficients for each quadratic term, c is a vector of counts for each
// linear term (c' means c transposed) and f is a constant.
//
// sequence:         sequence (input parameter)
// structure:        secondary structure in dot-bracket format () and []
//                   (input parameter)
// quadratic_matrix: P above (output parameter). Must have at least
//                   get_num_params_PK_DP() rows and columns. Only the upper
//                   triangle (column >= row) is cleared here.
// counter:          c above; counter[i] is the number of times the i-th
//                   feature occurs (output parameter). Must have at least
//                   get_num_params_PK_DP() entries.
// free_value:       f above (output parameter).
//
// Note: counter, free_value and quadratic_matrix are reset here before
// being passed on to the energy routines.
//
// Returns -Energy(DP, ...) plus -EnergyDangling(DP, ...).
//
// NOTE(review): R, s, B and L are allocated with `new` and never released.
// Which object owns which is not visible from this function, so they are
// left untouched here -- confirm ownership and add the matching deletes.
double get_feature_counts_quadratic_PK_DP (char *sequence, char *structure, double **quadratic_matrix, double *counter, double &free_value)
{
	// pairseq holds the pairing partner of every base, 1-based:
	//   pairseq[0] = 0 always;
	//   pairseq[j] = index of the base paired with base j, or 0 if j is unpaired.
	// Example for a structure of length 10 with pairs (1,6) and (5,10):
	//   pairseq = { 0, 6, 0, 0, 0, 10, 1, 0, 0, 0, 5 }
	int size = (int) strlen(structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	ReadInput *R = new ReadInput(size, sequence, pairseq);
	Stack *s = new Stack(R);
	Bands *B = new Bands(R, s);

	if (DEBUG)
	{
		printf("Seq: %s \n", R->CSequence);
		printf("Size: %d \n", R->Size);
		for (int i = 1; i <= R->Size; i++) {
			printf("%d ", R->Sequence[i]);
		}
		printf("\n-------------------------------\n Making the Loop Tree\n");
	}

	Loop *L = new Loop(0, MaxN+1, R, B, s);

	// a and b receive the borders of a closed region found by Stack::Add.
	int a = 0;
	int b = 0;
	for (int i = 1; i <= R->Size; i++) {
		// Whenever a closed region is identified, add it to the tree.
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->countNumberOfChildren();  // set number of children for the top loop

	if (DEBUG)
	{
		L->Print(-1);
		printf("-------------------------------\n");
	}

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L->RightChild != NULL) {
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
		}
	}

	// Debug dump of index, sequence character and R->Sequence value (with -1
	// shown as 0) for positions 0..Size. Values are printed directly, so no
	// temporary heap buffers are allocated.
	if (DEBUG)
	{
		for (int i = 0; i <= R->Size; i++) {
			short code = (short)(R->Sequence[i]);
			if (code == -1) {
				code = 0;
			}
			char base = R->CSequence[i];
			printf("%d %c %d \n", i, base, code);
		}

		cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
		printf("--------------------------------------------------------------\n");
	}

	// PARAMETER TUNING
	// Clear the counter, the upper triangle of quadratic_matrix, and
	// free_value before accumulating into them.
	int num_params = get_num_params_PK_DP();
	free_value = 0;
	for (int row = 0; row < num_params; row++)
	{
		counter[row] = 0;
		for (int col = row; col < num_params; col++)
			quadratic_matrix[row][col] = 0;
	}

	int reset_c = 0;
	int ignore_dangles = no_pk_dangling_ends;
	int ignore_AU = 0;  // 0 = do include AU penalties

	// Both calls receive the same output arguments; Energy is called first.
	float totalEnergy = -L->Energy(DP, quadratic_matrix, counter, free_value, reset_c, ignore_dangles);
	float totalEnergyDang = -L->EnergyDangling(DP, quadratic_matrix,counter,free_value,reset_c,ignore_dangles,ignore_AU);

	if (DEBUG)
	{
		cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl;

		printf("\n");
		printf("PARAMETER TUNING\n");
		printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n");
		cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " <<
				g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " <<
				g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl;
	}

	// Return the free energy as computed by the loop tree. It is not
	// recomputed from quadratic_matrix, counter and free_value here.
	return totalEnergy + totalEnergyDang;
}