// Example no. 1
// 0
// USED FOR PARAMETER TUNING (CC2006b energy model)
// Returns the free energy (in kcal/mol) of sequence folded into structure.
//
// sequence:  sequence (input parameter)
// structure: secondary structure in dot-bracket format () and [] (input parameter)
//
// The returned value is the sum of two terms, each negated and divided by 1000:
// the loop-tree energy under the CC2006b model and the dangling-end energy.
// The dangling-end term is always included; the no_pk_dangling_ends switch is
// not consulted by this function.
double free_energy_PK_CC2006b(char *sequence, char *structure)
{
	// Build the 1-based pairing table:
	//   pairseq[0] = 0 always;
	//   pairseq[j] = index of the base paired with base j, or 0 if base j is unpaired.
	int size = strlen (structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	// NOTE(review): R, s, B and L are heap-allocated and never released by this
	// function. Their destructors are not visible from here, so it is unclear
	// whether they can safely be deleted independently; confirm the ownership
	// rules and free them if possible.
	ReadInput* R = new ReadInput(size, sequence, pairseq);
	Stack * s = new Stack(R);
	Bands * B = new Bands(R, s);

	if (DEBUG)
	{
		printf("Seq: %s \n", R->CSequence);
		printf("Size: %d \n", R->Size);
		for (int i = 1; i <= R->Size; i++) {
			printf("%d ", R->Sequence[i]);
		}
		printf("\n-------------------------------\n Making the Loop Tree\n");
	}

	// Root of the loop tree, spanning the whole sequence.
	Loop * L = new Loop(0, MaxN+1, R, B, s);

	// Borders of a closed region, filled in by Stack::Add when it reports one.
	int a = 0, b = 0;
	for (int i = 1; i <= R->Size; i++) {
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			// A closed region was identified: add it to the tree.
			L->addLoop(a, b);
		}
	}

	L->countNumberOfChildren();  // set number of children for the top loop

	if (DEBUG)
	{
		L->Print(-1);
		printf("-------------------------------\n");
	}

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L != NULL && L->RightChild != NULL)
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
	}

	// Debug dump of every position (index 0 included): index, character and
	// numeric code, with a code of -1 shown as 0. The values are printed
	// straight from R; the previous version copied them into two new[] arrays
	// that were never freed.
	if (DEBUG)
	{
		for (int i = 0; i < R->Size+1; i++) {
			short code = (short)(R->Sequence[i]);
			if (code == -1) code = 0;
			printf("%d %c %d \n", i, R->CSequence[i], code);
		}
	}

	if (DEBUG)
	{
		cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
		printf("--------------------------------------------------------------\n");
	}

	// energy returned in kcal
	float totalEnergy = -L->Energy(CC2006b)/1000;
	float totalEnergyDang = -L->EnergyDangling()/1000;

	if (DEBUG)
		cout << setw(15) << left << "Cao&Chen(b)" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl;

	return totalEnergy + totalEnergyDang;
}
// Example no. 2
// 0
// USED FOR PARAMETER TUNING (DP energy model)
// Computes the free energy of sequence folded into structure under the DP
// model and, as a side product, fills in the terms of the quadratic form
//     deltaG = x' P x + c' x + f
// where x is the vector of parameters.
//
// sequence:         sequence (input parameter)
// structure:        secondary structure in dot-bracket format () and [] (input parameter)
// quadratic_matrix: P, the coefficients of the quadratic terms (output parameter).
//                   Only the upper triangle (j >= i) is reset here; it must provide
//                   at least get_num_params_PK_DP() rows and columns.
// counter:          c, where counter[i] is the number of times the i-th feature
//                   occurs (output parameter); at least get_num_params_PK_DP() entries.
// free_value:       f, the constant term (output parameter)
// Note: The counter, free_value, and quadratic_matrix are automatically reset
// before being passed on to other functions.
//
// Returns the sum of the negated loop-tree energy and the negated dangling-end
// energy. Dangling ends are ignored when no_pk_dangling_ends is set.
double get_feature_counts_quadratic_PK_DP (char *sequence, char *structure, double **quadratic_matrix, double *counter, double &free_value)
{
	// Build the 1-based pairing table:
	//   pairseq[0] = 0 always;
	//   pairseq[j] = index of the base paired with base j, or 0 if base j is unpaired.
	// Example: for a length-10 structure with pairs 1-6 and 5-10,
	//   pairseq = {0, 6, 0, 0, 0, 10, 1, 0, 0, 0, 5}.
	int size = strlen (structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	// NOTE(review): R, s, B and L are heap-allocated and never released by this
	// function. Their destructors are not visible from here, so it is unclear
	// whether they can safely be deleted independently; confirm the ownership
	// rules and free them if possible.
	ReadInput* R = new ReadInput(size, sequence, pairseq);
	Stack * s = new Stack(R);
	Bands * B = new Bands(R, s);

	if (DEBUG)
	{
		printf("Seq: %s \n", R->CSequence);
		printf("Size: %d \n", R->Size);
		for (int i = 1; i <= R->Size; i++) {
			printf("%d ", R->Sequence[i]);
		}
		printf("\n-------------------------------\n Making the Loop Tree\n");
	}

	// Root of the loop tree, spanning the whole sequence.
	Loop * L = new Loop(0, MaxN+1, R, B, s);

	// Borders of a closed region, filled in by Stack::Add when it reports one.
	int a = 0, b = 0;
	for (int i = 1; i <= R->Size; i++) {
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			// A closed region was identified: add it to the tree.
			L->addLoop(a, b);
		}
	}

	L->countNumberOfChildren();  // set number of children for the top loop

	if (DEBUG)
	{
		L->Print(-1);
		printf("-------------------------------\n");
	}

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L != NULL && L->RightChild != NULL)
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
	}

	// Debug dump of every position (index 0 included): index, character and
	// numeric code, with a code of -1 shown as 0. The values are printed
	// straight from R; the previous version copied them into two new[] arrays
	// that were never freed.
	if (DEBUG)
	{
		for (int i = 0; i < R->Size+1; i++) {
			short code = (short)(R->Sequence[i]);
			if (code == -1) code = 0;
			printf("%d %c %d \n", i, R->CSequence[i], code);
		}
	}

	if (DEBUG)
	{
		cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
		printf("--------------------------------------------------------------\n");
	}

	// PARAMETER TUNING
	// Clear the counter, quadratic_matrix, and free_value. Only the upper
	// triangle of quadratic_matrix is cleared, since only that part is filled out.
	int num_params = get_num_params_PK_DP();
	free_value = 0;
	for (int i = 0; i < num_params; i++)
	{
		counter[i] = 0;
		for (int j = i; j < num_params; j++)
			quadratic_matrix[i][j] = 0;
	}

	int reset_c = 0;
	int ignore_dangles = no_pk_dangling_ends;
	int ignore_AU = 0;  // 0 = do include AU penalties

	// energy returned in kcal
	// NOTE(review): no /1000 scaling is applied here, unlike in
	// free_energy_PK_CC2006b; presumably these Energy overloads already
	// return kcal/mol -- confirm.
	float totalEnergy = -L->Energy(DP, quadratic_matrix, counter, free_value, reset_c, ignore_dangles);
	float totalEnergyDang = -L->EnergyDangling(DP, quadratic_matrix,counter,free_value,reset_c,ignore_dangles,ignore_AU);

	if (DEBUG)
	{
		cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl;

		printf("\n");
		printf("PARAMETER TUNING\n");
		printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n");
		cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " <<
				g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " <<
				g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl;
	}

	// Return the free energy:
	//    - deltaG = x' P x + c' x + f
	//        - where x is the vector of parameters
	//        - P is a symmetric matrix of the coefficients for each quadratic term
	//        - c is a vector of counts for each linear term
	//        - c' means c transposed
	//        - f is a constant
	// The same value can be recomputed from the outputs as
	//    free_value + sum_i params_all[i] * (counter[i] + sum_j P(i,j) * params_all[j])
	// where P(i,j) is read from quadratic_matrix[min(i,j)][max(i,j)], because
	// only the upper triangle of the matrix is filled out.
	return totalEnergy + totalEnergyDang;
}