Пример #1
0
// USED FOR PARAMETER TUNING (CC2006b energy model)
// Returns the free energy (in kcal/mol) of sequence folded into structure.
//
// sequence:  sequence (input parameter)
// structure: secondary structure in dot-bracket format () and [] (input parameter)
//
// The returned value is -L->Energy(CC2006b)/1000 plus -L->EnergyDangling()/1000,
// where L is the loop tree built from the structure.
//
// NOTE(review): R, s, B and L are allocated with new and never deleted here.
// Whether Loop/Bands/Stack take ownership of the objects they are handed is not
// visible from this function -- confirm before adding deletes.
double free_energy_PK_CC2006b(char *sequence, char *structure)
{
	// create an array of shorts which denotes the index of the base pairs:
	// the first base in the sequence has index 1
	// pairseq[0] = 0 always;
	// pairseq[1] = i where i is the index of the base paired with base 1, etc;
	// pairseq[j] = 0 if base j is unpaired;

	int size = strlen (structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	ReadInput* R = new ReadInput(size, sequence, pairseq);
	Stack * s = new Stack(R);
	Bands * B = new Bands(R, s);

	if (DEBUG)
	{
		printf("Seq: %s \n", R->CSequence);
		printf("Size: %d \n", R->Size);
		for (int i = 1; i <= R->Size; i++) {
			printf("%d ", R->Sequence[i]);
		}
		printf("\n-------------------------------\n Making the Loop Tree\n");
	}

	Loop * L = new Loop(0, MaxN+1, R, B, s);

	// Feed every base for which BasePair(i) >= 0 to the stack; whenever
	// Add() reports a closed region, its borders (a, b) are added to the tree.
	int a, b; // will store the borders of a closed region
	for (int i = 1; i <= R->Size; i++) {
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->countNumberOfChildren();  // set number of children for the top loop

	if (DEBUG)
	{
		L->Print(-1);
		printf("-------------------------------\n");
	}

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L->RightChild != NULL)
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
	}

	if (DEBUG)
	{
		// Per-position dump (index, sequence character, R->Sequence value with
		// -1 shown as 0).  Printed straight from R: the heap copies that used
		// to be built here were never freed and served no other purpose.
		for (int i = 0; i < R->Size+1; i++) {
			short value = (short)(R->Sequence[i]);
			if (value == -1) value = 0;
			printf("%d %c %d \n", i, R->CSequence[i], value);
		}

		cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
		printf("--------------------------------------------------------------\n");
	}

	// energy returned in kcal
	float totalEnergy = -L->Energy(CC2006b)/1000;
	float totalEnergyDang = -L->EnergyDangling()/1000;

	if (DEBUG)
		cout << setw(15) << left << "Cao&Chen(b)" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl;

	// Dangling-end energy is always included in the result
	// (no_pk_dangling_ends is not consulted here).
	return totalEnergy + totalEnergyDang;
}
Пример #2
0
// USED FOR PARAMETER TUNING (DP energy model)
// Computes the free energy of sequence folded into structure and, as a side
// product, the feature counts describing that energy as a function of the
// parameters:
//    - deltaG = x' P x + c' x + f
//        - where x is the vector of parameters
//        - P is a symmetric matrix of the coefficients for each quadratic term
//        - c is a vector of counts for each linear term
//        - c' means c transposed
//        - f is a constant
//
// sequence: sequence (input parameter)
// structure: secondary structure in dot-bracket format () and [] (input parameter)
// quadratic_matrix: P above (output parameter); only entries [i][j] with j >= i
//                   are reset here
// counter: c above; counter[i] is the number of times the i-th feature occurs
//          (output parameter)
// free_value: f above (output parameter)
// Note: counter, free_value and (the upper triangle of) quadratic_matrix are
//       reset to 0 here before being passed on to L->Energy / L->EnergyDangling.
//
// Returns -L->Energy(DP, ...) plus -L->EnergyDangling(DP, ...).
//
// NOTE(review): R, s, B and L are allocated with new and never deleted here.
// Whether Loop/Bands/Stack take ownership of the objects they are handed is not
// visible from this function -- confirm before adding deletes.
double get_feature_counts_quadratic_PK_DP (char *sequence, char *structure, double **quadratic_matrix, double *counter, double &free_value)
{
	// create an array of shorts which denotes the index of the base pairs:
	// the first base in the sequence has index 1
	// pairseq[0] = 0 always;
	// pairseq[1] = i where i is the index of the base paired with base 1, etc;
	// pairseq[j] = 0 if base j is unpaired;

	int size = strlen (structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	ReadInput* R = new ReadInput(size, sequence, pairseq);
	Stack * s = new Stack(R);
	Bands * B = new Bands(R, s);

	if (DEBUG)
	{
		printf("Seq: %s \n", R->CSequence);
		printf("Size: %d \n", R->Size);
		for (int i = 1; i <= R->Size; i++) {
			printf("%d ", R->Sequence[i]);
		}
		printf("\n-------------------------------\n Making the Loop Tree\n");
	}

	Loop * L = new Loop(0, MaxN+1, R, B, s);

	// Feed every base for which BasePair(i) >= 0 to the stack; whenever
	// Add() reports a closed region, its borders (a, b) are added to the tree.
	int a, b; // will store the borders of a closed region
	for (int i = 1; i <= R->Size; i++) {
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->countNumberOfChildren();  // set number of children for the top loop

	if (DEBUG)
	{
		L->Print(-1);
		printf("-------------------------------\n");
	}

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L->RightChild != NULL)
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
	}

	if (DEBUG)
	{
		// Per-position dump (index, sequence character, R->Sequence value with
		// -1 shown as 0).  Printed straight from R: the heap copies that used
		// to be built here were never freed and served no other purpose.
		for (int i = 0; i < R->Size+1; i++) {
			short value = (short)(R->Sequence[i]);
			if (value == -1) value = 0;
			printf("%d %c %d \n", i, R->CSequence[i], value);
		}

		cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
		printf("--------------------------------------------------------------\n");
	}

	// PARAMETER TUNING
	// clear the counter, quadratic_matrix, and free_value
	// (only the upper triangle of the matrix, j >= i, is cleared)
	int num_params = get_num_params_PK_DP();
	free_value = 0;
	for (int i = 0; i < num_params; i++)
	{
		counter[i] = 0;
		for (int j = i; j < num_params; j++)
			quadratic_matrix[i][j] = 0;
	}

	int reset_c = 0;
	int ignore_dangles = no_pk_dangling_ends;
	int ignore_AU = 0;  // 0 = do include AU penalties

	// energy returned in kcal
	float totalEnergy = -L->Energy(DP, quadratic_matrix, counter, free_value, reset_c, ignore_dangles);
	float totalEnergyDang = -L->EnergyDangling(DP, quadratic_matrix, counter, free_value, reset_c, ignore_dangles, ignore_AU);

	if (DEBUG)
	{
		cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl;

		printf("\n");
		printf("PARAMETER TUNING\n");
		printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n");
		cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " <<
				g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " <<
				g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl;
	}

	// return the free energy as computed by the loop tree (not re-derived
	// from quadratic_matrix / counter / free_value)
	return totalEnergy + totalEnergyDang;
}
Пример #3
0
// USED FOR TESTING
// Builds the loop tree for sequence folded into structure and prints a
// verbose trace plus the energies of the enabled models (RE_FLAG, DP_FLAG)
// and the global g_count_* feature counts.
//
// sequence: sequence (input parameter)
// structure: secondary structure in dot-bracket format () and [] (input parameter)
// counter: array where counter[i] is the number of times the i-th feature occurs
//          (output parameter).  It is only handed on to L->Energy /
//          L->EnergyDangling when DP_FLAG is set; this function itself never
//          writes to it (the g_count_* globals are printed, not copied into it).
//
// NOTE(review): unlike the parameter-tuning entry points, this function does
// not call L->countNumberOfChildren() after building the tree -- confirm that
// is intended.
// NOTE(review): R, s, B and L are allocated with new and never deleted here;
// ownership is not visible from this function.
void get_feature_counts (char *sequence, char *structure, double *counter)
{
	// create an array of shorts which denotes the index of the base pairs:
	// the first base in the sequence has index 1
	// pairseq[0] = 0 always;
	// pairseq[1] = i where i is the index of the base paired with base 1, etc;
	// pairseq[j] = 0 if base j is unpaired;

	int size = strlen (structure);
	short pairseq[size+1];
	detect_original_PKed_pairs_many(structure, pairseq);

	ReadInput* R = new ReadInput(size, sequence, pairseq);

	printf("DEBUG = After readinput\n");

	Stack * s = new Stack(R);
	Bands * B = new Bands(R, s);

	int printTrace = 0;  // by default, don't print energy trace

	printf("Seq: %s \n", R->CSequence);
	printf("Size: %d \n", R->Size);
	for (int i = 1; i <= R->Size; i++) {
		printf("%d ", R->Sequence[i]);
	}
	printf("\n-------------------------------\n Making the Loop Tree\n");
	Loop * L = new Loop(0, MaxN+1, R, B, s);

	// Feed every base for which BasePair(i) >= 0 to the stack; whenever
	// Add() reports a closed region, its borders (a, b) are added to the tree.
	int a, b; // will store the borders of a closed region
	for (int i = 1; i <= R->Size; i++) {
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->Print(-1);
	printf("-------------------------------\n");

	if (DEBUG2)
	{
		for (int i = 1; i <= R->Size; i++) {
			if (R->BasePair(i) >= 0) {
				s->printPrevStack(i);
			}
		}
		printf("\n");

		if (L->RightChild != NULL)
			printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo);
	}

	// Per-position dump (index, sequence character, R->Sequence value with
	// -1 shown as 0).  Printed straight from R: the heap copies that used to
	// be built here were never freed and served no other purpose.
	for (int i = 0; i < R->Size+1; i++) {
		short value = (short)(R->Sequence[i]);
		if (value == -1) value = 0;
		printf("%d %c %d \n", i, R->CSequence[i], value);
	}

	float totalEnergy = 0;

	cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl;
	printf("--------------------------------------------------------------\n");
	if (RE_FLAG)
	{
		totalEnergy = -L->Energy(RE);
		if (no_pk_dangling_ends == 0)
			cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << (totalEnergy - L->EnergyDangling())/1000 << setw(40) << left << totalEnergy/1000 << endl;
		else
			cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << totalEnergy/1000 << setw(40) << left << totalEnergy/1000 << endl;

		if (printTrace)
			L->printEnergyTrace();
		cout << endl;
	}
	if (DP_FLAG)  // energy returned in kcal
	{
		// No quadratic matrix is available in this entry point.  Pass a
		// well-defined NULL rather than an indeterminate pointer (which the
		// previous code did, and which is undefined behaviour to even read).
		// NOTE(review): if Energy/EnergyDangling dereference this argument, a
		// real matrix must be allocated here instead -- confirm against Loop.
		double ** quadratic_matrix = NULL;
		double f = 0;
		int reset_c = 0;
		int ignore_dangles = no_pk_dangling_ends;
		int ignore_AU = 0;  // 0 = do include AU penalties

		if (DEBUG)
			printf("Before call to Energy for DP model\n");

		totalEnergy = -L->Energy(DP, quadratic_matrix, counter, f, reset_c, ignore_dangles);

		if (DEBUG)
			printf("After call to Energy for DP model\n");

		cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy - L->EnergyDangling(DP, quadratic_matrix,counter,f,reset_c,ignore_dangles,ignore_AU) << setw(40) << left << totalEnergy << endl;

		if (printTrace)
			L->printEnergyTrace();
	}

	printf("\n");
	printf("PARAMETER TUNING\n");
	printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n");
	cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " <<
			g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " <<
			g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl;
}
Пример #4
0
/******************************************************************
main: taking input files, call appropriate functions for: storing
essential information in the structures which will be used by the
program, identifying closed regions, adding the closed regions to
the tree, computing the free energy of the secondary structure
and drawing the plots.

Usage: argv[1] is the path of the structure file handed to ReadInput.
With no argument, or if the file cannot be opened for reading, usage()
is printed and the program returns 0.
*******************************************************************/
int main(int argc, char ** argv){

	mirella_init();

	if (argc < 2){
	  usage();
	  return 0;
	}

	// Use argv[1] in place: copying it into a fixed 100-byte buffer with
	// strcpy overflowed the stack for paths of 100 characters or more.
	char *fileStruct = argv[1];
	char outPSFile[100] = "ArcDiagram.ps";
	// Previously handed to PlotRna uninitialized; start it as an empty string.
	// NOTE(review): what PlotRna does with prefix is not visible here.
	char prefix[100] = "";

	// Probe that the file is readable before ReadInput opens it itself.
	FILE *input_file = fopen(fileStruct, "r");
	if (input_file == NULL) {
	   fprintf(stderr, "Cannot open %s\n", fileStruct);
	   usage();
	   return 0;
	}
	fclose(input_file);

	ReadInput * R = new ReadInput(fileStruct);
	Stack * s = new Stack(R);
	Bands * B = new Bands(R, s);

	printf("Seq: %s \n", R->CSequence);
	printf("size: %d \n", R->Size);
	for (int i = 1; i <= R->Size; i++) {
	  printf("%d ", R->Sequence[i]);
	}
	printf("-------------------------------\n Making the Loop Tree\n");
	Loop * L = new Loop(0, MaxN+1, R, B, s);

	// Feed every base for which BasePair(i) >= 0 to the stack; whenever
	// Add() reports a closed region, its borders (a, b) are added to the tree.
	int a, b; // will store the borders of a closed region
	for (int i = 1; i <= R->Size; i++) {
		if (R->BasePair(i) >= 0 && s->Add(i, a, b)) {
			L->addLoop(a, b);
		}
	}

	L->Print(-1);
	printf("-------------------------------\n ");

	// Copies of R->Sequence (as shorts, with -1 replaced by 0) and of
	// R->CSequence; PlotRna receives them starting from index 1.
	short* secstructure = new short[R->Size+1];
	char *sequence = new char[R->Size+2];
	sequence[R->Size+1] = '\0';
	for (int i = 0; i < R->Size+1; i++) {
	  secstructure[i] = (short)(R->Sequence[i]);
	  if (secstructure[i] == -1) secstructure[i] = 0;
	  sequence[i] = R->CSequence[i];
	  printf("%d %c %d \n", i, sequence[i], secstructure[i]);
	}
	PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy());
	printf("The total free energy is %f\n", -L->Energy()/1000.0);
	printf("Arc Diagram of the given structure is now in ArcDiagram.ps\n");
	return 0;
}