// USED FOR PARAMETER TUNING (CC2006b energy model) // returns the free energy (in kcal/mol) of sequence folded into structure double free_energy_PK_CC2006b(char *sequence, char *structure) // sequence: sequence (input parameter) // structure: secondary structure in dot-bracket format () and [] (input parameter) { // create an array of shorts which denotes the index of the base pairs: // the first base in the sequence has index 1 // pairseq[0] = 0 always; // pairseq[1] = i where i is the index of the base paired with base 1, etc; // pairseq[j] = 0 if base j is unpaired; int size = strlen (structure); short pairseq[size+1]; detect_original_PKed_pairs_many(structure, pairseq); ReadInput* R = new ReadInput(size, sequence, pairseq); // printf("DEBUG = After readinput\n"); Stack * s = new Stack(R); Bands * B = new Bands(R, s); int printTrace = 0; // by default, don't print energy trace if (DEBUG) { printf("Seq: %s \n", R->CSequence); printf("Size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("\n-------------------------------\n Making the Loop Tree\n"); } Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->countNumberOfChildren(); // set number of children for the top loop if (DEBUG) { L->Print(-1); printf("-------------------------------\n"); } if (DEBUG2) { for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ s->printPrevStack(i); } } printf("\n"); if (L != NULL && L->RightChild != NULL) printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo); } short* secstructure = new short[R->Size+1]; char *csequence = new char[R->Size+2]; csequence[R->Size+1] = '\0'; for (int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; csequence[i] = R->CSequence[i]; if (DEBUG) printf("%d %c %d \n", i, csequence[i], secstructure[i]); } // PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); // printf("Energy Model The total free energy is %f cal/mol\n", L->Energy()); float totalEnergy = 0; if (DEBUG) { cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl; printf("--------------------------------------------------------------\n"); } // energy returned in kcal totalEnergy = -L->Energy(CC2006b)/1000; float totalEnergyDang = -L->EnergyDangling()/1000; if (DEBUG) cout << setw(15) << left << "Cao&Chen(b)" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl; // L->printEnergyTrace(); // if (no_pk_dangling_ends == 0) return totalEnergy + totalEnergyDang; // else // return totalEnergy; }
// USED FOR PARAMETER TUNING (DP energy model) double get_feature_counts_quadratic_PK_DP (char *sequence, char *structure, double **quadratic_matrix, double *counter, double &free_value) // sequence: sequence (input parameter) // structure: secondary structure in dot-bracket format () and [] (input parameter) // counter: array where counter[i] is the number of times the i-th feature occurs (output parameter) // quadratic_matrix: TODO // free_value: TODO // Note: The counter, free_value, and quadratic_matrix are automatically reset before passed onto other functions { // create an array of shorts which denotes the index of the base pairs: // the first base in the sequence has index 1 // pairseq[0] = 0 always; // pairseq[1] = i where i is the index of the base paired with base 1, etc; // pairseq[j] = 0 if base j is unpaired; int size = strlen (structure); short pairseq[size+1]; detect_original_PKed_pairs_many(structure, pairseq); // Hosna -- CAN DELETE THIS NEXT COMMENTED OUT PART - it was for my debugging // It shows an example of how pairseq should look like /* short* pairseq = new short[size+1]; pairseq[0] = 0; pairseq[1] = 6; pairseq[5] = 10; pairseq[6] = 1; pairseq[10] = 5; pairseq[2] = 0; pairseq[3] = 0; pairseq[4] = 0; pairseq[7] = 0; pairseq[8] = 0; pairseq[9] = 0; */ ReadInput* R = new ReadInput(size, sequence, pairseq); // printf("DEBUG = After readinput\n"); Stack * s = new Stack(R); Bands * B = new Bands(R, s); int printTrace = 0; // by default, don't print energy trace if (DEBUG) { printf("Seq: %s \n", R->CSequence); printf("Size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("\n-------------------------------\n Making the Loop Tree\n"); } Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->countNumberOfChildren(); // set number of children for the top loop if (DEBUG) { L->Print(-1); printf("-------------------------------\n"); } if (DEBUG2) { for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ s->printPrevStack(i); } } printf("\n"); if (L != NULL && L->RightChild != NULL) printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo); } short* secstructure = new short[R->Size+1]; char *csequence = new char[R->Size+2]; csequence[R->Size+1] = '\0'; for (int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; csequence[i] = R->CSequence[i]; if (DEBUG) printf("%d %c %d \n", i, csequence[i], secstructure[i]); } // PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); // printf("Energy Model The total free energy is %f cal/mol\n", L->Energy()); float totalEnergy = 0; if (DEBUG) { cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl; printf("--------------------------------------------------------------\n"); } // energy returned in kcal // PARAMETER TUNING // clear the counter, quadratic_matrix, and free_value int num_params = get_num_params_PK_DP(); free_value = 0; for (int i=0; i < num_params; i++) { counter[i] = 0; for (int j=i; j < num_params; j++) quadratic_matrix[i][j] = 0; } int reset_c = 0; int ignore_dangles = no_pk_dangling_ends; int ignore_AU = 0; // 0 = do include AU penalties totalEnergy = -L->Energy(DP, quadratic_matrix, counter, free_value, reset_c, ignore_dangles); float totalEnergyDang = -L->EnergyDangling(DP, quadratic_matrix,counter,free_value,reset_c,ignore_dangles,ignore_AU); // CHECK VALUES OF COUNTER, ETC //int num_params = get_num_params_PK_DP(); //int num_params_pkfree = get_num_params(); //printf("Free Value: %f\n", free_value); //printf("Counter Values:\n"); //for (int i = num_params_pkfree; i < num_params; i++) // printf("c[%d]=%f ", i, counter[i]); //printf("Some P_matrix Values:\n"); //for (int i = 0; i < num_params/10; i++) // printf("P[%d][%d]=%f ", num_params_pkfree + structure_type_index_PK("stp")-1, i, quadratic_matrix[i][num_params_pkfree + structure_type_index_PK("stp")-1]); if (DEBUG) { cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl; printf("\n"); printf("PARAMETER TUNING\n"); printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n"); cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " << g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " << g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl; } // return the free energy: // - deltaG = x' P x + c' x + f // - where x is the vector of parameters // - P is a symmetric matrix of the coefficients for each quadratic term // - c is a vector of counts for each linear term // - c' means c transposed // - f is a constant return totalEnergy + totalEnergyDang; //int num_params = get_num_params_PK_DP(); //double * energy_temp1 = new double[num_params]; //double * energy_temp2 = new double[num_params]; //double energy = free_value; //for (int i = 0; i < num_params; i++) //{ // energy_temp1[i] = counter[i] * params_all[i]; // for (int j = 0; j < num_params; j++) // { // if (i <= j) // energy_temp2[i] += quadratic_matrix[i][j] * params_all[j]; // else // since only the upper triangle of the matrix is filled out // energy_temp2[i] += quadratic_matrix[j][i] * params_all[j]; // } //} //for (int i = 0; i < num_params; i++) //{ // energy += params_all[i]*energy_temp2[i] + energy_temp1[i]; //} //return energy; }
// USED FOR TESTING void get_feature_counts (char *sequence, char *structure, double *counter) // sequence: sequence (input parameter) // structure: secondary structure in dot-bracket format () and [] (input parameter) // counter: array where counter[i] is the number of times the i-th feature occurs (output parameter) { // create an array of shorts which denotes the index of the base pairs: // the first base in the sequence has index 1 // pairseq[0] = 0 always; // pairseq[1] = i where i is the index of the base paired with base 1, etc; // pairseq[j] = 0 if base j is unpaired; int size = strlen (structure); short pairseq[size+1]; detect_original_PKed_pairs_many(structure, pairseq); // Hosna -- CAN DELETE THIS NEXT COMMENTED OUT PART - it was for my debugging // It shows an example of how pairseq should look like /* short* pairseq = new short[size+1]; pairseq[0] = 0; pairseq[1] = 6; pairseq[5] = 10; pairseq[6] = 1; pairseq[10] = 5; pairseq[2] = 0; pairseq[3] = 0; pairseq[4] = 0; pairseq[7] = 0; pairseq[8] = 0; pairseq[9] = 0; */ ReadInput* R = new ReadInput(size, sequence, pairseq); printf("DEBUG = After readinput\n"); Stack * s = new Stack(R); Bands * B = new Bands(R, s); int printTrace = 0; // by default, don't print energy trace printf("Seq: %s \n", R->CSequence); printf("Size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("\n-------------------------------\n Making the Loop Tree\n"); Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->Print(-1); printf("-------------------------------\n"); if (DEBUG2) { for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ s->printPrevStack(i); } } printf("\n"); if (L != NULL && L->RightChild != NULL) printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo); } short* secstructure = new short[R->Size+1]; char *csequence = new char[R->Size+2]; csequence[R->Size+1] = '\0'; for (int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; csequence[i] = R->CSequence[i]; printf("%d %c %d \n", i, csequence[i], secstructure[i]); } // PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); // printf("Energy Model The total free energy is %f cal/mol\n", L->Energy()); float totalEnergy = 0; cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl; printf("--------------------------------------------------------------\n"); if (RE_FLAG) { totalEnergy = -L->Energy(RE); if (no_pk_dangling_ends == 0) cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << (totalEnergy - L->EnergyDangling())/1000 << setw(40) << left << totalEnergy/1000 << endl; else cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << totalEnergy/1000 << setw(40) << left << totalEnergy/1000 << endl; if (printTrace) L->printEnergyTrace(); cout << endl; } if (DP_FLAG) // energy returned in kcal { // double *c = new double[R->Size]; double ** quadratic_matrix; double f = 0; int reset_c = 0; int ignore_dangles = no_pk_dangling_ends; int ignore_AU = 0; // 0 = do include AU penalties if (DEBUG) printf("Before call to Energy for DP model\n"); totalEnergy = -L->Energy(DP, quadratic_matrix, counter, f, reset_c, ignore_dangles); if (DEBUG) printf("After call to Energy for DP model\n"); cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy - L->EnergyDangling(DP, quadratic_matrix,counter,f,reset_c,ignore_dangles,ignore_AU) << setw(40) << left << totalEnergy << endl; if (printTrace) L->printEnergyTrace(); } printf("\n"); printf("PARAMETER TUNING\n"); printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n"); cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " << g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " << g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl; // counter[0] = g_count_Ps; // counter[1] = g_count_Psm; // counter[2] = g_count_Psp; // counter[3] = g_count_Pb; // counter[4] = g_count_Pup; // counter[5] = g_count_Pps; // counter[6] = g_count_a; // counter[7] = g_count_b; // counter[8] = g_count_c; // counter[9] = g_count_stP; // counter[10] = g_count_intP; // counter[11] = g_count_a_p; // counter[12] = g_count_b_p; // counter[13] = g_count_c_p; }
/****************************************************************** main: taking input files, call appropriate functions for: storing essential information in the structures which will be used by the program, identifying closed regions, adding the closed regions to the tree, computing the free energy of the secondary structure and drawing the plots. *******************************************************************/ int main(int argc, char ** argv){ mirella_init(); int mode = 1; char fileSeq[100], fileStruct[100]; char prefix[100]; FILE *input_file; ReadInput * R; if (argc < 2){ usage(); return 0; } strcpy(fileStruct, argv[1]); char outPSFile[100] = "ArcDiagram.ps"; if ((input_file = fopen(fileStruct, "r")) == NULL) { fprintf(stderr, "Cannot open %s\n", fileStruct); usage(); return 0; } fclose(input_file); R = new ReadInput(fileStruct); Stack * s = new Stack(R); Bands * B = new Bands(R, s); printf("Seq: %s \n", R->CSequence); printf("size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("-------------------------------\n Making the Loop Tree\n"); Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->Print(-1); printf("-------------------------------\n "); short* secstructure = new short[R->Size+1]; char *sequence = new char[R->Size+2]; sequence[R->Size+1] = '\0'; for(int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; sequence[i] = R->CSequence[i]; printf("%d %c %d \n", i, sequence[i], secstructure[i]); } PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); printf("The total free energy is %f\n", -L->Energy()/1000.0); printf("Arc Diagram of the given structure is now in ArcDiagram.ps\n"); }