// USED FOR TESTING void get_feature_counts (char *sequence, char *structure, double *counter) // sequence: sequence (input parameter) // structure: secondary structure in dot-bracket format () and [] (input parameter) // counter: array where counter[i] is the number of times the i-th feature occurs (output parameter) { // create an array of shorts which denotes the index of the base pairs: // the first base in the sequence has index 1 // pairseq[0] = 0 always; // pairseq[1] = i where i is the index of the base paired with base 1, etc; // pairseq[j] = 0 if base j is unpaired; int size = strlen (structure); short pairseq[size+1]; detect_original_PKed_pairs_many(structure, pairseq); // Hosna -- CAN DELETE THIS NEXT COMMENTED OUT PART - it was for my debugging // It shows an example of how pairseq should look like /* short* pairseq = new short[size+1]; pairseq[0] = 0; pairseq[1] = 6; pairseq[5] = 10; pairseq[6] = 1; pairseq[10] = 5; pairseq[2] = 0; pairseq[3] = 0; pairseq[4] = 0; pairseq[7] = 0; pairseq[8] = 0; pairseq[9] = 0; */ ReadInput* R = new ReadInput(size, sequence, pairseq); printf("DEBUG = After readinput\n"); Stack * s = new Stack(R); Bands * B = new Bands(R, s); int printTrace = 0; // by default, don't print energy trace printf("Seq: %s \n", R->CSequence); printf("Size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("\n-------------------------------\n Making the Loop Tree\n"); Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->Print(-1); printf("-------------------------------\n"); if (DEBUG2) { for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ s->printPrevStack(i); } } printf("\n"); if (L != NULL && L->RightChild != NULL) printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo); } short* secstructure = new short[R->Size+1]; char *csequence = new char[R->Size+2]; csequence[R->Size+1] = '\0'; for (int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; csequence[i] = R->CSequence[i]; printf("%d %c %d \n", i, csequence[i], secstructure[i]); } // PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); // printf("Energy Model The total free energy is %f cal/mol\n", L->Energy()); float totalEnergy = 0; cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl; printf("--------------------------------------------------------------\n"); if (RE_FLAG) { totalEnergy = -L->Energy(RE); if (no_pk_dangling_ends == 0) cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << (totalEnergy - L->EnergyDangling())/1000 << setw(40) << left << totalEnergy/1000 << endl; else cout << setw(15) << left << "Rivas&Eddy" << setw(25) << left << totalEnergy/1000 << setw(40) << left << totalEnergy/1000 << endl; if (printTrace) L->printEnergyTrace(); cout << endl; } if (DP_FLAG) // energy returned in kcal { // double *c = new double[R->Size]; double ** quadratic_matrix; double f = 0; int reset_c = 0; int ignore_dangles = no_pk_dangling_ends; int ignore_AU = 0; // 0 = do include AU penalties if (DEBUG) printf("Before call to Energy for DP model\n"); totalEnergy = -L->Energy(DP, quadratic_matrix, counter, f, reset_c, ignore_dangles); if (DEBUG) printf("After call to Energy for DP model\n"); cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy - L->EnergyDangling(DP, quadratic_matrix,counter,f,reset_c,ignore_dangles,ignore_AU) << setw(40) << left << totalEnergy << endl; if (printTrace) L->printEnergyTrace(); } printf("\n"); printf("PARAMETER TUNING\n"); printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n"); cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " << g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " << g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl; // counter[0] = g_count_Ps; // counter[1] = g_count_Psm; // counter[2] = g_count_Psp; // counter[3] = g_count_Pb; // counter[4] = g_count_Pup; // counter[5] = g_count_Pps; // counter[6] = g_count_a; // counter[7] = g_count_b; // counter[8] = g_count_c; // counter[9] = g_count_stP; // counter[10] = g_count_intP; // counter[11] = g_count_a_p; // counter[12] = g_count_b_p; // counter[13] = g_count_c_p; }
// USED FOR PARAMETER TUNING (CC2006b energy model) // returns the free energy (in kcal/mol) of sequence folded into structure double free_energy_PK_CC2006b(char *sequence, char *structure) // sequence: sequence (input parameter) // structure: secondary structure in dot-bracket format () and [] (input parameter) { // create an array of shorts which denotes the index of the base pairs: // the first base in the sequence has index 1 // pairseq[0] = 0 always; // pairseq[1] = i where i is the index of the base paired with base 1, etc; // pairseq[j] = 0 if base j is unpaired; int size = strlen (structure); short pairseq[size+1]; detect_original_PKed_pairs_many(structure, pairseq); ReadInput* R = new ReadInput(size, sequence, pairseq); // printf("DEBUG = After readinput\n"); Stack * s = new Stack(R); Bands * B = new Bands(R, s); int printTrace = 0; // by default, don't print energy trace if (DEBUG) { printf("Seq: %s \n", R->CSequence); printf("Size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("\n-------------------------------\n Making the Loop Tree\n"); } Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->countNumberOfChildren(); // set number of children for the top loop if (DEBUG) { L->Print(-1); printf("-------------------------------\n"); } if (DEBUG2) { for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ s->printPrevStack(i); } } printf("\n"); if (L != NULL && L->RightChild != NULL) printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo); } short* secstructure = new short[R->Size+1]; char *csequence = new char[R->Size+2]; csequence[R->Size+1] = '\0'; for (int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; csequence[i] = R->CSequence[i]; if (DEBUG) printf("%d %c %d \n", i, csequence[i], secstructure[i]); } // PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); // printf("Energy Model The total free energy is %f cal/mol\n", L->Energy()); float totalEnergy = 0; if (DEBUG) { cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl; printf("--------------------------------------------------------------\n"); } // energy returned in kcal totalEnergy = -L->Energy(CC2006b)/1000; float totalEnergyDang = -L->EnergyDangling()/1000; if (DEBUG) cout << setw(15) << left << "Cao&Chen(b)" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl; // L->printEnergyTrace(); // if (no_pk_dangling_ends == 0) return totalEnergy + totalEnergyDang; // else // return totalEnergy; }
// USED FOR PARAMETER TUNING (DP energy model) double get_feature_counts_quadratic_PK_DP (char *sequence, char *structure, double **quadratic_matrix, double *counter, double &free_value) // sequence: sequence (input parameter) // structure: secondary structure in dot-bracket format () and [] (input parameter) // counter: array where counter[i] is the number of times the i-th feature occurs (output parameter) // quadratic_matrix: TODO // free_value: TODO // Note: The counter, free_value, and quadratic_matrix are automatically reset before passed onto other functions { // create an array of shorts which denotes the index of the base pairs: // the first base in the sequence has index 1 // pairseq[0] = 0 always; // pairseq[1] = i where i is the index of the base paired with base 1, etc; // pairseq[j] = 0 if base j is unpaired; int size = strlen (structure); short pairseq[size+1]; detect_original_PKed_pairs_many(structure, pairseq); // Hosna -- CAN DELETE THIS NEXT COMMENTED OUT PART - it was for my debugging // It shows an example of how pairseq should look like /* short* pairseq = new short[size+1]; pairseq[0] = 0; pairseq[1] = 6; pairseq[5] = 10; pairseq[6] = 1; pairseq[10] = 5; pairseq[2] = 0; pairseq[3] = 0; pairseq[4] = 0; pairseq[7] = 0; pairseq[8] = 0; pairseq[9] = 0; */ ReadInput* R = new ReadInput(size, sequence, pairseq); // printf("DEBUG = After readinput\n"); Stack * s = new Stack(R); Bands * B = new Bands(R, s); int printTrace = 0; // by default, don't print energy trace if (DEBUG) { printf("Seq: %s \n", R->CSequence); printf("Size: %d \n", R->Size); for (int i = 1; i <= R->Size; i++) { printf("%d ", R->Sequence[i]); } printf("\n-------------------------------\n Making the Loop Tree\n"); } Loop * L = new Loop(0, MaxN+1, R, B, s); int a, b; //will store the borders of a closed regoin for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ if (s->Add(i, a, b)){ //If a closed region is identifed add it to the tree by calling addLoop L->addLoop(a,b); }; }; }; L->countNumberOfChildren(); // set number of children for the top loop if (DEBUG) { L->Print(-1); printf("-------------------------------\n"); } if (DEBUG2) { for (int i = 1; i <= R->Size; i++){ if (R->BasePair(i)>= 0){ s->printPrevStack(i); } } printf("\n"); if (L != NULL && L->RightChild != NULL) printf("L->NumberOfUnpairedInPseudo = %d\n", L->RightChild->NumberOfUnpairedInPseudo); } short* secstructure = new short[R->Size+1]; char *csequence = new char[R->Size+2]; csequence[R->Size+1] = '\0'; for (int i = 0; i < R->Size+1; i++) { secstructure[i] = (short)(R->Sequence[i]); if (secstructure[i] == -1) secstructure[i] = 0; csequence[i] = R->CSequence[i]; if (DEBUG) printf("%d %c %d \n", i, csequence[i], secstructure[i]); } // PlotRna(prefix, &sequence[1], &secstructure[1], outPSFile, L->Energy()); // printf("Energy Model The total free energy is %f cal/mol\n", L->Energy()); float totalEnergy = 0; if (DEBUG) { cout << setw(15) << left << "Energy Model" << setw(25) << left << "Free Energy (kcal/mol)" << setw(40) << left << "Free Energy without Dangling (kcal/mol)" << endl; printf("--------------------------------------------------------------\n"); } // energy returned in kcal // PARAMETER TUNING // clear the counter, quadratic_matrix, and free_value int num_params = get_num_params_PK_DP(); free_value = 0; for (int i=0; i < num_params; i++) { counter[i] = 0; for (int j=i; j < num_params; j++) quadratic_matrix[i][j] = 0; } int reset_c = 0; int ignore_dangles = no_pk_dangling_ends; int ignore_AU = 0; // 0 = do include AU penalties totalEnergy = -L->Energy(DP, quadratic_matrix, counter, free_value, reset_c, ignore_dangles); float totalEnergyDang = -L->EnergyDangling(DP, quadratic_matrix,counter,free_value,reset_c,ignore_dangles,ignore_AU); // CHECK VALUES OF COUNTER, ETC //int num_params = get_num_params_PK_DP(); //int num_params_pkfree = get_num_params(); //printf("Free Value: %f\n", free_value); //printf("Counter Values:\n"); //for (int i = num_params_pkfree; i < num_params; i++) // printf("c[%d]=%f ", i, counter[i]); //printf("Some P_matrix Values:\n"); //for (int i = 0; i < num_params/10; i++) // printf("P[%d][%d]=%f ", num_params_pkfree + structure_type_index_PK("stp")-1, i, quadratic_matrix[i][num_params_pkfree + structure_type_index_PK("stp")-1]); if (DEBUG) { cout << setw(15) << left << "Dirks&Pierce" << setw(25) << left << totalEnergy + totalEnergyDang << setw(40) << left << totalEnergy << endl; printf("\n"); printf("PARAMETER TUNING\n"); printf("Ps Psm Psp Pb Pup Pps a b c stP intP a_p b_p c_p\n"); cout << g_count_Ps << " " << g_count_Psm << " " << g_count_Psp << " " << g_count_Pb << " " << g_count_Pup << " " << g_count_Pps << " " << g_count_a << " " << g_count_b << " " << g_count_c << " " << g_count_stP << " " << g_count_intP << " " << g_count_a_p << " " << g_count_b_p << " " << g_count_c_p << endl; } // return the free energy: // - deltaG = x' P x + c' x + f // - where x is the vector of parameters // - P is a symmetric matrix of the coefficients for each quadratic term // - c is a vector of counts for each linear term // - c' means c transposed // - f is a constant return totalEnergy + totalEnergyDang; //int num_params = get_num_params_PK_DP(); //double * energy_temp1 = new double[num_params]; //double * energy_temp2 = new double[num_params]; //double energy = free_value; //for (int i = 0; i < num_params; i++) //{ // energy_temp1[i] = counter[i] * params_all[i]; // for (int j = 0; j < num_params; j++) // { // if (i <= j) // energy_temp2[i] += quadratic_matrix[i][j] * params_all[j]; // else // since only the upper triangle of the matrix is filled out // energy_temp2[i] += quadratic_matrix[j][i] * params_all[j]; // } //} //for (int i = 0; i < num_params; i++) //{ // energy += params_all[i]*energy_temp2[i] + energy_temp1[i]; //} //return energy; }