// Run spf calculation on sequence and load folding priors. // THE PRIORS COMING FROM SPF PROGRAM ARE IN LINEAR DOMAIN!!! t_spf_array::t_spf_array(int seq_length, char* seq_path, t_ppf_cli* _ppf_cli, char* pairing_probs_file, bool mallocate) { this->ppf_cli = _ppf_cli; this->n_bytes_alloced = 0.0f; // Copy sequence length. this->N = seq_length; if(mallocate) { this->pairing_array = (double**)malloc(sizeof(double*) * (N + 1)); this->ind_unpairing_array = (double*)malloc(sizeof(double) * (N + 1)); this->ind_pairing_array = (double*)malloc(sizeof(double) * (N + 1)); this->fold_env = (bool**)malloc(sizeof(bool*) * (N + 1)); this->str_coinc_env = (bool**)malloc(sizeof(bool*) * (N + 1)); this->n_bytes_alloced += ((sizeof(double*) * (N + 1)) + (sizeof(double) * (N + 1)) + (sizeof(double) * (N + 1)) + (sizeof(bool*) * (N + 1)) + (sizeof(bool*) * (N + 1)) + (sizeof(short*) * (N + 1))); } else { this->pairing_array = NULL; this->ind_unpairing_array = NULL; this->ind_pairing_array = NULL; this->fold_env = NULL; this->str_coinc_env = NULL; } this->n_bytes_alloced += ((sizeof(double*) * (N + 1)) + (sizeof(double) * (N + 1)) + (sizeof(double) * (N + 1)) + (sizeof(bool*) * (N + 1)) + (sizeof(bool*) * (N + 1)) + (sizeof(short*) * (N + 1))); // Allocate pairing and unpairing spf arrays. for(int i1 = 0; i1 <= N; i1++) { // Include the max_separation criterion in the allocation function. int min_i2 = i1; int max_i2 = MIN(i1 + ppf_cli->max_n_separation_between_nucs, N); if(mallocate) { this->fold_env[i1] = (bool*)malloc(sizeof(bool) * (max_i2 - min_i2 + 2)); this->fold_env[i1] -= i1; // Do pointer shift for fold envelope. } this->n_bytes_alloced += (sizeof(bool) * (max_i2 - min_i2 + 2)); if(mallocate) { this->pairing_array[i1] = (double*)malloc(sizeof(double) * (max_i2 - min_i2 + 2)); // Allocate pairing prob. this->pairing_array[i1] -= i1; // Do pointer shift to access the array using sequence indices. } this->n_bytes_alloced += (sizeof(double) * (max_i2 - min_i2 + 2)); if(mallocate) { this->str_coinc_env[i1] = (bool*)malloc(sizeof(bool) * (max_i2 - min_i2 + 2)); this->str_coinc_env[i1] -= i1; // Do pointer shift for fold envelope. } this->n_bytes_alloced += (sizeof(double) * (max_i2 - min_i2 + 2)); if(mallocate) { this->ind_pairing_array[i1] = ZERO; this->ind_unpairing_array[i1] = ZERO; for(int i2 = min_i2; i2 <= max_i2; i2++) { this->pairing_array[i1][i2] = CONVERT_FROM_LIN(0.0); // Initialize the probabilities to 0. this->fold_env[i1][i2] = false; // Set all possible pairs. this->str_coinc_env[i1][i2] = false; } } } // i1 loop if(!mallocate) { return; } // Now arrays are allocated, do single partition function calculation for that sequence if(pairing_probs_file == NULL) { RNA* rna = new RNA(seq_path, 2); rna->PartitionFunction(); // Load pairing array. for(int i = 1; i <= this->N; i++) { int min_j = i+1; int max_j = MIN(i + ppf_cli->max_n_separation_between_nucs, N); for(int j = min_j; j <= max_j; j++) { this->pairing_array[i][j] = rna->GetPairProbability(i, j); } } } else { // Read spf file: /* Read spf array file, the format is as following: 1 2 0.000000000000000000000000000000 1 3 0.000000000000000000000000000000 1 4 0.000000000000000000000000000000 1 5 0.000000000000000000000000000000 1 6 0.000000000000000000000000000000 1 7 0.000000000000000000000000000000 1 8 0.000000000000000000000000000000 1 9 0.000000000000000000000000000000 1 10 0.000000000000000000000000000000 1 11 0.000002568332606195572231287628 ... where each line consists of [index 1] [index 2] [pairing probability of two nucleotides] */ char spf_array_fn[1000]; strcpy(spf_array_fn, pairing_probs_file); // Read file, read all lines, # of lines read must be equal to # of nucleotides in sequence. FILE* spf_file = open_f(spf_array_fn, "rb"); if(spf_file == NULL) { printf("Could not open single partition function %s @ %s(%d)\n", spf_array_fn, __FILE__, __LINE__); exit(0); } // SPF file do not contain all the (i1, i2) pairs, it rather includes // i1, i2 pairs where i1 < i2. However in ppf calculations, int i1 = 0; int i2 = 0; double current_lin_prob = ZERO; int n_samples = 0; int n_curr_pp_cnt = 0; if(fread(&n_samples, sizeof(int), 1, spf_file) != 1) { printf("Could not read number of samples from %s\n", spf_array_fn); exit(0); } else { printf("%d samples are processed to estimate base pairing probabilities.\n", n_samples); } // 1 11 0.000002568332606195572231287628 while(true) { if(fread(&i1, sizeof(int), 1, spf_file) != 1) { break; } if(fread(&i2, sizeof(int), 1, spf_file) != 1) { printf("Could not read i2 for i1=%d in %s\n", i1, spf_array_fn); exit(0); } if(fread(&n_curr_pp_cnt, sizeof(int), 1, spf_file) != 1) { printf("Could not read i2 for i1=%d in %s\n", i1, spf_array_fn); exit(0); } current_lin_prob = (double)n_curr_pp_cnt / (double)n_samples; // Check max_separation criterion. if(i2 > i1 && (i2 - i1) <= ppf_cli->max_n_separation_between_nucs) { // It should be noted that probabilities in spf file are linear, might need to change them. this->pairing_array[i1][i2] = CONVERT_FROM_LIN(current_lin_prob); if(_DUMP_SPF_MESSAGES_) printf("pp(%d, %d) = %.25f\n", i1, i2, current_lin_prob); // If the pairing probability is smaller than fold_env_prob_treshold, set fold envelope for this to 0. if(this->pairing_array[i1][i2] >= CONVERT_FROM_LIN(ppf_cli->fold_env_prob_treshold)) { this->fold_env[i1][i2] = true; } } else { // Out of bounds, do not set the value here since it is not allocated. } if(_DUMP_SPF_MESSAGES_) printf("P_pair(%d, %d) = %.25f\n", i1, i2, this->pairing_array[i1][i2]); //fscanf(spf_file, "%d %d %lf", &i1, &i2, ¤t_lin_prob); //printf("read %d %d\n", i1, i2); } fclose(spf_file); } // read the pairing probabilities from external file. // Compute the pairing and coincidence ptr relocation maps with base pairing enforced for pairs that have 0.999 or higher probability of pairing. this->folding_constraints = new t_folding_constraints(seq_path, this->pairing_array, 0.999f); // Weigh all pairing probabilities with a factor in log domain to // decrease affect of positive feedback. this->calculate_unpairing_probs(); if(_DUMP_SPF_MESSAGES_) printf("t_spf_array allocated %lf bytes\n", this->n_bytes_alloced); // // Dump spf plane if desired. //if(_DUMP_SPF_PLANES_) //{ // this->dump_spf_plane(); // this->dump_fold_env(); //} }
/////////////////////////////////////////////////////////////////////////////// // Read base pair probabilities from a partition function save file. /////////////////////////////////////////////////////////////////////////////// void Postscript_Annotation_Handler::readPartition( string file, RNA* structureStrand ) { // Initialize the RNA strand and error checker that reads partition data. RNA* partStrand = new RNA( file.c_str(), PFS_TYPE ); ErrorChecker<RNA>* partChecker = new ErrorChecker<RNA>( partStrand ); // If the RNA strand and error checker were created successfully, read in the // annotation data. if( !( error = partChecker->isErrorStatus() ) ) { // If there are no structures in the strand, print out an error message. // Otherwise, initialize the annotation array to handle the appropriate // amount of structures. if( structures == 0 ) { cerr << "No structures or pairs are present to annotate." << endl; error = true; } else { probabilityAnnotations.resize( structures ); for( int i = 1; i <= structures; i++ ) { vector<char> row; row.resize( length ); for( int j = 1; j <= length; j++ ) { row[j-1] = 'i'; } probabilityAnnotations[i - 1] = row; } } // For each structure possible, read in its base pair probability data. for( int i = 1; i <= structures; i++ ) { // If an error has occurred, stop reading data. if( error ) { break; } // Loop through the structure to find pairs. for( int j = 1; j <= length; j++ ) { // If an error has occurred, stop reading data. if( error ) { break; } // Get the next pair. If an error occurred, stop reading data. int pair = structureStrand->GetPair( j, i ); int code = structureStrand->GetErrorCode(); if( code != 0 ) { cerr << endl << structureStrand->GetErrorMessage( code ) << endl; error = true; break; } // If the next nucleotide is in fact paired, determine the proper // color code for it. if( ( pair != 0 ) && ( pair > j ) ) { // Get the probability for this pair. // If an error occurred, stop reading data. double bp = partStrand->GetPairProbability( j, pair ); if( ( error = partChecker->isErrorStatus() ) ) { break; } // Set the proper values for the color code. probabilityAnnotations[i-1][j-1] = ( bp >= 0.99 ) ? 'a' : ( bp > 0.95 ) ? 'b' : ( bp > 0.90 ) ? 'c' : ( bp > 0.80 ) ? 'd' : ( bp > 0.70 ) ? 'e' : ( bp > 0.60 ) ? 'f' : ( bp > 0.50 ) ? 'g' : 'h'; probabilityAnnotations[i-1][pair-1] = probabilityAnnotations[i-1][j-1]; } } } } // If an error occurred, print out an extra error message to make sure the // user knows the error came from reading the partition function annotation // file in. if( error ) { cerr << "Partition function save file not read successfully." << endl; } // Delete the RNA strand and error checker when they're no longer needed. delete partStrand; delete partChecker; }