// The sampled ct's are usually a set of cts. // Must enumerate the cts. void t_stoch_sampled_structures::dump_sampled_cts(int _sample_id) { if(_DUMP_STOCH_SAMPLED_STRUCTURES_MESSAGES_) printf("Dumping first structure.\n"); // Create the sampling output directory. char ct1_fp[4096]; if(this->ppf_cli->seq1_sample_ct_op == NULL) { sprintf(ct1_fp, "%s_sampled_cts.ct", this->ppf_cli->seq1_op_file_prefix); } else { strcpy(ct1_fp, this->ppf_cli->seq1_sample_ct_op); } FILE* ct1_file = NULL; ct1_file = open_f(ct1_fp, "a"); fprintf(ct1_file, "%d\t %s_%d\t Energy=0.0\n", this->N1, this->ppf_cli->seq1_op_file_prefix, _sample_id); // Dump all base pairing info for ct1. for(int cnt = 1; cnt <= N1; cnt++) { // Sth. like following: // 1 G 0 2 73 1 fprintf(ct1_file, "%d %c\t%d\t%d\t%d\t%d\n", cnt, this->seq_man->seq1->nucs[cnt], cnt-1, cnt+1, this->seq1_sampled_ct_bps[cnt], cnt); } fclose(ct1_file); if(_DUMP_STOCH_SAMPLED_STRUCTURES_MESSAGES_) printf("Dumping second sampled structure.\n"); char ct2_fp[4096]; if(this->ppf_cli->seq2_sample_ct_op == NULL) { sprintf(ct2_fp, "%s_sampled_cts.ct", this->ppf_cli->seq2_op_file_prefix); } else { strcpy(ct2_fp, this->ppf_cli->seq2_sample_ct_op); } FILE* ct2_file = open_f(ct2_fp, "a"); fprintf(ct2_file, "%d\t %s_%d\t Energy=0.0\n", this->N2, this->ppf_cli->seq2_op_file_prefix, _sample_id); // Dump all base pairing info for ct1. for(int cnt = 1; cnt <= N2; cnt++) { // Sth. like following: // 1 G 0 2 73 1 fprintf(ct2_file, "%d %c\t%d\t%d\t%d\t%d\n", cnt, this->seq_man->seq2->nucs[cnt], cnt-1, cnt+1, this->seq2_sampled_ct_bps[cnt], cnt); } fclose(ct2_file); }
void mpi_write_part(
  sptensor_t const * const tt,
  permutation_t const * const perm,
  rank_info const * const rinfo)
{
  /* each rank writes its nonzeros to its own file, named <rank>.part */
  char fname[256];
  sprintf(fname, "%d.part", rinfo->rank);
  FILE * out = open_f(fname, "w");

  idx_t nz = 0;
  while(nz < tt->nnz) {
    /* one line per nonzero: 1-based coordinates followed by the value */
    for(idx_t mode=0; mode < tt->nmodes; ++mode) {
      idx_t coord = tt->ind[mode][nz];

      /* translate through the index map first, when one exists */
      coord = (tt->indmap[mode] != NULL) ? tt->indmap[mode][coord] : coord;

      /* then through the inverse permutation, when one exists */
      coord = (perm->iperms[mode] != NULL) ? perm->iperms[mode][coord] : coord;

      fprintf(out, "%"SPLATT_PF_IDX" ", 1+coord);
    }
    fprintf(out, "%"SPLATT_PF_VAL"\n", tt->vals[nz]);
    ++nz;
  }

  fclose(out);
}
void t_matrix::dump_sparse_matrix(char* fp) { FILE* f_matrix = open_f(fp, "wb"); // Must dump all the entries without regard to symmetry of the matrix. for(int i_row = 1; i_row <= this->height; i_row++) { for(int i_col = 1; i_col <= this->width; i_col++) { if(i_row > i_col && this->symmetric) { double cur_val = this->x(i_col, i_row); fwrite((void*)&i_row, sizeof(int), 1, f_matrix); fwrite((void*)&i_col, sizeof(int), 1, f_matrix); fwrite((void*)&cur_val, sizeof(double), 1, f_matrix); } else { double cur_val = this->x(i_row, i_col); fwrite((void*)&i_row, sizeof(int), 1, f_matrix); fwrite((void*)&i_col, sizeof(int), 1, f_matrix); fwrite((void*)&cur_val, sizeof(double), 1, f_matrix); } } // i_col loop } // i_row loop fclose(f_matrix); }
CTEST2(graph, graph_convert) { for(idx_t i=0; i < data->ntensors; ++i) { sptensor_t * const tt = data->tensors[i]; splatt_graph * graph = graph_convert(tt); /* count vtxs */ vtx_t nv = 0; for(idx_t m=0; m < tt->nmodes; ++m) { nv += (vtx_t) tt->dims[m]; } ASSERT_EQUAL(nv, graph->nvtxs); /* now write graph to tmp.txt and compare against good graph */ FILE * fout = open_f(TMP_FILE, "w"); graph_write_file(graph, fout); fclose(fout); FILE * fin = open_f(TMP_FILE, "r"); FILE * gold = open_f(graphs[i], "r"); /* check file lengths lengths */ fseek(fin , 0 , SEEK_END); fseek(gold , 0 , SEEK_END); long length_fin = ftell(fin); long length_gold = ftell(gold); ASSERT_EQUAL(length_gold, length_fin); rewind(fin); rewind(gold); /* compare each byte */ char fbyte; char gbyte; for(long byte=0; byte < length_fin; ++byte) { fread(&fbyte, 1, 1, fin); fread(&gbyte, 1, 1, gold); ASSERT_EQUAL(gbyte, fbyte); } /* clean up */ fclose(gold); fclose(fin); remove(TMP_FILE); graph_free(graph); } }
void t_matrix::dump_matrix(char* fp) { FILE* f_matrix = open_f(fp, "w"); printf("Dumping to %s\n", fp); // Dump indices are 1-based. for(int i_row = 1; i_row <= this->height; i_row++) { for(int i_col = 1; i_col <= this->width; i_col++) { fprintf(f_matrix, "%lf ", this->x(i_row, i_col)); } // i_col loop fprintf(f_matrix, "\n"); } // i_row loop fclose(f_matrix); }
static int * p_distribute_parts( sptensor_t * const ttbuf, char const * const pfname, rank_info * const rinfo) { /* root may have more than target_nnz */ idx_t const target_nnz = rinfo->global_nnz / rinfo->npes; int * parts = (int *) splatt_malloc(SS_MAX(ttbuf->nnz, target_nnz) * sizeof(int)); if(rinfo->rank == 0) { int ret; FILE * fin = open_f(pfname, "r"); /* send to all other ranks */ for(int p=1; p < rinfo->npes; ++p) { /* read into buffer */ for(idx_t n=0; n < target_nnz; ++n) { if((ret = fscanf(fin, "%d", &(parts[n]))) == 0) { fprintf(stderr, "SPLATT ERROR: not enough elements in '%s'\n", pfname); exit(1); } } MPI_Send(parts, target_nnz, MPI_INT, p, 0, rinfo->comm_3d); } /* now read my own part info */ for(idx_t n=0; n < ttbuf->nnz; ++n) { if((ret = fscanf(fin, "%d", &(parts[n]))) == 0) { fprintf(stderr, "SPLATT ERROR: not enough elements in '%s'\n", pfname); exit(1); } } fclose(fin); } else { /* receive part info */ MPI_Recv(parts, ttbuf->nnz, MPI_INT, 0, 0, rinfo->comm_3d, &(rinfo->status)); } return parts; }
/**
* @brief Read a tensor from `ifname` and distribute it over `comm`, then
*        compute each mode's dimension from the locally held indices.
*
*        Only rank 0 opens the file; the format-specific reader is chosen from
*        get_file_type(ifname).
*
* @param ifname The file to read.
* @param comm The communicator to distribute over.
*
* @return The local tensor, or NULL if the file type is not one of the
*         supported coordinate formats (or the reader returned NULL).
*/
sptensor_t * mpi_simple_distribute(
  char const * const ifname,
  MPI_Comm comm)
{
  int rank, npes;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &npes);

  sptensor_t * tt = NULL;

  FILE * fin = NULL;
  if(rank == 0) {
    fin = open_f(ifname, "r");
  }

  switch(get_file_type(ifname)) {
  case SPLATT_FILE_TEXT_COORD:
    tt = p_tt_mpi_read_file(fin, comm);
    break;
  case SPLATT_FILE_BIN_COORD:
    tt = p_tt_mpi_read_binary_file(fin, comm);
    break;
  default:
    /* unsupported file type: tt stays NULL and is reported below */
    break;
  }

  if(rank == 0) {
    fclose(fin);
  }

  /* never dereference a tensor that was not read */
  if(tt == NULL) {
    if(rank == 0) {
      fprintf(stderr, "SPLATT ERROR: could not read tensor from '%s'\n",
          ifname);
    }
    return NULL;
  }

  /* set dims info: each dimension is 1 + the largest local index */
  #pragma omp parallel for schedule(static, 1)
  for(idx_t m=0; m < tt->nmodes; ++m) {
    idx_t const * const inds = tt->ind[m];
    /* starting from 0 avoids reading inds[0] when there are no nonzeros */
    idx_t dim = 0;
    for(idx_t n=0; n < tt->nnz; ++n) {
      dim = SS_MAX(dim, 1 + inds[n]);
    }
    tt->dims[m] = dim;
  }

  return tt;
}
/**
* @brief Count the nonzeros of X that belong to this partition.
*
*        A nonzero is counted as soon as, for at least one mode m, its 0-based
*        index lies in [sstarts[m], sends[m]). Empty lines and lines starting
*        with '#' are ignored.
*
* @param fname The name of the file containing X.
* @param nmodes The number of modes of X.
* @param sstarts First owned slice of each mode (inclusive).
* @param sends End of the owned slices of each mode (exclusive).
*
* @return The number of nonzeros that fall in an owned slice of any mode.
*/
static idx_t p_count_my_nnz_1d(
  char const * const fname,
  idx_t const nmodes,
  idx_t const * const sstarts,
  idx_t const * const sends)
{
  FILE * fin = open_f(fname, "r");

  char * line = NULL;
  char * cursor = NULL;
  size_t capacity = 0;
  ssize_t nread;

  idx_t owned_nnz = 0;
  while((nread = getline(&line, &capacity, fin)) != -1) {
    /* nothing to parse on blank or commented lines */
    if(nread <= 1 || line[0] == '#') {
      continue;
    }

    cursor = line;
    int owned = 0;
    /* stop scanning modes at the first one that claims this nonzero */
    for(idx_t m=0; m < nmodes && !owned; ++m) {
      /* file indices are 1-based */
      idx_t const ind = strtoull(cursor, &cursor, 10) - 1;
      owned = (ind >= sstarts[m] && ind < sends[m]);
    }

    if(owned) {
      ++owned_nnz;
    }

    /* consume the tensor value */
    strtod(cursor, &cursor);
  }

  fclose(fin);
  free(line);

  return owned_nnz;
}
void t_matrix::load_sparse_matrix(char* fp) { FILE* f_matrix = open_f(fp, "rb"); int cur_i; int cur_j; double cur_value; //while(fscanf(f_matrix, "%d %d %lf", &cur_i, &cur_j, &cur_value) == 3) while(fread(&cur_i, sizeof(int), 1, f_matrix) == 1) { if(fread(&cur_j, sizeof(int), 1, f_matrix) != 1) { printf("Could not read current j in %s @ %s(%d)\n", fp, __FILE__, __LINE__); exit(0); } if(fread(&cur_value, sizeof(double), 1, f_matrix) != 1) { printf("Could not read current value in %s @ %s(%d)\n", fp, __FILE__, __LINE__); exit(0); } //printf("Read %d, %d %lf\n", cur_i, cur_j, cur_value); // If the matrix is symmetric, do a check on the read indices. if(this->symmetric) { if(cur_j > cur_i) { this->x(cur_i, cur_j) = cur_value; } } else { this->x(cur_i, cur_j) = cur_value; } } // file reading loop. fclose(f_matrix); }
/**
* @brief Fill tt with the nonzeros of X that belong to this partition.
*
*        A nonzero is kept when its 0-based index in at least one mode m lies
*        in [sstarts[m], sends[m]). Reading stops once tt->nnz nonzeros have
*        been kept or the file ends.
*
* @param fname The file containing X.
* @param tt The tensor structure (must be pre-allocated).
* @param sstarts Array of starting slices, inclusive (one for each mode).
* @param sends Array of ending slices, exclusive (one for each mode).
*/
static void p_read_tt_part_1d(
  char const * const fname,
  sptensor_t * const tt,
  idx_t const * const sstarts,
  idx_t const * const sends)
{
  idx_t const capacity_nnz = tt->nnz;
  idx_t const nmodes = tt->nmodes;

  char * line = NULL;
  char * cursor = NULL;
  size_t line_cap = 0;
  ssize_t nread;

  FILE * fin = open_f(fname, "r");

  idx_t kept = 0;
  while(kept < capacity_nnz) {
    nread = getline(&line, &line_cap, fin);
    if(nread == -1) {
      break;
    }

    /* blank and commented lines carry no nonzero */
    if(nread <= 1 || line[0] == '#') {
      continue;
    }

    /* parse straight into slot `kept`; the slot is only committed if the
     * nonzero turns out to be owned, otherwise the next line overwrites it */
    int owned = 0;
    cursor = line;
    for(idx_t m=0; m < nmodes; ++m) {
      /* file indices are 1-based */
      idx_t const ind = strtoull(cursor, &cursor, 10) - 1;
      tt->ind[m][kept] = ind;
      if(ind >= sstarts[m] && ind < sends[m]) {
        owned = 1;
      }
    }
    tt->vals[kept] = strtod(cursor, &cursor);

    if(owned) {
      ++kept;
    }
  }

  fclose(fin);
  free(line);
}
// It is very important to make sure that a seq file is in following format: // ; ... // [ THIS LINE SHOULD NOT CONTAIN SEQUENCE DATA, ITS SHOULD BE A LABEL OR EMPTY LINE] // [ EMPTY LINE OR SEQUENCE DATA] void t_structure::openseq(char* seq_fp) { // Very strict measure: Exit is sequence file is not verifiable. if(!this->verify_seq(seq_fp)) { printf("Could not verify sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__); exit(1); } FILE* seq_file = open_f(seq_fp, "r"); if(seq_file == NULL) { printf("seq file %s does not exist @ %s(%d).\n", seq_fp, __FILE__, __LINE__); exit(1); } this->numseq = NULL; this->nucs = NULL; this->basepr = NULL; this->danglings_on_branch = NULL; this->danglings_on_mb_closure = NULL; this->stackings_on_branch = NULL; this->stackings_on_mb_closure = NULL; this->unpaired_forced = NULL; char line_buffer[MAX_HEADER_LENGTH]; fgets(line_buffer, MAX_HEADER_LENGTH, seq_file); while(line_buffer[0] == ';') { fgets(line_buffer, MAX_HEADER_LENGTH, seq_file); } // Read label, contains new line character at the end of label. this->ctlabel = (char*)malloc(sizeof(char) * MAX_HEADER_LENGTH); strcpy(this->ctlabel, line_buffer); if(this->ctlabel[strlen(this->ctlabel) - 1] == '\n') { this->ctlabel[strlen(this->ctlabel) - 1] = 0; } this->check_set_label(); //printf("seq label: %s\n", this->ctlabel); // Read and determine length of sequence. char cur_char = 0; this->numofbases = 0; // Start reading sequence data. while(1) { int ret = fscanf(seq_file, "%c", &cur_char); if(ret == EOF) { break; } if(cur_char == '1') { break; } if(cur_char != '\n' && cur_char != ' ') { this->numofbases++; } } //printf("Length of sequence is %d\n", this->numofbases); this->numseq = (int*)malloc(sizeof(int) * (this->numofbases + 1)); this->nucs = (char*)malloc(sizeof(char) * (this->numofbases + 2)); this->basepr = (int*)malloc(sizeof(int) * (this->numofbases + 1)); this->unpaired_forced = (bool*)malloc(sizeof(bool) * (this->numofbases + 2)); // Set file position to data position. 
// Cannot use fsetpos and fgetpos because for some reason they are messing up indices // when a linux text file is taken to a windows machine. fseek(seq_file, 0, SEEK_SET); // Read all information again before sequence data. fgets(line_buffer, MAX_HEADER_LENGTH, seq_file); while(line_buffer[0] == ';') { fgets(line_buffer, MAX_HEADER_LENGTH, seq_file); } this->nucs[0] = '#'; int i = 1; // Sequence index, starts from 1. // Start reading sequence data. while(1) { // Read and validate input. int ret = fscanf(seq_file, "%c", &cur_char); if(ret == EOF) { break; } // Check end of sequence marker. if(cur_char == '1') { break; } // Process this nuc. if(cur_char != '\n' && cur_char != ' ') { this->nucs[i] = cur_char; if(this->nucs[i] == 'a' || this->nucs[i] == 'c' || this->nucs[i] == 'g' || this->nucs[i] == 'u' || this->nucs[i] == 't') { this->unpaired_forced[i] = true; } else { this->unpaired_forced[i] = false; } // Convert current base character into number value, from Dave's structure code. if (toupper(this->nucs[i]) == 'A') this->numseq[i]=1; else if (toupper(this->nucs[i]) == 'C') this->numseq[i]=2; else if (toupper(this->nucs[i]) == 'G') this->numseq[i]=3; else if (toupper(this->nucs[i]) == 'U' || toupper(this->nucs[i]) == 'T') this->numseq[i]=4; else if (toupper(this->nucs[i]) == 'I') this->numseq[i]=5; else this->numseq[i]=0; // Map unknown nucleotides to A automatically! this->basepr[i] = 0; // No base pairing information. //printf("%c %d\n", this->nucs[i], this->numseq[i]); i++; } } // This is for ending sequences. this->nucs[i] = 0; fclose(seq_file); }
/** * Meniul de navigare in fisiere */ void printdirs(struct DIR * dir){ char namebuf [12]; uint32_t size = 0; uint8_t bitmap = 0, selected = 0, cnt = 0, ret_code = 0, is=0; struct dirent * crt_dir; do{ /* Afisam continutul directorului curent */ rewinddir(dir); LCD_clear(); cnt = 0; while(1){ crt_dir = readdir( dir, buffer); ret_code = check (crt_dir); if(ret_code == DIR_INVALID) continue; if(ret_code == DIR_END) break; get_dirent_name(crt_dir, namebuf); if(cnt ++ == selected){ LCD_str( namebuf,SELECTED ); }else{ LCD_str( namebuf, NOT_SELECTED); } } /* Asteptam sa selecteze un fisier sau director */ bitmap = BTN_wait(); switch(bitmap){ case UP: selected = (selected +1)%cnt; continue; case DOWN: selected = (selected + cnt-1) % cnt; continue; case ENTER: rewinddir(dir); selected ++; while(1){ crt_dir = readdir( dir, buffer); ret_code = check (crt_dir); if( ret_code == DIR_VALID) selected--; if( !selected ) break; } break; default: continue; } is_dir(crt_dir, &is); if( is){ dir = opendir(crt_dir); }else{ get_dirent_size(crt_dir, &size); open_f(crt_dir); /* Curatam ecranul si afisam poza */ LCD_clear(); draw_bmp(); close_f(); /* Asteptam sa apese butonul de exit */ while( BTN_wait() != CLOSE); } }while(1); exit(EXIT_SUCCESS); }
// Read a fasta file that contains sequence information for multiple fasta files and return all of them in a vector. vector<t_structure*>* t_structure::read_multi_seq(char* multi_seq_fp) { vector<t_structure*>* seqs = new vector<t_structure*>(); FILE* f_multi_seq = open_f(multi_seq_fp, "r"); if(f_multi_seq == NULL) { printf("Could not find the input file @ %s.\n", multi_seq_fp); exit(0); } // Read the file and load information. vector<char>* cur_nucs = new vector<char>(); char cur_label[MAX_HEADER_LENGTH]; char cur_line[MAX_HEADER_LENGTH]; while(1) { // Read current line. if(fgets(cur_line, MAX_HEADER_LENGTH, f_multi_seq) == NULL) { // Save the last sequence in the sequence list and initiate a new sequence. if(cur_nucs->size() > 0) { t_structure* new_seq = new t_structure(cur_label, cur_nucs); seqs->push_back(new_seq); } delete(cur_nucs); break; } // Get rid of the new line, if there is one. if(strlen(cur_line) > 0 && cur_line[strlen(cur_line) - 1] == '\n') { cur_line[strlen(cur_line) - 1] = 0; } if(strlen(cur_line) > 0) { // if starts with a '>', then a new sequence is initiated. if(cur_line[0] == '>') { // Save the last sequence in the sequence list and initiate a new sequence. if(cur_nucs->size() > 0) { t_structure* new_seq = new t_structure(cur_label, cur_nucs); seqs->push_back(new_seq); } // Read the label from the remaining of the line. strcpy(cur_label, &cur_line[1]); // Empty current nucleotides for loading next sequence, if there is any. cur_nucs->clear(); } else if(cur_line[0] == ';') { // Save the last sequence in the sequence list and initiate a new sequence. if(cur_nucs->size() > 0) { //printf("instantiating with new label: %s\n", cur_label); t_structure* new_str = new t_structure(cur_label, cur_nucs); seqs->push_back(new_str); } // Read the label from the next line. 
fgets(cur_label, MAX_HEADER_LENGTH, f_multi_seq); if(cur_label[strlen(cur_label)-1] == '\n') { cur_label[strlen(cur_label)-1] = 0; } //printf("Read new label: %s\n", cur_label); // Empty current nucleotides for loading next sequence, if there is any. cur_nucs->clear(); } else { // This is sequence data, copy the sequence data and continue, no input validation here. for(int i_cpy = 0; i_cpy < (int)strlen(cur_line); i_cpy++) { // This is a necessity coming from .seq file specifications. All .seq files end with a '1' character. if(cur_line[i_cpy] != '1' && cur_line[i_cpy] != ' ' && cur_line[i_cpy] != '\n' && cur_line[i_cpy] != '\t') { cur_nucs->push_back(cur_line[i_cpy]); } } // Copy the nucleotides. } // label/nuc data check. } // Length check for current line. } return(seqs); }
/* Seq file should be like this: ; [Empty line or comment or id ...] [Empty line or sequence data] */ bool t_structure::verify_seq(char* seq_fp) { return(true); FILE* f_seq = open_f(seq_fp, "r"); char line_buffer[MAX_HEADER_LENGTH]; _fgets(line_buffer, MAX_HEADER_LENGTH, f_seq); // If the first character of first line is not semicolon, // this is not a valid sequence file. if(line_buffer[0] != ';') { printf("Verification failed for sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__); return(false); } int current_line_cnt = 2; int i_seq = 0; char seq_data[MAX_HEADER_LENGTH]; // Read file and fill lines. while(1) { // Read next line starting with 2nd line. if(_fgets(line_buffer, MAX_HEADER_LENGTH, f_seq)) { //printf("Current line_buffer: %s\n", line_buffer); // If currently read line is after 2nd line // the sequence data is being retrieved. if(current_line_cnt > 2) { for(int i = 0; i < (int)strlen(line_buffer); i++) { // If this is end of sequence, if(seq_data[i_seq - 1] == '1') // Is sequence data already finished? { printf("Sequence data is ending before file ends, exiting at %s(%d)\n", __FILE__, __LINE__); return(false); } if(line_buffer[i] != '1' && line_buffer[i] != 'A' && line_buffer[i] != 'C' && line_buffer[i] != 'G' && line_buffer[i] != 'U' && line_buffer[i] != 'T' && line_buffer[i] != 'a' && line_buffer[i] != 'c' && line_buffer[i] != 'g' && line_buffer[i] != 'u' && line_buffer[i] != 't') { printf("Unknown nucleotide in sequence: %c, exiting at %s(%d)\n", line_buffer[i], __FILE__, __LINE__); return(false); } seq_data[i_seq++] = line_buffer[i]; } } current_line_cnt++; } else { break; } } // If 2nd line is not read OR no sequence data is read, // return false. /* if(current_line_cnt < 3 || // Check if at least 3 lines are read. i_seq == 0 || // Check if sequence data is read. 
seq_data[i_seq - 1] != '1') // Check correct ending of seq_data { printf("Verification failed for sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__); return(false); } */ if(current_line_cnt < 3) // Check if at least 3 lines are read. { printf("Verification failed for sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__); return(false); } if(i_seq == 0) // Check if sequence data is read. { printf("Verification failed for sequence file %s @ %s(%d): No sequence data\n", seq_fp, __FILE__, __LINE__); return(false); } if(seq_data[i_seq - 1] != '1') // Check correct ending of seq_data { printf("Verification failed for sequence file %s @ %s(%d): %c\n", seq_fp, __FILE__, __LINE__, seq_data[i_seq - 1]); return(false); } fclose(f_seq); return(true); }
// Loads a configuration file.
// Each line that does not start with '#' is split on spaces and tabs; when it has at least
// two tokens, the first token is stored as an id and the remaining tokens as that id's
// list of values (ids and vals are parallel vectors). Lines with fewer than two tokens are ignored.
// Exits the program if the file cannot be opened.
// NOTE(review): line_str and line_tokens are not released here - confirm who owns them.
t_config::t_config(const char* config_fp)
{
	FILE* f_conf = open_f(config_fp, "r");
	if(f_conf == NULL)
	{
		printf("Could not open configuration file %s\n", config_fp);
		exit(0);
	}

	this->ids = new vector<char*>();
	this->vals = new vector<vector<char*>*>();

	char cur_line[5000];
	while(fgets(cur_line, 5000, f_conf) != NULL)
	{
		// Drop the trailing new line.
		int l_line = strlen(cur_line);
		if(cur_line[l_line-1] == '\n')
		{
			cur_line[l_line-1] = 0;
		}

		// Comment lines carry no entry.
		if(cur_line[0] == '#')
		{
			continue;
		}

		t_string* line_str = new t_string(cur_line);
		t_string_tokens* line_tokens = line_str->tokenize_by_chars(" \t");

		// An entry needs an id and at least one value.
		const int n_tokens = (int)line_tokens->size();
		if(n_tokens < 2)
		{
			continue;
		}

		// First token: the id.
		char* entry_id = new char[strlen(line_tokens->at(0)->str()) + 2];
		strcpy(entry_id, line_tokens->at(0)->str());

		// Remaining tokens: the values of this id, in order.
		vector<char*>* entry_vals = new vector<char*>();
		int i_token = 1;
		while(i_token < n_tokens)
		{
			char* val_copy = new char[strlen(line_tokens->at(i_token)->str()) + 2];
			strcpy(val_copy, line_tokens->at(i_token)->str());
			entry_vals->push_back(val_copy);
			i_token++;
		}

		this->ids->push_back(entry_id);
		this->vals->push_back(entry_vals);
	} // File reading loop.

	fclose(f_conf);
}
// Backend function for computing alignment envelope. t_aln_env_result* t_phmm_aln::compute_alignment_envelope(int aln_env_type, t_pp_result* _pp_result, double log_threshold, int par) { if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Computing alignment envelope...\n"); // if pp_result is not supplied, recompute it. t_pp_result* pp_result = NULL; if(_pp_result == NULL) { pp_result = this->compute_posterior_probs(); } else { pp_result = _pp_result; } // alignment envelope type affects how the limits are set. // Limit indices are 1 based. int* low_limits = (int*)malloc(sizeof(int) * (this->l1() + 2)); int* high_limits = (int*)malloc(sizeof(int) * (this->l1() + 2)); // Initialize loop limits. for(int i = 0; i <= this->l1(); i++) { low_limits[i] = 0; high_limits[i] = 0; } if(aln_env_type == PROB_ALN_ENV) { // Compute alignment envelope. if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Allocating alignment envelope...\n"); bool** aln_env = (bool**)malloc((this->l1() + 1) * sizeof(bool*)); double n_aln_env_bytes = 0.0f; for(int i = 0; i <= this->l1(); i++) { int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); aln_env[i] = (bool*)malloc((high_k - low_k + 1) * sizeof(bool)); n_aln_env_bytes += ((high_k - low_k + 1) * sizeof(bool)); aln_env[i] -= low_k; } if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Allocated %lf bytes for alignment envelope.\n", n_aln_env_bytes); if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Computing alignment envelope from probability planes.\n"); for(int i = 0; i <= this->l1(); i++) { int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); for(int k = low_k; k <= high_k; k++) { //printf("(%d, %d): %f, %f\n", cnt1, cnt2, xlog_div(global_aln_info.aln_probs[cnt1][cnt2], global_aln_info.op_prob), log_threshold); double 
ins1_prob = pp_result->ins1_probs[i][k]; double ins2_prob = pp_result->ins2_probs[i][k]; double aln_prob = pp_result->aln_probs[i][k]; double three_plane_sum = xlog_sum(ins1_prob, xlog_sum(ins2_prob, aln_prob)); if(three_plane_sum < log_threshold) { aln_env[i][k] = false; } else { aln_env[i][k] = true; } } } //FILE* f_aln_env = fopen("aln_env.txt", "w"); //for(int i = 0; i <= this->l1(); i++) //{ // int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); // int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); // for(int k = low_k; k <= high_k; k++) // { // fprintf(f_aln_env, "%d ", aln_env[i][k]); // } // k loop // fprintf(f_aln_env, "\n"); //} // i loop //fclose(f_aln_env); if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Validating alignment envelope connectivity...\n"); // If alignment envelope is not connected, return NULL. if(!this->check_connection(aln_env)) { printf("Alignment envelope not connected.\n"); // If pp_result is allocated, free it. if(_pp_result == NULL) { this->free_pp_result(pp_result); } // Free the limits. free(low_limits); free(high_limits); // Free aln. env. since it is of no use any more. for(int i = 0; i <= this->l1(); i++) { int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); aln_env[i] += low_k; free( aln_env[i] ); } free(aln_env); return(NULL); } if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Pruning alignment envelope...\n"); // Calculate pruned alignment envelope and set it to global_aln_info's alignment envelope, // calculate also the size of alignment envelope. //#define _PRUNE_ALN_ //#ifdef _PRUNE_ALN_ bool** pruned_aln_env = this->prune_aln_env(aln_env); //#else // copy_aln_env(aln_env); //#endif if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Releasing alignment envelope memory.\n"); // Free aln. env. since it is of no use any more. 
for(int i = 0; i <= this->l1(); i++) { int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); aln_env[i] += low_k; free( aln_env[i] ); } free(aln_env); if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Computing loop limits.\n"); // Compute the loop limits. for(int i = 1; i <= this->l1(); i++) { int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); for(int k = low_k; k <= high_k; k++) { if(pruned_aln_env[i][k]) { //fprintf(ll_file, "%d ", cnt2); // Dump low limit. low_limits[i] = k; break; } } for(int k = high_k; k >= low_k; k--) { if(pruned_aln_env[i][k]) { //fprintf(ll_file, "%d", cnt2); // Dump high limit. high_limits[i] = k; break; } } } // loop limit computation loop. // Free pruned aln. env. since it is of no use any more. if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Releasing pruned alignment envelope memory.\n"); for(int i = 1; i <= this->l1(); i++) { int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size); pruned_aln_env[i] += low_k; free(pruned_aln_env[i]); } free(pruned_aln_env); } // PROB_ALN_ENV else if(aln_env_type == BANDED_ALN_ENV) { // par argument contains band size. double band_size = (double)par; double floating_N1 = (double)this->l1(); double floating_N2 = (double)this->l2(); // Initialize loop limits. for(double i = 1.0f; i <= this->l1(); i++) { low_limits[(int)i] = (int) MAX(0, ((i * floating_N2 / floating_N1) - band_size)); high_limits[(int)i] = (int) MIN(floating_N2, ((i * floating_N2 / floating_N1) + band_size)); if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("%d -> (%d, %d)\n", (int)i, low_limits[(int)i], high_limits[(int)i]); } //exit(0); } // BANDED_ALN_ENV else if(aln_env_type == FULL_ALN_ENV) { // Initialize loop limits. 
for(double i = 0.0f; i <= this->l1(); i++) { low_limits[(int)i] = 0; high_limits[(int)i] = this->l2(); } } // FULL_ALN_ENV else if(aln_env_type == MANUAL_ALN_ENV) { this->load_map_limits_from_map("aln_map.txt", low_limits, high_limits); } else { printf("Invalid alignment envelope type: %d\n", aln_env_type); exit(0); } // switch according to selected alignment envelope type. low_limits[0] = low_limits[1]; high_limits[0] = high_limits[1]; // Set low limits with values 1 to 0, so that the initialized values can be recursed correctly. for(int i = 0; i <= this->l1(); i++) { if(low_limits[i] == 1) { low_limits[i] = 0; } } // Allocate and set aln_env_result. t_aln_env_result* aln_env_result = (t_aln_env_result*)malloc(sizeof(t_aln_env_result)); aln_env_result->high_limits = high_limits; aln_env_result->low_limits = low_limits; //aln_env_result->pp_result = pp_result; // Check for alignment constraints in the alignment envelope. this->check_ins1_ins2(aln_env_result); // Dump the probability planes. 
(all of it) if(_DUMP_ALN_ENV_UTILS_MESSAGES_) { FILE* f_aln_probs = open_f("aln_plane_probs", "wb"); FILE* f_ins1_probs = open_f("ins1_plane_probs", "wb"); FILE* f_ins2_probs = open_f("ins2_plane_probs", "wb"); for(int i1 = 1; i1 <= this->l1(); i1++) { int low_i2 = t_phmm_array::low_phmm_limit(i1, l1(), l2(), this->phmm_band_constraint_size); int high_i2 = t_phmm_array::high_phmm_limit(i1, l1(), l2(), this->phmm_band_constraint_size); for(int i2 = low_i2; i2 <= high_i2; i2++) { if(pp_result->aln_probs[i1][i2] != xlog(0.0)) { double cur_aln_prob = pp_result->aln_probs[i1][i2]; fwrite(&i1, sizeof(int), 1, f_aln_probs); fwrite(&i2, sizeof(int), 1, f_aln_probs); fwrite(&cur_aln_prob, sizeof(double), 1, f_aln_probs); } if(pp_result->ins1_probs[i1][i2] != xlog(0.0)) { double cur_ins1_prob = pp_result->ins1_probs[i1][i2]; fwrite(&i1, sizeof(int), 1, f_ins1_probs); fwrite(&i2, sizeof(int), 1, f_ins1_probs); fwrite(&cur_ins1_prob, sizeof(double), 1, f_ins1_probs); } if(pp_result->ins2_probs[i1][i2] != xlog(0.0)) { double cur_ins2_prob = pp_result->ins2_probs[i1][i2]; fwrite(&i1, sizeof(int), 1, f_ins2_probs); fwrite(&i2, sizeof(int), 1, f_ins2_probs); fwrite(&cur_ins2_prob, sizeof(double), 1, f_ins2_probs); } } // i2 loop. } // i1 loop. fclose(f_aln_probs); fclose(f_ins1_probs); fclose(f_ins2_probs); FILE* f_lls = open_f("loop_limits.txt", "w"); // Dump the loop limits. for(int i = 0; i <= this->l1(); i++) { fprintf(f_lls, "%d %d %d\n", i, low_limits[i], high_limits[i]); } fclose(f_lls); } // message dump check. 
//printf("Dumping alignment map.\n"); //FILE* f_aln_map = open_f("aln_map.txt", "w"); //for(int i = 1; i <= this->l1(); i++) //{ // for(int j = 1; j <= this->l2(); j++) // { // if(j < low_limits[i]) // { // fprintf(f_aln_map, "0"); // } // else if(j <= high_limits[i]) // { // fprintf(f_aln_map, "1"); // } // else // { // fprintf(f_aln_map, "0"); // } // } // fprintf(f_aln_map, "\n"); //} //fclose(f_aln_map); if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Computed alignment envelope.\n"); for(int i = 2; i <= this->l1(); i++) { // fprintf(f_lls, "%d %d %d\n", i, low_limits[i], high_limits[i]); if(aln_env_result->low_limits[i] < aln_env_result->low_limits[i-1]) aln_env_result->low_limits[i] = aln_env_result->low_limits[i-1]; } for(int i = this->l1()-1; i >= 1; i--) { // fprintf(f_lls, "%d %d %d\n", i, low_limits[i], high_limits[i]); if(aln_env_result->high_limits[i] > aln_env_result->high_limits[i+1]) aln_env_result->high_limits[i] = aln_env_result->high_limits[i+1]; } return(aln_env_result); }
void t_phmm_aln::load_map_limits_from_map(char* aln_map_fn, int* low_limits, int* high_limits) { if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("Setting alignment loop limits from map.\n"); int N1 = this->l1(); int N2 = this->l2(); // Open alignment map file. FILE* aln_map_file = open_f(aln_map_fn, "r"); if(aln_map_file == NULL) { printf("Could not find alignment map file %s @ %s(%d), exiting.\n", aln_map_fn, __FILE__, __LINE__); exit(0); } for(int i1 = 1; i1 <= N1; i1++) { // Reset limits. low_limits[i1] = -1; high_limits[i1] = -1; for(int i2 = 1; i2 <= N2; i2++) { // Current flag for current position in alignment map file. int cur_flag; // Read map file which consists of 1s and 0s for correct positions. fscanf(aln_map_file, "%d", &cur_flag); // Read current flag. if(_DUMP_ALN_ENV_UTILS_MESSAGES_) printf("%d ", cur_flag); // Set low limit at the point where 1's start. if(low_limits[i1] == -1 && cur_flag == 1) { low_limits[i1] = i2; } // Set high limit if high limit is not already set and lomw limit is already set. if(high_limits[i1] == -1 && low_limits[i1] != -1 && cur_flag == 0) { high_limits[i1] = i2 - 1; } // If high limit is not set and loop hit end of alignment line, set high limit to end of 2nd sequence. if(high_limits[i1] == -1 && i2 == N2) { high_limits[i1] = N2; } } printf("\n"); } fclose(aln_map_file); //// Have to set limits for 0th nucleotide. //low_limits[0] = low_limits[1]; //high_limits[0] = high_limits[1]; //low_limits[0] = 1; //high_limits[0] = N2; // // For i > N1, just add N2 to limits for i < N1. // for(int i1 = N1 + 1; i1 <= 2 * N1; i1++) // { // low_limits[i1] = low_limits[i1 - N1] + N2; // high_limits[i1] = high_limits[i1 - N1] + N2; // //if(_DUMP_LOOP_LIMIT_MESSAGES_) //{ // printf("low[%d] = %d, high[%d]=%d\n", i1, low_limits[i1], i1, high_limits[i1]); //} // } // // // Set low limits with values 1 to 0, so that the initialized values can be recursed correctly. 
// for(int i = 0; i <= N1; i++) // { // if(low_limits[i] == 1) // { // low_limits[i] = 0; // } ////if(_DUMP_LOOP_LIMIT_MESSAGES_) //{ // //printf("low[%d] = %d, high[%d]=%d\n", i, low_limits[i], i, high_limits[i]); //} /*}*/ }
void t_structure::openct(char* ct_fp) { FILE* ct_file = open_f(ct_fp, "r"); if(ct_file == NULL) { printf("ct file %s does not exist @ %s(%d).\n", ct_fp, __FILE__, __LINE__); exit(1); } // Allocate header buffer. this->ctlabel = (char*)malloc(sizeof(char) * MAX_HEADER_LENGTH); // Read first line fscanf(ct_file, "%d", &this->numofbases); // Read remaining of the line, contains new line character at the end of label. fgets(this->ctlabel, MAX_HEADER_LENGTH, ct_file); if(this->ctlabel[strlen(this->ctlabel) - 1] == '\n') { this->ctlabel[strlen(this->ctlabel) - 1] = 0; } this->check_set_label(); //printf("ct label: %s\n", this->ctlabel); this->numseq = (int*)malloc(sizeof(int) * (this->numofbases + 3)); this->nucs = (char*)malloc(sizeof(char) * (this->numofbases + 3)); this->basepr = (int*)malloc(sizeof(int) * (this->numofbases + 3)); this->danglings_on_branch = (int*)malloc(sizeof(int) * (this->numofbases + 3)); this->danglings_on_mb_closure = (int*)malloc(sizeof(int) * (this->numofbases + 3)); this->stackings_on_branch = (int*)malloc(sizeof(int) * (this->numofbases + 3)); this->stackings_on_mb_closure = (int*)malloc(sizeof(int) * (this->numofbases + 3)); this->unpaired_forced = (bool*)malloc(sizeof(bool) * (this->numofbases + 2)); for(int i = 0; i <= this->numofbases; i++) { this->basepr[i] = 0; this->danglings_on_branch[i] = 0; this->danglings_on_mb_closure[i] = 0; this->stackings_on_branch[i] = 0; this->stackings_on_mb_closure[i] = 0; } int* dangles = (int*)malloc(sizeof(int) * (this->numofbases + 3)); int* stacks = (int*)malloc(sizeof(int) * (this->numofbases + 3)); // Read sequence data. // Must read base pairing before dangles/stacks can be resolved from file. 
for(int i = 1; i <= this->numofbases; i++) { int index; int some_val1; char raw_nuc; // 1 G 0 2 120 1 fscanf(ct_file, "%d %c %d %d %d %d", &index, &raw_nuc, &dangles[i], &stacks[i], &this->basepr[i], &some_val1); //if(this->nucs[i] == 'a' || // this->nucs[i] == 'c' || // this->nucs[i] == 'g' || // this->nucs[i] == 'u' || // this->nucs[i] == 't') //{ // this->unpaired_forced[i] = true; //} //else //{ // this->unpaired_forced[i] = false; //} //printf("%c", this->nucs[i]); /* The danglings on external loop branches are buffered as danglings on branch. Note that there cannot be a stacking on external loop closure because by definition external loop is not closed. */ // Convert nucleotide symbols into indices: XACGUI -> 012345 // refer to IUPAC nucleotide symbols for more information: // http://www.mun.ca/biochem/courses/3107/symbols.html //if (toupper(this->nucs[i]) == 'A' || toupper(this->nucs[i]) == 'B') // this->numseq[i]=1; //else if (toupper(this->nucs[i]) == 'C' || toupper(this->nucs[i]) == 'Z') // this->numseq[i]=2; //else if (toupper(this->nucs[i]) == 'G' || toupper(this->nucs[i]) == 'H') // this->numseq[i]=3; //else if (toupper(this->nucs[i]) == 'U' || toupper(this->nucs[i]) == 'T' || toupper(this->nucs[i]) == 'V' || toupper(this->nucs[i]) == 'W' ) // this->numseq[i]=4; //else if (toupper(this->nucs[i]) == 'I') // this->numseq[i]=5; //else // this->numseq[i]=0; this->map_nuc_IUPAC_code(raw_nuc, this->nucs[i], this->numseq[i], this->unpaired_forced[i]); //printf("%d\n", this->basepr[i]); } #undef _USE_STACKING_INFO_ #ifdef _USE_STACKING_INFO_ // Resolve stacks and dangles. for(int i = 1; i <= this->numofbases; i++) { // Dangling? if(dangles[i] != 0) { // Dangle on branch? if(dangles[i] == i+1) { if(this->basepr[i+1] == 0) { printf("Dangling of %d on unpaired nucleotide %d.\n", i, i+1); exit(0); } if(this->basepr[i+1] > i+1) { this->danglings_on_branch[i] = i+1; } else { this->danglings_on_mb_closure[i] = i+1; } } // Dangle on mbl closure? 
if(dangles[i] == i-1) { if(this->basepr[i-1] == 0) { printf("Dangling of %d on unpaired nucleotide %d.\n", i, i-1); exit(0); } if(this->basepr[i-1] > i-1) { this->danglings_on_mb_closure[i] = i-1; } else { this->danglings_on_branch[i] = i-1; } } } // Stacking? if(stacks[i] != 0) { // stack on branch? if(stacks[i] == i+1) { if(this->basepr[i+1] == 0) { printf("Stacking of %d on unpaired nucleotide %d.\n", i, i+1); exit(0); } if(this->basepr[i+1] > i+1) { this->stackings_on_branch[i] = i+1; } else { this->stackings_on_mb_closure[i] = i+1; } } // stack on mbl closure? if(stacks[i] == i-1) { if(this->basepr[i-1] == 0) { printf("Stacking of %d on unpaired nucleotide %d.\n", i, i-1); exit(0); } if(this->basepr[i-1] > i-1) { this->stackings_on_mb_closure[i] = i-1; } else { this->stackings_on_branch[i] = i-1; } } } } // Do a sanity check on dangles and stacks. for(int i = 1; i < this->numofbases; i++) { if(this->stackings_on_branch[i] == i+1) { int current_j = this->basepr[i+1]; if(current_j == 0 || this->stackings_on_branch[current_j+1] != current_j) { printf("Stacking check failed for stacking of %d on %d\n", i, i+1); } } if(this->stackings_on_mb_closure[i] == i+1) { int current_j = this->basepr[i+1]; if(current_j == 0 || this->stackings_on_mb_closure[current_j+1] != current_j) { printf("Stacking check failed for stacking of %d on %d\n", i, i+1); } } } #endif // _USE_STACKING_INFO_ free(dangles); free(stacks); fclose(ct_file); }
// Dump map alignment. void t_MAP_alignment::dump_map_alignment() { if(_DUMP_MAP_ALIGNMENT_MESSAGES_) { FILE* map_aln_file = open_f("ppf_map_alignment.txt", "w"); for(int cnt1 = 1; cnt1 <= this->seq_man->get_l_seq1(); cnt1++) { fprintf(map_aln_file, "%d %d %s\n", cnt1, this->seq1_alns[cnt1][0], state_names[this->seq1_alns[cnt1][1]]); } fprintf(map_aln_file, "\n\n"); for(int cnt2 = 1; cnt2 <= this->seq_man->get_l_seq2(); cnt2++) { fprintf(map_aln_file, "%d %d %s\n", cnt2, this->seq2_alns[cnt2][0], state_names[this->seq2_alns[cnt2][1]]); } fclose(map_aln_file); } // Both alignment arrays correspond to same coincidence path. // In order to represent those alignment arrays, have to trace them correctly // into alignment strings. //int aln_str_length = seq_man->get_l_seq1() + seq_man->get_l_seq2(); int l_aln = this->get_l_aln(); this->aln_str1 = (char*)malloc(sizeof(char) * (l_aln + 2)); this->aln_str2 = (char*)malloc(sizeof(char) * (l_aln + 2)); this->aln_index_line1 = (int*)malloc(sizeof(int) * (l_aln + 3)); this->aln_index_line2 = (int*)malloc(sizeof(int) * (l_aln + 3)); // Following points to last alignment position in coincidence map. int last_i1 = 0; int last_i2 = 0; char nucs[] = "NACGUI"; // Problem is determining if next state is an event in first seq (ins1) or an event in second sequence (ins2) // or if it is an event in both sequences (aln). So check seq1_alns[last_i1 + 1] and seq1_alns[last_i2 + 1] // indices and states; see if the state and indices are corectly adding up on last_i1 and last_i2. // e.g. if seq1_alns[1][0] = 0 and seq1_alns[1][1] = STATE_INS1, then this means that there is an insertion // in first sequence which will be over 0, 0. int aln_str_index = 0; while(last_i1 != seq_man->get_l_seq1() || last_i2 != seq_man->get_l_seq2()) { if(_DUMP_MAP_ALIGNMENT_MESSAGES_) printf("%d(%d), %d(%d)\n", last_i1, seq_man->get_l_seq1(), last_i2, seq_man->get_l_seq2()); // Check for alignment case. 
if((last_i1+1) <= seq_man->get_l_seq1() && (last_i2+1) <= seq_man->get_l_seq2() && this->seq1_alns[last_i1 + 1][1] == STATE_ALN && this->seq1_alns[last_i1 + 1][0] == last_i2 + 1) { // If next nuc. in sequence 1 is aligned, is it aligned to // next nuc. in sequence 2? last_i1++; last_i2++; aln_str1[aln_str_index] = nucs[this->seq_man->get_nuc_seq1(last_i1)]; aln_str2[aln_str_index] = nucs[this->seq_man->get_nuc_seq2(last_i2)]; this->aln_index_line1[aln_str_index+1] = last_i1; this->aln_index_line2[aln_str_index+1] = last_i2; aln_str_index++; if(_DUMP_MAP_ALIGNMENT_MESSAGES_) printf("Align %d, %d\n", last_i1, last_i2); } // Check for alignment case. else if((last_i1+1) <= seq_man->get_l_seq1() && this->seq1_alns[last_i1 + 1][1] == STATE_INS1 && this->seq1_alns[last_i1 + 1][0] == last_i2) { // If next nuc. in sequence 1 is inserted, is it inserted on top of current nuc. in sequence 2? last_i1++; aln_str1[aln_str_index] = nucs[this->seq_man->get_nuc_seq1(last_i1)]; aln_str2[aln_str_index] = '.'; this->aln_index_line1[aln_str_index+1] = last_i1; this->aln_index_line2[aln_str_index+1] = 0; aln_str_index++; if(_DUMP_MAP_ALIGNMENT_MESSAGES_) printf("Insert1 %d, %d\n", last_i1, last_i2); } // Check for alignment case. else if((last_i2+1) <= seq_man->get_l_seq2() && this->seq2_alns[last_i2 + 1][1] == STATE_INS2 && this->seq2_alns[last_i2 + 1][0] == last_i1) { // If next nuc. in sequence 2 is inserted, is it inserted on top of current nuc. in sequence 1? last_i2++; aln_str1[aln_str_index] = '.'; aln_str2[aln_str_index] = nucs[this->seq_man->get_nuc_seq2(last_i2)]; this->aln_index_line1[aln_str_index+1] = 0; this->aln_index_line2[aln_str_index+1] = last_i2; aln_str_index++; if(_DUMP_MAP_ALIGNMENT_MESSAGES_) printf("Insert2 %d, %d\n", last_i1, last_i2); } else { printf("Could not decode next coincidence position in alignment at (%d, %d) @ %s(%d).\n", last_i1, last_i2, __FILE__, __LINE__); exit(0); } } // map alignment string formation loop. // Finish alignment strings. 
aln_str1[aln_str_index] = 0; aln_str2[aln_str_index] = 0; if(_DUMP_MAP_ALIGNMENT_MESSAGES_) printf("MAP Alignment:\n%s\n%s\n", aln_str1, aln_str2); char aln_fp[4096]; if(this->ppf_cli->map_aln_op == NULL) { sprintf(aln_fp, "%s_%s_map_aln.aln", this->ppf_cli->seq1_op_file_prefix, this->ppf_cli->seq2_op_file_prefix); } else { strcpy(aln_fp, this->ppf_cli->map_aln_op); } FILE* aln_file = open_f(aln_fp, "w"); fprintf(aln_file, "%s-%s MAP Alignment:\n%s\n%s \n", this->ppf_cli->seq1_op_file_prefix, this->ppf_cli->seq2_op_file_prefix, aln_str1, aln_str2); fclose(aln_file); }
void t_structure::openfasta(char* fasta_fp) { // Very strict measure: Exit is sequence file is not verifiable. if(!this->verify_seq(fasta_fp)) { printf("Could not verify sequence file %s @ %s(%d)\n", fasta_fp, __FILE__, __LINE__); exit(1); } FILE* fasta_file = open_f(fasta_fp, "r"); if(fasta_file == NULL) { printf("fasta file %s does not exist @ %s(%d).\n", fasta_fp, __FILE__, __LINE__); exit(1); } this->numseq = NULL; this->nucs = NULL; this->basepr = NULL; this->danglings_on_branch = NULL; this->danglings_on_mb_closure = NULL; this->stackings_on_branch = NULL; this->stackings_on_mb_closure = NULL; char line_buffer[MAX_HEADER_LENGTH]; fgets(line_buffer, MAX_HEADER_LENGTH, fasta_file); if(line_buffer[0] == '>') { // Copy label. this->ctlabel = (char*)malloc(sizeof(char) * MAX_HEADER_LENGTH); strcpy(this->ctlabel, &line_buffer[1]); if(this->ctlabel[strlen(this->ctlabel) - 1] == '\n') { this->ctlabel[strlen(this->ctlabel) - 1] = 0; } } // Read and determine length of sequence. char cur_char = 0; this->numofbases = 0; // Start reading sequence data. while(1) { int ret = fscanf(fasta_file, "%c", &cur_char); if(ret == EOF) { break; } // Found a new fasta sequence? if(cur_char == '>') { break; } if(cur_char != '\n' && cur_char != ' ') { this->numofbases++; } } //printf("Length of sequence is %d\n", this->numofbases); this->numseq = (int*)malloc(sizeof(int) * (this->numofbases + 1)); this->nucs = (char*)malloc(sizeof(char) * (this->numofbases + 2)); this->basepr = (int*)malloc(sizeof(int) * (this->numofbases + 1)); this->unpaired_forced = (bool*)malloc(sizeof(bool) * (this->numofbases + 2)); // Set file position to data position. // Cannot use fsetpos and fgetpos because for some reason they are messing up indices // when a linux text file is taken to a windows machine. fseek(fasta_file, 0, SEEK_SET); // Read captoin information. fgets(line_buffer, MAX_HEADER_LENGTH, fasta_file); int i = 1; // Sequence index, starts from 1. // Start reading sequence data. 
while(1) { // Read and validate input. int ret = fscanf(fasta_file, "%c", &cur_char); if(ret == EOF) { break; } // Check end of sequence marker. if(cur_char == '>') { break; } // Process this nuc. if(cur_char != '\n' && cur_char != ' ') { this->basepr[i] = 0; // No base pairing information. this->map_nuc_IUPAC_code(cur_char, this->nucs[i], this->numseq[i], this->unpaired_forced[i]); //printf("%c %d\n", this->nucs[i], this->numseq[i]); i++; } } // This is for ending sequences. this->nucs[i] = 0; //printf("Read fasta file: %s (%d nucs)\n", this->nucs, this->numofbases); //getc(stdin); fclose(fasta_file); }