/******************************************************************************
 * Extracts the name of the current sequence from a MEME PSP prior reader.
 *
 * The name is the leading run of non-whitespace characters in the stored
 * sequence header. A newly allocated, NUL-terminated copy is stored in *name.
 * The caller is responsible for freeing the memory for the sequence name.
 *
 * Returns TRUE if a name was stored in *name, FALSE (leaving *name untouched)
 * if there is no current sequence header, as at the start of the file.
 *****************************************************************************/
BOOLEAN_T get_seq_name_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader,
  char **name // OUT
) {

  BOOLEAN_T result = FALSE;
  PSP_DATA_BLOCK_READER_T *psp_reader
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  if (psp_reader->sequence_header == NULL
      || psp_reader->sequence_header_len <= 0) {
    result = FALSE;
  }
  else {
    // The name ends at the first whitespace character, or at the end of the
    // header if it contains no whitespace.
    int name_len = 0;
    for (name_len = 0; name_len < psp_reader->sequence_header_len; ++name_len) {
      // isspace() is only defined for EOF and values representable as
      // unsigned char, so a plain (possibly negative) char must be cast.
      if (isspace((unsigned char) psp_reader->sequence_header[name_len])) {
        break;
      }
    }
    myassert(
      TRUE,
      name_len <= psp_reader->sequence_header_len,
      "Error parsing seq. name.\n"
    );
    char *buffer = mm_malloc(sizeof(char) * (name_len + 1));
    strncpy(buffer, psp_reader->sequence_header, name_len);
    // strncpy does not terminate the copy when it stops at name_len.
    buffer[name_len] = 0;
    *name = buffer;
    result = TRUE;
  }

  return result;
}
/******************************************************************************
 * Reads the next data line from the underlying wiggle reader and, if one was
 * read, records it in the data block.
 *
 * On success the block's start position is set to the reader's current
 * position, its number of items read is set to the span of the line, and
 * its prior is set to the value of the line. On failure the data block is
 * left unmodified.
 *
 * Returns the result of get_next_data_line_from_wiggle().
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_wig(
  DATA_BLOCK_READER_T *reader,
  DATA_BLOCK_T *data_block
) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Outputs of the raw wiggle reader. The step and the format-line flag
  // are required by its interface but are not used here.
  size_t step;
  size_t span;
  double value;
  BOOLEAN_T found_format_line;

  BOOLEAN_T got_line = get_next_data_line_from_wiggle(
    wig->raw_reader,
    &(wig->sequence_name),
    &(wig->current_position),
    &step,
    &span,
    &value,
    &found_format_line
  );
  if (!got_line) {
    return got_line;
  }

  set_start_pos_for_data_block(data_block, wig->current_position);
  set_num_read_into_data_block(data_block, span);
  set_prior_in_data_block(data_block, value);

  return got_line;
}
/******************************************************************************
 * Resets a MEME PSP prior block reader.
 *
 * Rewinds the PSP file, sets the current position to -1 and marks the
 * reader as being at the start of a line.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T reset_prior_reader_from_psp(DATA_BLOCK_READER_T *reader) {

  PSP_DATA_BLOCK_READER_T *psp
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  psp->at_start_of_line = TRUE;
  psp->current_position = -1;
  rewind(psp->psp_file);

  return TRUE;
}
/******************************************************************************
 * Resets a wiggle prior block reader.
 *
 * Resets the underlying wiggle reader, releases the stored sequence name
 * and sets the current position to -1.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T reset_prior_reader_from_wig(DATA_BLOCK_READER_T *reader) {

  WIG_PRIOR_BLOCK_READER_T *wig_reader
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  reset_wiggle_reader(wig_reader->raw_reader);

  myfree(wig_reader->sequence_name);
  // Clear the pointer so that later code does not see a dangling name:
  // get_seq_name_from_wig() copies any non-NULL sequence_name and
  // free_prior_reader_from_wig() releases it again.
  // NOTE(review): assumes myfree() does not itself reset its argument.
  wig_reader->sequence_name = NULL;

  wig_reader->current_position = -1;

  return TRUE;
}
/******************************************************************************
 * Closes a wiggle prior block reader.
 *
 * Sets the current position to 0 and, if the reader still has an underlying
 * wiggle reader, frees it and clears the reference to it.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T close_prior_reader_from_wig(DATA_BLOCK_READER_T *reader) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  wig->current_position = 0;

  // Nothing more to do if the raw reader has already been released.
  if (!wig->raw_reader) {
    return TRUE;
  }

  free_wiggle_reader(wig->raw_reader);
  wig->raw_reader = NULL;

  return TRUE;
}
/******************************************************************************
 * Pushes the most recently read data line back onto the underlying wiggle
 * reader by delegating to unget_data_line_from_wiggle().
 *
 * Returns the result of unget_data_line_from_wiggle().
 *****************************************************************************/
BOOLEAN_T unget_data_block_from_wig(DATA_BLOCK_READER_T *reader) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  BOOLEAN_T was_ungot = unget_data_line_from_wiggle(wig->raw_reader);

  return was_ungot;
}
/******************************************************************************
 * Advances the underlying wiggle reader to the next sequence, then records
 * the sequence name reported by that reader and sets the current position
 * to 0. Both are updated whether or not the advance succeeded.
 *
 * Returns the result of go_to_next_sequence_in_wiggle(): TRUE if it was able
 * to advance to the next sequence, FALSE if EOF was reached before the next
 * sequence was found.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_wiggle_reader(
  DATA_BLOCK_READER_T *reader
) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // The raw reader must be advanced before it is asked for the name.
  BOOLEAN_T advanced = go_to_next_sequence_in_wiggle(wig->raw_reader);
  wig->current_position = 0;
  wig->sequence_name = get_wiggle_seq_name(wig->raw_reader);

  return advanced;
}
/******************************************************************************
 * Frees a MEME PSP prior block reader.
 *
 * Releases the file name, the sequence header and finally the reader
 * structure itself. The PSP file is not closed here.
 *****************************************************************************/
void free_prior_reader_from_psp(DATA_BLOCK_READER_T *reader) {

  PSP_DATA_BLOCK_READER_T *psp
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // File name buffer
  psp->filename_buffer_len = 0;
  psp->filename_len = 0;
  myfree(psp->filename);

  // Sequence header buffer
  psp->sequence_buffer_len = 0;
  psp->sequence_header_len = 0;
  myfree(psp->sequence_header);

  // The reader itself
  myfree(psp);
}
/******************************************************************************
 * Read from the current position in the file to the first symbol after the
 * start of the next sequence. Set the value of the current sequence.
 *
 * The current position is reset to 0, then the file is scanned for a '>'
 * at the start of a line. When one is found the sequence header is read
 * and the sequence name is extracted from it, either from genomic
 * coordinates (if the reader is configured to parse them and they are
 * present) or as the text before the first white space.
 *
 * Returns TRUE if it was able to advance to the next sequence, FALSE if
 * EOF reached before the next sequence was found. Dies if a read error
 * occurs or if no sequence name can be found in the header.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {

  BOOLEAN_T result = FALSE;
  SEQ_READER_FROM_FASTA_T *fasta_reader
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);
  fasta_reader->current_position = 0;

  // Scan forward to the next '>' that begins a line.
  int c = 0;
  while((c = fgetc(fasta_reader->fasta_file)) != EOF) {
    if (fasta_reader->at_start_of_line == TRUE && c == '>') {
      break;
    }
    else if (c == '\n') {
      fasta_reader->at_start_of_line = TRUE;
    }
    else {
      fasta_reader->at_start_of_line = FALSE;
    }
  }

  // At this point c is '>' or EOF
  if (c == '>') {
    BOOLEAN_T found_genomic_coordinates = FALSE;
    result = read_seq_header_from_seq_reader_from_fasta(fasta_reader);
    if (result == TRUE && fasta_reader->parse_genomic_coord == TRUE) {
      // Look for genomic coordinates in header
      found_genomic_coordinates = parse_genomic_coordinates(fasta_reader);
    }
    if (found_genomic_coordinates == FALSE) {
      // Look for whitespace in header
      // The sequence name is the string before the white space.
      BOOLEAN_T found_name = FALSE;
      found_name = parse_seq_name(fasta_reader);
      if (found_name == FALSE) {
        die(
          "Unable to find sequence name in header %s.\n",
          fasta_reader->sequence_header
        );
      }
    }
  }
  else {
    if (ferror(fasta_reader->fasta_file)) {
      // ferror() only returns a non-zero flag, not an error number,
      // so the message must be looked up from errno.
      die(
        "Error reading file:%s.\nError message: %s\n",
        fasta_reader->filename,
        strerror(errno)
      );
    }
    else if (feof(fasta_reader->fasta_file)) {
      // Reached EOF before reaching the start of the sequence
      result = FALSE;
    }
  }

  return result;
}
/******************************************************************************
 * Frees a sequence FASTA reader.
 *
 * Releases the file name, the sequence header, the sequence name and
 * finally the reader structure itself. The FASTA file is not closed here.
 *****************************************************************************/
void free_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {

  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  // File name buffer
  fasta->filename_buffer_len = 0;
  fasta->filename_len = 0;
  myfree(fasta->filename);

  // Sequence header buffer
  fasta->sequence_buffer_len = 0;
  fasta->sequence_header_len = 0;
  myfree(fasta->sequence_header);

  // Sequence name
  fasta->sequence_name_len = 0;
  myfree(fasta->sequence_name);

  // The reader itself
  myfree(fasta);
}
/******************************************************************************
 * Resets a sequence FASTA reader.
 *
 * Rewinds the FASTA file, sets the current position to -1 and marks the
 * reader as being at the start of a line. Dies if the reader is reading
 * from standard input, which cannot be rewound.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T reset_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {

  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  FILE *input = fasta->fasta_file;
  if (input != stdin) {
    rewind(input);
  }
  else {
    die("Unable to rewind when reading sequence from standard input\n");
  }

  fasta->at_start_of_line = TRUE;
  fasta->current_position = -1;

  return TRUE;
}
/******************************************************************************
 * Provides a copy of the name of the current sequence of a FASTA reader.
 *
 * The copy is made with strdup() and stored in *name. The caller is
 * responsible for freeing the memory for the sequence name.
 *
 * Returns TRUE if a name was stored in *name, FALSE (leaving *name untouched)
 * if the reader has no sequence name, as at the start of the file.
 *****************************************************************************/
BOOLEAN_T get_seq_name_from_seq_reader_from_fasta(
  DATA_BLOCK_READER_T *reader,
  char **name // OUT
) {

  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  // No current sequence yet.
  if (fasta->sequence_name == NULL) {
    return FALSE;
  }
  if (fasta->sequence_name_len <= 0) {
    return FALSE;
  }

  *name = strdup(fasta->sequence_name);
  return TRUE;
}
/******************************************************************************
 * Closes the file of a MEME PSP prior block reader.
 *
 * Returns TRUE if the file was closed successfully, FALSE if the reader
 * has no file. Dies if closing the file fails.
 *****************************************************************************/
BOOLEAN_T close_prior_reader_from_psp(DATA_BLOCK_READER_T *reader) {

  PSP_DATA_BLOCK_READER_T *psp
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Nothing to close.
  if (psp->psp_file == NULL) {
    return FALSE;
  }

  if (fclose(psp->psp_file) != EOF) {
    return TRUE;
  }

  die(
    "Error closing file: %s.\nError message: %s\n",
    psp->filename,
    strerror(errno)
  );
  return FALSE;
}
/******************************************************************************
 * Closes the file of a sequence FASTA reader.
 *
 * The current position is set to 0 whether or not there is a file to close.
 *
 * Returns TRUE if the file was closed successfully, FALSE if the reader
 * has no file. Dies if closing the file fails.
 *****************************************************************************/
BOOLEAN_T close_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {

  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  fasta->current_position = 0;

  // Nothing to close.
  if (fasta->fasta_file == NULL) {
    return FALSE;
  }

  if (fclose(fasta->fasta_file) != EOF) {
    return TRUE;
  }

  die(
    "Error closing file: %s.\nError message: %s\n",
    fasta->filename,
    strerror(errno)
  );
  return FALSE;
}
/******************************************************************************
 * Provides a copy of the name of the current sequence of a wiggle prior
 * block reader.
 *
 * The copy is made with strdup() and stored in *name. The caller is
 * responsible for freeing the memory for the sequence name.
 *
 * Returns TRUE if a name was stored in *name, FALSE (leaving *name untouched)
 * if the reader has no sequence name, as at the start of the file.
 *****************************************************************************/
BOOLEAN_T get_seq_name_from_wig(
  DATA_BLOCK_READER_T *reader,
  char **name // OUT
) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // No current sequence yet.
  if (wig->sequence_name == NULL) {
    return FALSE;
  }

  *name = strdup(wig->sequence_name);
  return TRUE;
}
/******************************************************************************
 * Read from the current position in the file to the first prior after the
 * start of the next sequence. Set the value of the current sequence.
 *
 * The file is scanned for a '>' at the start of a line. When one is found
 * the sequence is read with read_sequence_from_prior_reader_from_psp().
 *
 * Returns TRUE if it was able to advance to the next sequence, FALSE if
 * EOF reached before the next sequence was found. Dies if a read error
 * occurs.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader
) {

  BOOLEAN_T result = FALSE;
  PSP_DATA_BLOCK_READER_T *psp_reader
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Scan forward to the next '>' that begins a line.
  int c = 0;
  while((c = fgetc(psp_reader->psp_file)) != EOF) {
    if (psp_reader->at_start_of_line == TRUE && c == '>') {
      break;
    }
    else if (c == '\n') {
      psp_reader->at_start_of_line = TRUE;
    }
    else {
      psp_reader->at_start_of_line = FALSE;
    }
  }

  // At this point c is '>' or EOF
  if (c == '>') {
    result = read_sequence_from_prior_reader_from_psp(psp_reader);
  }
  else {
    if (ferror(psp_reader->psp_file)) {
      // ferror() only returns a non-zero flag, not an error number,
      // so the message must be looked up from errno.
      die(
        "Error reading file:%s.\nError message: %s\n",
        psp_reader->filename,
        strerror(errno)
      );
    }
    else if (feof(psp_reader->psp_file)) {
      // Reached EOF before reaching the start of the sequence
      result = FALSE;
    }
  }

  return result;
}
/******************************************************************************
 * Reports whether a wiggle prior block reader has reached EOF, as indicated
 * by get_wiggle_eof() on the underlying wiggle reader.
 *
 * Returns TRUE if the reader is at EOF, FALSE otherwise.
 *****************************************************************************/
BOOLEAN_T prior_reader_from_wig_is_eof(DATA_BLOCK_READER_T *reader) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  if (get_wiggle_eof(wig->raw_reader)) {
    return TRUE;
  }
  return FALSE;
}
/******************************************************************************
 * Reads the next prior from a MEME PSP file into the data block.
 *
 * Leading white space is skipped. If a '>' is then found at the start of a
 * line, the next sequence has been reached: the character is pushed back
 * onto the stream and no prior is read. Otherwise the next white space
 * delimited token is parsed as a prior, which must be a number in the
 * range [0, 1].
 *
 * On return the prior of the data block holds the value read (NaN() if no
 * prior was read), its start position is the current position of the
 * reader, and its number of items read is 1 or 0.
 *
 * Returns TRUE if a prior was read, FALSE if the next sequence or EOF was
 * reached first. Dies if a read error occurs or if the token is not a
 * valid prior.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader,
  DATA_BLOCK_T *data_block
) {

  // An enumeration constant is a true compile-time constant, which makes
  // buffer an ordinary array rather than a variable-length array.
  enum { buffer_size = 100 };

  BOOLEAN_T result = FALSE;
  char buffer[buffer_size];
  int num_read = 0;

  PSP_DATA_BLOCK_READER_T *psp_reader
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  double *output_prior = get_prior_from_data_block(data_block);
  *output_prior = NaN();

  int c = 0;

  // Skip over leading white space
  while((c = fgetc(psp_reader->psp_file)) != EOF) {

    if (isspace(c)) {
      if (c == '\n') {
        psp_reader->at_start_of_line = TRUE;
      }
      else {
        psp_reader->at_start_of_line = FALSE;
      }
      continue;
    }
    else {
      break;
    }
  }

  // Remember the error number of the last read, because strtod() below
  // may overwrite errno before a read error is reported.
  int read_errno = errno;

  if (c == '>' && psp_reader->at_start_of_line == TRUE) {
    // We found the start of a new sequence while trying
    // to find a prior.
    c = ungetc(c, psp_reader->psp_file);
    if (ferror(psp_reader->psp_file)) {
      // ferror() only returns a non-zero flag, not an error number,
      // so the message must be looked up from errno.
      die(
        "Error reading file:%s.\nError message: %s\n",
        psp_reader->filename,
        strerror(errno)
      );
    }
  }
  else {
    // We are at start of a prior.
    // Read prior string until next space or EOF.
    int buffer_index = 0;
    while(c != EOF && !isspace(c)) {
      buffer[buffer_index] = c;
      ++buffer_index;
      if (buffer_index >= (buffer_size - 1)) {
        // No prior string should be this long
        buffer[buffer_size - 1] = 0;
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      c = fgetc(psp_reader->psp_file);
    }
    read_errno = errno;

    if (c == '\n') {
      psp_reader->at_start_of_line = TRUE;
    }
    else {
      psp_reader->at_start_of_line = FALSE;
    }
    buffer[buffer_index] = '\0';

    // If the buffer is not empty, it should contain a string
    // representing the prior. Convert it to a double.
    if (buffer_index != 0) {
      char *end_ptr = NULL;
      double prior = strtod(buffer, &end_ptr);
      // The range test is written so that it fails for NaN as well:
      // strtod() accepts "nan", and NaN is neither < 0 nor > 1.
      if (end_ptr == buffer
          || *end_ptr != '\0'
          || !(prior >= 0.0 && prior <= 1.0)
      ) {
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      *output_prior = prior;
      num_read = 1;
      ++psp_reader->current_position;
      result = TRUE;
    }
  }

  if (c == EOF && ferror(psp_reader->psp_file)) {
    die(
      "Error while reading file:%s.\nError message: %s\n",
      psp_reader->filename,
      strerror(read_errno)
    );
  }

  set_start_pos_for_data_block(data_block, psp_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;
}
/******************************************************************************
 * Frees a wiggle prior block reader.
 *
 * Releases the stored sequence name and then the reader structure itself.
 * The underlying wiggle reader is not freed here.
 *****************************************************************************/
void free_prior_reader_from_wig(DATA_BLOCK_READER_T *reader) {

  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // The name must be released before the structure that holds it.
  myfree(wig->sequence_name);
  myfree(wig);
}
/******************************************************************************
 * Fills in the next data block for the sequence.
 * During the first call for the sequence it fills in the full data block.
 * On successive calls, shifts the sequence in the block down one position
 * and reads one more character.
 *
 * White space in the file is skipped. Characters are converted to upper
 * case, and any character that is not known in the alphabet of the reader
 * is replaced by the wildcard of the alphabet, with a warning on stderr.
 * The current position of the reader is incremented on every call and
 * stored as the start position of the block.
 *
 * Returns TRUE if it was able to completely fill the block, FALSE if
 * the next sequence or EOF was reached before the block was filled.
 * Dies if a read error occurs.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_seq_reader_from_fasta(
  DATA_BLOCK_READER_T *reader,
  DATA_BLOCK_T *data_block
) {

  BOOLEAN_T result = FALSE;
  char *raw_seq = get_sequence_from_data_block(data_block);
  int block_size = get_block_size_from_data_block(data_block);
  int num_read = get_num_read_into_data_block(data_block);

  SEQ_READER_FROM_FASTA_T *fasta_reader
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (num_read == block_size) {
    // Block is already full, shift all elements in the block down by one position
    // FIXME CEG: Inefficient, replace with circular buffer.
    memmove(raw_seq, raw_seq + 1, block_size - 1);
    num_read = block_size - 1;
    raw_seq[num_read] = 0;
  }

  int c = 0;

  while((c = fgetc(fasta_reader->fasta_file)) != EOF) {
    if (isspace(c)) {
      // Skip over white space
      if (c == '\n') {
        fasta_reader->at_start_of_line = TRUE;
      }
      else {
        fasta_reader->at_start_of_line = FALSE;
      }
      continue;
    }
    else if (c == '>' && fasta_reader->at_start_of_line == TRUE) {
      // We found the start of a new sequence while trying
      // to fill the block. Leave the block incomplete.
      c = ungetc(c, fasta_reader->fasta_file);
      if (ferror(fasta_reader->fasta_file)) {
        // ferror() only returns a non-zero flag, not an error number,
        // so the message must be looked up from errno.
        die(
          "Error while reading file:%s.\nError message: %s\n",
          fasta_reader->filename,
          strerror(errno)
        );
      }
      raw_seq[num_read] = 0;
      break;
    }
    else {
      // Fill in another character in the block
      raw_seq[num_read] = toupper(c);
      // Check that character is legal in alphabet.
      // If not, replace with wild card character.
      if (!alph_is_known(fasta_reader->alphabet, raw_seq[num_read])) {
        raw_seq[num_read] = alph_wildcard(fasta_reader->alphabet);
        fprintf(
          stderr,
          "Warning: %c is not a valid character in %s alphabet.\n"
          "         Converting %c to %c.\n",
          c,
          alph_name(fasta_reader->alphabet),
          c,
          raw_seq[num_read]
        );
      }
      ++num_read;
      if (num_read == block_size) {
        // block is full
        result = TRUE;
        break;
      }
    }
  }

  if (c == EOF && ferror(fasta_reader->fasta_file)) {
    die(
      "Error while reading file:%s.\nError message: %s\n",
      fasta_reader->filename,
      strerror(errno)
    );
  }

  ++fasta_reader->current_position;
  set_start_pos_for_data_block(data_block, fasta_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;
}
/******************************************************************************
 * Reports whether a sequence FASTA reader has reached EOF, as indicated by
 * feof() on its FASTA file.
 *
 * Returns TRUE if the reader is at EOF, FALSE otherwise.
 *****************************************************************************/
BOOLEAN_T seq_reader_from_fasta_is_eof(DATA_BLOCK_READER_T *reader) {

  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (feof(fasta->fasta_file)) {
    return TRUE;
  }
  return FALSE;
}