/******************************************************************************
 * Fetches the next data line from the underlying wiggle reader and, when one
 * is available, copies it into the data block:
 *   - the block's start position is set to the reader's current position,
 *   - the block's number-read count is set to the span of the line,
 *   - the block's prior is set to the value of the line.
 *
 * The reader's sequence name and current position are handed to
 * get_next_data_line_from_wiggle() by address, so that call may update them.
 *
 * Returns whatever get_next_data_line_from_wiggle() returned. The data block
 * is left untouched when that result is FALSE.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_wig(
  DATA_BLOCK_READER_T *reader,
  DATA_BLOCK_T *data_block
) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Out-parameters of the wiggle line reader. The step and the format-line
  // flag are required by its signature but are not used here.
  size_t line_step;
  size_t line_span;
  double line_value;
  BOOLEAN_T saw_format_line;

  BOOLEAN_T got_line = get_next_data_line_from_wiggle(
    wig->raw_reader,
    &(wig->sequence_name),
    &(wig->current_position),
    &line_step,
    &line_span,
    &line_value,
    &saw_format_line
  );

  if (!got_line) {
    // Nothing was read: hand the failure straight back to the caller.
    return got_line;
  }

  set_start_pos_for_data_block(data_block, wig->current_position);
  set_num_read_into_data_block(data_block, line_span);
  set_prior_in_data_block(data_block, line_value);

  return got_line;
}
/******************************************************************************
 * Reads the next prior value from the PSP file into the data block.
 *
 * Leading white space is skipped. If the next non-space character is a '>'
 * at the start of a line (the start of a new sequence), it is pushed back on
 * the stream and no prior is read. Otherwise the characters up to the next
 * white space or EOF are parsed as a number that must lie in [0, 1].
 *
 * On return the data block's prior holds the parsed value, or NaN() if no
 * prior was read. Its start position is set to the reader's current position
 * (incremented once for each prior read) and its number-read count is 1 or 0.
 *
 * Returns TRUE if a prior was read, FALSE if the next sequence or EOF was
 * reached first.
 * Dies on a read error, on a token too long for the internal buffer, and on
 * a token that is not a valid prior.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader,
  DATA_BLOCK_T *data_block
) {
  // An enum constant is a true compile-time constant, so the buffer is an
  // ordinary fixed-size array rather than a variable-length array.
  enum { PSP_PRIOR_TOKEN_BUFFER_SIZE = 100 };
  char buffer[PSP_PRIOR_TOKEN_BUFFER_SIZE];

  BOOLEAN_T result = FALSE;
  int num_read = 0;

  PSP_DATA_BLOCK_READER_T *psp_reader
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Until a prior is successfully parsed the block reports NaN.
  double *output_prior = get_prior_from_data_block(data_block);
  *output_prior = NaN();

  // Skip over leading white space, remembering whether the last white space
  // character seen was a newline.
  int c = 0;
  while ((c = fgetc(psp_reader->psp_file)) != EOF && isspace(c)) {
    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
  }

  if (c == '>' && psp_reader->at_start_of_line == TRUE) {
    // We found the start of a new sequence while trying to find a prior.
    // Push the '>' back. ungetc() reports failure only through its return
    // value; it does not set the stream's error indicator, so ferror()
    // cannot be used to detect a failed push-back.
    if (ungetc(c, psp_reader->psp_file) == EOF) {
      die(
        "Error reading file:%s.\nError message: %s\n",
        psp_reader->filename,
        "unable to push a character back onto the stream"
      );
    }
  }
  else {
    // We are at the start of a prior.
    // Read the prior string until the next white space or EOF.
    int buffer_index = 0;
    while (c != EOF && !isspace(c)) {
      buffer[buffer_index] = (char) c;
      ++buffer_index;
      if (buffer_index >= (PSP_PRIOR_TOKEN_BUFFER_SIZE - 1)) {
        // No prior string should be this long
        buffer[PSP_PRIOR_TOKEN_BUFFER_SIZE - 1] = 0;
        die(
          "File %s contains invalid prior value: %s\n",
          psp_reader->filename,
          buffer
        );
      }
      c = fgetc(psp_reader->psp_file);
    }
    // c is now the character that terminated the token (or EOF).
    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
    buffer[buffer_index] = '\0';

    // If the buffer is not empty, it should contain a string
    // representing the prior. Convert it to a double.
    if (buffer_index != 0) {
      char *end_ptr = NULL;
      double prior = strtod(buffer, &end_ptr);
      // NOTE(review): a token such as "nan" passes this range check because
      // every ordered comparison with NaN is false -- confirm whether
      // accepting it is intended.
      if (end_ptr == buffer
          || *end_ptr != '\0'
          || prior < 0.0
          || prior > 1.0) {
        die(
          "File %s contains invalid prior value: %s\n",
          psp_reader->filename,
          buffer
        );
      }
      *output_prior = prior;
      num_read = 1;
      ++psp_reader->current_position;
      result = TRUE;
    }
  }

  if (c == EOF && ferror(psp_reader->psp_file)) {
    // ferror() returns a non-zero flag, not an errno code, so its result
    // must not be passed to strerror(); report the condition itself instead.
    die(
      "Error while reading file:%s.\nError message: %s\n",
      psp_reader->filename,
      "the stream's error indicator is set"
    );
  }

  set_start_pos_for_data_block(data_block, psp_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);

  return result;
}
/******************************************************************************
 * Fills in the next data block for the sequence.
 * During the first call for the sequence it fills in the full data block.
 * On successive calls (when the block is already full) it shifts the sequence
 * in the block down one position and reads one more character.
 *
 * White space in the file is skipped. Each sequence character is converted
 * to upper case; a character that is not known in the reader's alphabet is
 * replaced by the alphabet's wildcard and a warning is printed to stderr.
 * A '>' at the start of a line marks the start of the next sequence: it is
 * pushed back on the stream and the block is left incomplete.
 *
 * The reader's current position is incremented on every call, and the data
 * block's start position and number-read count are updated before returning.
 *
 * Returns TRUE if it was able to completely fill the block, FALSE if
 * the next sequence or EOF was reached before the block was filled.
 * Dies if other errors are encountered.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_seq_reader_from_fasta(
  DATA_BLOCK_READER_T *reader,
  DATA_BLOCK_T *data_block
) {
  BOOLEAN_T result = FALSE;
  char *raw_seq = get_sequence_from_data_block(data_block);
  int block_size = get_block_size_from_data_block(data_block);
  int num_read = get_num_read_into_data_block(data_block);

  SEQ_READER_FROM_FASTA_T *fasta_reader
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (num_read == block_size) {
    // Block is already full, shift all elements in the block down by one
    // position to make room for one more character.
    // FIXME CEG: Inefficient, replace with circular buffer.
    memmove(raw_seq, raw_seq + 1, block_size - 1);
    num_read = block_size - 1;
    raw_seq[num_read] = 0;
  }

  int c = 0;
  while ((c = fgetc(fasta_reader->fasta_file)) != EOF) {

    if (isspace(c)) {
      // Skip over white space, remembering whether it was a newline.
      fasta_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
      continue;
    }

    if (c == '>' && fasta_reader->at_start_of_line == TRUE) {
      // We found the start of a new sequence while trying
      // to fill the block. Leave the block incomplete.
      // ungetc() reports failure only through its return value; it does not
      // set the stream's error indicator, so ferror() cannot detect it.
      if (ungetc(c, fasta_reader->fasta_file) == EOF) {
        die(
          "Error while reading file:%s.\nError message: %s\n",
          fasta_reader->filename,
          "unable to push a character back onto the stream"
        );
      }
      raw_seq[num_read] = 0;
      break;
    }

    // A sequence character has been consumed, so we are no longer at the
    // start of a line. Without this, a '>' later on the same line would be
    // mistaken for the start of a new sequence.
    fasta_reader->at_start_of_line = FALSE;

    // Fill in another character in the block
    raw_seq[num_read] = toupper(c);
    // Check that character is legal in alphabet.
    // If not, replace with wild card character.
    if (!alph_is_known(fasta_reader->alphabet, raw_seq[num_read])) {
      raw_seq[num_read] = alph_wildcard(fasta_reader->alphabet);
      fprintf(
        stderr,
        "Warning: %c is not a valid character in %s alphabet.\n"
        " Converting %c to %c.\n",
        c,
        alph_name(fasta_reader->alphabet),
        c,
        raw_seq[num_read]
      );
    }
    ++num_read;
    if (num_read == block_size) {
      // block is full
      result = TRUE;
      break;
    }
  }

  if (c == EOF && ferror(fasta_reader->fasta_file)) {
    // ferror() returns a non-zero flag, not an errno code, so its result
    // must not be passed to strerror(); report the condition itself instead.
    die(
      "Error while reading file:%s.\nError message: %s\n",
      fasta_reader->filename,
      "the stream's error indicator is set"
    );
  }

  ++fasta_reader->current_position;
  set_start_pos_for_data_block(data_block, fasta_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);

  return result;
}