/******************************************************************************
 * Pulls the next data line from the underlying wiggle reader and, when a line
 * was obtained, copies its position, span and value into the data block.
 *
 * The sequence name and current position stored in the wig reader are
 * passed to get_next_data_line_from_wiggle() by address so it can update them.
 *
 * Returns whatever get_next_data_line_from_wiggle() returned. The data block
 * is left untouched when that value is false.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_wig(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  WIG_PRIOR_BLOCK_READER_T *wig 
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Output slots for the wiggle line reader. The step and the format-line
  // flag are required by its signature but are not used here.
  size_t line_step;
  size_t line_span;
  double line_value;
  BOOLEAN_T saw_format_line;

  const BOOLEAN_T got_line = get_next_data_line_from_wiggle(
    wig->raw_reader,
    &(wig->sequence_name),
    &(wig->current_position),
    &line_step,
    &line_span,
    &line_value,
    &saw_format_line
  );

  if (!got_line) {
    // Nothing was read: report that without modifying the block.
    return got_line;
  }

  // One wiggle line becomes one block: the span is recorded as the number
  // of items read and the line's value as the block's prior.
  set_start_pos_for_data_block(data_block, wig->current_position);
  set_num_read_into_data_block(data_block, line_span);
  set_prior_in_data_block(data_block, line_value);

  return got_line;
}
#include <errno.h> // errno is used for read-error reporting below; harmless if already included.

/******************************************************************************
 * Reads the next prior value from the PSP file into the data block.
 *
 * Leading white space is skipped. If the next character is a '>' at the
 * start of a line (the start of a new sequence), it is pushed back onto the
 * stream and no prior is read. Otherwise the next white-space delimited
 * token is parsed as a number in the range [0, 1], stored as the block's
 * prior, and the reader's current position is advanced by one.
 *
 * The prior in the data block is set to NaN() before reading, so it remains
 * NaN when no prior was read. The block's start position and number of
 * items read (1 or 0) are always updated.
 *
 * Returns TRUE if a prior was read, FALSE if the next sequence or EOF was
 * reached first.
 * Calls die() on a read error, on a token that is too long, not numeric,
 * NaN, or outside [0, 1].
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  // Capacity of the token buffer, including the terminating NUL.
  // An enum constant keeps the buffer a fixed-size array rather than a VLA.
  enum { PSP_PRIOR_TOKEN_MAX = 100 };
  char buffer[PSP_PRIOR_TOKEN_MAX];

  BOOLEAN_T result = FALSE;
  int num_read = 0;

  PSP_DATA_BLOCK_READER_T *psp_reader 
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  double *output_prior = get_prior_from_data_block(data_block);
  *output_prior = NaN();

  int c = 0;

  // Skip over leading white space, tracking whether the last white space
  // character seen was a newline.
  while ((c = fgetc(psp_reader->psp_file)) != EOF) {
    if (!isspace(c)) {
      break;
    }
    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
  }

  if (c == '>' && psp_reader->at_start_of_line == TRUE) {
    // We found the start of a new sequence while trying
    // to find a prior. Put the '>' back on the stream.
    // ungetc() reports failure through its return value; it does not
    // set the stream's error indicator, so ferror() cannot detect it.
    if (ungetc(c, psp_reader->psp_file) == EOF) {
      die(
        "Error reading file:%s.\nError message: %s\n", 
        psp_reader->filename,
        "unable to push back character with ungetc()"
      );
    }
  }
  else {
    // We are at start of a prior.
    // Read prior string until next space or EOF.
    int buffer_index = 0;
    while (c != EOF && !isspace(c)) {
      buffer[buffer_index] = (char) c;
      ++buffer_index;
      if (buffer_index >= (PSP_PRIOR_TOKEN_MAX - 1)) {
        // No prior string should be this long
        buffer[PSP_PRIOR_TOKEN_MAX - 1] = 0;
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      c = fgetc(psp_reader->psp_file);
    }

    if (c == EOF) {
      // Capture errno before any other library call can overwrite it.
      // Checking here, before parsing, ensures a token truncated by a
      // read error is reported as a read error, not as an invalid prior.
      const int read_errno = errno;
      if (ferror(psp_reader->psp_file)) {
        die(
          "Error while reading file:%s.\nError message: %s\n", 
          psp_reader->filename,
          strerror(read_errno)
        );
      }
    }

    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;

    buffer[buffer_index] = '\0';

    // If the buffer is not empty, it should contain a string
    // representing the prior. Convert it to a double.
    if (buffer_index != 0) {
      char *end_ptr = NULL;
      double prior = strtod(buffer, &end_ptr);
      // The range test is written as a negated "in range" check so that
      // NaN (which strtod() accepts and which fails every comparison)
      // is rejected along with out-of-range values.
      if (end_ptr == buffer 
          || *end_ptr != '\0' 
          || !(prior >= 0.0 && prior <= 1.0)
      ) {
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      *output_prior = prior;
      num_read = 1;
      ++psp_reader->current_position;
      result = TRUE;
    }

  }

  set_start_pos_for_data_block(data_block, psp_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;
}
Ejemplo n.º 3
0
#include <errno.h> // errno is used for read-error reporting below; harmless if already included.

/******************************************************************************
 * Fills in the next data block for the sequence. 
 * During the first call for the sequence it fills in the full data block.
 * On successive calls, shifts the sequence in the block down one position
 * and reads one more character.
 *
 * White space in the input is skipped. Each sequence character is converted
 * to upper case; a character that is not known in the reader's alphabet is
 * replaced by the alphabet's wildcard and a warning is printed to stderr.
 * A '>' at the start of a line marks the next sequence: it is pushed back
 * onto the stream and the block is left incomplete.
 *
 * An incomplete block is NUL-terminated after the last character read.
 * The reader's current position is advanced by one on every call and stored
 * as the block's start position.
 * 
 * Returns TRUE if it was able to completely fill the block, FALSE if 
 * the next sequence or EOF was reached before the block was filled.
 * Dies if other errors encountered.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_seq_reader_from_fasta(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  BOOLEAN_T result = FALSE;
  char *raw_seq = get_sequence_from_data_block(data_block);
  int block_size = get_block_size_from_data_block(data_block);
  int num_read = get_num_read_into_data_block(data_block);

  SEQ_READER_FROM_FASTA_T *fasta_reader 
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (num_read == block_size) {
    // Block is already full, shift all elements in the block down by one position
    // FIXME CEG: Inefficient, replace with circular buffer.
    memmove(raw_seq, raw_seq + 1, block_size - 1);
    num_read = block_size - 1;
    raw_seq[num_read] = 0;
  }

  int c = 0;
  while ((c = fgetc(fasta_reader->fasta_file)) != EOF) {
    if (isspace(c)) {
      // Skip over white space, remembering whether it ended a line.
      fasta_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
      continue;
    }

    if (c == '>' && fasta_reader->at_start_of_line == TRUE) {
      // We found the start of a new sequence while trying
      // to fill the block. Leave the block incomplete.
      // ungetc() reports failure through its return value; it does not
      // set the stream's error indicator, so ferror() cannot detect it.
      if (ungetc(c, fasta_reader->fasta_file) == EOF) {
        die(
          "Error while reading file:%s.\nError message: %s\n", 
          fasta_reader->filename,
          "unable to push back character with ungetc()"
        );
      }
      break;
    }

    // Fill in another character in the block
    raw_seq[num_read] = toupper(c);
    // Check that character is legal in alphabet. 
    // If not, replace with wild card character.
    if (!alph_is_known(fasta_reader->alphabet, raw_seq[num_read])) {
      raw_seq[num_read] = alph_wildcard(fasta_reader->alphabet);
      fprintf(
        stderr, 
        "Warning: %c is not a valid character in %s alphabet.\n"
        "         Converting %c to %c.\n",
        c,
        alph_name(fasta_reader->alphabet),
        c,
        raw_seq[num_read]
      );
    }
    ++num_read;
    if (num_read == block_size) {
      // block is full
      result = TRUE;
      break;
    }
  }

  if (c == EOF) {
    // Capture errno before any other library call can overwrite it.
    // ferror() only yields a non-zero flag, not an error number, so it
    // cannot be passed to strerror().
    const int read_errno = errno;
    if (ferror(fasta_reader->fasta_file)) {
      die(
        "Error while reading file:%s.\nError message: %s\n", 
        fasta_reader->filename,
        strerror(read_errno)
      );
    }
  }

  if (num_read < block_size) {
    // Incomplete block (next sequence or EOF): terminate what was read so
    // both cases leave the buffer in the same state.
    raw_seq[num_read] = 0;
  }

  ++fasta_reader->current_position;
  set_start_pos_for_data_block(data_block, fasta_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;

}