/******************************************************************************
 * This function gets the name of the current sequence from the data block
 * reader. The name is the leading run of non-whitespace characters in the
 * sequence header stored in the PSP reader.
 *
 * reader - data block reader whose data is a PSP_DATA_BLOCK_READER_T
 * name   - OUT: on success receives a newly allocated, NUL-terminated copy
 *          of the name. Not written when FALSE is returned.
 *
 * The caller is responsible for freeing the memory for the sequence name.
 *
 * Returns TRUE if successful, FALSE if there is no current sequence, as 
 * at the start of the file.
 *****************************************************************************/
BOOLEAN_T get_seq_name_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader, 
  char **name // OUT
) {
  PSP_DATA_BLOCK_READER_T *psp_reader 
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  if (psp_reader->sequence_header == NULL || psp_reader->sequence_header_len <= 0) {
    // No header has been stored yet, so there is no current sequence.
    return FALSE;
  }

  // Find the end of the name: first whitespace character or end of header.
  int name_len = 0;
  for (name_len = 0; name_len < psp_reader->sequence_header_len; ++name_len) {
    // The <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    if (isspace((unsigned char) psp_reader->sequence_header[name_len])) {
      break;
    }
  }
  myassert(
    TRUE, 
    name_len <= psp_reader->sequence_header_len, 
    "Error parsing seq. name.\n"
  );

  // Copy exactly name_len characters and terminate explicitly,
  // since strncpy does not add the terminator in this case.
  char *buffer = mm_malloc(sizeof(char) * (name_len + 1));
  strncpy(buffer, psp_reader->sequence_header, name_len);
  buffer[name_len] = '\0';
  *name = buffer;

  return TRUE;
}
/******************************************************************************
 * Reads the next data line from the underlying wiggle reader. If a line was
 * read, the data block's start position is set to the reader's current
 * position, its number-read count to the line's span, and its prior to the
 * line's value. The data block is not modified otherwise.
 *
 * Returns the value returned by get_next_data_line_from_wiggle().
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_wig(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  WIG_PRIOR_BLOCK_READER_T *wig_reader 
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Out-parameters of the wiggle reader. They are initialised so that no
  // indeterminate value can ever be passed on to the data block.
  BOOLEAN_T found_format_line = FALSE;
  size_t step = 0;
  size_t span = 0;
  double value = 0.0;

  BOOLEAN_T result = get_next_data_line_from_wiggle(
    wig_reader->raw_reader,
    &(wig_reader->sequence_name),
    &(wig_reader->current_position),
    &step,
    &span,
    &value,
    &found_format_line
  );

  if (result) {
    set_start_pos_for_data_block(data_block, wig_reader->current_position);
    set_num_read_into_data_block(data_block, span);
    set_prior_in_data_block(data_block, value);
  }

  return result;
}
/******************************************************************************
 * Resets a MEME PSP prior block reader: the PSP file is rewound to its
 * beginning, the current position is set to -1 and the reader is marked as
 * being at the start of a line.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T reset_prior_reader_from_psp(DATA_BLOCK_READER_T *reader) {
  PSP_DATA_BLOCK_READER_T *psp
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  rewind(psp->psp_file);

  // Back to the "nothing read yet" bookkeeping state.
  psp->at_start_of_line = TRUE;
  psp->current_position = -1;

  return TRUE;
}
/******************************************************************************
 * This function resets a wiggle prior block reader UDT.
 *
 * The underlying wiggle reader is reset, the cached sequence name is
 * released and the current position is set to -1.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T reset_prior_reader_from_wig(DATA_BLOCK_READER_T *reader) {
  WIG_PRIOR_BLOCK_READER_T *wig_reader 
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);
  reset_wiggle_reader(wig_reader->raw_reader);
  myfree(wig_reader->sequence_name);
  // Clear the pointer so that later calls that test or free sequence_name
  // (get_seq_name_from_wig, free_prior_reader_from_wig) never see freed memory.
  wig_reader->sequence_name = NULL;
  wig_reader->current_position = -1;
  return TRUE;
}
/******************************************************************************
 * Closes a wiggle prior block reader: the current position is set to 0 and
 * the underlying wiggle reader, if there is one, is freed and its pointer
 * cleared.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T close_prior_reader_from_wig(DATA_BLOCK_READER_T *reader) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  wig->current_position = 0;

  if (wig->raw_reader == NULL) {
    // Nothing left to release.
    return TRUE;
  }

  free_wiggle_reader(wig->raw_reader);
  wig->raw_reader = NULL;
  return TRUE;
}
/******************************************************************************
 * Forwards an "unget" request to the underlying wiggle reader.
 *
 * Returns the value returned by unget_data_line_from_wiggle().
 *****************************************************************************/
BOOLEAN_T unget_data_block_from_wig(DATA_BLOCK_READER_T *reader) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);
  return unget_data_line_from_wiggle(wig->raw_reader);
}
/******************************************************************************
 * Asks the underlying wiggle reader to advance to its next sequence, then
 * refreshes this reader's sequence name from the wiggle reader and sets the
 * current position to 0. The name and position are updated whether or not
 * the advance succeeded.
 *
 * Returns the value returned by go_to_next_sequence_in_wiggle()
 * (presumably TRUE if a next sequence was found, FALSE on EOF).
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_wiggle_reader(
  DATA_BLOCK_READER_T *reader
) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  const BOOLEAN_T advanced = go_to_next_sequence_in_wiggle(wig->raw_reader);

  wig->sequence_name = get_wiggle_seq_name(wig->raw_reader);
  wig->current_position = 0;

  return advanced;
}
/******************************************************************************
 * Frees an instance of the MEME PSP prior block reader: the file name and
 * sequence header buffers are released, their length fields are zeroed, and
 * finally the reader structure itself is released.
 *****************************************************************************/
void free_prior_reader_from_psp(DATA_BLOCK_READER_T *reader) {
  PSP_DATA_BLOCK_READER_T *psp
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Release the owned buffers first ...
  myfree(psp->filename);
  myfree(psp->sequence_header);

  // ... then clear the bookkeeping that described them ...
  psp->filename_len = 0;
  psp->filename_buffer_len = 0;
  psp->sequence_header_len = 0;
  psp->sequence_buffer_len = 0;

  // ... and finally the structure itself.
  myfree(psp);
}
Beispiel #9
0
/******************************************************************************
 * Read from the current position in the file to the first symbol after the
 * start of the next sequence. Set the value of the current sequence.
 *
 * The current position is reset to 0. Characters are consumed until a '>'
 * is found at the start of a line; the sequence header is then read and
 * either genomic coordinates (if the reader is configured to parse them) or
 * a plain sequence name are extracted from it.
 *
 * Returns TRUE if it was able to advance to the next sequence, FALSE if 
 * EOF reached before the next sequence was found. Dies if other errors
 * encountered.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {
  BOOLEAN_T result = FALSE;
  SEQ_READER_FROM_FASTA_T *fasta_reader 
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);
  fasta_reader->current_position = 0;
  int c = 0;
  while((c = fgetc(fasta_reader->fasta_file)) != EOF) {
    if (fasta_reader->at_start_of_line == TRUE && c == '>') {
      break;
    }
    else if (c == '\n') {
      fasta_reader->at_start_of_line = TRUE;
    }
    else {
      fasta_reader->at_start_of_line = FALSE;
    }
  }
  // Remember the error code of the last read before any other call can
  // overwrite it.
  const int read_errno = errno;

  // At this point c is '>' or EOF
  if (c == '>') {
    BOOLEAN_T found_genomic_coordinates = FALSE;
    result = read_seq_header_from_seq_reader_from_fasta(fasta_reader);
    if (result == TRUE && fasta_reader->parse_genomic_coord == TRUE) {
      // Look for genomic coordinates in header
      found_genomic_coordinates = parse_genomic_coordinates(fasta_reader);
    }
    if (found_genomic_coordinates == FALSE) {
      //  Look for whitespace in header
      //  The sequence name is the string before the white space.
      BOOLEAN_T found_name = FALSE;
      found_name = parse_seq_name(fasta_reader);
      if (found_name == FALSE) {
        die(
            "Unable to find sequence name in header %s.\n",
            fasta_reader->sequence_header
        );
      }
    }
  }
  else {
    if (ferror(fasta_reader->fasta_file)) {
      // ferror() only returns a non-zero flag, not an error code, so the
      // message has to be built from errno.
      die(
        "Error reading file:%s.\nError message: %s\n", 
        fasta_reader->filename,
        strerror(read_errno)
      );
    }
    else if (feof(fasta_reader->fasta_file)) {
        // Reached EOF before reaching the start of the sequence
        result = FALSE;
    }
  }
  return result;
}
Beispiel #10
0
/******************************************************************************
 * Frees an instance of the sequence FASTA reader: the file name, sequence
 * header and sequence name buffers are released, their length fields are
 * zeroed, and finally the reader structure itself is released.
 *****************************************************************************/
void free_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {
  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  // Release the owned string buffers.
  myfree(fasta->filename);
  myfree(fasta->sequence_header);
  myfree(fasta->sequence_name);

  // Clear the bookkeeping that described them.
  fasta->filename_len = 0;
  fasta->filename_buffer_len = 0;
  fasta->sequence_header_len = 0;
  fasta->sequence_buffer_len = 0;
  fasta->sequence_name_len = 0;

  // Release the structure itself.
  myfree(fasta);
}
Beispiel #11
0
/******************************************************************************
 * Resets a sequence FASTA reader: the FASTA file is rewound, the current
 * position is set to -1 and the reader is marked as being at the start of
 * a line. Dies if the reader is reading from standard input, which cannot
 * be rewound.
 *
 * Always returns TRUE.
 *****************************************************************************/
BOOLEAN_T reset_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {
  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (fasta->fasta_file != stdin) {
    rewind(fasta->fasta_file);
  }
  else {
    die("Unable to rewind when reading sequence from standard input\n");
  }

  fasta->at_start_of_line = TRUE;
  fasta->current_position = -1;
  return TRUE;
}
Beispiel #12
0
/******************************************************************************
 * Hands out a copy of the name of the current sequence held by the FASTA
 * sequence reader.
 *
 * name - OUT: on success receives a strdup()'ed copy of the sequence name.
 *        Not written when FALSE is returned.
 *
 * The caller is responsible for freeing the memory for the sequence name.
 *
 * Returns TRUE if successful, FALSE if the reader has no sequence name or
 * the name length is not positive, as at the start of the file.
 *****************************************************************************/
BOOLEAN_T get_seq_name_from_seq_reader_from_fasta(
  DATA_BLOCK_READER_T *reader, 
  char **name // OUT
) {
  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (fasta->sequence_name == NULL) {
    return FALSE;
  }
  if (fasta->sequence_name_len <= 0) {
    return FALSE;
  }

  *name = strdup(fasta->sequence_name);
  return TRUE;
}
/******************************************************************************
 * Closes the file of a MEME PSP prior block reader.
 *
 * Returns TRUE if the file was open and was closed successfully, FALSE if
 * the reader has no file. Dies if closing the file fails.
 *****************************************************************************/
BOOLEAN_T close_prior_reader_from_psp(DATA_BLOCK_READER_T *reader) {
  PSP_DATA_BLOCK_READER_T *psp
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  if (psp->psp_file == NULL) {
    // No file to close.
    return FALSE;
  }

  if (fclose(psp->psp_file) != EOF) {
    return TRUE;
  }

  die(
    "Error closing file: %s.\nError message: %s\n", 
    psp->filename, 
    strerror(errno)
  );
  return FALSE;
}
Beispiel #14
0
/******************************************************************************
 * Closes the file of a sequence FASTA reader. The current position is set
 * to 0 in every case.
 *
 * Returns TRUE if the file was open and was closed successfully, FALSE if
 * the reader has no file. Dies if closing the file fails.
 *****************************************************************************/
BOOLEAN_T close_seq_reader_from_fasta(DATA_BLOCK_READER_T *reader) {
  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  fasta->current_position = 0;

  if (fasta->fasta_file == NULL) {
    // No file to close.
    return FALSE;
  }

  if (fclose(fasta->fasta_file) != EOF) {
    return TRUE;
  }

  die(
    "Error closing file: %s.\nError message: %s\n", 
    fasta->filename, 
    strerror(errno)
  );
  return FALSE;
}
/******************************************************************************
 * Hands out a copy of the name of the current sequence held by the wiggle
 * prior reader.
 *
 * name - OUT: on success receives a strdup()'ed copy of the sequence name.
 *        Not written when FALSE is returned.
 *
 * The caller is responsible for freeing the memory for the sequence name.
 *
 * Returns TRUE if successful, FALSE if the reader has no sequence name, as 
 * at the start of the file.
 *****************************************************************************/
BOOLEAN_T get_seq_name_from_wig(
  DATA_BLOCK_READER_T *reader, 
  char **name // OUT
) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  if (wig->sequence_name != NULL) {
    *name = strdup(wig->sequence_name);
    return TRUE;
  }

  return FALSE;
}
/******************************************************************************
 * Read from the current position in the file to the first prior after the
 * start of the next sequence. Set the value of the current sequence.
 *
 * Characters are consumed until a '>' is found at the start of a line; the
 * sequence information following it is then read by
 * read_sequence_from_prior_reader_from_psp().
 *
 * Returns TRUE if it was able to advance to the next sequence, FALSE if 
 * EOF reached before the next sequence was found. Dies if other errors
 * encountered.
 *****************************************************************************/
BOOLEAN_T go_to_next_sequence_in_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader
) {
  BOOLEAN_T result = FALSE;
  PSP_DATA_BLOCK_READER_T *psp_reader 
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);
  int c = 0;
  while((c = fgetc(psp_reader->psp_file)) != EOF) {
    if (psp_reader->at_start_of_line == TRUE && c == '>') {
      break;
    }
    else if (c == '\n') {
      psp_reader->at_start_of_line = TRUE;
    }
    else {
      psp_reader->at_start_of_line = FALSE;
    }
  }
  // Remember the error code of the last read before any other call can
  // overwrite it.
  const int read_errno = errno;

  // At this point c is '>' or EOF
  if (c == '>') {
    result = read_sequence_from_prior_reader_from_psp(psp_reader);
  }
  else {
    if (ferror(psp_reader->psp_file)) {
      // ferror() only returns a non-zero flag, not an error code, so the
      // message has to be built from errno.
      die(
        "Error reading file:%s.\nError message: %s\n", 
        psp_reader->filename,
        strerror(read_errno)
      );
    }
    else if (feof(psp_reader->psp_file)) {
        // Reached EOF before reaching the start of the sequence
        result = FALSE;
    }
  }
  return result;
}
/******************************************************************************
 * Reports whether the wiggle prior reader has reached EOF, as indicated by
 * get_wiggle_eof() on the underlying wiggle reader.
 *
 * Returns TRUE if the reader is at EOF, FALSE otherwise.
 *****************************************************************************/
BOOLEAN_T prior_reader_from_wig_is_eof(DATA_BLOCK_READER_T *reader) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  if (get_wiggle_eof(wig->raw_reader)) {
    return TRUE;
  }
  return FALSE;
}
/******************************************************************************
 * Reads the next prior value from the PSP file into the data block.
 *
 * Leading white space is skipped. If a '>' is then found at the start of a
 * line, it is pushed back onto the stream and no prior is read. Otherwise
 * the characters up to the next white space or EOF are parsed as a number
 * in the range [0, 1].
 *
 * The data block's prior is set to NaN before reading and to the parsed
 * value on success. Its start position is set to the reader's current
 * position and its number-read count to 1 on success, 0 otherwise.
 *
 * Returns TRUE if a prior was read, FALSE if the start of the next sequence
 * or EOF was reached first. Dies on a read error, on a prior string that
 * is too long, or on a prior that is not a number in [0, 1].
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  // Fixed-size scratch space for the text of a single prior value.
  enum { PRIOR_BUFFER_SIZE = 100 };
  char buffer[PRIOR_BUFFER_SIZE];

  BOOLEAN_T result = FALSE;
  int num_read = 0;

  PSP_DATA_BLOCK_READER_T *psp_reader 
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  double *output_prior = get_prior_from_data_block(data_block);
  *output_prior = NaN();

  int c = 0;

  // Skip over leading white space, keeping track of line starts
  while((c = fgetc(psp_reader->psp_file)) != EOF && isspace(c)) {
    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
  }
  // Error code of the most recent read, saved before later library calls
  // (e.g. strtod) can overwrite errno.
  int read_errno = errno;

  if (c == '>' && psp_reader->at_start_of_line == TRUE) {
    // We found the start of a new sequence while trying
    // to find a prior.
    c = ungetc(c, psp_reader->psp_file);
    if (ferror(psp_reader->psp_file)) {
      // ferror() only returns a non-zero flag, not an error code.
      die(
        "Error reading file:%s.\nError message: %s\n", 
        psp_reader->filename,
        strerror(errno)
      );
    }
  }
  else {
    // We are at start of a prior.
    // Read prior string until next space or EOF.
    int buffer_index = 0;
    while(c != EOF && !isspace(c)) {
      buffer[buffer_index] = (char) c;
      ++buffer_index;
      if (buffer_index >= (PRIOR_BUFFER_SIZE - 1)) {
        // No prior string should be this long
        buffer[PRIOR_BUFFER_SIZE - 1] = 0;
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      c = fgetc(psp_reader->psp_file);
    }
    read_errno = errno;

    if (c == '\n') {
      psp_reader->at_start_of_line = TRUE;
    }
    else {
      psp_reader->at_start_of_line = FALSE;
    }

    buffer[buffer_index] = '\0';

    // If the buffer is not empty, it should contain a string
    // representing the prior. Convert it to a double.
    if (buffer_index != 0) {
      char *end_ptr = NULL;
      double prior = strtod(buffer, &end_ptr);
      // The range test is written positively so that NaN, which strtod
      // accepts as input and which fails every comparison, is rejected too.
      if (end_ptr == buffer 
          || *end_ptr != '\0' 
          || !(prior >= 0.0 && prior <= 1.0)
      ) {
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      *output_prior = prior;
      num_read = 1;
      ++psp_reader->current_position;
      result = TRUE;
    }

  }

  if (c == EOF && ferror(psp_reader->psp_file)) {
    die(
      "Error while reading file:%s.\nError message: %s\n", 
      psp_reader->filename,
      strerror(read_errno)
    );
  }

  set_start_pos_for_data_block(data_block, psp_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;
}
/******************************************************************************
 * Frees an instance of the wiggle prior block reader: the cached sequence
 * name is released first, then the reader structure itself. The underlying
 * raw wiggle reader is not touched here.
 *****************************************************************************/
void free_prior_reader_from_wig(DATA_BLOCK_READER_T *reader) {
  WIG_PRIOR_BLOCK_READER_T *wig
    = (WIG_PRIOR_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // The name must go before the structure that points to it.
  myfree(wig->sequence_name);
  myfree(wig);
}
Beispiel #20
0
/******************************************************************************
 * Fills in the next data block for the sequence. 
 * During the first call for the sequence it fills in the full data block.
 * On successive calls, shifts the sequence in the block down one position
 * and reads one more character.
 *
 * White space in the file is skipped. Characters are converted to upper
 * case; a character that is not known in the reader's alphabet is replaced
 * by the alphabet's wildcard and a warning is written to stderr. A '>' at
 * the start of a line is pushed back onto the stream and ends the block.
 *
 * The reader's current position is incremented on every call, and the data
 * block's start position and number-read count are updated.
 * 
 * Returns TRUE if it was able to completely fill the block, FALSE if 
 * the next sequence or EOF was reached before the block was filled.
 * Dies if other errors encountered.
 *****************************************************************************/
BOOLEAN_T get_next_data_block_from_seq_reader_from_fasta(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  BOOLEAN_T result = FALSE;
  char *raw_seq = get_sequence_from_data_block(data_block);
  int block_size = get_block_size_from_data_block(data_block);
  int num_read = get_num_read_into_data_block(data_block);

  SEQ_READER_FROM_FASTA_T *fasta_reader 
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (num_read == block_size) {
    // Block is already full, shift all elements in the block down by one position
    // FIXME CEG: Inefficient, replace with circular buffer.
    memmove(raw_seq, raw_seq + 1, block_size - 1);
    num_read = block_size - 1;
    raw_seq[num_read] = 0;
  }

  int c = 0;
  while((c = fgetc(fasta_reader->fasta_file)) != EOF) {
    if (isspace(c)) {
      // Skip over white space
      if (c == '\n') {
        fasta_reader->at_start_of_line = TRUE;
      }
      else {
        fasta_reader->at_start_of_line = FALSE;
      }
      continue;
    }
    else if (c == '>' && fasta_reader->at_start_of_line == TRUE) {
      // We found the start of a new sequence while trying
      // to fill the block. Leave the block incomplete.
      c = ungetc(c, fasta_reader->fasta_file);
      if (ferror(fasta_reader->fasta_file)) {
        // ferror() only returns a non-zero flag, not an error code, so the
        // message has to be built from errno.
        die(
          "Error while reading file:%s.\nError message: %s\n", 
          fasta_reader->filename,
          strerror(errno)
        );
      }
      raw_seq[num_read] = 0;
      break;
    }
    else {
      // Fill in another character in the block
      raw_seq[num_read] = (char) toupper(c);
      // Check that character is legal in alphabet. 
      // If not, replace with wild card character.
      if (!alph_is_known(fasta_reader->alphabet, raw_seq[num_read])) {
        raw_seq[num_read] = alph_wildcard(fasta_reader->alphabet);
        fprintf(
          stderr, 
          "Warning: %c is not a valid character in %s alphabet.\n"
          "         Converting %c to %c.\n",
          c,
          alph_name(fasta_reader->alphabet),
          c,
          raw_seq[num_read]
        );
      }
      ++num_read;
      if (num_read == block_size) {
        // block is full
        result = TRUE;
        break;
      }
    }
  }

  if (c == EOF && ferror(fasta_reader->fasta_file)) {
    // Nothing is called between the failing read and this point, so errno
    // still describes the read error.
    die(
      "Error while reading file:%s.\nError message: %s\n", 
      fasta_reader->filename,
      strerror(errno)
    );
  }

  ++fasta_reader->current_position;
  set_start_pos_for_data_block(data_block, fasta_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;

}
Beispiel #21
0
/******************************************************************************
 * Reports whether the sequence FASTA reader has reached EOF, as indicated
 * by the end-of-file flag of its FASTA file stream.
 *
 * Returns TRUE if the reader is at EOF, FALSE otherwise.
 *****************************************************************************/
BOOLEAN_T seq_reader_from_fasta_is_eof(DATA_BLOCK_READER_T *reader) {
  SEQ_READER_FROM_FASTA_T *fasta
    = (SEQ_READER_FROM_FASTA_T *) get_data_block_reader_data(reader);

  if (feof(fasta->fasta_file)) {
    return TRUE;
  }
  return FALSE;
}