/********************************************************
* Look up the prior for a single sequence position.
*
* *prior is first set to the reader's default prior (as
* returned by get_default_prior_from_reader) and is only
* overwritten when a prior block covering seq_position
* is found.
*
* If *prior_block is NULL, a prior block will be allocated,
* and the first block of priors will be read into it.
* Failing to read that first block is a fatal error.
* The block is handed back through *prior_block so that it
* can be passed in again on the next call.
*
* If the seq. position is less than the current prior
* position we leave the prior block in the current position
* and leave the prior at the default value.
*
* If the seq. position is within the extent of the current
* prior block we set the prior to the value from the block.
*
* If the seq. position is past the extent of the current
* prior block we read blocks until we reach the seq. position,
* pass it, or run out of prior blocks. Running out of
* blocks prints a warning when verbosity > NORMAL_VERBOSE.
*
* NOTE(review): nothing here frees *prior_block; presumably
* the caller owns it once it has been allocated - confirm.
********************************************************/
void get_prior_from_reader(
  DATA_BLOCK_READER_T *prior_reader,
  const char *seq_name,
  size_t seq_position,
  DATA_BLOCK_T **prior_block, // Out variable
  double *prior // Out variable
) {

  // Until a covering block is found the answer is the default prior.
  *prior = get_default_prior_from_reader(prior_reader);

  if (*prior_block == NULL) {
    // First call: allocate the block and load the first prior into it.
    *prior_block = new_prior_block();
    if (prior_reader->get_next_block(prior_reader, *prior_block) == FALSE) {
      die("Failed to read first prior from sequence %s.", seq_name);
    }
  }

  // Examine the current block, then keep advancing the reader until a
  // block covers or lies beyond the requested position.
  for (;;) {
    size_t block_position = get_start_pos_for_data_block(*prior_block);
    size_t block_extent = get_num_read_into_data_block(*prior_block);

    if (block_position > seq_position) {
      // Sequence position is before current prior position.
      // Keep the block for a later call and report the default prior.
      return;
    }

    // Here block_position <= seq_position, so the subtraction cannot
    // wrap. Comparing the distance against the extent (rather than
    // computing block_position + block_extent - 1) also keeps an empty
    // block at position 0 from appearing to cover every position.
    if (seq_position - block_position < block_extent) {
      // Sequence position is contained in current prior block
      *prior = get_prior_from_data_block(*prior_block);
      return;
    }

    // Sequence position is after current prior block.
    // Try reading the next prior block.
    if (prior_reader->get_next_block(prior_reader, *prior_block) == FALSE) {
      if (verbosity > NORMAL_VERBOSE) {
        fprintf(stderr, "Warning: reached end of priors for sequence %s.\n", seq_name);
      }
      return;
    }
  }
}
/********************************************************
* Read an array of priors from the priors file.
*
* The request covers the num_priors sequence positions
* starting at seq_start. Entries buffer_offset through
* num_priors - 1 of the priors array are first set to the
* reader's default prior (get_default_prior_from_reader),
* so positions with no prior in the file keep that value.
*
* Prior blocks that end before seq_start are skipped.
* For every block that overlaps the requested region, the
* block's prior is written to each overlapping position,
* at index (position - seq_start) in the priors array.
*
* Reading stops when the reader has no more blocks, or
* when a block extends past the requested region. In the
* latter case the block is pushed back with unget_block
* so the next call sees it again.
*
* The reader's current sequence name must equal seq_name
* (checked with assert).
*
* Sequences must occur in the same order as the FASTA file
* Positions in sequence must be in increasing order
*
* NOTE(review): block values are indexed from priors[0]
* while the default fill starts at buffer_offset - confirm
* the intended meaning of buffer_offset with the callers.
********************************************************/
void get_prior_array_from_reader(
  DATA_BLOCK_READER_T *prior_reader,
  const char *seq_name,
  size_t seq_start,
  size_t num_priors,
  size_t buffer_offset,
  double *priors
) {

  // All intervals below are half-open [start, end), so that
  // "end - start" is always the number of positions covered.
  size_t seq_end = seq_start + num_priors;

  // NOTE(review): ownership of the returned name is not visible here;
  // it is not freed by this function.
  char *prior_seq_name = NULL;
  prior_reader->get_seq_name(prior_reader, &prior_seq_name);

  assert(strcmp(seq_name, prior_seq_name) == 0);

  // Fill the array with the default prior 
  // starting at the buffer offset
  double default_prior = get_default_prior_from_reader(prior_reader);
  size_t i;
  for (i = buffer_offset; i < num_priors; ++i) {
    priors[i] = default_prior;
  }

  DATA_BLOCK_T *prior_block = new_prior_block();

  // Read and copy prior blocks until we've filled in the array
  // or reached the end of priors for this sequence
  while (prior_reader->get_next_block(prior_reader, prior_block) != FALSE) {

    // The subtraction shifts the block's start position down by one
    // (presumably 1-based file coordinates to 0-based - confirm).
    size_t prior_start = get_start_pos_for_data_block(prior_block) - 1;
    size_t prior_end = prior_start + get_num_read_into_data_block(prior_block);

    if (prior_end <= seq_start) {
      // Skip prior blocks before region of interest
      continue;
    }

    // Copy the block's prior into every position where the block
    // and the requested region intersect.
    size_t start_intersect = MAX(prior_start, seq_start);
    size_t end_intersect = MIN(prior_end, seq_end);
    if (end_intersect > start_intersect) {
      double block_prior = get_prior_from_data_block(prior_block);
      size_t intersect_offset = start_intersect - seq_start;
      size_t num_to_copy = end_intersect - start_intersect;
      // intersect_offset + i < seq_end - seq_start == num_priors,
      // so the writes stay inside the requested region.
      for (i = 0; i < num_to_copy; ++i) {
        priors[intersect_offset + i] = block_prior;
      }
    }

    if (prior_end > seq_end) {
      // We're done filling the array, but have some priors left over
      // Rewind the reader to before the last block read
      prior_reader->unget_block(prior_reader);
      break;
    }

  }

  free_data_block(prior_block);
  return;
}
/********************************************************
* Read the next prior from a PSP file into a data block.
*
* Leading white space is skipped. If the next character
* is '>' at the start of a line, it marks the start of the
* next sequence: the '>' is pushed back onto the stream
* and no prior is read. Otherwise characters up to the
* next white space or EOF are parsed as a prior, which
* must be a number in the range [0, 1]. An unparsable,
* out-of-range, NaN, or over-long value is a fatal error.
*
* The data block's prior is set to NaN() on entry and is
* only overwritten when a prior is read. On return the
* block's start position is the reader's current position
* (which is advanced by one for each prior read) and the
* block's number read is 1 if a prior was read, else 0.
*
* Returns TRUE if a prior was read, FALSE otherwise.
********************************************************/
BOOLEAN_T get_next_data_block_from_prior_reader_from_psp(
  DATA_BLOCK_READER_T *reader, 
  DATA_BLOCK_T *data_block
) {

  BOOLEAN_T result = FALSE;
  const int buffer_size = 100;
  char buffer[buffer_size];
  int num_read = 0;

  PSP_DATA_BLOCK_READER_T *psp_reader 
    = (PSP_DATA_BLOCK_READER_T *) get_data_block_reader_data(reader);

  // Until a prior is successfully parsed the block holds NaN.
  double *output_prior = get_prior_from_data_block(data_block);
  *output_prior = NaN();

  int c = 0;

  // Skip over leading white space, tracking whether the most recent
  // white space character was a newline.
  while ((c = fgetc(psp_reader->psp_file)) != EOF && isspace(c)) {
    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;
  }

  if (c == '>' && psp_reader->at_start_of_line == TRUE) {
    // We found the start of a new sequence while trying
    // to find a prior. Put the '>' back for whoever reads the header.
    // ungetc reports failure through its return value (EOF) and does
    // not set the stream error indicator, so check both.
    c = ungetc(c, psp_reader->psp_file);
    if (c == EOF || ferror(psp_reader->psp_file)) {
      // NOTE(review): ferror() returns a flag rather than an errno
      // value, so the text from strerror() here may not describe the
      // real error; strerror(errno) is probably what was intended.
      die(
        "Error reading file:%s.\nError message: %s\n", 
        psp_reader->filename,
        strerror(ferror(psp_reader->psp_file))
      );
    }
  }
  else {
    // We are at start of a prior.
    // Read prior string until next space or EOF.
    int buffer_index = 0;
    while(c != EOF && !isspace(c)) {
      buffer[buffer_index] = c;
      ++buffer_index;
      if (buffer_index >= (buffer_size - 1)) {
        // No prior string should be this long
        buffer[buffer_size - 1] = 0;
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      c = fgetc(psp_reader->psp_file);
    }

    // Remember whether the prior was terminated by a newline so that a
    // following '>' can be recognised as a sequence header.
    psp_reader->at_start_of_line = (c == '\n') ? TRUE : FALSE;

    buffer[buffer_index] = '\0';

    // If the buffer is not empty, it should contain a string
    // representing the prior. Convert it to a double.
    if (buffer_index != 0) {
      char *end_ptr = NULL;
      double prior = strtod(buffer, &end_ptr);
      // The range test is written positively so that NaN (which strtod
      // accepts as input, and for which every comparison is false) is
      // rejected along with values outside [0, 1].
      if (end_ptr == buffer 
          || *end_ptr != '\0' 
          || !(prior >= 0.0 && prior <= 1.0)
      ) {
        die("File %s contains invalid prior value: %s\n", psp_reader->filename, buffer);
      }
      *output_prior = prior;
      num_read = 1;
      ++psp_reader->current_position;
      result = TRUE;
    }

  }

  // Distinguish a read error from a plain end of file.
  if (c == EOF && ferror(psp_reader->psp_file)) {
    // NOTE(review): same strerror(ferror()) concern as above.
    die(
      "Error while reading file:%s.\nError message: %s\n", 
      psp_reader->filename,
      strerror(ferror(psp_reader->psp_file))
    );
  }

  set_start_pos_for_data_block(data_block, psp_reader->current_position);
  set_num_read_into_data_block(data_block, num_read);
  return result;
}
/********************************************************************
 * This program reads a MEME PSP file and computes the binned
 * distribution of priors. The distribution is written to stdout.
 *
 * Usage: compute-prior-dist <num-bins> <psp-file>
 *
 * Priors that are exactly 0.0 are ignored. The output is three
 * header lines (#min, #max, #median) followed by one line per bin
 * giving the fraction of priors that fell into that bin. If all
 * priors are equal, a single line containing 1.0 is printed instead
 * of the bins.
 *
 * Returns 0 on success and -1 on a usage error or when the file
 * contains no non-zero priors.
 ********************************************************************/
int main(int argc, char *argv[]) {

  const char *usage = "compute-prior-dist <num-bins> <psp-file>";

  if (argc != 3) {
    fprintf(stderr, "Usage: %s\n", usage);
    return -1;
  }

  int num_bins = atoi(argv[1]);
  if (num_bins <= 0) {
    fprintf(stderr, "Usage: %s\n", usage);
    return -1;
  }

  const char *filename = argv[2];

  // Read every prior from every sequence into a growable array.
  DATA_BLOCK_READER_T *psp_reader = NULL;
  psp_reader = new_prior_reader_from_psp(FALSE /* Don't try to parse genomic coord.*/, filename);
  DATA_BLOCK_T *psp_block = new_prior_block();

  int prior_array_size = 100;
  ARRAY_T *raw_priors = allocate_array(prior_array_size);
  int num_priors = 0;
  while (psp_reader->go_to_next_sequence(psp_reader) != FALSE) {
    while (psp_reader->get_next_block(psp_reader, psp_block) != FALSE) {
      double prior = get_prior_from_data_block(psp_block);
      if (prior == 0.0) {
        // Skip priors that are exactly 0.0
        continue;
      }
      if (num_priors == INT_MAX) {
        die("Number of priors exceeded maximum allowed value of %d", INT_MAX);
      }
      set_array_item(num_priors, prior, raw_priors);
      ++num_priors;
      if (num_priors >= prior_array_size) {
        // Double the capacity, but cap it at INT_MAX so that the
        // doubling itself cannot overflow a signed int.
        if (prior_array_size > INT_MAX / 2) {
          prior_array_size = INT_MAX;
        }
        else {
          prior_array_size = 2 * prior_array_size;
        }
        resize_array(raw_priors, prior_array_size);
      }
    }
  }
  free_data_block(psp_block);
  free_data_block_reader(psp_reader);

  if (num_priors == 0) {
    // Nothing to summarize. Without this check the code below would
    // take the median of an empty array, read element -1, and divide
    // the bin counts by zero.
    free_array(raw_priors);
    fprintf(stderr, "No non-zero priors found in %s.\n", filename);
    return -1;
  }

  // Trim the array down to the priors actually read.
  ARRAY_T *priors = extract_subarray(raw_priors, 0, num_priors);
  free_array(raw_priors);
  double median_prior = compute_median(priors);
  // NOTE(review): min and max are taken from the first and last
  // elements, which presumes compute_median() leaves the array sorted
  // in ascending order - confirm.
  double min_prior = get_array_item(0, priors);
  double max_prior = get_array_item(num_priors - 1, priors);

  // Print min, max, and median
  printf("#min %6.5f\n", min_prior);
  printf("#max %6.5f\n", max_prior);
  printf("#median %6.5f\n", median_prior);

  // Special case if priors are exactly uniform.
  // (Also avoids dividing by zero when computing the scale below.)
  if (min_prior == max_prior) {
    printf("%6.5f\n", 1.0);
    return 0;
  }

  // Create the array of bins, intialized to 0.
  double *prior_dist = mm_calloc(num_bins, sizeof(double));
  // The scale is chosen so that the range [min_prior, max_prior]
  // spans num_bins - 1 units.
  double scale = (num_bins - 1) / (max_prior - min_prior);
  double offset = min_prior;
  int dist_index = 0;

  // Count the priors falling into each bin.
  // NOTE(review): presumably raw_to_scaled() maps a prior to a bin
  // index in [0, num_bins - 1] - confirm.
  int i;
  for (i = 0; i < num_priors; ++i) {
      double prior = get_array_item(i, priors);
      dist_index = raw_to_scaled(prior, 1, scale, offset);
      ++prior_dist[dist_index];
  }

  for (dist_index = 0; dist_index < num_bins; ++dist_index) {
    // Print normalized bin counts
    prior_dist[dist_index] /= num_priors;
    printf("%6.5f\n", prior_dist[dist_index]);
  }

  return 0;
}