Ejemplo n.º 1
0
/**
 * Print the mean per-position error probability, as a percentage, over the
 * reads in files flist->files[flist->curr .. flist->num_files-1].
 *
 * Each file is read to the end with seq_read(). For every position i the
 * values qual_prob[qual - offset] are summed, then divided by the number of
 * reads that have a quality score at position i. The result is written to
 * stdout as one line: a " %.2Lf" field per position followed by a newline.
 *
 * The quality offset is guessed per file with seq_guess_fastq_format();
 * 33 is used when the guess fails (returns -1).
 *
 * Reads without quality scores are skipped. If memory cannot be allocated a
 * message is written to stderr and nothing is written to stdout.
 *
 * @param flist  file list to consume; flist->read is used as the read buffer
 */
void filelist_mean_err(FileList *flist)
{
  size_t i, f, maxread = 0, readcap = 512, newcap, carry;
  read_t *r = &flist->read;
  int fmt, fqoffset = 33, minq, maxq;

  // sumprob[i]: sum of error probabilities at position i
  // counts[i]:  number of reads of length exactly i+1 (made cumulative later)
  long double *sumprob = malloc(readcap * sizeof(long double));
  size_t *counts = malloc(readcap * sizeof(size_t));
  if(sumprob == NULL || counts == NULL) goto oom;

  for(i = 0; i < readcap; i++) { sumprob[i] = 0; counts[i] = 0; }

  for(f = flist->curr; f < flist->num_files; f++) {
    fmt = seq_guess_fastq_format(flist->files[f], &minq, &maxq);
    fqoffset = (fmt == -1 ? 33 : FASTQ_OFFSET[fmt]);

    while(seq_read(flist->files[f], r) > 0) {
      // A read with no quality scores contributes nothing; without this
      // guard counts[r->qual.end-1] would index out of bounds
      if(r->qual.end == 0) continue;

      if(r->qual.end > readcap) {
        newcap = ROUNDUP2POW(r->qual.end);

        // Keep the old pointers valid until realloc is known to have worked
        long double *newsum = realloc(sumprob, newcap * sizeof(long double));
        if(newsum == NULL) goto oom;
        sumprob = newsum;

        size_t *newcounts = realloc(counts, newcap * sizeof(size_t));
        if(newcounts == NULL) goto oom;
        counts = newcounts;

        for(i = readcap; i < newcap; i++) { sumprob[i] = 0; counts[i] = 0; }
        readcap = newcap;
      }

      counts[r->qual.end-1]++;

      // NOTE(review): a quality char below fqoffset gives a negative index
      // into qual_prob here -- confirm inputs / table layout rule this out
      for(i = 0; i < r->qual.end; i++)
        sumprob[i] += qual_prob[r->qual.b[i] - fqoffset];

      maxread = MAX2(maxread, r->qual.end);
    }
  }

  // Convert counts to cumulative (reverse): afterwards counts[i] is the
  // number of reads with a quality score at position i
  for(i = maxread, carry = 0; i-- > 0; ) {
    carry += counts[i];
    counts[i] = carry;
  }

  // counts[i] >= 1 for all i < maxread, so the division is safe
  for(i = 0; i < maxread; i++) {
    printf(" %.2Lf", 100.0 * sumprob[i] / counts[i]);
  }
  printf("\n");

  free(counts);
  free(sumprob);
  return;

oom:
  fprintf(stderr, "filelist_mean_err: out of memory\n");
  free(counts);
  free(sumprob);
}
Ejemplo n.º 2
0
// Load reads into a buffer and use them to guess the quality score offset
// Returns -1 if no quality scores
// Defaults to 0 if not recognisable (offset:33, min:33, max:126)
//
// Otherwise returns the format index from seq_guess_fastq_format(), which
// indexes FASTQ_FORMATS[], FASTQ_OFFSET[], FASTQ_MIN[] and FASTQ_MAX[].
// Reports the detected format via status() and warns when the sampled
// min/max quality values are more than 20 above the format's offset/max.
static inline int guess_fastq_format(seq_file_t *sf)
{
  // Detect fastq offset
  int min_qual = INT_MAX, max_qual = INT_MIN;
  int fmt = seq_guess_fastq_format(sf, &min_qual, &max_qual);

  // fmt == -1 if no quality scores found
  if(fmt == -1) {
    // Only worth a warning for formats that are expected to carry qualities
    if(seq_is_fastq(sf) || seq_is_sam(sf) || seq_is_bam(sf))
      warn("Couldn't find qual scores in %s\n", sf->path);
    return -1;
  }

  status("%s: Qual scores: %s [offset: %i, range: [%i,%i], sample: [%i,%i]]\n",
         sf->path, FASTQ_FORMATS[fmt], FASTQ_OFFSET[fmt],
         FASTQ_MIN[fmt], FASTQ_MAX[fmt], min_qual, max_qual);

  // Test min and max fastq scores
  int qoffset = FASTQ_OFFSET[fmt], qmax = FASTQ_MAX[fmt];

  if(min_qual > qoffset + 20)
  {
    warn("Input file has min quality score %i but qoffset is set to %i: %s\n"
         "  Have you predefined an incorrect fastq offset? "
         "Or is cortex guessing it wrong?", min_qual, qoffset, sf->path);
  }
  else if(max_qual > qmax + 20)
  {
    // Report the expected maximum (qmax), not the offset, to match the message
    warn("Input file has max quality score %i but expected qmax is to %i: %s\n"
         "  Have you predefined an incorrect fastq offset? "
         "Or is cortex guessing it wrong?", max_qual, qmax, sf->path);
  }

  return fmt;
}
Ejemplo n.º 3
0
/**
 * Initialise `flist` from an array of file paths.
 *
 * Opens every path with seq_open(), guesses its FASTQ quality format with
 * seq_guess_fastq_format() and stores the matching FASTQ_OFFSET[] value in
 * flist->fqoffsets[i]. One " profile: ..." line per file is written to
 * stdout. Also allocates the read buffer (seq_read_alloc) and a zeroed
 * errors array with capacity 512.
 *
 * Any failure (allocation, open, format detection) is reported through
 * die(). die() is assumed not to return -- the code following each call
 * relies on that.
 *
 * @param flist  structure to initialise
 * @param paths  array of `num` file paths
 * @param num    number of entries in `paths`
 */
void filelist_alloc(FileList *flist, char **paths, size_t num)
{
  size_t i;
  flist->num_files = num;
  flist->curr = 0;

  // calloc rejects num * size overflow; every element is assigned below
  flist->files = calloc(num, sizeof(seq_file_t*));
  flist->fqoffsets = calloc(num, sizeof(int));

  // A NULL result is only an error when something was actually requested
  if(num > 0 && (flist->files == NULL || flist->fqoffsets == NULL))
    die("Out of memory");

  for(i = 0; i < num; i++) {
    if((flist->files[i] = seq_open(paths[i])) == NULL)
      die("Cannot open: %s", paths[i]);
    int min, max, fmt;
    fmt = seq_guess_fastq_format(flist->files[i], &min, &max);
    if(fmt < 0) die("Cannot detect FASTQ format: %s", paths[i]);
    flist->fqoffsets[i] = FASTQ_OFFSET[fmt];
    printf(" profile: %s [offset: %i]\n", paths[i], FASTQ_OFFSET[fmt]);
  }

  seq_read_alloc(&flist->read);
  flist->filesready = 1;
  flist->errors_cap = 512;
  flist->errors_len = 0;
  flist->errors = calloc(flist->errors_cap, sizeof(size_t));
  if(flist->errors == NULL) die("Out of memory");
}