コード例 #1
0
ファイル: seq_reader.c プロジェクト: Phelimb/mccortex
// Create chrom->read genome hash
// `chroms` and `genome` must already be allocated
//
// Loads every read from `seq_files` into `chroms`, passes each one through
// seq_read_to_uppercase() and seq_read_truncate_name(), then indexes it in
// `genome` under its name.
//
// - Dies if a name contains a colon or if the hash insert fails.
// - If a name is seen more than once a warning is printed and the hash keeps
//   pointing at the first read with that name.
// - The hash stores pointers into `chroms->b` and uses the name buffers as
//   keys, so `chroms` must stay alive and unmodified while `genome` is used.
void chrom_hash_load2(seq_file_t **seq_files, size_t num_files,
                      ReadBuffer *chroms, ChromHash *genome)
{
  size_t i;
  khiter_t k;
  int hret;

  seq_load_all_reads(seq_files, num_files, chroms);

  for(i = 0; i < chroms->len; i++)
  {
    seq_read_to_uppercase(&chroms->b[i]);
    seq_read_truncate_name(&chroms->b[i]);

    if(strchr(chroms->b[i].name.b, ':') != NULL)
      die("Please remove colons from chromosome names [%s]", chroms->b[i].name.b);

    k = kh_put(kChromHash, genome, chroms->b[i].name.b, &hret);

    if(hret < 0)
    {
      // Insert failed: `k` does not refer to a usable bucket, so writing the
      // value below would be out of bounds.
      die("Cannot add chromosome to hash (out of memory?) [%s]",
          chroms->b[i].name.b);
    }
    else if(hret == 0)
    {
      // Key already present: leave the existing (first) entry untouched
      warn("duplicate chromosome (take first only): '%s'", chroms->b[i].name.b);
    }
    else
    {
      kh_value(genome, k) = &chroms->b[i];
    }
  }
}
コード例 #2
0
ファイル: readsim.c プロジェクト: noporpoise/readsim
// Simulate reads sampled from the sequences in `reffile`.
//
// reffile:        reference sequences; entries shorter than one template
//                 (rlen for single-ended, 2*rlen+insert for paired-end) are
//                 skipped
// out0, out1:     outputs for read 1 and read 2; pass out1 == NULL for
//                 single-ended reads
// flist:          if non-NULL, errors are added with add_seq_error_profile()
// err_rate:       used only when flist == NULL; if >= 0, errors are added
//                 with add_seq_error_rate()
// insert:         gap between the end of read 1 and the start of read 2
// insert_stddev:  scales the ran_normal() noise added to the mate position
// rlen:           read length
// depth:          number of reads is (genome length * depth) / bases per
//                 read (or per pair)
//
// Read names have the form r<index>:<chrom>:<pos0>:<pos1>[/1|/2].
// Read 2 is reverse complemented before printing.
// Dies if no reference sequence is long enough or memory runs out.
//
// Returns num of bases printed
size_t sim_reads(seq_file_t *reffile, gzFile out0, gzFile out1,
                 FileList *flist, float err_rate,
                 size_t insert, double insert_stddev, size_t rlen, double depth)
{
  size_t i, chromcap = 16, nchroms, glen = 0, nreads, chr, pos0, pos1, tlen;
  read_t *chroms;

  // Template length: the span a read (or read pair) needs on a contig
  tlen = rlen + (out1 == NULL ? 0 : insert + rlen);

  chroms = malloc(chromcap * sizeof(*chroms));
  if(chroms == NULL)
    die("Out of memory [%zu bytes]", chromcap * sizeof(*chroms));
  nchroms = 0;

  // Load genome
  printf(" Loaded contigs:");
  while(1)
  {
    if(nchroms == chromcap)
    {
      // Grow via a temporary so a failed realloc is detected rather than
      // overwriting the only pointer to the array
      chromcap *= 2;
      read_t *grown = realloc(chroms, chromcap * sizeof(*chroms));
      if(grown == NULL)
        die("Out of memory [%zu bytes]", chromcap * sizeof(*chroms));
      chroms = grown;
    }

    seq_read_alloc(&chroms[nchroms]);
    if(seq_read(reffile, &chroms[nchroms]) <= 0)
    { seq_read_dealloc(&chroms[nchroms]); break; }

    if(chroms[nchroms].seq.end < tlen)
    {
      // Too short to hold a single template: drop it and reuse the slot
      seq_read_dealloc(&chroms[nchroms]);
    }
    else
    {
      seq_read_truncate_name(&chroms[nchroms]);
      printf(" %s[%zu]", chroms[nchroms].name.b, chroms[nchroms].seq.end);
      glen += chroms[nchroms].seq.end;
      nchroms++;
    }
  }
  printf("\n Genome size: %zu\n", glen);

  if(nchroms == 0) {
    die("No sequences long enough in ref genome file [min len: %zu]: %s",
        tlen, reffile->path);
  }

  // Sample
  nreads = (glen * depth) / (out1 == NULL ? rlen : (2 * rlen));
  char read0[rlen+1], read1[rlen+1];
  read0[rlen] = read1[rlen] = '\0';

  printf("Sampling %zu %sreads...\n", nreads,
         out1 == NULL ? "single " : "paired-end ");

  // Sample paired-end if out1 != NULL
  for(i = 0; i < nreads; i++)
  {
    chr = (nchroms == 1) ? 0 : rand_chrom(chroms, nchroms, glen);

    // NOTE(review): the argument is 0 when a contig is exactly one template
    // long -- confirm random_uniform() accepts 0.
    pos0 = random_uniform(chroms[chr].seq.end - (out1 == NULL ? rlen : tlen));
    pos1 = pos0;
    memcpy(read0, chroms[chr].seq.b+pos0, rlen);

    if(out1 != NULL)
    {
      // Compute the mate start as a double and clamp it to [0, end-rlen]
      // before converting: converting a negative double to size_t is
      // undefined behaviour.
      size_t last_start = chroms[chr].seq.end - rlen;
      double mate_start = pos0 + rlen + insert + ran_normal()*insert_stddev;

      if(mate_start < 0) pos1 = 0;
      else if(mate_start > (double)last_start) pos1 = last_start;
      else pos1 = (size_t)mate_start;

      memcpy(read1, chroms[chr].seq.b+pos1, rlen);
    }

    // Errors are added to both mates, whichever error model is in use
    if(flist != NULL) {
      add_seq_error_profile(read0, rlen, flist);
      if(out1 != NULL)
        add_seq_error_profile(read1, rlen, flist);
    }
    else if(err_rate >= 0) {
      add_seq_error_rate(read0, rlen, err_rate);
      if(out1 != NULL)
        add_seq_error_rate(read1, rlen, err_rate);
    }

    gzprintf(out0, ">r%zu:%s:%zu:%zu%s\n%.*s\n", i, chroms[chr].name.b,
                   pos0, pos1, (out1 != NULL ? "/1" : ""), (int)rlen, read0);

    if(out1 != NULL) {
      dna_revcmp(read1, rlen);
      gzprintf(out1, ">r%zu:%s:%zu:%zu/2\n%.*s\n", i, chroms[chr].name.b,
                     pos0, pos1, (int)rlen, read1);
    }
  }

  for(i = 0; i < nchroms; i++) seq_read_dealloc(&chroms[i]);
  free(chroms);

  size_t num_bases = nreads * rlen;
  if(out1 != NULL) num_bases *= 2;

  return num_bases;
}