Beispiel #1
0
/**
 * Load the reads in fa_fn, select the "solid" reads with calc_solid_reads()
 * and write their names, one per line, to the file "<opt->lib_name>.solid".
 *
 * @param fa_fn path of the read library passed to load_reads()
 * @param opt   options; opt->lib_name names the output file and
 *              opt->stop_thre scales n_seqs for calc_solid_reads()
 *
 * On allocation failure or an over-long output path the function reports
 * the problem through show_debug_msg() and returns without writing anything.
 */
void pe_clean_core(char *fa_fn, clean_opt *opt) {
	bwa_seq_t *seqs = NULL, *s = NULL;
	int n_seqs = 0, path_len = 0;
	guint i = 0; /* same signedness as GPtrArray::len */
	char *solid = NULL;
	FILE *solid_file = NULL;
	clock_t t = clock();
	GPtrArray *solid_reads = NULL;

	/* Build the output path first so a bad path fails before any heavy work. */
	solid = malloc(BUFSIZE);
	if (solid == NULL) {
		show_debug_msg(__func__, "Out of memory\n");
		return;
	}
	path_len = snprintf(solid, BUFSIZE, "%s.solid", opt->lib_name);
	if (path_len < 0 || (size_t) path_len >= (size_t) BUFSIZE) {
		show_debug_msg(__func__, "Output path for %s is too long\n",
				opt->lib_name);
		free(solid);
		return;
	}

	show_debug_msg(__func__, "Loading library %s...\n", fa_fn);
	seqs = load_reads(fa_fn, &n_seqs);

	show_debug_msg(__func__, "Saving k-mer frequencies: %.2f sec...\n",
			(float) (clock() - t) / CLOCKS_PER_SEC);
	/* NOTE(review): xopen presumably aborts on failure -- confirm. */
	solid_file = xopen(solid, "w");
	solid_reads = calc_solid_reads(seqs, n_seqs, opt, n_seqs * opt->stop_thre,
			0, 1);
	for (i = 0; i < solid_reads->len; i++) {
		s = g_ptr_array_index(solid_reads, i);
		/* Write directly: no fixed-size scratch buffer to overflow. */
		fprintf(solid_file, "%s\n", s->name);
	}

	g_ptr_array_free(solid_reads, TRUE);
	bwa_free_read_seq(n_seqs, seqs);
	/* fclose flushes buffered output, so a failure here means lost data. */
	if (fclose(solid_file) != 0)
		show_debug_msg(__func__, "Failed to write %s\n", solid);
	free(solid);
}
Beispiel #2
0
// Convenience overload: forwards to the three-argument load_reads with a
// start of 0 and a count of -1, which the callee treats as "read all".
void Depot::load_reads(ReadSet& dst) {
    this->load_reads(dst, 0, -1);
}
Beispiel #3
0
// Load exactly one read, starting at `index`, and return the first element
// of the temporary set it was loaded into.
//
// NOTE(review): if load_reads yields nothing for `index`, front() is called
// on an empty set -- confirm callers only pass valid indices.
// NOTE(review): the returned pointer outlives the local ReadSet; verify that
// ReadSet does not delete its elements on destruction.
Read* Depot::load_read(uint32_t index) {
    ReadSet single;
    this->load_reads(single, index, 1);

    Read* first = single.front();
    return first;
}
Beispiel #4
0
/*
 * Filter a VCF against one or more reference sequence files.
 *
 * Usage (from the option parsing below): [-s] <in.vcf[.gz]> [ref ...]
 * References are read from the given paths, or from stdin ("-") when none
 * are given. Header lines (starting with '#') are copied to stdout. A variant
 * line is printed only if its chromosome is loaded, its position is >= 1, it
 * is either a 1bp/1bp substitution or REF and ALT share their first base, and
 * the REF allele matches the reference (case-insensitively) at that position.
 * With -s, a line whose ALT allele matches instead is printed with the REF
 * and ALT columns swapped. All other lines are dropped.
 *
 * Returns 0 on success; fatal errors go through die().
 * NOTE(review): print_usage() and die() are presumed not to return.
 */
int main(int argc, char **argv)
{
  // compiler complains about unused functions without these lines
  (void)kh_clear_ghash;
  (void)kh_del_ghash;

  if(argc < 2) print_usage(usage, NULL);

  char swap_alleles = 0;

  int c;
  while((c = getopt(argc, argv, "s")) >= 0) {
    switch (c) {
      case 's': swap_alleles = 1; break;
      default: die("Unknown option: %c", c);
    }
  }

  if(optind == argc) print_usage(usage, "Not enough arguments");

  char *inputpath = argv[optind];
  char **refpaths = argv + optind + 1;
  size_t num_refs = argc - optind - 1;

  gzFile gzin = gzopen(inputpath, "r");
  if(gzin == NULL) die("Cannot read file: %s", inputpath);

  size_t i, nchroms = 0, capacity = 1024;
  khash_t(ghash) *genome = kh_init(ghash);
  read_t *reads = malloc(capacity * sizeof(*reads)), *r;
  int hret;
  khiter_t k;

  if(reads == NULL) die("Out of memory");

  for(i = 0; i < num_refs; i++) {
    fprintf(stderr, "Loading %s\n", refpaths[i]);
    load_reads(refpaths[i], &reads, &capacity, &nchroms);
  }

  if(num_refs == 0) {
    fprintf(stderr, "Loading from stdin\n");
    load_reads("-", &reads, &capacity, &nchroms);
  }

  if(nchroms == 0) die("No chromosomes loaded");

  // Index the loaded sequences by name; the first occurrence of a name wins
  for(i = 0; i < nchroms; i++) {
    r = reads + i;
    fprintf(stderr, "Loaded: '%s'\n", r->name.b);
    k = kh_put(ghash, genome, r->name.b, &hret);
    if(hret == 0) warn("Duplicate read name (taking first): %s", r->name.b);
    else kh_value(genome, k) = r;
  }

  // Now read VCF
  StrBuf line;
  strbuf_alloc(&line, 1024);
  // NOTE(review): vcf_columns presumably sets fields[i] to the start of
  // column i within line.b -- the pointer arithmetic below relies on that.
  char *fields[9];
  char *chr;
  int pos, reflen, altlen;

  while(strbuf_reset_gzreadline(&line, gzin) > 0)
  {
    if(line.b[0] == '#') fputs(line.b, stdout);
    else
    {
      strbuf_chomp(&line);
      vcf_columns(line.b, fields);

      // Temporarily NUL-terminate the CHROM and POS columns in place
      fields[1][-1] = fields[2][-1] = '\0';
      chr = line.b;
      pos = atoi(fields[1])-1; // VCF positions are 1-based
      k = kh_get(ghash, genome, chr);

      if(k == kh_end(genome)) {
        // Warn while chr is still NUL-terminated so that only the chromosome
        // name is printed, and never read kh_value() at the end iterator.
        warn("Cannot find chrom: %s", chr);
        continue;
      }

      r = kh_value(genome, k);
      fields[1][-1] = fields[2][-1] = '\t'; // restore the separators

      // Column widths, excluding the trailing tab separator
      reflen = fields[4] - fields[3] - 1;
      altlen = fields[5] - fields[4] - 1;

      if(pos < 0) warn("Bad line: %s\n", line.b);
      else if((reflen == 1 && altlen == 1) || fields[3][0] == fields[4][0])
      {
        if((unsigned)pos + reflen <= r->seq.end &&
           strncasecmp(r->seq.b+pos,fields[3],reflen) == 0)
        {
          fputs(line.b, stdout);
          fputc('\n', stdout);
        }
        else if(swap_alleles && (unsigned)pos + altlen <= r->seq.end &&
                strncasecmp(r->seq.b+pos,fields[4],altlen) == 0)
        {
          // Swap alleles in place: "REF\tALT" -> "ALT\tREF". The combined
          // width is unchanged, so the following columns do not move.
          // Heap buffer instead of a VLA: altlen comes from the input and
          // may be zero or very large.
          char *ref = fields[3], *alt = fields[4];
          char *tmp = malloc(altlen > 0 ? (size_t)altlen : 1);
          if(tmp == NULL) die("Out of memory");
          memcpy(tmp, alt, altlen);
          memmove(ref+altlen+1, ref, reflen);
          memcpy(ref, tmp, altlen);
          ref[altlen] = '\t';
          free(tmp);
          fputs(line.b, stdout);
          fputc('\n', stdout);
        }
      }
    }
  }

  kh_destroy(ghash, genome);
  strbuf_dealloc(&line);
  gzclose(gzin);

  for(i = 0; i < nchroms; i++) seq_read_dealloc(reads+i);
  free(reads);

  fprintf(stderr, " Done.\n");

  return 0;
}