/**
 * Load a read library, pick out its "solid" reads and write their names,
 * one per line, to the file "<opt->lib_name>.solid".
 *
 * @param fa_fn  path of the read library handed to load_reads()
 * @param opt    cleaning options; lib_name and stop_thre are read here and
 *               the whole struct is forwarded to calc_solid_reads()
 *
 * The output file name is built before any heavy work so that an
 * over-long library name is reported up front instead of overflowing the
 * fixed-size name buffer.
 */
void pe_clean_core(char *fa_fn, clean_opt *opt) {
	bwa_seq_t *seqs = NULL, *s = NULL;
	int n_seqs = 0, name_len = 0;
	guint i = 0; /* same signedness as GPtrArray::len */
	char *solid = (char*) malloc(BUFSIZE);
	FILE *solid_file = NULL;
	clock_t t = clock();
	GPtrArray *solid_reads = NULL;

	if (solid == NULL) {
		show_debug_msg(__func__, "Out of memory\n");
		return;
	}
	/* snprintf never writes past BUFSIZE; a return value >= BUFSIZE means
	 * the name was truncated and must not be used. */
	name_len = snprintf(solid, BUFSIZE, "%s.solid", opt->lib_name);
	if (name_len < 0 || (size_t) name_len >= (size_t) BUFSIZE) {
		show_debug_msg(__func__, "Library name too long: %s\n", opt->lib_name);
		free(solid);
		return;
	}

	show_debug_msg(__func__, "Loading library %s...\n", fa_fn);
	seqs = load_reads(fa_fn, &n_seqs);
	show_debug_msg(__func__, "Saving k-mer frequencies: %.2f sec...\n",
			(float) (clock() - t) / CLOCKS_PER_SEC);

	/* NOTE(review): the result of xopen() is used unchecked, as in the
	 * original; presumably xopen() aborts on failure - confirm. */
	solid_file = xopen(solid, "w");
	solid_reads = calc_solid_reads(seqs, n_seqs, opt,
			n_seqs * opt->stop_thre, 0, 1);

	/* Write the names directly; no intermediate fixed-size line buffer, so
	 * arbitrarily long read names are safe. */
	for (i = 0; i < solid_reads->len; i++) {
		s = g_ptr_array_index(solid_reads, i);
		fprintf(solid_file, "%s\n", s->name);
	}

	fclose(solid_file);
	free(solid);
	g_ptr_array_free(solid_reads, TRUE);
	bwa_free_read_seq(n_seqs, seqs);
}
/// Convenience overload: loads every read into `dst` by delegating to the
/// ranged overload, starting at index 0.
void Depot::load_reads(ReadSet& dst)
{
    // A count of -1 asks the ranged overload to read all reads.
    const int read_all = -1;
    load_reads(dst, 0, read_all);
}
/// Loads the single read at position `index`.
///
/// @param index  position of the read, forwarded to the ranged load_reads()
/// @return the loaded read, or NULL when load_reads() produced nothing
///         (e.g. presumably when `index` is out of range - confirm).
///
/// NOTE(review): the pointer is taken out of a local ReadSet that is
/// destroyed on return; this assumes ReadSet does not own/delete its
/// elements - confirm against ReadSet's definition.
Read* Depot::load_read(uint32_t index)
{
    ReadSet temp;
    load_reads(temp, index, 1);

    // front() on an empty container is undefined; report "not found" instead.
    if (temp.empty())
        return NULL;

    return temp.front();
}
int main(int argc, char **argv) { // compiler complains about unused function without these linese (void)kh_clear_ghash; (void)kh_del_ghash; if(argc < 2) print_usage(usage, NULL); char swap_alleles = 0; int c; while((c = getopt(argc, argv, "s")) >= 0) { switch (c) { case 's': swap_alleles = 1; break; default: die("Unknown option: %c", c); } } if(optind == argc) print_usage(usage, "Not enough arguments"); char *inputpath = argv[optind]; char **refpaths = argv + optind + 1; size_t num_refs = argc - optind - 1; gzFile gzin = gzopen(inputpath, "r"); if(gzin == NULL) die("Cannot read file: %s", inputpath); size_t i, nchroms = 0, capacity = 1024; khash_t(ghash) *genome = kh_init(ghash); read_t *reads = malloc(capacity * sizeof(read_t)), *r; int hret; khiter_t k; for(i = 0; i < num_refs; i++) { fprintf(stderr, "Loading %s\n", refpaths[i]); load_reads(refpaths[i], &reads, &capacity, &nchroms); } if(num_refs == 0) { fprintf(stderr, "Loading from stdin\n"); load_reads("-", &reads, &capacity, &nchroms); } if(nchroms == 0) die("No chromosomes loaded"); for(i = 0; i < nchroms; i++) { r = reads + i; fprintf(stderr, "Loaded: '%s'\n", r->name.b); k = kh_put(ghash, genome, r->name.b, &hret); if(hret == 0) warn("Duplicate read name (taking first): %s", r->name.b); else kh_value(genome, k) = r; } // Now read VCF StrBuf line; strbuf_alloc(&line, 1024); char *fields[9]; char *chr; int pos, reflen, altlen; while(strbuf_reset_gzreadline(&line, gzin) > 0) { if(line.b[0] == '#') fputs(line.b, stdout); else { strbuf_chomp(&line); vcf_columns(line.b, fields); fields[1][-1] = fields[2][-1] = '\0'; chr = line.b; pos = atoi(fields[1])-1; k = kh_get(ghash, genome, chr); r = kh_value(genome, k); fields[1][-1] = fields[2][-1] = '\t'; reflen = fields[4] - fields[3] - 1; altlen = fields[5] - fields[4] - 1; if(k == kh_end(genome)) warn("Cannot find chrom: %s", chr); else if(pos < 0) warn("Bad line: %s\n", line.b); else if((reflen == 1 && altlen == 1) || fields[3][0] == fields[4][0]) { 
if((unsigned)pos + reflen <= r->seq.end && strncasecmp(r->seq.b+pos,fields[3],reflen) == 0) { fputs(line.b, stdout); fputc('\n', stdout); } else if(swap_alleles && (unsigned)pos + altlen <= r->seq.end && strncasecmp(r->seq.b+pos,fields[4],altlen) == 0) { // swap alleles char tmp[altlen], *ref = fields[3], *alt = fields[4]; memcpy(tmp, alt, altlen); memmove(ref+altlen+1, ref, reflen); memcpy(ref, tmp, altlen); ref[altlen] = '\t'; fputs(line.b, stdout); fputc('\n', stdout); } // else printf("FAIL0\n"); } // else printf("FAIL1\n"); } } kh_destroy(ghash, genome); strbuf_dealloc(&line); gzclose(gzin); for(i = 0; i < nchroms; i++) seq_read_dealloc(reads+i); free(reads); fprintf(stderr, " Done.\n"); return 0; }