struct perBaseWig *perBaseWigLoad(char *wigFile, char *chrom, int start, int end)
/* Load all the regions from a wig or bigWig into a list of arrays basically.
 * Opens wigFile, loads chrom:start-end with perBaseWigLoadContinue() and
 * closes the file again before returning.
 * Returns NULL when the file cannot be opened or is not a bigWig; otherwise
 * returns whatever perBaseWigLoadContinue() produced. */
{
    struct metaBig *mb = metaBigOpen(wigFile, NULL);
    struct perBaseWig *list;

    /* Guard against a failed open before looking at mb->type (other callers
     * of the metaBigOpen family in this code test the result for NULL). */
    if (mb == NULL)
        return NULL;
    if (mb->type != isaBigWig)
    {
        metaBigClose(&mb);
        return NULL;
    }
    list = perBaseWigLoadContinue(mb, chrom, start, end);
    metaBigClose(&mb);
    return list;
}
struct perBaseWig *perBaseWigLoadSingle(char *wigFile, char *chrom, int start, int end, boolean reverse, double fill)
/* Load all the regions into one perBaseWig, but with gaps filled
 * in with NA value (the fill argument is handed straight to
 * perBaseWigLoadSingleContinue(), as is the reverse flag).
 * Opens wigFile, loads chrom:start-end and closes the file again.
 * Returns NULL when the file cannot be opened or is not a bigWig. */
{
    struct metaBig *mb = metaBigOpen(wigFile, NULL);
    struct perBaseWig *list;

    /* Guard against a failed open before looking at mb->type (other callers
     * of the metaBigOpen family in this code test the result for NULL). */
    if (mb == NULL)
        return NULL;
    if (mb->type != isaBigWig)
    {
        metaBigClose(&mb);
        return NULL;
    }
    list = perBaseWigLoadSingleContinue(mb, chrom, start, end, reverse, fill);
    metaBigClose(&mb);
    return list;
}
void bwtool_find_extrema(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *bigfile, char *tmp_dir, char *outputfile) /* find local extrema */ { unsigned min_sep = sqlUnsigned((char *)hashOptionalVal(options, "min-sep", "0")); char *other_bigfile = (char *)hashOptionalVal(options, "against", NULL); enum ex_removal rem = get_removal(options); struct metaBig *main_big = metaBigOpen_check(bigfile, tmp_dir, regions); struct metaBig *other_big = NULL; struct extrema *main_list; struct extrema *other_list = NULL; struct extrema *ex; unsigned shift = 0; FILE *out; if (other_bigfile) { char *num; if (rem == no_removal) errAbort("must specify either -maxima or -minima with -against"); if (!strchr(other_bigfile, ',')) errAbort("must specify shift limit in -against option"); num = chopPrefixAt(other_bigfile, ','); shift = sqlUnsigned(num); other_big = metaBigOpen_check(other_bigfile, tmp_dir, regions); } if (!main_big || (!other_big && other_bigfile)) errAbort("could not open bigWig file"); main_list = extrema_find(main_big, min_sep, rem); slReverse(&main_list); if (other_bigfile) { other_list = extrema_find(other_big, min_sep, rem); extrema_find_shifts(main_list, other_list, shift); } metaBigClose(&main_big); if (other_big) metaBigClose(&other_big); out = mustOpen(outputfile, "w"); if (other_bigfile) for (ex = main_list; ex != NULL; ex = ex->next) fprintf(out, "%s\t%d\t%d\t%d\t1000\t%c\n", ex->chrom, ex->chromStart, ex->chromStart+1, (int)ex->val, ex->min_or_max); else { slSort(&main_list, extrema_bed_cmp); for (ex = main_list; ex != NULL; ex = ex->next) fprintf(out, "%s\t%d\t%d\t%0.*f\t1000\t%c\n", ex->chrom, ex->chromStart, ex->chromStart+1, decimals, ex->val, ex->min_or_max); } carefulClose(&out); extrema_free_list(&main_list); }
int main(int argc, char *argv[]) /* Process command line. */ { if (argc != 6) errAbort("bad running of bwmake"); struct metaBig *mb = metaBigOpen(argv[1], NULL); FILE *out = mustOpen(argv[2], "w"); int decimals = sqlSigned(argv[3]); struct bed *section; double na = NANUM; boolean condense = FALSE; enum wigOutType wot = get_wig_out_type(argv[4]); if (sameString(argv[5], "condense")) condense = TRUE; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, na); perBaseWigOutputNASkip(pbw, out, wot, decimals, NULL, FALSE, condense); perBaseWigFree(&pbw); } carefulClose(&out); metaBigClose(&mb); return 0; }
void bwtool_extract(struct hash *options, char *regions, unsigned decimals, double fill, char *style_s, char *bigfile, char *tmp_dir, char *outputfile) /* bwtool_extract - main for the extract program */ { boolean tabs = (hashFindVal(options, "tabs") != NULL) ? TRUE : FALSE; boolean locus_name = (hashFindVal(options, "locus-name") != NULL) ? TRUE : FALSE; int orig_size = 0; struct bed6 *region_list = readBed6SoftAndSize(regions, &orig_size); struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, NULL); if (!mb) errAbort("problem opening %s", bigfile); FILE *out = mustOpen(outputfile, "w"); struct bed6 *section; enum style_type style = nothing; if (sameWord(style_s, "bed")) style = bed; else if (sameWord(style_s, "jsp")) style = jsp; else errAbort("please specify a valid style"); int section_num = 1; /* loop through each region */ for (section = region_list; section != NULL; section = section->next) { struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, (section->strand[0] == '-') ? TRUE : FALSE, fill); if (style == bed) /* for bed there is no name manipulation */ extractOutBed(out, section, orig_size, decimals, pbw, tabs); else { /* for jsp output there is some name manipulation that could be done prior to outputting */ char buf[128]; if (locus_name) { safef(buf, sizeof(buf), "%s:%d-%d", section->chrom, section->chromStart+1, section->chromEnd); if (section->name) freeMem(section->name); section->name = cloneString(buf); } else if ((orig_size < 4) || sameString(section->name, ".")) { safef(buf, sizeof(buf), "region_%d", section_num); if (section->name) freeMem(section->name); section->name = cloneString(buf); } extractOutJsp(out, section, decimals, pbw); } perBaseWigFree(&pbw); section_num++; } metaBigClose(&mb); bed6FreeList(®ion_list); carefulClose(&out); }
void bwtool_shift(struct hash *options, char *favorites, char *regions, unsigned decimals, enum wigOutType wot, boolean condense, char *val_s, char *bigfile, char *tmp_dir, char *outputfile)
/* bwtool_shift - main for shifting program
 * Shifts the data of every section by val_s bases and writes the result as
 * a bigWig to outputfile, going through a temporary "<outputfile>.tmp.wig".
 * A positive shift moves each value to a higher index inside its section, a
 * negative shift to a lower index; positions left without a source value
 * become NA.  A shift of zero is rejected. */
{
    const double na = NANUM;
    int shft = sqlSigned(val_s);
    /* validate before opening anything so that a bad value does not leave */
    /* a temporary wig file behind */
    if (shft == 0)
        errAbort("it doesn't make sense to shift by zero.");
    int abs_shft = abs(shft);
    boolean up = (shft > 0) ? FALSE : TRUE;
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions);
    if (!mb)
        errAbort("problem opening %s", bigfile);
    char wigfile[512];
    safef(wigfile, sizeof(wigfile), "%s.tmp.wig", outputfile);
    FILE *out = mustOpen(wigfile, "w");
    struct bed *section;
    for (section = mb->sections; section != NULL; section = section->next)
    {
        struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, na);
        int i;
        int size = pbw->len;
        /* if the shift size is bigger than the section, NA the entire thing */
        if (abs_shft >= size)
        {
            for (i = 0; i < size; i++)
                pbw->data[i] = na;
        }
        else if (!up)
        {
            /* copy from the end backwards so sources are read before */
            /* they are overwritten */
            for (i = size-1; i >= abs_shft; i--)
                pbw->data[i] = pbw->data[i - abs_shft];
            for (; i >= 0; i--)
                pbw->data[i] = na;
        }
        else
        {
            /* copy from the start forwards for the same reason */
            for (i = 0; i < size - abs_shft; i++)
                pbw->data[i] = pbw->data[i + abs_shft];
            for (; i < size; i++)
                pbw->data[i] = na;
        }
        perBaseWigOutputNASkip(pbw, out, wot, decimals, NULL, FALSE, condense);
        perBaseWigFree(&pbw);
    }
    carefulClose(&out);
    writeBw(wigfile, outputfile, mb->chromSizeHash);
    remove(wigfile);
    metaBigClose(&mb);
}
void bwtool_sax(struct hash *options, char *favorites, char *regions, unsigned decimals, char *alpha_s, char *bigfile, char *outputfile) /* bwtool_sax - main for the sax symbol program */ { struct metaBig *mb = metaBigOpen_check(bigfile, regions); struct bed *bed; int alpha = (alpha_s != NULL) ? sqlUnsigned(alpha_s) : 8; unsigned itStart = sqlUnsigned((char *)hashOptionalVal(options, "iterate-start", (alpha_s != NULL) ? alpha_s : "8")); unsigned itEnd = sqlUnsigned((char *)hashOptionalVal(options, "iterate-end", (alpha_s != NULL) ? alpha_s : "8")); unsigned window = sqlUnsigned((char *)hashOptionalVal(options, "sax-window", "0")); char *mean_s = (char *)hashOptionalVal(options, "mean", NULL); char *std_s = (char *)hashOptionalVal(options, "std", NULL); if (mb->type != isaBigWig) errAbort("%s doesn't seem to be a bigWig", bigfile); double mean = bigWigMean(mb->big.bbi); double std = bigWigStd(mb->big.bbi); if (mean_s) mean = sqlDouble(mean_s); if (std_s) std = sqlDouble(std_s); FILE *out; boolean do_std = (hashLookup(options, "std") != NULL); boolean do_mean = (hashLookup(options, "mean") != NULL); boolean bed4 = (hashLookup(options, "bed4") != NULL); boolean wig_out = (hashLookup(options, "add-wig-out") != NULL); if (do_mean || do_std) { if (!do_std || !do_mean) errAbort("if -mean is specified, -std is required, and vice versa"); else if (std <= 0) errAbort("-std must be > 0"); } out = mustOpen(outputfile, "w"); for (bed = mb->sections; bed != NULL; bed = bed->next) { /* print a header */ if ((itStart == itEnd) && !bed4) { if (bed == mb->sections) fprintf(out, "# alphabet size = %d\n", alpha); wigsax_fasta(out, mb, bed, alpha, window, mean, std); } else { if (bed == mb->sections) fprintf(out, "# alphabet size = %d\n", alpha); wigsax_bed4(out, mb, bed, alpha, window, mean, std, wig_out); } } metaBigClose(&mb); carefulClose(&out); }
void bwtool_find_thresh(struct hash *options, char *favorites, char *regions, double fill, char *thresh_type, char *thresh_s, char *bigfile, char *tmp_dir, char *outputfile) /* the other kind of finding, based on thresholding. */ { boolean inverse = (hashFindVal(options, "inverse") != NULL) ? TRUE : FALSE; enum bw_op_type op= get_bw_op_type(thresh_type, inverse); struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions); double thresh = sqlDouble(thresh_s); FILE *out = mustOpen(outputfile, "w"); struct bed out_bed; struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; int i, len; if (pbwList) { out_bed.chrom = pbwList->chrom; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { i = 0; len = pbw->chromEnd - pbw->chromStart; out_bed.chromStart = out_bed.chromEnd = 0; while (i < len) { while ((i < len) && (!fit_thresh(pbw->data[i], thresh, op))) i++; out_bed.chromStart = i + pbw->chromStart; while ((i < len) && (fit_thresh(pbw->data[i], thresh, op))) i++; out_bed.chromEnd = i + pbw->chromStart; if (out_bed.chromEnd > out_bed.chromStart) bedTabOutN(&out_bed, 3, out); } } perBaseWigFree(&pbwList); } } metaBigClose(&mb); carefulClose(&out); }
void bwtool_lift(struct hash *options, char *favorites, char *regions, unsigned decimals, enum wigOutType wot, char *bigfile, char *chainfile, char *outputfile) /* bwtool_lift - main for lifting program */ { struct hash *sizeHash = NULL; struct hash *chainHash = readLiftOverMapChainHash(chainfile); struct hash *gpbw = NULL; char *size_file = hashFindVal(options, "sizes"); char *bad_file = hashFindVal(options, "unlifted"); if (size_file) sizeHash = readCsizeHash(size_file); else sizeHash = qSizeHash(chainfile); gpbw = genomePbw(sizeHash); struct metaBig *mb = metaBigOpen_check(bigfile, regions); char wigfile[512]; safef(wigfile, sizeof(wigfile), "%s.tmp.wig", outputfile); FILE *out = mustOpen(wigfile, "w"); struct hashEl *elList = hashElListHash(gpbw); struct hashEl *el; verbose(2,"starting first pass\n"); do_pass1(mb, chainHash, gpbw); verbose(2, "starting second pass\n"); do_pass2(mb, chainHash, gpbw); verbose(2,"starting final pass\n"); do_final_pass(mb, chainHash, gpbw, bad_file); slSort(&elList, pbwHashElCmp); for (el = elList; el != NULL; el = el->next) { struct perBaseWig *pbw = (struct perBaseWig *)el->val; perBaseWigOutputNASkip(pbw, out, wot, decimals, NULL, FALSE, FALSE); } hashElFreeList(&elList); carefulClose(&out); hashFreeWithVals(&chainHash, freeChainHashMap); hashFreeWithVals(&gpbw, perBaseWigFree); writeBw(wigfile, outputfile, sizeHash); hashFree(&sizeHash); remove(wigfile); metaBigClose(&mb); }
void bwtool_fill(struct hash *options, char *favorites, char *regions, unsigned decimals, enum wigOutType wot, boolean condense, char *val_s, char *bigfile, char *tmp_dir, char *outputfile)
/* bwtool_fill - main for filling program
 * Loads every section of bigfile with val_s (parsed as a double) passed as
 * the fill value, writes the sections to a temporary "<outputfile>.tmp.wig",
 * converts that to a bigWig at outputfile and removes the temporary file. */
{
    double val = sqlDouble(val_s);
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions);
    char wigfile[512];
    safef(wigfile, sizeof(wigfile), "%s.tmp.wig", outputfile);
    FILE *out = mustOpen(wigfile, "w");
    struct bed *section;
    for (section = mb->sections; section != NULL; section = section->next)
    {
        struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, val);
        perBaseWigOutput(pbw, out, wot, decimals, NULL, FALSE, condense);
        perBaseWigFree(&pbw);
    }
    carefulClose(&out);
    writeBw(wigfile, outputfile, mb->chromSizeHash);
    remove(wigfile);
    metaBigClose(&mb);
}
void bwtool_matrix(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *range_s, char *bigfile, char *tmp_dir, char *outputfile) /* bwtool_matrix - main for matrix-creation program */ { boolean do_k = (hashFindVal(options, "cluster") != NULL) ? TRUE : FALSE; boolean do_tile = (hashFindVal(options, "tiled-averages") != NULL) ? TRUE : FALSE; boolean do_binary_matrix = (hashFindVal(options, "binary-matrix") != NULL) ? TRUE : FALSE; boolean keep_bed = (hashFindVal(options, "keep-bed") != NULL) ? TRUE : FALSE; boolean starts = (hashFindVal(options, "starts") != NULL) ? TRUE : FALSE; boolean ends = (hashFindVal(options, "ends") != NULL) ? TRUE : FALSE; boolean lf_header = (hashFindVal(options, "long-form-header") != NULL) ? TRUE : FALSE; char *centroid_file = (char *)hashFindVal(options, "cluster-centroids"); char *long_form = (char *)hashFindVal(options, "long-form"); boolean do_long_form = (long_form != NULL); unsigned left = 0, right = 0; int meta = 0; int num_parse = parse_left_right(range_s, &left, &right, &meta); boolean do_meta = (num_parse == 3); int k = (int)sqlUnsigned((char *)hashOptionalVal(options, "cluster", "0")); int tile = (int)sqlUnsigned((char *)hashOptionalVal(options, "tiled-averages", "1")); if ((do_k) && ((k < 2) || (k > 10))) errAbort("k should be between 2 and 10\n"); if ((do_tile) && (tile < 2)) errAbort("tiling should be done for larger regions"); if ((left % tile != 0) || (right % tile != 0)) errAbort("tiling should be multiple of both up and down values"); if (do_meta && starts && ends) warn("meta uses -starts and -ends anyway"); else if ((do_meta) && (starts || ends)) warn("-starts and -ends both automatically used with meta"); if (do_meta && do_tile) errAbort("meta not compatible with -tile... yet"); if (do_binary_matrix && do_long_form) errAbort("Writing binary matrix is not compatible with -long-form... 
yet"); struct slName *bw_names = slNameListFromComma(bigfile); struct slName *bw_name; struct slName *labels_from_file = NULL; int num_bigwigs = check_for_list_files(&bw_names, &labels_from_file, 0); struct slName *labels = setup_labels(long_form, bw_names, &labels_from_file); struct bed6 *regs = NULL; struct bed6 *regions_left = NULL, *regions_right = NULL, *regions_meta = NULL; struct perBaseMatrix *pbm = NULL; int i; if (do_meta) { if (meta == -1) { meta = calculate_meta_file(regions); fprintf(stderr, "calculated meta = %d bases\n", meta); } regions_left = load_and_recalculate_coords(regions, left, 0, FALSE, TRUE, FALSE); regions_right = load_and_recalculate_coords(regions, 0, right, FALSE, FALSE, TRUE); regions_meta = (meta > 0) ? readBed6Soft(regions) : NULL; } else regs = load_and_recalculate_coords(regions, left, right, FALSE, starts, ends); for (bw_name = bw_names; bw_name != NULL; bw_name = bw_name->next) { struct metaBig *mb = metaBigOpenWithTmpDir(bw_name->name, tmp_dir, NULL); if (do_meta) { struct perBaseMatrix *one_pbm = load_perBaseMatrix(mb, regions_left, fill); struct perBaseMatrix *right_pbm = load_perBaseMatrix(mb, regions_right, fill); if (meta > 0) { struct perBaseMatrix *meta_pbm = load_meta_perBaseMatrix(mb, regions_meta, meta, fill); fuse_pbm(&one_pbm, &meta_pbm, TRUE); } fuse_pbm(&one_pbm, &right_pbm, TRUE); fuse_pbm(&pbm, &one_pbm, FALSE); } else { struct perBaseMatrix *one_pbm = (do_tile) ? 
load_ave_perBaseMatrix(mb, regs, tile, fill) : load_perBaseMatrix(mb, regs, fill); fuse_pbm(&pbm, &one_pbm, FALSE); } metaBigClose(&mb); } if (do_k) { struct cluster_bed_matrix *cbm = NULL; /* ordered by cluster with label in first column */ cbm = init_cbm_from_pbm(pbm, k); do_kmeans_sort(cbm, 0.001, TRUE); if (do_long_form) { output_cluster_matrix_long(cbm, labels, keep_bed, outputfile, lf_header); } else { if (do_binary_matrix) { output_binary_cluster_matrix(cbm, keep_bed, outputfile); } else { output_cluster_matrix(cbm, decimals, keep_bed, outputfile); } } if (centroid_file) { output_centroids(cbm, centroid_file, decimals); } free_cbm(&cbm); } else { if (do_long_form) { output_matrix_long(pbm, decimals, labels, keep_bed, left, right, tile, lf_header, outputfile); } else { if (do_binary_matrix) { output_binary_matrix(pbm, keep_bed, outputfile); } else { output_matrix(pbm, decimals, keep_bed, outputfile); } } /* unordered, no label */ free_perBaseMatrix(&pbm); } if (do_meta) { bed6FreeList(®ions_left); bed6FreeList(®ions_right); if (meta > 0) bed6FreeList(®ions_meta); } else bed6FreeList(®s); }
void bwtool_find_max(struct hash *options, char *favorites, char *regions, double fill, char *bigfile, char *tmp_dir, char *outputfile) /* find max points in a range */ { boolean med_base = (hashFindVal(options, "median-base") != NULL) ? TRUE : FALSE; boolean with_max = (hashFindVal(options, "with-max") != NULL) ? TRUE : FALSE; struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, NULL); FILE *out = mustOpen(outputfile, "w"); struct bed6 *sections6 = readBed6Soft(regions); struct bed *sections = bed12FromBed6(§ions6); struct bed *section; for (section = sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; struct slInt *ii; int i, size; double max = -DBL_MAX; struct slInt *list = NULL; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int pbw_off = pbw->chromStart - section->chromStart; for (i = 0; i < pbw->len; i++) { if (pbw->data[i] > max) { slFreeList(&list); struct slInt *new_int = slIntNew(i + pbw_off); slAddHead(&list, new_int); max = pbw->data[i]; } else if (pbw->data[i] == max) { struct slInt *new_int = slIntNew(i + pbw_off); slAddHead(&list, new_int); } } } slReverse(&list); if (list) { size = slCount(list); if (med_base) { section->blockCount = 1; AllocArray(section->blockSizes, sizeof(int)); AllocArray(section->chromStarts, sizeof(int)); section->blockSizes[0] = 1; section->chromStarts[0] = median_base_calc(&list); } else { section->blockCount = size; AllocArray(section->blockSizes, sizeof(int) * size); AllocArray(section->chromStarts, sizeof(int) * size); for (i = 0, ii = list; (i < size) && (ii != NULL); i++, ii = ii->next) { section->blockSizes[i] = 1; section->chromStarts[i] = ii->val; } } if (!with_max) bedTabOutN(section, 12, out); else { bedOutputN(section, 12, out, '\t', '\t'); fprintf(out, "%f\n", max); } slFreeList(&list); } perBaseWigFree(&pbwList); } metaBigClose(&mb); bedFreeList(§ions); 
carefulClose(&out); }
void bwtool_roll(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, enum wigOutType wot, char *command, char *size_s, char *bigfile, char *tmp_dir, char *outputfile) /* bwtool_roll - main for the rolling-mean program */ /* this function is too long. it'd be nice to break it up some time. */ { struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, regions); int step = (int)sqlUnsigned((char *)hashOptionalVal(options, "step", "1")); int max_na = (int)sqlSigned((char *)hashOptionalVal(options, "max-NA", "-1")); char *min_mean_s = (char *)hashOptionalVal(options, "min-mean", "unused"); double min_mean = -DBL_MAX; if (!sameString(min_mean_s,"unused")) min_mean = sqlDouble(min_mean_s); int size = sqlSigned(size_s); if (max_na == -1) max_na = size/2; else if (max_na > size) max_na = size - 1; if (size < 1) errAbort("size must be >= 1 for bwtool window"); FILE *out = (outputfile) ? mustOpen(outputfile, "w") : stdout; struct bed *section; boolean broken = TRUE; /* for headers */ enum roll_command com; if (sameWord(command, "mean")) com = roll_mean; else if (sameWord(command, "total")) com = roll_total; else errAbort("Pick a roll command: mean or total"); for (section = mb->sections; section != NULL; section = section->next) { if (size <= section->chromEnd - section->chromStart) { struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, fill); int i, j; int len = section->chromEnd - section->chromStart; double total = 0; int num_na = 0; /* load data */ for (i = 0; i < size; i++) add_to_tots(pbw->data[i], &num_na, &total); i = 0; do { int s = pbw->chromStart + i; int e = pbw->chromStart + i + size; int st = step; double mean = total/(size - num_na); /* the next two calculations center it */ s += size/2 - step/2; e = s + step; /* output */ if ((num_na <= max_na) && (mean >= min_mean)) { double out_val; if (com == roll_mean) out_val = mean; else out_val = total; if (wot == 
fixStepOut) { if (broken) fprintf(out, "fixedStep chrom=%s start=%d step=%d span=%d\n", pbw->chrom, s+1, step, step); fprintf(out, "%0.*f\n", decimals, out_val); } else if (wot == varStepOut) { if (broken) fprintf(out, "variableStep chrom=%s span=%d\n", pbw->chrom, step); fprintf(out, "%d\t%0.*f\n", s+1, decimals, out_val); } else fprintf(out, "%s\t%d\t%d\t%0.*f\n", pbw->chrom, s, e, decimals, out_val); broken = FALSE; } else broken = TRUE; /* move */ while (st > 0) { if (i + size <= pbw->len) { add_to_tots(pbw->data[i+size], &num_na, &total); sub_from_tots(pbw->data[i], &num_na, &total); } else break; i++; st--; } } while (i <= pbw->len - size); perBaseWigFree(&pbw); } } metaBigClose(&mb); carefulClose(&out); }
void bwtool_window(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *size_s, char *bigfile, char *output_file) /* bwtool_window - main for the windowing program */ { struct metaBig *mb = metaBigOpen(bigfile, regions); boolean skip_na = (hashFindVal(options, "skip-NA") != NULL) ? TRUE : FALSE; if (!isnan(fill) && skip_na) errAbort("cannot use -skip_na with -fill"); boolean center = (hashFindVal(options, "center") != NULL) ? TRUE : FALSE; int step = (int)sqlUnsigned((char *)hashOptionalVal(options, "step", "1")); int size = sqlSigned(size_s); if (size < 1) errAbort("size must be >= 1 for bwtool window"); FILE *out = (output_file) ? mustOpen(output_file, "w") : stdout; struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { if (size <= section->chromEnd - section->chromStart) { struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, fill); int i, j; for (i = 0; i <= pbw->len - size; i += step) { int s = pbw->chromStart + i; int e = pbw->chromStart + i + size; if (center) { s += size/2 - step/2; e = s + step; } boolean has_NA = FALSE; if (skip_na) { for (j = i; j < i + size; j++) if (isnan(pbw->data[j])) { i = j-step+1; has_NA = TRUE; break; } } if (!has_NA) { fprintf(out, "%s\t%d\t%d\t", pbw->chrom, s, e); for (j = i; j < i + size; j++) if (isnan(pbw->data[j]) && (j == i + size - 1)) fprintf(out, "NA\n"); else if (isnan(pbw->data[j])) fprintf(out, "NA,"); else if (j == i + size - 1) fprintf(out, "%0.*f\n", decimals, pbw->data[j]); else fprintf(out, "%0.*f,", decimals, pbw->data[j]); } } perBaseWigFree(&pbw); } } metaBigClose(&mb); carefulClose(&out); }
void bwtool_split(struct hash *options, char *regions, char *size_s, char *bigfile, char *tmp_dir, char *outputfile) /* bwtool_split - main for the splitting program */ { struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, regions); FILE *output = mustOpen(outputfile, "w"); struct bed *section; struct bed *splitList = NULL; int size = 0; unsigned min_gap = sqlUnsigned((char *)hashOptionalVal(options, "min_gap", "1")); unsigned chunk_size = sqlUnsigned(size_s); char chrom[256] = ""; int start = -1, end = 0; boolean over_size = FALSE; int ix = 1; int gap = 0; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int length = pbw->chromEnd - pbw->chromStart; if (end > 0) gap = pbw->chromStart - end; if (!sameString(chrom, pbw->chrom)) { if (!sameString(chrom, "")) slAddHead(&splitList, newBed(chrom, start, end)); strcpy(chrom, pbw->chrom); start = pbw->chromStart; end = pbw->chromEnd; if (size + length > chunk_size) size = length; else size += length; } else { if ((size + length + gap > chunk_size) && (gap >= min_gap)) { slAddHead(&splitList, newBed(chrom, start, end)); start = pbw->chromStart; end = pbw->chromEnd; size = length; } else { size += length + gap; end = pbw->chromEnd; } } } perBaseWigFreeList(&pbwList); } slAddHead(&splitList, newBed(chrom, start, end)); slReverse(&splitList); for (section = splitList; section != NULL; section = section->next) { fprintf(output, "%s\t%d\t%d\n", section->chrom, section->chromStart, section->chromEnd); } carefulClose(&output); metaBigClose(&mb); bedFreeList(&splitList); }