int main(int argc, char *argv[]) /* Process command line. */ { if (argc != 6) errAbort("bad running of bwmake"); struct metaBig *mb = metaBigOpen(argv[1], NULL); FILE *out = mustOpen(argv[2], "w"); int decimals = sqlSigned(argv[3]); struct bed *section; double na = NANUM; boolean condense = FALSE; enum wigOutType wot = get_wig_out_type(argv[4]); if (sameString(argv[5], "condense")) condense = TRUE; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, na); perBaseWigOutputNASkip(pbw, out, wot, decimals, NULL, FALSE, condense); perBaseWigFree(&pbw); } carefulClose(&out); metaBigClose(&mb); return 0; }
struct perBaseWig *perBaseWigLoad(char *wigFile, char *chrom, int start, int end)
/* Open wigFile, load the requested chrom/start/end range through
 * perBaseWigLoadContinue, and close the file again before returning.
 * Returns the list produced by perBaseWigLoadContinue, or NULL when the
 * opened file's type is not isaBigWig. */
{
    struct perBaseWig *loaded = NULL;
    /* NOTE(review): the handle is dereferenced without a NULL check - confirm
     * that metaBigOpen never returns NULL. */
    struct metaBig *big = metaBigOpen(wigFile, NULL);

    if (big->type == isaBigWig)
        loaded = perBaseWigLoadContinue(big, chrom, start, end);

    metaBigClose(&big);
    return loaded;
}
struct perBaseWig *perBaseWigLoadSingle(char *wigFile, char *chrom, int start, int end,
                                        boolean reverse, double fill)
/* Load all the regions into one perBaseWig, with gaps filled in with the
 * given fill value.  The file is opened here, handed to
 * perBaseWigLoadSingleContinue together with reverse and fill, and closed
 * before returning.  Returns NULL when the opened file's type is not
 * isaBigWig. */
{
    struct perBaseWig *loaded = NULL;
    /* NOTE(review): the handle is dereferenced without a NULL check - confirm
     * that metaBigOpen never returns NULL. */
    struct metaBig *big = metaBigOpen(wigFile, NULL);

    if (big->type == isaBigWig)
        loaded = perBaseWigLoadSingleContinue(big, chrom, start, end, reverse, fill);

    metaBigClose(&big);
    return loaded;
}
void bwtool_matrix(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *range_s, char *bigfile, char *outputfile) /* bwtool_matrix - main for matrix-creation program */ { boolean do_k = (hashFindVal(options, "cluster") != NULL) ? TRUE : FALSE; boolean do_tile = (hashFindVal(options, "tiled-averages") != NULL) ? TRUE : FALSE; boolean keep_bed = (hashFindVal(options, "keep-bed") != NULL) ? TRUE : FALSE; boolean starts = (hashFindVal(options, "starts") != NULL) ? TRUE : FALSE; boolean ends = (hashFindVal(options, "ends") != NULL) ? TRUE : FALSE; boolean lf_header = (hashFindVal(options, "long-form-header") != NULL) ? TRUE : FALSE; char *centroid_file = (char *)hashFindVal(options, "cluster-centroids"); char *long_form = (char *)hashFindVal(options, "long-form"); boolean do_long_form = (long_form != NULL); unsigned left = 0, right = 0; int meta = 0; int num_parse = parse_left_right(range_s, &left, &right, &meta); boolean do_meta = (num_parse == 3); int k = (int)sqlUnsigned((char *)hashOptionalVal(options, "cluster", "0")); int tile = (int)sqlUnsigned((char *)hashOptionalVal(options, "tiled-averages", "1")); if ((do_k) && ((k < 2) || (k > 10))) errAbort("k should be between 2 and 10\n"); if ((do_tile) && (tile < 2)) errAbort("tiling should be done for larger regions"); if ((left % tile != 0) || (right % tile != 0)) errAbort("tiling should be multiple of both up and down values"); if (do_meta && starts && ends) warn("meta uses -starts and -ends anyway"); else if ((do_meta) && (starts || ends)) warn("-starts and -ends both automatically used with meta"); if (do_meta && tile) errAbort("meta not compatible with -tile... 
yet"); struct slName *bw_names = slNameListFromComma(bigfile); struct slName *bw_name; struct slName *labels_from_file = NULL; int num_bigwigs = check_for_list_files(&bw_names, &labels_from_file); struct slName *labels = setup_labels(long_form, bw_names, &labels_from_file); struct bed6 *regs = NULL; struct bed6 *regions_left = NULL, *regions_right = NULL, *regions_meta = NULL; struct perBaseMatrix *pbm = NULL; int i; if (do_meta) { if (meta == -1) { meta = calculate_meta_file(regions); fprintf(stderr, "calculated meta = %d bases\n", meta); } regions_left = load_and_recalculate_coords(regions, left, 0, FALSE, TRUE, FALSE); regions_right = load_and_recalculate_coords(regions, 0, right, FALSE, FALSE, TRUE); regions_meta = (meta > 0) ? readBed6Soft(regions) : NULL; } else regs = load_and_recalculate_coords(regions, left, right, FALSE, starts, ends); for (bw_name = bw_names; bw_name != NULL; bw_name = bw_name->next) { struct metaBig *mb = metaBigOpen(bw_name->name, NULL); if (do_meta) { struct perBaseMatrix *one_pbm = load_perBaseMatrix(mb, regions_left, fill); struct perBaseMatrix *right_pbm = load_perBaseMatrix(mb, regions_right, fill); if (meta > 0) { struct perBaseMatrix *meta_pbm = load_meta_perBaseMatrix(mb, regions_meta, meta, fill); fuse_pbm(&one_pbm, &meta_pbm, TRUE); } fuse_pbm(&one_pbm, &right_pbm, TRUE); fuse_pbm(&pbm, &one_pbm, FALSE); } else { struct perBaseMatrix *one_pbm = (do_tile) ? 
load_ave_perBaseMatrix(mb, regs, tile, fill) : load_perBaseMatrix(mb, regs, fill); fuse_pbm(&pbm, &one_pbm, FALSE); } metaBigClose(&mb); } if (do_k) { struct cluster_bed_matrix *cbm = NULL; /* ordered by cluster with label in first column */ cbm = init_cbm_from_pbm(pbm, k); do_kmeans_sort(cbm, 0.001, TRUE); if (do_long_form) output_cluster_matrix_long(cbm, labels, keep_bed, outputfile, lf_header); else output_cluster_matrix(cbm, decimals, keep_bed, outputfile); if (centroid_file) output_centroids(cbm, centroid_file, decimals); free_cbm(&cbm); } else { if (do_long_form) output_matrix_long(pbm, decimals, labels, keep_bed, left, right, tile, lf_header, outputfile); else output_matrix(pbm, decimals, keep_bed, outputfile); /* unordered, no label */ free_perBaseMatrix(&pbm); } if (do_meta) { bed6FreeList(®ions_left); bed6FreeList(®ions_right); if (meta > 0) bed6FreeList(®ions_meta); } else bed6FreeList(®s); }
void bwtool_window(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *size_s, char *bigfile, char *output_file) /* bwtool_window - main for the windowing program */ { struct metaBig *mb = metaBigOpen(bigfile, regions); boolean skip_na = (hashFindVal(options, "skip-NA") != NULL) ? TRUE : FALSE; if (!isnan(fill) && skip_na) errAbort("cannot use -skip_na with -fill"); boolean center = (hashFindVal(options, "center") != NULL) ? TRUE : FALSE; int step = (int)sqlUnsigned((char *)hashOptionalVal(options, "step", "1")); int size = sqlSigned(size_s); if (size < 1) errAbort("size must be >= 1 for bwtool window"); FILE *out = (output_file) ? mustOpen(output_file, "w") : stdout; struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { if (size <= section->chromEnd - section->chromStart) { struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, section->chromEnd, FALSE, fill); int i, j; for (i = 0; i <= pbw->len - size; i += step) { int s = pbw->chromStart + i; int e = pbw->chromStart + i + size; if (center) { s += size/2 - step/2; e = s + step; } boolean has_NA = FALSE; if (skip_na) { for (j = i; j < i + size; j++) if (isnan(pbw->data[j])) { i = j-step+1; has_NA = TRUE; break; } } if (!has_NA) { fprintf(out, "%s\t%d\t%d\t", pbw->chrom, s, e); for (j = i; j < i + size; j++) if (isnan(pbw->data[j]) && (j == i + size - 1)) fprintf(out, "NA\n"); else if (isnan(pbw->data[j])) fprintf(out, "NA,"); else if (j == i + size - 1) fprintf(out, "%0.*f\n", decimals, pbw->data[j]); else fprintf(out, "%0.*f,", decimals, pbw->data[j]); } } perBaseWigFree(&pbw); } } metaBigClose(&mb); carefulClose(&out); }