Example #1
0
int main(int argc, char *argv[])
/* Process command line. */
{
    if (argc != 6)
	errAbort("bad running of bwmake");
    struct metaBig *mb = metaBigOpen(argv[1], NULL);
    FILE *out = mustOpen(argv[2], "w");
    int decimals = sqlSigned(argv[3]);
    struct bed *section;
    double na = NANUM;
    boolean condense = FALSE;
    enum wigOutType wot = get_wig_out_type(argv[4]);
    if (sameString(argv[5], "condense"))
	condense = TRUE;
    for (section = mb->sections; section != NULL; section = section->next)
    {
	struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart, 
							      section->chromEnd, FALSE, na);
	perBaseWigOutputNASkip(pbw, out, wot, decimals, NULL, FALSE, condense);
	perBaseWigFree(&pbw);
    }
    carefulClose(&out);
    metaBigClose(&mb);
    return 0;
}
Example #2
0
/* Open wigFile, load the regions of chrom:start-end through
 * perBaseWigLoadContinue, and close the file again.
 * Returns NULL when the opened file is not of type isaBigWig;
 * otherwise returns whatever perBaseWigLoadContinue produced. */
struct perBaseWig* perBaseWigLoad(char* wigFile, char* chrom, int start, int end)
{
    struct perBaseWig* loaded = NULL;
    struct metaBig* big = metaBigOpen(wigFile, NULL);

    /* Only bigWig input is loaded; any other type falls through with NULL. */
    if (big->type == isaBigWig)
        loaded = perBaseWigLoadContinue(big, chrom, start, end);

    metaBigClose(&big);
    return loaded;
}
Example #3
0
struct perBaseWig* perBaseWigLoadSingle(char* wigFile, char* chrom, int start, int end, boolean reverse, double fill)
/* Open wigFile and load chrom:start-end into one perBaseWig via */
/* perBaseWigLoadSingleContinue, forwarding 'reverse' and the gap */
/* value 'fill'.  Returns NULL when the file is not of type isaBigWig. */
{
    struct perBaseWig* loaded = NULL;
    struct metaBig* big = metaBigOpen(wigFile, NULL);

    /* Only bigWig input is loaded; any other type falls through with NULL. */
    if (big->type == isaBigWig)
        loaded = perBaseWigLoadSingleContinue(big, chrom, start, end, reverse, fill);

    metaBigClose(&big);
    return loaded;
}
Example #4
0
void bwtool_matrix(struct hash *options, char *favorites, char *regions, unsigned decimals, 
		   double fill, char *range_s, char *bigfile, char *outputfile)
/* bwtool_matrix - main for matrix-creation program */
{
    boolean do_k = (hashFindVal(options, "cluster") != NULL) ? TRUE : FALSE;
    boolean do_tile = (hashFindVal(options, "tiled-averages") != NULL) ? TRUE : FALSE;
    boolean keep_bed = (hashFindVal(options, "keep-bed") != NULL) ? TRUE : FALSE;
    boolean starts = (hashFindVal(options, "starts") != NULL) ? TRUE : FALSE;
    boolean ends = (hashFindVal(options, "ends") != NULL) ? TRUE : FALSE;
    boolean lf_header = (hashFindVal(options, "long-form-header") != NULL) ? TRUE : FALSE;
    char *centroid_file = (char *)hashFindVal(options, "cluster-centroids");
    char *long_form = (char *)hashFindVal(options, "long-form");
    boolean do_long_form = (long_form != NULL);
    unsigned left = 0, right = 0;
    int meta = 0;
    int num_parse = parse_left_right(range_s, &left, &right, &meta);
    boolean do_meta = (num_parse == 3);
    int k = (int)sqlUnsigned((char *)hashOptionalVal(options, "cluster", "0"));
    int tile = (int)sqlUnsigned((char *)hashOptionalVal(options, "tiled-averages", "1"));
    if ((do_k) && ((k < 2) || (k > 10)))
	errAbort("k should be between 2 and 10\n");
    if ((do_tile) && (tile < 2))
	errAbort("tiling should be done for larger regions");
    if ((left % tile != 0) || (right % tile != 0))
	errAbort("tiling should be multiple of both up and down values");
    if (do_meta && starts && ends)
	warn("meta uses -starts and -ends anyway");
    else if ((do_meta) && (starts || ends))
	warn("-starts and -ends both automatically used with meta");
    if (do_meta && tile)
	errAbort("meta not compatible with -tile... yet");
    struct slName *bw_names = slNameListFromComma(bigfile);
    struct slName *bw_name;
    struct slName *labels_from_file = NULL;
    int num_bigwigs = check_for_list_files(&bw_names, &labels_from_file);
    struct slName *labels = setup_labels(long_form, bw_names, &labels_from_file);
    struct bed6 *regs = NULL;
    struct bed6 *regions_left = NULL, *regions_right = NULL, *regions_meta = NULL;
    struct perBaseMatrix *pbm = NULL;
    int i;
    if (do_meta)
    {
	if (meta == -1) 
	{
	    meta = calculate_meta_file(regions);
	    fprintf(stderr, "calculated meta = %d bases\n", meta);
	}
	regions_left = load_and_recalculate_coords(regions, left, 0, FALSE, TRUE, FALSE);
	regions_right = load_and_recalculate_coords(regions, 0, right, FALSE, FALSE, TRUE);
	regions_meta = (meta > 0) ? readBed6Soft(regions) : NULL;
    }
    else
	regs = load_and_recalculate_coords(regions, left, right, FALSE, starts, ends);
    for (bw_name = bw_names; bw_name != NULL; bw_name = bw_name->next)
    {
	struct metaBig *mb = metaBigOpen(bw_name->name, NULL);
	if (do_meta)
	{
	    struct perBaseMatrix *one_pbm = load_perBaseMatrix(mb, regions_left, fill);
	    struct perBaseMatrix *right_pbm = load_perBaseMatrix(mb, regions_right, fill);
	    if (meta > 0)
	    {
		struct perBaseMatrix *meta_pbm = load_meta_perBaseMatrix(mb, regions_meta, meta, fill);
		fuse_pbm(&one_pbm, &meta_pbm, TRUE);
	    }
	    fuse_pbm(&one_pbm, &right_pbm, TRUE);
	    fuse_pbm(&pbm, &one_pbm, FALSE);
	}
	else
	{
	    struct perBaseMatrix *one_pbm = (do_tile) ? load_ave_perBaseMatrix(mb, regs, tile, fill) : 
		load_perBaseMatrix(mb, regs, fill);
	    fuse_pbm(&pbm, &one_pbm, FALSE);
	}
	metaBigClose(&mb);
    }
    if (do_k)
    {
	struct cluster_bed_matrix *cbm = NULL;
	/* ordered by cluster with label in first column */
	cbm = init_cbm_from_pbm(pbm, k);
	do_kmeans_sort(cbm, 0.001, TRUE);
	if (do_long_form)
	    output_cluster_matrix_long(cbm, labels, keep_bed, outputfile, lf_header);
	else
	    output_cluster_matrix(cbm, decimals, keep_bed, outputfile);
	if (centroid_file)
	    output_centroids(cbm, centroid_file, decimals);
	free_cbm(&cbm);
    }
    else 
    {
	if (do_long_form)
	    output_matrix_long(pbm, decimals, labels, keep_bed, left, right, tile, lf_header, outputfile);
	else
	    output_matrix(pbm, decimals, keep_bed, outputfile);
	/* unordered, no label  */
	free_perBaseMatrix(&pbm);
    }
    if (do_meta)
    {
	bed6FreeList(&regions_left);
	bed6FreeList(&regions_right);
	if (meta > 0)
	    bed6FreeList(&regions_meta);
    }
    else
	bed6FreeList(&regs);
}
Example #5
0
void bwtool_window(struct hash *options, char *favorites, char *regions, unsigned decimals,
                   double fill, char *size_s, char *bigfile, char *output_file)
/* bwtool_window - main for the windowing program.
 *   options     - hash of command-line options; the keys read here are "skip-NA",
 *                 "center" and "step"
 *   favorites   - not used by this function
 *   regions     - passed to metaBigOpen together with bigfile
 *   decimals    - number of decimals printed for each value
 *   fill        - fill value passed to the loader; must be NaN when skip-NA is set
 *   size_s      - window size as a string, must parse to >= 1
 *   bigfile     - input file
 *   output_file - output path, or NULL to write to stdout
 * Writes one line per window: chrom, start, end, then the comma-separated
 * per-base values, with "NA" printed for NaN.  Aborts via errAbort on bad options. */
{
    struct metaBig *mb = metaBigOpen(bigfile, regions);
    boolean skip_na = (hashFindVal(options, "skip-NA") != NULL) ? TRUE : FALSE;
    if (!isnan(fill) && skip_na)
        errAbort("cannot use -skip_na with -fill");
    boolean center = (hashFindVal(options, "center") != NULL) ? TRUE : FALSE;
    int step = (int)sqlUnsigned((char *)hashOptionalVal(options, "step", "1"));
    /* A step of 0 would never advance the window loop below; a value that went
     * negative in the int conversion would walk backwards.  Reject both. */
    if (step < 1)
        errAbort("step must be >= 1 for bwtool window");
    int size = sqlSigned(size_s);
    if (size < 1)
        errAbort("size must be >= 1 for bwtool window");
    FILE *out = (output_file) ? mustOpen(output_file, "w") : stdout;
    struct bed *section;
    for (section = mb->sections; section != NULL; section = section->next)
    {
        /* Sections shorter than one window produce no output. */
        if (size <= section->chromEnd - section->chromStart)
        {
            struct perBaseWig *pbw = perBaseWigLoadSingleContinue(mb, section->chrom, section->chromStart,
                                                                  section->chromEnd, FALSE, fill);
            int i, j;
            for (i = 0; i <= pbw->len - size; i += step)
            {
                int s = pbw->chromStart + i;
                int e = pbw->chromStart + i + size;
                if (center)
                {
                    /* Report a step-wide interval placed in the middle of the window. */
                    s += size/2 - step/2;
                    e = s + step;
                }
                boolean has_NA = FALSE;
                if (skip_na)
                {
                    for (j = i; j < i + size; j++)
                        if (isnan(pbw->data[j]))
                        {
                            /* Rewind so that the loop's i += step lands on j + 1,
                             * the first base after the NA. */
                            i = j-step+1;
                            has_NA = TRUE;
                            break;
                        }
                }
                if (!has_NA)
                {
                    fprintf(out, "%s\t%d\t%d\t", pbw->chrom, s, e);
                    for (j = i; j < i + size; j++)
                    {
                        /* The last value of the window ends the line; others end with a comma. */
                        boolean last = (j == i + size - 1);
                        if (isnan(pbw->data[j]) && last)
                            fprintf(out, "NA\n");
                        else if (isnan(pbw->data[j]))
                            fprintf(out, "NA,");
                        else if (last)
                            /* The '*' precision argument must be an int. */
                            fprintf(out, "%0.*f\n", (int)decimals, pbw->data[j]);
                        else
                            fprintf(out, "%0.*f,", (int)decimals, pbw->data[j]);
                    }
                }
            }
            perBaseWigFree(&pbw);
        }
    }
    metaBigClose(&mb);
    carefulClose(&out);
}