Exemplo n.º 1
0
void do_pass2(struct metaBig *mb, struct hash *chainHash, struct hash *gpbw)
/* Second pass over the destination per-base wigs stored in gpbw.
 * Every base of every section of mb is remapped with remapBase.  When the
 * base is reported as lifted and its destination chromosome is present in
 * gpbw, the destination slot is overwritten with NANUM unless its value
 * truncates to exactly 1.  Afterwards the visited slots hold either 1 or NA. */
{
    const double not_available = NANUM;
    struct bed *sec;
    for (sec = mb->sections; sec != NULL; sec = sec->next)
    {
        struct perBaseWig *loaded = perBaseWigLoadContinue(mb, sec->chrom, sec->chromStart, sec->chromEnd);
        struct perBaseWig *src;
        for (src = loaded; src != NULL; src = src->next)
        {
            int off;
            for (off = 0; off < src->len; off++)
            {
                char *dest_chrom = NULL;
                int dest_start = 0;
                struct perBaseWig *target;
                /* bases that did not lift cleanly are ignored in this pass */
                if (remapBase(chainHash, src->chrom, src->chromStart + off, &dest_chrom, &dest_start) != lifted)
                    continue;
                target = (struct perBaseWig *)hashFindVal(gpbw, dest_chrom);
                if (target == NULL)
                    continue;
                /* the comparison is done on the value truncated to int */
                if ((int)target->data[dest_start] != 1)
                    target->data[dest_start] = not_available;
            }
        }
        perBaseWigFreeList(&loaded);
    }
}
Exemplo n.º 2
0
/* Load all the regions of [start, end) on chrom from a bigWig file into a
 * list of perBaseWig arrays.
 *
 * The file is opened with metaBigOpen, read with perBaseWigLoadContinue and
 * closed again before returning.
 *
 * Returns the loaded list, or NULL when the file could not be opened or is
 * not a bigWig.  The caller owns the returned list. */
struct perBaseWig* perBaseWigLoad(char* wigFile, char* chrom, int start, int end)
{
    struct metaBig* mb = metaBigOpen(wigFile, NULL);
    struct perBaseWig* list;
    /* guard against a failed open before looking at mb->type */
    if (mb == NULL)
        return NULL;
    if (mb->type != isaBigWig) {
        metaBigClose(&mb);
        return NULL;
    }
    list = perBaseWigLoadContinue(mb, chrom, start, end);
    metaBigClose(&mb);
    return list;
}
Exemplo n.º 3
0
Arquivo: sax.c Projeto: hjanime/bwtool
void wigsax_bed4(FILE *out, struct metaBig *mb, struct bed *region, int alpha, int window, double mean, double std, boolean wig_out)
/* Write the SAX symbols computed over region as bed4 lines.
 * One bed item per base is built with make_initial_bed_list and the SAX
 * character for that base is stored in the item's name.  When wig_out is
 * set, the base's data value is appended to each line as an extra
 * tab-separated column printed with "%0.4f". */
{
    struct perBaseWig *loaded = perBaseWigLoadContinue(mb, region->chrom, region->chromStart, region->chromEnd);
    struct perBaseWig *cur;
    struct bed *allBeds = NULL;
    struct bed *item;
    struct slDouble *values = NULL;
    struct slDouble *nextVal;
    /* Only one alphabet size is used for now, so the range is a single value. */
    const int firstAlpha = alpha;
    const int lastAlpha = alpha;
    char lastSep;

    for (cur = loaded; cur != NULL; cur = cur->next)
    {
        struct bed *perBase = make_initial_bed_list(cur, lastAlpha - firstAlpha + 2);
        const int n = cur->chromEnd - cur->chromStart;
        int a, k;
        for (a = firstAlpha; a <= lastAlpha; a++)
        {
            char *sax = sax_from_array_force_window(cur->data, n, a, window, mean, std);
            /* walk the symbols and the per-base bed items in lock step */
            item = perBase;
            for (k = 0; (k < n) && (item != NULL); k++)
            {
                item->name[a - firstAlpha] = sax[k];
                item = item->next;
            }
            freeMem(sax);
        }
        if (wig_out)
        {
            for (k = 0; k < n; k++)
            {
                struct slDouble *dub = newSlDouble(cur->data[k]);
                slAddHead(&values, dub);
            }
        }
        /* move this region's items onto the (reversed) output list */
        while ((item = slPopHead(&perBase)) != NULL)
            slAddHead(&allBeds, item);
    }
    /* both lists were built head-first; restore input order */
    slReverse(&allBeds);
    slReverse(&values);
    perBaseWigFreeList(&loaded);

    lastSep = (wig_out) ? '\t' : '\n';
    nextVal = values;
    for (item = allBeds; item != NULL; item = item->next)
    {
        bedOutputN(item, 4, out, '\t', lastSep);
        if (!wig_out)
            continue;
        if (nextVal == NULL)
            errAbort("data inconsistency. programmer error\n");
        fprintf(out, "%0.4f\n", nextVal->val);
        nextVal = nextVal->next;
    }
    bedFreeList(&allBeds);
    slFreeList(&values);
}
Exemplo n.º 4
0
void do_final_pass(struct metaBig *mb, struct hash *chainHash, struct hash *gpbw, char *bad_file)
/* Final pass: by now every destination slot is 1.0 or NA.  Copy the source
 * data into the destination per-base wigs stored in gpbw.
 *
 * mb        - source data; every section in mb->sections is processed
 * chainHash - passed through to remapBase
 * gpbw      - hash from destination chromosome name to its perBaseWig
 * bad_file  - optional (may be NULL); when given, one line is written for
 *             each source base that was not copied, with the reason
 *
 * A lifted base whose destination chromosome is not in gpbw is skipped
 * without a report. */
{
    struct bed *section;
    FILE *bad = NULL;
    if (bad_file)
	bad = mustOpen(bad_file, "w");
    for (section = mb->sections; section != NULL; section = section->next)
    {
	struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd);
	struct perBaseWig *pbw;
	for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
	{
	    int i;
	    for (i = 0; i < pbw->len; i++)
	    {
		char *dest_chrom = NULL;
		int dest_start = 0;
		enum remapResult rmr = remapBase(chainHash, pbw->chrom, pbw->chromStart + i, &dest_chrom, &dest_start);
		if (rmr == lifted)
		{
		    struct perBaseWig *dest_chrom_pbw = (struct perBaseWig *)hashFindVal(gpbw, dest_chrom);
		    /* no destination array for this chromosome: nothing to */
		    /* write to, and nothing that may be dereferenced */
		    if (dest_chrom_pbw == NULL)
			continue;
		    if (!isnan(dest_chrom_pbw->data[dest_start]))
			dest_chrom_pbw->data[dest_start] = pbw->data[i];
		    else if (bad)
			/* slot was set to NA earlier, i.e. it was not hit exactly once */
			fprintf(bad, "%s\t%d\tmulti_mapped_%s_%d\n", pbw->chrom, pbw->chromStart+i, dest_chrom, dest_start);
		}
		else if (bad)
		{
		    if (rmr == duplicated)
			fprintf(bad, "%s\t%d\tduplicated_in_destination\n", pbw->chrom, pbw->chromStart+i);
		    else if (rmr == deleted)
			fprintf(bad, "%s\t%d\tdeleted_in_destination\n", pbw->chrom, pbw->chromStart+i);
		    else
			fprintf(bad, "%s\t%d\tproblem_lifting\n", pbw->chrom, pbw->chromStart+i);
		}
	    }
	}
	perBaseWigFreeList(&pbwList);
    }
    if (bad)
	carefulClose(&bad);
}
Exemplo n.º 5
0
void bwtool_find_thresh(struct hash *options, char *favorites, char *regions, double fill,
			char *thresh_type, char *thresh_s, char *bigfile, char *tmp_dir, char *outputfile)
/* The other kind of finding, based on thresholding.
 * For every section of the opened file, each loaded perBaseWig is scanned
 * and every maximal run of consecutive bases for which
 * fit_thresh(value, thresh, op) holds is written to outputfile as a
 * three-column bed line.
 *
 * options     - the "inverse" key, when present, is passed on to get_bw_op_type
 * thresh_type - comparison type, interpreted by get_bw_op_type
 * thresh_s    - threshold as text, parsed with sqlDouble
 * favorites and fill are not used by this function. */
{
    boolean inverse = (hashFindVal(options, "inverse") != NULL) ? TRUE : FALSE;
    enum bw_op_type op= get_bw_op_type(thresh_type, inverse);
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions);
    double thresh = sqlDouble(thresh_s);
    FILE *out = mustOpen(outputfile, "w");
    /* only chrom, chromStart and chromEnd are set; bedTabOutN is asked for 3 fields */
    struct bed out_bed;
    struct bed *section;
    for (section = mb->sections; section != NULL; section = section->next)
    {
	struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
							      section->chromEnd);
	struct perBaseWig *pbw;
	int i, len;
	if (pbwList)
	{
	    out_bed.chrom = pbwList->chrom;
	    for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
	    {
		i = 0;
		len = pbw->chromEnd - pbw->chromStart;
		out_bed.chromStart = out_bed.chromEnd = 0;
		while (i < len)
		{
		    /* skip bases that fail the threshold test */
		    while ((i < len) && (!fit_thresh(pbw->data[i], thresh, op)))
			i++;
		    out_bed.chromStart = i + pbw->chromStart;
		    /* extend over the run of bases that pass it */
		    while ((i < len) && (fit_thresh(pbw->data[i], thresh, op)))
			i++;
		    out_bed.chromEnd = i + pbw->chromStart;
		    /* an empty run only occurs when the end of data was reached */
		    if (out_bed.chromEnd > out_bed.chromStart)
			bedTabOutN(&out_bed, 3, out);
		}
	    }
	    /* pbwList is a list: release every node, not just the head */
	    perBaseWigFreeList(&pbwList);
	}
    }
    metaBigClose(&mb);
    carefulClose(&out);
}
Exemplo n.º 6
0
struct perBaseWig* perBaseWigLoadSingleContinue(struct metaBig* mb, char* chrom,
    int start, int end, boolean reverse, double fill)
/* Load [start, end) on chrom into one perBaseWig.  The result is first */
/* allocated with every position set to fill, then the regions actually */
/* present in the bigWig are copied over it, so gaps keep the fill value. */
/* A chrom missing from the bigWig yields an array made only of fill. */
/* With reverse set, the data array is flipped end to end before returning. */
/* Aborts if mb is not a bigWig. */
{
    struct perBaseWig* loaded;
    struct perBaseWig* piece;
    struct perBaseWig* merged = NULL;
    const int total = end - start;
    int clippedStart = start;
    int clippedEnd = end;

    if (mb->type != isaBigWig)
        errAbort("tried to load data from a non-bigWig file");

    /* unknown chromosome: nothing to load, hand back the filled array */
    if (!hashFindVal(mb->chromSizeHash, chrom)) {
        merged = alloc_fill_perBaseWig(chrom, start, end, fill);
        return merged;
    }

    /* the query range is passed through chromOob before loading; */
    /* the result array still covers the requested start..end */
    chromOob(mb, chrom, &clippedStart, &clippedEnd);
    loaded = perBaseWigLoadContinue(mb, chrom, clippedStart, clippedEnd);
    merged = alloc_fill_perBaseWig(chrom, start, end, fill);

    if (loaded) {
        for (piece = loaded; piece != NULL; piece = piece->next) {
            const int shift = piece->chromStart - merged->chromStart;
            const int span = piece->chromEnd - piece->chromStart;
            int k;
            for (k = 0; k < span; k++)
                merged->data[shift + k] = piece->data[k];
        }
        perBaseWigFreeList(&loaded);
    }

    if (reverse) {
        int lo, hi;
        /* swap mirrored positions, working from both ends toward the middle */
        for (lo = 0, hi = total - 1; lo < (total / 2); lo++, hi--) {
            double held = merged->data[lo];
            merged->data[lo] = merged->data[hi];
            merged->data[hi] = held;
        }
    }
    return merged;
}
Exemplo n.º 7
0
Arquivo: sax.c Projeto: hjanime/bwtool
void wigsax_fasta(FILE *out, struct metaBig *mb, struct bed *region, int alpha, int window, double mean, double std)
/* When not using an iterative alphabet size, make an output similar to FASTA.
 * For each perBaseWig loaded from region a ">chrom:start-end" header line is
 * written, followed by the SAX string broken into lines of at most 60
 * characters. */
{
    enum { FASTA_LINE_WIDTH = 60 };
    struct perBaseWig *wigList = perBaseWigLoadContinue(mb, region->chrom, region->chromStart, region->chromEnd);
    struct perBaseWig *pbw;
    for (pbw = wigList; pbw != NULL; pbw = pbw->next)
    {
	int data_len = pbw->chromEnd-pbw->chromStart;
	/* NOTE(review): assumes sax holds at least data_len characters - confirm */
	char *sax = sax_from_array_force_window(pbw->data, data_len, alpha, window, mean, std);
	int i;
	fprintf(out, ">%s:%d-%d\n", pbw->chrom, pbw->chromStart, pbw->chromEnd);
	for (i = 0; i < data_len; i += FASTA_LINE_WIDTH)
	{
	    /* print through a precision instead of temporarily writing a */
	    /* terminator at sax[i+60], which lies beyond the data on the */
	    /* last line of each record */
	    int width = data_len - i;
	    if (width > FASTA_LINE_WIDTH)
		width = FASTA_LINE_WIDTH;
	    fprintf(out, "%.*s\n", width, sax + i);
	}
	freeMem(sax);
    }
    perBaseWigFreeList(&wigList);
}
Exemplo n.º 8
0
void do_pass1(struct metaBig *mb, struct hash *chainHash, struct hash *gpbw)
/* First pass over the destination per-base wigs stored in gpbw.
 * Each base of every section of mb is remapped with remapBase.  For a base
 * reported as lifted, the matching destination slot becomes 1.0 if it was
 * NaN and is incremented by 1.0 otherwise, so the slot ends up counting how
 * many origin bases landed on it.  Slots above 1 mark destination positions
 * that were hit repeatedly; the following pass keeps only those equal to 1. */
{
    struct bed *sec;
    for (sec = mb->sections; sec != NULL; sec = sec->next)
    {
        struct perBaseWig *loaded = perBaseWigLoadContinue(mb, sec->chrom, sec->chromStart, sec->chromEnd);
        struct perBaseWig *src;
        for (src = loaded; src != NULL; src = src->next)
        {
            int off;
            for (off = 0; off < src->len; off++)
            {
                char *dest_chrom = NULL;
                int dest_start = 0;
                struct perBaseWig *target;
                /* only cleanly lifted bases are counted */
                if (remapBase(chainHash, src->chrom, src->chromStart + off, &dest_chrom, &dest_start) != lifted)
                    continue;
                target = (struct perBaseWig *)hashFindVal(gpbw, dest_chrom);
                if (target == NULL)
                    continue;
                /* first hit turns NaN into 1.0, later hits add to the count */
                if (isnan(target->data[dest_start]))
                    target->data[dest_start] = 1.0;
                else
                    target->data[dest_start] += 1.0;
            }
        }
        perBaseWigFreeList(&loaded);
    }
}
Exemplo n.º 9
0
/* in with NA value */
static void perBaseWigLoadHugeContinue(struct metaBig* mb, struct perBaseWig* big_pbw, int* big_offset, struct bed* section)
/* Append the data found in section to big_pbw->data, starting at index */
/* *big_offset, and advance *big_offset past what was handled. */
/* A section whose chrom is not in the bigWig's chrom-size hash copies */
/* nothing and advances *big_offset by the section's length. */
/* NOTE(review): when the chrom is known, the offset advances only by the */
/* lengths of the loaded pieces, not by the section length - confirm that */
/* callers expect gaps inside a section to be skipped this way. */
{
    int s = section->chromStart;
    int e = section->chromEnd;
    struct perBaseWig* loaded;
    struct perBaseWig* piece;

    if (!hashFindVal(mb->chromSizeHash, section->chrom)) {
        /* chrom absent from the bigWig: skip over the whole section */
        *big_offset += e - s;
        return;
    }

    chromOob(mb, section->chrom, &s, &e);
    loaded = perBaseWigLoadContinue(mb, section->chrom, s, e);
    if (loaded == NULL)
        return;

    for (piece = loaded; piece != NULL; piece = piece->next) {
        const int base = *big_offset;
        int k = 0;
        while (k < piece->len) {
            big_pbw->data[base + k] = piece->data[k];
            k++;
        }
        *big_offset = base + piece->len;
    }
    perBaseWigFreeList(&loaded);
}
Exemplo n.º 10
0
void bwtool_find_max(struct hash *options, char *favorites, char *regions, double fill,
		     char *bigfile, char *tmp_dir, char *outputfile)
/* Find max points in a range.
 * The regions file is read with readBed6Soft and converted with
 * bed12FromBed6.  For every region the loaded data is scanned for its
 * largest value; all positions holding that value are collected as offsets
 * from the region start.  Each region with data is written as a 12-field
 * bed whose blocks are one base long and sit on those positions.
 *
 * options - "median-base": emit a single block at median_base_calc(&list)
 *           instead of one block per tied position;
 *           "with-max": append the maximum itself as an extra column
 * favorites and fill are not used by this function. */
{
    boolean med_base = (hashFindVal(options, "median-base") != NULL) ? TRUE : FALSE;
    boolean with_max = (hashFindVal(options, "with-max") != NULL) ? TRUE : FALSE;
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, NULL);
    FILE *out = mustOpen(outputfile, "w");
    struct bed6 *sections6 = readBed6Soft(regions);
    struct bed *sections = bed12FromBed6(&sections6);
    struct bed *section;
    for (section = sections; section != NULL; section = section->next)
    {
	struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
							      section->chromEnd);
	struct perBaseWig *pbw;
	struct slInt *ii;
	int i, size;
	double max = -DBL_MAX;
	struct slInt *list = NULL;
	for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
	{
	    /* positions are recorded relative to the start of the region */
	    int pbw_off = pbw->chromStart - section->chromStart;
	    for (i = 0; i < pbw->len; i++)
	    {
		if (pbw->data[i] > max)
		{
		    /* new maximum: forget the positions of the old one */
		    slFreeList(&list);
		    struct slInt *new_int = slIntNew(i + pbw_off);
		    slAddHead(&list, new_int);
		    max = pbw->data[i];
		}
		else if (pbw->data[i] == max)
		{
		    struct slInt *new_int = slIntNew(i + pbw_off);
		    slAddHead(&list, new_int);
		}
	    }
	}
	slReverse(&list);
	if (list)
	{
	    size = slCount(list);
	    /* AllocArray takes an element count (it multiplies by the */
	    /* element size itself), so pass counts rather than byte sizes */
	    if (med_base)
	    {
		section->blockCount = 1;
		AllocArray(section->blockSizes, 1);
		AllocArray(section->chromStarts, 1);
		section->blockSizes[0] = 1;
		section->chromStarts[0] = median_base_calc(&list);
	    }
	    else
	    {
		section->blockCount = size;
		AllocArray(section->blockSizes, size);
		AllocArray(section->chromStarts, size);
		for (i = 0, ii = list; (i < size) && (ii != NULL); i++, ii = ii->next)
		{
		    section->blockSizes[i] = 1;
		    section->chromStarts[i] = ii->val;
		}
	    }
	    if (!with_max)
		bedTabOutN(section, 12, out);
	    else
	    {
		bedOutputN(section, 12, out, '\t', '\t');
		fprintf(out, "%f\n", max);
	    }
	    slFreeList(&list);
	}
	/* pbwList is a list: release every node, not just the head */
	perBaseWigFreeList(&pbwList);
    }
    metaBigClose(&mb);
    bedFreeList(&sections);
    carefulClose(&out);
}
Exemplo n.º 11
0
void bwtool_split(struct hash *options, char *regions, char *size_s, char *bigfile, char *tmp_dir, char *outputfile)
/* bwtool_split - main for the splitting program.
 * Walks the data-bearing pieces of the file in order and groups them into
 * regions, written to outputfile as "chrom<TAB>start<TAB>end" lines.  A new
 * region is started when the chromosome changes, or when adding the next
 * piece (plus the gap before it) would exceed the chunk size and that gap
 * is at least min_gap.
 *
 * options - optional "min_gap" value (default "1")
 * size_s  - chunk size as text, parsed with sqlUnsigned
 * Nothing is written when no data was loaded. */
{
    struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, regions);
    FILE *output = mustOpen(outputfile, "w");
    struct bed *section;
    struct bed *splitList = NULL;
    /* NOTE(review): slAddHead is a macro in the kent library and may evaluate */
    /* its node argument more than once, so each new bed is stored here first */
    struct bed *piece;
    int size = 0;
    unsigned min_gap = sqlUnsigned((char *)hashOptionalVal(options, "min_gap", "1"));
    unsigned chunk_size = sqlUnsigned(size_s);
    char chrom[256] = "";
    int start = -1, end = 0;
    int gap = 0;
    for (section = mb->sections; section != NULL; section = section->next)
    {
	struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
							      section->chromEnd);
	struct perBaseWig *pbw;
	for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
	{
	    int length = pbw->chromEnd - pbw->chromStart;
	    if (end > 0)
		gap = pbw->chromStart - end;
	    if (!sameString(chrom, pbw->chrom))
	    {
		/* chromosome changed: close the region in progress, if any */
		if (!sameString(chrom, ""))
		{
		    piece = newBed(chrom, start, end);
		    slAddHead(&splitList, piece);
		}
		/* bounded copy; a name longer than the buffer is truncated */
		snprintf(chrom, sizeof(chrom), "%s", pbw->chrom);
		start = pbw->chromStart;
		end = pbw->chromEnd;
		if (size + length > chunk_size)
		    size = length;
		else
		    size += length;
	    }
	    else
	    {
		if ((size + length + gap > chunk_size) && (gap >= min_gap))
		{
		    /* chunk is full and the gap is wide enough to cut at */
		    piece = newBed(chrom, start, end);
		    slAddHead(&splitList, piece);
		    start = pbw->chromStart;
		    end = pbw->chromEnd;
		    size = length;
		}
		else
		{
		    size += length + gap;
		    end = pbw->chromEnd;
		}
	    }
	}
	perBaseWigFreeList(&pbwList);
    }
    /* close the last region, but only if any data was seen at all */
    if (!sameString(chrom, ""))
    {
	piece = newBed(chrom, start, end);
	slAddHead(&splitList, piece);
    }
    slReverse(&splitList);
    for (section = splitList; section != NULL; section = section->next)
    {
	fprintf(output, "%s\t%d\t%d\n", section->chrom, section->chromStart, section->chromEnd);
    }
    carefulClose(&output);
    metaBigClose(&mb);
    bedFreeList(&splitList);
}