void do_pass2(struct metaBig *mb, struct hash *chainHash, struct hash *gpbw) /* so now that everything is either zero, 1.0, or more, make everything that isn't 1.0 an NA */ { const double na = NANUM; struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int i; for (i = 0; i < pbw->len; i++) { char *dest_chrom = NULL; int dest_start = 0; enum remapResult rmr = remapBase(chainHash, pbw->chrom, pbw->chromStart + i, &dest_chrom, &dest_start); if (rmr == lifted) { struct perBaseWig *dest_chrom_pbw = (struct perBaseWig *)hashFindVal(gpbw, dest_chrom); if (dest_chrom_pbw && ((int)dest_chrom_pbw->data[dest_start] != 1)) dest_chrom_pbw->data[dest_start] = na; } } } perBaseWigFreeList(&pbwList); } }
/* Open wigFile, load the requested chrom/start/end range as a list of
 * perBaseWig arrays, and close the file again before returning.
 * Returns NULL when the opened file is not of type isaBigWig. */
struct perBaseWig *perBaseWigLoad(char *wigFile, char *chrom, int start, int end)
{
    struct perBaseWig *loaded = NULL;
    struct metaBig *mb = metaBigOpen(wigFile, NULL);

    if (mb->type == isaBigWig)
        loaded = perBaseWigLoadContinue(mb, chrom, start, end);
    metaBigClose(&mb);
    return loaded;
}
void wigsax_bed4(FILE *out, struct metaBig *mb, struct bed *region, int alpha, int window, double mean, double std, boolean wig_out)
/* Emit the SAX symbols for one interval as four-column bed lines.
 * For each perBaseWig piece loaded from region, a bed list is built with
 * make_initial_bed_list() and one SAX character per bed is stored into
 * bed->name.  When wig_out is set, the raw data value is appended to each
 * line as a fifth, "%0.4f"-formatted column. */
{
    struct bed *allBeds = NULL;
    struct slDouble *allVals = NULL;
    struct slDouble *valCursor;
    struct bed *bed;
    struct perBaseWig *pieces = perBaseWigLoadContinue(mb, region->chrom, region->chromStart, region->chromEnd);
    struct perBaseWig *piece;
    /* A single alphabet size is used today; the range form is kept so a
     * span of alphabet sizes could be supported again later. */
    const int firstAlpha = alpha;
    const int lastAlpha = alpha;
    const char lastSep = (wig_out) ? '\t' : '\n';

    for (piece = pieces; piece != NULL; piece = piece->next)
    {
        struct bed *pieceBeds = make_initial_bed_list(piece, lastAlpha - firstAlpha + 2);
        int nBases = piece->chromEnd - piece->chromStart;
        int a;
        int k;

        for (a = firstAlpha; a <= lastAlpha; a++)
        {
            char *sax = sax_from_array_force_window(piece->data, nBases, a, window, mean, std);

            /* one symbol per bed, stopping at whichever runs out first */
            k = 0;
            bed = pieceBeds;
            while ((k < nBases) && (bed != NULL))
            {
                bed->name[a - firstAlpha] = sax[k];
                k++;
                bed = bed->next;
            }
            freeMem(sax);
        }
        if (wig_out)
        {
            for (k = 0; k < nBases; k++)
            {
                struct slDouble *val = newSlDouble(piece->data[k]);
                slAddHead(&allVals, val);
            }
        }
        /* move this piece's beds onto the (reversed) output list */
        for (bed = slPopHead(&pieceBeds); bed != NULL; bed = slPopHead(&pieceBeds))
            slAddHead(&allBeds, bed);
    }
    slReverse(&allBeds);
    slReverse(&allVals);
    perBaseWigFreeList(&pieces);

    valCursor = allVals;
    for (bed = allBeds; bed != NULL; bed = bed->next)
    {
        bedOutputN(bed, 4, out, '\t', lastSep);
        if (!wig_out)
            continue;
        /* beds and values are expected to pair up one-to-one */
        if (valCursor == NULL)
            errAbort("data inconsistency. programmer error\n");
        fprintf(out, "%0.4f\n", valCursor->val);
        valCursor = valCursor->next;
    }
    bedFreeList(&allBeds);
    slFreeList(&allVals);
}
void do_final_pass(struct metaBig *mb, struct hash *chainHash, struct hash *gpbw, char *bad_file) /* now everything is 1.0 or NA. copy data into destination */ { struct bed *section; FILE *bad = NULL; if (bad_file) bad = mustOpen(bad_file, "w"); for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int i; for (i = 0; i < pbw->len; i++) { char *dest_chrom = NULL; int dest_start = 0; enum remapResult rmr = remapBase(chainHash, pbw->chrom, pbw->chromStart + i, &dest_chrom, &dest_start); if (rmr == lifted) { struct perBaseWig *dest_chrom_pbw = (struct perBaseWig *)hashFindVal(gpbw, dest_chrom); if (dest_chrom_pbw && (!isnan(dest_chrom_pbw->data[dest_start]))) dest_chrom_pbw->data[dest_start] = pbw->data[i]; else if (bad && isnan(dest_chrom_pbw->data[dest_start])) fprintf(bad, "%s\t%d\tmulti_mapped_%s_%d\n", pbw->chrom, pbw->chromStart+i, dest_chrom, dest_start); } else if (bad) { if (rmr == duplicated) fprintf(bad, "%s\t%d\tduplicated_in_destination\n", pbw->chrom, pbw->chromStart+i); else if (rmr == deleted) fprintf(bad, "%s\t%d\tdeleted_in_destination\n", pbw->chrom, pbw->chromStart+i); else fprintf(bad, "%s\t%d\tproblem_lifting\n", pbw->chrom, pbw->chromStart+i); } } } perBaseWigFreeList(&pbwList); } if (bad_file) carefulClose(&bad); }
void bwtool_find_thresh(struct hash *options, char *favorites, char *regions, double fill, char *thresh_type, char *thresh_s, char *bigfile, char *tmp_dir, char *outputfile) /* the other kind of finding, based on thresholding. */ { boolean inverse = (hashFindVal(options, "inverse") != NULL) ? TRUE : FALSE; enum bw_op_type op= get_bw_op_type(thresh_type, inverse); struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions); double thresh = sqlDouble(thresh_s); FILE *out = mustOpen(outputfile, "w"); struct bed out_bed; struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; int i, len; if (pbwList) { out_bed.chrom = pbwList->chrom; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { i = 0; len = pbw->chromEnd - pbw->chromStart; out_bed.chromStart = out_bed.chromEnd = 0; while (i < len) { while ((i < len) && (!fit_thresh(pbw->data[i], thresh, op))) i++; out_bed.chromStart = i + pbw->chromStart; while ((i < len) && (fit_thresh(pbw->data[i], thresh, op))) i++; out_bed.chromEnd = i + pbw->chromStart; if (out_bed.chromEnd > out_bed.chromStart) bedTabOutN(&out_bed, 3, out); } } perBaseWigFree(&pbwList); } } metaBigClose(&mb); carefulClose(&out); }
struct perBaseWig *perBaseWigLoadSingleContinue(struct metaBig *mb, char *chrom, int start, int end, boolean reverse, double fill)
/* Return one perBaseWig spanning start..end of chrom.  The array is
 * created by alloc_fill_perBaseWig() with the value `fill`, then every
 * piece loaded from the bigWig is copied over it at its offset from
 * start, so positions without data keep `fill`.  When reverse is set the
 * data array is flipped end-for-end.  Aborts if mb is not a bigWig.  If
 * chrom is absent from mb->chromSizeHash, the all-fill array is returned
 * as is. */
{
    struct perBaseWig *pieces;
    struct perBaseWig *piece;
    struct perBaseWig *merged = NULL;
    int clipStart = start;
    int clipEnd = end;
    int span = end - start;

    if (mb->type != isaBigWig)
        errAbort("tried to load data from a non-bigWig file");

    if (!hashFindVal(mb->chromSizeHash, chrom))
    {
        /* chrom unknown to the bigWig: nothing to load */
        merged = alloc_fill_perBaseWig(chrom, start, end, fill);
        return merged;
    }

    /* the load uses the range as adjusted by chromOob(); the returned
     * array still covers the caller's start..end */
    chromOob(mb, chrom, &clipStart, &clipEnd);
    pieces = perBaseWigLoadContinue(mb, chrom, clipStart, clipEnd);
    merged = alloc_fill_perBaseWig(chrom, start, end, fill);

    if (pieces != NULL)
    {
        for (piece = pieces; piece != NULL; piece = piece->next)
        {
            int base = piece->chromStart - merged->chromStart;
            int k = 0;

            while (k < piece->chromEnd - piece->chromStart)
            {
                merged->data[base + k] = piece->data[k];
                k++;
            }
        }
        perBaseWigFreeList(&pieces);
    }

    if (reverse)
    {
        int lo;
        int hi;

        /* swap from both ends toward the middle */
        for (lo = 0, hi = span - 1; lo < hi; lo++, hi--)
        {
            double held = merged->data[lo];
            merged->data[lo] = merged->data[hi];
            merged->data[hi] = held;
        }
    }
    return merged;
}
void wigsax_fasta(FILE *out, struct metaBig *mb, struct bed *region, int alpha, int window, double mean, double std)
/* Write the SAX string of each perBaseWig piece in region as a
 * FASTA-like record: a ">chrom:start-end" header followed by the symbols
 * wrapped at 60 characters per line.
 *   out    - destination stream
 *   mb     - open data source the region is loaded from
 *   region - interval to load
 *   alpha, window, mean, std - forwarded to sax_from_array_force_window() */
{
    const int line_width = 60;
    struct perBaseWig *wigList = perBaseWigLoadContinue(mb, region->chrom,
                                                        region->chromStart,
                                                        region->chromEnd);
    struct perBaseWig *pbw;

    for (pbw = wigList; pbw != NULL; pbw = pbw->next)
    {
        int data_len = pbw->chromEnd - pbw->chromStart;
        char *sax = sax_from_array_force_window(pbw->data, data_len, alpha, window, mean, std);
        int i;

        fprintf(out, ">%s:%d-%d\n", pbw->chrom, pbw->chromStart, pbw->chromEnd);
        for (i = 0; i < data_len; i += line_width)
        {
            /* Print at most one line's worth with a precision-limited %s.
             * The previous approach temporarily wrote a NUL at sax[i+60],
             * which lands beyond the end of the string on the last chunk
             * whenever data_len is not a multiple of 60. */
            int chunk = data_len - i;

            if (chunk > line_width)
                chunk = line_width;
            fprintf(out, "%.*s\n", chunk, sax + i);
        }
        freeMem(sax);
    }
    perBaseWigFreeList(&wigList);
}
void do_pass1(struct metaBig *mb, struct hash *chainHash, struct hash *gpbw) /* do the first pass in the destination pbws */ /* remap all the origen bases and increment from zero in */ /* the destination the number of times the base in the */ /* destination is mapped to. For the second pass we'll */ /* only use the ones that are 1 here. Ones that are > 1 */ /* will be considered places where the destination is */ /* repeated */ { struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int i; for (i = 0; i < pbw->len; i++) { char *dest_chrom = NULL; int dest_start = 0; enum remapResult rmr = remapBase(chainHash, pbw->chrom, pbw->chromStart + i, &dest_chrom, &dest_start); if (rmr == lifted) { struct perBaseWig *dest_chrom_pbw = (struct perBaseWig *)hashFindVal(gpbw, dest_chrom); if (dest_chrom_pbw) { if (isnan(dest_chrom_pbw->data[dest_start])) dest_chrom_pbw->data[dest_start] = 1.0; else dest_chrom_pbw->data[dest_start] += 1.0; } } } } perBaseWigFreeList(&pbwList); } }
/* in with NA value */ static void perBaseWigLoadHugeContinue(struct metaBig* mb, struct perBaseWig* big_pbw, int* big_offset, struct bed* section) { struct perBaseWig* list = NULL; int s = section->chromStart; int e = section->chromEnd; if (!hashFindVal(mb->chromSizeHash, section->chrom)) { /* if the chrom isn't in the bigWig's chrom-size hash, skip over */ *big_offset += e - s; } else { struct perBaseWig* pbw; chromOob(mb, section->chrom, &s, &e); list = perBaseWigLoadContinue(mb, section->chrom, s, e); if (list) { for (pbw = list; pbw != NULL; pbw = pbw->next) { int j; for (j = 0; j < pbw->len; j++) big_pbw->data[*big_offset + j] = pbw->data[j]; *big_offset += pbw->len; } perBaseWigFreeList(&list); } } }
void bwtool_find_max(struct hash *options, char *favorites, char *regions, double fill, char *bigfile, char *tmp_dir, char *outputfile) /* find max points in a range */ { boolean med_base = (hashFindVal(options, "median-base") != NULL) ? TRUE : FALSE; boolean with_max = (hashFindVal(options, "with-max") != NULL) ? TRUE : FALSE; struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, NULL); FILE *out = mustOpen(outputfile, "w"); struct bed6 *sections6 = readBed6Soft(regions); struct bed *sections = bed12FromBed6(§ions6); struct bed *section; for (section = sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; struct slInt *ii; int i, size; double max = -DBL_MAX; struct slInt *list = NULL; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int pbw_off = pbw->chromStart - section->chromStart; for (i = 0; i < pbw->len; i++) { if (pbw->data[i] > max) { slFreeList(&list); struct slInt *new_int = slIntNew(i + pbw_off); slAddHead(&list, new_int); max = pbw->data[i]; } else if (pbw->data[i] == max) { struct slInt *new_int = slIntNew(i + pbw_off); slAddHead(&list, new_int); } } } slReverse(&list); if (list) { size = slCount(list); if (med_base) { section->blockCount = 1; AllocArray(section->blockSizes, sizeof(int)); AllocArray(section->chromStarts, sizeof(int)); section->blockSizes[0] = 1; section->chromStarts[0] = median_base_calc(&list); } else { section->blockCount = size; AllocArray(section->blockSizes, sizeof(int) * size); AllocArray(section->chromStarts, sizeof(int) * size); for (i = 0, ii = list; (i < size) && (ii != NULL); i++, ii = ii->next) { section->blockSizes[i] = 1; section->chromStarts[i] = ii->val; } } if (!with_max) bedTabOutN(section, 12, out); else { bedOutputN(section, 12, out, '\t', '\t'); fprintf(out, "%f\n", max); } slFreeList(&list); } perBaseWigFree(&pbwList); } metaBigClose(&mb); bedFreeList(§ions); 
carefulClose(&out); }
void bwtool_split(struct hash *options, char *regions, char *size_s, char *bigfile, char *tmp_dir, char *outputfile) /* bwtool_split - main for the splitting program */ { struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, regions); FILE *output = mustOpen(outputfile, "w"); struct bed *section; struct bed *splitList = NULL; int size = 0; unsigned min_gap = sqlUnsigned((char *)hashOptionalVal(options, "min_gap", "1")); unsigned chunk_size = sqlUnsigned(size_s); char chrom[256] = ""; int start = -1, end = 0; boolean over_size = FALSE; int ix = 1; int gap = 0; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int length = pbw->chromEnd - pbw->chromStart; if (end > 0) gap = pbw->chromStart - end; if (!sameString(chrom, pbw->chrom)) { if (!sameString(chrom, "")) slAddHead(&splitList, newBed(chrom, start, end)); strcpy(chrom, pbw->chrom); start = pbw->chromStart; end = pbw->chromEnd; if (size + length > chunk_size) size = length; else size += length; } else { if ((size + length + gap > chunk_size) && (gap >= min_gap)) { slAddHead(&splitList, newBed(chrom, start, end)); start = pbw->chromStart; end = pbw->chromEnd; size = length; } else { size += length + gap; end = pbw->chromEnd; } } } perBaseWigFreeList(&pbwList); } slAddHead(&splitList, newBed(chrom, start, end)); slReverse(&splitList); for (section = splitList; section != NULL; section = section->next) { fprintf(output, "%s\t%d\t%d\n", section->chrom, section->chromStart, section->chromEnd); } carefulClose(&output); metaBigClose(&mb); bedFreeList(&splitList); }