struct chromAnnMap *chromAnnMapNew()
/* Allocate a chromAnnMap and attach a newly created genome range tree
 * to its ranges field.  Returns the new object. */
{
    struct chromAnnMap *map;

    AllocVar(map);
    map->ranges = genomeRangeTreeNew();
    return map;
}
void edwBamToWig(char *input, char *output) /* edwBamToWig - Convert a bam file to a wig file by measuring depth of coverage, optionally adjusting hit size to average for library.. */ { FILE *f = mustOpen(output, "w"); /* Open file and get header for it. */ samfile_t *sf = samopen(input, "rb", NULL); if (sf == NULL) errnoAbort("Couldn't open %s.\n", input); bam_header_t *head = sf->header; if (head == NULL) errAbort("Aborting ... Bad BAM header in file: %s", input); /* Scan through input populating genome range trees */ struct genomeRangeTree *grt = genomeRangeTreeNew(); bam1_t one = {}; for (;;) { /* Read next record. */ if (bam_read1(sf->x.bam, &one) < 0) break; if (one.core.tid >= 0 && one.core.n_cigar > 0) { char *chrom = head->target_name[one.core.tid]; int start = one.core.pos; int end = start + one.core.l_qseq; if (one.core.flag & BAM_FREVERSE) { start -= clPad; } else { end += clPad; } struct rbTree *rt = genomeRangeTreeFindOrAddRangeTree(grt,chrom); rangeTreeAddToCoverageDepth(rt, start, end); } } /* Convert genome range tree into output wig */ /* Get list of chromosomes. */ struct hashEl *hel, *helList = hashElListHash(grt->hash); for (hel = helList; hel != NULL; hel = hel->next) { char *chrom = hel->name; struct rbTree *rt = hel->val; struct range *range, *rangeList = rangeTreeList(rt); for (range = rangeList; range != NULL; range = range->next) { fprintf(f, "%s\t%d\t%d\t%d\n", chrom, range->start, range->end, ptToInt(range->val)); } } carefulClose(&f); }
struct genomeRangeTree *edwMakeGrtFromBed3List(struct bed3 *bedList)
/* Build and return a genomeRangeTree containing the chrom/chromStart/chromEnd
 * range of every item on bedList.  A NULL list yields an empty tree. */
{
    struct genomeRangeTree *tree = genomeRangeTreeNew();
    struct bed3 *item = bedList;

    while (item != NULL)
        {
        genomeRangeTreeAdd(tree, item->chrom, item->chromStart, item->chromEnd);
        item = item->next;
        }
    return tree;
}
/* Create malnSet->compRangeMap and populate it by passing every block of
 * malnSet->blks to addCompsToMap. */
static void buildRangeTree(struct malnSet *malnSet) {
    struct malnBlkSetIterator *blkIter;
    struct malnBlk *cur;

    malnSet->compRangeMap = genomeRangeTreeNew();
    blkIter = malnBlkSet_getIterator(malnSet->blks);
    for (cur = malnBlkSetIterator_getNext(blkIter);
         cur != NULL;
         cur = malnBlkSetIterator_getNext(blkIter)) {
        addCompsToMap(malnSet, cur);
    }
    malnBlkSetIterator_destruct(blkIter);
}
struct genomeRangeTree* getRangeTreeOfRegdoms(struct regdom* regdoms)
/* Return a new genomeRangeTree holding the chrom/chromStart/chromEnd range of
 * each entry in the regdoms list.  A NULL list yields an empty tree. */
{
    struct genomeRangeTree *tree = genomeRangeTreeNew();
    struct regdom *rd = regdoms;

    while (rd != NULL)
        {
        genomeRangeTreeAdd(tree, rd->chrom, rd->chromStart, rd->chromEnd);
        rd = rd->next;
        }
    return tree;
}
static void subset_with_sections(struct metaBig* mb, struct bed** p_list)
/* mainly for chopgenome.
 * Filter *p_list in place against mb->sections: a bed is kept only when it
 * overlaps the sections and an enclosing range is found for it; every other
 * bed is freed.  Kept beds stay in their original relative order.
 *   mb     - supplies the list of section beds.
 *   p_list - in: list to filter; out: the filtered list (may become NULL). */
{
    struct genomeRangeTree* grt = genomeRangeTreeNew();
    struct bed* sec;
    struct bed* list;
    struct bed* newlist = NULL;
    struct bed* head;

    /* Index the sections so each bed can be tested quickly. */
    for (sec = mb->sections; sec != NULL; sec = sec->next)
        genomeRangeTreeAdd(grt, sec->chrom, sec->chromStart, sec->chromEnd);

    list = *p_list;
    while ((head = slPopHead(&list)) != NULL)
        {
        if (genomeRangeTreeOverlaps(grt, head->chrom, head->chromStart, head->chromEnd)
            && genomeRangeTreeFindEnclosing(grt, head->chrom, head->chromStart, head->chromEnd))
            slAddHead(&newlist, head);
        else
            bedFree(&head);
        }
    /* Items were pushed on the front, so restore input order. */
    slReverse(&newlist);
    *p_list = newlist;

    /* The tree is only a scratch index; the original never released it. */
    genomeRangeTreeFree(&grt);
}
struct genomeRangeTree *edwGrtFromBigBed(char *fileName) /* Return genome range tree for simple (unblocked) bed */ { struct bbiFile *bbi = bigBedFileOpen(fileName); struct bbiChromInfo *chrom, *chromList = bbiChromList(bbi); struct genomeRangeTree *grt = genomeRangeTreeNew(); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { struct rbTree *tree = genomeRangeTreeFindOrAddRangeTree(grt, chrom->name); struct lm *lm = lmInit(0); struct bigBedInterval *iv, *ivList = NULL; ivList = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm); for (iv = ivList; iv != NULL; iv = iv->next) rangeTreeAdd(tree, iv->start, iv->end); lmCleanup(&lm); } bigBedFileClose(&bbi); bbiChromInfoFreeList(&chromList); return grt; }
struct genomeRangeTree *grtFromOpenBed(struct lineFile *lf, int size, boolean doPromoter)
/* Read an open bed file into a genomeRangeTree and return it.
 *   lf         - open line file positioned at the bed rows to read.
 *   size       - number of bed columns to parse from each row.
 *   doPromoter - when FALSE each bed's own chromStart..chromEnd is added.
 *                When TRUE a window of 500 bases either side of the
 *                transcription start is added instead: around chromStart for
 *                '+' strand items, around chromEnd for all others.  The window
 *                start is clamped to 0.
 * Fixes relative to the earlier version: the '+' strand window used to end at
 * chromEnd + 500 (covering the whole item rather than the start site), the
 * window start could go below zero, and each loaded bed was leaked. */
{
    enum { promoterPad = 500 };
    struct genomeRangeTree *grt = genomeRangeTreeNew();
    /* NOTE(review): row is kept as a true array because lineFileRow appears to
     * derive the column count from the array itself - confirm before changing. */
    char *row[size];

    while (lineFileRow(lf, row))
        {
        struct bed *bed = bedLoadN(row, size);
        if (doPromoter)
            {
            int tss = (bed->strand[0] == '+') ? bed->chromStart : bed->chromEnd;
            int start = tss - promoterPad;
            if (start < 0)
                start = 0;
            genomeRangeTreeAdd(grt, bed->chrom, start, tss + promoterPad);
            }
        else
            genomeRangeTreeAdd(grt, bed->chrom, bed->chromStart, bed->chromEnd);
        /* NOTE(review): assumes genomeRangeTreeAdd does not retain bed->chrom
         * (the tree is expected to copy the chromosome name) - confirm. */
        bedFree(&bed);
        }
    return grt;
}
void edwAlignFastqMakeBed(struct edwFile *ef, struct edwAssembly *assembly, char *fastqPath, struct edwValidFile *vf, FILE *bedF, double *retMapRatio, double *retDepth, double *retSampleCoverage) /* Take a sample fastq and run bwa on it, and then convert that file to a bed. * bedF and all the ret parameters can be NULL. */ { /* Hmm, tried doing this with Mark's pipeline code, but somehow it would be flaky the * second time it was run in same app. Resorting therefore to temp files. */ char genoFile[PATH_LEN]; safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", edwValDataDir, assembly->ucscDb, assembly->ucscDb); char cmd[3*PATH_LEN]; char *saiName = cloneString(rTempName(edwTempDir(), "edwSample1", ".sai")); safef(cmd, sizeof(cmd), "bwa aln -t 3 %s %s > %s", genoFile, fastqPath, saiName); mustSystem(cmd); char *samName = cloneString(rTempName(edwTempDir(), "ewdSample1", ".sam")); safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", genoFile, saiName, fastqPath, samName); mustSystem(cmd); remove(saiName); /* Scan sam file to calculate vf->mapRatio, vf->sampleCoverage and vf->depth. * and also to produce little bed file for enrichment step. */ struct genomeRangeTree *grt = genomeRangeTreeNew(); long long hitCount=0, missCount=0, totalBasesInHits=0; scanSam(samName, bedF, grt, &hitCount, &missCount, &totalBasesInHits); verbose(1, "hitCount=%lld, missCount=%lld, totalBasesInHits=%lld, grt=%p\n", hitCount, missCount, totalBasesInHits, grt); if (retMapRatio) *retMapRatio = (double)hitCount/(hitCount+missCount); if (retDepth) *retDepth = (double)totalBasesInHits/assembly->baseCount * (double)vf->itemCount/vf->sampleCount; long long basesHitBySample = genomeRangeTreeSumRanges(grt); if (retSampleCoverage) *retSampleCoverage = (double)basesHitBySample/assembly->baseCount; genomeRangeTreeFree(&grt); remove(samName); }