struct genomeRangeTree *edwMakeGrtFromBed3List(struct bed3 *bedList)
/* Build a fresh genomeRangeTree holding one range per item of bedList.
 * A NULL list yields a tree to which nothing has been added. */
{
struct genomeRangeTree *tree = genomeRangeTreeNew();
struct bed3 *item = bedList;
while (item != NULL)
    {
    genomeRangeTreeAdd(tree, item->chrom, item->chromStart, item->chromEnd);
    item = item->next;
    }
return tree;
}
struct genomeRangeTree* getRangeTreeOfRegdoms(struct regdom* regdoms)
/* Create a new genomeRangeTree and add the chrom/chromStart/chromEnd
 * interval of every element in the regdoms list to it.  The new tree is
 * returned; a NULL list gives a tree with nothing added. */
{
struct genomeRangeTree *tree = genomeRangeTreeNew();
struct regdom *rd = regdoms;
while (rd != NULL)
    {
    genomeRangeTreeAdd(tree, rd->chrom, rd->chromStart, rd->chromEnd);
    rd = rd->next;
    }
return tree;
}
struct genomeRangeTree *grtFromOpenBed(struct lineFile *lf, int size, boolean doPromoter)
/* Read an open bed file into a genomeRangeTree and return it.
 *   lf         - open line file positioned at the bed rows to read.
 *   size       - number of bed fields per row; passed on to bedLoadN.
 *   doPromoter - if FALSE each row is added as chromStart..chromEnd.
 *                If TRUE a window is added instead:
 *                  strand '+' : chromStart-500 .. chromEnd+500
 *                  otherwise  : chromEnd-500   .. chromEnd+500
 * Each bed record is released once its range has been added, so reading a
 * large file no longer leaks one record per row. */
{
struct genomeRangeTree *grt = genomeRangeTreeNew();
char *row[size];
while (lineFileRow(lf, row))
    {
    struct bed *bed = bedLoadN(row, size);
    if (doPromoter)
        {
        /* NOTE(review): the '+' window spans the whole item plus 500 on each
         * side, while the other branch is a 1000-base window around chromEnd.
         * If a window around the start was intended for '+', the end should
         * probably be chromStart + 500 - confirm before changing. */
        if (bed->strand[0] == '+')
            genomeRangeTreeAdd(grt, bed->chrom, bed->chromStart - 500, bed->chromEnd + 500);
        else
            genomeRangeTreeAdd(grt, bed->chrom, bed->chromEnd - 500, bed->chromEnd + 500);
        }
    else
        genomeRangeTreeAdd(grt, bed->chrom, bed->chromStart, bed->chromEnd);
    /* The record is only needed for the add above; free it so it does not leak. */
    bedFree(&bed);
    }
return grt;
}
static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, long long *retMiss, long long *retTotalBasesInHits) /* Scan through sam file doing several things:counting how many reads hit and how many * miss target during mapping phase, copying those that hit to a little bed file, and * also defining regions covered in a genomeRangeTree. */ { samfile_t *sf = samopen(samIn, "r", NULL); bam_header_t *bamHeader = sf->header; bam1_t one; ZeroVar(&one); int err; long long hit = 0, miss = 0, totalBasesInHits = 0; while ((err = samread(sf, &one)) >= 0) { int32_t tid = one.core.tid; if (tid < 0) { ++miss; continue; } ++hit; char *chrom = bamHeader->target_name[tid]; // Approximate here... can do better if parse cigar. int start = one.core.pos; int size = one.core.l_qseq; int end = start + size; totalBasesInHits += size; boolean isRc = (one.core.flag & BAM_FREVERSE); char strand = '+'; if (isRc) { strand = '-'; reverseIntRange(&start, &end, bamHeader->target_len[tid]); } if (start < 0) start=0; if (f != NULL) fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand); genomeRangeTreeAdd(grt, chrom, start, end); } if (err < 0 && err != -1) errnoAbort("samread err %d", err); samclose(sf); *retHit = hit; *retMiss = miss; *retTotalBasesInHits = totalBasesInHits; }
static void subset_with_sections(struct metaBig* mb, struct bed** p_list)
/* mainly for chopgenome
 * Filter the bed list at *p_list in place against mb->sections.
 * A bed is kept only when both genomeRangeTreeOverlaps and
 * genomeRangeTreeFindEnclosing succeed for its chrom/chromStart/chromEnd
 * against a range tree built from the sections; every other bed is freed.
 * Kept beds stay in their original relative order and *p_list is updated to
 * the filtered list (NULL if nothing survives).  The temporary range tree is
 * released before returning. */
{
    struct genomeRangeTree* grt = genomeRangeTreeNew();
    struct bed* sec;
    struct bed* list;
    struct bed* newlist = NULL;
    struct bed* head;

    for (sec = mb->sections; sec != NULL; sec = sec->next)
        genomeRangeTreeAdd(grt, sec->chrom, sec->chromStart, sec->chromEnd);

    list = *p_list;
    while ((head = slPopHead(&list)) != NULL) {
        if (genomeRangeTreeOverlaps(grt, head->chrom, head->chromStart, head->chromEnd)
            && genomeRangeTreeFindEnclosing(grt, head->chrom, head->chromStart, head->chromEnd))
            slAddHead(&newlist, head);
        else
            bedFree(&head);
    }
    /* Survivors were pushed onto the head, so reverse to restore input order. */
    slReverse(&newlist);
    *p_list = newlist;

    /* The tree is only a lookup aid for this call; previously it was leaked. */
    genomeRangeTreeFree(&grt);
}