示例#1
0
struct chromAnnMap *chromAnnMapNew()
/* Allocate a chromAnnMap with AllocVar, attach a newly created genome
 * range tree to its ranges field, and return the object. */
{
struct chromAnnMap *newMap;
AllocVar(newMap);
newMap->ranges = genomeRangeTreeNew();
return newMap;
}
示例#2
0
void edwBamToWig(char *input, char *output)
/* edwBamToWig - Convert a bam file to a wig file by measuring depth of coverage, optionally adjusting hit size to average for library.. */
{
FILE *f = mustOpen(output, "w");
/* Open file and get header for it. */
samfile_t *sf = samopen(input, "rb", NULL);
if (sf == NULL)
    errnoAbort("Couldn't open %s.\n", input);
bam_header_t *head = sf->header;
if (head == NULL)
    errAbort("Aborting ... Bad BAM header in file: %s", input);


/* Scan through input populating genome range trees */
struct genomeRangeTree *grt = genomeRangeTreeNew();
bam1_t one = {};
for (;;)
    {
    /* Read next record. */
    if (bam_read1(sf->x.bam, &one) < 0)
	break;
    if (one.core.tid >= 0 && one.core.n_cigar > 0)
	{
	char *chrom = head->target_name[one.core.tid];
	int start = one.core.pos;
	int end = start + one.core.l_qseq;
	if (one.core.flag & BAM_FREVERSE)
	    {
	    start -= clPad;
	    }
	else
	    {
	    end += clPad;
	    }
	struct rbTree *rt = genomeRangeTreeFindOrAddRangeTree(grt,chrom);
	rangeTreeAddToCoverageDepth(rt, start, end);
	}
    }


/* Convert genome range tree into output wig */

/* Get list of chromosomes. */
struct hashEl *hel, *helList = hashElListHash(grt->hash);
for (hel = helList; hel != NULL; hel = hel->next)
    {
    char *chrom = hel->name;
    struct rbTree *rt = hel->val;
    struct range *range, *rangeList = rangeTreeList(rt);
    for (range = rangeList; range != NULL; range = range->next)
         {
	 fprintf(f, "%s\t%d\t%d\t%d\n",  chrom, range->start, range->end, ptToInt(range->val));
	 }
    }

carefulClose(&f);
}
示例#3
0
struct genomeRangeTree *edwMakeGrtFromBed3List(struct bed3 *bedList)
/* Create a new genomeRangeTree and add the chrom/chromStart/chromEnd span of
 * every item on bedList to it.  Returns the tree; a NULL list yields a tree
 * to which nothing has been added. */
{
struct genomeRangeTree *tree = genomeRangeTreeNew();
struct bed3 *item = bedList;
while (item != NULL)
    {
    genomeRangeTreeAdd(tree, item->chrom, item->chromStart, item->chromEnd);
    item = item->next;
    }
return tree;
}
示例#4
0
/* Create malnSet->compRangeMap as a new genome range tree, then call
 * addCompsToMap for every block produced by iterating malnSet->blks. */
static void buildRangeTree(struct malnSet *malnSet) {
    struct malnBlkSetIterator *blkIter;
    struct malnBlk *cur;

    malnSet->compRangeMap = genomeRangeTreeNew();
    blkIter = malnBlkSet_getIterator(malnSet->blks);
    for (cur = malnBlkSetIterator_getNext(blkIter); cur != NULL;
         cur = malnBlkSetIterator_getNext(blkIter)) {
        addCompsToMap(malnSet, cur);
    }
    malnBlkSetIterator_destruct(blkIter);
}
示例#5
0
/* Return a new genomeRangeTree to which the chrom/chromStart/chromEnd span of
 * each element of the regdoms list has been added.  The list itself is only
 * read, never modified. */
struct genomeRangeTree* getRangeTreeOfRegdoms(struct regdom* regdoms)
{
	struct genomeRangeTree *tree = genomeRangeTreeNew();
	struct regdom *rd = regdoms;

	while (rd != NULL) {
		genomeRangeTreeAdd(tree, rd->chrom, rd->chromStart, rd->chromEnd);
		rd = rd->next;
	}
	return tree;
}
示例#6
0
static void subset_with_sections(struct metaBig* mb, struct bed** p_list)
/* mainly for chopgenome.
 * Filters *p_list in place against mb->sections: a bed is kept only when
 * genomeRangeTreeOverlaps and genomeRangeTreeFindEnclosing both succeed for
 * its chrom/chromStart/chromEnd against a tree built from the sections.
 * Rejected beds are released with bedFree; kept beds stay in their original
 * relative order and *p_list is updated to point at them.
 * NOTE(review): the range tree built here is not released before returning. */
{
    struct genomeRangeTree* sectionTree = genomeRangeTreeNew();
    struct bed* section;
    struct bed* remaining = *p_list;
    struct bed* kept = NULL;
    struct bed* item;

    for (section = mb->sections; section != NULL; section = section->next)
        genomeRangeTreeAdd(sectionTree, section->chrom, section->chromStart, section->chromEnd);

    for (;;) {
        item = slPopHead(&remaining);
        if (item == NULL)
            break;
        if (!genomeRangeTreeOverlaps(sectionTree, item->chrom, item->chromStart, item->chromEnd)
            || !genomeRangeTreeFindEnclosing(sectionTree, item->chrom, item->chromStart, item->chromEnd)) {
            bedFree(&item);
            continue;
        }
        /* Prepending reverses the order; it is restored after the loop. */
        slAddHead(&kept, item);
    }
    slReverse(&kept);
    *p_list = kept;
}
示例#7
0
struct genomeRangeTree *edwGrtFromBigBed(char *fileName)
/* Return genome range tree for simple (unblocked) bed.
 * Opens the bigBed at fileName, and for each chromosome it lists queries the
 * intervals over [0, chrom size) and adds each interval's start/end to that
 * chromosome's range tree.  The bigBed handle and chromosome list are released
 * before returning; the returned tree is handed to the caller. */
{
struct bbiFile *bigBed = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bigBed);
struct genomeRangeTree *grt = genomeRangeTreeNew();
struct bbiChromInfo *info = chromList;
while (info != NULL)
    {
    struct rbTree *chromTree = genomeRangeTreeFindOrAddRangeTree(grt, info->name);
    /* Intervals are allocated from a per-chromosome local memory pool. */
    struct lm *localMem = lmInit(0);
    struct bigBedInterval *interval =
        bigBedIntervalQuery(bigBed, info->name, 0, info->size, 0, localMem);
    for ( ; interval != NULL; interval = interval->next)
        rangeTreeAdd(chromTree, interval->start, interval->end);
    lmCleanup(&localMem);
    info = info->next;
    }
bigBedFileClose(&bigBed);
bbiChromInfoFreeList(&chromList);
return grt;
}
示例#8
0
struct genomeRangeTree *grtFromOpenBed(struct lineFile *lf, int size, boolean doPromoter)
/* Read an open bed file into a genomeRangeTree and return it.
 *
 * lf         - open line file positioned at the bed rows to read.
 * size       - number of columns per row, passed through to bedLoadN.
 * doPromoter - when FALSE each bed's own chromStart..chromEnd is added.
 *              When TRUE a window of 500 bases either side of the item's
 *              start site is added instead: chromStart for '+' strand items,
 *              chromEnd for everything else.  The window start is clamped at 0.
 *              This mode reads bed->strand, so presumably size must be at
 *              least 6 - TODO confirm against callers.
 *
 * NOTE(review): the bed loaded for each row is not released here. */
{
enum { promoterFlank = 500 };
struct genomeRangeTree *grt = genomeRangeTreeNew();
char *row[size];
while (lineFileRow(lf, row))
    {
    struct bed *bed = bedLoadN(row, size);
    int start = bed->chromStart;
    int end = bed->chromEnd;
    if (doPromoter)
        {
        /* Centre the window on the strand-appropriate end of the item. */
        int startSite = (bed->strand[0] == '+') ? (int)bed->chromStart : (int)bed->chromEnd;
        start = startSite - promoterFlank;
        end = startSite + promoterFlank;
        if (start < 0)
            start = 0;
        }
    genomeRangeTreeAdd(grt, bed->chrom, start, end);
    }
return grt;
}
示例#9
0
void edwAlignFastqMakeBed(struct edwFile *ef, struct edwAssembly *assembly,
    char *fastqPath, struct edwValidFile *vf, FILE *bedF,
    double *retMapRatio,  double *retDepth,  double *retSampleCoverage)
/* Take a sample fastq and run bwa on it, and then convert that file to a bed. 
 * bedF and all the ret parameters can be NULL.
 *
 * Runs "bwa aln" then "bwa samse" through mustSystem, writing to temp files
 * under edwTempDir(), scans the resulting SAM with scanSam, and removes the
 * temp files afterwards.
 *
 * *retMapRatio       - hitCount / (hitCount + missCount); 0.0 when the SAM
 *                      yielded no records at all, rather than a 0/0 NaN.
 * *retDepth          - totalBasesInHits / assembly->baseCount scaled by
 *                      vf->itemCount / vf->sampleCount; 0.0 when
 *                      vf->sampleCount is zero.  vf is only read when
 *                      retDepth is non-NULL.
 * *retSampleCoverage - genomeRangeTreeSumRanges(grt) / assembly->baseCount.
 *
 * NOTE(review): the cloned temp file names are not released, and the temp
 * files are left behind if mustSystem aborts. */
{
/* Hmm, tried doing this with Mark's pipeline code, but somehow it would be flaky the
 * second time it was run in same app.  Resorting therefore to temp files. */
char genoFile[PATH_LEN];
safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", 
    edwValDataDir, assembly->ucscDb, assembly->ucscDb);

char cmd[3*PATH_LEN];
char *saiName = cloneString(rTempName(edwTempDir(), "edwSample1", ".sai"));
safef(cmd, sizeof(cmd), "bwa aln -t 3 %s %s > %s", genoFile, fastqPath, saiName);
mustSystem(cmd);

char *samName = cloneString(rTempName(edwTempDir(), "ewdSample1", ".sam"));
safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", genoFile, saiName, fastqPath, samName);
mustSystem(cmd);
remove(saiName);

/* Scan sam file to calculate vf->mapRatio, vf->sampleCoverage and vf->depth. 
 * and also to produce little bed file for enrichment step. */
struct genomeRangeTree *grt = genomeRangeTreeNew();
long long hitCount=0, missCount=0, totalBasesInHits=0;
scanSam(samName, bedF, grt, &hitCount, &missCount, &totalBasesInHits);
verbose(1, "hitCount=%lld, missCount=%lld, totalBasesInHits=%lld, grt=%p\n", 
    hitCount, missCount, totalBasesInHits, grt);

long long totalReads = hitCount + missCount;
if (retMapRatio)
    {
    /* Avoid 0/0 when nothing was scanned. */
    if (totalReads != 0)
        *retMapRatio = (double)hitCount/totalReads;
    else
        *retMapRatio = 0.0;
    }
if (retDepth)
    {
    /* Avoid dividing by an empty sample count. */
    if (vf->sampleCount != 0)
        *retDepth = (double)totalBasesInHits/assembly->baseCount 
            * (double)vf->itemCount/vf->sampleCount;
    else
        *retDepth = 0.0;
    }
long long basesHitBySample = genomeRangeTreeSumRanges(grt);
if (retSampleCoverage)
    *retSampleCoverage = (double)basesHitBySample/assembly->baseCount;
genomeRangeTreeFree(&grt);
remove(samName);
}