Ejemplo n.º 1
0
void doBedReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Compare an elder/younger pair of files by reading each whole file as a bed3 list
 * and handing both lists to doBed3Replicate.  Nothing is done when pairExists() is
 * already true for this pair in the "cdwQaPairSampleOverlap" table. */
{
/* Only do the work when the pair has not been handled before. */
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    /* Turn both file ids into paths. */
    char *elderFileName = cdwPathForFileId(conn, elderEf->id);
    char *youngerFileName = cdwPathForFileId(conn, youngerEf->id);

    /* Read the files as bed3 and run the shared replicate calculation on the lists. */
    struct bed3 *elderBeds = bed3LoadAll(elderFileName);
    struct bed3 *youngerBeds = bed3LoadAll(youngerFileName);
    doBed3Replicate(conn, format, assembly, elderEf, elderVf, elderBeds,
	youngerEf, youngerVf, youngerBeds);

    /* Release the lists and the path strings obtained above. */
    bed3FreeList(&elderBeds);
    bed3FreeList(&youngerBeds);
    freez(&youngerFileName);
    freez(&elderFileName);
    }
}
Ejemplo n.º 2
0
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger bigWig files and save the result
 * as a new row in edwQaPairCorrelation.
 *    conn               - database connection used for lookups and for the save
 *    assembly           - passed to genomeRangeTreeForTarget together with enrichedIn
 *    elderEf/elderVf    - file and valid-file records of the elder replicate
 *    youngerEf/youngerVf - file and valid-file records of the younger replicate
 * Returns without doing anything if the pair already exists in edwQaPairCorrelation,
 * or if elderVf->enrichedIn is empty or "unknown". */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;
char *enrichedIn = elderVf->enrichedIn;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    {
    struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

    /* Get open big wig files for both younger and older. */
    char *elderPath = edwPathForFileId(conn, elderEf->id);
    char *youngerPath = edwPathForFileId(conn, youngerEf->id);
    struct bbiFile *elderBbi = bigWigFileOpen(elderPath);
    struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath);

    /* Figure out thresholds */
    double elderThreshold = twoStdsOverMean(elderBbi);
    double youngerThreshold = twoStdsOverMean(youngerBbi);

    /* Loop through a chromosome at a time, feeding three correlation accumulators:
     * overall, in-enriched and clipped.  The chromosome list comes from the elder file. */
    struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew();
    struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
    struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
    struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
	addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, 
	    elderThreshold, youngerThreshold, c, cInEnriched, cClipped);
	}

    /* Make up correlation structure and save it. */
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    cor->pearsonClipped = correlateResult(cClipped);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);

    /* Clean up.  All three accumulators are released, not just the overall one.
     * NOTE(review): chromList is still not released here; the bbi library appears to
     * provide bbiChromInfoFreeList for that - confirm and add. */
    bigWigValsOnChromFree(&bVals);
    bigWigValsOnChromFree(&aVals);
    genomeRangeTreeFree(&targetGrt);
    freez(&cor);
    correlateFree(&cClipped);
    correlateFree(&cInEnriched);
    correlateFree(&c);
    bigWigFileClose(&youngerBbi);
    bigWigFileClose(&elderBbi);
    freez(&youngerPath);
    freez(&elderPath);
    }
}
Ejemplo n.º 3
0
void doSampleReplicate(struct sqlConnection *conn, char *format, struct cdwAssembly *assembly,
    struct cdwFile *elderEf, struct cdwValidFile *elderVf,
    struct cdwFile *youngerEf, struct cdwValidFile *youngerVf)
/* Compare an elder/younger pair using the bed3 lists that cdwLoadSampleBed3 returns
 * for their valid-file records, passing both lists on to doBed3Replicate.  Nothing is
 * done when pairExists() is already true for the pair in "cdwQaPairSampleOverlap". */
{
if (!pairExists(conn, elderEf->id, youngerEf->id, "cdwQaPairSampleOverlap"))
    {
    /* Load the sample for each side, elder first. */
    struct bed3 *elderSample = cdwLoadSampleBed3(conn, elderVf);
    struct bed3 *youngerSample = cdwLoadSampleBed3(conn, youngerVf);

    doBed3Replicate(conn, format, assembly, elderEf, elderVf, elderSample,
	youngerEf, youngerVf, youngerSample);

    /* Both lists are freed here once the comparison is finished. */
    bed3FreeList(&elderSample);
    bed3FreeList(&youngerSample);
    }
}
Ejemplo n.º 4
0
void doSampleReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Measure how much the younger file's bed3 sample overlaps the elder's and save the
 * result as a new row in edwQaPairSampleOverlap.  Returns right away if pairExists()
 * is already true for the pair in that table. */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap"))
    return;

/* Start the output record with the ids and sample sizes taken from the valid-file records. */
struct edwQaPairSampleOverlap *overlapRec;
AllocVar(overlapRec);
overlapRec->elderFileId = elderVf->fileId;
overlapRec->youngerFileId = youngerVf->fileId;
overlapRec->elderSampleBases = elderVf->basesInSample;
overlapRec->youngerSampleBases = youngerVf->basesInSample;

/* The elder sample goes into a genome range tree so it can be queried by region. */
struct bed3 *elderSample = edwLoadSampleBed3(conn, elderVf);
struct genomeRangeTree *elderRanges = edwMakeGrtFromBed3List(elderSample);

/* Walk the younger sample, summing the overlap of each item with the elder tree. */
long long overlapSum = 0;
struct bed3 *youngerSample = edwLoadSampleBed3(conn, youngerVf);
struct bed3 *item = youngerSample;
while (item != NULL)
    {
    int itemOverlap = genomeRangeTreeOverlapSize(elderRanges,
	item->chrom, item->chromStart, item->chromEnd);
    overlapSum += itemOverlap;
    item = item->next;
    }
overlapRec->sampleOverlapBases = overlapSum;
setSampleSampleEnrichment(overlapRec, format, assembly, elderVf, youngerVf);

/* Write the record out, then release everything allocated above. */
edwQaPairSampleOverlapSaveToDb(conn, overlapRec, "edwQaPairSampleOverlap", 128);
freez(&overlapRec);
genomeRangeTreeFree(&elderRanges);
bed3FreeList(&elderSample);
bed3FreeList(&youngerSample);
}
Ejemplo n.º 5
0
void doBigBedReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation and overlap analysis between elder and younger bigBed files.  Saves a
 * new edwQaPairCorrelation row and a new edwQaPairSampleOverlap row, each only if the
 * pair is not already present in the respective table.
 *    conn               - database connection used for lookups and for the saves
 *    format             - "narrowPeak"/"broadPeak" select the signalVal column,
 *                         anything else selects the score column
 *    assembly           - passed to genomeRangeTreeForTarget and setSampleSampleEnrichment
 *    elderEf/elderVf    - file and valid-file records of the elder replicate
 *    youngerEf/youngerVf - file and valid-file records of the younger replicate */
{
/* If got both pairs, work is done already */
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap") 
    && pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;

/* Pick the numerical column to correlate on, expressed relative to the fields that
 * follow chrom/start/end. */
int numColIx = 0;
if (sameString(format, "narrowPeak") || sameString(format, "broadPeak"))
    numColIx = 6;	// signalVal
else
    numColIx = 4;	// score
numColIx -= 3;		// Subtract off chrom/start/end

/* The target range tree is optional: it stays NULL when enrichedIn is empty or "unknown". */
char *enrichedIn = elderVf->enrichedIn;
struct genomeRangeTree *targetGrt = NULL;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

/* Get open big bed files for both younger and older. */
char *elderPath = edwPathForFileId(conn, elderEf->id);
char *youngerPath = edwPathForFileId(conn, youngerEf->id);
struct bbiFile *elderBbi = bigBedFileOpen(elderPath);
struct bbiFile *youngerBbi = bigBedFileOpen(youngerPath);

/* Loop through a chromosome at a time, feeding the two correlation accumulators and
 * the three span totals.  The chromosome list comes from the elder file. */
struct correlate *c = correlateNew(), *cInEnriched = correlateNew();
struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
long long elderTotalSpan = 0, youngerTotalSpan = 0, overlapTotalSpan = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    addBbCorrelations(chrom, targetGrt, elderBbi, youngerBbi, numColIx, c, cInEnriched,
	&elderTotalSpan, &youngerTotalSpan, &overlapTotalSpan);
    }

/* Make up correlation structure and save. */
if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    {
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);
    freez(&cor);
    }

/* Also make up sample structure and save.  */
if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap"))
    {
    struct edwQaPairSampleOverlap *sam;
    AllocVar(sam);
    sam->elderFileId = elderVf->fileId;
    sam->youngerFileId = youngerVf->fileId;
    sam->elderSampleBases = elderTotalSpan;
    sam->youngerSampleBases = youngerTotalSpan;
    sam->sampleOverlapBases = overlapTotalSpan;
    setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf);
    edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128);
    freez(&sam);
    }

/* Clean up.  Both accumulators are released, not just the overall one.
 * NOTE(review): chromList is still not released here; the bbi library appears to
 * provide bbiChromInfoFreeList for that - confirm and add. */
genomeRangeTreeFree(&targetGrt);
correlateFree(&cInEnriched);
correlateFree(&c);
bigBedFileClose(&youngerBbi);
bigBedFileClose(&elderBbi);
freez(&youngerPath);
freez(&elderPath);
}