Пример #1
0
void bigWigCorrelateList(char *listFile)
/* Correlate all files in list to each other */
{
char **fileNames = NULL;
int fileCount = 0;
char *buf = NULL;
readAllWords(listFile, &fileNames, &fileCount, &buf);
int i;
for (i=0; i<fileCount; ++i)
    {
    char *aPath = fileNames[i];
    char aName[FILENAME_LEN];
    if (rootNames)
	splitPath(aPath, NULL, aName, NULL);
    else
        safef(aName, sizeof(aName), "%s", aPath);
    int j;
    for (j=i+1; j<fileCount; ++j)
        {
	char *bPath = fileNames[j];
	char bName[FILENAME_LEN];
	if (rootNames)
	    splitPath(bPath, NULL, bName, NULL);
	else
	    safef(bName, sizeof(bName), "%s", bPath);
	struct correlate *c = bigWigCorrelate(aPath, bPath);
	printf("%s\t%s\t%g\n", aName, bName, correlateResult(c));
	correlateFree(&c);
	}
    }
}
Пример #2
0
void doBigWigReplicate(struct sqlConnection *conn, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger and save result to
 * a new edwQaPairCorrelation record. Do this for a format where we have a bigWig file. */
{
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;
char *enrichedIn = elderVf->enrichedIn;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    {
    struct genomeRangeTree *targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

    /* Get open big wig files for both younger and older. */
    char *elderPath = edwPathForFileId(conn, elderEf->id);
    char *youngerPath = edwPathForFileId(conn, youngerEf->id);
    struct bbiFile *elderBbi = bigWigFileOpen(elderPath);
    struct bbiFile *youngerBbi = bigWigFileOpen(youngerPath);

    /* Figure out thresholds */
    double elderThreshold = twoStdsOverMean(elderBbi);
    double youngerThreshold = twoStdsOverMean(youngerBbi);

    /* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/
    struct correlate *c = correlateNew(), *cInEnriched = correlateNew(), *cClipped = correlateNew();
    struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
    struct bigWigValsOnChrom *aVals = bigWigValsOnChromNew();
    struct bigWigValsOnChrom *bVals = bigWigValsOnChromNew();
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
	addBwCorrelations(chrom, targetGrt, aVals, bVals, elderBbi, youngerBbi, 
	    elderThreshold, youngerThreshold, c, cInEnriched, cClipped);
	}

    /* Make up correlation structure . */
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    cor->pearsonClipped = correlateResult(cClipped);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);


    bigWigValsOnChromFree(&bVals);
    bigWigValsOnChromFree(&aVals);
    genomeRangeTreeFree(&targetGrt);
    freez(&cor);
    correlateFree(&c);
    bigWigFileClose(&youngerBbi);
    bigWigFileClose(&elderBbi);
    freez(&youngerPath);
    freez(&elderPath);
    }
}
Пример #3
0
double correlateArrays(double *x, double *y, int size)
/* Return correlation of two arrays of doubles. */
{
struct correlate *c = correlateNew();
double r;
int i;
for (i=0; i<size; ++i)
     correlateNext(c, x[i], y[i]);
r = correlateResult(c);
correlateFree(&c);
return r;
}
Пример #4
0
double correlatePair(struct metaWig *a, struct metaWig *b)
/* Correlate a pair of metaWigs. */
{
struct correlate *c = correlateNew();
struct slName *chrom, *chromList = metaWigChromList(a);
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    struct lm *lm = lmInit(0);
    struct bbiInterval *aList = metaIntervalsForChrom(a, chrom->name, lm);
    struct bbiInterval *bList = metaIntervalsForChrom(b, chrom->name, lm);
    verbose(2, "%s a(%d) b(%d)\n", chrom->name, slCount(aList), slCount(bList));
    sortedApplyOverlapping(aList, bList, slListNextWrapper, 
	    bbiIntervalCmpEnd, bbiIntervalOverlap, bbiIntervalCorrelatePairWrapper, c);
    lmCleanup(&lm);
    }
slFreeList(&chromList);
double result = correlateResult(c);
verbose(2, "correlate: r %g, sumX %g, sumY %g, n %lld\n", result, c->sumX, c->sumY, c->n);
correlateFree(&c);
return result;
}
Пример #5
0
double chromGraphBinCorrelate(char *aFile, char *bFile)
/* Do correlation between two graphs.  */
{
struct chromGraphBin *a = chromGraphBinOpen(aFile);
struct chromGraphBin *b = chromGraphBinOpen(bFile);
struct cgbChrom *chrom;
struct correlate *c = correlateNew();
double r;

for (chrom = a->chromList; chrom != NULL; chrom = chrom->next)
    {
    chromGraphBinSeekToChrom(a, chrom->name);
    if (chromGraphBinSeekToChrom(b, chrom->name))
        {
	correlateChrom(a, b, c);
	}
    }
r = correlateResult(c);
correlateFree(&c);
return r;
}
Пример #6
0
void doBigBedReplicate(struct sqlConnection *conn, char *format, struct edwAssembly *assembly,
    struct edwFile *elderEf, struct edwValidFile *elderVf,
    struct edwFile *youngerEf, struct edwValidFile *youngerVf)
/* Do correlation analysis between elder and younger and save result to
 * a new edwQaPairCorrelation record. Do this for a format where we have a bigBed file. */
{
/* If got both pairs, work is done already */
if (pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap") 
    && pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    return;

int numColIx = 0;
if (sameString(format, "narrowPeak") || sameString(format, "broadPeak"))
    numColIx = 6;	// signalVal
else
    numColIx = 4;	// score
numColIx -= 3;		// Subtract off chrom/start/end
char *enrichedIn = elderVf->enrichedIn;
struct genomeRangeTree *targetGrt = NULL;
if (!isEmpty(enrichedIn) && !sameString(enrichedIn, "unknown"))
    targetGrt = genomeRangeTreeForTarget(conn, assembly, enrichedIn);

/* Get open big bed files for both younger and older. */
char *elderPath = edwPathForFileId(conn, elderEf->id);
char *youngerPath = edwPathForFileId(conn, youngerEf->id);
struct bbiFile *elderBbi = bigBedFileOpen(elderPath);
struct bbiFile *youngerBbi = bigBedFileOpen(youngerPath);

/* Loop through a chromosome at a time adding to correlation, and at the end save result in r.*/
struct correlate *c = correlateNew(), *cInEnriched = correlateNew();
struct bbiChromInfo *chrom, *chromList = bbiChromList(elderBbi);
long long elderTotalSpan = 0, youngerTotalSpan = 0, overlapTotalSpan = 0;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    addBbCorrelations(chrom, targetGrt, elderBbi, youngerBbi, numColIx, c, cInEnriched,
	&elderTotalSpan, &youngerTotalSpan, &overlapTotalSpan);
    }

/* Make up correlation structure and save. */
if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairCorrelation"))
    {
    struct edwQaPairCorrelation *cor;
    AllocVar(cor);
    cor->elderFileId = elderVf->fileId;
    cor->youngerFileId = youngerVf->fileId;
    cor->pearsonOverall = correlateResult(c);
    cor->pearsonInEnriched = correlateResult(cInEnriched);
    edwQaPairCorrelationSaveToDb(conn, cor, "edwQaPairCorrelation", 128);
    freez(&cor);
    }

/* Also make up sample structure and save.  */
if (!pairExists(conn, elderEf->id, youngerEf->id, "edwQaPairSampleOverlap"))
    {
    struct edwQaPairSampleOverlap *sam;
    AllocVar(sam);
    sam->elderFileId = elderVf->fileId;
    sam->youngerFileId = youngerVf->fileId;
    sam->elderSampleBases = elderTotalSpan;
    sam->youngerSampleBases = youngerTotalSpan;
    sam->sampleOverlapBases = overlapTotalSpan;
    setSampleSampleEnrichment(sam, format, assembly, elderVf, youngerVf);
    edwQaPairSampleOverlapSaveToDb(conn, sam, "edwQaPairSampleOverlap", 128);
    freez(&sam);
    }

genomeRangeTreeFree(&targetGrt);
correlateFree(&c);
bigBedFileClose(&youngerBbi);
bigBedFileClose(&elderBbi);
freez(&youngerPath);
freez(&elderPath);
}