示例#1
0
void doReplicateQa(struct sqlConnection *conn, struct edwFile *ef)
/* Try and do replicate level QA - find matching file and do correlation-like
 * things. */
{
/* Get validated file info.  If not validated we don't bother. */
struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

char *replicate = vf->replicate;
if (!isEmpty(replicate) && !sameString(replicate, "n/a") 
    && !sameString(replicate, "pooled")) // If expanding this, to expand bits in edwWebBrowse as well
    {
    /* Try to find other replicates of same experiment, format, and output type. */
    struct edwValidFile *elder, *elderList = edwFindElderReplicates(conn, vf);
    if (elderList != NULL)
	{
	struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, vf->ucscDb);
	for (elder = elderList; elder != NULL; elder = elder->next)
	    {
	    doReplicatePair(conn, assembly, edwFileFromIdOrDie(conn, elder->fileId), elder, ef, vf);
	    }
	edwAssemblyFree(&assembly);
	}
    }
edwValidFileFree(&vf);
}
struct edwQaContamTarget *getContamTargets(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf)
/* Get list of contamination targets for file - basically all targets that aren't in same
 * taxon as self. */
{
assert(vf->ucscDb != NULL);
struct edwAssembly *origAsm = edwAssemblyForUcscDb(conn, vf->ucscDb);
assert(origAsm != NULL);
char query[256];
sqlSafef(query, sizeof(query), 
    "select edwQaContamTarget.* from edwQaContamTarget,edwAssembly "
    "where edwQaContamTarget.assemblyId = edwAssembly.id "
         " and edwAssembly.taxon != %d", origAsm->taxon);
struct edwQaContamTarget *targetList  = edwQaContamTargetLoadByQuery(conn, query);
edwAssemblyFree(&origAsm);
return targetList;
}
示例#3
0
void doEnrichments(struct sqlConnection *conn, struct edwFile *ef, char *path, 
    struct hash *assemblyToTarget)
/* Calculate enrichments on for all targets file. The targetList and the
 * grtList are in the same order. */
{
/* Get validFile from database. */
struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;	/* We can only work if have validFile table entry */

if (!isEmpty(vf->enrichedIn))
    {
    /* Get our assembly */
    char *format = vf->format;
    char *ucscDb = vf->ucscDb;
    struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, ucscDb);

    struct target *targetList = hashFindVal(assemblyToTarget, assembly->name);
    if (targetList == NULL)
	{
	targetList = targetsForAssembly(conn, assembly);
	if (targetList == NULL)
	    errAbort("No targets for assembly %s", assembly->name);
	hashAdd(assemblyToTarget, assembly->name, targetList);
	}

    /* Loop through targetList zeroing out existing ovelaps. */
    struct target *target;
    boolean allSkip = TRUE;
    for (target = targetList; target != NULL; target = target->next)
	{
	target->overlapBases = target->uniqOverlapBases = 0;
	target->skip = enrichmentExists(conn, ef, target->target);
	if (!target->skip)
	    allSkip = FALSE;
	}

    /* Do a big dispatch based on format. */
    if (!allSkip)
	{
	if (sameString(format, "fastq"))
	    doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "bigWig"))
	    doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList);
	else if (edwIsSupportedBigBedFormat(format))
	    doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "gtf"))
	    doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "gff"))
	    doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "bam"))
	    doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "unknown"))
	    verbose(2, "Unknown format in doEnrichments(%s), that's chill.", ef->edwFileName);
	else
	    errAbort("Unrecognized format %s in doEnrichments(%s)", format, path);
	}

    /* Clean up and go home. */
    edwAssemblyFree(&assembly);
    }
edwValidFileFree(&vf);
}
void screenFastqForContaminants(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf)
/* The ef/vf point to same file, which is fastq format.  Set alignments up for a sample against all
 * contamination targets. */
{
/* Get target list and see if we have any work to do. */
struct edwQaContamTarget *target, *targetList;
targetList = getContamTargets(conn, ef, vf);
boolean needScreen = FALSE;
for (target = targetList; target != NULL; target = target->next)
    {
    if (edwQaContamMade(conn, ef->id, target->id) <= 0)
        {
	needScreen = TRUE;
	break;
	}
    }

if (needScreen)
    {
    verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName);

    /* Get fastq record. */
    struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, ef->id);
    if (fqf == NULL)
        errAbort("No edwFastqFile record for file id %lld", (long long)ef->id);

    /* Create downsampled fastq in temp directory - downsampled more than default even. */
    char sampleFastqName[PATH_LEN];
    edwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName);
    verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName);

    for (target = targetList; target != NULL; target = target->next)
	{
	/* Get assembly associated with target */
	int assemblyId = target->assemblyId;
	char query[512];
	sqlSafef(query, sizeof(query), "select * from edwAssembly where id=%d", assemblyId);
	struct edwAssembly *newAsm = edwAssemblyLoadByQuery(conn, query);
	if (newAsm == NULL)
	    errAbort("warehouse edwQaContamTarget %d not found", assemblyId);

	/* If we don't already have a match, do work to create contam record. */
	int matchCount = edwQaContamMade(conn, ef->id, target->id);
	if (matchCount <= 0)
	    {
	    /* We run the bed-file maker, just for side effect calcs. */
	    double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio;
	    edwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL,
		&mapRatio, &depth, &sampleCoverage, &uniqueMapRatio);

	    verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", 
		newAsm->name, mapRatio, depth, sampleCoverage);
	    struct edwQaContam contam = 
		    {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio};
	    edwQaContamSaveToDb(conn, &contam, "edwQaContam", 256);
	    }
	edwAssemblyFree(&newAsm);
	}
    edwQaContamTargetFreeList(&targetList);
    if (keepTemp)
        verbose(1, "%s\n", sampleFastqName);
    else
	remove(sampleFastqName);
    edwFastqFileFree(&fqf);
    }
}