void doReplicateQa(struct sqlConnection *conn, struct edwFile *ef) /* Try and do replicate level QA - find matching file and do correlation-like * things. */ { /* Get validated file info. If not validated we don't bother. */ struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; char *replicate = vf->replicate; if (!isEmpty(replicate) && !sameString(replicate, "n/a") && !sameString(replicate, "pooled")) // If expanding this, to expand bits in edwWebBrowse as well { /* Try to find other replicates of same experiment, format, and output type. */ struct edwValidFile *elder, *elderList = edwFindElderReplicates(conn, vf); if (elderList != NULL) { struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, vf->ucscDb); for (elder = elderList; elder != NULL; elder = elder->next) { doReplicatePair(conn, assembly, edwFileFromIdOrDie(conn, elder->fileId), elder, ef, vf); } edwAssemblyFree(&assembly); } } edwValidFileFree(&vf); }
struct edwQaContamTarget *getContamTargets(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf) /* Get list of contamination targets for file - basically all targets that aren't in same * taxon as self. */ { assert(vf->ucscDb != NULL); struct edwAssembly *origAsm = edwAssemblyForUcscDb(conn, vf->ucscDb); assert(origAsm != NULL); char query[256]; sqlSafef(query, sizeof(query), "select edwQaContamTarget.* from edwQaContamTarget,edwAssembly " "where edwQaContamTarget.assemblyId = edwAssembly.id " " and edwAssembly.taxon != %d", origAsm->taxon); struct edwQaContamTarget *targetList = edwQaContamTargetLoadByQuery(conn, query); edwAssemblyFree(&origAsm); return targetList; }
void doEnrichments(struct sqlConnection *conn, struct edwFile *ef, char *path, struct hash *assemblyToTarget) /* Calculate enrichments on for all targets file. The targetList and the * grtList are in the same order. */ { /* Get validFile from database. */ struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; /* We can only work if have validFile table entry */ if (!isEmpty(vf->enrichedIn)) { /* Get our assembly */ char *format = vf->format; char *ucscDb = vf->ucscDb; struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, ucscDb); struct target *targetList = hashFindVal(assemblyToTarget, assembly->name); if (targetList == NULL) { targetList = targetsForAssembly(conn, assembly); if (targetList == NULL) errAbort("No targets for assembly %s", assembly->name); hashAdd(assemblyToTarget, assembly->name, targetList); } /* Loop through targetList zeroing out existing ovelaps. */ struct target *target; boolean allSkip = TRUE; for (target = targetList; target != NULL; target = target->next) { target->overlapBases = target->uniqOverlapBases = 0; target->skip = enrichmentExists(conn, ef, target->target); if (!target->skip) allSkip = FALSE; } /* Do a big dispatch based on format. */ if (!allSkip) { if (sameString(format, "fastq")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bigWig")) doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList); else if (edwIsSupportedBigBedFormat(format)) doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gtf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gff")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bam")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "unknown")) verbose(2, "Unknown format in doEnrichments(%s), that's chill.", ef->edwFileName); else errAbort("Unrecognized format %s in doEnrichments(%s)", format, path); } /* Clean up and go home. */ edwAssemblyFree(&assembly); } edwValidFileFree(&vf); }
void screenFastqForContaminants(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf) /* The ef/vf point to same file, which is fastq format. Set alignments up for a sample against all * contamination targets. */ { /* Get target list and see if we have any work to do. */ struct edwQaContamTarget *target, *targetList; targetList = getContamTargets(conn, ef, vf); boolean needScreen = FALSE; for (target = targetList; target != NULL; target = target->next) { if (edwQaContamMade(conn, ef->id, target->id) <= 0) { needScreen = TRUE; break; } } if (needScreen) { verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName); /* Get fastq record. */ struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, ef->id); if (fqf == NULL) errAbort("No edwFastqFile record for file id %lld", (long long)ef->id); /* Create downsampled fastq in temp directory - downsampled more than default even. */ char sampleFastqName[PATH_LEN]; edwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName); verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName); for (target = targetList; target != NULL; target = target->next) { /* Get assembly associated with target */ int assemblyId = target->assemblyId; char query[512]; sqlSafef(query, sizeof(query), "select * from edwAssembly where id=%d", assemblyId); struct edwAssembly *newAsm = edwAssemblyLoadByQuery(conn, query); if (newAsm == NULL) errAbort("warehouse edwQaContamTarget %d not found", assemblyId); /* If we don't already have a match, do work to create contam record. */ int matchCount = edwQaContamMade(conn, ef->id, target->id); if (matchCount <= 0) { /* We run the bed-file maker, just for side effect calcs. */ double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio; edwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL, &mapRatio, &depth, &sampleCoverage, &uniqueMapRatio); verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", newAsm->name, mapRatio, depth, sampleCoverage); struct edwQaContam contam = {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio}; edwQaContamSaveToDb(conn, &contam, "edwQaContam", 256); } edwAssemblyFree(&newAsm); } edwQaContamTargetFreeList(&targetList); if (keepTemp) verbose(1, "%s\n", sampleFastqName); else remove(sampleFastqName); edwFastqFileFree(&fqf); } }