void fastqRepeatQa(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf) /* Do repeat QA if possible on fastq file. */ { /* First see if total repeat content is already in our table, in which case we are done. */ long long fileId = ef->id; char query[512]; sqlSafef(query, sizeof(query), "select count(*) from edwQaRepeat where fileId=%lld and repeatClass='total'" , fileId); if (sqlQuickNum(conn, query) != 0) return; /* We've done this already */ /* Get sample file name from fastq table. */ struct edwFastqFile *fqf = edwFastqFileForFileId(conn, fileId); if (fqf == NULL) errAbort("No edqFastqRecord for %s", vf->licensePlate); char *fastqPath = fqf->sampleFileName; char bwaIndex[PATH_LEN]; safef(bwaIndex, sizeof(bwaIndex), "%s%s/repeatMasker/repeatMasker.fa", edwValDataDir, vf->ucscDb); char cmd[3*PATH_LEN]; char *saiName = cloneString(rTempName(edwTempDir(), "edwQaRepeat", ".sai")); safef(cmd, sizeof(cmd), "bwa aln %s %s > %s", bwaIndex, fastqPath, saiName); mustSystem(cmd); char *samName = cloneString(rTempName(edwTempDir(), "edwQaRepeat", ".sam")); safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", bwaIndex, saiName, fastqPath, samName); mustSystem(cmd); remove(saiName); char *raName = cloneString(rTempName(edwTempDir(), "edwQaRepeat", ".ra")); safef(cmd, sizeof(cmd), "edwSamRepeatAnalysis %s %s", samName, raName); mustSystem(cmd); verbose(2, "mustSystem(%s)\n", cmd); remove(samName); raIntoEdwRepeatQa(raName, conn, fileId); remove(raName); #ifdef SOON #endif /* SOON */ freez(&saiName); freez(&samName); freez(&raName); edwFastqFileFree(&fqf); }
void screenFastqForContaminants(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf) /* The ef/vf point to same file, which is fastq format. Set alignments up for a sample against all * contamination targets. */ { /* Get target list and see if we have any work to do. */ struct edwQaContamTarget *target, *targetList; targetList = getContamTargets(conn, ef, vf); boolean needScreen = FALSE; for (target = targetList; target != NULL; target = target->next) { if (edwQaContamMade(conn, ef->id, target->id) <= 0) { needScreen = TRUE; break; } } if (needScreen) { verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName); /* Get fastq record. */ struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, ef->id); if (fqf == NULL) errAbort("No edwFastqFile record for file id %lld", (long long)ef->id); /* Create downsampled fastq in temp directory - downsampled more than default even. */ char sampleFastqName[PATH_LEN]; edwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName); verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName); for (target = targetList; target != NULL; target = target->next) { /* Get assembly associated with target */ int assemblyId = target->assemblyId; char query[512]; sqlSafef(query, sizeof(query), "select * from edwAssembly where id=%d", assemblyId); struct edwAssembly *newAsm = edwAssemblyLoadByQuery(conn, query); if (newAsm == NULL) errAbort("warehouse edwQaContamTarget %d not found", assemblyId); /* If we don't already have a match, do work to create contam record. */ int matchCount = edwQaContamMade(conn, ef->id, target->id); if (matchCount <= 0) { /* We run the bed-file maker, just for side effect calcs. */ double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio; edwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL, &mapRatio, &depth, &sampleCoverage, &uniqueMapRatio); verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", newAsm->name, mapRatio, depth, sampleCoverage); struct edwQaContam contam = {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio}; edwQaContamSaveToDb(conn, &contam, "edwQaContam", 256); } edwAssemblyFree(&newAsm); } edwQaContamTargetFreeList(&targetList); if (keepTemp) verbose(1, "%s\n", sampleFastqName); else remove(sampleFastqName); edwFastqFileFree(&fqf); } }