struct edwAssembly *edwAssemblyForUcscDb(struct sqlConnection *conn, char *ucscDb)
/* Look up the edwAssembly row whose ucscDb column equals the given name and
 * return it.  If the load comes back NULL, errAbort is called with a message
 * naming the database that could not be found. */
{
char sql[256];
struct edwAssembly *found;

sqlSafef(sql, sizeof(sql), "select * from edwAssembly where ucscDb='%s'", ucscDb);
found = edwAssemblyLoadByQuery(conn, sql);
if (found == NULL)
    {
    errAbort("Can't find assembly for %s", ucscDb);
    }
return found;
}
void edwFixTargetSeq(char *when) /* edwFixTargetSeq - Fill in new fields about target seq to edwBamFile and edwAssembly.. */ { struct sqlConnection *conn = edwConnectReadWrite(); struct edwAssembly *as, *asList = edwAssemblyLoadByQuery(conn, "select * from edwAssembly"); char query[512]; for (as = asList; as != NULL; as = as->next) { char *twoBitFileName = edwPathForFileId(conn, as->twoBitId); struct twoBitFile *tbf = twoBitOpen(twoBitFileName); safef(query, sizeof(query), "update edwAssembly set seqCount=%u where id=%u", tbf->seqCount, as->id); sqlUpdate(conn, query); freez(&twoBitFileName); twoBitClose(&tbf); } edwAssemblyFreeList(&asList); struct edwBamFile *bam, *bamList = edwBamFileLoadByQuery(conn, "select * from edwBamFile"); for (bam = bamList; bam != NULL; bam = bam->next) { char *fileName = edwPathForFileId(conn, bam->fileId); samfile_t *sf = samopen(fileName, "rb", NULL); if (sf == NULL) errnoAbort("Couldn't open %s.\n", fileName); bam_header_t *head = sf->header; if (head == NULL) errAbort("Aborting ... Bad BAM header in file: %s", fileName); /* Sum up some target sizes. */ long long targetBaseCount = 0; /* Total size of all bases in target seq */ int i; for (i=0; i<head->n_targets; ++i) targetBaseCount += head->target_len[i]; safef(query, sizeof(query), "update edwBamFile set targetBaseCount=%lld,targetSeqCount=%u where id=%u", targetBaseCount, (unsigned)head->n_targets, bam->id); sqlUpdate(conn, query); samclose(sf); freez(&fileName); } }
void screenFastqForContaminants(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf) /* The ef/vf point to same file, which is fastq format. Set alignments up for a sample against all * contamination targets. */ { /* Get target list and see if we have any work to do. */ struct edwQaContamTarget *target, *targetList; targetList = getContamTargets(conn, ef, vf); boolean needScreen = FALSE; for (target = targetList; target != NULL; target = target->next) { if (edwQaContamMade(conn, ef->id, target->id) <= 0) { needScreen = TRUE; break; } } if (needScreen) { verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName); /* Get fastq record. */ struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, ef->id); if (fqf == NULL) errAbort("No edwFastqFile record for file id %lld", (long long)ef->id); /* Create downsampled fastq in temp directory - downsampled more than default even. */ char sampleFastqName[PATH_LEN]; edwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName); verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName); for (target = targetList; target != NULL; target = target->next) { /* Get assembly associated with target */ int assemblyId = target->assemblyId; char query[512]; sqlSafef(query, sizeof(query), "select * from edwAssembly where id=%d", assemblyId); struct edwAssembly *newAsm = edwAssemblyLoadByQuery(conn, query); if (newAsm == NULL) errAbort("warehouse edwQaContamTarget %d not found", assemblyId); /* If we don't already have a match, do work to create contam record. */ int matchCount = edwQaContamMade(conn, ef->id, target->id); if (matchCount <= 0) { /* We run the bed-file maker, just for side effect calcs. 
*/ double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio; edwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL, &mapRatio, &depth, &sampleCoverage, &uniqueMapRatio); verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", newAsm->name, mapRatio, depth, sampleCoverage); struct edwQaContam contam = {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio}; edwQaContamSaveToDb(conn, &contam, "edwQaContam", 256); } edwAssemblyFree(&newAsm); } edwQaContamTargetFreeList(&targetList); if (keepTemp) verbose(1, "%s\n", sampleFastqName); else remove(sampleFastqName); edwFastqFileFree(&fqf); } }