void doReplicateQa(struct sqlConnection *conn, struct cdwFile *ef)
/* Replicate-level QA for ef.  Finds other replicates of the same experiment,
 * format, and output type and passes each one whose simplified assembly name
 * matches ef's to doReplicatePair.  Does nothing if ef has no validated-file
 * record, or if its replicate field is empty, "n/a" or "pooled". */
{
struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;     /* Not validated, so we don't bother. */

/* Only look for elders when this is a real replicate.  If expanding this
 * test, expand the corresponding bits in cdwWebBrowse as well. */
char *rep = vf->replicate;
struct cdwValidFile *elderList = NULL;
if (!(isEmpty(rep) || sameString(rep, "n/a") || sameString(rep, "pooled")))
    elderList = cdwFindElderReplicates(conn, vf);

if (elderList != NULL)
    {
    char *targetDb = cdwSimpleAssemblyName(vf->ucscDb);
    struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetDb);
    struct cdwValidFile *elder = elderList;
    while (elder != NULL)
        {
        /* Pair up only with elders in the same simplified assembly. */
        char *elderDb = cdwSimpleAssemblyName(elder->ucscDb);
        if (sameString(targetDb, elderDb))
            {
            struct cdwFile *elderFile = cdwFileFromIdOrDie(conn, elder->fileId);
            doReplicatePair(conn, assembly, elderFile, elder, ef, vf);
            }
        elder = elder->next;
        }
    cdwAssemblyFree(&assembly);
    }
cdwValidFileFree(&vf);
}
void cdwMakeRepeatQa(int startFileId, int endFileId)
/* cdwMakeRepeatQa - Figure out what proportion of things align to repeats.
 * Opens a read/write connection, walks the files returned by
 * cdwFileAllIntactBetween(startFileId, endFileId) and runs fastqRepeatQa on
 * each one whose validated format is "fastq". */
{
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwFile *ef, *efList = cdwFileAllIntactBetween(conn, startFileId, endFileId);
for (ef = efList; ef != NULL; ef = ef->next)
    {
    struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
    if (vf == NULL)
        continue;       /* No validated-file record, nothing to check. */
    if (sameString(vf->format, "fastq"))
        fastqRepeatQa(conn, ef, vf);
    /* Release the per-file record so it is not leaked on every iteration. */
    cdwValidFileFree(&vf);
    }
sqlDisconnect(&conn);
}
void cdwMakePairedEndQa(unsigned startId, unsigned endId)
/* cdwMakePairedEndQa - Do alignments of paired-end fastq files and calculate
 * distribution of insert size.  Opens a read/write connection, walks the files
 * returned by cdwFileAllIntactBetween(startId, endId) and runs pairedEndQa on
 * each one whose validated format is "fastq" and whose pairedEnd field is
 * non-empty. */
{
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwFile *ef, *efList = cdwFileAllIntactBetween(conn, startId, endId);
for (ef = efList; ef != NULL; ef = ef->next)
    {
    struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
    if (vf == NULL)
        continue;       /* No validated-file record, nothing to check. */
    if (sameString(vf->format, "fastq") && !isEmpty(vf->pairedEnd))
        pairedEndQa(conn, ef, vf);
    /* Release the per-file record so it is not leaked on every iteration. */
    cdwValidFileFree(&vf);
    }
sqlDisconnect(&conn);
}
void cdwFakeManifestFromSubmit(char *submitIdString, char *outDir) /* cdwFakeManifestFromSubmit - Create a fake submission based on a real one that is in the warehouse. */ { struct sqlConnection *conn = cdwConnect(); char query[512]; sqlSafef(query, sizeof(query), "select * from cdwSubmit where id=%s", submitIdString); struct cdwSubmit *submit = cdwSubmitLoadByQuery(conn, query); if (submit == NULL) errAbort("Can't find submission %s", submitIdString); uglyf("%d files in query\n", submit->newFiles); sqlSafef(query, sizeof(query), "select * from cdwFile where submitId=%s", submitIdString); struct cdwFile *ef, *efList = cdwFileLoadByQuery(conn, query); FILE *maniF = NULL, *valiF = NULL; for (ef = efList; ef != NULL; ef = ef->next) { struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id); if (vf != NULL) { /* First time through create out directory and open output files. */ if (maniF == NULL) { char *fakeVersion = "##validateManifest version 1.7"; makeDirsOnPath(outDir); setCurrentDir(outDir); maniF = mustOpen("manifest.txt", "w"); printSharedHeader(maniF); fprintf(maniF, "\n"); fprintf(maniF, "%s\n", fakeVersion); valiF = mustOpen("validated.txt", "w"); printSharedHeader(valiF); fprintf(valiF, "\tmd5_sum\tsize\tmodified\tvalid_key\n"); fprintf(valiF, "%s\n", fakeVersion); } /* Figure out file names */ char cdwPath[PATH_LEN], rootName[FILENAME_LEN], ext[FILEEXT_LEN]; safef(cdwPath, sizeof(cdwPath), "%s%s", cdwRootDir, ef->cdwFileName); splitPath(ef->cdwFileName, NULL, rootName, ext); char localPath[PATH_LEN]; safef(localPath, sizeof(localPath), "%s%s", rootName, ext); /* Create sym-linked file and write to manifest */ symlink(cdwPath, localPath); fprint2(maniF, valiF, "%s", localPath); /* Write other columns shared between manifest and validated */ fprint2(maniF, valiF, "\t%s", vf->format); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->outputType)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->experiment)); fprint2(maniF, valiF, "\t%s", 
naForEmpty(vf->enrichedIn)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->ucscDb)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->replicate)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->part)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->pairedEnd)); fprintf(maniF, "\n"); /* Print out remaining fields in validated.txt */ fprintf(valiF, "\t%s\t%lld\t%lld\n", ef->md5, ef->size, ef->updateTime); } } carefulClose(&maniF); carefulClose(&valiF); }
void doEnrichments(struct sqlConnection *conn, struct cdwFile *ef, char *path, struct hash *assemblyToTarget) /* Calculate enrichments on for all targets file. The targetList and the * grtList are in the same order. */ { /* Get validFile from database. */ struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; /* We can only work if have validFile table entry */ if (!isEmpty(vf->enrichedIn) && !sameWord(vf->ucscDb, "unknown") && !isEmpty(vf->ucscDb) && !sameWord(vf->format, "unknown")) { /* Get our assembly */ char *format = vf->format; char *ucscDb = vf->ucscDb; char *targetName = cdwSimpleAssemblyName(ucscDb); struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetName); struct target *targetList = hashFindVal(assemblyToTarget, assembly->name); if (targetList == NULL) { targetList = targetsForAssembly(conn, assembly); if (targetList == NULL) errAbort("No targets for assembly %s", assembly->name); hashAdd(assemblyToTarget, assembly->name, targetList); } /* Loop through targetList zeroing out existing ovelaps. */ struct target *target; boolean allSkip = TRUE; for (target = targetList; target != NULL; target = target->next) { target->overlapBases = target->uniqOverlapBases = 0; target->skip = enrichmentExists(conn, ef, target->target); if (!target->skip) allSkip = FALSE; } /* Do a big dispatch based on format. 
*/ if (!allSkip) { if (sameString(format, "fastq")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bigWig")) doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList); else if (startsWith("bed_", format)) doEnrichmentsFromBed(conn, ef, vf, assembly, targetList); else if (cdwIsSupportedBigBedFormat(format) || sameString(format, "bigBed")) doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gtf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gff")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bam")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "vcf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "idat")) verbose(2, "Ignoring idat %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "customTrack")) verbose(2, "Ignoring customTrack %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "rcc")) verbose(2, "Ignoring rcc %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "bam.bai")) verbose(2, "Ignoring bam.bai %s, in doEnrichments - just and index file.", ef->cdwFileName); else if (sameString(format, "vcf.gz.tbi")) verbose(2, "Ignoring vcf.gz.tbi %s, in doEnrichments - just and index file.", ef->cdwFileName); else if (sameString(format, "unknown")) verbose(2, "Unknown format in doEnrichments(%s), that's ok.", ef->cdwFileName); else errAbort("Unrecognized format %s in doEnrichments(%s)", format, path); } /* Clean up and go home. */ cdwAssemblyFree(&assembly); } cdwValidFileFree(&vf); }