void doReplicateQa(struct sqlConnection *conn, struct cdwFile *ef)
/* Replicate-level QA.  Looks up the validated-file record for ef, finds elder
 * replicates of it, and runs doReplicatePair() against each elder that is on the
 * same simplified assembly.  Does nothing if ef has no validated-file record or
 * if its replicate field is empty, "n/a" or "pooled". */
{
/* No validated-file record means there is nothing to compare. */
struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

/* Replicate values that do not name a real replicate are skipped.  If this
 * list is ever expanded, the matching bits in cdwWebBrowse need expanding too. */
char *rep = vf->replicate;
if (isEmpty(rep) || sameString(rep, "n/a") || sameString(rep, "pooled"))
    {
    cdwValidFileFree(&vf);
    return;
    }

/* Try to find other replicates of same experiment, format, and output type. */
struct cdwValidFile *elderList = cdwFindElderReplicates(conn, vf);
if (elderList == NULL)
    {
    cdwValidFileFree(&vf);
    return;
    }

/* Pair this file with each elder whose simplified assembly name matches ours. */
char *targetDb = cdwSimpleAssemblyName(vf->ucscDb);
struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetDb);
struct cdwValidFile *older = elderList;
while (older != NULL)
    {
    char *olderDb = cdwSimpleAssemblyName(older->ucscDb);
    if (sameString(targetDb, olderDb))
        {
        struct cdwFile *olderFile = cdwFileFromIdOrDie(conn, older->fileId);
        doReplicatePair(conn, assembly, olderFile, older, ef, vf);
        }
    older = older->next;
    }

cdwAssemblyFree(&assembly);
cdwValidFileFree(&vf);
}
void doEnrichments(struct sqlConnection *conn, struct cdwFile *ef, char *path, struct hash *assemblyToTarget) /* Calculate enrichments on for all targets file. The targetList and the * grtList are in the same order. */ { /* Get validFile from database. */ struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; /* We can only work if have validFile table entry */ if (!isEmpty(vf->enrichedIn) && !sameWord(vf->ucscDb, "unknown") && !isEmpty(vf->ucscDb) && !sameWord(vf->format, "unknown")) { /* Get our assembly */ char *format = vf->format; char *ucscDb = vf->ucscDb; char *targetName = cdwSimpleAssemblyName(ucscDb); struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetName); struct target *targetList = hashFindVal(assemblyToTarget, assembly->name); if (targetList == NULL) { targetList = targetsForAssembly(conn, assembly); if (targetList == NULL) errAbort("No targets for assembly %s", assembly->name); hashAdd(assemblyToTarget, assembly->name, targetList); } /* Loop through targetList zeroing out existing ovelaps. */ struct target *target; boolean allSkip = TRUE; for (target = targetList; target != NULL; target = target->next) { target->overlapBases = target->uniqOverlapBases = 0; target->skip = enrichmentExists(conn, ef, target->target); if (!target->skip) allSkip = FALSE; } /* Do a big dispatch based on format. 
*/ if (!allSkip) { if (sameString(format, "fastq")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bigWig")) doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList); else if (startsWith("bed_", format)) doEnrichmentsFromBed(conn, ef, vf, assembly, targetList); else if (cdwIsSupportedBigBedFormat(format) || sameString(format, "bigBed")) doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gtf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gff")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bam")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "vcf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "idat")) verbose(2, "Ignoring idat %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "customTrack")) verbose(2, "Ignoring customTrack %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "rcc")) verbose(2, "Ignoring rcc %s, in doEnrichments.", ef->cdwFileName); else if (sameString(format, "bam.bai")) verbose(2, "Ignoring bam.bai %s, in doEnrichments - just and index file.", ef->cdwFileName); else if (sameString(format, "vcf.gz.tbi")) verbose(2, "Ignoring vcf.gz.tbi %s, in doEnrichments - just and index file.", ef->cdwFileName); else if (sameString(format, "unknown")) verbose(2, "Unknown format in doEnrichments(%s), that's ok.", ef->cdwFileName); else errAbort("Unrecognized format %s in doEnrichments(%s)", format, path); } /* Clean up and go home. */ cdwAssemblyFree(&assembly); } cdwValidFileFree(&vf); }
void pairedEndQa(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf) /* Look for other end, do a pairwise alignment, and save results in database. */ { verbose(2, "pairedEndQa on %u %s %s\n", ef->id, ef->cdwFileName, ef->submitFileName); /* Get other end, return if not found. */ struct cdwValidFile *otherVf = cdwOppositePairedEnd(conn, ef, vf); if (otherVf == NULL) return; if (otherVf->fileId > vf->fileId) return; struct cdwValidFile *vf1, *vf2; struct cdwQaPairedEndFastq *pair = cdwQaPairedEndFastqFromVfs(conn, vf, otherVf, &vf1, &vf2); if (pair != NULL) { cdwValidFileFree(&otherVf); return; } /* Get target assembly and figure out path for BWA index. */ struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, vf->ucscDb); assert(assembly != NULL); char genoFile[PATH_LEN]; safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", cdwValDataDir, assembly->ucscDb, assembly->ucscDb); verbose(1, "aligning subsamples on %u vs. %u paired reads\n", vf1->fileId, vf2->fileId); /* Make alignments of subsamples. */ char *sample1 = NULL, *sample2 = NULL, *sai1 = NULL, *sai2 = NULL; makeTmpSai(conn, vf1, genoFile, &sample1, &sai1); makeTmpSai(conn, vf2, genoFile, &sample2, &sai2); /* Make paired end alignment */ char *tmpSam = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".sam")); char command[6*PATH_LEN]; safef(command, sizeof(command), "bwa sampe -n 1 -N 1 -f %s %s %s %s %s %s" , tmpSam, genoFile, sai1, sai2, sample1, sample2); mustSystem(command); /* Make ra file with pairing statistics */ char *tmpRa = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".ra")); safef(command, sizeof(command), "edwSamPairedEndStats -maxInsert=%d %s %s", maxInsert, tmpSam, tmpRa); mustSystem(command); /* Read RA file into variables. */ struct cdwQaPairedEndFastq *pe = cdwQaPairedEndFastqOneFromRa(tmpRa); /* Update database with record. 
*/ struct sqlConnection *freshConn = cdwConnectReadWrite(); char query[256]; sqlSafef(query, sizeof(query), "insert into cdwQaPairedEndFastq " "(fileId1,fileId2,concordance,distanceMean,distanceStd,distanceMin,distanceMax,recordComplete) " " values (%u,%u,%g,%g,%g,%g,%g,1)" , vf1->fileId, vf2->fileId, pe->concordance, pe->distanceMean , pe->distanceStd, pe->distanceMin, pe->distanceMax); sqlUpdate(conn, query); sqlDisconnect(&freshConn); /* Clean up and go home. */ cdwValidFileFree(&otherVf); remove(sample1); remove(sample2); remove(sai1); remove(sai2); remove(tmpSam); remove(tmpRa); #ifdef SOON #endif /* SOON */ freez(&sample1); freez(&sample2); freez(&sai1); freez(&sai2); freez(&tmpSam); freez(&tmpRa); cdwQaPairedEndFastqFree(&pe); cdwValidFileFree(&otherVf); }