void doReplicateQa(struct sqlConnection *conn, struct edwFile *ef) /* Try and do replicate level QA - find matching file and do correlation-like * things. */ { /* Get validated file info. If not validated we don't bother. */ struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; char *replicate = vf->replicate; if (!isEmpty(replicate) && !sameString(replicate, "n/a") && !sameString(replicate, "pooled")) // If expanding this, to expand bits in edwWebBrowse as well { /* Try to find other replicates of same experiment, format, and output type. */ struct edwValidFile *elder, *elderList = edwFindElderReplicates(conn, vf); if (elderList != NULL) { struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, vf->ucscDb); for (elder = elderList; elder != NULL; elder = elder->next) { doReplicatePair(conn, assembly, edwFileFromIdOrDie(conn, elder->fileId), elder, ef, vf); } edwAssemblyFree(&assembly); } } edwValidFileFree(&vf); }
void doContaminationQa(struct sqlConnection *conn, struct edwFile *ef)
/* Try and do contamination level QA - mostly mapping fastq files to other
 * genomes.  Files that have no edwValidFile record, and files whose format is
 * not "fastq", are skipped without comment. */
{
/* Get validated file info.  If not validated we don't bother. */
struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

/* We only work on fastq. */
if (sameString(vf->format, "fastq"))
    screenFastqForContaminants(conn, ef, vf);

/* The record was loaded by this function, so release it on both the
 * fastq and the non-fastq path rather than leaking one per call. */
edwValidFileFree(&vf);
}
void edwMakeRepeatQa(int startFileId, int endFileId) /* edwMakeRepeatQa - Figure out what proportion of things align to repeats.. */ { struct sqlConnection *conn = edwConnectReadWrite(); struct edwFile *ef, *efList = edwFileAllIntactBetween(conn, startFileId, endFileId); for (ef = efList; ef != NULL; ef = ef->next) { struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id); if (vf != NULL) { if (sameString(vf->format, "fastq")) fastqRepeatQa(conn, ef, vf); } } sqlDisconnect(&conn); }
long long totalReadsForExp(struct sqlConnection *conn, struct fullExperiment *exp, struct edwExperiment *ee, struct eapGraph *eg) /* Return total number of reads in both good replicates of this experiment */ { boolean uglyOne = TRUE; if (uglyOne) uglyf("Got you %s %s\n", exp->name, exp->exp->biosample); // We start with broad peaks because they are relatively rare, only generated by hotspot long long total = 0; long long pooledPeakId = pooledBroadPeaksForExp(exp); if (uglyOne) uglyf("Broad is %lld\n", pooledPeakId); struct slRef *ref, *refList = NULL; eapGraphAncestorsOfFormat(eg, pooledPeakId, "fastq", -1, &refList); for (ref = refList; ref != NULL; ref = ref->next) { struct eapInput *in = ref->val; struct edwValidFile *vf = edwValidFileFromFileId(conn, in->fileId); if (uglyOne) uglyf("Fastq is %u, reads %lld\n", vf->fileId, (long long)vf->itemCount); total += vf->itemCount; } return total; }
void edwChangeFormat(char *format, int idCount, char *idStrings[])
/* edwChangeFormat - Change format and force a revalidation for a file.
 *   format    - new format, passed through to changeFormat() for every file
 *   idCount   - number of entries in idStrings
 *   idStrings - file ids as ascii numbers
 * Every id is looked up before anything is changed, so one bad id aborts the
 * run before any file has been touched.  Returns without opening a database
 * connection when idCount is zero or negative. */
{
/* A variable length array must have a size greater than zero, so leave
 * before declaring one when there is nothing to process. */
if (idCount <= 0)
    return;

struct sqlConnection *conn = edwConnectReadWrite();

/* Convert ascii id's to valid file ids so we catch errors early. */
struct edwValidFile *vfs[idCount];
int i;
for (i=0; i<idCount; ++i)
    {
    long long id = sqlLongLong(idStrings[i]);
    vfs[i] = edwValidFileFromFileId(conn, id);
    if (vfs[i] == NULL)
        errAbort("%lld is not a fileId in the edwValidFile table", id);
    }

/* Loop through each file and change format. */
for (i=0; i<idCount; ++i)
    changeFormat(conn, vfs[i], format);

sqlDisconnect(&conn);
}
void doEnrichments(struct sqlConnection *conn, struct edwFile *ef, char *path, struct hash *assemblyToTarget) /* Calculate enrichments on for all targets file. The targetList and the * grtList are in the same order. */ { /* Get validFile from database. */ struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id); if (vf == NULL) return; /* We can only work if have validFile table entry */ if (!isEmpty(vf->enrichedIn)) { /* Get our assembly */ char *format = vf->format; char *ucscDb = vf->ucscDb; struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, ucscDb); struct target *targetList = hashFindVal(assemblyToTarget, assembly->name); if (targetList == NULL) { targetList = targetsForAssembly(conn, assembly); if (targetList == NULL) errAbort("No targets for assembly %s", assembly->name); hashAdd(assemblyToTarget, assembly->name, targetList); } /* Loop through targetList zeroing out existing ovelaps. */ struct target *target; boolean allSkip = TRUE; for (target = targetList; target != NULL; target = target->next) { target->overlapBases = target->uniqOverlapBases = 0; target->skip = enrichmentExists(conn, ef, target->target); if (!target->skip) allSkip = FALSE; } /* Do a big dispatch based on format. 
*/ if (!allSkip) { if (sameString(format, "fastq")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bigWig")) doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList); else if (edwIsSupportedBigBedFormat(format)) doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gtf")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "gff")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "bam")) doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList); else if (sameString(format, "unknown")) verbose(2, "Unknown format in doEnrichments(%s), that's chill.", ef->edwFileName); else errAbort("Unrecognized format %s in doEnrichments(%s)", format, path); } /* Clean up and go home. */ edwAssemblyFree(&assembly); } edwValidFileFree(&vf); }