Beispiel #1
0
void doReplicateQa(struct sqlConnection *conn, struct edwFile *ef)
/* Replicate-level QA for ef.  Each elder replicate reported by
 * edwFindElderReplicates() is paired with ef and handed to doReplicatePair().
 * Nothing is done when ef has no edwValidFile record, or when its replicate
 * field is empty, "n/a" or "pooled". */
{
/* No validated-file record means there is nothing to compare. */
struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

/* Only real replicate labels take part.  If this list of excluded labels is
 * expanded, the matching bits in edwWebBrowse need expanding as well. */
char *rep = vf->replicate;
if (isEmpty(rep) || sameString(rep, "n/a") || sameString(rep, "pooled"))
    {
    edwValidFileFree(&vf);
    return;
    }

/* NOTE(review): neither elderList nor the edwFile fetched for each elder is
 * released in this function - confirm who owns them. */
struct edwValidFile *elderList = edwFindElderReplicates(conn, vf);
if (elderList != NULL)
    {
    struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, vf->ucscDb);
    struct edwValidFile *elder = elderList;
    while (elder != NULL)
        {
        struct edwFile *elderEf = edwFileFromIdOrDie(conn, elder->fileId);
        doReplicatePair(conn, assembly, elderEf, elder, ef, vf);
        elder = elder->next;
        }
    edwAssemblyFree(&assembly);
    }

edwValidFileFree(&vf);
}
void doContaminationQa(struct sqlConnection *conn, struct edwFile *ef)
/* Try and do contamination level QA - mostly mapping fastq files to other
 * genomes.  Files without an edwValidFile record, or whose format is not
 * "fastq", are silently ignored.  The edwValidFile record fetched here is
 * released before returning on every path. */
{
/* Get validated file info.  If not validated we don't bother. */
struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

/* We only work on fastq. */
if (sameString(vf->format, "fastq"))
    screenFastqForContaminants(conn, ef, vf);

/* Release the record on both the fastq and the non-fastq path, the same way
 * the other QA routines in this file do. */
edwValidFileFree(&vf);
}
Beispiel #3
0
void edwMakeRepeatQa(int startFileId, int endFileId)
/* edwMakeRepeatQa - Figure out what proportion of things align to repeats.
 * Walks the files returned by edwFileAllIntactBetween() for the given id
 * range and runs fastqRepeatQa() on each one whose edwValidFile record has
 * format "fastq".  Files with no edwValidFile record are skipped. */
{
struct sqlConnection *conn = edwConnectReadWrite();

/* NOTE(review): efList itself is not released before returning - confirm
 * whether that matters for the callers of this routine. */
struct edwFile *ef, *efList = edwFileAllIntactBetween(conn, startFileId, endFileId);
for (ef = efList; ef != NULL; ef = ef->next)
    {
    struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
    if (vf == NULL)
        continue;
    if (sameString(vf->format, "fastq"))
        fastqRepeatQa(conn, ef, vf);
    /* One record is loaded per file, so free it each time round rather than
     * letting them pile up over a large id range. */
    edwValidFileFree(&vf);
    }
sqlDisconnect(&conn);
}
Beispiel #4
0
long long totalReadsForExp(struct sqlConnection *conn, struct fullExperiment *exp, 
    struct edwExperiment *ee, struct eapGraph *eg)
/* Return total number of reads in both good replicates of this experiment.
 * The total is the sum of itemCount over every fastq ancestor that
 * eapGraphAncestorsOfFormat() reports for the experiment's pooled broad peak
 * file.  Ancestors with no edwValidFile record contribute nothing.
 * The ee parameter is not used here. */
{
/* Debug tracing is hard-wired on, so the uglyf() lines below always fire. */
boolean uglyOne = TRUE;
if (uglyOne) uglyf("Got you %s %s\n", exp->name, exp->exp->biosample);
// We start with broad peaks because they are relatively rare, only generated by hotspot
long long total = 0;
long long pooledPeakId = pooledBroadPeaksForExp(exp);
if (uglyOne) uglyf("Broad is %lld\n", pooledPeakId);

/* NOTE(review): refList and the per-file edwValidFile records are not
 * released in this function - confirm ownership before adding frees. */
struct slRef *ref, *refList = NULL;
eapGraphAncestorsOfFormat(eg, pooledPeakId, "fastq", -1, &refList);
for (ref = refList; ref != NULL; ref = ref->next)
    {
    struct eapInput *in = ref->val;
    struct edwValidFile *vf = edwValidFileFromFileId(conn, in->fileId);
    if (vf == NULL)
        {
        /* The lookup yields NULL for files that were never validated; skip
         * those instead of dereferencing a null pointer below. */
        if (uglyOne) uglyf("Fastq %lld has no edwValidFile entry, skipping\n", (long long)in->fileId);
        continue;
        }
    if (uglyOne) uglyf("Fastq is %u, reads %lld\n", vf->fileId, (long long)vf->itemCount);
    total += vf->itemCount;
    }
return total;
}
Beispiel #5
0
void edwChangeFormat(char *format, int idCount, char *idStrings[])
/* edwChangeFormat - Change format and force a revalidation for a file.
 *   format    - new format, passed through to changeFormat() for every file
 *   idCount   - number of entries in idStrings; zero or less means no files
 *   idStrings - ascii file ids
 * Every id is parsed and looked up in edwValidFile before any file is
 * changed, so a bad id aborts the run before changeFormat() is called. */
{
struct sqlConnection *conn = edwConnectReadWrite();

/* A variable length array must have a positive size, so only declare it
 * when there is at least one id to process. */
if (idCount > 0)
    {
    struct edwValidFile *vfs[idCount];
    int i;

    /* Convert ascii id's to valid file ids so we catch errors early. */
    for (i=0; i<idCount; ++i)
        {
        long long id = sqlLongLong(idStrings[i]);
        vfs[i] = edwValidFileFromFileId(conn, id);
        if (vfs[i] == NULL)
            errAbort("%lld is not a fileId in the edwValidFile table", id);
        }

    /* Loop through each file and change format. */
    for (i=0; i<idCount; ++i)
        changeFormat(conn, vfs[i], format);
    }

sqlDisconnect(&conn);
}
Beispiel #6
0
void doEnrichments(struct sqlConnection *conn, struct edwFile *ef, char *path, 
    struct hash *assemblyToTarget)
/* Calculate enrichments of ef against all targets of its assembly.
 *   assemblyToTarget - hash keyed by assembly name holding target lists; a
 *                      list missing from the hash is built with
 *                      targetsForAssembly() and added to it.
 *   path             - only used in the "Unrecognized format" abort message.
 * Returns without doing anything if ef has no edwValidFile record or that
 * record's enrichedIn field is empty. */
{
/* We can only work if there is a validFile table entry. */
struct edwValidFile *vf = edwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

/* Nothing is requested when enrichedIn is empty; just release the record. */
if (isEmpty(vf->enrichedIn))
    {
    edwValidFileFree(&vf);
    return;
    }

/* Look up the assembly for this file. */
char *format = vf->format;
struct edwAssembly *assembly = edwAssemblyForUcscDb(conn, vf->ucscDb);

/* Fetch the target list for the assembly, building and caching it on a miss. */
struct target *targets = hashFindVal(assemblyToTarget, assembly->name);
if (targets == NULL)
    {
    targets = targetsForAssembly(conn, assembly);
    if (targets == NULL)
        errAbort("No targets for assembly %s", assembly->name);
    hashAdd(assemblyToTarget, assembly->name, targets);
    }

/* Zero the overlap counters on every target, and mark as skipped the ones
 * for which enrichmentExists() reports an existing result. */
boolean anyToDo = FALSE;
struct target *t;
for (t = targets; t != NULL; t = t->next)
    {
    t->uniqOverlapBases = 0;
    t->overlapBases = 0;
    t->skip = enrichmentExists(conn, ef, t->target);
    if (!t->skip)
        anyToDo = TRUE;
    }

/* Dispatch on format, but only if at least one target still needs work. */
if (anyToDo)
    {
    if (sameString(format, "fastq"))
        doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targets);
    else if (sameString(format, "bigWig"))
        doEnrichmentsFromBigWig(conn, ef, vf, assembly, targets);
    else if (edwIsSupportedBigBedFormat(format))
        doEnrichmentsFromBigBed(conn, ef, vf, assembly, targets);
    else if (sameString(format, "gtf") || sameString(format, "gff")
        || sameString(format, "bam"))
        doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targets);
    else if (sameString(format, "unknown"))
        verbose(2, "Unknown format in doEnrichments(%s), that's chill.", ef->edwFileName);
    else
        errAbort("Unrecognized format %s in doEnrichments(%s)", format, path);
    }

/* Clean up and go home. */
edwAssemblyFree(&assembly);
edwValidFileFree(&vf);
}