void doReplicateQa(struct sqlConnection *conn, struct cdwFile *ef)
/* Replicate-level QA for a single file.  Looks up the file's cdwValidFile
 * record, asks cdwFindElderReplicates for the other replicates to compare
 * against, and calls doReplicatePair on each one whose simplified assembly
 * name matches this file's.  Returns quietly if the file has no cdwValidFile
 * record. */
{
struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;     /* Not validated, so there is nothing to compare. */

/* Files with an empty, "n/a" or "pooled" replicate are left out.  If this
 * test is expanded, expand the matching bits in cdwWebBrowse as well. */
char *rep = vf->replicate;
if (!(isEmpty(rep) || sameString(rep, "n/a") || sameString(rep, "pooled")))
    {
    /* Other replicates of same experiment, format, and output type. */
    struct cdwValidFile *elderList = cdwFindElderReplicates(conn, vf);
    if (elderList != NULL)
        {
        char *targetDb = cdwSimpleAssemblyName(vf->ucscDb);
        struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetDb);
        struct cdwValidFile *elder = elderList;
        while (elder != NULL)
            {
            /* Only pair up files that sit on the same simplified assembly. */
            char *elderDb = cdwSimpleAssemblyName(elder->ucscDb);
            if (sameString(targetDb, elderDb))
                {
                struct cdwFile *elderFile = cdwFileFromIdOrDie(conn, elder->fileId);
                doReplicatePair(conn, assembly, elderFile, elder, ef, vf);
                }
            elder = elder->next;
            }
        cdwAssemblyFree(&assembly);
        }
    }
cdwValidFileFree(&vf);
}
/* Example #2 (0) */
void cdwMakeRepeatQa(int startFileId, int endFileId)
/* cdwMakeRepeatQa - Figure out what proportion of things align to repeats.
 * Walks the files returned by cdwFileAllIntactBetween for the given id range
 * and runs fastqRepeatQa on each one that has a cdwValidFile record with
 * format "fastq".  Opens its own read/write connection and closes it at the
 * end. */
{
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwFile *fileList = cdwFileAllIntactBetween(conn, startFileId, endFileId);
struct cdwFile *ef = fileList;
while (ef != NULL)
    {
    /* NOTE(review): vf and fileList are never freed here - presumably fine
     * for a short-lived command line run, confirm. */
    struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
    if (vf != NULL && sameString(vf->format, "fastq"))
        fastqRepeatQa(conn, ef, vf);
    ef = ef->next;
    }
sqlDisconnect(&conn);
}
/* Example #3 (0) */
void cdwMakePairedEndQa(unsigned startId, unsigned endId)
/* cdwMakePairedEndQa - Do alignments of paired-end fastq files and calculate
 * distribution of insert size.  Walks the files returned by
 * cdwFileAllIntactBetween for the given id range and runs pairedEndQa on
 * each one whose cdwValidFile record has format "fastq" and a non-empty
 * pairedEnd field. */
{
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwFile *fileList = cdwFileAllIntactBetween(conn, startId, endId);
struct cdwFile *ef;
for (ef = fileList; ef != NULL; ef = ef->next)
    {
    /* NOTE(review): vf and fileList are never freed here - presumably fine
     * for a short-lived command line run, confirm. */
    struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
    if (vf == NULL)
        continue;       /* Not validated. */
    if (!sameString(vf->format, "fastq"))
        continue;       /* Only fastq files are handled. */
    if (isEmpty(vf->pairedEnd))
        continue;       /* No pairedEnd value, so not treated as paired. */
    pairedEndQa(conn, ef, vf);
    }
sqlDisconnect(&conn);
}
void cdwFakeManifestFromSubmit(char *submitIdString, char *outDir)
/* cdwFakeManifestFromSubmit - Create a fake submission based on a real one
 * that is in the warehouse.  For every file of the submission that has a
 * cdwValidFile record, a symlink to the warehouse copy is made in outDir and
 * a row is written to both manifest.txt and validated.txt there.  Aborts if
 * the submission id is not found.  Note that this changes the current
 * directory to outDir once the first validated file is seen. */
{
struct sqlConnection *conn = cdwConnect();
char query[512];

/* Make sure the submission exists before going any further. */
sqlSafef(query, sizeof(query), "select * from cdwSubmit where id=%s", submitIdString);
struct cdwSubmit *submit = cdwSubmitLoadByQuery(conn, query);
if (submit == NULL)
    errAbort("Can't find submission %s", submitIdString);

/* NOTE(review): uglyf looks like a leftover debugging print - confirm
 * whether it is meant to stay. */
uglyf("%d files in query\n", submit->newFiles);

sqlSafef(query, sizeof(query), "select * from cdwFile where submitId=%s", submitIdString);
struct cdwFile *fileList = cdwFileLoadByQuery(conn, query);

FILE *manifestOut = NULL;
FILE *validatedOut = NULL;
struct cdwFile *ef;
for (ef = fileList; ef != NULL; ef = ef->next)
    {
    struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
    if (vf == NULL)
        continue;       /* Files without a cdwValidFile record are left out. */

    /* Output directory and files are created lazily, on the first file
     * that actually has something to write. */
    if (manifestOut == NULL)
        {
        char *fakeVersion = "##validateManifest version 1.7";
        makeDirsOnPath(outDir);
        setCurrentDir(outDir);

        manifestOut = mustOpen("manifest.txt", "w");
        printSharedHeader(manifestOut);
        fprintf(manifestOut, "\n");
        fprintf(manifestOut, "%s\n", fakeVersion);

        /* NOTE(review): this header names four extra columns, ending in
         * valid_key, but the rows below only write three extra values. */
        validatedOut = mustOpen("validated.txt", "w");
        printSharedHeader(validatedOut);
        fprintf(validatedOut, "\tmd5_sum\tsize\tmodified\tvalid_key\n");
        fprintf(validatedOut, "%s\n", fakeVersion);
        }

    /* Full path of the warehouse copy, and the bare name.ext used locally. */
    char warehousePath[PATH_LEN];
    char rootName[FILENAME_LEN];
    char ext[FILEEXT_LEN];
    char linkName[PATH_LEN];
    safef(warehousePath, sizeof(warehousePath), "%s%s", cdwRootDir, ef->cdwFileName);
    splitPath(ef->cdwFileName, NULL, rootName, ext);
    safef(linkName, sizeof(linkName), "%s%s", rootName, ext);

    /* Link into the current directory (outDir) and start the row with the
     * file name.  NOTE(review): the symlink result is not checked. */
    symlink(warehousePath, linkName);
    fprint2(manifestOut, validatedOut, "%s", linkName);

    /* Columns shared by both files: format as is, the rest via naForEmpty. */
    fprint2(manifestOut, validatedOut, "\t%s", vf->format);
    char *optionalCols[] =
        {
        vf->outputType, vf->experiment, vf->enrichedIn, vf->ucscDb,
        vf->replicate, vf->part, vf->pairedEnd,
        };
    int colCount = sizeof(optionalCols) / sizeof(optionalCols[0]);
    int i;
    for (i = 0; i < colCount; ++i)
        fprint2(manifestOut, validatedOut, "\t%s", naForEmpty(optionalCols[i]));
    fprintf(manifestOut, "\n");

    /* validated.txt carries the extra per-file fields. */
    fprintf(validatedOut, "\t%s\t%lld\t%lld\n", ef->md5, ef->size, ef->updateTime);
    }
carefulClose(&manifestOut);
carefulClose(&validatedOut);
}
void doEnrichments(struct sqlConnection *conn, struct cdwFile *ef, char *path, 
    struct hash *assemblyToTarget)
/* Calculate enrichments of file ef against all targets of its assembly.
 *   conn - open warehouse database connection
 *   ef - the file to process
 *   path - only used in the error message for an unrecognized format
 *   assemblyToTarget - cache of target lists keyed by assembly name; a list
 *       is built with targetsForAssembly and added the first time an assembly
 *       is seen.
 * Nothing is calculated when the file has no cdwValidFile record, when
 * enrichedIn or ucscDb is empty, or when ucscDb or format is "unknown".
 * Aborts if the assembly has no targets or the format is not recognized. */
{
/* Get validFile from database. */
struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;	/* We can only work if have validFile table entry */

if (!isEmpty(vf->enrichedIn) && !sameWord(vf->ucscDb, "unknown") && !isEmpty(vf->ucscDb)
    && !sameWord(vf->format, "unknown"))
    {
    /* Get our assembly */
    char *format = vf->format;
    char *ucscDb = vf->ucscDb;
    char *targetName = cdwSimpleAssemblyName(ucscDb);
    struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetName);

    /* Fetch the target list from the cache, building it on first use. */
    struct target *targetList = hashFindVal(assemblyToTarget, assembly->name);
    if (targetList == NULL)
	{
	targetList = targetsForAssembly(conn, assembly);
	if (targetList == NULL)
	    errAbort("No targets for assembly %s", assembly->name);
	hashAdd(assemblyToTarget, assembly->name, targetList);
	}

    /* Loop through targetList zeroing out existing overlaps, since the list
     * is shared between files.  Targets that already have an enrichment for
     * this file are marked to be skipped. */
    struct target *target;
    boolean allSkip = TRUE;
    for (target = targetList; target != NULL; target = target->next)
	{
	target->overlapBases = target->uniqOverlapBases = 0;
	target->skip = enrichmentExists(conn, ef, target->target);
	if (!target->skip)
	    allSkip = FALSE;
	}

    /* Do a big dispatch based on format.  Nothing to do if every target
     * is already covered. */
    if (!allSkip)
	{
	if (sameString(format, "fastq"))
	    doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "bigWig"))
	    doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList);
	else if (startsWith("bed_", format))
	    doEnrichmentsFromBed(conn, ef, vf, assembly, targetList);
	else if (cdwIsSupportedBigBedFormat(format) || sameString(format, "bigBed"))
	    doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList);
	else if (sameString(format, "gtf") || sameString(format, "gff")
	    || sameString(format, "bam") || sameString(format, "vcf"))
	    doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
	/* The remaining known formats are deliberately ignored.  Each message
	 * ends in a newline so that consecutive log messages do not run
	 * together. */
	else if (sameString(format, "idat"))
	    verbose(2, "Ignoring idat %s, in doEnrichments.\n", ef->cdwFileName);
	else if (sameString(format, "customTrack"))
	    verbose(2, "Ignoring customTrack %s, in doEnrichments.\n", ef->cdwFileName);
	else if (sameString(format, "rcc"))
	    verbose(2, "Ignoring rcc %s, in doEnrichments.\n", ef->cdwFileName);
	else if (sameString(format, "bam.bai"))
	    verbose(2, "Ignoring bam.bai %s, in doEnrichments - just an index file.\n", 
		ef->cdwFileName);
	else if (sameString(format, "vcf.gz.tbi"))
	    verbose(2, "Ignoring vcf.gz.tbi %s, in doEnrichments - just an index file.\n", 
		ef->cdwFileName);
	/* Currently unreachable, since the test at the top already excludes
	 * format "unknown".  Kept so that relaxing that test does not turn
	 * unknown files into a fatal error. */
	else if (sameString(format, "unknown"))
	    verbose(2, "Unknown format in doEnrichments(%s), that's ok.\n", ef->cdwFileName);
	else
	    errAbort("Unrecognized format %s in doEnrichments(%s)", format, path);
	}

    /* Clean up and go home. */
    cdwAssemblyFree(&assembly);
    }
cdwValidFileFree(&vf);
}