Esempio n. 1
0
void doReplicateQa(struct sqlConnection *conn, struct cdwFile *ef)
/* Replicate-level QA for one file.  Looks up the file's validFile record,
 * collects its elder replicates, and hands every elder that lives on the
 * same simplified assembly to doReplicatePair together with this file.
 * Returns without doing anything when the file has no validFile record. */
{
struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;

/* Files whose replicate label is empty, "n/a" or "pooled" are left out.
 * If this set of excluded labels is ever expanded, the corresponding bits
 * in cdwWebBrowse need to be expanded as well. */
char *repLabel = vf->replicate;
struct cdwValidFile *elderList = NULL;
if (!(isEmpty(repLabel) || sameString(repLabel, "n/a") || sameString(repLabel, "pooled")))
    elderList = cdwFindElderReplicates(conn, vf);

if (elderList != NULL)
    {
    char *targetDb = cdwSimpleAssemblyName(vf->ucscDb);
    struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetDb);

    /* NOTE(review): neither elderList nor the cdwFile fetched for each elder
     * is released in this function - confirm whether that is intentional. */
    struct cdwValidFile *elder = elderList;
    while (elder != NULL)
        {
        char *elderDb = cdwSimpleAssemblyName(elder->ucscDb);
        if (sameString(targetDb, elderDb))
            {
            struct cdwFile *elderFile = cdwFileFromIdOrDie(conn, elder->fileId);
            doReplicatePair(conn, assembly, elderFile, elder, ef, vf);
            }
        elder = elder->next;
        }

    cdwAssemblyFree(&assembly);
    }

cdwValidFileFree(&vf);
}
Esempio n. 2
0
void doEnrichments(struct sqlConnection *conn, struct cdwFile *ef, char *path, 
    struct hash *assemblyToTarget)
/* Calculate enrichments of one file against all targets of its assembly.
 *    conn - database connection used for all lookups
 *    ef - the file to analyse
 *    path - only used in the error message for an unrecognized format
 *    assemblyToTarget - hash keyed by assembly name holding that assembly's
 *         target list; the list is built and added here the first time an
 *         assembly is seen.
 * Does nothing when the file has no validFile record, when enrichedIn or
 * ucscDb is empty, or when ucscDb or format is "unknown".
 * Aborts via errAbort when the assembly cannot be found, when the assembly
 * has no targets, or when the format is not recognized.
 * (The original note that the targetList and the grtList are in the same
 * order refers to something outside this function - TODO confirm it still
 * applies.) */
{
/* Get validFile from database. */
struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
if (vf == NULL)
    return;	/* We can only work if have validFile table entry */

/* Test ucscDb for emptiness before comparing its contents. */
if (!isEmpty(vf->enrichedIn) && !isEmpty(vf->ucscDb) && !sameWord(vf->ucscDb, "unknown")
    && !sameWord(vf->format, "unknown"))
    {
    /* Get our assembly */
    char *format = vf->format;
    char *ucscDb = vf->ucscDb;
    char *targetName = cdwSimpleAssemblyName(ucscDb);
    struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, targetName);
    if (assembly == NULL)
        errAbort("No assembly %s in doEnrichments(%s)", targetName, path);

    /* Fetch the target list for this assembly, building and caching it on first use. */
    struct target *targetList = hashFindVal(assemblyToTarget, assembly->name);
    if (targetList == NULL)
        {
        targetList = targetsForAssembly(conn, assembly);
        if (targetList == NULL)
            errAbort("No targets for assembly %s", assembly->name);
        hashAdd(assemblyToTarget, assembly->name, targetList);
        }

    /* Loop through targetList zeroing out existing overlaps, and mark the
     * targets that already have an enrichment for this file as skippable. */
    struct target *target;
    boolean allSkip = TRUE;
    for (target = targetList; target != NULL; target = target->next)
        {
        target->overlapBases = target->uniqOverlapBases = 0;
        target->skip = enrichmentExists(conn, ef, target->target);
        if (!target->skip)
            allSkip = FALSE;
        }

    /* Do a big dispatch based on format.  Nothing to do if every target
     * is already covered. */
    if (!allSkip)
        {
        if (sameString(format, "fastq"))
            doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
        else if (sameString(format, "bigWig"))
            doEnrichmentsFromBigWig(conn, ef, vf, assembly, targetList);
        else if (startsWith("bed_", format))
            doEnrichmentsFromBed(conn, ef, vf, assembly, targetList);
        else if (cdwIsSupportedBigBedFormat(format) || sameString(format, "bigBed"))
            doEnrichmentsFromBigBed(conn, ef, vf, assembly, targetList);
        else if (sameString(format, "gtf") || sameString(format, "gff")
            || sameString(format, "bam") || sameString(format, "vcf"))
            doEnrichmentsFromSampleBed(conn, ef, vf, assembly, targetList);
        else if (sameString(format, "idat"))
            verbose(2, "Ignoring idat %s, in doEnrichments.\n", ef->cdwFileName);
        else if (sameString(format, "customTrack"))
            verbose(2, "Ignoring customTrack %s, in doEnrichments.\n", ef->cdwFileName);
        else if (sameString(format, "rcc"))
            verbose(2, "Ignoring rcc %s, in doEnrichments.\n", ef->cdwFileName);
        else if (sameString(format, "bam.bai"))
            verbose(2, "Ignoring bam.bai %s, in doEnrichments - just and index file.\n", 
                ef->cdwFileName);
        else if (sameString(format, "vcf.gz.tbi"))
            verbose(2, "Ignoring vcf.gz.tbi %s, in doEnrichments - just and index file.\n", 
                ef->cdwFileName);
        else if (sameString(format, "unknown"))
            /* Not reachable while the guard above rejects "unknown" formats;
             * kept as a safety net should that guard change. */
            verbose(2, "Unknown format in doEnrichments(%s), that's ok.\n", ef->cdwFileName);
        else
            errAbort("Unrecognized format %s in doEnrichments(%s)", format, path);
        }

    /* Clean up and go home. */
    cdwAssemblyFree(&assembly);
    }
cdwValidFileFree(&vf);
}
Esempio n. 3
0
void pairedEndQa(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf)
/* Look for other end, do a pairwise alignment of subsamples, and save the
 * resulting pairing statistics in the cdwQaPairedEndFastq table.
 *    conn - database connection used for the lookups
 *    ef - the file being checked
 *    vf - validFile record belonging to ef
 * Returns without doing any work when no opposite end is found, when the
 * opposite end has the larger fileId, or when a pair record already exists.
 * External commands are run through mustSystem. */
{
verbose(2, "pairedEndQa on %u %s %s\n", ef->id, ef->cdwFileName, ef->submitFileName);

/* Get other end, return if not found. */
struct cdwValidFile *otherVf = cdwOppositePairedEnd(conn, ef, vf);
if (otherVf == NULL)
    return;

/* Only proceed from the file with the larger id - presumably so that each
 * pair is processed just once; verify against caller. */
if (otherVf->fileId > vf->fileId)
    {
    cdwValidFileFree(&otherVf);
    return;
    }

/* If a record for this pair already exists there is nothing left to do.
 * vf1/vf2 are filled in with the two ends (they point at vf and otherVf,
 * so must not be used once otherVf is freed). */
struct cdwValidFile *vf1, *vf2;
struct cdwQaPairedEndFastq *pair = cdwQaPairedEndFastqFromVfs(conn, vf, otherVf, &vf1, &vf2);
if (pair != NULL)
    {
    cdwQaPairedEndFastqFree(&pair);
    cdwValidFileFree(&otherVf);
    return;
    }

/* Get target assembly and figure out path for BWA index.
 * NOTE(review): assembly is never released in this function. */
struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, vf->ucscDb);
assert(assembly != NULL);
char genoFile[PATH_LEN];
safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", 
    cdwValDataDir, assembly->ucscDb, assembly->ucscDb);

verbose(1, "aligning subsamples on %u vs. %u paired reads\n", vf1->fileId, vf2->fileId);

/* Make alignments of subsamples. */
char *sample1 = NULL, *sample2 = NULL, *sai1 = NULL, *sai2 = NULL;
makeTmpSai(conn, vf1, genoFile, &sample1, &sai1);
makeTmpSai(conn, vf2, genoFile, &sample2, &sai2);

/* Make paired end alignment */
char *tmpSam = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".sam"));
char command[6*PATH_LEN];
safef(command, sizeof(command),
   "bwa sampe -n 1 -N 1 -f %s %s %s %s %s %s"
   , tmpSam, genoFile, sai1, sai2, sample1, sample2);
mustSystem(command);

/* Make ra file with pairing statistics */
char *tmpRa = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".ra"));
safef(command, sizeof(command), 
    "edwSamPairedEndStats -maxInsert=%d %s %s", maxInsert, tmpSam, tmpRa);
mustSystem(command);

/* Read RA file into variables. */
struct cdwQaPairedEndFastq *pe = cdwQaPairedEndFastqOneFromRa(tmpRa);

/* Update database with record.  The insert goes through the freshly opened
 * read/write connection rather than conn; previously freshConn was opened
 * and closed without ever being used. */
struct sqlConnection *freshConn = cdwConnectReadWrite();
char query[256];
sqlSafef(query, sizeof(query),
    "insert into cdwQaPairedEndFastq "
    "(fileId1,fileId2,concordance,distanceMean,distanceStd,distanceMin,distanceMax,recordComplete) "
    " values (%u,%u,%g,%g,%g,%g,%g,1)"
    , vf1->fileId, vf2->fileId, pe->concordance, pe->distanceMean
    , pe->distanceStd, pe->distanceMin, pe->distanceMax);
sqlUpdate(freshConn, query);
sqlDisconnect(&freshConn);

/* Clean up and go home: delete the temp files, then release memory. */
remove(sample1);
remove(sample2);
remove(sai1);
remove(sai2);
remove(tmpSam);
remove(tmpRa);
freez(&sample1);
freez(&sample2);
freez(&sai1);
freez(&sai2);
freez(&tmpSam);
freez(&tmpRa);
cdwQaPairedEndFastqFree(&pe);
cdwValidFileFree(&otherVf);
}