예제 #1
0
void fastqRepeatQa(struct sqlConnection *conn, struct edwFile *ef, struct edwValidFile *vf)
/* Do repeat QA if possible on fastq file. */
{
/* First see if total repeat content is already in our table, in which case we are done. */
long long fileId = ef->id;
char query[512];
sqlSafef(query, sizeof(query), 
    "select count(*) from edwQaRepeat where fileId=%lld and repeatClass='total'" , fileId);
if (sqlQuickNum(conn, query) != 0)
    return;	/* We've done this already */

/* Get sample file name from fastq table. */
struct edwFastqFile *fqf = edwFastqFileForFileId(conn, fileId);
if (fqf == NULL)
    errAbort("No edqFastqRecord for %s",  vf->licensePlate);
char *fastqPath = fqf->sampleFileName;

char bwaIndex[PATH_LEN];
safef(bwaIndex, sizeof(bwaIndex), "%s%s/repeatMasker/repeatMasker.fa", 
    edwValDataDir, vf->ucscDb);

char cmd[3*PATH_LEN];
char *saiName = cloneString(rTempName(edwTempDir(), "edwQaRepeat", ".sai"));
safef(cmd, sizeof(cmd), "bwa aln %s %s > %s", bwaIndex, fastqPath, saiName);
mustSystem(cmd);

char *samName = cloneString(rTempName(edwTempDir(), "edwQaRepeat", ".sam"));
safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", bwaIndex, saiName, fastqPath, samName);
mustSystem(cmd);
remove(saiName);

char *raName = cloneString(rTempName(edwTempDir(), "edwQaRepeat", ".ra"));
safef(cmd, sizeof(cmd), "edwSamRepeatAnalysis %s %s", samName, raName);
mustSystem(cmd);
verbose(2, "mustSystem(%s)\n", cmd);
remove(samName);

raIntoEdwRepeatQa(raName, conn, fileId);
remove(raName);
#ifdef SOON
#endif /* SOON */

freez(&saiName);
freez(&samName);
freez(&raName);
edwFastqFileFree(&fqf);
}
void screenFastqForContaminants(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf)
/* The ef/vf point to same file, which is fastq format.  Set alignments up for a sample against all
 * contamination targets. */
{
/* Get target list and see if we have any work to do. */
struct edwQaContamTarget *target, *targetList;
targetList = getContamTargets(conn, ef, vf);
boolean needScreen = FALSE;
for (target = targetList; target != NULL; target = target->next)
    {
    if (edwQaContamMade(conn, ef->id, target->id) <= 0)
        {
	needScreen = TRUE;
	break;
	}
    }

if (needScreen)
    {
    verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName);

    /* Get fastq record. */
    struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, ef->id);
    if (fqf == NULL)
        errAbort("No edwFastqFile record for file id %lld", (long long)ef->id);

    /* Create downsampled fastq in temp directory - downsampled more than default even. */
    char sampleFastqName[PATH_LEN];
    edwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName);
    verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName);

    for (target = targetList; target != NULL; target = target->next)
	{
	/* Get assembly associated with target */
	int assemblyId = target->assemblyId;
	char query[512];
	sqlSafef(query, sizeof(query), "select * from edwAssembly where id=%d", assemblyId);
	struct edwAssembly *newAsm = edwAssemblyLoadByQuery(conn, query);
	if (newAsm == NULL)
	    errAbort("warehouse edwQaContamTarget %d not found", assemblyId);

	/* If we don't already have a match, do work to create contam record. */
	int matchCount = edwQaContamMade(conn, ef->id, target->id);
	if (matchCount <= 0)
	    {
	    /* We run the bed-file maker, just for side effect calcs. */
	    double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio;
	    edwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL,
		&mapRatio, &depth, &sampleCoverage, &uniqueMapRatio);

	    verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", 
		newAsm->name, mapRatio, depth, sampleCoverage);
	    struct edwQaContam contam = 
		    {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio};
	    edwQaContamSaveToDb(conn, &contam, "edwQaContam", 256);
	    }
	edwAssemblyFree(&newAsm);
	}
    edwQaContamTargetFreeList(&targetList);
    if (keepTemp)
        verbose(1, "%s\n", sampleFastqName);
    else
	remove(sampleFastqName);
    edwFastqFileFree(&fqf);
    }
}