예제 #1
0
파일: edwLib.c 프로젝트: elmargb/kentUtils
struct edwAssembly *edwAssemblyForUcscDb(struct sqlConnection *conn, char *ucscDb)
/* Return the edwAssembly record loaded for the row whose ucscDb column equals
 * the given name.  Calls errAbort with a message naming ucscDb when the query
 * loads nothing. */
{
char sql[256];
struct edwAssembly *found;

sqlSafef(sql, sizeof(sql), "select * from edwAssembly where ucscDb='%s'", ucscDb);
found = edwAssemblyLoadByQuery(conn, sql);
if (!found)
    errAbort("Can't find assembly for %s", ucscDb);
return found;
}
예제 #2
0
void edwFixTargetSeq(char *when)
/* edwFixTargetSeq - Backfill target sequence fields in two tables:
 *   edwAssembly.seqCount comes from the seqCount of each assembly's twoBit file;
 *   edwBamFile.targetBaseCount/targetSeqCount come from each BAM file's header.
 * The 'when' argument is not read by this routine.
 * NOTE(review): bamList and conn are not released before returning - confirm
 * that is acceptable for the caller. */
{
char sql[512];
struct sqlConnection *conn = edwConnectReadWrite();

/* First pass: one update per assembly row. */
struct edwAssembly *assemblyList = edwAssemblyLoadByQuery(conn, "select * from edwAssembly");
struct edwAssembly *assembly = assemblyList;
while (assembly != NULL)
    {
    char *twoBitPath = edwPathForFileId(conn, assembly->twoBitId);
    struct twoBitFile *twoBit = twoBitOpen(twoBitPath);
    safef(sql, sizeof(sql), "update edwAssembly set seqCount=%u where id=%u",
        twoBit->seqCount, assembly->id);
    sqlUpdate(conn, sql);
    freez(&twoBitPath);
    twoBitClose(&twoBit);
    assembly = assembly->next;
    }
edwAssemblyFreeList(&assemblyList);

/* Second pass: one update per BAM file row. */
struct edwBamFile *bamList = edwBamFileLoadByQuery(conn, "select * from edwBamFile");
struct edwBamFile *bam = bamList;
while (bam != NULL)
    {
    char *bamPath = edwPathForFileId(conn, bam->fileId);
    samfile_t *samFile = samopen(bamPath, "rb", NULL);
    if (samFile == NULL)
        errnoAbort("Couldn't open %s.\n", bamPath);
    bam_header_t *header = samFile->header;
    if (header == NULL)
        errAbort("Aborting ... Bad BAM header in file: %s", bamPath);

    /* Add up the lengths of every target sequence listed in the header. */
    long long totalBases = 0;
    int targetIx = 0;
    while (targetIx < header->n_targets)
        {
        totalBases += header->target_len[targetIx];
        ++targetIx;
        }

    safef(sql, sizeof(sql), 
        "update edwBamFile set targetBaseCount=%lld,targetSeqCount=%u where id=%u",
        totalBases, (unsigned)header->n_targets, bam->id);
    sqlUpdate(conn, sql);

    samclose(samFile);
    freez(&bamPath);
    bam = bam->next;
    }

}
void screenFastqForContaminants(struct sqlConnection *conn, 
    struct edwFile *ef, struct edwValidFile *vf)
/* The ef/vf point to same file, which is fastq format.  Set alignments up for a sample against all
 * contamination targets.
 *
 * For each target from getContamTargets() for which edwQaContamMade() reports no existing
 * match (<= 0), a downsampled copy of the fastq is aligned against the target's assembly and
 * the resulting mapRatio is saved to the edwQaContam table.  Nothing is aligned when every
 * target already has a match.
 *
 * Aborts via errAbort if the file has no edwFastqFile record or if a target's assembly row
 * cannot be loaded.  The temporary downsampled fastq is removed unless keepTemp is set. */
{
/* Get target list and see if we have any work to do. */
struct edwQaContamTarget *target, *targetList;
targetList = getContamTargets(conn, ef, vf);
boolean needScreen = FALSE;
for (target = targetList; target != NULL; target = target->next)
    {
    if (edwQaContamMade(conn, ef->id, target->id) <= 0)
        {
        needScreen = TRUE;
        break;
        }
    }

if (needScreen)
    {
    verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName);

    /* Get fastq record. */
    struct edwFastqFile *fqf = edwFastqFileFromFileId(conn, ef->id);
    if (fqf == NULL)
        errAbort("No edwFastqFile record for file id %lld", (long long)ef->id);

    /* Create downsampled fastq in temp directory - downsampled more than default even. */
    char sampleFastqName[PATH_LEN];
    edwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName);
    verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName);

    for (target = targetList; target != NULL; target = target->next)
        {
        /* Get assembly associated with target */
        int assemblyId = target->assemblyId;
        char query[512];
        sqlSafef(query, sizeof(query), "select * from edwAssembly where id=%d", assemblyId);
        struct edwAssembly *newAsm = edwAssemblyLoadByQuery(conn, query);
        if (newAsm == NULL)
            /* The failed lookup is on edwAssembly, so name that table in the message. */
            errAbort("warehouse edwAssembly %d not found", assemblyId);

        /* If we don't already have a match, do work to create contam record. */
        int matchCount = edwQaContamMade(conn, ef->id, target->id);
        if (matchCount <= 0)
            {
            /* We run the bed-file maker, just for side effect calcs.  All four outputs
             * start at zero so none is left indeterminate. */
            double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio = 0;
            edwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL,
                &mapRatio, &depth, &sampleCoverage, &uniqueMapRatio);

            verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", 
                newAsm->name, mapRatio, depth, sampleCoverage);
            struct edwQaContam contam = 
                    {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio};
            edwQaContamSaveToDb(conn, &contam, "edwQaContam", 256);
            }
        edwAssemblyFree(&newAsm);
        }
    if (keepTemp)
        verbose(1, "%s\n", sampleFastqName);
    else
        remove(sampleFastqName);
    edwFastqFileFree(&fqf);
    }

/* Free the target list on every path; previously it leaked when no screening was needed. */
edwQaContamTargetFreeList(&targetList);
}