Exemplo n.º 1
0
// 1 if a better than b, 0 if equivalent, -1 if a worse than b
int compareProcessEntries(processEntry_s* a, processEntry_s* b, bool* requirements)
{
	if(!a || !b)return 0;

	int cnt_a = countBools(a->capabilities, requirements, NUM_CAPABILITIES);
	int cnt_b = countBools(b->capabilities, requirements, NUM_CAPABILITIES);

	if(cnt_a > cnt_b)return 1;
	else if(cnt_a < cnt_b)return -1;
	else if(a->processId == -1)return 1;
	else if(b->processId == -1)return -1;

	return 0;
}
Exemplo n.º 2
0
void ggcChrom(struct chromGenes *chrom, char *axtFile, 
	struct ggcInfo *g, struct hash *restrictHash, 
	FILE *fParts)
/* Tabulate matches on chromosome. */
{
struct lineFile *lf = lineFileOpen(axtFile, TRUE);
bool *hits, *covers;
int hitCount = 0, coverCount = 0;
struct axt *axt;
struct genePred *gp;
int closeSize = g->closeSize;
int closeHalf = closeSize/2;

/* Build up array of booleans - one per base - which are
 * 1's where mouse/human align and bases match, zero 
 * elsewhere. */
AllocArray(hits, chrom->size);
AllocArray(covers, chrom->size);
printf("%s (%d bases)\n", chrom->name, chrom->size);
while ((axt = axtRead(lf)) != NULL)
    {
    int tPos = axt->tStart;
    int symCount = axt->symCount, i;
    char t, q, *tSym = axt->tSym, *qSym = axt->qSym;

    if (axt->tEnd > chrom->size)
        errAbort("tEnd %d, chrom size %d in %s", 
		axt->tEnd, chrom->size, axtFile);
    if (axt->tStrand == '-')
        errAbort("Can't handle minus strand on target in %s", axtFile);
    for (i=0; i<symCount; ++i)
        {
	t = tSym[i];
	if (t != '-')
	    {
	    q = qSym[i];
	    if (toupper(t) == toupper(q))
		{
	        hits[tPos] = TRUE;
		++hitCount;
		}
	    if (q == '-')
	       covers[tPos] = 1;
	    else
	       covers[tPos] = 2;
	    ++tPos;
	    }
	}
    axtFree(&axt);
    }

for (gp = chrom->geneList; gp != NULL; gp = gp->next)
    {
    int exonIx;
    int utr3Size = 0, utr5Size = 0, cdsAllSize = 0;
    int utr3Pos = 0, utr5Pos = 0, cdsAllPos = 0;
    bool *utr3Hits = NULL, *utr3Covers = NULL;
    bool *utr5Hits = NULL, *utr5Covers = NULL;
    bool *cdsAllHits = NULL, *cdsAllCovers = NULL;
    bool isRev = (gp->strand[0] == '-');


    /* Filter out genes not in restrict hash if any. */
    ++totalGenes;
    if (restrictHash != NULL)
        if (!hashLookup(restrictHash, gp->name))
	    continue;
    ++reviewedGenes;

    /* Filter out genes without meaningful UTRs */
    if (gp->cdsStart - gp->txStart < g->closeSize/2 || 
    	gp->txEnd - gp->cdsEnd < g->closeSize/2)
        continue;
    ++genesUsed;

    /* Total up UTR and CDS sizes. */
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
	 {
	 int eStart = gp->exonStarts[exonIx];
	 int eEnd = gp->exonEnds[exonIx];
	 int eSize = eEnd - eStart;
	 int oneUtr, oneCds;
	 oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
	 if (oneCds > 0)
	     {
	     cdsAllSize += oneCds;
	     }
	 if (eStart < gp->cdsStart)
	     {
	     int utrStart = eStart;
	     int utrEnd = min(gp->cdsStart, eEnd);
	     int utrSize = utrEnd - utrStart;
	     if (isRev)
		 utr3Size += utrSize;
	     else
		 utr5Size += utrSize;
	     }
	 if (eEnd > gp->cdsEnd)
	     {
	     int utrStart = max(gp->cdsEnd, eStart);
	     int utrEnd = eEnd;
	     int utrSize = utrEnd - utrStart;
	     if (isRev)
		 utr5Size += utrSize;
	     else
		 utr3Size += utrSize;
	     }
	 }

    /* Condense hits from UTRs and CDSs */
    if (utr5Size > 0)
	{
	AllocArray(utr5Hits, utr5Size);
	AllocArray(utr5Covers, utr5Size);
	}
    if (utr3Size > 0)
	{
	AllocArray(utr3Hits, utr3Size);
	AllocArray(utr3Covers, utr3Size);
	}
    if (cdsAllSize > 0)
	{
	AllocArray(cdsAllHits, cdsAllSize);
	AllocArray(cdsAllCovers, cdsAllSize);
	}
    for (exonIx=0; exonIx<gp->exonCount; ++exonIx)
	{
	int eStart = gp->exonStarts[exonIx];
	int eEnd = gp->exonEnds[exonIx];
	int eSize = eEnd - eStart;
	int oneUtr, oneCds;
	oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd);
	if (oneCds > 0)
	    {
	    int cdsStart = eStart;
	    int cdsEnd = gp->cdsEnd;

	    if (cdsStart < gp->cdsStart)
		cdsStart = gp->cdsStart;
	    memcpy(cdsAllHits + cdsAllPos, hits + cdsStart, oneCds * sizeof(*hits));
	    memcpy(cdsAllCovers + cdsAllPos, covers + cdsStart, oneCds * sizeof(*covers));
	    cdsAllPos += oneCds;
	    }
	if (eStart < gp->cdsStart)
	    {
	    int utrStart = eStart;
	    int utrEnd = min(gp->cdsStart, eEnd);
	    int utrSize = utrEnd - utrStart;
	    if (isRev)
		{
		memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
		memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
		utr3Pos += utrSize;
		}
	    else
		{
		memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
		memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
		utr5Pos += utrSize;
		}
	    }
	if (eEnd > gp->cdsEnd)
	    {
	    int utrStart = max(gp->cdsEnd, eStart);
	    int utrEnd = eEnd;
	    int utrSize = utrEnd - utrStart;
	    if (isRev)
		{
		memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits));
		memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers));
		utr5Pos += utrSize;
		}
	    else
		{
		memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits));
		memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers));
		utr3Pos += utrSize;
		}
	    }
	}
    assert(utr3Pos == utr3Size);
    assert(utr5Pos == utr5Size);
    assert(cdsAllPos == cdsAllSize);

    tallyHits(&g->utr5, utr5Hits, utr5Covers, utr5Size, isRev);
    tallyHits(&g->utr3, utr3Hits, utr3Covers, utr3Size, isRev);
    tallyHits(&g->cdsAll, cdsAllHits, cdsAllCovers, cdsAllSize, isRev);

    /* Optionally write out file with gene by gene info. */
    if (fParts != NULL)
        {
	/* Write header line first time through. */
	static boolean firstTime = TRUE;
	if (firstTime)
	    {
	    firstTime = FALSE;
	    fprintf(fParts, "#accession\tsize_5\tali_5\tmatch_5\tsize_c\tali_c\tmatch_c\tsize_3\tali_3\tmatch_3\n");
	    }
	fprintf(fParts, "%s\t", gp->name);
	fprintf(fParts, "%d\t%d\t%d\t", utr5Size, 
		countBools(utr5Covers, utr5Size),
		countBools(utr5Hits, utr5Size));
	fprintf(fParts, "%d\t%d\t%d\t", cdsAllSize, 
		countBools(cdsAllCovers, cdsAllSize),
		countBools(cdsAllHits, cdsAllSize));
	fprintf(fParts, "%d\t%d\t%d\n", utr3Size, 
		countBools(utr3Covers, utr3Size),
		countBools(utr3Hits, utr3Size));
	}

    /* Tally upstream/downstream hits. */
	{
	int s1 = gp->txStart - closeHalf;
	int e1 = s1 + closeSize;
	int s2 = gp->txEnd - closeHalf;
	int e2 = s2 + closeSize;
	if (isRev)
	    {
	    tallyInRange(&g->down, hits, covers, chrom->size, gp->txStart - g->baseDown,
		gp->txStart, isRev);
	    tallyInRange(&g->up, hits, covers, chrom->size, gp->txEnd, 
		gp->txEnd + g->baseUp, isRev);
	    tallyInRange(&g->txEnd, hits, covers, chrom->size, s1, e1, isRev);
	    tallyInRange(&g->txStart, hits, covers, chrom->size, s2, e2, isRev);
	    }
	else
	    {
	    tallyInRange(&g->up, hits, covers, chrom->size, gp->txStart - g->baseUp,
		gp->txStart, isRev);
	    tallyInRange(&g->down, hits, covers, chrom->size, gp->txEnd, 
		gp->txEnd + g->baseDown, isRev);
	    tallyInRange(&g->txStart, hits, covers, chrom->size, s1, e1, isRev);
	    tallyInRange(&g->txEnd, hits, covers, chrom->size, s2, e2, isRev);
	    }
	}

    /* Tally hits in coding exons */
    for (exonIx=0; exonIx < gp->exonCount; ++exonIx)
        {
	int eStart = gp->exonStarts[exonIx];
	int eEnd = gp->exonEnds[exonIx];
	/* Single coding exon. */
	if (eStart <= gp->cdsStart && eEnd >= gp->cdsEnd)
	   {
	   eStart = gp->cdsStart;
	   eEnd = gp->cdsEnd;
	   tallyInRange(&g->cdsSingle, hits, covers, chrom->size,
	   		eStart, eEnd, isRev);
	   }
	/* Initial coding exon */
	else if (eStart < gp->cdsStart && eEnd > gp->cdsStart)
	    {
	    int cs = gp->cdsStart - closeHalf;
	    int ce = cs + closeSize;
	    eStart = gp->cdsStart;
	    if (isRev)
	        {
		tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
		tallyInRange(&g->cdsLast, hits, covers, chrom->size, 
			eStart, eEnd, isRev);
		}
	    else
	        {
		tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
		tallyInRange(&g->cdsFirst, hits, covers, chrom->size, 
			eStart, eEnd, isRev);
		}
	    }
	/* Final coding exon */
	else if (eStart < gp->cdsEnd && eEnd > gp->cdsEnd)
	    {
	    int cs = gp->cdsEnd - closeHalf;
	    int ce = cs + closeSize;
	    eEnd = gp->cdsEnd;
	    if (isRev)
	        {
		tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev);
		tallyInRange(&g->cdsFirst, hits, covers, chrom->size, 
			eStart, eEnd, isRev);
		}
	    else
	        {
		tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev);
		tallyInRange(&g->cdsLast, hits, covers, chrom->size, 
			eStart, eEnd, isRev);
		}
	    }
	/* Middle (but not only) coding exon */
	else if (eStart >= gp->cdsStart && eEnd <= gp->cdsEnd)
	    {
	    tallyInRange(&g->cdsMiddle, hits, covers, chrom->size, eStart, eEnd, isRev);
	    }
	else
	    {
	    }
	}
	

    /* Tally hits in introns and splice sites. */
    for (exonIx=1; exonIx<gp->exonCount; ++exonIx)
        {
	int iStart = gp->exonEnds[exonIx-1];
	int iEnd = gp->exonStarts[exonIx];
	int s1 = iStart - closeHalf;
	int e1 = s1 + closeSize;
	int s2 = iEnd - closeHalf;
	int e2 = s2 + closeSize;
	if (isRev)
	    {
	    tallyInRange(&g->splice3, hits, covers, chrom->size, 
		    s1, e1, isRev);
	    tallyInRange(&g->splice5, hits, covers, chrom->size, 
		    s2, e2, isRev);
	    }
	else
	    {
	    tallyInRange(&g->splice5, hits, covers, chrom->size, 
		    s1, e1, isRev);
	    tallyInRange(&g->splice3, hits, covers, chrom->size, 
		    s2, e2, isRev);
	    }
	tallyInRange(&g->intron, hits, covers, chrom->size, iStart, iEnd, isRev);
	}
    freez(&utr5Hits);
    freez(&utr3Hits);
    freez(&cdsAllHits);
    freez(&utr5Covers);
    freez(&utr3Covers);
    freez(&cdsAllCovers);
    }
freez(&hits);
freez(&covers);
lineFileClose(&lf);
}