// 1 if a better than b, 0 if equivalent, -1 if a worse than b int compareProcessEntries(processEntry_s* a, processEntry_s* b, bool* requirements) { if(!a || !b)return 0; int cnt_a = countBools(a->capabilities, requirements, NUM_CAPABILITIES); int cnt_b = countBools(b->capabilities, requirements, NUM_CAPABILITIES); if(cnt_a > cnt_b)return 1; else if(cnt_a < cnt_b)return -1; else if(a->processId == -1)return 1; else if(b->processId == -1)return -1; return 0; }
void ggcChrom(struct chromGenes *chrom, char *axtFile, struct ggcInfo *g, struct hash *restrictHash, FILE *fParts) /* Tabulate matches on chromosome. */ { struct lineFile *lf = lineFileOpen(axtFile, TRUE); bool *hits, *covers; int hitCount = 0, coverCount = 0; struct axt *axt; struct genePred *gp; int closeSize = g->closeSize; int closeHalf = closeSize/2; /* Build up array of booleans - one per base - which are * 1's where mouse/human align and bases match, zero * elsewhere. */ AllocArray(hits, chrom->size); AllocArray(covers, chrom->size); printf("%s (%d bases)\n", chrom->name, chrom->size); while ((axt = axtRead(lf)) != NULL) { int tPos = axt->tStart; int symCount = axt->symCount, i; char t, q, *tSym = axt->tSym, *qSym = axt->qSym; if (axt->tEnd > chrom->size) errAbort("tEnd %d, chrom size %d in %s", axt->tEnd, chrom->size, axtFile); if (axt->tStrand == '-') errAbort("Can't handle minus strand on target in %s", axtFile); for (i=0; i<symCount; ++i) { t = tSym[i]; if (t != '-') { q = qSym[i]; if (toupper(t) == toupper(q)) { hits[tPos] = TRUE; ++hitCount; } if (q == '-') covers[tPos] = 1; else covers[tPos] = 2; ++tPos; } } axtFree(&axt); } for (gp = chrom->geneList; gp != NULL; gp = gp->next) { int exonIx; int utr3Size = 0, utr5Size = 0, cdsAllSize = 0; int utr3Pos = 0, utr5Pos = 0, cdsAllPos = 0; bool *utr3Hits = NULL, *utr3Covers = NULL; bool *utr5Hits = NULL, *utr5Covers = NULL; bool *cdsAllHits = NULL, *cdsAllCovers = NULL; bool isRev = (gp->strand[0] == '-'); /* Filter out genes not in restrict hash if any. */ ++totalGenes; if (restrictHash != NULL) if (!hashLookup(restrictHash, gp->name)) continue; ++reviewedGenes; /* Filter out genes without meaningful UTRs */ if (gp->cdsStart - gp->txStart < g->closeSize/2 || gp->txEnd - gp->cdsEnd < g->closeSize/2) continue; ++genesUsed; /* Total up UTR and CDS sizes. */ for (exonIx=0; exonIx<gp->exonCount; ++exonIx) { int eStart = gp->exonStarts[exonIx]; int eEnd = gp->exonEnds[exonIx]; int eSize = eEnd - eStart; int oneUtr, oneCds; oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd); if (oneCds > 0) { cdsAllSize += oneCds; } if (eStart < gp->cdsStart) { int utrStart = eStart; int utrEnd = min(gp->cdsStart, eEnd); int utrSize = utrEnd - utrStart; if (isRev) utr3Size += utrSize; else utr5Size += utrSize; } if (eEnd > gp->cdsEnd) { int utrStart = max(gp->cdsEnd, eStart); int utrEnd = eEnd; int utrSize = utrEnd - utrStart; if (isRev) utr5Size += utrSize; else utr3Size += utrSize; } } /* Condense hits from UTRs and CDSs */ if (utr5Size > 0) { AllocArray(utr5Hits, utr5Size); AllocArray(utr5Covers, utr5Size); } if (utr3Size > 0) { AllocArray(utr3Hits, utr3Size); AllocArray(utr3Covers, utr3Size); } if (cdsAllSize > 0) { AllocArray(cdsAllHits, cdsAllSize); AllocArray(cdsAllCovers, cdsAllSize); } for (exonIx=0; exonIx<gp->exonCount; ++exonIx) { int eStart = gp->exonStarts[exonIx]; int eEnd = gp->exonEnds[exonIx]; int eSize = eEnd - eStart; int oneUtr, oneCds; oneCds = rangeIntersection(gp->cdsStart, gp->cdsEnd, eStart, eEnd); if (oneCds > 0) { int cdsStart = eStart; int cdsEnd = gp->cdsEnd; if (cdsStart < gp->cdsStart) cdsStart = gp->cdsStart; memcpy(cdsAllHits + cdsAllPos, hits + cdsStart, oneCds * sizeof(*hits)); memcpy(cdsAllCovers + cdsAllPos, covers + cdsStart, oneCds * sizeof(*covers)); cdsAllPos += oneCds; } if (eStart < gp->cdsStart) { int utrStart = eStart; int utrEnd = min(gp->cdsStart, eEnd); int utrSize = utrEnd - utrStart; if (isRev) { memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits)); memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers)); utr3Pos += utrSize; } else { memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits)); memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers)); utr5Pos += utrSize; } } if (eEnd > gp->cdsEnd) { int utrStart = max(gp->cdsEnd, eStart); int utrEnd = eEnd; int utrSize = utrEnd - utrStart; if (isRev) { memcpy(utr5Hits + utr5Pos, hits + utrStart, utrSize * sizeof(*hits)); memcpy(utr5Covers + utr5Pos, covers + utrStart, utrSize * sizeof(*covers)); utr5Pos += utrSize; } else { memcpy(utr3Hits + utr3Pos, hits + utrStart, utrSize * sizeof(*hits)); memcpy(utr3Covers + utr3Pos, covers + utrStart, utrSize * sizeof(*covers)); utr3Pos += utrSize; } } } assert(utr3Pos == utr3Size); assert(utr5Pos == utr5Size); assert(cdsAllPos == cdsAllSize); tallyHits(&g->utr5, utr5Hits, utr5Covers, utr5Size, isRev); tallyHits(&g->utr3, utr3Hits, utr3Covers, utr3Size, isRev); tallyHits(&g->cdsAll, cdsAllHits, cdsAllCovers, cdsAllSize, isRev); /* Optionally write out file with gene by gene info. */ if (fParts != NULL) { /* Write header line first time through. */ static boolean firstTime = TRUE; if (firstTime) { firstTime = FALSE; fprintf(fParts, "#accession\tsize_5\tali_5\tmatch_5\tsize_c\tali_c\tmatch_c\tsize_3\tali_3\tmatch_3\n"); } fprintf(fParts, "%s\t", gp->name); fprintf(fParts, "%d\t%d\t%d\t", utr5Size, countBools(utr5Covers, utr5Size), countBools(utr5Hits, utr5Size)); fprintf(fParts, "%d\t%d\t%d\t", cdsAllSize, countBools(cdsAllCovers, cdsAllSize), countBools(cdsAllHits, cdsAllSize)); fprintf(fParts, "%d\t%d\t%d\n", utr3Size, countBools(utr3Covers, utr3Size), countBools(utr3Hits, utr3Size)); } /* Tally upstream/downstream hits. */ { int s1 = gp->txStart - closeHalf; int e1 = s1 + closeSize; int s2 = gp->txEnd - closeHalf; int e2 = s2 + closeSize; if (isRev) { tallyInRange(&g->down, hits, covers, chrom->size, gp->txStart - g->baseDown, gp->txStart, isRev); tallyInRange(&g->up, hits, covers, chrom->size, gp->txEnd, gp->txEnd + g->baseUp, isRev); tallyInRange(&g->txEnd, hits, covers, chrom->size, s1, e1, isRev); tallyInRange(&g->txStart, hits, covers, chrom->size, s2, e2, isRev); } else { tallyInRange(&g->up, hits, covers, chrom->size, gp->txStart - g->baseUp, gp->txStart, isRev); tallyInRange(&g->down, hits, covers, chrom->size, gp->txEnd, gp->txEnd + g->baseDown, isRev); tallyInRange(&g->txStart, hits, covers, chrom->size, s1, e1, isRev); tallyInRange(&g->txEnd, hits, covers, chrom->size, s2, e2, isRev); } } /* Tally hits in coding exons */ for (exonIx=0; exonIx < gp->exonCount; ++exonIx) { int eStart = gp->exonStarts[exonIx]; int eEnd = gp->exonEnds[exonIx]; /* Single coding exon. */ if (eStart <= gp->cdsStart && eEnd >= gp->cdsEnd) { eStart = gp->cdsStart; eEnd = gp->cdsEnd; tallyInRange(&g->cdsSingle, hits, covers, chrom->size, eStart, eEnd, isRev); } /* Initial coding exon */ else if (eStart < gp->cdsStart && eEnd > gp->cdsStart) { int cs = gp->cdsStart - closeHalf; int ce = cs + closeSize; eStart = gp->cdsStart; if (isRev) { tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev); tallyInRange(&g->cdsLast, hits, covers, chrom->size, eStart, eEnd, isRev); } else { tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev); tallyInRange(&g->cdsFirst, hits, covers, chrom->size, eStart, eEnd, isRev); } } /* Final coding exon */ else if (eStart < gp->cdsEnd && eEnd > gp->cdsEnd) { int cs = gp->cdsEnd - closeHalf; int ce = cs + closeSize; eEnd = gp->cdsEnd; if (isRev) { tallyInRange(&g->tlStart, hits, covers, chrom->size, cs, ce, isRev); tallyInRange(&g->cdsFirst, hits, covers, chrom->size, eStart, eEnd, isRev); } else { tallyInRange(&g->tlEnd, hits, covers, chrom->size, cs, ce, isRev); tallyInRange(&g->cdsLast, hits, covers, chrom->size, eStart, eEnd, isRev); } } /* Middle (but not only) coding exon */ else if (eStart >= gp->cdsStart && eEnd <= gp->cdsEnd) { tallyInRange(&g->cdsMiddle, hits, covers, chrom->size, eStart, eEnd, isRev); } else { } } /* Tally hits in introns and splice sites. */ for (exonIx=1; exonIx<gp->exonCount; ++exonIx) { int iStart = gp->exonEnds[exonIx-1]; int iEnd = gp->exonStarts[exonIx]; int s1 = iStart - closeHalf; int e1 = s1 + closeSize; int s2 = iEnd - closeHalf; int e2 = s2 + closeSize; if (isRev) { tallyInRange(&g->splice3, hits, covers, chrom->size, s1, e1, isRev); tallyInRange(&g->splice5, hits, covers, chrom->size, s2, e2, isRev); } else { tallyInRange(&g->splice5, hits, covers, chrom->size, s1, e1, isRev); tallyInRange(&g->splice3, hits, covers, chrom->size, s2, e2, isRev); } tallyInRange(&g->intron, hits, covers, chrom->size, iStart, iEnd, isRev); } freez(&utr5Hits); freez(&utr3Hits); freez(&cdsAllHits); freez(&utr5Covers); freez(&utr3Covers); freez(&cdsAllCovers); } freez(&hits); freez(&covers); lineFileClose(&lf); }