void pslIntronsOnly(char *inPslName, char *genoFile, char *outPslName) /* pslIntronsOnly - Filter psl files to only include those with introns. */ { struct lineFile *lf = NULL; FILE *outFile = NULL; struct hash *genoHash = loadGeno(genoFile); struct psl *psl; int count = 0, intronCount = 0; lf = pslFileOpen(inPslName); outFile = mustOpen(outPslName, "w"); while ((psl = pslNext(lf)) != NULL) { struct dnaSeq *geno = hashMustFindVal(genoHash, psl->tName); if (pslHasIntron(psl, geno, 0)) { ++intronCount; pslTabOut(psl, outFile); } pslFree(&psl); ++count; } carefulClose(&outFile); lineFileClose(&lf); printf("%d of %d in %s have introns\n", intronCount, count, inPslName); }
struct altGraphX *agFromGp(char *db, struct genePred *gp, struct sqlConnection *conn, int maxGap, FILE *out) /** Create an altGraphX record by clustering psl records within coordinates specified by genePred record. */ { struct altGraphX *ag = NULL; struct dnaSeq *genoSeq = NULL; struct ggMrnaAli *maList=NULL, *ma=NULL, *maNext=NULL, *maSameStrand=NULL; struct psl *pslList = NULL, *psl = NULL, *pslCluster = NULL, *pslNext = NULL; char *chrom = gp->chrom; int chromStart = BIGNUM; int chromEnd = -1; verbose(2, "agFromGp on %s %s:%d-%d\n", gp->name, gp->chrom, gp->txStart, gp->txEnd); pslList = getPsls(gp, conn); verbose(3, " got %d psls\n", slCount(pslList)); if(slCount(pslList) == 0) { verbose(2, "No available alignments for %s.", gp->name); return NULL; } /* expand to find the furthest boundaries of alignments */ expandToMaxAlignment(pslList, chrom, &chromStart, &chromEnd); verbose(3, " expanded to %s:%d-%d\n", chrom, chromStart, chromEnd); /* get the sequence */ genoSeq = dnaFromChrom(db, chrom, chromStart, chromEnd, dnaLower); for(psl = pslList; psl != NULL; psl = pslNext) { pslNext = psl->next; if(singleExonOk || pslHasIntron(psl, genoSeq, chromStart)) { slAddHead(&pslCluster, psl); } else { if(!useChromKeeper) pslFree(&psl); } } verbose(3, " got %d psls after intron/singleExon check\n", slCount(pslCluster)); /* load and check the alignments */ maList = pslListToGgMrnaAliList(pslCluster, gp->chrom, chromStart, chromEnd, genoSeq, maxGap); verbose(3, " got %d in maList\n", slCount(maList)); for(ma = maList; ma != NULL; ma = maNext) { maNext = ma->next; verbose(4, " ma->strand %s, gp->strand %s\n", ma->strand, gp->strand); if(ma->strand[0] == gp->strand[0]) { slSafeAddHead(&maSameStrand, ma); } else ggMrnaAliFree(&ma); } slReverse(&maSameStrand); verbose(3, " got %d in ma on same strand\n", slCount(maSameStrand)); /* If there is a cluster to work with create an geneGraph */ if(maSameStrand != NULL) { ag = agFromAlignments(db, maSameStrand, genoSeq, conn, chromStart, chromEnd, out); } else { dnaSeqFree(&genoSeq); ggMrnaAliFreeList(&maSameStrand); } /* Only free psls if not using cache... */ if(!useChromKeeper) pslFreeList(&pslCluster); return ag; }