void polyInfo(char *pslFile, char *genoFile, char *estFile, char *outputFile) /* polyInfo - Collect info on polyAdenylation signals etc. */ { struct hash *pslHash = NULL; struct hash *genoHash = loadGeno(genoFile); static struct dnaSeq est; struct lineFile *lf = NULL; FILE *f = NULL; pslHash = pslIntoHash(pslFile); lf = lineFileOpen(estFile, TRUE); f = mustOpen(outputFile, "w"); while (faSpeedReadNext(lf, &est.dna, &est.size, &est.name)) { struct pslList *pl; struct psl *psl; struct estOrientInfo ei; if ((pl = hashFindVal(pslHash, est.name)) != NULL) { for (psl = pl->list; psl != NULL; psl = psl->next) { struct dnaSeq *geno = hashMustFindVal(genoHash, psl->tName); if (psl->tSize != geno->size) errAbort("psl generated on a different version of the genome"); ZeroVar(&ei); fillInEstInfo(&ei, &est, geno, psl); estOrientInfoTabOut(&ei, f); } } } }
void pslIntronsOnly(char *inPslName, char *genoFile, char *outPslName) /* pslIntronsOnly - Filter psl files to only include those with introns. */ { struct lineFile *lf = NULL; FILE *outFile = NULL; struct hash *genoHash = loadGeno(genoFile); struct psl *psl; int count = 0, intronCount = 0; lf = pslFileOpen(inPslName); outFile = mustOpen(outPslName, "w"); while ((psl = pslNext(lf)) != NULL) { struct dnaSeq *geno = hashMustFindVal(genoHash, psl->tName); if (pslHasIntron(psl, geno, 0)) { ++intronCount; pslTabOut(psl, outFile); } pslFree(&psl); ++count; } carefulClose(&outFile); lineFileClose(&lf); printf("%d of %d in %s have introns\n", intronCount, count, inPslName); }