void pslIntronsOnly(char *inPslName, char *genoFile, char *outPslName)
/* pslIntronsOnly - Copy from inPslName to outPslName only those alignments
 * for which pslHasIntron() reports an intron, then print a summary line.
 *   inPslName  - psl file to read.
 *   genoFile   - passed to loadGeno(); the resulting hash is looked up by
 *                each alignment's tName to get the target sequence.
 *   outPslName - file opened for writing; receives the kept alignments. */
{
struct hash *seqByTarget = loadGeno(genoFile);
struct lineFile *in = pslFileOpen(inPslName);
FILE *out = mustOpen(outPslName, "w");
int total = 0, kept = 0;
struct psl *aln;

for (aln = pslNext(in); aln != NULL; aln = pslNext(in))
    {
    /* NOTE(review): hashMustFindVal presumably aborts when tName is not
     * in the hash - confirm against the hash library. */
    struct dnaSeq *target = hashMustFindVal(seqByTarget, aln->tName);

    /* Third argument is 0 here; presumably the offset of the sequence
     * within the target - TODO confirm. */
    if (pslHasIntron(aln, target, 0))
        {
        kept += 1;
        pslTabOut(aln, out);
        }
    total += 1;
    pslFree(&aln);
    }

carefulClose(&out);
lineFileClose(&in);
printf("%d of %d in %s have introns\n", kept, total, inPslName);
}
/* Example #2
 * 0 */
struct altGraphX *agFromGp(char *db, struct genePred *gp, struct sqlConnection *conn, 
			   int maxGap, FILE *out)
/** Create an altGraphX record by clustering psl records within coordinates
    specified by genePred record.

    Steps: fetch psls for gp via getPsls(), widen chromStart/chromEnd with
    expandToMaxAlignment(), load that genomic range, keep psls that pass the
    singleExonOk / pslHasIntron() check, convert them to ggMrnaAli records,
    keep only those whose strand matches gp->strand, and hand the survivors
    to agFromAlignments().

    Returns NULL when getPsls() yields nothing or when no alignment on the
    gene's strand remains; otherwise whatever agFromAlignments() returns.

    Reads the file-level flags singleExonOk and useChromKeeper. */
{
struct altGraphX *ag = NULL;
struct dnaSeq *genoSeq = NULL;
struct ggMrnaAli *maList=NULL, *ma=NULL, *maNext=NULL, *maSameStrand=NULL;
/* nextPsl (not pslNext) so the local does not shadow the pslNext() function. */
struct psl *pslList = NULL, *psl = NULL, *pslCluster = NULL, *nextPsl = NULL;
char *chrom = gp->chrom;
/* Start with an empty (inverted) range for expandToMaxAlignment to fill in. */
int chromStart = BIGNUM;
int chromEnd = -1;
int pslCount = 0;

verbose(2, "agFromGp on %s %s:%d-%d\n", gp->name, gp->chrom, gp->txStart, gp->txEnd);

pslList = getPsls(gp, conn);
pslCount = slCount(pslList);
verbose(3, "  got %d psls\n", pslCount);
if(pslCount == 0)
    {
    /* Trailing newline added so the next verbose line starts on its own line,
       matching every other message in this function. */
    verbose(2, "No available alignments for %s.\n", gp->name);
    return NULL;
    }
/* expand to find the furthest boundaries of alignments */
expandToMaxAlignment(pslList, chrom, &chromStart, &chromEnd);
verbose(3, "  expanded to %s:%d-%d\n", chrom, chromStart, chromEnd);

/* get the sequence */
genoSeq = dnaFromChrom(db, chrom, chromStart, chromEnd, dnaLower);

/* Move acceptable psls onto pslCluster.  The next pointer is saved first
   because the psl is either relinked onto another list or freed. */
for(psl = pslList; psl != NULL; psl = nextPsl)
    {
    nextPsl = psl->next;
    if(singleExonOk || pslHasIntron(psl, genoSeq, chromStart))
	{
	slAddHead(&pslCluster, psl);
	}
    else 
	{
	/* Rejected psls are only freed here when not using the cache. */
	if(!useChromKeeper)
	    pslFree(&psl);
	}
    }
verbose(3, "  got %d psls after intron/singleExon check\n", slCount(pslCluster));
/* load and check the alignments */
maList = pslListToGgMrnaAliList(pslCluster, gp->chrom, chromStart, chromEnd, genoSeq, maxGap);
verbose(3, "  got %d in maList\n", slCount(maList));

/* Keep alignments on the gene's strand (first character compared); free the rest. */
for(ma = maList; ma != NULL; ma = maNext)
    {
    maNext = ma->next;
    verbose(4, "      ma->strand %s, gp->strand %s\n", ma->strand, gp->strand);
    if(ma->strand[0] == gp->strand[0])
	{
	slSafeAddHead(&maSameStrand, ma);
	}
    else
	ggMrnaAliFree(&ma);
    }
slReverse(&maSameStrand);

verbose(3, "  got %d in ma on same strand\n", slCount(maSameStrand));

/* If there is a cluster to work with create an geneGraph */
if(maSameStrand != NULL)
    {
    /* NOTE(review): genoSeq and maSameStrand are not freed on this path -
       presumably agFromAlignments takes ownership; confirm. */
    ag = agFromAlignments(db, maSameStrand, genoSeq, conn, chromStart, chromEnd,  out);
    }
else
    {
    dnaSeqFree(&genoSeq);
    /* maSameStrand is NULL on this branch; call kept as in the original. */
    ggMrnaAliFreeList(&maSameStrand);
    }

/* Only free psls if not using cache... */
if(!useChromKeeper)
    pslFreeList(&pslCluster);
return ag;
}