Beispiel #1
0
void pslRecalcMatch(char *inName, char *targetName, char *queryName, 
	char *outName)
/* pslRecalcMatch - Recalculate match,mismatch,repMatch columns in psl file.  
 * This can be useful if the psl went through pslMap, or if you've added 
 * lower-case repeat masking after the fact.
 *   inName     - psl file to read
 *   targetName - target sequence source handed to nibTwoCacheNew
 *   queryName  - query sequence file, loaded completely with dnaLoadAll
 *   outName    - file the recalculated records are written to */
{
struct nibTwoCache *tCache = nibTwoCacheNew(targetName);
struct dnaSeq *qSeqList = dnaLoadAll(queryName);
struct hash *qHash = dnaSeqHash(qSeqList);
struct psl *psl;
struct lineFile *lf = pslFileOpen(inName);
FILE *f = mustOpen(outName, "w");

while ((psl = pslNext(lf)) != NULL)
    {
    int tSize;
    /* Only the target range covered by this alignment is fetched. */
    struct dnaSeq *tSeqPart = nibTwoCacheSeqPart(tCache,
    	psl->tName, psl->tStart, psl->tEnd - psl->tStart, &tSize);
    struct dnaSeq *qSeq = hashMustFindVal(qHash, getQName(psl->qName));
    recalcMatches(psl, tSeqPart, psl->tStart, qSeq);
    pslTabOut(psl, f);
    dnaSeqFree(&tSeqPart);
    /* The record is not referenced again once written, so release it
     * rather than letting one record per input line pile up. */
    pslFree(&psl);
    }
carefulClose(&f);
lineFileClose(&lf);
}
Beispiel #2
0
void pslCoverage(char *inLst, char *inPsl, double aliRatio, int trimSize, char *outName,
	char *misAsmName)
/* Read the probe list from inLst, then walk inPsl collecting runs of
 * consecutive records that share a query name and hand each run to doOneAcc.
 * Afterwards summarizeProbeList writes its summary.
 *   inLst      - probe list file given to readProbeList
 *   inPsl      - psl file; records of one query are expected to be adjacent
 *   aliRatio   - alignment ratio, turned into the per-thousand threshold below
 *   trimSize   - passed through to doOneAcc and summarizeProbeList
 *   outName    - summary file, opened in append mode
 *   misAsmName - file opened for writing and passed to doOneAcc and
 *                summarizeProbeList */
{
struct lineFile *in = pslFileOpen(inPsl);
FILE *out = mustOpen(outName, "a");
FILE *misAsm = mustOpen(misAsmName, "w");
struct psl *pslList = NULL, *psl;
char lastName[256];
int threshold = round((1.0 - (1.0 - aliRatio)*2)*1000);
struct hash *probeHash;
struct probe *probeList;

readProbeList(inLst, &probeList, &probeHash);
printf("Found %d probes in %s\n", slCount(probeList), inLst);
printf("Processing %s percent ID %f%% threshold %d\n", inPsl, aliRatio*100, threshold);
strcpy(lastName, "");
while ((psl = pslNext(in)) != NULL)
    {
    if (!sameString(lastName, psl->qName))
	{
	/* Query name changed: flush the run gathered so far.  On the very
	 * first record this is called with an empty name and empty list. */
	doOneAcc(lastName, pslList, threshold, trimSize, probeHash, misAsm);
	finishList(&pslList);
	/* NOTE(review): unbounded copy into a 256 byte buffer - confirm
	 * query names can never be longer than 255 characters. */
	strcpy(lastName, psl->qName);
	}
    slAddHead(&pslList, psl);
    }
/* Flush the final run. */
doOneAcc(lastName, pslList, threshold, trimSize, probeHash, misAsm);
finishList(&pslList);
lineFileClose(&in);

summarizeProbeList(probeList, trimSize, aliRatio, out, misAsm);
fclose(out);
/* misAsm was previously left open; close it so it is flushed here. */
fclose(misAsm);
}
Beispiel #3
0
void migratePsls(struct migrateAligns* migrate, unsigned pslFileType,
                 struct gbEntryCnts* counts, FILE* outPslFh)
/* Run every record of the previous alignment file of the given psl file
 * type through migratePsl.  Does nothing when that file does not exist. */
{
char srcPath[PATH_LEN];
struct lineFile* srcLf;
struct psl* rec;

gbAlignedGetPath(migrate->prevSelect, gPslFileGzExt[pslFileType], NULL, srcPath);

/* If none of the sequences aligned there may be no file at all. */
if (!fileExists(srcPath))
    return;

gbVerbEnter(2, "migrating %ss from %s", gPslFileExt[pslFileType], srcPath);
srcLf = gzLineFileOpen(srcPath);
for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    migratePsl(migrate, pslFileType, counts, rec, srcPath, outPslFh);
    pslFree(&rec);
    }
gzLineFileClose(&srcLf);
gbVerbLeave(2, "migrating %ss from %s", gPslFileExt[pslFileType], srcPath);
}
void pslCopyInClones(char *listFile, char *partDir, char *outName)
/* Concatenate the psl files that getFileList yields for listFile/partDir
 * into outName, behind a single psl header, and report the totals. */
{
FILE *dest = mustOpen(outName, "w");
struct slName *srcList, *src;
struct psl *rec;
int recTotal = 0;
int srcTotal = 0;

pslWriteHead(dest);
srcList = getFileList(listFile, partDir);
src = srcList;
while (src != NULL)
    {
    struct lineFile *srcLf = pslFileOpen(src->name);
    srcTotal += 1;
    for (;;)
        {
        rec = pslNext(srcLf);
        if (rec == NULL)
            break;
        pslTabOut(rec, dest);
        pslFree(&rec);
        recTotal += 1;
        }
    lineFileClose(&srcLf);
    src = src->next;
    }
printf("%d psls in %d files written to %s\n", recTotal, srcTotal, outName);
fclose(dest);
}
void pslIntronsOnly(char *inPslName, char *genoFile, char *outPslName)
/* pslIntronsOnly - Copy to outPslName just those records of inPslName for
 * which pslHasIntron is true, then print how many of the total passed. */
{
struct hash *genoHash = loadGeno(genoFile);
struct lineFile *srcLf = pslFileOpen(inPslName);
FILE *dest = mustOpen(outPslName, "w");
struct psl *rec;
int seen = 0, kept = 0;

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    /* The target sequence must be present in the genome hash. */
    struct dnaSeq *tSeq = hashMustFindVal(genoHash, rec->tName);
    if (pslHasIntron(rec, tSeq, 0))
        {
        kept += 1;
        pslTabOut(rec, dest);
        }
    pslFree(&rec);
    seen += 1;
    }
carefulClose(&dest);
lineFileClose(&srcLf);
printf("%d of %d in %s have introns\n", kept, seen, inPslName);
}
Beispiel #6
0
void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash, bool doPosName)
/* pslToBed -- transform a psl format file to a bed format file.
 *   cdsHash   - optional hash of struct cds keyed by query name; when given
 *               it drives the thickStart/thickEnd fields
 *   doPosName - when set the bed name becomes qName:qStart-qEnd */
{
struct lineFile *srcLf = pslFileOpen(pslFile);
FILE *dest = mustOpen(bedFile, "w");
struct psl *rec;

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    struct bed *item = bedFromPsl(rec);

    if (doPosName)
        {
        /* Swap the name made by bedFromPsl for a positional one. */
        char *posName = needMem(512);
        safef(posName, 512, "%s:%d-%d", rec->qName, rec->qStart, rec->qEnd);
        freeMem(item->name);
        item->name = posName;
        }

    if (cdsHash)
        {
        struct cds *cds = hashFindVal(cdsHash, rec->qName);
        if (cds != NULL)
            setThick(rec, item, cds);
        else
            /* No CDS known: collapse the thick region onto chromStart. */
            item->thickStart = item->thickEnd = item->chromStart;
        }
    bedTabOutN(item, 12, dest);
    bedFree(&item);
    pslFree(&rec);
    }
carefulClose(&dest);
lineFileClose(&srcLf);
}
struct seqPair *readPslBlocks(char *fileName, struct hash *pairHash, FILE *f)
/* Read a psl file and sort its blocks into seqPair records, one per
 * distinct qName+strand+tName combination.  New pairs are registered in
 * pairHash; the list of pairs created by this call is returned. */
{
struct seqPair *created = NULL, *pair;
struct lineFile *srcLf = pslFileOpenWithUniqueMeta(fileName, f);
struct dyString *key = newDyString(512);
struct psl *rec;

for (;;)
    {
    rec = pslNext(srcLf);
    if (rec == NULL)
        break;

    /* Pairs are keyed by query name, strand and target name run together. */
    dyStringClear(key);
    dyStringPrintf(key, "%s%s%s", rec->qName, rec->strand, rec->tName);
    pair = hashFindVal(pairHash, key->string);
    if (pair == NULL)
        {
        AllocVar(pair);
        slAddHead(&created, pair);
        hashAddSaveName(pairHash, key->string, pair, &pair->name);
        pair->qName = cloneString(rec->qName);
        pair->tName = cloneString(rec->tName);
        pair->qStrand = rec->strand[0];
        }
    addPslBlocks(&pair->blockList, rec);
    pair->axtCount += 1;
    pslFree(&rec);
    }

lineFileClose(&srcLf);
dyStringFree(&key);
return created;
}
static void pslMap(char* inPslFile, char *mapFile, char *outPslFile)
/* Project each record of inPslFile through the alignments loaded from
 * mapFile and write the results to outPslFile.  The file-level settings
 * chainMapFile, mapInfoFile, mappingPslFile and swapIn select the map
 * format, the optional extra outputs and whether input is swapped first. */
{
struct lineFile* srcLf = pslFileOpen(inPslFile);
struct chromBins *mapAlns;
struct psl* rec;
FILE *outPslFh;
FILE *mapInfoFh = NULL;
FILE *mappingPslFh = NULL;

mapAlns = chainMapFile ? loadMapChains(mapFile) : loadMapPsls(mapFile);

outPslFh = mustOpen(outPslFile, "w");
if (mapInfoFile != NULL)
    {
    /* The info file starts with its header line. */
    mapInfoFh = mustOpen(mapInfoFile, "w");
    fputs(mapInfoHdr, mapInfoFh);
    }
if (mappingPslFile != NULL)
    mappingPslFh = mustOpen(mappingPslFile, "w");

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    if (swapIn)
        pslSwap(rec, FALSE);
    mapQueryPsl(rec, mapAlns, outPslFh, mapInfoFh, mappingPslFh);
    pslFree(&rec);
    }

/* The optional handles may still be NULL here. */
carefulClose(&mappingPslFh);
carefulClose(&mapInfoFh);
carefulClose(&outPslFh);
lineFileClose(&srcLf);
}
Beispiel #9
0
void sgName(char *database, char *protDb,  char *refPsl, char *outAssoc)
/* sgName - builds association table between knownPep and gene common name.
 * For each record of refPsl one tab-separated line is written to outAssoc:
 * query name, the result of lookupName, the target position as
 * tName:tStart-tEnd, and the result of getSwiss.
 *   database - database to connect to for the lookups
 *   protDb   - currently unused
 *   refPsl   - psl file to read
 *   outAssoc - output file */
{
struct sqlConnection *conn = sqlConnect(database);
FILE *f = mustOpen(outAssoc, "w");
struct lineFile *pslLf = pslFileOpen(refPsl);
struct psl *psl;

while ((psl = pslNext(pslLf)) != NULL)
    {
    fprintf(f,"%s\t%s\t%s:%d-%d\t",psl->qName, lookupName(conn,psl->qName), 
	psl->tName, psl->tStart, psl->tEnd);
    fprintf(f,"%s\n", getSwiss(conn, psl->qName));
    /* Everything needed from the record has been written out. */
    pslFree(&psl);
    }
/* Close both files explicitly; they used to be left open on return. */
lineFileClose(&pslLf);
carefulClose(&f);
/* NOTE(review): the connection comes from sqlConnect but is released with
 * hFreeConn - confirm that this pairing is intended. */
hFreeConn(&conn);
}
Beispiel #10
0
void copyPslToTab(char *pslFile, char *tabFile)
/* Copy one psl file to the tab file.  With noSort set the records go
 * straight to the file, otherwise they are fed through a pipeline that
 * writes tabFile.  A bin column is prepended when PSL_WITH_BIN is set. */
{
struct lineFile *srcLf = pslFileOpen(pslFile);
struct pipeline *sorter = NULL;
FILE *dest = NULL;
struct psl *rec;

if (!noSort)
    {
    sorter = pipelineOpen((pslCreateOpts & PSL_WITH_BIN) ? outPipeBin : outPipeNoBin,
                          pipelineWrite, tabFile, NULL);
    dest = pipelineFile(sorter);
    }
else
    dest = mustOpen(tabFile, "w");

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    if (pslCreateOpts & PSL_WITH_BIN)
        fprintf(dest, "%u\t", hFindBin(rec->tStart, rec->tEnd));
    pslTabOut(rec, dest);
    pslFree(&rec);
    }
lineFileClose(&srcLf);

/* The pipeline's stream is finished by waiting on the pipeline rather
 * than by closing the FILE directly. */
if (!noSort)
    {
    pipelineWait(sorter);
    pipelineFree(&sorter);
    }
else
    carefulClose(&dest);
}
Beispiel #11
0
static void pslAlignStats(char *pslFile, char *statsFile, char *querySizeFile)
/* Write one statistics line per alignment of pslFile to statsFile.  When
 * querySizeFile is given, alignments are also counted per query and
 * alignStatsOutputUnaligned is called with those counts at the end. */
{
struct hash* sizeTbl = NULL;
struct lineFile *srcLf;
FILE *dest;
struct psl* rec;

if (querySizeFile != NULL)
    sizeTbl = querySizeCntLoad(querySizeFile);
srcLf = pslFileOpen(pslFile);
dest = mustOpen(statsFile, "w");

fputs(alnStatsHdr, dest);
for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    fprintf(dest, alnStatsFmt, rec->qName, rec->qSize, rec->tName, rec->tStart, rec->tEnd,
            calcIdent(rec), calcQCover(rec), calcRepMatch(rec), calcTCover(rec));
    if (sizeTbl != NULL)
        querySizeCntGet(sizeTbl, rec->qName, rec->qSize)->alnCnt++;
    pslFree(&rec);
    }
lineFileClose(&srcLf);

if (sizeTbl != NULL)
    alignStatsOutputUnaligned(dest, sizeTbl);

carefulClose(&dest);
}
Beispiel #12
0
void pslCut(char *cutList, char *inPsl, char *outPsl)
/* pslCut - Remove a list of clones from psl file.
 * Records whose target name maps (via fragToCloneName) to a clone present
 * in the cut hash are dropped; all others are copied to outPsl behind a
 * psl header.  Prints how many records were cut.
 *   cutList - file given to buildCutHash
 *   inPsl   - psl file to filter
 *   outPsl  - filtered output */
{
struct hash *cutHash = newHash(0);
struct lineFile *lf = pslFileOpen(inPsl);
FILE *f = mustOpen(outPsl, "w");
struct psl *psl;
char cloneName[128];
int total = 0, cut = 0;

buildCutHash(cutList, cutHash);
pslWriteHead(f);
while ((psl = pslNext(lf)) != NULL)
    {
    fragToCloneName(psl->tName, cloneName);
    if (!hashLookup(cutHash, cloneName))
	{
        pslTabOut(psl, f);
	}
    else
        ++cut;
    ++total;
    pslFree(&psl);
    }
/* Input and output were previously left open on return. */
lineFileClose(&lf);
carefulClose(&f);
printf("Cut %d of %d\n", cut, total);
}
void fillInPsls(char *pslName, struct hash *pairHash)
/* Read in psl file and save overlaps between indicated pairs
 * in hash.  A record is kept when the pair name built from its query and
 * target clone names is present in pairHash; it is then added to the a or
 * b side of that pair as chosen by makePairName.  Other records are freed.
 *   pslName  - psl file to read
 *   pairHash - hash of struct seqPair keyed by pair name */
{
struct lineFile *lf = pslFileOpen(pslName);
struct psl *psl;
char *pairName;
struct seqPair *pair;
struct seqOver *so;
boolean firstA;
char queryClone[128], targetClone[128];

while ((psl = pslNext(lf)) != NULL)
    {
    fragToCloneName(psl->qName, queryClone);
    fragToCloneName(psl->tName, targetClone);
    pairName = makePairName(queryClone, targetClone, &firstA);
    if ((pair = hashFindVal(pairHash, pairName)) != NULL)
	{
	/* Ownership of the record moves to the pair's list. */
	so = (firstA ? &pair->a : &pair->b);
	slAddHead(&so->pslList, psl);
	}
    else
	{
	pslFree(&psl);
	}
    }
/* The input file was previously never closed. */
lineFileClose(&lf);
}
void pslxToFa(char *pslName, char *faName, char *liftTargetName, char *liftQueryName)
/* pslxToFa - convert pslx to fasta file.
 * Every block of every record becomes one fasta entry named
 * qName_blockIx_blockCount that holds the block's query sequence.
 *   pslName        - pslx input (the per-block query sequences are required)
 *   faName         - fasta output
 *   liftTargetName - optional; when non-NULL one line per block is written
 *                    with the target offset and three times the sequence length
 *   liftQueryName  - optional; when non-NULL one line per block is written
 *                    with the query offset and the sequence length */
{
FILE *liftTarget = NULL;
FILE *liftQuery = NULL;
struct lineFile *in = pslFileOpen(pslName);
FILE *out = mustOpen(faName, "w");
struct psl *psl;

if (liftQueryName != NULL)
    liftQuery = mustOpen(liftQueryName, "w");

if (liftTargetName != NULL)
    liftTarget = mustOpen(liftTargetName, "w");

while ((psl = pslNext(in)) != NULL)
    {
    int ii;
    /* Block 0 used to be handled by a separate copy of this code that ran
     * even for a record without blocks; one loop covers all blocks and
     * never touches qSequence[0] when blockCount is 0. */
    for (ii = 0; ii < psl->blockCount; ii++)
	{
	char *blockSeq = psl->qSequence[ii];
	long blockLen = (long)strlen(blockSeq);

	if (liftQuery != NULL)
	    {
	    fprintf(liftQuery,"%d\t%s/%s_%d_%d\t%ld\t%s\t%d\n",
		psl->qStarts[ii], "1", psl->qName, ii, psl->blockCount, blockLen, psl->qName, psl->qSize);
	    }
	if (liftTarget != NULL)
	    {
	    /* On the '-' target strand tSize - tStart is written instead of tStart. */
	    int tOffset = (psl->strand[1] == '-')
		? psl->tSize - psl->tStarts[ii]
		: psl->tStarts[ii];
	    /* NOTE(review): &tName[3] skips the first three characters of the
	     * target name - confirm names are always at least that long. */
	    fprintf(liftTarget,"%d\t%s/%s_%d_%d\t%ld\t%s\t%d\t%c\n",
		tOffset, &psl->tName[3], psl->qName, ii, psl->blockCount, 3*blockLen, psl->tName, psl->tSize, psl->strand[1]);
	    }
	fprintf(out,">%s_%d_%d\n%s\n",psl->qName, ii, psl->blockCount, blockSeq);
	}
    pslFree(&psl);
    }
/* None of these were closed before; carefulClose accepts a NULL handle. */
lineFileClose(&in);
carefulClose(&out);
carefulClose(&liftTarget);
carefulClose(&liftQuery);
}
void pslMrnaCover(char *pslFile, char *faFile)
/* pslMrnaCover - Make histogram of coverage percentage of mRNA in psl.
 * For every sequence read from faFile the largest aligned size
 * (match + repMatch + misMatch) over its records in pslFile is tracked and
 * turned into a 0..100 percentage; the histogram is printed to stdout.
 * Records with qSize below the file-level minSize are ignored.  When the
 * file-level listZero is set, sequences with no alignment are listed there.
 *   pslFile - alignments
 *   faFile  - sequences that the alignments refer to */
{
static int histogram[101];
int i;
int qAli;
struct hash *hash;
struct rnaCover *rcList = NULL, *rc;
struct lineFile *lf = pslFileOpen(pslFile);
struct psl *psl;

/* Build up list of all sequences. */
readFa(faFile, &rcList, &hash);

/* Scan psls and see maximum amount each is aligned. */
while ((psl = pslNext(lf)) != NULL)
    {
    if (psl->qSize >= minSize)
	{
	if ((rc = hashFindVal(hash, psl->qName)) == NULL)
	    errAbort("%s is in %s but not %s", psl->qName, pslFile, faFile);
	if (rc->qSize != psl->qSize)
	    errAbort("%s is %d bytes in %s but %d in %s", psl->qName,
		rc->qSize, faFile, psl->qSize, pslFile);
	qAli = psl->match + psl->repMatch + psl->misMatch;
	if (qAli > rc->qMaxAli)
	   rc->qMaxAli = qAli;
	}
    pslFree(&psl);
    }
lineFileClose(&lf);

/* Open file to keep track of non-aligners */
if (listZero != NULL)
    {
    FILE *f = mustOpen(listZero, "w");
    for (rc = rcList; rc != NULL; rc = rc->next)
	{
	if (rc->qMaxAli == 0)
	    fprintf(f, "%s\t%d\n", rc->name, rc->qSize);
	}
    /* This file used to be left open. */
    carefulClose(&f);
    }

/* Tally up percentage aligning in histogram. */
for (rc = rcList; rc != NULL; rc = rc->next)
    {
    int histIx = roundingScale(100, rc->qMaxAli, rc->qSize);
    assert(histIx <= 100);
    histogram[histIx] += 1;
    }

/* Print out histogram. */
for (i=0; i<=100; ++i)
    {
    printf("%3d%% %6d\n", i, histogram[i]);
    }
}
Beispiel #16
0
static struct psl *pslInputNext(struct pslInput *pi)
/* Hand out the record held in pi->pending, clearing that slot, or read the
 * next record from pi->lf when nothing is pending. */
{
    struct psl *held = pi->pending;
    if (held == NULL)
        return pslNext(pi->lf);
    pi->pending = NULL;
    return held;
}
Beispiel #17
0
void exonMap(char *query, char *target, char *output)
/* exonMap - map exons using two psls.
 * Records of the query file are grouped by qName.  Each record of the target
 * file is then run through mapBlocks against every query record with the
 * same qName.  With the "exons" option mapBlocks writes through outPsl;
 * otherwise the psl collected through addPsl is written with pslTabOut.
 *   query  - psl file whose records are indexed by qName
 *   target - psl file whose records are mapped
 *   output - result file */
{
struct lineFile *qlf = pslFileOpen(query);
struct lineFile *tlf = pslFileOpen(target);
struct psl *psl, *pslList, *pslRef;
struct hash *pslHash = newHash(0);  
FILE *outF = mustOpen(output, "w");

/* Index the query records.  The hash holds the first record seen for a
 * name; later ones are chained in directly behind it. */
while ((psl = pslNext(qlf)) != NULL)
    {
    pslList = hashFindVal(pslHash, psl->qName);
    if (pslList == NULL)
	hashAdd(pslHash, psl->qName, psl);
    else
	{
	psl->next = pslList->next;
	pslList->next = psl;
	}
    }

while ((psl = pslNext(tlf)) != NULL)
    {
    /* A target record without any query record gives a NULL list and is
     * silently skipped. */
    pslList = hashFindVal(pslHash,psl->qName);

    for(pslRef = pslList; pslRef ; pslRef = pslRef->next )
	{
	if (optionExists("exons"))
	    mapBlocks(pslRef, psl, outPsl, (void *)outF);
	else
	    {
	    struct psl *newPsl = NULL;
	    mapBlocks(pslRef, psl, addPsl, &newPsl);
	    /* Guard against mapBlocks producing nothing; writing a NULL
	     * record would dereference it. */
	    if (newPsl != NULL)
		pslTabOut(newPsl, outF);
	    }
	}
    /* NOTE(review): psl and newPsl are not freed here because it is not
     * visible whether mapBlocks/addPsl keep references to them - confirm. */
    }
/* All three files used to be left open on return. */
lineFileClose(&tlf);
lineFileClose(&qlf);
carefulClose(&outF);
}
Beispiel #18
0
int readAlignments(char *pairsPsl, struct hash *readHash, struct hash *fragHash)
/* Read in alignments and process them into the read->aliList. 
 * Returns number of alignments altogether (only those accepted by filter).
 * Accepted alignments are also tallied in the file-level aliSizes table by
 * (match + repMatch) / 100, clamped to the table's range.
 *   pairsPsl - psl file to read
 *   readHash - hash of struct readInfo keyed by query name; every accepted
 *              query must be present
 *   fragHash - hash used to store the target names */
{
struct lineFile *lf = pslFileOpen(pairsPsl);
struct shortAli *ali;
struct psl *psl;
struct readInfo *rd;
int aliCount = 0;
int dotEvery = 20*1024;
int dotty = dotEvery;
int aliSize;

printf("Reading and processing %s\n", pairsPsl);
for (;;)
    {
    AllocVar(ali);     /* Allocate this first to reduce memory fragmentation. */
    if ((psl = pslNext(lf)) == NULL)
        {
	freeMem(ali);
	break;
	}
    if (filter(psl))
	{
	rd = hashMustFindVal(readHash, psl->qName);
	aliSize = psl->match + psl->repMatch;
	aliSize /= 100;
	if (aliSize < 0) aliSize = 0;
	if (aliSize >= ArraySize(aliSizes)) aliSize = ArraySize(aliSizes)-1;
	aliSizes[aliSize] += 1;
	/* Only the fields copied here outlive the psl record. */
	ali->tName = hashStoreName(fragHash, psl->tName);
	ali->tStart = psl->tStart;
	ali->tEnd = psl->tEnd;
	ali->tSize = psl->tSize;
	ali->strand = psl->strand[0];
	slAddHead(&rd->aliList, ali);
	++aliCount;
	}
    else
        {
	freeMem(ali);
	}
    pslFree(&psl);
    /* Progress dot every dotEvery records. */
    if (--dotty <= 0)
	{
	dotty = dotEvery;
        printf(".");
	fflush(stdout);
	}
    }
printf("\n");
/* The input file was previously never closed. */
lineFileClose(&lf);
return aliCount;
}
void readPslFile (struct lineFile *pf, struct hash **hash)
/* Read in psl file and store contents in a hash.  The key is the string
 * createKey builds from qName, tName, tStart and tEnd.  The first record
 * for a key is kept; later records with the same key are freed.
 *   pf   - already opened psl file
 *   hash - address of the hash to fill */
{
struct hash *pslHash = *hash;
struct psl *psl;
char *key = NULL;

while ((psl = pslNext(pf)) != NULL)
    {
    /* NOTE(review): whether createKey allocates the key, and so whether it
     * needs freeing here, is not visible - confirm. */
    key = createKey(psl->qName, psl->tName, psl->tStart, psl->tEnd);
    if (!existsInHash(pslHash, key))    
        hashAdd(pslHash, key, psl);
    else
        /* Nothing references a duplicate, so it used to be leaked. */
        pslFree(&psl);
    }
}
Beispiel #20
0
void pslPretty(char *pslName, char *targetList, char *queryList, 
	char *prettyName, boolean axt, char *checkFileName)
/* pslPretty - Convert PSL to human readable output.  Each record of
 * pslName is passed to prettyOne, which writes to prettyName.  When
 * checkFileName is given the file-level total_* counters are written there
 * at the end.  The file-level dot setting controls progress dots. */
{
struct hash *fileHash = newHash(0);  /* No value. */
struct hash *tHash = newHash(20);  /* seqFilePos value. */
struct hash *qHash = newHash(20);  /* seqFilePos value. */
struct dlList *fileCache = newDlList();
struct lineFile *srcLf = pslFileOpen(pslName);
FILE *pretty = mustOpen(prettyName, "w");
FILE *checkFile = NULL;
struct psl *rec;
int untilDot = dot;

if (checkFileName != NULL)
    checkFile = mustOpen(checkFileName, "w");
hashFileList(targetList, fileHash, tHash);
hashFileList(queryList, fileHash, qHash);

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    /* The countdown only runs when dots are enabled. */
    if (dot > 0 && --untilDot <= 0)
        {
        fprintf(stderr,"."); /* stderr flushes itself */
        untilDot = dot;
        }
    prettyOne(rec, qHash, tHash, fileCache, pretty, axt, checkFile);
    pslFree(&rec);
    }
if (dot > 0)
    fprintf(stderr,"\n");

if (checkFile != NULL)
    {
    fprintf(checkFile,"missLargeStart: %d\n", total_missLargeStart);
    fprintf(checkFile,"missSmallStart: %d\n", total_missSmallStart);
    fprintf(checkFile,"missLargeEnd: %d\n", total_missLargeEnd);
    fprintf(checkFile,"missSmallEnd: %d\n", total_missSmallEnd);
    fprintf(checkFile,"missLargeMiddle: %d\n", total_missLargeMiddle);
    fprintf(checkFile,"missSmallMiddle: %d\n", total_missSmallMiddle);
    fprintf(checkFile,"weirdSplice: %d\n", total_weirdSplice);
    fprintf(checkFile,"doubleGap: %d\n", total_doubleGap);
    fprintf(checkFile,"jumpBack: %d\n", total_jumpBack);
    fprintf(checkFile,"perfect: %d\n", total_rnaPerfect);
    fprintf(checkFile,"total: %d\n", total_rnaCount);
    }
lineFileClose(&srcLf);
carefulClose(&pretty);
carefulClose(&checkFile);
}
Beispiel #21
0
void pslToChain(char *pslIn, char *chainOut)
/* pslToChain - Write each psl record of pslIn as one chain to chainOut.
 * Chains are numbered from 1 in output order.  A record with '-' as target
 * strand aborts the run, or is skipped when the file-level ignoreError is
 * set.
 *   pslIn    - psl input
 *   chainOut - chain output */
{
struct lineFile *lf = pslFileOpen(pslIn);
int chainId = 1;
int ii;
FILE *f = mustOpen(chainOut, "w");
struct psl *psl;
struct chain chain;

while ((psl = pslNext(lf) ) != NULL)
    {
    if (psl->strand[1] == '-') 
        {
        if (ignoreError)
            {
            /* The skipped record used to be leaked by a bare continue. */
            pslFree(&psl);
            continue;
            }
        errAbort("PSL record on line %d has '-' for target strand which is not allowed.", lf->lineIx);
        }

    chain.score = pslScore(psl);
    chain.id = chainId++;
    chain.tName = psl->tName;
    chain.tSize = psl->tSize;
    chain.tStart = psl->tStart;
    chain.tEnd = psl->tEnd;
    chain.qName = psl->qName;
    chain.qSize = psl->qSize;
    chain.qStrand = psl->strand[0];

    /* For a '-' query strand the range is flipped to be relative to the
     * other end of the query. */
    if (psl->strand[0] == '-')
        {
        chain.qEnd = psl->qSize - psl->qStart;
        chain.qStart = psl->qSize - psl->qEnd;
        }
    else
        {
        chain.qStart = psl->qStart;
        chain.qEnd = psl->qEnd;
        }
    chainWriteHead(&chain,f);

    /* One line per block: its size, followed on all but the last line by
     * the target and query gaps up to the next block. */
    for(ii=0; ii < psl->blockCount; ii++)
	{
	fprintf(f, "%d", psl->blockSizes[ii]);
	if (ii < psl->blockCount - 1)
	    fprintf(f, "\t%d\t%d", psl->tStarts[ii+1]-(psl->tStarts[ii] + psl->blockSizes[ii]),
		psl->qStarts[ii+1]-(psl->qStarts[ii] + psl->blockSizes[ii]));
	fprintf(f,"\n");
	}

    pslFree(&psl);
    }
/* Both files used to be left open on return. */
lineFileClose(&lf);
carefulClose(&f);
}
void pslGlue(char *inNames[], int inCount, char *outName, char *glueName)
/* Load all records of the inCount input files, sort them with
 * pslCmpQuery, and pass each run of records sharing a query name to
 * simpleOut, which writes to outName and glueName. */
{
struct psl *all = NULL, *cur, *following;
struct psl *bundle = NULL;
FILE *out;
FILE *glue;
int fileIx;
int glueCount = 0;
int pslCount = 0;

printf("Reading");
for (fileIx = 0; fileIx < inCount; ++fileIx)
    {
    struct lineFile *srcLf = pslFileOpen(inNames[fileIx]);
    printf(" %s", inNames[fileIx]);
    fflush(stdout);
    for (cur = pslNext(srcLf); cur != NULL; cur = pslNext(srcLf))
        {
        slAddHead(&all, cur);
        pslCount += 1;
        }
    lineFileClose(&srcLf);
    }
printf("\n");
slSort(&all, pslCmpQuery);

out = mustOpen(outName, "w");
glue = mustOpen(glueName, "w");
pslWriteHead(out);

/* Move records one at a time into the current bundle, flushing the
 * bundle whenever the query name changes. */
cur = all;
while (cur != NULL)
    {
    following = cur->next;
    if (bundle != NULL && !sameString(bundle->qName, cur->qName))
        {
        glueCount += simpleOut(out, glue, &bundle);
        bundle = NULL;
        }
    slAddHead(&bundle, cur);
    cur = following;
    }
glueCount += simpleOut(out, glue, &bundle);
printf("Got %d gluing mRNAs out of %d psls in %d bundles %d ltot %d mtot\n",
        glueCount, pslCount, outCount, ltot, mtot);
fclose(out);
fclose(glue);
}
void fixBlastTrack(char *query, char *target, char *outFile)
/* Rewrite the records of the target psl file to outFile, shifting query
 * coordinates for records whose matching query record (same qName) does not
 * start at query offset 0.  Aborts if a qName occurs twice in the query file
 * or if a target record has no query record.
 *   query   - psl file with one record per qName
 *   target  - psl file to adjust
 *   outFile - adjusted output */
{
struct lineFile *qlf = pslFileOpen(query);
struct lineFile *tlf = pslFileOpen(target);
struct psl *psl, *queryPsl;
struct hash *pslHash = newHash(0);  
FILE *outStream = mustOpen(outFile, "w");

/* Index the query records; they stay referenced by the hash. */
while ((psl = pslNext(qlf)) != NULL)
    {
    queryPsl = hashFindVal(pslHash, psl->qName);
    if (queryPsl != NULL)
	errAbort("each qName in query psl file must be unique (%s)",psl->qName);

    hashAdd(pslHash, psl->qName, psl);
    }

while ((psl = pslNext(tlf)) != NULL)
    {
    queryPsl = hashFindVal(pslHash, psl->qName);
    if (queryPsl == NULL)
	errAbort("can't find %s in query file",psl->qName);

    if ((queryPsl->qStarts[0] != 0) && (psl->qStarts[0] < queryPsl->blockSizes[0]))
	{
	int qStart, qEnd, tBlock;
	assert(queryPsl->qStart == queryPsl->qStarts[0]);
	qStart =  0;
	qEnd = qStart + queryPsl->blockSizes[0];
	/* Shift the first block, then each following block for as long as
	 * its start falls inside [qStart, qEnd). */
	psl->qStarts[0] += queryPsl->qStart;
	psl->qStart = psl->qStarts[0];
	tBlock = 1;
	while((tBlock < psl->blockCount) && (psl->qStarts[tBlock] >= qStart) && (psl->qStarts[tBlock] < qEnd))
	    psl->qStarts[tBlock++] += queryPsl->qStart;
	psl->qEnd = psl->qStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1];
	}
    pslTabOut(psl, outStream);
    /* Target records are not kept anywhere, so release each after writing. */
    pslFree(&psl);
    }

/* All three files used to be left open on return. */
lineFileClose(&tlf);
lineFileClose(&qlf);
carefulClose(&outStream);
}
void pslGlueRna(char *listFile, char *partDir, char *pslName, char *gluName)
/* Load the records of every file that getFileList yields for
 * listFile/partDir, sort them with pslCmpQuery, and pass each run of
 * records sharing a query name to output, which writes to pslName and
 * gluName. */
{
struct psl *all = NULL, *cur, *following;
struct psl *bundle = NULL;
struct slName *srcList, *src;
FILE *pslOut;
FILE *gluOut;
int glueCount = 0;
int pslCount = 0;

srcList = getFileList(listFile, partDir);
for (src = srcList; src != NULL; src = src->next)
    {
    struct lineFile *srcLf = pslFileOpen(src->name);
    for (cur = pslNext(srcLf); cur != NULL; cur = pslNext(srcLf))
        {
        slAddHead(&all, cur);
        pslCount += 1;
        }
    lineFileClose(&srcLf);
    }
slSort(&all, pslCmpQuery);

pslOut = mustOpen(pslName, "w");
gluOut = mustOpen(gluName, "w");
pslWriteHead(pslOut);

/* Move records one at a time into the current bundle, flushing the
 * bundle whenever the query name changes. */
cur = all;
while (cur != NULL)
    {
    following = cur->next;
    if (bundle != NULL && !sameString(bundle->qName, cur->qName))
        {
        glueCount += output(pslOut, gluOut, &bundle);
        bundle = NULL;
        }
    slAddHead(&bundle, cur);
    cur = following;
    }
glueCount += output(pslOut, gluOut, &bundle);
printf("Got %d gluing mRNAs out of %d psls in %d bundles %d ltot %d mtot to %s\n", 
	glueCount, pslCount, outCount, ltot, mtot, gluName);
fclose(pslOut);
fclose(gluOut);
}
static void pslToPslx(char *inPslFile, char *qSeqSpec, char *tSeqSpec, char *outPslFile)
/* pslToPslx - Convert from psl to pslx alignment format by passing every
 * record of inPslFile, together with readers for the query and target
 * sequence specs, to writePslx. */
{
struct lineFile *srcLf = pslFileOpen(inPslFile);
struct seqReader *qReader = seqReaderNew(qSeqSpec);
struct seqReader *tReader = seqReaderNew(tSeqSpec);
FILE *dest = mustOpen(outPslFile, "w");
struct psl *rec;

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    writePslx(dest, qReader, tReader, rec);
    pslFree(&rec);
    }
lineFileClose(&srcLf);
carefulClose(&dest);
}
Beispiel #26
0
void pslUnpile(char *inName, char *outName)
/* pslUnpile - Removes huge piles of alignments from sorted 
 * psl files (due to unmasked repeats presumably).
 * Records are gathered into a pile while each new record overlaps the most
 * recently added one (pslOverlap).  A finished pile is written in input
 * order if checkPile accepts it; a rejected pile is written only under the
 * condition described in the loop below. */
{
FILE *dest = mustOpen(outName, "w");
enum gfType qType, tType;
struct lineFile *srcLf;
struct psl *pile = NULL, *cur, *member;

pslxFileOpen(inName, &qType, &tType, &srcLf);
if (!noHead)
    pslxWriteHead(dest, qType, tType);
do
    {
    cur = pslNext(srcLf);
    /* The pile is complete at end of input or when the new record no
     * longer overlaps it. */
    if (pile != NULL && (cur == NULL || !pslOverlap(cur, pile)))
        {
        slReverse(&pile);
        if (checkPile(pile))
            {
            for (member = pile; member != NULL; member = member->next)
                pslTabOut(member, dest);
            }
        else
            {
            /* NOTE(review): the size test looks at the record that ended
             * the pile, not at the pile member being written - confirm
             * this is intended. */
            for (member = pile; member != NULL; member = member->next)
                {
                if (cur == NULL || cur->tEnd - cur->tStart > 4000)
                    pslTabOut(member, dest);
                }
            }
        pslFreeList(&pile);
        }
    if (cur != NULL)
        slAddHead(&pile, cur);
    }
while (cur != NULL);
lineFileClose(&srcLf);
carefulClose(&dest);
}
Beispiel #27
0
void pslRcFile(char *inPslFile, char *outPslFile)
/* Apply pslRc to every record of inPslFile and write the results to
 * outPslFile. */
{
struct lineFile *srcLf = pslFileOpen(inPslFile);
FILE *dest = mustOpen(outPslFile, "w");
struct psl *rec;

for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    pslRc(rec);
    pslTabOut(rec, dest);
    pslFree(&rec);
    }

carefulClose(&dest);
lineFileClose(&srcLf);
}
Beispiel #28
0
void fbOrPsl(Bits *acc, char *track, char *chrom, int chromSize)
/* Or into acc the bits of those records in the track's psl file for chrom
 * whose target name is chrom.  Does nothing if that file does not exist. */
{
char path[512];
struct lineFile *srcLf;
struct psl *rec;

chromFileName(track, chrom, path);
if (fileExists(path))
    {
    srcLf = pslFileOpen(path);
    for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
        {
        if (sameString(rec->tName, chrom))
            setPslBits(srcLf, acc, rec, 0, chromSize);
        pslFree(&rec);
        }
    lineFileClose(&srcLf);
    }
}
Beispiel #29
0
static struct hash *collectQueryStats(char *pslFile, char *querySizeFile)
/* Accumulate statistics per query over all records of pslFile and return
 * the table holding them.  The table is preloaded from querySizeFile when
 * one is given and starts out empty otherwise. */
{
struct hash *statsTbl;
struct lineFile *srcLf;
struct psl* rec;

if (querySizeFile != NULL)
    statsTbl = sumStatsLoad(querySizeFile);
else
    statsTbl = hashNew(queryHashPowTwo);

srcLf = pslFileOpen(pslFile);
for (rec = pslNext(srcLf); rec != NULL; rec = pslNext(srcLf))
    {
    sumStatsAccumulateQuery(sumStatsGetForQuery(statsTbl, rec->qName, rec->qSize), rec);
    pslFree(&rec);
    }
lineFileClose(&srcLf);
return statsTbl;
}
void extractUsedPairs(struct hash *pairHash, char *inPslName, char *outPairName)
/* Extract pairs that are used in inPsl to outPair.
 * For every record whose query name is found in pairHash the pair is noted
 * once, together with which of its two members (a or b) was seen.  The
 * noted pairs are written in order of first appearance by writePairs.
 *   pairHash    - hash of struct pair keyed by member name
 *   inPslName   - psl file to scan
 *   outPairName - file given to writePairs */
{
struct hash *refHash = newHash(12);
struct pairRef *refList = NULL, *ref;
struct psl *psl;
struct lineFile *lf;

printf("Processing pairs from %s to %s\n", inPslName, outPairName);
lf = pslFileOpen(inPslName);
while ((psl = pslNext(lf)) != NULL)
    {
    char *name = psl->qName;
    struct hashEl *hel;
    struct pair *pair;
    if ((hel = hashLookup(pairHash, name)) != NULL)
	{
	pair = hel->val;
	if ((hel = hashLookup(refHash, name)) != NULL)
	    {
	    ref = hel->val;
	    }
	else
	    {
	    /* First sighting of this pair: register the reference under
	     * both member names so either one finds it later. */
	    AllocVar(ref);
	    ref->pair = pair;
	    slAddHead(&refList, ref);
	    hashAdd(refHash, pair->a, ref);
	    hashAdd(refHash, pair->b, ref);
	    }
	if (sameString(name, pair->a))
	    ref->gotA = TRUE;
	else
	    ref->gotB = TRUE;
	}
    pslFree(&psl);
    }
/* The input file was previously never closed. */
lineFileClose(&lf);
slReverse(&refList);
writePairs(outPairName, refList);
slFreeList(&refList);
freeHash(&refHash);
}