Example #1
0
void chainToPsl(char *inName, char *tSizeFile, char *qSizeFile,  char *targetList, char *queryList, char *outName)
/* chainToPsl - Convert chain file to psl format.
 * Every chain read from inName is passed to aliStringToPsl together with
 * the output file opened on outName and the hashes filled in from
 * targetList and queryList. */
{
struct hash *tSizeHash = readSizes(tSizeFile);
struct hash *qSizeHash = readSizes(qSizeFile);
struct lineFile *in = lineFileOpen(inName, TRUE);
FILE *out = mustOpen(outName, "w");
struct hash *fileHash = newHash(0);  /* No value. */
struct hash *tHash = newHash(20);  /* seqFilePos value. */
struct hash *qHash = newHash(20);  /* seqFilePos value. */
struct dlList *fileCache = newDlList();

verbose(1, "Scanning %s\n", targetList);
hashFileList(targetList, fileHash, tHash);
verbose(1, "Scanning %s\n", queryList);
hashFileList(queryList, fileHash, qHash);
verbose(1, "Converting %s\n", inName);

for (;;)
    {
    int tSpan, qSpan;
    struct chain *chain = chainRead(in);
    if (chain == NULL)
        break;

    /* NOTE(review): the looked-up sizes are not used below (the chain's own
     * qSize/tSize are passed on instead).  The calls are kept, query first,
     * in case findSize reports names missing from the size files - confirm. */
    (void)findSize(qSizeHash, chain->qName);
    (void)findSize(tSizeHash, chain->tName);

    /* The shorter of the target and query spans is handed to aliStringToPsl. */
    tSpan = chain->tEnd - chain->tStart;
    qSpan = chain->qEnd - chain->qStart;
    aliStringToPsl(in, chain->qName, chain->tName, chain->qSize, chain->tSize,
        min(tSpan, qSpan),
        chain->qStart, chain->qEnd, chain->tStart, chain->tEnd,
        chain->qStrand, out, chain, tHash, qHash, fileCache);
    chainFree(&chain);
    }
lineFileClose(&in);
carefulClose(&out);
}
Example #2
0
void axtToPsl(char *inName, char *tSizeFile, char *qSizeFile, char *outName)
/* axtToPsl - Convert axt to psl format.
 * Each axt record in inName is turned into a psl via pslFromAlign and
 * written tab-separated to outName.  Sequence sizes come from the hashes
 * read from tSizeFile and qSizeFile. */
{
struct hash *tSizeHash = readSizes(tSizeFile);
struct hash *qSizeHash = readSizes(qSizeFile);
struct lineFile *in = lineFileOpen(inName, TRUE);
char strand[2] = {'\0', '\0'};  /* One-character strand string. */
FILE *out = mustOpen(outName, "w");
struct axt *axt;

for (axt = axtRead(in); axt != NULL; axt = axtRead(in))
    {
    struct psl *psl;
    int tSize;
    int qSize = findSize(qSizeHash, axt->qName);
    int qStart = axt->qStart;
    int qEnd = axt->qEnd;

    /* Minus-strand query coordinates are reversed against the query size
     * before being handed to pslFromAlign. */
    if (axt->qStrand == '-')
        reverseIntRange(&qStart, &qEnd, qSize);
    strand[0] = axt->qStrand;

    tSize = findSize(tSizeHash, axt->tName);
    psl = pslFromAlign(axt->qName, qSize, qStart, qEnd, axt->qSym,
                       axt->tName, tSize,
                       axt->tStart, axt->tEnd, axt->tSym, strand,
                       PSL_IS_SOFTMASK);

    /* pslFromAlign may return NULL, in which case nothing is written. */
    if (psl != NULL)
        {
        pslTabOut(psl, out);
        pslFree(&psl);
        }
    axtFree(&axt);
    }
lineFileClose(&in);
carefulClose(&out);
}
Example #3
0
static void gff3ToPsl(char *mapFile, char *inGff3File, char *outPSL)
/* gff3ToPsl - convert a GFF3 file to a PSL file.
 * Walks the root annotations of inGff3File, passing each one that is not
 * yet marked as processed to processRoot along with the output file opened
 * on outPSL.  Stops early once convertErrCnt reaches maxConvertErrs, and
 * aborts after closing the output if any conversion errors were counted. */
{
    struct hash *chromHash = readSizes(mapFile);
    struct hash *processed = hashNew(12);
    struct gff3File *gff3File = loadGff3(inGff3File);
    FILE *pslF = mustOpen(outPSL, "w");
    struct gff3AnnRef *node = gff3File->roots;
    int hitErrLimit = 0;

    while (node != NULL && !hitErrLimit)
    {
        if (!isProcessed(processed, node->ann))
        {
            processRoot(pslF, node->ann, processed, chromHash);
            /* The error limit is only checked right after a root is processed. */
            hitErrLimit = (convertErrCnt >= maxConvertErrs);
        }
        node = node->next;
    }

    /* Close the output before reporting, so what was written is flushed. */
    carefulClose(&pslF);
    if (convertErrCnt > 0)
        errAbort("%d errors converting GFF3 file: %s", convertErrCnt, inGff3File);

#if 0  // free memory for leak debugging if 1
    gff3FileFree(&gff3File);
    hashFree(&processed);
#endif
}
void axtDropOverlap(char *inName, char *tSizeFile, char *qSizeFile, char *outName)
/* axtDropOverlap - Used for cleaning up self alignments.
 * Copies axt records from inName to outName, dropping every record whose
 * query and target names are the same and whose query range (reversed
 * against the query size when qStrand is '-') equals its target range.
 * Query sizes are looked up in the hash read from qSizeFile.
 * tSizeFile is accepted but not read by this function. */
{
struct hash *qSizeHash = readSizes(qSizeFile);
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
struct axt *axt;

while ((axt = axtRead(lf)) != NULL)
    {
    int dropRecord = 0;

    if (sameString(axt->qName, axt->tName))
        {
        int qs = axt->qStart;
        int qe = axt->qEnd;
        /* The size lookup is only made for minus-strand queries. */
        if (axt->qStrand == '-')
            reverseIntRange(&qs, &qe, findSize(qSizeHash, axt->qName));
        if (axt->tStart == qs && axt->tEnd == qe)
            dropRecord = 1;
        }

    if (!dropRecord)
        axtWrite(axt, f);

    /* Free on every iteration, including dropped records, so that skipped
     * alignments are not leaked. */
    axtFree(&axt);
    }
fclose(f);
lineFileClose(&lf);
}
Example #5
0
void ppAnalyse(char *headersName, char *sizesName, char *pairsPsl, 
    char *finDir, char *pairOut, char *mispairOut)
/* ppAnalyse - Analyse paired plasmid reads.
 * Steps, in order: collect the *.fa names found in finDir, read the group
 * sizes from sizesName, read reads and pairs from headersName, save the
 * pairs to pairOut, read the alignments in pairsPsl, then compute and
 * print read, pair and per-group statistics.  mispairOut is handed to
 * doPairStats. */
{
struct hash *readHash = newHash(21);
struct hash *pairHash = newHash(20);
struct hash *fragHash = newHash(17);
struct hash *gsiHash = newHash(8);
struct hash *finHash = newHash(14);   /* Names only, no values. */
struct pairInfo *pairList = NULL, *measuredList;
struct readInfo *readList = NULL;
struct groupSizeInfo *gsiList = NULL, *gsi;
struct slName *finList, *fin;
int aliCount;
int finCount;
int bucket;

/* Store each .fa file name found in finDir, minus its suffix. */
finList = listDir(finDir, "*.fa");
finCount = slCount(finList);
if (finCount == 0)
    errAbort("No fa files in %s\n", finDir);
printf("Got %d (finished) .fa files in %s\n", finCount, finDir);
for (fin = finList; fin != NULL; fin = fin->next)
    {
    chopSuffix(fin->name);
    hashStore(finHash, fin->name);
    }

gsiList = readSizes(sizesName, gsiHash);
printf("Got %d groups\n", slCount(gsiList));

/* Both lists are reversed right after reading. */
readHeaders(headersName, readHash, pairHash, &readList, &pairList);
slReverse(&readList);
slReverse(&pairList);
printf("Got %d reads in %d pairs\n", slCount(readList), slCount(pairList));

savePairs(pairOut, pairList, gsiHash);
printf("Saved pairs to %s\n", pairOut);

aliCount = readAlignments(pairsPsl, readHash, fragHash);
printf("Got %d alignments in %s\n", aliCount, pairsPsl);

doReadStats(readList, aliCount);
doPairStats(pairList, finHash, gsiHash, mispairOut, &measuredList, &pairList);
gsiMeanAndVariance(measuredList, gsiList, gsiHash, finHash);

/* Ten rows, one per aliSizes entry, each labelled with a 100-wide range
 * and shown as a count and as a percentage of aliCount. */
printf("Alignment length stats:\n");
for (bucket = 0; bucket < 10; ++bucket)
    {
    int rangeStart = bucket*100;
    int rangeEnd = (bucket+1)*100;
    printf("%3d - %4d :  %6d  %4.2f%%\n",
        rangeStart, rangeEnd, aliSizes[bucket],
        100.0 * (double)aliSizes[bucket]/(double)aliCount);
    }
doMeasuredStats(measuredList);

/* Per-group summary; groups with no measured samples are skipped. */
for (gsi = gsiList; gsi != NULL; gsi = gsi->next)
    {
    if (gsi->measuredCount <= 0)
        continue;
    printf("%s: mean %f, std %f, min %d, max %d, samples %d\n",
        gsi->name, gsi->measuredMean, sqrt(gsi->variance),
        gsi->measuredMin, gsi->measuredMax,
        gsi->measuredCount);
    printf("   %4.2f%% within guessed range (%d-%d)\n", 
        100.0 * (double)gsi->guessedCount/(double)gsi->measuredCount,
        gsi->guessedMin, gsi->guessedMax);
    printf("   w/in 200 %4.2f%%, w/in 400 %4.2f%%,  w/in 800 %4.2f%%,  w/in 1600 %4.3f%%, w/in 3200 %4.2f%%\n\n",
        gsiTight(gsi, 200), gsiTight(gsi, 400), gsiTight(gsi, 800), gsiTight(gsi, 1600), gsiTight(gsi, 3200) );
    showHistogram(gsi);
    }
}