void chainToPsl(char *inName, char *tSizeFile, char *qSizeFile, char *targetList, char *queryList, char *outName) /* chainToPsl - Convert chain file to psl format. */ { struct hash *tSizeHash = readSizes(tSizeFile); struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct hash *fileHash = newHash(0); /* No value. */ struct hash *tHash = newHash(20); /* seqFilePos value. */ struct hash *qHash = newHash(20); /* seqFilePos value. */ struct dlList *fileCache = newDlList(); struct chain *chain; int q,t; verbose(1, "Scanning %s\n", targetList); hashFileList(targetList, fileHash, tHash); verbose(1, "Scanning %s\n", queryList); hashFileList(queryList, fileHash, qHash); verbose(1, "Converting %s\n", inName); while ((chain = chainRead(lf)) != NULL) { //uglyf("chain %s %s \n",chain->tName,chain->qName); q = findSize(qSizeHash, chain->qName); t = findSize(tSizeHash, chain->tName); aliStringToPsl(lf, chain->qName, chain->tName, chain->qSize, chain->tSize, min(chain->tEnd-chain->tStart, chain->qEnd-chain->qStart), chain->qStart, chain->qEnd, chain->tStart, chain->tEnd, chain->qStrand, f, chain, tHash, qHash, fileCache); chainFree(&chain); } lineFileClose(&lf); carefulClose(&f); }
void axtToPsl(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* axtToPsl - Convert axt to psl format. */ { struct hash *tSizeHash = readSizes(tSizeFile); struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); char strand[2]; FILE *f = mustOpen(outName, "w"); struct psl* psl; struct axt *axt; strand[1] = '\0'; while ((axt = axtRead(lf)) != NULL) { int qSize = findSize(qSizeHash, axt->qName); int qStart = axt->qStart; int qEnd = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qStart, &qEnd, qSize); strand[0] = axt->qStrand; psl = pslFromAlign(axt->qName, qSize, qStart, qEnd, axt->qSym, axt->tName, findSize(tSizeHash, axt->tName), axt->tStart, axt->tEnd, axt->tSym, strand, PSL_IS_SOFTMASK); if (psl != NULL) { pslTabOut(psl, f); pslFree(&psl); } axtFree(&axt); } lineFileClose(&lf); carefulClose(&f); }
static void gff3ToPsl(char *mapFile, char *inGff3File, char *outPSL) /* gff3ToPsl - convert a GFF3 file to a genePred file. */ { struct hash *chromHash = readSizes(mapFile); struct hash *processed = hashNew(12); struct gff3File *gff3File = loadGff3(inGff3File); FILE *pslF = mustOpen(outPSL, "w"); struct gff3AnnRef *root; for (root = gff3File->roots; root != NULL; root = root->next) { if (!isProcessed(processed, root->ann)) { processRoot(pslF, root->ann, processed, chromHash); if (convertErrCnt >= maxConvertErrs) break; } } carefulClose(&pslF); if (convertErrCnt > 0) errAbort("%d errors converting GFF3 file: %s", convertErrCnt, inGff3File); #if 0 // free memory for leak debugging if 1 gff3FileFree(&gff3File); hashFree(&processed); #endif }
void axtDropOverlap(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* used for cleaning up self alignments - deletes all overlapping self alignments */ { struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct axt *axt; int totMatch = 0; int totSkip = 0; int totLines = 0; while ((axt = axtRead(lf)) != NULL) { totLines++; totMatch += axt->score; if (sameString(axt->qName, axt->tName)) { int qs = axt->qStart; int qe = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qs, &qe, findSize(qSizeHash, axt->qName)); if (axt->tStart == qs && axt->tEnd == qe) { /* printf( "skip %c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\n", axt->qStrand, axt->qName, axt->symCount, axt->qStart, axt->qEnd, axt->tName, axt->symCount, axt->tStart, axt->tEnd ); */ totSkip++; continue; } } axtWrite(axt, f); axtFree(&axt); } fclose(f); lineFileClose(&lf); }
void ppAnalyse(char *headersName, char *sizesName, char *pairsPsl, char *finDir, char *pairOut, char *mispairOut) /* ppAnalyse - Analyse paired plasmid reads. */ { struct hash *readHash = newHash(21); struct hash *pairHash = newHash(20); struct hash *fragHash = newHash(17); struct hash *finHash = NULL; struct hash *gsiHash = newHash(8); struct pairInfo *pairList = NULL, *measuredList, *pair; struct readInfo *readList = NULL, *rd; struct groupSizeInfo *gsiList = NULL, *gsi; int aliCount; int finCount; int i; struct slName *finList, *name; finHash = newHash(14); finList = listDir(finDir, "*.fa"); if ((finCount = slCount(finList)) == 0) errAbort("No fa files in %s\n", finDir); printf("Got %d (finished) .fa files in %s\n", finCount, finDir); for (name = finList; name != NULL; name = name->next) { chopSuffix(name->name); hashStore(finHash, name->name); } #ifdef SOMETIMES #endif /* SOMETIMES */ gsiList = readSizes(sizesName, gsiHash); printf("Got %d groups\n", slCount(gsiList)); readHeaders(headersName, readHash, pairHash, &readList, &pairList); slReverse(&readList); slReverse(&pairList); printf("Got %d reads in %d pairs\n", slCount(readList), slCount(pairList)); savePairs(pairOut, pairList, gsiHash); printf("Saved pairs to %s\n", pairOut); aliCount = readAlignments(pairsPsl, readHash, fragHash); printf("Got %d alignments in %s\n", aliCount, pairsPsl); doReadStats(readList, aliCount); doPairStats(pairList, finHash, gsiHash, mispairOut, &measuredList, &pairList); gsiMeanAndVariance(measuredList, gsiList, gsiHash, finHash); printf("Alignment length stats:\n"); for (i=0; i<10; ++i) { printf("%3d - %4d : %6d %4.2f%%\n", i*100, (i+1)*100, aliSizes[i], 100.0 * (double)aliSizes[i]/(double)aliCount); } doMeasuredStats(measuredList); for (gsi = gsiList; gsi != NULL; gsi = gsi->next) { if (gsi->measuredCount > 0) { printf("%s: mean %f, std %f, min %d, max %d, samples %d\n", gsi->name, gsi->measuredMean, sqrt(gsi->variance), gsi->measuredMin, gsi->measuredMax, gsi->measuredCount); printf(" %4.2f%% within guessed range (%d-%d)\n", 100.0 * (double)gsi->guessedCount/(double)gsi->measuredCount, gsi->guessedMin, gsi->guessedMax); printf(" w/in 200 %4.2f%%, w/in 400 %4.2f%%, w/in 800 %4.2f%%, w/in 1600 %4.3f%%, w/in 3200 %4.2f%%\n\n", gsiTight(gsi, 200), gsiTight(gsi, 400), gsiTight(gsi, 800), gsiTight(gsi, 1600), gsiTight(gsi, 3200) ); showHistogram(gsi); } } }