void outputChunk(struct psl **pPslList, char *tempDir, int midIx, boolean noHead)
/* Write every record of *pPslList, in its current order, to one file under
 * tempDir and then free the list.  Nothing is written for an empty list.
 * Note that the list is NOT sorted here.
 *   - The file name comes from makeMidName(), unless the global chunkSize is 1,
 *     in which case the file is named after the first record's query name.
 *   - When the global stripVer is set, the first record's qName is cut at the
 *     '.' located by stringIn() before it is used or written.
 *   - The psl header is emitted unless noHead is set. */
{
struct psl *first = *pPslList;
struct psl *cur;
char path[512];
FILE *out;

if (first == NULL)
    return;

makeMidName(tempDir, midIx, path);
if (stripVer)
    {
    char *dot = stringIn(".", first->qName);
    if (dot != NULL)
        *dot = '\0';
    }
if (chunkSize == 1)
    safef(path, sizeof(path), "%s/%s.psl", tempDir, first->qName);

out = mustOpen(path, "w");
if (!noHead)
    pslWriteHead(out);
cur = first;
while (cur != NULL)
    {
    pslTabOut(cur, out);
    cur = cur->next;
    }
fclose(out);
pslFreeList(pPslList);
}
void pslCut(char *cutList, char *inPsl, char *outPsl) /* pslCut - Remove a list of clones from psl file.. */ { struct hash *cutHash = newHash(0); struct lineFile *lf = pslFileOpen(inPsl); FILE *f = mustOpen(outPsl, "w"); struct psl *psl; char cloneName[128]; int total = 0, cut = 0; buildCutHash(cutList, cutHash); pslWriteHead(f); while ((psl = pslNext(lf)) != NULL) { fragToCloneName(psl->tName, cloneName); if (!hashLookup(cutHash, cloneName)) { pslTabOut(psl, f); } else ++cut; ++total; pslFree(&psl); } printf("Cut %d of %d\n", cut, total); }
void pslCopyInClones(char *listFile, char *partDir, char *outName)
/* Concatenate psl files into outName.  The set of input files is whatever
 * getFileList(listFile, partDir) returns; each is read record by record and
 * re-written after a single psl header.  Reports the record and file totals
 * on stdout. */
{
FILE *dest = mustOpen(outName, "w");
struct slName *files, *file;
int recordTotal = 0;
int fileTotal = 0;

pslWriteHead(dest);
files = getFileList(listFile, partDir);
file = files;
while (file != NULL)
    {
    struct lineFile *src = pslFileOpen(file->name);
    struct psl *rec;

    fileTotal += 1;
    /* pslFree() resets rec, so it is re-fetched on every pass. */
    for (rec = pslNext(src); rec != NULL; rec = pslNext(src))
        {
        pslTabOut(rec, dest);
        pslFree(&rec);
        recordTotal += 1;
        }
    lineFileClose(&src);
    file = file->next;
    }
printf("%d psls in %d files written to %s\n", recordTotal, fileTotal, outName);
fclose(dest);
}
void fastaToPsl(char *inName, char *outName)
/* fastaToPsl - Convert a FASTA alignment to psl format.
 *   inName  - FASTA file; the first record is used as the query and every
 *             later record as a target aligned against it
 *   outName - psl file to create (header is always written)
 * Sequence sizes passed to pslFromAlign() are the counts returned by
 * countNonDash(), and every alignment is written on the "+" strand.
 * Aborts if the first (query) record cannot be read. */
{
struct lineFile *inLF = lineFileOpen(inName, TRUE);
FILE *outFh = mustOpen(outName, "w");
DNA *qSeq, *tSeq;
int qSize, tSize;
char *qHeader, *tHeader;
int qSeqLen;
int queryCounter = 1;

/* read the query sequence */
if (!faMixedSpeedReadNext(inLF, &qSeq, &qSize, &qHeader))
    errAbort("Could not read query FASTA entry.");
/* Copies are kept because the query is needed for every later record;
 * presumably the reader reuses its buffers on the next call - verify. */
qSeq = cloneString(qSeq);
qSeqLen = countNonDash(qSeq, qSize);
qHeader = cloneString(qHeader);
verbose(2, "Query sequence header: %s\n", qHeader);
verbose(3, "Query sequence alignment length: %d\n", qSize);
verbose(3, "Query sequence length: %d\n", qSeqLen);
verbose(4, "Query sequence: %s\n", qSeq);

/* read the rest of the sequences */
pslWriteHead(outFh);
while (faMixedSpeedReadNext(inLF, &tSeq, &tSize, &tHeader))
    {
    int tSeqLen = countNonDash(tSeq, tSize);
    struct psl *pslAlign;

    verbose(2, "Target sequence (%d) header: %s\n", queryCounter, tHeader);
    verbose(3, "Target sequence (%d) length: %d\n", queryCounter, tSeqLen);
    verbose(4, "Target sequence (%d): %s\n", queryCounter, tSeq);
    pslAlign = pslFromAlign(qHeader, qSeqLen, 0, qSeqLen, qSeq,
                            tHeader, tSeqLen, 0, tSeqLen, tSeq,
                            "+", 0);
    /* Defensive: skip rather than dereference a missing alignment. */
    if (pslAlign == NULL)
        warn("No alignment produced for %s, skipping", tHeader);
    else
        {
        pslTabOut(pslAlign, outFh);
        pslFree(&pslAlign);
        }
    ++queryCounter;
    }
lineFileClose(&inLF);
carefulClose(&outFh);
freeMem(qSeq);
freeMem(qHeader);
}
void pslGlue(char *inNames[], int inCount, char *outName, char *glueName) /* Reduce a psl file to only the gluing components. */ { FILE *out; FILE *glue; struct psl *pslList = NULL, *psl, *nextPsl; int i; struct psl *localList = NULL; int glueCount = 0; int pslCount = 0; printf("Reading"); for (i=0; i<inCount; ++i) { char *inName = inNames[i]; struct lineFile *lf = pslFileOpen(inName); printf(" %s", inName); fflush(stdout); while ((psl = pslNext(lf)) != NULL) { slAddHead(&pslList, psl); ++pslCount; } lineFileClose(&lf); } printf("\n"); slSort(&pslList, pslCmpQuery); out = mustOpen(outName, "w"); glue = mustOpen(glueName, "w"); pslWriteHead(out); /* Chop this up into chunks that share the same query. */ for (psl = pslList; psl != NULL; psl = nextPsl) { nextPsl = psl->next; if (localList != NULL) { if (!sameString(localList->qName, psl->qName)) { glueCount += simpleOut(out, glue, &localList); localList = NULL; } } slAddHead(&localList, psl); } glueCount += simpleOut(out, glue, &localList); printf("Got %d gluing mRNAs out of %d psls in %d bundles %d ltot %d mtot\n", glueCount, pslCount, outCount, ltot, mtot); fclose(out); fclose(glue); }
void pslReps(char *inName, char *bestAliName, char *repName) /* Analyse inName and put best alignments for eacmRNA in estAliName. * Put repeat info in repName. */ { struct lineFile *in = pslFileOpen(inName); FILE *bestFile = mustOpen(bestAliName, "w"); FILE *repFile = mustOpen(repName, "w"); int lineSize; char *line; char *words[32]; int wordCount; struct psl *pslList = NULL, *psl = NULL; char lastName[512]; int aliCount = 0; quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout"); if (coverQSizeFile != NULL) loadCoverQSizes(coverQSizeFile); if (!quiet) printf("Processing %s to %s and %s\n", inName, bestAliName, repName); if (!noHead) pslWriteHead(bestFile); strcpy(lastName, ""); while (lineFileNext(in, &line, &lineSize)) { if (((++aliCount & 0x1ffff) == 0) && !quiet) { printf("."); fflush(stdout); } wordCount = chopTabs(line, words); if (wordCount == 21) psl = pslLoad(words); else if (wordCount == 23) psl = pslxLoad(words); else errAbort("Bad line %d of %s\n", in->lineIx, in->fileName); if (!sameString(lastName, psl->qName)) { doOneAcc(lastName, pslList, bestFile, repFile); pslFreeList(&pslList); safef(lastName, sizeof(lastName), "%s", psl->qName); } slAddHead(&pslList, psl); } doOneAcc(lastName, pslList, bestFile, repFile); pslFreeList(&pslList); lineFileClose(&in); fclose(bestFile); fclose(repFile); if (!quiet) printf("Processed %d alignments\n", aliCount); }
void pslGlueRna(char *listFile, char *partDir, char *pslName, char *gluName) /* Reduce a psl files for only the gluing mRNA/EST components. */ { FILE *pslOut; FILE *gluOut; struct psl *pslList = NULL, *psl, *nextPsl; struct psl *localList = NULL; int glueCount = 0; int pslCount = 0; struct slName *inList, *inEl; inList = getFileList(listFile, partDir); for (inEl = inList; inEl != NULL; inEl = inEl->next) { char *inName = inEl->name; struct lineFile *lf = pslFileOpen(inName); while ((psl = pslNext(lf)) != NULL) { slAddHead(&pslList, psl); ++pslCount; } lineFileClose(&lf); } slSort(&pslList, pslCmpQuery); pslOut = mustOpen(pslName, "w"); gluOut = mustOpen(gluName, "w"); pslWriteHead(pslOut); /* Chop this up into chunks that share the same query. */ for (psl = pslList; psl != NULL; psl = nextPsl) { nextPsl = psl->next; if (localList != NULL) { if (!sameString(localList->qName, psl->qName)) { glueCount += output(pslOut, gluOut, &localList); localList = NULL; } } slAddHead(&localList, psl); } glueCount += output(pslOut, gluOut, &localList); printf("Got %d gluing mRNAs out of %d psls in %d bundles %d ltot %d mtot to %s\n", glueCount, pslCount, outCount, ltot, mtot, gluName); fclose(pslOut); fclose(gluOut); }
void outputChunk(struct psl **pPslList, char *tempDir, int midIx)
/* Sort *pPslList with pslCmpTarget, write it (with a psl header) to the
 * file named by makeMidName(tempDir, midIx, ...), then free the list.
 * An empty list produces no file. */
{
char path[512];
FILE *out;
struct psl *cur;

if (*pPslList == NULL)
    return;

slSort(pPslList, pslCmpTarget);
makeMidName(tempDir, midIx, path);
out = mustOpen(path, "w");
pslWriteHead(out);
cur = *pPslList;
while (cur != NULL)
    {
    pslTabOut(cur, out);
    cur = cur->next;
    }
fclose(out);
pslFreeList(pPslList);
}
int main(int argc, char *argv[]) { char *genoListName; char *otherListName; char *oocFileName; char *typeName; char *outName; struct patSpace *patSpace; long startTime, endTime; char **genoList; int genoListSize; char *genoListBuf; char **otherList; int otherListSize; char *otherListBuf; char *genoName; int i; int blockCount = 0; struct dnaSeq **seqListList = NULL, *seq = NULL; char *outRoot; struct sqlConnection *conn; enum ffStringency stringency = ffCdna; int seedSize = 10; FILE *out; boolean noHead = FALSE; struct repeatTracker *rt; struct hash *repeatHash = newHash(10); hostName = getenv("HOST"); pushWarnHandler(warnHandler); startTime = clock1(); cgiSpoof(&argc, argv); minMatch = cgiOptionalInt("minMatch", minMatch); maxBad = cgiOptionalInt("maxBad", maxBad); minBases = cgiOptionalInt("minBases", minBases); dnaUtilOpen(); #ifdef DEBUG /* Hard wire command line input so don't have to type it in each * time run the stupid Gnu debugger. */ genoListName = "pFoo/geno.lst"; otherListName = "pFoo/bacend.lst"; typeName = "genomic"; oocFileName = "/d/biodata/human/10.ooc"; outName = "pFoo/pFoo.psl"; #else if (argc != 6 && argc != 7) usage(); genoListName = argv[1]; otherListName = argv[2]; typeName = argv[3]; oocFileName = argv[4]; if (sameWord(oocFileName, "none")) oocFileName = NULL; outName = argv[5]; if (argc == 7) { if (sameWord("noHead", argv[6])) noHead = TRUE; else usage(); } #endif if (sameWord(typeName, "mRNA") || sameWord(typeName, "cDNA")) { stringency = ffCdna; } else if (sameWord(typeName, "genomic")) { stringency = ffTight; } else if (sameWord(typeName, "g2g")) { stringency = ffTight; veryTight = TRUE; seedSize = 11; } else if (sameString(typeName, "asm")) { stringency = ffTight; avoidSelfSelf = TRUE; } else { warn("Unrecognized otherType %s\n", typeName); usage(); } readAllWordsOrFa(genoListName, &genoList, &genoListSize, &genoListBuf); filterMissingFiles(genoList, &genoListSize); if (genoListSize <= 0) errAbort("There are no files that exist in %s\n", 
genoListName); readAllWordsOrFa(otherListName, &otherList, &otherListSize, &otherListBuf); if (otherListSize <= 0) errAbort("There are no files that exist in %s\n", otherListName); filterMissingFiles(otherList, &otherListSize); out = mustOpen(outName, "w"); if (!noHead) pslWriteHead(out); AllocArray(seqListList, genoListSize); for (i=0; i<genoListSize; ++i) { genoName = genoList[i]; if (!startsWith("#", genoName) ) seqListList[i] = seq = faReadAllDna(genoName); for (;seq != NULL; seq = seq->next) { int size = seq->size; char *name = seq->name; struct hashEl *hel; AllocVar(rt); AllocArray(rt->repBytes, size); rt->seq = seq; if ((hel = hashLookup(repeatHash, name)) != NULL) errAbort("Duplicate %s in %s\n", name, genoName); hashAdd(repeatHash, name, rt); } storeMasked(repeatHash, genoName); } patSpace = makePatSpace(seqListList, genoListSize, seedSize, oocFileName, minMatch, 2000); endTime = clock1(); printf("Made index in %ld seconds\n", (endTime-startTime)); startTime = endTime; for (i=0; i<otherListSize; ++i) { FILE *f; char *otherName; int c; int dotCount = 0; struct dnaSeq otherSeq; ZeroVar(&otherSeq); otherName = otherList[i]; if (startsWith("#", otherName) ) continue; f = mustOpen(otherName, "r"); while ((c = fgetc(f)) != EOF) if (c == '>') break; printf("%s\n", otherName); fflush(stdout); while (faFastReadNext(f, &otherSeq.dna, &otherSeq.size, &otherSeq.name)) { aliSeqName = otherSeq.name; oneStrand(patSpace, repeatHash, &otherSeq, FALSE, stringency, out); reverseComplement(otherSeq.dna, otherSeq.size); oneStrand(patSpace, repeatHash, &otherSeq, TRUE, stringency, out); aliSeqName = NULL; } fclose(f); } freePatSpace(&patSpace); endTime = clock1(); printf("Alignment time is %ld sec\n", (endTime-startTime)); startTime = endTime; fclose(out); return 0; }
void liftPsl(char *destFile, struct hash *liftHash, int sourceCount, char *sources[], boolean querySide, boolean isExtended)
/* Lift up coordinates in .psl file.
 *   destFile    - output file; psl header written unless global nohead is set
 *   liftHash    - passed to findLift() to look up a liftSpec by sequence name
 *   sourceCount - number of entries in sources
 *   sources     - input files; a missing file is warned about and skipped
 *   querySide   - TRUE lifts the query coordinates, FALSE the target ones
 *   isExtended  - TRUE reads and writes records via xAliNext/xAliTabOut
 *                 instead of pslNext/pslTabOut
 * Records with no lift spec are dropped unless the global how equals
 * carryMissing, in which case they are written out unmodified.
 * Aborts if an error is pending on the output stream at the end. */
{
FILE *dest = mustOpen(destFile, "w");
char *source;
int i,j;
struct lineFile *lf;
struct psl *psl;
struct xAli *xa = NULL;
unsigned *starts;
unsigned *blockSizes;
struct liftSpec *spec;
int offset;
int blockCount;
char *seqName;
int dotMod = dots;
int seqSize;
/* Index into psl->strand for the side being lifted. */
int strandChar = (querySide ? 0 : 1);
if (!nohead)
    pslWriteHead(dest);
for (i=0; i<sourceCount; ++i)
    {
    source = sources[i];
    if (!fileExists(source))
        {
        warn("%s doesn't exist!", source);
        continue;
        }
    verbose(1, "Lifting %s\n", source);
    lf = pslFileOpenWithMeta(source, dest);
    for (;;)
        {
        /* In extended mode the xAli record is also accessed through a
         * struct psl pointer; xa is kept for the xAliTabOut call below. */
        if (isExtended)
            {
            xa = xAliNext(lf);
            psl = (struct psl *)xa;
            }
        else
            psl = pslNext(lf);
        if (psl == NULL)
            break;
        boolean isProt = pslIsProtein(psl);
        doDots(&dotMod);
        /* Remember the record's own name; it is put back before freeing. */
        if (querySide)
            seqName = psl->qName;
        else
            seqName = psl->tName;
        spec = findLift(liftHash, seqName, lf);
        if (spec == NULL)
            {
            /* No spec: drop the record unless missing ones are carried. */
            if (how != carryMissing)
                {
                freePslOrXa(psl, isExtended);
                continue;
                }
            }
        else
            {
            offset = spec->offset;
            blockSizes = psl->blockSizes;
            /* Step 1: lift the overall start/end of the chosen side and
             * pick which starts array and sequence size step 2 works on. */
            if (querySide)
                {
                if (!isPtoG)
                    {
                    cantHandleSpecRevStrand(spec);
                    psl->qStart += offset;
                    psl->qEnd += offset;
                    }
                else
                    {
                    /* isPtoG: counts and coordinates are scaled by 3
                     * (presumably protein to nucleotide - confirm). */
                    psl->match *= 3;
                    psl->misMatch *= 3;
                    if (spec->strand == '-')
                        {
                        int tmp = psl->qEnd;
                        psl->qEnd = psl->qStart;
                        psl->qStart = tmp;
                        psl->qStart *= -3;
                        psl->qEnd *= -3;
                        psl->qStart += offset;
                        psl->qEnd += offset;
                        }
                    else if (spec->strand == '+')
                        {
                        psl->qStart *= 3;
                        psl->qStart += offset;
                        psl->qEnd *= 3;
                        psl->qEnd += offset;
                        }
                    }
                starts = psl->qStarts;
                seqSize = psl->qSize;
                }
            else
                {
                if (spec->strand == '-')
                    reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);
                psl->tStart += offset;
                psl->tEnd += offset;
                starts = psl->tStarts;
                seqSize = psl->tSize;
                }
            /* Step 2: lift the per-block starts. */
            blockCount = psl->blockCount;
            if (isPtoG && (spec->strand == '-'))
                {
                psl->strand[strandChar] = spec->strand;
                for (j=0; j<blockCount; ++j)
                    {
                    starts[j] *= -3;
                    starts[j] += offset;
                    starts[j] = spec->newSize - starts[j];
                    }
                }
            else if (isPtoG && (spec->strand == '+'))
                {
                psl->strand[strandChar] = spec->strand;
                for (j=0; j<blockCount; ++j)
                    {
                    starts[j] *= 3;
                    starts[j] += offset;
                    }
                }
            else /* mRNA case. */
                {
                if (spec->strand == '+')
                    {
                    if (psl->strand[strandChar] == '-')
                        {
                        /* Starts on a '-' strand are re-expressed against
                         * spec->newSize after the offset is applied. */
                        for (j=0; j<blockCount; ++j)
                            {
                            int tr = seqSize - starts[j];
                            tr += offset;
                            starts[j] = spec->newSize - tr;
                            }
                        }
                    else
                        {
                        for (j=0; j<blockCount; ++j)
                            starts[j] += offset;
                        }
                    }
                else
                    {
                    if (isProt)
                        {
                        /* if it's protein, we can't reverse the query */
                        if (psl->strand[strandChar] == '-')
                            {
                            for (j=0; j<blockCount; ++j)
                                starts[j] += offset;
                            }
                        else
                            {
                            for (j=0; j<blockCount; ++j)
                                {
                                int tr = seqSize - starts[j];
                                tr += offset;
                                starts[j] = spec->newSize - tr;
                                }
                            }
                        psl->strand[strandChar] = flipStrand(psl->strand[strandChar]);
                        }
                    else
                        {
                        if (psl->strand[strandChar] == '-')
                            errAbort("Can't handle all these minus strands! line %d",lf->lineIx);
                        else
                            {
                            /* Reverse both sides: block starts are
                             * recomputed from the far end, the other
                             * side's strand is flipped and the block
                             * arrays are reversed.  Note this branch uses
                             * tStarts/qStarts directly, not starts. */
                            for (j=0; j<blockCount; ++j)
                                {
                                psl->tStarts[j] = psl->tSize - (psl->tStarts[j] + blockSizes[j]) + offset;
                                psl->qStarts[j] = psl->qSize - (psl->qStarts[j] + blockSizes[j]); /* no offset. */
                                }
                            psl->strand[1-strandChar] = flipStrand(psl->strand[1-strandChar]);
                            reverseUnsigned(blockSizes, blockCount);
                            reverseUnsigned(psl->qStarts, blockCount);
                            reverseUnsigned(psl->tStarts, blockCount);
                            }
                        }
                    }
                }
            if (isPtoG)
                for (j=0; j<blockCount; ++j)
                    blockSizes[j] *= 3;
            /* Step 3: report the lifted sequence's size and name. */
            if (querySide)
                {
                psl->qSize = spec->newSize;
                psl->qName = spec->newName;
                }
            else
                {
                psl->tSize = spec->newSize;
                psl->tName = spec->newName;
                }
            }
        if (isExtended)
            {
            xAliTabOut(xa, dest);
            }
        else
            {
            pslTabOut(psl, dest);
            }
        /* Put the original name pointer back before freeing; presumably so
         * the free releases the record's own string and not spec->newName. */
        if (querySide)
            psl->qName = seqName;
        else
            psl->tName = seqName;
        freePslOrXa(psl, isExtended);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
if (ferror(dest))
    errAbort("error writing %s", destFile);
fclose(dest);
}
static void pslHead(struct gfOutput *out, FILE *f)
/* Emit the psl header to f.  The gfOutput argument is accepted but not
 * consulted. */
{
(void)out;
pslWriteHead(f);
}
/*
 * Entry point: load the psl records in --inputFile, pass them to mapPSLs()
 * together with the --query and --target names, and write the result (after
 * a psl header) to --outputFile.
 *
 * Returns 0 on success or after --help; 1 on an unknown option, a missing
 * required option, or failure to open/close the output file.
 */
int main(int argc, char *argv[]) {
    /*
     * Arguments/options. Each one points at the option text inside argv
     * (via optarg), so there is no copy and no length limit; NULL means the
     * option was not supplied. The previous fixed-size buffers overflowed on
     * long paths, and being arrays could never fail a NULL check.
     */
    char *outputFile = NULL;
    char *inputFile = NULL;
    char *query = NULL;
    char *target = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = {
            { "query", required_argument, 0, 'q' },
            { "target", required_argument, 0, 't' },
            { "outputFile", required_argument, 0, 'o' },
            { "inputFile", required_argument, 0, 'i' },
            { "help", no_argument, 0, 'h' },
            { 0, 0, 0, 0 }
        };

        int option_index = 0;
        int key = getopt_long(argc, argv, "i:o:q:t:h", long_options, &option_index);
        if (key == -1) {
            break;
        }

        switch (key) {
            case 'i':
                inputFile = optarg;
                break;
            case 'o':
                outputFile = optarg;
                break;
            case 'q':
                query = optarg;
                break;
            case 't':
                target = optarg;
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    /* Explicit checks rather than assert(): these must survive NDEBUG. */
    if (inputFile == NULL || outputFile == NULL || query == NULL || target == NULL) {
        fprintf(stderr, "Missing required option: --inputFile, --outputFile, "
                        "--query and --target must all be given\n");
        usage();
        return 1;
    }

    FILE *fileHandle = fopen(outputFile, "w");
    if (fileHandle == NULL) {
        perror(outputFile);
        return 1;
    }
    pslWriteHead(fileHandle);
    struct psl *pslList = pslLoadAll(inputFile);
    mapPSLs(pslList, fileHandle, query, target);
    /* fclose flushes; a failure here means the output is incomplete. */
    if (fclose(fileHandle) != 0) {
        perror(outputFile);
        return 1;
    }
    return 0;
}
void pslSort2(char *outDir, char *tempDir, boolean noHead)
/* Do second step of sort - merge all sorted files in tempDir
 * to final outdir.
 *   outDir  - directory receiving one <targetAcc>.psl file per distinct
 *             accession, as produced by getTargetAcc() from the target name
 *   tempDir - directory holding the tmp*.psl files to merge; each is
 *             removed afterwards
 *   noHead  - when TRUE no psl header is written to the output files
 * Records are merged in pslCmpTarget order.  Aborts when tempDir has no
 * tmp*.psl files or when a constructed path does not fit its buffer. */
{
char fileName[512];
struct slName *tmpList, *tmp;
struct midFile *midList = NULL, *mid;
int aliCount = 0;
FILE *f = NULL;
char lastTargetAcc[256];
char targetAcc[256];

strcpy(lastTargetAcc, "");
tmpList = listDir(tempDir, "tmp*.psl");
if (tmpList == NULL)
    errAbort("No tmp*.psl files in %s\n", tempDir);
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    /* safef aborts instead of overrunning fileName on a long path. */
    safef(fileName, sizeof(fileName), "%s/%s", tempDir, tmp->name);
    AllocVar(mid);
    mid->lf = pslFileOpen(fileName);
    slAddHead(&midList, mid);
    }
printf("writing %s", outDir);
fflush(stdout);

/* Write out the lowest sorting line from mid list until done. */
for (;;)
    {
    struct midFile *bestMid = NULL;
    if ((++aliCount & 0xffff) == 0)
        {
        printf(".");
        fflush(stdout);
        }
    for (mid = midList; mid != NULL; mid = mid->next)
        {
        /* Refill this file's look-ahead record; close it once exhausted. */
        if (mid->lf != NULL && mid->psl == NULL)
            {
            if ((mid->psl = nextPsl(mid->lf)) == NULL)
                lineFileClose(&mid->lf);
            }
        if (mid->psl != NULL)
            {
            if (bestMid == NULL || pslCmpTarget(&mid->psl, &bestMid->psl) < 0)
                bestMid = mid;
            }
        }
    if (bestMid == NULL)
        break;
    getTargetAcc(bestMid->psl->tName, targetAcc);
    /* The f == NULL test covers a first accession that is the empty string,
     * which used to match the initial lastTargetAcc and leave f unopened. */
    if (f == NULL || !sameString(targetAcc, lastTargetAcc))
        {
        safef(lastTargetAcc, sizeof(lastTargetAcc), "%s", targetAcc);
        carefulClose(&f);
        safef(fileName, sizeof(fileName), "%s/%s.psl", outDir, targetAcc);
        f = mustOpen(fileName, "w");
        if (!noHead)
            pslWriteHead(f);
        }
    pslTabOut(bestMid->psl, f);
    pslFree(&bestMid->psl);
    }
carefulClose(&f);
printf("\n");

printf("Cleaning up temp files\n");
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    safef(fileName, sizeof(fileName), "%s/%s", tempDir, tmp->name);
    remove(fileName);
    }
slFreeList(&midList);
slFreeList(&tmpList);
}
void pslSort(char *command, char *outFile, char *tempDir, char *inDirs[], int inDirCount) /* Do the two step sort. */ { int i; struct slName *fileList = NULL, *name; char *inDir; struct slName *dirDir, *dirFile; char fileName[512]; int fileCount; int totalFilesProcessed = 0; int filesPerMidFile; int midFileCount = 0; FILE *f; struct lineFile *lf; boolean doReflect = FALSE; boolean suppressSelf = FALSE; boolean firstOnly = endsWith(command, "1"); boolean secondOnly = endsWith(command, "2"); if (startsWith("dirs", command)) ; else if (startsWith("g2g", command)) { doReflect = TRUE; suppressSelf = TRUE; } else usage(); if (!secondOnly) { makeDir(tempDir); /* Figure out how many files to process. */ for (i=0; i<inDirCount; ++i) { inDir = inDirs[i]; dirDir = listDir(inDir, "*.psl"); if (slCount(dirDir) == 0) dirDir = listDir(inDir, "*.psl.gz"); if (slCount(dirDir) == 0) errAbort("No psl files in %s\n", inDir); verbose(1, "%s with %d files\n", inDir, slCount(dirDir)); for (dirFile = dirDir; dirFile != NULL; dirFile = dirFile->next) { sprintf(fileName, "%s/%s", inDir, dirFile->name); name = newSlName(fileName); slAddHead(&fileList, name); } slFreeList(&dirDir); } verbose(1, "%d files in %d dirs\n", slCount(fileList), inDirCount); slReverse(&fileList); fileCount = slCount(fileList); filesPerMidFile = round(sqrt(fileCount)); // if (filesPerMidFile > 20) // filesPerMidFile = 20; /* bandaide! Should keep track of mem usage. */ verbose(1, "Got %d files %d files per mid file\n", fileCount, filesPerMidFile); /* Read in files a group at a time, sort, and write merged, sorted * output of one group. 
*/ name = fileList; while (totalFilesProcessed < fileCount) { int filesInMidFile = 0; struct psl *pslList = NULL, *psl; int lfileCount = 0; struct lm *lm = lmInit(256*1024); for (filesInMidFile = 0; filesInMidFile < filesPerMidFile && name != NULL; ++filesInMidFile, ++totalFilesProcessed, name = name->next) { boolean reflectMe = FALSE; if (doReflect) { reflectMe = !selfFile(name->name); } verbose(2, "Reading %s (%d of %d)\n", name->name, totalFilesProcessed+1, fileCount); lf = pslFileOpen(name->name); while ((psl = nextLmPsl(lf, lm)) != NULL) { if (psl->qStart == psl->tStart && psl->strand[0] == '+' && suppressSelf && sameString(psl->qName, psl->tName)) { continue; } ++lfileCount; slAddHead(&pslList, psl); if (reflectMe) { psl = mirrorLmPsl(psl, lm); slAddHead(&pslList, psl); } } lineFileClose(&lf); } slSort(&pslList, pslCmpQuery); makeMidName(tempDir, midFileCount, fileName); verbose(1, "Writing %s\n", fileName); f = mustOpen(fileName, "w"); if (!nohead) pslWriteHead(f); for (psl = pslList; psl != NULL; psl = psl->next) { pslTabOut(psl, f); } fclose(f); pslList = NULL; lmCleanup(&lm); verbose(2, "lfileCount %d\n", lfileCount); ++midFileCount; } } if (!firstOnly) pslSort2(outFile, tempDir); }
void pslSort2(char *outFile, char *tempDir) /* Do second step of sort - merge all sorted files in tempDir * to final. */ { char fileName[512]; struct slName *tmpList, *tmp; struct midFile *midList = NULL, *mid; int aliCount = 0; FILE *f = mustOpen(outFile, "w"); if (!nohead) pslWriteHead(f); tmpList = listDir(tempDir, "tmp*.psl"); if (tmpList == NULL) errAbort("No tmp*.psl files in %s\n", tempDir); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); AllocVar(mid); mid->lf = pslFileOpen(fileName); slAddHead(&midList, mid); } verbose(1, "writing %s", outFile); fflush(stdout); /* Write out the lowest sorting line from mid list until done. */ for (;;) { struct midFile *bestMid = NULL; if ( (++aliCount & 0xffff) == 0) { verboseDot(); fflush(stdout); } for (mid = midList; mid != NULL; mid = mid->next) { if (mid->lf != NULL && mid->psl == NULL) { if ((mid->psl = nextPsl(mid->lf)) == NULL) lineFileClose(&mid->lf); } if (mid->psl != NULL) { if (bestMid == NULL || pslCmpQuery(&mid->psl, &bestMid->psl) < 0) bestMid = mid; } } if (bestMid == NULL) break; pslTabOut(bestMid->psl, f); pslFree(&bestMid->psl); } printf("\n"); fclose(f); /* The followint really shouldn't be necessary.... */ for (mid = midList; mid != NULL; mid = mid->next) lineFileClose(&mid->lf); printf("Cleaning up temp files\n"); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); remove(fileName); } }