void netToAxt(char *netName, char *chainName, char *tNibDir, char *qNibDir, char *axtName) /* netToAxt - Convert net (and chain) to axt.. */ { Bits *usedBits = findUsedIds(netName); struct hash *chainHash; struct chainNet *net; struct lineFile *lf = lineFileOpen(netName, TRUE); FILE *f = mustOpen(axtName, "w"); struct dnaSeq *tChrom = NULL; struct nibTwoCache *qNtc = nibTwoCacheNew(qNibDir); char *gapFileName = optionVal("gapOut", NULL); FILE *gapFile = NULL; if (gapFileName) gapFile = mustOpen(gapFileName, "w"); lineFileSetMetaDataOutput(lf, f); chainHash = chainReadUsedSwap(chainName, qChain, usedBits); bitFree(&usedBits); while ((net = chainNetRead(lf)) != NULL) { verbose(1, "Processing %s\n", net->name); tChrom = nibTwoLoadOne(tNibDir, net->name); if (tChrom->size != net->size) errAbort("Size mismatch on %s. Net/nib out of sync or possibly nib dirs swapped?", tChrom->name); rConvert(net->fillList, tChrom, qNtc, qNibDir, chainHash, f, gapFile); freeDnaSeq(&tChrom); chainNetFree(&net); } nibTwoCacheFree(&qNtc); }
struct seqPair *readAxtBlocks(char *fileName, struct hash *pairHash, FILE *f) /* Read in axt file and parse blocks into pairHash */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dyString *dy = newDyString(512); struct axt *axt; struct seqPair *spList = NULL, *sp; lineFileSetMetaDataOutput(lf, f); lineFileSetUniqueMetaData(lf); while ((axt = axtRead(lf)) != NULL) { dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", axt->qName, axt->qStrand, axt->tName); sp = hashFindVal(pairHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(pairHash, dy->string, sp, &sp->name); sp->qName = cloneString(axt->qName); sp->tName = cloneString(axt->tName); sp->qStrand = axt->qStrand; } axtAddBlocksToBoxInList(&sp->blockList, axt); sp->axtCount += 1; axtFree(&axt); } lineFileClose(&lf); dyStringFree(&dy); slSort(&spList, seqPairCmp); return spList; }
struct hash *chainReadAllWithMeta(char *fileName, FILE *f)
/* Read chains into a hash keyed by id.
 * Opens fileName, directs its meta data to f, and lets
 * chainReadUsedSwapLf() do the reading (called with FALSE and NULL for its
 * second and third arguments).  The lineFile is closed before returning
 * the hash. */
{
struct hash *chainsById;
struct lineFile *lf = lineFileOpen(fileName, TRUE);

lineFileSetMetaDataOutput(lf, f);
chainsById = chainReadUsedSwapLf(fileName, FALSE, NULL, lf);
lineFileClose(&lf);
return chainsById;
}
void liftNet(char *destFile, struct hash *liftHash, int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .net file.
 * Each net read from each source is lifted and written to destFile.
 * On the query side the fills are lifted with liftFillsQ() into a fresh
 * name hash that replaces net->nameHash.  On the target side the lift spec
 * for net->name supplies the new name and size and is applied to the fills
 * with liftFillsT(); a net with no lift spec is written unchanged when
 * 'how' is carryMissing and dropped otherwise. */
{
FILE *out = mustOpen(destFile, "w");
int dotCountdown = dots;
int i;

for (i = 0; i < sourceCount; ++i)
    {
    char *srcName = sources[i];
    struct lineFile *lf = lineFileOpen(srcName, TRUE);
    struct chainNet *net;

    lineFileSetMetaDataOutput(lf, out);
    verbose(1, "Lifting %s\n", srcName);
    for (;;)
        {
        net = chainNetRead(lf);
        if (net == NULL)
            break;

        if (!querySide)
            {
            struct liftSpec *lift = findLift(liftHash, net->name, lf);
            if (lift != NULL)
                {
                freeMem(net->name);
                net->name = cloneString(lift->newName);
                net->size = lift->newSize;
                liftFillsT(net->fillList, lift);
                }
            else if (how != carryMissing)
                {
                /* No lift spec and not carrying: skip this net. */
                chainNetFree(&net);
                continue;
                }
            }
        else
            {
            struct hash *liftedNames = hashNew(6);
            liftFillsQ(net->fillList, liftedNames, liftHash, lf);
            hashFree(&(net->nameHash));
            net->nameHash = liftedNames;
            }

        chainNetWrite(net, out);
        chainNetFree(&net);
        doDots(&dotCountdown);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
}
void chainMergeSort(int fileCount, char *files[], FILE *out, int level) /* chainMergeSort - Combine sorted files into larger sorted file. */ { int i; struct chainFile *cf; int id = 0; struct quickHeap *h = NULL; h = newQuickHeap(fileCount, &cmpChainScores); /* Open up all input files and read first chain. */ for (i=0; i<fileCount; ++i) { AllocVar(cf); cf->lf = lineFileOpen(files[i], TRUE); lineFileSetMetaDataOutput(cf->lf, out); cf->chain = chainRead(cf->lf); if (cf->chain) addToQuickHeap(h, cf); else cfEof(&cf,level); /* deal with EOF */ } while (!quickHeapEmpty(h)) { cf = peekQuickHeapTop(h); if (!saveId) cf->chain->id = ++id; /* We reset id's here. */ chainWrite(cf->chain, out); chainFree(&cf->chain); if ((cf->chain = chainRead(cf->lf))) { quickHeapTopChanged(h); } else { /* deal with EOF */ if (!removeFromQuickHeapByElem(h, cf)) errAbort("unexpected error: chainFile not found on heap"); cfEof(&cf,level); } } freeQuickHeap(&h); }
void axtRescore(char *in, char *out)
/* axtRescore - Recalculate scores in axt.
 * Writes the score scheme to the output first, then copies every axt
 * record from 'in' to 'out' with its score replaced by axtScore() under
 * the global scoreScheme.  Meta data from the input goes to the output.
 * NOTE(review): neither the input lineFile nor the output file is closed
 * before returning. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *outFile = mustOpen(out, "w");
struct axt *aln;

lineFileSetMetaDataOutput(lf, outFile);
axtScoreSchemeDnaWrite(scoreScheme, outFile, "axtRescore");
while ((aln = axtRead(lf)) != NULL)
    {
    aln->score = axtScore(aln, scoreScheme);
    axtWrite(aln, outFile);
    axtFree(&aln);
    }
}
void chainSplit(char *outDir, int inCount, char *inFiles[]) /* chainSplit - Split chains up by target or query sequence. */ { struct hash *hash = newHash(0); int inIx; char tpath[512]; FILE *meta ; bool metaOpen = TRUE; makeDir(outDir); safef(tpath, sizeof(tpath), "%s/meta.tmp", outDir); meta = mustOpen(tpath,"w"); for (inIx = 0; inIx < inCount; ++inIx) { struct lineFile *lf = lineFileOpen(inFiles[inIx], TRUE); struct chain *chain; FILE *f; lineFileSetMetaDataOutput(lf, meta); while ((chain = chainRead(lf)) != NULL) { char *name = (splitOnQ ? chain->qName : chain->tName); if (lump > 0) name = lumpName(name); if ((f = hashFindVal(hash, name)) == NULL) { char path[512], cmd[512]; safef(path, sizeof(path),"%s/%s.chain", outDir, name); if (metaOpen) fclose(meta); metaOpen = FALSE; safef(cmd,sizeof(cmd), "cat %s | sort -u > %s", tpath, path); mustSystem(cmd); f = mustOpen(path, "a"); hashAdd(hash, name, f); } chainWrite(chain, f); chainFree(&chain); } lineFileClose(&lf); } }
void chainPreNet(char *inFile, char *targetSizes, char *querySizes, char *outFile) /* chainPreNet - Remove chains that don't have a chance of being netted. */ { struct hash *tHash = setupChroms(targetSizes); struct hash *qHash = setupChroms(querySizes); struct lineFile *lf = lineFileOpen(inFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct chain *chain; double score, lastScore = 9e99; struct chrom *qChrom, *tChrom; lineFileSetMetaDataOutput(lf, f); while ((chain = chainRead(lf)) != NULL) { /* Report progress. */ dotOut(); /* Check to make sure it really is sorted by score. */ score = chain->score; if (score > lastScore) { errAbort("%s not sorted by score line %d", lf->fileName, lf->lineIx); } lastScore = score; /* Output chain if necessary and then free it. */ qChrom = hashMustFindVal(qHash, chain->qName); tChrom = hashMustFindVal(tHash, chain->tName); if (chainUsed(chain, qChrom, tChrom) && inclQuery(chain)) { chainWrite(chain, f); } chainFree(&chain); } }
void liftChain(char *destFile, struct hash *liftHash, int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .chain file.
 * Every chain read from every source is lifted on the query side when
 * querySide is set and on the target side otherwise, then written to
 * destFile.  The lift spec is looked up by the name of the side being
 * lifted.  A chain without a lift spec is written unchanged when 'how' is
 * carryMissing and dropped otherwise.  Four cases are handled, depending
 * on the strand of the lift spec and the side being lifted. */
{
FILE *out = mustOpen(destFile, "w");
int dotCountdown = dots;
int i;

for (i = 0; i < sourceCount; ++i)
    {
    char *srcName = sources[i];
    struct lineFile *lf = lineFileOpen(srcName, TRUE);
    struct chain *chain;

    lineFileSetMetaDataOutput(lf, out);
    verbose(1, "Lifting %s\n", srcName);
    while ((chain = chainRead(lf)) != NULL)
        {
        struct liftSpec *lift = findLift(liftHash,
                querySide ? chain->qName : chain->tName, lf);
        struct cBlock *blk;

        if (lift == NULL)
            {
            if (how != carryMissing)
                {
                chainFree(&chain);
                continue;
                }
            }
        else if (lift->strand == '-' && querySide)
            {
            /* Reverse-strand lift of the query: recompute the start,
             * keep the span, and flip the query strand. */
            int qSpan = chain->qEnd - chain->qStart;
            if (chain->qStrand == '-')
                chain->qStart += lift->offset;
            else
                chain->qStart = lift->newSize - lift->offset - (chain->qSize - chain->qStart);
            chain->qEnd = chain->qStart + qSpan;
            chain->qStrand = flipStrand(chain->qStrand);
            freeMem(chain->qName);
            chain->qName = cloneString(lift->newName);
            chain->qSize = lift->newSize;
            /* We don't need to mess with the blocks here
             * since they are all relative to the start. */
            }
        else if (lift->strand == '-')
            {
            /* Reverse-strand lift of the target.  We try and keep the
             * target strand positive, so we end up flipping in both target
             * and query and flipping the query strand. */
            int shift = lift->offset;

            reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize);
            reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize);
            chain->qStrand = flipStrand(chain->qStrand);

            /* Flip around blocks and add offset.  The old sizes are still
             * in the chain at this point. */
            for (blk = chain->blockList; blk != NULL; blk = blk->next)
                {
                reverseIntRange(&blk->qStart, &blk->qEnd, chain->qSize);
                reverseIntRange(&blk->tStart, &blk->tEnd, chain->tSize);
                blk->tStart += shift;
                blk->tEnd += shift;
                }
            slReverse(&chain->blockList);

            /* On target side add offset as well and update name and size. */
            chain->tStart += shift;
            chain->tEnd += shift;
            freeMem(chain->tName);
            chain->tName = cloneString(lift->newName);
            chain->tSize = lift->newSize;
            }
        else if (querySide)
            {
            /* Forward-strand lift of the query.  For a minus-strand query
             * the shift is measured from the other end of the new sequence. */
            int shift = lift->offset;
            if (chain->qStrand == '-')
                shift = lift->newSize - (lift->offset + lift->oldSize);
            freeMem(chain->qName);
            chain->qName = cloneString(lift->newName);
            chain->qSize = lift->newSize;
            chain->qStart += shift;
            chain->qEnd += shift;
            for (blk = chain->blockList; blk != NULL; blk = blk->next)
                {
                blk->qStart += shift;
                blk->qEnd += shift;
                }
            }
        else
            {
            /* Forward-strand lift of the target: a plain shift. */
            int shift = lift->offset;
            freeMem(chain->tName);
            chain->tName = cloneString(lift->newName);
            chain->tSize = lift->newSize;
            chain->tStart += shift;
            chain->tEnd += shift;
            for (blk = chain->blockList; blk != NULL; blk = blk->next)
                {
                blk->tStart += shift;
                blk->tEnd += shift;
                }
            }

        chainWrite(chain, out);
        chainFree(&chain);
        doDots(&dotCountdown);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
}
void liftAxt(char *destFile, struct hash *liftHash, int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .axt file.
 * Each axt record read from each source is shallow-copied, the copy's
 * query (querySide set) or target coordinates and name are lifted, and the
 * copy is written to destFile.  A record with no lift spec is written
 * unchanged when 'how' is carryMissing and dropped otherwise.  Lifting the
 * target of a minus-strand record produces a warning. */
{
FILE *out = mustOpen(destFile, "w");
int dotCountdown = dots;
int i;

for (i = 0; i < sourceCount; ++i)
    {
    char *srcName = sources[i];
    struct lineFile *lf = lineFileOpen(srcName, TRUE);
    struct axt *axt;

    lineFileSetMetaDataOutput(lf, out);
    verbose(1, "Lifting %s\n", srcName);
    while ((axt = axtRead(lf)) != NULL)
        {
        /* Work on a copy so the name pointer can be replaced without
         * touching what axtFree() will release. */
        struct axt lifted = *axt;
        struct liftSpec *lift = findLift(liftHash,
                querySide ? lifted.qName : lifted.tName, lf);

        if (lift != NULL)
            {
            char strand = querySide ? lifted.qStrand : lifted.tStrand;
            int shift;

            cantHandleSpecRevStrand(lift);
            /* On the minus strand the shift is measured from the far end
             * of the new sequence. */
            if (strand == '-')
                shift = lift->newSize - (lift->offset + lift->oldSize);
            else
                shift = lift->offset;

            if (querySide)
                {
                lifted.qStart += shift;
                lifted.qEnd += shift;
                lifted.qName = lift->newName;
                }
            else
                {
                lifted.tStart += shift;
                lifted.tEnd += shift;
                lifted.tName = lift->newName;
                if (strand == '-')
                    warn("Target minus strand, please double check results.");
                }
            }
        else if (how != carryMissing)
            {
            axtFree(&axt);
            continue;
            }

        axtWrite(&lifted, out);
        axtFree(&axt);
        doDots(&dotCountdown);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
}
void chainNet(char *chainFile, char *tSizes, char *qSizes, char *tNet, char *qNet) /* chainNet - Make alignment nets out of chains. */ { struct lineFile *lf = lineFileOpen(chainFile, TRUE); struct hash *qHash, *tHash; struct chrom *qChromList, *tChromList, *tChrom, *qChrom; struct chain *chain; double lastScore = -1; struct lm *lm = lmInit(0); struct rbTreeNode **rbStack; FILE *tNetFile = mustOpen(tNet, "w"); FILE *qNetFile = mustOpen(qNet, "w"); lmAllocArray(lm, rbStack, 256); makeChroms(qSizes, lm, rbStack, &qHash, &qChromList); makeChroms(tSizes, lm, rbStack, &tHash, &tChromList); verbose(1, "Got %d chroms in %s, %d in %s\n", slCount(tChromList), tSizes, slCount(qChromList), qSizes); lineFileSetMetaDataOutput(lf, tNetFile); lineFileSetMetaDataOutput(lf, qNetFile); /* Loop through chain file building up net. */ while ((chain = chainRead(lf)) != NULL) { /* Make sure that input is really sorted. */ if (lastScore >= 0 && chain->score > lastScore) errAbort("%s must be sorted in order of score", chainFile); lastScore = chain->score; if (chain->score < minScore) { break; } verbose(2, "chain %f (%d els) %s %d-%d %c %s %d-%d\n", chain->score, slCount(chain->blockList), chain->tName, chain->tStart, chain->tEnd, chain->qStrand, chain->qName, chain->qStart, chain->qEnd); qChrom = hashMustFindVal(qHash, chain->qName); if (qChrom->size != chain->qSize) errAbort("%s is %d in %s but %d in %s", chain->qName, chain->qSize, chainFile, qChrom->size, qSizes); tChrom = hashMustFindVal(tHash, chain->tName); if (tChrom->size != chain->tSize) errAbort("%s is %d in %s but %d in %s", chain->tName, chain->tSize, chainFile, tChrom->size, tSizes); if (!inclQuery(chain)) verbose(2, "skipping chain on query %s\n", chain->qName); else { addChain(qChrom, tChrom, chain); verbose(2, "%s has %d inserts, %s has %d\n", tChrom->name, tChrom->spaces->n, qChrom->name, qChrom->spaces->n); } } /* Build up other side of fills. It's just for historical * reasons this is not done during the main build up. 
* It's a little less efficient this way, but to change it * some hard reverse strand issues would have to be juggled. */ verbose(1, "Finishing nets\n"); finishNet(qChromList, TRUE); finishNet(tChromList, FALSE); /* Write out basic net files. */ verbose(1, "writing %s\n", tNet); outputNetSide(tChromList, tNetFile, FALSE); verbose(1, "writing %s\n", qNet); outputNetSide(qChromList, qNetFile, TRUE); /* prevent SIGPIPE in preceding process if input is a pipe, consume remainder * of input file since we stop before EOF. */ if (isPipe(lf->fd)) { char *line; while(lineFileNext(lf, &line, NULL)) continue; } lineFileClose(&lf); if (verboseLevel() > 1) printMem(stderr); }