void writeChainWhole(struct chain *chain, FILE *f, FILE *gapFile)
/* Emit the complete chain to f with chainWrite.  If gapFile is non-NULL the
 * same chain is also handed to gapWrite with gapFile as its destination. */
{
    chainWrite(chain, f);
    if (gapFile == NULL)
        return;
    gapWrite(chain, gapFile);
}
void chainWriteAll(struct chain *chainList, FILE *f)
/* Walk the list starting at chainList (linked through ->next) and pass every
 * element to chainWrite, in list order.  A NULL list writes nothing. */
{
    struct chain *cur = chainList;

    while (cur != NULL) {
        chainWrite(cur, f);
        cur = cur->next;
    }
}
static void chainRecWrite(struct chromAnn *ca, FILE *fh, char term)
/* Output the chain held in ca->rec to fh using chainWrite.
 * term must be '\n'; any other terminator trips the assertion. */
{
    struct chain *rec = ca->rec;

    assert('\n' == term);
    chainWrite(rec, fh);
}
void writeChainPart(struct chain *chain, int tStart, int tEnd, FILE *f, FILE *gapFile)
/* Write the portion of chain selected by chainSubsetOnT for the target range
 * tStart..tEnd to f, and to gapFile via gapWrite when gapFile is non-NULL.
 * The subset must exist (asserted). */
{
    struct chain *part;
    struct chain *toFree;

    chainSubsetOnT(chain, tStart, tEnd, &part, &toFree);
    assert(part != NULL);

    chainWrite(part, f);
    if (gapFile != NULL) {
        gapWrite(part, gapFile);
    }

    /* chainSubsetOnT reports separately what has to be released. */
    chainFree(&toFree);
}
void chainStitchId(char *inChain, char *outChain) /* chainStitchId - Join chain fragments with the same chain ID into a single chain per ID. */ { struct lineFile *lf = lineFileOpen(inChain, TRUE); struct chain *chain = NULL, *chainList = NULL; FILE *f = mustOpen(outChain, "w"); int idArrLen = 64 * 1024 * 1024; struct chain **idArr = needLargeZeroedMem(idArrLen * sizeof(struct chain *)); int i=0; /* Build up an array of chains, indexed by IDs. Agglomerate chains with same * ID as we go. */ while ((chain = chainRead(lf)) != NULL) { while (chain->id >= idArrLen) { idArr = needMoreMem(idArr, idArrLen, idArrLen*2*sizeof(idArr[0])); idArrLen *= 2; } if (idArr[chain->id] == NULL) idArr[chain->id] = chain; else { tackOnFrag(idArr[chain->id], chain); chainFree(&chain); } } lineFileClose(&lf); /* Clean up each agglomerated chain and add to head of list (but step * backwards so the resulting list is in order by chain id). */ for (i = idArrLen-1; i >= 0; i--) { chain = idArr[i]; if (chain != NULL) { slSort(&(chain->blockList), cBlockCmpTarget); slAddHead(&chainList, chain); } } /* Ordering by original chain id gets us most of the way to sorting by * score, but not all the way: sort and finally write out the chains. */ slSort(&chainList, chainCmpScore); for (chain = chainList; chain != NULL; chain = chain->next) { chainWrite(chain, f); /* could free here, but program is about to end so why waste the time. */ } carefulClose(&f); }
void chainMergeSort(int fileCount, char *files[], FILE *out, int level) /* chainMergeSort - Combine sorted files into larger sorted file. */ { int i; struct chainFile *cf; int id = 0; struct quickHeap *h = NULL; h = newQuickHeap(fileCount, &cmpChainScores); /* Open up all input files and read first chain. */ for (i=0; i<fileCount; ++i) { AllocVar(cf); cf->lf = lineFileOpen(files[i], TRUE); lineFileSetMetaDataOutput(cf->lf, out); cf->chain = chainRead(cf->lf); if (cf->chain) addToQuickHeap(h, cf); else cfEof(&cf,level); /* deal with EOF */ } while (!quickHeapEmpty(h)) { cf = peekQuickHeapTop(h); if (!saveId) cf->chain->id = ++id; /* We reset id's here. */ chainWrite(cf->chain, out); chainFree(&cf->chain); if ((cf->chain = chainRead(cf->lf))) { quickHeapTopChanged(h); } else { /* deal with EOF */ if (!removeFromQuickHeapByElem(h, cf)) errAbort("unexpected error: chainFile not found on heap"); cfEof(&cf,level); } } freeQuickHeap(&h); }
void chainSplit(char *outDir, int inCount, char *inFiles[]) /* chainSplit - Split chains up by target or query sequence. */ { struct hash *hash = newHash(0); int inIx; char tpath[512]; FILE *meta ; bool metaOpen = TRUE; makeDir(outDir); safef(tpath, sizeof(tpath), "%s/meta.tmp", outDir); meta = mustOpen(tpath,"w"); for (inIx = 0; inIx < inCount; ++inIx) { struct lineFile *lf = lineFileOpen(inFiles[inIx], TRUE); struct chain *chain; FILE *f; lineFileSetMetaDataOutput(lf, meta); while ((chain = chainRead(lf)) != NULL) { char *name = (splitOnQ ? chain->qName : chain->tName); if (lump > 0) name = lumpName(name); if ((f = hashFindVal(hash, name)) == NULL) { char path[512], cmd[512]; safef(path, sizeof(path),"%s/%s.chain", outDir, name); if (metaOpen) fclose(meta); metaOpen = FALSE; safef(cmd,sizeof(cmd), "cat %s | sort -u > %s", tpath, path); mustSystem(cmd); f = mustOpen(path, "a"); hashAdd(hash, name, f); } chainWrite(chain, f); chainFree(&chain); } lineFileClose(&lf); } }
void chainPreNet(char *inFile, char *targetSizes, char *querySizes, char *outFile) /* chainPreNet - Remove chains that don't have a chance of being netted. */ { struct hash *tHash = setupChroms(targetSizes); struct hash *qHash = setupChroms(querySizes); struct lineFile *lf = lineFileOpen(inFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct chain *chain; double score, lastScore = 9e99; struct chrom *qChrom, *tChrom; lineFileSetMetaDataOutput(lf, f); while ((chain = chainRead(lf)) != NULL) { /* Report progress. */ dotOut(); /* Check to make sure it really is sorted by score. */ score = chain->score; if (score > lastScore) { errAbort("%s not sorted by score line %d", lf->fileName, lf->lineIx); } lastScore = score; /* Output chain if necessary and then free it. */ qChrom = hashMustFindVal(qHash, chain->qName); tChrom = hashMustFindVal(tHash, chain->tName); if (chainUsed(chain, qChrom, tChrom) && inclQuery(chain)) { chainWrite(chain, f); } chainFree(&chain); } }
void chainStitch(char *psls, char *chains, char *outChainName, char *outFoundName, char *outNotFoundName) /* chainStitch - Stitch psls into chains. */ { int lastChainId = -1; struct psl *prevPsl, *nextPsl; struct psl *fakePslList; int jj; int deletedBases, addedBases; FILE *outFound = mustOpen(outFoundName, "w"); FILE *outNotFound = mustOpen(outNotFoundName, "w"); FILE *outChains = mustOpen(outChainName, "w"); struct lineFile *chainsLf = lineFileOpen(chains, TRUE); struct cseqPair *cspList = NULL, *csp; struct seqPair *spList = NULL, *sp; struct lineFile *pslLf = pslFileOpen(psls); struct dyString *dy = newDyString(512); struct psl *psl; struct hash *pslHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct hash *chainHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct chain *chain, *chainList = NULL; struct cBlock *block , *nextBlock = NULL, *prevBlock = NULL; int count; count = 0; while ((psl = pslNext(pslLf)) != NULL) { assert((psl->strand[1] == 0) || (psl->strand[1] == '+')); count++; dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", psl->qName, psl->strand[0], psl->tName); sp = hashFindVal(pslHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(pslHash, dy->string, sp, &sp->name); sp->qName = cloneString(psl->qName); sp->tName = cloneString(psl->tName); sp->qStrand = psl->strand[0]; } slAddHead(&sp->psl, psl); } lineFileClose(&pslLf); printf("read in %d psls\n",count); for(sp = spList; sp; sp = sp->next) slReverse(&sp->psl); //slSort(&sp->psl, pslCmpTarget); count = 0; while ((chain = chainRead(chainsLf)) != NULL) { if (chain->id > lastChainId) lastChainId = chain->id; dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName); csp = hashFindVal(chainHash, dy->string); if (csp == NULL) { AllocVar(csp); slAddHead(&cspList, csp); hashAddSaveName(chainHash, dy->string, csp, &csp->name); csp->qName = cloneString(chain->qName); csp->tName = cloneString(chain->tName); 
csp->qStrand = chain->qStrand; } slAddHead(&csp->chain, chain); count++; } lineFileClose(&chainsLf); printf("read in %d chains\n",count); for(csp = cspList; csp; csp = csp->next) { slSort(&csp->chain, chainCmpTarget); // csp->chain = aggregateChains(csp->chain); } addedBases = deletedBases = 0; for(sp = spList; sp; sp = sp->next) { #ifdef NOTNOW /* find the chains associated with this strand */ if ((csp = hashFindVal(chainHash, sp->name)) != NULL) { /* first check to see if psl blocks are in any chains */ checkInChains(&sp->psl, &csp->chain, outFound, &addedBases); /* now extend chains to the right */ checkAfterChains(&sp->psl, &csp->chain, outFound, &addedBases); /* now extend chains to the left */ slReverse(&sp->psl); checkBeforeChains(&sp->psl, &csp->chain, outFound, &addedBases); } #endif /* do we still have psl's */ if (sp->psl != NULL) { /* make sure we have a chainList */ chainList = NULL; if (csp == NULL) { AllocVar(csp); slAddHead(&cspList, csp); csp->qName = cloneString(sp->psl->qName); csp->tName = cloneString(sp->psl->tName); csp->qStrand = sp->psl->strand[0]; dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", csp->qName, csp->qStrand, csp->tName); hashAddSaveName(chainHash, dy->string, csp, &csp->name); } for(psl = sp->psl; psl ; psl = nextPsl) { /* this psl will either fit a chain or make a new one */ nextPsl = psl->next; sp->psl = nextPsl; psl->next = NULL; fakePslList = psl; if (chainList) checkInChains(&fakePslList, &chainList, outFound, &addedBases); if (fakePslList == NULL) { //freez(&psl); continue; } // if (chainList) // checkAfterChains(&fakePslList, &chainList, outFound, &addedBases); if (fakePslList == NULL) { //freez(&psl); continue; } if (chainList) checkBeforeChains(&fakePslList, &chainList, outFound, &addedBases); if (fakePslList == NULL) { //freez(&psl); continue; } AllocVar(chain); chain->tStart = psl->tStarts[0]; chain->qStart = psl->qStarts[0]; chain->tEnd = psl->tStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1]; 
chain->qEnd = psl->qStarts[psl->blockCount - 1] + psl->blockSizes[psl->blockCount - 1]; chain->tSize = psl->tSize; chain->qSize = psl->qSize; chain->qName = cloneString(psl->qName); chain->tName = cloneString(psl->tName); chain->qStrand = psl->strand[0]; chain->id = ++lastChainId; if (!addPslToChain(chain, psl, &addedBases)) errAbort("new "); slAddHead(&chainList, chain); pslTabOut(psl, outFound); freez(&psl); } csp->chain = slCat(csp->chain, chainList); } slSort(&csp->chain, chainCmpTarget); csp->chain = aggregateChains(csp->chain); } fclose(outFound); printf("deleted %d bases\n",deletedBases); printf("added %d bases\n",addedBases); count = 0; for(sp = spList; sp; sp = sp->next) for(psl = sp->psl ; psl ; psl = psl->next) { pslTabOut(psl, outNotFound); count++; } fclose(outNotFound); printf("%d psls remain\n",count); for(csp = cspList; csp; csp = csp->next) for(chain = csp->chain ; chain ; chain = chain->next) { //slSort(&chain->blockList, boxInCmpBoth); chain->tStart = chain->blockList->tStart; chain->qStart = chain->blockList->qStart; for(block = chain->blockList; block; block = block->next) { chain->tEnd = block->tEnd; chain->qEnd = block->qEnd; } chainWrite(chain, outChains); } fclose(outChains); dyStringFree(&dy); }
void liftChain(char *destFile, struct hash *liftHash,
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .chain file.
 *   destFile    - output chain file, created here and closed before return.
 *   liftHash    - lift specifications, looked up by sequence name via
 *                 findLift.
 *   sourceCount - number of entries in sources.
 *   sources     - chain files to lift.
 *   querySide   - TRUE to lift the query side of each chain, FALSE to lift
 *                 the target side.
 * Chains whose sequence has no lift spec are dropped unless the global how
 * equals carryMissing, in which case they are written unchanged. */
{
    FILE *f = mustOpen(destFile, "w");
    int sourceIx;
    int dotMod = dots;

    for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx) {
        char *source = sources[sourceIx];
        struct lineFile *lf = lineFileOpen(source, TRUE);
        struct chain *chain;

        lineFileSetMetaDataOutput(lf, f);
        verbose(1, "Lifting %s\n", source);
        while ((chain = chainRead(lf)) != NULL) {
            struct liftSpec *spec;
            char *seqName = querySide ? chain->qName : chain->tName;

            spec = findLift(liftHash, seqName, lf);
            if (spec == NULL) {
                if (how != carryMissing) {
                    chainFree(&chain);
                    continue;
                }
            } else {
                struct cBlock *b = NULL;
                int offset = spec->offset;

                if (spec->strand == '-') {
                    if (querySide) {
                        /* Keep the span and recompute the start; the end is
                         * derived from the new start afterwards. */
                        int qSpan = chain->qEnd - chain->qStart;

                        if (chain->qStrand == '-')
                            chain->qStart += spec->offset;
                        else {
                            chain->qStart = spec->newSize - spec->offset
                                    - (chain->qSize - chain->qStart);
                        }
                        chain->qEnd = chain->qStart + qSpan;
                        chain->qStrand = flipStrand(chain->qStrand);
                        freeMem(chain->qName);
                        chain->qName = cloneString(spec->newName);
                        chain->qSize = spec->newSize;
                        /* We don't need to mess with the blocks here
                         * since they are all relative to the start. */
                    } else {
                        /* We try and keep the target strand positive, so we
                         * end up flipping in both target and query and
                         * flipping the target strand. */
                        reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize);
                        reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize);
                        chain->qStrand = flipStrand(chain->qStrand);

                        /* Flip around blocks and add offset. */
                        for (b = chain->blockList; b != NULL; b = b->next) {
                            reverseIntRange(&b->qStart, &b->qEnd, chain->qSize);
                            reverseIntRange(&b->tStart, &b->tEnd, chain->tSize);
                            b->tStart += offset;
                            b->tEnd += offset;
                        }
                        slReverse(&chain->blockList);

                        /* On target side add offset as well and update name
                         * and size. */
                        chain->tStart += offset;
                        chain->tEnd += offset;
                        freeMem(chain->tName);
                        chain->tName = cloneString(spec->newName);
                        chain->tSize = spec->newSize;
                    }
                } else {
                    if (querySide) {
                        /* For a '-' query the offset is recomputed from the
                         * new size, the spec offset and the old size. */
                        if (chain->qStrand == '-')
                            offset = spec->newSize - (spec->offset + spec->oldSize);
                        freeMem(chain->qName);
                        chain->qName = cloneString(spec->newName);
                        chain->qSize = spec->newSize;
                        chain->qStart += offset;
                        chain->qEnd += offset;
                        for (b = chain->blockList; b != NULL; b = b->next) {
                            b->qStart += offset;
                            b->qEnd += offset;
                        }
                    } else {
                        freeMem(chain->tName);
                        chain->tName = cloneString(spec->newName);
                        chain->tSize = spec->newSize;
                        chain->tStart += offset;
                        chain->tEnd += offset;
                        for (b = chain->blockList; b != NULL; b = b->next) {
                            b->tStart += offset;
                            b->tEnd += offset;
                        }
                    }
                }
            }
            chainWrite(chain, f);
            chainFree(&chain);
            doDots(&dotMod);
        }
        lineFileClose(&lf);
        if (dots)
            verbose(1, "\n");
    }

    /* Close the output explicitly instead of leaving it to process exit. */
    carefulClose(&f);
}
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut) /* axtChain - Chain together axt alignments.. */ { struct hash *pairHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct seqPair *spList = NULL, *sp; FILE *f = mustOpen(chainOut, "w"); char *qName = "", *tName = ""; struct dnaSeq *qSeq = NULL, *tSeq = NULL; char qStrand = 0, tStrand = 0; struct chain *chainList = NULL, *chain; FILE *details = NULL; struct dnaSeq *seq, *seqList = NULL; struct hash *faHash = newHash(0); struct hash *tFaHash = newHash(0); FILE *faF; boolean qIsTwoBit = twoBitIsFile(qNibDir); boolean tIsTwoBit = twoBitIsFile(tNibDir); axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain"); if (detailsName != NULL) details = mustOpen(detailsName, "w"); /* Read input file and divide alignments into various parts. */ if (optionExists("psl")) spList = readPslBlocks(axtIn, pairHash, f); else spList = readAxtBlocks(axtIn, pairHash, f); if (optionExists("faQ")) { faF = mustOpen(qNibDir, "r"); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) { hashAdd(faHash, seq->name, seq); slAddHead(&seqList, seq); } fclose(faF); } if (optionExists("faT")) { faF = mustOpen(tNibDir, "r"); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) { hashAdd(tFaHash, seq->name, seq); slAddHead(&seqList, seq); } fclose(faF); } for (sp = spList; sp != NULL; sp = sp->next) { slReverse(&sp->blockList); removeExactOverlaps(&sp->blockList); verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList)); if (optionExists("faQ")) { assert (faHash != NULL); loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } else { loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } if (optionExists("faT")) { assert (tFaHash != NULL); loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand); } else { loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+', &tName, &tSeq, &tStrand); } chainPair(sp, qSeq, tSeq, &chainList, details); } slSort(&chainList, 
chainCmpScore); for (chain = chainList; chain != NULL; chain = chain->next) { assert(chain->qStart == chain->blockList->qStart && chain->tStart == chain->blockList->tStart); chainWrite(chain, f); } carefulClose(&f); }
void doChainScore(char *chainIn, char *tNibDir, char *qNibDir, char *chainOut) { char qStrand = 0, tStrand = 0; struct dnaSeq *qSeq = NULL, *tSeq = NULL; char *qName = "", *tName = ""; FILE *f = mustOpen(chainOut, "w"); struct chain *chainList = NULL, *chain; struct chain *inputChains, *next; FILE *details = NULL; struct lineFile *lf = NULL; struct dnaSeq *seq, *seqList = NULL; struct hash *faHash = newHash(0); struct hash *chainHash = newHash(0); char comment[1024]; FILE *faF; struct seqPair *spList = NULL, *sp; struct dyString *dy = newDyString(512); struct lineFile *chainsLf = lineFileOpen(chainIn, TRUE); while ((chain = chainRead(chainsLf)) != NULL) { dyStringClear(dy); dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName); sp = hashFindVal(chainHash, dy->string); if (sp == NULL) { AllocVar(sp); slAddHead(&spList, sp); hashAddSaveName(chainHash, dy->string, sp, &sp->name); sp->qName = cloneString(chain->qName); sp->tName = cloneString(chain->tName); sp->qStrand = chain->qStrand; } slAddHead(&sp->chain, chain); } slSort(&spList, seqPairCmp); lineFileClose(&chainsLf); if (optionExists("faQ")) { faF = mustOpen(qNibDir, "r"); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) { hashAdd(faHash, seq->name, seq); slAddHead(&seqList, seq); } fclose(faF); } for (sp = spList; sp != NULL; sp = sp->next) { if (optionExists("faQ")) { assert (faHash != NULL); loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } else loadIfNewSeq(qNibDir, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); loadIfNewSeq(tNibDir, sp->tName, '+', &tName, &tSeq, &tStrand); scorePair(sp, qSeq, tSeq, &chainList, sp->chain); } slSort(&chainList, chainCmpScore); for (chain = chainList; chain != NULL; chain = chain->next) { assert(chain->qStart == chain->blockList->qStart && chain->tStart == chain->blockList->tStart); chainWrite(chain, f); } carefulClose(&f); }