static void axtQueryOut(struct gfOutput *out, FILE *f)
/* Axt oriented output, done at the end of processing a query: write every
 * axt held in the output's bundle list to f, then free the bundle list. */
{
struct axtData *data = out->data;
struct axtBundle *bundle = data->bundleList;

while (bundle != NULL)
    {
    struct axt *el = bundle->axtList;
    while (el != NULL)
        {
        axtWrite(el, f);
        el = el->next;
        }
    bundle = bundle->next;
    }
axtBundleFreeList(&data->bundleList);
}
void writeAxtFromChain(struct chain *chain, struct dnaSeq *qSeq, int qOffset,
	struct dnaSeq *tSeq, int tOffset, FILE *f, FILE *gapFile)
/* Convert chain to a list of axts and write each of them to f.  When gapFile
 * is non-NULL the chain is first passed to writeGaps with that file. */
{
struct axt *axtList = NULL;
struct axt *el = NULL;

if (gapFile != NULL)
    {
    writeGaps(chain, gapFile);
    }

axtList = chainToAxt(chain, qSeq, qOffset, tSeq, tOffset, maxGap, BIGNUM);
verbose(9, "%d axts\n", slCount(axtList));

el = axtList;
while (el != NULL)
    {
    axtWrite(el, f);
    el = el->next;
    }

/* The list was built here, so it is released here as well. */
axtFreeList(&axtList);
}
void outputSubAxt(struct axt *axt, int start, int size, int score, FILE *f) /* Output subset of axt to axt file. */ { struct axt a; a = *axt; a.symCount = size; a.score = score; a.qStart += countNonDash(a.qSym, start); a.qEnd = a.qStart + countNonDash(a.qSym + start, size); a.tStart += countNonDash(a.tSym, start); a.tEnd = a.tStart + countNonDash(a.tSym + start, size); a.qSym += start; a.tSym += start; axtWrite(&a, f); }
void axtSplitByTarget(char *inName, char *outDir) /* axtSplitByTarget - Split a single axt file into one file per target. */ { struct hash *outHash = newHash(8); /* FILE valued hash */ struct lineFile *lf = lineFileOpen(inName, TRUE); struct axt *axt; makeDir(outDir); while ((axt = axtRead(lf)) != NULL) { FILE *f = getSplitFile(outHash, outDir, axt->tName, axt->tStart); axtWrite(axt, f); totalWritten += strlen(axt->tName) + strlen(axt->qName) + 40 + strlen(axt->qSym)+ strlen(axt->tSym); axtFree(&axt); } }
void axtDropSelf(char *inFile, char *outFile)
/* axtDropSelf - Drop alignments that just align same thing to itself.
 *   inFile  - axt file to read.
 *   outFile - axt file to create.
 * A record is dropped when query and target agree in start, end, strand and
 * name; every other record is copied through unchanged. */
{
FILE *f = mustOpen(outFile, "w");
struct lineFile *lf = lineFileOpen(inFile, TRUE);
struct axt *axt;

while ((axt = axtRead(lf)) != NULL)
    {
    int isSelf = (axt->qStart == axt->tStart
    	&& axt->qEnd == axt->tEnd
	&& axt->qStrand == axt->tStrand
	&& sameString(axt->qName, axt->tName));
    if (!isSelf)
	axtWrite(axt, f);
    axtFree(&axt);
    }

/* Both handles were previously left open when the function returned. */
lineFileClose(&lf);
fclose(f);
}
void axtSwapFile(char *source, char *targetSizes, char *querySizes, char *dest) /* axtSwapFile - Swap source and query in an axt file. */ { struct hash *tHash = loadIntHash(targetSizes); struct hash *qHash = loadIntHash(querySizes); struct lineFile *lf = lineFileOpen(source, TRUE); FILE *f = mustOpen(dest, "w"); struct axt *axt; while ((axt = axtRead(lf)) != NULL) { axtSwap(axt, hashIntVal(tHash, axt->tName), hashIntVal(qHash, axt->qName)); axtWrite(axt, f); axtFree(&axt); } }
void axtRescore(char *in, char *out)
/* axtRescore - Recalculate scores in axt.
 *   in  - axt file to read.
 *   out - axt file to create.
 * The scoring scheme is written at the top of the output, then every record's
 * score is replaced by axtScore under scoreScheme before it is written. */
{
struct lineFile *lf = lineFileOpen(in, TRUE);
FILE *f = mustOpen(out, "w");
struct axt *axt;

/* Register f as the metadata output of lf.
 * NOTE(review): presumably this passes header/comment lines from the input
 * through to the output - confirm against linefile. */
lineFileSetMetaDataOutput(lf, f);
axtScoreSchemeDnaWrite(scoreScheme, f, "axtRescore");

while ((axt = axtRead(lf)) != NULL)
    {
    axt->score = axtScore(axt, scoreScheme);
    axtWrite(axt, f);
    axtFree(&axt);
    }

/* Close the reader before the file it was told to write metadata to; both
 * were previously left open when the function returned. */
lineFileClose(&lf);
fclose(f);
}
void axtDropOverlap(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* used for cleaning up self alignments - deletes all overlapping self alignments */ { struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct axt *axt; int totMatch = 0; int totSkip = 0; int totLines = 0; while ((axt = axtRead(lf)) != NULL) { totLines++; totMatch += axt->score; if (sameString(axt->qName, axt->tName)) { int qs = axt->qStart; int qe = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qs, &qe, findSize(qSizeHash, axt->qName)); if (axt->tStart == qs && axt->tEnd == qe) { /* printf( "skip %c\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\n", axt->qStrand, axt->qName, axt->symCount, axt->qStart, axt->qEnd, axt->tName, axt->symCount, axt->tStart, axt->tEnd ); */ totSkip++; continue; } } axtWrite(axt, f); axtFree(&axt); } fclose(f); lineFileClose(&lf); }
static void doAChain(struct chain *chain, struct nibTwoCache *tSeqCache, struct nibTwoCache *qSeqCache, FILE *f) /* Convert one chain to an axt. */ { struct dnaSeq *qSeq = loadSeqStrand(qSeqCache, chain->qName, chain->qStart, chain->qEnd, chain->qStrand); struct dnaSeq *tSeq = loadSeqStrand(tSeqCache, chain->tName, chain->tStart, chain->tEnd, '+'); struct axt *axtList= chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); struct axt *axt = NULL; for (axt = axtList; axt != NULL; axt = axt->next) { double idRatio = axtIdRatio(axt); if (minIdRatio <= idRatio) { if (bedOut) bedWriteAxt(axt, chain->qSize, chain->tSize, idRatio, f); else axtWrite(axt, f); } } axtFreeList(&axtList); freeDnaSeq(&qSeq); freeDnaSeq(&tSeq); }
void liftAxt(char *destFile, struct hash *liftHash,
	int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .axt file.
 *   destFile    - output file, created here and closed before returning.
 *   liftHash    - lift specifications, searched with findLift.
 *   sourceCount - number of entries in sources.
 *   sources     - input axt files; all are written to the one destFile.
 *   querySide   - TRUE to lift the query side, FALSE to lift the target side.
 * Records with no lift spec are written unchanged when how == carryMissing
 * and are dropped otherwise. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;
int dotMod = dots;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    struct lineFile *lf = lineFileOpen(source, TRUE);
    struct axt *axt;

    lineFileSetMetaDataOutput(lf, f);
    verbose(1, "Lifting %s\n", source);
    while ((axt = axtRead(lf)) != NULL)
	{
	struct liftSpec *spec;
	/* Shallow copy: 'a' gets the lifted coordinates and name while 'axt'
	 * keeps ownership of the memory that axtFree releases. */
	struct axt a = *axt;
	char *seqName = (querySide ? a.qName : a.tName);

	spec = findLift(liftHash, seqName, lf);
	if (spec == NULL)
	    {
	    if (how != carryMissing)
		{
		axtFree(&axt);
		continue;
		}
	    }
	else
	    {
	    int offset;
	    char strand = (querySide ? a.qStrand : a.tStrand);

	    cantHandleSpecRevStrand(spec);
	    if (strand == '-')
		{
		/* Minus strand coordinates are offset from the far end of
		 * the new sequence. */
		int ctgEnd = spec->offset + spec->oldSize;
		offset = spec->newSize - ctgEnd;
		}
	    else
		offset = spec->offset;

	    if (querySide)
		{
		a.qStart += offset;
		a.qEnd += offset;
		a.qName = spec->newName;
		}
	    else
		{
		a.tStart += offset;
		a.tEnd += offset;
		a.tName = spec->newName;
		if (strand == '-')
		    warn("Target minus strand, please double check results.");
		}
	    }
	axtWrite(&a, f);
	axtFree(&axt);
	doDots(&dotMod);
	}
    lineFileClose(&lf);
    if (dots)
	verbose(1, "\n");
    }

/* Every reader that referenced f has been closed by now; the output file
 * itself was previously left open on return. */
fclose(f);
}
void outputBlocks(struct lineFile *lf, struct block *blockList, int score, FILE *f, boolean isRc, char *qName, int qSize, char *qNibDir, struct dlList *qCache, char *tName, int tSize, char *tNibDir, struct dlList *tCache, boolean rescore) /* Output block list as an axt to file f. */ { int qStart = BIGNUM, qEnd = 0, tStart = BIGNUM, tEnd = 0; struct block *lastBlock = NULL; struct block *block; struct dyString *qSym = newDyString(16*1024); struct dyString *tSym = newDyString(16*1024); struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seq = NULL; struct axt axt; boolean qIsTwoBit = twoBitIsFile(qNibDir); boolean tIsTwoBit = twoBitIsFile(tNibDir); if (blockList == NULL) return; /* Figure overall dimensions. */ for (block = blockList; block != NULL; block = block->next) { if (qStart > block->qStart) qStart = block->qStart; if (qEnd < block->qEnd) qEnd = block->qEnd; if (tStart > block->tStart) tStart = block->tStart; if (tEnd < block->tEnd) tEnd = block->tEnd; } /* Load sequence covering alignment from nib files. 
*/ if (isRc) { reverseIntRange(&qStart, &qEnd, qSize); if (qIsFa) { for (seq = qFaList ; seq != NULL ; seq = seq->next) if (sameString(qName, seq->name)) break; if (seq != NULL) { AllocVar(qSeq); qSeq->size = qEnd - qStart; qSeq->name = cloneString(qName); qSeq->dna = cloneMem((seq->dna)+qStart, qSeq->size); } else errAbort("sequence not found %s\n",qName); } else qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit); reverseIntRange(&qStart, &qEnd, qSize); reverseComplement(qSeq->dna, qSeq->size); } else { if (qIsFa) { for (seq = qFaList ; seq != NULL ; seq = seq->next) { if (sameString(qName, seq->name)) break; } if (seq != NULL) { AllocVar(qSeq); qSeq->size = qEnd - qStart; qSeq->name = cloneString(qName); qSeq->dna = (seq->dna)+qStart; } else errAbort("sequence not found %s\n",qName); } else qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit); } if (tIsFa) { for (seq = tFaList ; seq != NULL ; seq = seq->next) if (sameString(tName, seq->name)) break; if (seq != NULL) { AllocVar(tSeq); tSeq->size = tEnd - tStart; tSeq->name = cloneString(tName); tSeq->dna = cloneMem((seq->dna)+tStart, tSeq->size); } else errAbort("sequence not found %s\n",tName); } else tSeq = readFromCache(tCache, tNibDir, tName, tStart, tEnd - tStart, tSize, tIsTwoBit); /* Loop through blocks copying sequence into dynamic strings. 
*/ for (block = blockList; block != NULL; block = block->next) { if (lastBlock != NULL) { int qGap = block->qStart - lastBlock->qEnd; int tGap = block->tStart - lastBlock->tEnd; if (qGap != 0 && tGap != 0) { errAbort("Gaps in both strand on alignment ending line %d of %s", lf->lineIx, lf->fileName); } if (qGap > 0) { dyStringAppendMultiC(tSym, '-', qGap); dyStringAppendN(qSym, qSeq->dna + lastBlock->qEnd - qStart, qGap); } if (tGap > 0) { dyStringAppendMultiC(qSym, '-', tGap); dyStringAppendN(tSym, tSeq->dna + lastBlock->tEnd - tStart, tGap); } } if (qSeq->size < block->qStart - qStart) { errAbort("read past end of sequence %s size =%d block->qStart-qstart=%d block->qStart=%d qEnd=%d \n", qName, qSeq->size, block->qStart-qStart,block->qStart, block->qEnd ); } dyStringAppendN(qSym, qSeq->dna + block->qStart - qStart, block->qEnd - block->qStart); if (tSeq->size < block->tStart - tStart) { errAbort("read past end of sequence %s size =%d block->tStart-tstart=%d\n", tName, tSeq->size, block->tStart-tStart); } dyStringAppendN(tSym, tSeq->dna + block->tStart - tStart, block->tEnd - block->tStart); lastBlock = block; } if (qSym->stringSize != tSym->stringSize) errAbort("qSize and tSize don't agree in alignment ending line %d of %s", lf->lineIx, lf->fileName); if (rescore) score = axtScoreSym(scoreScheme, qSym->stringSize, qSym->string, tSym->string); /* Fill in an axt and write it to output. */ ZeroVar(&axt); axt.qName = qName; axt.qStart = qStart; axt.qEnd = qEnd; axt.qStrand = (isRc ? '-' : '+'); axt.tName = tName; axt.tStart = tStart; axt.tEnd = tEnd; axt.tStrand = '+'; axt.score = score; axt.symCount = qSym->stringSize; axt.qSym = qSym->string; axt.tSym = tSym->string; axtWrite(&axt, f); /* Clean up. */ if (!qIsFa) freeDnaSeq(&qSeq); freeDnaSeq(&tSeq); dyStringFree(&qSym); dyStringFree(&tSym); }