void doJob(const std::string& inputMaf, const std::string& outDir, std::vector<ParamPair> simplParams, std::vector<int> minBlockSizes) { const int MIN_ALIGNMENT = 1; const int MAX_ALIGNMENT_GAP = 0; const auto EMPTY_GROUP = BlockGroups(); BlockGroups blockGroups = EMPTY_GROUP; PermVec currentBlocks; //sort blocks in reverse order (will use it as stack) std::sort(minBlockSizes.begin(), minBlockSizes.end(), std::greater<int>()); makeDirectory(outDir); //read maf alignment and join adjacent columns std::cerr << "\tParsing MAF file\n"; PermVec mafBlocks = mafToPermutations(inputMaf, MIN_ALIGNMENT); compressPaths(mafBlocks, MAX_ALIGNMENT_GAP, currentBlocks, blockGroups); //iterative simplification for (const ParamPair& ppair : simplParams) { if (minBlockSizes.empty()) break; //output blocks of certain size while (!minBlockSizes.empty() && minBlockSizes.back() < ppair.minBlock) { std::string blockDir = outDir + "/" + std::to_string(minBlockSizes.back()); outputBlocks(blockDir, currentBlocks, blockGroups, minBlockSizes.back()); minBlockSizes.pop_back(); } std::cerr << "\tSimplification with " << ppair.minBlock << " " << ppair.maxGap << std::endl; PermVec inputBlocks = filterBySize(currentBlocks, EMPTY_GROUP, ppair.minBlock, true); PermVec outBlocks; blockGroups.clear(); processGraph(inputBlocks, ppair.maxGap, outBlocks, blockGroups); currentBlocks = outBlocks; } //if any left for (int minBlock : minBlockSizes) { std::string blockDir = outDir + "/" + std::to_string(minBlock); outputBlocks(blockDir, currentBlocks, blockGroups, minBlock); } }
static void writePslx(FILE *pslOutFh, struct seqReader *qSeqReader, struct seqReader *tSeqReader, struct psl *psl)
/* Write one pslx record to pslOutFh.  The full pslx structure is never built
 * (this avoids extra sequence manipulation): the psl itself is written first,
 * then outputBlocks() is called for the query side and for the target side,
 * with a tab between the two and a newline at the end.  Aborts on protein
 * PSLs, which are not supported. */
{
    char qStrand, tStrand;

    if (pslIsProtein(psl)) {
        errAbort("doesn't support protein PSLs: qName: %s", psl->qName);
    }

    /* The last separator is a tab rather than a newline because more columns
     * follow on the same line. */
    pslOutput(psl, pslOutFh, '\t', '\t');

    qStrand = psl->strand[0];
    tStrand = psl->strand[1];

    /* query side */
    outputBlocks(pslOutFh, qSeqReader, psl->qName, qStrand,
                 psl->qStart, psl->qEnd,
                 psl->blockCount, psl->qStarts, psl->blockSizes);
    fputc('\t', pslOutFh);

    /* target side */
    outputBlocks(pslOutFh, tSeqReader, psl->tName, tStrand,
                 psl->tStart, psl->tEnd,
                 psl->blockCount, psl->tStarts, psl->blockSizes);
    fputc('\n', pslOutFh);
}
void parseIntoAxt(char *lavFile, FILE *f, char *tNibDir, struct dlList *tCache, char *qNibDir, struct dlList *qCache) /* Parse a blastz lav file and put it an axt. */ { struct lineFile *lf = lineFileOpen(lavFile, TRUE); char *line; struct block *blockList = NULL; boolean isRc = FALSE; char *tName = NULL, *qName = NULL; char *matrix = NULL, *command = NULL; int qSize = 0, tSize = 0; int score = 0; /* Check header. */ if (!lineFileNext(lf, &line, NULL)) errAbort("%s is empty", lf->fileName); if (!startsWith("#:lav", line)) errAbort("%s is not a lav file\n", lf->fileName); while (lineFileNext(lf, &line, NULL)) { if (startsWith("s {", line)) { parseS(lf, &tSize, &qSize); } else if (startsWith("h {", line)) { parseH(lf, &tName, &qName, &isRc); } else if (startsWith("d {", line)) { parseD(lf, &matrix, &command, f); } else if (startsWith("a {", line)) { parseA(lf, &blockList, &score); if (optionExists("dropSelf")) { struct block *bArr[256]; int numBLs = 0, i = 0; boolean rescore = FALSE; rescore = breakUpIfOnDiagonal(blockList, isRc, qName, tName, qSize, tSize, bArr, ArraySize(bArr), &numBLs); for (i=0; i < numBLs; i++) { outputBlocks(lf, bArr[i], score, f, isRc, qName, qSize, qNibDir, qCache, tName, tSize, tNibDir, tCache, rescore); slFreeList(&bArr[i]); } } else { outputBlocks(lf, blockList, score, f, isRc, qName, qSize, qNibDir, qCache, tName, tSize, tNibDir, tCache, FALSE); slFreeList(&blockList); } } } lineFileClose(&lf); }