Example #1
0
void doJob(const std::string& inputMaf, const std::string& outDir, 
		   std::vector<ParamPair> simplParams, std::vector<int> minBlockSizes)
{

	const int MIN_ALIGNMENT = 1;
	const int MAX_ALIGNMENT_GAP = 0;
	const auto EMPTY_GROUP = BlockGroups();

	BlockGroups blockGroups = EMPTY_GROUP;
	PermVec currentBlocks;
	//sort blocks in reverse order (will use it as stack)
	std::sort(minBlockSizes.begin(), minBlockSizes.end(), std::greater<int>());
	makeDirectory(outDir);

	//read maf alignment and join adjacent columns
	std::cerr << "\tParsing MAF file\n";
	PermVec mafBlocks = mafToPermutations(inputMaf, MIN_ALIGNMENT);
	compressPaths(mafBlocks, MAX_ALIGNMENT_GAP, currentBlocks, blockGroups);

	//iterative simplification
	for (const ParamPair& ppair : simplParams)
	{
		if (minBlockSizes.empty()) break;
		//output blocks of certain size
		while (!minBlockSizes.empty() && minBlockSizes.back() < ppair.minBlock)
		{
			std::string blockDir = outDir + "/" + 
								   std::to_string(minBlockSizes.back());
			outputBlocks(blockDir, currentBlocks, blockGroups,
						 minBlockSizes.back());
			minBlockSizes.pop_back();
		}

		std::cerr << "\tSimplification with " << ppair.minBlock << " "
				  << ppair.maxGap << std::endl;
		PermVec inputBlocks = filterBySize(currentBlocks, EMPTY_GROUP,
										   ppair.minBlock, true);
		PermVec outBlocks;
		blockGroups.clear();
		processGraph(inputBlocks, ppair.maxGap, outBlocks, blockGroups);
		currentBlocks = outBlocks;
	}

	//if any left
	for (int minBlock : minBlockSizes)
	{
		std::string blockDir = outDir + "/" + std::to_string(minBlock);
		outputBlocks(blockDir, currentBlocks, blockGroups, minBlock);
	}
}
static void writePslx(FILE *pslOutFh, struct seqReader *qSeqReader, struct seqReader *tSeqReader, struct psl *psl)
/* Write psl to pslOutFh as a single pslx line.  The psl itself goes out via
 * pslOutput, followed by the query blocks, a tab, the target blocks and the
 * closing newline.  The trailing columns are written directly instead of
 * building a complete pslx record, which avoids more sequence manipulation.
 * Protein PSLs are not supported and are reported through errAbort. */
{
if (pslIsProtein(psl))
    {
    errAbort("doesn't support protein PSLs: qName: %s", psl->qName);
    }

/* Tab is passed for both separator arguments; the newline that ends the
 * line is written at the bottom of this function. */
pslOutput(psl, pslOutFh, '\t', '\t');

/* Query side: uses strand[0] and the q* coordinates. */
outputBlocks(pslOutFh, qSeqReader,
             psl->qName, psl->strand[0],
             psl->qStart, psl->qEnd,
             psl->blockCount, psl->qStarts, psl->blockSizes);
fputc('\t', pslOutFh);

/* Target side: uses strand[1] and the t* coordinates, same block sizes. */
outputBlocks(pslOutFh, tSeqReader,
             psl->tName, psl->strand[1],
             psl->tStart, psl->tEnd,
             psl->blockCount, psl->tStarts, psl->blockSizes);
fputc('\n', pslOutFh);
}
Example #3
0
void parseIntoAxt(char *lavFile, FILE *f, 
	char *tNibDir, struct dlList *tCache, 
	char *qNibDir, struct dlList *qCache)
/* Parse a blastz lav file and put it in an axt.  The file is read line by
 * line.  Lines starting "s {", "h {" and "d {" are handed to parseS, parseH
 * and parseD, which receive pointers to the size, name/strand and
 * matrix/command variables.  Each "a {" line is parsed by parseA into a
 * block list that is passed to outputBlocks together with those variables.
 * When the "dropSelf" option exists, the block list first goes through
 * breakUpIfOnDiagonal and every list it returns is output separately. */
{
struct lineFile *lf = lineFileOpen(lavFile, TRUE);
char *line;
struct block *alignedBlocks = NULL;
boolean isRc = FALSE;
char *tName = NULL, *qName = NULL;
char *matrix = NULL, *command = NULL;
int qSize = 0, tSize = 0;
int score = 0;

/* The first line has to carry the lav signature. */
if (!lineFileNext(lf, &line, NULL))
    errAbort("%s is empty", lf->fileName);
if (!startsWith("#:lav", line))
    errAbort("%s is not a lav file\n", lf->fileName);

while (lineFileNext(lf, &line, NULL))
    {
    if (startsWith("s {", line))
        {
        parseS(lf, &tSize, &qSize);
        continue;
        }
    if (startsWith("h {", line))
        {
        parseH(lf, &tName, &qName, &isRc);
        continue;
        }
    if (startsWith("d {", line))
        {
        parseD(lf, &matrix, &command, f);
        continue;
        }
    if (!startsWith("a {", line))
        continue;	/* Any other line is ignored. */

    /* Alignment stanza. */
    parseA(lf, &alignedBlocks, &score);
    if (!optionExists("dropSelf"))
        {
        outputBlocks(lf, alignedBlocks, score, f, isRc,
                     qName, qSize, qNibDir, qCache,
                     tName, tSize, tNibDir, tCache, FALSE);
        slFreeList(&alignedBlocks);
        }
    else
        {
        /* breakUpIfOnDiagonal fills pieces[] with up to ArraySize(pieces)
         * lists and reports how many it produced in pieceCount; its return
         * value is forwarded to outputBlocks as the rescore flag. */
        struct block *pieces[256];
        int pieceCount = 0, ix = 0;
        boolean rescore = breakUpIfOnDiagonal(alignedBlocks, isRc, qName, tName,
                                              qSize, tSize, pieces, ArraySize(pieces),
                                              &pieceCount);
        for (ix = 0;  ix < pieceCount;  ++ix)
            {
            outputBlocks(lf, pieces[ix], score, f, isRc,
                         qName, qSize, qNibDir, qCache,
                         tName, tSize, tNibDir, tCache, rescore);
            slFreeList(&pieces[ix]);
            }
        }
    }
lineFileClose(&lf);
}