示例#1
0
int convertBeetlMain(int argc, char** argv)
{
    Timer t("sga beetl-convert");
    parseConvertBeetlOptions(argc, argv);
    std::cout << "Converting " << opt::inBWTFile << "\n";

    // Read the ASCII bwt beetl file and convert it to the SGA
    // run length encoded binary format

    // To write the header of the file, we need to count the number of strings
    // and symbols in the BWT. We do this in an initial pas
    std::istream* pReader = createReader(opt::inBWTFile);
    size_t numSymbols = 0;
    size_t numStrings = 0;
    char c;
    while(*pReader >> c) 
    {
        numSymbols += 1;
        if(c == 'N')
        {
            std::cerr << "Error: ambiguous character found in BWT\n";
            std::cerr << "sga preprocess must be run on the data\n";
            exit(EXIT_FAILURE);
        }

        if(c == '$')
            numStrings += 1;
    }
    delete pReader;

    printf("Read %zu symbols and %zu strings from the beetl bwt\n", numSymbols, numStrings);

    //
    std::string outBWTName = opt::prefix + BWT_EXT;
    BWTWriterBinary* pWriter = new BWTWriterBinary(outBWTName);
    pWriter->writeHeader(numStrings, numSymbols, BWF_NOFMI);
    
    // Re-read the file, writing out the bw chars
    pReader = createReader(opt::inBWTFile);
    while(*pReader >> c) 
        pWriter->writeBWChar(c);
    pWriter->finalize();
    delete pWriter;
    delete pReader;

    // Create the suffix array index files using the sampled suffix array machinery
    std::cout << "Generating lexicographic index (.sai)\n";
    BWT* pBWT = new BWT(outBWTName, 512); 
    SampledSuffixArray* pSSA = new SampledSuffixArray();

    pSSA->buildLexicoIndex(pBWT);
    pSSA->writeLexicoIndex(opt::prefix + SAI_EXT);

    delete pBWT;
    delete pSSA;
    return 0;
}
示例#2
0
int genSSAMain(int argc, char** argv)
{
    Timer t("sga gen-ssa");
    parseGenSSAOptions(argc, argv);
    
    BWT* pBWT = new BWT(opt::prefix + BWT_EXT);
    ReadInfoTable* pRIT = new ReadInfoTable(opt::readsFile, pBWT->getNumStrings(), RIO_NUMERICID);
    pBWT->printInfo();

    SampledSuffixArray* pSSA = new SampledSuffixArray();
    pSSA->build(pBWT, pRIT, opt::sampleRate);
    pSSA->printInfo();
    pSSA->writeSSA(opt::prefix + SSA_EXT);

    if(opt::validate)
        pSSA->validate(opt::readsFile, pBWT);

    delete pBWT;
    delete pRIT;
    delete pSSA;

    return 0;
}
示例#3
0
void indexInMemoryRopebwt()
{
    std::cout << "Building index for " << opt::readsFile << " in memory using ropebwt\n";

    bool use_threads = opt::numThreads >= 4;

    if(opt::bBuildForward)
    {
        std::string bwt_filename = opt::prefix + BWT_EXT;
        std::string sai_filename = opt::prefix + SAI_EXT;
        BWTCA::runRopebwt(opt::readsFile, bwt_filename, use_threads, false);

        if(opt::bBuildSAI)
        {
            std::cout << "\t done bwt construction, generating .sai file\n";
            BWT* pBWT = new BWT(bwt_filename);
            SampledSuffixArray ssa;
            ssa.buildLexicoIndex(pBWT, opt::numThreads);
            ssa.writeLexicoIndex(sai_filename);
            delete pBWT;
        }
    }

    if(opt::bBuildReverse)
    {
        std::string rbwt_filename = opt::prefix + RBWT_EXT;
        std::string rsai_filename = opt::prefix + RSAI_EXT;
        BWTCA::runRopebwt(opt::readsFile, rbwt_filename, use_threads, true);

        if(opt::bBuildSAI)
        {
            std::cout << "\t done rbwt construction, generating .rsai file\n";
            BWT* pRBWT = new BWT(rbwt_filename);
            SampledSuffixArray ssa;
            ssa.buildLexicoIndex(pRBWT, opt::numThreads);
            ssa.writeLexicoIndex(rsai_filename);
            delete pRBWT;
        }
    }
}
示例#4
0
//
// Main
//
// Entry point for the read filtering subprogram.
//
// Steps, in order:
//   1. Parse options (argc/argv are forwarded to parseFilterOptions).
//   2. Load the forward and reverse BWT for opt::prefix.
//   3. Run QCProcess over opt::readsFile, serially or with one processor per
//      thread; QCPostProcess is given the writers for opt::outFile and
//      opt::discardFile.
//   4. Release everything from step 3, then rebuild the BWT/RBWT and the
//      .sai/.rsai lexicographic indices for opt::outFile with ropebwt2.
//
// Always returns 0.
int filterMain(int argc, char** argv)
{
    parseFilterOptions(argc, argv);
    // Heap-allocated so that it can be deleted explicitly as the last step below
    // (presumably the Timer reports on destruction -- confirm against Timer)
    Timer* pTimer = new Timer(PROGRAM_IDENT);


    // Load the forward and reverse BWT of the input reads
    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
    //pBWT->printInfo();

    // Output streams for the kept and the discarded reads; both are handed
    // to the post-processor but are deleted by this function, not by it
    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = createWriter(opt::discardFile);
    QCPostProcess* pPostProcessor = new QCPostProcess(pWriter, pDiscardWriter);

    // If performing duplicate check, create a bitvector to record
    // which reads are duplicates (one entry per string in the BWT)
    BitVector* pSharedBV = NULL;
    if(opt::dupCheck)
        pSharedBV = new BitVector(pBWT->getNumStrings());

    // Set up QC parameters
    QCParameters params;
    params.pBWT = pBWT;
    params.pRevBWT = pRBWT;
    params.pSharedBV = pSharedBV;

    // Which checks to run, copied straight from the command-line options
    params.checkDuplicates = opt::dupCheck;
    params.substringOnly = opt::substringOnly;
    params.checkKmer = opt::kmerCheck;
    params.checkHPRuns = opt::hpCheck;
    params.checkDegenerate = opt::lowComplexityCheck;

    params.verbose = opt::verbose;

    params.kmerLength = opt::kmerLength;
    params.kmerThreshold = opt::kmerThreshold;

    // The hp* settings are hard-coded here rather than read from opt
    params.hpKmerLength = 51;
    params.hpHardAcceptCount = 10;
    params.hpMinProportion = 0.1f;
    params.hpMinLength = 6;

    if(opt::numThreads <= 1)
    {
        // Serial mode
        QCProcess processor(params);
        PROCESS_FILTER_SERIAL(opt::readsFile, &processor, pPostProcessor);
    }
    else
    {
        // Parallel mode: one QCProcess per requested thread, all built from
        // the same params (and therefore the same BWT and bitvector pointers)
        std::vector<QCProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            QCProcess* pProcessor = new QCProcess(params);
            processorVector.push_back(pProcessor);
        }

        PROCESS_FILTER_PARALLEL(opt::readsFile, processorVector, pPostProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
            delete processorVector[i];
    }

    // Destroy the post-processor and both output streams before the rebuild
    // below passes opt::outFile to ropebwt2 (presumably deleting the writers
    // is what flushes/closes the files -- confirm against createWriter)
    delete pPostProcessor;
    delete pWriter;
    delete pDiscardWriter;

    // The input-read BWTs are no longer needed. pBWT and pRBWT dangle from
    // here until they are reassigned inside the OpenMP region below.
    delete pBWT;
    delete pRBWT;

    // Only allocated when opt::dupCheck was set
    if(pSharedBV != NULL)
        delete pSharedBV;

    // Rebuild the index for the filtered output; the new files are named from
    // the prefix that stripFilename derives from opt::outFile
    std::cout << "RE-building index for " << opt::outFile << " in memory using ropebwt2\n";
    std::string prefix=stripFilename(opt::outFile);
        // pBWT and pRBWT declared at the top of the function are reused here
        //BWT *pBWT, *pRBWT;
        // Two independent "single nowait" constructs inside one parallel
        // region: each body is executed by exactly one thread, and nowait
        // drops the barrier after it, so the forward and reverse builds may
        // run at the same time on different threads. The parallel region
        // ends before pBWT/pRBWT are read below.
        // NOTE(review): both calls receive the full opt::numThreads -- check
        // whether that oversubscribes when the two builds overlap.
		#pragma omp parallel
		{
			#pragma omp single nowait
			{	
			    std::string bwt_filename = prefix + BWT_EXT;
				BWTCA::runRopebwt2(opt::outFile, bwt_filename, opt::numThreads, false);
				std::cout << "\t done bwt construction, generating .sai file\n";
				pBWT = new BWT(bwt_filename);
			}
			#pragma omp single nowait
			{	
				std::string rbwt_filename = prefix + RBWT_EXT;
				BWTCA::runRopebwt2(opt::outFile, rbwt_filename, opt::numThreads, true);
				std::cout << "\t done rbwt construction, generating .rsai file\n";
				pRBWT = new BWT(rbwt_filename);
			}
		}
        // Lexicographic index for the forward BWT of the filtered reads
        std::string sai_filename = prefix + SAI_EXT;
		SampledSuffixArray ssa;
        ssa.buildLexicoIndex(pBWT, opt::numThreads);
        ssa.writeLexicoIndex(sai_filename);
        delete pBWT;

        // Lexicographic index for the reverse BWT of the filtered reads
        std::string rsai_filename = prefix + RSAI_EXT;
        SampledSuffixArray rssa;
        rssa.buildLexicoIndex(pRBWT, opt::numThreads);
        rssa.writeLexicoIndex(rsai_filename);
        delete pRBWT;

    // Cleanup
    delete pTimer;

    return 0;
}