Пример #1
0
//! runs one iteration. This call blocks the main thread
void FMEThreadPool::runThreads()
{
	for (__uint32 i=1; i < numThreads(); i++)
	{
		thread(i)->start();
	}

	thread(0)->doWork();

	for (__uint32 i=1; i < numThreads(); i++)
	{
		thread(i)->join();
	}
}
Пример #2
0
void FMEThreadPool::deallocate()
{
	for (__uint32 i=0; i < numThreads(); i++)
	{
		delete m_pThreads[i];
	}
	delete[] m_pThreads;
	delete m_pSyncBarrier;
}
Пример #3
0
void FMEThreadPool::allocate()
{
	typedef FMEThread* FMEThreadPtr;

	m_pSyncBarrier = new Barrier(m_numThreads);
	m_pThreads = new FMEThreadPtr[m_numThreads];
	for (__uint32 i=0; i < numThreads(); i++)
	{
		m_pThreads[i] = new FMEThread(this, i);
#ifdef OGDF_SYSTEM_WINDOWS
		m_pThreads[i]->priority(Thread::tpCritical);
		m_pThreads[i]->cpuAffinity(1 << i);
#endif
	}
}
Пример #4
0
//! Builds the linear quadtree for the current point positions. Executed by every thread.
//!
//! The steps are separated by sync() calls: per-thread bounding boxes, merge of the boxes
//! and tree reset by the main thread, morton number update, sorting, tree preparation and
//! linking, and finally the update of the point copies and the node coordinates.
//!
//! \param pointPartition the range of points this thread handles in the parallel loops;
//!        its begin and end are also used to derive the point interval this thread
//!        prepares when the tree is built by more than one thread.
void FMEMultipoleKernel::quadtreeConstruction(ArrayPartition& pointPartition)
{
	FMELocalContext*  localContext	= m_pLocalContext;
	FMEGlobalContext* globalContext = m_pGlobalContext;
	LinearQuadtree&	tree			= *globalContext->pQuadtree;

	// precompute the bounding box for the quadtree points from the graph nodes
	for_loop(pointPartition, min_max_x_function(localContext));
	for_loop(pointPartition, min_max_y_function(localContext));

	// wait until the bounding box of every thread is computed
	sync();

	// let the main thread compute the bounding box of the bounding boxes
	if (isMainThread())
	{
		globalContext->min_x = globalContext->pLocalContext[0]->min_x;
		globalContext->min_y = globalContext->pLocalContext[0]->min_y;
		globalContext->max_x = globalContext->pLocalContext[0]->max_x;
		globalContext->max_y = globalContext->pLocalContext[0]->max_y;
		for (__uint32 j=1; j < numThreads(); j++)
		{
			globalContext->min_x = min(globalContext->min_x, globalContext->pLocalContext[j]->min_x);
			globalContext->min_y = min(globalContext->min_y, globalContext->pLocalContext[j]->min_y);
			globalContext->max_x = max(globalContext->max_x, globalContext->pLocalContext[j]->max_x);
			globalContext->max_y = max(globalContext->max_y, globalContext->pLocalContext[j]->max_y);
		}
		tree.init(globalContext->min_x, globalContext->min_y, globalContext->max_x, globalContext->max_y);
		// the cool down factor is reduced a little on every call of this function
		globalContext->coolDown *= 0.999f;
		tree.clear();
	}
	// wait because the morton number computation needs the bounding box
	sync();
	// update the morton numbers to prepare them for sorting
	for_loop(pointPartition, LQMortonFunctor(localContext));
	// wait so we can sort them by morton number
	sync();

#ifdef OGDF_FME_PARALLEL_QUADTREE_SORT
	// use a simple parallel sorting algorithm
	LinearQuadtree::LQPoint* points = tree.pointArray();
	sort_parallel(points, tree.numberOfPoints(), LQPointComparer);
#else
	// only the main thread sorts, the others wait at the next sync()
	if (isMainThread())
	{
		LinearQuadtree::LQPoint* points = tree.pointArray();
		sort_single(points, tree.numberOfPoints(), LQPointComparer);
	}
#endif
	// wait because the quadtree builder needs the sorted order
	sync();
	// if not a parallel run, we can do it the easy way
	if (isSingleThreaded())
	{
		LinearQuadtreeBuilder builder(tree);
		// prepare the tree
		builder.prepareTree();
		// and link it
		builder.build();
		LQPartitioner partitioner( localContext );
		partitioner.partition();
	}
	else // the more difficult part
	{
		// snap the left point of the interval of the thread to the first in the cell
		LinearQuadtree::PointID beginPoint = tree.findFirstPointInCell(pointPartition.begin);
		LinearQuadtree::PointID endPoint_plus_one;
		// if this thread is the last one, no snapping required for the right point
		if (threadNr()==numThreads()-1)
			endPoint_plus_one = tree.numberOfPoints();
		else // find the left point of the next thread
			endPoint_plus_one = tree.findFirstPointInCell(pointPartition.end+1);

		// now we can prepare the snapped interval
		LinearQuadtreeBuilder builder(tree);
		// this function prepares the tree from begin point to endPoint_plus_one-1 (EXCLUDING endPoint_plus_one)
		builder.prepareTree(beginPoint, endPoint_plus_one);
		// save the start, end and count of the inner node chain in the context
		localContext->firstInnerNode = builder.firstInner;
		localContext->lastInnerNode = builder.lastInner;
		localContext->numInnerNodes = builder.numInnerNodes;
		// save the start, end and count of the leaf node chain in the context
		localContext->firstLeaf = builder.firstLeaf;
		localContext->lastLeaf = builder.lastLeaf;
		localContext->numLeaves = builder.numLeaves;
		// wait until all are finished
		sync();

		// now the main thread has to link the tree
		if (isMainThread())
		{
			// with its own builder
			LinearQuadtreeBuilder sbuilder(tree);
			// first we need the complete chain data: the chains start with the
			// data of thread 0 and end with the data of the last thread
			sbuilder.firstInner = globalContext->pLocalContext[0]->firstInnerNode;
			sbuilder.firstLeaf = globalContext->pLocalContext[0]->firstLeaf;
			sbuilder.numInnerNodes = globalContext->pLocalContext[0]->numInnerNodes;
			sbuilder.numLeaves = globalContext->pLocalContext[0]->numLeaves;
			for (__uint32 j=1; j < numThreads(); j++)
			{
				sbuilder.numLeaves += globalContext->pLocalContext[j]->numLeaves;
				sbuilder.numInnerNodes += globalContext->pLocalContext[j]->numInnerNodes;
			}
			sbuilder.lastInner = globalContext->pLocalContext[numThreads()-1]->lastInnerNode;
			sbuilder.lastLeaf = globalContext->pLocalContext[numThreads()-1]->lastLeaf;
			// link the tree
			sbuilder.build();
			// and run the partitions
			LQPartitioner partitioner(localContext);
			partitioner.partition();
		}
	}
	// wait for the tree to finish
	sync();
	// now update the copy of the point data
	for_loop(pointPartition, LQPointUpdateFunctor(localContext));
	// compute the coordinates and sizes of the nodes in this thread's inner node and leaf partitions
	tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
	tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
}
Пример #5
0
//! Runs the layout computation of the calling thread (identified by threadNr()).
//!
//! After resetting the force arrays, a preprocessing phase of
//! options->preProcMaxNumIterations iterations applies edge forces only. The main phase
//! then runs at most options->maxNumIterations iterations; each one rebuilds the quadtree,
//! runs the multipole approximation, applies the edge forces and moves the nodes. The main
//! thread sets globalContext->earlyExit once at least options->minNumIterations iterations
//! are done and the maximum squared force of all threads is below options->stopCritForce.
//!
//! \param globalContext the context shared by all threads; the local context of this
//!        thread is taken from globalContext->pLocalContext[threadNr()].
void FMEMultipoleKernel::operator()(FMEGlobalContext* globalContext)
{
	FMEGlobalOptions*			options				=  globalContext->pOptions;
	__uint32					maxNumIterations    =  options->maxNumIterations;
	__uint32					minNumIterations    =  options->minNumIterations;
	ArrayGraph&					graph				= *globalContext->pGraph;
	LinearQuadtree&				tree				= *globalContext->pQuadtree;
	LinearQuadtreeExpansion&	treeExp				= *globalContext->pExpansion;
	FMELocalContext*			localContext		=  globalContext->pLocalContext[threadNr()];
	float*						threadsForceArrayX	=  localContext->forceX;
	float*						threadsForceArrayY	=  localContext->forceY;
	float*						globalForceArrayX	=  globalContext->globalForceX;
	float*						globalForceArrayY	=  globalContext->globalForceY;

	ArrayPartition edgePartition = arrayPartition(graph.numEdges());
	ArrayPartition nodePointPartition = arrayPartition(graph.numNodes());

	// quadtreeConstruction() reads both contexts from these members
	m_pLocalContext = localContext;
	m_pGlobalContext = globalContext;
	/****************************/
	/* INIT						*/
	/****************************/
	//! reset the global force array
	for_loop_array_set(threadNr(), numThreads(), globalForceArrayX, tree.numberOfPoints(), 0.0f);
	for_loop_array_set(threadNr(), numThreads(), globalForceArrayY, tree.numberOfPoints(), 0.0f);

	// reset the force array of this thread
	for (__uint32 i = 0; i < tree.numberOfPoints(); i++)
	{
		threadsForceArrayX[i] = 0.0f;
		threadsForceArrayY[i] = 0.0f;
	}

	/****************************/
	/* PREPROCESSING			*/
	/****************************/
	__uint32 maxNumIt = options->preProcMaxNumIterations;
	for (__uint32 currNumIteration = 0; currNumIteration < maxNumIt; currNumIteration++)
	{
		// iterate over all edges and store the resulting forces in the threads array
		for_loop(edgePartition,
			edge_force_function< EDGE_FORCE_DIV_DEGREE > (localContext)	// divide the forces by degree of the node to avoid oscillation
		);
		// wait until all edges are done
		sync();
		// now collect the forces in parallel and put the sum into the global array and move the nodes accordingly
		for_loop(nodePointPartition,
			func_comp(
				 collect_force_function<COLLECT_EDGE_FACTOR_PREP | COLLECT_ZERO_THREAD_ARRAY >(localContext),
				 node_move_function<TIME_STEP_PREP | ZERO_GLOBAL_ARRAY>(localContext)
			)
		);
		// NOTE(review): there is no sync() before the edge pass of the next iteration -
		// confirm that the collect/move functors of one thread cannot interfere with the
		// edge_force_function of a thread that is already one step ahead.
	}
	if (isMainThread())
	{
		globalContext->coolDown = 1.0f;
	}
	sync();

	/****************************/
	/* MAIN LOOP				*/
	/****************************/
	for (__uint32 currNumIteration = 0; ((currNumIteration < maxNumIterations) && !globalContext->earlyExit); currNumIteration++)
	{
		// reset the coefficients
		for_loop_array_set(threadNr(), numThreads(), treeExp.m_multiExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
		for_loop_array_set(threadNr(), numThreads(), treeExp.m_localExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);

		localContext->maxForceSq = 0.0;
		localContext->avgForce = 0.0;

		// construct the quadtree
		quadtreeConstruction(nodePointPartition);
		// wait for all threads to finish
		sync();

		if (isSingleThreaded()) // if is single threaded run the simple approximation
			multipoleApproxSingleThreaded(nodePointPartition);
		else // otherwise use the partitioning
			multipoleApproxFinal(nodePointPartition);
		// now wait until all forces are summed up in the global array and mapped to graph node order
		sync();

		// run the edge forces
		for_loop(edgePartition,							// iterate over all edges and sum up the forces in the threads array
			edge_force_function< EDGE_FORCE_DIV_DEGREE >(localContext)	// divide the forces by degree of the node to avoid oscillation
		);
		// wait until edges are finished
		sync();

		// collect the edge forces and move nodes without waiting
		for_loop(nodePointPartition,
			func_comp(
				 collect_force_function<COLLECT_EDGE_FACTOR | COLLECT_ZERO_THREAD_ARRAY>(localContext),
				 node_move_function<TIME_STEP_NORMAL | ZERO_GLOBAL_ARRAY>(localContext)
			)
		);
		// wait so we can decide if we need another iteration
		sync();
		// check the max force square for all threads
		if (isMainThread())
		{
			double maxForceSq = 0.0;
			for (__uint32 j=0; j < numThreads(); j++)
				maxForceSq = max(globalContext->pLocalContext[j]->maxForceSq, maxForceSq);

			// if we are allowed to quit and the max force sq falls under the threshold tell all threads we are done
			if ((currNumIteration >= minNumIterations) && (maxForceSq < options->stopCritForce ))
			{
				globalContext->earlyExit = true;
			}
		}
		// this is required to wait for the earlyExit result
		sync();
	}
}
Пример #6
0
/**
 * Entry point of the RapMap mapper: parses the command line, loads the
 * pseudo-index and maps paired-end or unmated reads with a pool of worker
 * threads, writing the alignments to a file or to stdout.
 *
 * @param argc number of entries in argv
 * @param argv command-line arguments, parsed with TCLAP
 * @return 0 when mapping finished, 1 when TCLAP reports an argument error.
 *         Invalid option combinations and unusable input/output paths are
 *         logged to stderr and end the process with std::exit(1).
 */
int rapMapMap(int argc, char* argv[]) {
    std::cerr << "RapMap Mapper\n";

    std::string versionString = rapmap::version;
    TCLAP::CmdLine cmd(
            "RapMap Mapper",
            ' ',
            versionString);
    cmd.getProgramName() = "rapmap";

    TCLAP::ValueArg<std::string> index("i", "index", "The location of the pseudoindex", true, "", "path");
    TCLAP::ValueArg<std::string> read1("1", "leftMates", "The location of the left paired-end reads", false, "", "path");
    TCLAP::ValueArg<std::string> read2("2", "rightMates", "The location of the right paired-end reads", false, "", "path");
    TCLAP::ValueArg<std::string> unmatedReads("r", "unmatedReads", "The location of single-end reads", false, "", "path");
    TCLAP::ValueArg<uint32_t> numThreads("t", "numThreads", "Number of threads to use", false, 1, "positive integer");
    TCLAP::ValueArg<uint32_t> maxNumHits("m", "maxNumHits", "Reads mapping to more than this many loci are discarded", false, 200, "positive integer");
    TCLAP::ValueArg<std::string> outname("o", "output", "The output file (default: stdout)", false, "", "path");
    TCLAP::SwitchArg endCollectorSwitch("e", "endCollector", "Use the simpler (and faster) \"end\" collector as opposed to the more sophisticated \"skipping\" collector", false);
    TCLAP::SwitchArg noout("n", "noOutput", "Don't write out any alignments (for speed testing purposes)", false);
    cmd.add(index);
    cmd.add(noout);

    cmd.add(read1);
    cmd.add(read2);
    cmd.add(unmatedReads);
    cmd.add(outname);
    cmd.add(numThreads);
    cmd.add(maxNumHits);
    cmd.add(endCollectorSwitch);

    auto consoleSink = std::make_shared<spdlog::sinks::stderr_sink_mt>();
    auto consoleLog = spdlog::create("stderrLog", {consoleSink});

    try {

        cmd.parse(argc, argv);
        bool pairedEnd = (read1.isSet() or read2.isSet());
        if (pairedEnd and (read1.isSet() != read2.isSet())) {
            consoleLog->error("You must set both the -1 and -2 arguments to align "
                    "paired end reads!");
            std::exit(1);
        }

        if (pairedEnd and unmatedReads.isSet()) {
            consoleLog->error("You cannot specify both paired-end and unmated "
                    "reads in the input!");
            std::exit(1);
        }

        if (!pairedEnd and !unmatedReads.isSet()) {
            consoleLog->error("You must specify input; either both paired-end "
                    "or unmated reads!");
            std::exit(1);
        }

        // Without at least one worker nothing would ever consume the reads.
        uint32_t nthread = numThreads.getValue();
        if (nthread == 0) {
            consoleLog->error("The number of threads must be at least 1!");
            std::exit(1);
        }

        std::string indexPrefix(index.getValue());
        // back() on an empty string is undefined, so reject that case first.
        if (indexPrefix.empty()) {
            consoleLog->error("The index location must not be empty!");
            std::exit(1);
        }
        if (indexPrefix.back() != '/') {
            indexPrefix += "/";
        }

        if (!rapmap::fs::DirExists(indexPrefix.c_str())) {
            consoleLog->error("It looks like the index you provided [{}] "
                    "doesn't exist", indexPrefix);
            std::exit(1);
        }

        IndexHeader h;
        std::ifstream indexStream(indexPrefix + "header.json");
        if (!indexStream.is_open()) {
            consoleLog->error("Could not open the header file of the index [{}]",
                    indexPrefix);
            std::exit(1);
        }
        {
            cereal::JSONInputArchive ar(indexStream);
            ar(h);
        }
        indexStream.close();

        if (h.indexType() != IndexType::PSEUDO) {
            consoleLog->error("The index {} does not appear to be of the "
                    "appropriate type (pseudo)", indexPrefix);
            std::exit(1);
        }

        RapMapIndex rmi;
        rmi.load(indexPrefix);

        std::cerr << "\n\n\n\n";

        // from: http://stackoverflow.com/questions/366955/obtain-a-stdostream-either-from-stdcout-or-stdofstreamfile
        // set either a file or cout as the output stream
        std::streambuf* outBuf;
        std::ofstream outFile;
        bool haveOutputFile{false};
        if (outname.getValue() == "") {
            outBuf = std::cout.rdbuf();
        } else {
            outFile.open(outname.getValue());
            // Fail loudly instead of silently dropping every alignment.
            if (!outFile.is_open()) {
                consoleLog->error("Could not open the output file [{}] for writing",
                        outname.getValue());
                std::exit(1);
            }
            outBuf = outFile.rdbuf();
            haveOutputFile = true;
        }
        // Now set the output stream to the buffer, which is
        // either std::cout, or a file.
        std::ostream outStream(outBuf);

        // Must be a power of 2
        size_t queueSize{268435456};
        spdlog::set_async_mode(queueSize);
        auto outputSink = std::make_shared<spdlog::sinks::ostream_sink_mt>(outStream);
        auto outLog = std::make_shared<spdlog::logger>("outLog", outputSink);
        outLog->set_pattern("%v");

        std::unique_ptr<paired_parser> pairParserPtr{nullptr};
        std::unique_ptr<single_parser> singleParserPtr{nullptr};

        if (!noout.getValue()) {
            rapmap::utils::writeSAMHeader(rmi, outLog);
        }

        SpinLockT iomutex;
        {
            ScopedTimer timer;
            HitCounters hctrs;
            consoleLog->info("mapping reads . . . \n\n\n");
            if (pairedEnd) {
                std::vector<std::thread> threads;
                threads.reserve(nthread);
                std::vector<std::string> read1Vec = rapmap::utils::tokenize(read1.getValue(), ',');
                std::vector<std::string> read2Vec = rapmap::utils::tokenize(read2.getValue(), ',');

                if (read1Vec.size() != read2Vec.size()) {
                    consoleLog->error("The number of provided files for "
                            "-1 and -2 must be the same!");
                    std::exit(1);
                }

                // Interleave the file names (left, right, left, right, ...).
                // The vector only borrows the strings owned by read1Vec/read2Vec.
                std::vector<char*> pairFileList;
                pairFileList.reserve(read1Vec.size() + read2Vec.size());
                for (size_t i = 0; i < read1Vec.size(); ++i) {
                    pairFileList.push_back(const_cast<char*>(read1Vec[i].c_str()));
                    pairFileList.push_back(const_cast<char*>(read2Vec[i].c_str()));
                }
                size_t maxReadGroup{1000}; // Number of reads in each "job"
                size_t concurrentFile{2}; // Number of files to read simultaneously
                pairParserPtr.reset(new paired_parser(4 * nthread, maxReadGroup,
                            concurrentFile,
                            pairFileList.data(),
                            pairFileList.data() + pairFileList.size()));

                /** Create the threads depending on the collector type **/
                // The workers hold a reference to the collector, so they are
                // joined before the collector goes out of scope.
                if (endCollectorSwitch.getValue()) {
                    EndCollector endCollector(&rmi);
                    for (size_t i = 0; i < nthread; ++i) {
                        threads.emplace_back(processReadsPair<EndCollector, SpinLockT>,
                                pairParserPtr.get(),
                                std::ref(rmi),
                                std::ref(endCollector),
                                &iomutex,
                                outLog,
                                std::ref(hctrs),
                                maxNumHits.getValue(),
                                noout.getValue());
                    }
                    for (auto& t : threads) { t.join(); }
                } else {
                    SkippingCollector skippingCollector(&rmi);
                    for (size_t i = 0; i < nthread; ++i) {
                        threads.emplace_back(processReadsPair<SkippingCollector, SpinLockT>,
                                pairParserPtr.get(),
                                std::ref(rmi),
                                std::ref(skippingCollector),
                                &iomutex,
                                outLog,
                                std::ref(hctrs),
                                maxNumHits.getValue(),
                                noout.getValue());
                    }
                    for (auto& t : threads) { t.join(); }
                }
            } else {
                std::vector<std::thread> threads;
                threads.reserve(nthread);
                std::vector<std::string> unmatedReadVec = rapmap::utils::tokenize(unmatedReads.getValue(), ',');
                size_t maxReadGroup{1000}; // Number of reads in each "job"
                size_t concurrentFile{1};
                stream_manager streams( unmatedReadVec.begin(), unmatedReadVec.end(),
                        concurrentFile);
                singleParserPtr.reset(new single_parser(4 * nthread,
                            maxReadGroup,
                            concurrentFile,
                            streams));

                /** Create the threads depending on the collector type **/
                // As above: join while the collector is still alive.
                if (endCollectorSwitch.getValue()) {
                    EndCollector endCollector(&rmi);
                    for (size_t i = 0; i < nthread; ++i) {
                        threads.emplace_back(processReadsSingle<EndCollector, SpinLockT>,
                                singleParserPtr.get(),
                                std::ref(rmi),
                                std::ref(endCollector),
                                &iomutex,
                                outLog,
                                std::ref(hctrs),
                                maxNumHits.getValue(),
                                noout.getValue());
                    }
                    for (auto& t : threads) { t.join(); }
                } else {
                    SkippingCollector skippingCollector(&rmi);
                    for (size_t i = 0; i < nthread; ++i) {
                        threads.emplace_back(processReadsSingle<SkippingCollector, SpinLockT>,
                                singleParserPtr.get(),
                                std::ref(rmi),
                                std::ref(skippingCollector),
                                &iomutex,
                                outLog,
                                std::ref(hctrs),
                                maxNumHits.getValue(),
                                noout.getValue());
                    }
                    for (auto& t : threads) { t.join(); }
                }
            }
            consoleLog->info("Done mapping reads.");
            consoleLog->info("In total saw {} reads.", hctrs.numReads);
            consoleLog->info("Final # hits per read = {}", hctrs.totHits / static_cast<float>(hctrs.numReads));
            consoleLog->info("Discarded {} reads because they had > {} alignments",
                    hctrs.tooManyHits, maxNumHits.getValue());

            consoleLog->info("flushing output");
            outLog->flush();
        }

        if (haveOutputFile) {
            outFile.close();
        }
        return 0;
    } catch (const TCLAP::ArgException& e) {
        consoleLog->error("Exception [{}] when parsing argument {}", e.error(), e.argId());
        return 1;
    }

}