Пример #1
0
/**
 * Counterpart of protectSharedData(): calls unprotectGlobals() and then
 * unprotect_heap() on the Heap object returned by Heap::getHeap().
 *
 * Does nothing while isSingleThreaded() reports true.
 * When _PROFILING is defined, the elapsed time (as measured by
 * Util::copy_time()) is added to me->protecttime, i.e. the same counter
 * that protectSharedData() accumulates into.
 *
 * @return always 0.
 */
int HBRuntime::unprotectSharedData(){
	if(!isSingleThreaded()){
#ifdef _PROFILING
		const uint64_t begin = Util::copy_time();
#endif
		// Globals first, then the heap.
		unprotectGlobals();
		Heap::getHeap()->unprotect_heap();

#ifdef _PROFILING
		const uint64_t finish = Util::copy_time();
		me->protecttime += (finish - begin);
#endif
	}
	return 0;
}
Пример #2
0
/**
 * Calls protectGlobals() and then protect_heap() on the Heap object
 * returned by Heap::getHeap(), emitting a debug message around each step.
 *
 * Does nothing while isSingleThreaded() reports true.
 * When _PROFILING is defined, the elapsed time (as measured by
 * Util::copy_time()) is added to me->protecttime.
 *
 * @return always 0.
 */
int HBRuntime::protectSharedData(){
	if(!isSingleThreaded()){
#ifdef _PROFILING
		const uint64_t begin = Util::copy_time();
#endif

		// Globals first, then the heap.
		DEBUG_MSG("Protect globals\n");
		protectGlobals();
		DEBUG_MSG("Protect heap\n");
		Heap::getHeap()->protect_heap();

#ifdef _PROFILING
		const uint64_t finish = Util::copy_time();
		me->protecttime += (finish - begin);
#endif
		DEBUG_MSG("Heap protected\n");
	}
	return 0;
}
Пример #3
0
/**
 * pthread_create replacement: starts `fn(arg)` in a clone()d child and
 * registers it in the runtime's thread table.
 *
 * Steps:
 *  1. Reserve a thread slot under metadata->lock; if the slot counter is
 *     exhausted, fall back to findTid().
 *  2. Fill in metadata->threads[tid] (start routine, argument, tid, and a copy
 *     of the creator's vector clock) and add it to the creator's active list.
 *  3. mmap() a STACK_SIZE stack and clone() the child with
 *     CLONE_FILES | CLONE_FS | CLONE_IO | SIGCHLD (no CLONE_VM, so the child
 *     gets its own address space).
 *  4. Increment the creator's vector clock and, when this call leaves
 *     single-threaded mode, call protectSharedData().
 *
 * @param pid   receives the runtime thread id (not a real pthread handle).
 * @param attr  ignored.
 * @param fn    start routine stored in the thread record.
 * @param arg   argument stored in the thread record.
 * @return 0 on success. Every failure path logs and calls exit(0), so this
 *         function never returns an error code.
 */
int HBRuntime::threadCreate (pthread_t * pid, const pthread_attr_t * attr, void *(*fn) (void *), void * arg){
	/**
	 * Fixme: the tid assignment should be deterministic! using logical time!
	 * */
	(void)attr; // thread attributes are not supported by this runtime

	// Sample the mode before the new thread is registered: the transition
	// from single- to multi-threaded is handled at the end of this function.
	bool singlethread = isSingleThreaded();

	// Reserve the next free slot in the shared thread table.
	Util::spinlock(&metadata->lock);
	thread_id_t tid = metadata->thread_slot;
	if(tid >= MAX_THREAD_NUM){
		tid = INVALID_THREAD_ID;
	}
	else{
		metadata->thread_slot ++;
	}
	Util::unlock(&metadata->lock);

	// Slot counter exhausted: let findTid() look for an id instead.
	if(tid == INVALID_THREAD_ID){
		tid = findTid();
	}

	if(tid == INVALID_THREAD_ID){
		VATAL_MSG("Too much thread: HBDet can only support thread number < %d\n", MAX_THREAD_NUM);
		// NOTE(review): exits with status 0 although this is a failure — confirm intended.
		exit(0);
	}

	*pid = tid;
	/*Initialize the thread struct*/
	thread_info_t* thread = &metadata->threads[tid];
	thread->start_routine = fn;
	thread->args = arg;
	thread->tid = tid;
	thread->vclock = me->vclock;
	thread->oldtime = me->vclock;

	/*Add this thread to the active list*/
	me->insertToActiveList(thread);

	// mmap() reports failure with MAP_FAILED, never with NULL.
	void* stack_mem = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if(stack_mem == MAP_FAILED){
		fprintf(stderr, "HBDet: cannot allocate stack for child thread\n");
		exit(0);
	}
	// clone() is handed the highest address of the stack region, so keep the
	// base separately for logging and pass the top to clone().
	char* stack_base = static_cast<char*>(stack_mem);
	char* child_stack = stack_base + STACK_SIZE;

	NORMAL_MSG("Thread %d using stack from %p to %p\n", tid, static_cast<void*>(stack_base), static_cast<void*>(child_stack));
	int child = clone(thread_entry_point, child_stack, CLONE_FILES | CLONE_FS | CLONE_IO | SIGCHLD, thread);
	NORMAL_MSG("HBDet: clone thread(%d), pid = %d!\n", thread->tid, child);
	if(child == -1){
		VATAL_MSG("HBRuntime: create thread error!\n");
		exit(0);
	}
	thread->pid = child;

	// Creating a thread is an event in the creator's logical time.
	me->vclock.incClock(me->tid);

	if(singlethread){//changing from single thread to multithreads.
		DEBUG_MSG("In protectSharedData()\n");
		protectSharedData();
		DEBUG_MSG("Thread (%d): After protectSharedData()\n", me->tid);
	}
	else{
		//TODO: takeSnapshot & flushLog
		//beginSlice();
	}
	DEBUG_MSG("Thread (%d): After protectSharedData() ................\n", me->tid);
	NORMAL_MSG("Thread (%d) Create Thread (%d) OK!\n\n", me->tid, tid);
	return 0;
}
Пример #4
0
/**
 * Rebuilds the linear quadtree for the current point positions.
 *
 * Called by every worker thread with its own slice of the points. The phases
 * are separated by sync() calls and must stay in this order:
 *  1. per-thread bounding box of the points in pointPartition,
 *  2. main thread merges the per-thread boxes, re-initialises and clears the
 *     tree (and scales globalContext->coolDown by 0.999),
 *  3. Morton numbers for the points in pointPartition,
 *  4. sort of the point array (parallel or main-thread-only, depending on
 *     OGDF_FME_PARALLEL_QUADTREE_SORT),
 *  5. tree preparation, linking and partitioning — done entirely by the single
 *     thread, or prepared per thread and linked by the main thread,
 *  6. refresh of the point copies and of the node coordinates/sizes.
 *
 * @param pointPartition  the range of points this thread is responsible for.
 */
void FMEMultipoleKernel::quadtreeConstruction(ArrayPartition& pointPartition)
{
	FMELocalContext*  localContext	= m_pLocalContext;
	FMEGlobalContext* globalContext = m_pGlobalContext;
	LinearQuadtree&	tree			= *globalContext->pQuadtree;

	// precompute this thread's bounding box for the quadtree points from the graph nodes
	for_loop(pointPartition, min_max_x_function(localContext));
	for_loop(pointPartition, min_max_y_function(localContext));

	// wait until every thread's bounding box is computed
	sync();

	// let the main thread compute the bounding box of the bounding boxes
	if (isMainThread())
	{
		globalContext->min_x = globalContext->pLocalContext[0]->min_x;
		globalContext->min_y = globalContext->pLocalContext[0]->min_y;
		globalContext->max_x = globalContext->pLocalContext[0]->max_x;
		globalContext->max_y = globalContext->pLocalContext[0]->max_y;
		for (__uint32 j=1; j < numThreads(); j++)
		{
			globalContext->min_x = min(globalContext->min_x, globalContext->pLocalContext[j]->min_x);
			globalContext->min_y = min(globalContext->min_y, globalContext->pLocalContext[j]->min_y);
			globalContext->max_x = max(globalContext->max_x, globalContext->pLocalContext[j]->max_x);
			globalContext->max_y = max(globalContext->max_y, globalContext->pLocalContext[j]->max_y);
		}
		tree.init(globalContext->min_x, globalContext->min_y, globalContext->max_x, globalContext->max_y);
		globalContext->coolDown *= 0.999f;
		tree.clear();
	}
	// wait because the morton number computation needs the bounding box
	sync();
	// update the morton numbers to prepare them for sorting
	for_loop(pointPartition, LQMortonFunctor(localContext));
	// wait so we can sort them by morton number
	sync();

#ifdef OGDF_FME_PARALLEL_QUADTREE_SORT
	// use a simple parallel sorting algorithm
	LinearQuadtree::LQPoint* points = tree.pointArray();
	sort_parallel(points, tree.numberOfPoints(), LQPointComparer);
#else
	if (isMainThread())
	{
		LinearQuadtree::LQPoint* points = tree.pointArray();
		sort_single(points, tree.numberOfPoints(), LQPointComparer);
	}
#endif
	// wait because the quadtree builder needs the sorted order
	sync();
	// if not a parallel run, we can do it the easy way
	if (isSingleThreaded())
	{
		LinearQuadtreeBuilder builder(tree);
		// prepare the tree
		builder.prepareTree();
		// and link it
		builder.build();
		LQPartitioner partitioner( localContext );
		partitioner.partition();
	}
	else // the more difficult part
	{
		// snap the left point of the interval of the thread to the first in the cell
		LinearQuadtree::PointID beginPoint = tree.findFirstPointInCell(pointPartition.begin);
		LinearQuadtree::PointID endPoint_plus_one;
		// if this thread is the last one, no snapping required for the right point
		if (threadNr()==numThreads()-1)
			endPoint_plus_one = tree.numberOfPoints();
		else // find the left point of the next thread
			endPoint_plus_one = tree.findFirstPointInCell(pointPartition.end+1);

		// now we can prepare the snapped interval
		LinearQuadtreeBuilder builder(tree);
		// this function prepares the tree from begin point to endPoint_plus_one-1 (EXCLUDING endPoint_plus_one)
		builder.prepareTree(beginPoint, endPoint_plus_one);
		// save the start, end and count of the inner node chain in the context
		localContext->firstInnerNode = builder.firstInner;
		localContext->lastInnerNode = builder.lastInner;
		localContext->numInnerNodes = builder.numInnerNodes;
		// save the start, end and count of the leaf node chain in the context
		localContext->firstLeaf = builder.firstLeaf;
		localContext->lastLeaf = builder.lastLeaf;
		localContext->numLeaves = builder.numLeaves;
		// wait until all are finished
		sync();

		// now the main thread has to link the tree
		if (isMainThread())
		{
			// with its own builder
			LinearQuadtreeBuilder sbuilder(tree);
			// first we need the complete chain data: the chain starts with
			// thread 0's first nodes, ends with the last thread's last nodes,
			// and the counts are the sums over all threads
			sbuilder.firstInner = globalContext->pLocalContext[0]->firstInnerNode;
			sbuilder.firstLeaf = globalContext->pLocalContext[0]->firstLeaf;
			sbuilder.numInnerNodes = globalContext->pLocalContext[0]->numInnerNodes;
			sbuilder.numLeaves = globalContext->pLocalContext[0]->numLeaves;
			for (__uint32 j=1; j < numThreads(); j++)
			{
				sbuilder.numLeaves += globalContext->pLocalContext[j]->numLeaves;
				sbuilder.numInnerNodes += globalContext->pLocalContext[j]->numInnerNodes;
			}
			sbuilder.lastInner = globalContext->pLocalContext[numThreads()-1]->lastInnerNode;
			sbuilder.lastLeaf = globalContext->pLocalContext[numThreads()-1]->lastLeaf;
			// Link the tree
			sbuilder.build();
			// and run the partitions
			LQPartitioner partitioner(localContext);
			partitioner.partition();
		}
	}
	// wait for tree to finish
	sync();
	// now update the copy of the point data
	for_loop(pointPartition, LQPointUpdateFunctor(localContext));
	// compute the nodes coordinates and sizes for this thread's inner nodes and leaves
	tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
	tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
}
Пример #5
0
/**
 * Per-thread entry point of the multipole layout kernel.
 *
 * Every worker thread runs this function with the same global context and
 * picks its own local context via threadNr(). The work is:
 *  1. INIT: zero the global force arrays (split across threads) and this
 *     thread's own force arrays.
 *  2. Pre-processing: options->preProcMaxNumIterations rounds that apply edge
 *     forces only (no quadtree / multipole step).
 *  3. Main loop: up to maxNumIterations rounds of quadtree construction,
 *     multipole approximation, edge forces and node movement. After at least
 *     minNumIterations rounds the main thread sets globalContext->earlyExit
 *     once the largest squared force over all threads drops below
 *     pOptions->stopCritForce.
 *
 * @param globalContext  shared state (graph, quadtree, expansion, options,
 *                       per-thread local contexts).
 */
void FMEMultipoleKernel::operator()(FMEGlobalContext* globalContext)
{
	const __uint32				maxNumIterations    =  globalContext->pOptions->maxNumIterations;
	const __uint32				minNumIterations    =  globalContext->pOptions->minNumIterations;
	const __uint32				numPoints			=  globalContext->pQuadtree->numberOfPoints();
	ArrayGraph&					graph				= *globalContext->pGraph;
	LinearQuadtreeExpansion&	treeExp				= *globalContext->pExpansion;
	FMELocalContext*			localContext		= globalContext->pLocalContext[threadNr()];
	FMEGlobalOptions*			options				= globalContext->pOptions;
	float*						threadsForceArrayX	= localContext->forceX;
	float*						threadsForceArrayY	= localContext->forceY;
	float*						globalForceArrayX	= globalContext->globalForceX;
	float*						globalForceArrayY	= globalContext->globalForceY;

	ArrayPartition edgePartition = arrayPartition(graph.numEdges());
	ArrayPartition nodePointPartition = arrayPartition(graph.numNodes());

	// quadtreeConstruction() reads the contexts from these members
	m_pLocalContext = localContext;
	m_pGlobalContext = globalContext;
	/****************************/
	/* INIT						*/
	/****************************/
	//! reset the global force array
	for_loop_array_set(threadNr(), numThreads(), globalForceArrayX, numPoints, 0.0f);
	for_loop_array_set(threadNr(), numThreads(), globalForceArrayY, numPoints, 0.0f);

	// reset the threads force array
	for (__uint32 i = 0; i < numPoints; i++)
	{
		threadsForceArrayX[i] = 0.0f;
		threadsForceArrayY[i] = 0.0f;
	}

	/****************************/
	/* PRE-PROCESSING			*/
	/****************************/
	const __uint32 numPrepIterations = options->preProcMaxNumIterations;
	for (__uint32 currNumIteration = 0; currNumIteration < numPrepIterations; currNumIteration++)
	{
		// iterate over all edges and store the resulting forces in the threads array
		for_loop(edgePartition,
			edge_force_function< EDGE_FORCE_DIV_DEGREE > (localContext)	// divide the forces by degree of the node to avoid oscillation
		);
		// wait until all edges are done
		sync();
		// now collect the forces in parallel and put the sum into the global array and move the nodes accordingly
		for_loop(nodePointPartition,
			func_comp(
				 collect_force_function<COLLECT_EDGE_FACTOR_PREP | COLLECT_ZERO_THREAD_ARRAY >(localContext),
				 node_move_function<TIME_STEP_PREP | ZERO_GLOBAL_ARRAY>(localContext)
			)
		);
		// NOTE(review): there is no sync() between this collect/move pass and
		// the next iteration's edge pass — confirm that a thread running ahead
		// cannot write into arrays another thread is still collecting.
	}
	if (isMainThread())
	{
		globalContext->coolDown = 1.0f;
	}
	sync();

	/****************************/
	/* MAIN LOOP				*/
	/****************************/
	for (__uint32 currNumIteration = 0; ((currNumIteration < maxNumIterations) && !globalContext->earlyExit); currNumIteration++)
	{
		// reset the coefficients
		for_loop_array_set(threadNr(), numThreads(), treeExp.m_multiExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
		for_loop_array_set(threadNr(), numThreads(), treeExp.m_localExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);

		localContext->maxForceSq = 0.0;
		localContext->avgForce = 0.0;

		// construct the quadtree
		quadtreeConstruction(nodePointPartition);
		// wait for all threads to finish
		sync();

		if (isSingleThreaded()) // if is single threaded run the simple approximation
			multipoleApproxSingleThreaded(nodePointPartition);
		else // otherwise use the partitioning
			multipoleApproxFinal(nodePointPartition);
		// now wait until all forces are summed up in the global array and mapped to graph node order
		sync();

		// run the edge forces
		for_loop(edgePartition,							// iterate over all edges and sum up the forces in the threads array
			edge_force_function< EDGE_FORCE_DIV_DEGREE >(localContext)	// divide the forces by degree of the node to avoid oscillation
		);
		// wait until edges are finished
		sync();

		// collect the edge forces and move nodes without waiting
		for_loop(nodePointPartition,
			func_comp(
				 collect_force_function<COLLECT_EDGE_FACTOR | COLLECT_ZERO_THREAD_ARRAY>(localContext),
				 node_move_function<TIME_STEP_NORMAL | ZERO_GLOBAL_ARRAY>(localContext)
			)
		);
		// wait so we can decide if we need another iteration
		sync();
		// check the max force square for all threads
		if (isMainThread())
		{
			double maxForceSq = 0.0;
			for (__uint32 j=0; j < numThreads(); j++)
				maxForceSq = max(globalContext->pLocalContext[j]->maxForceSq, maxForceSq);

			// if we are allowed to quit and the max force sq falls under the threshold tell all threads we are done
			if ((currNumIteration >= minNumIterations) && (maxForceSq < globalContext->pOptions->stopCritForce ))
			{
				globalContext->earlyExit = true;
			}
		}
		// this is required to wait for the earlyExit result
		sync();
	}
}