int HBRuntime::unprotectSharedData(){ if(isSingleThreaded()){ return 0; } #ifdef _PROFILING uint64_t starttime = Util::copy_time(); #endif unprotectGlobals();/*Protect the global variables, log the differences.*/ //unprotect_heap(); Heap::getHeap()->unprotect_heap(); //unprotectHeap(); #ifdef _PROFILING uint64_t endtime = Util::copy_time(); me->protecttime += (endtime - starttime); #endif return 0; }
int HBRuntime::protectSharedData(){ if(isSingleThreaded()){ return 0; } #ifdef _PROFILING uint64_t starttime = Util::copy_time(); #endif DEBUG_MSG("Protect globals\n"); protectGlobals();/*Protect the global variables.*/ //protect_heap(); DEBUG_MSG("Protect heap\n"); Heap::getHeap()->protect_heap(); //protectHeap(); #ifdef _PROFILING uint64_t endtime = Util::copy_time(); me->protecttime += (endtime - starttime); #endif DEBUG_MSG("Heap protected\n"); return 0; }
int HBRuntime::threadCreate (pthread_t * pid, const pthread_attr_t * attr, void *(*fn) (void *), void * arg){ /** * Fixme: the tid assignment should be deterministic! using logical time! * */ //#ifdef NOTHING bool singlethread = isSingleThreaded(); Util::spinlock(&metadata->lock); thread_id_t tid = metadata->thread_slot; if(tid >= MAX_THREAD_NUM){ tid = INVALID_THREAD_ID; } else{ metadata->thread_slot ++; } Util::unlock(&metadata->lock); if(tid == INVALID_THREAD_ID){ tid = findTid(); } if(tid == INVALID_THREAD_ID){ VATAL_MSG("Too much thread: HBDet can only support thread number < %d\n", MAX_THREAD_NUM); exit(0); } *pid = tid; /*Initialize the thread struct*/ thread_info_t* thread = &metadata->threads[tid]; thread->start_routine = fn; thread->args = arg; thread->tid = tid; thread->vclock = me->vclock; thread->oldtime = me->vclock; /*Add this thread to the active list*/ me->insertToActiveList(thread); //printf("HBDet: before mmap!\n"); char* child_stack = (char *) mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if(child_stack == NULL){ fprintf(stderr, "HBDet: cannot allocate stack for child thread\n"); exit(0); } child_stack += STACK_SIZE; NORMAL_MSG("Thread %d using stack from %x to %x\n", tid, child_stack, child_stack + STACK_SIZE); int child = clone(thread_entry_point, child_stack, CLONE_FILES | CLONE_FS | CLONE_IO | SIGCHLD, thread); NORMAL_MSG("HBDet: clone thread(%d), pid = %d!\n", thread->tid, child); if(child == -1){ VATAL_MSG("HBRuntime: create thread error!\n"); exit(0); return -1; } thread->pid = child; //#else // DEBUG_MSG("call real_pthread_create(%x)\n", real_pthread_create); // real_pthread_create(pid, attr, thread_entry_point, thread); //#endif me->vclock.incClock(me->tid); //printf("--thread %d leave pthread_create\n", me->tid); if(singlethread){//changing from single thread to multithreads. 
DEBUG_MSG("In protectSharedData()\n"); protectSharedData(); DEBUG_MSG("Thread (%d): After protectSharedData()\n", me->tid); } else{ //TODO: takeSnapshot & flushLog //beginSlice(); } DEBUG_MSG("Thread (%d): After protectSharedData() ................\n", me->tid); NORMAL_MSG("Thread (%d) Create Thread (%d) OK!\n\n", me->tid, tid); return 0; }
void FMEMultipoleKernel::quadtreeConstruction(ArrayPartition& pointPartition)
{
    // Cooperatively (re)builds the linear quadtree over the point set.
    // Executed by every kernel thread; each sync() waits for the other
    // threads, so the phase order (bounding box -> morton numbers -> sort ->
    // build/link -> coordinates) is fixed and must not be reordered.
    FMELocalContext* localContext = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree& tree = *globalContext->pQuadtree;

    // Phase 1: each thread computes the bounding box of its point partition.
    for_loop(pointPartition, min_max_x_function(localContext));
    for_loop(pointPartition, min_max_y_function(localContext));
    // wait until every thread's bounding box is computed
    sync();

    // Let the main thread merge the per-thread boxes into the global
    // bounding box and reinitialize the tree with it.
    if (isMainThread())
    {
        globalContext->min_x = globalContext->pLocalContext[0]->min_x;
        globalContext->min_y = globalContext->pLocalContext[0]->min_y;
        globalContext->max_x = globalContext->pLocalContext[0]->max_x;
        globalContext->max_y = globalContext->pLocalContext[0]->max_y;
        for (__uint32 j=1; j < numThreads(); j++)
        {
            globalContext->min_x = min(globalContext->min_x, globalContext->pLocalContext[j]->min_x);
            globalContext->min_y = min(globalContext->min_y, globalContext->pLocalContext[j]->min_y);
            globalContext->max_x = max(globalContext->max_x, globalContext->pLocalContext[j]->max_x);
            globalContext->max_y = max(globalContext->max_y, globalContext->pLocalContext[j]->max_y);
        };
        tree.init(globalContext->min_x, globalContext->min_y, globalContext->max_x, globalContext->max_y);
        globalContext->coolDown *= 0.999f; // cooling factor decays once per construction
        tree.clear();
    };
    // wait because the morton number computation needs the bounding box
    sync();

    // Phase 2: update the morton numbers to prepare the points for sorting.
    for_loop(pointPartition, LQMortonFunctor(localContext));
    // wait so we can sort them by morton number
    sync();

#ifdef OGDF_FME_PARALLEL_QUADTREE_SORT
    // use a simple parallel sorting algorithm
    LinearQuadtree::LQPoint* points = tree.pointArray();
    sort_parallel(points, tree.numberOfPoints(), LQPointComparer);
#else
    // sequential fallback: only the main thread sorts
    if (isMainThread())
    {
        LinearQuadtree::LQPoint* points = tree.pointArray();
        sort_single(points, tree.numberOfPoints(), LQPointComparer);
    };
#endif
    // wait because the quadtree builder needs the sorted order
    sync();

    // Phase 3: build the tree. A single-threaded run takes the easy way.
    if (isSingleThreaded())
    {
        LinearQuadtreeBuilder builder(tree);
        // prepare the tree
        builder.prepareTree();
        // and link it
        builder.build();
        LQPartitioner partitioner( localContext );
        partitioner.partition();
    } else // the more difficult part
    {
        // snap the left point of this thread's interval to the first point in the cell
        LinearQuadtree::PointID beginPoint = tree.findFirstPointInCell(pointPartition.begin);
        LinearQuadtree::PointID endPoint_plus_one;
        // if this thread is the last one, no snapping required for the right point
        if (threadNr()==numThreads()-1)
            endPoint_plus_one = tree.numberOfPoints();
        else // otherwise snap to the first point of the next thread's interval
            endPoint_plus_one = tree.findFirstPointInCell(pointPartition.end+1);
        // number of points in the snapped interval (NOTE(review): currently unused)
        __uint32 numPointsToPrepare = endPoint_plus_one - beginPoint;
        // now we can prepare the snapped interval
        LinearQuadtreeBuilder builder(tree);
        // prepares the tree from beginPoint to endPoint_plus_one-1 (EXCLUDING endPoint_plus_one)
        builder.prepareTree(beginPoint, endPoint_plus_one);
        // publish the start, end and count of this thread's inner node chain...
        localContext->firstInnerNode = builder.firstInner;
        localContext->lastInnerNode = builder.lastInner;
        localContext->numInnerNodes = builder.numInnerNodes;
        // ...and of its leaf node chain, so the main thread can stitch them together
        localContext->firstLeaf = builder.firstLeaf;
        localContext->lastLeaf = builder.lastLeaf;
        localContext->numLeaves = builder.numLeaves;
        // wait until all threads have published their chains
        sync();
        // now the main thread links the partial chains into one tree
        if (isMainThread())
        {
            // with its own builder
            LinearQuadtreeBuilder sbuilder(tree);
            // chain starts come from thread 0, chain ends from the last
            // thread; the counts are summed over all threads
            sbuilder.firstInner = globalContext->pLocalContext[0]->firstInnerNode;
            sbuilder.firstLeaf = globalContext->pLocalContext[0]->firstLeaf;
            sbuilder.numInnerNodes = globalContext->pLocalContext[0]->numInnerNodes;
            sbuilder.numLeaves = globalContext->pLocalContext[0]->numLeaves;
            for (__uint32 j=1; j < numThreads(); j++)
            {
                sbuilder.numLeaves += globalContext->pLocalContext[j]->numLeaves;
                sbuilder.numInnerNodes += globalContext->pLocalContext[j]->numInnerNodes;
            };
            sbuilder.lastInner = globalContext->pLocalContext[numThreads()-1]->lastInnerNode;
            sbuilder.lastLeaf = globalContext->pLocalContext[numThreads()-1]->lastLeaf;
            // Link the tree
            sbuilder.build();
            // and run the partitioning over the linked tree
            LQPartitioner partitioner(localContext);
            partitioner.partition();
        };
    };
    // wait for the tree to finish
    sync();
    // now update the copy of the point data
    for_loop(pointPartition, LQPointUpdateFunctor(localContext));
    // compute the node coordinates and sizes for this thread's inner-node and leaf partitions
    tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
};
void FMEMultipoleKernel::operator()(FMEGlobalContext* globalContext)
{
    // Per-thread entry point of the multipole kernel: runs a short
    // edge-force-only preprocessing phase, then the main iteration loop
    // (quadtree construction, multipole approximation, edge forces, node
    // moves) until earlyExit is set or maxNumIterations is reached.
    // Executed concurrently by every thread; each sync() waits for the
    // others, so the phase order must be preserved.
    __uint32 maxNumIterations = globalContext->pOptions->maxNumIterations;
    __uint32 minNumIterations = globalContext->pOptions->minNumIterations;
    // NOTE(review): numPoints and wspd are unused in this function body.
    __uint32 numPoints = globalContext->pQuadtree->numberOfPoints();
    ArrayGraph& graph = *globalContext->pGraph;
    LinearQuadtree& tree = *globalContext->pQuadtree;
    LinearQuadtreeExpansion& treeExp = *globalContext->pExpansion;
    WSPD& wspd = *globalContext->pWSPD;
    FMELocalContext* localContext = globalContext->pLocalContext[threadNr()];
    FMEGlobalOptions* options = globalContext->pOptions;
    float* threadsForceArrayX = localContext->forceX;
    float* threadsForceArrayY = localContext->forceY;
    float* globalForceArrayX = globalContext->globalForceX;
    float* globalForceArrayY = globalContext->globalForceY;

    // split the edge array and the node point array among the threads
    ArrayPartition edgePartition = arrayPartition(graph.numEdges());
    ArrayPartition nodePointPartition = arrayPartition(graph.numNodes());

    m_pLocalContext = localContext;
    m_pGlobalContext = globalContext;

    /****************************/
    /* INIT                     */
    /****************************/
    // reset the global force array (the work is striped across the threads)
    for_loop_array_set(threadNr(), numThreads(), globalForceArrayX, tree.numberOfPoints(), 0.0f);
    for_loop_array_set(threadNr(), numThreads(), globalForceArrayY, tree.numberOfPoints(), 0.0f);

    // reset this thread's own force array
    for (__uint32 i = 0; i < tree.numberOfPoints(); i++)
    {
        threadsForceArrayX[i] = 0.0f;
        threadsForceArrayY[i] = 0.0f;
    };

    // preprocessing: a few iterations with edge forces only
    __uint32 maxNumIt = options->preProcMaxNumIterations;
    for (__uint32 currNumIteration = 0; ((currNumIteration < maxNumIt) ); currNumIteration++)
    {
        // iterate over all edges and store the resulting forces in the threads array
        for_loop(edgePartition,
            edge_force_function< EDGE_FORCE_DIV_DEGREE > (localContext) // divide the forces by degree of the node to avoid oscillation
        );
        // wait until all edges are done
        sync();
        // now collect the forces in parallel, put the sum into the global
        // array and move the nodes accordingly
        for_loop(nodePointPartition,
            func_comp(
                collect_force_function<COLLECT_EDGE_FACTOR_PREP | COLLECT_ZERO_THREAD_ARRAY >(localContext),
                node_move_function<TIME_STEP_PREP | ZERO_GLOBAL_ARRAY>(localContext)
            )
        );
    };
    // reset the cooldown factor after preprocessing (main thread only)
    if (isMainThread())
    {
        globalContext->coolDown = 1.0f;
    };
    sync();

    /****************************/
    /* MAIN LOOP                */
    /****************************/
    for (__uint32 currNumIteration = 0; ((currNumIteration < maxNumIterations) && !globalContext->earlyExit); currNumIteration++)
    {
        // reset the multipole and local expansion coefficients
        for_loop_array_set(threadNr(), numThreads(), treeExp.m_multiExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
        for_loop_array_set(threadNr(), numThreads(), treeExp.m_localExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
        localContext->maxForceSq = 0.0;
        localContext->avgForce = 0.0;

        // construct the quadtree (contains its own sync() barriers)
        quadtreeConstruction(nodePointPartition);
        // wait for all threads to finish
        sync();

        if (isSingleThreaded()) // if is single threaded run the simple approximation
            multipoleApproxSingleThreaded(nodePointPartition);
        else // otherwise use the partitioning
            multipoleApproxFinal(nodePointPartition);
        // now wait until all forces are summed up in the global array and mapped to graph node order
        sync();

        // run the edge forces: iterate over all edges and sum the forces into the threads array
        for_loop(edgePartition,
            edge_force_function< EDGE_FORCE_DIV_DEGREE >(localContext) // divide the forces by degree of the node to avoid oscillation
        );
        // wait until edges are finished
        sync();
        // collect the edge forces and move nodes without waiting
        for_loop(nodePointPartition,
            func_comp(
                collect_force_function<COLLECT_EDGE_FACTOR | COLLECT_ZERO_THREAD_ARRAY>(localContext),
                node_move_function<TIME_STEP_NORMAL | ZERO_GLOBAL_ARRAY>(localContext)
            )
        );
        // wait so we can decide if we need another iteration
        sync();
        // the main thread checks the maximum squared force over all threads
        if (isMainThread())
        {
            double maxForceSq = 0.0;
            for (__uint32 j=0; j < numThreads(); j++)
                maxForceSq = max(globalContext->pLocalContext[j]->maxForceSq, maxForceSq);
            // if we are allowed to quit and the max force sq falls under the
            // threshold, tell all threads we are done
            if ((currNumIteration >= minNumIterations) && (maxForceSq < globalContext->pOptions->stopCritForce ))
            {
                globalContext->earlyExit = true;
            };
        };
        // this is required to wait for the earlyExit result
        sync();
    };
};