/*
 * Demo of omp_test_lock(): four threads spin on the global simple_lock,
 * reporting each failed attempt; the winner reports acquisition and
 * release, then unsets the lock so the next thread can take it.
 */
int main(int argc, const char * argv[]) {
    omp_init_lock(&simple_lock);

#pragma omp parallel num_threads(4)
    {
        const int tid = omp_get_thread_num();

        /* Busy-wait until this thread manages to grab the lock. */
        while (!omp_test_lock(&simple_lock)) {
            printf("=== Hilo %d: bloqueo ocupado\n", tid);
        }

        printf("+++ Hilo %d: Consegui el bloque\n", tid);
        printf("--- Hilo %d: Libere el bloqueo\n", tid);
        omp_unset_lock(&simple_lock);
    }

    omp_destroy_lock(&simple_lock);
    return 0;
}
// Computes per-face and per-vertex normals.
// The face normal is the cross product of two edge vectors (normalized at
// the end); the vertex normal is the sum of un-normalized incident face
// normals, divided by the accumulated face-area weight.
void BasicMesh::ComputeNormals() {
  // Scoped timer: prints the wall-clock time when it leaves scope.
  boost::timer::auto_cpu_timer timer("[BasicMesh] Normals computation time = %w seconds.\n");

  norms.resize(faces.rows(), 3);
  // NOTE(review): if verts is an Eigen matrix, verts.size() is
  // rows()*cols(), not the vertex count — confirm the intended row count.
  vertex_norms.resize(verts.size(), 3);
  vertex_norms.setZero();

  // Per-vertex accumulated face area (norm of the cross product == 2x the
  // triangle area), used as the normalization weight below.
  vector<double> area_sum(verts.size(), 0.0);

  // One global lock serializes all vertex accumulation in the loop.
  omp_lock_t writelock;
  omp_init_lock(&writelock);

#pragma omp parallel for
  for(int i=0;i<faces.rows();++i) {
    // Vertex indices of face i.
    auto vidx0 = faces(i, 0);
    auto vidx1 = faces(i, 1);
    auto vidx2 = faces(i, 2);

    auto v0 = Vector3d(verts.row(vidx0));
    auto v1 = Vector3d(verts.row(vidx1));
    auto v2 = Vector3d(verts.row(vidx2));

    // Un-normalized face normal and its magnitude (the area weight).
    auto v0v1 = v1 - v0;
    auto v0v2 = v2 - v0;
    auto n = v0v1.cross(v0v2);
    double area = n.norm();

    // Critical section: guards both vertex_norms and area_sum, which are
    // shared across faces that touch the same vertex.
    omp_set_lock(&writelock);
    vertex_norms.row(vidx0) += n;
    vertex_norms.row(vidx1) += n;
    vertex_norms.row(vidx2) += n;
    area_sum[vidx0] += area;
    area_sum[vidx1] += area;
    area_sum[vidx2] += area;
    omp_unset_lock(&writelock);

    // Store the unit face normal.
    // NOTE(review): a degenerate (zero-area) face makes normalize() and
    // the division below produce NaNs — confirm inputs exclude such faces.
    n.normalize();
    norms.row(i) = n;
  }
  omp_destroy_lock(&writelock);

  // Normalize each vertex normal by its total incident face area.
#pragma omp parallel for
  for(int i=0;i<vertex_norms.rows();++i) {
    vertex_norms.row(i) /= area_sum[i];
  }
}
/**
 * FlatSourceRegion constructor.
 *
 * Zero-initializes the region: no material, zero volume, unset quadrature
 * id, and zeroed flux/source in every energy group. When OpenMP support is
 * enabled, the per-region flux lock is also made ready for use.
 */
FlatSourceRegion::FlatSourceRegion() {

  _quad_id = -1;
  _volume = 0.0;
  _material = NULL;

  /* Zero the flux and source for every energy group */
  for (int g = 0; g < NUM_ENERGY_GROUPS; g++) {
    _flux[g] = 0.0;
    _source[g] = 0.0;
  }

#if USE_OPENMP
  /* Lock guarding concurrent flux updates on this region */
  omp_init_lock(&_flux_lock);
#endif
}
/*
 * initWorld -- resets every cell of the global world grid: clears the
 * conflict slots, marks the cell empty, zeroes breed/count, and creates
 * the per-cell lock that protects the count field.
 *
 * Fix over the original: the schedule clause used world_size/8 as the
 * chunk size, which is 0 for world_size < 8; the OpenMP specification
 * requires chunk_size to be strictly positive, so it is now clamped to 1.
 *
 * Returns 0 (no failure mode is reported).
 */
int initWorld(int world_size){
    int i, j, z;
    int chunk = world_size / 8;
    if (chunk < 1)
        chunk = 1;   /* chunk_size must be >= 1 per the OpenMP spec */

    #pragma omp parallel for private(i, j, z) schedule(guided, chunk)
    for (i = 0; i < world_size; i++)
        for (j = 0; j < world_size; j++) {
            for (z = 0; z < 5; z++)
                world[i][j].conflicts[z] = NULL;
            world[i][j].type = empty;
            world[i][j].breed = 0;
            world[i][j].count = 0;
            /* Each cell owns the lock that serializes updates to count. */
            omp_init_lock(&(world[i][j].lock_count));
        }

    return 0;
}
void UnitigGraph::OutputInitUnitigs(FILE *contig_file, FILE *multi_file, std::map<int64_t, int> &histo) { vertexID_t output_id = 0; omp_lock_t output_lock; omp_init_lock(&output_lock); histo.clear(); #pragma omp parallel for for (vertexID_t i = 0; i < vertices_.size(); ++i) { uint16_t multi = std::min(kMaxMulti_t, int((double)vertices_[i].depth / vertices_[i].length + 0.5)); std::string label = VertexToDNAString(sdbg_, vertices_[i]); if (vertices_[i].is_loop) { omp_set_lock(&output_lock); fprintf(contig_file, ">contig%d_length_%ld_multi_%d_loop\n%s\n", output_id, label.length(), multi, label.c_str()); fwrite(&multi, sizeof(uint16_t), 1, multi_file); ++output_id; ++histo[label.length()]; omp_unset_lock(&output_lock); } else { int indegree = sdbg_->Indegree(vertices_[i].start_node); int outdegree = sdbg_->Outdegree(vertices_[i].end_node); if (indegree == 0 && outdegree == 0) { vertices_[i].is_deleted = true; } omp_set_lock(&output_lock); fprintf(contig_file, ">contig%d_length_%ld_multi_%d_in_%d_out_%d\n%s\n", output_id, label.length(), multi, indegree, outdegree, label.c_str()); fwrite(&multi, sizeof(uint16_t), 1, multi_file); ++output_id; ++histo[label.length()]; omp_unset_lock(&output_lock); } } omp_destroy_lock(&output_lock); }
/*
 * Demo: NHILOS threads each increment the shared counter `suma` once,
 * serialized by an explicit OpenMP lock.
 *
 * Fixes over the original:
 *  - every thread called omp_unset_lock() on a lock that only the initial
 *    thread had set: releasing a simple lock you do not own is undefined
 *    behavior. The lock now guards the increment with a set/unset pair.
 *  - `suma = suma + 1` was an unsynchronized data race.
 *  - the lock was never destroyed.
 *  - `id` is private inside the region, so the value printed afterwards
 *    was an uninitialized outer variable; it is now initialized to -1.
 *  - the unused local array `arreglo` was removed.
 */
int main (int argc, char *argv[]) {
    int id = -1;   /* region copies are private; this is what gets printed */
    int suma;
    omp_lock_t C1;

    omp_init_lock(&C1);
    omp_set_num_threads(NHILOS);
    suma = 0;

    #pragma omp parallel private(id) shared(suma)
    {
        id = omp_get_thread_num();
        omp_set_lock(&C1);
        suma = suma + 1;   /* one increment per thread, under the lock */
        omp_unset_lock(&C1);
    }

    printf("\nbye, suma total=%d, id:%d\n\n", suma, id);
    omp_destroy_lock(&C1);
    sleep(1);
    return 0;
}
int main(int argc, char **argv) { int i; struct timeval startTime, endTime; if (argc != 3) usage(argv[0]); // Record start of total time gettimeofday(&startTime, NULL); // Initialize global variables and data-structures. P = atoi(argv[1]); initialize(argv[2], &cells, &constraints); nodeCount = 0; found = 0; jobQueues = (job_queue_t*)calloc(sizeof(job_queue_t), P); if (!jobQueues) unixError("Failed to allocated memory for the job queues"); for (i = 0; i < P; i++) omp_init_lock(&(jobQueues[i].headLock)); // Add initial job (nothing assigned) to root processor jobQueues[0].tail = 1; runParallel(P); if (!found) appError("No solution found"); // Use final time to calculate total time gettimeofday(&endTime, NULL); totalTime = TIME_DIFF(endTime, startTime); // Print out number of nodes visited and calculated times printf("Nodes Visited: %lld\n", nodeCount); printf("Computation Time = %.3f millisecs\n", compTime); printf(" Total Time = %.3f millisecs\n", totalTime); return 0; }
struct frame dailyOne(struct cortage task) { int i, j, i0, j0, currentSum; int maxSum = A[0][0]; int maxI0 = 0, maxJ0 = 0, maxI1 = 0, maxJ1 = 0; omp_lock_t lock; omp_init_lock(&lock); int **H = A; for (i = 0; i < task.M; i++) for (j = 0; j < task.N; j++) H[i][j] = (i ? H[i-1][j] : 0) + (j ? H[i][j-1] : 0) + A[i][j] - ((i && j) ? H[i-1][j-1] : 0); #pragma omp parallel for private(j, j0, i0, currentSum) shared(maxSum) for (i = 0; i < task.M; i++) for (j = 0; j < task.N; j++) for (i0 = 0; i0 <= i; i0++) for (j0 = 0; j0 <= j; j0++) { currentSum = H[i][j] - ( i0 ? H[i0 - 1][j] : 0 ) - ( j0 ? H[i][j0 - 1] : 0 ) + ((i0 && j0) ? H[i0-1][j0-1] : 0); if (currentSum > maxSum) { omp_set_lock(&lock); maxSum = currentSum; maxI0 = i0; maxJ0 = j0; maxI1 = i; maxJ1 = j; omp_unset_lock(&lock); } } struct frame result = { maxI0, maxJ0, maxI1, maxJ1, maxSum }; return result; }
/*
 * Benchmark driver: initializes the per-processor lock array, then times
 * `reps` repetitions of runloop(1) and runloop(2), validating the result
 * and printing the elapsed wall-clock time after each batch, and finally
 * destroys the locks.
 */
int main(int argc, char *argv[]) {
    int i, r;
    double t_begin, t_end;

    /* Init locks */
    for (i = 0; i < MAX_PROCS; i++)
        omp_init_lock(&(remaining_iters_lock[i]));

    /* Loop 1 */
    init1();
    t_begin = omp_get_wtime();
    for (r = 0; r < reps; r++)
        runloop(1);
    t_end = omp_get_wtime();
    valid1();
    printf("Total time for %d reps of loop 1 = %f\n", reps, (float)(t_end - t_begin));

    /* Loop 2 */
    init2();
    t_begin = omp_get_wtime();
    for (r = 0; r < reps; r++)
        runloop(2);
    t_end = omp_get_wtime();
    valid2();
    printf("Total time for %d reps of loop 2 = %f\n", reps, (float)(t_end - t_begin));

    for (i = 0; i < MAX_PROCS; i++)
        omp_destroy_lock(&(remaining_iters_lock[i]));
    return 0;
}
/*
 * Four parallel sections take turns on the single global lock `mylock`:
 * each section acquires the lock, sleeps one second, prints its greeting
 * (tagged with the executing thread id), and releases the lock. The four
 * messages are therefore serialized, but their order and the thread that
 * runs each section are unspecified.
 */
int main() {
    omp_init_lock(&mylock);
    #pragma omp parallel
    {
        #pragma omp sections
        {
            #pragma omp section
            {
                omp_set_lock(&mylock);
                sleep(1);
                printf("[%d] 1. Hello world\n", omp_get_thread_num());
                omp_unset_lock(&mylock);
            }
            #pragma omp section
            {
                omp_set_lock(&mylock);
                sleep(1);
                printf("[%d] 2. Hello world\n", omp_get_thread_num());
                omp_unset_lock(&mylock);
            }
            #pragma omp section
            {
                omp_set_lock(&mylock);
                sleep(1);
                printf("[%d] 3. Hello world\n", omp_get_thread_num());
                omp_unset_lock(&mylock);
            }
            #pragma omp section
            {
                omp_set_lock(&mylock);
                sleep(1);
                printf("[%d] 4. Hello world\n", omp_get_thread_num());
                omp_unset_lock(&mylock);
            }
        } /* sections */
    } /* parallel */
    omp_destroy_lock(&mylock);
    return 0;
}
/**
 * @brief Initializes Cmfd object for acceleration prior to source iteration.
 * @details Instantiates a dummy Cmfd object if one was not assigned to
 *          the Solver by the user and initializes FSRs, Materials, fluxes
 *          and the Mesh. This method initializes a global array for the
 *          surface currents.
 */
void CPUSolver::initializeCmfd() {

    /* Call parent class method */
    Solver::initializeCmfd();

    /* Delete old Cmfd Mesh surface currents array if it exists */
    if (_surface_currents != NULL)
        delete [] _surface_currents;

    int size;

    /* Allocate memory for the Cmfd Mesh surface currents array */
    try{

        /* Allocate an array for the Cmfd Mesh surface currents
         * (8 surfaces per cell, one entry per coarse group) */
        if (_cmfd->getMesh()->getCmfdOn()){
            size = _num_mesh_cells * _cmfd->getNumCmfdGroups() * 8;
            log_printf(NORMAL, "creating surface currents of size: %i", size);
            _surface_currents = new double[size];
        }
    }
    catch(std::exception &e) {
        log_printf(ERROR, "Could not allocate memory for the Solver's Cmfd "
                   "Mesh surface currents. Backtrace:%s", e.what());
    }

    if (_cmfd->getMesh()->getCmfdOn()){

        /* Initialize an array of OpenMP locks for each Cmfd Mesh surface.
         * NOTE(review): the array is sized from getNumCells() but the init
         * loop below runs over _num_mesh_cells -- confirm these two counts
         * are always equal, otherwise some locks are left uninitialized
         * (or the loop overruns the allocation). */
        _mesh_surface_locks = new omp_lock_t[_cmfd->getMesh()->getNumCells() * 8];

        /* Loop over all mesh cells to initialize OpenMP locks */
        #pragma omp parallel for schedule(guided)
        for (int r=0; r < _num_mesh_cells*8; r++)
            omp_init_lock(&_mesh_surface_locks[r]);
    }

    return;
}
/*
 * FragilityFn constructor.
 *
 * Validates and stores the damage-state onset distributions: the list must
 * be non-empty, and the log-means (mu_lnX) must be strictly increasing.
 * Throws std::invalid_argument otherwise.
 *
 * Fixes over the original:
 *  - the strictly-increasing check threw while the object's OpenMP lock
 *    was still held, leaving the lock set; the lock is now released before
 *    throwing (and the empty-input check runs before the lock is taken).
 *  - removed the unreachable `break` after the throw and the stray
 *    semicolon after the function body.
 */
FragilityFn::FragilityFn(std::vector<LogNormalDist> onsets)
{
    omp_init_lock(&lock);

    /* Validate the argument before touching shared state; no lock needed. */
    if (onsets.size() == 0) {
        throw std::invalid_argument("onsets");
    }

    omp_set_lock(&lock);
    damage_states.resize(onsets.size());

    /* NAN compares false with <=, so the first onset always passes. */
    double mean = NAN;
    for (unsigned int i = 0; i < onsets.size(); i++) {
        if (onsets[i].get_mu_lnX() <= mean) {
            /* Release the lock before throwing (leaked in the original). */
            omp_unset_lock(&lock);
            throw std::invalid_argument("onsets");
        }
        damage_states[i] = onsets[i];
        mean = damage_states[i].get_mu_lnX();
    }
    omp_unset_lock(&lock);
}
/*
 * Default constructor: creates the instance lock, loads default training
 * parameters, builds the statistics/chooser helper objects, and zeroes all
 * bookkeeping state. Only sum_w_scale starts at 1 (it is a divisor).
 */
StructuredSVM::StructuredSVM() {
  omp_init_lock(&my_lock);
  InitTrainParams(&params);

  /* Helper objects owned by this solver. */
  stats = new StructuredSVMStatistics(this);
  chooser = new StructuredSVMExampleChooser(this);

  /* Model state. */
  sizePsi = 0;
  sum_w = NULL;
  u_i_buff = NULL;
  regularization_error = 0;
  sum_dual = 0;
  sum_alpha_loss = 0;
  sum_w_sqr = 0;
  sum_w_scale = 1;

  /* Training-set bookkeeping. */
  trainset = NULL;
  n = 0;
  t = 0;
  nextUpdateInd = 0;
  numCacheIters = 0;
  numExampleIds = 0;
  exampleIdsToIndices = NULL;
  exampleIndicesToIds = NULL;
  useFixedSampleSet = false;
  isMultiSample = false;

  /* Run control and file names. */
  base_time = 0;
  runForever = false;
  hasConverged = false;
  finished = false;
  trainfile = modelfile = NULL;
  validationfile = NULL;
}
/* Implements using omp lock functions.
 *
 * foo_locks -- increments a shared counter n times, serializing every
 * increment with an explicit OpenMP lock, then prints the final value and
 * the elapsed wall-clock time.
 *
 * Fix over the original: `a` is a long long but was printed with "%d",
 * which is undefined behavior; the conversion is now "%lld".
 */
void foo_locks(long long int n) {
    long long int a = 0;
    long long int i;
    omp_lock_t my_lock;

    // init lock
    omp_init_lock(&my_lock);

    double time = omp_get_wtime();

    #pragma omp parallel for schedule(static) shared(a)
    for (i = 0; i < n; i++) {
        /* Every increment is a full lock round-trip (this is the point of
         * the demo -- it measures lock overhead). */
        omp_set_lock(&my_lock);
        a += 1;
        omp_unset_lock(&my_lock);
    }

    omp_destroy_lock(&my_lock);
    time = omp_get_wtime() - time;

    printf("Final value = %lld \n ", a);
    printf("Locks: Total time = %f seconds \n ", time);
} // end foo_locks
/*
 * Searches for an acute triangulation of the [0,dim]^3 cube point set by
 * repeated randomized greedy growth. Each OpenMP thread independently
 * grows a triangulation from a random start facet, picking the next
 * tetrahedron by a per-thread strategy (max/min/random volume or mixes);
 * the first thread whose boundary empties publishes its result and stops
 * everyone. Returns the found triangulation.
 *
 * NOTE(review): `triangulation_found` is read outside any lock -- a benign
 * flag race here, but if two threads finish simultaneously the second
 * overwrites `result` and the first result's storage is never freed;
 * confirm this is acceptable.
 */
triangulation triangulate_cube_random(data_list * data) {
  int dim = data_list_dim(data);
  cube_points cube = gen_cube_points(dim);
  triangulation result;
  omp_lock_t result_lock;
  omp_init_lock(&result_lock); //If we found a triangulation, use this lock!
  facet_acute_data parameters; //Parameters for conform_
  triangulation tmp_triang; //Triangulation we are expanding in current thread
  ptetra tet_list; //List of tetrahedrons, used in the parallel section
  unsigned short tet_list_len; //Holds the length of this list
  int triangulation_found = 0; //Stop if one of the threads has found a triangulation!
  int rand_bound, i;
  unsigned short tet_max, tet_min, tet_rand, tet_add;
  size_t max_volume;
  //Start the parallel loop!
  #pragma omp parallel default(none) \
      private(parameters, tmp_triang, tet_list, tet_list_len, rand_bound, i,max_volume,tet_max, tet_min, tet_rand, tet_add) \
      shared(result, result_lock, cube,data,dim, triangulation_found)
  {
    //Initalization for each thread
    parameters.cube = &cube;
    parameters.boundary_func = &triangle_boundary_cube;
    parameters.data = data;
    parameters.store_acute_ind = 1;
    parameters.acute_ind = malloc(sizeof(vert_index) * cube.len);
    tet_list = malloc(sizeof(tetra) * cube.len);
    max_volume = 0;
    while (!triangulation_found) { //Not found a triangulation
      //Initalize the triangulation variables
      tmp_triang = triangulation_init(dim);
      tet_list_len = 0;
      //Start triangle (0,0,0), (rand,0,0), (rand,rand,0)
      tmp_triang.bound_len = 1;
      tmp_triang.bound_tri = triangulation_start_facet(data);
      //printf("Thread %d with iteration %zu starts with:\n", omp_get_thread_num(), ++iterations);
      //print_triangle(tmp_triang.bound_tri);
      //While we have triangles on the boundary
      while (tmp_triang.bound_len > 0) {
        /*
         * We are going to add a tetrahedron on the boundary triangle.
         * To do so, we select a random triangle on the boundary. Then we generate all the
         * acute tetrahedra (above and below) with facets in our possible list.
         * From this list we remove all the tetrahedrons that intersect with our current triangulation.
         * Then we add a random tetrahedron to our triangulation and repeat.
         */
        rand_bound = rand() % tmp_triang.bound_len;
        //
        //Calculate the conform tetrahedrons above and below
        if (!facet_conform(tmp_triang.bound_tri + rand_bound, &parameters))
          break; //Triangle on the boundary that does not have a conform facet
        tet_list_len = parameters.acute_ind_len;
        //Form explicit list of the tetrahedrons
        for (i = 0; i < tet_list_len; i++) {
          copyArr3(tet_list[i].vertices[0], tmp_triang.bound_tri[rand_bound].vertices[0]);
          copyArr3(tet_list[i].vertices[1], tmp_triang.bound_tri[rand_bound].vertices[1]);
          copyArr3(tet_list[i].vertices[2], tmp_triang.bound_tri[rand_bound].vertices[2]);
          copyArr3(tet_list[i].vertices[3], cube.points[parameters.acute_ind[i]]);
        }
        //Remove all the tetrahedrons that intersect with current triangulation.
        filter_tet_list_disjoint_triangulation(tet_list, &tet_list_len, &tmp_triang);
        if (tet_list_len == 0)
          break; //We can not find a conform tetrahedron for this boundary.. Restart
        //Select a tetrahedron from the tet_list to add to the triangulation.. Different approaches.
        //Combinations between: random tetra, smallest volume, maximum volume. Indices stored in tet_max, tet_min and tet_rand
        tet_list_min_max_volume(tet_list, tet_list_len, &tet_max, &tet_min);
        tet_rand = rand() % tet_list_len;
        //Per-thread selection strategy, keyed on the thread number.
        switch (omp_get_thread_num() % 6) {
          case 0: //Choose tet with max volume
            tet_add = tet_max;
            break;
          case 1: //Choose tet with min volume
            tet_add = tet_min;
            break;
          case 2: //Choose random tet
            tet_add = tet_rand;
            break;
          case 3: //Choose either max or min (random)
            tet_add = (rand() % 2)? tet_min : tet_max;
            break;
          case 4: //Choose either max or rand
            tet_add = (rand() % 5)? tet_max : tet_rand;
            break;
          case 5: //Either min or rand
            tet_add = (rand() % 5)? tet_min : tet_rand;
            break;
          default:
            tet_add = 0;
        }
        /*
         * Add the above tetra to the triangulation.
         * This removes all the boundary triangles that are covered by this tetrahedron
         */
        add_tet_triangulation(tet_list + tet_add,&tmp_triang);
      }
      //Track and report the best (largest-volume) attempt of this thread.
      if (triangulation_volume(&tmp_triang) > max_volume) {
        max_volume = triangulation_volume(&tmp_triang);
        printf("Record for thread %d using method %d amount: %zu\n", omp_get_thread_num(), omp_get_thread_num() % 6, max_volume);
        triangulation_print(&tmp_triang);
      }
      //Empty boundary means the cube is fully triangulated: publish result.
      if (tmp_triang.bound_len == 0) {
        printf("FOUND A TRIANGULATION!!!\n");
        triangulation_print(&tmp_triang);
        omp_set_lock(&result_lock);
        result = tmp_triang;
        triangulation_found = 1;
        omp_unset_lock(&result_lock);
      } else
        triangulation_free(&tmp_triang);
    }
    free(parameters.acute_ind);
    free(tet_list);
  }
  free(cube.points);
  omp_destroy_lock(&result_lock);
  return result;
}
/* Prepares an empty bucket: zero elements, initial capacity of 4 floats
 * with the backing array allocated, and the bucket's lock made ready. */
void Bucket_init(Bucket* self) {
    omp_init_lock(&self->lock);
    self->used = 0;
    self->size = 4;
    self->data = (float*) malloc(self->size * sizeof(float));
}
/*
 * Friends-of-Friends group finder over the kd-tree in `kd`: particles
 * closer than fEps are linked into the same group. Each thread grows a
 * group as a FIFO "snake" of particles, using a hashed pool of locks and
 * the per-particle iTouched owner id to resolve conflicts: when a thread
 * meets a particle already claimed by a lower-numbered (more important)
 * thread, it aborts its whole current group (RestartSnake).
 * Returns the number of groups (kd->nGroup - 1).
 */
int kdFoF(KD kd,float fEps)
{
    PARTICLE *p;
    KDN *c;
    int pi,pj,pn,cp;
    int iGroup;
    int *Fifo,iHead,iTail,nFifo;
    float fEps2;
    float dx,dy,dz,x,y,z,lx,ly,lz,sx,sy,sz,fDist2;
#ifdef _OPENMP
    int idSelf;
    omp_lock_t *locks;

    /* No particle has been touched by any thread yet. */
    for (pn=0;pn<kd->nActive;++pn) kd->p[pn].iTouched = -1;
    /* We really want to make an independent lock for each particle. However, each lock
     * seems to use a buttload of memory (something like 312 bytes per lock). Therefore,
     * to ensure that we don't use too much memory, only use 1 lock per 100 particles.
     * This should still provide very low lock contention while not using oodles of
     * memory at the same time, since it is extremely rare that two threads will be looking
     * two particles that map to the same lock at the same time.*/
    kd->nHash = (int)(kd->nActive/100);
    locks = (omp_lock_t *)malloc(kd->nHash*sizeof(omp_lock_t));
    assert(locks != NULL);
    for (pn=0;pn<kd->nHash;++pn) omp_init_lock(&locks[pn]);
#endif
    p = kd->p;
    c = kd->kdNodes;
    lx = kd->fPeriod[0];
    ly = kd->fPeriod[1];
    lz = kd->fPeriod[2];
    fEps2 = fEps*fEps;
    /* Group 0 == "not yet assigned". */
    for (pn=0;pn<kd->nActive;++pn) p[pn].iGroup = 0;
#pragma omp parallel default(none) shared(kd,locks,p,c,lx,ly,lz,fEps2) \
    private(pi,pj,pn,cp,iGroup,Fifo,iHead,iTail,dx,dy,dz,x,y,z,sx,sy,sz,fDist2,idSelf,nFifo)
    {
#ifdef _OPENMP
        nFifo = kd->nActive/omp_get_num_threads();
        idSelf = omp_get_thread_num();
#else
        nFifo = kd->nActive;
#endif
        /* Per-thread circular FIFO of particles still to be expanded. */
        Fifo = (int *)malloc(nFifo*sizeof(int));
        assert(Fifo != NULL);
        iHead = 0;
        iTail = 0;
        iGroup = 0;
#pragma omp for schedule(runtime)
        for (pn=0;pn<kd->nActive;++pn) {
            if (p[pn].iGroup) continue;
            /*
            ** Mark it and add to the do-fifo.
            */
#ifdef _OPENMP
            omp_set_lock(&locks[_hashLock(kd,pn)]);
            /* Claimed by a more important (lower id) thread: skip it. */
            if (p[pn].iTouched >= 0 && p[pn].iTouched < idSelf ) {
                assert(p[pn].iGroup > 0);
                omp_unset_lock(&locks[_hashLock(kd,pn)]);
                continue;
            }
            p[pn].iTouched = idSelf;
            /* Group id is seed particle index + 1 (unique across threads). */
            iGroup = pn+1;
            p[pn].iGroup = iGroup;
            omp_unset_lock(&locks[_hashLock(kd,pn)]);
#else
            ++iGroup;
            p[pn].iGroup = iGroup;
#endif
            Fifo[iTail++] = pn;
            if (iTail == nFifo) iTail = 0;
            while (iHead != iTail) {
                pi = Fifo[iHead++];
                if (iHead == nFifo) iHead = 0;
                /*
                ** Now do an fEps-Ball Gather!
                */
                x = p[pi].r[0];
                y = p[pi].r[1];
                z = p[pi].r[2];
                cp = ROOT;
                while (1) {
                    INTERSECT(c,cp,fEps2,lx,ly,lz,x,y,z,sx,sy,sz);
                    /*
                    ** We have an intersection to test.
                    */
                    if (c[cp].iDim >= 0) {
                        cp = LOWER(cp);
                        continue;
                    }
                    else {
                        /* Leaf cell: test every particle in it. */
                        for (pj=c[cp].pLower;pj<=c[cp].pUpper;++pj) {
#ifdef _OPENMP
                            if (p[pj].iGroup == iGroup) {
                                /* We have already looked at this particle */
                                //assert(p[pj].iTouched == idSelf); particle is not locked.
                                continue;
                            }
                            if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                                /* Somebody more important than us is already looking at this
                                 * particle. However, we do not yet know if this particle belongs
                                 * in our group, so just skip it to save time but don't restart the
                                 * entire group. */
                                // assert(p[pj].iGroup > 0); particle is not locked
                                continue;
                            }
#else
                            if (p[pj].iGroup) continue;
#endif
                            dx = sx - p[pj].r[0];
                            dy = sy - p[pj].r[1];
                            dz = sz - p[pj].r[2];
                            fDist2 = dx*dx + dy*dy + dz*dz;
                            if (fDist2 < fEps2) {
                                /*
                                ** Mark it and add to the do-fifo.
                                */
#ifdef _OPENMP
                                omp_set_lock(&locks[_hashLock(kd,pj)]);
                                if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                                    /* Now we know this particle should be in our group. If somebody more
                                     * important than us touched it, abort the entire group. */
                                    assert(p[pj].iGroup > 0);
                                    omp_unset_lock(&locks[_hashLock(kd,pj)]);
                                    iHead = iTail;
                                    /*printf("Thread %d: Aborting group %d. p[%d].iOrder p.iGroup=%d p.iTouched=%d (Per-Particle2)\n", idSelf, iGroup, pj, p[pj].iOrder, p[pj].iGroup, p[pj].iTouched);*/
                                    goto RestartSnake;
                                }
                                p[pj].iTouched = idSelf;
                                p[pj].iGroup = iGroup;
                                omp_unset_lock(&locks[_hashLock(kd,pj)]);
#else
                                p[pj].iGroup = iGroup;
#endif
                                Fifo[iTail++] = pj;
                                if (iTail == nFifo) iTail = 0;
                            }
                        }
                        SETNEXT(cp);
                        if (cp == ROOT) break;
                        continue;
                    }
                /* Jumped to by INTERSECT when the whole cell lies inside
                 * the gather ball: claim every particle without distance
                 * tests. */
                ContainedCell:
                    for (pj=c[cp].pLower;pj<=c[cp].pUpper;++pj) {
#ifdef _OPENMP
                        if (p[pj].iGroup == iGroup) continue;
                        if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                            /* Somebody more important that us is already looking at this
                             * group. Abort this entire group! */
                            //assert(p[pj].iGroup > 0); particle is not locked
                            iHead = iTail;
                            /*printf("Thread %d: Aborting group %d. p[%d].iOrder=%d p.iGroup=%d p.iTouched=%d (Per-Cell1)\n", idSelf, iGroup, pj, p[pj].iOrder, p[pj].iGroup, p[pj].iTouched);*/
                            goto RestartSnake;
                        }
#else
                        if (p[pj].iGroup) continue;
#endif
                        /*
                        ** Mark it and add to the do-fifo.
                        */
#ifdef _OPENMP
                        omp_set_lock(&locks[_hashLock(kd,pj)]);
                        if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                            /* Check again in case somebody touched it before the lock. */
                            assert(p[pj].iGroup > 0);
                            omp_unset_lock(&locks[_hashLock(kd,pj)]);
                            iHead = iTail;
                            /*printf("Thread %d: Aborting group %d. p[%d].iGroup=%d p[%d].iTouched=%d (Per-Cell2)\n", idSelf, iGroup, pj, p[pj].iGroup, pj, p[pj].iTouched);*/
                            goto RestartSnake;
                        }
                        p[pj].iTouched = idSelf;
                        p[pj].iGroup = iGroup;
                        omp_unset_lock(&locks[_hashLock(kd,pj)]);
#else
                        p[pj].iGroup = iGroup;
#endif
                        Fifo[iTail++] = pj;
                        if (iTail == nFifo) iTail = 0;
                    }
                /* Jumped to by INTERSECT when the cell can be skipped. */
                GetNextCell:
                    SETNEXT(cp);
                    if (cp == ROOT) break;
                }
            } /* End while(iHead != iTail) */
#ifdef _OPENMP
        RestartSnake:
#endif
            assert(iHead == iTail);
        }
        free(Fifo);
    } /* End of the OpenMP PARALLEL section */
#ifdef _OPENMP
    /* Now we have count how many groups there are. This is straightforward,
     * since the number of groups is the number of particles whose groupID equals
     * their particleID+1.
     */
    pj = 0;
    for (pn=0;pn<kd->nActive;++pn)
        if (p[pn].iGroup == pn+1) ++pj;
    /* NOTE(review): pj holds the counted number of groups, yet nGroup is
     * set from nActive and pj is discarded -- this looks like it should be
     * kd->nGroup = pj+1; confirm against callers before changing. */
    kd->nGroup = (kd->nActive)+1;
    free(locks);
#else
    kd->nGroup = iGroup+1;
#endif
    return(kd->nGroup-1);
}
/// Constructs a counter starting at `init`. The guard lock is initialized
/// and the counter is marked as not yet destroyed.
OpenMPCounter::OpenMPCounter(size_type init)
    : _counter(init),
      destroyed(false)
{
  omp_init_lock(&_lock);
}
/// Default constructor: counter starts at zero, the guard lock is
/// initialized, and the counter is marked as not yet destroyed.
OpenMPCounter::OpenMPCounter()
    : _counter(0),
      destroyed(false)
{
  omp_init_lock(&_lock);
}
/*
 * Betweenness centrality with the BFS phases themselves parallelized
 * (Brandes' algorithm, level-synchronous): all threads cooperate on each
 * source's BFS, guarding per-vertex state with a lock array vLock[], then
 * accumulate dependencies phase by phase in reverse. Runs BFS from at most
 * numSrcs sources (randomly permuted when RANDSRCS is set) and adds each
 * vertex's dependency into BC[].
 */
void vertex_betweenness_centrality_parBFS(graph_t* G, double* BC, long numSrcs) {
    attr_id_t *S;           /* stack of vertices in the order of
                               non-decreasing distance from s. Also used
                               to implicitly represent the BFS queue */
    plist_t* P;             /* predecessors of a vertex v on shortest paths
                               from s */
    double* sig;            /* No. of shortest paths */
    attr_id_t* d;           /* Length of the shortest path between every
                               pair */
    double* del;            /* dependency of vertices */
    attr_id_t *in_degree, *numEdges, *pSums;
    attr_id_t* pListMem;
#if RANDSRCS
    attr_id_t* Srcs;
#endif
    attr_id_t *start, *end; /* per-phase [start,end) windows into S */
    long MAX_NUM_PHASES;
    attr_id_t *psCount;     /* prefix sums of per-thread discovery counts */
#ifdef _OPENMP
    omp_lock_t* vLock;      /* one lock per vertex */
    long chunkSize;
#endif
#ifdef DIAGNOSTIC
    double elapsed_time;
#endif
    int seed = 2387;

#ifdef _OPENMP
#pragma omp parallel firstprivate(G)
    {
#endif
        attr_id_t *myS, *myS_t; /* per-thread list of newly found vertices */
        attr_id_t myS_size;
        long i, j, k, p, count, myCount;
        long v, w, vert;
        long k0, k1;
        long numV, num_traversals, n, m, phase_num;
        long start_iter, end_iter;
        long tid, nthreads;
        int* stream;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif
#ifdef _OPENMP
        int myLock;
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time = get_seconds();
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = numSrcs */
        numV = numSrcs;
        n = G->n;
        m = G->m;

        /* Permute vertices */
        if (tid == 0) {
#if RANDSRCS
            Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t));
#endif
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#if RANDSRCS
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

        /* Random pairwise swaps; try-locks make threads skip contended
           pairs rather than block. */
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n * sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif
            }
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

        if (tid == 0) {
            MAX_NUM_PHASES = 500;
        }
#ifdef _OPENMP
#pragma omp barrier
#endif

        /* Initialize predecessor lists */
        /* The size of the predecessor list of each vertex is bounded by
           its in-degree. So we first compute the in-degree of every
           vertex */
        if (tid == 0) {
            P = (plist_t *) calloc(n, sizeof(plist_t));
            in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t));
            numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t));
            pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t));
        }
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        /* NOTE(review): called by every thread -- presumably internally
           thread-aware; confirm against its definition. */
        prefix_sums(in_degree, numEdges, pSums, n);

        if (tid == 0) {
            pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t));
        }
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Allocate shared memory */
        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);

            S = (attr_id_t *) malloc(n*sizeof(attr_id_t));
            sig = (double *) malloc(n*sizeof(double));
            d = (attr_id_t *) malloc(n*sizeof(attr_id_t));
            del = (double *) calloc(n, sizeof(double));
            start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
            end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
            psCount = (attr_id_t *) malloc((nthreads+1)*sizeof(attr_id_t));
        }

        /* local memory for each thread */
        myS_size = (2*n)/nthreads;
        myS = (attr_id_t *) malloc(myS_size*sizeof(attr_id_t));
        num_traversals = 0;
        myCount = 0;

#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* All threads walk the same source sequence together. */
        for (p=0; p<n; p++) {
#if RANDSRCS
            i = Srcs[p];
#else
            i = p;
#endif
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            if (num_traversals == numV + 1) {
                break;
            }

            if (tid == 0) {
                sig[i] = 1;
                d[i] = 0;
                S[0] = i;
                start[0] = 0;
                end[0] = 1;
            }

            count = 1;
            phase_num = 0;

#ifdef _OPENMP
#pragma omp barrier
#endif

            /* Level-synchronous BFS: expand one frontier per phase. */
            while (end[phase_num] - start[phase_num] > 0) {

                myCount = 0;
                start_iter = start[phase_num];
                end_iter = end[phase_num];
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(dynamic) nowait
#endif
                for (vert = start_iter; vert < end_iter; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {
                        w = G->endV[j];
                        if (v != w) {
#ifdef _OPENMP
                            myLock = omp_test_lock(&vLock[w]);
                            if (myLock) {
#endif
                                /* w found for the first time? */
                                if (d[w] == -1) {
                                    if (myS_size == myCount) {
                                        /* Resize myS */
                                        myS_t = (attr_id_t *)
                                            malloc(2*myS_size*sizeof(attr_id_t));
                                        memcpy(myS_t, myS, myS_size*sizeof(attr_id_t));
                                        free(myS);
                                        myS = myS_t;
                                        myS_size = 2*myS_size;
                                    }
                                    myS[myCount++] = w;
                                    d[w] = d[v] + 1;
                                    sig[w] = sig[v];
                                    P[w].list[P[w].count++] = v;
                                } else if (d[w] == d[v] + 1) {
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                }
#ifdef _OPENMP
                                omp_unset_lock(&vLock[w]);
                            } else {
                                /* Lock contended: another thread is (or
                                   was) updating w; block only if w still
                                   needs our contribution. */
                                if ((d[w] == -1) || (d[w] == d[v]+ 1)) {
                                    omp_set_lock(&vLock[w]);
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                    omp_unset_lock(&vLock[w]);
                                }
                            }
#endif
                        }
                    }
                }
                /* Merge all local stacks for next iteration */
                phase_num++;

                if (tid == 0) {
                    if (phase_num >= MAX_NUM_PHASES) {
                        fprintf(stderr, "Error: Max num phases set to %ld\n",
                                MAX_NUM_PHASES);
                        fprintf(stderr, "Diameter of input network greater than"
                                " this value. Increase MAX_NUM_PHASES"
                                " in vertex_betweenness_centrality_parBFS()\n");
                        exit(-1);
                    }
                }

                psCount[tid+1] = myCount;

#ifdef _OPENMP
#pragma omp barrier
#endif

                if (tid == 0) {
                    start[phase_num] = end[phase_num-1];
                    psCount[0] = start[phase_num];
                    for(k=1; k<=nthreads; k++) {
                        psCount[k] = psCount[k-1] + psCount[k];
                    }
                    end[phase_num] = psCount[nthreads];
                }

#ifdef _OPENMP
#pragma omp barrier
#endif

                /* Copy this thread's discoveries into its slice of S. */
                k0 = psCount[tid];
                k1 = psCount[tid+1];
                for (k = k0; k < k1; k++) {
                    S[k] = myS[k-k0];
                }

                count = end[phase_num];
            }

            phase_num--;

            /* Dependency accumulation, deepest phase first. */
            while (phase_num > 0) {
                start_iter = start[phase_num];
                end_iter = end[phase_num];
#ifdef _OPENMP
#pragma omp for schedule(static) nowait
#endif
                for (j=start_iter; j<end_iter; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
#ifdef _OPENMP
                        omp_set_lock(&vLock[v]);
#endif
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
#ifdef _OPENMP
                        omp_unset_lock(&vLock[v]);
#endif
                    }
                    BC[w] += del[w];
                }

                phase_num--;

#ifdef _OPENMP
#pragma omp barrier
#endif
            }

            /* Reset per-source state for the vertices reached. */
#ifdef _OPENMP
            chunkSize = n/nthreads;
#pragma omp for schedule(static, chunkSize) nowait
#endif
            for (j=0; j<count; j++) {
                w = S[j];
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }

#ifdef _OPENMP
#pragma omp barrier
#endif
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n",
                    elapsed_time_part);
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        free(myS);

        if (tid == 0) {
            free(S);
            free(pListMem);
            free(P);
            free(sig);
            free(d);
            free(del);
#ifdef _OPENMP
            free(vLock);
#endif
            free(start);
            free(end);
            free(psCount);
#ifdef DIAGNOSTIC
            elapsed_time = get_seconds() - elapsed_time;
            fprintf(stderr, "Time taken: %lf\n seconds", elapsed_time);
#endif
#if RANDSRCS
            free(Srcs);
#endif
        }

        free_sprng(stream);
#ifdef _OPENMP
    }
#endif
}
/*
 * Betweenness centrality with the OUTER source loop parallelized: each
 * thread runs complete sequential BFS/accumulation passes from its own
 * subset of sources using private working arrays; only the final BC[w]
 * updates are serialized with the per-vertex lock array. Runs BFS from
 * numSrcs sources (randomly permuted when RANDSRCS is set).
 */
void vertex_betweenness_centrality_simple(graph_t* G, double* BC, long numSrcs) {
    attr_id_t *in_degree, *numEdges, *pSums;
#if RANDSRCS
    attr_id_t* Srcs;
#endif
    long num_traversals = 0;
#ifdef _OPENMP
    omp_lock_t* vLock;  /* one lock per vertex, guards BC updates */
    long chunkSize;
#endif
#ifdef DIAGNOSTIC
    double elapsed_time;
#endif
    int seed = 2387;

    /* The outer loop is parallelized in this case. Each thread does a BFS
       and the vertex BC values are incremented atomically */
#ifdef _OPENMP
#pragma omp parallel firstprivate(G)
    {
#endif
        /* All of the working arrays below are PRIVATE to each thread. */
        attr_id_t *S;       /* stack of vertices in the order of
                               non-decreasing distance from s. Also used to
                               implicitly represent the BFS queue */
        plist_t* P;         /* predecessors of a vertex v on shortest paths
                               from s */
        attr_id_t* pListMem;
        double* sig;        /* No. of shortest paths */
        attr_id_t* d;       /* Length of the shortest path between every
                               pair */
        double* del;        /* dependency of vertices */
        attr_id_t *start, *end;
        long MAX_NUM_PHASES;
        long i, j, k, p, count;
        long v, w, vert;
        long numV, n, m, phase_num;
        long tid, nthreads;
        int* stream;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif
#ifdef _OPENMP
        int myLock;
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time = get_seconds();
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = numSrcs */
        numV = numSrcs;
        n = G->n;
        m = G->m;

        /* Permute vertices */
        if (tid == 0) {
#if RANDSRCS
            Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t));
#endif
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#if RANDSRCS
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

        /* Random pairwise swaps; try-locks skip contended pairs. */
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n * sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif
            }
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

        MAX_NUM_PHASES = 50;

        /* Initialize predecessor lists */
        /* The size of the predecessor list of each vertex is bounded by
           its in-degree. So we first compute the in-degree of every
           vertex */
        if (tid == 0) {
            in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t));
            numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t));
            pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t));
        }
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        prefix_sums(in_degree, numEdges, pSums, n);

        /* Per-thread predecessor lists (private BFS state). */
        P = (plist_t *) calloc(n, sizeof(plist_t));
        pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t));
        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

        /* Allocate shared memory */
        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);
        }

        /* Private BFS working arrays for this thread. */
        S = (attr_id_t *) malloc(n*sizeof(attr_id_t));
        sig = (double *) malloc(n*sizeof(double));
        d = (attr_id_t *) malloc(n*sizeof(attr_id_t));
        del = (double *) calloc(n, sizeof(double));
        start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
        end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));

#ifdef _OPENMP
#pragma omp barrier
#endif

        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Sources are distributed across threads; each traversal below is
           fully sequential within its thread. */
#ifdef _OPENMP
#pragma omp for reduction(+:num_traversals)
#endif
        for (p=0; p<numV; p++) {
#if RANDSRCS
            i = Srcs[p];
#else
            i = p;
#endif
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            sig[i] = 1;
            d[i] = 0;
            S[0] = i;
            start[0] = 0;
            end[0] = 1;

            count = 1;
            phase_num = 0;

            /* Level-synchronous BFS (sequential in this thread). */
            while (end[phase_num] - start[phase_num] > 0) {

                for (vert = start[phase_num]; vert < end[phase_num]; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {
                        w = G->endV[j];
                        if (v != w) {
                            /* w found for the first time? */
                            if (d[w] == -1) {
                                S[count++] = w;
                                d[w] = d[v] + 1;
                                sig[w] = sig[v];
                                P[w].list[P[w].count++] = v;
                            } else if (d[w] == d[v] + 1) {
                                sig[w] += sig[v];
                                P[w].list[P[w].count++] = v;
                            }
                        }
                    }
                }

                phase_num++;

                start[phase_num] = end[phase_num-1];
                end[phase_num] = count;
            }

            phase_num--;

            /* Dependency accumulation, deepest phase first; only the
               shared BC[w] update needs the lock. */
            while (phase_num > 0) {
                for (j=start[phase_num]; j<end[phase_num]; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
                    }
#ifdef _OPENMP
                    omp_set_lock(&vLock[w]);
                    BC[w] += del[w];
                    omp_unset_lock(&vLock[w]);
#else
                    BC[w] += del[w];
#endif
                }
                phase_num--;
            }

            /* Reset per-source state for the vertices reached. */
            for (j=0; j<count; j++) {
                w = S[j];
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n",
                    elapsed_time_part);
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        /* Per-thread working memory. */
        free(S);
        free(pListMem);
        free(P);
        free(sig);
        free(d);
        free(del);
        free(start);
        free(end);

        if (tid == 0) {
#ifdef _OPENMP
            free(vLock);
#endif
#if RANDSRCS
            free(Srcs);
#endif
#ifdef DIAGNOSTIC
            elapsed_time = get_seconds() - elapsed_time;
            fprintf(stderr, "Total time taken: %lf seconds\n", elapsed_time);
#endif
        }

        free_sprng(stream);
#ifdef _OPENMP
#pragma omp barrier
    }
#endif
}
/* Conformance test for the OpenMP runtime API: exercises simple and
   nestable locks, the dynamic/nested adjustment knobs, thread-count
   queries, reductions across a parallel region, and the wall-clock
   routines.  Any deviation from the expected OpenMP semantics calls
   abort(). */
int main (void)
{
  double d, e;
  int l;
  omp_lock_t lck;
  omp_nest_lock_t nlck;

  d = omp_get_wtime ();

  /* Simple lock: once held, omp_test_lock must fail; after release it
     must succeed exactly once.  */
  omp_init_lock (&lck);
  omp_set_lock (&lck);
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  if (! omp_test_lock (&lck))
    abort ();
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  omp_destroy_lock (&lck);

  /* Nestable lock: omp_test_nest_lock returns the new nesting depth on
     success, so repeated acquires by the same thread count 1, 3, 2...  */
  omp_init_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 1)
    abort ();
  omp_set_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 3)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 2)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  omp_destroy_nest_lock (&nlck);

  /* omp_set_dynamic / omp_set_nested must round-trip through their
     corresponding getters.  */
  omp_set_dynamic (1);
  if (! omp_get_dynamic ())
    abort ();
  omp_set_dynamic (0);
  if (omp_get_dynamic ())
    abort ();
  omp_set_nested (1);
  if (! omp_get_nested ())
    abort ();
  omp_set_nested (0);
  if (omp_get_nested ())
    abort ();

  /* Outside a parallel region, num_threads is 1 and max_threads reflects
     the last omp_set_num_threads request.  */
  omp_set_num_threads (5);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 5)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  omp_set_num_threads (3);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 3)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();

  /* Inside the region each thread checks its own id/team size; the
     |-reduction collects any failure flag.  */
  l = 0;
#pragma omp parallel reduction (|:l)
  {
    l = omp_get_num_threads () != 3;
    l |= omp_get_thread_num () < 0;
    l |= omp_get_thread_num () >= 3;
#pragma omp master
    l |= omp_get_thread_num () != 0;
  }
  if (l)
    abort ();

  if (omp_get_num_procs () <= 0)
    abort ();
  if (omp_in_parallel ())
    abort ();
  /* omp_in_parallel must be true inside a region, even one forced active
     by an if(1) clause.  */
#pragma omp parallel reduction (|:l)
  l = ! omp_in_parallel ();
#pragma omp parallel reduction (|:l) if (1)
  l = ! omp_in_parallel ();
  if (l)
    abort ();

  /* Wall clock must be monotonic across the calls above.  */
  e = omp_get_wtime ();
  if (d > e)
    abort ();
  d = omp_get_wtick ();
  /* Negative precision is definitely wrong, bigger than 1s clock
     resolution is also strange.  */
  if (d <= 0 || d > 1)
    abort ();
  return 0;
}
/**
 * Build the CSR-style adjacency representation of graph G from the raw
 * edge tuples in SDGdata.
 *
 * Pipeline (run inside one parallel region when compiled with OpenMP):
 *   1. count the out-degree of every start vertex (per-vertex locks
 *      guard the concurrent increments, and pos[] records each edge's
 *      slot within its source vertex's bucket);
 *   2. prefix-sum the degrees into numEdges[] (CSR row offsets);
 *   3. scatter endpoints/weights into endV[]/w[] at numEdges[u]+pos[i].
 *
 * On return G owns numEdges/endV/weight; SDGdata's raw arrays are freed.
 * Returns the elapsed wall-clock time in seconds.
 *
 * Fix vs. previous revision: removed a leftover debug
 * fprintf(stderr, "%d\n", SDGdata->weight[i]) in the scatter loop — it
 * printed one line per edge and passed a WEIGHT_T to a %d conversion
 * (undefined behavior when WEIGHT_T is wider than int).
 */
double computeGraph(graph* G, graphSDG* SDGdata) {

    VERT_T* endV;
    LONG_T *degree, *numEdges, *pos, *pSums;
    WEIGHT_T* w;
    double elapsed_time;

#ifdef _OPENMP
    omp_lock_t *vLock;      /* one lock per vertex for the degree counts */
    LONG_T chunkSize;
#endif

    elapsed_time = get_seconds();

#ifdef _OPENMP
    omp_set_num_threads(NUM_THREADS);
#endif

#ifdef _OPENMP
#pragma omp parallel
#endif
    {
        LONG_T i, j, u, n, m, tid, nthreads;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif

#ifdef _OPENMP
        nthreads = omp_get_num_threads();
        tid = omp_get_thread_num();
#else
        tid = 0;
        nthreads = 1;
#endif

        n = N;
        m = M;

        /* Thread 0 allocates the shared work arrays; barriers below
           publish them to the team before first use. */
        if (tid == 0) {
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
            assert(vLock != NULL);
            chunkSize = n/nthreads;
#endif
            pos = (LONG_T *) malloc(m*sizeof(LONG_T));
            assert(pos != NULL);
            degree = (LONG_T *) calloc(n, sizeof(LONG_T));
            assert(degree != NULL);
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(static, chunkSize)
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#pragma omp barrier
#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "Lock initialization time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif
#pragma omp for
#endif
        /* Degree count: pos[i] is edge i's rank among edges leaving u. */
        for (i=0; i<m; i++) {
            u = SDGdata->startVertex[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[u]);
#endif
            pos[i] = degree[u]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[u]);
#endif
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "Degree computation time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(static, chunkSize)
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
        if (tid == 0)
            free(vLock);
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "Lock destruction time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        if (tid == 0) {
            numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T));
            pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T));
        }

#ifdef _OPENMP
#pragma omp barrier
#endif

        /* CSR row offsets: numEdges[u] = sum of degrees of vertices < u. */
        prefix_sums(degree, numEdges, pSums, n);

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "Prefix sums time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp barrier
#endif

        if (tid == 0) {
            free(degree);
            free(pSums);
            w = (WEIGHT_T *) malloc(m*sizeof(WEIGHT_T));
            endV = (VERT_T *) malloc(m* sizeof(VERT_T));
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        /* Scatter each edge into its final CSR slot. */
        for (i=0; i<m; i++) {
            u = SDGdata->startVertex[i];
            j = numEdges[u] + pos[i];
            endV[j] = SDGdata->endVertex[i];
            //TODO:
            //w[j] = SDGdata->weight[i];
            w[j] = 1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr,
                    "Edge data structure construction time: %lf seconds\n",
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        if (tid == 0) {
            free(pos);
            G->n = n;
            G->m = m;
            G->numEdges = numEdges;
            G->endV = endV;
            G->weight = w;
        }
    }

    /* Verification */
#if 0
    fprintf(stderr, "SDG data:\n");
    for (int i=0; i<SDGdata->m; i++) {
        fprintf(stderr, "[%ld %ld %ld] ", SDGdata->startVertex[i],
                SDGdata->endVertex[i], SDGdata->weight[i]);
    }
    fprintf(stderr, "\n");
    for (int i=0; i<G->n + 1; i++) {
        fprintf(stderr, "[%ld] ", G->numEdges[i]);
    }
    fprintf(stderr, "\nGraph:\n");
    for (int i=0; i<G->n; i++) {
        for (int j=G->numEdges[i]; j<G->numEdges[i+1]; j++) {
            fprintf(stderr, "[%ld %ld %ld] ", i, G->endV[j], G->weight[j]);
        }
    }
#endif

    /* The raw tuple arrays are no longer needed once the CSR form exists. */
    free(SDGdata->startVertex);
    free(SDGdata->endVertex);
    free(SDGdata->weight);

    elapsed_time = get_seconds() - elapsed_time;
    return elapsed_time;
}
/// Copy constructor: an omp_lock_t cannot be meaningfully copied, so the
/// source's lock state is deliberately ignored and the new object gets a
/// fresh, unlocked lock of its own.
MutexType(const MutexType& ) { omp_init_lock(&lock); }
/// Default constructor: creates the wrapped OpenMP lock in the unlocked
/// state so the mutex is immediately usable.
MutexType() { omp_init_lock(&lock); }
/**
 * Incrementally grow a tetrahedral triangulation of the cube.
 *
 * Repeatedly picks a random boundary facet, enumerates all "conform"
 * (acute) tetrahedra that can be glued onto it, filters out those that
 * intersect the triangulation built so far, adds one at random, and then
 * prunes the global triangle data set so it stays conform.  Stops when
 * the boundary is empty (success) or no admissible tetrahedron remains
 * (dead end).
 *
 * @param data            global triangle data set being consumed/pruned
 * @param tmp_triang_file optional path for intermediate triangulation dumps
 *                        (the dump code is currently commented out)
 * @param tmp_data_file   optional path for intermediate data dumps (idem)
 * @return the (possibly partial) triangulation; caller owns it.
 *
 * Fix vs. previous revision: the four `&parameters` arguments had been
 * mangled into the non-compiling mojibake `¶meters` (U+00B6 + "meters");
 * restored the address-of expressions.
 */
triangulation triangulate_cube(data_list * data, char * tmp_triang_file, char * tmp_data_file) {
  printf("%s %s\n", tmp_triang_file, tmp_data_file);
  triangulation result = triangulation_init(data_list_dim(data));
  cube_points cube = gen_cube_points(result.dim);

  /* Shared scratch state for the facet-conformity tests below. */
  facet_acute_data parameters;
  parameters.cube = &cube;
  parameters.boundary_func = &triangle_boundary_cube;
  parameters.data = data;
  parameters.store_acute_ind = 1;
  parameters.acute_ind = malloc(sizeof(unsigned short) * cube.len);

  //This list holds all conform tetrahedrons for a given triangle, max size = cube.len
  ptetra tet_list = malloc(sizeof(tetra) * cube.len);
  unsigned short tet_list_len = 0;

  //Lists needed for the dynamic_remove loop
  tri_list check_list, check_list_new;
  check_list = tri_list_init(result.dim, MEM_LIST_FALSE);
  check_list_new = tri_list_init(result.dim, MEM_LIST_FALSE);

  //Start triangle (0,0,0), (rand,0,0), (rand,rand,0)
  result.bound_len = 1;
  result.bound_tri = triangulation_start_facet(data);
  printf("Starting triangulation with facet:\n");
  print_triangle(result.bound_tri);

  /*
   * During this method we are going to operate data that is not thread-safe.
   * To avoid race conditions we need an array of locks. We use a lock for the
   * first two points of a triangle (so need 2d array of locks).
   */
  omp_lock_t ** locks = malloc(sizeof(omp_lock_t *) * cube.len);
  //Initialize the locks (triangular array: row i has cube.len - i entries)
  for (size_t i = 0; i < cube.len; i++){
    locks[i] = malloc(sizeof(omp_lock_t) * (cube.len - i));
    for (size_t j = 0; j < cube.len - i; j++)
      omp_init_lock(&locks[i][j]);
  }

  //While we have triangles on the boundary..
  while (result.bound_len > 0) {
    tri_list_empty(&check_list);
    tri_list_empty(&check_list_new);
    /*
     * We are going to add a tetrahedron on the boundary triangle.
     * To do so, we select a random triangle on the boundary. Then we generate all the
     * acute tetrahedra (above and below) with facets in our possible list.
     * From this list we remove all the tetrahedrons that intersect with our current triangulation.
     * Then we add a random tetrahedron to our triangulation, update the conform list and repeat.
     */
    int rand_bound = rand() % result.bound_len;
    printf("\n\nTotal amount of triangles left:%zu\nExpanding triangulation at boundary triangle: \n", data_list_count(data));
    print_triangle(result.bound_tri + rand_bound);

    //Calculate the conform tetrahedrons above and below
    if (!facet_conform(&result.bound_tri[rand_bound], &parameters)) {
      /* Should be impossible: boundary facets are kept conform. */
      printf("We have a triangle on the boundary that is not conform anymore.\n");
      printf("Whatthefuck? Breaking!\n");
      break;
    }
    tet_list_len = parameters.acute_ind_len;
    printf("Total amount of conform tetrahedrons found for this boundary: %hu\n", tet_list_len);

    //Form explicit list of the tetrahedrons: the boundary facet plus each apex
    for (unsigned short i = 0; i < tet_list_len; i++) {
      copyArr3(tet_list[i].vertices[0], result.bound_tri[rand_bound].vertices[0]);
      copyArr3(tet_list[i].vertices[1], result.bound_tri[rand_bound].vertices[1]);
      copyArr3(tet_list[i].vertices[2], result.bound_tri[rand_bound].vertices[2]);
      copyArr3(tet_list[i].vertices[3], cube.points[parameters.acute_ind[i]]);
    }

    //Remove all the tetrahedrons that intersect with current triangulation.
    filter_tet_list_disjoint_triangulation(tet_list, &tet_list_len, &result);
    printf("Amount of tetrahedrons left after filtering: %hu\n\n",tet_list_len);

    if (tet_list_len == 0) {
      printf("Waarom is deze lijst nu al f*****g leeggefilterd?\n");
      printf("Dead end, helaas pindakaas. Got to %zu\n", result.tetra_len);
      break;
    }

    //Select random tetrahedron disjoint with the current triangulation
    int rand_tet = rand() % tet_list_len;
    /*
     * Add the above tetra to the triangulation.
     * This removes all the boundary triangles that are covered by this tetrahedron
     */
    printf("Adding the following tetra to the triangulation\n");
    print_tetra(tet_list + rand_tet);
    printf("\n\n");
    add_tet_triangulation(tet_list + rand_tet, &result);
    triangulation_print(&result);

    if (!result.bound_len) //If we have no boundaries left, we must be done!!
    {
      printf("No more boundaries left.. WE FINNISHED!??\n");
      break;
    }
    //Consistency check
    if (!triangulation_consistent(&result, &parameters)) {
      printf("Triangulation not consistent after adding the tetrahedron. Breaking.\n");
      break;
    }
    /*
     * Calculate a list of all the triangles we are going to remove
     */
    double time_removed = omp_get_wtime();
    printf("Removing triangles not disjoint with new tetrahedron\n");
    size_t removed = filter_intersection_data_list_tet(data, &check_list, tet_list + rand_tet, locks);
    printf("Removed %zu triangles that are not disjoint with the new tetrahedron\n", removed);
    printf("The check_list has size %zu\n", tri_list_count(&check_list));
    printf("Time took to removed triangles: %g seconds\n", omp_get_wtime()-time_removed);

    if (!triangulation_consistent(&result, &parameters)) {
      printf("After filtering the memory list we have a non consistent triangulation. Break\n");
      break;
    }
    //Do two iterations
    facets_conform_dynamic_remove(data, &result, 1, &check_list, &check_list_new, locks);
    if (!triangulation_consistent(&result, &parameters)) {
      printf("Triangulation not consistent anymore after conforming the data set.. Breaking\n");
      break;
    }
    /*mem_list_cube_compress(&data->mem_list);
    if (tmp_triang_file && tmp_data_file) {
      triangulation_to_file(&result, tmp_triang_file);
      data_list_to_file(data, tmp_data_file, MEM_LIST_SAVE_CLEAN);
    }
    */
  }

  /* Tear down the lock matrix and all scratch storage. */
  for (size_t i = 0; i < cube.len; i++){
    for (size_t j = 0; j < cube.len - i; j++)
      omp_destroy_lock(&locks[i][j]);
    free(locks[i]);
  }
  free(locks);
  free(cube.points);
  free(parameters.acute_ind);
  free(tet_list);
  tri_list_free(&check_list);
  tri_list_free(&check_list_new);
  printf("Triangulation has length of %zu\n", result.tetra_len);
  return result;
}
// Constructor (the enclosing class declaration is outside this chunk; the
// identifier looks machine-generated).  When compiled with OpenMP support
// (COSMO_OMP), it initializes the member lock `lock_` so later
// omp_set_lock/omp_unset_lock calls are valid; without COSMO_OMP it is a
// no-op.  NOTE(review): no matching omp_destroy_lock is visible here —
// presumably the destructor releases it; confirm in the class definition.
inline OS23459783987() {
#ifdef COSMO_OMP
  omp_init_lock(&lock_);
#endif
}
/* Parallel quicksort of array[0..count-1] using a work-stealing scheme:
   each thread keeps a private stack of [lo,hi] subranges and overflows
   work into a shared global stack guarded by a single OpenMP lock.
   `busy` counts threads that currently hold work; when it drops to zero
   and the global stack is empty, everyone terminates.  Small ranges
   (< THRESHOLD) fall back to insertion sort. */
static void
sort1 (int *array, int count)
{
  omp_lock_t lock;                    /* guards global_stack and busy */
  struct int_pair_stack global_stack; /* shared pool of pending ranges */
  int busy = 1;                       /* threads holding work; thread 0 starts with all of it */
  int num_threads;

  omp_init_lock (&lock);
  init_int_pair_stack (&global_stack);
  #pragma omp parallel firstprivate (array, count)
  {
    int lo = 0, hi = 0, mid, next_lo, next_hi;
    bool idle = true;                 /* true while this thread owns no range */
    struct int_pair_stack local_stack;

    init_int_pair_stack (&local_stack);
    if (omp_get_thread_num () == 0)
      {
	/* Thread 0 seeds the computation with the whole array.  */
	num_threads = omp_get_num_threads ();
	hi = count - 1;
	idle = false;
      }

    for (;;)
      {
	/* Small range: finish it directly with insertion sort.  */
	if (hi - lo < THRESHOLD)
	  {
	    insertsort (array, lo, hi);
	    lo = hi;
	  }
	if (lo >= hi)
	  {
	    /* Current range exhausted: refill from the local stack, or
	       fall back to the shared stack (stealing).  */
	    if (size_int_pair_stack (&local_stack) == 0)
	      {
	      again:
		omp_set_lock (&lock);
		if (size_int_pair_stack (&global_stack) == 0)
		  {
		    /* No shared work either.  Register as idle; if every
		       thread is idle, the sort is complete.  */
		    if (!idle)
		      busy--;
		    if (busy == 0)
		      {
			omp_unset_lock (&lock);
			break;
		      }
		    omp_unset_lock (&lock);
		    idle = true;
		    /* Spin (unlocked) until work appears or everyone
		       finishes, then re-check under the lock.  */
		    while (size_int_pair_stack (&global_stack) == 0
			   && busy)
		      busy_wait ();
		    goto again;
		  }
		/* Work available: leaving idle state bumps busy back up.  */
		if (idle)
		  busy++;
		pop_int_pair_stack (&global_stack, &lo, &hi);
		omp_unset_lock (&lock);
		idle = false;
	      }
	    else
	      pop_int_pair_stack (&local_stack, &lo, &hi);
	  }

	/* Partition and keep the larger half; the smaller half is either
	   sorted immediately or pushed for later/other threads.  */
	mid = partition (array, lo, hi);
	if (mid - lo < hi - mid)
	  {
	    next_lo = mid;
	    next_hi = hi;
	    hi = mid - 1;
	  }
	else
	  {
	    next_lo = lo;
	    next_hi = mid - 1;
	    lo = mid;
	  }
	if (next_hi - next_lo < THRESHOLD)
	  insertsort (array, next_lo, next_hi);
	else
	  {
	    /* Share the deferred half through the global stack while it
	       is under-filled (cheap unlocked pre-check, confirmed under
	       the lock); otherwise keep it private.  */
	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
	      {
		int size;

		omp_set_lock (&lock);
		size = size_int_pair_stack (&global_stack);
		if (size < num_threads - 1 && size < STACK_SIZE)
		  push_int_pair_stack (&global_stack, next_lo, next_hi);
		else
		  push_int_pair_stack (&local_stack, next_lo, next_hi);
		omp_unset_lock (&lock);
	      }
	    else
	      push_int_pair_stack (&local_stack, next_lo, next_hi);
	  }
      }
  }
  omp_destroy_lock (&lock);
}
void Shape::splitshapes(vector<Shape*> &shapes, ViewProgress *progress) { int n_tr = (int)triangles.size(); if (progress) progress->start(_("Split Shapes"), n_tr); int progress_steps = max(1,(int)(n_tr/100)); vector<bool> done(n_tr); bool cont = true; // make list of adjacent triangles for each triangle vector< vector<uint> > adj(n_tr); if (progress) progress->set_label(_("Split: Sorting Triangles ...")); #ifdef _OPENMP omp_lock_t progress_lock; omp_init_lock(&progress_lock); #pragma omp parallel for schedule(dynamic) #endif for (int i = 0; i < n_tr; i++) { if (progress && i%progress_steps==0) { #ifdef _OPENMP omp_set_lock(&progress_lock); #endif cont = progress->update(i); #ifdef _OPENMP omp_unset_lock(&progress_lock); #endif } vector<uint> trv; for (int j = 0; j < n_tr; j++) { if (i!=j) { bool add = false; if (j<i) // maybe(!) we have it already for (uint k = 0; k<adj[j].size(); k++) { if ((int)adj[j][k] == i) { add = true; break; } } add |= (triangles[i].isConnectedTo(triangles[j], 0.01)); if (add) trv.push_back(j); } } adj[i] = trv; if (!cont) i=n_tr; } if (progress) progress->set_label(_("Split: Building shapes ...")); // triangle indices of shapes vector< vector<uint> > shape_tri; for (int i = 0; i < n_tr; i++) done[i] = false; for (int i = 0; i < n_tr; i++) { if (progress && i%progress_steps==0) cont = progress->update(i); if (!done[i]){ cerr << _("Shape ") << shapes.size()+1 << endl; vector<uint> current; addtoshape(i, adj, current, done); Shape *shape = new Shape(); shapes.push_back(shape); shapes.back()->triangles.resize(current.size()); for (uint i = 0; i < current.size(); i++) shapes.back()->triangles[i] = triangles[current[i]]; shapes.back()->CalcBBox(); } if (!cont) i=n_tr; } if (progress) progress->stop("_(Done)"); }
/* Approximate betweenness centrality (Brandes' algorithm, SSCA#2-style
   parallelization): runs BFS from up to 2^K4approx source vertices chosen
   from a random permutation, accumulating path counts (sig), BFS levels
   (d) and dependencies (del), then sums each vertex's dependency into
   BC[].  When `filter` is 1, edges whose weight has its low 3 bits clear
   are skipped.  Concurrency uses one omp lock per vertex; each thread
   grows a private frontier (myS) that is merged into the shared stack S
   between BFS phases.  Returns elapsed wall-clock seconds for the timed
   portion. */
double betweennessCentrality(graph* G, DOUBLE_T* BC, int filter) {
    VERT_T *S;          /* stack of vertices in the order of non-decreasing
                           distance from s. Also used to implicitly
                           represent the BFS queue */
    plist* P;           /* predecessors of a vertex v on shortest paths
                           from s */
    DOUBLE_T* sig;      /* No. of shortest paths */
    LONG_T* d;          /* Length of the shortest path between every pair */
    DOUBLE_T* del;      /* dependency of vertices */
    LONG_T *in_degree, *numEdges, *pSums;
    LONG_T *pListMem;   /* backing storage carved into the P[].list slices */
    LONG_T* Srcs;       /* randomly permuted source vertex ids */
    LONG_T *start, *end;    /* per-phase [start,end) windows into S */
    LONG_T MAX_NUM_PHASES;
    LONG_T *psCount;    /* per-thread frontier sizes, prefix-summed to place
                           each thread's chunk in S */
#ifdef _OPENMP
    omp_lock_t* vLock;  /* one lock per vertex */
    LONG_T chunkSize;
#endif
    int seed = 2387;
    double elapsed_time;

#ifdef _OPENMP
#pragma omp parallel
    {
#endif
        VERT_T *myS, *myS_t;    /* thread-local frontier and realloc temp */
        LONG_T myS_size;
        LONG_T i, j, k, p, count, myCount;
        LONG_T v, w, vert;
        LONG_T numV, num_traversals, n, m, phase_num;
        LONG_T tid, nthreads;
        int* stream;            /* SPRNG random stream handle */
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif

#ifdef _OPENMP
        int myLock;
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = 2^K4approx */
        numV = 1<<K4approx;
        n = G->n;
        m = G->m;

        /* Permute vertices */
        if (tid == 0) {
            Srcs = (LONG_T *) malloc(n*sizeof(LONG_T));
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

        /* Random swap-based shuffle; omp_test_lock on both slots avoids
           deadlock — a swap is simply skipped if either lock is taken. */
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n*sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif
            }
        }

#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "Vertex ID permutation time: %lf seconds\n", elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Start timing code from here */
        if (tid == 0) {
            elapsed_time = get_seconds();
#ifdef VERIFYK4
            MAX_NUM_PHASES = 2*sqrt(n);
#else
            MAX_NUM_PHASES = 50;
#endif
        }

#ifdef _OPENMP
#pragma omp barrier
#endif

        /* Initialize predecessor lists */
        /* The size of the predecessor list of each vertex is bounded by
           its in-degree. So we first compute the in-degree of every
           vertex */
        if (tid == 0) {
            P = (plist *) calloc(n, sizeof(plist));
            in_degree = (LONG_T *) calloc(n+1, sizeof(LONG_T));
            numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T));
            pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T));
        }
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        prefix_sums(in_degree, numEdges, pSums, n);

        if (tid == 0) {
            pListMem = (LONG_T *) malloc(m*sizeof(LONG_T));
        }
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        /* Carve pListMem into one slice per vertex, sized by in-degree. */
        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n", elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Allocate shared memory */
        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);

            S = (VERT_T *) malloc(n*sizeof(VERT_T));
            sig = (DOUBLE_T *) malloc(n*sizeof(DOUBLE_T));
            d = (LONG_T *) malloc(n*sizeof(LONG_T));
            del = (DOUBLE_T *) calloc(n, sizeof(DOUBLE_T));
            start = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T));
            end = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T));
            psCount = (LONG_T *) malloc((nthreads+1)*sizeof(LONG_T));
        }

        /* local memory for each thread */
        myS_size = (2*n)/nthreads;
        myS = (LONG_T *) malloc(myS_size*sizeof(LONG_T));
        num_traversals = 0;
        myCount = 0;

#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n", elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* One BFS + dependency accumulation per (non-isolated) source,
           stopping after numV traversals. */
        for (p=0; p<n; p++) {
            i = Srcs[p];
            //printf ("%d \n", i);
            // i = p;
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            if (num_traversals == numV + 1) {
                break;
            }

            if (tid == 0) {
                sig[i] = 1;
                d[i] = 0;
                S[0] = i;
                start[0] = 0;
                end[0] = 1;
            }

            count = 1;
            phase_num = 0;

#ifdef _OPENMP
#pragma omp barrier
#endif

            /* Forward BFS: expand one level (phase) at a time. */
            while (end[phase_num] - start[phase_num] > 0) {

                myCount = 0;
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(dynamic)
#endif
                for (vert = start[phase_num]; vert < end[phase_num]; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {
                        /* Optional edge filter on the weight's low bits. */
                        if ((G->weight[j] & 7) == 0 && filter==1)
                            continue;
                        w = G->endV[j];
                        if (v != w) {
#ifdef _OPENMP
                            myLock = omp_test_lock(&vLock[w]);
                            if (myLock) {
#endif
                                /* w found for the first time? */
                                if (d[w] == -1) {
                                    if (myS_size == myCount) {
                                        /* Resize myS */
                                        myS_t = (LONG_T *) malloc(2*myS_size*sizeof(VERT_T));
                                        memcpy(myS_t, myS, myS_size*sizeof(VERT_T));
                                        free(myS);
                                        myS = myS_t;
                                        myS_size = 2*myS_size;
                                    }
                                    myS[myCount++] = w;
                                    d[w] = d[v] + 1;
                                    sig[w] = sig[v];
                                    P[w].list[P[w].count++] = v;
                                } else if (d[w] == d[v] + 1) {
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                }
#ifdef _OPENMP
                                omp_unset_lock(&vLock[w]);
                            } else {
                                /* Lock contended: w was already discovered
                                   by another thread this phase, so only
                                   the path-count/predecessor update
                                   remains (done under a blocking lock). */
                                if ((d[w] == -1) || (d[w] == d[v]+ 1)) {
                                    omp_set_lock(&vLock[w]);
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                    omp_unset_lock(&vLock[w]);
                                }
                            }
#endif
                        }
                    }
                }

                /* Merge all local stacks for next iteration */
                phase_num++;
                psCount[tid+1] = myCount;

#ifdef _OPENMP
#pragma omp barrier
#endif

                /* Thread 0 prefix-sums frontier sizes so each thread
                   knows where its chunk lands in S. */
                if (tid == 0) {
                    start[phase_num] = end[phase_num-1];
                    psCount[0] = start[phase_num];
                    for(k=1; k<=nthreads; k++) {
                        psCount[k] = psCount[k-1] + psCount[k];
                    }
                    end[phase_num] = psCount[nthreads];
                }

#ifdef _OPENMP
#pragma omp barrier
#endif

                for (k = psCount[tid]; k < psCount[tid+1]; k++) {
                    S[k] = myS[k-psCount[tid]];
                }

#ifdef _OPENMP
#pragma omp barrier
#endif
                count = end[phase_num];
            }

            phase_num--;

#ifdef _OPENMP
#pragma omp barrier
#endif

            //printf ("%d\n", phase_num);
            /* Backward sweep: accumulate dependencies level by level. */
            while (phase_num > 0) {
#ifdef _OPENMP
#pragma omp for
#endif
                for (j=start[phase_num]; j<end[phase_num]; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
#ifdef _OPENMP
                        omp_set_lock(&vLock[v]);
#endif
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
#ifdef _OPENMP
                        omp_unset_lock(&vLock[v]);
#endif
                    }
                    BC[w] += del[w];
                }

                phase_num--;

#ifdef _OPENMP
#pragma omp barrier
#endif
            }

            /* Reset only the vertices touched by this traversal. */
#ifdef _OPENMP
            chunkSize = n/nthreads;
#pragma omp for schedule(static, chunkSize)
#endif
            for (j=0; j<count; j++) {
                w = S[j];
                //fprintf (stderr, "w: %d\n", w);
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }

#ifdef _OPENMP
#pragma omp barrier
#endif
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n", elapsed_time_part);
        }
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        free(myS);

        if (tid == 0) {
            free(S);
            free(pListMem);
            free(P);
            free(sig);
            free(d);
            free(del);
#ifdef _OPENMP
            free(vLock);
#endif
            free(start);
            free(end);
            free(psCount);
            elapsed_time = get_seconds() - elapsed_time;
            free(Srcs);
        }

        free_sprng(stream);
#ifdef _OPENMP
    }
#endif

    /* Verification */
#ifdef VERIFYK4
    /* For the SSCA#2 torus inputs every vertex has the same closed-form
       BC value; compare against it. */
    double BCval;
    if (SCALE % 2 == 0) {
        BCval = 0.5*pow(2, 3*SCALE/2)-pow(2, SCALE)+1.0;
    } else {
        BCval = 0.75*pow(2, (3*SCALE-1)/2)-pow(2, SCALE)+1.0;
    }
    int failed = 0;
    for (int i=0; i<G->n; i++) {
        if (round(BC[i] - BCval) != 0) {
            failed = 1;
            break;
        }
    }
    if (failed) {
        fprintf(stderr, "Kernel 4 failed validation!\n");
    } else {
        fprintf(stderr, "Kernel 4 validation successful!\n");
    }
#endif

    /* NOTE(review): unconditional dump of every BC value — looks like a
       debug leftover; consider guarding it before production use. */
    for (int i = 0; i < G->n; i++)
        printf ("BC: %d %f\n",i, BC[i]);

    return elapsed_time;
}