Esempio n. 1
0
/*
 * Demonstrates non-blocking lock acquisition with omp_test_lock().
 *
 * Four threads compete for the file-scope lock simple_lock. A thread that
 * fails to obtain the lock prints a "busy" message and tries again; once it
 * holds the lock it prints two messages and releases it.
 */
int main(int argc, const char * argv[]){
    omp_init_lock(&simple_lock);

    #pragma omp parallel num_threads(4)
    {
        /* The thread number does not change inside the region. */
        const int me = omp_get_thread_num();

        /* Poll the lock; report every failed attempt. */
        for (;;) {
            if (omp_test_lock(&simple_lock)) {
                break;
            }
            printf("=== Hilo %d: bloqueo ocupado\n", me);
        }

        /* Lock is held from here until omp_unset_lock(). */
        printf("+++ Hilo %d: Consegui el bloque\n", me);
        printf("--- Hilo %d: Libere el bloqueo\n", me);
        omp_unset_lock(&simple_lock);
    }

    omp_destroy_lock(&simple_lock);
}
// Computes one normal per face (stored in `norms`) and one accumulated,
// area-scaled normal per vertex (stored in `vertex_norms`).
//
// For every face the cross product of two edge vectors is added, un-normalized,
// to the rows of `vertex_norms` belonging to the face's three vertices, and its
// magnitude is added to `area_sum` for those vertices. Afterwards each row of
// `vertex_norms` is divided by the corresponding `area_sum` entry.
void BasicMesh::ComputeNormals() {
  // Scoped timer; the format string asks for the wall-clock time (%w).
  boost::timer::auto_cpu_timer timer("[BasicMesh] Normals computation time = %w seconds.\n");

  norms.resize(faces.rows(), 3);
  // NOTE(review): if `verts` is an Eigen matrix, size() is rows*cols rather
  // than the number of vertices -- confirm that this is the intended row count.
  vertex_norms.resize(verts.size(), 3);
  vertex_norms.setZero();
  vector<double> area_sum(verts.size(), 0.0);

  // One lock guards every write to vertex_norms and area_sum in the parallel
  // loop below (presumably because several faces can reference the same vertex).
  omp_lock_t writelock;

  omp_init_lock(&writelock);
#pragma omp parallel for
  for(int i=0;i<faces.rows();++i) {
    // Vertex indices of face i.
    auto vidx0 = faces(i, 0);
    auto vidx1 = faces(i, 1);
    auto vidx2 = faces(i, 2);

    auto v0 = Vector3d(verts.row(vidx0));
    auto v1 = Vector3d(verts.row(vidx1));
    auto v2 = Vector3d(verts.row(vidx2));

    // Edge vectors from v0 and their cross product; the magnitude of the
    // cross product is twice the triangle's area.
    auto v0v1 = v1 - v0;
    auto v0v2 = v2 - v0;
    auto n = v0v1.cross(v0v2);
    double area = n.norm();

    // Accumulate the un-normalized normal and its magnitude on each corner.
    omp_set_lock(&writelock);
    vertex_norms.row(vidx0) += n;
    vertex_norms.row(vidx1) += n;
    vertex_norms.row(vidx2) += n;

    area_sum[vidx0] += area;
    area_sum[vidx1] += area;
    area_sum[vidx2] += area;
    omp_unset_lock(&writelock);

    // Row i of norms is written only by iteration i, so no lock is needed.
    n.normalize();
    norms.row(i) = n;
  }
  omp_destroy_lock(&writelock);

  // NOTE(review): area_sum[i] is still 0.0 for any row no face contributed
  // to, so this division produces non-finite values for such rows.
#pragma omp parallel for
  for(int i=0;i<vertex_norms.rows();++i) {
    vertex_norms.row(i) /= area_sum[i];
  }
}
Esempio n. 3
0
/**
 * Default constructor for a FlatSourceRegion.
 *
 * Clears the material pointer, sets the volume to zero and the quadrature
 * id to -1, zeroes the flux and source of every energy group and, when the
 * code is built with USE_OPENMP, initializes the region's flux lock.
 */
FlatSourceRegion::FlatSourceRegion() 
{
    _quad_id = -1;
    _volume = 0.0;
    _material = NULL;

    /* Zero the per-energy-group arrays */
    int group = 0;
    while (group < NUM_ENERGY_GROUPS)
    {
        _source[group] = 0.0;
        _flux[group] = 0.0;
        ++group;
    }

#if USE_OPENMP
    omp_init_lock(&_flux_lock);
#endif
}
/*
 * Resets every cell of the world_size x world_size global grid `world`:
 * clears the five conflict slots, marks the cell as empty, zeroes the breed
 * and count fields and initializes the cell's OpenMP lock.
 *
 * Rows are distributed over the threads with a guided schedule.
 *
 * Returns 0.
 */
int initWorld(int world_size){
	
	int i, j, z;
	/* The schedule clause requires a positive chunk size; world_size/8 is
	 * zero for grids with fewer than 8 rows, so clamp it to 1. */
	int chunk = world_size / 8;

	if (chunk < 1)
		chunk = 1;
	
	#pragma omp parallel for private(i, j, z) schedule(guided, chunk)
	for(i=0; i < world_size; i++)
		for(j=0; j < world_size; j++){
			for(z=0; z < 5; z++)
				world[i][j].conflicts[z]=NULL;
			world[i][j].type = empty;
			world[i][j].breed = 0;
			world[i][j].count=0;
			omp_init_lock(&(world[i][j].lock_count));
		}

	return 0;
}
Esempio n. 5
0
/**
 * Writes every vertex of the graph as a FASTA-style record to contig_file,
 * appends its multiplicity (a raw uint16_t) to multi_file and counts the
 * record's sequence length in histo.
 *
 * Vertices are processed in parallel; output_lock serializes the writes to
 * both files, the running contig id and the histogram, so records appear in
 * whatever order the threads reach them.
 *
 * Non-loop vertices whose start node has in-degree 0 and whose end node has
 * out-degree 0 are additionally flagged as deleted (they are still written).
 *
 * @param contig_file  destination for the text records
 * @param multi_file   destination for the binary multiplicities
 * @param histo        cleared, then filled with length -> number of records
 */
void UnitigGraph::OutputInitUnitigs(FILE *contig_file, FILE *multi_file, std::map<int64_t, int> &histo) {
    vertexID_t output_id = 0;
    omp_lock_t output_lock;
    omp_init_lock(&output_lock);
    histo.clear();

#pragma omp parallel for
    for (vertexID_t i = 0; i < vertices_.size(); ++i) {
        // Average depth per base, rounded to nearest and capped at kMaxMulti_t.
        uint16_t multi = std::min(kMaxMulti_t, int((double)vertices_[i].depth / vertices_[i].length + 0.5));
        std::string label = VertexToDNAString(sdbg_, vertices_[i]);

        // Variadic arguments must match their conversion specifiers exactly,
        // so the length is converted to the type that %ld expects.
        const long label_len = static_cast<long>(label.length());

        if (vertices_[i].is_loop) {
            omp_set_lock(&output_lock);
            // The id is printed as long long so the call is well defined
            // whatever integer type vertexID_t is.
            fprintf(contig_file, ">contig%lld_length_%ld_multi_%d_loop\n%s\n", 
                                 static_cast<long long>(output_id),
                                 label_len,
                                 multi,
                                 label.c_str());
            fwrite(&multi, sizeof(uint16_t), 1, multi_file);
            ++output_id;
            ++histo[label.length()];
            omp_unset_lock(&output_lock);
        } else {
            int indegree = sdbg_->Indegree(vertices_[i].start_node);
            int outdegree = sdbg_->Outdegree(vertices_[i].end_node);
            if (indegree == 0 && outdegree == 0) {
                vertices_[i].is_deleted = true;
            }
            omp_set_lock(&output_lock);
            fprintf(contig_file, ">contig%lld_length_%ld_multi_%d_in_%d_out_%d\n%s\n", 
                                 static_cast<long long>(output_id), 
                                 label_len, 
                                 multi, 
                                 indegree, 
                                 outdegree, 
                                 label.c_str());
            fwrite(&multi, sizeof(uint16_t), 1, multi_file);
            ++output_id;
            ++histo[label.length()];
            omp_unset_lock(&output_lock);
        }
    }

    omp_destroy_lock(&output_lock);
}
Esempio n. 6
0
/*
 * Starts NHILOS threads that each add 1 to the shared counter `suma`, then
 * prints the total.
 *
 * The lock C1 is acquired and released by the same thread around the
 * increment: a simple lock may only be unset by the thread that set it, and
 * the read-modify-write of `suma` needs mutual exclusion.
 */
int main (int argc, char *argv[])
{
  /* id is initialized because the copies used inside the region are private;
   * the value printed below is this original one. */
  int id = 0, suma = 0;
  omp_lock_t C1;

  omp_init_lock(&C1);
  omp_set_num_threads(NHILOS);

  #pragma omp parallel private(id) shared(suma)
  {
    id = omp_get_thread_num();

    omp_set_lock(&C1);
    suma=suma + 1;
    omp_unset_lock(&C1);
  }

  omp_destroy_lock(&C1);

  printf("\nbye, suma total=%d, id:%d\n\n",suma,id);
  sleep(1);
  return 0;
}
Esempio n. 7
0
// Program entry point.
//
// Expects exactly two arguments: the number of job queues P (argv[1]) and a
// second argument that is handed to initialize() (argv[2]). Sets up the
// global state, creates P job queues with one lock each, seeds queue 0 with
// the initial job, runs the parallel search and prints the node count and
// the timings.
int main(int argc, char **argv) {
  int i;
  struct timeval startTime, endTime;

  if (argc != 3)
    usage(argv[0]);

  // Record start of total time
  gettimeofday(&startTime, NULL);

  // Initialize global variables and data-structures.
  P = atoi(argv[1]);
  // atoi() yields 0 for non-numeric input; with P < 1 the queue array would
  // be empty and the write to jobQueues[0] below would be out of bounds.
  if (P < 1)
    usage(argv[0]);

  initialize(argv[2], &cells, &constraints);
  nodeCount = 0;
  found = 0;

  // calloc takes (element count, element size)
  jobQueues = (job_queue_t*)calloc((size_t)P, sizeof(job_queue_t));
  if (!jobQueues)
    unixError("Failed to allocated memory for the job queues");

  for (i = 0; i < P; i++)
    omp_init_lock(&(jobQueues[i].headLock));

  // Add initial job (nothing assigned) to root processor
  jobQueues[0].tail = 1;


  runParallel(P);
  if (!found)
    appError("No solution found");

  // Use final time to calculate total time
  gettimeofday(&endTime, NULL);
  totalTime = TIME_DIFF(endTime, startTime);

  // Print out number of nodes visited and calculated times
  printf("Nodes Visited: %lld\n", nodeCount);
  printf("Computation Time = %.3f millisecs\n", compTime);
  printf("      Total Time = %.3f millisecs\n", totalTime);

  return 0;
}
Esempio n. 8
0
/*
 * Finds the sub-rectangle of the global matrix A (task.M rows, task.N
 * columns) with the largest element sum.
 *
 * A is first turned, in place, into its 2-D prefix-sum table; the sum of any
 * rectangle (i0,j0)-(i,j) is then obtained from four table entries. All
 * rectangles are enumerated in parallel over the bottom row i.
 *
 * Each thread tracks its own best rectangle and merges it into the shared
 * result once, under the lock, comparing again while the lock is held.
 * Comparing outside the lock and assigning inside it lets a smaller sum
 * overwrite a larger one written in between.
 *
 * Returns { top row, left column, bottom row, right column, sum }. If no
 * rectangle sum exceeds the original A[0][0], the result is
 * { 0, 0, 0, 0, A[0][0] }.
 */
struct frame dailyOne(struct cortage task) {
    int i, j;
    int maxSum = A[0][0];

    int maxI0 = 0, maxJ0 = 0, maxI1 = 0, maxJ1 = 0;

    omp_lock_t lock;
    omp_init_lock(&lock);

    /* H aliases A: the prefix sums overwrite the input matrix. */
    int **H = A;
    for (i = 0; i < task.M; i++)
        for (j = 0; j < task.N; j++)
            H[i][j] =
                (i ? H[i-1][j] : 0)	+ (j ? H[i][j-1] : 0) + A[i][j]
                - ((i && j) ? H[i-1][j-1] : 0);

    /* Starting value for the per-thread maxima; never written after this
     * point, so threads may read it without synchronization. */
    const int seedSum = maxSum;

    #pragma omp parallel
    {
        int bestSum = seedSum;
        int bestI0 = 0, bestJ0 = 0, bestI1 = 0, bestJ1 = 0;
        int r, c, r0, c0, currentSum;

        #pragma omp for nowait
        for (r = 0; r < task.M; r++)
            for (c = 0; c < task.N; c++)
                for (r0 = 0; r0 <= r; r0++)
                    for (c0 = 0; c0 <= c; c0++) {

                        currentSum = H[r][c]
                                     - ( r0 ? H[r0 - 1][c] : 0 )
                                     - ( c0 ? H[r][c0 - 1] : 0 )
                                     + ((r0 && c0) ? H[r0-1][c0-1] : 0);

                        if (currentSum > bestSum) {
                            bestSum = currentSum;
                            bestI0 = r0;
                            bestJ0 = c0;
                            bestI1 = r;
                            bestJ1 = c;
                        }
                    }

        /* Merge this thread's best into the shared result. */
        omp_set_lock(&lock);
        if (bestSum > maxSum) {
            maxSum = bestSum;
            maxI0 = bestI0;
            maxJ0 = bestJ0;
            maxI1 = bestI1;
            maxJ1 = bestJ1;
        }
        omp_unset_lock(&lock);
    }

    omp_destroy_lock(&lock);

    struct frame result = { maxI0, maxJ0, maxI1, maxJ1, maxSum };
    return result;
}
Esempio n. 9
0
int main(int argc, char *argv[]) { 

  double start1,start2,end1,end2;
  int r;
  int i;
  
  /* Init locks */
  for(i=0;i<MAX_PROCS;i++) omp_init_lock(&(remaining_iters_lock[i]));

  init1(); 

  start1 = omp_get_wtime(); 

  for (r=0; r<reps; r++){ 
    runloop(1);
  } 

  end1  = omp_get_wtime();  

  valid1(); 

  printf("Total time for %d reps of loop 1 = %f\n",reps, (float)(end1-start1)); 


  init2(); 

  start2 = omp_get_wtime(); 

  for (r=0; r<reps; r++){ 
    runloop(2);
  } 

  end2  = omp_get_wtime(); 

  valid2(); 

  printf("Total time for %d reps of loop 2 = %f\n",reps, (float)(end2-start2)); 

  for(i=0;i<MAX_PROCS;i++) omp_destroy_lock(&(remaining_iters_lock[i]));

} 
Esempio n. 10
0
/*
 * Runs four OpenMP sections that all take the same file-scope lock (mylock).
 * Each section holds the lock while it sleeps for one second and prints a
 * numbered greeting together with the number of the executing thread, so
 * the four bodies never overlap in time.
 */
int main()
{
	omp_init_lock(&mylock);

	/* Combined construct: a parallel region containing only the sections. */
#pragma omp parallel sections
	{
#pragma omp section
		{
			/* first greeting */
			omp_set_lock(&mylock);
			sleep(1);
			printf("[%d] 1. Hello world\n", omp_get_thread_num());
			omp_unset_lock(&mylock);
		}

#pragma omp section
		{
			/* second greeting */
			omp_set_lock(&mylock);
			sleep(1);
			printf("[%d] 2. Hello world\n", omp_get_thread_num());
			omp_unset_lock(&mylock);
		}

#pragma omp section
		{
			/* third greeting */
			omp_set_lock(&mylock);
			sleep(1);
			printf("[%d] 3. Hello world\n", omp_get_thread_num());
			omp_unset_lock(&mylock);
		}

#pragma omp section
		{
			/* fourth greeting */
			omp_set_lock(&mylock);
			sleep(1);
			printf("[%d] 4. Hello world\n", omp_get_thread_num());
			omp_unset_lock(&mylock);
		}
	} /* parallel sections */

	omp_destroy_lock(&mylock);
	return 0;
}
Esempio n. 11
0
/**
 * @brief Initializes Cmfd object for acceleration prior to source iteration.
 * @details Calls the parent class implementation, releases any previously
 *          allocated surface currents array and, if CMFD is switched on for
 *          the Mesh, allocates a new surface currents array together with
 *          an array of OpenMP locks (8 per Mesh cell), initializing every
 *          lock.
 */
void CPUSolver::initializeCmfd() {

  /* Call parent class method */
  Solver::initializeCmfd();

  /* Delete old Cmfd Mesh surface currents array if it exists. The pointer is
   * reset so that, when CMFD is off and nothing is allocated below, a later
   * call to this method does not delete the same array a second time. */
  if (_surface_currents != NULL) {
    delete [] _surface_currents;
    _surface_currents = NULL;
  }

  int size;

  /* Allocate memory for the Cmfd Mesh surface currents array */
  try{

    /* Allocate an array for the Cmfd Mesh surface currents */
    if (_cmfd->getMesh()->getCmfdOn()){
      size = _num_mesh_cells * _cmfd->getNumCmfdGroups() * 8;
      log_printf(NORMAL, "creating surface currents of size: %i", size);
      _surface_currents = new double[size];
    }

  }
  catch(std::exception &e) {
    log_printf(ERROR, "Could not allocate memory for the Solver's Cmfd "
               "Mesh surface currents. Backtrace:%s", e.what());
  }

  if (_cmfd->getMesh()->getCmfdOn()){

    /* Initialize an array of OpenMP locks for each Cmfd Mesh surface.
     * The same count is used for the allocation and for the initialization
     * loop so the loop can neither run past the array nor leave allocated
     * locks uninitialized.
     * NOTE(review): a lock array left over from an earlier call is not
     * released here -- confirm whether this method can run more than once. */
    const int num_locks = _cmfd->getMesh()->getNumCells() * 8;
    _mesh_surface_locks = new omp_lock_t[num_locks];

      /* Loop over all mesh surfaces to initialize OpenMP locks */
      #pragma omp parallel for schedule(guided)
      for (int r=0; r < num_locks; r++)
          omp_init_lock(&_mesh_surface_locks[r]);
    }

  return;
}
Esempio n. 12
0
 /**
  * Builds a fragility function from the onset distribution of each damage
  * state.
  *
  * The onsets are copied into damage_states in the given order. Every onset
  * must have a get_mu_lnX() value greater than that of the one before it.
  *
  * @param onsets  one distribution per damage state; must not be empty
  * @throws std::invalid_argument("onsets") if onsets is empty or an entry's
  *         get_mu_lnX() is less than or equal to the previous entry's
  */
 FragilityFn::FragilityFn(std::vector<LogNormalDist> onsets)
 {
     omp_init_lock(&lock);
     omp_set_lock(&lock);

     bool valid = !onsets.empty();
     if (valid) {
         damage_states.resize(onsets.size());
         /* NAN makes the first comparison false, so the first onset is
          * always accepted. */
         double mean = NAN;
         for (unsigned int i=0; i < onsets.size(); i++) {
             if (onsets[i].get_mu_lnX() <= mean) {
                 valid = false;
                 break;
             }
             damage_states[i] = onsets[i];
             mean = damage_states[i].get_mu_lnX();
         }
     }

     /* Single release point: the lock is never left held, whichever way the
      * validation ends. */
     omp_unset_lock(&lock);

     if (!valid) {
         /* The destructor does not run for an object whose constructor
          * throws, so the lock is destroyed here. */
         omp_destroy_lock(&lock);
         throw std::invalid_argument("onsets");
     }
 };
Esempio n. 13
0
/**
 * Default constructor.
 *
 * Initializes the object's lock, fills `params` through InitTrainParams(),
 * creates the statistics and example-chooser helpers (both receive `this`)
 * and then puts every remaining member into its starting state.
 */
StructuredSVM::StructuredSVM() {
  omp_init_lock(&my_lock);

  InitTrainParams(&params);
  stats = new StructuredSVMStatistics(this);
  chooser = new StructuredSVMExampleChooser(this);

  // Members that start out as NULL
  sum_w = NULL;
  u_i_buff = NULL;
  modelfile = NULL;
  trainfile = NULL;
  validationfile = NULL;
  trainset = NULL;
  exampleIdsToIndices = NULL;
  exampleIndicesToIds = NULL;

  // Members that start out as zero
  sizePsi = 0;
  t = 0;
  n = 0;
  numCacheIters = 0;
  numExampleIds = 0;
  nextUpdateInd = 0;
  base_time = 0;

  // Flags, all initially false
  runForever = false;
  hasConverged = false;
  finished = false;
  useFixedSampleSet = false;
  isMultiSample = false;

  // Running sums; the scale factor starts at 1, the rest at 0
  regularization_error = 0;
  sum_dual = 0;
  sum_alpha_loss = 0;
  sum_w_sqr = 0;
  sum_w_scale = 1;
}
Esempio n. 14
0
/* Increments a shared counter n times from a parallel loop, protecting every
 * increment with an OpenMP lock, then prints the final counter value and the
 * elapsed wall-clock time.
 *
 * n: number of loop iterations (and therefore the expected final value).
 * */
void foo_locks(long long int n) {
	long long int a=0;
	long long int i;
	double start, elapsed;

	omp_lock_t my_lock;

	// init lock
	omp_init_lock(&my_lock);

	start = omp_get_wtime();
	#pragma omp parallel for schedule(static) shared(a)	
	for(i = 0; i < n; i++) 
	{	
		omp_set_lock(&my_lock);
		a+=1;
		omp_unset_lock(&my_lock);
	}
	omp_destroy_lock(&my_lock);
	
	elapsed = omp_get_wtime() - start;
	// a is a long long, so it needs %lld (%d is undefined behavior here)
	printf("Final value = %lld \n ", a);
	printf("Locks: Total time = %f seconds \n ", elapsed);
} // end foo_locks
Esempio n. 15
0
/*
 * Randomized parallel search for a triangulation of the cube.
 *
 * Every thread repeatedly grows a triangulation from a start facet: it picks
 * a random boundary triangle, lists the tetrahedra that facet_conform()
 * reports for it, drops those that intersect the triangulation built so far
 * and adds one of the remaining candidates. Which candidate is chosen depends
 * on the thread number (max volume, min volume, random, or a random mix).
 * An attempt is abandoned when a boundary triangle has no usable tetrahedron
 * and succeeds when no boundary triangles are left.
 *
 * The first successful triangulation is stored in `result` and all threads
 * stop. Both the stop flag and `result` are accessed only while holding
 * result_lock: this makes the flag update visible to the other threads and
 * ensures that a second thread finishing at about the same time frees its
 * triangulation instead of overwriting (and leaking) the published one.
 *
 * Returns the triangulation that was found. The function does not return
 * until some thread succeeds.
 */
triangulation triangulate_cube_random(data_list * data) {
  int dim = data_list_dim(data);
  cube_points cube = gen_cube_points(dim); 
  triangulation result;
  omp_lock_t result_lock;
  omp_init_lock(&result_lock);  //Guards result and triangulation_found

  facet_acute_data parameters; //Parameters for conform_
  triangulation tmp_triang; //Triangulation we are expanding in current thread
  ptetra tet_list; //List of tetrahedrons, used in the parallel section
  unsigned short tet_list_len; //Holds the length of this list
  int triangulation_found = 0; //Stop if one of the threads has found a triangulation!
  int rand_bound, i;
  unsigned short tet_max, tet_min, tet_rand, tet_add;

  size_t max_volume;
  //Start the parallel loop!
#pragma omp parallel default(none) \
  private(parameters, tmp_triang, tet_list, tet_list_len, rand_bound, i,max_volume,tet_max, tet_min, tet_rand, tet_add) \
  shared(result, result_lock, cube,data,dim, triangulation_found)
  {
    //Declared inside the region, hence private to each thread
    int search_done; //Snapshot of triangulation_found taken under the lock
    int published;   //Set when this thread's triangulation became the result

    //Initalization for each thread
    parameters.cube = &cube;
    parameters.boundary_func = &triangle_boundary_cube;
    parameters.data = data;
    parameters.store_acute_ind = 1;
    parameters.acute_ind  = malloc(sizeof(vert_index) * cube.len);

    tet_list = malloc(sizeof(tetra) * cube.len);
    max_volume = 0;

    for (;;) { //Until some thread has found a triangulation
      //Read the shared stop flag under the lock
      omp_set_lock(&result_lock);
      search_done = triangulation_found;
      omp_unset_lock(&result_lock);
      if (search_done)
        break;

      //Initalize the triangulation variables
      tmp_triang = triangulation_init(dim);
      tet_list_len = 0;
      //Start triangle (0,0,0), (rand,0,0), (rand,rand,0)
      tmp_triang.bound_len = 1;
      tmp_triang.bound_tri = triangulation_start_facet(data);

      //While we have triangles on the boundary
      while (tmp_triang.bound_len > 0) {
        /*
         * We are going to add a tetrahedron on the boundary triangle.
         * To do so, we select a random triangle on the boundary. Then we generate all the
         * acute tetrahedra (above and below) with facets in our possible list.
         * From this list we remove all the tetrahedrons that intersect with our current triangulation.
         * Then we add a random tetrahedron to our triangulation and repeat.
         */
        rand_bound = rand() % tmp_triang.bound_len;

        //Calculate the conform tetrahedrons above and below
        if (!facet_conform(tmp_triang.bound_tri + rand_bound, &parameters))
          break; //Triangle on the boundary that does not have a conform facet

        tet_list_len = parameters.acute_ind_len;
        //Form explicit list of the tetrahedrons
        for (i = 0; i < tet_list_len; i++) 
        {
          copyArr3(tet_list[i].vertices[0], tmp_triang.bound_tri[rand_bound].vertices[0]);
          copyArr3(tet_list[i].vertices[1], tmp_triang.bound_tri[rand_bound].vertices[1]);
          copyArr3(tet_list[i].vertices[2], tmp_triang.bound_tri[rand_bound].vertices[2]);
          copyArr3(tet_list[i].vertices[3], cube.points[parameters.acute_ind[i]]);
        }

        //Remove all the tetrahedrons that intersect with current triangulation.
        filter_tet_list_disjoint_triangulation(tet_list, &tet_list_len, &tmp_triang);

        if (tet_list_len == 0) 
          break; //We can not find a conform tetrahedron for this boundary.. Restart

        //Select a tetrahedron from the tet_list to add to the triangulation.. Different approaches.
        //Combinations between: random tetra, smallest volume, maximum volume. Indices stored in tet_max, tet_min and tet_rand

        tet_list_min_max_volume(tet_list, tet_list_len, &tet_max, &tet_min);
        tet_rand = rand() % tet_list_len; 

        switch (omp_get_thread_num() % 6) {
          case 0: //Choose tet with max volume
            tet_add = tet_max; break;
          case 1: //Choose tet with min volume
            tet_add = tet_min; break;
          case 2: //Choose random tet
            tet_add = tet_rand; break;
          case 3: //Choose either max or min (random)
            tet_add = (rand() % 2)? tet_min : tet_max; break;
          case 4: //Choose either max or rand
            tet_add = (rand() % 5)? tet_max : tet_rand; break;
          case 5: //Either min or rand
            tet_add = (rand() % 5)? tet_min : tet_rand; break;
          default:
            tet_add = 0;
        }

        /*
         * Add the above tetra to the triangulation.
         * This removes all the boundary triangles that are covered by this tetrahedron
         */
        add_tet_triangulation(tet_list + tet_add,&tmp_triang);
      }
      if (triangulation_volume(&tmp_triang) > max_volume) {
        max_volume = triangulation_volume(&tmp_triang);
        printf("Record for thread %d using method %d amount: %zu\n", omp_get_thread_num(), omp_get_thread_num() % 6, max_volume);
        triangulation_print(&tmp_triang);
      }
      if (tmp_triang.bound_len == 0)
      {
        printf("FOUND A TRIANGULATION!!!\n");
        triangulation_print(&tmp_triang);
        omp_set_lock(&result_lock);
        //Only the first finder publishes its triangulation
        published = !triangulation_found;
        if (published) {
          result = tmp_triang;
          triangulation_found = 1;
        }
        omp_unset_lock(&result_lock);
        if (!published)
          triangulation_free(&tmp_triang); //Another thread was first
      } else 
        triangulation_free(&tmp_triang);
    }
    free(parameters.acute_ind);
    free(tet_list);
  }
  free(cube.points);
  omp_destroy_lock(&result_lock);
  return result;
}
Esempio n. 16
0
/*
 * Prepares an empty bucket: no elements in use, room for four floats
 * allocated with malloc(), and an initialized OpenMP lock.
 *
 * self: bucket to initialize.
 */
void Bucket_init(Bucket* self) {
    omp_init_lock(&self->lock);

    self->size = 4;
    self->used = 0;

    /* storage for self->size floats */
    float* storage = (float*) malloc(self->size * sizeof(float));
    self->data = storage;
}
Esempio n. 17
0
File: kd.c Progetto: N-BodyShop/fof
/*
 * Friends-of-friends grouping of the active particles in kd.
 *
 * Starting from every particle that has no group yet, the function collects
 * (via a ring-buffer FIFO and a walk over the kd-tree cells) all particles
 * closer than fEps to a particle already in the group and stores the group
 * number in p[].iGroup.
 *
 * With OpenMP, seed particles are distributed over the threads and a group's
 * number is (seed index + 1). p[].iTouched records the number of the thread
 * that claimed a particle; when a thread meets a particle claimed by a thread
 * with a lower number it abandons the group it is building (RestartSnake).
 * Writes to iTouched/iGroup are protected by a table of kd->nHash locks that
 * particles are mapped onto by _hashLock().
 *
 * kd:   tree and particle data; p[].iGroup, p[].iTouched (OpenMP only),
 *       kd->nHash (OpenMP only) and kd->nGroup are overwritten.
 * fEps: linking length; pairs with squared distance below fEps*fEps are
 *       joined.
 *
 * Returns kd->nGroup-1.
 *
 * NOTE(review): the per-thread FIFO holds nActive/num_threads entries and is
 * not checked for being zero-sized or for overflow -- confirm that callers
 * never run this with fewer active particles than threads.
 */
int kdFoF(KD kd,float fEps)
{
	PARTICLE *p;
	KDN *c;
	int pi,pj,pn,cp;

	int iGroup;

	int *Fifo,iHead,iTail,nFifo;
	float fEps2;
	float dx,dy,dz,x,y,z,lx,ly,lz,sx,sy,sz,fDist2;
#ifdef _OPENMP
    int idSelf;
    omp_lock_t *locks;

	for (pn=0;pn<kd->nActive;++pn) kd->p[pn].iTouched = -1;
    /* We really want to make an independent lock for each particle.  However, each lock
     * seems to use a lot of memory (something like 312 bytes per lock).  Therefore,
     * to ensure that we don't use too much memory, only use 1 lock per 100 particles.
     * This should still provide very low lock contention while not using oodles of 
     * memory at the same time, since it is extremely rare that two threads will be looking
     * at two particles that map to the same lock at the same time.*/
    kd->nHash = (int)(kd->nActive/100);
    /* With fewer than 100 active particles the division yields 0, which would
     * give a zero-length lock table (and malloc(0) may return NULL, tripping
     * the assert below).  Always keep at least one lock. */
    if (kd->nHash < 1) kd->nHash = 1;
    locks = (omp_lock_t *)malloc(kd->nHash*sizeof(omp_lock_t));
    assert(locks != NULL);
    for (pn=0;pn<kd->nHash;++pn) omp_init_lock(&locks[pn]);
#endif

	p = kd->p;
	c = kd->kdNodes;
	lx = kd->fPeriod[0];
	ly = kd->fPeriod[1];
	lz = kd->fPeriod[2];
	fEps2 = fEps*fEps;
	for (pn=0;pn<kd->nActive;++pn) p[pn].iGroup = 0;
#pragma omp parallel default(none) shared(kd,locks,p,c,lx,ly,lz,fEps2) \
    private(pi,pj,pn,cp,iGroup,Fifo,iHead,iTail,dx,dy,dz,x,y,z,sx,sy,sz,fDist2,idSelf,nFifo)
  {
#ifdef _OPENMP
    nFifo = kd->nActive/omp_get_num_threads();
    idSelf = omp_get_thread_num();
#else
    nFifo = kd->nActive;
#endif
	Fifo = (int *)malloc(nFifo*sizeof(int));
	assert(Fifo != NULL);
	iHead = 0;
	iTail = 0;
	iGroup = 0;
#pragma omp for schedule(runtime)
	for (pn=0;pn<kd->nActive;++pn) {
		if (p[pn].iGroup) continue;
		/*
		 ** Mark it and add to the do-fifo.
		 */
#ifdef _OPENMP
        omp_set_lock(&locks[_hashLock(kd,pn)]);
        if (p[pn].iTouched >= 0 && p[pn].iTouched < idSelf ) {
            /* Already claimed by a thread with a lower number: skip this seed. */
            assert(p[pn].iGroup > 0);
            omp_unset_lock(&locks[_hashLock(kd,pn)]);
            continue;
        }
        p[pn].iTouched = idSelf;
        iGroup = pn+1;
		p[pn].iGroup = iGroup;
        omp_unset_lock(&locks[_hashLock(kd,pn)]);
#else
		++iGroup;
		p[pn].iGroup = iGroup;
#endif
		Fifo[iTail++] = pn;
		if (iTail == nFifo) iTail = 0;
		while (iHead != iTail) {
			pi = Fifo[iHead++];
			if (iHead == nFifo) iHead = 0;
			/*
			 ** Now do an fEps-Ball Gather!
			 */
			x = p[pi].r[0];
			y = p[pi].r[1];
			z = p[pi].r[2];
			cp = ROOT;
			while (1) {
				INTERSECT(c,cp,fEps2,lx,ly,lz,x,y,z,sx,sy,sz);
				/*
				 ** We have an intersection to test.
				 */
				if (c[cp].iDim >= 0) {
					cp = LOWER(cp);
					continue;
					}
				else {
					for (pj=c[cp].pLower;pj<=c[cp].pUpper;++pj) {
#ifdef _OPENMP
                        if (p[pj].iGroup == iGroup) {
                            /* We have already looked at this particle */
                            //assert(p[pj].iTouched == idSelf);  particle is not locked.
                            continue;
                        }
                        if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                            /* Somebody more important than us is already looking at this
                             * particle.  However, we do not yet know if this particle belongs
                             * in our group, so just skip it to save time but don't restart the
                             * entire group. */
                            // assert(p[pj].iGroup > 0); particle is not locked
                            continue;
                        }
#else
						if (p[pj].iGroup) continue;
#endif
						dx = sx - p[pj].r[0];
						dy = sy - p[pj].r[1];
						dz = sz - p[pj].r[2];
						fDist2 = dx*dx + dy*dy + dz*dz;
						if (fDist2 < fEps2) {
							/*
							 ** Mark it and add to the do-fifo.
							 */
#ifdef _OPENMP
                            omp_set_lock(&locks[_hashLock(kd,pj)]);
                            if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                                /* Now we know this particle should be in our group.  If somebody more
                                 * important than us touched it, abort the entire group. */
                                assert(p[pj].iGroup > 0);
                                omp_unset_lock(&locks[_hashLock(kd,pj)]);
                                /* Empty the FIFO before leaving. */
                                iHead = iTail;
                                goto RestartSnake;
                            }
                            p[pj].iTouched = idSelf;
							p[pj].iGroup = iGroup;
                            omp_unset_lock(&locks[_hashLock(kd,pj)]);
#else
							p[pj].iGroup = iGroup;
#endif
							Fifo[iTail++] = pj;
							if (iTail == nFifo) iTail = 0;
							}
						}
					SETNEXT(cp);
					if (cp == ROOT) break;
					continue;
					}
			/* No goto in this function targets this label
			 * (presumably the INTERSECT macro jumps here). */
			ContainedCell:
				for (pj=c[cp].pLower;pj<=c[cp].pUpper;++pj) {
#ifdef _OPENMP
                    if (p[pj].iGroup == iGroup) continue;
                    if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                        /* Somebody more important than us is already looking at this
                         * group.  Abort this entire group! */
                        //assert(p[pj].iGroup > 0); particle is not locked
                        iHead = iTail;
                        goto RestartSnake;
                    }
#else
					if (p[pj].iGroup) continue;
#endif                    
					/*
					 ** Mark it and add to the do-fifo.
					 */
#ifdef _OPENMP
                    omp_set_lock(&locks[_hashLock(kd,pj)]);
                    if (p[pj].iTouched >= 0 && p[pj].iTouched < idSelf) {
                        /* Check again in case somebody touched it before the lock. */
                        assert(p[pj].iGroup > 0);
                        omp_unset_lock(&locks[_hashLock(kd,pj)]);
                        iHead = iTail;
                        goto RestartSnake;
                    }
                    p[pj].iTouched = idSelf;
                    p[pj].iGroup = iGroup;
                    omp_unset_lock(&locks[_hashLock(kd,pj)]);
#else
					p[pj].iGroup = iGroup;
#endif
					Fifo[iTail++] = pj;
					if (iTail == nFifo) iTail = 0;
					}
			/* No goto in this function targets this label
			 * (presumably the INTERSECT macro jumps here). */
			GetNextCell:
				SETNEXT(cp);
				if (cp == ROOT) break;
            }
        } /* End while(iHead != iTail) */
#ifdef _OPENMP
    RestartSnake:
#endif
        assert(iHead == iTail);
    }
	free(Fifo);
  }  /* End of the OpenMP PARALLEL section */

#ifdef _OPENMP
    /* The number of groups is the number of particles whose groupID equals
     * their particleID+1; it is counted in pj here.
     * NOTE(review): the count is not used afterwards -- nGroup is set from
     * nActive instead (group numbers are seed index + 1); confirm intent. */
    pj = 0;
	for (pn=0;pn<kd->nActive;++pn)
        if (p[pn].iGroup == pn+1) ++pj;
    kd->nGroup = (kd->nActive)+1;
    /* Every initialized lock is destroyed before its storage is released. */
    for (pn=0;pn<kd->nHash;++pn) omp_destroy_lock(&locks[pn]);
    free(locks);
#else
	kd->nGroup = iGroup+1;
#endif
	return(kd->nGroup-1);
	}
Esempio n. 18
0
 /**
  * Creates a counter that starts at the given value.
  *
  * The counter is marked as not destroyed and its OpenMP lock is
  * initialized.
  *
  * @param init  starting value of the counter
  */
 OpenMPCounter::OpenMPCounter(size_type init)
     : _counter(init),
       destroyed(false)
 {
     omp_init_lock(&_lock);
 }
Esempio n. 19
0
 /**
  * Creates a counter that starts at zero.
  *
  * The counter is marked as not destroyed and its OpenMP lock is
  * initialized.
  */
 OpenMPCounter::OpenMPCounter()
     : _counter(0),
       destroyed(false)
 {
     omp_init_lock(&_lock);
 }
void vertex_betweenness_centrality_parBFS(graph_t* G, double* BC, long numSrcs) {

    attr_id_t *S;      /* stack of vertices in the order of non-decreasing 
                          distance from s. Also used to implicitly 
                          represent the BFS queue */
    plist_t* P;        /* predecessors of a vertex v on shortest paths from s */
    double* sig;       /* No. of shortest paths */
    attr_id_t* d;      /* Length of the shortest path between every pair */
    double* del;       /* dependency of vertices */
    attr_id_t *in_degree, *numEdges, *pSums;
    attr_id_t* pListMem;    
#if RANDSRCS
    attr_id_t* Srcs; 
#endif
    attr_id_t *start, *end;
    long MAX_NUM_PHASES;
    attr_id_t *psCount;

#ifdef _OPENMP    
    omp_lock_t* vLock;
    long chunkSize;
#endif
#ifdef DIAGNOSTIC
    double elapsed_time;
#endif
    int seed = 2387;

#ifdef _OPENMP    
#pragma omp parallel firstprivate(G)
    {
#endif

        attr_id_t *myS, *myS_t;
        attr_id_t myS_size;
        long i, j, k, p, count, myCount;
        long v, w, vert;
        long k0, k1;
        long numV, num_traversals, n, m, phase_num;
        long start_iter, end_iter;
        long tid, nthreads;
        int* stream;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif

#ifdef _OPENMP
        int myLock;
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time = get_seconds();
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = numSrcs */
        numV = numSrcs;
        n = G->n;
        m = G->m;

        /* Permute vertices */
        if (tid == 0) {
#if RANDSRCS
            Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t));
#endif
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP   
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */ 
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#if RANDSRCS
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n * sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP  
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif        
            }
        } 
#endif

#ifdef _OPENMP    
#pragma omp barrier
#endif

        if (tid == 0) {
            MAX_NUM_PHASES = 500;
        }

#ifdef _OPENMP
#pragma omp barrier    
#endif

        /* Initialize predecessor lists */

        /* The size of the predecessor list of each vertex is bounded by 
           its in-degree. So we first compute the in-degree of every
           vertex */ 

        if (tid == 0) {
            P   = (plist_t  *) calloc(n, sizeof(plist_t));
            in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t));
            numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t));
            pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t));
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        prefix_sums(in_degree, numEdges, pSums, n);

        if (tid == 0) {
            pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t));
        }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Allocate shared memory */ 
        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);

            S   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
            sig = (double *) malloc(n*sizeof(double));
            d   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
            del = (double *) calloc(n, sizeof(double));

            start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
            end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
            psCount = (attr_id_t *) malloc((nthreads+1)*sizeof(attr_id_t));
        }

        /* local memory for each thread */  
        myS_size = (2*n)/nthreads;
        myS = (attr_id_t *) malloc(myS_size*sizeof(attr_id_t));
        num_traversals = 0;
        myCount = 0;

#ifdef _OPENMP    
#pragma omp barrier
#endif

#ifdef _OPENMP    
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        for (p=0; p<n; p++) {
#if RANDSRCS
            i = Srcs[p];
#else
            i = p;
#endif
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            if (num_traversals == numV + 1) {
                break;
            }

            if (tid == 0) {
                sig[i] = 1;
                d[i] = 0;
                S[0] = i;
                start[0] = 0;
                end[0] = 1;
            }

            count = 1;
            phase_num = 0;

#ifdef _OPENMP       
#pragma omp barrier
#endif

            while (end[phase_num] - start[phase_num] > 0) {

                myCount = 0;
                start_iter = start[phase_num];
                end_iter = end[phase_num];
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(dynamic) nowait
#endif
                for (vert = start_iter; vert < end_iter; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {

                        w = G->endV[j];
                        if (v != w) {

#ifdef _OPENMP                            
                            myLock = omp_test_lock(&vLock[w]);
                            if (myLock) { 
#endif             
                                /* w found for the first time? */ 
                                if (d[w] == -1) {
                                    if (myS_size == myCount) {
                                        /* Resize myS */
                                        myS_t = (attr_id_t *)
                                            malloc(2*myS_size*sizeof(attr_id_t));
                                        memcpy(myS_t, myS, 
                                                myS_size*sizeof(attr_id_t));
                                        free(myS);
                                        myS = myS_t;
                                        myS_size = 2*myS_size;
                                    }
                                    myS[myCount++] = w;
                                    d[w] = d[v] + 1;
                                    sig[w] = sig[v];
                                    P[w].list[P[w].count++] = v;
                                } else if (d[w] == d[v] + 1) {
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                }
#ifdef _OPENMP  

                                omp_unset_lock(&vLock[w]);
                            } else {
                                if ((d[w] == -1) || (d[w] == d[v]+ 1)) {
                                    omp_set_lock(&vLock[w]);
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                    omp_unset_lock(&vLock[w]);
                                }
                            }
#endif

                        }
                    }
                }
                /* Merge all local stacks for next iteration */
                phase_num++; 
                if (tid == 0) {
                    if (phase_num >= MAX_NUM_PHASES) {
                        fprintf(stderr, "Error: Max num phases set to %ld\n",
                                MAX_NUM_PHASES);
                        fprintf(stderr, "Diameter of input network greater than"
                                " this value. Increase MAX_NUM_PHASES"
                                " in vertex_betweenness_centrality_parBFS()\n");
                        exit(-1);
                    }
                }

                psCount[tid+1] = myCount;

#ifdef _OPENMP
#pragma omp barrier
#endif

                if (tid == 0) {
                    start[phase_num] = end[phase_num-1];
                    psCount[0] = start[phase_num];
                    for(k=1; k<=nthreads; k++) {
                        psCount[k] = psCount[k-1] + psCount[k];
                    }
                    end[phase_num] = psCount[nthreads];
                }



#ifdef _OPENMP
#pragma omp barrier
#endif

                k0 = psCount[tid]; 
                k1 = psCount[tid+1];
                for (k = k0; k < k1; k++) {
                    S[k] = myS[k-k0];
                } 

                count = end[phase_num];
            }

            phase_num--;

            while (phase_num > 0) {
                start_iter = start[phase_num];
                end_iter = end[phase_num];
#ifdef _OPENMP        
#pragma omp for schedule(static) nowait
#endif
                for (j=start_iter; j<end_iter; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
#ifdef _OPENMP
                        omp_set_lock(&vLock[v]);
#endif
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
#ifdef _OPENMP
                        omp_unset_lock(&vLock[v]);
#endif
                    }
                    BC[w] += del[w];
                }

                phase_num--;

#ifdef _OPENMP
#pragma omp barrier
#endif            
            }


#ifdef _OPENMP
            chunkSize = n/nthreads;
#pragma omp for schedule(static, chunkSize) nowait
#endif
            for (j=0; j<count; j++) {
                w = S[j];
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }


#ifdef _OPENMP
#pragma omp barrier
#endif

        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n", 
                    elapsed_time_part);
        }
#endif


#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        free(myS);

        if (tid == 0) { 
            free(S);
            free(pListMem);
            free(P);
            free(sig);
            free(d);
            free(del);
#ifdef _OPENMP
            free(vLock);
#endif
            free(start);
            free(end);
            free(psCount);

#ifdef DIAGNOSTIC
            elapsed_time = get_seconds() - elapsed_time;
            fprintf(stderr, "Time taken: %lf\n seconds", elapsed_time);
#endif

#if RANDSRCS
            free(Srcs);
#endif
        }

        free_sprng(stream);
#ifdef _OPENMP
    }    
#endif

}
/*
 * Vertex betweenness centrality, "simple" (coarse-grained) variant.
 *
 * The loop over source vertices is divided among the OpenMP threads. Every
 * thread runs complete BFS traversals using its own work arrays (S, sig, d,
 * del, start, end) and its own predecessor lists (P, pListMem); only the
 * additions to BC are shared, and those are serialised with one lock per
 * vertex.
 *
 * G        graph; the fields n, m, numEdges (indexed 0..n) and endV
 *          (indexed 0..m-1) are read.
 * BC       array indexed by vertex id; dependency values are ADDED to it,
 *          it is not cleared here.
 * numSrcs  number of source vertices to consider. Values larger than G->n
 *          are clamped to G->n. Sources without outgoing edges are skipped.
 */
void vertex_betweenness_centrality_simple(graph_t* G, double* BC, long numSrcs) {

    attr_id_t *in_degree, *numEdges, *pSums;
#if RANDSRCS
    attr_id_t* Srcs; 
#endif
    long num_traversals = 0;
#ifdef _OPENMP    
    omp_lock_t* vLock;
#endif
#ifdef DIAGNOSTIC
    double elapsed_time;
#endif
    int seed = 2387;

    /* The outer loop is parallelized in this case. Each thread does a BFS 
       and the vertex BC values are updated under a per-vertex lock */   
#ifdef _OPENMP
#pragma omp parallel firstprivate(G)
    {
#endif
        attr_id_t *S;      /* stack of vertices in the order of non-decreasing 
                              distance from s. Also used to implicitly 
                              represent the BFS queue */
        plist_t* P;          /* predecessors of a vertex v on shortest paths 
                                from s */
        attr_id_t* pListMem;    
        double* sig;       /* No. of shortest paths */
        attr_id_t* d;      /* Length of the shortest path between every pair */
        double* del;       /* dependency of vertices */
        attr_id_t *start, *end;  /* S[start[k]..end[k]-1] is BFS phase k */
        long MAX_NUM_PHASES;     /* capacity of start[] and end[] */

        long i, j, k, p, count;
        long v, w, vert;
        long numV, n, m, phase_num;
        long tid, nthreads;
        int* stream;
#ifdef DIAGNOSTIC
        double elapsed_time_part;
#endif

#ifdef _OPENMP
        tid = omp_get_thread_num();
        nthreads = omp_get_num_threads();
#else
        tid = 0;
        nthreads = 1;
#endif

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time = get_seconds();
            elapsed_time_part = get_seconds();
        }
#endif

        /* numV: no. of vertices to run BFS from = numSrcs */
        numV = numSrcs;
        n = G->n;
        m = G->m;

        /* The source loop indexes G->numEdges[i+1] (and Srcs[p]), which only
           have entries for n vertices, so never iterate past n sources. */
        if (numV > n) {
            numV = n;
        }

        /* Thread 0 allocates the arrays shared by the whole team */
        if (tid == 0) {
#if RANDSRCS
            Srcs = (attr_id_t *) malloc(n*sizeof(attr_id_t));
#endif
#ifdef _OPENMP
            vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
        }

#ifdef _OPENMP   
#pragma omp barrier
#pragma omp for
        for (i=0; i<n; i++) {
            omp_init_lock(&vLock[i]);
        }
#endif

        /* Initialize RNG stream */ 
        stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#if RANDSRCS
        /* Permute vertices: start from the identity ... */
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            Srcs[i] = i;
        }

        /* ... then swap each entry with a random one. A swap is performed
           only if both vertex locks can be taken without waiting; otherwise
           it is skipped. */
#ifdef _OPENMP
#pragma omp for
#endif
        for (i=0; i<n; i++) {
            j = n * sprng(stream);
            if (i != j) {
#ifdef _OPENMP
                int l1 = omp_test_lock(&vLock[i]);
                if (l1) {
                    int l2 = omp_test_lock(&vLock[j]);
                    if (l2) {
#endif
                        k = Srcs[i];
                        Srcs[i] = Srcs[j];
                        Srcs[j] = k;
#ifdef _OPENMP  
                        omp_unset_lock(&vLock[j]);
                    }
                    omp_unset_lock(&vLock[i]);
                }
#endif        
            }
        } 
#endif

#ifdef _OPENMP    
#pragma omp barrier
#endif

        /* A BFS visits every vertex at most once, so it has at most n
           non-empty phases plus the final empty one that terminates the
           phase loop. Sizing start[]/end[] for that worst case means the
           unchecked writes to start[phase_num]/end[phase_num] below can
           never run past the arrays, whatever the diameter of the graph. */
        MAX_NUM_PHASES = n + 1;

        /* Initialize predecessor lists */

        /* The size of the predecessor list of each vertex is bounded by 
           its in-degree. So we first compute the in-degree of every
           vertex */ 

        if (tid == 0) {
            in_degree = (attr_id_t *) calloc(n+1, sizeof(attr_id_t));
            numEdges = (attr_id_t *) malloc((n+1)*sizeof(attr_id_t));
            pSums = (attr_id_t *) malloc(nthreads*sizeof(attr_id_t));
        }


#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
        for (i=0; i<m; i++) {
            v = G->endV[i];
#ifdef _OPENMP
            omp_set_lock(&vLock[v]);
#endif
            in_degree[v]++;
#ifdef _OPENMP
            omp_unset_lock(&vLock[v]);
#endif
        }

        /* Called by every thread of the team */
        prefix_sums(in_degree, numEdges, pSums, n);

        /* Each thread gets its own predecessor lists, carved out of one
           private block of m entries at the offsets stored in numEdges */
        P  = (plist_t  *) calloc(n, sizeof(plist_t));
        pListMem = (attr_id_t *) malloc(m*sizeof(attr_id_t));

        for (i=0; i<n; i++) {
            P[i].list = pListMem + numEdges[i];
            P[i].degree = in_degree[i];
            P[i].count = 0;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() -elapsed_time_part;
            fprintf(stderr, "In-degree computation time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

        /* Nobody may still be reading in_degree/numEdges when they are freed */
#ifdef _OPENMP
#pragma omp barrier
#endif

        if (tid == 0) {
            free(in_degree);
            free(numEdges);
            free(pSums);
        }

        /* Per-thread BFS work arrays */
        S   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
        sig = (double *) malloc(n*sizeof(double));
        d   = (attr_id_t *) malloc(n*sizeof(attr_id_t));
        del = (double *) calloc(n, sizeof(double));

        start = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));
        end = (attr_id_t *) malloc(MAX_NUM_PHASES*sizeof(attr_id_t));

#ifdef _OPENMP   
#pragma omp barrier
#endif

        /* d[v] == -1 marks "not visited in the current traversal" */
        for (i=0; i<n; i++) {
            d[i] = -1;
        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC initialization time: %lf seconds\n", 
                    elapsed_time_part);
            elapsed_time_part = get_seconds();
        }
#endif

#ifdef _OPENMP
#pragma omp for reduction(+:num_traversals)
#endif
        for (p=0; p<numV; p++) {
#if RANDSRCS
            i = Srcs[p];
#else
            i = p;
#endif
            /* A source without outgoing edges reaches nothing: skip it */
            if (G->numEdges[i+1] - G->numEdges[i] == 0) {
                continue;
            } else {
                num_traversals++;
            }

            sig[i] = 1;
            d[i] = 0;
            S[0] = i;
            start[0] = 0;
            end[0] = 1;

            count = 1;
            phase_num = 0;

            /* Forward sweep: level-by-level BFS that records distances,
               shortest-path counts and predecessor lists */
            while (end[phase_num] - start[phase_num] > 0) {

                for (vert = start[phase_num]; vert < end[phase_num]; vert++) {
                    v = S[vert];
                    for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {
                        w = G->endV[j];
                        if (v != w) {
                            /* w found for the first time? */ 
                            if (d[w] == -1) {
                                S[count++] = w;
                                d[w] = d[v] + 1;
                                sig[w] = sig[v];
                                P[w].list[P[w].count++] = v;
                            } else if (d[w] == d[v] + 1) {
                                sig[w] += sig[v];
                                P[w].list[P[w].count++] = v;
                            }
                        }
                    }
                }

                phase_num++; 

                start[phase_num] = end[phase_num-1];
                end[phase_num] = count;
            }

            /* The last phase is the empty one that ended the loop */
            phase_num--;

            /* Backward sweep: accumulate dependencies from the deepest
               phase towards the source (phase 0 is not processed) */
            while (phase_num > 0) {
                for (j=start[phase_num]; j<end[phase_num]; j++) {
                    w = S[j];
                    for (k = 0; k<P[w].count; k++) {
                        v = P[w].list[k];
                        del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
                    }
                    /* BC is shared by all threads */
#ifdef _OPENMP
                    omp_set_lock(&vLock[w]);
                    BC[w] += del[w];
                    omp_unset_lock(&vLock[w]);
#else
                    BC[w] += del[w];
#endif
                }

                phase_num--;
            }

            /* Reset only the entries this traversal touched */
            for (j=0; j<count; j++) {
                w = S[j];
                d[w] = -1;
                del[w] = 0;
                P[w].count = 0;
            }

        }

#ifdef DIAGNOSTIC
        if (tid == 0) {
            elapsed_time_part = get_seconds() - elapsed_time_part;
            fprintf(stderr, "BC computation time: %lf seconds\n", 
                    elapsed_time_part);
        }
#endif


#ifdef _OPENMP
#pragma omp barrier
#endif

#ifdef _OPENMP
#pragma omp for
        for (i=0; i<n; i++) {
            omp_destroy_lock(&vLock[i]);
        }
#endif

        free(S);
        free(pListMem);
        free(P);
        free(sig);
        free(d);
        free(del);
        free(start);
        free(end);

        if (tid == 0) {

#ifdef _OPENMP
            free(vLock);
#endif

#if RANDSRCS
            free(Srcs);
#endif

#ifdef DIAGNOSTIC
            elapsed_time = get_seconds() - elapsed_time;
            fprintf(stderr, "Total time taken: %lf seconds\n", elapsed_time);
#endif

        }

        free_sprng(stream);

#ifdef _OPENMP
#pragma omp barrier
    }
#endif

}    
Esempio n. 22
0
File: lib-1.c Progetto: 0day-ci/gcc
/* Exercises a series of OpenMP runtime library routines and calls abort ()
   on the first result that differs from the expected one.  Returns 0 when
   every check passes.  */
int
main (void)
{
  double d, e;
  int l;
  omp_lock_t lck;
  omp_nest_lock_t nlck;

  /* Start time stamp; compared with a second reading near the end.  */
  d = omp_get_wtime ();

  /* Simple lock: testing a lock that is already set must fail, testing an
     unset lock must succeed and leave it set.  */
  omp_init_lock (&lck);
  omp_set_lock (&lck);
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  if (! omp_test_lock (&lck))
    abort ();
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  omp_destroy_lock (&lck);

  /* Nestable lock: the checks expect omp_test_nest_lock to return the new
     nesting count (1, then 3 after an extra set, then 2 after two unsets).
     The lock is fully released before it is destroyed.  */
  omp_init_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 1)
    abort ();
  omp_set_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 3)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 2)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  omp_destroy_nest_lock (&nlck);

  /* The dynamic-adjustment setting must read back as written.  */
  omp_set_dynamic (1);
  if (! omp_get_dynamic ())
    abort ();
  omp_set_dynamic (0);
  if (omp_get_dynamic ())
    abort ();

  /* Same round trip for the nested-parallelism setting.  */
  omp_set_nested (1);
  if (! omp_get_nested ())
    abort ();
  omp_set_nested (0);
  if (omp_get_nested ())
    abort ();

  /* Outside a parallel region the team has one thread numbered 0, while
     omp_get_max_threads reflects the value just requested.  */
  omp_set_num_threads (5);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 5)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  omp_set_num_threads (3);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 3)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  /* Inside the region: exactly 3 threads, ids in [0, 3), and the master
     thread has id 0.  Each thread records a failure in its private copy of
     l; the copies are OR-ed together by the reduction.  */
  l = 0;
#pragma omp parallel reduction (|:l)
  {
    l = omp_get_num_threads () != 3;
    l |= omp_get_thread_num () < 0;
    l |= omp_get_thread_num () >= 3;
#pragma omp master
    l |= omp_get_thread_num () != 0;
  }
  if (l)
    abort ();

  if (omp_get_num_procs () <= 0)
    abort ();
  /* omp_in_parallel must be false in the sequential part and true inside
     both parallel regions below (with and without an if clause).  */
  if (omp_in_parallel ())
    abort ();
#pragma omp parallel reduction (|:l)
  l = ! omp_in_parallel ();
#pragma omp parallel reduction (|:l) if (1)
  l = ! omp_in_parallel ();
  if (l)
    abort ();

  /* The second time stamp must not be earlier than the first.  */
  e = omp_get_wtime ();
  if (d > e)
    abort ();
  d = omp_get_wtick ();
  /* Negative precision is definitely wrong,
     bigger than 1s clock resolution is also strange.  */
  if (d <= 0 || d > 1)
    abort ();

  return 0;
}
Esempio n. 23
0
/*
 * Builds the adjacency representation of G from the edge tuples in SDGdata.
 *
 * For every edge i the start vertex SDGdata->startVertex[i] is counted into
 * a per-vertex degree, prefix_sums() turns the degrees into the offsets
 * stored in G->numEdges, and SDGdata->endVertex[i] is written into G->endV
 * at its slot. The vertex and edge counts are taken from N and M.
 *
 * NOTE: the weights of SDGdata are not copied (see the TODO below); every
 * entry of G->weight is set to 1 and the source weight is only printed to
 * stderr.
 *
 * G        receives n, m, numEdges, endV and weight; the three arrays are
 *          allocated here.
 * SDGdata  edge tuples; startVertex, endVertex and weight are freed before
 *          returning.
 *
 * Returns the time spent in this function, as the difference of two
 * get_seconds() readings.
 */
double computeGraph(graph* G, graphSDG* SDGdata) {

    VERT_T* endV;
    LONG_T *degree, *numEdges, *pos, *pSums;
    WEIGHT_T* w;
    double elapsed_time;

#ifdef _OPENMP
    omp_lock_t *vLock;
    LONG_T chunkSize;
#endif

    elapsed_time = get_seconds();

#ifdef _OPENMP
    omp_set_num_threads(NUM_THREADS);
#endif

#ifdef _OPENMP
#pragma omp parallel
#endif    
{
    LONG_T i, j, u, n, m, tid, nthreads;
#ifdef DIAGNOSTIC
    double elapsed_time_part;
#endif
    
#ifdef _OPENMP    
    nthreads = omp_get_num_threads();
    tid = omp_get_thread_num();
#else
    tid = 0;
    nthreads = 1;
#endif

    n = N;
    m = M;
    
    /* Thread 0 allocates the arrays shared by the whole team */
    if (tid == 0) {
#ifdef _OPENMP
        vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
        assert(vLock != NULL);
        chunkSize = n/nthreads;
        /* With fewer vertices than threads the division yields 0, which is
           not a valid chunk size for a schedule clause. */
        if (chunkSize < 1) {
            chunkSize = 1;
        }
#endif
        pos = (LONG_T *) malloc(m*sizeof(LONG_T));
        assert(pos != NULL);
        degree = (LONG_T *) calloc(n, sizeof(LONG_T));
        assert(degree != NULL);
    }
  
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds();
    }
#endif
    
#ifdef _OPENMP    
#pragma omp barrier
    
    #pragma omp for schedule(static, chunkSize)
    for (i=0; i<n; i++) {
        omp_init_lock(&vLock[i]);
    }

    #pragma omp barrier
  
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Lock initialization time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
 
    #pragma omp for
#endif
    /* pos[i] is the rank of edge i among the edges leaving its start
       vertex; the vertex lock makes "read degree, then increment" atomic. */
    for (i=0; i<m; i++) {
        u = SDGdata->startVertex[i];
#ifdef _OPENMP        
        omp_set_lock(&vLock[u]);
#endif
        pos[i] = degree[u]++;
#ifdef _OPENMP
        omp_unset_lock(&vLock[u]);
#endif
    } 
   
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Degree computation time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
   
#ifdef _OPENMP
#pragma omp barrier

#pragma omp for schedule(static, chunkSize)
    for (i=0; i<n; i++) {
        omp_destroy_lock(&vLock[i]);
    }

    if (tid == 0) 
        free(vLock);
#endif
    
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Lock destruction time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
   
    if (tid == 0) {
        numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T));
        assert(numEdges != NULL);
        pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T));
        assert(pSums != NULL);
   }

#ifdef _OPENMP
#pragma omp barrier
#endif

    /* Called by every thread of the team */
    prefix_sums(degree, numEdges, pSums, n); 
    
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Prefix sums time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
 
#ifdef _OPENMP
#pragma omp barrier
#endif

    if (tid == 0) {
        free(degree);
        free(pSums);
        w = (WEIGHT_T *) malloc(m*sizeof(WEIGHT_T));
        assert(w != NULL);
        endV = (VERT_T *) malloc(m* sizeof(VERT_T));
        assert(endV != NULL);
    }

#ifdef _OPENMP
    #pragma omp barrier

    #pragma omp for
#endif
    /* Scatter every edge to its slot: offset of the start vertex plus the
       rank of the edge within that vertex */
    for (i=0; i<m; i++) {
        u = SDGdata->startVertex[i];
        j = numEdges[u] + pos[i];
        endV[j] = SDGdata->endVertex[i];
        //TODO: 
        //w[j] = SDGdata->weight[i]; 
        /* The cast keeps the format and the argument in agreement whatever
           integer type WEIGHT_T is defined as. */
        fprintf(stderr, "%ld\n", (long) SDGdata->weight[i]);
        w[j] = 1; 
    }
    
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "Edge data structure construction time: %lf seconds\n",
                elapsed_time_part);
        elapsed_time_part = get_seconds();

    }
#endif
 
    /* Publish the result; the arrays are now referenced through G */
    if (tid == 0) {
        free(pos);
        G->n = n;
        G->m = m;
        G->numEdges = numEdges;
        G->endV = endV;
        G->weight = w;
    }
}
    /* Verification */
#if 0 
    fprintf(stderr, "SDG data:\n");
    for (int i=0; i<SDGdata->m; i++) {
        fprintf(stderr, "[%ld %ld %ld] ", SDGdata->startVertex[i], 
                SDGdata->endVertex[i], SDGdata->weight[i]);
    }
 
    fprintf(stderr, "\n");

    for (int i=0; i<G->n + 1; i++) {
        fprintf(stderr, "[%ld] ", G->numEdges[i]);
    }
    
    fprintf(stderr, "\nGraph:\n");
    for (int i=0; i<G->n; i++) {
        for (int j=G->numEdges[i]; j<G->numEdges[i+1]; j++) {
            fprintf(stderr, "[%ld %ld %ld] ", i, G->endV[j], G->weight[j]);
        }
    }
#endif 
    
    /* The input tuples are no longer needed */
    free(SDGdata->startVertex);
    free(SDGdata->endVertex);
    free(SDGdata->weight);
    
    elapsed_time = get_seconds() - elapsed_time; 
    
    return elapsed_time;
}
Esempio n. 24
0
	// Copy constructor: the argument is ignored, so no lock state is taken
	// from the source object; the new object initialises its own OpenMP lock.
	MutexType(const MutexType& )
	{
		omp_init_lock(&lock);
	}
Esempio n. 25
0
	// Default constructor: initialises the wrapped OpenMP lock.
	MutexType()
	{
		omp_init_lock(&lock);
	}
Esempio n. 26
0
/*
 * Grows a triangulation one tetrahedron at a time, starting from a single
 * boundary facet, until no boundary triangle is left or a step fails.
 *
 * Every iteration picks a random boundary triangle, collects the conform
 * tetrahedra on it, drops those that intersect the current triangulation,
 * adds a random survivor and then removes from `data` the triangles that are
 * no longer usable. Progress and failure reasons are printed to stdout.
 *
 * data             triangle data set; it is modified by the filtering steps.
 * tmp_triang_file  only printed at the moment (the code that would save
 * tmp_data_file    intermediate results to these files is disabled); either
 *                  may be NULL.
 *
 * Returns the triangulation built so far, also when the loop stops early.
 */
triangulation triangulate_cube(data_list * data,  char * tmp_triang_file, char * tmp_data_file) {
  /* Passing a null pointer for %s is undefined behaviour, so substitute a
     placeholder for missing file names. */
  printf("%s %s\n",
         tmp_triang_file ? tmp_triang_file : "(null)",
         tmp_data_file ? tmp_data_file : "(null)");
  triangulation result = triangulation_init(data_list_dim(data));

  cube_points cube = gen_cube_points(result.dim);
  facet_acute_data parameters;
  parameters.cube = &cube;
  parameters.boundary_func = &triangle_boundary_cube;
  parameters.data = data;
  parameters.store_acute_ind = 1;
  parameters.acute_ind = malloc(sizeof(unsigned short) * cube.len);

  //This list holds all conform tetrahedrons for a given triangle, max size = cube.len
  ptetra tet_list = malloc(sizeof(tetra) * cube.len);
  unsigned short tet_list_len = 0;

  //Lists needed for the dynamic_remove loop
  tri_list check_list, check_list_new;

  check_list     = tri_list_init(result.dim, MEM_LIST_FALSE);
  check_list_new = tri_list_init(result.dim, MEM_LIST_FALSE);

  //Start triangle (0,0,0), (rand,0,0), (rand,rand,0)
  result.bound_len = 1;
  result.bound_tri = triangulation_start_facet(data);
  printf("Starting triangulation with facet:\n");
  print_triangle(result.bound_tri);
  /*
   * During this method we are going to operate data that is not thread-safe.
   * To avoid race conditions we need an array of locks. We use a lock for the
   * first two points of a triangle (so need 2d array of locks).
   * Row i holds cube.len - i locks.
   */
  omp_lock_t ** locks = malloc(sizeof(omp_lock_t *) * cube.len);
  //Initialize the locks
  for (size_t i = 0; i < cube.len; i++){
    locks[i] = malloc(sizeof(omp_lock_t) * (cube.len - i));
    for (size_t j = 0; j < cube.len - i; j++)
      omp_init_lock(&locks[i][j]);
  }
  //While we have triangles on the boundary..
  while (result.bound_len > 0) {
    tri_list_empty(&check_list);
    tri_list_empty(&check_list_new);
    /*
     * We are going to add a tetrahedron on the boundary triangle.
     * To do so, we select a random triangle on the boundary. Then we generate all the
     * acute tetrahedra (above and below) with facets in our possible list.
     * From this list we remove all the tetrahedrons that intersect with our current triangulation.
     * Then we add a random tetrahedron to our triangulation, update the conform list and repeat.
     */
    int rand_bound = rand() % result.bound_len;
    printf("\n\nTotal amount of triangles left:%zu\nExpanding triangulation at boundary triangle: \n", data_list_count(data));
    print_triangle(result.bound_tri + rand_bound);

    //Calculate the conform tetrahedrons above and below
    if (!facet_conform(&result.bound_tri[rand_bound], &parameters))
    {
      printf("We have a triangle on the boundary that is not conform anymore.\n");
      printf("Whatthefuck? Breaking!\n");
      break;
    }

    //facet_conform stored the apex indices in parameters.acute_ind
    tet_list_len = parameters.acute_ind_len;
    printf("Total amount of conform tetrahedrons found for this boundary: %hu\n", tet_list_len);
    //Form explicit list of the tetrahedrons: the boundary triangle plus one apex each
    for (unsigned short i = 0; i < tet_list_len; i++) 
    {
      copyArr3(tet_list[i].vertices[0], result.bound_tri[rand_bound].vertices[0]);
      copyArr3(tet_list[i].vertices[1], result.bound_tri[rand_bound].vertices[1]);
      copyArr3(tet_list[i].vertices[2], result.bound_tri[rand_bound].vertices[2]);
      copyArr3(tet_list[i].vertices[3], cube.points[parameters.acute_ind[i]]);
    }

    //Remove all the tetrahedrons that intersect with current triangulation.
    filter_tet_list_disjoint_triangulation(tet_list, &tet_list_len, &result);

    printf("Amount of tetrahedrons left after filtering: %hu\n\n",tet_list_len);
    if (tet_list_len == 0) {
      printf("Waarom is deze lijst nu al f*****g leeggefilterd?\n");
      printf("Dead end, helaas pindakaas. Got to %zu\n", result.tetra_len);
      break;
    }

    //Select random tetrahedron disjoint with the current triangulation
    int rand_tet = rand() % tet_list_len;
    /*
     * Add the above tetra to the triangulation.
     * This removes all the boundary triangles that are covered by this tetrahedron
     */
    printf("Adding the following tetra to the triangulation\n");
    print_tetra(tet_list + rand_tet);
    printf("\n\n");
    add_tet_triangulation(tet_list + rand_tet, &result);
    triangulation_print(&result);

    if (!result.bound_len) //If we have no boundaries left, we must be done!!
    {
      printf("No more boundaries left.. WE FINNISHED!??\n");
      break;
    }
    //Consistency check
    if (!triangulation_consistent(&result, &parameters))
    {
      printf("Triangulation not consistent after adding the tetrahedron. Breaking.\n");
      break;
    }
    /*
     * Calculate a list of all the triangles we are going to remove
     */
    double time_removed = omp_get_wtime();
    printf("Removing triangles not disjoint with new tetrahedron\n");
    size_t removed = filter_intersection_data_list_tet(data,  &check_list, tet_list + rand_tet, locks);
    printf("Removed %zu triangles that are not disjoint with the new tetrahedron\n", removed);
    printf("The check_list has size %zu\n", tri_list_count(&check_list));
    printf("Time took to removed triangles: %g seconds\n", omp_get_wtime()-time_removed);

    if (!triangulation_consistent(&result, &parameters)) {
      printf("After filtering the memory list we have a non consistent triangulation. Break\n");
      break;
    }
    //Re-conform the data set for the triangles in check_list.
    //NOTE(review): the third argument (1) looks like an iteration count - confirm against the callee.
    facets_conform_dynamic_remove(data, &result, 1, &check_list, &check_list_new, locks);

    if (!triangulation_consistent(&result, &parameters)) {
      printf("Triangulation not consistent anymore after conforming the data set.. Breaking\n");
      break;
    }

    /*mem_list_cube_compress(&data->mem_list);


      if (tmp_triang_file && tmp_data_file) {
      triangulation_to_file(&result, tmp_triang_file);
      data_list_to_file(data, tmp_data_file, MEM_LIST_SAVE_CLEAN);
      }
      */
  }
  //Every exit from the loop ends up here, so the cleanup always runs
  for (size_t i = 0; i < cube.len; i++){
    for (size_t j = 0; j < cube.len - i; j++)
      omp_destroy_lock(&locks[i][j]);
    free(locks[i]);
  }

  free(locks);
  free(cube.points);
  free(parameters.acute_ind);
  free(tet_list);
  tri_list_free(&check_list);
  tri_list_free(&check_list_new);
  printf("Triangulation has length of %zu\n", result.tetra_len);
  return result;
}
Esempio n. 27
0
    // Constructor: initialises the member lock when the build defines
    // COSMO_OMP; otherwise it does nothing.
    inline OS23459783987()
    {
#if defined(COSMO_OMP)
        omp_init_lock(&lock_);
#endif
    }
Esempio n. 28
0
/* Sorts array[0 .. count-1] in place with a team of OpenMP threads.

   Ranges are split with partition (); ranges shorter than THRESHOLD are
   handed to insertsort ().  Pending ranges are kept on a stack private to
   each thread and on one global stack that is shared by the team and
   protected by LOCK.  BUSY counts the threads that currently hold work;
   a thread leaves the loop once it finds the global stack empty and BUSY
   at zero.  */
static void
sort1 (int *array, int count)
{
  omp_lock_t lock;
  struct int_pair_stack global_stack;
  int busy = 1;
  int num_threads;

  omp_init_lock (&lock);
  init_int_pair_stack (&global_stack);
  #pragma omp parallel firstprivate (array, count)
  {
    int lo = 0, hi = 0, mid, next_lo, next_hi;
    bool idle = true;
    struct int_pair_stack local_stack;

    init_int_pair_stack (&local_stack);
    /* Thread 0 starts with the whole array and is the single thread
       accounted for in the initial value of busy; all other threads start
       idle with the empty range [0, 0].  */
    if (omp_get_thread_num () == 0)
      {
	num_threads = omp_get_num_threads ();
	hi = count - 1;
	idle = false;
      }

    for (;;)
      {
	/* Finish short ranges directly and mark the range as consumed.  */
	if (hi - lo < THRESHOLD)
	  {
	    insertsort (array, lo, hi);
	    lo = hi;
	  }
	/* Current range exhausted: fetch the next one, preferring the
	   private stack over the shared one.  */
	if (lo >= hi)
	  {
	    if (size_int_pair_stack (&local_stack) == 0)
	      {
	      again:
		omp_set_lock (&lock);
		if (size_int_pair_stack (&global_stack) == 0)
		  {
		    /* No shared work either: this thread stops counting as
		       busy (once).  */
		    if (!idle)
		      busy--;
		    /* Nobody holds work any more, so none can appear: done.  */
		    if (busy == 0)
		      {
			omp_unset_lock (&lock);
			break;
		      }
		    omp_unset_lock (&lock);
		    idle = true;
		    /* Wait without holding the lock; the state is checked
		       again under the lock after the jump.  */
		    while (size_int_pair_stack (&global_stack) == 0
			   && busy)
		      busy_wait ();
		    goto again;
		  }
		/* Taking shared work makes an idle thread busy again.  */
		if (idle)
		  busy++;
		pop_int_pair_stack (&global_stack, &lo, &hi);
		omp_unset_lock (&lock);
		idle = false;
	      }
	    else
	      pop_int_pair_stack (&local_stack, &lo, &hi);
	  }

	/* Split at mid.  The part that is not longer than the other stays
	   the current range [lo, hi]; the other part becomes
	   [next_lo, next_hi] and is deferred.  */
	mid = partition (array, lo, hi);
	if (mid - lo < hi - mid)
	  {
	    next_lo = mid;
	    next_hi = hi;
	    hi = mid - 1;
	  }
	else
	  {
	    next_lo = lo;
	    next_hi = mid - 1;
	    lo = mid;
	  }

	if (next_hi - next_lo < THRESHOLD)
	  insertsort (array, next_lo, next_hi);
	else
	  {
	    /* Unlocked pre-check; the size is read again under the lock
	       before the decision is made.  */
	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
	      {
		int size;

		omp_set_lock (&lock);
		size = size_int_pair_stack (&global_stack);
		/* Share the range while the global stack holds fewer than
		   num_threads - 1 entries and is below STACK_SIZE;
		   otherwise keep it private.  */
		if (size < num_threads - 1 && size < STACK_SIZE)
		  push_int_pair_stack (&global_stack, next_lo, next_hi);
		else
		  push_int_pair_stack (&local_stack, next_lo, next_hi);
		omp_unset_lock (&lock);
	      }
	    else
	      push_int_pair_stack (&local_stack, next_lo, next_hi);
	  }
      }
    }
  omp_destroy_lock (&lock);
}
Esempio n. 29
0
// Splits this shape into groups of connected triangles and appends one newly
// allocated Shape per group to `shapes`.
//
// Step 1 builds, for every triangle, the list of triangles it is connected
// to (isConnectedTo with tolerance 0.01); with OpenMP this step runs in
// parallel. Step 2 walks these lists with addtoshape() to collect the groups.
//
// shapes    output; the new Shape objects are allocated with `new` and are
//           not freed here.
// progress  optional progress reporter (may be NULL). When its update()
//           returns false the remaining work is abandoned.
void Shape::splitshapes(vector<Shape*> &shapes, ViewProgress *progress)
{
  int n_tr = (int)triangles.size();
  if (progress) progress->start(_("Split Shapes"), n_tr);
  int progress_steps = max(1,(int)(n_tr/100));
  // value-initialised: every entry starts out false
  vector<bool> done(n_tr);
  bool cont = true;
  // make list of adjacent triangles for each triangle
  vector< vector<uint> > adj(n_tr);
  if (progress) progress->set_label(_("Split: Sorting Triangles ..."));
#ifdef _OPENMP
  omp_lock_t progress_lock;
  omp_init_lock(&progress_lock);
#pragma omp parallel for schedule(dynamic)
#endif
  for (int i = 0; i < n_tr; i++) {
    // The loop variable of an OpenMP worksharing loop must not be modified,
    // so a cancelled run skips the remaining iterations instead of jumping
    // to the end. `cont` is read and written only while holding the lock.
    bool cancelled;
#ifdef _OPENMP
    omp_set_lock(&progress_lock);
#endif
    cancelled = !cont;
    if (!cancelled && progress && i%progress_steps==0) {
      // once cancelled, stay cancelled
      if (!progress->update(i)) cont = false;
    }
#ifdef _OPENMP
    omp_unset_lock(&progress_lock);
#endif
    if (cancelled) continue;

    vector<uint> trv;
    for (int j = 0; j < n_tr; j++) {
      if (i == j) continue;
      bool add = false;
      // NOTE(review): with OpenMP, adj[j] may be assigned by another thread
      // while it is read here; this lookup is not synchronised.
      if (j<i) // maybe(!) we have it already
        for (uint k = 0; k<adj[j].size(); k++) {
          if ((int)adj[j][k] == i) {
            add = true; break;
          }
        }
      add |= (triangles[i].isConnectedTo(triangles[j], 0.01));
      if (add) trv.push_back(j);
    }
    adj[i] = trv;
  }
#ifdef _OPENMP
  omp_destroy_lock(&progress_lock);
#endif

  if (progress) progress->set_label(_("Split: Building shapes ..."));

  for (int i = 0; i < n_tr; i++) {
    if (progress && i%progress_steps==0)
      cont = progress->update(i);
    if (!done[i]){
      cerr << _("Shape ") << shapes.size()+1 << endl;
      // triangle indices of the shape that contains triangle i
      vector<uint> current;
      addtoshape(i, adj, current, done);
      Shape *shape = new Shape();
      shapes.push_back(shape);
      shape->triangles.resize(current.size());
      for (uint t = 0; t < current.size(); t++)
        shape->triangles[t] = triangles[current[t]];
      shape->CalcBBox();
    }
    if (!cont) break;
  }

  // the translation macro has to wrap the literal, not be part of it
  if (progress) progress->stop(_("Done"));
}
/*
 * Approximate betweenness centrality of the vertices of G.
 *
 * A level-synchronous BFS is run from up to 2^K4approx source vertices with
 * at least one outgoing edge, taken in a randomly permuted order.  For each
 * source the forward sweep records shortest-path counts (sig) and
 * predecessor lists (P); the backward sweep accumulates the dependencies
 * del[v] += sig[v]*(1+del[w])/sig[w] and adds del[w] to BC[w].
 *
 * The whole body runs inside a single OpenMP parallel region when _OPENMP is
 * defined.  Shared arrays are allocated and freed by thread 0 only; the
 * barriers around those sections are what make the pointers visible to the
 * other threads, so their order must not be changed.
 *
 * Parameters:
 *   G      - graph; the fields n, m, numEdges[], endV[] and weight[] are read.
 *   BC     - per-vertex output, length G->n.  The function only adds to it
 *            (NOTE(review): presumably zeroed by the caller - confirm).
 *   filter - when equal to 1, edges with (weight & 7) == 0 are skipped.
 *
 * Returns the time in seconds, as measured by get_seconds() on thread 0,
 * from just after the source permutation until the shared arrays have been
 * released.
 *
 * Side effects: prints one "BC: <index> <value>" line per vertex to stdout;
 * with VERIFYK4 a validation verdict and with DIAGNOSTIC per-stage timings
 * are written to stderr.
 *
 * Results of malloc/calloc are not checked.  Only the regrowth of the
 * start/end arrays is checked, and that aborts on failure.
 */
double betweennessCentrality(graph* G, DOUBLE_T* BC, int filter) {

    VERT_T *S;         /* stack of vertices in the order of non-decreasing 
                          distance from s. Also used to implicitly 
                          represent the BFS queue */
    plist* P;          /* predecessors of a vertex v on shortest paths from s */
    DOUBLE_T* sig;     /* No. of shortest paths */
    LONG_T* d;         /* Length of the shortest path between every pair */
    DOUBLE_T* del;     /* dependency of vertices */
    LONG_T *in_degree, *numEdges, *pSums;
    LONG_T *pListMem;    
    LONG_T* Srcs;      /* permuted list of candidate source vertices */
    LONG_T *start, *end; /* per-phase [start, end) ranges into S */
    LONG_T MAX_NUM_PHASES; /* current capacity of start[] and end[] */
    LONG_T *psCount;   /* per-thread counts, turned into offsets into S */
#ifdef _OPENMP    
    omp_lock_t* vLock; /* one lock per vertex */
    LONG_T chunkSize;
#endif
    int seed = 2387;
    double elapsed_time;

#ifdef _OPENMP    
#pragma omp parallel
{
#endif

    VERT_T *myS, *myS_t;   /* thread-local stack of newly found vertices */
    LONG_T myS_size;
    LONG_T i, j, k, p, count, myCount;
    LONG_T v, w, vert;
    LONG_T numV, num_traversals, n, m, phase_num;
    LONG_T tid, nthreads;
    int* stream;
#ifdef DIAGNOSTIC
    double elapsed_time_part;
#endif

#ifdef _OPENMP
    int myLock;
    tid = omp_get_thread_num();
    nthreads = omp_get_num_threads();
#else
    tid = 0;
    nthreads = 1;
#endif

#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds();
    }
#endif

    /* numV: no. of vertices to run BFS from = 2^K4approx */
    numV = 1<<K4approx;
    n = G->n;
    m = G->m;

    /* Permute vertices */
    if (tid == 0) {
        Srcs = (LONG_T *) malloc(n*sizeof(LONG_T));
#ifdef _OPENMP
        vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t));
#endif
    }

#ifdef _OPENMP   
#pragma omp barrier
#pragma omp for
    for (i=0; i<n; i++) {
        omp_init_lock(&vLock[i]);
    }
#endif

    /* Initialize RNG stream */ 
    stream = init_sprng(0, tid, nthreads, seed, SPRNG_DEFAULT);

#ifdef _OPENMP
#pragma omp for
#endif
    for (i=0; i<n; i++) {
        Srcs[i] = i;
    }

    /* Random swaps.  A swap is simply skipped when either of the two vertex
       locks is currently held by another thread. */
#ifdef _OPENMP
#pragma omp for
#endif    
    for (i=0; i<n; i++) {
        j = n*sprng(stream);
        if (i != j) {
#ifdef _OPENMP
            int l1 = omp_test_lock(&vLock[i]);
            if (l1) {
                int l2 = omp_test_lock(&vLock[j]);
                if (l2) {
#endif
                    k = Srcs[i];
                    Srcs[i] = Srcs[j];
                    Srcs[j] = k;
#ifdef _OPENMP
                    omp_unset_lock(&vLock[j]);
                }
                omp_unset_lock(&vLock[i]);
            }
#endif
        }
    }

#ifdef _OPENMP    
#pragma omp barrier
#endif

#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() -elapsed_time_part;
        fprintf(stderr, "Vertex ID permutation time: %lf seconds\n", elapsed_time_part);
        elapsed_time_part = get_seconds();
    }
#endif

    /* Start timing code from here */
    if (tid == 0) {
        elapsed_time = get_seconds();
#ifdef VERIFYK4
        MAX_NUM_PHASES = 2*sqrt(n);
#else
        MAX_NUM_PHASES = 50;
#endif
    }

#ifdef _OPENMP
#pragma omp barrier    
#endif

    /* Initialize predecessor lists */
    
    /* The size of the predecessor list of each vertex is bounded by 
       its in-degree. So we first compute the in-degree of every
       vertex */ 

    if (tid == 0) {
        P   = (plist  *) calloc(n, sizeof(plist));
        in_degree = (LONG_T *) calloc(n+1, sizeof(LONG_T));
        numEdges = (LONG_T *) malloc((n+1)*sizeof(LONG_T));
        pSums = (LONG_T *) malloc(nthreads*sizeof(LONG_T));
    }

#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
    for (i=0; i<m; i++) {
        v = G->endV[i];
#ifdef _OPENMP
        omp_set_lock(&vLock[v]);
#endif
        in_degree[v]++;
#ifdef _OPENMP
        omp_unset_lock(&vLock[v]);
#endif
    }

    prefix_sums(in_degree, numEdges, pSums, n);
    
    if (tid == 0) {
        pListMem = (LONG_T *) malloc(m*sizeof(LONG_T));
    }

    /* Carve each vertex's predecessor list out of the single pListMem
       buffer, using the offsets produced by prefix_sums() */
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for
#endif
    for (i=0; i<n; i++) {
        P[i].list = pListMem + numEdges[i];
        P[i].degree = in_degree[i];
        P[i].count = 0;
    }

#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() - elapsed_time_part;
        fprintf(stderr, "In-degree computation time: %lf seconds\n", elapsed_time_part);
        elapsed_time_part = get_seconds();
    }
#endif

    /* Allocate shared memory */ 
    if (tid == 0) {
        free(in_degree);
        free(numEdges);
        free(pSums);
        
        S   = (VERT_T *) malloc(n*sizeof(VERT_T));
        sig = (DOUBLE_T *) malloc(n*sizeof(DOUBLE_T));
        d   = (LONG_T *) malloc(n*sizeof(LONG_T));
        del = (DOUBLE_T *) calloc(n, sizeof(DOUBLE_T));
        
        start = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T));
        end = (LONG_T *) malloc(MAX_NUM_PHASES*sizeof(LONG_T));
        psCount = (LONG_T *) malloc((nthreads+1)*sizeof(LONG_T));
    }

    /* local memory for each thread */  
    myS_size = (2*n)/nthreads;
    /* With more threads than 2*n the quotient is 0, and doubling a zero
       capacity in the resize below would never make room for an element. */
    if (myS_size < 1) {
        myS_size = 1;
    }
    myS = (VERT_T *) malloc(myS_size*sizeof(VERT_T));
    num_traversals = 0;
    myCount = 0;

#ifdef _OPENMP    
#pragma omp barrier
#endif

#ifdef _OPENMP    
#pragma omp for
#endif
    for (i=0; i<n; i++) {
        d[i] = -1;
    }
 
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() -elapsed_time_part;
        fprintf(stderr, "BC initialization time: %lf seconds\n", elapsed_time_part);
        elapsed_time_part = get_seconds();
    }
#endif
   
    /* Every thread executes this loop with the same control flow (the
       decisions below depend only on shared data), so all threads reach
       the barriers inside it the same number of times. */
    for (p=0; p<n; p++) {

        i = Srcs[p];
        /* Sources without outgoing edges are skipped and not counted */
        if (G->numEdges[i+1] - G->numEdges[i] == 0) {
            continue;
        } else {
            num_traversals++;
        }

        if (num_traversals == numV + 1) {
            break;
        }
        
        if (tid == 0) {
            sig[i] = 1;
            d[i] = 0;
            S[0] = i;
            start[0] = 0;
            end[0] = 1;
        }
        
        count = 1;
        phase_num = 0;

#ifdef _OPENMP       
#pragma omp barrier
#endif
        
        /* Forward sweep: one iteration per BFS level */
        while (end[phase_num] - start[phase_num] > 0) {
            
            myCount = 0;
#ifdef _OPENMP
#pragma omp barrier
#pragma omp for schedule(dynamic)
#endif
            for (vert = start[phase_num]; vert < end[phase_num]; vert++) {
                v = S[vert];
                for (j=G->numEdges[v]; j<G->numEdges[v+1]; j++) {

                     if ((G->weight[j] & 7) == 0 && filter==1) continue; 

                        w = G->endV[j];
                        if (v != w) {

#ifdef _OPENMP                            
                            myLock = omp_test_lock(&vLock[w]);
                            if (myLock) { 
#endif             
                                /* w found for the first time? */ 
                                if (d[w] == -1) {
                                    if (myS_size == myCount) {
                                        /* Resize myS */
                                        myS_t = (VERT_T *)
                                            malloc(2*myS_size*sizeof(VERT_T));
                                        memcpy(myS_t, myS, myS_size*sizeof(VERT_T));
                                        free(myS);
                                        myS = myS_t;
                                        myS_size = 2*myS_size;
                                    }
                                    myS[myCount++] = w;
                                    d[w] = d[v] + 1;
                                    sig[w] = sig[v];
                                    P[w].list[P[w].count++] = v;
                                } else if (d[w] == d[v] + 1) {
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                }
#ifdef _OPENMP  
                            
                            omp_unset_lock(&vLock[w]);
                            } else {
                                /* Lock was busy: another thread is working
                                   on w.  Wait for it and only add this
                                   thread's path count and predecessor. */
                                if ((d[w] == -1) || (d[w] == d[v]+ 1)) {
                                    omp_set_lock(&vLock[w]);
                                    sig[w] += sig[v];
                                    P[w].list[P[w].count++] = v;
                                    omp_unset_lock(&vLock[w]);
                                }
                            }
#endif
                            
                        }
                }
             }
            /* Merge all local stacks for next iteration */
            phase_num++; 

            psCount[tid+1] = myCount;

#ifdef _OPENMP
#pragma omp barrier
#endif


            if (tid == 0) {
                /* Grow start[]/end[] when the BFS has more levels than the
                   current capacity.  Only thread 0 runs between the two
                   surrounding barriers, so no other thread is reading the
                   arrays while they are moved. */
                if (phase_num >= MAX_NUM_PHASES) {
                    LONG_T newCap = 2*(phase_num + 1);
                    LONG_T *newStart = (LONG_T *)
                        realloc(start, newCap*sizeof(LONG_T));
                    LONG_T *newEnd = (LONG_T *)
                        realloc(end, newCap*sizeof(LONG_T));
                    if (newStart == NULL || newEnd == NULL) {
                        fprintf(stderr, "betweennessCentrality: out of memory\n");
                        abort();
                    }
                    start = newStart;
                    end = newEnd;
                    MAX_NUM_PHASES = newCap;
                }
                start[phase_num] = end[phase_num-1];
                psCount[0] = start[phase_num];
                for(k=1; k<=nthreads; k++) {
                    psCount[k] = psCount[k-1] + psCount[k];
                }
                end[phase_num] = psCount[nthreads];
            }
            
#ifdef _OPENMP           
#pragma omp barrier
#endif

            /* Each thread copies its local stack into its own slice of S */
            for (k = psCount[tid]; k < psCount[tid+1]; k++) {
                S[k] = myS[k-psCount[tid]];
            } 
            
#ifdef _OPENMP            
#pragma omp barrier
#endif
            count = end[phase_num];
        }
     
        /* The last phase is empty; step back to the deepest non-empty one */
        phase_num--;

#ifdef _OPENMP        
#pragma omp barrier
#endif

        /* Backward sweep: accumulate dependencies from the deepest level
           towards the source */
        while (phase_num > 0) {
#ifdef _OPENMP        
#pragma omp for
#endif
            for (j=start[phase_num]; j<end[phase_num]; j++) {
                w = S[j];
                for (k = 0; k<P[w].count; k++) {
                    v = P[w].list[k];
#ifdef _OPENMP
                    omp_set_lock(&vLock[v]);
#endif
                    del[v] = del[v] + sig[v]*(1+del[w])/sig[w];
#ifdef _OPENMP
                    omp_unset_lock(&vLock[v]);
#endif
                }
                BC[w] += del[w];
            }

            phase_num--;
            
#ifdef _OPENMP
#pragma omp barrier
#endif            
        }

        
        /* Reset the per-vertex state of every vertex reached in this
           traversal, ready for the next source */
#ifdef _OPENMP
        chunkSize = n/nthreads;
        /* schedule() requires a positive chunk size */
        if (chunkSize < 1) {
            chunkSize = 1;
        }
#pragma omp for schedule(static, chunkSize)
#endif
        for (j=0; j<count; j++) {
            w = S[j];
            d[w] = -1;
            del[w] = 0;
            P[w].count = 0;
        }


#ifdef _OPENMP
#pragma omp barrier
#endif

    }
 
#ifdef DIAGNOSTIC
    if (tid == 0) {
        elapsed_time_part = get_seconds() -elapsed_time_part;
        fprintf(stderr, "BC computation time: %lf seconds\n", elapsed_time_part);
    }
#endif

#ifdef _OPENMP
#pragma omp for
    for (i=0; i<n; i++) {
        omp_destroy_lock(&vLock[i]);
    }
#endif

    free(myS);
    
    if (tid == 0) { 
        free(S);
        free(pListMem);
        free(P);
        free(sig);
        free(d);
        free(del);
#ifdef _OPENMP
        free(vLock);
#endif
        free(start);
        free(end);
        free(psCount);
        elapsed_time = get_seconds() - elapsed_time;
        free(Srcs);
    }

    free_sprng(stream);
#ifdef _OPENMP
}    
#endif
    /* Verification */
#ifdef VERIFYK4
    double BCval;
    if (SCALE % 2 == 0) {
        BCval = 0.5*pow(2, 3*SCALE/2)-pow(2, SCALE)+1.0;
    } else {
        BCval = 0.75*pow(2, (3*SCALE-1)/2)-pow(2, SCALE)+1.0;
    }
    int failed = 0;
    for (int i=0; i<G->n; i++) {
        if (round(BC[i] - BCval) != 0) {
            failed = 1;
            break;
        }
    }
    if (failed) {
        fprintf(stderr, "Kernel 4 failed validation!\n");
    } else {
        fprintf(stderr, "Kernel 4 validation successful!\n");
    }
#endif

    for (int i = 0; i < G->n; i++) printf ("BC: %d %f\n",i, BC[i]);
    return elapsed_time;
}