/*
 * Run loop `loopid` (1 -> loop1chunk, 2 -> loop2chunk) over the iteration
 * range [0, N) in parallel.
 *
 * The range is split into one contiguous block of ceil(N / nthreads)
 * iterations per thread.  Each thread then walks its own block in shrinking
 * chunks of (remaining / nthreads) + 1 iterations.  No work is shared between
 * threads.  Any other `loopid` value performs no work.
 */
void runloop(int loopid)  {

#pragma omp parallel default(none) shared(loopid)
  {
    int myid  = omp_get_thread_num();
    int nthreads = omp_get_num_threads();
    int ipt = (int) ceil((double)N/(double)nthreads);
    int lo = myid*ipt;
    int hi = (myid+1)*ipt;
    if (hi > N) hi = N;

    /* Exclusive end of this thread's block.  Surplus threads whose block
     * starts at or beyond N have block_end <= lo and skip the loop. */
    int block_end = hi;

    while (lo < block_end) {
      int remaining_iters = block_end - lo;
      int dist = remaining_iters / nthreads + 1;

      /* With a single thread the formula yields remaining + 1, which would
       * run one iteration past the end of the block; never claim more than
       * what is actually left. */
      if (dist > remaining_iters) dist = remaining_iters;

      hi = lo + dist;
      switch (loopid) {
          case 1: loop1chunk(lo,hi); break;
          case 2: loop2chunk(lo,hi); break;
          default: break;
      }
      lo = hi;
    }
  }
}
Exemplo n.º 2
0
/*
 * Run loop `loopid` (1 -> loop1chunk, 2 -> loop2chunk) over [0, N) in
 * parallel, in two phases per thread:
 *
 *   1. the thread repeatedly takes chunks from its own block until
 *      read_remaining_iters(myid) reports nothing left;
 *   2. it then repeatedly asks get_most_loaded_thread_details() for a thread
 *      that still has iterations and takes chunks from that thread's block.
 *
 * The per-thread bounds live in the file-level arrays lo[], hi[] and
 * remaining_iters[].  The chunk bookkeeping is done by get_chunks(), which is
 * defined elsewhere (presumably under remaining_iters_lock -- confirm there).
 */
void runloop(int loopid)  {

#pragma omp parallel default(none) shared(loopid, remaining_iters, hi, lo, remaining_iters_lock) 
  {
    int chunk, start_iter, end_iter, remaining_iters_tmp;
    int next_thread_id;
    int myid  = omp_get_thread_num();
    int nthreads = omp_get_num_threads(); 
    double K = (double) 1/nthreads;//k=1/p
    int ipt = (int) ceil((double)N/(double)nthreads); 
    lo[myid] = myid*ipt;
    hi[myid] = (myid+1)*ipt;
    if (hi[myid] > N) hi[myid] = N;

    remaining_iters_tmp = hi[myid]-lo[myid];
    remaining_iters[myid] = remaining_iters_tmp;

    /* Every thread must have published its block before any thread is allowed
     * to look at another thread's entry in phase 2. */
    #pragma omp barrier

    /* Phase 1: work through this thread's own block. */
    while(remaining_iters_tmp > 0) { 
	get_chunks(myid, K, &start_iter, &chunk);
	/* Set DEBUG flag to TRUE if you want to see the flow details*/
	if(DEBUG==TRUE) print_run_details("Own", loopid, myid, myid, start_iter, chunk);
	switch(loopid){
		/* The breaks matter: without them loopid 1 falls through and also
		 * runs loop 2 on the same chunk. */
		case 1: loop1chunk(start_iter, start_iter+chunk); break;
		case 2: loop2chunk(start_iter, start_iter+chunk); break;
		default: break;
	}
	remaining_iters_tmp = read_remaining_iters(myid);
    }//end while loop 1

    /* Phase 2: help whichever thread currently has the most work left. */
    get_most_loaded_thread_details(nthreads, &next_thread_id, &remaining_iters_tmp);
    
    while(remaining_iters_tmp >0){
	get_chunks(next_thread_id, K, &start_iter, &chunk);
	
	/* Set DEBUG flag to TRUE if you want to see the flow details*/
	if(DEBUG==TRUE) print_run_details("Affinity", loopid, myid, next_thread_id, start_iter, chunk);
        switch(loopid){
                case 1: loop1chunk(start_iter, start_iter+chunk); break;
                case 2: loop2chunk(start_iter, start_iter+chunk); break;
                default: break;
        }
	get_most_loaded_thread_details(nthreads, &next_thread_id, &remaining_iters_tmp);
    }//end while loop 2
	
  }
}
Exemplo n.º 3
0
/**
 * The idea is to implement a general work-stealing algorithm using critical sections (alternatives are discussed in the report).
 * However, rather than computing iterations owned by the current thread, we're going to steal from ourselves. Once our own
 * iterations have been completed, we will start stealing from other threads. The preference will be given to threads with higher
 * IDs due to the way the work is distributed (this, again, is explained in the report).
 *
 * loopid selects the kernel: 1 -> loop1chunk, 2 -> loop2chunk; any other value performs no work.
 */
void runloop(int loopid)
{
	int thread_count = omp_get_max_threads();							// upper bound on the team size; the real team
																		// is only known inside the parallel region.

	int n_over_p = (int) ceil((double) N / (double) thread_count);		// iterations per block, rounded up

	float one_over_p = 1.0 / thread_count;								// fraction handed to findThreadToStealFrom()

	int lower_bounds[thread_count];										// first iteration of each block not yet claimed

	int upper_bounds[thread_count];										// exclusive end of each block;
																		// upper_bounds[i] - lower_bounds[i] = remaining iterations

	// Publish every block *before* the team starts. Filling the slots from inside the parallel region (one per thread,
	// without a barrier) would let a fast thread scan slots that a slower thread has not written yet, and would leave
	// slots uninitialised whenever the runtime grants fewer threads than omp_get_max_threads().
	for (int t = 0; t < thread_count; t++)
	{
		int low  = t * n_over_p;
		int high = (t + 1) * n_over_p;

		if (high > N) high = N;		// in case n mod p != 0
		if (low > high) low = high;	// surplus blocks past N are empty rather than negative

		lower_bounds[t] = low;
		upper_bounds[t] = high;
	}

	#pragma omp parallel default(none)  \
						 shared(thread_count, loopid, lower_bounds, upper_bounds, one_over_p)
	{
		int thread_id = omp_get_thread_num();

		// We need to ensure that the last iteration does not compute twice. Although this could be done with an if statement
		// below the switch, it is more succinct to let the first pass run an empty chunk [0, 0): that gives
		// findThreadToStealFrom() the chance to set current_low and current_high, so the second pass is the first one
		// that performs any work.
		int current_low	  = 0,
			current_high  = 0,
			stealing_from = 0;

		while(stealing_from != -1)
		{
			switch(loopid)
			{
				case 1: loop1chunk(current_low, current_high); break;
				case 2: loop2chunk(current_low, current_high); break;
				default: break;
			}

			// Find the next current_low and current_high (passed by pointer as out-parameters). The bounds arrays are
			// shared, so the lookup and update must not interleave between threads.
			#pragma omp critical
			{
				stealing_from = findThreadToStealFrom(lower_bounds, upper_bounds, thread_count, thread_id, one_over_p, &current_low, &current_high);
			}
		}
	}
}
void runloop(int loopid)  {

	int global_work_remaining[omp_get_max_threads()];
	omp_lock_t writelock;
	omp_init_lock(&writelock);
	#pragma omp parallel default(none) shared(global_work_remaining, writelock, loopid, waiting_time, loop_time,a, b, c) 
  {
		int i;
		int start_time, stop_time;

    int my_id  = omp_get_thread_num();
    int nthreads = omp_get_num_threads(); 
    int ipt = (int) ceil((double)N/(double)nthreads); 

		/* there should be as many chunks as there are threads
		 * and they should have roughly identical ranges		*/			
		int chunk_id = my_id;
    int chunk_lo = chunk_id*ipt;
   	int chunk_hi = (chunk_id+1)*ipt;
    if (chunk_hi > N) chunk_hi = N;
		int chunk_range = chunk_hi-chunk_lo;

		/* these are the variables that tell how much
		 * work a thread is doing in a chunk */
		int local_lo, local_hi, local_work;

		/* initialise the shared array*/
		global_work_remaining[my_id] = chunk_range;
		#pragma omp barrier

		/* continue to do work unless there is no work left to do */
		while(1)
		{
			start_time = omp_get_wtime();
			omp_set_lock(&writelock);
			if(global_work_remaining[chunk_id] == 0)
			{
				int old_id = chunk_id;
				for(i=0; i<nthreads; i++)
				{
					if(global_work_remaining[chunk_id] < global_work_remaining[i])
					{
						chunk_id = i;
					}
				}
				if(old_id == chunk_id)
				{
					omp_unset_lock(&writelock);
					break;
				}
				else
				{
			    chunk_hi = (chunk_id+1)*ipt;
  			  if (chunk_hi > N) chunk_hi = N;
					chunk_range = global_work_remaining[chunk_id];
				}
			}
			else
			{
				chunk_range = global_work_remaining[chunk_id];
			}
			local_work = floor((double)chunk_range/(double)nthreads);
			if(local_work < 1) local_work = 1;
			global_work_remaining[chunk_id] -= local_work;
			omp_unset_lock(&writelock);
			local_lo = chunk_hi - chunk_range;
			local_hi = local_lo +	local_work;
			waiting_time[my_id] += omp_get_wtime() - start_time;
			start_time = omp_get_wtime();
	    switch (loopid) { 
	       case 1: loop1chunk(local_lo,local_hi); break;
	       case 2: loop2chunk(local_lo,local_hi); break;
	    } 
			loop_time[my_id] += omp_get_wtime() -start_time;
		}
  }
}
Exemplo n.º 5
0
/*
 * Run loop `loopid` (1 -> loop1chunk, 2 -> loop2chunk) over [0, N) in parallel
 * with work stealing.
 *
 * Each thread owns one block of ceil(N / nthreads) iterations, described by
 * blocks[t].high (exclusive end) and blocks[t].remaining (unclaimed
 * iterations at the tail of the block).  A thread first drains its own block
 * in chunks of ceil(remaining / nthreads), then keeps taking chunks of the
 * same size from whichever block has the most iterations left, until all
 * blocks are empty.
 *
 * Every access to blocks[] happens inside a critical section; the values a
 * thread needs afterwards are copied into locals there.  Progress is printed
 * to stdout for every chunk.
 *
 * If the scheduling table cannot be allocated, each thread simply runs its
 * own block in one piece (no stealing) and a note is written to stderr.
 */
void runloop(int loopid)  {

 struct block *blocks = NULL; /* per-thread scheduling table, allocated by one thread below */

 #pragma omp parallel default(none) shared(loopid, blocks)
  {
    int myid  = omp_get_thread_num();
    int nthreads = omp_get_num_threads();

    #pragma omp single
    {
    	blocks = malloc((size_t)nthreads * sizeof *blocks);
    }
    /* The implicit barrier at the end of `single` makes the pointer (or the
     * NULL of a failed allocation) visible to the whole team, so every
     * thread takes the same branch below. */

    int ipt = (int) ceil((double)N/(double)nthreads);
    int lo = myid*ipt;
    int hi = (myid+1)*ipt;
    if (hi > N) hi = N;
    if (lo > hi) lo = hi; /* surplus threads past N own an empty block */

    int num_iters = (int)ceil((double)(hi - lo)/(double)nthreads);
    int left = 0; /* snapshot of a block's remaining count, taken under the lock */

    if (blocks == NULL) {
    	/* No table: fall back to a plain static partition. */
    	switch (loopid) {
    		case 1: loop1chunk(lo,hi); break;
    		case 2: loop2chunk(lo,hi); break;
    		default: break;
    	}
    } else {
    	#pragma omp critical
    	{
    		blocks[myid].high=hi;
    		blocks[myid].remaining=hi - lo;
    		printf("Thread %d has remaining %d and num iters is%d\n",myid, blocks[myid].remaining,num_iters);
    	}

    	/* All entries must be initialised before any thread may scan the
    	 * table for a victim; the memory from malloc is otherwise garbage. */
    	#pragma omp barrier

    	/* Phase 1: drain this thread's own block. */
    	for (;;) {
    		#pragma omp critical
    		{
    			num_iters= (int)ceil((double)(blocks[myid].remaining)/(double)nthreads);
    			lo=blocks[myid].high - blocks[myid].remaining;
    			hi=lo + num_iters;
    			blocks[myid].remaining -= num_iters;
    			left=blocks[myid].remaining;
    		}

    		/* Nothing claimed: the block is empty (possibly emptied by thieves). */
    		if (num_iters <= 0) break;

    		printf("Thread %d iterating from %d to %d with %d remaining\n", myid, lo, hi, left);
    		switch (loopid) {
    			case 1: loop1chunk(lo,hi); break;
    			case 2: loop2chunk(lo,hi); break;
    			default: break;
    		}
    	}

    	/* Phase 2: steal from the most loaded block until none has work left. */
    	for (;;) {
    		int loc_most_work=-1;
    		int most_work=0;
    		int i;

    		/* Picking the victim and claiming the chunk must be one atomic step. */
    		#pragma omp critical
    		{
    			for(i=0;i<nthreads;i++){
    				if (blocks[i].remaining>most_work){
    					most_work = blocks[i].remaining;
    					loc_most_work=i;
    				}
    			}
    			if(loc_most_work>=0){
    				num_iters= (int)ceil((double)most_work/(double)nthreads);
    				lo=blocks[loc_most_work].high - most_work;
    				hi=lo + num_iters;
    				blocks[loc_most_work].remaining -= num_iters;
    				left=blocks[loc_most_work].remaining;
    			}
    		}

    		if (loc_most_work < 0) break; /* every block is empty */

    		switch (loopid) {
    			case 1: loop1chunk(lo,hi); break;
    			case 2: loop2chunk(lo,hi); break;
    			default: break;
    		}
    		printf("Thread %d stealing from thread %d iterating %d to %d with %d remaining\n",myid, loc_most_work, lo, hi, left);
    	}
    }
  }

 if (blocks == NULL) {
 	fprintf(stderr, "runloop: could not allocate scheduling table; ran with a static partition\n");
 }
 free(blocks); /* free(NULL) is a no-op */
}