Beispiel #1
0
void *thread(void *arg) {
    int i, j;
    char* outboard = ((t_args_t *)arg)->outboard;
    char* inboard = ((t_args_t *)arg)->inboard;
    const int nrows = ((t_args_t *)arg)->nrows;
    const int ncols = ((t_args_t *)arg)->ncols;
    const int rStart = ((t_args_t *)arg)->rStart;
    const int rEnd = ((t_args_t *)arg)->rEnd;
    const int cStart = ((t_args_t *)arg)->cStart;
    const int cEnd = ((t_args_t *)arg)->cEnd;

    const int LDA = nrows;

    for (i = rStart; i < rEnd; i++) {
        for (j = cStart; j < cEnd; j++) {
            const int inorth = mod (i-1, nrows);
            const int isouth = mod (i+1, nrows);
            const int jwest = mod (j-1, ncols);
            const int jeast = mod (j+1, ncols);

            const char neighbor_count = 
                BOARD (inboard, inorth, jwest) + 
                BOARD (inboard, inorth, j) + 
                BOARD (inboard, inorth, jeast) + 
                BOARD (inboard, i, jwest) +
                BOARD (inboard, i, jeast) + 
                BOARD (inboard, isouth, jwest) +
                BOARD (inboard, isouth, j) + 
                BOARD (inboard, isouth, jeast);
            BOARD(outboard, i, j) = alivep (neighbor_count, BOARD (inboard, i, j));
        }
    }
    pthread_exit(NULL);
}
    char*
sequential_game_of_life (char* outboard, 
        char* inboard,
        const int nrows,
        const int ncols,
        const int gens_max)
{
    /*
     * Single-threaded Game of Life: advances the board by gens_max
     * generations.  Each generation reads inboard, writes outboard, then
     * swaps the two pointers.
     *
     * Returns the pointer that holds the final generation (after the last
     * swap that is `inboard`).  The caller still owns both buffers and
     * must take care not to free the same one twice.
     *
     * This routine writes nothing to stdout; the earlier debug trace
     * prints were removed.
     *
     * HINT (kept from the original): in a parallel decomposition LDA may
     * not be equal to nrows.  BOARD() presumably reads LDA from scope.
     */
    const int LDA = nrows;
    int curgen, i, j;

    for (curgen = 0; curgen < gens_max; curgen++)
    {
        /* HINT: you'll be parallelizing these loop(s) by doing a
           geometric decomposition of the output */
        for (j = 0; j < ncols; j++)
        {
            for (i = 0; i < nrows; i++)
            {
                /* Neighbour coordinates, passed through mod() at the edges. */
                const int inorth = mod (i-1, nrows);
                const int isouth = mod (i+1, nrows);
                const int jwest = mod (j-1, ncols);
                const int jeast = mod (j+1, ncols);

                const char neighbor_count = 
                    BOARD (inboard, inorth, jwest) + 
                    BOARD (inboard, inorth, j) + 
                    BOARD (inboard, inorth, jeast) + 
                    BOARD (inboard, i, jwest) +
                    BOARD (inboard, i, jeast) + 
                    BOARD (inboard, isouth, jwest) +
                    BOARD (inboard, isouth, j) + 
                    BOARD (inboard, isouth, jeast);

                BOARD(outboard, i, j) = alivep (neighbor_count, BOARD (inboard, i, j));
            }
        }
        SWAP_BOARDS( outboard, inboard );
    }

    return inboard;
}
Beispiel #3
0
/*
 * Computes the next state of the single cell (i, j): reads its eight
 * neighbours from inboard and stores the result of alivep() in outboard.
 *
 * Neighbour indices wrap around the board edges using plain comparisons
 * instead of the modulus helper.  According to the original note, this
 * helper belongs to the first version of the code and the comparisons
 * were removed again in later versions.
 */
static inline void update(int i, int j, char* outboard,
        char* inboard,
        const int nrows,
        const int ncols){
	/* Kept under this exact name: BOARD() presumably reads LDA from scope. */
	const int LDA = nrows;

	/* Row above / below, wrapping at the first and last row. */
	const int inorth = (i == 0) ? nrows - 1 : i - 1;
	const int isouth = (i == nrows - 1) ? 0 : i + 1;

	/* Column to the left / right, wrapping at the first and last column. */
	const int jwest = (j == 0) ? ncols - 1 : j - 1;
	const int jeast = (j == ncols - 1) ? 0 : j + 1;

	const int above = BOARD (inboard, inorth, jwest)
			+ BOARD (inboard, inorth, j)
			+ BOARD (inboard, inorth, jeast);
	const int beside = BOARD (inboard, i, jwest)
			 + BOARD (inboard, i, jeast);
	const int below = BOARD (inboard, isouth, jwest)
			+ BOARD (inboard, isouth, j)
			+ BOARD (inboard, isouth, jeast);

	const char neighbor_count = above + beside + below;

	BOARD(outboard, i, j) = alivep (neighbor_count, BOARD (inboard, i, j));
}
Beispiel #4
0
/*
 * pthread start routine: computes one generation for a band of rows.
 *
 * args carries the band number n as an integer packed into the pointer
 * (it is unpacked via intptr_t).  The band covers rows
 * [n * slice, n * slice + slice) and all ncols columns.  The boards, the
 * dimensions, slice and mask are file-scope variables set up by the caller.
 *
 * Index wrapping is done with "& mask" for rows AND columns, so results
 * are only correct when mask + 1 is a power of two equal to both board
 * dimensions.  NOTE(review): confirm that callers guarantee this.
 *
 * Always returns NULL.
 */
void*
parallel_run(void* args) {
  const int n = (intptr_t) args;
  const int rows_from = n * slice;
  const int rows_to = rows_from + slice;
  int i, j;

  for (i = rows_from; i < rows_to; i ++) {
    /* Row neighbours do not depend on j, so compute them once per row. */
    const int inorth = (i-1) & mask;
    const int isouth = (i+1) & mask;

    for (j = 0; j < ncols; j ++) {
      const int jwest = (j-1) & mask;
      const int jeast = (j+1) & mask;

      const char neighbor_count =
          BOARD (inboard, inorth, jwest) +
          BOARD (inboard, i, jwest) +
          BOARD (inboard, isouth, jwest) +
          BOARD (inboard, inorth, j) +
          BOARD (inboard, i, jeast) +
          BOARD (inboard, isouth, jeast) +
          BOARD (inboard, inorth, jeast) +
          BOARD (inboard, isouth, j);

      BOARD(outboard, i, j) = alivep (neighbor_count, BOARD (inboard, i, j));
    }
  }

  /* A start routine's value is consumed by the thread library, so it must
     return one explicitly. */
  return NULL;
}
/**
 * Parallelized implementation of the game of life (pthread start routine).
 *
 * arg points to a structArgs holding the two boards, the board dimensions,
 * the number of generations, this worker's threadNum and a pointer to the
 * barrier shared by all workers (barrp).
 *
 * Each worker owns the row band
 *   [threadNum * nrows / NUM_THREADS, (threadNum + 1) * nrows / NUM_THREADS)
 * so that all rows are covered even when nrows is not a multiple of
 * NUM_THREADS.  This assumes threadNum ranges over 0 .. NUM_THREADS-1 --
 * TODO confirm against the thread-creation code.
 *
 * Per generation the band is walked in column tiles.  Inside a tile a
 * 3x3 window of cell values is carried in local variables and shifted
 * right (next column) and down (next row), so only three new cells are
 * read per step.  After every generation the worker waits on the barrier
 * and then swaps its private inboard/outboard pointers.
 *
 * The routine ends the thread with pthread_exit(0).
 */
void* loop_parellize(void* arg){

	structArgs *a = (structArgs*) arg;
	const int nrows = a->nrows;
	const int ncols = a->ncols;
	const int gens_max = a->gens_max;
	const int threadNum = a->threadNum;
	char* outboard = a->outboard;
	char* inboard = a->inboard;

	/* Kept under this exact name: BOARD() presumably reads LDA from scope. */
	const int LDA = nrows;

	/* Row band owned by this worker (may be empty). */
	const int initial_i = (threadNum * nrows) / NUM_THREADS;
	const int maximum_i = ((threadNum + 1) * nrows) / NUM_THREADS;

	/* Column tile width: half the board, but never 0, otherwise the tile
	   loop below would not advance on boards narrower than two columns. */
	const int varj = (ncols >= 2) ? ncols/2 : 1;

	int j, j2, i2, curgen;
	/* j*: window values for the first row of the band, carried from one
	   column to the next.  i*: window values carried down the rows. */
	int jself, jn, jne, je, js, jse;
	int iself, inw, in, ine, iw, ie, isw, is, ise;

	for (curgen = 0; curgen < gens_max; curgen++)
	{
		if (initial_i < maximum_i)
		{
			const int i = initial_i;

			// Optimization: Code Motion, Improved formula for inorth and isouth
			const int inorth = (i==0) ? nrows-1 : i-1;
			const int isouth = (i==nrows-1) ? 0 : i+1;

			// Optimization: Tiling over the columns
			for (j = 0; j < ncols; j+=varj)
			{
				/* Clamp the tile so an odd ncols cannot run past the board. */
				const int jstop = (j+varj < ncols) ? j+varj : ncols;

				for(j2=j;j2<jstop;j2++){

					// Optimization: Improved formula for jwest and jeast
					const int jwest = (j2 == 0)? ncols-1: j2-1;
					const int jeast = (j2 == ncols-1)? 0 : j2+1;

					if(j2 == j){
						// First column of a tile: load the full 3x3 window
						inw = BOARD (inboard, inorth, jwest);
						in = jn = BOARD (inboard, inorth, j2);
						ine = jne = BOARD (inboard, inorth, jeast);
						iw = BOARD (inboard, i, jwest);
						iself = jself = BOARD (inboard, i, j2);
						ie = je = BOARD (inboard, i, jeast);
						isw = BOARD (inboard, isouth, jwest);
						is = js = BOARD (inboard, isouth, j2);
						ise = jse = BOARD (inboard, isouth, jeast);
					}
					else{
						// Optimization: Loop iteration memory sharing --
						// shift the window one column right, read only the
						// new east column
						inw = jn;
						in = jn = jne;
						ine = jne = BOARD (inboard, inorth, jeast);
						iw = jself;
						iself = jself = je;
						ie = je = BOARD (inboard, i, jeast);
						isw = js;
						is = js = jse;
						ise = jse = BOARD (inboard, isouth, jeast);
					}
					for(i2=i; i2<maximum_i; i2++){
						if(i2>i){
							// Optimization: Loop iteration memory sharing --
							// shift the window one row down, read only the
							// new south row
							const int isouth2 = (i2==nrows-1) ? 0 : i2+1;
							inw = iw;
							in = iself;
							ine = ie;
							iw = isw;
							iself = is;
							ie = ise;
							isw = BOARD (inboard, isouth2, jwest);
							is = BOARD (inboard, isouth2, j2);
							ise = BOARD (inboard, isouth2, jeast);
						}

						const char neighbor_count = inw + in + ine + iw + ie + isw + is + ise;

						BOARD(outboard, i2, j2) = alivep (neighbor_count, iself);
					}
				}
			}
		}
		// Optimizaton: pthread barrier -- every worker must finish the
		// generation before anyone swaps and starts overwriting the old input
		pthread_barrier_wait(a->barrp);
		SWAP_BOARDS( outboard, inboard );
	}
	pthread_exit(0);
}
Beispiel #6
0
    char*
sequential_game_of_life_parallel (char* outboard,
        char* inboard,
        const int nrows,
        const int ncols,
        const int gens_max,

        const int sector,
        int *status,
        pthread_mutex_t *mutex,
        pthread_cond_t *cv)
{
    /*
     * Worker for a four-way (quadrant) decomposition of the board.
     *
     * sector selects the quadrant: sectors 0 and 2 take the upper half of
     * the rows, sectors 1 and 3 the lower half; sectors 0 and 1 take the
     * left half of the columns, sectors 2 and 3 the right half.  The
     * interior ranges exclude the outermost row/column, which need
     * wrap-around neighbours and are handled by the COUNT_AND_BOARD /
     * I_CODE_WITH_J macros (defined elsewhere; from the call sites their
     * trailing arguments appear to be the explicit north/south/west/east
     * indices -- TODO confirm).
     *
     * After every generation the worker swaps its private board pointers
     * and then synchronises with the other three workers through
     * status/mutex/cv: each worker sets bit `sector` in *status; the last
     * one to arrive clears *status and broadcasts.
     *
     * Returns the pointer that holds the final generation (after the last
     * swap that is `inboard`).  Be careful when freeing the two boards so
     * that the same one is not freed twice.
     *
     * HINT (kept from the original): in the parallel decomposition, LDA
     * may not be equal to nrows.
     */

    int curgen, i, j;
    int row_start, col_start;
    int row_end, col_end;

    //Splitting what quadrant we work on.
    if(sector == 0 || sector == 2){
    	row_start = 1;
    	row_end = nrows/2;
    }

    else{
    	row_start = nrows/2;
    	row_end = nrows - 1;
    }

    if(sector == 0 || sector == 1){
    	col_start = 1;
    	col_end = ncols/2;
    }

    else{
    	col_start = ncols/2;
    	col_end = ncols - 1;
    }

    const int LDA = nrows;
    char mem_access[3];
    char cent;

    for (curgen = 0; curgen < gens_max; curgen++)
    {
		char neighbor_count;

		//The overlapping sections
		if (sector == 0){
			//j == 0
			//i == 0
			COUNT_AND_BOARD(inboard, outboard, neighbor_count, 0, 0, nrows - 1, 1, ncols - 1, 1);

			//j == 0
			//i == 1 -> i == nrows/2 - 1

            I_CODE_WITH_J(0, ncols - 1, 1);

            //j == 1 -> j == ncols/2 - 1
            //i == 0
            //J_CODE_WITH_I(0, nrows - 1, 1);
            for (j = col_start; j < col_end; j++)
            {
            	COUNT_AND_BOARD(inboard, outboard, neighbor_count, 0, j, nrows - 1, 1, j - 1, j + 1);
            }
		}
		else if(sector == 1){
			//j == 0
			//i == nrows - 1
			COUNT_AND_BOARD(inboard, outboard, neighbor_count, nrows - 1, 0, nrows - 2, 0, ncols - 1, 1);

			//j == 0
			//i == nrows/2 -> i == nrows - 2
			I_CODE_WITH_J(0, ncols - 1, 1);


            //j == 1 -> j == ncols/2 - 1
            //i == nrows - 1
			//J_CODE_WITH_I(nrows - 1, nrows - 2, 0);
            for (j = col_start; j < col_end; j++)
            {
            	COUNT_AND_BOARD(inboard, outboard, neighbor_count, nrows - 1, j, nrows - 2, 0, j - 1, j + 1);
            }
		}
		else if(sector == 2){
			//j == ncols - 1
			//i == 0
			COUNT_AND_BOARD(inboard, outboard, neighbor_count, 0, ncols - 1, nrows - 1, 1, ncols - 2, 0);

			//j == ncols - 1
			//i == 1 -> i == nrows/2 - 1
			I_CODE_WITH_J(ncols - 1, ncols - 2, 0);


            //j == ncols/2 -> j == ncols - 2
            //i == 0
			//J_CODE_WITH_I(0, nrows - 1, 1);
            for (j = col_start; j < col_end; j++)
            {
            	COUNT_AND_BOARD(inboard, outboard, neighbor_count, 0, j, nrows - 1, 1, j - 1, j + 1);
            }
		}
		else{
			//j == ncols - 1
			//i == nrows - 1
			COUNT_AND_BOARD(inboard, outboard, neighbor_count, nrows - 1, ncols - 1, nrows - 2, 0, ncols - 2, 0);

			//j == ncols - 1
			//i == nrows/2 -> i == nrows - 2
			I_CODE_WITH_J(ncols - 1, ncols - 2, 0);

            //j == ncols/2 -> j == ncols - 2
            //i == nrows - 1
			//J_CODE_WITH_I(nrows - 1, nrows - 2, 0);
            for (j = col_start; j < col_end; j++)
            {
            	COUNT_AND_BOARD(inboard, outboard, neighbor_count, nrows - 1, j, nrows - 2, 0, j - 1, j + 1);
            }
		}

		//Main code part, no if/else branching
		//Unrolled once in the i dimension, as well as a block on j of size J_BLOCK_SIZE.
		//
		//mem_access[] holds the 3-cell row sums of the rows above, at and
		//below the current cell; neighbor_count is kept as their total
		//minus the centre cell and is updated incrementally per row.

		int jj;
    	for (jj = col_start; jj < col_end; jj+= J_BLOCK_SIZE)
        {
			for (j = jj; j < min(jj + J_BLOCK_SIZE, col_end); j++)
			{

				//Initializing sum
				mem_access[0] = 0;

				mem_access[1] = BOARD (inboard, row_start-1, j-1) +
								BOARD (inboard, row_start-1, j) +
								BOARD (inboard, row_start-1, j+1);


				mem_access[2] = BOARD (inboard, row_start, j-1) +
								BOARD (inboard, row_start, j) +
								BOARD (inboard, row_start, j+1);

				cent = 0;

				neighbor_count = mem_access[1] + mem_access[2];

				for(i = row_start; i < row_end - 1; i+=2)
				{	//1
					neighbor_count += cent;

					cent = BOARD (inboard, i, j);

					neighbor_count = neighbor_count - mem_access[0] - cent;

					mem_access[0] = mem_access[1];

					mem_access[1] = mem_access[2];

					mem_access[2] = BOARD (inboard, i+1, j-1) +
									BOARD (inboard, i+1, j) +
									BOARD (inboard, i+1, j+1);

					neighbor_count += mem_access[2];

					BOARD(outboard, i, j) = alivep (neighbor_count, cent);

					//2
					neighbor_count += cent;

					cent = BOARD (inboard, i+1, j);

					neighbor_count = neighbor_count - mem_access[0] - cent;

					mem_access[0] = mem_access[1];

					mem_access[1] = mem_access[2];

					mem_access[2] = BOARD (inboard, i+2, j-1) +
									BOARD (inboard, i+2, j) +
									BOARD (inboard, i+2, j+1);

					neighbor_count += mem_access[2];

					BOARD(outboard, i+1, j) = alivep (neighbor_count, cent);

					//COUNT_AND_BOARD_IJ(inboard, outboard, neighbor_count, i, j);

				}

				//Leftover row when the band holds an odd number of rows.
				//With an even (or empty) band i has already reached
				//row_end; stepping again would write a row that belongs
				//to another sector or to the edge code and read one row
				//too far, so the step is skipped in that case.
				if (i < row_end)
				{
					neighbor_count += cent;

					cent = BOARD (inboard, i, j);

					neighbor_count = neighbor_count - mem_access[0] - cent;

					mem_access[0] = mem_access[1];

					mem_access[1] = mem_access[2];

					mem_access[2] = BOARD (inboard, i+1, j-1) +
									BOARD (inboard, i+1, j) +
									BOARD (inboard, i+1, j+1);

					neighbor_count += mem_access[2];

					BOARD(outboard, i, j) = alivep (neighbor_count, cent);
				}

			}
        }



        //SWAP_BOARDS( outboard, inboard );
        //I don't like that weird do while wrapper.
        char *temp = outboard;
        outboard = inboard;
        inboard = temp;

        pthread_mutex_lock(mutex);
        *status = *status | (1 << sector);
        if(*status == 0xF){
        	//Last sector to arrive: reset the flags and release everyone.
        	*status = 0;
        	pthread_cond_broadcast(cv);
        }
        else{
        	//Condition variables may wake spuriously, so wait until our
        	//own flag has really been cleared by the last arriver.  Only
        	//this thread ever sets that bit again, so the test is stable.
        	while(*status & (1 << sector)){
        		pthread_cond_wait(cv, mutex);
        	}
        }

        //Everyone finished working on their sector, can start the next generation
        pthread_mutex_unlock(mutex);

    }



    /*
     * We return the output board, so that we know which one contains
     * the final result (because we've been swapping boards around).
     * Just be careful when you free() the two boards, so that you don't
     * free the same one twice!!!
     */
    return inboard;
}
Beispiel #7
0
/*
 * OpenMP Game of Life: advances the board by gens_max generations.
 *
 * Every generation opens a parallel region (requesting NUM_THREADS
 * threads); each thread computes a contiguous band of rows chosen from
 * its thread number.  The band boundaries use the team size actually
 * delivered by the runtime (omp_get_num_threads()), because OpenMP may
 * provide fewer threads than requested; dividing by NUM_THREADS would
 * then leave rows uncomputed.
 *
 * Within a row a 3x3 window of cell values is shifted one column at a
 * time, so only the three cells of the new east column are read per step.
 *
 * Returns the pointer that holds the final generation (after the last
 * swap that is `inboard`).  Be careful when freeing the two boards so
 * that the same one is not freed twice.
 */
char*
game_of_life (char* outboard, 
	      char* inboard,
	      const int nrows,
	      const int ncols,
	      const int gens_max)
{
    /* HINT: in the parallel decomposition, LDA may not be equal to
    nrows! */
    const int LDA = nrows;
    int curgen;

    for (curgen = 0; curgen < gens_max; curgen++)
    {
        #pragma omp parallel num_threads(NUM_THREADS)
        {
            /* Declared inside the region so that every thread has its own copy. */
            int i, j;
            const int thread_num = omp_get_thread_num();
            const int team_size = omp_get_num_threads();

            /* Band of rows owned by this thread; the bands of all team
               members tile [0, nrows) exactly. */
            const int row_begin = thread_num*nrows/team_size;
            const int row_end = (thread_num+1)*nrows/team_size;

            for (i = row_begin; i < row_end; i++)
            {
                const int inorth = mod (i-1, nrows);
                const int isouth = mod (i+1, nrows);
                const int jlast = mod (-1, ncols);   /* column west of column 0 */

                /* Prime the window one column to the left of j == 0: the
                   "centre" column holds column jlast and the "east" column
                   holds column 0, so the first shift in the loop below
                   lands on column 0. */
                char nw;
                char n  = BOARD (inboard, inorth, jlast);
                char ne = BOARD (inboard, inorth, 0);
                char w;
                char c  = BOARD (inboard, i, jlast);
                char e  = BOARD (inboard, i, 0);
                char sw;
                char s  = BOARD (inboard, isouth, jlast);
                char se = BOARD (inboard, isouth, 0);

                for (j = 0; j < ncols; j++)
                {
                    const int jeast = mod (j+1, ncols);

                    /* Shift the window one column to the right; only the
                       north-east, east and south-east cells are new. */
                    nw = n;
                    n  = ne;
                    ne = BOARD (inboard, inorth, jeast);
                    w  = c;
                    c  = e;
                    e  = BOARD (inboard, i, jeast);
                    sw = s;
                    s  = se;
                    se = BOARD (inboard, isouth, jeast);

                    const char neighbor_count = nw + n + ne + w + e + sw + s + se;

                    BOARD(outboard, i, j) = alivep (neighbor_count, c);
                }
            }
        }
        SWAP_BOARDS( outboard, inboard );

    }

    return inboard;
}
Beispiel #8
0
/*
 * pthread start routine that runs the whole simulation for one band of rows.
 *
 * arg points to a thread_struct providing the two boards, the board
 * dimensions, the generation count, this worker's thread_num and the
 * barrier shared by the workers.  The band is
 *   [thread_num * nrows / NUMBER_OF_THREADS,
 *    (thread_num + 1) * nrows / NUMBER_OF_THREADS).
 *
 * For every generation the worker fills its band of outboard from inboard,
 * waits on the barrier, and then swaps its private board pointers.
 * mod() is only called for cells on the first/last row or column; interior
 * cells use plain +/-1 arithmetic.
 *
 * Always returns NULL.
 */
void*
parallel_game_of_life (void* arg)
{
	thread_struct *ctx = (thread_struct *)arg;

	char* outboard = ctx->outboard;
	char* inboard = ctx->inboard;
	const int nrows = ctx->nrows;
	const int ncols = ctx->ncols;
	const int gens_max = ctx->gens_max;
	pthread_barrier_t *bar = ctx->bar;

	/* HINT: in the parallel decomposition, LDA may not be equal to nrows!
	   Kept under this exact name: BOARD() presumably reads it from scope. */
	const int LDA = nrows;

	/* Row band assigned to this worker. */
	const int first_row = (ctx->thread_num * nrows) / NUMBER_OF_THREADS;
	const int row_limit = ((ctx->thread_num + 1) * nrows) / NUMBER_OF_THREADS;

	int gen, row, col;

	for (gen = 0; gen < gens_max; ++gen)
	{
		for (row = first_row; row < row_limit; ++row)
		{
			/* Only boundary rows go through mod(). */
			const int row_on_edge = (row == 0 || row == nrows - 1);
			const int inorth = row_on_edge ? mod (row-1, nrows) : row-1;
			const int isouth = row_on_edge ? mod (row+1, nrows) : row+1;

			for (col = 0; col < ncols; ++col)
			{
				/* Only boundary columns go through mod(). */
				const int col_on_edge = (col == 0 || col == ncols - 1);
				const int jwest = col_on_edge ? mod (col-1, ncols) : col-1;
				const int jeast = col_on_edge ? mod (col+1, ncols) : col+1;

				const int above = BOARD (inboard, inorth, jwest)
						+ BOARD (inboard, inorth, col)
						+ BOARD (inboard, inorth, jeast);
				const int beside = BOARD (inboard, row, jwest)
						 + BOARD (inboard, row, jeast);
				const int below = BOARD (inboard, isouth, jwest)
						+ BOARD (inboard, isouth, col)
						+ BOARD (inboard, isouth, jeast);

				const char neighbor_count = above + beside + below;

				BOARD(outboard, row, col) =
					alivep (neighbor_count, BOARD (inboard, row, col));
			}
		}

		/* All workers finish the generation before any of them swaps. */
		pthread_barrier_wait(bar);
		SWAP_BOARDS( outboard, inboard );
	}

	return NULL;
}
Beispiel #9
0
    char*
sequential_game_of_life (char* outboard_, 
        char* inboard_,
        const int nrows_,
        const int ncols_,
        const int gens_max)
{
  /*
   * Entry point: advances the board by gens_max generations and returns
   * the pointer that holds the final generation (after the last swap that
   * is `inboard`).  Be careful when freeing the two boards so that the
   * same one is not freed twice.
   *
   * The arguments are copied into the file-scope variables (nrows, ncols,
   * outboard, inboard, LDA, slice, mask) that parallel_run() reads.
   *
   * Two execution paths:
   *  - generic single-threaded loop, valid for every board size;
   *  - threaded path: per generation, NUM_THREADS workers each run
   *    parallel_run() on one band of `slice` rows and are then joined.
   *
   * parallel_run() wraps indices with "& mask" for rows and columns and
   * covers exactly NUM_THREADS * slice rows, so the threaded path is only
   * taken for square boards whose side is a power of two and a multiple
   * of NUM_THREADS (and larger than 32, as before).  Every other board,
   * and the case where the thread array cannot be allocated, uses the
   * generic loop.
   */
  pthread_t *thread = NULL;
  int use_threads;
  int curgen;

  nrows = nrows_;
  ncols = ncols_;
  outboard = outboard_;
  inboard = inboard_;
  LDA = nrows;
  slice =  (nrows / NUM_THREADS);
  mask = nrows - 1;

  use_threads = !(nrows_ <= 32 && ncols_ <= 32)
      && nrows_ == ncols_
      && (nrows_ & (nrows_ - 1)) == 0
      && nrows_ % NUM_THREADS == 0;

  if (use_threads) {
    /* Allocated only when needed; released before returning. */
    thread = malloc(NUM_THREADS * sizeof *thread);
    if (thread == NULL)
      use_threads = 0;
  }

  if (!use_threads) {
    int i, j;

    for (curgen = 0; curgen < gens_max; curgen++)
    {
        for (i = 0; i < nrows; i++)
        {
            for (j = 0; j < ncols; j++)
            {
                const int inorth = mod (i-1, nrows);
                const int isouth = mod (i+1, nrows);
                const int jwest = mod (j-1, ncols);
                const int jeast = mod (j+1, ncols);

                const char neighbor_count = 
                    BOARD (inboard, inorth, jwest) +
                    BOARD (inboard, i, jwest) +
                    BOARD (inboard, isouth, jwest) +
                    BOARD (inboard, inorth, j) + 
                    BOARD (inboard, i, jeast) +
                    BOARD (inboard, isouth, jeast) +
                    BOARD (inboard, inorth, jeast) +
                    BOARD (inboard, isouth, j);

                BOARD(outboard, i, j) = alivep (neighbor_count, BOARD (inboard, i, j));

            }
        }
        SWAP_BOARDS( outboard, inboard );

    }
  } else {
    for (curgen = 0; curgen < gens_max; curgen++)
    {
        int started = 0;
        int t;

        /* Start workers until one fails to start (or all are running).
           The band number travels inside the pointer argument. */
        while (started < NUM_THREADS
               && pthread_create (&thread[started], NULL, parallel_run,
                                  (void*)(intptr_t)started) == 0)
            started++;

        /* Bands whose worker could not be created are computed right here,
           so the generation is always complete. */
        for (t = started; t < NUM_THREADS; t++)
            (void) parallel_run ((void*)(intptr_t)t);

        /* Join only the threads that were actually created. */
        for (t = 0; t < started; t++)
            pthread_join (thread[t], NULL);

        SWAP_BOARDS( outboard, inboard );
    }
  }

  free(thread);

  return inboard;
}