/*
 * Single-threaded Game of Life driver.
 *
 * Runs gens_max generations. In each generation every cell (row, col) of
 * outboard is recomputed from inboard: the eight surrounding cells are
 * summed and handed, together with the cell's current value, to alivep().
 * Neighbour indices are produced by mod() on row/col +/- 1 (presumably a
 * wrap-around at the board edges -- mod() is defined elsewhere).
 * After each generation the two board pointers are exchanged.
 *
 * The literals 2 and 3 are printed on their own lines before and after
 * the generation loop; they look like leftover progress markers.
 *
 * Returns the pointer that holds the most recently written generation
 * (it is called inboard here because of the final swap). The caller owns
 * both buffers and must free each exactly once.
 */
char*
sequential_game_of_life (char* outboard, 
        char* inboard,
        const int nrows,
        const int ncols,
        const int gens_max)
{
    /* Referenced by name from the BOARD() macro (assumed; defined elsewhere). */
    const int LDA = nrows;
    int gen, row, col;

    printf("%d\n", 2);

    for (gen = 0; gen < gens_max; ++gen)
    {
        /* Columns on the outside, rows on the inside. */
        for (col = 0; col < ncols; ++col)
        {
            for (row = 0; row < nrows; ++row)
            {
                const int up    = mod (row-1, nrows);
                const int down  = mod (row+1, nrows);
                const int left  = mod (col-1, ncols);
                const int right = mod (col+1, ncols);

                /* Accumulate in int, then narrow once, exactly like a
                   single char-typed sum expression would. */
                int live = 0;
                live += BOARD (inboard, up, left);
                live += BOARD (inboard, up, col);
                live += BOARD (inboard, up, right);
                live += BOARD (inboard, row, left);
                live += BOARD (inboard, row, right);
                live += BOARD (inboard, down, left);
                live += BOARD (inboard, down, col);
                live += BOARD (inboard, down, right);

                const char neighbor_count = live;

                BOARD(outboard, row, col) = alivep (neighbor_count, BOARD (inboard, row, col));
            }
        }

        /* What was just written becomes the input of the next round. */
        SWAP_BOARDS( outboard, inboard );
    }

    printf("%d\n", 3);

    return inboard;
}
Пример #2
0
char*
sequential_game_of_life (char* outboard, 
        char* inboard,
        const int nrows,
        const int ncols,
        const int gens_max)
{
    /* HINT: in the parallel decomposition, LDA may not be equal to
       nrows! */
    // parallelization happens here
    pthread_t tid[NUM_THREADS];
    t_args_t args[NUM_THREADS*2];    
    int curgen, i;
    // set up static args for threads 8x1
    /*(for (i = 0; i < NUM_THREADS; i++) {
        args[i].nrows = nrows;
        args[i].ncols = ncols;
        args[i].rStart = (nrows/(NUM_THREADS*2)) * (i);
        args[i].rEnd = (nrows/(NUM_THREADS*2)) * (i+1);
        args[i].cStart = 0;
        args[i].cEnd = ncols;
    }
    for (i = 0; i < NUM_THREADS; i++) {
        args[NUM_THREADS+i].nrows = nrows;
        args[NUM_THREADS+i].ncols = ncols;
        args[NUM_THREADS+i].rStart = (nrows/(NUM_THREADS*2)) * (NUM_THREADS+i);
        args[NUM_THREADS+i].rEnd = (nrows/(NUM_THREADS*2)) * (NUM_THREADS+i+1);
        args[NUM_THREADS+i].cStart = 0;
        args[NUM_THREADS+i].cEnd = ncols;
    }*/
    // 4x2
    for (i = 0; i < NUM_THREADS; i++) {
        args[i].nrows = nrows;
        args[i].ncols = ncols;
        args[i].rStart = (nrows/(NUM_THREADS)) * (i%2);
        args[i].rEnd = (nrows/(NUM_THREADS)) * ((i%2)+1);
        args[i].cStart = (ncols/2) * (i/2);
        args[i].cEnd = (ncols/2) * ((i/2)+1);
    }
    for (i = 0; i < NUM_THREADS; i++) {
        args[NUM_THREADS+i].nrows = nrows;
        args[NUM_THREADS+i].ncols = ncols;
        args[NUM_THREADS+i].rStart = (nrows/(NUM_THREADS)) * (NUM_THREADS/2+(i%2));
        args[NUM_THREADS+i].rEnd = (nrows/(NUM_THREADS)) * (NUM_THREADS/2+(i%2)+1);
        args[NUM_THREADS+i].cStart = (ncols/2) * (i/2);
        args[NUM_THREADS+i].cEnd = (ncols/2) * ((i/2)+1);
    }

    for (curgen = 0; curgen < gens_max; curgen++)
    {
        /* HINT: you'll be parallelizing these loop(s) by doing a
           geometric decomposition of the output */
        for (i = 0; i < NUM_THREADS; i++) {
            args[i].outboard = outboard;
            args[i].inboard = inboard;
            pthread_create(&tid[i], NULL, thread, &args[i]);
        }
        for (i = 0; i < NUM_THREADS; i++) {
            pthread_join(tid[i], NULL);
        }
        for (i = 0; i < NUM_THREADS; i++) {
            args[NUM_THREADS+i].outboard = outboard;
            args[NUM_THREADS+i].inboard = inboard;
            pthread_create(&tid[i], NULL, thread, &args[NUM_THREADS+i]);
        }
        for (i = 0; i < NUM_THREADS; i++) {
            pthread_join(tid[i], NULL);
        }
        SWAP_BOARDS( outboard, inboard );
    }
    /* 
     * We return the output board, so that we know which one contains
     * the final result (because we've been swapping boards around).
     * Just be careful when you free() the two boards, so that you don't
     * free the same one twice!!! 
     */
    return inboard;
}
Пример #3
0
/**
* Thread entry point for a tiled, row-partitioned Game of Life.
*
* arg is cast to structArgs*; the fields read here are nrows, ncols,
* outboard, inboard, threadNum, gens_max and barrp.
*
* Each thread owns rows [threadNum*(nrows/NUM_THREADS),
* threadNum*(nrows/NUM_THREADS) + nrows/NUM_THREADS) and, for every
* generation, walks them in column tiles of width ncols/2. A 3x3 window of
* cell values is slid across the tile so that only three cells are loaded
* from inboard per step. After its rows are done the thread waits on the
* barrier in a->barrp and then swaps its local board pointers.
*
* The thread ends via pthread_exit(0); no value is returned with `return`.
*
* NOTE(review): assuming threadNum ranges over 0..NUM_THREADS-1, rows past
* NUM_THREADS*(nrows/NUM_THREADS) are never visited.
* NOTE(review): with an odd ncols the j loop starts a third tile whose j2
* range runs past ncols-1; with ncols < 2, varj is 0 and the j loop never
* advances. Both look unintended -- confirm the supported board sizes.
*/
void* loop_parellize(void* arg){

	structArgs *a;
	a = (structArgs*) arg;
	int nrows = a->nrows;
	char* outboard = a->outboard;
	char* inboard = a->inboard;
	int threadNum = a->threadNum;
	int ncols = a->ncols;
	int gens_max = a->gens_max;

	// Half-open row range [initial_i, maximum_i) handled by this thread.
	int initial_i = threadNum*(nrows/NUM_THREADS);
	int maximum_i = initial_i + (nrows/NUM_THREADS);
	int i,j;

	// Presumably referenced by name from the BOARD() macro (defined elsewhere).
	const int LDA = nrows;

	// Tile extents: vari rows by varj columns.
	int vari = nrows/NUM_THREADS;
	int varj = ncols/2;

	int j2,i2,curgen;
	// j*: window values for the tile's first row (row i) at the current
	// column, kept so the next column can reuse them.
	// NOTE(review): jnw, jw and jsw are assigned but never read.
	int jself, jnw, jn, jne, jw, je, jsw, js, jse;
	// i*: the 3x3 window around the cell currently being written
	// (nw/n/ne, w/self/e, sw/s/se).
	int iself, inw, in, ine, iw, ie, isw, is, ise;
	for (curgen = 0; curgen < gens_max; curgen++)
	{
		// Optimization: loop switching j and i loops
		// j advances one whole tile width per iteration.
		for (j = 0; j < ncols; j+=varj)
		{
			// maximum_i - initial_i equals vari, so this body runs once per j.
			for (i = initial_i; i < maximum_i; i+=vari)
			{
				// Optimization: Code Motion, Improved formula for inorth and isouth
				// Neighbour rows of the tile's first row, wrapping at the edges.
				const int inorth = (i==0) ? nrows-1 : i-1;
				const int isouth = (i==nrows-1) ? 0 : i+1;

				// Optimization: Tiling
				for(j2=j;j2<j+varj;j2++){

					// Optimization: Improved formula for jwest and jeast
					const int jwest = (j2 == 0)? ncols-1: j2-1;
					const int jeast = (j2 == ncols-1)? 0 : j2+1;

					if(j2 == j){
						// Optimization: Loop iteration memory sharing
						// First column of the tile: load all nine cells around (i, j2).
						inw = jnw = BOARD (inboard, inorth, jwest);
						in = jn = BOARD (inboard, inorth, j2);
						ine = jne = BOARD (inboard, inorth, jeast);
						iw = jw = BOARD (inboard, i, jwest);
						iself = jself = BOARD (inboard, i, j2);
						ie = je = BOARD (inboard, i, jeast);
						isw = jsw = BOARD (inboard, isouth, jwest);
						is = js = BOARD (inboard, isouth, j2);
						ise = jse = BOARD (inboard, isouth, jeast);
					}
					else{
						//Optimization: Loop iteration memory sharing
						// Later columns: shift the saved first-row window one column
						// to the left and load only the new east column.
						inw = jnw = jn;
						in = jn = jne;
						ine = jne = BOARD (inboard, inorth, jeast);
						iw = jw = jself;
						iself = jself = je;
						ie = je = BOARD (inboard, i, jeast);
						isw = jsw = js;
						is = js = jse;
						ise = jse = BOARD (inboard, isouth, jeast);
					}
					// Walk down column j2 through the tile's rows.
					for(i2=i; i2<i+vari;i2++){
						// printf("jwest-%d, jeast-%d, inorth-%d, isouth-%d, ThreadNum-%d\n", jwest,jeast,inorth,isouth,threadNum);
						if(i2>i){
							//Optimization: Loop iteration memory sharing
							// Shift the window one row down and load only the new
							// south row (wrapping after the last board row).
							const int isouth2 = (i2==nrows-1) ? 0 : i2+1;
							inw = iw;
							in = iself;
							ine = ie;
							iw = isw;
							iself = is;
							ie = ise;
							isw = BOARD (inboard, isouth2, jwest);
							is = BOARD (inboard, isouth2, j2);
							ise = BOARD (inboard, isouth2, jeast);
						}

						// Sum of the eight neighbours; the centre cell is excluded.
						const char neighbor_count = inw + in + ine + iw + ie + isw + is + ise;

						//  printf("%d\n", neighbor_count);
						BOARD(outboard, i2, j2) = alivep (neighbor_count, iself);
					}
				}
			}
		}
		// Optimization: pthread barrier
		// Wait on the barrier before the local board pointers trade roles, so
		// no thread overwrites a board that another thread is still reading.
		pthread_barrier_wait(a->barrp);
		SWAP_BOARDS( outboard, inboard );
	}
	// Ends the calling thread with exit value 0.
	pthread_exit(0);
}
Пример #4
0
/*
 * OpenMP Game of Life driver.
 *
 * Runs gens_max generations. Each generation opens a parallel region in
 * which every thread computes a contiguous band of rows of outboard from
 * inboard; the implicit barrier at the end of the region guarantees the
 * whole board is written before the board pointers are swapped.
 *
 * Row bands are derived from the size of the team that is actually
 * running (omp_get_num_threads()), not from NUM_THREADS: num_threads() is
 * only a request, and if the runtime hands out fewer threads a partition
 * based on NUM_THREADS would leave the missing threads' rows uncomputed.
 * When the team has exactly NUM_THREADS members the bands are identical
 * to the NUM_THREADS-based ones.
 *
 * Within a row, a 3x3 window of cell values is slid from column 0 to
 * ncols-1 so only the three east-side cells are loaded per step.
 *
 * Returns the pointer holding the last generation written. The caller
 * owns both buffers and must free each exactly once.
 */
char*
game_of_life (char* outboard, 
	      char* inboard,
	      const int nrows,
	      const int ncols,
	      const int gens_max)
{
    /* Referenced by name from the BOARD() macro (assumed; defined elsewhere). */
    const int LDA = nrows;
    int curgen;

    for (curgen = 0; curgen < gens_max; curgen++)
    {
        #pragma omp parallel num_threads(NUM_THREADS)
        {
            /* Everything declared inside the region is private per thread. */
            const int team_size  = omp_get_num_threads();
            const int thread_num = omp_get_thread_num();
            const int row_begin  = thread_num * nrows / team_size;
            const int row_end    = (thread_num + 1) * nrows / team_size;
            int i, j;

            for (i = row_begin; i < row_end; i++)
            {
                const int inorth = mod (i-1, nrows);
                const int isouth = mod (i+1, nrows);
                /* Column to the west of column 0; evaluated once per row. */
                const int jlast  = mod (-1, ncols);

                /*
                 * Prime the window one step "before" column 0: the centre
                 * slots hold column jlast and the east slots hold column 0,
                 * so the first shift inside the loop lands on column 0.
                 */
                char n  = BOARD (inboard, inorth, jlast);
                char ne = BOARD (inboard, inorth, 0);
                char c  = BOARD (inboard, i, jlast);
                char e  = BOARD (inboard, i, 0);
                char s  = BOARD (inboard, isouth, jlast);
                char se = BOARD (inboard, isouth, 0);

                for (j = 0; j < ncols; j++)
                {
                    const int jeast = mod (j+1, ncols);

                    /* Old centre column becomes the west column... */
                    const char nw = n;
                    const char w  = c;
                    const char sw = s;

                    /* ...old east column becomes the centre column... */
                    n = ne;
                    c = e;
                    s = se;

                    /* ...and only the new east column is read from memory. */
                    ne = BOARD (inboard, inorth, jeast);
                    e  = BOARD (inboard, i, jeast);
                    se = BOARD (inboard, isouth, jeast);

                    const char neighbor_count = nw + n + ne + w + e + sw + s + se;

                    BOARD(outboard, i, j) = alivep (neighbor_count, c);
                }
            }
        }

        /* Runs on the single thread that continues after the region. */
        SWAP_BOARDS( outboard, inboard );
    }

    return inboard;
}
Пример #5
0
/*
 * Worker-thread body for a row-partitioned Game of Life.
 *
 * arg points to a thread_struct; the fields read are outboard, inboard,
 * nrows, ncols, gens_max, bar and thread_num. The thread owns rows
 * [thread_num*nrows/NUMBER_OF_THREADS, (thread_num+1)*nrows/NUMBER_OF_THREADS).
 *
 * For each of gens_max generations the thread recomputes its rows of
 * outboard from inboard, waits on the barrier `bar`, and then swaps its
 * own local copies of the two board pointers. mod() is only called for
 * cells on the first/last row or column; interior cells use plain +/- 1.
 *
 * Always returns NULL. The swapped pointers are local to this function
 * and are not written back to the thread_struct.
 */
void*
parallel_game_of_life (void* arg)
{
    thread_struct *ctx = (thread_struct *)arg;

    char *outboard = ctx->outboard;
    char *inboard = ctx->inboard;
    const int nrows = ctx->nrows;
    const int ncols = ctx->ncols;
    const int gens_max = ctx->gens_max;
    pthread_barrier_t *bar = ctx->bar;

    /* Referenced by name from the BOARD() macro (assumed; defined elsewhere). */
    const int LDA = nrows;

    /* Half-open band of rows handled by this thread. */
    const int row_begin = (ctx->thread_num * nrows) / NUMBER_OF_THREADS;
    const int row_end = ((ctx->thread_num + 1) * nrows) / NUMBER_OF_THREADS;

    int gen, row, col;

    for (gen = 0; gen < gens_max; gen++)
    {
        for (row = row_begin; row < row_end; row++)
        {
            /* Only border rows go through mod(). */
            const int row_on_edge = (row == 0 || row == nrows - 1);
            const int inorth = row_on_edge ? mod (row-1, nrows) : row-1;
            const int isouth = row_on_edge ? mod (row+1, nrows) : row+1;

            for (col = 0; col < ncols; col++)
            {
                /* Same shortcut for the columns. */
                const int col_on_edge = (col == 0 || col == ncols - 1);
                const int jwest = col_on_edge ? mod (col-1, ncols) : col-1;
                const int jeast = col_on_edge ? mod (col+1, ncols) : col+1;

                /* Eight neighbours, gathered row by row. */
                const int above =
                    BOARD (inboard, inorth, jwest) +
                    BOARD (inboard, inorth, col) +
                    BOARD (inboard, inorth, jeast);
                const int beside =
                    BOARD (inboard, row, jwest) +
                    BOARD (inboard, row, jeast);
                const int below =
                    BOARD (inboard, isouth, jwest) +
                    BOARD (inboard, isouth, col) +
                    BOARD (inboard, isouth, jeast);

                const char neighbor_count = above + beside + below;

                BOARD(outboard, row, col) = alivep (neighbor_count, BOARD (inboard, row, col));
            }
        }

        /* Wait on the barrier before the boards trade roles. */
        pthread_barrier_wait(bar);
        SWAP_BOARDS( outboard, inboard );
    }

    return NULL;
}
Пример #6
0
    /*
     * Game of Life driver that publishes the board through file-scope
     * variables and, for larger boards, runs each generation on
     * NUM_THREADS threads.
     *
     * The arguments are copied into the globals nrows, ncols, outboard and
     * inboard; LDA, slice (nrows / NUM_THREADS) and mask (nrows - 1) are
     * derived from them. These globals are presumably what parallel_run()
     * reads -- it is defined elsewhere and receives only its thread index.
     *
     * Boards of at most 32x32 are computed with a plain sequential loop.
     * Larger boards start NUM_THREADS threads per generation and join them
     * before swapping the boards. If the thread-handle array cannot be
     * allocated, the sequential loop is used instead, so the result is
     * still computed.
     *
     * The thread-handle array is allocated only when threads are used and
     * is released before returning.
     *
     * NOTE(review): pthread_create() failures are not detected, exactly as
     * before.
     *
     * Returns the global inboard pointer, which after the final swap holds
     * the last generation written. The caller owns both buffers and must
     * free each exactly once.
     */
    char*
sequential_game_of_life (char* outboard_, 
        char* inboard_,
        const int nrows_,
        const int ncols_,
        const int gens_max)
{
  pthread_t *thread = NULL;
  int use_threads;
  int curgen, i, j;

  nrows = nrows_;
  ncols = ncols_;
  outboard = outboard_;
  inboard = inboard_;
  LDA = nrows;
  slice =  (nrows / NUM_THREADS);
  mask = nrows - 1;

  /* Small boards are not worth the thread start-up cost. */
  use_threads = !(nrows_ <= 32 && ncols_ <= 32);

  if (use_threads) {
    thread = malloc (NUM_THREADS * sizeof *thread);
    if (thread == NULL) {
      /* Degrade to the sequential loop rather than dereference NULL. */
      use_threads = 0;
    }
  }

  if (!use_threads) {
    for (curgen = 0; curgen < gens_max; curgen++)
    {
        for (i = 0; i < nrows; i++)
        {
            for (j = 0; j < ncols; j++)
            {
                const int inorth = mod (i-1, nrows);
                const int isouth = mod (i+1, nrows);
                const int jwest = mod (j-1, ncols);
                const int jeast = mod (j+1, ncols);

                const char neighbor_count = 
                    BOARD (inboard, inorth, jwest) +
                    BOARD (inboard, i, jwest) +
                    BOARD (inboard, isouth, jwest) +
                    BOARD (inboard, inorth, j) + 
                    BOARD (inboard, i, jeast) +
                    BOARD (inboard, isouth, jeast) +
                    BOARD (inboard, inorth, jeast) +
                    BOARD (inboard, isouth, j);

                BOARD(outboard, i, j) = alivep (neighbor_count, BOARD (inboard, i, j));
            }
        }
        SWAP_BOARDS( outboard, inboard );
    }
  } else {
    for (curgen = 0; curgen < gens_max; curgen++)
    {
        /* The thread index travels in the pointer value itself; going
           through size_t avoids a differently-sized int-to-pointer cast. */
        for (i = 0; i < NUM_THREADS; i++) {
            pthread_create (&thread[i], NULL, parallel_run, (void *)(size_t)i);
        }

        /* Every thread must finish before the boards trade roles. */
        for (i = 0; i < NUM_THREADS; i++) {
            pthread_join (thread[i], NULL);
        }

        SWAP_BOARDS( outboard, inboard );
    }
  }

  /* free(NULL) is a no-op, so this is safe on the sequential path too. */
  free (thread);

  return inboard;
}