Example #1
int main(int argc, char **argv) {
    /////////////////////////////
    // Initialize MPI
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    if (argc < 3) // argv[1] and argv[2] are both required
        die("ERROR: Must provide 2 arguments\n\tmpirun -n {num procs} EXEC"
                    " {number of cities} {distance file}\n");
    const int num_of_cities = atoi(argv[1]);
    char *file_location = argv[2];
    //
    /////////////////////////////

    /////////////////////////////
    // Array Init
    int **cityDistances = allocate_cells(num_of_cities, num_of_cities);
    initialize_city_distances(file_location, cityDistances, num_of_cities);
    //
    /////////////////////////////

    /////////////////////////////
    // Pick a coordination node / or just make it 0
    // TODO refactor this to be distributed
    time_t start_time = time(NULL);
    if (rank == 0) // Make the first processor the master
        master(cityDistances, num_of_cities, nprocs, 10);
    else // Otherwise they play supporting roles
        slave(cityDistances, num_of_cities, rank, nprocs, 10);
    if (rank == 0) printf("Time to calc: %li\n", time(NULL) - start_time);
    /////////////////////////////

    MPI_Finalize(); // Close MPI
}
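Note: die(), allocate_cells(), initialize_city_distances(), master() and slave() are not shown with this example. A minimal sketch of the first two, assuming allocate_cells(cols, rows) returns one contiguous block (the MPI_Recv(my_work[0], ...) call in Example #3 only works if the rows are contiguous); the real definitions may differ:

#include <stdio.h>
#include <stdlib.h>

// Print an error message and abort (signature assumed from the call site above).
void die(const char *message) {
    fprintf(stderr, "%s", message);
    exit(EXIT_FAILURE);
}

// Allocate a rows-by-cols int matrix in one contiguous block, so that
// matrix[0] can be handed straight to MPI_Send/MPI_Recv as in Example #3.
int **allocate_cells(int cols, int rows) {
    int **matrix = malloc((size_t) rows * sizeof(int *));
    if (matrix == NULL) die("malloc failed\n");
    matrix[0] = malloc((size_t) rows * (size_t) cols * sizeof(int));
    if (matrix[0] == NULL) die("malloc failed\n");
    for (int i = 1; i < rows; i++)
        matrix[i] = matrix[0] + (size_t) i * cols;
    return matrix;
}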
Example #2
/* ---------------------------------------------------------------------- */
static mxArray *
allocate_grid(struct processed_grid *grid, const char *func)
/* ---------------------------------------------------------------------- */
{
    size_t nflds, nhf;
    const char *fields[] = { "nodes", "faces", "cells",
                             "type", "cartDims", "griddim" };

    mxArray *G, *nodes, *faces, *cells;
    mxArray *type, *typestr, *cartDims, *griddim;

    nflds    = sizeof(fields) / sizeof(fields[0]);
    nhf      = count_halffaces(grid->number_of_faces, grid->face_neighbors);

    G        = mxCreateStructMatrix(1, 1, nflds, fields);

    nodes    = allocate_nodes(grid->number_of_nodes);
    faces    = allocate_faces(grid->number_of_faces,
                              grid->face_ptr[ grid->number_of_faces ]);
    cells    = allocate_cells(grid->number_of_cells, nhf);
    type     = mxCreateCellMatrix(1, 1);
    typestr  = mxCreateString(func);
    cartDims = mxCreateDoubleMatrix(1, 3, mxREAL);
    griddim  = mxCreateDoubleScalar(3);

    if ((G        != NULL) && (nodes   != NULL) && (faces   != NULL) &&
        (cells    != NULL) && (type    != NULL) && (typestr != NULL) &&
        (cartDims != NULL) && (griddim != NULL)) {
        mxSetCell(type, 0, typestr);

        mxSetField(G, 0, "nodes"   , nodes   );
        mxSetField(G, 0, "faces"   , faces   );
        mxSetField(G, 0, "cells"   , cells   );
        mxSetField(G, 0, "type"    , type    );
        mxSetField(G, 0, "cartDims", cartDims);
        mxSetField(G, 0, "griddim" , griddim );
    } else {
        if (griddim  != NULL) { mxDestroyArray(griddim);  }
        if (cartDims != NULL) { mxDestroyArray(cartDims); }
        if (typestr  != NULL) { mxDestroyArray(typestr);  }
        if (type     != NULL) { mxDestroyArray(type);     }
        if (cells    != NULL) { mxDestroyArray(cells);    }
        if (faces    != NULL) { mxDestroyArray(faces);    }
        if (nodes    != NULL) { mxDestroyArray(nodes);    }
        if (G        != NULL) { mxDestroyArray(G);        }

        G = NULL;
    }

    return G;
}
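Note: count_halffaces(), allocate_nodes(), allocate_faces() and this codebase's allocate_cells() come from the same source file and are not shown. A plausible sketch of count_halffaces(), assuming the usual convention that face_neighbors stores two cell indices per face, with -1 marking the boundary side:

/* Sketch only: counts one half-face per (face, cell) incidence, assuming
 * face_neighbors[2*f] and face_neighbors[2*f + 1] hold the two cells
 * neighbouring face f, with -1 on the boundary side. */
static size_t count_halffaces(int nfaces, const int *face_neighbors)
{
    size_t nhf = 0;
    int f;

    for (f = 0; f < nfaces; f++) {
        if (face_neighbors[2*f + 0] != -1) { nhf++; }
        if (face_neighbors[2*f + 1] != -1) { nhf++; }
    }

    return nhf;
}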
Example #3
/** Algorithm
 * Receive work orders from the coordinator (master).
 *
 * Calculate the cost of each assigned path and keep the best path found.
 */
void slave(int **city_dist, const int num_of_cities, const int my_rank, const int nprocs, const int size_of_work) {
    MPI_Status stat;
    int local_lowest_cost = INT32_MAX;
    int stay_alive = 1;

    int *local_best_path = malloc((unsigned long) num_of_cities * sizeof(int));
    int **my_work = allocate_cells(num_of_cities, size_of_work);
    while (true) {
        MPI_Recv(&stay_alive, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        if (stay_alive == -1) {
            //printf("Kill the %ith slave\n", my_rank);
            free(local_best_path); // release the path buffer before exiting
            return;
        }

        // Receive work and bound from master
        MPI_Recv(my_work[0], num_of_cities * size_of_work, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        MPI_Recv(&local_lowest_cost, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);

        for (int i = 0; i < size_of_work; i++) {
            int *best_dfs_path;
            best_dfs_path = dfs(my_work[i], num_of_cities, city_dist, 6, local_lowest_cost);

            int best_dfs_cost = calculate_full_tour_distance(best_dfs_path, city_dist, num_of_cities);

            if (local_lowest_cost > best_dfs_cost) {
                local_lowest_cost = best_dfs_cost;
                memcpy(local_best_path, best_dfs_path, (unsigned long) num_of_cities * sizeof(int));
                printf("Lowest cost: %i \t", local_lowest_cost);
                print_path(num_of_cities, local_best_path);
            }
            free(best_dfs_path);
        } // Finished all work
        // Send best path and distance to master
        MPI_Send(local_best_path, num_of_cities, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&local_lowest_cost, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }


}
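Note: the matching master() is not shown. For the protocol to line up it must send the stay-alive flag, then the work block, then the current bound, and collect a path and a cost back. A sketch under those assumptions; generate_work() and NUM_ROUNDS are invented placeholders, not the original code:

#include <mpi.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Sketch only: mirrors the message order slave() expects.
void master_sketch(int **city_dist, const int num_of_cities, const int nprocs,
                   const int size_of_work) {
    MPI_Status stat;
    int global_lowest_cost = INT32_MAX;
    int *global_best_path = malloc((unsigned long) num_of_cities * sizeof(int));
    int *worker_path = malloc((unsigned long) num_of_cities * sizeof(int));
    int **work = allocate_cells(num_of_cities, size_of_work);

    for (int round = 0; round < NUM_ROUNDS; round++) {      // NUM_ROUNDS: assumed
        for (int w = 1; w < nprocs; w++) {
            int alive = 1;
            generate_work(work, size_of_work, round, w);     // assumed helper
            MPI_Send(&alive, 1, MPI_INT, w, 0, MPI_COMM_WORLD);
            MPI_Send(work[0], num_of_cities * size_of_work, MPI_INT, w, 0, MPI_COMM_WORLD);
            MPI_Send(&global_lowest_cost, 1, MPI_INT, w, 0, MPI_COMM_WORLD);
        }
        for (int w = 1; w < nprocs; w++) {
            int cost;
            MPI_Recv(worker_path, num_of_cities, MPI_INT, w, 0, MPI_COMM_WORLD, &stat);
            MPI_Recv(&cost, 1, MPI_INT, w, 0, MPI_COMM_WORLD, &stat);
            if (cost < global_lowest_cost) {
                global_lowest_cost = cost;
                memcpy(global_best_path, worker_path, (unsigned long) num_of_cities * sizeof(int));
            }
        }
    }
    int kill = -1;   // matches the stay_alive == -1 check in slave()
    for (int w = 1; w < nprocs; w++)
        MPI_Send(&kill, 1, MPI_INT, w, 0, MPI_COMM_WORLD);
    free(worker_path);
    free(global_best_path);
}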
Example #4
int main(int argc, char **argv) {
	// Record the start time of the program
	time_t start_time = time(NULL);
	
	// Extract the input parameters from the command line arguments
	// Number of columns in the grid (default = 1,000)
	int num_cols = (argc > 1) ? atoi(argv[1]) : 1000;
	// Number of rows in the grid (default = 1,000)
	int num_rows = (argc > 2) ? atoi(argv[2]) : 1000;
	// Number of iterations to simulate (default = 100)
	int iterations = (argc > 3) ? atoi(argv[3]) : 100;
	
	// Output the simulation parameters
	printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);
		
	// We allocate two arrays: one for the current time step and one for the next time step.
	// At the end of each iteration, we switch the arrays in order to avoid copying.
	// The arrays are allocated with an extra surrounding layer which contains
	//  the immutable boundary conditions (this simplifies the logic in the inner loop).
	float **cells[2];
	cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
	cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
	int cur_cells_index = 0, next_cells_index = 1;
	
	// Initialize the interior (non-boundary) cells to their initial value.
	// Note that we only need to initialize the array for the current time
	//  step, since we will write to the array for the next time step
	//  during the first iteration.
	initialize_cells(cells[0], num_cols, num_rows);
	
	// Set the immutable boundary conditions in both copies of the array
	int x, y, i;
	for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
	for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;
	
	for (y = 0; y < num_rows + 2; y++) {
		for (x = 0; x < num_cols + 2; x++) {
		   printf("%.1f ",cells[0][y][x]);
		}
		printf("\n");
	}
/*	
	// Simulate the heat flow for the specified number of iterations
	for (i = 0; i < iterations; i++) {
		// Traverse the plate, computing the new value of each cell
		for (y = 1; y <= num_rows; y++) {
			for (x = 1; x <= num_cols; x++) {
				// The new value of this cell is the average of the old values of this cell's four neighbors
				cells[next_cells_index][y][x] = (cells[cur_cells_index][y][x - 1]  +
				                                 cells[cur_cells_index][y][x + 1]  +
				                                 cells[cur_cells_index][y - 1][x]  +
				                                 cells[cur_cells_index][y + 1][x]) * 0.25;
			}
		}
		
		// Swap the two arrays
		cur_cells_index = next_cells_index;
		next_cells_index = !cur_cells_index;


		cells[cur_cells_index][hotSpotRow][hotSpotCol] = hotSpotTemp;
		
		// Print the current progress
		//printf("Iteration: %d / %d\n", i + 1, iterations);
	}
	
	// Output a snapshot of the final state of the plate
	int final_cells = (iterations % 2 == 0) ? 0 : 1;
	create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

	// Compute and output the execution time
	time_t end_time = time(NULL);
	printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));
*/	
	return 0;
}
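Note: the heat-plate examples (#4 through #7) all call allocate_cells() and initialize_cells() without showing them. A minimal sketch, assuming contiguous allocation (the MPI transfers in Example #7 need the rows to be one block) and an assumed uniform initial temperature INITIAL_CELL_VALUE:

#include <stdlib.h>

#define INITIAL_CELL_VALUE 0.0f   // assumed; the real initial value isn't shown

// Allocate a rows-by-cols float matrix as a single contiguous block so that
// matrix[0] can be passed to MPI as one buffer.
float **allocate_cells(int cols, int rows) {
	float **matrix = malloc((size_t) rows * sizeof(float *));
	matrix[0] = malloc((size_t) rows * (size_t) cols * sizeof(float));
	for (int i = 1; i < rows; i++)
		matrix[i] = matrix[0] + (size_t) i * cols;
	return matrix;
}

// Set every interior (non-boundary) cell to the initial temperature.
void initialize_cells(float **cells, int num_cols, int num_rows) {
	for (int y = 1; y <= num_rows; y++)
		for (int x = 1; x <= num_cols; x++)
			cells[y][x] = INITIAL_CELL_VALUE;
}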
Example #5
int main(int argc, char **argv) {
	// Record the start time of the program
	time_t start_time = time(NULL);
	// Extract the input parameters from the command line arguments
	// Number of columns in the grid (default = 1,000)
	num_cols = (argc > 1) ? atoi(argv[1]) : 1000;
	// Number of rows in the grid (default = 1,000)
	num_rows = (argc > 2) ? atoi(argv[2]) : 1000;
	// Number of iterations to simulate (default = 100)
	iterations = (argc > 3) ? atoi(argv[3]) : 100;
	// Number of threads
	thread_count = (argc > 4) ? atoi(argv[4]) : 2;
	
	//Initialize barrier and barrier2
	//If any error, exit.
	
	if(pthread_barrier_init(&barrier, NULL, thread_count)){
        printf("Unable to init a barrier\n");
        return -1;
    }
	if(pthread_barrier_init(&barrier2, NULL, thread_count)){
        printf("Unable to init a barrier\n");
        return -1;
    }
	
	//Declare pthread attributes and ids.
	int thread_ids[thread_count];
	pthread_t threads[thread_count];
	pthread_attr_t attr; 
	
	//Declare cpu_set object to be used for CPU affinity setting
	cpu_set_t cores;
	
	//Initialize and set the pthread attribute as joinable.
	//Only threads created joinable can be joined; otherwise the thread is detached.
	
	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
	// Output the simulation parameters
	printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);
		
	// We allocate two arrays: one for the current time step and one for the next time step.
	// At the end of each iteration, we switch the arrays in order to avoid copying.
	// The arrays are allocated with an extra surrounding layer which contains
	//  the immutable boundary conditions (this simplifies the logic in the inner loop).
	cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
	cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
	cur_cells_index = 0;
	next_cells_index = 1;
	
	// Initialize the interior (non-boundary) cells to their initial value.
	// Note that we only need to initialize the array for the current time
	//  step, since we will write to the array for the next time step
	//  during the first iteration.
	initialize_cells(cells[0], num_cols, num_rows);
	
	// Set the immutable boundary conditions in both copies of the array
	int x, y, i;
	for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
	for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;
	
	for (i = 0; i < thread_count; i++) {
		thread_ids[i] = i;
		//For each new thread, first remove all cores from the cpu_set_t object.
		CPU_ZERO(&cores);
		//Using the thread id modulo the max core number, add the corresponding core to the CPU set.
		CPU_SET(i % MAXCORE, &cores);
		//Set the attr for this thread to reflect the core on which to bind it.
       	int status = pthread_attr_setaffinity_np(&attr,sizeof(cpu_set_t),&cores);
		if (status != 0) {
			printf("Could not set CPU affinity for thread %d\n",i);
			exit(EXIT_FAILURE);
		}
		//Create thread and bind to the core contained in cpu_set_t cores.
		status = pthread_create(&threads[i], &attr, PartialHeatPlate, (void*) &thread_ids[i]);
		if (status != 0) {
			printf("Could not create some pthreads\n");
			exit(EXIT_FAILURE);
		}
	}
	
	// wait for the threads to finish
	for (i = 0; i < thread_count; i++) {
		pthread_join(threads[i], NULL);
	}
	
	// Output a snapshot of the final state of the plate
	int final_cells = (iterations % 2 == 0) ? 0 : 1;
	create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

	// Compute and output the execution time
	time_t end_time = time(NULL);
	printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));
	
  	pthread_attr_destroy(&attr);
	return 0;
}
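Note: PartialHeatPlate() is referenced but not shown. One plausible shape for it, given the two barriers and the globals this main assigns (cells, cur_cells_index, next_cells_index, num_rows, num_cols, iterations, thread_count, barrier, barrier2); the row partitioning and barrier placement here are assumptions:

// Sketch only: each thread updates its own band of rows, then the team
// synchronizes twice per iteration so a single thread can swap the buffers.
void *PartialHeatPlate(void *arg) {
	int id = *(int *) arg;
	int band = num_rows / thread_count;
	int start = id * band + 1;
	int end = (id == thread_count - 1) ? num_rows : start + band - 1;

	for (int it = 0; it < iterations; it++) {
		for (int y = start; y <= end; y++)
			for (int x = 1; x <= num_cols; x++)
				cells[next_cells_index][y][x] =
					(cells[cur_cells_index][y][x - 1] + cells[cur_cells_index][y][x + 1] +
					 cells[cur_cells_index][y - 1][x] + cells[cur_cells_index][y + 1][x]) * 0.25f;

		// Wait until every thread has finished writing the next grid
		pthread_barrier_wait(&barrier);
		if (id == 0) {   // exactly one thread swaps the buffer indices
			cur_cells_index = next_cells_index;
			next_cells_index = !cur_cells_index;
		}
		// Wait until the swap is visible before anyone starts the next iteration
		pthread_barrier_wait(&barrier2);
	}
	return NULL;
}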
Example #6
int main(int argc, char **argv) {
	// Record the start time of the program
	time_t start_time = time(NULL);
	pthread_t *threads;
	threads = (pthread_t *) malloc(THREAD_COUNT * sizeof(*threads));
	
	// Extract the input parameters from the command line arguments
	// Number of columns in the grid (default = 1,000)
	int num_cols = (argc > 1) ? atoi(argv[1]) : 1000;
	// Number of rows in the grid (default = 1,000)
	int num_rows = (argc > 2) ? atoi(argv[2]) : 1000;
	// Number of iterations to simulate (default = 100)
	int iterations = (argc > 3) ? atoi(argv[3]) : 100;
	int cur_cells_index = 0, next_cells_index = 1;
	
	// Output the simulation parameters
	printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);
		
	// We allocate two arrays: one for the current time step and one for the next time step.
	// At the end of each iteration, we switch the arrays in order to avoid copying.
	// The arrays are allocated with an extra surrounding layer which contains
	//  the immutable boundary conditions (this simplifies the logic in the inner loop).
	float **cells[2];
	int x, y, i, j;
	cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
	cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
	
	initialize_cells(cells[0], num_cols, num_rows);
	for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
	for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;
	
	param p[THREAD_COUNT];
	for (i=0; i < THREAD_COUNT; i++){
		p[i].cells[0] = cells[0];
		p[i].cells[1] = cells[1];
		p[i].start_row = i * (num_rows / THREAD_COUNT) + 1;
		// The last thread also takes any leftover rows when num_rows % THREAD_COUNT != 0
		p[i].end_row = (i == THREAD_COUNT - 1) ? num_rows : (i + 1) * (num_rows / THREAD_COUNT);
		p[i].num_rows = num_rows;
		p[i].num_cols = num_cols;
	}
	
	for (j = 0; j < iterations; j++) {
		printf("Iteration: %d / %d\n", j + 1, iterations);
		for (i=0; i < THREAD_COUNT; i++){
			printf("%d, %d\n", p[i].start_row, p[i].end_row);	
			p[i].cur_cells_index = cur_cells_index;
			p[i].next_cells_index = next_cells_index;
			pthread_create(&threads[i], NULL, iterate_plate_rows, (void *) &p[i]);
			printf("Creating thread %d\n", i);
		}
		for (i=0; i < THREAD_COUNT; i++){
			pthread_join(threads[i], (void *)NULL);
			printf("Waiting for the thread %d\n", i);
		}
		// Swap the two arrays
		printf("Swapping in iteration %d\n", j+1);
		cur_cells_index = next_cells_index;
		next_cells_index = !cur_cells_index;
	}
	
	// Output a snapshot of the final state of the plate
	int final_cells = (iterations % 2 == 0) ? 0 : 1;
	create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

	// Compute and output the execution time
	time_t end_time = time(NULL);
	printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));
	
	free(threads);
	return 0;
}
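Note: param and iterate_plate_rows() are not included with this example. A sketch consistent with the fields assigned above; the struct layout and the worker body are assumptions:

// Sketch only: the fields mirror the assignments made in main above.
typedef struct {
	float **cells[2];
	int start_row, end_row;
	int num_rows, num_cols;
	int cur_cells_index, next_cells_index;
} param;

// Update one band of rows for a single iteration; the threads are re-created
// every iteration, so no barrier is needed inside the worker.
void *iterate_plate_rows(void *arg) {
	param *p = (param *) arg;
	for (int y = p->start_row; y <= p->end_row; y++)
		for (int x = 1; x <= p->num_cols; x++)
			p->cells[p->next_cells_index][y][x] =
				(p->cells[p->cur_cells_index][y][x - 1] + p->cells[p->cur_cells_index][y][x + 1] +
				 p->cells[p->cur_cells_index][y - 1][x] + p->cells[p->cur_cells_index][y + 1][x]) * 0.25f;
	return NULL;
}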
Example #7
int main(int argc, char **argv) {
	// Record the start time of the program
	time_t start_time = time(NULL);

	double elapsedTime;   // MPI_Wtime() returns seconds as a double
	int rank, p;
	// Initialize MPI
	MPI_Init(&argc, &argv);
	MPI_Barrier(MPI_COMM_WORLD);
	elapsedTime = -MPI_Wtime();
	// Get the rank of the current process
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	
	// Get the total number of processes
	MPI_Comm_size(MPI_COMM_WORLD, &p);

	//Number of inner loop iterations: the stencil update next[i][j] = (old[...]) * 0.25 is repeated this many times per cell
	int iters_per_cell = (argc > 1) ? atoi(argv[1]) : 1;
	//Number of iterations, same as before
	int iterations = (argc > 2) ? atoi(argv[2]) : 100;
	//How many ghost cell layers to send at a time & how many internal iterations to perform
	//per communication
	int boundary_thickness = (argc > 3) ? atoi(argv[3]) : 1;
	//Note: Ghost Cells are memory locations used to store redundant copies of 
	//data held by neighboring processes
	
	// Extract the input parameters from the command line arguments
	// Number of columns in the grid (default = 1,000)
	int num_cols = 160;
	// Number of rows in the grid (default = 1,000)
	int total_num_rows = 160;
	// Number of iterations to simulate (default = 100)
	//int iterations = 100;
	int num_rows = (total_num_rows / p);
	
	// Output the simulation parameters
	//printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);
		
	// We allocate two arrays: one for the current time step and one for the next time step.
	// At the end of each iteration, we switch the arrays in order to avoid copying.
	// The arrays are allocated with an extra surrounding layer which contains
	//  the immutable boundary conditions (this simplifies the logic in the inner loop).
	
	int added_boundary = 0;
	if (rank != 0) added_boundary+=boundary_thickness;
	if (rank != (p-1)) added_boundary+=boundary_thickness;
	num_rows += added_boundary;

	float **cells[2];
	cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
	cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
	int cur_cells_index = 0, next_cells_index = 1;
	
	// Initialize the interior (non-boundary) cells to their initial value.
	// Note that we only need to initialize the array for the current time
	//  step, since we will write to the array for the next time step
	//  during the first iteration.
	initialize_cells(cells[0], num_cols, num_rows);
	
	// Set the immutable boundary conditions in both copies of the array
	int x, y, i;
	if (rank == 0) for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
	if (rank == (p-1)) for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
	for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;
	MPI_Status status;
	int t = 0;
	// Simulate the heat flow for the specified number of iterations
	for (i = 0; i < iterations; i++) {
		// Traverse the plate, computing the new value of each cell

		if (t >= boundary_thickness)
		{
			int count = (boundary_thickness * num_cols);
			//Message passing
			if (rank != (p-1))
			{
				//Pass our lowest real rows to rank+1's upper ghost cells,
				//then receive rank+1's uppermost real rows into our lower ghost cells
				float sendCells[boundary_thickness][num_cols];
				float recvCells[boundary_thickness][num_cols];
				int r, c;
				for (r = 0; r < boundary_thickness; r++){
					for (c = 0; c < num_cols; c++){
						sendCells[r][c] = cells[cur_cells_index][num_rows - 2 * boundary_thickness + 1 + r][c + 1];
					}
				}

				MPI_Send(&sendCells[0][0], count, MPI_FLOAT, rank+1, 0, MPI_COMM_WORLD);
				MPI_Recv(&recvCells[0][0], count, MPI_FLOAT, rank+1, 0, MPI_COMM_WORLD, &status);

				for (r = 0; r < boundary_thickness; r++){
					for (c = 0; c < num_cols; c++){
						cells[cur_cells_index][num_rows - boundary_thickness + 1 + r][c + 1] = recvCells[r][c];
					}
				}
			}
			if (rank != 0)
			{
				//Receive rank-1's lowest real rows into our upper ghost cells,
				//then pass our uppermost real rows to rank-1's lower ghost cells
				float sendCells[boundary_thickness][num_cols];
				float recvCells[boundary_thickness][num_cols];
				int r, c;
				for (r = 0; r < boundary_thickness; r++){
					for (c = 0; c < num_cols; c++){
						sendCells[r][c] = cells[cur_cells_index][boundary_thickness + 1 + r][c + 1];
					}
				}

				//rank-1 sends first, so receive first to avoid deadlock
				MPI_Recv(&recvCells[0][0], count, MPI_FLOAT, rank-1, 0, MPI_COMM_WORLD, &status);
				MPI_Send(&sendCells[0][0], count, MPI_FLOAT, rank-1, 0, MPI_COMM_WORLD);

				for (r = 0; r < boundary_thickness; r++){
					for (c = 0; c < num_cols; c++){
						cells[cur_cells_index][r + 1][c + 1] = recvCells[r][c];
					}
				}
			}
			t = 0;
		}
		for (y = 1; y <= num_rows; y++) {
			for (x = 1; x <= num_cols; x++) {
				int k = 0;
				for (k=0; k < iters_per_cell; k++){
				// The new value of this cell is the average of the old values of this cell's four neighbors
					cells[next_cells_index][y][x] = (cells[cur_cells_index][y][x - 1]  +
													 cells[cur_cells_index][y][x + 1]  +
													 cells[cur_cells_index][y - 1][x]  +
													 cells[cur_cells_index][y + 1][x]) * 0.25;
				}
			}
		}
		
		// Swap the two arrays
		cur_cells_index = next_cells_index;
		next_cells_index = !cur_cells_index;
		t++;
		
		// Print the current progress
		//printf("Iteration: %d / %d\n", i + 1, iterations);
	}
	
	// Output a snapshot of the final state of the plate
	int final_cells = (iterations % 2 == 0) ? 0 : 1;
	//create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

	// Compute and output the execution time
	time_t end_time = time(NULL);
	printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));

	//Rank 0 gathers every rank's real rows and writes out the full array
	if (rank == 0)
	{
		printf("\nRank 0 Gathering Cells\n");
		float **allCells = allocate_cells(num_cols + 2, total_num_rows + 2);
		int base_rows = num_rows - added_boundary;  // real rows owned by each rank
		int j, r;

		// Copy rank 0's own rows (row 0 is the immutable top boundary)
		for (i = 0; i <= base_rows; i++)
		{
			for (j = 0; j < (num_cols + 2); j++)
			{
				allCells[i][j] = cells[final_cells][i][j];
			}
		}

		// Fill in the immutable bottom/left/right boundaries of the full plate
		for (x = 1; x <= num_cols; x++) allCells[total_num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
		for (y = 1; y <= total_num_rows; y++) allCells[y][0] = LEFT_BOUNDARY_VALUE;
		for (y = 1; y <= total_num_rows; y++) allCells[y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;

		// Receive each other rank's real rows into the matching slice
		float **tempCells = allocate_cells(num_cols, base_rows);
		for (r = 1; r < p; r++)
		{
			printf("\nRank 0 Wait On Rank %i\n", r);
			// Assumes allocate_cells returns one contiguous block behind tempCells[0]
			MPI_Recv(tempCells[0], base_rows * num_cols, MPI_FLOAT, r, 0, MPI_COMM_WORLD, &status);
			for (i = 0; i < base_rows; i++)
			{
				for (j = 0; j < num_cols; j++)
				{
					allCells[r * base_rows + 1 + i][j + 1] = tempCells[i][j];
				}
			}
		}
		create_snapshot(allCells, num_cols, total_num_rows, iterations);
	}
	else
	{
		int base_rows = num_rows - added_boundary;
		float **returnCells = allocate_cells(num_cols, base_rows);
		int j;

		// Copy this rank's real rows; every rank other than 0 has a
		// boundary_thickness-deep upper ghost layer above them
		for (i = 0; i < base_rows; i++)
		{
			for (j = 0; j < num_cols; j++)
			{
				returnCells[i][j] = cells[final_cells][i + boundary_thickness + 1][j + 1];
			}
		}

		printf("\nRank %i Send To Rank 0\n", rank);
		MPI_Send(returnCells[0], base_rows * num_cols, MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
	}

	MPI_Barrier(MPI_COMM_WORLD);
	elapsedTime += MPI_Wtime();
	if (rank == 0)
	{
		printf("\nElapsed time: %.2f seconds\n", elapsedTime);
	}

	
	MPI_Finalize();
	
	return 0;
}