int main(int argc, char **argv) {
    /////////////////////////////
    // Initialize MPI
    MPI_Init(&argc, &argv);
    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (argc < 3)
        die("ERROR: Must provide 2 arguments\n\tmpirun -n {num procs} EXEC"
            " {number of cities} {distance file}\n");

    const int num_of_cities = atoi(argv[1]);
    char *file_location = argv[2];
    //
    /////////////////////////////

    /////////////////////////////
    // Array Init
    int **cityDistances = allocate_cells(num_of_cities, num_of_cities);
    initialize_city_distances(file_location, cityDistances, num_of_cities);
    //
    /////////////////////////////

    /////////////////////////////
    // Pick a coordination node / or just make it 0
    // TODO refactor this to be distributed
    time_t start_time = time(NULL);

    if (rank == 0)
        // Make the first process the master
        master(cityDistances, num_of_cities, nprocs, 10);
    else
        // Otherwise take a supporting role
        slave(cityDistances, num_of_cities, rank, nprocs, 10);

    if (rank == 0)
        printf("Time to calc: %li\n", (long) (time(NULL) - start_time));
    /////////////////////////////

    MPI_Finalize(); // Close MPI
}
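/*
 * Not part of the original source: main() above relies on die(),
 * allocate_cells(), initialize_city_distances(), master() and slave(), which
 * are defined elsewhere. Below is a minimal sketch of the first two only,
 * assuming the (columns, rows) argument order implied by the other call
 * sites and a single contiguous allocation so that row 0 can be handed to
 * MPI as one buffer. Treat it as an illustration, not the project's actual
 * implementation; it assumes <stdio.h> and <stdlib.h> are already included.
 */

// Print an error message and terminate the program.
void die(const char *message) {
    fprintf(stderr, "%s", message);
    exit(EXIT_FAILURE);
}

// Allocate a num_rows x num_cols int matrix as one contiguous block plus an
// array of row pointers, so matrix[0] can be passed directly to MPI calls.
int **allocate_cells(int num_cols, int num_rows) {
    int **rows = malloc((size_t) num_rows * sizeof(int *));
    int *data  = malloc((size_t) num_rows * num_cols * sizeof(int));
    if (rows == NULL || data == NULL) die("ERROR: out of memory\n");
    for (int i = 0; i < num_rows; i++)
        rows[i] = data + (size_t) i * num_cols;
    return rows;
}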
/* ---------------------------------------------------------------------- */
static mxArray *
allocate_grid(struct processed_grid *grid, const char *func)
/* ---------------------------------------------------------------------- */
{
    size_t nflds, nhf;
    const char *fields[] = { "nodes", "faces", "cells",
                             "type", "cartDims", "griddim" };

    mxArray *G, *nodes, *faces, *cells;
    mxArray *type, *typestr, *cartDims, *griddim;

    nflds = sizeof(fields) / sizeof(fields[0]);
    nhf   = count_halffaces(grid->number_of_faces, grid->face_neighbors);

    G        = mxCreateStructMatrix(1, 1, nflds, fields);

    nodes    = allocate_nodes(grid->number_of_nodes);
    faces    = allocate_faces(grid->number_of_faces,
                              grid->face_ptr[ grid->number_of_faces ]);
    cells    = allocate_cells(grid->number_of_cells, nhf);
    type     = mxCreateCellMatrix(1, 1);
    typestr  = mxCreateString(func);
    cartDims = mxCreateDoubleMatrix(1, 3, mxREAL);
    griddim  = mxCreateDoubleScalar(3);

    if ((G        != NULL) && (nodes   != NULL) && (faces   != NULL) &&
        (cells    != NULL) && (type    != NULL) && (typestr != NULL) &&
        (cartDims != NULL) && (griddim != NULL)) {
        mxSetCell(type, 0, typestr);

        mxSetField(G, 0, "nodes"   , nodes   );
        mxSetField(G, 0, "faces"   , faces   );
        mxSetField(G, 0, "cells"   , cells   );
        mxSetField(G, 0, "type"    , type    );
        mxSetField(G, 0, "cartDims", cartDims);
        mxSetField(G, 0, "griddim" , griddim );
    } else {
        if (griddim  != NULL) { mxDestroyArray(griddim);  }
        if (cartDims != NULL) { mxDestroyArray(cartDims); }
        if (typestr  != NULL) { mxDestroyArray(typestr);  }
        if (type     != NULL) { mxDestroyArray(type);     }
        if (cells    != NULL) { mxDestroyArray(cells);    }
        if (faces    != NULL) { mxDestroyArray(faces);    }
        if (nodes    != NULL) { mxDestroyArray(nodes);    }
        if (G        != NULL) { mxDestroyArray(G);        }

        G = NULL;
    }

    return G;
}
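/*
 * Not part of the original source: count_halffaces() is defined elsewhere in
 * this MEX file. A rough sketch of what it might look like, under the usual
 * convention that a half-face is a (cell, face) pair and that face_neighbors
 * stores two cell indices per face, with a negative value meaning the face
 * has no cell on that side. This is an assumption, not the original code.
 */
static size_t count_halffaces(int number_of_faces, const int *face_neighbors)
{
    size_t nhf = 0;
    int f;

    for (f = 0; f < number_of_faces; f++) {
        if (face_neighbors[2 * f + 0] >= 0) { nhf++; }  /* cell on one side  */
        if (face_neighbors[2 * f + 1] >= 0) { nhf++; }  /* cell on the other */
    }

    return nhf;
}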
/** Algorithm
 * Receive orders from the coordinator.
 *
 * Calculate the cost of each assigned path and keep the best path found.
 */
void slave(int **city_dist, const int num_of_cities, const int my_rank,
           const int nprocs, const int size_of_work) {
    MPI_Status stat;
    int local_lowest_cost = INT32_MAX;
    int stay_alive = 1;
    int *local_best_path = malloc((unsigned long) num_of_cities * sizeof(int));
    int **my_work = allocate_cells(num_of_cities, size_of_work);

    while (true) {
        MPI_Recv(&stay_alive, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);
        if (stay_alive == -1) {
            //printf("Kill the %ith slave\n", my_rank);
            free(local_best_path);
            return;
        }

        // Receive work and bound from master
        MPI_Recv(my_work[0], num_of_cities * size_of_work, MPI_INT, 0, 0,
                 MPI_COMM_WORLD, &stat);
        MPI_Recv(&local_lowest_cost, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &stat);

        for (int i = 0; i < size_of_work; i++) {
            int *best_dfs_path;
            best_dfs_path = dfs(my_work[i], num_of_cities, city_dist, 6,
                                local_lowest_cost);
            int best_dfs_cost = calculate_full_tour_distance(best_dfs_path,
                                                             city_dist,
                                                             num_of_cities);
            if (local_lowest_cost > best_dfs_cost) {
                local_lowest_cost = best_dfs_cost;
                memcpy(local_best_path, best_dfs_path,
                       (unsigned long) num_of_cities * sizeof(int));
                printf("Lowest cost: %i \t", local_lowest_cost);
                print_path(num_of_cities, local_best_path);
            }
            free(best_dfs_path);
        }

        // Finished all work
        // Send best path and distance to master
        MPI_Send(local_best_path, num_of_cities, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&local_lowest_cost, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
}
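/*
 * Not part of the original source: slave() calls calculate_full_tour_distance(),
 * which is defined elsewhere. A minimal sketch, assuming each city appears
 * exactly once in the path and that the tour returns to the starting city.
 */
int calculate_full_tour_distance(int *path, int **city_dist, int num_of_cities) {
    int total = 0;

    // Sum the edge weights along the path
    for (int i = 0; i < num_of_cities - 1; i++)
        total += city_dist[path[i]][path[i + 1]];

    // Close the tour by returning to the starting city
    total += city_dist[path[num_of_cities - 1]][path[0]];

    return total;
}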
int main(int argc, char **argv) {
    // Record the start time of the program
    time_t start_time = time(NULL);

    // Extract the input parameters from the command line arguments
    // Number of columns in the grid (default = 1,000)
    int num_cols = (argc > 1) ? atoi(argv[1]) : 1000;
    // Number of rows in the grid (default = 1,000)
    int num_rows = (argc > 2) ? atoi(argv[2]) : 1000;
    // Number of iterations to simulate (default = 100)
    int iterations = (argc > 3) ? atoi(argv[3]) : 100;

    // Output the simulation parameters
    printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);

    // We allocate two arrays: one for the current time step and one for the next time step.
    // At the end of each iteration, we switch the arrays in order to avoid copying.
    // The arrays are allocated with an extra surrounding layer which contains
    // the immutable boundary conditions (this simplifies the logic in the inner loop).
    float **cells[2];
    cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
    cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
    int cur_cells_index = 0, next_cells_index = 1;

    // Initialize the interior (non-boundary) cells to their initial value.
    // Note that we only need to initialize the array for the current time
    // step, since we will write to the array for the next time step
    // during the first iteration.
    initialize_cells(cells[0], num_cols, num_rows);

    // Set the immutable boundary conditions in both copies of the array
    int x, y, i;
    for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
    for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;

    // Debug output: print the initial grid, including the boundary layer
    for (y = 0; y < num_rows + 2; y++) {
        for (x = 0; x < num_cols + 2; x++) {
            printf("%.1f ", cells[0][y][x]);
        }
        printf("\n");
    }

    /*
    // Simulate the heat flow for the specified number of iterations
    for (i = 0; i < iterations; i++) {
        // Traverse the plate, computing the new value of each cell
        for (y = 1; y <= num_rows; y++) {
            for (x = 1; x <= num_cols; x++) {
                // The new value of this cell is the average of the old values of this cell's four neighbors
                cells[next_cells_index][y][x] = (cells[cur_cells_index][y][x - 1] +
                                                 cells[cur_cells_index][y][x + 1] +
                                                 cells[cur_cells_index][y - 1][x] +
                                                 cells[cur_cells_index][y + 1][x]) * 0.25;
            }
        }

        // Swap the two arrays
        cur_cells_index = next_cells_index;
        next_cells_index = !cur_cells_index;

        cells[cur_cells_index][hotSpotRow][hotSptCol] = hotSpotTemp;

        // Print the current progress
        //printf("Iteration: %d / %d\n", i + 1, iterations);
    }

    // Output a snapshot of the final state of the plate
    int final_cells = (iterations % 2 == 0) ? 0 : 1;
    create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

    // Compute and output the execution time
    time_t end_time = time(NULL);
    printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));
    */

    return 0;
}
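/*
 * Not part of the original source: this heat-plate program (and the threaded
 * versions below) assumes allocate_cells() and initialize_cells() for float
 * grids, defined elsewhere. A minimal sketch, assuming the (columns, rows)
 * argument order used at the call sites, a contiguous allocation, and an
 * initial interior temperature of 0; the usual headers are assumed included.
 */
float **allocate_cells(int num_cols, int num_rows) {
    float **rows = malloc((size_t) num_rows * sizeof(float *));
    float *data  = calloc((size_t) num_rows * num_cols, sizeof(float));
    if (rows == NULL || data == NULL) {
        printf("Error allocating a %dx%d cell array\n", num_rows, num_cols);
        exit(EXIT_FAILURE);
    }
    // Point each row into the single contiguous block (rows[0] is the block)
    for (int i = 0; i < num_rows; i++)
        rows[i] = data + (size_t) i * num_cols;
    return rows;
}

// Set every interior cell to the assumed initial temperature of 0.
void initialize_cells(float **cells, int num_cols, int num_rows) {
    for (int y = 1; y <= num_rows; y++)
        for (int x = 1; x <= num_cols; x++)
            cells[y][x] = 0.0f;
}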
int main(int argc, char **argv) {
    // Record the start time of the program
    time_t start_time = time(NULL);

    // Extract the input parameters from the command line arguments
    // Number of columns in the grid (default = 1,000)
    num_cols = (argc > 1) ? atoi(argv[1]) : 1000;
    // Number of rows in the grid (default = 1,000)
    num_rows = (argc > 2) ? atoi(argv[2]) : 1000;
    // Number of iterations to simulate (default = 100)
    iterations = (argc > 3) ? atoi(argv[3]) : 100;
    // Number of threads (default = 2)
    thread_count = (argc > 4) ? atoi(argv[4]) : 2;

    // Initialize barrier and barrier2.
    // If either initialization fails, exit.
    if (pthread_barrier_init(&barrier, NULL, thread_count)) {
        printf("Unable to init a barrier\n");
        return -1;
    }
    if (pthread_barrier_init(&barrier2, NULL, thread_count)) {
        printf("Unable to init a barrier\n");
        return -1;
    }

    // Declare pthread attributes and ids.
    int thread_ids[thread_count];
    pthread_t threads[thread_count];
    pthread_attr_t attr;

    // Declare cpu_set object to be used for CPU affinity setting
    cpu_set_t cores;

    // Initialize and set the pthread attribute as joinable.
    // Only threads created joinable can be joined; otherwise the thread is detached.
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    // Output the simulation parameters
    printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);

    // We allocate two arrays: one for the current time step and one for the next time step.
    // At the end of each iteration, we switch the arrays in order to avoid copying.
    // The arrays are allocated with an extra surrounding layer which contains
    // the immutable boundary conditions (this simplifies the logic in the inner loop).
    cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
    cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
    cur_cells_index = 0;
    next_cells_index = 1;

    // Initialize the interior (non-boundary) cells to their initial value.
    // Note that we only need to initialize the array for the current time
    // step, since we will write to the array for the next time step
    // during the first iteration.
    initialize_cells(cells[0], num_cols, num_rows);

    // Set the immutable boundary conditions in both copies of the array
    int x, y, i;
    for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
    for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;

    for (i = 0; i < thread_count; i++) {
        thread_ids[i] = i;

        // For each new thread, first clear the cpu_set_t object...
        CPU_ZERO(&cores);
        // ...then, using the thread id modulo the maximum core number, add the corresponding core to the set.
        CPU_SET(i % MAXCORE, &cores);

        // Set the attr for this thread to reflect the core on which to bind it.
        int status = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cores);
        if (status != 0) {
            printf("Could not set CPU affinity for thread %d\n", i);
            exit(EXIT_FAILURE);
        }

        // Create the thread and bind it to the core contained in cpu_set_t cores.
        status = pthread_create(&threads[i], &attr, PartialHeatPlate, (void *) &thread_ids[i]);
        if (status != 0) {
            printf("Could not create some pthreads\n");
            exit(EXIT_FAILURE);
        }
    }

    // Wait for the threads to finish
    for (i = 0; i < thread_count; i++) {
        pthread_join(threads[i], NULL);
    }

    // Output a snapshot of the final state of the plate
    int final_cells = (iterations % 2 == 0) ? 0 : 1;
    create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

    // Compute and output the execution time
    time_t end_time = time(NULL);
    printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));

    pthread_attr_destroy(&attr);

    return 0;
}
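/*
 * Not part of the original source: PartialHeatPlate() is the thread body
 * referenced above but not shown. A rough sketch, assuming the globals used
 * in main() (cells, num_rows, num_cols, iterations, thread_count,
 * cur_cells_index, next_cells_index, barrier, barrier2), a contiguous band of
 * interior rows per thread, and the two barriers bracketing a single swap of
 * the array indices. The partitioning and barrier usage are assumptions.
 */
void *PartialHeatPlate(void *arg) {
    int id = *(int *) arg;

    // Each thread owns a contiguous band of interior rows; the last thread
    // also picks up any remainder rows.
    int rows_per_thread = num_rows / thread_count;
    int start_row = id * rows_per_thread + 1;
    int end_row = (id == thread_count - 1) ? num_rows : start_row + rows_per_thread - 1;

    for (int iter = 0; iter < iterations; iter++) {
        // Average the four neighbors for every cell in this thread's band
        for (int y = start_row; y <= end_row; y++)
            for (int x = 1; x <= num_cols; x++)
                cells[next_cells_index][y][x] =
                    (cells[cur_cells_index][y][x - 1] + cells[cur_cells_index][y][x + 1] +
                     cells[cur_cells_index][y - 1][x] + cells[cur_cells_index][y + 1][x]) * 0.25f;

        // Wait until every thread has finished this time step...
        pthread_barrier_wait(&barrier);

        // ...let a single thread swap the shared current/next indices...
        if (id == 0) {
            cur_cells_index = next_cells_index;
            next_cells_index = !cur_cells_index;
        }

        // ...and make the swap visible to everyone before the next step.
        pthread_barrier_wait(&barrier2);
    }

    return NULL;
}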
int main(int argc, char **argv) {
    // Record the start time of the program
    time_t start_time = time(NULL);

    pthread_t *threads;
    threads = (pthread_t *) malloc(THREAD_COUNT * sizeof(*threads));

    // Extract the input parameters from the command line arguments
    // Number of columns in the grid (default = 1,000)
    int num_cols = (argc > 1) ? atoi(argv[1]) : 1000;
    // Number of rows in the grid (default = 1,000)
    int num_rows = (argc > 2) ? atoi(argv[2]) : 1000;
    // Number of iterations to simulate (default = 100)
    int iterations = (argc > 3) ? atoi(argv[3]) : 100;

    int cur_cells_index = 0, next_cells_index = 1;

    // Output the simulation parameters
    printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);

    // We allocate two arrays: one for the current time step and one for the next time step.
    // At the end of each iteration, we switch the arrays in order to avoid copying.
    // The arrays are allocated with an extra surrounding layer which contains
    // the immutable boundary conditions (this simplifies the logic in the inner loop).
    float **cells[2];
    int x, y, i, j;
    cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
    cells[1] = allocate_cells(num_cols + 2, num_rows + 2);

    initialize_cells(cells[0], num_cols, num_rows);

    // Set the immutable boundary conditions in both copies of the array
    for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
    for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;

    // Partition the interior rows among the threads
    param p[THREAD_COUNT];
    for (i = 0; i < THREAD_COUNT; i++) {
        p[i].cells[0] = cells[0];
        p[i].cells[1] = cells[1];
        p[i].start_row = i * (num_rows / THREAD_COUNT) + 1;
        // The last thread also picks up any remainder rows
        p[i].end_row = (i == THREAD_COUNT - 1) ? num_rows : (i + 1) * (num_rows / THREAD_COUNT);
        p[i].num_rows = num_rows;
        p[i].num_cols = num_cols;
    }

    for (j = 0; j < iterations; j++) {
        printf("Iteration: %d / %d\n", j + 1, iterations);

        for (i = 0; i < THREAD_COUNT; i++) {
            printf("%d, %d\n", p[i].start_row, p[i].end_row);
            p[i].cur_cells_index = cur_cells_index;
            p[i].next_cells_index = next_cells_index;
            pthread_create(&threads[i], NULL, iterate_plate_rows, (void *) &p[i]);
            printf("Creating thread %d\n", i);
        }

        for (i = 0; i < THREAD_COUNT; i++) {
            pthread_join(threads[i], NULL);
            printf("Waiting for thread %d\n", i);
        }

        // Swap the two arrays
        printf("Swapping in iteration %d\n", j + 1);
        cur_cells_index = next_cells_index;
        next_cells_index = !cur_cells_index;
    }

    // Output a snapshot of the final state of the plate
    int final_cells = (iterations % 2 == 0) ? 0 : 1;
    create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

    // Compute and output the execution time
    time_t end_time = time(NULL);
    printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));

    return 0;
}
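/*
 * Not part of the original source: the param struct and iterate_plate_rows()
 * are assumed by main() above but not shown. A minimal sketch, with the
 * struct layout mirroring the fields assigned in main(); the field order and
 * the thread body are assumptions, not the original definitions.
 */
typedef struct {
    float **cells[2];                    // the two plate buffers
    int start_row, end_row;              // inclusive band of rows owned by this thread
    int num_rows, num_cols;              // overall interior dimensions
    int cur_cells_index, next_cells_index;
} param;

// Compute one time step for this thread's band of rows: each new cell is the
// average of its four neighbors in the current buffer.
void *iterate_plate_rows(void *arg) {
    param *p = (param *) arg;

    for (int y = p->start_row; y <= p->end_row; y++)
        for (int x = 1; x <= p->num_cols; x++)
            p->cells[p->next_cells_index][y][x] =
                (p->cells[p->cur_cells_index][y][x - 1] + p->cells[p->cur_cells_index][y][x + 1] +
                 p->cells[p->cur_cells_index][y - 1][x] + p->cells[p->cur_cells_index][y + 1][x]) * 0.25f;

    return NULL;
}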
int main(int argc, char **argv) {
    // Record the start time of the program
    time_t start_time = time(NULL);

    int iters = 0;
    int k = 0;
    double elapsedTime;   // MPI_Wtime() returns seconds as a double
    int rank, p;

    // Initialize MPI
    MPI_Init(&argc, &argv);

    MPI_Barrier(MPI_COMM_WORLD);
    elapsedTime = -MPI_Wtime();

    // Get the rank of the current process
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    // Get the total number of processes
    MPI_Comm_size(MPI_COMM_WORLD, &p);

    // Number of inner-loop iterations: put a loop around next[i,j] = (old[i-1,j] ...) * 0.25
    int iters_per_cell = (argc > 1) ? atoi(argv[1]) : 1;
    // Number of iterations, same as before
    int iterations = (argc > 2) ? atoi(argv[2]) : 100;
    // How many ghost cell layers to send at a time, and how many internal
    // iterations to perform per communication.
    int boundary_thickness = (argc > 3) ? atoi(argv[3]) : 1;
    // Note: ghost cells are memory locations used to store redundant copies
    // of data held by neighboring processes.

    // Grid dimensions (hard-coded at 160x160 in this MPI version)
    int num_cols = 160;
    int total_num_rows = 160;
    //int iterations = 100;

    // Rows owned by each process
    int num_rows = (total_num_rows / p);

    // Output the simulation parameters
    //printf("Grid: %dx%d, Iterations: %d\n", num_cols, num_rows, iterations);

    // We allocate two arrays: one for the current time step and one for the next time step.
    // At the end of each iteration, we switch the arrays in order to avoid copying.
    // The arrays are allocated with an extra surrounding layer which contains
    // the immutable boundary conditions (this simplifies the logic in the inner loop).

    // Interior processes hold ghost layers on both sides; the first and last hold only one.
    int added_boundary = 0;
    if (rank != 0) added_boundary += boundary_thickness;
    if (rank != (p - 1)) added_boundary += boundary_thickness;
    num_rows += added_boundary;

    float **cells[2];
    cells[0] = allocate_cells(num_cols + 2, num_rows + 2);
    cells[1] = allocate_cells(num_cols + 2, num_rows + 2);
    int cur_cells_index = 0, next_cells_index = 1;

    // Initialize the interior (non-boundary) cells to their initial value.
    // Note that we only need to initialize the array for the current time
    // step, since we will write to the array for the next time step
    // during the first iteration.
    initialize_cells(cells[0], num_cols, num_rows);

    // Set the immutable boundary conditions in both copies of the array
    int x, y, i;
    if (rank == 0)
        for (x = 1; x <= num_cols; x++) cells[0][0][x] = cells[1][0][x] = TOP_BOUNDARY_VALUE;
    if (rank == (p - 1))
        for (x = 1; x <= num_cols; x++) cells[0][num_rows + 1][x] = cells[1][num_rows + 1][x] = BOTTOM_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][0] = cells[1][y][0] = LEFT_BOUNDARY_VALUE;
    for (y = 1; y <= num_rows; y++) cells[0][y][num_cols + 1] = cells[1][y][num_cols + 1] = RIGHT_BOUNDARY_VALUE;

    MPI_Status status;
    int t = 0;

    // Simulate the heat flow for the specified number of iterations
    for (i = 0; i < iterations; i++) {
        // Exchange ghost cells with the neighboring ranks every boundary_thickness iterations
        if (t >= boundary_thickness) {
            int count = (boundary_thickness * num_cols);

            // Message passing
            if (rank != (p - 1)) {
                // Pass this rank's lowest real cells to rank+1's upper ghost cells,
                // and receive rank+1's uppermost real cells into this rank's lower ghost cells
                float sendLower[boundary_thickness][num_cols];
                float recvLower[boundary_thickness][num_cols];
                int r, c;

                for (r = 0; r < boundary_thickness; r++)
                    for (c = 0; c < num_cols; c++)
                        sendLower[r][c] = cells[cur_cells_index][num_rows - 2 * boundary_thickness + 1 + r][c + 1];

                MPI_Send(sendLower, count, MPI_FLOAT, rank + 1, 0, MPI_COMM_WORLD);
                MPI_Recv(recvLower, count, MPI_FLOAT, rank + 1, 0, MPI_COMM_WORLD, &status);

                for (r = 0; r < boundary_thickness; r++)
                    for (c = 0; c < num_cols; c++)
                        cells[cur_cells_index][num_rows - boundary_thickness + 1 + r][c + 1] = recvLower[r][c];
            }

            if (rank != 0) {
                // Receive rank-1's lowest real cells into this rank's upper ghost cells,
                // and pass this rank's uppermost real cells to rank-1's lower ghost cells
                float sendUpper[boundary_thickness][num_cols];
                float recvUpper[boundary_thickness][num_cols];
                int r, c;

                for (r = 0; r < boundary_thickness; r++)
                    for (c = 0; c < num_cols; c++)
                        sendUpper[r][c] = cells[cur_cells_index][boundary_thickness + 1 + r][c + 1];

                MPI_Recv(recvUpper, count, MPI_FLOAT, rank - 1, 0, MPI_COMM_WORLD, &status);
                MPI_Send(sendUpper, count, MPI_FLOAT, rank - 1, 0, MPI_COMM_WORLD);

                for (r = 0; r < boundary_thickness; r++)
                    for (c = 0; c < num_cols; c++)
                        cells[cur_cells_index][1 + r][c + 1] = recvUpper[r][c];
            }

            t = 0;
        }

        // Traverse the plate, computing the new value of each cell
        for (y = 1; y <= num_rows; y++) {
            for (x = 1; x <= num_cols; x++) {
                for (int k = 0; k < iters_per_cell; k++) {
                    // The new value of this cell is the average of the old values of this cell's four neighbors
                    cells[next_cells_index][y][x] = (cells[cur_cells_index][y][x - 1] +
                                                     cells[cur_cells_index][y][x + 1] +
                                                     cells[cur_cells_index][y - 1][x] +
                                                     cells[cur_cells_index][y + 1][x]) * 0.25;
                }
            }
        }

        // Swap the two arrays
        cur_cells_index = next_cells_index;
        next_cells_index = !cur_cells_index;
        t++;

        // Print the current progress
        //printf("Iteration: %d / %d\n", i + 1, iterations);
    }

    // Output a snapshot of the final state of the plate
    int final_cells = (iterations % 2 == 0) ? 0 : 1;
    //create_snapshot(cells[final_cells], num_cols, num_rows, iterations);

    // Compute and output the execution time
    time_t end_time = time(NULL);
    printf("\nExecution time: %d seconds\n", (int) difftime(end_time, start_time));

    // At the end, rank 0 gathers the full plate and writes the snapshot.
    // Note: this assumes allocate_cells returns one contiguous block of floats,
    // so that row 0 of each matrix can be used as a single MPI buffer.
    if (rank == 0) {
        printf("\nRank 0 Gathering Cells\n");

        float **allCells = allocate_cells(num_cols + 2, total_num_rows + 2);
        int rows_per_rank = total_num_rows / p;
        int i, j;

        // Copy rank 0's own real rows (rows 1..rows_per_rank; rank 0 has no upper ghost layer)
        for (i = 1; i <= rows_per_rank; i++) {
            for (j = 1; j <= num_cols; j++) {
                allCells[i][j] = cells[final_cells][i][j];
            }
        }

        // Receive the real (non-ghost) rows of every other rank, in rank order
        float **tempCells = allocate_cells(num_cols, rows_per_rank);
        int target;
        for (target = 1; target < p; target++) {
            int count = rows_per_rank * num_cols;
            printf("\nCount = %i", count);
            printf("\nRank 0 Wait On Rank %i\n", target);
            MPI_Recv(tempCells[0], count, MPI_FLOAT, target, 0, MPI_COMM_WORLD, &status);

            for (i = 0; i < rows_per_rank; i++) {
                for (j = 0; j < num_cols; j++) {
                    allCells[target * rows_per_rank + i + 1][j + 1] = tempCells[i][j];
                }
            }
        }

        create_snapshot(allCells, num_cols, total_num_rows, iterations);
    } else {
        // Every other rank packs its real (non-ghost) rows and sends them to rank 0.
        // The first real row sits just below this rank's upper ghost layer.
        int rows_to_send = num_rows - added_boundary;
        float **returnCells = allocate_cells(num_cols, rows_to_send);
        int i, j;

        for (i = 0; i < rows_to_send; i++) {
            for (j = 0; j < num_cols; j++) {
                returnCells[i][j] = cells[final_cells][i + boundary_thickness + 1][j + 1];
            }
        }

        int count = rows_to_send * num_cols;
        printf("\nCount = %i", count);
        printf("\nRank %i Send To Rank 0\n", rank);
        MPI_Send(returnCells[0], count, MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    elapsedTime += MPI_Wtime();
    if (rank == 0) {
        printf("\nElapsed time: %d seconds\n", (int) elapsedTime);
    }

    MPI_Finalize();

    return 0;
}
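/*
 * Not part of the original source: create_snapshot() is called by the serial,
 * pthread, and MPI versions above but not shown. A minimal sketch, assuming
 * the interior of the plate is dumped as a plain-text grayscale PGM image;
 * the file name pattern and the 0..255 value range are assumptions, not the
 * original behavior.
 */
void create_snapshot(float **cells, int num_cols, int num_rows, int id) {
    char filename[64];
    snprintf(filename, sizeof(filename), "snapshot.%d.pgm", id);

    FILE *out = fopen(filename, "w");
    if (out == NULL) {
        printf("Could not open snapshot file %s\n", filename);
        return;
    }

    // PGM header: magic number, dimensions, maximum gray value
    fprintf(out, "P2\n%d %d\n255\n", num_cols, num_rows);

    // Write the interior cells, clamping each value into 0..255
    for (int y = 1; y <= num_rows; y++) {
        for (int x = 1; x <= num_cols; x++) {
            int v = (int) cells[y][x];
            if (v < 0) v = 0;
            else if (v > 255) v = 255;
            fprintf(out, "%d ", v);
        }
        fprintf(out, "\n");
    }

    fclose(out);
}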