/* The main function */ int main(int argc, char **argv) { /* Generate the grids and populate them with the same set of random values. */ GRID_STRUCT *grid_1 = (GRID_STRUCT *)malloc(sizeof(GRID_STRUCT)); GRID_STRUCT *grid_2 = (GRID_STRUCT *)malloc(sizeof(GRID_STRUCT)); GRID_STRUCT *grid_3 = (GRID_STRUCT *)malloc(sizeof(GRID_STRUCT)); grid_1->dimension = GRID_DIMENSION; grid_1->num_elements = grid_1->dimension * grid_1->dimension; grid_2->dimension = GRID_DIMENSION; grid_2->num_elements = grid_2->dimension * grid_2->dimension; grid_3->dimension = GRID_DIMENSION; grid_3->num_elements = grid_3->dimension * grid_3->dimension; create_grids(grid_1, grid_2, grid_3); /* Compute the reference solution using the single-threaded version. */ printf("Using the single threaded version to solve the grid. \n"); int num_iter = compute_gold(grid_1); printf("Convergence achieved after %d iterations. \n", num_iter); /* Use pthreads to solve the equation uisng the red-black parallelization technique. */ printf("Using pthreads to solve the grid using the red-black parallelization method. \n"); num_iter = compute_using_pthreads_red_black(grid_2); printf("Convergence achieved after %d iterations. \n", num_iter); /* Use pthreads to solve the equation using the jacobi method in parallel. */ printf("Using pthreads to solve the grid using the jacobi method. \n"); num_iter = compute_using_pthreads_jacobi(grid_3); printf("Convergence achieved after %d iterations. \n", num_iter); /* Print key statistics for the converged values. */ printf("\n"); printf("Reference: \n"); print_statistics(grid_1); printf("Red-black: \n"); print_statistics(grid_2); printf("Jacobi: \n"); print_statistics(grid_3); /* Compute grid differences. */ compute_grid_differences(grid_1, grid_2, grid_3); /* Free up the grid data structures. */ free((void *)grid_1->element); free((void *)grid_1); free((void *)grid_2->element); free((void *)grid_2); free((void *)grid_3->element); free((void *)grid_3); return 0; }
int main(int argc, char *argv[]) { int n = NUM_TRAPEZOIDS; float a = LEFT_ENDPOINT; float b = RIGHT_ENDPOINT; pthread_t threadManager; pthread_t workerThread[NUM_THREADs]; threadArgs *tThread; /* single threaded timing start */ clock_t start = clock(), diff; double reference = compute_gold(a, b, n, func); printf("Reference solution computed on the CPU = %f \n", reference); diff = clock() - start; int msec = diff * 1000 / CLOCKS_PER_SEC; printf("Time taken %d milliseconds\n", msec%1000); /* single threaded timing end */ int i; tThread = (threadArgs *) malloc(sizeof(threadArgs)); tThread -> a = a; tThread -> b = b; tThread -> n = n; tThread -> h = (b-a)/(float)n; tThread -> f = func; clock_t start2 = clock(), diff2; for(i = 0; i < NUM_THREADs; i++){ if(pthread_create(&workerThread[i], NULL, compute_using_pthreads, (void *) tThread) != 0){ printf("Error within pthread_create.\n"); return -1; } } double *trapInt; double sum = 0; for(i = 0; i < NUM_THREADs; i++){ pthread_join(workerThread[i], &trapInt); sum += *trapInt; } sum = sum/NUM_THREADs; diff2 = clock() - start2; int msec2 = diff2 * 1000 / CLOCKS_PER_SEC; printf("Pthreads compution solution: %f\n", sum); printf("Time taken %d milliseconds\n",(msec2%1000)/NUM_THREADs); pthread_exit((void *) threadManager); }
//////////////////////////////////////////////////////////////////////////////// //! Generate the histogram on Single Threaded CPU and Then PTHREADS and Check for Correctness ///////////////////////////////////////////////////////////my_args///////////////////// void run_test(int num_elements, int num_threads) { float diff; int i; int *reference_histogram = (int *)malloc(sizeof(int) * HISTOGRAM_SIZE); // Space to store histogram generated by the CPU int *histogram_using_pthreads = (int *)malloc(sizeof(int) * HISTOGRAM_SIZE); // Space to store histogram generated by the GPU // Allocate memory for the input data int size = sizeof(int) * num_elements; int *input_data = (int *)malloc(size); // Randomly generate input data. Initialize the input data to be integer values between 0 and (HISTOGRAM_SIZE - 1) srand(time(NULL)); // add this for real randomness for(i = 0; i < num_elements; i++) input_data[i] = floorf((HISTOGRAM_SIZE - 1) * (rand()/(float)RAND_MAX)); printf("Creating the reference histogram. \n"); // Compute the reference solution on the CPU struct timeval start, stop; gettimeofday(&start, NULL); compute_gold(input_data, reference_histogram, num_elements, HISTOGRAM_SIZE); gettimeofday(&stop, NULL); printf("CPU run time = %0.10f s. \n", (float)(stop.tv_sec - start.tv_sec + (stop.tv_usec - start.tv_usec)/(float)1000000)); // check_histogram(reference_histogram, num_elements, HISTOGRAM_SIZE); // Compute the histogram using pthreads. The result histogram should be stored on the histogram_using_pthreads array printf("\n"); printf("Creating histogram using pthreads. \n"); struct timeval pstart, pstop; gettimeofday(&pstart, NULL); compute_using_pthreads(input_data, histogram_using_pthreads, num_elements, num_threads, HISTOGRAM_SIZE); gettimeofday(&pstop, NULL); printf("Pthreads run time = %0.10f s. 
\n", (float)(pstop.tv_sec - pstart.tv_sec + (pstop.tv_usec - pstart.tv_usec)/(float)1000000)); // check_histogram(histogram_using_pthreads, num_elements, HISTOGRAM_SIZE); // Compute the differences between the reference and pthread results diff = 0.0; for(i = 0; i < HISTOGRAM_SIZE; i++) diff = diff + abs(reference_histogram[i] - histogram_using_pthreads[i]); printf("Difference between the reference and pthread results: %f. \n", diff); // cleanup memory free(input_data); free(reference_histogram); free(histogram_using_pthreads); pthread_mutex_destroy(&mutex); pthread_exit(NULL); }
/*
 * Program entry point: runs the trapezoidal computation over
 * [LEFT_ENDPOINT, RIGHT_ENDPOINT] with NUM_TRAPEZOIDS trapezoids, once through
 * compute_gold and once through compute_using_pthreads, printing each result.
 */
int main(void)
{
    const int trapezoid_count = NUM_TRAPEZOIDS;
    const float lower = LEFT_ENDPOINT;
    const float upper = RIGHT_ENDPOINT;

    /* The interval is divided evenly among the trapezoids. */
    const float step = (upper - lower) / (float)trapezoid_count;
    printf("The height of the trapezoid is %f \n", step);

    /* Reference value from the single-threaded routine. */
    const double gold = compute_gold(lower, upper, trapezoid_count, step);
    printf("Reference solution computed on the CPU = %f \n", gold);

    /* Same computation through the pthreads routine. */
    const double threaded = compute_using_pthreads(lower, upper, trapezoid_count, step);
    printf("Solution computed using pthreads = %f \n", threaded);
}
/*
 * Generate random input, build its histogram with the single-threaded
 * reference (compute_gold) and with compute_using_pthreads, and print the
 * summed absolute per-bin difference between the two results.
 *
 * num_elements: number of input values to generate; must not be negative.
 *
 * Does not return to the caller: it ends the calling thread with
 * pthread_exit(), or the whole process with exit(EXIT_FAILURE) when the
 * argument is invalid or memory cannot be allocated.
 */
void run_test(int num_elements)
{
    float diff;
    int i;

    if (num_elements < 0) {
        fprintf(stderr, "Error: the number of elements must not be negative. \n");
        exit(EXIT_FAILURE);
    }

    /* Allocate memory for the histogram structures. */
    int *reference_histogram = malloc(sizeof *reference_histogram * HISTOGRAM_SIZE);
    int *histogram_using_pthreads = malloc(sizeof *histogram_using_pthreads * HISTOGRAM_SIZE);

    /* Size the input in size_t so the byte count cannot overflow an int. */
    int *input_data = malloc(sizeof *input_data * (size_t)num_elements);

    /* malloc(0) may legitimately return NULL, so an empty input is not an error. */
    if (reference_histogram == NULL || histogram_using_pthreads == NULL ||
        (input_data == NULL && num_elements > 0)) {
        fprintf(stderr, "Error: unable to allocate memory for the histogram test. \n");
        free(input_data);
        free(reference_histogram);
        free(histogram_using_pthreads);
        exit(EXIT_FAILURE);
    }

    /* Generate input data---integer values between 0 and (HISTOGRAM_SIZE - 1). */
    for (i = 0; i < num_elements; i++)
        input_data[i] = floorf((HISTOGRAM_SIZE - 1) * (rand()/(float)RAND_MAX));

    /* Compute the reference solution on the CPU. */
    printf("Creating the reference histogram. \n");
    struct timeval start, stop;
    gettimeofday(&start, NULL);
    compute_gold(input_data, reference_histogram, num_elements, HISTOGRAM_SIZE);
    gettimeofday(&stop, NULL);
    printf("CPU run time = %0.2f s. \n", (float)(stop.tv_sec - start.tv_sec + (stop.tv_usec - start.tv_usec)/(float)1000000));
    check_histogram(reference_histogram, num_elements, HISTOGRAM_SIZE);

    /* Compute the histogram using pthreads. The result histogram is stored in
     * the histogram_using_pthreads array. */
    printf("Creating histogram using pthreads. \n");
    compute_using_pthreads(input_data, histogram_using_pthreads, num_elements, HISTOGRAM_SIZE);

    /* Compute the differences between the reference and pthread results. */
    diff = 0.0;
    for (i = 0; i < HISTOGRAM_SIZE; i++)
        diff = diff + abs(reference_histogram[i] - histogram_using_pthreads[i]);
    printf("Difference between the reference and pthread results: %f. \n", diff);

    /* Clean up memory. */
    free(input_data);
    free(reference_histogram);
    free(histogram_using_pthreads);

    pthread_exit(NULL);
}
/*
 * Program entry point.
 *
 * Usage: vector_dot_product <num elements>
 *
 * Fills two vectors of <num elements> floats with random values in
 * [-.5, .5], computes their dot product with the single-threaded reference
 * (compute_gold) and with compute_using_pthreads, and prints each result
 * together with its wall-clock execution time.
 *
 * Exits with status 1 on a usage error, an invalid element count, or an
 * allocation failure; otherwise the main thread finishes through
 * pthread_exit().
 */
int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("Usage: vector_dot_product <num elements> \n");
        exit(1);
    }

    /* Parse the vector size strictly: the whole argument must be a positive
     * decimal number that survives conversion to int. */
    char *end = NULL;
    long parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed <= 0 || parsed != (long)(int)parsed) {
        fprintf(stderr, "Error: <num elements> must be a positive integer. \n");
        exit(1);
    }
    int num_elements = (int)parsed;

    /* Create the vectors A and B and fill them with random numbers between [-.5, .5]. */
    float *vector_a = malloc(sizeof *vector_a * (size_t)num_elements);
    float *vector_b = malloc(sizeof *vector_b * (size_t)num_elements);
    if (vector_a == NULL || vector_b == NULL) {
        fprintf(stderr, "Error: unable to allocate the input vectors. \n");
        free(vector_a);
        free(vector_b);
        exit(1);
    }

    srand(time(NULL)); /* Seed the random number generator. */
    for (int i = 0; i < num_elements; i++) {
        vector_a[i] = ((float)rand()/(float)RAND_MAX) - 0.5;
        vector_b[i] = ((float)rand()/(float)RAND_MAX) - 0.5;
    }

    /* Compute the dot product using the reference, single-threaded solution. */
    struct timeval start, stop;
    gettimeofday(&start, NULL);
    float reference = compute_gold(vector_a, vector_b, num_elements);
    gettimeofday(&stop, NULL);

    printf("Reference solution = %f. \n", reference);
    printf("Execution time = %fs. \n", (float)(stop.tv_sec - start.tv_sec + (stop.tv_usec - start.tv_usec)/(float)1000000));
    printf("\n");

    /* Compute the dot product using the multi-threaded version. */
    gettimeofday(&start, NULL);
    float result = compute_using_pthreads(vector_a, vector_b, num_elements);
    gettimeofday(&stop, NULL);

    printf("Pthread solution = %f. \n", result);
    printf("Execution time = %fs. \n", (float)(stop.tv_sec - start.tv_sec + (stop.tv_usec - start.tv_usec)/(float)1000000));
    printf("\n");

    /* Free memory here. */
    free(vector_a);
    free(vector_b);

    pthread_exit(NULL);
}
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { // Matrices for the program Matrix A; // The input matrix Matrix U_reference; // The upper triangular matrix computed by the reference code Matrix U_mt; // The upper triangular matric computed by the openmp code // Initialize the random number generator with a seed value srand(time(NULL)); // Check command line arguments if(argc > 1){ printf("Error. This program accepts no arguments. \n"); exit(0); } // Allocate and initialize the matrices A = allocate_matrix(MATRIX_SIZE, MATRIX_SIZE, 1); // Allocate and populate a random square matrix U_reference = allocate_matrix(MATRIX_SIZE, MATRIX_SIZE, 0); // Allocate space for the reference result U_mt = allocate_matrix(MATRIX_SIZE, MATRIX_SIZE, 0); // Allocate space for the multi-threaded result // Copy the contents of the A matrix into the U matrices for (int i = 0; i < A.num_rows; i ++){ for(int j = 0; j < A.num_rows; j++){ U_reference.elements[A.num_rows*i + j] = A.elements[A.num_rows*i + j]; U_mt.elements[A.num_rows*i + j] = A.elements[A.num_rows*i + j]; } } printf("Performing gaussian elimination using the reference code. \n"); struct timeval start, stop; gettimeofday(&start, NULL); int status = compute_gold(U_reference.elements, A.num_rows); gettimeofday(&stop, NULL); printf("CPU run time = %0.2f s. \n", (float)(stop.tv_sec - start.tv_sec + (stop.tv_usec - start.tv_usec)/(float)1000000)); if(status == 0){ printf("Failed to convert given matrix to upper triangular. Try again. Exiting. \n"); exit(0); } status = perform_simple_check(U_reference); // Check that the principal diagonal elements are 1 if(status == 0){ printf("The upper triangular matrix is incorrect. Exiting. \n"); exit(0); } printf("Single-threaded Gaussian elimination was successful. 
\n"); /* MODIFY THIS CODE: Perform the Gaussian elimination using the multi-threaded version. The resulting upper triangular matrix should be returned in U_mt */ gauss_eliminate_using_openmp(U_mt); // check if the pthread result is equivalent to the expected solution within the specified tolerance. Do not change this value int size = MATRIX_SIZE*MATRIX_SIZE; int res = check_results(U_reference.elements, U_mt.elements, size, 0.0001f); printf("Test %s\n", (1 == res) ? "PASSED" : "FAILED"); // Free host matrices free(A.elements); free(U_reference.elements); free(U_mt.elements); return 0; }