int main (void) { /* declaração dos arrays de teste - usar o pretendido para cada execução */ /* declaration of the test arrays - use each one for each execution */ //int Array[] = { 1, 3, 5, 7, 9, 11, 20, 25, 27, 29 }; //int Array[] = { 1, 3, 6, 9, 11, 13, 20, 25, 27, 29 }; //int Array[] = { 1, 3, 6, 10, 11, 13, 20, 25, 27, 29 }; //int Array[] = { 1, 3, 6, 10, 15, 17, 20, 25, 27, 29 }; //int Array[] = { 1, 3, 6, 10, 15, 21, 22, 25, 27, 29 }; //int Array[] = { 1, 3, 6, 10, 15, 21, 28, 30, 37, 39 }; int Array[] = { 1, 3, 6, 10, 15, 21, 28, 36, 39, 49 }; //int Array[] = { 1, 3, 6, 10, 15, 21, 28, 36, 45, 49 }; //int Array[] = { 1, 3, 6, 10, 15, 21, 28, 36, 45, 55 }; int NElem = sizeof (Array) / sizeof (int); int Result; /* invocação do algoritmo pretendido */ /* algorithm invocation */ Result = CountDifferences (Array, NElem); /* apresentação do resultado e do número de operações aritméticas executadas pelo algoritmo */ /* presenting the result and the number of arithmetic operations executed by the algorithm */ if (Result) fprintf (stdout, "Verifica "); else fprintf (stdout, "Nao verifica "); fprintf (stdout, "e executou %10d operacoes aritmeticas\n", Count); exit (EXIT_SUCCESS); }
/* * Main */ int main( int argc, char **argv ) { int *arr; // Array to operate on int *cpuResult; // Resulting CPU array int *origArray; // Original array to copy back, since multiple iterations are performed int length = 1 << 23; clock_t startTime, endTime; float cpuTime, gpuTimeNaive, gpuTimeOptimized; printf("Running Bitonic Sort for %d iterations\n", ITERATIONS); printf("Length = %d\n", length); arr = (int*) malloc( length * sizeof(int) ); cpuResult = (int*) malloc( length * sizeof(int) ); origArray = (int*) malloc( length * sizeof(int) ); // Fill array with random values //printf("Initial\n"); for( int i = 0; i < length; ++i ) { int randVal = rand() % length; arr[i] = randVal; origArray[i] = randVal; arr[i] = i % 2 ? -arr[i] : arr[i]; //printf("%d ", arr[i]); } // Perform the CPU sort startTime = clock(); for( int i = 0; i < ITERATIONS; ++i ) { memcpy( arr, origArray, length * sizeof(int) ); BitonicSortCPU( arr, 0, length, UP ); } endTime = clock(); cpuTime = (float)(endTime - startTime) * 1000 / (float)CLOCKS_PER_SEC / ITERATIONS; // Store CPU result for comparison against GPU memcpy( cpuResult, arr, length * sizeof(int) ); // Print sorted contents /*printf("\nSorted (CPU):\n"); for( int i = 0; i < length; i++ ) { printf("%d ", arr[i]); } printf("\n");*/ // Warm-up pass for GPU (Naive) memcpy( arr, origArray, length * sizeof(int) ); BitonicSortGPU_Naive( arr, 0, length, UP ); // Timed passes for GPU (Naive) startTime = clock(); for( int i = 0; i < ITERATIONS; ++i ) { memcpy( arr, origArray, length * sizeof(int) ); BitonicSortGPU_Naive( arr, 0, length, UP ); } endTime = clock(); gpuTimeNaive = (float)(endTime - startTime) * 1000 / (float)CLOCKS_PER_SEC / ITERATIONS; int diffNaive = CountDifferences( cpuResult, arr, length ); // Warm-up pass for GPU (Optimized) memcpy( arr, origArray, length * sizeof(int) ); BitonicSortGPU_Optimized( arr, 0, length, UP ); // Timed passes for GPU (Optimized) startTime = clock(); for( int i = 0; i < ITERATIONS; ++i ) { memcpy( arr, origArray, length * sizeof(int) ); BitonicSortGPU_Optimized( arr, 0, length, UP ); } endTime = clock(); gpuTimeOptimized = (float)(endTime - startTime) * 1000 / (float)CLOCKS_PER_SEC / ITERATIONS; int diffOptimized = CountDifferences( cpuResult, arr, length ); // Print sorted contents /*printf("\nSorted (GPU Optimized):\n"); for( int i = 0; i < length; i++ ) { printf("%d ", arr[i]); } printf("\n");*/ printf("\nHost computation took %1.3f ms\n", cpuTime); printf("\nDevice computation (naive) took %1.3f ms\n", gpuTimeNaive); printf("\nDevice computation (optimized) took %1.3f ms\n", gpuTimeOptimized); printf("\nNumber of different elements between CPU and GPU (naive): %d\n", diffNaive); printf("\nNumber of different elements between CPU and GPU (optimized): %d\n", diffOptimized); // Clean up free( origArray ); free( cpuResult ); free( arr ); return 0; }