Beispiel #1
0
/*
 * Test driver: runs CountDifferences on one hard-coded sample array, then
 * prints the verdict followed by the value of the Count counter.
 */
int main (void)
{
    /* Candidate test arrays - enable exactly one of them for each run. */
    //int Sample[] = { 1, 3, 5, 7, 9, 11, 20, 25, 27, 29 };
    //int Sample[] = { 1, 3, 6, 9, 11, 13, 20, 25, 27, 29 };
    //int Sample[] = { 1, 3, 6, 10, 11, 13, 20, 25, 27, 29 };
    //int Sample[] = { 1, 3, 6, 10, 15, 17, 20, 25, 27, 29 };
    //int Sample[] = { 1, 3, 6, 10, 15, 21, 22, 25, 27, 29 };
    //int Sample[] = { 1, 3, 6, 10, 15, 21, 28, 30, 37, 39 };
    int Sample[] = { 1, 3, 6, 10, 15, 21, 28, 36, 39, 49 };
    //int Sample[] = { 1, 3, 6, 10, 15, 21, 28, 36, 45, 49 };
    //int Sample[] = { 1, 3, 6, 10, 15, 21, 28, 36, 45, 55 };

    /* Element count derived from the array itself, so it follows the chosen sample. */
    int Total = sizeof (Sample) / sizeof (int);

    /* Run the algorithm under test. */
    int Outcome = CountDifferences (Sample, Total);

    /* Report the verdict: a non-zero result means the property holds. */
    const char *Verdict = Outcome ? "Verifica " : "Nao verifica ";
    fprintf (stdout, "%s", Verdict);

    /* Report the number of arithmetic operations recorded in Count. */
    fprintf (stdout, "e executou %10d operacoes aritmeticas\n", Count);

    exit (EXIT_SUCCESS);
}
/*
 * Main
 *
 * Benchmarks BitonicSortCPU against BitonicSortGPU_Naive and
 * BitonicSortGPU_Optimized on the same random input. Each sort is run
 * ITERATIONS times, timed with clock(), and the average time per run is
 * printed in milliseconds. The output of each GPU variant is compared with
 * the CPU output through CountDifferences.
 *
 * argc and argv are not used.
 *
 * Returns 0 on success, EXIT_FAILURE if the work buffers cannot be allocated.
 */
int main( int argc, char **argv ) {

    int *arr;       // Array to operate on
    int *cpuResult; // Resulting CPU array
    int *origArray; // Original array to copy back, since multiple iterations are performed
    int length = 1 << 23;
    size_t numBytes = (size_t)length * sizeof(int); // Size of each work buffer

    clock_t startTime, endTime;
    float cpuTime, gpuTimeNaive, gpuTimeOptimized;

    (void)argc;
    (void)argv;

    printf("Running Bitonic Sort for %d iterations\n", ITERATIONS);
    printf("Length = %d\n", length);

    arr = (int*) malloc( numBytes );
    cpuResult = (int*) malloc( numBytes );
    origArray = (int*) malloc( numBytes );

    // Each buffer is 32 MiB; bail out cleanly instead of dereferencing NULL
    if( arr == NULL || cpuResult == NULL || origArray == NULL ) {
        fprintf( stderr, "Failed to allocate three buffers of %d ints\n", length );
        free( origArray ); // free(NULL) is a no-op, so no per-pointer guard is needed
        free( cpuResult );
        free( arr );
        return EXIT_FAILURE;
    }

    // Fill array with random values
    for( int i = 0; i < length; ++i ) {
        int randVal = rand() % length;
        origArray[i] = randVal;
        // NOTE(review): odd indices are negated in arr only. arr is overwritten
        // from origArray before every sort below, so the sign flip never reaches
        // any sort - confirm whether origArray was meant to receive it as well.
        arr[i] = i % 2 ? -randVal : randVal;
    }

    // Perform the CPU sort; restore the unsorted input before every pass
    startTime = clock();
    for( int i = 0; i < ITERATIONS; ++i ) {
        memcpy( arr, origArray, numBytes );
        BitonicSortCPU( arr, 0, length, UP );
    }
    endTime = clock();

    // Average time per iteration in milliseconds
    cpuTime = (float)(endTime - startTime) * 1000 / (float)CLOCKS_PER_SEC / ITERATIONS;

    // Store CPU result for comparison against GPU
    memcpy( cpuResult, arr, numBytes );

    // Warm-up pass for GPU (Naive), excluded from the timing
    memcpy( arr, origArray, numBytes );
    BitonicSortGPU_Naive( arr, 0, length, UP );

    // Timed passes for GPU (Naive)
    // NOTE(review): clock() reports processor time of this program; confirm that
    // the GPU sort functions block until the device work has finished.
    startTime = clock();
    for( int i = 0; i < ITERATIONS; ++i ) {
        memcpy( arr, origArray, numBytes );
        BitonicSortGPU_Naive( arr, 0, length, UP );
    }
    endTime = clock();

    gpuTimeNaive = (float)(endTime - startTime) * 1000 / (float)CLOCKS_PER_SEC / ITERATIONS;
    int diffNaive = CountDifferences( cpuResult, arr, length );

    // Warm-up pass for GPU (Optimized), excluded from the timing
    memcpy( arr, origArray, numBytes );
    BitonicSortGPU_Optimized( arr, 0, length, UP );

    // Timed passes for GPU (Optimized)
    startTime = clock();
    for( int i = 0; i < ITERATIONS; ++i ) {
        memcpy( arr, origArray, numBytes );
        BitonicSortGPU_Optimized( arr, 0, length, UP );
    }
    endTime = clock();

    gpuTimeOptimized = (float)(endTime - startTime) * 1000 / (float)CLOCKS_PER_SEC / ITERATIONS;
    int diffOptimized = CountDifferences( cpuResult, arr, length );

    printf("\nHost computation took %1.3f ms\n", cpuTime);
    printf("\nDevice computation (naive) took %1.3f ms\n", gpuTimeNaive);
    printf("\nDevice computation (optimized) took %1.3f ms\n", gpuTimeOptimized);

    printf("\nNumber of different elements between CPU and GPU (naive): %d\n", diffNaive);
    printf("\nNumber of different elements between CPU and GPU (optimized): %d\n", diffOptimized);

    // Clean up
    free( origArray );
    free( cpuResult );
    free( arr );

    return 0;

}