/*
 * Returns non-zero when `arg` is written as an option (it begins with '-')
 * and contains the text `flag`.
 *
 * Requiring the leading '-' keeps input names that merely contain an option
 * substring (for example "libfoo-v2.o" or "my-header.o") from being mistaken
 * for "-v" / "-h".
 */
static int cli_has_flag(const char *arg, const char *flag)
{
    return arg[0] == '-' && strstr(arg, flag) != NULL;
}

/*
 * Returns non-zero when `arg` is any of the options understood by main(),
 * i.e. an argument that must not be forwarded to print_Info().
 */
static int cli_is_option(const char *arg)
{
    return cli_has_flag(arg, "-mangled")
        || cli_has_flag(arg, "-demangled")
        || cli_has_flag(arg, "-unmangled")
        || cli_has_flag(arg, "-h")
        || cli_has_flag(arg, "-v")
        || cli_has_flag(arg, "-dumpversion");
}

/*
 * Program entry point.
 *
 * With no arguments, prints the usage text and returns 0.
 *
 * First pass over argv: applies the options.
 *   -mangled                  sets opt_mangled
 *   -demangled / -unmangled   sets opt_demangled
 *   -h / -v (as substrings)   prints the usage text and returns 0
 *   -dumpversion              prints TOOL_VERSION and returns 0
 *
 * Second pass over argv: every argument that is not an option is passed to
 * print_Info(), in command-line order.
 *
 * Returns 0 in all cases.
 */
int main(int argc, char *argv[])
{
    int i;
    char *opt;

    if (argc == 1)
    {
        print_Usage();
        return 0;
    }

    /* Pass 1: options take effect before any input is processed, so their
     * position on the command line relative to the inputs does not matter. */
    for (i = 1; i < argc; i++)
    {
        opt = argv[i];

        if (cli_has_flag(opt, "-mangled"))
        {
            opt_mangled = 1;
        }
        else if (cli_has_flag(opt, "-demangled") || cli_has_flag(opt, "-unmangled"))
        {
            opt_demangled = 1;
        }
        else if (cli_has_flag(opt, "-h") || cli_has_flag(opt, "-v"))
        {
            print_Usage();
            return 0;
        }
        else if (cli_has_flag(opt, "-dumpversion"))
        {
            printf("%s\n", TOOL_VERSION);
            return 0;
        }
    }

    /* Pass 2: everything that is not an option is treated as an input. */
    for (i = 1; i < argc; i++)
    {
        opt = argv[i];

        if (cli_is_option(opt))
        {
            continue;
        }

        print_Info(opt);
    }

    return 0;
}
Esempio n. 2
0
/*
 * Benchmark driver: searches for launch parameters on one CUDA device.
 *
 * Command line:
 *   argv[1]  device ID (parsed with atoi)
 *   argv[2]  optional repeat count handed to GetCycleTime (default 4);
 *            only read when exactly two arguments are given
 *
 * Steps:
 *   1. Query and print the device properties, then select the device.
 *   2. Sweep the thread count 16, 32, ... 512 with a fixed total of 524288
 *      work items and keep the thread count with the lowest cycle time.
 *   3. Sweep the block count 512, 1024, ... 32768 with that thread count,
 *      keeping the block count with the lowest cycle time, and recording
 *      half of the first block count whose GPU time exceeds 16 as the
 *      "smooth" block count.
 *   4. Re-measure the best configuration with 16x the repeat count and print
 *      the suggested parameters.
 *
 * Exits with status 1 on missing arguments or when the device cannot be
 * queried or selected; returns 0 otherwise.
 */
int main(int argc, char** argv)
{
	if (argc <= 1) {
		print_Usage();
		exit(1);
	}
	//Init
	cudaError cudaStatus;
	cudaDeviceProp prop;
	int deviceID = atoi(argv[1]);
	int times = 4;
	if (argc == 3)
		times = atoi(argv[2]);
	// atoi yields 0 for non-numeric text; a non-positive repeat count is not
	// a meaningful number of measurement repetitions, so use the default.
	if (times <= 0) {
		printf("Invalid repeat count, using default of 4.\n");
		times = 4;
	}

	cudaStatus = cudaGetDeviceProperties(&prop, deviceID);
	if (cudaStatus != cudaSuccess) {
		printf("Get device(%d) info failed!\n", deviceID);
		printf("CUDA error: %s\n", cudaGetErrorString(cudaStatus));
		exit(1);
	}

	printf("Device: %s\n", prop.name);
	printf("Core Clock: %.2fMHz\tMemory Clock: %.2fMhz\n", prop.clockRate / 1000.0, prop.memoryClockRate / 1000.0);
	printf("Memory Bus Width: %d\n", prop.memoryBusWidth);
	printf("Mulitprocessor Count: %d\n", prop.multiProcessorCount);

	cudaStatus = cudaSetDevice(deviceID);
	if (cudaStatus != cudaSuccess) {
		printf("Set device(%d) failed!\n", deviceID);
		printf("CUDA error: %s\n", cudaGetErrorString(cudaStatus));
		// Continuing would benchmark whichever device is current instead of
		// the requested one, producing misleading parameters.
		exit(1);
	}
	//Start Benchmark
	printf("Searching for best thread count.\n");
	const int workCount = 524288;
	int threadCount = 16;
	int bestThreadCount = 16;
	float bestCycleTime = 1000000;	// sentinel: any measured time below this wins
	while (threadCount < 1024) {
		// Keep the total amount of work constant while varying the block shape.
		int blockCount = workCount / threadCount;
		printf("Block Count: %d, Thread Count: %d", blockCount, threadCount);
		float cycleTime = GetCycleTime(blockCount, threadCount, times);
		printf(", CycleTime: %.2fms\n", cycleTime);
		if (cycleTime < bestCycleTime) {
			bestCycleTime = cycleTime;
			bestThreadCount = threadCount;
		}
		threadCount *= 2;
	}
	printf("\nBest thread count is: %d\n\n", bestThreadCount);
	// Trailing newline added so the first result line starts on its own line.
	printf("Searching for max performance block count\n");
	int blockCount = 512;
	int maxPerformanceBlockCount = 0;
	int smoothBlockCount = 0;
	bestCycleTime = 1000000;
	while (blockCount < 65536)
	{
		printf("Block Count: %d, Thread Count: %d", blockCount, bestThreadCount);
		float cycleTime = GetCycleTime(blockCount, bestThreadCount, times);
		// timeSlice is defined outside this function; presumably it is updated
		// by GetCycleTime -- TODO confirm.
		// NOTE(review): if timeSlice has an integer type this is an integer
		// division and GPUTime is truncated to whole milliseconds -- confirm.
		float GPUTime = timeSlice * 1000 / CLOCKS_PER_SEC;
		printf(", CycleTime: %.2fms, GPU Time: %.2f\n", cycleTime, GPUTime);
		// Remember half of the first block count whose GPU time exceeds 16.
		if (smoothBlockCount == 0 && GPUTime > 16) {
			smoothBlockCount = blockCount / 2;
		}
		if (cycleTime < bestCycleTime) {
			bestCycleTime = cycleTime;
			maxPerformanceBlockCount = blockCount;
		}
		blockCount *= 2;
	}
	printf("\nTesting device speed...\n");
	// Final measurement uses 16x the repeat count of the search runs.
	float finalCycleTime = GetCycleTime(maxPerformanceBlockCount, bestThreadCount, times * 16);
	printf("Average cycle time: %.2f, Speed: %.2f\n", finalCycleTime, 200000 / finalCycleTime);
	printf("Best parameter: -d %d -t %d -b %d -b2 %d\n", deviceID, bestThreadCount, smoothBlockCount, maxPerformanceBlockCount);
	return 0;
}