/**
 * Calls BlackScholesCall with arguments unpacked from one matrix row.
 *
 * Row 0 of parametersMatrix is read column by column as
 * Spot, Strike, r, d, vol, expiry (columns 0 through 5).
 *
 * Throws a string literal (const char*) when the matrix is not exactly 1 x 6.
 */
double BSCallWithParams(const MyMatrix& parametersMatrix)
{
    // Reject the input as soon as EITHER dimension is wrong. The earlier
    // check combined the two tests with &&, which only rejected matrices
    // whose column count AND row count were both wrong; the message also
    // said "1 x 5" although six columns are read below.
    if (parametersMatrix.columns() != 6 || parametersMatrix.rows() != 1)
    {
        throw("Input matrix should be 1 x 6");
    }

    double Spot   = parametersMatrix(0, 0);
    double Strike = parametersMatrix(0, 1);
    double r      = parametersMatrix(0, 2);
    double d      = parametersMatrix(0, 3);
    double vol    = parametersMatrix(0, 4);
    double expiry = parametersMatrix(0, 5);

    return BlackScholesCall(Spot, Strike, r, d, vol, expiry);
}
// Evaluates BlackScholesCall with the caller-supplied Vol. The remaining
// arguments (Spot, Strike, r, d, T) are not parameters of this method and are
// resolved from the enclosing scope -- presumably data members of BSCallTwo.
double BSCallTwo::Price(double Vol) const
{
    const double callValue = BlackScholesCall(Spot, Strike, r, d, Vol, T);
    return callValue;
}
// Function-call operator: evaluates BlackScholesCall with the caller-supplied
// Spot. Strike, r, d, Vol and T are not parameters and are resolved from the
// enclosing scope -- presumably data members of BSCallSpot.
double BSCallSpot::operator ()(double Spot) const
{
    const double callValue = BlackScholesCall(Spot, Strike, r, d, Vol, T);
    return callValue;
}
// Evaluates BlackScholesCall for the given Spot with every other argument
// fixed. The constants are passed in the same positions and with the same
// types (int / double) as the literals they replace; the local names only
// mirror the argument names used by the sibling wrappers in this file.
double BSCall(double Spot)
{
    const int    strike = 100;
    const double r      = 0.05;
    const double d      = 0.0;
    const double vol    = 0.15;
    const int    expiry = 1;

    return BlackScholesCall(Spot, strike, r, d, vol, expiry);
}
int main(int argc, char **argv) { char *precisionChoice; cutGetCmdLineArgumentstr(argc, (const char **)argv, "type", &precisionChoice); if(precisionChoice == NULL) useDoublePrecision = 0; else { if(!strcasecmp(precisionChoice, "double")) useDoublePrecision = 1; else useDoublePrecision = 0; } const int MAX_GPU_COUNT = 8; const int OPT_N = 256; const int PATH_N = 1 << 18; const unsigned int SEED = 777; //Input data array TOptionData optionData[OPT_N]; //Final GPU MC results TOptionValue callValueGPU[OPT_N]; //"Theoretical" call values by Black-Scholes formula float callValueBS[OPT_N]; //Solver config TOptionPlan optionSolver[MAX_GPU_COUNT]; //OS thread ID CUTThread threadID[MAX_GPU_COUNT]; //GPU number present in the system int GPU_N; int gpuBase, gpuIndex; int i; //Timer unsigned int hTimer; float time; double delta, ref, sumDelta, sumRef, sumReserve; cutilSafeCall( cudaGetDeviceCount(&GPU_N) ); cutilCheckError( cutCreateTimer(&hTimer) ); #ifdef _EMU GPU_N = 1; #endif printf("main(): generating input data...\n"); srand(123); for(i = 0; i < OPT_N; i++) { optionData[i].S = randFloat(5.0f, 50.0f); optionData[i].X = randFloat(10.0f, 25.0f); optionData[i].T = randFloat(1.0f, 5.0f); optionData[i].R = 0.06f; optionData[i].V = 0.10f; callValueGPU[i].Expected = -1.0f; callValueGPU[i].Confidence = -1.0f; } printf("main(): starting %i host threads...\n", GPU_N); //Get option count for each GPU for(i = 0; i < GPU_N; i++) optionSolver[i].optionCount = OPT_N / GPU_N; //Take into account cases with "odd" option counts for(i = 0; i < (OPT_N % GPU_N); i++) optionSolver[i].optionCount++; //Assign GPU option ranges gpuBase = 0; for(i = 0; i < GPU_N; i++) { optionSolver[i].device = i; optionSolver[i].optionData = optionData + gpuBase; optionSolver[i].callValue = callValueGPU + gpuBase; optionSolver[i].seed = SEED; optionSolver[i].pathN = PATH_N; gpuBase += optionSolver[i].optionCount; } //Start the timer cutilCheckError( cutResetTimer(hTimer) ); cutilCheckError( cutStartTimer(hTimer) 
); //Start CPU thread for each GPU for(gpuIndex = 0; gpuIndex < GPU_N; gpuIndex++) threadID[gpuIndex] = cutStartThread((CUT_THREADROUTINE)solverThread, &optionSolver[gpuIndex]); //Stop the timer cutilCheckError( cutStopTimer(hTimer) ); time = cutGetTimerValue(hTimer); printf("main(): waiting for GPU results...\n"); cutWaitForThreads(threadID, GPU_N); printf("main(): GPU statistics\n"); for(i = 0; i < GPU_N; i++) { printf("GPU #%i\n", optionSolver[i].device); printf("Options : %i\n", optionSolver[i].optionCount); printf("Simulation paths: %i\n", optionSolver[i].pathN); } printf("\nTotal time (ms.): %f\n", time); printf("Options per sec.: %f\n", OPT_N / (time * 0.001)); #ifdef DO_CPU printf("main(): running CPU MonteCarlo...\n"); TOptionValue callValueCPU; sumDelta = 0; sumRef = 0; for(i = 0; i < OPT_N; i++) { MonteCarloCPU( callValueCPU, optionData[i], NULL, PATH_N ); delta = fabs(callValueCPU.Expected - callValueGPU[i].Expected); ref = callValueCPU.Expected; sumDelta += delta; sumRef += fabs(ref); printf("Exp : %f | %f\t", callValueCPU.Expected, callValueGPU[i].Expected); printf("Conf: %f | %f\n", callValueCPU.Confidence, callValueGPU[i].Confidence); } printf("L1 norm: %E\n", sumDelta / sumRef); #endif printf("main(): comparing Monte Carlo and Black-Scholes results...\n"); sumDelta = 0; sumRef = 0; sumReserve = 0; for(i = 0; i < OPT_N; i++) { BlackScholesCall( callValueBS[i], optionData[i] ); delta = fabs(callValueBS[i] - callValueGPU[i].Expected); ref = callValueBS[i]; sumDelta += delta; sumRef += fabs(ref); if(delta > 1e-6) sumReserve += callValueGPU[i].Confidence / delta; #ifdef PRINT_RESULTS printf("BS: %f; delta: %E\n", callValueBS[i], delta); #endif } sumReserve /= OPT_N; printf("L1 norm : %E\n", sumDelta / sumRef); printf("Average reserve: %f\n", sumReserve); printf((sumReserve > 1.0f) ? "PASSED\n" : "FAILED.\n"); printf("Shutting down...\n"); cutilCheckError( cutDeleteTimer(hTimer) ); cutilExit(argc, argv); }
int main(int argc, char **argv) { char *multiMethodChoice = NULL; char *scalingChoice = NULL; bool use_threads = true; bool bqatest = false; bool strongScaling = false; pArgc = &argc; pArgv = argv; printf("%s Starting...\n\n", argv[0]); if (checkCmdLineFlag(argc, (const char **)argv, "qatest")) { bqatest = true; } getCmdLineArgumentString(argc, (const char **)argv, "method", &multiMethodChoice); getCmdLineArgumentString(argc, (const char **)argv, "scaling", &scalingChoice); if (checkCmdLineFlag(argc, (const char **)argv, "h") || checkCmdLineFlag(argc, (const char **)argv, "help")) { usage(); exit(EXIT_SUCCESS); } if (multiMethodChoice == NULL) { use_threads = true; } else { if (!strcasecmp(multiMethodChoice, "threaded")) { use_threads = true; } else { use_threads = false; } } if (use_threads == false) { printf("Using single CPU thread for multiple GPUs\n"); } if (scalingChoice == NULL) { strongScaling = false; } else { if (!strcasecmp(scalingChoice, "strong")) { strongScaling = true; } else { strongScaling = false; } } //GPU number present in the system int GPU_N; checkCudaErrors(cudaGetDeviceCount(&GPU_N)); int nOptions = 256; nOptions = adjustProblemSize(GPU_N, nOptions); // select problem size int scale = (strongScaling) ? 
1 : GPU_N; int OPT_N = nOptions * scale; int PATH_N = 262144; const unsigned long long SEED = 777; // initialize the timers hTimer = new StopWatchInterface*[GPU_N]; for (int i=0; i<GPU_N; i++) { sdkCreateTimer(&hTimer[i]); sdkResetTimer(&hTimer[i]); } //Input data array TOptionData *optionData = new TOptionData[OPT_N]; //Final GPU MC results TOptionValue *callValueGPU = new TOptionValue[OPT_N]; //"Theoretical" call values by Black-Scholes formula float *callValueBS = new float[OPT_N]; //Solver config TOptionPlan *optionSolver = new TOptionPlan[GPU_N]; //OS thread ID CUTThread *threadID = new CUTThread[GPU_N]; int gpuBase, gpuIndex; int i; float time; double delta, ref, sumDelta, sumRef, sumReserve; printf("MonteCarloMultiGPU\n"); printf("==================\n"); printf("Parallelization method = %s\n", use_threads ? "threaded" : "streamed"); printf("Problem scaling = %s\n", strongScaling? "strong" : "weak"); printf("Number of GPUs = %d\n", GPU_N); printf("Total number of options = %d\n", OPT_N); printf("Number of paths = %d\n", PATH_N); printf("main(): generating input data...\n"); srand(123); for (i=0; i < OPT_N; i++) { optionData[i].S = randFloat(5.0f, 50.0f); optionData[i].X = randFloat(10.0f, 25.0f); optionData[i].T = randFloat(1.0f, 5.0f); optionData[i].R = 0.06f; optionData[i].V = 0.10f; callValueGPU[i].Expected = -1.0f; callValueGPU[i].Confidence = -1.0f; } printf("main(): starting %i host threads...\n", GPU_N); //Get option count for each GPU for (i = 0; i < GPU_N; i++) { optionSolver[i].optionCount = OPT_N / GPU_N; } //Take into account cases with "odd" option counts for (i = 0; i < (OPT_N % GPU_N); i++) { optionSolver[i].optionCount++; } //Assign GPU option ranges gpuBase = 0; for (i = 0; i < GPU_N; i++) { optionSolver[i].device = i; optionSolver[i].optionData = optionData + gpuBase; optionSolver[i].callValue = callValueGPU + gpuBase; // all devices use the same global seed, but start // the sequence at a different offset optionSolver[i].seed = SEED; 
optionSolver[i].pathN = PATH_N; gpuBase += optionSolver[i].optionCount; } if (use_threads || bqatest) { //Start CPU thread for each GPU for (gpuIndex = 0; gpuIndex < GPU_N; gpuIndex++) { threadID[gpuIndex] = cutStartThread((CUT_THREADROUTINE)solverThread, &optionSolver[gpuIndex]); } printf("main(): waiting for GPU results...\n"); cutWaitForThreads(threadID, GPU_N); printf("main(): GPU statistics, threaded\n"); for (i = 0; i < GPU_N; i++) { cudaDeviceProp deviceProp; checkCudaErrors(cudaGetDeviceProperties(&deviceProp, optionSolver[i].device)); printf("GPU Device #%i: %s\n", optionSolver[i].device, deviceProp.name); printf("Options : %i\n", optionSolver[i].optionCount); printf("Simulation paths: %i\n", optionSolver[i].pathN); time = sdkGetTimerValue(&hTimer[i]); printf("Total time (ms.): %f\n", time); printf("Options per sec.: %f\n", OPT_N / (time * 0.001)); } printf("main(): comparing Monte Carlo and Black-Scholes results...\n"); sumDelta = 0; sumRef = 0; sumReserve = 0; for (i = 0; i < OPT_N; i++) { BlackScholesCall(callValueBS[i], optionData[i]); delta = fabs(callValueBS[i] - callValueGPU[i].Expected); ref = callValueBS[i]; sumDelta += delta; sumRef += fabs(ref); if (delta > 1e-6) { sumReserve += callValueGPU[i].Confidence / delta; } #ifdef PRINT_RESULTS printf("BS: %f; delta: %E\n", callValueBS[i], delta); #endif } sumReserve /= OPT_N; } if (!use_threads || bqatest) { multiSolver(optionSolver, GPU_N); printf("main(): GPU statistics, streamed\n"); for (i = 0; i < GPU_N; i++) { cudaDeviceProp deviceProp; checkCudaErrors(cudaGetDeviceProperties(&deviceProp, optionSolver[i].device)); printf("GPU Device #%i: %s\n", optionSolver[i].device, deviceProp.name); printf("Options : %i\n", optionSolver[i].optionCount); printf("Simulation paths: %i\n", optionSolver[i].pathN); } time = sdkGetTimerValue(&hTimer[0]); printf("\nTotal time (ms.): %f\n", time); printf("\tNote: This is elapsed time for all to compute.\n"); printf("Options per sec.: %f\n", OPT_N / (time * 0.001)); 
printf("main(): comparing Monte Carlo and Black-Scholes results...\n"); sumDelta = 0; sumRef = 0; sumReserve = 0; for (i = 0; i < OPT_N; i++) { BlackScholesCall(callValueBS[i], optionData[i]); delta = fabs(callValueBS[i] - callValueGPU[i].Expected); ref = callValueBS[i]; sumDelta += delta; sumRef += fabs(ref); if (delta > 1e-6) { sumReserve += callValueGPU[i].Confidence / delta; } #ifdef PRINT_RESULTS printf("BS: %f; delta: %E\n", callValueBS[i], delta); #endif } sumReserve /= OPT_N; } #ifdef DO_CPU printf("main(): running CPU MonteCarlo...\n"); TOptionValue callValueCPU; sumDelta = 0; sumRef = 0; for (i = 0; i < OPT_N; i++) { MonteCarloCPU( callValueCPU, optionData[i], NULL, PATH_N ); delta = fabs(callValueCPU.Expected - callValueGPU[i].Expected); ref = callValueCPU.Expected; sumDelta += delta; sumRef += fabs(ref); printf("Exp : %f | %f\t", callValueCPU.Expected, callValueGPU[i].Expected); printf("Conf: %f | %f\n", callValueCPU.Confidence, callValueGPU[i].Confidence); } printf("L1 norm: %E\n", sumDelta / sumRef); #endif printf("Shutting down...\n"); for (int i=0; i<GPU_N; i++) { sdkStartTimer(&hTimer[i]); checkCudaErrors(cudaSetDevice(i)); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); } delete[] optionSolver; delete[] callValueBS; delete[] callValueGPU; delete[] optionData; delete[] threadID; delete[] hTimer; printf("Test Summary...\n"); printf("L1 norm : %E\n", sumDelta / sumRef); printf("Average reserve: %f\n", sumReserve); printf(sumReserve > 1.0f ? "Test passed\n" : "Test failed!\n"); exit(sumReserve > 1.0f ? EXIT_SUCCESS : EXIT_FAILURE); }
int main() { double spot = 100;double strike = 100;double sigma = 0.2;double r = 0.01;double T = 1; int steps[5]{100,500,1000,2000,5000}; double BS = BlackScholesCall(spot, strike, sigma, T, r); //CRR tree CRRtree* CRR[5]; double CRRtree_zero[5]; double CRRtree_intrinsic[5]; double CRRtree_linear[5]; for (int i = 0; i < 5; i++) { CRR[i]= new CRRtree(spot, strike, sigma, r, T, steps[i]); CRR[i]->build_Tree(); CRR[i]->finalprice(); CRRtree_zero[i] = CRR[i]->getprice_zeropruning(); CRRtree_intrinsic[i] = CRR[i]->getprice_intrinsic(); CRRtree_linear[i] = CRR[i]->getprice_linear(); } double CRR_error_zero[5]; double CRR_error_intrinsic[5]; double CRR_error_linear[5]; for (int i = 0; i < 5; i++) { CRR_error_zero[i] = CRRtree_zero[i] - BS; cout <<i<<"-thCRRerror1"<< CRR_error_zero[i] <<endl; CRR_error_intrinsic[i] = CRRtree_intrinsic[i] - BS; cout <<i<<"-thCRRerror2"<< CRR_error_intrinsic[i] << endl; CRR_error_linear[i] = CRRtree_linear[i] - BS; cout <<i<<"-thCRRerror3"<< CRR_error_linear[i] << endl; } // TIan tree Tiantree* tian[5]; double TIAN_setzero[5]; double TIAN_intrinsic[5]; double TIAN_linear[5]; for (int i = 0; i < 5; i++) { tian[i] = new Tiantree(spot, strike, sigma, r, T, steps[i]); tian[i]->build_Tree(); tian[i]->finalprice(); TIAN_setzero[i] = tian[i]->getprice_zeropruning(); TIAN_intrinsic[i] = tian[i]->getprice_intrinsic(); TIAN_linear[i] = tian[i]->getprice_linear(); } //error TIAN double TIAN_error_setzero[5]; double TIAN_error_intrinsic[5]; double TIAN_error_linear[5]; for (int i = 0; i < 5; i++) { TIAN_error_setzero[i] = TIAN_setzero[i] - BS; cout <<i<<"-th Tianerror1"<< TIAN_error_setzero[i] << endl; TIAN_error_intrinsic[i] = TIAN_intrinsic[i] - BS; cout <<i<<"-th Tianerror2"<< TIAN_error_intrinsic[i] << endl; TIAN_error_linear[i] = TIAN_linear[i] - BS; cout <<i<<"-th Tianerror3"<< TIAN_error_linear[i] << endl; } //Trinomial tree cout << "Trinomial model:" << endl; TRI_Tree* TRI[5]; double TRI_setzero[5]; double TRI_intrinsic[5]; double TRI_linear[5]; 
for (int i = 0; i < 5; i++) { TRI[i] = new TRI_Tree(spot, strike, sigma, r, T, steps[i]); TRI[i]->buildTree(); TRI[i]->finalprice(); TRI_setzero[i] = TRI[i]->getprice_zeropruning(); TRI_intrinsic[i] =TRI[i]->getprice_intrinsic(); TRI_linear[i] = TRI[i]->getprice_linear(); } //error Trinomial double TRI_error_zero[5]; double TRI_error_intrinsic[5]; double TRI_error_linear[5]; for (int i = 0; i < 5; i++) { TRI_error_zero[i] = TRI_setzero[i] - BS; cout <<i<<"-th TRIerror1"<< TRI_error_zero[i] << endl; TRI_error_intrinsic[i] = TRI_intrinsic[i] - BS; cout <<i<<"-th TRIerror1"<<TRI_error_intrinsic[i] << endl; TRI_error_linear[i] = TRI_linear[i] - BS; cout <<i<<"-th TRIerror1"<< TRI_error_linear[i] << endl; } }
//////////////////////////////////////////////////////////////////////////////// // Main program //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv){ const unsigned int OPT_N_MAX = 512; unsigned int useDoublePrecision; printf("[binomialOptions]\n"); int devID = cutilDeviceInit(argc, argv); if (devID < 0) { printf("exiting...\n"); cutilExit(argc, argv); exit(0); } cutilSafeCall(cudaGetDevice(&devID)); cudaDeviceProp deviceProp; cutilSafeCall(cudaGetDeviceProperties(&deviceProp, devID)); char *precisionChoice; cutGetCmdLineArgumentstr(argc, (const char **)argv, "type", &precisionChoice); if(precisionChoice == NULL) { useDoublePrecision = 0; } else { if(!strcasecmp(precisionChoice, "double")) useDoublePrecision = 1; else useDoublePrecision = 0; } printf(useDoublePrecision ? "Using double precision...\n" : "Using single precision...\n"); const int OPT_N = deviceEmulation() ? 1 : OPT_N_MAX; TOptionData optionData[OPT_N_MAX]; float callValueBS[OPT_N_MAX], callValueGPU[OPT_N_MAX], callValueCPU[OPT_N_MAX]; double sumDelta, sumRef, gpuTime, errorVal; unsigned int hTimer; int i; cutilCheckError( cutCreateTimer(&hTimer) ); int version = deviceProp.major * 10 + deviceProp.minor; if(useDoublePrecision && version < 13){ printf("Double precision is not supported.\n"); return 0; } printf("Generating input data...\n"); //Generate options set srand(123); for(i = 0; i < OPT_N; i++){ optionData[i].S = randData(5.0f, 30.0f); optionData[i].X = randData(1.0f, 100.0f); optionData[i].T = randData(0.25f, 10.0f); optionData[i].R = 0.06f; optionData[i].V = 0.10f; BlackScholesCall(callValueBS[i], optionData[i]); } printf("Running GPU binomial tree...\n"); cutilSafeCall( cudaThreadSynchronize() ); cutilCheckError( cutResetTimer(hTimer) ); cutilCheckError( cutStartTimer(hTimer) ); if(useDoublePrecision) binomialOptions_SM13(callValueGPU, optionData, OPT_N); else binomialOptions_SM10(callValueGPU, optionData, OPT_N); cutilSafeCall( 
cudaThreadSynchronize() ); cutilCheckError( cutStopTimer(hTimer) ); gpuTime = cutGetTimerValue(hTimer); printf("Options count : %i \n", OPT_N); printf("Time steps : %i \n", NUM_STEPS); printf("binomialOptionsGPU() time: %f msec\n", gpuTime); printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001)); printf("Running CPU binomial tree...\n"); for(i = 0; i < OPT_N; i++) binomialOptionsCPU(callValueCPU[i], optionData[i]); printf("Comparing the results...\n"); sumDelta = 0; sumRef = 0; printf("GPU binomial vs. Black-Scholes\n"); for(i = 0; i < OPT_N; i++){ sumDelta += fabs(callValueBS[i] - callValueGPU[i]); sumRef += fabs(callValueBS[i]); } if(sumRef >1E-5) printf("L1 norm: %E\n", sumDelta / sumRef); else printf("Avg. diff: %E\n", sumDelta / (double)OPT_N); printf("CPU binomial vs. Black-Scholes\n"); sumDelta = 0; sumRef = 0; for(i = 0; i < OPT_N; i++){ sumDelta += fabs(callValueBS[i]- callValueCPU[i]); sumRef += fabs(callValueBS[i]); } if(sumRef >1E-5) printf("L1 norm: %E\n", sumDelta / sumRef); else printf("Avg. diff: %E\n", sumDelta / (double)OPT_N); printf("CPU binomial vs. GPU binomial\n"); sumDelta = 0; sumRef = 0; for(i = 0; i < OPT_N; i++){ sumDelta += fabs(callValueGPU[i] - callValueCPU[i]); sumRef += callValueCPU[i]; } if(sumRef > 1E-5) printf("L1 norm: %E\n", errorVal = sumDelta / sumRef); else printf("Avg. diff: %E\n", errorVal = sumDelta / (double)OPT_N); printf("Shutting down...\n"); printf("\n[binomialOptions] - Test Summary:\n"); printf((errorVal < 5e-4) ? "PASSED\n" : "FAILED\n"); cutilCheckError( cutDeleteTimer(hTimer) ); cudaThreadExit(); cutilExit(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Main program //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { printf("[%s] - Starting...\n", argv[0]); cudaDeviceProp deviceProp; int devID = findCudaDevice(argc, (const char **)argv); checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); if (((deviceProp.major << 4) + deviceProp.minor) < 0x20) { fprintf(stderr, "binomialOptions requires Compute Capability of SM 2.0 or higher to run.\n"); cudaDeviceReset(); exit(EXIT_WAIVED); } const int OPT_N = MAX_OPTIONS; TOptionData optionData[MAX_OPTIONS]; real callValueBS[MAX_OPTIONS], callValueGPU[MAX_OPTIONS], callValueCPU[MAX_OPTIONS]; real sumDelta, sumRef, gpuTime, errorVal; StopWatchInterface *hTimer = NULL; int i; sdkCreateTimer(&hTimer); printf("Generating input data...\n"); //Generate options set srand(123); for (i = 0; i < OPT_N; i++) { optionData[i].S = randData(5.0f, 30.0f); optionData[i].X = randData(1.0f, 100.0f); optionData[i].T = randData(0.25f, 10.0f); optionData[i].R = 0.06f; optionData[i].V = 0.10f; BlackScholesCall(callValueBS[i], optionData[i]); } printf("Running GPU binomial tree...\n"); checkCudaErrors(cudaDeviceSynchronize()); sdkResetTimer(&hTimer); sdkStartTimer(&hTimer); binomialOptionsGPU(callValueGPU, optionData, OPT_N); checkCudaErrors(cudaDeviceSynchronize()); sdkStopTimer(&hTimer); gpuTime = sdkGetTimerValue(&hTimer); printf("Options count : %i \n", OPT_N); printf("Time steps : %i \n", NUM_STEPS); printf("binomialOptionsGPU() time: %f msec\n", gpuTime); printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001)); printf("Running CPU binomial tree...\n"); for (i = 0; i < OPT_N; i++) { binomialOptionsCPU(callValueCPU[i], optionData[i]); } printf("Comparing the results...\n"); sumDelta = 0; sumRef = 0; printf("GPU binomial vs. 
Black-Scholes\n"); for (i = 0; i < OPT_N; i++) { sumDelta += fabs(callValueBS[i] - callValueGPU[i]); sumRef += fabs(callValueBS[i]); } if (sumRef >1E-5) { printf("L1 norm: %E\n", (double)(sumDelta / sumRef)); } else { printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N)); } printf("CPU binomial vs. Black-Scholes\n"); sumDelta = 0; sumRef = 0; for (i = 0; i < OPT_N; i++) { sumDelta += fabs(callValueBS[i]- callValueCPU[i]); sumRef += fabs(callValueBS[i]); } if (sumRef >1E-5) { printf("L1 norm: %E\n", sumDelta / sumRef); } else { printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N)); } printf("CPU binomial vs. GPU binomial\n"); sumDelta = 0; sumRef = 0; for (i = 0; i < OPT_N; i++) { sumDelta += fabs(callValueGPU[i] - callValueCPU[i]); sumRef += callValueCPU[i]; } if (sumRef > 1E-5) { printf("L1 norm: %E\n", errorVal = sumDelta / sumRef); } else { printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N)); } printf("Shutting down...\n"); sdkDeleteTimer(&hTimer); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); printf("\nNOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n"); if (errorVal > 5e-4) { printf("Test failed!\n"); exit(EXIT_FAILURE); } printf("Test passed\n"); exit(EXIT_SUCCESS); }