//////////////////////////////////////////////////////////////////////////////// // Process an array of optN options //////////////////////////////////////////////////////////////////////////////// extern "C" void BlackScholesCPU( float *h_Call, //Call option price float *h_Put, //Put option price float *h_S, //Current stock price float *h_X, //Option strike price float *h_T, //Option years float R, //Riskless rate of return float V, //Stock volatility unsigned int optionCount ) { for(unsigned int i = 0; i < optionCount; i++) BlackScholesBodyCPU( h_Call[i], h_Put[i], h_S[i], h_X[i], h_T[i], R, V ); }
//////////////////////////////////////////////////////////////////////////////// // Process an array of optN options //////////////////////////////////////////////////////////////////////////////// extern "C" void BlackScholesCPU( float *h_CallResult, float *h_PutResult, float *h_StockPrice, float *h_OptionStrike, float *h_OptionYears, float Riskfree, float Volatility, int optN ){ for(int opt = 0; opt < optN; opt++) BlackScholesBodyCPU( h_CallResult[opt], h_PutResult[opt], h_StockPrice[opt], h_OptionStrike[opt], h_OptionYears[opt], Riskfree, Volatility ); }
int main(int argc, char* argv[]) { double sTime, eTime; double sum_delta = 0.0; double sum_ref = 0.0; double max_delta = 0.0; double sumReserve = 0.0; printf("Monte Carlo European Option Pricing Single Precision\n\n"); printf("Compiler Version = %d\n", __INTEL_COMPILER/100); printf("Release Update = %d\n", __INTEL_COMPILER_UPDATE); printf("Build Time = %s %s\n", __DATE__, __TIME__); printf("Path Length = %d\n", RAND_N); printf("Number of Options = %d\n", OPT_N); printf("Block Size = %d\n", RAND_BLOCK_LENGTH); printf("Worker Threads = %d\n\n", NTHREADS); const int mem_size = sizeof(float)*OPT_PER_THREAD; #ifndef _OPENMP NTHREADS = 1; #endif float *samples[MAX_THREADS]; VSLStreamStatePtr Streams[MAX_THREADS]; const int nblocks = RAND_N/RAND_BLOCK_LENGTH; #pragma omp parallel reduction(+ : sum_delta) reduction(+ : sum_ref) reduction(+ : sumReserve) reduction(max : max_delta) { #ifdef _OPENMP int threadID = omp_get_thread_num(); #else int threadID = 0; #endif unsigned int randseed = RANDSEED + threadID; srand(randseed); float *CallResultList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *CallConfidenceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *StockPriceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *OptionStrikeList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *OptionYearsList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); for(int i = 0; i < OPT_PER_THREAD; i++) { CallResultList[i] = 0.0f; CallConfidenceList[i] = 0.0f; StockPriceList[i] = RandFloat_T(5.0f, 50.0f, &randseed); OptionStrikeList[i] = RandFloat_T(10.0f, 25.0f, &randseed); OptionYearsList[i] = RandFloat_T(1.0f, 5.0f, &randseed); } samples[threadID] = (float *)scalable_aligned_malloc(RAND_BLOCK_LENGTH * sizeof(float), SIMDALIGN); vslNewStream(&(Streams[threadID]), VSL_BRNG_MT2203 + threadID, RANDSEED); #pragma omp barrier if (threadID == 0) { printf("Starting options pricing...\n"); sTime = second(); start_cyc = _rdtsc(); } for(int opt = 0; opt < OPT_PER_THREAD; opt++) { const float VBySqrtT = VLog2E * sqrtf(OptionYearsList[opt]); const float MuByT = MuLog2E * OptionYearsList[opt]; const float Y = StockPriceList[opt]; const float Z = OptionStrikeList[opt]; float v0 = 0.0f; float v1 = 0.0f; for(int block = 0; block < nblocks; ++block) { float *rand = samples[threadID]; vsRngGaussian (VSL_RNG_METHOD_GAUSSIAN_ICDF, Streams[threadID], RAND_BLOCK_LENGTH, rand, MuByT, VBySqrtT); #pragma vector aligned #pragma simd reduction(+:v0) reduction(+:v1) #pragma unroll(4) for(int i=0; i < RAND_BLOCK_LENGTH; i++) { float callValue = Y * exp2f(rand[i]) - Z; callValue = (callValue > 0.0) ? callValue : 0.0; v0 += callValue; v1 += callValue * callValue; } } const float exprt = exp2f(RLog2E*OptionYearsList[opt]); CallResultList[opt] = exprt * v0 * INV_RAND_N; const float stdDev = sqrtf((F_RAND_N * v1 - v0 * v0) * STDDEV_DENOM); CallConfidenceList[opt] = (float)(exprt * stdDev * CONFIDENCE_DENOM); } //end of opt #pragma omp barrier if (threadID == 0) { end_cyc = _rdtsc(); eTime = second(); printf("Parallel simulation completed in %f seconds.\n", eTime-sTime); printf("Validating the result...\n"); } double delta = 0.0, ref = 0.0, L1norm = 0.0; int max_index = 0; double max_local = 0.0; for(int i = 0; i < OPT_PER_THREAD; i++) { double callReference, putReference; BlackScholesBodyCPU( callReference, putReference, StockPriceList[i], OptionStrikeList[i], OptionYearsList[i], RISKFREE, VOLATILITY ); ref = callReference; delta = fabs(callReference - CallResultList[i]); sum_delta += delta; sum_ref += fabs(ref); if(delta > 1e-6) sumReserve += CallConfidenceList[i] / delta; max_local = delta>max_local? delta: max_local; } max_delta = max_local>max_delta? max_local: max_delta; vslDeleteStream(&(Streams[threadID])); scalable_aligned_free(samples[threadID]); scalable_aligned_free(CallResultList); scalable_aligned_free(CallConfidenceList); scalable_aligned_free(StockPriceList); scalable_aligned_free(OptionStrikeList); scalable_aligned_free(OptionYearsList); }//end of parallel block sumReserve /= (double)OPT_N; const double L1norm = sum_delta / sum_ref; printf("L1_Norm = %4.3E\n", L1norm); printf("Average RESERVE = %4.3f\n", sumReserve); printf("Max Error = %4.3E\n", max_delta); const unsigned long long cyc = end_cyc - start_cyc; const double optcyc = (double)cyc/(double)OPT_N; printf("==========================================\n"); printf("Total Cycles = %lld\n", cyc); printf("Cyc/opt = %8.3f\n", optcyc); printf("Time Elapsed = %8.3f\n", eTime-sTime); printf("Options/sec = %8.3f\n", OPT_N/(eTime-sTime)); printf("==========================================\n"); return 0; }