////////////////////////////////////////////////////////////////////////////////
// Run BlackScholesBodyCPU once per option for an array of optionCount options.
// Element i of every array belongs to option i; R and V are passed unchanged
// to every invocation. h_Call/h_Put are presumably the outputs written by
// BlackScholesBodyCPU (its definition is not shown here).
////////////////////////////////////////////////////////////////////////////////
extern "C" void BlackScholesCPU(
    float *h_Call, //Call option price
    float *h_Put,  //Put option price
    float *h_S,    //Current stock price
    float *h_X,    //Option strike price
    float *h_T,    //Option years
    float R,       //Riskless rate of return
    float V,       //Stock volatility
    unsigned int optionCount
) {
    // Walk the options in ascending index order, one body call per option.
    for (unsigned int idx = 0; idx != optionCount; ++idx) {
        BlackScholesBodyCPU(h_Call[idx], h_Put[idx],
                            h_S[idx], h_X[idx], h_T[idx],
                            R, V);
    }
}
////////////////////////////////////////////////////////////////////////////////
// Run BlackScholesBodyCPU once per option for an array of optN options.
// Entry k of each array belongs to option k; Riskfree and Volatility are
// shared by all options. Nothing is done when optN is zero or negative.
////////////////////////////////////////////////////////////////////////////////
extern "C" void BlackScholesCPU(
    float *h_CallResult,
    float *h_PutResult,
    float *h_StockPrice,
    float *h_OptionStrike,
    float *h_OptionYears,
    float Riskfree,
    float Volatility,
    int optN
){
    // Guard first so the do/while below never runs for an empty batch.
    if (optN <= 0)
        return;

    int k = 0;
    do {
        BlackScholesBodyCPU(h_CallResult[k], h_PutResult[k],
                            h_StockPrice[k], h_OptionStrike[k], h_OptionYears[k],
                            Riskfree, Volatility);
    } while (++k < optN);
}
// Example #3
// 0
// Benchmark driver: single-precision Monte Carlo pricing of European call
// options, validated against the closed-form BlackScholesBodyCPU.
//
// Flow:
//   1. Print a banner with the build and run configuration.
//   2. Inside one OpenMP parallel region every thread
//        - allocates and fills its own OPT_PER_THREAD option inputs,
//        - creates its own MKL random stream and sample buffer,
//        - prices its options from RAND_N Gaussian samples each, drawn in
//          blocks of RAND_BLOCK_LENGTH,
//        - compares each result with BlackScholesBodyCPU and contributes to
//          the reduction variables (sum_delta, sum_ref, sumReserve, max_delta).
//   3. Print the error statistics and timing figures.
//
// Thread 0 records wall-clock time (sTime/eTime) and cycle counts
// (start_cyc/end_cyc, declared outside this function) between two barriers,
// so the measurement covers the pricing loops of all threads.
//
// Always returns 0. argc/argv are not used.
//
// NOTE(review): the scalable_aligned_malloc results are used unchecked, and
// samples[]/Streams[] are indexed by the OpenMP thread id with no visible
// check against MAX_THREADS - confirm both are guaranteed by the setup code.
int main(int argc, char* argv[])
{
    // Wall-clock timestamps; written by thread 0 inside the parallel region.
    double sTime = 0.0;
    double eTime = 0.0;

    // Accumulators combined across threads by the OpenMP reductions below.
    double sum_delta  = 0.0;
    double sum_ref    = 0.0;
    double max_delta  = 0.0;
    double sumReserve = 0.0;

#ifndef _OPENMP
    // Without OpenMP the "parallel" region runs on a single thread. Set this
    // before the banner and before any size computation so that everything
    // below sees the thread count that is actually used.
    NTHREADS = 1;
#endif

    printf("Monte Carlo European Option Pricing Single Precision\n\n");
    printf("Compiler Version  = %d\n", __INTEL_COMPILER/100);
    printf("Release Update    = %d\n", __INTEL_COMPILER_UPDATE);
    printf("Build Time        = %s %s\n", __DATE__, __TIME__);
    printf("Path Length       = %d\n", RAND_N);
    printf("Number of Options = %d\n", OPT_N);
    printf("Block Size        = %d\n", RAND_BLOCK_LENGTH);
    printf("Worker Threads    = %d\n\n", NTHREADS);

    // Bytes per per-thread option array; kept in size_t so the sizeof product
    // is not narrowed to int.
    const size_t mem_size = sizeof(float) * OPT_PER_THREAD;

    float *samples[MAX_THREADS];            // per-thread random sample buffer
    VSLStreamStatePtr Streams[MAX_THREADS]; // per-thread MKL random stream
    const int nblocks = RAND_N/RAND_BLOCK_LENGTH;
#pragma omp parallel reduction(+ : sum_delta) reduction(+ : sum_ref) reduction(+ : sumReserve) reduction(max : max_delta)
{
#ifdef _OPENMP
    int threadID = omp_get_thread_num();
#else
    int threadID = 0;
#endif
    // Per-thread seed so every thread generates different option inputs.
    unsigned int randseed = RANDSEED + threadID;
    srand(randseed);
    float *CallResultList     = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *CallConfidenceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *StockPriceList     = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *OptionStrikeList   = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    float *OptionYearsList    = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN);
    for(int i = 0; i < OPT_PER_THREAD; i++)
    {
        CallResultList[i]     = 0.0f;
        CallConfidenceList[i] = 0.0f;
        // RandFloat_T is defined elsewhere; presumably returns a value in
        // [low, high] and advances randseed - TODO confirm.
        StockPriceList[i]     = RandFloat_T(5.0f, 50.0f, &randseed);
        OptionStrikeList[i]   = RandFloat_T(10.0f, 25.0f, &randseed);
        OptionYearsList[i]    = RandFloat_T(1.0f, 5.0f, &randseed);
    }
    samples[threadID] = (float *)scalable_aligned_malloc(RAND_BLOCK_LENGTH * sizeof(float), SIMDALIGN);
    // Each thread selects a different generator id (VSL_BRNG_MT2203 + threadID)
    // while sharing the same seed value.
    vslNewStream(&(Streams[threadID]), VSL_BRNG_MT2203 + threadID, RANDSEED);

    // Wait until every thread has finished its setup before starting the clock.
#pragma omp barrier
    if (threadID == 0)
    {
        printf("Starting options pricing...\n");
        sTime = second();
        start_cyc = _rdtsc();
    }

    for(int opt = 0; opt < OPT_PER_THREAD; opt++)
    {
        // VLog2E / MuLog2E / RLog2E are defined elsewhere; presumably the
        // volatility, drift and rate terms pre-multiplied by log2(e) so that
        // exp2f can stand in for expf - TODO confirm.
        const float VBySqrtT = VLog2E * sqrtf(OptionYearsList[opt]);
        const float MuByT    = MuLog2E * OptionYearsList[opt];
        const float Y        = StockPriceList[opt];
        const float Z        = OptionStrikeList[opt];

        float v0 = 0.0f; // running sum of payoffs
        float v1 = 0.0f; // running sum of squared payoffs
        for(int block = 0; block < nblocks; ++block)
        {
            float *rand = samples[threadID];
            // Fill the block with Gaussian samples (mean MuByT, sigma VBySqrtT).
            vsRngGaussian (VSL_RNG_METHOD_GAUSSIAN_ICDF, Streams[threadID], RAND_BLOCK_LENGTH, rand, MuByT, VBySqrtT);
            // Payoff max(Y * 2^sample - Z, 0). Float literals keep the whole
            // loop in single precision.
#pragma vector aligned
#pragma simd reduction(+:v0) reduction(+:v1)
#pragma unroll(4)
            for(int i=0; i < RAND_BLOCK_LENGTH; i++)
            {
                float callValue  = Y * exp2f(rand[i]) - Z;
                callValue = (callValue > 0.0f) ? callValue : 0.0f;
                v0 += callValue;
                v1 += callValue * callValue;
            }
        }
        // Scale the payoff mean and the standard-deviation based confidence
        // term by exprt (presumably the discount factor - TODO confirm).
        const float  exprt      = exp2f(RLog2E*OptionYearsList[opt]);
        CallResultList[opt]     = exprt * v0 * INV_RAND_N;
        const float  stdDev     = sqrtf((F_RAND_N * v1 - v0 * v0) * STDDEV_DENOM);
        CallConfidenceList[opt] = (float)(exprt * stdDev * CONFIDENCE_DENOM);
    } //end of opt

    // Stop the clock only after every thread has priced all of its options.
#pragma omp barrier
    if (threadID == 0) {
        end_cyc = _rdtsc();
        eTime = second();
        printf("Parallel simulation completed in %f seconds.\n", eTime-sTime);
        printf("Validating the result...\n");
    }

    // Validation: compare each Monte Carlo price with the reference value.
    double max_local = 0.0;
    for(int i = 0; i < OPT_PER_THREAD; i++)
    {
        double callReference, putReference;
        BlackScholesBodyCPU(
            callReference,
            putReference,
            StockPriceList[i],
            OptionStrikeList[i], OptionYearsList[i],  RISKFREE, VOLATILITY );
        const double delta = fabs(callReference - CallResultList[i]);
        sum_delta += delta;
        sum_ref   += fabs(callReference);
        // Skip near-zero errors to avoid dividing by (almost) zero.
        if(delta > 1e-6)
             sumReserve += CallConfidenceList[i] / delta;
        max_local = delta>max_local? delta: max_local;
    }
    max_delta = max_local>max_delta? max_local: max_delta;

    // Release everything this thread created.
    vslDeleteStream(&(Streams[threadID]));
    scalable_aligned_free(samples[threadID]);
    scalable_aligned_free(CallResultList);
    scalable_aligned_free(CallConfidenceList);
    scalable_aligned_free(StockPriceList);
    scalable_aligned_free(OptionStrikeList);
    scalable_aligned_free(OptionYearsList);
}//end of parallel block

    sumReserve          /= (double)OPT_N;
    const double L1norm  = sum_delta / sum_ref;

    printf("L1_Norm          = %4.3E\n", L1norm);
    printf("Average RESERVE  = %4.3f\n", sumReserve);
    printf("Max Error        = %4.3E\n", max_delta);

    const unsigned long long cyc       = end_cyc - start_cyc;
    const double             optcyc    = (double)cyc/(double)OPT_N;

    printf("==========================================\n");
    // cyc is unsigned long long, so it must be printed with %llu.
    printf("Total Cycles = %llu\n", cyc);
    printf("Cyc/opt      = %8.3f\n", optcyc);
    printf("Time Elapsed = %8.3f\n", eTime-sTime);
    printf("Options/sec  = %8.3f\n", OPT_N/(eTime-sTime));
    printf("==========================================\n");
    return 0;
}