/*
 * JNI bridge for edu.berkeley.bid.VSL.vsRngGaussian.
 *
 * Resolves the native stream with getStream(), pins the elements of j_r and
 * lets vsRngGaussian write n floats into them.
 *
 * method   - generation method selector, forwarded unchanged.
 * j_stream - Java-side stream object, resolved through getStream().
 * n        - number of values to generate; must lie in [0, length of j_r].
 * j_r      - destination float array.
 * a, b     - forwarded unchanged as the last two vsRngGaussian arguments
 *            (mean and standard deviation in the MKL API).
 *
 * Returns the status reported by vsRngGaussian, VSL_ERROR_BADARGS when n does
 * not fit the destination array, or VSL_ERROR_MEM_FAILURE when the array
 * elements could not be obtained (the JVM may also have an exception pending
 * in that case).
 */
JNIEXPORT jint JNICALL Java_edu_berkeley_bid_VSL_vsRngGaussian
(JNIEnv *env, jobject calling_obj, jint method, jobject j_stream, jint n,
 jfloatArray j_r, jfloat a, jfloat b)
{
    VSLStreamStatePtr stream = getStream(env, calling_obj, j_stream);

    /* Query the length before pinning: no other JNI calls are permitted
     * between Get/ReleasePrimitiveArrayCritical. Rejecting an oversized n
     * keeps the native side from writing past the end of the Java array. */
    if (n < 0 || n > (*env)->GetArrayLength(env, j_r)) {
        return VSL_ERROR_BADARGS;
    }

    /* The third argument is an optional jboolean* out-parameter (isCopy);
     * we do not need it, so pass a real null pointer. */
    jfloat *r = (*env)->GetPrimitiveArrayCritical(env, j_r, NULL);
    if (r == NULL) {
        return VSL_ERROR_MEM_FAILURE;
    }

    jint retval = vsRngGaussian(method, stream, n, r, a, b);

    /* Mode 0: copy back (if a copy was made) and release the elements. */
    (*env)->ReleasePrimitiveArrayCritical(env, j_r, r, 0);
    return retval;
}
/**
 * Fill `data` with `size` floats generated by vsRngGaussian on this object's
 * stream (vStream_).
 *
 * @param data   destination buffer, forwarded to vsRngGaussian
 * @param size   number of values requested
 * @param mu     forwarded as the first distribution parameter
 * @param sigma  forwarded as the second distribution parameter; must be > 0
 *
 * The call's return value is handed to rand_check().
 */
void Random<CPU>::gaussian (float *data,
                            int size,
                            const float mu,
                            const float sigma) const
{
  // A non-positive sigma is rejected before touching the generator.
  CHECK (sigma > 0.f);

  // TODO
  // NOTE(review): the method selector is the literal 0 -- presumably the
  // Box-Muller method constant; confirm against the MKL headers.
  rand_check (vsRngGaussian (0, vStream_, size, data, mu, sigma));
}
/*
 * Fill vec[0 .. numElem) with normally distributed floats drawn from the
 * global generator stream g_RNG.m_vslStream (BOXMULLER2 method).
 *
 * Note that `sigma` is passed through sqrt() before being forwarded, so the
 * generator receives the square root of the value the caller supplies
 * (presumably the caller passes a variance -- confirm against the header).
 */
void pnlRandNormal( int numElem, float* vec, float mean, float sigma )
{
    const float rootOfSigma = (float)sqrt(sigma);

    vsRngGaussian( VSL_METHOD_SGAUSSIAN_BOXMULLER2,
                   g_RNG.m_vslStream,
                   numElem,
                   vec,
                   mean,
                   rootOfSigma );
}
/*
 * Draw a single normally distributed float from the global generator stream
 * g_RNG.m_vslStream (BOXMULLER2 method) and return it.
 *
 * As in the array overload, `sigma` is passed through sqrt() before being
 * forwarded to the generator (presumably the caller passes a variance --
 * confirm against the header).
 */
float pnlRandNormal( float mean, float sigma )
{
    float sample = 0;
    const float rootOfSigma = (float)sqrt(sigma);

    vsRngGaussian( VSL_METHOD_SGAUSSIAN_BOXMULLER2,
                   g_RNG.m_vslStream,
                   1,
                   &sample,
                   mean,
                   rootOfSigma );

    return sample;
}
int main(int argc, char* argv[]) { // construct lattice unsigned int rows = 1; unsigned int columns = 300; lattice_t* lattice = lattice_create(rows, columns, periodic, periodic, periodic, periodic); // initialise lattice positioning unsigned int const kNumStdDevs = 5; unsigned int const kStdDevsRepeatCount = 1; unsigned int const kRepeatCount = 1000; double stddevs[] = { 0.1, 0.2, 0.3, 0.4, 0.5 }; unsigned int const kTimeSetsNum = 3; unsigned int timeSets[] = { 200, 500, 1000 }; // initialise random number storage VSLStreamStatePtr stream; float randomNumbers[columns]; // initialise temporary node storage double xPosition; double yPosition = 0.0; coordinate_t coord; // initialise loop variables bool trackedLatticeLayout; char latticeLayoutFileName[50]; char latticeProfileFilename[100]; // initialise agent tracking information unsigned int numTrackedAgents = 0; coordinate_t* trackedPositions = NULL; int* trackedAgentIds = NULL; // set motility properties double motilityProbability = 1.0; double xShiftPreference = 0; double yShiftPreference = 0; bool agentExclusion = false; // generation random lattices for (int stdDevIndex = 0; stdDevIndex < kNumStdDevs; stdDevIndex++) { for (int boundRepeatCount = 0; boundRepeatCount < kStdDevsRepeatCount; boundRepeatCount++) { // generate any required random numbers (uniform dist) vslNewStream(&stream, BRNG, arc4random()); vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, stream, columns, randomNumbers, 0.0f, stddevs[stdDevIndex]); // perturb and sort node locations for (int col = 0; col < columns; col++) { randomNumbers[col] += col; } qsort(randomNumbers, columns, sizeof(float), compare); // specify node locations for (int row = 0; row < rows; row++) { for (int col = 0; col < columns; col++) { coord.row = row; coord.column = col; xPosition = (double)randomNumbers[col]; lattice_specify_position(lattice, coord, xPosition, yPosition); } } // save node locations bool saveNodeLocations = true; if (saveNodeLocations) { 
sprintf(latticeLayoutFileName, "node_positions_%0.02f_%d_ghosts.txt", stddevs[stdDevIndex], boundRepeatCount); trackedLatticeLayout = lattice_parser_node_positions(lattice, rows, columns, latticeLayoutFileName, "output/"); if (!trackedLatticeLayout) { printf("Error: failed storing lattice layout information (case: %0.02f %d).\n", stddevs[stdDevIndex], boundRepeatCount); } } // perform simulations bool performSimulation = true; if (performSimulation) { // perform simulations for (int repeatCount = 0; repeatCount < kRepeatCount; repeatCount++) { // populate lattice int* agentId; int currentAgentId = 1; coordinate_t agentPos; for (int j = 130; j < 171; j++) { agentId = malloc(sizeof(int)); *agentId = currentAgentId++; agentPos.row = 0; agentPos.column = j; lattice_push_agent(lattice, agentPos, agentId); } // perform simulation for (int timeStep = 0; timeStep < timeSets[kTimeSetsNum-1]; timeStep++) { performMotilityEvents(lattice, rows, columns, motilityProbability, xShiftPreference, yShiftPreference, agentExclusion, trackedAgentIds, numTrackedAgents, trackedPositions); for (int j = 0; j < kTimeSetsNum; j++) { if (timeStep == timeSets[j]-1) { // store lattice profile sprintf(latticeProfileFilename, "lattice_profile_%0.02f_%d_%d_%d_ghosts.txt", stddevs[stdDevIndex], boundRepeatCount, repeatCount, timeStep+1); bool isTracked = lattice_occupancy_parser(lattice, rows, columns, latticeProfileFilename, "output/"); if (!isTracked) { printf("Error: failed to store lattice profile.\n"); } } } } // clear lattice and deallocate memory lattice_clear(lattice, rows, columns, true); } } } } // deallocate memory lattice_destroy(&lattice, rows, columns, true); return EXIT_SUCCESS; }
int main(int argc, char* argv[]) { double sTime, eTime; double sum_delta = 0.0; double sum_ref = 0.0; double max_delta = 0.0; double sumReserve = 0.0; printf("Monte Carlo European Option Pricing Single Precision\n\n"); printf("Compiler Version = %d\n", __INTEL_COMPILER/100); printf("Release Update = %d\n", __INTEL_COMPILER_UPDATE); printf("Build Time = %s %s\n", __DATE__, __TIME__); printf("Path Length = %d\n", RAND_N); printf("Number of Options = %d\n", OPT_N); printf("Block Size = %d\n", RAND_BLOCK_LENGTH); printf("Worker Threads = %d\n\n", NTHREADS); const int mem_size = sizeof(float)*OPT_PER_THREAD; #ifndef _OPENMP NTHREADS = 1; #endif float *samples[MAX_THREADS]; VSLStreamStatePtr Streams[MAX_THREADS]; const int nblocks = RAND_N/RAND_BLOCK_LENGTH; #pragma omp parallel reduction(+ : sum_delta) reduction(+ : sum_ref) reduction(+ : sumReserve) reduction(max : max_delta) { #ifdef _OPENMP int threadID = omp_get_thread_num(); #else int threadID = 0; #endif unsigned int randseed = RANDSEED + threadID; srand(randseed); float *CallResultList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *CallConfidenceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *StockPriceList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *OptionStrikeList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); float *OptionYearsList = (float *)scalable_aligned_malloc(mem_size, SIMDALIGN); for(int i = 0; i < OPT_PER_THREAD; i++) { CallResultList[i] = 0.0f; CallConfidenceList[i] = 0.0f; StockPriceList[i] = RandFloat_T(5.0f, 50.0f, &randseed); OptionStrikeList[i] = RandFloat_T(10.0f, 25.0f, &randseed); OptionYearsList[i] = RandFloat_T(1.0f, 5.0f, &randseed); } samples[threadID] = (float *)scalable_aligned_malloc(RAND_BLOCK_LENGTH * sizeof(float), SIMDALIGN); vslNewStream(&(Streams[threadID]), VSL_BRNG_MT2203 + threadID, RANDSEED); #pragma omp barrier if (threadID == 0) { printf("Starting options pricing...\n"); sTime = second(); start_cyc = 
_rdtsc(); } for(int opt = 0; opt < OPT_PER_THREAD; opt++) { const float VBySqrtT = VLog2E * sqrtf(OptionYearsList[opt]); const float MuByT = MuLog2E * OptionYearsList[opt]; const float Y = StockPriceList[opt]; const float Z = OptionStrikeList[opt]; float v0 = 0.0f; float v1 = 0.0f; for(int block = 0; block < nblocks; ++block) { float *rand = samples[threadID]; vsRngGaussian (VSL_RNG_METHOD_GAUSSIAN_ICDF, Streams[threadID], RAND_BLOCK_LENGTH, rand, MuByT, VBySqrtT); #pragma vector aligned #pragma simd reduction(+:v0) reduction(+:v1) #pragma unroll(4) for(int i=0; i < RAND_BLOCK_LENGTH; i++) { float callValue = Y * exp2f(rand[i]) - Z; callValue = (callValue > 0.0) ? callValue : 0.0; v0 += callValue; v1 += callValue * callValue; } } const float exprt = exp2f(RLog2E*OptionYearsList[opt]); CallResultList[opt] = exprt * v0 * INV_RAND_N; const float stdDev = sqrtf((F_RAND_N * v1 - v0 * v0) * STDDEV_DENOM); CallConfidenceList[opt] = (float)(exprt * stdDev * CONFIDENCE_DENOM); } //end of opt #pragma omp barrier if (threadID == 0) { end_cyc = _rdtsc(); eTime = second(); printf("Parallel simulation completed in %f seconds.\n", eTime-sTime); printf("Validating the result...\n"); } double delta = 0.0, ref = 0.0, L1norm = 0.0; int max_index = 0; double max_local = 0.0; for(int i = 0; i < OPT_PER_THREAD; i++) { double callReference, putReference; BlackScholesBodyCPU( callReference, putReference, StockPriceList[i], OptionStrikeList[i], OptionYearsList[i], RISKFREE, VOLATILITY ); ref = callReference; delta = fabs(callReference - CallResultList[i]); sum_delta += delta; sum_ref += fabs(ref); if(delta > 1e-6) sumReserve += CallConfidenceList[i] / delta; max_local = delta>max_local? delta: max_local; } max_delta = max_local>max_delta? 
max_local: max_delta; vslDeleteStream(&(Streams[threadID])); scalable_aligned_free(samples[threadID]); scalable_aligned_free(CallResultList); scalable_aligned_free(CallConfidenceList); scalable_aligned_free(StockPriceList); scalable_aligned_free(OptionStrikeList); scalable_aligned_free(OptionYearsList); }//end of parallel block sumReserve /= (double)OPT_N; const double L1norm = sum_delta / sum_ref; printf("L1_Norm = %4.3E\n", L1norm); printf("Average RESERVE = %4.3f\n", sumReserve); printf("Max Error = %4.3E\n", max_delta); const unsigned long long cyc = end_cyc - start_cyc; const double optcyc = (double)cyc/(double)OPT_N; printf("==========================================\n"); printf("Total Cycles = %lld\n", cyc); printf("Cyc/opt = %8.3f\n", optcyc); printf("Time Elapsed = %8.3f\n", eTime-sTime); printf("Options/sec = %8.3f\n", OPT_N/(eTime-sTime)); printf("==========================================\n"); return 0; }