/**
 * Computes a single result for the instance identified by `i`.
 *
 * The strike is derived from the index as 0.001 * i. A fresh PrivGlobs is
 * built for the given grid dimensions, the grid, both operators and the
 * payoff are initialised, and the solution is then stepped backwards through
 * the timeline (from the second-to-last entry down to index 0).
 *
 * @return globs.myResult at [myXindex][myYindex] after the last step.
 */
REAL value(const unsigned i,
           const REAL s0,
           const REAL t,
           const REAL alpha,
           const REAL nu,
           const REAL beta,
           const unsigned int numX,
           const unsigned int numY,
           const unsigned int numT)
{
    PrivGlobs globs(numX, numY, numT);
    REAL strike = 0.001 * i;

    // One-time setup of grid, differential operators and terminal payoff.
    initGrid(s0, alpha, nu, t, numX, numY, numT, globs);
    initOperator(globs.myX, globs.myDxx);
    initOperator(globs.myY, globs.myDyy);
    setPayoff(strike, globs);

    // Walk the timeline backwards; each step depends on the previous one.
    int step = globs.myTimeline.size() - 2;
    while (step >= 0)
    {
        updateParams(step, alpha, beta, nu, globs);
        rollback(step, globs);
        step--;
    }

    return globs.myResult[globs.myXindex][globs.myYindex];
}
/**
 * Computes a single result for the given strike, using the caller-supplied
 * PrivGlobs as working state.
 *
 * `globs` is taken by value, so this function operates on its own copy.
 *
 * @return globs.myResult at [myXindex][myYindex] after the last step.
 */
REAL value(PrivGlobs globs,
           const REAL s0,
           const REAL strike,
           const REAL t,
           const REAL alpha,
           const REAL nu,
           const REAL beta,
           const unsigned int numX,
           const unsigned int numY,
           const unsigned int numT)
{
    // One-time setup of grid, differential operators and terminal payoff.
    initGrid(s0, alpha, nu, t, numX, numY, numT, globs);
    initOperator(globs.myX, globs.myDxx);
    initOperator(globs.myY, globs.myDyy);
    setPayoff(strike, globs);

    // Author's parallelization notes:
    //  - globs is shared state that cannot be privatized, so the loop below
    //    cannot be parallelized yet.
    //  - If updateParams and rollback turn out to be independent of i and
    //    globs, the loop could be parallelized by privatizing the initGrid,
    //    initOperator and setPayoff calls.
    //  - If they write independently to globs, no privatization is needed.
    int step = globs.myTimeline.size() - 2;
    while (step >= 0)  // sequential
    {
        updateParams(step, alpha, beta, nu, globs);
        rollback(step, globs);
        --step;
    }

    return globs.myResult[globs.myXindex][globs.myYindex];
}
void run_OrigCPU(const unsigned int& outer,const unsigned int& numX, const unsigned int& numY,const unsigned int& numT, const REAL& s0,const REAL& t,const REAL& alpha, const REAL& nu,const REAL& beta,REAL* res) // [outer] RESULT { PrivGlobs globals(numX, numY, numT, outer); initGrid(s0,alpha,nu,t, numX, numY, numT, globals); initOperator(globals.myX, globals.numX, globals.myDxx); initOperator(globals.myY, globals.numY, globals.myDyy); setPayoff(globals, outer); for(int g = numT-2;g>=0;--g) { updateParams(g,alpha,beta,nu,globals, outer); rollback(g, globals, outer, numX, numY); } for (unsigned int i = 0; i < outer; i++) { res[i] = globals.myResult[i * globals.numM + globals.myXindex * numY + globals.myYindex]; } }
void run_GPU( const unsigned int& outer, const unsigned int& numX, const unsigned int& numY, const unsigned int& numT, const REAL& s0, const REAL& t, const REAL& alpha, const REAL& nu, const REAL& beta, REAL* res // [outer] RESULT ) { /* // Outerloop - Technically parallelizable, but restricts further // parallization further in. // If strike and globs is privatized, the loop can be parallelized. // Value is the limiting factor since most of the actual work is deeper in // the function. // Sequential loop (value) in between parallel loops (this loop). // Move seq to outer loop via array expansion (globs) and distribution. #pragma omp parallel for default(shared) schedule(static) if(outer>8) for( unsigned i = 0; i < outer; ++ i ) { REAL strike = 0.001*i; PrivGlobs globs(numX, numY, numT); res[i] = value( globs, s0, strike, t, alpha, nu, beta, numX, numY, numT ); }*/ // globs array expanded. Init moved to individual parallel loop //vector<PrivGlobs> globs(outer, PrivGlobs(numX, numY, numT)); // globs array expanded. Init moved to individual parallel loop //vector<PrivGlobs> globs(outer, PrivGlobs(numX, numY, numT)); PrivGlobs *globs = (PrivGlobs*) malloc(outer*sizeof(struct PrivGlobs)); #pragma omp parallel for default(shared) schedule(static) if(outer>8) for(int i = 0 ; i < outer ; i++) { globs[i] = PrivGlobs(numX,numY,numT); } #pragma omp parallel for default(shared) schedule(static) if(outer>8) for( unsigned i = 0; i < outer; ++ i ) { //par initGrid(s0,alpha,nu,t, numX, numY, numT, globs[i]); initOperator(globs[i].myX, globs[i].myXsize, globs[i].myDxx, globs[i].myDxxCols); initOperator(globs[i].myY, globs[i].myYsize, globs[i].myDyy, globs[i].myDyyCols); setPayoff(0.001*i, globs[i]); } //printFlatMatrix(globs[0].myX, 32, 1); //printVectMatrix(globs[0].myDxx, 32, 4); //printFlatMatrix(globs[0].myDxx, 32, 4); // sequential loop distributed. for(int i = numT-2;i>=0;--i){ //seq // inner loop parallel on each outer (par) instead of each time step (seq). 
#pragma omp parallel for default(shared) schedule(static) if(outer>8) for( unsigned j = 0; j < outer; ++ j ) { //par updateParams(i,alpha,beta,nu,globs[j]); rollback(i, globs[j]); } } // parallel assignment of results. #pragma omp parallel for default(shared) schedule(static) if(outer>8) for( unsigned j = 0; j < outer; ++ j ) { //par res[j] = globs[j].myResult[idx2d(globs[j].myXindex,globs[j].myYindex,globs[j].myResultCols)]; } //TODO: Free all struct and their pointers. }