int main(int argc, char **argv)
{
    OptionParser op;
    op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
    op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
    op.addOption("size", OPT_INT, "1", "specify problem size", 's');
    op.addOption("target", OPT_INT, "0", "specify MIC target device number", 't');

    // If the benchmark has any specific options, add those
    addBenchmarkSpecOptions(op);

    if (!op.parse(argc, argv))
    {
        op.usage();
        return -1;
    }

    ResultDatabase resultDB;

    // Run the test
    RunBenchmark(op, resultDB);

    // Print out results to stdout
    resultDB.DumpDetailed(cout);

    return 0;
}
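// ****************************************************************************
// Hedged example (not part of the original source):
//   A minimal sketch of how a benchmark body might read back the shared
//   options registered in main() above.  getOptionInt()/getOptionBool() are
//   used elsewhere in this suite; the function name ExampleRunBenchmark, the
//   loop body, and the AddResult() call/signature are illustrative
//   assumptions, not the suite's actual RunBenchmark implementation.
//   Assumes the suite's OptionParser and ResultDatabase declarations are
//   visible and `using namespace std` is in effect, as in the file above.
// ****************************************************************************
void ExampleRunBenchmark(OptionParser &op, ResultDatabase &resultDB)
{
    const bool verbose = op.getOptionBool("verbose");
    const int  passes  = op.getOptionInt("passes");
    const int  size    = op.getOptionInt("size");

    for (int p = 0; p < passes; p++)
    {
        if (verbose)
            cout << "pass " << p << " with problem size " << size << "\n";
        // ... run and time the kernel under test here ...
        resultDB.AddResult("example_time", "n/a", "sec", 0.0);  // assumed API
    }
}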
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("pts", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("pts1", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("pts2", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("2D", OPT_BOOL, "false", "2D FFT");
}
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("MB", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("use-native", OPT_BOOL, "false",
                 "call native (HW) versions of sin/cos");
    op.addOption("dump-sp", OPT_BOOL, "false", "dump result after SP fft/ifft");
    op.addOption("dump-dp", OPT_BOOL, "false", "dump result after DP fft/ifft");
}
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Lukasz Wesolowski
// Creation: June 21, 2010
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "100", "Number of SpMV iterations "
                 "per pass");
    op.addOption("mm_filename", OPT_STRING, "random", "Name of file "
                 "which stores the matrix in Matrix Market format");
    op.addOption("maxval", OPT_FLOAT, "10", "Maximum value for random "
                 "matrices");
}
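// ****************************************************************************
// Hedged example (not part of the original source):
//   A sketch of how the SpMV benchmark might read back the options registered
//   above.  getOptionInt() appears elsewhere in this suite; getOptionString()
//   and getOptionFloat() are assumed analogues and may not match the real
//   OptionParser API.  Assumes the suite's OptionParser declaration and
//   `using namespace std` are in scope.
// ****************************************************************************
void exampleReadSpmvOptions(OptionParser &op)
{
    int    iters  = op.getOptionInt("iterations");      // SpMV iterations per pass
    string mmFile = op.getOptionString("mm_filename");  // "random" => generate a matrix
    float  maxval = op.getOptionFloat("maxval");        // range of random values

    if (mmFile == "random")
        cout << "Generating random matrix with values in [0, " << maxval << ")\n";
    else
        cout << "Reading Matrix Market file " << mmFile << "\n";
    cout << iters << " SpMV iterations per pass\n";
}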
// Adds command line options to given OptionParser
void addBenchmarkSpecOptions( OptionParser& opts )
{
    opts.addOption("customSize", OPT_VECINT, "0,0", "specify custom problem size");
    opts.addOption( "num-iters", OPT_INT, "1000", "number of stencil iterations" );
    opts.addOption( "weight-center", OPT_FLOAT, "0.25", "center value weight" );
    opts.addOption( "weight-cardinal", OPT_FLOAT, "0.15", "cardinal values weight" );
    opts.addOption( "weight-diagonal", OPT_FLOAT, "0.05", "diagonal values weight" );
    opts.addOption( "seed", OPT_INT, "71594", "random number generator seed" );
    opts.addOption( "val-threshold", OPT_FLOAT, "0.01", "validation error threshold" );
    opts.addOption( "val-print-limit", OPT_INT, "15", "number of validation errors to print" );
    opts.addOption( "haloVal", OPT_FLOAT, "0.0", "value to use for halo data" );

#if defined(PARALLEL)
    opts.addOption( "msize", OPT_VECINT, "2,2", "MPI 2D grid topology dimensions" );
    opts.addOption( "iters-per-exchange", OPT_INT, "1",
        "Number of local iterations between MPI boundary exchange operations (also, halo width)" );
#endif // defined(PARALLEL)
}
// Adds command line options to given OptionParser
void addBenchmarkSpecOptions( OptionParser& opts )
{
    opts.addOption("customSize", OPT_VECINT, "0,0", "specify custom problem size");
    opts.addOption( "lsize", OPT_VECINT, "8,256", "block dimensions" );
    opts.addOption( "num-iters", OPT_INT, "1000", "number of stencil iterations" );
    opts.addOption( "weight-center", OPT_FLOAT, "0.25", "center value weight" );
    opts.addOption( "weight-cardinal", OPT_FLOAT, "0.15", "cardinal values weight" );
    opts.addOption( "weight-diagonal", OPT_FLOAT, "0.05", "diagonal values weight" );
    opts.addOption( "seed", OPT_INT, "71594", "random number generator seed" );
    opts.addOption( "val-threshold", OPT_FLOAT, "0.01", "validation error threshold" );
    opts.addOption( "val-print-limit", OPT_INT, "15", "number of validation errors to print" );
    opts.addOption( "haloVal", OPT_FLOAT, "0.0", "value to use for halo data" );
    opts.addOption( "expMatrixFile", OPT_STRING, "",
        "Basename for file(s) holding expected matrices" );
    opts.addOption( "saveExpMatrixFile", OPT_STRING, "",
        "Basename for output file(s) that will hold expected matrices" );
    opts.addOption( "warmupPasses", OPT_INT, "1",
        "Number of warmup passes to do before starting timings", 'w' );

#if defined(PARALLEL)
    MPI2DGridProgramBase::AddOptions( opts );
#endif // defined(PARALLEL)
}
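// ****************************************************************************
// Hedged example (not part of the original source):
//   The weight-center / weight-cardinal / weight-diagonal options above
//   describe a 9-point stencil.  This is a minimal sketch of one interior
//   point update under that assumption; the real kernels in this suite are
//   far more elaborate (blocked, GPU-resident, halo-aware).
// ****************************************************************************
template <class T>
T exampleStencilPoint(const T *in, int ncols, int i, int j,
                      T wCenter, T wCardinal, T wDiagonal)
{
    // Row-major layout: element (i,j) lives at in[i*ncols + j].
    const T *p = in + i * ncols + j;
    T cardinal = p[-ncols] + p[ncols] + p[-1] + p[1];
    T diagonal = p[-ncols - 1] + p[-ncols + 1] +
                 p[ ncols - 1] + p[ ncols + 1];
    return wCenter * p[0] + wCardinal * cardinal + wDiagonal * diagonal;
}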
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes if they are not using a
//   predefined size (i.e. the -s option).
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("MB", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("dump-dp", OPT_BOOL, "false", "dump result after DP fft/ifft");
    op.addOption("dump-sp", OPT_BOOL, "false", "dump result after SP fft/ifft");
}
// ****************************************************************************
// Method: main()
//
// Purpose:
//   serial and parallel main for OpenCL level0 benchmarks
//
// Arguments:
//   argc, argv
//
// Programmer: SHOC Team
// Creation: The Epoch
//
// Modifications:
//   Jeremy Meredith, Tue Jan 12 15:09:33 EST 2010
//   Changed the way device selection works.  It now defaults to the device
//   index corresponding to the process's rank within a node if no devices
//   are specified on the command line, and otherwise, round-robins
//   the list of devices among the tasks.
//
//   Gabriel Marin, Tue Jun 01 15:38 EST 2010
//   Check that we have valid (not NULL) context and queue objects before
//   running the benchmarks.  Errors inside CreateContextFromSingleDevice or
//   CreateCommandQueueForContextAndDevice were not propagated out to the main
//   program.
//
//   Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
//   Split timing reports into detailed and summary.  For serial code, we
//   report all trial values, and for parallel, skip the per-process vals.
//   Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
    int ret = 0;

    try
    {
#ifdef PARALLEL
        int rank, size;
        MPI_Init(&argc, &argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        cout << "MPI Task " << rank << "/" << size - 1 << " starting....\n";
#endif

        OptionParser op;

        // Add shared options to the parser
        op.addOption("platform", OPT_INT, "0", "specify OpenCL platform to use", 'p');
        op.addOption("device", OPT_VECINT, "", "specify device(s) to run on", 'd');
        op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
        op.addOption("size", OPT_VECINT, "1", "specify problem size", 's');
        op.addOption("infoDevices", OPT_BOOL, "",
                     "show info for available platforms and devices", 'i');
        op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
        op.addOption("quiet", OPT_BOOL, "",
                     "write minimum necessary to standard output", 'q');

        addBenchmarkSpecOptions(op);

        if (!op.parse(argc, argv))
        {
#ifdef PARALLEL
            if (rank == 0)
                op.usage();
            MPI_Finalize();
#else
            op.usage();
#endif
            return (op.HelpRequested() ? 0 : 1);
        }

        if (op.getOptionBool("infoDevices"))
        {
#define DEBUG_DEVICE_CONTAINER 0
#ifdef PARALLEL
            // execute following code only if I am the process of lowest
            // rank on this node
            NodeInfo NI;
            int mynoderank = NI.nodeRank();
            if (mynoderank == 0)
            {
                int nlrrank, nlrsize;
                MPI_Comm nlrcomm = NI.getNLRComm();
                MPI_Comm_size(nlrcomm, &nlrsize);
                MPI_Comm_rank(nlrcomm, &nlrrank);

                OpenCLNodePlatformContainer ndc1;
                OpenCLMultiNodeContainer localMnc(ndc1);
                localMnc.doMerge(nlrrank, nlrsize, nlrcomm);
                if (rank == 0)  // I am the global rank 0, print all configurations
                    localMnc.Print(cout);
            }
#else
            OpenCLNodePlatformContainer ndc1;
            ndc1.Print(cout);
#if DEBUG_DEVICE_CONTAINER
            OpenCLMultiNodeContainer mnc1(ndc1), mnc2;
            mnc1.Print(cout);
            ostringstream oss;
            mnc1.writeObject(oss);
            std::string temp(oss.str());
            cout << "Serialized MultiNodeContainer:\n" << temp;
            istringstream iss(temp);
            mnc2.readObject(iss);
            cout << "Unserialized object2:\n";
            mnc2.Print(cout);
            mnc1.merge(mnc2);
            cout << "==============\nObject1 after merging 1:\n";
            mnc1.Print(cout);
            mnc1.merge(mnc2);
            cout << "==============\nObject1 after merging 2:\n";
            mnc1.Print(cout);
#endif // DEBUG_DEVICE_CONTAINER
#endif // PARALLEL
            return 0;
        }

        bool verbose = op.getOptionBool("verbose");

        // The device option supports specifying more than one device
        // for now, just choose the first one.
        int platform = op.getOptionInt("platform");

#ifdef PARALLEL
        NodeInfo ni;
        int myNodeRank = ni.nodeRank();
        if (verbose)
            cout << "Global rank " << rank << " is local rank " << myNodeRank << endl;
#else
        int myNodeRank = 0;
#endif

        // If they haven't specified any devices, assume they
        // want the process with in-node rank N to use device N
        int deviceIdx = myNodeRank;

        // If they have, then round-robin the list of devices
        // among the processes on a node.
        vector<long long> deviceVec = op.getOptionVecInt("device");
        if (deviceVec.size() > 0)
        {
            int len = deviceVec.size();
            deviceIdx = deviceVec[myNodeRank % len];
        }

        // Check for an erroneous device
        if (deviceIdx >= GetNumOclDevices(platform))
        {
            cerr << "Warning: device index: " << deviceIdx
                 << " out of range, defaulting to device 0.\n";
            deviceIdx = 0;
        }

        // Initialization
        if (verbose)
            cout << ">> initializing\n";
        cl_device_id devID = ListDevicesAndGetDevice(platform, deviceIdx);
        cl_int clErr;
        cl_context ctx = clCreateContext(NULL,    // properties
                                         1,       // number of devices
                                         &devID,  // device
                                         NULL,    // notification function
                                         NULL,
                                         &clErr);
        CL_CHECK_ERROR(clErr);
        cl_command_queue queue = clCreateCommandQueue(ctx,
                                                      devID,
                                                      CL_QUEUE_PROFILING_ENABLE,
                                                      &clErr);
        CL_CHECK_ERROR(clErr);

        ResultDatabase resultDB;

        // Run the benchmark
        RunBenchmark(devID, ctx, queue, resultDB, op);

        clReleaseCommandQueue(queue);
        clReleaseContext(ctx);

#ifndef PARALLEL
        resultDB.DumpDetailed(cout);
#else
        ParallelResultDatabase pardb;
        pardb.MergeSerialDatabases(resultDB, MPI_COMM_WORLD);
        if (rank == 0)
        {
            pardb.DumpSummary(cout);
            pardb.DumpOutliers(cout);
        }
#endif
    }
    catch (std::exception &e)
    {
        std::cerr << e.what() << std::endl;
        ret = 1;
    }
    catch (...)
    {
        std::cerr << "unrecognized exception caught" << std::endl;
        ret = 1;
    }

#ifdef PARALLEL
    MPI_Finalize();
#endif

    return ret;
}
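// ****************************************************************************
// Hedged example (not part of the original source):
//   CL_CHECK_ERROR is used above but not defined in this file.  The sketch
//   below shows a typical definition of such a macro; the real macro in this
//   suite may print more context or handle errors differently.  Assumes
//   CL/cl.h (for CL_SUCCESS), <iostream>, and <cstdlib> are included.
// ****************************************************************************
#ifndef CL_CHECK_ERROR
#define CL_CHECK_ERROR(err)                                             \
    do {                                                                \
        if ((err) != CL_SUCCESS)                                        \
        {                                                               \
            std::cerr << "OpenCL error " << (err)                       \
                      << " at " << __FILE__ << ":" << __LINE__ << "\n"; \
            exit(1);                                                    \
        }                                                               \
    } while (0)
#endif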
int main(int argc, char **argv)
{
    OptionParser opts;

    string mapFile, evidFile;
    int factor;

    opts.addOption(new StringOption("map",
        "--map <filename> : map file",
        "../input/grid.bmp", mapFile, false));
    opts.addOption(new StringOption("evidence",
        "--evidence <filename> : evidence file",
        "", evidFile, true));
    opts.addOption(new IntOption("factor",
        "--factor <int> : scaling factor",
        1, factor, true));

    opts.parse(argc, argv);

    JetColorMap jet;

    RGBTRIPLE black = {0, 0, 0};
    RGBTRIPLE white = {255, 255, 255};

    RGBTRIPLE red;
    red.R = 255;   red.G = 0;     red.B = 0;

    RGBTRIPLE blue;
    blue.R = 0;    blue.G = 0;    blue.B = 255;

    RGBTRIPLE green;
    green.R = 0;   green.G = 255; green.B = 0;

    RGBTRIPLE initialColor;
    initialColor.R = 111;  initialColor.G = 49;  initialColor.B = 152;
    // initialColor.G = 152;
    // initialColor.B = 49;

    RGBTRIPLE currentColor;
    currentColor.R = 181;  currentColor.G = 165; currentColor.B = 213;
    // currentColor.G = 213;
    // currentColor.B = 165;

    RGBTRIPLE magenta;
    magenta.R = 255;  magenta.G = 0;    magenta.B = 255;

    RGBTRIPLE cyan;
    cyan.R = 0;       cyan.G = 255;     cyan.B = 255;

    RGBTRIPLE yellow;
    yellow.R = 255;   yellow.G = 255;   yellow.B = 0;

    BMPFile bmpFile(mapFile);
    Grid grid(bmpFile, black);

    Evidence testSet(evidFile, grid, factor);
    /*
    if (1) {
        evid.split(trainSet, testSet, 0.8);
    } else {
        evid.deterministicsplit(trainSet, testSet);
    }
    */

#if 0
    cout << "Creating Markov Model" << endl;
    MarkovModel markmodel(grid, trainSet);

    double totalObj = 0.0;
    for (int i = 0; i < testSet.size(); i++)
    {
        vector<pair<int, int> > path = testSet.at(i);
        cout << "Calling eval" << endl;
        double obj = markmodel.eval(path);
        cout << "OBJ: " << i << " " << obj << endl;
        totalObj += obj;
    }
    cout << "TOTAL OBJ: " << totalObj << endl;
    cout << "AVERAGE OBJ: " << totalObj / testSet.size() << endl;
    return 0;
#endif

    vector<PosFeature> features;

    cout << "Constant Feature" << endl;
    ConstantFeature constFeat(grid);
    features.push_back(constFeat);

    cout << "Obstacle Feature" << endl;
    ObstacleFeature obsFeat(grid);
    features.push_back(obsFeat);

    for (int i = 1; i < 5; i++)
    {
        cout << "Blur Feature " << i << endl;
        ObstacleBlurFeature blurFeat(grid, 5 * i);
        features.push_back(blurFeat);
    }

    cout << "Creating feature array" << endl;
    FeatureArray featArray2(features);

    cout << "Creating lower resolution feature array" << endl;
    FeatureArray featArray(featArray2, factor);

    pair<int, int> dims = grid.dims();
    pair<int, int> lowDims((int)ceil((float)dims.first / factor),
                           (int)ceil((float)dims.second / factor));

    vector<double> weights(features.size(), -0.0);
    weights.at(1) = -6.2;
    //for (int i = 2; i < weights.size(); i++)
    //    weights.at(i) = -1.0;
    weights.at(0) = -2.23; //-2.23
    weights.at(2) = -0.35;
    weights.at(3) = -2.73;
    weights.at(4) = -0.92;
    weights.at(5) = -0.26;
    Parameters params(weights);

    OrderedWaveInferenceEngine engine(InferenceEngine::GRID8);

    vector<vector<double> > prior(dims.first, vector<double>(dims.second, 0.0));
    /*
    double divide = 1.0;
    vector<double> radiusWeight;
    for (int i = 0; i < 20; i++)
    {
        radiusWeight.push_back(1.0 / divide);
        divide *= 2;
    }
    generatePrior(grid, trainSet, priorOrig, radiusWeight, factor);
    reducePrior(priorOrig, prior, factor);
    */

    vector<vector<vector<double> > > partition, backpartition;

    int time0 = time(0);

    BMPFile gridView(dims.first, dims.second);

    RewardMap rewards(featArray, params);

    vector<double> sums(params.size(), 0.00001);

    vector<vector<double> > occupancy;

    Predictor predictor(grid, rewards, engine);

    predictor.setPrior(prior);

    cout << testSet.size() << " Examples" << endl;

    for (int i = 0; i < testSet.size(); i++)
    {
        int index = 0;

        vector<pair<int, int> > traj = testSet.at(i);
        vector<double> times = testSet.getTimes(i);
        pair<int, int> initial = traj.front();

        pair<int, int> &botinGrid = testSet.at_bot(i);
        pair<double, double> &botinPoint = testSet.at_rbot(i);
        pair<double, double> &end = testSet.at_raw(i).back();

        predictor.setStart(initial);

        double thresh = -20.0;
        double startTime = times.front();

        char buf[1024];
        sprintf(buf, "../output/pppredict%03d.dat", i);
        ofstream file(buf);

        for (double tick = startTime; index < traj.size(); tick += 0.4)
        {
            for ( ; index < traj.size() && times.at(index) < tick; index++);

            if (index == traj.size())
                break;

            cout << "Evidence: " << i << " timestep: " << tick
                 << " index: " << index << endl;
            predictor.predict(traj.at(index), occupancy);

            cout << "SIZE: " << prior.size() << endl;

            vector<vector<double> > pos = predictor.getPosterior();

            gridView.addBelief(pos, -30.0, 0.0, jet);

            grid.addObstacles(gridView, black);
            gridView.addLabel(botinGrid, green);
            vector<pair<int, int> > subTraj;
            subTraj.insert(subTraj.end(), traj.begin(), traj.begin() + index);
            gridView.addVector(subTraj, red, factor);

            sprintf(buf, "../compare/pp%03d-%03f.bmp", i, tick - startTime);
            gridView.write(buf);

            //pair<double, double> values = predictor.check(traj.back());
            double cost = 0.0;
            for (int itr = 0; itr < index; itr++)
                cost += rewards.at(traj[itr].first, traj[itr].second);

            cout << i << " Normalizer: " << predictor.getNormalizer(traj.back())
                 << " path cost: " << cost
                 << " Probability: " << cost + predictor.getNormalizer(traj.back())
                 << endl;

            vector<vector<vector<double> > > timeOcc = predictor.getTimeOccupancy();

            vector<vector<double> > posterior = predictor.getPosterior();

            double maxV = -HUGE_VAL;
            pair<int, int> predestGrid;
            pair<double, double> predestPoint;

            for (int ii = 0; ii < dims.first; ii++)
            {
                for (int jj = 0; jj < dims.second; jj++)
                {
                    if (posterior[ii][jj] > maxV)
                    {
                        predestGrid.first = ii;
                        predestGrid.second = jj;
                    }
                    maxV = max(maxV, posterior.at(ii).at(jj));
                }
            }
            predestPoint = grid.grid2Real(predestGrid.first, predestGrid.second);
            double dist = sqrt(
                (end.first - predestPoint.first) * (end.first - predestPoint.first)
              + (end.second - predestPoint.second) * (end.second - predestPoint.second));

            double logloss = entropy(posterior);

            cout << "final belief: "
                 << posterior.at(traj.back().first).at(traj.back().second)
                 << " max: " << maxV
                 << " logloss: " << logloss << endl;
            cout << botinGrid.first << " " << botinGrid.second
                 << " " << predestGrid.first << " " << predestGrid.second << endl;
            file << tick - startTime
                 << " " << logloss
                 << " " << posterior.at(botinGrid.first).at(botinGrid.second)
                 << " " << posterior.at(traj.back().first).at(traj.back().second)
                 << " " << maxV << " " << dist << endl;
        }
        file.close();
    }
}
void addBenchmarkSpecOptions(OptionParser &op) { op.addOption("nopinned", OPT_BOOL, "", "disable usage of pinned (pagelocked) memory"); }
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Kyle Spafford
// Creation: July 26, 2010
//
// Modifications:
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "8", "specify MD kernel iterations", 'r');
}
int main(int argc, char *argv[])
{
    int numdev = 0, totalnumdev = 0, numtasks, mympirank, dest, source, rc,
        mypair = 0, count, tag = 2, mynoderank, myclusterrank, nodenprocs;
    int *grp1, *grp2;
    int mygrprank, grpnumtasks;
    MPI_Group orig_group, bmgrp;
    MPI_Comm bmcomm, nlrcomm;
    ResultDatabase resultDB, resultDBWU, resultDB1;
    OptionParser op;
    ParallelResultDatabase pardb, pardb1;
    bool amGPUTask = false;
    volatile unsigned long long *mpidone;
    int i, shmid;

    /* Allocate System V shared memory */

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &mympirank);
    MPI_Comm_group(MPI_COMM_WORLD, &orig_group);

    // Add shared options to the parser
    op.addOption("device", OPT_VECINT, "0", "specify device(s) to run on", 'd');
    op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
    op.addOption("quiet", OPT_BOOL, "", "write minimum necessary to standard output", 'q');
    op.addOption("passes", OPT_INT, "10", "specify number of passes", 'z');
    op.addOption("size", OPT_VECINT, "1", "specify problem size", 's');
    op.addOption("time", OPT_INT, "5", "specify running time in minutes", 't');
    op.addOption("outputFile", OPT_STRING, "output.txt", "specify output file", 'o');
    op.addOption("infoDevices", OPT_BOOL, "", "show summary info for available devices", 'i');
    op.addOption("fullInfoDevices", OPT_BOOL, "", "show full info for available devices");
    op.addOption("MPIminmsg", OPT_INT, "0", "specify minimum MPI message size");
    op.addOption("MPImaxmsg", OPT_INT, "16384", "specify maximum MPI message size");
    op.addOption("MPIiter", OPT_INT, "1000", "specify number of MPI benchmark iterations for each size");
    op.addOption("platform", OPT_INT, "0", "specify platform for device selection", 'y');

    if (!op.parse(argc, argv))
    {
        if (mympirank == 0)
            op.usage();
        MPI_Finalize();
        return 0;
    }

    int npasses = op.getOptionInt("passes");

    // our simple mapping
    NodeInfo NI;
    mynoderank = NI.nodeRank();        // rank of my process within the node
    myclusterrank = NI.clusterRank();  // cluster (essentially, node) id
    MPI_Comm smpcomm = NI.getSMPComm();

    if (mynoderank == 0)
    {
        shmid = shmget(IPC_PRIVATE, sizeof(unsigned long long), (IPC_CREAT | 0600));
    }
    MPI_Bcast(&shmid, 1, MPI_INT, 0, NI.getSMPComm());
    mpidone = ((volatile unsigned long long *)shmat(shmid, 0, 0));
    if (mynoderank == 0)
        shmctl(shmid, IPC_RMID, 0);
    *mpidone = 0;

    nlrcomm = NI.getNLRComm();  // communicator of all the lowest rank processes
                                // on all the nodes

    int numnodes = NI.numNodes();
    if (numnodes % 2 != 0)
    {
        if (mympirank == 0)
            printf("\nThis test needs an even number of nodes\n");
        MPI_Finalize();
        exit(0);
    }
    int nodealr = NI.nodeALR();
    nodenprocs = NI.nodeNprocs();

    // determine how many GPU devices we are to use
    int devsPerNode = op.getOptionVecInt("device").size();
    //cout << mympirank << ":numgpus=" << devsPerNode << endl;

    // if there are as many or more devices as the nprocs, only use half of
    // the nproc
    if (devsPerNode >= nodenprocs)
        devsPerNode = nodenprocs / 2;

    numdev = (mynoderank == 0) ? devsPerNode : 0;
    MPI_Allreduce(&numdev, &totalnumdev, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    numdev = devsPerNode;

    // determine whether I am to be a GPU or a comm task
    if (mynoderank < numdev)
    {
        amGPUTask = true;
    }

    // Divide tasks into two distinct groups based upon noderank
    grp1 = (int *)calloc(totalnumdev, sizeof(int));
    grp2 = (int *)calloc((numtasks - totalnumdev), sizeof(int));
    if (grp1 == NULL || grp2 == NULL)
    {
        printf("\n%d:calloc failed in %s", mympirank, __FUNCTION__);
        exit(1);
    }

    /* compute the groups */
    int beginoffset[2] = {0, 0};
    if (mynoderank == 0)
    {
        int tmp[2];
        tmp[0] = numdev;
        tmp[1] = nodenprocs - numdev;
        if (mympirank == 0)
            MPI_Send(tmp, 2 * sizeof(int), MPI_CHAR, 1, 112, nlrcomm);
        else
        {
            MPI_Status reqstat;
            MPI_Recv(beginoffset, 2 * sizeof(int), MPI_CHAR, myclusterrank - 1,
                     112, nlrcomm, &reqstat);
            if (myclusterrank < numnodes - 1)
            {
                beginoffset[0] += numdev;
                beginoffset[1] += (nodenprocs - numdev);
                MPI_Send(beginoffset, 2 * sizeof(int), MPI_CHAR,
                         myclusterrank + 1, 112, nlrcomm);
                beginoffset[0] -= numdev;
                beginoffset[1] -= (nodenprocs - numdev);
            }
        }
    }
    MPI_Bcast(beginoffset, 2, MPI_INT, 0, smpcomm);

    if (amGPUTask)
    {
        // I am to do GPU work
        grp1[beginoffset[0] + mynoderank] = mympirank;
        grpnumtasks = totalnumdev;
    }
    else
    {
        // I am to do MPI communication work
        grp2[beginoffset[1] + (mynoderank - numdev)] = mympirank;
        grpnumtasks = numtasks - totalnumdev;
    }

    MPI_Allreduce(MPI_IN_PLACE, grp1, totalnumdev, MPI_INT, MPI_SUM,
                  MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, grp2, (numtasks - totalnumdev), MPI_INT, MPI_SUM,
                  MPI_COMM_WORLD);

    if (amGPUTask)
    {
        // I am to do GPU work, so will be part of GPU communicator
        MPI_Group_incl(orig_group, totalnumdev, grp1, &bmgrp);
    }
    else
    {
        // I am to do MPI communication work, so will be part of MPI
        // messaging traffic communicator
        MPI_Group_incl(orig_group, (numtasks - totalnumdev), grp2, &bmgrp);
    }

    MPI_Comm_create(MPI_COMM_WORLD, bmgrp, &bmcomm);
    MPI_Comm_rank(bmcomm, &mygrprank);
    NodeInfo *GRPNI = new NodeInfo(bmcomm);
    int mygrpnoderank = GRPNI->nodeRank();
    int grpnodealr = GRPNI->nodeALR();
    int grpnodenprocs = GRPNI->nodeNprocs();
    MPI_Comm grpnlrcomm = GRPNI->getNLRComm();
    // note that clusterrank and number of nodes don't change for this child
    // group/comm

    // form node-random pairs (see README) among communication tasks
    if (amGPUTask)
    {
        // setup GPU in GPU tasks
        GPUSetup(op, mympirank, mynoderank);
    }
    else
    {
        int *pairlist = new int[numnodes];
        for (i = 0; i < numnodes; i++)
            pairlist[i] = 0;

        if (mygrpnoderank == 0)
        {
            pairlist[myclusterrank] = grpnodealr;
            MPI_Allreduce(MPI_IN_PLACE, pairlist, numnodes, MPI_INT, MPI_SUM,
                          grpnlrcomm);
            mypair = RandomPairs(myclusterrank, numnodes, grpnlrcomm);
            mypair = pairlist[mypair];
        }
        for (i = 0; i < numnodes; i++)
            pairlist[i] = 0;
        if (mygrpnoderank == 0)
            pairlist[myclusterrank] = mypair;

        MPI_Allreduce(MPI_IN_PLACE, pairlist, numnodes, MPI_INT, MPI_SUM,
                      bmcomm);
        mypair = pairlist[myclusterrank] + mygrpnoderank;
    }

    // ensure we are all synchronized before starting test
    MPI_Barrier(MPI_COMM_WORLD);

    // warmup run
    if (amGPUTask)
    {
        GPUDriver(op, resultDBWU);
    }
    // first, individual runs for device benchmark
    for (i = 0; i < npasses; i++)
    {
        if (amGPUTask)
        {
            GPUDriver(op, resultDB);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // warmup run
    if (!amGPUTask)
    {
        MPITest(op, resultDBWU, grpnumtasks, mygrprank, mypair, bmcomm);
    }
    // next, individual run for MPI Benchmark
    for (i = 0; i < npasses; i++)
    {
        if (!amGPUTask)
        {
            MPITest(op, resultDB, grpnumtasks, mygrprank, mypair, bmcomm);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // merge and print
    pardb.MergeSerialDatabases(resultDB, bmcomm);
    if (mympirank == 0)
        cout << endl
             << "*****************************Sequential GPU and MPI runs****************************"
             << endl;
    DumpInSequence(pardb, mygrprank, mympirank);

    // Simultaneous runs for observing impact of contention
    MPI_Barrier(MPI_COMM_WORLD);
    if (amGPUTask)
    {
        do
        {
            if (mympirank == 0)
                cout << ".";
            GPUDriver(op, resultDB1);
            flush(cout);
        } while (*mpidone == 0);
        if (mympirank == 0)
            cout << "*" << endl;
    }
    else
    {
        for (i = 0; i < npasses; i++)
        {
            MPITest(op, resultDB1, grpnumtasks, mygrprank, mypair, bmcomm);
        }
        *mpidone = 1;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // merge and print
    pardb1.MergeSerialDatabases(resultDB1, bmcomm);
    if (mympirank == 0)
        cout << endl
             << "*****************************Simultaneous GPU and MPI runs****************************"
             << endl;
    DumpInSequence(pardb1, mygrprank, mympirank);

    // print summary
    if (!amGPUTask && mygrprank == 0)
    {
        vector<ResultDatabase::Result> prelatency =
            pardb.GetResultsForTest("MPI Latency(mean)");
        vector<ResultDatabase::Result> postlatency =
            pardb1.GetResultsForTest("MPI Latency(mean)");

        cout << endl << "Summarized Mean(Mean) MPI Baseline Latency vs. Latency with Contention";
        cout << endl << "MSG SIZE(B)\t";
        int msgsize = 0;
        for (i = 0; i < prelatency.size(); i++)
        {
            cout << msgsize << "\t";
            msgsize = (msgsize ? msgsize * 2 : msgsize + 1);
        }
        cout << endl << "BASELATENCY\t";
        for (i = 0; i < prelatency.size(); i++)
            cout << setiosflags(ios::fixed) << setprecision(2)
                 << prelatency[i].GetMean() << "\t";
        cout << endl << "CONTLATENCY\t";
        for (i = 0; i < postlatency.size(); i++)
            cout << setiosflags(ios::fixed) << setprecision(2)
                 << postlatency[i].GetMean() << "\t";
        flush(cout);
        cout << endl;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    if (amGPUTask && mympirank == 0)
    {
        vector<ResultDatabase::Result> prespeed =
            pardb.GetResultsForTest("DownloadSpeed(mean)");
        vector<ResultDatabase::Result> postspeed =
            pardb1.GetResultsForTest("DownloadSpeed(mean)");

        cout << endl << "Summarized Mean(Mean) GPU Baseline Download Speed vs. Download Speed with Contention";
        cout << endl << "MSG SIZE(KB)\t";
        int msgsize = 1;
        for (i = 0; i < prespeed.size(); ++i)
        {
            cout << msgsize << "\t";
            msgsize = (msgsize ? msgsize * 2 : msgsize + 1);
        }
        cout << endl << "BASESPEED\t";
        for (i = 0; i < prespeed.size(); ++i)
            cout << setiosflags(ios::fixed) << setprecision(4)
                 << prespeed[i].GetMean() << "\t";
        cout << endl << "CONTSPEED\t";
        for (i = 0; i < postspeed.size(); ++i)
            cout << setiosflags(ios::fixed) << setprecision(4)
                 << postspeed[i].GetMean() << "\t";
        cout << endl;
    }

    if (amGPUTask && mympirank == 0)
    {
        vector<ResultDatabase::Result> pregpulat =
            pardb.GetResultsForTest("DownloadLatencyEstimate(mean)");
        vector<ResultDatabase::Result> postgpulat =
            pardb1.GetResultsForTest("DownloadLatencyEstimate(mean)");

        cout << endl << "Summarized Mean(Mean) GPU Baseline Download Latency vs. Download Latency with Contention";
        cout << endl << "MSG SIZE\t";
        for (i = 0; i < pregpulat.size(); ++i)
        {
            cout << pregpulat[i].atts << "\t";
        }
        cout << endl << "BASEGPULAT\t";
        for (i = 0; i < pregpulat.size(); ++i)
            cout << setiosflags(ios::fixed) << setprecision(7)
                 << pregpulat[i].GetMean() << "\t";
        cout << endl << "CONTGPULAT\t";
        for (i = 0; i < postgpulat.size(); ++i)
            cout << setiosflags(ios::fixed) << setprecision(7)
                 << postgpulat[i].GetMean() << "\t";
        cout << endl;
    }

    // cleanup GPU
    if (amGPUTask)
    {
        GPUCleanup(op);
    }

    MPI_Finalize();
}
int main(int argc, char **argv)
{
    OptionParser opts;

    string mapFile, trainFile, testFile;

    int factor = 1;
    double step;

    opts.addOption(new StringOption("map",
        "--map <filename> : map file",
        "../input/grid.bmp", mapFile, false));
    opts.addOption(new StringOption("evidence",
        "--test evidence <filename> : evidence file",
        "", testFile, true));
    opts.addOption(new DoubleOption("step",
        "--step <double> : inference interval",
        1.0, step, true));

    opts.parse(argc, argv);

    JetColorMap jet;
    RGBTRIPLE black = {0, 0, 0};
    RGBTRIPLE white = {255, 255, 255};
    RGBTRIPLE red;
    red.R = 255;   red.G = 0;     red.B = 0;
    RGBTRIPLE blue;
    blue.R = 0;    blue.G = 0;    blue.B = 255;
    RGBTRIPLE green;
    green.R = 0;   green.G = 255; green.B = 0;
    RGBTRIPLE initialColor;
    initialColor.R = 111;  initialColor.G = 49;  initialColor.B = 152;
    RGBTRIPLE currentColor;
    currentColor.R = 181;  currentColor.G = 165; currentColor.B = 213;
    RGBTRIPLE magenta;
    magenta.R = 255;  magenta.G = 0;   magenta.B = 255;
    RGBTRIPLE cyan;
    cyan.R = 0;       cyan.G = 255;    cyan.B = 255;
    RGBTRIPLE yellow;
    yellow.R = 255;   yellow.G = 255;  yellow.B = 0;

    BMPFile bmpFile(mapFile);
    Grid grid(bmpFile, black);

    Evidence testSet(testFile, grid, factor);
    // Evidence trainSet(trainFile, grid, factor);
    pair<int, int> dims = grid.dims();

    cout << " Speed Feature" << endl;
    vector<double> speedTable(VEL_DIM, 0.0);
    speedTable.at(1) = 0.75;
    DisVecSeqFeature speedfeat(speedTable);

    vector<int> dimensions;
    dimensions.push_back(dims.first);
    dimensions.push_back(dims.second);
    dimensions.push_back(VEL_DIM);

    /* ****************************************
     *   INITIALIZE MARKOV DECISION PROCESS
     *   BASED MODEL PARAMETERS
     * ****************************************/
    vector<double> p_weights(NUMPOSFEAT, -0.0);
    p_weights.at(0) = -2.23;  //-2.23 for PPP forecast
    p_weights.at(1) = -6.2;
    p_weights.at(2) = -0.35;
    p_weights.at(3) = -2.73;
    p_weights.at(4) = -0.92;
    p_weights.at(5) = -0.26;

    vector<double> r_PosWeights(NUMPOSFEAT + NUMROBFEAT, -0.0);
    r_PosWeights.at(0) = -3.83;
    r_PosWeights.at(1) = -8.36;
    r_PosWeights.at(2) = -2.65;
    r_PosWeights.at(3) = -5.43;
    r_PosWeights.at(4) = -3.15;
    r_PosWeights.at(5) = -3.30;
    //r_PosWeights.at(6) = 0.60;
    //r_PosWeights.at(7) = 0.45;

    vector<double> nr_PosWeights(NUMPOSFEAT + NUMROBFEAT, -0.0);
    nr_PosWeights.at(0) = -4.51;
    nr_PosWeights.at(1) = -6.2;
    nr_PosWeights.at(2) = -0.35;
    nr_PosWeights.at(3) = -2.73;
    nr_PosWeights.at(4) = -0.93;
    nr_PosWeights.at(5) = -0.28;
    //nr_PosWeights.at(6) = -0.50;
    //nr_PosWeights.at(7) = -0.286;

    vector<double> r_SeqWeights(VEL_DIM, -0.0);
    r_SeqWeights.at(0) = 0.59;
    r_SeqWeights.at(1) = -0.83;

    vector<double> nr_SeqWeights(VEL_DIM, -0.0);
    nr_SeqWeights.at(0) = -1.21;
    nr_SeqWeights.at(1) = 0.49;

    Parameters p(p_weights);
    Parameters r_Pos(r_PosWeights);
    Parameters nr_Pos(nr_PosWeights);
    Parameters r_Seq(r_SeqWeights);
    Parameters nr_Seq(nr_SeqWeights);

    /* ****************************************
     *   INITIALIZE LINEAR QUADRATIC CONTROL
     *   BASED MODEL PARAMETERS
     * ****************************************/
    M_6 A;
    A.setZero();
    A(0,0) = 1;
    A(1,1) = 1;
    A(4,2) = -1;
    A(5,3) = -1;

    M_6_2 B;
    B << 1, 0,
         0, 1,
         1, 0,
         0, 1,
         1, 0,
         0, 1;

    M_6 costM;
    ifstream infile("../params/nonrob2000.dat");
    for (int row = 0; row < costM.rows(); row++)
    {
        for (int col = 0; col < costM.cols(); col++)
        {
            double temp;
            infile >> temp;
            costM(row, col) = temp;
        }
    }
    infile.close();

    M_6 sigma;
    sigma << 0.001, 0,     0,     0,     0,     0,
             0,     0.001, 0,     0,     0,     0,
             0,     0,     0.005, 0,     0,     0,
             0,     0,     0,     0.005, 0,     0,
             0,     0,     0,     0,     0.005, 0,
             0,     0,     0,     0,     0,     0.005;

    /* ****************************************
     *   DECLARATION OF INFERENCE ENGINES
     * ****************************************/
    OrderedWaveInferenceEngine pp(InferenceEngine::GRID8);
    DisSeqOrderInferEngine mdpr(InferenceEngine::GRID8);
    DisSeqOrderInferEngine mdpnr(InferenceEngine::GRID8);

    ContinuousState cState;
    LQControlInference lq(A, B, sigma, costM, cState);
    lq.valueInference();

    IntentRecognizer IR(grid, p, r_Pos, r_Seq, nr_Pos, nr_Seq,
                        speedfeat, pp, mdpr, mdpnr, lq);

    cout << testSet.size() << " Examples" << endl;

    for (int i = 0; i < testSet.size(); i++)
    {
        vector<pair<int, int> > &traj = testSet.at(i);
        vector<double> &vels = testSet.at_v(i);
        vector<double> times = testSet.getTimes(i);
        pair<int, int> &botinGrid = testSet.at_bot(i);
        vector<pair<double, double> > &obs = testSet.at_raw(i);
        vector<double> &rawTimes = testSet.at_rawTime(i);

        IR.combineForecast(traj, vels, obs, times, rawTimes,
                           botinGrid, i, step);
    }
}
// ****************************************************************************
// Function: main
//
// Purpose:
//   The main function takes care of initialization (device and MPI), then
//   performs the benchmark and prints results.
//
// Arguments:
//
//
// Programmer: Jeremy Meredith
// Creation:
//
// Modifications:
//   Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
//   Split timing reports into detailed and summary.  For serial code, we
//   report all trial values, and for parallel, skip the per-process vals.
//   Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
    int ret = 0;
    bool noprompt = false;

    try
    {
#ifdef PARALLEL
        int rank, size;
        MPI_Init(&argc, &argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        cerr << "MPI Task " << rank << "/" << size - 1 << " starting....\n";
#endif

        // Get args
        OptionParser op;

        // Add shared options to the parser
        op.addOption("device", OPT_VECINT, "0",
                     "specify device(s) to run on", 'd');
        op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
        op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
        op.addOption("size", OPT_INT, "1", "specify problem size", 's');
        op.addOption("infoDevices", OPT_BOOL, "",
                     "show info for available platforms and devices", 'i');
        op.addOption("quiet", OPT_BOOL, "",
                     "write minimum necessary to standard output", 'q');
#ifdef _WIN32
        op.addOption("noprompt", OPT_BOOL, "",
                     "don't wait for prompt at program exit");
#endif

        addBenchmarkSpecOptions(op);

        if (!op.parse(argc, argv))
        {
#ifdef PARALLEL
            if (rank == 0)
                op.usage();
            MPI_Finalize();
#else
            op.usage();
#endif
            return (op.HelpRequested() ? 0 : 1);
        }

        bool verbose = op.getOptionBool("verbose");
        bool infoDev = op.getOptionBool("infoDevices");
#ifdef _WIN32
        noprompt = op.getOptionBool("noprompt");
#endif

        int device;
#ifdef PARALLEL
        NodeInfo ni;
        int myNodeRank = ni.nodeRank();
        vector<long long> deviceVec = op.getOptionVecInt("device");
        if (myNodeRank >= deviceVec.size())
        {
            // Default is for task i to test device i
            device = myNodeRank;
        }
        else
        {
            device = deviceVec[myNodeRank];
        }
#else
        device = op.getOptionVecInt("device")[0];
#endif
        int deviceCount;
        cudaGetDeviceCount(&deviceCount);
        if (device >= deviceCount)
        {
            cerr << "Warning: device index: " << device
                 << " out of range, defaulting to device 0.\n";
            device = 0;
        }

        // Initialization
        EnumerateDevicesAndChoose(device, infoDev);
        if (infoDev)
        {
            return 0;
        }

        ResultDatabase resultDB;

        // Run the benchmark
        RunBenchmark(resultDB, op);

#ifndef PARALLEL
        resultDB.DumpDetailed(cout);
#else
        ParallelResultDatabase pardb;
        pardb.MergeSerialDatabases(resultDB, MPI_COMM_WORLD);
        if (rank == 0)
        {
            pardb.DumpSummary(cout);
            pardb.DumpOutliers(cout);
        }
#endif
    }
    catch (InvalidArgValue &e)
    {
        std::cerr << e.what() << ": " << e.GetMessage() << std::endl;
        ret = 1;
    }
    catch (std::exception &e)
    {
        std::cerr << e.what() << std::endl;
        ret = 1;
    }
    catch (...)
    {
        ret = 1;
    }

#ifdef PARALLEL
    MPI_Finalize();
#endif

#ifdef _WIN32
    if (!noprompt)
    {
        cout << "Press return to exit\n";
        cin.get();
    }
#endif

    return ret;
}
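// ****************************************************************************
// Hedged example (not part of the original source):
//   EnumerateDevicesAndChoose() is called above but not defined in this file.
//   The sketch below shows what such a helper typically does using standard
//   CUDA runtime calls; the real implementation in this suite likely prints
//   considerably more device detail.  Assumes <cuda_runtime.h>, <iostream>,
//   and `using namespace std` as in the file above.
// ****************************************************************************
void EnumerateDevicesAndChoose(int chooseDevice, bool infoDevices)
{
    int deviceCount = 0;
    cudaGetDeviceCount(&deviceCount);
    for (int d = 0; d < deviceCount; d++)
    {
        cudaDeviceProp props;
        cudaGetDeviceProperties(&props, d);
        if (infoDevices)
            cout << "Device " << d << ": " << props.name
                 << " (" << props.multiProcessorCount << " SMs)\n";
    }
    cudaSetDevice(chooseDevice);  // make the selected device current
}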
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the running time in minutes.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("time", OPT_INT, "1", "specify running time in minutes", 't');
}
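// ****************************************************************************
// Hedged example (not part of the original source):
//   A sketch of turning the "time" option (minutes) above into a wall-clock
//   stop condition.  time() comes from <ctime>; the loop body and the helper
//   name are illustrative placeholders.
// ****************************************************************************
void exampleRunForConfiguredTime(OptionParser &op)
{
    int minutes = op.getOptionInt("time");
    time_t deadline = time(NULL) + (time_t)minutes * 60;
    while (time(NULL) < deadline)
    {
        // ... run one benchmark iteration here ...
    }
}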
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Kyle Spafford
// Creation: August 13, 2009
//
// Modifications:
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "256", "specify reduction iterations");
}
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes if they are not using a
//   predefined size (i.e. the -s option).
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("MB", OPT_INT, "0", "data size (in megabytes)");
}
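// ****************************************************************************
// Hedged example (not part of the original source):
//   A sketch of how the "MB" option might override the predefined -s sizes.
//   The double element type, the helper name, and the notion of a caller
//   supplied default are illustrative assumptions.
// ****************************************************************************
size_t exampleProblemSizeInElements(OptionParser &op, size_t defaultElems)
{
    long long mb = op.getOptionInt("MB");
    if (mb <= 0)
        return defaultElems;                            // fall back to -s behavior
    return (size_t)mb * 1024 * 1024 / sizeof(double);   // megabytes -> elements
}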
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Kyle Spafford
// Creation: August 13, 2009
//
// Modifications:
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "256", "specify scan iterations");
    op.addOption("nthreads", OPT_INT, "64", "specify number of threads");
}
int main(int argc, char **argv)
{
    OptionParser opts;

    string mapFile, evidFile;  // interactFile, ignoreFile;

    int factor;

    opts.addOption(new StringOption("map",
        "--map <filename> : map file",
        "../input/grid.bmp", mapFile, false));
    opts.addOption(new StringOption("evidence",
        "--evidence <filename> : evidence file",
        "", evidFile, true));
    opts.addOption(new IntOption("factor",
        "--factor <int> : scaling factor",
        1, factor, true));

    opts.parse(argc, argv);

    cout << "Loading Map File" << endl;
    BMPFile bmpFile(mapFile);
    Grid grid(bmpFile, black);
    // cout << "xdim: " << grid.dims().first << " yDim: " << grid.dims().second << endl;

    cout << "Loading Evidence" << endl;
    //Evidence trainSet(evidFile, grid, factor);

    /* used when we need to train two separate models
    Evidence evid_int(interactFile, grid, factor);
    Evidence evid_ig(ignoreFile, grid, factor);
    Evidence train_int(grid), test_int(grid), train_ig(grid), test_ig(grid);
    evid_int.split(train_int, test_int, 0.05);
    evid_ig.split(train_ig, test_ig, 0.05);
    */
    Evidence evid(evidFile, grid, factor);
    Evidence trainSet(grid), testSet(grid);
    evid.split(trainSet, testSet, 0.05);

    cout << "Optimize over " << trainSet.size() << " examples" << endl;

#if 0
    for (int i = 0; i < evid.size(); i++)
    {
        cout << "Evid " << i << endl;
        vector<pair<int, int> > traj = evid.at(i);
        vector<double> timestamps = evid.getTimes(i);
        cout << timestamps.size() << " " << traj.size() << endl;
        for (int j = 0; j < traj.size(); j++)
        {
            cout << timestamps.at(j) << " " << traj.at(j).first
                 << " " << traj.at(j).second << endl;
        }
    }
#endif
    // testSet.write("testTraj.data");

    cout << "Generating Feature Set" << endl;

    vector<PosFeature> features;

    cout << " Constant Feature" << endl;
    ConstantFeature constFeat(grid);
    features.push_back(constFeat);

    cout << " Obstacle Feature" << endl;
    ObstacleFeature obsFeat(grid);
    features.push_back(obsFeat);

    for (int i = 1; i < 5; i++)
    {
        cout << " Blur Feature " << i << endl;
        ObstacleBlurFeature blurFeat(grid, 5 * i);
        features.push_back(blurFeat);
    }

    /*
    cout << " Robot Feature" << endl;
    RobotGlobalFeature robglobal(grid, snackbot, factor);
    features.push_back(robglobal);
    // robot local blurred features
    for (int i = 1; i < 5; i++)
    {
        cout << " RobotBlur Feature " << i << endl;
        RobotLocalBlurFeature robblur(grid, snackbot, 5 * i, factor);
        features.push_back(robblur);
    }
    */

    /*
    cout << " Creating feature array" << endl;
    FeatureArray featArray2(features);

    cout << " Creating lower resolution feature array" << endl;
    FeatureArray featArray(featArray2, factor);
    */

    cout << " Speed Feature" << endl;
    vector<double> speedTable(2, 0.0);
    speedTable.at(1) = 0.75;
    //speedTable.at(2) = 1.1;
    DisVecSeqFeature speedfeat(speedTable);

    /* Robset training weights:
     *   -3.83 -8.35991 -2.6512 -5.43475 -3.15203 -3.29758
     *   0.596987 0.439284
     *   0.589445 -0.82448
     * Non-robot-ending training weights:
     *   -4.57257 -6.2 -0.3537 -2.7385 -0.9357 -0.2797
     *   -0.495205 -0.2863
     *   -1.2225 0.43993
     */
    vector<double> weights(6 + 2 + 2, -0.0);
    weights.at(0) = -25;
    weights.at(1) = -8.36;
    weights.at(2) = -2.65;
    weights.at(3) = -5.43;
    weights.at(4) = -3.17;
    weights.at(5) = -3.34;
    weights.at(6) = 0.5;    // robot feature
    weights.at(7) = 0.3;    // robot feature
    weights.at(8) = -0.29;  // velocity feature
    weights.at(9) = -1.11;  // velocity feature
    //weights.push_back(1.5);  // the last parameter is for velocity feature

    Parameters params(weights);

    DisSeqOrderInferEngine engine(8, InferenceEngine::GRID8);

    trajOptimizerplus optimizer(grid, trainSet, features, speedfeat, engine);

    optimizer.optimize(params, 0.005, 1000, 1.0, OPT_EXP);

    return 0;
}