Example #1
int main(int argc, char **argv)
{

  OptionParser op;
  op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
  op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
  op.addOption("size", OPT_INT, "1", "specify problem size", 's');
  op.addOption("target", OPT_INT, "0", "specify MIC target device number", 't');
  
  // If benchmark has any specific options, add those
  addBenchmarkSpecOptions(op);
  
  if (!op.parse(argc, argv))
  {
     op.usage();
     return -1;
  }

  ResultDatabase resultDB;
  // Run the test
  RunBenchmark(op, resultDB);

  // Print out results to stdout
  resultDB.DumpDetailed(cout);

  return 0;
}
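The excerpt above only registers and parses options; as a minimal sketch (not taken from the project), a benchmark body might read them back with the getOptionBool/getOptionInt accessors that appear in the later examples:

// Illustrative sketch only: the RunBenchmark body here is hypothetical,
// but getOptionBool/getOptionInt are the accessors used in the mains below.
void RunBenchmark(OptionParser &op, ResultDatabase &resultDB)
{
    bool verbose = op.getOptionBool("verbose");
    int  passes  = op.getOptionInt("passes");
    int  size    = op.getOptionInt("size");

    for (int p = 0; p < passes; p++)
    {
        if (verbose)
            cout << "pass " << p << " with problem size " << size << endl;
        // ... actual benchmark work would run and be timed here ...
    }
}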
Example #2
File: fft.cpp  Project: Sable/Ostrich
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void
addBenchmarkSpecOptions(OptionParser &op)
{
	op.addOption("pts", OPT_INT, "0", "data size (in megabytes)");
	op.addOption("pts1", OPT_INT, "0", "data size (in megabytes)");
	op.addOption("pts2", OPT_INT, "0", "data size (in megabytes)");
	op.addOption("2D", OPT_BOOL, "false", "2D FFT");
}
Example #3
File: FFT.cpp  Project: Poojachitral/shoc
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void 
addBenchmarkSpecOptions(OptionParser &op) 
{
    op.addOption("MB", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("use-native", OPT_BOOL, "false", "call native (HW) versions of sin/cos");
    op.addOption("dump-sp", OPT_BOOL, "false", "dump result after SP fft/ifft");
    op.addOption("dump-dp", OPT_BOOL, "false", "dump result after DP fft/ifft");
}
Example #4
File: Spmv.cpp  Project: bart-utahman/shoc
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Lukasz Wesolowski
// Creation: June 21, 2010
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "100", "Number of SpMV iterations "
                 "per pass");
    op.addOption("mm_filename", OPT_STRING, "random", "Name of file "
                 "which stores the matrix in Matrix Market format");
    op.addOption("maxval", OPT_FLOAT, "10", "Maximum value for random "
                 "matrices");
}
Example #5
// Adds command line options to given OptionParser
void
addBenchmarkSpecOptions( OptionParser& opts )
{
    opts.addOption("customSize", OPT_VECINT, "0,0", "specify custom problem size");
    opts.addOption( "num-iters", OPT_INT, "1000", "number of stencil iterations" );
    opts.addOption( "weight-center", OPT_FLOAT, "0.25", "center value weight" );
    opts.addOption( "weight-cardinal", OPT_FLOAT, "0.15", "cardinal values weight" );
    opts.addOption( "weight-diagonal", OPT_FLOAT, "0.05", "diagonal values weight" );
    opts.addOption( "seed", OPT_INT, "71594", "random number generator seed" );
    opts.addOption( "val-threshold", OPT_FLOAT, "0.01", "validation error threshold" );
    opts.addOption( "val-print-limit", OPT_INT, "15", "number of validation errors to print" );
    opts.addOption( "haloVal", OPT_FLOAT, "0.0", "value to use for halo data" );

#if defined(PARALLEL)
    opts.addOption( "msize", OPT_VECINT, "2,2", "MPI 2D grid topology dimensions" );
    opts.addOption( "iters-per-exchange", OPT_INT, "1", "Number of local iterations between MPI boundary exchange operations (also, halo width)" );
#endif // defined(PARALLEL)
}
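A minimal sketch, assuming getOptionVecInt (the accessor used in the mains below) and illustrative fallback logic, of how the vector-valued customSize option could be consumed:

// Hypothetical sketch: consuming the OPT_VECINT "customSize" option.
vector<long long> sizes = opts.getOptionVecInt("customSize");
long long nRows = (sizes.size() > 0) ? sizes[0] : 0;
long long nCols = (sizes.size() > 1) ? sizes[1] : 0;
// The "0,0" default means no custom size was requested; a real benchmark
// would then fall back to its standard -s problem-size presets.
bool useCustomSize = (nRows > 0 && nCols > 0);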
Example #6
// Adds command line options to given OptionParser
void
addBenchmarkSpecOptions( OptionParser& opts )
{
    opts.addOption("customSize", OPT_VECINT, "0,0", "specify custom problem size");
    opts.addOption( "lsize", OPT_VECINT, "8,256", "block dimensions" );
    opts.addOption( "num-iters", OPT_INT, "1000", "number of stencil iterations" );
    opts.addOption( "weight-center", OPT_FLOAT, "0.25", "center value weight" );
    opts.addOption( "weight-cardinal", OPT_FLOAT, "0.15", "cardinal values weight" );
    opts.addOption( "weight-diagonal", OPT_FLOAT, "0.05", "diagonal values weight" );
    opts.addOption( "seed", OPT_INT, "71594", "random number generator seed" );
    opts.addOption( "val-threshold", OPT_FLOAT, "0.01", "validation error threshold" );
    opts.addOption( "val-print-limit", OPT_INT, "15", "number of validation errors to print" );
    opts.addOption( "haloVal", OPT_FLOAT, "0.0", "value to use for halo data" );

    opts.addOption( "expMatrixFile", OPT_STRING, "", "Basename for file(s) holding expected matrices" );
    opts.addOption( "saveExpMatrixFile", OPT_STRING, "", "Basename for output file(s) that will hold expected matrices" );

    opts.addOption( "warmupPasses", OPT_INT, "1", "Number of warmup passes to do before starting timings", 'w' );


#if defined(PARALLEL)
    MPI2DGridProgramBase::AddOptions( opts );
#endif // defined(PARALLEL)
}
Example #7
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes if they are not using a
//   predefined size (i.e. the -s option).
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("MB", OPT_INT, "0", "data size (in megabytes)");
    op.addOption("dump-dp", OPT_BOOL, "false", "dump result after DP fft/ifft");
    op.addOption("dump-sp", OPT_BOOL, "false", "dump result after SP fft/ifft");
}
Example #8
File: main.cpp  Project: ManavA/shoc
// ****************************************************************************
// Method:  main()
//
// Purpose:
//   serial and parallel main for OpenCL level0 benchmarks
//
// Arguments:
//   argc, argv
//
// Programmer:  SHOC Team
// Creation:    The Epoch
//
// Modifications:
//   Jeremy Meredith, Tue Jan 12 15:09:33 EST 2010
//   Changed the way device selection works.  It now defaults to the device
//   index corresponding to the process's rank within a node if no devices
//   are specified on the command line, and otherwise, round-robins
//   the list of devices among the tasks.
//
//   Gabriel Marin, Tue Jun 01 15:38 EST 2010
//   Check that we have valid (not NULL) context and queue objects before
//   running the benchmarks. Errors inside CreateContextFromSingleDevice or
//   CreateCommandQueueForContextAndDevice were not propagated out to the main
//   program.
//
//   Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
//   Split timing reports into detailed and summary.  For serial code, we
//   report all trial values, and for parallel, skip the per-process vals.
//   Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
    int ret = 0;

    try
    {
#ifdef PARALLEL
        int rank, size;
        MPI_Init(&argc,&argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        cout << "MPI Task "<< rank << "/" << size - 1 << " starting....\n";
#endif

        OptionParser op;

        //Add shared options to the parser
        op.addOption("platform", OPT_INT, "0", "specify OpenCL platform to use",
                'p');
        op.addOption("device", OPT_VECINT, "", "specify device(s) to run on", 'd');
        op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
        op.addOption("size", OPT_VECINT, "1", "specify problem size", 's');
        op.addOption("infoDevices", OPT_BOOL, "",
                "show info for available platforms and devices", 'i');
        op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
        op.addOption("quiet", OPT_BOOL, "", "write minimum necessary to standard output", 'q');

        addBenchmarkSpecOptions(op);

        if (!op.parse(argc, argv))
        {
#ifdef PARALLEL
            if (rank == 0)
                op.usage();
            MPI_Finalize();
#else
            op.usage();
#endif
            return (op.HelpRequested() ? 0 : 1 );
        }

        if (op.getOptionBool("infoDevices"))
        {
#define DEBUG_DEVICE_CONTAINER 0
#ifdef PARALLEL
            // execute the following code only if I am the lowest-rank
            // process on this node
            NodeInfo NI;
            int mynoderank = NI.nodeRank();
            if (mynoderank==0)
            {
                int nlrrank, nlrsize;
                MPI_Comm nlrcomm = NI.getNLRComm();
                MPI_Comm_size(nlrcomm, &nlrsize);
                MPI_Comm_rank(nlrcomm, &nlrrank);

                OpenCLNodePlatformContainer ndc1;
                OpenCLMultiNodeContainer localMnc(ndc1);
                localMnc.doMerge (nlrrank, nlrsize, nlrcomm);
                if (rank==0)  // I am the global rank 0, print all configurations
                    localMnc.Print (cout);
            }
#else
            OpenCLNodePlatformContainer ndc1;
            ndc1.Print (cout);
#if DEBUG_DEVICE_CONTAINER
            OpenCLMultiNodeContainer mnc1(ndc1), mnc2;
            mnc1.Print (cout);
            ostringstream oss;
            mnc1.writeObject (oss);
            std::string temp(oss.str());
            cout << "Serialized MultiNodeContainer:\n" << temp;
            istringstream iss(temp);
            mnc2.readObject (iss);
            cout << "Unserialized object2:\n";
            mnc2.Print (cout);
            mnc1.merge (mnc2);
            cout << "==============\nObject1 after merging 1:\n";
            mnc1.Print (cout);
            mnc1.merge (mnc2);
            cout << "==============\nObject1 after merging 2:\n";
            mnc1.Print (cout);
#endif  // DEBUG
#endif  // PARALLEL
            return (0);
        }

        bool verbose = op.getOptionBool("verbose");

        // The device option supports specifying more than one device
        // for now, just choose the first one.
        int platform = op.getOptionInt("platform");

#ifdef PARALLEL
        NodeInfo ni;
        int myNodeRank = ni.nodeRank();
        if (verbose)
            cout << "Global rank "<<rank<<" is local rank "<<myNodeRank << endl;
#else
        int myNodeRank = 0;
#endif

        // If they haven't specified any devices, assume they
        // want the process with in-node rank N to use device N
        int deviceIdx = myNodeRank;

        // If they have, then round-robin the list of devices
        // among the processes on a node.
        vector<long long> deviceVec = op.getOptionVecInt("device");
        if (deviceVec.size() > 0)
        {
            int len = deviceVec.size();
            deviceIdx = deviceVec[myNodeRank % len];
        }

        // Check for an erroneous device
        if (deviceIdx >= GetNumOclDevices(platform)) {
            cerr << "Warning: device index: " << deviceIdx
                 << " out of range, defaulting to device 0.\n";
            deviceIdx = 0;
        }

        // Initialization
        if (verbose) cout << ">> initializing\n";
        cl_device_id devID = ListDevicesAndGetDevice(platform, deviceIdx);
        cl_int clErr;
        cl_context ctx = clCreateContext( NULL,     // properties
                                            1,      // number of devices
                                            &devID, // device
                                            NULL,   // notification function
                                            NULL,
                                            &clErr );
        CL_CHECK_ERROR(clErr);
        cl_command_queue queue = clCreateCommandQueue( ctx,
                                                        devID,
                                                        CL_QUEUE_PROFILING_ENABLE,
                                                        &clErr );
        CL_CHECK_ERROR(clErr);
        ResultDatabase resultDB;

        // Run the benchmark
        RunBenchmark(devID, ctx, queue, resultDB, op);

        clReleaseCommandQueue( queue );
        clReleaseContext( ctx );


#ifndef PARALLEL
        resultDB.DumpDetailed(cout);
#else
        ParallelResultDatabase pardb;
        pardb.MergeSerialDatabases(resultDB,MPI_COMM_WORLD);
        if (rank==0)
        {
            pardb.DumpSummary(cout);
            pardb.DumpOutliers(cout);
        }
#endif
    }
    catch( std::exception& e )
    {
        std::cerr << e.what() << std::endl;
        ret = 1;
    }
    catch( ... )
    {
        std::cerr << "unrecognized exception caught" << std::endl;
        ret = 1;
    }

#ifdef PARALLEL
    MPI_Finalize();
#endif

    return ret;
}
Example #9
int main(int argc, char **argv) {
   OptionParser opts;

   string mapFile, evidFile;

   int factor;

   opts.addOption(new StringOption("map", 
            "--map <filename>                 : map file",
            "../input/grid.bmp", mapFile, false));

   opts.addOption(new StringOption("evidence", 
            "--evidence <filename>            : evidence file",
            "", evidFile, true));

   opts.addOption(new IntOption("factor",
            "--factor <int>                   : scaling factor",
            1, factor, true));

   opts.parse(argc,argv);
   JetColorMap jet;
   RGBTRIPLE black = {0,0,0};

   RGBTRIPLE white = {255,255,255};

   RGBTRIPLE red;
   red.R = 255;
   red.G = 0;
   red.B = 0;

   RGBTRIPLE blue;
   blue.R = 0;
   blue.G = 0;
   blue.B = 255;

   RGBTRIPLE green;
   green.R = 0;
   green.G = 255;
   green.B = 0; 

   RGBTRIPLE initialColor;
   initialColor.R = 111; 
   initialColor.G = 49;
   initialColor.B = 152;
//   initialColor.G = 152;
//   initialColor.B = 49;


   RGBTRIPLE currentColor;
   currentColor.R = 181;
   currentColor.G = 165;
   currentColor.B = 213;
//   currentColor.G = 213;
//   currentColor.B = 165;


   RGBTRIPLE magenta;
   magenta.R = 255;
   magenta.G = 0;
   magenta.B = 255;

   RGBTRIPLE cyan;
   cyan.R = 0;
   cyan.G = 255;
   cyan.B = 255;

   RGBTRIPLE yellow;
   yellow.R = 255;
   yellow.G = 255;
   yellow.B = 0;

   BMPFile bmpFile(mapFile);

   Grid grid(bmpFile, black);

   
   Evidence testSet(evidFile, grid, factor);
 /* 
   if (1) { 
	   evid.split(trainSet, testSet, 0.8);
   }else{
	   evid.deterministicsplit(trainSet, testSet);
   }*/

#if 0 
   cout << "Creating Markov Model"<<endl;
   MarkovModel markmodel(grid, trainSet);

   double totalObj = 0.0;

   for (int i=0; i < testSet.size(); i++) {
      vector<pair<int, int> > path = testSet.at(i);
      cout << "Calling eval"<<endl;
      double obj = markmodel.eval(path);
      cout << "OBJ: "<<i<<" "<<obj<<endl;
	
      totalObj += obj;
   }

   cout << "TOTAL OBJ: "<<totalObj<<endl;

   cout << "AVERAGE OBJ: "<<totalObj/testSet.size()<<endl;
   return 0;
#endif
   vector<PosFeature> features;

   cout << "Constant Feature"<<endl;

   ConstantFeature constFeat(grid);
   features.push_back(constFeat);

   cout << "Obstacle Feature"<<endl;

   ObstacleFeature obsFeat(grid);
   features.push_back(obsFeat);

   for (int i=1; i < 5; i++) {
      cout << "Blur Feature "<<i<<endl;
      ObstacleBlurFeature blurFeat(grid, 5*i);
      features.push_back(blurFeat);
   }

   cout << "Creating feature array"<<endl;
   FeatureArray featArray2(features);

   cout << "Creating lower resolution feature array"<<endl;
   FeatureArray featArray(featArray2, factor);

   pair<int, int> dims = grid.dims();
   pair<int, int> lowDims((int)ceil((float)dims.first/factor),
         (int)ceil((float)dims.second/factor));

   vector<double> weights(features.size(), -0.0);
   weights.at(1) = -6.2;
   //for (int i=2; i < weights.size(); i++)
   //   weights.at(i) = -1.0;
   weights.at(0) = -2.23;//-2.23
   weights.at(2) = -0.35;
   weights.at(3) = -2.73;
   weights.at(4) = -0.92;
   weights.at(5) = -0.26;
   Parameters params(weights);

   OrderedWaveInferenceEngine engine(InferenceEngine::GRID8);

   vector<vector<double> > prior(dims.first,vector<double> (dims.second,0.0));
/*
   double divide = 1.0;
   vector<double> radiusWeight;
   for (int i=0; i < 20; i++) {
      radiusWeight.push_back(1.0/divide);
      divide*=2;
   }
   generatePrior(grid, trainSet, priorOrig, radiusWeight, factor);
 
   reducePrior(priorOrig, prior, factor);
*/

   vector<vector<vector<double> > > partition, backpartition;

   int time0 = time(0);

   BMPFile gridView(dims.first, dims.second);



   RewardMap rewards(featArray, params); 

   vector<double> sums(params.size(),0.00001);
      
   vector<vector<double> > occupancy;

   Predictor predictor(grid, rewards, engine); 
   
   predictor.setPrior(prior);


   cout << testSet.size() <<" Examples"<<endl;

   for (int i=0; i < testSet.size(); i++) {

      int index = 0;


      vector<pair<int, int> > traj = testSet.at(i);
      vector<double> times = testSet.getTimes(i);
      pair<int, int> initial = traj.front();
      pair<int,int> & botinGrid = testSet.at_bot(i);
      pair<double,double> & botinPoint = testSet.at_rbot(i);
      pair<double,double> & end = testSet.at_raw(i).back();

      predictor.setStart(initial); 

      double thresh = -20.0;
      double startTime = times.front();

      char buf[1024];
      sprintf(buf, "../output/pppredict%03d.dat", i);
      ofstream file(buf);

      for (double tick = startTime; index < traj.size(); tick+=0.4) {

         for ( ; index < traj.size() && times.at(index) < tick; index++); 

         if (index == traj.size() ) break;
 
         cout << "Evidence: "<<i<<"  timestep: "<<tick
            <<"   index: "<<index<<endl;
         predictor.predict(traj.at(index), occupancy);

         cout << "SIZE: "<<prior.size()<<endl;
         vector<vector<double> > pos = predictor.getPosterior();

         gridView.addBelief(pos, -30.0, 0.0,jet);

         grid.addObstacles(gridView, black);
         gridView.addLabel(botinGrid,green);
         vector<pair<int, int> > subTraj;

         subTraj.insert(subTraj.end(), traj.begin(), traj.begin()+index);

         gridView.addVector(subTraj, red, factor);

         sprintf(buf, "../compare/pp%03d-%03f.bmp", i, tick-startTime); 
         gridView.write(buf);
         //pair<double,double> values = predictor.check(traj.back());
         double cost = 0.0;
         for (int itr = 0; itr < index; itr++)
            cost += rewards.at(traj[itr].first, traj[itr].second);

         cout<<i<<" Normalizer: "<<predictor.getNormalizer(traj.back())
             <<" path cost: "<<cost<<" Probability: "<<cost+predictor.getNormalizer(traj.back())<<endl;

         vector<vector<vector<double> > > timeOcc 
            = predictor.getTimeOccupancy();

         vector<vector<double> > posterior = predictor.getPosterior();
         double maxV = -HUGE_VAL;
         pair<int,int> predestGrid;
         pair<double,double> predestPoint;

         for (int ii=0; ii< dims.first; ii++) { 
            for (int jj=0; jj < dims.second; jj++) {
               if (posterior[ii][jj] > maxV) {
                  predestGrid.first = ii;
                  predestGrid.second = jj;
               }
               maxV  = max(maxV, posterior.at(ii).at(jj));
            }
         }
         predestPoint = grid.grid2Real(predestGrid.first, predestGrid.second);
         double dist = sqrt((end.first-predestPoint.first)*(end.first-predestPoint.first)
                           +(end.second-predestPoint.second)*(end.second-predestPoint.second));

         double logloss = entropy(posterior);

         cout<<"final belief: "<<posterior.at(traj.back().first).at(traj.back().second)
             <<" max: "<<maxV
             <<" logloss: "<<logloss<<endl;
         cout<<botinGrid.first<<" "<<botinGrid.second
             <<" "<<predestGrid.first<<" "<<predestGrid.second<<endl;
         file<<tick-startTime
             <<" "<<logloss
             <<" "<<posterior.at(botinGrid.first).at(botinGrid.second)
             <<" "<<posterior.at(traj.back().first).at(traj.back().second)
             <<" "<<maxV<<" "<<dist<<endl;

      } 
      file.close();
   }

}
Example #10
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("nopinned", OPT_BOOL, "",
                 "disable usage of pinned (pagelocked) memory");
}
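A one-line sketch, assuming the getOptionBool accessor shown elsewhere, of how this flag might be consulted (the variable name is illustrative):

// Hypothetical: choose between pinned (page-locked) and pageable host buffers.
bool usePinned = !op.getOptionBool("nopinned");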
Example #11
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Kyle Spafford
// Creation: July 26, 2010
//
// Modifications:
//
// ****************************************************************************
void
addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "8",
            "specify MD kernel iterations", 'r');
}
Example #12
int main(int argc, char *argv[])
{
    int numdev=0, totalnumdev=0, numtasks, mympirank, dest, source, rc,
        mypair=0, count, tag=2, mynoderank,myclusterrank,nodenprocs;
    int *grp1, *grp2;
    int mygrprank,grpnumtasks;
    MPI_Group orig_group,bmgrp;
    MPI_Comm bmcomm,nlrcomm;
    ResultDatabase resultDB,resultDBWU,resultDB1;
    OptionParser op;
    ParallelResultDatabase pardb, pardb1;
    bool amGPUTask = false;
    volatile unsigned long long *mpidone;
    int i,shmid;

    /* Allocate System V shared memory */

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &mympirank);
    MPI_Comm_group(MPI_COMM_WORLD, &orig_group);


    //Add shared options to the parser
    op.addOption("device", OPT_VECINT, "0", "specify device(s) to run on",
                 'd');
    op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
    op.addOption("quiet", OPT_BOOL, "",
                 "write minimum necessary to standard output", 'q');
    op.addOption("passes", OPT_INT, "10", "specify number of passes", 'z');
    op.addOption("size", OPT_VECINT, "1", "specify problem size", 's');
    op.addOption("time", OPT_INT, "5", "specify running time in minutes", 't');
    op.addOption("outputFile", OPT_STRING, "output.txt", "specify output file",
                 'o');
    op.addOption("infoDevices", OPT_BOOL, "", "show summary info for available devices",
                 'i');
    op.addOption("fullInfoDevices", OPT_BOOL, "", "show full info for available devices");
    op.addOption("MPIminmsg", OPT_INT, "0", "specify minimum MPI message size");
    op.addOption("MPImaxmsg", OPT_INT, "16384",
                 "specify maximum MPI message size");
    op.addOption("MPIiter", OPT_INT, "1000",
                 "specify number of MPI benchmark iterations for each size");
    op.addOption("platform", OPT_INT, "0", "specify platform for device selection", 'y');

    if (!op.parse(argc, argv))
    {
        if (mympirank == 0)
            op.usage();
        MPI_Finalize();
        return 0;
    }

    int npasses = op.getOptionInt("passes");

    //our simple mapping
    NodeInfo NI;
    mynoderank = NI.nodeRank();         // rank of my process within the node
    myclusterrank = NI.clusterRank();   // cluster (essentially, node) id
    MPI_Comm smpcomm = NI.getSMPComm();

    if(mynoderank==0){
        shmid = shmget(IPC_PRIVATE,
                 sizeof(unsigned long long),
                 (IPC_CREAT | 0600));
    }

    MPI_Bcast(&shmid, 1, MPI_INT, 0, NI.getSMPComm());

    mpidone = ((volatile unsigned long long*) shmat(shmid, 0, 0));
    if (mynoderank == 0)
        shmctl(shmid, IPC_RMID, 0);
    *mpidone = 0;

    nlrcomm = NI.getNLRComm(); // communicator of all the lowest rank processes
                               // on all the nodes
    int numnodes = NI.numNodes();
    if ( numnodes%2!=0 )
    {
        if(mympirank==0)
            printf("\nThis test needs an even number of nodes\n");
        MPI_Finalize();
        exit(0);
    }
    int nodealr = NI.nodeALR();

    nodenprocs=NI.nodeNprocs();

    // determine how many GPU devices we are to use
    int devsPerNode = op.getOptionVecInt( "device" ).size();
    //cout<<mympirank<<":numgpus="<<devsPerNode<<endl;

    // if there are at least as many devices as processes on the node,
    // only use half of the processes
    if ( devsPerNode >= nodenprocs ) devsPerNode = nodenprocs/2;

    numdev = (mynoderank == 0) ? devsPerNode : 0;
    MPI_Allreduce(&numdev, &totalnumdev, 1, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD);
    numdev = devsPerNode;

    // determine whether I am to be a GPU or a comm task
    if( mynoderank < numdev )
    {
        amGPUTask = true;
    }

    //Divide tasks into two distinct groups based upon noderank
    grp1=(int *)calloc(totalnumdev, sizeof(int));
    grp2=(int *)calloc((numtasks-totalnumdev),sizeof(int));
    if (grp1==NULL || grp2==NULL)
    {
        printf("\n%d:calloc failed in %s",mympirank,__FUNCTION__);
        exit(1);
    }


    /*compute the groups*/
    int beginoffset[2]={0,0};
    if (mynoderank == 0)
    {
        int tmp[2];
        tmp[0] = numdev;
        tmp[1] = nodenprocs - numdev;
        if (mympirank == 0)
            MPI_Send(tmp, 2*sizeof(int), MPI_CHAR, 1, 112, nlrcomm);
        else
        {
            MPI_Status reqstat;
            MPI_Recv(beginoffset, 2*sizeof(int), MPI_CHAR, myclusterrank-1,
                     112, nlrcomm, &reqstat);
            if (myclusterrank < numnodes-1)
            {
                beginoffset[0] += numdev;
                beginoffset[1] += (nodenprocs-numdev);
                MPI_Send(beginoffset, 2*sizeof(int), MPI_CHAR, myclusterrank+1,
                         112, nlrcomm);
                beginoffset[0] -= numdev;
                beginoffset[1] -= (nodenprocs-numdev);
            }
        }
    }
    MPI_Bcast(beginoffset,2,MPI_INT,0,smpcomm);

    if ( amGPUTask )
    {
        // I am to do GPU work
        grp1[beginoffset[0]+mynoderank]=mympirank;
        grpnumtasks=totalnumdev;
    }
    else
    {
        // I am to do MPI communication work
        grp2[beginoffset[1]+(mynoderank-numdev)]=mympirank;
        grpnumtasks=numtasks-totalnumdev;
    }

    MPI_Allreduce(MPI_IN_PLACE, grp1, totalnumdev, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, grp2, (numtasks-totalnumdev), MPI_INT,
                            MPI_SUM, MPI_COMM_WORLD);

    if ( amGPUTask )
    {
        // I am to do GPU work, so will be part of GPU communicator
        MPI_Group_incl(orig_group, totalnumdev, grp1, &bmgrp);
    }
    else
    {
        // I am to do MPI communication work, so will be part of MPI
        // messaging traffic communicator
        MPI_Group_incl(orig_group, (numtasks-totalnumdev), grp2,
                        &bmgrp);
    }

    MPI_Comm_create(MPI_COMM_WORLD, bmgrp, &bmcomm);
    MPI_Comm_rank(bmcomm, &mygrprank);
    NodeInfo *GRPNI = new NodeInfo(bmcomm);
    int mygrpnoderank=GRPNI->nodeRank();
    int grpnodealr = GRPNI->nodeALR();
    int grpnodenprocs = GRPNI->nodeNprocs();
    MPI_Comm grpnlrcomm = GRPNI->getNLRComm();
    //note that clusterrank and number of nodes don't change for this child
    //group/comm


    //form node-random pairs (see README) among communication tasks
    if( amGPUTask )
    {
        //setup GPU in GPU tasks
        GPUSetup(op, mympirank, mynoderank);
    }
    else
    {
        int * pairlist = new int[numnodes];
        for (i=0;i<numnodes;i++) pairlist[i]=0;

        if ( mygrpnoderank==0 )
        {
            pairlist[myclusterrank]=grpnodealr;
            MPI_Allreduce(MPI_IN_PLACE,pairlist,numnodes,MPI_INT,MPI_SUM,
                          grpnlrcomm);
            mypair = RandomPairs(myclusterrank, numnodes, grpnlrcomm);
            mypair = pairlist[mypair];
        }
        for (i=0;i<numnodes;i++) pairlist[i]=0;
        if ( mygrpnoderank==0 )
            pairlist[myclusterrank]=mypair;
        MPI_Allreduce(MPI_IN_PLACE,pairlist,numnodes,MPI_INT,MPI_SUM,
                      bmcomm);
        mypair = pairlist[myclusterrank]+mygrpnoderank;
    }

    // ensure we are all synchronized before starting test
    MPI_Barrier(MPI_COMM_WORLD);

    //warmup run
    if ( amGPUTask )
    {
        GPUDriver(op, resultDBWU);
    }
    //first, individual runs for device benchmark
    for(i=0;i<npasses;i++){
        if ( amGPUTask )
        {
            GPUDriver(op, resultDB);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    //warmup run
    if ( !amGPUTask )
    {
        MPITest(op, resultDBWU, grpnumtasks, mygrprank, mypair, bmcomm);
    }
    //next, individual run for MPI Benchmark
    for(i=0;i<npasses;i++){
        if ( !amGPUTask )
        {
            MPITest(op, resultDB, grpnumtasks, mygrprank, mypair, bmcomm);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    //merge and print
    pardb.MergeSerialDatabases(resultDB, bmcomm);
    if (mympirank==0)
        cout<<endl<<"*****************************Sequential GPU and MPI runs****************************"<<endl;
    DumpInSequence(pardb, mygrprank, mympirank);

    // Simultaneous runs for observing impact of contention
    MPI_Barrier(MPI_COMM_WORLD);
    if ( amGPUTask )
    {
        do {
            if (mympirank == 0) cout << ".";
            GPUDriver(op, resultDB1);
            flush(cout);
        } while (*mpidone == 0);
        if (mympirank == 0) cout << "*" << endl;
    }
    else
    {
        for ( i=0;i<npasses;i++ )
        {
            MPITest(op, resultDB1, grpnumtasks, mygrprank, mypair, bmcomm);
        }
        *mpidone=1;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    //merge and print
    pardb1.MergeSerialDatabases(resultDB1,bmcomm);
    if (mympirank==0)
        cout<<endl<<"*****************************Simultaneous GPU and MPI runs****************************"<<endl;
    DumpInSequence(pardb1, mygrprank, mympirank);

    //print summary
    if ( !amGPUTask && mygrprank==0)
    {
        vector<ResultDatabase::Result> prelatency  = pardb.GetResultsForTest("MPI Latency(mean)");
        vector<ResultDatabase::Result> postlatency = pardb1.GetResultsForTest("MPI Latency(mean)");
        cout<<endl<<"Summarized Mean(Mean) MPI Baseline Latency vs. Latency with Contention";
        cout<<endl<<"MSG SIZE(B)\t";
        int msgsize=0;
        for (i=0; i<prelatency.size(); i++)
        {
            cout<<msgsize<<"\t";
            msgsize = (msgsize ? msgsize * 2 : msgsize + 1);
        }

        cout << endl <<"BASELATENCY\t";
        for (i=0; i<prelatency.size(); i++)
            cout<<setiosflags(ios::fixed) << setprecision(2)<<prelatency[i].GetMean() << "\t";

        cout << endl <<"CONTLATENCY\t";
        for (i=0; i<postlatency.size(); i++)
            cout<<setiosflags(ios::fixed) << setprecision(2)<<postlatency[i].GetMean() << "\t";
        flush(cout);
        cout<<endl;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    if ( amGPUTask && mympirank==0)
    {
        vector<ResultDatabase::Result> prespeed  = pardb.GetResultsForTest("DownloadSpeed(mean)");
        vector<ResultDatabase::Result> postspeed = pardb1.GetResultsForTest("DownloadSpeed(mean)");
        cout<<endl<<"Summarized Mean(Mean) GPU Baseline Download Speed vs. Download Speed with Contention";
        cout<<endl<<"MSG SIZE(KB)\t";
        int msgsize=1;
        for (i=0; i<prespeed.size(); ++i)
        {
            cout<<msgsize<<"\t";
            msgsize = (msgsize ? msgsize * 2 : msgsize + 1);
        }
        cout << endl <<"BASESPEED\t";
        for (i=0; i<prespeed.size(); ++i)
            cout<<setiosflags(ios::fixed) << setprecision(4)<<prespeed[i].GetMean() << "\t";

        cout << endl <<"CONTSPEED\t";
        for (i=0; i<postspeed.size(); ++i)
            cout<<setiosflags(ios::fixed) << setprecision(4)<<postspeed[i].GetMean() << "\t";
         cout<<endl;
    }

    if ( amGPUTask && mympirank==0)
    {
        vector<ResultDatabase::Result> pregpulat  = pardb.GetResultsForTest("DownloadLatencyEstimate(mean)");
        vector<ResultDatabase::Result> postgpulat = pardb1.GetResultsForTest("DownloadLatencyEstimate(mean)");
        cout<<endl<<"Summarized Mean(Mean) GPU Baseline Download Latency vs. Download Latency with Contention";
        cout<<endl<<"MSG SIZE\t";
        for (i=0; i<pregpulat.size(); ++i)
        {
            cout<<pregpulat[i].atts<<"\t";
        }
        cout << endl <<"BASEGPULAT\t";
        for (i=0; i<pregpulat.size(); ++i)
            cout<<setiosflags(ios::fixed) << setprecision(7)<<pregpulat[i].GetMean() << "\t";

        cout << endl <<"CONTGPULAT\t";
        for (i=0; i<postgpulat.size(); ++i)
            cout<<setiosflags(ios::fixed) << setprecision(7)<<postgpulat[i].GetMean() << "\t";
         cout<<endl;
    }
    //cleanup GPU
    if( amGPUTask )
    {
        GPUCleanup(op);
    }

    MPI_Finalize();

}
Example #13
int main(int argc, char **argv) {
   OptionParser opts;

   string mapFile,trainFile,testFile;

   int factor = 1;
   double step;

   opts.addOption(new StringOption("map", 
            "--map <filename>                 : map file",
            "../input/grid.bmp", mapFile, false));
   opts.addOption(new StringOption("evidence", 
            "--test evidence <filename>            : evidence file",
            "", testFile, true));

   opts.addOption(new DoubleOption("step",
            "--step <double>                   : inference interval",
            1.0, step, true));

   opts.parse(argc,argv);

   JetColorMap jet;
   RGBTRIPLE black = {0,0,0};
   RGBTRIPLE white = {255,255,255};
   RGBTRIPLE red;
   red.R = 255;
   red.G = 0;
   red.B = 0;
   RGBTRIPLE blue;
   blue.R = 0;
   blue.G = 0;
   blue.B = 255;
   RGBTRIPLE green;
   green.R = 0;
   green.G = 255;
   green.B = 0; 
   RGBTRIPLE initialColor;
   initialColor.R = 111; 
   initialColor.G = 49;
   initialColor.B = 152;
   RGBTRIPLE currentColor;
   currentColor.R = 181;
   currentColor.G = 165;
   currentColor.B = 213;
   RGBTRIPLE magenta;
   magenta.R = 255;
   magenta.G = 0;
   magenta.B = 255;
   RGBTRIPLE cyan;
   cyan.R = 0;
   cyan.G = 255;
   cyan.B = 255;
   RGBTRIPLE yellow;
   yellow.R = 255;
   yellow.G = 255;
   yellow.B = 0;

   BMPFile bmpFile(mapFile);
   Grid grid(bmpFile, black);

   
   Evidence testSet(testFile, grid, factor);
 //  Evidence trainSet(trainFile, grid, factor);

   pair<int, int> dims = grid.dims();
   
   cout << " Speed Feature"<<endl;
   vector<double> speedTable(VEL_DIM,0.0);
   speedTable.at(1) = 0.75;
   DisVecSeqFeature speedfeat(speedTable);

   vector<int> dimensions;
   dimensions.push_back(dims.first);
   dimensions.push_back(dims.second);
   dimensions.push_back(VEL_DIM);
   
   /* ****************************************
    *      INITIALIZE MARKOV DECISION PROCESS
    *      BASED MODEL PARAMETERS
    * ****************************************/
   vector<double> p_weights(NUMPOSFEAT,-0.0);
   p_weights.at(0) = -2.23; //-2.23 for PPP forecast
   p_weights.at(1) = -6.2;
   p_weights.at(2) = -0.35;
   p_weights.at(3) = -2.73;
   p_weights.at(4) = -0.92;
   p_weights.at(5) = -0.26;
   vector<double> r_PosWeights(NUMPOSFEAT+NUMROBFEAT, -0.0);
   r_PosWeights.at(0) = -3.83;
   r_PosWeights.at(1) = -8.36;
   r_PosWeights.at(2) = -2.65;
   r_PosWeights.at(3) = -5.43;
   r_PosWeights.at(4) = -3.15;
   r_PosWeights.at(5) = -3.30;
   //r_PosWeights.at(6) =  0.60;
   //r_PosWeights.at(7) =  0.45;
   vector<double> nr_PosWeights(NUMPOSFEAT+NUMROBFEAT, -0.0);
   nr_PosWeights.at(0) = -4.51;
   nr_PosWeights.at(1) = -6.2;
   nr_PosWeights.at(2) = -0.35;
   nr_PosWeights.at(3) = -2.73;
   nr_PosWeights.at(4) = -0.93;
   nr_PosWeights.at(5) = -0.28;
   //nr_PosWeights.at(6) = -0.50;
   //nr_PosWeights.at(7) = -0.286;
   vector<double> r_SeqWeights(VEL_DIM, -0.0);
   r_SeqWeights.at(0) = 0.59;
   r_SeqWeights.at(1) = -0.83;
   vector<double> nr_SeqWeights(VEL_DIM, -0.0);
   nr_SeqWeights.at(0) = -1.21;
   nr_SeqWeights.at(1) = 0.49;

   Parameters p(p_weights);
   Parameters r_Pos(r_PosWeights);
   Parameters nr_Pos(nr_PosWeights);
   Parameters r_Seq(r_SeqWeights);
   Parameters nr_Seq(nr_SeqWeights);

   /* ****************************************
    *      INITIALIZE LINEAR QUADRATIC CONTROL
    *      BASED MODEL PARAMETERS
    * ****************************************/
   M_6 A;
   A.setZero();
   A(0,0) = 1;
   A(1,1) = 1;
   A(4,2) = -1;
   A(5,3) = -1;
   M_6_2 B;
   B << 1, 0,
        0, 1,
        1, 0,
        0, 1,
        1, 0,
        0, 1;
   M_6 costM;
   ifstream infile("../params/nonrob2000.dat");
   for (int row = 0; row < costM.rows(); row++) {
      for (int col = 0; col < costM.cols(); col++) {
         double temp;
         infile >> temp;
         costM(row, col) = temp;
      }
   }
   infile.close();
   M_6 sigma;
   sigma << 0.001, 0,     0,     0,     0,     0,
            0,     0.001, 0,     0,     0,     0,
            0,     0,     0.005, 0,     0,     0,
            0,     0,     0,     0.005, 0,     0,
            0,     0,     0,     0,     0.005, 0,
            0,     0,     0,     0,     0,     0.005;


   /* ****************************************
    *      DECLARATION OF INFERENCE ENGINES
    * ****************************************/
   OrderedWaveInferenceEngine pp(InferenceEngine::GRID8);
   DisSeqOrderInferEngine mdpr(InferenceEngine::GRID8);
   DisSeqOrderInferEngine mdpnr(InferenceEngine::GRID8);
   ContinuousState cState;
   LQControlInference lq(A,B,sigma,costM,cState);
   lq.valueInference();


   IntentRecognizer IR(grid, p, r_Pos, r_Seq, nr_Pos, nr_Seq,
                       speedfeat, pp, mdpr, mdpnr, lq);

   cout << testSet.size() <<" Examples"<<endl;

   for (int i=0; i < testSet.size(); i++) {

      vector<pair<int, int> > & traj = testSet.at(i);
      vector<double> & vels = testSet.at_v(i);
      vector<double> times = testSet.getTimes(i);
      pair<int,int> & botinGrid = testSet.at_bot(i);
      vector<pair<double,double> > & obs = testSet.at_raw(i);
      vector<double> & rawTimes = testSet.at_rawTime(i);

      IR.combineForecast(traj, vels, obs, times, rawTimes,
                         botinGrid, i, step);
      
   }
}
Example #14
File: main.cpp  Project: Poojachitral/shoc
// ****************************************************************************
// Function: main
//
// Purpose:
//   The main function takes care of initialization (device and MPI),  then
//   performs the benchmark and prints results.
//
// Arguments:
//
//
// Programmer: Jeremy Meredith
// Creation:
//
// Modifications:
//   Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
//   Split timing reports into detailed and summary.  For serial code, we
//   report all trial values, and for parallel, skip the per-process vals.
//   Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
    int ret = 0;
    bool noprompt = false;

    try
    {
#ifdef PARALLEL
        int rank, size;
        MPI_Init(&argc,&argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        cerr << "MPI Task " << rank << "/" << size - 1 << " starting....\n";
#endif

        // Get args
        OptionParser op;
       
        //Add shared options to the parser
        op.addOption("device", OPT_VECINT, "0",
                "specify device(s) to run on", 'd');
        op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
        op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
        op.addOption("size", OPT_INT, "1", "specify problem size", 's');
        op.addOption("infoDevices", OPT_BOOL, "",
                "show info for available platforms and devices", 'i');
        op.addOption("quiet", OPT_BOOL, "", "write minimum necessary to standard output", 'q');
#ifdef _WIN32
        op.addOption("noprompt", OPT_BOOL, "", "don't wait for prompt at program exit");
#endif

        addBenchmarkSpecOptions(op);

        if (!op.parse(argc, argv))
        {
#ifdef PARALLEL
            if (rank == 0)
                op.usage();
            MPI_Finalize();
#else
            op.usage();
#endif
            return (op.HelpRequested() ? 0 : 1);
        }
        
        bool verbose = op.getOptionBool("verbose");
        bool infoDev = op.getOptionBool("infoDevices");
#ifdef _WIN32
        noprompt = op.getOptionBool("noprompt");
#endif

        int device;
#ifdef PARALLEL
        NodeInfo ni;
        int myNodeRank = ni.nodeRank();
        vector<long long> deviceVec = op.getOptionVecInt("device");
        if (myNodeRank >= deviceVec.size()) {
            // Default is for task i to test device i
            device = myNodeRank;
        } else {
            device = deviceVec[myNodeRank];
        }
#else
        device = op.getOptionVecInt("device")[0];
#endif
        int deviceCount;
        cudaGetDeviceCount(&deviceCount);
        if (device >= deviceCount) {
            cerr << "Warning: device index: " << device <<
            " out of range, defaulting to device 0.\n";
            device = 0;
        }

        // Initialization
        EnumerateDevicesAndChoose(device, infoDev);
        if( infoDev )
        {
            return 0;
        }
        ResultDatabase resultDB;

        // Run the benchmark
        RunBenchmark(resultDB, op);

#ifndef PARALLEL
        resultDB.DumpDetailed(cout);
#else
        ParallelResultDatabase pardb;
        pardb.MergeSerialDatabases(resultDB,MPI_COMM_WORLD);
        if (rank==0)
        {
            pardb.DumpSummary(cout);
            pardb.DumpOutliers(cout);
        }
#endif

    }
    catch( InvalidArgValue& e )
    {
        std::cerr << e.what() << ": " << e.GetMessage() << std::endl;
        ret = 1;
    }
    catch( std::exception& e )
    {
        std::cerr << e.what() << std::endl;
        ret = 1;
    }
    catch( ... )
    {
        ret = 1;
    }


#ifdef PARALLEL
    MPI_Finalize();
#endif

#ifdef _WIN32
    if (!noprompt)
    {
        cout << "Press return to exit\n";
        cin.get();
    }
#endif

    return ret;
}
Example #15
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes.
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op) {

    op.addOption("time", OPT_INT, "1",
        "specify running time in minutes", 't');
}
Example #16
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Kyle Spafford
// Creation: August 13, 2009
//
// Modifications:
//
// ****************************************************************************
void
addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "256",
                 "specify reduction iterations");
}
Example #17
File: FFT.cpp  Project: vetter/shoc
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing.  The user is allowed to specify
//   the size of the input data in megabytes if they are not using a
//   predefined size (i.e. the -s option).
//
// Arguments:
//   op: the options parser / parameter database
//
// Programmer: Collin McCurdy
// Creation: September 08, 2009
// Returns:  nothing
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("MB", OPT_INT, "0", "data size (in megabytes)");
}
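For illustration only (the element size is an assumption, not taken from the FFT benchmark), a size given in megabytes is typically converted to a byte or element count like this:

// Hypothetical conversion of the "MB" option into a working-set size.
unsigned long long bytes = (unsigned long long)op.getOptionInt("MB") * 1024ULL * 1024ULL;
unsigned long long nComplexDP = bytes / (2 * sizeof(double));   // assumed element size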
Example #18
File: Sort.cpp  Project: ellen-hl/shoc-mic
// ****************************************************************************
// Function: addBenchmarkSpecOptions
//
// Purpose:
//   Add benchmark specific options parsing
//
// Arguments:
//   op: the options parser / parameter database
//
// Returns:  nothing
//
// Programmer: Kyle Spafford
// Creation: August 13, 2009
//
// Modifications:
//
// ****************************************************************************
void addBenchmarkSpecOptions(OptionParser &op)
{
    op.addOption("iterations", OPT_INT, "256", "specify scan iterations");
    op.addOption("nthreads", OPT_INT, "64", "specify number of threads");

}
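A hedged sketch, assuming an OpenMP build (not taken from the Sort benchmark itself), of how the nthreads option might be applied:

#include <omp.h>

// Hypothetical: hand the "nthreads" option to the OpenMP runtime.
int nthreads = op.getOptionInt("nthreads");
if (nthreads > 0)
    omp_set_num_threads(nthreads);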
Example #19
int main(int argc, char **argv) {
   OptionParser opts;

   string mapFile, evidFile;//interactFile,ignoreFile;

   int factor;

   opts.addOption(new StringOption("map", 
            "--map <filename>                 : map file",
            "../input/grid.bmp", mapFile, false));

   opts.addOption(new StringOption("evidence", 
            "--evidence <filename>            : evidence file",
            "", evidFile, true));
   opts.addOption(new IntOption("factor", 
            "--factor <int>                   : scaling factor",
            1, factor, true));


   opts.parse(argc,argv);

   cout << "Loading Map File"<<endl;
   BMPFile bmpFile(mapFile); 
   Grid grid(bmpFile, black);
//   cout << "xdim: "<<grid.dims().first<<" yDim: "<<grid.dims().second<<endl;
   cout << "Loading Evidence"<<endl;
   //Evidence trainSet(evidFile, grid, factor);
   /* used when we need to train two separate models
   Evidence evid_int(interactFile, grid, factor);
   Evidence evid_ig(ignoreFile, grid, factor);
   Evidence train_int(grid),test_int(grid),train_ig(grid), test_ig(grid);
   evid_int.split(train_int, test_int, 0.05);
   evid_ig.split(train_ig, test_ig, 0.05);
   */
   Evidence evid(evidFile,grid,factor);
   Evidence trainSet(grid),testSet(grid);
   evid.split(trainSet,testSet,0.05);
   cout<<"Optimize over "<<trainSet.size()<<" examples"<<endl;
#if 0 
   for (int i=0; i < evid.size(); i++) {
      cout << "Evid "<<i<<endl;
      vector<pair<int, int> > traj = evid.at(i);
      vector<double> timestamps = evid.getTimes(i);

      cout << timestamps.size()<<"  "<<traj.size()<<endl;

      for (int j=0; j < traj.size(); j++) {
         cout << timestamps.at(j)<<"  "<<traj.at(j).first
            << "  "<<traj.at(j).second<<endl;
      } 
   }
#endif
//   testSet.write("testTraj.data");

   cout << "Generating Feature Set"<<endl;

   vector<PosFeature> features;

   cout << "   Constant Feature"<<endl;

   ConstantFeature constFeat(grid);
   features.push_back(constFeat);

   cout << "   Obstacle Feature"<<endl;

   ObstacleFeature obsFeat(grid);
   features.push_back(obsFeat);
	

   for (int i=1; i < 5; i++) {
      cout << "   Blur Feature "<<i<<endl;
      ObstacleBlurFeature blurFeat(grid, 5*i);
      features.push_back(blurFeat);
   }

   /*
   cout << "    Robot Feature"<<endl;
   RobotGlobalFeature robglobal(grid,snackbot,factor);
   features.push_back(robglobal);
   //  robot local blurred features
   for (int i=1; i < 5; i++) {
      cout << "  RobotBlur Feature "<<i<<endl;
      RobotLocalBlurFeature robblur(grid,snackbot,5*i,factor);
      features.push_back(robblur);
   }
	
   */
 
   /* 
   cout << "   Creating feature array"<<endl;
   FeatureArray featArray2(features);

   cout << "   Creating lower resolution feature array"<<endl;
   FeatureArray featArray(featArray2, factor);
   */

   cout << " Speed Feature"<<endl;
   vector<double> speedTable(2,0.0);
   speedTable.at(1) = 0.75;
   //speedTable.at(2) = 1.1;
   DisVecSeqFeature speedfeat(speedTable);


   /* Robset training weights: 
	* -3.83 -8.35991 -2.6512 -5.43475 -3.15203 -3.29758
	*  0.596987 0.439284
	* 0.589445 -0.82448
	* Non-robot-ending training weights:
	* -4.57257  -6.2 -0.3537 -2.7385 -0.9357 -0.2797
	* -0.495205 -0.2863
	* -1.2225 0.43993
	*/
   vector<double> weights(6+2+2, -0.0);
   weights.at(0) = -25;	
   weights.at(1) = -8.36;
   weights.at(2) = -2.65;
   weights.at(3) = -5.43;
   weights.at(4) = -3.17;
   weights.at(5) = -3.34;
   
   weights.at(6) = 0.5; // robot feature
   weights.at(7) = 0.3; // robot feature
  
   weights.at(8) = -0.29;  // velocity feature
   weights.at(9) = -1.11; // velocity feature

   //weights.push_back(1.5);//the last parameter is for velocity feature
   Parameters params(weights);

   DisSeqOrderInferEngine engine(8,InferenceEngine::GRID8);

   trajOptimizerplus optimizer(grid,trainSet,features,speedfeat,engine);

   optimizer.optimize(params,0.005,1000,1.0,OPT_EXP);

   return 0;

}