コード例 #1
0
// Benchmark driver for <y,Ax> = y^T*A*x (visible portion of the snippet only).
// Parses the problem-size options, allocates y, x and A as Kokkos views on the
// chosen memory space, fills them with ones through host mirrors, and then
// evaluates the product nrepeat times inside a timed loop.
int main(int argc, char* argv[])
{

  int N = -1 ;       // number of rows 2^12
  int M = -1 ;       // number of columns 2^10
  int S = -1 ;      // total size 2^22
  int nrepeat = 100 ;    // number of repeats of the test

  // Returns the value that follows option argv[i] and advances i past it.
  // Exits with a message instead of reading argv[argc] (a null pointer) when
  // the option is the last thing on the command line.
  auto nextValue = [argc, argv]( int &i ) -> const char* {
    if ( i + 1 >= argc ) {
      fprintf(stderr, "  Missing value after option %s (use -h for help)\n", argv[i]);
      exit(1);
    }
    return argv[++i];
  };

  // Read command line arguments (argv[0] is the program name, so start at 1).
  // NOTE(review): -N parses its exponent with atoi while -M and -S use atof,
  // so only the latter two accept fractional exponents; kept as is.
  for(int i=1; i<argc; i++) {
    if( (strcmp(argv[i], "-N") == 0) || (strcmp(argv[i], "-Rows") == 0) ) {
      N = static_cast<int>( pow( 2, atoi(nextValue(i)) ) );
      printf("  User N is %d\n",N);
    } else if( (strcmp(argv[i], "-M") == 0) || (strcmp(argv[i], "-Columns") == 0)) {
      M = static_cast<int>( pow( 2, atof(nextValue(i)) ) );
      printf("  User M is %d\n",M);
    } else if( (strcmp(argv[i], "-S") == 0) || (strcmp(argv[i], "-Size") == 0)) {
      S = static_cast<int>( pow( 2, atof(nextValue(i)) ) );
      printf("  User S is %d\n",S);
    } else if( strcmp(argv[i], "-nrepeat") == 0) {
      nrepeat = atoi(nextValue(i));
    } else if( (strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) ) {

      printf("  y^T*A*x Options:\n");
      printf("  -Rows (-N) <int>:      exponent num, determines number of rows 2^num (default: 2^12 = 4096)\n");
      printf("  -Columns (-M) <int>:   exponent num, determines number of columns 2^num (default: 2^10 = 1024)\n");
      printf("  -Size (-S) <int>:      exponent num, determines total matrix size 2^num (default: 2^22 = 4096*1024 )\n");
      printf("  -nrepeat <int>:        number of repetitions (default: 100)\n");
      printf("  -help (-h):            print this message\n\n");
      exit(1); }
  }

  // Check sizes. N, M and S are still -1 here unless the user set them;
  // presumably checkSizes fills in the defaults listed in the help text — confirm.
  checkSizes( N, M, S, nrepeat );

  Kokkos::initialize(argc,argv);

  // Execution space used by the parallel policies below.
  // typedef Kokkos::Serial   ExecSpace ;
  // typedef Kokkos::Threads  ExecSpace ;
  // typedef Kokkos::OpenMP   ExecSpace ;
  typedef Kokkos::Cuda        ExecSpace ;

  //EXERCISE: Choose device memory space
  // typedef Kokkos::HostSpace    MemSpace; 
  // typedef Kokkos::OpenMP       MemSpace; 
  typedef Kokkos::CudaSpace       MemSpace; 
  // typedef Kokkos::CudaUVMSpace MemSpace; 

  typedef Kokkos::LayoutLeft   Layout ;
  // typedef Kokkos::LayoutRight  Layout ;

  typedef Kokkos::RangePolicy<ExecSpace> range_policy ;

  // Allocate y, x vectors and Matrix A:
  // Device
  typedef Kokkos::View<double*, Layout, MemSpace>   ViewVectorType;
  typedef Kokkos::View<double**, Layout, MemSpace>   ViewMatrixType;
  ViewVectorType devy("devy", N);
  ViewVectorType devx("devx", M);
  ViewMatrixType devA("devA", N, M);

  // Host mirrors of the device views; filled on the host, then copied over.
  ViewVectorType::HostMirror y =  Kokkos::create_mirror_view(devy);
  ViewVectorType::HostMirror x =  Kokkos::create_mirror_view(devx);
  ViewMatrixType::HostMirror A =  Kokkos::create_mirror_view(devA);

  // Initialize y vector on host
  for (int i = 0; i < N; ++i) {
    y( i ) = 1; 
  }

  // Initialize x vector on host
  for (int i = 0; i < M; ++i) {
    x( i ) = 1;
  }

  // Initialize A matrix, note 2D indexing computation on host
  for (int j = 0; j < N; ++j) {
    for ( int i = 0 ; i < M ; ++i ) {
      A( j , i ) = 1; 
    }
  }

  //Deep copy host view to device views
  Kokkos::deep_copy(devy, y);
  Kokkos::deep_copy(devx, x);
  Kokkos::deep_copy(devA, A);

  // EXERCISE: Use hierarchical parallel execution policy to initialize
  // EXERCISE hints:
  // typedef Kokkos::TeamPolicy<ExecSpace>               team_policy ;
  // typedef Kokkos::TeamPolicy<ExecSpace>::member_type  member_type ;

  // Timer products
  struct timeval begin,end;

  gettimeofday(&begin,NULL);

  for ( int repeat = 0; repeat < nrepeat; repeat++) {

    //Application: <y,Ax> = y^T*A*x
    // EXERCISE: Convert from range_policy to team_policy
    // One reduction work item per row j: each computes the dot product of
    // row j of A with x, scales it by y(j), and adds it to the reduction.
    double result = 0;
    Kokkos::parallel_reduce( range_policy( 0, N ), KOKKOS_LAMBDA ( int j, double &update ) {
      // EXERCISE: Convert to nested Kokkos::parallel_reduce
      // EXERCISE hint: Kokkos::TeamThreadRange( ??? ) and [&]
      double temp2 = 0;
      for ( int i = 0 ; i < M ; ++i ) {
        temp2 += devA( j , i ) * devx( i );
      }
      // EXERCISE: Only one team member update the result
      update += devy( j ) * temp2;
    }, result );
コード例 #2
0
ファイル: example_ppa.cpp プロジェクト: bencrabbe/nlp-toolbox
// Trains a feed-forward network on batches drawn from a DataSampler, printing
// the dev-set score every 20 epochs and the test-set score once at the end.
//   epochs     - number of training iterations; one freshly sampled batch each
//   alpha      - forwarded as the first argument of net.train_one
//                (presumably the learning rate — confirm against the network API)
//   batch_size - forwarded to samp.generate_sample for every batch
// Always returns 0.
int run_sampler(unsigned epochs,float alpha,unsigned batch_size){

    //load sampler
    string training_path = "PPAttachData/training.lemma";
    string param_path = "PPAttachData/wordsketches/";
    string vpath = param_path + string("vdistrib");
    string x1vpath = param_path + string("x1givenv");
    string pvpath = param_path + string("pgivenv");
    string x2vppath = param_path + string("x2givenvp");
    string px1path = param_path + string("pgivenx1");
    string x2x1ppath = param_path + string("x2givenx1p");
    DataSampler samp(training_path.c_str(),
                     vpath.c_str(),
                     x1vpath.c_str(),
                     pvpath.c_str(),
                     x2vppath.c_str(),
                     px1path.c_str(),
                     x2x1ppath.c_str());

    //load dev and test
    PPADataEncoder dev_set("PPAttachData/devset.lemma");
    PPADataEncoder test_set("PPAttachData/test.lemma");

    //load Word vectors
    Word2vec w2v;
    // NOTE(review): never read below; kept because constructing an af::array
    // may initialize the ArrayFire runtime — confirm before removing.
    af::array w2v_embeddings;
    w2v.load_dictionary("PPAttachData/embeddings/deps.words.lemmatized");
    //w2v.filter(xdict);

    //make network: layers are registered from the loss down to the lookup,
    //then connected pairwise in the same order.
    vector<string> ydict;
    samp.getYdictionary(ydict);
    SymbolicFeedForwardNetwork<string,string> net;
    net.set_output_layer("loss",new SoftMaxLoss<string>(ydict));
    net.add_layer("top",new LinearLayer());
    net.add_layer("hidden",new ReLUActivation(400));
    net.add_layer("A",new LinearLayer());
    net.add_input_layer("lookupA",new LinearLookup<string>(w2v.get_keys(),w2v.get_values(),4,false));
    net.connect_layers("loss","top");
    net.connect_layers("top","hidden");
    net.connect_layers("hidden","A");
    net.connect_layers("A","lookupA");

    // The counter is unsigned to match `epochs`: this avoids a signed/unsigned
    // comparison and signed overflow for very large epoch counts.
    for(unsigned E = 0; E < epochs; ++E){
      vector<string> ydata;
      vector<vector<string>> xdata;
      //af::timer start1 = af::timer::start();
      samp.generate_sample(ydata,xdata,batch_size);
      //printf("elapsed seconds (sampling): %g\n", af::timer::stop(start1));

      // Run the sampled batch through the encoder before handing it to the network.
      PPADataEncoder sampdata(ydata,xdata);
      vector<string> enc_ydata;
      vector<vector<string>> enc_xdata(1,vector<string>());
      sampdata.getYdata(enc_ydata);
      sampdata.getXdata(enc_xdata[0]);

      //af::timer start2 = af::timer::start();
      net.set_batch_data(enc_ydata,enc_xdata);
      float loss = net.train_one(alpha,true,true);
      //printf("elapsed seconds (backprop): %g\n", af::timer::stop(start2));

      if (E % 20 == 0){
        // Every 20th epoch (including epoch 0): evaluate on the dev set.
        vector<string> devy;
        vector<vector<string>> devx(1,vector<string>());
        dev_set.getYdata(devy);
        dev_set.getXdata(devx[0]);
        float acc = net.eval_avg(devy,devx);        //auto-eval on dev data
        cout << "epoch " << E << ", loss= " << loss << ", eval (dev) = " << acc << endl;
      }else {
        cout << "epoch" << E <<endl;
      }

    }

    // Final evaluation on the held-out test set.
    vector<string> testy;
    vector<vector<string>> testx(1,vector<string>());
    test_set.getYdata(testy);
    test_set.getXdata(testx[0]);
    float acc = net.eval_avg(testy,testx);
    cout << "final eval (test) = " << acc << endl;
    return 0;
}