int main(int argc, char* argv[]) { int N = -1 ; // number of rows 2^12 int M = -1 ; // number of columns 2^10 int S = -1 ; // total size 2^22 int nrepeat = 100 ; // number of repeats of the test // Read command line arguments for(int i=0; i<argc; i++) { if( (strcmp(argv[i], "-N") == 0) || (strcmp(argv[i], "-Rows") == 0) ) { N = pow( 2, atoi(argv[++i]) ); printf(" User N is %d\n",N); } else if( (strcmp(argv[i], "-M") == 0) || (strcmp(argv[i], "-Columns") == 0)) { M = pow( 2, atof(argv[++i]) ); printf(" User M is %d\n",M); } else if( (strcmp(argv[i], "-S") == 0) || (strcmp(argv[i], "-Size") == 0)) { S = pow( 2, atof(argv[++i]) ); printf(" User S is %d\n",S); } else if( strcmp(argv[i], "-nrepeat") == 0) { nrepeat = atoi(argv[++i]); } else if( (strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) ) { printf(" y^T*A*x Options:\n"); printf(" -Rows (-N) <int>: exponent num, determines number of rows 2^num (default: 2^12 = 4096)\n"); printf(" -Columns (-M) <int>: exponent num, determines number of columns 2^num (default: 2^10 = 1024)\n"); printf(" -Size (-S) <int>: exponent num, determines total matrix size 2^num (default: 2^22 = 4096*1024 )\n"); printf(" -nrepeat <int>: number of repetitions (default: 100)\n"); printf(" -help (-h): print this message\n\n"); exit(1); } } //Check Sizes checkSizes( N, M, S, nrepeat ); Kokkos::initialize(argc,argv); // typedef Kokkos::Serial ExecSpace ; // typedef Kokkos::Threads ExecSpace ; // typedef Kokkos::OpenMP ExecSpace ; typedef Kokkos::Cuda ExecSpace ; //EXERCISE: Choose device memory space // typedef Kokkos::HostSpace MemSpace; // typedef Kokkos::OpenMP MemSpace; typedef Kokkos::CudaSpace MemSpace; // typedef Kokkos::CudaUVMSpace MemSpace; typedef Kokkos::LayoutLeft Layout ; // typedef Kokkos::LayoutRight Layout ; typedef Kokkos::RangePolicy<ExecSpace> range_policy ; // Allocate y, x vectors and Matrix A: // Device typedef Kokkos::View<double*, Layout, MemSpace> ViewVectorType; typedef Kokkos::View<double**, Layout, MemSpace> ViewMatrixType; ViewVectorType devy("devy", N); ViewVectorType devx("devx", M); ViewMatrixType devA("devA", N, M); //Host mirror ViewVectorType::HostMirror y = Kokkos::create_mirror_view(devy); ViewVectorType::HostMirror x = Kokkos::create_mirror_view(devx); ViewMatrixType::HostMirror A = Kokkos::create_mirror_view(devA); // Initialize y vector on host for (int i = 0; i < N; ++i) { y( i ) = 1; } // Initialize x vector on host for (int i = 0; i < M; ++i) { x( i ) = 1; } // Initialize A matrix, note 2D indexing computation on host for (int j = 0; j < N; ++j) { for ( int i = 0 ; i < M ; ++i ) { A( j , i ) = 1; } } //Deep copy host view to device views Kokkos::deep_copy(devy, y); Kokkos::deep_copy(devx, x); Kokkos::deep_copy(devA, A); // EXERCISE: Use hierarchical parallel execution policy to initialize // EXERCISE hints: // typedef Kokkos::TeamPolicy<ExecSpace> team_policy ; // typedef Kokkos::TeamPolicy<ExecSpace>::member_type member_type ; // Timer products struct timeval begin,end; gettimeofday(&begin,NULL); for ( int repeat = 0; repeat < nrepeat; repeat++) { //Application: <y,Ax> = y^T*A*x // EXERCISE: Convert from range_policy to team_policy double result = 0; Kokkos::parallel_reduce( range_policy( 0, N ), KOKKOS_LAMBDA ( int j, double &update ) { // EXERCISE: Convert to nested Kokkos::parallel_reduce // EXERCISE hint: Kokkos::TeamThreadRange( ??? ) and [&] double temp2 = 0; for ( int i = 0 ; i < M ; ++i ) { temp2 += devA( j , i ) * devx( i ); } // EXERCISE: Only one team member update the result update += devy( j ) * temp2; }, result );
int run_sampler(unsigned epochs,float alpha,unsigned batch_size){ //load sampler string training_path = "PPAttachData/training.lemma"; string param_path = "PPAttachData/wordsketches/"; string vpath = param_path + string("vdistrib"); string x1vpath = param_path + string("x1givenv"); string pvpath = param_path + string("pgivenv"); string x2vppath = param_path + string("x2givenvp"); string px1path = param_path + string("pgivenx1"); string x2x1ppath = param_path + string("x2givenx1p"); DataSampler samp(training_path.c_str(), vpath.c_str(), x1vpath.c_str(), pvpath.c_str(), x2vppath.c_str(), px1path.c_str(), x2x1ppath.c_str()); //load dev and test PPADataEncoder dev_set("PPAttachData/devset.lemma"); PPADataEncoder test_set("PPAttachData/test.lemma"); //load Word vectors Word2vec w2v; vector<string> wvdict; af::array w2v_embeddings; w2v.load_dictionary("PPAttachData/embeddings/deps.words.lemmatized"); //w2v.filter(xdict); //make network vector<string> ydict; samp.getYdictionary(ydict); SymbolicFeedForwardNetwork<string,string> net; net.set_output_layer("loss",new SoftMaxLoss<string>(ydict)); net.add_layer("top",new LinearLayer()); net.add_layer("hidden",new ReLUActivation(400)); net.add_layer("A",new LinearLayer()); net.add_input_layer("lookupA",new LinearLookup<string>(w2v.get_keys(),w2v.get_values(),4,false)); net.connect_layers("loss","top"); net.connect_layers("top","hidden"); net.connect_layers("hidden","A"); net.connect_layers("A","lookupA"); for(int E = 0; E < epochs;++E){ vector<string> ydata; vector<vector<string>> xdata; //af::timer start1 = af::timer::start(); samp.generate_sample(ydata,xdata,batch_size); //printf("elapsed seconds (sampling): %g\n", af::timer::stop(start1)); PPADataEncoder sampdata(ydata,xdata); vector<string> enc_ydata; vector<vector<string>> enc_xdata(1,vector<string>()); sampdata.getYdata(enc_ydata); sampdata.getXdata(enc_xdata[0]); //af::timer start2 = af::timer::start(); net.set_batch_data(enc_ydata,enc_xdata); float loss = net.train_one(alpha,true,true); //printf("elapsed seconds (backprop): %g\n", af::timer::stop(start2)); if (E % 20 == 0){ vector<string> devy; vector<vector<string>> devx(1,vector<string>()); dev_set.getYdata(devy); dev_set.getXdata(devx[0]); float acc = net.eval_avg(devy,devx); //auto-eval on dev data cout << "epoch " << E << ", loss= " << loss << ", eval (dev) = " << acc << endl; }else { cout << "epoch" << E <<endl; } } vector<string> testy; vector<vector<string>> testx(1,vector<string>()); test_set.getYdata(testy); test_set.getXdata(testx[0]); float acc = net.eval_avg(testy,testx); cout << "final eval (test) = " << acc << endl; return 0; }