int main(int narg, char* args[]) { Kokkos::initialize(narg,args); // Produce some 3D random data (see Algorithms/01_random_numbers for more info) Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32); Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); Kokkos::fill_random(data,rand_pool64,100); // A global value to put the result in Kokkos::View<int> gsum("Sum"); // Each team handles a slice of the data // Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes. // The team_size_max function will determine the maximum number of threads taking into account // shared memory requirements of the Functor. // The maximum vector length is hardware dependent but can always be smaller than the hardware allows. // The vector length must be a power of 2. const Kokkos::TeamPolicy<> policy( 512 , Kokkos::TeamPolicy<>::team_size_max(SomeCorrelation(data,gsum)) , 16); Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) ); Kokkos::fence(); // Copy result value back int sum = 0; Kokkos::deep_copy(sum,gsum); printf("Result %i\n",sum); Kokkos::finalize(); }
SEXP gmean(SEXP x, SEXP narm) { SEXP ans; int i, protecti=0, thisgrp, n; //clock_t start = clock(); if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error("na.rm must be TRUE or FALSE"); if (!isVectorAtomic(x)) error("GForce mean can only be applied to columns, not .SD or similar. Likely you're looking for 'DT[,lapply(.SD,mean),by=,.SDcols=]'. See ?data.table."); if (!LOGICAL(narm)[0]) { ans = PROTECT(gsum(x,narm)); protecti++; switch(TYPEOF(ans)) { case LGLSXP: case INTSXP: ans = PROTECT(coerceVector(ans, REALSXP)); protecti++; case REALSXP: for (i=0; i<ngrp; i++) REAL(ans)[i] /= grpsize[i]; // let NA propogate break; default : error("Internal error: gsum returned type '%s'. typeof(x) is '%s'", type2char(TYPEOF(ans)), type2char(TYPEOF(x))); } UNPROTECT(protecti); return(ans); } // na.rm=TRUE. Similar to gsum, but we need to count the non-NA as well for the divisor n = LENGTH(x); if (grpn != n) error("grpn [%d] != length(x) [%d] in gsum", grpn, length(x)); long double *s = malloc(ngrp * sizeof(long double)); if (!s) error("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE", ngrp, sizeof(long double)); memset(s, 0, ngrp * sizeof(long double)); // all-0 bits == (long double)0, checked in init.c int *c = malloc(ngrp * sizeof(int)); if (!c) error("Unable to allocate %d * %d bytes for counts in gmean na.rm=TRUE", ngrp, sizeof(int)); memset(c, 0, ngrp * sizeof(int)); // all-0 bits == (int)0, checked in init.c switch(TYPEOF(x)) { case LGLSXP: case INTSXP: for (i=0; i<n; i++) { thisgrp = grp[i]; if(INTEGER(x)[i] == NA_INTEGER) continue; s[thisgrp] += INTEGER(x)[i]; // no under/overflow here, s is long double c[thisgrp]++; } break; case REALSXP: for (i=0; i<n; i++) { thisgrp = grp[i]; if (ISNAN(REAL(x)[i])) continue; s[thisgrp] += REAL(x)[i]; c[thisgrp]++; } break; default: free(s); free(c); error("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); } ans = PROTECT(allocVector(REALSXP, ngrp)); for (i=0; i<ngrp; i++) { if (c[i]==0) { REAL(ans)[i] = R_NaN; continue; } // NaN to follow base::mean s[i] /= c[i]; if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf; else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf; else REAL(ans)[i] = (double)s[i]; } free(s); free(c); UNPROTECT(1); // Rprintf("this gmean na.rm=TRUE took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC); return(ans); }
int main(int argc, char *argv[]) { // feenableexcept(FE_ALL_EXCEPT & ~FE_INEXACT); // This little trick lets us print to std::cout only if a (dummy) command-line argument is provided. int iprint = argc - 1; Teuchos::RCP<std::ostream> outStream; Teuchos::oblackholestream bhs; // outputs nothing /*** Initialize communicator. ***/ Teuchos::GlobalMPISession mpiSession (&argc, &argv, &bhs); Teuchos::RCP<const Teuchos::Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm(); const int myRank = comm->getRank(); if ((iprint > 0) && (myRank == 0)) { outStream = Teuchos::rcp(&std::cout, false); } else { outStream = Teuchos::rcp(&bhs, false); } int errorFlag = 0; // *** Example body. try { /*** Read in XML input ***/ std::string filename = "input.xml"; Teuchos::RCP<Teuchos::ParameterList> parlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( filename, parlist.ptr() ); std::string stoch_filename = "stochastic.xml"; Teuchos::RCP<Teuchos::ParameterList> stoch_parlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( stoch_filename, stoch_parlist.ptr() ); /*** Initialize main data structure. ***/ Teuchos::RCP<const Teuchos::Comm<int> > serial_comm = Teuchos::rcp(new Teuchos::SerialComm<int>()); Teuchos::RCP<ElasticitySIMPOperators<RealT> > data = Teuchos::rcp(new ElasticitySIMPOperators<RealT>(serial_comm, parlist, outStream)); /*** Initialize density filter. ***/ Teuchos::RCP<DensityFilter<RealT> > filter = Teuchos::rcp(new DensityFilter<RealT>(serial_comm, parlist, outStream)); /*** Build vectors and dress them up as ROL vectors. ***/ Teuchos::RCP<const Tpetra::Map<> > vecmap_u = data->getDomainMapA(); Teuchos::RCP<const Tpetra::Map<> > vecmap_z = data->getCellMap(); Teuchos::RCP<Tpetra::MultiVector<> > u_rcp = createTpetraVector(vecmap_u); Teuchos::RCP<Tpetra::MultiVector<> > z_rcp = createTpetraVector(vecmap_z); Teuchos::RCP<Tpetra::MultiVector<> > du_rcp = createTpetraVector(vecmap_u); Teuchos::RCP<Tpetra::MultiVector<> > dw_rcp = createTpetraVector(vecmap_u); Teuchos::RCP<Tpetra::MultiVector<> > dz_rcp = createTpetraVector(vecmap_z); Teuchos::RCP<Tpetra::MultiVector<> > dz2_rcp = createTpetraVector(vecmap_z); Teuchos::RCP<std::vector<RealT> > vc_rcp = Teuchos::rcp(new std::vector<RealT>(1, 0)); Teuchos::RCP<std::vector<RealT> > vc_lam_rcp = Teuchos::rcp(new std::vector<RealT>(1, 0)); Teuchos::RCP<std::vector<RealT> > vscale_rcp = Teuchos::rcp(new std::vector<RealT>(1, 0)); // Set all values to 1 in u, z. RealT one(1), two(2); u_rcp->putScalar(one); // Set z to gray solution. RealT volFrac = parlist->sublist("ElasticityTopoOpt").get<RealT>("Volume Fraction"); z_rcp->putScalar(volFrac); // Set scaling vector for constraint RealT W = parlist->sublist("Geometry").get<RealT>("Width"); RealT H = parlist->sublist("Geometry").get<RealT>("Height"); (*vscale_rcp)[0] = one/std::pow(W*H*(one-volFrac),two); // Set Scaling vector for density bool useZscale = parlist->sublist("Problem").get<bool>("Use Scaled Density Vectors"); RealT densityScaling = parlist->sublist("Problem").get<RealT>("Density Scaling"); Teuchos::RCP<Tpetra::MultiVector<> > scaleVec = createTpetraVector(vecmap_z); scaleVec->putScalar(densityScaling); if ( !useZscale ) { scaleVec->putScalar(one); } Teuchos::RCP<const Tpetra::Vector<> > zscale_rcp = scaleVec->getVector(0); // Randomize d vectors. du_rcp->randomize(); //du_rcp->scale(0); dw_rcp->randomize(); dz_rcp->randomize(); //dz_rcp->scale(0); dz2_rcp->randomize(); // Create ROL::TpetraMultiVectors. Teuchos::RCP<ROL::Vector<RealT> > up = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(u_rcp)); Teuchos::RCP<ROL::Vector<RealT> > dup = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(du_rcp)); Teuchos::RCP<ROL::Vector<RealT> > dwp = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(dw_rcp)); Teuchos::RCP<ROL::Vector<RealT> > zp = Teuchos::rcp(new ROL::PrimalScaledTpetraMultiVector<RealT>(z_rcp,zscale_rcp)); Teuchos::RCP<ROL::Vector<RealT> > dzp = Teuchos::rcp(new ROL::PrimalScaledTpetraMultiVector<RealT>(dz_rcp,zscale_rcp)); Teuchos::RCP<ROL::Vector<RealT> > dz2p = Teuchos::rcp(new ROL::PrimalScaledTpetraMultiVector<RealT>(dz2_rcp,zscale_rcp)); Teuchos::RCP<ROL::Vector<RealT> > vcp = Teuchos::rcp(new ROL::PrimalScaledStdVector<RealT>(vc_rcp,vscale_rcp)); Teuchos::RCP<ROL::Vector<RealT> > vc_lamp = Teuchos::rcp(new ROL::DualScaledStdVector<RealT>(vc_lam_rcp,vscale_rcp)); // Create ROL SimOpt vectors. ROL::Vector_SimOpt<RealT> x(up,zp); ROL::Vector_SimOpt<RealT> d(dup,dzp); ROL::Vector_SimOpt<RealT> d2(dwp,dz2p); /*** Build sampler. ***/ BuildSampler<RealT> buildSampler(comm,*stoch_parlist,*parlist); buildSampler.print("samples"); /*** Compute compliance objective function scaling. ***/ RealT min(ROL::ROL_INF<RealT>()), gmin(0), max(0), gmax(0), sum(0), gsum(0), tmp(0); Teuchos::Array<RealT> dotF(1, 0); RealT minDensity = parlist->sublist("ElasticitySIMP").get<RealT>("Minimum Density"); for (int i = 0; i < buildSampler.get()->numMySamples(); ++i) { data->updateF(buildSampler.get()->getMyPoint(i)); (data->getVecF())->dot(*(data->getVecF()),dotF.view(0,1)); tmp = minDensity/dotF[0]; min = ((min < tmp) ? min : tmp); max = ((max > tmp) ? max : tmp); sum += buildSampler.get()->getMyWeight(i) * tmp; } ROL::Elementwise::ReductionMin<RealT> ROLmin; buildSampler.getBatchManager()->reduceAll(&min,&gmin,ROLmin); ROL::Elementwise::ReductionMax<RealT> ROLmax; buildSampler.getBatchManager()->reduceAll(&max,&gmax,ROLmax); buildSampler.getBatchManager()->sumAll(&sum,&gsum,1); bool useExpValScale = stoch_parlist->sublist("Problem").get("Use Expected Value Scaling",false); RealT scale = (useExpValScale ? gsum : gmin); /*** Build objective function, constraint and reduced objective function. ***/ Teuchos::RCP<ROL::Objective_SimOpt<RealT> > obj = Teuchos::rcp(new ParametrizedObjective_PDEOPT_ElasticitySIMP<RealT>(data, filter, parlist,scale)); Teuchos::RCP<ROL::EqualityConstraint_SimOpt<RealT> > con = Teuchos::rcp(new ParametrizedEqualityConstraint_PDEOPT_ElasticitySIMP<RealT>(data, filter, parlist)); Teuchos::RCP<ROL::Reduced_Objective_SimOpt<RealT> > objReduced = Teuchos::rcp(new ROL::Reduced_Objective_SimOpt<RealT>(obj, con, up, zp, dwp)); Teuchos::RCP<ROL::EqualityConstraint<RealT> > volcon = Teuchos::rcp(new EqualityConstraint_PDEOPT_ElasticitySIMP_Volume<RealT>(data, parlist)); /*** Build bound constraint ***/ Teuchos::RCP<Tpetra::MultiVector<> > lo_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_z, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > hi_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_z, 1, true)); lo_rcp->putScalar(0.0); hi_rcp->putScalar(1.0); Teuchos::RCP<ROL::Vector<RealT> > lop = Teuchos::rcp(new ROL::PrimalScaledTpetraMultiVector<RealT>(lo_rcp, zscale_rcp)); Teuchos::RCP<ROL::Vector<RealT> > hip = Teuchos::rcp(new ROL::PrimalScaledTpetraMultiVector<RealT>(hi_rcp, zscale_rcp)); Teuchos::RCP<ROL::BoundConstraint<RealT> > bnd = Teuchos::rcp(new ROL::BoundConstraint<RealT>(lop,hip)); /*** Build Stochastic Functionality. ***/ ROL::StochasticProblem<RealT> opt(*stoch_parlist,objReduced,buildSampler.get(),zp,bnd); /*** Check functional interface. ***/ bool checkDeriv = parlist->sublist("Problem").get("Derivative Check",false); if ( checkDeriv ) { // *outStream << "Checking Objective:" << "\n"; // obj->checkGradient(x,d,true,*outStream); // obj->checkHessVec(x,d,true,*outStream); // obj->checkHessSym(x,d,d2,true,*outStream); // *outStream << "Checking Constraint:" << "\n"; // con->checkAdjointConsistencyJacobian(*dup,d,x,true,*outStream); // con->checkAdjointConsistencyJacobian_1(*dwp, *dup, *up, *zp, true, *outStream); // con->checkAdjointConsistencyJacobian_2(*dwp, *dzp, *up, *zp, true, *outStream); // con->checkInverseJacobian_1(*up,*up,*up,*zp,true,*outStream); // con->checkInverseAdjointJacobian_1(*up,*up,*up,*zp,true,*outStream); // con->checkApplyJacobian(x,d,*up,true,*outStream); // con->checkApplyAdjointHessian(x,*dup,d,x,true,*outStream); *outStream << "Checking Reduced Objective:" << "\n"; opt.checkObjectiveGradient(*dzp,true,*outStream); opt.checkObjectiveHessVec(*dzp,true,*outStream); *outStream << "Checking Volume Constraint:" << "\n"; volcon->checkAdjointConsistencyJacobian(*vcp,*dzp,*zp,true,*outStream); volcon->checkApplyJacobian(*zp,*dzp,*vcp,true,*outStream); volcon->checkApplyAdjointHessian(*zp,*vcp,*dzp,*zp,true,*outStream); } /*** Run optimization ***/ ROL::AugmentedLagrangian<RealT> augLag(opt.getObjective(),volcon,*vc_lamp,1.0, *opt.getSolutionVector(),*vcp,*parlist); ROL::Algorithm<RealT> algo("Augmented Lagrangian",*parlist,false); std::clock_t timer = std::clock(); algo.run(*opt.getSolutionVector(),*vc_lamp,augLag,*volcon,*opt.getBoundConstraint(),true,*outStream); *outStream << "Optimization time: " << static_cast<RealT>(std::clock()-timer)/static_cast<RealT>(CLOCKS_PER_SEC) << " seconds." << std::endl; data->outputTpetraVector(z_rcp, "density.txt"); data->outputTpetraVector(u_rcp, "state.txt"); data->outputTpetraVector(zscale_rcp, "weights.txt"); // Build objective function distribution RealT val(0); int nSamp = stoch_parlist->sublist("Problem").get("Number of Output Samples",10); stoch_parlist->sublist("Problem").set("Number of Samples",nSamp); BuildSampler<RealT> buildSampler_dist(comm,*stoch_parlist,*parlist); std::stringstream name; name << "samples_" << buildSampler_dist.getBatchManager()->batchID() << ".txt"; std::ofstream file; file.open(name.str()); std::vector<RealT> sample; RealT tol = 1.e-8; std::clock_t timer_print = std::clock(); for (int i = 0; i < buildSampler_dist.get()->numMySamples(); ++i) { sample = buildSampler_dist.get()->getMyPoint(i); objReduced->setParameter(sample); val = objReduced->value(*zp,tol); for (int j = 0; j < static_cast<int>(sample.size()); ++j) { file << sample[j] << " "; } file << val << "\n"; } file.close(); *outStream << "Output time: " << static_cast<RealT>(std::clock()-timer_print)/static_cast<RealT>(CLOCKS_PER_SEC) << " seconds." << std::endl; } catch (std::logic_error err) { *outStream << err.what() << "\n"; errorFlag = -1000; }; // end try if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; return 0; }