// Fills the strain-rate fields at every (cell, quadrature point) of the
// workset from the velocity gradient Ugrad:
//   epsilonXX = Ugrad(0,0)
//   epsilonYY = Ugrad(1,1)
//   epsilonXY = 0.5*(Ugrad(0,1) + Ugrad(1,0))
//   epsilonB  = epsilonXX^2 + epsilonYY^2 + epsilonXX*epsilonYY + epsilonXY^2
// Only cells [0, workset.numCells) and quadrature points [0, numQPs) are
// written.
void EpsilonL1L2<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  for (std::size_t c = 0; c < workset.numCells; ++c)
  {
    for (std::size_t q = 0; q < numQPs; ++q)
    {
      // Normal components are taken directly from the diagonal of Ugrad.
      epsilonXX(c,q) = Ugrad(c,q,0,0);
      epsilonYY(c,q) = Ugrad(c,q,1,1);

      // Symmetrized off-diagonal component.
      epsilonXY(c,q) = 0.5*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0));

      // Combined quantity; it reads back the three fields written just above,
      // so the assignments must stay in this order.
      epsilonB(c,q) = epsilonXX(c,q)*epsilonXX(c,q)
                    + epsilonYY(c,q)*epsilonYY(c,q)
                    + epsilonXX(c,q)*epsilonYY(c,q)
                    + epsilonXY(c,q)*epsilonXY(c,q);
    }
  }
}
// Evaluates the viscosity field mu at every (cell, quadrature point) of the
// workset, according to visc_type:
//   CONSTANT  : mu = 1.
//   EXPTRIG   : closed-form expression in the quadrature-point coordinates
//               (coordVec) and the parameters A and n.
//   GLENSLAW  : mu = flowFactor(cell) * (epsSq + ff)^power with
//               power = 0.5*(1/n - 1), where epsSq is assembled from Ugrad
//               and ff = 10^(-10*homotopyParam) is a regularization term.
//               When homotopyParam == 0 the viscosity is just the per-cell
//               flow factor.
// Any other visc_type leaves mu untouched.
void ViscosityFO<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const std::size_t nCells = workset.numCells;
  double a = 1.0; // parameter of the EXPTRIG expression

  switch (visc_type) {

    case CONSTANT: {
      for (std::size_t c = 0; c < nCells; ++c) {
        for (std::size_t q = 0; q < numQPs; ++q)
          mu(c,q) = 1.0;
      }
      break;
    }

    case EXPTRIG: {
      for (std::size_t c = 0; c < nCells; ++c) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          MeshScalarT x = coordVec(c,q,0);
          MeshScalarT y2pi = 2.0*pi*coordVec(c,q,1);
          MeshScalarT muargt = (a*a + 4.0*pi*pi - 2.0*pi*a)*sin(y2pi)*sin(y2pi)
                             + 1.0/4.0*(2.0*pi+a)*(2.0*pi+a)*cos(y2pi)*cos(y2pi);
          muargt = sqrt(muargt)*exp(a*x);
          mu(c,q) = 1.0/2.0*pow(A, -1.0/n)*pow(muargt, 1.0/n - 1.0);
        }
      }
      break;
    }

    case GLENSLAW: {
      // One flow factor per cell; entries stay value-initialized if
      // flowRate_type matches none of the cases below.
      std::vector<ScalarT> cellFlowFactor(nCells);

      switch (flowRate_type) {
        case UNIFORM:
          for (std::size_t c = 0; c < nCells; ++c)
            cellFlowFactor[c] = 1.0/2.0*pow(A, -1.0/n);
          break;
        case TEMPERATUREBASED:
          for (std::size_t c = 0; c < nCells; ++c)
            cellFlowFactor[c] = 1.0/2.0*pow(flowRate(temperature(c)), -1.0/n);
          break;
        case FROMFILE:
        case FROMCISM:
          for (std::size_t c = 0; c < nCells; ++c)
            cellFlowFactor[c] = 1.0/2.0*pow(flowFactorA(c), -1.0/n);
          break;
      }

      const double power = 0.5*(1.0/n - 1.0);

      if (homotopyParam == 0.0) {
        // Constant-in-strain-rate viscosity: just the per-cell flow factor.
        for (std::size_t c = 0; c < nCells; ++c) {
          for (std::size_t q = 0; q < numQPs; ++q)
            mu(c,q) = cellFlowFactor[c];
        }
      }
      else {
        // Glen's law viscosity, regularized by ff which depends on homotopyParam.
        ScalarT ff = pow(10.0, -10.0*homotopyParam);

        for (std::size_t c = 0; c < nCells; ++c) {
          for (std::size_t q = 0; q < numQPs; ++q) {
            ScalarT& u00 = Ugrad(c,q,0,0); // epsilon_xx
            ScalarT& u11 = Ugrad(c,q,1,1); // epsilon_yy

            // epsilon_xx^2 + epsilon_yy^2 + epsilon_xx*epsilon_yy
            ScalarT epsilonEqpSq = u00*u00 + u11*u11 + u00*u11;

            // + 0.25*(u_y + v_x)^2, i.e. the square of 0.5*(Ugrad(0,1) + Ugrad(1,0))
            epsilonEqpSq += 0.25*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0))*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0));

            // Extra gradient directions (dim >= 2): + 0.25*(Ugrad(0,dim)^2 + Ugrad(1,dim)^2)
            for (int dim = 2; dim < numDims; ++dim)
              epsilonEqpSq += 0.25*(Ugrad(c,q,0,dim)*Ugrad(c,q,0,dim) + Ugrad(c,q,1,dim)*Ugrad(c,q,1,dim) );

            // Regularization term keeps the base of the power away from zero.
            epsilonEqpSq += ff;

            mu(c,q) = cellFlowFactor[c]*pow(epsilonEqpSq, power);
          }
        }
      }
      break;
    }
  }
}
// Sampling loop of the Hamiltonian Monte Carlo sampler.
//
// Resets the collected statistics, obtains an initial position from
// starting_position(), and then, for as long as `running` is true:
//   1. draws a fresh N(0,1) momentum p,
//   2. integrates L leapfrog steps of size epsilon using Ugrad(),
//   3. accepts or rejects the proposal with the Metropolis rule
//        r <= exp(U(old) - U(new) + K(old) - K(new)),  r ~ Uniform[0,1),
//   4. appends the resulting position (new or old) to `samples` and to the
//      running sums sum_mean / sum_covariance / sum_N under solution_lock,
//   5. if `adaptive`, rescales epsilon every 50 iterations from the observed
//      accept rate (<= 0.65 shrinks by 0.8, >= 0.85 grows by 1.1).
// While `paused` is set the loop sleeps in one-second steps.
// threadIsRunning is true for the duration of the call.
//
// The uniform variate for the accept test is drawn from the same locally
// seeded std::mt19937 engine as the momentum, instead of the process-global
// rand(), which is not required to be thread-safe and has coarse resolution.
void HMC_abstract<T>::__sampler_loop()
{
	threadIsRunning = true;

	samples.clear();
	sum_mean.zero();
	sum_covariance.zero();
	sum_N = 0;

	// q = location, p = momentum, H(q,p) = hamiltonian
	math::vertex<T> q, p;

	starting_position(q); // initial position q
	p.resize(q.size());   // momentum is initially zero
	p.zero();

	T epsilon = T(0.01f);      // leapfrog step length (adapted below)
	const unsigned int L = 20; // number of leapfrog steps per proposal (fixed)

	std::random_device rd;
	std::mt19937 gen(rd());
	std::normal_distribution<> rng(0, 1); // N(0,1) variables
	auto normalrnd = std::bind(rng, std::ref(gen));
	std::uniform_real_distribution<> unif(0.0, 1.0); // accept/reject variate

	// Used to adaptively finetune step length epsilon based on accept rate.
	// accept_rate counts accepted proposals until it is normalized below.
	T accept_rate = T(0.0f);
	unsigned int accept_rate_samples = 0;

	while(running) // keep sampling until stopped
	{
		for(unsigned int i=0;i<p.size();i++)
			p[i] = T(normalrnd()); // Normal distribution

		math::vertex<T> old_q = q;
		math::vertex<T> current_p = p;

		// Leapfrog integration: half step for p, L full steps for q with
		// full p steps in between, closing half step for p.
		p -= T(0.5f) * epsilon * Ugrad(q);

		for(unsigned int i=0;i<L;i++){
			q += epsilon * p;
			if(i != L-1)
				p -= epsilon*Ugrad(q);
		}

		p -= T(0.5f) * epsilon * Ugrad(q);

		p = -p; // negate momentum so that the proposal is symmetric

		T current_U  = U(old_q);
		T proposed_U = U(q);

		T current_K  = T(0.0f);
		T proposed_K = T(0.0f);

		for(unsigned int i=0;i<p.size();i++){
			current_K  += T(0.5f)*current_p[i]*current_p[i];
			proposed_K += T(0.5f)*p[i]*p[i];
		}

		// Metropolis accept/reject step.
		const T r = T(unif(gen));
		const bool accepted =
			(r <= exp(current_U-proposed_U+current_K-proposed_K));

		if(!accepted)
			q = old_q; // reject: the chain stays at the previous position

		// The current position (new or repeated) always becomes a sample.
		pthread_mutex_lock( &solution_lock );

		if(sum_N > 0){
			sum_mean += q;
			sum_covariance += q.outerproduct();
		}
		else{
			sum_mean = q;
			sum_covariance = q.outerproduct();
		}
		sum_N++;

		samples.push_back(q);

		pthread_mutex_unlock( &solution_lock );

		if(adaptive){
			if(accepted)
				accept_rate++;
			accept_rate_samples++;

			// adapt step length every 50 iterations
			if(accept_rate_samples >= 50)
			{
				accept_rate /= accept_rate_samples;

				if(accept_rate <= T(0.65f)){
					epsilon = T(0.8)*epsilon;
				}
				else if(accept_rate >= T(0.85f)){
					epsilon = T(1.1)*epsilon;
				}

				accept_rate = T(0.0f);
				accept_rate_samples = 0;
			}
		}

		while(paused && running) // pause
			sleep(1);
	}

	threadIsRunning = false;
}
std::tuple<arma::fmat, arma::fmat, arma::fmat> Worker::factorize(float lambda, bool clamp, bool reg, int reg_thr, int stop_tol) { feenableexcept(FE_DIVBYZERO|FE_INVALID|FE_OVERFLOW); petuum::RowAccessor rowacc; // Initialize tables with random values //arma::arma_rng::set_seed_random(); gaml::util::table::randomizeTable(usertable, rank, Ruser.n_rows, useroffset); gaml::util::table::randomizeTable(prodtable, rank, Rprod.n_cols, prodoffset); gaml::util::table::randomizeTable(wordtable, rank, Rword.n_words, wordoffset); float last_se_train=0; float last_se_vali=0; setable.Inc(1, id*2, Rvali.n_nz); setable.Inc(1, id*2+1, Rtest.n_nz); petuum::PSTableGroup::GlobalBarrier(); // Fetch U, P and T auto U = gaml::util::table::loadMatrix(usertable, Rword.n_rows, rank); auto P = gaml::util::table::loadMatrix(prodtable, Rword.n_cols, rank); auto T = gaml::util::table::loadMatrix(wordtable, Ruser.n_words, rank); auto sum_sizes = read_split_sum(1); auto vali_size = std::get<0>(sum_sizes); auto test_size = std::get<1>(sum_sizes); for (int round = 0; round < iterations; round++) { /////// // Compute gradient for U /////// arma::fmat Ugrad(Ruser.n_rows, rank, arma::fill::zeros); arma::fmat Unum(Ruser.n_rows, rank, arma::fill::zeros); arma::fmat Udenom(Ruser.n_rows, rank, arma::fill::zeros); // iterate over all up pairs in Ruser for (std::size_t i = 0; i != Ruser.n_nz; ++i) { int userind = Ruser.rows[i]; int prodind = Ruser.cols[i]; auto wordbag = Ruser.getWordBagAt(i); Unum.row(userind - useroffset) += P.row(prodind) % (wordbag * T); Udenom.row(userind - useroffset) += P.row(prodind) % ((U.row(userind) % P.row(prodind) * T.t()) * T); } arma::fmat Ulocal = U.rows(useroffset, useroffset + Ruser.n_rows - 1); // prevent div by zero Udenom += 10E-16f; Ugrad = (Ulocal % Unum / Udenom) - Ulocal; if(reg && round > reg_thr) { Ugrad = Ugrad - lambda * Ulocal % Ulocal / Udenom; } // Update U table gaml::util::table::updateMatrixSlice(Ugrad, usertable, Ugrad.n_rows, Ugrad.n_cols, useroffset); 
petuum::PSTableGroup::GlobalBarrier(); // Fetch updated U U = gaml::util::table::loadMatrix(usertable, U.n_rows, U.n_cols); if(clamp){ U = arma::clamp(U, 0.0, std::numeric_limits<float>::max()); } /////// // Compute gradient for P /////// arma::fmat Pgrad(Rprod.n_cols, rank, arma::fill::zeros); arma::fmat Pnum(Rprod.n_cols, rank, arma::fill::zeros); arma::fmat Pdenom(Rprod.n_cols, rank, arma::fill::zeros); // iterate over all up pairs in Rprod for (std::size_t i = 0; i != Rprod.n_nz; ++i) { int userind = Rprod.rows[i]; int prodind = Rprod.cols[i]; auto wordbag = Rprod.getWordBagAt(i); Pnum.row(prodind - prodoffset) += U.row(userind) % (wordbag * T); Pdenom.row(prodind - prodoffset) += U.row(userind) % ((U.row(userind) % P.row(prodind) * T.t()) * T); } arma::fmat Plocal = P.rows(prodoffset, prodoffset + Rprod.n_cols - 1); Pdenom += 10E-16f; Pgrad = (Plocal % Pnum / Pdenom) - Plocal; if(reg && round > reg_thr) { Pgrad = Pgrad - lambda * Plocal % Plocal / Pdenom; } // Update P table gaml::util::table::updateMatrixSlice(Pgrad, prodtable, Pgrad.n_rows, Pgrad.n_cols, prodoffset); petuum::PSTableGroup::GlobalBarrier(); // Fetch updated P P = gaml::util::table::loadMatrix(prodtable, P.n_rows, P.n_cols); if(clamp) { P = arma::clamp(P, 0.0, std::numeric_limits<float>::max()); } /////// // Compute gradient for T /////// arma::fmat Tgrad(Rword.n_words, rank, arma::fill::zeros); arma::fmat Tnum(Rword.n_words, rank, arma::fill::zeros); arma::fmat Tdenom(Rword.n_words, rank, arma::fill::zeros); arma::fmat Tlocal = T.rows(wordoffset, Rword.n_words + wordoffset - 1); // iterate over all uv pairs in Rword for (std::size_t i = 0; i != Rword.n_nz; ++i) { int userind = Rword.rows[i]; int prodind = Rword.cols[i]; auto wordbag = Rword.getWordBagAt(i); arma::frowvec user_times_prod = (U.row(userind) % P.row(prodind)); arma::frowvec pred = user_times_prod * Tlocal.t(); Tnum += wordbag.t() * user_times_prod; Tdenom += pred.t() * user_times_prod; } Tdenom += 10E-16f; Tgrad = (Tlocal % Tnum / 
Tdenom) - Tlocal; if(reg && round > reg_thr) { Tgrad = Tgrad - lambda * Tlocal % Tlocal / Tdenom; } // Update T table gaml::util::table::updateMatrixSlice(Tgrad, wordtable, Tgrad.n_rows, Tgrad.n_cols, wordoffset); petuum::PSTableGroup::GlobalBarrier(); // Fetch updated T T = gaml::util::table::loadMatrix(wordtable, T.n_rows, T.n_cols); if(clamp) { T = arma::clamp(T, 0.0, std::numeric_limits<float>::max()); } update_setable(U, P, T, round, last_se_train, last_se_vali); petuum::PSTableGroup::GlobalBarrier(); update_mse(round); if(id == 0) { output(round+1, se_train_vec[round % mse_log] / Rword.n_nz, se_vali_vec[round % mse_log] / vali_size); } if(check_stop(round, stop_tol)) { break; } } float se_test = eval(U, P, T, Rtest); setable.Inc(2, id, se_test); petuum::PSTableGroup::GlobalBarrier(); if(id == 0) { std::vector<float> se_test_vec; const auto& row = setable.Get<petuum::DenseRow<float>>(2, &rowacc); row.CopyToVector(&se_test_vec); float mse_test = std::accumulate(se_test_vec.begin(), se_test_vec.end(), 0.0f); std::cout << "MSE test: " << mse_test / test_size << std::endl; } return std::make_tuple(U, P, T); }
// Assembles the residual field for the cells of the workset.
//
// Residual is first zeroed in full. Then, depending on eqn_type:
//   FELIX   : two residual components per node, built from the viscosity
//             muFELIX, the velocity gradient Ugrad, the weighted basis
//             gradients wGradBF and the body force (force * wBF).
//   POISSON : a single residual component, Ugrad(0,:) . wGradBF + force*wBF.
// A separate code path is used for numDims == 3 and for every other value of
// numDims (treated as the 2D case). Any other eqn_type leaves the residual
// at zero.
void StokesFOResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  typedef Intrepid::FunctionSpaceTools FST;

  // Clear the whole residual before accumulating into it.
  for (std::size_t idx = 0; idx < Residual.size(); ++idx)
    Residual(idx)=0.0;

  const bool is3D = (numDims == 3);

  if (eqn_type == FELIX) {
    if (is3D) {
      for (std::size_t cell = 0; cell < workset.numCells; ++cell) {
        // Stress contribution, accumulated quadrature point by quadrature point.
        for (std::size_t qp = 0; qp < numQPs; ++qp) {
          ScalarT& mu = muFELIX(cell,qp);
          ScalarT strs00 = 2.0*mu*(2.0*Ugrad(cell,qp,0,0) + Ugrad(cell,qp,1,1));
          ScalarT strs11 = 2.0*mu*(2.0*Ugrad(cell,qp,1,1) + Ugrad(cell,qp,0,0));
          ScalarT strs01 = mu*(Ugrad(cell,qp,1,0)+ Ugrad(cell,qp,0,1));
          ScalarT strs02 = mu*Ugrad(cell,qp,0,2);
          ScalarT strs12 = mu*Ugrad(cell,qp,1,2);

          for (std::size_t node = 0; node < numNodes; ++node) {
            Residual(cell,node,0) += strs00*wGradBF(cell,node,qp,0) +
                                     strs01*wGradBF(cell,node,qp,1) +
                                     strs02*wGradBF(cell,node,qp,2);
            Residual(cell,node,1) += strs01*wGradBF(cell,node,qp,0) +
                                     strs11*wGradBF(cell,node,qp,1) +
                                     strs12*wGradBF(cell,node,qp,2);
          }
        }

        // Body-force contribution, added after all stress terms of the cell.
        for (std::size_t qp = 0; qp < numQPs; ++qp) {
          ScalarT& frc0 = force(cell,qp,0);
          ScalarT& frc1 = force(cell,qp,1);

          for (std::size_t node = 0; node < numNodes; ++node) {
            Residual(cell,node,0) += frc0*wBF(cell,node,qp);
            Residual(cell,node,1) += frc1*wBF(cell,node,qp);
          }
        }
      }
    }
    else {
      // 2D case: stress and force terms are accumulated together.
      for (std::size_t cell = 0; cell < workset.numCells; ++cell) {
        for (std::size_t node = 0; node < numNodes; ++node) {
          for (std::size_t qp = 0; qp < numQPs; ++qp) {
            ScalarT& mu = muFELIX(cell,qp);

            Residual(cell,node,0) += 2.0*mu*((2.0*Ugrad(cell,qp,0,0) + Ugrad(cell,qp,1,1))*wGradBF(cell,node,qp,0) +
                                     0.5*(Ugrad(cell,qp,0,1) + Ugrad(cell,qp,1,0))*wGradBF(cell,node,qp,1)) +
                                     force(cell,qp,0)*wBF(cell,node,qp);
            Residual(cell,node,1) += 2.0*mu*(0.5*(Ugrad(cell,qp,0,1) + Ugrad(cell,qp,1,0))*wGradBF(cell,node,qp,0) +
                                     (Ugrad(cell,qp,0,0) + 2.0*Ugrad(cell,qp,1,1))*wGradBF(cell,node,qp,1)) +
                                     force(cell,qp,1)*wBF(cell,node,qp);
          }
        }
      }
    }
  }
  else if (eqn_type == POISSON) {
    // Laplace (Poisson) operator on the first solution component.
    if (is3D) {
      for (std::size_t cell = 0; cell < workset.numCells; ++cell) {
        for (std::size_t node = 0; node < numNodes; ++node) {
          for (std::size_t qp = 0; qp < numQPs; ++qp) {
            Residual(cell,node,0) += Ugrad(cell,qp,0,0)*wGradBF(cell,node,qp,0) +
                                     Ugrad(cell,qp,0,1)*wGradBF(cell,node,qp,1) +
                                     Ugrad(cell,qp,0,2)*wGradBF(cell,node,qp,2) +
                                     force(cell,qp,0)*wBF(cell,node,qp);
          }
        }
      }
    }
    else {
      for (std::size_t cell = 0; cell < workset.numCells; ++cell) {
        for (std::size_t node = 0; node < numNodes; ++node) {
          for (std::size_t qp = 0; qp < numQPs; ++qp) {
            Residual(cell,node,0) += Ugrad(cell,qp,0,0)*wGradBF(cell,node,qp,0) +
                                     Ugrad(cell,qp,0,1)*wGradBF(cell,node,qp,1) +
                                     force(cell,qp,0)*wBF(cell,node,qp);
          }
        }
      }
    }
  }
}