Пример #1
0
// Fills the strain-rate fields at every (cell, quadrature point) of the workset:
//   epsilonXX = Ugrad(.,.,0,0)
//   epsilonYY = Ugrad(.,.,1,1)
//   epsilonXY = 0.5*(Ugrad(.,.,0,1) + Ugrad(.,.,1,0))
//   epsilonB  = epsilonXX^2 + epsilonYY^2 + epsilonXX*epsilonYY + epsilonXY^2
void EpsilonL1L2<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const std::size_t cellCount = workset.numCells;
  const std::size_t qpCount   = numQPs;

  for (std::size_t c = 0; c < cellCount; ++c) {
    for (std::size_t q = 0; q < qpCount; ++q) {
      // Diagonal and symmetrized off-diagonal entries of the velocity gradient.
      epsilonXX(c,q) = Ugrad(c,q,0,0);
      epsilonYY(c,q) = Ugrad(c,q,1,1);
      epsilonXY(c,q) = 0.5*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0));

      // Combined quantity built from the three components just stored.
      epsilonB(c,q) = epsilonXX(c,q)*epsilonXX(c,q)
                    + epsilonYY(c,q)*epsilonYY(c,q)
                    + epsilonXX(c,q)*epsilonYY(c,q)
                    + epsilonXY(c,q)*epsilonXY(c,q);
    }
  }
}
Пример #2
0
// Evaluates the viscosity field mu at every (cell, quadrature point) of the
// workset, according to visc_type:
//   CONSTANT - mu = 1
//   EXPTRIG  - closed-form expression in the quadrature-point coordinates
//   GLENSLAW - per-cell flow factor (chosen by flowRate_type) times a power
//              of the regularized squared effective strain rate; when
//              homotopyParam == 0 the strain-rate factor is dropped and mu is
//              just the flow factor.
// Any other visc_type leaves mu untouched.
void ViscosityFO<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const std::size_t nCells = workset.numCells;

  switch (visc_type) {

    case CONSTANT: {
      for (std::size_t c = 0; c < nCells; ++c) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          mu(c,q) = 1.0;
        }
      }
      break;
    }

    case EXPTRIG: {
      const double a = 1.0; // coefficient of the exponential/trigonometric expression
      for (std::size_t c = 0; c < nCells; ++c) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          MeshScalarT xCoord = coordVec(c,q,0);
          MeshScalarT twoPiY = 2.0*pi*coordVec(c,q,1);
          MeshScalarT arg = (a*a + 4.0*pi*pi - 2.0*pi*a)*sin(twoPiY)*sin(twoPiY) + 1.0/4.0*(2.0*pi+a)*(2.0*pi+a)*cos(twoPiY)*cos(twoPiY);
          arg = sqrt(arg)*exp(a*xCoord);
          mu(c,q) = 1.0/2.0*pow(A, -1.0/n)*pow(arg, 1.0/n - 1.0);
        }
      }
      break;
    }

    case GLENSLAW: {
      // One flow-factor value per cell: 0.5 * (rate factor)^(-1/n).
      std::vector<ScalarT> cellFlowFactor;
      cellFlowFactor.resize(nCells);

      switch (flowRate_type) {
        case UNIFORM: {
          for (std::size_t c = 0; c < nCells; ++c) {
            cellFlowFactor[c] = 1.0/2.0*pow(A, -1.0/n);
          }
          break;
        }
        case TEMPERATUREBASED: {
          for (std::size_t c = 0; c < nCells; ++c) {
            cellFlowFactor[c] = 1.0/2.0*pow(flowRate(temperature(c)), -1.0/n);
          }
          break;
        }
        case FROMFILE:
        case FROMCISM: {
          for (std::size_t c = 0; c < nCells; ++c) {
            cellFlowFactor[c] = 1.0/2.0*pow(flowFactorA(c), -1.0/n);
          }
          break;
        }
      }

      // Exponent applied to the squared effective strain rate.
      double power = 0.5*(1.0/n - 1.0);

      if (homotopyParam == 0.0) {
        // Constant (per-cell) viscosity: no strain-rate dependence.
        for (std::size_t c = 0; c < nCells; ++c) {
          for (std::size_t q = 0; q < numQPs; ++q) {
            mu(c,q) = cellFlowFactor[c];
          }
        }
      }
      else {
        // Glen's law viscosity, regularized by an amount set by homotopyParam.
        ScalarT regularization = pow(10.0, -10.0*homotopyParam);
        ScalarT effStrainSq = 0.0;
        for (std::size_t c = 0; c < nCells; ++c) {
          for (std::size_t q = 0; q < numQPs; ++q) {
            ScalarT& g00 = Ugrad(c,q,0,0);
            ScalarT& g11 = Ugrad(c,q,1,1);
            ScalarT& g01 = Ugrad(c,q,0,1);
            ScalarT& g10 = Ugrad(c,q,1,0);

            // g00^2 + g11^2 + g00*g11
            effStrainSq = g00*g00 + g11*g11 + g00*g11;
            // + 0.25*(g01 + g10)^2
            effStrainSq += 0.25*(g01 + g10)*(g01 + g10);
            // Extra out-of-plane terms; the loop only runs when numDims > 2.
            for (int dim = 2; dim < numDims; ++dim)
              effStrainSq += 0.25*(Ugrad(c,q,0,dim)*Ugrad(c,q,0,dim) + Ugrad(c,q,1,dim)*Ugrad(c,q,1,dim) );
            // Regularization keeps the base of the power strictly positive.
            effStrainSq += regularization;

            mu(c,q) = cellFlowFactor[c]*pow(effStrainSq,  power);
          }
        }
      }
      break;
    }
  }
}
Пример #3
0
  /**
   * Sampler thread main loop: Hamiltonian Monte Carlo with leapfrog steps.
   *
   * Clears the stored samples and running sums, obtains a start position
   * from starting_position(), then repeats until `running` becomes false:
   *   1. draw a fresh momentum p with N(0,1) components,
   *   2. run L leapfrog steps of length epsilon using Ugrad(),
   *   3. accept or reject the proposal with the Metropolis rule on
   *      U(q) + 0.5*|p|^2,
   *   4. record the current position (the proposal if accepted, the previous
   *      position otherwise) into samples / sum_mean / sum_covariance while
   *      holding solution_lock.
   *
   * When `adaptive` is set, epsilon is rescaled every 50 iterations:
   * multiplied by 0.8 if the observed accept rate is <= 0.65 and by 1.1 if it
   * is >= 0.85.
   *
   * threadIsRunning is true for the duration of the loop. While `paused` is
   * set the loop sleeps in one second steps.
   */
  void HMC_abstract<T>::__sampler_loop()
  {
    threadIsRunning = true;

    samples.clear();
    sum_mean.zero();
    sum_covariance.zero();
    sum_N = 0;

    // q = location, p = momentum, H(q,p) = hamiltonian
    math::vertex<T> q, p;

    starting_position(q); // random starting position q

    // Only sizes p; its values are redrawn at the top of every iteration.
    p.resize(q.size());
    p.zero();

    T epsilon = T(0.01f);       // leapfrog step length (adapted below)
    const unsigned int L = 20;  // leapfrog steps per proposal (fixed)

    std::random_device rd;
    std::mt19937 gen(rd());
    std::normal_distribution<> rng(0, 1); // N(0,1) variables
    auto normalrnd = std::bind(rng, std::ref(gen));

    // Uniform draw for the Metropolis test. Uses the thread-local generator
    // instead of rand(): rand() is never seeded here and is not required to
    // be thread-safe, while this function runs on its own thread.
    std::uniform_real_distribution<> uniform01(0.0, 1.0);

    // Used to adaptively tune the step length epsilon from the accept rate:
    // accept_rate counts accepted proposals, accept_rate_samples counts all
    // proposals since the last adaptation.
    T accept_rate = T(0.0f);
    unsigned int accept_rate_samples = 0;


    while(running) // keep sampling until asked to stop
    {
      for(unsigned int i=0;i<p.size();i++)
        p[i] = T(normalrnd()); // Normal distribution

      math::vertex<T> old_q = q;
      math::vertex<T> current_p = p;

      // Leapfrog: half step for momentum, L full position steps with full
      // momentum steps in between, and a final half step for momentum.
      p -= T(0.5f) * epsilon * Ugrad(q);

      for(unsigned int i=0;i<L;i++){
        q += epsilon * p;
        if(i != L-1) p -= epsilon*Ugrad(q);
      }

      p -= T(0.5f) * epsilon * Ugrad(q);

      p = -p; // negate momentum so the proposal is symmetric

      T current_U  = U(old_q);
      T proposed_U = U(q);

      T current_K  = T(0.0f);
      T proposed_K = T(0.0f);

      for(unsigned int i=0;i<p.size();i++){
        current_K  += T(0.5f)*current_p[i]*current_p[i];
        proposed_K += T(0.5f)*p[i]*p[i];
      }

      T r = T(uniform01(gen));

      bool accepted = false;

      if(r <= exp(current_U-proposed_U+current_K-proposed_K)){
        accepted = true; // keep proposed q
      }
      else{
        q = old_q;       // reject: stay at the previous position
      }

      // Record the current position. The bookkeeping is identical for
      // accepted and rejected proposals, so it is done once here.
      pthread_mutex_lock( &solution_lock );

      if(sum_N > 0){
        sum_mean += q;
        sum_covariance += q.outerproduct();
      }
      else{
        sum_mean = q;
        sum_covariance = q.outerproduct();
      }

      sum_N++;
      samples.push_back(q);

      pthread_mutex_unlock( &solution_lock );

      if(adaptive){
        if(accepted) accept_rate++;
        accept_rate_samples++;

        // use accept rate to adapt epsilon
        // adapt step length every 50 iterations (samples)
        if(accept_rate_samples >= 50)
        {
          accept_rate /= accept_rate_samples;

          if(accept_rate <= T(0.65f)){
            epsilon = T(0.8)*epsilon;  // too many rejections: smaller steps
          }
          else if(accept_rate >= T(0.85f)){
            epsilon = T(1.1)*epsilon;  // almost everything accepted: larger steps
          }

          accept_rate = T(0.0f);
          accept_rate_samples = 0;
        }
      }

      while(paused && running) // pause
        sleep(1);

    }

    threadIsRunning = false;
  }
Пример #4
0
// Runs the distributed factorization for this worker and returns the final
// factor matrices as the tuple (U, P, T).
//
// Each round updates the three factors in turn (U, then P, then T). For each
// factor the worker builds a numerator and denominator from its local data
// slice, pushes the delta `local % num / denom - local` for its own rows to
// the shared table, waits on a global barrier and reloads the full matrix.
//
// Parameters:
//   lambda   - weight of the regularization term subtracted from the deltas.
//   clamp    - if true, each reloaded matrix is clamped to [0, FLT_MAX].
//   reg      - enables the regularization term.
//   reg_thr  - regularization is only applied in rounds with round > reg_thr.
//   stop_tol - forwarded to check_stop() to decide on early termination.
//
// NOTE(review): the statement order relative to the GlobalBarrier() calls
// looks essential for all workers to see consistent tables - confirm before
// reordering anything here.
std::tuple<arma::fmat, arma::fmat, arma::fmat> Worker::factorize(float lambda, bool clamp, bool reg, int reg_thr, int stop_tol) {

  // Enable floating-point traps for division by zero, invalid operation and overflow.
  feenableexcept(FE_DIVBYZERO|FE_INVALID|FE_OVERFLOW);
  petuum::RowAccessor rowacc;
  
  // Initialize tables with random values
  //arma::arma_rng::set_seed_random();
  gaml::util::table::randomizeTable(usertable, rank, Ruser.n_rows, useroffset);
  gaml::util::table::randomizeTable(prodtable, rank, Rprod.n_cols, prodoffset);
  gaml::util::table::randomizeTable(wordtable, rank, Rword.n_words, wordoffset);
  
  float last_se_train=0;
  float last_se_vali=0;
  // Publish this worker's validation / test non-zero counts in setable
  // (presumably arguments are row, column, increment - verify against the table API).
  setable.Inc(1, id*2, Rvali.n_nz);
  setable.Inc(1, id*2+1, Rtest.n_nz);
  
  petuum::PSTableGroup::GlobalBarrier();
  
  // Fetch U, P and T
  // NOTE(review): the row counts come from Rword.n_rows, Rword.n_cols and
  // Ruser.n_words, not from the slices used for randomizeTable above -
  // presumably these are the global dimensions; confirm.
  auto U = gaml::util::table::loadMatrix(usertable, Rword.n_rows, rank);
  auto P = gaml::util::table::loadMatrix(prodtable, Rword.n_cols, rank);
  auto T = gaml::util::table::loadMatrix(wordtable, Ruser.n_words, rank);
  
  // First element is used as the validation size, second as the test size.
  auto sum_sizes = read_split_sum(1);
  auto vali_size = std::get<0>(sum_sizes);
  auto test_size = std::get<1>(sum_sizes);
  
  for (int round = 0; round < iterations; round++) {
    ///////
    // Compute gradient for U
    ///////
    arma::fmat Ugrad(Ruser.n_rows, rank, arma::fill::zeros);
    arma::fmat Unum(Ruser.n_rows, rank, arma::fill::zeros);
    arma::fmat Udenom(Ruser.n_rows, rank, arma::fill::zeros);
    
    // iterate over all up pairs in Ruser
    for (std::size_t i = 0; i != Ruser.n_nz; ++i) {
      int userind = Ruser.rows[i];
      int prodind = Ruser.cols[i];
      auto wordbag = Ruser.getWordBagAt(i);
      
      // U and P are indexed with userind/prodind directly; the local
      // accumulators are indexed relative to useroffset.
      Unum.row(userind - useroffset) += P.row(prodind) % (wordbag * T);
      Udenom.row(userind - useroffset) += P.row(prodind) % ((U.row(userind) % P.row(prodind) * T.t()) * T);  
    }
    
    // Rows of U owned by this worker.
    arma::fmat Ulocal = U.rows(useroffset, useroffset + Ruser.n_rows - 1);
    // prevent div by zero
    Udenom += 10E-16f;
    // Delta chosen so that Ulocal + Ugrad == Ulocal % Unum / Udenom.
    Ugrad = (Ulocal % Unum / Udenom) - Ulocal;
    if(reg && round > reg_thr) {
      Ugrad = Ugrad - lambda * Ulocal % Ulocal / Udenom;
    }

    // Update U table
    gaml::util::table::updateMatrixSlice(Ugrad, usertable, Ugrad.n_rows, Ugrad.n_cols, useroffset);

    petuum::PSTableGroup::GlobalBarrier();

    // Fetch updated U
    U = gaml::util::table::loadMatrix(usertable, U.n_rows, U.n_cols);
    if(clamp){
      // Only the local copy is clamped here; the table itself is not written.
      U = arma::clamp(U, 0.0, std::numeric_limits<float>::max());
    }

    ///////
    // Compute gradient for P
    ///////
    arma::fmat Pgrad(Rprod.n_cols, rank, arma::fill::zeros);
    arma::fmat Pnum(Rprod.n_cols, rank, arma::fill::zeros);
    arma::fmat Pdenom(Rprod.n_cols, rank, arma::fill::zeros);
    
    // iterate over all up pairs in Rprod
    for (std::size_t i = 0; i != Rprod.n_nz; ++i) {
      int userind = Rprod.rows[i];
      int prodind = Rprod.cols[i];
      auto wordbag = Rprod.getWordBagAt(i);
      
      // Same pattern as for U, with the roles of U and P swapped; the
      // accumulators are indexed relative to prodoffset.
      Pnum.row(prodind - prodoffset) += U.row(userind) % (wordbag * T);
      Pdenom.row(prodind - prodoffset) += U.row(userind) % ((U.row(userind) % P.row(prodind) * T.t()) * T);
    }
    
    // Rows of P owned by this worker.
    arma::fmat Plocal = P.rows(prodoffset, prodoffset + Rprod.n_cols   - 1);
    // prevent div by zero
    Pdenom += 10E-16f;
    // Delta chosen so that Plocal + Pgrad == Plocal % Pnum / Pdenom.
    Pgrad = (Plocal % Pnum / Pdenom) - Plocal;
    if(reg && round > reg_thr) {
      Pgrad = Pgrad - lambda * Plocal % Plocal / Pdenom;
    }

    // Update P table
    gaml::util::table::updateMatrixSlice(Pgrad, prodtable, Pgrad.n_rows, Pgrad.n_cols, prodoffset);

    petuum::PSTableGroup::GlobalBarrier();
  
    // Fetch updated P
    P = gaml::util::table::loadMatrix(prodtable, P.n_rows, P.n_cols);
    if(clamp) {
      P = arma::clamp(P, 0.0, std::numeric_limits<float>::max());
    }

    ///////
    // Compute gradient for T
    ///////
    arma::fmat Tgrad(Rword.n_words, rank, arma::fill::zeros);
    arma::fmat Tnum(Rword.n_words, rank, arma::fill::zeros);
    arma::fmat Tdenom(Rword.n_words, rank, arma::fill::zeros);
    // Rows of T owned by this worker, taken from T as loaded at the end of
    // the previous round (or the initial load).
    arma::fmat Tlocal = T.rows(wordoffset, Rword.n_words + wordoffset - 1);
    
    // iterate over all uv pairs in Rword
    for (std::size_t i = 0; i != Rword.n_nz; ++i) {
      int userind = Rword.rows[i];
      int prodind = Rword.cols[i];
      
      auto wordbag = Rword.getWordBagAt(i);
      arma::frowvec user_times_prod = (U.row(userind) % P.row(prodind));
      // Predicted values for the local words of this (user, product) pair.
      arma::frowvec pred = user_times_prod * Tlocal.t();

      // Outer products accumulate into the full local-words x rank matrices.
      Tnum += wordbag.t() * user_times_prod;
      Tdenom += pred.t() * user_times_prod;
    }
    // prevent div by zero
    Tdenom += 10E-16f;
    // Delta chosen so that Tlocal + Tgrad == Tlocal % Tnum / Tdenom.
    Tgrad = (Tlocal % Tnum / Tdenom) - Tlocal;
    if(reg && round > reg_thr) {
      Tgrad = Tgrad - lambda * Tlocal % Tlocal / Tdenom;
    }

    // Update T table
    gaml::util::table::updateMatrixSlice(Tgrad, wordtable, Tgrad.n_rows, Tgrad.n_cols, wordoffset);

    petuum::PSTableGroup::GlobalBarrier();

    // Fetch updated T
    T = gaml::util::table::loadMatrix(wordtable, T.n_rows, T.n_cols);
    if(clamp) {
      T = arma::clamp(T, 0.0, std::numeric_limits<float>::max());
    }
    
    // Publish this round's error figures, then wait until every worker has done so.
    update_setable(U, P, T, round, last_se_train, last_se_vali);
    
    petuum::PSTableGroup::GlobalBarrier();
    
    update_mse(round);
    
    // Only worker 0 reports progress.
    // NOTE(review): the training error is divided by this worker's
    // Rword.n_nz - confirm that matches how se_train_vec is aggregated.
    if(id == 0) {
      output(round+1, se_train_vec[round % mse_log] / Rword.n_nz, se_vali_vec[round % mse_log] / vali_size);
    }
    if(check_stop(round, stop_tol)) {
      break;
    }
  }
  
  // Evaluate on the local test split and publish the result in row 2 of setable.
  float se_test = eval(U, P, T, Rtest);
  setable.Inc(2, id, se_test);  
  
  petuum::PSTableGroup::GlobalBarrier();
  
  // Worker 0 sums row 2 over all workers and prints the sum divided by test_size.
  if(id == 0) {
    std::vector<float> se_test_vec;
    const auto& row = setable.Get<petuum::DenseRow<float>>(2, &rowacc);
    row.CopyToVector(&se_test_vec);
    float mse_test = std::accumulate(se_test_vec.begin(), se_test_vec.end(), 0.0f);
    std::cout << "MSE test: " << mse_test / test_size << std::endl;
  }
  return std::make_tuple(U, P, T);
}
Пример #5
0
// Assembles the residual for the workset. Residual is zeroed first, then
// filled according to the spatial dimension and eqn_type:
//   FELIX   - two residual components (0 and 1) built from the viscosity
//             muFELIX, the velocity gradient Ugrad and the force term;
//   POISSON - a single residual component (0) from the gradient of the
//             first unknown plus the force term.
// numDims == 3 selects the 3D variants; every other value uses the 2D ones.
// Any other eqn_type leaves the residual at zero.
void StokesFOResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  typedef Intrepid::FunctionSpaceTools FST; 

  // Start from a zero residual.
  for (std::size_t k = 0; k < Residual.size(); ++k)
    Residual(k) = 0.0;

  const bool isThreeD = (numDims == 3);
  const std::size_t nCells = workset.numCells;

  if (isThreeD && eqn_type == FELIX) {
    for (std::size_t c = 0; c < nCells; ++c) {
      // Stress contribution, accumulated quadrature point by quadrature point.
      for (std::size_t q = 0; q < numQPs; ++q) {
        ScalarT& visc = muFELIX(c,q);
        ScalarT s00 = 2.0*visc*(2.0*Ugrad(c,q,0,0) + Ugrad(c,q,1,1));
        ScalarT s11 = 2.0*visc*(2.0*Ugrad(c,q,1,1) + Ugrad(c,q,0,0));
        ScalarT s01 = visc*(Ugrad(c,q,1,0)+ Ugrad(c,q,0,1));
        ScalarT s02 = visc*Ugrad(c,q,0,2);
        ScalarT s12 = visc*Ugrad(c,q,1,2);
        for (std::size_t nd = 0; nd < numNodes; ++nd) {
          Residual(c,nd,0) += s00*wGradBF(c,nd,q,0) +
                              s01*wGradBF(c,nd,q,1) +
                              s02*wGradBF(c,nd,q,2);
          Residual(c,nd,1) += s01*wGradBF(c,nd,q,0) +
                              s11*wGradBF(c,nd,q,1) +
                              s12*wGradBF(c,nd,q,2);
        }
      }
      // Force contribution, added after all stress terms of this cell.
      for (std::size_t q = 0; q < numQPs; ++q) {
        ScalarT& f0 = force(c,q,0);
        ScalarT& f1 = force(c,q,1);
        for (std::size_t nd = 0; nd < numNodes; ++nd) {
          Residual(c,nd,0) += f0*wBF(c,nd,q);
          Residual(c,nd,1) += f1*wBF(c,nd,q);
        }
      }
    }
  }
  else if (isThreeD && eqn_type == POISSON) {
    // Laplace (Poisson) operator, three gradient components.
    for (std::size_t c = 0; c < nCells; ++c) {
      for (std::size_t nd = 0; nd < numNodes; ++nd) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          Residual(c,nd,0) += Ugrad(c,q,0,0)*wGradBF(c,nd,q,0) +
                              Ugrad(c,q,0,1)*wGradBF(c,nd,q,1) +
                              Ugrad(c,q,0,2)*wGradBF(c,nd,q,2) +
                              force(c,q,0)*wBF(c,nd,q);
        }
      }
    }
  }
  else if (!isThreeD && eqn_type == FELIX) {
    for (std::size_t c = 0; c < nCells; ++c) {
      for (std::size_t nd = 0; nd < numNodes; ++nd) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          ScalarT& visc = muFELIX(c,q);
          ScalarT& f0 = force(c,q,0);
          ScalarT& f1 = force(c,q,1);
          Residual(c,nd,0) += 2.0*visc*((2.0*Ugrad(c,q,0,0) + Ugrad(c,q,1,1))*wGradBF(c,nd,q,0) +
                              0.5*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0))*wGradBF(c,nd,q,1)) +
                              f0*wBF(c,nd,q);
          Residual(c,nd,1) += 2.0*visc*(0.5*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0))*wGradBF(c,nd,q,0) +
                              (Ugrad(c,q,0,0) + 2.0*Ugrad(c,q,1,1))*wGradBF(c,nd,q,1)) + f1*wBF(c,nd,q);
        }
      }
    }
  }
  else if (!isThreeD && eqn_type == POISSON) {
    // Laplace (Poisson) operator, two gradient components.
    for (std::size_t c = 0; c < nCells; ++c) {
      for (std::size_t nd = 0; nd < numNodes; ++nd) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          Residual(c,nd,0) += Ugrad(c,q,0,0)*wGradBF(c,nd,q,0) +
                              Ugrad(c,q,0,1)*wGradBF(c,nd,q,1) +
                              force(c,q,0)*wBF(c,nd,q);
        }
      }
    }
  }
}