示例#1
0
void SMCTest::test_itw_Lsodar()
{
  initTwisting();
  SP::ControlLsodarSimulation simLsodar(new ControlLsodarSimulation(_t0, _T, _h));
  simLsodar->setSaveOnlyMainSimulation(true);
  simLsodar->addDynamicalSystem(_DS);
  simLsodar->addSensor(_sensor, _h);
  simLsodar->addActuator(_itw, _h);
  simLsodar->initialize();
  simLsodar->run();
  SimpleMatrix& data = *simLsodar->data();
  ioMatrix::write("itw_Lsodar.dat", "ascii", data, "noDim");
  // Reference Matrix
  SimpleMatrix dataRef(data);
  dataRef.zero();
  ioMatrix::read("itw.ref", "ascii", dataRef);
  // it is a bad idea to compare solutions to an AVI that does not admit a unique solution
  SiconosVector lambda1 = SiconosVector(data.size(0));
  SiconosVector lambda2 = SiconosVector(data.size(0));
  data.getCol(3, lambda1);
  data.getCol(4, lambda2);
  axpy(_beta, lambda2, lambda1);
  SiconosVector lambda1Ref = SiconosVector(data.size(0));
  SiconosVector lambda2Ref = SiconosVector(data.size(0));
  dataRef.getCol(3, lambda1Ref);
  dataRef.getCol(4, lambda2Ref);
  axpy(_beta, lambda2Ref, lambda1Ref);
  data.setCol(3, lambda1);
  dataRef.setCol(3, lambda1Ref);
  data.resize(data.size(0), 4);
  dataRef.resize(data.size(0), 4);
  std::cout << "------- Integration done, error = " << (data - dataRef).normInf() << " -------" <<std::endl;
  CPPUNIT_ASSERT_EQUAL_MESSAGE("test_itw_Lsodar : ", (data - dataRef).normInf() < _tol, true);
}
示例#2
0
  void backwardOneInput(int layerId, const UpdateCallback& callback) {
    const MatrixPtr& inputMat = getInputValue(layerId);
    const MatrixPtr& inputGradMat = getInputGrad(layerId);
    const MatrixPtr& weightMat = weights_[layerId]->getW();
    const MatrixPtr& weightGradMat = weights_[layerId]->getWGrad();

    int dim = inputMat->getWidth();
    real* sampleGrad = sampleOut_.grad->getData();

    if (weightGradMat) {
      for (size_t i = 0; i < samples_.size(); ++i) {
        axpy(dim,
             sampleGrad[i],
             inputMat->getRowBuf(samples_[i].sampleId),
             weightGradMat->getRowBuf(samples_[i].labelId));
      }
      weights_[layerId]->incUpdate(callback);
    }

    if (inputGradMat) {
      for (size_t i = 0; i < samples_.size(); ++i) {
        axpy(dim,
             sampleGrad[i],
             weightMat->getRowBuf(samples_[i].labelId),
             inputGradMat->getRowBuf(samples_[i].sampleId));
      }
    }
  }
示例#3
0
/*
 * Matrix-free conjugate gradient iteration.
 *
 * A         - callback applying the operator: A(out, in, ctx)
 * b         - on entry the right-hand side; on exit the computed solution
 *             (the iteration starts from the zero vector)
 * tolerance - absolute tolerance on the 2-norm of the residual
 * ctx       - opaque pointer forwarded to A
 *
 * At most b->as_vec->len iterations are performed. The iteration count is
 * printed on stdout.
 */
void cg(eval_t A, Matrix b, double tolerance, void* ctx)
{
  Matrix r = createMatrix(b->rows, b->cols);
  Matrix p = createMatrix(b->rows, b->cols);
  Matrix buffer = createMatrix(b->rows, b->cols);
  /* r = b, x = 0 (x is stored in b) */
  copyVector(r->as_vec,b->as_vec);
  fillVector(b->as_vec, 0.0);
  /* Start from the true residual norm instead of a magic constant, so that a
   * zero (or already converged) right-hand side never enters the loop and
   * never evaluates alpha = 0/0. */
  double rr = innerproduct(r->as_vec,r->as_vec);
  double rdr = sqrt(rr);
  double dotp = rr;
  int i=0;
  while (i < b->as_vec->len && rdr > tolerance) {
    ++i;
    if (i == 1) {
      /* first search direction is the residual itself */
      copyVector(p->as_vec,r->as_vec);
    } else {
      /* p = r + beta*p, with beta = (r_new.r_new)/(r_old.r_old) */
      double beta = rr/dotp;
      dotp = rr;
      scaleVector(p->as_vec,beta);
      axpy(p->as_vec,r->as_vec,1.0);
    }
    A(buffer,p,ctx);
    double alpha = dotp/innerproduct(p->as_vec,buffer->as_vec);
    axpy(b->as_vec,p->as_vec,alpha);
    axpy(r->as_vec,buffer->as_vec,-alpha);
    /* r.r is reused as the numerator of beta in the next iteration */
    rr = innerproduct(r->as_vec,r->as_vec);
    rdr = sqrt(rr);
  }
  printf("%i iterations\n",i);
  freeMatrix(r);
  freeMatrix(p);
  freeMatrix(buffer);
}
示例#4
0
// Precomputes the two per-substrate vectors used by the secretion/uptake
// update of this agent for a time step of size dt.
//
// Model and discretization (from the original notes):
//   dp/dt = S*(T-p) - U*p
//   p(n+1) - p(n) = dt*S(n)*T(n) - dt*( S(n) + U(n))*p(n+1)
//   p(n+1)*temp2  = p(n) + temp1
//   p(n+1)        = ( p(n) + temp1 )/temp2
//
// History: before September 28, 2015 the factor below was plain dt
// (delta-function sources/sinks); since then sources/sinks are volumetric and
// dt is scaled by (agent volume)/(voxel volume).
void Basic_Agent::set_internal_uptake_constants( double dt )
{
	// dt*(V_cell/V_voxel); the original code flags this expression as "needs a fix"
	double scaled_dt = dt * volume / ( (microenvironment->voxels(current_voxel_index)).volume );

	// temp1 = dt*(V_cell/V_voxel)*S*T
	cell_source_sink_solver_temp1.assign( secretion_rates->size() , 0.0 );
	cell_source_sink_solver_temp1 += *secretion_rates;
	cell_source_sink_solver_temp1 *= *saturation_densities;
	cell_source_sink_solver_temp1 *= scaled_dt;

	// temp2 = 1 + dt*(V_cell/V_voxel)*( S + U )
	cell_source_sink_solver_temp2.assign( secretion_rates->size() , 1.0 );
	axpy( &(cell_source_sink_solver_temp2) , scaled_dt , *secretion_rates );
	axpy( &(cell_source_sink_solver_temp2) , scaled_dt , *uptake_rates );
}
示例#5
0
void CBOW_NS::update(uint64_t cur, const vector<uint64_t>& context, real alpha) {
    _in.clear();
    for (vector<uint64_t>::const_iterator i = context.begin(); i != context.end(); ++i) {
        _in.push_back(_net.get_input_vec(*i));
    }

    _out.clear();
    _out.push_back(_net.get_output_vec(cur));
    for (unsigned i = 0; i < _neg_sample_cnt; ++i) {
        _out.push_back(_net.get_output_vec(_vocab.sampling(&_seed)));
    }

    memset(_hidden_vec, 0, sizeof(real) * _sz);
    for (vector<const real*>::const_iterator i = _in.begin(); i != _in.end(); ++i) {
        addto(_sz, *i, _hidden_vec);
    }
    scale(_sz, static_cast<real>(1.0) / _in.size(), _hidden_vec);

    memset(_hidden_vec_grad, 0, sizeof(real) * _sz);
    for (size_t i = 0; i != _out.size(); ++i) {
        real sigma = sigmoid(dot(_sz, _hidden_vec, _out[i]));
        int label = (i==0) ? 1 : 0;
        real coef = (label - sigma) * alpha;

        axpy(_sz, coef, _out[i], _hidden_vec_grad);
        axpy(_sz, coef, _hidden_vec, const_cast<real*>(_out[i]));
    }

    scale(_sz, static_cast<real>(1.0) / _in.size(), _hidden_vec_grad);
    for (size_t i = 0; i != _in.size(); ++i) {
        addto(_sz, _hidden_vec_grad, const_cast<real*>(_in[i]));
    }
}
// Moves the electronic variables along direction dir by step size alpha.
// For each q in [eInfo.qStart, eInfo.qStop), the Y and B components of dir
// that evaluate true are applied to eVars through axpy; others are skipped.
void ElecMinimizer::step(const ElecGradient& dir, double alpha)
{
	myassert(dir.eInfo == &eInfo);
	for(int state = eInfo.qStart; state < eInfo.qStop; ++state)
	{
		if(dir.Y[state])
			axpy(alpha, dir.Y[state], eVars.Y[state]);
		if(dir.B[state])
			axpy(alpha, dir.B[state], eVars.B[state]);
	}
}
示例#7
0
文件: cg.c 项目: akva2/tma4280
/*
 * Conjugate gradient iteration with a relative stopping criterion.
 *
 * A         - not referenced in this body; the operator is applied through
 *             MxV(buffer, p)
 *             NOTE(review): presumably MxV uses a matrix stored elsewhere - confirm
 * b         - on entry the right-hand side; on exit the computed solution
 *             (the iteration starts from the zero vector)
 * tolerance - the loop runs while ||r|| > tolerance * ||r0||
 *
 * Returns the number of iterations performed (at most b->len).
 */
int cg(Matrix A, Vector b, double tolerance)
{
  int i=0;
  double rl;
  Vector r = createVector(b->len);
  Vector p = createVector(b->len);
  Vector buffer = createVector(b->len);
  /* r = b, x = 0 (x is stored in b) */
  copyVector(r,b);
  fillVector(b, 0.0);
  /* Seed the loop with the true residual instead of a magic constant: with a
   * zero right-hand side the old seed entered the loop and computed 0/0. */
  double rr = dotproduct(r,r);
  double dotp = rr;
  rl = sqrt(rr);
  double rdr = rl;
  while (i < b->len && rdr > tolerance*rl) {
    ++i;
    if (i == 1) {
      /* first search direction is the residual itself */
      copyVector(p,r);
    } else {
      /* p = r + beta*p, with beta = (r_new.r_new)/(r_old.r_old) */
      double beta = rr/dotp;
      dotp = rr;
      scaleVector(p,beta);
      axpy(p,r,1.0);
    }
    MxV(buffer, p);
    double alpha = dotp/dotproduct(p,buffer);
    axpy(b,p,alpha);
    axpy(r,buffer,-alpha);
    /* r.r is reused as the numerator of beta in the next iteration */
    rr = dotproduct(r,r);
    rdr = sqrt(rr);
  }
  freeVector(r);
  freeVector(p);
  freeVector(buffer);
  return i;
}
// y += alpha * x for ElecGradient objects, component by component.
// For each q in [qStart, qStop): a component that is unset in x is skipped;
// a component that is unset in y is assigned alpha*x instead of accumulated.
void axpy(double alpha, const ElecGradient& x, ElecGradient& y)
{
	myassert(x.eInfo == y.eInfo);
	for(int state = x.eInfo->qStart; state < x.eInfo->qStop; ++state)
	{
		if(x.Y[state])
		{
			if(!y.Y[state])
				y.Y[state] = alpha*x.Y[state];
			else
				axpy(alpha, x.Y[state], y.Y[state]);
		}
		if(x.B[state])
		{
			if(!y.B[state])
				y.B[state] = alpha*x.B[state];
			else
				axpy(alpha, x.B[state], y.B[state]);
		}
	}
}
示例#9
0
void CBOW_HS::update(uint64_t cur, const vector<uint64_t>& context, real alpha) {
    _in.clear();
    for (vector<uint64_t>::const_iterator i = context.begin(); i != context.end(); ++i) {
        _in.push_back(_net.get_input_vec(*i));
    }

    memset(_hidden_vec, 0, sizeof(real) * _sz);
    for (vector<const real*>::const_iterator i = _in.begin(); i != _in.end(); ++i) {
        addto(_sz, *i, _hidden_vec);
    }
    scale(_sz, static_cast<real>(1.0) / _in.size(), _hidden_vec);

    const vector<char>& code = _huffman_tree.code(cur);
    const vector<uint64_t>& path = _huffman_tree.path(cur);
    memset(_hidden_vec_grad, 0, sizeof(real) * _sz);
    for (size_t i = 0; i != path.size(); ++i) {
        real* out = _net.get_output_vec(path[i]);
        real sigma = sigmoid(dot(_sz, _hidden_vec, out));
        int label = code[i];
        real coef = (label - sigma) * alpha;

        axpy(_sz, coef, out, _hidden_vec_grad);
        axpy(_sz, coef, _hidden_vec, out);
    }

    scale(_sz, static_cast<real>(1.0) / _in.size(), _hidden_vec_grad);
    for (size_t i = 0; i != _in.size(); ++i) {
        addto(_sz, _hidden_vec_grad, const_cast<real*>(_in[i]));
    }
}
示例#10
0
文件: misc.hpp 项目: milthorpe/LibBi
/*
 * Writes the mean of the uniform distribution q into mu, built from two axpy
 * calls that each weight one bound (q.lower(), q.upper()) by 0.5.
 *
 * q   distribution providing lower() and upper()
 * mu  output vector; must have the same size as q
 *
 * NOTE(review): the trailing `true` on the first axpy presumably clears mu
 * before accumulating, so that mu's previous content is discarded - confirm
 * against the axpy declaration.
 */
void bi::mean(const UniformPdf<V1>& q, V2 mu) {
  /* pre-condition */
  BI_ASSERT(q.size() == mu.size());

  axpy(0.5, q.lower(), mu, true);
  axpy(0.5, q.upper(), mu);
}
示例#11
0
文件: laplb.c 项目: g-koutsou/CoS-2
/*
 * Solves lapl(u) x = b, for x, given b, using Conjugate Gradient
 *
 * lp - lattice parameters; lp.L is the linear extent used for the
 *      performance estimates
 * x  - on entry the initial guess, on exit the computed solution
 * b  - right-hand side
 * g  - link field forwarded to lapl()
 *
 * Runs at most max_iter iterations and stops early once sqrt(rr/bb) < tol.
 * Prints the residual ratio rr/bb per iteration and timing statistics for
 * lapl() at the end.
 */
void
cg(latparams lp, field **x, field **b, link **g)
{
  size_t L = lp.L;
  int max_iter = 100;
  float tol = 1e-9;

  /* Temporary fields needed for CG */
  field **r = new_field(lp);
  field **p = new_field(lp);
  field **Ap = new_field(lp);

  /* Initial residual and p-vector */
  lapl(lp, r, x, g);
  xmy(lp, b, r);
  xeqy(lp, p, r);

  /* Initial r-norm and b-norm */
  float rr = xdotx(lp, r);  
  float bb = xdotx(lp, b);
  double t_lapl = 0;
  int iter = 0;
  for(iter=0; iter<max_iter; iter++) {
    printf(" %6d, res = %+e\n", iter, rr/bb);
    if(sqrt(rr/bb) < tol)
      break;
    /* Only the operator application is timed */
    double t = stop_watch(0);
    lapl(lp, Ap, p, g);
    t_lapl += stop_watch(t);
    float pAp = xdoty(lp, p, Ap);
    float alpha = rr/pAp;
    axpy(lp, alpha, p, x);
    axpy(lp, -alpha, Ap, r);
    float r1r1 = xdotx(lp, r);
    float beta = r1r1/rr;
    xpay(lp, r, beta, p);
    rr = r1r1;
  }

  /* Recompute residual after convergence */
  lapl(lp, r, x, g);
  xmy(lp, b, r);
  rr = xdotx(lp, r);

  /* When the initial guess already satisfies the tolerance no lapl() call is
   * timed (iter == 0); report zeros instead of dividing by zero. */
  double t_per_call = (iter > 0) ? t_lapl/(double)iter : 0.0;
  double beta_fp = (t_per_call > 0) ? 50*((double)L*L*L)/t_per_call*1e-9 : 0.0;
  double beta_io = (t_per_call > 0) ? 40*((double)L*L*L)/t_per_call*1e-9 : 0.0;
  printf(" Converged after %6d iterations, res = %+e\n", iter, rr/bb);  
  printf(" Time in lapl(): %+6.3e sec/call, %4.2e Gflop/s, %4.2e GB/s\n",
	 t_per_call, beta_fp, beta_io);  

  del_field(r);
  del_field(p);
  del_field(Ap);
  return;
}
示例#12
0
/*
 * Conjugate gradient solve of A * x = b.
 *
 *   A                  sparse matrix applied through multiply()
 *   b                  right-hand side
 *   x                  initial guess on entry, updated in place
 *   iteration          [out] number of iterations performed
 *   normr              [out] sqrt of the last computed r.r
 *   iter_time          [out] wall-clock seconds spent in the iteration loop
 *   maximum_iteration  iteration cap
 *   tolerance          the loop runs while tolerance < normr
 */
void cgsolve(
  const CrsMatrix<AScalarType,Device>  & A ,
  const View<VScalarType*,LayoutRight,Device> & b ,
  const View<VScalarType*,LayoutRight,Device> & x ,
  size_t & iteration ,
  double & normr ,
  double & iter_time ,
  const size_t maximum_iteration = 200 ,
  const double tolerance = std::numeric_limits<VScalarType>::epsilon() )
{
  typedef View<VScalarType*,LayoutRight,Device> vector_type ;

  const size_t count = b.dimension_0();

  vector_type p ( "cg::p" , count );
  vector_type r ( "cg::r" , count );
  vector_type Ap( "cg::Ap", count );

  // Initial residual r = b - A * x, computed through the scratch vector p.
  deep_copy( p , x );
  multiply( A , p , Ap );
  waxpby( count , 1.0 , b , -1.0 , Ap , r );

  // First search direction is the residual.
  deep_copy( p , r );

  double rr_prev = dot( count , r );

  normr     = std::sqrt( rr_prev );
  iteration = 0 ;

  Kokkos::Impl::Timer wall_clock ;

  while ( tolerance < normr && iteration < maximum_iteration ) {

    // Ap = A * p
    multiply( A , p , Ap );

    const double pAp  = dot( count , p , Ap );
    const double step = rr_prev / pAp ;

    // x += step * p ; r -= step * Ap
    axpy( count,  step, p , x );
    axpy( count, -step, Ap, r );

    const double rr_new = dot( count , r );
    const double ratio  = rr_new / rr_prev ;

    // p = r + ratio * p
    xpby( count , r , ratio , p );

    rr_prev = rr_new ;
    normr   = std::sqrt( rr_prev );
    ++iteration ;
  }

  iter_time = wall_clock.seconds();
}
示例#13
0
文件: lapl.c 项目: g-koutsou/LAP2015
/*
 * Solves lapl(u) x = b, for x, given b, using Conjugate Gradient
 *
 * L - linear extent forwarded to every field routine and used for the
 *     performance estimates
 * x - on entry the initial guess, on exit the computed solution
 * b - right-hand side
 * u - field forwarded to lapl()
 *
 * Runs at most max_iter iterations and stops early once sqrt(rr/bb) < tol.
 * Prints the residual ratio rr/bb per iteration and timing statistics for
 * lapl() at the end.
 */
void
cg(size_t L, _Complex float *x, _Complex float *b, _Complex float *u)
{
  int max_iter = 100;
  float tol = 1e-6;

  /* Temporary fields needed for CG */
  _Complex float *r = new_field(L);
  _Complex float *p = new_field(L);
  _Complex float *Ap = new_field(L);

  /* Initial residual and p-vector */
  lapl(L, r, x, u);
  xmy(L, b, r);
  xeqy(L, p, r);

  /* Initial r-norm and b-norm */
  float rr = xdotx(L, r);  
  float bb = xdotx(L, b);
  double t_lapl = 0;
  int iter = 0;
  for(iter=0; iter<max_iter; iter++) {
    printf(" %6d, res = %+e\n", iter, rr/bb);
    if(sqrt(rr/bb) < tol)
      break;
    /* Only the operator application is timed */
    double t = stop_watch(0);
    lapl(L, Ap, p, u);
    t_lapl += stop_watch(t);
    float pAp = xdoty(L, p, Ap);
    float alpha = rr/pAp;
    axpy(L, alpha, p, x);
    axpy(L, -alpha, Ap, r);
    float r1r1 = xdotx(L, r);
    float beta = r1r1/rr;
    xpay(L, r, beta, p);
    rr = r1r1;
  }

  /* Recompute residual after convergence */
  lapl(L, r, x, u);
  xmy(L, b, r);
  rr = xdotx(L, r);

  /* When the initial guess already satisfies the tolerance no lapl() call is
   * timed (iter == 0); report zeros instead of dividing by zero. */
  double t_per_call = (iter > 0) ? t_lapl/(double)iter : 0.0;
  double beta_fp = (t_per_call > 0) ? 34*L*L/t_per_call*1e-9 : 0.0;
  double beta_io = (t_per_call > 0) ? 32*L*L/t_per_call*1e-9 : 0.0;
  printf(" Converged after %6d iterations, res = %+e\n", iter, rr/bb);  
  printf(" Time in lapl(): %+6.3e sec/call, %4.2e Gflop/s, %4.2e GB/s\n",
	 t_per_call, beta_fp, beta_io);  

  free(r);
  free(p);
  free(Ap);
  return;
}
示例#14
0
/*
 * Jacobi iteration for a 1D problem with diagonal (2 + alpha).
 *
 * u     - on entry the right-hand side, on exit the iterate
 * tol   - the loop runs while the max-norm of the last update exceeds tol
 * maxit - iteration cap
 *
 * Returns the iteration counter. Only interior entries (1 .. len-2) are
 * updated; collectVector() is called on the previous iterate before each
 * sweep, and the two end entries are excluded from the update norm.
 */
int GaussJacobiPoisson1D(Vector u, double tol, int maxit)
{
  int sweeps=0, k;
  Vector rhs = cloneVector(u);
  Vector prev = cloneVector(u);
  copyVector(rhs, u);
  fillVector(u, 0.0);
  double change = tol+1;
  const double diag = 2.0+alpha;
  while (change > tol && ++sweeps < maxit) {
    /* keep the previous iterate, restart the new one from the rhs */
    copyVector(prev, u);
    collectVector(prev);
    copyVector(u, rhs);
#pragma omp parallel for schedule(static)
    for (k=1;k<prev->len-1;++k) {
      u->data[k] = (u->data[k] + prev->data[k-1] + prev->data[k+1]) / diag;
    }
    /* prev <- prev - u, i.e. the update of this sweep */
    axpy(prev, u, -1.0);
    prev->data[0] = prev->data[prev->len-1] = 0.0;
    change = maxNorm(prev);
  }
  freeVector(rhs);
  freeVector(prev);

  return sweeps;
}
示例#15
0
文件: poisson1D.c 项目: akva2/tma4280
/*
 * Jacobi iteration for a 1D problem with diagonal (2 + alpha), using a
 * stopping criterion relative to the max-norm of the right-hand side.
 *
 * u     - on entry the right-hand side, on exit the iterate
 * tol   - the loop runs while the max-norm of the last update exceeds
 *         tol * maxNorm(rhs)
 * maxit - iteration cap
 *
 * Returns the iteration counter. Neighbours outside the vector are simply
 * left out of the sum.
 */
int GaussJacobiPoisson1D(Vector u, double tol, int maxit)
{
  int sweeps=0, k;
  double scale;
  double change = tol+1;
  Vector rhs = createVector(u->len);
  Vector prev = createVector(u->len);
  copyVector(rhs, u);
  fillVector(u, 0.0);
  scale = maxNorm(rhs);
  const double diag = 2.0+alpha;
  while (change > tol*scale && ++sweeps < maxit) {
    /* keep the previous iterate, restart the new one from the rhs */
    copyVector(prev, u);
    copyVector(u, rhs);
#pragma omp parallel for schedule(static)
    for (k=0;k<prev->len;++k) {
      double acc = u->data[k];
      if (k > 0)
        acc += prev->data[k-1];
      if (k < prev->len-1)
        acc += prev->data[k+1];
      u->data[k] = acc / diag;
    }
    /* prev <- prev - u, i.e. the update of this sweep */
    axpy(prev, u, -1.0);
    change = maxNorm(prev);
  }
  freeVector(rhs);
  freeVector(prev);

  return sweeps;
}
示例#16
0
/* ************************************************************************* */
/**
 * Returns the point (1 - tau) * x_u + tau * x_n whose norm equals delta,
 * where tau in [0, 1] is a root of
 *   (uu - 2 un + nn) tau^2 + 2 (un - uu) tau + (uu - delta^2) = 0.
 * See doc/trustregion.lyx or doc/trustregion.pdf.
 *
 * The asserts require exactly one of the two roots to lie in [0, 1].
 * When verbose is set, the chosen fraction is printed to cout.
 */
VectorValues DoglegOptimizerImpl::ComputeBlend(double delta, const VectorValues& x_u, const VectorValues& x_n, const bool verbose) {

  // Inner products of the two candidate steps
  const double un = dot(x_u, x_n);
  const double uu = dot(x_u, x_u);
  const double nn = dot(x_n, x_n);

  // Coefficients of the quadratic in tau
  const double a = uu - 2.*un + nn;
  const double b = 2. * (un - uu);
  const double c = uu - delta*delta;
  const double discRoot = std::sqrt(b*b - 4*a*c);

  // Both roots of the quadratic
  const double tauPlus = (-b + discRoot) / (2.*a);
  const double tauMinus = (-b - discRoot) / (2.*a);

  // Pick the root inside [0, 1]
  double tau = tauMinus;
  if(0.0 <= tauPlus && tauPlus <= 1.0) {
    assert(!(0.0 <= tauMinus && tauMinus <= 1.0));
    tau = tauPlus;
  } else {
    assert(0.0 <= tauMinus && tauMinus <= 1.0);
  }

  if(verbose) cout << "In blend region with fraction " << tau << " of Newton's method point" << endl;

  // blend = (1 - tau) * x_u + tau * x_n
  VectorValues blend = (1. - tau) * x_u;
  axpy(tau, x_n, blend);
  return blend;
}
示例#17
0
文件: misc.hpp 项目: milthorpe/LibBi
/*
 * Computes p3 from p1, p2, the cross term C and q2, following the formulas
 * documented in the body: a gain matrix K is formed from C and p2.prec(),
 * the mean of p3 is p1's mean shifted by K times (q2.mean() - p2.mean()), and
 * the covariance of p3 is p1's covariance plus/minus two rank-k updates built
 * from K with q2.std() and p2.std() respectively.
 *
 * p1  supplies the initial mean and covariance of the result
 * p2  supplies prec(), mean(), std() and the log flags copied to p3
 * C   matrix of size p1.size() x p2.size()
 * q2  same size as p2; its mean/std play the role of mu_3 / Sigma_3 below
 * p3  output, same size as p1
 *
 * NOTE(review): A1 and A2 are declared p2.size() x p2.size() but are assigned
 * from K, which is p1.size() x p2.size() - confirm that assignment handles the
 * shape or that the sizes are always equal.
 * NOTE(review): the log flags are taken from p2 although p3 has p1's size
 * (bi::condition takes them from p1) - confirm this is intended.
 */
void bi::marginalise(const ExpGaussianPdf<V1, M1>& p1,
    const ExpGaussianPdf<V2,M2>& p2, const M3 C,
    const ExpGaussianPdf<V4, M4>& q2, ExpGaussianPdf<V5,M5>& p3) {
  /* pre-conditions */
  BI_ASSERT(q2.size() == p2.size());
  BI_ASSERT(p3.size() == p1.size());
  BI_ASSERT(C.size1() == p1.size() && C.size2() == p2.size());

  typename sim_temp_vector<V1>::type z2(p2.size());
  typename sim_temp_matrix<M1>::type K(p1.size(), p2.size());
  typename sim_temp_matrix<M1>::type A1(p2.size(), p2.size());
  typename sim_temp_matrix<M1>::type A2(p2.size(), p2.size());

  /**
   * Compute gain matrix:
   *
   * \f[\mathcal{K} = C_{\mathbf{x}_1,\mathbf{x}_2}\Sigma_2^{-1}\,.\f]
   */
  symm(1.0, p2.prec(), C, 0.0, K, 'R', 'U');

  /**
   * Then result is given by \f$\mathcal{N}(\boldsymbol{\mu}',
   * \Sigma')\f$, where:
   *
   * \f[\boldsymbol{\mu}' = \boldsymbol{\mu}_1 +
   * \mathcal{K}(\boldsymbol{\mu}_3 - \boldsymbol{\mu}_2)\,,\f]
   */
  z2 = q2.mean();
  axpy(-1.0, p2.mean(), z2);
  p3.mean() = p1.mean();
  gemv(1.0, K, z2, 1.0, p3.mean());

  /**
   * and:
   *
   * \f{eqnarray*}
   * \Sigma' &=& \Sigma_1 + \mathcal{K}(\Sigma_3 -
   * \Sigma_2)\mathcal{K}^T \\
   * &=& \Sigma_1 + \mathcal{K}\Sigma_3\mathcal{K}^T -
   * \mathcal{K}\Sigma_2\mathcal{K}^T\,.
   * \f}
   */
  p3.cov() = p1.cov();

  /* add the q2 term: A1 = K combined with q2.std(), then a rank-k update */
  A1 = K;
  trmm(1.0, q2.std(), A1, 'R', 'U', 'T');
  syrk(1.0, A1, 1.0, p3.cov(), 'U');

  /* subtract the p2 term the same way (note the -1.0 weight) */
  A2 = K;
  trmm(1.0, p2.std(), A2, 'R', 'U', 'T');
  syrk(-1.0, A2, 1.0, p3.cov(), 'U');

  /* make sure correct log-variables set */
  p3.setLogs(p2.getLogs());
  p3.init(); // redo precalculations
}
示例#18
0
// Fills the global material-property vectors (psiD, lambda, KWs, thetaS,
// thetaR, thetaSR) from the zone description stored in the parameter
// database, then copies each global vector into its local counterpart.
//
// pd    parameter database; read through pd.i (ints), pd.r (reals) and
//       pd.v (vectors indexed by zone)
// node  stencil/index object; node(i,j,k) positions it on a grid node and
//       node.center is then used as the index into the global vectors
//
// Every locally owned node is tested against all nZones axis-aligned boxes
// [left,right) x [front,back) x [bottom,top); when several zones contain the
// node, the last matching zone wins because later assignments overwrite
// earlier ones.
void BrooksCorey2p::readZones(ParameterDatabase& pd,Petsc::SecondOrderFd& node)
{
    int nZones = pd.i("nZones");
    // Grid spacings; dy and dz stay 0 when there is a single node in that direction.
    real dx=0.0,dy=0.0,dz=0.0;
    dx=(pd.r("xRight") - pd.r("xLeft")) / (real(pd.i("nxNodes"))-1);
    if (pd.i("nyNodes") > 1)
        dy=(pd.r("yBack") - pd.r("yFront")) / (real(pd.i("nyNodes"))-1);
    if (pd.i("nzNodes") > 1)
        dz=(pd.r("zTop") - pd.r("zBottom")) / (real(pd.i("nzNodes"))-1);
    real gravity=pd.r("gravity"), // m/d^2
         density=pd.r("rhoW"), // kg / m^3
         viscosity=pd.r("muW"); // kg /m d

    // i runs over z, j over y, k over x
    for (int i=node.local_z0; i<node.local_z0+node.local_nzNodes; i++)
        for (int j=node.local_y0; j<node.local_y0+node.local_nyNodes; j++)
            for (int k=node.local_x0; k<node.local_x0+node.local_nxNodes; k++)
            {
                node(i,j,k);
                // Coordinates are index * spacing (no offset by xLeft/yFront/zBottom is applied here).
                real x=k*dx,y=j*dy,z=i*dz;
                for (int n=0; n<nZones; n++)
                {
                    if (x >= pd.v("zoneLeft")(n) && x < pd.v("zoneRight")(n) &&
                            y >= pd.v("zoneFront")(n) && y < pd.v("zoneBack")(n) &&
                            z >= pd.v("zoneBottom")(n) && z < pd.v("zoneTop")(n) )
                    {
                        global_psiD(node.center) = pd.v("pdZone")(n);
                        global_lambda(node.center) = pd.v("lambdaZone")(n);
                        // permZone scaled by gravity*density/viscosity
                        global_KWs(node.center) = pd.v("permZone")(n)*gravity*density/viscosity;
                        global_thetaS(node.center) = pd.v("thetaS_Zone")(n);
                        global_thetaR(node.center) = pd.v("thetaR_Zone")(n);
                    }
                }
            }
//     global_thetaSR = global_thetaS - global_thetaR;
    // Same result as the commented expression above, written as copy + axpy.
    global_thetaSR = global_thetaS;
    axpy(-1.0,global_thetaR,global_thetaSR);
//     std::cout<<global_psiD<<std::endl
//              <<global_lambda<<std::endl
//              <<global_thetaS<<std::endl
//              <<global_KWs<<std::endl;


    // Transfer each global vector to its local counterpart (start/end pair per vector).
    psiD.startSetFromGlobal(global_psiD);
    psiD.endSetFromGlobal(global_psiD);
    lambda.startSetFromGlobal(global_lambda);
    lambda.endSetFromGlobal(global_lambda);
    KWs.startSetFromGlobal(global_KWs);
    KWs.endSetFromGlobal(global_KWs);
    thetaS.startSetFromGlobal(global_thetaS);
    thetaS.endSetFromGlobal(global_thetaS);
    thetaR.startSetFromGlobal(global_thetaR);
    thetaR.endSetFromGlobal(global_thetaR);
    thetaSR.startSetFromGlobal(global_thetaSR);
    thetaSR.endSetFromGlobal(global_thetaSR);
}
示例#19
0
// Times a single axpy call on two host arrays of 2^pow doubles (pow is read
// from the first command line argument, default 16) and verifies that every
// entry of y equals 6 afterwards (x is filled with 1.5, y with 3.0, and the
// axpy coefficient is 2.0).
int main(int argc, char** argv) {
    const size_t pow = read_arg(argc, argv, 1, 16);
    // Shift in size_t: `1 << pow` is an int shift and overflows for pow >= 31.
    const size_t n = static_cast<size_t>(1) << pow;

    std::cout << "memcopy and daxpy test of size " << n << std::endl;

    double* x = malloc_host<double>(n, 1.5);
    double* y = malloc_host<double>(n, 3.0);

    #ifdef FLUSH_CACHE
    // use dummy fields to avoid cache effects, which make results harder to interpret
    // use 1<<24 to ensure that cache is completely purged for all n
    const size_t flush_n = static_cast<size_t>(1) << 24;
    double* x_ = malloc_host<double>(flush_n, 1.5);
    double* y_ = malloc_host<double>(flush_n, 3.0);
    axpy(flush_n, 2.0, x_, y_);
    #endif

    double start = get_time();
    axpy(n, 2.0, x, y);
    double time_axpy = get_time() - start;

    std::cout << "-------\ntimings\n-------" << std::endl;
    std::cout << "axpy : " << time_axpy << " s" << std::endl;
    std::cout << std::endl;

    // check for errors (index type matches n to avoid a signed/unsigned compare)
    int errors = 0;
    for(size_t i=0; i<n; ++i) {
        if(std::fabs(6.-y[i])>1e-15) {
            errors++;
        }
    }

    if(errors>0) std::cout << "\n============ FAILED with " << errors << " errors" << std::endl;
    else         std::cout << "\n============ PASSED" << std::endl;

    free(x);
    free(y);
    #ifdef FLUSH_CACHE
    // the flush buffers were previously leaked
    free(x_);
    free(y_);
    #endif

    return 0;
}
示例#20
0
void SkipGram_HS::update(uint64_t cur, const vector<uint64_t>& context, real alpha) {
    real* hidden_vec = _net.get_input_vec(cur);
    memset(_hidden_vec_grad, 0, sizeof(real) * _sz);

    for (vector<uint64_t>::const_iterator it = context.begin(); it != context.end(); ++it) {
        const vector<char>& code = _huffman_tree.code(*it);
        const vector<uint64_t>& path = _huffman_tree.path(*it);
        for (size_t i = 0; i != path.size(); ++i) {
            real* out = _net.get_output_vec(path[i]);
            real sigma = sigmoid(dot(_sz, hidden_vec, out));
            int label = code[i];
            real coef = (label - sigma) * alpha;
        
            axpy(_sz, coef, out, _hidden_vec_grad);
            // not exactly the same as the original
            axpy(_sz, coef, hidden_vec, out);
        }
    }
    addto(_sz, _hidden_vec_grad, hidden_vec);
}
示例#21
0
void SkipGram_NS::update(uint64_t cur, const vector<uint64_t>& context, real alpha) {
    const real* hidden_vec = _net.get_input_vec(cur);
    memset(_hidden_vec_grad, 0, sizeof(real) * _sz);

    for (vector<uint64_t>::const_iterator i = context.begin(); i != context.end(); ++i) {
        _out.clear();
        _out.push_back(_net.get_output_vec(*i));
        for (unsigned j = 0; j < _neg_sample_cnt; ++j) {
            _out.push_back(_net.get_output_vec(_vocab.sampling(&_seed)));
        }
        
        for (size_t i = 0; i != _out.size(); ++i) {
            real sigma = sigmoid(dot(_sz, hidden_vec, _out[i]));
            int label = (i==0) ? 1 : 0;
            real coef = (label - sigma) * alpha;

            axpy(_sz, coef, _out[i], _hidden_vec_grad);
            axpy(_sz, coef, hidden_vec, const_cast<real*>(_out[i]));
        }
    }
    addto(_sz, _hidden_vec_grad, const_cast<real*>(hidden_vec));
}
示例#22
0
文件: misc.hpp 项目: milthorpe/LibBi
/*
 * Writes the covariance of the uniform distribution q into Sigma: Sigma is
 * cleared and its diagonal receives (upper - lower)^2 / 12 per component.
 *
 * q      distribution providing lower() and upper()
 * Sigma  output matrix, q.size() x q.size()
 */
void bi::cov(const UniformPdf<V1>& q, M1 Sigma) {
  /* pre-condition */
  BI_ASSERT(Sigma.size1() == q.size());
  BI_ASSERT(Sigma.size2() == q.size());

  /* diff = (upper - lower)^2, element-wise */
  temp_host_vector<real>::type diff(q.size());
  diff = q.upper();
  sub_elements(diff, q.lower(), diff);
  sq_elements(diff, diff);

  /* off-diagonal entries stay zero; the diagonal gets diff/12 */
  Sigma.clear();
  axpy(1.0/12.0, diff, diagonal(Sigma));
}
示例#23
0
// Applies a correction to the current argument: yDaeDef <- yDaeDef - correction.
// The previous argument and function value are saved in yLast / Flast first,
// and both the function and the Jacobian are flagged for re-evaluation.
void RosenbrockDaeDefinition::correctArgument(Vec& correction)
{
    // save the state that is about to be overwritten
    Flast = Fcurrent;
    yLast = yDaeDef;

    // cached F and Jacobian no longer match the argument
    updateF=true;
    updateJac=true;

#ifdef USE_BLAS
    axpy(-1.0,correction,yDaeDef);
#else
    yDaeDef-=correction;
#endif
}
// Driver exercising three arrays: one on the stack (arrayOnStack, published
// through the stackArray variable), plus newArray and mallocArray which are
// set up by createNewArray() / createMallocArray().
//
// NOTE(review): setArray() is called with newArray - 1 and mallocArray - 1,
// i.e. pointers one element before the start of those arrays, and the arrays
// are never released before the pointers are zeroed. This looks like a
// deliberate demonstration of memory errors (e.g. for a memory-checking
// tool) - confirm before "fixing" anything here.
int main() {
  double arrayOnStack[10];
  stackArray = arrayOnStack;
  createNewArray();
  createMallocArray();

  setArray(stackArray);
  // pointers below start one element before the arrays
  setArray(newArray - 1);
  setArray(mallocArray - 1);

  printArray(newArray);
  printArray(mallocArray);

  axpy(newArray, stackArray);
  axpy(newArray, mallocArray);

  // delete stackArray;
  // pointers are only zeroed; nothing is released here
  stackArray = 0;
  newArray = 0;
  mallocArray = 0;

  return 0;
}
示例#25
0
// Times axpy() (OpenMP) and axpy_gpu() (OpenACC) on separate pairs of host
// arrays of 2^pow doubles (pow is read from the first command line argument,
// default 16), then verifies that every entry of y - the array updated by
// axpy_gpu - equals 6 (x is filled with 1.5, y with 3.0, coefficient 2.0).
int main(int argc, char** argv)
{
    const size_t pow = read_arg(argc, argv, 1, 16);
    // Shift in size_t: `1 << pow` is an int shift and overflows for pow >= 31.
    const size_t n = static_cast<size_t>(1) << pow;

    std::cout << "memcopy and daxpy test of size " << n << "\n";

    double* x = malloc_host<double>(n, 1.5);
    double* y = malloc_host<double>(n, 3.0);

    // separate arrays for the OpenMP run so that the two timed calls do not
    // operate on the same data
    double* x_ = malloc_host<double>(n, 1.5);
    double* y_ = malloc_host<double>(n, 3.0);

    // openmp version:
    auto start = get_time();
    axpy(n, 2.0, x_, y_);
    auto time_axpy_omp = get_time() - start;

    // openacc version:
    start = get_time();
    axpy_gpu(n, 2.0, x, y);
    auto time_axpy_gpu = get_time() - start;

    std::cout << "-------\ntimings\n-------\n";
    std::cout << "axpy (openmp): "  << time_axpy_omp << " s\n";
    std::cout << "axpy (openacc): " << time_axpy_gpu << " s\n";

    // check for errors (index type matches n; `auto i = 0` deduced int)
    int errors = 0;
    #pragma omp parallel for reduction(+:errors)
    for (size_t i = 0; i < n; ++i) {
        if (std::fabs(6.-y[i]) > 1e-15) {
            ++errors;
        }
    }

    if (errors > 0) {
        std::cout << "\n============ FAILED with " << errors << " errors\n";
    } else {
        std::cout << "\n============ PASSED\n";
    }

    free(x);
    free(y);
    // previously leaked
    free(x_);
    free(y_);
    return 0;
}
示例#26
0
文件: misc.hpp 项目: milthorpe/LibBi
/*
 * Computes p3 from p1, p2, the cross term C and the value x2, following the
 * formulas documented in the body: the mean of p3 is p1's mean shifted by the
 * gain K times (x2 - p2.mean()), and the covariance of p3 is p1's covariance
 * minus a rank-k update built from C and p2.std().
 *
 * p1  supplies the initial mean/covariance and the log flags copied to p3
 * p2  supplies prec(), mean(), std() and the log flags applied to x2
 * C   matrix of size p1.size() x p2.size()
 * x2  value of the second variable, same size as p2
 * p3  output, same size as p1
 */
void bi::condition(const ExpGaussianPdf<V1, M1>& p1, const ExpGaussianPdf<V2,
    M2>& p2, const M3 C, const V3 x2, ExpGaussianPdf<V4, M4>& p3) {
  /* pre-condition */
  BI_ASSERT(x2.size() == p2.size());
  BI_ASSERT(p3.size() == p1.size());
  BI_ASSERT(C.size1() == p1.size() && C.size2() == p2.size());

  typename sim_temp_vector<V1>::type z2(p2.size());
  typename sim_temp_matrix<M1>::type K(p1.size(), p2.size());

  /**
   * Compute gain matrix:
   *
   * \f[\mathcal{K} = C_{\mathbf{x}_1,\mathbf{x}_2}\Sigma_2^{-1}\,.\f]
   */
  symm(1.0, p2.prec(), C, 0.0, K, 'R', 'U');

  /**
   * Then result is given by \f$\mathcal{N}(\boldsymbol{\mu}',
   * \Sigma')\f$, where:
   *
   * \f[\boldsymbol{\mu}' = \boldsymbol{\mu}_1 + \mathcal{K}(\mathbf{x}_2 -
   * \boldsymbol{\mu}_2)\,,\f]
   */
  z2 = x2;
  /* x2 is passed through log_vector with p2's log flags before the mean is subtracted */
  log_vector(z2, p2.getLogs());
  axpy(-1.0, p2.mean(), z2);
  p3.mean() = p1.mean();
  gemv(1.0, K, z2, 1.0, p3.mean());

  /**
   * and:
   *
   * \f{eqnarray*}
   * \Sigma' &=& \Sigma_1 - \mathcal{K}C_{\mathbf{x}_1,\mathbf{x}_2}^T \\
   * &=& \Sigma_1 - C_{\mathbf{x}_1,\mathbf{x}_2}\Sigma_2^{-1}
   * C_{\mathbf{x}_1,\mathbf{x}_2}^T\,.\f}
   */
  /* K is reused as scratch from here on: the gain computed above is overwritten */
  K = C;
  trsm(1.0, p2.std(), K, 'R', 'U');
  p3.cov() = p1.cov();
  syrk(-1.0, K, 1.0, p3.cov(), 'U');

  /* update log-variables and precalculations */
  p3.setLogs(p1.getLogs());
  p3.init();
}
示例#27
0
// Double-precision dense axpy entry point: r = alpha * x + y.
//
// Returns clsparseNotInitialized when the library is not initialized,
// clsparseInvalidControlObject when control is null, clsparseSuccess for an
// empty vector or alpha == 0 (in which case y is copied to r unless both share
// the same buffer), and otherwise the status of the internal axpy kernel call.
clsparseStatus
cldenseDaxpy(cldenseVector *r,
             const clsparseScalar *alpha, const cldenseVector *x,
             const cldenseVector *y,
             const clsparseControl control)
{
    if (!clsparseInitialized)
        return clsparseNotInitialized;

    // a valid control object is required for all OpenCL work below
    if (control == nullptr)
        return clsparseInvalidControlObject;

    clsparse::vector<cl_double> pR (control, r->values, r->num_values);
    clsparse::vector<cl_double> pAlpha(control, alpha->value, 1);
    clsparse::vector<cl_double> pX (control, x->values, x->num_values);
    clsparse::vector<cl_double> pY (control, y->values, y->num_values);

    assert(pR.size() == pY.size());
    assert(pR.size() == pX.size());

    const cl_ulong length = pR.size();
    if (length == 0)
        return clsparseSuccess;

    // general case: run the kernel
    if (pAlpha[0] != 0.0)
        return axpy(pR, pAlpha, pX, pY, control);

    // alpha == 0: the result is just y; deep copy it unless r already
    // refers to the same buffer
    if (pR.data()() != pY.data()())
        pR = pY;

    return clsparseSuccess;
}
示例#28
0
文件: misc.hpp 项目: milthorpe/LibBi
void bi::distance(const M1 X, const real h, M2 D) {
  /* pre-conditions */
  BI_ASSERT(D.size1() == D.size2());
  BI_ASSERT(D.size1() == X.size1());
  BI_ASSERT(!M2::on_device);

  typedef typename M1::value_type T1;

  FastGaussianKernel K(X.size2(), h);
  typename temp_host_vector<T1>::type d(X.size2());
  int i, j;
  for (j = 0; j < D.size2(); ++j) {
    for (i = 0; i <= j; ++i) {
      d = row(X, i);
      axpy(-1.0, row(X, j), d);
      D(i, j) = K(dot(d));
    }
  }
}
示例#29
0
/*
 * Two-colour Gauss-Seidel style iteration on a 2D grid with OpenMP.
 *
 * u         - on entry the right-hand side, on exit the iterate
 * tolerance - the loop runs while the 2-norm of the last update exceeds it
 * maxit     - iteration cap
 *
 * The interior range of u->rows-2 entries is split into 2*max_threads()
 * chunks; each thread owns two chunks (index 2*thread + color) and the two
 * colours are swept one after the other.
 *
 * NOTE(review): v is only created with cloneMatrix(u) and is read in the
 * stencil before being written - confirm that cloneMatrix copies or zeroes
 * the data; otherwise the first sweep reads whatever cloneMatrix left there.
 * NOTE(review): the outer index i runs over u->cols while the chunked index
 * cnt comes from u->rows; data[i][cnt] therefore presumably addresses
 * column i, row cnt - confirm the storage layout.
 */
void GS(Matrix u, double tolerance, int maxit)
{
  int it=0;
  Matrix b = cloneMatrix(u);
  Matrix e = cloneMatrix(u);
  Matrix v = cloneMatrix(u);
  int* sizes, *displ;
  splitVector(u->rows-2, 2*max_threads(), &sizes, &displ);
  /* b keeps the right-hand side, u starts from zero */
  copyVector(b->as_vec, u->as_vec);
  fillVector(u->as_vec, 0.0);
  double max = tolerance+1;
  while (max > tolerance && ++it < maxit) {
    /* e = previous iterate, u restarts from the right-hand side */
    copyVector(e->as_vec, u->as_vec);
    copyVector(u->as_vec, b->as_vec);
    for (int color=0;color<2;++color) {
      for (int i=1;i<u->cols-1;++i) {
#pragma omp parallel
        {
          /* +1 skips the boundary entry in front of the interior range */
          int cnt=displ[get_thread()*2+color]+1;
          for (int j=0;j<sizes[get_thread()*2+color];++j, ++cnt) {
            /* four-neighbour sum taken from v, then averaged */
            u->data[i][cnt] += v->data[i][cnt-1];
            u->data[i][cnt] += v->data[i][cnt+1];
            u->data[i][cnt] += v->data[i-1][cnt];
            u->data[i][cnt] += v->data[i+1][cnt];
            u->data[i][cnt] /= 4.0;
            /* publish the new value so later updates in this sweep see it */
            v->data[i][cnt] = u->data[i][cnt];
          }
        }
      }
    }
    /* e <- e - u, i.e. the update of this sweep */
    axpy(e->as_vec, u->as_vec, -1.0);
    max = sqrt(innerproduct(e->as_vec, e->as_vec));
  }
  printf("number of iterations %i %f\n", it, max);
  freeMatrix(b);
  freeMatrix(e);
  freeMatrix(v);
  free(sizes);
  free(displ);
}
示例#30
0
// Approximates the Jacobian-vector product by a one-sided finite difference:
//   Jv = (F(y + delFac*v) - F(y)) / delFac
//
// v   direction vector
// Jv  [out] the approximation; left untouched when the initial evaluation fails
//
// Returns true only when the function evaluation at the current argument
// fails. If the evaluation at the perturbed argument fails, the perturbation
// is reduced by a factor of 10 and retried until it succeeds. The argument is
// restored with unCorrect() before returning.
bool RosenbrockDaeDefinition::numericalJacVec(const Vec& v, Vec& Jv)
{
    bool evalError=false;
    real delFac=1.0e-5;
    value(evalError);//sets Fcurrent
    if (evalError)
        return evalError;
    else
    {
//cek       del = -delFac*v;
        del = v;
        scal(-delFac,del);
        //end cek
        correctArgument(del);//sets Flast to Fcurrent --
        //remember correction is -del = delFac*v
        value(evalError);
        while (evalError)
        {
            std::cerr<<"cutting back on del in numerical jac vec"<<std::endl;
            unCorrect();//sets Flast to Fcurrent
            delFac*=0.1;
//cek           del = -delFac*v;
            del = v;
            scal(-delFac,del);
            //end cek
            correctArgument(del);
            value(evalError);//sets Fcurrent
        }
//cek      Jv = (Fcurrent - Flast)/delFac;
        Jv = Fcurrent;
        axpy(-1.0,Flast,Jv);
        // scal() multiplies (see del above), so the difference quotient needs
        // the reciprocal of delFac; scaling by delFac itself was off by delFac^2.
        scal(1.0/delFac,Jv);
        //end cek
        unCorrect();
    }
    return evalError;
}