static void apply(Alpha a, const V1 &x, const V2 &y, Beta b, V3 &z) {
  if (!math::is_zero(b))
    z.array() = a * x.array() * y.array() + b * z.array();
  else
    z.array() = a * x.array() * y.array();
}
void foo(D *p) {
  for (int i = 0; i < 10; ++i) {
    V1 *q = p; // assignment to 'q' should be brought
               // outside the loop
    q->bar(p->V1::v1 + p->V2::v1, p->V1::v2 + p->V2::v2);
  }
}
void bi::renormalise(V1 lws) {
  /* replace non-finite log-weights with log(0) = -inf */
  thrust::replace_if(lws.begin(), lws.end(), is_not_finite_functor<real>(),
      bi::log(0.0));
  real mx = max_reduce(lws);
  if (is_finite(mx)) {
    /* subtract the maximum so the largest log-weight becomes zero,
     * avoiding overflow when the weights are later exponentiated */
    sub_elements(lws, mx, lws);
  }
}
void bi::cov(const M1 X, const V1 mu, M2 Sigma) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == mu.size());
  BI_ASSERT(Sigma.size1() == mu.size() && Sigma.size2() == mu.size());

  const int N = X.size1();
  typename sim_temp_matrix<M2>::type Y(X.size1(), X.size2());
  Y = X;
  sub_rows(Y, mu);
  syrk(1.0/(N - 1.0), Y, 0.0, Sigma, 'U', 'T');
}
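/* A minimal standalone sketch (plain C++, no LibBi types; names here are
 * illustrative only) of the same centre-then-accumulate computation cov()
 * performs above: subtract the column means from each row of X, then form
 * Sigma = Y^T Y / (N - 1), which syrk() does in one BLAS call. */
#include <cstddef>
#include <vector>

std::vector<std::vector<double> > cov_sketch(
    const std::vector<std::vector<double> >& X,  // N rows, d columns
    const std::vector<double>& mu) {             // column means, size d
  const std::size_t N = X.size(), d = mu.size();
  std::vector<std::vector<double> > Sigma(d, std::vector<double>(d, 0.0));
  for (std::size_t n = 0; n < N; ++n) {
    for (std::size_t i = 0; i < d; ++i) {
      for (std::size_t j = 0; j < d; ++j) {
        Sigma[i][j] += (X[n][i] - mu[i])*(X[n][j] - mu[j])/(N - 1.0);
      }
    }
  }
  return Sigma;
}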
ExecStatus
Int<V0,V1,Idx,Val>::post(Home home, IntSharedArray& c, V0 x0, V1 x1) {
  if (x0.assigned()) {
    GECODE_ME_CHECK(x1.eq(home,c[x0.val()]));
  } else if (x1.assigned()) {
    GECODE_ES_CHECK(assigned_val(home,c,x0,x1));
  } else {
    (void) new (home) Int<V0,V1,Idx,Val>(home,c,x0,x1);
  }
  return ES_OK;
}
void MiscCheckTheParallelEnvironment(const V1& vec1, const V2& vec2) {
  const BaseEnvironment& env = vec1.env();

  if (env.numSubEnvironments() == (unsigned int) env.fullComm().NumProc()) {
    UQ_FATAL_TEST_MACRO(env.subRank() != 0,
                        env.worldRank(),
                        "MiscCheckTheParallelEnvironment<V1,V2>()",
                        "there should exist only one processor per sub environment");
    UQ_FATAL_TEST_MACRO((vec1.numOfProcsForStorage() != 1) ||
                        (vec2.numOfProcsForStorage() != 1),
                        env.worldRank(),
                        "MiscCheckTheParallelEnvironment<V1,V2>()",
                        "only 1 processor (per sub environment) should be necessary for the storage of a parameter vector");
  }
  else if (env.numSubEnvironments() < (unsigned int) env.fullComm().NumProc()) {
    UQ_FATAL_TEST_MACRO(env.fullComm().NumProc()%env.numSubEnvironments() != 0,
                        env.worldRank(),
                        "MiscCheckTheParallelEnvironment<V1,V2>()",
                        "total number of processors should be a multiple of the number of sub environments");
    unsigned int numProcsPerSubEnvironment =
      env.fullComm().NumProc()/env.numSubEnvironments();
    UQ_FATAL_TEST_MACRO(env.subComm().NumProc() != (int) numProcsPerSubEnvironment,
                        env.worldRank(),
                        "MiscCheckTheParallelEnvironment<V1,V2>()",
                        "inconsistent number of processors per sub environment");
    if ((vec1.numOfProcsForStorage() == 1) &&
        (vec2.numOfProcsForStorage() == 1)) {
      // Ok
    }
    else if ((vec1.numOfProcsForStorage() == numProcsPerSubEnvironment) &&
             (vec2.numOfProcsForStorage() == numProcsPerSubEnvironment)) {
      UQ_FATAL_TEST_MACRO(true,
                          env.worldRank(),
                          "MiscCheckTheParallelEnvironment<V1,V2>()",
                          "parallel vectors are not supported yet");
    }
    else {
      UQ_FATAL_TEST_MACRO(true,
                          env.worldRank(),
                          "MiscCheckTheParallelEnvironment<V1,V2>()",
                          "number of processors required for a vector storage should be equal to either 1 or to the number of processors in the sub environment");
    }
  }
  else {
    UQ_FATAL_TEST_MACRO(true,
                        env.worldRank(),
                        "MiscCheckTheParallelEnvironment<V1,V2>()",
                        "number of processors per sub environment is less than 1!");
  }

  return;
}
void bi::var(const M1 X, const V1 mu, V2 sigma) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == mu.size());
  BI_ASSERT(sigma.size() == mu.size());

  const int N = X.size1();
  typename sim_temp_matrix<M1>::type Z(X.size1(), X.size2()); // centred copy, same shape as X
  Z = X;
  sub_rows(Z, mu);
  dot_columns(Z, sigma);
  scal(1.0/(N - 1.0), sigma);
}
void bi::cross(const M1 X, const M2 Y, const V1 muX, const V2 muY,
    M3 SigmaXY) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == muX.size());
  BI_ASSERT(Y.size2() == muY.size());
  BI_ASSERT(X.size1() == Y.size1());
  BI_ASSERT(SigmaXY.size1() == muX.size() && SigmaXY.size2() == muY.size());

  const int N = X.size1();

  gemm(1.0/(N - 1.0), X, Y, 0.0, SigmaXY, 'T', 'N');
  ger(-N/(N - 1.0), muX, muY, SigmaXY);
}
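/* Why gemm() + ger() with those coefficients suffices: expanding the
 * cross-covariance definition,
 *
 *   SigmaXY = 1/(N-1) * sum_n (x_n - muX)(y_n - muY)^T
 *           = X^T Y/(N-1) - N/(N-1) * muX muY^T,
 *
 * since sum_n x_n = N*muX and sum_n y_n = N*muY. The gemm() call forms the
 * first term and the rank-one ger() update subtracts the second. */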
void bi::inverse_gamma_log_densities(const M1 Z, const T1 alpha, const T1 beta,
    V1 p, const bool clear) {
  /* pre-condition */
  BI_ASSERT(Z.size1() == p.size());

  op_elements(vec(Z), vec(Z), inverse_gamma_log_density_functor<T1>(alpha, beta));
  if (clear) {
    sum_columns(Z, p);
  } else {
    typename sim_temp_vector<V1>::type p1(p.size());
    sum_columns(Z, p1);
    add_elements(p, p1, p);
  }
}
void bi::MetropolisResamplerHost::ancestors(Random& rng, const V1 lws, V2 as,
    int B) {
  const int P1 = lws.size(); // number of particles
  const int P2 = as.size(); // number of ancestors to draw

  #pragma omp parallel
  {
    real alpha, lw1, lw2;
    int k, p1, p2, p;

    #pragma omp for
    for (p = 0; p < P2; ++p) {
      p1 = p;
      lw1 = lws(p);
      for (k = 0; k < B; ++k) {
        p2 = rng.uniformInt(0, P1 - 1);
        lw2 = lws(p2);
        alpha = rng.uniform<real>();
        if (bi::log(alpha) < lw2 - lw1) {
          /* accept */
          p1 = p2;
          lw1 = lw2;
        }
      }

      /* write result */
      as(p) = p1;
    }
  }
}
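/* A self-contained sketch (standard C++ only; names are illustrative) of the
 * Metropolis resampling rule used above: starting from particle p, repeatedly
 * propose a uniformly random particle and accept it with probability
 * min(1, w2/w1), evaluated in log space as log(u) < lw2 - lw1. After B such
 * steps the chain approximately targets the normalised weight distribution,
 * without ever normalising the weights. */
#include <cmath>
#include <random>
#include <vector>

int metropolis_ancestor(const std::vector<double>& lws, int p, int B,
                        std::mt19937& rng) {
  std::uniform_int_distribution<int> pick(0, (int) lws.size() - 1);
  std::uniform_real_distribution<double> unif(0.0, 1.0);
  int p1 = p;
  for (int k = 0; k < B; ++k) {
    int p2 = pick(rng);
    if (std::log(unif(rng)) < lws[p2] - lws[p1]) {
      p1 = p2; // accept the proposed particle
    }
  }
  return p1;
}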
result_type apply_incompatible(const V1& src, const V2& dst) const {
  //copy_pixels( color_converted_view<typename V2::value_type>( src, _cc), dst);
  tbb::parallel_for(tbb::blocked_range<std::size_t>(0, src.height()),
      detail::make_tbb_copy_pixels_fun(
          color_converted_view<typename V2::value_type>(src, _cc), dst),
      GIL_TBB_ALGORITHMS_DEFAULT_PARTITIONER());
}
void bi::gaussian_log_densities(const M1 Z, const T1 logZ, V1 p,
    const bool clear) {
  /* pre-condition */
  BI_ASSERT(Z.size1() == p.size());

  typedef typename V1::value_type T2;

  if (clear) {
    dot_rows(Z, p);
    op_elements(p, p, gaussian_log_density_functor<T2>(logZ));
  } else {
    typename sim_temp_vector<V1>::type p1(p.size());
    dot_rows(Z, p1);
    op_elements(p1, p, p, gaussian_log_density_update_functor<T2>(logZ));
  }
}
typename viennacl::enable_if<
    viennacl::is_any_dense_nonstructured_matrix<M1>::value &&
    viennacl::is_any_dense_nonstructured_vector<V1>::value >::type
inplace_solve(const matrix_expression<const M1, const M1, op_trans> & proxy,
              V1 & vec,
              SOLVERTAG) {
  assert( (proxy.lhs().size1() == vec.size()) && bool("Size check failed in inplace_solve(): size1(A) != size(b)"));
  assert( (proxy.lhs().size2() == vec.size()) && bool("Size check failed in inplace_solve(): size2(A) != size(b)"));

  switch (viennacl::traits::handle(proxy.lhs()).get_active_handle_id()) {
    case viennacl::MAIN_MEMORY:
      viennacl::linalg::host_based::inplace_solve(proxy, vec, SOLVERTAG());
      break;
#ifdef VIENNACL_WITH_OPENCL
    case viennacl::OPENCL_MEMORY:
      viennacl::linalg::opencl::inplace_solve(proxy, vec, SOLVERTAG());
      break;
#endif
#ifdef VIENNACL_WITH_CUDA
    case viennacl::CUDA_MEMORY:
      viennacl::linalg::cuda::inplace_solve(proxy, vec, SOLVERTAG());
      break;
#endif
    default:
      throw "not implemented";
  }
}
void rot (const T1 &t1, V1 &v1, const T2 &t2, V2 &v2) {
  typedef typename promote_traits<typename V1::value_type,
                                  typename V2::value_type>::promote_type promote_type;
  vector<promote_type> vt (t1 * v1 + t2 * v2);
  v2.assign (- t2 * v1 + t1 * v2);
  v1.assign (vt);
}
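/* rot() applies the plane (Givens) rotation
 *
 *   [ v1 ]   [  t1  t2 ] [ v1 ]
 *   [ v2 ] = [ -t2  t1 ] [ v2 ]
 *
 * element-wise across the two vectors, mirroring BLAS xROT with c = t1 and
 * s = t2; the temporary vt keeps the update of v1 from corrupting the
 * computation of the new v2. */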
double inner_prod(const V1& v1, const V2& v2) {
  double inner_res = 0;
  for (int i = 0; i < v1.size(); ++i) {
    inner_res += v1[i]*v2[i];
  }
  return inner_res;
}
void bi::MultinomialResamplerHost::ancestors(Random& rng, const V1 lws, V2 as,
    MultinomialPrecompute<ON_HOST>& pre)
    throw (ParticleFilterDegeneratedException) {
  typedef typename V1::value_type T1;

  const int P = as.size();
  const int lwsSize = lws.size();

  T1 lW;

  /* weights */
  if (pre.W > 0) {
    lW = bi::log(pre.W);

    #pragma omp parallel
    {
      int Q = P/bi_omp_max_threads;
      int start = bi_omp_tid*Q + bi::min(bi_omp_tid, P % bi_omp_max_threads);
          // min() handles leftovers
      if (bi_omp_tid < P % bi_omp_max_threads) {
        ++Q; // pick up a leftover
      }

      int i, j = lwsSize;
      T1 lMax = 0.0, lu;
      for (i = Q; i > 0; --i) {
        lMax += bi::log(rng.uniform<T1>())/i;
        lu = lW + lMax;

        while (j > 0 && lu < bi::log(pre.Ws(j - 1))) {
          --j;
        }
        if (pre.sort) {
          as(start + i - 1) = pre.ps(j);
        } else {
          as(start + i - 1) = j;
        }
      }
    }
  } else {
    throw ParticleFilterDegeneratedException();
  }

  /* post-condition */
  BI_ASSERT(max_reduce(as) < lws.size());
}
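/* The inner loop above draws Q order statistics of a Uniform(0,1) sample in
 * descending order without sorting: if u_1, ..., u_Q are i.i.d. uniforms,
 * then m_Q = u_Q^(1/Q) is distributed as the maximum, and
 * m_{i-1} = m_i * u_{i-1}^(1/(i-1)) gives each next-largest value, which is
 * what lMax += log(u)/i accumulates in log space. Each sorted uniform is then
 * merged against the cumulative weights Ws to pick an ancestor, giving a
 * multinomial draw in a single pass. A standalone sketch of the uniform
 * generation (standard C++ only, illustrative names): */
#include <cmath>
#include <random>
#include <vector>

std::vector<double> sorted_uniforms(int Q, std::mt19937& rng) {
  std::uniform_real_distribution<double> unif(0.0, 1.0);
  std::vector<double> u(Q);
  double lmax = 0.0;
  for (int i = Q; i > 0; --i) {
    lmax += std::log(unif(rng))/i;  // log of the next-largest order statistic
    u[i - 1] = std::exp(lmax);      // result is ascending: u[0] <= ... <= u[Q-1]
  }
  return u;
}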
void bi::mean(const M1 X, V1 mu) {
  /* pre-condition */
  BI_ASSERT(X.size2() == mu.size());

  const int N = X.size1();
  typename sim_temp_vector<V1>::type w(N);
  set_elements(w, 1.0);
  gemv(1.0/N, X, w, 0.0, mu, 'T'); // mu = X^T 1/N: column means via one BLAS call
}
inline void bi::mean(const GammaPdf& q, V1 mu) {
  /* pre-condition */
  BI_ASSERT(mu.size() == q.size());

  real alpha = q.shape();
  real beta = q.scale();

  set_elements(mu, alpha*beta);
}
void bi::mean(const M1 X, const V1 w, V2 mu) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == mu.size());
  BI_ASSERT(X.size1() == w.size());

  typedef typename V1::value_type T;

  T Wt = sum_reduce(w);
  gemv(1.0/Wt, X, w, 0.0, mu, 'T'); // weighted column means: mu = X^T w/Wt
}
inline void bi::mean(const InverseGammaPdf& q, V1 mu) {
  /* pre-condition */
  BI_ASSERT(mu.size() == q.size());
  BI_ASSERT(q.shape() > 1.0);

  real alpha = q.shape();
  real beta = q.scale();

  /* the mean of an inverse gamma with shape alpha and scale beta is
   * beta/(alpha - 1), defined only for alpha > 1 (hence the assertion) */
  set_elements(mu, beta/(alpha - 1.0));
}
void bi::cov(const M1 X, const V1 w, const V2 mu, M2 Sigma) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == mu.size());
  BI_ASSERT(X.size1() == w.size());
  BI_ASSERT(Sigma.size1() == mu.size() && Sigma.size2() == mu.size());

  typedef typename V1::value_type T;
  typename sim_temp_matrix<M2>::type Y(X.size1(), X.size2());
  typename sim_temp_matrix<M2>::type Z(X.size1(), X.size2());
  typename sim_temp_vector<V2>::type v(w.size());

  T Wt = sum_reduce(w);
  Y = X;
  sub_rows(Y, mu);
  sqrt_elements(w, v);
  gdmm(1.0, v, Y, 0.0, Z);
  syrk(1.0/Wt, Z, 0.0, Sigma, 'U', 'T');
  // alternative weight: 1.0/(Wt - W2t/Wt)
}
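/* Notes on the weighting above: scaling each centred row by sqrt(w_n) and
 * calling syrk() computes Sigma = 1/Wt * sum_n w_n (x_n - mu)(x_n - mu)^T,
 * the biased weighted covariance. The commented "alternative weight"
 * 1/(Wt - W2t/Wt), with W2t = sum_n w_n^2, is the standard bias correction
 * for reliability (e.g. importance) weights; it reduces to 1/(N - 1) when
 * all weights are equal, since then Wt = N and W2t = N. */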
void bi::hist(const V1 x, const V2 w, V3 c, V4 h) {
  /* pre-conditions */
  BI_ASSERT(x.size() == w.size());
  BI_ASSERT(c.size() == h.size());
  BI_ASSERT(!V3::on_device);
  BI_ASSERT(!V4::on_device);

  typedef typename V1::value_type T1;
  typedef typename V2::value_type T2;

  const int P = x.size();
  const int B = c.size();
  T1 mx, mn;
  int i, j;
  typename temp_host_vector<T1>::type xSorted(P);
  typename temp_host_vector<T2>::type wSorted(P);
  xSorted = x;
  wSorted = w;

  bi::sort_by_key(xSorted, wSorted);
  mn = xSorted[0];
  mx = xSorted[xSorted.size() - 1];

  /* compute bin right edges */
  for (j = 0; j < B; ++j) {
    c[j] = mn + (j + 1)*(mx - mn)/B;
  }

  /* compute bin heights; a while loop, so that sparse data can advance
   * past several empty bins at once */
  h.clear();
  for (i = 0, j = 0; i < P; ++i) {
    while (xSorted[i] >= c[j] && j < B - 1) {
      ++j;
    }
    h[j] += wSorted[i];
  }

  /* compute bin centres */
  for (j = B - 1; j > 0; --j) {
    c[j] = 0.5*(c[j - 1] + c[j]);
  }
  c[0] = 0.5*(mn + c[0]);
}
void bi::var(const M1 X, const V1 w, const V2 mu, V3 sigma) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == mu.size());
  BI_ASSERT(X.size1() == w.size());
  BI_ASSERT(sigma.size() == mu.size());

  typedef typename V1::value_type T1;
  typename sim_temp_matrix<M1>::type Z(X.size1(), X.size2());
  typename sim_temp_matrix<M1>::type Y(X.size1(), X.size2());
  typename sim_temp_vector<V2>::type v(w.size());

  T1 Wt = sum_reduce(w);
  Z = X;
  sub_rows(Z, mu);
  sqrt_elements(w, v);
  gdmm(1.0, v, Z, 0.0, Y);
  dot_columns(Y, sigma);
  divscal_elements(sigma, Wt, sigma);
  // alternative weight: 1.0/(Wt - W2t/Wt)
}
void checkDenseVectorAssignment(V1& v1, V2 const& v2) {
  BOOST_REQUIRE_EQUAL(v1.size(), v2.size());
  //indexed access
  for (std::size_t i = 0; i != v2.size(); ++i) {
    v1(i) = 0;
    BOOST_CHECK_EQUAL(v1(i), 0);
    v1(i) = v2(i);
    BOOST_CHECK_EQUAL(v1(i), v2(i));
    v1(i) = 0;
    BOOST_CHECK_EQUAL(v1(i), 0);
  }
  //iterator access rows
  typedef typename V1::iterator Iter;
  BOOST_REQUIRE_EQUAL(v1.end() - v1.begin(), v1.size());
  std::size_t k = 0;
  for (Iter it = v1.begin(); it != v1.end(); ++it, ++k) {
    BOOST_CHECK_EQUAL(k, it.index());
    *it = 0;
    BOOST_CHECK_EQUAL(v1(k), 0);
    *it = v2(k);
    BOOST_CHECK_EQUAL(v1(k), v2(k));
    *it = 0;
    BOOST_CHECK_EQUAL(v1(k), 0);
  }
  //test that the actual iterated length equals the number of elements
  BOOST_CHECK_EQUAL(k, v2.size());
}
ExecStatus
Int<V0,V1,Idx,Val>::assigned_val(Space& home, IntSharedArray& c,
                                 V0 x0, V1 x1) {
  Region r(home);
  int* v = r.alloc<int>(x0.size());
  int n = 0;
  for (ViewValues<V0> i(x0); i(); ++i)
    if (c[i.val()] != x1.val())
      v[n++] = i.val();
  Iter::Values::Array iv(v,n);
  GECODE_ME_CHECK(x0.minus_v(home,iv,false));
  return ES_OK;
}
template <typename V, typename V1>
void addVectors(V &a, const V1 &b) {
  /* a := a + b, using single or double precision BLAS as configured */
#ifdef FLOAT_REALS
  cblas_saxpy
#else
  cblas_daxpy
#endif
    (a.size(), 1, b.data(), 1, a.data(), 1);

  /* a := 0.5*a, so that overall a becomes the average 0.5*(a + b) */
#ifdef FLOAT_REALS
  cblas_sscal
#else
  cblas_dscal
#endif
    (a.size(), 0.5, a.data(), 1);
}
forceinline ExecStatus
post_int(Home home, IntSharedArray& c, V0 x0, V1 x1) {
  assert(c.size() > 0);
  GECODE_ME_CHECK(x0.gq(home,0));
  GECODE_ME_CHECK(x0.le(home,c.size()));

  Support::IntType idx_type = Support::s_type(c.size());
  int min = c[0];
  int max = c[0];
  for (int i=1; i<c.size(); i++) {
    min = std::min(c[i],min);
    max = std::max(c[i],max);
  }
  GECODE_ME_CHECK(x1.gq(home,min));
  GECODE_ME_CHECK(x1.lq(home,max));
  Support::IntType val_type =
    std::max(Support::s_type(min),Support::s_type(max));

  switch (idx_type) {
  case Support::IT_CHAR:
    switch (val_type) {
    case Support::IT_CHAR:
      return Int<V0,V1,signed char,signed char>::post(home,c,x0,x1);
    case Support::IT_SHRT:
      return Int<V0,V1,signed char,signed short int>::post(home,c,x0,x1);
    default: break;
    }
    break;
  case Support::IT_SHRT:
    switch (val_type) {
    case Support::IT_CHAR:
    case Support::IT_SHRT:
      return Int<V0,V1,signed short int,signed short int>::post(home,c,x0,x1);
    default: break;
    }
    break;
  default: break;
  }
  return Int<V0,V1,signed int,signed int>::post(home,c,x0,x1);
}
template <class V1, class V2>
double dot_impl(const V1 &v1, const V2 &v2) {
  assert(v1.size() == v2.size());
  if (v1.stride() > 0 && v2.stride() > 0) {
    return ddot(v1.size(), v1.data(), v1.stride(), v2.data(), v2.stride());
  } else {
    double ans = 0;
    for (int i = 0; i < v1.size(); ++i) {
      ans += v1[i] * v2[i];
    }
    return ans;
  }
}
void bi::cross(const M1 X, const M2 Y, const V1 w, const V2 muX, const V3 muY,
    M3 SigmaXY) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == muX.size());
  BI_ASSERT(Y.size2() == muY.size());
  BI_ASSERT(X.size1() == Y.size1());
  BI_ASSERT(X.size1() == w.size());
  BI_ASSERT(Y.size1() == w.size());
  BI_ASSERT(SigmaXY.size1() == muX.size() && SigmaXY.size2() == muY.size());

  typedef typename V1::value_type T;
  typename sim_temp_matrix<M3>::type Z(X.size1(), X.size2());

  T Wt = sum_reduce(w);
  T Wt2 = std::pow(Wt, 2);
  T W2t = sumsq_reduce(w);

  gdmm(1.0, w, X, 0.0, Z);
  gemm(1.0/Wt, Z, Y, 0.0, SigmaXY, 'T', 'N');
  ger(-1.0, muX, muY, SigmaXY);
  matrix_scal(1.0/(1.0 - W2t/Wt2), SigmaXY);
}
void swap (V1 &v1, V2 &v2) { v1.swap (v2); }