npoint_mlpack::ResamplingSplitter::ResamplingSplitter(arma::mat& data,
                                                      arma::colvec& weights,
                                                      int num_x_regions,
                                                      int num_y_regions,
                                                      int num_z_regions,
                                                      ResamplingHelper& helper)
  : resampling_helper_(helper),
    num_resampling_regions_(num_x_regions * num_y_regions * num_z_regions),
    data_all_mat_(data.memptr(), data.n_rows, data.n_cols, false),
    data_all_weights_(weights),
    data_mats_(num_resampling_regions_),
    data_weights_(num_resampling_regions_),
    num_x_partitions_(num_x_regions),
    num_y_partitions_(num_y_regions),
    num_z_partitions_(num_z_regions),
    x_step_(resampling_helper_.x_size() / (double)num_x_partitions_),
    y_step_(resampling_helper_.y_size() / (double)num_y_partitions_),
    z_step_(resampling_helper_.z_size() / (double)num_z_partitions_),
    num_points_(data.n_cols),
    num_points_per_region_(num_resampling_regions_, 0)
{
  for (size_t i = 0; i < num_resampling_regions_; i++)
  {
    data_mats_[i] = new arma::mat;
    data_weights_[i] = new arma::colvec;
  }

  SplitData_();
} // constructor
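// A minimal usage sketch (the ResamplingHelper construction is hypothetical;
// it is assumed to describe the bounding box of the data): split a 3 x n
// point set, one column per point, into 2 x 2 x 2 = 8 resampling regions.
//
//   arma::mat data = arma::randu<arma::mat>(3, 10000);
//   arma::colvec weights(10000, arma::fill::ones);
//   ResamplingHelper helper(/* box extents for data */);
//   npoint_mlpack::ResamplingSplitter splitter(data, weights, 2, 2, 2, helper);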
template<typename T>
typename std::enable_if<
    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& /* unused */)
{
  // Alias the layer's weight matrix into the shared parameter block at the
  // given offset (no copy is made), and report how many elements were used.
  layer.Weights() = arma::mat(weights.memptr() + offset,
      layer.Weights().n_rows, layer.Weights().n_cols, false, false);
  return layer.Weights().n_elem;
}
template<typename T>
typename std::enable_if<
    HasGradientCheck<T, arma::cube&(T::*)()>::value, size_t>::type
LayerGradients(T& layer, arma::mat& gradients, size_t offset, arma::cube& /* unused */)
{
  // Alias the layer's gradient cube into the shared gradient block; the
  // dimensions mirror those of the layer's cube-shaped weights.
  layer.Gradient() = arma::cube(gradients.memptr() + offset,
      layer.Weights().n_rows, layer.Weights().n_cols,
      layer.Weights().n_slices, false, false);
  return layer.Weights().n_elem;
}
template<typename InitializationRuleType, typename T>
typename std::enable_if<
    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
LayerWeights(InitializationRuleType& initializeRule, T& layer,
             arma::mat& weights, size_t offset, arma::mat& /* output */)
{
  // Alias the weights as above, then let the initialization rule fill them.
  layer.Weights() = arma::mat(weights.memptr() + offset,
      layer.Weights().n_rows, layer.Weights().n_cols, false, false);
  initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows,
      layer.Weights().n_cols);
  return layer.Weights().n_elem;
}
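// A minimal usage sketch for the helpers above. It assumes the
// HasWeightsCheck trait and the LayerWeights overloads are visible at this
// point; LinearToy is a hypothetical layer used only for illustration.
#include <armadillo>

struct LinearToy
{
  arma::mat weights;
  LinearToy(arma::uword r, arma::uword c) : weights(r, c) {}
  arma::mat& Weights() { return weights; }
};

inline void PackToyNetwork()
{
  // Pack two layers into one flat parameter block by accumulating the
  // offsets each call returns.
  LinearToy a(3, 4), b(2, 5);
  arma::mat parameters(a.Weights().n_elem + b.Weights().n_elem, 1,
      arma::fill::randn);

  size_t offset = 0;
  offset += LayerWeights(a, parameters, offset, parameters);
  offset += LayerWeights(b, parameters, offset, parameters);

  // Both layers now alias disjoint slices of `parameters`: updating the
  // flat block updates the layers in place, and vice versa.
}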
template<class ConvolutionFunction>
void Convolution2DMethodTest(const arma::mat input,
                             const arma::mat filter,
                             const arma::mat output)
{
  arma::mat convOutput;
  ConvolutionFunction::Convolution(input, filter, convOutput);

  // Check the output dimension.
  bool b = (convOutput.n_rows == output.n_rows) &&
      (convOutput.n_cols == output.n_cols);
  BOOST_REQUIRE_EQUAL(b, 1);

  // Compare the computed convolution with the expected output, element by
  // element.
  const double* outputPtr = output.memptr();
  const double* convOutputPtr = convOutput.memptr();

  for (size_t i = 0; i < output.n_elem; i++, outputPtr++, convOutputPtr++)
    BOOST_REQUIRE_CLOSE(*outputPtr, *convOutputPtr, 1e-3);
}
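// A minimal test-case sketch (assuming mlpack's convolution rules are
// available under these headers): a 1x1 filter sidesteps any question of
// kernel flipping, so the expected output is simply the scaled input.
#include <mlpack/methods/ann/convolution_rules/border_modes.hpp>
#include <mlpack/methods/ann/convolution_rules/naive_convolution.hpp>

BOOST_AUTO_TEST_CASE(TrivialConvolution2DMethodTest)
{
  arma::mat input = arma::randu<arma::mat>(4, 4);
  arma::mat filter(1, 1);
  filter(0, 0) = 2.0;

  // With a 1x1 kernel, a "valid" convolution is element-wise scaling.
  Convolution2DMethodTest<mlpack::ann::NaiveConvolution<
      mlpack::ann::ValidConvolution> >(input, filter, 2.0 * input);
}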
void CLgemm::dgemm(arma::mat& res, arma::mat const& left, arma::mat const& right,
                   double alpha, double beta, bool transL, bool transR)
{
  timer.tic();

  if (transL == false && transR == false)
  {
    // Check if res is correctly sized.
    if (res.n_rows != left.n_rows || res.n_cols != right.n_cols)
      throw std::string("Badly shaped \"res\" in CLgemm::dgemm");

    int m = left.n_rows;
    int n = right.n_cols;
    int p = left.n_cols;
    int work = m * n * p;

    if (work < 35000000) // TODO: a more careful analysis of this threshold should be done.
    {
      // Small problem: the host path avoids the transfer overhead of the GPU.
      res = alpha * left * right + beta * res;
    }
    else
    {
      // The memptr cannot be const in CL; however, it is expected not to change.
      cl_double* A_p = const_cast<double*>(left.memptr());
      cl_double* B_p = const_cast<double*>(right.memptr());
      cl_double* C_p = res.memptr();

      // Create CL buffers pointing to host memory.
      cl::Buffer A_cl(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                      sizeof(*A_p) * left.n_elem, A_p);
      cl::Buffer B_cl(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                      sizeof(*B_p) * right.n_elem, B_p);
      cl::Buffer C_cl(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                      sizeof(*C_p) * res.n_elem, C_p);

      const size_t amd_M = left.n_rows;
      const size_t amd_N = right.n_cols;
      const size_t amd_P = right.n_rows;

      if (left.n_cols != right.n_rows)
        throw std::string("CLgemm: left and right matrix dimensions not compatible.");

      const clAmdBlasOrder amd_order = clAmdBlasColumnMajor;
      const clAmdBlasTranspose amd_transAll = clAmdBlasNoTrans;
      const cl_double amd_alpha = alpha;
      const cl_double amd_beta = beta;
      const size_t amd_lda = amd_M;
      const size_t amd_ldb = amd_P;
      const size_t amd_ldc = amd_M;

      cl::Event e;
      clAmdBlasDgemm(amd_order, amd_transAll, amd_transAll,
                     amd_M, amd_N, amd_P,
                     amd_alpha, A_cl(), amd_lda, B_cl(), amd_ldb,
                     amd_beta, C_cl(), amd_ldc,
                     1, &queue(), 0, NULL, &e());

      // Blocking read of the result back into res.
      queue.enqueueReadBuffer(C_cl, true, 0, sizeof(*C_p) * res.n_elem, C_p);
    }
  }
  else if (transL == true && transR == false)
    res = alpha * trans(left) * right + beta * res;
  else if (transL == false && transR == true)
    res = alpha * left * trans(right) + beta * res;
  else // transL && transR
    res = alpha * trans(left) * trans(right) + beta * res;

  tot_time += timer.toc();
  return;
}
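// A minimal usage sketch (assuming a CLgemm instance `gemm` has been set up
// with a valid OpenCL context and queue elsewhere): compute
// C = 2.0 * A * B + 1.0 * C, letting dgemm pick the host or GPU path based
// on the m * n * p work estimate.
//
//   arma::mat A = arma::randu<arma::mat>(512, 256);
//   arma::mat B = arma::randu<arma::mat>(256, 128);
//   arma::mat C = arma::zeros<arma::mat>(512, 128); // must be pre-sized
//   gemm.dgemm(C, A, B, 2.0, 1.0, false, false);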
// [[Rcpp::export]]
arma::mat sgd(arma::mat& coords,
              arma::ivec& targets_i, // vary randomly
              arma::ivec& sources_j, // ordered
              arma::ivec& ps,        // N+1 length vector of indices to start of each row j in vector is
              arma::vec& weights,    // w{ij}
              const double& gamma,
              const double& rho,
              const arma::uword& n_samples,
              const int& M,
              const double& alpha,
              const Rcpp::Nullable<Rcpp::NumericVector> momentum,
              const bool& useDegree,
              const Rcpp::Nullable<Rcpp::NumericVector> seed,
              const Rcpp::Nullable<Rcpp::NumericVector> threads,
              const bool verbose)
{
#ifdef _OPENMP
  checkCRAN(threads);
#endif

  const dimidxtype D = coords.n_rows;
  const vertexidxtype N = coords.n_cols;
  const edgeidxtype E = targets_i.n_elem;

  // Choose the plain or momentum-based visualizer.
  Visualizer* v;
  if (momentum.isNull())
  {
    v = new Visualizer(sources_j.memptr(), targets_i.memptr(), coords.memptr(),
                       D, N, E, rho, n_samples, M, alpha, gamma);
  }
  else
  {
    float moment = NumericVector(momentum)[0];
    if (moment < 0) throw Rcpp::exception("Momentum cannot be negative.");
    if (moment > 0.95) throw Rcpp::exception("Bad things happen when momentum is > 0.95.");
    v = new MomentumVisualizer(sources_j.memptr(), targets_i.memptr(), coords.memptr(),
                               D, N, E, rho, n_samples, moment, M, alpha, gamma);
  }

  // Negative-sampling weights: vertex degrees or summed edge weights, raised
  // to the 3/4 power.
  distancetype* negweights = new distancetype[N];
  std::fill(negweights, negweights + N, 0);
  if (useDegree)
  {
    std::for_each(targets_i.begin(), targets_i.end(),
                  [&negweights](const sword& e) { negweights[e]++; });
  }
  else
  {
    for (vertexidxtype p = 0; p < N; ++p)
    {
      for (edgeidxtype e = ps[p]; e != ps[p + 1]; ++e)
      {
        negweights[p] += weights[e];
      }
    }
  }
  std::for_each(negweights, negweights + N,
                [](distancetype& weight) { weight = pow(weight, 0.75); });
  v->initAlias(weights.memptr(), negweights, seed);
  delete[] negweights;

  const uword batchSize = BATCHSIZE;
#ifdef _OPENMP
  const unsigned int ts = omp_get_max_threads();
#else
  const unsigned int ts = 2;
#endif
  Progress progress(max((uword) ts, n_samples / BATCHSIZE), verbose);

#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (unsigned int t = 0; t < ts; ++t)
  {
    v->thread(progress, batchSize);
  }

  delete v;
  return coords;
}
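// A minimal usage sketch, with all values illustrative. The graph arrives in
// a CSC-like layout (sources_j ordered, ps of length N + 1 indexing the start
// of each vertex's edge run) and coords holds the D x N embedding, which is
// updated in place; a null momentum selects the plain Visualizer path.
//
//   arma::mat out = sgd(coords, targets_i, sources_j, ps, weights,
//                       /* gamma */ 7.0, /* rho */ 1.0,
//                       /* n_samples */ 10000 * coords.n_cols,
//                       /* M */ 5, /* alpha */ 1.0,
//                       R_NilValue,  // momentum: null, so no momentum
//                       false,       // useDegree: sum edge weights instead
//                       R_NilValue, R_NilValue, true);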
void Distribution::generate_distribution3D(arma::mat& dist, int n_p, double bin_edge, int N, bool rerun)
{
  int x_i, y_i, z_i, r_i, n, n_tot;
  double x, y, z, r, dr, dr_R, stretch, mean_r;
  int dim = 3;

  using namespace arma;

  // Explicitly zero the histograms; Armadillo does not initialize by default.
  ucube distribution = zeros<ucube>(N, N, N);
  uvec radial_dist = zeros<uvec>(N);
  mat tot_dist;

  vec R = arma::sqrt(sum(dist % dist, 1));
  n = dist.n_rows;

#ifdef MPI_ON
  ivec n_list = zeros<ivec>(n_nodes);
  MPI_Allgather(&n, 1, MPI_INT, n_list.memptr(), 1, MPI_INT, MPI_COMM_WORLD);
  n_tot = accu(n_list);
#else
  n_tot = n;
#endif

  detect_deadlock(dist, n_p, dim, n);

  // On-the-fly calculation during QMC is initialized by a bin edge of 0.
  if (bin_edge == 0)
  {
    stretch = 3;

    // Calculate the bin edge and size based on the mean radius.
    if (!locked)
    {
      mean_r = ErrorEstimator::combine_mean(mean(R), n, n_tot);
    }
    else
    {
      mean_r = 0;
      int k = 0;
      for (int i = 0; i < n; i++)
      {
        if (is_deadlocked(dist, dim, i)) continue;
        mean_r += R(i);
        k++;
      }
      mean_r = ErrorEstimator::combine_mean(mean_r / k, n, n_tot);
    }

    bin_edge = stretch * mean_r;
  }

  dr = 2 * bin_edge / (N - 1);

  // Histogram the walker positions on an N x N x N grid centered at the origin.
  for (int ni = 0; ni < n; ni++)
  {
    if (is_deadlocked(dist, dim, ni)) continue;

    x = dist(ni, 0);
    y = dist(ni, 1);
    z = dist(ni, 2);

    x_i = (N / 2 + int(x / dr)) * (x > 0) + (N / 2 + int(x / dr) - 1) * (x <= 0);
    y_i = (N / 2 + int(y / dr)) * (y > 0) + (N / 2 + int(y / dr) - 1) * (y <= 0);
    z_i = (N / 2 + int(z / dr)) * (z > 0) + (N / 2 + int(z / dr) - 1) * (z <= 0);

    if (x_i < 0 || x_i >= N) continue;
    if (y_i < 0 || y_i >= N) continue;
    if (z_i < 0 || z_i >= N) continue;

    distribution(x_i, y_i, z_i)++;
  }

  // Histogram the radii.
  dr_R = dr / 2;
  for (int ni = 0; ni < n; ni++)
  {
    if (is_deadlocked(dist, dim, ni)) continue;

    r = R(ni);
    r_i = int(r / dr_R);
    if (r_i >= N) continue;

    radial_dist(r_i)++;
  }

#ifdef MPI_ON
  if (node == 0)
  {
    MPI_Reduce(MPI_IN_PLACE, distribution.memptr(), N * N * N, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Reduce(MPI_IN_PLACE, radial_dist.memptr(), N, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD);

    if (!rerun)
    {
      // Gather the raw walker positions from all nodes into tot_dist.
      ivec displs = zeros<ivec>(n_nodes);
      int displ_sum = 0;
      for (int j = 0; j < n_nodes; j++)
      {
        displs(j) = displ_sum;
        displ_sum += n_list(j);
      }

      tot_dist = zeros<mat>(n_tot, dim);
      for (int i = 0; i < dim; i++)
      {
        MPI_Gatherv(dist.colptr(i), n, MPI_DOUBLE, tot_dist.colptr(i),
                    n_list.memptr(), displs.memptr(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
      }

      displs.reset();
    }
  }
  else
  {
    MPI_Reduce(distribution.memptr(), NULL, N * N * N, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Reduce(radial_dist.memptr(), NULL, N, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD);

    if (!rerun)
    {
      for (int i = 0; i < dim; i++)
      {
        MPI_Gatherv(dist.colptr(i), n, MPI_DOUBLE, NULL, NULL, NULL, NULL, 0, MPI_COMM_WORLD);
      }
    }

    radial_dist.reset();
    distribution.reset();
  }

  n_list.reset();
#else
  // Alias the local walker data without copying.
  tot_dist = mat(dist.memptr(), n, dim, false, true);
#endif

  R.reset();

  if (node == 0)
  {
    if (!silent) cout << "3D Distribution calculated using " << n_tot << " samples." << endl;

    cube normalized_dist = conv_to<cube>::from(distribution);
    vec normalized_radd = conv_to<vec>::from(radial_dist);
    vec radial_axis = linspace(0, bin_edge, N);

    // Normalize the 3D histogram to integrate to the particle number.
    normalized_dist *= n_p / (accu(normalized_dist) * dr * dr * dr);

    // Project out a symmetric axis and normalize (skip the r = 0 singularity).
    // normalized_radd(span(1, N - 1)) /= radial_axis(span(1, N - 1));
    // normalized_radd(0) = normalized_radd(1);
    normalized_radd *= n_p / (accu(normalized_radd) * dr_R);

    s << path << "walker_positions/dist_out_" << name + suffix << "_edge" << bin_edge << ".arma3D";
    normalized_dist.save(s.str());
    normalized_dist.reset();
    s.str(std::string());

    if (!rerun)
    {
      s << path << "walker_positions/dist_rawdata_" << name + suffix << ".arma";
      tot_dist.save(s.str());
      tot_dist.reset();
      s.str(std::string());
    }

    s << path << "walker_positions/radial_out_" << name + suffix << "_edge" << bin_edge << ".arma";
    normalized_radd.save(s.str());
    normalized_radd.reset();
    s.str(std::string());

    radial_axis.reset();
    radial_dist.reset();
    distribution.reset();
  }
}
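// A minimal usage sketch (the Distribution setup is hypothetical; path, name,
// suffix and the MPI state are assumed to be configured on the instance):
// histogram 1000 walker positions on a 64^3 grid, with bin_edge == 0 so the
// routine derives the edge from the mean walker radius.
//
//   arma::mat walkers = arma::randn<arma::mat>(1000, 3); // one row per walker
//   Distribution d(/* ... */);
//   d.generate_distribution3D(walkers, 2, 0.0, 64, false);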