void operator()() { uint cnt=0; if (sv_index_.empty()) { for (uint i=0; i!=train_.size(); ++i) { if (cnt++%n_th_==th_no_) { boost::timer tm; matrix_[i] = kernel_(train_[i].second, data_.second); elapsed_ += tm.elapsed(); } } } else { for (uint i=0; i!=sv_index_.size(); ++i) { if (cnt++%n_th_==th_no_) { boost::timer tm; uint x = sv_index_[i]; matrix_[x] = kernel_(train_[x].second, data_.second); elapsed_ += tm.elapsed(); } } } if (data_self_) { if (cnt++%n_th_==th_no_) { boost::timer tm; *data_self_=kernel_(data_.second, data_.second); elapsed_ += tm.elapsed(); } cnt++; } }
void operator()() { uint cnt=0; if (sv_index_.empty()) { for (uint i=0; i!=train_.size(); ++i) { if (cnt++%n_th_==th_no_) { boost::timer tm; vals_[num_++]=kernel_(train_[i].second, data_.second); elapsed_ += tm.elapsed(); } } } else { for (uint i=0; i!=sv_index_.size(); ++i) { if (cnt++%n_th_==th_no_) { boost::timer tm; vals_[num_++]=kernel_(train_[sv_index_[i]].second, data_.second); elapsed_ += tm.elapsed(); } } } if (norm_test_) { if (cnt++%n_th_==th_no_) { boost::timer tm; vals_[num_++]=kernel_(data_.second, data_.second); elapsed_ += tm.elapsed(); } } }
// Note the _ // Macro that is called > API function that is never seen void OCCA_RFUNC occaKernelRun_(occaKernel kernel, occaArgumentList list) { occa::kernel kernel_((occa::kernel_v*) kernel); occaArgumentList_t &list_ = *((occaArgumentList_t*) list); kernel_.clearArgumentList(); for(int i = 0; i < list_.argc; ++i) { occaType_t &arg = *list_.argv[i]; if(arg.type == OCCA_TYPE_MEMORY) { occa::memory memory_((occa::memory_v*) arg.value.data.void_); kernel_.addArgument(i, occa::kernelArg(memory_)); } else if(arg.type == OCCA_TYPE_PTR) { // Calls UVA occa::memory memory_((void*) arg.value.data.void_); kernel_.addArgument(i, occa::kernelArg(memory_)); } else { kernel_.addArgument(i, occa::kernelArg(arg.value)); delete list_.argv[i]; } } kernel_.runFromArguments(); }
int main() { int n=10; variables_.n = n; // Creating Neighbor Connectivity for (int i=0; i<n; i++) { variables_.alive[i] = 0; variables_.numneighbors[i] = 2; // This will give the neighbor IDs in the C array index style variables_.neighbors[i][0] = i-1; variables_.neighbors[i][1] = i+1; } // Creating Periodic Boundary variables_.neighbors[0][0] = n-1; variables_.neighbors[n-1][1] = 0; // Seeding single alive cell variables_.alive[0]=1; //std::cout << "cpp: n=" << n << std::endl; for (int timestep=0; timestep<10; timestep++) { std::cout << "**** timestep = " << timestep << std::endl; kernel_(); for (int i=0; i<n; i++) { std::cout << "i=" << i << " alive =" << variables_.alive[i] << "\n"; } } return 0; }
void OCCA_RFUNC occaKernelSetWorkingDims(occaKernel kernel, int dims, occaDim items, occaDim groups) { occa::kernel kernel_((occa::kernel_v*) kernel); kernel_.setWorkingDims(dims, occa::dim(items.x, items.y, items.z), occa::dim(groups.x, groups.y, groups.z)); }
void OCCA_RFUNC occaKernelSetAllWorkingDims(occaKernel kernel, int dims, uintptr_t itemsX, uintptr_t itemsY, uintptr_t itemsZ, uintptr_t groupsX, uintptr_t groupsY, uintptr_t groupsZ) { occa::kernel kernel_((occa::kernel_v*) kernel); kernel_.setWorkingDims(dims, occa::dim(itemsX, itemsY, itemsZ), occa::dim(groupsX, groupsY, groupsZ)); }
Foam::scalar Foam::highestMomentReconstruction::targetFunction ( scalar sigma, const univariateMomentSet& moments, univariateMomentSet& momentsStar ) { kernel_().momentsToMomentsStar(sigma, moments, momentsStar); momentInverter_().invert(momentsStar); momentsStar.update ( momentInverter_().weights(), momentInverter_().abscissae() ); scalar highestMoment = moments.last(); return (highestMoment - kernel_().m2N(sigma, momentsStar))/highestMoment; }
void operator()() { if (sv_index_.empty()) { for (uint i=0; i!=train_.size(); ++i) { if (i%n_th_==th_no_) { boost::timer tm; diag_[i]=kernel_(train_[i].second, train_[i].second); elapsed_ += tm.elapsed(); } } } else { for (uint i=0; i!=sv_index_.size(); ++i) { if (i%n_th_==th_no_) { uint x = sv_index_[i]; boost::timer tm; diag_[x] = kernel_(train_[x].second, train_[x].second); elapsed_ += tm.elapsed(); } } } }
void operator()() { uint cnt=0; uint n = train_.size(); for (uint i=0; i!=n; ++i) { for (uint j=i; j!=n; ++j) { if (cnt++%n_th_==th_no_) { boost::timer tm; vals_[num_++]=kernel_(train_[i].second, train_[j].second); elapsed_ += tm.elapsed(); } } } }
void operator()() { uint cnt=0; uint n = train_.size(); for (uint i=0; i!=n; ++i) { for (uint j=i; j!=n; ++j) { if (cnt++%n_th_==th_no_) { boost::timer tm; matrix_(i,j)=kernel_(train_[i].second, train_[j].second); if (i!=j) matrix_(j,i)=matrix_(i,j); elapsed_ += tm.elapsed(); } } } }
Foam::scalar Foam::highestMomentReconstruction::normalizedMomentError ( scalar sigma, const univariateMomentSet& moments, univariateMomentSet& momentsStar ) { scalar norm = 0.0; targetFunction(sigma, moments, momentsStar); univariateMomentSet approximatedMoments(moments.size(), moments.support()); kernel_().momentsStarToMoments(sigma, approximatedMoments, momentsStar); for (label momenti = 0; momenti < moments.size(); momenti++) { norm += mag(1.0 - approximatedMoments[momenti]/moments[momenti]); } return sqrt(norm); }
void OCCA_RFUNC occaKernelRunN(occaKernel kernel, const int argc, occaType_t **args){ occa::kernel kernel_((occa::kernel_v*) kernel); kernel_.clearArgumentList(); for(int i = 0; i < argc; ++i){ occaType_t &arg = *(args[i]); void *argPtr = arg.value.data.void_; if(arg.type == OCCA_TYPE_MEMORY){ occa::memory memory_((occa::memory_v*) argPtr); kernel_.addArgument(i, occa::kernelArg(memory_)); } else if(arg.type == OCCA_TYPE_PTR){ occa::memory memory_((void*) argPtr); kernel_.addArgument(i, occa::kernelArg(memory_)); } else { kernel_.addArgument(i, occa::kernelArg(arg.value)); delete (occaType_t*) args[i]; } } kernel_.runFromArguments(); }
void OCCA_RFUNC occaKernelFree(occaKernel kernel) { occa::kernel kernel_((occa::kernel_v*) kernel); kernel_.free(); }
int OCCA_RFUNC occaKernelPreferredDimSize(occaKernel kernel) { occa::kernel kernel_((occa::kernel_v*) kernel); return kernel_.preferredDimSize(); }
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // void Foam::highestMomentReconstruction::invert(const univariateMomentSet& moments) { univariateMomentSet m(moments); reset(); invertSingular(m); if (nullSigma_) return; label nRealizableMoments = m.nRealizableMoments(); // Resizing the moment set to avoid copying again m.resize(nRealizableMoments); // Local set of starred moments univariateMomentSet mStar(nRealizableMoments, m.support()); // Compute target function for sigma = 0 scalar sigmaLow = 0.0; scalar fLow = targetFunction(sigmaLow, m, mStar); sigma_ = sigmaLow; // Check if sigma = 0 is root if (mag(fLow) <= targetFunctionTol_) { sigma_ = 0.0; nullSigma_ = true; momentInverter_().invert(m); secondaryQuadrature ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } // Compute target function for sigma = sigmaMax scalar sigMax = kernel_().sigmaMax(m); scalar sigmaHigh = sigMax; scalar fHigh = targetFunction(sigmaHigh, m, mStar); // This should not happen with the new algorithm if (fLow*fHigh > 0) { // Root not found. Minimize target function in [0, sigma_] sigma_ = minimizeTargetFunction(0, sigmaHigh, m, mStar); // If sigma_ is small, use QMOM if (mag(sigma_) < sigmaMin_) { sigma_ = 0.0; nullSigma_ = true; momentInverter_().invert(m); secondaryQuadrature ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } targetFunction(sigma_, m, mStar); secondaryQuadrature // secondary quadrature from mStar ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } // Apply Ridder's algorithm to find sigma for (label iter = 0; iter < maxSigmaIter_; iter++) { scalar fMid, sigmaMid; sigmaMid = (sigmaLow + sigmaHigh)/2.0; fMid = targetFunction(sigmaMid, m, mStar); scalar s = sqrt(sqr(fMid) - fLow*fHigh); if (s == 0.0) { FatalErrorInFunction << "Singular value encountered searching for root." << nl << " Moment set = " << m << nl << " sigma = " << sigma_ << nl << " fLow = " << fLow << nl << " fMid = " << fMid << nl << " fHigh = " << fHigh << abort(FatalError); } sigma_ = sigmaMid + (sigmaMid - sigmaLow)*sign(fLow - fHigh)*fMid/s; kernel_().momentsToMomentsStar(sigma_, m, mStar); scalar fNew = targetFunction(sigma_, m, mStar); scalar dSigma = (sigmaHigh - sigmaLow)/2.0; // Check for convergence if (mag(fNew) <= targetFunctionTol_ || mag(dSigma) <= sigmaTol_) { // Root finding converged // If sigma_ is small, use QMOM if (mag(sigma_) < sigmaMin_) { sigma_ = 0.0; nullSigma_ = true; momentInverter_().invert(m); secondaryQuadrature ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } scalar momentError = normalizedMomentError(sigma_, m, mStar); if ( momentError < momentsTol_ ) { // Found a value of sigma that preserves all the moments secondaryQuadrature // Secondary quadrature from mStar ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } else { // Root not found. Minimize target function in [0, sigma_] sigma_ = minimizeTargetFunction(0, sigma_, m, mStar); // If sigma_ is small, use QMOM if (mag(sigma_) < sigmaMin_) { sigma_ = 0.0; nullSigma_ = true; momentInverter_().invert(m); secondaryQuadrature ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } targetFunction(sigma_, m, mStar); secondaryQuadrature // Secondary quadrature from mStar ( momentInverter_().weights(), momentInverter_().abscissae() ); return; } } else { // Root finding did not converge. Refine search. if (fNew*fMid < 0 && sigma_ < sigmaMid) { sigmaLow = sigma_; fLow = fNew; sigmaHigh = sigmaMid; fHigh = fMid; } else if (fNew*fMid < 0 && sigma_ > sigmaMid) { sigmaLow = sigmaMid; fLow = fMid; sigmaHigh = sigma_; fHigh = fNew; } else if (fNew*fLow < 0) { sigmaHigh = sigma_; fHigh = fNew; } else if (fNew*fHigh < 0) { sigmaLow = sigma_; fLow = fNew; } } } FatalErrorInFunction << "Number of iterations exceeded." << nl << " Max allowed iterations = " << maxSigmaIter_ << abort(FatalError); }
uintptr_t OCCA_RFUNC occaKernelMaximumInnerDimSize(occaKernel kernel) { occa::kernel kernel_((occa::kernel_v*) kernel); return kernel_.maximumInnerDimSize(); }
occaDevice OCCA_RFUNC occaKernelGetDevice(occaKernel kernel) { occa::kernel kernel_((occa::kernel_v*) kernel); occa::device device = kernel_.getDevice(); return (occaDevice) device.getDHandle(); }
const char* OCCA_RFUNC occaKernelName(occaKernel kernel) { occa::kernel kernel_((occa::kernel_v*) kernel); return kernel_.name().c_str(); }
int kernel(subdomain subd) { return kernel_(& subd.n, subd.alive, subd.numneighbors, subd.neighbors); }
void pcl::pcl_2d::convolution<PointT>::convolve (pcl::PointCloud<PointT> &output) { int rows = input_->height; int cols = input_->width; int k_rows = kernel_.height; int k_cols = kernel_.width; int input_row = 0; int input_col = 0; /*default boundary option : zero padding*/ output = *input_; typedef pcl::PointCloud<PointT> CloudT; typedef typename pcl::traits::fieldList<typename CloudT::PointType>::type FieldList; typename CloudT::PointType test_point = input_->points[0]; bool has_intensity; //bool has_rgb; float output_intensity, output_r, output_g, output_b; float input_intensity, input_r, input_g, input_b; float kernel_intensity, kernel_r, kernel_g, kernel_b; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (test_point, "intensity", has_intensity, output_intensity)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (test_point, "r", has_intensity, output_r)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (test_point, "g", has_intensity, output_g)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (test_point, "b", has_intensity, output_b)); if (boundary_options_ == BOUNDARY_OPTION_CLAMP) { for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { //output (j, i).intensity = 0; if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ output_intensity = 0; } if(image_channel_ == IMAGE_CHANNEL_RGB){ output_r = 0; output_g = 0; output_b = 0; } for (int k = 0; k < k_rows; k++) { for (int l = 0; l < k_cols; l++) { if ((i + k - k_rows / 2) < 0) input_row = 0; else if ((i + k - k_rows / 2) >= rows) { input_row = rows - 1; } else input_row = i + k - k_rows / 2; if ((j + l - k_cols / 2) < 0) input_col = 0; else if ((j + l - k_cols / 2) >= cols) input_col = cols - 1; else input_col = j + l - k_cols / 2; if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "intensity", has_intensity, input_intensity)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "intensity", has_intensity, kernel_intensity)); output_intensity += kernel_intensity*input_intensity; } if(image_channel_ == IMAGE_CHANNEL_RGB){ pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "r", has_intensity, input_r)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "r", has_intensity, kernel_r)); output_r += kernel_r*input_r; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "g", has_intensity, input_g)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "g", has_intensity, kernel_g)); output_g += kernel_r*input_g; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "b", has_intensity, input_b)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "b", has_intensity, kernel_b)); output_b += kernel_r*input_b; } //output (j, i).intensity += kernel_ (l, k).intensity * ((*input_)(input_col, input_row).intensity); } } if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "intensity", output_intensity)); } if(image_channel_ == IMAGE_CHANNEL_RGB){ pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "r", output_r)); pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "g", output_g)); pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "b", output_b)); } } } } else if (boundary_options_ == BOUNDARY_OPTION_MIRROR) { for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { //output (j, i).intensity = 0; if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ output_intensity = 0; } if(image_channel_ == IMAGE_CHANNEL_RGB){ output_r = 0; output_g = 0; output_b = 0; } for (int k = 0; k < k_rows; k++) { for (int l = 0; l < k_cols; l++) { if ((i + k - (k_rows / 2)) < 0) input_row = -(i + k - (k_rows / 2)) - 1; else if ((i + k - (k_rows / 2)) >= rows) { input_row = 2 * rows - 1 - (i + k - (k_rows / 2)); } else input_row = i + k - (k_rows / 2); if ((j + l - (k_cols / 2)) < 0) input_col = -(j + l - (k_cols / 2)) - 1; else if ((j + l - (k_cols / 2)) >= cols) input_col = 2 * cols - 1 - (j + l - (k_cols / 2)); else input_col = j + l - (k_cols / 2); //output (j, i).intensity += kernel_ (l, k).intensity * ((*input_)(input_col, input_row).intensity); if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "intensity", has_intensity, input_intensity)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "intensity", has_intensity, kernel_intensity)); output_intensity += kernel_intensity*input_intensity; } if(image_channel_ == IMAGE_CHANNEL_RGB){ pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "r", has_intensity, input_r)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "r", has_intensity, kernel_r)); output_r += kernel_r*input_r; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "g", has_intensity, input_g)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "g", has_intensity, kernel_g)); output_g += kernel_r*input_g; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(input_col, input_row), "b", has_intensity, input_b)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "b", has_intensity, kernel_b)); output_b += kernel_r*input_b; } } } if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "intensity", output_intensity)); } if(image_channel_ == IMAGE_CHANNEL_RGB){ pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "r", output_r)); pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "g", output_g)); pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "b", output_b)); } } } } else if (boundary_options_ == BOUNDARY_OPTION_ZERO_PADDING) { for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { //output (j, i).intensity = 0; if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ output_intensity = 0; } if(image_channel_ == IMAGE_CHANNEL_RGB){ output_r = 0; output_g = 0; output_b = 0; } for (int k = 0; k < k_rows; k++) { for (int l = 0; l < k_cols; l++) { if ((i + k - k_rows / 2) < 0 || (i + k - k_rows / 2) >= rows || (j + l - k_cols / 2) < 0 || (j + l - k_cols / 2) >= cols) { continue; } else { //output (j, i).intensity += kernel_ (l, k).intensity * ((*input_)(j + l - k_cols / 2, i + k - k_rows / 2).intensity); if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(j + l - k_cols / 2, i + k - k_rows / 2), "intensity", has_intensity, input_intensity)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "intensity", has_intensity, kernel_intensity)); output_intensity += kernel_intensity*input_intensity; } if(image_channel_ == IMAGE_CHANNEL_RGB){ pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(j + l - k_cols / 2, i + k - k_rows / 2), "r", has_intensity, input_r)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "r", has_intensity, kernel_r)); output_r += kernel_r*input_r; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(j + l - k_cols / 2, i + k - k_rows / 2), "g", has_intensity, input_g)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "g", has_intensity, kernel_g)); output_g += kernel_r*input_g; pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> ((*input_)(j + l - k_cols / 2, i + k - k_rows / 2), "b", has_intensity, input_b)); pcl::for_each_type<FieldList> (pcl::CopyIfFieldExists<typename CloudT::PointType, float> (kernel_ (l, k), "b", has_intensity, kernel_b)); output_b += kernel_r*input_b; } } } } if(image_channel_ == IMAGE_CHANNEL_INTENSITY){ pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "intensity", output_intensity)); } if(image_channel_ == IMAGE_CHANNEL_RGB){ pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "r", output_r)); pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "g", output_g)); pcl::for_each_type<FieldList> (pcl::SetIfFieldExists<typename CloudT::PointType, float> (output (j, i), "b", output_b)); } } } } }
// returns K(||X-Y||) where X,Y are vectors Real kernelAbs(const Array& X, const Array& Y)const{ return kernel_(Norm2(X-Y)); }
double scaledKernel_(double x) const { return invWidth_*kernel_(x*invWidth_); }