void rk_combine_result( double h, ButcherTableau &tab, const VectorMatrix &k0, const VectorMatrix &k1, const VectorMatrix &k2, const VectorMatrix &k3, const VectorMatrix &k4, const VectorMatrix &k5, VectorMatrix &y, VectorMatrix &y_error) { const int s = y.size(); if ( s != y_error.size() || s != k1.size() || s != k2.size() || s != k3.size() || s != k4.size() || s != k5.size()) throw std::runtime_error("rk_combine_result: Input matrix size mismatch."); if (!tab.num_steps == 6) throw std::runtime_error("Need num_steps == 6 in rk_combine_result"); if (isCudaEnabled()) { #ifdef HAVE_CUDA rk_combine_result_cuda(h, tab, k0, k1, k2, k3, k4, k5, y, y_error, isCuda64Enabled()); #else assert(0); #endif } else { rk_combine_result_cpu(h, tab, k0, k1, k2, k3, k4, k5, y, y_error); } }
double cubic_anisotropy( const VectorMatrix &axis1, const VectorMatrix &axis2, const Matrix &k, const Matrix &Ms, const VectorMatrix &M, VectorMatrix &H) { const bool use_cuda = isCudaEnabled(); double energy_sum = 0.0; if (use_cuda) { #ifdef HAVE_CUDA CUTIC("cubic_anisotropy"); energy_sum = cubic_anisotropy_cuda(axis1, axis2, k, Ms, M, H, isCuda64Enabled()); CUTOC("cubic_anisotropy"); #else assert(0); #endif } else { TIC("cubic_anisotropy"); energy_sum = cubic_anisotropy_cpu(axis1, axis2, k, Ms, M, H); TOC("cubic_anisotropy"); } return energy_sum; }
void minimize( const Matrix &f, const double h, const VectorMatrix &M, const VectorMatrix &H, VectorMatrix &M2) { const bool use_cuda = isCudaEnabled(); if (use_cuda) { #ifdef HAVE_CUDA CUTIC("minimize"); #ifdef HAVE_CUDA_64 if (isCuda64Enabled()) minimize_cu64(f, h, M, H, M2); else #endif minimize_cu32(f, h, M, H, M2); CUTOC("minimize"); #else assert(0); #endif } else { TIC("minimize"); minimize_cpu(f, h, M, H, M2); TOC("minimize"); } }
// Copy the three component arrays in_x/in_y/in_z (xyz layout, scratch buffer
// s1) into the output field matrix H, un-padding from the member extents
// exp_x/dim_y/dim_z/dim_x via the cuda_copy_unpad_r2r kernel.
// NOTE(review): exp_x is presumably the zero-padded x extent of the FFT
// buffers while dim_x is the unpadded one — confirm against the class
// definition, which is outside this view.
void Transposer_CUDA::copy_unpad(const float *in_x, const float *in_y, const float *in_z, VectorMatrix &H)
{
	// If HAVE_CUDA_64 is compiled in and isCuda64Enabled(), we directly store
	// output matrices on the GPU with 64 bit precision; otherwise fall through
	// to the 32-bit accessor. The 'else' before #endif pairs with the #ifdef'd
	// 'if' so the brace block serves both configurations.
#ifdef HAVE_CUDA_64
	if (isCuda64Enabled()) {
		// xyz, s1 -> H  (64-bit device accessor)
		VectorMatrix::cu64_accessor H_acc(H);
		cuda_copy_unpad_r2r(exp_x, dim_y, dim_z, dim_x, in_x, in_y, in_z, H_acc.ptr_x(), H_acc.ptr_y(), H_acc.ptr_z());
	} else
#endif
	{
		// xyz, s1 -> H  (32-bit device accessor)
		VectorMatrix::cu32_accessor H_acc(H);
		cuda_copy_unpad_r2r(exp_x, dim_y, dim_z, dim_x, in_x, in_y, in_z, H_acc.ptr_x(), H_acc.ptr_y(), H_acc.ptr_z());
	}
}
void rk_prepare_step( int step, double h, ButcherTableau &tab, const VectorMatrix &k0, const VectorMatrix &k1, const VectorMatrix &k2, const VectorMatrix &k3, const VectorMatrix &k4, const VectorMatrix &k5, const VectorMatrix &y, VectorMatrix &ytmp) { if (isCudaEnabled()) { #ifdef HAVE_CUDA rk_prepare_step_cuda(step, h, tab, k0, k1, k2, k3, k4, k5, y, ytmp, isCuda64Enabled()); #else assert(0); #endif } else { rk_prepare_step_cpu(step, h, tab, k0, k1, k2, k3, k4, k5, y, ytmp); } }
double exchange( int dim_x, int dim_y, int dim_z, double delta_x, double delta_y, double delta_z, bool periodic_x, bool periodic_y, bool periodic_z, const Matrix &Ms, const Matrix &A, const VectorMatrix &M, VectorMatrix &H) { const bool use_cuda = isCudaEnabled(); double res = 0; if (use_cuda) { #ifdef HAVE_CUDA CUTIC("exchange"); res = exchange_cuda(dim_x, dim_y, dim_z, delta_x, delta_y, delta_z, periodic_x, periodic_y, periodic_z, Ms, A, M, H, isCuda64Enabled()); CUTOC("exchange"); #else assert(0); #endif } else { TIC("exchange"); res = exchange_cpu(dim_x, dim_y, dim_z, delta_x, delta_y, delta_z, periodic_x, periodic_y, periodic_z, Ms, A, M, H); TOC("exchange"); } return res; }