void VectorVectorConvolution_Simple::execute(const VectorMatrix &rhs, Matrix &res) { VectorMatrix::const_accessor S_acc(lhs); VectorMatrix::const_accessor M_acc(rhs); Matrix:: accessor phi_acc(res); // phi(r) = int S(r-r')*M(r') dr' // phi = Sx*Mx + Sy*My + Sz*Mz for (int z=0; z<dim_z; ++z) for (int y=0; y<dim_y; ++y) for (int x=0; x<dim_x; ++x) { double sum = 0.0; for (int o=0; o<dim_z; ++o) for (int n=0; n<dim_y; ++n) for (int m=0; m<dim_x; ++m) { // (X,Y,Z): position in demag tensor field matrix const int X = (x-m+exp_x) % exp_x; const int Y = (y-n+exp_y) % exp_y; const int Z = (z-o+exp_z) % exp_z; const Vector3d &S = S_acc.get(X, Y, Z); const Vector3d &M = M_acc.get(m, n, o); sum = S.x*M.x + S.y*M.y + S.z*M.z; } phi_acc.at(x, y, z) = sum; } }
void fdm_zhangli_cpu( int dim_x, int dim_y, int dim_z, double delta_x, double delta_y, double delta_z, bool do_precess, const Matrix &P, const Matrix &xi, const Matrix &Ms, const Matrix &alpha, const VectorMatrix &j, const VectorMatrix &M, VectorMatrix &dM) { VectorMatrix::const_accessor M_acc(M), j_acc(j); Matrix::ro_accessor Ms_acc(Ms), alpha_acc(alpha), P_acc(P), xi_acc(xi); VectorMatrix::accessor dM_acc(dM); for (int z=0; z<dim_z; ++z) for (int y=0; y<dim_y; ++y) for (int x=0; x<dim_x; ++x) { const int k = z*dim_x*dim_y + y*dim_x + x; const double Ms = Ms_acc.at(k); const double alpha = alpha_acc.at(k); const double P = P_acc.at(k); const double xi = xi_acc.at(k); const Vector3d j = j_acc.get(k); dM_acc.set(k, zhangli_dMdt(x, y, z, dim_x, dim_y, dim_z, delta_x, delta_y, delta_z, do_precess, P, xi, Ms, alpha, j, M_acc)); } }
void Transposer_CUDA::copy_pad(const VectorMatrix &M, float *out_x, float *out_y, float *out_z) { // Ifdef HAVE_CUDA_64, we directly support input matrices that // are stored with 64 bit precision on the GPU. #ifdef HAVE_CUDA_64 const bool M_is_cuda64_bit = M.isCached(2); // 0 = CPU device, 2 = CUDA_64 device if (M_is_cuda64_bit) { VectorMatrix::const_cu64_accessor M_acc(M); // xyz, M -> s1 cuda_copy_pad_r2r(dim_x, dim_y, dim_z, exp_x, M_acc.ptr_x(), M_acc.ptr_y(), M_acc.ptr_z(), out_x, out_y, out_z); } else #endif { VectorMatrix::const_cu32_accessor M_acc(M); // xyz, M -> s1 cuda_copy_pad_r2r(dim_x, dim_y, dim_z, exp_x, M_acc.ptr_x(), M_acc.ptr_y(), M_acc.ptr_z(), out_x, out_y, out_z); } }
/**
 * Locates, within one z-slice of M, the interior cell whose selected
 * component has the largest absolute value, then refines that position to
 * sub-cell precision with fit() along x and along y.
 *
 * Only interior cells (1 <= x < dim_x-1, 1 <= y < dim_y-1) are searched so
 * that both neighbours needed for the three-point fit always exist.
 *
 * @param M          Rank-3 vector field to search.
 * @param z_slice    Index of the z layer to search; it is not range-checked
 *                   here.
 * @param component  Vector component to inspect: 0, 1 or 2.
 * @return Vector3d(x, y, z_slice) with x and y as returned by fit().
 * @throws std::runtime_error if M is not of rank 3 or component is not 0..2.
 */
Vector3d findExtremum_cpu(VectorMatrix &M, int z_slice, int component)
{
    if (M.getShape().getRank() != 3) {
        throw std::runtime_error("findExtremum: Fixme: Need matrix of rank 3");
    }

    if (component < 0 || component > 2) {
        throw std::runtime_error("findExtremum: Invalid 'component' value, must be 0, 1 or 2.");
    }

    const int dim_x = M.getShape().getDim(0);
    const int dim_y = M.getShape().getDim(1);

    VectorMatrix::const_accessor M_acc(M);

    // Find cell with maximum absolute value
    double max_val = -1.0;
    int max_x = -1, max_y = -1;
    for (int y=1; y<dim_y-1; ++y)
    for (int x=1; x<dim_x-1; ++x) {
        // Must stay a double: storing the magnitude in an int would truncate
        // every |value| < 1 to 0 and make the comparison below meaningless.
        const double val = std::fabs(M_acc.get(x, y, z_slice)[component]);
        if (val > max_val) {
            max_val = val;
            max_x = x;
            max_y = y;
        }
    }
    // Fails if the slice has no interior cell (dim_x < 3 or dim_y < 3).
    assert(max_x > 0);
    assert(max_y > 0);

    // Refine maximum by fitting to sub-cell precision
    const double xdir_vals[3] = {
        M_acc.get(max_x-1, max_y+0, z_slice)[component],
        M_acc.get(max_x+0, max_y+0, z_slice)[component],
        M_acc.get(max_x+1, max_y+0, z_slice)[component]
    };

    const double ydir_vals[3] = {
        M_acc.get(max_x+0, max_y-1, z_slice)[component],
        M_acc.get(max_x+0, max_y+0, z_slice)[component],
        M_acc.get(max_x+0, max_y+1, z_slice)[component]
    };

    return Vector3d(
        fit(max_x-1, max_x+0, max_x+1, xdir_vals[0], xdir_vals[1], xdir_vals[2]),
        fit(max_y-1, max_y+0, max_y+1, ydir_vals[0], ydir_vals[1], ydir_vals[2]),
        static_cast<double>(z_slice)
    );
}
void fdm_slonchewski( int dim_x, int dim_y, int dim_z, double delta_x, double delta_y, double delta_z, double a_j, const VectorMatrix &p, // spin polarization const Matrix &Ms, const Matrix &alpha, const VectorMatrix &M, VectorMatrix &dM) { // Calculate: // c1*(M x (M x p)) + c2*(M x p) // // c1(theta): damping factor // c2(theta): precession factor // Ms*cos(theta) = M*p Matrix::ro_accessor Ms_acc(Ms), alpha_acc(alpha); VectorMatrix::const_accessor p_acc(p); VectorMatrix::const_accessor M_acc(M); VectorMatrix::accessor dM_acc(dM); const int N = dim_x * dim_y * dim_z; for (int n=0; n<N; ++n) { const double alpha = alpha_acc.at(n); const double Ms = Ms_acc.at(n); const Vector3d p = p_acc.get(n); const Vector3d M = M_acc.get(n); if (p == Vector3d(0.0, 0.0, 0.0)) continue; if (Ms == 0.0) continue; // Calculate precession and damping terms const Vector3d Mxp = cross(M, p); // precession: u=mxp const Vector3d MxMxp = cross(M, Mxp); // damping: t=mxu=mx(mxp) // add both terms to dm/dt in LLGE const double gamma_pr = GYROMAGNETIC_RATIO / (1.0 + alpha*alpha); Vector3d dM_n; dM_n.x = gamma_pr * a_j * (-MxMxp.x/Ms + Mxp.x*alpha); dM_n.y = gamma_pr * a_j * (-MxMxp.y/Ms + Mxp.y*alpha); dM_n.z = gamma_pr * a_j * (-MxMxp.z/Ms + Mxp.z*alpha); dM_acc.set(n, dM_n); } }
void SymmetricMatrixVectorConvolution_Simple::execute(const VectorMatrix &rhs, VectorMatrix &res) { Matrix::ro_accessor N_acc(lhs); VectorMatrix::const_accessor M_acc(rhs); VectorMatrix:: accessor H_acc(res); // H(r) = int N(r-r')*M(r') dr' // Hx = Nxx*Mx + Nxy*My + Nxz*Mz // Hy = Nyx*Mx + Nyy*My + Nyz*Mz // Hz = Nxz*Mx + Nyz*My + Nzz*Mz for (int z=0; z<dim_z; ++z) for (int y=0; y<dim_y; ++y) for (int x=0; x<dim_x; ++x) { Vector3d H(0.0, 0.0, 0.0); for (int o=0; o<dim_z; ++o) for (int n=0; n<dim_y; ++n) for (int m=0; m<dim_x; ++m) { // (X,Y,Z): position in demag tensor field matrix const int X = (x-m+exp_x) % exp_x; const int Y = (y-n+exp_y) % exp_y; const int Z = (z-o+exp_z) % exp_z; const double Nxx = N_acc.at(0,X,Y,Z); const double Nxy = N_acc.at(1,X,Y,Z); const double Nxz = N_acc.at(2,X,Y,Z); const double Nyy = N_acc.at(3,X,Y,Z); const double Nyz = N_acc.at(4,X,Y,Z); const double Nzz = N_acc.at(5,X,Y,Z); const Vector3d &M = M_acc.get(m, n, o); H.x += Nxx*M.x + Nxy*M.y + Nxz*M.z; H.y += Nxy*M.x + Nyy*M.y + Nyz*M.z; H.z += Nxz*M.x + Nyz*M.y + Nzz*M.z; } H_acc.set(x,y,z,H); } }
static double fdm_exchange_cpu_nonperiodic( int dim_x, int dim_y, int dim_z, double delta_x, double delta_y, double delta_z, const Matrix &Ms, const Matrix &A, const VectorMatrix &M, VectorMatrix &H) { const int dim_xy = dim_x * dim_y; const double wx = 1.0 / (delta_x * delta_x); const double wy = 1.0 / (delta_y * delta_y); const double wz = 1.0 / (delta_z * delta_z); VectorMatrix::const_accessor M_acc(M); VectorMatrix::accessor H_acc(H); Matrix::ro_accessor Ms_acc(Ms), A_acc(A); double energy = 0.0; for (int z=0; z<dim_z; ++z) { for (int y=0; y<dim_y; ++y) { for (int x=0; x<dim_x; ++x) { const int i = z*dim_xy + y*dim_x + x; // linear index of (x,y,z) const double Ms = Ms_acc.at(i); if (Ms == 0.0) { H_acc.set(i, Vector3d(0.0, 0.0, 0.0)); continue; } const int idx_l = i- 1; const int idx_r = i+ 1; const int idx_u = i- dim_x; const int idx_d = i+ dim_x; const int idx_f = i-dim_xy; const int idx_b = i+dim_xy; const Vector3d M_i = M_acc.get(i) / Ms; // magnetization at (x,y,z) Vector3d sum(0.0, 0.0, 0.0); // left / right (X) if (x > 0) { const double Ms_l = Ms_acc.at(idx_l); if (Ms_l != 0.0) sum += ((M_acc.get(idx_l) / Ms_l) - M_i) * wx; } if (x < dim_x-1) { const double Ms_r = Ms_acc.at(idx_r); if (Ms_r != 0.0) sum += ((M_acc.get(idx_r) / Ms_r) - M_i) * wx; } // up / down (Y) if (y > 0) { const double Ms_u = Ms_acc.at(idx_u); if (Ms_u != 0.0) sum += ((M_acc.get(idx_u) / Ms_u) - M_i) * wy; } if (y < dim_y-1) { const double Ms_d = Ms_acc.at(idx_d); if (Ms_d != 0.0) sum += ((M_acc.get(idx_d) / Ms_d) - M_i) * wy; } // forward / backward (Z) if (z > 0) { const double Ms_f = Ms_acc.at(idx_f); if (Ms_f != 0.0) sum += ((M_acc.get(idx_f) / Ms_f) - M_i) * wz; } if (z < dim_z-1) { const double Ms_b = Ms_acc.at(idx_b); if (Ms_b != 0.0) sum += ((M_acc.get(idx_b) / Ms_b) - M_i) * wz; } // Exchange field at (x,y,z) const Vector3d H_i = (2/MU0) * A_acc.at(i) * sum / Ms; H_acc.set(i, H_i); // Exchange energy sum energy += dot(M_i, H_i); } } } energy *= -MU0/2.0 * delta_x * delta_y * 
delta_z; return energy; }