long matrix::mul_ikj(const matrix *a, const matrix *b, matrix *r) { int width, height, h_div, h_mod; a->size(&height, &width); h_div = height / 16; h_mod = height % 16; for (int i = 0; i < height; i++) { for (iter_row r_i(r, i); *r_i; r_i++) **r_i = 0; iter_row a_i(a, i); for (int k = 0; k < width; k++) { number n = **a_i; a_i++; iter_row r_i(r, i); iter_row b_i(b, k); for (int j = 0; j < h_div; j++) { (*r_i)[0] += (*b_i)[0] * n; (*r_i)[1] += (*b_i)[1] * n; (*r_i)[2] += (*b_i)[2] * n; (*r_i)[3] += (*b_i)[3] * n; (*r_i)[4] += (*b_i)[4] * n; (*r_i)[5] += (*b_i)[5] * n; (*r_i)[6] += (*b_i)[6] * n; (*r_i)[7] += (*b_i)[7] * n; (*r_i)[8] += (*b_i)[8] * n; (*r_i)[9] += (*b_i)[9] * n; (*r_i)[10] += (*b_i)[10] * n; (*r_i)[11] += (*b_i)[11] * n; (*r_i)[12] += (*b_i)[12] * n; (*r_i)[13] += (*b_i)[13] * n; (*r_i)[14] += (*b_i)[14] * n; (*r_i)[15] += (*b_i)[15] * n; r_i += 16; b_i += 16; } for (int j = 0; j < h_mod; j++) { **r_i += **b_i * n; r_i++; b_i++; } } } return (long)width * width * height * 2; }
long matrix::mul_ijk(const matrix *a, const matrix *b, matrix *r) { int width, height; a->size(&height, &width); for (int i = 0; i < height; i++) { iter_row r_i(r, i); for (int j = 0; j < height; j++) { iter_row a_i(a, i); iter_col b_i(b, j); double n = 0; for (int k = 0; k < width; k++) { n += **a_i * **b_i; a_i++; b_i++; } **r_i = n; r_i++; } } return (long)width * width * height * 2; }
void updateVerletList(const string &verletStringId, Particles & particles, Grid & grid, double radius) { double radiusSquared = radius*radius; int verletId = particles.getVerletId(verletStringId); const unordered_map<int, GridPoint*> &gridpoints = grid.gridpoints(); const mat & R = particles.r(); const vector<int> &mygridPoints = grid.myGridPoints(); particles.clearVerletList(verletId); #ifdef USE_OPENMP #pragma omp parallel for #endif for(int i=0; i<mygridPoints.size(); i++) { double dx, dy, dz; int gridId = mygridPoints.at(i); const GridPoint & gridPoint = *gridpoints.at(gridId); for(const pair<int, int> & idCol_i:gridPoint.particles()) { int id_i = idCol_i.first; vector<int> verletList; const vec & r_i = R.col(idCol_i.second); for(const pair<int, int> & idCol_j:gridPoint.particles()) { int id_j = idCol_j.first; if(id_i == id_j) continue; const vec & r_j = R.col(idCol_j.second); dx = r_i(0) - r_j(0); dy = r_i(1) - r_j(1); dz = r_i(2) - r_j(2); double drSquared = dx*dx + dy*dy + dz*dz; if(drSquared < radiusSquared) { verletList.push_back(id_j); } } // Neighbouring cells const vector<GridPoint*> & neighbours = gridPoint.neighbours(); for(const GridPoint *neighbour:neighbours) { for(const pair<int, int> & idCol_j:neighbour->particles()) { const vec & r_j = R.col(idCol_j.second); dx = r_i(0) - r_j(0); dy = r_i(1) - r_j(1); dz = r_i(2) - r_j(2); double drSquared = dx*dx + dy*dy + dz*dz; if(drSquared < radiusSquared) { verletList.push_back(idCol_j.first); } } } #ifdef USE_OPENMP #pragma omp critical { particles.setVerletList(id_i, verletList, verletId); } #else particles.setVerletList(id_i, verletList, verletId); #endif } } }