double Atomtype::CalcPE(int frame_i, const Trajectory &trj, const coordinates &rand_xyz, const cubicbox_m256 &box, double vol) const { float pe = 0.0; int atom_i = 0; /* BEGIN SIMD SECTION */ // This performs the exact same calculation after the SIMD section // but doing it on 8 atoms at a time using SIMD instructions. coordinates8 rand_xyz8(rand_xyz), atom_xyz; __m256 r2_8, mask, r6, ri6, pe_tmp; __m256 pe_sum = _mm256_setzero_ps(); float result[n] __attribute__((aligned (16))); for (; atom_i < this->n-8; atom_i+=8) { atom_xyz = trj.GetXYZ8(frame_i, this->name, atom_i); r2_8 = distance2(atom_xyz, rand_xyz8, box); mask = _mm256_cmp_ps(r2_8, rcut2_8, _CMP_LT_OS); r6 = _mm256_and_ps(mask, _mm256_mul_ps(_mm256_mul_ps(r2_8, r2_8), r2_8)); ri6 = _mm256_and_ps(mask, _mm256_rcp_ps(r6)); pe_tmp = _mm256_and_ps(mask, _mm256_mul_ps(ri6, _mm256_sub_ps(_mm256_mul_ps(c12_8, ri6), c6_8))); pe_sum = _mm256_add_ps(pe_tmp, pe_sum); } _mm256_store_ps(result, pe_sum); for (int i = 0; i < 8; i++) { pe += result[i]; } /* END SIMD SECTION */ for (; atom_i < this->n; atom_i++) { coordinates atom_xyz = trj.GetXYZ(frame_i, this->name, atom_i); float r2 = distance2(atom_xyz, rand_xyz, cubicbox(box)); if (r2 < this->rcut2) { float ri6 = 1.0/(pow(r2,3)); pe += ri6*(this->c12*ri6 - this->c6); } } pe += this->n/vol * this->tail_factor;; return pe; }
void Clusters::do_clustering(int frame, Trajectory &traj, string group, double rcut2) { coordinates atom_j_vec; double r2; int atom_counter_i; int atom_counter_j; int atom_i; int atom_j; int mol_i; int mol_j; int c_i; int c_j; triclinicbox box; vector <coordinates> atom_i_vec(atoms_per_mol); box = traj.GetBox(frame); initialize(); for (mol_i = 0; mol_i < this->mol_n; mol_i++) { c_i = this->index.at(mol_i); /* Go ahead and save the atoms from this molecule to a vector so that we * don't have to keep accessing it later below. */ atom_counter_i = mol_i * atoms_per_mol; for (atom_i = 0; atom_i < this->atoms_per_mol; atom_i++) { atom_i_vec.at(atom_i) = traj.GetXYZ(frame, group, atom_counter_i); atom_counter_i++; } for (mol_j = 0; mol_j < this->mol_n; mol_j++) { c_j = this->index.at(mol_j); if (c_i != c_j) { /* This compares every atom on the two molecules. If one pair is * within the cutoff range then the two molecules should be * added to the same cluster and no more checking is necessary * for these two molecules. */ for (atom_i = 0; atom_i < this->atoms_per_mol; atom_i++) { atom_counter_j = mol_j * atoms_per_mol; for (atom_j = 0; atom_j < this->atoms_per_mol; atom_j++) { atom_j_vec = traj.GetXYZ(frame, group, atom_counter_j); atom_counter_j++; r2 = distance2(atom_i_vec.at(atom_i), atom_j_vec, box); if (r2 < rcut2) { add(c_i, c_j); goto nextmol; } } } } nextmol: continue; } } return; }