static void setup_exclusions_and_indices(gmx_allvsall_data_t * aadata, t_blocka * excl, int natoms) { int i,j,k; int nj0,nj1; int max_offset; int max_excl_offset; int iexcl; int nj; /* This routine can appear to be a bit complex, but it is mostly book-keeping. * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates * whether they should interact or not. * * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction * should be present, otherwise 0. Since exclusions typically only occur when i & j are close, * we create a jindex array with three elements per i atom: the starting point, the point to * which we need to check exclusions, and the end point. * This way we only have to allocate a short exclusion mask per i atom. */ /* Allocate memory for our modified jindex array */ snew(aadata->jindex,3*natoms); /* Pointer to lists with exclusion masks */ snew(aadata->exclusion_mask,natoms); for(i=0;i<natoms;i++) { /* Start */ aadata->jindex[3*i] = i+1; max_offset = calc_maxoffset(i,natoms); /* Exclusions */ nj0 = excl->index[i]; nj1 = excl->index[i+1]; /* first check the max range */ max_excl_offset = -1; for(j=nj0; j<nj1; j++) { iexcl = excl->a[j]; k = iexcl - i; if( k+natoms <= max_offset ) { k+=natoms; } max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset; } max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset; aadata->jindex[3*i+1] = i+1+max_excl_offset; snew(aadata->exclusion_mask[i],max_excl_offset); /* Include everything by default */ for(j=0;j<max_excl_offset;j++) { /* Use all-ones to mark interactions that should be present, compatible with SSE */ aadata->exclusion_mask[i][j] = 0xFFFFFFFF; } /* Go through exclusions again */ for(j=nj0; j<nj1; j++) { iexcl = excl->a[j]; k = iexcl - i; if( k+natoms <= max_offset ) { k+=natoms; } if(k>0 && k<=max_excl_offset) { /* Excluded, kill it! */ aadata->exclusion_mask[i][k-1] = 0; } } /* End */ aadata->jindex[3*i+2] = i+1+max_offset; } }
static void setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata, t_ilist * ilist, int natoms, gmx_bool bInclude12, gmx_bool bInclude13, gmx_bool bInclude14) { int i, j, k; int a1, a2; int max_offset; int max_excl_offset; /* This routine can appear to be a bit complex, but it is mostly book-keeping. * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates * whether they should interact or not. * * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction * should be present, otherwise 0. Since exclusions typically only occur when i & j are close, * we create a jindex array with three elements per i atom: the starting point, the point to * which we need to check exclusions, and the end point. * This way we only have to allocate a short exclusion mask per i atom. */ /* Allocate memory for jindex arrays */ snew(aadata->jindex_gb, 3*natoms); /* Pointer to lists with exclusion masks */ snew(aadata->exclusion_mask_gb, natoms); for (i = 0; i < natoms; i++) { /* Start */ aadata->jindex_gb[3*i] = i+1; max_offset = calc_maxoffset(i, natoms); /* first check the max range of atoms to EXCLUDE */ max_excl_offset = 0; if (!bInclude12) { for (j = 0; j < ilist[F_GB12].nr; j += 3) { a1 = ilist[F_GB12].iatoms[j+1]; a2 = ilist[F_GB12].iatoms[j+2]; if (a1 == i) { k = a2-a1; } else if (a2 == i) { k = a1+natoms-a2; } else { continue; } if (k > 0 && k <= max_offset) { max_excl_offset = std::max(k, max_excl_offset); } } } if (!bInclude13) { for (j = 0; j < ilist[F_GB13].nr; j += 3) { a1 = ilist[F_GB13].iatoms[j+1]; a2 = ilist[F_GB13].iatoms[j+2]; if (a1 == i) { k = a2-a1; } else if (a2 == i) { k = a1+natoms-a2; } else { continue; } if (k > 0 && k <= max_offset) { max_excl_offset = std::max(k, max_excl_offset); } } } if (!bInclude14) { for (j = 0; j < ilist[F_GB14].nr; j += 3) { a1 = ilist[F_GB14].iatoms[j+1]; a2 = ilist[F_GB14].iatoms[j+2]; if (a1 == i) { k = a2-a1; } else if (a2 == i) { k = a1+natoms-a2; } else { continue; } if (k > 0 && k <= max_offset) { max_excl_offset = std::max(k, max_excl_offset); } } } max_excl_offset = std::min(max_offset, max_excl_offset); aadata->jindex_gb[3*i+1] = i+1+max_excl_offset; snew(aadata->exclusion_mask_gb[i], max_excl_offset); /* Include everything by default */ for (j = 0; j < max_excl_offset; j++) { /* Use all-ones to mark interactions that should be present, compatible with SSE */ aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF; } /* Go through exclusions again */ if (!bInclude12) { for (j = 0; j < ilist[F_GB12].nr; j += 3) { a1 = ilist[F_GB12].iatoms[j+1]; a2 = ilist[F_GB12].iatoms[j+2]; if (a1 == i) { k = a2-a1; } else if (a2 == i) { k = a1+natoms-a2; } else { continue; } if (k > 0 && k <= max_offset) { aadata->exclusion_mask_gb[i][k-1] = 0; } } } if (!bInclude13) { for (j = 0; j < ilist[F_GB13].nr; j += 3) { a1 = ilist[F_GB13].iatoms[j+1]; a2 = ilist[F_GB13].iatoms[j+2]; if (a1 == i) { k = a2-a1; } else if (a2 == i) { k = a1+natoms-a2; } else { continue; } if (k > 0 && k <= max_offset) { aadata->exclusion_mask_gb[i][k-1] = 0; } } } if (!bInclude14) { for (j = 0; j < ilist[F_GB14].nr; j += 3) { a1 = ilist[F_GB14].iatoms[j+1]; a2 = ilist[F_GB14].iatoms[j+2]; if (a1 == i) { k = a2-a1; } else if (a2 == i) { k = a1+natoms-a2; } else { continue; } if (k > 0 && k <= max_offset) { aadata->exclusion_mask_gb[i][k-1] = 0; } } } /* End */ /* End */ aadata->jindex_gb[3*i+2] = i+1+max_offset; } }