/*
 * sgemm - single-precision product of A with the matrix produced by
 * transpose().
 *
 * For every pair (i, j) with 0 <= i, j < m this stores
 *
 *     C[i + j*m] = sum over k in [0, n) of A[i + k*m] * B[k + j*n]
 *
 * where B is a local m*n scratch array filled by transpose(m, n, A, B).
 *
 * NOTE(review): transpose() is defined outside this block. The indexing of B
 * used here (and in the original code) assumes it writes
 * B[k + j*n] = A[j + k*m], i.e. that C ends up as A * A^T for a column-major
 * m x n matrix A -- confirm against transpose().
 *
 * m, n : dimensions passed to transpose(); A is indexed as A[i + k*m].
 * A    : input, m*n floats.
 * C    : output, m*m floats; every entry is overwritten.
 *
 * Fixes relative to the previous version:
 *   - the accumulator is zeroed for every (i, j) instead of being read
 *     uninitialised;
 *   - float data is handled with the single-precision intrinsics (the old
 *     code passed float pointers to the double-precision _pd loads and only
 *     multiplied the low lane via _mm_mul_sd);
 *   - the scalar tail multiplies by B[k + j*n]; it used to read A[k + j*n];
 *   - non-positive sizes no longer create a zero/negative-length VLA.
 */
void sgemm( int m, int n, float *A, float *C )
{
    int i, j, k;

    if (m <= 0) {
        return;
    }
    if (n <= 0) {
        /* Empty inner dimension: every dot product is an empty sum. */
        for (i = 0; i < m * m; i += 1) {
            C[i] = 0.0f;
        }
        return;
    }

    /* NOTE(review): B lives on the stack; large m*n may exhaust it. */
    float B[n * m];
    float lanes[4];

    transpose(m, n, A, B);

    for (i = 0; i < m; i += 1) {
        const float *a_row = A + i;             /* element k is a_row[k * m] */

        for (j = 0; j < m; j += 1) {
            const float *b_col = B + j * n;     /* element k is b_col[k]     */
            __m128 acc = _mm_setzero_ps();
            float total;

            /* Four products per step; A is strided so its lanes are gathered. */
            for (k = 0; k + 4 <= n; k += 4) {
                const __m128 a4 = _mm_set_ps(a_row[(k + 3) * m],
                                             a_row[(k + 2) * m],
                                             a_row[(k + 1) * m],
                                             a_row[k * m]);
                const __m128 b4 = _mm_loadu_ps(b_col + k);

                acc = _mm_add_ps(acc, _mm_mul_ps(a4, b4));
            }

            /* Horizontal sum of the four partial sums. */
            _mm_storeu_ps(lanes, acc);
            total = (lanes[0] + lanes[1]) + (lanes[2] + lanes[3]);

            /* Scalar tail for the last n % 4 terms. */
            for ( ; k < n; k += 1) {
                total += a_row[k * m] * b_col[k];
            }

            C[i + j * m] = total;
        }
    }
}
/*
 * interpolate_gdouble_cubic_sse2 - weighted sum of four input rows.
 *
 * For each output index i this writes
 *     o[i] = (r0[i]*ic[0] + r1[i]*ic[1]) + (r2[i]*ic[2] + r3[i]*ic[3])
 * where r0..r3 start at ap + 0, 1, 2 and 3 times astride BYTES.
 *
 * op      : destination, written two doubles at a time with _mm_store_pd.
 * ap      : base of the first input row.
 * len     : loop bound; elements are handled in pairs, so when len is odd the
 *           last iteration also reads and writes element index len.
 * icp     : four gdouble coefficients.
 * astride : distance between consecutive input rows, in bytes.
 *
 * The aligned load/store intrinsics are used for the rows and the output.
 */
void
interpolate_gdouble_cubic_sse2 (gpointer op, const gpointer ap,
    gint len, const gpointer icp, gint astride)
{
  gdouble *dst = op;
  const gdouble *weights = icp;
  const gint8 *base = (const gint8 *) ap;
  const gdouble *row0 = (const gdouble *) (base + 0 * astride);
  const gdouble *row1 = (const gdouble *) (base + 1 * astride);
  const gdouble *row2 = (const gdouble *) (base + 2 * astride);
  const gdouble *row3 = (const gdouble *) (base + 3 * astride);
  /* Each coefficient is broadcast to both lanes once, outside the loop. */
  const __m128d w0 = _mm_load1_pd (weights + 0);
  const __m128d w1 = _mm_load1_pd (weights + 1);
  const __m128d w2 = _mm_load1_pd (weights + 2);
  const __m128d w3 = _mm_load1_pd (weights + 3);
  gint pos = 0;

  while (pos < len) {
    const __m128d p0 = _mm_mul_pd (_mm_load_pd (row0 + pos), w0);
    const __m128d p1 = _mm_mul_pd (_mm_load_pd (row1 + pos), w1);
    const __m128d p2 = _mm_mul_pd (_mm_load_pd (row2 + pos), w2);
    const __m128d p3 = _mm_mul_pd (_mm_load_pd (row3 + pos), w3);
    /* Pairwise reduction: (p0 + p1) + (p2 + p3). */
    const __m128d front = _mm_add_pd (p0, p1);
    const __m128d back = _mm_add_pd (p2, p3);

    _mm_store_pd (dst + pos, _mm_add_pd (front, back));
    pos += 2;
  }
}
/*
 * do_block - register-blocked update C += A * B on column-major operands.
 *
 * Indexing used below: A[i + k*M], B[k + j*K], C[i + j*M].
 * The loops advance by RSIZE_K, RSIZE_N and RSIZE_M, while each step touches
 * a fixed 2 (rows of C) x 2 (columns of C) x 2 (k values) tile.
 * NOTE(review): that only covers every element once if all three RSIZE_*
 * constants are 2 and M, K, N are even -- confirm where they are defined.
 *
 * A and C are accessed with the aligned _mm_load_pd/_mm_store_pd intrinsics.
 */
static void do_block(int M, int K, int N, double* A, double* B, double* C)
{
  for (int kk = 0; kk < K; kk += RSIZE_K) {
    for (int jj = 0; jj < N; jj += RSIZE_N) {
      const double *b_left = B + kk + jj * K;
      const double *b_right = B + kk + (jj + 1) * K;
      /* The four B entries of the tile, each broadcast to both lanes. */
      const __m128d b_k0_j0 = _mm_load1_pd(b_left);
      const __m128d b_k1_j0 = _mm_load1_pd(b_left + 1);
      const __m128d b_k0_j1 = _mm_load1_pd(b_right);
      const __m128d b_k1_j1 = _mm_load1_pd(b_right + 1);

      for (int ii = 0; ii < M; ii += RSIZE_M) {
        double *c_left = C + ii + jj * M;
        double *c_right = C + ii + (jj + 1) * M;
        const __m128d a_k0 = _mm_load_pd(A + ii + kk * M);
        const __m128d a_k1 = _mm_load_pd(A + ii + (kk + 1) * M);
        __m128d acc_left = _mm_load_pd(c_left);
        __m128d acc_right = _mm_load_pd(c_right);

        /* First the k contribution for both columns, then the k+1 one. */
        acc_left = _mm_add_pd(acc_left, _mm_mul_pd(a_k0, b_k0_j0));
        acc_right = _mm_add_pd(acc_right, _mm_mul_pd(a_k0, b_k0_j1));
        acc_left = _mm_add_pd(acc_left, _mm_mul_pd(a_k1, b_k1_j0));
        acc_right = _mm_add_pd(acc_right, _mm_mul_pd(a_k1, b_k1_j1));

        _mm_store_pd(c_left, acc_left);
        _mm_store_pd(c_right, acc_right);
      }
    }
  }
}
/*
 * Demo: 2x2 double-precision C = C + A x B with SSE2, matrices stored
 * column by column.
 *
 *     A = |1 3|    B = |3 0|    C = |0 0|
 *         |2 4|        |0 2|        |0 0|
 *
 * Written out per element:
 *     C[0] += A[0]*B[0] + A[2]*B[1]
 *     C[1] += A[1]*B[0] + A[3]*B[1]
 *     C[2] += A[0]*B[2] + A[2]*B[3]
 *     C[3] += A[1]*B[2] + A[3]*B[3]
 *
 * Prints A, B and the resulting C, then returns 0.
 */
int main( int argc, char **argv )
{
    double A[4] = {1,2,3,4}, B[4] = {3,0,0,2}, C[4] = {0,0,0,0};

    /* One SIMD register per column of C. */
    __m128d c_left  = _mm_loadu_pd( &C[0] );    /* (C[0],C[1]) */
    __m128d c_right = _mm_loadu_pd( &C[2] );    /* (C[2],C[3]) */
    int step = 0;

    while( step < 2 )
    {
        /* Column `step` of A, and row `step` of B broadcast per column of C. */
        const __m128d a_col   = _mm_loadu_pd( &A[2*step] );
        const __m128d b_left  = _mm_load1_pd( &B[0+step] );
        const __m128d b_right = _mm_load1_pd( &B[2+step] );

        c_left  = _mm_add_pd( c_left,  _mm_mul_pd( a_col, b_left ) );
        c_right = _mm_add_pd( c_right, _mm_mul_pd( a_col, b_right ) );
        ++step;
    }

    /* Write both columns back into C. */
    _mm_storeu_pd( &C[0], c_left );
    _mm_storeu_pd( &C[2], c_right );

    /* Show the operands and the result row by row. */
    printf( "|%g %g| * |%g %g| = |%g %g|\n", A[0], A[2], B[0], B[2], C[0], C[2] );
    printf( "|%g %g| |%g %g| |%g %g|\n", A[1], A[3], B[1], B[3], C[1], C[3] );

    return 0;
}
/*
 * inner_product_gdouble_linear_1_sse2 - dot product of `a` with two
 * coefficient rows, linearly blended into a single double.
 *
 * With s0 = sum a[i]*row0[i] and s1 = sum a[i]*row1[i], where row0 starts at
 * b and row1 at b + bstride BYTES, the value stored in *o is
 *     (s0 - s1) * icoeff[0] + s1
 *
 * o       : receives the single result (only one double is written).
 * a       : input samples, read with unaligned loads.
 * b       : base of the coefficient rows, read with aligned loads.
 * len     : loop bound; the loop consumes four elements per iteration, so up
 *           to three elements past len are read when len is not a multiple
 *           of four.
 * icoeff  : blend factor; only icoeff[0] is read.
 * bstride : distance between the two coefficient rows, in bytes.
 */
static inline void
inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a,
    const gdouble * b, gint len, const gdouble * icoeff, gint bstride)
{
  const gdouble *taps0 = (const gdouble *) ((const gint8 *) b + 0 * bstride);
  const gdouble *taps1 = (const gdouble *) ((const gint8 *) b + 1 * bstride);
  __m128d acc0 = _mm_setzero_pd ();
  __m128d acc1 = _mm_setzero_pd ();
  __m128d blended;
  gint pos;

  for (pos = 0; pos < len; pos += 4) {
    /* First pair of samples against both rows. */
    __m128d in = _mm_loadu_pd (a + pos);

    acc0 = _mm_add_pd (acc0, _mm_mul_pd (in, _mm_load_pd (taps0 + pos)));
    acc1 = _mm_add_pd (acc1, _mm_mul_pd (in, _mm_load_pd (taps1 + pos)));

    /* Second pair of samples against both rows. */
    in = _mm_loadu_pd (a + pos + 2);
    acc0 = _mm_add_pd (acc0, _mm_mul_pd (in, _mm_load_pd (taps0 + pos + 2)));
    acc1 = _mm_add_pd (acc1, _mm_mul_pd (in, _mm_load_pd (taps1 + pos + 2)));
  }

  /* Blend per lane: (acc0 - acc1) * icoeff[0] + acc1. */
  blended = _mm_mul_pd (_mm_sub_pd (acc0, acc1), _mm_load1_pd (icoeff));
  blended = _mm_add_pd (blended, acc1);

  /* Fold the high lane onto the low lane and store the low lane only. */
  blended = _mm_add_sd (blended, _mm_unpackhi_pd (blended, blended));
  _mm_store_sd (o, blended);
}
/**
 * square_dgemm - C += A * B for square lda x lda matrices of doubles,
 * implemented with SSE2.
 *
 * All three matrices are indexed column by column: element (row, col) lives
 * at [row + col*lda]. For each column j of C and each k, the scalar
 * B[k + j*lda] is broadcast and column k of A, scaled by it, is added to
 * column j of C.
 *
 * lda : matrix dimension; nothing is touched when lda <= 0.
 * A,B : inputs, lda*lda doubles each (read only).
 * C   : accumulated in place, lda*lda doubles.
 *
 * Only unaligned loads/stores are used, so no alignment is required.
 *
 * Changes from the previous version: the comments are block comments, so the
 * function stays intact when it sits on a single physical line (the old
 * line comments swallowed everything after them); the obsolete `register`
 * storage class is gone; the loop bounds are computed once.
 */
void square_dgemm(int lda, double* A, double* B, double* C)
{
    const int end8 = lda / 8 * 8;   /* rows covered by the 8-wide loop */
    const int end2 = lda / 2 * 2;   /* rows covered by SIMD pairs      */

    for (int j = 0; j < lda; j++) {
        double *c_col = C + j * lda;

        for (int k = 0; k < lda; k++) {
            const double *a_col = A + k * lda;
            /* Broadcast B(k, j) into both lanes. */
            const __m128d b_kj = _mm_load1_pd(B + k + j * lda);
            int i = 0;

            /* Main loop: eight rows per step, as four SIMD pairs. */
            for (; i < end8; i += 8) {
                for (int u = 0; u < 8; u += 2) {
                    const __m128d a2 = _mm_loadu_pd(a_col + i + u);
                    const __m128d c2 = _mm_loadu_pd(c_col + i + u);

                    _mm_storeu_pd(c_col + i + u,
                                  _mm_add_pd(c2, _mm_mul_pd(b_kj, a2)));
                }
            }

            /* Remaining full pairs. */
            for (; i < end2; i += 2) {
                const __m128d a2 = _mm_loadu_pd(a_col + i);
                const __m128d c2 = _mm_loadu_pd(c_col + i);

                _mm_storeu_pd(c_col + i, _mm_add_pd(c2, _mm_mul_pd(b_kj, a2)));
            }

            /* Last row when lda is odd. */
            for (; i < lda; i++) {
                C[i + j * lda] += A[i + k * lda] * B[k + j * lda];
            }
        }
    }
}
/*
 * Wrapper that returns _mm_load1_pd(A) unchanged.
 *
 * NOTE(review): the tagged comment lines inside the body look like
 * FileCheck-style expectations for the generated IR and assembly -- confirm
 * against the RUN lines of this file. They are kept verbatim, one per line:
 * joined onto a single physical line, the first line comment swallowed the
 * return statement and the closing brace.
 */
__m128d test_mm_load1_pd(double const* A) {
  // DAG-LABEL: test_mm_load1_pd
  // DAG: load double, double* %{{.*}}, align 8
  // DAG: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // DAG: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  //
  // ASM-LABEL: test_mm_load1_pd
  // ASM: movsd
  // ASM: movlhps
  return _mm_load1_pd(A);
}
/*
 * interpolate_gdouble_linear_sse2 - weighted sum of two input rows.
 *
 * For each output index i this writes
 *     o[i] = r0[i]*ic[0] + r1[i]*ic[1]
 * where r0 starts at ap and r1 at ap + astride BYTES.
 *
 * op      : destination, written with aligned stores.
 * ap      : base of the first input row, read with aligned loads.
 * len     : loop bound; four elements are produced per iteration, so up to
 *           three elements past len are read and written when len is not a
 *           multiple of four.
 * icp     : two gdouble coefficients.
 * astride : distance between the two input rows, in bytes.
 */
void
interpolate_gdouble_linear_sse2 (gpointer op, const gpointer ap,
    gint len, const gpointer icp, gint astride)
{
  gdouble *dst = op;
  const gdouble *weights = icp;
  const gdouble *row0 = (const gdouble *) ((const gint8 *) ap + 0 * astride);
  const gdouble *row1 = (const gdouble *) ((const gint8 *) ap + 1 * astride);
  /* Broadcast each coefficient to both lanes once. */
  const __m128d w0 = _mm_load1_pd (weights + 0);
  const __m128d w1 = _mm_load1_pd (weights + 1);
  gint pos;

  for (pos = 0; pos < len; pos += 4) {
    const gdouble *in0 = row0 + pos;
    const gdouble *in1 = row1 + pos;
    gdouble *out = dst + pos;
    __m128d scaled0, scaled1;

    /* Elements pos, pos + 1. */
    scaled0 = _mm_mul_pd (_mm_load_pd (in0), w0);
    scaled1 = _mm_mul_pd (_mm_load_pd (in1), w1);
    _mm_store_pd (out, _mm_add_pd (scaled0, scaled1));

    /* Elements pos + 2, pos + 3. */
    scaled0 = _mm_mul_pd (_mm_load_pd (in0 + 2), w0);
    scaled1 = _mm_mul_pd (_mm_load_pd (in1 + 2), w1);
    _mm_store_pd (out + 2, _mm_add_pd (scaled0, scaled1));
  }
}
/*
 * vector_addm_double_aligned_32 - in-place v1[i] += lambda * v2[i] for
 * i in [0, n).
 *
 * Elements are processed two at a time with SSE2; a final scalar step
 * handles the last element when n is odd. The aligned load/store intrinsics
 * are used only when ALGEBRA_IS_ALIGNED() accepts both pointers, otherwise
 * the unaligned ones are used.
 *
 * v1     : destination vector, updated in place.
 * lambda : scale factor applied to v2.
 * v2     : source vector (read only).
 * n      : number of elements.
 */
ALGEBRA_INLINE void vector_addm_double_aligned_32 (double* v1,double lambda,const double* v2,size_t n)
{
    const __m128d scale = _mm_load1_pd(&lambda);   /* lambda in both lanes */
    const size_t pairs = n / 2;
    const size_t leftover = n % 2;
    size_t idx;

    if (pairs > 0)
    {
        if (ALGEBRA_IS_ALIGNED(v1) && ALGEBRA_IS_ALIGNED(v2))
        {
            /* Both pointers aligned: aligned loads and stores. */
            for (idx = 0; idx < pairs; idx++, v1 += 2, v2 += 2)
            {
                const __m128d dst_pair = _mm_load_pd(v1);
                const __m128d scaled = _mm_mul_pd(_mm_load_pd(v2), scale);

                _mm_store_pd(v1, _mm_add_pd(dst_pair, scaled));
            }
        }
        else
        {
            /* Same update with the unaligned variants. */
            for (idx = 0; idx < pairs; idx++, v1 += 2, v2 += 2)
            {
                const __m128d dst_pair = _mm_loadu_pd(v1);
                const __m128d scaled = _mm_mul_pd(_mm_loadu_pd(v2), scale);

                _mm_storeu_pd(v1, _mm_add_pd(dst_pair, scaled));
            }
        }
    }

    /* v1/v2 now point past the processed pairs; finish the odd element. */
    for (idx = 0; idx < leftover; idx++)
    {
        v1[idx] += lambda * v2[idx];
    }
}
/** Returns the result of _mm_load1_pd applied to the address of the argument.
    NOTE(review): presumably Type is double and ParallelType is __m128d, so
    this broadcasts v into both lanes - confirm against the enclosing type. */
static forcedinline ParallelType load1 (Type v) noexcept
{
    // The intrinsic reads through a pointer, so use the address of the by-value copy.
    const Type* const source = &v;

    return _mm_load1_pd (source);
}
/*
 * exchsolution_gmrfData_1 - boundary handling and neighbor exchange for
 * fieldData_Solution_GMRF[1].
 *
 * NOTE(review): the shape of this code (redundant blocks, empty loops,
 * "Scop" comments) suggests it is machine-generated -- confirm before
 * editing it by hand.
 *
 * What the visible code does, in order:
 *   1. For each of the four neighbors (index 0..3) whose entry in
 *      neighbor_isValid[0][] is false, three fixed entries of
 *      fieldData_Solution_GMRF[1] are set to 0.0. The xPos/yPos scalars and
 *      the vec* temporaries computed next to those stores are never written
 *      to field data or passed on anywhere in this function.
 *   2. Four rounds of non-blocking MPI traffic. In every round the sends are
 *      posted first, then the receives; the receives are waited on before
 *      the sends. A request is only posted for a neighbor that is both valid
 *      and remote. Send tags are (commId << 16) + (neighbor_fragCommId & 0xffff);
 *      receive tags are (neighbor_fragCommId << 16) + (commId & 0xffff).
 *
 * The `slot` parameter is not referenced. Every fragmentIdx loop runs exactly
 * once and its body indexes with the constant 0 rather than with fragmentIdx.
 */
void exchsolution_gmrfData_1(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) {
/* Neighbor 0 not valid: zero entries 8, 14 and 20 of fieldData_Solution_GMRF[1]. */
if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S493, S492, S494 */ { { { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i1 = 1; for (; (i1<=2); i1 += 2) { fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[((i1*6)+8)] = 0.000000e+00; } for (; (i1<=3); i1 += 1) { fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<1); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<4); i1 += 1) { xPos = posBegin[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(2.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<1); i1 += 4) { /* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<4); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } }
/* Neighbor 1 not valid: zero entries 10, 16 and 22 of fieldData_Solution_GMRF[1]. */
if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S496, S495, S497 */ { { { double* fieldData_Solution_GMRF_1_p1 = 
(&fieldData_Solution_GMRF[1][0]); int i1 = 1; for (; (i1<=2); i1 += 2) { fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[((i1*6)+10)] = 0.000000e+00; } for (; (i1<=3); i1 += 1) { fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<1); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<4); i1 += 1) { xPos = posEnd[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(2.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<1); i1 += 4) { /* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<4); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } }
/* Neighbor 2 not valid: zero entries 8, 9 and 10 of fieldData_Solution_GMRF[1]. */
if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S500, S499, S498 */ { { { int i2 = 2; for (; (i2<=3); i2 += 2) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=4); i2 += 1) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i2 = 2; for 
(; (i2<=3); i2 += 2) { fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[(i2+7)] = 0.000000e+00; } for (; (i2<=4); i2 += 1) { fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=3); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=4); i2 += 1) { yPos = posBegin[1]; } } } } }
/* Neighbor 3 not valid: zero entries 20, 21 and 22 of fieldData_Solution_GMRF[1]. */
if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S503, S502, S501 */ { { { int i2 = 2; for (; (i2<=3); i2 += 2) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=4); i2 += 1) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { int i2 = 2; for (; (i2<=3); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=4); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i2 = 2; for (; (i2<=3); i2 += 2) { fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[(i2+19)] = 0.000000e+00; } for (; (i2<=4); i2 += 1) { fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00; } } } } } } }
/* Round 1 (mpiDatatype_3_1_6): send from [1][10] to neighbor 1, receive into [1][8] from neighbor 0. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(&fieldData_Solution_GMRF[1][10], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + 
((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } }
/* Round 2 (mpiDatatype_1_3_6): send from [1][20] to neighbor 3, receive into [1][8] from neighbor 2. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_Solution_GMRF[1][20], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for 
(int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } }
/* Round 3 (mpiDatatype_5_1_6): send from [1][3] to neighbors 0 and 1, receive into [1][1] from neighbor 0 and [1][5] from neighbor 1. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(&fieldData_Solution_GMRF[1][5], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if 
(isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } }
/* Round 4 (mpiDatatype_1_5_6): send from [1][13] to neighbors 2 and 3, receive into [1][1] from neighbor 2 and [1][25] from neighbor 3. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_Solution_GMRF[1][25], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { 
waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void exchlaplacecoeff_gmrfData_0(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S1053, S1056, S1059, S1050, S1058, S1052, S1055, S1060, S1054, S1057, S1051 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+32)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+152)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+104)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+80)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<0); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<3); 
i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+128)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+176)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+200)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<0); i1 += 4) { /* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<3); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* 
fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+56)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S1071, S1065, S1068, S1062, S1070, S1064, S1067, S1061, S1069, S1063, S1066 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+201)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+57)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+81)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; 
(i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+177)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<0); i1 += 4) { /* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<3); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+105)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { 
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+129)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+153)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<0); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<3); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+33)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S1080, S1074, S1077, S1082, S1076, S1079, S1073, S1072, S1081, S1075, S1078 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+127)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=2); i2 += 2) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=3); i2 += 1) { xPos = 
((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+199)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=3); i2 += 1) { yPos = posBegin[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+31)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+175)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+79)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+55)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+151)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+7)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+103)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S1083, S1092, S1086, S1089, S1088, S1091, S1085, S1090, S1093, S1087, S1084 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+13)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+61)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+205)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+133)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+85)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=3); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+37)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=3); i2 += 1) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+181)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00; 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+157)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+109)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1094 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*2)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 1; for (; (i1<=1); i1 += 2) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)]; } for (; (i1<=2); i1 += 1) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 18, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 18, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if 
(isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1095 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*2)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 3; for (; (i1<=3); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-4)] = buffer_Recv_0_p1[(i1-2)]; } for (; (i1<=4); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][14], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][8], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
(reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1096 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*4)]); int i1 = 0; for (; (i1<=2); i1 += 2) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)]; } for (; (i1<=3); i1 += 1) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1097 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 0; for (; (i1<=2); i1 += 2) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)]; buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)]; } for (; (i1<=3); i1 += 1) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(buffer_Send[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if 
((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(buffer_Recv[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1098 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 1; for (; (i1<=3); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+1)] = buffer_Recv_0_p1[i1]; } for (; (i1<=4); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1099 */ 
for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 4; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-14)] = buffer_Recv_1_p1[(i1-3)]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][13], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][7], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][1], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = 
true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][19], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
int calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top, const t_atomtypes *atype, double *x, t_nblist *nl, gmx_genborn_t *born) { int i,k,n,ii,is3,ii3,nj0,nj1,offset; int jnrA,jnrB,j3A,j3B; int *mdtype; double shX,shY,shZ; int *jjnr; double *shiftvec; double gpi_ai,gpi2; double factor; double *gb_radius; double *vsolv; double *work; double *dadx; __m128d ix,iy,iz; __m128d jx,jy,jz; __m128d dx,dy,dz; __m128d tx,ty,tz; __m128d rsq,rinv,rinv2,rinv4,rinv6; __m128d ratio,gpi,rai,raj,vai,vaj,rvdw; __m128d ccf,dccf,theta,cosq,term,sinq,res,prod,prod_ai,tmp; __m128d mask,icf4,icf6,mask_cmp; const __m128d half = _mm_set1_pd(0.5); const __m128d three = _mm_set1_pd(3.0); const __m128d one = _mm_set1_pd(1.0); const __m128d two = _mm_set1_pd(2.0); const __m128d zero = _mm_set1_pd(0.0); const __m128d four = _mm_set1_pd(4.0); const __m128d still_p5inv = _mm_set1_pd(STILL_P5INV); const __m128d still_pip5 = _mm_set1_pd(STILL_PIP5); const __m128d still_p4 = _mm_set1_pd(STILL_P4); factor = 0.5 * ONE_4PI_EPS0; gb_radius = born->gb_radius; vsolv = born->vsolv; work = born->gpol_still_work; jjnr = nl->jjnr; shiftvec = fr->shift_vec[0]; dadx = fr->dadx; jnrA = jnrB = 0; jx = _mm_setzero_pd(); jy = _mm_setzero_pd(); jz = _mm_setzero_pd(); n = 0; for(i=0;i<natoms;i++) { work[i]=0; } for(i=0;i<nl->nri;i++) { ii = nl->iinr[i]; ii3 = ii*3; is3 = 3*nl->shift[i]; shX = shiftvec[is3]; shY = shiftvec[is3+1]; shZ = shiftvec[is3+2]; nj0 = nl->jindex[i]; nj1 = nl->jindex[i+1]; ix = _mm_set1_pd(shX+x[ii3+0]); iy = _mm_set1_pd(shY+x[ii3+1]); iz = _mm_set1_pd(shZ+x[ii3+2]); /* Polarization energy for atom ai */ gpi = _mm_setzero_pd(); rai = _mm_load1_pd(gb_radius+ii); prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]); for(k=nj0;k<nj1-1;k+=2) { jnrA = jjnr[k]; jnrB = jjnr[k+1]; j3A = 3*jnrA; j3B = 3*jnrB; GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz); GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj); GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA,vsolv+jnrB,vaj); dx 
= _mm_sub_pd(ix,jx); dy = _mm_sub_pd(iy,jy); dz = _mm_sub_pd(iz,jz); rsq = gmx_mm_calc_rsq_pd(dx,dy,dz); rinv = gmx_mm_invsqrt_pd(rsq); rinv2 = _mm_mul_pd(rinv,rinv); rinv4 = _mm_mul_pd(rinv2,rinv2); rinv6 = _mm_mul_pd(rinv4,rinv2); rvdw = _mm_add_pd(rai,raj); ratio = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw))); mask_cmp = _mm_cmple_pd(ratio,still_p5inv); /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */ if( 0 == _mm_movemask_pd(mask_cmp) ) { /* if ratio>still_p5inv for ALL elements */ ccf = one; dccf = _mm_setzero_pd(); } else { ratio = _mm_min_pd(ratio,still_p5inv); theta = _mm_mul_pd(ratio,still_pip5); gmx_mm_sincos_pd(theta,&sinq,&cosq); term = _mm_mul_pd(half,_mm_sub_pd(one,cosq)); ccf = _mm_mul_pd(term,term); dccf = _mm_mul_pd(_mm_mul_pd(two,term), _mm_mul_pd(sinq,theta)); } prod = _mm_mul_pd(still_p4,vaj); icf4 = _mm_mul_pd(ccf,rinv4); icf6 = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four,ccf),dccf), rinv6); GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_mul_pd(prod_ai,icf4)); gpi = _mm_add_pd(gpi, _mm_mul_pd(prod,icf4) ); _mm_store_pd(dadx,_mm_mul_pd(prod,icf6)); dadx+=2; _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6)); dadx+=2; } if(k<nj1) { jnrA = jjnr[k]; j3A = 3*jnrA; GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz); GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj); GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA,vaj); dx = _mm_sub_sd(ix,jx); dy = _mm_sub_sd(iy,jy); dz = _mm_sub_sd(iz,jz); rsq = gmx_mm_calc_rsq_pd(dx,dy,dz); rinv = gmx_mm_invsqrt_pd(rsq); rinv2 = _mm_mul_sd(rinv,rinv); rinv4 = _mm_mul_sd(rinv2,rinv2); rinv6 = _mm_mul_sd(rinv4,rinv2); rvdw = _mm_add_sd(rai,raj); ratio = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw))); mask_cmp = _mm_cmple_sd(ratio,still_p5inv); /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! 
*/ if( 0 == _mm_movemask_pd(mask_cmp) ) { /* if ratio>still_p5inv for ALL elements */ ccf = one; dccf = _mm_setzero_pd(); } else { ratio = _mm_min_sd(ratio,still_p5inv); theta = _mm_mul_sd(ratio,still_pip5); gmx_mm_sincos_pd(theta,&sinq,&cosq); term = _mm_mul_sd(half,_mm_sub_sd(one,cosq)); ccf = _mm_mul_sd(term,term); dccf = _mm_mul_sd(_mm_mul_sd(two,term), _mm_mul_sd(sinq,theta)); } prod = _mm_mul_sd(still_p4,vaj); icf4 = _mm_mul_sd(ccf,rinv4); icf6 = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four,ccf),dccf), rinv6); GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_mul_sd(prod_ai,icf4)); gpi = _mm_add_sd(gpi, _mm_mul_sd(prod,icf4) ); _mm_store_pd(dadx,_mm_mul_pd(prod,icf6)); dadx+=2; _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6)); dadx+=2; } gmx_mm_update_1pot_pd(gpi,work+ii); } /* Sum up the polarization energy from other nodes */ if(PARTDECOMP(cr)) { gmx_sum(natoms, work, cr); } else if(DOMAINDECOMP(cr)) { dd_atom_sum_real(cr->dd, work); } /* Compute the radii */ for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */ { if(born->use[i] != 0) { gpi_ai = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/ gpi2 = gpi_ai * gpi_ai; born->bRad[i] = factor*gmx_invsqrt(gpi2); fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]); } } /* Extra (local) communication required for DD */ if(DOMAINDECOMP(cr)) { dd_atom_spread_real(cr->dd, born->bRad); dd_atom_spread_real(cr->dd, fr->invsqrta); } return 0; }
void exchlaplacecoeffData_2(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S710, S704, S707, S701, S709, S700, S703, S706, S708, S702, S705 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+394)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+402)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+394)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+226)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+234)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+226)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+170)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+178)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+170)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+58)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+66)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+58)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+450)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+458)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+450)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 
1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+114)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+122)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+114)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+10)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+338)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+346)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+338)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+282)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+290)] 
= 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+282)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posBegin[0]; } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S716, S719, S713, S721, S715, S718, S712, S711, S720, S714, S717 */ { { { { { { { { { { { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+454)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+462)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+454)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { 
fieldData_LaplaceCoeff_2_p1[((i1*8)+230)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+238)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+230)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+118)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+126)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+118)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+286)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+294)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+286)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+342)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+350)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+342)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+398)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+406)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+398)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+174)] = 0.000000e+00; 
fieldData_LaplaceCoeff_2_p1[((i1*8)+182)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+174)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+62)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+70)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+62)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+6)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+14)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+6)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S722, S731, S725, S728, S727, S730, S724, S732, S726, S729, S723 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+344)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+345)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+344)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+400)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+401)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+400)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+120)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+121)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+120)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 
2) { fieldData_LaplaceCoeff_2_p1[(i2+8)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+9)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+8)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+64)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+65)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+64)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+456)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+457)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+456)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+232)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+233)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+232)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+288)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+289)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+288)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+176)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+177)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { 
fieldData_LaplaceCoeff_2_p1[(i2+176)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=6); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S743, S737, S733, S742, S736, S739, S738, S741, S735, S740, S734 */ { { { { { { { { { { { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=6); i2 += 1) { yPos = posEnd[1]; } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+376)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+377)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+376)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+488)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+489)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+488)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+40)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+41)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+40)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+208)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+209)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+208)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+152)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+153)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { 
fieldData_LaplaceCoeff_2_p1[(i2+152)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+320)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+321)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+320)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+432)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+433)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+432)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+96)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+97)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+96)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+264)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+265)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+264)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S744 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*5)]); int i1 = 1; for (; (i1<=4); i1 += 2) { 
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+6)]; buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+14)]; } for (; (i1<=5); i1 += 1) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+6)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 45, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 45, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S745 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*5)]); double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); int i1 = 3; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)-14)] = buffer_Recv_0_p1[(i1-3)]; fieldData_LaplaceCoeff_2_p1[((i1*8)-6)] = buffer_Recv_0_p1[(i1-2)]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)-14)] = buffer_Recv_0_p1[(i1-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); 
reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[2][42], 1, mpiDatatype_9_5_56, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[2][10], 1, mpiDatatype_9_5_56, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S746 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*7)]); int i1 = 0; for (; (i1<=5); i1 += 2) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+3)]; buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+11)]; } for (; (i1<=6); i1 += 1) { 
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+3)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S747 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*7)]); int i1 = 0; for (; (i1<=5); i1 += 2) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+5)]; buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+13)]; } for (; (i1<=6); i1 += 1) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+5)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(buffer_Send[0], 63, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 63, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 63, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(buffer_Recv[1], 63, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; 
++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S748 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*7)]); double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); int i1 = 1; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)-7)] = buffer_Recv_0_p1[(i1-1)]; fieldData_LaplaceCoeff_2_p1[((i1*8)+1)] = buffer_Recv_0_p1[i1]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)-7)] = buffer_Recv_0_p1[(i1-1)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S749 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*7)]); double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); int i1 = 7; for (; (i1<=12); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)-49)] = buffer_Recv_1_p1[(i1-7)]; fieldData_LaplaceCoeff_2_p1[((i1*8)-41)] = buffer_Recv_1_p1[(i1-6)]; } for (; (i1<=13); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)-49)] = buffer_Recv_1_p1[(i1-7)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { 
MPI_Isend(&fieldData_LaplaceCoeff[2][17], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[2][33], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[2][1], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_LaplaceCoeff[2][49], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void exchsolutionData_2(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S397, S396, S398 */ { { { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_Solution_2_p1[((i1*8)+2)] = 0.000000e+00; fieldData_Solution_2_p1[((i1*8)+10)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_Solution_2_p1[((i1*8)+2)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posBegin[0]; } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S401, S400, S399 */ { { { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i1 = 1; for (; (i1<=4); i1 += 
2) { fieldData_Solution_2_p1[((i1*8)+6)] = 0.000000e+00; fieldData_Solution_2_p1[((i1*8)+14)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_Solution_2_p1[((i1*8)+6)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posEnd[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S404, S403, S402 */ { { { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_Solution_2_p1[(i2+8)] = 0.000000e+00; fieldData_Solution_2_p1[(i2+9)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_Solution_2_p1[(i2+8)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = 
((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=6); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S407, S406, S405 */ { { { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_Solution_2_p1[(i2+40)] = 0.000000e+00; fieldData_Solution_2_p1[(i2+41)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_Solution_2_p1[(i2+40)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=6); i2 += 1) { yPos = posEnd[1]; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(&fieldData_Solution[2][14], 1, mpiDatatype_5_1_8, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(&fieldData_Solution[2][10], 1, mpiDatatype_5_1_8, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); 
reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_Solution[2][42], 1, mpiDatatype_1_5_8, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_Solution[2][10], 1, mpiDatatype_1_5_8, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { 
; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(&fieldData_Solution[2][3], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(&fieldData_Solution[2][5], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(&fieldData_Solution[2][1], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(&fieldData_Solution[2][7], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } 
if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Isend(&fieldData_Solution[2][17], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_Solution[2][33], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_Solution[2][1], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_Solution[2][49], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; 
} } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
/*
 * SSE2 evaluation of a rational function P(x)/Q(x) at x, with P and Q both of
 * degree 12.
 *
 * coeff[] stores (P, Q) coefficient pairs from the highest power down to the
 * constant term, so one aligned 128-bit load puts a P coefficient in lane 0
 * and the matching Q coefficient in lane 1 and both polynomials advance in
 * lock-step. Each polynomial is split into its even-power and odd-power
 * halves, which are run as two independent Horner chains in x*x; the odd
 * chain gets one extra multiply by x at the end before the halves are added.
 *
 * Returns lane 0 divided by lane 1. The constant term of Q is 0, so x == 0
 * divides by zero.
 */
inline double lanczos13m53::lanczos_sum_expG_scaled<double>(const double& x) {
    static const ALIGN16 double coeff[26] = {
        static_cast<double>(0.006061842346248906525783753964555936883222L), static_cast<double>(1u),
        static_cast<double>(0.5098416655656676188125178644804694509993L), static_cast<double>(66u),
        static_cast<double>(19.51992788247617482847860966235652136208L), static_cast<double>(1925u),
        static_cast<double>(449.9445569063168119446858607650988409623L), static_cast<double>(32670u),
        static_cast<double>(6955.999602515376140356310115515198987526L), static_cast<double>(357423u),
        static_cast<double>(75999.29304014542649875303443598909137092L), static_cast<double>(2637558u),
        static_cast<double>(601859.6171681098786670226533699352302507L), static_cast<double>(13339535u),
        static_cast<double>(3481712.15498064590882071018964774556468L), static_cast<double>(45995730u),
        static_cast<double>(14605578.08768506808414169982791359218571L), static_cast<double>(105258076u),
        static_cast<double>(43338889.32467613834773723740590533316085L), static_cast<double>(150917976u),
        static_cast<double>(86363131.28813859145546927288977868422342L), static_cast<double>(120543840u),
        static_cast<double>(103794043.1163445451906271053616070238554L), static_cast<double>(39916800u),
        static_cast<double>(56906521.91347156388090791033559122686859L), static_cast<double>(0u)
    };

    /* x broadcast to both lanes, and its square for the Horner steps. */
    const __m128d vx = _mm_load1_pd(&x);
    const __m128d vx2 = _mm_mul_pd(vx, vx);

    /* Leading coefficients: x^12 for the even chain, x^11 for the odd one. */
    __m128d sum_even = _mm_load_pd(coeff);
    __m128d sum_odd = _mm_load_pd(coeff + 2);

    /* Five paired Horner steps; k indexes the even-chain pair, k + 2 the
     * odd-chain pair (coefficients of x^10/x^9 down to x^2/x^1). */
    for (int k = 4; k <= 20; k += 4) {
        sum_even = _mm_add_pd(_mm_mul_pd(sum_even, vx2), _mm_load_pd(coeff + k));
        sum_odd = _mm_add_pd(_mm_mul_pd(sum_odd, vx2), _mm_load_pd(coeff + k + 2));
    }

    /* Last even step brings in the constant terms; the odd chain only needs
     * its trailing factor of x. */
    sum_even = _mm_add_pd(_mm_mul_pd(sum_even, vx2), _mm_load_pd(coeff + 24));
    sum_odd = _mm_mul_pd(sum_odd, vx);
    sum_even = _mm_add_pd(sum_even, sum_odd);

    /* Spill both lanes: t[0] = P(x), t[1] = Q(x). */
    double ALIGN16 t[2];
    _mm_store_pd(t, sum_even);
    return t[0] / t[1];
}
void exchlaplacecoeffData_6(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S902, S905, S908, S907, S901, S910, S904, S903, S906, S909, S900 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4558)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+4626)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4558)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(6.400000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<63); i1 += 4) { /* yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<66); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9114)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+9182)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9114)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { 
fieldData_LaplaceCoeff_6_p1[((i1*68)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+70)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31894)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+31962)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31894)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36450)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+36518)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36450)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<63); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<66); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18226)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+18294)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18226)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13670)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+13738)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13670)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { 
fieldData_LaplaceCoeff_6_p1[((i1*68)+27338)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+27406)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+27338)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22782)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+22850)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22782)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S920, S914, S917, S911, S913, S916, S919, S921, S918, S912, S915 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4622)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+4690)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4622)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31958)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+32026)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31958)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13734)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+13802)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13734)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+66)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+134)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { 
fieldData_LaplaceCoeff_6_p1[((i1*68)+66)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22846)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+22914)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22846)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(6.400000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<63); i1 += 4) { /* yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<66); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18290)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+18358)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18290)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+27402)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+27470)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+27402)] = 0.000000e+00; } } } { double* 
fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36514)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+36582)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36514)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9178)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+9246)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9178)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<63); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<66); i1 += 1) { xPos = posEnd[0]; } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S929, S923, S926, S931, S925, S928, S922, S930, S924, S927, S932 */ { { { { { { { { { { { int i2 = 2; for (; (i2<=65); i2 += 2) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=66); i2 += 1) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+18292)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+18293)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+18292)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+36516)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+36517)] = 0.000000e+00; } for (; (i2<=66); 
i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+36516)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+31960)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+31961)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+31960)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+68)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+69)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+68)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+9180)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+9181)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+9180)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+22848)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+22849)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+22848)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+27404)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+27405)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+27404)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+4624)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+4625)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+4624)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = 
(&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+13736)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+13737)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+13736)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=65); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=66); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S941, S935, S938, S943, S940, S934, S937, S942, S936, S939, S933 */ { { { { { { { { { { { int i2 = 2; for (; (i2<=65); i2 += 2) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=66); i2 += 1) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+36312)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+36313)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+36312)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+22644)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+22645)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+22644)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+13532)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+13533)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+13532)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+40868)] = 
0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+40869)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+40868)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+18088)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+18089)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+18088)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+4420)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+4421)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+4420)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+31756)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+31757)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+31756)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+27200)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+27201)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+27200)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=65); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=66); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+8976)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+8977)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+8976)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if 
((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S944 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*65)]); int i4 = 1; for (; (i4<=64); i4 += 2) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+66)]; buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+134)]; } for (; (i4<=65); i4 += 1) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+66)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 585, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 585, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S945 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*65)]); int i4 = 3; for (; (i4<=66); i4 += 2) { fieldData_LaplaceCoeff_6_p1[((i4*68)-134)] = buffer_Recv_0_p1[(i4-3)]; 
fieldData_LaplaceCoeff_6_p1[((i4*68)-66)] = buffer_Recv_0_p1[(i4-2)]; } for (; (i4<=67); i4 += 1) { fieldData_LaplaceCoeff_6_p1[((i4*68)-134)] = buffer_Recv_0_p1[(i4-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[6][4422], 1, mpiDatatype_9_65_4556, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[6][70], 1, mpiDatatype_9_65_4556, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S946 */ for (int i3 = 0; (i3<=8); i3 += 1) 
{ double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*67)]); int i4 = 0; for (; (i4<=65); i4 += 2) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+3)]; buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+71)]; } for (; (i4<=66); i4 += 1) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+3)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S947 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*67)]); int i4 = 0; for (; (i4<=65); i4 += 2) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+65)]; buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+133)]; } for (; (i4<=66); i4 += 1) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+65)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(buffer_Send[0], 603, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 603, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 603, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] 
= true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(buffer_Recv[1], 603, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S948 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*67)]); int i4 = 1; for (; (i4<=66); i4 += 2) { fieldData_LaplaceCoeff_6_p1[((i4*68)-67)] = buffer_Recv_0_p1[(i4-1)]; fieldData_LaplaceCoeff_6_p1[((i4*68)+1)] = buffer_Recv_0_p1[i4]; } for (; (i4<=67); i4 += 1) { fieldData_LaplaceCoeff_6_p1[((i4*68)-67)] = buffer_Recv_0_p1[(i4-1)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S949 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*67)]); double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); int i4 = 67; for (; (i4<=132); i4 += 2) { fieldData_LaplaceCoeff_6_p1[((i4*68)-4489)] = buffer_Recv_1_p1[(i4-67)]; fieldData_LaplaceCoeff_6_p1[((i4*68)-4421)] = buffer_Recv_1_p1[(i4-66)]; } for (; (i4<=133); i4 += 1) { fieldData_LaplaceCoeff_6_p1[((i4*68)-4489)] = buffer_Recv_1_p1[(i4-67)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if 
(reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Isend(&fieldData_LaplaceCoeff[6][137], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[6][4353], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[6][1], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_LaplaceCoeff[6][4489], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { 
if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void exchlaplacecoeff_gmrfData_5(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S1306, S1309, S1300, S1308, S1302, S1305, S1310, S1304, S1307, S1301, S1303 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6302)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6338)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6302)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3782)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3818)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3782)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7562)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7598)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7562)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+38)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2522)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2558)] = 0.000000e+00; } for (; 
(i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2522)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<31); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<34); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8822)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8858)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8822)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1262)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1298)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1262)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(3.200000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<31); i1 += 4) { /* yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<34); i1 += 1) { yPos = 
((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5042)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5078)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5042)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10082)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10118)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10082)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S1312, S1320, S1314, S1317, S1311, S1319, S1313, S1316, S1321, S1315, S1318 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1294)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1330)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1294)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5074)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5110)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5074)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8854)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8890)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { 
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8854)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2554)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2590)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2554)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(3.200000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<31); i1 += 4) { /* yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<34); i1 += 1) { yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+34)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+70)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+34)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3814)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3850)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { 
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3814)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7594)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7630)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7594)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6334)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6370)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6334)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<31); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<34); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10114)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10150)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10114)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S1327, S1332, S1326, S1329, S1323, S1322, S1331, S1325, S1328, S1330, S1324 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5076)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5077)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5076)] = 0.000000e+00; } } { double* 
fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8856)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8857)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8856)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6336)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6337)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6336)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7596)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7597)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7596)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=34); i2 += 1) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+36)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+37)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+36)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2556)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2557)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2556)] = 0.000000e+00; } } } { double* 
fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10116)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10117)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10116)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3816)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3817)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3816)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1296)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1297)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1296)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=34); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S1338, S1341, S1335, S1340, S1343, S1337, S1334, S1333, S1342, S1336, S1339 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7488)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7489)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7488)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2448)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2449)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2448)] = 
0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11268)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11269)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11268)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=34); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10008)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10009)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10008)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6228)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6229)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6228)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8748)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8749)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8748)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=34); i2 += 1) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3708)] = 0.000000e+00; 
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3709)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3708)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1188)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1189)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1188)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4968)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4969)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4968)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1344 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*33)]); int i4 = 1; for (; (i4<=32); i4 += 2) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+34)]; buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+70)]; } for (; (i4<=33); i4 += 1) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+34)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 297, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 297, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1345 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*33)]); int i4 = 3; for (; (i4<=34); i4 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-70)] = buffer_Recv_0_p1[(i4-3)]; fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-34)] = buffer_Recv_0_p1[(i4-2)]; } for (; (i4<=35); i4 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-70)] = buffer_Recv_0_p1[(i4-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][1190], 1, mpiDatatype_9_33_1260, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][38], 1, mpiDatatype_9_33_1260, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1346 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*35)]); int i4 = 0; for (; (i4<=33); i4 += 2) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+3)]; buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+39)]; } for (; (i4<=34); i4 += 1) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+3)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1347 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*35)]); int i4 = 0; for (; (i4<=33); i4 += 2) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+33)]; buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+69)]; } for (; (i4<=34); i4 += 1) { buffer_Send_1_p1[i4] = 
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+33)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(buffer_Send[0], 315, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 315, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 315, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(buffer_Recv[1], 315, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1348 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = 
(&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*35)]); int i4 = 1; for (; (i4<=34); i4 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-35)] = buffer_Recv_0_p1[(i4-1)]; fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+1)] = buffer_Recv_0_p1[i4]; } for (; (i4<=35); i4 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-35)] = buffer_Recv_0_p1[(i4-1)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1349 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*35)]); int i4 = 35; for (; (i4<=68); i4 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1225)] = buffer_Recv_1_p1[(i4-35)]; fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1189)] = buffer_Recv_1_p1[(i4-34)]; } for (; (i4<=69); i4 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1225)] = buffer_Recv_1_p1[(i4-35)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][73], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][1153], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned 
int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][1], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][1225], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
/*
 * 4x4 multiply-accumulate on tiles embedded in larger arrays.
 *
 * For r, j in 0..3:
 *     C[r*lda + j] += sum over l in 0..3 of A[r*lda + l] * B[l*lda + j]
 *
 * lda: distance, in doubles, between consecutive lines of A, B and C.
 * A, B: input tiles (read only).
 * C: tile that is read, accumulated into, and written back.
 *
 * Unaligned SSE2 loads/stores are used on purpose: the line addresses
 * C + r*lda and B + l*lda are only 16-byte aligned when lda is even and the
 * base pointer is aligned, and the aligned variants fault otherwise.
 */
void do_matrix_mult(int lda, double* A, double* B, double* C)
{
    __m128d acc_lo[4]; /* C[r*lda + 0..1] */
    __m128d acc_hi[4]; /* C[r*lda + 2..3] */

    for (int r = 0; r < 4; r++) {
        acc_lo[r] = _mm_loadu_pd(C + r * lda);
        acc_hi[r] = _mm_loadu_pd(C + r * lda + 2);
    }

    for (int l = 0; l < 4; l++) {
        /* One line of B is shared by all four lines of C. */
        const __m128d b_lo = _mm_loadu_pd(B + l * lda);
        const __m128d b_hi = _mm_loadu_pd(B + l * lda + 2);

        for (int r = 0; r < 4; r++) {
            /* Broadcast the scalar A[r*lda + l] to both lanes. */
            const __m128d a = _mm_load1_pd(A + l + r * lda);

            acc_lo[r] = _mm_add_pd(acc_lo[r], _mm_mul_pd(a, b_lo));
            acc_hi[r] = _mm_add_pd(acc_hi[r], _mm_mul_pd(a, b_hi));
        }
    }

    for (int r = 0; r < 4; r++) {
        _mm_storeu_pd(C + r * lda, acc_lo[r]);
        _mm_storeu_pd(C + r * lda + 2, acc_hi[r]);
    }
}
/*
 * Chain-rule force pass over a neighbor list, two j entries per SSE2 step.
 *
 * Step 1 fills born->work[0..natoms) with a per-atom prefactor built from
 * born->bRad[] and dvda[]; the formula depends on gb_algorithm (egbSTILL,
 * egbHCT or egbOBC; any other value leaves born->work untouched).
 * Step 2 walks every i entry of nl, combines the prefactors of i and j with
 * the values streamed from dadx (four doubles are consumed per step, also for
 * the odd trailing j entry), scales the i-j displacement with the result,
 * subtracts it from f at the j entries and hands the accumulated i
 * contribution to gmx_mm_update_iforce_1atom_pd together with fshift.
 *
 * md is not used here. Always returns 0.
 */
int calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dvda, double *x, double *f, double *fshift, double *shiftvec, int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
{
    double *rb   = born->work;
    int    *jjnr = nl->jjnr;
    int     atom, entry, k;
    int     jnrA, jnrB, j3A, j3B;
    __m128d jx, jy, jz;
    __m128d tx, ty, tz;
    __m128d rbaj;
    /* NOTE(review): not referenced in this body; left declared in case a
     * GMX_MM_* macro expects these names in scope - confirm, then delete. */
    __m128d xmm1, xmm2, xmm3;

    /* Step 1: per-atom prefactor, written to the work array. */
    if (gb_algorithm == egbSTILL)
    {
        for (atom = 0; atom < natoms; atom++)
        {
            const double rbi = born->bRad[atom];
            rb[atom] = (2 * rbi * rbi * dvda[atom])/ONE_4PI_EPS0;
        }
    }
    else if (gb_algorithm == egbHCT)
    {
        for (atom = 0; atom < natoms; atom++)
        {
            const double rbi = born->bRad[atom];
            rb[atom] = rbi * rbi * dvda[atom];
        }
    }
    else if (gb_algorithm == egbOBC)
    {
        for (atom = 0; atom < natoms; atom++)
        {
            const double rbi = born->bRad[atom];
            rb[atom] = rbi * rbi * born->drobc[atom] * dvda[atom];
        }
    }

    jz  = _mm_setzero_pd();
    j3A = 0;
    j3B = 0;

    /* Step 2: loop over the i entries of the neighbor list. */
    for (entry = 0; entry < nl->nri; entry++)
    {
        const int ii  = nl->iinr[entry];
        const int ii3 = ii*3;
        const int is3 = 3*nl->shift[entry];
        const int nj0 = nl->jindex[entry];
        const int nj1 = nl->jindex[entry+1];

        const double shX = shiftvec[is3];
        const double shY = shiftvec[is3+1];
        const double shZ = shiftvec[is3+2];

        /* Shifted i position and i prefactor, broadcast to both lanes. */
        const __m128d ix   = _mm_set1_pd(shX+x[ii3+0]);
        const __m128d iy   = _mm_set1_pd(shY+x[ii3+1]);
        const __m128d iz   = _mm_set1_pd(shZ+x[ii3+2]);
        const __m128d rbai = _mm_load1_pd(rb+ii);

        __m128d fix = _mm_setzero_pd();
        __m128d fiy = _mm_setzero_pd();
        __m128d fiz = _mm_setzero_pd();

        /* Two j entries per iteration. */
        for (k = nj0; k < nj1-1; k += 2)
        {
            __m128d dx, dy, dz, f_gb, f_gb_ai;

            jnrA = jjnr[k];
            jnrB = jjnr[k+1];
            j3A  = 3*jnrA;
            j3B  = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);

            dx = _mm_sub_pd(ix,jx);
            dy = _mm_sub_pd(iy,jy);
            dz = _mm_sub_pd(iz,jz);

            GMX_MM_LOAD_2VALUES_PD(rb+jnrA,rb+jnrB,rbaj);

            /* Two consecutive pairs from the dadx stream. */
            f_gb    = _mm_load_pd(dadx);
            dadx   += 2;
            f_gb_ai = _mm_load_pd(dadx);
            dadx   += 2;

            /* Scalar factor: first pair * rb(i) + second pair * rb(j). */
            f_gb    = _mm_mul_pd(f_gb,rbai);
            f_gb_ai = _mm_mul_pd(f_gb_ai,rbaj);
            f_gb    = _mm_add_pd(f_gb,f_gb_ai);

            tx = _mm_mul_pd(f_gb,dx);
            ty = _mm_mul_pd(f_gb,dy);
            tz = _mm_mul_pd(f_gb,dz);

            fix = _mm_add_pd(fix,tx);
            fiy = _mm_add_pd(fiy,ty);
            fiz = _mm_add_pd(fiz,tz);

            GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(f+j3A,f+j3B,tx,ty,tz);
        }

        /* Odd trailing j entry: same arithmetic on the low lane only. */
        if (k < nj1)
        {
            __m128d dx, dy, dz, f_gb, f_gb_ai;

            jnrA = jjnr[k];
            j3A  = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);

            dx = _mm_sub_sd(ix,jx);
            dy = _mm_sub_sd(iy,jy);
            dz = _mm_sub_sd(iz,jz);

            GMX_MM_LOAD_1VALUE_PD(rb+jnrA,rbaj);

            /* The stream still advances by four doubles here. */
            f_gb    = _mm_load_pd(dadx);
            dadx   += 2;
            f_gb_ai = _mm_load_pd(dadx);
            dadx   += 2;

            f_gb    = _mm_mul_sd(f_gb,rbai);
            f_gb_ai = _mm_mul_sd(f_gb_ai,rbaj);
            f_gb    = _mm_add_sd(f_gb,f_gb_ai);

            tx = _mm_mul_sd(f_gb,dx);
            ty = _mm_mul_sd(f_gb,dy);
            tz = _mm_mul_sd(f_gb,dz);

            fix = _mm_add_sd(fix,tx);
            fiy = _mm_add_sd(fiy,ty);
            fiz = _mm_add_sd(fiz,tz);

            GMX_MM_DECREMENT_1RVEC_1POINTER_PD(f+j3A,tx,ty,tz);
        }

        /* fix/fiy/fiz hold per-lane partial sums; the helper receives them
         * together with the i force and shift force destinations. */
        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,f+ii3,fshift+is3);
    }

    return 0;
}
/*
 * m2l_along_z - weighted row sums of scr2 written to scr1 (SSE2).
 *
 * Both scr1 and scr2 are addressed in rows of 16 doubles and are accessed
 * with aligned 128-bit loads/stores, so they must be 16-byte aligned.
 * Every output row swaps the two 8-double halves of the accumulated input:
 * sums built from input offsets 8..15 land at output offsets 0..7 and
 * sums built from input offsets 0..7 land at output offsets 8..15.
 *
 * Pass 1 (output row 0):
 *     sum over j = 0..nmultipoles of fr[j] * scr2 row j, using input
 *     offsets 0..3 and 8..11 only; output offsets 4..7 and 12..15 are
 *     set to zero.  No sg[] scaling is applied.
 * Pass 2 (l = 1..nmultipoles, output row l*(l+1)/2):
 *     sum over m = l..nmultipoles+l of fr[m] * scr2 row (m-l), same
 *     offsets as pass 1, scaled by sg[l].
 * Pass 3 (m = 1..nmultipoles, l = m..nmultipoles):
 *     all 16 offsets are used; input offsets 4..7 and 12..15 are
 *     accumulated with a negative sign.  The result is scaled by sg[m+l].
 *     Input rows start after row nmultipoles and the window shrinks by
 *     one row for each m.
 *
 * fr[] and sg[] are read up to index 2*nmultipoles.
 * d2 is not used by this routine.
 *
 * Fix relative to the previous version: pass 1 accumulated into reg12 and
 * reg13 without ever initialising them (reg10/reg11 were zeroed instead),
 * so scr1[0..3] depended on indeterminate register contents.
 *
 * NOTE(review): loop counters are int while nmultipoles is long long;
 * values that do not fit in int are not supported.
 */
void m2l_along_z(long long nmultipoles, double *scr1, double *scr2, double *d2, double *fr, double *sg)
{
    int mmmm, mmm, mm, m;
    int i, j, k, l, n, nn;
    __m128d reg00, reg01, reg02, reg03;   /* input, offsets  0.. 7 of a row */
    __m128d reg04, reg05, reg06, reg07;   /* input, offsets  8..15 of a row */
    __m128d reg08, reg09, reg10, reg11;   /* sums of offsets 0.. 7          */
    __m128d reg12, reg13, reg14, reg15;   /* sums of offsets 8..15          */
    __m128d reg18;                        /* broadcast fr[] or sg[] factor  */
    const __m128d regzero = _mm_setzero_pd();

    (void)d2;

    /* ---- Pass 1: output row 0 ---------------------------------------- */
    reg08 = regzero;
    reg09 = regzero;
    reg12 = regzero;   /* was left uninitialised before */
    reg13 = regzero;   /* was left uninitialised before */

    i = -15;
    for (j = 0; j <= nmultipoles; ++j) {
        i += 16;                              /* i == 16*j + 1 */
        reg00 = _mm_load_pd(&scr2[i-1]);
        reg01 = _mm_load_pd(&scr2[i+1]);
        reg04 = _mm_load_pd(&scr2[i+7]);
        reg05 = _mm_load_pd(&scr2[i+9]);
        reg18 = _mm_load1_pd(&fr[j]);
        reg08 = _mm_add_pd(reg08, _mm_mul_pd(reg00, reg18));
        reg09 = _mm_add_pd(reg09, _mm_mul_pd(reg01, reg18));
        reg12 = _mm_add_pd(reg12, _mm_mul_pd(reg04, reg18));
        reg13 = _mm_add_pd(reg13, _mm_mul_pd(reg05, reg18));
    }
    _mm_store_pd(&scr1[ 0], reg12);
    _mm_store_pd(&scr1[ 2], reg13);
    _mm_store_pd(&scr1[ 4], regzero);
    _mm_store_pd(&scr1[ 6], regzero);
    _mm_store_pd(&scr1[ 8], reg08);
    _mm_store_pd(&scr1[10], reg09);
    _mm_store_pd(&scr1[12], regzero);
    _mm_store_pd(&scr1[14], regzero);

    /* ---- Pass 2: one output row per l -------------------------------- */
    i = 1;
    for (l = 1; l <= nmultipoles; ++l) {
        i += 16 * l;                          /* start of output row, plus 1 */
        j = -15;
        k = nmultipoles + l;
        reg08 = regzero;
        reg09 = regzero;
        reg12 = regzero;
        reg13 = regzero;
        for (m = l; m <= k; ++m) {
            j += 16;                          /* j == 16*(m-l) + 1 */
            reg00 = _mm_load_pd(&scr2[j-1]);
            reg01 = _mm_load_pd(&scr2[j+1]);
            reg04 = _mm_load_pd(&scr2[j+7]);
            reg05 = _mm_load_pd(&scr2[j+9]);
            reg18 = _mm_load1_pd(&fr[m]);
            reg08 = _mm_add_pd(reg08, _mm_mul_pd(reg00, reg18));
            reg09 = _mm_add_pd(reg09, _mm_mul_pd(reg01, reg18));
            reg12 = _mm_add_pd(reg12, _mm_mul_pd(reg04, reg18));
            reg13 = _mm_add_pd(reg13, _mm_mul_pd(reg05, reg18));
        }
        reg18 = _mm_load1_pd(&sg[l]);
        reg12 = _mm_mul_pd(reg12, reg18);
        _mm_store_pd(&scr1[i- 1], reg12);
        reg13 = _mm_mul_pd(reg13, reg18);
        _mm_store_pd(&scr1[i+ 1], reg13);
        _mm_store_pd(&scr1[i+ 3], regzero);
        _mm_store_pd(&scr1[i+ 5], regzero);
        reg08 = _mm_mul_pd(reg08, reg18);
        _mm_store_pd(&scr1[i+ 7], reg08);
        reg09 = _mm_mul_pd(reg09, reg18);
        _mm_store_pd(&scr1[i+ 9], reg09);
        _mm_store_pd(&scr1[i+11], regzero);
        _mm_store_pd(&scr1[i+13], regzero);
    }

    /* ---- Pass 3: full 16-wide rows ----------------------------------- */
    mm = 16 * nmultipoles;
    i  = 1;
    n  = mm + 1;
    for (m = 1; m <= nmultipoles; ++m) {
        i += 16 * m;
        j  = i;
        for (l = m; l <= nmultipoles; ++l) {
            j  += 16 * l;                     /* start of output row, plus 1 */
            nn  = n;
            k   = m + l;
            mmm = nmultipoles + l;
            reg08 = regzero;
            reg09 = regzero;
            reg10 = regzero;
            reg11 = regzero;
            reg12 = regzero;
            reg13 = regzero;
            reg14 = regzero;
            reg15 = regzero;
            for (mmmm = k; mmmm <= mmm; ++mmmm) {
                nn += 16;
                reg00 = _mm_load_pd(&scr2[nn- 1]);
                reg01 = _mm_load_pd(&scr2[nn+ 1]);
                reg02 = _mm_load_pd(&scr2[nn+ 3]);
                reg03 = _mm_load_pd(&scr2[nn+ 5]);
                reg04 = _mm_load_pd(&scr2[nn+ 7]);
                reg05 = _mm_load_pd(&scr2[nn+ 9]);
                reg06 = _mm_load_pd(&scr2[nn+11]);
                reg07 = _mm_load_pd(&scr2[nn+13]);
                reg18 = _mm_load1_pd(&fr[mmmm]);
                reg08 = _mm_add_pd(reg08, _mm_mul_pd(reg00, reg18));
                reg09 = _mm_add_pd(reg09, _mm_mul_pd(reg01, reg18));
                reg10 = _mm_sub_pd(reg10, _mm_mul_pd(reg02, reg18));
                reg11 = _mm_sub_pd(reg11, _mm_mul_pd(reg03, reg18));
                reg12 = _mm_add_pd(reg12, _mm_mul_pd(reg04, reg18));
                reg13 = _mm_add_pd(reg13, _mm_mul_pd(reg05, reg18));
                reg14 = _mm_sub_pd(reg14, _mm_mul_pd(reg06, reg18));
                reg15 = _mm_sub_pd(reg15, _mm_mul_pd(reg07, reg18));
            }
            reg18 = _mm_load1_pd(&sg[k]);
            reg12 = _mm_mul_pd(reg12, reg18);
            _mm_store_pd(&scr1[j- 1], reg12);
            reg13 = _mm_mul_pd(reg13, reg18);
            _mm_store_pd(&scr1[j+ 1], reg13);
            reg14 = _mm_mul_pd(reg14, reg18);
            _mm_store_pd(&scr1[j+ 3], reg14);
            reg15 = _mm_mul_pd(reg15, reg18);
            _mm_store_pd(&scr1[j+ 5], reg15);
            reg08 = _mm_mul_pd(reg08, reg18);
            _mm_store_pd(&scr1[j+ 7], reg08);
            reg09 = _mm_mul_pd(reg09, reg18);
            _mm_store_pd(&scr1[j+ 9], reg09);
            reg10 = _mm_mul_pd(reg10, reg18);
            _mm_store_pd(&scr1[j+11], reg10);
            reg11 = _mm_mul_pd(reg11, reg18);
            _mm_store_pd(&scr1[j+13], reg11);
        }
        n  += mm;      /* skip the rows consumed for this m */
        mm -= 16;      /* next m uses one row fewer */
    }
}
static inline __m128d my_invrsq_pd(__m128d x) { const __m128d three = (const __m128d) {3.0f, 3.0f}; const __m128d half = (const __m128d) {0.5f, 0.5f}; __m128 t = _mm_rsqrt_ps(_mm_cvtpd_ps(x)); /* Convert to single precision and do _mm_rsqrt_ps() */ __m128d t1 = _mm_cvtps_pd(t); /* Convert back to double precision */ /* First Newton-Rapson step, accuracy is now 24 bits */ __m128d t2 = _mm_mul_pd(half,_mm_mul_pd(t1,_mm_sub_pd(three,_mm_mul_pd(x,_mm_mul_pd(t1,t1))))); /* Return second Newton-Rapson step, accuracy 48 bits */ return (__m128d) _mm_mul_pd(half,_mm_mul_pd(t2,_mm_sub_pd(three,_mm_mul_pd(x,_mm_mul_pd(t2,t2))))); } /* to extract single integers from a __m128i datatype */ #define _mm_extract_epi64(x, imm) \ _mm_cvtsi128_si32(_mm_srli_si128((x), 4 * (imm))) void nb_kernel400_x86_64_sse2(int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift, double * shiftvec, double * fshift, int * gid, double * pos, double * faction, double * charge, double * p_facel, double * p_krf, double * p_crf, double * Vc, int * type, int * p_ntype, double * vdwparam, double * Vvdw, double * p_tabscale, double * VFtab, double * invsqrta, double * dvda, double * p_gbtabscale, double * GBtab, int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, double * work) { int nri,ntype,nthreads,offset; int n,ii,is3,ii3,k,nj0,nj1,jnr1,jnr2,j13,j23,ggid; double facel,krf,crf,tabscl,gbtabscl,vct,vgbt; double shX,shY,shZ,isai_d,dva; gmx_gbdata_t *gbdata; float * gpol; __m128d ix,iy,iz,jx,jy,jz; __m128d dx,dy,dz,t1,t2,t3; __m128d fix,fiy,fiz,rsq11,rinv,r,fscal,rt,eps,eps2; __m128d q,iq,qq,isai,isaj,isaprod,vcoul,gbscale,dvdai,dvdaj; __m128d Y,F,G,H,Fp,VV,FF,vgb,fijC,dvdatmp,dvdasum,vctot,vgbtot,n0d; __m128d xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm8; __m128d fac,tabscale,gbtabscale; __m128i n0,nnn; const __m128d neg = {-1.0f,-1.0f}; const __m128d zero = {0.0f,0.0f}; const __m128d half = {0.5f,0.5f}; const __m128d two = {2.0f,2.0f}; const __m128d three = {3.0f,3.0f}; 
gbdata = (gmx_gbdata_t *)work; gpol = gbdata->gpol; nri = *p_nri; ntype = *p_ntype; nthreads = *p_nthreads; facel = (*p_facel) * (1.0 - (1.0/gbdata->gb_epsilon_solvent)); krf = *p_krf; crf = *p_crf; tabscl = *p_tabscale; gbtabscl = *p_gbtabscale; nj1 = 0; /* Splat variables */ fac = _mm_load1_pd(&facel); tabscale = _mm_load1_pd(&tabscl); gbtabscale = _mm_load1_pd(&gbtabscl); /* Keep compiler happy */ dvdatmp = _mm_setzero_pd(); vgb = _mm_setzero_pd(); dvdaj = _mm_setzero_pd(); isaj = _mm_setzero_pd(); vcoul = _mm_setzero_pd(); t1 = _mm_setzero_pd(); t2 = _mm_setzero_pd(); t3 = _mm_setzero_pd(); jnr1=jnr2=0; j13=j23=0; for(n=0;n<nri;n++) { is3 = 3*shift[n]; shX = shiftvec[is3]; shY = shiftvec[is3+1]; shZ = shiftvec[is3+2]; nj0 = jindex[n]; nj1 = jindex[n+1]; offset = (nj1-nj0)%2; ii = iinr[n]; ii3 = ii*3; ix = _mm_set1_pd(shX+pos[ii3+0]); iy = _mm_set1_pd(shX+pos[ii3+1]); iz = _mm_set1_pd(shX+pos[ii3+2]); q = _mm_set1_pd(charge[ii]); iq = _mm_mul_pd(fac,q); isai_d = invsqrta[ii]; isai = _mm_load1_pd(&isai_d); fix = _mm_setzero_pd(); fiy = _mm_setzero_pd(); fiz = _mm_setzero_pd(); dvdasum = _mm_setzero_pd(); vctot = _mm_setzero_pd(); vgbtot = _mm_setzero_pd(); for(k=nj0;k<nj1-offset; k+=2) { jnr1 = jjnr[k]; jnr2 = jjnr[k+1]; j13 = jnr1 * 3; j23 = jnr2 * 3; /* Load coordinates */ xmm1 = _mm_loadu_pd(pos+j13); /* x1 y1 */ xmm2 = _mm_loadu_pd(pos+j23); /* x2 y2 */ xmm5 = _mm_load_sd(pos+j13+2); /* z1 - */ xmm6 = _mm_load_sd(pos+j23+2); /* z2 - */ /* transpose */ jx = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); jy = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); jz = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* distances */ dx = _mm_sub_pd(ix,jx); dy = _mm_sub_pd(iy,jy); dz = _mm_sub_pd(iz,jz); rsq11 = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) ); rinv = my_invrsq_pd(rsq11); /* Load invsqrta */ isaj = _mm_loadl_pd(isaj,invsqrta+jnr1); isaj = _mm_loadh_pd(isaj,invsqrta+jnr2); isaprod = _mm_mul_pd(isai,isaj); /* Load charges */ q 
= _mm_loadl_pd(q,charge+jnr1); q = _mm_loadh_pd(q,charge+jnr2); qq = _mm_mul_pd(iq,q); vcoul = _mm_mul_pd(qq,rinv); fscal = _mm_mul_pd(vcoul,rinv); qq = _mm_mul_pd(isaprod,qq); qq = _mm_mul_pd(qq,neg); gbscale = _mm_mul_pd(isaprod,gbtabscale); /* Load dvdaj */ dvdaj = _mm_loadl_pd(dvdaj, dvda+jnr1); dvdaj = _mm_loadh_pd(dvdaj, dvda+jnr2); r = _mm_mul_pd(rsq11,rinv); rt = _mm_mul_pd(r,gbscale); n0 = _mm_cvttpd_epi32(rt); n0d = _mm_cvtepi32_pd(n0); eps = _mm_sub_pd(rt,n0d); eps2 = _mm_mul_pd(eps,eps); nnn = _mm_slli_epi64(n0,2); xmm1 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0))); /* Y1 F1 */ xmm2 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,1))); /* Y2 F2 */ xmm3 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0))+2); /* G1 H1 */ xmm4 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,1))+2); /* G2 H2 */ Y = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */ F = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */ G = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */ H = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */ G = _mm_mul_pd(G,eps); H = _mm_mul_pd(H,eps2); Fp = _mm_add_pd(F,G); Fp = _mm_add_pd(Fp,H); VV = _mm_mul_pd(Fp,eps); VV = _mm_add_pd(Y,VV); H = _mm_mul_pd(two,H); FF = _mm_add_pd(Fp,G); FF = _mm_add_pd(FF,H); vgb = _mm_mul_pd(qq,VV); fijC = _mm_mul_pd(qq,FF); fijC = _mm_mul_pd(fijC,gbscale); dvdatmp = _mm_mul_pd(fijC,r); dvdatmp = _mm_add_pd(vgb,dvdatmp); dvdatmp = _mm_mul_pd(dvdatmp,neg); dvdatmp = _mm_mul_pd(dvdatmp,half); dvdasum = _mm_add_pd(dvdasum,dvdatmp); xmm1 = _mm_mul_pd(dvdatmp,isaj); xmm1 = _mm_mul_pd(xmm1,isaj); dvdaj = _mm_add_pd(dvdaj,xmm1); /* store dvda */ _mm_storel_pd(dvda+jnr1,dvdaj); _mm_storeh_pd(dvda+jnr2,dvdaj); vctot = _mm_add_pd(vctot,vcoul); vgbtot = _mm_add_pd(vgbtot,vgb); fscal = _mm_sub_pd(fijC,fscal); fscal = _mm_mul_pd(fscal,neg); fscal = _mm_mul_pd(fscal,rinv); /* calculate partial force terms */ t1 = _mm_mul_pd(fscal,dx); t2 = _mm_mul_pd(fscal,dy); t3 = _mm_mul_pd(fscal,dz); /* update the i force */ fix = 
_mm_add_pd(fix,t1); fiy = _mm_add_pd(fiy,t2); fiz = _mm_add_pd(fiz,t3); /* accumulate forces from memory */ xmm1 = _mm_loadu_pd(faction+j13); /* fx1 fy1 */ xmm2 = _mm_loadu_pd(faction+j23); /* fx2 fy2 */ xmm5 = _mm_load1_pd(faction+j13+2); /* fz1 fz1 */ xmm6 = _mm_load1_pd(faction+j23+2); /* fz2 fz2 */ /* transpose */ xmm7 = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fz1 fz2 */ xmm5 = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* fx1 fx2 */ xmm6 = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */ /* subtract partial forces */ xmm5 = _mm_sub_pd(xmm5,t1); xmm6 = _mm_sub_pd(xmm6,t2); xmm7 = _mm_sub_pd(xmm7,t3); xmm1 = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fx1 fy1 */ xmm2 = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */ /* store fx and fy */ _mm_storeu_pd(faction+j13,xmm1); _mm_storeu_pd(faction+j23,xmm2); /* .. then fz */ _mm_storel_pd(faction+j13+2,xmm7); _mm_storel_pd(faction+j23+2,xmm7); } /* In double precision, offset can only be either 0 or 1 */ if(offset!=0) { jnr1 = jjnr[k]; j13 = jnr1*3; jx = _mm_load_sd(pos+j13); jy = _mm_load_sd(pos+j13+1); jz = _mm_load_sd(pos+j13+2); isaj = _mm_load_sd(invsqrta+jnr1); isaprod = _mm_mul_sd(isai,isaj); dvdaj = _mm_load_sd(dvda+jnr1); q = _mm_load_sd(charge+jnr1); qq = _mm_mul_sd(iq,q); dx = _mm_sub_sd(ix,jx); dy = _mm_sub_sd(iy,jy); dz = _mm_sub_sd(iz,jz); rsq11 = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) ); rinv = my_invrsq_pd(rsq11); vcoul = _mm_mul_sd(qq,rinv); fscal = _mm_mul_sd(vcoul,rinv); qq = _mm_mul_sd(isaprod,qq); qq = _mm_mul_sd(qq,neg); gbscale = _mm_mul_sd(isaprod,gbtabscale); r = _mm_mul_sd(rsq11,rinv); rt = _mm_mul_sd(r,gbscale); n0 = _mm_cvttpd_epi32(rt); n0d = _mm_cvtepi32_pd(n0); eps = _mm_sub_sd(rt,n0d); eps2 = _mm_mul_sd(eps,eps); nnn = _mm_slli_epi64(n0,2); xmm1 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0))); xmm2 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,1))); xmm3 = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0))+2); xmm4 = 
_mm_load_pd(GBtab+(_mm_extract_epi64(nnn,1))+2); Y = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); F = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); G = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); H = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); G = _mm_mul_sd(G,eps); H = _mm_mul_sd(H,eps2); Fp = _mm_add_sd(F,G); Fp = _mm_add_sd(Fp,H); VV = _mm_mul_sd(Fp,eps); VV = _mm_add_sd(Y,VV); H = _mm_mul_sd(two,H); FF = _mm_add_sd(Fp,G); FF = _mm_add_sd(FF,H); vgb = _mm_mul_sd(qq,VV); fijC = _mm_mul_sd(qq,FF); fijC = _mm_mul_sd(fijC,gbscale); dvdatmp = _mm_mul_sd(fijC,r); dvdatmp = _mm_add_sd(vgb,dvdatmp); dvdatmp = _mm_mul_sd(dvdatmp,neg); dvdatmp = _mm_mul_sd(dvdatmp,half); dvdasum = _mm_add_sd(dvdasum,dvdatmp); xmm1 = _mm_mul_sd(dvdatmp,isaj); xmm1 = _mm_mul_sd(xmm1,isaj); dvdaj = _mm_add_sd(dvdaj,xmm1); /* store dvda */ _mm_storel_pd(dvda+jnr1,dvdaj); vctot = _mm_add_sd(vctot,vcoul); vgbtot = _mm_add_sd(vgbtot,vgb); fscal = _mm_sub_sd(fijC,fscal); fscal = _mm_mul_sd(fscal,neg); fscal = _mm_mul_sd(fscal,rinv); /* calculate partial force terms */ t1 = _mm_mul_sd(fscal,dx); t2 = _mm_mul_sd(fscal,dy); t3 = _mm_mul_sd(fscal,dz); /* update the i force */ fix = _mm_add_sd(fix,t1); fiy = _mm_add_sd(fiy,t2); fiz = _mm_add_sd(fiz,t3); /* accumulate forces from memory */ xmm5 = _mm_load_sd(faction+j13); /* fx */ xmm6 = _mm_load_sd(faction+j13+1); /* fy */ xmm7 = _mm_load_sd(faction+j13+2); /* fz */ /* subtract partial forces */ xmm5 = _mm_sub_sd(xmm5,t1); xmm6 = _mm_sub_sd(xmm6,t2); xmm7 = _mm_sub_sd(xmm7,t3); /* store forces */ _mm_store_sd(faction+j13,xmm5); _mm_store_sd(faction+j13+1,xmm6); _mm_store_sd(faction+j13+2,xmm7); } /* fix/fiy/fiz now contain four partial terms, that all should be * added to the i particle forces */ t1 = _mm_unpacklo_pd(t1,fix); t2 = _mm_unpacklo_pd(t2,fiy); t3 = _mm_unpacklo_pd(t3,fiz); fix = _mm_add_pd(fix,t1); fiy = _mm_add_pd(fiy,t2); fiz = _mm_add_pd(fiz,t3); fix = _mm_shuffle_pd(fix,fix,_MM_SHUFFLE2(1,1)); fiy = 
_mm_shuffle_pd(fiy,fiy,_MM_SHUFFLE2(1,1)); fiz = _mm_shuffle_pd(fiz,fiz,_MM_SHUFFLE2(1,1)); /* Load i forces from memory */ xmm1 = _mm_load_sd(faction+ii3); xmm2 = _mm_load_sd(faction+ii3+1); xmm3 = _mm_load_sd(faction+ii3+2); /* Add to i force */ fix = _mm_add_sd(fix,xmm1); fiy = _mm_add_sd(fiy,xmm2); fiz = _mm_add_sd(fiz,xmm3); /* store i forces to memory */ _mm_store_sd(faction+ii3,fix); _mm_store_sd(faction+ii3+1,fiy); _mm_store_sd(faction+ii3+2,fiz); /* now do dvda */ dvdatmp = _mm_unpacklo_pd(dvdatmp,dvdasum); dvdasum = _mm_add_pd(dvdasum,dvdatmp); _mm_storeh_pd(&dva,dvdasum); dvda[ii] = dvda[ii] + dva*isai_d*isai_d; ggid = gid[n]; /* Coulomb potential */ vcoul = _mm_unpacklo_pd(vcoul,vctot); vctot = _mm_add_pd(vctot,vcoul); _mm_storeh_pd(&vct,vctot); Vc[ggid] = Vc[ggid] + vct; /* GB potential */ vgb = _mm_unpacklo_pd(vgb,vgbtot); vgbtot = _mm_add_pd(vgbtot,vgb); _mm_storeh_pd(&vgbt,vgbtot); gpol[ggid] = gpol[ggid] + vgbt; } *outeriter = nri; *inneriter = nj1; }
void exchlaplacecoeffData_7(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S956, S958, S952, S955, S960, S954, S957, S951, S959, S950, S953 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138338)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+138470)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138338)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34586)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+34718)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34586)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103754)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+103886)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103754)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+134)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69170)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+69302)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69170)] = 0.000000e+00; } } } { 
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121046)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+121178)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121046)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+51878)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+52010)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+51878)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<127); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<130); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86462)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+86594)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86462)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17294)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+17426)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17294)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.280000e+02); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<127); i1 += 4) { /* yPos = 
((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<130); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S962, S961, S970, S964, S967, S966, S969, S963, S971, S965, S968 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121174)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+121306)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121174)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138466)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+138598)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138466)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.280000e+02); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<127); i1 += 4) { /* yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = 
_mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<130); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103882)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+104014)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103882)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<127); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<130); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+130)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+262)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+130)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69298)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+69430)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69298)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+52006)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+52138)] = 
0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+52006)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86590)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+86722)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86590)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34714)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+34846)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34714)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17422)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+17554)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17422)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S982, S976, S979, S973, S972, S981, S975, S978, S977, S980, S974 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+138468)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+138469)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+138468)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=129); i2 += 2) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=130); i2 += 1) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; 
for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+34716)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+34717)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+34716)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+17424)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+17425)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+17424)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+132)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+133)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+132)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+86592)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+86593)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+86592)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+52008)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+52009)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+52008)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+103884)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+103885)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+103884)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+121176)] = 0.000000e+00; 
fieldData_LaplaceCoeff_7_p1[(i2+121177)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+121176)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=129); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=130); i2 += 1) { yPos = posBegin[1]; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+69300)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+69301)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+69300)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S988, S991, S985, S990, S984, S993, S987, S983, S992, S986, S989 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+138072)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+138073)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+138072)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+34320)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+34321)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+34320)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+155364)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+155365)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+155364)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+51612)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+51613)] = 0.000000e+00; } for (; (i2<=130); 
i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+51612)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=129); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=130); i2 += 1) { yPos = posEnd[1]; } } } { int i2 = 2; for (; (i2<=129); i2 += 2) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=130); i2 += 1) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+103488)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+103489)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+103488)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+68904)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+68905)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+68904)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+17028)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+17029)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+17028)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+120780)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+120781)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+120780)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+86196)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+86197)] = 
0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+86196)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S994 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*129)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 1; for (; (i4<=128); i4 += 2) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+130)]; buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+262)]; } for (; (i4<=129); i4 += 1) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+130)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 1161, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 1161, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S995 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_0_p1 = 
(&buffer_Recv[0][(i3*129)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 3; for (; (i4<=130); i4 += 2) { fieldData_LaplaceCoeff_7_p1[((i4*132)-262)] = buffer_Recv_0_p1[(i4-3)]; fieldData_LaplaceCoeff_7_p1[((i4*132)-130)] = buffer_Recv_0_p1[(i4-2)]; } for (; (i4<=131); i4 += 1) { fieldData_LaplaceCoeff_7_p1[((i4*132)-262)] = buffer_Recv_0_p1[(i4-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[7][17030], 1, mpiDatatype_9_129_17292, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[7][134], 1, mpiDatatype_9_129_17292, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = 
false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S996 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*131)]); int i4 = 0; for (; (i4<=129); i4 += 2) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+3)]; buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+135)]; } for (; (i4<=130); i4 += 1) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+3)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S997 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*131)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 0; for (; (i4<=129); i4 += 2) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+129)]; buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+261)]; } for (; (i4<=130); i4 += 1) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+129)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(buffer_Send[0], 1179, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 1179, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if 
((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 1179, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(buffer_Recv[1], 1179, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S998 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*131)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 1; for (; (i4<=130); i4 += 2) { fieldData_LaplaceCoeff_7_p1[((i4*132)-131)] = buffer_Recv_0_p1[(i4-1)]; fieldData_LaplaceCoeff_7_p1[((i4*132)+1)] = buffer_Recv_0_p1[i4]; } for (; (i4<=131); i4 += 1) { fieldData_LaplaceCoeff_7_p1[((i4*132)-131)] = buffer_Recv_0_p1[(i4-1)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S999 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*131)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 131; for (; (i4<=260); i4 += 2) { fieldData_LaplaceCoeff_7_p1[((i4*132)-17161)] = buffer_Recv_1_p1[(i4-131)]; fieldData_LaplaceCoeff_7_p1[((i4*132)-17029)] = buffer_Recv_1_p1[(i4-130)]; } for (; (i4<=261); 
i4 += 1) { fieldData_LaplaceCoeff_7_p1[((i4*132)-17161)] = buffer_Recv_1_p1[(i4-131)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Isend(&fieldData_LaplaceCoeff[7][265], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[7][16897], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[7][1], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_LaplaceCoeff[7][17161], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; 
++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void exchrhs_gmrfData_8(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S589, S588, S590 */ { { { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(2.560000e+02); __m128d vec4 = _mm_set1_pd(yPos); for (; (i1<254); i1 += 4) { /* yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec2 = _mm_load1_pd((&posEnd[1])); __m128d vec2_2 = _mm_load1_pd((&posEnd[1])); __m128d vec3 = _mm_load1_pd((&posBegin[1])); __m128d vec3_2 = _mm_load1_pd((&posBegin[1])); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0, vec1), _mm_sub_pd(vec2, vec3)), vec3); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0_2, vec1), _mm_sub_pd(vec2_2, vec3_2)), vec3_2); } for (; (i1<257); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<254); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<257); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i1 = 0; for (; (i1<=255); i1 += 2) { fieldData_RHS_GMRF_8_p1[(i1*258)] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[((i1*258)+258)] = 0.000000e+00; } for (; (i1<=256); i1 += 1) { fieldData_RHS_GMRF_8_p1[(i1*258)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S592, S591, S593 */ { { { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(2.560000e+02); __m128d vec4 = 
_mm_set1_pd(yPos); for (; (i1<254); i1 += 4) { /* yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec2 = _mm_load1_pd((&posEnd[1])); __m128d vec2_2 = _mm_load1_pd((&posEnd[1])); __m128d vec3 = _mm_load1_pd((&posBegin[1])); __m128d vec3_2 = _mm_load1_pd((&posBegin[1])); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0, vec1), _mm_sub_pd(vec2, vec3)), vec3); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0_2, vec1), _mm_sub_pd(vec2_2, vec3_2)), vec3_2); } for (; (i1<257); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i1 = 0; for (; (i1<=255); i1 += 2) { fieldData_RHS_GMRF_8_p1[((i1*258)+256)] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[((i1*258)+514)] = 0.000000e+00; } for (; (i1<=256); i1 += 1) { fieldData_RHS_GMRF_8_p1[((i1*258)+256)] = 0.000000e+00; } } } { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<254); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<257); i1 += 1) { xPos = posEnd[0]; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S595, S594, S596 */ { { { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i2 = 0; for (; (i2<=255); i2 += 2) { fieldData_RHS_GMRF_8_p1[i2] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[(i2+1)] = 0.000000e+00; } for (; (i2<=256); i2 += 1) { fieldData_RHS_GMRF_8_p1[i2] = 0.000000e+00; } } { int i2 = 0; for (; (i2<=255); i2 += 2) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2+1)/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=256); i2 += 1) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { int i2 = 0; for (; (i2<=255); i2 += 
2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=256); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S598, S597, S599 */ { { { int i2 = 0; for (; (i2<=255); i2 += 2) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2+1)/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=256); i2 += 1) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { int i2 = 0; for (; (i2<=255); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=256); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i2 = 0; for (; (i2<=255); i2 += 2) { fieldData_RHS_GMRF_8_p1[(i2+66048)] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[(i2+66049)] = 0.000000e+00; } for (; (i2<=256); i2 += 1) { fieldData_RHS_GMRF_8_p1[(i2+66048)] = 0.000000e+00; } } } } } } } }
/*
 * SSE2 double-precision nonbonded kernel.
 *
 * For every outer-list entry n (0 <= n < *p_nri) the kernel walks the j
 * entries jjnr[jindex[n]] .. jjnr[jindex[n+1]-1] and accumulates three
 * contributions per i/j pair:
 *   - a Coulomb term qq*rinv, with qq = (*p_facel)*charge[i]*charge[j];
 *   - a tabulated polarization (GB) term read from GBtab, whose table
 *     coordinate is r * invsqrta[i]*invsqrta[j] * (*p_gbtabscale);
 *   - tabulated c6 and c12 terms read from VFtab at r * (*p_tabscale), with
 *     c6/c12 taken from vdwparam.
 *
 * j entries are processed two at a time (one per __m128d lane); an odd
 * leftover entry is handled by a second copy of the math that uses the
 * scalar (_sd) intrinsics.
 *
 * Results are accumulated through helpers/macros defined elsewhere:
 *   faction, fshift           forces (i atom, j atoms, shift index of n)
 *   vc[gid[n]]                sum of the Coulomb terms
 *   gpol[gid[n]]              sum of the GB terms (gpol comes from work)
 *   vvdw[gid[n]]              sum of the c6/c12 terms
 *   dvda[ii], dvda[j]         dvda accumulations
 *   *outeriter, *inneriter    set to nri and to the last jindex end read
 *
 * work is reinterpreted as a gmx_gbdata_t*.  The parameters p_krf, p_crf,
 * p_nthreads, count and mtx are not referenced in this body.
 *
 * NOTE(review): the exact behavior of the GMX_MM_* macros and gmx_mm_*
 * helpers is not visible here; comments below that describe them are
 * inferred from their names and arguments and should be confirmed.
 */
void nb_kernel430_ia32_sse2(int * p_nri, int * iinr, int * jindex, int * jjnr, int * shift,
                            double * shiftvec, double * fshift, int * gid, double * pos,
                            double * faction, double * charge, double * p_facel, double * p_krf,
                            double * p_crf, double * vc, int * type, int * p_ntype,
                            double * vdwparam, double * vvdw, double * p_tabscale, double * VFtab,
                            double * invsqrta, double * dvda, double * p_gbtabscale, double * GBtab,
                            int * p_nthreads, int * count, void * mtx, int * outeriter,
                            int * inneriter, double * work)
{
    /* Several of the locals below (e.g. nthreads, offset, rinvsix, dvdaj,
     * zero) are declared but never read in this body. */
    int nri,ntype,nthreads;
    int n,ii,is3,ii3,k,nj0,nj1,ggid;
    double shX,shY,shZ;
    int offset,nti;
    int jnrA,jnrB;
    int j3A,j3B;
    int tjA,tjB;
    gmx_gbdata_t *gbdata;
    double * gpol;
    __m128d iq,qq,jq,isai;
    __m128d ix,iy,iz;
    __m128d jx,jy,jz;
    __m128d dx,dy,dz;
    __m128d vctot,vvdwtot,vgbtot,dvdasum,gbfactor;
    __m128d fix,fiy,fiz,tx,ty,tz,rsq;
    __m128d rinv,isaj,isaprod;
    __m128d vcoul,fscal,gbscale,c6,c12;
    __m128d rinvsq,r,rtab;
    __m128d eps,Y,F,G,H;
    __m128d VV,FF,Fp;
    __m128d vgb,fijGB,dvdatmp;
    __m128d rinvsix,vvdw6,vvdw12,vvdwtmp;
    __m128d facel,gbtabscale,dvdaj;
    __m128d fijD,fijR;
    __m128d xmm1,tabscale,eps2;
    __m128i n0, nnn;
    const __m128d neg = _mm_set1_pd(-1.0);
    const __m128d zero = _mm_set1_pd(0.0);
    const __m128d minushalf = _mm_set1_pd(-0.5);
    const __m128d two = _mm_set1_pd(2.0);

    gbdata = (gmx_gbdata_t *)work;
    gpol = gbdata->gpol;
    nri = *p_nri;
    ntype = *p_ntype;
    /* Broadcast scalar inputs to both lanes. */
    gbfactor = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));
    gbtabscale = _mm_load1_pd(p_gbtabscale);
    facel = _mm_load1_pd(p_facel);
    tabscale = _mm_load1_pd(p_tabscale);

    /* Give everything that the load macros fill (or that is read after the
     * loop) a defined value before first use. */
    nj1 = 0;
    jnrA = jnrB = 0;
    j3A = j3B = 0;
    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();
    c6 = _mm_setzero_pd();
    c12 = _mm_setzero_pd();

    for(n=0;n<nri;n++)
    {
        /* Shift vector and j range of this outer entry. */
        is3 = 3*shift[n];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = jindex[n];
        nj1 = jindex[n+1];
        ii = iinr[n];
        ii3 = 3*ii;

        /* Shifted i position, i charge (pre-scaled by facel) and invsqrta[i],
         * each broadcast to both lanes. */
        ix = _mm_set1_pd(shX+pos[ii3+0]);
        iy = _mm_set1_pd(shY+pos[ii3+1]);
        iz = _mm_set1_pd(shZ+pos[ii3+2]);
        iq = _mm_load1_pd(charge+ii);
        iq = _mm_mul_pd(iq,facel);
        isai = _mm_load1_pd(invsqrta+ii);
        /* Base offset into vdwparam for this i type; two doubles per type pair. */
        nti = 2*ntype*type[ii];

        /* Per-i accumulators. */
        vctot = _mm_setzero_pd();
        vvdwtot = _mm_setzero_pd();
        vgbtot = _mm_setzero_pd();
        dvdasum = _mm_setzero_pd();
        fix = _mm_setzero_pd();
        fiy = _mm_setzero_pd();
        fiz = _mm_setzero_pd();

        /* Main loop: two j entries per iteration, one per lane. */
        for(k=nj0;k<nj1-1; k+=2)
        {
            jnrA = jjnr[k];
            jnrB = jjnr[k+1];
            j3A = jnrA * 3;
            j3B = jnrB * 3;
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
            dx = _mm_sub_pd(ix,jx);
            dy = _mm_sub_pd(iy,jy);
            dz = _mm_sub_pd(iz,jz);
            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv = gmx_mm_invsqrt_pd(rsq);
            /* rinvsq is computed but not read afterwards. */
            rinvsq = _mm_mul_pd(rinv,rinv);

            /***********************************/
            /* INTERACTION SECTION STARTS HERE */
            /***********************************/
            GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
            GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);

            /* Lennard-Jones */
            tjA = nti+2*type[jnrA];
            tjB = nti+2*type[jnrB];
            GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);

            isaprod = _mm_mul_pd(isai,isaj);
            /* Coulomb: vcoul = qq/r; fscal temporarily holds qq/r^2. */
            qq = _mm_mul_pd(iq,jq);
            vcoul = _mm_mul_pd(qq,rinv);
            fscal = _mm_mul_pd(vcoul,rinv);
            vctot = _mm_add_pd(vctot,vcoul);

            /* Polarization interaction */
            qq = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
            gbscale = _mm_mul_pd(isaprod,gbtabscale);

            /* Calculate GB table index */
            r = _mm_mul_pd(rsq,rinv);
            rtab = _mm_mul_pd(r,gbscale);
            /* Truncate to the integer table point; eps is the fractional part. */
            n0 = _mm_cvttpd_epi32(rtab);
            eps = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
            /* Index * 4: four doubles are read per GB table point. */
            nnn = _mm_slli_epi32(n0,2);

            /* the tables are 16-byte aligned, so we can use _mm_load_pd */
            /* Each lane loads its own table entry; the transpose macro
             * presumably regroups the pairs so that Y/F (then G/H) each hold
             * one coefficient for both lanes. */
            Y = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0)));
            F = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
            GMX_MM_TRANSPOSE2_PD(Y,F);
            G = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2);
            H = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
            GMX_MM_TRANSPOSE2_PD(G,H);

            /* Cubic in eps: value Y + eps*(F + G*eps + H*eps^2), and in F the
             * corresponding F + 2*G*eps + 3*H*eps^2. */
            G = _mm_mul_pd(G,eps);
            H = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
            F = _mm_add_pd(F, _mm_add_pd( G , H ) );
            Y = _mm_add_pd(Y, _mm_mul_pd(F, eps));
            F = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
            vgb = _mm_mul_pd(Y, qq);
            fijGB = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));

            dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);

            vgbtot = _mm_add_pd(vgbtot, vgb);

            /* i side is summed here and scaled by isai^2 after the j loop;
             * the j side is scaled by isaj^2 and written out immediately. */
            dvdasum = _mm_add_pd(dvdasum, dvdatmp);
            dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));

            GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);

            /* Calculate VDW table index */
            rtab = _mm_mul_pd(r,tabscale);
            n0 = _mm_cvttpd_epi32(rtab);
            eps = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
            eps2 = _mm_mul_pd(eps,eps);
            /* Index * 8: eight doubles per table point (offsets 0..3 are used
             * with c6, offsets 4..7 with c12). */
            nnn = _mm_slli_epi32(n0,3);

            /* Dispersion */
            Y = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0)));
            F = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
            GMX_MM_TRANSPOSE2_PD(Y,F);
            G = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2);
            H = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
            GMX_MM_TRANSPOSE2_PD(G,H);

            G = _mm_mul_pd(G,eps);
            H = _mm_mul_pd(H,eps2);
            Fp = _mm_add_pd(F,G);
            Fp = _mm_add_pd(Fp,H);
            VV = _mm_mul_pd(Fp,eps);
            VV = _mm_add_pd(Y,VV);
            xmm1 = _mm_mul_pd(two,H);
            FF = _mm_add_pd(Fp,G);
            FF = _mm_add_pd(FF,xmm1);

            vvdw6 = _mm_mul_pd(c6,VV);
            fijD = _mm_mul_pd(c6,FF);

            /* c12 term, read from the second half of the table entry (the
             * original comment repeated "Dispersion" here; presumably this is
             * the repulsion part) */
            Y = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4);
            F = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
            GMX_MM_TRANSPOSE2_PD(Y,F);
            G = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6);
            H = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
            GMX_MM_TRANSPOSE2_PD(G,H);

            G = _mm_mul_pd(G,eps);
            H = _mm_mul_pd(H,eps2);
            Fp = _mm_add_pd(F,G);
            Fp = _mm_add_pd(Fp,H);
            VV = _mm_mul_pd(Fp,eps);
            VV = _mm_add_pd(Y,VV);
            xmm1 = _mm_mul_pd(two,H);
            FF = _mm_add_pd(Fp,G);
            FF = _mm_add_pd(FF,xmm1);

            vvdw12 = _mm_mul_pd(c12,VV);
            fijR = _mm_mul_pd(c12,FF);

            vvdwtmp = _mm_add_pd(vvdw12,vvdw6);
            vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp);

            /* fscal = -((fijD+fijR)*tabscale + fijGB - qq/r^2) / r */
            xmm1 = _mm_add_pd(fijD,fijR);
            xmm1 = _mm_mul_pd(xmm1,tabscale);

            xmm1 = _mm_add_pd(xmm1,fijGB);
            xmm1 = _mm_sub_pd(xmm1,fscal);
            fscal = _mm_mul_pd(xmm1,neg);
            fscal = _mm_mul_pd(fscal,rinv);

            /***********************************/
            /* INTERACTION SECTION ENDS HERE */
            /***********************************/

            /* Calculate temporary vectorial force */
            tx = _mm_mul_pd(fscal,dx);
            ty = _mm_mul_pd(fscal,dy);
            tz = _mm_mul_pd(fscal,dz);

            /* Increment i atom force */
            fix = _mm_add_pd(fix,tx);
            fiy = _mm_add_pd(fiy,ty);
            fiz = _mm_add_pd(fiz,tz);

            /* Store j forces back */
            GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
        }

        /* In double precision, offset can only be either 0 or 1 */
        /* i.e. after the pairwise loop at most one j entry is left; it is
         * handled below using only the lower lane. */
        if(k<nj1)
        {
            jnrA = jjnr[k];
            j3A = jnrA * 3;
            GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
            dx = _mm_sub_sd(ix,jx);
            dy = _mm_sub_sd(iy,jy);
            dz = _mm_sub_sd(iz,jz);
            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv = gmx_mm_invsqrt_pd(rsq);
            /* rinvsq is computed but not read afterwards. */
            rinvsq = _mm_mul_sd(rinv,rinv);

            /* The reason for zeroing these variables here is to fix bug 585.
             * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0],
             * and r1=0, but it should be r1=a[1].
             * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
             * To work around it, we zero these variables and use _mm_add_pd (**) instead.
             * Note that the only variables that get affected are the energies since
             * the total sum needs to be correct */
            vgb = _mm_setzero_pd();
            vcoul = _mm_setzero_pd();
            dvdatmp = _mm_setzero_pd();
            vvdw6 = _mm_setzero_pd();
            vvdw12 = _mm_setzero_pd();

            /***********************************/
            /* INTERACTION SECTION STARTS HERE */
            /***********************************/
            GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
            GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);

            /* Lennard-Jones */
            tjA = nti+2*type[jnrA];

            GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);

            isaprod = _mm_mul_sd(isai,isaj);
            qq = _mm_mul_sd(jq,iq);
            vcoul = _mm_mul_sd(qq,rinv);
            fscal = _mm_mul_sd(vcoul,rinv);
            vctot = _mm_add_pd(vctot,vcoul); /* (**) */

            /* Polarization interaction */
            qq = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
            gbscale = _mm_mul_sd(isaprod,gbtabscale);

            /* Calculate GB table index */
            r = _mm_mul_sd(rsq,rinv);
            rtab = _mm_mul_sd(r,gbscale);
            n0 = _mm_cvttpd_epi32(rtab);
            eps = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
            /* Index * 4, as in the pairwise loop. */
            nnn = _mm_slli_epi32(n0,2);

            /* the tables are 16-byte aligned, so we can use _mm_load_pd */
            /* Only index 0 of nnn is used; the second operand of each
             * transpose is zero instead of a second table entry. */
            Y = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0)));
            F = _mm_setzero_pd();
            GMX_MM_TRANSPOSE2_PD(Y,F);
            G = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2);
            H = _mm_setzero_pd();
            GMX_MM_TRANSPOSE2_PD(G,H);

            G = _mm_mul_sd(G,eps);
            H = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
            F = _mm_add_sd(F, _mm_add_sd( G , H ) );
            Y = _mm_add_sd(Y, _mm_mul_sd(F, eps));
            F = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
            vgb = _mm_mul_sd(Y, qq);
            fijGB = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));

            dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);

            vgbtot = _mm_add_pd(vgbtot, vgb); /* (**) */

            dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
            dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));

            GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);

            /* Calculate VDW table index */
            rtab = _mm_mul_sd(r,tabscale);
            n0 = _mm_cvttpd_epi32(rtab);
            eps = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
            eps2 = _mm_mul_sd(eps,eps);
            /* Index * 8, as in the pairwise loop. */
            nnn = _mm_slli_epi32(n0,3);

            /* Dispersion */
            Y = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0)));
            F = _mm_setzero_pd();
            GMX_MM_TRANSPOSE2_PD(Y,F);
            G = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2);
            H = _mm_setzero_pd();
            GMX_MM_TRANSPOSE2_PD(G,H);

            G = _mm_mul_sd(G,eps);
            H = _mm_mul_sd(H,eps2);
            Fp = _mm_add_sd(F,G);
            Fp = _mm_add_sd(Fp,H);
            VV = _mm_mul_sd(Fp,eps);
            VV = _mm_add_sd(Y,VV);
            xmm1 = _mm_mul_sd(two,H);
            FF = _mm_add_sd(Fp,G);
            FF = _mm_add_sd(FF,xmm1);

            vvdw6 = _mm_mul_sd(c6,VV);
            fijD = _mm_mul_sd(c6,FF);

            /* c12 term, read from the second half of the table entry (the
             * original comment repeated "Dispersion" here; presumably this is
             * the repulsion part) */
            Y = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4);
            F = _mm_setzero_pd();
            GMX_MM_TRANSPOSE2_PD(Y,F);
            G = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6);
            H = _mm_setzero_pd();
            GMX_MM_TRANSPOSE2_PD(G,H);

            G = _mm_mul_sd(G,eps);
            H = _mm_mul_sd(H,eps2);
            Fp = _mm_add_sd(F,G);
            Fp = _mm_add_sd(Fp,H);
            VV = _mm_mul_sd(Fp,eps);
            VV = _mm_add_sd(Y,VV);
            xmm1 = _mm_mul_sd(two,H);
            FF = _mm_add_sd(Fp,G);
            FF = _mm_add_sd(FF,xmm1);

            vvdw12 = _mm_mul_sd(c12,VV);
            fijR = _mm_mul_sd(c12,FF);

            vvdwtmp = _mm_add_sd(vvdw12,vvdw6);
            vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp); /* (**) */

            xmm1 = _mm_add_sd(fijD,fijR);
            xmm1 = _mm_mul_sd(xmm1,tabscale);

            xmm1 = _mm_add_sd(xmm1,fijGB);
            xmm1 = _mm_sub_sd(xmm1,fscal);
            fscal = _mm_mul_sd(xmm1,neg);
            fscal = _mm_mul_sd(fscal,rinv);

            /***********************************/
            /* INTERACTION SECTION ENDS HERE */
            /***********************************/

            /* Calculate temporary vectorial force */
            tx = _mm_mul_sd(fscal,dx);
            ty = _mm_mul_sd(fscal,dy);
            tz = _mm_mul_sd(fscal,dz);

            /* Increment i atom force */
            fix = _mm_add_sd(fix,tx);
            fiy = _mm_add_sd(fiy,ty);
            fiz = _mm_add_sd(fiz,tz);

            /* Store j forces back */
            GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
        }

        /* Scale the i-side dvda sum by invsqrta[i]^2 before writing it out. */
        dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);

        /* Write the per-i sums to their output slots; the helper presumably
         * adds both lanes into the pointed-to value -- confirm. */
        ggid = gid[n];

        gmx_mm_update_1pot_pd(vctot,vc+ggid);
        gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
        gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
        gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
    }

    /* nj1 holds jindex[nri] here, or 0 when nri is 0. */
    *outeriter = nri;
    *inneriter = nj1;
}
/*
 * Codegen test wrapper around _mm_load1_pd: loads the double at y and returns
 * it broadcast to both lanes.  x is not used.
 *
 * The two line comments inside the body look like LLVM FileCheck directives
 * and are kept verbatim.  Each must stay on its own line: when the whole
 * function sits on a single line, the first line comment swallows the return
 * statement and the closing brace.
 */
__m128d test_load1_pd(__m128 x, void* y) {
  // CHECK: define {{.*}} @test_load1_pd
  // CHECK: load double* {{.*}}, align 1{{$}}
  return _mm_load1_pd(y);
}
/*
 * SSE2 double-precision computation of Born radii (HCT or OBC variant).
 *
 * Steps:
 *   1. Clear born->gpol_hct_work[0 .. born->nr-1].
 *   2. For every i entry of the neighbor list nl, loop over its j entries two
 *      at a time (one per __m128d lane, odd leftover in a scalar tail) and
 *      compute two masked pair terms:
 *        - "aj -> ai", summed per i and added to work[ii];
 *        - "ai -> aj", added to work[jnr].
 *      The matching derivative terms dadx1/dadx2 are appended to fr->dadx,
 *      four doubles per inner iteration (the tail iteration included).
 *   3. Sum work across ranks when PARTDECOMP(cr) or DOMAINDECOMP(cr) holds.
 *   4. For every atom i < fr->natoms_force with born->use[i] != 0, turn
 *      work[i] into born->bRad[i] and fr->invsqrta[i]; the OBC branch also
 *      fills born->drobc[i].  With DOMAINDECOMP(cr) the results are spread
 *      via dd_atom_spread_real.
 *
 * gb_algorithm == egbHCT selects the HCT formula; any other value takes the
 * OBC branch.  The parameter atype is not referenced in this body.
 *
 * fr->dadx is written with _mm_store_pd, which requires a 16-byte aligned
 * address.
 *
 * Always returns 0.
 *
 * NOTE(review): the GMX_MM_* macros, gmx_mm_* helpers and the project structs
 * are defined elsewhere; remarks about them below are inferred from names and
 * arguments and should be confirmed.
 */
int calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
                                    const t_atomtypes *atype, double *x, t_nblist *nl,
                                    gmx_genborn_t *born,t_mdatoms *md,int gb_algorithm)
{
    /* Many of the locals below (e.g. ai, n, at0, at1, offset, sum_tmp, gbr,
     * sk, sk2, duij, tmp, mask, three, zero, neg) are declared but never read
     * in this body. */
    int i,ai,k,n,ii,ii3,is3,nj0,nj1,at0,at1,offset;
    int jnrA,jnrB;
    int j3A,j3B;
    double shX,shY,shZ;
    double rr,rr_inv,rr_inv2,sum_tmp,sum,sum2,sum3,gbr;
    double sum_ai2, sum_ai3,tsum,tchain,doffset;
    double *obc_param;
    double *gb_radius;
    double *work;
    int * jjnr;
    double *dadx;
    double *shiftvec;
    double min_rad,rad;

    __m128d ix,iy,iz,jx,jy,jz;
    __m128d dx,dy,dz,t1,t2,t3,t4;
    __m128d rsq,rinv,r;
    __m128d rai,rai_inv,raj, raj_inv,rai_inv2,sk,sk2,lij,dlij,duij;
    __m128d uij,lij2,uij2,lij3,uij3,diff2;
    __m128d lij_inv,sk2_inv,prod,log_term,tmp,tmp_sum;
    __m128d sum_ai, tmp_ai,sk_ai,sk_aj,sk2_ai,sk2_aj,sk2_rinv;
    __m128d dadx1,dadx2;
    __m128d logterm;
    __m128d mask;
    __m128d obc_mask1,obc_mask2,obc_mask3;

    __m128d oneeighth = _mm_set1_pd(0.125);
    __m128d onefourth = _mm_set1_pd(0.25);

    const __m128d half = _mm_set1_pd(0.5);
    const __m128d three = _mm_set1_pd(3.0);
    const __m128d one = _mm_set1_pd(1.0);
    const __m128d two = _mm_set1_pd(2.0);
    const __m128d zero = _mm_set1_pd(0.0);
    const __m128d neg = _mm_set1_pd(-1.0);

    /* Set the dielectric offset */
    doffset = born->gb_doffset;
    /* Local aliases for the arrays used in the inner loops. */
    gb_radius = born->gb_radius;
    obc_param = born->param;
    work = born->gpol_hct_work;
    jjnr = nl->jjnr;
    dadx = fr->dadx;
    shiftvec = fr->shift_vec[0];

    /* Defined start values for the registers the load macros fill. */
    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();

    jnrA = jnrB = 0;

    /* Clear the per-atom accumulation buffer. */
    for(i=0;i<born->nr;i++)
    {
        work[i] = 0;
    }

    for(i=0;i<nl->nri;i++)
    {
        ii = nl->iinr[i];
        ii3 = ii*3;
        is3 = 3*nl->shift[i];
        shX = shiftvec[is3];
        shY = shiftvec[is3+1];
        shZ = shiftvec[is3+2];
        nj0 = nl->jindex[i];
        nj1 = nl->jindex[i+1];

        /* Shifted i position, radius and scaling parameter of atom i,
         * broadcast to both lanes. */
        ix = _mm_set1_pd(shX+x[ii3+0]);
        iy = _mm_set1_pd(shY+x[ii3+1]);
        iz = _mm_set1_pd(shZ+x[ii3+2]);

        rai = _mm_load1_pd(gb_radius+ii);
        rai_inv= gmx_mm_inv_pd(rai);

        sum_ai = _mm_setzero_pd();

        sk_ai = _mm_load1_pd(born->param+ii);
        sk2_ai = _mm_mul_pd(sk_ai,sk_ai);

        /* Main loop: two j entries per iteration, one per lane. */
        for(k=nj0;k<nj1-1;k+=2)
        {
            jnrA = jjnr[k];
            jnrB = jjnr[k+1];

            j3A = 3*jnrA;
            j3B = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA,obc_param+jnrB,sk_aj);

            dx = _mm_sub_pd(ix, jx);
            dy = _mm_sub_pd(iy, jy);
            dz = _mm_sub_pd(iz, jz);

            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);

            rinv = gmx_mm_invsqrt_pd(rsq);
            r = _mm_mul_pd(rsq,rinv);

            /* Compute raj_inv for both j lanes */
            raj_inv = gmx_mm_inv_pd(raj);

            /* Evaluate influence of atom aj -> ai */
            t1 = _mm_add_pd(r,sk_aj);
            t2 = _mm_sub_pd(r,sk_aj);
            t3 = _mm_sub_pd(sk_aj,r);
            /* Lane masks: rai < r+sk, rai < r-sk, rai < sk-r. */
            obc_mask1 = _mm_cmplt_pd(rai, t1);
            obc_mask2 = _mm_cmplt_pd(rai, t2);
            obc_mask3 = _mm_cmplt_pd(rai, t3);

            uij = gmx_mm_inv_pd(t1);
            /* lij = 1/(r-sk) where mask2 is set, otherwise 1/rai. */
            lij = _mm_or_pd( _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                            _mm_andnot_pd(obc_mask2,rai_inv));
            /* dlij = 1.0 where mask2 is set, otherwise 0.0. */
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_pd(uij, uij);
            uij3 = _mm_mul_pd(uij2,uij);
            lij2 = _mm_mul_pd(lij, lij);
            lij3 = _mm_mul_pd(lij2,lij);

            diff2 = _mm_sub_pd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_aj = _mm_mul_pd(sk_aj,sk_aj);
            sk2_rinv = _mm_mul_pd(sk2_aj,rinv);
            prod = _mm_mul_pd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));

            t1 = _mm_sub_pd(lij,uij);
            t2 = _mm_mul_pd(diff2,
                            _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                       prod));
            t3 = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1 = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            /* Extra term, kept only where mask3 is set. */
            t4 = _mm_mul_pd(two,_mm_sub_pd(rai_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_pd(half,_mm_add_pd(t1,t4));

            /* Contribution counts only where mask1 is set. */
            sum_ai = _mm_add_pd(sum_ai, _mm_and_pd(t1,obc_mask1) );

            /* Derivative term for this direction (dadx1). */
            t1 = _mm_add_pd(_mm_mul_pd(half,lij2),
                            _mm_mul_pd(prod,lij3));
            t1 = _mm_sub_pd(t1,
                            _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                  _mm_mul_pd(lij3,r))));
            t2 = _mm_mul_pd(onefourth,
                            _mm_add_pd(_mm_mul_pd(uij,rinv),
                                       _mm_mul_pd(uij3,r)));
            t2 = _mm_sub_pd(t2,
                            _mm_add_pd(_mm_mul_pd(half,uij2),
                                       _mm_mul_pd(prod,uij3)));
            t3 = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                            _mm_mul_pd(rinv,rinv));
            t3 = _mm_sub_pd(t3,
                            _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                       _mm_add_pd(one,
                                                  _mm_mul_pd(sk2_rinv,rinv))));
            t1 = _mm_mul_pd(rinv,
                            _mm_add_pd(_mm_mul_pd(dlij,t1),
                                       _mm_add_pd(t2,t3)));

            dadx1 = _mm_and_pd(t1,obc_mask1);

            /* Evaluate influence of atom ai -> aj */
            /* Same formulas with the roles swapped: sk_ai instead of sk_aj
             * and raj instead of rai. */
            t1 = _mm_add_pd(r,sk_ai);
            t2 = _mm_sub_pd(r,sk_ai);
            t3 = _mm_sub_pd(sk_ai,r);
            obc_mask1 = _mm_cmplt_pd(raj, t1);
            obc_mask2 = _mm_cmplt_pd(raj, t2);
            obc_mask3 = _mm_cmplt_pd(raj, t3);

            uij = gmx_mm_inv_pd(t1);
            lij = _mm_or_pd( _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                            _mm_andnot_pd(obc_mask2,raj_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_pd(uij, uij);
            uij3 = _mm_mul_pd(uij2,uij);
            lij2 = _mm_mul_pd(lij, lij);
            lij3 = _mm_mul_pd(lij2,lij);

            diff2 = _mm_sub_pd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv = _mm_mul_pd(sk2_ai,rinv);
            prod = _mm_mul_pd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));

            t1 = _mm_sub_pd(lij,uij);
            t2 = _mm_mul_pd(diff2,
                            _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                       prod));
            t3 = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1 = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4 = _mm_mul_pd(two,_mm_sub_pd(raj_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_pd(half,_mm_add_pd(t1,t4));

            /* The j-side contribution goes straight into work[jnrA/jnrB]. */
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_and_pd(t1,obc_mask1));

            /* Derivative term for this direction (dadx2). */
            t1 = _mm_add_pd(_mm_mul_pd(half,lij2),
                            _mm_mul_pd(prod,lij3));
            t1 = _mm_sub_pd(t1,
                            _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                  _mm_mul_pd(lij3,r))));
            t2 = _mm_mul_pd(onefourth,
                            _mm_add_pd(_mm_mul_pd(uij,rinv),
                                       _mm_mul_pd(uij3,r)));
            t2 = _mm_sub_pd(t2,
                            _mm_add_pd(_mm_mul_pd(half,uij2),
                                       _mm_mul_pd(prod,uij3)));
            t3 = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                            _mm_mul_pd(rinv,rinv));
            t3 = _mm_sub_pd(t3,
                            _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                       _mm_add_pd(one,
                                                  _mm_mul_pd(sk2_rinv,rinv))));
            t1 = _mm_mul_pd(rinv,
                            _mm_add_pd(_mm_mul_pd(dlij,t1),
                                       _mm_add_pd(t2,t3)));

            dadx2 = _mm_and_pd(t1,obc_mask1);

            /* Append both derivative vectors: 4 doubles per iteration. */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } /* end normal inner loop */

        /* Tail: at most one j entry is left.  Arithmetic uses the scalar
         * (_sd) intrinsics, so only the lower lane carries the result. */
        if(k<nj1)
        {
            jnrA = jjnr[k];

            j3A = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA,sk_aj);

            dx = _mm_sub_sd(ix, jx);
            dy = _mm_sub_sd(iy, jy);
            dz = _mm_sub_sd(iz, jz);

            rsq = gmx_mm_calc_rsq_pd(dx,dy,dz);

            rinv = gmx_mm_invsqrt_pd(rsq);
            r = _mm_mul_sd(rsq,rinv);

            /* Compute raj_inv for the single j entry */
            raj_inv = gmx_mm_inv_pd(raj);

            /* Evaluate influence of atom aj -> ai */
            t1 = _mm_add_sd(r,sk_aj);
            t2 = _mm_sub_sd(r,sk_aj);
            t3 = _mm_sub_sd(sk_aj,r);
            obc_mask1 = _mm_cmplt_sd(rai, t1);
            obc_mask2 = _mm_cmplt_sd(rai, t2);
            obc_mask3 = _mm_cmplt_sd(rai, t3);

            uij = gmx_mm_inv_pd(t1);
            lij = _mm_or_pd(_mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                            _mm_andnot_pd(obc_mask2,rai_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_sd(uij, uij);
            uij3 = _mm_mul_sd(uij2,uij);
            lij2 = _mm_mul_sd(lij, lij);
            lij3 = _mm_mul_sd(lij2,lij);

            diff2 = _mm_sub_sd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_aj = _mm_mul_sd(sk_aj,sk_aj);
            sk2_rinv = _mm_mul_sd(sk2_aj,rinv);
            prod = _mm_mul_sd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));

            t1 = _mm_sub_sd(lij,uij);
            /* The packed multiply nested here only feeds the lower lane of
             * the surrounding scalar operations. */
            t2 = _mm_mul_sd(diff2,
                            _mm_sub_sd(_mm_mul_pd(onefourth,r),
                                       prod));
            t3 = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1 = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4 = _mm_mul_sd(two,_mm_sub_sd(rai_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_sd(half,_mm_add_sd(t1,t4));

            sum_ai = _mm_add_sd(sum_ai, _mm_and_pd(t1,obc_mask1) );

            t1 = _mm_add_sd(_mm_mul_sd(half,lij2),
                            _mm_mul_sd(prod,lij3));
            t1 = _mm_sub_sd(t1,
                            _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                  _mm_mul_sd(lij3,r))));
            t2 = _mm_mul_sd(onefourth,
                            _mm_add_sd(_mm_mul_sd(uij,rinv),
                                       _mm_mul_sd(uij3,r)));
            t2 = _mm_sub_sd(t2,
                            _mm_add_sd(_mm_mul_sd(half,uij2),
                                       _mm_mul_sd(prod,uij3)));
            t3 = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                            _mm_mul_sd(rinv,rinv));
            t3 = _mm_sub_sd(t3,
                            _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                       _mm_add_sd(one,
                                                  _mm_mul_sd(sk2_rinv,rinv))));
            /* The nested packed add only feeds the lower lane here as well. */
            t1 = _mm_mul_sd(rinv,
                            _mm_add_sd(_mm_mul_sd(dlij,t1),
                                       _mm_add_pd(t2,t3)));

            dadx1 = _mm_and_pd(t1,obc_mask1);

            /* Evaluate influence of atom ai -> aj */
            t1 = _mm_add_sd(r,sk_ai);
            t2 = _mm_sub_sd(r,sk_ai);
            t3 = _mm_sub_sd(sk_ai,r);
            obc_mask1 = _mm_cmplt_sd(raj, t1);
            obc_mask2 = _mm_cmplt_sd(raj, t2);
            obc_mask3 = _mm_cmplt_sd(raj, t3);

            uij = gmx_mm_inv_pd(t1);
            lij = _mm_or_pd( _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                            _mm_andnot_pd(obc_mask2,raj_inv));
            dlij = _mm_and_pd(one,obc_mask2);
            uij2 = _mm_mul_sd(uij, uij);
            uij3 = _mm_mul_sd(uij2,uij);
            lij2 = _mm_mul_sd(lij, lij);
            lij3 = _mm_mul_sd(lij2,lij);

            diff2 = _mm_sub_sd(uij2,lij2);
            lij_inv = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv = _mm_mul_sd(sk2_ai,rinv);
            prod = _mm_mul_sd(onefourth,sk2_rinv);

            logterm = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));

            t1 = _mm_sub_sd(lij,uij);
            t2 = _mm_mul_sd(diff2,
                            _mm_sub_sd(_mm_mul_sd(onefourth,r),
                                       prod));
            t3 = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1 = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4 = _mm_mul_sd(two,_mm_sub_sd(raj_inv,lij));
            t4 = _mm_and_pd(t4,obc_mask3);
            t1 = _mm_mul_sd(half,_mm_add_sd(t1,t4));

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_and_pd(t1,obc_mask1));

            t1 = _mm_add_sd(_mm_mul_sd(half,lij2),
                            _mm_mul_sd(prod,lij3));
            t1 = _mm_sub_sd(t1,
                            _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                  _mm_mul_sd(lij3,r))));
            t2 = _mm_mul_sd(onefourth,
                            _mm_add_sd(_mm_mul_sd(uij,rinv),
                                       _mm_mul_sd(uij3,r)));
            t2 = _mm_sub_sd(t2,
                            _mm_add_sd(_mm_mul_sd(half,uij2),
                                       _mm_mul_sd(prod,uij3)));
            t3 = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                            _mm_mul_sd(rinv,rinv));
            t3 = _mm_sub_sd(t3,
                            _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                       _mm_add_sd(one,
                                                  _mm_mul_sd(sk2_rinv,rinv))));
            t1 = _mm_mul_sd(rinv,
                            _mm_add_sd(_mm_mul_sd(dlij,t1),
                                       _mm_add_sd(t2,t3)));

            dadx2 = _mm_and_pd(t1,obc_mask1);

            /* Still two full-width stores (4 doubles) for the single entry;
             * NOTE(review): whatever reads dadx back presumably advances by
             * the same amount -- confirm before changing this. */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        }
        /* Fold the per-i sum into work[ii]. */
        gmx_mm_update_1pot_pd(sum_ai,work+ii);

    }

    /* Parallel summations */
    if(PARTDECOMP(cr))
    {
        gmx_sum(natoms, work, cr);
    }
    else if(DOMAINDECOMP(cr))
    {
        dd_atom_sum_real(cr->dd, work);
    }

    if(gb_algorithm==egbHCT)
    {
        /* HCT */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
            if(born->use[i] != 0)
            {
                /* rad = 1/(1/rr - work[i]) with rr = radius - doffset,
                 * clamped from below at the unreduced radius. */
                rr = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
                sum = 1.0/rr - work[i];
                min_rad = rr + doffset;
                rad = 1.0/sum;

                born->bRad[i] = rad > min_rad ? rad : min_rad;
                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
            }
        }

        /* Extra communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
        }
    }
    else
    {
        /* OBC */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
            if(born->use[i] != 0)
            {
                /* rr_inv2 uses the full radius, rr_inv the offset-reduced one. */
                rr = top->atomtypes.gb_radius[md->typeA[i]];
                rr_inv2 = 1.0/rr;
                rr = rr-doffset;
                rr_inv = 1.0/rr;
                sum = rr * work[i];
                sum2 = sum * sum;
                sum3 = sum2 * sum;

                /* bRad = 1 / (rr_inv - tanh(alpha*s - beta*s^2 + gamma*s^3) * rr_inv2) */
                tsum = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
                born->bRad[i] = rr_inv - tsum*rr_inv2;
                born->bRad[i] = 1.0 / born->bRad[i];

                fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);

                /* drobc = (1 - tanh^2) * rr * (alpha - 2*beta*s + 3*gamma*s^2) * rr_inv2 */
                tchain = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
            }
        }
        /* Extra (local) communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
            dd_atom_spread_real(cr->dd, born->drobc);
        }
    }

    return 0;
}
/*
 * SSE2 version of lanczos_sum for double.
 *
 * Evaluates a ratio of two degree-12 polynomials in x.  The table below
 * interleaves the two coefficient sets, highest power first: even slots feed
 * SIMD lane 0 (the numerator), odd slots feed lane 1 (the denominator), so one
 * aligned load fetches the matching coefficient of both polynomials.
 *
 * Each polynomial is split into its even-power and odd-power terms and both
 * halves are advanced with Horner steps in x*x; the odd half receives one
 * final multiply by x before the halves are added.  The result is
 * lane 0 / lane 1.
 *
 * NOTE(review): an explicit specialization normally needs a preceding
 * `template <>`; none is visible next to this definition -- confirm.
 */
inline double lanczos13m53::lanczos_sum<double>(const double& x)
{
   static const ALIGN16 double coeff[26] = {
      static_cast<double>(2.506628274631000270164908177133837338626L),
      static_cast<double>(1u),
      static_cast<double>(210.8242777515793458725097339207133627117L),
      static_cast<double>(66u),
      static_cast<double>(8071.672002365816210638002902272250613822L),
      static_cast<double>(1925u),
      static_cast<double>(186056.2653952234950402949897160456992822L),
      static_cast<double>(32670u),
      static_cast<double>(2876370.628935372441225409051620849613599L),
      static_cast<double>(357423u),
      static_cast<double>(31426415.58540019438061423162831820536287L),
      static_cast<double>(2637558u),
      static_cast<double>(248874557.8620541565114603864132294232163L),
      static_cast<double>(13339535u),
      static_cast<double>(1439720407.311721673663223072794912393972L),
      static_cast<double>(45995730u),
      static_cast<double>(6039542586.35202800506429164430729792107L),
      static_cast<double>(105258076u),
      static_cast<double>(17921034426.03720969991975575445893111267L),
      static_cast<double>(150917976u),
      static_cast<double>(35711959237.35566804944018545154716670596L),
      static_cast<double>(120543840u),
      static_cast<double>(42919803642.64909876895789904700198885093L),
      static_cast<double>(39916800u),
      static_cast<double>(23531376880.41075968857200767445163675473L),
      static_cast<double>(0u)
   };

   /* No `register` specifier: it is deprecated in C++11 and removed in C++17. */
   const __m128d vx  = _mm_load1_pd(&x);
   const __m128d vx2 = _mm_mul_pd(vx, vx);

   /* Leading coefficients: x^12 for the even half, x^11 for the odd half. */
   __m128d sum_even = _mm_load_pd(coeff);
   __m128d sum_odd  = _mm_load_pd(coeff + 2);

   /* Five paired Horner steps; each consumes four table entries
      (one even-power pair and one odd-power pair). */
   for (int idx = 4; idx <= 20; idx += 4)
   {
      sum_even = _mm_add_pd(_mm_mul_pd(sum_even, vx2), _mm_load_pd(coeff + idx));
      sum_odd  = _mm_add_pd(_mm_mul_pd(sum_odd,  vx2), _mm_load_pd(coeff + idx + 2));
   }

   /* Final even step adds the constant terms; the odd half only needs the
      remaining single factor of x. */
   sum_even = _mm_add_pd(_mm_mul_pd(sum_even, vx2), _mm_load_pd(coeff + 24));
   sum_odd  = _mm_mul_pd(sum_odd, vx);
   sum_even = _mm_add_pd(sum_even, sum_odd);

   double ALIGN16 t[2];
   _mm_store_pd(t, sum_even);
   return t[0] / t[1];
}
/*
 * Innermost kernel of a blocked matrix multiply: C += A * B.
 *
 * A, B and C are all indexed row-major with row stride lda.  For every (i, j)
 * step a 2-row by 10-column tile of C is held in ten SSE2 registers (five
 * 2-double vectors per row) and updated with a 2x2 block of A and a 2x10 slab
 * of B for two consecutive values of k at a time.  The tile is written back
 * once the k loop has finished.
 *
 * Requirements visible from the code:
 *   - i advances by 2, j by 10 and k by 2 with no remainder handling, so
 *     row i+1, columns j..j+9 and index k+1 are always accessed.
 *   - B and C are accessed with _mm_load_pd / _mm_store_pd, so every address
 *     touched in them must be 16-byte aligned.
 */
void do_l1_block (int lda, int M, int N, int K, double* A, double* B, double* C)
{
  for (int i = 0; i < M; i += 2)
  {
    for (int j = 0; j < N; j += 10)
    {
      /* Rows i and i+1 of the current C tile. */
      double* c_top = C + i*lda + j;
      double* c_bot = c_top + lda;

      /* Pull the 2x10 tile into registers. */
      __m128d top0 = _mm_load_pd(c_top);
      __m128d bot0 = _mm_load_pd(c_bot);
      __m128d top1 = _mm_load_pd(c_top + 2);
      __m128d bot1 = _mm_load_pd(c_bot + 2);
      __m128d top2 = _mm_load_pd(c_top + 4);
      __m128d bot2 = _mm_load_pd(c_bot + 4);
      __m128d top3 = _mm_load_pd(c_top + 6);
      __m128d bot3 = _mm_load_pd(c_bot + 6);
      __m128d top4 = _mm_load_pd(c_top + 8);
      __m128d bot4 = _mm_load_pd(c_bot + 8);

      for (int k = 0; k < K; k += 2)
      {
        /* 2x2 block of A, each scalar broadcast to both lanes; shared by
           all five column pairs below. */
        const double* a_top = A + i*lda + k;
        const double* a_bot = a_top + lda;
        const __m128d a_top_k  = _mm_load1_pd(a_top);      /* A(i,   k)   */
        const __m128d a_top_k1 = _mm_load1_pd(a_top + 1);  /* A(i,   k+1) */
        const __m128d a_bot_k  = _mm_load1_pd(a_bot);      /* A(i+1, k)   */
        const __m128d a_bot_k1 = _mm_load1_pd(a_bot + 1);  /* A(i+1, k+1) */

        /* Rows k and k+1 of B, starting at column j. */
        const double* b_row_k  = B + k*lda + j;
        const double* b_row_k1 = b_row_k + lda;
        __m128d bk, bk1;

        /* Columns j .. j+1 */
        bk   = _mm_load_pd(b_row_k);
        bk1  = _mm_load_pd(b_row_k1);
        top0 = _mm_add_pd(top0, _mm_mul_pd(a_top_k,  bk));
        bot0 = _mm_add_pd(bot0, _mm_mul_pd(a_bot_k,  bk));
        top0 = _mm_add_pd(top0, _mm_mul_pd(a_top_k1, bk1));
        bot0 = _mm_add_pd(bot0, _mm_mul_pd(a_bot_k1, bk1));

        /* Columns j+2 .. j+3 */
        bk   = _mm_load_pd(b_row_k  + 2);
        bk1  = _mm_load_pd(b_row_k1 + 2);
        top1 = _mm_add_pd(top1, _mm_mul_pd(a_top_k,  bk));
        bot1 = _mm_add_pd(bot1, _mm_mul_pd(a_bot_k,  bk));
        top1 = _mm_add_pd(top1, _mm_mul_pd(a_top_k1, bk1));
        bot1 = _mm_add_pd(bot1, _mm_mul_pd(a_bot_k1, bk1));

        /* Columns j+4 .. j+5 */
        bk   = _mm_load_pd(b_row_k  + 4);
        bk1  = _mm_load_pd(b_row_k1 + 4);
        top2 = _mm_add_pd(top2, _mm_mul_pd(a_top_k,  bk));
        bot2 = _mm_add_pd(bot2, _mm_mul_pd(a_bot_k,  bk));
        top2 = _mm_add_pd(top2, _mm_mul_pd(a_top_k1, bk1));
        bot2 = _mm_add_pd(bot2, _mm_mul_pd(a_bot_k1, bk1));

        /* Columns j+6 .. j+7 */
        bk   = _mm_load_pd(b_row_k  + 6);
        bk1  = _mm_load_pd(b_row_k1 + 6);
        top3 = _mm_add_pd(top3, _mm_mul_pd(a_top_k,  bk));
        bot3 = _mm_add_pd(bot3, _mm_mul_pd(a_bot_k,  bk));
        top3 = _mm_add_pd(top3, _mm_mul_pd(a_top_k1, bk1));
        bot3 = _mm_add_pd(bot3, _mm_mul_pd(a_bot_k1, bk1));

        /* Columns j+8 .. j+9 */
        bk   = _mm_load_pd(b_row_k  + 8);
        bk1  = _mm_load_pd(b_row_k1 + 8);
        top4 = _mm_add_pd(top4, _mm_mul_pd(a_top_k,  bk));
        bot4 = _mm_add_pd(bot4, _mm_mul_pd(a_bot_k,  bk));
        top4 = _mm_add_pd(top4, _mm_mul_pd(a_top_k1, bk1));
        bot4 = _mm_add_pd(bot4, _mm_mul_pd(a_bot_k1, bk1));
      }

      /* Write the finished tile back to C. */
      _mm_store_pd(c_top,     top0);
      _mm_store_pd(c_bot,     bot0);
      _mm_store_pd(c_top + 2, top1);
      _mm_store_pd(c_bot + 2, bot1);
      _mm_store_pd(c_top + 4, top2);
      _mm_store_pd(c_bot + 4, bot2);
      _mm_store_pd(c_top + 6, top3);
      _mm_store_pd(c_bot + 6, bot3);
      _mm_store_pd(c_top + 8, top4);
      _mm_store_pd(c_bot + 8, bot4);
    }
  }
}