/*
 * sgemm - single-precision product of a matrix with its own transpose.
 *
 * Computes C = A * A^T and overwrites every entry of C (nothing is
 * accumulated into the previous contents of C).
 *
 *   m, n  dimensions of A (m rows, n columns); C is m-by-m.
 *   A     input, column-major: element (r, c) is A[r + c * m].
 *   C     output, column-major: element (r, c) is C[r + c * m].
 *
 * NOTE(review): the previous version called an external
 * transpose(m, n, A, B) helper whose definition is not visible here; this
 * rewrite assumes B was meant to hold A^T (B[k + j*n] == A[j + k*m]) and
 * reads A directly instead, which also removes the m*n-element VLA.
 *
 * Fixes relative to the previous version:
 *   - the accumulator is zeroed for every output element (it used to be read
 *     uninitialised and carried over between elements);
 *   - float data is processed with single-precision (_ps) intrinsics rather
 *     than being reinterpreted as double by _pd intrinsics;
 *   - every k contributes to the dot product (the old loop skipped odd k and
 *     its scalar tail multiplied by A[k + j*n] instead of the transposed
 *     element).
 *
 * Only unaligned loads/stores are used, so A and C need no special
 * alignment.  The __m128 type and _ps intrinsics come from <xmmintrin.h>,
 * which <emmintrin.h> pulls in on GCC, Clang and MSVC.
 */
void sgemm( int m, int n, float *A, float *C ) {
    int i, j, k;
    const float *a_col;   /* start of column k of A */
    float *c_col = C;     /* start of column j of C */

    for (j = 0; j < m; j += 1, c_col += m) {
        /* Four consecutive rows of column j of C per iteration. */
        for (i = 0; i + 4 <= m; i += 4) {
            __m128 acc = _mm_setzero_ps();
            for (k = 0, a_col = A; k < n; k += 1, a_col += m) {
                __m128 rows = _mm_loadu_ps(a_col + i);   /* A(i..i+3, k) */
                __m128 a_jk = _mm_set1_ps(a_col[j]);     /* A(j, k) in all lanes */
                acc = _mm_add_ps(acc, _mm_mul_ps(rows, a_jk));
            }
            _mm_storeu_ps(c_col + i, acc);
        }
        /* Remaining rows when m is not a multiple of four. */
        for ( ; i < m; i += 1) {
            float acc = 0.0f;
            for (k = 0, a_col = A; k < n; k += 1, a_col += m) {
                acc += a_col[i] * a_col[j];
            }
            c_col[i] = acc;
        }
    }
}
/*
 * Blend four rows of doubles into one output row using four weights.
 *
 *   op       output array of gdouble
 *   ap       first input row; row r starts r * astride BYTES after it
 *   len      number of output samples; samples are produced in pairs, so an
 *            odd len still writes (and reads) one extra element
 *   icp      array of four gdouble weights, one per input row
 *   astride  distance between consecutive input rows, in bytes
 *
 * Each output sample is (row0*w0 + row1*w1) + (row2*w2 + row3*w3).
 * Aligned SSE2 loads/stores are used on the rows and on the output, so
 * those addresses have to be 16-byte aligned.
 */
void
interpolate_gdouble_cubic_sse2 (gpointer op, const gpointer ap,
    gint len, const gpointer icp, gint astride)
{
  gdouble *out = op;
  gdouble *coeffs = icp;
  const gdouble *row[4];
  __m128d weight[4];
  gint tap, pos;

  /* Resolve the four row pointers and broadcast each weight to both lanes. */
  for (tap = 0; tap < 4; tap++) {
    row[tap] = (const gdouble *) ((const gint8 *) ap + tap * astride);
    weight[tap] = _mm_load1_pd (coeffs + tap);
  }

  for (pos = 0; pos < len; pos += 2) {
    __m128d p0 = _mm_mul_pd (_mm_load_pd (row[0] + pos), weight[0]);
    __m128d p1 = _mm_mul_pd (_mm_load_pd (row[1] + pos), weight[1]);
    __m128d p2 = _mm_mul_pd (_mm_load_pd (row[2] + pos), weight[2]);
    __m128d p3 = _mm_mul_pd (_mm_load_pd (row[3] + pos), weight[3]);
    __m128d front = _mm_add_pd (p0, p1);
    __m128d back = _mm_add_pd (p2, p3);

    _mm_store_pd (out + pos, _mm_add_pd (front, back));
  }
}
Exemplo n.º 3
0
/*
 * Register-tiled update C += A * B on column-major blocks.
 *
 *   A is indexed as A[i + k*M], B as B[k + j*K], C as C[i + j*M].
 *
 * Each innermost step updates two rows of two adjacent columns of C with
 * the contributions of two consecutive k values.  A and C are accessed
 * with aligned loads/stores, so A+i+k*M and C+i+j*M must be 16-byte
 * aligned for every tile.
 *
 * NOTE(review): the 2x2x2 tile is hard-coded in the loads below, so this
 * presumably requires RSIZE_M == RSIZE_N == RSIZE_K == 2 and even M, N, K
 * -- confirm against the macro definitions and the caller.
 */
static void do_block(int M, int K, int N, double* A, double* B, double* C) {
    for (int kk = 0; kk < K; kk += RSIZE_K) {
        for (int jj = 0; jj < N; jj += RSIZE_N) {
            /* The four B entries of this (k, j) tile, each broadcast to both lanes. */
            const double *b_col0 = B + kk + jj*K;
            const double *b_col1 = B + kk + (jj+1)*K;
            const __m128d b_k0_j0 = _mm_load1_pd(b_col0);
            const __m128d b_k1_j0 = _mm_load1_pd(b_col0 + 1);
            const __m128d b_k0_j1 = _mm_load1_pd(b_col1);
            const __m128d b_k1_j1 = _mm_load1_pd(b_col1 + 1);

            for (int ii = 0; ii < M; ii += RSIZE_M) {
                const __m128d a_k0 = _mm_load_pd(A + ii + kk*M);
                const __m128d a_k1 = _mm_load_pd(A + ii + (kk+1)*M);

                __m128d acc_j0 = _mm_load_pd(C + ii + jj*M);
                __m128d acc_j1 = _mm_load_pd(C + ii + (jj+1)*M);

                /* First the k contribution, then the k+1 contribution. */
                acc_j0 = _mm_add_pd(acc_j0, _mm_mul_pd(a_k0, b_k0_j0));
                acc_j1 = _mm_add_pd(acc_j1, _mm_mul_pd(a_k0, b_k0_j1));
                acc_j0 = _mm_add_pd(acc_j0, _mm_mul_pd(a_k1, b_k1_j0));
                acc_j1 = _mm_add_pd(acc_j1, _mm_mul_pd(a_k1, b_k1_j1));

                _mm_store_pd(C + ii + jj*M, acc_j0);
                _mm_store_pd(C + ii + (jj+1)*M, acc_j1);
            }
        }
    }
}
Exemplo n.º 4
0
/*
 * Demo: 2x2 matrix product C = C + A x B with SSE2, printed to stdout.
 * All three matrices are stored column by column.
 */
int main( int argc, char **argv ) {
    /*   A = |1 3|    B = |3 0|    C = |0 0|
             |2 4|        |0 2|        |0 0|   */
    double A[4] = { 1, 2, 3, 4 };
    double B[4] = { 3, 0, 0, 2 };
    double C[4] = { 0, 0, 0, 0 };

    /* Column j of C receives, for k = 0 and 1, column k of A scaled by B[k + 2*j]:
         C[0] += A[0]*B[0] + A[2]*B[1]
         C[1] += A[1]*B[0] + A[3]*B[1]
         C[2] += A[0]*B[2] + A[2]*B[3]
         C[3] += A[1]*B[2] + A[3]*B[3] */

    /* keep both columns of C in vector variables while accumulating */
    __m128d col0 = _mm_loadu_pd( &C[0] ); /* (C[0],C[1]) */
    __m128d col1 = _mm_loadu_pd( &C[2] ); /* (C[2],C[3]) */

    int k = 0;
    while( k < 2 ) {
        const __m128d a_col  = _mm_loadu_pd( &A[2*k] );   /* column k of A */
        const __m128d b_k_j0 = _mm_load1_pd( &B[k] );     /* B(k,0) in both lanes */
        const __m128d b_k_j1 = _mm_load1_pd( &B[2 + k] ); /* B(k,1) in both lanes */

        col0 = _mm_add_pd( col0, _mm_mul_pd( a_col, b_k_j0 ) );
        col1 = _mm_add_pd( col1, _mm_mul_pd( a_col, b_k_j1 ) );
        k++;
    }

    /* write the accumulated columns back into C */
    _mm_storeu_pd( &C[0], col0 );
    _mm_storeu_pd( &C[2], col1 );

    /* print A, B and the result row by row */
    printf( "|%g %g| * |%g %g| = |%g %g|\n", A[0], A[2], B[0], B[2], C[0], C[2] );
    printf( "|%g %g|   |%g %g|   |%g %g|\n", A[1], A[3], B[1], B[3], C[1], C[3] );

    return 0;
}
/*
 * Dot product of `a` with two coefficient rows, followed by a linear blend
 * of the two results.
 *
 *   o        receives the single gdouble result
 *   a        input samples (read with unaligned loads)
 *   b        first coefficient row; the second row starts bstride BYTES
 *            later (both rows are read with aligned loads)
 *   len      number of samples; consumed four at a time, so a len that is
 *            not a multiple of four still reads up to the next multiple
 *   icoeff   pointer to the blend weight (only icoeff[0] is read)
 *   bstride  distance between the two coefficient rows, in bytes
 *
 * With s0 = sum(a*row0) and s1 = sum(a*row1) kept per lane, the stored
 * value is the sum over both lanes of (s0 - s1) * icoeff[0] + s1.
 */
static inline void
inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a,
    const gdouble * b, gint len, const gdouble * icoeff, gint bstride)
{
  const gdouble *row0 = b;
  const gdouble *row1 = (const gdouble *) ((const gint8 *) b + bstride);
  __m128d acc0 = _mm_setzero_pd ();
  __m128d acc1 = _mm_setzero_pd ();
  __m128d blended;
  gint pos, half;

  for (pos = 0; pos < len; pos += 4) {
    /* Two pairs of samples per step. */
    for (half = 0; half < 4; half += 2) {
      __m128d in = _mm_loadu_pd (a + pos + half);

      acc0 = _mm_add_pd (acc0, _mm_mul_pd (in, _mm_load_pd (row0 + pos + half)));
      acc1 = _mm_add_pd (acc1, _mm_mul_pd (in, _mm_load_pd (row1 + pos + half)));
    }
  }

  /* Blend the two per-lane sums, then fold the high lane into the low one. */
  blended = _mm_mul_pd (_mm_sub_pd (acc0, acc1), _mm_load1_pd (icoeff));
  blended = _mm_add_pd (blended, acc1);
  blended = _mm_add_sd (blended, _mm_unpackhi_pd (blended, blended));
  _mm_store_sd (o, blended);
}
Exemplo n.º 6
0
/**
 * C += A * B for square lda-by-lda matrices stored column-major, using SSE2.
 *
 * For every column j of C and every k, column k of A scaled by B(k,j) is
 * added to column j of C: eight rows per step while possible, then two rows
 * per step, then a scalar loop for a final odd row.  Only unaligned
 * loads/stores are used, so the matrices need no special alignment.
 **/
void square_dgemm(int lda, double* A, double* B, double* C) {
    const int end8 = lda / 8 * 8;   /* rows covered by the 8-wide loop */
    const int end2 = lda / 2 * 2;   /* rows covered by the vector loops */

    for (int col = 0; col < lda; col++) {
        for (int inner = 0; inner < lda; inner++) {
            /* B(inner, col) replicated in both lanes */
            const __m128d b_val = _mm_load1_pd(B + inner + col*lda);

            double* a_col = A + inner*lda;
            double* c_col = C + col*lda;
            int row = 0;

            /* eight rows per step, handled as four pairs */
            for (; row < end8; row += 8) {
                for (int off = 0; off < 8; off += 2) {
                    const __m128d a_pair = _mm_loadu_pd(a_col + row + off);
                    __m128d c_pair = _mm_loadu_pd(c_col + row + off);
                    c_pair = _mm_add_pd(c_pair, _mm_mul_pd(b_val, a_pair));
                    _mm_storeu_pd(c_col + row + off, c_pair);
                }
            }

            /* leftover pairs of rows */
            for (; row < end2; row += 2) {
                const __m128d a_pair = _mm_loadu_pd(a_col + row);
                __m128d c_pair = _mm_loadu_pd(c_col + row);
                c_pair = _mm_add_pd(c_pair, _mm_mul_pd(b_val, a_pair));
                _mm_storeu_pd(c_col + row, c_pair);
            }

            /* final odd row, if any */
            for (; row < lda; row++) {
                C[row + col*lda] += A[row + inner*lda] * B[inner + col*lda];
            }
        }
    }
}
Exemplo n.º 7
0
// Thin wrapper that forwards A to _mm_load1_pd and returns the resulting
// vector.
// NOTE(review): the comment lines inside the body look like FileCheck-style
// expectations on the generated IR and assembly; presumably a test driver
// reads them, so they should be kept verbatim -- confirm against the RUN
// lines of the enclosing test file.
__m128d test_mm_load1_pd(double const* A) {
  // DAG-LABEL: test_mm_load1_pd
  // DAG: load double, double* %{{.*}}, align 8
  // DAG: insertelement <2 x double> undef, double %{{.*}}, i32 0
  // DAG: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
  //
  // ASM-LABEL: test_mm_load1_pd
  // ASM: movsd
  // ASM: movlhps
  return _mm_load1_pd(A);
}
/*
 * Blend two rows of doubles into one output row using two weights.
 *
 *   op       output array of gdouble
 *   ap       first input row; the second row starts astride BYTES later
 *   len      number of output samples; four are produced per iteration, so
 *            a len that is not a multiple of four still touches elements up
 *            to the next multiple
 *   icp      array of two gdouble weights, one per input row
 *   astride  distance between the two input rows, in bytes
 *
 * Each output sample is row0*w0 + row1*w1.  Aligned SSE2 loads/stores are
 * used on the rows and on the output, so those addresses have to be 16-byte
 * aligned.
 */
void
interpolate_gdouble_linear_sse2 (gpointer op, const gpointer ap,
    gint len, const gpointer icp, gint astride)
{
  gdouble *out = op;
  gdouble *coeffs = icp;
  const gdouble *row0 = (const gdouble *) ap;
  const gdouble *row1 = (const gdouble *) ((const gint8 *) ap + astride);
  const __m128d w0 = _mm_load1_pd (coeffs + 0);
  const __m128d w1 = _mm_load1_pd (coeffs + 1);
  gint pos, half;

  for (pos = 0; pos < len; pos += 4) {
    /* Two pairs of samples per step; each pair is stored before the next is read. */
    for (half = 0; half < 4; half += 2) {
      __m128d first = _mm_mul_pd (_mm_load_pd (row0 + pos + half), w0);
      __m128d second = _mm_mul_pd (_mm_load_pd (row1 + pos + half), w1);

      _mm_store_pd (out + pos + half, _mm_add_pd (first, second));
    }
  }
}
Exemplo n.º 9
0
/*
 * In-place scaled add: v1[i] += lambda * v2[i] for i in [0, n).
 *
 * Elements are processed two at a time with SSE2; when n is odd the last
 * element is handled by a scalar statement.  Aligned loads/stores are used
 * only when ALGEBRA_IS_ALIGNED reports both pointers as aligned, otherwise
 * the unaligned variants are used.
 */
ALGEBRA_INLINE void		vector_addm_double_aligned_32 (double* v1,double lambda,const double* v2,size_t n)
{
	const size_t pairs = n / 2;
	const size_t leftover = n % 2;
	/* lambda replicated in both lanes */
	const __m128d scale = _mm_load1_pd(&lambda);
	size_t idx;

	if (pairs > 0) {
		if (ALGEBRA_IS_ALIGNED(v1) && ALGEBRA_IS_ALIGNED(v2)) {
			for (idx = 0; idx < pairs; idx++, v1 += 2, v2 += 2) {
				/* load two values from each array */
				const __m128d dst = _mm_load_pd(v1);
				const __m128d src = _mm_load_pd(v2);
				/* scale, add, and write back */
				_mm_store_pd(v1, _mm_add_pd(dst, _mm_mul_pd(src, scale)));
			}
		}
		else {
			for (idx = 0; idx < pairs; idx++, v1 += 2, v2 += 2) {
				/* same as above with unaligned accesses */
				const __m128d dst = _mm_loadu_pd(v1);
				const __m128d src = _mm_loadu_pd(v2);
				_mm_storeu_pd(v1, _mm_add_pd(dst, _mm_mul_pd(src, scale)));
			}
		}
	}

	/* v1 and v2 now point past the processed pairs; finish the odd element */
	for (idx = 0; idx < leftover; idx++)
		v1[idx] += lambda*v2[idx];

}
Exemplo n.º 10
0
 /** Returns a vector with the scalar v replicated in both double lanes. */
 static forcedinline ParallelType load1 (Type v) noexcept
 {
     return _mm_set1_pd (v);
 }
Exemplo n.º 11
0
/*
 * exchsolution_gmrfData_1 - boundary handling and neighbor exchange for
 * fieldData_Solution_GMRF[1].
 *
 * The code looks machine-generated (unrolled loops, empty fragment loops,
 * stray ';' statements).  In order, it does the following:
 *   1. For each of the four neighbor slots that is NOT valid, writes 0.0 to
 *      a fixed set of entries of fieldData_Solution_GMRF[1].
 *   2. Runs four rounds of MPI_Isend / MPI_Irecv with neighbors that are
 *      both valid and remote; each round waits for its receives and then
 *      for its sends via waitForMPIReq.
 *
 * Send tags are (commId << 16) + (neighbor_fragCommId & 0xffff); receive
 * tags are (neighbor_fragCommId << 16) + (commId & 0xffff).
 *
 * The `slot` parameter is not referenced anywhere in the body.
 * NOTE(review): the index arithmetic (i1*6 + c) suggests six entries per
 * row in the field -- confirm against the field layout.
 */
void exchsolution_gmrfData_1(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
/* Neighbor 0 not valid: zero entries 8, 14 and 20 (i1*6+2 for i1 = 1..3). */
if ((!neighbor_isValid[0][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S493, S492, S494 */
{
{
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i1 = 1;
for (; (i1<=2); i1 += 2) {
fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[((i1*6)+8)] = 0.000000e+00;
}
for (; (i1<=3); i1 += 1) {
fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00;
}
}
/* xPos is assigned here but not read by anything else in this function.
   The middle loop (i1<1) never runs because i1 is already 2 when it is reached. */
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<1); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<4); i1 += 1) {
xPos = posBegin[0];
}
}
}
/* Same pattern for yPos: assigned, never consumed; the vector loop does not execute. */
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(2.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<1); i1 += 4) {
/* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<4); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
}
}
/* Neighbor 1 not valid: zero entries 10, 16 and 22 (i1*6+4 for i1 = 1..3). */
if ((!neighbor_isValid[0][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S496, S495, S497 */
{
{
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i1 = 1;
for (; (i1<=2); i1 += 2) {
fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[((i1*6)+10)] = 0.000000e+00;
}
for (; (i1<=3); i1 += 1) {
fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00;
}
}
/* xPos / yPos below are again only assigned, never read in this function. */
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<1); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<4); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(2.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<1); i1 += 4) {
/* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<4); i1 += 1) {
yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
}
}
/* Neighbor 2 not valid: zero entries 8, 9 and 10 (i2+6 for i2 = 2..4). */
if ((!neighbor_isValid[0][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S500, S499, S498 */
{
{
/* xPos is only assigned in these loops; nothing in this function reads it. */
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=4); i2 += 1) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i2 = 2;
for (; (i2<=3); i2 += 2) {
fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[(i2+7)] = 0.000000e+00;
}
for (; (i2<=4); i2 += 1) {
fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00;
}
}
}
/* yPos is only assigned here as well. */
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=4); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
/* Neighbor 3 not valid: zero entries 20, 21 and 22 (i2+18 for i2 = 2..4). */
if ((!neighbor_isValid[0][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S503, S502, S501 */
{
{
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=4); i2 += 1) {
xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
int i2 = 2;
for (; (i2<=3); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=4); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i2 = 2;
for (; (i2<=3); i2 += 2) {
fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[(i2+19)] = 0.000000e+00;
}
for (; (i2<=4); i2 += 1) {
fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
/* Round 1: send from entry 10 to remote neighbor 1, receive into entry 8
   from remote neighbor 0 (datatype mpiDatatype_3_1_6 for both). */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(&fieldData_Solution_GMRF[1][10], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
/* Wait for the round-1 receive, then (further down) for the round-1 send. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
/* Round 2: send from entry 20 to remote neighbor 3, receive into entry 8
   from remote neighbor 2 (datatype mpiDatatype_1_3_6 for both). */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_Solution_GMRF[1][20], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
/* Round 3: both sends start at entry 3 (to remote neighbors 0 and 1);
   receives go into entry 1 (from neighbor 0) and entry 5 (from neighbor 1).
   Datatype is mpiDatatype_5_1_6 throughout. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][5], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
/* Round 4: both sends start at entry 13 (to remote neighbors 2 and 3);
   receives go into entry 1 (from neighbor 2) and entry 25 (from neighbor 3).
   Datatype is mpiDatatype_1_5_6 throughout. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][25], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
/* Final waits: make sure the round-4 sends have completed before returning. */
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Exemplo n.º 12
0
void exchlaplacecoeff_gmrfData_0(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((!neighbor_isValid[0][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1053, S1056, S1059, S1050, S1058, S1052, S1055, S1060, S1054, S1057, S1051 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+32)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+152)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+104)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+80)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<0); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<3); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+128)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+176)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+200)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(1.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<0); i1 += 4) {
/* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<3); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+56)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1071, S1065, S1068, S1062, S1070, S1064, S1067, S1061, S1069, S1063, S1066 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+201)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+57)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+81)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+177)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(1.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<0); i1 += 4) {
/* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<3); i1 += 1) {
yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+105)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+129)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+153)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<0); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<3); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+33)] = 0.000000e+00;
}
for (; (i1<=2); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1080, S1074, S1077, S1082, S1076, S1079, S1073, S1072, S1081, S1075, S1078 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+127)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00;
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=3); i2 += 1) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+199)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=3); i2 += 1) {
yPos = posBegin[1];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+31)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+175)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+79)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+55)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+151)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+7)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+103)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1083, S1092, S1086, S1089, S1088, S1091, S1085, S1090, S1093, S1087, S1084 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+13)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+61)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+205)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+133)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+85)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=3); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+37)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=2); i2 += 2) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=3); i2 += 1) {
xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+181)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+157)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]);
int i2 = 2;
for (; (i2<=2); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+109)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1094 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*2)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)];
}
for (; (i1<=2); i1 += 1) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 18, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 18, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1095 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*2)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 3;
for (; (i1<=3); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-4)] = buffer_Recv_0_p1[(i1-2)];
}
for (; (i1<=4); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][14], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][8], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1096 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*4)]);
int i1 = 0;
for (; (i1<=2); i1 += 2) {
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)];
}
for (; (i1<=3); i1 += 1) {
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
}
}
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1097 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*4)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 0;
for (; (i1<=2); i1 += 2) {
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)];
buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)];
}
for (; (i1<=3); i1 += 1) {
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Isend(buffer_Send[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Irecv(buffer_Recv[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1098 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*4)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 1;
for (; (i1<=3); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+1)] = buffer_Recv_0_p1[i1];
}
for (; (i1<=4); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)];
}
}
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1099 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*4)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 4;
for (; (i1<=6); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-14)] = buffer_Recv_1_p1[(i1-3)];
}
for (; (i1<=7); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][13], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][7], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][1], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][19], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Exemplo n.º 13
0
/*
 * Compute the per-atom radii stored in born->bRad (the "Still" variant, going
 * by the function name and the STILL_* constants) using SSE2 double-precision
 * intrinsics, processing two j-atoms per vector.
 *
 * Parameters:
 *   cr     - communication record; only used for the PARTDECOMP/DOMAINDECOMP
 *            tests and passed on to the reduction/spread helpers.
 *   fr     - force record; reads shift_vec and natoms_force, writes the
 *            buffer fr->dadx points to and fr->invsqrta.
 *   natoms - number of leading entries of born->gpol_still_work that are
 *            zeroed before accumulation (also the count given to gmx_sum).
 *   top    - not referenced in this function.
 *   atype  - not referenced in this function.
 *   x      - coordinates, indexed as x[3*atom + {0,1,2}].
 *   nl     - neighbor list; nri, iinr, shift, jindex and jjnr are read.
 *   born   - reads gb_radius, vsolv, gpol and use; overwrites
 *            gpol_still_work and writes bRad.
 *
 * Output layout of dadx: every inner iteration (a pair of j-atoms, or the
 * trailing single j-atom) appends four doubles - one 2-wide vector of
 * prod*icf6 followed by one 2-wide vector of prod_ai*icf6. The stores use
 * _mm_store_pd, which requires a 16-byte aligned address.
 *
 * Returns 0 unconditionally.
 */
int 
calc_gb_rad_still_sse2_double(t_commrec *cr, t_forcerec *fr,
                              int natoms, gmx_localtop_t *top,
                              const t_atomtypes *atype, double *x, t_nblist *nl,
                              gmx_genborn_t *born)
{
	/* Note: mdtype, offset, n (only assigned), tx/ty/tz, res, tmp, mask and the
	 * constants three and zero are not referenced by name in the body below. */
	int i,k,n,ii,is3,ii3,nj0,nj1,offset;
	int jnrA,jnrB,j3A,j3B;
    int *mdtype;
	double shX,shY,shZ;
    int *jjnr;
    double *shiftvec;
    
	double gpi_ai,gpi2;
	double factor;
	double *gb_radius;
    double *vsolv;
    double *work;
    double *dadx;
    
	__m128d ix,iy,iz;
	__m128d jx,jy,jz;
	__m128d dx,dy,dz;
	__m128d tx,ty,tz;
	__m128d rsq,rinv,rinv2,rinv4,rinv6;
	__m128d ratio,gpi,rai,raj,vai,vaj,rvdw;
	__m128d ccf,dccf,theta,cosq,term,sinq,res,prod,prod_ai,tmp;
	__m128d mask,icf4,icf6,mask_cmp;
	    
	const __m128d half   = _mm_set1_pd(0.5);
	const __m128d three  = _mm_set1_pd(3.0);
	const __m128d one    = _mm_set1_pd(1.0);
	const __m128d two    = _mm_set1_pd(2.0);
	const __m128d zero   = _mm_set1_pd(0.0);
	const __m128d four   = _mm_set1_pd(4.0);
	
	const __m128d still_p5inv  = _mm_set1_pd(STILL_P5INV);
	const __m128d still_pip5   = _mm_set1_pd(STILL_PIP5);
	const __m128d still_p4     = _mm_set1_pd(STILL_P4);
    
	/* Prefactor applied when converting the accumulated sum into a radius. */
	factor  = 0.5 * ONE_4PI_EPS0;
    
    /* Local aliases. dadx is a private cursor: it is advanced while writing,
     * fr->dadx itself is left unchanged. */
    gb_radius = born->gb_radius;
    vsolv     = born->vsolv;
    work      = born->gpol_still_work;
	jjnr      = nl->jjnr;
    shiftvec  = fr->shift_vec[0];
    dadx      = fr->dadx;
    
	jnrA = jnrB = 0;
    jx = _mm_setzero_pd();
    jy = _mm_setzero_pd();
    jz = _mm_setzero_pd();
    
	n = 0;
    
	/* Clear the accumulation buffer before summing pair contributions. */
	for(i=0;i<natoms;i++)
	{
		work[i]=0;
	}
    
	/* One outer iteration per neighbor-list entry (one i-atom plus shift). */
	for(i=0;i<nl->nri;i++)
	{
        ii     = nl->iinr[i];
		ii3	   = ii*3;
        is3    = 3*nl->shift[i];     
        shX    = shiftvec[is3];  
        shY    = shiftvec[is3+1];
        shZ    = shiftvec[is3+2];
        nj0    = nl->jindex[i];      
        nj1    = nl->jindex[i+1];    
        
        /* Shifted i-atom position, broadcast to both lanes. */
        ix     = _mm_set1_pd(shX+x[ii3+0]);
		iy     = _mm_set1_pd(shY+x[ii3+1]);
		iz     = _mm_set1_pd(shZ+x[ii3+2]);
		

		/* Polarization energy for atom ai */
		gpi    = _mm_setzero_pd();
		
        /* i-atom radius and STILL_P4*vsolv[ii], both broadcast to both lanes. */
        rai     = _mm_load1_pd(gb_radius+ii);
        prod_ai = _mm_set1_pd(STILL_P4*vsolv[ii]);

		/* Main loop: two j-atoms (A in one lane, B in the other) per pass.
		 * The k<nj1-1 bound leaves an odd trailing j-atom for the block below. */
		for(k=nj0;k<nj1-1;k+=2)
		{
			jnrA        = jjnr[k];   
			jnrB        = jjnr[k+1];
            
            j3A         = 3*jnrA;  
			j3B         = 3*jnrB;
            
            /* Gather coordinates, radii and vsolv of both j-atoms.
             * NOTE(review): the GMX_MM_* macros are defined elsewhere; their
             * lane ordering is presumed to be A=low, B=high - confirm. */
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
			GMX_MM_LOAD_2VALUES_PD(vsolv+jnrA,vsolv+jnrB,vaj);
            
			dx          = _mm_sub_pd(ix,jx);
			dy          = _mm_sub_pd(iy,jy);
			dz          = _mm_sub_pd(iz,jz);
            
            /* rinv4 = rinv^4 and rinv6 = rinv^6, built from rinv.
             * NOTE(review): gmx_mm_calc_rsq_pd / gmx_mm_invsqrt_pd are presumed
             * to return dx^2+dy^2+dz^2 and 1/sqrt(rsq) - confirm. */
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv        = gmx_mm_invsqrt_pd(rsq);
            rinv2       = _mm_mul_pd(rinv,rinv);
            rinv4       = _mm_mul_pd(rinv2,rinv2);
            rinv6       = _mm_mul_pd(rinv4,rinv2);
            
            /* ratio = rsq * inv((rai+raj)^2); gmx_mm_inv_pd is presumably a
             * reciprocal - confirm. */
            rvdw        = _mm_add_pd(rai,raj);
            ratio       = _mm_mul_pd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));
            
            /* Per-lane all-ones mask where ratio <= STILL_P5INV. */
            mask_cmp    = _mm_cmple_pd(ratio,still_p5inv);

            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf         = one;
                dccf        = _mm_setzero_pd();
            }
            else 
            {
                /* Clamp ratio to still_p5inv, then:
                 *   theta = ratio*STILL_PIP5
                 *   term  = 0.5*(1-cosq)
                 *   ccf   = term^2
                 *   dccf  = 2*term*sinq*theta
                 * Lanes above the threshold are handled by the clamp rather than
                 * by a blend with the mask. */
                ratio       = _mm_min_pd(ratio,still_p5inv);
                theta       = _mm_mul_pd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term        = _mm_mul_pd(half,_mm_sub_pd(one,cosq));
                ccf         = _mm_mul_pd(term,term);
                dccf        = _mm_mul_pd(_mm_mul_pd(two,term),
                                         _mm_mul_pd(sinq,theta));
            }

            /* prod = STILL_P4*vaj; icf4 = ccf*rinv4; icf6 = (4*ccf-dccf)*rinv6 */
            prod        = _mm_mul_pd(still_p4,vaj);
            icf4        = _mm_mul_pd(ccf,rinv4);
            icf6        = _mm_mul_pd( _mm_sub_pd( _mm_mul_pd(four,ccf),dccf), rinv6);
                        
            /* Contribution of the i-atom to each j-atom (presumably added to
             * work[jnrA] and work[jnrB] by the macro - confirm). */
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_mul_pd(prod_ai,icf4));
            
            /* Contribution of the j-atoms to the i-atom, kept per lane. */
            gpi           = _mm_add_pd(gpi, _mm_mul_pd(prod,icf4) );
            
            /* Append four doubles to the dadx stream: prod*icf6 for both lanes,
             * then prod_ai*icf6 for both lanes. Aligned stores. */
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
		} 
        
        /* Tail: a single leftover j-atom when the j-count is odd. Same formulas
         * as above, but with scalar (_sd) intrinsics: only the low lane is
         * computed, the upper lane is carried over from the first operand. */
        if(k<nj1)
		{
			jnrA        = jjnr[k];   
            
            j3A         = 3*jnrA;  
            
            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
			GMX_MM_LOAD_1VALUE_PD(vsolv+jnrA,vaj);
            
			dx          = _mm_sub_sd(ix,jx);
			dy          = _mm_sub_sd(iy,jy);
			dz          = _mm_sub_sd(iz,jz);
            
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            rinv        = gmx_mm_invsqrt_pd(rsq);
            rinv2       = _mm_mul_sd(rinv,rinv);
            rinv4       = _mm_mul_sd(rinv2,rinv2);
            rinv6       = _mm_mul_sd(rinv4,rinv2);
            
            rvdw        = _mm_add_sd(rai,raj);
            ratio       = _mm_mul_sd(rsq, gmx_mm_inv_pd( _mm_mul_pd(rvdw,rvdw)));
            
            /* NOTE(review): _mm_cmple_sd only compares the low lane and copies
             * the upper lane from ratio, so bit 1 of the movemask below is the
             * sign bit of ratio's upper lane, not a comparison result. */
            mask_cmp    = _mm_cmple_sd(ratio,still_p5inv);
            
            /* gmx_mm_sincos_pd() is quite expensive, so avoid calculating it if we can! */
            if( 0 == _mm_movemask_pd(mask_cmp) )
            {
                /* if ratio>still_p5inv for ALL elements */
                ccf         = one;
                dccf        = _mm_setzero_pd();
            }
            else 
            {
                ratio       = _mm_min_sd(ratio,still_p5inv);
                theta       = _mm_mul_sd(ratio,still_pip5);
                gmx_mm_sincos_pd(theta,&sinq,&cosq);
                term        = _mm_mul_sd(half,_mm_sub_sd(one,cosq));
                ccf         = _mm_mul_sd(term,term);
                dccf        = _mm_mul_sd(_mm_mul_sd(two,term),
                                         _mm_mul_sd(sinq,theta));
            }
            
            prod        = _mm_mul_sd(still_p4,vaj);
            icf4        = _mm_mul_sd(ccf,rinv4);
            icf6        = _mm_mul_sd( _mm_sub_sd( _mm_mul_sd(four,ccf),dccf), rinv6);

            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_mul_sd(prod_ai,icf4));
            
            /* Low lane only; gpi's upper lane keeps its running sum. */
            gpi           = _mm_add_sd(gpi, _mm_mul_sd(prod,icf4) );
            
            /* The tail still appends four doubles (full 2-wide stores), keeping
             * the same per-iteration stride as the paired loop. Only the low
             * lane of each stored vector belongs to atom jnrA.
             * NOTE(review): presumably the reader of dadx ignores the upper
             * lanes written here - confirm against the consumer. */
            _mm_store_pd(dadx,_mm_mul_pd(prod,icf6));
            dadx+=2;
            _mm_store_pd(dadx,_mm_mul_pd(prod_ai,icf6));
            dadx+=2;
		} 
        /* Fold the per-lane gpi accumulator into work[ii] (presumably a
         * horizontal add followed by an increment - confirm in the helper). */
        gmx_mm_update_1pot_pd(gpi,work+ii);
	}
    
	/* Sum up the polarization energy from other nodes */
	if(PARTDECOMP(cr))
	{
		gmx_sum(natoms, work, cr);
	}
	else if(DOMAINDECOMP(cr))
	{
		dd_atom_sum_real(cr->dd, work);
	}
	
	/* Compute the radii */
	/* For atoms flagged in born->use:
	 *   bRad     = factor * gmx_invsqrt((gpol + work)^2)
	 *   invsqrta = gmx_invsqrt(bRad)
	 * Note the loop bound is fr->natoms_force, not the natoms argument. */
	for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
	{		
		if(born->use[i] != 0)
		{
			gpi_ai           = born->gpol[i] + work[i]; /* add gpi to the initial pol energy gpi_ai*/
			gpi2             = gpi_ai * gpi_ai;
			born->bRad[i]   = factor*gmx_invsqrt(gpi2);
			fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
		}
	}
    
	/* Extra (local) communication required for DD */
	if(DOMAINDECOMP(cr))
	{
		dd_atom_spread_real(cr->dd, born->bRad);
		dd_atom_spread_real(cr->dd, fr->invsqrta);
	}
    
	return 0;	
}
Exemplo n.º 14
0
void exchlaplacecoeffData_2(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((!neighbor_isValid[1][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S710, S704, S707, S701, S709, S700, S703, S706, S708, S702, S705 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+394)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+402)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+394)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+226)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+234)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+226)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+170)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+178)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+170)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+58)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+66)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+58)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+450)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+458)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+450)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+114)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+122)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+114)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(4.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<3); i1 += 4) {
/* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<6); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+2)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+10)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+2)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+338)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+346)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+338)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+282)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+290)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+282)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<3); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<6); i1 += 1) {
xPos = posBegin[0];
}
}
}
}
}
if ((!neighbor_isValid[1][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S716, S719, S713, S721, S715, S718, S712, S711, S720, S714, S717 */
{
{
{
{
{
{
{
{
{
{
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(4.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<3); i1 += 4) {
/* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<6); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+454)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+462)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+454)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+230)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+238)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+230)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+118)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+126)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+118)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<3); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<6); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+286)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+294)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+286)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+342)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+350)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+342)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+398)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+406)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+398)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+174)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+182)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+174)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+62)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+70)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+62)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+6)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[((i1*8)+14)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)+6)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[1][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S722, S731, S725, S728, S727, S730, S724, S732, S726, S729, S723 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+344)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+345)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+344)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+400)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+401)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+400)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+120)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+121)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+120)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+8)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+9)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+8)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+64)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+65)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+64)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+456)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+457)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+456)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+232)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+233)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+232)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+288)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+289)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+288)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=6); i2 += 1) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+176)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+177)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+176)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=6); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
if ((!neighbor_isValid[1][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S743, S737, S733, S742, S736, S739, S738, S741, S735, S740, S734 */
{
{
{
{
{
{
{
{
{
{
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=6); i2 += 1) {
yPos = posEnd[1];
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+376)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+377)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+376)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+488)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+489)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+488)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+40)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+41)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+40)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+208)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+209)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+208)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+152)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+153)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+152)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+320)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+321)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+320)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+432)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+433)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+432)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+96)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+97)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+96)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=6); i2 += 1) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_LaplaceCoeff_2_p1[(i2+264)] = 0.000000e+00;
fieldData_LaplaceCoeff_2_p1[(i2+265)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_LaplaceCoeff_2_p1[(i2+264)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S744 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]);
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*5)]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+6)];
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+14)];
}
for (; (i1<=5); i1 += 1) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+6)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(buffer_Send[1], 45, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(buffer_Recv[0], 45, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S745 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*5)]);
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]);
int i1 = 3;
for (; (i1<=6); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)-14)] = buffer_Recv_0_p1[(i1-3)];
fieldData_LaplaceCoeff_2_p1[((i1*8)-6)] = buffer_Recv_0_p1[(i1-2)];
}
for (; (i1<=7); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)-14)] = buffer_Recv_0_p1[(i1-3)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_LaplaceCoeff[2][42], 1, mpiDatatype_9_5_56, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff[2][10], 1, mpiDatatype_9_5_56, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S746 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]);
double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*7)]);
int i1 = 0;
for (; (i1<=5); i1 += 2) {
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+3)];
buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+11)];
}
for (; (i1<=6); i1 += 1) {
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+3)];
}
}
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S747 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]);
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*7)]);
int i1 = 0;
for (; (i1<=5); i1 += 2) {
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+5)];
buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+13)];
}
for (; (i1<=6); i1 += 1) {
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+5)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Isend(buffer_Send[0], 63, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(buffer_Send[1], 63, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(buffer_Recv[0], 63, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Irecv(buffer_Recv[1], 63, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S748 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*7)]);
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]);
int i1 = 1;
for (; (i1<=6); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)-7)] = buffer_Recv_0_p1[(i1-1)];
fieldData_LaplaceCoeff_2_p1[((i1*8)+1)] = buffer_Recv_0_p1[i1];
}
for (; (i1<=7); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)-7)] = buffer_Recv_0_p1[(i1-1)];
}
}
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S749 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*7)]);
double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]);
int i1 = 7;
for (; (i1<=12); i1 += 2) {
fieldData_LaplaceCoeff_2_p1[((i1*8)-49)] = buffer_Recv_1_p1[(i1-7)];
fieldData_LaplaceCoeff_2_p1[((i1*8)-41)] = buffer_Recv_1_p1[(i1-6)];
}
for (; (i1<=13); i1 += 1) {
fieldData_LaplaceCoeff_2_p1[((i1*8)-49)] = buffer_Recv_1_p1[(i1-7)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Isend(&fieldData_LaplaceCoeff[2][17], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_LaplaceCoeff[2][33], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff[2][1], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Irecv(&fieldData_LaplaceCoeff[2][49], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Exemplo n.º 15
0
void exchsolutionData_2(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((!neighbor_isValid[1][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S397, S396, S398 */
{
{
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(4.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<3); i1 += 4) {
/* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<6); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
{
double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_Solution_2_p1[((i1*8)+2)] = 0.000000e+00;
fieldData_Solution_2_p1[((i1*8)+10)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_Solution_2_p1[((i1*8)+2)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<3); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<6); i1 += 1) {
xPos = posBegin[0];
}
}
}
}
}
if ((!neighbor_isValid[1][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S401, S400, S399 */
{
{
{
double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]);
int i1 = 1;
for (; (i1<=4); i1 += 2) {
fieldData_Solution_2_p1[((i1*8)+6)] = 0.000000e+00;
fieldData_Solution_2_p1[((i1*8)+14)] = 0.000000e+00;
}
for (; (i1<=5); i1 += 1) {
fieldData_Solution_2_p1[((i1*8)+6)] = 0.000000e+00;
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<3); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<6); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(4.000000e+00);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<3); i1 += 4) {
/* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<6); i1 += 1) {
yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
}
}
if ((!neighbor_isValid[1][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S404, S403, S402 */
{
{
{
double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_Solution_2_p1[(i2+8)] = 0.000000e+00;
fieldData_Solution_2_p1[(i2+9)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_Solution_2_p1[(i2+8)] = 0.000000e+00;
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=6); i2 += 1) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=6); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
if ((!neighbor_isValid[1][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S407, S406, S405 */
{
{
{
double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]);
int i2 = 2;
for (; (i2<=5); i2 += 2) {
fieldData_Solution_2_p1[(i2+40)] = 0.000000e+00;
fieldData_Solution_2_p1[(i2+41)] = 0.000000e+00;
}
for (; (i2<=6); i2 += 1) {
fieldData_Solution_2_p1[(i2+40)] = 0.000000e+00;
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=6); i2 += 1) {
xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
int i2 = 2;
for (; (i2<=5); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=6); i2 += 1) {
yPos = posEnd[1];
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(&fieldData_Solution[2][14], 1, mpiDatatype_5_1_8, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(&fieldData_Solution[2][10], 1, mpiDatatype_5_1_8, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_Solution[2][42], 1, mpiDatatype_1_5_8, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_Solution[2][10], 1, mpiDatatype_1_5_8, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Isend(&fieldData_Solution[2][3], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(&fieldData_Solution[2][5], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(&fieldData_Solution[2][1], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Irecv(&fieldData_Solution[2][7], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Isend(&fieldData_Solution[2][17], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_Solution[2][33], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_Solution[2][1], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Irecv(&fieldData_Solution[2][49], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
/**
 * SSE2 specialisation for double: evaluates a ratio of two degree-12
 * polynomials in x.
 *
 * The 26 entries of `coeff` are 13 (numerator, denominator) pairs ordered
 * from the x^12 coefficient down to the constant term.  Each 128-bit load
 * picks up one pair, so the low lane always carries the numerator and the
 * high lane the denominator, and both polynomials are evaluated at once.
 *
 * The pairs are split into two Horner chains in x*x:
 *   - sum_even consumes the pairs at offsets 0, 4, 8, ..., 24 (even powers),
 *   - sum_odd  consumes the pairs at offsets 2, 6, 10, ..., 22 and is finally
 *     multiplied by x once (odd powers).
 * The two chains are added and the low lane is divided by the high lane.
 *
 * @param x  Point of evaluation.
 * @return   numerator(x) / denominator(x).
 */
template <>
inline double lanczos13m53::lanczos_sum_expG_scaled<double>(const double& x)
{
   // _mm_load_pd needs a 16-byte aligned address; ALIGN16 is defined outside
   // this block and is presumably what provides that alignment.
   static const ALIGN16 double coeff[26] = {
         static_cast<double>(0.006061842346248906525783753964555936883222L),
         static_cast<double>(1u),
         static_cast<double>(0.5098416655656676188125178644804694509993L),
         static_cast<double>(66u),
         static_cast<double>(19.51992788247617482847860966235652136208L),
         static_cast<double>(1925u),
         static_cast<double>(449.9445569063168119446858607650988409623L),
         static_cast<double>(32670u),
         static_cast<double>(6955.999602515376140356310115515198987526L),
         static_cast<double>(357423u),
         static_cast<double>(75999.29304014542649875303443598909137092L),
         static_cast<double>(2637558u),
         static_cast<double>(601859.6171681098786670226533699352302507L),
         static_cast<double>(13339535u),
         static_cast<double>(3481712.15498064590882071018964774556468L),
         static_cast<double>(45995730u),
         static_cast<double>(14605578.08768506808414169982791359218571L),
         static_cast<double>(105258076u),
         static_cast<double>(43338889.32467613834773723740590533316085L),
         static_cast<double>(150917976u),
         static_cast<double>(86363131.28813859145546927288977868422342L),
         static_cast<double>(120543840u),
         static_cast<double>(103794043.1163445451906271053616070238554L),
         static_cast<double>(39916800u),
         static_cast<double>(56906521.91347156388090791033559122686859L),
         static_cast<double>(0u)
   };
   // Broadcast x into both lanes so numerator and denominator see the same value.
   __m128d vx = _mm_load1_pd(&x);
   __m128d sum_even = _mm_load_pd(coeff);
   __m128d sum_odd = _mm_load_pd(coeff+2);
   __m128d nc_odd, nc_even;
   __m128d vx2 = _mm_mul_pd(vx, vx);

   // Horner steps in x*x; the two chains are interleaved statement by statement.
   sum_even = _mm_mul_pd(sum_even, vx2);
   nc_even = _mm_load_pd(coeff + 4);
   sum_odd = _mm_mul_pd(sum_odd, vx2);
   nc_odd = _mm_load_pd(coeff + 6);
   sum_even = _mm_add_pd(sum_even, nc_even);
   sum_odd = _mm_add_pd(sum_odd, nc_odd);

   sum_even = _mm_mul_pd(sum_even, vx2);
   nc_even = _mm_load_pd(coeff + 8);
   sum_odd = _mm_mul_pd(sum_odd, vx2);
   nc_odd = _mm_load_pd(coeff + 10);
   sum_even = _mm_add_pd(sum_even, nc_even);
   sum_odd = _mm_add_pd(sum_odd, nc_odd);

   sum_even = _mm_mul_pd(sum_even, vx2);
   nc_even = _mm_load_pd(coeff + 12);
   sum_odd = _mm_mul_pd(sum_odd, vx2);
   nc_odd = _mm_load_pd(coeff + 14);
   sum_even = _mm_add_pd(sum_even, nc_even);
   sum_odd = _mm_add_pd(sum_odd, nc_odd);

   sum_even = _mm_mul_pd(sum_even, vx2);
   nc_even = _mm_load_pd(coeff + 16);
   sum_odd = _mm_mul_pd(sum_odd, vx2);
   nc_odd = _mm_load_pd(coeff + 18);
   sum_even = _mm_add_pd(sum_even, nc_even);
   sum_odd = _mm_add_pd(sum_odd, nc_odd);

   sum_even = _mm_mul_pd(sum_even, vx2);
   nc_even = _mm_load_pd(coeff + 20);
   sum_odd = _mm_mul_pd(sum_odd, vx2);
   nc_odd = _mm_load_pd(coeff + 22);
   sum_even = _mm_add_pd(sum_even, nc_even);
   sum_odd = _mm_add_pd(sum_odd, nc_odd);

   // Last step: the even chain takes the constant pair, the odd chain is
   // scaled by x (not x*x) to obtain the odd powers, then both are combined.
   sum_even = _mm_mul_pd(sum_even, vx2);
   nc_even = _mm_load_pd(coeff + 24);
   sum_odd = _mm_mul_pd(sum_odd, vx);
   sum_even = _mm_add_pd(sum_even, nc_even);
   sum_even = _mm_add_pd(sum_even, sum_odd);


   // Spill both lanes: t[0] is the numerator, t[1] the denominator.
   double ALIGN16 t[2];
   _mm_store_pd(t, sum_even);
   
   return t[0] / t[1];
}
Exemplo n.º 17
0
void exchlaplacecoeffData_6(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((!neighbor_isValid[1][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S902, S905, S908, S907, S901, S910, S904, S903, S906, S909, S900 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+4558)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+4626)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+4558)] = 0.000000e+00;
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(6.400000e+01);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<63); i1 += 4) {
/* yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<66); i1 += 1) {
yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+9114)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+9182)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+9114)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+2)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+70)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+2)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+31894)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+31962)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+31894)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+36450)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+36518)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+36450)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<63); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<66); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+18226)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+18294)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+18226)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+13670)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+13738)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+13670)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+27338)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+27406)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+27338)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+22782)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+22850)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+22782)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[1][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S920, S914, S917, S911, S913, S916, S919, S921, S918, S912, S915 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+4622)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+4690)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+4622)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+31958)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+32026)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+31958)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+13734)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+13802)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+13734)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+66)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+134)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+66)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+22846)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+22914)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+22846)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(6.400000e+01);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<63); i1 += 4) {
/* yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<66); i1 += 1) {
yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+18290)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+18358)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+18290)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+27402)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+27470)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+27402)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+36514)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+36582)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+36514)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i1 = 1;
for (; (i1<=64); i1 += 2) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+9178)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[((i1*68)+9246)] = 0.000000e+00;
}
for (; (i1<=65); i1 += 1) {
fieldData_LaplaceCoeff_6_p1[((i1*68)+9178)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<63); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<66); i1 += 1) {
xPos = posEnd[0];
}
}
}
}
}
if ((!neighbor_isValid[1][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S929, S923, S926, S931, S925, S928, S922, S930, S924, S927, S932 */
{
{
{
{
{
{
{
{
{
{
{
int i2 = 2;
for (; (i2<=65); i2 += 2) {
xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=66); i2 += 1) {
xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+18292)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+18293)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+18292)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+36516)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+36517)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+36516)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+31960)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+31961)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+31960)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+68)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+69)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+68)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+9180)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+9181)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+9180)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+22848)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+22849)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+22848)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+27404)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+27405)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+27404)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+4624)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+4625)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+4624)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+13736)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+13737)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+13736)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=65); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=66); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
if ((!neighbor_isValid[1][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S941, S935, S938, S943, S940, S934, S937, S942, S936, S939, S933 */
{
{
{
{
{
{
{
{
{
{
{
int i2 = 2;
for (; (i2<=65); i2 += 2) {
xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=66); i2 += 1) {
xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+36312)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+36313)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+36312)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+22644)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+22645)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+22644)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+13532)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+13533)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+13532)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+40868)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+40869)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+40868)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+18088)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+18089)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+18088)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+4420)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+4421)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+4420)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+31756)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+31757)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+31756)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+27200)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+27201)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+27200)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=65); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=66); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]);
int i2 = 2;
for (; (i2<=65); i2 += 2) {
fieldData_LaplaceCoeff_6_p1[(i2+8976)] = 0.000000e+00;
fieldData_LaplaceCoeff_6_p1[(i2+8977)] = 0.000000e+00;
}
for (; (i2<=66); i2 += 1) {
fieldData_LaplaceCoeff_6_p1[(i2+8976)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S944 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]);
double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*65)]);
int i4 = 1;
for (; (i4<=64); i4 += 2) {
buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+66)];
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+134)];
}
for (; (i4<=65); i4 += 1) {
buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+66)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(buffer_Send[1], 585, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(buffer_Recv[0], 585, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S945 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]);
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*65)]);
int i4 = 3;
for (; (i4<=66); i4 += 2) {
fieldData_LaplaceCoeff_6_p1[((i4*68)-134)] = buffer_Recv_0_p1[(i4-3)];
fieldData_LaplaceCoeff_6_p1[((i4*68)-66)] = buffer_Recv_0_p1[(i4-2)];
}
for (; (i4<=67); i4 += 1) {
fieldData_LaplaceCoeff_6_p1[((i4*68)-134)] = buffer_Recv_0_p1[(i4-3)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_LaplaceCoeff[6][4422], 1, mpiDatatype_9_65_4556, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff[6][70], 1, mpiDatatype_9_65_4556, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S946 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]);
double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*67)]);
int i4 = 0;
for (; (i4<=65); i4 += 2) {
buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+3)];
buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+71)];
}
for (; (i4<=66); i4 += 1) {
buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+3)];
}
}
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S947 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]);
double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*67)]);
int i4 = 0;
for (; (i4<=65); i4 += 2) {
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+65)];
buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+133)];
}
for (; (i4<=66); i4 += 1) {
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+65)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Isend(buffer_Send[0], 603, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(buffer_Send[1], 603, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(buffer_Recv[0], 603, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Irecv(buffer_Recv[1], 603, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S948 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]);
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*67)]);
int i4 = 1;
for (; (i4<=66); i4 += 2) {
fieldData_LaplaceCoeff_6_p1[((i4*68)-67)] = buffer_Recv_0_p1[(i4-1)];
fieldData_LaplaceCoeff_6_p1[((i4*68)+1)] = buffer_Recv_0_p1[i4];
}
for (; (i4<=67); i4 += 1) {
fieldData_LaplaceCoeff_6_p1[((i4*68)-67)] = buffer_Recv_0_p1[(i4-1)];
}
}
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S949 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*67)]);
double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]);
int i4 = 67;
for (; (i4<=132); i4 += 2) {
fieldData_LaplaceCoeff_6_p1[((i4*68)-4489)] = buffer_Recv_1_p1[(i4-67)];
fieldData_LaplaceCoeff_6_p1[((i4*68)-4421)] = buffer_Recv_1_p1[(i4-66)];
}
for (; (i4<=133); i4 += 1) {
fieldData_LaplaceCoeff_6_p1[((i4*68)-4489)] = buffer_Recv_1_p1[(i4-67)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Isend(&fieldData_LaplaceCoeff[6][137], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_LaplaceCoeff[6][4353], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff[6][1], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Irecv(&fieldData_LaplaceCoeff[6][4489], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Exemplo n.º 18
0
void exchlaplacecoeff_gmrfData_5(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((!neighbor_isValid[0][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1306, S1309, S1300, S1308, S1302, S1305, S1310, S1304, S1307, S1301, S1303 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6302)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6338)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6302)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3782)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3818)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3782)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7562)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7598)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7562)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+38)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2522)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2558)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2522)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<31); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<34); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8822)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8858)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8822)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1262)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1298)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1262)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(3.200000e+01);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<31); i1 += 4) {
/* yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<34); i1 += 1) {
yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5042)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5078)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5042)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10082)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10118)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10082)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1312, S1320, S1314, S1317, S1311, S1319, S1313, S1316, S1321, S1315, S1318 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1294)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1330)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1294)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5074)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5110)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5074)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8854)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8890)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8854)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2554)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2590)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2554)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(3.200000e+01);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<31); i1 += 4) {
/* yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<34); i1 += 1) {
yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+34)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+70)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+34)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3814)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3850)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3814)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7594)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7630)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7594)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6334)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6370)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6334)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<31); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<34); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i1 = 1;
for (; (i1<=32); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10114)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10150)] = 0.000000e+00;
}
for (; (i1<=33); i1 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10114)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1327, S1332, S1326, S1329, S1323, S1322, S1331, S1325, S1328, S1330, S1324 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5076)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5077)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5076)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8856)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8857)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8856)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6336)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6337)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6336)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7596)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7597)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7596)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=33); i2 += 2) {
xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=34); i2 += 1) {
xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+36)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+37)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+36)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2556)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2557)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2556)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10116)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10117)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10116)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3816)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3817)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3816)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1296)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1297)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1296)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=33); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=34); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
if ((!neighbor_isValid[0][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S1338, S1341, S1335, S1340, S1343, S1337, S1334, S1333, S1342, S1336, S1339 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7488)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7489)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7488)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2448)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2449)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2448)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11268)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11269)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11268)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=33); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=34); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10008)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10009)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10008)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6228)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6229)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6228)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8748)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8749)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8748)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=33); i2 += 2) {
xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=34); i2 += 1) {
xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3708)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3709)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3708)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1188)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1189)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1188)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]);
int i2 = 2;
for (; (i2<=33); i2 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4968)] = 0.000000e+00;
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4969)] = 0.000000e+00;
}
for (; (i2<=34); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4968)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1344 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]);
double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*33)]);
int i4 = 1;
for (; (i4<=32); i4 += 2) {
buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+34)];
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+70)];
}
for (; (i4<=33); i4 += 1) {
buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+34)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 297, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 297, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1345 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]);
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*33)]);
int i4 = 3;
for (; (i4<=34); i4 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-70)] = buffer_Recv_0_p1[(i4-3)];
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-34)] = buffer_Recv_0_p1[(i4-2)];
}
for (; (i4<=35); i4 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-70)] = buffer_Recv_0_p1[(i4-3)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][1190], 1, mpiDatatype_9_33_1260, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][38], 1, mpiDatatype_9_33_1260, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1346 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]);
double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*35)]);
int i4 = 0;
for (; (i4<=33); i4 += 2) {
buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+3)];
buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+39)];
}
for (; (i4<=34); i4 += 1) {
buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+3)];
}
}
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1347 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]);
double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*35)]);
int i4 = 0;
for (; (i4<=33); i4 += 2) {
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+33)];
buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+69)];
}
for (; (i4<=34); i4 += 1) {
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+33)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Isend(buffer_Send[0], 315, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 315, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 315, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Irecv(buffer_Recv[1], 315, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1348 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]);
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*35)]);
int i4 = 1;
for (; (i4<=34); i4 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-35)] = buffer_Recv_0_p1[(i4-1)];
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+1)] = buffer_Recv_0_p1[i4];
}
for (; (i4<=35); i4 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-35)] = buffer_Recv_0_p1[(i4-1)];
}
}
}
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1349 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]);
double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*35)]);
int i4 = 35;
for (; (i4<=68); i4 += 2) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1225)] = buffer_Recv_1_p1[(i4-35)];
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1189)] = buffer_Recv_1_p1[(i4-34)];
}
for (; (i4<=69); i4 += 1) {
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1225)] = buffer_Recv_1_p1[(i4-35)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][73], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][1153], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][1], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) {
MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][1225], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Exemplo n.º 19
0
/*
 * Accumulate one 4x4 product tile into C using SSE2:
 *
 *     C[r*lda + c] += sum over l in 0..3 of A[r*lda + l] * B[l*lda + c]
 *
 * for r, c in 0..3.  lda is the distance (in doubles) between consecutive
 * rows of all three matrices.
 *
 * B and C are accessed with aligned two-double loads/stores
 * (_mm_load_pd / _mm_store_pd), so every address B + l*lda, B + l*lda + 2,
 * C + r*lda and C + r*lda + 2 must be 16-byte aligned.  A is read one
 * element at a time and broadcast, so it has no such requirement.
 */
void do_matrix_mult(int lda, double* A, double* B, double* C)
{
	__m128d tile[4][2];	/* tile[r][0] = C[r][0..1], tile[r][1] = C[r][2..3] */
	int row, l;

	/* Pull the current contents of the 4x4 C tile into registers. */
	for (row = 0; row < 4; ++row) {
		tile[row][0] = _mm_load_pd(C + row * lda);
		tile[row][1] = _mm_load_pd(C + row * lda + 2);
	}

	/* Rank-1 update per l: column l of the A tile times row l of the B tile. */
	for (l = 0; l < 4; ++l) {
		const __m128d b_lo = _mm_load_pd(B + l * lda);
		const __m128d b_hi = _mm_load_pd(B + l * lda + 2);

		for (row = 0; row < 4; ++row) {
			/* Broadcast A[row][l] into both lanes. */
			const __m128d a_bcast = _mm_load1_pd(A + l + row * lda);

			tile[row][0] = _mm_add_pd(tile[row][0], _mm_mul_pd(a_bcast, b_lo));
			tile[row][1] = _mm_add_pd(tile[row][1], _mm_mul_pd(a_bcast, b_hi));
		}
	}

	/* Write the finished tile back. */
	for (row = 0; row < 4; ++row) {
		_mm_store_pd(C + row * lda, tile[row][0]);
		_mm_store_pd(C + 2 + row * lda, tile[row][1]);
	}
}
Exemplo n.º 20
0
/*
 * Chain-rule force pass over a neighbour list, SSE2 double-precision variant.
 *
 * Stage 1: fills born->work[i] for i in [0, natoms) from born->bRad[i] and
 * dvda[i].  The formula depends on gb_algorithm:
 *   egbSTILL: (2 * r * r * dvda[i]) / ONE_4PI_EPS0
 *   egbHCT:   r * r * dvda[i]
 *   egbOBC:   r * r * born->drobc[i] * dvda[i]
 * Any other gb_algorithm value leaves born->work untouched.
 *
 * Stage 2: for every entry of nl, neighbours are processed two at a time
 * (one per SSE lane), with a single-lane pass for an odd trailing neighbour.
 * Each pass consumes four doubles from dadx (two aligned 2-double loads), so
 * dadx must be 16-byte aligned.  The per-pair vector t = f_gb * (xi - xj) is
 * accumulated into fix/fiy/fiz and handed to the GMX_MM_DECREMENT_* macro for
 * the j atom(s); the accumulated i-atom terms go to
 * gmx_mm_update_iforce_1atom_pd together with f + 3*ii and fshift + is3.
 * NOTE(review): the GMX_MM_* macros and gmx_mm_update_iforce_1atom_pd are
 * defined elsewhere; their names suggest load/scatter helpers -- confirm
 * against their definitions.
 *
 * md is not referenced.  Always returns 0.
 */
int
calc_gb_chainrule_sse2_double(int natoms, t_nblist *nl, double *dadx, double *dvda, 
                              double *x, double *f, double *fshift, double *shiftvec,
                              int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
{
    double  *rb   = born->work;
    int     *jjnr = nl->jjnr;
    int      atom, entry, k;

    __m128d  ix, iy, iz;
    __m128d  jx, jy, jz;
    __m128d  fix, fiy, fiz;
    __m128d  dx, dy, dz;
    __m128d  tx, ty, tz;
    __m128d  rbai, rbaj, f_gb, f_gb_ai;

    /* Stage 1: per-atom Born radius term, stored in the work array. */
    if (gb_algorithm == egbSTILL)
    {
        for (atom = 0; atom < natoms; atom++)
        {
            const double radius = born->bRad[atom];
            rb[atom] = (2 * radius * radius * dvda[atom])/ONE_4PI_EPS0;
        }
    }
    else if (gb_algorithm == egbHCT)
    {
        for (atom = 0; atom < natoms; atom++)
        {
            const double radius = born->bRad[atom];
            rb[atom] = radius * radius * dvda[atom];
        }
    }
    else if (gb_algorithm == egbOBC)
    {
        for (atom = 0; atom < natoms; atom++)
        {
            const double radius = born->bRad[atom];
            rb[atom] = radius * radius * born->drobc[atom] * dvda[atom];
        }
    }

    jz = _mm_setzero_pd();

    /* Stage 2: one pass per neighbour-list entry. */
    for (entry = 0; entry < nl->nri; entry++)
    {
        const int ii    = nl->iinr[entry];
        const int ii3   = ii*3;
        const int is3   = 3*nl->shift[entry];
        const int jstop = nl->jindex[entry+1];

        /* Shifted i-atom coordinates, broadcast to both lanes. */
        ix = _mm_set1_pd(shiftvec[is3]  +x[ii3+0]);
        iy = _mm_set1_pd(shiftvec[is3+1]+x[ii3+1]);
        iz = _mm_set1_pd(shiftvec[is3+2]+x[ii3+2]);

        rbai = _mm_load1_pd(rb+ii);
        fix  = _mm_setzero_pd();
        fiy  = _mm_setzero_pd();
        fiz  = _mm_setzero_pd();

        /* Neighbours in pairs: lane 0 = jnrA, lane 1 = jnrB. */
        for (k = nl->jindex[entry]; k < jstop-1; k += 2)
        {
            const int jnrA = jjnr[k];
            const int jnrB = jjnr[k+1];
            const int j3A  = 3*jnrA;
            const int j3B  = 3*jnrB;

            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);

            dx = _mm_sub_pd(ix,jx);
            dy = _mm_sub_pd(iy,jy);
            dz = _mm_sub_pd(iz,jz);

            GMX_MM_LOAD_2VALUES_PD(rb+jnrA,rb+jnrB,rbaj);

            /* Two chain-rule vectors per pair; the cursor advances by four doubles. */
            f_gb     = _mm_load_pd(dadx);
            dadx    += 2;
            f_gb_ai  = _mm_load_pd(dadx);
            dadx    += 2;

            /* Scalar force: f_gb*rb[i] + f_gb_ai*rb[j]. */
            f_gb     = _mm_mul_pd(f_gb,rbai);
            f_gb_ai  = _mm_mul_pd(f_gb_ai,rbaj);
            f_gb     = _mm_add_pd(f_gb,f_gb_ai);

            tx = _mm_mul_pd(f_gb,dx);
            ty = _mm_mul_pd(f_gb,dy);
            tz = _mm_mul_pd(f_gb,dz);

            fix = _mm_add_pd(fix,tx);
            fiy = _mm_add_pd(fiy,ty);
            fiz = _mm_add_pd(fiz,tz);

            GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(f+j3A,f+j3B,tx,ty,tz);
        }

        /* Odd trailing neighbour: same computation on the low lane only. */
        if (k < jstop)
        {
            const int jnrA = jjnr[k];
            const int j3A  = 3*jnrA;

            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);

            dx = _mm_sub_sd(ix,jx);
            dy = _mm_sub_sd(iy,jy);
            dz = _mm_sub_sd(iz,jz);

            GMX_MM_LOAD_1VALUE_PD(rb+jnrA,rbaj);

            /* The cursor still advances by four doubles for the single neighbour. */
            f_gb     = _mm_load_pd(dadx);
            dadx    += 2;
            f_gb_ai  = _mm_load_pd(dadx);
            dadx    += 2;

            f_gb     = _mm_mul_sd(f_gb,rbai);
            f_gb_ai  = _mm_mul_sd(f_gb_ai,rbaj);
            f_gb     = _mm_add_sd(f_gb,f_gb_ai);

            tx = _mm_mul_sd(f_gb,dx);
            ty = _mm_mul_sd(f_gb,dy);
            tz = _mm_mul_sd(f_gb,dz);

            fix = _mm_add_sd(fix,tx);
            fiy = _mm_add_sd(fiy,ty);
            fiz = _mm_add_sd(fiz,tz);

            GMX_MM_DECREMENT_1RVEC_1POINTER_PD(f+j3A,tx,ty,tz);
        }

        /* fix/fiy/fiz hold per-lane partial sums for atom ii; hand them to the
         * i-force/shift-force update helper. */
        gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,f+ii3,fshift+is3);
    }

    return 0;
}
Exemplo n.º 21
0
/*
 * Fill scr1 from weighted sums of 16-double records in scr2 (SSE2).
 *
 * Parameters
 *   nmultipoles  highest order handled; loops run over 0..nmultipoles.
 *   scr1         output, written in 16-double records.
 *   scr2         input, read in 16-double records.
 *   d2           not referenced by this function.
 *   fr           per-term weights, broadcast to both lanes.
 *   sg           per-record scale factors applied in parts 2 and 3.
 *
 * All scr1/scr2 accesses are aligned two-double loads/stores at even
 * indices, so both arrays must be 16-byte aligned.
 *
 * Layout of each output record relative to the accumulated input record:
 *   out[0..3]   <-  in[8..11]
 *   out[8..11]  <-  in[0..3]
 *   parts 1 and 2: out[4..7] and out[12..15] are set to zero
 *   part 3:        out[4..7]   <- -in[12..15],  out[12..15] <- -in[4..7]
 *
 * Fix relative to the previous revision: part 1 accumulated into reg12 and
 * reg13 without initialising them (reg10/reg11 were zeroed instead), so
 * scr1[0..3] was computed from indeterminate values.
 */
void m2l_along_z(long long nmultipoles, double *scr1, double *scr2, double *d2, double *fr, double *sg)
{
  int mmmm,mmm,mm,m;
  int i,j,k,l,n,nn;

  __m128d reg00,reg01,reg02,reg03;   /* input record, doubles  0.. 7 */
  __m128d reg04,reg05,reg06,reg07;   /* input record, doubles  8..15 */
  __m128d reg08,reg09,reg10,reg11;   /* accumulators for input 0.. 7 */
  __m128d reg12,reg13,reg14,reg15;   /* accumulators for input 8..15 */
  __m128d reg18;                     /* broadcast fr[] / sg[] factor */

  const __m128d regzero = _mm_setzero_pd();

  (void)d2;

  /* ---- Part 1: output record 0 = sum_j scr2 record j * fr[j] ---- */
  i = -15;

  reg08 = regzero;
  reg09 = regzero;
  reg12 = regzero;   /* was left uninitialised */
  reg13 = regzero;   /* was left uninitialised */

  for(j=0;j<=nmultipoles;++j)
  {
    i += 16;         /* i-1 is the first double of record j */

    reg00 = _mm_load_pd(&scr2[i-1]);
    reg01 = _mm_load_pd(&scr2[i+1]);
    reg04 = _mm_load_pd(&scr2[i+7]);
    reg05 = _mm_load_pd(&scr2[i+9]);

    reg18 = _mm_load1_pd(&fr[j]);

    reg08 = _mm_add_pd(reg08,_mm_mul_pd(reg00,reg18));
    reg09 = _mm_add_pd(reg09,_mm_mul_pd(reg01,reg18));
    reg12 = _mm_add_pd(reg12,_mm_mul_pd(reg04,reg18));
    reg13 = _mm_add_pd(reg13,_mm_mul_pd(reg05,reg18));
  }

  _mm_store_pd(&scr1[ 0],reg12);
  _mm_store_pd(&scr1[ 2],reg13);
  _mm_store_pd(&scr1[ 4],regzero);
  _mm_store_pd(&scr1[ 6],regzero);
  _mm_store_pd(&scr1[ 8],reg08);
  _mm_store_pd(&scr1[10],reg09);
  _mm_store_pd(&scr1[12],regzero);
  _mm_store_pd(&scr1[14],regzero);

  /* ---- Part 2: for l = 1..nmultipoles, one output record each.
   * The output offset grows by 16*l per step; the input records are
   * 0..nmultipoles again, weighted by fr[l..nmultipoles+l], then scaled
   * by sg[l]. ---- */
  i = 1;

  for(l=1;l<=nmultipoles;++l)
  {
    i += 16 * l;
    j = -15;
    k = nmultipoles+l;

    reg08 = regzero;
    reg09 = regzero;
    reg12 = regzero;
    reg13 = regzero;

    for(m=l;m<=k;++m)
    {
      j += 16;

      reg00 = _mm_load_pd(&scr2[j-1]);
      reg01 = _mm_load_pd(&scr2[j+1]);
      reg04 = _mm_load_pd(&scr2[j+7]);
      reg05 = _mm_load_pd(&scr2[j+9]);

      reg18 = _mm_load1_pd(&fr[m]);

      reg08 = _mm_add_pd(reg08,_mm_mul_pd(reg00,reg18));
      reg09 = _mm_add_pd(reg09,_mm_mul_pd(reg01,reg18));
      reg12 = _mm_add_pd(reg12,_mm_mul_pd(reg04,reg18));
      reg13 = _mm_add_pd(reg13,_mm_mul_pd(reg05,reg18));
    }

    reg18 = _mm_load1_pd(&sg[l]);

    reg12 = _mm_mul_pd(reg12,reg18);
    _mm_store_pd(&scr1[i- 1],reg12);

    reg13 = _mm_mul_pd(reg13,reg18);
    _mm_store_pd(&scr1[i+ 1],reg13);

    _mm_store_pd(&scr1[i+ 3],regzero);
    _mm_store_pd(&scr1[i+ 5],regzero);

    reg08 = _mm_mul_pd(reg08,reg18);
    _mm_store_pd(&scr1[i+ 7],reg08);

    reg09 = _mm_mul_pd(reg09,reg18);
    _mm_store_pd(&scr1[i+ 9],reg09);

    _mm_store_pd(&scr1[i+11],regzero);
    _mm_store_pd(&scr1[i+13],regzero);
  }

  /* ---- Part 3: for every (m, l) with 1 <= m <= l <= nmultipoles, one full
   * 16-double output record.  n tracks where the input records for the
   * current m start in scr2; it advances by mm after each m, and mm shrinks
   * by 16 each time. ---- */
  mm = 16 * nmultipoles;

  i = 1;
  n = mm+1;

  for(m=1;m<=nmultipoles;++m)
  {
    i += 16 * m;
    j = i;

    for(l=m;l<=nmultipoles;++l)
    {

      j += 16 * l;
      nn = n;
      k = m + l;
      mmm = nmultipoles + l;

      reg08 = regzero;
      reg09 = regzero;
      reg10 = regzero;
      reg11 = regzero;
      reg12 = regzero;
      reg13 = regzero;
      reg14 = regzero;
      reg15 = regzero;

      for(mmmm=k;mmmm<=mmm;++mmmm)
      {
        nn += 16;

        reg00 = _mm_load_pd(&scr2[nn- 1]);
        reg01 = _mm_load_pd(&scr2[nn+ 1]);
        reg02 = _mm_load_pd(&scr2[nn+ 3]);
        reg03 = _mm_load_pd(&scr2[nn+ 5]);
        reg04 = _mm_load_pd(&scr2[nn+ 7]);
        reg05 = _mm_load_pd(&scr2[nn+ 9]);
        reg06 = _mm_load_pd(&scr2[nn+11]);
        reg07 = _mm_load_pd(&scr2[nn+13]);

        reg18 = _mm_load1_pd(&fr[mmmm]);

        reg08 = _mm_add_pd(reg08,_mm_mul_pd(reg00,reg18));
        reg09 = _mm_add_pd(reg09,_mm_mul_pd(reg01,reg18));

        /* doubles 4..7 and 12..15 enter with a negative sign */
        reg10 = _mm_sub_pd(reg10,_mm_mul_pd(reg02,reg18));
        reg11 = _mm_sub_pd(reg11,_mm_mul_pd(reg03,reg18));

        reg12 = _mm_add_pd(reg12,_mm_mul_pd(reg04,reg18));
        reg13 = _mm_add_pd(reg13,_mm_mul_pd(reg05,reg18));

        reg14 = _mm_sub_pd(reg14,_mm_mul_pd(reg06,reg18));
        reg15 = _mm_sub_pd(reg15,_mm_mul_pd(reg07,reg18));
      }

      reg18 = _mm_load1_pd(&sg[k]);

      reg12 = _mm_mul_pd(reg12,reg18);
      _mm_store_pd(&scr1[j- 1],reg12);

      reg13 = _mm_mul_pd(reg13,reg18);
      _mm_store_pd(&scr1[j+ 1],reg13);

      reg14 = _mm_mul_pd(reg14,reg18);
      _mm_store_pd(&scr1[j+ 3],reg14);

      reg15 = _mm_mul_pd(reg15,reg18);
      _mm_store_pd(&scr1[j+ 5],reg15);

      reg08 = _mm_mul_pd(reg08,reg18);
      _mm_store_pd(&scr1[j+ 7],reg08);

      reg09 = _mm_mul_pd(reg09,reg18);
      _mm_store_pd(&scr1[j+ 9],reg09);

      reg10 = _mm_mul_pd(reg10,reg18);
      _mm_store_pd(&scr1[j+11],reg10);

      reg11 = _mm_mul_pd(reg11,reg18);
      _mm_store_pd(&scr1[j+13],reg11);
    }

    n += mm;
    mm -= 16;
  }
}
Exemplo n.º 22
0
/*
 * Approximate 1/sqrt(x) independently for the two double lanes of x.
 *
 * A single-precision reciprocal-square-root estimate is used as the seed and
 * is then refined by two Newton-Raphson iterations of the form
 *     y <- 0.5 * (y * (3 - x*y*y))
 * carried out in double precision.
 */
static inline __m128d
my_invrsq_pd(__m128d x)
{
	const __m128d c_three = _mm_set1_pd(3.0);
	const __m128d c_half  = _mm_set1_pd(0.5);
	__m128d est, est_sq, corr;

	/* Seed: narrow to float, take the hardware rsqrt estimate, widen again */
	est = _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(x)));

	/* First Newton-Raphson refinement (roughly single-precision accuracy) */
	est_sq = _mm_mul_pd(est, est);
	corr   = _mm_sub_pd(c_three, _mm_mul_pd(x, est_sq));
	est    = _mm_mul_pd(c_half, _mm_mul_pd(est, corr));

	/* Second Newton-Raphson refinement (roughly doubles the accurate bits) */
	est_sq = _mm_mul_pd(est, est);
	corr   = _mm_sub_pd(c_three, _mm_mul_pd(x, est_sq));

	return _mm_mul_pd(c_half, _mm_mul_pd(est, corr));
}

/* Extract a single integer from a __m128i: x is shifted right by 4*imm bytes
 * and the low 32 bits are returned, so imm selects a 32-bit element (not a
 * 64-bit one, despite the name). imm must be a compile-time constant because
 * it ends up as the immediate byte count of _mm_srli_si128.
 * NOTE(review): the name is the same as the SSE4.1 intrinsic _mm_extract_epi64
 * and would presumably clash if <smmintrin.h> is included - confirm, and
 * consider renaming together with all users. */
#define _mm_extract_epi64(x, imm) \
    _mm_cvtsi128_si32(_mm_srli_si128((x), 4 * (imm)))
	
/*
 * Nonbonded kernel 400, SSE2 double precision: Coulomb interaction plus a
 * tabulated GB term, evaluated for two j atoms per inner-loop iteration.
 *
 * For every outer-list entry n in [0, *p_nri) the kernel
 *   - builds the shifted position of i atom iinr[n] from pos[] and the shift
 *     vector shiftvec[3*shift[n] .. 3*shift[n]+2],
 *   - walks the neighbours jjnr[jindex[n] .. jindex[n+1]-1] in pairs; a scalar
 *     tail handles the last neighbour when the count is odd,
 *   - subtracts each pair force from faction[] of the j atom and adds the
 *     summed force to faction[] of the i atom,
 *   - updates dvda[] for the j atoms and the i atom,
 *   - adds the Coulomb energy to Vc[gid[n]] and the GB energy to
 *     gbdata->gpol[gid[n]], where gbdata is `work` cast to gmx_gbdata_t*.
 *
 * Inputs that are read: p_nri, iinr, jindex, jjnr, shift, shiftvec, gid, pos,
 * charge, p_facel, p_gbtabscale, GBtab, invsqrta, work.
 * Arrays that are updated in place: faction, dvda, Vc, gbdata->gpol.
 * Outputs: *outeriter receives *p_nri, *inneriter receives the last jindex
 * end value that was visited (0 when *p_nri is 0).
 * Not used by this kernel: fshift, p_krf, p_crf, type, p_ntype, vdwparam,
 * Vvdw, p_tabscale, VFtab, p_nthreads, count, mtx.
 *
 * GBtab is read with aligned 16-byte loads at offsets that are multiples of
 * two doubles, four doubles (Y, F, G, H) per table point, so it has to be
 * 16-byte aligned.
 */
void nb_kernel400_x86_64_sse2(int *           p_nri,
                    int *           iinr,
                    int *           jindex,
                    int *           jjnr,
                    int *           shift,
                    double *         shiftvec,
                    double *         fshift,
                    int *           gid,
                    double *         pos,
                    double *         faction,
                    double *         charge,
                    double *         p_facel,
                    double *         p_krf,
                    double *         p_crf,
                    double *         Vc,
                    int *           type,
                    int *           p_ntype,
                    double *         vdwparam,
                    double *         Vvdw,
                    double *         p_tabscale,
                    double *         VFtab,
                    double *         invsqrta,
                    double *         dvda,
                    double *         p_gbtabscale,
                    double *         GBtab,
                    int *           p_nthreads,
                    int *           count,
                    void *          mtx,
                    int *           outeriter,
                    int *           inneriter,
                    double *         work)
{
	int           nri,offset;
	int           n,ii,is3,ii3,k,nj0,nj1,jnr1,jnr2,j13,j23,ggid;
	double        facel,gbtabscl,vct,vgbt;
	double        shX,shY,shZ,isai_d,dva;
	gmx_gbdata_t *gbdata;
	float *        gpol;

	__m128d       ix,iy,iz,jx,jy,jz;
	__m128d       dx,dy,dz,t1,t2,t3;
	__m128d       fix,fiy,fiz,rsq11,rinv,r,fscal,rt,eps,eps2;
	__m128d       q,iq,qq,isai,isaj,isaprod,vcoul,gbscale,dvdaj;
	__m128d       Y,F,G,H,Fp,VV,FF,vgb,fijC,dvdatmp,dvdasum,vctot,vgbtot,n0d;
	__m128d       xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
	__m128d       fac,gbtabscale;
	__m128i       n0,nnn;

	const __m128d neg    = {-1.0f,-1.0f};
	const __m128d half   = {0.5f,0.5f};
	const __m128d two    = {2.0f,2.0f};

	gbdata     = (gmx_gbdata_t *)work;
	gpol       = gbdata->gpol;

	nri        = *p_nri;
	/* Electrostatic prefactor scaled by (1 - 1/solvent dielectric) */
	facel      = (*p_facel) * (1.0 - (1.0/gbdata->gb_epsilon_solvent));
	gbtabscl   = *p_gbtabscale;
	nj1        = 0;

	/* Splat variables */
	fac        = _mm_load1_pd(&facel);
	gbtabscale = _mm_load1_pd(&gbtabscl);

	/* Keep compiler happy: these are read by the reductions below even when
	 * an i atom has no neighbours (only lanes that are discarded are affected) */
	dvdatmp = _mm_setzero_pd();
	vgb     = _mm_setzero_pd();
	dvdaj   = _mm_setzero_pd();
	isaj    = _mm_setzero_pd();
	vcoul   = _mm_setzero_pd();
	t1      = _mm_setzero_pd();
	t2      = _mm_setzero_pd();
	t3      = _mm_setzero_pd();

	jnr1=jnr2=0;
	j13=j23=0;

	for(n=0;n<nri;n++)
	{
		is3     = 3*shift[n];
		shX     = shiftvec[is3];
		shY     = shiftvec[is3+1];
		shZ     = shiftvec[is3+2];

		nj0     = jindex[n];
		nj1     = jindex[n+1];
		offset  = (nj1-nj0)%2;

		ii      = iinr[n];
		ii3     = ii*3;

		/* Shifted i position; each component uses its own shift component
		 * (the previous code applied shX to y and z as well) */
		ix      = _mm_set1_pd(shX+pos[ii3+0]);
		iy      = _mm_set1_pd(shY+pos[ii3+1]);
		iz      = _mm_set1_pd(shZ+pos[ii3+2]);
		q       = _mm_set1_pd(charge[ii]);

		iq      = _mm_mul_pd(fac,q);
		isai_d  = invsqrta[ii];
		isai    = _mm_load1_pd(&isai_d);

		fix     = _mm_setzero_pd();
		fiy     = _mm_setzero_pd();
		fiz     = _mm_setzero_pd();
		dvdasum = _mm_setzero_pd();
		vctot   = _mm_setzero_pd();
		vgbtot  = _mm_setzero_pd();

		/* Main loop: two j atoms per iteration, one per SSE lane */
		for(k=nj0;k<nj1-offset; k+=2)
		{
			jnr1    = jjnr[k];
			jnr2    = jjnr[k+1];

			j13     = jnr1 * 3;
			j23     = jnr2 * 3;

			/* Load coordinates */
			xmm1    = _mm_loadu_pd(pos+j13); /* x1 y1 */
			xmm2    = _mm_loadu_pd(pos+j23); /* x2 y2 */

			xmm5    = _mm_load_sd(pos+j13+2); /* z1 - */
			xmm6    = _mm_load_sd(pos+j23+2); /* z2 - */

			/* transpose */
			jx      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* x1 x2 */
			jy      = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* y1 y2 */
			jz      = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* z1 z2 */

			/* distances */
			dx      = _mm_sub_pd(ix,jx);
			dy      = _mm_sub_pd(iy,jy);
			dz      = _mm_sub_pd(iz,jz);

			rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
			rinv    = my_invrsq_pd(rsq11);

			/* Load invsqrta */
			isaj    = _mm_loadl_pd(isaj,invsqrta+jnr1);
			isaj    = _mm_loadh_pd(isaj,invsqrta+jnr2);
			isaprod = _mm_mul_pd(isai,isaj);

			/* Load charges */
			q       = _mm_loadl_pd(q,charge+jnr1);
			q       = _mm_loadh_pd(q,charge+jnr2);
			qq      = _mm_mul_pd(iq,q);

			vcoul   = _mm_mul_pd(qq,rinv);
			fscal   = _mm_mul_pd(vcoul,rinv);
			qq      = _mm_mul_pd(isaprod,qq);
			qq      = _mm_mul_pd(qq,neg);
			gbscale = _mm_mul_pd(isaprod,gbtabscale);

			/* Load dvdaj */
			dvdaj   = _mm_loadl_pd(dvdaj, dvda+jnr1);
			dvdaj   = _mm_loadh_pd(dvdaj, dvda+jnr2);

			/* Table lookup position: integer part selects the table point,
			 * fractional part eps is the interpolation variable */
			r       = _mm_mul_pd(rsq11,rinv);
			rt      = _mm_mul_pd(r,gbscale);
			n0      = _mm_cvttpd_epi32(rt);
			n0d     = _mm_cvtepi32_pd(n0);
			eps     = _mm_sub_pd(rt,n0d);
			eps2    = _mm_mul_pd(eps,eps);

			/* Four doubles per table point */
			nnn     = _mm_slli_epi64(n0,2);

			xmm1    = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0)));   /* Y1 F1 */
			xmm2    = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,1)));   /* Y2 F2 */
			xmm3    = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0))+2); /* G1 H1 */
			xmm4    = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,1))+2); /* G2 H2 */

			Y       = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* Y1 Y2 */
			F       = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* F1 F2 */
			G       = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(0,0)); /* G1 G2 */
			H       = _mm_shuffle_pd(xmm3,xmm4,_MM_SHUFFLE2(1,1)); /* H1 H2 */

			/* VV = Y + eps*(F + G*eps + H*eps^2), FF = F + 2*G*eps + 3*H*eps^2 */
			G       = _mm_mul_pd(G,eps);
			H       = _mm_mul_pd(H,eps2);
			Fp      = _mm_add_pd(F,G);
			Fp      = _mm_add_pd(Fp,H);
			VV      = _mm_mul_pd(Fp,eps);
			VV      = _mm_add_pd(Y,VV);
			H       = _mm_mul_pd(two,H);
			FF      = _mm_add_pd(Fp,G);
			FF      = _mm_add_pd(FF,H);
			vgb     = _mm_mul_pd(qq,VV);
			fijC    = _mm_mul_pd(qq,FF);
			fijC    = _mm_mul_pd(fijC,gbscale);

			dvdatmp = _mm_mul_pd(fijC,r);
			dvdatmp = _mm_add_pd(vgb,dvdatmp);
			dvdatmp = _mm_mul_pd(dvdatmp,neg);
			dvdatmp = _mm_mul_pd(dvdatmp,half);
			dvdasum = _mm_add_pd(dvdasum,dvdatmp);

			xmm1    = _mm_mul_pd(dvdatmp,isaj);
			xmm1    = _mm_mul_pd(xmm1,isaj);
			dvdaj   = _mm_add_pd(dvdaj,xmm1);

			/* store dvda */
			_mm_storel_pd(dvda+jnr1,dvdaj);
			_mm_storeh_pd(dvda+jnr2,dvdaj);

			vctot   = _mm_add_pd(vctot,vcoul);
			vgbtot  = _mm_add_pd(vgbtot,vgb);

			fscal   = _mm_sub_pd(fijC,fscal);
			fscal   = _mm_mul_pd(fscal,neg);
			fscal   = _mm_mul_pd(fscal,rinv);

			/* calculate partial force terms */
			t1      = _mm_mul_pd(fscal,dx);
			t2      = _mm_mul_pd(fscal,dy);
			t3      = _mm_mul_pd(fscal,dz);

			/* update the i force */
			fix     = _mm_add_pd(fix,t1);
			fiy     = _mm_add_pd(fiy,t2);
			fiz     = _mm_add_pd(fiz,t3);

			/* accumulate forces from memory */
			xmm1    = _mm_loadu_pd(faction+j13); /* fx1 fy1 */
			xmm2    = _mm_loadu_pd(faction+j23); /* fx2 fy2 */

			xmm5    = _mm_load1_pd(faction+j13+2); /* fz1 fz1 */
			xmm6    = _mm_load1_pd(faction+j23+2); /* fz2 fz2 */

			/* transpose */
			xmm7    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fz1 fz2 */
			xmm5    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(0,0)); /* fx1 fx2 */
			xmm6    = _mm_shuffle_pd(xmm1,xmm2,_MM_SHUFFLE2(1,1)); /* fy1 fy2 */

			/* subtract partial forces */
			xmm5    = _mm_sub_pd(xmm5,t1);
			xmm6    = _mm_sub_pd(xmm6,t2);
			xmm7    = _mm_sub_pd(xmm7,t3);

			/* transpose back to per-atom layout */
			xmm1    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(0,0)); /* fx1 fy1 */
			xmm2    = _mm_shuffle_pd(xmm5,xmm6,_MM_SHUFFLE2(1,1)); /* fx2 fy2 */

			/* store fx and fy */
			_mm_storeu_pd(faction+j13,xmm1);
			_mm_storeu_pd(faction+j23,xmm2);

			/* .. then fz: low lane belongs to atom 1, high lane to atom 2
			 * (the previous code wrote the low lane to both atoms) */
			_mm_storel_pd(faction+j13+2,xmm7);
			_mm_storeh_pd(faction+j23+2,xmm7);
		}

		/* In double precision, offset can only be either 0 or 1.
		 * The tail works on the low lane only; the high lanes of the
		 * accumulators are carried through unchanged by the _sd operations. */
		if(offset!=0)
		{
			jnr1    = jjnr[k];
			j13     = jnr1*3;

			jx      = _mm_load_sd(pos+j13);
			jy      = _mm_load_sd(pos+j13+1);
			jz      = _mm_load_sd(pos+j13+2);

			isaj    = _mm_load_sd(invsqrta+jnr1);
			isaprod = _mm_mul_sd(isai,isaj);
			dvdaj   = _mm_load_sd(dvda+jnr1);
			q       = _mm_load_sd(charge+jnr1);
			qq      = _mm_mul_sd(iq,q);

			dx      = _mm_sub_sd(ix,jx);
			dy      = _mm_sub_sd(iy,jy);
			dz      = _mm_sub_sd(iz,jz);

			rsq11   = _mm_add_pd( _mm_add_pd( _mm_mul_pd(dx,dx) , _mm_mul_pd(dy,dy) ) , _mm_mul_pd(dz,dz) );
			rinv    = my_invrsq_pd(rsq11);

			vcoul   = _mm_mul_sd(qq,rinv);
			fscal   = _mm_mul_sd(vcoul,rinv);
			qq      = _mm_mul_sd(isaprod,qq);
			qq      = _mm_mul_sd(qq,neg);
			gbscale = _mm_mul_sd(isaprod,gbtabscale);

			r       = _mm_mul_sd(rsq11,rinv);
			rt      = _mm_mul_sd(r,gbscale);
			n0      = _mm_cvttpd_epi32(rt);
			n0d     = _mm_cvtepi32_pd(n0);
			eps     = _mm_sub_sd(rt,n0d);
			eps2    = _mm_mul_sd(eps,eps);

			nnn     = _mm_slli_epi64(n0,2);

			/* Only element 0 of nnn is a real table index here: the upper
			 * lane of rt is not a pair distance, so it must not be used to
			 * address GBtab (doing so could read outside the table). */
			xmm1    = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0)));   /* Y F */
			xmm3    = _mm_load_pd(GBtab+(_mm_extract_epi64(nnn,0))+2); /* G H */

			Y       = xmm1;                      /* low lane: Y */
			F       = _mm_unpackhi_pd(xmm1,xmm1); /* low lane: F */
			G       = xmm3;                      /* low lane: G */
			H       = _mm_unpackhi_pd(xmm3,xmm3); /* low lane: H */

			G       = _mm_mul_sd(G,eps);
			H       = _mm_mul_sd(H,eps2);
			Fp      = _mm_add_sd(F,G);
			Fp      = _mm_add_sd(Fp,H);
			VV      = _mm_mul_sd(Fp,eps);
			VV      = _mm_add_sd(Y,VV);
			H       = _mm_mul_sd(two,H);
			FF      = _mm_add_sd(Fp,G);
			FF      = _mm_add_sd(FF,H);
			vgb     = _mm_mul_sd(qq,VV);
			fijC    = _mm_mul_sd(qq,FF);
			fijC    = _mm_mul_sd(fijC,gbscale);

			dvdatmp = _mm_mul_sd(fijC,r);
			dvdatmp = _mm_add_sd(vgb,dvdatmp);
			dvdatmp = _mm_mul_sd(dvdatmp,neg);
			dvdatmp = _mm_mul_sd(dvdatmp,half);
			dvdasum = _mm_add_sd(dvdasum,dvdatmp);

			xmm1    = _mm_mul_sd(dvdatmp,isaj);
			xmm1    = _mm_mul_sd(xmm1,isaj);
			dvdaj   = _mm_add_sd(dvdaj,xmm1);

			/* store dvda */
			_mm_storel_pd(dvda+jnr1,dvdaj);

			vctot   = _mm_add_sd(vctot,vcoul);
			vgbtot  = _mm_add_sd(vgbtot,vgb);

			fscal   = _mm_sub_sd(fijC,fscal);
			fscal   = _mm_mul_sd(fscal,neg);
			fscal   = _mm_mul_sd(fscal,rinv);

			/* calculate partial force terms */
			t1      = _mm_mul_sd(fscal,dx);
			t2      = _mm_mul_sd(fscal,dy);
			t3      = _mm_mul_sd(fscal,dz);

			/* update the i force */
			fix     = _mm_add_sd(fix,t1);
			fiy     = _mm_add_sd(fiy,t2);
			fiz     = _mm_add_sd(fiz,t3);

			/* accumulate forces from memory */
			xmm5    = _mm_load_sd(faction+j13);   /* fx */
			xmm6    = _mm_load_sd(faction+j13+1); /* fy */
			xmm7    = _mm_load_sd(faction+j13+2); /* fz */

			/* subtract partial forces */
			xmm5    = _mm_sub_sd(xmm5,t1);
			xmm6    = _mm_sub_sd(xmm6,t2);
			xmm7    = _mm_sub_sd(xmm7,t3);

			/* store forces */
			_mm_store_sd(faction+j13,xmm5);
			_mm_store_sd(faction+j13+1,xmm6);
			_mm_store_sd(faction+j13+2,xmm7);
		}

		/* fix/fiy/fiz now contain two partial terms (one per lane) that both
		 * should be added to the i particle forces. After the unpack and add,
		 * the high lane holds lane0+lane1; the low lane is discarded.
		 */
		t1       = _mm_unpacklo_pd(t1,fix);
		t2       = _mm_unpacklo_pd(t2,fiy);
		t3       = _mm_unpacklo_pd(t3,fiz);

		fix      = _mm_add_pd(fix,t1);
		fiy      = _mm_add_pd(fiy,t2);
		fiz      = _mm_add_pd(fiz,t3);

		fix      = _mm_shuffle_pd(fix,fix,_MM_SHUFFLE2(1,1));
		fiy      = _mm_shuffle_pd(fiy,fiy,_MM_SHUFFLE2(1,1));
		fiz      = _mm_shuffle_pd(fiz,fiz,_MM_SHUFFLE2(1,1));

		/* Load i forces from memory */
		xmm1     = _mm_load_sd(faction+ii3);
		xmm2     = _mm_load_sd(faction+ii3+1);
		xmm3     = _mm_load_sd(faction+ii3+2);

		/* Add to i force */
		fix      = _mm_add_sd(fix,xmm1);
		fiy      = _mm_add_sd(fiy,xmm2);
		fiz      = _mm_add_sd(fiz,xmm3);

		/* store i forces to memory */
		_mm_store_sd(faction+ii3,fix);
		_mm_store_sd(faction+ii3+1,fiy);
		_mm_store_sd(faction+ii3+2,fiz);

		/* now do dvda (same two-lane reduction, result in the high lane) */
		dvdatmp  = _mm_unpacklo_pd(dvdatmp,dvdasum);
		dvdasum  = _mm_add_pd(dvdasum,dvdatmp);
		_mm_storeh_pd(&dva,dvdasum);
		dvda[ii] = dvda[ii] + dva*isai_d*isai_d;

		ggid     = gid[n];

		/* Coulomb potential */
		vcoul    = _mm_unpacklo_pd(vcoul,vctot);
		vctot    = _mm_add_pd(vctot,vcoul);
		_mm_storeh_pd(&vct,vctot);
		Vc[ggid] = Vc[ggid] + vct;

		/* GB potential */
		vgb      = _mm_unpacklo_pd(vgb,vgbtot);
		vgbtot   = _mm_add_pd(vgbtot,vgb);
		_mm_storeh_pd(&vgbt,vgbtot);
		gpol[ggid] = gpol[ggid] + vgbt;
	}

	*outeriter   = nri;
	*inneriter   = nj1;

}
Exemplo n.º 23
0
void exchlaplacecoeffData_7(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((!neighbor_isValid[1][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S956, S958, S952, S955, S960, S954, S957, S951, S959, S950, S953 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+138338)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+138470)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+138338)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+34586)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+34718)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+34586)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+103754)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+103886)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+103754)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+2)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+134)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+2)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+69170)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+69302)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+69170)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+121046)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+121178)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+121046)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+51878)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+52010)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+51878)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<127); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<130); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+86462)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+86594)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+86462)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+17294)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+17426)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+17294)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(1.280000e+02);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<127); i1 += 4) {
/* yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<130); i1 += 1) {
yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
}
}
if ((!neighbor_isValid[1][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S962, S961, S970, S964, S967, S966, S969, S963, S971, S965, S968 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+121174)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+121306)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+121174)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+138466)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+138598)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+138466)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(1.000000e+00);
__m128d vec2 = _mm_set1_pd(1.280000e+02);
__m128d vec5 = _mm_set1_pd(yPos);
for (; (i1<127); i1 += 4) {
/* yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec3 = _mm_load1_pd((&posEnd[1]));
__m128d vec3_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec4 = _mm_load1_pd((&posBegin[1]));
__m128d vec4_2 = _mm_load1_pd((&posBegin[1]));
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4);
vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2);
}
for (; (i1<130); i1 += 1) {
yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+103882)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+104014)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+103882)] = 0.000000e+00;
}
}
}
{
int i1 = 1;
for (; (i1<(2&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<127); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<130); i1 += 1) {
xPos = posEnd[0];
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+130)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+262)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+130)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+69298)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+69430)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+69298)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+52006)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+52138)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+52006)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+86590)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+86722)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+86590)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+34714)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+34846)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+34714)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i1 = 1;
for (; (i1<=128); i1 += 2) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+17422)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[((i1*132)+17554)] = 0.000000e+00;
}
for (; (i1<=129); i1 += 1) {
fieldData_LaplaceCoeff_7_p1[((i1*132)+17422)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[1][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S982, S976, S979, S973, S972, S981, S975, S978, S977, S980, S974 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+138468)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+138469)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+138468)] = 0.000000e+00;
}
}
{
int i2 = 2;
for (; (i2<=129); i2 += 2) {
xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=130); i2 += 1) {
xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+34716)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+34717)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+34716)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+17424)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+17425)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+17424)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+132)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+133)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+132)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+86592)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+86593)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+86592)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+52008)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+52009)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+52008)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+103884)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+103885)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+103884)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+121176)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+121177)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+121176)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=129); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=130); i2 += 1) {
yPos = posBegin[1];
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+69300)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+69301)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+69300)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[1][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S988, S991, S985, S990, S984, S993, S987, S983, S992, S986, S989 */
{
{
{
{
{
{
{
{
{
{
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+138072)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+138073)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+138072)] = 0.000000e+00;
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+34320)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+34321)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+34320)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+155364)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+155365)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+155364)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+51612)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+51613)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+51612)] = 0.000000e+00;
}
}
}
{
int i2 = 2;
for (; (i2<=129); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=130); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
int i2 = 2;
for (; (i2<=129); i2 += 2) {
xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2-1)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=130); i2 += 1) {
xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+103488)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+103489)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+103488)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+68904)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+68905)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+68904)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+17028)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+17029)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+17028)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+120780)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+120781)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+120780)] = 0.000000e+00;
}
}
}
{
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]);
int i2 = 2;
for (; (i2<=129); i2 += 2) {
fieldData_LaplaceCoeff_7_p1[(i2+86196)] = 0.000000e+00;
fieldData_LaplaceCoeff_7_p1[(i2+86197)] = 0.000000e+00;
}
for (; (i2<=130); i2 += 1) {
fieldData_LaplaceCoeff_7_p1[(i2+86196)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S994 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*129)]);
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]);
int i4 = 1;
for (; (i4<=128); i4 += 2) {
buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+130)];
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+262)];
}
for (; (i4<=129); i4 += 1) {
buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+130)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(buffer_Send[1], 1161, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(buffer_Recv[0], 1161, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S995 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*129)]);
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]);
int i4 = 3;
for (; (i4<=130); i4 += 2) {
fieldData_LaplaceCoeff_7_p1[((i4*132)-262)] = buffer_Recv_0_p1[(i4-3)];
fieldData_LaplaceCoeff_7_p1[((i4*132)-130)] = buffer_Recv_0_p1[(i4-2)];
}
for (; (i4<=131); i4 += 1) {
fieldData_LaplaceCoeff_7_p1[((i4*132)-262)] = buffer_Recv_0_p1[(i4-3)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_LaplaceCoeff[7][17030], 1, mpiDatatype_9_129_17292, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff[7][134], 1, mpiDatatype_9_129_17292, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S996 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]);
double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*131)]);
int i4 = 0;
for (; (i4<=129); i4 += 2) {
buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+3)];
buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+135)];
}
for (; (i4<=130); i4 += 1) {
buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+3)];
}
}
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S997 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*131)]);
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]);
int i4 = 0;
for (; (i4<=129); i4 += 2) {
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+129)];
buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+261)];
}
for (; (i4<=130); i4 += 1) {
buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+129)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Isend(buffer_Send[0], 1179, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]);
reqOutstanding_Send[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Isend(buffer_Send[1], 1179, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
MPI_Irecv(buffer_Recv[0], 1179, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
MPI_Irecv(buffer_Recv[1], 1179, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
if (reqOutstanding_Recv[1]) {
waitForMPIReq(&mpiRequest_Recv[1]);
reqOutstanding_Recv[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) {
/* Statements in this Scop: S998 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*131)]);
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]);
int i4 = 1;
for (; (i4<=130); i4 += 2) {
fieldData_LaplaceCoeff_7_p1[((i4*132)-131)] = buffer_Recv_0_p1[(i4-1)];
fieldData_LaplaceCoeff_7_p1[((i4*132)+1)] = buffer_Recv_0_p1[i4];
}
for (; (i4<=131); i4 += 1) {
fieldData_LaplaceCoeff_7_p1[((i4*132)-131)] = buffer_Recv_0_p1[(i4-1)];
}
}
}
if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) {
/* Statements in this Scop: S999 */
for (int i3 = 0; (i3<=8); i3 += 1) {
double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*131)]);
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]);
int i4 = 131;
for (; (i4<=260); i4 += 2) {
fieldData_LaplaceCoeff_7_p1[((i4*132)-17161)] = buffer_Recv_1_p1[(i4-131)];
fieldData_LaplaceCoeff_7_p1[((i4*132)-17029)] = buffer_Recv_1_p1[(i4-130)];
}
for (; (i4<=261); i4 += 1) {
fieldData_LaplaceCoeff_7_p1[((i4*132)-17161)] = buffer_Recv_1_p1[(i4-131)];
}
}
}
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[0]) {
waitForMPIReq(&mpiRequest_Send[0]);
reqOutstanding_Send[0] = false;
}
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
reqOutstanding_Send[1] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Isend(&fieldData_LaplaceCoeff[7][265], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]);
reqOutstanding_Send[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Isend(&fieldData_LaplaceCoeff[7][16897], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]);
reqOutstanding_Send[3] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) {
MPI_Irecv(&fieldData_LaplaceCoeff[7][1], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = true;
}
if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) {
MPI_Irecv(&fieldData_LaplaceCoeff[7][17161], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Recv[2]) {
waitForMPIReq(&mpiRequest_Recv[2]);
reqOutstanding_Recv[2] = false;
}
if (reqOutstanding_Recv[3]) {
waitForMPIReq(&mpiRequest_Recv[3]);
reqOutstanding_Recv[3] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
;
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[1]) {
if (reqOutstanding_Send[2]) {
waitForMPIReq(&mpiRequest_Send[2]);
reqOutstanding_Send[2] = false;
}
if (reqOutstanding_Send[3]) {
waitForMPIReq(&mpiRequest_Send[3]);
reqOutstanding_Send[3] = false;
}
}
}
}
Exemplo n.º 24
0
void exchrhs_gmrfData_8(unsigned int slot) {
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((!neighbor_isValid[0][0])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S589, S588, S590 */
{
{
{
int i1 = 0;
for (; (i1<(1&(~1))); i1 += 1) {
yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(2.560000e+02);
__m128d vec4 = _mm_set1_pd(yPos);
for (; (i1<254); i1 += 4) {
/* yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec2 = _mm_load1_pd((&posEnd[1]));
__m128d vec2_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec3 = _mm_load1_pd((&posBegin[1]));
__m128d vec3_2 = _mm_load1_pd((&posBegin[1]));
vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0, vec1), _mm_sub_pd(vec2, vec3)), vec3);
vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0_2, vec1), _mm_sub_pd(vec2_2, vec3_2)), vec3_2);
}
for (; (i1<257); i1 += 1) {
yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
{
int i1 = 0;
for (; (i1<(1&(~1))); i1 += 1) {
xPos = posBegin[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<254); i1 += 4) {
/* xPos = posBegin[0]; */
__m128d vec0 = _mm_load1_pd((&posBegin[0]));
__m128d vec0_2 = _mm_load1_pd((&posBegin[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<257); i1 += 1) {
xPos = posBegin[0];
}
}
}
{
double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]);
int i1 = 0;
for (; (i1<=255); i1 += 2) {
fieldData_RHS_GMRF_8_p1[(i1*258)] = 0.000000e+00;
fieldData_RHS_GMRF_8_p1[((i1*258)+258)] = 0.000000e+00;
}
for (; (i1<=256); i1 += 1) {
fieldData_RHS_GMRF_8_p1[(i1*258)] = 0.000000e+00;
}
}
}
}
}
if ((!neighbor_isValid[0][1])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S592, S591, S593 */
{
{
{
int i1 = 0;
for (; (i1<(1&(~1))); i1 += 1) {
yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
__m128d vec1 = _mm_set1_pd(2.560000e+02);
__m128d vec4 = _mm_set1_pd(yPos);
for (; (i1<254); i1 += 4) {
/* yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */
__m128d vec0 = _mm_set_pd(i1+1,i1);
__m128d vec0_2 = _mm_set_pd(i1+1,i1);
__m128d vec2 = _mm_load1_pd((&posEnd[1]));
__m128d vec2_2 = _mm_load1_pd((&posEnd[1]));
__m128d vec3 = _mm_load1_pd((&posBegin[1]));
__m128d vec3_2 = _mm_load1_pd((&posBegin[1]));
vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0, vec1), _mm_sub_pd(vec2, vec3)), vec3);
vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0_2, vec1), _mm_sub_pd(vec2_2, vec3_2)), vec3_2);
}
for (; (i1<257); i1 += 1) {
yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]);
}
}
{
double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]);
int i1 = 0;
for (; (i1<=255); i1 += 2) {
fieldData_RHS_GMRF_8_p1[((i1*258)+256)] = 0.000000e+00;
fieldData_RHS_GMRF_8_p1[((i1*258)+514)] = 0.000000e+00;
}
for (; (i1<=256); i1 += 1) {
fieldData_RHS_GMRF_8_p1[((i1*258)+256)] = 0.000000e+00;
}
}
}
{
int i1 = 0;
for (; (i1<(1&(~1))); i1 += 1) {
xPos = posEnd[0];
}
__m128d vec1 = _mm_set1_pd(xPos);
for (; (i1<254); i1 += 4) {
/* xPos = posEnd[0]; */
__m128d vec0 = _mm_load1_pd((&posEnd[0]));
__m128d vec0_2 = _mm_load1_pd((&posEnd[0]));
vec1 = vec0;
vec1 = vec0_2;
}
for (; (i1<257); i1 += 1) {
xPos = posEnd[0];
}
}
}
}
}
if ((!neighbor_isValid[0][2])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S595, S594, S596 */
{
{
{
double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]);
int i2 = 0;
for (; (i2<=255); i2 += 2) {
fieldData_RHS_GMRF_8_p1[i2] = 0.000000e+00;
fieldData_RHS_GMRF_8_p1[(i2+1)] = 0.000000e+00;
}
for (; (i2<=256); i2 += 1) {
fieldData_RHS_GMRF_8_p1[i2] = 0.000000e+00;
}
}
{
int i2 = 0;
for (; (i2<=255); i2 += 2) {
xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2+1)/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=256); i2 += 1) {
xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
}
{
int i2 = 0;
for (; (i2<=255); i2 += 2) {
yPos = posBegin[1];
yPos = posBegin[1];
}
for (; (i2<=256); i2 += 1) {
yPos = posBegin[1];
}
}
}
}
}
if ((!neighbor_isValid[0][3])) {
{
double xPos;
double yPos;
/* Statements in this Scop: S598, S597, S599 */
{
{
{
int i2 = 0;
for (; (i2<=255); i2 += 2) {
xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
xPos = ((((i2+1)/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
for (; (i2<=256); i2 += 1) {
xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]);
}
}
{
int i2 = 0;
for (; (i2<=255); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=256); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]);
int i2 = 0;
for (; (i2<=255); i2 += 2) {
fieldData_RHS_GMRF_8_p1[(i2+66048)] = 0.000000e+00;
fieldData_RHS_GMRF_8_p1[(i2+66049)] = 0.000000e+00;
}
for (; (i2<=256); i2 += 1) {
fieldData_RHS_GMRF_8_p1[(i2+66048)] = 0.000000e+00;
}
}
}
}
}
}
}
}
Exemplo n.º 25
0
/*
 * SSE2 double-precision nonbonded kernel combining three interactions per
 * i-j pair: plain Coulomb (qq*rinv), a tabulated GB polarization term (GBtab)
 * and tabulated Lennard-Jones (VFtab, scaled by c6 and c12).
 *
 * For each of the *p_nri list entries n, the i atom iinr[n], displaced by the
 * shift vector shiftvec[3*shift[n] ..], interacts with the j atoms
 * jjnr[jindex[n]] .. jjnr[jindex[n+1]-1]. j atoms are processed two per
 * iteration (one per SSE lane); a leftover unpaired j atom is handled with
 * the scalar (_sd) forms of the same operations.
 *
 * Results are accumulated through helper macros/functions defined elsewhere
 * (their exact semantics are not visible here):
 *   faction, fshift  - j-atom forces and the i-atom force / shift force
 *   vc[gid[n]]       - Coulomb energy sum
 *   gpol[gid[n]]     - GB energy sum (gpol comes from work, cast to gmx_gbdata_t)
 *   vvdw[gid[n]]     - Lennard-Jones energy sum
 *   dvda             - incremented per j atom and once per i atom
 *   *outeriter = nri, *inneriter = nj1 of the last list entry (0 if nri is 0)
 *
 * p_krf, p_crf, p_nthreads, count and mtx are not referenced in the body.
 */
void nb_kernel430_ia32_sse2(int *           p_nri,
                              int *           iinr,
                              int *           jindex,
                              int *           jjnr,
                              int *           shift,
                              double *         shiftvec,
                              double *         fshift,
                              int *           gid,
                              double *         pos,
                              double *         faction,
                              double *         charge,
                              double *         p_facel,
                              double *         p_krf,
                              double *         p_crf,
                              double *         vc,
                              int *           type,
                              int *           p_ntype,
                              double *         vdwparam,
                              double *         vvdw,
                              double *         p_tabscale,
                              double *         VFtab,
                              double *         invsqrta,
                              double *         dvda,
                              double *         p_gbtabscale,
                              double *         GBtab,
                              int *           p_nthreads,
                              int *           count,
                              void *          mtx,
                              int *           outeriter,
                              int *           inneriter,
                              double *         work)
{
  int           nri,ntype,nthreads;
  int           n,ii,is3,ii3,k,nj0,nj1,ggid;
  double        shX,shY,shZ;
	int			  offset,nti;
  int           jnrA,jnrB;
  int           j3A,j3B;
	int           tjA,tjB;
	gmx_gbdata_t *gbdata;
	double *      gpol;
    
	__m128d  iq,qq,jq,isai;
	__m128d  ix,iy,iz;
	__m128d  jx,jy,jz;
	__m128d  dx,dy,dz;
	__m128d  vctot,vvdwtot,vgbtot,dvdasum,gbfactor;
	__m128d  fix,fiy,fiz,tx,ty,tz,rsq;
	__m128d  rinv,isaj,isaprod;
	__m128d  vcoul,fscal,gbscale,c6,c12;
	__m128d  rinvsq,r,rtab;
	__m128d  eps,Y,F,G,H;
  __m128d  VV,FF,Fp;
	__m128d  vgb,fijGB,dvdatmp;
	__m128d  rinvsix,vvdw6,vvdw12,vvdwtmp;
	__m128d  facel,gbtabscale,dvdaj;
  __m128d  fijD,fijR;
  __m128d  xmm1,tabscale,eps2;
	__m128i  n0, nnn;
    
	
	const __m128d neg        = _mm_set1_pd(-1.0);
	const __m128d zero       = _mm_set1_pd(0.0);
	const __m128d minushalf  = _mm_set1_pd(-0.5);
	const __m128d two        = _mm_set1_pd(2.0);
	
	/* work carries the GB data block; only gpol and the two dielectric constants are read from it */
	gbdata     = (gmx_gbdata_t *)work;
	gpol       = gbdata->gpol;
    
	nri        = *p_nri;
	ntype      = *p_ntype;
    
  /* Loop-invariant scalars broadcast to both lanes */
  gbfactor   = _mm_set1_pd( - ((1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent)));     
  gbtabscale = _mm_load1_pd(p_gbtabscale);  
  facel      = _mm_load1_pd(p_facel);
  tabscale   = _mm_load1_pd(p_tabscale);
  
  /* Give the values filled in by the load macros (and nj1, reported via *inneriter) a defined start */
  nj1         = 0;
  jnrA = jnrB = 0;
  j3A = j3B   = 0;
  jx          = _mm_setzero_pd();
  jy          = _mm_setzero_pd();
  jz          = _mm_setzero_pd();
  c6          = _mm_setzero_pd();
  c12         = _mm_setzero_pd();
	
	/* Outer loop: one iteration per i atom / list entry */
	for(n=0;n<nri;n++)
	{
    is3              = 3*shift[n];     
    shX              = shiftvec[is3];  
    shY              = shiftvec[is3+1];
    shZ              = shiftvec[is3+2];
    nj0              = jindex[n];      
    nj1              = jindex[n+1];    
    ii               = iinr[n];        
    ii3              = 3*ii;           
		
		/* Shifted i-atom position, same value in both lanes */
		ix               = _mm_set1_pd(shX+pos[ii3+0]);
		iy               = _mm_set1_pd(shY+pos[ii3+1]);
		iz               = _mm_set1_pd(shZ+pos[ii3+2]);
    
		iq               = _mm_load1_pd(charge+ii);
		iq               = _mm_mul_pd(iq,facel);
    
		isai             = _mm_load1_pd(invsqrta+ii);
    
		/* Offset of this i atom's type into vdwparam (two doubles, c6 and c12, per type pair) */
		nti              = 2*ntype*type[ii];
		
		/* Per-i-atom accumulators */
		vctot            = _mm_setzero_pd();
		vvdwtot          = _mm_setzero_pd();
		vgbtot           = _mm_setzero_pd();
		dvdasum          = _mm_setzero_pd();
		fix              = _mm_setzero_pd();
		fiy              = _mm_setzero_pd();
		fiz              = _mm_setzero_pd();
        
		/* Inner loop: two j atoms (A in lane 0, B in lane 1) per iteration */
		for(k=nj0;k<nj1-1; k+=2)
		{
			jnrA    = jjnr[k];
			jnrB    = jjnr[k+1];
			
			j3A     = jnrA * 3;
			j3B     = jnrB * 3;
            
      GMX_MM_LOAD_1RVEC_2POINTERS_PD(pos+j3A,pos+j3B,jx,jy,jz);
            
			dx           = _mm_sub_pd(ix,jx);
			dy           = _mm_sub_pd(iy,jy);
			dz           = _mm_sub_pd(iz,jz);
            
      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
      
      rinv         = gmx_mm_invsqrt_pd(rsq);
      /* NOTE(review): rinvsq is assigned here but not read again in this function */
 			rinvsq       = _mm_mul_pd(rinv,rinv);
      
			/***********************************/
			/* INTERACTION SECTION STARTS HERE */
			/***********************************/
			GMX_MM_LOAD_2VALUES_PD(charge+jnrA,charge+jnrB,jq);
			GMX_MM_LOAD_2VALUES_PD(invsqrta+jnrA,invsqrta+jnrB,isaj);
            
      /* Lennard-Jones */
      tjA          = nti+2*type[jnrA];
			tjB          = nti+2*type[jnrB];
      
      GMX_MM_LOAD_2PAIRS_PD(vdwparam+tjA,vdwparam+tjB,c6,c12);
			
			/* Coulomb: vcoul = qq/r, fscal holds vcoul/r until combined with the other terms below */
			isaprod      = _mm_mul_pd(isai,isaj);
			qq           = _mm_mul_pd(iq,jq);            
			vcoul        = _mm_mul_pd(qq,rinv);
			fscal        = _mm_mul_pd(vcoul,rinv);                                 
      vctot        = _mm_add_pd(vctot,vcoul);
      
      /* Polarization interaction */
			qq           = _mm_mul_pd(qq,_mm_mul_pd(isaprod,gbfactor));
			gbscale      = _mm_mul_pd(isaprod,gbtabscale);
      
 			/* Calculate GB table index */
			r            = _mm_mul_pd(rsq,rinv);
			rtab         = _mm_mul_pd(r,gbscale);
			
			/* n0 = truncated table point, eps = fractional part; 4 doubles (Y,F,G,H) per GB table point */
			n0		     = _mm_cvttpd_epi32(rtab);
			eps	     	 = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
			nnn		     = _mm_slli_epi32(n0,2);
			
      /* the tables are 16-byte aligned, so we can use _mm_load_pd */			
      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
      F            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1)));
      GMX_MM_TRANSPOSE2_PD(Y,F);
      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
      H            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,1))+2);
      GMX_MM_TRANSPOSE2_PD(G,H);
      
      /* Cubic in eps: Y becomes Y+eps*(F+G*eps+H*eps^2); F becomes F+2*G*eps+3*H*eps^2 */
      G       = _mm_mul_pd(G,eps);
      H       = _mm_mul_pd(H, _mm_mul_pd(eps,eps) );
      F       = _mm_add_pd(F, _mm_add_pd( G , H ) );
      Y       = _mm_add_pd(Y, _mm_mul_pd(F, eps));
      F       = _mm_add_pd(F, _mm_add_pd(G , _mm_mul_pd(H,two)));
      vgb     = _mm_mul_pd(Y, qq);           
      fijGB   = _mm_mul_pd(F, _mm_mul_pd(qq,gbscale));
      
      /* dvdatmp = -0.5 * (vgb + fijGB*r) */
      dvdatmp = _mm_mul_pd(_mm_add_pd(vgb, _mm_mul_pd(fijGB,r)) , minushalf);
      
      vgbtot  = _mm_add_pd(vgbtot, vgb);
      
      /* The unscaled value goes to the i-atom sum; the j atoms get it scaled by isaj^2 */
      dvdasum = _mm_add_pd(dvdasum, dvdatmp);
      dvdatmp = _mm_mul_pd(dvdatmp, _mm_mul_pd(isaj,isaj));
      
      GMX_MM_INCREMENT_2VALUES_PD(dvda+jnrA,dvda+jnrB,dvdatmp);
			
      /* Calculate VDW table index */
			/* 8 doubles per VFtab point: Y,F,G,H at +0 for the c6 term, Y,F,G,H at +4 for the c12 term */
			rtab    = _mm_mul_pd(r,tabscale);
			n0      = _mm_cvttpd_epi32(rtab);
			eps     = _mm_sub_pd(rtab,_mm_cvtepi32_pd(n0));
			eps2    = _mm_mul_pd(eps,eps);
			nnn     = _mm_slli_epi32(n0,3);
			
      /* Dispersion */
      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1)));
      GMX_MM_TRANSPOSE2_PD(Y,F);
      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+2);
      GMX_MM_TRANSPOSE2_PD(G,H);
      
      /* VV = Y+eps*(F+G*eps+H*eps2), FF = F+2*G*eps+3*H*eps2 */
      G       = _mm_mul_pd(G,eps);
			H       = _mm_mul_pd(H,eps2);
			Fp      = _mm_add_pd(F,G);
			Fp      = _mm_add_pd(Fp,H);
			VV      = _mm_mul_pd(Fp,eps);
			VV      = _mm_add_pd(Y,VV);
			xmm1    = _mm_mul_pd(two,H);
			FF      = _mm_add_pd(Fp,G);
			FF      = _mm_add_pd(FF,xmm1);
			
			vvdw6   = _mm_mul_pd(c6,VV);
			fijD    = _mm_mul_pd(c6,FF);
      
      /* Repulsion (second half of the table point, scaled by c12) */
      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
      F            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+4);
      GMX_MM_TRANSPOSE2_PD(Y,F);
      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
      H            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,1))+6);
      GMX_MM_TRANSPOSE2_PD(G,H);
      
      G       = _mm_mul_pd(G,eps);
			H       = _mm_mul_pd(H,eps2);
			Fp      = _mm_add_pd(F,G);
			Fp      = _mm_add_pd(Fp,H);
			VV      = _mm_mul_pd(Fp,eps);
			VV      = _mm_add_pd(Y,VV);
			xmm1    = _mm_mul_pd(two,H);
			FF      = _mm_add_pd(Fp,G);
			FF      = _mm_add_pd(FF,xmm1);
			
			vvdw12  = _mm_mul_pd(c12,VV);
			fijR    = _mm_mul_pd(c12,FF);
			
			vvdwtmp = _mm_add_pd(vvdw12,vvdw6);
			vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp);
      
			/* fscal = -((fijD+fijR)*tabscale + fijGB - vcoul*rinv) * rinv */
			xmm1    = _mm_add_pd(fijD,fijR);
			xmm1    = _mm_mul_pd(xmm1,tabscale);
			xmm1    = _mm_add_pd(xmm1,fijGB);
			xmm1    = _mm_sub_pd(xmm1,fscal);
			fscal   = _mm_mul_pd(xmm1,neg);
			fscal   = _mm_mul_pd(fscal,rinv);
      
      /***********************************/
			/*  INTERACTION SECTION ENDS HERE  */
			/***********************************/
      
      /* Calculate temporary vectorial force */
      tx           = _mm_mul_pd(fscal,dx);
      ty           = _mm_mul_pd(fscal,dy);
      tz           = _mm_mul_pd(fscal,dz);
      
      /* Increment i atom force */
      fix          = _mm_add_pd(fix,tx);
      fiy          = _mm_add_pd(fiy,ty);
      fiz          = _mm_add_pd(fiz,tz);
      
      /* Store j forces back */
			GMX_MM_DECREMENT_1RVEC_2POINTERS_PD(faction+j3A,faction+j3B,tx,ty,tz);
		}
		
		/* In double precision, offset can only be either 0 or 1: at most one unpaired j atom is left after the two-wide loop */
		if(k<nj1)
		{
			jnrA    = jjnr[k];
			j3A     = jnrA * 3;
      
      GMX_MM_LOAD_1RVEC_1POINTER_PD(pos+j3A,jx,jy,jz);
      
			dx           = _mm_sub_sd(ix,jx);
			dy           = _mm_sub_sd(iy,jy);
			dz           = _mm_sub_sd(iz,jz);
            
      rsq          = gmx_mm_calc_rsq_pd(dx,dy,dz);
      
      rinv         = gmx_mm_invsqrt_pd(rsq);
      /* NOTE(review): rinvsq is assigned here but not read again in this function */
 			rinvsq       = _mm_mul_sd(rinv,rinv);
      
      /* The reason for zeroing these variables here is for fixing bug 585
       * What happens is that __m128d _mm_add_sd(a,b) gives back r0=a[0]+b[0],
       * and r1=0, but it should be r1=a[1]. 
       * This might be a compiler issue (tested with gcc-4.1.3 and -O3).
       * To work around it, we zero these variables and use _mm_add_pd (**) instead
       * Note that the only variables that get affected are the energies since
       * the total sum needs to be correct 
       */
      vgb          = _mm_setzero_pd();
      vcoul        = _mm_setzero_pd();
      dvdatmp      = _mm_setzero_pd();
      vvdw6        = _mm_setzero_pd();
      vvdw12       = _mm_setzero_pd();

      /***********************************/
			/* INTERACTION SECTION STARTS HERE */
			/***********************************/
			GMX_MM_LOAD_1VALUE_PD(charge+jnrA,jq);
			GMX_MM_LOAD_1VALUE_PD(invsqrta+jnrA,isaj);
            
      /* Lennard-Jones */
      tjA          = nti+2*type[jnrA];
      
      GMX_MM_LOAD_1PAIR_PD(vdwparam+tjA,c6,c12);
			
			isaprod      = _mm_mul_sd(isai,isaj);
			qq           = _mm_mul_sd(jq,iq);            
			vcoul        = _mm_mul_sd(qq,rinv);
			fscal        = _mm_mul_sd(vcoul,rinv);                                 
      vctot        = _mm_add_pd(vctot,vcoul); /* (**) */
      
      /* Polarization interaction */
			qq           = _mm_mul_sd(qq,_mm_mul_sd(isaprod,gbfactor));
			gbscale      = _mm_mul_sd(isaprod,gbtabscale);
      
 			/* Calculate GB table index */
			r            = _mm_mul_sd(rsq,rinv);
			rtab         = _mm_mul_sd(r,gbscale);
			
			n0		     = _mm_cvttpd_epi32(rtab);
			eps	     	 = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
			nnn		     = _mm_slli_epi32(n0,2);
			
      /* the tables are 16-byte aligned, so we can use _mm_load_pd */			
      /* Only lane 0 of nnn is used for lookups; the partner register is zero-filled before the transpose */
      Y            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))); 
      F            = _mm_setzero_pd();
      GMX_MM_TRANSPOSE2_PD(Y,F);
      G            = _mm_load_pd(GBtab+(gmx_mm_extract_epi32(nnn,0))+2); 
      H            = _mm_setzero_pd();
      GMX_MM_TRANSPOSE2_PD(G,H);
      
      G       = _mm_mul_sd(G,eps);
      H       = _mm_mul_sd(H, _mm_mul_sd(eps,eps) );
      F       = _mm_add_sd(F, _mm_add_sd( G , H ) );
      Y       = _mm_add_sd(Y, _mm_mul_sd(F, eps));
      F       = _mm_add_sd(F, _mm_add_sd(G , _mm_mul_sd(H,two)));
      vgb     = _mm_mul_sd(Y, qq);           
      fijGB   = _mm_mul_sd(F, _mm_mul_sd(qq,gbscale));
      
      dvdatmp = _mm_mul_sd(_mm_add_sd(vgb, _mm_mul_sd(fijGB,r)) , minushalf);
      
      vgbtot  = _mm_add_pd(vgbtot, vgb); /* (**) */
      
      dvdasum = _mm_add_pd(dvdasum, dvdatmp); /* (**) */
      dvdatmp = _mm_mul_sd(dvdatmp, _mm_mul_sd(isaj,isaj));
      
      GMX_MM_INCREMENT_1VALUE_PD(dvda+jnrA,dvdatmp);
			
      /* Calculate VDW table index */
			rtab    = _mm_mul_sd(r,tabscale);
			n0      = _mm_cvttpd_epi32(rtab);
			eps     = _mm_sub_sd(rtab,_mm_cvtepi32_pd(n0));
			eps2    = _mm_mul_sd(eps,eps);
			nnn     = _mm_slli_epi32(n0,3);
			
      /* Dispersion */
      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))); 
      F            = _mm_setzero_pd();
      GMX_MM_TRANSPOSE2_PD(Y,F);
      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+2); 
      H            = _mm_setzero_pd();
      GMX_MM_TRANSPOSE2_PD(G,H);
      
      G       = _mm_mul_sd(G,eps);
			H       = _mm_mul_sd(H,eps2);
			Fp      = _mm_add_sd(F,G);
			Fp      = _mm_add_sd(Fp,H);
			VV      = _mm_mul_sd(Fp,eps);
			VV      = _mm_add_sd(Y,VV);
			xmm1    = _mm_mul_sd(two,H);
			FF      = _mm_add_sd(Fp,G);
			FF      = _mm_add_sd(FF,xmm1);
			
			vvdw6   = _mm_mul_sd(c6,VV);
			fijD    = _mm_mul_sd(c6,FF);
      
      /* Repulsion (second half of the table point, scaled by c12) */
      Y            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+4); 
      F            = _mm_setzero_pd();
      GMX_MM_TRANSPOSE2_PD(Y,F);
      G            = _mm_load_pd(VFtab+(gmx_mm_extract_epi32(nnn,0))+6); 
      H            = _mm_setzero_pd();
      GMX_MM_TRANSPOSE2_PD(G,H);
      
      G       = _mm_mul_sd(G,eps);
			H       = _mm_mul_sd(H,eps2);
			Fp      = _mm_add_sd(F,G);
			Fp      = _mm_add_sd(Fp,H);
			VV      = _mm_mul_sd(Fp,eps);
			VV      = _mm_add_sd(Y,VV);
			xmm1    = _mm_mul_sd(two,H);
			FF      = _mm_add_sd(Fp,G);
			FF      = _mm_add_sd(FF,xmm1);
			
			vvdw12  = _mm_mul_sd(c12,VV);
			fijR    = _mm_mul_sd(c12,FF);
			
			vvdwtmp = _mm_add_sd(vvdw12,vvdw6);
			vvdwtot = _mm_add_pd(vvdwtot,vvdwtmp); /* (**) */
            
			xmm1    = _mm_add_sd(fijD,fijR);
			xmm1    = _mm_mul_sd(xmm1,tabscale);
			xmm1    = _mm_add_sd(xmm1,fijGB);
			xmm1    = _mm_sub_sd(xmm1,fscal);
			fscal   = _mm_mul_sd(xmm1,neg);
			fscal   = _mm_mul_sd(fscal,rinv);

      /***********************************/
			/*  INTERACTION SECTION ENDS HERE  */
			/***********************************/
      
      /* Calculate temporary vectorial force */
      tx           = _mm_mul_sd(fscal,dx);
      ty           = _mm_mul_sd(fscal,dy);
      tz           = _mm_mul_sd(fscal,dz);
      
      /* Increment i atom force */
      fix          = _mm_add_sd(fix,tx);
      fiy          = _mm_add_sd(fiy,ty);
      fiz          = _mm_add_sd(fiz,tz);
      
      /* Store j forces back */
			GMX_MM_DECREMENT_1RVEC_1POINTER_PD(faction+j3A,tx,ty,tz);
		}
		
    /* i-atom contribution is scaled by isai^2 before being written out */
    dvdasum = _mm_mul_pd(dvdasum, _mm_mul_pd(isai,isai));
    gmx_mm_update_iforce_1atom_pd(&fix,&fiy,&fiz,faction+ii3,fshift+is3);
    
    ggid     = gid[n];         
    
    /* Presumably each helper reduces the two lanes and adds the result to the target; confirm against its definition */
    gmx_mm_update_1pot_pd(vctot,vc+ggid);
    gmx_mm_update_1pot_pd(vgbtot,gpol+ggid);
    gmx_mm_update_1pot_pd(dvdasum,dvda+ii);
    gmx_mm_update_1pot_pd(vvdwtot,vvdw+ggid);
    
	}
  
	*outeriter   = nri;            
  *inneriter   = nj1; 	
}
Exemplo n.º 26
0
// Compiler code-generation test case: passes the untyped pointer y straight to
// _mm_load1_pd and returns the resulting vector. The parameter x is not used.
// The two directive comments inside the body are pattern lines for an external
// matcher (presumably LLVM FileCheck run over the emitted IR); they expect the
// function definition followed by a double load with alignment 1, so they and
// the single statement below must stay exactly as written.
__m128d test_load1_pd(__m128 x, void* y) {
  // CHECK: define {{.*}} @test_load1_pd
  // CHECK: load double* {{.*}}, align 1{{$}}
  return _mm_load1_pd(y);
}
Exemplo n.º 27
0
/*
 * Double-precision SSE2 routine; going by its name it is the HCT/OBC
 * Generalized-Born radius calculation. What the code visibly does:
 *
 *   1. Zeroes born->gpol_hct_work[0 .. born->nr).
 *   2. For every i entry of the neighbour list nl, walks its j entries two at
 *      a time (plus a single-element tail when the count is odd). For each
 *      pair it evaluates an "aj -> ai" term that is accumulated in sum_ai and
 *      an "ai -> aj" term that is handed to the work[] increment macro for
 *      the j atom(s), and appends two masked terms (dadx1, dadx2) to fr->dadx.
 *   3. Sums work[] across ranks when PARTDECOMP(cr) or DOMAINDECOMP(cr) holds.
 *   4. Converts work[] into born->bRad[] and fr->invsqrta[] (plus
 *      born->drobc[] in the non-HCT branch) for atoms with born->use[i] != 0.
 *
 * Parameters:
 *   cr            communication record; only tested by the decomposition
 *                 macros and passed on to the summation/spread helpers.
 *   fr            supplies dadx, shift_vec and natoms_force; receives invsqrta.
 *   natoms        element count handed to gmx_sum().
 *   top           supplies atomtypes.gb_radius, indexed by md->typeA[i].
 *   atype         not referenced in this function.
 *   x             coordinates, three doubles per atom (indexed 3*atom + 0..2).
 *   nl            neighbour list (nri, iinr, shift, jindex, jjnr).
 *   born          GB data: gb_radius, param, gpol_hct_work, use, bRad, drobc,
 *                 gb_doffset and the obc_alpha / obc_beta / obc_gamma values.
 *   md            supplies typeA.
 *   gb_algorithm  egbHCT selects the branch labelled HCT; every other value
 *                 takes the branch labelled OBC.
 *
 * Returns 0 unconditionally.
 *
 * NOTE(review): dadx is written with _mm_store_pd, which requires a 16-byte
 * aligned address; the alignment of fr->dadx is not visible here - confirm.
 */
int 
calc_gb_rad_hct_obc_sse2_double(t_commrec *cr, t_forcerec * fr, int natoms, gmx_localtop_t *top,
                                const t_atomtypes *atype, double *x, t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md,int gb_algorithm)
{
	/* NOTE(review): several locals declared below are never referenced by name
	 * in this function body (ai, n, at0, at1, offset, sum_tmp, gbr, sum_ai2,
	 * sum_ai3, rai_inv2, sk, sk2, duij, sk2_inv, log_term, tmp, tmp_sum,
	 * tmp_ai, mask, three, zero, neg). */
	int i,ai,k,n,ii,ii3,is3,nj0,nj1,at0,at1,offset;
    int jnrA,jnrB;
    int j3A,j3B;
	double shX,shY,shZ;
	double rr,rr_inv,rr_inv2,sum_tmp,sum,sum2,sum3,gbr;
	double sum_ai2, sum_ai3,tsum,tchain,doffset;
	double *obc_param;
    double *gb_radius;
    double *work;
    int *  jjnr;
    double *dadx;
    double *shiftvec;
    double min_rad,rad;
    
	__m128d ix,iy,iz,jx,jy,jz;
	__m128d dx,dy,dz,t1,t2,t3,t4;
	__m128d rsq,rinv,r;
	__m128d rai,rai_inv,raj, raj_inv,rai_inv2,sk,sk2,lij,dlij,duij;
	__m128d uij,lij2,uij2,lij3,uij3,diff2;
	__m128d lij_inv,sk2_inv,prod,log_term,tmp,tmp_sum;
	__m128d sum_ai, tmp_ai,sk_ai,sk_aj,sk2_ai,sk2_aj,sk2_rinv;
	__m128d dadx1,dadx2;
    __m128d logterm;
	__m128d mask;
	__m128d obc_mask1,obc_mask2,obc_mask3;    
    
    /* Broadcast numeric constants used in the pair formulas */
    __m128d oneeighth   = _mm_set1_pd(0.125);
    __m128d onefourth   = _mm_set1_pd(0.25);
    
	const __m128d half  = _mm_set1_pd(0.5);
	const __m128d three = _mm_set1_pd(3.0);
	const __m128d one   = _mm_set1_pd(1.0);
	const __m128d two   = _mm_set1_pd(2.0);
	const __m128d zero  = _mm_set1_pd(0.0);
	const __m128d neg   = _mm_set1_pd(-1.0);
	
	/* Set the dielectric offset */
	/* ...and cache the arrays used inside the loops in local pointers */
	doffset   = born->gb_doffset;
	gb_radius = born->gb_radius;
    obc_param = born->param;
    work      = born->gpol_hct_work;
    jjnr      = nl->jjnr;
    dadx      = fr->dadx;
    shiftvec  = fr->shift_vec[0];
    
    jx        = _mm_setzero_pd();
    jy        = _mm_setzero_pd();
    jz        = _mm_setzero_pd();
    
    jnrA = jnrB = 0;
    
	/* Clear the per-atom accumulation buffer */
	for(i=0;i<born->nr;i++)
	{
		work[i] = 0;
	}
	
	/* Outer loop over the i entries of the neighbour list */
	for(i=0;i<nl->nri;i++)
	{
        ii     = nl->iinr[i];
		ii3	   = ii*3;
        is3    = 3*nl->shift[i];     
        shX    = shiftvec[is3];  
        shY    = shiftvec[is3+1];
        shZ    = shiftvec[is3+2];
        nj0    = nl->jindex[i];      
        nj1    = nl->jindex[i+1];    
        
        /* Shifted i-atom position, broadcast to both lanes */
        ix     = _mm_set1_pd(shX+x[ii3+0]);
		iy     = _mm_set1_pd(shY+x[ii3+1]);
		iz     = _mm_set1_pd(shZ+x[ii3+2]);
		        
		rai    = _mm_load1_pd(gb_radius+ii);
		rai_inv= gmx_mm_inv_pd(rai);
        
		sum_ai = _mm_setzero_pd();
		
		/* born->param is the same array as obc_param above */
		sk_ai  = _mm_load1_pd(born->param+ii);
		sk2_ai = _mm_mul_pd(sk_ai,sk_ai);
        
		/* Main inner loop: two j atoms per iteration, one per SSE lane */
		for(k=nj0;k<nj1-1;k+=2)
		{
			jnrA        = jjnr[k];   
			jnrB        = jjnr[k+1];
			
            j3A         = 3*jnrA;  
			j3B         = 3*jnrB;
            
            /* Project macros; presumably gather the two j atoms' values into
             * the low/high lanes of the named registers - definitions not
             * visible here */
            GMX_MM_LOAD_1RVEC_2POINTERS_PD(x+j3A,x+j3B,jx,jy,jz);
            GMX_MM_LOAD_2VALUES_PD(gb_radius+jnrA,gb_radius+jnrB,raj);
            GMX_MM_LOAD_2VALUES_PD(obc_param+jnrA,obc_param+jnrB,sk_aj);
			
            dx    = _mm_sub_pd(ix, jx);
			dy    = _mm_sub_pd(iy, jy);
			dz    = _mm_sub_pd(iz, jz);
			
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            
            /* r is formed as rsq * rinv */
            rinv        = gmx_mm_invsqrt_pd(rsq);
            r           = _mm_mul_pd(rsq,rinv);
            
			/* Compute raj_inv for the two j atoms of this iteration */
            raj_inv     = gmx_mm_inv_pd(raj);
            
            /* Evaluate influence of atom aj -> ai */
            t1            = _mm_add_pd(r,sk_aj);
            t2            = _mm_sub_pd(r,sk_aj);
            t3            = _mm_sub_pd(sk_aj,r);
            /* Lane masks: all-ones where rai < (r+sk), (r-sk), (sk-r) */
            obc_mask1     = _mm_cmplt_pd(rai, t1);
            obc_mask2     = _mm_cmplt_pd(rai, t2);
            obc_mask3     = _mm_cmplt_pd(rai, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            /* Per-lane select: inverse of (r-sk) where mask2 is set, else rai_inv */
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,rai_inv));
            /* dlij is 1.0 where mask2 is set and 0.0 elsewhere */
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_pd(uij, uij);
            uij3          = _mm_mul_pd(uij2,uij);
            lij2          = _mm_mul_pd(lij, lij);
            lij3          = _mm_mul_pd(lij2,lij);
                        
            diff2         = _mm_sub_pd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_aj        = _mm_mul_pd(sk_aj,sk_aj);
            sk2_rinv      = _mm_mul_pd(sk2_aj,rinv);
            prod          = _mm_mul_pd(onefourth,sk2_rinv);
                        
            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));
            
            /* t1 = 0.5*( (lij-uij) + diff2*(0.25*r - prod) + 0.5*rinv*logterm
             *            + [mask3] 2*(rai_inv - lij) ) */
            t1            = _mm_sub_pd(lij,uij);
            t2            = _mm_mul_pd(diff2,
                                       _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1            = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4            = _mm_mul_pd(two,_mm_sub_pd(rai_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_pd(half,_mm_add_pd(t1,t4));
                        
            /* Only lanes with mask1 set contribute to the i-atom sum */
            sum_ai        = _mm_add_pd(sum_ai, _mm_and_pd(t1,obc_mask1) );
            
            /* Second expression for this direction; its masked value becomes dadx1 */
            t1            = _mm_add_pd(_mm_mul_pd(half,lij2),
                                       _mm_mul_pd(prod,lij3));
            t1            = _mm_sub_pd(t1,
                                       _mm_mul_pd(onefourth,
                                                  _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                             _mm_mul_pd(lij3,r))));
            t2            = _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(uij,rinv),
                                                  _mm_mul_pd(uij3,r)));
            t2            = _mm_sub_pd(t2,
                                       _mm_add_pd(_mm_mul_pd(half,uij2),
                                                  _mm_mul_pd(prod,uij3)));
            t3            = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                                       _mm_mul_pd(rinv,rinv));
            t3            = _mm_sub_pd(t3,
                                       _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                                  _mm_add_pd(one,
                                                             _mm_mul_pd(sk2_rinv,rinv))));
            t1            = _mm_mul_pd(rinv,
                                       _mm_add_pd(_mm_mul_pd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx1         = _mm_and_pd(t1,obc_mask1);
            
            /* Evaluate influence of atom ai -> aj */
            /* Same formulas with the roles swapped: sk_ai / raj / raj_inv */
            t1            = _mm_add_pd(r,sk_ai);
            t2            = _mm_sub_pd(r,sk_ai);
            t3            = _mm_sub_pd(sk_ai,r);
            obc_mask1     = _mm_cmplt_pd(raj, t1);
            obc_mask2     = _mm_cmplt_pd(raj, t2);
            obc_mask3     = _mm_cmplt_pd(raj, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,raj_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_pd(uij, uij);
            uij3          = _mm_mul_pd(uij2,uij);
            lij2          = _mm_mul_pd(lij, lij);
            lij3          = _mm_mul_pd(lij2,lij);
                        
            diff2         = _mm_sub_pd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv      = _mm_mul_pd(sk2_ai,rinv);
            prod          = _mm_mul_pd(onefourth,sk2_rinv);
                        
            logterm       = gmx_mm_log_pd(_mm_mul_pd(uij,lij_inv));
            
            t1            = _mm_sub_pd(lij,uij);
            t2            = _mm_mul_pd(diff2,
                                       _mm_sub_pd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_pd(half,_mm_mul_pd(rinv,logterm));
            t1            = _mm_add_pd(t1,_mm_add_pd(t2,t3));
            t4            = _mm_mul_pd(two,_mm_sub_pd(raj_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_pd(half,_mm_add_pd(t1,t4));
                        
            /* Project macro; presumably adds the two lanes to work[jnrA] and
             * work[jnrB] respectively - definition not visible here */
            GMX_MM_INCREMENT_2VALUES_PD(work+jnrA,work+jnrB,_mm_and_pd(t1,obc_mask1));
            
            t1            = _mm_add_pd(_mm_mul_pd(half,lij2),
                                       _mm_mul_pd(prod,lij3));
            t1            = _mm_sub_pd(t1,
                                       _mm_mul_pd(onefourth,
                                                  _mm_add_pd(_mm_mul_pd(lij,rinv),
                                                             _mm_mul_pd(lij3,r))));
            t2            = _mm_mul_pd(onefourth,
                                       _mm_add_pd(_mm_mul_pd(uij,rinv),
                                                  _mm_mul_pd(uij3,r)));
            t2            = _mm_sub_pd(t2,
                                       _mm_add_pd(_mm_mul_pd(half,uij2),
                                                  _mm_mul_pd(prod,uij3)));
            t3            = _mm_mul_pd(_mm_mul_pd(onefourth,logterm),
                                       _mm_mul_pd(rinv,rinv));
            t3            = _mm_sub_pd(t3,
                                       _mm_mul_pd(_mm_mul_pd(diff2,oneeighth),
                                                  _mm_add_pd(one,
                                                             _mm_mul_pd(sk2_rinv,rinv))));
            t1            = _mm_mul_pd(rinv,
                                       _mm_add_pd(_mm_mul_pd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx2         = _mm_and_pd(t1,obc_mask1);
            
            /* Append both terms to the dadx stream: four doubles per iteration */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } /* end normal inner loop */
        
		/* Tail: one j atom is left when the j count is odd. The arithmetic
		 * uses the scalar (_sd) intrinsics, so only the low lane is computed
		 * for this atom. */
		if(k<nj1)
		{
			jnrA        = jjnr[k];   
			
            j3A         = 3*jnrA;  
            
            GMX_MM_LOAD_1RVEC_1POINTER_PD(x+j3A,jx,jy,jz);
            GMX_MM_LOAD_1VALUE_PD(gb_radius+jnrA,raj);
            GMX_MM_LOAD_1VALUE_PD(obc_param+jnrA,sk_aj);
			
            dx    = _mm_sub_sd(ix, jx);
			dy    = _mm_sub_sd(iy, jy);
			dz    = _mm_sub_sd(iz, jz);
			
            rsq         = gmx_mm_calc_rsq_pd(dx,dy,dz);
            
            rinv        = gmx_mm_invsqrt_pd(rsq);
            r           = _mm_mul_sd(rsq,rinv);
            
			/* Compute raj_inv for the single remaining j atom */
            raj_inv     = gmx_mm_inv_pd(raj);
            
            /* Evaluate influence of atom aj -> ai */
            t1            = _mm_add_sd(r,sk_aj);
            t2            = _mm_sub_sd(r,sk_aj);
            t3            = _mm_sub_sd(sk_aj,r);
            obc_mask1     = _mm_cmplt_sd(rai, t1);
            obc_mask2     = _mm_cmplt_sd(rai, t2);
            obc_mask3     = _mm_cmplt_sd(rai, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(_mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,rai_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_sd(uij, uij);
            uij3          = _mm_mul_sd(uij2,uij);
            lij2          = _mm_mul_sd(lij, lij);
            lij3          = _mm_mul_sd(lij2,lij);
            
            diff2         = _mm_sub_sd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_aj        = _mm_mul_sd(sk_aj,sk_aj);
            sk2_rinv      = _mm_mul_sd(sk2_aj,rinv);
            prod          = _mm_mul_sd(onefourth,sk2_rinv);
            
            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));
            
            t1            = _mm_sub_sd(lij,uij);
            /* NOTE(review): _mm_mul_pd below, where the rest of the tail uses
             * _sd. The enclosing _mm_sub_sd/_mm_mul_sd only take the low lane
             * of this operand, so the low-lane result is the same. */
            t2            = _mm_mul_sd(diff2,
                                       _mm_sub_sd(_mm_mul_pd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1            = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4            = _mm_mul_sd(two,_mm_sub_sd(rai_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_sd(half,_mm_add_sd(t1,t4));
            
            sum_ai        = _mm_add_sd(sum_ai, _mm_and_pd(t1,obc_mask1) );
            
            t1            = _mm_add_sd(_mm_mul_sd(half,lij2),
                                       _mm_mul_sd(prod,lij3));
            t1            = _mm_sub_sd(t1,
                                       _mm_mul_sd(onefourth,
                                                  _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                             _mm_mul_sd(lij3,r))));
            t2            = _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(uij,rinv),
                                                  _mm_mul_sd(uij3,r)));
            t2            = _mm_sub_sd(t2,
                                       _mm_add_sd(_mm_mul_sd(half,uij2),
                                                  _mm_mul_sd(prod,uij3)));
            t3            = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                                       _mm_mul_sd(rinv,rinv));
            t3            = _mm_sub_sd(t3,
                                       _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                                  _mm_add_sd(one,
                                                             _mm_mul_sd(sk2_rinv,rinv))));
            /* NOTE(review): _mm_add_pd(t2,t3) is the second operand of an
             * _mm_add_sd, so only its low lane is consumed (the ai -> aj copy
             * of this expression further down uses _mm_add_sd here). */
            t1            = _mm_mul_sd(rinv,
                                       _mm_add_sd(_mm_mul_sd(dlij,t1),
                                                  _mm_add_pd(t2,t3)));
            
            dadx1         = _mm_and_pd(t1,obc_mask1);
            
            /* Evaluate influence of atom ai -> aj */
            t1            = _mm_add_sd(r,sk_ai);
            t2            = _mm_sub_sd(r,sk_ai);
            t3            = _mm_sub_sd(sk_ai,r);
            obc_mask1     = _mm_cmplt_sd(raj, t1);
            obc_mask2     = _mm_cmplt_sd(raj, t2);
            obc_mask3     = _mm_cmplt_sd(raj, t3);
            
            uij           = gmx_mm_inv_pd(t1);
            lij           = _mm_or_pd(   _mm_and_pd(obc_mask2,gmx_mm_inv_pd(t2)),
                                      _mm_andnot_pd(obc_mask2,raj_inv));
            dlij          = _mm_and_pd(one,obc_mask2);
            uij2          = _mm_mul_sd(uij, uij);
            uij3          = _mm_mul_sd(uij2,uij);
            lij2          = _mm_mul_sd(lij, lij);
            lij3          = _mm_mul_sd(lij2,lij);
            
            diff2         = _mm_sub_sd(uij2,lij2);
            lij_inv       = gmx_mm_invsqrt_pd(lij2);
            sk2_rinv      = _mm_mul_sd(sk2_ai,rinv);
            prod          = _mm_mul_sd(onefourth,sk2_rinv);
            
            logterm       = gmx_mm_log_pd(_mm_mul_sd(uij,lij_inv));
            
            t1            = _mm_sub_sd(lij,uij);
            t2            = _mm_mul_sd(diff2,
                                       _mm_sub_sd(_mm_mul_sd(onefourth,r),
                                                  prod));
            t3            = _mm_mul_sd(half,_mm_mul_sd(rinv,logterm));
            t1            = _mm_add_sd(t1,_mm_add_sd(t2,t3));
            t4            = _mm_mul_sd(two,_mm_sub_sd(raj_inv,lij));
            t4            = _mm_and_pd(t4,obc_mask3);
            t1            = _mm_mul_sd(half,_mm_add_sd(t1,t4));
            
            GMX_MM_INCREMENT_1VALUE_PD(work+jnrA,_mm_and_pd(t1,obc_mask1));
            
            t1            = _mm_add_sd(_mm_mul_sd(half,lij2),
                                       _mm_mul_sd(prod,lij3));
            t1            = _mm_sub_sd(t1,
                                       _mm_mul_sd(onefourth,
                                                  _mm_add_sd(_mm_mul_sd(lij,rinv),
                                                             _mm_mul_sd(lij3,r))));
            t2            = _mm_mul_sd(onefourth,
                                       _mm_add_sd(_mm_mul_sd(uij,rinv),
                                                  _mm_mul_sd(uij3,r)));
            t2            = _mm_sub_sd(t2,
                                       _mm_add_sd(_mm_mul_sd(half,uij2),
                                                  _mm_mul_sd(prod,uij3)));
            t3            = _mm_mul_sd(_mm_mul_sd(onefourth,logterm),
                                       _mm_mul_sd(rinv,rinv));
            t3            = _mm_sub_sd(t3,
                                       _mm_mul_sd(_mm_mul_sd(diff2,oneeighth),
                                                  _mm_add_sd(one,
                                                             _mm_mul_sd(sk2_rinv,rinv))));
            t1            = _mm_mul_sd(rinv,
                                       _mm_add_sd(_mm_mul_sd(dlij,t1),
                                                  _mm_add_sd(t2,t3)));
            
            dadx2         = _mm_and_pd(t1,obc_mask1);
            
            /* _mm_store_pd still writes two doubles per term, so the tail
             * advances dadx by four slots exactly like a full pair */
            _mm_store_pd(dadx,dadx1);
            dadx += 2;
            _mm_store_pd(dadx,dadx2);
            dadx += 2;
        } 
        /* Project helper; presumably reduces sum_ai and adds it to work[ii]
         * - definition not visible here */
        gmx_mm_update_1pot_pd(sum_ai,work+ii);
        
	}
	
	/* Parallel summations */
	if(PARTDECOMP(cr))
	{
		gmx_sum(natoms, work, cr);
	}
	else if(DOMAINDECOMP(cr))
	{
		dd_atom_sum_real(cr->dd, work);
	}
	
    if(gb_algorithm==egbHCT)
    {
        /* HCT */
        /* bRad = max( 1/(1/rr - work[i]), rr + doffset ), where
         * rr = gb_radius[type] - doffset */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
			if(born->use[i] != 0)
            {
                rr      = top->atomtypes.gb_radius[md->typeA[i]]-doffset; 
                sum     = 1.0/rr - work[i];
                min_rad = rr + doffset;
                rad     = 1.0/sum; 
                
                born->bRad[i]   = rad > min_rad ? rad : min_rad;
                fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
            }
        }
        
        /* Extra communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
        }
    }
    else
    {
        /* OBC */
        /* With R = gb_radius[type], rr = R - doffset and sum = rr*work[i]:
         *   tsum   = tanh(alpha*sum - beta*sum^2 + gamma*sum^3)
         *   bRad   = 1 / (1/rr - tsum/R)
         *   drobc  = (1 - tsum^2) * rr*(alpha - 2*beta*sum + 3*gamma*sum^2) / R */
        for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
        {
			if(born->use[i] != 0)
            {
                rr      = top->atomtypes.gb_radius[md->typeA[i]];
                rr_inv2 = 1.0/rr;
                rr      = rr-doffset; 
                rr_inv  = 1.0/rr;
                sum     = rr * work[i];
                sum2    = sum  * sum;
                sum3    = sum2 * sum;
                
                tsum    = tanh(born->obc_alpha*sum-born->obc_beta*sum2+born->obc_gamma*sum3);
                born->bRad[i] = rr_inv - tsum*rr_inv2;
                born->bRad[i] = 1.0 / born->bRad[i];
                
                fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
                
                tchain  = rr * (born->obc_alpha-2*born->obc_beta*sum+3*born->obc_gamma*sum2);
                born->drobc[i] = (1.0-tsum*tsum)*tchain*rr_inv2;
            }
        }
        /* Extra (local) communication required for DD */
        if(DOMAINDECOMP(cr))
        {
            dd_atom_spread_real(cr->dd, born->bRad);
            dd_atom_spread_real(cr->dd, fr->invsqrta);
            dd_atom_spread_real(cr->dd, born->drobc);
        }
    }
    
	
	
	return 0;
}
// SSE2 evaluation of a rational function of x for the double specialisation
// of lanczos13m53::lanczos_sum.
//
// coeff[] interleaves two degree-12 polynomials, highest power first:
// coeff[2*i] is a numerator coefficient and coeff[2*i + 1] the matching
// denominator coefficient, so one 128-bit register carries both polynomials
// (numerator in lane 0, denominator in lane 1). The polynomial is split into
// its even-power and odd-power halves, each evaluated by Horner's rule in
// x*x; the odd half is multiplied by x once at the end and the halves are
// added. The result is numerator(x) / denominator(x).
//
// The `register` storage class used previously is deprecated in C++11 and
// removed in C++17, so it has been dropped; the arithmetic and its order are
// unchanged.
//
// NOTE(review): ALIGN16 is defined elsewhere; _mm_load_pd/_mm_store_pd need
// 16-byte aligned addresses, which that macro presumably guarantees.
// NOTE(review): an explicit specialisation normally carries a leading
// `template <>`; none is visible directly above this definition - confirm.
inline double lanczos13m53::lanczos_sum<double>(const double& x)
{
   static const ALIGN16 double coeff[26] = {
      static_cast<double>(2.506628274631000270164908177133837338626L),
      static_cast<double>(1u),
      static_cast<double>(210.8242777515793458725097339207133627117L),
      static_cast<double>(66u),
      static_cast<double>(8071.672002365816210638002902272250613822L),
      static_cast<double>(1925u),
      static_cast<double>(186056.2653952234950402949897160456992822L),
      static_cast<double>(32670u),
      static_cast<double>(2876370.628935372441225409051620849613599L),
      static_cast<double>(357423u),
      static_cast<double>(31426415.58540019438061423162831820536287L),
      static_cast<double>(2637558u),
      static_cast<double>(248874557.8620541565114603864132294232163L),
      static_cast<double>(13339535u),
      static_cast<double>(1439720407.311721673663223072794912393972L),
      static_cast<double>(45995730u),
      static_cast<double>(6039542586.35202800506429164430729792107L),
      static_cast<double>(105258076u),
      static_cast<double>(17921034426.03720969991975575445893111267L),
      static_cast<double>(150917976u),
      static_cast<double>(35711959237.35566804944018545154716670596L),
      static_cast<double>(120543840u),
      static_cast<double>(42919803642.64909876895789904700198885093L),
      static_cast<double>(39916800u),
      static_cast<double>(23531376880.41075968857200767445163675473L),
      static_cast<double>(0u)
   };

   const __m128d vx = _mm_load1_pd(&x);
   const __m128d vx2 = _mm_mul_pd(vx, vx);

   // Leading terms: x^12 pair seeds the even half, x^11 pair the odd half.
   __m128d sum_even = _mm_load_pd(coeff);
   __m128d sum_odd = _mm_load_pd(coeff + 2);

   // Five Horner steps per half; each step consumes one even-power pair
   // (coeff + i) and one odd-power pair (coeff + i + 2).
   for (int i = 4; i <= 20; i += 4)
   {
      sum_even = _mm_add_pd(_mm_mul_pd(sum_even, vx2), _mm_load_pd(coeff + i));
      sum_odd = _mm_add_pd(_mm_mul_pd(sum_odd, vx2), _mm_load_pd(coeff + i + 2));
   }

   // Final even step brings in the constant terms; the odd half only needs
   // one more factor of x before the halves are combined.
   sum_even = _mm_add_pd(_mm_mul_pd(sum_even, vx2), _mm_load_pd(coeff + 24));
   sum_odd = _mm_mul_pd(sum_odd, vx);
   sum_even = _mm_add_pd(sum_even, sum_odd);

   // Lane 0 holds the numerator, lane 1 the denominator.
   double ALIGN16 t[2];
   _mm_store_pd(t, sum_even);

   return t[0] / t[1];
}
/*
 * Multiply-accumulate one block: C += A * B, all three stored row-major with
 * leading dimension lda.
 *
 * The block is processed in tiles of 2 rows by 10 columns, i.e. five 2x2
 * sub-blocks of C per tile, and K is consumed two steps at a time. Each
 * broadcast element of A is reused for all five column pairs.
 *
 * The loops step by 2 (rows and K) and by 10 (columns) without a remainder
 * path, and the aligned load/store intrinsics are used throughout, so the
 * caller has to supply dimensions and addresses that fit that pattern.
 */
void do_l1_block (int lda, int M, int N, int K, double* A, double* B, double* C){
  enum { COL_PAIRS = 5 };   /* 2-wide column pairs handled per tile */

  for (int row = 0; row < M; row += 2) {
    for (int col = 0; col < N; col += 2 * COL_PAIRS) {
      double *c_top = C + row * lda + col;   /* tile row 0 */
      double *c_bot = c_top + lda;           /* tile row 1 */
      __m128d acc_top[COL_PAIRS];
      __m128d acc_bot[COL_PAIRS];

      /* Pull the current 2x10 tile of C into the accumulators. */
      for (int p = 0; p < COL_PAIRS; ++p) {
        acc_top[p] = _mm_load_pd(c_top + 2 * p);
        acc_bot[p] = _mm_load_pd(c_bot + 2 * p);
      }

      for (int k = 0; k < K; k += 2) {
        /* 2x2 block of A, each element broadcast to both lanes. */
        const double *a_top = A + row * lda + k;
        const double *a_bot = a_top + lda;
        const __m128d a00 = _mm_load1_pd(a_top);
        const __m128d a01 = _mm_load1_pd(a_top + 1);
        const __m128d a10 = _mm_load1_pd(a_bot);
        const __m128d a11 = _mm_load1_pd(a_bot + 1);

        /* Rows k and k+1 of B, starting at this tile's first column. */
        const double *b_k0 = B + k * lda + col;
        const double *b_k1 = b_k0 + lda;

        for (int p = 0; p < COL_PAIRS; ++p) {
          const __m128d b0 = _mm_load_pd(b_k0 + 2 * p);
          const __m128d b1 = _mm_load_pd(b_k1 + 2 * p);

          /* Row-k contribution first, then row k+1, for each accumulator. */
          acc_top[p] = _mm_add_pd(acc_top[p], _mm_mul_pd(a00, b0));
          acc_bot[p] = _mm_add_pd(acc_bot[p], _mm_mul_pd(a10, b0));
          acc_top[p] = _mm_add_pd(acc_top[p], _mm_mul_pd(a01, b1));
          acc_bot[p] = _mm_add_pd(acc_bot[p], _mm_mul_pd(a11, b1));
        }
      }

      /* Write the finished tile back to C. */
      for (int p = 0; p < COL_PAIRS; ++p) {
        _mm_store_pd(c_top + 2 * p, acc_top[p]);
        _mm_store_pd(c_bot + 2 * p, acc_bot[p]);
      }
    }
  }
}