double bst_compute_121_m128_aligned4( void*_bst_obj, double* p, double* q, size_t nn ) { segments_t* mem = (segments_t*) _bst_obj; int n, i, r, l_end, l_end_pre, j; double t, e_tmp; double* e = mem->e, *w = mem->w; int* root = mem->r; __m128d v_tmp; __m128d v00, v01, v02, v03; __m128d v10, v11, v12, v13; __m128i v_cur_roots, v_old_roots, v_new_roots; __m128 v_rootmask; // initialization // mem->n = nn; n = nn; // subtractions with n potentially negative. say hello to all the bugs int idx1, idx2, idx3, pad, pad_r; idx1 = (n+1)*(n+2)/2 + n/2; e[idx1] = q[n]; idx1++; pad = 1; // pad contains the padding for row i+1 // for row n it's always 1 for (i = n-1; i >= 0; --i) { idx1 -= 2*(n-i)+1 + pad; idx2 = idx1 + 1; e[idx1] = q[i]; w[idx1] = q[i]; for (j = i+1; j < n+1; ++j,++idx2) { e[idx2] = INFINITY; w[idx2] = w[idx2-1] + p[j-1] + q[j]; } // idx2 now points to the beginning of the next line. idx2 += pad; // padding of line i+1 idx3 = idx1; pad_r = pad; // padding of line r for (r = i; r < n; ++r) { pad_r = !pad_r; // padding of line r+1 // idx2 = IDX(r+1, r+1); idx1 = idx3; l_end = idx2 + (n-r); e_tmp = e[idx1++]; // calculate until a multiple of 8 doubles is left // 8 = 4 * 2 128-bit vectors l_end_pre = idx2 + ((n-r)&3); for( ; (idx2 < l_end_pre) && (idx2 < l_end); ++idx2 ) { t = e_tmp + e[idx2] + w[idx1]; if (t < e[idx1]) { e[idx1] = t; root[idx1] = r; } idx1++; } v_tmp = _mm_set_pd( e_tmp, e_tmp ); // execute the shit for 4 vectors of size 2 v_cur_roots = _mm_set_epi32(r, r, r, r); for( ; idx2 < l_end; idx2 += 4 ) { v01 = _mm_load_pd( &w[idx1 ] ); v11 = _mm_load_pd( &w[idx1+2] ); v00 = _mm_load_pd( &e[idx2 ] ); v01 = _mm_add_pd( v01, v_tmp ); // supoptimal for raw-dependency v10 = _mm_load_pd( &e[idx2+2] ); v11 = _mm_add_pd( v11, v_tmp ); v01 = _mm_add_pd( v01, v00 ); v03 = _mm_load_pd( &e[idx1 ] ); v11 = _mm_add_pd( v11, v10 ); v13 = _mm_load_pd( &e[idx1+2] ); v02 = _mm_cmplt_pd( v01, v03 ); v12 = _mm_cmplt_pd( v11, v13 ); v00 = _mm_or_pd( _mm_and_pd( v02, v01 ), 
_mm_andnot_pd( v02, v03 )); v10 = _mm_or_pd( _mm_and_pd( v12, v11 ), _mm_andnot_pd( v12, v13 )); _mm_store_pd( &e[idx1 ], v00 ); _mm_store_pd( &e[idx1+2], v10 ); v_rootmask = _mm_shuffle_ps( _mm_castpd_ps( v02 ), _mm_castpd_ps( v12 ), _MM_SHUFFLE(0,2,0,2) ); v_old_roots = _mm_lddqu_si128( &root[idx1] ); v_new_roots = _mm_or_si128( _mm_and_si128( v_cur_roots, _mm_castps_si128( v_rootmask ) ), _mm_andnot_si128( v_old_roots, _mm_castps_si128( v_rootmask ) ) ); _mm_storeu_si128( &root[idx1], v_new_roots ); idx1 += 4; } idx2 += pad_r; idx3++; } pad = !pad; // every other line as padding 0, or 1, respectively } // if n is even, the total number of entries in the first // row of the table is odd, so we need padding return e[n + !(n&1)]; }
Point::Point(double x, double y) #ifdef __SSE3__ : v(_mm_set_pd(y, x)) #else : x(x), y(y)
int sse3_ChirpData_ak8( sah_complex * cx_DataArray, sah_complex * cx_ChirpDataArray, int chirp_rate_ind, double chirp_rate, int ul_NumDataPoints, double sample_rate ) { #ifdef USE_MANUAL_CALLSTACK call_stack.enter("sse3_ChirpData_ak8()"); #endif int i; if (chirp_rate_ind == 0) { memcpy(cx_ChirpDataArray, cx_DataArray, (int)ul_NumDataPoints * sizeof(sah_complex) ); #ifdef USE_MANUAL_CALLSTACK call_stack.exit(); #endif return 0; } int vEnd; double srate = chirp_rate * 0.5 / (sample_rate * sample_rate); __m128d rate = _mm_set1_pd(chirp_rate * 0.5 / (sample_rate * sample_rate)); __m128d roundVal = _mm_set1_pd(srate >= 0.0 ? TWO_TO_52 : -TWO_TO_52); __m128d DFOUR = _mm_set_pd(4.0, 4.0); // main vectorised loop vEnd = ul_NumDataPoints - (ul_NumDataPoints & 3); __m128d di1 = _mm_set_pd(2.0, 0.0); // set time patterns for eventual moveldup/movehdup __m128d di2 = _mm_set_pd(3.0, 1.0); for (i = 0; i < vEnd; i += 4) { const float *d = (const float *) (cx_DataArray + i); float *cd = (float *) (cx_ChirpDataArray + i); __m128d a1, a2; __m128 d1, d2; __m128 cd1, cd2; __m128 td1, td2; __m128 x; __m128 y; __m128 z; __m128 s; __m128 c; __m128 m; // load the signal to be chirped d1 = _mm_load_ps(d); d2 = _mm_load_ps(d+4); // calculate the input angle a1 = _mm_mul_pd(_mm_mul_pd(di1, di1), rate); a2 = _mm_mul_pd(_mm_mul_pd(di2, di2), rate); // update times for next di1 = _mm_add_pd(di1, DFOUR); di2 = _mm_add_pd(di2, DFOUR); // reduce the angle to the range (-0.5, 0.5) a1 = _mm_sub_pd(a1, _mm_sub_pd(_mm_add_pd(a1, roundVal), roundVal)); a2 = _mm_sub_pd(a2, _mm_sub_pd(_mm_add_pd(a2, roundVal), roundVal)); // convert pair of packed double into packed single x = _mm_movelh_ps(_mm_cvtpd_ps(a1), _mm_cvtpd_ps(a2)); // 3 1 2 0 // square to the range [0, 0.25) y = _mm_mul_ps(x, x); // perform the initial polynomial approximations, Estrin's method z = _mm_mul_ps(y, y); s = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, SS4F), SS3F), z), _mm_add_ps(_mm_mul_ps(y, SS2F), SS1F)), x); c = 
_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, CC3F), CC2F), z), _mm_add_ps(_mm_mul_ps(y, CC1F), ONE)); // perform first angle doubling x = _mm_sub_ps(_mm_mul_ps(c, c), _mm_mul_ps(s, s)); y = _mm_mul_ps(_mm_mul_ps(s, c), TWO); // calculate scaling factor to correct the magnitude m = _mm_sub_ps(_mm_sub_ps(TWO, _mm_mul_ps(x, x)), _mm_mul_ps(y, y)); // perform second angle doubling c = _mm_sub_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y)); s = _mm_mul_ps(_mm_mul_ps(y, x), TWO); // correct the magnitude (final sine / cosine approximations) c = _mm_mul_ps(c, m); // c3 c1 c2 c0 s = _mm_mul_ps(s, m); // chirp the data cd1 = _mm_moveldup_ps(c); // c1 c1 c0 c0 cd2 = _mm_movehdup_ps(c); // c3 c3 c2 c2 cd1 = _mm_mul_ps(cd1, d1); // c1.i1 c1.r1 c0.i0 c0.r0 cd2 = _mm_mul_ps(cd2, d2); // c3.i3 c3.r3 c2.i2 c2.r2 d1 = _mm_shuffle_ps(d1, d1, 0xb1); d2 = _mm_shuffle_ps(d2, d2, 0xb1); td1 = _mm_moveldup_ps(s); td2 = _mm_movehdup_ps(s); td1 = _mm_mul_ps(td1, d1); td2 = _mm_mul_ps(td2, d2); cd1 = _mm_addsub_ps(cd1, td1); cd2 = _mm_addsub_ps(cd2, td2); // store chirped values _mm_stream_ps(cd, cd1); _mm_stream_ps(cd+4, cd2); } // handle tail elements with scalar code for (; i < ul_NumDataPoints; ++i) { double angle = srate * i * i * 0.5; double s = sin(angle); double c = cos(angle); float re = cx_DataArray[i][0]; float im = cx_DataArray[i][1]; cx_ChirpDataArray[i][0] = re * c - im * s; cx_ChirpDataArray[i][1] = re * s + im * c; } analysis_state.FLOP_counter+=12.0*ul_NumDataPoints; #ifdef USE_MANUAL_CALLSTACK call_stack.exit(); #endif return 0; }
/*
 * Masked copy of a mlib_d64 image: for every sample, dst is overwritten
 * with src when the corresponding mask sample is <= the threshold of its
 * channel; otherwise dst is left untouched (see the scalar tail loops).
 *
 *   src, slb    source rows and source row stride in bytes
 *   mask, mlb   mask rows and mask row stride in bytes
 *   dst, dlb    destination rows and destination row stride in bytes
 *   xsize       pixels per row
 *   ysize       number of rows
 *   nchan       channels per pixel; 1..4 are handled, other values do nothing
 *   thresh      one threshold per channel
 *
 * Each channel count has two variants: when all three base addresses and
 * all three strides are multiples of 16 the aligned SSE2 load/store
 * intrinsics are used, otherwise the unaligned ones.
 *
 * NOTE(review): the MLIB_S_COPYIMAGEMASK*_D64 macros are defined outside
 * this block; they presumably perform the vector form of the scalar tail
 * using src, mask, dst, i and threshs/threshu/threshv by name - confirm
 * before renaming any of those locals.
 */
static void
mlib_ImageCopyMask_Fp_d64(
    const mlib_d64 *src,
    mlib_s32 slb,
    const mlib_d64 *mask,
    mlib_s32 mlb,
    mlib_d64 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    mlib_s32 nchan,
    const mlib_d64 *thresh)
{
	mlib_s32 i, j, nsize;
	mlib_d64 thresh0, thresh1, thresh2, thresh3;
	__m128d threshs, threshu, threshv;

	/* number of mlib_d64 samples per row */
	nsize = xsize * nchan;

	switch (nchan) {
	case 1:
		thresh0 = thresh[0];
		threshs = _mm_set1_pd(thresh0);

		if ((((mlib_addr)dst | dlb |
		    (mlib_addr)src | slb |
		    (mlib_addr)mask | mlb) & 0xf) == 0) {
			/* everything 16-byte aligned */
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 2); i += 2) {
					MLIB_S_COPYIMAGEMASK_D64(
					    _mm_store_pd,
					    _mm_load_pd,
					    _mm_load_pd,
					    _mm_load_pd);
				}

				/* odd sample left over */
				for (; i < nsize; i ++) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		} else {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 2); i += 2) {
					MLIB_S_COPYIMAGEMASK_D64(
					    _mm_storeu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd);
				}

				for (; i < nsize; i ++) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		}

		break;

	case 2:
		thresh0 = thresh[0];
		thresh1 = thresh[1];
		/* low lane = channel 0, high lane = channel 1 */
		threshs = _mm_set_pd(thresh1, thresh0);

		if ((((mlib_addr)dst | dlb |
		    (mlib_addr)src | slb |
		    (mlib_addr)mask | mlb) & 0xf) == 0) {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 2); i += 2) {
					MLIB_S_COPYIMAGEMASK_D64(
					    _mm_store_pd,
					    _mm_load_pd,
					    _mm_load_pd,
					    _mm_load_pd);
				}

				for (; i < nsize; i += 2) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
					if (mask[i + 1] <= thresh1) {
						dst[i + 1] = src[i + 1];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		} else {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 2); i += 2) {
					MLIB_S_COPYIMAGEMASK_D64(
					    _mm_storeu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd);
				}

				for (; i < nsize; i += 2) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
					if (mask[i + 1] <= thresh1) {
						dst[i + 1] = src[i + 1];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		}

		break;

	case 3:
		thresh0 = thresh[0];
		thresh1 = thresh[1];
		thresh2 = thresh[2];
		/*
		 * Three vectors cover two pixels (6 samples):
		 * (t0, t1), (t2, t0), (t1, t2) in memory order.
		 */
		threshs = _mm_set_pd(thresh1, thresh0);
		threshu = _mm_set_pd(thresh0, thresh2);
		threshv = _mm_set_pd(thresh2, thresh1);

		if ((((mlib_addr)dst | dlb |
		    (mlib_addr)src | slb |
		    (mlib_addr)mask | mlb) & 0xf) == 0) {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 6); i += 6) {
					MLIB_S_COPYIMAGEMASK3_D64(
					    _mm_store_pd,
					    _mm_load_pd,
					    _mm_load_pd,
					    _mm_load_pd);
				}

				/* last pixel when xsize is odd */
				for (; i < nsize; i += 3) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
					if (mask[i + 1] <= thresh1) {
						dst[i + 1] = src[i + 1];
					}
					if (mask[i + 2] <= thresh2) {
						dst[i + 2] = src[i + 2];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		} else {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 6); i += 6) {
					MLIB_S_COPYIMAGEMASK3_D64(
					    _mm_storeu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd);
				}

				for (; i < nsize; i += 3) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
					if (mask[i + 1] <= thresh1) {
						dst[i + 1] = src[i + 1];
					}
					if (mask[i + 2] <= thresh2) {
						dst[i + 2] = src[i + 2];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		}

		break;

	case 4:
		thresh0 = thresh[0];
		thresh1 = thresh[1];
		thresh2 = thresh[2];
		thresh3 = thresh[3];
		/* (t0, t1) and (t2, t3) in memory order */
		threshs = _mm_set_pd(thresh1, thresh0);
		threshu = _mm_set_pd(thresh3, thresh2);

		if ((((mlib_addr)dst | dlb |
		    (mlib_addr)src | slb |
		    (mlib_addr)mask | mlb) & 0xf) == 0) {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 4); i += 4) {
					MLIB_S_COPYIMAGEMASK4_D64(
					    _mm_store_pd,
					    _mm_load_pd,
					    _mm_load_pd,
					    _mm_load_pd);
				}

				/*
				 * Scalar fallback; each channel writes its
				 * own destination sample.
				 */
				for (; i < nsize; i += 4) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
					if (mask[i + 1] <= thresh1) {
						dst[i + 1] = src[i + 1];
					}
					if (mask[i + 2] <= thresh2) {
						dst[i + 2] = src[i + 2];
					}
					if (mask[i + 3] <= thresh3) {
						dst[i + 3] = src[i + 3];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		} else {
			for (j = 0; j < ysize; j ++) {
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
				for (i = 0; i <= (nsize - 4); i += 4) {
					MLIB_S_COPYIMAGEMASK4_D64(
					    _mm_storeu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd,
					    _mm_loadu_pd);
				}

				for (; i < nsize; i += 4) {
					if (mask[i] <= thresh0) {
						dst[i] = src[i];
					}
					if (mask[i + 1] <= thresh1) {
						dst[i + 1] = src[i + 1];
					}
					if (mask[i + 2] <= thresh2) {
						dst[i + 2] = src[i + 2];
					}
					if (mask[i + 3] <= thresh3) {
						dst[i + 3] = src[i + 3];
					}
				}

				src = (mlib_d64 *)((mlib_u8 *)src + slb);
				mask = (mlib_d64 *)((mlib_u8 *)mask + mlb);
				dst = (mlib_d64 *)((mlib_u8 *)dst + dlb);
			}
		}

		break;
	}
}
void exchlaplacecoeffData_2(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S710, S704, S707, S701, S709, S700, S703, S706, S708, S702, S705 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+394)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+402)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+394)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+226)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+234)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+226)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+170)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+178)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+170)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+58)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+66)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+58)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+450)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+458)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+450)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 
1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+114)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+122)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+114)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+10)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+338)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+346)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+338)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+282)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+290)] 
= 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+282)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posBegin[0]; } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S716, S719, S713, S721, S715, S718, S712, S711, S720, S714, S717 */ { { { { { { { { { { { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+454)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+462)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+454)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { 
fieldData_LaplaceCoeff_2_p1[((i1*8)+230)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+238)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+230)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+118)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+126)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+118)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+286)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+294)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+286)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+342)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+350)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+342)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+398)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+406)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+398)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+174)] = 0.000000e+00; 
fieldData_LaplaceCoeff_2_p1[((i1*8)+182)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+174)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+62)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+70)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+62)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)+6)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[((i1*8)+14)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)+6)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S722, S731, S725, S728, S727, S730, S724, S732, S726, S729, S723 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+344)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+345)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+344)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+400)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+401)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+400)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+120)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+121)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+120)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 
2) { fieldData_LaplaceCoeff_2_p1[(i2+8)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+9)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+8)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+64)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+65)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+64)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+456)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+457)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+456)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+232)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+233)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+232)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+288)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+289)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+288)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+176)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+177)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { 
fieldData_LaplaceCoeff_2_p1[(i2+176)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=6); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S743, S737, S733, S742, S736, S739, S738, S741, S735, S740, S734 */ { { { { { { { { { { { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=6); i2 += 1) { yPos = posEnd[1]; } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+376)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+377)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+376)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+488)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+489)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+488)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+40)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+41)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+40)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+208)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+209)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+208)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+152)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+153)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { 
fieldData_LaplaceCoeff_2_p1[(i2+152)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+320)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+321)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+320)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+432)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+433)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+432)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+96)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+97)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+96)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_LaplaceCoeff_2_p1[(i2+264)] = 0.000000e+00; fieldData_LaplaceCoeff_2_p1[(i2+265)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_LaplaceCoeff_2_p1[(i2+264)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S744 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*5)]); int i1 = 1; for (; (i1<=4); i1 += 2) { 
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+6)]; buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+14)]; } for (; (i1<=5); i1 += 1) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+6)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 45, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 45, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S745 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*5)]); double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); int i1 = 3; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)-14)] = buffer_Recv_0_p1[(i1-3)]; fieldData_LaplaceCoeff_2_p1[((i1*8)-6)] = buffer_Recv_0_p1[(i1-2)]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)-14)] = buffer_Recv_0_p1[(i1-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); 
reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[2][42], 1, mpiDatatype_9_5_56, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[2][10], 1, mpiDatatype_9_5_56, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S746 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*7)]); int i1 = 0; for (; (i1<=5); i1 += 2) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+3)]; buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+11)]; } for (; (i1<=6); i1 += 1) { 
buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+3)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S747 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*7)]); int i1 = 0; for (; (i1<=5); i1 += 2) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+5)]; buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_2_p1[((i1*8)+13)]; } for (; (i1<=6); i1 += 1) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_2_p1[((i1*8)+5)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(buffer_Send[0], 63, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 63, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 63, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(buffer_Recv[1], 63, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; 
++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S748 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*7)]); double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); int i1 = 1; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)-7)] = buffer_Recv_0_p1[(i1-1)]; fieldData_LaplaceCoeff_2_p1[((i1*8)+1)] = buffer_Recv_0_p1[i1]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)-7)] = buffer_Recv_0_p1[(i1-1)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S749 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*7)]); double* fieldData_LaplaceCoeff_2_p1 = (&fieldData_LaplaceCoeff[2][(i0*56)]); int i1 = 7; for (; (i1<=12); i1 += 2) { fieldData_LaplaceCoeff_2_p1[((i1*8)-49)] = buffer_Recv_1_p1[(i1-7)]; fieldData_LaplaceCoeff_2_p1[((i1*8)-41)] = buffer_Recv_1_p1[(i1-6)]; } for (; (i1<=13); i1 += 1) { fieldData_LaplaceCoeff_2_p1[((i1*8)-49)] = buffer_Recv_1_p1[(i1-7)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { 
MPI_Isend(&fieldData_LaplaceCoeff[2][17], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[2][33], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[2][1], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_LaplaceCoeff[2][49], 1, mpiDatatype_9_7_56, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
/*
 * mlib_s_ImageScalarBlend_s32 - SSE2 driver for the per-channel scalar blend
 * of two s32 images.
 *
 *   dst, dlb     destination rows and destination stride in bytes
 *   src1, slb1   first source rows and its stride in bytes
 *   src2, slb2   second source rows and its stride in bytes
 *   alpha        one blend factor per channel (alpha[0 .. nchan-1])
 *   xsize/ysize  image width (pixels) and height (rows)
 *   nchan        number of channels; only 1, 2, 3 and 4 are handled, any
 *                other value falls through the switch and does nothing
 *
 * For every channel k the factor is converted to a double with
 *   a_k = -(alpha[k] & MASK) / MLIB_S32_MIN
 * and the complementary factor (1 - a_k) is kept in the matching beta*
 * vector.  The blend arithmetic itself is performed by the
 * MLIB_S_IMAGESCALARBLEND* (vector) and MLIB_C_IMAGESCALARBLEND_S32_*
 * (scalar tail) macros, which are defined outside this block.
 * NOTE(review): those macros presumably read/write the locals srcPtr1,
 * srcPtr2, dstPtr, alphas*, betas*, res, sdata1, sdata2 and i by name -
 * none of them are otherwise referenced here; confirm before renaming.
 *
 * Each channel count has three variants selected by 16-byte alignment:
 *   1. dst, both sources and all three strides aligned -> aligned load/store
 *   2. only the sources and their strides aligned      -> unaligned store
 *   3. otherwise                                       -> unaligned load/store
 */
void mlib_s_ImageScalarBlend_s32(
    mlib_s32 *dst,
    mlib_s32 dlb,
    const mlib_s32 *src1,
    mlib_s32 slb1,
    const mlib_s32 *src2,
    mlib_s32 slb2,
    const mlib_s32 *alpha,
    mlib_s32 xsize,
    mlib_s32 ysize,
    mlib_s32 nchan)
{
    mlib_s32 i, j, nsize;
    __m128i *srcPtr1, *srcPtr2, *dstPtr;
    /* row cursors; advanced by the byte strides after every row */
    mlib_s32 *dl = dst;
    mlib_s32 *sl1 = (mlib_s32 *)src1, *sl2 = (mlib_s32 *)src2;
    __m128d alphas0, alphas1, alphau0, alphau1, alphav0, alphav1;
    __m128d betas0, betas1, betau0, betau1, betav0, betav1;
    __m128d ones = _mm_set1_pd(1.0f);
    mlib_s32 res, sdata1, sdata2;
    mlib_d64 a0, a1, a2, a3;

    /* number of s32 values per row */
    nsize = xsize * nchan;

    switch (nchan) {
    case 1:
        /* single channel: both lanes of both vectors carry a0 */
        a0 = -(alpha[0] & MASK) / (mlib_d64)MLIB_S32_MIN;
        alphas0 = _mm_set1_pd(a0);
        alphas1 = _mm_set1_pd(a0);
        betas0 = _mm_sub_pd(ones, alphas0);
        betas1 = _mm_sub_pd(ones, alphas1);
        if ((((mlib_addr)dst | dlb |
            (mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            /* everything 16-byte aligned */
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                /* 4 values per vector iteration */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_store_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                /* scalar tail, one value at a time */
                for (; i < nsize; i++) {
                    MLIB_C_IMAGESCALARBLEND_S32_1(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else if ((((mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            /* sources aligned, destination not */
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_storeu_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                for (; i < nsize; i++) {
                    MLIB_C_IMAGESCALARBLEND_S32_1(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else {
            /* nothing guaranteed aligned */
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_storeu_si128,
                        _mm_loadu_si128,
                        _mm_loadu_si128);
                }
                for (; i < nsize; i++) {
                    MLIB_C_IMAGESCALARBLEND_S32_1(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        }
        break;

    case 2:
        /*
         * two channels: _mm_set_pd(hi, lo) puts a0 in the low lane and
         * a1 in the high lane, so four consecutive values see
         * a0 a1 a0 a1
         */
        a0 = -(alpha[0] & MASK) / (mlib_d64)MLIB_S32_MIN;
        a1 = -(alpha[1] & MASK) / (mlib_d64)MLIB_S32_MIN;
        alphas0 = _mm_set_pd(a1, a0);
        alphas1 = _mm_set_pd(a1, a0);
        betas0 = _mm_sub_pd(ones, alphas0);
        betas1 = _mm_sub_pd(ones, alphas1);
        if ((((mlib_addr)dst | dlb |
            (mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_store_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                /* scalar tail, one 2-channel pixel at a time */
                for (; i < nsize; i += 2) {
                    MLIB_C_IMAGESCALARBLEND_S32_2(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else if ((((mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_storeu_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                for (; i < nsize; i += 2) {
                    MLIB_C_IMAGESCALARBLEND_S32_2(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_storeu_si128,
                        _mm_loadu_si128,
                        _mm_loadu_si128);
                }
                for (; i < nsize; i += 2) {
                    MLIB_C_IMAGESCALARBLEND_S32_2(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        }
        break;

    case 3:
        /*
         * three channels: the channel pattern only repeats every 12
         * values, so six 2-lane vectors hold (low lane first)
         *   s0 = a0 a1 | s1 = a2 a0 | u0 = a1 a2 |
         *   u1 = a0 a1 | v0 = a2 a0 | v1 = a1 a2
         */
        a0 = -(alpha[0] & MASK) / (mlib_d64)MLIB_S32_MIN;
        a1 = -(alpha[1] & MASK) / (mlib_d64)MLIB_S32_MIN;
        a2 = -(alpha[2] & MASK) / (mlib_d64)MLIB_S32_MIN;
        alphas0 = _mm_set_pd(a1, a0);
        alphas1 = _mm_set_pd(a0, a2);
        alphau0 = _mm_set_pd(a2, a1);
        alphau1 = _mm_set_pd(a1, a0);
        alphav0 = _mm_set_pd(a0, a2);
        alphav1 = _mm_set_pd(a2, a1);
        betas0 = _mm_sub_pd(ones, alphas0);
        betas1 = _mm_sub_pd(ones, alphas1);
        betau0 = _mm_sub_pd(ones, alphau0);
        betau1 = _mm_sub_pd(ones, alphau1);
        betav0 = _mm_sub_pd(ones, alphav0);
        betav1 = _mm_sub_pd(ones, alphav1);
        if ((((mlib_addr)dst | dlb |
            (mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                /* 12 values (4 pixels) per vector iteration */
                for (i = 0; i <= (nsize - 12); i += 12) {
                    MLIB_S_IMAGESCALARBLEND3_S32(
                        _mm_store_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                /* scalar tail, one 3-channel pixel at a time */
                for (; i < nsize; i += 3) {
                    MLIB_C_IMAGESCALARBLEND_S32_3(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else if ((((mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 12); i += 12) {
                    MLIB_S_IMAGESCALARBLEND3_S32(
                        _mm_storeu_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                for (; i < nsize; i += 3) {
                    MLIB_C_IMAGESCALARBLEND_S32_3(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 12); i += 12) {
                    MLIB_S_IMAGESCALARBLEND3_S32(
                        _mm_storeu_si128,
                        _mm_loadu_si128,
                        _mm_loadu_si128);
                }
                for (; i < nsize; i += 3) {
                    MLIB_C_IMAGESCALARBLEND_S32_3(
                        sl1, sl2, dl);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        }
        break;

    case 4:
        /*
         * four channels: s0 = a0 a1, s1 = a2 a3 (low lane first).
         * nsize = xsize * 4 is always a multiple of 4, so no scalar
         * tail is needed after the vector loop.
         */
        a0 = -(alpha[0] & MASK) / (mlib_d64)MLIB_S32_MIN;
        a1 = -(alpha[1] & MASK) / (mlib_d64)MLIB_S32_MIN;
        a2 = -(alpha[2] & MASK) / (mlib_d64)MLIB_S32_MIN;
        a3 = -(alpha[3] & MASK) / (mlib_d64)MLIB_S32_MIN;
        alphas0 = _mm_set_pd(a1, a0);
        alphas1 = _mm_set_pd(a3, a2);
        betas0 = _mm_sub_pd(ones, alphas0);
        betas1 = _mm_sub_pd(ones, alphas1);
        if ((((mlib_addr)dst | dlb |
            (mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_store_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else if ((((mlib_addr)src1 | slb1 |
            (mlib_addr)src2 | slb2) & 0xf) == 0) {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_storeu_si128,
                        _mm_load_si128,
                        _mm_load_si128);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        } else {
            for (j = 0; j < ysize; j ++) {
                srcPtr1 = (__m128i *)sl1;
                srcPtr2 = (__m128i *)sl2;
                dstPtr = (__m128i *)dl;
#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
                for (i = 0; i <= (nsize - 4); i += 4) {
                    MLIB_S_IMAGESCALARBLEND_S32(
                        _mm_storeu_si128,
                        _mm_loadu_si128,
                        _mm_loadu_si128);
                }
                sl1 = (mlib_s32 *)((mlib_u8 *)sl1 + slb1);
                sl2 = (mlib_s32 *)((mlib_u8 *)sl2 + slb2);
                dl = (mlib_s32 *)((mlib_u8 *)dl + dlb);
            }
        }
        break;
    }
}
/*
 * mlib_ImageMinimum_D64_124 - per-channel minimum of a d64 image that has
 * 1, 2 or 4 channels.
 *
 *   img   source image; its data pointer is read as mlib_d64 values
 *   min   receives one minimum per channel (1, 2 or 4 values)
 *
 * Each row is consumed four doubles at a time into two running 2-lane
 * minima: min0 tracks row positions 0,1 (mod 4) and min1 positions 2,3
 * (mod 4).  Because 4 is a multiple of every supported channel count, a lane
 * always sees the same channel, and the final switch folds the lanes into
 * one value per channel.  Any other channel count leaves `min` untouched.
 *
 * NOTE(review): MLIB_S_IMAGE_MINIMUM_D64(dst, a, b) is defined elsewhere and
 * is presumably a lane-wise minimum - confirm.
 */
void mlib_ImageMinimum_D64_124(
    const mlib_image *img,
    mlib_d64 *min)
{
    /* src address */
    mlib_d64 *sp, *sl;
    /* src data */
    __m128d sd0, sd1;
    /* min values */
    __m128d min0, min1;
    /* loop variables */
    mlib_s32 n1, i;
    /* height of image */
    mlib_s32 height = mlib_ImageGetHeight(img);
    /* bytes to next row (applied below as a byte offset) */
    mlib_s32 src_stride = mlib_ImageGetStride(img);
    /* number of image channels */
    mlib_s32 channels = mlib_ImageGetChannels(img);
    /* number of d64 elements in one row */
    mlib_s32 size_row = mlib_ImageGetWidth(img) * channels;

    sp = sl = mlib_ImageGetData(img);

    /*
     * Rows without padding form one contiguous run, so treat the whole
     * image as a single long row.  The stride is in bytes while size_row
     * counts elements, so the row length has to be scaled before the
     * comparison; comparing the raw values could never match for a
     * non-empty d64 image.
     */
    if (src_stride == size_row * (mlib_s32)sizeof (mlib_d64)) {
        size_row *= height;
        height = 1;
    }

    /* min values */
    min0 = _mm_set1_pd(MLIB_D64_MAX);
    min1 = _mm_set1_pd(MLIB_D64_MAX);

    for (; height > 0; height--) {
        n1 = size_row;

        /* main loop: four doubles per iteration */
        for (; n1 > 3; n1 -= 4) {
            sd0 = _mm_loadu_pd(sp);
            sp += 2;
            sd1 = _mm_loadu_pd(sp);
            sp += 2;
            MLIB_S_IMAGE_MINIMUM_D64(min0, min0, sd0);
            MLIB_S_IMAGE_MINIMUM_D64(min1, min1, sd1);
        }

        /* tail: 1..3 leftover doubles, padded with MLIB_D64_MAX */
        if (n1 > 0) {
            mlib_d64 sd2[4] = {
                MLIB_D64_MAX, MLIB_D64_MAX,
                MLIB_D64_MAX, MLIB_D64_MAX
            };

            for (i = 0; i < n1; i++) {
                sd2[i] = *sp++;
            }

            /*
             * The tail feeds elements 2,3 to min0 and 0,1 to min1,
             * the opposite of the main loop.  A tail can only occur
             * for 1 or 2 channels (a 4-channel row is a multiple of
             * four), and for those the reduction below combines min0
             * and min1 lane by lane, so the swap does not affect the
             * result.
             */
            sd0 = _mm_set_pd(sd2[3], sd2[2]);
            sd1 = _mm_set_pd(sd2[1], sd2[0]);
            MLIB_S_IMAGE_MINIMUM_D64(min0, min0, sd0);
            MLIB_S_IMAGE_MINIMUM_D64(min1, min1, sd1);
        }

        /* advance to the next row by the byte stride */
        sp = sl = (mlib_d64 *) ((mlib_u8 *)sl + src_stride);
    }

    switch (channels) {
    case 1: {
        /* all four lanes belong to the single channel */
        mlib_d64 tmpRes0[2];
        mlib_d64 tmpRes1[2];

        _mm_storeu_pd(tmpRes0, min0);
        _mm_storeu_pd(tmpRes1, min1);
        tmpRes0[0] = (tmpRes0[0] < tmpRes0[1]) ? tmpRes0[0] : tmpRes0[1];
        tmpRes1[0] = (tmpRes1[0] < tmpRes1[1]) ? tmpRes1[0] : tmpRes1[1];
        min[0] = (tmpRes0[0] < tmpRes1[0]) ? tmpRes0[0] : tmpRes1[0];
        break;
    }
    case 2: {
        /* low lanes hold channel 0, high lanes hold channel 1 */
        mlib_d64 tmpRes0[2];
        mlib_d64 tmpRes1[2];

        _mm_storeu_pd(tmpRes0, min0);
        _mm_storeu_pd(tmpRes1, min1);
        min[0] = (tmpRes0[0] < tmpRes1[0]) ? tmpRes0[0] : tmpRes1[0];
        min[1] = (tmpRes0[1] < tmpRes1[1]) ? tmpRes0[1] : tmpRes1[1];
        break;
    }
    case 4: {
        /* one lane per channel: store them straight out */
        _mm_storeu_pd(min, min0);
        _mm_storeu_pd((min + 2), min1);
        break;
    }
    }
}
int AnalyzeSamples ( const Float_t* left_samples, const Float_t* right_samples, size_t num_samples, int num_channels ) { const Float_t* curleft; const Float_t* curright; long batchsamples; long cursamples; long cursamplepos; int i; #ifdef HAVE_SSE2 __m128d __temp; __declspec(align(16)) Float_t __temp2[2]; #endif if ( num_samples == 0 ) return GAIN_ANALYSIS_OK; cursamplepos = 0; batchsamples = (long)num_samples; switch ( num_channels) { case 1: right_samples = left_samples; case 2: break; default: return GAIN_ANALYSIS_ERROR; } if ( num_samples < MAX_ORDER ) { memcpy ( linprebuf + MAX_ORDER, left_samples , num_samples * sizeof(Float_t) ); memcpy ( rinprebuf + MAX_ORDER, right_samples, num_samples * sizeof(Float_t) ); } else { memcpy ( linprebuf + MAX_ORDER, left_samples, MAX_ORDER * sizeof(Float_t) ); memcpy ( rinprebuf + MAX_ORDER, right_samples, MAX_ORDER * sizeof(Float_t) ); } while ( batchsamples > 0 ) { cursamples = batchsamples > sampleWindow-totsamp ? sampleWindow - totsamp : batchsamples; if ( cursamplepos < MAX_ORDER ) { curleft = linpre+cursamplepos; curright = rinpre+cursamplepos; if (cursamples > MAX_ORDER - cursamplepos ) cursamples = MAX_ORDER - cursamplepos; } else { curleft = left_samples + cursamplepos; curright = right_samples + cursamplepos; } YULE_FILTER ( curleft , lstep + totsamp, cursamples, ABYule[freqindex]); YULE_FILTER ( curright, rstep + totsamp, cursamples, ABYule[freqindex]); BUTTER_FILTER ( lstep + totsamp, lout + totsamp, cursamples, ABButter[freqindex]); BUTTER_FILTER ( rstep + totsamp, rout + totsamp, cursamples, ABButter[freqindex]); curleft = lout + totsamp; // Get the squared values curright = rout + totsamp; #ifdef HAVE_SSE2 i = cursamples % 16; while (i--) { __temp = _mm_set_pd (*curleft++, *curright++); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); } i = cursamples / 16; while (i--) { __temp = _mm_set_pd (curleft[0], curright[0]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); 
__temp = _mm_set_pd (curleft[1], curright[1]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[2], curright[2]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[3], curright[3]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[4], curright[4]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[5], curright[5]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[6], curright[6]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[7], curright[7]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[8], curright[8]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[9], curright[9]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[10], curright[10]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[11], curright[11]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[12], curright[12]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[13], curright[13]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[14], curright[14]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); __temp = _mm_set_pd (curleft[15], curright[15]); __temp = _mm_mul_pd(__temp, __temp); lrsum = _mm_add_pd(lrsum, __temp); curleft += 16; curright += 16; } #else i = cursamples % 16; while (i--) { lsum += fsqr(*curleft++); rsum += fsqr(*curright++); } i = cursamples / 16; while (i--) { lsum += fsqr(curleft[0]) + 
fsqr(curleft[1]) + fsqr(curleft[2]) + fsqr(curleft[3]) + fsqr(curleft[4]) + fsqr(curleft[5]) + fsqr(curleft[6]) + fsqr(curleft[7]) + fsqr(curleft[8]) + fsqr(curleft[9]) + fsqr(curleft[10]) + fsqr(curleft[11]) + fsqr(curleft[12]) + fsqr(curleft[13]) + fsqr(curleft[14]) + fsqr(curleft[15]); curleft += 16; rsum += fsqr(curright[0]) + fsqr(curright[1]) + fsqr(curright[2]) + fsqr(curright[3]) + fsqr(curright[4]) + fsqr(curright[5]) + fsqr(curright[6]) + fsqr(curright[7]) + fsqr(curright[8]) + fsqr(curright[9]) + fsqr(curright[10]) + fsqr(curright[11]) + fsqr(curright[12]) + fsqr(curright[13]) + fsqr(curright[14]) + fsqr(curright[15]); curright += 16; } #endif batchsamples -= cursamples; cursamplepos += cursamples; totsamp += cursamples; if ( totsamp == sampleWindow ) { // Get the Root Mean Square (RMS) for this set of samples double val; int ival; #ifdef HAVE_SSE2 _mm_store_pd (__temp2, lrsum); val = (Float_t)STEPS_per_dB * 10. * log10 ( (__temp2[0]+__temp2[1]) / totsamp * 0.5 + 1.e-37 ); #else val = (Float_t)STEPS_per_dB * 10. * log10 ( (lsum+rsum) / totsamp * 0.5 + 1.e-37 ); #endif ival = (int) val; if ( ival < 0 ) ival = 0; if ( ival >= (int)(sizeof(A)/sizeof(*A)) ) ival = sizeof(A)/sizeof(*A) - 1; A [ival]++; #ifdef HAVE_SSE2 lrsum = _mm_setzero_pd(); #else lsum = rsum = 0.; #endif memmove ( loutbuf , loutbuf + totsamp, MAX_ORDER * sizeof(Float_t) ); memmove ( routbuf , routbuf + totsamp, MAX_ORDER * sizeof(Float_t) ); memmove ( lstepbuf, lstepbuf + totsamp, MAX_ORDER * sizeof(Float_t) ); memmove ( rstepbuf, rstepbuf + totsamp, MAX_ORDER * sizeof(Float_t) ); totsamp = 0; } if ( totsamp > sampleWindow ) // somehow I really screwed up: Error in programming! 
Contact author about totsamp > sampleWindow return GAIN_ANALYSIS_ERROR; } if ( num_samples < MAX_ORDER ) { memmove ( linprebuf, linprebuf + num_samples, (MAX_ORDER-num_samples) * sizeof(Float_t) ); memmove ( rinprebuf, rinprebuf + num_samples, (MAX_ORDER-num_samples) * sizeof(Float_t) ); memcpy ( linprebuf + MAX_ORDER - num_samples, left_samples, num_samples * sizeof(Float_t) ); memcpy ( rinprebuf + MAX_ORDER - num_samples, right_samples, num_samples * sizeof(Float_t) ); } else { memcpy ( linprebuf, left_samples + num_samples - MAX_ORDER, MAX_ORDER * sizeof(Float_t) ); memcpy ( rinprebuf, right_samples + num_samples - MAX_ORDER, MAX_ORDER * sizeof(Float_t) ); } return GAIN_ANALYSIS_OK; }
mlib_status __mlib_SignalLMSFilter_F32S_F32S( mlib_f32 *dst, const mlib_f32 *src, const mlib_f32 *ref, void *filter, mlib_s32 n) { LMS_Filter *pLMS_Filter = (LMS_Filter *) filter; mlib_d64 *W[2], *srcBuffer[2]; mlib_d64 U = pLMS_Filter->U; mlib_d64 D, Y; mlib_s32 i, t, j, off, count; mlib_s32 tap, bufferSize; mlib_f32 *psrc, *pdst, *pref; __m128d srcs1, srcs2, srcs3, srcs4; __m128d SBE0, SD, temp; __m128d *SW; if (src == NULL || dst == NULL || ref == NULL) return (MLIB_NULLPOINTER); if (n <= 0) return (MLIB_FAILURE); tap = pLMS_Filter->tap; bufferSize = pLMS_Filter->bufferSize; SW = __mlib_malloc(tap * sizeof (__m128d)); W[0] = pLMS_Filter->data; srcBuffer[0] = (W[0] + 2 * tap); W[1] = srcBuffer[0] + 2 * bufferSize; srcBuffer[1] = (W[1] + 2 * tap); psrc = (mlib_f32 *)src; pdst = (mlib_f32 *)dst; pref = (mlib_f32 *)ref; for (off = 0; off < n; off += count) { count = n - off; if (count > bufferSize) { count = bufferSize; } for (i = 0; i < count; i++) { srcBuffer[0][i] = (mlib_d64)psrc[2 * i]; srcBuffer[1][i] = (mlib_d64)psrc[2 * i + 1]; } for (j = 0; j < 2; j++) { mlib_d64 *psrcBuffer = srcBuffer[j]; mlib_d64 *pW = W[j]; mlib_d64 BE0 = pLMS_Filter->BE[j]; for (i = 0; i < (tap / 2); i++) { SW[i] = _mm_set_pd(pW[i * 2], pW[i * 2 + 1]); } for (t = 0; t < count; t++) { D = 0; Y = pref[2 * t + j]; SBE0 = _mm_set1_pd(BE0); SD = _mm_setzero_pd(); #ifdef __SUNPRO_C #pragma pipeloop(0) #endif /* __SUNPRO_C */ for (i = 0; i < (tap / 2); i++) { srcs1 = _mm_loadu_pd( psrcBuffer + t - i * 2 - 2); srcs2 = _mm_loadu_pd( psrcBuffer + t - i * 2 - 1); temp = _mm_mul_pd(SBE0, srcs1); SW[i] = _mm_add_pd(SW[i], temp); SD = _mm_add_pd(SD, _mm_mul_pd(srcs2, SW[i])); } mlib_d64 TD[2]; _mm_storeu_pd(TD, SD); D += TD[0] + TD[1]; for (i = i * 2; i < tap; i++) { pW[i] += BE0 * psrcBuffer[t - i - 1]; D += psrcBuffer[t - i] * pW[i]; } BE0 = (Y - D) * U; pdst[2 * t + j] = (mlib_f32)D; } mlib_d64 TW[2]; for (i = 0; i < (tap / 2); i++) { _mm_storeu_pd(TW, SW[i]); pW[i * 2] = TW[1]; pW[i * 2 + 1] = 
TW[0]; } for (i = 0; i < tap; i++) { psrcBuffer[i - tap] = psrcBuffer[count + (i - tap)]; } pLMS_Filter->BE[j] = BE0; } psrc += 2 * count; pdst += 2 * count; pref += 2 * count; } __mlib_free(SW); return (MLIB_SUCCESS); }
/* mix local carrier ----------------------------------------------------------- * mix local carrier to data * args : char *data I data * int dtype I data type (0:real,1:complex) * double ti I sampling interval (s) * int n I number of samples * double freq I carrier frequency (Hz) * double phi0 I initial phase (rad) * short *I,*Q O carrier mixed data I, Q component * return : double phase remainder *-----------------------------------------------------------------------------*/ extern double mixcarr(const char *data, int dtype, double ti, int n, double freq, double phi0, short *II, short *QQ) { const char *p; double phi,ps,prem; #if !defined(SSE2_ENABLE) static short cost[CDIV]={0},sint[CDIV]={0}; int i,index; /* initialize local carrier table */ if (!cost[0]) { for (i=0;i<CDIV;i++) { cost[i]=(short)floor((cos(DPI/CDIV*i)/CSCALE+0.5)); sint[i]=(short)floor((sin(DPI/CDIV*i)/CSCALE+0.5)); } } phi=phi0*CDIV/DPI; ps=freq*CDIV*ti; /* phase step */ if (dtype==DTYPEIQ) { /* complex */ for (p=data;p<data+n*2;p+=2,II++,QQ++,phi+=ps) { index=((int)phi)&CMASK; *II=cost[index]*p[0]-sint[index]*p[1]; *QQ=sint[index]*p[0]+cost[index]*p[1]; } } if (dtype==DTYPEI) { /* real */ for (p=data;p<data+n;p++,II++,QQ++,phi+=ps) { index=((int)phi)&CMASK; *II=cost[index]*p[0]; *QQ=sint[index]*p[0]; } } prem=phi*DPI/CDIV; while(prem>DPI) prem-=DPI; return prem; #else static char cost[16]={0},sint[16]={0}; short I1[16]={0},I2[16]={0},Q1[16]={0},Q2[16]={0}; int i; __m128d xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm8,xmm9; __m128i dat1,dat2,dat3,dat4,ind1,ind2,xcos,xsin; __m128i zero=_mm_setzero_si128(); __m128i mask4=_mm_set1_epi32(15); __m128i mask8=_mm_set1_epi16(255); if (!cost[0]) { for (i=0;i<16;i++) { cost[i]=(char)floor((cos(DPI/16*i)/CSCALE+0.5)); sint[i]=(char)floor((sin(DPI/16*i)/CSCALE+0.5)); } } phi=phi0/DPI*16-floor(phi0/DPI)*16; ps=freq*16*ti; xmm1=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm2=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm3=_mm_set_pd(phi+ps,phi); phi+=ps*2; 
xmm4=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm5=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm6=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm7=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm8=_mm_set_pd(phi+ps,phi); phi+=ps*2; xmm9=_mm_set1_pd(ps*16); xcos=_mm_loadu_si128((__m128i *)cost); xsin=_mm_loadu_si128((__m128i *)sint); if (dtype==DTYPEIQ) { /* complex */ for (p=data;p<data+n*2;p+=32,II+=16,QQ+=16) { LOAD_INT8C(dat1,dat2,p ,zero,mask8); LOAD_INT8C(dat3,dat4,p+16,zero,mask8); DBLTOINT16(ind1,xmm1,xmm2,xmm3,xmm4,mask4); DBLTOINT16(ind2,xmm5,xmm6,xmm7,xmm8,mask4); ind1=_mm_packus_epi16(ind1,ind2); MIX_INT8(I1,dat1,dat3,xcos,ind1,zero); MIX_INT8(I2,dat1,dat3,xsin,ind1,zero); MIX_INT8(Q1,dat2,dat4,xsin,ind1,zero); MIX_INT8(Q2,dat2,dat4,xcos,ind1,zero); for (i=0;i<16;i++) { II[i]=I1[i]-Q1[i]; QQ[i]=I2[i]+Q2[i]; } xmm1=_mm_add_pd(xmm1,xmm9); xmm2=_mm_add_pd(xmm2,xmm9); xmm3=_mm_add_pd(xmm3,xmm9); xmm4=_mm_add_pd(xmm4,xmm9); xmm5=_mm_add_pd(xmm5,xmm9); xmm6=_mm_add_pd(xmm6,xmm9); xmm7=_mm_add_pd(xmm7,xmm9); xmm8=_mm_add_pd(xmm8,xmm9); } } if (dtype==DTYPEI) { /* real */ for (p=data;p<data+n;p+=16,II+=16,QQ+=16) { LOAD_INT8(dat1,dat2,p,zero); DBLTOINT16(ind1,xmm1,xmm2,xmm3,xmm4,mask4); DBLTOINT16(ind2,xmm5,xmm6,xmm7,xmm8,mask4); ind1=_mm_packus_epi16(ind1,ind2); MIX_INT8(II,dat1,dat2,xcos,ind1,zero); MIX_INT8(QQ,dat1,dat2,xsin,ind1,zero); xmm1=_mm_add_pd(xmm1,xmm9); xmm2=_mm_add_pd(xmm2,xmm9); xmm3=_mm_add_pd(xmm3,xmm9); xmm4=_mm_add_pd(xmm4,xmm9); xmm5=_mm_add_pd(xmm5,xmm9); xmm6=_mm_add_pd(xmm6,xmm9); xmm7=_mm_add_pd(xmm7,xmm9); xmm8=_mm_add_pd(xmm8,xmm9); } } prem=phi0+freq*ti*n*DPI; while(prem>DPI) prem-=DPI; return prem; #endif }
void exchrhs_gmrfData_8(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S589, S588, S590 */ { { { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(2.560000e+02); __m128d vec4 = _mm_set1_pd(yPos); for (; (i1<254); i1 += 4) { /* yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec2 = _mm_load1_pd((&posEnd[1])); __m128d vec2_2 = _mm_load1_pd((&posEnd[1])); __m128d vec3 = _mm_load1_pd((&posBegin[1])); __m128d vec3_2 = _mm_load1_pd((&posBegin[1])); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0, vec1), _mm_sub_pd(vec2, vec3)), vec3); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0_2, vec1), _mm_sub_pd(vec2_2, vec3_2)), vec3_2); } for (; (i1<257); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<254); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<257); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i1 = 0; for (; (i1<=255); i1 += 2) { fieldData_RHS_GMRF_8_p1[(i1*258)] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[((i1*258)+258)] = 0.000000e+00; } for (; (i1<=256); i1 += 1) { fieldData_RHS_GMRF_8_p1[(i1*258)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S592, S591, S593 */ { { { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(2.560000e+02); __m128d vec4 = 
_mm_set1_pd(yPos); for (; (i1<254); i1 += 4) { /* yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec2 = _mm_load1_pd((&posEnd[1])); __m128d vec2_2 = _mm_load1_pd((&posEnd[1])); __m128d vec3 = _mm_load1_pd((&posBegin[1])); __m128d vec3_2 = _mm_load1_pd((&posBegin[1])); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0, vec1), _mm_sub_pd(vec2, vec3)), vec3); vec4 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(vec0_2, vec1), _mm_sub_pd(vec2_2, vec3_2)), vec3_2); } for (; (i1<257); i1 += 1) { yPos = (((i1/2.560000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i1 = 0; for (; (i1<=255); i1 += 2) { fieldData_RHS_GMRF_8_p1[((i1*258)+256)] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[((i1*258)+514)] = 0.000000e+00; } for (; (i1<=256); i1 += 1) { fieldData_RHS_GMRF_8_p1[((i1*258)+256)] = 0.000000e+00; } } } { int i1 = 0; for (; (i1<(1&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<254); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<257); i1 += 1) { xPos = posEnd[0]; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S595, S594, S596 */ { { { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i2 = 0; for (; (i2<=255); i2 += 2) { fieldData_RHS_GMRF_8_p1[i2] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[(i2+1)] = 0.000000e+00; } for (; (i2<=256); i2 += 1) { fieldData_RHS_GMRF_8_p1[i2] = 0.000000e+00; } } { int i2 = 0; for (; (i2<=255); i2 += 2) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2+1)/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=256); i2 += 1) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { int i2 = 0; for (; (i2<=255); i2 += 
2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=256); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S598, S597, S599 */ { { { int i2 = 0; for (; (i2<=255); i2 += 2) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2+1)/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=256); i2 += 1) { xPos = (((i2/2.560000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { int i2 = 0; for (; (i2<=255); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=256); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_RHS_GMRF_8_p1 = (&fieldData_RHS_GMRF[8][0]); int i2 = 0; for (; (i2<=255); i2 += 2) { fieldData_RHS_GMRF_8_p1[(i2+66048)] = 0.000000e+00; fieldData_RHS_GMRF_8_p1[(i2+66049)] = 0.000000e+00; } for (; (i2<=256); i2 += 1) { fieldData_RHS_GMRF_8_p1[(i2+66048)] = 0.000000e+00; } } } } } } } }
int main() { #ifndef __EMSCRIPTEN__ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); #endif printf ("{ \"workload\": %u, \"results\": [\n", N); assert(N%2 == 0); // Don't care about the tail for now. double *src = get_src_d();//(float*)aligned_alloc(16, N*sizeof(float)); for(int i = 0; i < N; ++i) src[i] = (double)rand() / RAND_MAX; double *src2 = get_src2_d();//(float*)aligned_alloc(16, N*sizeof(float)); for(int i = 0; i < N; ++i) src2[i] = (double)rand() / RAND_MAX; double *dst = get_dst_d();//(float*)aligned_alloc(16, N*sizeof(float)); float scalarTime; SETCHART("load"); START(); for(int i = 0; i < N; ++i) dst[i] = src[i]; ENDSCALAR(checksum_dst(dst), "scalar"); LS_TEST("_mm_load_pd", _mm_load_pd, 0, _mm_store_pd, double*, 0, 2); LS_TEST("_mm_load_pd1", _mm_load_pd1, 1, _mm_store_pd, double*, 0, 2); LS_TEST("_mm_load_sd", _mm_load_sd, 1, _mm_store_pd, double*, 0, 2); // _mm_load_si128 LS_TEST("_mm_load1_pd", _mm_load1_pd, 1, _mm_store_pd, double*, 0, 2); __m128d tempReg = _mm_set_pd(1.0, 2.0); LSH_TEST("_mm_loadh_pd", tempReg, _mm_loadh_pd, double*, 1, _mm_store_pd, double*, 0, 2); // _mm_loadl_epi64 LSH_TEST("_mm_loadl_pd", tempReg, _mm_loadh_pd, double*, 1, _mm_store_pd, double*, 0, 2); LS_TEST("_mm_loadr_pd", _mm_loadr_pd, 0, _mm_store_pd, double*, 0, 2); LS_TEST("_mm_loadu_pd", _mm_loadu_pd, 1, _mm_store_pd, double*, 0, 2); // _mm_loadu_si128 SETCHART("set"); /* _mm_set_epi16 _mm_set_epi32 _mm_set_epi64 _mm_set_epi64x _mm_set_epi8 */ SS_TEST_D("_mm_set_pd", _mm_set_pd(src[i+2], src[i+0])); //SS_TEST_D("_mm_set_pd1", _mm_set_pd1(src[i])); SS_TEST_D("_mm_set_sd", _mm_set_sd(src[i])); /* _mm_set1_epi16 _mm_set1_epi32 _mm_set1_epi64 _mm_set1_epi64x _mm_set1_epi8 */ SS_TEST_D("_mm_set1_pd", _mm_set1_pd(src[i])); /* _mm_setr_epi16 _mm_setr_epi32 _mm_setr_epi64 _mm_setr_epi8 */ SS_TEST_D("_mm_setr_pd", _mm_set_pd(src[i+2], src[i+0])); SS_TEST_D("_mm_setzero_pd", _mm_setzero_pd()); // _mm_setzero_si128 SETCHART("move"); // _mm_move_epi64 SS_TEST_D("_mm_move_sd", 
_mm_move_sd(_mm_load_pd(src+i), _mm_load_pd(src2+i))); SETCHART("store"); // _mm_maskmoveu_si128 LS_TEST("_mm_store_pd", _mm_load_pd, 0, _mm_store_pd, double*, 0, 2); // LS_TEST("_mm_store_pd1", _mm_load_pd, 0, _mm_store_pd1, double*, 0); LS_TEST("_mm_store_sd", _mm_load_pd, 0, _mm_store_sd, double*, 1, 2); // _mm_store_si128 // _mm_store1_pd LS64_TEST("_mm_storeh_pi", _mm_load_pd, 0, _mm_storeh_pi, 1, 2); // _mm_storel_epi64 LS64_TEST("_mm_storel_pi", _mm_load_pd, 0, _mm_storel_pi, 1, 2); LS_TEST("_mm_storer_pd", _mm_load_pd, 0, _mm_storer_pd, double*, 0, 2); LS_TEST("_mm_storeu_pd", _mm_load_pd, 0, _mm_storeu_pd, double*, 1, 2); // _mm_storeu_si128 LS_TEST("_mm_stream_pd", _mm_load_pd, 0, _mm_stream_pd, double*, 0, 2); // _mm_stream_si128 // _mm_stream_si32 // _mm_stream_si64 SETCHART("arithmetic"); // _mm_add_epi16 // _mm_add_epi32 // _mm_add_epi64 // _mm_add_epi8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] += src2[0]; dst[1] += src2[1]; dst[2] += src2[2]; dst[3] += src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar add"); BINARYOP_TEST_D("_mm_add_pd", _mm_add_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_add_sd", _mm_add_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_adds_epi16 // _mm_adds_epi8 // _mm_adds_epu16 // _mm_adds_epu8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] /= src2[0]; dst[1] /= src2[1]; dst[2] /= src2[2]; dst[3] /= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar div"); BINARYOP_TEST_D("_mm_div_pd", _mm_div_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_div_sd", _mm_div_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_madd_epi16 // _mm_mul_epu32 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] *= src2[0]; dst[1] *= src2[1]; dst[2] *= src2[2]; dst[3] *= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar mul"); 
BINARYOP_TEST_D("_mm_mul_pd", _mm_mul_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_mul_sd", _mm_mul_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_mulhi_epi16 // _mm_mulhi_epu16 // _mm_mullo_epi16 // _mm_sad_epu8 // _mm_sub_epi16 // _mm_sub_epi32 // _mm_sub_epi64 // _mm_sub_epi8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] -= src2[0]; dst[1] -= src2[1]; dst[2] -= src2[2]; dst[3] -= src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar sub"); BINARYOP_TEST_D("_mm_sub_pd", _mm_sub_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_sub_sd", _mm_sub_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_subs_epi16 // _mm_subs_epi8 // _mm_subs_epu16 // _mm_subs_epu8 SETCHART("roots"); START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = sqrt(dst[0]); dst[1] = sqrt(dst[1]); dst[2] = sqrt(dst[2]); dst[3] = sqrt(dst[3]); } ENDSCALAR(checksum_dst(dst), "scalar sqrt"); UNARYOP_TEST_D("_mm_sqrt_pd", _mm_sqrt_pd, _mm_load_pd(src)); // UNARYOP_TEST_D("_mm_sqrt_sd", _mm_sqrt_sd, _mm_load_pd(src)); SETCHART("logical"); START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd(dcastu(dst[0]) & dcastu(src2[0])); dst[1] = ucastd(dcastu(dst[1]) & dcastu(src2[1])); dst[2] = ucastd(dcastu(dst[2]) & dcastu(src2[2])); dst[3] = ucastd(dcastu(dst[3]) & dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar and"); BINARYOP_TEST_D("_mm_and_pd", _mm_and_pd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_and_si128 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd((~dcastu(dst[0])) & dcastu(src2[0])); dst[1] = ucastd((~dcastu(dst[1])) & dcastu(src2[1])); dst[2] = ucastd((~dcastu(dst[2])) & dcastu(src2[2])); dst[3] = ucastd((~dcastu(dst[3])) & dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar andnot"); 
BINARYOP_TEST_D("_mm_andnot_pd", _mm_andnot_pd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_andnot_si128 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd(dcastu(dst[0]) | dcastu(src2[0])); dst[1] = ucastd(dcastu(dst[1]) | dcastu(src2[1])); dst[2] = ucastd(dcastu(dst[2]) | dcastu(src2[2])); dst[3] = ucastd(dcastu(dst[3]) | dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar or"); BINARYOP_TEST_D("_mm_or_pd", _mm_or_pd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_or_si128 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = ucastd(dcastu(dst[0]) ^ dcastu(src2[0])); dst[1] = ucastd(dcastu(dst[1]) ^ dcastu(src2[1])); dst[2] = ucastd(dcastu(dst[2]) ^ dcastu(src2[2])); dst[3] = ucastd(dcastu(dst[3]) ^ dcastu(src2[3])); } ENDSCALAR(checksum_dst(dst), "scalar xor"); BINARYOP_TEST_D("_mm_xor_pd", _mm_xor_pd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_xor_si128 SETCHART("cmp"); // _mm_cmpeq_epi16 // _mm_cmpeq_epi32 // _mm_cmpeq_epi8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] == src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] == src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] == src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] == src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp=="); BINARYOP_TEST_D("_mm_cmpeq_pd", _mm_cmpeq_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmpeq_sd", _mm_cmpeq_sd, _mm_load_pd(src), _mm_load_pd(src2)); START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] >= src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] >= src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] >= src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] >= src2[3]) ? 
ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp>="); BINARYOP_TEST_D("_mm_cmpge_pd", _mm_cmpge_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmpge_sd", _mm_cmpge_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_cmpgt_epi16 // _mm_cmpgt_epi32 // _mm_cmpgt_epi8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] > src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] > src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] > src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] > src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp>"); BINARYOP_TEST_D("_mm_cmpgt_pd", _mm_cmpgt_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmpgt_sd", _mm_cmpgt_sd, _mm_load_pd(src), _mm_load_pd(src2)); START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] <= src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] <= src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] <= src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] <= src2[3]) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp<="); BINARYOP_TEST_D("_mm_cmple_pd", _mm_cmple_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmple_sd", _mm_cmple_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_cmplt_epi16 // _mm_cmplt_epi32 // _mm_cmplt_epi8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (dst[0] < src2[0]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (dst[1] < src2[1]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (dst[2] < src2[2]) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (dst[3] < src2[3]) ? 
ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmp<"); BINARYOP_TEST_D("_mm_cmplt_pd", _mm_cmplt_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmplt_sd", _mm_cmplt_sd, _mm_load_pd(src), _mm_load_pd(src2)); /*_mm_cmpneq_pd _mm_cmpneq_sd _mm_cmpnge_pd _mm_cmpnge_sd _mm_cmpngt_pd _mm_cmpngt_sd _mm_cmpnle_pd _mm_cmpnle_sd _mm_cmpnlt_pd _mm_cmpnlt_sd*/ START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (!Isnan(dst[0]) && !Isnan(src2[0])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (!Isnan(dst[1]) && !Isnan(src2[1])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (!Isnan(dst[2]) && !Isnan(src2[2])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (!Isnan(dst[3]) && !Isnan(src2[3])) ? ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmpord"); BINARYOP_TEST_D("_mm_cmpord_pd", _mm_cmpord_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmpord_sd", _mm_cmpord_sd, _mm_load_pd(src), _mm_load_pd(src2)); START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = (Isnan(dst[0]) || Isnan(src2[0])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[1] = (Isnan(dst[1]) || Isnan(src2[1])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[2] = (Isnan(dst[2]) || Isnan(src2[2])) ? ucastd(0xFFFFFFFFU) : 0.f; dst[3] = (Isnan(dst[3]) || Isnan(src2[3])) ? 
ucastd(0xFFFFFFFFU) : 0.f; } ENDSCALAR(checksum_dst(dst), "scalar cmpunord"); BINARYOP_TEST_D("_mm_cmpunord_pd", _mm_cmpunord_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_cmpunord_sd", _mm_cmpunord_sd, _mm_load_pd(src), _mm_load_pd(src2)); SETCHART("max"); // _mm_max_epi16 // _mm_max_epu8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = Max(dst[0], src2[0]); dst[1] = Max(dst[1], src2[1]); dst[2] = Max(dst[2], src2[2]); dst[3] = Max(dst[3], src2[3]); } ENDSCALAR(checksum_dst(dst), "scalar max"); BINARYOP_TEST_D("_mm_max_pd", _mm_max_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_max_sd", _mm_max_sd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_min_epi16 // _mm_min_epu8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = Min(dst[0], src2[0]); dst[1] = Min(dst[1], src2[1]); dst[2] = Min(dst[2], src2[2]); dst[3] = Min(dst[3], src2[3]); } ENDSCALAR(checksum_dst(dst), "scalar min"); BINARYOP_TEST_D("_mm_min_pd", _mm_min_pd, _mm_load_pd(src), _mm_load_pd(src2)); BINARYOP_TEST_D("_mm_min_sd", _mm_min_sd, _mm_load_pd(src), _mm_load_pd(src2)); SETCHART("shuffle"); // _mm_extract_epi16 // _mm_insert_epi16 // _mm_shuffle_epi32 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[3] = dst[1]; dst[2] = dst[0]; dst[1] = src2[3]; dst[0] = src2[2]; } ENDSCALAR(checksum_dst(dst), "scalar shuffle"); // BINARYOP_TEST_D("_mm_shuffle_pd", _mm_shuffle_pd, _mm_load_pd(src), _mm_load_pd(src2)); START(); __m128 o0 = _mm_load_pd(src); __m128 o1 = _mm_load_pd(src2); for(int i = 0; i < N; i += 4) o0 = _mm_shuffle_pd(o0, o1, _MM_SHUFFLE(1, 0, 3, 2)); _mm_store_pd(dst, o0); END(checksum_dst(dst), "_mm_shuffle_pd"); // _mm_shufflehi_epi16 // _mm_shufflelo_epi16 // _mm_unpackhi_epi16 // _mm_unpackhi_epi32 // _mm_unpackhi_epi64 // _mm_unpackhi_epi8 START(); dst[0] = src[0]; dst[1] = 
src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[0] = dst[2]; dst[1] = src2[2]; dst[2] = dst[3]; dst[3] = src2[3]; } ENDSCALAR(checksum_dst(dst), "scalar unpackhi_pd"); BINARYOP_TEST_D("_mm_unpackhi_pd", _mm_unpackhi_pd, _mm_load_pd(src), _mm_load_pd(src2)); // _mm_unpacklo_epi16 // _mm_unpacklo_epi32 // _mm_unpacklo_epi64 // _mm_unpacklo_epi8 START(); dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; for(int i = 0; i < N; ++i) { dst[2] = dst[1]; dst[1] = dst[0]; dst[0] = src2[0]; dst[3] = src2[1]; } ENDSCALAR(checksum_dst(dst), "scalar unpacklo_pd"); BINARYOP_TEST_D("_mm_unpacklo_pd", _mm_unpacklo_pd, _mm_load_pd(src), _mm_load_pd(src2)); printf("]}\n"); /* printf("Finished!\n"); printf("Total time spent in scalar intrinsics: %f msecs.\n", (double)scalarTotalTicks * 1000.0 / ticks_per_sec()); printf("Total time spent in SSE1 intrinsics: %f msecs.\n", (double)simdTotalTicks * 1000.0 / ticks_per_sec()); if (scalarTotalTicks > simdTotalTicks) printf("SSE1 was %.3fx faster than scalar!\n", (double)scalarTotalTicks / simdTotalTicks); else printf("SSE1 was %.3fx slower than scalar!\n", (double)simdTotalTicks / scalarTotalTicks); */ #ifdef __EMSCRIPTEN__ fprintf(stderr,"User Agent: %s\n", emscripten_run_script_string("navigator.userAgent")); printf("/*Test finished! Now please close Firefox to continue with benchmark_sse2.py.*/\n"); #endif exit(0); }
// ============================================================================= // // sse3_vChirpData // version by: Alex Kan // http://tbp.berkeley.edu/~alexkan/seti/ // int sse3_ChirpData_ak( sah_complex * cx_DataArray, sah_complex * cx_ChirpDataArray, int chirp_rate_ind, double chirp_rate, int ul_NumDataPoints, double sample_rate ) { int i; #ifdef USE_MANUAL_CALLSTACK call_stack.enter("sse3_ChirpData_ak()"); #endif if (chirp_rate_ind == 0) { memcpy(cx_ChirpDataArray, cx_DataArray, (int)ul_NumDataPoints * sizeof(sah_complex) ); #ifdef USE_MANUAL_CALLSTACK call_stack.exit(); #endif return 0; } int vEnd; double srate = chirp_rate * 0.5 / (sample_rate * sample_rate); __m128d rate = _mm_set1_pd(chirp_rate * 0.5 / (sample_rate * sample_rate)); __m128d roundVal = _mm_set1_pd(srate >= 0.0 ? TWO_TO_52 : -TWO_TO_52); // main vectorised loop vEnd = ul_NumDataPoints - (ul_NumDataPoints & 3); for (i = 0; i < vEnd; i += 4) { const float *data = (const float *) (cx_DataArray + i); float *chirped = (float *) (cx_ChirpDataArray + i); __m128d di = _mm_set1_pd(i); __m128d a1 = _mm_add_pd(_mm_set_pd(1.0, 0.0), di); __m128d a2 = _mm_add_pd(_mm_set_pd(3.0, 2.0), di); __m128 d1, d2; __m128 cd1, cd2; __m128 td1, td2; __m128 x; __m128 y; __m128 s; __m128 c; __m128 m; // load the signal to be chirped prefetchnta((const void *)( data+32 )); d1 = _mm_load_ps(data); d2 = _mm_load_ps(data+4); // calculate the input angle a1 = _mm_mul_pd(_mm_mul_pd(a1, a1), rate); a2 = _mm_mul_pd(_mm_mul_pd(a2, a2), rate); // reduce the angle to the range (-0.5, 0.5) a1 = _mm_sub_pd(a1, _mm_sub_pd(_mm_add_pd(a1, roundVal), roundVal)); a2 = _mm_sub_pd(a2, _mm_sub_pd(_mm_add_pd(a2, roundVal), roundVal)); // convert pair of packed double into packed single x = _mm_movelh_ps(_mm_cvtpd_ps(a1), _mm_cvtpd_ps(a2)); // square to the range [0, 0.25) y = _mm_mul_ps(x, x); // perform the initial polynomial approximations s = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, SS4), SS3), y), 
SS2), y), SS1), x); c = _mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, CC3), CC2), y), CC1), y), ONE); // perform first angle doubling x = _mm_sub_ps(_mm_mul_ps(c, c), _mm_mul_ps(s, s)); y = _mm_mul_ps(_mm_mul_ps(s, c), TWO); // calculate scaling factor to correct the magnitude // m1 = vec_nmsub(y1, y1, vec_nmsub(x1, x1, TWO)); // m2 = vec_nmsub(y2, y2, vec_nmsub(x2, x2, TWO)); m = vec_recip3(_mm_add_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y))); // perform second angle doubling c = _mm_sub_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y)); s = _mm_mul_ps(_mm_mul_ps(y, x), TWO); // correct the magnitude (final sine / cosine approximations) s = _mm_mul_ps(s, m); c = _mm_mul_ps(c, m); // chirp the data cd1 = _mm_shuffle_ps(c, c, 0x50); cd2 = _mm_shuffle_ps(c, c, 0xfa); cd1 = _mm_mul_ps(cd1, d1); cd2 = _mm_mul_ps(cd2, d2); d1 = _mm_shuffle_ps(d1, d1, 0xb1); d2 = _mm_shuffle_ps(d2, d2, 0xb1); td1 = _mm_shuffle_ps(s, s, 0x50); td2 = _mm_shuffle_ps(s, s, 0xfa); td1 = _mm_mul_ps(td1, d1); td2 = _mm_mul_ps(td2, d2); cd1 = _mm_addsub_ps(cd1, td1); cd2 = _mm_addsub_ps(cd2, td2); // store chirped values _mm_stream_ps(chirped, cd1); _mm_stream_ps(chirped+4, cd2); } _mm_sfence(); // handle tail elements with scalar code for ( ; i < ul_NumDataPoints; ++i) { double angle = srate * i * i * 0.5; double s = sin(angle); double c = cos(angle); float re = cx_DataArray[i][0]; float im = cx_DataArray[i][1]; cx_ChirpDataArray[i][0] = re * c - im * s; cx_ChirpDataArray[i][1] = re * s + im * c; } analysis_state.FLOP_counter+=12.0*ul_NumDataPoints; #ifdef USE_MANUAL_CALLSTACK call_stack.exit(); #endif return 0; }
void exchlaplacecoeff_gmrfData_0(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S1053, S1056, S1059, S1050, S1058, S1052, S1055, S1060, S1054, S1057, S1051 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+32)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+26)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+152)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+146)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+104)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+98)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+80)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+74)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<0); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<3); 
i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+128)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+122)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+176)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+170)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+200)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+194)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<0); i1 += 4) { /* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<3); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* 
fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+56)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+50)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S1071, S1065, S1068, S1062, S1070, S1064, S1067, S1061, S1069, S1063, S1066 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+201)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+195)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+57)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+51)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+81)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+75)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; 
(i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+177)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+171)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<0); i1 += 4) { /* yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<3); i1 += 1) { yPos = ((((i1-1)/1.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+105)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+99)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { 
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+129)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+123)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+153)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+147)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<0); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<3); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i1 = 1; for (; (i1<=1); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+33)] = 0.000000e+00; } for (; (i1<=2); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+27)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S1080, S1074, S1077, S1082, S1076, S1079, S1073, S1072, S1081, S1075, S1078 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+127)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+126)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=2); i2 += 2) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=3); i2 += 1) { xPos = 
((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+199)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+198)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=3); i2 += 1) { yPos = posBegin[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+31)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+30)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+175)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+174)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+79)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+78)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+55)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+54)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+151)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+150)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+7)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+6)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+103)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+102)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S1083, S1092, S1086, S1089, S1088, S1091, S1085, S1090, S1093, S1087, S1084 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+13)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+12)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+61)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+60)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+205)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+204)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+133)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+132)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+85)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+84)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=3); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+37)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+36)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=2); i2 += 2) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=3); i2 += 1) { xPos = ((((i2-2)/1.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+181)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+180)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00; 
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+157)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+156)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][0]); int i2 = 2; for (; (i2<=2); i2 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_0_p1[(i2+109)] = 0.000000e+00; } for (; (i2<=3); i2 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1094 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*2)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 1; for (; (i1<=1); i1 += 2) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)]; } for (; (i1<=2); i1 += 1) { buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 18, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 18, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if 
(isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1095 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*2)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 3; for (; (i1<=3); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-4)] = buffer_Recv_0_p1[(i1-2)]; } for (; (i1<=4); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][14], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][8], 1, mpiDatatype_9_2_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
(reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1096 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i0*4)]); int i1 = 0; for (; (i1<=2); i1 += 2) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; buffer_Send_0_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)]; } for (; (i1<=3); i1 += 1) { buffer_Send_0_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1097 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 0; for (; (i1<=2); i1 += 2) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)]; buffer_Send_1_p1[(i1+1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+8)]; } for (; (i1<=3); i1 += 1) { buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+2)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(buffer_Send[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if 
((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 36, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(buffer_Recv[1], 36, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1098 */ for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 1; for (; (i1<=3); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+1)] = buffer_Recv_0_p1[i1]; } for (; (i1<=4); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-5)] = buffer_Recv_0_p1[(i1-1)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1099 */ 
for (int i0 = 0; (i0<=8); i0 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i0*4)]); double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]); int i1 = 4; for (; (i1<=6); i1 += 2) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)]; fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-14)] = buffer_Recv_1_p1[(i1-3)]; } for (; (i1<=7); i1 += 1) { fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-20)] = buffer_Recv_1_p1[(i1-4)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][13], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[0][7], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][1], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = 
true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[0][19], 1, mpiDatatype_9_4_24, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void thread_ibs_num(size_t i, size_t n) { const size_t npack = nBlock >> 3; const size_t npack2 = npack * 2; C_UInt8 *Base = Geno1b.Get(); IdMatTri I = Array_Thread_MatIdx[i]; C_Int64 N = Array_Thread_MatCnt[i]; TS_KINGHomo *p = ptrKING + I.Offset(); for (; N > 0; N--, ++I, p++) { C_UInt8 *p1 = Base + I.Row() * npack2; C_UInt8 *p2 = Base + I.Column() * npack2; double *pAF = AF_1_AF.Get(); double *pAF2 = AF_1_AF_2.Get(); ssize_t m = npack; #if defined(COREARRAY_SIMD_SSE2) { POPCNT_SSE2_HEAD __m128i ibs0_sum, sumsq_sum; ibs0_sum = sumsq_sum = _mm_setzero_si128(); __m128d sq_sum, sq_sum2; sq_sum = sq_sum2 = _mm_setzero_pd(); for (; m > 0; m-=16) { __m128i g1_1 = _mm_load_si128((__m128i*)p1); __m128i g1_2 = _mm_load_si128((__m128i*)(p1 + npack)); __m128i g2_1 = _mm_load_si128((__m128i*)p2); __m128i g2_2 = _mm_load_si128((__m128i*)(p2 + npack)); p1 += 16; p2 += 16; __m128i mask = (g1_1 | ~g1_2) & (g2_1 | ~g2_2); __m128i ibs0 = (~((g1_1 ^ ~g2_1) | (g1_2 ^ ~g2_2))) & mask; __m128i het = ((g1_1 ^ g1_2) ^ (g2_1 ^ g2_2)) & mask; POPCNT_SSE2_RUN(ibs0) ibs0_sum = _mm_add_epi32(ibs0_sum, ibs0); POPCNT_SSE2_RUN(het) sumsq_sum = _mm_add_epi32(_mm_add_epi32(sumsq_sum, het), _mm_slli_epi32(ibs0, 2)); C_UInt64 m1 = _mm_cvtsi128_si64(mask); C_UInt64 m2 = _mm_cvtsi128_si64(_mm_shuffle_epi32(mask, _MM_SHUFFLE(1,0,3,2))); for (size_t k=32; k > 0; k--) { switch (m1 & 0x03) { case 3: sq_sum = _mm_add_pd(sq_sum, _mm_load_pd(pAF)); sq_sum2 = _mm_add_pd(sq_sum2, _mm_load_pd(pAF2)); break; case 1: sq_sum = _mm_add_pd(sq_sum, _mm_set_pd(0, pAF[0])); sq_sum2 = _mm_add_pd(sq_sum2, _mm_set_pd(0, pAF2[0])); break; case 2: sq_sum = _mm_add_pd(sq_sum, _mm_set_pd(pAF[1], 0)); sq_sum2 = _mm_add_pd(sq_sum2, _mm_set_pd(pAF2[1], 0)); break; } pAF += 2; pAF2 += 2; m1 >>= 2; } for (size_t k=32; k > 0; k--) { switch (m2 & 0x03) { case 3: sq_sum = _mm_add_pd(sq_sum, _mm_load_pd(pAF)); sq_sum2 = _mm_add_pd(sq_sum2, _mm_load_pd(pAF2)); break; case 1: sq_sum = _mm_add_pd(sq_sum, _mm_set_pd(0, pAF[0])); sq_sum2 
= _mm_add_pd(sq_sum2, _mm_set_pd(0, pAF2[0])); break; case 2: sq_sum = _mm_add_pd(sq_sum, _mm_set_pd(pAF[1], 0)); sq_sum2 = _mm_add_pd(sq_sum2, _mm_set_pd(pAF2[1], 0)); break; } pAF += 2; pAF2 += 2; m2 >>= 2; } } p->IBS0 += vec_sum_i32(ibs0_sum); p->SumSq += vec_sum_i32(sumsq_sum); p->SumAFreq += vec_sum_f64(sq_sum); p->SumAFreq2 += vec_sum_f64(sq_sum2); } #else for (; m > 0; m-=8) { C_UInt64 g1_1 = *((C_UInt64*)p1); C_UInt64 g1_2 = *((C_UInt64*)(p1 + npack)); C_UInt64 g2_1 = *((C_UInt64*)p2); C_UInt64 g2_2 = *((C_UInt64*)(p2 + npack)); p1 += 8; p2 += 8; C_UInt64 mask = (g1_1 | ~g1_2) & (g2_1 | ~g2_2); C_UInt64 ibs0 = (~((g1_1 ^ ~g2_1) | (g1_2 ^ ~g2_2))) & mask; C_UInt64 het = ((g1_1 ^ g1_2) ^ (g2_1 ^ g2_2)) & mask; p->IBS0 += POPCNT_U64(ibs0); p->SumSq += POPCNT_U64(het) + POPCNT_U64(ibs0)*4; double sum=0, sum2=0; for (size_t k=64; k > 0; k--) { if (mask & 0x01) { sum += (*pAF); sum2 += (*pAF2); } pAF ++; pAF2 ++; mask >>= 1; } p->SumAFreq += sum; p->SumAFreq2 += sum2; } #endif } }
void exchsolution_gmrfData_1(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S493, S492, S494 */ { { { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i1 = 1; for (; (i1<=2); i1 += 2) { fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[((i1*6)+8)] = 0.000000e+00; } for (; (i1<=3); i1 += 1) { fieldData_Solution_GMRF_1_p1[((i1*6)+2)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<1); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<4); i1 += 1) { xPos = posBegin[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(2.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<1); i1 += 4) { /* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<4); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S496, S495, S497 */ { { { double* fieldData_Solution_GMRF_1_p1 = 
(&fieldData_Solution_GMRF[1][0]); int i1 = 1; for (; (i1<=2); i1 += 2) { fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[((i1*6)+10)] = 0.000000e+00; } for (; (i1<=3); i1 += 1) { fieldData_Solution_GMRF_1_p1[((i1*6)+4)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<1); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<4); i1 += 1) { xPos = posEnd[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(2.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<1); i1 += 4) { /* yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<4); i1 += 1) { yPos = ((((i1-1)/2.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S500, S499, S498 */ { { { int i2 = 2; for (; (i2<=3); i2 += 2) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=4); i2 += 1) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i2 = 2; for 
(; (i2<=3); i2 += 2) { fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[(i2+7)] = 0.000000e+00; } for (; (i2<=4); i2 += 1) { fieldData_Solution_GMRF_1_p1[(i2+6)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=3); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=4); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S503, S502, S501 */ { { { int i2 = 2; for (; (i2<=3); i2 += 2) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=4); i2 += 1) { xPos = ((((i2-2)/2.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { int i2 = 2; for (; (i2<=3); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=4); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]); int i2 = 2; for (; (i2<=3); i2 += 2) { fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00; fieldData_Solution_GMRF_1_p1[(i2+19)] = 0.000000e+00; } for (; (i2<=4); i2 += 1) { fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(&fieldData_Solution_GMRF[1][10], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + 
((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_Solution_GMRF[1][20], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_1_3_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for 
(int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(&fieldData_Solution_GMRF[1][3], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(&fieldData_Solution_GMRF[1][5], 1, mpiDatatype_5_1_6, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if 
(isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_Solution_GMRF[1][13], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_Solution_GMRF[1][1], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_Solution_GMRF[1][25], 1, mpiDatatype_1_5_6, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { 
waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
// ============================================================================= // // sse2_vChirpData // version by: Alex Kan - SSE2 mods (haddsum removal) BH // http://tbp.berkeley.edu/~alexkan/seti/ // int sse2_ChirpData_ak( sah_complex * cx_DataArray, sah_complex * cx_ChirpDataArray, int chirp_rate_ind, double chirp_rate, int ul_NumDataPoints, double sample_rate ) { int i; if (chirp_rate_ind == 0) { memcpy(cx_ChirpDataArray, cx_DataArray, (int)ul_NumDataPoints * sizeof(sah_complex) ); return 0; } int vEnd; double srate = chirp_rate * 0.5 / (sample_rate * sample_rate); __m128d rate = _mm_set1_pd(chirp_rate * 0.5 / (sample_rate * sample_rate)); __m128d roundVal = _mm_set1_pd(srate >= 0.0 ? TWO_TO_52 : -TWO_TO_52); // main vectorised loop vEnd = ul_NumDataPoints - (ul_NumDataPoints & 3); for (i = 0; i < vEnd; i += 4) { const float *data = (const float *) (cx_DataArray + i); float *chirped = (float *) (cx_ChirpDataArray + i); __m128d di = _mm_set1_pd(i); __m128d a1 = _mm_add_pd(_mm_set_pd(1.0, 0.0), di); __m128d a2 = _mm_add_pd(_mm_set_pd(3.0, 2.0), di); __m128d x1, y1; __m128 d1, d2; __m128 cd1, cd2; __m128 td1, td2; __m128 x; __m128 y; __m128 s; __m128 c; __m128 m; // load the signal to be chirped prefetchnta((const void *)( data+32 )); d1 = _mm_load_ps(data); d2 = _mm_load_ps(data+4); // calculate the input angle a1 = _mm_mul_pd(a1, a1); a2 = _mm_mul_pd(a2, a2); a1 = _mm_mul_pd(a1, rate); a2 = _mm_mul_pd(a2, rate); // reduce the angle to the range (-0.5, 0.5) x1 = _mm_add_pd(a1, roundVal); y1 = _mm_add_pd(a2, roundVal); x1 = _mm_sub_pd(x1, roundVal); y1 = _mm_sub_pd(y1, roundVal); a1 = _mm_sub_pd(a1, x1); a2 = _mm_sub_pd(a2, y1); // convert pair of packed double into packed single x = _mm_movelh_ps(_mm_cvtpd_ps(a1), _mm_cvtpd_ps(a2)); // square to the range [0, 0.25) y = _mm_mul_ps(x, x); // perform the initial polynomial approximations s = _mm_mul_ps(y, SS4); c = _mm_mul_ps(y, CC3); s = _mm_add_ps(s, SS3); c = _mm_add_ps(c, CC2); s = _mm_mul_ps(s, y); c = 
_mm_mul_ps(c, y); s = _mm_add_ps(s, SS2); c = _mm_add_ps(c, CC1); s = _mm_mul_ps(s, y); c = _mm_mul_ps(c, y); s = _mm_add_ps(s, SS1); s = _mm_mul_ps(s, x); c = _mm_add_ps(c, ONE); // perform first angle doubling x = _mm_sub_ps(_mm_mul_ps(c, c), _mm_mul_ps(s, s)); y = _mm_mul_ps(_mm_mul_ps(s, c), TWO); // calculate scaling factor to correct the magnitude // m1 = vec_nmsub(y1, y1, vec_nmsub(x1, x1, TWO)); // m2 = vec_nmsub(y2, y2, vec_nmsub(x2, x2, TWO)); m = vec_recip2(_mm_add_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y))); // perform second angle doubling c = _mm_sub_ps(_mm_mul_ps(x, x), _mm_mul_ps(y, y)); s = _mm_mul_ps(_mm_mul_ps(y, x), TWO); // correct the magnitude (final sine / cosine approximations) c = _mm_mul_ps(c, m); s = _mm_mul_ps(s, m); /* c1 c2 c3 c4 s1 s2 s3 s4 R1 i1 R2 I2 R3 i3 R4 i4 R1 * c1 + (i1 * s1 * -1) i1 * c1 + R1 * s1 R2 * c2 + (i2 * s2 * -1) i2 * c2 + R2 * s2 */ x = d1; y = d2; x = _mm_shuffle_ps(x, x, 0xB1); y = _mm_shuffle_ps(y, y, 0xB1); x = _mm_mul_ps(x, R_NEG); y = _mm_mul_ps(y, R_NEG); cd1 = _mm_shuffle_ps(c, c, 0x50); // 01 01 00 00 AaBb => BBbb => c3c3c4c4 cd2 = _mm_shuffle_ps(c, c, 0xfa); // 11 11 10 10 AaBb => AAaa => c1c1c2c2 td1 = _mm_shuffle_ps(s, s, 0x50); td2 = _mm_shuffle_ps(s, s, 0xfa); cd1 = _mm_mul_ps(cd1, d1); cd2 = _mm_mul_ps(cd2, d2); td1 = _mm_mul_ps(td1, x); td2 = _mm_mul_ps(td2, y); cd1 = _mm_add_ps(cd1, td1); cd2 = _mm_add_ps(cd2, td2); // store chirped values _mm_stream_ps(chirped+0, cd1); _mm_stream_ps(chirped+4, cd2); } _mm_sfence(); if( i < ul_NumDataPoints) { // use original routine to finish up any tailings (max stride-1 elements) v_ChirpData(cx_DataArray+i, cx_ChirpDataArray+i , chirp_rate_ind, chirp_rate, ul_NumDataPoints-i, sample_rate); } analysis_state.FLOP_counter+=12.0*ul_NumDataPoints; return 0; }
void exchsolutionData_2(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S397, S396, S398 */ { { { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i1 = 1; for (; (i1<=4); i1 += 2) { fieldData_Solution_2_p1[((i1*8)+2)] = 0.000000e+00; fieldData_Solution_2_p1[((i1*8)+10)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_Solution_2_p1[((i1*8)+2)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posBegin[0]; } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S401, S400, S399 */ { { { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i1 = 1; for (; (i1<=4); i1 += 
2) { fieldData_Solution_2_p1[((i1*8)+6)] = 0.000000e+00; fieldData_Solution_2_p1[((i1*8)+14)] = 0.000000e+00; } for (; (i1<=5); i1 += 1) { fieldData_Solution_2_p1[((i1*8)+6)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<3); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<6); i1 += 1) { xPos = posEnd[0]; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(4.000000e+00); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<3); i1 += 4) { /* yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<6); i1 += 1) { yPos = ((((i1-1)/4.000000e+00)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S404, S403, S402 */ { { { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_Solution_2_p1[(i2+8)] = 0.000000e+00; fieldData_Solution_2_p1[(i2+9)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_Solution_2_p1[(i2+8)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = 
((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=6); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S407, S406, S405 */ { { { double* fieldData_Solution_2_p1 = (&fieldData_Solution[2][0]); int i2 = 2; for (; (i2<=5); i2 += 2) { fieldData_Solution_2_p1[(i2+40)] = 0.000000e+00; fieldData_Solution_2_p1[(i2+41)] = 0.000000e+00; } for (; (i2<=6); i2 += 1) { fieldData_Solution_2_p1[(i2+40)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=5); i2 += 2) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=6); i2 += 1) { xPos = ((((i2-2)/4.000000e+00)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { int i2 = 2; for (; (i2<=5); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=6); i2 += 1) { yPos = posEnd[1]; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(&fieldData_Solution[2][14], 1, mpiDatatype_5_1_8, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(&fieldData_Solution[2][10], 1, mpiDatatype_5_1_8, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); 
reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_Solution[2][42], 1, mpiDatatype_1_5_8, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_Solution[2][10], 1, mpiDatatype_1_5_8, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { 
; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(&fieldData_Solution[2][3], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(&fieldData_Solution[2][5], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(&fieldData_Solution[2][1], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(&fieldData_Solution[2][7], 1, mpiDatatype_7_1_8, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } 
if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Isend(&fieldData_Solution[2][17], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_Solution[2][33], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_Solution[2][1], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_Solution[2][49], 1, mpiDatatype_1_7_8, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; 
} } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
void exchlaplacecoeff_gmrfData_5(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((!neighbor_isValid[0][0])) { { double xPos; double yPos; /* Statements in this Scop: S1306, S1309, S1300, S1308, S1302, S1305, S1310, S1304, S1307, S1301, S1303 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6302)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6338)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6302)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3782)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3818)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3782)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7562)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7598)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7562)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+38)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2522)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2558)] = 0.000000e+00; } for (; 
(i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2522)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<31); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<34); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8822)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8858)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8822)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1262)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1298)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1262)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(3.200000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<31); i1 += 4) { /* yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<34); i1 += 1) { yPos = 
((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5042)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5078)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5042)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10082)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10118)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10082)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][1])) { { double xPos; double yPos; /* Statements in this Scop: S1312, S1320, S1314, S1317, S1311, S1319, S1313, S1316, S1321, S1315, S1318 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1294)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1330)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+1294)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5074)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5110)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+5074)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8854)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8890)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { 
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+8854)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2554)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2590)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+2554)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(3.200000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<31); i1 += 4) { /* yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<34); i1 += 1) { yPos = ((((i1-1)/3.200000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+34)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+70)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+34)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3814)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3850)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { 
fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+3814)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7594)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7630)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+7594)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6334)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6370)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+6334)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<31); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<34); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i1 = 1; for (; (i1<=32); i1 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10114)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10150)] = 0.000000e+00; } for (; (i1<=33); i1 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i1*36)+10114)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[0][2])) { { double xPos; double yPos; /* Statements in this Scop: S1327, S1332, S1326, S1329, S1323, S1322, S1331, S1325, S1328, S1330, S1324 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5076)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5077)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+5076)] = 0.000000e+00; } } { double* 
fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8856)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8857)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8856)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6336)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6337)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6336)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7596)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7597)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7596)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=34); i2 += 1) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+36)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+37)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+36)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2556)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2557)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2556)] = 0.000000e+00; } } } { double* 
fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10116)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10117)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10116)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3816)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3817)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3816)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1296)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1297)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1296)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=34); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[0][3])) { { double xPos; double yPos; /* Statements in this Scop: S1338, S1341, S1335, S1340, S1343, S1337, S1334, S1333, S1342, S1336, S1339 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7488)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7489)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+7488)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2448)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2449)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+2448)] = 
0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11268)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11269)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+11268)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=34); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10008)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10009)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+10008)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6228)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6229)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+6228)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8748)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8749)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+8748)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=33); i2 += 2) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=34); i2 += 1) { xPos = ((((i2-2)/3.200000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3708)] = 0.000000e+00; 
fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3709)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+3708)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1188)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1189)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+1188)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][0]); int i2 = 2; for (; (i2<=33); i2 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4968)] = 0.000000e+00; fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4969)] = 0.000000e+00; } for (; (i2<=34); i2 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[(i2+4968)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1344 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*33)]); int i4 = 1; for (; (i4<=32); i4 += 2) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+34)]; buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+70)]; } for (; (i4<=33); i4 += 1) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+34)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 297, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 297, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1345 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*33)]); int i4 = 3; for (; (i4<=34); i4 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-70)] = buffer_Recv_0_p1[(i4-3)]; fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-34)] = buffer_Recv_0_p1[(i4-2)]; } for (; (i4<=35); i4 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-70)] = buffer_Recv_0_p1[(i4-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][1190], 1, mpiDatatype_9_33_1260, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if 
((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][38], 1, mpiDatatype_9_33_1260, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1346 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*35)]); int i4 = 0; for (; (i4<=33); i4 += 2) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+3)]; buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+39)]; } for (; (i4<=34); i4 += 1) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+3)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1347 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*35)]); int i4 = 0; for (; (i4<=33); i4 += 2) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+33)]; buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+69)]; } for (; (i4<=34); i4 += 1) { buffer_Send_1_p1[i4] = 
fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+33)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Isend(buffer_Send[0], 315, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Isend(buffer_Send[1], 315, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { MPI_Irecv(buffer_Recv[0], 315, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { MPI_Irecv(buffer_Recv[1], 315, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)(neighbor_fragCommId[0][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) { /* Statements in this Scop: S1348 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = 
(&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*35)]); int i4 = 1; for (; (i4<=34); i4 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-35)] = buffer_Recv_0_p1[(i4-1)]; fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)+1)] = buffer_Recv_0_p1[i4]; } for (; (i4<=35); i4 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-35)] = buffer_Recv_0_p1[(i4-1)]; } } } if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) { /* Statements in this Scop: S1349 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_GMRF_5_p1 = (&fieldData_LaplaceCoeff_GMRF[5][(i3*1260)]); double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*35)]); int i4 = 35; for (; (i4<=68); i4 += 2) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1225)] = buffer_Recv_1_p1[(i4-35)]; fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1189)] = buffer_Recv_1_p1[(i4-34)]; } for (; (i4<=69); i4 += 1) { fieldData_LaplaceCoeff_GMRF_5_p1[((i4*36)-1225)] = buffer_Recv_1_p1[(i4-35)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][73], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Isend(&fieldData_LaplaceCoeff_GMRF[5][1153], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][3], ((unsigned int)commId << 16) + ((unsigned 
int)(neighbor_fragCommId[0][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if ((neighbor_isValid[0][2]&&neighbor_isRemote[0][2])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][1], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][2], ((unsigned int)(neighbor_fragCommId[0][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[0][3]&&neighbor_isRemote[0][3])) { MPI_Irecv(&fieldData_LaplaceCoeff_GMRF[5][1225], 1, mpiDatatype_9_35_1260, neighbor_remoteRank[0][3], ((unsigned int)(neighbor_fragCommId[0][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[0]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
static inline void computeVectorGTRGAMMAPROT(double *lVector, int *eVector, double *gammaRates, int i, double qz, double rz, traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, unsigned char **yVector, int mxtips) { double *x1, *x2, *x3; int s, pNumber = ti->pNumber, rNumber = ti->rNumber, qNumber = ti->qNumber, index1[4], index2[4]; x3 = &(lVector[80 * (pNumber - mxtips)]); switch(ti->tipCase) { case TIP_TIP: x1 = &(tipVector[20 * yVector[qNumber][i]]); x2 = &(tipVector[20 * yVector[rNumber][i]]); for(s = 0; s < 4; s++) { index1[s] = 0; index2[s] = 0; } break; case TIP_INNER: x1 = &(tipVector[20 * yVector[qNumber][i]]); x2 = &( lVector[80 * (rNumber - mxtips)]); for(s = 0; s < 4; s++) index1[s] = 0; for(s = 0; s < 4; s++) index2[s] = s; break; case INNER_INNER: x1 = &(lVector[80 * (qNumber - mxtips)]); x2 = &(lVector[80 * (rNumber - mxtips)]); for(s = 0; s < 4; s++) { index1[s] = s; index2[s] = s; } break; default: assert(0); } { double e1[20] __attribute__ ((aligned (BYTE_ALIGNMENT))), e2[20] __attribute__ ((aligned (BYTE_ALIGNMENT))), d1[20] __attribute__ ((aligned (BYTE_ALIGNMENT))), d2[20] __attribute__ ((aligned (BYTE_ALIGNMENT))), lz1, lz2; int l, k, scale, j; for(j = 0; j < 4; j++) { lz1 = qz * gammaRates[j]; lz2 = rz * gammaRates[j]; e1[0] = 1.0; e2[0] = 1.0; for(l = 1; l < 20; l++) { e1[l] = EXP(EIGN[l] * lz1); e2[l] = EXP(EIGN[l] * lz2); } for(l = 0; l < 20; l+=2) { __m128d d1v = _mm_mul_pd(_mm_load_pd(&x1[20 * index1[j] + l]), _mm_load_pd(&e1[l])); __m128d d2v = _mm_mul_pd(_mm_load_pd(&x2[20 * index2[j] + l]), _mm_load_pd(&e2[l])); _mm_store_pd(&d1[l], d1v); _mm_store_pd(&d2[l], d2v); } __m128d zero = _mm_setzero_pd(); for(l = 0; l < 20; l+=2) _mm_store_pd(&x3[j * 20 + l], zero); for(l = 0; l < 20; l++) { double *ev = &EV[l * 20]; __m128d ump_x1v = _mm_setzero_pd(); __m128d ump_x2v = _mm_setzero_pd(); __m128d x1px2v; for(k = 0; k < 20; k+=2) { __m128d eiv = _mm_load_pd(&EI[20 * l + k]); __m128d d1v = _mm_load_pd(&d1[k]); __m128d 
d2v = _mm_load_pd(&d2[k]); ump_x1v = _mm_add_pd(ump_x1v, _mm_mul_pd(d1v, eiv)); ump_x2v = _mm_add_pd(ump_x2v, _mm_mul_pd(d2v, eiv)); } ump_x1v = _mm_hadd_pd(ump_x1v, ump_x1v); ump_x2v = _mm_hadd_pd(ump_x2v, ump_x2v); x1px2v = _mm_mul_pd(ump_x1v, ump_x2v); for(k = 0; k < 20; k+=2) { __m128d ex3v = _mm_load_pd(&x3[j * 20 + k]); __m128d EVV = _mm_load_pd(&ev[k]); ex3v = _mm_add_pd(ex3v, _mm_mul_pd(x1px2v, EVV)); _mm_store_pd(&x3[j * 20 + k], ex3v); } } } scale = 1; for(l = 0; scale && (l < 80); l++) scale = ((x3[l] < minlikelihood) && (x3[l] > minusminlikelihood)); if(scale) { __m128d twoto = _mm_set_pd(twotothe256, twotothe256); for(l = 0; l < 80; l+=2) { __m128d ex3v = _mm_mul_pd(_mm_load_pd(&x3[l]),twoto); _mm_store_pd(&x3[l], ex3v); } *eVector = *eVector + 1; } return; } }
void exchlaplacecoeffData_6(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S902, S905, S908, S907, S901, S910, S904, S903, S906, S909, S900 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4558)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+4626)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4558)] = 0.000000e+00; } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(6.400000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<63); i1 += 4) { /* yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<66); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9114)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+9182)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9114)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { 
fieldData_LaplaceCoeff_6_p1[((i1*68)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+70)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31894)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+31962)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31894)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36450)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+36518)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36450)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<63); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<66); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18226)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+18294)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18226)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13670)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+13738)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13670)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { 
fieldData_LaplaceCoeff_6_p1[((i1*68)+27338)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+27406)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+27338)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22782)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+22850)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22782)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S920, S914, S917, S911, S913, S916, S919, S921, S918, S912, S915 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4622)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+4690)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+4622)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31958)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+32026)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+31958)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13734)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+13802)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+13734)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+66)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+134)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { 
fieldData_LaplaceCoeff_6_p1[((i1*68)+66)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22846)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+22914)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+22846)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(6.400000e+01); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<63); i1 += 4) { /* yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<66); i1 += 1) { yPos = ((((i1-1)/6.400000e+01)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18290)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+18358)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+18290)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+27402)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+27470)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+27402)] = 0.000000e+00; } } } { double* 
fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36514)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+36582)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+36514)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i1 = 1; for (; (i1<=64); i1 += 2) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9178)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[((i1*68)+9246)] = 0.000000e+00; } for (; (i1<=65); i1 += 1) { fieldData_LaplaceCoeff_6_p1[((i1*68)+9178)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<63); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<66); i1 += 1) { xPos = posEnd[0]; } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S929, S923, S926, S931, S925, S928, S922, S930, S924, S927, S932 */ { { { { { { { { { { { int i2 = 2; for (; (i2<=65); i2 += 2) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=66); i2 += 1) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+18292)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+18293)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+18292)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+36516)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+36517)] = 0.000000e+00; } for (; (i2<=66); 
i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+36516)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+31960)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+31961)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+31960)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+68)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+69)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+68)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+9180)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+9181)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+9180)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+22848)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+22849)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+22848)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+27404)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+27405)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+27404)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+4624)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+4625)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+4624)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = 
(&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+13736)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+13737)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+13736)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=65); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=66); i2 += 1) { yPos = posBegin[1]; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S941, S935, S938, S943, S940, S934, S937, S942, S936, S939, S933 */ { { { { { { { { { { { int i2 = 2; for (; (i2<=65); i2 += 2) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=66); i2 += 1) { xPos = ((((i2-2)/6.400000e+01)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+36312)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+36313)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+36312)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+22644)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+22645)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+22644)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+13532)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+13533)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+13532)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+40868)] = 
0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+40869)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+40868)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+18088)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+18089)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+18088)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+4420)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+4421)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+4420)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+31756)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+31757)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+31756)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+27200)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+27201)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+27200)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=65); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=66); i2 += 1) { yPos = posEnd[1]; } } } { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][0]); int i2 = 2; for (; (i2<=65); i2 += 2) { fieldData_LaplaceCoeff_6_p1[(i2+8976)] = 0.000000e+00; fieldData_LaplaceCoeff_6_p1[(i2+8977)] = 0.000000e+00; } for (; (i2<=66); i2 += 1) { fieldData_LaplaceCoeff_6_p1[(i2+8976)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if 
((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S944 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*65)]); int i4 = 1; for (; (i4<=64); i4 += 2) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+66)]; buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+134)]; } for (; (i4<=65); i4 += 1) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+66)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 585, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 585, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S945 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*65)]); int i4 = 3; for (; (i4<=66); i4 += 2) { fieldData_LaplaceCoeff_6_p1[((i4*68)-134)] = buffer_Recv_0_p1[(i4-3)]; 
fieldData_LaplaceCoeff_6_p1[((i4*68)-66)] = buffer_Recv_0_p1[(i4-2)]; } for (; (i4<=67); i4 += 1) { fieldData_LaplaceCoeff_6_p1[((i4*68)-134)] = buffer_Recv_0_p1[(i4-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[6][4422], 1, mpiDatatype_9_65_4556, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[6][70], 1, mpiDatatype_9_65_4556, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S946 */ for (int i3 = 0; (i3<=8); i3 += 1) 
{ double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*67)]); int i4 = 0; for (; (i4<=65); i4 += 2) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+3)]; buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+71)]; } for (; (i4<=66); i4 += 1) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+3)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S947 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*67)]); int i4 = 0; for (; (i4<=65); i4 += 2) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+65)]; buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_6_p1[((i4*68)+133)]; } for (; (i4<=66); i4 += 1) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_6_p1[((i4*68)+65)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(buffer_Send[0], 603, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 603, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 603, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] 
= true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(buffer_Recv[1], 603, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S948 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*67)]); int i4 = 1; for (; (i4<=66); i4 += 2) { fieldData_LaplaceCoeff_6_p1[((i4*68)-67)] = buffer_Recv_0_p1[(i4-1)]; fieldData_LaplaceCoeff_6_p1[((i4*68)+1)] = buffer_Recv_0_p1[i4]; } for (; (i4<=67); i4 += 1) { fieldData_LaplaceCoeff_6_p1[((i4*68)-67)] = buffer_Recv_0_p1[(i4-1)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S949 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*67)]); double* fieldData_LaplaceCoeff_6_p1 = (&fieldData_LaplaceCoeff[6][(i3*4556)]); int i4 = 67; for (; (i4<=132); i4 += 2) { fieldData_LaplaceCoeff_6_p1[((i4*68)-4489)] = buffer_Recv_1_p1[(i4-67)]; fieldData_LaplaceCoeff_6_p1[((i4*68)-4421)] = buffer_Recv_1_p1[(i4-66)]; } for (; (i4<=133); i4 += 1) { fieldData_LaplaceCoeff_6_p1[((i4*68)-4489)] = buffer_Recv_1_p1[(i4-67)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if 
(reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Isend(&fieldData_LaplaceCoeff[6][137], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[6][4353], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[6][1], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_LaplaceCoeff[6][4489], 1, mpiDatatype_9_67_4556, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { 
if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }
/*
 * mlib_ImageColorConvert2_D64
 *
 * Converts 3-channel pixels of type mlib_d64 with a 3x3 matrix and an
 * additive offset:
 *
 *     dst[c] = fmat[3*c + 0] * src[0]
 *            + fmat[3*c + 1] * src[1]
 *            + fmat[3*c + 2] * src[2]
 *            + offset[c]                       for c = 0, 1, 2
 *
 * Channels 0 and 1 are evaluated together in one SSE2 register, channel 2
 * is evaluated in scalar code.
 *
 * Parameters:
 *   src, dst  first pixel of the source / destination image
 *   slb, dlb  distance between the starts of consecutive rows, counted in
 *             mlib_d64 elements (they are added directly to mlib_d64
 *             pointers)
 *   xsize     pixels per row
 *   ysize     number of rows
 *   fmat      9 matrix coefficients, row-major
 *   offset    3 offsets, one per output channel
 *
 * Returns MLIB_SUCCESS unconditionally.
 */
mlib_status
mlib_ImageColorConvert2_D64(
    const mlib_d64 *src,
    mlib_s32 slb,
    mlib_d64 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_d64 *fmat,
    const mlib_d64 *offset)
{
    /* pointers for pixel and line of source (read-only, like src) */
    const mlib_d64 *sa, *sl;
    /* pointers for pixel and line of destination */
    mlib_d64 *da, *dl;
    /* indices */
    mlib_s32 i, j;
    /* third matrix row and third offset, used by the scalar channel */
    mlib_d64 k02, k12, k22;
    mlib_d64 off2;
    /* current source pixel and scalar result of channel 2 */
    mlib_d64 p0, p1, p2, q2;
    /* intermediate */
    __m128d t0, t1, t2, s0, s1, q;
    __m128d p00, p11, p22;
    /* packed kernel: column n of matrix rows 0 (low half) and 1 (high half) */
    __m128d k0, k1, k2;
    /* packed offset for channels 0 (low half) and 1 (high half) */
    __m128d off;

    /* load transposed kernel */
    k0 = _mm_set_pd(fmat[3], fmat[0]);
    k1 = _mm_set_pd(fmat[4], fmat[1]);
    k2 = _mm_set_pd(fmat[5], fmat[2]);

    /* load offset */
    off = _mm_set_pd(offset[1], offset[0]);

    /* keep kernel in regs */
    k02 = fmat[6];
    k12 = fmat[7];
    k22 = fmat[8];
    off2 = offset[2];

    sa = sl = src;
    da = dl = dst;

    for (j = 0; j < ysize; j++) {

#ifdef __SUNPRO_C
#pragma pipeloop(0)
#endif /* __SUNPRO_C */
        /* one iteration per pixel; counting pixels avoids forming 3 * xsize */
        for (i = 0; i < xsize; i++) {
            p0 = sa[0];
            p1 = sa[1];
            p2 = sa[2];
            sa += 3;

            p00 = _mm_set1_pd(p0);
            p11 = _mm_set1_pd(p1);
            p22 = _mm_set1_pd(p2);

            /* channels 0 and 1: (p0*k0 + p1*k1) + (p2*k2 + off) */
            t0 = _mm_mul_pd(p00, k0);
            t1 = _mm_mul_pd(p11, k1);
            t2 = _mm_mul_pd(p22, k2);
            s0 = _mm_add_pd(t0, t1);
            s1 = _mm_add_pd(t2, off);
            q = _mm_add_pd(s0, s1);

            /* channel 2 */
            q2 = p0 * k02 + p1 * k12 + p2 * k22 + off2;

            /* unaligned store: da advances by 3 doubles per pixel */
            _mm_storeu_pd(da, q);
            da[2] = q2;
            da += 3;
        }

        /* set src ptrs to next row */
        sl = sa = sl + slb;
        /* set dst ptrs to next row */
        dl = da = dl + dlb;
    }

    return (MLIB_SUCCESS);
}
void exchlaplacecoeffData_7(unsigned int slot) { for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((!neighbor_isValid[1][0])) { { double xPos; double yPos; /* Statements in this Scop: S956, S958, S952, S955, S960, S954, S957, S951, S959, S950, S953 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138338)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+138470)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138338)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34586)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+34718)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34586)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103754)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+103886)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103754)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+2)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+134)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+2)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69170)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+69302)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69170)] = 0.000000e+00; } } } { 
double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121046)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+121178)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121046)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+51878)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+52010)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+51878)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posBegin[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<127); i1 += 4) { /* xPos = posBegin[0]; */ __m128d vec0 = _mm_load1_pd((&posBegin[0])); __m128d vec0_2 = _mm_load1_pd((&posBegin[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<130); i1 += 1) { xPos = posBegin[0]; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86462)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+86594)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86462)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17294)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+17426)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17294)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.280000e+02); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<127); i1 += 4) { /* yPos = 
((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = _mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<130); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } } } if ((!neighbor_isValid[1][1])) { { double xPos; double yPos; /* Statements in this Scop: S962, S961, S970, S964, S967, S966, S969, S963, S971, S965, S968 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121174)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+121306)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+121174)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138466)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+138598)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+138466)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } __m128d vec1 = _mm_set1_pd(1.000000e+00); __m128d vec2 = _mm_set1_pd(1.280000e+02); __m128d vec5 = _mm_set1_pd(yPos); for (; (i1<127); i1 += 4) { /* yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); */ __m128d vec0 = _mm_set_pd(i1+1,i1); __m128d vec0_2 = _mm_set_pd(i1+1,i1); __m128d vec3 = _mm_load1_pd((&posEnd[1])); __m128d vec3_2 = 
_mm_load1_pd((&posEnd[1])); __m128d vec4 = _mm_load1_pd((&posBegin[1])); __m128d vec4_2 = _mm_load1_pd((&posBegin[1])); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0, vec1), vec2), _mm_sub_pd(vec3, vec4)), vec4); vec5 = _mm_add_pd(_mm_mul_pd(_mm_div_pd(_mm_sub_pd(vec0_2, vec1), vec2), _mm_sub_pd(vec3_2, vec4_2)), vec4_2); } for (; (i1<130); i1 += 1) { yPos = ((((i1-1)/1.280000e+02)*(posEnd[1]-posBegin[1]))+posBegin[1]); } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103882)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+104014)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+103882)] = 0.000000e+00; } } } { int i1 = 1; for (; (i1<(2&(~1))); i1 += 1) { xPos = posEnd[0]; } __m128d vec1 = _mm_set1_pd(xPos); for (; (i1<127); i1 += 4) { /* xPos = posEnd[0]; */ __m128d vec0 = _mm_load1_pd((&posEnd[0])); __m128d vec0_2 = _mm_load1_pd((&posEnd[0])); vec1 = vec0; vec1 = vec0_2; } for (; (i1<130); i1 += 1) { xPos = posEnd[0]; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+130)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+262)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+130)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69298)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+69430)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+69298)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+52006)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+52138)] = 
0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+52006)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86590)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+86722)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+86590)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34714)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+34846)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+34714)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i1 = 1; for (; (i1<=128); i1 += 2) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17422)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[((i1*132)+17554)] = 0.000000e+00; } for (; (i1<=129); i1 += 1) { fieldData_LaplaceCoeff_7_p1[((i1*132)+17422)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][2])) { { double xPos; double yPos; /* Statements in this Scop: S982, S976, S979, S973, S972, S981, S975, S978, S977, S980, S974 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+138468)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+138469)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+138468)] = 0.000000e+00; } } { int i2 = 2; for (; (i2<=129); i2 += 2) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=130); i2 += 1) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; 
for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+34716)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+34717)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+34716)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+17424)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+17425)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+17424)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+132)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+133)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+132)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+86592)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+86593)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+86592)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+52008)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+52009)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+52008)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+103884)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+103885)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+103884)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+121176)] = 0.000000e+00; 
fieldData_LaplaceCoeff_7_p1[(i2+121177)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+121176)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=129); i2 += 2) { yPos = posBegin[1]; yPos = posBegin[1]; } for (; (i2<=130); i2 += 1) { yPos = posBegin[1]; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+69300)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+69301)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+69300)] = 0.000000e+00; } } } } } if ((!neighbor_isValid[1][3])) { { double xPos; double yPos; /* Statements in this Scop: S988, S991, S985, S990, S984, S993, S987, S983, S992, S986, S989 */ { { { { { { { { { { { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+138072)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+138073)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+138072)] = 0.000000e+00; } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+34320)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+34321)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+34320)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+155364)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+155365)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+155364)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+51612)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+51613)] = 0.000000e+00; } for (; (i2<=130); 
i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+51612)] = 0.000000e+00; } } } { int i2 = 2; for (; (i2<=129); i2 += 2) { yPos = posEnd[1]; yPos = posEnd[1]; } for (; (i2<=130); i2 += 1) { yPos = posEnd[1]; } } } { int i2 = 2; for (; (i2<=129); i2 += 2) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); xPos = ((((i2-1)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } for (; (i2<=130); i2 += 1) { xPos = ((((i2-2)/1.280000e+02)*(posEnd[0]-posBegin[0]))+posBegin[0]); } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+103488)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+103489)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+103488)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+68904)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+68905)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+68904)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+17028)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+17029)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+17028)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+120780)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+120781)] = 0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+120780)] = 0.000000e+00; } } } { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][0]); int i2 = 2; for (; (i2<=129); i2 += 2) { fieldData_LaplaceCoeff_7_p1[(i2+86196)] = 0.000000e+00; fieldData_LaplaceCoeff_7_p1[(i2+86197)] = 
0.000000e+00; } for (; (i2<=130); i2 += 1) { fieldData_LaplaceCoeff_7_p1[(i2+86196)] = 0.000000e+00; } } } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S994 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*129)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 1; for (; (i4<=128); i4 += 2) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+130)]; buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+262)]; } for (; (i4<=129); i4 += 1) { buffer_Send_1_p1[(i4-1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+130)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 1161, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 1161, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S995 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_0_p1 = 
(&buffer_Recv[0][(i3*129)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 3; for (; (i4<=130); i4 += 2) { fieldData_LaplaceCoeff_7_p1[((i4*132)-262)] = buffer_Recv_0_p1[(i4-3)]; fieldData_LaplaceCoeff_7_p1[((i4*132)-130)] = buffer_Recv_0_p1[(i4-2)]; } for (; (i4<=131); i4 += 1) { fieldData_LaplaceCoeff_7_p1[((i4*132)-262)] = buffer_Recv_0_p1[(i4-3)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[7][17030], 1, mpiDatatype_9_129_17292, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[7][134], 1, mpiDatatype_9_129_17292, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = 
false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S996 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); double* buffer_Send_0_p1 = (&buffer_Send[0][(i3*131)]); int i4 = 0; for (; (i4<=129); i4 += 2) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+3)]; buffer_Send_0_p1[(i4+1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+135)]; } for (; (i4<=130); i4 += 1) { buffer_Send_0_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+3)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S997 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Send_1_p1 = (&buffer_Send[1][(i3*131)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 0; for (; (i4<=129); i4 += 2) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+129)]; buffer_Send_1_p1[(i4+1)] = fieldData_LaplaceCoeff_7_p1[((i4*132)+261)]; } for (; (i4<=130); i4 += 1) { buffer_Send_1_p1[i4] = fieldData_LaplaceCoeff_7_p1[((i4*132)+129)]; } } } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Isend(buffer_Send[0], 1179, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][0]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[0]); reqOutstanding_Send[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Isend(buffer_Send[1], 1179, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]); reqOutstanding_Send[1] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if 
((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { MPI_Irecv(buffer_Recv[0], 1179, MPI_DOUBLE, neighbor_remoteRank[1][0], ((unsigned int)(neighbor_fragCommId[1][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]); reqOutstanding_Recv[0] = true; } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { MPI_Irecv(buffer_Recv[1], 1179, MPI_DOUBLE, neighbor_remoteRank[1][1], ((unsigned int)(neighbor_fragCommId[1][1]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[1]); reqOutstanding_Recv[1] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[0]) { waitForMPIReq(&mpiRequest_Recv[0]); reqOutstanding_Recv[0] = false; } if (reqOutstanding_Recv[1]) { waitForMPIReq(&mpiRequest_Recv[1]); reqOutstanding_Recv[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][0]&&neighbor_isRemote[1][0])) { /* Statements in this Scop: S998 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i3*131)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 1; for (; (i4<=130); i4 += 2) { fieldData_LaplaceCoeff_7_p1[((i4*132)-131)] = buffer_Recv_0_p1[(i4-1)]; fieldData_LaplaceCoeff_7_p1[((i4*132)+1)] = buffer_Recv_0_p1[i4]; } for (; (i4<=131); i4 += 1) { fieldData_LaplaceCoeff_7_p1[((i4*132)-131)] = buffer_Recv_0_p1[(i4-1)]; } } } if ((neighbor_isValid[1][1]&&neighbor_isRemote[1][1])) { /* Statements in this Scop: S999 */ for (int i3 = 0; (i3<=8); i3 += 1) { double* buffer_Recv_1_p1 = (&buffer_Recv[1][(i3*131)]); double* fieldData_LaplaceCoeff_7_p1 = (&fieldData_LaplaceCoeff[7][(i3*17292)]); int i4 = 131; for (; (i4<=260); i4 += 2) { fieldData_LaplaceCoeff_7_p1[((i4*132)-17161)] = buffer_Recv_1_p1[(i4-131)]; fieldData_LaplaceCoeff_7_p1[((i4*132)-17029)] = buffer_Recv_1_p1[(i4-130)]; } for (; (i4<=261); 
i4 += 1) { fieldData_LaplaceCoeff_7_p1[((i4*132)-17161)] = buffer_Recv_1_p1[(i4-131)]; } } } } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[0]) { waitForMPIReq(&mpiRequest_Send[0]); reqOutstanding_Send[0] = false; } if (reqOutstanding_Send[1]) { waitForMPIReq(&mpiRequest_Send[1]); reqOutstanding_Send[1] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Isend(&fieldData_LaplaceCoeff[7][265], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][2], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][2]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[2]); reqOutstanding_Send[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Isend(&fieldData_LaplaceCoeff[7][16897], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][3], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[1][3]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[3]); reqOutstanding_Send[3] = true; } } } ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if ((neighbor_isValid[1][2]&&neighbor_isRemote[1][2])) { MPI_Irecv(&fieldData_LaplaceCoeff[7][1], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][2], ((unsigned int)(neighbor_fragCommId[1][2]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[2]); reqOutstanding_Recv[2] = true; } if ((neighbor_isValid[1][3]&&neighbor_isRemote[1][3])) { MPI_Irecv(&fieldData_LaplaceCoeff[7][17161], 1, mpiDatatype_9_131_17292, neighbor_remoteRank[1][3], ((unsigned int)(neighbor_fragCommId[1][3]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[3]); reqOutstanding_Recv[3] = true; } } } for (int fragmentIdx = 0; fragmentIdx < 1; 
++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Recv[2]) { waitForMPIReq(&mpiRequest_Recv[2]); reqOutstanding_Recv[2] = false; } if (reqOutstanding_Recv[3]) { waitForMPIReq(&mpiRequest_Recv[3]); reqOutstanding_Recv[3] = false; } } } for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { ; ; } } ; ; for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) { if (isValidForSubdomain[1]) { if (reqOutstanding_Send[2]) { waitForMPIReq(&mpiRequest_Send[2]); reqOutstanding_Send[2] = false; } if (reqOutstanding_Send[3]) { waitForMPIReq(&mpiRequest_Send[3]); reqOutstanding_Send[3] = false; } } } }