C++ (Cpp) _mm256_i32gather_psの例

プログラミング言語: C++ (Cpp)

メソッド/関数: _mm256_i32gather_ps

hotexamples.comのコード掲載数: 7

C++ (Cpp) _mm256_i32gather_ps - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)の_mm256_i32gather_psの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: csr.cpp プロジェクト: dsheffie/Toys

/*
 * Sparse matrix-vector product: for every row i, y[i] is the sum over the
 * row's stored entries of A[entry] * x[column of entry].
 *
 *   A        stored values of all rows, packed back to back (row i starts
 *            at the running sum of nIdx[0..i-1])
 *   nIdx     nIdx[i] is the number of stored entries in row i
 *   indices  indices[i][j] is the position in x used by entry j of row i
 *   x        dense input vector
 *   n        number of rows
 *   y        output vector, one element written per row
 *
 * Entries are processed eight at a time with an AVX2 gather; the remaining
 * (nElem & 7) entries of each row are handled by a scalar loop.
 *
 * sum8() is defined elsewhere in the project; it is presumably the
 * horizontal sum of the eight lanes of its argument -- confirm.
 */
void avx2_csr_spmv( float *A, int32_t *nIdx, int32_t **indices, float *x, int32_t n, float *y)
{
  int32_t A_offset = 0;
  for(int32_t i = 0; i < n; i++)
    {
      const int32_t nElem = nIdx[i];
      const int32_t *rowIdx = indices[i];
      const float *rowA = &A[A_offset];
      float t = 0.0f;

      __m256 vT = _mm256_setzero_ps();
      /* Largest multiple of 8 that does not exceed nElem. */
      const int32_t smLen = nElem - (nElem & 7);

      for(int32_t j = 0; j < smLen; j+=8)
        {
          /*
           * Unaligned load: nothing here guarantees that the per-row index
           * arrays are 32-byte aligned, and the aligned variant faults on a
           * misaligned address.
           */
          __m256i vIdx = _mm256_loadu_si256((const __m256i*)&rowIdx[j]);
          /* Scale 4 == sizeof(float): vIdx holds element indices into x. */
          __m256 vX = _mm256_i32gather_ps((float const*)x,vIdx,4);
          __m256 vA = _mm256_loadu_ps(&rowA[j]);
          vT = _mm256_add_ps(vT, _mm256_mul_ps(vX,vA));
        }

      t += sum8(vT);

      /* Scalar tail for the entries that do not fill a full vector. */
      for(int32_t j = smLen; j < nElem; j++)
        {
          int32_t idx = rowIdx[j];
          t += x[idx]*rowA[j];
        }

      y[i] = t;
      A_offset += nElem;
    }
}

コード例 #2

ファイルを表示

ファイル: avx2-builtins.c プロジェクト: CSI-LLVM/clang

// Wraps a single call to _mm256_i32gather_ps(b, c, 2) and returns its result.
// The directive comments inside the body describe the IR expected for this
// function: a float-vector `fcmp oeq`, sign-extended to <8 x i32> and bitcast
// back to <8 x float>, followed by a call to llvm.x86.avx2.gather.d.ps.256
// whose first operand is undef and whose last operand is `i8 2`
// (presumably the scale argument passed below).
// NOTE(review): keep the body and the directive comments untouched; they are
// presumably consumed by FileCheck -- confirm against the file's RUN line.
__m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
  // CHECK-LABEL: test_mm256_i32gather_ps
  // CHECK:         [[CMP:%.*]] = fcmp oeq <8 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
  return _mm256_i32gather_ps(b, c, 2);
}

コード例 #3

ファイルを表示

ファイル: avx2-builtins.c プロジェクト: autodesk-forks/llvm-clang

// Wraps a single call to _mm256_i32gather_ps(b, c, 2) and returns its result.
// The directive comments inside the body describe the IR expected for this
// function: a call to llvm.x86.avx.cmp.ps.256 with a last operand of `i8 0`,
// then a call to llvm.x86.avx2.gather.d.ps.256 whose first operand is undef
// and whose last operand is `i8 2` (presumably the scale argument below).
// NOTE(review): keep the body and the directive comments untouched; they are
// presumably consumed by FileCheck -- confirm against the file's RUN line.
__m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
  // CHECK-LABEL: test_mm256_i32gather_ps
  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 0)
  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
  return _mm256_i32gather_ps(b, c, 2);
}

コード例 #4

ファイルを表示

ファイル: TransLut_avx2.cpp プロジェクト: EleonoreMizo/fmtconv

void	TransLut::process_plane_flt_any_avx2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int stride_dst, int stride_src, int w, int h)
{
	assert (dst_ptr != 0);
	assert (src_ptr != 0);
	assert (stride_dst != 0 || h == 1);
	assert (stride_src != 0 || h == 1);
	assert (w > 0);
	assert (h > 0);

	for (int y = 0; y < h; ++y)
	{
		const FloatIntMix *  s_ptr =
			reinterpret_cast <const FloatIntMix *> (src_ptr);
		TD *                 d_ptr =
			reinterpret_cast <               TD *> (dst_ptr);

		for (int x = 0; x < w; x += 8)
		{
			union
			{
				__m256i            _vect;
				uint32_t           _scal [8];
			}                  index;
			__m256             lerp;
			TransLut_FindIndexAvx2 <M>::find_index (s_ptr + x, index._vect, lerp);
#if 1	// Looks as fast as _mm256_set_ps
			// G++ complains about sizeof() as argument
			__m256             val = _mm256_i32gather_ps (
				&_lut.use <float> (0), index._vect, 4  // 4 == sizeof (float)
			);
			const __m256       va2 = _mm256_i32gather_ps (
				&_lut.use <float> (1), index._vect, 4  // 4 == sizeof (float)
			);
#else
			__m256             val = _mm256_set_ps (
				_lut.use <float> (index._scal [7]    ),
				_lut.use <float> (index._scal [6]    ),
				_lut.use <float> (index._scal [5]    ),
				_lut.use <float> (index._scal [4]    ),
				_lut.use <float> (index._scal [3]    ),
				_lut.use <float> (index._scal [2]    ),
				_lut.use <float> (index._scal [1]    ),
				_lut.use <float> (index._scal [0]    )
			);
			const __m256       va2 = _mm256_set_ps (
				_lut.use <float> (index._scal [7] + 1),
				_lut.use <float> (index._scal [6] + 1),
				_lut.use <float> (index._scal [5] + 1),
				_lut.use <float> (index._scal [4] + 1),
				_lut.use <float> (index._scal [3] + 1),
				_lut.use <float> (index._scal [2] + 1),
				_lut.use <float> (index._scal [1] + 1),
				_lut.use <float> (index._scal [0] + 1)
			);
#endif
			const __m256       dif = _mm256_sub_ps (va2, val);
			val = _mm256_add_ps (val, _mm256_mul_ps (dif, lerp));
			TransLut_store_avx2 (&d_ptr [x], val);
		}

		src_ptr += stride_src;
		dst_ptr += stride_dst;
	}

	_mm256_zeroupper ();	// Back to SSE state
}

コード例 #5

ファイルを表示

ファイル: avx2-builtins.c プロジェクト: Bigcheese/clang

// Wraps a single call to _mm256_i32gather_ps(b, c, 2) and returns its result.
// The directive comment inside the body names the LLVM intrinsic
// (llvm.x86.avx2.gather.d.ps.256) expected in the output for this function.
// NOTE(review): keep the body and the directive comment untouched; it is
// presumably consumed by FileCheck -- confirm against the file's RUN line.
__m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
  // CHECK: @llvm.x86.avx2.gather.d.ps.256
  return _mm256_i32gather_ps(b, c, 2);
}

コード例 #6

ファイルを表示

ファイル: pr59839.c プロジェクト: 0day-ci/gcc

/* Builds an index vector with every 32-bit lane set to 1 and performs an
   8-lane float gather from x using that vector and a scale of 1.  The
   gathered value d is never read afterwards.
   NOTE(review): presumably a compile-only regression fixture (the file is
   named pr59839.c), so the exact statement shape is likely intentional --
   confirm before restructuring.  */
void
test (const float *x)
{
  __m256i i = _mm256_set1_epi32 (1);
  __m256 d = _mm256_i32gather_ps (x, i, 1);
}

コード例 #7

ファイルを表示

ファイル: avx2-i32gatherps256-1.c プロジェクト: AsherBond/MondocosmOS-Dependencies

/* Performs one 8-lane float gather from base, indexed by idx with a scale
   of 1, and stores the result in x.
   x, base and idx are not declared inside this function; presumably they
   are file-scope variables defined earlier in the test file -- confirm.  */
void extern
avx2_test (void)
{
  x = _mm256_i32gather_ps (base, idx, 1);
}