/// Get the transpose of this matrix. /// /// @param[out] rMatrix Matrix transpose. /// /// @see Transpose() void Helium::Simd::Matrix44::GetTranspose( Matrix44& rMatrix ) const { Mask transposeMask; transposeMask = _mm512_int2mask( 0x33cc ); Register temp = _mm512_mask_shuf128x32( m_matrix, transposeMask, m_matrix, _MM_PERM_BADC, _MM_PERM_BADC ); transposeMask = _mm512_int2mask( 0x5a5a ); rMatrix.m_matrix = _mm512_mask_shuf128x32( temp, transposeMask, temp, _MM_PERM_CDAB, _MM_PERM_CDAB ); }
__inline void mic_broadcast16x64(const double* inv, double* outv) { __mmask8 k1 = _mm512_int2mask(0x0F); __mmask8 k2 = _mm512_int2mask(0xF0); for(int l = 0; l < 16; l += 2) { __m512d t = _mm512_setzero_pd(); t = _mm512_mask_extload_pd(t, k1, &inv[(l%4)*4 + l/4], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE); t = _mm512_mask_extload_pd(t, k2, &inv[((l+1)%4)*4 + (l+1)/4], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE); _mm512_store_pd(&outv[l*4], t); } }