예제 #1
0
/// Compute the transpose of this matrix and write it into a caller-supplied matrix.
///
/// The result is built with two masked shuffle passes over the matrix register. This matrix is not modified (the
/// method is const); only @c rMatrix is written.
///
/// @param[out] rMatrix  Receives the transposed matrix.
///
/// @see Transpose()
void Helium::Simd::Matrix44::GetTranspose( Matrix44& rMatrix ) const
{
    // Pass 1: in the lanes selected by 0x33cc, substitute the value obtained by applying the BADC permutation at both
    // the 128-bit and the 32-bit granularity; the remaining lanes are taken from m_matrix unchanged.
    // NOTE(review): presumably this exchanges the two off-diagonal 2x2 blocks, assuming each 128-bit group of
    // m_matrix holds one matrix row -- confirm against the Matrix44 storage layout.
    const Mask blockSwapMask = _mm512_int2mask( 0x33cc );
    const Register blockSwapped =
        _mm512_mask_shuf128x32( m_matrix, blockSwapMask, m_matrix, _MM_PERM_BADC, _MM_PERM_BADC );

    // Pass 2: same scheme on the intermediate value, now with the CDAB permutation and the 0x5a5a lane mask.
    // NOTE(review): presumably this swaps the off-diagonal elements inside each 2x2 block, completing the transpose.
    const Mask elementSwapMask = _mm512_int2mask( 0x5a5a );
    rMatrix.m_matrix =
        _mm512_mask_shuf128x32( blockSwapped, elementSwapMask, blockSwapped, _MM_PERM_CDAB, _MM_PERM_CDAB );
}
예제 #2
0
__inline void mic_broadcast16x64(const double* inv, double* outv)
{
    __mmask8 k1 = _mm512_int2mask(0x0F);
    __mmask8 k2 = _mm512_int2mask(0xF0);
    for(int l = 0; l < 16; l += 2)
    {
        __m512d t = _mm512_setzero_pd();
        t = _mm512_mask_extload_pd(t, k1, &inv[(l%4)*4 + l/4], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);
        t = _mm512_mask_extload_pd(t, k2, &inv[((l+1)%4)*4 + (l+1)/4], _MM_UPCONV_PD_NONE, _MM_BROADCAST_1X8, _MM_HINT_NONE);

        _mm512_store_pd(&outv[l*4], t);
    }
}