/**
 * Copies the pixels [x_begin, x_end) of one source row into the matching
 * destination column, one 32-bit pixel at a time.
 *
 * Source pixel (x, src_y) is read at byte offset src_y * src_stride + x * 4
 * and written at byte offset x * dst_stride + src_y * 4.
 */
static void CopyTransposedRowSpan32(
    uint8_t *p_dst_buffer, int dst_stride,
    const uint8_t *p_src_buffer, int src_stride,
    int src_y, int x_begin, int x_end)
{
    const uint32_t *p_row_pixels =
        reinterpret_cast<const uint32_t *>(p_src_buffer + src_y * src_stride);
    uint8_t *p_column_base = p_dst_buffer + src_y * 4;

    for (int x = x_begin; x < x_end; ++x)
    {
        uint32_t *p_dst_pixel =
            reinterpret_cast<uint32_t *>(p_column_base + x * dst_stride);
        *p_dst_pixel = p_row_pixels[x];
    }
}

/**
 * Writes the transpose of an image made of 32-bit pixels into p_dst_buffer.
 *
 * Both strides are byte distances between consecutive rows: they are added to
 * the uint8_t buffer pointers before any cast to uint32_t. The source pixel at
 * column x, row y ends up at row x, column y of the destination.
 *
 * The area covered by whole 4x4 tiles is delegated to Transpose4x4; the
 * remaining right-hand columns and bottom rows (width or height not a multiple
 * of 4) are copied pixel by pixel.
 *
 * No validation of pointers, strides or dimensions is performed.
 * NOTE(review): the uint32_t accesses presumably require the buffers and
 * strides to be 4-byte aligned - confirm against callers.
 *
 * @param p_dst_buffer Destination buffer, addressed with dst_stride.
 * @param dst_stride   Byte stride between destination rows.
 * @param p_src_buffer Source buffer, addressed with src_stride.
 * @param src_stride   Byte stride between source rows.
 * @param src_width    Source width in pixels.
 * @param src_height   Source height in pixels.
 * @return Always NO_ERRORS.
 */
static int Transpose32BitImage(
    uint8_t *p_dst_buffer, int dst_stride,
    const uint8_t *p_src_buffer, int src_stride,
    int src_width, int src_height)
{
    // Dimensions rounded down to a multiple of 4: the region made of full tiles.
    const int tiled_width = src_width & ~0x03;
    const int tiled_height = src_height & ~0x03;

    // Pass 1: every complete 4x4 tile goes through Transpose4x4.
    if (tiled_width > 0)
    {
        for (int tile_y = 0; tile_y < tiled_height; tile_y += 4)
        {
            const uint32_t *p_tile_row =
                reinterpret_cast<const uint32_t *>(p_src_buffer + tile_y * src_stride);
            uint8_t *p_tile_column = p_dst_buffer + tile_y * 4;

            for (int tile_x = 0; tile_x < tiled_width; tile_x += 4)
            {
                uint32_t *p_dst_tile =
                    reinterpret_cast<uint32_t *>(p_tile_column + tile_x * dst_stride);
                Transpose4x4(p_dst_tile, dst_stride, p_tile_row + tile_x, src_stride);
            }
        }
    }

    // Pass 2: columns to the right of the tiled region, for the tiled rows only.
    if (tiled_width < src_width)
    {
        for (int row = 0; row < tiled_height; ++row)
        {
            CopyTransposedRowSpan32(p_dst_buffer, dst_stride,
                                    p_src_buffer, src_stride,
                                    row, tiled_width, src_width);
        }
    }

    // Pass 3: rows below the tiled region, across the full width.
    for (int row = tiled_height; row < src_height; ++row)
    {
        CopyTransposedRowSpan32(p_dst_buffer, dst_stride,
                                p_src_buffer, src_stride,
                                row, 0, src_width);
    }

    return NO_ERRORS;
}
void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) { float transp[16]; Transpose4x4(transp, pMatrix); pD3Ddevice->SetVertexShaderConstantF(creg, transp, 4); }