// Splits a single column into colStride packed portions.
//
// For every k in [0,colStride):
//   * Shift_( k, colAlign, colStride ) yields the index of the first entry
//     of A that is read for portion k;
//   * Length_( height, shift, colStride ) yields how many entries are copied;
//   * those entries are read from A with stride colStride and written with
//     unit stride starting at BPortions[k*portionSize].
//
// NOTE(review): StridedMemCopy is assumed to take
// (dest, destStride, source, sourceStride, numEntries); this matches A being
// const here, but confirm against its declaration.
void ColStridedColumnPack
( Int height,
  Int colAlign, Int colStride,
  const T* A,
        T* BPortions, Int portionSize )
{
    Int portion = 0;
    while( portion < colStride )
    {
        const Int shift = Shift_( portion, colAlign, colStride );
        const Int numEntries = Length_( height, shift, colStride );

        // Portion 'portion' occupies its own portionSize-sized slot.
        const T* source = A + shift;
        T* dest = BPortions + portion*portionSize;

        StridedMemCopy( dest, 1, source, colStride, numEntries );
        ++portion;
    }
}
// Copies a height x width matrix from A into B, where each buffer has its
// own stride between consecutive entries of a column (colStride*) and its own
// stride between consecutive columns (rowStride*).
//
// When both column strides are one, the whole copy is delegated to
// lapack::Copy; otherwise each of the width columns is copied individually
// with StridedMemCopy.
//
// NOTE(review): lapack::Copy with 'F' presumably copies the full matrix using
// rowStrideA/rowStrideB as leading dimensions -- confirm against the wrapper.
//
// TODO: Add OpenMP parallelization and/or optimize
void InterleaveMatrix
( Int height, Int width,
  const T* A, Int colStrideA, Int rowStrideA,
        T* B, Int colStrideB, Int rowStrideB )
{
    const bool unitColStrides = ( colStrideA == 1 && colStrideB == 1 );
    if( unitColStrides )
    {
        lapack::Copy( 'F', height, width, A, rowStrideA, B, rowStrideB );
        return;
    }

    // General case: one strided copy per column.
    for( Int col=0; col<width; ++col )
    {
        const T* sourceCol = A + col*rowStrideA;
        T* destCol = B + col*rowStrideB;
        StridedMemCopy( destCol, colStrideB, sourceCol, colStrideA, height );
    }
}
void PartialColStridedColumnUnpack ( Int height, Int colAlign, Int colStride, Int colStrideUnion, Int colStridePart, Int colRankPart, Int colShiftB, const T* APortions, Int portionSize, T* B ) { for( Int k=0; k<colStrideUnion; ++k ) { const Int colShift = Shift_( colRankPart+k*colStridePart, colAlign, colStride ); const Int colOffset = (colShift-colShiftB) / colStridePart; const Int localHeight = Length_( height, colShift, colStride ); StridedMemCopy ( &B[colOffset], colStrideUnion, &APortions[k*portionSize], 1, localHeight ); } }
// Copies a height x width matrix from A into B, where each buffer has its
// own stride between consecutive entries of a column (colStride*) and its own
// stride between consecutive columns (rowStride*).
//
// Dispatch:
//   * both column strides equal to one -> a single lapack::Copy call;
//   * otherwise, when EL_HAVE_MKL is defined -> a single mkl::omatcopy call
//     with a scaling factor of T(1);
//   * otherwise -> one StridedMemCopy per column.
//
// NOTE(review): lapack::Copy with 'F' presumably copies the full matrix, and
// mkl::omatcopy with NORMAL presumably performs a non-transposed out-of-place
// copy -- confirm both against their wrappers.
void InterleaveMatrix
( Int height, Int width,
  const T* A, Int colStrideA, Int rowStrideA,
        T* B, Int colStrideB, Int rowStrideB )
{
    const bool unitColStrides = ( colStrideA == 1 && colStrideB == 1 );
    if( unitColStrides )
    {
        lapack::Copy( 'F', height, width, A, rowStrideA, B, rowStrideB );
        return;
    }

#ifdef EL_HAVE_MKL
    // Note the argument order: row stride precedes column stride.
    mkl::omatcopy
    ( NORMAL, height, width, T(1),
      A, rowStrideA, colStrideA,
      B, rowStrideB, colStrideB );
#else
    // Fallback: one strided copy per column.
    for( Int col=0; col<width; ++col )
    {
        const T* sourceCol = A + col*rowStrideA;
        T* destCol = B + col*rowStrideB;
        StridedMemCopy( destCol, colStrideB, sourceCol, colStrideA, height );
    }
#endif
}