void NaturalNestedDissection ( Int nx, Int ny, Int nz, const Graph& graph, vector<Int>& map, Separator& sep, NodeInfo& node, Int cutoff ) { EL_DEBUG_CSE // NOTE: There is a potential memory leak here if sep or info is reused const Int numSources = graph.NumSources(); vector<Int> perm( numSources ); for( Int s=0; s<numSources; ++s ) perm[s] = s; NaturalNestedDissectionRecursion ( nx, ny, nz, graph, perm, sep, node, 0, cutoff ); // Construct the distributed reordering BuildMap( sep, map ); EL_DEBUG_ONLY(EnsurePermutation( map )) // Run the symbolic analysis Analysis( node ); }
void NaturalNestedDissection ( Int nx, Int ny, Int nz, const DistGraph& graph, DistMap& map, DistSeparator& sep, DistNodeInfo& node, Int cutoff, bool storeFactRecvInds ) { EL_DEBUG_CSE // NOTE: There is a potential memory leak here if sep or info is reused DistMap perm( graph.NumSources(), graph.Comm() ); const Int firstLocalSource = perm.FirstLocalSource(); const Int numLocalSources = perm.NumLocalSources(); for( Int s=0; s<numLocalSources; ++s ) perm.SetLocal( s, s+firstLocalSource ); NaturalNestedDissectionRecursion ( nx, ny, nz, graph, perm, sep, node, 0, cutoff ); // Construct the distributed reordering BuildMap( sep, map ); EL_DEBUG_ONLY(EnsurePermutation(map)) // Run the symbolic analysis Analysis( node, storeFactRecvInds ); }
void GetMappedDiagonal ( const DistMatrix<T,U,V>& A, AbstractDistMatrix<S>& dPre, function<S(const T&)> func, Int offset ) { EL_DEBUG_CSE EL_DEBUG_ONLY(AssertSameGrids( A, dPre )) ElementalProxyCtrl ctrl; ctrl.colConstrain = true; ctrl.colAlign = A.DiagonalAlign(offset); ctrl.rootConstrain = true; ctrl.root = A.DiagonalRoot(offset); DistMatrixWriteProxy<S,S,DiagCol<U,V>(),DiagRow<U,V>()> dProx( dPre, ctrl ); auto& d = dProx.Get(); d.Resize( A.DiagonalLength(offset), 1 ); if( d.Participating() ) { const Int diagShift = d.ColShift(); const Int iStart = diagShift + Max(-offset,0); const Int jStart = diagShift + Max( offset,0); const Int colStride = A.ColStride(); const Int rowStride = A.RowStride(); const Int iLocStart = (iStart-A.ColShift()) / colStride; const Int jLocStart = (jStart-A.RowShift()) / rowStride; const Int iLocStride = d.ColStride() / colStride; const Int jLocStride = d.ColStride() / rowStride; const Int localDiagLength = d.LocalHeight(); S* dBuf = d.Buffer(); const T* ABuf = A.LockedBuffer(); const Int ldim = A.LDim(); EL_PARALLEL_FOR for( Int k=0; k<localDiagLength; ++k ) { const Int iLoc = iLocStart + k*iLocStride; const Int jLoc = jLocStart + k*jLocStride; dBuf[k] = func(ABuf[iLoc+jLoc*ldim]); } } }
void LowerBlocked ( AbstractDistMatrix<F>& APre, AbstractDistMatrix<F>& householderScalarsPre ) { EL_DEBUG_CSE EL_DEBUG_ONLY(AssertSameGrids( APre, householderScalarsPre )) DistMatrixReadWriteProxy<F,F,MC,MR> AProx( APre ); DistMatrixWriteProxy<F,F,STAR,STAR> householderScalarsProx( householderScalarsPre ); auto& A = AProx.Get(); auto& householderScalars = householderScalarsProx.Get(); const Grid& g = A.Grid(); const Int n = A.Height(); householderScalars.Resize( Max(n-1,0), 1 ); DistMatrix<F,MC,STAR> UB1_MC_STAR(g), V21_MC_STAR(g); DistMatrix<F,MR,STAR> V01_MR_STAR(g), VB1_MR_STAR(g), UB1_MR_STAR(g); DistMatrix<F,STAR,STAR> G11_STAR_STAR(g); const Int bsize = Blocksize(); for( Int k=0; k<n-1; k+=bsize ) { const Int nb = Min(bsize,n-1-k); const Range<Int> ind0( 0, k ), ind1( k, k+nb ), indB( k, n ), indR( k, n ), ind2( k+nb, n ); auto ABR = A( indB, indR ); auto A22 = A( ind2, ind2 ); auto householderScalars1 = householderScalars( ind1, ALL ); UB1_MC_STAR.AlignWith( ABR ); UB1_MR_STAR.AlignWith( ABR ); VB1_MR_STAR.AlignWith( ABR ); UB1_MC_STAR.Resize( n-k, nb ); UB1_MR_STAR.Resize( n-k, nb ); VB1_MR_STAR.Resize( n-k, nb ); G11_STAR_STAR.Resize( nb, nb ); hessenberg::LowerPanel ( ABR, householderScalars1, UB1_MC_STAR, UB1_MR_STAR, VB1_MR_STAR, G11_STAR_STAR ); auto AB0 = A( indB, ind0 ); auto A2R = A( ind2, indR ); auto U21_MC_STAR = UB1_MC_STAR( IR(nb,END), ALL ); // AB0 := AB0 - (UB1 inv(G11)^H UB1^H AB0) // = AB0 - (UB1 ((AB0^H UB1) inv(G11))^H) // ------------------------------------------- V01_MR_STAR.AlignWith( AB0 ); Zeros( V01_MR_STAR, k, nb ); LocalGemm( ADJOINT, NORMAL, F(1), AB0, UB1_MC_STAR, F(0), V01_MR_STAR ); El::AllReduce( V01_MR_STAR, AB0.ColComm() ); LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), G11_STAR_STAR, V01_MR_STAR ); LocalGemm ( NORMAL, ADJOINT, F(-1), UB1_MC_STAR, V01_MR_STAR, F(1), AB0 ); // A2R := (A2R - U21 inv(G11)^H VB1^H)(I - UB1 inv(G11) UB1^H) // ----------------------------------------------------------- // A2R := A2R - U21 inv(G11)^H VB1^H // (note: VB1 is overwritten) LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), G11_STAR_STAR, VB1_MR_STAR ); LocalGemm ( NORMAL, ADJOINT, F(-1), U21_MC_STAR, VB1_MR_STAR, F(1), A2R ); // A2R := A2R - ((A2R UB1) inv(G11)) UB1^H V21_MC_STAR.AlignWith( A2R ); Zeros( V21_MC_STAR, A2R.Height(), nb ); LocalGemm( NORMAL, NORMAL, F(1), A2R, UB1_MR_STAR, F(0), V21_MC_STAR ); El::AllReduce( V21_MC_STAR, A2R.RowComm() ); LocalTrsm ( RIGHT, UPPER, NORMAL, NON_UNIT, F(1), G11_STAR_STAR, V21_MC_STAR ); LocalGemm ( NORMAL, ADJOINT, F(-1), V21_MC_STAR, UB1_MR_STAR, F(1), A2R ); } }