// Overwrites each locally stored entry of A with func(i,j,A(i,j)), where
// (i,j) are the global row and column indices of that entry.
void IndexDependentMap
( AbstractDistMatrix<T>& A, function<T(Int,Int,T)> func )
{
    DEBUG_CSE
    const Int localHeight = A.LocalHeight();
    const Int localWidth = A.LocalWidth();
    auto& localA = A.Matrix();

    // Walk the local matrix one column at a time
    for( Int col=0; col<localWidth; ++col )
    {
        const Int globalCol = A.GlobalCol(col);
        for( Int row=0; row<localHeight; ++row )
        {
            const Int globalRow = A.GlobalRow(row);
            localA(row,col) = func( globalRow, globalCol, localA(row,col) );
        }
    }
}
/**
 * Assembles the matrix A and right-hand side b of a 2D Laplace problem from
 * the elements of `mesh`, without imposing any boundary condition.
 *
 * For every element the edge vectors from vertex 0 to vertices 1 and 2 are
 * packed into R, `f` and `K` are evaluated at the point returned by
 * getCenterOfSet, and the local contributions produced by bLoc/ALoc are added
 * (ADD_VALUES) into b and A at the element's vertex indices.
 *
 * @param comm  communicator used to create A and b
 * @param mesh  mesh providing `vetrices` and `elements`
 * @param A     output: created here with MatCreateMPIAIJ, then assembled
 * @param b     output: created here with VecCreateMPI, then assembled
 * @param f     evaluated at the element center; its value is passed to bLoc
 * @param K     evaluated at the element center; its value is passed to ALoc
 * @return      the error code of the first failing PETSc call (via CHKERRQ),
 *              otherwise the code returned by the final MatAssemblyEnd
 *
 * NOTE: the local buffers hold 3 and 3x3 values and vertices 0..2 are read
 * unconditionally, so every element is assumed to have exactly three
 * vertices.
 */
PetscErrorCode FEMAssembleTotal2DLaplace(MPI_Comm comm, Mesh *mesh, Mat &A, Vec &b, PetscReal(*f)(Point), PetscReal(*K)(Point)) {
    PetscErrorCode ierr;
    PetscInt size = mesh->vetrices.size();

    ierr = MatCreateMPIAIJ(comm, size, size, PETSC_DECIDE, PETSC_DECIDE, 7, PETSC_NULL, 0, PETSC_NULL, &A); CHKERRQ(ierr);
    ierr = VecCreateMPI(comm, size, PETSC_DECIDE, &b); CHKERRQ(ierr);

    for (std::map<PetscInt, Element*>::iterator e = mesh->elements.begin(); e != mesh->elements.end(); ++e) {
        PetscScalar bl[3];
        PetscScalar Al[9];
        PetscReal R[4];

        // Gather the element's global vertex indices and a copy of each vertex
        PetscInt elSize = e->second->numVetrices;
        Point *vetrices = new Point[elSize];
        PetscInt ixs[elSize];
        for (int j = 0; j < elSize; j++) {
            ixs[j] = e->second->vetrices[j];
            vetrices[j] = *(mesh->vetrices[ixs[j]]);
        }

        // Edge vectors (vertex 1 - vertex 0) and (vertex 2 - vertex 0)
        R[0] = vetrices[1].x - vetrices[0].x;
        R[2] = vetrices[1].y - vetrices[0].y;
        R[1] = vetrices[2].x - vetrices[0].x;
        R[3] = vetrices[2].y - vetrices[0].y;

        Point center = getCenterOfSet(vetrices, elSize);

        // The vertex copies are not used past this point. Release them now so
        // that neither the normal path nor a CHKERRQ early return leaks them.
        delete[] vetrices;

        bLoc(R, bl, f(center));
        ALoc(R, Al, K(center));

        ierr = VecSetValues(b, elSize, ixs, bl, ADD_VALUES); CHKERRQ(ierr);
        ierr = MatSetValues(A, elSize, ixs, elSize, ixs, Al, ADD_VALUES); CHKERRQ(ierr);
    }

    ierr = VecAssemblyBegin(b); CHKERRQ(ierr);
    ierr = VecAssemblyEnd(b); CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    return ierr;
}
void StackedGeometricColumnScaling ( const DistMatrix<Field, U,V >& A, const DistMatrix<Field, U,V >& B, DistMatrix<Base<Field>,V,STAR>& geomScaling ) { EL_DEBUG_CSE // NOTE: Assuming A.ColComm() == B.ColComm() and that the row alignments // are equal typedef Base<Field> Real; DistMatrix<Real,V,STAR> maxScalingA(A.Grid()), maxScalingB(A.Grid()); ColumnMaxNorms( A, maxScalingA ); ColumnMaxNorms( B, maxScalingB ); const Int mLocalA = A.LocalHeight(); const Int mLocalB = B.LocalHeight(); const Int nLocal = A.LocalWidth(); geomScaling.AlignWith( maxScalingA ); geomScaling.Resize( A.Width(), 1 ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.LockedMatrix(); auto& geomScalingLoc = geomScaling.Matrix(); auto& maxScalingALoc = maxScalingA.Matrix(); auto& maxScalingBLoc = maxScalingB.Matrix(); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { Real minAbs = Max(maxScalingALoc(jLoc),maxScalingBLoc(jLoc)); for( Int iLoc=0; iLoc<mLocalA; ++iLoc ) { const Real absVal = Abs(ALoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } for( Int iLoc=0; iLoc<mLocalB; ++iLoc ) { const Real absVal = Abs(BLoc(iLoc,jLoc)); if( absVal > 0 && absVal < minAbs ) minAbs = Min(minAbs,absVal); } geomScalingLoc(jLoc) = minAbs; } mpi::AllReduce( geomScaling.Buffer(), nLocal, mpi::MIN, A.ColComm() ); for( Int jLoc=0; jLoc<nLocal; ++jLoc ) { const Real maxAbsA = maxScalingALoc(jLoc); const Real maxAbsB = maxScalingBLoc(jLoc); const Real maxAbs = Max(maxAbsA,maxAbsB); const Real minAbs = geomScalingLoc(jLoc); geomScalingLoc(jLoc) = Sqrt(minAbs*maxAbs); } }
void MakeExtendedKahan ( ElementalMatrix<F>& A, Base<F> phi, Base<F> mu ) { EL_DEBUG_CSE typedef Base<F> Real; if( A.Height() != A.Width() ) LogicError("Extended Kahan matrices must be square"); const Int n = A.Height(); if( n % 3 != 0 ) LogicError("Dimension must be an integer multiple of 3"); const Int l = n / 3; if( !l || (l & (l-1)) ) LogicError("n/3 is not a power of two"); Int k=0; while( Int(1u<<k) < l ) ++k; if( phi <= Real(0) || phi >= Real(1) ) LogicError("phi must be in (0,1)"); if( mu <= Real(0) || mu >= Real(1) ) LogicError("mu must be in (0,1)"); // Start by setting A to the identity, and then modify the necessary // l x l blocks of its 3 x 3 partitioning. MakeIdentity( A ); unique_ptr<ElementalMatrix<F>> ABlock( A.Construct(A.Grid(),A.Root()) ); View( *ABlock, A, IR(2*l,3*l), IR(2*l,3*l) ); *ABlock *= mu; View( *ABlock, A, IR(0,l), IR(l,2*l) ); Walsh( *ABlock, k ); *ABlock *= -phi; View( *ABlock, A, IR(l,2*l), IR(2*l,3*l) ); Walsh( *ABlock, k ); *ABlock *= phi; // Now scale A by S const Real zeta = Sqrt(Real(1)-phi*phi); auto& ALoc = A.Matrix(); for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc ) { const Int i = A.GlobalRow(iLoc); const Real gamma = Pow(zeta,Real(i)); for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc ) ALoc(iLoc,jLoc) *= gamma; } }
void IndexDependentMap ( const BlockMatrix<S>& A, BlockMatrix<T>& B, function<T(Int,Int,S)> func ) { DEBUG_CSE const Int mLoc = A.LocalHeight(); const Int nLoc = A.LocalWidth(); B.AlignWith( A.DistData() ); B.Resize( A.Height(), A.Width() ); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); for( Int jLoc=0; jLoc<nLoc; ++jLoc ) { const Int j = A.GlobalCol(jLoc); for( Int iLoc=0; iLoc<mLoc; ++iLoc ) { const Int i = A.GlobalRow(iLoc); BLoc(iLoc,jLoc) = func(i,j,ALoc(iLoc,jLoc)); } } }
void FoxLi( ElementalMatrix<Complex<Real>>& APre, Int n, Real omega ) { DEBUG_CSE typedef Complex<Real> C; const Real pi = 4*Atan( Real(1) ); const C phi = Sqrt( C(0,omega/pi) ); DistMatrixWriteProxy<C,C,MC,MR> AProx( APre ); auto& A = AProx.Get(); // Compute Gauss quadrature points and weights const Grid& g = A.Grid(); DistMatrix<Real,VR,STAR> d(g), e(g); Zeros( d, n, 1 ); e.Resize( n-1, 1 ); auto& eLoc = e.Matrix(); for( Int iLoc=0; iLoc<e.LocalHeight(); ++iLoc ) { const Int i = e.GlobalRow(iLoc); const Real betaInv = 2*Sqrt(1-Pow(i+Real(1),-2)/4); eLoc(iLoc) = 1/betaInv; } DistMatrix<Real,VR,STAR> x(g); DistMatrix<Real,STAR,VR> Z(g); HermitianTridiagEig( d, e, x, Z, UNSORTED ); auto z = Z( IR(0), ALL ); DistMatrix<Real,STAR,VR> sqrtWeights( z ); auto& sqrtWeightsLoc = sqrtWeights.Matrix(); for( Int jLoc=0; jLoc<sqrtWeights.LocalWidth(); ++jLoc ) sqrtWeightsLoc(0,jLoc) = Sqrt(Real(2))*Abs(sqrtWeightsLoc(0,jLoc)); herm_eig::Sort( x, sqrtWeights, ASCENDING ); // Form the integral operator A.Resize( n, n ); DistMatrix<Real,MC,STAR> x_MC( A.Grid() ); DistMatrix<Real,MR,STAR> x_MR( A.Grid() ); x_MC.AlignWith( A ); x_MR.AlignWith( A ); x_MC = x; x_MR = x; auto& ALoc = A.Matrix(); auto& x_MCLoc = x_MC.Matrix(); auto& x_MRLoc = x_MR.Matrix(); for( Int jLoc=0; jLoc<A.LocalWidth(); ++jLoc ) { for( Int iLoc=0; iLoc<A.LocalHeight(); ++iLoc ) { const Real diff = x_MCLoc(iLoc)-x_MRLoc(jLoc); const Real theta = -omega*Pow(diff,2); const Real realPart = Cos(theta); const Real imagPart = Sin(theta); ALoc(iLoc,jLoc) = phi*C(realPart,imagPart); } } // Apply the weighting DistMatrix<Real,VR,STAR> sqrtWeightsTrans(g); Transpose( sqrtWeights, sqrtWeightsTrans ); DiagonalScale( LEFT, NORMAL, sqrtWeightsTrans, A ); DiagonalScale( RIGHT, NORMAL, sqrtWeightsTrans, A ); }
void Helper ( const AbstractDistMatrix<S>& A, AbstractDistMatrix<T>& B ) { EL_DEBUG_CSE // TODO: Decide whether S or T should be used as the transmission type // based upon which is smaller. Transmit S by default. const Int height = A.Height(); const Int width = A.Width(); const Grid& g = B.Grid(); B.Resize( height, width ); Zero( B ); const bool BPartic = B.Participating(); const int BRoot = B.Root(); const bool includeViewers = (A.Grid() != B.Grid()); const Int localHeight = A.LocalHeight(); const Int localWidth = A.LocalWidth(); auto& ALoc = A.LockedMatrix(); auto& BLoc = B.Matrix(); // TODO: Break into smaller pieces to avoid excessive memory usage? vector<Entry<S>> remoteEntries; vector<int> distOwners; if( A.RedundantRank() == 0 ) { const bool noRedundant = B.RedundantSize() == 1; const int colStride = B.ColStride(); const int rowRank = B.RowRank(); const int colRank = B.ColRank(); vector<Int> globalRows(localHeight), localRows(localHeight); vector<int> ownerRows(localHeight); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const Int i = A.GlobalRow(iLoc); const int ownerRow = B.RowOwner(i); globalRows[iLoc] = i; ownerRows[iLoc] = ownerRow; localRows[iLoc] = B.LocalRow(i,ownerRow); } remoteEntries.reserve( localHeight*localWidth ); distOwners.reserve( localHeight*localWidth ); for( Int jLoc=0; jLoc<localWidth; ++jLoc ) { const Int j = A.GlobalCol(jLoc); const int ownerCol = B.ColOwner(j); const Int localCol = B.LocalCol(j,ownerCol); const bool isLocalCol = ( BPartic && ownerCol == rowRank ); for( Int iLoc=0; iLoc<localHeight; ++iLoc ) { const int ownerRow = ownerRows[iLoc]; const Int localRow = localRows[iLoc]; const bool isLocalRow = ( BPartic && ownerRow == colRank ); const S& alpha = ALoc(iLoc,jLoc); if( noRedundant && isLocalRow && isLocalCol ) { BLoc(localRow,localCol) = Caster<S,T>::Cast(alpha); } else { remoteEntries.push_back ( Entry<S>{localRow,localCol,alpha} ); distOwners.push_back( ownerRow + colStride*ownerCol ); } } } } // We will first push to 
redundant rank 0 of B const int redundantRootB = 0; // Compute the metadata // ==================== const Int totalSend = remoteEntries.size(); mpi::Comm comm; vector<int> sendCounts, owners(totalSend); if( includeViewers ) { comm = g.ViewingComm(); const int viewingSize = mpi::Size( g.ViewingComm() ); const int distBSize = mpi::Size( B.DistComm() ); vector<int> distBToViewing(distBSize); for( int distBRank=0; distBRank<distBSize; ++distBRank ) { const int vcOwner = g.CoordsToVC (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB); distBToViewing[distBRank] = g.VCToViewing(vcOwner); } sendCounts.resize(viewingSize,0); for( Int k=0; k<totalSend; ++k ) { owners[k] = distBToViewing[distOwners[k]]; ++sendCounts[owners[k]]; } } else { if( !g.InGrid() ) return; comm = g.VCComm(); const int distBSize = mpi::Size( B.DistComm() ); vector<int> distBToVC(distBSize); for( int distBRank=0; distBRank<distBSize; ++distBRank ) { distBToVC[distBRank] = g.CoordsToVC (B.ColDist(),B.RowDist(),distBRank,BRoot,redundantRootB); } const int vcSize = mpi::Size( g.VCComm() ); sendCounts.resize(vcSize,0); for( Int k=0; k<totalSend; ++k ) { owners[k] = distBToVC[distOwners[k]]; ++sendCounts[owners[k]]; } } SwapClear( distOwners ); // Pack the data // ============= vector<int> sendOffs; Scan( sendCounts, sendOffs ); vector<Entry<S>> sendBuf; FastResize( sendBuf, totalSend ); auto offs = sendOffs; for( Int k=0; k<totalSend; ++k ) sendBuf[offs[owners[k]]++] = remoteEntries[k]; SwapClear( remoteEntries ); SwapClear( owners ); // Exchange and unpack the data // ============================ auto recvBuf = mpi::AllToAll( sendBuf, sendCounts, sendOffs, comm ); if( BPartic ) { if( B.RedundantRank() == redundantRootB ) { Int recvBufSize = recvBuf.size(); for( Int k=0; k<recvBufSize; ++k ) { const auto& entry = recvBuf[k]; BLoc(entry.i,entry.j) = Caster<S,T>::Cast(entry.value); } } El::Broadcast( B, B.RedundantComm(), redundantRootB ); } }
/**
 * Assembles the matrix A and right-hand side b of a 2D Laplace problem from
 * the elements of `mesh`, pinning every vertex that lies on a border edge.
 *
 * For every element the edge vectors from vertex 0 to vertices 1 and 2 are
 * packed into R, `f` and `K` are evaluated at the point returned by
 * getCenterOfSet, and bLoc/ALoc produce the local contributions. For each
 * local vertex that belongs to a border edge, the matching row and column of
 * the local matrix are zeroed, its diagonal entry is set to 1 and its
 * right-hand side entry is set to 0 before the contributions are added
 * (ADD_VALUES) into b and A.
 *
 * @param comm  communicator used to create A and b
 * @param mesh  mesh providing `vetrices`, `edges`, `borderEdges`, `elements`
 * @param A     output: created here with MatCreateMPIAIJ, then assembled
 * @param b     output: created here with VecCreateMPI, then assembled
 * @param f     evaluated at the element center; its value is passed to bLoc
 * @param K     evaluated at the element center; its value is passed to ALoc
 * @return      the error code of the first failing PETSc call (via CHKERRQ),
 *              otherwise the code returned by the final MatAssemblyEnd
 *
 * NOTE: the local buffers hold 3 and 3x3 values and vertices 0..2 are read
 * unconditionally, so every element is assumed to have exactly three
 * vertices.
 */
PetscErrorCode FEMAssemble2DLaplace(MPI_Comm comm, Mesh *mesh, Mat &A, Vec &b, PetscReal(*f)(Point), PetscReal(*K)(Point)) {
    PetscErrorCode ierr;
    PetscInt size = mesh->vetrices.size();

    ierr = MatCreateMPIAIJ(comm, size, size, PETSC_DECIDE, PETSC_DECIDE, 7, PETSC_NULL, 0, PETSC_NULL, &A); CHKERRQ(ierr);
    ierr = VecCreateMPI(comm, size, PETSC_DECIDE, &b); CHKERRQ(ierr);

    // Collect both end vertices of every border edge
    std::set<PetscInt> indDirchlet;
    for (std::set<PetscInt>::iterator i = mesh->borderEdges.begin(); i != mesh->borderEdges.end(); ++i) {
        for (int j = 0; j < 2; j++) {
            indDirchlet.insert(mesh->edges[*i]->vetrices[j]);
        }
    }

    for (std::map<PetscInt, Element*>::iterator e = mesh->elements.begin(); e != mesh->elements.end(); ++e) {
        PetscScalar bl[3];
        PetscScalar Al[9];
        PetscReal R[4];

        // Gather the element's global vertex indices and a copy of each vertex
        PetscInt elSize = e->second->numVetrices;
        Point *vetrices = new Point[elSize];
        PetscInt ixs[elSize];
        for (int j = 0; j < elSize; j++) {
            ixs[j] = e->second->vetrices[j];
            vetrices[j] = *(mesh->vetrices[ixs[j]]);
        }

        // Edge vectors (vertex 1 - vertex 0) and (vertex 2 - vertex 0)
        R[0] = vetrices[1].x - vetrices[0].x;
        R[2] = vetrices[1].y - vetrices[0].y;
        R[1] = vetrices[2].x - vetrices[0].x;
        R[3] = vetrices[2].y - vetrices[0].y;

        Point center = getCenterOfSet(vetrices, elSize);

        // The vertex copies are not used past this point. Release them now so
        // that neither the normal path nor a CHKERRQ early return leaks them.
        delete[] vetrices;

        bLoc(R, bl, f(center));
        ALoc(R, Al, K(center));

        // Enforce Dirichlet condition: clear row j and column j of the local
        // matrix, put 1 on the diagonal and 0 in the local right-hand side.
        for (int j = 0; j < 3; j++) {
            if (indDirchlet.count(ixs[j]) > 0) {
                for (int k = 0; k < 3; k++) {
                    Al[j * 3 + k] = 0;
                    Al[k * 3 + j] = 0;
                }
                Al[j * 3 + j] = 1;
                bl[j] = 0;
            }
        }

        ierr = VecSetValues(b, elSize, ixs, bl, ADD_VALUES); CHKERRQ(ierr);
        ierr = MatSetValues(A, elSize, ixs, elSize, ixs, Al, ADD_VALUES); CHKERRQ(ierr);
    }

    ierr = VecAssemblyBegin(b); CHKERRQ(ierr);
    ierr = VecAssemblyEnd(b); CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    return ierr;
}