Exemple #1
0
inline const DistNodalMatrix<F>&
DistNodalMatrix<F>::operator=( const DistNodalMultiVec<F>& X )
{
    DEBUG_ONLY(CallStackEntry cse("DistNodalMatrix::operator="))

    // Assign this nodal matrix from the nodal multi-vector X: drop whatever
    // is currently held in commMetas, adopt X's dimensions, and then copy
    // X's distributed and local nodes one at a time.
    commMetas.clear();
    height_ = X.Height();
    width_ = X.Width();

    // Distributed nodes: each destination node is first attached to the
    // grid of its source node and only then assigned from it.
    const int distCount = X.distNodes.size();
    distNodes.resize( distCount );
    int node = 0;
    while( node < distCount )
    {
        distNodes[node].SetGrid( X.distNodes[node].Grid() );
        distNodes[node] = X.distNodes[node];
        ++node;
    }

    // Local nodes: plain element-wise assignment.
    const int localCount = X.localNodes.size();
    localNodes.resize( localCount );
    for( int node=0; node<localCount; ++node )
    {
        localNodes[node] = X.localNodes[node];
    }

    return *this;
}
// Driver: builds a damped 3D Helmholtz operator with a 7-point stencil on an
// n1 x n2 x n3 grid, forms y := A x for a random x, reorders with nested
// dissection, runs a block LDL^T factorization, reports numerical ranks of
// two frontal sub-blocks via their singular values, and finally solves
// against y and prints the error relative to the original x.
//
// Command-line options are read through Input(...); any std::exception
// raised inside the try block is passed to ReportException.
// Always returns 0.
int
main( int argc, char* argv[] )
{
    Initialize( argc, argv );
    mpi::Comm comm = mpi::COMM_WORLD;
    const int commRank = mpi::CommRank( comm );
    typedef double R;
    typedef Complex<R> C;

    try
    {
        const int n1 = Input("--n1","first grid dimension",30);
        const int n2 = Input("--n2","second grid dimension",30);
        const int n3 = Input("--n3","third grid dimension",30);
        const double omega = Input("--omega","angular frequency",18.);
        const double damping = Input("--damping","damping parameter",7.);
        const bool intraPiv = Input("--intraPiv","frontal pivoting?",false);
        const bool analytic = Input("--analytic","analytic partitions?",true);
        const bool sequential = Input
            ("--sequential","sequential partitions?",true);
        const int numDistSeps = Input
            ("--numDistSeps",
             "number of separators to try per distributed partition",1);
        const int numSeqSeps = Input
            ("--numSeqSeps",
             "number of separators to try per sequential partition",1);
        const int cutoff = Input("--cutoff","cutoff for nested dissection",128);
        const bool print = Input("--print","print matrix?",false);
        const bool display = Input("--display","display matrix?",false);
        ProcessInput();

        const int N = n1*n2*n3;
        DistSparseMatrix<C> A( N, comm );
        // The frequency enters the operator as (omega + i*damping)^2
        C dampedOmega( omega, damping );
        // Inverse grid spacings, i.e., h = 1/(n+1) in each direction
        const double hxInv = n1+1;
        const double hyInv = n2+1;
        const double hzInv = n3+1;
        const double hxInvSquared = hxInv*hxInv;
        const double hyInvSquared = hyInv*hyInv;
        const double hzInvSquared = hzInv*hzInv;
        // Diagonal entry of the stencil
        const C mainTerm = 
            2*(hxInvSquared+hyInvSquared+hzInvSquared) - 
            dampedOmega*dampedOmega;

        // Fill our portion of the 3D Helmholtz operator over the unit cube
        // using a n1 x n2 x n3 7-point stencil in natural ordering: 
        // (x,y,z) at x + y*n1 + z*n1*n2
        if( commRank == 0 )
        {
            std::cout << "Filling local portion of matrix...";
            std::cout.flush();
        }
        const double fillStart = mpi::Time();
        const int firstLocalRow = A.FirstLocalRow();
        const int localHeight = A.LocalHeight();
        A.StartAssembly();
        // At most seven nonzeros per row
        A.Reserve( 7*localHeight );
        for( int iLocal=0; iLocal<localHeight; ++iLocal )
        {
            // Recover the grid coordinates from the natural-ordering index
            const int i = firstLocalRow + iLocal;
            const int x = i % n1;
            const int y = (i/n1) % n2;
            const int z = i/(n1*n2);

            A.Update( i, i, mainTerm );
            // Couple to each neighbor that lies inside the grid
            if( x != 0 )
                A.Update( i, i-1, -hxInvSquared );
            if( x != n1-1 )
                A.Update( i, i+1, -hxInvSquared );
            if( y != 0 )
                A.Update( i, i-n1, -hyInvSquared );
            if( y != n2-1 )
                A.Update( i, i+n1, -hyInvSquared );
            if( z != 0 )
                A.Update( i, i-n1*n2, -hzInvSquared );
            if( z != n3-1 )
                A.Update( i, i+n1*n2, -hzInvSquared );
        } 
        A.StopAssembly();
        mpi::Barrier( comm );
        const double fillStop =  mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << fillStop-fillStart << " seconds" 
                      << std::endl;
        if( display )
            Display( A, "A" );
        if( print )
            Print( A, "A" );

        if( commRank == 0 )
        {
            std::cout << "Generating random vector x and forming y := A x...";
            std::cout.flush();
        }
        const double multiplyStart = mpi::Time();
        DistMultiVec<C> x( N, 1, comm ), y( N, 1, comm );
        MakeUniform( x );
        MakeZeros( y );
        Multiply( C(1), A, x, C(0), y );
        // Saved now because y is overwritten by the solve further below
        const double yOrigNorm = Norm( y );
        mpi::Barrier( comm );
        const double multiplyStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << multiplyStop-multiplyStart << " seconds"
                      << std::endl;

        if( commRank == 0 )
        {
            std::cout << "Running nested dissection...";
            std::cout.flush();
        }
        const double nestedStart = mpi::Time();
        const DistGraph& graph = A.DistGraph();
        DistSymmInfo info;
        DistSeparatorTree sepTree;
        DistMap map, inverseMap;
        if( analytic )
            NaturalNestedDissection
            ( n1, n2, n3, graph, map, sepTree, info, cutoff );
        else
            NestedDissection
            ( graph, map, sepTree, info, 
              sequential, numDistSeps, numSeqSeps, cutoff );
        map.FormInverse( inverseMap );
        mpi::Barrier( comm );
        const double nestedStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << nestedStop-nestedStart << " seconds"
                      << std::endl;

        // Needed on every process (not only the root) for the SVD of the
        // root front below
        const int rootSepSize = info.distNodes.back().size;
        if( commRank == 0 )
        {
            const int numDistNodes = info.distNodes.size();
            const int numLocalNodes = info.localNodes.size();
            std::cout << "\n"
                      << "On the root process:\n"
                      << "-----------------------------------------\n"
                      << numLocalNodes << " local nodes\n"
                      << numDistNodes  << " distributed nodes\n"
                      << rootSepSize << " vertices in root separator\n"
                      << std::endl;
        }

        if( commRank == 0 )
        {
            std::cout << "Building DistSymmFrontTree...";
            std::cout.flush();
        }
        mpi::Barrier( comm );
        const double buildStart = mpi::Time();
        DistSymmFrontTree<C> frontTree( A, map, sepTree, info, false );
        mpi::Barrier( comm );
        const double buildStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << buildStop-buildStart << " seconds"
                      << std::endl;

        if( commRank == 0 )
        {
            std::cout << "Running block LDL^T...";
            std::cout.flush();
        }
        mpi::Barrier( comm );
        const double ldlStart = mpi::Time();
        if( intraPiv )
            LDL( info, frontTree, BLOCK_LDL_INTRAPIV_2D );
        else
            LDL( info, frontTree, BLOCK_LDL_2D );
        mpi::Barrier( comm );
        const double ldlStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << ldlStop-ldlStart << " seconds" 
                      << std::endl;

        if( commRank == 0 )
        {
            std::cout << "Computing SVD of connectivity of second separator to "
                         "the root separator...";
            std::cout.flush();
        }
        const int numDistFronts = frontTree.distFronts.size();
        if( numDistFronts >= 2 && info.distNodes[numDistFronts-2].onLeft )
        {
            const double svdStart = mpi::Time();
            const DistMatrix<C>& frontL = 
                frontTree.distFronts[numDistFronts-2].front2dL;
            const Grid& grid = frontL.Grid();
            const int height = frontL.Height();
            const int width = frontL.Width();
            // B is the (height-width) x width block of the front below its
            // leading width x width block, so it has
            // min(height-width,width) singular values. Using the full front
            // height here would let the rank loop below index past the end
            // of singVals whenever height-width < width.
            const int minDim = std::min(height-width,width);
            DistMatrix<C> B( grid );
            LockedView( B, frontL, width, 0, height-width, width );
            // The SVD is run on a copy; B itself is only a locked view
            DistMatrix<C> BCopy( B );
            DistMatrix<R,VR,STAR> singVals_VR_STAR( grid );
            elem::SVD( BCopy, singVals_VR_STAR );
            // Redistribute so that the reporting process can read every
            // singular value through GetLocal
            DistMatrix<R,CIRC,CIRC> singVals( singVals_VR_STAR );
            mpi::Barrier( grid.Comm() );
            // The largest singular value is the two-norm of B
            const R twoNorm = elem::MaxNorm( singVals_VR_STAR );
            if( grid.Rank() == singVals.Root() )
            {
                std::cout << "done, " << mpi::Time()-svdStart << " seconds\n"
                          << "  two norm=" << twoNorm << "\n";
                // Report the numerical rank for relative tolerances
                // 1e-1, 1e-2, ..., down to roughly 1e-10
                for( double tol=1e-1; tol>=1e-10; tol/=10 )
                {
                    // Index of the first singular value at or below
                    // tol*twoNorm (or minDim if there is none)
                    int numRank = minDim;
                    for( int j=0; j<minDim; ++j )
                    {
                        if( singVals.GetLocal(j,0) <= twoNorm*tol )
                        {
                            numRank = j;
                            break;
                        }
                    }
                    std::cout << "  rank (" << tol << ")=" << numRank 
                              << "/" << minDim << std::endl;
                }
            }
        }
        else if( numDistFronts < 2 && commRank == 0 )
        {
            // There is no second distributed front, so no process reports a
            // result; terminate the progress line started above.
            std::cout << "skipped (fewer than two distributed fronts)" 
                      << std::endl;
        }

        if( commRank == 0 )
        {
            std::cout << "Computing SVD of the largest off-diagonal block of "
                         "numerical Green's function on root separator...";
            std::cout.flush();
        }
        {
            const double svdStart = mpi::Time();
            const DistMatrix<C>& front = frontTree.distFronts.back().front2dL;
            const Grid& grid = front.Grid();
            // Split the root separator into a leading part of size lowerHalf
            // and a trailing part of size upperHalf >= lowerHalf
            const int lowerHalf = rootSepSize/2;
            const int upperHalf = rootSepSize - lowerHalf;
            if( commRank == 0 )
                std::cout << "lowerHalf=" << lowerHalf
                          << ", upperHalf=" << upperHalf << std::endl;
            // The upperHalf x lowerHalf block below the leading diagonal block
            DistMatrix<C> offDiagBlock( grid );
            LockedView
            ( offDiagBlock, front, lowerHalf, 0, upperHalf, lowerHalf );
            DistMatrix<C> offDiagBlockCopy( offDiagBlock );
            DistMatrix<R,VR,STAR> singVals_VR_STAR( grid );
            elem::SVD( offDiagBlockCopy, singVals_VR_STAR );
            DistMatrix<R,CIRC,CIRC> singVals( singVals_VR_STAR );
            mpi::Barrier( grid.Comm() );
            const R twoNorm = elem::MaxNorm( singVals_VR_STAR );
            if( grid.Rank() == singVals.Root() )
            {
                std::cout << "done, " << mpi::Time()-svdStart << " seconds\n";
                for( double tol=1e-1; tol>=1e-10; tol/=10 )
                {
                    // lowerHalf <= upperHalf, so the block has lowerHalf
                    // singular values
                    int numRank = lowerHalf;
                    for( int j=0; j<lowerHalf; ++j )
                    {
                        if( singVals.GetLocal(j,0) <= twoNorm*tol )
                        {
                            numRank = j;
                            break;
                        }
                    }
                    std::cout << "  rank (" << tol << ")=" << numRank
                              << "/" << lowerHalf << std::endl;
                }
            }
        }

        if( commRank == 0 )
        {
            std::cout << "Solving against y...";
            std::cout.flush();
        }
        const double solveStart = mpi::Time();
        // Pull y into the nodal layout, solve, and push the result back
        // into y
        DistNodalMultiVec<C> yNodal;
        yNodal.Pull( inverseMap, info, y );
        Solve( info, frontTree, yNodal );
        yNodal.Push( inverseMap, info, y );
        mpi::Barrier( comm );
        const double solveStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << solveStop-solveStart << " seconds"
                      << std::endl;

        if( commRank == 0 )
            std::cout << "Checking error in computed solution..." << std::endl;
        const double xNorm = Norm( x );
        const double yNorm = Norm( y );
        // y now holds the computed solution; turn it into the error y - x
        Axpy( C(-1), x, y );
        const double errorNorm = Norm( y );
        if( commRank == 0 )
        {
            std::cout << "|| x     ||_2 = " << xNorm << "\n"
                      << "|| xComp ||_2 = " << yNorm << "\n"
                      << "|| A x   ||_2 = " << yOrigNorm << "\n"
                      << "|| error ||_2 / || x ||_2 = " 
                      << errorNorm/xNorm << "\n"
                      << "|| error ||_2 / || A x ||_2 = " 
                      << errorNorm/yOrigNorm
                      << std::endl;
        }
    }
    catch( std::exception& e ) { ReportException(e); }

    Finalize();
    return 0;
}
Exemple #3
0
// Benchmark driver: assembles a 3D 7-point negative Laplacian (stored as a
// complex matrix), reorders it with nested dissection, factors it with the
// LDL^T variant selected on the command line, and then times solves for a
// sweep of right-hand side counts and solve blocksizes.
//
// Any std::exception raised inside the try block is handed to
// ReportException. Always returns 0.
int
main( int argc, char* argv[] )
{
    Initialize( argc, argv );
    mpi::Comm comm = mpi::COMM_WORLD;
    const int commRank = mpi::Rank( comm );
    // Only this process writes progress and timing output
    const bool isRoot = ( commRank == 0 );

    try
    {
        const int n1 = Input("--n1","first grid dimension",30);
        const int n2 = Input("--n2","second grid dimension",30);
        const int n3 = Input("--n3","third grid dimension",30);
        const int numRhsBeg = Input("--numRhsBeg","min number of rhs's",100);
        const int numRhsInc = Input("--numRhsInc","stepsize for rhs's",100);
        const int numRhsEnd = Input("--numRhsEnd","max number of rhs's",1000);
        const bool intraPiv = Input("--intraPiv","frontal pivoting?",false);
        const bool solve2d = Input("--solve2d","use 2d solve?",false);
        const bool selInv = Input("--selInv","selectively invert?",false);
        const bool natural = Input("--natural","analytical nested-diss?",true);
        const bool sequential = Input
            ("--sequential","sequential partitions?",true);
        const int numDistSeps = Input
            ("--numDistSeps",
             "number of separators to try per distributed partition",1);
        const int numSeqSeps = Input
            ("--numSeqSeps",
             "number of separators to try per sequential partition",1);
        const int nbFact = Input("--nbFact","factorization blocksize",96);
        const int nbSolveBeg = Input("--nbSolveBeg","min solve blocksize",96);
        const int nbSolveInc = Input("--nbSolveInc","stepsize for bsize",16);
        const int nbSolveEnd = Input("--nbSolveEnd","max solve blocksize",256);
        const int cutoff = Input("--cutoff","cutoff for nested dissection",128);
        const bool print = Input("--print","print matrix?",false);
        const bool display = Input("--display","display matrix?",false);
        ProcessInput();

        const int N = n1*n2*n3;
        DistSparseMatrix<Complex<double> > A( N, comm );

        // Assemble the locally owned rows of the 3D negative Laplacian.
        // The grid is n1 x n2 x n3 with a 7-point stencil, and grid point
        // (x,y,z) is stored at index x + y*n1 + z*n1*n2.
        if( isRoot )
            std::cout << "Filling local portion of matrix..." << std::flush;
        const double fillBegin = mpi::Time();
        const int rowBegin = A.FirstLocalRow();
        const int numLocalRows = A.LocalHeight();
        const int rowEnd = rowBegin + numLocalRows;
        const int planeSize = n1*n2;
        A.StartAssembly();
        A.Reserve( 7*numLocalRows );
        for( int i=rowBegin; i<rowEnd; ++i )
        {
            const int x = i % n1;
            const int y = (i/n1) % n2;
            const int z = i/planeSize;

            A.Update( i, i, 6. );
            // Off-diagonal couplings exist only toward in-grid neighbors
            if( x != 0 )
                A.Update( i, i-1, -1. );
            if( x != n1-1 )
                A.Update( i, i+1, -1. );
            if( y != 0 )
                A.Update( i, i-n1, -1. );
            if( y != n2-1 )
                A.Update( i, i+n1, -1. );
            if( z != 0 )
                A.Update( i, i-planeSize, -1. );
            if( z != n3-1 )
                A.Update( i, i+planeSize, -1. );
        }
        A.StopAssembly();
        mpi::Barrier( comm );
        const double fillEnd = mpi::Time();
        if( isRoot )
        {
            std::cout << "done, " << fillEnd-fillBegin << " seconds" 
                      << std::endl;
        }
        if( display )
        {
            Display( A );
            Display( A.DistGraph() );
        }
        if( print )
        {
            Print( A );
            Print( A.DistGraph() );
        }

        // Reordering: analytic dissection of the grid, or a graph-based one
        if( isRoot )
            std::cout << "Running nested dissection..." << std::flush;
        const double nestedBegin = mpi::Time();
        const DistGraph& graph = A.DistGraph();
        DistSymmInfo info;
        DistSeparatorTree sepTree;
        DistMap map, inverseMap;
        if( !natural )
        {
            NestedDissection
            ( graph, map, sepTree, info, 
              sequential, numDistSeps, numSeqSeps, cutoff );
        }
        else
        {
            NaturalNestedDissection
            ( n1, n2, n3, graph, map, sepTree, info, cutoff );
        }
        map.FormInverse( inverseMap );
        mpi::Barrier( comm );
        const double nestedEnd = mpi::Time();
        if( isRoot )
        {
            std::cout << "done, " << nestedEnd-nestedBegin << " seconds"
                      << std::endl;
        }

        if( isRoot )
        {
            const int numDist = info.distNodes.size();
            const int numLocal = info.localNodes.size();
            const int rootSize = info.distNodes.back().size;
            std::cout << "\n"
                      << "On the root process:\n"
                      << "-----------------------------------------\n"
                      << numLocal << " local nodes\n"
                      << numDist  << " distributed nodes\n"
                      << rootSize << " vertices in root separator\n"
                      << std::endl;
        }
        if( display )
        {
            std::ostringstream titleBefore;
            titleBefore << "Structure before fact. on process " << commRank;
            std::ostringstream titleAfter;
            titleAfter << "Structure after fact. on process " << commRank;
            DisplayLocal( info, false, titleBefore.str() );
            DisplayLocal( info, true, titleAfter.str() );
        }

        if( isRoot )
            std::cout << "Building DistSymmFrontTree..." << std::flush;
        mpi::Barrier( comm );
        const double buildBegin = mpi::Time();
        DistSymmFrontTree<Complex<double>> 
            frontTree( A, map, sepTree, info, false );
        mpi::Barrier( comm );
        const double buildEnd = mpi::Time();
        if( isRoot )
        {
            std::cout << "done, " << buildEnd-buildBegin << " seconds"
                      << std::endl;
        }

        // Storage and work estimates gathered before factoring
        double entriesLocal, entriesMin, entriesMax, entriesGlobal;
        frontTree.MemoryInfo
        ( entriesLocal, entriesMin, entriesMax, entriesGlobal );
        double factFlopsLocal, factFlopsMin, factFlopsMax, factFlopsGlobal;
        frontTree.FactorizationWork
        ( factFlopsLocal, factFlopsMin, factFlopsMax, 
          factFlopsGlobal, selInv );
        if( isRoot )
        {
            // Each entry is a complex double, hence 2*sizeof(double) bytes
            std::cout 
              << "Original memory usage for fronts...\n"
              << "  min local: " << entriesMin*2*sizeof(double)/1e6 
              << " MB\n"
              << "  max local: " << entriesMax*2*sizeof(double)/1e6 
              << " MB\n"
              << "  global:    " << entriesGlobal*2*sizeof(double)/1e6
              << " MB\n"
              << "\n"
              << "Factorization (and possibly sel-inv) work...\n"
              << "  min local: " << factFlopsMin/1.e9 << " GFlops\n"
              << "  max local: " << factFlopsMax/1.e9 << " GFlops\n"
              << "  global:    " << factFlopsGlobal/1.e9 << " GFlops\n"
              << std::endl;
        }

        if( isRoot )
            std::cout << "Running LDL^T and redistribution..." << std::flush;
        El::SetBlocksize( nbFact );
        mpi::Barrier( comm );
        const double ldlBegin = mpi::Time();
        if( selInv )
        {
            // With selective inversion the 2D variants are requested
            // whether or not solve2d is set
            if( intraPiv )
                LDL( info, frontTree, LDL_INTRAPIV_SELINV_2D );
            else
                LDL( info, frontTree, LDL_SELINV_2D );
        }
        else if( intraPiv )
        {
            if( solve2d )
                LDL( info, frontTree, LDL_INTRAPIV_2D );
            else
                LDL( info, frontTree, LDL_INTRAPIV_1D );
        }
        else
        {
            if( solve2d )
                LDL( info, frontTree, LDL_2D );
            else
                LDL( info, frontTree, LDL_1D );
        }
        mpi::Barrier( comm );
        const double ldlEnd = mpi::Time();
        const double factSeconds = ldlEnd - ldlBegin;
        const double factRate = factFlopsGlobal/(1.e9*factSeconds);
        if( isRoot )
        {
            std::cout << "done, " << factSeconds << " seconds, " 
                      << factRate << " GFlop/s" << std::endl;
        }

        if( isRoot )
        {
            std::cout << "Memory usage for fronts after factorization..."
                      << std::endl;
        }
        frontTree.MemoryInfo
        ( entriesLocal, entriesMin, entriesMax, entriesGlobal );
        if( isRoot )
        {
            std::cout << "  min local: " << entriesMin*2*sizeof(double)/1e6
                      << " MB\n"
                      << "  max local: " << entriesMax*2*sizeof(double)/1e6
                      << " MB\n"
                      << "  global:    " << entriesGlobal*2*sizeof(double)/1e6
                      << " MB\n"
                      << std::endl;
        }

        // Outer sweep: number of right-hand sides
        for( int numRhs=numRhsBeg; numRhs<=numRhsEnd; numRhs+=numRhsInc )
        {
            double solveFlopsLocal, solveFlopsMin, solveFlopsMax,
                   solveFlopsGlobal;
            frontTree.SolveWork
            ( solveFlopsLocal, solveFlopsMin, solveFlopsMax,
              solveFlopsGlobal, numRhs );
            if( isRoot )
            {
                std::cout
                  << "Solve with " << numRhs << " right-hand sides...\n"
                  << "  min local: " << solveFlopsMin/1.e9 << " GFlops\n"
                  << "  max local: " << solveFlopsMax/1.e9 << " GFlops\n"
                  << "  global:    " << solveFlopsGlobal/1.e9 << " GFlops\n"
                  << std::endl;
            }

            DistMultiVec<Complex<double> > Y( N, numRhs, comm );
            // Inner sweep: algorithmic blocksize used during the solve
            for( int nbSolve=nbSolveBeg; nbSolve<=nbSolveEnd; 
                 nbSolve+=nbSolveInc )
            {
                MakeUniform( Y );
                El::SetBlocksize( nbSolve );
                if( isRoot )
                {
                    std::cout << "  nbSolve=" << nbSolve << "..."
                              << std::flush;
                }
                // Only the Solve call is timed; the Pull before it and the
                // Push after it lie outside the measured interval
                double tBegin, tEnd;
                if( !solve2d )
                {
                    DistNodalMultiVec<Complex<double> > YNodal;
                    YNodal.Pull( inverseMap, info, Y );
                    mpi::Barrier( comm );
                    tBegin = mpi::Time();
                    Solve( info, frontTree, YNodal );
                    mpi::Barrier( comm );
                    tEnd = mpi::Time();
                    YNodal.Push( inverseMap, info, Y );
                }
                else
                {
                    DistNodalMatrix<Complex<double> > YNodal;
                    YNodal.Pull( inverseMap, info, Y );
                    mpi::Barrier( comm );
                    tBegin = mpi::Time();
                    Solve( info, frontTree, YNodal );
                    mpi::Barrier( comm );
                    tEnd = mpi::Time();
                    YNodal.Push( inverseMap, info, Y );
                }
                const double solveSeconds = tEnd - tBegin;
                const double solveRate = 
                    solveFlopsGlobal/(1.e9*solveSeconds);
                if( isRoot )
                {
                    std::cout << "done, " << solveSeconds << " seconds, "
                              << solveRate << " GFlop/s" << std::endl;
                }
            }
        }
    }
    catch( std::exception& e ) { ReportException(e); }

    Finalize();
    return 0;
}
Exemple #4
0
int
main( int argc, char* argv[] )
{
    Initialize( argc, argv );
    mpi::Comm comm = mpi::COMM_WORLD;
    const int commRank = mpi::CommRank( comm );

    try
    {
        const int n1 = Input("--n1","first grid dimension",30);
        const int n2 = Input("--n2","second grid dimension",30);
        const int n3 = Input("--n3","third grid dimension",30);
        const int numRepeats = Input
                               ("--numRepeats","number of repeated factorizations",5);
        const bool intraPiv = Input("--intraPiv","frontal pivoting?",false);
        const bool sequential = Input
                                ("--sequential","sequential partitions?",true);
        const int numDistSeps = Input
                                ("--numDistSeps",
                                 "number of partitions to try per distributed partition",1);
        const int numSeqSeps = Input
                               ("--numSeqSeps",
                                "number of partitions to try per sequential partition",1);
        const int cutoff = Input("--cutoff","cutoff for nested dissection",128);
        const bool print = Input("--print","print matrix?",false);
        const bool display = Input("--display","display matrix?",false);
        ProcessInput();

        const int N = n1*n2*n3;
        DistSparseMatrix<double> A( N, comm );

        // Fill our portion of the 3D negative Laplacian using a n1 x n2 x n3
        // 7-point stencil in natural ordering: (x,y,z) at x + y*n1 + z*n1*n2
        if( commRank == 0 )
        {
            std::cout << "Filling local portion of matrix...";
            std::cout.flush();
        }
        const double fillStart = mpi::Time();
        const int firstLocalRow = A.FirstLocalRow();
        const int localHeight = A.LocalHeight();
        A.StartAssembly();
        A.Reserve( 7*localHeight );
        for( int iLocal=0; iLocal<localHeight; ++iLocal )
        {
            const int i = firstLocalRow + iLocal;
            const int x = i % n1;
            const int y = (i/n1) % n2;
            const int z = i/(n1*n2);

            A.Update( i, i, 6. );
            if( x != 0 )
                A.Update( i, i-1, -1. );
            if( x != n1-1 )
                A.Update( i, i+1, -1. );
            if( y != 0 )
                A.Update( i, i-n1, -1. );
            if( y != n2-1 )
                A.Update( i, i+n1, -1. );
            if( z != 0 )
                A.Update( i, i-n1*n2, -1. );
            if( z != n3-1 )
                A.Update( i, i+n1*n2, -1. );
        }
        A.StopAssembly();
        mpi::Barrier( comm );
        const double fillStop =  mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << fillStop-fillStart << " seconds"
                      << std::endl;
        if( display )
        {
            Display( A );
            Display( A.DistGraph() );
        }
        if( print )
        {
            Print( A );
            Print( A.DistGraph() );
        }

        if( commRank == 0 )
        {
            std::cout << "Running nested dissection...";
            std::cout.flush();
        }
        const double nestedStart = mpi::Time();
        const DistGraph& graph = A.DistGraph();
        DistSymmInfo info;
        DistSeparatorTree sepTree;
        DistMap map, inverseMap;
        NestedDissection
        ( graph, map, sepTree, info,
          sequential, numDistSeps, numSeqSeps, cutoff );
        map.FormInverse( inverseMap );
        mpi::Barrier( comm );
        const double nestedStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << nestedStop-nestedStart << " seconds"
                      << std::endl;

        if( commRank == 0 )
        {
            const int numDistNodes = info.distNodes.size();
            const int numLocalNodes = info.localNodes.size();
            const int rootSepSize = info.distNodes.back().size;
            std::cout << "\n"
                      << "On the root process:\n"
                      << "-----------------------------------------\n"
                      << numLocalNodes << " local nodes\n"
                      << numDistNodes  << " distributed nodes\n"
                      << rootSepSize << " vertices in root separator\n"
                      << std::endl;
        }

        if( commRank == 0 )
        {
            std::cout << "Building DistSymmFrontTree...";
            std::cout.flush();
        }
        mpi::Barrier( comm );
        const double buildStart = mpi::Time();
        DistSymmFrontTree<double> frontTree( A, map, sepTree, info, false );
        mpi::Barrier( comm );
        const double buildStop = mpi::Time();
        if( commRank == 0 )
            std::cout << "done, " << buildStop-buildStart << " seconds"
                      << std::endl;

        for( int repeat=0; repeat<numRepeats; ++repeat )
        {
            if( repeat != 0 )
            {
                // Reset to an unfactored, implicitly symmetric frontal tree
                if( commRank == 0 )
                    std::cout << "Resetting frontal tree." << std::endl;
                ChangeFrontType( frontTree, SYMM_2D );

                // Randomize the fronts
                if( commRank == 0 )
                    std::cout << "Randomizing fronts." << std::endl;
                const int numDistFronts = frontTree.distFronts.size();
                const int numLocalFronts = frontTree.localFronts.size();
                for( int s=0; s<numLocalFronts; ++s )
                    elem::MakeUniform( frontTree.localFronts[s].frontL );
                for( int s=1; s<numDistFronts; ++s )
                    elem::MakeUniform( frontTree.distFronts[s].front2dL );
            }

            if( commRank == 0 )
            {
                std::cout << "Running LDL^T and redistribution...";
                std::cout.flush();
            }
            mpi::Barrier( comm );
            const double ldlStart = mpi::Time();
            if( intraPiv )
                LDL( info, frontTree, LDL_INTRAPIV_1D );
            else
                LDL( info, frontTree, LDL_1D );
            mpi::Barrier( comm );
            const double ldlStop = mpi::Time();
            if( commRank == 0 )
                std::cout << "done, " << ldlStop-ldlStart << " seconds"
                          << std::endl;

            if( commRank == 0 )
            {
                std::cout << "Solving against random right-hand side...";
                std::cout.flush();
            }
            const double solveStart = mpi::Time();
            DistMultiVec<double> y( N, 1, comm );
            MakeUniform( y );
            DistNodalMultiVec<double> yNodal;
            yNodal.Pull( inverseMap, info, y );
            Solve( info, frontTree, yNodal );
            yNodal.Push( inverseMap, info, y );
            mpi::Barrier( comm );
            const double solveStop = mpi::Time();
            if( commRank == 0 )
                std::cout << "done, " << solveStop-solveStart << " seconds"
                          << std::endl;

            // TODO: Check residual error
        }
    }
    catch( std::exception& e ) {
        ReportException(e);
    }

    Finalize();
    return 0;
}