// Bisect the nx x ny x nz grid underlying 'graph' with a single plane.
//
// Sources are interpreted in natural order: index i corresponds to the point
// (i % nx, (i/nx) % ny, i/(nx*ny)). The plane is taken perpendicular to the
// longest dimension (ties prefer x, then y, then z) at coordinate (n-1)/2.
//
// On exit:
//   perm      - for each locally owned source, its index in the reordering
//               [left child | right child | separator], each piece again
//               numbered in natural order within its own box. If any of
//               nx, ny, nz is zero, perm is only resized and no entry is set.
//   child,    - produced by BuildChildFromPerm from the reordering and the
//   onLeft      two child sizes.
//   n?Child   - grid extents of the left box if onLeft, else of the right box.
//
// Returns the number of grid points in the separator plane (zero for an
// empty grid). A LogicError is raised when the graph's communicator holds a
// single process.
Int NaturalBisect
(       Int nx,
        Int ny,
        Int nz,
  const DistGraph& graph,
        Int& nxChild,
        Int& nyChild,
        Int& nzChild,
        DistGraph& child,
        DistMap& perm,
        bool& onLeft )
{
    DEBUG_CSE
    const Int totalSources = graph.NumSources();
    const Int localBegin = graph.FirstLocalSource();
    const Int localCount = graph.NumLocalSources();
    mpi::Comm comm = graph.Comm();
    const Int commSize = mpi::Size( comm );
    if( commSize == 1 )
        LogicError
        ("This routine assumes at least two processes are used, "
         "otherwise one child will be lost");

    perm.SetComm( comm );
    perm.Resize( totalSources );

    // Before any cut is made both children carry the parent's extents and
    // all three pieces are empty; an empty grid never leaves this state.
    const Int extent[3] = { nx, ny, nz };
    Int leftExtent[3] = { nx, ny, nz };
    Int rightExtent[3] = { nx, ny, nz };
    Int numLeft = 0, numRight = 0, numSep = 0;

    if( nx != 0 && ny != 0 && nz != 0 )
    {
        // Choose the dimension to cut: the longest one, preferring x over y
        // over z when extents are equal.
        int axis = 2;
        if( nx >= ny && nx >= nz )
            axis = 0;
        else if( ny >= nx && ny >= nz )
            axis = 1;

        // The plane at coordinate 'cut' along 'axis' is the separator;
        // everything below it is the left box, everything above the right.
        const Int cut = (extent[axis]-1)/2;
        leftExtent[axis] = cut;
        rightExtent[axis] = extent[axis]-1-cut;
        Int sepExtent[3] = { nx, ny, nz };
        sepExtent[axis] = 1;

        numLeft = leftExtent[0]*leftExtent[1]*leftExtent[2];
        numRight = rightExtent[0]*rightExtent[1]*rightExtent[2];
        numSep = sepExtent[0]*sepExtent[1]*sepExtent[2];

        // Natural-order (x fastest, z slowest) index of a point in a box
        auto boxIndex = []( const Int* pt, const Int* box ) -> Int
        { return pt[0] + pt[1]*box[0] + pt[2]*box[0]*box[1]; };

        for( Int loc=0; loc<localCount; ++loc )
        {
            const Int glob = loc + localBegin;
            Int pt[3] = { glob % nx, (glob/nx) % ny, glob/(nx*ny) };
            const Int along = pt[axis];
            if( along < cut )
            {
                // Left box: coordinates are unchanged
                perm.SetLocal( loc, boxIndex(pt,leftExtent) );
            }
            else if( along > cut )
            {
                // Right box: shift past the left box and the separator plane
                pt[axis] = along-(cut+1);
                perm.SetLocal( loc, numLeft+boxIndex(pt,rightExtent) );
            }
            else
            {
                // Separator: a box of thickness one along the cut axis
                pt[axis] = 0;
                perm.SetLocal
                ( loc, numLeft+numRight+boxIndex(pt,sepExtent) );
            }
        }
    }
    DEBUG_ONLY(EnsurePermutation( perm ))

    BuildChildFromPerm
    ( graph, perm, numLeft, numRight, onLeft, child );

    // onLeft is only known after the child has been built
    const Int* kept = ( onLeft ? leftExtent : rightExtent );
    nxChild = kept[0];
    nyChild = kept[1];
    nzChild = kept[2];
    return numSep;
}
// One level of nested dissection on a distributed graph.
//
//   graph - the (sub)graph handled at this level
//   perm  - map applied (via Translate) to this level's separator indices
//           and (via Extend) to the child's index map; presumably it takes
//           this subgraph's indices to the original ordering -- confirm
//           against DistMap
//   sep   - separator entry filled in for this level (comm, off, inds, and
//           either child or duplicate)
//   node  - node-info entry filled in for this level (comm, size, off,
//           origLowerStruct, and either child or duplicate)
//   off   - offset added to this subgraph's indices when forming sep.off,
//           node.off and the lower-structure entries
//   ctrl  - bisection options, forwarded to Bisect and to the recursion
//
// With more than one process the graph is bisected, the separator and its
// lower structure are recorded, and the routine recurses on the single child
// returned by Bisect. With one process the graph is converted to a
// sequential Graph, the sequential overload is invoked on a 'duplicate'
// entry, and the results are copied back up.
//
// NOTE(review): sep.child/node.child and sep.duplicate/node.duplicate are
// allocated with raw 'new' here; their release is not visible in this block
// and is presumably handled by the owning types -- confirm.
inline void
NestedDissectionRecursion
( const DistGraph& graph, 
  const DistMap& perm,
        DistSeparator& sep, 
        DistNodeInfo& node,
        Int off, 
  const BisectCtrl& ctrl )
{
    DEBUG_ONLY(CSE cse("ldl::NestedDissectionRecursion"))
    mpi::Comm comm = graph.Comm();
    const int commSize = mpi::Size(comm);

    // The separator and the node each receive their own duplicate of the
    // graph's communicator
    mpi::Dup( comm, sep.comm );
    mpi::Dup( comm, node.comm );

    if( commSize > 1 )
    {
        const Int numLocalSources = graph.NumLocalSources();
        const Int firstLocalSource = graph.FirstLocalSource();
        const Int* offsetBuf = graph.LockedOffsetBuffer();
        const Int* targetBuf = graph.LockedTargetBuffer();

        // Partition the graph and construct the inverse map
        DistGraph child;
        bool childIsOnLeft;
        DistMap map;
        const Int sepSize = Bisect( graph, child, map, childIsOnLeft, ctrl );
        const Int numSources = graph.NumSources();
        const Int childSize = child.NumSources();
        // Only one child is returned; if it is the right one, the left
        // child's size is whatever remains after removing the separator
        // and the returned child
        const Int leftChildSize = 
            ( childIsOnLeft ? childSize : numSources-sepSize-childSize );

        DistMap invMap;
        InvertMap( map, invMap );

        // Mostly fill this node of the DistSeparatorTree
        // (we will finish computing the separator indices at the end)
        //
        // The separator occupies the last sepSize positions of the bisected
        // ordering; after the translation through invMap the entries are
        // used below as source indices of 'graph'
        sep.off = off + (numSources-sepSize);
        sep.inds.resize( sepSize );
        for( Int s=0; s<sepSize; ++s )
            sep.inds[s] = s + (numSources-sepSize);
        invMap.Translate( sep.inds );

        // Fill in this node of the DistNode
        node.size = sepSize;
        node.off = sep.off;

        // Collect, from the separator vertices owned by this process, every
        // edge target that is >= numSources, shifted by 'off'.
        // NOTE(review): such targets presumably refer to vertices outside
        // this subgraph (ancestor separators) -- confirm.
        set<Int> localLowerStruct;
        for( Int s=0; s<sepSize; ++s )
        {
            const Int source = sep.inds[s];
            if( source >= firstLocalSource && 
                source < firstLocalSource+numLocalSources )
            {
                const Int localSource = source - firstLocalSource;
                const Int edgeOff = offsetBuf[localSource];
                const Int numConn = offsetBuf[localSource+1] - edgeOff;
                for( Int t=0; t<numConn; ++t )
                {
                    const Int target = targetBuf[edgeOff+t];
                    if( target >= numSources )
                        localLowerStruct.insert( off+target );
                }
            }
        }
        // Exchange the per-process counts, then gather every process's
        // entries with those counts and the offsets filled in by Scan
        // (whose return value sizes the receive buffer). Funnelling the
        // result through a set removes entries reported by more than one
        // process and leaves them sorted.
        const int numLocalConnected = localLowerStruct.size();
        vector<int> localConnectedSizes( commSize );
        mpi::AllGather
        ( &numLocalConnected, 1, localConnectedSizes.data(), 1, comm );
        vector<Int> localConnectedVec;
        CopySTL( localLowerStruct, localConnectedVec );
        vector<int> localConnectedOffs;
        const int sumOfLocalConnectedSizes = 
            Scan( localConnectedSizes, localConnectedOffs );
        vector<Int> localConnections( sumOfLocalConnectedSizes );
        mpi::AllGather
        ( localConnectedVec.data(), numLocalConnected,
          localConnections.data(), 
          localConnectedSizes.data(), localConnectedOffs.data(), comm );
        set<Int> lowerStruct
        ( localConnections.begin(), localConnections.end() );
        CopySTL( lowerStruct, node.origLowerStruct );

        // Finish computing the separator indices
        perm.Translate( sep.inds );

        // Construct map from child indices to the original ordering
        //
        // Each local entry starts as the child's global index, shifted past
        // the left child when this is the right child, and is then passed
        // through invMap and perm in that order
        DistMap newPerm( child.NumSources(), child.Comm() );
        const Int localChildSize = child.NumLocalSources();
        const Int firstLocalChildSource = child.FirstLocalSource();
        auto& newPermLoc = newPerm.Map();
        if( childIsOnLeft )
            for( Int s=0; s<localChildSize; ++s )
                newPermLoc[s] = s+firstLocalChildSource;
        else
            for( Int s=0; s<localChildSize; ++s )
                newPermLoc[s] = s+firstLocalChildSource+leftChildSize;
        invMap.Extend( newPerm );
        perm.Extend( newPerm );

        // Recurse
        // (a right child's indices begin after the left child's)
        const Int childOff = ( childIsOnLeft ? off : off+leftChildSize );
        sep.child = new DistSeparator(&sep);
        node.child = new DistNodeInfo(&node);
        node.child->onLeft = childIsOnLeft;
        NestedDissectionRecursion
        ( child, newPerm, *sep.child, *node.child, childOff, ctrl );
    }
    else
    {
        // Single process: continue with the sequential overload on a
        // sequential copy of the graph
        Graph seqGraph( graph );

        sep.duplicate = new Separator(&sep);
        node.duplicate = new NodeInfo(&node);

        NestedDissectionRecursion
        ( seqGraph, perm.Map(), *sep.duplicate, *node.duplicate, off, ctrl );

        // Pull information up from the duplicates
        sep.off = sep.duplicate->off;
        sep.inds = sep.duplicate->inds;
        node.size = node.duplicate->size;
        node.off = node.duplicate->off;
        node.origLowerStruct = node.duplicate->origLowerStruct;
    }
}