/**
 * Damped Newton iteration starting from a copy of initialGuess.
 *
 * Each iteration assembles the Jacobian at the current iterate, solves the
 * linear system for the (negative) update, then tries every entry of
 * mTestDampingValues and keeps the one giving the smallest scaled residual
 * (2-norm of the residual divided by the global vector size). Iteration
 * stops once that scaled residual is no larger than mTolerance.
 *
 * @param pComputeResidual  callback invoked as (NULL, iterate, rhs vector, pContext)
 * @param pComputeJacobian  callback invoked as (NULL, iterate, &lhs matrix, NULL, NULL, pContext)
 * @param initialGuess  starting vector; it is only read (duplicated and copied)
 * @param fill  forwarded to the LinearSystem constructor
 * @param pContext  opaque pointer handed through to both callbacks
 * @return the final iterate; it is created here and not destroyed on the
 *         success path, so the caller is responsible for destroying it
 *
 * Raises an EXCEPTION if, in some iteration, the best scaled residual over
 * all damping values is larger than the previous scaled residual.
 */
Vec SimpleNewtonNonlinearSolver::Solve(PetscErrorCode (*pComputeResidual)(SNES,Vec,Vec,void*),
                                       PetscErrorCode (*pComputeJacobian)(SNES,Vec,Mat*,Mat*,MatStructure*,void*),
                                       Vec initialGuess,
                                       unsigned fill,
                                       void* pContext)
{
    PetscInt num_unknowns;
    VecGetSize(initialGuess, &num_unknowns);

    // Iterate on a private copy of the initial guess
    Vec iterate;
    VecDuplicate(initialGuess, &iterate);
    VecCopy(initialGuess, iterate);

    // Passing "false" permits new mallocs without PETSc 3.3 raising an error
    LinearSystem linear_system(iterate, fill, false);

    // Residual at the starting point goes straight into the RHS of the linear system
    (*pComputeResidual)(NULL, iterate, linear_system.rGetRhsVector(), pContext);

    double initial_norm;
    VecNorm(linear_system.rGetRhsVector(), NORM_2, &initial_norm);
    double current_error = initial_norm/num_unknowns;

    if (mWriteStats)
    {
        std::cout << "Newton's method:\n  Initial ||residual||/N = " << current_error
                  << "\n  Attempting to solve to tolerance " << mTolerance << "..\n";
    }

    for (unsigned counter = 1; current_error > mTolerance; ++counter)
    {
        // Remember the error we must not exceed after this step
        const double previous_error = current_error;

        // Assemble the Jacobian J at the iterate and solve J dx = f, where f is the
        // residual already held in the RHS; dx is the negative of the Newton update
        (*pComputeJacobian)(NULL, iterate, &(linear_system.rGetLhsMatrix()), NULL, NULL, pContext);
        Vec negative_update = linear_system.Solve();

        // Scratch vector holding each damped candidate
        Vec candidate;
        VecDuplicate(initialGuess, &candidate);

        double best_damping_factor = 1.0;
        double best_error = 1e20; // sentinel: larger than any residual we expect

        // Line search over the prescribed damping values
        for (unsigned k=0; k<mTestDampingValues.size(); k++)
        {
            // candidate = iterate - damping*negative_update  (WAXPY: w = a*x + y)
            PetscVecTools::WAXPY(candidate, -mTestDampingValues[k], negative_update, iterate);

            // Evaluate the residual at the candidate, reusing the system's RHS as storage
            linear_system.ZeroLinearSystem();
            (*pComputeResidual)(NULL, candidate, linear_system.rGetRhsVector(), pContext);

            double trial_norm;
            VecNorm(linear_system.rGetRhsVector(), NORM_2, &trial_norm);
            const double trial_error = trial_norm/num_unknowns;

            if (trial_error < best_error)
            {
                best_error = trial_error;
                best_damping_factor = mTestDampingValues[k];
            }
        }
        PetscTools::Destroy(candidate);

        // Give up if even the best damping value made the residual grow
        if (best_error > previous_error)
        {
            // Release the PETSc vectors before throwing
            PetscTools::Destroy(iterate);
            PetscTools::Destroy(negative_update);

            EXCEPTION("Iteration " << counter << ", unable to find damping factor such that residual decreases in update direction");
        }

        if (mWriteStats)
        {
            std::cout << "    Best damping factor = " << best_damping_factor << "\n";
        }

        // Accept the step: iterate <- iterate - best_damping_factor*negative_update
        PetscVecTools::AddScaledVector(iterate, negative_update, -best_damping_factor);
        current_error = best_error;
        PetscTools::Destroy(negative_update);

        // The RHS currently holds the residual of the last candidate tried, so
        // recompute it at the accepted iterate ready for the next linear solve
        linear_system.ZeroLinearSystem();
        (*pComputeResidual)(NULL, iterate, linear_system.rGetRhsVector(), pContext);

        if (mWriteStats)
        {
            std::cout << "    Iteration " << counter << ": ||residual||/N = " << current_error << "\n";
        }
    }

    if (mWriteStats)
    {
        std::cout << "  ..solved!\n\n";
    }

    return iterate;
}
    /**
     * Applies two periodic boundary conditions (node 0 with node 1, node 2 with
     * node 3) to a two-unknowns-per-node system whose matrix is all 2s and
     * whose RHS is all 3s, then checks the altered rows of the matrix and RHS.
     */
    void TestPerdiodicBoundaryConditions()
    {
        const int SIZE = 5;
        const int NUM_DOFS = 2*SIZE;

        DistributedVectorFactory factory(SIZE);
        Vec template_vec = factory.CreateVec(2);
        LinearSystem linear_system(template_vec, NUM_DOFS);
        PetscTools::Destroy(template_vec);

        // Fill the system: every matrix entry is 2, every RHS entry is 3
        for (int row = 0; row < NUM_DOFS; row++)
        {
            for (int col = 0; col < NUM_DOFS; col++)
            {
                linear_system.SetMatrixElement(row,col,2);
            }
            linear_system.SetRhsVectorElement(row,3);
        }

        linear_system.AssembleIntermediateLinearSystem();

        // Only the first SIZE-1 slots of the array are populated (and later freed)
        Node<3>* nodes[SIZE];
        BoundaryConditionsContainer<3,3,2> bcc;

        for (unsigned node_index=0; node_index<SIZE-1; node_index++)
        {
            nodes[node_index] = new Node<3>(node_index,true);
        }

        bcc.AddPeriodicBoundaryCondition(nodes[0], nodes[1]);
        bcc.AddPeriodicBoundaryCondition(nodes[2], nodes[3]);

        bcc.ApplyPeriodicBcsToLinearProblem(linear_system, true, true);

        linear_system.AssembleFinalLinearSystem();

        // RHS entries for both variables of nodes 0 and 2 (indices 0,1 and 4,5)
        // should have been zeroed; every other entry keeps its value of 3
        const double expected_rhs[NUM_DOFS] = {0.0, 0.0, 3.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 3.0};
        ReplicatableVector rhs_repl(linear_system.rGetRhsVector());
        for (int dof = 0; dof < NUM_DOFS; dof++)
        {
            TS_ASSERT_DELTA(rhs_repl[dof], expected_rhs[dof], 1e-12);
        }

        //
        //  Matrix should have
        //   row 0 altered to be [1, 0, -1, 0, 0, ..., 0]
        //   row 1 altered to be [0, 1, 0, -1, 0, ..., 0]
        //   row 4 altered to be [0, 0, 0, 0, 1, 0, -1, 0, 0, 0]
        //   row 5 altered to be [0, 0, 0, 0, 0, 1, 0, -1, 0, 0]
        //   All other rows just [2, 2, ..., 2]
        //
        Mat& r_mat = linear_system.rGetLhsMatrix();

        PetscInt lo, hi;
        PetscMatTools::GetOwnershipRange(r_mat, lo, hi);
        for (int row=lo; row<hi; row++)
        {
            const bool is_altered_row = (row==0 || row==1 || row==4 || row==5);
            const unsigned plus_one_col = row;
            const unsigned minus_one_col = row+2;

            for (unsigned col=0; col<2*SIZE; col++)
            {
                // Untouched rows keep the original fill value
                double expected = 2.0;
                if (is_altered_row)
                {
                    expected = 0.0;
                    if (col==plus_one_col)
                    {
                        expected = 1.0;
                    }
                    if (col==minus_one_col)
                    {
                        expected = -1.0;
                    }
                }
                TS_ASSERT_DELTA( PetscMatTools::GetElement(r_mat, row, col), expected, 1e-12);
            }
        }

        for (unsigned node_index=0; node_index<SIZE-1; node_index++)
        {
            delete nodes[node_index];
        }
    }
    void TestApplyToLinearSystem3Unknowns()
    {
        const int SIZE = 10;

        DistributedVectorFactory factory(SIZE);
        Vec template_vec = factory.CreateVec(3);
        LinearSystem linear_system(template_vec, 3*SIZE);
        PetscTools::Destroy(template_vec);

        for (int i = 0; i < 3*SIZE; i++)
        {
            for (int j = 0; j < 3*SIZE; j++)
            {
                // LHS matrix is all 1s
                linear_system.SetMatrixElement(i,j,1);
            }
            // RHS vector is all 2s
            linear_system.SetRhsVectorElement(i,2);
        }

        linear_system.AssembleIntermediateLinearSystem();

        Node<3>* nodes_array[SIZE];
        BoundaryConditionsContainer<3,3,3> bcc33;

        // Apply dirichlet boundary conditions to all but last node
        for (int i = 0; i < SIZE-1; i++)
        {
            nodes_array[i] = new Node<3>(i,true);

            ConstBoundaryCondition<3>* p_boundary_condition0 =
                new ConstBoundaryCondition<3>(-1);

            ConstBoundaryCondition<3>* p_boundary_condition1 =
                new ConstBoundaryCondition<3>(-2);

            ConstBoundaryCondition<3>* p_boundary_condition2 =
                new ConstBoundaryCondition<3>( 0);

            bcc33.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition0, 0);
            bcc33.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition1, 1);
            bcc33.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition2, 2);
        }
        bcc33.ApplyDirichletToLinearProblem(linear_system);

        linear_system.AssembleFinalLinearSystem();

        Vec solution = linear_system.Solve();

        DistributedVector d_solution = factory.CreateDistributedVector(solution);
        DistributedVector::Stripe solution0(d_solution,0);
        DistributedVector::Stripe solution1(d_solution,1);
        DistributedVector::Stripe solution2(d_solution,2);

        for (DistributedVector::Iterator index = d_solution.Begin();
             index != d_solution.End();
             ++index)
        {
            if (index.Global!=SIZE-1)
            {
                TS_ASSERT_DELTA(solution0[index], -1.0, 0.000001);
                TS_ASSERT_DELTA(solution1[index], -2.0, 0.000001);
                TS_ASSERT_DELTA(solution2[index],  0.0, 0.000001);
            }

        }
        for (int i = 0; i < SIZE-1; i++)
        {
            delete nodes_array[i];
        }
        PetscTools::Destroy(solution);
    }
    void TestApplyToLinearSystem2Unknowns()
    {
        const int SIZE = 10;

        DistributedVectorFactory factory(SIZE);
        Vec template_vec = factory.CreateVec(2);
        LinearSystem linear_system(template_vec, 2*SIZE);
        PetscTools::Destroy(template_vec);

        for (int i = 0; i < 2*SIZE; i++)
        {
            for (int j = 0; j < 2*SIZE; j++)
            {
                // LHS matrix is all 1s
                linear_system.SetMatrixElement(i,j,1);
            }
            // RHS vector is all 2s
            linear_system.SetRhsVectorElement(i,2);
        }

        linear_system.AssembleIntermediateLinearSystem();

        Node<3>* nodes_array[SIZE];
        BoundaryConditionsContainer<3,3,2> bcc32;

        // Apply dirichlet boundary conditions to all but last node
        for (int i = 0; i < SIZE-1; i++)
        {
            nodes_array[i] = new Node<3>(i,true);

            ConstBoundaryCondition<3>* p_boundary_condition0 =
                new ConstBoundaryCondition<3>(-1);

            ConstBoundaryCondition<3>* p_boundary_condition1 =
                new ConstBoundaryCondition<3>(-2);

            bcc32.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition0, 0);
            bcc32.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition1, 1);
        }
        bcc32.ApplyDirichletToLinearProblem(linear_system);

        linear_system.AssembleFinalLinearSystem();

        /*
         * Matrix should now look like
         *   A = (1 0 0 0 .. 0)
         *       (0 1 0 0 .. 0)
         *       (     ..     )
         *       (1 1 ..     1)
         *       (1 1 ..     1)
         * and rhs vector looks like b=(-1, -2, -1, -2, ..., -1, -2, 2, 2)
         * so solution of Ax = b is  x=(-1, -2, -1, -2, ..., -1, -2, ?, ?).
         */
        Vec solution = linear_system.Solve();
        DistributedVector d_solution = factory.CreateDistributedVector(solution);
        DistributedVector::Stripe solution0(d_solution,0);
        DistributedVector::Stripe solution1(d_solution,1);

        for (DistributedVector::Iterator index = d_solution.Begin();
             index != d_solution.End();
             ++index)
        {
            if (index.Global!=SIZE-1) // last element of each stripe is not tested -- see ? in previous comment
            {
                TS_ASSERT_DELTA(solution0[index], -1.0, 0.000001);
                TS_ASSERT_DELTA(solution1[index], -2.0, 0.000001);
            }

        }

        for (int i = 0; i < SIZE-1; i++)
        {
            delete nodes_array[i];
        }

        PetscTools::Destroy(solution);
    }
    void TestApplyToSymmetricLinearSystem()
    {
        const int SIZE = 10;
        LinearSystem linear_system(SIZE, SIZE);
        linear_system.SetMatrixIsSymmetric(true);

        for (int i=0; i<SIZE; i++)
        {
            for (int j=0; j<SIZE; j++)
            {
                // LHS matrix is all 1s
                linear_system.SetMatrixElement(i,j,1);
            }
            // RHS vector is all 2s
            linear_system.SetRhsVectorElement(i,2);
        }

        linear_system.AssembleIntermediateLinearSystem();

        Node<3>* nodes_array[SIZE];
        BoundaryConditionsContainer<3,3,1> bcc3;

        // Apply dirichlet boundary conditions to all but last node
        for (int i=0; i<SIZE-1; i++)
        {
            nodes_array[i] = new Node<3>(i,true);
            ConstBoundaryCondition<3>* p_boundary_condition =
                new ConstBoundaryCondition<3>(-1);
            bcc3.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition);
        }
        bcc3.ApplyDirichletToLinearProblem(linear_system);

        linear_system.AssembleFinalLinearSystem();

        /*
         * Based on the original system and the boundary conditions applied in a symmetric
         * manner, the resulting linear system looks like:
         *
         *      1 0 0 ... 0
         *      0 1 0 ... 0
         *      0 0 1 ... 0
         *      ...
         *      0 0 0 ... 1
         */
        int lo, hi;
        linear_system.GetOwnershipRange(lo, hi);

        for (int row=lo; row<hi; row++)
        {
            for (int column=0; column<row; column++)
            {
                TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,column), 0);
            }

            TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,row), 1);

            for (int column=row+1; column<SIZE; column++)
            {
                TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,column), 0);
            }
        }

        Vec solution = linear_system.Solve();

        DistributedVectorFactory factory(solution);
        DistributedVector d_solution = factory.CreateDistributedVector( solution );
        for (DistributedVector::Iterator index = d_solution.Begin();
             index != d_solution.End();
             ++index)
        {
            double expected = index.Global < SIZE-1 ? -1.0 : 11.0;
            TS_ASSERT_DELTA(d_solution[index], expected, 1e-6 );
        }

        for (int i=0; i<SIZE-1; i++)
        {
            delete nodes_array[i];
        }
        PetscTools::Destroy(solution);
    }
    void TestApplyToLinearSystem()
    {
        const int SIZE = 10;
        LinearSystem linear_system(SIZE, SIZE);
        for (int i=0; i<SIZE; i++)
        {
            for (int j=0; j<SIZE; j++)
            {
                // LHS matrix is all 1s
                linear_system.SetMatrixElement(i,j,1);
            }
            // RHS vector is all 2s
            linear_system.SetRhsVectorElement(i,2);
        }

        linear_system.AssembleIntermediateLinearSystem();

        Node<3>* nodes_array[SIZE];
        BoundaryConditionsContainer<3,3,1> bcc3;

        // Apply dirichlet boundary conditions to all but last node
        for (int i=0; i<SIZE-1; i++)
        {
            nodes_array[i] = new Node<3>(i,true);
            ConstBoundaryCondition<3>* p_boundary_condition =
                new ConstBoundaryCondition<3>(-1);
            bcc3.AddDirichletBoundaryCondition(nodes_array[i], p_boundary_condition);
        }

        //////////////////////////
        // 2010 AD code from here
        //////////////////////////

        // apply dirichlet bcs to matrix but not rhs vector
        bcc3.ApplyDirichletToLinearProblem(linear_system, true, false);
        ReplicatableVector vec_repl(linear_system.GetRhsVector());
        for (unsigned i=0; i<(unsigned)SIZE; i++)
        {
            TS_ASSERT_EQUALS(vec_repl[i], 2.0);
        }

        // now apply to the rhs vector
        bcc3.ApplyDirichletToLinearProblem(linear_system, false, true);

        linear_system.AssembleFinalLinearSystem();

        //////////////////////////
        // 2007 AD code from here
        //////////////////////////

        /*
         *  Based on the original system and the boundary conditions applied in a non-symmetric
         *  manner, the resulting linear system looks like:
         *
         *      1 0 0 ... 0
         *      0 1 0 ... 0
         *      0 0 1 ... 0
         *      ...
         *      1 1 1 ... 1
         *
         */
        /// \todo: this is very naughty. Must be checked in parallel as well.
        PetscInt lo, hi;
        PetscMatTools::GetOwnershipRange(linear_system.rGetLhsMatrix(), lo, hi);
        for(int row=lo; row<hi; row++)
        {
            if(row<SIZE-1)
            {
                for (int column=0; column<row; column++)
                {
                    TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,column), 0);
                }

                TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,row), 1);

                for (int column=row+1; column<SIZE; column++)
                {
                    TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,column), 0);
                }
            }

            if(row==SIZE-1)
            {
                for (int column=0; column<SIZE; column++)
                {
                    TS_ASSERT_EQUALS(linear_system.GetMatrixElement(row,column), 1);
                }
            }
        }

        Vec solution = linear_system.Solve();

        DistributedVectorFactory factory(solution);
        DistributedVector d_solution = factory.CreateDistributedVector( solution );
        for (DistributedVector::Iterator index = d_solution.Begin();
             index != d_solution.End();
             ++index)
        {
            double expected = index.Global < SIZE-1 ? -1.0 : 11.0;
            TS_ASSERT_DELTA(d_solution[index], expected, 1e-6 );
        }

        for (int i=0; i<SIZE-1; i++)
        {
            delete nodes_array[i];
        }
        PetscTools::Destroy(solution);
    }