// Checks the text emitted by generate_expression for an algebra_solver node:
// the system function name is rendered with a "_functor__()" suffix, followed
// by the four argument names and a trailing "pstream__".
TEST(generateExpression, algebra_solver) {
  // Default construction on its own must succeed without raising.
  stan::lang::algebra_solver default_solver;

  // The two vector-typed arguments.
  stan::lang::variable y("y_var_name");
  y.set_type(stan::lang::vector_type());
  stan::lang::variable theta("theta_var_name");
  theta.set_type(stan::lang::vector_type());

  // The two array-typed arguments: one of doubles, one of ints.
  stan::lang::variable x_r("x_r_r_var_name");
  x_r.set_type(stan::lang::bare_array_type(stan::lang::double_type()));
  stan::lang::variable x_i("x_i_var_name");
  x_i.set_type(stan::lang::bare_array_type(stan::lang::int_type()));

  // Build the fully specified solver node and wrap it in an expression.
  const std::string system_function_name("bronzino");
  stan::lang::algebra_solver solver_node(system_function_name, y, theta, x_r,
                                         x_i);
  stan::lang::expression solver_expr(solver_node);

  // Generate with the user-facing flag set and compare the whole output.
  const bool user_facing = true;
  std::stringstream generated;
  generate_expression(solver_expr, user_facing, generated);

  EXPECT_EQ(generated.str(),
            "algebra_solver(bronzino_functor__(), y_var_name, "
            "theta_var_name, x_r_r_var_name, x_i_var_name, pstream__)");
}
double EigenSolverPoissonImageEditing::solve(const NamedParameters& solverParameters, const NamedParameters& problemParameters, bool profileSolve, std::vector<SolverIteration>& iters)
{
    int numUnknowns = 0;
    std::unordered_map<vec2i, int, vec2iHash> pixelLocationsToIndex;
    std::vector<vec2i> pixelLocations;
    size_t pixelCount = m_dims[0] * m_dims[1];
    std::vector<float4> h_unknownFloat(pixelCount);
    std::vector<float4> h_target(pixelCount);
    std::vector<float>  h_mask(pixelCount);

    findAndCopyArrayToCPU("X", h_unknownFloat, problemParameters);
    findAndCopyArrayToCPU("T", h_target, problemParameters);
    findAndCopyArrayToCPU("M", h_mask, problemParameters);

    for (int y = 0; y < (int)m_dims[1]; ++y) {
        for (int x = 0; x < (int)m_dims[0]; ++x) {
            if (h_mask[y*m_dims[0] + x] == 0.0f) {
                ++numUnknowns;
                vec2i p(x, y);
                pixelLocationsToIndex[p] =(int)pixelLocations.size();
                pixelLocations.push_back(p);
            }
        }
    }
    printf("# Unknowns: %d\n", numUnknowns);
    int numResiduals = (int)pixelLocations.size() * 4;

    Eigen::VectorXf x_r(numUnknowns), b_r(numResiduals);
    Eigen::VectorXf x_g(numUnknowns), b_g(numResiduals);
    Eigen::VectorXf x_b(numUnknowns), b_b(numResiduals);
    Eigen::VectorXf x_a(numUnknowns), b_a(numResiduals);

    b_r.setZero();
    b_g.setZero();
    b_b.setZero();
    b_a.setZero();

    for (int i = 0; i < pixelLocations.size(); ++i) {
        vec2i p = pixelLocations[i];
        float4 color = sampleImage(h_unknownFloat.data(), p, m_dims[0]);
        x_r[i] = color.x;
        //printf("%f\n", color.x);
        x_g[i] = color.y;
        x_b[i] = color.z;
        x_a[i] = color.w;
    }
    SpMatrixf A(numResiduals, numUnknowns);
    A.setZero();
    printf("Constructing Matrix\n");
    std::vector<Tripf> entriesA;

    std::vector<vec2i> offsets;
    offsets.push_back(vec2i(-1, 0));
    offsets.push_back(vec2i(1, 0));
    offsets.push_back(vec2i(0, -1));
    offsets.push_back(vec2i(0, 1));

    for (int i = 0; i < pixelLocations.size(); ++i) {
        vec2i p = pixelLocations[i];
        int numInternalNeighbors = 0;
        float4 g_p = sampleImage(h_target.data(), p, m_dims[0]);
        int j = 0;

        for (vec2i off : offsets) {
            vec2i q = p + off;
            if (q.x >= 0 && q.y >= 0 && q.x < (int)m_dims[0] && q.y < (int)m_dims[1]) {
                auto it = pixelLocationsToIndex.find(q);
                int row = 4 * i + j;
                if (it == pixelLocationsToIndex.end()) {
                    float4 f_q = sampleImage(h_unknownFloat.data(), q, m_dims[0]);
                    b_r[row] += f_q.x;
                    b_g[row] += f_q.y;
                    b_b[row] += f_q.z;
                    b_a[row] += f_q.w;
                }
                else {
                    entriesA.push_back(Tripf(row, it->second, -1.0f));
                }
                entriesA.push_back(Tripf(row, i, 1.0f));

                float4 g_q = sampleImage(h_target.data(), q, m_dims[0]);
                b_r[row] += (g_p.x - g_q.x);
                b_g[row] += (g_p.y - g_q.y);
                b_b[row] += (g_p.z - g_q.z);
                b_a[row] += (g_p.w - g_q.w);
            }
            ++j;
            
        }
    }
    

    printf("Entries Set\n");
    A.setFromTriplets(entriesA.begin(), entriesA.end());
    printf("Sparse Matrix Constructed\n");
    A.makeCompressed();
    printf("Matrix Compressed\n");
    {
        float totalCost = 0.0f;
        
        float cost_r = (A*x_r - b_r).squaredNorm();
        float cost_g = (A*x_g - b_g).squaredNorm();
        float cost_b = (A*x_b - b_b).squaredNorm();
        float cost_a = (A*x_a - b_a).squaredNorm();
        totalCost = cost_r + cost_g + cost_b + cost_a;
        printf("Initial Cost: %f : (%f, %f, %f, %f)\n", totalCost, cost_r, cost_g, cost_b, cost_a);

    }
    

    AxEqBSolver solver;
    solver.setMaxIterations(97);
    printf("Solvers Initialized\n");

    clock_t start = clock(), diff;
    
    solver.compute(A);
    //printf("solver.compute(A)\n");
    solveAxEqb(solver, b_r, x_r);
    //printf("Red solve done\n");
    solveAxEqb(solver, b_g, x_g);
    //printf("Green solve done\n");
    solveAxEqb(solver, b_b, x_b);
    //printf("Blue solve done\n");
    solveAxEqb(solver, b_a, x_a);

    diff = clock() - start;
    printf("Time taken %f ms\n", diff*1000.0 / double(CLOCKS_PER_SEC));

    float totalCost = 0.0f;
 
    float cost_r = (A*x_r - b_r).squaredNorm(); 
    float cost_g = (A*x_g - b_g).squaredNorm();
    float cost_b = (A*x_b - b_b).squaredNorm();
    float cost_a = (A*x_a - b_a).squaredNorm();
    totalCost = cost_r + cost_g + cost_b + cost_a;
    printf("Final Cost: %f : (%f, %f, %f, %f)\n", totalCost, cost_r, cost_g, cost_b, cost_a);

    for (int i = 0; i < pixelLocations.size(); ++i) {
        setPixel(h_unknownFloat.data(), pixelLocations[i], m_dims[0], x_r[i], x_g[i], x_b[i]);
    }
    findAndCopyToArrayFromCPU("X", h_unknownFloat, problemParameters);;
    return (double)totalCost;

}