Ejemplo n.º 1
0
/*
 * Gradient check for the SGD regression update.
 *
 * Two coefficient sets are initialised with the same parameters. One of them
 * is run through ffm_fit_sgd() with n_iter = 1, and extract_gradient() is
 * applied to the pair together with the step size. Every w and V entry of the
 * result is compared with a central finite difference of
 *     0.5 * (y_true - y_pred)^2 + 0.5 * l2_penalty(coef)
 * evaluated on sample row 0 of the fixture.
 *
 * NOTE(review): this relies on init_ffm_coef() producing identical values for
 * both coefficient sets (same rng_seed) -- confirm against its implementation.
 */
void test_gradient_check_reg(TestFixture_T* pFix, gconstpointer pg){
    const int row = 0;          /* the only sample that is probed */
    const double h = 0.0001;    /* finite-difference half step */

    cs *X_t = pFix->X_t;
    ffm_vector *targets = pFix->y;
    const double target = ffm_vector_get(targets, row);
    const int n_features = pFix->coef->w->size;

    ffm_param param = {.n_iter=1, .stepsize=.001,
        .init_sigma=.1, .k=2, .init_lambda_w=0.5, .init_lambda_V=1.5,
        .warm_start=1,
        .SOLVER=SOLVER_SGD, .TASK=TASK_REGRESSION, .rng_seed=44};

    /* "before" stays untouched by the solver, "after" receives one SGD pass */
    ffm_coef *before = alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(before, param);

    ffm_coef *after = alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(after, param);

    ffm_fit_sgd(after, X_t, targets, &param);
    ffm_coef *grad = extract_gradient(before, after, param.stepsize);

    /* linear weights */
    for (int i = 0; i < n_features; i++) {
        const double saved = ffm_vector_get(before->w, i);

        /* objective at w_i + h */
        ffm_vector_set(before->w, i, saved + h);
        double pred = ffm_predict_sample(before, X_t, row);
        double loss = 0.5 * pow(target - pred, 2);
        const double obj_hi = loss + 0.5 * l2_penalty(before);

        /* objective at w_i - h */
        ffm_vector_set(before->w, i, saved - h);
        pred = ffm_predict_sample(before, X_t, row);
        loss = 0.5 * pow(target - pred, 2);
        const double obj_lo = loss + 0.5 * l2_penalty(before);

        /* put the original weight back before moving on */
        ffm_vector_set(before->w, i, saved);

        const double grad_i = (obj_hi - obj_lo) / (2 * h);
        g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-10);
    }

    /* pairwise factors, one (factor, feature) entry at a time */
    for (int f = 0; f < param.k; f++) {
        for (int i = 0; i < n_features; i++) {
            const double saved = ffm_matrix_get(before->V, f, i);

            /* objective at V_fi + h */
            ffm_matrix_set(before->V, f, i, saved + h);
            double pred = ffm_predict_sample(before, X_t, row);
            double loss = 0.5 * pow(target - pred, 2);
            const double obj_hi = loss + 0.5 * l2_penalty(before);

            /* objective at V_fi - h */
            ffm_matrix_set(before->V, f, i, saved - h);
            pred = ffm_predict_sample(before, X_t, row);
            loss = 0.5 * pow(target - pred, 2);
            const double obj_lo = loss + 0.5 * l2_penalty(before);

            /* put the original factor back before moving on */
            ffm_matrix_set(before->V, f, i, saved);

            const double grad_i = (obj_hi - obj_lo) / (2 * h);
            g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)),
                              <, 1e-10);
        }
    }

    free_ffm_coef(before);
    free_ffm_coef(after);
    free_ffm_coef(grad);
}


/*
 * Gradient check for the SGD classification update.
 *
 * Same procedure as the regression check, but the per-sample loss is
 *     -log(ffm_sigmoid(y_true * y_pred))
 * plus 0.5 * l2_penalty(coef). The w entries are compared with a tolerance
 * of 1e-9, the V entries with 1e-10.
 *
 * NOTE(review): this relies on init_ffm_coef() producing identical values for
 * both coefficient sets (same rng_seed) -- confirm against its implementation.
 */
void test_gradient_check_class(TestFixture_T* pFix, gconstpointer pg){
    const int row = 0;          /* the only sample that is probed */
    const double h = 0.0001;    /* finite-difference half step */

    cs *X_t = pFix->X_t;
    ffm_vector *labels = pFix->y;
    const double label = ffm_vector_get(labels, row);
    const int n_features = pFix->coef->w->size;

    ffm_param param = {.n_iter=1, .stepsize=.01,
        .init_sigma=.01, .k=2, .init_lambda_w=1.5, .init_lambda_V=2.0,
        .warm_start=1,
        .SOLVER=SOLVER_SGD, .TASK=TASK_CLASSIFICATION, .rng_seed=44};

    /* "before" stays untouched by the solver, "after" receives one SGD pass */
    ffm_coef *before = alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(before, param);

    ffm_coef *after = alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(after, param);

    ffm_fit_sgd(after, X_t, labels, &param);
    ffm_coef *grad = extract_gradient(before, after, param.stepsize);

    /* linear weights */
    for (int i = 0; i < n_features; i++) {
        const double saved = ffm_vector_get(before->w, i);

        /* objective at w_i + h */
        ffm_vector_set(before->w, i, saved + h);
        double pred = ffm_predict_sample(before, X_t, row);
        double loss = - log(ffm_sigmoid(label * pred));
        const double obj_hi = loss + 0.5 * l2_penalty(before);

        /* objective at w_i - h */
        ffm_vector_set(before->w, i, saved - h);
        pred = ffm_predict_sample(before, X_t, row);
        loss = - log(ffm_sigmoid(label * pred));
        const double obj_lo = loss + 0.5 * l2_penalty(before);

        /* put the original weight back before moving on */
        ffm_vector_set(before->w, i, saved);

        /* central finite difference */
        const double grad_i = (obj_hi - obj_lo) / (2 * h);
        g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-9);
    }

    /* pairwise factors, one (factor, feature) entry at a time */
    for (int f = 0; f < param.k; f++) {
        for (int i = 0; i < n_features; i++) {
            const double saved = ffm_matrix_get(before->V, f, i);

            /* objective at V_fi + h */
            ffm_matrix_set(before->V, f, i, saved + h);
            double pred = ffm_predict_sample(before, X_t, row);
            double loss = - log(ffm_sigmoid(label * pred));
            const double obj_hi = loss + 0.5 * l2_penalty(before);

            /* objective at V_fi - h */
            ffm_matrix_set(before->V, f, i, saved - h);
            pred = ffm_predict_sample(before, X_t, row);
            loss = - log(ffm_sigmoid(label * pred));
            const double obj_lo = loss + 0.5 * l2_penalty(before);

            /* put the original factor back before moving on */
            ffm_matrix_set(before->V, f, i, saved);

            /* central finite difference */
            const double grad_i = (obj_hi - obj_lo) / (2 * h);
            g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)),
                              <, 1e-10);
        }
    }

    free_ffm_coef(before);
    free_ffm_coef(after);
    free_ffm_coef(grad);
}


/*
 * Gradient check for the SGD BPR (pairwise ranking) update.
 *
 * A single (positive row, negative row) pair is fed to ffm_fit_sgd_bpr() with
 * n_iter = 1. extract_gradient() is applied to the coefficients before and
 * after that call, and every w and V entry of the result is compared with a
 * central finite difference of
 *     -log(ffm_sigmoid(y_pos - y_neg)) + 0.5 * l2_penalty(coef)
 * Both regularisation strengths are initialised to 0 in the parameters.
 *
 * The pair matrix is allocated here and released at the end of the test.
 * NOTE(review): ffm_matrix_free() is assumed to be the project's counterpart
 * to ffm_matrix_calloc() -- confirm the name against the utility header.
 */
void test_gradient_check_bpr(TestFixture_T* pFix, gconstpointer pg){

    cs *X_crs = pFix->X_t;

    /* one comparison: row 0 is ranked above row 1 */
    ffm_matrix * pairs = ffm_matrix_calloc(1, 2);
    int pos_row = 0; ffm_matrix_set(pairs, 0, 0, pos_row);
    int neg_row = 1; ffm_matrix_set(pairs, 0, 1, neg_row);

    int n_features = pFix->coef->w->size;

    double eps = 0.0001;    /* finite-difference half step */

    ffm_param param = {.n_iter=1, .stepsize=.01,
        .init_sigma=.01, .k=2, .init_lambda_w=0.0, .init_lambda_V=0.0,
        .warm_start=1,
        .SOLVER=SOLVER_SGD, .TASK=TASK_RANKING, .rng_seed=44};

    /* coef_t0 is left untouched by the solver, coef_t1 receives one pass */
    ffm_coef *coef_t0 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t0, param);

    ffm_coef *coef_t1 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t1, param);

    ffm_fit_sgd_bpr(coef_t1, X_crs, pairs, param);
    ffm_coef * grad = extract_gradient(coef_t0, coef_t1, param.stepsize);

    // check w gradient updates
    for (int i=0; i<n_features; i++)
    {
        // keep copy
        double tmp = ffm_vector_get(coef_t0->w, i);
        // x + eps
        ffm_vector_set(coef_t0->w, i, tmp + eps);
        double y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
        double y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
        double bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
        double l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0);
        // x - eps
        ffm_vector_set(coef_t0->w, i, tmp - eps);
        y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
        y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
        bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
        double l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0);
        // restore
        ffm_vector_set(coef_t0->w, i, tmp);
        // finite central differences
        double grad_i = (l_plus - l_minus) / ( 2 * eps);

        g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), < , 1e-9);
    }

    // check V gradient updates
    for (int f=0; f< param.k; f++)
    {
        for (int i=0; i<n_features; i++)
        {
            // keep copy
            double tmp = ffm_matrix_get(coef_t0->V, f, i);
            // x + eps
            ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
            double y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
            double y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
            double bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
            double l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0);
            // x - eps
            ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
            y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
            y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
            bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
            double l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0);
            // restore
            ffm_matrix_set(coef_t0->V, f, i, tmp);
            // finite central differences
            double grad_i = (l_plus - l_minus) / ( 2 * eps);

            g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)),
                    < , 1e-10);
        }
    }

    free_ffm_coef(coef_t0);
    free_ffm_coef(coef_t1);
    free_ffm_coef(grad);
    // release the pair matrix allocated at the top of the test
    ffm_matrix_free(pairs);
}


/*
 * Test-runner entry point: registers every SGD test case with GLib's test
 * framework and runs them. Fixture-based cases go through g_test_add() with
 * a fixture constructor and TestFixtureDestructor; the remaining ones are
 * plain functions registered with g_test_add_func().
 */
int main(int argc, char** argv)
{
    /* Floating-point exception traps, currently disabled:
    feenableexcept(FE_INVALID   |
                   FE_DIVBYZERO |
                   FE_OVERFLOW  |
                   FE_UNDERFLOW);
    */

    g_test_init(&argc, &argv, NULL);

    TestFixture_T Fixture;

    /* solver behaviour on the shared fixtures */
    g_test_add("/sgd/util/predict sample", TestFixture_T, &Fixture,
               TestFixtureContructorWide, test_sgd_predict_sample,
               TestFixtureDestructor);
    g_test_add("/sgd/reg/first order", TestFixture_T, &Fixture,
               TestFixtureContructorLong, test_first_order_sgd,
               TestFixtureDestructor);
    g_test_add("/sgd/reg/second order", TestFixture_T, &Fixture,
               TestFixtureContructorLong, test_second_order_sgd,
               TestFixtureDestructor);
    g_test_add("/sgd/class/full", TestFixture_T, &Fixture,
               TestFixtureContructorLong, test_sgd_classification,
               TestFixtureDestructor);
    g_test_add("/sgd/bpr/update second order", TestFixture_T, &Fixture,
               TestFixtureContructorWide, test_update_second_order_bpr,
               TestFixtureDestructor);
    g_test_add("/sgd/bpr/first order", TestFixture_T, &Fixture,
               TestFixtureContructorLong, test_first_order_bpr,
               TestFixtureDestructor);
    g_test_add("/sgd/bpr/second order", TestFixture_T, &Fixture,
               TestFixtureContructorLong, test_second_order_bpr,
               TestFixtureDestructor);

    /* tests that build their own generated data */
    g_test_add_func("/sgd/class/generated data",
                    test_sgd_classification_generated_data);
    g_test_add_func("/sgd/reg/generated data", test_sgd_generated_data);
    g_test_add_func("/sgd/bpr/generated data", test_sgd_bpr_generated_data);

    /* helper utilities */
    g_test_add_func("/sgd/util/extract_gradient", test_extract_gradient);
    g_test_add_func("/sgd/util/l2_penalty", test_l2_penalty);

    /* finite-difference gradient checks */
    g_test_add("/sgd/reg/gradient check", TestFixture_T, &Fixture,
               TestFixtureContructorWide, test_gradient_check_reg,
               TestFixtureDestructor);
    g_test_add("/sgd/class/gradient check", TestFixture_T, &Fixture,
               TestFixtureContructorWide, test_gradient_check_class,
               TestFixtureDestructor);
    g_test_add("/sgd/bpr/gradient check", TestFixture_T, &Fixture,
               TestFixtureContructorWide, test_gradient_check_bpr,
               TestFixtureDestructor);

    return g_test_run();
}
Ejemplo n.º 2
0
/*
 * Predict with caller-owned coefficient buffers.
 *
 * w_0    : pointer to the bias term (read only here)
 * w, V   : raw coefficient arrays, wrapped as non-owning views (.owner=0)
 * X      : sparse design matrix; X->m is used as the number of samples and
 *          X->n as the number of features
 * y_pred : output buffer wrapped as a vector of X->m entries and handed to
 *          sparse_predict()
 * k      : number of rows given to the V view
 */
void ffm_predict(double *w_0, double *w, double *V, cs *X, double *y_pred, int k) {
    int rows = X->m;
    int cols = X->n;

    /* views over the caller's memory; nothing is copied or owned here */
    ffm_vector w_view = {.size=cols, .data=w, .owner=0};
    ffm_matrix V_view = {.size0=k, .size1=cols, .data=V, .owner=0};
    ffm_vector out_view = {.size=rows, .data=y_pred, .owner=0};

    ffm_coef coef = {.w_0=*w_0, .w=&w_view, .V=&V_view};
    sparse_predict(&coef, X, &out_view);
}

/*
 * Fit a model with the ALS solver on caller-owned coefficient buffers.
 *
 * w_0   : in/out bias term; the fitted value is written back on return
 * w, V  : raw coefficient arrays, wrapped as non-owning views (.owner=0)
 *         and passed to sparse_fit() through the coefficient struct
 * X     : sparse design matrix; X->m is used as the number of samples and
 *         X->n as the number of features
 * y     : targets, wrapped as a vector of X->m entries
 * param : solver settings; param->SOLVER is overwritten with SOLVER_ALS
 *
 * The per-factor hyper-parameter vectors exist only when param->k > 0, so
 * they are released under the same condition, matching the other fit
 * wrappers in this file, instead of passing NULL to ffm_vector_free_all().
 */
void ffm_als_fit(double *w_0, double *w, double *V, cs *X, double *y,
                 ffm_param *param) {
    param->SOLVER = SOLVER_ALS;
    int n_samples = X->m;
    int n_features = X->n;

    ffm_vector ffm_w = {.size=n_features, .data=w, .owner=0};
    ffm_matrix ffm_V = {.size0=param->k, .size1=n_features, .data=V, .owner=0};
    ffm_coef coef = {.w_0=*w_0, .w=&ffm_w, .V=&ffm_V,
                     .lambda_w=param->init_lambda_w
                    };
    if (param->k > 0)
    {
        coef.lambda_V = ffm_vector_alloc(param->k);
        coef.mu_V = ffm_vector_alloc(param->k);
        ffm_vector_set_all(coef.lambda_V, param->init_lambda_V);
    } else
    {
        coef.lambda_V = NULL;
        coef.mu_V = NULL;
    }

    ffm_vector ffm_y = {.size=n_samples, .data=y, .owner=0};
    sparse_fit(&coef, X, NULL, &ffm_y, NULL, *param);

    // copy the last coef values back into the python memory
    *w_0 = coef.w_0;
    // only free what was allocated above; both pointers are NULL for k == 0
    if (param->k > 0)
        ffm_vector_free_all(coef.lambda_V, coef.mu_V);
}


/*
 * Fit with the MCMC solver and predict on X_test in the same call, using
 * caller-owned coefficient buffers.
 *
 * w_0     : in/out bias term; written back after sparse_fit()
 * w, V    : raw coefficient arrays wrapped as non-owning views (.owner=0)
 * X_train : training matrix; ->m is used as sample count, ->n as features
 * X_test  : test matrix; ->m sizes the prediction view
 * y_train : training targets
 * y_pred  : output buffer handed to sparse_fit() as the prediction vector
 * param   : solver settings; param->SOLVER is overwritten with SOLVER_MCMC
 *
 * Layout of param->hyper_param (length 1 + 2*k + 2*w_groups, asserted):
 *   [0]                          alpha
 *   [1]                          lambda_w
 *   [1 + w_groups + i]           lambda_V[i],  i in [0, k)
 *   [k + 1 + w_groups]           mu_w
 *   [1 + 2*w_groups + k + i]     mu_V[i],      i in [0, k)
 * The vector is read only when param->warm_start is set and is always
 * overwritten with the current values after fitting.
 */
void ffm_mcmc_fit_predict(double *w_0, double *w, double *V,
                          cs *X_train, cs *X_test, double *y_train, double *y_pred,
                          ffm_param *param) {
    param->SOLVER = SOLVER_MCMC;
    int k = param->k;
    double *hp = param->hyper_param;
    int test_rows = X_test->m;
    int train_rows = X_train->m;
    int n_features = X_train->n;

    /* views over the caller's memory; nothing is copied or owned here */
    ffm_vector w_view = {.size=n_features, .data=w, .owner=0};
    ffm_matrix V_view = {.size0=param->k, .size1=n_features, .data=V, .owner=0};
    ffm_coef coef = {.w_0=*w_0, .w=&w_view, .V=&V_view,
                     .lambda_w=param->init_lambda_w, .alpha=1, .mu_w=0
                    };

    /* per-factor hyper-parameters exist only for k > 0 */
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
    if (k > 0) {
        coef.lambda_V = ffm_vector_alloc(param->k);
        coef.mu_V = ffm_vector_alloc(param->k);
    }

    // set initial values for the hyper-parameters
    int w_groups = 1;
    assert(param->n_hyper_param == 1 + 2 * k + 2 * w_groups &&
           "hyper_parameter vector has wrong size");

    /* offsets into the flat hyper-parameter vector (see header comment) */
    const int lambda_V_at = 1 + w_groups;
    const int mu_w_at = k + 1 + w_groups;
    const int mu_V_at = 1 + (2 * w_groups) + k;

    if (param->warm_start) {
        coef.alpha = hp[0];
        coef.lambda_w = hp[1];
        for (int i = 0; i < k; i++) {
            ffm_vector_set(coef.lambda_V, i, hp[i + lambda_V_at]);
        }
        coef.mu_w = hp[mu_w_at];
        for (int i = 0; i < k; i++) {
            ffm_vector_set(coef.mu_V, i, hp[i + mu_V_at]);
        }
    }

    ffm_vector y_train_view = {.size=train_rows, .data=y_train, .owner=0};
    ffm_vector y_pred_view = {.size=test_rows, .data=y_pred, .owner=0};
    sparse_fit(&coef, X_train, X_test, &y_train_view, &y_pred_view, *param);

    /* hand the bias back to the caller */
    *w_0 = coef.w_0;

    /* write the current hyper-parameters back in the same layout */
    hp[0] = coef.alpha;
    hp[1] = coef.lambda_w;
    for (int i = 0; i < k; i++) {
        hp[i + lambda_V_at] = ffm_vector_get(coef.lambda_V, i);
    }
    hp[mu_w_at] = coef.mu_w;
    for (int i = 0; i < k; i++) {
        hp[i + mu_V_at] = ffm_vector_get(coef.mu_V, i);
    }

    if (k > 0) {
        ffm_vector_free_all(coef.lambda_V, coef.mu_V);
    }
}


/*
 * Fit a ranking model with the SGD BPR solver on caller-owned buffers.
 *
 * w_0     : in/out bias term; written back after fitting
 * w, V    : raw coefficient arrays wrapped as non-owning views (.owner=0)
 * X       : sparse design matrix; X->m is used as the number of features
 *           (presumably the matrix is passed transposed -- TODO confirm)
 * pairs   : flat buffer wrapped as an n_pairs x 2 matrix
 * n_pairs : number of rows in the pair matrix
 * param   : solver settings, passed by value to ffm_fit_sgd_bpr()
 */
void ffm_sgd_bpr_fit(double *w_0, double *w, double *V,
                     cs *X, double *pairs, int n_pairs, ffm_param *param) {

    int n_features = X->m;

    /* views over the caller's memory; nothing is copied or owned here */
    ffm_vector w_view = {.size=n_features, .data=w, .owner=0};
    ffm_matrix V_view = {.size0=param->k, .size1=n_features, .data=V, .owner=0};
    ffm_coef coef = {.w_0=*w_0, .w=&w_view, .V=&V_view,
                     .lambda_w=param->init_lambda_w
                    };

    /* per-factor hyper-parameters exist only for k > 0 */
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
    if (param->k > 0) {
        coef.lambda_V = ffm_vector_alloc(param->k);
        coef.mu_V = ffm_vector_alloc(param->k);
    }

    ffm_matrix pair_view = {.size0=n_pairs, .size1=2, .data=pairs, .owner=0};
    ffm_fit_sgd_bpr(&coef, X, &pair_view, *param);

    /* hand the bias back to the caller */
    *w_0 = coef.w_0;

    if (param->k > 0) {
        ffm_vector_free_all(coef.lambda_V, coef.mu_V);
    }
}

/*
 * Fit a model with the SGD solver on caller-owned buffers.
 *
 * w_0   : in/out bias term; written back after fitting
 * w, V  : raw coefficient arrays wrapped as non-owning views (.owner=0)
 * X     : sparse design matrix; X->n is used as the number of samples and
 *         X->m as the number of features (presumably the matrix is passed
 *         transposed -- TODO confirm)
 * y     : targets, wrapped as a vector of X->n entries
 * param : solver settings, passed by pointer to ffm_fit_sgd()
 */
void ffm_sgd_fit(double *w_0, double *w, double *V,
                 cs *X, double *y, ffm_param *param) {
    int n_features = X->m;
    int n_samples = X->n;

    /* views over the caller's memory; nothing is copied or owned here */
    ffm_vector w_view = {.size=n_features, .data=w, .owner=0};
    ffm_matrix V_view = {.size0=param->k, .size1=n_features, .data=V, .owner=0};
    ffm_coef coef = {.w_0=*w_0, .w=&w_view, .V=&V_view,
                     .lambda_w=param->init_lambda_w
                    };

    /* per-factor hyper-parameters exist only for k > 0 */
    coef.lambda_V = NULL;
    coef.mu_V = NULL;
    if (param->k > 0) {
        coef.lambda_V = ffm_vector_alloc(param->k);
        coef.mu_V = ffm_vector_alloc(param->k);
    }

    ffm_vector y_view = {.size=n_samples, .data=y, .owner=0};
    ffm_fit_sgd(&coef, X, &y_view, param);

    /* hand the bias back to the caller */
    *w_0 = coef.w_0;

    if (param->k > 0) {
        ffm_vector_free_all(coef.lambda_V, coef.mu_V);
    }
}
Ejemplo n.º 3
0
/*
 * First-order (k = 0) regression: fit with SGD, fit again with ALS, and
 * require that
 *   - the SGD predictions reach r2 > .85 against the fixture targets,
 *   - SGD and ALS predictions agree with r2 > .98,
 *   - SGD and ALS linear weights agree with r2 > .98.
 * SGD is trained on pFix->X_t, ALS and all predictions use pFix->X.
 */
void test_first_order_sgd(TestFixture_T* pFix, gconstpointer pg){
    const int k = 0;    /* linear model only */
    const int n_features = pFix->X->n;

    /* --- SGD fit --- */
    ffm_coef *coef = alloc_fm_coef(n_features, k, false);
    ffm_vector *y_pred = ffm_vector_calloc(5);

    ffm_param param = {.n_iter=50 * 100, .init_sigma=.1, .stepsize=.002,
        .SOLVER=SOLVER_SGD, .TASK=TASK_REGRESSION, .init_lambda_w=0.5};
    ffm_fit_sgd(coef, pFix->X_t, pFix->y, &param);
    sparse_predict(coef, pFix->X, y_pred);

    g_assert_cmpfloat(ffm_r2_score(y_pred, pFix->y), >, .85);

    /* --- ALS reference fit --- */
    ffm_vector *y_pred_als = ffm_vector_calloc(5);
    ffm_coef *coef_als = alloc_fm_coef(n_features, k, false);

    ffm_param param_als = {.n_iter=50, .init_sigma=0.1, .SOLVER=SOLVER_ALS,
        .TASK=TASK_REGRESSION, .init_lambda_w=3.5};
    sparse_fit(coef_als, pFix->X, pFix->X, pFix->y, y_pred_als, param_als);
    sparse_predict(coef_als, pFix->X, y_pred_als);

    /* predictions of both solvers must agree ... */
    g_assert_cmpfloat(ffm_r2_score(y_pred, y_pred_als), >, .98);
    /* ... and so must their linear coefficients */
    g_assert_cmpfloat(ffm_r2_score(coef->w, coef_als->w), >, .98);

    ffm_vector_free_all(y_pred, y_pred_als);
    free_ffm_coef(coef);
    free_ffm_coef(coef_als);
}

/*
 * Second-order (k = 2) regression: fit with SGD, fit again with ALS, and
 * require that
 *   - the SGD predictions reach r2 > .98 against the fixture targets,
 *   - SGD and ALS predictions agree with r2 > .98.
 * SGD is trained on pFix->X_t, ALS and all predictions use pFix->X.
 * The ALS parameters do not set .TASK explicitly (left zero-initialised).
 */
void test_second_order_sgd(TestFixture_T* pFix, gconstpointer pg){
    const int k = 2;
    const int n_features = pFix->X->n;

    /* --- SGD fit --- */
    ffm_coef *coef = alloc_fm_coef(n_features, k, false);
    ffm_vector *y_pred = ffm_vector_calloc(5);

    ffm_param param = {.n_iter=10 * 100, .init_sigma=.01, .stepsize=.0002,
        .SOLVER=SOLVER_SGD, .TASK=TASK_REGRESSION,
        .init_lambda_w=0.5, .init_lambda_V=50.5};
    ffm_fit_sgd(coef, pFix->X_t, pFix->y, &param);
    sparse_predict(coef, pFix->X, y_pred);

    g_assert_cmpfloat(ffm_r2_score(y_pred, pFix->y), >, .98);

    /* --- ALS reference fit --- */
    ffm_vector *y_pred_als = ffm_vector_calloc(5);
    ffm_coef *coef_als = alloc_fm_coef(n_features, k, false);

    ffm_param param_als = {.n_iter=10, .init_sigma=0.01, .SOLVER=SOLVER_ALS,
        .init_lambda_w=3.5, .init_lambda_V=50.5};
    sparse_fit(coef_als, pFix->X, pFix->X, pFix->y, y_pred_als, param_als);
    sparse_predict(coef_als, pFix->X, y_pred_als);

    /* predictions of both solvers must agree */
    g_assert_cmpfloat(ffm_r2_score(y_pred, y_pred_als), >, .98);

    ffm_vector_free_all(y_pred, y_pred_als);
    free_ffm_coef(coef);
    free_ffm_coef(coef_als);
}


/*
 * SGD classification on the fixture data: the fixture targets are converted
 * in place with ffm_vector_make_labels(), a k = 2 model is fitted on
 * pFix->X_t, predictions on pFix->X are passed through ffm_sigmoid(), and
 * the resulting accuracy must be at least .8.
 */
void test_sgd_classification(TestFixture_T* pFix, gconstpointer pg){
    const int k = 2;
    const int n_features = pFix->X->n;

    /* turn the regression targets into class labels (modifies the fixture) */
    ffm_vector_make_labels(pFix->y);

    ffm_coef *coef = alloc_fm_coef(n_features, k, false);
    ffm_vector *y_pred = ffm_vector_calloc(5);

    ffm_param param = {.n_iter=10 * 100, .init_sigma=.01, .stepsize=.0002,
        .SOLVER=SOLVER_SGD, .TASK=TASK_CLASSIFICATION,
        .init_lambda_w=0.5, .init_lambda_V=0.5};
    ffm_fit_sgd(coef, pFix->X_t, pFix->y, &param);
    sparse_predict(coef, pFix->X, y_pred);

    /* squash the raw scores through the sigmoid, entry by entry */
    for (int row = 0; row < y_pred->size; row++) {
        double score = ffm_vector_get(y_pred, row);
        ffm_vector_set(y_pred, row, ffm_sigmoid(score));
    }

    g_assert_cmpfloat(ffm_vector_accuracy(pFix->y, y_pred), >=, .8);

    ffm_vector_free(y_pred);
    free_ffm_coef(coef);
}

/*
 * First-order (k = 0) BPR ranking: pairwise comparisons are derived from the
 * fixture targets, a model is fitted with ffm_fit_sgd_bpr() on pFix->X_t,
 * and the ordering of the predictions on pFix->X must match the ordering of
 * the targets exactly (Kendall tau == 1).
 *
 * The comparison matrix is released at the end of the test.
 * NOTE(review): this assumes ffm_vector_to_rank_comparision() returns a
 * matrix owned by the caller and that ffm_matrix_free() is the matching
 * release function -- confirm against the utility header.
 */
void test_first_order_bpr(TestFixture_T* pFix, gconstpointer pg){

    int n_features = pFix->X->n;
    int n_samples = pFix->X->m;
    int k = 0;
    int n_iter = 200;
    double init_sigma = .01;
    double step_size = .002;

    ffm_matrix *compares = ffm_vector_to_rank_comparision(pFix->y);
    ffm_vector * true_order = ffm_vector_get_order(pFix->y);

    ffm_coef *coef =  alloc_fm_coef(n_features, k, false);
    // start the first two linear weights at 0.1
    for(int i=0; i< 2; i++)
        coef->w->data[i] = 0.1;

    ffm_vector * y_pred = ffm_vector_calloc(n_samples);
    ffm_param param = {.n_iter=n_iter * 100, .init_sigma=init_sigma,
        .stepsize=step_size, .SOLVER=SOLVER_SGD, .TASK=TASK_RANKING};
    param.init_lambda_w = 0.0;
    ffm_fit_sgd_bpr(coef, pFix->X_t, compares, param);
    sparse_predict(coef, pFix->X, y_pred);
    ffm_vector * pred_order = ffm_vector_get_order(y_pred);
    double kendall_tau = \
            ffm_vector_kendall_tau(true_order, pred_order);
    g_assert_cmpfloat(kendall_tau, == , 1);

    ffm_vector_free_all(y_pred, true_order, pred_order);
    // release the comparison matrix built from the fixture targets
    ffm_matrix_free(compares);
    free_ffm_coef(coef);
}

/*
 * Checks one call of update_second_order_bpr() on column 0 of the fixture's
 * V matrix against hand-computed values for V[0][0] and V[0][1].
 *
 * Both results come out of floating-point arithmetic, so both are compared
 * with the same absolute tolerance rather than with exact equality.
 */
void test_update_second_order_bpr(TestFixture_T* pFix, gconstpointer pg){
    double cache_p = 1.1;
    double cache_n = 2.2;
    double y_err = -1;
    double step_size = 0.1;
    double lambda_V = 4;

    int sample_row_p = 1;
    int sample_row_n = 0;
    int V_col = 0;
    update_second_order_bpr(pFix->X_t, pFix->coef->V, cache_n, cache_p, y_err,
            step_size, lambda_V, sample_row_p, sample_row_n, V_col);

    // 1 - 0.1*(-1 * (4*1.1 - 4^2 - (1*2.2 - 1^2*1)) + 4 *1) = -0.68
    g_assert_cmpfloat(fabs(ffm_matrix_get(pFix->coef->V, 0, 0) - (-0.68)), < , 1e-10);

    // 2 - 0.1*(-1 * (0*1.1 - 0^2*2 - (2*2.2 - 2^2*2)) + 4 *2) = 1.56
    g_assert_cmpfloat(fabs(ffm_matrix_get(pFix->coef->V, 0, 1) - 1.56), < , 1e-10);
}

/*
 * Second-order (k = 2) BPR ranking: pairwise comparisons are derived from
 * the fixture targets, a model is fitted with ffm_fit_sgd_bpr() on
 * pFix->X_t, and the ordering of the predictions on pFix->X must match the
 * ordering of the targets exactly (Kendall tau == 1).
 *
 * The comparison matrix is released at the end of the test.
 * NOTE(review): this assumes ffm_vector_to_rank_comparision() returns a
 * matrix owned by the caller and that ffm_matrix_free() is the matching
 * release function -- confirm against the utility header.
 */
void test_second_order_bpr(TestFixture_T* pFix, gconstpointer pg){

    int n_features = pFix->X->n;
    int n_samples = pFix->X->m;
    int k = 2;
    int n_iter = 200;
    double init_sigma = .01;
    double step_size = .02;

    ffm_matrix *compares = ffm_vector_to_rank_comparision(pFix->y);
    ffm_vector * true_order = ffm_vector_get_order(pFix->y);

    ffm_coef *coef =  alloc_fm_coef(n_features, k, false);

    ffm_vector * y_pred = ffm_vector_calloc(n_samples);
    ffm_param param = {.n_iter=n_iter, .init_sigma=init_sigma,
        .stepsize=step_size, .SOLVER=SOLVER_SGD, .TASK=TASK_RANKING};
    param.init_lambda_w = 0.5;
    param.init_lambda_V = 0.5;
    ffm_fit_sgd_bpr(coef, pFix->X_t, compares, param);

    sparse_predict(coef, pFix->X, y_pred);
    ffm_vector * pred_order = ffm_vector_get_order(y_pred);
    double kendall_tau = \
            ffm_vector_kendall_tau(true_order, pred_order);
    g_assert_cmpfloat(kendall_tau, == , 1);

    ffm_vector_free_all(y_pred, true_order, pred_order);
    // release the comparison matrix built from the fixture targets
    ffm_matrix_free(compares);
    free_ffm_coef(coef);
}

/*
 * SGD regression on generated data: builds a fixture with
 * makeTestFixture(124, 100, 10, 0), fits a first-order model on data->X_t,
 * and requires r2 > 0.95 for the predictions on data->X.
 */
void test_sgd_generated_data(void){
    const int n_samples = 100;
    const int n_features = 10;
    const int k = 0;    /* linear model only */

    TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
    ffm_vector *y_pred = ffm_vector_calloc(n_samples);
    ffm_coef *coef = alloc_fm_coef(n_features, k, false);

    ffm_param param = {.n_iter=40 * 100, .init_sigma=0.1, .stepsize=.05,
        .SOLVER=SOLVER_SGD, .TASK=TASK_REGRESSION, .init_lambda_w=0.05};
    ffm_fit_sgd(coef, data->X_t, data->y, &param);
    sparse_predict(coef, data->X, y_pred);

    g_assert_cmpfloat(ffm_r2_score(y_pred, data->y), >, 0.95);

    ffm_vector_free(y_pred);
    free_ffm_coef(coef);
    TestFixtureDestructor(data, NULL);
}

/*
 * SGD classification on generated data: builds a fixture with
 * makeTestFixture(124, 100, 10, 2), converts its targets in place with
 * ffm_vector_make_labels(), fits a k = 2 model on data->X_t, passes the
 * predictions on data->X through ffm_sigmoid(), and requires an accuracy of
 * at least .81.
 */
void test_sgd_classification_generated_data(void){
    const int n_samples = 100;
    const int n_features = 10;
    const int k = 2;

    TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
    ffm_vector_make_labels(data->y);
    ffm_vector *y_pred = ffm_vector_calloc(n_samples);
    ffm_coef *coef = alloc_fm_coef(n_features, k, false);

    ffm_param param = {.n_iter=200, .init_sigma=0.1, .stepsize=.2,
        .SOLVER=SOLVER_SGD, .TASK=TASK_CLASSIFICATION,
        .init_lambda_w=0.05, .init_lambda_V=0.05};

    ffm_fit_sgd(coef, data->X_t, data->y, &param);
    sparse_predict(coef, data->X, y_pred);

    /* squash the raw scores through the sigmoid, entry by entry */
    for (int row = 0; row < y_pred->size; row++) {
        double score = ffm_vector_get(y_pred, row);
        ffm_vector_set(y_pred, row, ffm_sigmoid(score));
    }

    g_assert_cmpfloat(ffm_vector_accuracy(data->y, y_pred), >=, .81);

    ffm_vector_free(y_pred);
    free_ffm_coef(coef);
    TestFixtureDestructor(data, NULL);
}