Пример #1
0
/*
 * Compares l2_penalty() with a hand-computed value for a tiny model:
 * w = (1, 2), a single-row V = (3, 4), lambda_w = 0.5 and every entry of
 * lambda_V set to 0.5.
 */
void test_l2_penalty(){
    const int n_features = 2;
    const int k = 1;
    const double lambda_V_all = 0.5;

    ffm_coef *coef = alloc_fm_coef(n_features, k, false);

    /* entries of w */
    ffm_vector_set(coef->w, 0, 1);
    ffm_vector_set(coef->w, 1, 2);

    /* the only row of V */
    ffm_matrix_set(coef->V, 0, 0, 3);
    ffm_matrix_set(coef->V, 0, 1, 4);

    /* regularisation strengths */
    coef->lambda_w = 0.5;
    ffm_vector_set_all(coef->lambda_V, lambda_V_all);

    /* 5 = 1^2 + 2^2 and 25 = 3^2 + 4^2 */
    double true_loss = coef->lambda_w * 5 + lambda_V_all * 25;
    double loss = l2_penalty(coef);
    g_assert_cmpfloat(true_loss, == , loss);

    free_ffm_coef(coef);
}
Пример #2
0
/*
 * Calls extract_gradient() with a populated coef_t0 and a freshly allocated
 * coef_t1, then compares the returned gradient, scaled by the step size,
 * with the values stored in coef_t0.
 */
void test_extract_gradient(){
    const int n_features = 3;
    const int k = 2;
    double stepsize = .5;

    ffm_coef *coef_t0 = alloc_fm_coef(n_features, k, false);
    coef_t0->w_0 = 0.5;

    /* w = (1, 2, 3) */
    for (int idx = 0; idx < n_features; idx++) {
        ffm_vector_set(coef_t0->w, idx, idx + 1);
    }

    /* V receives 4..9, written one column at a time */
    int next_value = 4;
    for (int col = 0; col < n_features; col++) {
        for (int row = 0; row < k; row++) {
            ffm_matrix_set(coef_t0->V, row, col, next_value++);
        }
    }

    /* second coefficient set is left exactly as the allocator returns it */
    ffm_coef *coef_t1 = alloc_fm_coef(n_features, k, false);

    ffm_coef * grad = extract_gradient(coef_t0, coef_t1, stepsize);

    /* intercept (note the negated step size in this comparison) */
    g_assert_cmpfloat(coef_t0->w_0 , == , grad->w_0 * - stepsize );

    /* w entries */
    for (int i = 0; i < n_features; i++) {
        g_assert_cmpfloat(ffm_vector_get(coef_t0->w, i) , == ,
                ffm_vector_get(grad->w, i) * stepsize );
    }

    /* V entries */
    for (int i = 0; i < k; i++) {
        for (int j = 0; j < n_features; j++) {
            g_assert_cmpfloat(ffm_matrix_get(coef_t0->V, i, j), == ,
                    ffm_matrix_get(grad->V, i, j) * stepsize);
        }
    }

    free_ffm_coef(coef_t0);
    free_ffm_coef(coef_t1);
    free_ffm_coef(grad);
}
Пример #3
0
/*
 * One ALS iteration on the fixture data with ignore_w set and k = 0;
 * afterwards the intercept w_0 must be within 1e-6 of 4466.666666.
 */
void test_sparse_als_zero_order_only(TestFixture_T *pFix, gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int k = 0;

  ffm_param param = {.n_iter = 1,
                     .warm_start = true,
                     .ignore_w = true,
                     .init_sigma = 0.1,
                     .init_lambda_w = 0,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};

  ffm_coef *coef = alloc_fm_coef(n_features, k, true);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);

  /* tolerance comparison instead of exact equality */
  g_assert_cmpfloat(fabs(4466.666666 - coef->w_0), <, 1e-6);

  free_ffm_coef(coef);
}

/*
 * One ALS iteration with ignore_w_0 set and k = 0, starting from
 * w = (10, 20); the resulting w is compared with hand-calculated values.
 */
void test_sparse_als_first_order_only(TestFixture_T *pFix, gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int k = 0;

  ffm_param param = {.n_iter = 1,
                     .warm_start = true,
                     .ignore_w_0 = true,
                     .init_sigma = 0.1,
                     .init_lambda_w = 0,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};

  /* starting point: zero intercept, w = (10, 20) */
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  coef->w_0 = 0;
  ffm_vector_set(coef->w, 0, 10);
  ffm_vector_set(coef->w, 1, 20);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);

  /* hand-calculated reference: 1660.57142857, -11.87755102 */
  g_assert_cmpfloat(fabs(1660.57142857 - ffm_vector_get(coef->w, 0)), <, 1e-8);
  g_assert_cmpfloat(fabs(-11.87755102 - ffm_vector_get(coef->w, 1)), <, 1e-8);

  free_ffm_coef(coef);
}

/*
 * One ALS iteration with both ignore_w_0 and ignore_w set and k = 1,
 * starting from V = (300, 400); the resulting V is compared with
 * hand-calculated values.
 */
void test_sparse_als_second_order_only(TestFixture_T *pFix, gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int k = 1;

  ffm_param param = {.n_iter = 1,
                     .warm_start = true,
                     .ignore_w_0 = true,
                     .ignore_w = true,
                     .init_sigma = 0.1,
                     .init_lambda_w = 0,
                     .init_lambda_V = 0,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};

  /* starting point: zero intercept, V row 0 = (300, 400) */
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  coef->w_0 = 0;
  ffm_matrix_set(coef->V, 0, 0, 300);
  ffm_matrix_set(coef->V, 0, 1, 400);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);

  /* hand-calculated reference: 0.79866412, 400 */
  g_assert_cmpfloat(fabs(0.79866412 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-8);
  g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-8);

  free_ffm_coef(coef);
}

/*
 * ALS with intercept, w and V all enabled (k = 1).  The coefficients are
 * checked after a single iteration and again after a second sparse_fit call
 * on the same coef with n_iter = 99.
 */
void test_sparse_als_all_interactions(TestFixture_T *pFix, gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int k = 1;

  ffm_param param = {.n_iter = 1,
                     .warm_start = true,
                     .ignore_w_0 = false,
                     .ignore_w = false,
                     .init_sigma = 0.1,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};

  /* starting point: w_0 = 0, w = (10, 20), V row 0 = (300, 400) */
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  coef->w_0 = 0;
  ffm_vector_set(coef->w, 0, 10);
  ffm_vector_set(coef->w, 1, 20);
  ffm_matrix_set(coef->V, 0, 0, 300);
  ffm_matrix_set(coef->V, 0, 1, 400);

  /* --- first fit: one iteration --- */
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);

  /* hand-calculated results, cross-checked with libfm */
  g_assert_cmpfloat(fabs(-1755643.33333 - coef->w_0), <, 1e-5);
  g_assert_cmpfloat(fabs(-191459.71428571 - ffm_vector_get(coef->w, 0)), <,
                    1e-6);
  g_assert_cmpfloat(fabs(30791.91836735 - ffm_vector_get(coef->w, 1)), <, 1e-6);
  g_assert_cmpfloat(fabs(253.89744249 - ffm_matrix_get(coef->V, 0, 0)), <,
                    1e-6);
  g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-6);

  /* --- second fit on the same coef: 99 iterations --- */
  param.n_iter = 99;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);

  g_assert_cmpfloat(fabs(210911.940403 - coef->w_0), <, 1e-7);
  g_assert_cmpfloat(fabs(-322970.68313639 - ffm_vector_get(coef->w, 0)), <,
                    1e-6);
  g_assert_cmpfloat(fabs(51927.60978978 - ffm_vector_get(coef->w, 1)), <, 1e-6);
  g_assert_cmpfloat(fabs(94.76612018 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-6);

  free_ffm_coef(coef);
}

/*
 * Fits a k = 0 model with 500 ALS iterations and compares the five
 * predictions on the training data with reference values obtained from
 * sklearn's LinearRegression.
 */
void test_sparse_als_first_order_interactions(TestFixture_T *pFix,
                                              gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int k = 0;

  ffm_param param = {.n_iter = 500,
                     .init_sigma = 0.1,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};

  ffm_vector *y_pred = ffm_vector_calloc(5);
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_pred);

  /*
   * sklearn LinearRegression reference:
   *   y_pred: [ 321.05084746  346.6779661  -40.15254237  321.05084746
   *             790.37288136 ]
   *   coef:   [ 69.6779661  152.16949153 ]
   *   mse:    3134.91525424
   */
  g_assert_cmpfloat(fabs(321.05084746 - ffm_vector_get(y_pred, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(346.6779661 - ffm_vector_get(y_pred, 1)), <, 1e-6);
  g_assert_cmpfloat(fabs(-40.15254237 - ffm_vector_get(y_pred, 2)), <, 1e-6);
  g_assert_cmpfloat(fabs(321.05084746 - ffm_vector_get(y_pred, 3)), <, 1e-6);
  g_assert_cmpfloat(fabs(790.37288136 - ffm_vector_get(y_pred, 4)), <, 1e-6);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

/*
 * Fits a k = 2 model with 1000 ALS iterations and checks that the five
 * predictions on the training data match the reference values within the
 * stated tolerances.
 */
void test_sparse_als_second_interactions(TestFixture_T *pFix,
                                         gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int k = 2;

  ffm_param param = {.n_iter = 1000, .init_sigma = 0.1, .SOLVER = SOLVER_ALS};

  ffm_vector *y_pred = ffm_vector_calloc(5);
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_pred);

  /*
   * sklearn LinearRegression reference:
   *   y_pred: [ 298.  266.  29.  298.  848. ]
   *   coeff:  [ 9.  2.  40. ]
   *   mse:    4.53374139449e-27
   */
  g_assert_cmpfloat(fabs(298 - ffm_vector_get(y_pred, 0)), <, 1e-4);
  g_assert_cmpfloat(fabs(266 - ffm_vector_get(y_pred, 1)), <, 1e-4);
  g_assert_cmpfloat(fabs(29 - ffm_vector_get(y_pred, 2)), <, 1e-3);
  g_assert_cmpfloat(fabs(298 - ffm_vector_get(y_pred, 3)), <, 1e-4);
  g_assert_cmpfloat(fabs(848.0 - ffm_vector_get(y_pred, 4)), <, 1e-4);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

/*
 * Runs the MCMC solver (100 iterations, seed 1234) for regression, using the
 * fixture matrix as both training and test input, and requires an R^2 score
 * above 0.98 for the returned predictions.
 */
void test_sparse_mcmc_second_interactions(TestFixture_T *pFix,
                                          gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int n_samples = pFix->X->m;
  const int k = 2;

  ffm_param param = {.n_iter = 100,
                     .init_sigma = 0.1,
                     .SOLVER = SOLVER_MCMC,
                     .TASK = TASK_REGRESSION,
                     .rng_seed = 1234};

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  /* the same matrix is passed in the train and the test slot */
  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_pred, param);

  g_assert_cmpfloat(ffm_r2_score(pFix->y, y_pred), >, .98);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

/*
 * Runs the MCMC solver (10 iterations) for classification after passing the
 * fixture targets through ffm_vector_make_labels(), and requires an accuracy
 * of at least 0.98 on the same data.
 */
void test_sparse_mcmc_second_interactions_classification(TestFixture_T *pFix,
                                                         gconstpointer pg) {
  const int n_features = pFix->X->n;
  const int n_samples = pFix->X->m;
  const int k = 2;

  ffm_param param = {.n_iter = 10,
                     .init_sigma = 0.1,
                     .SOLVER = SOLVER_MCMC,
                     .TASK = TASK_CLASSIFICATION};

  /* modifies the fixture's target vector in place */
  ffm_vector_make_labels(pFix->y);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  /* the same matrix is passed in the train and the test slot */
  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_pred, param);

  g_assert_cmpfloat(ffm_vector_accuracy(pFix->y, y_pred), >=, .98);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

/*
 * Smoke test: fits and predicts with training and test sets of different
 * sizes (the fixture data versus a 3-row matrix built here) using the ALS and
 * MCMC solvers.  There are no value assertions; the calls only have to run
 * through.
 *
 * Note: the later calls pass pFix->y in the prediction/output position, so
 * the fixture's target vector is overwritten by this test.
 */
void test_train_test_of_different_size(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int k = 2;

  int n_samples_short = 3;
  int m = n_samples_short;
  int n = n_features;

  /* build the short matrix in triplet form, then compress it */
  cs *X = cs_spalloc(m, n, m * n, 1, 1);
  cs_entry(X, 0, 0, 6);
  cs_entry(X, 0, 1, 1);
  cs_entry(X, 1, 0, 2);
  cs_entry(X, 1, 1, 3);
  cs_entry(X, 2, 0, 3);
  cs *X_csc = cs_compress(X); /* compressed form of the triplet matrix */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);

  /* first three targets of y = [ 298 266 29 298 848 ] */
  ffm_vector *y = ffm_vector_calloc(n_samples_short);
  y->data[0] = 298;
  y->data[1] = 266;
  y->data[2] = 29;

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples_short);

  ffm_param param = {.n_iter = 20, .init_sigma = 0.01};

  /* ---- train on the fixture, predict on the short matrix ---- */
  param.SOLVER = SOLVER_ALS;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, X_csc, y_pred);
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, X_csc, y_pred);

  param.SOLVER = SOLVER_MCMC;
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, pFix->X, X_csc, pFix->y, y_pred, param);
  param.TASK = TASK_REGRESSION;
  sparse_fit(coef, pFix->X, X_csc, pFix->y, y_pred, param);

  /* ---- train on the short matrix, predict on the fixture ---- */
  param.SOLVER = SOLVER_MCMC;
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, X_csc, pFix->X, y_pred, pFix->y, param);
  param.TASK = TASK_REGRESSION;
  sparse_fit(coef, X_csc, pFix->X, y_pred, pFix->y, param);

  param.SOLVER = SOLVER_ALS;
  sparse_fit(coef, X_csc, NULL, y_pred, NULL, param);
  sparse_predict(coef, pFix->X, pFix->y);
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, X_csc, NULL, y_pred, NULL, param);
  sparse_predict(coef, pFix->X, pFix->y);

  /* release everything allocated in this test; y was previously leaked */
  ffm_vector_free(y);
  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  cs_spfree(X_t);
  cs_spfree(X_csc);
}

/*
 * Fits a k = 2 model with 50 ALS iterations (lambda_w = lambda_V = 23.5) on
 * data produced by makeTestFixture() and requires an R^2 score above 0.85
 * on that same data.
 */
void test_sparse_als_generated_data(void) {
  const int n_features = 10;
  const int n_samples = 100;
  const int k = 2;

  ffm_param param = {.n_iter = 50,
                     .init_sigma = 0.01,
                     .init_lambda_w = 23.5,
                     .init_lambda_V = 23.5,
                     .SOLVER = SOLVER_ALS};

  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples);
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);

  sparse_fit(coef, data->X, NULL, data->y, NULL, param);
  sparse_predict(coef, data->X, y_pred);

  g_assert_cmpfloat(ffm_r2_score(data->y, y_pred), >, 0.85);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

/* Sums ffm_pow_2(gsl_cdf_ugaussian_Qinv(x)) over the first n entries of rep. */
static double hyper_sum_sq_quantiles(ffm_vector *rep, int n) {
  double total = 0;
  for (int i = 0; i < n; i++)
    total += ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(rep, i)));
  return total;
}

/*
 * Statistical check of sample_hyper_parameter().
 *
 * For each replication a fixture is generated, the residual
 * err = y - prediction is computed, 50 warm-up draws are taken, and the
 * hyperparameter values after warm-up are stored as reference.  Over n_draws
 * further draws the test counts how often each reference value exceeds the
 * new draw.  The resulting fractions are combined per hyperparameter by
 * hyper_sum_sq_quantiles() and the chi-square density of that sum (with
 * n_replication degrees of freedom) is required to be below 0.05.
 */
void test_hyerparameter_sampling(void) {
  ffm_rng *rng = ffm_rng_seed(12345);

  const int n_features = 20;
  const int n_samples = 150;
  /* do not change k casually: the checks below read index 0 of lambda_V and
   * mu_V, i.e. they are written for rank 1 */
  const int k = 1;

  int n_replication = 40;
  const int n_draws = 1000;
  const int n_warmup = 50;

  ffm_vector *alpha_rep = ffm_vector_calloc(n_replication);
  ffm_vector *lambda_w_rep = ffm_vector_calloc(n_replication);
  ffm_vector *lambda_V_rep = ffm_vector_calloc(n_replication);
  ffm_vector *mu_w_rep = ffm_vector_calloc(n_replication);
  ffm_vector *mu_V_rep = ffm_vector_calloc(n_replication);
  ffm_vector *err = ffm_vector_alloc(n_samples);

  for (int j = 0; j < n_replication; j++) {
    TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
    ffm_coef *coef = data->coef;

    /* err = y - prediction */
    sparse_predict(coef, data->X, err);
    ffm_vector_scale(err, -1);
    ffm_vector_add(err, data->y);

    /* warm-up draws before the reference values are taken */
    for (int l = 0; l < n_warmup; l++) {
      sample_hyper_parameter(coef, err, rng);
    }

    /* reference values */
    const double alpha_init = coef->alpha;
    const double lambda_w_init = coef->lambda_w;
    const double lambda_V_init = ffm_vector_get(coef->lambda_V, 0);
    const double mu_w_init = coef->mu_w;
    const double mu_V_init = ffm_vector_get(coef->mu_V, 0);

    /* number of draws that fall below the corresponding reference value */
    double alpha_count = 0;
    double lambda_w_count = 0;
    double lambda_V_count = 0;
    double mu_w_count = 0;
    double mu_V_count = 0;

    for (int l = 0; l < n_draws; l++) {
      sample_hyper_parameter(coef, err, rng);
      if (alpha_init > coef->alpha) {
        alpha_count++;
      }
      if (lambda_w_init > coef->lambda_w) {
        lambda_w_count++;
      }
      if (lambda_V_init > ffm_vector_get(coef->lambda_V, 0)) {
        lambda_V_count++;
      }
      if (mu_w_init > coef->mu_w) {
        mu_w_count++;
      }
      if (mu_V_init > ffm_vector_get(coef->mu_V, 0)) {
        mu_V_count++;
      }
    }

    /* store the fractions; the denominator is n_draws + 1 */
    ffm_vector_set(alpha_rep, j, alpha_count / (n_draws + 1));
    ffm_vector_set(lambda_w_rep, j, lambda_w_count / (n_draws + 1));
    ffm_vector_set(lambda_V_rep, j, lambda_V_count / (n_draws + 1));
    ffm_vector_set(mu_w_rep, j, mu_w_count / (n_draws + 1));
    ffm_vector_set(mu_V_rep, j, mu_V_count / (n_draws + 1));

    TestFixtureDestructor(data, NULL);
  }

  double chi_alpha = hyper_sum_sq_quantiles(alpha_rep, n_replication);
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_alpha, n_replication), <, .05);

  double chi_lambda_w = hyper_sum_sq_quantiles(lambda_w_rep, n_replication);
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_lambda_w, n_replication), <, .05);

  double chi_lambda_V = hyper_sum_sq_quantiles(lambda_V_rep, n_replication);
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_lambda_V, n_replication), <, .05);

  double chi_mu_w = hyper_sum_sq_quantiles(mu_w_rep, n_replication);
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_mu_w, n_replication), <, .05);

  double chi_mu_V = hyper_sum_sq_quantiles(mu_V_rep, n_replication);
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_mu_V, n_replication), <, .05);

  ffm_vector_free_all(alpha_rep, lambda_w_rep, lambda_V_rep, mu_w_rep, mu_V_rep,
                      err);
  ffm_rng_free(rng);
}
Пример #4
0
void test_gradient_check_reg(TestFixture_T* pFix, gconstpointer pg){

    cs *X_crs = pFix->X_t;
    ffm_vector *y = pFix->y;
    int test_sample_row = 0;
    double y_true = ffm_vector_get(y, test_sample_row);
    int n_features = pFix->coef->w->size;

    double eps = 0.0001;

    ffm_param param = {.n_iter=1, .stepsize=.001,
        .init_sigma=.1, .k=2, .init_lambda_w=0.5, .init_lambda_V=1.5,
        .warm_start=1,
        .SOLVER=SOLVER_SGD, .TASK=TASK_REGRESSION, .rng_seed=44};

    ffm_coef *coef_t0 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t0, param);

    ffm_coef *coef_t1 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t1, param);

    ffm_fit_sgd(coef_t1, X_crs, y, &param);
    ffm_coef * grad = extract_gradient(coef_t0, coef_t1, param.stepsize);


    // check w gradient updates
    for (int i=0; i<n_features; i++)
    {
        // keep copy
        double tmp = ffm_vector_get(coef_t0->w, i);
        // x + eps
        ffm_vector_set(coef_t0->w, i, tmp + eps);
        double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
        double sq_loss = 0.5 * pow(y_true - y_pred, 2);
        double l_plus = sq_loss + 0.5 * l2_penalty(coef_t0);
        // x - eps
        ffm_vector_set(coef_t0->w, i, tmp - eps);
        y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
        sq_loss = 0.5 * pow(y_true - y_pred, 2);
        double l_minus = sq_loss + 0.5 * l2_penalty(coef_t0);
        // restore
        ffm_vector_set(coef_t0->w, i, tmp);
        double grad_i = (l_plus - l_minus) / ( 2 * eps);

        g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), < , 1e-10);
    }

    // check V gradient updates
    for (int f=0; f< param.k; f++)
        for (int i=0; i<n_features; i++)
        {
            // keep copy
            double tmp = ffm_matrix_get(coef_t0->V, f, i);
            // x + eps
            ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
            double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
            double sq_loss = 0.5 * pow(y_true - y_pred, 2);
            double l_plus = sq_loss + 0.5 * l2_penalty(coef_t0);
            // x - eps
            ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
            y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
            sq_loss = 0.5 * pow(y_true - y_pred, 2);
            double l_minus = sq_loss + 0.5 * l2_penalty(coef_t0);
            // restore
            ffm_matrix_set(coef_t0->V, f, i, tmp);
            double grad_i = (l_plus - l_minus) / ( 2 * eps);

            g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)),
                    < , 1e-10);
        }

    free_ffm_coef(coef_t0);
    free_ffm_coef(coef_t1);
    free_ffm_coef(grad);
}


void test_gradient_check_class(TestFixture_T* pFix, gconstpointer pg){

    cs *X_crs = pFix->X_t;
    ffm_vector *y = pFix->y;
    int test_sample_row = 0;
    double y_true = ffm_vector_get(y, test_sample_row);
    int n_features = pFix->coef->w->size;

    double eps = 0.0001;

    ffm_param param = {.n_iter=1, .stepsize=.01,
        .init_sigma=.01, .k=2, .init_lambda_w=1.5, .init_lambda_V=2.0,
        .warm_start=1,
        .SOLVER=SOLVER_SGD, .TASK=TASK_CLASSIFICATION, .rng_seed=44};

    ffm_coef *coef_t0 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t0, param);

    ffm_coef *coef_t1 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t1, param);

    ffm_fit_sgd(coef_t1, X_crs, y, &param);
    ffm_coef * grad = extract_gradient(coef_t0, coef_t1, param.stepsize);


    // check w gradient updates
    for (int i=0; i<n_features; i++)
    {
        // keep copy
        double tmp = ffm_vector_get(coef_t0->w, i);
        // x + eps
        ffm_vector_set(coef_t0->w, i, tmp + eps);
        double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
        double log_loss = - log(ffm_sigmoid(y_true * y_pred));
        double l_plus = log_loss + 0.5 * l2_penalty(coef_t0);
        // x - eps
        ffm_vector_set(coef_t0->w, i, tmp - eps);
        y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
        log_loss = - log(ffm_sigmoid(y_true * y_pred));
        double l_minus = log_loss + 0.5 * l2_penalty(coef_t0);
        // restore
        ffm_vector_set(coef_t0->w, i, tmp);
        // finite central differences
        double grad_i = (l_plus - l_minus) / ( 2 * eps);

        //g_assert_cmpfloat(grad_i, ==, ffm_vector_get(grad->w, i));
        g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), < , 1e-9);
    }

    // check V gradient updates
    for (int f=0; f< param.k; f++)
        for (int i=0; i<n_features; i++)
        {
            // keep copy
            double tmp = ffm_matrix_get(coef_t0->V, f, i);
            // x + eps
            ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
            double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
            double log_loss = - log(ffm_sigmoid(y_true * y_pred));
            double l_plus = log_loss + 0.5 * l2_penalty(coef_t0);
            // x - eps
            ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
            y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
            log_loss = - log(ffm_sigmoid(y_true * y_pred));
            double l_minus = log_loss + 0.5 * l2_penalty(coef_t0);
            // restore
            ffm_matrix_set(coef_t0->V, f, i, tmp);
            // finite central differences
            double grad_i = (l_plus - l_minus) / ( 2 * eps);

            g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)),
                    < , 1e-10);
        }

    free_ffm_coef(coef_t0);
    free_ffm_coef(coef_t1);
    free_ffm_coef(grad);
}


void test_gradient_check_bpr(TestFixture_T* pFix, gconstpointer pg){

    cs *X_crs = pFix->X_t;
    ffm_matrix * pairs = ffm_matrix_calloc(1, 2);
    int pos_row = 0; ffm_matrix_set(pairs, 0, 0, pos_row);
    int neg_row = 1; ffm_matrix_set(pairs, 0, 1, neg_row);

    int n_features = pFix->coef->w->size;

    double eps = 0.0001;

    ffm_param param = {.n_iter=1, .stepsize=.01,
        .init_sigma=.01, .k=2, .init_lambda_w=0.0, .init_lambda_V=0.0,
        .warm_start=1,
        .SOLVER=SOLVER_SGD, .TASK=TASK_RANKING, .rng_seed=44};


    ffm_coef *coef_t0 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t0, param);

    ffm_coef *coef_t1 =  alloc_fm_coef(n_features, param.k, false);
    init_ffm_coef(coef_t1, param);

    ffm_fit_sgd_bpr(coef_t1, X_crs, pairs, param);
    ffm_coef * grad = extract_gradient(coef_t0, coef_t1, param.stepsize);


    double y_pos, y_neg, bpr_loss, l_plus, l_minus, grad_i, tmp;
    // check w gradient updates
    for (int i=0; i<n_features; i++)
    {
        // keep copy
        tmp = ffm_vector_get(coef_t0->w, i);
        // x + eps
        ffm_vector_set(coef_t0->w, i, tmp + eps);
        y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
        y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
        bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
        l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0);
        // x - eps
        ffm_vector_set(coef_t0->w, i, tmp - eps);
        y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
        y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
        bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
        l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0);
        // restore
        ffm_vector_set(coef_t0->w, i, tmp);
        // finite central differences
        grad_i = (l_plus - l_minus) / ( 2 * eps);

        //g_assert_cmpfloat(grad_i, ==, ffm_vector_get(grad->w, i));
        g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), < , 1e-9);
    }

    // check V gradient updates
    for (int f=0; f< param.k; f++)
        for (int i=0; i<n_features; i++)
        {
            // keep copy
            tmp = ffm_matrix_get(coef_t0->V, f, i);
            // x + eps
            ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
            y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
            y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
            bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
            l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0);
            // x - eps
            ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
            y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
            y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
            bpr_loss = - log(ffm_sigmoid(y_pos - y_neg));
            l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0);
            // restore
            ffm_matrix_set(coef_t0->V, f, i, tmp);
            // finite central differences
            grad_i = (l_plus - l_minus) / ( 2 * eps);

            //g_assert_cmpfloat(grad_i, ==, ffm_matrix_get(grad->V, f, i));
            g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)),
                    < , 1e-10);
        }

    free_ffm_coef(coef_t0);
    free_ffm_coef(coef_t1);
    free_ffm_coef(grad);
}


/*
 * Test-runner entry point: initialises the GLib test framework, registers
 * the test cases under their "/sgd/..." paths and returns the result of
 * g_test_run().
 */
int main(int argc, char** argv)
{
    /*
     * Floating-point exception trapping is currently not enabled.  The call
     * that would enable it is:
     *
     * feenableexcept(FE_INVALID   |
     *                FE_DIVBYZERO |
     *                FE_OVERFLOW  |
     *                FE_UNDERFLOW);
     */

    g_test_init(&argc, &argv, NULL);

    TestFixture_T Fixture;

/* Registers a fixture-based test; every such test shares the same fixture
 * type, user-data pointer and teardown function. */
#define ADD_FIXTURE_TEST(path, setup, test_fn) \
    g_test_add((path), TestFixture_T, &Fixture, (setup), (test_fn), \
            TestFixtureDestructor)

    /* fixture-based solver tests */
    ADD_FIXTURE_TEST("/sgd/util/predict sample",
            TestFixtureContructorWide, test_sgd_predict_sample);
    ADD_FIXTURE_TEST("/sgd/reg/first order",
            TestFixtureContructorLong, test_first_order_sgd);
    ADD_FIXTURE_TEST("/sgd/reg/second order",
            TestFixtureContructorLong, test_second_order_sgd);
    ADD_FIXTURE_TEST("/sgd/class/full",
            TestFixtureContructorLong, test_sgd_classification);
    ADD_FIXTURE_TEST("/sgd/bpr/update second order",
            TestFixtureContructorWide, test_update_second_order_bpr);
    ADD_FIXTURE_TEST("/sgd/bpr/first order",
            TestFixtureContructorLong, test_first_order_bpr);
    ADD_FIXTURE_TEST("/sgd/bpr/second order",
            TestFixtureContructorLong, test_second_order_bpr);

    /* tests that build their own data */
    g_test_add_func("/sgd/class/generated data",
            test_sgd_classification_generated_data);
    g_test_add_func("/sgd/reg/generated data",
            test_sgd_generated_data);
    g_test_add_func("/sgd/bpr/generated data",
            test_sgd_bpr_generated_data);

    /* utility tests */
    g_test_add_func("/sgd/util/extract_gradient",
            test_extract_gradient);
    g_test_add_func("/sgd/util/l2_penalty",
            test_l2_penalty);

    /* finite-difference gradient checks */
    ADD_FIXTURE_TEST("/sgd/reg/gradient check",
            TestFixtureContructorWide, test_gradient_check_reg);
    ADD_FIXTURE_TEST("/sgd/class/gradient check",
            TestFixtureContructorWide, test_gradient_check_class);
    ADD_FIXTURE_TEST("/sgd/bpr/gradient check",
            TestFixtureContructorWide, test_gradient_check_bpr);

#undef ADD_FIXTURE_TEST

    return g_test_run();
}