void test_l2_penalty(void) {
  int n_features = 2;
  int k = 1;
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector_set(coef->w, 0, 1);
  ffm_vector_set(coef->w, 1, 2);
  ffm_matrix_set(coef->V, 0, 0, 3);
  ffm_matrix_set(coef->V, 0, 1, 4);
  coef->lambda_w = 0.5;
  double lambda_V_all = 0.5;
  ffm_vector_set_all(coef->lambda_V, lambda_V_all);

  double true_loss = coef->lambda_w * 5 + lambda_V_all * 25;
  double loss = l2_penalty(coef);
  g_assert_cmpfloat(true_loss, ==, loss);

  free_ffm_coef(coef);
}
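/* Worked check of the expected value above: with w = (1, 2) and V = (3, 4),
 * the squared norms are ||w||^2 = 1 + 4 = 5 and ||V||^2 = 9 + 16 = 25, so
 * true_loss = 0.5 * 5 + 0.5 * 25 = 15.  This assumes l2_penalty() returns
 * lambda_w * ||w||^2 + lambda_V * ||V||^2 without an extra 1/2 factor; the
 * gradient-check tests below apply the 0.5 factor themselves, which is
 * consistent with that reading. */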
void test_extract_gradient(void) {
  int n_features = 3;
  int k = 2;
  double stepsize = .5;
  ffm_coef *coef_t0 = alloc_fm_coef(n_features, k, false);
  coef_t0->w_0 = 0.5;
  ffm_vector_set(coef_t0->w, 0, 1);
  ffm_vector_set(coef_t0->w, 1, 2);
  ffm_vector_set(coef_t0->w, 2, 3);
  ffm_matrix_set(coef_t0->V, 0, 0, 4);
  ffm_matrix_set(coef_t0->V, 1, 0, 5);
  ffm_matrix_set(coef_t0->V, 0, 1, 6);
  ffm_matrix_set(coef_t0->V, 1, 1, 7);
  ffm_matrix_set(coef_t0->V, 0, 2, 8);
  ffm_matrix_set(coef_t0->V, 1, 2, 9);

  ffm_coef *coef_t1 = alloc_fm_coef(n_features, k, false);

  ffm_coef *grad = extract_gradient(coef_t0, coef_t1, stepsize);

  g_assert_cmpfloat(coef_t0->w_0, ==, grad->w_0 * -stepsize);
  // check w grad
  for (int i = 0; i < n_features; i++)
    g_assert_cmpfloat(ffm_vector_get(coef_t0->w, i), ==,
                      ffm_vector_get(grad->w, i) * stepsize);
  // check V grad
  for (int i = 0; i < k; i++)
    for (int j = 0; j < n_features; j++)
      g_assert_cmpfloat(ffm_matrix_get(coef_t0->V, i, j), ==,
                        ffm_matrix_get(grad->V, i, j) * stepsize);

  free_ffm_coef(coef_t0);
  free_ffm_coef(coef_t1);
  free_ffm_coef(grad);
}
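/* Note on the assertions above: coef_t1 comes straight from alloc_fm_coef(),
 * which (presumably) leaves it zero-initialised, so recovering coef_t0 as
 * grad * stepsize amounts to checking that extract_gradient() returns
 * (coef_t0 - coef_t1) / stepsize for w and V.  The w_0 assertion uses the
 * opposite sign, matching the convention used for the intercept update. */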
void test_sparse_als_zero_order_only(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int k = 0;
  ffm_param param = {.n_iter = 1, .warm_start = true, .ignore_w = true,
                     .init_sigma = 0.1, .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};
  ffm_coef *coef = alloc_fm_coef(n_features, k, true);
  param.init_lambda_w = 0;

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  // g_assert_cmpfloat(4466.666666, ==, coef->w_0);
  g_assert_cmpfloat(fabs(4466.666666 - coef->w_0), <, 1e-6);

  free_ffm_coef(coef);
}

void test_sparse_als_first_order_only(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int k = 0;
  ffm_param param = {.n_iter = 1, .warm_start = true, .ignore_w_0 = true,
                     .init_sigma = 0.1, .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  coef->w_0 = 0;
  param.init_lambda_w = 0;
  ffm_vector_set(coef->w, 0, 10);
  ffm_vector_set(coef->w, 1, 20);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  // hand calculated results: 1660.57142857 -11.87755102
  g_assert_cmpfloat(fabs(1660.57142857 - ffm_vector_get(coef->w, 0)), <, 1e-8);
  g_assert_cmpfloat(fabs(-11.87755102 - ffm_vector_get(coef->w, 1)), <, 1e-8);

  free_ffm_coef(coef);
}

void test_sparse_als_second_order_only(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int k = 1;
  ffm_param param = {.n_iter = 1, .warm_start = true, .ignore_w_0 = true,
                     .ignore_w = true, .init_sigma = 0.1,
                     .SOLVER = SOLVER_ALS, .TASK = TASK_REGRESSION};
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  coef->w_0 = 0;
  param.init_lambda_w = 0;
  param.init_lambda_V = 0;
  ffm_matrix_set(coef->V, 0, 0, 300);
  ffm_matrix_set(coef->V, 0, 1, 400);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  // hand calculated results: 0.79866412 400.
  g_assert_cmpfloat(fabs(0.79866412 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-8);
  g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-8);

  free_ffm_coef(coef);
}

void test_sparse_als_all_interactions(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int k = 1;
  ffm_param param = {.n_iter = 1, .warm_start = true, .ignore_w_0 = false,
                     .ignore_w = false, .init_sigma = 0.1,
                     .SOLVER = SOLVER_ALS, .TASK = TASK_REGRESSION};
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  coef->w_0 = 0;
  ffm_vector_set(coef->w, 0, 10);
  ffm_vector_set(coef->w, 1, 20);
  ffm_matrix_set(coef->V, 0, 0, 300);
  ffm_matrix_set(coef->V, 0, 1, 400);

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  // hand calculated results, checked with libfm
  g_assert_cmpfloat(fabs(-1755643.33333 - coef->w_0), <, 1e-5);
  g_assert_cmpfloat(fabs(-191459.71428571 - ffm_vector_get(coef->w, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(30791.91836735 - ffm_vector_get(coef->w, 1)), <, 1e-6);
  g_assert_cmpfloat(fabs(253.89744249 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-6);

  param.n_iter = 99;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  g_assert_cmpfloat(fabs(210911.940403 - coef->w_0), <, 1e-7);
  g_assert_cmpfloat(fabs(-322970.68313639 - ffm_vector_get(coef->w, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(51927.60978978 - ffm_vector_get(coef->w, 1)), <, 1e-6);
  g_assert_cmpfloat(fabs(94.76612018 - ffm_matrix_get(coef->V, 0, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(400 - ffm_matrix_get(coef->V, 0, 1)), <, 1e-6);

  free_ffm_coef(coef);
}

void test_sparse_als_first_order_interactions(TestFixture_T *pFix,
                                              gconstpointer pg) {
  ffm_vector *y_pred = ffm_vector_calloc(5);
  int n_features = pFix->X->n;
  int k = 0;
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 500, .init_sigma = 0.1, .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION};

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_pred);

  /* reference values from sklearn LinearRegression
     y_pred: [ 321.05084746  346.6779661  -40.15254237  321.05084746  790.37288136]
     coef:   [ 69.6779661  152.16949153]
     mse: 3134.91525424 */
  g_assert_cmpfloat(fabs(321.05084746 - ffm_vector_get(y_pred, 0)), <, 1e-6);
  g_assert_cmpfloat(fabs(346.6779661 - ffm_vector_get(y_pred, 1)), <, 1e-6);
  g_assert_cmpfloat(fabs(-40.15254237 - ffm_vector_get(y_pred, 2)), <, 1e-6);
  g_assert_cmpfloat(fabs(321.05084746 - ffm_vector_get(y_pred, 3)), <, 1e-6);
  g_assert_cmpfloat(fabs(790.37288136 - ffm_vector_get(y_pred, 4)), <, 1e-6);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

void test_sparse_als_second_interactions(TestFixture_T *pFix,
                                         gconstpointer pg) {
  ffm_vector *y_pred = ffm_vector_calloc(5);
  int n_features = pFix->X->n;
  int k = 2;
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 1000, .init_sigma = 0.1, .SOLVER = SOLVER_ALS};

  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_pred);

  /* reference values from sklearn LinearRegression
     y_pred: [ 298.  266.   29.  298.  848.]
     coef:   [ 9.  2.  40.]
     mse: 4.53374139449e-27 */
  g_assert_cmpfloat(fabs(298 - ffm_vector_get(y_pred, 0)), <, 1e-4);
  g_assert_cmpfloat(fabs(266 - ffm_vector_get(y_pred, 1)), <, 1e-4);
  g_assert_cmpfloat(fabs(29 - ffm_vector_get(y_pred, 2)), <, 1e-3);
  g_assert_cmpfloat(fabs(298 - ffm_vector_get(y_pred, 3)), <, 1e-4);
  g_assert_cmpfloat(fabs(848.0 - ffm_vector_get(y_pred, 4)), <, 1e-4);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

void test_sparse_mcmc_second_interactions(TestFixture_T *pFix,
                                          gconstpointer pg) {
  int n_features = pFix->X->n;
  int n_samples = pFix->X->m;
  int k = 2;
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples);
  ffm_param param = {.n_iter = 100, .init_sigma = 0.1, .SOLVER = SOLVER_MCMC,
                     .TASK = TASK_REGRESSION, .rng_seed = 1234};

  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_pred, param);
  g_assert_cmpfloat(ffm_r2_score(pFix->y, y_pred), >, .98);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

void test_sparse_mcmc_second_interactions_classification(TestFixture_T *pFix,
                                                         gconstpointer pg) {
  int n_features = pFix->X->n;
  int n_samples = pFix->X->m;
  int k = 2;
  ffm_vector_make_labels(pFix->y);
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples);
  ffm_param param = {.n_iter = 10, .init_sigma = 0.1, .SOLVER = SOLVER_MCMC,
                     .TASK = TASK_CLASSIFICATION};

  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_pred, param);
  g_assert_cmpfloat(ffm_vector_accuracy(pFix->y, y_pred), >=, .98);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
}

void test_train_test_of_different_size(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int k = 2;
  int n_samples_short = 3;

  int m = n_samples_short;
  int n = n_features;
  cs *X = cs_spalloc(m, n, m * n, 1, 1); /* create matrix in triplet form */
  cs_entry(X, 0, 0, 6);
  cs_entry(X, 0, 1, 1);
  cs_entry(X, 1, 0, 2);
  cs_entry(X, 1, 1, 3);
  cs_entry(X, 2, 0, 3);
  cs *X_csc = cs_compress(X); /* A = compressed-column form of T */
  cs *X_t = cs_transpose(X_csc, 1);
  cs_spfree(X);

  ffm_vector *y = ffm_vector_calloc(n_samples_short);
  // y [ 298 266 29 298 848 ]
  y->data[0] = 298;
  y->data[1] = 266;
  y->data[2] = 29;

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples_short);
  ffm_param param = {.n_iter = 20, .init_sigma = 0.01};

  // test: train > test
  param.SOLVER = SOLVER_ALS;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, X_csc, y_pred);
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, X_csc, y_pred);
  param.SOLVER = SOLVER_MCMC;
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, pFix->X, X_csc, pFix->y, y_pred, param);
  param.TASK = TASK_REGRESSION;
  sparse_fit(coef, pFix->X, X_csc, pFix->y, y_pred, param);

  // test: train < test
  param.SOLVER = SOLVER_MCMC;
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, X_csc, pFix->X, y_pred, pFix->y, param);
  param.TASK = TASK_REGRESSION;
  sparse_fit(coef, X_csc, pFix->X, y_pred, pFix->y, param);
  param.SOLVER = SOLVER_ALS;
  sparse_fit(coef, X_csc, NULL, y_pred, NULL, param);
  sparse_predict(coef, pFix->X, pFix->y);
  param.TASK = TASK_CLASSIFICATION;
  sparse_fit(coef, X_csc, NULL, y_pred, NULL, param);
  sparse_predict(coef, pFix->X, pFix->y);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  cs_spfree(X_t);
  cs_spfree(X_csc);
}

void test_sparse_als_generated_data(void) {
  int n_features = 10;
  int n_samples = 100;
  int k = 2;
  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
  ffm_vector *y_pred = ffm_vector_calloc(n_samples);
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 50, .init_sigma = 0.01, .SOLVER = SOLVER_ALS};
  param.init_lambda_w = 23.5;
  param.init_lambda_V = 23.5;

  sparse_fit(coef, data->X, NULL, data->y, NULL, param);
  sparse_predict(coef, data->X, y_pred);
  g_assert_cmpfloat(ffm_r2_score(data->y, y_pred), >, 0.85);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_hyerparameter_sampling(void) {
  ffm_rng *rng = ffm_rng_seed(12345);

  int n_features = 20;
  int n_samples = 150;
  int k = 1;  // don't just change k, the rank is hard coded in the test
              // (ffm_vector_get(coef->lambda_V, 0);)
  int n_replication = 40;
  int n_draws = 1000;
  ffm_vector *alpha_rep = ffm_vector_calloc(n_replication);
  ffm_vector *lambda_w_rep = ffm_vector_calloc(n_replication);
  ffm_vector *lambda_V_rep = ffm_vector_calloc(n_replication);
  ffm_vector *mu_w_rep = ffm_vector_calloc(n_replication);
  ffm_vector *mu_V_rep = ffm_vector_calloc(n_replication);
  ffm_vector *err = ffm_vector_alloc(n_samples);

  for (int j = 0; j < n_replication; j++) {
    TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
    ffm_coef *coef = data->coef;

    sparse_predict(coef, data->X, err);
    ffm_vector_scale(err, -1);
    ffm_vector_add(err, data->y);

    // make sure that the distribution has converged before selecting
    // reference / init values
    for (int l = 0; l < 50; l++) sample_hyper_parameter(coef, err, rng);

    double alpha_init = coef->alpha;
    double lambda_w_init = coef->lambda_w;
    double lambda_V_init = ffm_vector_get(coef->lambda_V, 0);
    double mu_w_init = coef->mu_w;
    double mu_V_init = ffm_vector_get(coef->mu_V, 0);

    double alpha_count = 0;
    double lambda_w_count = 0, lambda_V_count = 0;
    double mu_w_count = 0, mu_V_count = 0;
    for (int l = 0; l < n_draws; l++) {
      sample_hyper_parameter(coef, err, rng);
      if (alpha_init > coef->alpha) alpha_count++;
      if (lambda_w_init > coef->lambda_w) lambda_w_count++;
      if (lambda_V_init > ffm_vector_get(coef->lambda_V, 0)) lambda_V_count++;
      if (mu_w_init > coef->mu_w) mu_w_count++;
      if (mu_V_init > ffm_vector_get(coef->mu_V, 0)) mu_V_count++;
    }
    ffm_vector_set(alpha_rep, j, alpha_count / (n_draws + 1));
    ffm_vector_set(lambda_w_rep, j, lambda_w_count / (n_draws + 1));
    ffm_vector_set(lambda_V_rep, j, lambda_V_count / (n_draws + 1));
    ffm_vector_set(mu_w_rep, j, mu_w_count / (n_draws + 1));
    ffm_vector_set(mu_V_rep, j, mu_V_count / (n_draws + 1));

    TestFixtureDestructor(data, NULL);
  }

  double chi_alpha = 0;
  for (int i = 0; i < n_replication; i++)
    chi_alpha +=
        ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(alpha_rep, i)));
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_alpha, n_replication), <, .05);

  double chi_lambda_w = 0;
  for (int i = 0; i < n_replication; i++)
    chi_lambda_w +=
        ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(lambda_w_rep, i)));
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_lambda_w, n_replication), <, .05);

  double chi_lambda_V = 0;
  for (int i = 0; i < n_replication; i++)
    chi_lambda_V +=
        ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(lambda_V_rep, i)));
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_lambda_V, n_replication), <, .05);

  double chi_mu_w = 0;
  for (int i = 0; i < n_replication; i++)
    chi_mu_w += ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(mu_w_rep, i)));
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_mu_w, n_replication), <, .05);

  double chi_mu_V = 0;
  for (int i = 0; i < n_replication; i++)
    chi_mu_V += ffm_pow_2(gsl_cdf_ugaussian_Qinv(ffm_vector_get(mu_V_rep, i)));
  g_assert_cmpfloat(gsl_ran_chisq_pdf(chi_mu_V, n_replication), <, .05);

  ffm_vector_free_all(alpha_rep, lambda_w_rep, lambda_V_rep, mu_w_rep,
                      mu_V_rep, err);
  ffm_rng_free(rng);
}
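/* Note on the check above: each replication records the fraction of n_draws
 * Gibbs draws that fall below a reference value taken after a burn-in of 50
 * draws.  If the hyper-parameter sampler has the intended stationary
 * distribution, that fraction should be roughly uniform on (0, 1);
 * gsl_cdf_ugaussian_Qinv() then maps it to an approximately standard normal
 * value, and the sum of squares over all replications should be roughly
 * chi-squared with n_replication degrees of freedom.  The assertions only
 * require the chi-squared density at the observed statistic to be small, so
 * this is a coarse sanity check rather than a formal hypothesis test. */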
void test_gradient_check_reg(TestFixture_T *pFix, gconstpointer pg) {
  cs *X_crs = pFix->X_t;
  ffm_vector *y = pFix->y;
  int test_sample_row = 0;
  double y_true = ffm_vector_get(y, test_sample_row);
  int n_features = pFix->coef->w->size;
  double eps = 0.0001;
  ffm_param param = {.n_iter = 1, .stepsize = .001, .init_sigma = .1, .k = 2,
                     .init_lambda_w = 0.5, .init_lambda_V = 1.5,
                     .warm_start = 1, .SOLVER = SOLVER_SGD,
                     .TASK = TASK_REGRESSION, .rng_seed = 44};

  ffm_coef *coef_t0 = alloc_fm_coef(n_features, param.k, false);
  init_ffm_coef(coef_t0, param);
  ffm_coef *coef_t1 = alloc_fm_coef(n_features, param.k, false);
  init_ffm_coef(coef_t1, param);

  ffm_fit_sgd(coef_t1, X_crs, y, &param);
  ffm_coef *grad = extract_gradient(coef_t0, coef_t1, param.stepsize);

  // check w gradient updates
  for (int i = 0; i < n_features; i++) {
    // keep copy
    double tmp = ffm_vector_get(coef_t0->w, i);
    // x + eps
    ffm_vector_set(coef_t0->w, i, tmp + eps);
    double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
    double sq_loss = 0.5 * pow(y_true - y_pred, 2);
    double l_plus = sq_loss + 0.5 * l2_penalty(coef_t0);
    // x - eps
    ffm_vector_set(coef_t0->w, i, tmp - eps);
    y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
    sq_loss = 0.5 * pow(y_true - y_pred, 2);
    double l_minus = sq_loss + 0.5 * l2_penalty(coef_t0);
    // restore
    ffm_vector_set(coef_t0->w, i, tmp);
    double grad_i = (l_plus - l_minus) / (2 * eps);
    g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-10);
  }
  // check V gradient updates
  for (int f = 0; f < param.k; f++)
    for (int i = 0; i < n_features; i++) {
      // keep copy
      double tmp = ffm_matrix_get(coef_t0->V, f, i);
      // x + eps
      ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
      double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
      double sq_loss = 0.5 * pow(y_true - y_pred, 2);
      double l_plus = sq_loss + 0.5 * l2_penalty(coef_t0);
      // x - eps
      ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
      y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
      sq_loss = 0.5 * pow(y_true - y_pred, 2);
      double l_minus = sq_loss + 0.5 * l2_penalty(coef_t0);
      // restore
      ffm_matrix_set(coef_t0->V, f, i, tmp);
      double grad_i = (l_plus - l_minus) / (2 * eps);
      g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)), <, 1e-10);
    }

  free_ffm_coef(coef_t0);
  free_ffm_coef(coef_t1);
  free_ffm_coef(grad);
}

void test_gradient_check_class(TestFixture_T *pFix, gconstpointer pg) {
  cs *X_crs = pFix->X_t;
  ffm_vector *y = pFix->y;
  int test_sample_row = 0;
  double y_true = ffm_vector_get(y, test_sample_row);
  int n_features = pFix->coef->w->size;
  double eps = 0.0001;
  ffm_param param = {.n_iter = 1, .stepsize = .01, .init_sigma = .01, .k = 2,
                     .init_lambda_w = 1.5, .init_lambda_V = 2.0,
                     .warm_start = 1, .SOLVER = SOLVER_SGD,
                     .TASK = TASK_CLASSIFICATION, .rng_seed = 44};

  ffm_coef *coef_t0 = alloc_fm_coef(n_features, param.k, false);
  init_ffm_coef(coef_t0, param);
  ffm_coef *coef_t1 = alloc_fm_coef(n_features, param.k, false);
  init_ffm_coef(coef_t1, param);

  ffm_fit_sgd(coef_t1, X_crs, y, &param);
  ffm_coef *grad = extract_gradient(coef_t0, coef_t1, param.stepsize);

  // check w gradient updates
  for (int i = 0; i < n_features; i++) {
    // keep copy
    double tmp = ffm_vector_get(coef_t0->w, i);
    // x + eps
    ffm_vector_set(coef_t0->w, i, tmp + eps);
    double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
    double log_loss = -log(ffm_sigmoid(y_true * y_pred));
    double l_plus = log_loss + 0.5 * l2_penalty(coef_t0);
    // x - eps
    ffm_vector_set(coef_t0->w, i, tmp - eps);
    y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
    log_loss = -log(ffm_sigmoid(y_true * y_pred));
    double l_minus = log_loss + 0.5 * l2_penalty(coef_t0);
    // restore
    ffm_vector_set(coef_t0->w, i, tmp);
    // finite central differences
    double grad_i = (l_plus - l_minus) / (2 * eps);
    // g_assert_cmpfloat(grad_i, ==, ffm_vector_get(grad->w, i));
    g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-9);
  }
  // check V gradient updates
  for (int f = 0; f < param.k; f++)
    for (int i = 0; i < n_features; i++) {
      // keep copy
      double tmp = ffm_matrix_get(coef_t0->V, f, i);
      // x + eps
      ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
      double y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
      double log_loss = -log(ffm_sigmoid(y_true * y_pred));
      double l_plus = log_loss + 0.5 * l2_penalty(coef_t0);
      // x - eps
      ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
      y_pred = ffm_predict_sample(coef_t0, X_crs, test_sample_row);
      log_loss = -log(ffm_sigmoid(y_true * y_pred));
      double l_minus = log_loss + 0.5 * l2_penalty(coef_t0);
      // restore
      ffm_matrix_set(coef_t0->V, f, i, tmp);
      // finite central differences
      double grad_i = (l_plus - l_minus) / (2 * eps);
      g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)), <, 1e-10);
    }

  free_ffm_coef(coef_t0);
  free_ffm_coef(coef_t1);
  free_ffm_coef(grad);
}

void test_gradient_check_bpr(TestFixture_T *pFix, gconstpointer pg) {
  cs *X_crs = pFix->X_t;
  ffm_matrix *pairs = ffm_matrix_calloc(1, 2);
  int pos_row = 0;
  ffm_matrix_set(pairs, 0, 0, pos_row);
  int neg_row = 1;
  ffm_matrix_set(pairs, 0, 1, neg_row);
  int n_features = pFix->coef->w->size;
  double eps = 0.0001;
  ffm_param param = {.n_iter = 1, .stepsize = .01, .init_sigma = .01, .k = 2,
                     .init_lambda_w = 0.0, .init_lambda_V = 0.0,
                     .warm_start = 1, .SOLVER = SOLVER_SGD,
                     .TASK = TASK_RANKING, .rng_seed = 44};

  ffm_coef *coef_t0 = alloc_fm_coef(n_features, param.k, false);
  init_ffm_coef(coef_t0, param);
  ffm_coef *coef_t1 = alloc_fm_coef(n_features, param.k, false);
  init_ffm_coef(coef_t1, param);

  ffm_fit_sgd_bpr(coef_t1, X_crs, pairs, param);
  ffm_coef *grad = extract_gradient(coef_t0, coef_t1, param.stepsize);

  double y_pos, y_neg, bpr_loss, l_plus, l_minus, grad_i, tmp;
  // check w gradient updates
  for (int i = 0; i < n_features; i++) {
    // keep copy
    tmp = ffm_vector_get(coef_t0->w, i);
    // x + eps
    ffm_vector_set(coef_t0->w, i, tmp + eps);
    y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
    y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
    bpr_loss = -log(ffm_sigmoid(y_pos - y_neg));
    l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0);
    // x - eps
    ffm_vector_set(coef_t0->w, i, tmp - eps);
    y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
    y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
    bpr_loss = -log(ffm_sigmoid(y_pos - y_neg));
    l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0);
    // restore
    ffm_vector_set(coef_t0->w, i, tmp);
    // finite central differences
    grad_i = (l_plus - l_minus) / (2 * eps);
    // g_assert_cmpfloat(grad_i, ==, ffm_vector_get(grad->w, i));
    g_assert_cmpfloat(fabs(grad_i - ffm_vector_get(grad->w, i)), <, 1e-9);
  }
  // check V gradient updates
  for (int f = 0; f < param.k; f++)
    for (int i = 0; i < n_features; i++) {
      // keep copy
      tmp = ffm_matrix_get(coef_t0->V, f, i);
      // x + eps
      ffm_matrix_set(coef_t0->V, f, i, tmp + eps);
      y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
      y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
      bpr_loss = -log(ffm_sigmoid(y_pos - y_neg));
      l_plus = bpr_loss + 0.5 * l2_penalty(coef_t0);
      // x - eps
      ffm_matrix_set(coef_t0->V, f, i, tmp - eps);
      y_pos = ffm_predict_sample(coef_t0, X_crs, pos_row);
      y_neg = ffm_predict_sample(coef_t0, X_crs, neg_row);
      bpr_loss = -log(ffm_sigmoid(y_pos - y_neg));
      l_minus = bpr_loss + 0.5 * l2_penalty(coef_t0);
      // restore
      ffm_matrix_set(coef_t0->V, f, i, tmp);
      // finite central differences
      grad_i = (l_plus - l_minus) / (2 * eps);
      // g_assert_cmpfloat(grad_i, ==, ffm_matrix_get(grad->V, f, i));
      g_assert_cmpfloat(fabs(grad_i - ffm_matrix_get(grad->V, f, i)), <, 1e-10);
    }

  free_ffm_coef(coef_t0);
  free_ffm_coef(coef_t1);
  free_ffm_coef(grad);
}

int main(int argc, char **argv) {
  /* feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW); */
  g_test_init(&argc, &argv, NULL);

  TestFixture_T Fixture;

  g_test_add("/sgd/util/predict sample", TestFixture_T, &Fixture,
             TestFixtureContructorWide, test_sgd_predict_sample,
             TestFixtureDestructor);
  g_test_add("/sgd/reg/first order", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_first_order_sgd,
             TestFixtureDestructor);
  g_test_add("/sgd/reg/second order", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_second_order_sgd,
             TestFixtureDestructor);
  g_test_add("/sgd/class/full", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_sgd_classification,
             TestFixtureDestructor);
  g_test_add("/sgd/bpr/update second order", TestFixture_T, &Fixture,
             TestFixtureContructorWide, test_update_second_order_bpr,
             TestFixtureDestructor);
  g_test_add("/sgd/bpr/first order", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_first_order_bpr,
             TestFixtureDestructor);
  g_test_add("/sgd/bpr/second order", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_second_order_bpr,
             TestFixtureDestructor);

  g_test_add_func("/sgd/class/generated data",
                  test_sgd_classification_generated_data);
  g_test_add_func("/sgd/reg/generated data", test_sgd_generated_data);
  g_test_add_func("/sgd/bpr/generated data", test_sgd_bpr_generated_data);
  g_test_add_func("/sgd/util/extract_gradient", test_extract_gradient);
  g_test_add_func("/sgd/util/l2_penalty", test_l2_penalty);

  g_test_add("/sgd/reg/gradient check", TestFixture_T, &Fixture,
             TestFixtureContructorWide, test_gradient_check_reg,
             TestFixtureDestructor);
  g_test_add("/sgd/class/gradient check", TestFixture_T, &Fixture,
             TestFixtureContructorWide, test_gradient_check_class,
             TestFixtureDestructor);
  g_test_add("/sgd/bpr/gradient check", TestFixture_T, &Fixture,
             TestFixtureContructorWide, test_gradient_check_bpr,
             TestFixtureDestructor);

  return g_test_run();
}
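/* Usage note (a sketch, assuming the stock GLib test runner behaviour):
 * running the compiled binary directly executes every test registered above,
 * and a single test can be selected by path with the -p option, e.g.
 *
 *   ./sgd_test -p "/sgd/util/l2_penalty"
 *
 * The binary name sgd_test is only a placeholder for whatever the build
 * actually produces. */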