Example #1
void test_sparse_map_gibbs_first_order_interactions(void) {
  int n_features = 10;
  int n_samples = 100;
  int k = 0;
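  // k = 0: the model has no pairwise (second-order) factors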

  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 200, .init_sigma = 0.1, .SOLVER = SOLVER_MCMC};
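  // with SOLVER_MCMC the test predictions are written into y_pred during
  // sparse_fit; train and test set are the same matrix here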
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param);

  g_assert_cmpfloat(ffm_r2_score(data->y, y_pred), >, .99);
  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_train_test_data(void) {
  // test that training and test data are properly handled
  // no check of prediction quality
  int n_features = 10;
  int n_samples_train = 100;
  int n_samples_test = 30;
  int k = 3;

  TestFixture_T *data_train =
      makeTestFixture(124, n_samples_train, n_features, k);
  TestFixture_T *data_test =
      makeTestFixture(124, n_samples_test, n_features, k);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples_test);
  // Gibbs (MCMC) solver
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 200, .init_sigma = 0.1, .SOLVER = SOLVER_MCMC};
  sparse_fit(coef, data_train->X, data_test->X, data_train->y, y_pred, param);
  free_ffm_coef(coef);
  // ALS solver
  coef = alloc_fm_coef(n_features, k, false);
  ffm_param param_als = {
      .n_iter = 200, .init_sigma = 0.1, .SOLVER = SOLVER_ALS};
  sparse_fit(coef, data_train->X, data_test->X, data_train->y, y_pred,
             param_als);
  sparse_predict(coef, data_test->X, y_pred);

  free_ffm_coef(coef);
  TestFixtureDestructor(data_train, NULL);
  TestFixtureDestructor(data_test, NULL);
}

void test_sparse_map_gibbs_second_interactions(void) {
  int n_features = 10;
  int n_samples = 1000;
  int k = 2;
  double init_sigma = 0.1;

  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
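  // run 1: only 5 sampling iterations at the true rank k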
  ffm_param param = {
      .n_iter = 5, .init_sigma = init_sigma, .SOLVER = SOLVER_MCMC};
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param);
  double score_5_samples = ffm_r2_score(data->y, y_pred);

  free_ffm_coef(coef);
  ffm_vector_set_all(y_pred, 0);
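  // run 2: 50 iterations, but with a first-order model only (k = 0)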
  coef = alloc_fm_coef(n_features, 0, false);
  ffm_param param_50 = {
      .n_iter = 50, .init_sigma = init_sigma, .SOLVER = SOLVER_MCMC};
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param_50);
  double score_50_samples_first_order = ffm_r2_score(data->y, y_pred);

  free_ffm_coef(coef);
  ffm_vector_set_all(y_pred, 0);
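  // run 3: 50 iterations with an over-specified rank (k + 5); this run is
  // expected to score best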
  coef = alloc_fm_coef(n_features, k + 5, false);
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param_50);
  double score_50_samples = ffm_r2_score(data->y, y_pred);

  g_assert_cmpfloat(score_50_samples, >, score_50_samples_first_order);
  g_assert_cmpfloat(score_50_samples, >, score_5_samples);
  g_assert_cmpfloat(score_50_samples, >, .72);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_sparse_als_classification(void) {
  int n_features = 10;
  int n_samples = 100;
  int k = 2;
  double init_sigma = 0.01;

  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
  // map to classification problem
  ffm_vector_make_labels(data->y);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 50,
                     .init_sigma = init_sigma,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_CLASSIFICATION};
  param.init_lambda_w = 5.5;
  param.init_lambda_V = 5.5;
  sparse_fit(coef, data->X, NULL, data->y, NULL, param);
  sparse_predict(coef, data->X, y_pred);
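  // squash the raw scores into (0, 1) with the standard normal CDF before
  // computing the accuracy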
  ffm_vector_normal_cdf(y_pred);

  g_assert_cmpfloat(ffm_vector_accuracy(data->y, y_pred), >=, .8);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_sparse_als_classification_path(void) {
  int n_features = 10;
  int n_samples = 200;
  int k = 4;
  double init_sigma = 0.1;

  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
  // map to classification problem
  ffm_vector_make_labels(data->y);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 0,
                     .init_sigma = init_sigma,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_CLASSIFICATION};
  param.init_lambda_w = 5.5;
  param.init_lambda_V = 5.5;

  double acc = 0;
  // the objective does not decrease strictly monotonically because of the
  // latent target, but it should still decrease on average (at least until
  // convergence)
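  // refit from scratch with n_iter = 1, 2, 4, 8 and require that the training
  // accuracy never drops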
  for (int i = 1; i < 9; i = i * 2) {
    param.n_iter = i;
    sparse_fit(coef, data->X, NULL, data->y, NULL, param);
    sparse_predict(coef, data->X, y_pred);
    ffm_vector_normal_cdf(y_pred);
    double tmp_acc = ffm_vector_accuracy(data->y, y_pred);
    // training error should (almost) always decrease
    // printf("iter %d, last acc %f\n", i, acc);
    g_assert_cmpfloat(tmp_acc, >=, acc);
    acc = tmp_acc;
  }

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_sparse_mcmc_classification(void) {
  int n_features = 10;
  int n_samples = 100;
  int k = 2;
  double init_sigma = 0.1;

  TestFixture_T *data = makeTestFixture(124, n_samples, n_features, k);
  // map to classification problem
  ffm_vector_make_labels(data->y);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 50,
                     .init_sigma = init_sigma,
                     .SOLVER = SOLVER_MCMC,
                     .TASK = TASK_CLASSIFICATION};
  param.init_lambda_w = 5.5;
  param.init_lambda_V = 5.5;
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param);
  sparse_predict(coef, data->X, y_pred);

  g_assert_cmpfloat(ffm_vector_accuracy(data->y, y_pred), >=, .84);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_numerical_stability(void) {
  int n_features = 10;
  int n_samples = 10000;
  int k = 2;

  TestFixture_T *data = makeTestFixture(15, n_samples, n_features, k);

  ffm_vector *y_pred = ffm_vector_calloc(n_samples);

  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  ffm_param param = {.n_iter = 7, .init_sigma = 0.01, .SOLVER = SOLVER_ALS};
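  // deliberately large l2 penalties; the assertions below guard against
  // numerical problems in the solver updates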
  param.init_lambda_w = 400;
  param.init_lambda_V = 400;
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param);
  sparse_predict(coef, data->X, y_pred);
  double score_als = ffm_r2_score(data->y, y_pred);
  g_assert_cmpfloat(score_als, >, .98);

  free_ffm_coef(coef);
  ffm_vector_set_all(y_pred, 0);

  coef = alloc_fm_coef(n_features, k, false);
  ffm_param param_mcmc = {
      .n_iter = 50, .init_sigma = 0.01, .SOLVER = SOLVER_MCMC};
  sparse_fit(coef, data->X, data->X, data->y, y_pred, param_mcmc);
  double score_gibbs = ffm_r2_score(data->y, y_pred);
  g_assert_cmpfloat(score_gibbs, >, .99);

  ffm_vector_free(y_pred);
  free_ffm_coef(coef);
  TestFixtureDestructor(data, NULL);
}

void test_map_update_target(void) {
  double pred[] = {0.5, 0.2, -0.2, -0.5, -0.1, 0.8};
  double true_[] = {1, 1, -1, -1, 1, -1};
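  // expected latent targets; the values match norm_pdf(pred) / norm_cdf(pred * y) * y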
  double z[] = {0.509160434,  0.6750731798, -0.6750731798,
                -0.509160434, 0.8626174715, -1.3674022692};
  ffm_vector y_pred = {.data = pred, .size = 6};
  ffm_vector y_true = {.data = true_, .size = 6};
  ffm_vector *z_target = ffm_vector_alloc(6);
  map_update_target(&y_pred, z_target, &y_true);
  for (int i = 0; i < 6; i++)
    g_assert_cmpfloat(fabs(z_target->data[i] - z[i]), <=, 1e-9);
  ffm_vector_free(z_target);
}

void test_als_warm_start(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int n_samples = pFix->X->m;
  int k = 4;

  ffm_vector *y_10_iter = ffm_vector_calloc(n_samples);
  ffm_vector *y_15_iter = ffm_vector_calloc(n_samples);
  ffm_vector *y_5_plus_5_iter = ffm_vector_calloc(n_samples);
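  // compare one cold 10-iteration fit, one cold 15-iteration fit (control) and
  // a 5-iteration fit continued by 5 warm-started iterations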

  ffm_param param = {.warm_start = false,
                     .init_sigma = 0.1,
                     .SOLVER = SOLVER_ALS,
                     .TASK = TASK_REGRESSION,
                     .rng_seed = 123};

  param.n_iter = 10;
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_10_iter);

  param.n_iter = 15;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_15_iter);

  param.n_iter = 5;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  param.warm_start = true;
  sparse_fit(coef, pFix->X, NULL, pFix->y, NULL, param);
  sparse_predict(coef, pFix->X, y_5_plus_5_iter);

  // 10 iterations from scratch and 5 + 5 warm-started iterations should give
  // (numerically) identical predictions, unlike the 15-iteration control run
  double mse = ffm_vector_mean_squared_error(y_10_iter, y_5_plus_5_iter);
  double mse_diff = ffm_vector_mean_squared_error(y_15_iter, y_5_plus_5_iter);

  g_assert_cmpfloat(mse, <=, 1e-8);
  g_assert_cmpfloat(mse, <, mse_diff);

  free_ffm_coef(coef);
  ffm_vector_free_all(y_10_iter, y_5_plus_5_iter);
  ffm_vector_free(y_15_iter);
}

void test_mcmc_warm_start(TestFixture_T *pFix, gconstpointer pg) {
  int n_features = pFix->X->n;
  int n_samples = pFix->X->m;
  int k = 4;

  ffm_vector *y_10_iter = ffm_vector_calloc(n_samples);
  ffm_vector *y_15_iter = ffm_vector_calloc(n_samples);
  ffm_vector *y_5_plus_5_iter = ffm_vector_calloc(n_samples);

  ffm_param param = {.warm_start = false,
                     .init_sigma = 0.1,
                     .SOLVER = SOLVER_MCMC,
                     .TASK = TASK_REGRESSION,
                     .rng_seed = 125};

  param.n_iter = 100;
  // printf("n_iter %d\n", param.n_iter);
  ffm_coef *coef = alloc_fm_coef(n_features, k, false);
  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_10_iter, param);

  param.n_iter = 150;
  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_15_iter, param);

  param.n_iter = 50;
  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_5_plus_5_iter, param);
  param.warm_start = true;
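  // continue the chain: 50 draws are already done, so raising n_iter to 100
  // adds another 50 iterations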
  param.iter_count = param.n_iter;
  param.n_iter += 50;  // add more iterations
  sparse_fit(coef, pFix->X, pFix->X, pFix->y, y_5_plus_5_iter, param);

  // the warm-started 50 + 50 chain should match the 100-iteration run more
  // closely than the 150-iteration run
  // double mse10 = ffm_vector_mean_squared_error(pFix->y, y_5_plus_5_iter);
  double mse10_55 = ffm_vector_mean_squared_error(y_10_iter, y_5_plus_5_iter);
  double mse15_55 = ffm_vector_mean_squared_error(y_15_iter, y_5_plus_5_iter);

  g_assert_cmpfloat(mse10_55, <, mse15_55);

  free_ffm_coef(coef);
  ffm_vector_free_all(y_10_iter, y_5_plus_5_iter);
  ffm_vector_free(y_15_iter);
}

int main(int argc, char **argv) {
  /*
  feenableexcept(FE_INVALID   |
                 FE_DIVBYZERO |
                 FE_OVERFLOW  |
                 FE_UNDERFLOW);
  */

  g_test_init(&argc, &argv, NULL);

  TestFixture_T Fixture;

  g_test_add("/als/update second-order error", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_update_second_order_error,
             TestFixtureDestructor);

  g_test_add("/als/eval second-order term", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_eval_second_order_term,
             TestFixtureDestructor);

  g_test_add("/als/update v_ij", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_sparse_update_v_ij,
             TestFixtureDestructor);

  g_test_add("/general/predict", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_sparse_predict,
             TestFixtureDestructor);

  g_test_add("/general/row_predict", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_row_predict,
             TestFixtureDestructor);

  g_test_add("/general/col_predict", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_col_predict,
             TestFixtureDestructor);

  g_test_add("/als/zero order only", TestFixture_T, &Fixture,
             TestFixtureContructorSimple, test_sparse_als_zero_order_only,
             TestFixtureDestructor);

  g_test_add("/als/first order only", TestFixture_T, &Fixture,
             TestFixtureContructorSimple, test_sparse_als_first_order_only,
             TestFixtureDestructor);

  g_test_add("/als/second order only", TestFixture_T, &Fixture,
             TestFixtureContructorSimple, test_sparse_als_second_order_only,
             TestFixtureDestructor);

  g_test_add("/als/all interactions", TestFixture_T, &Fixture,
             TestFixtureContructorSimple, test_sparse_als_all_interactions,
             TestFixtureDestructor);

  g_test_add("/als/first order", TestFixture_T, &Fixture,
             TestFixtureContructorLong,
             test_sparse_als_first_order_interactions, TestFixtureDestructor);

  g_test_add("/als/second order", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_sparse_als_second_interactions,
             TestFixtureDestructor);

  g_test_add("/mcmc/second order", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_sparse_mcmc_second_interactions,
             TestFixtureDestructor);

  g_test_add("/mcmc/second order classification", TestFixture_T, &Fixture,
             TestFixtureContructorLong,
             test_sparse_mcmc_second_interactions_classification,
             TestFixtureDestructor);

  g_test_add("/general/train test different size", TestFixture_T, &Fixture,
             TestFixtureContructorLong, test_train_test_of_different_size,
             TestFixtureDestructor);

  g_test_add_func("/als/generated data", test_sparse_als_generated_data);

  g_test_add_func("/mcmc/MAP gibbs first order",
                  test_sparse_map_gibbs_first_order_interactions);

  g_test_add_func("/mcmc/hyperparameter sampling", test_hyerparameter_sampling);

  g_test_add_func("/mcmc/MAP gibbs second order",
                  test_sparse_map_gibbs_second_interactions);

  g_test_add_func("/general/numerical stability", test_numerical_stability);

  g_test_add_func("/mcmc/map update target", test_map_update_target);

  g_test_add_func("/als/classification", test_sparse_als_classification);
  g_test_add_func("/als/classification path",
                  test_sparse_als_classification_path);
  g_test_add_func("/mcmc/classification", test_sparse_mcmc_classification);

  g_test_add("/als/warm_start", TestFixture_T, &Fixture,
             TestFixtureContructorSimple, test_als_warm_start,
             TestFixtureDestructor);

  g_test_add("/mcmc/warm_start", TestFixture_T, &Fixture,
             TestFixtureContructorSimple, test_mcmc_warm_start,
             TestFixtureDestructor);

  return g_test_run();
}
Example #2
File: cli.c  Project: altimin/fastFM-core
int
main (int argc, char **argv)
{
    feenableexcept(FE_INVALID   |
                   FE_DIVBYZERO |
                   FE_OVERFLOW  |
                   FE_UNDERFLOW);

    struct arguments arguments;

    /* Default values. */
    arguments.silent = 0;
    arguments.verbose = 0;

    // file paths
    arguments.test_file = NULL;
    arguments.train_file = NULL;
    arguments.test_predict_file = NULL;
    arguments.train_pairs = NULL;

    // fm default parameters
    arguments.k = 8;
    arguments.n_iter = 50;
    arguments.init_var = 0.01;
    arguments.step_size = 0.01;
    arguments.l2_reg = 1;
    arguments.l2_reg_w = 0;
    arguments.l2_reg_V = 0;
    arguments.solver = "mcmc";
    arguments.task = "regression";
    arguments.rng_seed = time(NULL);

    int arg_count = 2;
    arguments.arg_count = &arg_count;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments);

    ffm_param param = {.n_iter     = arguments.n_iter,
                       .init_sigma = arguments.init_var,
                       .k          = arguments.k,
                       .stepsize   = arguments.step_size,
                       .rng_seed   = arguments.rng_seed};

    // parse solver
    if (strcmp(arguments.solver, "mcmc") == 0)
        param.SOLVER = SOLVER_MCMC;
    else if (strcmp(arguments.solver, "als") == 0)
        param.SOLVER = SOLVER_ALS;
    else if (strcmp(arguments.solver, "sgd") == 0)
        param.SOLVER = SOLVER_SGD;
    else
        assert(0 && "unknown solver");

    // parse task
    if (strcmp(arguments.task, "regression") == 0)
        param.TASK = TASK_REGRESSION;
    else if (strcmp(arguments.task, "classification") == 0)
        param.TASK = TASK_CLASSIFICATION;
    else if (strcmp(arguments.task, "ranking") == 0)
        param.TASK = TASK_RANKING;
    else
        assert(0 && "unknown task");

    printf ("TRAIN_FILE = %s\nTEST_FILE = %s\n"
            "VERBOSE = %s\nSILENT = %s\n",
            arguments.args[0], arguments.args[1],
            arguments.verbose ? "yes" : "no",
            arguments.silent ? "yes" : "no");

    printf ("task=%s", arguments.task);
    printf (", init-var=%f", param.init_sigma);
    printf (", n-iter=%i", param.n_iter);
    if (param.TASK == TASK_RANKING)
        printf (", step-size=%f", param.stepsize);
    printf (", solver=%s", arguments.solver);
    printf (", k=%i", param.k);

    // defaults if l2_reg_w / l2_reg_V are not specified explicitly
    param.init_lambda_w = arguments.l2_reg;
    param.init_lambda_V = arguments.l2_reg;

    if (arguments.l2_reg_w != 0.0)
        param.init_lambda_w = arguments.l2_reg_w;
    if (arguments.l2_reg_V != 0.0)
        param.init_lambda_V = arguments.l2_reg_V;

    if (strcmp(arguments.solver, "mcmc") != 0)
    {
        printf (", l2-reg-w=%f", param.init_lambda_w);
        if (arguments.k > 0)
            printf (", l2-reg-V=%f", param.init_lambda_V);
        printf("\n");
    }


        printf("\nload data\n");
        fm_data train_data = read_svm_light_file(arguments.args[0]);
        fm_data test_data = read_svm_light_file(arguments.args[1]);

        int n_features = train_data.X->n;
        ffm_vector *y_test_predict = ffm_vector_calloc(test_data.y->size);
        ffm_coef *coef =  alloc_fm_coef(n_features, arguments.k, false);


        printf("fit model\n");
    if (param.TASK == TASK_RANKING)
        {
            assert(arguments.train_pairs != NULL && "Ranking requires the option '--train-pairs'");
            ffm_matrix * train_pairs = ffm_matrix_from_file(arguments.train_pairs);
            cs *X_t = cs_transpose (train_data.X, 1);
            cs_spfree(train_data.X);
            train_data.X = X_t;
            ffm_fit_sgd_bpr(coef, train_data.X, train_pairs, param);
           //printf("c%", arguments.train_pairs);
        }
        else
            sparse_fit(coef, train_data.X, test_data.X, train_data.y, y_test_predict,
                param);

        // the predictions are calculated during the training phase for mcmc
        if (param.SOLVER != SOLVER_MCMC){
            sparse_predict(coef, test_data.X, y_test_predict);
            if (param.TASK == TASK_CLASSIFICATION)
                ffm_vector_normal_cdf(y_test_predict);
        }

        // save predictions
        if (arguments.test_predict_file){
            FILE *f = fopen(arguments.test_predict_file, "w");
            for (int i=0; i< y_test_predict->size; i++)
                fprintf(f, "%f\n", y_test_predict->data[i]);
            fclose(f);
        }

    if (param.TASK == TASK_REGRESSION)
        printf("\nr2 score: %f \n", ffm_r2_score(test_data.y, y_test_predict));
    if (param.TASK == TASK_CLASSIFICATION)
        printf("\nacc score: %f \n", ffm_vector_accuracy(test_data.y, y_test_predict));

    /*
    printf("calculate kendall tau\n");
    if (param.TASK == TASK_RANKING)
    {
        ffm_vector *true_order = ffm_vector_get_order(test_data.y);
        ffm_vector *pred_order = ffm_vector_get_order(y_test_predict);
        double kendall_tau =
                ffm_vector_kendall_tau(true_order, pred_order);
        printf("\nkendall tau: %f \n", kendall_tau);
    }
    */

    exit (0);
}