static int lbfgs_progress(
    void *instance,
    const lbfgsfloatval_t *x,
    const lbfgsfloatval_t *g,
    const lbfgsfloatval_t fx,
    const lbfgsfloatval_t xnorm,
    const lbfgsfloatval_t gnorm,
    const lbfgsfloatval_t step,
    int n,
    int k,
    int ls)
{
    int i, num_active_features = 0;
    clock_t duration, clk = clock();
    lbfgs_internal_t *lbfgsi = (lbfgs_internal_t*)instance;
    dataset_t *testset = lbfgsi->testset;
    encoder_t *gm = lbfgsi->gm;
    logging_t *lg = lbfgsi->lg;

    /* Compute the duration required for this iteration. */
    duration = clk - lbfgsi->begin;
    lbfgsi->begin = clk;

    /* Store the feature weights in case L-BFGS terminates with an error. */
    for (i = 0; i < n; ++i) {
        lbfgsi->best_w[i] = x[i];
        if (x[i] != 0.) ++num_active_features;
    }

    /* Report the progress. */
    logging(lg, "***** Iteration #%d *****\n", k);
    logging(lg, "Loss: %f\n", fx);
    logging(lg, "Feature norm: %f\n", xnorm);
    logging(lg, "Error norm: %f\n", gnorm);
    logging(lg, "Active features: %d\n", num_active_features);
    logging(lg, "Line search trials: %d\n", ls);
    logging(lg, "Line search step: %f\n", step);
    logging(lg, "Seconds required for this iteration: %.3f\n",
        duration / (double)CLOCKS_PER_SEC);

    /* Evaluate the tagger on the holdout data with the current parameters. */
    if (testset != NULL) {
        holdout_evaluation(gm, testset, x, lg);
    }
    logging(lg, "\n");

    /* Continue the optimization. */
    return 0;
}
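/*
 * A minimal sketch (not part of the original file) of how a progress
 * callback like lbfgs_progress() above is registered with libLBFGS: it is
 * passed to lbfgs() and invoked once per iteration with the current
 * weights, loss, norms, and line-search statistics. The evaluate callback
 * name `lbfgs_evaluate` and this wrapper function are illustrative
 * assumptions, not the actual call site in this file.
 */
#if 0
static int run_lbfgs_sketch(lbfgs_internal_t *lbfgsi, int n, lbfgsfloatval_t *x)
{
    lbfgsfloatval_t fx = 0.;
    lbfgs_parameter_t param;

    /* Start from the libLBFGS defaults; tune fields such as param.epsilon
       or param.max_iterations as needed. */
    lbfgs_parameter_init(&param);

    /* `lbfgs_evaluate` (hypothetical) must compute the loss and gradient;
       lbfgs_progress() above receives `lbfgsi` back as its `instance`
       argument on every iteration. */
    return lbfgs(n, x, &fx, lbfgs_evaluate, lbfgs_progress, lbfgsi, &param);
}
#endif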
static int l2sgd(
    encoder_t *gm,
    dataset_t *trainset,
    dataset_t *testset,
    floatval_t *w,
    logging_t *lg,
    const int N,
    const floatval_t t0,
    const floatval_t lambda,
    const int num_epochs,
    int calibration,
    int period,
    const floatval_t epsilon,
    floatval_t *ptr_loss
    )
{
    int i, epoch, ret = 0;
    floatval_t t = 0;
    floatval_t loss = 0, sum_loss = 0;
    floatval_t best_sum_loss = DBL_MAX;
    floatval_t eta, gain, decay = 1.;
    floatval_t improvement = 0.;
    floatval_t norm2 = 0.;
    floatval_t *pf = NULL;
    floatval_t *best_w = NULL;
    clock_t clk_prev, clk_begin = clock();
    const int K = gm->num_features;

    if (!calibration) {
        pf = (floatval_t*)malloc(sizeof(floatval_t) * period);
        best_w = (floatval_t*)calloc(K, sizeof(floatval_t));
        if (pf == NULL || best_w == NULL) {
            ret = CRFSUITEERR_OUTOFMEMORY;
            goto error_exit;
        }
    }

    /* Initialize the feature weights. */
    vecset(w, 0, K);

    /* Loop for epochs. */
    for (epoch = 1; epoch <= num_epochs; ++epoch) {
        clk_prev = clock();

        if (!calibration) {
            logging(lg, "***** Epoch #%d *****\n", epoch);
            /* Shuffle the training instances. */
            dataset_shuffle(trainset);
        }

        /* Loop for instances. */
        sum_loss = 0.;
        for (i = 0; i < N; ++i) {
            const crfsuite_instance_t *inst = dataset_get(trainset, i);

            /* Update the learning rate, decay, and gain factors. */
            eta = 1 / (lambda * (t0 + t));
            decay *= (1.0 - eta * lambda);
            gain = eta / decay;

            /* Compute the loss and gradients for the instance. */
            gm->set_weights(gm, w, decay);
            gm->set_instance(gm, inst);
            gm->objective_and_gradients(gm, &loss, w, gain);

            sum_loss += loss;
            ++t;
        }

        /* Terminate when the loss is abnormal (NaN, -Inf, +Inf). */
        if (!isfinite(loss)) {
            logging(lg, "ERROR: overflow loss\n");
            ret = CRFSUITEERR_OVERFLOW;
            sum_loss = loss;
            goto error_exit;
        }

        /* Scale the feature weights. */
        vecscale(w, decay, K);
        decay = 1.;

        /* Include the L2 norm of the feature weights in the objective.
           The factor N is necessary because lambda = 2 * C / N. */
        norm2 = vecdot(w, w, K);
        sum_loss += 0.5 * lambda * norm2 * N;

        /* One epoch finished. */
        if (!calibration) {
            /* Check if the current epoch is the best so far. */
            if (sum_loss < best_sum_loss) {
                /* Store the feature weights in best_w. */
                best_sum_loss = sum_loss;
                veccopy(best_w, w, K);
            }

            /* Test the stopping criterion only after the first `period`
               epochs, when pf[] holds a full window of past losses. */
            if (period < epoch) {
                improvement = (pf[(epoch-1) % period] - sum_loss) / sum_loss;
            } else {
                improvement = epsilon;
            }

            /* Store the current value of the objective function. */
            pf[(epoch-1) % period] = sum_loss;

            logging(lg, "Loss: %f\n", sum_loss);
            if (period < epoch) {
                logging(lg, "Improvement ratio: %f\n", improvement);
            }
            logging(lg, "Feature L2-norm: %f\n", sqrt(norm2));
            logging(lg, "Learning rate (eta): %f\n", eta);
            logging(lg, "Total number of feature updates: %.0f\n", t);
            logging(lg, "Seconds required for this iteration: %.3f\n",
                (clock() - clk_prev) / (double)CLOCKS_PER_SEC);

            /* Holdout evaluation if necessary. */
            if (testset != NULL) {
                holdout_evaluation(gm, testset, w, lg);
            }
            logging(lg, "\n");

            /* Check the stopping criterion. */
            if (improvement < epsilon) {
                ret = 0;
                break;
            }
        }
    }

    /* Output the optimization result. */
    if (!calibration) {
        if (ret == 0) {
            if (epoch < num_epochs) {
                logging(lg, "SGD terminated with the stopping criterion\n");
            } else {
                logging(lg, "SGD terminated with the maximum number of iterations\n");
            }
        } else {
            logging(lg, "SGD terminated with error code (%d)\n", ret);
        }
    }

    /* Restore the best feature weights. */
    if (best_w != NULL) {
        sum_loss = best_sum_loss;
        veccopy(w, best_w, K);
    }

error_exit:
    free(best_w);
    free(pf);
    if (ptr_loss != NULL) {
        *ptr_loss = sum_loss;
    }

    return ret;
}
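/*
 * A sketch (not part of the original file) of the lazy weight-scaling trick
 * that l2sgd() above relies on. The plain SGD update with L2 regularization,
 *
 *     w <- (1 - eta * lambda) * w - eta * g,
 *
 * would rescale all K weights on every instance. Instead, the code keeps a
 * scalar `decay` with w_true = decay * w_stored, so each update only touches
 * the features active in the current instance:
 *
 *     decay <- decay * (1 - eta * lambda);
 *     w_stored <- w_stored - (eta / decay) * g;    // gain = eta / decay
 *
 * and vecscale(w, decay, K) materializes w_true once per epoch. The toy
 * check below verifies the equivalence on a single weight; the function
 * name, constants, and gradient values are illustrative.
 */
#if 0
#include <assert.h>
#include <math.h>

static void lazy_scaling_check(void)
{
    const double lambda = 0.1, t0 = 10.;
    const double g[3] = {0.5, -0.2, 0.3};   /* toy gradients */
    double w_direct = 1.0;                  /* eagerly rescaled weight */
    double w_stored = 1.0, decay = 1.;      /* lazily rescaled weight  */
    int t;

    for (t = 0; t < 3; ++t) {
        const double eta = 1. / (lambda * (t0 + t));
        /* Eager update: rescale every weight, then step. */
        w_direct = (1. - eta * lambda) * w_direct - eta * g[t];
        /* Lazy update: fold the rescaling into the scalar decay. */
        decay *= (1. - eta * lambda);
        w_stored -= (eta / decay) * g[t];
    }
    /* Materialize the lazy weight, as vecscale(w, decay, K) does above. */
    assert(fabs(w_direct - decay * w_stored) < 1e-12);
}
#endif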