Example #1
static int lbfgs_progress(
    void *instance,
    const lbfgsfloatval_t *x,
    const lbfgsfloatval_t *g,
    const lbfgsfloatval_t fx,
    const lbfgsfloatval_t xnorm,
    const lbfgsfloatval_t gnorm,
    const lbfgsfloatval_t step,
    int n,
    int k,
    int ls)
{
    int i, num_active_features = 0;
    clock_t duration, clk = clock();
    lbfgs_internal_t *lbfgsi = (lbfgs_internal_t*)instance;
    dataset_t *testset = lbfgsi->testset;
    encoder_t *gm = lbfgsi->gm;
    logging_t *lg = lbfgsi->lg;

    /* Compute the duration required for this iteration. */
    duration = clk - lbfgsi->begin;
    lbfgsi->begin = clk;

    /* Store the feature weights in case L-BFGS terminates with an error. */
    for (i = 0; i < n; ++i) {
        lbfgsi->best_w[i] = x[i];
        if (x[i] != 0.) ++num_active_features;
    }

    /* Report the progress. */
    logging(lg, "***** Iteration #%d *****\n", k);
    logging(lg, "Loss: %f\n", fx);
    logging(lg, "Feature norm: %f\n", xnorm);
    logging(lg, "Error norm: %f\n", gnorm);
    logging(lg, "Active features: %d\n", num_active_features);
    logging(lg, "Line search trials: %d\n", ls);
    logging(lg, "Line search step: %f\n", step);
    logging(lg, "Seconds required for this iteration: %.3f\n", duration / (double)CLOCKS_PER_SEC);

    /* Evaluate the holdout set with the current parameters. */
    if (testset != NULL) {
        holdout_evaluation(gm, testset, x, lg);
    }

    logging(lg, "\n");

    /* Continue. */
    return 0;
}
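
For context, here is a minimal sketch of how a progress callback like lbfgs_progress is typically registered with libLBFGS. The driver function run_lbfgs and the evaluate callback proc_evaluate are assumptions for illustration, not part of the example above; only the lbfgs_internal_t fields used here (begin) appear in the original code.

#include <time.h>
#include <lbfgs.h>

/* Assumed evaluate callback (defined elsewhere): returns the objective
   value and fills in the gradient g. */
static lbfgsfloatval_t proc_evaluate(void *instance, const lbfgsfloatval_t *x,
                                     lbfgsfloatval_t *g, const int n,
                                     const lbfgsfloatval_t step);

/* Sketch: wire the progress callback into the libLBFGS driver.
   lbfgs() invokes lbfgs_progress() once per iteration and passes
   `lbfgsi` back through the `instance` argument; a non-zero return
   value from the callback cancels the optimization. */
static int run_lbfgs(lbfgs_internal_t *lbfgsi, lbfgsfloatval_t *x, int n)
{
    lbfgsfloatval_t fx;
    lbfgs_parameter_t param;

    lbfgs_parameter_init(&param);   /* Default solver settings. */
    lbfgsi->begin = clock();        /* Start the per-iteration timer. */
    return lbfgs(n, x, &fx, proc_evaluate, lbfgs_progress, lbfgsi, &param);
}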
Example #2
static int l2sgd(
    encoder_t *gm,
    dataset_t *trainset,
    dataset_t *testset,
    floatval_t *w,
    logging_t *lg,
    const int N,
    const floatval_t t0,
    const floatval_t lambda,
    const int num_epochs,
    int calibration,
    int period,
    const floatval_t epsilon,
    floatval_t *ptr_loss
    )
{
    int i, epoch, ret = 0;
    floatval_t t = 0;
    floatval_t loss = 0, sum_loss = 0;
    floatval_t best_sum_loss = DBL_MAX;
    floatval_t eta, gain, decay = 1.;
    floatval_t improvement = 0.;
    floatval_t norm2 = 0.;
    floatval_t *pf = NULL;
    floatval_t *best_w = NULL;
    clock_t clk_prev, clk_begin = clock();
    const int K = gm->num_features;

    if (!calibration) {
        pf = (floatval_t*)malloc(sizeof(floatval_t) * period);
        best_w = (floatval_t*)calloc(K, sizeof(floatval_t));
        if (pf == NULL || best_w == NULL) {
            ret = CRFSUITEERR_OUTOFMEMORY;
            goto error_exit;
        }
    }

    /* Initialize the feature weights. */
    vecset(w, 0, K);

    /* Loop over epochs. */
    for (epoch = 1; epoch <= num_epochs; ++epoch) {
        clk_prev = clock();

        if (!calibration) {
            logging(lg, "***** Epoch #%d *****\n", epoch);
            /* Shuffle the training instances. */
            dataset_shuffle(trainset);
        }

        /* Loop over the training instances. */
        sum_loss = 0.;
        for (i = 0; i < N; ++i) {
            const crfsuite_instance_t *inst = dataset_get(trainset, i);

            /* Update the learning rate, cumulative decay factor, and gradient gain. */
            eta = 1 / (lambda * (t0 + t));
            decay *= (1.0 - eta * lambda);
            gain = eta / decay;
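
            /* Note: this is a lazy L2-decay scheme. The true weights are
               decay * w; rather than shrinking all K weights by
               (1 - eta * lambda) on every instance, the shrink factors are
               accumulated in `decay`, and gradients are applied with
               gain = eta / decay so that they carry the correct magnitude
               once the weights are rescaled by `decay` at the end of the
               epoch. */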

            /* Compute the loss and gradients for the instance. */
            gm->set_weights(gm, w, decay);
            gm->set_instance(gm, inst);
            gm->objective_and_gradients(gm, &loss, w, gain);

            sum_loss += loss;
            ++t;
        }

        /* Terminate when the loss is abnormal (NaN, -Inf, +Inf). */
        if (!isfinite(loss)) {
            logging(lg, "ERROR: overflow loss\n");
            ret = CRFSUITEERR_OVERFLOW;
            sum_loss = loss;
            goto error_exit;
        }

        /* Scale the feature weights. */
        vecscale(w, decay, K);
        decay = 1.;

        /* Add the L2 norm of the feature weights to the objective. */
        /* The factor N is necessary because lambda = 2 * C / N, so
           0.5 * lambda * norm2 * N equals C * ||w||^2. */
        norm2 = vecdot(w, w, K);
        sum_loss += 0.5 * lambda * norm2 * N;

        /* One epoch finished. */
        if (!calibration) {
            /* Check if the current epoch is the best. */
            if (sum_loss < best_sum_loss) {
                /* Store the feature weights to best_w. */
                best_sum_loss = sum_loss;
                veccopy(best_w, w, K);
            }

            /* Do not test the stopping criterion until more than `period`
               epochs have completed (i.e., while epoch <= period). */
            if (period < epoch) {
                improvement = (pf[(epoch-1) % period] - sum_loss) / sum_loss;
            } else {
                improvement = epsilon;
            }

            /* Store the current value of the objective function. */
            pf[(epoch-1) % period] = sum_loss;

            logging(lg, "Loss: %f\n", sum_loss);
            if (period < epoch) {
                logging(lg, "Improvement ratio: %f\n", improvement);
            }
            logging(lg, "Feature L2-norm: %f\n", sqrt(norm2));
            logging(lg, "Learning rate (eta): %f\n", eta);
            logging(lg, "Total number of feature updates: %.0f\n", t);
            logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - clk_prev) / (double)CLOCKS_PER_SEC);

            /* Holdout evaluation if necessary. */
            if (testset != NULL) {
                holdout_evaluation(gm, testset, w, lg);
            }
            logging(lg, "\n");

            /* Check for the stopping criterion. */
            if (improvement < epsilon) {
                ret = 0;
                break;
            }
        }
    }

    /* Output the optimization result. */
    if (!calibration) {
        if (ret == 0) {
            if (epoch < num_epochs) {
                logging(lg, "SGD terminated with the stopping criteria\n");
            } else {
                logging(lg, "SGD terminated with the maximum number of iterations\n");
            }
        } else {
            logging(lg, "SGD terminated with error code (%d)\n", ret);
        }
    }

    /* Restore the best weights. */
    if (best_w != NULL) {
        sum_loss = best_sum_loss;
        veccopy(w, best_w, K);
    }

error_exit:
    free(best_w);
    free(pf);
    if (ptr_loss != NULL) {
        *ptr_loss = sum_loss;
    }
    return ret;
}
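
For reference, the per-instance update that l2sgd performs lazily is the standard L2-regularized SGD step, w := (1 - eta * lambda) * w - eta * g with eta = 1 / (lambda * (t0 + t)). A direct, unoptimized form of that step is sketched below (the gradient vector g and the helper name sgd_step_naive are assumptions for illustration); l2sgd obtains the same result without the O(K) rescaling per instance by accumulating the (1 - eta * lambda) factors in `decay` and applying gradients with gain = eta / decay.

/* Sketch: the naive per-instance update equivalent to l2sgd's
   decay/gain scheme. Shrink every weight toward zero (the L2 term),
   then take a gradient step. */
static void sgd_step_naive(floatval_t *w, const floatval_t *g, int K,
                           floatval_t eta, floatval_t lambda)
{
    int i;
    for (i = 0; i < K; ++i) {
        w[i] = (1.0 - eta * lambda) * w[i] - eta * g[i];
    }
}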