int pc_crf_prepare_4(void) { TERM p_fact_list; int size; p_fact_list = bpx_get_call_arg(1,4); size = bpx_get_integer(bpx_get_call_arg(2,4)); num_goals = bpx_get_integer(bpx_get_call_arg(3,4)); failure_root_index = bpx_get_integer(bpx_get_call_arg(4,4)); failure_observed = (failure_root_index != -1); if (failure_root_index != -1) { failure_subgoal_id = prism_goal_id_get(failure_atom); if (failure_subgoal_id == -1) { emit_internal_error("no subgoal ID allocated to `failure'"); RET_INTERNAL_ERR; } } initialize_egraph_index(); alloc_sorted_egraph(size); RET_ON_ERR(sort_crf_egraphs(p_fact_list)); #ifndef MPI if (verb_graph) { print_egraph(0, PRINT_NEUTRAL); } #endif /* !(MPI) */ alloc_occ_switches(); alloc_num_sw_vals(); return BP_TRUE; }
/* We check if all smoothing constants are positive (MAP), * or all smoothing constants are zero. If some are positive, * but the others are zero, die immediately. We also check * if there exist parameters fixed at zero in MAP estimation. */ int check_smooth(int *smooth) { /* q = +4 : found non-zero smoothing constants +2 : found zero-valued smoothing constants +1 : found parameters fixed to zero */ int i, q = 0; SW_INS_PTR sw_ins_ptr; for (i = 0; i < occ_switch_tab_size; i++) { sw_ins_ptr = occ_switches[i]; while (sw_ins_ptr != NULL) { if (sw_ins_ptr->smooth_prolog < 0) { emit_error("negative delta values in MAP estimation"); RET_ERR(err_invalid_numeric_value); } q |= (sw_ins_ptr->smooth_prolog < TINY_PROB) ? 2 : 4; q |= (sw_ins_ptr->fixed && sw_ins_ptr->inside < TINY_PROB) ? 1 : 0; sw_ins_ptr = sw_ins_ptr->next; } } switch (q) { case 0: /* p.counts = (none), w/o 0-valued params */ case 1: /* p.counts = (none), with 0-valued params */ emit_internal_error("unexpected case in check_smooth()"); RET_ERR(ierr_unmatched_branches); break; case 2: /* p.counts = 0 only, w/o 0-valued params */ case 3: /* p.counts = 0 only, with 0-valued params */ *smooth = 0; break; case 4: /* p.counts = + only, w/o 0-valued params */ *smooth = 1; break; case 5: /* p.counts = + only, with 0-valued params */ emit_error("parameters fixed to zero in MAP estimation"); RET_ERR(err_invalid_numeric_value); break; case 6: /* p.counts = (both), w/o 0-valued params */ case 7: /* p.counts = (both), with 0-valued params */ emit_error("mixture of zero and non-zero pseudo counts"); RET_ERR(err_invalid_numeric_value); } transfer_hyperparams_prolog(); return BP_TRUE; }
/*
 * check_smooth_vb: reject illegal hyperparameters.
 *
 * A switch instance whose smooth_prolog value is -1.0 or smaller is
 * reported as an internal error.  Otherwise the hyperparameters are
 * transferred with transfer_hyperparams_prolog() and BP_TRUE is returned.
 */
int check_smooth_vb(void)
{
    int idx;
    SW_INS_PTR sw;

    for (idx = 0; idx < occ_switch_tab_size; idx++) {
        for (sw = occ_switches[idx]; sw != NULL; sw = sw->next) {
            if (sw->smooth_prolog <= -1.0) {
                emit_internal_error("illegal hyperparameters");
                RET_INTERNAL_ERR;
            }
        }
    }

    transfer_hyperparams_prolog();

    return BP_TRUE;
}
/* * Be warned that eg_ptr->outside will have a value different from that * in the compute_expectation-family functions. */ int compute_outside_scaling_none(void) { int i,k; EG_PATH_PTR path_ptr; EG_NODE_PTR eg_ptr,node_ptr; double q; if (num_roots != 1) { emit_internal_error("illegal call to compute_outside"); RET_ERR(build_internal_error("no_observed_data")); } for (i = 0; i < sorted_egraph_size; i++) { sorted_expl_graph[i]->outside = 0.0; } eg_ptr = expl_graph[roots[0]->id]; eg_ptr->outside = roots[0]->count; for (i = (sorted_egraph_size - 1); i >= 0; i--) { eg_ptr = sorted_expl_graph[i]; path_ptr = eg_ptr->path_ptr; while (path_ptr != NULL) { q = eg_ptr->outside * path_ptr->inside; if (q > 0.0) { for (k = 0; k < path_ptr->children_len; k++) { node_ptr = path_ptr->children[k]; node_ptr->outside += q / node_ptr->inside; } } path_ptr = path_ptr->next; } } return BP_TRUE; }
/*
 * run_grd: main loop of gradient-based learning.
 *
 * For each of the num_restart restarts, the loop repeatedly
 *   1. computes features, CRF probabilities and the log likelihood
 *      through the function pointers of crf_ptr,
 *   2. validates the log likelihood (finite and not greater than zero),
 *   3. computes the gradient (plus the L-BFGS bookkeeping when
 *      crf_learn_mode == 1),
 *   4. stops on convergence or when the iteration limit is reached,
 *   5. picks a step size according to crf_learning_rate
 *      (1: annealing, 2: backtracking line search, 3: golden section;
 *      any other value leaves the step size at its current value), and
 *   6. updates the lambdas with that step size.
 *
 * The best likelihood over the restarts and its iteration count are
 * stored in crf_ptr.  Returns BP_TRUE on success; errors leave through
 * RET_ERR / RET_ON_ERR.
 */
static int run_grd(CRF_ENG_PTR crf_ptr)
{
    int r,iterate,old_valid,converged,conv_time,saved = 0;
    double likelihood,old_likelihood = 0.0;
    double crf_max_iterate = 0.0;
    double tmp_epsilon,alpha0,gf_sd,old_gf_sd = 0.0;

    config_crf(crf_ptr);
    initialize_weights();

    if (crf_learn_mode == 1) {
        initialize_LBFGS();
        printf("L-BFGS mode\n");
    }

    if (crf_learning_rate==1) {
        printf("learning rate:annealing\n");
    }
    else if (crf_learning_rate==2) {
        printf("learning rate:backtrack\n");
    }
    else if (crf_learning_rate==3) {
        printf("learning rate:golden section\n");
    }

    if (max_iterate == -1) {
        crf_max_iterate = DEFAULT_MAX_ITERATE;
    }
    else if (max_iterate >= +1) {
        crf_max_iterate = max_iterate;
    }

    for (r = 0; r < num_restart; r++) {
        SHOW_PROGRESS_HEAD("#crf-iters", r);

        initialize_crf_count();
        initialize_lambdas();
        initialize_visited_flags();

        /* Per-restart state. */
        iterate = 0;
        old_valid = 0;
        conv_time = 0;
        tmp_epsilon = crf_epsilon;
        LBFGS_index = 0;

        for (;;) {
            if (CTRLC_PRESSED) {
                SHOW_PROGRESS_INTR();
                RET_ERR(err_ctrl_c_pressed);
            }

            RET_ON_ERR(crf_ptr->compute_feature());

            crf_ptr->compute_crf_probs();

            likelihood = crf_ptr->compute_likelihood();

            if (verb_em) {
                prism_printf("Iteration #%d:\tlog_likelihood=%.9f\n", iterate, likelihood);
            }

            if (debug_level) {
                prism_printf("After I-step[%d]:\n", iterate);
                prism_printf("likelihood = %.9f\n", likelihood);
                print_egraph(debug_level, PRINT_EM);
            }

            if (!isfinite(likelihood)) {
                emit_internal_error("invalid log likelihood: %s (at iteration #%d)",
                                    isnan(likelihood) ? "NaN" : "infinity", iterate);
                RET_ERR(ierr_invalid_likelihood);
            }

            /* No check for a decreasing log likelihood is made here. */

            if (likelihood > 0.0) {
                emit_error("log likelihood greater than zero [value: %.9f] (at iteration #%d)",
                           likelihood, iterate);
                RET_ERR(err_invalid_likelihood);
            }

            if (crf_learn_mode == 1 && iterate > 0) {
                restore_old_gradient();
            }

            RET_ON_ERR(crf_ptr->compute_gradient());

            /* L-BFGS bookkeeping around the freshly computed gradient. */
            if (crf_learn_mode == 1) {
                if (iterate > 0) {
                    compute_LBFGS_y_rho();
                    compute_hessian(iterate);
                }
                else if (iterate == 0) {
                    initialize_LBFGS_q();
                }
            }

            converged = (old_valid && fabs(likelihood - old_likelihood) <= prism_epsilon);

            if (converged || REACHED_MAX_ITERATE(iterate)) {
                break;
            }

            old_likelihood = likelihood;
            old_valid = 1;

            if (debug_level) {
                prism_printf("After O-step[%d]:\n", iterate);
                print_egraph(debug_level, PRINT_EM);
            }

            SHOW_PROGRESS(iterate);

            if (crf_learning_rate == 1) {
                /* annealing */
                tmp_epsilon = (annealing_weight / (annealing_weight + iterate)) * crf_epsilon;
            }
            else if (crf_learning_rate == 2) {
                /* line search (backtrack) */
                gf_sd = (crf_learn_mode == 1) ? compute_gf_sd_LBFGS() : compute_gf_sd();

                alpha0 = (iterate == 0) ? 1 : tmp_epsilon * old_gf_sd / gf_sd;

                tmp_epsilon = (crf_learn_mode == 1)
                    ? line_search_LBFGS(crf_ptr,alpha0,crf_ls_rho,crf_ls_c1,likelihood,gf_sd)
                    : line_search(crf_ptr,alpha0,crf_ls_rho,crf_ls_c1,likelihood,gf_sd);

                if (tmp_epsilon < EPS) {
                    emit_error("invalid alpha in line search(=0.0) (at iteration #%d)",iterate);
                    RET_ERR(err_line_search);
                }

                old_gf_sd = gf_sd;
            }
            else if (crf_learning_rate == 3) {
                /* line search (golden section) */
                tmp_epsilon = (crf_learn_mode == 1)
                    ? golden_section_LBFGS(crf_ptr,0,crf_golden_b)
                    : golden_section(crf_ptr,0,crf_golden_b);
            }

            crf_ptr->update_lambdas(tmp_epsilon);

            iterate++;
        }

        SHOW_PROGRESS_TAIL(converged, iterate, likelihood);

        /* Keep the record of the best restart seen so far. */
        if (r == 0 || likelihood > crf_ptr->likelihood) {
            crf_ptr->likelihood = likelihood;
            crf_ptr->iterate    = iterate;

            saved = (r < num_restart - 1);
            if (saved) {
                save_params();
            }
        }
    }

    if (crf_learn_mode == 1) {
        clean_LBFGS();
    }

    INIT_VISITED_FLAGS;

    return BP_TRUE;
}
static int compute_gradient_scaling_log_exp(void) { int i,k; EG_PATH_PTR path_ptr; EG_NODE_PTR eg_ptr,node_ptr; SW_INS_PTR sw_ptr; double q,r; for (i = 0; i < sw_ins_tab_size; i++) { switch_instances[i]->total_expect = 0.0; switch_instances[i]->has_first_expectation = 0; switch_instances[i]->first_expectation = 0.0; } for (i = 0; i < sorted_egraph_size; i++) { sorted_expl_graph[i]->outside = 0.0; sorted_expl_graph[i]->has_first_outside = 0; sorted_expl_graph[i]->first_outside = 0.0; } for (i = 0; i < num_roots; i++) { if (roots[i]->pid == -1) { eg_ptr = expl_graph[roots[i]->id]; if (i == failure_root_index) { eg_ptr->first_outside = log(num_goals / (1.0 - exp(inside_failure))); } else { eg_ptr->first_outside = log((double)(roots[i]->sgd_count)) - eg_ptr->inside; } eg_ptr->has_first_outside = 1; eg_ptr->outside = 1.0; } } /* sorted_expl_graph[to] must be a root node */ for (i = sorted_egraph_size - 1; i >= 0; i--) { eg_ptr = sorted_expl_graph[i]; if (eg_ptr->visited == 0) continue; /* First accumulate log-scale outside probabilities: */ if (!eg_ptr->has_first_outside) { emit_internal_error("unexpected has_first_outside[%s]", prism_goal_string(eg_ptr->id)); RET_INTERNAL_ERR; } else if (!(eg_ptr->outside > 0.0)) { emit_internal_error("unexpected outside[%s]", prism_goal_string(eg_ptr->id)); RET_INTERNAL_ERR; } else { eg_ptr->outside = eg_ptr->first_outside + log(eg_ptr->outside); } path_ptr = sorted_expl_graph[i]->path_ptr; while (path_ptr != NULL) { q = sorted_expl_graph[i]->outside + path_ptr->inside; for (k = 0; k < path_ptr->children_len; k++) { node_ptr = path_ptr->children[k]; r = q - node_ptr->inside; if (!node_ptr->has_first_outside) { node_ptr->first_outside = r; node_ptr->outside += 1.0; node_ptr->has_first_outside = 1; } else if (r - node_ptr->first_outside >= log(HUGE_PROB)) { node_ptr->outside *= exp(node_ptr->first_outside - r); node_ptr->first_outside = r; node_ptr->outside += 1.0; } else { node_ptr->outside += exp(r - node_ptr->first_outside); } } for 
(k = 0; k < path_ptr->sws_len; k++) { sw_ptr = path_ptr->sws[k]; if (!sw_ptr->has_first_expectation) { sw_ptr->first_expectation = q; sw_ptr->total_expect += 1.0; sw_ptr->has_first_expectation = 1; } else if (q - sw_ptr->first_expectation >= log(HUGE_PROB)) { sw_ptr->total_expect *= exp(sw_ptr->first_expectation - q); sw_ptr->first_expectation = q; sw_ptr->total_expect += 1.0; } else { sw_ptr->total_expect += exp(q - sw_ptr->first_expectation); } } path_ptr = path_ptr->next; } } /* unscale total_expect */ for (i = 0; i < sw_ins_tab_size; i++) { sw_ptr = switch_instances[i]; if (!sw_ptr->has_first_expectation) continue; if (!(sw_ptr->total_expect > 0.0)) { emit_error("unexpected expectation for %s",prism_sw_ins_string(i)); RET_ERR(err_invalid_numeric_value); } sw_ptr->total_expect = exp(sw_ptr->first_expectation + log(sw_ptr->total_expect)); } for (i=0; i<occ_switch_tab_size; i++) { sw_ptr = occ_switches[i]; while (sw_ptr!=NULL) { sw_ptr->gradient = (sw_ptr->count - sw_ptr->total_expect) * sw_ptr->inside_h; if (crf_penalty != 0.0) { sw_ptr->gradient -= sw_ptr->inside * crf_penalty; } sw_ptr = sw_ptr->next; } } return BP_TRUE; }
int compute_outside_scaling_log_exp(void) { int i,k; EG_PATH_PTR path_ptr; EG_NODE_PTR eg_ptr,node_ptr; double q,r; if (num_roots != 1) { emit_internal_error("illegal call to compute_outside"); RET_ERR(build_internal_error("no_observed_data")); } for (i = 0; i < sorted_egraph_size; i++) { sorted_expl_graph[i]->outside = 0.0; sorted_expl_graph[i]->has_first_outside = 0; sorted_expl_graph[i]->first_outside = 0.0; } eg_ptr = expl_graph[roots[0]->id]; eg_ptr->outside = 1.0; eg_ptr->has_first_outside = 1; eg_ptr->first_outside = log((double)(roots[0]->count)); /* sorted_expl_graph[to] must be a root node */ for (i = sorted_egraph_size - 1; i >= 0; i--) { eg_ptr = sorted_expl_graph[i]; /* First accumulate log-scale outside probabilities: */ if (!eg_ptr->has_first_outside) { emit_internal_error("unexpected has_first_outside[%s]",prism_goal_string(eg_ptr->id)); RET_INTERNAL_ERR; } else if (!(eg_ptr->outside > 0.0)) { emit_internal_error("unexpected outside[%s]", prism_goal_string(eg_ptr->id)); RET_INTERNAL_ERR; } else { eg_ptr->outside = eg_ptr->first_outside + log(eg_ptr->outside); } path_ptr = sorted_expl_graph[i]->path_ptr; while (path_ptr != NULL) { q = sorted_expl_graph[i]->outside + path_ptr->inside; for (k = 0; k < path_ptr->children_len; k++) { node_ptr = path_ptr->children[k]; r = q - node_ptr->inside; if (!node_ptr->has_first_outside) { node_ptr->first_outside = r; node_ptr->outside += 1.0; node_ptr->has_first_outside = 1; } else if (r - node_ptr->first_outside >= log(HUGE_PROB)) { node_ptr->outside *= exp(node_ptr->first_outside - r); node_ptr->first_outside = r; node_ptr->outside += 1.0; } else { node_ptr->outside += exp(r - node_ptr->first_outside); } } path_ptr = path_ptr->next; } } return BP_TRUE; }
/*
 * run_em: EM learning driver.
 *
 * For each of the num_restart restarts the parameters are initialized
 * and the EM iterations are run through the function pointers of em_ptr
 * (compute_inside, examine_inside, compute_likelihood, compute_log_prior,
 * compute_expectation, update_params).  The quantity being monitored is
 *     lambda = log likelihood + log prior
 * where the log prior is taken as 0.0 unless em_ptr->smooth is set.
 *
 * The result of the best restart (largest lambda) is recorded in em_ptr,
 * its parameters are restored if they had to be saved, and em_ptr->bic
 * and em_ptr->cs are filled in.  Returns BP_TRUE on success; errors leave
 * through RET_ERR / RET_ON_ERR.
 */
int run_em(EM_ENG_PTR em_ptr)
{
    int r, iterate, old_valid, converged, saved = 0;
    double likelihood, log_prior;
    double lambda, old_lambda = 0.0;

    config_em(em_ptr);

    for (r = 0; r < num_restart; r++) {
        SHOW_PROGRESS_HEAD("#em-iters", r);

        initialize_params();

        if (daem) {
            itemp = itemp_init;
        }
        else {
            itemp = 1.0;
        }
        iterate = 0;

        /* [21 Aug 2007, by yuizumi]
         * Loop over the inversed temperature (DAEM).  Without annealing
         * this loop body runs only once, since itemp starts at 1.0.
         */
        for (;;) {
            if (daem) {
                SHOW_PROGRESS_TEMP(itemp);
            }

            old_valid = 0;

            for (;;) {
                if (CTRLC_PRESSED) {
                    SHOW_PROGRESS_INTR();
                    RET_ERR(err_ctrl_c_pressed);
                }

                RET_ON_ERR(em_ptr->compute_inside());
                RET_ON_ERR(em_ptr->examine_inside());

                likelihood = em_ptr->compute_likelihood();
                if (em_ptr->smooth) {
                    log_prior = em_ptr->compute_log_prior();
                }
                else {
                    log_prior = 0.0;
                }
                lambda = likelihood + log_prior;

                if (verb_em) {
                    if (em_ptr->smooth) {
                        prism_printf("Iteration #%d:\tlog_likelihood=%.9f\tlog_prior=%.9f\tlog_post=%.9f\n", iterate, likelihood, log_prior, lambda);
                    }
                    else {
                        prism_printf("Iteration #%d:\tlog_likelihood=%.9f\n", iterate, likelihood);
                    }
                }

                if (debug_level) {
                    prism_printf("After I-step[%d]:\n", iterate);
                    prism_printf("likelihood = %.9f\n", likelihood);
                    print_egraph(debug_level, PRINT_EM);
                }

                if (!isfinite(lambda)) {
                    emit_internal_error("invalid log likelihood or log post: %s (at iteration #%d)",
                                        isnan(lambda) ? "NaN" : "infinity", iterate);
                    RET_ERR(ierr_invalid_likelihood);
                }
                if (old_valid && old_lambda - lambda > prism_epsilon) {
                    emit_error("log likelihood or log post decreased [old: %.9f, new: %.9f] (at iteration #%d)",
                               old_lambda, lambda, iterate);
                    RET_ERR(err_invalid_likelihood);
                }
                if (itemp == 1.0 && likelihood > 0.0) {
                    emit_error("log likelihood greater than zero [value: %.9f] (at iteration #%d)",
                               likelihood, iterate);
                    RET_ERR(err_invalid_likelihood);
                }

                converged = (old_valid && lambda - old_lambda <= prism_epsilon);
                if (converged || REACHED_MAX_ITERATE(iterate)) {
                    break;
                }

                old_lambda = lambda;
                old_valid  = 1;

                RET_ON_ERR(em_ptr->compute_expectation());

                if (debug_level) {
                    prism_printf("After O-step[%d]:\n", iterate);
                    print_egraph(debug_level, PRINT_EM);
                }

                SHOW_PROGRESS(iterate);

                RET_ON_ERR(em_ptr->update_params());
                iterate++;
            }

            /* [21 Aug 2007, by yuizumi]
             * Note that 1.0 can be represented exactly in IEEE 754.
             */
            if (itemp == 1.0) {
                break;
            }

            /* Raise the temperature, clamping it at 1.0. */
            itemp *= itemp_rate;
            if (itemp >= 1.0) {
                itemp = 1.0;
            }
        }

        SHOW_PROGRESS_TAIL(converged, iterate, lambda);

        /* Keep the record of the best restart seen so far. */
        if (r == 0 || lambda > em_ptr->lambda) {
            em_ptr->lambda     = lambda;
            em_ptr->likelihood = likelihood;
            em_ptr->iterate    = iterate;

            saved = (r < num_restart - 1);
            if (saved) {
                save_params();
            }
        }
    }

    if (saved) {
        restore_params();
    }

    em_ptr->bic = compute_bic(em_ptr->likelihood);
    if (em_ptr->smooth) {
        em_ptr->cs = compute_cs(em_ptr->likelihood);
    }
    else {
        em_ptr->cs = 0.0;
    }

    return BP_TRUE;
}
int mpm_run_em(EM_ENG_PTR emptr) { int r, iterate, old_valid, converged, saved=0; double likelihood, log_prior; double lambda, old_lambda=0.0; config_em(emptr); for (r = 0; r < num_restart; r++) { SHOW_PROGRESS_HEAD("#em-iters", r); initialize_params(); mpm_bcast_inside(); clear_sw_msg_send(); itemp = daem ? itemp_init : 1.0; iterate = 0; while (1) { if (daem) { SHOW_PROGRESS_TEMP(itemp); } old_valid = 0; while (1) { if (CTRLC_PRESSED) { SHOW_PROGRESS_INTR(); RET_ERR(err_ctrl_c_pressed); } if (failure_observed) { inside_failure = mp_sum_value(0.0); } log_prior = emptr->smooth ? emptr->compute_log_prior() : 0.0; lambda = mp_sum_value(log_prior); likelihood = lambda - log_prior; mp_debug("local lambda = %.9f, lambda = %.9f", log_prior, lambda); if (verb_em) { if (emptr->smooth) { prism_printf("Iteration #%d:\tlog_likelihood=%.9f\tlog_prior=%.9f\tlog_post=%.9f\n", iterate, likelihood, log_prior, lambda); } else { prism_printf("Iteration #%d:\tlog_likelihood=%.9f\n", iterate, likelihood); } } if (!isfinite(lambda)) { emit_internal_error("invalid log likelihood or log post: %s (at iterateion #%d)", isnan(lambda) ? 
"NaN" : "infinity", iterate); RET_ERR(ierr_invalid_likelihood); } if (old_valid && old_lambda - lambda > prism_epsilon) { emit_error("log likelihood or log post decreased [old: %.9f, new: %.9f] (at iteration #%d)", old_lambda, lambda, iterate); RET_ERR(err_invalid_likelihood); } if (itemp == 1.0 && likelihood > 0.0) { emit_error("log likelihood greater than zero [value: %.9f] (at iteration #%d)", likelihood, iterate); RET_ERR(err_invalid_likelihood); } converged = (old_valid && lambda - old_lambda <= prism_epsilon); if (converged || REACHED_MAX_ITERATE(iterate)) { break; } old_lambda = lambda; old_valid = 1; mpm_share_expectation(); SHOW_PROGRESS(iterate); RET_ON_ERR(emptr->update_params()); iterate++; } if (itemp == 1.0) { break; } itemp *= itemp_rate; if (itemp >= 1.0) { itemp = 1.0; } } SHOW_PROGRESS_TAIL(converged, iterate, lambda); if (r == 0 || lambda > emptr->lambda) { emptr->lambda = lambda; emptr->likelihood = likelihood; emptr->iterate = iterate; saved = (r < num_restart - 1); if (saved) { save_params(); } } } if (saved) { restore_params(); } emptr->bic = compute_bic(emptr->likelihood); emptr->cs = emptr->smooth ? compute_cs(emptr->likelihood) : 0.0; return BP_TRUE; }