Example #1
int pc_compute_fprobf_1(void) {
	int prmode;

	prmode = bpx_get_integer(bpx_get_call_arg(1,1));

	failure_root_index = -1;

	initialize_weights();

	/* [31 Mar 2008, by yuizumi]
	 * compute_outside_scaling_*() needs to be called because
	 * eg_ptr->outside computed by compute_expectation_scaling_*()
	 * is different from the outside probability.
	 */
	if (log_scale) {
		RET_ON_ERR(compute_feature_scaling_log_exp());
		if (prmode != 1) {
			RET_ON_ERR(compute_expectation_scaling_log_exp());
			RET_ON_ERR(compute_outside_scaling_log_exp());
		}
	} else {
		RET_ON_ERR(compute_feature_scaling_none());
		if (prmode != 1) {
			RET_ON_ERR(compute_expectation_scaling_none());
			RET_ON_ERR(compute_outside_scaling_none());
		}
	}

	return BP_TRUE;
}
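This example, like the other PRISM snippets below, threads every step through RET_ON_ERR, which aborts the predicate as soon as one computation fails. A minimal self-contained sketch of that convention, assuming a BP_TRUE-style integer status (the macro body, the BP_ERROR value, and the step functions here are placeholders, not PRISM's actual definitions):

#include <stdio.h>

#define BP_TRUE  1
#define BP_ERROR 0  /* placeholder value; PRISM's real definition may differ */

/* Propagate the first failing status code to the caller. */
#define RET_ON_ERR(expr)            \
	do {                            \
		if ((expr) != BP_TRUE)      \
			return BP_ERROR;        \
	} while (0)

static int step_ok(void)   { return BP_TRUE; }
static int step_fail(void) { return BP_ERROR; }

static int pipeline(void) {
	RET_ON_ERR(step_ok());
	RET_ON_ERR(step_fail());  /* returns BP_ERROR here */
	return BP_TRUE;           /* never reached */
}

int main(void) {
	printf("pipeline: %s\n", pipeline() == BP_TRUE ? "ok" : "error");
	return 0;
}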
Example #2
int main(int argc, char **argv)
{
    int     i;
    int     errors;
    DataSet *train_set;
    DataSet *test_set;
    Network *adultnn = create_network(14);  // 14 input attributes
    double  e;
    double  acc;
    Class   predicted, desired;

    // training
    train_set = read_dataset("adult.train");

    if (train_set == NULL) {
        fprintf(stderr, "Error reading training set\n");
        exit(1);
    }

    add_layer(adultnn, 28);  // hidden layer
    add_layer(adultnn, 2);  // output layer
    initialize_weights(adultnn, SEED);
    print_network_structure(adultnn);

    printf("Training network with %d epochs...\n", EPOCHS);
    e = batch_train(adultnn, train_set, LEARNING_RATE, EPOCHS,
                   sigmoid, dsigmoid);
    printf("Training finished, approximate final SSE: %f\n", e);

    print_network_structure(adultnn);

    // testing
    test_set = read_dataset("adult.test");

    if (test_set == NULL) {
        fprintf(stderr, "Error reading test set\n");
        exit(1);
    }

    errors = 0;
    printf("Testing with %d cases...\n", test_set->n_cases);
    for (i = 0; i < test_set->n_cases; ++i) {
        predicted = predict_class(adultnn, test_set->input[i]);
        desired = output_to_class(test_set->output[i]);
        if (predicted != desired)
            ++errors;
        printf("Case %d | predicted: %s, desired: %s, outputs: %4.3f %4.3f \n", i,
               class_to_string(predicted), class_to_string(desired),
               adultnn->output_layer->y[0], adultnn->output_layer->y[1]);
    }

    acc = 100.0 - (100.0 * errors / test_set->n_cases);
    printf("Testing accuracy: %f\n", acc);
    printf("Total classificarion errors: %d\n", errors);

    destroy_dataset(train_set);
    destroy_dataset(test_set);

    return 0;
}
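A note on the helpers used above: predict_class() and output_to_class() both reduce the two output activations to a class label, conventionally by taking the argmax over the output units. A minimal sketch under that assumption (the enum values and function name are hypothetical, not this example's actual definitions):

#include <stdio.h>

typedef enum { CLASS_0 = 0, CLASS_1 = 1 } Class;

/* Hypothetical argmax over a two-unit output layer. */
static Class output_to_class_sketch(const double y[2]) {
    return (y[1] > y[0]) ? CLASS_1 : CLASS_0;
}

int main(void) {
    double y[2] = { 0.31, 0.69 };
    printf("predicted class index: %d\n", (int) output_to_class_sketch(y));
    return 0;
}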
Example #3
int pc_compute_feature_2(void) {
	int gid;
	double prob;
	EG_NODE_PTR eg_ptr;

	gid = bpx_get_integer(bpx_get_call_arg(1,2));

	initialize_egraph_index();
	alloc_sorted_egraph(1);
	RET_ON_ERR(sort_one_egraph(gid, 0, 1));

	if (verb_graph) {
		print_egraph(0, PRINT_NEUTRAL);
	}

	eg_ptr = expl_graph[gid];

	initialize_weights();

	if (log_scale) {
		RET_ON_ERR(compute_feature_scaling_log_exp());
	} else {
		RET_ON_ERR(compute_feature_scaling_none());
	}
	prob = eg_ptr->inside;

	return bpx_unify(bpx_get_call_arg(2,2), bpx_build_float(prob));
}
Example #4
void LinearLayer::setup() {
  assert(has_bottom_layer());
  
  initialize_weights();
  
  initialize_bias();
  
  // row vector of ones (one entry per sample), used to collapse the batch dimension
  m_batch_average_vector = new DataCPU(1, get_bottom_layer()->get_output()->get_num_samples());
  SetConst(1).execute(m_batch_average_vector);
  
  m_output = new DataCPU(get_bottom_layer()->get_output()->get_num_samples(),
                         1,
                         1,
                         m_num_hidden);
  
  // provide the error to the layer below
  m_backprop_error = new DataCPU(get_bottom_layer()->get_output()->get_num_samples(),
                                 1,
                                 1,
                                 get_bottom_layer()->get_output()->get_count_per_sample());
}
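The vector of ones allocated in setup() (m_batch_average_vector, filled via SetConst(1)) is the usual device for collapsing the batch dimension with a single matrix-vector product, for example when accumulating bias gradients over a batch. A plain-C sketch of the idea, with raw arrays standing in for the framework's DataCPU buffers:

#include <stdio.h>

#define N_SAMPLES  4
#define N_FEATURES 3

int main(void) {
    double ones[N_SAMPLES] = { 1.0, 1.0, 1.0, 1.0 };  /* batch-average vector */
    double data[N_SAMPLES][N_FEATURES] = {
        { 1, 2, 3 }, { 4, 5, 6 }, { 7, 8, 9 }, { 1, 1, 1 }
    };
    double col_sum[N_FEATURES] = { 0 };

    /* ones^T * data: one pass sums each feature over the batch */
    for (int i = 0; i < N_SAMPLES; i++)
        for (int j = 0; j < N_FEATURES; j++)
            col_sum[j] += ones[i] * data[i][j];

    printf("per-feature batch sums: %g %g %g\n",
           col_sum[0], col_sum[1], col_sum[2]);
    return 0;
}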
Example #5
/* main loop */
static int run_grd(CRF_ENG_PTR crf_ptr) {
	int r,iterate,old_valid,converged,conv_time,saved = 0;
	double likelihood,old_likelihood = 0.0;
	double crf_max_iterate = 0.0;
	double tmp_epsilon,alpha0,gf_sd,old_gf_sd = 0.0;

	config_crf(crf_ptr);

	initialize_weights();

	if (crf_learn_mode == 1) {
		initialize_LBFGS();
		printf("L-BFGS mode\n");
	}

	if (crf_learning_rate == 1) {
		printf("learning rate:annealing\n");
	} else if (crf_learning_rate == 2) {
		printf("learning rate:backtrack\n");
	} else if (crf_learning_rate == 3) {
		printf("learning rate:golden section\n");
	}

	if (max_iterate == -1) {
		crf_max_iterate = DEFAULT_MAX_ITERATE;
	} else if (max_iterate >= 1) {
		crf_max_iterate = max_iterate;
	}

	for (r = 0; r < num_restart; r++) {
		SHOW_PROGRESS_HEAD("#crf-iters", r);

		initialize_crf_count();
		initialize_lambdas();
		initialize_visited_flags();

		old_valid = 0;
		iterate = 0;
		tmp_epsilon = crf_epsilon;

		LBFGS_index = 0;
		conv_time = 0;

		while (1) {
			if (CTRLC_PRESSED) {
				SHOW_PROGRESS_INTR();
				RET_ERR(err_ctrl_c_pressed);
			}

			RET_ON_ERR(crf_ptr->compute_feature());

			crf_ptr->compute_crf_probs();

			likelihood = crf_ptr->compute_likelihood();

			if (verb_em) {
				prism_printf("Iteration #%d:\tlog_likelihood=%.9f\n", iterate, likelihood);
			}

			if (debug_level) {
				prism_printf("After I-step[%d]:\n", iterate);
				prism_printf("likelihood = %.9f\n", likelihood);
				print_egraph(debug_level, PRINT_EM);
			}

			if (!isfinite(likelihood)) {
				emit_internal_error("invalid log likelihood: %s (at iteration #%d)",
				                    isnan(likelihood) ? "NaN" : "infinity", iterate);
				RET_ERR(ierr_invalid_likelihood);
			}
			/*
			if (old_valid && old_likelihood - likelihood > prism_epsilon) {
				emit_error("log likelihood decreased [old: %.9f, new: %.9f] (at iteration #%d)",
				           old_likelihood, likelihood, iterate);
				RET_ERR(err_invalid_likelihood);
			}
			*/
			if (likelihood > 0.0) {
				emit_error("log likelihood greater than zero [value: %.9f] (at iteration #%d)",
				           likelihood, iterate);
				RET_ERR(err_invalid_likelihood);
			}

			if (crf_learn_mode == 1 && iterate > 0) restore_old_gradient();

			RET_ON_ERR(crf_ptr->compute_gradient());

			if (crf_learn_mode == 1 && iterate > 0) {
				compute_LBFGS_y_rho();
				compute_hessian(iterate);
			} else if (crf_learn_mode == 1 && iterate == 0) {
				initialize_LBFGS_q();
			}

			converged = (old_valid && fabs(likelihood - old_likelihood) <= prism_epsilon);

			if (converged || REACHED_MAX_ITERATE(iterate)) {
				break;
			}

			old_likelihood = likelihood;
			old_valid = 1;

			if (debug_level) {
				prism_printf("After O-step[%d]:\n", iterate);
				print_egraph(debug_level, PRINT_EM);
			}

			SHOW_PROGRESS(iterate);

			if (crf_learning_rate == 1) { // annealing
				tmp_epsilon = (annealing_weight / (annealing_weight + iterate)) * crf_epsilon;
			} else if (crf_learning_rate == 2) { // line-search(backtrack)
				if (crf_learn_mode == 1) {
					gf_sd = compute_gf_sd_LBFGS();
				} else {
					gf_sd = compute_gf_sd();
				}
				if (iterate==0) {
					alpha0 = 1;
				} else {
					alpha0 = tmp_epsilon * old_gf_sd / gf_sd;
				}
				if (crf_learn_mode == 1) {
					tmp_epsilon = line_search_LBFGS(crf_ptr,alpha0,crf_ls_rho,crf_ls_c1,likelihood,gf_sd);
				} else {
					tmp_epsilon = line_search(crf_ptr,alpha0,crf_ls_rho,crf_ls_c1,likelihood,gf_sd);
				}

				if (tmp_epsilon < EPS) {
					emit_error("invalid alpha in line search(=0.0) (at iteration #%d)",iterate);
					RET_ERR(err_line_search);
				}
				old_gf_sd = gf_sd;
			} else if (crf_learning_rate == 3) { // line-search(golden section)
				if (crf_learn_mode == 1) {
					tmp_epsilon = golden_section_LBFGS(crf_ptr,0,crf_golden_b);
				} else {
					tmp_epsilon = golden_section(crf_ptr,0,crf_golden_b);
				}
			}
			crf_ptr->update_lambdas(tmp_epsilon);

			iterate++;
		}

		SHOW_PROGRESS_TAIL(converged, iterate, likelihood);

		if (r == 0 || likelihood > crf_ptr->likelihood) {
			crf_ptr->likelihood = likelihood;
			crf_ptr->iterate    = iterate;

			saved = (r < num_restart - 1);
			if (saved) {
				save_params();
			}
		}
	}

	/* reload the parameters saved from the best restart; without this,
	   the save_params() calls above would have no effect */
	if (saved) {
		restore_params();
	}

	if (crf_learn_mode == 1) clean_LBFGS();
	INIT_VISITED_FLAGS;
	return BP_TRUE;
}
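For the annealing schedule in run_grd (crf_learning_rate == 1), the step size decays hyperbolically with the iteration count: tmp_epsilon = (annealing_weight / (annealing_weight + iterate)) * crf_epsilon. A standalone sketch that prints the decayed step over the first few iterations (the constants are illustrative, not PRISM's defaults):

#include <stdio.h>

int main(void) {
	const double crf_epsilon      = 0.1;   /* illustrative base step size */
	const double annealing_weight = 10.0;  /* illustrative decay constant */

	/* step size shrinks as annealing_weight / (annealing_weight + t) */
	for (int iterate = 0; iterate < 5; iterate++) {
		double tmp_epsilon =
			(annealing_weight / (annealing_weight + iterate)) * crf_epsilon;
		printf("iter %d: epsilon = %f\n", iterate, tmp_epsilon);
	}
	return 0;
}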