int segment(const char* str, std::vector<std::string> & words) { ltp::framework::ViterbiFeatureContext ctx; ltp::framework::ViterbiScoreMatrix scm; ltp::framework::ViterbiDecoder decoder; ltp::segmentor::Instance inst; int ret = preprocessor.preprocess(str, inst.raw_forms, inst.forms, inst.chartypes); if (-1 == ret || 0 == ret) { words.clear(); return 0; } ltp::segmentor::SegmentationConstrain con; con.regist(&(inst.chartypes)); build_lexicon_match_state(lexicons, &inst); extract_features(inst, model, &ctx, false); calculate_scores(inst, (*model), ctx, true, &scm); // allocate a new decoder so that the segmentor support multithreaded // decoding. this modification was committed by niuox decoder.decode(scm, con, inst.predict_tagsidx); build_words(inst.raw_forms, inst.predict_tagsidx, words); return words.size(); }
// Rebuild the separator/partition pool for graph G.
//
// Parameters:
//   config - run configuration; use_multiway_vc selects which k-way pool
//            variant is generated.
//   G      - graph the pools are computed on.
//   init   - true on the very first build (nothing to clear yet).
//   scores - when true, recompute pool scores against the population.
//   pop    - population used for score calculation.
void separator_pool::renew_pool(MISConfig & config, graph_access & G, bool init, bool scores, population_mis & pop) {
  mis_log::instance()->print_pool_title();
  mis_log::instance()->restart_building_pool_timer();

  // Every renewal after the first must drop the stale pool contents.
  if (init == false) {
    clear_partitions();
    clear_separators();
    clear_k_partitions();
    clear_k_separators();
  }

  // Rebuild the two-way pools, then exactly one flavor of k-way pool.
  generate_partitions(config, G);
  generate_separators(config, G);
  if (!config.use_multiway_vc) {
    generate_k_separators(config, G);
  } else {
    generate_k_partitions(config, G);
  }

  if (scores) {
    calculate_scores(config, G, pop);
  }

  mis_log::instance()->print_separator();
}
void Postagger::evaluate(void) { const char * holdout_file = train_opt.holdout_file.c_str(); ifstream ifs(holdout_file); if (!ifs) { ERROR_LOG("Failed to open holdout file."); return; } PostaggerReader reader(ifs, true); Instance * inst = NULL; int num_recalled_tags = 0; int num_tags = 0; while ((inst = reader.next())) { int len = inst->size(); inst->tagsidx.resize(len); for (int i = 0; i < len; ++ i) { inst->tagsidx[i] = model->labels.index(inst->tags[i]); } extract_features(inst, false); calculate_scores(inst, true); decoder->decode(inst); num_recalled_tags += inst->num_corrected_predicted_tags(); num_tags += inst->size(); delete inst; } double p = (double)num_recalled_tags / num_tags; TRACE_LOG("P: %lf ( %d / %d )", p, num_recalled_tags, num_tags); return; }
void t_score_dialog::initialize() { // calculate the final scores calculate_scores( m_map, m_winner_team, m_scores ); m_layout = g_layout.get(); m_difficulty_icons = g_difficulty_icons.get(); ////////////////////////////////////////////////////////////////// // find dimensions & center t_screen_point origin(0,0); t_bitmap_layer const* layer = m_layout->find( "background" ); t_screen_rect rect = layer->get_rect(); set_bitmap( layer ); rect += t_screen_point( (get_parent()->get_width() - rect.width()) / 2, (get_parent()->get_height() - rect.height()) / 2 ); move( rect ); set_drop_shadow(); ////////////////////////////////////////////////////////////////// // add title t_text_window* window; layer = m_layout->find( "title" ); rect = layer->get_rect(); window = new t_text_window( get_font( rect.height() ), rect, this, k_score_title, t_pixel_24(0,0,0) ); window->set_center_horizontal(); window->set_center_vertical(); window->set_drop_shadow( true, t_pixel_24(200,200,200)); create_text(); create_icons(); create_buttons(); }
void Postagger::test(void) { const char * model_file = test_opt.model_file.c_str(); ifstream mfs(model_file, std::ifstream::binary); if (!mfs) { ERROR_LOG("Failed to load model"); return; } model = new Model; if (!model->load(mfs)) { ERROR_LOG("Failed to load model"); return; } TRACE_LOG("Number of labels [%d]", model->num_labels()); TRACE_LOG("Number of features [%d]", model->space.num_features()); TRACE_LOG("Number of dimension [%d]", model->space.dim()); const char * test_file = test_opt.test_file.c_str(); ifstream ifs(test_file); if (!ifs) { ERROR_LOG("Failed to open holdout file."); return; } decoder = new Decoder(model->num_labels()); PostaggerReader reader(ifs, true); PostaggerWriter writer(cout); Instance * inst = NULL; int num_recalled_tags = 0; int num_tags = 0; double before = get_time(); while ((inst = reader.next())) { int len = inst->size(); inst->tagsidx.resize(len); for (int i = 0; i < len; ++ i) { inst->tagsidx[i] = model->labels.index(inst->tags[i]); } extract_features(inst); calculate_scores(inst, true); decoder->decode(inst); build_labels(inst, inst->predicted_tags); writer.write(inst); num_recalled_tags += inst->num_corrected_predicted_tags(); num_tags += inst->size(); delete inst; } double after = get_time(); double p = (double)num_recalled_tags / num_tags; TRACE_LOG("P: %lf ( %d / %d )", p, num_recalled_tags, num_tags); TRACE_LOG("Eclipse time %lf", after - before); sleep(1000000); return; }
// Train the postagger: read the training data, build the label set and
// feature space, then run online training (PA or averaged perceptron;
// the MIRA branch is commented out). After every iteration the model is
// truncated, evaluated on the holdout set, and saved to
// "<model_name>.<iter>.model".
void Postagger::train(void) {
  const char * train_file = train_opt.train_file.c_str();
  // read in training instance
  read_instance(train_file);
  TRACE_LOG("Read in [%d] instances.", train_dat.size());
  model = new Model;
  // build tag dictionary, map string tag to index
  TRACE_LOG("Start build configuration");
  build_configuration();
  TRACE_LOG("Build configuration is done.");
  TRACE_LOG("Number of labels: [%d]", model->labels.size());
  // build feature space from the training instance
  TRACE_LOG("Start building feature space.");
  build_feature_space();
  TRACE_LOG("Building feature space is done.");
  TRACE_LOG("Number of features: [%d]", model->space.num_features());
  // Parameter vector is sized to the full feature space dimension.
  model->param.realloc(model->space.dim());
  TRACE_LOG("Allocate [%d] dimensition parameter.", model->space.dim());
  PostaggerWriter writer(cout);
  if (train_opt.algorithm == "mira") {
    // use mira algorithm
    // NOTE(review): the MIRA path is dead code — selecting "mira" trains
    // nothing. Kept commented out as in the original.
    /*kbest_decoder = new KBestDecoder(L);
    for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
      for (int i = 0; i < train_dat.size(); ++ i) {
        extract_features(train_dat[i]);
        calculate_scores(train_dat[i]);
        KBestDecoder::KBestDecodeResult result;
        kbest_decoder->decode(train_dat[i], result);
      }
    }*/
  } else {
    // use pa or average perceptron algorithm
    decoder = new Decoder(model->num_labels());
    TRACE_LOG("Allocated plain decoder");
    for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
      TRACE_LOG("Training iteraition [%d]", (iter + 1));
      for (int i = 0; i < train_dat.size(); ++ i) {
        // extract_features(train_dat[i]);
        Instance * inst = train_dat[i];
        calculate_scores(inst, false);
        decoder->decode(inst);
        // Gold features are collected lazily, only once per instance
        // (dim() == 0 means they were never collected).
        if (inst->features.dim() == 0) {
          collect_features(inst, inst->tagsidx, inst->features);
        }
        // Predicted features are re-collected every pass since the
        // prediction changes as the model is updated.
        collect_features(inst, inst->predicted_tagsidx, inst->predicted_features);
        // writer.debug(inst, true);
        if (train_opt.algorithm == "pa") {
          // Passive-aggressive update: step along (gold - predicted)
          // features, scaled by margin loss over the squared norm.
          SparseVec update_features;
          update_features.zero();
          update_features.add(train_dat[i]->features, 1.);
          update_features.add(train_dat[i]->predicted_features, -1.);
          double error = train_dat[i]->num_errors();
          double score = model->param.dot(update_features, false);
          double norm = update_features.L2();
          double step = 0.;
          // Guard against division by a (near-)zero norm.
          if (norm < EPS) {
            step = 0;
          } else {
            step = (error - score) / norm;
          }
          // The timestamp (iter * N + i + 1) supports parameter averaging.
          model->param.add(update_features,
                           iter * train_dat.size() + i + 1,
                           step);
        } else if (train_opt.algorithm == "ap") {
          // Averaged perceptron update: unit step along (gold - predicted).
          SparseVec update_features;
          update_features.zero();
          update_features.add(train_dat[i]->features, 1.);
          update_features.add(train_dat[i]->predicted_features, -1.);
          model->param.add(update_features,
                           iter * train_dat.size() + i + 1,
                           1.);
        }
        if ((i+1) % train_opt.display_interval == 0) {
          TRACE_LOG("[%d] instances is trained.", i+1);
        }
      }
      TRACE_LOG("[%d] instances is trained.", train_dat.size());
      // Flush pending averaged updates up to the current timestamp.
      model->param.flush( train_dat.size() * (iter + 1) );
      // Swap in the truncated model so evaluate() scores it, then swap
      // back so training continues on the full model; the truncated
      // copy is what gets saved and deleted.
      Model * new_model = truncate();
      swap(model, new_model);
      evaluate();
      std::string saved_model_file = (train_opt.model_name + "." + strutils::to_str(iter) + ".model");
      std::ofstream ofs(saved_model_file.c_str(), std::ofstream::binary);
      swap(model, new_model);
      new_model->save(ofs);
      delete new_model;
      // model->save(ofs);
      TRACE_LOG("Model for iteration [%d] is saved to [%s]",
                iter + 1,
                saved_model_file.c_str());
    }
  }
}