Exemple #1
0
  int segment(const char* str, std::vector<std::string> & words) {
    ltp::framework::ViterbiFeatureContext ctx;
    ltp::framework::ViterbiScoreMatrix scm;
    ltp::framework::ViterbiDecoder decoder;
    ltp::segmentor::Instance inst;
 
    int ret = preprocessor.preprocess(str, inst.raw_forms, inst.forms,
      inst.chartypes);

    if (-1 == ret || 0 == ret) {
      words.clear();
      return 0;
    }

    ltp::segmentor::SegmentationConstrain con;
    con.regist(&(inst.chartypes));
    build_lexicon_match_state(lexicons, &inst);
    extract_features(inst, model, &ctx, false);
    calculate_scores(inst, (*model), ctx, true, &scm);

    // allocate a new decoder so that the segmentor support multithreaded
    // decoding. this modification was committed by niuox
    decoder.decode(scm, con, inst.predict_tagsidx);
    build_words(inst.raw_forms, inst.predict_tagsidx, words);

    return words.size();
  }
Exemple #2
0
// Rebuild the separator pool for graph G. Unless this is the initial build,
// the previous partitions/separators are discarded first. Optionally refreshes
// the pool scores against the current population.
void separator_pool::renew_pool(MISConfig & config, graph_access & G, bool init, bool scores, population_mis & pop) {
    mis_log::instance()->print_pool_title();
    mis_log::instance()->restart_building_pool_timer();

    // Anything but the very first build starts from a clean slate.
    if (!init) {
        clear_partitions();
        clear_separators();
        clear_k_partitions();
        clear_k_separators();
    }

    generate_partitions(config, G);
    generate_separators(config, G);

    // Multiway vertex-cover mode uses k-partitions; otherwise k-separators.
    if (config.use_multiway_vc) {
        generate_k_partitions(config, G);
    } else {
        generate_k_separators(config, G);
    }

    if (scores) {
        calculate_scores(config, G, pop);
    }
    mis_log::instance()->print_separator();
}
Exemple #3
0
// Evaluate the current model on the holdout file and log tagging precision
// (correctly predicted tags / total tags). Returns silently if the holdout
// file cannot be opened.
void Postagger::evaluate(void) {
    const char * holdout_file = train_opt.holdout_file.c_str();

    ifstream ifs(holdout_file);

    if (!ifs) {
        ERROR_LOG("Failed to open holdout file.");
        return;
    }

    PostaggerReader reader(ifs, true);
    Instance * inst = NULL;

    int num_recalled_tags = 0;
    int num_tags = 0;

    while ((inst = reader.next())) {
        int len = inst->size();
        // Map gold-standard tag strings to label indices before decoding.
        inst->tagsidx.resize(len);
        for (int i = 0; i < len; ++ i) {
            inst->tagsidx[i] = model->labels.index(inst->tags[i]);
        }

        extract_features(inst, false);
        calculate_scores(inst, true);
        decoder->decode(inst);

        num_recalled_tags += inst->num_corrected_predicted_tags();
        num_tags += inst->size();

        delete inst;
    }

    // Fix: guard against an empty holdout file — the original performed a
    // 0/0 division here, producing NaN in the precision log line.
    double p = (num_tags > 0)
        ? (double)num_recalled_tags / num_tags
        : 0.;

    TRACE_LOG("P: %lf ( %d / %d )", p, num_recalled_tags, num_tags);
    return;
}
Exemple #4
0
// Build the end-of-game score dialog: compute the final scores, size and
// center the dialog over its parent window, then create the child widgets
// (title text, body text, icons, buttons).
void t_score_dialog::initialize()
{
	// calculate the final scores
	calculate_scores( m_map, m_winner_team, m_scores );

	m_layout = g_layout.get();
	m_difficulty_icons = g_difficulty_icons.get();

	//////////////////////////////////////////////////////////////////
 	// find dimensions & center
	t_screen_point        origin(0,0);
	// NOTE(review): find() is assumed to always return a valid layer for
	// "background" and "title"; a missing layer would dereference null below
	// — confirm the layout guarantees these entries exist.
	t_bitmap_layer const* layer = m_layout->find( "background" );
	t_screen_rect		  rect = layer->get_rect();

	set_bitmap( layer );
	// Offset the background rect so the dialog is centered in its parent.
	rect += t_screen_point( (get_parent()->get_width() - rect.width()) / 2, 
		                           (get_parent()->get_height() - rect.height()) / 2 );
	move( rect );
	set_drop_shadow();

	//////////////////////////////////////////////////////////////////

	// add title
	t_text_window* window;
	layer = m_layout->find( "title" );
	rect = layer->get_rect();
	// NOTE(review): the raw `new` below is presumably owned by `this` (passed
	// as the parent widget) and freed by the widget tree — verify; otherwise
	// this allocation leaks.
	window = new t_text_window( get_font( rect.height() ), rect, this, 
		                         k_score_title, t_pixel_24(0,0,0) );
	window->set_center_horizontal();
	window->set_center_vertical();
	window->set_drop_shadow( true, t_pixel_24(200,200,200));

	create_text();
	create_icons();
	create_buttons();
}
Exemple #5
0
// Load a trained model, tag every instance in the test file, write the tagged
// output to stdout, and log precision plus wall-clock time. Returns silently
// (after logging) on any I/O or model-loading failure.
void Postagger::test(void) {
    const char * model_file = test_opt.model_file.c_str();
    ifstream mfs(model_file, std::ifstream::binary);

    if (!mfs) {
        ERROR_LOG("Failed to load model");
        return;
    }

    model = new Model;
    if (!model->load(mfs)) {
        ERROR_LOG("Failed to load model");
        // Fix: release the partially-loaded model instead of leaking it and
        // leaving the member pointing at unusable state.
        delete model;
        model = NULL;
        return;
    }

    TRACE_LOG("Number of labels                 [%d]", model->num_labels());
    TRACE_LOG("Number of features               [%d]", model->space.num_features());
    TRACE_LOG("Number of dimension              [%d]", model->space.dim());

    const char * test_file = test_opt.test_file.c_str();

    ifstream ifs(test_file);

    if (!ifs) {
        ERROR_LOG("Failed to open holdout file.");
        return;
    }

    decoder = new Decoder(model->num_labels());
    PostaggerReader reader(ifs, true);
    PostaggerWriter writer(cout);
    Instance * inst = NULL;

    int num_recalled_tags = 0;
    int num_tags = 0;

    double before = get_time();

    while ((inst = reader.next())) {
        int len = inst->size();
        // Map gold tag strings to label indices so recall can be measured.
        inst->tagsidx.resize(len);
        for (int i = 0; i < len; ++ i) {
            inst->tagsidx[i] = model->labels.index(inst->tags[i]);
        }

        extract_features(inst);
        calculate_scores(inst, true);
        decoder->decode(inst);

        build_labels(inst, inst->predicted_tags);
        writer.write(inst);
        num_recalled_tags += inst->num_corrected_predicted_tags();
        num_tags += inst->size();

        delete inst;
    }

    double after = get_time();

    // Fix: avoid a 0/0 division (NaN) when the test file holds no instances.
    double p = (num_tags > 0)
        ? (double)num_recalled_tags / num_tags
        : 0.;

    TRACE_LOG("P: %lf ( %d / %d )", p, num_recalled_tags, num_tags);
    TRACE_LOG("Eclipse time %lf", after - before);

    // NOTE(review): this very long sleep was in the original — presumably a
    // debugging/profiling leftover that keeps the process alive; confirm
    // intent before removing.
    sleep(1000000);
    return;
}
Exemple #6
0
// Train the postagger: read the training data, build the label dictionary and
// feature space, then run the configured online learner ("pa" passive-
// aggressive or "ap" averaged perceptron) for train_opt.max_iter epochs.
// After each epoch a truncated model snapshot is evaluated on the holdout
// set and saved to "<model_name>.<iter>.model".
void Postagger::train(void) {
    const char * train_file = train_opt.train_file.c_str();

    // read in training instance
    read_instance(train_file);
    TRACE_LOG("Read in [%d] instances.", train_dat.size());

    model = new Model;
    // build tag dictionary, map string tag to index
    TRACE_LOG("Start build configuration");
    build_configuration();
    TRACE_LOG("Build configuration is done.");
    TRACE_LOG("Number of labels: [%d]", model->labels.size());

    // build feature space from the training instance
    TRACE_LOG("Start building feature space.");
    build_feature_space();
    TRACE_LOG("Building feature space is done.");
    TRACE_LOG("Number of features: [%d]", model->space.num_features());

    model->param.realloc(model->space.dim());
    TRACE_LOG("Allocate [%d] dimensition parameter.", model->space.dim());

    PostaggerWriter writer(cout);

    if (train_opt.algorithm == "mira") {
        // use mira algorithm
        // NOTE(review): the MIRA branch is entirely commented out, so
        // selecting "mira" currently performs no training at all.
        /*kbest_decoder = new KBestDecoder(L);

        for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
            for (int i = 0; i < train_dat.size(); ++ i) {
                extract_features(train_dat[i]);
                calculate_scores(train_dat[i]);

                KBestDecoder::KBestDecodeResult result;
                kbest_decoder->decode(train_dat[i], result);
            }
        }*/
    } else {
        // use pa or average perceptron algorithm
        decoder = new Decoder(model->num_labels());
        TRACE_LOG("Allocated plain decoder");

        for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
            TRACE_LOG("Training iteraition [%d]", (iter + 1));
            for (int i = 0; i < train_dat.size(); ++ i) {
                // extract_features(train_dat[i]);

                Instance * inst = train_dat[i];
                calculate_scores(inst, false);
                decoder->decode(inst);

                // Gold-tag features are collected once per instance and
                // cached (dim() == 0 means "not collected yet"); predicted
                // features are recollected every pass.
                if (inst->features.dim() == 0) {
                    collect_features(inst, inst->tagsidx, inst->features);
                }
                collect_features(inst, inst->predicted_tagsidx, inst->predicted_features);

                // writer.debug(inst, true);

                if (train_opt.algorithm == "pa") {
                    // Passive-aggressive update: move the parameters along
                    // (gold features - predicted features).
                    SparseVec update_features;
                    update_features.zero();
                    update_features.add(train_dat[i]->features, 1.);
                    update_features.add(train_dat[i]->predicted_features, -1.);

                    double error = train_dat[i]->num_errors();
                    double score = model->param.dot(update_features, false);
                    double norm = update_features.L2();

                    // step = (loss - margin) / ||delta||; skip the update when
                    // the difference vector is numerically empty.
                    double step = 0.;
                    if (norm < EPS) {
                       step = 0;
                    } else {
                        step = (error - score) / norm;
                    }

                    // The middle argument is the global update timestamp used
                    // for parameter averaging.
                    model->param.add(update_features,
                            iter * train_dat.size() + i + 1,
                            step);
                } else if (train_opt.algorithm == "ap") {
                    // Averaged-perceptron update with a fixed step of 1,
                    // same timestamped-averaging scheme as above.
                    SparseVec update_features;
                    update_features.zero();
                    update_features.add(train_dat[i]->features, 1.);
                    update_features.add(train_dat[i]->predicted_features, -1.);

                    model->param.add(update_features,
                            iter * train_dat.size() + i + 1,
                            1.);
                }

                if ((i+1) % train_opt.display_interval == 0) {
                    TRACE_LOG("[%d] instances is trained.", i+1);
                }
            }
            TRACE_LOG("[%d] instances is trained.", train_dat.size());

            // Flush pending averaged-parameter updates up to the current
            // global timestamp before taking a snapshot.
            model->param.flush( train_dat.size() * (iter + 1) );
            // Swap the truncated snapshot in as the active model so
            // evaluate() measures it, ...
            Model * new_model = truncate();
            swap(model, new_model);
            evaluate();

            std::string saved_model_file = (train_opt.model_name + "." + strutils::to_str(iter) + ".model");
            std::ofstream ofs(saved_model_file.c_str(), std::ofstream::binary);

            // ... then swap the full model back for the next epoch. After
            // this second swap `new_model` again holds the truncated
            // snapshot, which is what gets saved and freed.
            swap(model, new_model);
            new_model->save(ofs);
            delete new_model;
            // model->save(ofs);

            TRACE_LOG("Model for iteration [%d] is saved to [%s]",
                    iter + 1,
                    saved_model_file.c_str());
        }
    }
}