// Rewrites the `id` of every element stored in `corp` by passing the current
// id through `alph.new_id()`.
//
// For each sequence index in [0, corp.size()) the label sequence is rewritten
// first, then the input sequence. Both are modified in place through the
// references returned by `corp.label()` / `corp.input()`.
//
// NOTE(review): presumably meant to be called after the alphabet's ids have
// been reassigned (e.g. by `optimize()`), so that `new_id()` has a mapping to
// apply — confirm against PhonemeAlphabet.
void remap(PhonemeAlphabet& alph, Corpus& corp) {
    unsigned seq = 0;
    while (seq < corp.size()) {
        // Label side of the sequence.
        auto& label_seq = corp.label(seq);
        for (auto& entry : label_seq) {
            entry.id = alph.new_id(entry.id);
        }

        // Input side of the same sequence.
        auto& input_seq = corp.input(seq);
        for (auto& entry : input_seq) {
            entry.id = alph.new_id(entry.id);
        }

        seq++;
    }
}
bool init_tool(int argc, const char** argv, Options* opts) { *opts = Options::parse_options(argc, argv); if(!Options::has_required(*opts)) return false; COLOR_ENABLED = !opts->has_opt("no-color"); FORCE_SCALE = opts->has_opt("force-scale"); SMOOTH = opts->has_opt("smooth"); SCALE_ENERGY = opts->has_opt("energy"); PRINT_SCALE = opts->has_opt("print-scale"); REPORT_PROGRESS = opts->has_opt("progress"); VLOG = std::ofstream(opts->get_opt<std::string>("vlog", "vlog.log")); crf.label_alphabet = &alphabet_synth; baseline_crf.label_alphabet = &alphabet_synth; build_data(*opts); pre_process(alphabet_synth, corpus_synth); pre_process(alphabet_test, corpus_test); alphabet_synth.optimize(); remap(alphabet_synth, corpus_synth); alphabet_test.optimize(); remap(alphabet_test, corpus_test); auto testSize = opts->get_opt<unsigned>("test-corpus-size", 10); for(auto i = testSize; i < corpus_test.size(); i++) corpus_eval.add(corpus_test.input(i), corpus_test.label(i)); corpus_test.set_max_size(testSize); INFO("Synth sequences = " << corpus_synth.size()); INFO("Test sequences = " << corpus_test.size()); INFO("Eval sequences = " << corpus_eval.size()); return true; }