// Render the ints in `container` as one string. Each element is followed by
// the separator (" \n" when separateStrings is set, otherwise a single
// space); note the separator is also appended after the last element, so a
// non-empty list always yields a trailing separator.
std::string listToString(const std::list<int> &container, bool separateStrings) {
    const std::string sep(separateStrings ? " \n" : " ");
    std::string result = join_range(sep, container.begin(), container.end());
    if (!container.empty()) {
        result.append(sep);
    }
    return result;
}
void parseall(const void *model, const char* path, const char* test_sections_str, int embedding_dimension) { DArray *test_sections = parse_range(test_sections_str); signal(SIGINT, intHandler); log_info("Test sections to be used in %s: %s", path, join_range(test_sections)); CoNLLCorpus test = create_CoNLLCorpus(path, test_sections, embedding_dimension, NULL); log_info("Reading test corpus"); read_corpus(test, false); char* output_filename = (char*) malloc(sizeof (char) * (strlen(modelname) + 13)); check_mem(output_filename); sprintf(output_filename, "%s.gold.conll", modelname); FILE *gold_fp = fopen(output_filename, "w"); sprintf(output_filename, "%s.model.conll", modelname); FILE *model_fp = fopen(output_filename, "w"); ParserTestMetric test_metric = test_KernelPerceptronModel(model, test, true, gold_fp, model_fp); fclose(gold_fp); fclose(model_fp); printParserTestMetric(test_metric); freeParserTestMetric(test_metric); free(output_filename); return; error: log_err("Memory allocation error"); exit(1); }
void* optimize(int max_numit, int max_rec, const char* path, const char* train_sections_str, const char* dev_sections_str, int embedding_dimension) { DArray *train_sections = parse_range(train_sections_str); DArray *dev_sections = parse_range(dev_sections_str); signal(SIGINT, intHandler); log_info("Development sections to be used in %s: %s", path, join_range(dev_sections)); CoNLLCorpus dev = create_CoNLLCorpus(path, dev_sections, embedding_dimension, NULL); log_info("Training sections to be used in %s: %s", path, join_range(train_sections)); CoNLLCorpus train = create_CoNLLCorpus(path, train_sections, embedding_dimension, NULL); log_info("Reading training corpus"); read_corpus(train, false); log_info("Reading dev corpus"); read_corpus(dev, false); float *numit_dev_avg = (float*) malloc(sizeof (float)* max_numit); float *numit_train_avg = (float*) malloc(sizeof (float)*max_numit); check(numit_dev_avg != NULL, "Memory allocation failed for numit_dev_avg"); check(numit_train_avg != NULL, "Memory allocation failed for numit_train_avg"); PerceptronModel model = NULL; KernelPerceptron kmodel = NULL; if (kernel == KLINEAR){ log_info("Creating a averaged perceptron model"); model = create_PerceptronModel(train->transformed_embedding_length, NULL); } else if (kernel == KPOLYNOMIAL) kmodel = create_PolynomialKernelPerceptron(polynomial_degree, bias); else kmodel = create_RBFKernelPerceptron(rbf_lambda); int numit; int best_iter = -1; float best_score = 0.0; for (numit = 1; numit <= max_numit && keepRunning; numit++) { log_info("BEGIN-TRAIN: Iteration %d", numit); if (kernel == KLINEAR) train_once_PerceptronModel(model, train, max_rec); else train_once_KernelPerceptronModel(kmodel, train, max_rec); log_info("END-TRAIN: Iteration %d", numit); ParserTestMetric dev_metric; log_info("BEGIN-TEST: Iteration %d", numit); if (kernel == KLINEAR) dev_metric = test_KernelPerceptronModel(model, dev, true, NULL, NULL); else dev_metric = test_KernelPerceptronModel(kmodel, dev, true, NULL, 
NULL); log_info("END-TEST: Iteration %d", numit); log_info("\nnumit=%d", numit); printParserTestMetric(dev_metric); double dev_acc = (dev_metric->without_punc->true_prediction * 1.) / dev_metric->without_punc->total_prediction; numit_dev_avg[numit - 1] = dev_acc; numit_train_avg[numit - 1] = 0.0; freeParserTestMetric(dev_metric); if (best_score < dev_acc) { if (best_score + MIN_DELTA > dev_acc) log_warn("Improvement is less than %f", MIN_DELTA); best_score = dev_acc; best_iter = numit; if (kernel == KLINEAR) mark_best_PerceptronModel(model, numit); else mark_best_KernelPerceptronModel(kmodel, numit); } if (numit - best_iter > MAX_IDLE_ITER && STOP_ON_CONVERGE) { log_info("No improvement in last %d iterations", MAX_IDLE_ITER); keepRunning = false; } } log_info("Iteration\tAccuracy(dev)\tAccuracy(train)"); for (int i = 0; i < numit - 1; i++) { log_info("%d\t\t%f\t%f%s", i + 1, numit_dev_avg[i], numit_train_avg[i], (i + 1 == best_iter) ? " (*)" : ""); } //free_CoNLLCorpus(dev, true); //free_CoNLLCorpus(train, true); if (kernel == KLINEAR) return (void*) model; else return (void*) kmodel; error: log_err("Memory allocation error"); exit(1); }
Perceptron_t optimize(int max_numit, int max_rec, const char* path, const char* train_sections_str, const char* dev_sections_str) { DArray *train_sections = parse_range(train_sections_str); DArray *dev_sections = parse_range(dev_sections_str); signal(SIGINT, intHandler); log_info("Development sections to be used in %s: %s", path, join_range(dev_sections)); CoNLLCorpus dev = create_CoNLLCorpus(path, dev_sections); log_info("Training sections to be used in %s: %s", path, join_range(train_sections)); CoNLLCorpus train = create_CoNLLCorpus(path, train_sections); log_info("Reading training corpus"); read_corpus(train, max_rec, false); log_info("Reading dev corpus"); read_corpus(dev, -1, false); float *numit_dev_avg = (float*) malloc(sizeof (float)* max_numit); float *numit_train_avg = (float*) malloc(sizeof (float)*max_numit); long *numit_num_sv = (long*) malloc(sizeof (long)*max_numit); check(numit_dev_avg != NULL, "Memory allocation failed for numit_dev_avg"); check(numit_train_avg != NULL, "Memory allocation failed for numit_train_avg"); check(numit_num_sv != NULL, "Memory allocation failed for numit_num_sv"); Perceptron_t model = NULL; if (type == SIMPLE_PERCEPTRON) { #ifdef OPTIMIZED_TRANSFORMATION log_info("Creating a averaged perceptron model with optimized feature transformation."); model = newSimplePerceptron(ft); #else log_info("Creating a averaged perceptron model"); model = newSimplePerceptron(NULL); #endif //model = create_PerceptronModel(train->transformed_embedding_length, NULL); } else { if (kernel == POLYNOMIAL_KERNEL) model = newPolynomialKernelPerceptron(polynomial_degree, bias); //kmodel = newPolynomialKernelPerceptron(polynomial_degree, bias); else model = NULL; //kmodel = create_RBFKernelPerceptron(rbf_lambda); } int numit; int best_iter = -1; float best_score = 0.0; for (numit = 1; numit <= max_numit && keepRunning; numit++) { log_info("BEGIN-TRAIN: Iteration %d", numit); trainPerceptronOnce(model, train, max_rec); log_info("END-TRAIN: Iteration 
%d", numit); ParserTestMetric dev_metric; log_info("BEGIN-TEST: Iteration %d", numit); dev_metric = testPerceptron(model, dev, true, NULL, NULL); log_info("END-TEST: Iteration %d", numit); log_info("\nnumit=%d", numit); printParserTestMetric(dev_metric); double dev_acc = (dev_metric->without_punc->true_prediction * 1.) / dev_metric->without_punc->total_prediction; numit_dev_avg[numit - 1] = dev_acc; numit_train_avg[numit - 1] = 0.0; if ( model->type == KERNEL_PERCEPTRON ) numit_num_sv[ numit - 1 ] = ((KernelPerceptron_t)model->pDeriveObj)->kernel->matrix->ncol; freeParserTestMetric(dev_metric); if (best_score < dev_acc) { if (best_score + MIN_DELTA > dev_acc) log_warn("Improvement is less than %f", MIN_DELTA); best_score = dev_acc; best_iter = numit; //mark_best_PerceptronModel(model, numit); EPARSE_CHECK_RETURN(snapshotBest(model)); } if (numit - best_iter > MAX_IDLE_ITER && STOP_ON_CONVERGE) { log_info("No improvement in last %d iterations", MAX_IDLE_ITER); keepRunning = false; } } log_info("Iteration\tAccuracy(dev)\tAccuracy(train)\t# of SV"); for (int i = 0; i < numit - 1; i++) { log_info("%d\t\t%f\t%f\t%ld%s", i + 1, numit_dev_avg[i], numit_train_avg[i], numit_num_sv[i], (i + 1 == best_iter) ? " (*)" : ""); } //free_CoNLLCorpus(dev, true); //free_CoNLLCorpus(train, true); return model; error: log_err("Memory allocation error"); exit(1); }
// Join the first `nstrings` entries of a C-string array with `separator`,
// delegating to the iterator-range overload.
std::string join(const std::string &separator, const char *strings[], size_t nstrings) {
    const char **first = strings;
    const char **last = first + nstrings;
    return join_range(separator, first, last);
}