Ejemplo n.º 1
0
/**
 * Renders a list of integers as a single string.
 *
 * The elements are combined by join_range() using the chosen separator, and
 * one extra separator is appended after the last element whenever the list
 * is not empty.
 *
 * @param container        integers to render
 * @param separateStrings  when true the separator is " \n", otherwise " "
 * @return the joined text; for an empty list, whatever join_range() yields
 */
std::string
listToString(const std::list<int> &container, bool separateStrings) {
    const std::string delimiter(separateStrings ? " \n" : " ");
    std::string joined = join_range(delimiter, container.begin(), container.end());

    // An empty list gets no trailing delimiter.
    if (container.empty())
        return joined;

    joined += delimiter;
    return joined;
}
Ejemplo n.º 2
0
/**
 * Runs a model over the test sections of a corpus and writes the results to
 * "<modelname>.gold.conll" and "<modelname>.model.conll".
 *
 * `modelname` and `intHandler` are defined outside this function.
 *
 * @param model                model handed unchanged to test_KernelPerceptronModel()
 * @param path                 corpus location passed to create_CoNLLCorpus()
 * @param test_sections_str    section specification parsed by parse_range()
 * @param embedding_dimension  forwarded to create_CoNLLCorpus()
 *
 * If the output file name cannot be allocated the process exits with status 1
 * (check_mem is presumed to jump to the `error` label - confirm against dbg.h).
 */
void parseall(const void *model, const char* path, const char* test_sections_str, int embedding_dimension) {
    DArray *test_sections = parse_range(test_sections_str);

    signal(SIGINT, intHandler);

    log_info("Test sections to be used in %s: %s", path, join_range(test_sections));

    CoNLLCorpus test = create_CoNLLCorpus(path, test_sections, embedding_dimension, NULL);

    log_info("Reading test corpus");
    read_corpus(test, false);

    // ".model.conll" is the longer of the two suffixes; sizeof also counts its
    // terminating NUL, so the buffer fits either file name.
    size_t output_filename_size = strlen(modelname) + sizeof (".model.conll");
    char* output_filename = (char*) malloc(sizeof (char) * output_filename_size);
    check_mem(output_filename);

    snprintf(output_filename, output_filename_size, "%s.gold.conll", modelname);
    FILE *gold_fp = fopen(output_filename, "w");
    if (gold_fp == NULL)
        log_err("Could not open %s for writing", output_filename);

    snprintf(output_filename, output_filename_size, "%s.model.conll", modelname);
    FILE *model_fp = fopen(output_filename, "w");
    if (model_fp == NULL)
        log_err("Could not open %s for writing", output_filename);

    // NOTE(review): the streams are passed on even when an open failed, exactly
    // as before; other callers pass NULL for both streams, so NULL is presumably
    // tolerated - confirm for the case where only one of them is NULL.
    ParserTestMetric test_metric = test_KernelPerceptronModel(model, test, true, gold_fp, model_fp);

    // fclose(NULL) is undefined behaviour, so only close streams that opened.
    if (gold_fp != NULL)
        fclose(gold_fp);
    if (model_fp != NULL)
        fclose(model_fp);

    printParserTestMetric(test_metric);
    freeParserTestMetric(test_metric);

    free(output_filename);

    return;
error:
    log_err("Memory allocation error");

    exit(1);


}
Ejemplo n.º 3
0
/**
 * Trains a perceptron model on the training sections and evaluates it on the
 * development sections after every iteration.
 *
 * The model flavour is selected by `kernel` (defined outside this function):
 * KLINEAR creates a PerceptronModel, KPOLYNOMIAL a polynomial kernel
 * perceptron, anything else an RBF kernel perceptron. Training runs for at
 * most max_numit iterations and stops earlier when `keepRunning` becomes
 * false, which this function itself does once more than MAX_IDLE_ITER
 * iterations pass without improvement and STOP_ON_CONVERGE is set.
 *
 * @param max_numit            maximum number of training iterations
 * @param max_rec              forwarded to the train_once_* functions
 * @param path                 corpus location passed to create_CoNLLCorpus()
 * @param train_sections_str   training section specification for parse_range()
 * @param dev_sections_str     development section specification for parse_range()
 * @param embedding_dimension  forwarded to create_CoNLLCorpus()
 * @return the trained model cast to void*: a PerceptronModel when kernel is
 *         KLINEAR, otherwise a KernelPerceptron
 *
 * If one of the accuracy buffers cannot be allocated the process exits with
 * status 1 (check is presumed to jump to the `error` label - confirm against
 * dbg.h).
 */
void* optimize(int max_numit, int max_rec, const char* path, const char* train_sections_str, const char* dev_sections_str, int embedding_dimension) {
    DArray *train_sections = parse_range(train_sections_str);
    DArray *dev_sections = parse_range(dev_sections_str);

    signal(SIGINT, intHandler);

    log_info("Development sections to be used in %s: %s", path, join_range(dev_sections));

    CoNLLCorpus dev = create_CoNLLCorpus(path, dev_sections, embedding_dimension, NULL);

    log_info("Training sections to be used in %s: %s", path, join_range(train_sections));

    CoNLLCorpus train = create_CoNLLCorpus(path, train_sections, embedding_dimension, NULL);

    log_info("Reading training corpus");
    read_corpus(train, false);

    log_info("Reading dev corpus");
    read_corpus(dev, false);

    // Per-iteration accuracy history, printed as a table after training.
    float *numit_dev_avg = (float*) malloc(sizeof (float)* max_numit);
    float *numit_train_avg = (float*) malloc(sizeof (float)*max_numit);

    check(numit_dev_avg != NULL, "Memory allocation failed for numit_dev_avg");
    check(numit_train_avg != NULL, "Memory allocation failed for numit_train_avg");

    PerceptronModel model = NULL;
    KernelPerceptron kmodel = NULL;
    if (kernel == KLINEAR){   
        log_info("Creating a averaged perceptron model");
        model = create_PerceptronModel(train->transformed_embedding_length, NULL);
    }
    else if (kernel == KPOLYNOMIAL)
        kmodel = create_PolynomialKernelPerceptron(polynomial_degree, bias);
    else
        kmodel = create_RBFKernelPerceptron(rbf_lambda);


    int numit;

    // best_iter stays -1 until some iteration scores above 0.
    int best_iter = -1;
    float best_score = 0.0;

    for (numit = 1; numit <= max_numit && keepRunning; numit++) {
        log_info("BEGIN-TRAIN: Iteration %d", numit);

        if (kernel == KLINEAR)
            train_once_PerceptronModel(model, train, max_rec);
        else
            train_once_KernelPerceptronModel(kmodel, train, max_rec);


        log_info("END-TRAIN: Iteration %d", numit);

        ParserTestMetric dev_metric;
        log_info("BEGIN-TEST: Iteration %d", numit);
        
        // NOTE(review): the linear model is also evaluated through
        // test_KernelPerceptronModel - confirm that function accepts both
        // model kinds.
        if (kernel == KLINEAR)
            dev_metric = test_KernelPerceptronModel(model, dev, true, NULL, NULL);
        else
            dev_metric = test_KernelPerceptronModel(kmodel, dev, true, NULL, NULL);
        log_info("END-TEST: Iteration %d", numit);

        log_info("\nnumit=%d", numit);

        printParserTestMetric(dev_metric);

        // Multiplying by 1. forces floating-point division.
        double dev_acc = (dev_metric->without_punc->true_prediction * 1.) / dev_metric->without_punc->total_prediction;
        numit_dev_avg[numit - 1] = dev_acc;
        // Training accuracy is not measured; the column is always 0.
        numit_train_avg[numit - 1] = 0.0;

        freeParserTestMetric(dev_metric);

        if (best_score < dev_acc) {
            if (best_score + MIN_DELTA > dev_acc)
                log_warn("Improvement is less than %f", MIN_DELTA);

            best_score = dev_acc;
            best_iter = numit;

            if (kernel == KLINEAR)
                mark_best_PerceptronModel(model, numit);
            else
                mark_best_KernelPerceptronModel(kmodel, numit);
        }

        if (numit - best_iter > MAX_IDLE_ITER && STOP_ON_CONVERGE) {
            log_info("No improvement in last %d iterations", MAX_IDLE_ITER);
            keepRunning = false;
        }
    }

    // numit is one past the last completed iteration, hence numit - 1 rows.
    log_info("Iteration\tAccuracy(dev)\tAccuracy(train)");
    for (int i = 0; i < numit - 1; i++) {
        log_info("%d\t\t%f\t%f%s", i + 1, numit_dev_avg[i], numit_train_avg[i], (i + 1 == best_iter) ? " (*)" : "");
    }

    // The history buffers are only needed for the table above; release them
    // so they are not leaked on return.
    free(numit_dev_avg);
    free(numit_train_avg);

    //free_CoNLLCorpus(dev, true);
    //free_CoNLLCorpus(train, true);

    if (kernel == KLINEAR)
        return (void*) model;
    else
        return (void*) kmodel;

error:
    log_err("Memory allocation error");

    exit(1);

}
Ejemplo n.º 4
0
/**
 * Trains a perceptron on the training sections and evaluates it on the
 * development sections after every iteration.
 *
 * `type` and `kernel` (defined outside this function) select the model: a
 * simple perceptron when type is SIMPLE_PERCEPTRON, otherwise a polynomial
 * kernel perceptron when kernel is POLYNOMIAL_KERNEL. No other kernel is
 * created here. Training runs for at most max_numit iterations and stops
 * earlier when `keepRunning` becomes false, which this function itself does
 * once more than MAX_IDLE_ITER iterations pass without improvement and
 * STOP_ON_CONVERGE is set.
 *
 * @param max_numit           maximum number of training iterations
 * @param max_rec             forwarded to read_corpus() for the training
 *                            corpus and to trainPerceptronOnce()
 * @param path                corpus location passed to create_CoNLLCorpus()
 * @param train_sections_str  training section specification for parse_range()
 * @param dev_sections_str    development section specification for parse_range()
 * @return the trained model
 *
 * The process exits with status 1 when a history buffer cannot be allocated
 * (check is presumed to jump to the `error` label - confirm against dbg.h)
 * or when no model could be created.
 */
Perceptron_t optimize(int max_numit, int max_rec, const char* path, const char* train_sections_str, const char* dev_sections_str) {
    DArray *train_sections = parse_range(train_sections_str);
    DArray *dev_sections = parse_range(dev_sections_str);

    signal(SIGINT, intHandler);

    log_info("Development sections to be used in %s: %s", path, join_range(dev_sections));

    CoNLLCorpus dev = create_CoNLLCorpus(path, dev_sections);

    log_info("Training sections to be used in %s: %s", path, join_range(train_sections));

    CoNLLCorpus train = create_CoNLLCorpus(path, train_sections);

    log_info("Reading training corpus");
    read_corpus(train, max_rec, false);

    log_info("Reading dev corpus");
    read_corpus(dev, -1, false);

    // Per-iteration history, printed as a table after training.
    float *numit_dev_avg = (float*) malloc(sizeof (float)* max_numit);
    float *numit_train_avg = (float*) malloc(sizeof (float)*max_numit);
    long *numit_num_sv = (long*) malloc(sizeof (long)*max_numit);

    check(numit_dev_avg != NULL, "Memory allocation failed for numit_dev_avg");
    check(numit_train_avg != NULL, "Memory allocation failed for numit_train_avg");
    check(numit_num_sv != NULL, "Memory allocation failed for numit_num_sv");

    Perceptron_t model = NULL;
    
    if (type == SIMPLE_PERCEPTRON) {
        
        #ifdef	OPTIMIZED_TRANSFORMATION
        log_info("Creating a averaged perceptron model with optimized feature transformation.");
        model = newSimplePerceptron(ft);
        #else
        log_info("Creating a averaged perceptron model");
        model = newSimplePerceptron(NULL);
        #endif
        //model = create_PerceptronModel(train->transformed_embedding_length, NULL);
    }
    else {
        if (kernel == POLYNOMIAL_KERNEL)
            model = newPolynomialKernelPerceptron(polynomial_degree, bias);
            //kmodel = newPolynomialKernelPerceptron(polynomial_degree, bias);
        else
            model = NULL;
            //kmodel = create_RBFKernelPerceptron(rbf_lambda);
    }

    // The training loop dereferences model, so stop here with a clear message
    // instead of crashing when no model was created.
    if (model == NULL) {
        log_err("No perceptron model could be created (unsupported kernel or failed construction)");
        exit(1);
    }


    int numit;

    // best_iter stays -1 until some iteration scores above 0.
    int best_iter = -1;
    float best_score = 0.0;

    for (numit = 1; numit <= max_numit && keepRunning; numit++) {
        log_info("BEGIN-TRAIN: Iteration %d", numit);

        trainPerceptronOnce(model, train, max_rec);

        log_info("END-TRAIN: Iteration %d", numit);

        ParserTestMetric dev_metric;
        log_info("BEGIN-TEST: Iteration %d", numit);

        dev_metric = testPerceptron(model, dev, true, NULL, NULL);
        
        log_info("END-TEST: Iteration %d", numit);

        log_info("\nnumit=%d", numit);

        printParserTestMetric(dev_metric);

        // Multiplying by 1. forces floating-point division.
        double dev_acc = (dev_metric->without_punc->true_prediction * 1.) / dev_metric->without_punc->total_prediction;
        numit_dev_avg[numit - 1] = dev_acc;
        // Training accuracy is not measured; the column is always 0.
        numit_train_avg[numit - 1] = 0.0;

        // Only kernel perceptrons expose a kernel matrix; record 0 for other
        // models so the summary table never prints an uninitialized value.
        if (model->type == KERNEL_PERCEPTRON)
            numit_num_sv[numit - 1] = ((KernelPerceptron_t)model->pDeriveObj)->kernel->matrix->ncol;
        else
            numit_num_sv[numit - 1] = 0;

        freeParserTestMetric(dev_metric);

        if (best_score < dev_acc) {
            if (best_score + MIN_DELTA > dev_acc)
                log_warn("Improvement is less than %f", MIN_DELTA);

            best_score = dev_acc;
            best_iter = numit;

            //mark_best_PerceptronModel(model, numit);
            
            // NOTE(review): EPARSE_CHECK_RETURN presumably returns early on
            // failure; if so the history buffers are not released on that
            // path - confirm against the macro definition.
            EPARSE_CHECK_RETURN(snapshotBest(model));
        }

        if (numit - best_iter > MAX_IDLE_ITER && STOP_ON_CONVERGE) {
            log_info("No improvement in last %d iterations", MAX_IDLE_ITER);
            keepRunning = false;
        }
    }

    // numit is one past the last completed iteration, hence numit - 1 rows.
    log_info("Iteration\tAccuracy(dev)\tAccuracy(train)\t# of SV");
    for (int i = 0; i < numit - 1; i++) {
        log_info("%d\t\t%f\t%f\t%ld%s", i + 1, numit_dev_avg[i], numit_train_avg[i], numit_num_sv[i], (i + 1 == best_iter) ? " (*)" : "");
    }

    // The history buffers are only needed for the table above; release them
    // so they are not leaked on return.
    free(numit_dev_avg);
    free(numit_train_avg);
    free(numit_num_sv);

    //free_CoNLLCorpus(dev, true);
    //free_CoNLLCorpus(train, true);

    return model;

error:
    log_err("Memory allocation error");

    exit(1);

}
Ejemplo n.º 5
0
/**
 * Joins an array of C strings into one string.
 *
 * Delegates to join_range() over the half-open range covering the first
 * nstrings entries of the array.
 *
 * @param separator  text passed to join_range() as the separator
 * @param strings    array of C strings
 * @param nstrings   number of entries of `strings` to join
 * @return the string produced by join_range()
 */
std::string
join(const std::string &separator, const char *strings[], size_t nstrings) {
    const char **first = strings;
    const char **last = first + nstrings;
    return join_range(separator, first, last);
}