Ejemplo n.º 1
0
/*
 * A simple load and save test case
 */
int test_load_save()
{
    int i, j, err = 0;
    fvec_t *f, *g;
    gzFile z;

    test_printf("Loading and saving of feature vectors");

    fvec_reset_delim();
    config_set_string(&cfg, "generic.event_delim", " ");
    config_set_int(&cfg, "features.ngram_len", 2);

    /* Create and save feature vectors */
    z = gzopen(TEST_FILE, "wb9");
    if (!z) {
        printf("Could not create file (ignoring)\n");
        return FALSE;
    }

    for (i = 0; tests[i].str; i++) {
        f = fvec_extract(tests[i].str, strlen(tests[i].str), "test");
        fvec_save(f, z);
        fvec_destroy(f);
    }
    gzclose(z);


    /* Load and compare feature vectors */
    z = gzopen(TEST_FILE, "r");

    for (i = 0; tests[i].str; i++) {
        f = fvec_extract(tests[i].str, strlen(tests[i].str), "test");
        g = fvec_load(z);

        /* Check dimensions and values */
        for (j = 0; j < f->len && j < g->len; j++) {
            if (f->dim[j] != g->dim[j]) {
                test_error("(%d) f->dim[%d] != g->dim[%d]", i, j, j);
                break;
            }
            if (fabs(f->val[j] - g->val[j]) > 10e-10) {
                test_error("(%d) f->val[%d] != g->val[%d]", i, j, j);
                break;
            }
        }
        err += (j < f->len || j < g->len);

        fvec_destroy(f);
        fvec_destroy(g);
    }

    gzclose(z);
    unlink(TEST_FILE);

    test_return(err, i);
    return err;
}
Ejemplo n.º 2
0
/* 
 * A simple test for the binary embedding
 */
int test_embed_bin()
{
    int i, j, n, err = 0;
    string_t strs[10];

    input_config("lines");
    char *test_file = getenv("TEST_FILE");
    n = input_open(test_file);
    input_read(strs, n);

    test_printf("Testing binary embedding");
    config_set_string(&cfg, "features.vect_embed", "bin");
    config_set_string(&cfg, "features.vect_norm", "none");

    for (i = 0, err = 0; i < n; i++) {
        fvec_t *fv = fvec_extract(strs[i].str, strs[i].len);
        double n = 0;
        for (j = 0; j < fv->len; j++)
            n += fv->val[j];
        err += fabs(n - fv->len) > 1e-6;
        fvec_destroy(fv);
    }
    test_return(err, n);

    input_free(strs, n);
    input_close();
    return err;
}
Ejemplo n.º 3
0
/* 
 * A simple static test for the feature vectors
 */
int test_static()
{
    int i, err = 0;
    fvec_t *f;

    test_printf("Extraction of feature vectors");

    for (i = 0; tests[i].str; i++) {
        fvec_reset_delim();
        config_set_string(&cfg, "features.ngram_delim", tests[i].dlm);
        config_set_int(&cfg, "features.ngram_len", tests[i].nlen);

        /* Extract features */
        f = fvec_extract(tests[i].str, strlen(tests[i].str), "test");

        /* Check for correct number of dimensions */
        if (f->len != tests[i].len) {
            test_error("(%d) len %d != %d", i, f->len, tests[i].len);
            err++;
        }

        fvec_destroy(f);
    }

    test_return(err, i);
    return err;
}
Ejemplo n.º 4
0
/* 
 * A simple stress test for the feature table
 */
int test_stress()
{
    int i, j, err = 0;
    fvec_t *f;
    char buf[STR_LENGTH + 1];

    test_printf("Stress test for feature vectors");

    config_set_string(&cfg, "features.ngram_delim", "0");

    ftable_init();

    for (i = 0; i < STRESS_RUNS; i++) {
        config_set_int(&cfg, "features.ngram_len", rand() % 10 + 1);

        /* Create random key and string */
        for (j = 0; j < STR_LENGTH; j++)
            buf[j] = rand() % 10 + '0';
        buf[j] = 0;

        /* Extract features */
        f = fvec_extract(buf, strlen(buf), "test");
        /* Destroy features */
        fvec_destroy(f);
    }

    ftable_destroy();

    test_return(err, STRESS_RUNS);
    return err;
}
Ejemplo n.º 5
0
/**
 * Test clustering
 */
int test_cluster_single()
{
    int i, j, k, err = 0;

    test_printf("Clustering using prototypes (single)");

    /* Prepare test data */ ;
    farray_t *fa = farray_create("test");
    for (i = 0; i < DATA_LEN; i++) {
        fvec_t *f = fvec_extract(test_data[i], strlen(test_data[i]), NULL);
        farray_add(fa, f, "test");
    }

    /* Get clustering */
    config_set_string(&cfg, "cluster.link_mode", "single");
    cluster_t *c = cluster_linkage(fa, 0);

    /* Check number of clusters */
    err += (c->num != DATA_CLUSTER);

    /* Check position of prototypes */
    for (k = 0; k < DATA_LEN; k += DATA_LEN / DATA_CLUSTER)
        for (j = 0; j < DATA_LEN / DATA_CLUSTER - 1; j++)
            err += c->cluster[k + j] != c->cluster[k + j + 1];

    /* Clean up */
    cluster_destroy(c);
    farray_destroy(fa);

    test_return(err, 1 + DATA_CLUSTER * (DATA_LEN / DATA_CLUSTER - 1));
    return err;
}
Ejemplo n.º 6
0
/* 
 * A simple stress test for classification
 */
int test_stress()
{
    int i, j, k, err = 0;
    fvec_t *f;
    farray_t *fa;
    char buf[STR_LENGTH + 1], label[32];

    test_printf("Stress test for classification");

    for (i = 0; i < STRESS_RUNS; i++) {
        /* Create array */
        fa = farray_create("test");

        for (j = 0; j < NUM_VECTORS; j++) {
            for (k = 0; k < STR_LENGTH; k++)
                buf[k] = rand() % 10 + '0';
            buf[k] = 0;

            /* Extract features */
            f = fvec_extract(buf, strlen(buf), "test");
            snprintf(label, 32, "label%.2d", rand() % 10);

            /* Add to array */
            farray_add(fa, f, label);
        }

        assign_t *a = class_assign(fa, fa);
        assign_destroy(a);
        farray_destroy(fa);
    }

    test_return(err, STRESS_RUNS);
    return err;
}
Ejemplo n.º 7
0
/* 
 * A simple test for the l2 norm
 */
int test_norm_l2()
{
    int i, j, n, err = 0;
    string_t strs[10];

    input_config("lines");
    char *test_file = getenv("TEST_FILE");
    n = input_open(test_file);
    input_read(strs, n);

    test_printf("Testing L2 normalization");
    config_set_string(&cfg, "features.vect_norm", "l2");
    for (i = 0, err = 0; i < n; i++) {
        fvec_t *fv = fvec_extract(strs[i].str, strs[i].len);
        double n = 0;
        for (j = 0; j < fv->len; j++)
            n += fv->val[j] * fv->val[j];
        err += fabs(sqrt(n) - 1.0) > 1e-6;
        fvec_destroy(fv);
    }
    test_return(err, n);

    input_free(strs, n);
    input_close();
    return err;
}
Ejemplo n.º 8
0
Archivo: sally.c Proyecto: yangke/sally
/**
 * Main processing routine of Sally. This function processes chunks of
 * strings. It might be suitable for OpenMP support in a later version.
 */
static void sally_process()
{
    long read, i, j;
    int chunk;
    const char *hash_file;

    /* Check if a hash file is set */
    config_lookup_string(&cfg, "features.hash_file", &hash_file);

    /* Get chunk size */
    config_lookup_int(&cfg, "input.chunk_size", &chunk);

    /* Allocate space */
    fvec_t **fvec = malloc(sizeof(fvec_t *) * chunk);
    string_t *strs = malloc(sizeof(string_t) * chunk);

    if (!fvec || !strs)
        fatal("Could not allocate memory for embedding");

    info_msg(1, "Processing %d strings in chunks of %d.", entries, chunk);

    for (i = 0, read = 0; i < entries; i += read) {
        read = input_read(strs, chunk);
        if (read <= 0)
            fatal("Failed to read strings from input '%s'", input);

        /* Generic preprocessing of input */
        input_preproc(strs, read);

#ifdef ENABLE_OPENMP
#pragma omp parallel for
#endif
        for (j = 0; j < read; j++) {
            fvec[j] = fvec_extract(strs[j].str, strs[j].len);
            fvec_set_label(fvec[j], strs[j].label);
            fvec_set_source(fvec[j], strs[j].src);
        }

        if (!output_write(fvec, read))
            fatal("Failed to write vectors to output '%s'", output);

        /* Free memory */
        input_free(strs, read);
        output_free(fvec, read);

        /* Reset hash if enabled but no hash file is set */
        if (fhash_enabled() && !strlen(hash_file) > 0)
            fhash_reset();

        prog_bar(0, entries, i + read);
    }
    
    free(fvec);
    free(strs);
}
Ejemplo n.º 9
0
/*
 * A stres test for the addition of feature vectors
 */
int test_stress_add()
{
    int i, j, err = 0;
    fvec_t *fx, *fy, *fz;
    char buf[STR_LENGTH + 1];

    test_printf("Stress test for addition of feature vectors");

    /* Create empty vector */
    fz = fvec_extract("aa0bb0cc", 8, "zero");
    for (i = 0; i < NUM_VECTORS; i++) {

        /* Create random key and string */
        for (j = 0; j < STR_LENGTH; j++)
            buf[j] = rand() % 10 + '0';
        buf[j] = 0;

        /* Extract features */
        fx = fvec_extract(buf, strlen(buf), "test");

        /* Add fx to fz */
        fy = fvec_add(fz, fx);
        fvec_destroy(fz);

        err += fabs(fvec_norm2(fy) - 1.4142135623) > 1e-7;

        /* Substract fx from fz */
        fz = fvec_sub(fy, fx);
        fvec_sparsify(fz);

        /* Clean up */
        fvec_destroy(fy);
        fvec_destroy(fx);
    }

    fvec_destroy(fz);
    test_return(err, i);
    return err;
}
Ejemplo n.º 10
0
/**
 * Simple test cases classification
 */
int test_classify()
{
    int i, k, err = 0;
    fvec_t *f;

    test_printf("Classification using prototypes");

    /* Prepare training data */
    farray_t *fa1 = farray_create("train");
    for (i = 0; train_data[i].str; i++) {
        f = fvec_extract(train_data[i].str, strlen(train_data[i].str), NULL);
        farray_add(fa1, f, train_data[i].label);
    }

    /* Prepare testing data */
    farray_t *fa2 = farray_create("train");
    for (i = 0; test_data[i].str; i++) {
        f = fvec_extract(test_data[i].str, strlen(test_data[i].str), NULL);
        farray_add(fa2, f, test_data[i].label);
    }

    /* Classification of test data */
    config_set_float(&cfg, "classify.max_dist", 1.41);
    assign_t *a = class_assign(fa2, fa1);
    
    /* Check predicted labels */
    for (k = 0; test_data[k].str; k++) {
        char *l = farray_get_label(fa1, a->proto[k]);
        err += strcmp(l, test_data[k].label) != 0;
    }

    /* Clean up */
    assign_destroy(a);
    farray_destroy(fa1); 
    farray_destroy(fa2);

    test_return(err, i);
    return err;
}
Ejemplo n.º 11
0
/*
 * A simple static test for the dot-product of feature vectors
 */
int test_static_dot()
{
    int i, err = 0;
    fvec_t *fx, *fy;

    test_printf("Dot product of feature vectors");

    for (i = 0; test_dot[i].x; i++) {
        /* Extract features */
        fx = fvec_extract(test_dot[i].x, strlen(test_dot[i].x), "test");
        fy = fvec_extract(test_dot[i].y, strlen(test_dot[i].y), "test");

        /* Compute dot product */
        double d = fvec_dot(fx, fy);
        err += fabs(d - test_dot[i].res) > 1e-6;

        fvec_destroy(fx);
        fvec_destroy(fy);
    }

    test_return(err, i);
    return err;
}
Ejemplo n.º 12
0
/*
 * A stres test for the addition of feature vectors
 */
int test_stress_dot()
{
    int i, j, err = 0;
    fvec_t *fx, *fy;
    char buf[STR_LENGTH + 1];

    test_printf("Stress test for dot product of feature vectors");

    /* Create empty vector */
    for (i = 0; i < NUM_VECTORS; i++) {

        /* Create random key and string */
        for (j = 0; j < STR_LENGTH; j++)
            buf[j] = rand() % 10 + '0';
        buf[j] = 0;
        fx = fvec_extract(buf, strlen(buf), "test");

        /* Create random key and string */
        for (j = 0; j < STR_LENGTH; j++)
            buf[j] = rand() % 10 + '0';
        buf[j] = 0;
        fy = fvec_extract(buf, strlen(buf), "test");

        double nx = fvec_dot(fx, fx);
        double ny = fvec_dot(fy, fy);
        err += fabs(fvec_norm2(fx) - sqrt(nx)) > 1e-7;
        err += fabs(fvec_norm2(fy) - sqrt(ny)) > 1e-7;
        err += fabs(fvec_dot(fx, fy) > nx + ny);

        /* Clean up */
        fvec_destroy(fx);
        fvec_destroy(fy);
    }

    test_return(err, 3 * i);
    return err;
}
Ejemplo n.º 13
0
/*
 * A simple static test for the addition of feature vectors
 */
int test_static_add()
{
    int i, err = 0;
    fvec_t *fx, *fy, *fz;

    test_printf("Addition of feature vectors");

    for (i = 0; test_add[i].x; i++) {
        /* Extract features */
        fx = fvec_extract(test_add[i].x, strlen(test_add[i].x), "test");
        fy = fvec_extract(test_add[i].y, strlen(test_add[i].y), "test");

        /* Add test vectors */
        fz = fvec_add(fx, fy);
        err += fabs(fvec_norm1(fz) - test_add[i].res) > 1e-7;

        fvec_destroy(fz);
        fvec_destroy(fx);
        fvec_destroy(fy);
    }

    test_return(err, i);
    return err;
}
Ejemplo n.º 14
0
int test_pos_ngrams()
{
    int i, err = 0;
    fvec_t *f;

    /* Test for positional n-grams */
    test_t t[] = {
        {"b b b b b", 3, 0, 1},
        {"b b b b b", 3, 1, 3},
        {"b b b b b", 2, 0, 1},
        {"b b b b b", 2, 1, 4},
        {NULL, 0, 0, 0}
    };

    test_printf("Testing positional n-grams");

    /* Hack to set delimiters */
    config_set_string(&cfg, "features.granularity", "tokens");
    config_set_string(&cfg, "features.token_delim", " ");
    fvec_delim_set(" ");

    for (i = 0; t[i].str; i++) {

        config_set_int(&cfg, "features.ngram_len", t[i].nlen);
        config_set_bool(&cfg, "features.ngram_pos", t[i].flag);
        config_set_int(&cfg, "features.pos_shift", 0);

        /* Extract features */
        f = fvec_extract(t[i].str, strlen(t[i].str));

        /* Check for correct number of dimensions */
        if (f->len != t[i].len) {
            test_error("(%d) len %d != %d", i, f->len, t[i].len);
            err++;
        }

        fvec_destroy(f);
    }

    config_set_bool(&cfg, "features.ngram_pos", 0);

    test_return(err, i);
    return err;
}
Ejemplo n.º 15
0
/* 
 * A simple stress test for clustering
 */
int test_stress()
{
    int i, j, k, err = 0;
    fvec_t *f;
    farray_t *fa;
    char buf[STR_LENGTH + 1], label[32];

    test_printf("Stress test for clustering");

    for (i = 0; i < STRESS_RUNS; i++) {
        /* Create array */
        fa = farray_create("test");

        for (j = 0; j < NUM_VECTORS; j++) {
            for (k = 0; k < STR_LENGTH; k++)
                buf[k] = rand() % 10 + '0';
            buf[k] = 0;

            /* Extract features */
            f = fvec_extract(buf, strlen(buf), "test");
            snprintf(label, 32, "label%.2d", rand() % 10);

            /* Add to array */
            farray_add(fa, f, label);
        }

        /* Extract prototypes */
        cluster_t *c = cluster_linkage(fa, 0);

        /* Destroy features */
        cluster_destroy(c);
        farray_destroy(fa);
    }

    test_return(err, STRESS_RUNS);
    return err;
}
Ejemplo n.º 16
0
/**
 * Allocate an empty feature vector
 * @return feature vector
 */
fvec_t *fvec_zero()
{
    return fvec_extract("", 0, "zero");
}
Ejemplo n.º 17
0
Archivo: fvec.c Proyecto: MLDroid/sally
/**
 * Allocates and extracts an empty feature vector
 * @return feature vector
 */
fvec_t *fvec_zero()
{
    return fvec_extract("", 0);
}