Esempio n. 1
0
/**
 * Computes the bag distance of two strings.
 *
 * A bag is created for each string. For every entry in the bag of x the
 * absolute difference between its count and the count of the same symbol
 * in the bag of y is accumulated (the full count if y has no such symbol).
 * Whatever part of y's length was not matched by a symbol of x is added on
 * top, and the sum is passed through lnorm() together with both strings.
 *
 * The distance is documented as approximating and lower-bounding the
 * Levenshtein distance.
 * NOTE(review): the un-normalized sum counts differences on both sides
 * (symbols only in x plus symbols only in y); confirm that the lower-bound
 * property still holds for the value produced by lnorm().
 *
 * @param x first string
 * @param y second string
 * @return Bag distance
 */
float dist_bag_compare(hstring_t x, hstring_t y)
{
    float dist = 0;
    bag_t *bag_x = bag_create(x);
    bag_t *bag_y = bag_create(y);
    bag_t *cur, *match;

    /* Starts at the length of y; every symbol shared with x removes its
     * y-count, so the remainder is presumably the number of symbols of y
     * that never occur in x (assumes counts in a bag sum to the length). */
    int unmatched = y.len;

    cur = bag_x;
    while (cur != NULL) {
        HASH_FIND(hh, bag_y, &(cur->sym), sizeof(sym_t), match);
        if (match) {
            /* Symbol occurs in both strings: only the surplus counts. */
            dist += fabs(cur->cnt - match->cnt);
            unmatched -= match->cnt;
        } else {
            /* Symbol occurs in x only. */
            dist += cur->cnt;
        }
        cur = cur->hh.next;
    }
    dist += unmatched;

    bag_destroy(bag_x);
    bag_destroy(bag_y);

    /* `n` is not declared in this function; presumably a file-scope
     * normalization setting -- verify against the rest of the file. */
    return lnorm(n, dist, x, y);
}
Esempio n. 2
0
/*
 * Recursively releases a chain of bag nodes.
 *
 * The rest of the chain (b->next) is destroyed first, then this node's
 * pennant via treenode_destroy(), and finally the node itself is freed.
 * A null argument is a no-op, which is also what ends the recursion.
 */
void bag_destroy(BAG b)
{
    if (!b) {
        return;
    }

    bag_destroy(b->next);
    treenode_destroy(b->pennant);
    free(b);
}
Esempio n. 3
0
/*
 * Releases one index entry together with the data it holds.
 *
 * Frees the entry's word, runs page_destroy over every element of the
 * entry's page index, destroys the page index bag, and finally frees the
 * entry itself. The signature takes a bag_elem_t so the function can be
 * handed to bag_traverse() as a callback.
 */
void entry_destroy(bag_elem_t e)
{
    entry_t *entry = e;

    free(entry->entry_word);

    /* Release every page record first, then the bag that contained them. */
    bag_traverse(entry->page_index, page_destroy);
    bag_destroy(entry->page_index);

    free(entry);
}
Esempio n. 4
0
/*
 * Builds a word index for a text file, prints it, and records timing data.
 *
 * Usage: <program> <filename> [minimum_word_length]
 *
 * argv[1] must name a file that can be opened for reading; otherwise a
 * usage message is printed on stderr and the program exits with
 * EXIT_FAILURE. argv[2], when present and parsing (base 10) to a positive
 * number, sets the minimum word length; otherwise MIN_WORD_LEN is used.
 *
 * Timing data for generating, printing and destroying the index is appended
 * to "runtime_log.txt". If that file cannot be opened, a diagnostic is
 * printed and the timing data goes to stderr instead.
 *
 * Returns EXIT_SUCCESS once the index has been processed.
 */
int main(int argc, char *argv[])
{
    FILE *input, *timing_log;
    int min_word_len = 0;
    bag_t *index;
    clock_t ticks;

    /* First, check that there is a first command line argument and
     * that it is the name of a file that can be opened for reading. */
    if (argc <= 1 || ! (input = fopen(argv[1], "r"))) {
        fprintf(stderr,
                "ERROR: missing or incorrect argument!\n"
                "USAGE: %s <filename> [minimum_word_length]\n"
                "  . <filename> is the name of a text file (required)\n"
                "  . [minimum_word_length] is a positive integer (optional)\n",
                argv[0]);
        exit(EXIT_FAILURE);
    }
    /* If we get here, the file has been opened for reading. */

    /* Next, check if there is a second command line argument to specify
     * a minimum word length. */
    if (argc < 3 || (min_word_len = (int) strtol(argv[2], NULL, 10)) <= 0) {
        min_word_len = MIN_WORD_LEN;
    }
    /* If we get here, the minimum word length has a positive value. */

    /* Create or append to a runtime log file. A failed open must not be
     * passed on to fprintf()/fclose() (a NULL stream is undefined
     * behavior), so fall back to stderr and keep going. */
    timing_log = fopen("runtime_log.txt", "a");
    if (!timing_log) {
        perror("runtime_log.txt");
        timing_log = stderr;
    }
    fprintf(timing_log, "For %s and word %d characters and larger:\n",
            argv[1], min_word_len);

    /* Next, generate the index, close the input file (because we're done with
     * it at this point), and record timing data. */
    ticks = clock();
    index = generate_index(input, min_word_len);
    ticks = clock() - ticks;
    fclose(input);
    fprintf(timing_log, "Elapsed time for generating the index: %gms\n",
                    1000.0 * ticks / CLOCKS_PER_SEC);
    /* Timing data goes to the log stream rather than stdout so it stays
     * separate from the index printed below. */

    /* Finally, print the index on stdout and clean up: free the memory
     * allocated for each index entry, then the memory for the index itself. */
    if (index) {

        /* Time how long it takes to print the index. */
        ticks = clock();
        bag_traverse(index, entry_print);
        ticks = clock() - ticks;
        fprintf(timing_log, "Elapsed time for printing the index: %gms\n",
                        1000.0 * ticks / CLOCKS_PER_SEC);

        /* Time how long it takes to destroy the index. */
        ticks = clock();
        bag_traverse(index, entry_destroy);
        bag_destroy(index);
        ticks = clock() - ticks;
        fprintf(timing_log, "Elapsed time for destroy the index: %gms\n\n",
                        1000.0 * ticks / CLOCKS_PER_SEC);
    }

    /* Only close the stream if it is the log file we opened ourselves. */
    if (timing_log != stderr) {
        fclose(timing_log);
    }

    return EXIT_SUCCESS;
}