Esempio n. 1
0
/**
 * Computes the bag distance of two strings. The distance approximates
 * and lower bounds the Levenshtein distance.
 *
 * With X and Y being the bags (multisets) of symbols of x and y, the bag
 * distance is max(|X - Y|, |Y - X|), where "-" is the multiset difference.
 * The maximum (not the sum) of the two differences is required for the
 * lower-bound property: a single substitution removes one surplus symbol
 * on each side at once. For example "a" vs. "b" has bag distance 1, which
 * matches its Levenshtein distance, whereas the sum would yield 2.
 *
 * @param x first string
 * @param y second string
 * @return Bag distance after normalization by lnorm()
 */
float dist_bag_compare(hstring_t x, hstring_t y)
{
    float x_surplus = 0;        /* |X - Y|: occurrences only x can account for */
    float y_surplus = 0;        /* |Y - X|: occurrences only y can account for */
    bag_t *xh, *yh, *xb, *yb;

    xh = bag_create(x);
    yh = bag_create(y);

    /*
     * Occurrences of y whose symbol has not been seen in x yet.
     * NOTE(review): assumes y.len equals the sum of all counts in yh.
     */
    int missing = y.len;
    for (xb = xh; xb != NULL; xb = xb->hh.next) {
        HASH_FIND(hh, yh, &(xb->sym), sizeof(sym_t), yb);
        if (!yb) {
            /* Symbol occurs in x only */
            x_surplus += xb->cnt;
            continue;
        }

        /* Symbol occurs in both: credit the difference to the larger side */
        float diff = xb->cnt - yb->cnt;
        if (diff > 0) {
            x_surplus += diff;
        } else {
            y_surplus -= diff;
        }
        missing -= yb->cnt;
    }
    /* Whatever is left of y belongs to symbols that never occur in x */
    y_surplus += missing;

    bag_destroy(xh);
    bag_destroy(yh);

    float d = (x_surplus > y_surplus) ? x_surplus : y_surplus;

    /* n is defined outside this function and selects the normalization */
    return lnorm(n, d, x, y);
}
Esempio n. 2
0
/**
 * Builds a word index from an input stream.
 *
 * Words are read with get_word() until it reports that no more are
 * available. Each word of at least min_word_len characters is looked up in
 * the index: if it is already present the page is added to its entry with
 * entry_add(), otherwise a new entry is created with entry_create() and
 * inserted.
 *
 * @param input         stream the words are read from
 * @param min_word_len  minimum length of a word to be indexed; zero or a
 *                      negative value accepts every word
 * @return the index as returned by bag_create(entry_cmp); a false/NULL
 *         result of bag_create() is returned unchanged
 */
bag_t *generate_index(FILE *input, int min_word_len)
{
    bag_t *index = bag_create(entry_cmp);

    if (index) {
        char word[LINE_LENGTH] = "";
        entry_t new_word = {0};     /* lookup key only; never inserted */
        entry_t *existing_entry;
        bag_elem_t new_entry;
        unsigned page = 0;

        /*
         * Comparing strlen() (size_t) directly with a negative int would
         * convert the int to a huge unsigned value and reject every word,
         * so clamp the threshold once up front.
         */
        const size_t min_len = (min_word_len > 0) ? (size_t)min_word_len : 0;

        /* The key always refers to the same buffer that get_word() fills */
        new_word.entry_word = word;

        while (get_word(input, word, &page))
        {
            // skip words that are too short
            if (strlen(word) < min_len)
            {
                continue;
            }

            existing_entry = bag_contains(index, &new_word);
            if (existing_entry != NULL) // the word is already in the index
            {
                entry_add(existing_entry, page); // record one more page for that word
            }
            else // first occurrence of the word
            {
                new_entry = entry_create(word, page);
                if (new_entry != NULL) // never store a NULL element in the index
                {
                    bag_insert(index, new_entry);
                }
            }
        }
    }
    return index;
}
Esempio n. 3
0
File: bucket.c Progetto: i12345/LLLL
/*
 * Allocates a Bucket and initializes its members.
 *
 * mailbin                    - mailbin the bucket's mailman is allocated from
 * unallocatedvalueids_start  - stored in the bucket unchanged
 * unallocatedvalueids_end    - stored in the bucket unchanged
 * bucketfileID               - passed to bag_create() for the rocks bag
 *
 * Returns the new bucket.
 */
Bucket* bucket_create(
		Mailbin* mailbin,
		valueID unallocatedvalueids_start, valueID unallocatedvalueids_end,
		fileID bucketfileID
	) {
	Bucket* this = alloct(Bucket);

	/* Value ID range as handed in by the caller. */
	this->unallocatedvalueids_start = unallocatedvalueids_start;
	this->unallocatedvalueids_end = unallocatedvalueids_end;

	/*
	 * Every member below follows the same three steps: create the object,
	 * ref() it with the bucket as first argument, then release() it.
	 * NOTE(review): presumably this leaves the bucket holding the only
	 * reference -- confirm against the ref/release API.
	 */

	/* Flag for the unallocated value IDs. */
	this->unallocatedvalueids_flag = tflag_create();
	ref(this, this->unallocatedvalueids_flag);
	release(this->unallocatedvalueids_flag);

	/* Bag of rocks, set up with the rock (de)serializers and the bucket file ID. */
	this->rocks = bag_create(rock_serialize, rock_deserialize, bucketfileID);
	ref(this, this->rocks);
	release(this->rocks);

	/* Mailman registered for MESSAGE_TYPE_MIRROR_ROCKS with this bucket as its argument. */
	this->mailman_mirrorrock = mailbin_allocate_mailman(
			mailbin, MESSAGE_TYPE_MIRROR_ROCKS, this, message_mirrorrocks_process);
	ref(this, this->mailman_mirrorrock);
	release(this->mailman_mirrorrock);

	return this;
}
Esempio n. 4
0
/**
 * Creates an index entry for a word together with the first page it
 * appears on.
 *
 * The entry holds its own copy of the word and a page index bag (created
 * with page_cmp) into which a heap-allocated copy of the page number is
 * inserted.
 *
 * @param word  NUL-terminated word to store; copied, not retained
 * @param page  page number to record for the word
 * @return the new entry, or NULL if word is NULL or any allocation
 *         (including bag_create()) fails; nothing is leaked on failure
 */
bag_elem_t entry_create(const char *word, unsigned page)
{
    if (word == NULL) {
        return NULL;
    }

    const size_t word_size = strlen(word) + 1;

    /*
     * Do all plain allocations first and create the bag last, so that the
     * failure path only ever has to free() memory and never has to tear
     * down a bag.
     */
    entry_t *new_entry = malloc(sizeof *new_entry);
    char *word_copy = malloc(word_size);
    page_entry *new_page = malloc(sizeof *new_page);
    if (new_entry == NULL || word_copy == NULL || new_page == NULL) {
        goto fail;
    }

    // Create the page index bag to hold the page numbers.
    new_entry->page_index = bag_create(page_cmp);
    if (new_entry->page_index == NULL) {
        goto fail;
    }

    // Copy the word (including its terminator) and put it in the entry.
    memcpy(word_copy, word, word_size);
    new_entry->entry_word = word_copy;

    // Add the page to the page index. The result of bag_insert() is not
    // checked, as before; its failure contract is not visible here.
    *new_page = page;
    bag_insert(new_entry->page_index, new_page);
    return new_entry;

fail:
    // free(NULL) is a no-op, so this is safe whichever allocation failed.
    free(new_page);
    free(word_copy);
    free(new_entry);
    return NULL;
}