/**
 * Computes the bag distance of two strings.
 *
 * Each string is converted into a bag of (symbol, count) entries. For every
 * entry of x's bag the entry with the same symbol is looked up in y's bag:
 *   - no match: the entry's whole count is added to the distance;
 *   - match:    the absolute difference of both counts is added.
 * Afterwards the portion of y.len that was not covered by matched entries
 * is added as well, and the total is passed through lnorm().
 *
 * The measure is meant to approximate the Levenshtein distance.
 * NOTE(review): the value accumulated here is the sum of the count
 * differences in both directions, not their maximum -- confirm that this
 * still lower-bounds the Levenshtein distance as intended.
 *
 * @param x first string
 * @param y second string
 * @return Bag distance
 */
float dist_bag_compare(hstring_t x, hstring_t y)
{
    bag_t *bag_x = bag_create(x);
    bag_t *bag_y = bag_create(y);
    bag_t *cur, *match;
    float dist = 0;

    /* Starts at the full length of y; shrinks for every symbol also in x. */
    int unmatched = y.len;

    cur = bag_x;
    while (cur != NULL) {
        HASH_FIND(hh, bag_y, &(cur->sym), sizeof(sym_t), match);
        if (match) {
            dist += fabs(cur->cnt - match->cnt);
            unmatched -= match->cnt;
        } else {
            dist += cur->cnt;
        }
        cur = cur->hh.next;
    }
    dist += unmatched;

    bag_destroy(bag_x);
    bag_destroy(bag_y);

    /* `n` is not declared in this function; it comes from the enclosing scope. */
    return lnorm(n, dist, x, y);
}
bag_t *generate_index(FILE *input, int min_word_len) { bag_t *index = bag_create(entry_cmp); if (index) { char word[LINE_LENGTH] = ""; entry_t new_word, *existing_entry; bag_elem_t new_entry; unsigned page = 0; while (get_word(input, word, &page)) { new_word.entry_word = word; // check if the length of the word is long enough if(strlen(word) >= min_word_len) { existing_entry = bag_contains(index, &new_word); if(existing_entry != NULL) // if the word is already in index { entry_add(existing_entry, page); // add the location to the list of locations for that word } else // if the word isn't in the index { new_entry = entry_create(word, page); // create the entry bag_insert(index, new_entry); // add the location } } } } return index; }
/**
 * Allocates a Bucket and sets up the objects it holds.
 *
 * Three members are created in order: the unallocated-value-id flag, the
 * rock bag (bound to bucketfileID and the rock (de)serializers), and the
 * mailman for MESSAGE_TYPE_MIRROR_ROCKS messages. Each of them follows the
 * same sequence: create, ref() on behalf of the bucket, then release().
 * NOTE(review): presumably this leaves the bucket as the only holder of
 * each object -- confirm against the ref()/release() implementation.
 *
 * @param mailbin                   mailbin the mirror-rocks mailman is allocated from
 * @param unallocatedvalueids_start stored in the bucket unchanged
 * @param unallocatedvalueids_end   stored in the bucket unchanged
 * @param bucketfileID              file id passed to bag_create() for the rock bag
 * @return the newly allocated bucket
 */
Bucket* bucket_create(Mailbin* mailbin,
                      valueID unallocatedvalueids_start,
                      valueID unallocatedvalueids_end,
                      fileID bucketfileID)
{
    Bucket* bucket = alloct(Bucket);

    bucket->unallocatedvalueids_end = unallocatedvalueids_end;
    bucket->unallocatedvalueids_start = unallocatedvalueids_start;

    /* Flag: create, take the bucket's reference, drop the creation one. */
    bucket->unallocatedvalueids_flag = tflag_create();
    ref(bucket, bucket->unallocatedvalueids_flag);
    release(bucket->unallocatedvalueids_flag);

    /* Rock bag: same create / ref / release sequence. */
    bucket->rocks = bag_create(rock_serialize, rock_deserialize, bucketfileID);
    ref(bucket, bucket->rocks);
    release(bucket->rocks);

    /* Mailman that hands mirror-rocks messages to message_mirrorrocks_process. */
    bucket->mailman_mirrorrock = mailbin_allocate_mailman(mailbin,
                                                          MESSAGE_TYPE_MIRROR_ROCKS,
                                                          bucket,
                                                          message_mirrorrocks_process);
    ref(bucket, bucket->mailman_mirrorrock);
    release(bucket->mailman_mirrorrock);

    return bucket;
}
bag_elem_t entry_create(const char *word, unsigned page) { // Allocate the memory for the new entry entry_t *new_entry = malloc(sizeof(entry_t)); // Copy the word into a new string and put it in the entry. new_entry -> entry_word = malloc((strlen(word) + 1) * sizeof(char)); strcpy(new_entry -> entry_word, word); // Create the page index bag to hold the page numbers. new_entry->page_index = bag_create(page_cmp); page_entry *new_page = malloc(sizeof(page_entry)); *new_page = page; // add the page to the page index. bag_insert(new_entry->page_index, new_page); return new_entry; }