/*
 * Resizes a HacheTable to have 'newsize' buckets.
 * This is called automatically when adding or removing items so that the
 * hash table keeps at a sensible scale.
 *
 * The resize works by creating a temporary table with the same options,
 * re-hashing every item of 'h' into the temporary table's buckets, and then
 * moving the new bucket array (plus nbuckets and mask) back into 'h'. The
 * items themselves are relinked, not copied, so they keep pointing at 'h'.
 *
 * FIXME: Halving the size of the hash table is simply a matter of coalescing
 * every other bucket. Instead we currently rehash (which is slower).
 * Doubling the size of the hash table currently requires rehashing, but this
 * too could be optimised by storing the full 32-bit hash of the key along
 * with the key itself. This then means that it's just a matter of seeing what
 * the next significant bit is. It's a memory vs speed tradeoff though and
 * re-hashing is pretty quick.
 *
 * Returns 0 for success
 *        -1 for failure (the temporary table could not be created; 'h' is
 *           left untouched in that case)
 */
int HacheTableResize(HacheTable *h, int newsize) {
    HacheTable *h2;
    int i;

#ifdef DEBUG
    fprintf(stdout, "Resizing HacheTable %s to %d\n", hname(h), newsize);
#endif

    /* Create a new hash table and rehash everything into it */
    h2 = HacheTableCreate(newsize, h->options);
    if (h2 == NULL) {
        /* NOTE(review): assumes HacheTableCreate signals failure with NULL. */
        return -1;
    }

    for (i = 0; i < h->nbuckets; i++) {
        HacheItem *hi, *next;

        for (hi = h->bucket[i]; hi; hi = next) {
            uint32_t hv;

            assert(hi->h == h);
            hv = hache(h2->options & HASH_FUNC_MASK,
                       (uint8_t *)hi->key, hi->key_len) & h2->mask;

            /* Remember the successor before the item is pushed onto the
             * front of its new bucket, which overwrites hi->next. */
            next = hi->next;
            hi->next = h2->bucket[hv];
            h2->bucket[hv] = hi;
        }
    }

    /* Swap the links over & free */
    free(h->bucket);
    h->bucket   = h2->bucket;
    h->nbuckets = h2->nbuckets;
    h->mask     = h2->mask;

    /*
     * Only the temporary table's 'ordering' member and the table struct
     * itself are released here; its bucket array now belongs to 'h'.
     * NOTE(review): if HacheTableCreate allocates further members they are
     * not freed by this function - confirm against HacheTableCreate.
     */
    free(h2->ordering);   /* free(NULL) is a no-op, no guard needed */
    free(h2);

    return 0;
}
/*
 * Tells whether 'word' is stored in the hash table 'tab'.
 *
 * word:    string to look up; it is passed to hache() to select the bucket
 *          and to get() to search that bucket.
 * tab:     array of bucket head cells.
 * tab_len: number of cells in 'tab', used as the modulus for the hash.
 *
 * Returns 1 when get() finds the word in the selected bucket, 0 otherwise.
 *
 * NOTE(review): the hash is stored in an int before the modulo; if hache()
 * can yield a value that is negative as an int, the bucket index would be
 * negative - confirm against hache()'s return type.
 */
int belongs(char* word, Cellule* tab, int tab_len)
{
    int h = hache(word);
    Liste bucket = tab + (h % tab_len);

    return get(word, bucket) != NULL;
}
/*
 * Records one occurrence of 'word', found at 'sentence_pos', in the hash
 * table 'tab'.
 *
 * tab:          array of bucket head cells.
 * size_of_tab:  number of cells in 'tab', used as the modulus for the hash.
 * word:         NUL-terminated word (it is handed to hache() and strcmp()).
 * word_len:     caller-supplied length of 'word'; only used as a lower bound
 *               for the size of the private copy (see below).
 * sentence_pos: position value recorded through celmot_add_position().
 * alphabetical_word_list: list that receives every newly created entry via
 *               liste_add_alphabetical().
 *
 * Every node of the selected bucket's chain is compared against 'word'.
 * If the word is already stored, only the new position is added to the
 * existing entry. Otherwise a private copy of the word is wrapped in a new
 * Celmot, which is stored in the last node of the chain when that node has
 * no value yet, or in a fresh node from liste_new() that is then linked in
 * with liste_add().
 *
 * Returns 1 on success, 0 when memory for the new entry could not be
 * obtained (nothing is added in that case).
 *
 * NOTE(review): the hash is stored in an int before the modulo; if hache()
 * can yield a value that is negative as an int, the bucket index would be
 * negative. Left unchanged here so that insertion keeps selecting the same
 * bucket as the lookup functions - confirm against hache()'s return type.
 */
int add_word(Cellule* tab, long size_of_tab, unsigned char* word, unsigned short word_len, unsigned long sentence_pos, Liste* alphabetical_word_list)
{
    int hash = hache((char*) word);
    /* we get the address of the row we have to write to */
    Liste l = &tab[hash % size_of_tab];
    Liste to_add;
    Celmot* elem;
    char* word_to_add;
    size_t text_size;
    size_t copy_size;

    /*
     * Walk the whole chain, comparing EVERY node (including the last one)
     * before deciding the word is new. On exit 'l' is the last node, which
     * is where a new entry has to be attached.
     */
    for (;;) {
        if (l->valeur != NULL
            && strcmp((const char*) l->valeur->mot, (const char*) word) == 0) {
            celmot_add_position(l->valeur, sentence_pos);
            return 1;
        }
        if (l->suivant == NULL)
            break;
        l = l->suivant;
    }

    /*
     * Copy the word. The buffer must hold the terminating NUL as well, so
     * it is sized from the actual string; it is never smaller than the
     * 'word_len' bytes the caller asked for.
     */
    text_size = strlen((const char*) word) + 1;
    copy_size = text_size;
    if ((size_t) word_len > copy_size)
        copy_size = word_len;

    word_to_add = malloc(copy_size);
    if (word_to_add == NULL)
        return 0;
    memcpy(word_to_add, word, text_size);

    /* Reuse the last node when it is still empty, otherwise get a new one. */
    if (l->valeur == NULL) {
        to_add = l;
    } else {
        to_add = liste_new();
        /* NOTE(review): assumes liste_new() reports failure with NULL. */
        if (to_add == NULL) {
            free(word_to_add);
            return 0;
        }
    }

    elem = celmot_new(word_to_add);
    to_add->valeur = elem;
    celmot_add_position(elem, sentence_pos);

    /* we have to add it at the end of the cell list. */
    if (l != to_add)
        liste_add(&l, to_add);

    liste_add_alphabetical(alphabetical_word_list, elem);
    return 1;
}
/*
 * Prints, on one line of stdout, every position recorded for 'word'.
 *
 * word:    string to look up; it is passed to hache() to select the bucket
 *          and to get() to search that bucket.
 * tab:     array of bucket head cells.
 * tab_len: number of cells in 'tab', used as the modulus for the hash.
 *
 * Each position is printed followed by a space, and the line is closed with
 * a newline. When get() does not find the word, a "Word not found" message
 * is written to stderr instead and nothing goes to stdout.
 *
 * NOTE(review): positions are printed with "%d"; confirm that this matches
 * the declared type of the 'position' field.
 */
void print_positions(char* word, Cellule* tab, int tab_len)
{
    int h = hache(word);
    Liste match = get(word, &tab[h % tab_len]);
    Listepos cur;

    if (match == NULL) {
        fprintf(stderr, "Word not found: %s\n", word);
        return;
    }

    for (cur = match->valeur->positions; cur != NULL; cur = cur->suivant)
        printf("%d ", cur->position);
    printf("\n");
}
/*
 * Calls print_sentence() on file 'fname' for every position recorded for
 * 'word'.
 *
 * word:    string to look up; it is passed to hache() to select the bucket
 *          and to get() to search that bucket.
 * fname:   path of the file handed to print_sentence(); opened read-only.
 * tab:     array of bucket head cells.
 * tab_len: number of cells in 'tab', used as the modulus for the hash.
 *
 * The file is opened before the lookup: if it cannot be opened, an error is
 * written to stderr and the program exits with status 1, whether or not the
 * word exists. When the word is found, a newline is printed to stdout after
 * the last print_sentence() call; otherwise a "Word not found" message is
 * written to stderr. The file is closed before returning.
 */
void print_sentences_containing_word(char* word, char* fname, Cellule* tab, int tab_len)
{
    int hash = hache(word);
    Liste l = &tab[hash % tab_len];
    FILE* f = fopen(fname, "r");

    if (f == NULL) {
        fprintf(stderr, "Error while trying to read file %s\n", fname);
        exit(1);
    }

    l = get(word, l);
    if (l != NULL) {
        Listepos pos;

        for (pos = l->valeur->positions; pos != NULL; pos = pos->suivant)
            print_sentence(f, pos->position);
        printf("\n");
    } else {
        fprintf(stderr, "Word not found: %s\n", word);
    }

    /* Release the stream on every path that returns to the caller. */
    fclose(f);
}