/*
 * Recursively fold a term into the running hash value `hash`.
 *
 * The term is first passed through chase(); the term it returns is the one
 * that gets hashed.
 *   INTEGER - mixed in through hash_integer().
 *   STRING  - mixed in through hash_string().
 *   FUNCTOR - the atom is mixed in; when the size is non-zero the size is
 *             mixed in as a byte, followed by every argument in order.
 *   VAR, DICT, MOVED - reported through fatal_error().
 *
 * Returns the updated hash value.
 */
HEADER_DECLARE hash_t hash_rec(Term* term, hash_t hash){
    Term* resolved = chase(term);

    switch(resolved->type){
    case INTEGER:
        return hash_integer(resolved->data.integer, hash);

    case STRING:
        return hash_string(&resolved->data.string, hash);

    case FUNCTOR: {
        hash = hash_atom(resolved->data.functor.atom, hash);
        functor_size_t arity = resolved->data.functor.size;
        if(arity){
            /* A functor without arguments hashes as its atom alone. */
            hash = hash_byte(arity, hash);
            functor_size_t idx = 0;
            while(idx < arity){
                hash = hash_rec(resolved->data.functor.args[idx], hash);
                idx++;
            }
        }
        return hash;
    }

    /* The remaining kinds cannot be hashed; the case order is kept as is. */
    case VAR:
        fatal_error("Cannot hash variable '%s'", resolved->data.var.name);
    case DICT:
        fatal_error("unimplemented: hash dict");
    case MOVED:
        fatal_error("Cannot hash a moved term");
    }
    UNREACHABLE;
}
/*
 * Mix an atom into the running hash value `hash`.
 *
 * Every byte of the atom's in-memory representation is fed to hash_byte(),
 * lowest address first, so the result depends on how the platform lays out
 * atom_t. Returns the updated hash value.
 */
hash_t hash_atom(atom_t atom, hash_t hash){
    const char* cursor = (const char*)&atom;
    const char* const stop = cursor + sizeof(atom);

    while(cursor != stop){
        hash = hash_byte(*cursor, hash);
        cursor++;
    }
    return hash;
}
/*
 * Mix an integer into the running hash value `hash`.
 *
 * The bytes of `x` are fed to hash_byte() in memory order (lowest address
 * first), so the result depends on the platform's representation of
 * integer_t. Returns the updated hash value.
 */
hash_t hash_integer(integer_t x, hash_t hash){
    const char* raw = (const char*)&x;
    size_t remaining = sizeof(x);

    for(; remaining > 0; remaining--, raw++){
        hash = hash_byte(*raw, hash);
    }
    return hash;
}
/*
 * Hash a NUL-terminated string.
 *
 * Starts from HASH_INIT_VALUE and folds each character before the
 * terminator into the hash with hash_byte(), in order. An empty string
 * therefore hashes to HASH_INIT_VALUE. `str` must not be NULL.
 */
unsigned long hash_str(char * str)
{
    unsigned long hash = HASH_INIT_VALUE;
    char *cursor;

    for (cursor = str; *cursor; cursor++) {
        char c = *cursor;
        hash = hash_byte(hash, c);
    }
    return hash;
}
/*
 * Look up `word` in the bit filter.
 *
 * hash_byte() selects the byte of `filter` and hash_bit() selects the bit
 * within that byte; the value reported by get_bit() for that position is
 * returned. A NULL `word` is reported through err().
 */
int in_dict(unsigned char filter[], char *word)
{
    if (!word) {
        err("[e] word is NULL");
    }

    unsigned int byte_index = hash_byte(word);
    unsigned int bit_index = hash_bit(word);
    unsigned char cell = filter[byte_index];

    return get_bit(cell, bit_index);
}
/*
 * Record `word` in the bit filter.
 *
 * hash_byte() selects the byte of `filter` and hash_bit() selects the bit
 * within that byte; set_bit() is then applied to that position. A NULL
 * `word` is reported through err().
 */
void insert_word(unsigned char filter[], char *word)
{
    if (!word) {
        err("[e] word is NULL");
    }

    unsigned int byte_index = hash_byte(word);
    unsigned int bit_index = hash_bit(word);
    unsigned char *cell = filter + byte_index;

    set_bit(cell, bit_index);
}
/*
 * Hash an arbitrary buffer of `len` bytes.
 *
 * Starts from HASH_INIT_VALUE and folds the bytes at vdata[0..len-1] into
 * the hash with hash_byte(), in order. When `len` is zero or negative no
 * byte is read and HASH_INIT_VALUE is returned.
 *
 * The bytes are read as plain `char`, the same way hash_str() reads its
 * input, so both functions feed identical values to hash_byte() for the
 * same data.
 */
unsigned long hash_bytes(void * vdata, int len)
{
    const char *data = (const char *) vdata;
    unsigned long hash = HASH_INIT_VALUE;

    for (int i = 0; i < len; i++) {
        /* Read the byte first so the hash_byte() argument has no side effect. */
        char byte = data[i];
        hash = hash_byte(hash, byte);
    }
    return hash;
}
int main(int argc, char *argv[]) { srand((unsigned)time(NULL)); unsigned char filter[FILTER_SIZE_MAX]; if (argc == 1) err("usage:\n" " ./prog -hash some_word\n" " ./prog -m_byte 3 -m_bit 12 -hash some_word\n" " ./prog -d_file dict.txt -tr_files test.txt result.txt\n" " ./prog -d_file dict.txt -tr_files test.txt result.txt -b_file data.bin\n" " ./prog -show_options\n" ); FILTER_SIZE = MAX_DB_SIZE - MAX_JS_SIZE; init_filter(filter); int i; for (i = 0; i < argc; i++) { if (!strcmp(argv[i], "-m_byte")) { MULTIPLIER_BYTE = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-m_bit")) { MULTIPLIER_BIT = atoi(argv[i + 1]); } if (!strcmp(argv[i], "-max_db")) { MAX_DB_SIZE = atoi(argv[i + 1]); FILTER_SIZE = MAX_DB_SIZE - MAX_JS_SIZE; } if (!strcmp(argv[i], "-max_js")) { MAX_JS_SIZE = atoi(argv[i + 1]); FILTER_SIZE = MAX_DB_SIZE - MAX_JS_SIZE; } if (!strcmp(argv[i], "-filter_size")) { FILTER_SIZE = atoi(argv[i + 1]); } if (FILTER_SIZE <= 0) { err("FILTER_SIZE must be > 0\n"); } if (!strcmp(argv[i], "-hash")) { unsigned int byte = hash_byte(argv[i + 1]); unsigned int bit = hash_bit(argv[i + 1]); printf("%s:%d:%d\n", argv[i + 1], byte, bit); } // dictionary file if (!strcmp(argv[i], "-d_file")) { load_words(filter, argv[i + 1]); } // test file, result file if (!strcmp(argv[i], "-tr_files")) { testing_words(filter, argv[i + 1], argv[i + 2]); } // binary file if (!strcmp(argv[i], "-b_file")) { save2bin_file(filter, argv[i + 1]); } if (!strcmp(argv[i], "-show_options")) { printf("MAX_DB_SIZE = %d;\n", MAX_DB_SIZE); printf("MAX_JS_SIZE = %d;\n", MAX_JS_SIZE); printf("FILTER_SIZE = %d;\n", FILTER_SIZE); printf("MULTIPLIER_BYTE = %d;\n", MULTIPLIER_BYTE); printf("MULTIPLIER_BIT = %d;\n", MULTIPLIER_BIT); } } return 0; }
/*
 * Mix the contents of a string buffer into the running hash value `hash`.
 *
 * The elements str->ptr[0] .. str->ptr[str->end - 1] are fed to hash_byte()
 * in order; a buffer whose `end` is 0 leaves the hash unchanged.
 * Returns the updated hash value.
 */
hash_t hash_string(Buffer* str, hash_t hash){
    size_t pos = 0;

    while(pos < str->end){
        hash = hash_byte(str->ptr[pos], hash);
        pos++;
    }
    return hash;
}