// Doubles the capacity of the hash table that `old` points to.
//
// `old` is the table the caller observed as full. If another thread has
// already replaced it (table_ no longer points at `old`), the call is a no-op.
//
// Resizes are serialized by extend_lock_. Every stripe mutex in lock_table_
// is then held while each entry of the old table is copied into a table of
// twice the size, and the new table is published through table_.
//
// NOTE(review): the old table and its nodes are not freed here; presumably
// this is deliberate because other threads may still hold a pointer to it --
// confirm before adding a delete.
void extend(table* old) {
  table& old_table = *old;

  std::lock_guard<std::mutex> lock(extend_lock_);
  if (table_.load() != &old_table) {
    return;  // somebody else already grew the table
  }

  // Take every stripe lock through RAII holders so that an exception thrown
  // below (e.g. std::bad_alloc from init_table or new kvp) cannot leave the
  // stripes locked forever.
  std::vector<std::unique_lock<std::mutex>> slot_locks;
  slot_locks.reserve(lock_table_.size());
  for (auto& slot_lock : lock_table_) {
    slot_locks.emplace_back(slot_lock);
  }

  const size_t new_size = old_table.size * 2;
  std::unique_ptr<table> new_table_ptr(init_table(new_size, 0));
  table& new_table = *new_table_ptr;

  for (size_t i = 0; i < old_table.size; ++i) {
    for (kvp* target = old_table[i]; target != nullptr; target = target->next) {
      // Entries are copied, not moved, so the old chains stay intact.
      const size_t new_hash = MurmurHash2A(&target->key, sizeof(int), 0);
      const size_t slot = new_hash % new_size;

      std::unique_ptr<kvp> entry(new kvp(target->key, target->value));
      entry->next = new_table[slot];     // push at the head of the bucket
      new_table[slot] = entry.release();
      new_table.add_entry();
    }
  }

  table_.store(new_table_ptr.release());
  // slot_locks, then extend_lock_, are released by their destructors.
}
/*
 * NIF entry point: hashes the data in argv[0] with MurmurHash2A (seed 0).
 *
 * Returns the 32-bit hash as an unsigned integer term, or a badarg
 * exception term when check_and_unpack_data() rejects argv[0].
 * argc is not inspected.
 */
ERL_NIF_TERM erlang_murmurhash2a_1_impl(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifBinary input;

    if (check_and_unpack_data(env, argv[0], &input)) {
        const uint32_t digest = MurmurHash2A(input.data, input.size, 0);
        return enif_make_uint(env, digest);
    }

    return enif_make_badarg(env);
}
/*
 * Ring binding for MurmurHash2A.
 *
 * Parameters (read through the RING_API_* macros):
 *   1: string  - the key to hash
 *   2: number  - the seed
 *   3: number  - optional flag; must satisfy is_bool(). It is forwarded to
 *                MH_RETURN_INT together with the hash
 *                (NOTE(review): presumably selects the return format -- confirm
 *                against the MH_RETURN_INT definition).
 *
 * Every validation failure raises a Ring API error and returns without
 * computing a hash.
 */
void ring_murmurhash2a(void *pPointer)
{
    char *key = NULL;
    int keylen;
    int seed = 0;
    uint32_t out;
    int ret_type = 0;

    if (RING_API_PARACOUNT < 2 || RING_API_PARACOUNT > 3) {
        RING_API_ERROR(RING_API_MISS2PARA);
        return;
    }

    if (!RING_API_ISSTRING(1)) {
        RING_API_ERROR("murmurhash2a expects the first parameter to be a string");
        return;
    }

    if (!RING_API_ISNUMBER(2)) {
        /* This check is about parameter 2; the message used to say "first". */
        RING_API_ERROR("murmurhash2a expects the second parameter to be an integer");
        return;
    }

    key = RING_API_GETSTRING(1);
    /* NOTE(review): strlen() stops at the first NUL byte, so a key with
     * embedded NULs is hashed only up to that byte. */
    keylen = (int)strlen(key);
    seed = RING_API_GETNUMBER(2);

    if (RING_API_PARACOUNT == 3) {
        if (!RING_API_ISNUMBER(3)) {
            RING_API_ERROR("murmurhash2a expects the third parameter to be an integer\n");
            return; /* do not fall through and return a hash after an error */
        }

        ret_type = RING_API_GETNUMBER(3);
        if (!is_bool(ret_type)) {
            RING_API_ERROR("Third parameter should be boolean value\n");
            return;
        }
    }

    out = MurmurHash2A(key, keylen, seed);
    MH_RETURN_INT(out, ret_type);
}
void insert(int key, int value) { const size_t hash = MurmurHash2A(&key, sizeof(int), 0); table& t = *table_.load(std::memory_order_acquire); const size_t slot = hash % t.size; kvp* const entry = new kvp(key, value); { const size_t lock_slot = hash % lock_table_.size(); std::lock_guard<std::mutex> lock(lock_table_[lock_slot]); kvp* orig = t[slot]; t[slot] = entry; entry->next = orig; t.add_entry(); } // extend if needed if (t.get_entries() == t.size) { extend(&t); } }
int get(int key) const { table& t = *table_.load(std::memory_order_acquire); const size_t hash = MurmurHash2A(&key, sizeof(int), 0); const size_t slot = hash % t.size; kvp* target = t[slot]; if (target != nullptr) { // getting slot lock entire iterate list in bucket const size_t lock_slot = hash % lock_table_.size(); std::lock_guard<std::mutex> lock(lock_table_[lock_slot]); while (target != nullptr) { if (target->key == key) { return target->value; } target = target->next; } } return 0; // throw std::runtime_error("not found"); }
/* Dict::createDict(): creates a new dictionary file using the SongDB. */ void Dict::createDict() { ofstream dictfile; try { dictfile.open(dictfileName.c_str(), fstream::out | fstream::trunc); } catch (ifstream::failure e) { cout << "could not open dictionary file for output" << endl; return; } SongDB::Iterator dbi = songDB.begin(); Song theSong; string theWord; stringstream theLyrics; int theID; unsigned int theHash; int collisions = 0, inserts=0; MapValue mv; string t; srand(time(NULL)); seed = rand() % 40000000; bool flag = false; while (dbi != songDB.end()) { theSong = dbi.value(); theID = dbi.songId(); theLyrics << ""; theLyrics.clear(); t = theSong.getLyrics(); theLyrics.str(t); theWord = ""; while (theLyrics >> theWord) { theWord = strip(theWord); if (theWord.length() < 2) continue; transform(theWord.begin(), theWord.end(), theWord.begin(), ToLower()); if (USE_BLACKLIST && isBlacklisted(theWord)) continue; theHash = MurmurHash2A(theWord.c_str(), theWord.length(), seed); if (theMap.find(theHash) == theMap.end()) { mv = MapValue(); //cout << " (new)"; mv.songlist = vector<SongId>(); mv.songlist.push_back(theID); mv.key = theWord; theMap.insert(pair<unsigned int, MapValue >(theHash,mv)); inserts++; } else // append { // check for collision mv = theMap[theHash]; if (mv.key.compare(theWord)==0) // no collision, append { // cout << "No collision, append" << endl; // cout << "songlist size before = " << mv.songlist.size() << endl; // cout << "adding " << theID << " to the songID list for " << theWord << endl; // check to see if the ID we're on is already on this list flag = false; for (unsigned int j=0; j<mv.songlist.size(); j++) { if (mv.songlist[j] == theID) flag = true; } if (!flag) mv.songlist.push_back(theID); theMap[theHash] = mv; inserts++; } else { // collision, rehash with a different seed to try again //cout << " ** Hash Collision! 
" << theHash << " current: " << mv.key << " mew: " << theWord << endl; collisions++; // linear forward search do { mv = theMap[++theHash]; } while ((mv.songlist.size() > 0) && (mv.key.compare(theWord)!=0)); //Now, we either have the first blank address (for a new collision) or the previously resolved collision address. if (mv.songlist.size() > 0) { mv = MapValue(); mv.songlist = vector<SongId>(); mv.songlist.push_back(theID); mv.key = theWord; inserts++; theMap[theHash] = mv; //cout << "Previously resolved. New address is " << theHash << endl; } else { // mv = theMap[theHash]; // cout << "Resolution: " << theHash << endl; // cout << "Resolution: " << theHash << endl; mv.key = theWord; mv.songlist = vector<SongId>(); mv.songlist.push_back(theID); theMap[theHash] = mv; inserts++; } //goto restartRound; // I know. } } } if (inserts > MAX_SONGS) break; //return; dbi = dbi.next(); } cout << collisions << " collisions and " << inserts << " records (" << (((1.0*collisions)/inserts)*100) << "%)" << endl; //Now, output the semi-ordered map to file // cout << "outputting file..." 
<< endl; dictfile.put((char)0); dictfile.write((char *)&seed, sizeof(unsigned int)); unsigned short tui = 0; char* tc; //cout << "Seed = " << hex << seed << endl; dictfile.put('\n'); // dictfile << "Seed " << seed << endl; for (map<unsigned int, MapValue>::iterator iter=theMap.begin(); iter != theMap.end(); iter++) { mv = (*iter).second; // dictfile << (*iter).first << " " << mv.key << " " << mv.songlist.size() << " "; dictfile.write((char *)&(*iter).first, sizeof(unsigned int)); //cout << hex << (*iter).first << " " << mv.key.length() << endl; tui = mv.key.length(); dictfile.write((char *)&tui, sizeof(unsigned short)); tc = new char[tui+1]; strcpy(tc, mv.key.c_str()); // cout << "tui = " << tui << endl; for (unsigned short j=0; j<=tui; j++) { // cout << tc[j]; dictfile.write(&tc[j], sizeof(char)); } delete[] tc; // cout << mv.key << " " << mv.songlist.size() << endl; tui = mv.songlist.size(); dictfile.write((char *)&tui, sizeof(unsigned short)); for (unsigned short j=0; j<tui; j++) { dictfile.write((char*)&mv.songlist[j], sizeof(unsigned int)); } //dictfile.put('\n'); } dictfile.close(); }
/* Dict::search: returns a list of SongResult objects for the songs whose
 * lyrics contain the given search term (up to limit results).
 *
 * The term is passed through strip() and, when USE_BLACKLIST is set, rejected
 * if it is blacklisted. It is then hashed with the dictionary seed and looked
 * up in theMap; if the slot belongs to a different word, the following slots
 * (hash + 1, hash + 2, ...) are probed until the word or a blank slot is
 * found. An empty list is returned when the term is blacklisted or absent.
 *
 * The lookup never modifies theMap.
 *
 * NOTE(review): createDict() lower-cases words before hashing them but this
 * function does not lower-case the key -- confirm callers pass lower-case
 * terms.
 */
list<SongResult> Dict::search(string key, unsigned int limit)
{
    list<SongResult> retval;

    key = strip(key);
    if (USE_BLACKLIST && isBlacklisted(key))
    {
        cout << key << " is on the blacklist. Exiting." << endl;
        return retval;
    }

    unsigned int theHash = MurmurHash2A(key.c_str(), key.length(), seed);

    // Use find() rather than operator[] so that a lookup does not insert
    // blank entries into the dictionary.
    map<unsigned int, MapValue>::const_iterator slot = theMap.find(theHash);
    if (slot == theMap.end() || slot->second.key.length() == 0)
    {
        cout << "not found." << endl; // no results.
        return retval;
    }

    if (slot->second.key.compare(key) != 0) // collision
    {
        // linear forward search until the word or a blank slot is reached
        do
        {
            slot = theMap.find(++theHash);
        } while (slot != theMap.end()
                 && slot->second.key.length() > 0
                 && slot->second.key.compare(key) != 0);

        if (slot == theMap.end() || slot->second.key.length() == 0)
        {
            cout << "Unresolved hash collision on input. Fatal error." << endl;
            return retval;
        }
    }

    // Build one result per attached song, capped at `limit`. The id is filled
    // in on the probed path as well as on the direct-hit path.
    const vector<SongId>& songs = slot->second.songlist;
    for (unsigned int i = 0; i < songs.size() && i < limit; i++)
    {
        SongResult tsr = SongResult();
        tsr.key = key;
        tsr.keylength = key.length();
        tsr.id = songs[i];
        tsr.song = songDB.getSongById(songs[i]);
        retval.push_back(tsr);
    }

    return retval;
}