Пример #1
0
  /**
   * Replaces the current table with one twice as large, re-bucketing every
   * entry of `old` into it.
   *
   * Only one extension runs at a time (extend_lock_). If `old` is no longer
   * the published table, another thread has already extended it and this call
   * returns without doing anything.
   *
   * Every slot lock is held for the whole migration. The new table is
   * published through table_ before any slot lock is released.
   *
   * Entries are copied into freshly allocated nodes. The old table and its
   * nodes are not freed here.
   * NOTE(review): presumably because readers may still hold a pointer to the
   * old table - confirm who reclaims it.
   *
   * @param old  the table the caller observed as full
   */
  void extend(table* old) {
    table& old_table = *old;
    std::lock_guard<std::mutex> lock(extend_lock_);
    if (table_.load() != &old_table) { return; }

    // Take the slot locks through RAII owners so that they are released on
    // every exit path, including an exception thrown by the allocations below.
    std::vector<std::unique_lock<std::mutex>> slot_guards;
    slot_guards.reserve(lock_table_.size());
    for (auto& slot_lock : lock_table_) {
      slot_guards.emplace_back(slot_lock);
    }

    const size_t new_size = old_table.size * 2;
    std::unique_ptr<table> new_table_ptr(init_table(new_size, 0));
    table& new_table = *new_table_ptr;
    for (size_t i = 0; i < old_table.size; ++i) {
      for (kvp* target = old_table[i]; target != nullptr; target = target->next) {
        // Same hash as insert()/get(); only the modulus changes.
        const size_t new_hash = MurmurHash2A(&target->key, sizeof(int), 0);
        const size_t slot = new_hash % new_size;
        std::unique_ptr<kvp> entry(new kvp(target->key, target->value));

        // Push the copy onto the front of its new bucket.
        entry->next = new_table[slot];
        new_table[slot] = entry.get();
        new_table.add_entry();
        entry.release();  // the bucket list now refers to the node
      }
    }

    // Publish while the slot locks are still held; slot_guards unlocks them
    // when it goes out of scope, before extend_lock_ is released.
    table_.store(new_table_ptr.release());
  }
/*
 * NIF entry point: unpacks argv[0] into a binary, hashes its bytes with
 * MurmurHash2A using seed 0 and returns the 32-bit result as an unsigned
 * integer term. Returns badarg when argv[0] cannot be unpacked.
 */
ERL_NIF_TERM
erlang_murmurhash2a_1_impl(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifBinary input;

    if (check_and_unpack_data(env, argv[0], &input)) {
        const uint32_t digest = MurmurHash2A(input.data, input.size, 0);

        return enif_make_uint(env, digest);
    }

    return enif_make_badarg(env);
}
Пример #3
0
/*
 * Ring binding for MurmurHash2A.
 *
 * Parameters (read through the RING_API_* macros):
 *   1: string  - the key to hash (its length is taken with strlen)
 *   2: number  - the seed
 *   3: number  - optional; must satisfy is_bool() and is passed to
 *                MH_RETURN_INT as the return type selector
 *
 * Any validation failure raises a Ring error and returns without hashing.
 */
void ring_murmurhash2a(void *pPointer)
{
    char *key = NULL;
    int keylen;
    int seed = 0;
    uint32_t out;
    int ret_type = 0;

    if (RING_API_PARACOUNT < 2 || RING_API_PARACOUNT > 3) {
        RING_API_ERROR(RING_API_MISS2PARA);
        return;
    }

    if (!RING_API_ISSTRING(1)) {
        RING_API_ERROR("murmurhash2a expects the first parameter to be a string");
        return;
    }

    if (!RING_API_ISNUMBER(2)) {
        RING_API_ERROR("murmurhash2a expects the second parameter to be an integer");
        return;
    }

    key = RING_API_GETSTRING(1);
    keylen = (int) strlen(key);
    seed = RING_API_GETNUMBER(2);

    if (RING_API_PARACOUNT == 3) {
        if (!RING_API_ISNUMBER(3)) {
            RING_API_ERROR("murmurhash2a expects the third parameter to be an integer\n");
            return;
        }

        ret_type = RING_API_GETNUMBER(3);
        if (!is_bool(ret_type)) {
            /* Stop here: do not return a hash after reporting an error. */
            RING_API_ERROR("Third parameter should be boolean value\n");
            return;
        }
    }

    out = MurmurHash2A(key, keylen, seed);

    MH_RETURN_INT(out, ret_type);
}
Пример #4
0
  void insert(int key, int value) {
    const size_t hash = MurmurHash2A(&key, sizeof(int), 0);
    table& t = *table_.load(std::memory_order_acquire);

    const size_t slot = hash % t.size;
    kvp* const entry = new kvp(key, value);

    {
      const size_t lock_slot = hash % lock_table_.size();
      std::lock_guard<std::mutex> lock(lock_table_[lock_slot]);
      kvp* orig = t[slot];
      t[slot] = entry;
      entry->next = orig;
      t.add_entry();
    }

    // extend if needed
    if (t.get_entries() == t.size) {
      extend(&t);
    }
  }
Пример #5
0
  int get(int key) const {
    table& t = *table_.load(std::memory_order_acquire);
    const size_t hash = MurmurHash2A(&key, sizeof(int), 0);
    const size_t slot = hash % t.size;

    kvp* target = t[slot];
    if (target != nullptr) {
      // getting slot lock entire iterate list in bucket
      const size_t lock_slot = hash % lock_table_.size();
      std::lock_guard<std::mutex> lock(lock_table_[lock_slot]);

      while (target != nullptr) {
        if (target->key == key) {
          return target->value;
        }
        target = target->next;
      }
    }

    return 0;
    // throw std::runtime_error("not found");
  }
Пример #6
0
/* Dict::createDict(): creates a new dictionary file using the SongDB.
*/
void Dict::createDict()
{
    ofstream dictfile;
    try
    {
        dictfile.open(dictfileName.c_str(), fstream::out | fstream::trunc);
    }
    catch (ifstream::failure e)
    {
        cout << "could not open dictionary file for output" << endl;
        return;
    }

    SongDB::Iterator dbi = songDB.begin();
    Song theSong;
    string theWord;
    stringstream theLyrics;
    int theID;
    unsigned int theHash;

    int collisions = 0, inserts=0;

    MapValue mv;
    string t;

    srand(time(NULL));
    seed = rand() % 40000000;
    bool flag = false;

    while (dbi != songDB.end())
    {
        theSong = dbi.value();
        theID = dbi.songId();
        theLyrics << "";
        theLyrics.clear();
        t = theSong.getLyrics();
        theLyrics.str(t);

        theWord = "";

        while (theLyrics >> theWord)
        {
            theWord = strip(theWord);

            if (theWord.length() < 2)
                continue;

            transform(theWord.begin(), theWord.end(), theWord.begin(), ToLower());

            if (USE_BLACKLIST && isBlacklisted(theWord))
                continue;

            theHash = MurmurHash2A(theWord.c_str(), theWord.length(), seed);

            if (theMap.find(theHash) == theMap.end())
            {
                mv = MapValue();
                //cout << " (new)";
                mv.songlist = vector<SongId>();
                mv.songlist.push_back(theID);
                mv.key = theWord;
                theMap.insert(pair<unsigned int, MapValue >(theHash,mv));
                inserts++;
            }
            else    // append
            {
                // check for collision
                mv = theMap[theHash];

                if (mv.key.compare(theWord)==0) // no collision, append
                {
                    // cout << "No collision, append" << endl;
                    // cout << "songlist size before = " << mv.songlist.size() << endl;
                    //  cout << "adding " << theID << " to the songID list for " << theWord << endl;

                    // check to see if the ID we're on is already on this list
                    flag = false;
                    for (unsigned int j=0; j<mv.songlist.size(); j++)
                    {
                        if (mv.songlist[j] == theID)
                            flag = true;
                    }

                    if (!flag)
                        mv.songlist.push_back(theID);

                    theMap[theHash] = mv;
                    inserts++;
                }
                else
                {
                    // collision, rehash with a different seed to try again
                    //cout << " ** Hash Collision! " << theHash << " current: " << mv.key << " mew: " << theWord << endl;
                    collisions++;

                    // linear forward search
                    do
                    {
                        mv = theMap[++theHash];
                    }
                    while ((mv.songlist.size() > 0) && (mv.key.compare(theWord)!=0));

                    //Now, we either have the first blank address (for a new collision) or the previously resolved collision address.
                    if (mv.songlist.size() > 0)
                    {
                        mv = MapValue();

                        mv.songlist = vector<SongId>();
                        mv.songlist.push_back(theID);
                        mv.key = theWord;

                        inserts++;
                        theMap[theHash] = mv;

                        //cout << "Previously resolved.  New address is " << theHash << endl;
                    }
                    else
                    {
                        // mv = theMap[theHash];
                        // cout << "Resolution: " << theHash << endl;
                        // cout << "Resolution: " << theHash << endl;
                        mv.key = theWord;
                        mv.songlist = vector<SongId>();
                        mv.songlist.push_back(theID);

                        theMap[theHash] = mv;
                        inserts++;
                    }
                    //goto restartRound;  // I know.
                }
            }

        }

        if (inserts > MAX_SONGS)
            break;
        //return;
        dbi = dbi.next();
    }
    cout << collisions << " collisions and " << inserts << " records ("  << (((1.0*collisions)/inserts)*100) << "%)" << endl;
    //Now, output the semi-ordered map to file
    // cout << "outputting file..." << endl;

    dictfile.put((char)0);

    dictfile.write((char *)&seed, sizeof(unsigned int));
    unsigned short tui = 0;
    char* tc;

//cout << "Seed = " << hex << seed << endl;
    dictfile.put('\n');
    //  dictfile << "Seed " << seed << endl;
    for (map<unsigned int, MapValue>::iterator iter=theMap.begin(); iter != theMap.end(); iter++)
    {

        mv = (*iter).second;
        // dictfile << (*iter).first << " " << mv.key << " " << mv.songlist.size() << " ";
        dictfile.write((char *)&(*iter).first, sizeof(unsigned int));
        //cout << hex << (*iter).first << " " << mv.key.length() << endl;

        tui = mv.key.length();
        dictfile.write((char *)&tui, sizeof(unsigned short));

        tc = new char[tui+1];
        strcpy(tc, mv.key.c_str());
        // cout << "tui = " << tui << endl;
        for (unsigned short j=0; j<=tui; j++)
        {
            //  cout << tc[j];
            dictfile.write(&tc[j], sizeof(char));
        }
        delete[] tc;

        // cout << mv.key << " " << mv.songlist.size() << endl;

        tui = mv.songlist.size();
        dictfile.write((char *)&tui, sizeof(unsigned short));

        for (unsigned short j=0; j<tui; j++)
        {
            dictfile.write((char*)&mv.songlist[j], sizeof(unsigned int));
        }
        //dictfile.put('\n');
    }

    dictfile.close();
}
Пример #7
0
/* Dict::search: returns a list of SongResult objects for the songs indexed
 * under the given search term (up to limit results).
 *
 * The term is stripped, rejected if blacklisted (when USE_BLACKLIST is set),
 * and hashed with the dictionary seed. If its home address holds a different
 * word, following addresses (hash+1, hash+2, ...) are examined until the word
 * or an unused address is found. The lookup never modifies theMap.
 *
 * An empty list is returned when the term is blacklisted or not present; a
 * diagnostic line is printed to cout in each of those cases.
 *
 * NOTE(review): createDict() lower-cases words before hashing them and this
 * function does not - confirm that callers pass lower-case terms.
 */
list<SongResult> Dict::search(string key, unsigned int limit)
{
    list<SongResult> retval;

    key = strip(key);

    if (USE_BLACKLIST && isBlacklisted(key))
    {
        cout << key << " is on the blacklist. Exiting." << endl;
        return retval;
    }

    unsigned int theHash = MurmurHash2A(key.c_str(), key.length(), seed);

    // find() instead of operator[]: operator[] would insert an empty record
    // for every address that is looked at.
    map<unsigned int, MapValue>::iterator slot = theMap.find(theHash);

    if (slot == theMap.end() || slot->second.key.length() == 0)
    {
        cout << "not found." << endl;
        return retval;
    }

    // Home address is occupied by another word: linear forward search.
    while (slot->second.key.compare(key) != 0)
    {
        slot = theMap.find(++theHash);

        if (slot == theMap.end() || slot->second.key.length() == 0)
        {
            cout << "Unresolved hash collision on input. Fatal error." << endl;
            return retval;
        }
    }

    // Same result shape whether the word was found at its home address or at
    // a collision address (id included in both cases).
    MapValue& match = slot->second;
    for (unsigned int i = 0; i < match.songlist.size() && i < limit; i++)
    {
        SongResult tsr = SongResult();
        tsr.key = key;
        tsr.keylength = key.length();
        tsr.id = match.songlist[i];
        tsr.song = songDB.getSongById(match.songlist[i]);
        retval.push_back(tsr);
    }

    return retval;
}