/**
 * Computes a hash value for a trinary key.
 *
 * Each of the key's f, g and h members is hashed with g_direct_hash()
 * and the three results are folded, in that order, into a single value
 * via combine_hash(), starting from zero.
 *
 * @param key the trinary key to hash; it is dereferenced unconditionally
 * @return the combined hash value
 */
guint
ipset_trinary_key_hash(ipset_trinary_key_t *key)
{
    guint accumulated = 0;
    guint member_hash;

    /* Fold in the f member first */
    member_hash = g_direct_hash(key->f);
    combine_hash(&accumulated, member_hash);

    /* Then the g member */
    member_hash = g_direct_hash(key->g);
    combine_hash(&accumulated, member_hash);

    /* And finally the h member */
    member_hash = g_direct_hash(key->h);
    combine_hash(&accumulated, member_hash);

    return accumulated;
}
/**
 * Provides the hash value of the sub-m-gram delimited by the given
 * begin and end word indexes. The hashes are built incrementally and
 * are stored in a row selected by the begin word index, so a call only
 * computes the hashes that are not yet present in that row.
 * @param begin_word_idx the index of the first word of the sub-m-gram
 * @param end_word_idx the index of the last word of the sub-m-gram
 * @return the hash value of the requested sub-m-gram
 */
inline uint64_t get_hash(TModelLevel begin_word_idx, const TModelLevel end_word_idx) const {
    //The method is const, so the const qualifier is cast away to be able
    //to update the level that has already been computed for this row
    TModelLevel & cached_level = const_cast<TModelLevel &> (m_hash_level_row[begin_word_idx]);

    LOG_DEBUG1 << "Getting hash values for begin/end index: " << SSTR(begin_word_idx)
            << "/" << SSTR(end_word_idx) << ", the previous computed begin level "
            << "is: " << SSTR(cached_level) << END_LOG;

    //The row of hash values for this begin word index, also made writable
    uint64_t(& row_hashes)[MAX_LEVEL] = const_cast<uint64_t(&)[MAX_LEVEL]> (m_hash_matrix[begin_word_idx]);

    //The level that corresponds to the requested begin/end index pair
    const TModelLevel requested_level = CURR_LEVEL_MAP[begin_word_idx][end_word_idx];

    //Something is to be computed only if the requested level exceeds the stored one
    if (requested_level > cached_level) {
        //The index of the next word whose hash is to be computed
        TModelLevel word_idx = begin_word_idx;

        if (cached_level != M_GRAM_LEVEL_UNDEF) {
            //Some hashes of this row are already there,
            //so continue right after the last computed one
            word_idx += cached_level;
        } else {
            //Nothing has been computed for this row yet: the hash
            //of the very first word is just the id of that word
            row_hashes[word_idx] = BASE::m_word_ids[word_idx];

            LOG_DEBUG1 << "word[" << SSTR(word_idx) << "] = "
                    << BASE::m_word_ids[word_idx]
                    << ", hash[" << SSTR(word_idx) << "] = "
                    << row_hashes[word_idx] << END_LOG;

            ++word_idx;
        }

        //Extend the row word by word until the end word index is reached
        while (word_idx <= end_word_idx) {
            //Each new hash combines the next word id with the preceding hash
            row_hashes[word_idx] = combine_hash(BASE::m_word_ids[word_idx], row_hashes[word_idx - 1]);

            LOG_DEBUG1 << "hash[" << SSTR(word_idx) << "] = combine( word["
                    << SSTR(word_idx) << "] = " << BASE::m_word_ids[word_idx]
                    << ", hash[" << SSTR(word_idx - 1) << "] = "
                    << row_hashes[word_idx - 1] << " ) = "
                    << row_hashes[word_idx] << END_LOG;

            ++word_idx;
        }

        //Remember up to which level this row is now filled in
        cached_level = requested_level;
    }

    LOG_DEBUG1 << "Resulting hash value: " << row_hashes[end_word_idx] << END_LOG;

    //At this point the requested hash is present in the row
    return row_hashes[end_word_idx];
}
/** * Allows to prepare the M-gram for being used for adding it to the trie * This includes registering the one gram in the word index */ inline void prepare_for_adding() { LOG_DEBUG1 << "Preparing the " << SSTR(BASE::m_actual_level) << "-gram for adding to the trie." << END_LOG; //If we have a unigram then add it to the index otherwise get the word ids if (BASE::m_actual_level == M_GRAM_LEVEL_1) { const TModelLevel & begin_word_idx = BASE::m_actual_begin_word_idx; if (BASE::m_word_index.is_word_registering_needed()) { //Register the word if it is needed BASE::m_word_ids[begin_word_idx] = BASE::m_word_index.register_word(BASE::m_tokens[begin_word_idx]); } else { //Otherwise jut get its id BASE::m_word_ids[begin_word_idx] = BASE::m_word_index.get_word_id(BASE::m_tokens[begin_word_idx]); } //The Unigram's hash value is equal to the word id m_hash_values[begin_word_idx] = BASE::m_word_ids[begin_word_idx]; LOG_DEBUG1 << "word[" << SSTR(begin_word_idx) << "] = " << BASE::m_word_ids[begin_word_idx] << ", hash[" << SSTR(begin_word_idx) << "] = " << m_hash_values[begin_word_idx] << END_LOG; } else { TModelLevel curr_idx = BASE::m_actual_begin_word_idx; //Start with the first word BASE::m_word_ids[curr_idx] = BASE::m_word_index.get_word_id(BASE::m_tokens[curr_idx]); //The Unigram's hash value is equal to the word id m_hash_values[curr_idx] = BASE::m_word_ids[curr_idx]; //Store the word ids without the unknown word flags and pre-compute the m-gram hash values for (++curr_idx; curr_idx <= BASE::m_actual_end_word_idx; ++curr_idx) { //Get the next word id BASE::m_word_ids[curr_idx] = BASE::m_word_index.get_word_id(BASE::m_tokens[curr_idx]); //Compute the next hash value m_hash_values[curr_idx] = combine_hash(BASE::m_word_ids[curr_idx], m_hash_values[curr_idx - 1]); LOG_DEBUG1 << "hash[" << SSTR(curr_idx) << "] = combine( word[" << SSTR(curr_idx) << "] = " << BASE::m_word_ids[curr_idx] << ", hash[" << SSTR(curr_idx - 1) << "] = " << m_hash_values[curr_idx - 1] << " ) = " << 
m_hash_values[curr_idx] << END_LOG; } } }