Пример #1
0
/*
 * Hashes a trinary key: the direct hashes of its f, g and h fields are
 * merged, in that order, into a single accumulator that starts at zero.
 */
guint
ipset_trinary_key_hash(ipset_trinary_key_t *key)
{
    /* Hash each field on its own first ... */
    const guint  hash_f = g_direct_hash(key->f);
    const guint  hash_g = g_direct_hash(key->g);
    const guint  hash_h = g_direct_hash(key->h);
    guint  result = 0;

    /* ... then merge them; the f, g, h order is kept as is. */
    combine_hash(&result, hash_f);
    combine_hash(&result, hash_g);
    combine_hash(&result, hash_h);

    return result;
}
                    /**
                     * Retrieves the hash value of the sub-m-gram delimited by the given
                     * word indexes. The hashes of a row are built incrementally, each
                     * from the previous hash and the next word id, and the level reached
                     * so far is remembered per begin word, so that only the hashes not
                     * computed by earlier calls are evaluated here.
                     * @param begin_word_idx the begin word index of the sub-m-gram
                     * @param end_word_idx the end word index of the sub-m-gram
                     * @return the hash value for the given sub-m-gram
                     */
                    inline uint64_t get_hash(TModelLevel begin_word_idx, const TModelLevel end_word_idx) const {
                        //The level already hashed for this begin word; constness is cast
                        //away so that this const method can update the stored level
                        TModelLevel & cached_level = const_cast<TModelLevel &> (m_hash_level_row[begin_word_idx]);

                        LOG_DEBUG1 << "Getting hash values for begin/end index: " << SSTR(begin_word_idx)
                                << "/" << SSTR(end_word_idx) << ", the previous computed begin level "
                                << "is: " << SSTR(cached_level) << END_LOG;

                        //The row of hash values that belongs to this begin word
                        uint64_t(& row)[MAX_LEVEL] = const_cast<uint64_t(&)[MAX_LEVEL]> (m_hash_matrix[begin_word_idx]);

                        //The level requested by the begin/end word index pair
                        const TModelLevel needed_level = CURR_LEVEL_MAP[begin_word_idx][end_word_idx];

                        //There is only work to do if the requested level exceeds the stored one
                        if (needed_level > cached_level) {
                            //The index of the next word whose hash is to be computed
                            TModelLevel word_idx = begin_word_idx;

                            if (cached_level != M_GRAM_LEVEL_UNDEF) {
                                //Some levels are already there, so continue
                                //right after the last one that was computed
                                word_idx += cached_level;
                            } else {
                                //Nothing has been computed for this row yet: the
                                //hash of the first word is simply its word id
                                row[word_idx] = BASE::m_word_ids[word_idx];

                                LOG_DEBUG1 << "word[" << SSTR(word_idx) << "] = "
                                        << BASE::m_word_ids[word_idx]
                                        << ", hash[" << SSTR(word_idx) << "] = "
                                        << row[word_idx] << END_LOG;

                                ++word_idx;
                            }

                            //Extend the hash chain up to and including the end word, if needed
                            while (word_idx <= end_word_idx) {
                                //Each hash combines the next word id with the previous hash
                                row[word_idx] = combine_hash(BASE::m_word_ids[word_idx], row[word_idx - 1]);

                                LOG_DEBUG1 << "hash[" << SSTR(word_idx) << "] = combine( word["
                                        << SSTR(word_idx) << "] = " << BASE::m_word_ids[word_idx]
                                        << ", hash[" << SSTR(word_idx - 1) << "] = "
                                        << row[word_idx - 1] << " ) = "
                                        << row[word_idx] << END_LOG;

                                ++word_idx;
                            }

                            //Remember the level reached for this begin word
                            cached_level = needed_level;
                        }

                        LOG_DEBUG1 << "Resulting hash value: " << row[end_word_idx] << END_LOG;

                        //At this point the requested hash is present in the row
                        return row[end_word_idx];
                    }
                    /**
                     * Allows to prepare the M-gram for being used for adding it to the trie
                     * This includes registering the one gram in the word index
                     */
                    inline void prepare_for_adding() {
                        LOG_DEBUG1 << "Preparing the " << SSTR(BASE::m_actual_level) << "-gram for adding to the trie." << END_LOG;

                        //If we have a unigram then add it to the index otherwise get the word ids
                        if (BASE::m_actual_level == M_GRAM_LEVEL_1) {
                            const TModelLevel & begin_word_idx = BASE::m_actual_begin_word_idx;
                            if (BASE::m_word_index.is_word_registering_needed()) {
                                //Register the word if it is needed
                                BASE::m_word_ids[begin_word_idx] = BASE::m_word_index.register_word(BASE::m_tokens[begin_word_idx]);
                            } else {
                                //Otherwise jut get its id
                                BASE::m_word_ids[begin_word_idx] = BASE::m_word_index.get_word_id(BASE::m_tokens[begin_word_idx]);
                            }
                            //The Unigram's hash value is equal to the word id
                            m_hash_values[begin_word_idx] = BASE::m_word_ids[begin_word_idx];

                            LOG_DEBUG1 << "word[" << SSTR(begin_word_idx) << "] = "
                                    << BASE::m_word_ids[begin_word_idx]
                                    << ", hash[" << SSTR(begin_word_idx) << "] = "
                                    << m_hash_values[begin_word_idx] << END_LOG;
                        } else {
                            TModelLevel curr_idx = BASE::m_actual_begin_word_idx;
                            //Start with the first word
                            BASE::m_word_ids[curr_idx] = BASE::m_word_index.get_word_id(BASE::m_tokens[curr_idx]);
                            //The Unigram's hash value is equal to the word id
                            m_hash_values[curr_idx] = BASE::m_word_ids[curr_idx];

                            //Store the word ids without the unknown word flags and pre-compute the m-gram hash values
                            for (++curr_idx; curr_idx <= BASE::m_actual_end_word_idx; ++curr_idx) {
                                //Get the next word id
                                BASE::m_word_ids[curr_idx] = BASE::m_word_index.get_word_id(BASE::m_tokens[curr_idx]);
                                //Compute the next hash value 
                                m_hash_values[curr_idx] = combine_hash(BASE::m_word_ids[curr_idx], m_hash_values[curr_idx - 1]);

                                LOG_DEBUG1 << "hash[" << SSTR(curr_idx) << "] = combine( word["
                                        << SSTR(curr_idx) << "] = " << BASE::m_word_ids[curr_idx]
                                        << ", hash[" << SSTR(curr_idx - 1) << "] = "
                                        << m_hash_values[curr_idx - 1] << " ) = "
                                        << m_hash_values[curr_idx] << END_LOG;
                            }
                        }
                    }