// insert void insert(const hashdb_element_t& hashdb_element, hashdb_changes_t& changes) { // validate block size if (settings.hash_block_size != 0 && (hashdb_element.hash_block_size != settings.hash_block_size)) { ++changes.hashes_not_inserted_mismatched_hash_block_size; return; } // validate the byte alignment, see configure.ac for HASHDB_BYTE_ALIGNMENT if (hashdb_element.file_offset % HASHDB_BYTE_ALIGNMENT != 0) { ++changes.hashes_not_inserted_invalid_byte_alignment; return; } // checks passed, insert or have reason not to insert // acquire existing or new source lookup index std::pair<bool, uint64_t> lookup_pair = source_lookup_index_manager.insert(hashdb_element.repository_name, hashdb_element.filename); uint64_t source_lookup_index = lookup_pair.second; // compose the source lookup encoding uint64_t encoding = source_lookup_encoding::get_source_lookup_encoding( source_lookup_index, hashdb_element.file_offset); // if the key may exist then check against duplicates and max count if (bloom_filter_manager.is_positive(hashdb_element.key)) { size_t count = 0; multimap_iterator_t it = multimap.lower_bound(hashdb_element.key); while (it != multimap.end() && it->first == hashdb_element.key) { if (it->second == encoding) { // this exact element already exists ++changes.hashes_not_inserted_duplicate_element; return; } ++count; ++it; } // do not exceed max count allowed if (settings.maximum_hash_duplicates > 0 && count >= settings.maximum_hash_duplicates) { // at maximum for this hash ++changes.hashes_not_inserted_exceeds_max_duplicates; return; } } // add the element since all the checks passed multimap.emplace(hashdb_element.key, encoding); ++changes.hashes_inserted; // add hash to bloom filter, too, even if already there bloom_filter_manager.add_hash_value(hashdb_element.key); }
// find_count uint32_t find_count(const hash_t& key) const { // if key not in bloom filter then clearly count=0 if (!bloom_filter_manager.is_positive(key)) { // key not present in bloom filter return 0; } else { // return count from multimap return multimap.count(key); } }
void insert(const std::string& binary_hash, uint64_t file_offset, uint32_t hash_block_size, lmdb_source_data_t source_data, const std::string& hash_label) { MUTEX_LOCK(&M); // validate the byte alignment if (file_offset % settings.byte_alignment != 0) { ++changes.hashes_not_inserted_invalid_byte_alignment; MUTEX_UNLOCK(&M); return; } // validate block size if (settings.hash_block_size != 0 && (hash_block_size != settings.hash_block_size)) { ++changes.hashes_not_inserted_mismatched_hash_block_size; MUTEX_UNLOCK(&M); return; } // acquire existing or new source lookup index const std::pair<bool, uint64_t> lookup_pair = name_store.insert(source_data.repository_name, source_data.filename); const uint64_t source_lookup_index = lookup_pair.second; // if the hash may exist then check against duplicates and max count if (bloom_filter_manager.is_positive(binary_hash)) { // disregard if key, value exists if (hash_store.find(binary_hash, source_lookup_index, file_offset, hash_label)) { // this exact entry already exists ++changes.hashes_not_inserted_duplicate_element; MUTEX_UNLOCK(&M); return; } // disregard if above max duplicates if (settings.maximum_hash_duplicates > 0) { const size_t count = hash_store.find_count(binary_hash); if (count >= settings.maximum_hash_duplicates) { // at maximum for this hash ++changes.hashes_not_inserted_exceeds_max_duplicates; MUTEX_UNLOCK(&M); return; } } } // add the entry since all the checks passed hash_store.insert(binary_hash, source_lookup_index, file_offset, hash_label); ++changes.hashes_inserted; // add source data in case it isn't there yet source_store.add(source_lookup_index, source_data); // add hash to bloom filter, too, even if already there bloom_filter_manager.add_hash_value(binary_hash); MUTEX_UNLOCK(&M); }