示例#1
0
  // insert
  void insert(const hashdb_element_t& hashdb_element, hashdb_changes_t& changes) {

    // validate block size
    if (settings.hash_block_size != 0 &&
        (hashdb_element.hash_block_size != settings.hash_block_size)) {
      ++changes.hashes_not_inserted_mismatched_hash_block_size;
      return;
    }

    // validate the byte alignment, see configure.ac for HASHDB_BYTE_ALIGNMENT
    if (hashdb_element.file_offset % HASHDB_BYTE_ALIGNMENT != 0) {
      ++changes.hashes_not_inserted_invalid_byte_alignment;
      return;
    }

    // checks passed, insert or have reason not to insert

    // acquire existing or new source lookup index
    std::pair<bool, uint64_t> lookup_pair =
         source_lookup_index_manager.insert(hashdb_element.repository_name,
                                            hashdb_element.filename);
    uint64_t source_lookup_index = lookup_pair.second;

    // compose the source lookup encoding
    uint64_t encoding = source_lookup_encoding::get_source_lookup_encoding(
                       source_lookup_index,
                       hashdb_element.file_offset);

    // if the key may exist then check against duplicates and max count
    if (bloom_filter_manager.is_positive(hashdb_element.key)) {
      size_t count = 0;
      multimap_iterator_t it = multimap.lower_bound(hashdb_element.key);
      while (it != multimap.end() && it->first == hashdb_element.key) {
        if (it->second == encoding) {
          // this exact element already exists
          ++changes.hashes_not_inserted_duplicate_element;
          return;
        }
        ++count;
        ++it;
      }

      // do not exceed max count allowed
      if (settings.maximum_hash_duplicates > 0 &&
                               count >= settings.maximum_hash_duplicates) {
        // at maximum for this hash
        ++changes.hashes_not_inserted_exceeds_max_duplicates;
        return;
      }
    }

    // add the element since all the checks passed
    multimap.emplace(hashdb_element.key, encoding);
    ++changes.hashes_inserted;

    // add hash to bloom filter, too, even if already there
    bloom_filter_manager.add_hash_value(hashdb_element.key);
  }
示例#2
0
 // find_count
 // Return the number of entries stored under key.
 //
 // The bloom filter is consulted first; when it reports the key as not
 // present, 0 is returned without looking at the multimap.
 uint32_t find_count(const hash_t& key) const {
   if (bloom_filter_manager.is_positive(key)) {
     // the key may be present, so the multimap supplies the count
     return multimap.count(key);
   }

   // bloom filter says the key is not present
   return 0;
 }
示例#3
0
  // Insert one hash entry, bracketed by MUTEX_LOCK(&M) / MUTEX_UNLOCK(&M).
  //
  // Exactly one counter in `changes` is incremented per call:
  //   hashes_not_inserted_invalid_byte_alignment
  //   hashes_not_inserted_mismatched_hash_block_size
  //   hashes_not_inserted_duplicate_element
  //   hashes_not_inserted_exceeds_max_duplicates
  //   hashes_inserted
  //
  // There is no scoped guard: every return path below issues its own
  // MUTEX_UNLOCK(&M), so any new exit added to this function must do the
  // same.
  void insert(const std::string& binary_hash,
              uint64_t file_offset,
              uint32_t hash_block_size,
              lmdb_source_data_t source_data,
              const std::string& hash_label) {

    MUTEX_LOCK(&M);

    // the file offset must be a multiple of the configured byte alignment
    const bool is_aligned = (file_offset % settings.byte_alignment == 0);
    if (!is_aligned) {
      ++changes.hashes_not_inserted_invalid_byte_alignment;
      MUTEX_UNLOCK(&M);
      return;
    }

    // block size check; a configured hash_block_size of 0 skips this check
    if (settings.hash_block_size != 0) {
      if (hash_block_size != settings.hash_block_size) {
        ++changes.hashes_not_inserted_mismatched_hash_block_size;
        MUTEX_UNLOCK(&M);
        return;
      }
    }

    // existing or new source lookup index for this repository name and
    // filename; only the index half of the returned pair is used
    const uint64_t source_index =
         name_store.insert(source_data.repository_name,
                           source_data.filename).second;

    // a negative bloom filter answer skips both lookups in the hash store
    if (bloom_filter_manager.is_positive(binary_hash)) {

      // reject when this exact key, value entry is already stored
      if (hash_store.find(binary_hash,
                          source_index,
                          file_offset,
                          hash_label)) {
        ++changes.hashes_not_inserted_duplicate_element;
        MUTEX_UNLOCK(&M);
        return;
      }

      // reject when the hash already has the maximum number of entries;
      // a maximum_hash_duplicates of 0 means the limit is not enforced and
      // the count is not even looked up
      if (settings.maximum_hash_duplicates > 0) {
        const size_t stored_total = hash_store.find_count(binary_hash);
        if (stored_total >= settings.maximum_hash_duplicates) {
          ++changes.hashes_not_inserted_exceeds_max_duplicates;
          MUTEX_UNLOCK(&M);
          return;
        }
      }
    }

    // nothing objected, so store the entry
    hash_store.insert(binary_hash,
                      source_index,
                      file_offset,
                      hash_label);
    ++changes.hashes_inserted;

    // record the source data too, in case it is not there yet
    source_store.add(source_index, source_data);

    // record the hash in the bloom filter as well, even if already there
    bloom_filter_manager.add_hash_value(binary_hash);

    MUTEX_UNLOCK(&M);
  }