// Greedily builds a mask of key bits to clear, trying each bit from the most
// significant (31) down to the least significant (0).
//
// For every bit, the trial mask (the bits accepted so far plus this one) is
// evaluated by hashing the *original* map entries with `x & ~trial`. The bit
// is accepted only when apply_hash_to_map() reports success and the hashed
// map has fewer entries than the original; `map` then takes the hashed
// result. If at least one bit was accepted, a single AndNotStep carrying the
// final mask is appended to `steps`.
//
// NOTE(review): apply_hash_to_map() is defined elsewhere; presumably it
// rebuilds its output map from scratch on every call and returns false when
// the hash is unusable - confirm against its definition.
static void
reduce_using_and(IntMap &map, std::vector<std::unique_ptr<Step>> &steps)
{
  // Snapshot of the incoming entries; every trial hashes this snapshot.
  IntPairs original_entries(begin(map), end(map));
  IntMap scratch;
  scratch.reserve(original_entries.size());
  std::bitset<32> accepted;

  for (int bit = 32; bit-- > 0;) {
    std::bitset<32> trial = accepted;
    trial.set(bit);

    // Bits that survive the AND: everything outside the trial mask.
    const uint32_t keep_bits = ~static_cast<uint32_t>(trial.to_ulong());
    auto and_hash = [keep_bits](uint32_t x) { return x & keep_bits; };

    if (!apply_hash_to_map(original_entries, and_hash, scratch)) {
      continue;
    }
    if (scratch.size() >= original_entries.size()) {
      continue;
    }

    // Keep the reduced map and remember the bit that produced it.
    std::swap(map, scratch);
    accepted = trial;
  }

  if (accepted.none()) {
    return;
  }
  steps.push_back(std::unique_ptr<Step>(new AndNotStep(accepted.to_ulong())));
}
static bool reduce_using_crc(IntMap &map, bool allow_conflicts, std::vector<std::unique_ptr<Step>> &steps) { IntPairs map_entries(begin(map), end(map)); std::vector<uint32_t> unique_values; unique_values.reserve(map_entries.size()); for (const auto &entry : map_entries) { unique_values.push_back(entry.second); } std::sort(unique_values.begin(), unique_values.end()); unique_values.erase(std::unique(unique_values.begin(), unique_values.end()), unique_values.end()); unsigned num_unique_values = unique_values.size(); uint32_t collision_sentinal; if (allow_conflicts) { collision_sentinal = find_unused_value(unique_values); ++num_unique_values; } uint32_t best = 0; HashCost best_cost = HashCost::max(); if (allow_conflicts) { best_cost.num_collisions = map_entries.size() / 4; } else { best_cost.num_collisions = 0; best_cost.table_size = get_lookup_table_size(map_entries); if (best_cost.table_size <= 4) return true; best_cost.table_size -= 4; } IntMap tmp; tmp.reserve(map_entries.size()); unsigned min_width = log2_ceil(num_unique_values) - 1; for (unsigned width = min_width; width < 32; width++) { // Don't check polynomials of higher widths - we are unlikely to get any // further improvement. if (best != 0 || (1 << width) > best_cost.table_size) break; for (uint32_t poly = 1 << width; poly < (1 << (width + 1)); poly++) { auto hash = [=](uint32_t x) { return crc32(x, poly, poly); }; HashCost cost; if (apply_hash_to_map(map_entries, hash, best_cost, collision_sentinal, tmp, cost)) { best = poly; best_cost = cost; } } } if (best == 0) return true; auto hash = [=](uint32_t x) { return crc32(x, best, best); }; if (best_cost.num_collisions != 0) { HashCost dummy; apply_hash_to_map(map_entries, hash, HashCost::max(), collision_sentinal, tmp, dummy); DataType data_type = pick_datatype(tmp); steps.push_back( std::unique_ptr<Step>( new CrcLookupAndReturnStep(best, std::move(tmp), data_type, collision_sentinal) ) ); // Build map containing only conflicting keys. 
IntMap remaining; for (const auto &entry : map) { uint32_t x = entry.first; if (steps.back()->apply(x) != Step::DONE) { remaining.insert(entry); } } std::swap(map, remaining); return false; } else { apply_hash_to_map(map_entries, hash, map); steps.push_back(std::unique_ptr<Step>(new CrcStep(best))); return true; } }