コード例 #1
0
int main(int argc, char ** argv)
{
	size_t n = atoi(argv[1]);
	size_t m = atoi(argv[2]);

	DB::Arena pool;
	std::vector<StringRef> data(n);

	std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;

	{
		Stopwatch watch;
		DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
		DB::CompressedReadBuffer in2(in1);

		std::string tmp;
		for (size_t i = 0; i < n && !in2.eof(); ++i)
		{
			DB::readStringBinary(tmp, in2);
			data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
		}

		watch.stop();
		std::cerr << std::fixed << std::setprecision(2)
			<< "Vector. Size: " << n
			<< ", elapsed: " << watch.elapsedSeconds()
			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
			<< std::endl;
	}

	if (!m || m == 1) bench<StringRef_Compare1_Ptrs>				(data, "StringRef_Compare1_Ptrs");
	if (!m || m == 2) bench<StringRef_Compare1_Index>				(data, "StringRef_Compare1_Index");
	if (!m || m == 3) bench<StringRef_CompareMemcmp>				(data, "StringRef_CompareMemcmp");
	if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64>			(data, "StringRef_Compare8_1_byUInt64");
	if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp>			(data, "StringRef_Compare16_1_byMemcmp");
	if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd");
	if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd>	(data, "StringRef_Compare16_1_byUInt64_bitAnd");
#if __SSE4_1__
	if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE>			(data, "StringRef_Compare16_1_byIntSSE");
	if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE>		(data, "StringRef_Compare16_1_byFloatSSE");
	if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4>			(data, "StringRef_Compare16_1_bySSE4");
	if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide>		(data, "StringRef_Compare16_1_bySSE4_wide");
	if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide>		(data, "StringRef_Compare16_1_bySSE_wide");
#endif
	if (!m || m == 100) bench<StringRef_CompareAlwaysTrue>			(data, "StringRef_CompareAlwaysTrue");
	if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue>	(data, "StringRef_CompareAlmostAlwaysTrue");

	/// 10 > 8, 9
	/// 1, 2, 5 - bad


	return 0;
}
コード例 #2
0
int main(int argc, char ** argv)
{
	size_t n = atoi(argv[1]);
	size_t m = atoi(argv[2]);

	DB::Arena pool;
	std::vector<StringRef> data(n);

	std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;

	{
		Stopwatch watch;
		DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
		DB::CompressedReadBuffer in2(in1);

		std::string tmp;
		for (size_t i = 0; i < n && !in2.eof(); ++i)
		{
			DB::readStringBinary(tmp, in2);
			data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
		}

		watch.stop();
		std::cerr << std::fixed << std::setprecision(2)
			<< "Vector. Size: " << n
			<< ", elapsed: " << watch.elapsedSeconds()
			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
			<< std::endl;
	}

	if (!m || m == 1) bench<StringRef_CompareMemcmp, DefaultHash<StringRef>>(data, "StringRef_CityHash64");
	if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64>	(data, "StringRef_FastHash64");
	if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash>	(data, "StringRef_SimpleHash");

#if defined(__x86_64__)
	if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow>		(data, "StringRef_CrapWow");
	if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash>		(data, "StringRef_CRC32Hash");
	if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash>	(data, "StringRef_CRC32ILPHash");
#endif

	if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash");
	if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64");
	if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
	if (!m || m == 10) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");

	return 0;
}
コード例 #3
0
ファイル: string_pool.cpp プロジェクト: Aahart911/ClickHouse
int main(int argc, char ** argv)
{
	std::cerr << std::fixed << std::setprecision(3);
	std::ofstream devnull("/dev/null");
	
	DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
	size_t n = atoi(argv[1]);
	size_t elems_show = 1;

	using Vec = std::vector<std::string>;
	using Set = std::unordered_map<std::string, int>;
	using RefsSet = std::unordered_map<StringRef, int, StringRefHash>;
	using DenseSet = google::dense_hash_map<std::string, int>;
	using RefsDenseSet = google::dense_hash_map<StringRef, int, StringRefHash>;
	using RefsHashMap = HashMap<StringRef, int, StringRefHash>;
	Vec vec;

	vec.reserve(n);

	{
		Stopwatch watch;

		std::string s;
		for (size_t i = 0; i < n && !in.eof(); ++i)
		{
			DB::readEscapedString(s, in);
			DB::assertChar('\n', in);
			vec.push_back(s);
		}

		std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;
	}

	{
		DB::Arena pool;
		Stopwatch watch;
		const char * res = nullptr;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			const char * tmp = pool.insert(it->data(), it->size());
			if (it == vec.begin())
				res = tmp;
		}

		std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		devnull.write(res, 100);
		devnull << std::endl;
	}

	{
		Set set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[*it] = 0;

		std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull << it->first;
			devnull << std::endl;
		}
	}

	{
		RefsSet set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(*it)] = 0;

		std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DB::Arena pool;
		RefsSet set;
		Stopwatch watch;
		
		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

		std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DenseSet set;
		set.set_empty_key(DenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[*it] = 0;

		std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull << it->first;
			devnull << std::endl;
		}
	}

	{
		RefsDenseSet set;
		set.set_empty_key(RefsDenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(it->data(), it->size())] = 0;

		std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		DB::Arena pool;
		RefsDenseSet set;
		set.set_empty_key(RefsDenseSet::key_type());
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
			set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0;

		std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	{
		RefsHashMap set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			RefsHashMap::iterator inserted_it;
			bool inserted;
			set.emplace(StringRef(*it), inserted_it, inserted);
		}

		std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}

		//std::cerr << set.size() << ", " << set.getCollisions() << std::endl;
	}

	{
		DB::Arena pool;
		RefsHashMap set;
		Stopwatch watch;

		for (Vec::iterator it = vec.begin(); it != vec.end(); ++it)
		{
			RefsHashMap::iterator inserted_it;
			bool inserted;
			set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted);
		}

		std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, "
			<< vec.size() / watch.elapsedSeconds() << " rows/sec., "
			<< in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec."
			<< std::endl;

		size_t i = 0;
		for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
		{
			devnull.write(it->first.data, it->first.size);
			devnull << std::endl;
		}
	}

	return 0;
}
コード例 #4
0
int main(int argc, char ** argv)
{
    size_t n = atoi(argv[1]);
    size_t m = atoi(argv[2]);

    DB::Arena pool;
    std::vector<Key> data(n);

    std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;

    {
        Stopwatch watch;
        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
        DB::CompressedReadBuffer in2(in1);

        std::string tmp;
        for (size_t i = 0; i < n && !in2.eof(); ++i)
        {
            DB::readStringBinary(tmp, in2);
            data[i] = Key(pool.insert(tmp.data(), tmp.size()), tmp.size());
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "Vector. Size: " << n
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;
    }

    if (!m || m == 1)
    {
        Stopwatch watch;

        //using Map = HashMap<Key, Value>;

        /// Saving the hash accelerates the resize by about 2 times, and the overall performance by 6-8%.
        using Map = HashMapWithSavedHash<Key, Value, DefaultHash<Key>, Grower>;

        Map map;
        Map::iterator it;
        bool inserted;

        for (size_t i = 0; i < n; ++i)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
                it->second = 0;
            ++it->second;
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "HashMap (CityHash64). Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
            << ", collisions: " << map.getCollisions()
#endif
            << std::endl;
    }

    if (!m || m == 2)
    {
        Stopwatch watch;

        using Map = HashMapWithSavedHash<Key, Value, FastHash64, Grower>;

        Map map;
        Map::iterator it;
        bool inserted;

        for (size_t i = 0; i < n; ++i)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
                it->second = 0;
            ++it->second;
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "HashMap (FastHash64). Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
            << ", collisions: " << map.getCollisions()
#endif
            << std::endl;
    }

    if (!m || m == 3)
    {
        Stopwatch watch;

        using Map = HashMapWithSavedHash<Key, Value, CrapWow, Grower>;

        Map map;
        Map::iterator it;
        bool inserted;

        for (size_t i = 0; i < n; ++i)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
                it->second = 0;
            ++it->second;
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "HashMap (CrapWow). Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
            << ", collisions: " << map.getCollisions()
#endif
            << std::endl;
    }

    if (!m || m == 4)
    {
        Stopwatch watch;

        using Map = HashMapWithSavedHash<Key, Value, SimpleHash, Grower>;

        Map map;
        Map::iterator it;
        bool inserted;

        for (size_t i = 0; i < n; ++i)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
                it->second = 0;
            ++it->second;
        }

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "HashMap (SimpleHash). Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
            << ", collisions: " << map.getCollisions()
#endif
            << std::endl;
    }

    if (!m || m == 5)
    {
        Stopwatch watch;

        std::unordered_map<Key, Value, DefaultHash<Key> > map;
        for (size_t i = 0; i < n; ++i)
            ++map[data[i]];

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "std::unordered_map. Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;
    }

    if (!m || m == 6)
    {
        Stopwatch watch;

        google::dense_hash_map<Key, Value, DefaultHash<Key> > map;
        map.set_empty_key(Key("\0", 1));
        for (size_t i = 0; i < n; ++i)
              ++map[data[i]];

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "google::dense_hash_map. Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;
    }

    if (!m || m == 7)
    {
        Stopwatch watch;

        google::sparse_hash_map<Key, Value, DefaultHash<Key> > map;
        for (size_t i = 0; i < n; ++i)
            ++map[data[i]];

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "google::sparse_hash_map. Size: " << map.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl;
    }

    return 0;
}