Пример #1
0
/// Benchmark: counts the occurrences of every element of `data` in a HashMapWithSavedHash
/// and reports the resulting map size, the elapsed time and the throughput to stderr.
/// `name` only labels the report line.
void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{
	/// Started before the map is constructed, so construction is part of the measured time.
	Stopwatch watch;

	using Map = HashMapWithSavedHash<Key, Value, DefaultHash<Key>>;

	Map map;
	typename Map::iterator it;
	bool inserted;

	for (const StringRef & ref : data)
	{
		map.emplace(static_cast<const Key &>(ref), it, inserted);

		/// A newly created cell starts from zero; an existing cell keeps its running count.
		if (inserted)
			it->second = 0;
		++it->second;
	}

	watch.stop();
	const double elapsed_seconds = watch.elapsedSeconds();

	std::cerr << std::fixed << std::setprecision(2)
		<< "HashMap (" << name << "). Size: " << map.size()
		<< ", elapsed: " << elapsed_seconds
		<< " (" << data.size() / elapsed_seconds << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
		<< ", collisions: " << map.getCollisions()
#endif
		<< std::endl;
}
Пример #2
0
void test(size_t n, const char * name, F && kernel)
{
    x = 0;

    Stopwatch watch;
    Stopwatch watch_one;
    double max_seconds = 0;

    std::cerr << name << ":\n";

    for (size_t i = 0; i < n; ++i)
    {
        watch_one.restart();

        kernel();

        watch_one.stop();
        if (watch_one.elapsedSeconds() > max_seconds)
            max_seconds = watch_one.elapsedSeconds();
    }

    watch.stop();

    std::cerr
        << std::fixed << std::setprecision(2)
        << n << " ops in "
        << watch.elapsedSeconds() << " sec., "
        << n / watch.elapsedSeconds() << " ops/sec., "
        << "avg latency: " << watch.elapsedSeconds() / n * 1000000 << " μs, "
        << "max latency: " << max_seconds * 1000000 << " μs "
        << "(res = " << x << ")"
        << std::endl;
}
Пример #3
0
int main(int argc, char ** argv)
{
	try
	{
		DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
		Int64 n = 0;
		size_t nums = 0;

		Stopwatch watch;

		while (!in.eof())
		{
			DB::readIntText(n, in);
			in.ignore();

			//std::cerr << "n: " << n << std::endl;

			++nums;
		}

		watch.stop();
		std::cerr << std::fixed << std::setprecision(2)
			<< "Read " << nums << " numbers (" << in.count() / 1000000.0 << " MB) in " << watch.elapsedSeconds() << " sec., "
			<< nums / watch.elapsedSeconds() << " num/sec. (" << in.count() / watch.elapsedSeconds() / 1000000 << " MB/s.)"
			<< std::endl;
	}
	catch (const DB::Exception & e)
	{
		std::cerr << e.what() << ", " << e.displayText() << std::endl;
		return 1;
	}

	return 0;
}
Пример #4
0
/// Benchmark for a map keyed by UInt16: one pass that inserts or increments every key of `data`,
/// followed by one pass that looks every key up again and increments its value.
/// The map size, the elapsed time and the throughput are printed to stderr, labelled with `name`.
void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
{
    Map map;
    typename Map::iterator it;
    bool inserted;

    Stopwatch watch;

    /// Pass 1: emplace every key; new cells start at 1, existing ones are incremented.
    for (const auto & key : data)
    {
        map.emplace(key, it, inserted);
        if (!inserted)
            ++it->getSecond();
        else
            it->getSecond() = 1;
    }

    /// Pass 2: every key is present after pass 1, so the lookup result is used directly.
    for (const auto & key : data)
    {
        it = map.find(key);
        ++it->getSecond();
    }

    watch.stop();
    const double elapsed_seconds = watch.elapsedSeconds();

    std::cerr << std::fixed << std::setprecision(2)
              << "HashMap (" << name << "). Size: " << map.size()
              << ", elapsed: " << elapsed_seconds
              << " (" << data.size() / elapsed_seconds << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
              << ", collisions: " << map.getCollisions()
#endif
              << std::endl;
}
Пример #5
0
/// Multi-threaded O_DIRECT I/O benchmark.
///
/// Usage: <program> file_name r|w min_offset max_offset block_size threads buffers count
///
/// Opens the file once, schedules `threads` invocations of `thread` on a pool, waits for all of them,
/// rethrows the first stored worker exception (if any), closes the file and prints statistics to stdout.
/// Returns 1 on a wrong number of arguments and 0 on success.
int mainImpl(int argc, char ** argv)
{
	if (argc != 9)
	{
		std::cerr << "Usage: " << argv[0] << " file_name r|w min_offset max_offset block_size threads buffers count" << std::endl;
		return 1;
	}

	const char * file_name = argv[1];

	/// Anything that does not start with 'w' selects read mode.
	int mode = MODE_READ;
	if (argv[2][0] == 'w')
		mode = MODE_WRITE;

	const size_t min_offset = Poco::NumberParser::parseUnsigned64(argv[3]);
	const size_t max_offset = Poco::NumberParser::parseUnsigned64(argv[4]);
	const size_t block_size = Poco::NumberParser::parseUnsigned64(argv[5]);
	const size_t threads_count = Poco::NumberParser::parseUnsigned(argv[6]);
	const size_t buffers_count = Poco::NumberParser::parseUnsigned(argv[7]);
	const size_t count = Poco::NumberParser::parseUnsigned(argv[8]);

	const int access_flag = (mode == MODE_READ) ? O_RDONLY : O_WRONLY;
	const int fd = open(file_name, access_flag | O_DIRECT);
	if (-1 == fd)
		throwFromErrno("Cannot open file");

	using Exceptions = std::vector<std::exception_ptr>;

	boost::threadpool::pool pool(threads_count);

	/// One slot per worker; a worker reports its failure through its own slot.
	Exceptions exceptions(threads_count);

	Stopwatch watch;

	for (size_t worker = 0; worker < threads_count; ++worker)
		pool.schedule(std::bind(thread, fd, mode, min_offset, max_offset, block_size, buffers_count, count, std::ref(exceptions[worker])));
	pool.wait();

	watch.stop();

	for (const auto & worker_exception : exceptions)
		if (worker_exception)
			std::rethrow_exception(worker_exception);

	if (0 != close(fd))
		throwFromErrno("Cannot close file");

	const double seconds = watch.elapsedSeconds();
	const size_t total_ops = count * threads_count;

	std::cout << std::fixed << std::setprecision(2)
		<< "Done " << count << " * " << threads_count << " ops"
		<< " in " << seconds << " sec."
		<< ", " << total_ops / seconds << " ops/sec."
		<< ", " << total_ops * block_size / seconds / 1000000 << " MB/sec."
		<< std::endl;

	return 0;
}
Пример #6
0
static bool performanceTest()
{
    static const int kTableMax = 100;
    IndirectRefTable irt;
    IndirectRef manyRefs[kTableMax];
    ClassObject* clazz = dvmFindClass("Ljava/lang/Object;", NULL);
    Object* obj0 = dvmAllocObject(clazz, ALLOC_DONT_TRACK);
    const u4 cookie = IRT_FIRST_SEGMENT;
    const int kLoops = 100000;
    Stopwatch stopwatch;

    DBUG_MSG("+++ START performance\n");

    if (!irt.init(kTableMax, kTableMax, kIndirectKindGlobal)) {
        return false;
    }

    stopwatch.reset();
    for (int loop = 0; loop < kLoops; loop++) {
        for (int i = 0; i < kTableMax; i++) {
            manyRefs[i] = irt.add(cookie, obj0);
        }
        for (int i = 0; i < kTableMax; i++) {
            irt.remove(cookie, manyRefs[i]);
        }
    }
    DBUG_MSG("Add/remove %d objects FIFO order, %d iterations, %0.3fms / iteration",
            kTableMax, kLoops, stopwatch.elapsedSeconds() * 1000 / kLoops);

    stopwatch.reset();
    for (int loop = 0; loop < kLoops; loop++) {
        for (int i = 0; i < kTableMax; i++) {
            manyRefs[i] = irt.add(cookie, obj0);
        }
        for (int i = kTableMax; i-- > 0; ) {
            irt.remove(cookie, manyRefs[i]);
        }
    }
    DBUG_MSG("Add/remove %d objects LIFO order, %d iterations, %0.3fms / iteration",
            kTableMax, kLoops, stopwatch.elapsedSeconds() * 1000  / kLoops);

    for (int i = 0; i < kTableMax; i++) {
        manyRefs[i] = irt.add(cookie, obj0);
    }
    stopwatch.reset();
    for (int loop = 0; loop < kLoops; loop++) {
        for (int i = 0; i < kTableMax; i++) {
            irt.get(manyRefs[i]);
        }
    }
    DBUG_MSG("Get %d objects, %d iterations, %0.3fms / iteration",
            kTableMax, kLoops, stopwatch.elapsedSeconds() * 1000  / kLoops);
    for (int i = kTableMax; i-- > 0; ) {
        irt.remove(cookie, manyRefs[i]);
    }

    irt.destroy();
    return true;
}
Пример #7
0
int main(int argc, char ** argv)
try
{
	using namespace DB;

	size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL;

	std::string input = "SELECT number, number / 3, number * number";

	ParserSelectQuery parser;
	ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "");

	Context context;

	ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())});
	ExpressionActionsChain chain;
	analyzer.appendSelect(chain, false);
	analyzer.appendProjectResult(chain, false);
	chain.finalize();
	ExpressionActionsPtr expression = chain.getLastActions();

	StoragePtr table = StorageSystemNumbers::create("Numbers");

	Names column_names;
	column_names.push_back("number");

	QueryProcessingStage::Enum stage;

	BlockInputStreamPtr in;
	in = table->read(column_names, 0, context, Settings(), stage)[0];
	in = std::make_shared<ExpressionBlockInputStream>(in, expression);
	in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10));

	WriteBufferFromOStream out1(std::cout);
	RowOutputStreamPtr out2 = std::make_shared<TabSeparatedRowOutputStream>(out1, expression->getSampleBlock());
	BlockOutputStreamFromRowOutputStream out(out2);

	{
		Stopwatch stopwatch;
		stopwatch.start();

		copyData(*in, out);

		stopwatch.stop();
		std::cout << std::fixed << std::setprecision(2)
			<< "Elapsed " << stopwatch.elapsedSeconds() << " sec."
			<< ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
			<< std::endl;
	}

	return 0;
}
catch (const DB::Exception & e)
{
	std::cerr << e.what() << ", " << e.displayText() << std::endl;
	throw;
}
/// This test is useful for assessing the performance of acquiring block numbers in all partitions (and there
/// can be ~1000 of them). This is needed when creating a mutation entry for a ReplicatedMergeTree table.
int main(int argc, char ** argv)
try
{
    if (argc != 3)
    {
        std::cerr << "usage: " << argv[0] << " <zookeeper_config> <path_to_table>" << std::endl;
        return 3;
    }

    ConfigProcessor processor(argv[1], false, true);
    auto config = processor.loadConfig().configuration;
    String root_path = argv[2];

    zkutil::ZooKeeper zk(*config, "zookeeper");

    String temp_path = root_path + "/temp";
    String blocks_path = root_path + "/block_numbers";

    Stopwatch total_timer;
    Stopwatch timer;

    EphemeralLocksInAllPartitions locks(blocks_path, "test_lock-", temp_path, zk);

    std::cerr << "Locked, elapsed: " << timer.elapsedSeconds() << std::endl;
    for (const auto & lock : locks.getLocks())
        std::cout << lock.partition_id << " " << lock.number << std::endl;
    timer.restart();

    locks.unlock();
    std::cerr << "Abandoned, elapsed: " << timer.elapsedSeconds() << std::endl;

    std::cerr << "Total elapsed: " << total_timer.elapsedSeconds() << std::endl;

    return 0;
}
catch (const Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << ": " << std::endl
              << e.getStackTrace().toString() << std::endl;
    throw;
}
catch (Poco::Exception & e)
{
    std::cerr << "Exception: " << e.displayText() << std::endl;
    throw;
}
catch (std::exception & e)
{
    std::cerr << "std::exception: " << e.what() << std::endl;
    throw;
}
catch (...)
{
    std::cerr << "Some exception" << std::endl;
    throw;
}
Пример #9
0
/// Benchmarks hash-map insertion with different StringRef comparison functions.
///
/// Usage: program n m
///   n - number of strings to read from stdin (compressed stream of binary strings);
///   m - which comparator to run; 0 runs all of them.
///
/// Returns 1 when the arguments are missing, 0 otherwise.
int main(int argc, char ** argv)
{
	/// Both positional arguments are dereferenced below, so refuse to run without them.
	if (argc < 3)
	{
		std::cerr << "Usage: program n m\n";
		return 1;
	}

	size_t n = atoi(argv[1]);
	size_t m = atoi(argv[2]);

	/// The arena owns the string bytes; `data` only holds references into it.
	DB::Arena pool;
	std::vector<StringRef> data(n);

	std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;

	{
		Stopwatch watch;
		DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
		DB::CompressedReadBuffer in2(in1);

		std::string tmp;
		for (size_t i = 0; i < n && !in2.eof(); ++i)
		{
			DB::readStringBinary(tmp, in2);
			data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
		}

		watch.stop();
		std::cerr << std::fixed << std::setprecision(2)
			<< "Vector. Size: " << n
			<< ", elapsed: " << watch.elapsedSeconds()
			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
			<< std::endl;
	}

	if (!m || m == 1) bench<StringRef_Compare1_Ptrs>				(data, "StringRef_Compare1_Ptrs");
	if (!m || m == 2) bench<StringRef_Compare1_Index>				(data, "StringRef_Compare1_Index");
	if (!m || m == 3) bench<StringRef_CompareMemcmp>				(data, "StringRef_CompareMemcmp");
	if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64>			(data, "StringRef_Compare8_1_byUInt64");
	if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp>			(data, "StringRef_Compare16_1_byMemcmp");
	if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd");
	if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd>	(data, "StringRef_Compare16_1_byUInt64_bitAnd");
#if __SSE4_1__
	if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE>			(data, "StringRef_Compare16_1_byIntSSE");
	if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE>		(data, "StringRef_Compare16_1_byFloatSSE");
	if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4>			(data, "StringRef_Compare16_1_bySSE4");
	if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide>		(data, "StringRef_Compare16_1_bySSE4_wide");
	if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide>		(data, "StringRef_Compare16_1_bySSE_wide");
#endif
	if (!m || m == 100) bench<StringRef_CompareAlwaysTrue>			(data, "StringRef_CompareAlwaysTrue");
	if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue>	(data, "StringRef_CompareAlmostAlwaysTrue");

	/// 10 > 8, 9
	/// 1, 2, 5 - bad


	return 0;
}
Пример #10
0
/// Round-trip test for the zlib buffers: writes the numbers 0..n-1 (tab-terminated) through a
/// gzip-deflating buffer into "test_zlib_buffers.gz", reads them back through an inflating buffer
/// and checks every value. Throughput of both phases is printed to stdout.
/// Returns 1 if a DB::Exception is thrown (including a value mismatch), 0 otherwise.
int main(int, char **)
try
{
    std::cout << std::fixed << std::setprecision(2);

    const size_t n = 100000000;
    Stopwatch stopwatch;

    /// Phase 1: compress and write.
    {
        DB::WriteBufferFromFile file_out("test_zlib_buffers.gz", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
        DB::ZlibDeflatingWriteBuffer deflating_buf(file_out, DB::CompressionMethod::Gzip, /* compression_level = */ 3);

        stopwatch.restart();
        for (size_t number = 0; number < n; ++number)
        {
            DB::writeIntText(number, deflating_buf);
            DB::writeChar('\t', deflating_buf);
        }
        deflating_buf.finish();
        stopwatch.stop();

        const double seconds = stopwatch.elapsedSeconds();
        std::cout << "Writing done. Elapsed: " << seconds << " s."
            << ", " << (deflating_buf.count() / seconds / 1000000) << " MB/s"
            << std::endl;
    }

    /// Phase 2: read, decompress and verify.
    {
        DB::ReadBufferFromFile file_in("test_zlib_buffers.gz");
        DB::ZlibInflatingReadBuffer inflating_buf(file_in, DB::CompressionMethod::Gzip);

        stopwatch.restart();
        for (size_t expected = 0; expected < n; ++expected)
        {
            size_t x;
            DB::readIntText(x, inflating_buf);

            /// Skip the tab written after every number.
            inflating_buf.ignore();

            if (x != expected)
                throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(expected), 0);
        }
        stopwatch.stop();

        const double seconds = stopwatch.elapsedSeconds();
        std::cout << "Reading done. Elapsed: " << seconds << " s."
            << ", " << (inflating_buf.count() / seconds / 1000000) << " MB/s"
            << std::endl;
    }

    return 0;
}
catch (const DB::Exception & e)
{
    std::cerr << e.what() << ", " << e.displayText() << std::endl;
    return 1;
}
Пример #11
0
    /// Runs the two-phase benchmark and reports timings to stderr.
    ///
    /// Phase 1 calls Aggregate::execute to build the intermediate maps from `data`;
    /// phase 2 calls Merge::execute to combine them into a single result map.
    /// For each phase the elapsed time and throughput are printed, followed by the
    /// combined totals and the size of the merged map.
    static void NO_INLINE execute(const Source & data, size_t num_threads,
                        Creator && creator, Updater && updater, Merger && merger,
                        ThreadPool & pool)
    {
        std::vector<std::unique_ptr<Map>> intermediate_results;

        Stopwatch watch;

        Aggregate::execute(data, num_threads, intermediate_results, std::forward<Creator>(creator), std::forward<Updater>(updater), pool);
        size_t num_maps = intermediate_results.size();

        watch.stop();
        double time_aggregated = watch.elapsedSeconds();
        std::cerr
            << "Aggregated in " << time_aggregated
            << " (" << data.size() / time_aggregated << " elem/sec.)"
            << std::endl;

        /// Iterate over the maps that were actually produced: indexing by `num_threads`
        /// would read out of bounds if the aggregation step created fewer maps than threads.
        size_t size_before_merge = 0;
        std::cerr << "Sizes: ";
        for (size_t i = 0; i < num_maps; ++i)
        {
            std::cerr << (i == 0 ? "" : ", ") << intermediate_results[i]->size();
            size_before_merge += intermediate_results[i]->size();
        }
        std::cerr << std::endl;

        watch.restart();

        /// Merge works on raw pointers; ownership stays with `intermediate_results`.
        std::vector<Map*> intermediate_results_ptrs(num_maps);
        for (size_t i = 0; i < num_maps; ++i)
            intermediate_results_ptrs[i] = intermediate_results[i].get();

        /// Out-parameter of Merge::execute; initialized so it never holds an indeterminate value.
        Map * result_map = nullptr;
        Merge::execute(intermediate_results_ptrs.data(), num_maps, result_map, std::forward<Merger>(merger), pool);

        watch.stop();
        double time_merged = watch.elapsedSeconds();
        std::cerr
            << "Merged in " << time_merged
            << " (" << size_before_merge / time_merged << " elem/sec.)"
            << std::endl;

        double time_total = time_aggregated + time_merged;
        std::cerr
            << "Total in " << time_total
            << " (" << data.size() / time_total << " elem/sec.)"
            << std::endl;
        std::cerr << "Size: " << result_map->size() << std::endl << std::endl;
    }
Пример #12
0
/// Benchmarks hash-map insertion of strings with different hash functions.
///
/// Usage: program n m
///   n - number of strings to read from stdin (compressed stream of binary strings);
///   m - which hash function to run; 0 runs all of them.
///
/// Returns 1 when the arguments are missing, 0 otherwise.
int main(int argc, char ** argv)
{
	/// Both positional arguments are dereferenced below, so refuse to run without them.
	if (argc < 3)
	{
		std::cerr << "Usage: program n m\n";
		return 1;
	}

	size_t n = atoi(argv[1]);
	size_t m = atoi(argv[2]);

	/// The arena owns the string bytes; `data` only holds references into it.
	DB::Arena pool;
	std::vector<StringRef> data(n);

	std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;

	{
		Stopwatch watch;
		DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
		DB::CompressedReadBuffer in2(in1);

		std::string tmp;
		for (size_t i = 0; i < n && !in2.eof(); ++i)
		{
			DB::readStringBinary(tmp, in2);
			data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
		}

		watch.stop();
		std::cerr << std::fixed << std::setprecision(2)
			<< "Vector. Size: " << n
			<< ", elapsed: " << watch.elapsedSeconds()
			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
			<< std::endl;
	}

	if (!m || m == 1) bench<StringRef_CompareMemcmp, DefaultHash<StringRef>>(data, "StringRef_CityHash64");
	if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64>	(data, "StringRef_FastHash64");
	if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash>	(data, "StringRef_SimpleHash");

#if defined(__x86_64__)
	if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow>		(data, "StringRef_CrapWow");
	if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash>		(data, "StringRef_CRC32Hash");
	if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash>	(data, "StringRef_CRC32ILPHash");
#endif

	if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash");
	if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64");
	if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");
	if (!m || m == 10) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2");

	return 0;
}
Пример #13
0
void DynamicGameLoader::reload()
{
	std::cout << ConsoleColour(Console::BLUE) << "\n**RELOADING GAME**" << std::endl;
	Stopwatch timer;
	
	core->onSuspended();
	unload();
	load();
	hotPatch();
	core->onReloaded();

	timer.pause();
	
	std::cout << "Done in " << timer.elapsedSeconds() << " seconds.\n" << ConsoleColour() << std::endl;
}
Пример #14
0
    /// Runs `query` over `connection`, reads and discards every result block, and adds the
    /// elapsed time together with the progress and profile counters to both the per-interval
    /// and the total statistics (under `mutex`).
    void execute(ConnectionPool::Entry & connection, Query & query)
    {
        Stopwatch watch;
        RemoteBlockInputStream stream(*connection, query, &settings, nullptr, Tables(), query_processing_stage);

        /// Accumulates the progress packets reported while the query runs.
        Progress progress;
        stream.setProgressCallback(
            [&progress](const Progress & value)
            {
                progress.incrementPiecewiseAtomically(value);
            });

        stream.readPrefix();
        while (Block block = stream.read())
        {
            /// The data itself is not needed; only the time it takes to receive it.
        }
        stream.readSuffix();

        const BlockStreamProfileInfo & info = stream.getProfileInfo();

        const double seconds = watch.elapsedSeconds();

        std::lock_guard<std::mutex> lock(mutex);
        info_per_interval.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes);
        info_total.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes);
    }
Пример #15
0
/// Compares two lookup tables keyed by UInt16 (a HashMap with a fixed-size grower and a FixedHashMap).
///
/// Usage: program n m
///   n - number of UInt16 values to read from stdin (compressed binary stream);
///   m - 1 runs the old lookup, 2 the new one, 0 both.
///
/// Returns 1 when the arguments are missing, 0 otherwise.
int main(int argc, char ** argv)
{
    if (argc < 3)
    {
        std::cerr << "Usage: program n m\n";
        return 1;
    }

    const size_t n = atoi(argv[1]);
    const size_t m = atoi(argv[2]);

    std::vector<UInt16> data(n);

    {
        Stopwatch watch;
        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
        DB::CompressedReadBuffer in2(in1);

        /// Stops early if the input holds fewer than n values; the rest of `data` stays zero.
        size_t pos = 0;
        while (pos < n && !in2.eof())
        {
            DB::readBinary(data[pos], in2);
            ++pos;
        }

        watch.stop();
        const double seconds = watch.elapsedSeconds();

        std::cerr << std::fixed << std::setprecision(2)
                  << "Vector. Size: " << n
                  << ", elapsed: " << seconds
                  << " (" << n / seconds << " elem/sec.)"
                  << std::endl;
    }

    using OldLookup = HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>;
    using NewLookup = FixedHashMap<UInt16, UInt8>;

    const bool run_all = (m == 0);

    if (run_all || m == 1)
        bench<OldLookup>(data, "Old Lookup");
    if (run_all || m == 2)
        bench<NewLookup>(data, "New Lookup");

    return 0;
}
Пример #16
0
    /// Try to push a new query and check the cancellation conditions.
    ///
    /// Retries queue.tryPush(query, 100) until it succeeds. After every attempt (including
    /// the successful one) the stop conditions are checked; if one of them holds, false is
    /// returned: a worker requested shutdown, the time limit is exhausted, or the interrupt
    /// listener fired. When `delay` is set and has elapsed, an intermediate report is printed.
    /// Returns true when the query was pushed and no stop condition was hit.
    bool tryPushQueryInteractively(const String & query, InterruptListener & interrupt_listener)
    {
        bool pushed = false;

        do
        {
            pushed = queue.tryPush(query, 100);

            /// An exception occurred in a worker
            if (shutdown)
                return false;

            const bool time_limit_exhausted = max_time > 0 && info_total.watch.elapsedSeconds() >= max_time;
            if (time_limit_exhausted)
            {
                std::cout << "Stopping launch of queries. Requested time limit is exhausted.\n";
                return false;
            }

            if (interrupt_listener.check())
            {
                std::cout << "Stopping launch of queries. SIGINT recieved.\n";
                return false;
            }

            /// Periodic report while waiting for a free slot in the queue.
            const bool report_due = delay > 0 && delay_watch.elapsedSeconds() > delay;
            if (report_due)
            {
                printNumberOfQueriesExecuted(info_total.queries);
                report(info_per_interval);
                delay_watch.restart();
            }
        }
        while (!pushed);

        return true;
    }
Пример #17
0
int main(int argc, char ** argv)
try
{
	using namespace DB;

	size_t n = atoi(argv[1]);

	ColumnWithTypeAndName descr1;
	auto col1 = std::make_shared<ColumnUInt8>();
	descr1.type = std::make_shared<DataTypeUInt8>();
	descr1.column = col1;
	descr1.name = "x";
	col1->getData().resize(n);

	ColumnWithTypeAndName descr2;
	auto col2 = std::make_shared<ColumnInt16>();
	descr2.type = std::make_shared<DataTypeInt16>();
	descr2.column = col2;
	descr2.name = "x";

	Block block;
	block.insert(descr1);
	block.insert(descr2);
	col2->getData().resize(n);

	for (size_t i = 0; i < n; ++i)
	{
		col1->getData()[i] = 10;
		col2->getData()[i] = 3;
	}

	FunctionDivideFloating f;
	DataTypes arg_types;
	arg_types.push_back(descr1.type);
	arg_types.push_back(descr2.type);

	ColumnNumbers arg_nums;
	arg_nums.push_back(0);
	arg_nums.push_back(1);

	size_t res_num = 2;

	DataTypePtr res_type = f.getReturnType(arg_types);

	ColumnWithTypeAndName descr_res;
	descr_res.type = res_type;
	descr_res.name = "z";

	{
		Stopwatch stopwatch;
		stopwatch.start();

		f.execute(block, arg_nums, res_num);

		stopwatch.stop();
		std::cout << std::fixed << std::setprecision(2)
			<< "Elapsed " << stopwatch.elapsedSeconds() << " sec."
			<< ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
			<< std::endl;
	}

	Float64 x = 0;
	for (size_t i = 0; i < n; ++i)
		x += get<Float64>((*block.getByPosition(2).column)[i]);

	std::cout << x << std::endl;
	return 0;
}
catch (const DB::Exception & e)
{
	std::cerr << e.displayText() << std::endl;
	throw;
}
Пример #18
0
int main(int argc, char ** argv)
{
	using namespace DB;

	FieldVisitorToString to_string;

	Field field = UInt64(0);
	std::cerr << applyVisitor(to_string, field) << std::endl;

	field = std::string("Hello, world!");
	std::cerr << applyVisitor(to_string, field) << std::endl;

	field = Null();
	std::cerr << applyVisitor(to_string, field) << std::endl;

	Field field2;
	field2 = field;
	std::cerr << applyVisitor(to_string, field2) << std::endl;

	Array array;
	array.push_back(UInt64(123));
	array.push_back(Int64(-123));
	array.push_back(String("Hello"));
	field = array;
	std::cerr << applyVisitor(to_string, field) << std::endl;

	get<Array &>(field).push_back(field);
	std::cerr << applyVisitor(to_string, field) << std::endl;

	std::cerr << (field < field2) << std::endl;
	std::cerr << (field2 < field) << std::endl;


	try
	{
		size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10000000;

		Stopwatch watch;

		{
			Array array(n);

			{
				Stopwatch watch;

				for (size_t i = 0; i < n; ++i)
					array[i] = String(i % 32, '!');

				watch.stop();
				std::cerr << std::fixed << std::setprecision(2)
					<< "Set " << n << " fields (" << n * sizeof(array[0]) / 1000000.0 << " MB) in " << watch.elapsedSeconds() << " sec., "
					<< n / watch.elapsedSeconds() << " elem/sec. (" << n * sizeof(array[0]) / watch.elapsedSeconds() / 1000000 << " MB/s.)"
					<< std::endl;
			}

			{
				Stopwatch watch;

				size_t sum = 0;
				for (size_t i = 0; i < n; ++i)
					sum += safeGet<const String &>(array[i]).size();

				watch.stop();
				std::cerr << std::fixed << std::setprecision(2)
					<< "Got " << n << " fields (" << n * sizeof(array[0]) / 1000000.0 << " MB) in " << watch.elapsedSeconds() << " sec., "
					<< n / watch.elapsedSeconds() << " elem/sec. (" << n * sizeof(array[0]) / watch.elapsedSeconds() / 1000000 << " MB/s.)"
					<< std::endl;

				std::cerr << sum << std::endl;
			}

			watch.restart();
		}

		watch.stop();

		std::cerr << std::fixed << std::setprecision(2)
			<< "Destroyed " << n << " fields (" << n * sizeof(Array::value_type) / 1000000.0 << " MB) in " << watch.elapsedSeconds() << " sec., "
			<< n / watch.elapsedSeconds() << " elem/sec. (" << n * sizeof(Array::value_type) / watch.elapsedSeconds() / 1000000 << " MB/s.)"
			<< std::endl;
	}
	catch (const Exception & e)
	{
		std::cerr << e.what() << ", " << e.displayText() << std::endl;
		return 1;
	}

	std::cerr << "sizeof(Field) = " << sizeof(Field) << std::endl;

	return 0;
}
Пример #19
0
int mainImpl(int argc, char ** argv)
{
    const char * file_name = 0;
    Mode mode = MODE_READ;
    size_t min_offset = 0;
    size_t max_offset = 0;
    size_t block_size = 0;
    size_t descriptors = 0;
    size_t count = 0;

    if (argc != 8)
    {
        std::cerr << "Usage: " << argv[0] << " file_name r|w min_offset max_offset block_size descriptors count" << std::endl;
        return 1;
    }

    file_name = argv[1];
    min_offset = Poco::NumberParser::parseUnsigned64(argv[3]);
    max_offset = Poco::NumberParser::parseUnsigned64(argv[4]);
    block_size = Poco::NumberParser::parseUnsigned64(argv[5]);
    descriptors = Poco::NumberParser::parseUnsigned(argv[6]);
    count = Poco::NumberParser::parseUnsigned(argv[7]);

    if (!strcmp(argv[2], "r"))
        mode = MODE_READ;
    else if (!strcmp(argv[2], "w"))
        mode = MODE_WRITE;
    else
        throw Poco::Exception("Invalid mode");

    std::vector<int> fds(descriptors);
    for (size_t i = 0; i < descriptors; ++i)
    {
        fds[i] = open(file_name, O_SYNC | ((mode == MODE_READ) ? O_RDONLY : O_WRONLY));
        if (-1 == fds[i])
            throwFromErrno("Cannot open file");
    }

    std::vector<char> buf(block_size);

    pcg64 rng(randomSeed());

    Stopwatch watch;

    std::vector<pollfd> polls(descriptors);

    for (size_t i = 0; i < descriptors; ++i)
    {
        polls[i].fd = fds[i];
        polls[i].events = (mode == MODE_READ) ? POLLIN : POLLOUT;
        polls[i].revents = 0;
    }

    size_t ops = 0;
    while (ops < count)
    {
        if (poll(&polls[0], descriptors, -1) <= 0)
            throwFromErrno("poll failed");
        for (size_t i = 0; i < descriptors; ++i)
        {
            if (!polls[i].revents)
                continue;

            if (polls[i].revents != polls[i].events)
                throw Poco::Exception("revents indicates error");
            polls[i].revents = 0;
            ++ops;

            long rand_result1 = rng();
            long rand_result2 = rng();
            long rand_result3 = rng();

            size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43);
            size_t offset;
            offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size;

            if (mode == MODE_READ)
            {
                if (static_cast<int>(block_size) != pread(fds[i], &buf[0], block_size, offset))
                    throwFromErrno("Cannot read");
            }
            else
            {
                if (static_cast<int>(block_size) != pwrite(fds[i], &buf[0], block_size, offset))
                    throwFromErrno("Cannot write");
            }
        }
    }

    for (size_t i = 0; i < descriptors; ++i)
    {
        if (fsync(fds[i]))
            throwFromErrno("Cannot fsync");
    }

    watch.stop();

    for (size_t i = 0; i < descriptors; ++i)
    {
        if (0 != close(fds[i]))
            throwFromErrno("Cannot close file");
    }

    std::cout << std::fixed << std::setprecision(2)
    << "Done " << count  << " ops" << " in " << watch.elapsedSeconds() << " sec."
    << ", " << count / watch.elapsedSeconds() << " ops/sec."
    << ", " << count * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec."
    << std::endl;

    return 0;
}
Пример #20
0
/// Benchmarks combinations of parallel aggregation and merge strategies.
///
/// Usage: program n num_threads [method]
///   n           - number of elements to read from stdin (compressed binary stream);
///   num_threads - size of the thread pool and number of aggregation threads;
///   method      - which combination to run (1-10, 13, 14); 0 or omitted runs all of them.
///
/// Returns 1 when the required arguments are missing, 0 otherwise.
int main(int argc, char ** argv)
{
    /// argv[1] and argv[2] are dereferenced below, so refuse to run without them.
    if (argc < 3)
    {
        std::cerr << "Usage: program n num_threads [method]\n";
        return 1;
    }

    size_t n = atoi(argv[1]);
    size_t num_threads = atoi(argv[2]);
    size_t method = argc <= 3 ? 0 : atoi(argv[3]);

    std::cerr << std::fixed << std::setprecision(2);

    ThreadPool pool(num_threads);

    Source data(n);

    {
        Stopwatch watch;
        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
        DB::CompressedReadBuffer in2(in1);

        /// The whole input is read in one call, straight into the vector's storage.
        in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * n);

        watch.stop();
        std::cerr << std::fixed << std::setprecision(2)
            << "Vector. Size: " << n
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << n / watch.elapsedSeconds() << " elem/sec.)"
            << std::endl << std::endl;
    }

    Creator creator;
    Updater updater;
    Merger merger;

    /// Single-level map.

    if (!method || method == 1)
        Work<
            Map,
            AggregateIndependent<Map>,
            MergeSequential<Map>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 2)
        Work<
            Map,
            AggregateIndependentWithSequentialKeysOptimization<Map>,
            MergeSequential<Map>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 3)
        Work<
            Map,
            AggregateIndependent<Map>,
            MergeSequentialTransposed<Map>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 4)
        Work<
            Map,
            AggregateIndependentWithSequentialKeysOptimization<Map>,
            MergeSequentialTransposed<Map>
        >::execute(data, num_threads, creator, updater, merger, pool);

    /// Two-level map, sequential merge.

    if (!method || method == 5)
        Work<
            MapTwoLevel,
            AggregateIndependent<MapTwoLevel>,
            MergeSequential<MapTwoLevel>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 6)
        Work<
            MapTwoLevel,
            AggregateIndependentWithSequentialKeysOptimization<MapTwoLevel>,
            MergeSequential<MapTwoLevel>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 7)
        Work<
            MapTwoLevel,
            AggregateIndependent<MapTwoLevel>,
            MergeSequentialTransposed<MapTwoLevel>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 8)
        Work<
            MapTwoLevel,
            AggregateIndependentWithSequentialKeysOptimization<MapTwoLevel>,
            MergeSequentialTransposed<MapTwoLevel>
        >::execute(data, num_threads, creator, updater, merger, pool);

    /// Two-level map, parallel merge.

    if (!method || method == 9)
        Work<
            MapTwoLevel,
            AggregateIndependent<MapTwoLevel>,
            MergeParallelForTwoLevelTable<MapTwoLevel, MergeSequential<MapTwoLevel::Impl>>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 10)
        Work<
            MapTwoLevel,
            AggregateIndependentWithSequentialKeysOptimization<MapTwoLevel>,
            MergeParallelForTwoLevelTable<MapTwoLevel, MergeSequential<MapTwoLevel::Impl>>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 13)
        Work<
            MapTwoLevel,
            AggregateIndependent<MapTwoLevel>,
            MergeParallelForTwoLevelTable<MapTwoLevel, MergeSequentialTransposed<MapTwoLevel::Impl>>
        >::execute(data, num_threads, creator, updater, merger, pool);

    if (!method || method == 14)
        Work<
            MapTwoLevel,
            AggregateIndependentWithSequentialKeysOptimization<MapTwoLevel>,
            MergeParallelForTwoLevelTable<MapTwoLevel, MergeSequentialTransposed<MapTwoLevel::Impl>>
        >::execute(data, num_threads, creator, updater, merger, pool);

    return 0;
}
Пример #21
0
int main(int argc, char ** argv)
{
    using namespace DB;

    try
    {
        size_t n = argc == 2 ? atoi(argv[1]) : 10;

        Block block;

        ColumnWithTypeAndName column_x;
        column_x.name = "x";
        column_x.type = std::make_shared<DataTypeInt16>();
        auto x = std::make_shared<ColumnInt16>();
        column_x.column = x;
        auto & vec_x = x->getData();

        vec_x.resize(n);
        for (size_t i = 0; i < n; ++i)
            vec_x[i] = i % 9;

        block.insert(column_x);

        const char * strings[] = {"abc", "def", "abcd", "defg", "ac"};

        ColumnWithTypeAndName column_s1;
        column_s1.name = "s1";
        column_s1.type = std::make_shared<DataTypeString>();
        column_s1.column = std::make_shared<ColumnString>();

        for (size_t i = 0; i < n; ++i)
            column_s1.column->insert(std::string(strings[i % 5]));

        block.insert(column_s1);

        ColumnWithTypeAndName column_s2;
        column_s2.name = "s2";
        column_s2.type = std::make_shared<DataTypeString>();
        column_s2.column = std::make_shared<ColumnString>();

        for (size_t i = 0; i < n; ++i)
            column_s2.column->insert(std::string(strings[i % 3]));

        block.insert(column_s2);

        BlockInputStreamPtr stream = std::make_shared<OneBlockInputStream>(block);
        AggregatedDataVariants aggregated_data_variants;

        Names key_column_names;
        key_column_names.emplace_back("x");
        key_column_names.emplace_back("s1");

        AggregateFunctionFactory factory;

        AggregateDescriptions aggregate_descriptions(1);

        DataTypes empty_list_of_types;
        aggregate_descriptions[0].function = factory.get("count", empty_list_of_types);

        Aggregator::Params params(key_column_names, aggregate_descriptions, false);
        Aggregator aggregator(params);

        {
            Stopwatch stopwatch;
            stopwatch.start();

            aggregator.execute(stream, aggregated_data_variants);

            stopwatch.stop();
            std::cout << std::fixed << std::setprecision(2)
                << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
                << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
                << std::endl;
        }
    }
    catch (const Exception & e)
    {
        std::cerr << e.displayText() << std::endl;
    }

    return 0;
}
Пример #22
0
void ClearHistory::run()
{
	UarcRmemdServer::GetLogger().information("ClearHistory Process is running!");
	//1.获取当前时间
	time_t thistime;
	thistime = time(NULL);
	std::string TimeChar = "";
	g_pClearHistory->TimeToChar(thistime, TimeChar);
	UarcRmemdServer::GetLogger().information("首次执行,当前时间为:%s", TimeChar);


	//2.计算下次清除时间
	int nClearTime = g_pClearHistory->nextClearTime(thistime);

	long int timedeff = 0;
	timedeff = nClearTime - (long int) thistime;
	Poco::DateTime dataTime;
	dataTime += timedeff*1000000;

	//加入清除队列
	g_pClearHistory->TimeToChar(nClearTime,TimeChar);
	ClearQueue.enqueueNotification(new ClearNotofication(nClearTime),dataTime.timestamp());
	UarcRmemdServer::GetLogger().information("首次执行,设置下次清除数据时间为:%s", TimeChar);
	printf("首次执行,设置下次清除数据时间为:%s\n", TimeChar.c_str());
	while (!_stopped)
	{

		//1.等待清除任务时刻的到来
		Poco::Notification::Ptr pNf(ClearQueue.waitDequeueNotification());
		if (_stopped)
		{
			return ;
		}
		if(pNf)
		{
			//ClearNotofication* pSNf = pNf.cast<ClearNotofication> ();
			//2先设置下次清除时间
			time_t thistime;
			thistime = time(NULL);
			std::string TimeChar = "";
			g_pClearHistory->TimeToChar(thistime, TimeChar);

			UarcRmemdServer::GetLogger().information("清除%s 时刻的定时任务",TimeChar);


			//3.计算下次清除时间
			int nClearTime = g_pClearHistory->nextClearTime(thistime);
			long int timedeff = 0;
			timedeff = nClearTime - (long int) thistime;
			Poco::DateTime dataTime;
			dataTime += timedeff*1000000;
			//4再加入清除队列
			g_pClearHistory->TimeToChar(nClearTime,TimeChar);
			ClearQueue.enqueueNotification(new ClearNotofication(nClearTime ),dataTime.timestamp());

			UarcRmemdServer::GetLogger().information("设置下次清除数据时间为:%s", TimeChar);
			//5此时执行清除处理
			Clearstwch.restart();
			bool bCleard = false;
			bCleard = _rdbmsClearHis->clearHisData();
			Clearstwch.stop();
			if (bCleard == true)
			{
			 UarcRmemdServer::GetLogger().information("清除历史数据成功,用时%d 秒",(int)Clearstwch.elapsedSeconds());
			}
			else
			{
				UarcRmemdServer::GetLogger().information("清除历史数据失败,用时%d 秒",(int)Clearstwch.elapsedSeconds());
				UarcRmemdServer::GetLogger().information("再次调用清除命令");
				bCleard = _rdbmsClearHis->clearHisData();
				if (bCleard == true)
				{
					UarcRmemdServer::GetLogger().information("再次清除历史数据并且成功被清除");
				}
				else
				{
					UarcRmemdServer::GetLogger().information("连续两次清除历史均失败");
				}
			}

		}
	}
	UarcRmemdServer::GetLogger().information("ClearHistory Process quit!", __FILE__,	__LINE__);
}
Пример #23
0
int main(int argc, char ** argv)
{
	size_t n = DB::parse<size_t>(argv[1]);
	size_t method = DB::parse<size_t>(argv[2]);

	std::vector<Key> data(n);

//	srand(time(0));

	{
		Stopwatch watch;

		for (auto & elem : data)
			elem = rand();

		watch.stop();
		double elapsed = watch.elapsedSeconds();
		std::cerr
			<< "Filled in " << elapsed
			<< " (" << n / elapsed << " elem/sec., "
			<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
			<< std::endl;
	}

	if (n <= 100)
	{
		std::cerr << std::endl;
		for (const auto & elem : data)
			std::cerr << elem << ' ';
		std::cerr << std::endl;
	}


	{
		Stopwatch watch;

		if (method == 1)	sort1(&data[0], n);
		if (method == 2)	sort2(&data[0], n);
		if (method == 3)	sort3(&data[0], n);

		watch.stop();
		double elapsed = watch.elapsedSeconds();
		std::cerr
			<< "Sorted in " << elapsed
			<< " (" << n / elapsed << " elem/sec., "
			<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
			<< std::endl;
	}

	{
		Stopwatch watch;

		size_t i = 1;
		while (i < n)
		{
			if (!(data[i - 1] <= data[i]))
				break;
			++i;
		}

		watch.stop();
		double elapsed = watch.elapsedSeconds();
		std::cerr
			<< "Checked in " << elapsed
			<< " (" << n / elapsed << " elem/sec., "
			<< n * sizeof(Key) / elapsed / 1048576 << " MB/sec.)"
			<< std::endl
			<< "Result: " << (i == n ? "Ok." : "Fail!") << std::endl;
	}

	if (n <= 1000)
	{
		std::cerr << std::endl;

		std::cerr << data[0] << ' ';
		for (size_t i = 1; i < n; ++i)
		{
			if (!(data[i - 1] <= data[i]))
				std::cerr << "*** ";
			std::cerr << data[i] << ' ';
		}

		std::cerr << std::endl;
	}

	return 0;
}
Пример #24
0
/// Rough timing comparison of inserting and then looking up N consecutive integers in
/// LinearHashTable, HashTable and std::set. The timings are printed to stdout; nothing is asserted.
///
/// One Stopwatch is shared by all phases, so it is reset after every measurement:
/// otherwise the time of a finished phase would be added to the next one.
void LinearHashTableTest::testPerformanceInt()
{
    const int N = 5000000;
    Stopwatch sw;

    {
        LinearHashTable<int, Hash<int> > lht(N);
        sw.start();
        for (int i = 0; i < N; ++i)
        {
            lht.insert(i);
        }
        sw.stop();
        std::cout << "Insert LHT: " << sw.elapsedSeconds() << std::endl;
        sw.reset();

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            lht.find(i);
        }
        sw.stop();
        std::cout << "Find LHT: " << sw.elapsedSeconds() << std::endl;
        sw.reset();
    }

    {
        HashTable<int, int> ht;

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            ht.insert(i, i);
        }
        sw.stop();
        std::cout << "Insert HT: " << sw.elapsedSeconds() << std::endl;
        sw.reset();

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            ht.exists(i);
        }
        sw.stop();
        std::cout << "Find HT: " << sw.elapsedSeconds() << std::endl;
        // This reset was missing: without it "Insert set" below also included the "Find HT" time.
        sw.reset();
    }

    {
        std::set<int> s;
        sw.start();
        for (int i = 0; i < N; ++i)
        {
            s.insert(i);
        }
        sw.stop();
        std::cout << "Insert set: " << sw.elapsedSeconds() << std::endl;
        sw.reset();

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            s.find(i);
        }
        sw.stop();
        std::cout << "Find set: " << sw.elapsedSeconds() << std::endl;
        sw.reset();
    }

}
int main(int argc, char ** argv)
{
	size_t n = atoi(argv[1]);
	size_t num_threads = atoi(argv[2]);
	size_t method = argc <= 3 ? 0 : atoi(argv[3]);

	std::cerr << std::fixed << std::setprecision(2);

	ThreadPool pool(num_threads);

	Source data(n);

	{
		Stopwatch watch;
		DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
		DB::CompressedReadBuffer in2(in1);

		in2.readStrict(reinterpret_cast<char*>(&data[0]), sizeof(data[0]) * n);

		watch.stop();
		std::cerr << std::fixed << std::setprecision(2)
			<< "Vector. Size: " << n
			<< ", elapsed: " << watch.elapsedSeconds()
			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
			<< std::endl << std::endl;
	}

	if (!method || method == 1)
	{
		/** Вариант 1.
		  * В разных потоках агрегируем независимо в разные хэш-таблицы.
		  * Затем сливаем их вместе.
		  */

		std::vector<Map> maps(num_threads);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate1,
				std::ref(maps[i]),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
			<< "Aggregated in " << time_aggregated
			<< " (" << n / time_aggregated << " elem/sec.)"
			<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes: ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << maps[i].size();
			size_before_merge += maps[i].size();
		}
		std::cerr << std::endl;

		watch.restart();

		for (size_t i = 1; i < num_threads; ++i)
			for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
				maps[0][it->first] += it->second;

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
			<< "Merged in " << time_merged
			<< " (" << size_before_merge / time_merged << " elem/sec.)"
			<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
			<< "Total in " << time_total
			<< " (" << n / time_total << " elem/sec.)"
			<< std::endl;
		std::cerr << "Size: " << maps[0].size() << std::endl << std::endl;
	}

	if (!method || method == 12)
	{
		/** То же самое, но с оптимизацией для подряд идущих одинаковых значений.
		  */

		std::vector<Map> maps(num_threads);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate12,
									std::ref(maps[i]),
									data.begin() + (data.size() * i) / num_threads,
									data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
		<< "Aggregated in " << time_aggregated
		<< " (" << n / time_aggregated << " elem/sec.)"
		<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes: ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << maps[i].size();
			size_before_merge += maps[i].size();
		}
		std::cerr << std::endl;

		watch.restart();

		for (size_t i = 1; i < num_threads; ++i)
			for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
				maps[0][it->first] += it->second;

		watch.stop();

		double time_merged = watch.elapsedSeconds();
		std::cerr
		<< "Merged in " << time_merged
		<< " (" << size_before_merge / time_merged << " elem/sec.)"
		<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
		<< "Total in " << time_total
		<< " (" << n / time_total << " elem/sec.)"
		<< std::endl;
		std::cerr << "Size: " << maps[0].size() << std::endl << std::endl;
	}

	if (!method || method == 11)
	{
		/** Вариант 11.
		  * То же, что вариант 1, но при мердже, изменён порядок циклов,
		  *  что потенциально может дать лучшую кэш-локальность.
		  *
		  * На практике, разницы нет.
		  */

		std::vector<Map> maps(num_threads);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate1,
				std::ref(maps[i]),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
		<< "Aggregated in " << time_aggregated
		<< " (" << n / time_aggregated << " elem/sec.)"
		<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes: ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << maps[i].size();
			size_before_merge += maps[i].size();
		}
		std::cerr << std::endl;

		watch.restart();

		std::vector<Map::iterator> iterators(num_threads);
		for (size_t i = 1; i < num_threads; ++i)
			iterators[i] = maps[i].begin();

		while (true)
		{
			bool finish = true;
			for (size_t i = 1; i < num_threads; ++i)
			{
				if (iterators[i] == maps[i].end())
					continue;

				finish = false;
				maps[0][iterators[i]->first] += iterators[i]->second;
				++iterators[i];
			}

			if (finish)
				break;
		}

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
		<< "Merged in " << time_merged
		<< " (" << size_before_merge / time_merged << " elem/sec.)"
		<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
		<< "Total in " << time_total
		<< " (" << n / time_total << " elem/sec.)"
		<< std::endl;
		std::cerr << "Size: " << maps[0].size() << std::endl << std::endl;
	}

	if (!method || method == 2)
	{
		/** Вариант 2.
		  * В разных потоках агрегируем независимо в разные two-level хэш-таблицы.
		  * Затем сливаем их вместе, распараллелив по bucket-ам первого уровня.
		  * При использовании хэш-таблиц больших размеров (10 млн. элементов и больше),
		  *  и большого количества потоков (8-32), слияние является узким местом,
		  *  и преимущество в производительности достигает 4 раз.
		  */

		std::vector<MapTwoLevel> maps(num_threads);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate2,
				std::ref(maps[i]),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
			<< "Aggregated in " << time_aggregated
			<< " (" << n / time_aggregated << " elem/sec.)"
			<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes: ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << maps[i].size();
			size_before_merge += maps[i].size();
		}
		std::cerr << std::endl;

		watch.restart();

		for (size_t i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i)
			pool.schedule(std::bind(merge2,
				&maps[0], num_threads, i));

		pool.wait();

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
			<< "Merged in " << time_merged
			<< " (" << size_before_merge / time_merged << " elem/sec.)"
			<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
			<< "Total in " << time_total
			<< " (" << n / time_total << " elem/sec.)"
			<< std::endl;

		std::cerr << "Size: " << maps[0].size() << std::endl << std::endl;
	}

	if (!method || method == 22)
	{
		std::vector<MapTwoLevel> maps(num_threads);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate22,
									std::ref(maps[i]),
									data.begin() + (data.size() * i) / num_threads,
									data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
		<< "Aggregated in " << time_aggregated
		<< " (" << n / time_aggregated << " elem/sec.)"
		<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes: ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << maps[i].size();
			size_before_merge += maps[i].size();
		}
		std::cerr << std::endl;

		watch.restart();

		for (size_t i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i)
			pool.schedule(std::bind(merge2,
									&maps[0], num_threads, i));

		pool.wait();

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
		<< "Merged in " << time_merged
		<< " (" << size_before_merge / time_merged << " elem/sec.)"
		<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
		<< "Total in " << time_total
		<< " (" << n / time_total << " elem/sec.)"
		<< std::endl;

		std::cerr << "Size: " << maps[0].size() << std::endl << std::endl;
	}

	if (!method || method == 3)
	{
		/** Вариант 3.
		  * В разных потоках агрегируем независимо в разные хэш-таблицы,
		  *  пока их размер не станет достаточно большим.
		  * Если размер локальной хэш-таблицы большой, и в ней нет элемента,
		  *  то вставляем его в одну глобальную хэш-таблицу, защищённую mutex-ом,
		  *  а если mutex не удалось захватить, то вставляем в локальную.
		  * Затем сливаем все локальные хэш-таблицы в глобальную.
		  * Этот метод плохой - много contention-а.
		  */

		std::vector<Map> local_maps(num_threads);
		Map global_map;
		Mutex mutex;

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate3,
				std::ref(local_maps[i]),
				std::ref(global_map),
				std::ref(mutex),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
			<< "Aggregated in " << time_aggregated
			<< " (" << n / time_aggregated << " elem/sec.)"
			<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes (local): ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << local_maps[i].size();
			size_before_merge += local_maps[i].size();
		}
		std::cerr << std::endl;
		std::cerr << "Size (global): " << global_map.size() << std::endl;
		size_before_merge += global_map.size();

		watch.restart();

		for (size_t i = 0; i < num_threads; ++i)
			for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
				global_map[it->first] += it->second;

		pool.wait();

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
			<< "Merged in " << time_merged
			<< " (" << size_before_merge / time_merged << " elem/sec.)"
			<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
			<< "Total in " << time_total
			<< " (" << n / time_total << " elem/sec.)"
			<< std::endl;

		std::cerr << "Size: " << global_map.size() << std::endl << std::endl;
	}

	if (!method || method == 33)
	{
		/** Вариант 33.
		 * В разных потоках агрегируем независимо в разные хэш-таблицы,
		 *  пока их размер не станет достаточно большим.
		 * Затем сбрасываем данные в глобальную хэш-таблицу, защищённую mutex-ом, и продолжаем.
		 */

		std::vector<Map> local_maps(num_threads);
		Map global_map;
		Mutex mutex;

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate33,
				std::ref(local_maps[i]),
				std::ref(global_map),
				std::ref(mutex),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
		<< "Aggregated in " << time_aggregated
		<< " (" << n / time_aggregated << " elem/sec.)"
		<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes (local): ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << local_maps[i].size();
			size_before_merge += local_maps[i].size();
		}
		std::cerr << std::endl;
		std::cerr << "Size (global): " << global_map.size() << std::endl;
		size_before_merge += global_map.size();

		watch.restart();

		for (size_t i = 0; i < num_threads; ++i)
			for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
				global_map[it->first] += it->second;

		pool.wait();

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
		<< "Merged in " << time_merged
		<< " (" << size_before_merge / time_merged << " elem/sec.)"
		<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
		<< "Total in " << time_total
		<< " (" << n / time_total << " elem/sec.)"
		<< std::endl;

		std::cerr << "Size: " << global_map.size() << std::endl << std::endl;
	}

	if (!method || method == 4)
	{
		/** Вариант 4.
		  * В разных потоках агрегируем независимо в разные хэш-таблицы,
		  *  пока их размер не станет достаточно большим.
		  * Если размер локальной хэш-таблицы большой, и в ней нет элемента,
		  *  то вставляем его в одну из 256 глобальных хэш-таблиц, каждая из которых под своим mutex-ом.
		  * Затем сливаем все локальные хэш-таблицы в глобальную.
		  * Этот метод не такой уж плохой при большом количестве потоков, но хуже второго.
		  */

		std::vector<Map> local_maps(num_threads);
		MapTwoLevel global_map;
		std::vector<Mutex> mutexes(MapTwoLevel::NUM_BUCKETS);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate4,
				std::ref(local_maps[i]),
				std::ref(global_map),
				&mutexes[0],
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
			<< "Aggregated in " << time_aggregated
			<< " (" << n / time_aggregated << " elem/sec.)"
			<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes (local): ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << local_maps[i].size();
			size_before_merge += local_maps[i].size();
		}
		std::cerr << std::endl;

		size_t sum_size = global_map.size();
		std::cerr << "Size (global): " << sum_size << std::endl;
		size_before_merge += sum_size;

		watch.restart();

		for (size_t i = 0; i < num_threads; ++i)
			for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
				global_map[it->first] += it->second;

		pool.wait();

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
			<< "Merged in " << time_merged
			<< " (" << size_before_merge / time_merged << " elem/sec.)"
			<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
			<< "Total in " << time_total
			<< " (" << n / time_total << " elem/sec.)"
			<< std::endl;

		std::cerr << "Size: " << global_map.size() << std::endl << std::endl;
	}

/*	if (!method || method == 5)
	{
	*/	/** Вариант 5.
		  * В разных потоках агрегируем независимо в разные хэш-таблицы,
		  *  пока их размер не станет достаточно большим.
		  * Если размер локальной хэш-таблицы большой, и в ней нет элемента,
		  *  то вставляем его в одну глобальную хэш-таблицу, содержащую маленькие защёлки в каждой ячейке,
		  *  а если защёлку не удалось захватить, то вставляем в локальную.
		  * Затем сливаем все локальные хэш-таблицы в глобальную.
		  */
/*
		Map local_maps[num_threads];
		MapSmallLocks global_map;

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate5,
				std::ref(local_maps[i]),
				std::ref(global_map),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
			<< "Aggregated in " << time_aggregated
			<< " (" << n / time_aggregated << " elem/sec.)"
			<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes (local): ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << local_maps[i].size();
			size_before_merge += local_maps[i].size();
		}
		std::cerr << std::endl;
		std::cerr << "Size (global): " << global_map.size() << std::endl;
		size_before_merge += global_map.size();

		watch.restart();

		for (size_t i = 0; i < num_threads; ++i)
			for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
				global_map.insert(std::make_pair(it->first, 0)).first->second += it->second;

		pool.wait();

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
			<< "Merged in " << time_merged
			<< " (" << size_before_merge / time_merged << " elem/sec.)"
			<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
			<< "Total in " << time_total
			<< " (" << n / time_total << " elem/sec.)"
			<< std::endl;

		std::cerr << "Size: " << global_map.size() << std::endl << std::endl;
	}*/

	/*if (!method || method == 6)
	{
		*//** Вариант 6.
		  * В разных потоках агрегируем независимо в разные хэш-таблицы.
		  * Затем "сливаем" их, проходя по ним в одинаковом порядке ключей.
		  * Довольно тормозной вариант.
		  */
/*
		std::vector<Map> maps(num_threads);

		Stopwatch watch;

		for (size_t i = 0; i < num_threads; ++i)
			pool.schedule(std::bind(aggregate1,
				std::ref(maps[i]),
				data.begin() + (data.size() * i) / num_threads,
				data.begin() + (data.size() * (i + 1)) / num_threads));

		pool.wait();

		watch.stop();
		double time_aggregated = watch.elapsedSeconds();
		std::cerr
			<< "Aggregated in " << time_aggregated
			<< " (" << n / time_aggregated << " elem/sec.)"
			<< std::endl;

		size_t size_before_merge = 0;
		std::cerr << "Sizes: ";
		for (size_t i = 0; i < num_threads; ++i)
		{
			std::cerr << (i == 0 ? "" : ", ") << maps[i].size();
			size_before_merge += maps[i].size();
		}
		std::cerr << std::endl;

		watch.restart();

		using Maps = std::vector<Map *>;
		Maps maps_to_merge(num_threads);
		for (size_t i = 0; i < num_threads; ++i)
			maps_to_merge[i] = &maps[i];

		size_t size = 0;

		for (size_t i = 0; i < 100; ++i)
		processMergedHashTables(maps_to_merge,
			[] (Map::value_type & dst, const Map::value_type & src) { dst.second += src.second; },
			[&] (const Map::value_type & dst) { ++size; });

		watch.stop();
		double time_merged = watch.elapsedSeconds();
		std::cerr
			<< "Merged in " << time_merged
			<< " (" << size_before_merge / time_merged << " elem/sec.)"
			<< std::endl;

		double time_total = time_aggregated + time_merged;
		std::cerr
			<< "Total in " << time_total
			<< " (" << n / time_total << " elem/sec.)"
			<< std::endl;
		std::cerr << "Size: " << size << std::endl << std::endl;
	}*/

	return 0;
}
/// Reads the source of one subquery and feeds every block to whichever destinations the
/// subquery has: a set, a join and/or a temporary table. Reading stops early once every
/// destination has reported that it needs no more data, or when the query is cancelled.
/// Throws LOGICAL_ERROR if the subquery has no destination at all.
void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)
{
    LOG_TRACE(log, (subquery.set ? "Creating set. " : "")
        << (subquery.join ? "Creating join. " : "")
        << (subquery.table ? "Filling temporary table. " : ""));
    Stopwatch watch;

    BlockOutputStreamPtr table_out;
    if (subquery.table)
        table_out = subquery.table->write({}, context);

    /// A destination that is absent is treated as already finished.
    bool set_finished = !subquery.set;
    bool join_finished = !subquery.join;
    bool table_finished = !subquery.table;

    const auto all_finished = [&] { return set_finished && join_finished && table_finished; };

    if (all_finished())
        throw Exception("Logical error: nothing to do with subquery", ErrorCodes::LOGICAL_ERROR);

    if (table_out)
        table_out->writePrefix();

    while (Block block = subquery.source->read())
    {
        if (isCancelled())
        {
            LOG_DEBUG(log, "Query was cancelled during set / join or temporary table creation.");
            return;
        }

        /// insertFromBlock returning false means the destination wants no more rows.
        if (!set_finished && !subquery.set->insertFromBlock(block))
            set_finished = true;

        if (!join_finished)
        {
            /// The block is renamed and transformed in place before it goes into the join.
            subquery.renameColumns(block);

            if (subquery.joined_block_actions)
                subquery.joined_block_actions->execute(block);

            if (!subquery.join->insertFromBlock(block))
                join_finished = true;
        }

        if (!table_finished)
        {
            block = materializeBlock(block);
            table_out->write(block);

            rows_to_transfer += block.rows();
            bytes_to_transfer += block.bytes();

            const bool within_limits = network_transfer_limits.check(
                rows_to_transfer, bytes_to_transfer, "IN/JOIN external table", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
            if (!within_limits)
                table_finished = true;
        }

        if (all_finished())
        {
            subquery.source->cancel(false);
            break;
        }
    }

    if (table_out)
        table_out->writeSuffix();

    watch.stop();

    /// The row count is taken from the profile info before the totals are handed to the join.
    const BlockStreamProfileInfo & source_profile = subquery.source->getProfileInfo();
    const size_t rows_read = source_profile.rows;

    if (subquery.join)
        subquery.join->setTotals(subquery.source->getTotals());

    if (rows_read == 0)
    {
        LOG_DEBUG(log, "Subquery has empty result.");
        return;
    }

    std::stringstream summary;
    summary << std::fixed << std::setprecision(3);
    summary << "Created. ";

    if (subquery.set)
        summary << "Set with " << subquery.set->getTotalRowCount() << " entries from " << rows_read << " rows. ";
    if (subquery.join)
        summary << "Join with " << subquery.join->getTotalRowCount() << " entries from " << rows_read << " rows. ";
    if (subquery.table)
        summary << "Table with " << rows_read << " rows. ";

    summary << "In " << watch.elapsedSeconds() << " sec.";
    LOG_DEBUG(log, summary.rdbuf());
}
Пример #27
0
int main(int argc, char ** argv)
{
    using namespace DB;

    try
    {
        std::string input = "SELECT x, s1, s2, "
            "/*"
            "2 + x * 2, x * 2, x % 3 == 1, "
            "s1 == 'abc', s1 == s2, s1 != 'abc', s1 != s2, "
            "s1 <  'abc', s1 <  s2, s1 >  'abc', s1 >  s2, "
            "s1 <= 'abc', s1 <= s2, s1 >= 'abc', s1 >= s2, "
            "*/"
            "s1 < s2 AND x % 3 < x % 5";

        ParserSelectQuery parser;
        ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0);

        formatAST(*ast, std::cerr);
        std::cerr << std::endl;

        Context context = Context::createGlobal();
        NamesAndTypesList columns
        {
            {"x", std::make_shared<DataTypeInt16>()},
            {"s1", std::make_shared<DataTypeString>()},
            {"s2", std::make_shared<DataTypeString>()}
        };

        auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, columns);
        ExpressionAnalyzer analyzer(ast, syntax_result, context);
        ExpressionActionsChain chain(context);
        analyzer.appendSelect(chain, false);
        analyzer.appendProjectResult(chain);
        chain.finalize();
        ExpressionActionsPtr expression = chain.getLastActions();

        size_t n = argc == 2 ? atoi(argv[1]) : 10;

        Block block;

        {
            ColumnWithTypeAndName column;
            column.name = "x";
            column.type = std::make_shared<DataTypeInt16>();
            auto col = ColumnInt16::create();
            auto & vec_x = col->getData();

            vec_x.resize(n);
            for (size_t i = 0; i < n; ++i)
                vec_x[i] = i % 9;

            column.column = std::move(col);
            block.insert(column);
        }

        const char * strings[] = {"abc", "def", "abcd", "defg", "ac"};

        {
            ColumnWithTypeAndName column;
            column.name = "s1";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 5]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            ColumnWithTypeAndName column;
            column.name = "s2";
            column.type = std::make_shared<DataTypeString>();
            auto col = ColumnString::create();

            for (size_t i = 0; i < n; ++i)
                col->insert(std::string(strings[i % 3]));

            column.column = std::move(col);
            block.insert(column);
        }

        {
            Stopwatch stopwatch;
            stopwatch.start();

            expression->execute(block);

            stopwatch.stop();
            std::cout << std::fixed << std::setprecision(2)
                << "Elapsed " << stopwatch.elapsedSeconds() << " sec."
                << ", " << n / stopwatch.elapsedSeconds() << " rows/sec."
                << std::endl;
        }

        auto is = std::make_shared<OneBlockInputStream>(block);
        LimitBlockInputStream lis(is, 20, std::max(0, static_cast<int>(n) - 20));
        WriteBufferFromOStream out_buf(std::cout);
        BlockOutputStreamPtr out = FormatFactory::instance().getOutput("TabSeparated", out_buf, block, context);

        copyData(lis, *out);
    }
    catch (const Exception & e)
    {
        std::cerr << e.displayText() << std::endl;
    }

    return 0;
}
/** Benchmark of ArenaWithFreeLists on strings read from stdin.
  *
  * Usage: program n
  *
  * Reads at most n binary-encoded strings from stdin (through DB::CompressedReadBuffer), then:
  *  1. copies every string, together with its terminating zero byte, into an arena;
  *  2. frees randomly chosen arena entries and re-allocates them from randomly chosen source strings;
  *  3. endlessly overwrites random cells of a fixed-size string cache via Dictionary::setAttributeValue.
  *
  * Statistics of every stage are printed to stderr.
  * Stage 3 is a `while (true)` loop, so the program never returns by itself once it gets there.
  */
int main(int argc, char ** argv)
{
    if (argc < 2)
    {
        std::cerr << "Usage: program n\n";
        return 1;
    }

    std::cerr << std::fixed << std::setprecision(2);

    /// Upper bound on the number of strings to read.
    size_t n = parse<size_t>(argv[1]);
    std::vector<std::string> data;
    /// Total size of the strings, counting one extra byte per string (the terminating zero copied below).
    size_t sum_strings_size = 0;

    /// Stage 0: read the source strings.
    {
        Stopwatch watch;
        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
        DB::CompressedReadBuffer in2(in1);

        for (size_t i = 0; i < n && !in2.eof(); ++i)
        {
            data.emplace_back();
            readStringBinary(data.back(), in2);
            sum_strings_size += data.back().size() + 1;
        }

        watch.stop();
        std::cerr
            << "Read. Elements: " << data.size() << ", bytes: " << sum_strings_size
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << sum_strings_size / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;

        rusage resource_usage;
        if (0 != getrusage(RUSAGE_SELF, &resource_usage))
            throwFromErrno("Cannot getrusage", ErrorCodes::SYSTEM_ERROR);

        /// NOTE: ru_maxrss is the peak resident set size, not the current one (see getrusage(2));
        /// it is treated as kilobytes here. The message text is kept unchanged.
        size_t allocated_bytes = resource_usage.ru_maxrss * 1024;
        std::cerr << "Current memory usage: " << allocated_bytes << " bytes.\n";
    }

    ArenaWithFreeLists arena;
    /// refs[i] points to the arena copy that currently occupies slot i.
    std::vector<StringRef> refs;
    refs.reserve(data.size());

    /// Stage 1: copy every string into the arena.
    {
        Stopwatch watch;

        for (const auto & s : data)
        {
            /// + 1: the terminating zero byte of std::string is copied as well.
            auto ptr = arena.alloc(s.size() + 1);
            memcpy(ptr, s.data(), s.size() + 1);
            refs.emplace_back(ptr, s.size() + 1);
        }

        watch.stop();
        std::cerr
            << "Insert info arena. Bytes: " << arena.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << sum_strings_size / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;
    }

    /// Stage 2: replace random slots with copies of random source strings.
    //while (true)
    {
        Stopwatch watch;

        size_t bytes = 0;
        for (size_t i = 0, size = data.size(); i < size; ++i)
        {
            size_t index_from = lrand48() % size;
            size_t index_to = lrand48() % size;

            /// Return the old chunk to the arena before allocating the replacement.
            arena.free(const_cast<char *>(refs[index_to].data), refs[index_to].size);
            const auto & s = data[index_from];
            auto ptr = arena.alloc(s.size() + 1);
            memcpy(ptr, s.data(), s.size() + 1);
            bytes += s.size() + 1;

            refs[index_to] = {ptr, s.size() + 1};
        }

        watch.stop();
        std::cerr
            << "Randomly remove and insert elements. Bytes: " << arena.size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;
    }

    /// Stage 3: a dictionary with its own arena and a single String attribute of cache_size cells.
    Dictionary dictionary;
    dictionary.string_arena = std::make_unique<ArenaWithFreeLists>();

    constexpr size_t cache_size = 1024;

    Dictionary::Attribute attr;
    attr.type = Dictionary::AttributeUnderlyingType::String;
    std::get<Dictionary::ContainerPtrType<StringRef>>(attr.arrays).reset(new StringRef[cache_size]{});

    /// Runs forever: every pass writes data.size() random strings into random cache cells.
    while (true)
    {
        Stopwatch watch;

        size_t bytes = 0;
        for (size_t i = 0, size = data.size(); i < size; ++i)
        {
            size_t index_from = lrand48() % size;
            size_t index_to = lrand48() % cache_size;

            dictionary.setAttributeValue(attr, index_to, data[index_from]);

            bytes += data[index_from].size() + 1;
        }

        watch.stop();
        /// Report the dictionary's own arena: `arena` from the previous stages is not touched in this loop,
        /// so its size would stay constant and say nothing about the cache.
        /// NOTE(review): presumably setAttributeValue allocates from dictionary.string_arena - confirm.
        std::cerr
            << "Filling cache. Bytes: " << dictionary.string_arena->size()
            << ", elapsed: " << watch.elapsedSeconds()
            << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
            << " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
            << std::endl;
    }
}
Пример #29
0
/** Entry point of a multi-threaded file I/O benchmark.
  *
  * Command line: file_name (r|w)[a][d][s] min_offset max_offset block_size threads count
  *  - r / w - read or write mode;
  *  - a - aligned, d - direct, s - sync.
  *
  * Opens the file once, schedules the `thread` routine `threads` times on a ThreadPool
  * (each call receives the descriptor, mode, offsets, block size and count), waits for all of them,
  * calls fsync and prints the elapsed time and throughput to stdout.
  *
  * Returns 1 when the number of arguments is wrong, 0 on success.
  * Throws Poco::Exception on an unknown mode character; uses throwFromErrno when the file
  * cannot be opened, configured or closed.
  */
int mainImpl(int argc, char ** argv)
{
	const char * file_name = nullptr;
	int mode = MODE_NONE;
	size_t min_offset = 0;
	size_t max_offset = 0;
	size_t block_size = 0;
	size_t threads = 0;
	size_t count = 0;

	if (argc != 8)
	{
		std::cerr << "Usage: " << argv[0] << " file_name (r|w)[a][d][s] min_offset max_offset block_size threads count" << std::endl <<
		             "a - aligned, d - direct, s - sync" << std::endl;
		return 1;
	}

	file_name = argv[1];
	min_offset = Poco::NumberParser::parseUnsigned64(argv[3]);
	max_offset = Poco::NumberParser::parseUnsigned64(argv[4]);
	block_size = Poco::NumberParser::parseUnsigned64(argv[5]);
	threads = Poco::NumberParser::parseUnsigned(argv[6]);
	count = Poco::NumberParser::parseUnsigned(argv[7]);

	/// Every character of the second argument switches on one mode flag.
	for (int i = 0; argv[2][i]; ++i)
	{
		char c = argv[2][i];
		switch(c)
		{
			case 'r':
				mode |= MODE_READ;
				break;
			case 'w':
				mode |= MODE_WRITE;
				break;
			case 'a':
				mode |= MODE_ALIGNED;
				break;
			case 'd':
				mode |= MODE_DIRECT;
				break;
			case 's':
				mode |= MODE_SYNC;
				break;
			default:
				throw Poco::Exception("Invalid mode");
		}
	}

	ThreadPool pool(threads);

	/// The file is opened read-only if 'r' was given and write-only otherwise (even when 'w' is absent).
	/// O_DIRECT is not used on Apple platforms; F_NOCACHE is set through fcntl below instead.
	#ifndef __APPLE__
	int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_DIRECT) ? O_DIRECT : 0) | ((mode & MODE_SYNC) ? O_SYNC : 0));
	#else
	int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_SYNC) ? O_SYNC : 0));
	#endif
	if (-1 == fd)
		throwFromErrno("Cannot open file");
	#ifdef __APPLE__
	if (mode & MODE_DIRECT)
		if (fcntl(fd, F_NOCACHE, 1) == -1)
			throwFromErrno("Cannot set F_NOCACHE on file");
	#endif
	Stopwatch watch;

	for (size_t i = 0; i < threads; ++i)
		pool.schedule(std::bind(thread, fd, mode, min_offset, max_offset, block_size, count));
	pool.wait();

	/// The fsync call is part of the measured time; its result is intentionally not checked here.
	fsync(fd);

	watch.stop();

	if (0 != close(fd))
		throwFromErrno("Cannot close file");

	std::cout << std::fixed << std::setprecision(2)
		<< "Done " << count << " * " << threads << " ops";
	if (mode & MODE_ALIGNED)
		std::cout << " (aligned)";
	if (mode & MODE_DIRECT)
		std::cout << " (direct)";
	if (mode & MODE_SYNC)
		std::cout << " (sync)";
	std::cout << " in " << watch.elapsedSeconds() << " sec."
		<< ", " << count * threads / watch.elapsedSeconds() << " ops/sec."
		<< ", " << count * threads * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec."
		<< std::endl;

	return 0;
}
Пример #30
0
/// Compares insert and lookup time of LinearHashTable, HashTable and std::set
/// on N zero-padded 8-character decimal strings and prints the timings to stdout.
///
/// A single stopwatch is shared by all measurements. It keeps accumulating elapsed time
/// between start()/stop() pairs, so it has to be reset after every measurement.
void LinearHashTableTest::testPerformanceStr()
{
    const int N = 5000000;
    Stopwatch sw;

    // Prepare the keys up front so that formatting is not part of any measurement.
    std::vector<std::string> values;
    values.reserve(N);
    for (int i = 0; i < N; ++i)
    {
        values.push_back(NumberFormatter::format0(i, 8));
    }

    {
        LinearHashTable<std::string, Hash<std::string> > lht(N);
        sw.start();
        for (int i = 0; i < N; ++i)
        {
            lht.insert(values[i]);
        }
        sw.stop();
        std::cout << "Insert LHT: " << sw.elapsedSeconds() << std::endl;
        sw.reset();

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            lht.find(values[i]);
        }
        sw.stop();
        std::cout << "Find LHT: " << sw.elapsedSeconds() << std::endl;
        sw.reset();
    }

    {
        HashTable<std::string, int> ht;

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            ht.insert(values[i], i);
        }
        sw.stop();
        std::cout << "Insert HT: " << sw.elapsedSeconds() << std::endl;
        sw.reset();

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            ht.exists(values[i]);
        }
        sw.stop();
        std::cout << "Find HT: " << sw.elapsedSeconds() << std::endl;
        // Without this reset the "Insert set" figure below would also contain the "Find HT" time.
        sw.reset();
    }

    {
        std::set<std::string> s;
        sw.start();
        for (int i = 0; i < N; ++i)
        {
            s.insert(values[i]);
        }
        sw.stop();
        std::cout << "Insert set: " << sw.elapsedSeconds() << std::endl;
        sw.reset();

        sw.start();
        for (int i = 0; i < N; ++i)
        {
            s.find(values[i]);
        }
        sw.stop();
        std::cout << "Find set: " << sw.elapsedSeconds() << std::endl;
        sw.reset();
    }
}