Esempio n. 1
0
/** Opens `file_name_` and stores the resulting descriptor in `fd`.
  *
  * flags == -1 means "open with O_RDONLY"; any other value is passed to open() as is.
  * On macOS the O_DIRECT bit is removed from the flags and replaced by fcntl(F_NOCACHE)
  *  on the already opened descriptor.
  *
  * Throws FILE_DOESNT_EXIST when open() fails with ENOENT and CANNOT_OPEN_FILE on any other failure
  *  (including a failed F_NOCACHE request).
  */
ReadBufferFromFile::ReadBufferFromFile(
	const std::string & file_name_,
	size_t buf_size,
	int flags,
	char * existing_memory,
	size_t alignment)
	: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_)
{
	ProfileEvents::increment(ProfileEvents::FileOpen);

#ifdef __APPLE__
	bool o_direct = (flags != -1) && (flags & O_DIRECT);
	if (o_direct) {
		flags = flags & ~O_DIRECT;
	}
#endif
	fd = open(file_name.c_str(), flags == -1 ? O_RDONLY : flags);

	if (-1 == fd)
	{
		/// Remember errno immediately, so that nothing executed while building the message can overwrite it.
		const int open_errno = errno;

		ProfileEvents::increment(ProfileEvents::FileOpenFailed);
		throwFromErrno(
			"Cannot open file " + file_name,
			open_errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE,
			open_errno);
	}
#ifdef __APPLE__
	if (o_direct)
	{
		if (fcntl(fd, F_NOCACHE, 1) == -1)
		{
			const int fcntl_errno = errno;

			ProfileEvents::increment(ProfileEvents::FileOpenFailed);

			/// The destructor of this class does not run when its constructor throws,
			///  so the descriptor has to be released here. Reset it to -1 to rule out a second close.
			close(fd);
			fd = -1;

			throwFromErrno("Cannot set F_NOCACHE on file " + file_name, ErrorCodes::CANNOT_OPEN_FILE, fcntl_errno);
		}
	}
#endif
}
Esempio n. 2
0
/** Creates (or reopens) the status file at `path_`, takes an exclusive non-blocking flock on it
  *  and writes the PID, start time and revision of the current server instance into it.
  *
  * If the file already exists, up to 1024 bytes of its previous contents are logged first.
  *
  * Throws if the file cannot be opened, locked (for example, another instance holds the lock),
  *  truncated or rewound. The descriptor is closed before the exception leaves the constructor.
  */
StatusFile::StatusFile(const std::string & path_)
	: path(path_)
{
	/// If the file already exists. NOTE: there is a minor race condition here.
	if (Poco::File(path).exists())
	{
		std::string contents;
		{
			ReadBufferFromFile in(path, 1024);
			LimitReadBuffer limit_in(in, 1024);
			WriteBufferFromString out(contents);
			copyData(limit_in, out);
		}

		if (!contents.empty())
			LOG_INFO(&Logger::get("StatusFile"), "Status file " << path << " already exists - unclean restart. Contents:\n" << contents);
		else
			LOG_INFO(&Logger::get("StatusFile"), "Status file " << path << " already exists and is empty - probably unclean hardware restart.");
	}

	/// O_CLOEXEC: the descriptor carries the server lock, so it must not be inherited by exec'ed child processes -
	///  otherwise a child could keep the lock held after the server itself is gone.
	fd = open(path.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0666);

	if (-1 == fd)
		throwFromErrno("Cannot open file " + path);

	try
	{
		int flock_ret = flock(fd, LOCK_EX | LOCK_NB);
		if (-1 == flock_ret)
		{
			/// EWOULDBLOCK means that somebody else already holds the lock.
			if (errno == EWOULDBLOCK)
				throw Exception("Cannot lock file " + path + ". Another server instance in same directory is already running.");
			else
				throwFromErrno("Cannot lock file " + path);
		}

		if (0 != ftruncate(fd, 0))
			throwFromErrno("Cannot ftruncate " + path);

		if (0 != lseek(fd, 0, SEEK_SET))
			throwFromErrno("Cannot lseek " + path);

		/// Write information about the current server instance to the file.
		{
			WriteBufferFromFileDescriptor out(fd, 1024);
			out
				<< "PID: " << getpid() << "\n"
				<< "Started at: " << LocalDateTime(time(nullptr)) << "\n"
				<< "Revision: " << ClickHouseRevision::get() << "\n";
		}
	}
	catch (...)
	{
		/// Do not leak the descriptor (and the lock with it) when initialization fails.
		close(fd);
		throw;
	}
}
/// Executes a SYSTEM query: dispatches on the query type and always returns an empty BlockIO.
/// Throws NOT_IMPLEMENTED for the recognised but unsupported types and BAD_ARGUMENTS for unknown ones.
BlockIO InterpreterSystemQuery::execute()
{
    using Type = ASTSystemQuery::Type;

    auto & system_query = typeid_cast<ASTSystemQuery &>(*query_ptr);

    /// Sends the signal with kill(0, ...) and turns a failed call into a CANNOT_KILL exception.
    const auto send_signal = [](int signal_number, const char * failure_message)
    {
        if (0 != kill(0, signal_number))
            throwFromErrno(failure_message, ErrorCodes::CANNOT_KILL);
    };

    switch (system_query.type)
    {
        case Type::SHUTDOWN:
        {
            send_signal(SIGTERM, "System call kill(0, SIGTERM) failed");
            break;
        }
        case Type::KILL:
        {
            send_signal(SIGKILL, "System call kill(0, SIGKILL) failed");
            break;
        }
        case Type::DROP_DNS_CACHE:
        {
            DNSCache::instance().drop();
            /// Clusters keep resolved addresses, so they have to be rebuilt after the cache is dropped.
            context.reloadClusterConfig();
            break;
        }
        case Type::DROP_MARK_CACHE:
        {
            context.dropMarkCache();
            break;
        }
        case Type::DROP_UNCOMPRESSED_CACHE:
        {
            context.dropUncompressedCache();
            break;
        }
        case Type::RELOAD_DICTIONARY:
        {
            context.getExternalDictionaries().reloadDictionary(system_query.target_dictionary);
            break;
        }
        case Type::RELOAD_DICTIONARIES:
        {
            /// Both reloads are handed to the helper; a non-zero overall status is rethrown.
            const auto reload_status = getOverallExecutionStatusOfCommands(
                [&] { context.getExternalDictionaries().reload(); },
                [&] { context.getEmbeddedDictionaries().reload(); });

            if (0 != reload_status.code)
                throw Exception(reload_status.message, reload_status.code);
            break;
        }
        case Type::STOP_LISTEN_QUERIES:
        case Type::START_LISTEN_QUERIES:
        case Type::RESTART_REPLICAS:
        case Type::SYNC_REPLICA:
        case Type::STOP_MERGES:
        case Type::START_MERGES:
        case Type::STOP_REPLICATION_QUEUES:
        case Type::START_REPLICATION_QUEUES:
            throw Exception(String(ASTSystemQuery::typeToString(system_query.type)) + " is not supported yet", ErrorCodes::NOT_IMPLEMENTED);
        default:
            throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS);
    }

    return BlockIO();
}
Esempio n. 4
0
/** Starts `filename` with arguments `argv` (via execv) in a child process created with vfork.
  * The child's stdin, stdout and stderr are replaced with ends of three pipes; the opposite ends
  *  are handed over to the returned ShellCommand together with the child's pid.
  *
  * Throws CANNOT_DLSYM if the vfork symbol cannot be found and CANNOT_FORK if vfork itself fails.
  * Failures inside the child are reported through its exit code (see ReturnCodes).
  */
std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, char * const argv[])
{
	/** It is written here that an ordinary vfork call may deadlock in multithreaded programs
	  *  because of symbol resolution in the shared library:
	  * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html
	  * Therefore, we separate the resolution of the symbol from the call.
	  */
	static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");

	if (!real_vfork)
		throwFromErrno("Cannot find symbol vfork in myself", ErrorCodes::CANNOT_DLSYM);

	Pipe pipe_stdin;
	Pipe pipe_stdout;
	Pipe pipe_stderr;

	pid_t pid = reinterpret_cast<pid_t(*)()>(real_vfork)();

	if (-1 == pid)
		throwFromErrno("Cannot vfork", ErrorCodes::CANNOT_FORK);

	if (0 == pid)
	{
		/// We are in the freshly created process.

		/// Why `_exit` and not `exit`? Because `exit` calls `atexit` handlers and destructors of thread local storage.
		/// And there is a lot of garbage there (including, for example, locking of a mutex). That must not be done after vfork - it leads to a deadlock.

		/// Replace the file descriptors with the ends of our pipes.
		if (STDIN_FILENO != dup2(pipe_stdin.read_fd, STDIN_FILENO))
			_exit(int(ReturnCodes::CANNOT_DUP_STDIN));

		if (STDOUT_FILENO != dup2(pipe_stdout.write_fd, STDOUT_FILENO))
			_exit(int(ReturnCodes::CANNOT_DUP_STDOUT));

		if (STDERR_FILENO != dup2(pipe_stderr.write_fd, STDERR_FILENO))
			_exit(int(ReturnCodes::CANNOT_DUP_STDERR));

		execv(filename, argv);
		/// If the process has been started, execv does not return here.

		_exit(int(ReturnCodes::CANNOT_EXEC));
	}

	std::unique_ptr<ShellCommand> res(new ShellCommand(pid, pipe_stdin.write_fd, pipe_stdout.read_fd, pipe_stderr.read_fd));

	/// Ownership of these file descriptors has now been passed to the result,
	///  so mark them as absent in the Pipe objects.
	pipe_stdin.write_fd = -1;
	pipe_stdout.read_fd = -1;
	pipe_stderr.read_fd = -1;

	return res;
}
/// Used to check whether the is_active node was set by us or not.
/// The identifier combines the process id with a number derived from the CPU time of the calling thread.
static String generateActiveNodeIdentifier()
{
	struct timespec thread_cpu_time;

	const int rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &thread_cpu_time);
	if (0 != rc)
		throwFromErrno("Cannot clock_gettime.", ErrorCodes::CANNOT_CLOCK_GETTIME);

	const auto random_part = thread_cpu_time.tv_nsec + thread_cpu_time.tv_sec + getpid();

	String identifier = "pid: " + toString(getpid());
	identifier += ", random: " + toString(random_part);
	return identifier;
}
Esempio n. 6
0
/** Constructs the storage and, unless an existing table is being attached,
  *  makes sure that the table directory `path + escapeForFileName(name) + '/'` exists.
  *
  * Throws EMPTY_LIST_OF_COLUMNS_PASSED for an empty column list and
  *  CANNOT_CREATE_DIRECTORY if mkdir fails for a reason other than EEXIST.
  */
StorageStripeLog::StorageStripeLog(
    const std::string & path_,
    const std::string & name_,
    const NamesAndTypesList & columns_,
    const NamesAndTypesList & materialized_columns_,
    const NamesAndTypesList & alias_columns_,
    const ColumnDefaults & column_defaults_,
    bool attach,
    size_t max_compress_block_size_)
    : IStorage{materialized_columns_, alias_columns_, column_defaults_},
    path(path_), name(name_), columns(columns_),
    max_compress_block_size(max_compress_block_size_),
    file_checker(path + escapeForFileName(name) + '/' + "sizes.json"),
    log(&Logger::get("StorageStripeLog"))
{
    if (columns.empty())
        throw Exception("Empty list of columns passed to StorageStripeLog constructor", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);

    const String table_directory = path + escapeForFileName(name) + '/';

    /// Nothing is created on disk when attaching.
    if (attach)
        return;

    /// Create the directory; it is fine if it is already there.
    const bool directory_created = (0 == mkdir(table_directory.c_str(), S_IRWXU | S_IRWXG | S_IRWXO));
    if (!directory_created && errno != EEXIST)
        throwFromErrno("Cannot create directory " + table_directory, ErrorCodes::CANNOT_CREATE_DIRECTORY);
}
Esempio n. 7
0
    /** Body of a worker thread: repeatedly takes a query from `queue` and executes it on `connection`
      *  until `shutdown` becomes true.
      *
      * SIGINT is blocked in the calling thread first.
      * On any exception `shutdown` is set, the query last taken from the queue is printed to stderr
      *  and the exception is rethrown.
      */
    void thread(ConnectionPool::Entry connection)
    {
        Query query;

        try
        {
            /// In these threads we do not accept INT signal.
            sigset_t sig_set;
            if (sigemptyset(&sig_set)
                || sigaddset(&sig_set, SIGINT))
                throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL);

            /// Unlike the calls above, pthread_sigmask reports failure through its return value
            ///  and does not set errno, so pass the returned error number explicitly.
            if (const int sigmask_error = pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
                throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL, sigmask_error);

            while (true)
            {
                bool extracted = false;

                /// Poll the queue with the timeout argument 100 so that `shutdown` is checked regularly.
                while (!extracted)
                {
                    extracted = queue.tryPop(query, 100);

                    if (shutdown)
                        return;
                }

                execute(connection, query);
            }
        }
        catch (...)
        {
            /// Make the other users of `shutdown` stop as well.
            shutdown = true;
            std::cerr << "An error occurred while processing query:\n" << query << "\n";
            throw;
        }
    }
Esempio n. 8
0
/** Constructs the storage; unless an existing table is being attached, makes sure that the table directory
  *  `path + escapeForFileName(name) + '/'` exists; then registers files for every physical column.
  *
  * Throws INCORRECT_FILE_NAME when `path_` is empty and
  *  CANNOT_CREATE_DIRECTORY if mkdir fails for a reason other than EEXIST.
  */
StorageTinyLog::StorageTinyLog(
    const std::string & path_,
    const std::string & name_,
    const ColumnsDescription & columns_,
    bool attach,
    size_t max_compress_block_size_)
    : IStorage{columns_},
    path(path_), name(name_),
    max_compress_block_size(max_compress_block_size_),
    file_checker(path + escapeForFileName(name) + '/' + "sizes.json"),
    log(&Logger::get("StorageTinyLog"))
{
    if (path.empty())
        throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME);

    const String table_directory = path + escapeForFileName(name) + '/';

    if (!attach)
    {
        /// Create the directory; it is fine if it is already there.
        const bool directory_created = (0 == mkdir(table_directory.c_str(), S_IRWXU | S_IRWXG | S_IRWXO));
        if (!directory_created && errno != EEXIST)
            throwFromErrno("Cannot create directory " + table_directory, ErrorCodes::CANNOT_CREATE_DIRECTORY);
    }

    for (const auto & column : getColumns().getAllPhysical())
        addFiles(column.name, *column.type);
}
Esempio n. 9
0
    /** Locks the storage and prepares a reader for its data.
      *
      * When the storage uses a table file descriptor, `rwlock` is taken exclusively and the descriptor is
      *  rewound to its initial offset if it has been used before; otherwise `rwlock` is taken in shared mode
      *  and the file at `storage.path` is opened.
      *
      * If anything throws after the lock has been taken, the lock is released before the exception
      *  leaves the constructor: the destructor of this class does not run in that case.
      * NOTE(review): this assumes `rwlock` provides unlock()/unlock_shared() matching the lock()/lock_shared()
      *  used here - confirm against the declaration of StorageFile.
      */
    StorageFileBlockInputStream(StorageFile & storage_, const Context & context, size_t max_block_size)
        : storage(storage_)
    {
        const bool exclusive = storage.use_table_fd;

        if (exclusive)
            storage.rwlock.lock();
        else
            storage.rwlock.lock_shared();

        try
        {
            if (exclusive)
            {
                /// We could use common ReadBuffer and WriteBuffer in storage to leverage cache
                ///  and add ability to seek unseekable files, but cache sync isn't supported.

                if (storage.table_fd_was_used) /// We need seek to initial position
                {
                    if (storage.table_fd_init_offset < 0)
                        throw Exception("File descriptor isn't seekable, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

                    /// ReadBuffer's seek() doesn't make sense, since cache is empty
                    if (lseek(storage.table_fd, storage.table_fd_init_offset, SEEK_SET) < 0)
                        throwFromErrno("Cannot seek file descriptor, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
                }

                storage.table_fd_was_used = true;
                read_buf = std::make_unique<ReadBufferFromFileDescriptor>(storage.table_fd);
            }
            else
            {
                read_buf = std::make_unique<ReadBufferFromFile>(storage.path);
            }

            reader = FormatFactory().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size);
        }
        catch (...)
        {
            /// Do not leave the storage locked forever when construction fails.
            if (exclusive)
                storage.rwlock.unlock();
            else
                storage.rwlock.unlock_shared();
            throw;
        }
    }
 /// Calls hdfsSync for the output file `fout`.
 /// Throws CANNOT_FSYNC, with the URI and the last HDFS error in the message, if the call reports failure.
 void sync()
 {
     int result = hdfsSync(fs.get(), fout);
     if (result < 0)
         /// The space after "sync" keeps the URI from being glued to the preceding word.
         throwFromErrno("Cannot HDFS sync " + hdfs_uri.toString() + " " + std::string(hdfsGetLastError()),
             ErrorCodes::CANNOT_FSYNC);
 }
Esempio n. 11
0
		/// Opens an iconv descriptor for the given pair of charsets and stores it in `impl`.
		/// Throws BAD_ARGUMENTS if iconv_open fails.
		IConv(const CharsetsFromTo & charsets)
		{
			const auto & first_charset = charsets.first;
			const auto & second_charset = charsets.second;

			/// iconv_open takes the second charset of the pair as its first argument.
			impl = iconv_open(second_charset.data(), first_charset.data());

			const bool open_failed = (reinterpret_cast<iconv_t>(-1) == impl);
			if (open_failed)
				throwFromErrno("Cannot iconv_open with charsets " + first_charset + " and " + second_charset,
					ErrorCodes::BAD_ARGUMENTS);
		}
void DistributedBlockOutputStream::writeToShard(const Block & block, const std::vector<std::string> & dir_names)
{
    /** tmp directory is used to ensure atomicity of transactions
      *  and keep monitor thread out from reading incomplete data
      */
    std::string first_file_tmp_path{};

    auto first = true;
    const auto & query_string = queryToString(query_ast);

    /// write first file, hardlink the others
    for (const auto & dir_name : dir_names)
    {
        const auto & path = storage.getPath() + dir_name + '/';

        /// ensure shard subdirectory creation and notify storage
        if (Poco::File(path).createDirectory())
            storage.requireDirectoryMonitor(dir_name);

        const auto & file_name = toString(storage.file_names_increment.get()) + ".bin";
        const auto & block_file_path = path + file_name;

        /** on first iteration write block to a temporary directory for subsequent hardlinking to ensure
            *  the inode is not freed until we're done */
        if (first)
        {
            first = false;

            const auto & tmp_path = path + "tmp/";
            Poco::File(tmp_path).createDirectory();
            const auto & block_file_tmp_path = tmp_path + file_name;

            first_file_tmp_path = block_file_tmp_path;

            WriteBufferFromFile out{block_file_tmp_path};
            CompressedWriteBuffer compress{out};
            NativeBlockOutputStream stream{compress, ClickHouseRevision::get()};

            writeStringBinary(query_string, out);

            stream.writePrefix();
            stream.write(block);
            stream.writeSuffix();
        }

        if (link(first_file_tmp_path.data(), block_file_path.data()))
            throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path);
    }

    /** remove the temporary file, enabling the OS to reclaim inode after all threads
        *  have removed their corresponding files */
    Poco::File(first_file_tmp_path).remove();
}
Esempio n. 13
0
/** Creates a hard link `destination_path` pointing to `source_path`.
  *
  * An already existing destination is accepted when it has the same inode number as the source.
  * Throws CANNOT_LINK if linking fails for another reason or the existing destination has a different inode,
  *  and CANNOT_STAT if either path cannot be examined with lstat.
  */
void createHardLink(const String & source_path, const String & destination_path)
{
    const bool linked = (0 == link(source_path.c_str(), destination_path.c_str()));
    if (linked)
        return;

    if (EEXIST != errno)
        throwFromErrno("Cannot link " + source_path + " to " + destination_path, ErrorCodes::CANNOT_LINK);

    /// The lstat calls below may overwrite errno, so keep the one that link() reported.
    const auto link_errno = errno;

    struct stat source_stat;
    if (0 != lstat(source_path.c_str(), &source_stat))
        throwFromErrno("Cannot stat " + source_path, ErrorCodes::CANNOT_STAT);

    struct stat destination_stat;
    if (0 != lstat(destination_path.c_str(), &destination_stat))
        throwFromErrno("Cannot stat " + destination_path, ErrorCodes::CANNOT_STAT);

    const bool same_inode = (source_stat.st_ino == destination_stat.st_ino);
    if (!same_inode)
        throwFromErrno("Destination file " + destination_path + " is already exist and have different inode.", ErrorCodes::CANNOT_LINK, link_errno);
}
Esempio n. 14
0
/** Waits for the child process `pid` and returns its exit code if it exited normally.
  *
  * Throws CANNOT_WAITPID if waitpid fails and CHILD_WAS_NOT_EXITED_NORMALLY
  *  if the child was terminated or stopped by a signal, or ended in some other way.
  */
int ShellCommand::tryWait()
{
	int status = 0;

	/// waitpid fails with EINTR when a signal handler interrupts it; that is not an error
	///  of the child, so simply repeat the call.
	while (-1 == waitpid(pid, &status, 0))
	{
		if (errno != EINTR)
			throwFromErrno("Cannot waitpid", ErrorCodes::CANNOT_WAITPID);
	}

	if (WIFEXITED(status))
		return WEXITSTATUS(status);

	if (WIFSIGNALED(status))
		throw Exception("Child process was terminated by signal " + toString(WTERMSIG(status)), ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY);

	if (WIFSTOPPED(status))
		throw Exception("Child process was stopped by signal " + toString(WSTOPSIG(status)), ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY);

	throw Exception("Child process was not exited normally by unknown reason", ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY);
}
Esempio n. 15
0
/// Opens `filename_` with O_DIRECT added to the requested flags (flags_ == -1 stands for O_RDONLY).
/// Note: the buffer is allocated one DEFAULT_AIO_FILE_BLOCK_SIZE larger than requested; the additional
/// page will contain the data that does not fit into the main buffer.
/// Throws FILE_DOESNT_EXIST when open() fails with ENOENT and CANNOT_OPEN_FILE on any other failure.
ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_, int flags_, char * existing_memory_)
    : ReadBufferFromFileBase(buffer_size_ + DEFAULT_AIO_FILE_BLOCK_SIZE, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE),
      fill_buffer(BufferWithOwnMemory<ReadBuffer>(internalBuffer().size(), nullptr, DEFAULT_AIO_FILE_BLOCK_SIZE)),
      filename(filename_)
{
    ProfileEvents::increment(ProfileEvents::FileOpen);

    int open_flags = O_DIRECT;
    if (-1 == flags_)
        open_flags |= O_RDONLY;
    else
        open_flags |= flags_;

    fd = ::open(filename.c_str(), open_flags);
    if (-1 != fd)
        return;

    /// Pick the error code before anything else has a chance to modify errno.
    const bool file_is_missing = (ENOENT == errno);
    throwFromErrno(
        "Cannot open file " + filename,
        file_is_missing ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE);
}
Esempio n. 16
0
/** Returns the name of the user with id `user_id`.
  * If the lookup succeeds but finds no such user, the numeric id converted to a string is returned instead.
  * Throws FAILED_TO_GETPWUID if getpwuid_r reports an error.
  */
static std::string getUserName(uid_t user_id)
{
    /// Try to convert user id into user name.
    auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
    if (buffer_size <= 0)
        buffer_size = 1024;

    /// getpwuid_r writes up to `buffer_size` bytes into the buffer, so the string must really have that size:
    ///  reserve() would only change the capacity, and writing beyond size() is undefined behaviour.
    std::string buffer(static_cast<size_t>(buffer_size), '\0');

    struct passwd passwd_entry;
    struct passwd * result = nullptr;
    const auto error = getpwuid_r(user_id, &passwd_entry, &buffer[0], buffer.size(), &result);

    if (error)
        throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
    else if (result)
        return result->pw_name;

    /// No error and no entry: there is no user with this id.
    return toString(user_id);
}
Esempio n. 17
0
	/** Converts every string of a string column from `from_charset` to `to_charset` with iconv.
	  *
	  * Input:  `from_chars` holds the strings one after another, each followed by a zero byte;
	  *         `from_offsets[i]` is the position right after the zero byte of the i-th string.
	  * Output: `to_chars` / `to_offsets` are filled in the same layout.
	  *
	  * Throws CANNOT_ICONV if the iconv state cannot be reset or a conversion fails
	  *  for a reason other than a too small output buffer.
	  */
	void convert(const String & from_charset, const String & to_charset,
		const ColumnString::Chars_t & from_chars, const ColumnString::Offsets_t & from_offsets,
		ColumnString::Chars_t & to_chars, ColumnString::Offsets_t & to_offsets)
	{
		auto converter = getConverter(CharsetsFromTo(from_charset, to_charset));
		iconv_t iconv_state = converter->impl;

		/// Start with an output buffer of the same size as the input; it is enlarged below when needed.
		to_chars.resize(from_chars.size());
		to_offsets.resize(from_offsets.size());

		ColumnString::Offset_t current_from_offset = 0;
		ColumnString::Offset_t current_to_offset = 0;

		size_t size = from_offsets.size();

		for (size_t i = 0; i < size; ++i)
		{
			/// Length of the i-th source string without its terminating zero byte.
			size_t from_string_size = from_offsets[i] - current_from_offset - 1;

			/// We assume that empty string is empty in every charset.
			if (0 != from_string_size)
			{
				/// reset state of iconv
				size_t res = iconv(iconv_state, nullptr, nullptr, nullptr, nullptr);
				if (static_cast<size_t>(-1) == res)
					throwFromErrno("Cannot reset iconv", ErrorCodes::CANNOT_ICONV);

				/// perform conversion; resize output buffer and continue if required

				/// iconv takes a non-const input pointer, hence the const_cast.
				char * in_buf = const_cast<char *>(reinterpret_cast<const char *>(&from_chars[current_from_offset]));
				size_t in_bytes_left = from_string_size;

				char * out_buf = reinterpret_cast<char *>(&to_chars[current_to_offset]);
				size_t out_bytes_left = to_chars.size() - current_to_offset;

				while (in_bytes_left)
				{
					size_t res = iconv(iconv_state, &in_buf, &in_bytes_left, &out_buf, &out_bytes_left);
					/// Everything that is not "left" in the output buffer has been written.
					current_to_offset = to_chars.size() - out_bytes_left;

					if (static_cast<size_t>(-1) == res)
					{
						/// E2BIG: the output buffer is full. Double it, recompute the output pointer
						///  and the remaining space from the current offset, and call iconv again.
						if (E2BIG == errno)
						{
							to_chars.resize(to_chars.size() * 2);
							out_buf = reinterpret_cast<char *>(&to_chars[current_to_offset]);
							out_bytes_left = to_chars.size() - current_to_offset;
							continue;
						}

						throwFromErrno("Cannot convert charset", ErrorCodes::CANNOT_ICONV);
					}
				}
			}

			/// Make sure there is room for the terminating zero byte of the converted string.
			if (to_chars.size() < current_to_offset + 1)
				to_chars.resize(current_to_offset + 1);

			to_chars[current_to_offset] = 0;

			++current_to_offset;
			to_offsets[i] = current_to_offset;

			current_from_offset = from_offsets[i];
		}

		/// Cut off the unused tail of the output buffer.
		to_chars.resize(current_to_offset);
	}
/** Executes a SYSTEM query: dispatches on the query type and always returns an empty BlockIO.
  *
  * Several actions are performed on a copy of the global context with the system profile applied.
  * A target table given without a database is resolved against the current database of the query context.
  *
  * Throws NOT_IMPLEMENTED for the recognised but unsupported types, BAD_ARGUMENTS for unknown types
  *  and for RESTART REPLICA of a table that is not found as a replicated table.
  */
BlockIO InterpreterSystemQuery::execute()
{
    auto & query = typeid_cast<ASTSystemQuery &>(*query_ptr);

    using Type = ASTSystemQuery::Type;

    /// Use global context with fresh system profile settings
    Context system_context = context.getGlobalContext();
    system_context.setSetting("profile", context.getSystemProfileName());

    /// Make canonical query for simpler processing
    if (!query.target_table.empty() && query.target_database.empty())
         query.target_database = context.getCurrentDatabase();

    switch (query.type)
    {
        case Type::SHUTDOWN:
            if (kill(0, SIGTERM))
                throwFromErrno("System call kill(0, SIGTERM) failed", ErrorCodes::CANNOT_KILL);
            break;
        case Type::KILL:
            if (kill(0, SIGKILL))
                throwFromErrno("System call kill(0, SIGKILL) failed", ErrorCodes::CANNOT_KILL);
            break;
        case Type::DROP_DNS_CACHE:
            DNSResolver::instance().dropCache();
            /// Reinitialize clusters to update their resolved_addresses
            system_context.reloadClusterConfig();
            break;
        case Type::DROP_MARK_CACHE:
            system_context.dropMarkCache();
            break;
        case Type::DROP_UNCOMPRESSED_CACHE:
            system_context.dropUncompressedCache();
            break;
        case Type::RELOAD_DICTIONARY:
            system_context.getExternalDictionaries().reloadDictionary(query.target_dictionary);
            break;
        case Type::RELOAD_DICTIONARIES:
        {
            /// Both reloads are handed to the helper; a non-zero overall status is rethrown.
            auto status = getOverallExecutionStatusOfCommands(
                    [&] { system_context.getExternalDictionaries().reload(); },
                    [&] { system_context.getEmbeddedDictionaries().reload(); }
            );
            if (status.code != 0)
                throw Exception(status.message, status.code);
            break;
        }
        case Type::RELOAD_EMBEDDED_DICTIONARIES:
            system_context.getEmbeddedDictionaries().reload();
            break;
        case Type::RELOAD_CONFIG:
            system_context.reloadConfig();
            break;
        /// In the START/STOP pairs below the last argument is `true` for START and `false` for STOP.
        case Type::STOP_MERGES:
            startStopAction(context, query, ActionLocks::PartsMerge, false);
            break;
        case Type::START_MERGES:
            startStopAction(context, query, ActionLocks::PartsMerge, true);
            break;
        case Type::STOP_FETCHES:
            startStopAction(context, query, ActionLocks::PartsFetch, false);
            break;
        case Type::START_FETCHES:
            startStopAction(context, query, ActionLocks::PartsFetch, true);
            break;
        case Type::STOP_REPLICATED_SENDS:
            startStopAction(context, query, ActionLocks::PartsSend, false);
            break;
        case Type::START_REPLICATEDS_SENDS:
            /// Must be `true` like every other START case; `false` would perform the STOP action again.
            startStopAction(context, query, ActionLocks::PartsSend, true);
            break;
        case Type::STOP_REPLICATION_QUEUES:
            startStopAction(context, query, ActionLocks::ReplicationQueue, false);
            break;
        case Type::START_REPLICATION_QUEUES:
            startStopAction(context, query, ActionLocks::ReplicationQueue, true);
            break;
        case Type::SYNC_REPLICA:
            syncReplica(query);
            break;
        case Type::RESTART_REPLICAS:
            restartReplicas(system_context);
            break;
        case Type::RESTART_REPLICA:
            if (!tryRestartReplica(query.target_database, query.target_table, system_context))
                throw Exception("There is no " + query.target_database + "." + query.target_table + " replicated table",
                                ErrorCodes::BAD_ARGUMENTS);
            break;
        case Type::STOP_LISTEN_QUERIES:
        case Type::START_LISTEN_QUERIES:
            throw Exception(String(ASTSystemQuery::typeToString(query.type)) + " is not supported yet", ErrorCodes::NOT_IMPLEMENTED);
        default:
            throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS);
    }

    return BlockIO();
}