void aio_testcase(uint64_t block_size, uint64_t total_size_mb, size_t concurrency, bool is_write, bool random_offset) { std::unique_ptr<char[]> buffer(new char[block_size]); std::atomic_uint remain_concurrency; remain_concurrency = concurrency; if (is_write && utils::filesystem::file_exists("temp")) { utils::filesystem::remove_path("temp"); dassert(!utils::filesystem::file_exists("temp"), ""); } auto file_handle = dsn_file_open("temp", O_CREAT | O_RDWR, 0666); auto total_size_bytes = total_size_mb * 1024 * 1024; auto tic = std::chrono::steady_clock::now(); for (int bytes_written = 0; bytes_written < total_size_bytes; ) { while (true) { if (remain_concurrency.fetch_sub(1, std::memory_order_acquire) <= 0) { remain_concurrency.fetch_add(1, std::memory_order_relaxed); } else { break; } } auto cb = [&](error_code ec, int sz) { dassert(ec == ERR_OK && uint64_t(sz) == block_size, "ec = %s, sz = %d, block_size = %" PRId64 "", ec.to_string(), sz, block_size ); remain_concurrency.fetch_add(1, std::memory_order_relaxed); }; auto offset = random_offset ? dsn_random64(0, total_size_bytes - block_size) : bytes_written; if (is_write) { file::write(file_handle, buffer.get(), block_size, offset, LPC_AIO_TEST, nullptr, cb); } else { file::read(file_handle, buffer.get(), block_size, offset, LPC_AIO_TEST, nullptr, cb); } bytes_written += block_size; } while (remain_concurrency != concurrency) { ; } dsn_file_flush(file_handle); auto toc = std::chrono::steady_clock::now(); auto c*k = dsn_file_close(file_handle); EXPECT_EQ(c*k, ERR_OK); std::cout << "is_write = " << is_write << " random_offset = " << random_offset << " block_size = " << block_size << " concurrency = " << concurrency << " throughput = " << double(total_size_mb) * 1000000 / std::chrono::duration_cast<std::chrono::microseconds>(toc - tic).count() << " mB/s" << std::endl; }
void meta_service::start(const char* data_dir, bool clean_state) { dassert(!_started, "meta service is already started"); _data_dir = data_dir; std::string checkpoint_path = _data_dir + "/checkpoint"; std::string oplog_path = _data_dir + "/oplog"; if (clean_state) { try { if (!dsn::utils::filesystem::remove_path(checkpoint_path)) { dassert(false, "Fail to remove file %s.", checkpoint_path.c_str()); } if (!dsn::utils::filesystem::remove_path(oplog_path)) { dassert(false, "Fail to remove file %s.", oplog_path.c_str()); } } catch (std::exception& ex) { ex; } } else { if (!dsn::utils::filesystem::create_directory(_data_dir)) { dassert(false, "Fail to create directory %s.", _data_dir.c_str()); } if (dsn::utils::filesystem::file_exists(checkpoint_path)) { _state->load(checkpoint_path.c_str()); } if (dsn::utils::filesystem::file_exists(oplog_path)) { replay_log(oplog_path.c_str()); _state->save(checkpoint_path.c_str()); if (!dsn::utils::filesystem::remove_path(oplog_path)) { dassert(false, "Fail to remove file %s.", oplog_path.c_str()); } } } _log = dsn_file_open((_data_dir + "/oplog").c_str(), O_RDWR | O_CREAT, 0666); _balancer = new load_balancer(_state); _failure_detector = new meta_server_failure_detector(_state, this); // TODO: use zookeeper for leader election _failure_detector->set_primary(primary_address()); // make sure the delay is larger than fd.grace to ensure // all machines are in the correct state (assuming connected initially) tasking::enqueue(LPC_LBM_START, this, &meta_service::on_load_balance_start, 0, _opts.fd_grace_seconds * 1000); auto err = _failure_detector->start( _opts.fd_check_interval_seconds, _opts.fd_beacon_interval_seconds, _opts.fd_lease_seconds, _opts.fd_grace_seconds, false ); dassert(err == ERR_OK, "FD start failed, err = %s", err.to_string()); register_rpc_handler( RPC_CM_QUERY_NODE_PARTITIONS, "RPC_CM_QUERY_NODE_PARTITIONS", &meta_service::on_query_configuration_by_node ); register_rpc_handler( RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, 
"RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX", &meta_service::on_query_configuration_by_index ); register_rpc_handler( RPC_CM_UPDATE_PARTITION_CONFIGURATION, "RPC_CM_UPDATE_PARTITION_CONFIGURATION", &meta_service::on_update_configuration ); }
void aio_testcase(uint64_t block_size, size_t concurrency, bool is_write, bool shared) { std::unique_ptr<char[]> buffer(new char[block_size]); std::vector<dsn_handle_t> files; files.resize(concurrency); int flag; if (is_write) { flag = O_CREAT | O_RDWR; if (shared) { if (utils::filesystem::file_exists("temp")) utils::filesystem::remove_path("temp"); } else { for (int i = 0; i < concurrency; i++) { std::stringstream ss; ss << "temp." << i; auto file = ss.str(); if (utils::filesystem::file_exists(file)) utils::filesystem::remove_path(file); } } } else { flag = O_RDWR; } if (shared) { auto file_handle = dsn_file_open("temp", flag, 0666); EXPECT_TRUE(file_handle != nullptr); for (int i = 0; i < concurrency; i++) files[i] = file_handle; } else { for (int i = 0; i < concurrency; i++) { std::stringstream ss; ss << "temp." << i; auto file = ss.str(); auto file_handle = dsn_file_open(file.c_str(), flag, 0666); EXPECT_TRUE(file_handle != nullptr); files[i] = file_handle; } } std::atomic<uint64_t> io_count(0); std::atomic<uint64_t> cb_flying_count(0); volatile bool exit = false; std::function<void(int)> cb; std::vector<uint64_t> offsets; offsets.resize(concurrency); cb = [&](int index) { if (!exit) { auto ioc = io_count++; uint64_t offset; if (!shared) { offset = offsets[index]; offsets[index] += block_size; } else { offset = ioc * block_size; } cb_flying_count++; if (is_write) { file::write(files[index], buffer.get(), (int)block_size, offset, LPC_AIO_TEST, nullptr, [idx = index, &cb, &cb_flying_count](::dsn::error_code code, size_t sz) { if (ERR_OK == code) cb(idx); cb_flying_count--; }); } else { file::read(files[index], buffer.get(), (int)block_size, offset, LPC_AIO_TEST, nullptr, [idx = index, &cb, &cb_flying_count](::dsn::error_code code, size_t sz) { if (ERR_OK == code) cb(idx); cb_flying_count--; }); } } }; // start auto tic = std::chrono::steady_clock::now(); for (int i = 0; i < concurrency; i++) { offsets[i] = 0; cb(i); } // run for seconds 
std::this_thread::sleep_for(std::chrono::seconds(10)); auto ioc = io_count.load(); auto bytes = ioc * block_size; auto toc = std::chrono::steady_clock::now(); std::cout << "is_write = " << is_write << ", block_size = " << block_size << ", shared = " << shared << ", concurrency = " << concurrency << ", iops = " << (double)ioc / (double)std::chrono::duration_cast<std::chrono::microseconds>(toc - tic).count() * 1000000.0 << " #/s" << ", throughput = " << (double)bytes / std::chrono::duration_cast<std::chrono::microseconds>(toc - tic).count() << " mB/s" << ", avg_latency = " << (double)std::chrono::duration_cast<std::chrono::microseconds>(toc - tic).count() / (double)(ioc / concurrency) << " us" << std::endl; // safe exit exit = true; while (cb_flying_count.load() > 0) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } if (shared) { dsn_file_flush(files[0]); auto c*k = dsn_file_close(files[0]); EXPECT_EQ(c*k, ERR_OK); } else { for (auto& f : files) { dsn_file_flush(f); auto c*k = dsn_file_close(f); EXPECT_EQ(c*k, ERR_OK); } } }
/// Rebuilds the in-memory state from "meta_state_service.log" and opens that file
/// as the service log.
///
/// Each log record is a log_header followed by header.size bytes of payload whose
/// first field is the operation type. Records are applied in order; reading stops at
/// the first record whose header or payload cannot be read completely or whose magic
/// does not match. `_offset` ends up as the total size of the records applied.
///
/// @return ERR_OK (the only value this function returns)
error_code meta_state_service_simple::initialize()
{
    _offset = 0;
    std::string log_path = "meta_state_service.log";

    if (utils::filesystem::file_exists(log_path))
    {
        if (FILE* fd = fopen(log_path.c_str(), "rb"))
        {
            for (;;)
            {
                log_header header;
                if (fread(&header, sizeof(log_header), 1, fd) != 1)
                {
                    break;
                }
                if (header.magic != log_header::default_magic)
                {
                    break;
                }

                // The payload is allocated with new[], so it needs an array deleter;
                // shared_ptr<char>'s default deleter would call plain delete.
                std::shared_ptr<char> buffer(new char[header.size],
                                             std::default_delete<char[]>{});
                if (fread(buffer.get(), header.size, 1, fd) != 1)
                {
                    break;
                }
                _offset += sizeof(header) + header.size;

                blob blob_wrapper(buffer, header.size);
                binary_reader reader(blob_wrapper);
                int op_type;
                unmarshall(reader, op_type);

                switch (static_cast<operation_type>(op_type))
                {
                case operation_type::create_node:
                {
                    std::string node;
                    blob data;
                    create_node_log::parse(reader, node, data);
                    create_node_internal(node, data).end_tracking();
                    break;
                }
                case operation_type::delete_node:
                {
                    std::string node;
                    bool recursively_delete;
                    delete_node_log::parse(reader, node, recursively_delete);
                    delete_node_internal(node, recursively_delete).end_tracking();
                    break;
                }
                case operation_type::set_data:
                {
                    std::string node;
                    blob data;
                    set_data_log::parse(reader, node, data);
                    set_data_internal(node, data).end_tracking();
                    break;
                }
                default:
                    // The record was read completely but carries an unknown operation
                    // type, i.e. its content was altered. This is unacceptable.
                    dassert(false, "meta state server log corrupted");
                }
            }
            fclose(fd);
        }
    }

    _log = dsn_file_open(log_path.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666);
    return ERR_OK;
}
/// Serves one copy request: reads request.size bytes at request.offset from the file
/// request.source_dir/request.file_name and hands the outcome to
/// internal_read_callback, which receives the replier.
///
/// File handles are cached in `_handles_map` (guarded by `_handles_map_lock`); each
/// request bumps the entry's access count and last-access time. If the file cannot be
/// opened, the reply carries ERR_OBJECT_NOT_FOUND and nothing is read.
///
/// @param request  source directory, file name, destination directory, offset and size
/// @param reply    replier used to send the copy_response
void nfs_service_impl::on_copy(const ::dsn::service::copy_request& request, ::dsn::rpc_replier< ::dsn::service::copy_response>& reply)
{
    std::string file_path = dsn::utils::filesystem::path_combine(request.source_dir, request.file_name);
    dsn_handle_t hfile;

    {
        zauto_lock l(_handles_map_lock);
        auto it = _handles_map.find(file_path); // find file handle cache first

        if (it == _handles_map.end()) // not found
        {
            hfile = dsn_file_open(file_path.c_str(), O_RDONLY | O_BINARY, 0);
            if (hfile)
            {
                file_handle_info_on_server* fh = new file_handle_info_on_server;
                fh->file_handle = hfile;
                fh->file_access_count = 1;
                fh->last_access_time = dsn_now_ms();
                _handles_map.insert(std::pair<std::string, file_handle_info_on_server*>(file_path, fh));
            }
        }
        else // found
        {
            hfile = it->second->file_handle;
            it->second->file_access_count++;
            it->second->last_access_time = dsn_now_ms();
        }
    }

    dinfo("nfs: copy file %s [%" PRId64 ", %" PRId64 ")",
          file_path.c_str(),
          request.offset,
          request.offset + request.size);

    if (!hfile)
    {
        derror("file open failed");
        ::dsn::service::copy_response resp;
        resp.error = ERR_OBJECT_NOT_FOUND;
        reply(resp);
        return;
    }

    callback_para cp(reply);
    // Size the buffer from the request: the read below transfers request.size bytes,
    // so a buffer of the configured block size would be overrun by any request larger
    // than _opts.nfs_copy_block_bytes.
    // NOTE(review): the blob length is now request.size instead of the configured
    // block size; confirm internal_read_callback does not rely on the old length.
    cp.bb = blob(
        std::shared_ptr<char>(new char[request.size], std::default_delete<char[]>{}),
        request.size);
    // `request` is const, so std::move on its members would copy anyway.
    cp.dst_dir = request.dst_dir;
    cp.file_path = std::move(file_path);
    cp.hfile = hfile;
    cp.offset = request.offset;
    cp.size = request.size;

    // Fetch the raw pointer before `cp` is moved into the callback's capture.
    auto buffer_save = cp.bb.buffer().get();

    file::read(
        hfile,
        buffer_save,
        request.size,
        request.offset,
        LPC_NFS_READ,
        this,
        [this, cp_cap = std::move(cp)] (error_code err, int sz)
        {
            internal_read_callback(err, sz, std::move(cp_cap));
        }
        );
}
void nfs_client_impl::continue_write() { // check write quota if (++_concurrent_local_write_count > _opts.max_concurrent_local_writes) { --_concurrent_local_write_count; return; } // get write dsn::ref_ptr<copy_request_ex> reqc; while (true) { { zauto_lock l(_local_writes_lock); if (!_local_writes.empty()) { reqc = _local_writes.front(); _local_writes.pop(); } else { reqc = nullptr; break; } } { zauto_lock l(reqc->lock); if (reqc->is_valid) break; } } if (nullptr == reqc) { --_concurrent_local_write_count; return; } // real write std::string file_path = dsn::utils::filesystem::path_combine(reqc->copy_req.dst_dir, reqc->file_ctx->file_name); std::string path = dsn::utils::filesystem::remove_file_name(file_path.c_str()); if (!dsn::utils::filesystem::create_directory(path)) { dassert(false, "Fail to create directory %s.", path.c_str()); } dsn_handle_t hfile = reqc->file_ctx->file.load(); if (!hfile) { zauto_lock l(reqc->file_ctx->user_req->user_req_lock); hfile = reqc->file_ctx->file.load(); if (!hfile) { hfile = dsn_file_open(file_path.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666); reqc->file_ctx->file = hfile; } } if (!hfile) { derror("file open %s failed", file_path.c_str()); error_code err = ERR_FILE_OPERATION_FAILED; handle_completion(reqc->file_ctx->user_req, err); --_concurrent_local_write_count; continue_write(); return; } { zauto_lock l(reqc->lock); auto& reqc_save = *reqc.get(); reqc_save.local_write_task = file::write( hfile, reqc_save.response.file_content.data(), reqc_save.response.size, reqc_save.response.offset, LPC_NFS_WRITE, this, [this, reqc_cap = std::move(reqc)] (error_code err, int sz) { local_write_callback(err, sz, std::move(reqc_cap)); } ); } }
void meta_service::start(const char* data_dir, bool clean_state) { dassert(!_started, "meta service is already started"); _data_dir = data_dir; std::string checkpoint_path = _data_dir + "/checkpoint"; std::string oplog_path = _data_dir + "/oplog"; if (clean_state) { try { if (!dsn::utils::filesystem::remove_path(checkpoint_path)) { dassert(false, "Fail to remove file %s.", checkpoint_path.c_str()); } if (!dsn::utils::filesystem::remove_path(oplog_path)) { dassert(false, "Fail to remove file %s.", oplog_path.c_str()); } } catch (std::exception& ex) { ex; } } else { if (!dsn::utils::filesystem::create_directory(_data_dir)) { dassert(false, "Fail to create directory %s.", _data_dir.c_str()); } if (dsn::utils::filesystem::file_exists(checkpoint_path)) { _state->load(checkpoint_path.c_str()); } if (dsn::utils::filesystem::file_exists(oplog_path)) { replay_log(oplog_path.c_str()); _state->save(checkpoint_path.c_str()); if (!dsn::utils::filesystem::remove_path(oplog_path)) { dassert(false, "Fail to remove file %s.", oplog_path.c_str()); } } } _log = dsn_file_open((_data_dir + "/oplog").c_str(), O_RDWR | O_CREAT, 0666); _balancer = new load_balancer(_state); _failure_detector = new meta_server_failure_detector(_state, this); ::dsn::rpc_address primary; if (_state->get_meta_server_primary(primary) && primary == primary_address()) { _failure_detector->set_primary(true); } else _failure_detector->set_primary(false); register_rpc_handler(RPC_CM_CALL, "RPC_CM_CALL", &meta_service::on_request); // make sure the delay is larger than fd.grace to ensure // all machines are in the correct state (assuming connected initially) tasking::enqueue(LPC_LBM_START, this, &meta_service::on_load_balance_start, 0, _opts.fd_grace_seconds * 1000); auto err = _failure_detector->start( _opts.fd_check_interval_seconds, _opts.fd_beacon_interval_seconds, _opts.fd_lease_seconds, _opts.fd_grace_seconds, false ); dassert(err == ERR_OK, "FD start failed, err = %s", err.to_string()); }