ErrorStack ImpersonateSession::get_result() {
  wait();
  if (is_valid()) {
    ThreadControlBlock* block = thread_->get_control_block();
    if (block->current_ticket_ != ticket_ || block->status_ != kWaitingForClientRelease) {
      return ERROR_STACK(kErrorCodeSessionExpired);
    }
    return block->proc_result_.to_error_stack();
  } else {
    return ERROR_STACK(kErrorCodeSessionExpired);
  }
}
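// Usage sketch (not part of the FOEDUS source): how a caller typically reaches the
// get_result() path above. It assumes the public ThreadPool::impersonate() and
// ImpersonateSession::release() APIs; "my_proc" and run_once() are hypothetical names
// used only for illustration.
ErrorStack run_once(Engine* engine) {
  thread::ImpersonateSession session;
  // Hand the pre-registered procedure "my_proc" to a worker thread.
  bool accepted = engine->get_thread_pool()->impersonate("my_proc", nullptr, 0, &session);
  if (!accepted) {
    // No worker thread picked up the request; report it with an existing error code.
    return ERROR_STACK_MSG(kErrorCodeTimeout, "no worker accepted the impersonation request");
  }
  ErrorStack result = session.get_result();  // blocks in wait(), then validates the ticket
  session.release();
  return result;
}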
ErrorStack SharedMemoryRepo::attach_shared_memories(
  uint64_t master_upid,
  Eid master_eid,
  SocId my_soc_id,
  EngineOptions* options) {
  deallocate_shared_memories();
  std::string base = get_master_path(master_upid, master_eid);
  std::string global_memory_path = base + std::string("_global");
  global_memory_.attach(global_memory_path);
  if (global_memory_.is_null()) {
    deallocate_shared_memories();
    return ERROR_STACK(kErrorCodeSocShmAttachFailed);
  }

  // read the options from global_memory
  uint64_t xml_size = 0;
  std::memcpy(&xml_size, global_memory_.get_block(), sizeof(xml_size));
  ASSERT_ND(xml_size > 0);
  std::string xml(global_memory_.get_block() + sizeof(xml_size), xml_size);
  CHECK_ERROR(options->load_from_string(xml));

  my_soc_id_ = my_soc_id;
  init_empty(*options);
  set_global_memory_anchors(xml_size, *options, false);

  bool failed = false;
  for (uint16_t node = 0; node < soc_count_; ++node) {
    std::string node_memory_str = base + std::string("_node_") + std::to_string(node);
    node_memories_[node].attach(node_memory_str);
    std::string vpool_str = base + std::string("_vpool_") + std::to_string(node);
    volatile_pools_[node].attach(vpool_str);
    if (node_memories_[node].is_null() || volatile_pools_[node].is_null()) {
      failed = true;
    } else {
      set_node_memory_anchors(node, *options, false);
    }
  }

  if (failed) {
    if (!node_memories_[my_soc_id].is_null()) {
      // then we can at least notify the error via the shared memory
      change_child_status(my_soc_id, ChildEngineStatus::kFatalError);
    }
    deallocate_shared_memories();
    return ERROR_STACK(kErrorCodeSocShmAttachFailed);
  }
  return kRetOk;
}
ErrorStack EnginePimpl::check_minimal_pool_size() const {
  // Can we at least start up?
  const thread::ThreadOptions& t = options_.thread_;
  const memory::MemoryOptions& m = options_.memory_;
  uint64_t total_threads = t.group_count_ * t.thread_count_per_group_;
  uint64_t minimal_page_pool
    = total_threads * m.private_page_pool_initial_grab_ * storage::kPageSize;
  if ((static_cast<uint64_t>(m.page_pool_size_mb_per_node_) * t.group_count_ << 20)
      < minimal_page_pool) {
    return ERROR_STACK(kErrorCodeMemoryPagePoolTooSmall);
  }
  return kRetOk;
}
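// Worked example (illustrative numbers, not FOEDUS defaults): with group_count_ = 4,
// thread_count_per_group_ = 16, private_page_pool_initial_grab_ = 128 pages, and the
// usual 4 KB storage::kPageSize, the threads need
//   64 threads * 128 pages * 4096 bytes = 32 MiB
// up front, so page_pool_size_mb_per_node_ * 4 nodes (shifted by 20 to get bytes) must
// be at least 32 MB or the check above fails with kErrorCodeMemoryPagePoolTooSmall.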
ErrorStack SavepointManagerPimpl::initialize_once() {
  control_block_ = engine_->get_soc_manager()->get_shared_memory_repo()->
    get_global_memory_anchors()->savepoint_manager_memory_;
  if (engine_->is_master()) {
    // Savepoint takes place only in master
    control_block_->initialize();
    savepoint_ = Savepoint();
    savepoint_path_ = fs::Path(engine_->get_options().savepoint_.savepoint_path_.str());
    LOG(INFO) << "Initializing SavepointManager.. path=" << savepoint_path_;
    auto logger_count = engine_->get_options().log_.loggers_per_node_
      * engine_->get_options().thread_.group_count_;
    if (fs::exists(savepoint_path_)) {
      LOG(INFO) << "Existing savepoint file found. Loading..";
      CHECK_ERROR(savepoint_.load_from_file(savepoint_path_));
      if (!savepoint_.consistent(logger_count)) {
        return ERROR_STACK(kErrorCodeSpInconsistentSavepoint);
      }
    } else {
      LOG(INFO) << "Savepoint file does not exist. No savepoint taken so far.";
      // Create an empty savepoint file now. This makes sure the directory entry for the file
      // exists.
      savepoint_.populate_empty(logger_count);
      CHECK_ERROR(savepoint_.save_to_file(savepoint_path_));
    }
    update_shared_savepoint(savepoint_);
    control_block_->initial_current_epoch_ = savepoint_.current_epoch_;
    control_block_->initial_durable_epoch_ = savepoint_.durable_epoch_;
    control_block_->saved_durable_epoch_ = savepoint_.durable_epoch_;
    control_block_->requested_durable_epoch_ = savepoint_.durable_epoch_;
    savepoint_thread_stop_requested_ = false;
    assorted::memory_fence_release();
    savepoint_thread_ = std::move(std::thread(&SavepointManagerPimpl::savepoint_main, this));
    control_block_->master_initialized_ = true;
  } else {
    // other engines wait for the master engine until it finishes the initialization of
    // relevant fields. Some of the following modules depend on these values.
    uint32_t sleep_cont = 0;
    while (control_block_->master_initialized_ == false) {
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      if (++sleep_cont > 1000ULL) {
        return ERROR_STACK_MSG(kErrorCodeTimeout, "Master engine couldn't load savepoint??");
      }
    }
    LOG(INFO) << "Okay, master-engine has finished loading initial savepoint.";
  }
  return kRetOk;
}
ErrorStack EngineMemory::initialize_once() {
  LOG(INFO) << "Initializing EngineMemory..";
  if (!engine_->get_debug()->is_initialized()) {
    return ERROR_STACK(kErrorCodeDepedentModuleUnavailableInit);
  } else if (::numa_available() < 0) {
    LOG(WARNING) << "WARNING, this machine is not a NUMA machine. FOEDUS still works fine,"
      << " but it is mainly designed for large servers with many sockets and cores";
    // Even if the kernel is built without NUMA (eg ARMv8), we keep running.
    // return ERROR_STACK(kErrorCodeMemoryNumaUnavailable);
  }

  ASSERT_ND(node_memories_.empty());
  const EngineOptions& options = engine_->get_options();
  const thread::ThreadGroupId numa_nodes = options.thread_.group_count_;
  GlobalVolatilePageResolver::Base bases[256];
  uint64_t pool_begin = 0, pool_end = 0;
  for (thread::ThreadGroupId node = 0; node < numa_nodes; ++node) {
    NumaNodeMemoryRef* ref = new NumaNodeMemoryRef(engine_, node);
    node_memories_.push_back(ref);
    bases[node] = ref->get_volatile_pool()->get_base();
    pool_begin = ref->get_volatile_pool()->get_resolver().begin_;
    pool_end = ref->get_volatile_pool()->get_resolver().end_;
  }
  global_volatile_page_resolver_ = GlobalVolatilePageResolver(
    bases,
    numa_nodes,
    pool_begin,
    pool_end);

  // Initialize local memory.
  if (!engine_->is_master()) {
    soc::SocId node = engine_->get_soc_id();
    local_memory_ = new NumaNodeMemory(engine_, node);
    CHECK_ERROR(local_memory_->initialize());
    LOG(INFO) << "Node memory-" << node << " was initialized!";
  } else {
    if (options.memory_.rigorous_memory_boundary_check_) {
      LOG(WARNING) << "CAUTION: memory_.rigorous_memory_boundary_check_ is ON. We will"
        << " put mprotect-ed pages between memory regions for debugging. It will be SLOW!";
    }
    if (options.memory_.rigorous_page_boundary_check_) {
      LOG(WARNING) << "CAUTION: memory_.rigorous_page_boundary_check_ is ON. We will"
        << " put mprotect-ed pages between every single page for debugging. It will be SLOOOW!";
    }
  }
  return kRetOk;
}
ErrorStack NumaNodeMemory::allocate_numa_memory_general(
  uint64_t size,
  uint64_t alignment,
  AlignedMemory *out) const {
  ASSERT_ND(out);
  if (engine_->get_options().memory_.use_mmap_hugepages_ &&
    alignment >= kHugepageSize &&
    size >= (1ULL << 30) * 8 / 10) {
    LOG(INFO) << "This is a big memory allocation. Let's use the mmap hugepage (1GB pages)";
    out->alloc(size, 1ULL << 30, AlignedMemory::kNumaMmapOneGbPages, numa_node_);
  } else {
    out->alloc(size, alignment, AlignedMemory::kNumaAllocOnnode, numa_node_);
  }
  if (out->is_null()) {
    return ERROR_STACK(kErrorCodeOutofmemory);
  }
  return kRetOk;
}
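// For reference (numbers derived from the branch above, illustrative request sizes only):
// with use_mmap_hugepages_ enabled and hugepage-or-larger alignment, a ~900 MB request
// takes the 1 GB mmap path because 900,000,000 >= (1ULL << 30) * 8 / 10 (~859 MB),
// while a 64 MB request falls back to AlignedMemory::kNumaAllocOnnode on this node.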
ErrorStack query_task(const proc::ProcArguments& args) {
  thread::Thread* context = args.context_;
  HashStorage hash = context->get_engine()->get_storage_manager()->get_hash("test2");
  char buf[16];
  xct::XctManager* xct_manager = context->get_engine()->get_xct_manager();
  // begin_xct/precommit_xct/wait_for_commit return ErrorCode, so wrap them into ErrorStack.
  WRAP_ERROR_CODE(xct_manager->begin_xct(context, xct::kSerializable));

  char key[100];
  std::memset(key, 0, 100);
  uint16_t payload_capacity = 16;
  ErrorCode result = hash.get_record(context, key, 100, buf, &payload_capacity);
  if (result == kErrorCodeStrKeyNotFound) {
    std::cout << "Key not found!" << std::endl;
  } else if (result != kErrorCodeOk) {
    return ERROR_STACK(result);
  }

  Epoch commit_epoch;
  WRAP_ERROR_CODE(xct_manager->precommit_xct(context, &commit_epoch));
  WRAP_ERROR_CODE(xct_manager->wait_for_commit(commit_epoch));
  return foedus::kRetOk;
}
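// Driver sketch (an assumption, not part of the original test code): how query_task would be
// registered and invoked, following the pattern in the FOEDUS README. pre_register() must run
// before Engine::initialize(), and the hash storage "test2" must already exist (e.g. created
// by a separate load task, not shown). Includes are omitted, as in the excerpts above.
int run_query(EngineOptions options) {
  Engine engine(options);
  engine.get_proc_manager()->pre_register("query_task", query_task);
  COERCE_ERROR(engine.initialize());

  UninitializeGuard guard(&engine);
  // Run the procedure synchronously on a worker thread and print its ErrorStack result.
  ErrorStack result = engine.get_thread_pool()->impersonate_synchronous("query_task", nullptr, 0);
  std::cout << "result=" << result << std::endl;
  COERCE_ERROR(engine.uninitialize());
  return 0;
}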
ErrorStack EngineMemory::uninitialize_once() {
  LOG(INFO) << "Uninitializing EngineMemory..";
  ErrorStackBatch batch;
  if (!engine_->get_debug()->is_initialized()) {
    batch.emprace_back(ERROR_STACK(kErrorCodeDepedentModuleUnavailableUninit));
  }
  for (auto* ref : node_memories_) {
    delete ref;
  }
  node_memories_.clear();

  // Uninitialize local memory.
  if (!engine_->is_master() && local_memory_) {
    soc::SocId node = engine_->get_soc_id();
    batch.emprace_back(local_memory_->uninitialize());
    delete local_memory_;
    local_memory_ = nullptr;
    LOG(INFO) << "Node memory-" << node << " was uninitialized!";
  }
  return SUMMARIZE_ERROR_BATCH(batch);
}
ErrorStack EnginePimpl::initialize_once() {
  if (is_master()) {
    CHECK_ERROR(check_valid_options());
  }
  // SOC manager is special. We must initialize it first.
  CHECK_ERROR(soc_manager_.initialize());
  on_module_initialized(kSoc);
  ErrorStack module_initialize_error = initialize_modules();
  if (module_initialize_error.is_error()) {
    LOG(ERROR) << "*******************************************************************************";
    LOG(ERROR) << "*** ERROR while module initialization in " << describe_short() << ". "
      << module_initialize_error << "";
    LOG(ERROR) << "*******************************************************************************";
    soc_manager_.report_engine_fatal_error();
    CHECK_ERROR(module_initialize_error);
  }

  // The following can assume SOC manager is already initialized
  if (is_master()) {
    soc::SharedMemoryRepo* repo = soc_manager_.get_shared_memory_repo();
    repo->change_master_status(soc::MasterEngineStatus::kRunning);
    // wait for children's kRunning status
    // TASK(Hideaki) should be a function in soc manager
    uint16_t soc_count = engine_->get_options().thread_.group_count_;
    while (true) {
      std::this_thread::sleep_for(std::chrono::milliseconds(5));
      assorted::memory_fence_acq_rel();
      bool error_happened = false;
      bool remaining = false;
      for (uint16_t node = 0; node < soc_count; ++node) {
        soc::ChildEngineStatus* status
          = repo->get_node_memory_anchors(node)->child_status_memory_;
        if (status->status_code_ == soc::ChildEngineStatus::kFatalError) {
          error_happened = true;
          break;
        }
        if (status->status_code_ == soc::ChildEngineStatus::kRunning) {
          continue;  // ok
        }
        remaining = true;
      }

      if (error_happened) {
        LOG(ERROR) << "[FOEDUS] ERROR! error while waiting child kRunning";
        soc_manager_.report_engine_fatal_error();
        return ERROR_STACK(kErrorCodeSocChildInitFailed);
      } else if (!remaining) {
        break;
      }
    }
  }
  LOG(INFO) << "================================================================================";
  LOG(INFO) << "================== FOEDUS ENGINE (" << describe_short()
    << ") INITIALIZATION DONE ===========";
  LOG(INFO) << "================================================================================";

  // In a few places, we check if we are running under valgrind and, if so, turn off
  // optimizations valgrind can't handle (eg hugepages).
  bool running_on_valgrind = RUNNING_ON_VALGRIND;
  if (running_on_valgrind) {
    LOG(INFO) << "=============== ATTENTION: VALGRIND MODE! ==================";
    LOG(INFO) << "This Engine is running under valgrind, which disables several optimizations";
    LOG(INFO) << "If you see this message while usual execution, something is wrong.";
    LOG(INFO) << "=============== ATTENTION: VALGRIND MODE! ==================";
  }
  return kRetOk;
}
ErrorStack MasstreeStoragePimpl::fatify_first_root_double(thread::Thread* context) {
  MasstreeIntermediatePage* root;
  WRAP_ERROR_CODE(get_first_root(context, true, &root));
  ASSERT_ND(root->is_locked());
  ASSERT_ND(!root->is_moved());

  // assure that all children have volatile version
  for (MasstreeIntermediatePointerIterator it(root); it.is_valid(); it.next()) {
    if (it.get_pointer().volatile_pointer_.is_null()) {
      MasstreePage* child;
      WRAP_ERROR_CODE(follow_page(
        context,
        true,
        const_cast<DualPagePointer*>(&it.get_pointer()),
        &child));
    }
    ASSERT_ND(!it.get_pointer().volatile_pointer_.is_null());
  }

  std::vector<Child> original_children = list_children(root);
  ASSERT_ND(original_children.size() * 2U <= kMaxIntermediatePointers);
  std::vector<Child> new_children;
  for (const Child& child : original_children) {
    CHECK_ERROR(split_a_child(context, root, child, &new_children));
  }
  ASSERT_ND(new_children.size() >= original_children.size());

  memory::NumaCoreMemory* memory = context->get_thread_memory();
  memory::PagePoolOffset new_offset = memory->grab_free_volatile_page();
  if (new_offset == 0) {
    return ERROR_STACK(kErrorCodeMemoryNoFreePages);
  }
  // from now on no failure (we grabbed a free page).
  VolatilePagePointer new_pointer = combine_volatile_page_pointer(
    context->get_numa_node(),
    kVolatilePointerFlagSwappable,  // pointer to root page might be swapped!
    get_first_root_pointer().volatile_pointer_.components.mod_count + 1,
    new_offset);
  MasstreeIntermediatePage* new_root
    = context->resolve_newpage_cast<MasstreeIntermediatePage>(new_pointer);
  new_root->initialize_volatile_page(
    get_id(),
    new_pointer,
    0,
    root->get_btree_level(),  // same as current root. this is not grow_root
    kInfimumSlice,
    kSupremumSlice);
  // no concurrent access to the new page, but just for the sake of assertion in the func.
  PageVersionLockScope new_scope(context, new_root->get_version_address());
  new_root->split_foster_migrate_records_new_first_root(&new_children);
  ASSERT_ND(count_children(new_root) == new_children.size());
  verify_new_root(context, new_root, new_children);

  // set the new first-root pointer.
  assorted::memory_fence_release();
  get_first_root_pointer().volatile_pointer_.word = new_pointer.word;
  // first-root snapshot pointer is unchanged.

  // old root page and the direct children are now retired
  assorted::memory_fence_acq_rel();
  root->set_moved();  // not quite moved, but assertions assume that.
  root->set_retired();
  context->collect_retired_volatile_page(
    construct_volatile_page_pointer(root->header().page_id_));
  for (const Child& child : original_children) {
    MasstreePage* original_page = context->resolve_cast<MasstreePage>(child.pointer_);
    if (original_page->is_moved()) {
      PageVersionLockScope scope(context, original_page->get_version_address());
      original_page->set_retired();
      context->collect_retired_volatile_page(child.pointer_);
    } else {
      // This means, the page had too small records to split. We must keep it.
    }
  }
  assorted::memory_fence_acq_rel();
  LOG(INFO) << "Split done. " << original_children.size() << " -> " << new_children.size();
  return kRetOk;
}