ErrorStack ImpersonateSession::get_result() {
  wait();
  if (is_valid()) {
    ThreadControlBlock* block = thread_->get_control_block();
    if (block->current_ticket_ != ticket_ || block->status_ != kWaitingForClientRelease) {
      return ERROR_STACK(kErrorCodeSessionExpired);
    }
    return block->proc_result_.to_error_stack();
  } else {
    return ERROR_STACK(kErrorCodeSessionExpired);
  }
}
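// A minimal usage sketch (not from the FOEDUS sources) of how get_result() is
// typically reached. The proc name "my_proc" is an assumption; impersonate()
// hands the task to a pooled worker thread and fills in the session object.
ErrorStack run_impersonated_sketch(Engine* engine) {
  ImpersonateSession session;
  if (!engine->get_thread_pool()->impersonate("my_proc", nullptr, 0, &session)) {
    return ERROR_STACK(kErrorCodeSessionExpired);  // error choice is illustrative
  }
  ErrorStack result = session.get_result();  // blocks via wait(), as shown above
  session.release();  // return the worker thread to the pool
  return result;
}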
ErrorStack SharedMemoryRepo::attach_shared_memories(
  uint64_t master_upid,
  Eid master_eid,
  SocId my_soc_id,
  EngineOptions* options) {
  deallocate_shared_memories();

  std::string base = get_master_path(master_upid, master_eid);
  std::string global_memory_path = base + std::string("_global");
  global_memory_.attach(global_memory_path);
  if (global_memory_.is_null()) {
    deallocate_shared_memories();
    return ERROR_STACK(kErrorCodeSocShmAttachFailed);
  }

  // read the options from global_memory
  uint64_t xml_size = 0;
  std::memcpy(&xml_size, global_memory_.get_block(), sizeof(xml_size));
  ASSERT_ND(xml_size > 0);
  std::string xml(global_memory_.get_block() + sizeof(xml_size), xml_size);
  CHECK_ERROR(options->load_from_string(xml));

  my_soc_id_ = my_soc_id;
  init_empty(*options);
  set_global_memory_anchors(xml_size, *options, false);

  bool failed = false;
  for (uint16_t node = 0; node < soc_count_; ++node) {
    std::string node_memory_str = base + std::string("_node_") + std::to_string(node);
    node_memories_[node].attach(node_memory_str);
    std::string vpool_str = base + std::string("_vpool_") + std::to_string(node);
    volatile_pools_[node].attach(vpool_str);
    if (node_memories_[node].is_null() || volatile_pools_[node].is_null()) {
      failed = true;
    } else {
      set_node_memory_anchors(node, *options, false);
    }
  }

  if (failed) {
    if (!node_memories_[my_soc_id].is_null()) {
      // then we can at least notify the error via the shared memory
      change_child_status(my_soc_id, ChildEngineStatus::kFatalError);
    }
    deallocate_shared_memories();
    return ERROR_STACK(kErrorCodeSocShmAttachFailed);
  }
  return kRetOk;
}
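// The read path above implies this layout at the head of the global memory:
// [uint64_t xml_size][xml_size bytes of EngineOptions serialized as XML].
// A minimal sketch of the matching master-side write, assuming the XML string
// was already produced by the options serializer:
void write_options_block_sketch(char* global_block, const std::string& xml) {
  uint64_t xml_size = xml.size();
  std::memcpy(global_block, &xml_size, sizeof(xml_size));
  std::memcpy(global_block + sizeof(xml_size), xml.data(), xml_size);
}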
// Example #3
ErrorStack EnginePimpl::check_minimal_pool_size() const {
  // Can we at least start up?
  const thread::ThreadOptions& t = options_.thread_;
  const memory::MemoryOptions& m = options_.memory_;
  uint64_t total_threads = t.group_count_ * t.thread_count_per_group_;
  uint64_t minimal_page_pool
    = total_threads * m.private_page_pool_initial_grab_ * storage::kPageSize;
  // page_pool_size_mb_per_node_ is in MB; "<< 20" converts the total to bytes.
  if ((static_cast<uint64_t>(m.page_pool_size_mb_per_node_)
      * t.group_count_ << 20) < minimal_page_pool) {
    return ERROR_STACK(kErrorCodeMemoryPagePoolTooSmall);
  }
  return kRetOk;
}
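// Worked example with assumed numbers (not checked against FOEDUS defaults):
// 2 groups x 8 threads each = 16 threads. With private_page_pool_initial_grab_
// = 512 and 4KB pages (kPageSize = 1 << 12), the minimal pool is
//   16 * 512 * 4096 bytes = 32 MB in total,
// so page_pool_size_mb_per_node_ * group_count_ must reach 32 MB, i.e.
// page_pool_size_mb_per_node_ >= 16 for this configuration.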
ErrorStack SavepointManagerPimpl::initialize_once() {
  control_block_ = engine_->get_soc_manager()->get_shared_memory_repo()->
    get_global_memory_anchors()->savepoint_manager_memory_;
  if (engine_->is_master()) {
    // Savepoint takes place only in master
    control_block_->initialize();
    savepoint_ = Savepoint();
    savepoint_path_ = fs::Path(engine_->get_options().savepoint_.savepoint_path_.str());
    LOG(INFO) << "Initializing SavepointManager.. path=" << savepoint_path_;
    auto logger_count = engine_->get_options().log_.loggers_per_node_
      * engine_->get_options().thread_.group_count_;
    if (fs::exists(savepoint_path_)) {
      LOG(INFO) << "Existing savepoint file found. Loading..";
      CHECK_ERROR(savepoint_.load_from_file(savepoint_path_));
      if (!savepoint_.consistent(logger_count)) {
        return ERROR_STACK(kErrorCodeSpInconsistentSavepoint);
      }
    } else {
      LOG(INFO) << "Savepoint file does not exist. No savepoint taken so far.";
      // Create an empty savepoint file now. This makes sure the directory entry for the file
      // exists.
      savepoint_.populate_empty(logger_count);
      CHECK_ERROR(savepoint_.save_to_file(savepoint_path_));
    }
    update_shared_savepoint(savepoint_);
    control_block_->initial_current_epoch_ = savepoint_.current_epoch_;
    control_block_->initial_durable_epoch_ = savepoint_.durable_epoch_;
    control_block_->saved_durable_epoch_ = savepoint_.durable_epoch_;
    control_block_->requested_durable_epoch_ = savepoint_.durable_epoch_;
    savepoint_thread_stop_requested_ = false;
    assorted::memory_fence_release();
    savepoint_thread_ = std::thread(&SavepointManagerPimpl::savepoint_main, this);
    control_block_->master_initialized_ = true;
  } else {
    // Other engines wait until the master engine finishes initializing the
    // relevant fields; some of the following modules depend on these values.
    uint32_t sleep_cont = 0;
    while (control_block_->master_initialized_ == false) {
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      if (++sleep_cont > 1000ULL) {
        return ERROR_STACK_MSG(kErrorCodeTimeout, "Master engine couldn't load savepoint??");
      }
    }
    LOG(INFO) << "Okay, master-engine has finished loading initial savepoint.";
  }
  return kRetOk;
}
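// The child-side wait loop above is a plain spin-with-timeout. The same
// pattern as a reusable sketch (the helper is not FOEDUS API; 1000 iterations
// x 10ms gives the same ~10 second budget as the loop above):
template <typename Predicate>
ErrorStack spin_until_sketch(Predicate done, uint32_t max_sleeps = 1000) {
  for (uint32_t sleep_count = 0; !done(); ++sleep_count) {
    if (sleep_count > max_sleeps) {
      return ERROR_STACK_MSG(kErrorCodeTimeout, "spin_until_sketch timed out");
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
  }
  return kRetOk;
}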
// Example #5
ErrorStack EngineMemory::initialize_once() {
  LOG(INFO) << "Initializing EngineMemory..";
  if (!engine_->get_debug()->is_initialized()) {
    return ERROR_STACK(kErrorCodeDepedentModuleUnavailableInit);
  } else if (::numa_available() < 0) {
    LOG(WARNING) << "WARNING, this machine is not a NUMA machine. FOEDUS still works fine,"
      << " but it is mainly designed for large servers with many sockets and cores";
    // Even if the kernel is built without NUMA (eg ARMv8), we keep running.
    // return ERROR_STACK(kErrorCodeMemoryNumaUnavailable);
  }

  ASSERT_ND(node_memories_.empty());
  const EngineOptions& options = engine_->get_options();
  const thread::ThreadGroupId numa_nodes = options.thread_.group_count_;
  GlobalVolatilePageResolver::Base bases[256];
  uint64_t pool_begin = 0, pool_end = 0;
  for (thread::ThreadGroupId node = 0; node < numa_nodes; ++node) {
    NumaNodeMemoryRef* ref = new NumaNodeMemoryRef(engine_, node);
    node_memories_.push_back(ref);
    bases[node] = ref->get_volatile_pool()->get_base();
    pool_begin = ref->get_volatile_pool()->get_resolver().begin_;
    pool_end = ref->get_volatile_pool()->get_resolver().end_;
  }
  global_volatile_page_resolver_ = GlobalVolatilePageResolver(
    bases,
    numa_nodes,
    pool_begin,
    pool_end);

  // Initialize local memory.
  if (!engine_->is_master()) {
    soc::SocId node = engine_->get_soc_id();
    local_memory_ = new NumaNodeMemory(engine_, node);
    CHECK_ERROR(local_memory_->initialize());
    LOG(INFO) << "Node memory-" << node << " was initialized!";
  } else {
    if (options.memory_.rigorous_memory_boundary_check_) {
      LOG(WARNING) << "CAUTION: memory_.rigorous_memory_boundary_check_ is ON. We will"
        << " put mprotect-ed pages between memory regions for debugging. It will be SLOW!";
    }
    if (options.memory_.rigorous_page_boundary_check_) {
      LOG(WARNING) << "CAUTION: memory_.rigorous_page_boundary_check_ is ON. We will"
        << " put mprotect-ed pages between every single page for debugging. It will be SLOOOW!";
    }
  }
  return kRetOk;
}
// Example #6
ErrorStack NumaNodeMemory::allocate_numa_memory_general(
  uint64_t size,
  uint64_t alignment,
  AlignedMemory* out) const {
  ASSERT_ND(out);
  if (engine_->get_options().memory_.use_mmap_hugepages_
    && alignment >= kHugepageSize
    && size >= (1ULL << 30) * 8 / 10) {
    LOG(INFO) << "This is a big memory allocation. Let's use the mmap hugepage (1GB pages)";
    out->alloc(size, 1ULL << 30, AlignedMemory::kNumaMmapOneGbPages, numa_node_);
  } else {
    out->alloc(size, alignment, AlignedMemory::kNumaAllocOnnode, numa_node_);
  }
  if (out->is_null()) {
    return ERROR_STACK(kErrorCodeOutofmemory);
  }
  return kRetOk;
}
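// A minimal usage sketch (hypothetical call site): a 64MB block with 2MB
// alignment (the usual kHugepageSize on x86-64). Since the size is far below
// the ~0.8GB threshold, this takes the kNumaAllocOnnode branch above.
ErrorStack alloc_example_sketch(const NumaNodeMemory* node_memory) {
  AlignedMemory block;
  CHECK_ERROR(node_memory->allocate_numa_memory_general(
    64ULL << 20,  // size: 64 MB
    1ULL << 21,   // alignment: 2 MB
    &block));
  // block.get_block() is now usable; AlignedMemory frees it on destruction.
  return kRetOk;
}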
// Example #7
ErrorStack query_task(const proc::ProcArguments& args) {
  thread::Thread* context = args.context_;
  HashStorage hash = context->get_engine()->get_storage_manager()->get_hash("test2");
  char buf[16];
  xct::XctManager* xct_manager = context->get_engine()->get_xct_manager();
  // begin_xct() returns ErrorCode, not ErrorStack, hence WRAP_ERROR_CODE.
  WRAP_ERROR_CODE(xct_manager->begin_xct(context, xct::kSerializable));
  char key[100];
  std::memset(key, 0, 100);
  uint16_t payload_capacity = 16;
  ErrorCode result = hash.get_record(context, key, 100, buf, &payload_capacity);
  if (result == kErrorCodeStrKeyNotFound) {
    std::cout << "Key not found!" << std::endl;
  } else if (result != kErrorCodeOk) {
    return ERROR_STACK(result);
  }
  Epoch commit_epoch;
  WRAP_ERROR_CODE(xct_manager->precommit_xct(context, &commit_epoch));
  WRAP_ERROR_CODE(xct_manager->wait_for_commit(commit_epoch));
  return foedus::kRetOk;
}
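// How a proc like query_task is typically wired up. A sketch: the default
// options are an assumption, and the "test2" hash storage queried above must
// have been created beforehand (e.g. by a separate setup proc).
void run_query_example_sketch() {
  EngineOptions options;  // defaults; real code would tune these
  Engine engine(options);
  engine.get_proc_manager()->pre_register("query_task", query_task);
  COERCE_ERROR(engine.initialize());
  // Runs the proc on a pooled worker thread and waits for its ErrorStack.
  COERCE_ERROR(engine.get_thread_pool()->impersonate_synchronous(
    "query_task", nullptr, 0));
  COERCE_ERROR(engine.uninitialize());
}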
// Example #8
ErrorStack EngineMemory::uninitialize_once() {
  LOG(INFO) << "Uninitializing EngineMemory..";
  ErrorStackBatch batch;
  if (!engine_->get_debug()->is_initialized()) {
    batch.emprace_back(ERROR_STACK(kErrorCodeDepedentModuleUnavailableUninit));
  }
  for (auto* ref : node_memories_) {
    delete ref;
  }
  node_memories_.clear();

  // Uninitialize local memory.
  if (!engine_->is_master() && local_memory_) {
    soc::SocId node = engine_->get_soc_id();
    batch.emprace_back(local_memory_->uninitialize());
    delete local_memory_;
    local_memory_ = nullptr;
    LOG(INFO) << "Node memory-" << node << " was uninitialized!";
  }
  return SUMMARIZE_ERROR_BATCH(batch);
}
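// ErrorStackBatch collects errors from several uninitialize() calls instead of
// aborting on the first one; SUMMARIZE_ERROR_BATCH folds them into one
// ErrorStack. A minimal sketch of the idiom (emprace_back, sic, is the
// method's actual name in FOEDUS):
ErrorStack uninit_two_modules_sketch(Initializable* a, Initializable* b) {
  ErrorStackBatch batch;
  batch.emprace_back(a->uninitialize());
  batch.emprace_back(b->uninitialize());
  return SUMMARIZE_ERROR_BATCH(batch);
}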
// Example #9
ErrorStack EnginePimpl::initialize_once() {
  if (is_master()) {
    CHECK_ERROR(check_valid_options());
  }
  // SOC manager is special. We must initialize it first.
  CHECK_ERROR(soc_manager_.initialize());
  on_module_initialized(kSoc);
  ErrorStack module_initialize_error = initialize_modules();
  if (module_initialize_error.is_error()) {
    LOG(ERROR) << "*******************************************************************************";
    LOG(ERROR) << "*** ERROR while module initailization in " << describe_short() << ". "
      << module_initialize_error << "";
    LOG(ERROR) << "*******************************************************************************";
    soc_manager_.report_engine_fatal_error();
    CHECK_ERROR(module_initialize_error);
  }

  // The following can assume SOC manager is already initialized
  if (is_master()) {
    soc::SharedMemoryRepo* repo = soc_manager_.get_shared_memory_repo();
    repo->change_master_status(soc::MasterEngineStatus::kRunning);
    // wait for children's kRunning status
    // TASK(Hideaki) should be a function in soc manager
    uint16_t soc_count = engine_->get_options().thread_.group_count_;
    while (true) {
      std::this_thread::sleep_for(std::chrono::milliseconds(5));
      assorted::memory_fence_acq_rel();
      bool error_happened = false;
      bool remaining = false;
      for (uint16_t node = 0; node < soc_count; ++node) {
        soc::ChildEngineStatus* status = repo->get_node_memory_anchors(node)->child_status_memory_;
        if (status->status_code_ == soc::ChildEngineStatus::kFatalError) {
          error_happened = true;
          break;
        }
        if (status->status_code_ == soc::ChildEngineStatus::kRunning) {
          continue;  // ok
        }
        remaining = true;
      }

      if (error_happened) {
        LOG(ERROR) << "[FOEDUS] ERROR! error while waiting child kRunning";
        soc_manager_.report_engine_fatal_error();
        return ERROR_STACK(kErrorCodeSocChildInitFailed);
      } else if (!remaining) {
        break;
      }
    }
  }
  LOG(INFO) << "================================================================================";
  LOG(INFO) << "================== FOEDUS ENGINE ("
    << describe_short() << ") INITIALIZATION DONE ===========";
  LOG(INFO) << "================================================================================";

  // In a few places, we check if we are running under valgrind and, if so, turn off
  // optimizations valgrind can't handle (eg hugepages).
  bool running_on_valgrind = RUNNING_ON_VALGRIND;
  if (running_on_valgrind) {
    LOG(INFO) << "=============== ATTENTION: VALGRIND MODE! ==================";
    LOG(INFO) << "This Engine is running under valgrind, which disables several optimizations";
    LOG(INFO) << "If you see this message while usual execution, something is wrong.";
    LOG(INFO) << "=============== ATTENTION: VALGRIND MODE! ==================";
  }
  return kRetOk;
}
ErrorStack MasstreeStoragePimpl::fatify_first_root_double(thread::Thread* context) {
  MasstreeIntermediatePage* root;
  WRAP_ERROR_CODE(get_first_root(context, true, &root));
  ASSERT_ND(root->is_locked());
  ASSERT_ND(!root->is_moved());

  // Ensure that all children have a volatile version.
  for (MasstreeIntermediatePointerIterator it(root); it.is_valid(); it.next()) {
    if (it.get_pointer().volatile_pointer_.is_null()) {
      MasstreePage* child;
      WRAP_ERROR_CODE(follow_page(
        context,
        true,
        const_cast<DualPagePointer*>(&it.get_pointer()),
        &child));
    }
    ASSERT_ND(!it.get_pointer().volatile_pointer_.is_null());
  }

  std::vector<Child> original_children = list_children(root);
  ASSERT_ND(original_children.size() * 2U <= kMaxIntermediatePointers);
  std::vector<Child> new_children;
  for (const Child& child : original_children) {
    CHECK_ERROR(split_a_child(context, root, child, &new_children));
  }
  ASSERT_ND(new_children.size() >= original_children.size());

  memory::NumaCoreMemory* memory = context->get_thread_memory();
  memory::PagePoolOffset new_offset = memory->grab_free_volatile_page();
  if (new_offset == 0) {
    return ERROR_STACK(kErrorCodeMemoryNoFreePages);
  }
  // From here on there are no failure paths (we already grabbed a free page).

  VolatilePagePointer new_pointer = combine_volatile_page_pointer(
    context->get_numa_node(),
    kVolatilePointerFlagSwappable,  // pointer to root page might be swapped!
    get_first_root_pointer().volatile_pointer_.components.mod_count + 1,
    new_offset);
  MasstreeIntermediatePage* new_root
    = context->resolve_newpage_cast<MasstreeIntermediatePage>(new_pointer);
  new_root->initialize_volatile_page(
    get_id(),
    new_pointer,
    0,
    root->get_btree_level(),  // same as current root. this is not grow_root
    kInfimumSlice,
    kSupremumSlice);
  // No concurrent access to the new page; we lock only to satisfy assertions in the function.
  PageVersionLockScope new_scope(context, new_root->get_version_address());
  new_root->split_foster_migrate_records_new_first_root(&new_children);
  ASSERT_ND(count_children(new_root) == new_children.size());
  verify_new_root(context, new_root, new_children);

  // set the new first-root pointer.
  assorted::memory_fence_release();
  get_first_root_pointer().volatile_pointer_.word = new_pointer.word;
  // first-root snapshot pointer is unchanged.

  // old root page and the direct children are now retired
  assorted::memory_fence_acq_rel();
  root->set_moved();  // not quite moved, but assertions assume that.
  root->set_retired();
  context->collect_retired_volatile_page(
    construct_volatile_page_pointer(root->header().page_id_));
  for (const Child& child : original_children) {
    MasstreePage* original_page = context->resolve_cast<MasstreePage>(child.pointer_);
    if (original_page->is_moved()) {
      PageVersionLockScope scope(context, original_page->get_version_address());
      original_page->set_retired();
      context->collect_retired_volatile_page(child.pointer_);
    } else {
      // This means the page did not have enough records to split. We must keep it.
    }
  }
  assorted::memory_fence_acq_rel();

  LOG(INFO) << "Split done. " << original_children.size() << " -> " << new_children.size();

  return kRetOk;
}