예제 #1
0
// Accept a pending RDMA connection request on cm_id_.
//
// The blob returned by get_private_data() is handed to rdma_accept() as the
// connection's private data.
//
// Throws InfinibandException if the private data is larger than 255 bytes
// (its length is narrowed to uint8_t below) or if rdma_accept() reports an
// error.
void IBConnection::accept_connect_request()
{
    L_(debug) << "accepting connection";

    // Accept rdma connection request
    auto private_data = get_private_data();
    assert(private_data->size() <= 255);
    // assert() is compiled out under NDEBUG; without this runtime check the
    // narrowing cast below would silently truncate an oversized length.
    if (private_data->size() > 255)
        throw InfinibandException("RDMA private data too large");

    struct rdma_conn_param conn_param = rdma_conn_param();
    conn_param.responder_resources = 1;
    conn_param.private_data = private_data->data();
    conn_param.private_data_len = static_cast<uint8_t>(private_data->size());
    int err = rdma_accept(cm_id_, &conn_param);
    if (err)
        throw InfinibandException("RDMA accept failed");
}
예제 #2
0
void Connection::on_connect_request(struct fi_eq_cm_entry* event,
                                    struct fid_domain* pd,
                                    struct fid_cq* cq) {
  int err = fi_endpoint(pd, event->info, &ep_, this);
  if (err) {
    L_(fatal) << "fi_endpoint failed: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_endpoint failed");
  }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
  err = fi_ep_bind(ep_, (::fid_t)eq_, 0);
  if (err) {
    L_(fatal) << "fi_ep_bind failed to eq: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_ep_bind failed to eq");
  }
  err = fi_ep_bind(ep_, (fid_t)cq, FI_SEND | FI_RECV | FI_SELECTIVE_COMPLETION);
  if (err) {
    L_(fatal) << "fi_ep_bind failed to cq: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_ep_bind failed to cq");
  }
#pragma GCC diagnostic pop

  // setup(pd);
  setup_mr(pd);

  auto private_data = get_private_data();
  assert(private_data->size() <= 255);

  err = fi_enable(ep_);
  if (err) {
    L_(fatal) << "fi_enable failed: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_enable failed");
  }
  // accept_connect_request();
  err = fi_accept(ep_, private_data->data(), private_data->size());
  if (err) {
    L_(fatal) << "fi_accept failed: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_accept failed");
  }

  // setup(pd);
  setup();
}
예제 #3
0
// Called once the route to the peer has been resolved; initiates the RDMA
// connection on cm_id_.
//
// The blob returned by get_private_data() is handed to rdma_connect() as the
// connection's private data.
//
// Throws InfinibandException if the private data is larger than 255 bytes
// (its length is narrowed to uint8_t below) or if rdma_connect() reports an
// error.
void IBConnection::on_route_resolved()
{
    L_(debug) << "route resolved";

    // Initiate rdma connection
    auto private_data = get_private_data();
    assert(private_data->size() <= 255);
    // assert() is compiled out under NDEBUG; without this runtime check the
    // narrowing cast below would silently truncate an oversized length.
    if (private_data->size() > 255) {
        L_(fatal) << "rdma_connect failed: private data too large";
        throw InfinibandException("RDMA private data too large");
    }

    struct rdma_conn_param conn_param = rdma_conn_param();
    conn_param.initiator_depth = 1;
    conn_param.retry_count = 7;
    conn_param.private_data = private_data->data();
    conn_param.private_data_len = static_cast<uint8_t>(private_data->size());
    // TODO: Hack to prevent connection issues when using softiwarp.
    std::this_thread::sleep_for(std::chrono::milliseconds(500));
    int err = rdma_connect(cm_id_, &conn_param);
    if (err) {
        L_(fatal) << "rdma_connect failed: " << strerror(errno);
        throw InfinibandException("rdma_connect failed");
    }
}
예제 #4
0
void Connection::connect(const std::string& hostname,
                         const std::string& service,
                         struct fid_domain* domain,
                         struct fid_cq* cq,
                         struct fid_av* av) {
  auto private_data = get_private_data();
  assert(private_data->size() <= 255);

  L_(debug) << "connect: " << hostname << ":" << service;
  struct fi_info* info2 = nullptr;
  struct fi_info* hints = fi_dupinfo(Provider::getInst()->get_info());

  hints->rx_attr->size = max_recv_wr_;
  hints->rx_attr->iov_limit = max_recv_sge_;
  // TODO this attribute causes a problem while running flesnet
  // hints->tx_attr->size = max_send_wr_;
  hints->tx_attr->iov_limit = max_send_sge_;
  hints->tx_attr->inject_size = max_inline_data_;

  hints->src_addr = nullptr;
  hints->src_addrlen = 0;

  int err =
      fi_getinfo(FI_VERSION(1, 1), hostname == "" ? nullptr : hostname.c_str(),
                 service == "" ? nullptr : service.c_str(), 0, hints, &info2);
  if (err) {
    L_(fatal) << "fi_getinfo failed in make_endpoint: " << hostname << " "
              << service << "[" << err << "=" << fi_strerror(-err) << "]";
    throw LibfabricException("fi_getinfo failed in make_endpoint");
  }

  fi_freeinfo(hints);

  err = fi_endpoint(domain, info2, &ep_, this);
  if (err) {
    L_(fatal) << "fi_endpoint failed: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_endpoint failed");
  }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
  if (Provider::getInst()->has_eq_at_eps()) {
    err = fi_ep_bind(ep_, (::fid_t)eq_, 0);
    if (err) {
      L_(fatal) << "fi_ep_bind failed: " << err << "=" << fi_strerror(-err);
      throw LibfabricException("fi_ep_bind failed");
    }
  }
  err =
      fi_ep_bind(ep_, (::fid_t)cq, FI_SEND | FI_RECV | FI_SELECTIVE_COMPLETION);
  if (err) {
    L_(fatal) << "fi_ep_bind failed (cq): " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_ep_bind failed (cq)");
  }
  if (Provider::getInst()->has_av()) {
    err = fi_ep_bind(ep_, (::fid_t)av, 0);
    if (err) {
      L_(fatal) << "fi_ep_bind failed (av): " << err << "="
                << fi_strerror(-err);
      throw LibfabricException("fi_ep_bind failed (av)");
    }
  }
#pragma GCC diagnostic pop
  err = fi_enable(ep_);
  if (err) {
    L_(fatal) << "fi_enable failed: " << err << "=" << fi_strerror(-err);
    throw LibfabricException("fi_enable failed");
  }

  setup_mr(domain);
  Provider::getInst()->connect(ep_, max_send_wr_, max_send_sge_, max_recv_wr_,
                               max_recv_sge_, max_inline_data_,
                               private_data->data(), private_data->size(),
                               info2->dest_addr);
  setup();
}