Beispiel #1
0
/*
 * FreeBuff - deregister the send and receive memory regions (if they are
 * still registered) and release the two data buffers.
 *
 * buff1, buff2: heap buffers to free.  Either may be NULL.  Both may refer
 *               to the same allocation: MyMalloc points the send buffer at
 *               the receive buffer when caching is enabled, so the alias is
 *               detected here and the allocation is freed only once.
 *
 * A failed deregistration is reported on stderr and leaves the handle
 * untouched; a successful one resets the handle to VAPI_INVAL_HNDL.
 */
void FreeBuff(char *buff1, char *buff2)
{
  VAPI_ret_t ret;

  if(s_mr_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Deregistering send buffer\n");
    ret = VAPI_deregister_mr(hca_hndl, s_mr_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error deregistering send mr: %s\n", VAPI_strerror(ret));
    } else {
      s_mr_hndl = VAPI_INVAL_HNDL;
    }
  }

  if(r_mr_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Deregistering recv buffer\n");
    ret = VAPI_deregister_mr(hca_hndl, r_mr_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error deregistering recv mr: %s\n", VAPI_strerror(ret));
    } else {
      r_mr_hndl = VAPI_INVAL_HNDL;
    }
  }

  /* Resolve the alias before anything is freed so that the same block is
   * never handed to free() twice. */
  if(buff2 == buff1)
    buff2 = NULL;

  /* free(NULL) is a no-op, so no explicit NULL checks are needed. */
  free(buff1);
  free(buff2);
}
Beispiel #2
0
/*
 * SendData - post a single send work request covering p->bufflen bytes
 * starting at p->s_ptr.
 *
 * The opcode follows p->prot.commtype: a plain send, a send with immediate
 * data, or an RDMA write (with or without immediate data).  For the RDMA
 * variants the remote address is remote_address advanced by the offset that
 * p->s_ptr has inside p->s_buff, and remote_key is used as the remote key.
 *
 * An unknown commtype is fatal (exit(-1)).  A failure to post is only
 * reported on stderr; the function still returns normally.
 */
void SendData(ArgStruct *p)
{
  VAPI_ret_t          ret;       /* Return code */
  /* Start from an all-zero request so that every descriptor field not
   * assigned below has a defined value instead of stack garbage. */
  VAPI_sr_desc_t      sr = {0};  /* Send request */
  VAPI_sg_lst_entry_t sg_entry;  /* Scatter/Gather list - holds buff addr */

  /* Fill in send request struct */

  if(p->prot.commtype == NP_COMM_SENDRECV) {
     sr.opcode = VAPI_SEND;
     LOGPRINTF("Doing regular send\n");
  } else if(p->prot.commtype == NP_COMM_SENDRECV_WITH_IMM) {
     sr.opcode = VAPI_SEND_WITH_IMM;
     LOGPRINTF("Doing regular send with imm\n");
  } else if(p->prot.commtype == NP_COMM_RDMAWRITE) {
     sr.opcode = VAPI_RDMA_WRITE;
     sr.remote_addr = (VAPI_virt_addr_t)(MT_virt_addr_t)(remote_address + (p->s_ptr - p->s_buff));
     sr.r_key = remote_key;
     LOGPRINTF("Doing RDMA write (raddr=%p)\n", sr.remote_addr);
  } else if(p->prot.commtype == NP_COMM_RDMAWRITE_WITH_IMM) {
     sr.opcode = VAPI_RDMA_WRITE_WITH_IMM;
     sr.remote_addr = (VAPI_virt_addr_t)(MT_virt_addr_t)(remote_address + (p->s_ptr - p->s_buff));
     sr.r_key = remote_key;
     LOGPRINTF("Doing RDMA write with imm (raddr=%p)\n", sr.remote_addr);
  } else {
     fprintf(stderr, "Error, invalid communication type in SendData\n");
     exit(-1);
  }
  
  sr.comp_type = VAPI_UNSIGNALED;
  sr.set_se = FALSE; /* This needed due to a bug in Mellanox HW rel a-0 */

  /* One scatter/gather element describing the local source buffer */
  sr.sg_lst_len = 1;
  sr.sg_lst_p = &sg_entry;

  sg_entry.lkey = s_mr_out.l_key; /* Local memory region key */
  sg_entry.len = p->bufflen;
  sg_entry.addr = (VAPI_virt_addr_t)(MT_virt_addr_t)p->s_ptr;

  ret = VAPI_post_sr(hca_hndl, qp_hndl, &sr);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error posting send request: %s\n", VAPI_strerror(ret));
  } else {
    LOGPRINTF("Posted send request\n");
  }

}
Beispiel #3
0
/*
 * PrepareToReceive - post one receive work request for p->bufflen bytes at
 * p->r_ptr, using the receive memory region's local key.
 *
 * Nothing is posted when the run combines RDMA write with local polling.
 * The request is unsignaled for local polling and signaled otherwise.
 * A failure to post is fatal: it is reported, CleanUp(p) runs and the
 * process exits with -1.  In event-completion mode the receive_complete
 * flag is cleared once the request has been posted.
 */
void PrepareToReceive(ArgStruct *p)
{
  VAPI_sg_lst_entry_t recv_sge;   /* single scatter/gather element */
  VAPI_rr_desc_t      recv_req;   /* receive work request */
  VAPI_ret_t          status;

  /* RDMA write + local polling never consumes a receive descriptor */
  if( p->prot.commtype == NP_COMM_RDMAWRITE &&
      p->prot.comptype == NP_COMP_LOCALPOLL ) {
     return;
  }

  /* Describe the destination buffer */
  recv_sge.addr = (VAPI_virt_addr_t)(MT_virt_addr_t)p->r_ptr;
  recv_sge.len  = p->bufflen;
  recv_sge.lkey = r_mr_out.l_key;

  /* Signaled completions are only needed by the VAPI completion methods */
  recv_req.opcode     = VAPI_RECEIVE;
  recv_req.comp_type  = ( p->prot.comptype == NP_COMP_LOCALPOLL )
                           ? VAPI_UNSIGNALED
                           : VAPI_SIGNALED;
  recv_req.sg_lst_p   = &recv_sge;
  recv_req.sg_lst_len = 1;

  status = VAPI_post_rr(hca_hndl, qp_hndl, &recv_req);
  if(status != VAPI_OK) {
    fprintf(stderr, "Error posting recv request: %s\n", VAPI_strerror(status));
    CleanUp(p);
    exit(-1);
  }
  LOGPRINTF("Posted recv request\n");

  /* The event handler raises this flag when the receive completes */
  if( p->prot.comptype == NP_COMP_EVENT ) {
    receive_complete = 0;
  }
}
Beispiel #4
0
/*
 * psib_poll - run psib_check_cq() on hca_info once, or, when `blocking` is
 * non-zero, repeatedly until it reports VAPI_CQ_EMPTY.
 *
 * With psib_debug set, a final result that is neither VAPI_OK nor
 * VAPI_CQ_EMPTY is printed to stdout.
 *
 * Returns 1 if the last check reported VAPI_CQ_EMPTY, 0 otherwise.
 */
static
int psib_poll(hca_info_t *hca_info, int blocking)
{
    VAPI_ret_t cq_state;

    for (;;) {
        cq_state = psib_check_cq(hca_info);
        if (!blocking || cq_state == VAPI_CQ_EMPTY) {
            break;
        }
    }

    if (psib_debug && cq_state != VAPI_CQ_EMPTY && cq_state != VAPI_OK) {
        printf("psib_poll: %s: %s\n",
               VAPI_strerror_sym(cq_state), VAPI_strerror(cq_state));
    }

    return cq_state == VAPI_CQ_EMPTY;
}
Beispiel #5
0
/*
 * psib_err_rc - report an error through psib_err() as
 *     "<str> : <VAPI symbol> - <VAPI description>"
 *
 * str: caller-supplied message prefix (must not be NULL).
 * rc:  VAPI return code to translate.
 *
 * If the temporary message buffer cannot be allocated, the bare prefix is
 * reported instead, so the error is never lost and no NULL pointer is
 * dereferenced (the previous assert() vanished in NDEBUG builds).
 */
static
void psib_err_rc(char *str, VAPI_ret_t rc)
{
    const char *vapi_sym = VAPI_strerror_sym(rc);
    const char *vapi_err = VAPI_strerror(rc);
    /* 20 spare bytes cover the two separators and the terminating NUL;
     * size_t avoids narrowing the strlen() results. */
    size_t len = strlen(str) + strlen(vapi_sym) + strlen(vapi_err) + 20;
    char *msg = malloc(len);

    if (!msg) {
        psib_err(str);
        return;
    }

    strcpy(msg, str);
    strcat(msg, " : ");
    strcat(msg, vapi_sym);
    strcat(msg, " - ");
    strcat(msg, vapi_err);

    psib_err(msg);
    free(msg);
}
Beispiel #6
0
/*
 * event_handler - completion callback: drains the completion queue `cq`
 * and raises receive_complete once per successful work completion.
 *
 * hca, cq: handles used for polling.
 * data:    not used by this function.
 *
 * When the queue is empty the next notification is requested (on the
 * global hca_hndl / r_cq_hndl) and the handler returns.  A polling error or
 * an unsuccessful work completion terminates the process with exit(-1).
 */
void event_handler(VAPI_hca_hndl_t hca, VAPI_cq_hndl_t cq, void* data)
{
  VAPI_ret_t poll_rc;

  for(;;) {

     poll_rc = VAPI_poll_cq(hca, cq, &wc);

     if(poll_rc == VAPI_CQ_EMPTY) {
        LOGPRINTF("Empty completion queue, requesting next notification\n");
        VAPI_req_comp_notif(hca_hndl, r_cq_hndl, VAPI_NEXT_COMP);
        return;
     }

     if(poll_rc != VAPI_OK) {
        fprintf(stderr, "Error in event_handler, polling cq: %s\n",
                VAPI_strerror(poll_rc));
        exit(-1);
     }

     if(wc.status != VAPI_SUCCESS) {
        fprintf(stderr, "Error in event_handler, on returned work completion "
                        "status: %s\n", VAPI_wc_status_sym(wc.status));
        exit(-1);
     }

     LOGPRINTF("Retrieved work completion\n");

     /* If the previous completion has not been consumed yet, yield until
      * the flag is cleared.  Not expected in ping-pong mode, but it keeps
      * an unexpected extra send from being silently merged.
      */
     if(receive_complete == 1) {
        while(receive_complete != 0) {
           sched_yield();
        }
     }

     receive_complete = 1;
  }
}
Beispiel #7
0
/* Reset is used after a trial to empty the work request queues so we
 * have enough room for the next trial to run.
 *
 * It exchanges one zero-length message with the peer: a receive is
 * preposted, both nodes synchronise via Sync(p), a zero-length send is
 * posted, and then the send completion and the receive completion are
 * waited for (the latter through the receive_complete flag in event mode,
 * otherwise by polling the receive CQ).
 *
 * Any VAPI failure or unsuccessful completion status is fatal (exit(-1));
 * a failure to post the receive additionally runs CleanUp(p) first.
 */
void Reset(ArgStruct *p)
{

  VAPI_ret_t          ret;       /* Return code */
  /* Zero both descriptors: only a few fields are assigned below and the
   * rest must not carry stack garbage into the verbs layer. */
  VAPI_sr_desc_t      sr = {0};  /* Send request */
  VAPI_rr_desc_t      rr = {0};  /* Recv request */

  /* If comptype is event, then we'll use event handler to detect receive,
   * so initialize receive_complete flag
   */
  if(p->prot.comptype == NP_COMP_EVENT) receive_complete = 0;

  /* Prepost receive */
  rr.opcode = VAPI_RECEIVE;
  rr.comp_type = VAPI_SIGNALED;
  rr.sg_lst_len = 0;

  LOGPRINTF("Posting recv request in Reset\n");
  ret = VAPI_post_rr(hca_hndl, qp_hndl, &rr);
  if(ret != VAPI_OK) {
    fprintf(stderr, "  Error posting recv request: %s\n", VAPI_strerror(ret));
    CleanUp(p);
    exit(-1);
  }

  /* Make sure both nodes have preposted receives */
  Sync(p);

  /* Post Send */
  sr.opcode = VAPI_SEND;
  sr.comp_type = VAPI_SIGNALED;
  sr.set_se = FALSE; /* This needed due to a bug in Mellanox HW rel a-0 */
  sr.sg_lst_len = 0;

  LOGPRINTF("Posting send request \n");
  ret = VAPI_post_sr(hca_hndl, qp_hndl, &sr);
  if(ret != VAPI_OK) {
    fprintf(stderr, "  Error posting send request in Reset: %s\n", 
            VAPI_strerror(ret));
    exit(-1);
  }
  /* wc is deliberately not inspected here: no completion has been polled
   * yet, so it would only hold the result of some earlier operation. */

  LOGPRINTF("Polling for completion of send request\n");
  ret = VAPI_CQ_EMPTY;
  while(ret == VAPI_CQ_EMPTY)
    ret = VAPI_poll_cq(hca_hndl, s_cq_hndl, &wc);

  if(ret != VAPI_OK) {
    fprintf(stderr, "Error polling CQ for send in Reset: %s\n", 
            VAPI_strerror(ret));
    exit(-1);
  }
  if(wc.status != VAPI_SUCCESS) {
     fprintf(stderr, "  Error in completion status: %s\n",
             VAPI_wc_status_sym(wc.status));
     exit(-1);
  }          
  
  LOGPRINTF("Status of send completion: %s\n", VAPI_wc_status_sym(wc.status));

  if(p->prot.comptype == NP_COMP_EVENT) { 
     /* If using event completion, the event handler will set receive_complete
      * when it gets the completion event.
      */
     LOGPRINTF("Waiting for receive_complete flag\n");
     while(receive_complete == 0) { /* BUSY WAIT */ }
  } else {
     LOGPRINTF("Polling for completion of recv request\n");
     ret = VAPI_CQ_EMPTY;
     while(ret == VAPI_CQ_EMPTY)
       ret = VAPI_poll_cq(hca_hndl, r_cq_hndl, &wc);
     
     if(ret != VAPI_OK) {
       fprintf(stderr, "Error polling CQ for recv in Reset: %s\n", 
               VAPI_strerror(ret));
       exit(-1);
     }
     if(wc.status != VAPI_SUCCESS) {
        fprintf(stderr, "  Error in completion status: %s\n",
                VAPI_wc_status_sym(wc.status));
        exit(-1);
     }

     LOGPRINTF("Status of recv completion: %s\n", VAPI_wc_status_sym(wc.status));
  }
  LOGPRINTF("Done with reset\n");
}
Beispiel #8
0
/*
 * RecvData - wait until the pending receive into p->r_ptr has finished,
 * using the completion method selected by p->prot.comptype:
 *
 *   NP_COMP_LOCALPOLL - spin on the last byte of the receive buffer until
 *                       it holds the expected marker, then overwrite it;
 *   NP_COMP_VAPIPOLL  - poll the receive completion queue;
 *   NP_COMP_EVENT     - spin on the receive_complete flag.
 *
 * A polling error or an unsuccessful completion status is fatal (exit(-1)).
 * Any other comptype returns immediately after the initial log line.
 */
void RecvData(ArgStruct *p)
{
  VAPI_ret_t poll_rc;

  LOGPRINTF("Receiving at buffer address %p\n", p->r_ptr);

  if( p->prot.comptype == NP_COMP_LOCALPOLL ) {

    LOGPRINTF("Waiting for last byte of data to arrive\n");

    /* Busy wait; per the original author's note this relies on r_ptr
     * having been declared with the volatile qualifier. */
    while(p->r_ptr[p->bufflen-1] != 'a' + (p->cache ? 1 - p->tr : 1) )
      ;

    /* Put the last byte back so the next wait can detect new data */
    p->r_ptr[p->bufflen-1] = 'a' + (p->cache ? p->tr : 0);

    LOGPRINTF("Received all of data\n");
    return;
  }

  if( p->prot.comptype == NP_COMP_VAPIPOLL ) {

    LOGPRINTF("Polling completion queue for VAPI work completion\n");

    do {
      poll_rc = VAPI_poll_cq(hca_hndl, r_cq_hndl, &wc);
    } while(poll_rc == VAPI_CQ_EMPTY);

    if(poll_rc != VAPI_OK) {
      fprintf(stderr, "Error in RecvData, polling for completion: %s\n",
              VAPI_strerror(poll_rc));
      exit(-1);
    }

    if(wc.status != VAPI_SUCCESS) {
      fprintf(stderr, "Error in status of returned completion: %s\n",
              VAPI_wc_status_sym(wc.status));
      exit(-1);
    }

    LOGPRINTF("Retrieved successful completion\n");
    return;
  }

  if( p->prot.comptype == NP_COMP_EVENT ) {

    /* The completion event handler raises receive_complete; spin on that
     * flag rather than on the data or the completion queue.  A semaphore
     * could replace the busy polling.
     */
    LOGPRINTF("Polling receive flag\n");

    while( receive_complete == 0 )
      ;

    /* In prepost-burst mode PrepareToReceive is not called between
     * ping-pongs, so the flag has to be cleared here.
     */
    if( p->preburst ) {
      receive_complete = 0;
    }

    LOGPRINTF("Receive completed\n");
  }
}
Beispiel #9
0
/*
 * finalizeIB - tear down the InfiniBand resources held in the module-level
 * handles: completion event handler (event mode only), queue pair, receive
 * and send completion queues, any memory regions still registered, the
 * protection domain and finally the HCA handle.
 *
 * Every failure is reported on stderr and teardown continues with the next
 * resource.  Each handle that was released successfully is reset to
 * VAPI_INVAL_HNDL, matching FreeBuff, so that a later FreeBuff call or a
 * second finalizeIB call does not release the same resource twice.
 *
 * Always returns 0.
 */
int finalizeIB(ArgStruct *p)
{
  VAPI_ret_t ret;

  LOGPRINTF("Finalizing IB stuff\n");

  /* Clear completion event handler */

  if(p->prot.comptype == NP_COMP_EVENT ) {
     LOGPRINTF("Clearing comp handler\n");
     ret = EVAPI_clear_comp_eventh(hca_hndl, ceh_hndl);
     if(ret != VAPI_OK) {
        fprintf(stderr, "Error clearing event handler: %s\n",
                VAPI_strerror(ret));
     }
  }

  if(qp_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Destroying QP\n");
    ret = VAPI_destroy_qp(hca_hndl, qp_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error destroying Queue Pair: %s\n", VAPI_strerror(ret));
    } else {
      qp_hndl = VAPI_INVAL_HNDL;
    }
  }

  if(r_cq_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Destroying Recv CQ\n");
    ret = VAPI_destroy_cq(hca_hndl, r_cq_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error destroying recv CQ: %s\n", VAPI_strerror(ret));
    } else {
      r_cq_hndl = VAPI_INVAL_HNDL;
    }
  }

  if(s_cq_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Destroying Send CQ\n");
    ret = VAPI_destroy_cq(hca_hndl, s_cq_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error destroying send CQ: %s\n", VAPI_strerror(ret));
    } else {
      s_cq_hndl = VAPI_INVAL_HNDL;
    }
  }

  /* Check memory registrations just in case user bailed out */
  if(s_mr_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Deregistering send buffer\n");
    ret = VAPI_deregister_mr(hca_hndl, s_mr_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error deregistering send mr: %s\n", VAPI_strerror(ret));
    } else {
      s_mr_hndl = VAPI_INVAL_HNDL;
    }
  }

  if(r_mr_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Deregistering recv buffer\n");
    ret = VAPI_deregister_mr(hca_hndl, r_mr_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error deregistering recv mr: %s\n", VAPI_strerror(ret));
    } else {
      r_mr_hndl = VAPI_INVAL_HNDL;
    }
  }

  if(pd_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Deallocating PD\n");
    ret = VAPI_dealloc_pd(hca_hndl, pd_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error deallocating PD: %s\n", VAPI_strerror(ret));
    } else {
      pd_hndl = VAPI_INVAL_HNDL;
    }
  }

  /* Application code should not close HCA, just release handle */

  if(hca_hndl != VAPI_INVAL_HNDL) {
    LOGPRINTF("Releasing HCA\n");
    ret = EVAPI_release_hca_hndl(hca_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error releasing HCA: %s\n", VAPI_strerror(ret));
    } else {
      hca_hndl = VAPI_INVAL_HNDL;
    }
  }

  return 0;
}
Beispiel #10
0
/*
 * initIB - bring up the InfiniBand connection used by the benchmark.
 *
 * Steps, in order: obtain the HCA handle for "InfiniHost0", query port 1
 * for the local LID, allocate a protection domain, create the send and
 * receive completion queues, create a reliable-connected queue pair,
 * exchange LID and QP number with the peer over p->commfd, move the QP
 * through INIT -> RTR -> (Sync) -> RTS, and, in event-completion mode,
 * install event_handler on the receive CQ and request the first
 * notification.
 *
 * Returns 0 on success and -1 on the first failure (after printing a
 * message to stderr).  Resources acquired before a failure are left in the
 * module-level handles for the caller to release.
 */
int initIB(ArgStruct *p)
{
  VAPI_ret_t          ret;

  /* Open HCA */

  /* open hca just in case it was not opened by system earlier; the result
   * is intentionally ignored because the call below decides success */
  ret = VAPI_open_hca("InfiniHost0", &hca_hndl); 

  ret = EVAPI_get_hca_hndl("InfiniHost0", &hca_hndl);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error opening Infiniband HCA: %s\n", VAPI_strerror(ret));
    return -1;
  } else {
    LOGPRINTF("Opened Infiniband HCA\n");
  }

  /* Get HCA properties */

  port_num=1;
  ret = VAPI_query_hca_port_prop(hca_hndl, (IB_port_t)port_num, 
                                 (VAPI_hca_port_t *)&hca_port);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error querying Infiniband HCA: %s\n", VAPI_strerror(ret));
    return -1;
  } else {
    LOGPRINTF("Queried Infiniband HCA\n");
  }
  lid = hca_port.lid;
  LOGPRINTF("  lid = %d\n", lid);


  /* Allocate Protection Domain */

  ret = VAPI_alloc_pd(hca_hndl, &pd_hndl);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error allocating PD: %s\n", VAPI_strerror(ret));
    return -1;
  } else {
    LOGPRINTF("Allocated Protection Domain\n");
  }


  /* Create send completion queue */
  
  num_cqe = 30000; /* Requested number of completion q elements */
  ret = VAPI_create_cq(hca_hndl, num_cqe, &s_cq_hndl, &act_num_cqe);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error creating send CQ: %s\n", VAPI_strerror(ret));
    return -1;
  } else {
    LOGPRINTF("Created Send Completion Queue with %d elements\n", act_num_cqe);
  }


  /* Create recv completion queue */
  
  num_cqe = 20000; /* Requested number of completion q elements */
  ret = VAPI_create_cq(hca_hndl, num_cqe, &r_cq_hndl, &act_num_cqe);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error creating recv CQ: %s\n", VAPI_strerror(ret));
    return -1;
  } else {
    LOGPRINTF("Created Recv Completion Queue with %d elements\n", act_num_cqe);
  }


  /* Placeholder for MR */


  /* Create Queue Pair */

  qp_init_attr.cap.max_oust_wr_rq = max_wq; /* Max outstanding WR on RQ      */
  qp_init_attr.cap.max_oust_wr_sq = max_wq; /* Max outstanding WR on SQ      */
  qp_init_attr.cap.max_sg_size_rq = 1; /* Max scatter/gather entries on RQ */
  qp_init_attr.cap.max_sg_size_sq = 1; /* Max scatter/gather entries on SQ */
  qp_init_attr.pd_hndl            = pd_hndl; /* Protection domain handle   */
  qp_init_attr.rdd_hndl           = 0; /* Reliable datagram domain handle  */
  qp_init_attr.rq_cq_hndl         = r_cq_hndl; /* CQ handle for RQ         */
  qp_init_attr.rq_sig_type        = VAPI_SIGNAL_REQ_WR; /* Signalling type */
  qp_init_attr.sq_cq_hndl         = s_cq_hndl; /* CQ handle for SQ         */
  qp_init_attr.sq_sig_type        = VAPI_SIGNAL_REQ_WR; /* Signalling type */
  qp_init_attr.ts_type            = IB_TS_RC; /* Transmission type         */
  
  ret = VAPI_create_qp(hca_hndl, &qp_init_attr, &qp_hndl, &qp_prop);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error creating Queue Pair: %s\n", VAPI_strerror(ret));
    return -1;
  } else {
    LOGPRINTF("Created Queue Pair, max outstanding WR on RQ: %d, on SQ: %d\n",
              qp_prop.cap.max_oust_wr_rq, qp_prop.cap.max_oust_wr_sq);
  }


  /* Exchange lid and qp_num with other node */
  
  if( write(p->commfd, &lid, sizeof(lid) ) != sizeof(lid) ) {
    fprintf(stderr, "Failed to send lid over socket\n");
    return -1;
  }
  if( write(p->commfd, &qp_prop.qp_num, sizeof(qp_prop.qp_num) ) != sizeof(qp_prop.qp_num) ) {
    fprintf(stderr, "Failed to send qpnum over socket\n");
    return -1;
  }
  if( read(p->commfd, &d_lid, sizeof(d_lid) ) != sizeof(d_lid) ) {
    fprintf(stderr, "Failed to read lid from socket\n");
    return -1;
  }
  if( read(p->commfd, &d_qp_num, sizeof(d_qp_num) ) != sizeof(d_qp_num) ) {
    fprintf(stderr, "Failed to read qpnum from socket\n");
    return -1;
  }
  
  LOGPRINTF("Local: lid=%d qp_num=%d Remote: lid=%d qp_num=%d\n",
         lid, qp_prop.qp_num, d_lid, d_qp_num);


  /* Bring up Queue Pair */
  
  /******* INIT state ******/

  QP_ATTR_MASK_CLR_ALL(qp_attr_mask);

  qp_attr.qp_state = VAPI_INIT;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);

  qp_attr.pkey_ix = 0;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);

  qp_attr.port = port_num;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PORT);

  qp_attr.remote_atomic_flags = VAPI_EN_REM_WRITE | VAPI_EN_REM_READ;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_REMOTE_ATOMIC_FLAGS);

  ret = VAPI_modify_qp(hca_hndl, qp_hndl, &qp_attr, &qp_attr_mask, &qp_cap);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error modifying QP to INIT: %s\n", VAPI_strerror(ret));
    return -1;
  }

  LOGPRINTF("Modified QP to INIT\n");

  /******* RTR (Ready-To-Receive) state *******/

  QP_ATTR_MASK_CLR_ALL(qp_attr_mask);

  qp_attr.qp_state = VAPI_RTR;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);

  qp_attr.qp_ous_rd_atom = 1;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_OUS_RD_ATOM);

  qp_attr.dest_qp_num = d_qp_num;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_DEST_QP_NUM);

  qp_attr.av.sl = 0;
  qp_attr.av.grh_flag = FALSE;
  qp_attr.av.dlid = d_lid;
  qp_attr.av.static_rate = 0;
  qp_attr.av.src_path_bits = 0;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_AV);

  qp_attr.path_mtu = p->prot.ib_mtu;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PATH_MTU);

  qp_attr.rq_psn = 0;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RQ_PSN);

  qp_attr.pkey_ix = 0;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);

  qp_attr.min_rnr_timer = 5;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_MIN_RNR_TIMER);
  
  ret = VAPI_modify_qp(hca_hndl, qp_hndl, &qp_attr, &qp_attr_mask, &qp_cap);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error modifying QP to RTR: %s\n", VAPI_strerror(ret));
    return -1;
  }

  LOGPRINTF("Modified QP to RTR\n");

  /* Sync before going to RTS state */
  Sync(p);

  /******* RTS (Ready-to-Send) state *******/

  QP_ATTR_MASK_CLR_ALL(qp_attr_mask);

  qp_attr.qp_state = VAPI_RTS;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);

  qp_attr.sq_psn = 0;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_SQ_PSN);

  qp_attr.timeout = 31;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_TIMEOUT);

  qp_attr.retry_count = 1;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RETRY_COUNT);

  qp_attr.rnr_retry = 1;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RNR_RETRY);

  qp_attr.ous_dst_rd_atom = 1;
  QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_OUS_DST_RD_ATOM);

  ret = VAPI_modify_qp(hca_hndl, qp_hndl, &qp_attr, &qp_attr_mask, &qp_cap);
  if(ret != VAPI_OK) {
    fprintf(stderr, "Error modifying QP to RTS: %s\n", VAPI_strerror(ret));
    return -1;
  }
  
  LOGPRINTF("Modified QP to RTS\n");

  /* If using event completion, register event completion handler and request
   * the initial notification.  Both calls are checked: if either one fails,
   * receive_complete would never be raised and the receiver would spin
   * forever.
   */
  if( p->prot.comptype == NP_COMP_EVENT ) {

    ret = EVAPI_set_comp_eventh(hca_hndl, r_cq_hndl, event_handler, p,
                                &ceh_hndl);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error setting completion event handler: %s\n",
              VAPI_strerror(ret));
      return -1;
    }

    ret = VAPI_req_comp_notif(hca_hndl, r_cq_hndl, VAPI_NEXT_COMP);
    if(ret != VAPI_OK) {
      fprintf(stderr, "Error requesting completion notification: %s\n",
              VAPI_strerror(ret));
      return -1;
    }

  }
 
  return 0;
}
Beispiel #11
0
/*
 * MyMalloc - allocate the receive and send buffers and register both with
 * the HCA.
 *
 * p:        receives the buffer addresses in p->r_buff and p->s_buff.
 * bufflen:  payload size in bytes.
 * soffset:  extra bytes for the send buffer.
 * roffset:  extra bytes considered for the receive buffer, which is sized
 *           bufflen + MAX(soffset, roffset).
 *
 * When p->cache is set the send buffer is the receive buffer itself (the
 * same memory is then registered twice).  Any allocation or registration
 * failure prints a message and terminates the process with exit(-1).
 */
void MyMalloc(ArgStruct *p, int bufflen, int soffset, int roffset)
{
  VAPI_ret_t status;

  /* Receive buffer */
  p->r_buff = malloc(bufflen+MAX(soffset,roffset));
  if(p->r_buff == NULL) {
    fprintf(stderr, "Error malloc'ing buffer\n");
    exit(-1);
  }

  /* Send buffer: shared with the receive buffer in cache mode (the
   * Infiniband spec allows registering the same region more than once),
   * otherwise a separate allocation.
   */
  if(!p->cache) {
    p->s_buff = malloc(bufflen+soffset);
    if(p->s_buff == NULL) {
      fprintf(stderr, "Error malloc'ing buffer\n");
      exit(-1);
    }
  } else {
    p->s_buff = p->r_buff;
  }

  /* Register the receive buffer: local and remote write access */
  mr_in.type    = VAPI_MR;
  mr_in.pd_hndl = pd_hndl;
  mr_in.start   = (VAPI_virt_addr_t)(MT_virt_addr_t)p->r_buff;
  mr_in.size    = bufflen+MAX(soffset,roffset);
  mr_in.acl     = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
  mr_in.l_key   = 0;
  mr_in.r_key   = 0;

  status = VAPI_register_mr(hca_hndl, &mr_in, &r_mr_hndl, &r_mr_out);
  if(status != VAPI_OK) {
    fprintf(stderr, "Error registering recv buffer: %s\n", VAPI_strerror(status));
    exit(-1);
  }
  LOGPRINTF("Registered Recv Buffer\n");

  /* Register the send buffer: local write access only */
  mr_in.type    = VAPI_MR;
  mr_in.pd_hndl = pd_hndl;
  mr_in.start   = (VAPI_virt_addr_t)(MT_virt_addr_t)p->s_buff;
  mr_in.size    = bufflen+soffset;
  mr_in.acl     = VAPI_EN_LOCAL_WRITE;
  mr_in.l_key   = 0;
  mr_in.r_key   = 0;

  status = VAPI_register_mr(hca_hndl, &mr_in, &s_mr_hndl, &s_mr_out);
  if(status != VAPI_OK) {
    fprintf(stderr, "Error registering send buffer: %s\n", VAPI_strerror(status));
    exit(-1);
  }
  LOGPRINTF("Registered Send Buffer\n");
}
Beispiel #12
0
/* 
 * The queue pair has been created and we have received the remote 
 *  queue pair information from the peer so we init this queue pair 
 *  and are ready to roll. 
 *
 * The queue pair is moved through INIT -> RTR -> RTS using the tunables
 * in mca_btl_mvapi_component, and is then queried so that the maximum
 * inline send size can be stored in mvapi_btl->ib_inline_max.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR as soon as any VAPI call fails
 * (every failure is logged through BTL_ERROR).
 */ 
int mca_btl_mvapi_endpoint_qp_init_query(

                                      mca_btl_mvapi_module_t* mvapi_btl, 
                                      VAPI_hca_hndl_t nic, 
                                      VAPI_qp_hndl_t qp_hndl, 
                                      VAPI_qp_num_t remote_qp_num, 
                                      IB_lid_t remote_lid, 
                                      IB_port_t port_id
                                      )
     
     
{
    
    VAPI_ret_t              ret;
    VAPI_qp_attr_t          qp_attr;

    VAPI_qp_attr_mask_t     qp_attr_mask;
    VAPI_qp_init_attr_t     qp_init_attr; 
    VAPI_qp_cap_t           qp_cap;

    /* Modifying  QP to INIT */
    QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
    qp_attr.qp_state = VAPI_INIT;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
    qp_attr.pkey_ix = mca_btl_mvapi_component.ib_pkey_ix;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);
    qp_attr.port = port_id; 
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PORT);
    qp_attr.remote_atomic_flags = VAPI_EN_REM_WRITE | VAPI_EN_REM_READ;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_REMOTE_ATOMIC_FLAGS);

    ret = VAPI_modify_qp(nic, qp_hndl,
            &qp_attr, &qp_attr_mask, &qp_cap);

    if(VAPI_OK != ret) {
        BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
        return OMPI_ERROR;
    }

    BTL_VERBOSE(("Modified to init..Qp %d", qp_hndl));

    /**********************  INIT --> RTR  ************************/
    QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
    qp_attr.qp_state = VAPI_RTR;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
    qp_attr.qp_ous_rd_atom = mca_btl_mvapi_component.ib_qp_ous_rd_atom;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_OUS_RD_ATOM);
    qp_attr.path_mtu = mca_btl_mvapi_component.ib_mtu;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PATH_MTU);
    qp_attr.rq_psn = mca_btl_mvapi_component.ib_psn;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RQ_PSN);
    qp_attr.pkey_ix = mca_btl_mvapi_component.ib_pkey_ix;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_PKEY_IX);
    qp_attr.min_rnr_timer = mca_btl_mvapi_component.ib_min_rnr_timer;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_MIN_RNR_TIMER);

    /* Address vector describing the path to the remote port */
    qp_attr.av.sl = mca_btl_mvapi_component.ib_service_level;
    qp_attr.av.grh_flag = FALSE;
    qp_attr.av.static_rate = mca_btl_mvapi_component.ib_static_rate;
    qp_attr.av.src_path_bits = mca_btl_mvapi_component.ib_src_path_bits;

    qp_attr.dest_qp_num = remote_qp_num;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_DEST_QP_NUM);
    qp_attr.av.dlid = remote_lid;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_AV);

    ret = VAPI_modify_qp(nic, qp_hndl,
            &qp_attr, &qp_attr_mask, &qp_cap);

    if(VAPI_OK != ret) {
        BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
        return OMPI_ERROR;
    }
    
    BTL_VERBOSE(("Modified to RTR..Qp %d", qp_hndl));

    /************** RTS *******************/
    QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
    qp_attr.qp_state = VAPI_RTS;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_QP_STATE);
    qp_attr.sq_psn = mca_btl_mvapi_component.ib_psn;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_SQ_PSN);
    qp_attr.timeout = mca_btl_mvapi_component.ib_timeout;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_TIMEOUT);
    qp_attr.retry_count = mca_btl_mvapi_component.ib_retry_count;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RETRY_COUNT);
    qp_attr.rnr_retry = mca_btl_mvapi_component.ib_rnr_retry;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_RNR_RETRY);
    qp_attr.ous_dst_rd_atom = mca_btl_mvapi_component.ib_max_rdma_dst_ops;
    QP_ATTR_MASK_SET(qp_attr_mask, QP_ATTR_OUS_DST_RD_ATOM);

    ret = VAPI_modify_qp(nic, qp_hndl,
            &qp_attr, &qp_attr_mask, &qp_cap);

    if(VAPI_OK != ret) {
        /* Log this failure like the INIT and RTR transitions do */
        BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
        return OMPI_ERROR;
    }
    BTL_VERBOSE(("Modified to RTS..Qp %d", qp_hndl));
    
    /* Read back the QP's capabilities to learn the inline-send limit */
    ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr );          
    if (ret != VAPI_OK) {                                                   
        BTL_ERROR(("Error querying the queue pair: %s", VAPI_strerror(ret)));
        return OMPI_ERROR; 
    }                      
    
    mvapi_btl->ib_inline_max = qp_init_attr.cap.max_inline_data_sq;  
    
    return OMPI_SUCCESS;
}
Beispiel #13
0
/*
 * Create a queue pair on `nic` in protection domain `ptag`, using `cq_hndl`
 * as both the send and the receive completion queue.
 *
 * Only the reliable-connected transport (VAPI_TS_RC) is supported; any
 * other transport_type returns OMPI_ERR_NOT_IMPLEMENTED without touching
 * the outputs.  When built with VAPI_FEATURE_SRQ and the component is
 * configured to use a shared receive queue, the QP is created with
 * VAPI_create_qp_ext() and attached to `srq_hndl`.
 *
 * On success *qp_hndl and *qp_prop are filled in by VAPI and OMPI_SUCCESS
 * is returned; a VAPI failure is logged and OMPI_ERROR is returned.
 */
int mca_btl_mvapi_endpoint_create_qp(
                                  mca_btl_mvapi_module_t* mvapi_btl, 
                                  VAPI_hca_hndl_t nic,
                                  VAPI_pd_hndl_t ptag, 
                                  VAPI_cq_hndl_t cq_hndl, 
#ifdef VAPI_FEATURE_SRQ
                                  VAPI_srq_hndl_t srq_hndl, 
#endif
                                  VAPI_qp_hndl_t* qp_hndl, 
                                  VAPI_qp_prop_t* qp_prop, 
                                  int transport_type)
{
    VAPI_qp_init_attr_t init_attr;
#ifdef VAPI_FEATURE_SRQ
    VAPI_qp_init_attr_ext_t init_attr_ext;
#endif
    VAPI_ret_t rc;

    /* UD and every other transport are not implemented */
    if (VAPI_TS_RC != transport_type) {
        return OMPI_ERR_NOT_IMPLEMENTED;
    }

    /* Reliable Connected transport, no reliable-datagram domain */
    init_attr.ts_type  = VAPI_TS_RC;
    init_attr.pd_hndl  = ptag;
    init_attr.rdd_hndl = 0;

    /* One completion queue serves both directions */
    init_attr.sq_cq_hndl = cq_hndl;
    init_attr.rq_cq_hndl = cq_hndl;

    /* Every work request on this queue pair is signaled */
    init_attr.sq_sig_type = VAPI_SIGNAL_REQ_WR;
    init_attr.rq_sig_type = VAPI_SIGNAL_REQ_WR;

    /* Queue depths: allow for the worst case number of queued credit
     * messages on top of the regular receive descriptors */
    init_attr.cap.max_oust_wr_sq = mca_btl_mvapi_component.rd_num + 1;
    init_attr.cap.max_oust_wr_rq =
        mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv;
    init_attr.cap.max_sg_size_sq = mca_btl_mvapi_component.ib_sg_list_size;
    init_attr.cap.max_sg_size_rq = mca_btl_mvapi_component.ib_sg_list_size;

#ifdef VAPI_FEATURE_SRQ
    if (mca_btl_mvapi_component.use_srq) {
        init_attr_ext.srq_hndl = srq_hndl;
        rc = VAPI_create_qp_ext(nic, &init_attr, &init_attr_ext,
                                qp_hndl, qp_prop);
    } else {
        rc = VAPI_create_qp(nic, &init_attr, qp_hndl, qp_prop);
    }
#else
    rc = VAPI_create_qp(nic, &init_attr, qp_hndl, qp_prop);
#endif

    if (VAPI_OK != rc) {
        BTL_ERROR(("error creating the queue pair: %s", VAPI_strerror(rc))); 
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Beispiel #14
0
/*
 * Post one fragment to the peer behind `endpoint`, or park it on a pending
 * list when no send resource is currently available.
 *
 * Priority fragments no larger than the eager limit use the high-priority
 * queue pair (and eager RDMA when an RDMA token can be taken); everything
 * else uses the low-priority queue pair.  Before posting, a send WQE and a
 * flow-control token are reserved with atomic decrements; if either
 * counter goes negative the decrement is undone, the fragment is appended
 * to the matching pending list and OMPI_SUCCESS is returned (the fragment
 * is deferred, not failed).
 *
 * Returns OMPI_SUCCESS when the fragment was posted or deferred, and
 * OMPI_ERROR when the VAPI post call fails.
 *
 * NOTE(review): the "< 0" tests assume OPAL_THREAD_ADD32 yields the updated
 * counter value - confirm against its definition.
 */
static inline int mca_btl_mvapi_endpoint_post_send(
    mca_btl_mvapi_module_t* mvapi_btl, 
    mca_btl_mvapi_endpoint_t * endpoint, 
    mca_btl_mvapi_frag_t * frag)
{
    int do_rdma = 0; 
    VAPI_qp_hndl_t qp_hndl; 
    int ret;

    /* High-priority path: priority flag set and fragment fits the eager limit */
    if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY &&
            frag->size <= mvapi_btl->super.btl_eager_limit){ 

        /* check for a send wqe */
        if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,-1) < 0) {
            OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
            opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
            return OMPI_SUCCESS;
        }
        /* check for rdma token */
        if (OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,-1) < 0) {
            /* No eager-RDMA token: give it back and fall back to a regular
             * send, which needs a send token instead. */
            OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,1);
            /* check for a token */
            if(!mca_btl_mvapi_component.use_srq &&
                    OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,-1) < 0) {
                /* Per-endpoint token exhausted: release the WQE and the
                 * token, then defer on the endpoint's list.
                 * NOTE(review): this append is not wrapped in ib_lock,
                 * unlike the module-level list below - confirm the caller
                 * holds the endpoint lock. */
                OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
                OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,1);
                opal_list_append(&endpoint->pending_frags_hp,
                        (opal_list_item_t *)frag);
                return OMPI_SUCCESS;

            } else if( mca_btl_mvapi_component.use_srq &&
                   OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_hp,-1) < 0) {
                /* SRQ mode: tokens are counted on the module, and the
                 * module-level pending list is protected by ib_lock. */
                OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
                OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_hp,1);
                OPAL_THREAD_LOCK(&mvapi_btl->ib_lock);
                opal_list_append(&mvapi_btl->pending_frags_hp, (opal_list_item_t *)frag);
                OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock);
                return OMPI_SUCCESS;
            }
        } else {
            /* An eager-RDMA token was obtained: send via RDMA write */
            do_rdma = 1;
        }
        /* Carry the accumulated high-priority receive credits (never a
         * negative value) in the header and subtract them from the counter */
        frag->hdr->credits =
            (endpoint->rd_credits_hp > 0) ? endpoint->rd_credits_hp : 0;
        OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
        /* Same for the local eager-RDMA credits */
        frag->hdr->rdma_credits = endpoint->eager_rdma_local.credits;
        OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
                -frag->hdr->rdma_credits);
        qp_hndl = endpoint->lcl_qp_hndl_hp;
    } else {
        /* Low-priority path */
        /* check for a send wqe */
        if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {

            OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
            opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
            return OMPI_SUCCESS;

        /* check for a token */
        } else if(!mca_btl_mvapi_component.use_srq &&
            OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,-1) < 0 ) {

            OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
            OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,1);
            opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
            return OMPI_SUCCESS;

        } else if(mca_btl_mvapi_component.use_srq &&
            OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,-1) < 0) {

            OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
            OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,1);
            OPAL_THREAD_LOCK(&mvapi_btl->ib_lock);
            opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
            OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock);
            return OMPI_SUCCESS;

        /* queue the request */
        } else {
            frag->hdr->credits = (endpoint->rd_credits_lp > 0) ? endpoint->rd_credits_lp : 0;
            OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -frag->hdr->credits);
            qp_hndl = endpoint->lcl_qp_hndl_lp;
        }
    } 
    
    /* Build the send descriptor: the message starts at the header and covers
     * header + payload, plus the footer when sent via eager RDMA */
    frag->desc.sr_desc.remote_qkey = 0; 
    frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr; 
    frag->sg_entry.len =
        frag->segment.seg_len + sizeof(mca_btl_mvapi_header_t) + 
        (do_rdma ? sizeof(mca_btl_mvapi_footer_t) : 0);

    if(do_rdma) {
        /* The footer sits immediately after the payload */
        mca_btl_mvapi_footer_t* ftr =
            (mca_btl_mvapi_footer_t*)(((char*)frag->segment.seg_addr.pval) +
                                       frag->segment.seg_len);
        frag->desc.sr_desc.opcode = VAPI_RDMA_WRITE;
        MCA_BTL_MVAPI_RDMA_FRAG_SET_SIZE(ftr, frag->sg_entry.len);
        MCA_BTL_MVAPI_RDMA_MAKE_LOCAL(ftr);
#ifdef OMPI_ENABLE_DEBUG
        ftr->seq = endpoint->eager_rdma_remote.seq++;
#endif
        frag->desc.sr_desc.r_key = (VAPI_rkey_t)endpoint->eager_rdma_remote.rkey;
        /* Target address: take the offset just past the largest possible
         * message in remote slot `head` (frag struct + header + frag->size
         * + footer), then step back by this message's length, i.e. the
         * message is placed so that it ends at that offset. */
        frag->desc.sr_desc.remote_addr = (VAPI_virt_addr_t)
            endpoint->eager_rdma_remote.base.lval +
            endpoint->eager_rdma_remote.head *
            mvapi_btl->eager_rdma_frag_size +
            sizeof(mca_btl_mvapi_frag_t) +
            sizeof(mca_btl_mvapi_header_t) +
            frag->size +
            sizeof(mca_btl_mvapi_footer_t);
        frag->desc.sr_desc.remote_addr -= frag->sg_entry.len;
        /* Advance to the next remote slot (macro defined elsewhere) */
        MCA_BTL_MVAPI_RDMA_NEXT_INDEX (endpoint->eager_rdma_remote.head);
    } else {
        frag->desc.sr_desc.opcode = VAPI_SEND;
    }


    /* Messages that fit the QP's inline limit are posted inline */
    if(frag->sg_entry.len <= mvapi_btl->ib_inline_max) { 
        ret = EVAPI_post_inline_sr(mvapi_btl->nic, qp_hndl, &frag->desc.sr_desc); 
    } else { 
        ret = VAPI_post_sr(mvapi_btl->nic, qp_hndl, &frag->desc.sr_desc); 
    }

    if(VAPI_OK != ret) {
        BTL_ERROR(("VAPI_post_sr: %s\n", VAPI_strerror(ret)));
        return OMPI_ERROR; 
    }
    /* After a successful post, invoke the receive-posting macros for both
     * priorities - on the shared receive queue when SRQ is in use, otherwise
     * on the endpoint.  NOTE(review): presumably these replenish receive
     * descriptors; the macros are defined elsewhere - confirm. */
#ifdef VAPI_FEATURE_SRQ
    if(mca_btl_mvapi_component.use_srq) { 
        MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 1); 
        MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 1);
    } else
#endif
    {
        MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 1); 
        MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 1); 
    }
    return OMPI_SUCCESS; 
}