/**
 * Mpool registration callback: register a memory region with every SCIF
 * endpoint that is currently connected.
 *
 * One off_t slot per endpoint is allocated in scif_reg->registrations and
 * pre-filled with -1 (all bytes 0xff). Slots for connected endpoints are then
 * overwritten with the offset returned by scif_register().
 *
 * @param reg_data  opaque callback data, forwarded to scif_dereg_mem() on failure
 * @param base      start of the region to register
 * @param size      length of the region in bytes
 * @param reg       mpool registration; treated as a mca_btl_scif_reg_t
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the slot array cannot
 *         be allocated or any scif_register() call fails.
 */
static int scif_reg_mem (void *reg_data, void *base, size_t size,
                         mca_mpool_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *) reg;
    const size_t count = mca_btl_scif_module.endpoint_count;
    int rc = OMPI_SUCCESS;
    unsigned int i;

    scif_reg->registrations = calloc (count, sizeof (off_t));
    if (NULL == scif_reg->registrations) {
        /* calloc may return NULL for a zero-sized request; only a non-empty
         * request that fails is an out-of-memory condition */
        return (0 == count) ? OMPI_SUCCESS : OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* mark every slot as "not registered" */
    memset (scif_reg->registrations, -1, count * sizeof (off_t));

    /* register the pointer with all connected endpoints */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != mca_btl_scif_module.endpoints[i].state) {
            continue;
        }

        scif_reg->registrations[i] =
            scif_register (mca_btl_scif_module.endpoints[i].scif_epd, base, size, 0,
                           SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
        if (SCIF_REGISTER_FAILED == scif_reg->registrations[i]) {
            /* cleanup */
            scif_dereg_mem (reg_data, reg);
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            break;
        }
    }

    return rc;
}
/**
 * Mpool registration callback: register a memory region with every SCIF
 * endpoint that is currently connected.
 *
 * One handle per endpoint is allocated in scif_reg->handles. Every handle is
 * initialised with scif_offset = -1, scif_base = base and a back pointer to
 * the registration; handles for connected endpoints then receive the offset
 * returned by scif_register().
 *
 * @param reg_data  opaque callback data, forwarded to scif_dereg_mem() on failure
 * @param base      start of the region to register
 * @param size      length of the region in bytes
 * @param reg       mpool registration; treated as a mca_btl_scif_reg_t
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE if the handle array cannot
 *         be allocated or any scif_register() call fails.
 */
static int scif_reg_mem (void *reg_data, void *base, size_t size,
                         mca_mpool_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *) reg;
    int rc = OPAL_SUCCESS;
    unsigned int i;

    scif_reg->handles = calloc (mca_btl_scif_module.endpoint_count,
                                sizeof (scif_reg->handles[0]));
    if (NULL == scif_reg->handles) {
        /* calloc may return NULL for a zero-sized request; only a non-empty
         * request that fails is an out-of-memory condition */
        return (0 == mca_btl_scif_module.endpoint_count) ? OPAL_SUCCESS
                                                         : OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* initialize all scif offsets to -1 and initialize the pointer back to
     * the mpool registration */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        scif_reg->handles[i].btl_handle.scif_offset = -1;
        scif_reg->handles[i].btl_handle.scif_base = (intptr_t) base;
        scif_reg->handles[i].reg = scif_reg;
    }

    /* register the pointer with all connected endpoints */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != mca_btl_scif_module.endpoints[i].state) {
            continue;
        }

        scif_reg->handles[i].btl_handle.scif_offset =
            scif_register (mca_btl_scif_module.endpoints[i].scif_epd, base, size, 0,
                           SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
        if (SCIF_REGISTER_FAILED == scif_reg->handles[i].btl_handle.scif_offset) {
            /* cleanup */
            scif_dereg_mem (reg_data, reg);
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            break;
        }
    }

    return rc;
}
/**
 * Allocate, zero and SCIF-register the receive buffer of an endpoint.
 *
 * The buffer is page aligned and mca_btl_scif_component.segment_size bytes
 * long. After registration the first two 32-bit words of the buffer are used
 * as the start and end markers (startp / endp) and both are set to 64.
 *
 * @param ep  endpoint whose recv_buffer is set up; ep->scif_epd is used for
 *            the registration
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE if the allocation
 *         fails, OPAL_ERROR if scif_register() fails (the buffer is freed and
 *         ep->recv_buffer.buffer reset to NULL in that case).
 */
static inline int mca_btl_scif_ep_get_buffer (mca_btl_base_endpoint_t *ep)
{
    int rc;

    rc = posix_memalign ((void **) &ep->recv_buffer.buffer, opal_getpagesize(),
                         mca_btl_scif_component.segment_size);
    /* posix_memalign reports failure with a positive error number, never a
     * negative value, so any non-zero result must be treated as an error */
    if (0 != rc) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    memset (ep->recv_buffer.buffer, 0, mca_btl_scif_component.segment_size);

    ep->recv_buffer.scif_offset = scif_register (ep->scif_epd, ep->recv_buffer.buffer,
                                                 mca_btl_scif_component.segment_size, 0,
                                                 SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
    if (SCIF_REGISTER_FAILED == ep->recv_buffer.scif_offset) {
        BTL_VERBOSE(("failed to register a scif buffer of size %d. errno = %d",
                     mca_btl_scif_component.segment_size, errno));
        free (ep->recv_buffer.buffer);
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERROR;
    }

    /* the two marker words live at the very beginning of the buffer */
    ep->recv_buffer.startp = (uint32_t *) ep->recv_buffer.buffer;
    ep->recv_buffer.endp = ep->recv_buffer.startp + 1;

    ep->recv_buffer.startp[0] = ep->recv_buffer.endp[0] = 64;

    BTL_VERBOSE(("allocated buffer of size %d bytes. with scif registration %lu",
                 mca_btl_scif_component.segment_size,
                 (unsigned long) ep->recv_buffer.scif_offset));

    return OPAL_SUCCESS;
}
int m_pi_create_wr_q(struct mcm_qp *m_qp, int entries) { /* RDMA proxy WR pool, register with SCIF and IB, set pool and segm size with parameters */ m_qp->wrc.wr_sz = ALIGN_64(sizeof(struct mcm_wr_rx)); m_qp->wrc.wr_len = m_qp->wrc.wr_sz * entries; /* 64 byte aligned for signal_fence */ m_qp->wrc.wr_end = entries - 1; m_qp->wr_hd_r = 0; m_qp->wr_tl_r = 0; m_qp->wr_tl_r_wt = 1; /* start at tl+1 */ if (posix_memalign((void **)&m_qp->wrc.wr_addr, 4096, ALIGN_PAGE(m_qp->wrc.wr_len))) { mlog(0, "failed to allocate wr_rbuf, m_qp=%p, wr_len=%d, entries=%d\n", m_qp, m_qp->wrc.wr_len, entries); return -1; } memset((void*)m_qp->wrc.wr_addr, 0, ALIGN_PAGE(m_qp->wrc.wr_len)); mlog(4, " WR rbuf pool %p, LEN req=%d, act=%d\n", m_qp->wrc.wr_addr, m_qp->wrc.wr_len, ALIGN_PAGE(m_qp->wrc.wr_len) ); m_qp->wr_rbuf_mr = ibv_reg_mr(m_qp->smd->md->pd, (void*)m_qp->wrc.wr_addr, m_qp->wrc.wr_len, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); if (!m_qp->wr_rbuf_mr) { mlog(0, " IB_register addr=%p,%d failed %s\n", m_qp->wrc.wr_addr, ALIGN_PAGE(m_qp->wrc.wr_len), strerror(errno)); return -1;; } m_qp->wrc.wr_addr = (uint64_t)(uintptr_t)m_qp->wr_rbuf_mr->addr; m_qp->wrc.wr_rkey = m_qp->wr_rbuf_mr->rkey; mlog(4, " IB_mr for wr_buf addr %p, off 0x%llx, len %d, entries %d, rkey %x lkey %x\n", m_qp->wrc.wr_addr, m_qp->wr_rbuf_mr->addr, ALIGN_PAGE(m_qp->wrc.wr_len), entries, m_qp->wr_rbuf_mr->rkey, m_qp->wr_rbuf_mr->rkey); m_qp->wr_off_r = scif_register(m_qp->smd->scif_tx_ep, (void*)m_qp->wrc.wr_addr, ALIGN_PAGE(m_qp->wrc.wr_len), (off_t)0, SCIF_PROT_READ | SCIF_PROT_WRITE, 0); if (m_qp->wr_off_r == (off_t)(-1)) { mlog(0, " SCIF_register addr=%p,%d failed %s\n", m_qp->wrc.wr_addr, ALIGN_PAGE(m_qp->wrc.wr_len), strerror(errno)); return -1; } mlog(4, " WR rbuf pool %p, LEN req=%d, act=%d\n", m_qp->wr_buf, m_qp->wr_len, ALIGN_PAGE(m_qp->wrc.wr_len)); mlog(4, " SCIF_mr for wr_rbuf addr %p, off 0x%llx, len %d, entries %d\n", m_qp->wrc.wr_addr, m_qp->wr_off_r, ALIGN_PAGE(m_qp->wrc.wr_len), entries); 
return 0; }
/**
 * Register a memory region for RDMA with a specific SCIF endpoint.
 *
 * The endpoint is connected first if necessary. The region is then registered
 * through the BTL's mpool, and, if the per-endpoint handle of the resulting
 * registration still has offset -1, the registration's full range
 * (base.base .. base.bound) is registered with the endpoint via
 * scif_register().
 *
 * @param btl       BTL module; its btl_mpool performs the registration
 * @param endpoint  peer to register with; MCA_BTL_ENDPOINT_ANY is not supported
 * @param base      start of the region
 * @param size      length of the region in bytes
 * @param flags     registration flags; only the MCA_BTL_REG_FLAG_ACCESS_ANY
 *                  bits are passed on to the mpool
 *
 * @return the registration handle for this endpoint, or NULL if the endpoint
 *         is MCA_BTL_ENDPOINT_ANY, is not connected after the connect attempt,
 *         the mpool registration fails, or scif_register() fails.
 */
static mca_btl_base_registration_handle_t *
mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                           void *base, size_t size, uint32_t flags)
{
    mca_btl_scif_reg_t *scif_reg;
    int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
    int rc;

    if (MCA_BTL_ENDPOINT_ANY == endpoint) {
        /* it probably isn't possible to support registering memory to use with any endpoint so
         * return NULL */
        return NULL;
    }

    if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
        /* the endpoint needs to be connected before the fragment can be
         * registered. the endpoint state, not the return code, decides
         * whether we can continue. */
        (void) mca_btl_scif_ep_connect (endpoint);
        if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
            /* not yet connected */
            return NULL;
        }
    }

    rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, access_flags,
                                        (mca_mpool_base_registration_t **) &scif_reg);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return NULL;
    }

    /* register the memory location with this peer if it isn't already */
    if ((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset) {
        size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound -
                                   (uintptr_t) scif_reg->base.base) + 1;

        /* NTH: until we determine a way to pass permissions to the mpool just make all segments
         * read/write */
        scif_reg->handles[endpoint->id].btl_handle.scif_offset =
            scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0,
                           SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
        if (OPAL_UNLIKELY((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset)) {
            /* do not hand out a handle with an invalid offset; drop the
             * reference taken by mpool_register above */
            BTL_VERBOSE(("failed to register fragment for scif DMA transaction"));
            btl->btl_mpool->mpool_deregister (btl->btl_mpool,
                                              (mca_mpool_base_registration_t *) scif_reg);
            return NULL;
        }

        BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu",
                     (unsigned long) scif_reg->handles[endpoint->id].btl_handle.scif_offset));
    }

    return &scif_reg->handles[endpoint->id].btl_handle;
}
/* Allocate memory on the MIC. * Memory is register for remote direct access. */ void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size) { STARPU_ASSERT(arg_size == sizeof(size_t)); void *addr = NULL; size_t size = *(size_t *)(arg); if (posix_memalign(&addr, STARPU_MIC_PAGE_SIZE, size) != 0) _starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0); #ifndef STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint; size_t window_size = STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(size); if (scif_register(epd, addr, window_size, (off_t)addr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0) { free(addr); _starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0); } #endif _starpu_mp_common_send_command(mp_node, STARPU_ANSWER_ALLOCATE, &addr, sizeof(addr)); }
int main( ) { size_t len = 536870912; int align = 4096; scif_epd_t endpoint; struct scif_portID portid; int ret; uint8_t *in_key = malloc(16 * sizeof(uint8_t)); struct crypto_tfm *tfm = malloc( sizeof(struct crypto_tfm) + sizeof(struct crypto_aes_ctx) ); struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); ctx->key_length = AES_KEYSIZE_256; crypto_aes_set_key(tfm, in_key, AES_KEYSIZE_256); endpoint = scif_open( ); if( endpoint == SCIF_OPEN_FAILED ) { printf("scif open failed\n"); return 1; } ret = scif_bind(endpoint, 23955); if(ret==-1) { printf("scif_bind failed"); return 1; } portid.node = 0; portid.port = 23968; ret = scif_connect(endpoint, &portid); for( int attempt = 0; ret == -1 && attempt < 10; ++attempt ) { sleep(1); ret = scif_connect(endpoint, &portid); } if (ret==-1) { printf("scif_connect failed\n"); return 1; } void *ptr; ret = posix_memalign((void**)&ptr, align, len); if (ret) { printf("Allocating memory failed\n"); return 1; } memset(ptr, 0, len); if( SCIF_REGISTER_FAILED == scif_register(endpoint, ptr, len, (long)ptr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) ) { printf("scif_register of ptr failed due to: %s\n", strerror(errno)); return 1; } void *tempbuffer; ret = posix_memalign((void**)&tempbuffer, align, len); if (ret) { printf("Allocating tempbuffer failed\n"); return 1; } if( SCIF_REGISTER_FAILED == scif_register(endpoint, tempbuffer, len, (long)tempbuffer, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) ) { printf("scif_register of temp failed due to: %s\n", strerror(errno)); return 1; } void *outbuffer; ret = posix_memalign((void**)&outbuffer, align, len); if (ret) { printf("Allocating outbuffer failed %s\n", strerror(errno)); return 1; } if( SCIF_REGISTER_FAILED == scif_register(endpoint, outbuffer, len, (long)outbuffer, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) ) { printf("scif_register of outbuffer failed due to: %s\n", strerror(errno)); return 1; } void *remote_ptr; void *return_ptr; ret = scif_recv(endpoint, 
&remote_ptr, sizeof(void*), SCIF_RECV_BLOCK); if (ret==-1) { printf("scif_recv failed due to: %s\n", strerror(errno)); return 1; } ret = scif_recv(endpoint, &return_ptr, sizeof(void*), SCIF_RECV_BLOCK); if (ret==-1) { printf("scif_recv failed due to: %s\n", strerror(errno)); return 1; } struct timespec start_enc, stop_enc; clock_gettime(CLOCK_REALTIME, &start_enc); if (scif_readfrom(endpoint, (long)ptr, len, (long)remote_ptr, SCIF_RMA_SYNC)) { printf("scif_readfrom failed due to: %s\n", strerror(errno)); return 1; } #pragma omp parallel for for (int k = 0; k<len; k+=16) { aes_encrypt(tfm, (uint8_t*)&tempbuffer[k], (uint8_t*)&ptr[k]); } if (scif_writeto(endpoint, (long)tempbuffer, len, (long)return_ptr, SCIF_RMA_SYNC)) { printf("scif_writeto failed due to: %s\n", strerror(errno)); return 1; } clock_gettime(CLOCK_REALTIME, &stop_enc); double time_enc = (stop_enc.tv_sec - start_enc.tv_sec) + ( stop_enc.tv_nsec - start_enc.tv_nsec) / NANOSECONDS; double result0 = len/time_enc/1048576; printf("%1f,", result0); struct timespec start_for, stop_for; clock_gettime(CLOCK_REALTIME, &start_for); if (scif_readfrom(endpoint, (long)ptr, len, (long)remote_ptr, SCIF_RMA_SYNC)) { printf("scif_readfrom failed due to: %s\n", strerror(errno)); return 1; } #pragma omp parallel for for (int k=0; k<len; k+=16) { aes_decrypt(tfm, (uint8_t*)&outbuffer[k], (uint8_t*)&tempbuffer[k]); } if (scif_writeto(endpoint, (long)outbuffer, len, (long)return_ptr, SCIF_RMA_SYNC)) { printf("scif_writeto failed due to: %s\n", strerror(errno)); return 1; } clock_gettime(CLOCK_REALTIME, &stop_for); double time_for = (stop_for.tv_sec - start_for.tv_sec) + ( stop_for.tv_nsec - start_for.tv_nsec) / NANOSECONDS; double result = 536870912/time_for/1048576; printf("%1f \n", result); ret = scif_send(endpoint, &ptr, sizeof(long), SCIF_SEND_BLOCK); if (ret==-1) { printf("scif_send failed due to: %s\n", strerror(errno)); return 1; } ret = scif_unregister(endpoint, (off_t)ptr, len ); if(ret==-1 && errno!=ENOTCONN ) { 
printf("scif_unregister failed %s\n", strerror(errno)); return 1; } scif_close(endpoint); }