예제 #1
0
/**
 * Register a memory region with every currently connected SCIF endpoint.
 *
 * One offset slot per known endpoint is allocated in scif_reg->registrations.
 * Slots of endpoints that are not connected keep the all-ones (-1) filler.
 *
 * @param reg_data  opaque context; only forwarded to scif_dereg_mem() on failure
 * @param base      start of the region to register
 * @param size      length of the region in bytes
 * @param reg       mpool registration, accessed as a mca_btl_scif_reg_t
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE when the offset table
 *         cannot be allocated or a scif_register() call fails.
 */
static int scif_reg_mem (void *reg_data, void *base, size_t size,
                         mca_mpool_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg;
    int rc = OMPI_SUCCESS;
    unsigned int i;

    scif_reg->registrations = calloc (mca_btl_scif_module.endpoint_count,
                                 sizeof (off_t));
    if (NULL == scif_reg->registrations) {
        /* calloc (0, ...) may legitimately return NULL; only a failed
         * allocation of a non-empty table is an error */
        return (0 == mca_btl_scif_module.endpoint_count) ? OMPI_SUCCESS :
            OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* fill every slot with -1 so unregistered endpoints can be told apart */
    memset (scif_reg->registrations, -1, mca_btl_scif_module.endpoint_count * sizeof (off_t));

    /* register the pointer with all connected endpoints */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) {
            scif_reg->registrations[i] = scif_register(mca_btl_scif_module.endpoints[i].scif_epd,
                                                       base, size, 0, SCIF_PROT_READ |
                                                       SCIF_PROT_WRITE, 0);
            if (SCIF_REGISTER_FAILED == scif_reg->registrations[i]) {
                /* cleanup: hand the partially filled table to the deregistration routine */
                scif_dereg_mem (reg_data, reg);
                rc = OMPI_ERR_OUT_OF_RESOURCE;
                break;
            }
        }
    }

    return rc;
}
예제 #2
0
/**
 * Register a memory region with every currently connected SCIF endpoint.
 *
 * One handle per known endpoint is allocated in scif_reg->handles. Each
 * handle starts with scif_offset == -1, records the region base, and points
 * back to the owning registration.
 *
 * @param reg_data  opaque context; only forwarded to scif_dereg_mem() on failure
 * @param base      start of the region to register
 * @param size      length of the region in bytes
 * @param reg       mpool registration, accessed as a mca_btl_scif_reg_t
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when the handle table
 *         cannot be allocated or a scif_register() call fails.
 */
static int scif_reg_mem (void *reg_data, void *base, size_t size,
                         mca_mpool_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg;
    int rc = OPAL_SUCCESS;
    unsigned int i;

    scif_reg->handles = calloc (mca_btl_scif_module.endpoint_count, sizeof (scif_reg->handles[0]));
    if (NULL == scif_reg->handles) {
        /* calloc (0, ...) may legitimately return NULL; only a failed
         * allocation of a non-empty table is an error */
        return (0 == mca_btl_scif_module.endpoint_count) ? OPAL_SUCCESS :
            OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* initialize all scif offsets to -1 and initialize the pointer back to the mpool registration */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        scif_reg->handles[i].btl_handle.scif_offset = -1;
        scif_reg->handles[i].btl_handle.scif_base = (intptr_t) base;
        scif_reg->handles[i].reg = scif_reg;
    }

    /* register the pointer with all connected endpoints */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) {
            scif_reg->handles[i].btl_handle.scif_offset = scif_register (mca_btl_scif_module.endpoints[i].scif_epd,
                                                                         base, size, 0, SCIF_PROT_READ |
                                                                         SCIF_PROT_WRITE, 0);
            if (SCIF_REGISTER_FAILED == scif_reg->handles[i].btl_handle.scif_offset) {
                /* cleanup: hand the partially filled table to the deregistration routine */
                scif_dereg_mem (reg_data, reg);
                rc = OPAL_ERR_OUT_OF_RESOURCE;
                break;
            }
        }
    }

    return rc;
}
예제 #3
0
/**
 * Allocate, zero and SCIF-register the receive buffer of an endpoint.
 *
 * The buffer is page aligned and mca_btl_scif_component.segment_size bytes
 * long. Its first two 32-bit words are used as the start and end markers and
 * are both set to 64.
 *
 * @param ep  endpoint whose recv_buffer is set up; ep->scif_epd is used for
 *            the registration
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE if the allocation
 *         fails, OPAL_ERROR if scif_register() fails. On either failure
 *         ep->recv_buffer.buffer is left NULL.
 */
static inline int mca_btl_scif_ep_get_buffer (mca_btl_base_endpoint_t *ep) {
    int rc;

    /* posix_memalign reports failure with a positive error number (it does
     * not set errno or return a negative value), so test for non-zero */
    rc = posix_memalign ((void **) &ep->recv_buffer.buffer, opal_getpagesize(), mca_btl_scif_component.segment_size);
    if (0 != rc) {
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    memset (ep->recv_buffer.buffer, 0, mca_btl_scif_component.segment_size);

    ep->recv_buffer.scif_offset = scif_register (ep->scif_epd, ep->recv_buffer.buffer,
                                                 mca_btl_scif_component.segment_size, 0,
                                                 SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
    if (SCIF_REGISTER_FAILED == ep->recv_buffer.scif_offset) {
        BTL_VERBOSE(("failed to register a scif buffer of size %d. errno = %d",
                     mca_btl_scif_component.segment_size, errno));
        free (ep->recv_buffer.buffer);
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERROR;
    }

    /* the start/end markers live in the first two 32-bit words of the buffer */
    ep->recv_buffer.startp = (uint32_t *) ep->recv_buffer.buffer;
    ep->recv_buffer.endp   = ep->recv_buffer.startp + 1;

    /* NOTE(review): 64 is presumably the offset of the first usable byte
     * past the marker area -- confirm against the send/receive code */
    ep->recv_buffer.startp[0] = ep->recv_buffer.endp[0] = 64;

    BTL_VERBOSE(("allocated buffer of size %d bytes. with scif registration %lu",
                 mca_btl_scif_component.segment_size, (unsigned long) ep->recv_buffer.scif_offset));

    return OPAL_SUCCESS;
}
예제 #4
0
파일: mpxy_in.c 프로젝트: Cai900205/test
/*
 * Create the receive-side work request ring for a queue pair.
 *
 * Sizes the ring for `entries` 64-byte aligned mcm_wr_rx slots, resets the
 * head/tail indices, allocates a zeroed, 4096-byte aligned buffer rounded up
 * to a page multiple, and registers that buffer with the IB protection domain
 * and with the SCIF tx endpoint of the owning device.
 *
 * m_qp    - queue pair to set up; m_qp->smd must already be valid
 * entries - number of work request slots in the ring
 *
 * Returns 0 on success, -1 if the allocation or either registration fails.
 *
 * NOTE(review): resources acquired before a failure are not released here;
 * presumably the caller tears the queue pair down -- confirm.
 */
int m_pi_create_wr_q(struct mcm_qp *m_qp, int entries)
{
	/* RDMA proxy WR pool, register with SCIF and IB, set pool and segm size with parameters */
	m_qp->wrc.wr_sz = ALIGN_64(sizeof(struct mcm_wr_rx));
	m_qp->wrc.wr_len = m_qp->wrc.wr_sz * entries; /* 64 byte aligned for signal_fence */
	m_qp->wrc.wr_end = entries - 1;
	m_qp->wr_hd_r = 0;
	m_qp->wr_tl_r = 0;
	m_qp->wr_tl_r_wt = 1; /* start at tl+1 */

	/* posix_memalign returns non-zero on failure */
	if (posix_memalign((void **)&m_qp->wrc.wr_addr, 4096, ALIGN_PAGE(m_qp->wrc.wr_len))) {
		mlog(0, "failed to allocate wr_rbuf, m_qp=%p, wr_len=%d, entries=%d\n",
			m_qp, m_qp->wrc.wr_len, entries);
		return -1;
	}
	memset((void*)m_qp->wrc.wr_addr, 0, ALIGN_PAGE(m_qp->wrc.wr_len));

	mlog(4, " WR rbuf pool %p, LEN req=%d, act=%d\n",
		m_qp->wrc.wr_addr, m_qp->wrc.wr_len, ALIGN_PAGE(m_qp->wrc.wr_len) );

	/* IB registration covers the requested length, not the page-rounded one */
	m_qp->wr_rbuf_mr = ibv_reg_mr(m_qp->smd->md->pd, (void*)m_qp->wrc.wr_addr, m_qp->wrc.wr_len,
				       IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);

	if (!m_qp->wr_rbuf_mr) {
		mlog(0, " IB_register addr=%p,%d failed %s\n",
		     m_qp->wrc.wr_addr, ALIGN_PAGE(m_qp->wrc.wr_len), strerror(errno));
		return -1;
	}
	m_qp->wrc.wr_addr = (uint64_t)(uintptr_t)m_qp->wr_rbuf_mr->addr;
	m_qp->wrc.wr_rkey = m_qp->wr_rbuf_mr->rkey;

	/* the last argument is the local key; it used to print rkey a second time */
	mlog(4, " IB_mr for wr_buf addr %p, off 0x%llx, len %d, entries %d, rkey %x lkey %x\n",
		m_qp->wrc.wr_addr, m_qp->wr_rbuf_mr->addr, ALIGN_PAGE(m_qp->wrc.wr_len),
		entries, m_qp->wr_rbuf_mr->rkey, m_qp->wr_rbuf_mr->lkey);

	/* SCIF registration covers the whole page-rounded buffer */
	m_qp->wr_off_r = scif_register(m_qp->smd->scif_tx_ep, (void*)m_qp->wrc.wr_addr, ALIGN_PAGE(m_qp->wrc.wr_len),
				      (off_t)0, SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
	if (m_qp->wr_off_r == (off_t)(-1)) {
		mlog(0, " SCIF_register addr=%p,%d failed %s\n",
		     m_qp->wrc.wr_addr, ALIGN_PAGE(m_qp->wrc.wr_len), strerror(errno));
		return -1;
	}

	mlog(4, " WR rbuf pool %p, LEN req=%d, act=%d\n", m_qp->wr_buf, m_qp->wr_len, ALIGN_PAGE(m_qp->wrc.wr_len));
	mlog(4, " SCIF_mr for wr_rbuf addr %p, off 0x%llx, len %d, entries %d\n",
		m_qp->wrc.wr_addr, m_qp->wr_off_r, ALIGN_PAGE(m_qp->wrc.wr_len), entries);

	return 0;
}
예제 #5
0
/**
 * Register a memory region for SCIF transfers with one specific endpoint.
 *
 * The endpoint is connected on demand. The region is then registered with
 * the BTL's mpool and, if this endpoint has no SCIF offset for it yet, with
 * the endpoint's SCIF descriptor. The SCIF registration covers the whole
 * mpool registration range (base.base .. base.bound).
 *
 * @param btl       BTL module whose mpool performs the registration
 * @param endpoint  peer to register with; MCA_BTL_ENDPOINT_ANY is not supported
 * @param base      start of the region
 * @param size      length of the region in bytes
 * @param flags     registration flags; only the MCA_BTL_REG_FLAG_ACCESS_ANY
 *                  bits are forwarded to the mpool
 *
 * @return the registration handle for this endpoint, or NULL if the endpoint
 *         is MCA_BTL_ENDPOINT_ANY, is not connected after the connect attempt,
 *         or if the mpool or SCIF registration fails.
 */
static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
        mca_btl_base_endpoint_t *endpoint,
        void *base, size_t size, uint32_t flags)
{
    mca_btl_scif_reg_t *scif_reg;
    int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
    int rc;

    if (MCA_BTL_ENDPOINT_ANY == endpoint) {
        /* it probably isn't possible to support registering memory to use with any endpoint so
         * return NULL */
        return NULL;
    }

    if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
        /* the endpoint needs to be connected before the fragment can be
         * registered. the result is judged by the endpoint state below */
        rc = mca_btl_scif_ep_connect (endpoint);
        if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
            /* not yet connected */
            return NULL;
        }
    }

    rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, access_flags,
                                        (mca_mpool_base_registration_t **) &scif_reg);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return NULL;
    }

    /* register the memory location with this peer if it isn't already */
    if ((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset) {
        size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound - (uintptr_t) scif_reg->base.base) + 1;

        /* NTH: until we determine a way to pass permissions to the mpool just make all segments
         * read/write */
        scif_reg->handles[endpoint->id].btl_handle.scif_offset =
            scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0, SCIF_PROT_READ |
                           SCIF_PROT_WRITE, 0);
        BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu",
                     (unsigned long) scif_reg->handles[endpoint->id].btl_handle.scif_offset));

        if (OPAL_UNLIKELY((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset)) {
            /* scif_register failed: the offset is unusable, so drop the mpool
             * reference taken above instead of handing out a broken handle */
            btl->btl_mpool->mpool_deregister (btl->btl_mpool, &scif_reg->base);
            return NULL;
        }
    }

    return &scif_reg->handles[endpoint->id].btl_handle;
}
/* Allocate memory on the MIC.
 * Unless asynchronous MIC copies are disabled, the memory is also registered
 * for remote direct access at a fixed SCIF offset equal to its address.
 *
 * mp_node  - node whose connection is used to answer the request
 * arg      - points to the requested size (a size_t)
 * arg_size - must be sizeof(size_t)
 *
 * Exactly one reply is sent: STARPU_ANSWER_ALLOCATE carrying the address on
 * success, or STARPU_ERROR_ALLOCATE if the allocation or registration fails. */
void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size)
{
	STARPU_ASSERT(arg_size == sizeof(size_t));

	void *addr = NULL;
	size_t size = *(size_t *)(arg);
	
	if (posix_memalign(&addr, STARPU_MIC_PAGE_SIZE, size) != 0)
	{
		/* Report the failure and stop: nothing was allocated, so there is
		 * nothing to register or to answer with. */
		_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0);
		return;
	}

#ifndef STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY
	scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
	size_t window_size = STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(size);

	if (scif_register(epd, addr, window_size, (off_t)addr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0)
	{
		/* Do not fall through to the success answer with a freed pointer. */
		free(addr);
		_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0);
		return;
	}
#endif
	
	_starpu_mp_common_send_command(mp_node, STARPU_ANSWER_ALLOCATE, &addr, sizeof(addr));
}
/*
 * SCIF + AES throughput benchmark.
 *
 * Connects to port 23968 on node 0, registers three page-aligned buffers of
 * `len` bytes at fixed offsets equal to their addresses, receives two remote
 * offsets from the peer, and then times two rounds:
 *   1. read remote data into `ptr`, encrypt it into `tempbuffer`, write back;
 *   2. read remote data into `ptr`, decrypt `tempbuffer` into `outbuffer`,
 *      write back.
 * Each round prints len / elapsed-seconds / 1048576. Returns 1 on any
 * failure, 0 on success.
 */
int main( )
{
	size_t len = 536870912;
	int align = 4096;
	scif_epd_t endpoint;
	struct scif_portID portid;
	int ret;
	
	/* crypto_aes_set_key() below is told the key is AES_KEYSIZE_256 bytes
	 * long, so the buffer must be at least that large; calloc also gives the
	 * key a defined (all-zero) value instead of uninitialised heap bytes. */
	uint8_t *in_key     = calloc(AES_KEYSIZE_256, sizeof *in_key);
	struct crypto_tfm *tfm
        	= malloc(
                	 sizeof(struct crypto_tfm) +
                 	 sizeof(struct crypto_aes_ctx)
               		 );
	if (in_key == NULL || tfm == NULL)
	{
		printf("Allocating AES state failed\n");
		return 1;
	}
	struct crypto_aes_ctx *ctx
        	= crypto_tfm_ctx(tfm);

	ctx->key_length = AES_KEYSIZE_256;
	crypto_aes_set_key(tfm, in_key, AES_KEYSIZE_256);
	
        endpoint = scif_open( );
	if( endpoint == SCIF_OPEN_FAILED ) 
	{
		printf("scif open failed\n");
		return 1;
        }

	ret = scif_bind(endpoint, 23955);
	if(ret==-1) 
	{
		printf("scif_bind failed");
		return 1;
	}

	portid.node = 0;
	portid.port = 23968;

	/* Retry the connection up to 10 more times, one second apart. */
	ret = scif_connect(endpoint, &portid);
	for( int attempt = 0; ret == -1 && attempt < 10; ++attempt ) 
	{
        	sleep(1);
        	ret = scif_connect(endpoint, &portid);
	}
	if (ret==-1)
	{ 
		printf("scif_connect failed\n");
		return 1;
	}

	void *ptr;
	ret = posix_memalign((void**)&ptr, align, len);	
	if (ret)
	{
		printf("Allocating memory failed\n");
		return 1;

	}
	memset(ptr, 0, len);

	/* SCIF_MAP_FIXED with offset == address lets the address be used
	 * directly as the registered offset in the RMA calls below. */
	if( SCIF_REGISTER_FAILED == scif_register(endpoint, ptr, len, (long)ptr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) )
        {
                printf("scif_register of ptr failed due to: %s\n", strerror(errno));
                return 1;
        }

	void *tempbuffer;
	ret = posix_memalign((void**)&tempbuffer, align, len);
	if (ret)
	{
		printf("Allocating tempbuffer failed\n");
		return 1;
	}

       	if( SCIF_REGISTER_FAILED == scif_register(endpoint, tempbuffer, len, (long)tempbuffer, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) )
        {
               	printf("scif_register of temp failed due to: %s\n", strerror(errno));
                return 1;
        }

	void *outbuffer;
	ret = posix_memalign((void**)&outbuffer, align, len);
	if (ret)
	{
		printf("Allocating outbuffer failed %s\n", strerror(errno));
		return 1;
	}

	if( SCIF_REGISTER_FAILED == scif_register(endpoint, outbuffer, len, (long)outbuffer, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) )
        {
                printf("scif_register of outbuffer failed due to: %s\n", strerror(errno));
                return 1;
        }

	/* Byte views of the buffers: indexing a void * is not valid C. */
	uint8_t *in_bytes   = ptr;
	uint8_t *tmp_bytes  = tempbuffer;
	uint8_t *out_bytes  = outbuffer;

	void *remote_ptr;
	void *return_ptr;

	ret = scif_recv(endpoint, &remote_ptr, sizeof(void*), SCIF_RECV_BLOCK);
	if (ret==-1)
	{
		printf("scif_recv failed due to: %s\n", strerror(errno));
		return 1;
	}

        ret = scif_recv(endpoint, &return_ptr, sizeof(void*), SCIF_RECV_BLOCK);
        if (ret==-1)
        {
                printf("scif_recv failed due to: %s\n", strerror(errno));
                return 1;
        }
	
	/* Round 1: fetch, encrypt in 16-byte blocks, write back. */
	struct timespec start_enc, stop_enc;
	clock_gettime(CLOCK_REALTIME, &start_enc);
	if (scif_readfrom(endpoint, (long)ptr, len, (long)remote_ptr, SCIF_RMA_SYNC))
	{
		printf("scif_readfrom failed due to: %s\n", strerror(errno));
		return 1;
	}

	#pragma omp parallel for 
	for (size_t k = 0; k<len; k+=16)
		{ aes_encrypt(tfm, &tmp_bytes[k], &in_bytes[k]); }

	if (scif_writeto(endpoint, (long)tempbuffer, len, (long)return_ptr, SCIF_RMA_SYNC))
        {
		printf("scif_writeto failed due to: %s\n", strerror(errno));
		return 1;
	}					

	clock_gettime(CLOCK_REALTIME, &stop_enc);
	/* Cast so the nanosecond term is divided in floating point whatever the
	 * type of NANOSECONDS is. */
	double time_enc = (stop_enc.tv_sec - start_enc.tv_sec) + (double)( stop_enc.tv_nsec - start_enc.tv_nsec) / NANOSECONDS;
	double result0 = len/time_enc/1048576;
       	printf("%1f,", result0);

	/* Round 2: fetch again, decrypt the round-1 ciphertext, write back.
	 * NOTE(review): the data fetched into ptr is not used by the decrypt
	 * loop; presumably the read is only there to keep the timing
	 * comparable -- confirm. */
        struct timespec start_for, stop_for;
        clock_gettime(CLOCK_REALTIME, &start_for);
        if (scif_readfrom(endpoint, (long)ptr, len, (long)remote_ptr, SCIF_RMA_SYNC))
        {
	        printf("scif_readfrom failed due to: %s\n", strerror(errno));
		return 1;
	}

	#pragma omp parallel for
        for (size_t k=0; k<len; k+=16)
    		{ aes_decrypt(tfm, &out_bytes[k], &tmp_bytes[k]); }

        if (scif_writeto(endpoint, (long)outbuffer, len, (long)return_ptr, SCIF_RMA_SYNC))
    	{
        	printf("scif_writeto failed due to: %s\n", strerror(errno));
                return 1;
        }

	clock_gettime(CLOCK_REALTIME, &stop_for);
        double time_for = (stop_for.tv_sec - start_for.tv_sec) + (double)( stop_for.tv_nsec - start_for.tv_nsec) / NANOSECONDS;
        double result = len/time_for/1048576;
        printf("%1f \n", result);

	/* Send the local buffer address to the peer. */
	ret = scif_send(endpoint, &ptr, sizeof(ptr), SCIF_SEND_BLOCK);
	if (ret==-1)
	{
		printf("scif_send failed due to: %s\n", strerror(errno));
		return 1;
	}

	/* ENOTCONN is tolerated here and not treated as a failure. */
        ret = scif_unregister(endpoint, (off_t)ptr, len );
        if(ret==-1 && errno!=ENOTCONN )
        {
                printf("scif_unregister failed %s\n", strerror(errno));
                return 1;
        }

	scif_close(endpoint);	

	return 0;
}