/** * Initiate a get operation. * * @param btl (IN) BTL module * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ int mca_btl_scif_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *des) { mca_btl_scif_segment_t *src = (mca_btl_scif_segment_t *) des->des_src; mca_btl_scif_segment_t *dst = (mca_btl_scif_segment_t *) des->des_dst; size_t len = lmin (src->base.seg_len, dst->base.seg_len); int rc, mark, flags = 0; off_t roffset, loffset; size_t to_get; #if defined(SCIF_TIMING) struct timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); mca_btl_scif_component.get_count++; #endif BTL_VERBOSE(("Using DMA Get for frag %p from offset %lu", (void *) des, (unsigned long) src->scif_offset)); roffset = src->scif_offset + (off_t)(src->orig_ptr - src->base.seg_addr.lval); loffset = dst->scif_offset + (off_t)(dst->orig_ptr - dst->base.seg_addr.lval); if (mca_btl_scif_component.rma_use_cpu) { flags = SCIF_RMA_USECPU; } if (mca_btl_scif_component.rma_sync) { flags |= SCIF_RMA_SYNC; } /* start the read */ rc = scif_readfrom (endpoint->scif_epd, loffset, len, roffset, flags); if (OPAL_UNLIKELY(-1 == rc)) { return OMPI_ERROR; } /* always call the callback function */ des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; if (!(flags & SCIF_RMA_SYNC)) { /* according to the scif documentation is is better to use a fence rather * than using the SCIF_RMA_SYNC flag with scif_readfrom */ scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); scif_fence_wait (endpoint->scif_epd, mark); } #if defined(SCIF_TIMING) SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time, mca_btl_scif_component.get_time_max, ts); #endif /* since we completed the fence the RMA operation is complete */ mca_btl_scif_frag_complete ((mca_btl_scif_base_frag_t *) des, OMPI_SUCCESS); return OMPI_SUCCESS; }
/** * Initiate a get operation. */ int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { int rc, mark, scif_flags = 0; off_t roffset, loffset; #if defined(SCIF_TIMING) struct timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); mca_btl_scif_component.get_count++; #endif BTL_VERBOSE(("Using DMA Get from remote address %" PRIx64 " to local address %p", remote_address, local_address)); roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base); loffset = local_handle->scif_offset + (off_t)((intptr_t)local_address - local_handle->scif_base); if (mca_btl_scif_component.rma_use_cpu) { scif_flags = SCIF_RMA_USECPU; } if (mca_btl_scif_component.rma_sync) { scif_flags |= SCIF_RMA_SYNC; } /* start the read */ rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags); if (OPAL_UNLIKELY(-1 == rc)) { return OPAL_ERROR; } if (!(scif_flags & SCIF_RMA_SYNC)) { /* according to the scif documentation is is better to use a fence rather * than using the SCIF_RMA_SYNC flag with scif_readfrom */ scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); scif_fence_wait (endpoint->scif_epd, mark); } #if defined(SCIF_TIMING) SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time, mca_btl_scif_component.get_time_max, ts); #endif /* always call the callback function */ cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS); return OPAL_SUCCESS; }
int main( ) { size_t len = 536870912; int align = 4096; scif_epd_t endpoint; struct scif_portID portid; int ret; uint8_t *in_key = malloc(16 * sizeof(uint8_t)); struct crypto_tfm *tfm = malloc( sizeof(struct crypto_tfm) + sizeof(struct crypto_aes_ctx) ); struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); ctx->key_length = AES_KEYSIZE_256; crypto_aes_set_key(tfm, in_key, AES_KEYSIZE_256); endpoint = scif_open( ); if( endpoint == SCIF_OPEN_FAILED ) { printf("scif open failed\n"); return 1; } ret = scif_bind(endpoint, 23955); if(ret==-1) { printf("scif_bind failed"); return 1; } portid.node = 0; portid.port = 23968; ret = scif_connect(endpoint, &portid); for( int attempt = 0; ret == -1 && attempt < 10; ++attempt ) { sleep(1); ret = scif_connect(endpoint, &portid); } if (ret==-1) { printf("scif_connect failed\n"); return 1; } void *ptr; ret = posix_memalign((void**)&ptr, align, len); if (ret) { printf("Allocating memory failed\n"); return 1; } memset(ptr, 0, len); if( SCIF_REGISTER_FAILED == scif_register(endpoint, ptr, len, (long)ptr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) ) { printf("scif_register of ptr failed due to: %s\n", strerror(errno)); return 1; } void *tempbuffer; ret = posix_memalign((void**)&tempbuffer, align, len); if (ret) { printf("Allocating tempbuffer failed\n"); return 1; } if( SCIF_REGISTER_FAILED == scif_register(endpoint, tempbuffer, len, (long)tempbuffer, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) ) { printf("scif_register of temp failed due to: %s\n", strerror(errno)); return 1; } void *outbuffer; ret = posix_memalign((void**)&outbuffer, align, len); if (ret) { printf("Allocating outbuffer failed %s\n", strerror(errno)); return 1; } if( SCIF_REGISTER_FAILED == scif_register(endpoint, outbuffer, len, (long)outbuffer, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED ) ) { printf("scif_register of outbuffer failed due to: %s\n", strerror(errno)); return 1; } void *remote_ptr; void *return_ptr; ret = scif_recv(endpoint, &remote_ptr, sizeof(void*), SCIF_RECV_BLOCK); if (ret==-1) { printf("scif_recv failed due to: %s\n", strerror(errno)); return 1; } ret = scif_recv(endpoint, &return_ptr, sizeof(void*), SCIF_RECV_BLOCK); if (ret==-1) { printf("scif_recv failed due to: %s\n", strerror(errno)); return 1; } struct timespec start_enc, stop_enc; clock_gettime(CLOCK_REALTIME, &start_enc); if (scif_readfrom(endpoint, (long)ptr, len, (long)remote_ptr, SCIF_RMA_SYNC)) { printf("scif_readfrom failed due to: %s\n", strerror(errno)); return 1; } #pragma omp parallel for for (int k = 0; k<len; k+=16) { aes_encrypt(tfm, (uint8_t*)&tempbuffer[k], (uint8_t*)&ptr[k]); } if (scif_writeto(endpoint, (long)tempbuffer, len, (long)return_ptr, SCIF_RMA_SYNC)) { printf("scif_writeto failed due to: %s\n", strerror(errno)); return 1; } clock_gettime(CLOCK_REALTIME, &stop_enc); double time_enc = (stop_enc.tv_sec - start_enc.tv_sec) + ( stop_enc.tv_nsec - start_enc.tv_nsec) / NANOSECONDS; double result0 = len/time_enc/1048576; printf("%1f,", result0); struct timespec start_for, stop_for; clock_gettime(CLOCK_REALTIME, &start_for); if (scif_readfrom(endpoint, (long)ptr, len, (long)remote_ptr, SCIF_RMA_SYNC)) { printf("scif_readfrom failed due to: %s\n", strerror(errno)); return 1; } #pragma omp parallel for for (int k=0; k<len; k+=16) { aes_decrypt(tfm, (uint8_t*)&outbuffer[k], (uint8_t*)&tempbuffer[k]); } if (scif_writeto(endpoint, (long)outbuffer, len, (long)return_ptr, SCIF_RMA_SYNC)) { printf("scif_writeto failed due to: %s\n", strerror(errno)); return 1; } clock_gettime(CLOCK_REALTIME, &stop_for); double time_for = (stop_for.tv_sec - start_for.tv_sec) + ( stop_for.tv_nsec - start_for.tv_nsec) / NANOSECONDS; double result = 536870912/time_for/1048576; printf("%1f \n", result); ret = scif_send(endpoint, &ptr, sizeof(long), SCIF_SEND_BLOCK); if (ret==-1) { printf("scif_send failed due to: %s\n", strerror(errno)); return 1; } ret = scif_unregister(endpoint, (off_t)ptr, len ); if(ret==-1 && errno!=ENOTCONN ) { printf("scif_unregister failed %s\n", strerror(errno)); return 1; } scif_close(endpoint); }