/*
 * Issue the RDMA puts for a reflect operation on array `a`.
 *
 * After a barrier, every dimension of `a` is visited in order and its
 * reflect schedule (a->info[i].reflect_sched) is used to post the puts:
 *
 *   - lower reflect: when lwidth[i] is non-zero and the schedule has a
 *     hi_rank (!= -1), `count` puts go from lo_send_array to lo_recv_array
 *     on hi_rank, each of lwidth[i] * blocklength bytes, with consecutive
 *     puts `stride` apart on both sides.
 *   - upper reflect: when uwidth[i] is non-zero and the schedule has a
 *     lo_rank (!= -1), the same pattern goes from hi_send_array to
 *     hi_recv_array on lo_rank, using uwidth[i].
 *
 * The two directions use different NIC flag pairs (NIC0/NIC2 for lower,
 * NIC1/NIC3 for upper).
 *
 * a           array whose per-dimension reflect schedules are used
 * lwidth      per-dimension lower width; 0 skips the lower reflect
 * uwidth      per-dimension upper width; 0 skips the upper reflect
 * is_periodic not referenced by this function
 * tag         tag passed to every FJMPI_Rdma_put call
 *
 * This function only posts the puts; it does not wait for them here.
 * _XMP_TSTART/_XMP_TEND are presumably timing macros -- confirm against
 * their definition.
 */
static void _XMP_reflect_start(_XMP_array_t *a, int *lwidth, int *uwidth,
                               int *is_periodic, int tag)
{
  _XMP_TSTART(t1);

  xmp_barrier();

  for (int dim = 0; dim < a->dim; dim++) {
    _XMP_reflect_sched_t *sched = a->info[dim].reflect_sched;

    _XMP_TSTART(t0);

    // Lower reflect: lo_send_array -> lo_recv_array on hi_rank.
    if (lwidth[dim] != 0 && sched->hi_rank != -1) {
      const uint64_t remote_base = (uint64_t)sched->lo_recv_array;
      const uint64_t local_base = (uint64_t)sched->lo_send_array;
      int row = 0;

      while (row < sched->count) {
        FJMPI_Rdma_put(sched->hi_rank, tag,
                       remote_base + row * sched->stride,
                       local_base + row * sched->stride,
                       lwidth[dim] * sched->blocklength,
                       FJMPI_RDMA_LOCAL_NIC0 | FJMPI_RDMA_REMOTE_NIC2);
        row++;
      }
    }

    // Upper reflect: hi_send_array -> hi_recv_array on lo_rank.
    if (uwidth[dim] != 0 && sched->lo_rank != -1) {
      const uint64_t remote_base = (uint64_t)sched->hi_recv_array;
      const uint64_t local_base = (uint64_t)sched->hi_send_array;
      int row = 0;

      while (row < sched->count) {
        FJMPI_Rdma_put(sched->lo_rank, tag,
                       remote_base + row * sched->stride,
                       local_base + row * sched->stride,
                       uwidth[dim] * sched->blocklength,
                       FJMPI_RDMA_LOCAL_NIC1 | FJMPI_RDMA_REMOTE_NIC3);
        row++;
      }
    }

    _XMP_TEND(xmptiming_.tdim_comm[dim], t0);
  }

  _XMP_TEND(xmptiming_.t_comm, t1);
}
/*
 * Put a single element of `elmt_size` bytes into a coarray on `target_rank`
 * and wait for the put to complete.
 *
 * target_rank rank whose copy of the destination coarray is written
 * dst_desc    destination coarray; its addr[target_rank] is the remote base
 * dst_offset  element offset into the destination (scaled by elmt_size)
 * value       source value, used only when src_desc is NULL
 * src_desc    source coarray, or NULL to send `value` instead
 * src_offset  element offset into the source (scaled by elmt_size);
 *             only used when src_desc is not NULL
 * elmt_size   size in bytes of the element transferred
 *
 * When src_desc is NULL the address of `value` is registered under
 * _XMP_TEMP_MEMID for the duration of the call and deregistered before
 * returning.
 *
 * NOTE(review): in that case elmt_size bytes are registered starting at an
 * `int`; this looks like it assumes elmt_size <= sizeof(int) -- confirm
 * against callers.
 */
void _XMP_fjrdma_atomic_define(int target_rank, _XMP_coarray_t *dst_desc, size_t dst_offset, int value,
                               _XMP_coarray_t *src_desc, size_t src_offset, size_t elmt_size)
{
  const int use_temp_buffer = (src_desc == NULL);
  uint64_t remote_addr = (uint64_t)dst_desc->addr[target_rank] + elmt_size * dst_offset;
  uint64_t local_addr;

  if (use_temp_buffer) {
    // No source coarray: expose the by-value argument to the RDMA layer.
    local_addr = FJMPI_Rdma_reg_mem(_XMP_TEMP_MEMID, &value, elmt_size);
  } else {
    local_addr = src_desc->laddr + elmt_size * src_offset;
  }

  FJMPI_Rdma_put(target_rank, _XMP_FJRDMA_TAG, remote_addr, local_addr, elmt_size, _XMP_COARRAY_FLAG_NIC);
  _XMP_add_num_of_puts();

  // Make sure the put above has completed before the temporary source
  // registration is released.
  _XMP_fjrdma_sync_memory_put();

  if (use_temp_buffer) {
    FJMPI_Rdma_dereg_mem(_XMP_TEMP_MEMID);
  }
}