/**
 * \brief Shut down the DCMF messager.
 *
 * Invokes DCMF_Messager_finalize() while critical section 0 is held.
 *
 * \return 0
 */
int ARMCIX_Finalize ()
{
  DCMF_CriticalSection_enter (0);

  DCMF_Messager_finalize ();

  DCMF_CriticalSection_exit (0);

  return 0;
}
/**
 * \brief ARMCI Extension non-blocking put operation.
 *
 * Records one outstanding operation on the handle, on the connection to
 * \c proc and on the global connection, then posts a DCMF_Put() using a
 * request obtained from ARMCIX_DCMF_request_allocate().
 *
 * \param[in] src       Source buffer on the local node
 * \param[in] dst       Destination buffer on the remote node
 * \param[in] bytes     Number of bytes to transfer
 * \param[in] proc      Remote node rank
 * \param[in] nb_handle ARMCI non-blocking handle
 *
 * \return 0 if DCMF_Put() returned DCMF_SUCCESS, 1 otherwise
 */
int ARMCIX_NbPut (void * src, void * dst, int bytes, int proc, armci_ihdl_t nb_handle)
{
  DCMF_CriticalSection_enter (0);

  /* The DCMF bookkeeping lives inside the handle's completion info. */
  armcix_dcmf_opaque_t * state = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  state->active     = 1;
  state->connection = &__connection[proc];

  __connection[proc].active++;
  __global_connection.active++;

  /* Callback handed to the request allocator, with the handle as client data. */
  DCMF_Callback_t on_free;
  on_free.function   = ARMCIX_DCMF_NbOp_cb_done;
  on_free.clientdata = nb_handle;

  ARMCIX_DCMF_Request_t * req = ARMCIX_DCMF_request_allocate (on_free);

  /* Completion callback of the put: hands the request to ARMCIX_DCMF_request_free. */
  DCMF_Callback_t on_done;
  on_done.function   = (void(*)(void *)) ARMCIX_DCMF_request_free;
  on_done.clientdata = req;

  DCMF_Memregion_t * local_region  = &__connection[proc].local_mem_region;
  DCMF_Memregion_t * remote_region = &__connection[proc].remote_mem_region;

  DCMF_Result rc = DCMF_Put (&__put_protocol,
                             &(req->request),
                             on_done,
                             DCMF_SEQUENTIAL_CONSISTENCY,
                             proc,
                             bytes,
                             local_region,
                             remote_region,
                             armcix_dcmf_va_to_offset (local_region, src),
                             armcix_dcmf_va_to_offset (remote_region, dst));

  DCMF_CriticalSection_exit (0);

  return (rc == DCMF_SUCCESS) ? 0 : 1;
}
/** * \brief ARMCI Extension blocking put operation. * * \param[in] src Source buffer on the local node * \param[in] dst Destination buffer on the remote node * \param[in] bytes Number of bytes to transfer * \param[in] proc Remote node rank * * \return ??? */ int ARMCIX_Put( void * src, void * dst, int bytes, int proc) { DCMF_CriticalSection_enter (0); volatile unsigned active = 1; DCMF_Callback_t cb_wait = { ARMCIX_DCMF_cb_decrement, (void *)&active }; DCMF_Request_t request; DCMF_Memregion_t * src_memregion = &__connection[proc].local_mem_region; DCMF_Memregion_t * dst_memregion = &__connection[proc].remote_mem_region; DCMF_Result result = DCMF_Put (&__put_protocol, &request, cb_wait, DCMF_SEQUENTIAL_CONSISTENCY, proc, bytes, src_memregion, dst_memregion, armcix_dcmf_va_to_offset (src_memregion, src), armcix_dcmf_va_to_offset (dst_memregion, dst)); #ifdef BLOCKING_OPERATIONS_REQUIRE_FENCE ARMCIX_Fence (proc); #else while (active) DCMF_Messager_advance (); #endif DCMF_CriticalSection_exit (0); return (result != DCMF_SUCCESS); }
/**
 * \brief ARMCI Extension non-blocking strided put operation.
 *
 * Computes the number of individual requests as the product of
 * seg_count[1..stride_levels], records that many outstanding operations on
 * the handle, the connection to \c proc and the global connection, and then
 * delegates the actual posting to ARMCIX_DCMF_PutS_recurse().
 *
 * \param[in] src_ptr        pointer to 1st segment at source
 * \param[in] src_stride_arr array of strides at source
 * \param[in] dst_ptr        pointer to 1st segment at destination
 * \param[in] dst_stride_arr array of strides at destination
 * \param[in] seg_count      number of segments at each stride levels: count[0]=bytes
 * \param[in] stride_levels  number of stride levels
 * \param[in] proc           remote process(or) ID
 * \param[in] nb_handle      ARMCI non-blocking handle
 *
 * \return 0 (always)
 */
int ARMCIX_NbPutS (void * src_ptr, int * src_stride_arr,
                   void * dst_ptr, int * dst_stride_arr,
                   int * seg_count, int stride_levels, int proc,
                   armci_ihdl_t nb_handle)
{
  DCMF_CriticalSection_enter (0);

  /* Number of requests: one per segment below the contiguous level. */
  unsigned requests = 1;
  unsigned level;
  for (level = 0; level < stride_levels; level++)
  {
    requests *= seg_count[level + 1];
  }

  armcix_dcmf_opaque_t * state = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  state->connection = &__connection[proc];
  state->active     = requests;

  __connection[proc].active  += requests;
  __global_connection.active += requests;

  unsigned posted = ARMCIX_DCMF_PutS_recurse (src_ptr, src_stride_arr,
                                              dst_ptr, dst_stride_arr,
                                              seg_count, stride_levels,
                                              proc, nb_handle);

  /* The recursion must have issued exactly the number of puts accounted for above. */
  assert (requests == posted);

  DCMF_CriticalSection_exit (0);

  return 0;
}
/** * \brief Point-to-point fence operation. * * Blocks until all active messages between the local node and the remote * node have completed and acknowledged by the remote node. * * \param[in] proc Rank of the remote node to fence * * \see ARMCIX_AllFence * \see ARMCIX_DCMF_ReceiveFenceRequest * \see ARMCIX_DCMF_ReceiveFenceAck */ void ARMCIX_Fence (int proc) { DCMF_CriticalSection_enter (0); DCMF_Request_t request; volatile unsigned active = 1; DCQuad quad; DCMF_Callback_t * cb = (DCMF_Callback_t *) &quad; cb->function = ARMCIX_DCMF_cb_decrement; cb->clientdata = (void *) &active; DCMF_Send ( &__fence_rts_protocol, &request, (DCMF_Callback_t) { NULL, NULL }, DCMF_SEQUENTIAL_CONSISTENCY, proc, 0, NULL, (DCQuad *) &quad, 1); while (active) DCMF_Messager_advance (); DCMF_CriticalSection_exit (0); }
/**
 * \brief DCMF ARMCI Extension blocking wait for all requests to all processes
 *
 * Calls DCMF_Messager_advance() repeatedly, inside critical section 0,
 * until the global active count (__global_connection.active) reads zero.
 *
 * \todo define return values
 * \return 0
 *
 * \see ARMCIX_DCMF_Connection_t
 * \see __global_connection
 */
int ARMCIX_WaitAll ()
{
  DCMF_CriticalSection_enter (0);

  for (;;)
  {
    if (!__global_connection.active) break;
    DCMF_Messager_advance();
  }

  DCMF_CriticalSection_exit (0);

  return 0;
}
/**
 * \brief DCMF ARMCI Extension blocking wait for all requests to one process
 *
 * Calls DCMF_Messager_advance() repeatedly, inside critical section 0,
 * until the active count of the connection to \c proc reads zero.
 *
 * \param[in] proc Remote process rank (used unchecked as an index into __connection)
 *
 * \todo define return values
 * \return 0
 *
 * \see ARMCIX_DCMF_Connection_t
 * \see __connection
 */
int ARMCIX_WaitProc (int proc)
{
  DCMF_CriticalSection_enter (0);

  for (;;)
  {
    if (!__connection[proc].active) break;
    DCMF_Messager_advance();
  }

  DCMF_CriticalSection_exit (0);

  return 0;
}
/**
 * \brief DCMF ARMCI Extension blocking wait for a specific request
 *
 * The armcix_opaque_t object is reinterpreted as an armcix_dcmf_opaque_t,
 * which holds the DCMF ARMCIX state of an operation in progress.
 *
 * Calls DCMF_Messager_advance() repeatedly, inside critical section 0,
 * until that state's active count reads zero.
 *
 * \param[in] cmpl_info Pointer to the ARMCIX opaque object
 *
 * \todo define return values
 * \return 0
 *
 * \see armcix_dcmf_opaque_t
 */
int ARMCIX_Wait (armcix_opaque_t * cmpl_info)
{
  DCMF_CriticalSection_enter (0);

  armcix_dcmf_opaque_t * state = (armcix_dcmf_opaque_t *) cmpl_info;

  for (;;)
  {
    if (!state->active) break;
    DCMF_Messager_advance();
  }

  DCMF_CriticalSection_exit (0);

  return 0;
}
/**
 * \brief Register the DCMF ARMCI Extension put operation.
 *
 * Registers __put_protocol with the default DCMF put protocol while
 * holding critical section 0.
 *
 * \param[in] connection_array Connection array (not used by this function)
 *
 * \see DCMF_Put_register
 */
void ARMCIX_DCMF_Put_register (ARMCIX_DCMF_Connection_t * connection_array)
{
  DCMF_CriticalSection_enter (0);

  DCMF_Put_Configuration_t config = { DCMF_DEFAULT_PUT_PROTOCOL };

  DCMF_Put_register (&__put_protocol, &config);

  DCMF_CriticalSection_exit (0);
}
/**
 * \brief ARMCI Extension non-blocking vector get operation.
 *
 * Issues one DCMF_Get() per (descriptor, pointer-pair) entry. Before any
 * get is posted, the total number of entries is added to the active counts
 * of the handle, of the connection to \c proc and of the global connection.
 *
 * \param[in] darr      Descriptor array
 * \param[in] len       Length of descriptor array
 * \param[in] proc      Remote process(or) ID
 * \param[in] nb_handle ARMCI non-blocking handle
 *
 * \return 0 if every DCMF_Get() returned DCMF_SUCCESS; 1 if any get failed
 *         or if no get was issued at all (empty descriptor list).
 */
int ARMCIX_NbGetV (armci_giov_t * darr, int len, int proc, armci_ihdl_t nb_handle)
{
  /* Stays DCMF_ERROR when no get is issued, so an empty request still
   * reports failure exactly as it always has. */
  DCMF_Result result = DCMF_ERROR;

  /* Sticky failure flag: previously only the LAST DCMF_Get() result was
   * reported, so an early failure followed by a success went unnoticed. */
  int failed = 0;

  DCMF_CriticalSection_enter (0);

  //fprintf (stderr, "ARMCIX_NbGetV() >> len=%d, proc=%d\n", len, proc);

  // Calculate the number of requests: one per pointer pair of every descriptor.
  unsigned n = 0;
  unsigned i, j;
  for (i = 0; i < len; i++)
    n += (unsigned) darr[i].ptr_array_len;

  armcix_dcmf_opaque_t * dcmf = (armcix_dcmf_opaque_t *) &nb_handle->cmpl_info;
  dcmf->connection = &__connection[proc];
  dcmf->active = n;

  __connection[proc].active += n;
  __global_connection.active += n;

  //fprintf (stderr, "ARMCIX_NbGetV() -- n=%d, dcmf->active=%d, __connection[%d].active=%d, __global_connection.active=%d\n", n, dcmf->active, proc, __connection[proc].active, __global_connection.active);

  /* For a get the data source is the remote region and the target is local. */
  DCMF_Memregion_t * src_memregion = &__connection[proc].remote_mem_region;
  DCMF_Memregion_t * dst_memregion = &__connection[proc].local_mem_region;

  DCMF_Callback_t cb_free = { ARMCIX_DCMF_NbOp_cb_done, nb_handle };
  DCMF_Callback_t cb_done = { (void(*)(void *)) ARMCIX_DCMF_request_free, NULL };

  for (i = 0; i < len; i++)
  {
    for (j = 0; j < darr[i].ptr_array_len; j++)
    {
      //fprintf (stderr, "ARMCIX_NbGetV() -- src=%p, dst=%p, bytes=%d\n", darr[i].src_ptr_array[j], darr[i].dst_ptr_array[j], darr[i].bytes);

      /* Each get owns its own request; the request is the client data of
       * the completion callback. */
      ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_free);
      cb_done.clientdata = new_request;

      result = DCMF_Get (&__get_protocol,
                         &(new_request->request),
                         cb_done,
                         DCMF_SEQUENTIAL_CONSISTENCY,
                         proc,
                         darr[i].bytes,
                         src_memregion,
                         dst_memregion,
                         armcix_dcmf_va_to_offset (src_memregion, darr[i].src_ptr_array[j]),
                         armcix_dcmf_va_to_offset (dst_memregion, darr[i].dst_ptr_array[j]));

      if (result != DCMF_SUCCESS)
        failed = 1;
    }
  }

  //fprintf (stderr, "ARMCIX_NbGetV() << result=%d\n", result);

  DCMF_CriticalSection_exit (0);

  return (failed || (result != DCMF_SUCCESS));
}
void *armcix_advance(void * dummy) { DCMF_CriticalSection_enter (0); //fprintf(stdout,"entered armcix_advance\n"); while (armcix_advance_active) { DCMF_Messager_advance (0); DCMF_CriticalSection_cycle (0); } //fprintf(stdout,"exited armcix_advance\n"); DCMF_CriticalSection_exit(0); }
int ARMCIX_Finalize () { DCMF_CriticalSection_enter(0); // tell armcix_advance_thread to stop hitting DCMF_Messager_advance() armcix_advance_active = 0; DCMF_Messager_finalize (); DCMF_CriticalSection_exit(0); return 0; }
/** * \brief ARMCI Extension blocking read-modify-write operation. * * \param[in] op * \param[in] ploc * \param[in] prem * \param[in] extra * \param[in] proc * * \retval ??? */ int ARMCIX_Rmw (int op, int * ploc, int * prem, int extra, int proc) { DCMF_CriticalSection_enter (0); volatile unsigned active = 1; //fprintf (stderr, "ARMCIX_Rmw() - op == %d, ploc == %p, prem == %p, extra == %d, proc == %d\n", op, ploc, prem, extra, proc); /* Initialize the RMW request data */ ARMCIX_DCMF_RMWRequest_t info; info.op = op; info.ploc = ploc; info.prem = prem; switch (op) { case ARMCI_FETCH_AND_ADD: case ARMCI_FETCH_AND_ADD_LONG: info.extra = extra; break; case ARMCI_SWAP: case ARMCI_SWAP_LONG: info.extra = *ploc; break; default: armci_die("rmw: operation not supported",op); break; } info.active = (unsigned *)&active; DCMF_Request_t request; DCMF_Callback_t cb_wait = { NULL, NULL }; DCMF_Send ( &__rmw_request_protocol, &request, cb_wait, DCMF_SEQUENTIAL_CONSISTENCY, proc, 0, NULL, (DCQuad *)&info, 2); //fprintf (stderr, "ARMCIX_Rmw() > active == %d (&active == %p)\n", active, &active); while (active) DCMF_Messager_advance (); //fprintf (stderr, "ARMCIX_Rmw() < active == %d (&active == %p)\n", active, &active); DCMF_CriticalSection_exit (0); return 0; }
/** * \brief Initialize the DCMF ARMCI resources */ int ARMCIX_Init () { DCMF_CriticalSection_enter(0); DCMF_Messager_initialize (); ARMCIX_DCMF_Connection_initialize (); /* Determine request pool defaults */ int ARMCIX_DCMF_REQUESTPOOL_MAX = 1000; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_MAX"), &ARMCIX_DCMF_REQUESTPOOL_MAX); int ARMCIX_DCMF_REQUESTPOOL_INC = 0; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_INC"), &ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_request_initialize (ARMCIX_DCMF_REQUESTPOOL_MAX, ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_Get_register (); ARMCIX_DCMF_Put_register (__connection); ARMCIX_DCMF_Acc_register (__connection); ARMCIX_DCMF_Fence_register (__connection); ARMCIX_DCMF_Rmw_register (); /* Determine interrupt mode */ int interrupts = 1; ENV_Bool (getenv ("DCMF_INTERRUPT"), &interrupts); ENV_Bool (getenv ("DCMF_INTERRUPTS"), &interrupts); DCMF_Configure_t config; memset (&config, 0x00, sizeof(DCMF_Configure_t)); config.interrupts = (interrupts==0)?DCMF_INTERRUPTS_OFF:DCMF_INTERRUPTS_ON; DCMF_Messager_configure (&config, &config); DCMF_Messager_configure (NULL, &config); //ARMCIX_DCMF_request_print ("after armcix_init"); DCMF_CriticalSection_exit(0); return 0; }
int A1D_Finalize() { int mpi_status; int i; DCMF_Result dcmf_result; A1D_Print_stats(); #ifdef FLUSH_IMPLEMENTED /* free Put list */ free(A1D_Put_flush_list); #ifdef ACCUMULATE_IMPLEMENTED /* free Acc list */ free(A1D_Send_flush_list); #endif #endif /* barrier so that no one is able to access remote memregions after they are destroyed */ mpi_status = MPI_Barrier(A1D_COMM_WORLD); assert(mpi_status==0); /* destroy all memregions - not absolutely unnecessary if memregion creation has no side effects */ DCMF_CriticalSection_enter(0); for (i = 0; i < mpi_size; i++) { dcmf_result = DCMF_Memregion_destroy(&A1D_Memregion_list[i]); assert(dcmf_result==DCMF_SUCCESS); } DCMF_CriticalSection_exit(0); /* free memregion list */ free(A1D_Memregion_list); /* free base pointer list */ free(A1D_Baseptr_list); mpi_status = MPI_Comm_free(&A1D_COMM_WORLD); assert(mpi_status==0); return(0); }
/**
 * \brief Register the DCMF ARMCI Extension fence operation.
 *
 * Registers two protocols while holding critical section 0:
 *  - __fence_rts_protocol, a send protocol dispatching to
 *    ARMCIX_DCMF_ReceiveFenceRequest, and
 *  - __fence_ack_protocol, a control protocol dispatching to
 *    ARMCIX_DCMF_ReceiveFenceAck.
 * Both receive \c connection_array as their client data.
 *
 * \param[in] connection_array Connection array
 *
 * \see DCMF_Send_register
 * \see DCMF_Control_register
 */
void ARMCIX_DCMF_Fence_register (ARMCIX_DCMF_Connection_t * connection_array)
{
  DCMF_CriticalSection_enter (0);

  /* Fence request ("ready to send") protocol. */
  DCMF_Send_Configuration_t rts_config = {
    DCMF_DEFAULT_SEND_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_ReceiveFenceRequest,
    connection_array,
    NULL,
    NULL
  };
  DCMF_Send_register (&__fence_rts_protocol, &rts_config);

  /* Fence acknowledgement protocol. */
  DCMF_Control_Configuration_t ack_config = {
    DCMF_DEFAULT_CONTROL_PROTOCOL,
    DCMF_DEFAULT_NETWORK,
    ARMCIX_DCMF_ReceiveFenceAck,
    connection_array
  };
  DCMF_Control_register (&__fence_ack_protocol, &ack_config);

  DCMF_CriticalSection_exit (0);
}
/** * \brief Global fence operation. * * Blocks until all active messages between the local node and all remote * nodes have completed and acknowledged by the remote node. * * \see ARMCIX_Fence * \see ARMCIX_DCMF_ReceiveFenceRequest * \see ARMCIX_DCMF_ReceiveFenceAck */ void ARMCIX_AllFence () { DCMF_CriticalSection_enter (0); unsigned size = DCMF_Messager_size (); unsigned peer; volatile unsigned active = 0; DCQuad quad; DCMF_Callback_t * cb = (DCMF_Callback_t *) &quad; cb->function = ARMCIX_DCMF_cb_decrement; cb->clientdata = (void *) &active; DCMF_Callback_t cb_null = { NULL, NULL }; DCMF_Callback_t cb_done = { (void (*)(void *, DCMF_Error_t *))ARMCIX_DCMF_request_free, NULL }; for (peer = 0; peer < size; peer++) { ARMCIX_DCMF_Request_t * new_request = ARMCIX_DCMF_request_allocate (cb_null); cb_done.clientdata = new_request; active++; DCMF_Send ( &__fence_rts_protocol, &(new_request->request), cb_done, DCMF_SEQUENTIAL_CONSISTENCY, peer, 0, NULL, (DCQuad *) &quad, 1); while (active) DCMF_Messager_advance (); } DCMF_CriticalSection_exit (0); }
/** * \brief Register the DCMF ARMCI Extention rmw operation. * * \see DCMF_Control_register * \see DCMF_Send_register */ void ARMCIX_DCMF_Rmw_register () { DCMF_CriticalSection_enter (0); DCMF_Send_Configuration_t request_configuration = { DCMF_DEFAULT_SEND_PROTOCOL, DCMF_DEFAULT_NETWORK, ARMCIX_DCMF_RecvRMWRequest, NULL, NULL, NULL }; DCMF_Send_register (&__rmw_request_protocol, &request_configuration); DCMF_Control_Configuration_t response_configuration = { DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK, ARMCIX_DCMF_ReceiveRMWResponse, NULL }; DCMF_Control_register (&__rmw_response_protocol, &response_configuration); DCMF_CriticalSection_exit (0); }
int A1D_Initialize() { int mpi_initialized, mpi_provided; int mpi_status; int i; size_t bytes_in, bytes_out; DCMF_Result dcmf_result; DCMF_Configure_t dcmf_config; DCMF_Memregion_t local_memregion; /*************************************************** * * configure MPI * ***************************************************/ /* MPI has to be initialized for this implementation to work */ MPI_Initialized(&mpi_initialized); assert(mpi_initialized==1); /* MPI has to be thread-safe so that DCMF doesn't explode */ MPI_Query_thread(&mpi_provided); assert(mpi_provided==MPI_THREAD_MULTIPLE); /* have to use our own communicator for collectives to be proper */ mpi_status = MPI_Comm_dup(MPI_COMM_WORLD,&A1D_COMM_WORLD); assert(mpi_status==0); /* get my MPI rank */ mpi_status = MPI_Comm_rank(A1D_COMM_WORLD,&myrank); assert(mpi_status==0); /* get MPI world size */ mpi_status = MPI_Comm_size(A1D_COMM_WORLD,&mpi_size); assert(mpi_status==0); /* make sure MPI and DCMF agree */ assert(myrank==DCMF_Messager_rank()); assert(mpi_size==DCMF_Messager_size()); /* barrier before DCMF_Messager_configure to make sure MPI is ready everywhere */ mpi_status = MPI_Barrier(A1D_COMM_WORLD); assert(mpi_status==0); /*************************************************** * * configure DCMF * ***************************************************/ /* to be safe, but perhaps not necessary */ dcmf_config.thread_level = DCMF_THREAD_MULTIPLE; #ifdef ACCUMULATE_IMPLEMENTED /* interrupts required for accumulate only, Put/Get use DMA * if accumulate not used, MPI will query environment for DCMF_INTERRUPTS */ dcmf_config.interrupts = DCMF_INTERRUPTS_ON; #endif /* reconfigure DCMF with interrupts on */ DCMF_CriticalSection_enter(0); dcmf_result = DCMF_Messager_configure(&dcmf_config, &dcmf_config); assert(dcmf_result==DCMF_SUCCESS); DCMF_CriticalSection_exit(0); /* barrier after DCMF_Messager_configure to make sure everyone has the new DCMF config */ mpi_status = MPI_Barrier(A1D_COMM_WORLD); assert(mpi_status==0); 
/*************************************************** * * setup DCMF memregions * ***************************************************/ /* allocate memregion list */ A1D_Memregion_list = malloc( mpi_size * sizeof(DCMF_Memregion_t) ); assert(A1D_Memregion_list != NULL); /* allocate base pointer list */ A1D_Baseptr_list = malloc( mpi_size * sizeof(void*) ); assert(A1D_Memregion_list != NULL); /* create memregions */ bytes_in = -1; DCMF_CriticalSection_enter(0); dcmf_result = DCMF_Memregion_create(&local_memregion,&bytes_out,bytes_in,NULL,0); assert(dcmf_result==DCMF_SUCCESS); DCMF_CriticalSection_exit(0); /* exchange memregions because we don't use symmetry heap */ mpi_status = MPI_Allgather(&local_memregion,sizeof(DCMF_Memregion_t),MPI_BYTE, A1D_Memregion_list,sizeof(DCMF_Memregion_t),MPI_BYTE, A1D_COMM_WORLD); assert(mpi_status==0); /* destroy temporary local memregion */ DCMF_CriticalSection_enter(0); dcmf_result = DCMF_Memregion_destroy(&local_memregion); assert(dcmf_result==DCMF_SUCCESS); DCMF_CriticalSection_exit(0); /* check for valid memregions */ DCMF_CriticalSection_enter(0); for (i = 0; i < mpi_size; i++) { dcmf_result = DCMF_Memregion_query(&A1D_Memregion_list[i], &bytes_out, &A1D_Baseptr_list[i]); assert(dcmf_result==DCMF_SUCCESS); } DCMF_CriticalSection_exit(0); #ifdef FLUSH_IMPLEMENTED /*************************************************** * * setup flush list(s) * ***************************************************/ /* allocate Put list */ A1D_Put_flush_list = malloc( mpi_size * sizeof(int) ); assert(A1D_Put_flush_list != NULL); #ifdef ACCUMULATE_IMPLEMENTED /* allocate Acc list */ A1D_Send_flush_list = malloc( mpi_size * sizeof(int) ); assert(A1D_Send_flush_list != NULL); #endif #endif /*************************************************** * * define null callback * ***************************************************/ A1D_Nocallback.function = NULL; A1D_Nocallback.clientdata = NULL; return(0); }
/** * \brief Initialize the DCMF ARMCI resources */ int ARMCIX_Init () { DCMF_CriticalSection_enter(0); DCMF_Messager_initialize (); ARMCIX_DCMF_Connection_initialize (); /* Determine request pool defaults */ int ARMCIX_DCMF_REQUESTPOOL_MAX = 1000; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_MAX"), &ARMCIX_DCMF_REQUESTPOOL_MAX); int ARMCIX_DCMF_REQUESTPOOL_INC = 0; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_INC"), &ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_request_initialize (ARMCIX_DCMF_REQUESTPOOL_MAX, ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_Get_register (); ARMCIX_DCMF_Put_register (__connection); ARMCIX_DCMF_Acc_register (__connection); ARMCIX_DCMF_Fence_register (__connection); ARMCIX_DCMF_Rmw_register (); /* Initializer helper thread or configure interrupt mode */ int interrupts = 0; ENV_Bool (getenv ("DCMF_INTERRUPT"), &interrupts); ENV_Bool (getenv ("DCMF_INTERRUPTS"), &interrupts); //fprintf(stdout,"interrupts = %d\n",interrupts); /*if (interrupts==1){ if( 0==DCMF_Messager_rank() ) fprintf(stdout,"DCMF interrupts ON\n"); } else { if( 0==DCMF_Messager_rank() ) fprintf(stdout,"DCMF interrupts OFF\n"); }*/ if (interrupts==0) { int ret = pthread_create(&armcix_advance_thread, NULL, armcix_advance, NULL); if ( ret != 0 ) { if( 0==DCMF_Messager_rank() ) fprintf(stdout,"pthread_create failed\n"); armcix_advance_active = 0; } else { if( 0==DCMF_Messager_rank() ) fprintf(stdout,"pthread_create succeeded\n"); armcix_advance_active = 1; } } DCMF_Configure_t config; memset (&config, 0x00, sizeof(DCMF_Configure_t)); config.interrupts = (interrupts==0)?DCMF_INTERRUPTS_OFF:DCMF_INTERRUPTS_ON; DCMF_Messager_configure (&config, &config); DCMF_Messager_configure (NULL, &config); //ARMCIX_DCMF_request_print ("after armcix_init"); DCMF_CriticalSection_exit(0); return 0; }
void ARMCIX_DCMF_Connection_initialize () { DCMF_CriticalSection_enter(0); __global_connection.peer = (unsigned) -1; unsigned rank = DCMF_Messager_rank (); unsigned size = DCMF_Messager_size (); posix_memalign ((void **)&__connection, 16, sizeof(ARMCIX_DCMF_Connection_t) * size); bzero ((void *)__connection, sizeof(ARMCIX_DCMF_Connection_t) * size); void * base = NULL; size_t bytes = (size_t) -1; unsigned i; for (i = 0; i < size; i++) { __connection[i].peer = i; #warning fix memregion setup to handle non-global address space pinning. //DCMF_Result result = DCMF_Memregion_create (&__connection[i].local_mem_region, &bytes, (size_t) -1, NULL, 0); } // Register a send protocol to exchange memory regions DCMF_Protocol_t send_protocol; DCMF_Send_Configuration_t send_configuration = { DCMF_DEFAULT_SEND_PROTOCOL, DCMF_DEFAULT_NETWORK, ARMCIX_DCMF_RecvMemregion1, __connection, ARMCIX_DCMF_RecvMemregion2, __connection }; DCMF_Send_register (&send_protocol, &send_configuration); DCMF_Request_t request; volatile unsigned active; DCMF_Callback_t cb_done = { ARMCIX_DCMF_cb_decrement, (void *) &active }; // Exchange the memory regions __memregions_to_receive = size; for (i = 0; i < size; i++) { unsigned peer = (rank+i)%size; active = 1; DCMF_Send (&send_protocol, &request, cb_done, DCMF_SEQUENTIAL_CONSISTENCY, peer, sizeof(DCMF_Memregion_t), (char *) &__connection[peer].local_mem_region, (DCQuad *) NULL, 0); while (active) DCMF_Messager_advance(); } while (__memregions_to_receive) DCMF_Messager_advance(); DCMF_CriticalSection_exit(0); }
void send_remoteadvance() { DCMF_Request_t *send_req; DCMF_Callback_t send_done; int done_count; unsigned int msgsize, i, dst; DCQuad msginfo; send_req = (DCMF_Request_t *) malloc(sizeof(DCMF_Request_t) * ITERATIONS_LOCAL); send_done.function = done; send_done.clientdata = (void *) &done_count; if (myrank == 0) { printf("Send latency in usec\n"); fflush(stdout); } if (myrank == 0) { char buffer[100]; sprintf(buffer, "%20s %20s %20s", "Msg Size", "Send-Remote Barrier", "Send-Remote Sleep"); printf("%s \n", buffer); fflush(stdout); } if (myrank == 0) { for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2) { /*********************** * start timer * ***********************/ t_start = DCMF_Timebase(); done_count = 10000; for (i = 0; i < ITERATIONS_LOCAL; i++) { DCMF_Send(&snd_reg, &send_req[i], send_done, DCMF_SEQUENTIAL_CONSISTENCY, (myrank + 1) % nranks, msgsize, source, &msginfo, 1); } while (done_count > 0) DCMF_Messager_advance(); t_stop = DCMF_Timebase(); t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL); /*********************** * stop timer * ***********************/ if (myrank == 0) { printf("%20d %20.2f ", msgsize, t_usec); fflush(stdout); } barrier(); /*********************** * start timer * ***********************/ t_start = DCMF_Timebase(); done_count = 10000; for (i = 0; i < ITERATIONS_LOCAL; i++) { DCMF_Send(&snd_reg, &send_req[i], send_done, DCMF_SEQUENTIAL_CONSISTENCY, (myrank + 1) % nranks, msgsize, source, &msginfo, 1); } while (done_count > 0) DCMF_Messager_advance(); t_stop = DCMF_Timebase(); t_usec = (t_stop - t_start) / (clockMHz * ITERATIONS_LOCAL); /*********************** * stop timer * ***********************/ if (myrank == 0) { printf("%20.2f \n", t_usec); fflush(stdout); } barrier(); } } else { for (msgsize = 1; msgsize < MAX_MSG_SIZE_LOCAL; msgsize *= 2) { barrier(); DCMF_CriticalSection_enter(0); sleep(10); DCMF_CriticalSection_exit(0); barrier(); } } barrier(); }