/**
 * Call this function to give libcircle initial reduction data.
 *
 * Any buffer stored by a previous call is handed to CIRCLE_free first and
 * the recorded size is reset to zero. When size is greater than zero, the
 * bytes at buf are then copied into a newly allocated buffer that is stored
 * on CIRCLE_INPUT_ST together with its size. When size is zero, or when the
 * allocation fails, no new buffer is stored and the recorded size stays
 * zero.
 *
 * @param buf pointer to buffer holding reduction data
 * @param size size of buffer in bytes
 */
__inline__ void CIRCLE_reduce(const void* buf, size_t size)
{
    /* free existing buffer memory if we have any, and clear the recorded
     * size so it can never describe a buffer that has been released */
    CIRCLE_free(&CIRCLE_INPUT_ST.reduce_buf);
    CIRCLE_INPUT_ST.reduce_buf_size = 0;

    /* nothing to copy */
    if(size == 0) {
        return;
    }

    /* allocate memory to copy reduction data */
    void* copy = malloc(size);

    if(copy == NULL) {
        LOG(CIRCLE_LOG_FATAL,
            "Unable to allocate %llu bytes for reduction buffer.",
            (unsigned long long) size);
        /* TODO: bail with fatal error */
        return;
    }

    /* copy data from user buffer */
    memcpy(copy, buf, size);

    /* store buffer on input state */
    CIRCLE_INPUT_ST.reduce_buf = copy;
    CIRCLE_INPUT_ST.reduce_buf_size = size;
}
/**
 * Release the buffers attached to a worker state.
 *
 * Each buffer member of the state, and of the MPI state it points to, is
 * passed to CIRCLE_free. The state structures themselves are not freed
 * here.
 *
 * @param local_state worker state whose buffers are released
 */
static void CIRCLE_finalize_local_state(CIRCLE_state_st* local_state)
{
    CIRCLE_mpi_state_st* mpi_state = local_state->mpi_state_st;

    /* members held directly on the worker state */
    CIRCLE_free(&local_state->request_offsets);
    CIRCLE_free(&local_state->work_offsets);
    CIRCLE_free(&local_state->request_flag);
    CIRCLE_free(&local_state->request_recv_buf);

    /* members reached through the attached MPI state */
    CIRCLE_free(&mpi_state->request_status);
    CIRCLE_free(&mpi_state->request_request);
    CIRCLE_free(&mpi_state->requestors);
}
/**
 * Tear down libcircle once your program has finished executing.
 *
 * This must be the last libcircle API call. It releases the internal queue
 * and the stored reduction buffer, frees the communicator held on the input
 * state, finalizes MPI when CIRCLE_must_finalize_mpi is set, and clears the
 * debug stream pointer.
 */
__inline__ void CIRCLE_finalize(void)
{
    /* release the internal work queue */
    CIRCLE_internal_queue_free(CIRCLE_INPUT_ST.queue);

    /* release the buffer holding user reduction data */
    CIRCLE_free(&CIRCLE_INPUT_ST.reduce_buf);

    /* give back MPI resources before shutting MPI down */
    MPI_Comm_free(&CIRCLE_INPUT_ST.comm);

    /* only finalize MPI if we were the ones who initialized it */
    if(CIRCLE_must_finalize_mpi != 0) {
        MPI_Finalize();
    }

    CIRCLE_debug_stream = NULL;
}
/** * @brief Sets up libcircle, calls work loop function * * - Main worker function. This function: * -# Initializes MPI * -# Initializes internal libcircle data structures * -# Calls libcircle's main work loop function. * -# Checkpoints if CIRCLE_abort has been called by a rank. */ int8_t CIRCLE_worker() { int rank = -1; int size = -1; int i = -1; /* Holds all worker state */ CIRCLE_state_st local_state; CIRCLE_state_st* sptr = &local_state; /* Holds all mpi state */ CIRCLE_mpi_state_st mpi_s; local_state.mpi_state_st = &mpi_s; /* Provides an interface to the queue. */ queue_handle.enqueue = &CIRCLE_enqueue; queue_handle.dequeue = &CIRCLE_dequeue; queue_handle.local_queue_size = &CIRCLE_local_queue_size; MPI_Comm_size(*CIRCLE_INPUT_ST.work_comm, &size); sptr->size = size; CIRCLE_init_local_state(sptr, size); MPI_Errhandler circle_err; MPI_Comm_create_errhandler(CIRCLE_MPI_error_handler, &circle_err); MPI_Comm_set_errhandler(*mpi_s.work_comm, circle_err); rank = CIRCLE_global_rank; local_state.rank = rank; local_state.token_partner_recv = (rank - 1 + size) % size; local_state.token_partner_send = (rank + 1 + size) % size; /* randomize the first task we will request work from */ local_state.seed = (unsigned) rank; CIRCLE_get_next_proc(&local_state); /* Initial local state */ local_objects_processed = 0; total_objects_processed = 0; /* Master rank starts out with the initial data creation */ size_t array_elems = (size_t) size; uint32_t* total_objects_processed_array = (uint32_t*) calloc(array_elems, sizeof(uint32_t)); uint32_t* total_work_requests_array = (uint32_t*) calloc(array_elems, sizeof(uint32_t)); uint32_t* total_no_work_received_array = (uint32_t*) calloc(array_elems, sizeof(uint32_t)); if(CIRCLE_INPUT_ST.options & CIRCLE_SPLIT_EQUAL) { LOG(CIRCLE_LOG_DBG, "Using equalized load splitting."); } if(CIRCLE_INPUT_ST.options & CIRCLE_SPLIT_RANDOM) { LOG(CIRCLE_LOG_DBG, "Using randomized load splitting."); } /* start the termination token on rank 0 */ if(rank == 
0) { local_state.have_token = 1; } /* start by adding work to queue by calling create_cb, * only invoke on master unless CREATE_GLOBAL is set */ if(rank == 0 || CIRCLE_INPUT_ST.options & CIRCLE_CREATE_GLOBAL) { (*(CIRCLE_INPUT_ST.create_cb))(&queue_handle); } CIRCLE_work_loop(sptr, &queue_handle); CIRCLE_cleanup_mpi_messages(sptr); if(CIRCLE_ABORT_FLAG) { CIRCLE_checkpoint(); } MPI_Gather(&local_objects_processed, 1, MPI_INT, \ &total_objects_processed_array[0], 1, MPI_INT, 0, \ *mpi_s.work_comm); MPI_Gather(&local_work_requested, 1, MPI_INT, \ &total_work_requests_array[0], 1, MPI_INT, 0, \ *mpi_s.work_comm); MPI_Gather(&local_no_work_received, 1, MPI_INT, \ &total_no_work_received_array[0], 1, MPI_INT, 0, \ *mpi_s.work_comm); MPI_Reduce(&local_objects_processed, &total_objects_processed, 1, \ MPI_INT, MPI_SUM, 0, *mpi_s.work_comm); MPI_Reduce(&local_hop_bytes, &total_hop_bytes, 1, \ MPI_INT, MPI_SUM, 0, *mpi_s.work_comm); if(rank == 0) { for(i = 0; i < size; i++) { LOG(CIRCLE_LOG_INFO, "Rank %d\tObjects Processed %d\t%0.3lf%%", i, \ total_objects_processed_array[i], \ (double)total_objects_processed_array[i] / \ (double)total_objects_processed * 100.0); LOG(CIRCLE_LOG_INFO, "Rank %d\tWork requests: %d", i, total_work_requests_array[i]); LOG(CIRCLE_LOG_INFO, "Rank %d\tNo work replies: %d", i, total_no_work_received_array[i]); } LOG(CIRCLE_LOG_INFO, \ "Total Objects Processed: %d", total_objects_processed); LOG(CIRCLE_LOG_INFO, \ "Total hop-bytes: %"PRIu64, total_hop_bytes); LOG(CIRCLE_LOG_INFO, \ "Hop-bytes per file: %f", (float)total_hop_bytes / (float)total_objects_processed); } /* free memory */ CIRCLE_free(&total_no_work_received_array); CIRCLE_free(&total_work_requests_array); CIRCLE_free(&total_objects_processed_array); CIRCLE_finalize_local_state(sptr); return 0; }