int ml_coll_barrier_constant_group_data_setup(
        mca_coll_ml_topology_t *topo_info,
        mca_coll_ml_collective_operation_description_t *schedule)
{
    /* local variables */
    int i, j, cnt, value_to_set = -1, ret = OMPI_SUCCESS, num_up_levels,
        num_hierarchies = topo_info->n_levels,
        n_functions = schedule->n_fns,
        global_high_hierarchy_index = topo_info->global_highest_hier_group_index;

    bool call_for_top_function, prev_is_zero;

    mca_coll_ml_utility_data_t *constant_group_data = NULL;

    int *scratch_indx = NULL, *scratch_num = NULL;

    mca_bcol_base_module_t *prev_bcol = NULL, *bcol_module = NULL;

    /* Am I a member of the highest-level subgroup? */
    if (global_high_hierarchy_index ==
          topo_info->component_pairs[num_hierarchies - 1].bcol_index) {
        /* A process that is a member of the highest-level subgroup calls the
         * top algorithm in addition to the fan-in/out steps */
        call_for_top_function = true;
        /* the highest level runs only the top algorithm, so we deduct 1 */
        num_up_levels = num_hierarchies - 1;
    } else {
        /* The process is not a member of the highest-level subgroup; as a
         * result it does not call the top algorithm, but it does call all
         * fan-in/out steps */
        call_for_top_function = false;
        num_up_levels = num_hierarchies;
    }

    /* Algorithm Description:
     * =====================
     * The algorithm used here for an N-level system:
     * - level 0 up to level N-2, inclusive: up algorithm (fan-in in Barrier)
     * - level N-1: top algorithm (the Barrier algorithm itself)
     * - level N-2 down to level 0: down algorithm (fan-out)
     */

    /* Starting scratch_num and scratch_indx calculations */
    /* ================================================== */

    /* Figure out how many of the same bcols are called in a row.
     * The index of the bcol within its row is stored in scratch_indx and
     * the total number of bcols in the row is stored in scratch_num */
    scratch_indx = (int *) calloc (2 * num_hierarchies, sizeof (int));
    if (NULL == scratch_indx) {
        ML_ERROR(("Can't allocate memory."));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Const_Data_Setup_Error;
    }

    scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
    if (NULL == scratch_num) {
        ML_ERROR(("Can't allocate memory."));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Const_Data_Setup_Error;
    }

    /* We go through all stages of the algorithm (up, top, down) and
     * calculate the bcol index. If the previous bcol is of the same type
     * as the current one, the counter index is increased; otherwise the
     * index is zero */
    prev_bcol = NULL;

    /* Going up */
    for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) {
            scratch_indx[cnt] = scratch_indx[cnt - 1] + 1;
        } else {
            scratch_indx[cnt] = 0;
            prev_bcol = GET_BCOL(topo_info, i);
        }
    }

    /* Top level - only if this process is a member of the globally highest
     * subgroup */
    if (call_for_top_function) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, num_hierarchies - 1))) {
            scratch_indx[cnt] = scratch_indx[cnt - 1] + 1;
        } else {
            scratch_indx[cnt] = 0;
            prev_bcol = GET_BCOL(topo_info, num_hierarchies - 1);
        }

        ++cnt;
    }

    /* Going down */
    for (i = num_up_levels - 1; i >= 0; --i, ++cnt) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) {
            scratch_indx[cnt] = scratch_indx[cnt - 1] + 1;
        } else {
            scratch_indx[cnt] = 0;
            prev_bcol = GET_BCOL(topo_info, i);
        }
    }

    /*
     * Calculate the number of the same bcols in a row. We parse the index
     * array backward; an index of zero means a row is done and we start
     * counting the next row of bcols. The number for each row is equal to
     * the maximal bcol index in that row + 1.
     */
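    /*
     * Worked example (illustrative only; the bcol names are hypothetical):
     * with three levels whose bcols are [sm, sm, p2p] and this process a
     * member of the highest subgroup, the call sequence is
     *   up(0)=sm, up(1)=sm, top(2)=p2p, down(1)=sm, down(0)=sm
     * so the forward passes above give
     *   scratch_indx = { 0, 1, 0, 0, 1 }
     * and the backward pass below fills
     *   scratch_num  = { 2, 2, 1, 2, 2 }
     */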
    i = cnt - 1;
    prev_is_zero = true;

    do {
        if (prev_is_zero) {
            value_to_set = scratch_indx[i] + 1;
            prev_is_zero = false;
        }

        if (0 == scratch_indx[i]) {
            prev_is_zero = true;
        }

        scratch_num[i] = value_to_set;
        --i;
    } while (i >= 0);

    /* ============================================================== */
    /* We are done with the scratch_num and scratch_indx calculations */

    /* Set up the function call for each algorithm step */
    cnt = 0;

    /* Up phase */
    for (i = 0; i < num_up_levels; ++i) {
        bcol_module = GET_BCOL(topo_info, i);

        constant_group_data = &schedule->component_functions[cnt].constant_group_data;
        constant_group_data->bcol_module = bcol_module;
        constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt];
        constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt];

        ++cnt;
    }

    /* Top function */
    if (call_for_top_function) {
        bcol_module = GET_BCOL(topo_info, num_hierarchies - 1);

        constant_group_data = &schedule->component_functions[cnt].constant_group_data;
        constant_group_data->bcol_module = bcol_module;
        constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt];
        constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt];

        ++cnt;
    }

    /* Down phase */
    for (i = num_up_levels - 1; i >= 0; --i) {
        bcol_module = GET_BCOL(topo_info, i);

        constant_group_data = &schedule->component_functions[cnt].constant_group_data;
        constant_group_data->bcol_module = bcol_module;
        /* All fan-outs will be done in parallel */
        constant_group_data->index_in_consecutive_same_bcol_calls = 0;
        constant_group_data->n_of_this_type_in_a_row = 1;

        ++cnt;
    }

    /* Figure out how many times each bcol is used in this collective call */
    for (i = 0; i < n_functions; ++i) {
        struct mca_coll_ml_compound_functions_t *component_functions = schedule->component_functions;
        mca_bcol_base_module_t *current_bcol = component_functions[i].constant_group_data.bcol_module;

        /* silence clang warning about possible NULL dereference of component_functions.
         * this case is a developer error if it occurs */
        assert (NULL != component_functions);

        cnt = 0;
        for (j = 0; j < n_functions; ++j) {
            if (current_bcol == component_functions[j].constant_group_data.bcol_module) {
                /* number each occurrence of this bcol type in call order */
                component_functions[j].constant_group_data.index_of_this_type_in_collective = cnt;
                ++cnt;
            }
        }

        component_functions[i].constant_group_data.n_of_this_type_in_collective = cnt;
    }

    MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule);

    /* Release temporary memory */
    free(scratch_num);
    free(scratch_indx);

    return OMPI_SUCCESS;

Const_Data_Setup_Error:
    free(scratch_indx);
    free(scratch_num);

    return ret;
}
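/*
 * Illustrative sketch only (this helper is hypothetical and not called by
 * coll/ml): the run-length bookkeeping above, expressed over a flat call
 * sequence. The real code compares bcol types via IS_BCOL_TYPE_IDENTICAL
 * rather than raw pointers, and builds the sequence from the up/top/down
 * phases instead of taking it as an array.
 */
static inline void ml_example_run_lengths(void * const *seq, int len,
                                          int *indx, int *num)
{
    int i;

    /* forward pass: position of each entry within its run */
    for (i = 0; i < len; ++i) {
        indx[i] = (i > 0 && seq[i] == seq[i - 1]) ? indx[i - 1] + 1 : 0;
    }

    /* backward pass: total length of the run each entry belongs to */
    for (i = len - 1; i >= 0; --i) {
        num[i] = (i + 1 < len && 0 != indx[i + 1]) ? num[i + 1] : indx[i] + 1;
    }
}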
/*
 * Fill up the collective descriptor
 */
static int mca_coll_ml_build_static_reduce_schedule(
        mca_coll_ml_topology_t *topo_info,
        mca_coll_ml_collective_operation_description_t **coll_desc)
{
    int i_hier, j_hier, n_fcns, n_hiers = topo_info->n_levels;
    int *scratch_indx = NULL, *scratch_num = NULL;
    int cnt, value_to_set = 0;
    int ret = OMPI_SUCCESS;
    bool prev_is_zero;
    mca_coll_ml_compound_functions_t *comp_fns_temp;
    mca_bcol_base_module_t *prev_bcol, *bcol_module;
    mca_coll_ml_compound_functions_t *comp_fn;
    mca_coll_ml_collective_operation_description_t *schedule = NULL;

    *coll_desc = (mca_coll_ml_collective_operation_description_t *)
        malloc(sizeof(mca_coll_ml_collective_operation_description_t));
    schedule = *coll_desc;
    if (OPAL_UNLIKELY(NULL == schedule)) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    /* make the cleanup path safe before the first possible goto Error */
    schedule->component_functions = NULL;
    schedule->comp_fn_arr = NULL;

    scratch_indx = (int *) malloc(sizeof(int) * n_hiers);
    if (NULL == scratch_indx) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    scratch_num = (int *) malloc(sizeof(int) * n_hiers);
    if (NULL == scratch_num) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    prev_bcol = NULL;

    /* Calculate scratch numbers */
    for (i_hier = 0; i_hier < n_hiers; i_hier++) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) {
            scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1;
        } else {
            scratch_indx[i_hier] = 0;
            prev_bcol = GET_BCOL(topo_info, i_hier);
        }
    }

    --i_hier;
    prev_is_zero = true;

    do {
        if (prev_is_zero) {
            value_to_set = scratch_indx[i_hier] + 1;
            prev_is_zero = false;
        }

        if (0 == scratch_indx[i_hier]) {
            prev_is_zero = true;
        }

        scratch_num[i_hier] = value_to_set;
        --i_hier;
    } while (i_hier >= 0);
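    /*
     * Worked example (illustrative, hypothetical bcol layout): with three
     * levels whose bcols are [sm, sm, p2p], the passes above produce
     *   scratch_indx = { 0, 1, 0 } and scratch_num = { 2, 2, 1 },
     * i.e. the two sm levels form one row of length two and the p2p
     * level a row of length one.
     */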
    /* All hierarchies call one function each, unlike other collectives */
    n_fcns = n_hiers;

    /* Set the schedule basics */
    schedule->n_fns = n_fcns;
    schedule->topo_info = topo_info;
    schedule->progress_type = 0;

    /* Allocate the component functions */
    schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
        calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
    if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    for (i_hier = 0; i_hier < n_hiers; ++i_hier) {
        comp_fn = &schedule->component_functions[i_hier];

        /* The hierarchical level */
        comp_fn->h_level = i_hier;
        bcol_module = GET_BCOL(topo_info, i_hier);

        comp_fn->bcol_function =
            bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][1][0][0];

        strcpy(comp_fn->fn_name, "REDUCE");
        ML_VERBOSE(10, ("func indx %d set to %p", i_hier, comp_fn->bcol_function));
        ML_VERBOSE(1, ("In ML_REDUCE_SETUP .. looks fine here"));

        /* Task completion function for the static reduce */
        comp_fn->task_comp_fn = mca_coll_ml_task_comp_static_reduce;

        /* Constants */
        comp_fn->constant_group_data.bcol_module = bcol_module;
        comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier];
        comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier];
        comp_fn->constant_group_data.n_of_this_type_in_collective = 0;
        comp_fn->constant_group_data.index_of_this_type_in_collective = 0;

        ML_VERBOSE(10, ("Setting collective [reduce] fn_idx %d, n_of_this_type_in_a_row %d, "
                        "index_in_consecutive_same_bcol_calls %d.",
                        i_hier, comp_fn->constant_group_data.n_of_this_type_in_a_row,
                        comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls));
    }

    /* Fill in the rest of the constant data */
    for (i_hier = 0; i_hier < n_hiers; i_hier++) {
        mca_bcol_base_module_t *current_bcol =
            schedule->component_functions[i_hier].constant_group_data.bcol_module;

        cnt = 0;
        for (j_hier = 0; j_hier < n_hiers; j_hier++) {
            if (current_bcol ==
                    schedule->component_functions[j_hier].constant_group_data.bcol_module) {
                schedule->component_functions[j_hier].
                    constant_group_data.index_of_this_type_in_collective = cnt;
                cnt++;
            }
        }

        schedule->component_functions[i_hier].
            constant_group_data.n_of_this_type_in_collective = cnt;
    }

    /* Manju: Reduction should always use the fixed schedule.
     * The subgroups in which this process is a leader should be executed
     * first, then the subgroups where this process is not a leader, and
     * finally the subgroup that includes the root. */

    /* Allocate the schedule list */
    schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **)
        calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t *));
    if (NULL == schedule->comp_fn_arr) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    /* Now that the functions have been set up properly, we can simply
     * permute the ordering a bit */
    for (i_hier = 0; i_hier < n_hiers; i_hier++) {
        /* the first one is trivial */
        int leader_hierarchy = 0;
        int non_leader_hierarchy = 0;
        int func_index;

        comp_fns_temp = (struct mca_coll_ml_compound_functions_t *)
            calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
        if (NULL == comp_fns_temp) {
            ML_ERROR(("Can't allocate memory.\n"));
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            goto Error;
        }

        leader_hierarchy = 0;
        non_leader_hierarchy = n_hiers - 2;

        for (j_hier = 0; j_hier < n_hiers - 1; j_hier++) {
            func_index = j_hier < i_hier ? j_hier : j_hier + 1;

            /* I'm a leader for this group */
            if (0 == topo_info->component_pairs->subgroup_module->my_index) {
                comp_fns_temp[leader_hierarchy++] =
                    schedule->component_functions[func_index];
            } else {
                comp_fns_temp[non_leader_hierarchy--] =
                    schedule->component_functions[func_index];
            }
        }

        comp_fns_temp[j_hier] = schedule->component_functions[i_hier];

        /* now attach this list to our array of lists */
        schedule->comp_fn_arr[i_hier] = comp_fns_temp;
    }
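    /*
     * Illustrative note (hypothetical 3-level case): for i_hier = 0 a
     * leader process (my_index == 0) ends up with
     *   comp_fn_arr[0] = { fn[1], fn[2], fn[0] }
     * (the other levels in ascending order, its own level last), while a
     * non-leader ends up with
     *   comp_fn_arr[0] = { fn[2], fn[1], fn[0] }
     * (the other levels filled in reverse).
     */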
    /* Manju: Do we need this ? */
    /* I'm going to just loop over each schedule and
     * set up the scratch indices, scratch numbers
     * and other constant data */
    /*
    for (i_hier = 1; i_hier < n_hiers; i_hier++) {
        ret = mca_coll_ml_setup_scratch_vals(schedule->comp_fn_arr[i_hier],
                                             scratch_indx, scratch_num, n_hiers);
        if (OMPI_SUCCESS != ret) {
            ret = OMPI_ERROR;
            goto Error;
        }
    }
    */

    /* Do I need this ? */
    schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_static_reduce_root;
    schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_static_reduce_non_root;

    MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule);

    free(scratch_num);
    free(scratch_indx);

    return OMPI_SUCCESS;

Error:
    if (NULL != schedule) {
        if (NULL != schedule->comp_fn_arr) {
            for (i_hier = 0; i_hier < n_hiers; i_hier++) {
                free(schedule->comp_fn_arr[i_hier]);
            }
            free(schedule->comp_fn_arr);
        }
        free(schedule->component_functions);
        free(schedule);
        *coll_desc = NULL;
    }
    free(scratch_num);
    free(scratch_indx);

    return ret;
}
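/*
 * Hypothetical usage sketch (the real call sites live elsewhere in coll/ml):
 * the schedule is built once per topology and cached by the caller, e.g.
 *
 *   mca_coll_ml_collective_operation_description_t *sched = NULL;
 *   int rc = mca_coll_ml_build_static_reduce_schedule(topo, &sched);
 *   if (OMPI_SUCCESS != rc) {
 *       return rc;
 *   }
 */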