int ml_coll_barrier_constant_group_data_setup(
        mca_coll_ml_topology_t *topo_info,
        mca_coll_ml_collective_operation_description_t *schedule)
{
    /* local variables */
    int i, j, cnt, value_to_set = -1, ret = OMPI_SUCCESS, num_up_levels,
        num_hierarchies = topo_info->n_levels,
        n_functions = schedule->n_fns,
        global_high_hierarchy_index = topo_info->global_highest_hier_group_index;

    bool call_for_top_function, prev_is_zero;

    mca_coll_ml_utility_data_t *constant_group_data = NULL;

    int *scratch_indx = NULL, *scratch_num = NULL;

    mca_bcol_base_module_t *prev_bcol = NULL, *bcol_module = NULL;

    /* Am I a member of the highest-level subgroup? */
    if (global_high_hierarchy_index ==
          topo_info->component_pairs[num_hierarchies - 1].bcol_index) {
        /* A process that is a member of the highest-level subgroup calls the
         * top algorithm in addition to the fan-in/out steps */
        call_for_top_function = true;
        /* the highest level runs only the top algorithm, so we deduct 1 */
        num_up_levels = num_hierarchies - 1;
    } else {
        /* The process is not a member of the highest-level subgroup; as a
         * result it does not call the top algorithm, but it does call all
         * fan-in/out steps */
        call_for_top_function = false;
        num_up_levels = num_hierarchies;
    }

    /* Algorithm Description:
     * =====================
     * The algorithm used here for an N-level system:
     * - level 0 up to level N-2, inclusive: up algorithm (fan-in in Barrier)
     * - level N-1: top algorithm (the Barrier algorithm itself)
     * - level N-2 down to level 0: down algorithm (fan-out)
     */

    /* Starting scratch_num and scratch_indx calculations */
    /* ================================================== */

    /* Figure out how many of the same bcols are called in a row.
     * The index of the bcol within its row is stored in scratch_indx and
     * the total number of bcols in the row is stored in scratch_num */
    scratch_indx = (int *) calloc (2 * num_hierarchies, sizeof (int));
    if (NULL == scratch_indx) {
        ML_ERROR(("Can't allocate memory."));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Const_Data_Setup_Error;
    }

    scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
    if (NULL == scratch_num) {
        ML_ERROR(("Can't allocate memory."));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Const_Data_Setup_Error;
    }

    /* We go through all stages of the algorithm (up, top, down) and
     * calculate the bcol index. If the previous bcol is of the same type
     * as the current one, the counter index is increased; otherwise the
     * index is zero */
    prev_bcol = NULL;

    /* Going up */
    for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) {
            scratch_indx[cnt] = scratch_indx[cnt - 1] + 1;
        } else {
            scratch_indx[cnt] = 0;
            prev_bcol = GET_BCOL(topo_info, i);
        }
    }

    /* Top level - only if this process is a member of the globally highest
     * subgroup */
    if (call_for_top_function) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, num_hierarchies - 1))) {
            scratch_indx[cnt] = scratch_indx[cnt - 1] + 1;
        } else {
            scratch_indx[cnt] = 0;
            prev_bcol = GET_BCOL(topo_info, num_hierarchies - 1);
        }

        ++cnt;
    }

    /* Going down */
    for (i = num_up_levels - 1; i >= 0; --i, ++cnt) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) {
            scratch_indx[cnt] = scratch_indx[cnt - 1] + 1;
        } else {
            scratch_indx[cnt] = 0;
            prev_bcol = GET_BCOL(topo_info, i);
        }
    }

    /*
     * Calculate the number of the same bcols in a row. We parse the index
     * array backward; an index of zero means a row is done and we start
     * counting the next row of bcols. The number for each row is equal to
     * the maximal bcol index in that row + 1.
     */
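    /*
     * Worked example (illustrative only; the bcol names are hypothetical):
     * with three levels whose bcols are [sm, sm, p2p] and this process a
     * member of the highest subgroup, the call sequence is
     *   up(0)=sm, up(1)=sm, top(2)=p2p, down(1)=sm, down(0)=sm
     * so the forward passes above give
     *   scratch_indx = { 0, 1, 0, 0, 1 }
     * and the backward pass below fills
     *   scratch_num  = { 2, 2, 1, 2, 2 }
     */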
    i = cnt - 1;
    prev_is_zero = true;

    do {
        if (prev_is_zero) {
            value_to_set = scratch_indx[i] + 1;
            prev_is_zero = false;
        }

        if (0 == scratch_indx[i]) {
            prev_is_zero = true;
        }

        scratch_num[i] = value_to_set;
        --i;
    } while (i >= 0);

    /* ============================================================== */
    /* We are done with the scratch_num and scratch_indx calculations */

    /* Set up the function call for each algorithm step */
    cnt = 0;

    /* Up phase */
    for (i = 0; i < num_up_levels; ++i) {
        bcol_module = GET_BCOL(topo_info, i);

        constant_group_data = &schedule->component_functions[cnt].constant_group_data;
        constant_group_data->bcol_module = bcol_module;
        constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt];
        constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt];

        ++cnt;
    }

    /* Top function */
    if (call_for_top_function) {
        bcol_module = GET_BCOL(topo_info, num_hierarchies - 1);

        constant_group_data = &schedule->component_functions[cnt].constant_group_data;
        constant_group_data->bcol_module = bcol_module;
        constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt];
        constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt];

        ++cnt;
    }

    /* Down phase */
    for (i = num_up_levels - 1; i >= 0; --i) {
        bcol_module = GET_BCOL(topo_info, i);

        constant_group_data = &schedule->component_functions[cnt].constant_group_data;
        constant_group_data->bcol_module = bcol_module;
        /* All fan-outs will be done in parallel */
        constant_group_data->index_in_consecutive_same_bcol_calls = 0;
        constant_group_data->n_of_this_type_in_a_row = 1;

        ++cnt;
    }

    /* Figure out how many times each bcol is used in this collective call */
    for (i = 0; i < n_functions; ++i) {
        struct mca_coll_ml_compound_functions_t *component_functions = schedule->component_functions;
        mca_bcol_base_module_t *current_bcol = component_functions[i].constant_group_data.bcol_module;

        /* silence clang warning about possible NULL dereference of component_functions.
         * this case is a developer error if it occurs */
        assert (NULL != component_functions);

        cnt = 0;
        for (j = 0; j < n_functions; ++j) {
            if (current_bcol == component_functions[j].constant_group_data.bcol_module) {
                /* number each occurrence of this bcol type in call order */
                component_functions[j].constant_group_data.index_of_this_type_in_collective = cnt;
                ++cnt;
            }
        }

        component_functions[i].constant_group_data.n_of_this_type_in_collective = cnt;
    }

    MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule);

    /* Release temporary memory */
    free(scratch_num);
    free(scratch_indx);

    return OMPI_SUCCESS;

Const_Data_Setup_Error:
    free(scratch_indx);
    free(scratch_num);

    return ret;
}
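/*
 * Illustrative sketch only (this helper is hypothetical and not called by
 * coll/ml): the run-length bookkeeping above, expressed over a flat call
 * sequence. The real code compares bcol types via IS_BCOL_TYPE_IDENTICAL
 * rather than raw pointers, and builds the sequence from the up/top/down
 * phases instead of taking it as an array.
 */
static inline void ml_example_run_lengths(void * const *seq, int len,
                                          int *indx, int *num)
{
    int i;

    /* forward pass: position of each entry within its run */
    for (i = 0; i < len; ++i) {
        indx[i] = (i > 0 && seq[i] == seq[i - 1]) ? indx[i - 1] + 1 : 0;
    }

    /* backward pass: total length of the run each entry belongs to */
    for (i = len - 1; i >= 0; --i) {
        num[i] = (i + 1 < len && 0 != indx[i + 1]) ? num[i + 1] : indx[i] + 1;
    }
}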
/*
 * Fill up the collective descriptor
 */
static int mca_coll_ml_build_static_reduce_schedule(
        mca_coll_ml_topology_t *topo_info,
        mca_coll_ml_collective_operation_description_t **coll_desc)
{
    int i_hier, j_hier, n_fcns, n_hiers = topo_info->n_levels;
    int *scratch_indx = NULL, *scratch_num = NULL;
    int cnt, value_to_set = 0;
    int ret = OMPI_SUCCESS;
    bool prev_is_zero;
    mca_coll_ml_compound_functions_t *comp_fns_temp;
    mca_bcol_base_module_t *prev_bcol, *bcol_module;
    mca_coll_ml_compound_functions_t *comp_fn;
    mca_coll_ml_collective_operation_description_t *schedule = NULL;

    *coll_desc = (mca_coll_ml_collective_operation_description_t *)
        malloc(sizeof(mca_coll_ml_collective_operation_description_t));
    schedule = *coll_desc;
    if (OPAL_UNLIKELY(NULL == schedule)) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    /* make the cleanup path safe before the first possible goto Error */
    schedule->component_functions = NULL;
    schedule->comp_fn_arr = NULL;

    scratch_indx = (int *) malloc(sizeof(int) * n_hiers);
    if (NULL == scratch_indx) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    scratch_num = (int *) malloc(sizeof(int) * n_hiers);
    if (NULL == scratch_num) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    prev_bcol = NULL;

    /* Calculate scratch numbers */
    for (i_hier = 0; i_hier < n_hiers; i_hier++) {
        if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) {
            scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1;
        } else {
            scratch_indx[i_hier] = 0;
            prev_bcol = GET_BCOL(topo_info, i_hier);
        }
    }

    --i_hier;
    prev_is_zero = true;

    do {
        if (prev_is_zero) {
            value_to_set = scratch_indx[i_hier] + 1;
            prev_is_zero = false;
        }

        if (0 == scratch_indx[i_hier]) {
            prev_is_zero = true;
        }

        scratch_num[i_hier] = value_to_set;
        --i_hier;
    } while (i_hier >= 0);
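    /*
     * Worked example (illustrative, hypothetical bcol layout): with three
     * levels whose bcols are [sm, sm, p2p], the passes above produce
     *   scratch_indx = { 0, 1, 0 } and scratch_num = { 2, 2, 1 },
     * i.e. the two sm levels form one row of length two and the p2p
     * level a row of length one.
     */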
    /* All hierarchies call one function each, unlike other collectives */
    n_fcns = n_hiers;

    /* Set the schedule basics */
    schedule->n_fns = n_fcns;
    schedule->topo_info = topo_info;
    schedule->progress_type = 0;

    /* Allocate the component functions */
    schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
        calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
    if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    for (i_hier = 0; i_hier < n_hiers; ++i_hier) {
        comp_fn = &schedule->component_functions[i_hier];

        /* The hierarchical level */
        comp_fn->h_level = i_hier;
        bcol_module = GET_BCOL(topo_info, i_hier);

        comp_fn->bcol_function =
            bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][1][0][0];

        strcpy(comp_fn->fn_name, "REDUCE");
        ML_VERBOSE(10, ("func indx %d set to %p", i_hier, comp_fn->bcol_function));
        ML_VERBOSE(1, ("In ML_REDUCE_SETUP .. looks fine here"));

        /* Task completion function for the static reduce */
        comp_fn->task_comp_fn = mca_coll_ml_task_comp_static_reduce;

        /* Constants */
        comp_fn->constant_group_data.bcol_module = bcol_module;
        comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier];
        comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier];
        comp_fn->constant_group_data.n_of_this_type_in_collective = 0;
        comp_fn->constant_group_data.index_of_this_type_in_collective = 0;

        ML_VERBOSE(10, ("Setting collective [reduce] fn_idx %d, n_of_this_type_in_a_row %d, "
                        "index_in_consecutive_same_bcol_calls %d.",
                        i_hier, comp_fn->constant_group_data.n_of_this_type_in_a_row,
                        comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls));
    }

    /* Fill in the rest of the constant data */
    for (i_hier = 0; i_hier < n_hiers; i_hier++) {
        mca_bcol_base_module_t *current_bcol =
            schedule->component_functions[i_hier].constant_group_data.bcol_module;

        cnt = 0;
        for (j_hier = 0; j_hier < n_hiers; j_hier++) {
            if (current_bcol ==
                    schedule->component_functions[j_hier].constant_group_data.bcol_module) {
                schedule->component_functions[j_hier].
                    constant_group_data.index_of_this_type_in_collective = cnt;
                cnt++;
            }
        }

        schedule->component_functions[i_hier].
            constant_group_data.n_of_this_type_in_collective = cnt;
    }

    /* Manju: Reduction should always use the fixed schedule.
     * The subgroups in which this process is a leader should be executed
     * first, then the subgroups where this process is not a leader, and
     * finally the subgroup that includes the root. */

    /* Allocate the schedule list */
    schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **)
        calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t *));
    if (NULL == schedule->comp_fn_arr) {
        ML_ERROR(("Can't allocate memory.\n"));
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto Error;
    }

    /* Now that the functions have been set up properly, we can simply
     * permute the ordering a bit */
    for (i_hier = 0; i_hier < n_hiers; i_hier++) {
        /* the first one is trivial */
        int leader_hierarchy = 0;
        int non_leader_hierarchy = 0;
        int func_index;

        comp_fns_temp = (struct mca_coll_ml_compound_functions_t *)
            calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
        if (NULL == comp_fns_temp) {
            ML_ERROR(("Can't allocate memory.\n"));
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            goto Error;
        }

        leader_hierarchy = 0;
        non_leader_hierarchy = n_hiers - 2;

        for (j_hier = 0; j_hier < n_hiers - 1; j_hier++) {
            func_index = j_hier < i_hier ? j_hier : j_hier + 1;

            /* I'm a leader for this group */
            if (0 == topo_info->component_pairs->subgroup_module->my_index) {
                comp_fns_temp[leader_hierarchy++] =
                    schedule->component_functions[func_index];
            } else {
                comp_fns_temp[non_leader_hierarchy--] =
                    schedule->component_functions[func_index];
            }
        }

        comp_fns_temp[j_hier] = schedule->component_functions[i_hier];

        /* now attach this list to our array of lists */
        schedule->comp_fn_arr[i_hier] = comp_fns_temp;
    }
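    /*
     * Illustrative note (hypothetical 3-level case): for i_hier = 0 a
     * leader process (my_index == 0) ends up with
     *   comp_fn_arr[0] = { fn[1], fn[2], fn[0] }
     * (the other levels in ascending order, its own level last), while a
     * non-leader ends up with
     *   comp_fn_arr[0] = { fn[2], fn[1], fn[0] }
     * (the other levels filled in reverse).
     */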
    /* Manju: Do we need this ? */
    /* I'm going to just loop over each schedule and
     * set up the scratch indices, scratch numbers
     * and other constant data */
    /*
    for (i_hier = 1; i_hier < n_hiers; i_hier++) {
        ret = mca_coll_ml_setup_scratch_vals(schedule->comp_fn_arr[i_hier],
                                             scratch_indx, scratch_num, n_hiers);
        if (OMPI_SUCCESS != ret) {
            ret = OMPI_ERROR;
            goto Error;
        }
    }
    */

    /* Do I need this ? */
    schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_static_reduce_root;
    schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_static_reduce_non_root;

    MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule);

    free(scratch_num);
    free(scratch_indx);

    return OMPI_SUCCESS;

Error:
    if (NULL != schedule) {
        if (NULL != schedule->comp_fn_arr) {
            for (i_hier = 0; i_hier < n_hiers; i_hier++) {
                free(schedule->comp_fn_arr[i_hier]);
            }
            free(schedule->comp_fn_arr);
        }
        free(schedule->component_functions);
        free(schedule);
        *coll_desc = NULL;
    }
    free(scratch_num);
    free(scratch_indx);

    return ret;
}
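/*
 * Hypothetical usage sketch (the real call sites live elsewhere in coll/ml):
 * the schedule is built once per topology and cached by the caller, e.g.
 *
 *   mca_coll_ml_collective_operation_description_t *sched = NULL;
 *   int rc = mca_coll_ml_build_static_reduce_schedule(topo, &sched);
 *   if (OMPI_SUCCESS != rc) {
 *       return rc;
 *   }
 */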