static int tuned_open(void) { int rc; #if OPAL_ENABLE_DEBUG { int param; param = mca_base_var_find("ompi", "coll", "base", "verbose"); if (param >= 0) { const int *verbose = NULL; mca_base_var_get_value(param, &verbose, NULL, NULL); if (verbose && verbose[0] > 0) { ompi_coll_tuned_stream = opal_output_open(NULL); } } } #endif /* OPAL_ENABLE_DEBUG */ /* now check that the user hasn't overrode any of the decision functions if dynamic rules are enabled */ /* the user can redo this before every comm dup/create if they like */ /* this is useful for benchmarking and user knows best tuning */ /* as this is the component we only lookup the indicies of the mca params */ /* the actual values are looked up during comm create via module init */ /* intra functions first */ /* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */ /* by default DISABLE dynamic rules and instead use fixed [if based] rules */ if (ompi_coll_tuned_use_dynamic_rules) { if( ompi_coll_tuned_dynamic_rules_filename ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]", ompi_coll_tuned_dynamic_rules_filename)); rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename, &(mca_coll_tuned_component.all_base_rules), COLLCOUNT); if( rc >= 0 ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc)); } else { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n")); mca_coll_tuned_component.all_base_rules = NULL; } } } OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!")); return OMPI_SUCCESS; }
static int tuned_open(void) { int rc; #if OMPI_ENABLE_DEBUG { int param; param = mca_base_param_find("coll", NULL, "base_verbose"); if (param >= 0) { int verbose; mca_base_param_lookup_int(param, &verbose); if (verbose > 0) { ompi_coll_tuned_stream = opal_output_open(NULL); } } } #endif /* OMPI_ENABLE_DEBUG */ /* Use a low priority, but allow other components to be lower */ mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, "priority", "Priority of the tuned coll component", false, false, ompi_coll_tuned_priority, &ompi_coll_tuned_priority); /* parameter for pre-allocated memory requests etc */ mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, "pre_allocate_memory_comm_size_limit", "Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!", false, false, ompi_coll_tuned_preallocate_memory_comm_size_limit, &ompi_coll_tuned_preallocate_memory_comm_size_limit); /* some initial guesses at topology parameters */ mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, "init_tree_fanout", "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time", false, false, ompi_coll_tuned_init_tree_fanout, &ompi_coll_tuned_init_tree_fanout); mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, "init_chain_fanout", "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time", false, false, ompi_coll_tuned_init_chain_fanout, &ompi_coll_tuned_init_chain_fanout); /* now check that the user hasn't overrode any of the decision functions if dynamic rules are enabled */ /* the user can redo this before every comm dup/create if they like */ /* this is useful for benchmarking and user knows best tuning */ /* as this is the component we only lookup the indicies of the mca params */ /* the actual values are looked up during comm create via module init */ /* intra functions first */ /* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */ /* by default DISABLE dynamic rules and instead use fixed [if based] rules */ mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, "use_dynamic_rules", "Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules", false, false, ompi_coll_tuned_use_dynamic_rules, &ompi_coll_tuned_use_dynamic_rules); if (ompi_coll_tuned_use_dynamic_rules) { mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version, "dynamic_rules_filename", "Filename of configuration file that contains the dynamic (@runtime) decision function rules", false, false, ompi_coll_tuned_dynamic_rules_filename, &ompi_coll_tuned_dynamic_rules_filename); if( ompi_coll_tuned_dynamic_rules_filename ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]", ompi_coll_tuned_dynamic_rules_filename)); rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename, &(mca_coll_tuned_component.all_base_rules), COLLCOUNT); if( rc >= 0 ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc)); } else { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n")); mca_coll_tuned_component.all_base_rules = NULL; } } ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]); ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]); ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]); ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]); ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]); ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]); ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]); ompi_coll_tuned_reduce_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTER]); ompi_coll_tuned_gather_intra_check_forced_init(&ompi_coll_tuned_forced_params[GATHER]); ompi_coll_tuned_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCATTER]); } OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!")); return OMPI_SUCCESS; }
/* * Init module on the communicator */ const struct mca_coll_base_module_1_0_0_t * ompi_coll_tuned_module_init(struct ompi_communicator_t *comm) { int size, rank; struct mca_coll_base_comm_t *data; /* fanout parameters */ int rc=0; int i; OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called.")); /* This routine will become more complex and might have to be */ /* broken into more sections/function calls */ /* Order of operations: * alloc memory for nb reqs (in case we fall through) * add decision rules if using dynamic rules * compact rules using communicator size info etc * build first guess cached topologies (might depend on the rules from above) * * then attach all to the communicator and return base module funct ptrs */ /* Allocate the data that hangs off the communicator */ if (OMPI_COMM_IS_INTER(comm)) { size = ompi_comm_remote_size(comm); } else { size = ompi_comm_size(comm); } /* * we still malloc data as it is used by the TUNED modules * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers * we place any special info after the default data * * BUT on very large systems we might not be able to allocate all this memory so * we do check a MCA parameter to see if if we should allocate this memory * * The default is set very high * */ /* if we within the memory/size limit, allow preallocated data */ if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) { data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t) + (sizeof(ompi_request_t *) * size * 2)); if (NULL == data) { return NULL; } data->mcct_reqs = (ompi_request_t **) (data + 1); data->mcct_num_reqs = size * 2; } else { data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t)); if (NULL == data) { return NULL; } data->mcct_reqs = (ompi_request_t **) NULL; data->mcct_num_reqs = 0; } /* * If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file.. * then this effects how much storage space you need * (This is a basic version of what will go into V2) * */ size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */ /* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */ /* which only has rules in it for our com size */ rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */ /* actually if they are below a threadhold, they all open it */ /* have to build a collective in here.. but just for MCW.. */ /* but we have to make sure we have the same rules everywhere :( */ /* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */ if (ompi_coll_tuned_use_dynamic_rules) { /* base rules */ data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; /* each collective rule for my com size */ for (i=0;i<COLLCOUNT;i++) { data->com_rules[i] = (ompi_coll_com_rule_t*) NULL; } } /* next dynamic state, recheck all forced rules as well */ /* warning, we should check to make sure this is really an INTRA comm here... */ if (ompi_coll_tuned_use_dynamic_rules) { ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHER], &(data->user_forced[ALLGATHER])); /* ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */ ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE])); } if (&ompi_mpi_comm_world==comm) { if (ompi_coll_tuned_use_dynamic_rules) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); if (ompi_coll_tuned_dynamic_rules_filename) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", ompi_coll_tuned_dynamic_rules_filename)); rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename, &(data->all_base_rules), COLLCOUNT); if (rc>=0) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc)); /* at this point we all have a base set of rules */ /* now we can get our customized communicator sized rule set, for each collective */ for (i=0;i<COLLCOUNT;i++) { data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); } } else { /* failed to read config file, thus make sure its a NULL... */ data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; } } /* end if a config filename exists */ } /* end if dynamic_rules */ } /* end if MCW */ /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */ /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */ if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&& ((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic")); /* this will, erm fail if MCW doesn't exist which it should! */ data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules; /* at this point we all have a base set of rules if they exist atall */ /* now we can get our customized communicator sized rule set, for each collective */ for (i=0;i<COLLCOUNT;i++) { data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); } } /* * now for the cached topo functions * guess the initial topologies to use rank 0 as root */ /* general n fan out tree */ data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, comm, 0); data->cached_ntree_root = 0; data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout; /* binary tree */ data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); data->cached_bintree_root = 0; /* binomial tree */ data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); data->cached_bmtree_root = 0; /* * chains (fanout followed by pipelines) * are more difficuilt as the fan out really really depends on message size [sometimes].. * as size gets larger fan-out gets smaller [usually] * * will probably change how we cache this later, for now a midsize * GEF */ data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, comm, 0); data->cached_chain_root = 0; data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* standard pipeline */ data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); data->cached_pipeline_root = 0; /* All done */ comm->c_coll_selected_data = data; OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); return to_use; }