int test_error(char *my_nspace, int my_rank, test_params params) { size_t errhandler_refs[MAX_ERR_HANDLERS]; struct timespec ts; pmix_status_t status; pmix_proc_t source; TEST_VERBOSE(("test-error: running error handling test cases")); /* register specific client error handlers and test their invocation * by trigerring events from server side*/ status = PMIX_ERR_TIMEOUT; PMIx_Register_event_handler(&status, 1, NULL, 0, timeout_errhandler, errhandler_reg_callbk1, &errhandler_refs[0]); /* reg a handler for comm errors */ status = PMIX_ERR_LOST_PEER_CONNECTION; PMIx_Register_event_handler(&status, 1, NULL, 0, comfail_errhandler, errhandler_reg_callbk1, &errhandler_refs[1]); /* inject error from client */ done = false; (void)strncpy(source.nspace, my_nspace, PMIX_MAX_NSLEN); source.rank = my_rank; /* change error value to test other error notifications */ PMIx_Notify_event(TEST_NOTIFY, &source, PMIX_RANGE_NAMESPACE, NULL, 0, op1_callbk, NULL); while(!done) { ts.tv_sec = 0; ts.tv_nsec = 100000; nanosleep(&ts, NULL); } done = false; /* dereg all handlers*/ PMIx_Deregister_event_handler( errhandler_refs[0], op1_callbk, NULL); /* loop until we get callback */ while(!done) { ts.tv_sec = 0; ts.tv_nsec = 100000; nanosleep(&ts, NULL); } done = false; PMIx_Deregister_event_handler( errhandler_refs[1], op1_callbk, NULL); /* loop until we get callback */ while(!done) { ts.tv_sec = 0; ts.tv_nsec = 100000; nanosleep(&ts, NULL); } return PMIX_SUCCESS; }
int pmixp_lib_finalize(void) { int rc = SLURM_SUCCESS; /* deregister the errhandler */ PMIx_Deregister_event_handler(0, _op_callbk, NULL); if (PMIX_SUCCESS != PMIx_server_finalize()) { rc = SLURM_ERROR; } return rc; }
int main(int argc, char **argv) { char **client_env=NULL; char **client_argv=NULL; int rc; struct stat stat_buf; struct timeval tv; double test_start; cli_state_t order[CLI_TERM+1]; test_params params; INIT_TEST_PARAMS(params); int test_fail = 0; char *tmp; int ns_nprocs; gettimeofday(&tv, NULL); test_start = tv.tv_sec + 1E-6*tv.tv_usec; /* smoke test */ if (PMIX_SUCCESS != 0) { TEST_ERROR(("ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d", PMIX_SUCCESS)); exit(1); } TEST_VERBOSE(("Testing version %s", PMIx_Get_version())); parse_cmd(argc, argv, ¶ms); TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout)); /* verify executable */ if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){ TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno))); FREE_TEST_PARAMS(params); return 0; } else if( !S_ISREG(stat_buf.st_mode) ){ TEST_ERROR(("Client executable \"%s\": is not a regular file", params.binary)); FREE_TEST_PARAMS(params); return 0; }else if( !(stat_buf.st_mode & S_IXUSR) ){ TEST_ERROR(("Client executable \"%s\": has no executable flag", params.binary)); FREE_TEST_PARAMS(params); return 0; } /* setup the server library */ pmix_info_t info[1]; (void)strncpy(info[0].key, PMIX_SOCKET_MODE, PMIX_MAX_KEYLEN); info[0].value.type = PMIX_UINT32; info[0].value.data.uint32 = 0666; if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 1))) { TEST_ERROR(("Init failed with error %d", rc)); FREE_TEST_PARAMS(params); return rc; } /* register the errhandler */ PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler, errhandler_reg_callbk, NULL); order[CLI_UNINIT] = CLI_FORKED; order[CLI_FORKED] = CLI_FIN; order[CLI_CONNECTED] = CLI_UNDEF; order[CLI_FIN] = CLI_TERM; order[CLI_DISCONN] = CLI_UNDEF; order[CLI_TERM] = CLI_UNDEF; cli_init(params.nprocs, order); /* set common argv and env */ client_env = pmix_argv_copy(environ); set_client_argv(¶ms, &client_argv); tmp = pmix_argv_join(client_argv, ' '); TEST_VERBOSE(("Executing test: %s", tmp)); free(tmp); int launched = 0; /* set namespaces and fork clients */ if (NULL == params.ns_dist) { /* we have a single namespace for all clients */ ns_nprocs = params.nprocs; rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv); if (PMIX_SUCCESS != rc) { FREE_TEST_PARAMS(params); return rc; } launched += ns_nprocs; } else { char *pch; pch = strtok(params.ns_dist, ":"); while (NULL != pch) { ns_nprocs = (int)strtol(pch, NULL, 10); if (params.nprocs < (uint32_t)(launched+ns_nprocs)) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); FREE_TEST_PARAMS(params); return PMIX_ERROR; } if (0 < ns_nprocs) { rc = launch_clients(ns_nprocs, params.binary, &client_env, &client_argv); if (PMIX_SUCCESS != rc) { FREE_TEST_PARAMS(params); return rc; } } pch = strtok (NULL, ":"); launched += ns_nprocs; } } if (params.nprocs != (uint32_t)launched) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); cli_kill_all(); test_fail = 1; } /* hang around until the client(s) finalize */ while (!test_terminated()) { // To avoid test hang we want to interrupt the loop each 0.1s double test_current; // check if we exceed the max time gettimeofday(&tv, NULL); test_current = tv.tv_sec + 1E-6*tv.tv_usec; if( (test_current - test_start) > params.timeout ){ break; } cli_wait_all(0); } if( !test_terminated() ){ TEST_ERROR(("Test exited by a timeout!")); cli_kill_all(); test_fail = 1; } if( test_abort ){ TEST_ERROR(("Test was aborted!")); /* do not simply kill the clients as that generates * event notifications which these tests then print * out, flooding the log */ // cli_kill_all(); test_fail = 1; } if (0 != params.test_spawn) { PMIX_WAIT_FOR_COMPLETION(spawn_wait); } pmix_argv_free(client_argv); pmix_argv_free(client_env); /* deregister the errhandler */ PMIx_Deregister_event_handler(0, op_callbk, NULL); cli_wait_all(1.0); /* finalize the server library */ if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) { TEST_ERROR(("Finalize failed with error %d", rc)); } FREE_TEST_PARAMS(params); if (0 == test_fail) { TEST_OUTPUT(("Test finished OK!")); } return test_fail; }
int main(int argc, char **argv, const char **environ) { pmix_status_t rc; pmix_info_t *info = NULL; bool flag; pmix_status_t retval; pmix_app_t *spawned_app = NULL; pmix_info_t *job_info = NULL; pmix_info_t *proc_info = NULL; int job_info_count = 0; int job_info_index = 0; int proc_info_count = 0; int proc_info_index = 0; char spawned_nsp[PMIX_MAX_NSLEN+1]; char *path_to_app = NULL; char *host_to_use = NULL; int number_of_clients = 0; int temp_counter = 0; done_flag = false; gethostname(hostn, 500); int spawned_app_argc = 0; char **scr_environ = NULL; int proc_count = 1; int node_count = 0; bool blocking_mode = true; char *node_list = NULL; bool forward_all_scr_envs = false; bool pmix_mode = false; const char *optstring = "+n:N:L:x:bB:pPvhe"; int temp_slen=0; /* todo: add arg parsing with ompi schizo */ verbose_print = false; int sleep_max = 30; const int fixed_sleep = 5; int c; while((c = getopt(argc, argv, optstring)) != -1){ switch(c){ case 'h': print_usage(argv[0]); exit(0); break; case 'n': proc_count = atoi(optarg); if(proc_count <= 0 || proc_count > 100){ printf("outside the range of allowable instances to spawn [1-100]\n"); exit(1); } if(verbose_print) { printf("proc_count = %d\n", proc_count); } break; case 'N': /* node_count = atoi(optarg); */ node_count = 1; if(verbose_print) { printf("node_count = %d\n", node_count); } break; case 'B': blocking_mode = true; sleep_max = atoi(optarg); if(sleep_max < 0){ printf("can't sleep for less than 0 seconds\n"); exit(1); } if(verbose_print){ printf("blocking mode = %x\n", blocking_mode); } break; case 'b': blocking_mode = false; if(verbose_print){ printf("blocking mode = %x\n", blocking_mode); } break; case 'L': node_list = optarg; host_to_use = node_list; if(verbose_print){ printf("node_list = '%s'\n", node_list); } break; case 'x': temp_slen = strlen(optarg); /* check if the string is the same length as 'SCR', if so compare them */ if(temp_slen == strlen(SCR_STRING)){ if(strncmp(optarg, SCR_STRING, strlen(SCR_STRING)) == 0){ /* if the string is SCR, then forward all SCR related env vars */ if(verbose_print) printf("all scr envs will be forwarded\n"); forward_all_scr_envs = true; } else{ /* handled like a normal env var */ handle_standard_env_var(optarg, &scr_environ); } } else{ /*handled like a normal env var */ handle_standard_env_var(optarg, &scr_environ); } break; case 'v': verbose_print = true; break; case 'p': pmix_mode = true; if(verbose_print){ printf("pmix_mode = %x\n", pmix_mode); } break; case 'P': pmix_mode = false; if(verbose_print){ printf("pmix_mode = %x\n", pmix_mode); } break; case 'e': experimental = true; break; case '?': printf("missing a required argument or invalid option: %x\n", optopt); print_usage(argv[0]); exit(1); break; default: printf("Unrecognized argument: %c\n", c); print_usage(argv[0]); exit(1); break; } } /* number of instances to spawn */ number_of_clients = proc_count; /* check to make sure an application was specified to launch */ if( optind < argc ){ /* if optind is < argc, it means there is at least one more arg * beyond the args for this program */ path_to_app = argv[optind]; spawned_app_argc = argc - optind; if(verbose_print) { printf("app to launch: %s @ %s:%d\n", path_to_app, __FILE__, __LINE__); } } else{ printf("program_to_spawn option was not provded\n"); print_usage(argv[0]); exit(1); } if(verbose_print){ printf("master process will spawn %d instances; app to run: %s\n\n", number_of_clients, path_to_app); printf("pmix version: %s (host: %s)\n", PMIx_Get_version(), hostn); } /* init pmix */ retval = PMIx_Init(&main_proc, NULL, 0); if(retval != PMIX_SUCCESS){ error_helper(retval, hostn, "error initializing pmix"); exit(0); } if(verbose_print){ printf("rank %d, host '%s', nspace: '%s' init'd pmix succesfully\n\n", main_proc.rank, hostn, main_proc.nspace); } /* we need to attach to a "system" PMIx server so we * can ask it to spawn applications for us. There can * only be one such connection on a node, so we will * instruct the tool library to only look for it */ int ninfo = 1; PMIX_INFO_CREATE(info, ninfo); flag = true; PMIX_INFO_LOAD(&info[0], PMIX_CONNECT_TO_SYSTEM, &flag, PMIX_BOOL); /* initialize the library and make the connection */ if (PMIX_SUCCESS != (rc = PMIx_tool_init(&tool_proc, NULL, 0 ))) { fprintf(stderr, "PMIx_tool_init failed: %d\n", rc ); exit(rc); } if (0 < ninfo) { PMIX_INFO_FREE(info, ninfo); } /* first call fence to sync all processes */ retval = fence_helper(); if(retval != PMIX_SUCCESS) { error_helper(retval, hostn, "error fencing"); exit(retval); } /* Process SCR env vars if needed */ if(forward_all_scr_envs){ parse_all_scr_envs(&scr_environ, environ); } /* finalize the env array so a NULL is in place */ finalize_array(scr_environ); /* Setup info structs to pass to this: */ /* pmix_info_t *error_info = NULL; */ /* PMIX_INFO_CREATE(error_info, 1); */ /* strncpy(error_info[0].key, PMIX_ERROR_GROUP_ABORT, PMIX_MAX_KEYLEN); error_info[0].value.type = PMIX_BOOL; error_info[0].value.data.flag = true; */ /* strncpy(error_info[0].key, PMIX_ERROR_GROUP_SPAWN, PMIX_MAX_KEYLEN); int t_val = 1; pmix_value_load(&error_info[1].value, &t_val, PMIX_BOOL); */ /*error_info[1].value.type = PMIX_BOOL; error_info[1].value.data.flag = true; */ /* strncpy(error_info[2].key, PMIX_ERROR_GROUP_GENERAL, PMIX_MAX_KEYLEN); error_info[2].value.type = PMIX_BOOL; error_info[2].value.data.flag = true; */ /* TODO: setup error handling when implemented in pmix with the * following error codes: */ /* pmix_status_t registered_codes[5]; registered_codes[0] = PMIX_ERR_JOB_TERMINATED; registered_codes[1] = PMIX_ERR_PROC_ABORTED; registered_codes[2] = PMIX_ERR_PROC_ABORTING; */ PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler_cb, errhandler_reg_callbk, (void *) NULL); /* PMIX_INFO_DESTRUCT(error_info); */ /* allocate memory to hold the spawend app struct */ PMIX_APP_CREATE(spawned_app, 1); /* maxprocs isn't documented very well, but it appears to control * how many instances of the spanwed app are created */ spawned_app->maxprocs = number_of_clients; /* set the app to run */ (void)asprintf(&spawned_app->cmd, "%s", path_to_app); /* set argv for spawned app starting with remaining argv */ spawned_app->argv = &argv[optind]; /* set the environment pointer */ spawned_app->env = scr_environ; /*--START: add all proc level infos */ /* add things to the proc level info */ if(!pmix_mode){ job_info_count++; } if(host_to_use != NULL){ proc_info_count++; } if(verbose_print){ printf("enabling debug feature for forwarding stdout/stderr\n"); proc_info_count+=2; /* add PMIX_FWD_STDOUT and PMIX_FWD_STDERR later*/ } if(experimental){ job_info_count++; } if(node_count == 1){ job_info_count++; } /*--END: add all proc level infos */ /*--START: append actual proc level info */ PMIX_INFO_CREATE(job_info, job_info_count); PMIX_INFO_CREATE(proc_info, proc_info_count); /* PMIX_VAL_set_assign(_v, _field, _val ) */ /* PMIX_VAL_set_strdup(_v, _field, _val ) */ if(host_to_use != NULL){ /* add info struct to the spawned app itself for the host */ /* old way */ strncpy(proc_info[proc_info_index].key, PMIX_HOST, PMIX_MAX_KEYLEN); //proc_info[proc_info_index].value.type = PMIX_STRING; /* set the data for host list to use */ //proc_info[proc_info_index].value.data.string = host_to_use; /* end old way */ if(verbose_print) printf("about to set host val\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), string, host_to_use ); proc_info_index++; } if(!pmix_mode){ strncpy(job_info[job_info_index].key, PMIX_NON_PMI, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set non pmix flag\n"); PMIX_VAL_SET(&(job_info[job_info_index].value), flag, true); job_info_index++; } if(verbose_print){ strncpy(proc_info[proc_info_index].key, PMIX_FWD_STDOUT, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set stdout flag\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), flag, true ); proc_info_index++; strncpy(proc_info[proc_info_index].key, PMIX_FWD_STDERR, PMIX_MAX_KEYLEN); if(verbose_print) printf("about to set stderr flag\n"); PMIX_VAL_SET(&(proc_info[proc_info_index].value), flag, true ); proc_info_index++; } if(experimental){ printf("attempting to perform experiment\n"); bool local_flag = true; PMIX_INFO_LOAD(&job_info[job_info_index], PMIX_NOTIFY_COMPLETION, &local_flag, PMIX_BOOL); job_info_index++; } if(node_count == 1){ strncpy(job_info[job_info_index].key, PMIX_PPR, PMIX_MAX_KEYLEN); PMIX_VAL_SET(&(job_info[job_info_index].value), string, "1:n"); job_info_index++; } /*--END: append actual proc level info */ /* sanity check to make sure we covered all the info structs */ if(proc_info_index != proc_info_count ){ printf("bug: mismatch with appending proc info\n"); exit(1); } if(job_info_index != job_info_count){ printf("bug: mismatch with appending job info\n"); exit(1); } /* TODO: TEST PMIX_NOTIFY_COMPLETION WHEN IT'S IMPLEMENTED IN PMIX */ /* fill in job_info */ /* strncpy(job_info[0].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_INT; job_info[0].value.data.integer = 10; */ /* strncpy(job_info[0].key, PMIX_NOTIFY_COMPLETION, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_BOOL; job_info[0].value.data.flag = true; */ /*strncpy(spawned_app->info[0].key, PMIX_DISPLAY_MAP, PMIX_MAX_KEYLEN); job_info[0].value.type = PMIX_BOOL; job_info[0].value.data.flag = true;*/ /* TODO: TEST PMIX_NOTIFY_COMPLETION WHEN IT'S IMPLEMENTED IN PMIX */ spawned_app->info = proc_info; spawned_app->ninfo = proc_info_count; if(verbose_print){ printf("proc level info count: %d\n", proc_info_count); } /* call spawn */ retval = PMIx_Spawn(job_info, job_info_count, spawned_app, 1, spawned_nsp); if(verbose_print) { printf("rank %d (host %s) just called spawn; spawned nspace: %s, retval:%d\n", main_proc.rank, hostn, spawned_nsp, retval); } if(retval != PMIX_SUCCESS){ error_helper(retval, hostn, "error with spawn"); goto done; } /* TODO: TEMPORARY WORKAROUND TO WAIT FOR A SPAWNED PROCESS */ if(blocking_mode){ sleep(fixed_sleep); /* wait until app completes: */ while(!done_flag){ sleep(fixed_sleep); temp_counter++; if(temp_counter*fixed_sleep >= sleep_max) { if(verbose_print) printf("broke out early\n"); break; } } if(verbose_print){ if(done_flag == true) { printf("done_flag was set to true!\n"); } } } done: /* fence first */ retval = fence_helper(); if(retval != PMIX_SUCCESS){ if(verbose_print) printf("error fencing, finalize may fail ! \n"); } /* finalize */ PMIx_Deregister_event_handler(_g_errhandler_ref, NULL, NULL); if(verbose_print){ fprintf(stdout, "spawn master process (rank %d) (host %s) finalizing\n", main_proc.rank, hostn); } /* clean up pmix */ retval = PMIx_tool_finalize(); if(retval == PMIX_SUCCESS) { if(verbose_print){ printf("spawn master process %d finalize success\n\n", main_proc.rank); } } else { printf("spawn master process %d pmix_finalize FAILURE: %d\n\n", main_proc.rank, retval); } retval = PMIx_Finalize(NULL, 0); fflush(stdout); /* cleanup before returning */ PMIX_INFO_FREE(job_info, job_info_count); spawned_app->argv = NULL; PMIX_APP_FREE(spawned_app, 1); if(verbose_print) printf("%s exiting cleanly :)\n", argv[0]); return 0; }
int main(int argc, char **argv) { int rc; pmix_value_t value; pmix_value_t *val = &value; pmix_proc_t proc; uint32_t nprocs; /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(0); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); /* get our universe size */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, errhandler_reg_callbk, NULL); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } /* rank=0 calls abort */ if (0 == myproc.rank) { PMIx_Abort(PMIX_ERR_OUT_OF_RESOURCE, "Eat rocks", &proc, 1); fprintf(stderr, "Client ns %s rank %d: Abort called\n", myproc.nspace, myproc.rank); } /* everyone simply waits */ while (!completed) { struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 100000; nanosleep(&ts, NULL); } done: /* finalize us */ fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); PMIx_Deregister_event_handler(1, op_callbk, NULL); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } else { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); } fflush(stderr); return(0); }
int main(int argc, char **argv) { char **client_env=NULL; char **client_argv=NULL; int rc; struct stat stat_buf; struct timeval tv; double test_start; test_params params; INIT_TEST_PARAMS(params); int test_fail = 0; char *tmp; int ns_nprocs; gettimeofday(&tv, NULL); test_start = tv.tv_sec + 1E-6*tv.tv_usec; /* smoke test */ if (PMIX_SUCCESS != 0) { TEST_ERROR(("ERROR IN COMPUTING CONSTANTS: PMIX_SUCCESS = %d", PMIX_SUCCESS)); exit(1); } TEST_VERBOSE(("Testing version %s", PMIx_Get_version())); parse_cmd(argc, argv, ¶ms); TEST_VERBOSE(("Start PMIx_lite smoke test (timeout is %d)", params.timeout)); /* set common argv and env */ client_env = pmix_argv_copy(environ); set_client_argv(¶ms, &client_argv); tmp = pmix_argv_join(client_argv, ' '); TEST_VERBOSE(("Executing test: %s", tmp)); free(tmp); /* verify executable */ if( 0 > ( rc = stat(params.binary, &stat_buf) ) ){ TEST_ERROR(("Cannot stat() executable \"%s\": %d: %s", params.binary, errno, strerror(errno))); FREE_TEST_PARAMS(params); return 0; } else if( !S_ISREG(stat_buf.st_mode) ){ TEST_ERROR(("Client executable \"%s\": is not a regular file", params.binary)); FREE_TEST_PARAMS(params); return 0; }else if( !(stat_buf.st_mode & S_IXUSR) ){ TEST_ERROR(("Client executable \"%s\": has no executable flag", params.binary)); FREE_TEST_PARAMS(params); return 0; } if (PMIX_SUCCESS != (rc = server_init(¶ms))) { FREE_TEST_PARAMS(params); return rc; } cli_init(params.lsize); int launched = 0; /* set namespaces and fork clients */ if (NULL == params.ns_dist) { uint32_t i; int base_rank = 0; /* compute my start counter */ for(i = 0; i < (uint32_t)my_server_id; i++) { base_rank += (params.nprocs % params.nservers) > (uint32_t)i ? params.nprocs / params.nservers + 1 : params.nprocs / params.nservers; } /* we have a single namespace for all clients */ ns_nprocs = params.nprocs; launched += server_launch_clients(params.lsize, params.nprocs, base_rank, ¶ms, &client_env, &client_argv); } else { char *pch; pch = strtok(params.ns_dist, ":"); while (NULL != pch) { ns_nprocs = (int)strtol(pch, NULL, 10); if (params.nprocs < (uint32_t)(launched+ns_nprocs)) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); FREE_TEST_PARAMS(params); return PMIX_ERROR; } if (0 < ns_nprocs) { launched += server_launch_clients(ns_nprocs, ns_nprocs, 0, ¶ms, &client_env, &client_argv); } pch = strtok (NULL, ":"); } } if (params.lsize != (uint32_t)launched) { TEST_ERROR(("Total number of processes doesn't correspond number specified by ns_dist parameter.")); cli_kill_all(); test_fail = 1; } /* hang around until the client(s) finalize */ while (!test_terminated()) { // To avoid test hang we want to interrupt the loop each 0.1s double test_current; // check if we exceed the max time gettimeofday(&tv, NULL); test_current = tv.tv_sec + 1E-6*tv.tv_usec; if( (test_current - test_start) > params.timeout ){ break; } cli_wait_all(0); } if( !test_terminated() ){ TEST_ERROR(("Test exited by a timeout!")); cli_kill_all(); test_fail = 1; } if( test_abort ){ TEST_ERROR(("Test was aborted!")); /* do not simply kill the clients as that generates * event notifications which these tests then print * out, flooding the log */ // cli_kill_all(); test_fail = 1; } if (0 != params.test_spawn) { PMIX_WAIT_FOR_COMPLETION(spawn_wait); } /* deregister the errhandler */ PMIx_Deregister_event_handler(0, op_callbk, NULL); cli_wait_all(1.0); test_fail += server_finalize(¶ms); FREE_TEST_PARAMS(params); pmix_argv_free(client_argv); pmix_argv_free(client_env); return test_fail; }