extern void android_main(struct android_app* state) { // make sure that the linker doesn't strip out our glue app_dummy(); setup_env(state); CEventLoop eventLoop(state); CXBMCApp xbmcApp(state->activity); if (xbmcApp.isValid()) { g_xbmcapp = &xbmcApp; IInputHandler inputHandler; eventLoop.run(xbmcApp, inputHandler); } else CXBMCApp::android_printf("android_main: setup failed"); CXBMCApp::android_printf("android_main: Exiting"); // We need to call exit() so that all loaded libraries are properly unloaded // otherwise on the next start of the Activity android will simple re-use // those loaded libs in the state they were in when we quit XBMC last time // which will lead to crashes because of global/static classes that haven't // been properly uninitialized exit(0); }
/*
 * Initialise the controller state.
 *
 * Validates that config_dir and seafile_dir are existing directories,
 * creates the two ccnet clients and loads their configuration, derives a
 * default log directory ("<dirname(config_dir)>/logs") when logdir is NULL,
 * stores the directory pointers in ctl (the strings are NOT copied), reads
 * the seafdav configuration and finally sets up the pidfile path and the
 * environment.
 *
 * When logdir is NULL the string built here is handed over to ctl->logdir
 * on success and released on failure.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int
seaf_controller_init (SeafileController *ctl,
                      char *central_config_dir,
                      char *config_dir,
                      char *seafile_dir,
                      char *logdir)
{
    init_seafile_path ();

    if (!g_file_test (config_dir, G_FILE_TEST_IS_DIR)) {
        seaf_warning ("invalid config_dir: %s\n", config_dir);
        return -1;
    }

    if (!g_file_test (seafile_dir, G_FILE_TEST_IS_DIR)) {
        seaf_warning ("invalid seafile_dir: %s\n", seafile_dir);
        return -1;
    }

    ctl->client = ccnet_client_new ();
    ctl->sync_client = ccnet_client_new ();

    if (ccnet_client_load_confdir (ctl->client, central_config_dir, config_dir) < 0) {
        seaf_warning ("Failed to load ccnet confdir\n");
        return -1;
    }

    if (ccnet_client_load_confdir (ctl->sync_client, central_config_dir, config_dir) < 0) {
        seaf_warning ("Failed to load ccnet confdir\n");
        return -1;
    }

    if (logdir == NULL) {
        char *topdir = g_path_get_dirname(config_dir);
        logdir = g_build_filename (topdir, "logs", NULL);
        /* topdir is only needed to build logdir; release it right away so
         * that it is not leaked on the error path below. */
        g_free (topdir);

        if (checkdir_with_mkdir(logdir) < 0) {
            /* Report first: errno must still be the one set by the failed call. */
            fprintf (stderr, "failed to create log folder \"%s\": %s\n",
                     logdir, strerror(errno));
            /* logdir was allocated here and nobody else owns it yet. */
            g_free (logdir);
            return -1;
        }
    }

    ctl->central_config_dir = central_config_dir;
    ctl->config_dir = config_dir;
    ctl->seafile_dir = seafile_dir;
    ctl->logdir = logdir;

    if (read_seafdav_config() < 0) {
        return -1;
    }

    init_pidfile_path (ctl);
    setup_env ();

    return 0;
}
/*
 * Program entry point: set up the environment, run the snake game and call
 * endwin() once the game returns. Always returns 0.
 */
int main(void)
{
    setup_env();
    snake_start();
    endwin();

    return 0;
}
static int update_block(struct block *block) { FILE *child_stdout; int child_status, code; char output[2048], *text = output; if (setup_env(block)) return 1; /* Pipe, fork and exec a shell for the block command line */ child_stdout = popen(block->command, "r"); if (!child_stdout) { perror("popen"); return 1; } /* Do not distinguish EOF or error, just read child's output */ memset(output, 0, sizeof(output)); fread(output, 1, sizeof(output) - 1, child_stdout); /* Wait for the child process to terminate */ child_status = pclose(child_stdout); if (child_status == -1) { perror("pclose"); return 1; } if (!WIFEXITED(child_status)) { fprintf(stderr, "child did not exit correctly\n"); return 1; } code = WEXITSTATUS(child_status); if (code != 0 && code != 127) { char reason[1024]; fprintf(stderr, "bad return code %d, skipping %s\n", code, block->name); sprintf(reason, "[%s] ERROR: bad return code %d", block->name, code); failed(reason, block); return 1; } /* From here, the update went ok so merge the output */ strncpy(block->urgent, code == 127 ? "true" : "false", sizeof(block->urgent) - 1); linecpy(&text, block->full_text, sizeof(block->full_text) - 1); linecpy(&text, block->short_text, sizeof(block->short_text) - 1); linecpy(&text, block->color, sizeof(block->color) - 1); block->last_update = time(NULL); return 0; }
void do_exec(void) { if (!cvm_setugid()) respond(0, "NO Internal error: could not set UID/GID"); else if (!setup_env()) respond(0, "NO Internal error: could not set environment"); else { alarm(0); execvp(nextcmd[0], nextcmd); respond(0, "NO Could not execute second stage"); } exit(1); }
void block_update(struct block *block) { FILE *child_stdout; int child_status, code; char output[2048], *text = output; if (setup_env(block)) return mark_as_failed(block, "failed to setup env", -1); /* Pipe, fork and exec a shell for the block command line */ child_stdout = popen(block->command, "r"); if (!child_stdout) { berrorx(block, "popen(%s)", block->command); return mark_as_failed(block, "failed to fork", -1); } /* Do not distinguish EOF or error, just read child's output */ memset(output, 0, sizeof(output)); fread(output, 1, sizeof(output) - 1, child_stdout); /* Wait for the child process to terminate */ child_status = pclose(child_stdout); if (child_status == -1) { berrorx(block, "pclose"); return mark_as_failed(block, "failed to wait", -1); } if (!WIFEXITED(child_status)) { berror(block, "child did not exit correctly"); return mark_as_failed(block, "command did not exit", -1); } code = WEXITSTATUS(child_status); if (code != 0 && code != '!') { char reason[1024] = { 0 }; berror(block, "bad exit code %d", code); linecpy(&text, reason, sizeof(reason) - 1); return mark_as_failed(block, reason, code); } /* From here, the update went ok so merge the output */ strncpy(block->urgent, code == '!' ? "true" : "false", sizeof(block->urgent) - 1); linecpy(&text, block->full_text, sizeof(block->full_text) - 1); linecpy(&text, block->short_text, sizeof(block->short_text) - 1); linecpy(&text, block->color, sizeof(block->color) - 1); block->last_update = time(NULL); bdebug(block, "updated successfully"); }
int main(int ac, char **av) { t_box box; int fd; ft_bzero(&box, sizeof(t_box)); box.color = 0xFFFFFF; if (ac > 2) analyze_args(ac, av, &box); if (ac < 2) error_lem("NO ARGUMENT\n", &box); fd = open(av[ac - 1], O_RDONLY); setup_env(&box, fd); setup_map(&box); display_info(); mlx_hook(box.win, 2, (1L << 0), command, &box); mlx_loop(box.mlx); return (0); }
int main (int argc, char **argv) { int optidx = 0; krb5_error_code ret; krb5_context context; krb5_keytab kt; setprogname (argv[0]); ret = krb5_init_context (&context); if (ret) errx(1, "krb5_init_context failed: %u", ret); if (getarg(args, sizeof(args) / sizeof(args[0]), argc, argv, &optidx)) usage(1); if (help_flag) usage(0); if (version_flag) { print_version(NULL); return 0; } if (enctype_string) enc_type = enctype_string; if (session_enctype_string) session_enc_type = session_enctype_string; else session_enc_type = enc_type; setup_env(context, &kt); if (use_krb5) create_krb5_tickets(context, kt); krb5_kt_close(context, kt); return 0; }
int main(int argc, char*argv[]) { setup_env(); pthread_t threads[MAX_THREADS]; pami_geometry_t world_geometry; size_t num_algorithm[2]; pami_algorithm_t *always_works_algo = NULL; pami_metadata_t *always_works_md = NULL; pami_algorithm_t *must_query_algo = NULL; pami_metadata_t *must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; pami_xfer_t barrier; volatile unsigned poll_flag = 0; int num_threads = gNum_contexts; assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); thread_data_t *td = (thread_data_t*)malloc(sizeof(thread_data_t) * gNum_contexts); int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc != PAMI_SUCCESS) return 1; num_ep = num_tasks *gNum_contexts; assert(task_id >= 0); assert(task_id < num_tasks); int i=0; printf("Contexts: [ "); for(i=0;i<num_threads;i++) { td[i].context = context[i]; td[i].tid = i; td[i].logical_rank = task_id*num_threads+i; printf("(%d|%d)%p ", i, td[i].logical_rank, context[i]); } printf("]\n"); if(task_id == 0) printf("%s: Querying World Geometry\n", argv[0]); rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, num_algorithm, &always_works_algo, &always_works_md, &must_query_algo, &must_query_md); if (rc != PAMI_SUCCESS) return 1; /* Create the range geometry */ pami_geometry_range_t *range; int rangecount; rangecount = 1; range = (pami_geometry_range_t *)malloc(((rangecount)) * sizeof(pami_geometry_range_t)); /*init range */ range[0].lo = 0; range[0].hi = num_tasks-1; /*init range geometry; this will allocate an endpoint for each collective*/ if(task_id == 0) printf("%s: Creating All Context World Geometry\n", argv[0]); pami_geometry_t parent = (gNum_contexts>1)?PAMI_GEOMETRY_NULL:world_geometry; rc |= create_all_ctxt_geometry(client, 
context, gNum_contexts, parent, &newgeometry, range, rangecount, 1); if (rc != PAMI_SUCCESS) return 1; /* Set up world barrier */ barrier.cb_done = cb_done; barrier.cookie = (void*) & poll_flag; barrier.algorithm = always_works_algo[0]; rc |= blocking_coll_advance_all(0, context, &barrier, &poll_flag); if (rc != PAMI_SUCCESS) return 1; int t; assert(gNum_contexts >= num_threads); if(task_id == 0) printf("%s: Tasks:%zu Threads/task:%d Contexts/task:%zu\n", argv[0],num_tasks,num_threads, gNum_contexts); for(t=0; t<num_threads; t++){ rc = pthread_create(&threads[t], NULL, allgather_test, (void*)(&td[t])); if (rc){ printf("ERROR; return code from pthread_create() is %d\n", rc); exit(-1); } } void* status; for(t=0; t<num_threads; t++) { rc = pthread_join(threads[t], &status); if (rc) { printf("ERROR; return code from pthread_join() is %d\n", rc); exit(-1); } } blocking_coll_advance_all(0, context, &barrier, &poll_flag); free(always_works_algo); free(always_works_md); free(must_query_algo); free(must_query_md); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Bcast variables */ size_t bcast_num_algorithm[2]; pami_algorithm_t *bcast_always_works_algo = NULL; pami_metadata_t *bcast_always_works_md = NULL; pami_algorithm_t *bcast_must_query_algo = NULL; pami_metadata_t *bcast_must_query_md = NULL; pami_xfer_type_t bcast_xfer = PAMI_XFER_BROADCAST; volatile unsigned bcast_poll_flag = 0; int nalg= 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t broadcast; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc != PAMI_SUCCESS) return 1; int o; for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */ { pami_configuration_t configuration[1]; configuration[0].name = PAMI_GEOMETRY_OPTIMIZE; configuration[0].value.intval = o; /* de/optimize */ if(o == -1) ; /* skip update, use defaults */ else rc |= update_geometry(client, context[0], world_geometry, configuration, 1); if (rc != PAMI_SUCCESS) return 1; if(gNumRoots > num_tasks) gNumRoots = num_tasks; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = posix_memalign(&buf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; unsigned iContext 
= 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc != PAMI_SUCCESS) return 1; /* Query the world geometry for broadcast algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, bcast_xfer, bcast_num_algorithm, &bcast_always_works_algo, &bcast_always_works_md, &bcast_must_query_algo, &bcast_must_query_md); if (rc != PAMI_SUCCESS) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); for (nalg = 0; nalg < bcast_num_algorithm[1]; nalg++) { broadcast.cb_done = cb_done; broadcast.cookie = (void*) & bcast_poll_flag; broadcast.algorithm = bcast_must_query_algo[nalg]; broadcast.cmd.xfer_broadcast.buf = buf; broadcast.cmd.xfer_broadcast.type = PAMI_TYPE_BYTE; broadcast.cmd.xfer_broadcast.typecount = 0; gProtocolName = bcast_must_query_md[nalg].name; metadata_result_t result = {0}; int k; for (k=0; k< gNumRoots; k++) { pami_endpoint_t root_ep; pami_task_t root_task = (pami_task_t)k; PAMI_Endpoint_create(client, root_task, 0, &root_ep); broadcast.cmd.xfer_broadcast.root = root_ep; if (task_id == root_task) { printf("# Broadcast Bandwidth Test(size:%zu) -- context = %d, optimize = %d, root = %d protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, o, root_task, gProtocolName, bcast_must_query_md[nalg].range_lo, bcast_must_query_md[nalg].range_hi, bcast_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(bcast_must_query_md[nalg].name,gSelected) == NULL) && 
gSelector) || ((strstr(bcast_must_query_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; unsigned checkrequired = bcast_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || bcast_must_query_md[nalg].check_fn); /* must have function if checkrequired. */ int i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; broadcast.cmd.xfer_broadcast.typecount = i; result = check_metadata(bcast_must_query_md[nalg], broadcast, PAMI_TYPE_BYTE, dataSent, /* metadata uses bytes i, */ broadcast.cmd.xfer_broadcast.buf, PAMI_TYPE_BYTE, dataSent, broadcast.cmd.xfer_broadcast.buf); if (bcast_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) continue; if (task_id == root_task) bcast_initialize_sndbuf (buf, i, root_task); else memset(buf, 0xFF, i); blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = bcast_must_query_md[nalg].check_fn(&broadcast); if (result.bitmask) continue; } blocking_coll (context[iContext], &broadcast, &bcast_poll_flag); } blocking_coll(context[iContext], &barrier, &bar_poll_flag); tf = timer(); int rc_check; rc |= rc_check = bcast_check_rcvbuf (buf, i, root_task); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == root_task) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(bcast_always_works_algo); free(bcast_always_works_md); 
free(bcast_must_query_algo); free(bcast_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ buf = (char*)buf - gBuffer_offset; free(buf); } /* optimize loop */ rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
/*
 * Shell entry point.
 *
 *   c - argument count
 *   v - argument vector; v[0] decides login / restricted / job-control mode
 *   e - environment vector (not referenced directly in this function)
 *
 * Performs process-wide initialisation (ids, storage, signals, names from
 * the environment, locale), parses the options, drops set-id privileges
 * unless privflg is set, sets up the positional parameters and special
 * variables, runs the profiles for a login shell, selects the input source
 * and finally calls exfile(0) followed by done(0).
 *
 * The statement order matters: the setjmp(subshell) point is re-entered
 * for shell file execution, and `beenhere` keeps the profile / input
 * set-up from being repeated on re-entry.
 */
int
main(int c, char *v[], char *e[])
{
	int		rflag = ttyflg;
	int		rsflag = 1;	/* local restricted flag */
	unsigned char	*flagc = flagadr;
	struct namnod	*n;

	mypid = getpid();
	mypgid = getpgid(mypid);
	mysid = getsid(mypid);

	/*
	 * Do locale processing only if /usr is mounted.
	 */
	localedir_exists = (access(localedir, F_OK) == 0);

	/*
	 * initialize storage allocation
	 */

	if (stakbot == 0) {
		addblok((unsigned)0);
	}

	/*
	 * If the first character of the last path element of v[0] is "-"
	 * (ex. -sh, or /bin/-sh), this is a login shell
	 */
	if (*simple(v[0]) == '-') {
		signal(SIGXCPU, SIG_DFL);
		signal(SIGXFSZ, SIG_DFL);

		/*
		 * As the previous comment states, this is a login shell.
		 * Therefore, we set the login_shell flag to explicitly
		 * indicate this condition.
		 */
		login_shell = TRUE;
	}

	stdsigs();

	/*
	 * set names from userenv
	 */

	setup_env();

	/*
	 * LC_MESSAGES is set here so that early error messages will
	 * come out in the right style.
	 * Note that LC_CTYPE is done later on and is *not*
	 * taken from the previous environ
	 */

	/*
	 * Do locale processing only if /usr is mounted.
	 */
	if (localedir_exists)
		(void) setlocale(LC_ALL, "");
#if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
#define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
#endif
	(void) textdomain(TEXT_DOMAIN);

	/*
	 * 'rsflag' is cleared (restricted) if the SHELL variable is set
	 * in the environment and the simple file part of its value
	 * is "rsh".
	 */
	if (n = findnam("SHELL")) {
		if (eq("rsh", simple(n->namval)))
			rsflag = 0;
	}

	/*
	 * a shell is also restricted if the simple name of argv(0) is
	 * rsh or -rsh
	 */

#ifndef RES

	if (c > 0 && (eq("rsh", simple(*v)) || eq("-rsh", simple(*v))))
		rflag = 0;

#endif

	/* Invoked as jsh or -jsh: turn on the monitor flag. */
	if (eq("jsh", simple(*v)) || eq("-jsh", simple(*v)))
		flags |= monitorflg;

	hcreate();
	set_dotpath();

	/*
	 * look for options
	 * dolc is $#
	 */
	dolc = options(c, v);

	if (dolc < 2) {
		flags |= stdflg;
		{

			/* Append STDFLG to the NUL-terminated flag string. */
			while (*flagc)
				flagc++;
			*flagc++ = STDFLG;
			*flagc = 0;
		}
	}
	if ((flags & stdflg) == 0)
		dolc--;

	if ((flags & privflg) == 0) {
		uid_t euid;
		gid_t egid;
		uid_t ruid;
		gid_t rgid;

		/*
		 * Determine all of the user's id #'s for this process and
		 * then decide if this shell is being entered as a result
		 * of a fork/exec.
		 * If the effective uid/gid do NOT match and the euid/egid
		 * is < 100 and the egid is NOT 1, reset the uid and gid to
		 * the user originally calling this process.
		 */
		euid = geteuid();
		ruid = getuid();
		egid = getegid();
		rgid = getgid();
		/* NOTE(review): the setuid()/setgid() results are not checked. */
		if ((euid != ruid) && (euid < 100))
			setuid(ruid);   /* reset the uid to the orig user */
		if ((egid != rgid) && ((egid < 100) && (egid != 1)))
			setgid(rgid);   /* reset the gid to the orig user */
	}

	/* Point dolv at the last dolc entries of the argument vector. */
	dolv = (unsigned char **)v + c - dolc;
	dolc--;

	/*
	 * return here for shell file execution
	 * but not for parenthesis subshells
	 */
	if (setjmp(subshell)) {
		freejobs();
		flags |= subsh;
	}

	/*
	 * number of positional parameters
	 */
	replace(&cmdadr, dolv[0]);	/* cmdadr is $0 */

	/*
	 * set pidname '$$'
	 */
	assnum(&pidadr, (long)mypid);

	/*
	 * set up temp file names
	 */
	settmp();

	/*
	 * default internal field separators
	 * Do not allow importing of IFS from parent shell.
	 * setup_env() may have set anything from parent shell to IFS.
	 * Always set the default ifs to IFS.
	 */
	assign(&ifsnod, (unsigned char *)sptbnl);

	dfault(&mchknod, MAILCHECK);
	mailchk = stoi(mchknod.namval);

	/* initialize OPTIND for getopt */

	n = lookup("OPTIND");
	assign(n, (unsigned char *)"1");
	/*
	 * make sure that option parsing starts
	 * at first character
	 */
	_sp = 1;

	/* First pass only: run the profiles and select the input source. */
	if ((beenhere++) == FALSE)	/* ? profile */
	{
		if ((login_shell == TRUE) && (flags & privflg) == 0) {

			/* system profile */

#ifndef RES

			if ((input = pathopen(nullstr, sysprofile)) >= 0)
				exfile(rflag);		/* file exists */

#endif
			/* user profile */

			if ((input = pathopen(homenod.namval, profile)) >= 0) {
				exfile(rflag);
				flags &= ~ttyflg;
			}
		}
		/*
		 * Either restricted indicator puts the shell in restricted
		 * mode; 'r' is appended to the flag string only once.
		 */
		if (rsflag == 0 || rflag == 0) {
			if ((flags & rshflg) == 0) {
				while (*flagc)
					flagc++;
				*flagc++ = 'r';
				*flagc = '\0';
			}
			flags |= rshflg;
		}

		/*
		 * open input file if specified
		 */
		if (comdiv) {
			estabf(comdiv);
			input = -1;
		}
		else
		{
			if (flags & stdflg) {
				input = 0;
			} else {
			/*
			 * If the command file specified by 'cmdadr'
			 * doesn't exist, chkopen() will fail calling
			 * exitsh(). If this is a login shell and
			 * the $HOME/.profile file does not exist, the
			 * above statement "flags &= ~ttyflg" does not
			 * get executed and this makes exitsh() call
			 * longjmp() instead of exiting. longjmp() will
			 * return to the location specified by the last
			 * active jmpbuffer, which is the one set up in
			 * the function exfile() called after the system
			 * profile file is executed (see lines above).
			 * This would cause an infinite loop, because
			 * chkopen() will continue to fail and exitsh()
			 * to call longjmp(). To make exitsh() exit instead
			 * of calling longjmp(), we then set the flag forcexit
			 * at this stage.
			 */

				flags |= forcexit;
				input = chkopen(cmdadr, 0);
				flags &= ~forcexit;
			}

#ifdef ACCT
			if (input != 0)
				preacct(cmdadr);
#endif
			comdiv--;
		}
	}
#ifdef pdp11
	else
		*execargs = (char *)dolv;	/* for `ps' cmd */
#endif

	/* NOTE(review): no return statement follows; done(0) presumably does not return - confirm. */
	exfile(0);
	done(0);
}
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Alltoallv variables */ size_t alltoallv_num_algorithm[2]; pami_algorithm_t *alltoallv_always_works_algo = NULL; pami_metadata_t *alltoallv_always_works_md = NULL; pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; pami_algorithm_t *alltoallv_must_query_algo = NULL; pami_metadata_t *alltoallv_must_query_md = NULL; pami_xfer_type_t alltoallv_xfer = PAMI_XFER_ALLTOALLV; volatile unsigned alltoallv_poll_flag = 0; int nalg= 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t alltoallv; /* Process environment variables and setup globals */ if(argc > 1 && argv[1][0] == '-' && (argv[1][1] == 'h' || argv[1][1] == 'H') ) setup_env_internal(1); else setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; sndlens = (size_t*) malloc(num_tasks * sizeof(size_t)); 
assert(sndlens); sdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(sdispls); rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rcvlens); rdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rdispls); unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc != PAMI_SUCCESS) return 1; int o; for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */ { pami_configuration_t configuration[1]; configuration[0].name = PAMI_GEOMETRY_OPTIMIZE; configuration[0].value.intval = o; /* de/optimize */ if(o == -1) ; /* skip update, use defaults */ else rc |= update_geometry(client, context[0], world_geometry, configuration, 1); if (rc != PAMI_SUCCESS) return 1; /* Query the world geometry for alltoallv algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, alltoallv_xfer, alltoallv_num_algorithm, &alltoallv_always_works_algo, &alltoallv_always_works_md, &alltoallv_must_query_algo, &alltoallv_must_query_md); if (rc != PAMI_SUCCESS) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < alltoallv_num_algorithm[0]) { query_protocol = 0; next_algo = &alltoallv_always_works_algo[nalg]; next_md = &alltoallv_always_works_md[nalg]; } else { query_protocol = 1; next_algo = &alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]]; next_md = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]]; } gProtocolName = next_md->name; 
alltoallv.cb_done = cb_done; alltoallv.cookie = (void*) & alltoallv_poll_flag; alltoallv.algorithm = *next_algo; alltoallv.cmd.xfer_alltoallv.sndbuf = sbuf; alltoallv.cmd.xfer_alltoallv.stype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.stypecounts = sndlens; alltoallv.cmd.xfer_alltoallv.sdispls = sdispls; alltoallv.cmd.xfer_alltoallv.rcvbuf = rbuf; alltoallv.cmd.xfer_alltoallv.rtype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.rtypecounts = rcvlens; alltoallv.cmd.xfer_alltoallv.rdispls = rdispls; gProtocolName = next_md->name; if (task_id == 0) { printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, optimize = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, o, gProtocolName, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; int i, j; int dt,op=4/*SUM*/; unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */ for (dt = 0; dt < dt_count; dt++) { if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt]) { if (task_id == 0) printf("Running Alltoallv: %s\n", dt_array_str[dt]); for ( i = gMin_byte_count? MAX(1,gMin_byte_count/get_type_size(dt_array[dt])) : 0; /*clumsy, only want 0 if hardcoded to 0, othersize min 1 */ i <= gMax_byte_count/get_type_size(dt_array[dt]); i = i ? 
i*2 : 1 /* handle zero min */) { size_t dataSent = i * get_type_size(dt_array[dt]); int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (j = 0; j < num_tasks; j++) { sndlens[j] = rcvlens[j] = i; sdispls[j] = rdispls[j] = i * j; alltoallv_initialize_bufs_dt(sbuf, rbuf, sndlens, rcvlens, sdispls, rdispls, j, dt); } alltoallv.cmd.xfer_alltoallv.rtype = dt_array[dt]; alltoallv.cmd.xfer_alltoallv.stype = dt_array[dt]; if(query_protocol) { size_t sz=get_type_size(dt_array[dt])*i; /* Must initialize all of cmd for metadata */ result = check_metadata(*next_md, alltoallv, dt_array[dt], sz, /* metadata uses bytes i, */ alltoallv.cmd.xfer_alltoallv.sndbuf, dt_array[dt], sz, alltoallv.cmd.xfer_alltoallv.rcvbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) continue; } blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = next_md->check_fn(&alltoallv); if (result.bitmask) continue; } blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); } tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = alltoallv_check_rcvbuf_dt(rbuf, rcvlens, rdispls, num_tasks, task_id, dt); if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]); usec = (tf - ti) / (double)niter; if (task_id == 0) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(alltoallv_always_works_algo); free(alltoallv_always_works_md); free(alltoallv_must_query_algo); free(alltoallv_must_query_md); } /* optimize 
loop */ } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(sndlens); free(sdispls); free(rcvlens); free(rdispls); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* alltoallv_int variables */ size_t alltoallv_int_num_algorithm[2]; pami_algorithm_t *alltoallv_int_always_works_algo = NULL; pami_metadata_t *alltoallv_int_always_works_md = NULL; pami_algorithm_t *alltoallv_int_must_query_algo = NULL; pami_metadata_t *alltoallv_int_must_query_md = NULL; pami_xfer_type_t alltoallv_int_xfer = PAMI_XFER_ALLTOALLV_INT; volatile unsigned alltoallv_int_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t alltoallv_int; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; sndlens = (int*) malloc(num_tasks * sizeof(int)); assert(sndlens); sdispls = (int*) malloc(num_tasks * sizeof(int)); assert(sdispls); rcvlens = (int*) malloc(num_tasks * sizeof(int)); assert(rcvlens); 
rdispls = (int*) malloc(num_tasks * sizeof(int)); assert(rdispls); unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for alltoallv_int algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, alltoallv_int_xfer, alltoallv_int_num_algorithm, &alltoallv_int_always_works_algo, &alltoallv_int_always_works_md, &alltoallv_int_must_query_algo, &alltoallv_int_must_query_md); if (rc == 1) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); for (nalg = 0; nalg < alltoallv_int_num_algorithm[0]; nalg++) { alltoallv_int.cb_done = cb_done; alltoallv_int.cookie = (void*) & alltoallv_int_poll_flag; alltoallv_int.algorithm = alltoallv_int_always_works_algo[nalg]; alltoallv_int.cmd.xfer_alltoallv_int.sndbuf = sbuf; alltoallv_int.cmd.xfer_alltoallv_int.stype = PAMI_TYPE_BYTE; alltoallv_int.cmd.xfer_alltoallv_int.stypecounts = sndlens; alltoallv_int.cmd.xfer_alltoallv_int.sdispls = sdispls; alltoallv_int.cmd.xfer_alltoallv_int.rcvbuf = rbuf; alltoallv_int.cmd.xfer_alltoallv_int.rtype = PAMI_TYPE_BYTE; alltoallv_int.cmd.xfer_alltoallv_int.rtypecounts = rcvlens; alltoallv_int.cmd.xfer_alltoallv_int.rdispls = rdispls; gProtocolName = alltoallv_int_always_works_md[nalg].name; if (task_id == 0) { printf("# Alltoallv_int Bandwidth Test(size:%zu) -- context = %d, protocol: %s\n", num_tasks, iContext, gProtocolName); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if 
(((strstr(alltoallv_int_always_works_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(alltoallv_int_always_works_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; int i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (j = 0; j < num_tasks; j++) { sndlens[j] = rcvlens[j] = i; sdispls[j] = rdispls[j] = i * j; alltoallv_int_initialize_bufs(sbuf, rbuf, sndlens, rcvlens, sdispls, rdispls, j); } blocking_coll(context[iContext], &barrier, &bar_poll_flag); /* Warmup */ blocking_coll(context[iContext], &alltoallv_int, &alltoallv_int_poll_flag); blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { blocking_coll(context[iContext], &alltoallv_int, &alltoallv_int_poll_flag); } tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = alltoallv_int_check_rcvbuf(rbuf, rcvlens, rdispls, num_tasks, task_id); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == 0) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(alltoallv_int_always_works_algo); free(alltoallv_int_always_works_md); free(alltoallv_int_must_query_algo); free(alltoallv_int_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(sndlens); free(sdispls); free(rcvlens); free(rdispls); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
/*
 * exec_task - finish preparing one task of a job step and exec its program.
 *
 * Current process is running as the user when this is called.
 *
 * The function fills in job->envtp, rebuilds job->env from it, runs the
 * switch/MPI/task/spank hooks and the task prolog scripts, applies the
 * user's limits and finally calls execve() on task->argv[0].
 *
 * IN job           - step record; job->envtp and job->env are modified
 * IN local_proc_id - index of this task in job->task[]
 *
 * Does not return: every path ends in execve(), checkpoint_restart_task()
 * or exit().
 */
extern void exec_task(stepd_step_rec_t *job, int local_proc_id)
{
	uint32_t *gtids;		/* pointer to array of ranks */
	int fd, j;
	stepd_step_task_info_t *task = job->task[local_proc_id];
	char **tmp_env;
	int saved_errno;
	uint32_t node_offset = 0, task_offset = 0;

	if (job->node_offset != NO_VAL)
		node_offset = job->node_offset;
	if (job->pack_task_offset != NO_VAL)
		task_offset = job->pack_task_offset;

	/* Global task ids of every task on this node, shifted by task_offset. */
	gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
	for (j = 0; j < job->node_tasks; j++)
		gtids[j] = job->task[j]->gtid + task_offset;
	job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
	xfree(gtids);

	if (job->pack_jobid != NO_VAL)
		job->envtp->jobid = job->pack_jobid;
	else
		job->envtp->jobid = job->jobid;
	job->envtp->stepid = job->stepid;
	job->envtp->nodeid = job->nodeid + node_offset;
	job->envtp->cpus_on_node = job->cpus;
	job->envtp->procid = task->gtid + task_offset;
	job->envtp->localid = task->id;
	job->envtp->task_pid = getpid();
	job->envtp->distribution = job->task_dist;
	job->envtp->cpu_bind = xstrdup(job->cpu_bind);
	job->envtp->cpu_bind_type = job->cpu_bind_type;
	job->envtp->cpu_freq_min = job->cpu_freq_min;
	job->envtp->cpu_freq_max = job->cpu_freq_max;
	job->envtp->cpu_freq_gov = job->cpu_freq_gov;
	job->envtp->mem_bind = xstrdup(job->mem_bind);
	job->envtp->mem_bind_type = job->mem_bind_type;
	/*
	 * NOTE(review): this overwrites the job->task_dist value stored a
	 * few lines above - confirm that is intended.
	 */
	job->envtp->distribution = -1;
	job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
	job->envtp->batch_flag = job->batch;
	job->envtp->uid = job->uid;
	job->envtp->user_name = xstrdup(job->user_name);

	/*
	 * Modify copy of job's environment. Do not alter in place or
	 * concurrent searches of the environment can generate invalid memory
	 * references.
	 */
	job->envtp->env = env_array_copy((const char **) job->env);
	setup_env(job->envtp, false);
	setenvf(&job->envtp->env, "SLURM_JOB_GID", "%d", job->gid);
	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
	if (job->tres_bind) {
		setenvf(&job->envtp->env, "SLURMD_TRES_BIND", "%s",
			job->tres_bind);
	}
	if (job->tres_freq) {
		setenvf(&job->envtp->env, "SLURMD_TRES_FREQ", "%s",
			job->tres_freq);
	}
	/* Swap the finished copy in, then release the old array. */
	tmp_env = job->env;
	job->env = job->envtp->env;
	env_array_free(tmp_env);
	job->envtp->env = NULL;

	xfree(job->envtp->task_count);

	if (task->argv[0] && *task->argv[0] != '/') {
		/*
		 * Normally the client (srun) expands the command name
		 * to a fully qualified path, but in --multi-prog mode it
		 * is left up to the server to search the PATH for the
		 * executable.
		 */
		task->argv[0] = _build_path(task->argv[0], job->env, NULL);
	}

	if (!job->batch && (job->stepid != SLURM_EXTERN_CONT)) {
		if (switch_g_job_attach(job->switch_job, &job->env,
					job->nodeid, (uint32_t) local_proc_id,
					job->nnodes, job->ntasks,
					task->gtid) < 0) {
			error("Unable to attach to interconnect: %m");
			log_fini();
			exit(1);
		}

		if (_setup_mpi(job, local_proc_id) != SLURM_SUCCESS) {
			error("Unable to configure MPI plugin: %m");
			log_fini();
			exit(1);
		}
	}

	/* task-specific pre-launch activities */

	/* task plugin hook */
	if (task_g_pre_launch(job)) {
		error("Failed to invoke task plugins: task_p_pre_launch error");
		exit(1);
	}

	if (!job->batch &&
	    (job->accel_bind_type || job->tres_bind || job->tres_freq)) {
		/*
		 * Modify copy of job's environment. Do not alter in place or
		 * concurrent searches of the environment can generate invalid
		 * memory references.
		 *
		 * Also sets GRES frequency as needed.
		 */
		job->envtp->env = env_array_copy((const char **) job->env);
		gres_plugin_step_set_env(&job->envtp->env, job->step_gres_list,
					 job->accel_bind_type, job->tres_bind,
					 job->tres_freq, local_proc_id);
		tmp_env = job->env;
		job->env = job->envtp->env;
		env_array_free(tmp_env);
	}

	if (spank_user_task(job, local_proc_id) < 0) {
		error("Failed to invoke spank plugin stack");
		exit(1);
	}

	if (conf->task_prolog) {
		char *my_prolog;

		/* Copy the path under the config mutex before running it. */
		slurm_mutex_lock(&conf->config_mutex);
		my_prolog = xstrdup(conf->task_prolog);
		slurm_mutex_unlock(&conf->config_mutex);
		_run_script_and_set_env("slurm task_prolog", my_prolog, job);
		xfree(my_prolog);
	}
	if (job->task_prolog) {
		_run_script_and_set_env("user task_prolog",
					job->task_prolog, job);
	}

	/*
	 * Set TMPDIR after running prolog scripts, since TMPDIR
	 * might be set or changed in one of the prolog scripts.
	 */
	if (local_proc_id == 0)
		_make_tmpdir(job);

	if (!job->batch)
		pdebug_stop_current(job);
	if (job->env == NULL) {
		debug("job->env is NULL");
		job->env = (char **)xmalloc(sizeof(char *));
		job->env[0] = (char *)NULL;
	}

	if (job->restart_dir) {
		info("restart from %s", job->restart_dir);
		/* no return on success */
		checkpoint_restart_task(job, job->restart_dir, task->gtid);
		error("Restart task failed: %m");
		exit(errno);
	}

	if (task->argv[0] == NULL) {
		error("No executable program specified for this task");
		exit(2);
	}

	/*
	 * Do this last so you don't worry too much about the users
	 * limits including the slurmstepd in with it.
	 */
	if (set_user_limits(job) < 0) {
		debug("Unable to set user limits");
		log_fini();
		exit(5);
	}

	execve(task->argv[0], task->argv, job->env);
	saved_errno = errno;

	/*
	 * print error message and clean up if execve() returns:
	 */
	if ((errno == ENOENT) &&
	    ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
		char buf[256], *eol;
		int sz;

		/*
		 * Leave room for the terminator and end the string at the
		 * number of bytes actually read, so the search for '\n' and
		 * the message below never touch uninitialized bytes of buf.
		 */
		sz = read(fd, buf, sizeof(buf) - 1);
		if ((sz >= 3) && (xstrncmp(buf, "#!", 2) == 0)) {
			buf[sz] = '\0';
			eol = strchr(buf, '\n');
			if (eol)
				eol[0] = '\0';
			slurm_seterrno(saved_errno);
			error("execve(): bad interpreter(%s): %m", buf+2);
			exit(errno);
		}
	}
	slurm_seterrno(saved_errno);
	error("execve(): %s: %m", task->argv[0]);
	exit(errno);
}
int main(int argc, char *argv[]) { struct sockaddr_un sun; struct parse_result *res = NULL; struct imsg imsg; struct smtpd smtpd; int ctl_sock; int done = 0; int n, verbose = 0; /* parse options */ if (strcmp(__progname, "sendmail") == 0 || strcmp(__progname, "send-mail") == 0) sendmail = 1; else if (strcmp(__progname, "mailq") == 0) { if (geteuid()) errx(1, "need root privileges"); setup_env(&smtpd); show_queue(0); return 0; } else if (strcmp(__progname, "smtpctl") == 0) { /* check for root privileges */ if (geteuid()) errx(1, "need root privileges"); setup_env(&smtpd); if ((res = parse(argc - 1, argv + 1)) == NULL) exit(1); /* handle "disconnected" commands */ switch (res->action) { case SHOW_QUEUE: show_queue(0); break; case SHOW_RUNQUEUE: break; default: goto connected; } return 0; } else errx(1, "unsupported mode"); connected: /* connect to smtpd control socket */ if ((ctl_sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) err(1, "socket"); bzero(&sun, sizeof(sun)); sun.sun_family = AF_UNIX; strlcpy(sun.sun_path, SMTPD_SOCKET, sizeof(sun.sun_path)); if (connect(ctl_sock, (struct sockaddr *)&sun, sizeof(sun)) == -1) { if (sendmail) return enqueue_offline(argc, argv); err(1, "connect: %s", SMTPD_SOCKET); } if ((ibuf = calloc(1, sizeof(struct imsgbuf))) == NULL) err(1, NULL); imsg_init(ibuf, ctl_sock); if (sendmail) return enqueue(argc, argv); /* process user request */ switch (res->action) { case NONE: usage(); /* not reached */ case SCHEDULE: case REMOVE: { u_int64_t ulval; char *ep; errno = 0; ulval = strtoull(res->data, &ep, 16); if (res->data[0] == '\0' || *ep != '\0') errx(1, "invalid msgid/evpid"); if (errno == ERANGE && ulval == ULLONG_MAX) errx(1, "invalid msgid/evpid"); if (ulval == 0) errx(1, "invalid msgid/evpid"); if (res->action == SCHEDULE) imsg_compose(ibuf, IMSG_SCHEDULER_SCHEDULE, 0, 0, -1, &ulval, sizeof(ulval)); if (res->action == REMOVE) imsg_compose(ibuf, IMSG_SCHEDULER_REMOVE, 0, 0, -1, &ulval, sizeof(ulval)); break; } case SCHEDULE_ALL: { 
u_int64_t ulval = 0; imsg_compose(ibuf, IMSG_SCHEDULER_SCHEDULE, 0, 0, -1, &ulval, sizeof(ulval)); break; } case SHUTDOWN: imsg_compose(ibuf, IMSG_CTL_SHUTDOWN, 0, 0, -1, NULL, 0); break; case PAUSE_MDA: imsg_compose(ibuf, IMSG_QUEUE_PAUSE_MDA, 0, 0, -1, NULL, 0); break; case PAUSE_MTA: imsg_compose(ibuf, IMSG_QUEUE_PAUSE_MTA, 0, 0, -1, NULL, 0); break; case PAUSE_SMTP: imsg_compose(ibuf, IMSG_SMTP_PAUSE, 0, 0, -1, NULL, 0); break; case RESUME_MDA: imsg_compose(ibuf, IMSG_QUEUE_RESUME_MDA, 0, 0, -1, NULL, 0); break; case RESUME_MTA: imsg_compose(ibuf, IMSG_QUEUE_RESUME_MTA, 0, 0, -1, NULL, 0); break; case RESUME_SMTP: imsg_compose(ibuf, IMSG_SMTP_RESUME, 0, 0, -1, NULL, 0); break; case SHOW_STATS: imsg_compose(ibuf, IMSG_STATS, 0, 0, -1, NULL, 0); break; case MONITOR: /* XXX */ break; case LOG_VERBOSE: verbose = 1; /* FALLTHROUGH */ case LOG_BRIEF: imsg_compose(ibuf, IMSG_CTL_VERBOSE, 0, 0, -1, &verbose, sizeof(verbose)); printf("logging request sent.\n"); done = 1; break; default: errx(1, "unknown request (%d)", res->action); } while (ibuf->w.queued) if (msgbuf_write(&ibuf->w) < 0) err(1, "write error"); while (!done) { if ((n = imsg_read(ibuf)) == -1) errx(1, "imsg_read error"); if (n == 0) errx(1, "pipe closed"); while (!done) { if ((n = imsg_get(ibuf, &imsg)) == -1) errx(1, "imsg_get error"); if (n == 0) break; switch(res->action) { case REMOVE: case SCHEDULE: case SCHEDULE_ALL: case SHUTDOWN: case PAUSE_MDA: case PAUSE_MTA: case PAUSE_SMTP: case RESUME_MDA: case RESUME_MTA: case RESUME_SMTP: case LOG_VERBOSE: case LOG_BRIEF: done = show_command_output(&imsg); break; case SHOW_STATS: done = show_stats_output(&imsg); break; case NONE: break; case MONITOR: break; default: err(1, "unexpected reply (%d)", res->action); } /* insert imsg replies switch here */ imsg_free(&imsg); } } close(ctl_sock); free(ibuf); return (0); }
void start_tcl(void) { char *id = "start_tcl"; char buf[BUFSIZ]; int fd; int tot, len; interp = Tcl_CreateInterp(); if (Tcl_Init(interp) == TCL_ERROR) { sprintf(log_buffer, "Tcl_Init error: %s", Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } #if TCLX #if TCL_MINOR_VERSION < 5 && TCL_MAJOR_VERSION < 8 if (TclX_Init(interp) == TCL_ERROR) { #else if (Tclx_Init(interp) == TCL_ERROR) { #endif sprintf(log_buffer, "Tclx_Init error: %s", Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } #endif add_cmds(interp); if (initfil) { int code; code = Tcl_EvalFile(interp, initfil); if (code != TCL_OK) { char *trace; trace = (char *)Tcl_GetVar(interp, "errorInfo", 0); if (trace == NULL) trace = (char *)Tcl_GetStringResult(interp); fprintf(stderr, "%s: TCL error @ line %d: %s\n", initfil, interp->errorLine, trace); sprintf(log_buffer, "%s: TCL error @ line %d: %s", initfil, interp->errorLine, Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } sprintf(log_buffer, "init file %s", initfil); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if ((fd = open(bodyfil, O_RDONLY)) == -1) { log_err(errno, id, bodyfil); die(0); } sprintf(log_buffer, "body file: %s", bodyfil); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (body) free(body); if ((body = malloc(BUFSIZ)) == NULL) { log_err(errno, id, "malloc"); die(0); } for (tot = 0; (len = read(fd, buf, sizeof(buf))) > 0; tot += len) { if ((body = realloc(body, tot + len + 1)) == NULL) { log_err(errno, id, "realloc"); die(0); } memcpy(&body[tot], buf, len); } if (len == -1) { log_err(errno, id, bodyfil); die(0); } body[tot] = '\0'; close(fd); #if TCL_MAJOR_VERSION >= 8 if (body_obj == NULL) { body_obj = Tcl_NewStringObj(body, tot); Tcl_IncrRefCount(body_obj); } else { Tcl_SetStringObj(body_obj, body, tot); } #endif } int addclient(name) char *name; { static char id[] = "addclient"; struct hostent *host, *gethostbyname(); struct in_addr saddr; if 
((host = gethostbyname(name)) == NULL) { sprintf(log_buffer, "host %s not found", name); log_err(-1, id, log_buffer); return -1; } if (numclients >= START_CLIENTS) { pbs_net_t *newclients; newclients = realloc(okclients, sizeof(pbs_net_t) * (numclients + 1)); if (newclients == NULL) return -1; okclients = newclients; } memcpy((char *)&saddr, host->h_addr, host->h_length); okclients[numclients++] = saddr.s_addr; return 0; } /* * read_config - read and process the configuration file (see -c option) * * Currently, the only statement is $clienthost to specify which systems * can contact the scheduler. */ #define CONF_LINE_LEN 120 static int read_config(file) char *file; { static char *id = "read_config"; FILE *conf; int i; char line[CONF_LINE_LEN]; char *token; struct specialconfig { char *name; int (*handler)(); } special[] = { {"clienthost", addclient }, { NULL, NULL } }; #if !defined(DEBUG) && !defined(NO_SECURITY_CHECK) if (chk_file_sec(file, 0, 0, S_IWGRP | S_IWOTH, 1, 0)) return (-1); #endif if ((conf = fopen(file, "r")) == NULL) { log_err(errno, id, "cannot open config file"); return (-1); } while (fgets(line, CONF_LINE_LEN, conf)) { if ((line[0] == '#') || (line[0] == '\n')) continue; /* ignore comment & null line */ else if (line[0] == '$') /* special */ { if ((token = strtok(line, " \t")) == NULL) token = ""; for (i = 0; special[i].name; i++) { if (strcmp(token + 1, special[i].name) == 0) break; } if (special[i].name == NULL) { sprintf(log_buffer, "config name %s not known", token); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, msg_daemonname, log_buffer); return (-1); } token = strtok(NULL, " \t"); if (*(token + strlen(token) - 1) == '\n') *(token + strlen(token) - 1) = '\0'; if (special[i].handler(token)) { fclose(conf); return (-1); } } else { log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, msg_daemonname, "invalid line in config file"); fclose(conf); return (-1); } } fclose(conf); return (0); } void restart(sig) int sig; { char *id = "restart"; if 
(sig) { sprintf(log_buffer, "restart on signal %d", sig); log_close(1); log_open(logfile, path_log); } else { sprintf(log_buffer, "restart command"); } log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); Tcl_DeleteInterp(interp); if (configfile) { if (read_config(configfile) != 0) die(0); } start_tcl(); } void badconn(msg) char *msg; { static char id[] = "badconn"; struct in_addr addr; char buf[5*sizeof(addr) + 100]; struct hostent *phe; addr = saddr.sin_addr; phe = gethostbyaddr((void *) & addr, sizeof(addr), AF_INET); if (phe == NULL) { char hold[6]; int i; union { struct in_addr aa; u_char bb[sizeof(addr)]; } uu; uu.aa = addr; sprintf(buf, "%u", uu.bb[0]); for (i = 1; i < (int)sizeof(addr); i++) { sprintf(hold, ".%u", uu.bb[i]); strcat(buf, hold); } } else { strncpy(buf, phe->h_name, sizeof(buf)); buf[sizeof(buf)-1] = '\0'; } sprintf(log_buffer, "%s on port %u %s", buf, ntohs(saddr.sin_port), msg); log_err(-1, id, log_buffer); return; } unsigned int server_command() { static char id[] = "server_command"; int new_socket; int i; torque_socklen_t slen; unsigned int cmd; pbs_net_t addr; slen = sizeof(saddr); new_socket = accept(server_sock, (struct sockaddr *) & saddr, &slen); if (new_socket == -1) { log_err(errno, id, "accept"); return SCH_ERROR; } if (ntohs(saddr.sin_port) >= IPPORT_RESERVED) { badconn("non-reserved port"); close(new_socket); return SCH_ERROR; } addr = (pbs_net_t)saddr.sin_addr.s_addr; for (i = 0; i < numclients; i++) { if (addr == okclients[i]) break; } if (i == numclients) { badconn("unauthorized host"); close(new_socket); return SCH_ERROR; } if ((connector = socket_to_conn(new_socket)) < 0) { log_err(errno, id, "socket_to_conn"); return SCH_ERROR; } if (get_4byte(new_socket, &cmd) != 1) { log_err(errno, id, "get4bytes"); return SCH_ERROR; } return cmd; } /* * lock_out - lock out other daemons from this directory. 
*/ static void lock_out(fds, op) int fds; int op; /* F_WRLCK or F_UNLCK */ { struct flock flock; flock.l_type = op; flock.l_whence = SEEK_SET; flock.l_start = 0; flock.l_len = 0; /* whole file */ if (fcntl(fds, F_SETLK, &flock) < 0) { (void)strcpy(log_buffer, "pbs_sched: another scheduler running\n"); log_err(errno, msg_daemonname, log_buffer); fprintf(stderr, log_buffer); exit(1); } } int main(argc, argv) int argc; char *argv[]; { char *id = "main"; int code; struct hostent *hp; int go, c, errflg = 0; int lockfds; int t = 1; char *ptr; pid_t pid; char *cp, host[100]; char *homedir = PBS_SERVER_HOME; unsigned int port; char path_priv[_POSIX_PATH_MAX]; char *dbfile = "sched_out"; int alarm_time = 180; struct sigaction act; caddr_t curr_brk = 0, next_brk; extern char *optarg; extern int optind, opterr; fd_set fdset; #ifndef DEBUG if (IamRoot() == 0) { return (1); } #endif /* DEBUG */ glob_argv = argv; if ((cp = strrchr(argv[0], '/')) == NULL) cp = argv[0]; else cp++; msg_daemonname = strdup(cp); port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp", PBS_SCHEDULER_SERVICE_PORT); while ((c = getopt(argc, argv, "L:S:d:i:b:t:p:a:vc:")) != EOF) { switch (c) { case 'L': logfile = optarg; break; case 'S': port = (unsigned int)atoi(optarg); if (port == 0) { fprintf(stderr, "%s: illegal port\n", optarg); errflg = 1; } break; case 'd': homedir = optarg; break; case 'i': /* initialize */ initfil = optarg; break; case 'b': bodyfil = optarg; break; case 't': termfil = optarg; break; case 'p': dbfile = optarg; break; case 'a': alarm_time = strtol(optarg, &ptr, 10); if (alarm_time <= 0 || *ptr != '\0') { fprintf(stderr, "%s: bad alarm time\n", optarg); errflg = 1; } break; case 'c': configfile = optarg; break; case 'v': verbose = 1; break; case '?': errflg = 1; break; } } if (errflg || optind != argc) { static char *options[] = { "[-L logfile]", "[-S port]", "[-d home]", "[-i init]", "[-b body]", "[-t term]", "[-p output]", "[-a alarm]", "[-c configfile]", "[-v]", NULL }; int i; 
fprintf(stderr, "usage: %s\n", argv[0]); for (i = 0; options[i]; i++) fprintf(stderr, "\t%s\n", options[i]); exit(1); } /* Save the original working directory for "restart" */ if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL) { fprintf(stderr, "cannot get current working directory\n"); exit(1); } (void)sprintf(path_priv, "%s/sched_priv", homedir); #if !defined(DEBUG) && !defined(NO_SECURITY_CHECK) c = chk_file_sec(path_priv, 1, 0, S_IWGRP | S_IWOTH, 1, 0); c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, 0); if (c != 0) exit(1); #endif /* not DEBUG and not NO_SECURITY_CHECK */ if (chdir(path_priv) == -1) { perror(path_priv); exit(1); } (void)sprintf(path_log, "%s/sched_logs", homedir); (void)strcpy(pbs_current_user, "Scheduler"); /* The following is code to reduce security risks */ /* start out with standard umask, system resource limit infinite */ umask(022); if (setup_env(PBS_ENVIRON) == -1) exit(1); c = getgid(); (void)setgroups(1, (gid_t *)&c); /* secure suppl. group ids */ c = sysconf(_SC_OPEN_MAX); while (--c > 2) (void)close(c); /* close any file desc left open by parent */ #ifndef DEBUG #ifdef _CRAY (void)limit(C_JOB, 0, L_CPROC, 0); (void)limit(C_JOB, 0, L_CPU, 0); (void)limit(C_JOBPROCS, 0, L_CPU, 0); (void)limit(C_PROC, 0, L_FD, 255); (void)limit(C_JOB, 0, L_FSBLK, 0); (void)limit(C_JOBPROCS, 0, L_FSBLK, 0); (void)limit(C_JOB, 0, L_MEM , 0); (void)limit(C_JOBPROCS, 0, L_MEM , 0); #else /* not _CRAY */ { struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; (void)setrlimit(RLIMIT_CPU, &rlimit); (void)setrlimit(RLIMIT_FSIZE, &rlimit); (void)setrlimit(RLIMIT_DATA, &rlimit); (void)setrlimit(RLIMIT_STACK, &rlimit); #ifdef RLIMIT_RSS (void)setrlimit(RLIMIT_RSS , &rlimit); #endif /* RLIMIT_RSS */ #ifdef RLIMIT_VMEM (void)setrlimit(RLIMIT_VMEM , &rlimit); #endif /* RLIMIT_VMEM */ } #endif /* not _CRAY */ #if !defined(NO_SECURITY_CHECK) c = 0; if (initfil) { if (*initfil != '/') { (void)sprintf(log_buffer, 
"%s/%s", path_priv, initfil); c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } else { c |= chk_file_sec(initfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } } if (bodyfil) { if (*bodyfil != '/') { (void)sprintf(log_buffer, "%s/%s", path_priv, bodyfil); c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } else { c |= chk_file_sec(bodyfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } } if (termfil) { if (*termfil != '/') { (void)sprintf(log_buffer, "%s/%s", path_priv, termfil); c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } else { c |= chk_file_sec(termfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } } if (c) exit(1); #endif /* not NO_SECURITY_CHECK */ #endif /* not DEBUG */ if (log_open(logfile, path_log) == -1) { fprintf(stderr, "%s: logfile could not be opened\n", argv[0]); exit(1); } if (gethostname(host, sizeof(host)) == -1) { char *prob = "gethostname"; log_err(errno, id, prob); perror(prob); die(0); } if ((hp = gethostbyname(host)) == NULL) { char *prob = "gethostbyname"; log_err(errno, id, prob); perror(prob); die(0); } if ((server_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { char *prob = "socket"; log_err(errno, id, prob); perror(prob); die(0); } if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR, (char *)&t, sizeof(t)) == -1) { char *prob = "setsockopt"; log_err(errno, id, prob); perror(prob); die(0); } saddr.sin_family = AF_INET; saddr.sin_port = htons((unsigned short)port); memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length); if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { char *prob = "bind"; log_err(errno, id, prob); perror(prob); die(0); } if (listen(server_sock, 5) < 0) { char *prob = "listen"; log_err(errno, id, prob); perror(prob); die(0); } okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t)); addclient("localhost"); /* who has permission to call MOM */ addclient(host); if (configfile) { if (read_config(configfile) != 0) die(0); } lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644); if 
(lockfds < 0) { char *prob = "lock file"; log_err(errno, id, prob); perror(prob); die(0); } lock_out(lockfds, F_WRLCK); #ifndef DEBUG lock_out(lockfds, F_UNLCK); if ((pid = fork()) == -1) /* error on fork */ { char *prob = "fork"; log_err(errno, id, prob); perror(prob); die(0); } else if (pid > 0) /* parent exits */ exit(0); if ((pid = setsid()) == -1) { log_err(errno, id, "setsid"); die(0); } lock_out(lockfds, F_WRLCK); freopen(dbfile, "a", stdout); setvbuf(stdout, NULL, _IOLBF, 0); dup2(fileno(stdout), fileno(stderr)); #else pid = getpid(); setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #endif freopen("/dev/null", "r", stdin); /* write schedulers pid into lockfile */ (void)sprintf(log_buffer, "%d\n", pid); (void)write(lockfds, log_buffer, strlen(log_buffer) + 1); #if (PLOCK_DAEMONS & 2) (void)plock(PROCLOCK); /* lock daemon into memory */ #endif sprintf(log_buffer, "%s startup pid %d", argv[0], pid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); sprintf(log_buffer, "%s using TCL %s (%s)", argv[0], TCL_VERSION, TCL_PATCH_LEVEL); fprintf(stderr, "%s\n", log_buffer); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); fullresp(0); sigemptyset(&allsigs); act.sa_flags = 0; sigaddset(&allsigs, SIGHUP); /* remember to block these */ sigaddset(&allsigs, SIGINT); /* during critical sections */ sigaddset(&allsigs, SIGTERM); /* so we don't get confused */ act.sa_mask = allsigs; act.sa_handler = restart; /* do a restart on SIGHUP */ sigaction(SIGHUP, &act, NULL); act.sa_handler = toolong; /* handle an alarm call */ sigaction(SIGALRM, &act, NULL); act.sa_handler = die; /* bite the biscuit for all following */ sigaction(SIGINT, &act, NULL); sigaction(SIGTERM, &act, NULL); start_tcl(); FD_ZERO(&fdset); for (go = 1; go;) { unsigned int cmd; FD_SET(server_sock, &fdset); if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1) { if (errno != EINTR) log_err(errno, id, "select"); continue; } if (!FD_ISSET(server_sock, 
&fdset)) continue; cmd = server_command(); if (cmd == (unsigned)SCH_ERROR || cmd == (unsigned)SCH_SCHEDULE_NULL) continue; if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1) log_err(errno, id, "sigprocmaskSIG_BLOCK)"); if (verbose) { sprintf(log_buffer, "command %d", cmd); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } switch (cmd) { case SCH_SCHEDULE_NEW: case SCH_SCHEDULE_TERM: case SCH_SCHEDULE_TIME: case SCH_SCHEDULE_RECYC: case SCH_SCHEDULE_CMD: case SCH_SCHEDULE_FIRST: alarm(alarm_time); #if TCL_MAJOR_VERSION >= 8 /* execute compiled body code for TCL-8 */ code = Tcl_EvalObj(interp, body_obj); #else code = Tcl_Eval(interp, body); #endif alarm(0); switch (code) { case TCL_OK: case TCL_RETURN: break; default: { char *trace; char codename[20]; switch (code) { case TCL_BREAK: strcpy(codename, "break"); break; case TCL_CONTINUE: strcpy(codename, "continue"); break; default: strcpy(codename, "<unknown>"); break; } trace = (char *)Tcl_GetVar(interp, "errorInfo", 0); if (trace == NULL) trace = (char *)Tcl_GetStringResult(interp); fprintf(stderr, "%s: TCL interpreter return code %d (%s) @ line %d: %s\n", bodyfil, code, codename, interp->errorLine, trace); sprintf(log_buffer, "%s: TCL error @ line %d: %s", bodyfil, interp->errorLine, Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } } break; case SCH_CONFIGURE: case SCH_RULESET: restart(0); break; case SCH_QUIT: go = 0; break; default: log_err(-1, id, "unknown command"); break; } if (connector >= 0 && server_disconnect(connector)) { log_err(errno, id, "server_disconnect"); die(0); } connector = -1; if (verbose) { next_brk = (caddr_t)sbrk(0); if (next_brk > curr_brk) { sprintf(log_buffer, "brk point %p", next_brk); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); curr_brk = next_brk; } } if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1) log_err(errno, id, "sigprocmask(SIG_SETMASK)"); } if (termfil) { code = Tcl_EvalFile(interp, termfil); if (code != 
TCL_OK) { char *trace; trace = (char *)Tcl_GetVar(interp, "errorInfo", 0); if (trace == NULL) trace = (char *)Tcl_GetStringResult(interp); fprintf(stderr, "%s: TCL error @ line %d: %s\n", termfil, interp->errorLine, trace); sprintf(log_buffer, "%s: TCL error @ line %d: %s", termfil, interp->errorLine, Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } sprintf(log_buffer, "term file: %s", termfil); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } sprintf(log_buffer, "%s normal finish pid %d", argv[0], pid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)close(server_sock); exit(0); }
/*
 * login - create a new login session for a user
 *
 *	login is typically called by getty as the second step of a
 *	new user session. getty is responsible for setting the line
 *	characteristics to a reasonable set of values and getting
 *	the name of the user to be logged in. login may also be
 *	called to create a new user session on a pty for a variety
 *	of reasons, such as X servers or network logins.
 *
 *	the flags which login supports are
 *
 *	-p - preserve the environment
 *	-r - perform autologin protocol for rlogin
 *	-f - do not perform authentication, user is preauthenticated
 *	-h - the name of the remote host
 *
 * Outline of what the body below does, in order:
 *   1. Sanitize the environment, set up the locale, refuse to run without
 *      an effective uid of 0 or when stdin/stdout/stderr are not ttys.
 *   2. Look up the current utmp entry, determine the tty name and build
 *      the new environment and the "fromhost" string used in log messages.
 *   3. Authenticate: through PAM when USE_PAM is defined, otherwise with
 *      the passwd/shadow loop (pw_auth, login_access, failcheck).
 *   4. Handle "subsystem root" shells (pw_shell starting with '*') by
 *      jumping back to the "top" label.
 *   5. Update utmp, close the account databases, drop root privileges,
 *      print the usual login notices and finally call shell().
 *
 * Returns E_CMD_NOTFOUND when shell() reports ENOENT and E_CMD_NOEXEC
 * otherwise; every earlier failure leaves through exit().
 */
int main (int argc, char **argv)
{
	const char *tmptty;
	char tty[BUFSIZ];

#ifdef RLOGIN
	char term[128] = "";
#endif				/* RLOGIN */
#if defined(HAVE_STRFTIME) && !defined(USE_PAM)
	char ptime[80];
#endif
	unsigned int delay;
	unsigned int retries;
	bool failed;
	bool subroot = false;
#ifndef USE_PAM
	bool is_console;
#endif
	int err;
	const char *cp;
	char *tmp;
	char fromhost[512];
	struct passwd *pwd = NULL;
	char **envp = environ;
	const char *failent_user;
	/*@null@*/struct utmp *utent;

#ifdef USE_PAM
	int retcode;
	pid_t child;
	char *pam_user = NULL;
#else
	struct spwd *spwd = NULL;
#endif
	/*
	 * Some quick initialization.
	 */

	sanitize_env ();

	(void) setlocale (LC_ALL, "");
	(void) bindtextdomain (PACKAGE, LOCALEDIR);
	(void) textdomain (PACKAGE);

	initenv ();

	/* amroot records whether the *real* uid is root */
	amroot = (getuid () == 0);
	Prog = Basename (argv[0]);

	/* the effective uid must be root regardless of who invoked us */
	if (geteuid() != 0) {
		fprintf (stderr, _("%s: Cannot possibly work without effective root\n"), Prog);
		exit (1);
	}

	process_flags (argc, argv);

	if ((isatty (0) == 0) || (isatty (1) == 0) || (isatty (2) == 0)) {
		exit (1);	/* must be a terminal */
	}

	utent = get_current_utmp ();
	/*
	 * Be picky if run by normal users (possible if installed setuid
	 * root), but not if run by root. This way it still allows logins
	 * even if your getty is broken, or if something corrupts utmp,
	 * but users must "exec login" which will use the existing utmp
	 * entry (will not overwrite remote hostname).  --marekm
	 */
	if (!amroot && (NULL == utent)) {
		(void) puts (_("No utmp entry. You must exec \"login\" from the lowest level \"sh\""));
		exit (1);
	}
	/* NOTE: utent might be NULL afterwards */

	/* fall back to a placeholder name when the tty cannot be resolved */
	tmptty = ttyname (0);
	if (NULL == tmptty) {
		tmptty = "UNKNOWN";
	}
	STRFCPY (tty, tmptty);
#ifndef USE_PAM
	is_console = console (tty);
#endif

	if (rflg || hflg) {
		/*
		 * Add remote hostname to the environment. I think
		 * (not sure) I saw it once on Irix.  --marekm
		 */
		addenv ("REMOTEHOST", hostname);
	}
	if (fflg) {
		preauth_flag = true;
	}
	if (hflg) {
		reason = PW_RLOGIN;
	}
#ifdef RLOGIN
	if (rflg) {
		/*
		 * do_rlogin() fills in username and term; on failure the
		 * buffer is released so that the user gets prompted later.
		 */
		assert (NULL == username);
		username = xmalloc (USER_NAME_MAX_LENGTH + 1);
		username[USER_NAME_MAX_LENGTH] = '\0';
		if (do_rlogin (hostname, username, USER_NAME_MAX_LENGTH, term, sizeof term)) {
			preauth_flag = true;
		} else {
			free (username);
			username = NULL;
		}
	}
#endif				/* RLOGIN */

	OPENLOG ("login");

	setup_tty ();

#ifndef USE_PAM
	(void) umask (getdef_num ("UMASK", GETDEF_DEFAULT_UMASK));

	{
		/*
		 * Use the ULIMIT in the login.defs file, and if
		 * there isn't one, use the default value. The
		 * user may have one for themselves, but otherwise,
		 * just take what you get.
		 */
		long limit = getdef_long ("ULIMIT", -1L);

		if (limit != -1) {
			set_filesize_limit (limit);
		}
	}

#endif
	/*
	 * The entire environment will be preserved if the -p flag
	 * is used.
	 */
	if (pflg) {
		while (NULL != *envp) {	/* add inherited environment, */
			addenv (*envp, NULL);	/* some variables change later */
			envp++;
		}
	}
#ifdef RLOGIN
	if (term[0] != '\0') {
		addenv ("TERM", term);
	} else
#endif				/* RLOGIN */
	{
		/* preserve TERM from getty */
		if (!pflg) {
			tmp = getenv ("TERM");
			if (NULL != tmp) {
				addenv ("TERM", tmp);
			}
		}
	}

	init_env ();

	if (optind < argc) {	/* now set command line variables */
		set_env (argc - optind, &argv[optind]);
	}

	/*
	 * Pick the host name used in log messages: the -r/-h argument if
	 * given, else the host recorded in utmp (when available), else none.
	 */
	if (rflg || hflg) {
		cp = hostname;
#ifdef	HAVE_STRUCT_UTMP_UT_HOST
	} else if ((NULL != utent) && ('\0' != utent->ut_host[0])) {
		cp = utent->ut_host;
#endif				/* HAVE_STRUCT_UTMP_UT_HOST */
	} else {
		cp = "";
	}

	/* fromhost is appended to the syslog messages below */
	if ('\0' != *cp) {
		snprintf (fromhost, sizeof fromhost, " on '%.100s' from '%.200s'", tty, cp);
	} else {
		snprintf (fromhost, sizeof fromhost, " on '%.100s'", tty);
	}

      top:
	/* only allow ALARM sec. for login */
	(void) signal (SIGALRM, alarm_handler);
	timeout = getdef_unum ("LOGIN_TIMEOUT", ALARM);
	if (timeout > 0) {
		(void) alarm (timeout);
	}

	environ = newenvp;	/* make new environment active */
	delay = getdef_unum ("FAIL_DELAY", 1);
	retries = getdef_unum ("LOGIN_RETRIES", RETRIES);

#ifdef USE_PAM
	retcode = pam_start ("login", username, &conv, &pamh);
	if (retcode != PAM_SUCCESS) {
		fprintf (stderr, _("login: PAM Failure, aborting: %s\n"), pam_strerror (pamh, retcode));
		SYSLOG ((LOG_ERR, "Couldn't initialize PAM: %s", pam_strerror (pamh, retcode)));
		exit (99);
	}

	/*
	 * hostname & tty are either set to NULL or their correct values,
	 * depending on how much we know. We also set PAM's fail delay to
	 * ours.
	 *
	 * PAM_RHOST and PAM_TTY are used for authentication, only use
	 * information coming from login or from the caller (e.g. no utmp)
	 */
	retcode = pam_set_item (pamh, PAM_RHOST, hostname);
	PAM_FAIL_CHECK;
	retcode = pam_set_item (pamh, PAM_TTY, tty);
	PAM_FAIL_CHECK;
#ifdef HAS_PAM_FAIL_DELAY
	retcode = pam_fail_delay (pamh, 1000000 * delay);
	PAM_FAIL_CHECK;
#endif
	/* if fflg, then the user has already been authenticated */
	if (!fflg) {
		unsigned int failcount = 0;
		char hostn[256];
		char loginprompt[256];	/* That's one hell of a prompt :) */

		/* Make the login prompt look like we want it */
		if (gethostname (hostn, sizeof (hostn)) == 0) {
			/*
			 * NOTE(review): the statement below is garbled in this copy
			 * of the file -- the text marked by the asterisk runs appears
			 * to have been removed (presumably the rest of the prompt
			 * setup and the start of the authentication retry loop).
			 * It is kept verbatim; restore it from a pristine copy of
			 * the source before building.
			 */
			snprintf (loginprompt, sizeof (loginprompt), _("%s login: "******"login: "******"TOO MANY LOGIN TRIES (%u)%s FOR '%s'", failcount, fromhost, failent_user));
				fprintf(stderr, _("Maximum number of tries exceeded (%u)\n"), failcount);
				PAM_END;
				exit(0);
			} else if (retcode == PAM_ABORT) {
				/* Serious problems, quit now */
				(void) fputs (_("login: abort requested by PAM\n"), stderr);
				SYSLOG ((LOG_ERR,"PAM_ABORT returned from pam_authenticate()"));
				PAM_END;
				exit(99);
			} else if (retcode != PAM_SUCCESS) {
				SYSLOG ((LOG_NOTICE,"FAILED LOGIN (%u)%s FOR '%s', %s", failcount, fromhost, failent_user, pam_strerror (pamh, retcode)));
				failed = true;
			}

			if (!failed) {
				break;
			}

#ifdef WITH_AUDIT
			/* record the failed attempt (last argument 0 = failure) */
			audit_fd = audit_open ();
			audit_log_acct_message (audit_fd, AUDIT_USER_LOGIN, NULL, /* Prog. name */ "login", failent_user, AUDIT_NO_ID, hostname, NULL, /* addr */ tty, 0); /* result */
			close (audit_fd);
#endif				/* WITH_AUDIT */

			(void) puts ("");
			(void) puts (_("Login incorrect"));

			if (failcount >= retries) {
				SYSLOG ((LOG_NOTICE, "TOO MANY LOGIN TRIES (%u)%s FOR '%s'", failcount, fromhost, failent_user));
				fprintf(stderr, _("Maximum number of tries exceeded (%u)\n"), failcount);
				PAM_END;
				exit(0);
			}

			/*
			 * Let's give it another go around.
			 * Even if a username was given on the command
			 * line, prompt again for the username.
			 */
			retcode = pam_set_item (pamh, PAM_USER, NULL);
			PAM_FAIL_CHECK;
		}

		/* We don't get here unless they were authenticated above */
		(void) alarm (0);
	}

	/* Check the account validity */
	retcode = pam_acct_mgmt (pamh, 0);
	if (retcode == PAM_NEW_AUTHTOK_REQD) {
		/* expired password: force a change, its result is checked below */
		retcode = pam_chauthtok (pamh, PAM_CHANGE_EXPIRED_AUTHTOK);
	}
	PAM_FAIL_CHECK;

	/* Open the PAM session */
	get_pam_user (&pam_user);
	retcode = pam_open_session (pamh, hushed (pam_user) ? PAM_SILENT : 0);
	PAM_FAIL_CHECK;

	/* Grab the user information out of the password file for future usage
	 * First get the username that we are actually using, though.
	 *
	 * From now on, we will discard changes of the user (PAM_USER) by
	 * PAM APIs.
	 */
	get_pam_user (&pam_user);
	if (NULL != username) {
		free (username);
	}
	username = pam_user;
	failent_user = get_failent_user (username);

	pwd = xgetpwnam (username);
	if (NULL == pwd) {
		SYSLOG ((LOG_ERR, "cannot find user %s", failent_user));
		exit (1);
	}

	/* This set up the process credential (group) and initialize the
	 * supplementary group access list.
	 * This has to be done before pam_setcred
	 */
	if (setup_groups (pwd) != 0) {
		exit (1);
	}

	retcode = pam_setcred (pamh, PAM_ESTABLISH_CRED);
	PAM_FAIL_CHECK;
	/* NOTE: If pam_setcred changes PAM_USER, this will not be taken
	 * into account.
	 */

#else				/* ! USE_PAM */
	while (true) {	/* repeatedly get login/password pairs */
		/* user_passwd is always a pointer to this constant string
		 * or a passwd or shadow password that will be memzero by
		 * pw_free / spw_free.
		 * Do not free() user_passwd. */
		const char *user_passwd = "!";

		/* Do some cleanup to avoid keeping entries we do not need
		 * anymore. */
		if (NULL != pwd) {
			pw_free (pwd);
			pwd = NULL;
		}
		if (NULL != spwd) {
			spw_free (spwd);
			spwd = NULL;
		}

		failed = false;	/* haven't failed authentication yet */
		if (NULL == username) {	/* need to get a login id */
			if (subroot) {
				closelog ();
				exit (1);
			}
			preauth_flag = false;
			username = xmalloc (USER_NAME_MAX_LENGTH + 1);
			username[USER_NAME_MAX_LENGTH] = '\0';
			/*
			 * NOTE(review): the statement below is garbled in this copy
			 * of the file -- the text marked by the asterisk run appears
			 * to have been removed (presumably the rest of the prompt call,
			 * the passwd lookup and the opening of the comment about
			 * locked accounts). It is kept verbatim; restore it from a
			 * pristine copy of the source before building.
			 */
			login_prompt (_("\n%s login: "******"!", * the account is locked and the user cannot * login, even if they have been * "pre-authenticated." */
			if (   ('!' == user_passwd[0]) || ('*' == user_passwd[0])) {
				failed = true;
			}
		}

		if (strcmp (user_passwd, SHADOW_PASSWD_STRING) == 0) {
			spwd = xgetspnam (username);
			if (NULL != spwd) {
				user_passwd = spwd->sp_pwdp;
			} else {
				/* The user exists in passwd, but not in
				 * shadow. SHADOW_PASSWD_STRING indicates
				 * that the password shall be in shadow.
				 */
				SYSLOG ((LOG_WARN, "no shadow password for '%s'%s", username, fromhost));
			}
		}

		/*
		 * The -r and -f flags provide a name which has already
		 * been authenticated by some server.
		 */
		if (preauth_flag) {
			goto auth_ok;
		}

		if (pw_auth (user_passwd, username, reason, (char *) 0) == 0) {
			goto auth_ok;
		}

		SYSLOG ((LOG_WARN, "invalid password for '%s' %s", failent_user, fromhost));
		failed = true;

	      auth_ok:
		/*
		 * This is the point where all authenticated users wind up.
		 * If you reach this far, your password has been
		 * authenticated and so on.
		 *
		 * Note that the failed-password path above also falls through
		 * to here with failed == true, so the checks below run for it
		 * as well.
		 */
		if (   !failed && (NULL != pwd) && (0 == pwd->pw_uid) && !is_console) {
			SYSLOG ((LOG_CRIT, "ILLEGAL ROOT LOGIN %s", fromhost));
			failed = true;
		}
		if (   !failed && !login_access (username, ('\0' != *hostname) ? hostname : tty)) {
			SYSLOG ((LOG_WARN, "LOGIN '%s' REFUSED %s", username, fromhost));
			failed = true;
		}
		if (   (NULL != pwd) && getdef_bool ("FAILLOG_ENAB") && !failcheck (pwd->pw_uid, &faillog, failed)) {
			SYSLOG ((LOG_CRIT, "exceeded failure limit for '%s' %s", username, fromhost));
			failed = true;
		}
		if (!failed) {
			break;
		}

		/* don't log non-existent users */
		if ((NULL != pwd) && getdef_bool ("FAILLOG_ENAB")) {
			failure (pwd->pw_uid, tty, &faillog);
		}
		if (getdef_str ("FTMP_FILE") != NULL) {
#ifdef USE_UTMPX
			struct utmpx *failent = prepare_utmpx (failent_user, tty, /* FIXME: or fromhost? */hostname, utent);
#else				/* !USE_UTMPX */
			struct utmp *failent = prepare_utmp (failent_user, tty, hostname, utent);
#endif				/* !USE_UTMPX */
			failtmp (failent_user, failent);
			free (failent);
		}

		/*
		 * NOTE(review): retries is an unsigned int, so "retries <= 0"
		 * below can only mean "retries == 0"; if LOGIN_RETRIES were
		 * configured as 0 this decrement would wrap around -- confirm
		 * whether getdef_unum() can return 0 here.
		 */
		retries--;
		if (retries <= 0) {
			SYSLOG ((LOG_CRIT, "REPEATED login failures%s", fromhost));
		}

		/*
		 * If this was a passwordless account and we get here, login
		 * was denied (securetty, faillog, etc.). There was no
		 * password prompt, so do it now (will always fail - the bad
		 * guys won't see that the passwordless account exists at
		 * all).  --marekm
		 */
		if (user_passwd[0] == '\0') {
			pw_auth ("!", username, reason, (char *) 0);
		}

		/*
		 * Authentication of this user failed.
		 * The username must be confirmed in the next try.
		 */
		free (username);
		username = NULL;

		/*
		 * Wait a while (a la SVR4 /usr/bin/login) before attempting
		 * to login the user again. If the earlier alarm occurs
		 * before the sleep() below completes, login will exit.
		 */
		if (delay > 0) {
			(void) sleep (delay);
		}

		(void) puts (_("Login incorrect"));

		/* allow only one attempt with -r or -f */
		if (rflg || fflg || (retries <= 0)) {
			closelog ();
			exit (1);
		}
	}			/* while (true) */
#endif				/* ! USE_PAM */
	assert (NULL != username);
	assert (NULL != pwd);

	(void) alarm (0);		/* turn off alarm clock */

#ifndef USE_PAM			/* PAM does this */
	/*
	 * porttime checks moved here, after the user has been
	 * authenticated. now prints a message, as suggested
	 * by Ivan Nejgebauer <*****@*****.**>.  --marekm
	 */
	if (   getdef_bool ("PORTTIME_CHECKS_ENAB") && !isttytime (username, tty, time ((time_t *) 0))) {
		SYSLOG ((LOG_WARN, "invalid login time for '%s'%s", username, fromhost));
		closelog ();
		bad_time_notify ();
		exit (1);
	}

	check_nologin (pwd->pw_uid == 0);
#endif

	if (getenv ("IFS")) {	/* don't export user IFS ... */
		addenv ("IFS= \t\n", NULL);	/* ... instead, set a safe IFS */
	}

	if (pwd->pw_shell[0] == '*') {	/* subsystem root */
		pwd->pw_shell++;	/* skip the '*' */
		subsystem (pwd);	/* figure out what to execute */
		subroot = true;	/* say I was here again */
		endpwent ();	/* close all of the file which were */
		endgrent ();	/* open in the original rooted file */
		endspent ();	/* system. they will be re-opened */
#ifdef	SHADOWGRP
		endsgent ();	/* in the new rooted file system */
#endif
		goto top;	/* go do all this all over again */
	}

#ifdef WITH_AUDIT
	/* record the successful login (last argument 1 = success) */
	audit_fd = audit_open ();
	audit_log_acct_message (audit_fd, AUDIT_USER_LOGIN, NULL, /* Prog. name */ "login", username, AUDIT_NO_ID, hostname, NULL, /* addr */ tty, 1); /* result */
	close (audit_fd);
#endif				/* WITH_AUDIT */

#ifndef USE_PAM			/* pam_lastlog handles this */
	if (getdef_bool ("LASTLOG_ENAB")) {	/* give last login and log this one */
		dolastlog (&ll, pwd, tty, hostname);
	}
#endif

#ifndef USE_PAM			/* PAM handles this as well */
	/*
	 * Have to do this while we still have root privileges, otherwise we
	 * don't have access to /etc/shadow.
	 */
	if (NULL != spwd) {		/* check for age of password */
		if (expire (pwd, spwd)) {
			/* The user updated her password, get the new
			 * entries.
			 * Use the x variants because we need to keep the
			 * entry for a long time, and there might be other
			 * getxxyy in between.
			 */
			pw_free (pwd);
			pwd = xgetpwnam (username);
			if (NULL == pwd) {
				SYSLOG ((LOG_ERR, "cannot find user %s after update of expired password", username));
				exit (1);
			}
			spw_free (spwd);
			spwd = xgetspnam (username);
		}
	}
	setup_limits (pwd);	/* nice, ulimit etc. */
#endif				/* ! USE_PAM */
	chown_tty (pwd);

#ifdef USE_PAM
	/*
	 * We must fork before setuid() because we need to call
	 * pam_close_session() as root.
	 */
	(void) signal (SIGINT, SIG_IGN);
	child = fork ();
	if (child < 0) {
		/* error in fork() */
		fprintf (stderr, _("%s: failure forking: %s"), Prog, strerror (errno));
		PAM_END;
		exit (0);
	} else if (child != 0) {
		/*
		 * parent - wait for child to finish, then cleanup
		 * session
		 */
		wait (NULL);
		PAM_END;
		exit (0);
	}
	/* child */
#endif

	/* If we were init, we need to start a new session */
	if (getppid() == 1) {
		setsid();
		if (ioctl(0, TIOCSCTTY, 1) != 0) {
			fprintf (stderr, _("TIOCSCTTY failed on %s"), tty);
		}
	}

	/*
	 * The utmp entry needs to be updated to indicate the new status
	 * of the session, the new PID and SID.
	 */
	update_utmp (username, tty, hostname, utent);

	/* The pwd and spwd entries for the user have been copied.
	 *
	 * Close all the files so that unauthorized access won't occur.
	 */
	endpwent ();		/* stop access to password file */
	endgrent ();		/* stop access to group file */
	endspent ();		/* stop access to shadow passwd file */
#ifdef	SHADOWGRP
	endsgent ();		/* stop access to shadow group file */
#endif

	/* Drop root privileges */
#ifndef USE_PAM
	if (setup_uid_gid (pwd, is_console))
#else
	/* The group privileges were already dropped.
	 * See setup_groups() above.
	 */
	if (change_uid (pwd))
#endif
	{
		exit (1);
	}

	setup_env (pwd);	/* set env vars, cd to the home dir */

#ifdef USE_PAM
	{
		/* merge the variables exported by the PAM modules */
		const char *const *env;

		env = (const char *const *) pam_getenvlist (pamh);
		while ((NULL != env) && (NULL != *env)) {
			addenv (*env, NULL);
			env++;
		}
	}
#endif

	(void) setlocale (LC_ALL, "");
	(void) bindtextdomain (PACKAGE, LOCALEDIR);
	(void) textdomain (PACKAGE);

	if (!hushed (username)) {
		addenv ("HUSHLOGIN=FALSE", NULL);
		/*
		 * pam_unix, pam_mail and pam_lastlog should take care of
		 * this
		 */
#ifndef USE_PAM
		motd ();	/* print the message of the day */
		if (   getdef_bool ("FAILLOG_ENAB") && (0 != faillog.fail_cnt)) {
			failprint (&faillog);
			/* Reset the lockout times if logged in */
			if (   (0 != faillog.fail_max) && (faillog.fail_cnt >= faillog.fail_max)) {
				(void) puts (_("Warning: login re-enabled after temporary lockout."));
				SYSLOG ((LOG_WARN, "login '%s' re-enabled after temporary lockout (%d failures)", username, (int) faillog.fail_cnt));
			}
		}
		if (   getdef_bool ("LASTLOG_ENAB") && (ll.ll_time != 0)) {
			time_t ll_time = ll.ll_time;

#ifdef HAVE_STRFTIME
			(void) strftime (ptime, sizeof (ptime), "%a %b %e %H:%M:%S %z %Y", localtime (&ll_time));
			printf (_("Last login: %s on %s"), ptime, ll.ll_line);
#else
			printf (_("Last login: %.19s on %s"), ctime (&ll_time), ll.ll_line);
#endif
#ifdef HAVE_LL_HOST		/* __linux__ || SUN4 */
			if ('\0' != ll.ll_host[0]) {
				/* bounded by the field size via the %.*s precision */
				printf (_(" from %.*s"), (int) sizeof ll.ll_host, ll.ll_host);
			}
#endif
			printf (".\n");
		}
		agecheck (spwd);

		mailcheck ();	/* report on the status of mail */
#endif				/* !USE_PAM */
	} else {
		addenv ("HUSHLOGIN=TRUE", NULL);
	}

	ttytype (tty);

	(void) signal (SIGQUIT, SIG_DFL);	/* default quit signal */
	(void) signal (SIGTERM, SIG_DFL);	/* default terminate signal */
	(void) signal (SIGALRM, SIG_DFL);	/* default alarm signal */
	(void) signal (SIGHUP, SIG_DFL);	/* added this.  --marekm */
	(void) signal (SIGINT, SIG_DFL);	/* default interrupt signal */

	if (0 == pwd->pw_uid) {
		SYSLOG ((LOG_NOTICE, "ROOT LOGIN %s", fromhost));
	} else if (getdef_bool ("LOG_OK_LOGINS")) {
		SYSLOG ((LOG_INFO, "'%s' logged in %s", username, fromhost));
	}
	closelog ();
	tmp = getdef_str ("FAKE_SHELL");
	if (NULL != tmp) {
		err = shell (tmp, pwd->pw_shell, newenvp);	/* fake shell */
	} else {
		/* exec the shell finally */
		err = shell (pwd->pw_shell, (char *) 0, newenvp);
	}

	/* only reached when shell() came back, i.e. the shell was not started */
	return ((err == ENOENT) ? E_CMD_NOTFOUND : E_CMD_NOEXEC);
}
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, task_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Allreduce variables */ size_t allreduce_num_algorithm[2]; pami_algorithm_t *allreduce_always_works_algo = NULL; pami_metadata_t *allreduce_always_works_md = NULL; pami_algorithm_t *allreduce_must_query_algo = NULL; pami_metadata_t *allreduce_must_query_md = NULL; pami_xfer_type_t allreduce_xfer = PAMI_XFER_ALLREDUCE; volatile unsigned allreduce_poll_flag = 0; int i, j, nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allreduce; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc != PAMI_SUCCESS) return 1; int o; for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */ { pami_configuration_t configuration[1]; configuration[0].name = PAMI_GEOMETRY_OPTIMIZE; configuration[0].value.intval = o; /* de/optimize */ if(o == -1) ; /* skip update, 
use defaults */ else rc |= update_geometry(client, context[0], world_geometry, configuration, 1); if (rc != PAMI_SUCCESS) return 1; /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc != PAMI_SUCCESS) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for allreduce algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, allreduce_xfer, allreduce_num_algorithm, &allreduce_always_works_algo, &allreduce_always_works_md, &allreduce_must_query_algo, &allreduce_must_query_md); if (rc != PAMI_SUCCESS) return 1; for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++) { metadata_result_t result = {0}; if (task_id == task_zero) { printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, optimize = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, o, allreduce_must_query_md[nalg].name, allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi, allreduce_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = allreduce_must_query_md[nalg].name; unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. 
*/ allreduce.cb_done = cb_done; allreduce.cookie = (void*) & allreduce_poll_flag; allreduce.algorithm = allreduce_must_query_algo[nalg]; allreduce.cmd.xfer_allreduce.sndbuf = sbuf; allreduce.cmd.xfer_allreduce.rcvbuf = rbuf; allreduce.cmd.xfer_allreduce.rtype = PAMI_TYPE_BYTE; allreduce.cmd.xfer_allreduce.rtypecount = 0; int op, dt; for (dt = 0; dt < dt_count; dt++) { for (op = 0; op < op_count; op++) { if (gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz=get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allreduce.cmd.xfer_allreduce.stypecount = i; allreduce.cmd.xfer_allreduce.rtypecount = dataSent; allreduce.cmd.xfer_allreduce.stype = dt_array[dt]; allreduce.cmd.xfer_allreduce.op = op_array[op]; result = check_metadata(allreduce_must_query_md[nalg], allreduce, dt_array[dt], dataSent, /* metadata uses bytes i, */ allreduce.cmd.xfer_allreduce.sndbuf, PAMI_TYPE_BYTE, dataSent, allreduce.cmd.xfer_allreduce.rcvbuf); if (allreduce_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) continue; reduce_initialize_sndbuf (sbuf, i, op, dt, task_id, num_tasks); memset(rbuf, 0xFF, dataSent); /* We aren't testing barrier itself, so use context 0. */ blocking_coll(context[0], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = allreduce_must_query_md[nalg].check_fn(&allreduce); if (result.bitmask) continue; } blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag); } tf = timer(); /* We aren't testing barrier itself, so use context 0. 
*/ blocking_coll(context[0], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, task_id, num_tasks); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } free(allreduce_always_works_algo); free(allreduce_always_works_md); free(allreduce_must_query_algo); free(allreduce_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); } /* optimize loop */ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, local_task_id=0, task_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; volatile unsigned newbar_poll_flag = 0; /* Allreduce variables */ size_t allreduce_num_algorithm[2]; pami_algorithm_t *allreduce_always_works_algo = NULL; pami_metadata_t *allreduce_always_works_md = NULL; pami_algorithm_t *allreduce_must_query_algo = NULL; pami_metadata_t *allreduce_must_query_md = NULL; pami_xfer_type_t allreduce_xfer = PAMI_XFER_ALLREDUCE; volatile unsigned allreduce_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allreduce; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (num_tasks == 1) { fprintf(stderr, "No subcomms on 1 node\n"); return 0; } assert(task_id >= 0); assert(task_id < num_tasks); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, 
context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Set up world barrier */ barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; /* Create the subgeometry */ pami_geometry_range_t *range; int rangecount; pami_geometry_t newgeometry; size_t newbar_num_algo[2]; pami_algorithm_t *newbar_algo = NULL; pami_metadata_t *newbar_md = NULL; pami_algorithm_t *q_newbar_algo = NULL; pami_metadata_t *q_newbar_md = NULL; pami_xfer_t newbarrier; size_t set[2]; int id; range = (pami_geometry_range_t *)malloc(((num_tasks + 1) / 2) * sizeof(pami_geometry_range_t)); int unused_non_task_zero[2]; get_split_method(&num_tasks, task_id, &rangecount, range, &local_task_id, set, &id, &task_zero,unused_non_task_zero); for (; iContext < gNum_contexts; ++iContext) { if (task_id == task_zero) printf("# Context: %u\n", iContext); /* Delay task_zero tasks, and emulate that he's doing "other" message passing. This will cause the geometry_create request from other nodes to be unexpected when doing parentless geometries and won't affect parented. */ if (task_id == task_zero) { delayTest(1); unsigned ii = 0; for (; ii < gNum_contexts; ++ii) PAMI_Context_advance (context[ii], 1000); } rc |= create_and_query_geometry(client, context[0], context[iContext], gParentless ? 
PAMI_GEOMETRY_NULL : world_geometry, &newgeometry, range, rangecount, id + iContext, /* Unique id for each context */ barrier_xfer, newbar_num_algo, &newbar_algo, &newbar_md, &q_newbar_algo, &q_newbar_md); if (rc == 1) return 1; /* Query the sub geometry for reduce algorithms */ rc |= query_geometry(client, context[iContext], newgeometry, allreduce_xfer, allreduce_num_algorithm, &allreduce_always_works_algo, &allreduce_always_works_md, &allreduce_must_query_algo, &allreduce_must_query_md); if (rc == 1) return 1; /* Set up sub geometry barrier */ newbarrier.cb_done = cb_done; newbarrier.cookie = (void*) & newbar_poll_flag; newbarrier.algorithm = newbar_algo[0]; for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++) { metadata_result_t result = {0}; int i, j, k; for (k = 1; k >= 0; k--) { if (set[k]) { if (task_id == task_zero) { printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, task = %d protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, task_zero, allreduce_must_query_md[nalg].name, allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi, allreduce_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = allreduce_must_query_md[nalg].name; unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. 
*/ allreduce.cb_done = cb_done; allreduce.cookie = (void*) & allreduce_poll_flag; allreduce.algorithm = allreduce_must_query_algo[nalg]; allreduce.cmd.xfer_allreduce.sndbuf = sbuf; allreduce.cmd.xfer_allreduce.rcvbuf = rbuf; allreduce.cmd.xfer_allreduce.rtype = PAMI_TYPE_BYTE; allreduce.cmd.xfer_allreduce.rtypecount = 0; int op, dt; for (dt = 0; dt < dt_count; dt++) for (op = 0; op < op_count; op++) { if (gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz = get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allreduce.cmd.xfer_allreduce.stypecount = i; allreduce.cmd.xfer_allreduce.rtypecount = dataSent; allreduce.cmd.xfer_allreduce.stype = dt_array[dt]; allreduce.cmd.xfer_allreduce.op = op_array[op]; result = check_metadata(allreduce_must_query_md[nalg], allreduce, dt_array[dt], dataSent, /* metadata uses bytes i, */ allreduce.cmd.xfer_allreduce.sndbuf, PAMI_TYPE_BYTE, dataSent, allreduce.cmd.xfer_allreduce.rcvbuf); if (allreduce_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; reduce_initialize_sndbuf (sbuf, i, op, dt, local_task_id, num_tasks); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = allreduce_must_query_md[nalg].check_fn(&allreduce); if (result.bitmask) continue; } blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag); } tf = timer(); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int rc_check; rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, local_task_id, num_tasks); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } } /* We aren't testing world barrier itself, so use context 0.*/ blocking_coll(context[0], &barrier, &bar_poll_flag); free(newbar_algo); free(newbar_md); free(q_newbar_algo); free(q_newbar_md); free(allreduce_always_works_algo); free(allreduce_always_works_md); free(allreduce_must_query_algo); free(allreduce_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
/*** Main ***/ int main(int argc, char **argv, char **envp) { Bool listenOnly = FALSE; int i; mach_msg_size_t mxmsgsz = sizeof(union MaxMsgSize) + MAX_TRAILER_SIZE; mach_port_t mp; kern_return_t kr; /* Setup our environment for our children */ setup_env(); /* The server must not run the PanoramiX operations. */ noPanoramiXExtension = TRUE; /* Setup the initial crasherporter info */ strlcpy(__crashreporter_info__, __crashreporter_info__base, __crashreporter_info__len); fprintf(stderr, "X11.app: main(): argc=%d\n", argc); for(i=0; i < argc; i++) { fprintf(stderr, "\targv[%u] = %s\n", (unsigned)i, argv[i]); if(!strcmp(argv[i], "--listenonly")) { listenOnly = TRUE; } } mp = checkin_or_register(server_bootstrap_name); if(mp == MACH_PORT_NULL) { fprintf(stderr, "NULL mach service: %s", server_bootstrap_name); return EXIT_FAILURE; } /* Check if we need to do something other than listen, and make another * thread handle it. */ if(!listenOnly) { pid_t child1, child2; int status; /* Do the fork-twice trick to avoid having to reap zombies */ child1 = fork(); switch (child1) { case -1: /* error */ break; case 0: /* child1 */ child2 = fork(); switch (child2) { int max_files, i; case -1: /* error */ break; case 0: /* child2 */ /* close all open files except for standard streams */ max_files = sysconf(_SC_OPEN_MAX); for(i = 3; i < max_files; i++) close(i); /* ensure stdin is on /dev/null */ close(0); open("/dev/null", O_RDONLY); return startup_trigger(argc, argv, envp); default: /* parent (child1) */ _exit(0); } break; default: /* parent */ waitpid(child1, &status, 0); } } /* Main event loop */ fprintf(stderr, "Waiting for startup parameters via Mach IPC.\n"); kr = mach_msg_server(mach_startup_server, mxmsgsz, mp, 0); if (kr != KERN_SUCCESS) { fprintf(stderr, "%s.X11(mp): %s\n", LAUNCHD_ID_PREFIX, mach_error_string(kr)); return EXIT_FAILURE; } return EXIT_SUCCESS; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, root_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Gatherv variables */ size_t gatherv_num_algorithm[2]; pami_algorithm_t *gatherv_always_works_algo = NULL; pami_metadata_t *gatherv_always_works_md = NULL; pami_algorithm_t *gatherv_must_query_algo = NULL; pami_metadata_t *gatherv_must_query_md = NULL; pami_xfer_type_t gatherv_xfer = PAMI_XFER_GATHERV_INT; volatile unsigned gatherv_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t gatherv; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = posix_memalign(&buf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; int *lengths = (int*)malloc(num_tasks * sizeof(int)); assert(lengths); int *displs = (int*)malloc(num_tasks * sizeof(int)); assert(displs); unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == root_zero) printf("# Context: %u\n", iContext); 
/* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for gatherv algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, gatherv_xfer, gatherv_num_algorithm, &gatherv_always_works_algo, &gatherv_always_works_md, &gatherv_must_query_algo, &gatherv_must_query_md); if (rc == 1) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); for (nalg = 0; nalg < gatherv_num_algorithm[0]; nalg++) { root_zero = 0; gatherv.cb_done = cb_done; gatherv.cookie = (void*) & gatherv_poll_flag; gatherv.algorithm = gatherv_always_works_algo[nalg]; gatherv.cmd.xfer_gatherv_int.sndbuf = buf; gatherv.cmd.xfer_gatherv_int.stype = PAMI_TYPE_BYTE; gatherv.cmd.xfer_gatherv_int.stypecount = 0; gatherv.cmd.xfer_gatherv_int.rcvbuf = rbuf; gatherv.cmd.xfer_gatherv_int.rtype = PAMI_TYPE_BYTE; gatherv.cmd.xfer_gatherv_int.rtypecounts = lengths; gatherv.cmd.xfer_gatherv_int.rdispls = displs; gProtocolName = gatherv_always_works_md[nalg].name; if (task_id == root_zero) { printf("# Gatherv_int Bandwidth Test(size:%zu) -- context = %d, protocol: %s\n",num_tasks, iContext, gProtocolName); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(gatherv_always_works_md[nalg].name,gSelected) == NULL) && gSelector) || ((strstr(gatherv_always_works_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; size_t i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; size_t k = 0; for (k = 0; k < num_tasks; k++) { lengths[k] = i; displs[k] = k * i; } lengths[k-1] = 0; if (dataSent < CUTOFF) niter = 
gNiterlat; else niter = NITERBW; blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { root_zero = (root_zero + num_tasks - 1) % num_tasks; pami_endpoint_t root_ep; PAMI_Endpoint_create(client, root_zero, 0, &root_ep); gatherv.cmd.xfer_gatherv_int.root = root_ep; gather_initialize_sndbuf(task_id, buf, i); if (task_id == root_zero) memset(rbuf, 0xFF, i*num_tasks); if (task_id != num_tasks - 1) gatherv.cmd.xfer_gatherv_int.stypecount = i; blocking_coll(context[iContext], &gatherv, &gatherv_poll_flag); if (task_id == root_zero) { int rc_check; rc |= rc_check = gather_check_rcvbuf(num_tasks-1, rbuf, i); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); } } tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); usec = (tf - ti) / (double)niter; if (task_id == root_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(gatherv_always_works_algo); free(gatherv_always_works_md); free(gatherv_must_query_algo); free(gatherv_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ buf = (char*)buf - gBuffer_offset; free(buf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(lengths); free(displs); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main (int argc, char ** argv) { pami_client_t client; pami_context_t context; size_t num_contexts = 1; pami_task_t task_id, task_zero=0;; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Allgatherv variables */ size_t allgatherv_num_algorithm[2]; pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; pami_algorithm_t *allgatherv_always_works_algo = NULL; pami_metadata_t *allgatherv_always_works_md = NULL; pami_algorithm_t *allgatherv_must_query_algo = NULL; pami_metadata_t *allgatherv_must_query_md = NULL; pami_xfer_type_t allgatherv_xfer = PAMI_XFER_ALLGATHERV; volatile unsigned allgatherv_poll_flag = 0; int nalg= 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allgatherv; setup_env(); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ &context, /* Context */ NULL, /* Clientname=default */ &num_contexts, /* num_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context, &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for allgatherv algorithms */ rc |= query_geometry_world(client, context, &world_geometry, allgatherv_xfer, allgatherv_num_algorithm, &allgatherv_always_works_algo, &allgatherv_always_works_md, &allgatherv_must_query_algo, &allgatherv_must_query_md); if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = 
posix_memalign(&buf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; size_t *lengths = (size_t*)malloc(num_tasks * sizeof(size_t)); size_t *displs = (size_t*)malloc(num_tasks * sizeof(size_t)); barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context, &barrier, &bar_poll_flag); { total_alg = allgatherv_num_algorithm[0]+allgatherv_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < allgatherv_num_algorithm[0]) { query_protocol = 0; next_algo = &allgatherv_always_works_algo[nalg]; next_md = &allgatherv_always_works_md[nalg]; } else { query_protocol = 1; next_algo = &allgatherv_must_query_algo[nalg-allgatherv_num_algorithm[0]]; next_md = &allgatherv_must_query_md[nalg-allgatherv_num_algorithm[0]]; } allgatherv.cb_done = cb_done; allgatherv.cookie = (void*) & allgatherv_poll_flag; allgatherv.algorithm = *next_algo; allgatherv.cmd.xfer_allgatherv.sndbuf = buf; allgatherv.cmd.xfer_allgatherv.rcvbuf = rbuf; allgatherv.cmd.xfer_allgatherv.rtypecounts = lengths; allgatherv.cmd.xfer_allgatherv.rdispls = displs; gProtocolName = next_md->name; if (task_id == 0) { printf("# Allgatherv Bandwidth Test(size:%zu) -- protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, gProtocolName, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every 
time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */ unsigned i, j, k; int dt,op=4/*SUM*/; for (dt = 0; dt < dt_count; dt++) { if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allgatherv: %s\n", dt_array_str[dt]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t dataSent = i * get_type_size(dt_array[dt]); int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (k = 0; k < num_tasks; k++)lengths[k] = i; for (k = 0; k < num_tasks; k++)displs[k] = k*i; allgatherv.cmd.xfer_allgatherv.stypecount = i; allgatherv.cmd.xfer_allgatherv.stype = dt_array[dt]; allgatherv.cmd.xfer_allgatherv.rtype = dt_array[dt]; gather_initialize_sndbuf_dt (buf, i, task_id, dt); memset(rbuf, 0xFF, i); if(query_protocol) { size_t sz=get_type_size(dt_array[dt])*i; result = check_metadata(*next_md, allgatherv, dt_array[dt], sz, /* metadata uses bytes i, */ allgatherv.cmd.xfer_allgatherv.sndbuf, dt_array[dt], sz, allgatherv.cmd.xfer_allgatherv.rcvbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; } blocking_coll(context, &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = next_md->check_fn(&allgatherv); if (result.bitmask) continue; } blocking_coll(context, &allgatherv, &allgatherv_poll_flag); } tf = timer(); blocking_coll(context, &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = gather_check_rcvbuf_dt (num_tasks, rbuf, i, dt); if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]); usec = (tf - ti) / (double)niter; if (task_id == 0) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } rc |= pami_shutdown(&client, &context, &num_contexts); free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(allgatherv_always_works_algo); free(allgatherv_always_works_md); free(allgatherv_must_query_algo); free(allgatherv_must_query_md); return rc; };
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_geometry_t world_geometry; pami_task_t root_task = 0; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; pami_xfer_t barrier; volatile unsigned bar_poll_flag = 0; /* Amscatter variables */ size_t amscatter_num_algorithm[2]; pami_algorithm_t *amscatter_always_works_algo = NULL; pami_metadata_t *amscatter_always_works_md = NULL; pami_algorithm_t *amscatter_must_query_algo = NULL; pami_metadata_t *amscatter_must_query_md = NULL; pami_xfer_type_t amscatter_xfer = PAMI_XFER_AMSCATTER; pami_xfer_t amscatter; volatile unsigned amscatter_total_count = 0; int nalg = 0, i; double ti, tf, usec; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* \note Test environment variable" TEST_ROOT=N, defaults to 0.*/ char* sRoot = getenv("TEST_ROOT"); /* Override ROOT */ if (sRoot) root_task = (pami_task_t) atoi(sRoot); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &my_task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (gNumRoots > num_tasks) gNumRoots = num_tasks; /* Allocate buffer(s) */ int err = 0; void *sbuf = NULL; err = posix_memalign(&sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; void *headers = NULL; err = posix_memalign((void **)&headers, 
128, (num_tasks * sizeof(user_header_t)) + gBuffer_offset); headers = (char*)headers + gBuffer_offset; void *validation = NULL; err = posix_memalign((void **)&validation, 128, (num_tasks * sizeof(validation_t)) + gBuffer_offset); validation = (char*)validation + gBuffer_offset; /* Initialize the headers */ for(i = 0; i < num_tasks; ++i) { ((user_header_t *)headers)[i].dst_rank = i; } unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (my_task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for amscatter algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, amscatter_xfer, amscatter_num_algorithm, &amscatter_always_works_algo, &amscatter_always_works_md, &amscatter_must_query_algo, &amscatter_must_query_md); if (rc == 1) return 1; _g_recv_buffer = rbuf; _g_send_buffer = sbuf; _g_val_buffer = validation; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); amscatter.algorithm = amscatter_always_works_algo[0]; amscatter.cmd.xfer_amscatter.headers = headers; amscatter.cmd.xfer_amscatter.headerlen = sizeof(user_header_t); amscatter.cmd.xfer_amscatter.sndbuf = sbuf; amscatter.cmd.xfer_amscatter.stype = PAMI_TYPE_BYTE; amscatter.cmd.xfer_amscatter.stypecount = 0; for (nalg = 0; nalg < amscatter_num_algorithm[0]; nalg++) { gProtocolName = amscatter_always_works_md[nalg].name; if (my_task_id == root_task) { printf("# AMScatter Bandwidth Test(size:%zu) -- context = %d, root = %d, protocol: %s\n",num_tasks, iContext, root_task, amscatter_always_works_md[nalg].name); printf("# Size(bytes) iterations bytes/sec 
usec\n"); printf("# ----------- ----------- ----------- ---------\n"); fflush(stdout); } if (((strstr(amscatter_always_works_md[nalg].name,gSelected) == NULL) && gSelector) || ((strstr(amscatter_always_works_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; int j; pami_collective_hint_t h = {0}; pami_dispatch_callback_function fn; lgContext = context[iContext]; fn.amscatter = cb_amscatter_recv; PAMI_AMCollective_dispatch_set(context[iContext], amscatter_always_works_algo[nalg], root_task,/* Set the dispatch id, can be any arbitrary value */ fn, (void*) &amscatter_total_count, h); amscatter.cmd.xfer_amscatter.dispatch = root_task; amscatter.algorithm = amscatter_always_works_algo[nalg]; volatile unsigned *nscatter = &amscatter_total_count; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; pami_result_t result; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; *nscatter = 0; memset(rbuf, 0xFF, i); scatter_initialize_sndbuf (sbuf, i, num_tasks); blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { root_task = (root_task + num_tasks - 1) % num_tasks; if (my_task_id == root_task) { amscatter.cmd.xfer_amscatter.stypecount = i; result = PAMI_Collective(context[iContext], &amscatter); if (result != PAMI_SUCCESS) { fprintf (stderr, "Error. Unable to issue collective. 
result = %d\n", result); return 1; } } while (*nscatter <= j) result = PAMI_Context_advance (context[iContext], 1); rc |= _gRc; /* validation return code done in cb_amscatter_done */ } assert(*nscatter == niter); tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); usec = (tf - ti) / (double)niter; if(my_task_id == root_task) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } lgContext = NULL; } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(amscatter_always_works_algo); free(amscatter_always_works_md); free(amscatter_must_query_algo); free(amscatter_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); headers = (char*)headers - gBuffer_offset; free(headers); validation = (char*)validation - gBuffer_offset; free(validation); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, local_task_id=0, task_zero = 0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; volatile unsigned newbar_poll_flag = 0; /* Alltoallv variables */ size_t alltoallv_num_algorithm[2]; pami_algorithm_t *alltoallv_always_works_algo = NULL; pami_metadata_t *alltoallv_always_works_md = NULL; pami_algorithm_t *alltoallv_must_query_algo = NULL; pami_metadata_t *alltoallv_must_query_md = NULL; pami_xfer_type_t alltoallv_xfer = PAMI_XFER_ALLTOALLV; volatile unsigned alltoallv_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t alltoallv; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (num_tasks == 1) { fprintf(stderr, "No subcomms on 1 node\n"); return 0; } assert(task_id >= 0); assert(task_id < num_tasks); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; sndlens = (size_t*) malloc(num_tasks * 
sizeof(size_t)); assert(sndlens); sdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(sdispls); rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rcvlens); rdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rdispls); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Set up world barrier */ barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; /* Create the subgeometry */ pami_geometry_range_t *range; int rangecount; pami_geometry_t newgeometry; size_t newbar_num_algo[2]; pami_algorithm_t *newbar_algo = NULL; pami_metadata_t *newbar_md = NULL; pami_algorithm_t *q_newbar_algo = NULL; pami_metadata_t *q_newbar_md = NULL; pami_xfer_t newbarrier; size_t set[2]; int id; range = (pami_geometry_range_t *)malloc(((num_tasks + 1) / 2) * sizeof(pami_geometry_range_t)); int unused_non_task_zero[2]; get_split_method(&num_tasks, task_id, &rangecount, range, &local_task_id, set, &id, &task_zero,unused_non_task_zero); unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == task_zero) printf("# Context: %u\n", iContext); /* Delay task_zero tasks, and emulate that he's doing "other" message passing. This will cause the geometry_create request from other nodes to be unexpected when doing parentless geometries and won't affect parented. */ if (task_id == task_zero) { delayTest(1); unsigned ii = 0; for (; ii < gNum_contexts; ++ii) PAMI_Context_advance (context[ii], 1000); } rc |= create_and_query_geometry(client, context[0], context[iContext], gParentless ? 
PAMI_GEOMETRY_NULL : world_geometry, &newgeometry, range, rangecount, id + iContext, /* Unique id for each context */ barrier_xfer, newbar_num_algo, &newbar_algo, &newbar_md, &q_newbar_algo, &q_newbar_md); if (rc == 1) return 1; /* Query the sub geometry for alltoallv algorithms */ rc |= query_geometry(client, context[iContext], newgeometry, alltoallv_xfer, alltoallv_num_algorithm, &alltoallv_always_works_algo, &alltoallv_always_works_md, &alltoallv_must_query_algo, &alltoallv_must_query_md); if (rc == 1) return 1; /* Set up sub geometry barrier */ newbarrier.cb_done = cb_done; newbarrier.cookie = (void*) & newbar_poll_flag; newbarrier.algorithm = newbar_algo[0]; for (nalg = 0; nalg < alltoallv_num_algorithm[0]; nalg++) { alltoallv.cb_done = cb_done; alltoallv.cookie = (void*) & alltoallv_poll_flag; alltoallv.algorithm = alltoallv_always_works_algo[nalg]; alltoallv.cmd.xfer_alltoallv.sndbuf = sbuf; alltoallv.cmd.xfer_alltoallv.stype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.stypecounts = sndlens; alltoallv.cmd.xfer_alltoallv.sdispls = sdispls; alltoallv.cmd.xfer_alltoallv.rcvbuf = rbuf; alltoallv.cmd.xfer_alltoallv.rtype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.rtypecounts = rcvlens; alltoallv.cmd.xfer_alltoallv.rdispls = rdispls; int k; gProtocolName = alltoallv_always_works_md[nalg].name; for (k = 1; k >= 0; k--) { if (set[k]) { if (task_id == task_zero) { printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, task_zero = %d, protocol: %s\n", num_tasks, iContext, task_zero, gProtocolName); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(alltoallv_always_works_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(alltoallv_always_works_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; 
if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (j = 0; j < num_tasks; j++) { sndlens[j] = rcvlens[j] = i; sdispls[j] = rdispls[j] = i * j; alltoallv_initialize_bufs(sbuf, rbuf, sndlens, rcvlens, sdispls, rdispls, j); } blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); /* Warmup */ blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); } tf = timer(); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int rc_check; rc |= rc_check = alltoallv_check_rcvbuf(rbuf, rcvlens, rdispls, num_tasks, local_task_id); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); fflush(stderr); } } /* We aren't testing world barrier itself, so use context 0.*/ blocking_coll(context[0], &barrier, &bar_poll_flag); free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(alltoallv_always_works_algo); free(alltoallv_always_works_md); free(alltoallv_must_query_algo); free(alltoallv_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(sndlens); free(sdispls); free(rcvlens); free(rdispls); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char **argv) #endif /* WIN32 */ { #ifdef WIN32 struct arg_param *p = (struct arg_param *)pv; int argc; char **argv; SERVICE_STATUS ss; #endif /* WIN32 */ char *name = NULL; struct tpp_config conf; int rpp_fd; char *pc; int numthreads; char lockfile[MAXPATHLEN + 1]; char path_log[MAXPATHLEN + 1]; char svr_home[MAXPATHLEN + 1]; char *log_file = 0; char *host; int port; char *routers = NULL; int c, i, rc; extern char *optarg; int are_primary; int num_var_env; #ifndef WIN32 struct sigaction act; struct sigaction oact; #endif #ifndef WIN32 /*the real deal or just pbs_version and exit*/ execution_mode(argc, argv); #endif /* As a security measure and to make sure all file descriptors */ /* are available to us, close all above stderr */ #ifdef WIN32 _fcloseall(); #else i = sysconf(_SC_OPEN_MAX); while (--i > 2) (void)close(i); /* close any file desc left open by parent */ #endif /* If we are not run with real and effective uid of 0, forget it */ #ifdef WIN32 argc = p->argc; argv = p->argv; ZeroMemory(&ss, sizeof(ss)); ss.dwCheckPoint = 0; ss.dwServiceType = SERVICE_WIN32_OWN_PROCESS; ss.dwCurrentState = g_dwCurrentState; ss.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN; ss.dwWaitHint = 6000; if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss); if (!isAdminPrivilege(getlogin())) { fprintf(stderr, "%s: Must be run by root\n", argv[0]); return (2); } #else if ((getuid() != 0) || (geteuid() != 0)) { fprintf(stderr, "%s: Must be run by root\n", argv[0]); return (2); } #endif /* WIN32 */ /* set standard umask */ #ifndef WIN32 umask(022); #endif /* load the pbs conf file */ if (pbs_loadconf(0) == 0) { fprintf(stderr, "%s: Configuration error\n", argv[0]); return (1); } umask(022); #ifdef WIN32 save_env(); #endif /* The following is code to reduce security risks */ /* start out with standard umask, system resource limit infinite */ if ((num_var_env = setup_env(pbs_conf.pbs_environment)) == -1) { #ifdef WIN32 g_dwCurrentState = 
SERVICE_STOPPED; ss.dwCurrentState = g_dwCurrentState; ss.dwWin32ExitCode = ERROR_INVALID_ENVIRONMENT; if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss); return (1); #else exit(1); #endif /* WIN32 */ } #ifndef WIN32 i = getgid(); (void)setgroups(1, (gid_t *)&i); /* secure suppl. groups */ #endif log_event_mask = &pbs_conf.pbs_comm_log_events; tpp_set_logmask(*log_event_mask); #ifdef WIN32 winsock_init(); #endif routers = pbs_conf.pbs_comm_routers; numthreads = pbs_conf.pbs_comm_threads; server_host[0] = '\0'; if (pbs_conf.pbs_comm_name) { name = pbs_conf.pbs_comm_name; host = tpp_parse_hostname(name, &port); if (host) snprintf(server_host, sizeof(server_host), "%s", host); free(host); host = NULL; } else if (pbs_conf.pbs_leaf_name) { char *endp; snprintf(server_host, sizeof(server_host), "%s", pbs_conf.pbs_leaf_name); endp = strchr(server_host, ','); /* find the first name */ if (endp) *endp = '\0'; endp = strchr(server_host, ':'); /* cut out the port */ if (endp) *endp = '\0'; name = server_host; } else { if (gethostname(server_host, (sizeof(server_host) - 1)) == -1) { #ifndef WIN32 sprintf(log_buffer, "Could not determine my hostname, errno=%d", errno); #else sprintf(log_buffer, "Could not determine my hostname, errno=%d", WSAGetLastError()); #endif fprintf(stderr, "%s\n", log_buffer); return (1); } if ((get_fullhostname(server_host, server_host, (sizeof(server_host) - 1)) == -1)) { sprintf(log_buffer, "Could not determine my hostname"); fprintf(stderr, "%s\n", log_buffer); return (1); } name = server_host; } if (server_host[0] == '\0') { sprintf(log_buffer, "Could not determine server host"); fprintf(stderr, "%s\n", log_buffer); return (1); } while ((c = getopt(argc, argv, "r:t:e:N")) != -1) { switch (c) { case 'e': *log_event_mask = strtol(optarg, NULL, 0); break; case 'r': routers = optarg; break; case 't': numthreads = atol(optarg); if (numthreads == -1) { usage(argv[0]); return (1); } break; case 'N': stalone = 1; break; default: usage(argv[0]); return 
(1); } } (void)strcpy(daemonname, "Comm@"); (void)strcat(daemonname, name); if ((pc = strchr(daemonname, (int)'.')) != NULL) *pc = '\0'; if(set_msgdaemonname(daemonname)) { fprintf(stderr, "Out of memory\n"); return 1; } (void) snprintf(path_log, sizeof(path_log), "%s/%s", pbs_conf.pbs_home_path, PBS_COMM_LOGDIR); #ifdef WIN32 /* * let SCM wait 10 seconds for log_open() to complete * as it does network interface query which can take time */ ss.dwCheckPoint++; ss.dwWaitHint = 60000; if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss); #endif (void) log_open(log_file, path_log); /* set pbs_comm's process limits */ set_limits(); /* set_limits can call log_record, so call only after opening log file */ /* set tcp function pointers */ set_tpp_funcs(log_tppmsg); (void) snprintf(svr_home, sizeof(svr_home), "%s/%s", pbs_conf.pbs_home_path, PBS_SVR_PRIVATE); if (chdir(svr_home) != 0) { (void) sprintf(log_buffer, msg_init_chdir, svr_home); log_err(-1, __func__, log_buffer); return (1); } (void) sprintf(lockfile, "%s/%s/comm.lock", pbs_conf.pbs_home_path, PBS_SVR_PRIVATE); if ((are_primary = are_we_primary()) == FAILOVER_SECONDARY) { strcat(lockfile, ".secondary"); } else if (are_primary == FAILOVER_CONFIG_ERROR) { sprintf(log_buffer, "Failover configuration error"); log_err(-1, __func__, log_buffer); #ifdef WIN32 g_dwCurrentState = SERVICE_STOPPED; ss.dwCurrentState = g_dwCurrentState; ss.dwWin32ExitCode = ERROR_SERVICE_NOT_ACTIVE; if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss); #endif return (3); } if ((lockfds = open(lockfile, O_CREAT | O_WRONLY, 0600)) < 0) { (void) sprintf(log_buffer, "pbs_comm: unable to open lock file"); log_err(errno, __func__, log_buffer); return (1); } if ((host = tpp_parse_hostname(name, &port)) == NULL) { sprintf(log_buffer, "Out of memory parsing leaf name"); log_err(errno, __func__, log_buffer); return (1); } rc = 0; if (pbs_conf.auth_method == AUTH_RESV_PORT) { rc = set_tpp_config(&pbs_conf, &conf, host, port, routers, 
pbs_conf.pbs_use_compression, TPP_AUTH_RESV_PORT, NULL, NULL); } else { /* for all non-resv-port based authentication use a callback from TPP */ rc = set_tpp_config(&pbs_conf, &conf, host, port, routers, pbs_conf.pbs_use_compression, TPP_AUTH_EXTERNAL, get_ext_auth_data, validate_ext_auth_data); } if (rc == -1) { (void) sprintf(log_buffer, "Error setting TPP config"); log_err(-1, __func__, log_buffer); return (1); } free(host); i = 0; if (conf.routers) { while (conf.routers[i]) { sprintf(log_buffer, "Router[%d]:%s", i, conf.routers[i]); fprintf(stdout, "%s\n", log_buffer); log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO, msg_daemonname, log_buffer); i++; } } #ifndef DEBUG #ifndef WIN32 if (stalone != 1) go_to_background(); #endif #endif #ifdef WIN32 ss.dwCheckPoint = 0; g_dwCurrentState = SERVICE_RUNNING; ss.dwCurrentState = g_dwCurrentState; if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss); #endif if (already_forked == 0) lock_out(lockfds, F_WRLCK); /* go_to_backgroud call creates a forked process, * thus print/log pid only after go_to_background() * has been called */ sprintf(log_buffer, "%s ready (pid=%d), Proxy Name:%s, Threads:%d", argv[0], getpid(), conf.node_name, numthreads); fprintf(stdout, "%s\n", log_buffer); log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO, msg_daemonname, log_buffer); #ifndef DEBUG pbs_close_stdfiles(); #endif #ifdef WIN32 signal(SIGINT, stop_me); signal(SIGTERM, stop_me); #else sigemptyset(&act.sa_mask); act.sa_flags = 0; act.sa_handler = hup_me; if (sigaction(SIGHUP, &act, &oact) != 0) { log_err(errno, __func__, "sigaction for HUP"); return (2); } act.sa_handler = stop_me; if (sigaction(SIGINT, &act, &oact) != 0) { log_err(errno, __func__, "sigaction for INT"); return (2); } if (sigaction(SIGTERM, &act, &oact) != 0) { log_err(errno, __func__, "sigactin for TERM"); return (2); } if (sigaction(SIGQUIT, &act, &oact) != 0) { log_err(errno, __func__, "sigactin for QUIT"); return 
(2); } #ifdef SIGSHUTDN if (sigaction(SIGSHUTDN, &act, &oact) != 0) { log_err(errno, __func__, "sigactin for SHUTDN"); return (2); } #endif /* SIGSHUTDN */ act.sa_handler = SIG_IGN; if (sigaction(SIGPIPE, &act, &oact) != 0) { log_err(errno, __func__, "sigaction for PIPE"); return (2); } if (sigaction(SIGUSR1, &act, &oact) != 0) { log_err(errno, __func__, "sigaction for USR1"); return (2); } if (sigaction(SIGUSR2, &act, &oact) != 0) { log_err(errno, __func__, "sigaction for USR2"); return (2); } #endif /* WIN32 */ conf.node_type = TPP_ROUTER_NODE; conf.numthreads = numthreads; if ((rpp_fd = tpp_init_router(&conf)) == -1) { log_err(-1, __func__, "tpp init failed\n"); return 1; } /* Protect from being killed by kernel */ daemon_protect(0, PBS_DAEMON_PROTECT_ON); /* go in a while loop */ while (get_out == 0) { if (hupped == 1) { struct pbs_config pbs_conf_bak; int new_logevent; hupped = 0; /* reset back */ memcpy(&pbs_conf_bak, &pbs_conf, sizeof(struct pbs_config)); if (pbs_loadconf(1) == 0) { log_tppmsg(LOG_CRIT, NULL, "Configuration error, ignoring"); memcpy(&pbs_conf, &pbs_conf_bak, sizeof(struct pbs_config)); } else { /* restore old pbs.conf */ new_logevent = pbs_conf.pbs_comm_log_events; memcpy(&pbs_conf, &pbs_conf_bak, sizeof(struct pbs_config)); pbs_conf.pbs_comm_log_events = new_logevent; log_tppmsg(LOG_INFO, NULL, "Processed SIGHUP"); log_event_mask = &pbs_conf.pbs_comm_log_events; tpp_set_logmask(*log_event_mask); } } sleep(3); } tpp_router_shutdown(); log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname, "Exiting"); log_close(1); lock_out(lockfds, F_UNLCK); /* unlock */ (void)close(lockfds); (void)unlink(lockfile); return 0; }
int srun(int ac, char **av) { int debug_level; env_t *env = xmalloc(sizeof(env_t)); log_options_t logopt = LOG_OPTS_STDERR_ONLY; bool got_alloc = false; slurm_step_io_fds_t cio_fds = SLURM_STEP_IO_FDS_INITIALIZER; slurm_step_launch_callbacks_t step_callbacks; env->stepid = -1; env->procid = -1; env->localid = -1; env->nodeid = -1; env->cli = NULL; env->env = NULL; env->ckpt_dir = NULL; slurm_conf_init(NULL); debug_level = _slurm_debug_env_val(); logopt.stderr_level += debug_level; log_init(xbasename(av[0]), logopt, 0, NULL); _set_exit_code(); if (slurm_select_init(1) != SLURM_SUCCESS ) fatal( "failed to initialize node selection plugin" ); if (switch_init() != SLURM_SUCCESS ) fatal("failed to initialize switch plugin"); init_srun(ac, av, &logopt, debug_level, 1); create_srun_job(&job, &got_alloc, 0, 1); /* * Enhance environment for job */ if (opt.bcast_flag) _file_bcast(); if (opt.cpus_set) env->cpus_per_task = opt.cpus_per_task; if (opt.ntasks_per_node != NO_VAL) env->ntasks_per_node = opt.ntasks_per_node; if (opt.ntasks_per_socket != NO_VAL) env->ntasks_per_socket = opt.ntasks_per_socket; if (opt.ntasks_per_core != NO_VAL) env->ntasks_per_core = opt.ntasks_per_core; env->distribution = opt.distribution; if (opt.plane_size != NO_VAL) env->plane_size = opt.plane_size; env->cpu_bind_type = opt.cpu_bind_type; env->cpu_bind = opt.cpu_bind; env->cpu_freq_min = opt.cpu_freq_min; env->cpu_freq_max = opt.cpu_freq_max; env->cpu_freq_gov = opt.cpu_freq_gov; env->mem_bind_type = opt.mem_bind_type; env->mem_bind = opt.mem_bind; env->overcommit = opt.overcommit; env->slurmd_debug = opt.slurmd_debug; env->labelio = opt.labelio; env->comm_port = slurmctld_comm_addr.port; env->batch_flag = 0; if (opt.job_name) env->job_name = opt.job_name; if (job) { uint16_t *tasks = NULL; slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_TASKS, &tasks); env->select_jobinfo = job->select_jobinfo; env->nodelist = job->nodelist; env->partition = job->partition; /* If we didn't get the allocation 
don't overwrite the * previous info. */ if (got_alloc) env->nhosts = job->nhosts; env->ntasks = job->ntasks; env->task_count = _uint16_array_to_str(job->nhosts, tasks); env->jobid = job->jobid; env->stepid = job->stepid; env->account = job->account; env->qos = job->qos; env->resv_name = job->resv_name; } if (opt.pty && (set_winsize(job) < 0)) { error("Not using a pseudo-terminal, disregarding --pty option"); opt.pty = false; } if (opt.pty) { struct termios term; int fd = STDIN_FILENO; /* Save terminal settings for restore */ tcgetattr(fd, &termdefaults); tcgetattr(fd, &term); /* Set raw mode on local tty */ cfmakeraw(&term); /* Re-enable output processing such that debug() and * and error() work properly. */ term.c_oflag |= OPOST; tcsetattr(fd, TCSANOW, &term); atexit(&_pty_restore); block_sigwinch(); pty_thread_create(job); env->pty_port = job->pty_port; env->ws_col = job->ws_col; env->ws_row = job->ws_row; } setup_env(env, opt.preserve_env); xfree(env->task_count); xfree(env); _set_node_alias(); memset(&step_callbacks, 0, sizeof(step_callbacks)); step_callbacks.step_signal = launch_g_fwd_signal; /* re_launch: */ relaunch: pre_launch_srun_job(job, 0, 1); launch_common_set_stdio_fds(job, &cio_fds); if (!launch_g_step_launch(job, &cio_fds, &global_rc, &step_callbacks)) { if (launch_g_step_wait(job, got_alloc) == -1) goto relaunch; } fini_srun(job, got_alloc, &global_rc, 0); return (int)global_rc; }
/*
 * _setup_one_job_env - build the environment for a single job.
 *
 * Fills a temporary env_t from the given options and job, switches the local
 * terminal to raw mode when a pseudo-terminal is requested, calls
 * setup_env() on a copy of `environ`, and stores the resulting environment
 * array in job->env.
 *
 * IN/OUT opt_local - options for this job; ->pty is cleared when
 *                    set_winsize() fails
 * IN/OUT job       - job to describe; must not be NULL; ->env is set
 * IN got_alloc     - when false, env->nhosts is left untouched
 */
static void _setup_one_job_env(opt_t *opt_local, srun_job_t *job, bool got_alloc)
{
    env_t *env = xmalloc(sizeof(*env));
    uint16_t *task_cnts = NULL;

    xassert(job);

    /* Placeholder values; stepid is replaced from the job below */
    env->stepid = -1;
    env->procid = -1;
    env->nodeid = -1;
    env->localid = -1;

    if (opt_local->bcast_flag)
        _file_bcast(opt_local, job);

    /* Values copied from the options unconditionally */
    env->distribution = opt_local->distribution;
    env->cpu_bind_type = opt_local->cpu_bind_type;
    env->cpu_bind = opt_local->cpu_bind;
    env->cpu_freq_min = opt_local->cpu_freq_min;
    env->cpu_freq_max = opt_local->cpu_freq_max;
    env->cpu_freq_gov = opt_local->cpu_freq_gov;
    env->mem_bind_type = opt_local->mem_bind_type;
    env->mem_bind = opt_local->mem_bind;
    env->overcommit = opt_local->overcommit;
    env->slurmd_debug = opt_local->slurmd_debug;
    env->labelio = opt_local->labelio;
    env->comm_port = slurmctld_comm_addr.port;

    /* Values copied only when the option is set / is not NO_VAL */
    if (opt_local->cpus_set)
        env->cpus_per_task = opt_local->cpus_per_task;
    if (opt_local->ntasks_per_node != NO_VAL)
        env->ntasks_per_node = opt_local->ntasks_per_node;
    if (opt_local->ntasks_per_socket != NO_VAL)
        env->ntasks_per_socket = opt_local->ntasks_per_socket;
    if (opt_local->ntasks_per_core != NO_VAL)
        env->ntasks_per_core = opt_local->ntasks_per_core;
    if (opt_local->plane_size != NO_VAL)
        env->plane_size = opt_local->plane_size;
    if (opt_local->job_name)
        env->job_name = opt_local->job_name;

    slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_TASKS, &task_cnts);

    /* Job attributes; the pack_* fields take precedence when present */
    env->select_jobinfo = job->select_jobinfo;
    env->nodelist = job->pack_node_list ? job->pack_node_list
                                        : job->nodelist;
    env->partition = job->partition;
    env->stepid = job->stepid;
    env->account = job->account;
    env->qos = job->qos;
    env->resv_name = job->resv_name;

    /*
     * If we didn't get the allocation don't overwrite the previous info.
     */
    if (got_alloc)
        env->nhosts = job->nhosts;

    if (job->pack_ntasks != NO_VAL)
        env->ntasks = job->pack_ntasks;
    else
        env->ntasks = job->ntasks;

    env->task_count = _uint16_array_to_str(job->nhosts, task_cnts);

    if (job->pack_jobid == NO_VAL)
        env->jobid = job->jobid;
    else
        env->jobid = job->pack_jobid;

    if (opt_local->pty) {
        if (set_winsize(job) < 0) {
            error("Not using a pseudo-terminal, disregarding --pty option");
            opt_local->pty = false;
        } else {
            struct termios raw_tty;

            /* Save terminal settings for restore */
            tcgetattr(STDIN_FILENO, &termdefaults);
            tcgetattr(STDIN_FILENO, &raw_tty);

            /* Set raw mode on local tty, but re-enable output
             * processing such that debug() and error() work
             * properly. */
            cfmakeraw(&raw_tty);
            raw_tty.c_oflag |= OPOST;
            tcsetattr(STDIN_FILENO, TCSANOW, &raw_tty);
            atexit(_pty_restore);

            block_sigwinch();
            pty_thread_create(job);

            env->pty_port = job->pty_port;
            env->ws_col = job->ws_col;
            env->ws_row = job->ws_row;
        }
    }

    /* setup_env() works on a copy of environ; the job keeps the result */
    env->env = env_array_copy((const char **) environ);
    setup_env(env, opt_local->preserve_env);
    job->env = env->env;

    /* Only the env_t shell and its task_count string are released here */
    xfree(env->task_count);
    xfree(env);
}
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, root=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Reduce variables */ size_t reduce_num_algorithm[2]; pami_algorithm_t *reduce_always_works_algo = NULL; pami_metadata_t *reduce_always_works_md = NULL; pami_algorithm_t *reduce_must_query_algo = NULL; pami_metadata_t *reduce_must_query_md = NULL; pami_xfer_type_t reduce_xfer = PAMI_XFER_REDUCE; volatile unsigned reduce_poll_flag = 0; int i, j, nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t reduce; pami_type_t pami_stype = 0; pami_type_t pami_rtype = 0; pami_result_t ret; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; ret = PAMI_Type_create(&pami_stype); if(ret != PAMI_SUCCESS) return 1; ret = PAMI_Type_create(&pami_rtype); if(ret != PAMI_SUCCESS) return 1; PAMI_Type_add_typed(pami_stype, PAMI_TYPE_DOUBLE, 0, 1, sizeof(double)*2); 
PAMI_Type_add_typed(pami_rtype, PAMI_TYPE_DOUBLE, sizeof(double), 1, sizeof(double)); ret = PAMI_Type_complete(pami_stype, sizeof(double)); if(ret != PAMI_SUCCESS){ printf("Invalid atom size for stype\n"); return 1; } ret = PAMI_Type_complete(pami_rtype, sizeof(double)); if(ret != PAMI_SUCCESS){ printf("Invalid atom size for rtype\n"); return 1; } /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for reduce algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, reduce_xfer, reduce_num_algorithm, &reduce_always_works_algo, &reduce_always_works_md, &reduce_must_query_algo, &reduce_must_query_md); if (rc == 1) return 1; for (nalg = 0; nalg < reduce_num_algorithm[0]; nalg++) { if (task_id == 0) /* root not set yet */ { printf("# Reduce Bandwidth Test -- context = %d, root varies, protocol: %s\n", iContext, reduce_always_works_md[nalg].name); printf("# Size(bytes) cycles bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(reduce_always_works_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(reduce_always_works_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = reduce_always_works_md[nalg].name; reduce.cb_done = cb_done; reduce.cookie = (void*) & reduce_poll_flag; reduce.algorithm = reduce_always_works_algo[nalg]; reduce.cmd.xfer_reduce.sndbuf = sbuf; reduce.cmd.xfer_reduce.rcvbuf = rbuf; reduce.cmd.xfer_reduce.rtype = PAMI_TYPE_BYTE; reduce.cmd.xfer_reduce.rtypecount = 0; if (task_id 
== 0) /* root not set yet */ printf("Running Reduce: Non-Contiguous datatype PAMI_DATA_SUM\n"); for (i = 1; i <= gMax_byte_count/(sizeof(double)*2); i *= 2) { size_t sz=sizeof(double)*2; size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; reduce.cmd.xfer_reduce.stypecount = i; reduce.cmd.xfer_reduce.rtypecount = i; reduce.cmd.xfer_reduce.stype = pami_stype; reduce.cmd.xfer_reduce.rtype = pami_rtype; reduce.cmd.xfer_reduce.op = PAMI_DATA_SUM; initialize_sndbuf (sbuf, i, task_id, num_tasks); memset(rbuf, 0xFF, i*2*sizeof(double)); /* We aren't testing barrier itself, so use context 0. */ blocking_coll(context[0], &barrier, &bar_poll_flag); ti = timer(); root = 0; for (j = 0; j < niter; j++) { pami_endpoint_t root_ep; PAMI_Endpoint_create(client, root, 0, &root_ep); reduce.cmd.xfer_reduce.root = root_ep; if (task_id == root) reduce.cmd.xfer_reduce.rcvbuf = rbuf; else reduce.cmd.xfer_reduce.rcvbuf = NULL; blocking_coll(context[iContext], &reduce, &reduce_poll_flag); root = (root + 1) % num_tasks; } tf = timer(); /* We aren't testing barrier itself, so use context 0. 
*/ blocking_coll(context[0], &barrier, &bar_poll_flag); if(task_id < niter) /* only validate tasks which were roots in niter loop */ { int rc_check; rc |= rc_check = check_rcvbuf (rbuf, i, task_id, num_tasks); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); } usec = (tf - ti) / (double)niter; if (task_id == root) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } if(!i)i++; } } free(reduce_always_works_algo); free(reduce_always_works_md); free(reduce_must_query_algo); free(reduce_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
/*
 * exec_task - finish preparing one task of a job and execve() its program.
 *
 * Current process is running as the user when this is called.
 *
 * The function builds the task environment through setup_env(), resolves a
 * relative command name with _build_path(), runs the interconnect, MPI,
 * spank, task-plugin and prolog hooks, applies the user limits and finally
 * calls execve().
 *
 * IN job - job the task belongs to; job->envtp and job->env are updated
 * IN i   - index of the task in job->task[]
 *
 * Does not return once the final stage is reached: every path from the
 * restart check onwards ends in execve() or exit(). Exit status is 1 when a
 * pre-launch hook fails, 2 when no executable is given, 5 when the user
 * limits cannot be set, and the errno of the failing call when the restart
 * or execve() fails.
 */
void exec_task(slurmd_job_t *job, int i)
{
    uint32_t *gtids;        /* pointer to array of ranks */
    int fd, j;
    int saved_errno;
    slurmd_task_info_t *task = job->task[i];
    char **tmp_env;

    if (i == 0)
        _make_tmpdir(job);

    /* Collect the global ids of all tasks on this node into one string */
    gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
    for (j = 0; j < job->node_tasks; j++)
        gtids[j] = job->task[j]->gtid;
    job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
    xfree(gtids);

    job->envtp->jobid = job->jobid;
    job->envtp->stepid = job->stepid;
    job->envtp->nodeid = job->nodeid;
    job->envtp->cpus_on_node = job->cpus;
    job->envtp->procid = task->gtid;
    job->envtp->localid = task->id;
    job->envtp->task_pid = getpid();
    job->envtp->distribution = job->task_dist;
    job->envtp->cpu_bind = xstrdup(job->cpu_bind);
    job->envtp->cpu_bind_type = job->cpu_bind_type;
    job->envtp->cpu_freq = job->cpu_freq;
    job->envtp->mem_bind = xstrdup(job->mem_bind);
    job->envtp->mem_bind_type = job->mem_bind_type;
    /* NOTE(review): this overrides the task_dist value stored above */
    job->envtp->distribution = -1;
    job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
    job->envtp->batch_flag = job->batch;

    /* Modify copy of job's environment. Do not alter in place or
     * concurrent searches of the environment can generate invalid memory
     * references. */
    job->envtp->env = env_array_copy((const char **) job->env);
    setup_env(job->envtp, false);
    setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);

    /* Swap the new environment in and release the old one */
    tmp_env = job->env;
    job->env = job->envtp->env;
    env_array_free(tmp_env);
    job->envtp->env = NULL;

    xfree(job->envtp->task_count);

    if (task->argv[0] && *task->argv[0] != '/') {
        /*
         * Normally the client (srun) expands the command name
         * to a fully qualified path, but in --multi-prog mode it
         * is left up to the server to search the PATH for the
         * executable.
         */
        task->argv[0] = _build_path(task->argv[0], job->env);
    }

    if (!job->batch) {
        if (interconnect_attach(job->switch_job, &job->env,
                                job->nodeid, (uint32_t) i, job->nnodes,
                                job->ntasks, task->gtid) < 0) {
            error("Unable to attach to interconnect: %m");
            log_fini();
            exit(1);
        }

        if (_setup_mpi(job, i) != SLURM_SUCCESS) {
            error("Unable to configure MPI plugin: %m");
            log_fini();
            exit(1);
        }
    }

    /* task-specific pre-launch activities */
    if (spank_user_task (job, i) < 0) {
        error ("Failed to invoke task plugin stack");
        exit (1);
    }

    /* task plugin hook */
    if (pre_launch(job)) {
        error ("Failed task affinity setup");
        exit (1);
    }

    if (conf->task_prolog) {
        char *my_prolog;

        /* Copy the path under the config lock before running it */
        slurm_mutex_lock(&conf->config_mutex);
        my_prolog = xstrdup(conf->task_prolog);
        slurm_mutex_unlock(&conf->config_mutex);
        _run_script_and_set_env("slurm task_prolog", my_prolog, job);
        xfree(my_prolog);
    }
    if (job->task_prolog) {
        _run_script_and_set_env("user task_prolog",
                                job->task_prolog, job);
    }

    if (!job->batch)
        pdebug_stop_current(job);

    if (job->env == NULL) {
        debug("job->env is NULL");
        job->env = (char **)xmalloc(sizeof(char *));
        job->env[0] = (char *)NULL;
    }

    if (job->restart_dir) {
        info("restart from %s", job->restart_dir);
        /* no return on success */
        checkpoint_restart_task(job, job->restart_dir, task->gtid);
        /* Capture errno before logging, which may overwrite it */
        saved_errno = errno;
        error("Restart task failed: %m");
        exit(saved_errno);
    }

    if (task->argv[0] == NULL) {
        error("No executable program specified for this task");
        exit(2);
    }

    /* Do this last so you don't worry too much about the users
     * limits including the slurmstepd in with it. */
    if (set_user_limits(job) < 0) {
        debug("Unable to set user limits");
        log_fini();
        exit(5);
    }

    execve(task->argv[0], task->argv, job->env);

    /*
     * execve() only returns on failure. Remember its errno now: the
     * open()/read()/error() calls below may change it, and both the %m
     * expansion and the exit status must describe the execve() failure.
     */
    saved_errno = errno;

    /*
     * ENOENT on a file that can be opened points at a missing "#!"
     * interpreter, so report the interpreter line instead of the script.
     */
    if ((saved_errno == ENOENT) &&
        ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
        char buf[256], *eol;
        int sz;

        /* Leave room for the terminator: read() does not add one */
        sz = read(fd, buf, sizeof(buf) - 1);
        if ((sz >= 3) && (strncmp(buf, "#!", 2) == 0)) {
            /* Terminate before searching so strchr() stays in bounds */
            buf[sz] = '\0';
            eol = strchr(buf, '\n');
            if (eol)
                eol[0] = '\0';
            errno = saved_errno;
            error("execve(): bad interpreter(%s): %m", buf+2);
            exit(saved_errno);
        }
    }

    errno = saved_errno;
    error("execve(): %s: %m", task->argv[0]);
    exit(saved_errno);
}
/*
 * Alltoallv bandwidth test driver.
 *
 * For every context this queries the barrier and alltoallv algorithms of the
 * world geometry, then walks the "always works" list followed by the "must
 * query" list. Each algorithm whose name passes the gSelected/gSelector
 * filter is timed with blocking alltoallv operations over a pair of
 * user-built types, and the receive buffer is validated after each size.
 *
 * sndlens, sdispls, rcvlens and rdispls are not declared in this function;
 * they are allocated here and released before returning.
 *
 * Returns 1 when setup fails; otherwise the OR of the status values
 * collected from the geometry queries, buffer validation and shutdown.
 */
int main(int argc, char*argv[])
{
  pami_client_t client;
  pami_context_t *context;
  pami_task_t task_id;
  size_t num_tasks;
  pami_geometry_t world_geometry;

  /* Barrier variables */
  size_t barrier_num_algorithm[2];
  pami_algorithm_t *bar_always_works_algo = NULL;
  pami_metadata_t *bar_always_works_md = NULL;
  pami_algorithm_t *bar_must_query_algo = NULL;
  pami_metadata_t *bar_must_query_md = NULL;
  pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned bar_poll_flag = 0;

  /* Alltoallv variables */
  size_t alltoallv_num_algorithm[2];
  pami_algorithm_t *alltoallv_always_works_algo = NULL;
  pami_metadata_t *alltoallv_always_works_md = NULL;
  /* Algorithm and metadata entry selected for the current loop pass */
  pami_algorithm_t *next_algo = NULL;
  pami_metadata_t *next_md= NULL;
  pami_algorithm_t *alltoallv_must_query_algo = NULL;
  pami_metadata_t *alltoallv_must_query_md = NULL;
  pami_xfer_type_t alltoallv_xfer = PAMI_XFER_ALLTOALLV;
  volatile unsigned alltoallv_poll_flag = 0;

  int nalg= 0, total_alg;
  double ti, tf, usec;
  pami_xfer_t barrier;
  pami_xfer_t alltoallv;
  pami_type_t pami_stype = 0;
  pami_type_t pami_rtype = 0;
  pami_result_t ret;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);

  /* Initialize PAMI */
  int rc = pami_init(&client, /* Client */
                     context, /* Context */
                     NULL, /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts */
                     NULL, /* null configuration */
                     0, /* no configuration */
                     &task_id, /* task id */
                     &num_tasks); /* number of tasks */

  if (rc == 1)
    return 1;

  /* Allocate buffer(s): gMax_byte_count bytes per task plus gBuffer_offset;
     the working pointers are then shifted forward by gBuffer_offset. */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  /* One count and one displacement entry per task, for each direction */
  sndlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sndlens);
  sdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sdispls);
  rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rcvlens);
  rdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rdispls);

  /* Create both types, populate them, then complete them */
  ret = PAMI_Type_create(&pami_stype);
  if(ret != PAMI_SUCCESS)
    return 1;

  ret = PAMI_Type_create(&pami_rtype);
  if(ret != PAMI_SUCCESS)
    return 1;

  /* NOTE(review): presumably (type, bytes, offset, count, stride), which
     would make stype every other double and rtype contiguous doubles
     starting one double in -- confirm against the PAMI API. */
  PAMI_Type_add_simple(pami_stype, sizeof(double), 0, 1, sizeof(double)*2);
  PAMI_Type_add_simple(pami_rtype, sizeof(double), sizeof(double), 1, sizeof(double));

  ret = PAMI_Type_complete(pami_stype, sizeof(double));
  if(ret != PAMI_SUCCESS){
    printf("Invalid atom size for stype\n");
    return 1;
  }

  ret = PAMI_Type_complete(pami_rtype, sizeof(double));
  if(ret != PAMI_SUCCESS){
    printf("Invalid atom size for rtype\n");
    return 1;
  }

  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /* Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               barrier_xfer,
                               barrier_num_algorithm,
                               &bar_always_works_algo,
                               &bar_always_works_md,
                               &bar_must_query_algo,
                               &bar_must_query_md);

    if (rc == 1)
      return 1;

    /* Query the world geometry for alltoallv algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               alltoallv_xfer,
                               alltoallv_num_algorithm,
                               &alltoallv_always_works_algo,
                               &alltoallv_always_works_md,
                               &alltoallv_must_query_algo,
                               &alltoallv_must_query_md);

    if (rc == 1)
      return 1;

    /* The barrier always uses the first "always works" algorithm */
    barrier.cb_done = cb_done;
    barrier.cookie = (void*) & bar_poll_flag;
    barrier.algorithm = bar_always_works_algo[0];

    /* Indices below alltoallv_num_algorithm[0] address the "always works"
       list; the remaining indices address the "must query" list. */
    total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1];
    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < alltoallv_num_algorithm[0])
      {
        query_protocol = 0;
        next_algo = &alltoallv_always_works_algo[nalg];
        next_md = &alltoallv_always_works_md[nalg];
      }
      else
      {
        query_protocol = 1;
        next_algo = &alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]];
        next_md = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]];
      }

      gProtocolName = next_md->name;

      alltoallv.cb_done = cb_done;
      alltoallv.cookie = (void*) & alltoallv_poll_flag;
      alltoallv.algorithm = *next_algo;
      alltoallv.cmd.xfer_alltoallv.sndbuf = sbuf;
      alltoallv.cmd.xfer_alltoallv.stype = pami_stype;
      alltoallv.cmd.xfer_alltoallv.stypecounts = sndlens;
      alltoallv.cmd.xfer_alltoallv.sdispls = sdispls;
      alltoallv.cmd.xfer_alltoallv.rcvbuf = rbuf;
      alltoallv.cmd.xfer_alltoallv.rtype = pami_rtype;
      alltoallv.cmd.xfer_alltoallv.rtypecounts = rcvlens;
      alltoallv.cmd.xfer_alltoallv.rdispls = rdispls;

      /* Repeats the assignment made a few lines above */
      gProtocolName = next_md->name;

      /* The banner is printed before the filter below, so skipped protocols
         still get a header. */
      if (task_id == 0)
      {
        printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, gProtocolName,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes) iterations bytes/sec usec\n");
        printf("# ----------- ----------- ----------- ---------\n");
      }

      /* With gSelector set, run only names containing gSelected; otherwise
         run only names that do not contain it. */
      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue;

      int i, j;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /* must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */

      /*
       * i is the per-task element count. It starts at 0, where "i *= 2"
       * alone would never advance, so every path that finishes a pass bumps
       * 0 to 1 first; the doubling then yields 2. Counts visited are
       * therefore 0, 2, 4, 8, ...
       */
      for (i = 0; i <= (gMax_byte_count/(sizeof(double)*2)); i *= 2)
      {
        /* NOTE(review): dataSent holds the element count, not a byte count,
           although it is printed under the "Size(bytes)" column -- confirm. */
        size_t dataSent = i;
        int niter;

        if (dataSent < CUTOFF)
          niter = gNiterlat;
        else
          niter = NITERBW;

        /* Same count to and from every task; displacements in elements */
        for (j = 0; j < num_tasks; j++)
        {
          sndlens[j] = rcvlens[j] = i;
          sdispls[j] = rdispls[j] = i * j;

          initialize_sndbuf( j, (double*)sbuf, (double*)rbuf );
        }

        /* "Must query" protocols are checked against their metadata once
           per size; a non-zero bitmask skips this size entirely. */
        if(query_protocol)
        {
          size_t sz=get_type_size(pami_stype)*i;
          size_t rsz=get_type_size(pami_rtype)*i;
          result = check_metadata(*next_md,
                                  alltoallv,
                                  pami_stype,
                                  sz, /* metadata uses bytes i, */
                                  alltoallv.cmd.xfer_alltoallv.sndbuf,
                                  pami_rtype,
                                  rsz,
                                  alltoallv.cmd.xfer_alltoallv.rcvbuf);
          if (next_md->check_correct.values.nonlocal)
          {
            /* \note We currently ignore check_correct.values.nonlocal
               because these tests should not have nonlocal differences (so far). */
            result.check.nonlocal = 0;
          }

          if (result.bitmask)
          {
            /* This continue targets the size loop, so apply the 0 -> 1 bump
               here as well. */
            if(!i)i++;
            continue;
          }
        }

        /* Barrier on both sides of the timed region */
        blocking_coll(context[iContext], &barrier, &bar_poll_flag);
        ti = timer();

        for (j = 0; j < niter; j++)
        {
          if (checkrequired) /* must query every time */
          {
            result = next_md->check_fn(&alltoallv);
            if (result.bitmask)
            {
              /* NOTE(review): this continue only skips the current timing
                 iteration (it binds to the j loop), yet the size index is
                 bumped here too. Nothing after this loop reads i before the
                 final bump, so the counts visited look unaffected --
                 confirm this is intended. */
              if(!i)i++;
              continue;
            }
          }

          blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag);
        }

        tf = timer();
        blocking_coll(context[iContext], &barrier, &bar_poll_flag);

        /* Fold the validation result into rc and report failures */
        int rc_check;
        rc |= rc_check = check_rcvbuf(num_tasks, task_id, (double*)rbuf, (double*)sbuf);

        if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName);

        /* Average of the timer difference per iteration */
        usec = (tf - ti) / (double)niter;

        if (task_id == 0)
        {
          printf(" %11lld %16d %14.1f %12.2f\n",
                 (long long)dataSent,
                 niter,
                 (double)1e6*(double)dataSent / (double)usec,
                 usec);
          fflush(stdout);
        }
        /* Bump 0 to 1 so the doubling in the loop header can make progress */
        if(!i)i++;
      }
    }

    /* Both algorithm lists are queried per context, so release them per
       context */
    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(alltoallv_always_works_algo);
    free(alltoallv_always_works_md);
    free(alltoallv_must_query_algo);
    free(alltoallv_must_query_md);
  } /* end of the per-context loop */

  /* Undo the gBuffer_offset shift before handing the pointers to free() */
  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);

  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  free(sndlens);
  free(sdispls);
  free(rcvlens);
  free(rdispls);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}