static void libfabric_init_addrvec(int rx_ctx_cnt, int rx_ctx_bits) { struct gather_info* my_addr_info; void* addr_infos; char* addrs; char* tai; size_t my_addr_len; size_t addr_info_len; int i, j; // Assumes my_addr_len is the same on all nodes my_addr_len = 0; OFICHKRET(fi_getname(&ofi.ep->fid, NULL, &my_addr_len), -FI_ETOOSMALL); addr_info_len = sizeof(struct gather_info) + my_addr_len; my_addr_info = chpl_mem_alloc(addr_info_len, CHPL_RT_MD_COMM_UTIL, 0, 0); my_addr_info->node = chpl_nodeID; OFICHKERR(fi_getname(&ofi.ep->fid, &my_addr_info->info, &my_addr_len)); addr_infos = chpl_mem_allocMany(chpl_numNodes, addr_info_len, CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); chpl_comm_ofi_oob_allgather(my_addr_info, addr_infos, addr_info_len); addrs = chpl_mem_allocMany(chpl_numNodes, my_addr_len, CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); for (tai = addr_infos, i = 0; i < chpl_numNodes; i++) { struct gather_info* ai = (struct gather_info*) tai; assert(i >= 0); assert(i < chpl_numNodes); memcpy(addrs + ai->node * my_addr_len, ai->info, my_addr_len); tai += addr_info_len; } ofi.fi_addrs = chpl_mem_allocMany(chpl_numNodes, sizeof(ofi.fi_addrs[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); OFICHKRET(fi_av_insert(ofi.av, addrs, chpl_numNodes, ofi.fi_addrs, 0, NULL), chpl_numNodes); ofi.rx_addrs = chpl_mem_allocMany(chpl_numNodes, sizeof(ofi.rx_addrs[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); for (i = 0; i < chpl_numNodes; i++) { ofi.rx_addrs[i] = chpl_mem_allocMany(rx_ctx_cnt, sizeof(ofi.rx_addrs[i][0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); for (j = 0; j < rx_ctx_cnt; j++) { ofi.rx_addrs[i][j] = fi_rx_addr(ofi.fi_addrs[i], j, rx_ctx_bits); } } chpl_mem_free(my_addr_info, 0, 0); chpl_mem_free(addr_infos, 0, 0); chpl_mem_free(addrs, 0, 0); }
// Note that this function can be called in parallel and more notably it can be // called with non-monotonic pid's. e.g. this may be called with pid 27, and // then pid 2, so it has to ensure that the privatized array has at least pid+1 // elements. Be __very__ careful if you have to update it. void chpl_newPrivatizedClass(void* v, int64_t pid) { chpl_sync_lock(&privatizationSync); // initialize array to a default size if (chpl_privateObjects == NULL) { chpl_capPrivateObjects = 2*max(pid, 4); chpl_privateObjects = chpl_mem_allocMany(chpl_capPrivateObjects, sizeof(void *), CHPL_RT_MD_COMM_PRV_OBJ_ARRAY, 0, 0); } else { // if we're out of space, double (or more) the array size if (pid >= chpl_capPrivateObjects) { void** tmp; int64_t oldCap; oldCap = chpl_capPrivateObjects; chpl_capPrivateObjects = 2*max(pid, oldCap); tmp = chpl_mem_allocMany(chpl_capPrivateObjects, sizeof(void *), CHPL_RT_MD_COMM_PRV_OBJ_ARRAY, 0, 0); chpl_memcpy((void*)tmp, (void*)chpl_privateObjects, (oldCap)*sizeof(void*)); chpl_privateObjects = tmp; // purposely leak old copies of chpl_privateObject to avoid the need to // lock chpl_getPrivatizedClass; TODO: fix with lock free data structure } } chpl_privateObjects[pid] = v; chpl_sync_unlock(&privatizationSync); }
void chpl_newPrivatizedClass(void* v, int64_t pid) { // We need to lock around this operation so two calls in rapid succession // that pass the chpl_capPrivateObjects limit don't both try to create a new // array. If they do, one of the calls will be leaked and an invalid pointer // to be placed in the table. chpl_sync_lock(&privatizationSync); pid += 1; if (pid == 1) { chpl_capPrivateObjects = 8; // "private" means "node-private", so we can use the system allocator. chpl_privateObjects = chpl_mem_allocMany(chpl_capPrivateObjects, sizeof(void*), CHPL_RT_MD_COMM_PRIVATE_OBJECTS_ARRAY, 0, ""); } else { if (pid > chpl_capPrivateObjects) { void** tmp; chpl_capPrivateObjects *= 2; tmp = chpl_mem_allocMany(chpl_capPrivateObjects, sizeof(void*), CHPL_RT_MD_COMM_PRIVATE_OBJECTS_ARRAY, 0, ""); memcpy((void*)tmp, (void*)chpl_privateObjects, (pid-1)*sizeof(void*)); chpl_privateObjects = tmp; // purposely leak old copies of chpl_privateObject to avoid the need to // lock chpl_getPrivatizedClass; TODO: fix with lock free data structure } } chpl_privateObjects[pid-1] = v; chpl_sync_unlock(&privatizationSync); }
//
// Build the shell command used to launch the program under mpirun:
//
//   MPIRUN_PATH/mpirun -np <numLocales> MPIRUN_XTRA_OPTS <binary> 'arg'...
//
// Each user argument (argv[1..argc-1]) is appended wrapped in single
// quotes.  The result is allocated with chpl_mem_allocMany() and
// returned to the caller.
//
// The command is measured first and then formatted straight into a
// buffer of exactly that size, so no fixed-size intermediate buffer can
// be overrun by a long binary path.
//
static char* chpl_launch_create_command(int argc, char* argv[],
                                        int32_t numLocales) {
  const char* binName;
  char* command;
  size_t size;
  size_t used;
  int baseLen;
  int i;

  chpl_compute_real_binary_name(argv[0]);
  binName = chpl_get_real_binary_name();

  // With a NULL buffer and zero size, snprintf() only reports the
  // length that would have been written.
  baseLen = snprintf(NULL, 0, "%s/mpirun -np %d %s %s",
                     MPIRUN_PATH, numLocales, MPIRUN_XTRA_OPTS, binName);
  if (baseLen < 0) {
    chpl_internal_error("buffer overflow");
    return NULL;
  }

  // Terminator, plus a space and two quotes around each argument.
  size = (size_t) baseLen + 1;
  for (i=1; i<argc; i++) {
    size += strlen(argv[i]) + 3;
  }

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER,
                               -1, "");

  used = (size_t) snprintf(command, size, "%s/mpirun -np %d %s %s",
                           MPIRUN_PATH, numLocales, MPIRUN_XTRA_OPTS,
                           binName);
  for (i=1; i<argc; i++) {
    used += (size_t) snprintf(command + used, size - used, " '%s'", argv[i]);
  }

  if (used + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
//
// Package 'arg' together with the caller's serial state and the
// function id, then fork spawn_wrapper either on this locale or on a
// remote one.  'ret' is handed to the fork call as its return slot.
// The temporary package is freed before returning.
//
static inline void spawn(int locale, chpl_fn_int_t fid, void *arg,
                         int32_t arg_size, int32_t arg_tid, aligned_t *ret)
{
    size_t const bundle_size = sizeof(spawn_wrapper_args_t) + arg_size;
    spawn_wrapper_args_t *bundle;

    qthread_debug(CHAPEL_CALLS, "[%d] begin: locale=%d, fid=%d, arg_size=%d\n", chpl_localeID, locale, fid, arg_size);

    // Header fields first, then the caller's argument bytes behind them.
    bundle = (spawn_wrapper_args_t *)chpl_mem_allocMany(1, bundle_size,
                                                        CHPL_RT_MD_COMM_FORK_SEND_INFO,
                                                        0, 0);
    bundle->serial_state = chpl_task_getSerial();
    bundle->fid          = fid;
    bundle->arg_size     = arg_size;
    memcpy(&(bundle->arg), arg, arg_size);

    if (chpl_localeID != locale) {
        // Different locale: ship the bundle across.
        int const rc = qthread_fork_remote(spawn_wrapper, bundle, ret,
                                           locale, bundle_size);
        assert(SPR_OK == rc);
    } else {
        // Same locale: fork locally with a copy of the arguments.
        int const rc = qthread_fork_copyargs(spawn_wrapper, bundle,
                                             bundle_size, ret);
        assert(QTHREAD_SUCCESS == rc);
    }

    chpl_mem_free(bundle, 0, NULL);

    qthread_debug(CHAPEL_CALLS, "[%d] end: locale=%d, fid=%d, arg_size=%d\n", chpl_localeID, locale, fid, arg_size);
}
// // Broadcast the value of 'id'th entry in chpl_private_broadcast_table // on the calling locale onto every other locale. This is done to set // up global constants of simple scalar types (primarily). // void chpl_comm_broadcast_private(int id, int32_t size, int32_t tid) { int i; bcast_private_args_t *payload; PROFILE_INCR(profile_comm_broadcast_private,1); qthread_debug(CHAPEL_CALLS, "[%d] begin id=%d, size=%d, tid=%d\n", chpl_localeID, id, size, tid); payload = chpl_mem_allocMany(1, sizeof(bcast_private_args_t) + size, CHPL_RT_MD_COMM_PRIVATE_BROADCAST_DATA, 0, 0); payload->id = id; payload->size = size; memcpy(payload->data, chpl_private_broadcast_table[id], size); qthread_debug(CHAPEL_DETAILS, "[%d] payload={.id=%d; .size=%d; .data=?}\n", chpl_localeID, payload->id, payload->size); aligned_t rets[chpl_numLocales]; for (i = 0; i < chpl_numLocales; i++) { if (i != chpl_localeID) { qthread_fork_remote(bcast_private, payload, &rets[i], i, sizeof(bcast_private_args_t) + size); } } for (i = 0; i < chpl_numLocales; i++) { if (i != chpl_localeID) { qthread_readFF(&rets[i], &rets[i]); } } chpl_mem_free(payload,0,0); qthread_debug(CHAPEL_CALLS, "[%d] end id=%d, size=%d, tid=%d\n", chpl_localeID, id, size, tid); }
//
// Build the command for the dummy launcher: the real binary name
// followed by each user argument (argv[1..argc-1]) in single quotes.
// Only numLocales == 1 is supported; anything else is reported through
// chpl_error().
//
// The command is written directly into a heap buffer sized from the
// actual binary name and arguments, so a long binary path cannot
// overrun a fixed-size intermediate buffer.
//
static char* chpl_launch_create_command(int argc, char* argv[],
                                        int32_t numLocales) {
  const char* binName;
  char* command;
  size_t size;
  size_t used;
  int i;

  if (numLocales != 1) {
    chpl_error("dummy launcher only supports numLocales==1", 0, "<command-line>");
  }

  chpl_compute_real_binary_name(argv[0]);
  binName = chpl_get_real_binary_name();

  // Terminator, plus a space and two quotes around each argument.
  size = strlen(binName) + 1;
  for (i=1; i<argc; i++) {
    size += strlen(argv[i]) + 3;
  }

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER,
                               -1, "");

  used = (size_t) snprintf(command, size, "%s", binName);
  for (i=1; i<argc; i++) {
    used += (size_t) snprintf(command + used, size - used, " '%s'", argv[i]);
  }

  if (used + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[256]; char* command; FILE* llFile, *expectFile; char* projectString = getenv(launcherAccountEnvvar); char* basenamePtr = strrchr(argv[0], '/'); pid_t mypid; if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); #ifndef DEBUG_LAUNCH mypid = getpid(); #else mypid = 0; #endif sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid); sprintf(llFilename, "%s%d", baseLLFilename, (int)mypid); llFile = fopen(llFilename, "w"); fprintf(llFile, "# @ wall_clock_limit = 00:10:00\n"); fprintf(llFile, "# @ job_type = parallel\n"); fprintf(llFile, "# @ node = %d\n", numLocales); fprintf(llFile, "# @ tasks_per_node = 1\n"); if (projectString && strlen(projectString) > 0) fprintf(llFile, "# @ class = %s\n", projectString); fprintf(llFile, "# @ output = out.$(jobid)\n"); fprintf(llFile, "# @ error = err.$(jobid)\n"); fprintf(llFile, "# @ queue\n"); fprintf(llFile, "\n"); fprintf(llFile, "%s", chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(llFile, " '%s'", argv[i]); } fprintf(llFile, "\n"); fclose(llFile); sprintf(baseCommand, "llsubmit %s", llFilename); size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, ""); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer overflow"); } return command; }
// // Populate the argv array and return the number of arguments added. // Return the number of arguments populated. // int chpl_get_charset_env_args(char *argv[]) { // If any of the relevant character set environment variables // are set, replicate the state of all of them. This needs to // be done separately from the -E mechanism because Perl // launchers modify the character set environment, losing our // settings. // // Note that if we are setting these variables, and one or more // of them is empty, we must set it with explicitly empty // contents (e.g. LC_ALL= instead of -u LC_ALL) so that the // Chapel launch mechanism will not overwrite it. char *lang = getenv("LANG"); char *lc_all = getenv("LC_ALL"); char *lc_collate = getenv("LC_COLLATE"); if (!lang && !lc_all && !lc_collate) return 0; argv[0] = (char *)"env"; if (lang == NULL) lang = (char *)""; char *lang_buf = chpl_mem_allocMany(sizeof("LANG=") + strlen(lang), sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); strcpy(lang_buf, "LANG="); strcat(lang_buf, lang); argv[1] = lang_buf; if (lc_all == NULL) lc_all = (char *)""; char *lc_all_buf = chpl_mem_allocMany(sizeof("LC_ALL=") + strlen(lc_all), sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); strcpy(lc_all_buf, "LC_ALL="); strcat(lc_all_buf, lc_all); argv[2] = lc_all_buf; if (lc_collate == NULL) lc_collate = (char *)""; char *lc_collate_buf = chpl_mem_allocMany( sizeof("LC_COLLATE=") + strlen(lc_collate), sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); strcpy(lc_collate_buf, "LC_COLLATE="); strcat(lc_collate_buf, lc_collate); argv[3] = lc_collate_buf; return charset_env_nargs; }
//
// Launch the program through gasnetrun_ibv.  The launcher path is
// LAUNCH_PATH immediately followed by "gasnetrun_ibv"; the argument
// vector comes from chpl_launch_create_argv() and the launch itself is
// done by chpl_launch_using_exec().
//
int chpl_launch(int argc, char* argv[], int32_t numLocales) {
  // Room for the path prefix, the launcher name and the terminator.
  int cmdLen = 1 + strlen("gasnetrun_ibv") + strlen(WRAP_TO_STR(LAUNCH_PATH));
  char *launcher = chpl_mem_allocMany(cmdLen, sizeof(char),
                                      CHPL_RT_MD_COMMAND_BUFFER, -1, 0);

  sprintf(launcher, "%sgasnetrun_ibv", WRAP_TO_STR(LAUNCH_PATH));

  return chpl_launch_using_exec(launcher,
                                chpl_launch_create_argv(launcher, argc, argv,
                                                        numLocales),
                                argv[0]);
}
//
// Launch the program through amudprun.  The launcher path is
// CHPL_THIRD_PARTY "/" LAUNCH_PATH "amudprun"; the argument vector
// comes from chpl_launch_create_argv() and the launch itself is done by
// chpl_launch_using_exec().
//
int chpl_launch(int argc, char* argv[], int32_t numLocales) {
  // Two extra bytes: the '/' separator and the terminator.
  int cmdLen = 2 + strlen("amudprun") + strlen(WRAP_TO_STR(LAUNCH_PATH))
               + strlen(CHPL_THIRD_PARTY);
  char *launcher = chpl_mem_allocMany(cmdLen, sizeof(char),
                                      CHPL_RT_MD_COMMAND_BUFFER, -1, 0);

  snprintf(launcher, cmdLen, "%s/%samudprun", CHPL_THIRD_PARTY,
           WRAP_TO_STR(LAUNCH_PATH));

  return chpl_launch_using_exec(launcher,
                                chpl_launch_create_argv(launcher, argc, argv,
                                                        numLocales),
                                argv[0]);
}
//
// Run 'fp' through taskCallBody() with a private copy of the argument
// bundle.  When 'arg' is NULL no copy is made and NULL is passed on.
//
void chpl_task_taskCall(chpl_fn_p fp, void* arg, size_t arg_size,
                        c_sublocid_t subloc,
                        int lineno, int32_t filename) {
  void *bundle;

  if (arg == NULL) {
    bundle = NULL;
  } else {
    // Duplicate the caller's bytes so the callee gets its own copy.
    bundle = chpl_mem_allocMany(1, arg_size, CHPL_RT_MD_TASK_ARG, 0, 0);
    chpl_memcpy(bundle, arg, arg_size);
  }

  taskCallBody(fp, NULL, bundle, subloc, false, lineno, filename);
}
void _chpl_gc_init(size_t heapsize) { char *heap1, *heap2; // allocate the from and to spaces heap1 = (char*)chpl_mem_allocMany(1, heapsize, CHPL_RT_MD_GC_HEAP, 1, ""); heap2 = (char*)chpl_mem_allocMany(1, heapsize, CHPL_RT_MD_GC_HEAP, 1, ""); // allocate structs to point into the spaces _from_space = (_memory_space*)chpl_mem_allocMany(1, sizeof(_memory_space), CHPL_RT_MD_GC_SPACE_POINTER, 1, ""); _to_space = (_memory_space*)chpl_mem_allocMany(1, sizeof(_memory_space), CHPL_RT_MD_GC_SPACE_POINTER, 1, ""); // fill in the pointers _from_space->head = heap1; _from_space->tail = heap1+heapsize; _from_space->current = heap1; _to_space->head = heap2; _to_space->tail = heap2 + heapsize; _to_space->current = heap2; }
//
// Store the command line in chpl_executionCommand as a single string:
// argv[0] through argv[argc-1] joined by single spaces.
//
// An empty argument list (argc == 0) produces an empty string instead
// of formatting a NULL argv[0].  The string is assembled with a running
// write position so each argument is copied once, rather than having
// strcat() rescan the growing string for every argument.
//
static void recordExecutionCommand(int argc, char *argv[]) {
  size_t length = 0;
  char* cursor;
  int i;

  // One byte per argument for its separator; the "+1" below covers the
  // terminator.
  for (i = 0; i < argc; i++) {
    length += strlen(argv[i]) + 1;
  }

  chpl_executionCommand = (char*)chpl_mem_allocMany(length+1, sizeof(char),
                                                    CHPL_RT_MD_EXECUTION_COMMAND,
                                                    0, 0);

  cursor = chpl_executionCommand;
  for (i = 0; i < argc; i++) {
    size_t argLen = strlen(argv[i]);

    if (i > 0) {
      *cursor++ = ' ';
    }
    memcpy(cursor, argv[i], argLen);
    cursor += argLen;
  }
  *cursor = '\0';
}
static void add_env_options(int* argc, char** argv[]) { int envc; int new_argc; char** new_argv; int i; if (environ == NULL) return; // // Count the number of environment entries. // for (i = 0; environ[i] != NULL; i++) ; envc = i; // // Create a new argv with space for -E options for the env vars. // new_argc = *argc + 2 * envc; new_argv = (char**) chpl_mem_allocMany(new_argc, sizeof((*argv)[0]), CHPL_RT_MD_COMMAND_BUFFER, -1, ""); // // Duplicate the old argv into the start of the new one. // memcpy(new_argv, (*argv), *argc * sizeof((*argv)[0])); // // Add a -E option for each environment variable. // for (i = 0; i < envc; i++) { // except don't add -E for variables containing a ` // this is a workaround for poor quoting // in amudprun (see amudp_spawn.cpp AMUDP_SPMDSshSpawn // which just passes all the arguments to 'system') if( ! strchr(environ[i], '`' ) ) { new_argv[*argc + 2 * i + 0] = (char*) "-E"; new_argv[*argc + 2 * i + 1] = environ[i]; } } // // Return the new argv. // *argc = new_argc; *argv = new_argv; }
//
// Non-blocking fork.  Only node 0 is accepted (asserted).  The function
// id, argument size and argument bytes are packed into a fork_t which
// is handed to fork_nb_wrapper via chpl_task_startMovedTask().
//
void chpl_comm_fork_nb(c_nodeid_t node, c_sublocid_t subloc,
                       chpl_fn_int_t fid, void *arg, int32_t arg_size) {
  fork_t *bundle;
  int bundleBytes;

  assert(node==0);

  // Header plus the caller's argument bytes.
  bundleBytes = sizeof(fork_t) + arg_size;
  bundle = (fork_t*)chpl_mem_allocMany(bundleBytes, sizeof(char),
                                       CHPL_RT_MD_COMM_FORK_SEND_NB_INFO,
                                       0, 0);
  bundle->fid = fid;
  bundle->arg_size = arg_size;

  // Nothing to copy for an empty argument.
  if (arg_size != 0) {
    memcpy(&(bundle->arg), arg, arg_size);
  }

  chpl_task_startMovedTask((chpl_fn_p)fork_nb_wrapper, (void*)bundle,
                           subloc, chpl_nullTaskID, false);
}
int main(int argc, char* argv[]) { // // This is a user invocation, so parse the arguments to determine // the number of locales. // int32_t execNumLocales; // Set up main argument parsing. chpl_gen_main_arg.argv = chpl_mem_allocMany(argc, sizeof(char*), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); chpl_gen_main_arg.argv[0] = argv[0]; chpl_gen_main_arg.argc = 1; chpl_gen_main_arg.return_value = 0; CreateConfigVarTable(); parseArgs(true, parse_normally, &argc, argv); execNumLocales = getArgNumLocales(); // // If the user did not specify a number of locales let the // comm layer decide how many to use (or flag an error) // if (execNumLocales == 0) { execNumLocales = chpl_comm_default_num_locales(); } // // Before proceeding, allow the comm layer to verify that the // number of locales is reasonable // chpl_comm_verify_num_locales(execNumLocales); // // Let the comm layer do any last-minute pre-launch activities it // needs to. // CHPL_COMM_PRELAUNCH(); // // Launch the program // This may not return (e.g., if calling chpl_launch_using_exec()) // return chpl_launch(argc, argv, execNumLocales); }
//
// Treat argv[argnum] (with its two-character prefix removed) as a
// possible configuration-variable setting of the form name[=value].
//
// Returns the number of additional arguments consumed: 1 when the value
// was taken from argv[argnum+1], otherwise 0 or whatever
// handleNonstandardArg() reports for an unrecognized "--" argument.
//
//   argc, argv: the command line being parsed
//   argnum:     index of the argument to examine
//   lineno, filename: location information passed on to error reporting
//
int handlePossibleConfigVar(int* argc, char* argv[], int argnum,
                            int32_t lineno, chpl_string filename) {
  int retval = 0;
  int arglen = strlen(argv[argnum]+2)+1;
  // Work on a copy of the argument text rather than on argv itself.
  char* argCopy = chpl_mem_allocMany(arglen, sizeof(char),
                                     CHPL_RT_MD_CONFIG_ARG_COPY_DATA,
                                     argnum, "<command-line>");
  char* equalsSign;
  const char* moduleName;
  char* varName;
  configVarType* configVar;

  strcpy(argCopy, argv[argnum]+2);
  configVar = breakIntoPiecesAndLookup(argCopy, &equalsSign, &moduleName,
                                       &varName, lineno, filename);
  if (configVar == NULL) {
    if (argv[argnum][1] == '-') { // this is a -- argument
      retval = handleNonstandardArg(argc, argv, argnum, lineno, filename);
    } else {                      // this is a -s argument
      handleUnexpectedConfigVar(moduleName, varName, lineno, filename);
    }
  } else {
    // Only step past the '=' when there is one; computing
    // equalsSign + 1 from a NULL pointer is undefined behavior.
    char* value = (equalsSign != NULL) ? equalsSign + 1 : NULL;

    checkDeprecatedConfig(varName, value);
    if (value != NULL && *value) {
      // name=value given in this argument
      initSetValue(varName, value, moduleName, lineno, filename);
    } else if (!strcmp(configVar->defaultValue, "bool")) {
      // a bare boolean name means "true"
      initSetValue(varName, "true", moduleName, lineno, filename);
    } else {
      // otherwise the value has to be the next argument
      if (argnum + 1 >= *argc) {
        char* message = chpl_glom_strings(3, "Configuration variable '", varName,
                                          "' is missing its initialization value");
        chpl_error(message, lineno, filename);
      } else {
        initSetValue(varName, argv[argnum+1], moduleName, lineno, filename);
        retval = 1;
      }
    }
  }

  chpl_mem_free(argCopy, argnum, "<command-line>");
  return retval;
}
static void add_env_options(int* argc, char** argv[]) { int envc; int new_argc; char** new_argv; int i; if (environ == NULL) return; // // Count the number of environment entries. // for (i = 0; environ[i] != NULL; i++) ; envc = i; // // Create a new argv with space for -E options for the env vars. // new_argc = *argc + 2 * envc; new_argv = (char **)chpl_mem_allocMany(new_argc, sizeof((*argv)[0]), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); // // Duplicate the old argv into the start of the new one. // memcpy(new_argv, (*argv), *argc * sizeof((*argv)[0])); // // Add a -E option for each environment variable. // for (i = 0; i < envc; i++) { new_argv[*argc + 2 * i + 0] = (char*) "-E"; new_argv[*argc + 2 * i + 1] = environ[i]; } // // Return the new argv. // *argc = new_argc; *argv = new_argv; }
void installConfigVar(const char* varName, const char* value, const char* moduleName) { unsigned hashValue; configVarType* configVar = (configVarType*) chpl_mem_allocMany(1, sizeof(configVarType), CHPL_RT_MD_CF_TABLE_DATA, 0, 0); hashValue = hash(varName); configVar->nextInBucket = configVarTable[hashValue]; configVar->nextInstalled = NULL; configVarTable[hashValue] = configVar; if (firstInTable == NULL) { firstInTable = configVar; } else { lastInTable->nextInstalled = configVar; } lastInTable = configVar; configVar->varName = chpl_glom_strings(1, varName); configVar->moduleName = chpl_glom_strings(1, moduleName); configVar->defaultValue = chpl_glom_strings(1, value); configVar->setValue = NULL; }
char* chpl_get_enviro_keys(char sep) { int pass; int i; int j; int k = 0; char* ret = NULL; for( pass = 0; pass < 2; pass++ ) { k = 0; for( i = 0; environ && environ[i]; i++ ) { // We could do this for only some environment // variables if we wanted to; that would amount // to an if statement checking environ[i]; // but we find it to be more similar to MPI/SLURM // to forward all environment variables. // Count/store the separator if( k > 0 ) { if( pass == 0 ) k++; else ret[k++] = sep; } for( j = 0; environ[i][j] && environ[i][j] != '='; j++ ) { if( pass == 0 ) { // on first pass, just count. k++; } else { // on second pass, add to buffer. ret[k++] = environ[i][j]; } } } if( pass == 0 ) ret = chpl_mem_allocMany(k+1, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER,-1,0); } return ret; }
//
// Set *ret to 1 when 'name' is on a different device than its parent
// directory; otherwise set it, via chpl_fs_samefile_string(), according
// to whether 'name' and its parent are the same file (which is the case
// at the root).  Returns 0 on success or a qioerr from the failing
// call.
//
// For anything that is not a symbolic link the parent is "<name>/..".
// For a link, "/.." would name the parent of the link's target, so the
// parent is taken textually: everything before the last path component.
// That textual split keeps a leading '/', so the parent of an absolute
// path stays absolute and the parent of "/link" is "/".
//
qioerr chpl_fs_is_mount(int* ret, const char* name) {
  qioerr err = 0;
  struct stat nBuf, parentBuf;
  int exitStatus = 0;
  size_t nameLen = strlen(name);
  // Room for name plus "/.." plus the terminator.
  char* parent = (char* ) chpl_mem_allocMany(nameLen + 4, sizeof(char),
                                             CHPL_RT_MD_OS_LAYER_TMP_DATA,
                                             0, 0);
  // Scratch copy of name that can be cut up in the link case.
  char* safeNameCopy = (char* ) chpl_mem_allocMany(nameLen + 1, sizeof(char),
                                                   CHPL_RT_MD_OS_LAYER_TMP_DATA,
                                                   0, 0);
  memcpy(safeNameCopy, name, nameLen + 1);

  err = chpl_fs_is_link(&exitStatus, name);
  if (err) {
    // The stat call in is_link returned an error, which we would encounter
    // too, so return immediately.
    goto done;
  }

  if (exitStatus) {
    // We are dealing with a link.
    // Lydia note (03/17/2015): when the Path library is more fleshed out, this
    // operation could be done in module code and this function would instead
    // take the name of the parent and child instead of creating the parent
    // name itself.
    size_t len = nameLen;
    char* lastSep;

    // Ignore trailing separators so the last component is the link.
    while (len > 0 && safeNameCopy[len - 1] == '/') {
      len--;
    }
    safeNameCopy[len] = '\0';

    // Only a name made entirely of separators ends up empty here, and
    // such a name is not a link, so it should not reach this branch.
    assert(len > 0);

    lastSep = strrchr(safeNameCopy, '/');
    if (lastSep == NULL) {
      // name was merely the link itself, so its parent is the current
      // directory.
      strcpy(parent, ".");
    } else {
      // Step back over a run of separators in front of the basename.
      while (lastSep > safeNameCopy && lastSep[-1] == '/') {
        lastSep--;
      }
      if (lastSep == safeNameCopy) {
        // The link sits directly under the root.
        strcpy(parent, "/");
      } else {
        // Keep everything up to (but not including) the basename.
        *lastSep = '\0';
        strcpy(parent, safeNameCopy);
      }
    }
  } else {
    // We are not referring to a link, so concatenating "/.." is fine.
    memcpy(parent, name, nameLen);
    memcpy(parent + nameLen, "/..", 4);
    // TODO: Using "/" is not necessarily portable, look into this
  }

  exitStatus = lstat(name, &nBuf);
  if (exitStatus) {
    err = qio_mkerror_errno();
    goto done;
  }

  exitStatus = lstat(parent, &parentBuf);
  if (exitStatus) {
    err = qio_mkerror_errno();
  } else if (nBuf.st_dev != parentBuf.st_dev) {
    // Check if the st_dev matches that of its parent directory.
    // If they don't match, it is a mount point.
    *ret = 1;
  } else {
    // If the parent directory is the same as the current directory, we've
    // reached the root.  If they aren't the same, we know it isn't a mount
    // point because we already know their st_dev matches.
    err = chpl_fs_samefile_string(ret, name, parent);
  }

done:
  chpl_mem_free(parent, 0, 0);
  chpl_mem_free(safeNameCopy, 0, 0);
  return err;
}
static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[256]; char* command; FILE* slurmFile, *expectFile; char* projectString = getenv(launcherAccountEnvvar); char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT"); char* walltime = getenv("CHPL_LAUNCHER_WALLTIME"); char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME"); char* basenamePtr = strrchr(argv[0], '/'); pid_t mypid; if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); #ifndef DEBUG_LAUNCH mypid = getpid(); #else mypid = 0; #endif sprintf(sysFilename, "%s%d", baseSysFilename, (int)mypid); sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid); sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid); if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) { slurmFile = fopen(slurmFilename, "w"); fprintf(slurmFile, "#!/bin/sh\n\n"); fprintf(slurmFile, "#SBATCH -J Chpl-%.10s\n", basenamePtr); genNumLocalesOptions(slurmFile, determineQsubVersion(), numLocales, getNumCoresPerLocale()); if (projectString && strlen(projectString) > 0) fprintf(slurmFile, "#SBATCH -A %s\n", projectString); if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) { // fprintf(slurmFile, "#SBATCH -joe\n"); if (outputfn!=NULL) fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", outputfn); else fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", argv[0]); // fprintf(slurmFile, "cd $SBATCH_O_WORKDIR\n"); fprintf(slurmFile, "%s/gasnetrun_ibv -n %d %s ", WRAP_TO_STR(LAUNCH_PATH), numLocales, chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(slurmFile, " '%s'", argv[i]); } fprintf(slurmFile, "\n"); } fclose(slurmFile); chmod( slurmFilename, 0755); } if (getenv("CHPL_LAUNCHER_USE_SBATCH") == NULL) { expectFile = fopen(expectFilename, "w"); if (verbosity < 2) { // fprintf(expectFile, "log_user 0\n"); } fprintf(expectFile, "set timeout -1\n"); // fprintf(expectFile, "chmod +x %s\n",slurmFilename); 
fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n"); // fprintf(expectFile, "spawn sbatch "); fprintf(expectFile, "spawn -noecho salloc "); fprintf(expectFile, "-J %.10s ",basenamePtr); // pass fprintf(expectFile, "-N %d ",numLocales); fprintf(expectFile, "--ntasks-per-node=1 ",numLocales); fprintf(expectFile, "--exclusive "); // give exclusive access to the nodes fprintf(expectFile, "--time=%s ",walltime); if (constraint) { fprintf(expectFile, " -C %s", constraint); } // fprintf(expectFile, "-I %s ", slurmFilename); fprintf(expectFile, " %s/gasnetrun_ibv -n %d %s ", WRAP_TO_STR(LAUNCH_PATH), numLocales, chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(expectFile, " %s", argv[i]); } // fprintf(expectFile, "\\n\"\n"); fprintf(expectFile, "\n\n"); // fprintf(expectFile, "expect -re $prompt\n"); // fprintf(expectFile, "send \"cd \\$SBATCH_O_WORKDIR\\n\"\n"); // fprintf(expectFile, "expect -re $prompt\n"); // fprintf(expectFile, "sleep 10\n"); // fprintf(expectFile, "interact -o -re $prompt {return}\n"); // fprintf(expectFile, "send_user \"\\n\"\n"); // fprintf(expectFile, "send \"exit\\n\"\n"); fprintf(expectFile, "interact -o -re $prompt {return}\n"); fclose(expectFile); sprintf(baseCommand, "expect %s", expectFilename); } else { // sprintf(baseCommand, "sbatch %s\n", slurmFilename); sprintf(baseCommand, "sbatch %s\n", slurmFilename); } size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, ""); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer overflow"); } return command; }
static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[2*FILENAME_MAX]; char* command; FILE* slurmFile, *expectFile; char* projectString = getenv(launcherAccountEnvvar); char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT"); char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME"); char* basenamePtr = strrchr(argv[0], '/'); char* nodeAccessEnv = NULL; pid_t mypid; if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); // command line walltime takes precedence over env var if (!walltime) { walltime = getenv("CHPL_LAUNCHER_WALLTIME"); } // command line partition takes precedence over env var if (!partition) { partition = getenv("CHPL_LAUNCHER_PARTITION"); } // command line exclude list takes precedence over env var if (!exclude) { exclude = getenv("CHPL_LAUNCHER_EXCLUDE"); } // request exclusive node access by default, but allow user to override nodeAccessEnv = getenv("CHPL_LAUNCHER_NODE_ACCESS"); if (nodeAccessEnv == NULL || strcmp(nodeAccessEnv, "exclusive") == 0) { nodeAccessStr = "exclusive"; } else if (strcmp(nodeAccessEnv, "shared") == 0 || strcmp(nodeAccessEnv, "share") == 0 || strcmp(nodeAccessEnv, "oversubscribed") == 0 || strcmp(nodeAccessEnv, "oversubscribe") == 0) { nodeAccessStr = "share"; } else if (strcmp(nodeAccessEnv, "unset") == 0) { nodeAccessStr = NULL; } else { chpl_warning("unsupported 'CHPL_LAUNCHER_NODE_ACCESS' option", 0, 0); nodeAccessStr = "exclusive"; } if (debug) { mypid = 0; } else { mypid = getpid(); } sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid); sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid); if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) { slurmFile = fopen(slurmFilename, "w"); fprintf(slurmFile, "#!/bin/sh\n\n"); fprintf(slurmFile, "#SBATCH -J Chpl-%.10s\n", basenamePtr); genNumLocalesOptions(slurmFile, determineSlurmVersion(), numLocales, getNumCoresPerLocale()); if 
(projectString && strlen(projectString) > 0) fprintf(slurmFile, "#SBATCH -A %s\n", projectString); if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) { // fprintf(slurmFile, "#SBATCH -joe\n"); if (outputfn!=NULL) fprintf(slurmFile, "#SBATCH -o %s\n", outputfn); else fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", argv[0]); // fprintf(slurmFile, "cd $SBATCH_O_WORKDIR\n"); fprintf(slurmFile, "%s/%s/gasnetrun_ibv -n %d -N %d", CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales); propagate_environment(slurmFile); fprintf(slurmFile, " %s ", chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(slurmFile, " '%s'", argv[i]); } fprintf(slurmFile, "\n"); } fclose(slurmFile); chmod( slurmFilename, 0755); } if (getenv("CHPL_LAUNCHER_USE_SBATCH") == NULL) { expectFile = fopen(expectFilename, "w"); if (verbosity < 2) { // fprintf(expectFile, "log_user 0\n"); } fprintf(expectFile, "set timeout -1\n"); // fprintf(expectFile, "chmod +x %s\n",slurmFilename); fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n"); // fprintf(expectFile, "spawn sbatch "); fprintf(expectFile, "spawn -noecho salloc --quiet "); fprintf(expectFile, "-J %.10s ",basenamePtr); // pass fprintf(expectFile, "-N %d ",numLocales); fprintf(expectFile, "--ntasks-per-node=1 "); if (nodeAccessStr != NULL) fprintf(expectFile, "--%s ", nodeAccessStr); if (walltime) fprintf(expectFile, "--time=%s ",walltime); if(partition) fprintf(expectFile, "--partition=%s ",partition); if(exclude) fprintf(expectFile, "--exclude=%s ",exclude); if (constraint) { fprintf(expectFile, " -C %s", constraint); } // fprintf(expectFile, "-I %s ", slurmFilename); fprintf(expectFile, " %s/%s/gasnetrun_ibv -n %d -N %d", CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales); propagate_environment(expectFile); fprintf(expectFile, " %s ", chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(expectFile, " %s", argv[i]); } // fprintf(expectFile, "\\n\"\n"); fprintf(expectFile, "\n\n"); // 
fprintf(expectFile, "expect -re $prompt\n"); // fprintf(expectFile, "send \"cd \\$SBATCH_O_WORKDIR\\n\"\n"); // fprintf(expectFile, "expect -re $prompt\n"); // fprintf(expectFile, "sleep 10\n"); // fprintf(expectFile, "interact -o -re $prompt {return}\n"); // fprintf(expectFile, "send_user \"\\n\"\n"); // fprintf(expectFile, "send \"exit\\n\"\n"); fprintf(expectFile, "interact -o -re $prompt {return}\n"); fclose(expectFile); sprintf(baseCommand, "expect %s", expectFilename); } else { // sprintf(baseCommand, "sbatch %s\n", slurmFilename); sprintf(baseCommand, "sbatch %s\n", slurmFilename); } size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer overflow"); } return command; }
// // If generate_qsub_script, return the filename of the qsub script that // was written with the qsub options // else return the qsub options for the command line as a string // static char* genQsubOptions(char* genFilename, char* projectString, qsubVersion qsub, int32_t numLocales, int32_t numCoresPerLocale) { const size_t maxOptLength = 256; char* optionString = NULL; int length = 0; FILE *qsubScript = NULL; char *qsubFilename = expectFilename; if (!queue) { queue = getenv("CHPL_LAUNCHER_QUEUE"); } if (!walltime) { walltime = getenv("CHPL_LAUNCHER_WALLTIME"); } if (generate_qsub_script) { pid_t mypid = debug ? 0 : getpid(); sprintf(qsubFilename, "qsub.%s-%d", genFilename, (int) mypid); qsubScript = fopen(qsubFilename, "w"); fprintf(qsubScript, "#PBS -j oe\n"); fprintf(qsubScript, "#PBS -zV\n"); fprintf(qsubScript, "#PBS -N Chpl-%.10s\n", genFilename); } else { optionString = chpl_mem_allocMany(maxOptLength, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, ""); length += snprintf(optionString + length, maxOptLength - length, "-z -V -I -N Chpl-%.10s", genFilename); } if (projectString && strlen(projectString) != 0) { if (generate_qsub_script) { fprintf(qsubScript, "#PBS -A %s\n", projectString); } else { length += snprintf(optionString + length, maxOptLength - length, " -A %s", projectString); } } if (queue) { if (generate_qsub_script) { fprintf(qsubScript, "#PBS -q %s\n", queue); } else { length += snprintf(optionString + length, maxOptLength - length, " -q %s", queue); } } if (walltime) { if (generate_qsub_script) { fprintf(qsubScript, "#PBS -l walltime=%s\n", walltime); } else { length += snprintf(optionString + length, maxOptLength - length, " -l walltime=%s", walltime); } } switch (qsub) { case pbspro: case unknown: if (generate_qsub_script) { fprintf(qsubScript, "#PBS -l mppwidth=%d\n", numLocales); fprintf(qsubScript, "#PBS -l mppnppn=%d\n", procsPerNode); fprintf(qsubScript, "#PBS -l mppdepth=%d\n", numCoresPerLocale); } else { length += snprintf(optionString 
+ length, maxOptLength - length, " -l mppwidth=%d -l mppnppn=%d -l mppdepth=%d", numLocales, procsPerNode, numCoresPerLocale); } break; case moab: if (generate_qsub_script) { fprintf(qsubScript, "#PBS -l nodes=%d\n", numLocales); } else { length += snprintf(optionString + length, maxOptLength - length, " -l nodes=%d", numLocales); } break; case nccs: if (generate_qsub_script) { fprintf(qsubScript, "#PBS -l nodes=%d\n", numLocales); } else { if (!queue && !walltime) chpl_error("An execution time must be specified for the NCCS launcher if no queue is\n" "specified -- use the CHPL_LAUNCHER_WALLTIME and/or CHPL_LAUNCHER_QUEUE\n" "environment variables", 0, 0); length += snprintf(optionString + length, maxOptLength - length, " -l nodes=%d\n", numLocales); } break; } if (generate_qsub_script) { fclose(qsubScript); optionString = qsubFilename; } return optionString; }
static void libfabric_init() { int i; struct fi_info *info = NULL; struct fi_info *hints = fi_allocinfo(); struct fi_av_attr av_attr = {0}; struct fi_cq_attr cq_attr = {0}; int max_tx_ctx, max_rx_ctx; int comm_concurrency; int rx_ctx_cnt; int rx_ctx_bits = 0; hints->mode = ~0; hints->caps = FI_RMA | FI_ATOMIC | FI_SOURCE /* do we want this? */ | FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_MULTI_RECV | FI_FENCE; hints->addr_format = FI_FORMAT_UNSPEC; #if defined(CHPL_COMM_SUBSTRATE_SOCKETS) // // fi_freeinfo(hints) will free() hints->fabric_attr->prov_name; this // is documented, though poorly. So, get that space from malloc(). // { const char s[] = "sockets"; char* sDup = sys_malloc(sizeof(s)); strcpy(sDup, s); hints->fabric_attr->prov_name = sDup; } #elif defined(CHPL_COMM_SUBSTRATE_GNI) #error "Substrate GNI not supported" #else #error "Substrate type not supported" #endif /* connectionless reliable */ hints->ep_attr->type = FI_EP_RDM; hints->domain_attr->threading = FI_THREAD_UNSPEC; hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; hints->domain_attr->av_type = FI_AV_TABLE; hints->domain_attr->mr_mode = FI_MR_SCALABLE; hints->domain_attr->resource_mgmt = FI_RM_ENABLED; // hints->domain_attr->cq_data_size hints->tx_attr->op_flags = FI_COMPLETION; hints->rx_attr->op_flags = FI_COMPLETION; OFICHKERR(fi_getinfo(FI_VERSION(1,0), NULL, NULL, 0, hints, &info)); if (info == NULL) { chpl_internal_error("No fabrics detected."); } else { #ifdef PRINT_FI_GETINFO struct fi_info *cur; for (cur = info; cur; cur = cur->next) { printf("---\n"); printf("%s", fi_tostr(cur, FI_TYPE_INFO)); } printf("\n"); #endif } ofi.num_am_ctx = 1; // Would we ever want more? max_tx_ctx = info->domain_attr->max_ep_tx_ctx; max_rx_ctx = info->domain_attr->max_ep_rx_ctx; comm_concurrency = get_comm_concurrency(); ofi.num_tx_ctx = comm_concurrency+ofi.num_am_ctx > max_tx_ctx ? 
max_tx_ctx-ofi.num_am_ctx : comm_concurrency; ofi.num_rx_ctx = comm_concurrency+ofi.num_am_ctx > max_rx_ctx ? max_rx_ctx-ofi.num_am_ctx : comm_concurrency; info->ep_attr->tx_ctx_cnt = ofi.num_tx_ctx + ofi.num_am_ctx; info->ep_attr->rx_ctx_cnt = ofi.num_rx_ctx + ofi.num_am_ctx; OFICHKERR(fi_fabric(info->fabric_attr, &ofi.fabric, NULL)); OFICHKERR(fi_domain(ofi.fabric, info, &ofi.domain, NULL)); rx_ctx_cnt = ofi.num_rx_ctx + ofi.num_am_ctx; while (rx_ctx_cnt >> ++rx_ctx_bits); av_attr.rx_ctx_bits = rx_ctx_bits; av_attr.type = FI_AV_TABLE; av_attr.count = chpl_numNodes; OFICHKERR(fi_av_open(ofi.domain, &av_attr, &ofi.av, NULL)); OFICHKERR(fi_scalable_ep(ofi.domain, info, &ofi.ep, NULL)); OFICHKERR(fi_scalable_ep_bind(ofi.ep, &ofi.av->fid, 0)); /* set up tx and rx contexts */ cq_attr.format = FI_CQ_FORMAT_CONTEXT; cq_attr.size = 1024; /* ??? */ cq_attr.wait_obj = FI_WAIT_UNSPEC; ofi.tx_ep = (struct fid_ep **) chpl_mem_allocMany(ofi.num_tx_ctx, sizeof(ofi.tx_ep[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); ofi.tx_cq = (struct fid_cq **) chpl_mem_allocMany(ofi.num_tx_ctx, sizeof(ofi.tx_cq[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); for (i = 0; i < ofi.num_tx_ctx; i++) { OFICHKERR(fi_tx_context(ofi.ep, i, NULL, &ofi.tx_ep[i], NULL)); OFICHKERR(fi_cq_open(ofi.domain, &cq_attr, &ofi.tx_cq[i], NULL)); OFICHKERR(fi_ep_bind(ofi.tx_ep[i], &ofi.tx_cq[i]->fid, FI_TRANSMIT)); OFICHKERR(fi_enable(ofi.tx_ep[i])); } ofi.rx_ep = (struct fid_ep **) chpl_mem_allocMany(ofi.num_rx_ctx, sizeof(ofi.rx_ep[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); ofi.rx_cq = (struct fid_cq **) chpl_mem_allocMany(ofi.num_rx_ctx, sizeof(ofi.rx_cq[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); for (i = 0; i < ofi.num_rx_ctx; i++) { OFICHKERR(fi_rx_context(ofi.ep, i, NULL, &ofi.rx_ep[i], NULL)); OFICHKERR(fi_cq_open(ofi.domain, &cq_attr, &ofi.rx_cq[i], NULL)); OFICHKERR(fi_ep_bind(ofi.rx_ep[i], &ofi.rx_cq[i]->fid, FI_RECV)); OFICHKERR(fi_enable(ofi.rx_ep[i])); } ofi.am_tx_ep = (struct fid_ep **) 
chpl_mem_allocMany(ofi.num_am_ctx, sizeof(ofi.am_tx_ep[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); ofi.am_tx_cq = (struct fid_cq **) chpl_mem_allocMany(ofi.num_am_ctx, sizeof(ofi.am_tx_cq[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); /* set up AM contexts */ for (i = 0; i < ofi.num_am_ctx; i++) { OFICHKERR(fi_tx_context(ofi.ep, i+ofi.num_tx_ctx, NULL, &ofi.am_tx_ep[i], NULL)); OFICHKERR(fi_cq_open(ofi.domain, &cq_attr, &ofi.am_tx_cq[i], NULL)); OFICHKERR(fi_ep_bind(ofi.am_tx_ep[i], &ofi.am_tx_cq[i]->fid, FI_TRANSMIT)); OFICHKERR(fi_enable(ofi.am_tx_ep[i])); } ofi.am_rx_ep = (struct fid_ep **) chpl_mem_allocMany(ofi.num_am_ctx, sizeof(ofi.am_rx_ep[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); ofi.am_rx_cq = (struct fid_cq **) chpl_mem_allocMany(ofi.num_am_ctx, sizeof(ofi.am_rx_cq[0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); for (i = 0; i < ofi.num_am_ctx; i++) { OFICHKERR(fi_rx_context(ofi.ep, i+ofi.num_rx_ctx, NULL, &ofi.am_rx_ep[i], NULL)); OFICHKERR(fi_cq_open(ofi.domain, &cq_attr, &ofi.am_rx_cq[i], NULL)); OFICHKERR(fi_ep_bind(ofi.am_rx_ep[i], &ofi.am_rx_cq[i]->fid, FI_RECV)); OFICHKERR(fi_enable(ofi.am_rx_ep[i])); } OFICHKERR(fi_enable(ofi.ep)); libfabric_init_addrvec(rx_ctx_cnt, rx_ctx_bits); OFICHKERR(fi_mr_reg(ofi.domain, 0, SIZE_MAX, FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_SEND | FI_RECV, 0, (uint64_t) chpl_nodeID, 0, &ofi.mr, NULL)); fi_freeinfo(info); /* No error returned */ fi_freeinfo(hints); /* No error returned */ chpl_msg(2, "%d: completed libfabric initialization\n", chpl_nodeID); }
/* * Set up the progress thread */ static void progress_thread(void *args) { struct progress_thread_info* pti = args; const int id = pti->id; const int num_rbufs = 2; struct iovec iov[num_rbufs]; struct fi_msg msg[num_rbufs]; struct ofi_am_info* dst_buf[num_rbufs]; const int rbuf_len = 10; const size_t rbuf_size = rbuf_len*sizeof(dst_buf[0][0]); const int num_cqes = rbuf_len; struct fi_cq_data_entry cqes[num_cqes]; int num_read; int i; for (i = 0; i < num_rbufs; i++) { dst_buf[i] = chpl_mem_allocMany(rbuf_len, sizeof(dst_buf[i][0]), CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0); iov[i].iov_base = dst_buf[i]; iov[i].iov_len = rbuf_size; msg[i].msg_iov = &iov[i]; msg[i].desc = (void **) fi_mr_desc(ofi.mr); msg[i].iov_count = 1; msg[i].addr = FI_ADDR_UNSPEC; msg[i].context = (void *) (uint64_t) i; msg[i].data = 0x0; OFICHKERR(fi_recvmsg(ofi.am_rx_ep[id], &msg[i], FI_MULTI_RECV)); } // Count this progress thread as running. The creator thread wants to // be released as soon as at least one progress thread is running, so // if we're the first, do that. if (atomic_fetch_add_uint_least32_t(&progress_thread_count, 1) == 0) { CALL_CHECK_ZERO(pthread_mutex_lock(&progress_thread_entEx_cond_mutex)); CALL_CHECK_ZERO(pthread_cond_signal(&progress_thread_enter_cond)); CALL_CHECK_ZERO(pthread_mutex_unlock(&progress_thread_entEx_cond_mutex)); } // Wait for events while (!atomic_load_bool(&progress_threads_please_exit)) { num_read = fi_cq_read(ofi.am_rx_cq[id], cqes, num_cqes); if (num_read > 0) { for (i = 0; i < num_read; i++) { chpl_comm_ofi_am_handler(&cqes[i]); // send ack } } else { if (num_read != -FI_EAGAIN) { chpl_internal_error(fi_strerror(-num_read)); } } } // Un-count this progress thread. Whoever told us to exit wants to // be released once all the progress threads are done, so if we're // the last, do that. 
if (atomic_fetch_sub_uint_least32_t(&progress_thread_count, 1) == 1) { CALL_CHECK_ZERO(pthread_mutex_lock(&progress_thread_entEx_cond_mutex)); CALL_CHECK_ZERO(pthread_cond_signal(&progress_thread_exit_cond)); CALL_CHECK_ZERO(pthread_mutex_unlock(&progress_thread_entEx_cond_mutex)); } }
// create the command that will actually launch the program and // create any files needed for the launch like the batch script static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[256]; char* command; FILE* slurmFile, *expectFile; char* account = getenv("CHPL_LAUNCHER_ACCOUNT"); char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT"); char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME"); char* basenamePtr = strrchr(argv[0], '/'); pid_t mypid; // command line walltime takes precedence over env var if (!walltime) { walltime = getenv("CHPL_LAUNCHER_WALLTIME"); } // command line nodelist takes precedence over env var if (!nodelist) { nodelist = getenv("CHPL_LAUNCHER_NODELIST"); } if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); if (debug) { mypid = 0; } else { mypid = getpid(); } // set the filenames sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid); sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid); // if were running a batch job if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL || generate_sbatch_script) { // open the batch file and create the header slurmFile = fopen(slurmFilename, "w"); fprintf(slurmFile, "#!/bin/sh\n\n"); // set the job name fprintf(slurmFile, "#SBATCH --job-name=Chpl-%.10s\n", basenamePtr); // suppress informational messages, will still display errors fprintf(slurmFile, "#SBATCH --quiet\n"); // request the number of locales, with 1 task per node, and number of cores // cpus-per-task. 
We probably don't need --nodes and --ntasks specified // since 1 task-per-node with n --tasks implies -n nodes fprintf(slurmFile, "#SBATCH --nodes=%d\n", numLocales); fprintf(slurmFile, "#SBATCH --ntasks=%d\n", numLocales); fprintf(slurmFile, "#SBATCH --ntasks-per-node=%d\n", procsPerNode); fprintf(slurmFile, "#SBATCH --cpus-per-task=%d\n", getCoresPerLocale()); //request exclusive access to nodes fprintf(slurmFile, "#SBATCH --exclusive\n"); // Set the walltime if it was specified if (walltime) { fprintf(slurmFile, "#SBATCH --time=%s\n", walltime); } // Set the nodelist if it was specified if (nodelist) { fprintf(slurmFile, "#SBATCH --nodelist=%s\n", nodelist); } // If needed a constraint can be specified with the env var CHPL_LAUNCHER_CONSTRAINT if (constraint) { fprintf(slurmFile, "#SBATCH --constraint=%s\n", constraint); } // set the account name if one was provided if (account && strlen(account) > 0) { fprintf(slurmFile, "#SBATCH --account=%s\n", account); } // set the output name to either the user specified // or to the binaryName.<jobID>.out if none specified if (outputfn!=NULL) { fprintf(slurmFile, "#SBATCH --output=%s\n", outputfn); } else { fprintf(slurmFile, "#SBATCH --output=%s.%%j.out\n", argv[0]); } // add the srun command fprintf(slurmFile, "srun %s ", chpl_get_real_binary_name()); // add any arguments passed to the launcher to the binary for (i=1; i<argc; i++) { fprintf(slurmFile, " '%s'", argv[i]); } fprintf(slurmFile, "\n"); // close the batch file and change permissions fclose(slurmFile); chmod(slurmFilename, 0755); if (generate_sbatch_script) { fprintf(stdout, "SBATCH script written to '%s'\n", slurmFilename); } // the baseCommand is what will call the batch file // that was just created sprintf(baseCommand, "sbatch %s\n", slurmFilename); } // else we're running an interactive job else { // expect is used to launch an interactive job // create the file and set some things for expect expectFile = fopen(expectFilename, "w"); fprintf(expectFile, 
"set timeout -1\n"); fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n"); // create a silent salloc command fprintf(expectFile, "spawn -noecho srun "); // set the job name fprintf(expectFile, "--job-name=CHPL-%.10s ",basenamePtr); // suppress informational messages, will still display errors fprintf(expectFile, "--quiet "); // request the number of locales, with 1 task per node, and number of cores // cpus-per-task. We probably don't need --nodes and --ntasks specified // since 1 task-per-node with n --tasks implies -n nodes fprintf(expectFile, "--nodes=%d ",numLocales); fprintf(expectFile, "--ntasks=%d ", numLocales); fprintf(expectFile, "--ntasks-per-node=%d ", procsPerNode); fprintf(expectFile, "--cpus-per-task=%d ", getCoresPerLocale()); // request exclusive access fprintf(expectFile, "--exclusive "); // Set the walltime if i was specified if (walltime) { fprintf(expectFile, "--time=%s ", walltime); } // Set the walltime if it was specified if (nodelist) { fprintf(expectFile, "--nodelist=%s ", nodelist); } // set any constraints if (constraint) { fprintf(expectFile, " --constraint=%s ", constraint); } // set the account name if one was provided if (account && strlen(account) > 0) { fprintf(expectFile, "--account=%s ", account); } // the actual srun command fprintf(expectFile, "%s", chpl_get_real_binary_name()); // add any arguments passed to the launcher to the binary for (i=1; i<argc; i++) { fprintf(expectFile, " %s", argv[i]); } fprintf(expectFile, "\n\n"); // do some things required for expect and close the file fprintf(expectFile, "interact -o -re $prompt {return}\n"); fclose(expectFile); // the baseCommand is what will call the expect file sprintf(baseCommand, "expect %s", expectFilename); } // copy baseCommand into command and return it size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, ""); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer 
overflow"); } return command; }
// create the command that will actually launch the program and // create any files needed for the launch like the batch script static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[MAX_COM_LEN]; char* command; FILE* slurmFile; char* account = getenv("CHPL_LAUNCHER_ACCOUNT"); char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT"); char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME"); char* basenamePtr = strrchr(argv[0], '/'); pid_t mypid; // For programs with large amounts of output, a lot of time can be // spent syncing the stdout buffer to the output file. This can cause // tests to run extremely slow and can cause stdout and stderr to // become mixed in odd ways since stdout is buffered but stderr isn't. // To alleviate this problem (and to allow accurate external timings // of tests) this allows the output to be "buffered" to <tmpDir> and // copied once the job is done. // // Note that this should work even for multi-locale tests since all // the output is piped through a single node. // // The *NoFmt versions are the same as the regular version, except // that instead of using slurms output formatters, they use the // corresponding env var. e.g. you have to use '--output=%j.out to // have the output file be <jobid>.out, but when we copy the tmp file // to the real output file, the %j and other formatters aren't // available so we have to use the equivalent slurm env var // (SLURM_JOB_ID.) 
The env vars can't be used when specifying --output // because they haven't been initialized yet char* bufferStdout = getenv("CHPL_LAUNCHER_SLURM_BUFFER_STDOUT"); const char* tmpDir = getTmpDir(); char stdoutFile [MAX_COM_LEN]; char stdoutFileNoFmt [MAX_COM_LEN]; char tmpStdoutFile [MAX_COM_LEN]; char tmpStdoutFileNoFmt [MAX_COM_LEN]; // command line walltime takes precedence over env var if (!walltime) { walltime = getenv("CHPL_LAUNCHER_WALLTIME"); } // command line nodelist takes precedence over env var if (!nodelist) { nodelist = getenv("CHPL_LAUNCHER_NODELIST"); } // command line partition takes precedence over env var if (!partition) { partition = getenv("CHPL_LAUNCHER_PARTITION"); } // command line exclude takes precedence over env var if (!exclude) { exclude = getenv("CHPL_LAUNCHER_EXCLUDE"); } if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); if (debug) { mypid = 0; } else { mypid = getpid(); } // Elliot, 12/02/14: TODO we have a bunch of similar commands to build up the // interactive and batch versions. It would be nicer to build up the commands // and postprocess depending on interactive vs batch. As in build up "--quiet // --nodes ..." and afterwards split on ' ' and then add #SBATCH and a // newline for batch mode and leave it as is for interactive" // if were running a batch job if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL || generate_sbatch_script) { // set the sbatch filename sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid); // open the batch file and create the header slurmFile = fopen(slurmFilename, "w"); fprintf(slurmFile, "#!/bin/sh\n\n"); // set the job name fprintf(slurmFile, "#SBATCH --job-name=Chpl-%.10s\n", basenamePtr); // suppress informational messages, will still display errors fprintf(slurmFile, "#SBATCH --quiet\n"); // request the number of locales, with 1 task per node, and number of cores // cpus-per-task. 
We probably don't need --nodes and --ntasks specified // since 1 task-per-node with n --tasks implies -n nodes fprintf(slurmFile, "#SBATCH --nodes=%d\n", numLocales); fprintf(slurmFile, "#SBATCH --ntasks=%d\n", numLocales); fprintf(slurmFile, "#SBATCH --ntasks-per-node=%d\n", procsPerNode); fprintf(slurmFile, "#SBATCH --cpus-per-task=%d\n", getCoresPerLocale()); //request exclusive access to nodes fprintf(slurmFile, "#SBATCH --exclusive\n"); // Set the walltime if it was specified if (walltime) { fprintf(slurmFile, "#SBATCH --time=%s\n", walltime); } // Set the nodelist if it was specified if (nodelist) { fprintf(slurmFile, "#SBATCH --nodelist=%s\n", nodelist); } // Set the partition if it was specified if (partition) { fprintf(slurmFile, "#SBATCH --partition=%s\n", partition); } // Set the exclude list if it was specified if (exclude) { fprintf(slurmFile, "#SBATCH --exclude=%s\n", exclude); } // If needed a constraint can be specified with the env var CHPL_LAUNCHER_CONSTRAINT if (constraint) { fprintf(slurmFile, "#SBATCH --constraint=%s\n", constraint); } // set the account name if one was provided if (account && strlen(account) > 0) { fprintf(slurmFile, "#SBATCH --account=%s\n", account); } // set the output file name to either the user specified // name or to the binaryName.<jobID>.out if none specified if (outputfn != NULL) { sprintf(stdoutFile, "%s", outputfn); sprintf(stdoutFileNoFmt, "%s", outputfn); } else { sprintf(stdoutFile, "%s.%s.out", argv[0], "%j"); sprintf(stdoutFileNoFmt, "%s.%s.out", argv[0], "$SLURM_JOB_ID"); } // We have slurm use the real output file to capture slurm errors/timeouts // We only redirect the program output to the tmp file fprintf(slurmFile, "#SBATCH --output=%s\n", stdoutFile); // If we're buffering the output, set the temp output file name. // It's always <tmpDir>/binaryName.<jobID>.out. 
if (bufferStdout != NULL) { sprintf(tmpStdoutFile, "%s/%s.%s.out", tmpDir, argv[0], "%j"); sprintf(tmpStdoutFileNoFmt, "%s/%s.%s.out", tmpDir, argv[0], "$SLURM_JOB_ID"); } // add the srun command and the (possibly wrapped) binary name. fprintf(slurmFile, "srun --kill-on-bad-exit %s %s ", chpl_get_real_binary_wrapper(), chpl_get_real_binary_name()); // add any arguments passed to the launcher to the binary for (i=1; i<argc; i++) { fprintf(slurmFile, "'%s' ", argv[i]); } // buffer program output to the tmp stdout file if (bufferStdout != NULL) { fprintf(slurmFile, "&> %s", tmpStdoutFileNoFmt); } fprintf(slurmFile, "\n"); // After the job is run, if we buffered stdout to <tmpDir>, we need // to copy the output to the actual output file. The <tmpDir> output // will only exist on one node, ignore failures on the other nodes if (bufferStdout != NULL) { fprintf(slurmFile, "cat %s >> %s\n", tmpStdoutFileNoFmt, stdoutFileNoFmt); fprintf(slurmFile, "rm %s &> /dev/null\n", tmpStdoutFileNoFmt); } // close the batch file and change permissions fclose(slurmFile); chmod(slurmFilename, 0755); if (generate_sbatch_script) { fprintf(stdout, "SBATCH script written to '%s'\n", slurmFilename); } // the baseCommand is what will call the batch file // that was just created sprintf(baseCommand, "sbatch %s\n", slurmFilename); } // else we're running an interactive job else { char iCom[1024]; int len; len = 0; // set the job name len += sprintf(iCom+len, "--job-name=CHPL-%.10s ",basenamePtr); // suppress informational messages, will still display errors len += sprintf(iCom+len, "--quiet "); // request the number of locales, with 1 task per node, and number of cores // cpus-per-task. 
We probably don't need --nodes and --ntasks specified // since 1 task-per-node with n --tasks implies -n nodes len += sprintf(iCom+len, "--nodes=%d ",numLocales); len += sprintf(iCom+len, "--ntasks=%d ", numLocales); len += sprintf(iCom+len, "--ntasks-per-node=%d ", procsPerNode); len += sprintf(iCom+len, "--cpus-per-task=%d ", getCoresPerLocale()); // request exclusive access len += sprintf(iCom+len, "--exclusive "); // kill the job if any program instance halts with non-zero exit status len += sprintf(iCom+len, "--kill-on-bad-exit "); // Set the walltime if it was specified if (walltime) { len += sprintf(iCom+len, "--time=%s ",walltime); } // Set the nodelist if it was specified if (nodelist) { len += sprintf(iCom+len, "--nodelist=%s ", nodelist); } // Set the partition if it was specified if (partition) { len += sprintf(iCom+len, "--partition=%s ", partition); } // Set the exclude list if it was specified if (exclude) { len += sprintf(iCom+len, "--exclude=%s ", exclude); } // set any constraints if (constraint) { len += sprintf(iCom+len, " --constraint=%s ", constraint); } // set the account name if one was provided if (account && strlen(account) > 0) { len += sprintf(iCom+len, "--account=%s ", account); } // add the (possibly wrapped) binary name len += sprintf(iCom+len, "%s %s ", chpl_get_real_binary_wrapper(), chpl_get_real_binary_name()); // add any arguments passed to the launcher to the binary for (i=1; i<argc; i++) { len += sprintf(iCom+len, "%s ", argv[i]); } // launch the job using srun sprintf(baseCommand, "srun %s ", iCom); } // copy baseCommand into command and return it size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer overflow"); } return command; }
static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[256]; char* command; FILE* pbsFile, *expectFile; char* projectString = getenv(launcherAccountEnvvar); char* basenamePtr = strrchr(argv[0], '/'); pid_t mypid; if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); #ifndef DEBUG_LAUNCH mypid = getpid(); #else mypid = 0; #endif sprintf(sysFilename, "%s%d", baseSysFilename, (int)mypid); sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid); sprintf(pbsFilename, "%s%d", basePBSFilename, (int)mypid); pbsFile = fopen(pbsFilename, "w"); fprintf(pbsFile, "#!/bin/sh\n\n"); fprintf(pbsFile, "#PBS -N Chpl-%.10s\n", basenamePtr); genNumLocalesOptions(pbsFile, determineQsubVersion(), numLocales, getNumCoresPerLocale()); if (projectString && strlen(projectString) > 0) fprintf(pbsFile, "#PBS -A %s\n", projectString); fclose(pbsFile); expectFile = fopen(expectFilename, "w"); if (verbosity < 2) { fprintf(expectFile, "log_user 0\n"); } fprintf(expectFile, "set timeout -1\n"); fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n"); fprintf(expectFile, "spawn qsub -z "); fprintf(expectFile, "-V "); // pass through all environment variables fprintf(expectFile, "-I %s\n", pbsFilename); fprintf(expectFile, "expect -re $prompt\n"); fprintf(expectFile, "send \"cd \\$PBS_O_WORKDIR\\n\"\n"); fprintf(expectFile, "expect -re $prompt\n"); fprintf(expectFile, "send \"%s/%s/gasnetrun_ibv -n %d -N %d", CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales); propagate_charset_environment(expectFile); fprintf(expectFile, " %s ", chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(expectFile, " '%s'", argv[i]); } fprintf(expectFile, "\\n\"\n"); fprintf(expectFile, "interact -o -re $prompt {return}\n"); fprintf(expectFile, "send_user \"\\n\"\n"); fprintf(expectFile, "send \"exit\\n\"\n"); fclose(expectFile); sprintf(baseCommand, "expect 
%s", expectFilename); size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer overflow"); } return command; }