//
// Pick the communication concurrency level.  Sources are consulted in
// this order, and the first usable one wins:
//   1. CHPL_RT_COMM_CONCURRENCY (0 is treated as 1; a negative value
//      produces a warning and is ignored),
//   2. CHPL_RT_NUM_HARDWARE_THREADS (must be > 0, else warn and ignore),
//   3. chpl_topo_getNumCPUsLogical(true),
//   4. a fallback of 1, with a warning.
//
static int get_comm_concurrency() {
  const char* envText;
  int requested;

  envText = getenv("CHPL_RT_COMM_CONCURRENCY");
  if (envText != NULL && sscanf(envText, "%d", &requested) == 1) {
    if (requested > 0)
      return requested;
    if (requested == 0)
      return 1;
    chpl_warning("CHPL_RT_COMM_CONCURRENCY < 0, ignored", 0, 0);
  }

  envText = getenv("CHPL_RT_NUM_HARDWARE_THREADS");
  if (envText != NULL && sscanf(envText, "%d", &requested) == 1) {
    if (requested > 0)
      return requested;
    chpl_warning("CHPL_RT_NUM_HARDWARE_THREADS <= 0, ignored", 0, 0);
  }

  {
    uint32_t logicalCPUs = chpl_topo_getNumCPUsLogical(true);

    if (logicalCPUs > 0)
      return logicalCPUs;
  }

  chpl_warning("Could not determine comm concurrency, using 1", 0, 0);
  return 1;
}
//
// Return the number of cores per locale.  A positive value in
// CHPL_LAUNCHER_CORES_PER_LOCALE takes precedence; otherwise `sinfo` is
// run and the first integer in its output is used.
//
static int getCoresPerLocale(void) {
  const int buflen = 1024;
  char buf[buflen];
  int numCores = -1;
  char* envValue = getenv("CHPL_LAUNCHER_CORES_PER_LOCALE");
  char* sinfoArgs[7];

  if (envValue != NULL) {
    numCores = atoi(envValue);
    if (numCores > 0)
      return numCores;
    chpl_warning("CHPL_LAUNCHER_CORES_PER_LOCALE must be > 0.", 0, 0);
  }

  sinfoArgs[0] = (char *) "sinfo";         // use sinfo to get num cpus
  sinfoArgs[1] = (char *) "--exact";       // exact counts, not "16+" etc.
  sinfoArgs[2] = (char *) "--format=%c";   // cpus per node (%c)
  sinfoArgs[3] = (char *) "--sort=+=#c";   // ascending by cpu count
  sinfoArgs[4] = (char *) "--noheader";    // suppress the "CPU" header
  sinfoArgs[5] = (char *) "--responding";  // only consider online nodes
  sinfoArgs[6] = NULL;

  memset(buf, 0, buflen);
  if (chpl_run_utility1K("sinfo", sinfoArgs, buf, buflen) <= 0)
    chpl_error("Error trying to determine number of cores per node", 0, 0);

  if (sscanf(buf, "%d", &numCores) != 1)
    chpl_error("unable to determine number of cores per locale; "
               "please set CHPL_LAUNCHER_CORES_PER_LOCALE", 0, 0);

  return numCores;
}
void chpl_thread_init(int32_t numThreadsPerLocale, int32_t maxThreadsPerLocale, uint64_t callStackSize, void(*threadBeginFn)(void*), void(*threadEndFn)(void)) { // // If a value was specified for the call stack size config const, use // that (rounded up to a whole number of pages) to set the system // stack limit. // if (callStackSize != 0) { uint64_t pagesize = (uint64_t) sysconf(_SC_PAGESIZE); struct rlimit rlim; callStackSize = (callStackSize + pagesize - 1) & ~(pagesize - 1); if (getrlimit(RLIMIT_STACK, &rlim) != 0) chpl_internal_error("getrlimit() failed"); if (rlim.rlim_max != RLIM_INFINITY && callStackSize > rlim.rlim_max) { char warning[128]; sprintf(warning, "callStackSize capped at %lu\n", (unsigned long)rlim.rlim_max); chpl_warning(warning, 0, NULL); callStackSize = rlim.rlim_max; } rlim.rlim_cur = threadCallStackSize = callStackSize; if (setrlimit(RLIMIT_STACK, &rlim) != 0) chpl_internal_error("setrlimit() failed"); } }
// // Launch another thread, if it seems useful to do so and we can. // static void maybe_add_thread(void) { static chpl_bool warning_issued = false; if (!warning_issued && chpl_thread_canCreate()) { if (chpl_thread_create(NULL) == 0) { idle_thread_cnt++; } else { int32_t max_threads = chpl_thread_getMaxThreads(); uint32_t num_threads = chpl_thread_getNumThreads(); char msg[256]; if (max_threads) sprintf(msg, "max threads per locale is %" PRId32 ", but unable to create more than %d threads", max_threads, num_threads); else sprintf(msg, "max threads per locale is unbounded" ", but unable to create more than %d threads", num_threads); chpl_warning(msg, 0, 0); warning_issued = true; } } }
//
// Report the value of totalMem.  When memory tracking (chpl_memTrack)
// is off, a warning attributed to lineno/filename is issued and 0 is
// returned instead.
//
uint64_t chpl_memoryUsed(int32_t lineno, int32_t filename) {
  if (chpl_memTrack)
    return (uint64_t)totalMem;

  chpl_warning("invalid call to memoryUsed(); rerun with --memTrack",
               lineno, filename);
  return 0;
}
static void printMemAllocsByType(_Bool forLeaks, int32_t lineno, int32_t filename) { size_t* table; memTableEntry* me; int i; const int numberWidth = 9; const int numEntries = CHPL_RT_MD_NUM+chpl_mem_numDescs; if (!chpl_memTrack) { chpl_warning("invalid call to printMemAllocsByType(); rerun with " "--memTrack", lineno, filename); return; } table = (size_t*)sys_calloc(numEntries, 3*sizeof(size_t)); for (i = 0; i < hashSize; i++) { for (me = memTable[i]; me != NULL; me = me->nextInBucket) { table[3*me->description] += me->number*me->size; table[3*me->description+1] += 1; table[3*me->description+2] = me->description; } } qsort(table, numEntries, 3*sizeof(size_t), memTableEntryCmp); if (forLeaks) { fprintf(memLogFile, "====================\n"); fprintf(memLogFile, "Leaked Memory Report\n"); fprintf(memLogFile, "==============================================================\n"); fprintf(memLogFile, "Number of leaked allocations\n"); fprintf(memLogFile, " Total leaked memory (bytes)\n"); } else { fprintf(memLogFile, "================================\n"); fprintf(memLogFile, "Memory Allocation Report by Type\n"); fprintf(memLogFile, "==============================================================\n"); fprintf(memLogFile, "Number of allocations\n"); fprintf(memLogFile, " Total allocated bytes\n"); } fprintf(memLogFile, " Description of allocation\n"); fprintf(memLogFile, "==============================================================\n"); for (i = 0; i < 3*(CHPL_RT_MD_NUM+chpl_mem_numDescs); i += 3) { if (table[i] > 0) { fprintf(memLogFile, "%-*zu %-*zu %s\n", numberWidth, table[i+1], numberWidth, table[i], chpl_mem_descString(table[i+2])); } } fprintf(memLogFile, "==============================================================\n"); sys_free(table); }
/* ChapelArray.chpl:609 */
/*
 * Returns d unchanged.  When numLocales is greater than 1, first warns
 * (attributed to _ln/_fn) that 'distributed' domains/arrays are not yet
 * spread across multiple locales.
 */
SingleLocaleDistribution distributed_warning(SingleLocaleDistribution d, int32_t _ln, _string _fn) {
  if (numLocales > 1) {
    chpl_warning("'distributed' domains/arrays are not yet distributed across multiple locales", _ln, _fn);
  }
  return d;
}
//
// Remove the temporary expect file, unless we are debugging (in which
// case it is left in place).  A failure to remove it only produces a
// warning.
//
static void chpl_launch_cleanup(void) {
  char msg[1024];

  if (debug)
    return;

  if (unlink(expectFilename) == 0)
    return;

  snprintf(msg, sizeof(msg), "Error removing temporary file '%s': %s",
           expectFilename, strerror(errno));
  chpl_warning(msg, 0, 0);
}
// clean up the batch file static void chpl_launch_cleanup(void) { // leave file around if we're debugging if (!debug) { // remove sbatch file unless it was explicitly generated by the user if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL && !generate_sbatch_script) { if (unlink(slurmFilename)) { char msg[1024]; snprintf(msg, 1024, "Error removing temporary file '%s': %s", slurmFilename, strerror(errno)); chpl_warning(msg, 0, 0); } } } }
//
// Determine the CPUs-per-compute-unit setting.
//
// Returns the non-negative value of CHPL_LAUNCHER_CPUS_PER_CU when that
// variable is set to one.  Otherwise returns 0 if the CNA string
// mentions "cpus_per_cu", and -1 if it does not.
//
// A negative environment value is rejected with a warning and then
// treated exactly as if the variable were unset, so the invalid number
// itself is never handed back to the caller.
//
int getCPUsPerCU() {
  int numCPUsPerCU = -1;
  const char* envValue = getenv("CHPL_LAUNCHER_CPUS_PER_CU");

  if (envValue != NULL) {
    numCPUsPerCU = atoi(envValue);
    if (numCPUsPerCU < 0) {
      chpl_warning("CHPL_LAUNCHER_CPUS_PER_CU set to invalid value; "
                   "using 0 instead.", 0, 0);
      // Discard the bad value; fall back to the "not set" sentinel.
      numCPUsPerCU = -1;
    }
  }

  if (numCPUsPerCU < 0 && strstr(CNA, "cpus_per_cu") != NULL)
    numCPUsPerCU = 0;

  return numCPUsPerCU;
}
// clean up the batch file or expect file in an interactive job static void chpl_launch_cleanup(void) { // leave file around if we're debugging if (!debug) { // check if this is interactive or batch char* fileToRemove = NULL; if (getenv("CHPL_LAUNCHER_USE_SBATCH") == NULL) { fileToRemove = expectFilename; } else { fileToRemove = slurmFilename; } // actually remove file if (unlink(fileToRemove)) { char msg[1024]; snprintf(msg, 1024, "Error removing temporary file '%s': %s", fileToRemove, strerror(errno)); chpl_warning(msg, 0, 0); } } }
//
// Return the maximum heap size requested through CHPL_RT_MAX_HEAP_SIZE,
// or 0 if that variable is not set.  The value is a number optionally
// followed by one of the unit letters k/K, m/M or g/G (multiplying by
// 2^10, 2^20 or 2^30).  A value that cannot be parsed produces a
// warning and is taken to be 1g.
//
// The environment is examined on the first call only; later calls
// return the cached result.
//
size_t chpl_comm_getenvMaxHeapSize(void) {
  static int env_checked = 0;
  static size_t size = 0;

  if (!env_checked) {
    const char* envText = getenv("CHPL_RT_MAX_HEAP_SIZE");

    if (envText != NULL) {
      char units;
      const int numScanned = sscanf(envText, "%zi%c", &size, &units);

      if (numScanned == 2 && strchr("kKmMgG", units) != NULL) {
        // A recognized unit suffix: scale by the matching power of two.
        int shift = 0;

        if (units == 'k' || units == 'K')
          shift = 10;
        else if (units == 'm' || units == 'M')
          shift = 20;
        else if (units == 'g' || units == 'G')
          shift = 30;
        size <<= shift;
      } else if (numScanned != 1) {
        // Neither a bare number nor a number with a known suffix.
        chpl_warning("Cannot parse CHPL_RT_MAX_HEAP_SIZE environment "
                     "variable; assuming 1g", 0, NULL);
        size = ((size_t) 1) << 30;
      }
    }

    env_checked = 1;
  }

  return size;
}
void chpl_thread_init(void(*threadBeginFn)(void*), void(*threadEndFn)(void)) { // // This threading layer does not have any inherent limit on the number // of threads. Its limit is the lesser of any limits imposed by the // comm layer and the user. // { uint32_t lim; if ((lim = chpl_task_getenvNumThreadsPerLocale()) > 0) maxThreads = lim; else if ((lim = chpl_comm_getMaxThreads()) > 0) maxThreads = lim; } // // Count the main thread on locale 0 as already existing, since it // is (or soon will be) running the main program. // if (chpl_nodeID == 0) numThreads = 1; // // If a value was specified for the call stack size config const, use // that (rounded up to a whole number of pages) to set the system and // pthread stack limits. // if (pthread_attr_init(&thread_attributes) != 0) chpl_internal_error("pthread_attr_init() failed"); // // If a value was specified for the call stack size, use that (rounded // up to a whole number of pages) to set the system and pthread stack // limits. This will in turn limit the stack for any task hosted by // either the main process or a pthread. // { size_t css; size_t pagesize = (size_t) sysconf(_SC_PAGESIZE); struct rlimit rlim; if ((css = chpl_task_getEnvCallStackSize()) == 0) css = chpl_task_getDefaultCallStackSize(); assert(css > 0); css = (css + pagesize - 1) & ~(pagesize - 1); if (getrlimit(RLIMIT_STACK, &rlim) != 0) chpl_internal_error("getrlimit() failed"); if (rlim.rlim_max != RLIM_INFINITY && css > rlim.rlim_max) { char warning[128]; sprintf(warning, "call stack size capped at %lu\n", (unsigned long)rlim.rlim_max); chpl_warning(warning, 0, 0); css = rlim.rlim_max; } rlim.rlim_cur = css; #ifndef __CYGWIN__ // // Cygwin can't do setrlimit(RLIMIT_STACK). 
// if (setrlimit(RLIMIT_STACK, &rlim) != 0) chpl_internal_error("setrlimit() failed"); #endif if (pthread_attr_setstacksize(&thread_attributes, css) != 0) chpl_internal_error("pthread_attr_setstacksize() failed"); } if (pthread_attr_getstacksize(&thread_attributes, &threadCallStackSize) != 0) chpl_internal_error("pthread_attr_getstacksize() failed"); saved_threadBeginFn = threadBeginFn; saved_threadEndFn = threadEndFn; CHPL_TLS_INIT(chpl_thread_id); CHPL_TLS_SET(chpl_thread_id, (intptr_t) --curr_thread_id); CHPL_TLS_INIT(chpl_thread_data); pthread_mutex_init(&thread_info_lock, NULL); pthread_mutex_init(&numThreadsLock, NULL); // // This is something of a hack, but it makes us a bit more resilient // if we're out of memory or near to it at shutdown time. Launch, // cancel, and join with an initial pthread, forcing initialization // needed by any of those activities. (In particular we have found // that cancellation needs to dlopen(3) a shared object, which fails // if we are out of memory. Doing it now means that shared object is // already available when we need it later.) // { pthread_t initial_pthread; if (!pthread_create(&initial_pthread, NULL, initial_pthread_func, NULL)) { (void) pthread_cancel(initial_pthread); (void) pthread_join(initial_pthread, NULL); } } }
//
// Run `cnselect <option>` and capture its output in buf (buflen bytes,
// zeroed first).  If the utility cannot be run or produces no output,
// failMsg is reported through chpl_error().
//
static void getCoresPerLocale_cnselect(const char* option, const char* failMsg, char* buf, int buflen) {
  char* argv[3];

  argv[0] = (char *) "cnselect";
  argv[1] = (char *) option;
  argv[2] = NULL;

  memset(buf, 0, buflen);
  if (chpl_run_utility1K("cnselect", argv, buf, buflen) <= 0)
    chpl_error(failMsg, 0, 0);
}

//
// Return the number of cores per locale.  A positive value in
// CHPL_LAUNCHER_CORES_PER_LOCALE wins.  Otherwise, depending on what the
// CNA string advertises, the count is read from `cnselect -Lnumcores`
// or derived from `cnselect -Lcoremask` by counting the contiguous low
// set bits of the mask.  If CNA advertises neither, an error is
// reported.
//
int getCoresPerLocale() {
  enum { CNSELECT_BUFLEN = 1024 };
  char buf[CNSELECT_BUFLEN];
  int numCores = -1;
  char* envValue = getenv("CHPL_LAUNCHER_CORES_PER_LOCALE");

  if (envValue != NULL) {
    numCores = atoi(envValue);
    if (numCores <= 0)
      chpl_warning("CHPL_LAUNCHER_CORES_PER_LOCALE set to invalid value.", 0, 0);
  }

  if (numCores > 0)
    return numCores;

  if (strstr(CNA, "numcores") != NULL) {
    getCoresPerLocale_cnselect("-Lnumcores",
                               "Error trying to determine number of cores per node",
                               buf, CNSELECT_BUFLEN);

    if (sscanf(buf, "%d", &numCores) != 1)
      chpl_error("unable to determine number of cores per locale; "
                 "please set CHPL_LAUNCHER_CORES_PER_LOCALE", 0, 0);

    return numCores;
  }

  if (strstr(CNA, "coremask") != NULL) {
    int coreMask;

    getCoresPerLocale_cnselect("-Lcoremask",
                               "Error trying to determine number coremask on node",
                               buf, CNSELECT_BUFLEN);

    if (sscanf(buf, "%d", &coreMask) != 1)
      chpl_error("unable to determine coremask for locale; "
                 "please set CHPL_LAUNCHER_CORES_PER_LOCALE", 0, 0);

    // The lowest bit is counted unconditionally; every further
    // consecutive set bit adds one more core.
    numCores = 1;
    for (coreMask >>= 1; (coreMask & 0x1) != 0; coreMask >>= 1)
      numCores += 1;

    return numCores;
  }

  // neither numcores nor coremask is available in this version
  chpl_error("Error trying to determine number of cores per node", 0, 0);
  return 0;
}
static void printMemAllocs(chpl_mem_descInt_t description, int64_t threshold, int32_t lineno, int32_t filename) { const int numberWidth = 9; const int precision = sizeof(uintptr_t) * 2; const int addressWidth = precision+4; const int descWidth = 33; int filenameWidth = strlen("Allocated Memory (Bytes)"); int totalWidth; int filenameLength; memTableEntry* memEntry; c_string memEntryFilename; int n, i; char* loc; memTableEntry** table; if (!chpl_memTrack) { chpl_warning("invalid call to printMemAllocs(); rerun with --memTrack", lineno, filename); return; } n = 0; filenameWidth = strlen("Allocated Memory (Bytes)"); for (i = 0; i < hashSize; i++) { for (memEntry = memTable[i]; memEntry != NULL; memEntry = memEntry->nextInBucket) { size_t chunk = memEntry->number * memEntry->size; if (chunk < threshold) continue; if (description != -1 && memEntry->description != description) continue; n += 1; if (memEntry->filename) { memEntryFilename = chpl_lookupFilename(memEntry->filename); filenameLength = strlen(memEntryFilename); if (filenameLength > filenameWidth) filenameWidth = filenameLength; } } } totalWidth = filenameWidth+numberWidth*4+descWidth+20; for (i = 0; i < totalWidth; i++) fprintf(memLogFile, "="); fprintf(memLogFile, "\n"); fprintf(memLogFile, "%-*s%-*s%-*s%-*s%-*s%-*s\n", filenameWidth+numberWidth, "Allocated Memory (Bytes)", numberWidth, "Number", numberWidth, "Size", numberWidth, "Total", descWidth, "Description", 20, "Address"); for (i = 0; i < totalWidth; i++) fprintf(memLogFile, "="); fprintf(memLogFile, "\n"); table = (memTableEntry**)sys_malloc(n*sizeof(memTableEntry*)); if (!table) chpl_error("out of memory printing memory table", lineno, filename); n = 0; for (i = 0; i < hashSize; i++) { for (memEntry = memTable[i]; memEntry != NULL; memEntry = memEntry->nextInBucket) { size_t chunk = memEntry->number * memEntry->size; if (chunk < threshold) continue; if (description != -1 && memEntry->description != description) continue; table[n++] = memEntry; } } 
qsort(table, n, sizeof(memTableEntry*), descCmp); loc = (char*)sys_malloc((filenameWidth+numberWidth+1)*sizeof(char)); for (i = 0; i < n; i++) { memEntry = table[i]; if (memEntry->filename) { memEntryFilename = chpl_lookupFilename(memEntry->filename); sprintf(loc, "%s:%" PRId32, memEntryFilename, memEntry->lineno); } else { sprintf(loc, "--"); } fprintf(memLogFile, "%-*s%-*zu%-*zu%-*zu%-*s%#-*.*" PRIxPTR "\n", filenameWidth+numberWidth, loc, numberWidth, memEntry->number, numberWidth, memEntry->size, numberWidth, memEntry->size*memEntry->number, descWidth, chpl_mem_descString(memEntry->description), addressWidth, precision, (uintptr_t)memEntry->memAlloc); } for (i = 0; i < totalWidth; i++) fprintf(memLogFile, "="); fprintf(memLogFile, "\n"); putchar('\n'); sys_free(table); sys_free(loc); }
void chpl_printMemAllocStats(int32_t lineno, int32_t filename) { if (!chpl_memTrack) { chpl_warning("invalid call to printMemAllocStats(); rerun with --memTrack", lineno, filename); return; } // // To reduce the likelihood of corrupted output in multilocale runs, // print everything into an internal buffer just large enough to hold // it all, then send that to the memory log file in a single call. // Also in multi-locale runs, prefix each line with a node-specific // string to allow sorting the output by node ID for easier reading. // // // First, construct the line prefix. // const int nodeWidth = (int) lrint(ceil(log10((double) chpl_numNodes))); char prefixBuf[15 + nodeWidth + 1]; // room for "memStats: node N" if (chpl_numNodes == 1) { snprintf(prefixBuf, sizeof(prefixBuf), "memStats:"); } else { snprintf(prefixBuf, sizeof(prefixBuf), "memStats: node %*d", nodeWidth, chpl_nodeID); } // // Take a pre-run through the descriptions and values to figure // out how long each line will need to be. // static const struct { const char* desc; size_t* val; } descsVals[] = { { "Allocated Now:", &totalMem }, { "Allocation High Water Mark:", &maxMem }, { "Sum of Allocations:", &totalAllocated }, { "Sum of Frees:", &totalFreed }, }; const int nDescsVals = sizeof(descsVals) / sizeof(descsVals[0]); int descWidth = 0; int memWidth = 0; for (int i = 0; i < nDescsVals; i++) { const int thisDescWidth = strlen(descsVals[i].desc); if (thisDescWidth > descWidth) descWidth = thisDescWidth; const int thisMemWidth = (*descsVals[i].val == 0) ? 1 : (int) lrint(ceil(log10((double) *descsVals[i].val))); if (thisMemWidth > memWidth) memWidth = thisMemWidth; } // // Now finally, size the buffer, print the information, and send it // to the memory log file. 
// char buf[4 * (strlen(prefixBuf) + 1 + descWidth + 1 + memWidth + 1) + 1]; size_t len; memTrack_lock(); len = 0; for (int i = 0; i < nDescsVals; i++) { len += snprintf(buf + len, sizeof(buf) - len, "%s %-*s %*zd\n", prefixBuf, descWidth, descsVals[i].desc, memWidth, *descsVals[i].val); } memTrack_unlock(); fputs(buf, memLogFile); }
/* ChapelBase.chpl:1 */
/*
 * One-time initializer for the ChapelBase module; does nothing unless
 * __run_ChapelBase_firsttime0 is still set.
 *
 * Sets numLocales and maxThreads from the like-named config values when
 * present, and from _chpl_comm_default_num_locales() / chpl_maxThreads()
 * otherwise.  Then records chpl_maxThreadsLimit() in maxThreadsLimit
 * and, when that limit is non-zero, warns if maxThreads exceeds it or
 * if maxThreads is 0.
 */
void __init_ChapelBase(int32_t _ln, _string _fn) {
  int32_t chosen;
  int32_t limit;
  _string cfgText;
  _string numText;
  _string msg;

  if (!__run_ChapelBase_firsttime0) {
    return;
  }
  __run_ChapelBase_firsttime0 = false;

  /* numLocales */
  if (_config_has_value("numLocales", "ChapelBase")) {
    cfgText = _config_get_value("numLocales", "ChapelBase");
    chosen = _string_to_int32_t(cfgText, _ln, _fn);
  } else {
    chosen = _chpl_comm_default_num_locales();
  }
  numLocales = chosen;

  /* maxThreads */
  if (_config_has_value("maxThreads", "ChapelBase")) {
    cfgText = _config_get_value("maxThreads", "ChapelBase");
    chosen = _string_to_int32_t(cfgText, _ln, _fn);
  } else {
    chosen = chpl_maxThreads();
  }
  maxThreads = chosen;

  limit = chpl_maxThreadsLimit();
  maxThreadsLimit = limit;

  if (maxThreadsLimit == 0) {
    return;
  }

  if (maxThreads > maxThreadsLimit) {
    numText = int32_t_to_string(maxThreads);
    msg = string_concat("specified value of ", numText, _ln, _fn);
    msg = string_concat(msg, " for maxThreads is too high; limit is ", _ln, _fn);
    numText = int32_t_to_string(maxThreadsLimit);
    msg = string_concat(msg, numText, _ln, _fn);
    chpl_warning(msg, _ln, _fn);
  } else if (maxThreads == 0) {
    numText = int32_t_to_string(maxThreadsLimit);
    msg = string_concat("maxThreads is unbounded; however, the limit is ", numText, _ln, _fn);
    chpl_warning(msg, _ln, _fn);
  }
  return;
}
//
// Use this function to run short utility programs that will return less
// than 1024 characters of output.  The program must not expect any input.
// On success, returns the number of bytes read and the output of the
// command in outbuf.  Returns -1 on failure.
//
// The child's stdout and stderr are both captured into the same buffer.
// At most min(1024, outbuflen) bytes are collected; anything beyond
// that is discarded.  outbuf is NOT NUL-terminated by this function.
// Output containing "internal error: " is taken to come from the
// child-side failure paths below and is reported as a failure.
//
int chpl_run_utility1K(const char *command, char *const argv[], char *outbuf, int outbuflen) {
  enum { buflen = 1024 };
  char buf[buflen + 1];          // +1 so the data can always be terminated
  char *cur = buf;
  int fdo[2], fde[2];
  int fds[2];                    // read ends: child's stdout, stderr
  int watching[2] = { 1, 1 };    // cleared once a pipe reports EOF
  int maxfd;
  int capacity;
  int numRead = 0;
  int done = 0;
  int status;
  int k;
  pid_t pid;

  if (pipe(fdo) < 0) {
    snprintf(buf, sizeof(buf), "Unable to run '%s' (pipe failed): %s\n",
             command, strerror(errno));
    chpl_internal_error(buf);
    return -1;
  }
  if (pipe(fde) < 0) {
    snprintf(buf, sizeof(buf), "Unable to run '%s' (pipe failed): %s\n",
             command, strerror(errno));
    close(fdo[0]);
    close(fdo[1]);
    chpl_internal_error(buf);
    return -1;
  }

  pid = fork();

  if (pid == -1) {
    snprintf(buf, sizeof(buf), "Unable to run '%s' (fork failed): %s",
             command, strerror(errno));
    close(fdo[0]);
    close(fdo[1]);
    close(fde[0]);
    close(fde[1]);
    chpl_warning(buf, 0, 0);
    return -1;
  }

  if (pid == 0) {
    //
    // Child: route stdout/stderr into the pipes and exec the utility.
    // The child should exit on errors; the _exit() calls make sure it
    // can never continue into the parent's code even if
    // chpl_internal_error() were to return.
    //
    close(fdo[0]);
    if (fdo[1] != STDOUT_FILENO) {
      if (dup2(fdo[1], STDOUT_FILENO) != STDOUT_FILENO) {
        snprintf(buf, sizeof(buf), "Unable to run '%s' (dup2 failed): %s",
                 command, strerror(errno));
        chpl_internal_error(buf);
        _exit(1);
      }
    }
    close(fde[0]);
    if (fde[1] != STDERR_FILENO) {
      if (dup2(fde[1], STDERR_FILENO) != STDERR_FILENO) {
        snprintf(buf, sizeof(buf), "Unable to run '%s' (dup2 failed): %s",
                 command, strerror(errno));
        chpl_internal_error(buf);
        _exit(1);
      }
    }

    execvp(command, argv);
    // should only return on error
    snprintf(buf, sizeof(buf), "Unable to run '%s': %s",
             command, strerror(errno));
    chpl_internal_error(buf);
    _exit(1);
  }

  //
  // Parent: collect the child's output.
  //
  fds[0] = fdo[0];
  fds[1] = fde[0];
  close(fdo[1]);
  close(fde[1]);
  maxfd = fds[0] > fds[1] ? fds[0] : fds[1];

  // Never collect more than fits in both our buffer and the caller's.
  capacity = buflen > outbuflen ? outbuflen : buflen;

  while (!done && numRead < capacity) {
    struct timeval tv = { 1, 0 };
    fd_set set;

    FD_ZERO(&set);
    for (k = 0; k < 2; k++) {
      if (watching[k])
        FD_SET(fds[k], &set);
    }

    if (select(maxfd + 1, &set, NULL, NULL, &tv) < 0) {
      if (errno == EINTR)
        continue;
      snprintf(buf, sizeof(buf), "Unable to run '%s' (select failed): %s",
               command, strerror(errno));
      goto fail;
    }

    for (k = 0; k < 2 && !done && numRead < capacity; k++) {
      ssize_t rv;

      if (!watching[k] || !FD_ISSET(fds[k], &set))
        continue;

      // Only ask for what still fits behind cur.
      rv = read(fds[k], cur, (size_t) (capacity - numRead));
      if (rv > 0) {
        cur += rv;
        numRead += (int) rv;
      } else if (rv == 0) {
        //
        // EOF on this pipe: stop polling it.  We are finished once the
        // child is gone (reaped, or no longer ours to wait for) or
        // both pipes have been closed.
        //
        watching[k] = 0;
        if (waitpid(pid, &status, WNOHANG) != 0 ||
            (!watching[0] && !watching[1]))
          done = 1;
      } else {
        snprintf(buf, sizeof(buf), "Unable to run '%s' (read failed): %s",
                 command, strerror(errno));
        goto fail;
      }
    }
  }

  close(fds[0]);
  close(fds[1]);

  if (numRead == 0) {
    snprintf(buf, sizeof(buf), "Unable to run '%s' (no bytes read)", command);
    chpl_warning(buf, 0, 0);
    return -1;
  }

  // Terminate before searching; numRead <= buflen < sizeof(buf).
  buf[numRead] = '\0';
  if (strstr(buf, "internal error: ") != NULL) {
    // The utility program ran, but failed with an internal error
    // from child's branch above (dup2 or execvp)
    chpl_warning(buf, 0, 0);
    return -1;
  }

  memcpy(outbuf, buf, (size_t) numRead);

  // NOTE: We don't do a blocking waitpid() here, so the program may keep
  // running.  That is a bad program, and I'm not going to deal with it
  // here.
  return numRead;

fail:
  close(fds[0]);
  close(fds[1]);
  chpl_warning(buf, 0, 0);
  return -1;
}
static char* chpl_launch_create_command(int argc, char* argv[], int32_t numLocales) { int i; int size; char baseCommand[2*FILENAME_MAX]; char* command; FILE* slurmFile, *expectFile; char* projectString = getenv(launcherAccountEnvvar); char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT"); char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME"); char* basenamePtr = strrchr(argv[0], '/'); char* nodeAccessEnv = NULL; pid_t mypid; if (basenamePtr == NULL) { basenamePtr = argv[0]; } else { basenamePtr++; } chpl_compute_real_binary_name(argv[0]); // command line walltime takes precedence over env var if (!walltime) { walltime = getenv("CHPL_LAUNCHER_WALLTIME"); } // command line partition takes precedence over env var if (!partition) { partition = getenv("CHPL_LAUNCHER_PARTITION"); } // command line exclude list takes precedence over env var if (!exclude) { exclude = getenv("CHPL_LAUNCHER_EXCLUDE"); } // request exclusive node access by default, but allow user to override nodeAccessEnv = getenv("CHPL_LAUNCHER_NODE_ACCESS"); if (nodeAccessEnv == NULL || strcmp(nodeAccessEnv, "exclusive") == 0) { nodeAccessStr = "exclusive"; } else if (strcmp(nodeAccessEnv, "shared") == 0 || strcmp(nodeAccessEnv, "share") == 0 || strcmp(nodeAccessEnv, "oversubscribed") == 0 || strcmp(nodeAccessEnv, "oversubscribe") == 0) { nodeAccessStr = "share"; } else if (strcmp(nodeAccessEnv, "unset") == 0) { nodeAccessStr = NULL; } else { chpl_warning("unsupported 'CHPL_LAUNCHER_NODE_ACCESS' option", 0, 0); nodeAccessStr = "exclusive"; } if (debug) { mypid = 0; } else { mypid = getpid(); } sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid); sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid); if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) { slurmFile = fopen(slurmFilename, "w"); fprintf(slurmFile, "#!/bin/sh\n\n"); fprintf(slurmFile, "#SBATCH -J Chpl-%.10s\n", basenamePtr); genNumLocalesOptions(slurmFile, determineSlurmVersion(), numLocales, getNumCoresPerLocale()); if 
(projectString && strlen(projectString) > 0) fprintf(slurmFile, "#SBATCH -A %s\n", projectString); if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) { // fprintf(slurmFile, "#SBATCH -joe\n"); if (outputfn!=NULL) fprintf(slurmFile, "#SBATCH -o %s\n", outputfn); else fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", argv[0]); // fprintf(slurmFile, "cd $SBATCH_O_WORKDIR\n"); fprintf(slurmFile, "%s/%s/gasnetrun_ibv -n %d -N %d", CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales); propagate_environment(slurmFile); fprintf(slurmFile, " %s ", chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(slurmFile, " '%s'", argv[i]); } fprintf(slurmFile, "\n"); } fclose(slurmFile); chmod( slurmFilename, 0755); } if (getenv("CHPL_LAUNCHER_USE_SBATCH") == NULL) { expectFile = fopen(expectFilename, "w"); if (verbosity < 2) { // fprintf(expectFile, "log_user 0\n"); } fprintf(expectFile, "set timeout -1\n"); // fprintf(expectFile, "chmod +x %s\n",slurmFilename); fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n"); // fprintf(expectFile, "spawn sbatch "); fprintf(expectFile, "spawn -noecho salloc --quiet "); fprintf(expectFile, "-J %.10s ",basenamePtr); // pass fprintf(expectFile, "-N %d ",numLocales); fprintf(expectFile, "--ntasks-per-node=1 "); if (nodeAccessStr != NULL) fprintf(expectFile, "--%s ", nodeAccessStr); if (walltime) fprintf(expectFile, "--time=%s ",walltime); if(partition) fprintf(expectFile, "--partition=%s ",partition); if(exclude) fprintf(expectFile, "--exclude=%s ",exclude); if (constraint) { fprintf(expectFile, " -C %s", constraint); } // fprintf(expectFile, "-I %s ", slurmFilename); fprintf(expectFile, " %s/%s/gasnetrun_ibv -n %d -N %d", CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales); propagate_environment(expectFile); fprintf(expectFile, " %s ", chpl_get_real_binary_name()); for (i=1; i<argc; i++) { fprintf(expectFile, " %s", argv[i]); } // fprintf(expectFile, "\\n\"\n"); fprintf(expectFile, "\n\n"); // 
fprintf(expectFile, "expect -re $prompt\n"); // fprintf(expectFile, "send \"cd \\$SBATCH_O_WORKDIR\\n\"\n"); // fprintf(expectFile, "expect -re $prompt\n"); // fprintf(expectFile, "sleep 10\n"); // fprintf(expectFile, "interact -o -re $prompt {return}\n"); // fprintf(expectFile, "send_user \"\\n\"\n"); // fprintf(expectFile, "send \"exit\\n\"\n"); fprintf(expectFile, "interact -o -re $prompt {return}\n"); fclose(expectFile); sprintf(baseCommand, "expect %s", expectFilename); } else { // sprintf(baseCommand, "sbatch %s\n", slurmFilename); sprintf(baseCommand, "sbatch %s\n", slurmFilename); } size = strlen(baseCommand) + 1; command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0); sprintf(command, "%s", baseCommand); if (strlen(command)+1 > size) { chpl_internal_error("buffer overflow"); } return command; }