/** * @brief MCast callback handler * * Handle a callback from the MCast facility. The callback-type is * passed within @a msgid. @a buf might hold additional information * related to the callback. Currently two types of callback are * handled: * * - MCAST_NEW_CONNECTION: a new partner inaccessible before was detected. * * - MCAST_LOST_CONNECTION: a partner that was recently accessible * disappeared. * * @param msgid Type of callback to handle * * @param buf Buffer holding extra information related to the * callback. * * @return No return value. */ static void MCastCallBack(int msgid, void *buf) { int node; switch(msgid) { case MCAST_NEW_CONNECTION: node = *(int*)buf; PSID_log(PSID_LOG_STATUS | PSID_LOG_MCAST, "%s(MCAST_NEW_CONNECTION,%d)\n", __func__, node); if (node!=PSC_getMyID() && !PSIDnodes_isUp(node)) { if (send_DAEMONCONNECT(node)<0) { PSID_warn(PSID_LOG_STATUS, errno, "%s: send_DAEMONCONNECT()", __func__); } } break; case MCAST_LOST_CONNECTION: node = *(int*)buf; PSID_log(PSID_LOG_STATUS | PSID_LOG_MCAST, "%s(MCAST_LOST_CONNECTION,%d)\n", __func__, node); if (node != PSC_getMyID()) declareNodeDead(node, 0, 0); /* * Send CONNECT msg via RDP. This should timeout and tell RDP that * the connection is down. */ send_DAEMONCONNECT(node); break; default: PSID_log(-1, "%s(%d,%p). Unhandled message\n", __func__, msgid, buf); } }
/** * @brief RDP callback handler * * Handle a callback from the RDP facility. The callback-type is * passed within @a msgid. @a buf might hold additional information * related to the callback. Currently four types of callback are * handled: * * - RDP_NEW_CONNECTION: a new partner inaccessible before was detected. * * - RDP_LOST_CONNECTION: a partner that was recently accessible * disappeared. * * - RDP_PKT_UNDELIVERABLE: RDP was not able to deliver a packet * originating on the local node. The actual packet is passed back * within @a buf in order to create a suitable answer. * * - RDP_CAN_CONTINUE: RDP's flow control signals the possibility to * send further packets to the destination indicated in @a buf. * * @param msgid Type of callback to handle * * @param buf Buffer holding extra information related to the * callback. * * @return No return value. */ static void RDPCallBack(int msgid, void *buf) { switch(msgid) { case RDP_NEW_CONNECTION: if (! (PSID_getDaemonState() & PSID_STATE_SHUTDOWN)) { int node = *(int*)buf; PSID_log(PSID_LOG_STATUS | PSID_LOG_RDP, "%s(RDP_NEW_CONNECTION,%d)\n", __func__, node); if (node != PSC_getMyID() && !PSIDnodes_isUp(node)) { if (send_DAEMONCONNECT(node)<0) { // @todo Really necessary ? PSID_warn(PSID_LOG_STATUS, errno, "%s: send_DAEMONCONNECT()", __func__); } } } break; case RDP_PKT_UNDELIVERABLE: { DDBufferMsg_t *msg = (DDBufferMsg_t*)((RDPDeadbuf*)buf)->buf; PSID_log(PSID_LOG_RDP, "%s(RDP_PKT_UNDELIVERABLE, dest %x source %x type %s)\n", __func__, msg->header.dest, msg->header.sender, PSDaemonP_printMsg(msg->header.type)); if (PSC_getPID(msg->header.sender)) { DDMsg_t contmsg = { .type = PSP_DD_SENDCONT, .sender = msg->header.dest, .dest = msg->header.sender, .len = sizeof(DDMsg_t) }; sendMsg(&contmsg); } PSID_dropMsg(msg); break; } case RDP_LOST_CONNECTION: if (! (PSID_getDaemonState() & PSID_STATE_SHUTDOWN)) { int node = *(int*)buf; PSID_log(node != PSC_getMyID() ? 
PSID_LOG_STATUS|PSID_LOG_RDP : -1, "%s(RDP_LOST_CONNECTION,%d)\n", __func__, node); if (node != PSC_getMyID()) declareNodeDead(node, 1, 0); } break; case RDP_CAN_CONTINUE: { int node = *(int*)buf; flushRDPMsgs(node); break; } default: PSID_log(-1, "%s(%d,%p). Unhandled message\n", __func__, msgid, buf); } }
/**
 * @brief Compose a task ID from a node ID and a process ID.
 *
 * The node part is placed into the upper bits, the PID into the lower
 * bits of the result. A negative @a node selects the ID of the local
 * node as reported by PSC_getMyID().
 *
 * Layout on Linux: 16 bits node ID, 16 bits PID.
 * Layout elsewhere: 12 bits node ID, 20 bits PID.
 *
 * @param node Node ID to encode; negative means "local node"
 *
 * @param pid Process ID to encode; bits beyond the PID field are cut off
 *
 * @return The composed task ID.
 */
PStask_ID_t PSC_getTID(PSnodes_ID_t node, pid_t pid)
{
#ifdef __linux__
    /*
     * Linux uses PIDs smaller than 32768, thus 16 bits for pid are enough
     * NOTE(review): this relies on the system's PID limit -- confirm it is
     * enforced elsewhere.
     */
    if (node >= 0) {
        return (((node&0xFFFF)<<16)|(pid&0xFFFF));
    }
    return (((PSC_getMyID()&0xFFFF)<<16)|(pid&0xFFFF));
#else
    /* Maybe we should do this on every architecture ? *JH* */
    /* But this would limit us to 4096 nodes! *NE* */
    /* Tru64 V5.1 use 19 bit for PID's, we reserve 20 bits */
    if (node >= 0) {
        return (((node&0xFFFL)<<20)|(pid&0xFFFFFL));
    }
    return (((PSC_getMyID()&0xFFFL)<<20)|(pid&0xFFFFFL));
#endif
}
/**
 * @brief Handle a PSP_ACCOUNT_END message.
 *
 * Two flavors of end messages are distinguished by comparing the
 * message's sender with the logger task ID stored in its buffer:
 *
 * - Sent by the logger itself: the corresponding job is marked as
 * complete and gets its end time set. Nothing is added to the message.
 *
 * - Sent on behalf of a child: the child's accounting is stopped, the
 * rusage, page size, walltime and exit status are read from the
 * message into the client structure and -- unless the client is a
 * jobscript -- extended accounting information (max/avg memory and
 * thread numbers, session) is appended to the message. Finally the
 * job's counter of exited children is updated; once all children have
 * exited the job is marked complete and, if its logger is not on the
 * local node, removed.
 *
 * @param msg The message to handle.
 *
 * @return No return value.
 */
static void handleAccountEnd(DDTypedBufferMsg_t *msg)
{
    const PStask_ID_t sender = msg->header.sender;
    PStask_ID_t logger;
    size_t used = 0;

    mdbg(PSACC_LOG_ACC_MSG, "%s(%s)\n", __func__, PSC_printTID(sender));

    PSP_getTypedMsgBuf(msg, &used, __func__, "logger", &logger, sizeof(logger));

    /* end msg from logger */
    if (sender == logger) {
        Job_t *loggerJob = findJobByLogger(logger);

        if (loggerJob) {
            loggerJob->endTime = time(NULL);
            loggerJob->complete = true;

            if (loggerJob->childsExit < loggerJob->nrOfChilds) {
                mdbg(PSACC_LOG_VERBOSE, "%s: logger %s exited, but %i"
                     " children are still alive\n", __func__,
                     PSC_printTID(logger),
                     loggerJob->nrOfChilds - loggerJob->childsExit);
            }
        } else {
            mlog("%s: job for logger %s not found\n", __func__,
                 PSC_printTID(logger));
        }
        return;
    }

    /* rank, uid and gid are not needed here; just step over them */
    uint64_t skipped;
    PSP_getTypedMsgBuf(msg, &used, __func__, "rank(skipped)", &skipped,
                       sizeof(int32_t));
    PSP_getTypedMsgBuf(msg, &used, __func__, "uid(skipped)", &skipped,
                       sizeof(uid_t));
    PSP_getTypedMsgBuf(msg, &used, __func__, "gid(skipped)", &skipped,
                       sizeof(gid_t));

    pid_t child;
    PSP_getTypedMsgBuf(msg, &used, __func__, "pid", &child, sizeof(child));

    /* calculate childs TaskID */
    const PStask_ID_t childTID = PSC_getTID(PSC_getID(sender), child);

    /* find the child exiting */
    Client_t *client = findClientByTID(childTID);
    if (!client) {
        if (!findHist(logger)) {
            mlog("%s: end msg for unknown client %s from %s\n", __func__,
                 PSC_printTID(childTID), PSC_printTID(sender));
        }
        return;
    }

    if (client->type != ACC_CHILD_JOBSCRIPT && client->logger != logger) {
        /* two calls since PSC_printTID() is used twice */
        mlog("%s: logger mismatch (%s/", __func__, PSC_printTID(logger));
        mlog("%s)\n", PSC_printTID(client->logger));
    }

    /* stop accounting of dead child */
    client->doAccounting = false;
    client->endTime = time(NULL);

    PSP_getTypedMsgBuf(msg, &used, __func__, "rusage",
                       &client->data.rusage, sizeof(client->data.rusage));
    PSP_getTypedMsgBuf(msg, &used, __func__, "pageSize",
                       &client->data.pageSize, sizeof(client->data.pageSize));
    PSP_getTypedMsgBuf(msg, &used, __func__, "walltime",
                       &client->walltime, sizeof(client->walltime));
    PSP_getTypedMsgBuf(msg, &used, __func__, "status",
                       &client->status, sizeof(client->status));

    mdbg(PSACC_LOG_VERBOSE, "%s: child rank %i pid %i logger %s uid %i"
         " gid %i msg type %s finished\n", __func__, client->rank, child,
         PSC_printTID(client->logger), client->uid, client->gid,
         getAccountMsgType(msg->type));

    if (client->type == ACC_CHILD_JOBSCRIPT) return; /* drop message */

    /* Now add further information to the message */
    msg->header.len = offsetof(DDTypedBufferMsg_t, buf) + used;

    uint32_t one = 1;
    PSP_putTypedMsgBuf(msg, __func__, "extended info", &one, sizeof(one));
    PSP_putTypedMsgBuf(msg, __func__, "maxRss", &client->data.maxRss,
                       sizeof(client->data.maxRss));
    PSP_putTypedMsgBuf(msg, __func__, "maxVsize", &client->data.maxVsize,
                       sizeof(client->data.maxVsize));

    uint32_t myMaxThreads = client->data.maxThreads;
    PSP_putTypedMsgBuf(msg, __func__, "maxThreads", &myMaxThreads,
                       sizeof(myMaxThreads));
    PSP_putTypedMsgBuf(msg, __func__, "session", &client->data.session,
                       sizeof(client->data.session));

    /* add size of average used mem */
    uint64_t avgRss =
        (client->data.avgRssTotal < 1 || client->data.avgRssCount < 1)
        ? 0 : client->data.avgRssTotal / client->data.avgRssCount;
    PSP_putTypedMsgBuf(msg, __func__, "avgRss", &avgRss, sizeof(avgRss));

    /* add size of average used vmem */
    uint64_t avgVsize =
        (client->data.avgVsizeTotal < 1 || client->data.avgVsizeCount < 1)
        ? 0 : client->data.avgVsizeTotal / client->data.avgVsizeCount;
    PSP_putTypedMsgBuf(msg, __func__, "avgVsize", &avgVsize, sizeof(avgVsize));

    /* add number of average threads */
    uint64_t avgThrds =
        (client->data.avgThreadsTotal < 1 || client->data.avgThreadsCount < 1)
        ? 0 : client->data.avgThreadsTotal / client->data.avgThreadsCount;
    PSP_putTypedMsgBuf(msg, __func__, "avgThrds", &avgThrds, sizeof(avgThrds));

    /* find the job */
    Job_t *job = findJobByLogger(client->logger);
    if (!job) {
        mlog("%s: job for child %i not found\n", __func__, child);
        return;
    }

    job->childsExit++;
    if (job->childsExit < job->nrOfChilds) return;

    /* all children exited */
    if (globalCollectMode && PSC_getID(logger) != PSC_getMyID()) {
        forwardJobData(job, true);
        sendAggDataFinish(logger);
    }

    job->complete = true;
    job->endTime = time(NULL);
    mdbg(PSACC_LOG_VERBOSE, "%s: job complete [%i:%i]\n", __func__,
         job->childsExit, job->nrOfChilds);

    if (PSC_getID(job->logger) != PSC_getMyID()) {
        deleteJob(job->logger);
    }
}
case PSP_ACCOUNT_FORWARD_END:
        mlog("%s: got obsolete msg %i\n", __func__, msg->type);
        break;
    default:
        mlog("%s: unknown msg type %i received form %s\n", __func__, msg->type,
             PSC_printTID(msg->header.sender));
    }
}

/**
 * @brief Enable or disable accounting updates for a client.
 *
 * Build a PSP_CC_PLUG_ACCOUNT message of sub-type
 * PSP_ACCOUNT_ENABLE_UPDATE or PSP_ACCOUNT_DISABLE_UPDATE (depending on
 * @a enable), append the task ID @a clientTID as payload and write the
 * message to the file descriptor held in daemonSock via doWriteP().
 *
 * The message's destination is the task ID composed from the local
 * node's ID and PID 0 (presumably addressing the local daemon --
 * confirm against the receiving side).
 *
 * @param clientTID Task ID of the client to be put into the message
 *
 * @param enable Selects the enable (true) or disable (false) sub-type
 *
 * @return The return value of doWriteP() is passed through.
 */
int switchAccounting(PStask_ID_t clientTID, bool enable)
{
    /* Initial length covers header and type only; the payload is added
     * by PSP_putTypedMsgBuf() below */
    DDTypedBufferMsg_t msg = (DDTypedBufferMsg_t) {
        .header = (DDMsg_t) {
            .type = PSP_CC_PLUG_ACCOUNT,
            .sender = PSC_getMyTID(),
            .dest = PSC_getTID(PSC_getMyID(), 0),
            .len = sizeof(msg.header) + sizeof(msg.type)},
        .type = enable ? PSP_ACCOUNT_ENABLE_UPDATE : PSP_ACCOUNT_DISABLE_UPDATE,
        .buf = {'\0'} };

    /* append the client's task ID as the message's payload */
    PSP_putTypedMsgBuf(&msg, __func__, "client", &clientTID, sizeof(clientTID));

    /* write the message to the socket stored in daemonSock */
    return doWriteP(daemonSock, &msg, msg.header.len);
}

static int setDaemonSock(void *dsock)
{
    daemonSock = *(int *)dsock;
    return 0;
void doServer(void) { int forward_id, numClients; ps_send_info_t sinfo; FILE *input = NULL; PSnodes_ID_t node; PSP_PortH_t porth; char *buf, *buf2; buf = malloc(arg_maxmsize); if (!buf) { perror("malloc(buf)"); exit(1); } buf2 = malloc(arg_maxmsize); if (!buf2) { perror("malloc(buf2)"); exit(1); } input = stdin; if (arg_ifile) { input = fopen(arg_ifile, "r"); if (!input) { fprintf(stderr, "Cant open file '%s' for reading : %s\n", arg_ifile, strerror(errno)); exit(1); } } else if (arg_icmd) { input = popen(arg_icmd, "r"); if (!input) { fprintf(stderr, "Cant start input command '%s' : %s\n", arg_icmd, strerror(errno)); exit(1); } } porth = start_server(); if (arg_manual) { numClients = arg_manual; } else { int clientRank = 1; if (arg_hosts) { nodeList = getNLFromHosts(arg_hosts); } else if (arg_nodes) { nodeList = getNLFromNodes(arg_nodes); } else { nodeList = getNLFromNodes("all"); } if (!nodeList && !arg_manual) { fprintf(stderr, "Unknown clients\n"); exit(1); } /* Start clients */ for (node=0; node<PSC_getNrOfNodes(); node++) { if (node == PSC_getMyID()) continue; if (nodeList[node]) { int ret = PSE_spawnAdmin(node, clientRank, rem_argc, rem_argv, 0); if (!ret) clientRank++; } } numClients = clientRank - 1; if (arg_verbose) fprintf(stderr, "Distribute to %d clients\n", numClients); } forward_id = assign_clients(porth, numClients); PSP_StopListen(porth); ps_send_info_init(&sinfo, porth, forward_id); stat_time_start = getusec(); // read from stdin, forward to forward_id while (1) { int len; char *tmp; len = read(fileno(input), buf, arg_maxmsize); if (len <= 0) break; ps_send(&sinfo, buf, len); // swap buffers (ps_send use PSP_ISend. We can read more // data, while we transmit the old data.) tmp = buf; buf = buf2; buf2 = tmp; if (timer_called) { print_stat(arg_cp ? 1 : 0); timer_called = 0; } } if (arg_ifile) { fclose(input); } else if (arg_icmd) { pclose(input); } // Send eof: ps_send(&sinfo, NULL, 0); ps_send_close(&sinfo); free(buf); free(buf2); }
int main(int argc, const char *argv[]) { poptContext optCon; /* context for parsing command-line options */ int rc, version = 0, debugMask = 0, pipeFD[2] = {-1, -1}, magic = FORKMAGIC; char *logdest = NULL, *configfile = "/etc/parastation.conf"; FILE *logfile = NULL; struct poptOption optionsTable[] = { { "debug", 'd', POPT_ARG_INT, &debugMask, 0, "enable debugging with mask <mask>", "mask"}, { "configfile", 'f', POPT_ARG_STRING, &configfile, 0, "use <file> as config-file (default is /etc/parastation.conf)", "file"}, { "logfile", 'l', POPT_ARG_STRING, &logdest, 0, "use <file> for logging (default is syslog(3))." " <file> may be 'stderr' or 'stdout'", "file"}, { "version", 'v', POPT_ARG_NONE, &version, 0, "output version information and exit", NULL}, POPT_AUTOHELP { NULL, '\0', 0, NULL, 0, NULL, NULL} }; optCon = poptGetContext(NULL, argc, argv, optionsTable, 0); rc = poptGetNextOpt(optCon); /* Store arguments for later modification in forwarders, etc. */ PSID_argc = argc; PSID_argv = argv; if (version) { printVersion(); return 0; } if (logdest) { if (strcasecmp(logdest, "stderr")==0) { logfile = stderr; } else if (strcasecmp(logdest, "stdout")==0) { logfile = stdout; } else { logfile = fopen(logdest, "a+"); if (!logfile) { char *errstr = strerror(errno); fprintf(stderr, "Cannot open logfile '%s': %s\n", logdest, errstr ? 
errstr : "UNKNOWN"); exit(1); } } } if (!logfile) { openlog("psid", LOG_PID|LOG_CONS, LOG_DAEMON); } PSID_initLogs(logfile); printWelcome(); if (rc < -1) { /* an error occurred during option processing */ poptPrintUsage(optCon, stderr, 0); PSID_log(-1, "%s: %s\n", poptBadOption(optCon, POPT_BADOPTION_NOALIAS), poptStrerror(rc)); if (!logfile) fprintf(stderr, "%s: %s\n", poptBadOption(optCon, POPT_BADOPTION_NOALIAS), poptStrerror(rc)); return 1; } /* Save some space in order to modify the cmdline later on */ PSC_saveTitleSpace(PSID_argc, PSID_argv, 1); if (logfile!=stderr && logfile!=stdout) { /* Daemonize only if neither stdout nor stderr is used for logging */ if (pipe(pipeFD) < 0) { PSID_exit(errno, "unable to create pipe"); } /* Start as daemon */ switch (fork()) { case -1: PSID_exit(errno, "unable to fork server process"); break; case 0: /* I'm the child (and running further) */ close (pipeFD[0]); break; default: /* I'm the parent and exiting */ close (pipeFD[1]); /* Wait for child's magic data */ rc = read(pipeFD[0], &magic, sizeof(magic)); if (rc != sizeof(magic) || magic != (FORKMAGIC)) return -1; return 0; } } #define _PATH_TTY "/dev/tty" /* First disconnect from the old controlling tty. 
*/ { int fd = open(_PATH_TTY, O_RDWR | O_NOCTTY); if (fd >= 0) { if (ioctl(fd, TIOCNOTTY, NULL)) { PSID_warn(-1, errno, "%s: ioctl(TIOCNOTTY)", __func__); } close(fd); } } /* * Disable stdin,stdout,stderr and install dummy replacement * Take care if stdout/stderr is used for logging */ { int dummy_fd; dummy_fd=open("/dev/null", O_WRONLY , 0); dup2(dummy_fd, STDIN_FILENO); if (logfile!=stdout) dup2(dummy_fd, STDOUT_FILENO); if (logfile!=stderr) dup2(dummy_fd, STDERR_FILENO); close(dummy_fd); } /* Forget about inherited window sizes */ unsetenv("LINES"); unsetenv("COLUMNS"); if (debugMask) { PSID_setDebugMask(debugMask); PSC_setDebugMask(debugMask); PSID_log(-1, "Debugging mode (mask 0x%x) enabled\n", debugMask); } /* Init the Selector facility as soon as possible */ if (!Selector_isInitialized()) Selector_init(logfile); PSID_registerLoopAct(Selector_gc); /* Initialize timer facility explicitely to ensure correct logging */ if (!Timer_isInitialized()) Timer_init(logfile); /* * Create the Local Service Port as early as possible. Actual * handling is enabled later. This gives psiadmin the chance to * connect. Additionally, this will guarantee exclusiveness */ PSID_createMasterSock(PSmasterSocketName); PSID_checkMaxPID(); /* read the config file */ PSID_readConfigFile(logfile, configfile); /* Now we can rely on the config structure */ { in_addr_t addr; PSID_log(-1, "My ID is %d\n", PSC_getMyID()); addr = PSIDnodes_getAddr(PSC_getMyID()); PSID_log(-1, "My IP is %s\n", inet_ntoa(*(struct in_addr *) &addr)); } if (!logfile && PSID_config->logDest!=LOG_DAEMON) { PSID_log(-1, "Changing logging dest from LOG_DAEMON to %s\n", PSID_config->logDest==LOG_KERN ? "LOG_KERN": PSID_config->logDest==LOG_LOCAL0 ? "LOG_LOCAL0" : PSID_config->logDest==LOG_LOCAL1 ? "LOG_LOCAL1" : PSID_config->logDest==LOG_LOCAL2 ? "LOG_LOCAL2" : PSID_config->logDest==LOG_LOCAL3 ? "LOG_LOCAL3" : PSID_config->logDest==LOG_LOCAL4 ? "LOG_LOCAL4" : PSID_config->logDest==LOG_LOCAL5 ? 
"LOG_LOCAL5" : PSID_config->logDest==LOG_LOCAL6 ? "LOG_LOCAL6" : PSID_config->logDest==LOG_LOCAL7 ? "LOG_LOCAL7" : "UNKNOWN"); closelog(); openlog("psid", LOG_PID|LOG_CONS, PSID_config->logDest); printWelcome(); } /* call startupScript, if any */ if (PSID_config->startupScript && *PSID_config->startupScript) { int ret = PSID_execScript(PSID_config->startupScript, NULL, NULL, NULL); if (ret > 1) { PSID_log(-1, "startup script '%s' failed. Exiting...\n", PSID_config->startupScript); PSID_finalizeLogs(); exit(1); } } /* Setup handling of signals */ initSigHandlers(); if (PSID_config->coreDir) { if (chdir(PSID_config->coreDir) < 0) { PSID_warn(-1, errno, "Unable to chdir() to coreDirectory '%s'", PSID_config->coreDir); } } PSIDnodes_setProtoV(PSC_getMyID(), PSProtocolVersion); PSIDnodes_setDmnProtoV(PSC_getMyID(), PSDaemonProtocolVersion); PSIDnodes_setHWStatus(PSC_getMyID(), 0); PSIDnodes_setKillDelay(PSC_getMyID(), PSID_config->killDelay); PSIDnodes_setAcctPollI(PSC_getMyID(), PSID_config->acctPollInterval); /* Bring node up with correct numbers of CPUs */ declareNodeAlive(PSC_getMyID(), PSID_getPhysCPUs(), PSID_getVirtCPUs()); /* Initialize timeouts, etc. 
*/ PSID_initStarttime(); /* initialize various modules */ PSIDcomm_init(); /* This has to be first since it gives msgHandler hash */ PSIDclient_init(); initState(); initOptions(); initStatus(); initSignal(); PSIDspawn_init(); initPartition(); initHW(); initAccount(); initInfo(); initEnvironment(); /* Plugins shall be last since they use most of the ones before */ initPlugins(); /* Now we start all the hardware -- this might include the accounter */ PSID_log(PSID_LOG_HW, "%s: starting up the hardware\n", __func__); PSID_startAllHW(); /* * Prepare hostlist to initialize RDP and MCast */ { in_addr_t *hostlist; int i; hostlist = malloc(PSC_getNrOfNodes() * sizeof(unsigned int)); if (!hostlist) { PSID_exit(errno, "Failed to get memory for hostlist"); } for (i=0; i<PSC_getNrOfNodes(); i++) { hostlist[i] = PSIDnodes_getAddr(i); } if (PSID_config->useMCast) { /* Initialize MCast */ int MCastSock = initMCast(PSC_getNrOfNodes(), PSID_config->MCastGroup, PSID_config->MCastPort, logfile, hostlist, PSC_getMyID(), MCastCallBack); if (MCastSock<0) { PSID_exit(errno, "Error while trying initMCast"); } setDeadLimitMCast(PSID_config->deadInterval); PSID_log(-1, "MCast and "); } else { setStatusTimeout(PSID_config->statusTimeout); setMaxStatBCast(PSID_config->statusBroadcasts); setDeadLimit(PSID_config->deadLimit); setTmOutRDP(PSID_config->RDPTimeout); } /* Initialize RDP */ RDPSocket = RDP_init(PSC_getNrOfNodes(), PSIDnodes_getAddr(PSC_getMyID()), PSID_config->RDPPort, logfile, hostlist, PSIDRDP_handleMsg, RDPCallBack); if (RDPSocket<0) { PSID_exit(errno, "Error while trying initRDP"); } PSID_log(-1, "RDP (%d) initialized.\n", RDPSocket); free(hostlist); } /* Now start to listen for clients */ PSID_enableMasterSock(); /* Once RDP and the master socket are ready parents might be released */ if (pipeFD[1] > -1) { if (write(pipeFD[1], &magic, sizeof(magic)) <= 0) { /* We don't care */ } close(pipeFD[1]); } PSID_log(-1, "SelectTime=%d sec DeadInterval=%d\n", PSID_config->selectTime, 
PSID_config->deadInterval); /* Trigger status stuff, if necessary */ if (PSID_config->useMCast) { declareMaster(PSC_getMyID()); } else { int id = 0; while (id < PSC_getMyID() && (send_DAEMONCONNECT(id) < 0 && errno == EHOSTUNREACH)) { id++; } if (id == PSC_getMyID()) declareMaster(id); } /* * Main loop */ while (1) { int res = Swait(PSID_config->selectTime * 1000); if (res < 0) PSID_warn(-1, errno, "Error while Swait()"); /* Handle actions registered to main-loop */ PSID_handleLoopActions(); } }
/**
 * @brief Do memory binding.
 *
 * This is handling the binding types map_mem, mask_mem and rank.
 * The types local (default) and none are handled directly by the daemon.
 *
 * When using libnuma with API v1, this is a noop, just giving a warning.
 *
 * With API v2 the function returns without touching the memory policy
 * if none of the three binding types is requested, if memory binding
 * is disabled for the local node or via the environment variable
 * __PSI_NO_MEMBIND, if NUMA is not available or if no nodemask can be
 * allocated.
 *
 * - rank: the task's local rank ID is used as NUMA node.
 *
 * - map_mem / mask_mem: step->memBind is split at ','. The entry
 * belonging to the task's local rank ID is used; if there are fewer
 * entries than that, entries are re-used round-robin. A map entry is a
 * decimal or "0x"-prefixed hexadecimal NUMA node number, a mask entry
 * is handed to parseNUMAmask().
 *
 * A missing or invalid entry results in binding to all NUMA nodes. A
 * map entry is invalid if it contains no digits, has trailing
 * characters or exceeds the highest NUMA node.
 *
 * @param step Step structure
 * @param task Task structure
 *
 * @return No return value.
 */
void doMemBind(Step_t *step, PStask_t *task)
{
# ifndef HAVE_NUMA_ALLOCATE_NODEMASK
    mlog("%s: psslurm does not support memory binding types map_mem, mask_mem"
         " and rank with libnuma v1\n", __func__);
    fprintf(stderr, "Memory binding type not supported with used libnuma"
            " version");
    return;
# else

    const char delimiters[] = ",";
    uint32_t lTID;
    char *next, *saveptr, *ents, *myent, *endptr;
    char **entarray;
    unsigned int numents;

    struct bitmask *nodemask = NULL;

    if (!(step->memBindType & MEM_BIND_MAP)
        && !(step->memBindType & MEM_BIND_MASK)
        && !(step->memBindType & MEM_BIND_RANK)) {
        /* things are handled elsewhere */
        return;
    }

    if (!PSIDnodes_bindMem(PSC_getMyID()) || getenv("__PSI_NO_MEMBIND")) {
        // info messages already printed in doClamps()
        return;
    }

    if (numa_available()==-1) {
        fprintf(stderr, "NUMA not available:");
        return;
    }

    nodemask = numa_allocate_nodemask();
    if (!nodemask) {
        fprintf(stderr, "Allocation of nodemask failed:");
        return;
    }

    lTID = getLocalRankID(task->rank, step, step->localNodeId);

    if (step->memBindType & MEM_BIND_RANK) {
        if (lTID > (unsigned int)numa_max_node()) {
            mlog("%s: memory binding to ranks not possible for rank %d."
                 " (local rank %d > #numa_nodes %d)\n", __func__,
                 task->rank, lTID, numa_max_node());
            fprintf(stderr, "Memory binding to ranks not possible for rank %d,"
                    " local rank %u larger than max numa node %d.",
                    task->rank, lTID, numa_max_node());
            if (nodemask) numa_free_nodemask(nodemask);
            return;
        }
        if (numa_bitmask_isbitset(numa_get_mems_allowed(), lTID)) {
            numa_bitmask_setbit(nodemask, lTID);
        } else {
            mlog("%s: setting bit %d in memory mask not allowed in rank"
                 " %d\n", __func__, lTID, task->rank);
            fprintf(stderr, "Not allowed to set bit %u in memory mask"
                    " of rank %d\n", lTID, task->rank);
        }
        numa_set_membind(nodemask);
        if (nodemask) numa_free_nodemask(nodemask);
        return;
    }

    ents = ustrdup(step->memBind);
    entarray = umalloc(step->tasksToLaunch[step->localNodeId] * sizeof(char*));
    numents = 0;
    myent = NULL;
    entarray[0] = NULL;

    /* Collect entries until the one for this task's local rank is found */
    next = strtok_r(ents, delimiters, &saveptr);
    while (next && (numents < step->tasksToLaunch[step->localNodeId])) {
        entarray[numents++] = next;
        if (numents == lTID+1) {
            myent = next;
            break;
        }
        next = strtok_r(NULL, delimiters, &saveptr);
    }

    /* Fewer entries than local tasks: re-use the entries round-robin */
    if (!myent && numents) {
        myent = entarray[lTID % numents];
    }

    if (!myent) {
        numa_set_membind(numa_all_nodes_ptr);
        if (step->memBindType & MEM_BIND_MASK) {
            mlog("%s: invalid mem mask string '%s'\n", __func__, ents);
        } else if (step->memBindType & MEM_BIND_MAP) {
            mlog("%s: invalid mem map string '%s'\n", __func__, ents);
        }
        goto cleanup;
    }

    if (step->memBindType & MEM_BIND_MAP) {
        const char *digits = myent;
        int base = 10;
        unsigned long parsed;
        int maxNode = numa_max_node();

        if (strncmp(myent, "0x", 2) == 0) {
            digits = myent + 2;
            base = 16;
        }

        /*
         * Keep the full width of strtoul()'s result for validation. A
         * narrower type would wrap large values into the valid range.
         * Requiring endptr to have moved rejects entries without any
         * digit (e.g. a bare "0x").
         */
        parsed = strtoul(digits, &endptr, base);

        if (endptr != digits && *endptr == '\0'
            && maxNode >= 0 && parsed <= (unsigned long)maxNode) {
            int mynode = (int)parsed;

            if (numa_bitmask_isbitset(numa_get_mems_allowed(), mynode)) {
                numa_bitmask_setbit(nodemask, mynode);
            } else {
                mlog("%s: setting bit %d in memory mask not allowed in rank"
                     " %d\n", __func__, mynode, task->rank);
                fprintf(stderr, "Not allowed to set bit %d in memory mask"
                        " of rank %d\n", mynode, task->rank);
            }
        } else {
            mlog("%s: invalid memory map entry '%s' (%lu) for rank %d\n",
                 __func__, myent, parsed, task->rank);
            fprintf(stderr, "Invalid memory map entry '%s' for rank %d\n",
                    myent, task->rank);
            numa_set_membind(numa_all_nodes_ptr);
            goto cleanup;
        }
        mdbg(PSSLURM_LOG_PART, "%s: (bind_map) node %i local task %i"
             " memstr '%s'\n", __func__, step->localNodeId, lTID, myent);

    } else if (step->memBindType & MEM_BIND_MASK) {
        parseNUMAmask(nodemask, myent, task->rank);
    }

    numa_set_membind(nodemask);

cleanup:
    ufree(ents);
    ufree(entarray);
    if (nodemask) numa_free_nodemask(nodemask);
# endif

    return;
}