/**
 * @brief Handle a PSP_ACCOUNT_CHILD message.
 *
 * This message is sent when a new child is started. The message is
 * decoded in this order: the logger's task ID, the child's rank, its
 * uid and its gid. The sender of the message is registered as a client
 * of type ACC_CHILD_PSIDCHILD and attached to the job identified by the
 * logger; the job is created if it is not known yet.
 *
 * If any field cannot be extracted, the message is dropped before a job
 * or client is created or modified, so no bookkeeping is ever done with
 * indeterminate values.
 *
 * @param msg The message to handle.
 *
 * @return No return value.
 */
static void handleAccountChild(DDTypedBufferMsg_t *msg)
{
    PStask_ID_t logger;
    int32_t rank;
    uid_t uid;
    gid_t gid;
    size_t used = 0;

    /*
     * Decode the whole message up front.
     *
     * NOTE(review): relies on PSP_getTypedMsgBuf() reporting success as
     * a true (non-zero) return value -- confirm against its declaration.
     * The extractor is handed the caller's name and the field name,
     * presumably to emit its own diagnostics; nothing is logged here.
     */
    if (!PSP_getTypedMsgBuf(msg, &used, __func__, "logger",
                            &logger, sizeof(logger))
        || !PSP_getTypedMsgBuf(msg, &used, __func__, "rank",
                               &rank, sizeof(rank))
        || !PSP_getTypedMsgBuf(msg, &used, __func__, "uid",
                               &uid, sizeof(uid))
        || !PSP_getTypedMsgBuf(msg, &used, __func__, "gid",
                               &gid, sizeof(gid))) {
        return;
    }

    /* get job information, create the job on its first child */
    Job_t *job = findJobByLogger(logger);
    if (!job) job = addJob(logger);

    Client_t *client = addClient(msg->header.sender, ACC_CHILD_PSIDCHILD);

    /* only the very first child of a job triggers the start monitor */
    bool triggerMonitor = !job->latestChildStart;
    job->latestChildStart = time(NULL);
    if (triggerMonitor) triggerJobStartMonitor();

    if (!findHist(logger)) saveHist(logger);

    job->nrOfChilds++;

    client->logger = logger;
    client->uid = uid;
    client->gid = gid;
    client->job = job;
    client->rank = rank;
}
void setHist(const char* name, const int lc=0, const int ls=0, const int lw=0, const int msz=0, const int mst=0, const double norm = 1, const char* xtitle = "", const char* ytitle = "") { TH1F * h; if (h=dynamic_cast<TH1F*>(findHist(name))) setHist(h,lc,ls,lw,msz,mst,norm,xtitle,ytitle); }
/**
 * @brief Handle a PSP_ACCOUNT_END message.
 *
 * This function will add extended accounting information to an
 * account end message.
 *
 * Two cases are told apart by comparing the sender with the logger's
 * task ID stored in the message:
 *
 * - The logger itself sent the message: the logger's job is marked as
 *   complete and the message is left untouched.
 *
 * - Otherwise the message reports the end of a child: accounting of the
 *   child is stopped, its rusage, page size, walltime and status are
 *   taken from the message, and -- unless the child is a jobscript --
 *   the message is extended in place by the collected maximum and
 *   average resource values. Finally the exit is counted for the
 *   child's job; once all children have exited the job is marked as
 *   complete.
 *
 * If the logger's task ID or the child's pid cannot be extracted, the
 * message is ignored since there is nothing reliable to look up.
 *
 * @param msg The message to handle. It is modified in place when
 * extended information is appended.
 *
 * @return No return value.
 */
static void handleAccountEnd(DDTypedBufferMsg_t *msg)
{
    PStask_ID_t sender = msg->header.sender, logger;
    size_t used = 0;

    mdbg(PSACC_LOG_ACC_MSG, "%s(%s)\n", __func__, PSC_printTID(sender));

    /*
     * NOTE(review): the checks below rely on PSP_getTypedMsgBuf()
     * reporting success as a true (non-zero) return value -- confirm
     * against its declaration.
     */
    if (!PSP_getTypedMsgBuf(msg, &used, __func__, "logger",
                            &logger, sizeof(logger))) {
        return;
    }

    /* end msg from logger */
    if (sender == logger) {
        Job_t *job = findJobByLogger(logger);
        if (!job) {
            mlog("%s: job for logger %s not found\n", __func__,
                 PSC_printTID(logger));
            return;
        }

        job->endTime = time(NULL);
        job->complete = true;

        if (job->childsExit < job->nrOfChilds) {
            mdbg(PSACC_LOG_VERBOSE, "%s: logger %s exited, but %i"
                 " children are still alive\n", __func__,
                 PSC_printTID(logger), job->nrOfChilds - job->childsExit);
        }
        return;
    }

    /* rank, uid and gid are of no interest here; read them to advance */
    uint64_t dummy;
    pid_t child;
    if (!PSP_getTypedMsgBuf(msg, &used, __func__, "rank(skipped)",
                            &dummy, sizeof(int32_t))
        || !PSP_getTypedMsgBuf(msg, &used, __func__, "uid(skipped)",
                               &dummy, sizeof(uid_t))
        || !PSP_getTypedMsgBuf(msg, &used, __func__, "gid(skipped)",
                               &dummy, sizeof(gid_t))
        || !PSP_getTypedMsgBuf(msg, &used, __func__, "pid",
                               &child, sizeof(child))) {
        return;
    }

    /* calculate childs TaskID */
    PSnodes_ID_t childNode = PSC_getID(sender);
    PStask_ID_t childTID = PSC_getTID(childNode, child);

    /* find the child exiting */
    Client_t *client = findClientByTID(childTID);
    if (!client) {
        if (!findHist(logger)) {
            /*
             * One mlog() call per task ID, as for the logger mismatch
             * below: PSC_printTID() presumably hands out a single
             * shared buffer, so two results must not be consumed by the
             * same call.
             */
            mlog("%s: end msg for unknown client %s", __func__,
                 PSC_printTID(childTID));
            mlog(" from %s\n", PSC_printTID(sender));
        }
        return;
    }

    if (client->type != ACC_CHILD_JOBSCRIPT && client->logger != logger) {
        mlog("%s: logger mismatch (%s/", __func__, PSC_printTID(logger));
        mlog("%s)\n", PSC_printTID(client->logger));
    }

    /* stop accounting of dead child */
    client->doAccounting = false;
    client->endTime = time(NULL);

    /*
     * The remaining payload is taken over on a best-effort basis:
     * bailing out here would skip counting the child's exit below.
     * NOTE(review): results of these extractions are not checked.
     */
    PSP_getTypedMsgBuf(msg, &used, __func__, "rusage",
                       &client->data.rusage, sizeof(client->data.rusage));
    PSP_getTypedMsgBuf(msg, &used, __func__, "pageSize",
                       &client->data.pageSize,
                       sizeof(client->data.pageSize));
    PSP_getTypedMsgBuf(msg, &used, __func__, "walltime",
                       &client->walltime, sizeof(client->walltime));
    PSP_getTypedMsgBuf(msg, &used, __func__, "status",
                       &client->status, sizeof(client->status));

    mdbg(PSACC_LOG_VERBOSE, "%s: child rank %i pid %i logger %s uid %i"
         " gid %i msg type %s finished\n", __func__, client->rank, child,
         PSC_printTID(client->logger), client->uid, client->gid,
         getAccountMsgType(msg->type));

    if (client->type == ACC_CHILD_JOBSCRIPT) return; /* drop message */

    /* Now add further information to the message */

    /* cut the message after the part consumed so far */
    msg->header.len = offsetof(DDTypedBufferMsg_t, buf) + used;

    uint32_t one = 1;
    PSP_putTypedMsgBuf(msg, __func__, "extended info", &one, sizeof(one));
    PSP_putTypedMsgBuf(msg, __func__, "maxRss",
                       &client->data.maxRss, sizeof(client->data.maxRss));
    PSP_putTypedMsgBuf(msg, __func__, "maxVsize",
                       &client->data.maxVsize,
                       sizeof(client->data.maxVsize));

    /* the thread maximum is put into the message as a 32 bit value */
    uint32_t myMaxThreads = client->data.maxThreads;
    PSP_putTypedMsgBuf(msg, __func__, "maxThreads",
                       &myMaxThreads, sizeof(myMaxThreads));
    PSP_putTypedMsgBuf(msg, __func__, "session",
                       &client->data.session,
                       sizeof(client->data.session));

    /* add size of average used mem; 0 if nothing was sampled */
    uint64_t avgRss = 0;
    if (client->data.avgRssTotal >= 1 && client->data.avgRssCount >= 1) {
        avgRss = client->data.avgRssTotal / client->data.avgRssCount;
    }
    PSP_putTypedMsgBuf(msg, __func__, "avgRss", &avgRss, sizeof(avgRss));

    /* add size of average used vmem; 0 if nothing was sampled */
    uint64_t avgVsize = 0;
    if (client->data.avgVsizeTotal >= 1
        && client->data.avgVsizeCount >= 1) {
        avgVsize = client->data.avgVsizeTotal / client->data.avgVsizeCount;
    }
    PSP_putTypedMsgBuf(msg, __func__, "avgVsize",
                       &avgVsize, sizeof(avgVsize));

    /* add number of average threads; 0 if nothing was sampled */
    uint64_t avgThrds = 0;
    if (client->data.avgThreadsTotal >= 1
        && client->data.avgThreadsCount >= 1) {
        avgThrds =
            client->data.avgThreadsTotal / client->data.avgThreadsCount;
    }
    PSP_putTypedMsgBuf(msg, __func__, "avgThrds",
                       &avgThrds, sizeof(avgThrds));

    /* find the job */
    Job_t *job = findJobByLogger(client->logger);
    if (!job) {
        mlog("%s: job for child %i not found\n", __func__, child);
        return;
    }

    job->childsExit++;
    if (job->childsExit < job->nrOfChilds) return;

    /* all children exited */
    if (globalCollectMode && PSC_getID(logger) != PSC_getMyID()) {
        forwardJobData(job, true);
        sendAggDataFinish(logger);
    }

    job->complete = true;
    job->endTime = time(NULL);

    mdbg(PSACC_LOG_VERBOSE, "%s: job complete [%i:%i]\n", __func__,
         job->childsExit, job->nrOfChilds);

    /* jobs whose logger does not run on this node are dropped right away */
    if (PSC_getID(job->logger) != PSC_getMyID()) {
        deleteJob(job->logger);
    }
}