int pt_dispatch_proc_to_job( lnk_link_t *job_list, int time_stamp, time_t last_time ) { char procnam[128]; int fd = -1; #if defined(LINUX) char buffer[BIGLINE]; lListElem *pr = NULL; SGE_STRUCT_STAT fst; unsigned long utime, stime, vsize, pid; int pos_pid = lGetPosInDescr(PRO_Type, PRO_pid); int pos_utime = lGetPosInDescr(PRO_Type, PRO_utime); int pos_stime = lGetPosInDescr(PRO_Type, PRO_stime); int pos_vsize = lGetPosInDescr(PRO_Type, PRO_vsize); int pos_groups = lGetPosInDescr(PRO_Type, PRO_groups); int pos_rel = lGetPosInDescr(PRO_Type, PRO_rel); int pos_run = lGetPosInDescr(PRO_Type, PRO_run); int pos_io = lGetPosInDescr(PRO_Type, PRO_io); int pos_group = lGetPosInDescr(GR_Type, GR_group); #else prstatus_t pr; prpsinfo_t pri; #endif #if defined(SOLARIS) || defined(ALPHA) prcred_t proc_cred; #endif int ret; u_long32 max_groups; gid_t *list; int groups=0; int pid_tmp; proc_elem_t *proc_elem = NULL; job_elem_t *job_elem = NULL; lnk_link_t *curr; double old_time = 0; uint64 old_vmem = 0; DENTER(TOP_LAYER, "pt_dispatch_proc_to_job"); max_groups = sge_sysconf(SGE_SYSCONF_NGROUPS_MAX); if (max_groups <= 0) { ERROR((SGE_EVENT, SFNMAX, MSG_SGE_NGROUPS_MAXOSRECONFIGURATIONNECESSARY)); DEXIT; return 1; } list = (gid_t*) malloc(max_groups*sizeof(gid_t)); if (list == NULL) { ERROR((SGE_EVENT, SFNMAX, MSG_SGE_PTDISPATCHPROCTOJOBMALLOCFAILED)); DEXIT; return 1; } /* find next valid entry in procfs */ while ((dent = readdir(cwd))) { char *pidname; if (!dent->d_name) continue; if (!dent->d_name[0]) continue; if (!strcmp(dent->d_name, "..") || !strcmp(dent->d_name, ".")) continue; if (dent->d_name[0] == '.') pidname = &dent->d_name[1]; else pidname = dent->d_name; if (atoi(pidname) == 0) continue; #if defined(LINUX) /* check only processes which belongs to a GE job */ if ((pr = get_pr(atoi(pidname))) != NULL) { /* set process as still running */ lSetPosBool(pr, pos_run, true); if (lGetPosBool(pr, pos_rel) != true) { continue; } } sprintf(procnam, PROC_DIR "/%s/stat", dent->d_name); if (SGE_STAT(procnam, &fst)) { if (errno != ENOENT) { #ifdef MONITOR_PDC INFO((SGE_EVENT, "could not stat %s: %s\n", procnam, strerror(errno))); #endif touch_time_stamp(dent->d_name, time_stamp, job_list); } continue; } /* TODO (SH): This does not work with Linux 2.6. I'm looking for a workaround. * If the stat file was not changed since our last parsing there is no need to do it again */ /*if (pr == NULL || fst.st_mtime > last_time) {*/ { #else sprintf(procnam, "%s/%s", PROC_DIR, dent->d_name); #endif if ((fd = open(procnam, O_RDONLY, 0)) == -1) { if (errno != ENOENT) { #ifdef MONITOR_PDC if (errno == EACCES) INFO((SGE_EVENT, "(uid:"gid_t_fmt" euid:"gid_t_fmt") could not open %s: %s\n", getuid(), geteuid(), procnam, strerror(errno))); else INFO((SGE_EVENT, "could not open %s: %s\n", procnam, strerror(errno))); #endif touch_time_stamp(dent->d_name, time_stamp, job_list); } continue; } /** ** get a list of supplementary group ids to decide ** whether this process will be needed; ** read also prstatus **/ # if defined(LINUX) /* * Read the line and append a 0-Byte */ if ((ret = read(fd, buffer, BIGLINE-1))<=0) { close(fd); if (ret == -1 && errno != ENOENT) { #ifdef MONITOR_PDC INFO((SGE_EVENT, "could not read %s: %s\n", procnam, strerror(errno))); #endif touch_time_stamp(dent->d_name, time_stamp, job_list); } continue; } buffer[BIGLINE-1] = '\0'; /* * get prstatus */ ret = sscanf(buffer, "%lu %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu %*d %*d %*d %*d %*d %*d %*u %lu", &pid, &utime, &stime, &vsize); if (ret != 4) { close(fd); continue; } if (pr == NULL) { pr = lCreateElem(PRO_Type); lSetPosUlong(pr, pos_pid, pid); lSetPosBool(pr, pos_rel, false); append_pr(pr); } lSetPosUlong(pr, pos_utime, utime); lSetPosUlong(pr, pos_stime, stime); lSetPosUlong(pr, pos_vsize, vsize); close(fd); } /* mark this proc as running */ lSetPosBool(pr, pos_run, true); /* * get number of groups; * get list of supplementary groups */ { char procnam[256]; lList *groupTable = lGetPosList(pr, pos_groups); sprintf(procnam, PROC_DIR "/%s/status", dent->d_name); if (SGE_STAT(procnam, &fst) != 0) { if (errno != ENOENT) { #ifdef MONITOR_PDC INFO((SGE_EVENT, "could not stat %s: %s\n", procnam, strerror(errno))); #endif touch_time_stamp(dent->d_name, time_stamp, job_list); } continue; } groups = 0; if (fst.st_mtime < last_time && groupTable != NULL) { lListElem *group; for_each(group, groupTable) { list[groups] = lGetPosUlong(group, pos_group); groups++; } } else {
/****** cull/dump_scan/lDumpElemFp() ****************************************** * NAME * lDumpElemFp() -- Dump a given element into FILE stream * * SYNOPSIS * int lDumpElemFp(FILE *fp, const lListElem *ep, int indent) * * FUNCTION * Dump a given element into FILE stream * * INPUTS * FILE *fp - file stream * const lListElem *ep - element * int indent - * * RESULT * int - error state * 0 - OK * -1 - Error * * NOTES * MT-NOTE: lDumpElemFp() is not MT safe ******************************************************************************/ int lDumpElemFp(FILE *fp, const lListElem *ep, int indent) { int i, ret = ~EOF; lList *tlp; lListElem *tep; char space[256]; const char *str; dstring dstr = DSTRING_INIT; DENTER(CULL_LAYER, "lDumpElemFp"); space[0] = '\0'; for (i = 0; i < indent; i++) strcat(space, INDENT_STRING); if (!fp) { LERROR(LEFILENULL); DEXIT; return -1; } if (!ep) { LERROR(LEELEMNULL); DEXIT; return -1; } ret = fprintf(fp, "%s{ \n", space); for (i = 0, ret = 0; ep->descr[i].nm != NoName && ret != EOF; i++) { char *tok = NULL; switch (mt_get_type(ep->descr[i].mt)) { case lIntT: ret = fprintf(fp, "%s/* %-20.20s */ %d\n", space, lNm2Str(ep->descr[i].nm), lGetPosInt(ep, i)); break; case lUlongT: ret = fprintf(fp, "%s/* %-20.20s */ " sge_u32 "\n", space, lNm2Str(ep->descr[i].nm), lGetPosUlong(ep, i)); break; case lStringT: str = lGetPosString(ep, i); /* quote " inside str */ if ((tok = sge_strtok(str, "\"")) != NULL) { sge_dstring_append(&dstr, tok); while ((tok=sge_strtok(NULL, "\"")) != NULL) { sge_dstring_append(&dstr, "\\\""); sge_dstring_append(&dstr, tok); } } str = sge_dstring_get_string(&dstr); ret = fprintf(fp, "%s/* %-20.20s */ \"%s\"\n", space, lNm2Str(ep->descr[i].nm), str != NULL ? str : ""); sge_dstring_clear(&dstr); break; case lHostT: str = lGetPosHost(ep, i); ret = fprintf(fp, "%s/* %-20.20s */ \"%s\"\n", space, lNm2Str(ep->descr[i].nm), str != NULL ? str : ""); break; case lFloatT: ret = fprintf(fp, "%s/* %-20.20s */ %f\n", space, lNm2Str(ep->descr[i].nm), lGetPosFloat(ep, i)); break; case lDoubleT: ret = fprintf(fp, "%s/* %-20.20s */ %f\n", space, lNm2Str(ep->descr[i].nm), lGetPosDouble(ep, i)); break; case lLongT: ret = fprintf(fp, "%s/* %-20.20s */%ld \n", space, lNm2Str(ep->descr[i].nm), lGetPosLong(ep, i)); break; case lCharT: ret = fprintf(fp, "%s/* %-20.20s */ %c\n", space, lNm2Str(ep->descr[i].nm), lGetPosChar(ep, i)); break; case lBoolT: ret = fprintf(fp, "%s/* %-20.20s */ %d\n", space, lNm2Str(ep->descr[i].nm), lGetPosBool(ep, i)); break; case lRefT: ret = fprintf(fp, "%s/* %-20.20s */ %ld\n", space, lNm2Str(ep->descr[i].nm), (long)lGetPosRef(ep, i)); break; case lObjectT: if ((tep = lGetPosObject(ep, i)) == NULL) ret = fprintf(fp, "%s/* %-20.20s */ none\n", space, lNm2Str(ep->descr[i].nm)); else { ret = fprintf(fp, "%s/* %-20.20s */ object\n", space, lNm2Str(ep->descr[i].nm)); if (ret != EOF) ret = lDumpObject(fp, tep, indent + 1); } break; case lListT: if ((tlp = lGetPosList(ep, i)) == NULL) ret = fprintf(fp, "%s/* %-20.20s */ empty\n", space, lNm2Str(ep->descr[i].nm)); else { ret = fprintf(fp, "%s/* %-20.20s */ full\n", space, lNm2Str(ep->descr[i].nm)); if (ret != EOF) ret = lDumpList(fp, tlp, indent + 1); } break; } } sge_dstring_free(&dstr); ret = fprintf(fp, "%s}\n", space); DEXIT; return (ret == EOF) ? -1 : 0; }