/**
 * Split one line of ps output into one string per header column.
 *
 * Columns are separated on whitespace, with special handling for STIME
 * (a date may contain a space, e.g. "May 4") and for CMD/COMMAND (must be the
 * last column; keeps its inner whitespace, drops trailing whitespace).
 *
 * @param line   The ps output line to split. NULL or "" makes the call fail.
 * @param pstime Not interpreted here; forwarded to MaybeFixStartTime().
 * @param names  Column titles; iteration stops at the first NULL entry.
 * @param start  Byte offset where each column's header begins.
 * @param end    Byte offset where each column's header ends (inclusive).
 * @param pca    With PCA_ZombieSkipEmptyColumns, a zombie's blank columns
 *               are stored as "" instead of being parsed by whitespace.
 * @param fields Output array; fields[i] is set to a string obtained from
 *               xstrdup()/xstrndup() for every column that was processed.
 * @return true when all columns were extracted (after MaybeFixStartTime()
 *         has run), false on a parse error. On failure, entries of fields
 *         that were already filled in are not released by this function.
 */
static bool SplitProcLine(const char *line,
                          time_t pstime,
                          char **names,
                          int *start,
                          int *end,
                          PsColumnAlgorithm pca,
                          char **fields)
{
    if (line == NULL || line[0] == '\0')
    {
        return false;
    }

    size_t linelen = strlen(line);

    if (LogGetGlobalLevel() >= LOG_LEVEL_DEBUG)
    {
        Log(LOG_LEVEL_DEBUG, "Parsing ps line: '%s'", line);
        // Makes the entry line up with the line above.
        PrintStringIndexLine(18, linelen);
    }

    /*
      All platforms have been verified to not produce overlapping fields with
      currently used ps tools, and hence we can parse based on space separation
      (with some caveats, see below).

      Dates may have spaces in them, like "May  4", or not, like "May4". Prefer
      to match a date without spaces as long as it contains a number, but fall
      back to parsing letters followed by space(s) and a date.

      Commands will also have extra spaces, but it is always the last field, so
      we just include spaces at this point in the parsing.

      An additional complication is that some platforms (only AIX is known at
      the time of writing) can have empty columns when a process is a
      zombie. The plan is to match for this by checking the range between start
      and end directly below the header (same byte position). If the whole range
      is whitespace we consider the entry missing. The columns are
      (presumably) guaranteed to line up correctly for this, since zombie
      processes do not have memory usage which can produce large, potentially
      alignment-altering numbers. However, we cannot do this whitespace check in
      general, because non-zombie processes may shift columns in a way that
      leaves some columns apparently (but not actually) empty.

      Take these two examples:

      AIX:
          USER       PID    PPID    PGID  %CPU  %MEM   VSZ NI ST    STIME        TIME COMMAND
       jenkins   1036484  643150 1036484   0.0   0.0   584 20 A  09:29:20    00:00:00 bash
                  254232  729146  729146                   20 Z              00:00:00 <defunct>

      Solaris 9:
          USER   PID %CPU %MEM   SZ  RSS TT      S    STIME        TIME COMMAND
       jenkins 29769  0.0  0.0  810 2976 pts/1   S 07:22:43        0:00 /usr/bin/perl ../../ps.pl
       jenkins 29835    -    -    0    0 ?       Z        -        0:00 <defunct>
       jenkins 10026  0.0  0.3 30927 143632 ?       S   Jan_21    01:18:58 /usr/jdk/jdk1.6.0_45/bin/java -jar slave.jar

      Due to how the process state 'S' is shifted under the 'S' header in the
      second example, it is not possible to separate between this and a missing
      column. Counting spaces is no good, because commands can contain an
      arbitrary number of spaces, and there is no way to know the byte position
      where a command begins. Hence the only way is to base this algorithm on
      platform and only do the "empty column detection" when:
        * The platform is known to produce empty columns for zombie processes
          (see PCA_ZombieSkipEmptyColumns)
        * The platform is known to not shift columns when the process is a
          zombie.
        * The process is a zombie.
    */

    bool zombie = false;

    if (pca == PCA_ZombieSkipEmptyColumns)
    {
        // Find out if the process is a zombie.
        for (int field = 0; names[field]; field++)
        {
            if (strcmp(names[field], "S") == 0 ||
                strcmp(names[field], "ST") == 0)
            {
                // Check for zombie state.
                for (int pos = start[field];
                     pos <= end[field] && (size_t) pos < linelen;
                     pos++)
                {
                    // 'Z' letter with word boundary on each side. The start
                    // of the line counts as a boundary, so line[-1] is never
                    // read when the state column begins at offset 0.
                    const bool boundary_before =
                        (pos == 0) || isspace((unsigned char) line[pos - 1]);
                    // pos < linelen, so line[pos + 1] is at worst the NUL.
                    const bool boundary_after =
                        isspace((unsigned char) line[pos + 1]) ||
                        line[pos + 1] == '\0';

                    if (boundary_before && line[pos] == 'Z' && boundary_after)
                    {
                        Log(LOG_LEVEL_DEBUG, "Detected zombie process, "
                            "skipping parsing of empty ps fields.");
                        zombie = true;
                    }
                }
                // Only the first state column is inspected.
                break;
            }
        }
    }

    int field = 0;
    int pos = 0;
    while (names[field])
    {
        // Some sanity checks.
        if ((size_t) pos >= linelen)
        {
            if (pca == PCA_ZombieSkipEmptyColumns && zombie)
            {
                Log(LOG_LEVEL_DEBUG, "Assuming '%s' field is empty, "
                    "since ps line '%s' is not long enough to reach under its "
                    "header.", names[field], line);
                fields[field] = xstrdup("");
                field++;
                continue;
            }
            else
            {
                Log(LOG_LEVEL_ERR, "ps output line '%s' is shorter than its "
                    "associated header.", line);
                return false;
            }
        }

        bool cmd = (strcmp(names[field], "CMD") == 0 ||
                    strcmp(names[field], "COMMAND") == 0);
        bool stime = !cmd && (strcmp(names[field], "STIME") == 0);

        // "There is a next column" and "this column is CMD/COMMAND" must
        // differ: CMD/COMMAND has to be the last column, and the last column
        // has to be CMD/COMMAND. If the two booleans are equal, the header
        // layout is not one we can parse.
        if ((names[field + 1] != NULL) == cmd)
        {
            Log(LOG_LEVEL_ERR, "Last field of ps output '%s' is not "
                "CMD/COMMAND.", line);
            return false;
        }

        // If zombie, check if field is empty.
        if (pca == PCA_ZombieSkipEmptyColumns && zombie)
        {
            int empty_pos = start[field];
            bool empty = true;
            while (empty_pos <= end[field])
            {
                // Running off the end of the line is treated like hitting a
                // non-space character (which is what reading the terminating
                // NUL amounts to), but without ever indexing past the NUL
                // when the column starts beyond the end of the line.
                if ((size_t) empty_pos >= linelen ||
                    !isspace((unsigned char) line[empty_pos]))
                {
                    empty = false;
                    break;
                }
                empty_pos++;
            }
            if (empty)
            {
                Log(LOG_LEVEL_DEBUG, "Detected empty '%s' field between "
                    "positions %d and %d\n", names[field], start[field],
                    end[field]);
                fields[field] = xstrdup("");
                pos = end[field] + 1;
                field++;
                continue;
            }
            else
            {
                Log(LOG_LEVEL_DEBUG, "Detected non-empty '%s' field between "
                    "positions %d and %d\n", names[field], start[field],
                    end[field]);
            }
        }

        // Preceding space.
        while (isspace((unsigned char) line[pos]))
        {
            pos++;
        }

        // Field.
        int last = pos;
        if (cmd)
        {
            // Last field, slurp up the rest, but discard trailing whitespace.
            // Never back up past pos: if only whitespace remained, the field
            // is empty and its length must not go negative.
            last = (int) linelen;
            while (last > pos && isspace((unsigned char) line[last - 1]))
            {
                last--;
            }
        }
        else if (stime)
        {
            while (isalpha((unsigned char) line[last]))
            {
                last++;
            }
            if (isspace((unsigned char) line[last]))
            {
                // In this case we expect spaces followed by a number.
                // It means what we first read was the month, now is the date.
                do
                {
                    last++;
                } while (isspace((unsigned char) line[last]));
                if (!isdigit((unsigned char) line[last]))
                {
                    // Two-stage format: the first pass bakes the precision
                    // into the "%.Ns" conversion used by the second pass.
                    char fmt[200];
                    xsnprintf(fmt, sizeof(fmt), "Unable to parse STIME entry in ps "
                              "output line '%%s': Expected day number after "
                              "'%%.%ds'", (last - 1) - pos);
                    Log(LOG_LEVEL_ERR, fmt, line, line + pos);
                    return false;
                }
            }
            while (line[last] && !isspace((unsigned char) line[last]))
            {
                last++;
            }
        }
        else
        {
            // Generic fields
            while (line[last] && !isspace((unsigned char) line[last]))
            {
                last++;
            }
        }

        // Make a copy and store in fields.
        fields[field] = xstrndup(line + pos, last - pos);
        Log(LOG_LEVEL_DEBUG, "'%s' field '%s' extracted from between positions "
            "%d and %d", names[field], fields[field], pos, last - 1);

        pos = last;
        field++;
    }

    MaybeFixStartTime(line, pstime, names, fields);

    return true;
}
static void GetProcessColumnNames(const char *proc, char **names, int *start, int *end) { char title[16]; int col, offset = 0; if (LogGetGlobalLevel() >= LOG_LEVEL_DEBUG) { Log(LOG_LEVEL_DEBUG, "Parsing ps line: '%s'", proc); // Makes the entry line up with the line above. PrintStringIndexLine(18, strlen(proc)); } for (col = 0; col < CF_PROCCOLS; col++) { start[col] = end[col] = -1; names[col] = NULL; } col = 0; for (const char *sp = proc; *sp != '\0'; sp++) { offset = sp - proc; if (isspace((unsigned char) *sp)) { if (start[col] != -1) { Log(LOG_LEVEL_DEBUG, "End of '%s' is %d", title, offset - 1); end[col++] = offset - 1; if (col >= CF_PROCCOLS) /* No space for more columns. */ { size_t blank = strspn(sp, " \t\r\n\f\v"); if (sp[blank]) /* i.e. that wasn't everything. */ { /* If this happens, we have more columns in * our ps output than space to store them. * Update the #define CF_PROCCOLS (last seen * in libpromises/cf3.defs.h) to a bigger * number ! */ Log(LOG_LEVEL_ERR, "Process table lacks space for last columns: %s", sp + blank); } break; } } } else if (start[col] == -1) { if (col == 0) { // The first column always extends all the way to the left. start[col] = 0; } else { start[col] = offset; } sscanf(sp, "%15s", title); Log(LOG_LEVEL_DEBUG, "Start of '%s' is %d", title, offset); names[col] = xstrdup(title); Log(LOG_LEVEL_DEBUG, "Col[%d] = '%s'", col, names[col]); } } if (end[col] == -1) { Log(LOG_LEVEL_DEBUG, "End of '%s' is %d", title, offset); end[col] = offset; } }
/**
 * Split one line of ps output into one string per header column.
 *
 * Columns are separated on whitespace, with special handling for STIME
 * (a date may contain a space, e.g. "Nov 28") and for CMD/COMMAND (must be
 * the last column; keeps its inner whitespace, drops trailing whitespace).
 *
 * @param line   The ps output line to split. NULL or "" makes the call fail.
 * @param pstime Not interpreted here; forwarded to MaybeFixStartTime().
 * @param names  Column titles; iteration stops at the first NULL entry.
 * @param start  Byte offset where each column's header begins.
 * @param end    Byte offset where each column's header ends (inclusive).
 * @param pca    With PCA_ZombieSkipEmptyColumns, blank columns of a zombie,
 *               <exiting> or <idle> process are stored as "" instead of
 *               being parsed by whitespace.
 * @param fields Output array; fields[i] is set to a string obtained from
 *               xstrdup()/xstrndup() for every column that was processed.
 * @return true when all columns were extracted (after MaybeFixStartTime()
 *         has run), false on a parse error. On failure, entries of fields
 *         that were already filled in are not released by this function.
 */
static bool SplitProcLine(const char *line,
                          time_t pstime,
                          char **names,
                          int *start,
                          int *end,
                          PsColumnAlgorithm pca,
                          char **fields)
{
    if (line == NULL || line[0] == '\0')
    {
        return false;
    }

    size_t linelen = strlen(line);

    if (LogGetGlobalLevel() >= LOG_LEVEL_DEBUG)
    {
        LogDebug(LOG_MOD_PS, "Parsing ps line: '%s'", line);
        // Makes the entry line up with the line above.
        PrintStringIndexLine(18, linelen);
    }

    /*
      All platforms have been verified to not produce overlapping fields with
      currently used ps tools, and hence we can parse based on space separation
      (with some caveats, see below).

      Dates may have spaces in them, like "May  4", or not, like "May4". Prefer
      to match a date without spaces as long as it contains a number, but fall
      back to parsing letters followed by space(s) and a date.

      Commands will also have extra spaces, but it is always the last field, so
      we just include spaces at this point in the parsing.

      An additional complication is that some platforms (only AIX is known at
      the time of writing) can have empty columns when a process is a
      zombie. The plan is to match for this by checking the range between start
      and end directly below the header (same byte position). If the whole range
      is whitespace we consider the entry missing. The columns are
      (presumably) guaranteed to line up correctly for this, since zombie
      processes do not have memory usage which can produce large, potentially
      alignment-altering numbers. However, we cannot do this whitespace check in
      general, because non-zombie processes may shift columns in a way that
      leaves some columns apparently (but not actually) empty. Zombie processes
      have state Z and command <defunct> on AIX. Similarly processes marked with
      command <exiting> also have missing columns and need to be skipped. (AIX
      only).

      Take these two examples:

      AIX:
          USER      PID     PPID     PGID  %CPU  %MEM   VSZ NI S    STIME        TIME COMMAND
          root        1        0        0   0.0   0.0   784 20 A   Nov 28    00:00:00 /etc/init
          root  1835344        1  1835344   0.0   0.0   944 20 A   Nov 28    00:00:00 /usr/lib/errdemon
          root  2097594        1  1638802   0.0   0.0   596 20 A   Nov 28    00:00:05 /usr/sbin/syncd 60
          root  3408328        1  3408328   0.0   0.0   888 20 A   Nov 28    00:00:00 /usr/sbin/srcmstr
          root  4325852  3408328  4325852   0.0   0.0   728 20 A   Nov 28    00:00:00 /usr/sbin/syslogd
          root  4784534  3408328  4784534   0.0   0.0  1212 20 A   Nov 28    00:00:00 sendmail: accepting connections
          root  5898690        1  5898690   0.0   0.0  1040 20 A   Nov 28    00:00:00 /usr/sbin/cron
                6095244  8913268  8913268                   20 Z             00:00:00 <defunct>
          root  6160866  3408328  6160866   0.0   0.0  1612 20 A   Nov 28    00:00:00 /opt/rsct/bin/IBM.ServiceRMd
                6750680 17826152 17826152                   20 Z             00:00:00 <defunct>
          root  7143692  3408328  7143692   0.0   0.0   476 20 A   Nov 28    00:00:00 /var/perf/pm/bin/pmperfrec
          root  7340384  8651136  8651136   0.0   0.0   500 20 A   Nov 28    00:00:00 [trspoolm]
          root  7602560  8978714  7602560   0.0   0.0   636 20 A   Nov 28    00:00:00 sshd: u0013628 [priv]
                7733720        -        -                    - A                    - <exiting>

      Solaris 9:
          USER   PID %CPU %MEM   SZ  RSS TT      S    STIME        TIME COMMAND
       jenkins 29769  0.0  0.0  810 2976 pts/1   S 07:22:43        0:00 /usr/bin/perl ../../ps.pl
       jenkins 29835    -    -    0    0 ?       Z        -        0:00 <defunct>
       jenkins 10026  0.0  0.3 30927 143632 ?       S   Jan_21    01:18:58 /usr/jdk/jdk1.6.0_45/bin/java -jar slave.jar

      Due to how the process state 'S' is shifted under the 'S' header in the
      second example, it is not possible to separate between this and a missing
      column. Counting spaces is no good, because commands can contain an
      arbitrary number of spaces, and there is no way to know the byte position
      where a command begins. Hence the only way is to base this algorithm on
      platform and only do the "empty column detection" when:
        * The platform is known to produce empty columns for zombie processes
          (see PCA_ZombieSkipEmptyColumns)
        * The platform is known to not shift columns when the process is a
          zombie.
        * It is a zombie / exiting / idle process
          (These states provide almost no useful info in ps output)
    */

    bool skip = false;

    if (pca == PCA_ZombieSkipEmptyColumns)
    {
        // Find out if the process is a zombie, or is exiting / idle.
        for (int field = 0; names[field] && !skip; field++)
        {
            if (strcmp(names[field], "S") == 0 ||
                strcmp(names[field], "ST") == 0)
            {
                // Check for zombie state.
                for (int pos = start[field];
                     pos <= end[field] && (size_t) pos < linelen && !skip;
                     pos++)
                {
                    // 'Z' letter with word boundary on each side. The start
                    // of the line counts as a boundary, so line[-1] is never
                    // read when the state column begins at offset 0.
                    const bool boundary_before =
                        (pos == 0) || isspace((unsigned char) line[pos - 1]);
                    // pos < linelen, so line[pos + 1] is at worst the NUL.
                    const bool boundary_after =
                        isspace((unsigned char) line[pos + 1]) ||
                        line[pos + 1] == '\0';

                    if (boundary_before && line[pos] == 'Z' && boundary_after)
                    {
                        LogDebug(LOG_MOD_PS, "Detected zombie process, "
                                 "skipping parsing of empty ps fields.");
                        skip = true;
                    }
                }
            }
            else if (strcmp(names[field], "COMMAND") == 0)
            {
                // Check for exiting or idle state: the first non-space
                // character under the COMMAND header must begin the marker.
                for (int pos = start[field];
                     pos <= end[field] && (size_t) pos < linelen && !skip;
                     pos++)
                {
                    if (!isspace((unsigned char) line[pos])) // Skip spaces
                    {
                        if (strncmp(line + pos, "<exiting>", 9) == 0 ||
                            strncmp(line + pos, "<idle>", 6) == 0)
                        {
                            LogDebug(LOG_MOD_PS, "Detected exiting/idle process, "
                                     "skipping parsing of empty ps fields.");
                            skip = true;
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }
        }
    }

    int field = 0;
    int pos = 0;
    while (names[field])
    {
        // Some sanity checks.
        if ((size_t) pos >= linelen)
        {
            if (pca == PCA_ZombieSkipEmptyColumns && skip)
            {
                LogDebug(LOG_MOD_PS, "Assuming '%s' field is empty, "
                    "since ps line '%s' is not long enough to reach under its "
                    "header.", names[field], line);
                fields[field] = xstrdup("");
                field++;
                continue;
            }
            else
            {
                Log(LOG_LEVEL_ERR, "ps output line '%s' is shorter than its "
                    "associated header.", line);
                return false;
            }
        }

        bool cmd = (strcmp(names[field], "CMD") == 0 ||
                    strcmp(names[field], "COMMAND") == 0);
        bool stime = !cmd && (strcmp(names[field], "STIME") == 0);

        // "There is a next column" and "this column is CMD/COMMAND" must
        // differ: CMD/COMMAND has to be the last column, and the last column
        // has to be CMD/COMMAND. If the two booleans are equal, the header
        // layout is not one we can parse.
        if ((names[field + 1] != NULL) == cmd)
        {
            Log(LOG_LEVEL_ERR, "Last field of ps output '%s' is not "
                "CMD/COMMAND.", line);
            return false;
        }

        // If zombie/exiting, check if field is empty.
        if (pca == PCA_ZombieSkipEmptyColumns && skip)
        {
            int empty_pos = start[field];
            bool empty = true;
            while (empty_pos <= end[field])
            {
                // Running off the end of the line is treated like hitting a
                // non-space character (which is what reading the terminating
                // NUL amounts to), but without ever indexing past the NUL
                // when the column starts beyond the end of the line.
                if ((size_t) empty_pos >= linelen ||
                    !isspace((unsigned char) line[empty_pos]))
                {
                    empty = false;
                    break;
                }
                empty_pos++;
            }
            if (empty)
            {
                LogDebug(LOG_MOD_PS, "Detected empty"
                         " '%s' field between positions %d and %d",
                         names[field], start[field], end[field]);
                fields[field] = xstrdup("");
                pos = end[field] + 1;
                field++;
                continue;
            }
            else
            {
                LogDebug(LOG_MOD_PS, "Detected non-empty "
                         "'%s' field between positions %d and %d",
                         names[field], start[field], end[field]);
            }
        }

        // Preceding space.
        while (isspace((unsigned char) line[pos]))
        {
            pos++;
        }

        // Field.
        int last = pos;
        if (cmd)
        {
            // Last field, slurp up the rest, but discard trailing whitespace.
            // Never back up past pos: if only whitespace remained, the field
            // is empty and its length must not go negative.
            last = (int) linelen;
            while (last > pos && isspace((unsigned char) line[last - 1]))
            {
                last--;
            }
        }
        else if (stime)
        {
            while (isalpha((unsigned char) line[last]))
            {
                last++;
            }
            if (isspace((unsigned char) line[last]))
            {
                // In this case we expect spaces followed by a number.
                // It means what we first read was the month, now is the date.
                do
                {
                    last++;
                } while (isspace((unsigned char) line[last]));
                if (!isdigit((unsigned char) line[last]))
                {
                    // Two-stage format: the first pass bakes the precision
                    // into the "%.Ns" conversion used by the second pass.
                    char fmt[200];
                    xsnprintf(fmt, sizeof(fmt), "Unable to parse STIME entry in ps "
                              "output line '%%s': Expected day number after "
                              "'%%.%ds'", (last - 1) - pos);
                    Log(LOG_LEVEL_ERR, fmt, line, line + pos);
                    return false;
                }
            }
            while (line[last] && !isspace((unsigned char) line[last]))
            {
                last++;
            }
        }
        else
        {
            // Generic fields
            while (line[last] && !isspace((unsigned char) line[last]))
            {
                last++;
            }
        }

        // Make a copy and store in fields.
        fields[field] = xstrndup(line + pos, last - pos);
        LogDebug(LOG_MOD_PS, "'%s' field '%s'"
                 " extracted from between positions %d and %d",
                 names[field], fields[field], pos, last - 1);

        pos = last;
        field++;
    }

    MaybeFixStartTime(line, pstime, names, fields);

    return true;
}