コード例 #1
0
ファイル: check_procs.c プロジェクト: FatLASP/nagios-plugins
/*
 * Entry point of check_procs.
 *
 * Runs PS_COMMAND (or reads input_filename when one was given), parses every
 * line after the header with PS_FORMAT/PS_VARLIST, skips this process, its
 * parent and (optionally) kernel threads, applies the filters selected in
 * `options`, and evaluates the chosen metric against procs_thresholds.
 *
 * Returns the resulting plugin state (STATE_OK, STATE_WARNING,
 * STATE_CRITICAL), or STATE_UNKNOWN when no process line could be parsed.
 *
 * NOTE: the local variable names below must not be renamed; PS_VARLIST is a
 * macro defined outside this function that expands to these identifiers.
 */
int
main (int argc, char **argv)
{
	/* NOTE(review): input_buffer is not used by the visible code any more;
	 * it is kept in case PS_VARLIST references it -- confirm and remove. */
	char *input_buffer;
	char *input_line;
	char *procprog;
	char *proc_cgroup_hierarchy;

	pid_t mypid = 0;
	pid_t myppid = 0;
	struct stat statbuf;
	dev_t mydev = 0;
	ino_t myino = 0;
	int procuid = 0;
	pid_t procpid = 0;
	pid_t procppid = 0;
	pid_t kthread_ppid = 0;
	int procvsz = 0;
	int procrss = 0;
	int procseconds = 0;
	float procpcpu = 0;
	/* zero-initialised so the zombie check never reads indeterminate bytes
	 * when sscanf stops before filling it */
	char procstat[8] = { '\0' };
	char procetime[MAX_INPUT_BUFFER] = { '\0' };
	char *procargs = NULL;
	char *tmp;

	const char *zombie = "Z";

	int resultsum = 0; /* bitmask of the filter criteria met by a process */
	int found = 0; /* counter for number of lines returned in `ps` output */
	int procs = 0; /* counter for number of processes meeting filter criteria */
	int pos = 0; /* number of spaces before 'args' in `ps` output */
	int cols; /* number of columns in ps output */
	int expected_cols = PS_COLS - 1;
	int warn = 0; /* number of processes in warn state */
	int crit = 0; /* number of processes in crit state */
	int i = 0, j = 0;
	int result = STATE_UNKNOWN;
	int ret = 0;
	int skip_self = 0; /* set when the current line is this process or a vanished one */
	output chld_out, chld_err;

	setlocale (LC_ALL, "");
	bindtextdomain (PACKAGE, LOCALEDIR);
	textdomain (PACKAGE);
	setlocale(LC_NUMERIC, "POSIX");

	input_buffer = malloc (MAX_INPUT_BUFFER);
	procprog = malloc (MAX_INPUT_BUFFER);
	proc_cgroup_hierarchy = malloc (MAX_INPUT_BUFFER);
	/* the buffers are written with strcpy below, so fail early on OOM */
	if (input_buffer == NULL || procprog == NULL || proc_cgroup_hierarchy == NULL)
		die (STATE_UNKNOWN, _("Cannot allocate memory"));

	xasprintf (&metric_name, "PROCS");
	metric = METRIC_PROCS;

	/* Parse extra opts if any */
	argv=np_extra_opts (&argc, argv, progname);

	if (process_arguments (argc, argv) == ERROR)
		usage4 (_("Could not parse arguments"));

	/* find ourself */
	mypid = getpid();
	myppid = getppid();
	if (usepid || stat_exe(mypid, &statbuf) == -1) {
		/* usepid might have been set by -T */
		usepid = 1;
	} else {
		usepid = 0;
		mydev = statbuf.st_dev;
		myino = statbuf.st_ino;
	}

	/* Set signal handling and alarm timeout */
	if (signal (SIGALRM, timeout_alarm_handler) == SIG_ERR) {
		die (STATE_UNKNOWN, _("Cannot catch SIGALRM"));
	}
	(void) alarm ((unsigned) timeout_interval);

	if (verbose >= 2)
		printf (_("CMD: %s\n"), PS_COMMAND);

	if (input_filename == NULL) {
		result = cmd_run( PS_COMMAND, &chld_out, &chld_err, 0);
		if (chld_err.lines > 0) {
			printf ("%s: %s", _("System call sent warnings to stderr"), chld_err.line[0]);
			exit(STATE_WARNING);
		}
	} else {
		result = cmd_file_read( input_filename, &chld_out, 0);
	}

	/* flush first line: j starts at 1 */
	for (j = 1; j < chld_out.lines; j++) {
		input_line = chld_out.line[j];

		if (verbose >= 3)
			printf ("%s", input_line);

		strcpy (procprog, "");
		strcpy (proc_cgroup_hierarchy, "");
		/* release the previous line's argument string before replacing it */
		free (procargs);
		xasprintf (&procargs, "%s", "");

		cols = sscanf (input_line, PS_FORMAT, PS_VARLIST);

		/* Zombie processes do not give a procprog command */
		if ( cols < expected_cols && strstr(procstat, zombie) ) {
			cols = expected_cols;
		}
		if ( cols >= expected_cols ) {
			resultsum = 0;
			free (procargs);
			xasprintf (&procargs, "%s", input_line + pos);
			strip (procargs);

			/* Some ps return full pathname for command. This removes path */
			strcpy(procprog, base_name(procprog));

			/* we need to convert the elapsed time to seconds */
			procseconds = convert_to_seconds(procetime);

			if (verbose >= 3) {
				printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s",
					procs, procuid, procvsz, procrss,
					procpid, procppid, procpcpu, procstat,
					procetime, procprog, procargs);
				if (strstr(PS_COMMAND, "cgroup") != NULL) {
					printf(" proc_cgroup_hierarchy=%s\n", proc_cgroup_hierarchy);
				} else {
					printf("\n");
				}
			}

			/*
			 * Ignore self.  With usepid we compare PIDs; otherwise we compare
			 * the device/inode returned by stat_exe() with our own.  A process
			 * that disappeared since ps ran (stat_exe fails with ENOENT) is
			 * skipped as well.  The call result is stored in ret BEFORE it is
			 * compared, so the ENOENT branch is actually reachable.
			 */
			if (usepid) {
				skip_self = (mypid == procpid);
			} else {
				ret = stat_exe(procpid, &statbuf);
				if (ret != -1)
					skip_self = (statbuf.st_dev == mydev && statbuf.st_ino == myino);
				else
					skip_self = (errno == ENOENT);
			}
			if (skip_self) {
				if (verbose >= 3)
					 printf("not considering - is myself or gone\n");
				continue;
			}
			/* Ignore parent*/
			else if (myppid == procpid) {
				if (verbose >= 3)
					 printf("not considering - is parent\n");
				continue;
			}

			/* filter kernel threads (childs of KTHREAD_PARENT)*/
			/* TODO adapt for other OSes than GNU/Linux
					sorry for not doing that, but I've no other OSes to test :-( */
			if (kthread_filter == 1) {
				/* get pid KTHREAD_PARENT */
				if (kthread_ppid == 0 && !strcmp(procprog, KTHREAD_PARENT) )
					kthread_ppid = procpid;

				if (kthread_ppid == procppid) {
					if (verbose >= 2)
						printf ("Ignore kernel thread: pid=%d ppid=%d prog=%s args=%s\n", procpid, procppid, procprog, procargs);
					continue;
				}
			}

			/* Record each requested filter that this process satisfies */
			if ((options & STAT) && (strstr (statopts, procstat)))
				resultsum |= STAT;
			if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
				resultsum |= ARGS;
			if ((options & EREG_ARGS) && procargs && (regexec(&re_args, procargs, (size_t) 0, NULL, 0) == 0))
				resultsum |= EREG_ARGS;
			if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
				resultsum |= PROG;
			if ((options & PPID) && (procppid == ppid))
				resultsum |= PPID;
			if ((options & USER) && (procuid == uid))
				resultsum |= USER;
			if ((options & VSZ)  && (procvsz >= vsz))
				resultsum |= VSZ;
			if ((options & RSS)  && (procrss >= rss))
				resultsum |= RSS;
			if ((options & PCPU)  && (procpcpu >= pcpu))
				resultsum |= PCPU;
			if (options & CGROUP_HIERARCHY) {
				/* "-" reported by ps is matched against a requested "/" */
				if(!strncmp(proc_cgroup_hierarchy,"-", 2) && !strncmp(cgroup_hierarchy,"/", 2)) {
					resultsum |= CGROUP_HIERARCHY;
				} else {
					/* otherwise compare the path that follows the first ":" of ":/" */
					if((tmp = strstr(proc_cgroup_hierarchy,":/")) != NULL) {
						if(!strcmp(tmp+1,cgroup_hierarchy)) {
							resultsum |= CGROUP_HIERARCHY;
						};
					};
				};
			};

			found++;

			/* Next line if filters not matched */
			if (!(options == resultsum || options == ALL))
				continue;

			procs++;
			if (verbose >= 2) {
				printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s",
					procuid, procvsz, procrss,
					procpid, procppid, procpcpu, procstat, 
					procetime, procprog, procargs);
				if (strstr(PS_COMMAND, "cgroup") != NULL) {
					printf(" cgroup_hierarchy=%s\n", cgroup_hierarchy);
				} else {
					printf("\n");
				}
			}

			if (metric == METRIC_VSZ)
				i = get_status ((double)procvsz, procs_thresholds);
			else if (metric == METRIC_RSS)
				i = get_status ((double)procrss, procs_thresholds);
			/* TODO? float thresholds for --metric=CPU */
			else if (metric == METRIC_CPU)
				i = get_status (procpcpu, procs_thresholds);
			else if (metric == METRIC_ELAPSED)
				i = get_status ((double)procseconds, procs_thresholds);

			/* per-process metrics: count offenders and remember their names */
			if (metric != METRIC_PROCS) {
				if (i == STATE_WARNING) {
					warn++;
					xasprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
					result = max_state (result, i);
				}
				if (i == STATE_CRITICAL) {
					crit++;
					xasprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
					result = max_state (result, i);
				}
			}
		} 
		/* This should not happen */
		else if (verbose) {
			/* report the line that failed to parse */
			printf(_("Not parseable: %s"), input_line);
		}
	}

	if (found == 0) {							/* no process lines parsed so return STATE_UNKNOWN */
		printf (_("Unable to read output\n"));
		return STATE_UNKNOWN;
	}

	if ( result == STATE_UNKNOWN ) 
		result = STATE_OK;

	/* Needed if procs found, but none match filter */
	if ( metric == METRIC_PROCS ) {
		result = max_state (result, get_status ((double)procs, procs_thresholds) );
	}

	if ( result == STATE_OK ) {
		printf ("%s %s: ", metric_name, _("OK"));
	} else if (result == STATE_WARNING) {
		printf ("%s %s: ", metric_name, _("WARNING"));
		if ( metric != METRIC_PROCS ) {
			printf (_("%d warn out of "), warn);
		}
	} else if (result == STATE_CRITICAL) {
		printf ("%s %s: ", metric_name, _("CRITICAL"));
		if (metric != METRIC_PROCS) {
			printf (_("%d crit, %d warn out of "), crit, warn);
		}
	} 
	printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs);
	
	if (strcmp(fmt,"") != 0) {
		printf (_(" with %s"), fmt);
	}

	if ( verbose >= 1 && strcmp(fails,"") )
		printf (" [%s]", fails);

	/* performance data */
	if (metric == METRIC_PROCS)
		printf (" | procs=%d;%s;%s;0;", procs,
				warning_range ? warning_range : "",
				critical_range ? critical_range : "");
	else
		printf (" | procs=%d;;;0; procs_warn=%d;;;0; procs_crit=%d;;;0;", procs, warn, crit);

	printf ("\n");
	return result;
}
コード例 #2
0
ファイル: exedist.c プロジェクト: hocks/TSCC
/*
 * Overall executable distribution using fast.
 *
 * When HAVE_FAST_DIST is set, temporarily replaces the global task list with
 * one fast_dist helper task per distinct node used by the job, runs them,
 * and on success points the configured executable at the copy under $TMPDIR
 * (or /tmp).  The original tasks, numtasks and cl_args are restored before
 * returning.
 *
 * Returns 0 on success, 1 on failure or when distribution is skipped (no
 * unique executable, fast_dist not usable, a single node, or
 * HAVE_FAST_DIST not set).
 */
int
distribute_executable(void)
{
    int ret = 1;  /* failure */

#if HAVE_FAST_DIST
    const char *fast_command = FAST_DIST_PATH;  /* from configure */
    int i;
    int next;              /* cursor into tasks_save while picking nodes */
    int have_temp = 0;     /* set once mkstemp has created the node file */
    int numtasks_save;
    int local_numtasks;
    tasks_t *tasks_save;
    cl_args_t cl_args_save;
    config_spec_t cs, root_cs;
    growstr_t *g, *root_g;
    int temp_fd;
    char *file_template;
    int port_num;
    FILE *fp;
    const char *exec_to_dist;
    int *usenodes;

    exec_to_dist = config_get_unique_executable();
    if (!exec_to_dist)
        return ret;

    if (!stat_exe(fast_command, 0))
        return ret;

    /* analyze nodes: flag each node that hosts at least one task */
    usenodes = Malloc(numnodes * sizeof(*usenodes));
    memset(usenodes, 0, numnodes * sizeof(*usenodes));
    local_numtasks = 0;
    for (i=0; i<numtasks; i++) {
        if (!usenodes[tasks[i].node]) {
            usenodes[tasks[i].node] = 1;
            ++local_numtasks;
        }
    }

    /* don't bother if there is only one node */
    if (local_numtasks <= 1) {
        free(usenodes);
        return ret;
    }

    /* create temporary node file */
    file_template = strsave("/tmp/mpiexec-fast-XXXXXX");
    temp_fd = mkstemp(file_template);
    if (temp_fd < 0)  /* mkstemp reports failure with -1; 0 is a valid fd */
        goto out;
    have_temp = 1;
    debug(1, "%s: temp node list file is %d",__func__, temp_fd);
    fp = fdopen(temp_fd, "w");
    if (!fp) {
        close(temp_fd);  /* no stream took ownership of the descriptor */
        goto out;
    }

    /* add nodes to the node file */
    for (i=0; i<numnodes; i++) {
        if (!usenodes[i])
            continue;
        if (fprintf(fp, "%s\n", nodes[i].name) <= 0) {
            fclose(fp);
            goto out;
        }
    }
    if (fclose(fp) != 0)
        goto out;

    /* pick a random port number between 6 and 8 thousand */
    srand(time(NULL));
    port_num = rand() % 2000 + 6000;

    /*
     * Back up the tasks structure and number of tasks as well as command
     * line args.
     */
    tasks_save = tasks;
    numtasks_save = numtasks;
    memcpy(&cl_args_save, cl_args, sizeof(*cl_args));

    /* set the fast_dist executable name */
    cs.exe = fast_command;
    root_cs.exe = cs.exe;

    /* set up the args to pass to the non-root nodes */
    g = growstr_init();
    growstr_printf(g, "-p %d", port_num);
    cs.args = g->s;
    debug(1, "%s: arg string for non root: %s", __func__, g->s);

    /* and to the root node */
    root_g = growstr_init();
    growstr_printf(root_g, "-p %d -r %s -e %s -n %s",
                   port_num, nodes[tasks[0].node].name, exec_to_dist, file_template);
    root_cs.args = root_g->s;
    debug(1, "%s: arg string for root: %s", __func__, root_g->s);

    /*
     * The node file is written, so usenodes can now serve as the set of
     * nodes that still need a non-root helper.  The root node (node of the
     * first original task) is handled by the last new task, so clear it.
     */
    usenodes[tasks_save[0].node] = 0;
    next = 1;

    /* build new tasks */
    cl_args->which_stdin = STDIN_NONE;
    cl_args->comm = COMM_NONE;
    tasks = Malloc(local_numtasks * sizeof(*tasks));
    numtasks = local_numtasks;
    for (i=0; i < numtasks; i++) {
        tasks[i].num_copies = 1;
        tasks[i].done = DONE_NOT_STARTED;
        /* NOTE(review): status is dereferenced right after Malloc; confirm
         * that it is an embedded array or otherwise valid at this point. */
        *tasks[i].status = -1;
        /*
         * Slight race condition in that the root wants to actively connect
         * to some other nodes, but it will retry a bit.  Put root last to
         * hope that there is a bit of delay in startup.
         */
        if (i == numtasks - 1) {
            tasks[i].node = tasks_save[0].node;
            tasks[i].conf = &root_cs;
        } else {
            /*
             * Take the next original task whose node has no helper yet, so
             * every distinct node gets exactly one even when several
             * original tasks share a node.  Exactly numtasks - 1 distinct
             * non-root nodes were flagged from tasks_save above, so the
             * scan stays within the saved task array.
             */
            while (!usenodes[tasks_save[next].node])
                ++next;
            tasks[i].node = tasks_save[next].node;
            usenodes[tasks[i].node] = 0;
            tasks[i].conf = &cs;
        }
        debug(1, "%s: task %d on %d", __func__, i, tasks[i].node);
    }

    /* spawn tasks */
    start_tasks(0);
    debug(1, "%s: tasks started", __func__);

    /* wait for them to exit */
    wait_tasks();

    /* make sure everyone finished successfully */
    ret = 0;
    for (i=0; i<numtasks; i++) {
        if (tasks[i].done == DONE_NO_EXIT_STATUS)
            continue;
        if (*tasks[i].status != 0) {
            ret = 1;
            break;
        }
    }
    debug(1, "%s: done, ret = %d", __func__, ret);

    /* put back original tasks structures */
    free(tasks);
    tasks = tasks_save;
    numtasks = numtasks_save;
    memcpy(cl_args, &cl_args_save, sizeof(*cl_args));
    growstr_free(g);
    growstr_free(root_g);

    /*
     * Update executable in old config structure to point to new /tmp exec,
     * using the same algorithm as fast_dist.  It is not deleted upon
     * completion but relies on $TMPDIR being deleted when PBS cleans up the
     * job or normal /tmp cleaning.
     */
    if (ret == 0) {
        const char *cp, *base;
        growstr_t *h;

        h = growstr_init();
        cp = getenv("TMPDIR");
        if (!cp || !*cp)
            cp = "/tmp";
        growstr_append(h, cp);

        /* base = component after the last '/' of the executable path */
        for (cp=base=exec_to_dist; *cp; cp++)
            if (*cp == '/')
                base = cp+1;
        growstr_append(h, "/");
        growstr_append(h, base);

        config_set_unique_executable(strsave(h->s));
        growstr_free(h);
    }

out:
    /* only remove the node file if mkstemp actually created one */
    if (have_temp)
        unlink(file_template);
    free(file_template);
    free(usenodes);
#endif /* HAVE_FAST_DIST */

    return ret;
}