/****************************************************************************** * * * Function: * * * * Purpose: * * * * Parameters: * * * * Return value: * * * * Comments: * * * ******************************************************************************/ ZBX_THREAD_ENTRY(monitor_thread, args) { assert(args); zabbix_log(LOG_LEVEL_INFORMATION, "jobarg_monitor #%d started [monitor]", ((zbx_thread_args_t *) args)->thread_num); zbx_free(args); DBconnect(ZBX_DB_CONNECT_NORMAL); CONFIG_SPAN_TIME = ja_schedule_load_span(); while (ZBX_IS_RUNNING()) { zbx_setproctitle("process monitor"); process_monitor(); zbx_sleep(CONFIG_JAMONITOR_INTERVAL); } zabbix_log(LOG_LEVEL_INFORMATION, "jobarg_monitor stopped"); zbx_thread_exit(0); }
void limit_process(pid_t pid, double limit, int ignore_children) { //slice of the slot in which the process is allowed to run struct timespec twork; //slice of the slot in which the process is stopped struct timespec tsleep; //when the last twork has started struct timeval startwork; //when the last twork has finished struct timeval endwork; //initialization memset(&twork, 0, sizeof(struct timespec)); memset(&tsleep, 0, sizeof(struct timespec)); memset(&startwork, 0, sizeof(struct timeval)); memset(&endwork, 0, sizeof(struct timeval)); //last working time in microseconds unsigned long workingtime = 0; //generic list item struct list_node *node; //counter int c = 0; //get a better priority //increase_priority(); //build the family if (create_process_family(&pf, pid) == -1) { printf("create process family failed"); return ; } if (ignore_children) { //delete any process with a different pid than the father node = pf.members.first; while (node!=NULL) { struct process *proc = (struct process*)(node->data); if (proc->pid != pid) { remove_process_from_family(&pf, proc->pid); node = pf.members.first; continue; } node = node->next; } } if (!ignore_children && verbose) printf("Members in the family owned by %d: %d\n", pf.father, pf.members.count); //rate at which we are keeping active the processes (range 0-1) //1 means that the process are using all the twork slice double workingrate = -1; while(1) { if (!ignore_children && c%10==0) { //update the process family (checks only for new members) int new_children = update_process_family(&pf); if (verbose && new_children) { printf("%d new children processes detected (", new_children); int j; node = pf.members.last; for (j=0; j<new_children; j++) { printf("%d", ((struct process*)(node->data))->pid); if (j<new_children-1) printf(" "); node = node->previous; } printf(")\n"); } } if (pf.members.count==0) { if (verbose) printf("No more processes.\n"); break; } //total cpu actual usage (range 0-1) //1 means that the processes are using 100% cpu double pcpu = -1; //estimate how much the controlled processes are using the cpu in the working interval node = pf.members.first; while (node!=NULL) { struct process *proc = (struct process*)(node->data); if (proc->is_zombie) { //process is zombie, remove it from family fprintf(stderr,"Process %d is zombie!\n", proc->pid); remove_process_from_family(&pf, proc->pid); node = pf.members.first; continue; } if (process_monitor(proc) != 0) { //process is dead, remove it from family if (verbose) fprintf(stderr,"Process %d dead!\n", proc->pid); remove_process_from_family(&pf, proc->pid); node = pf.members.first; continue; } node = node->next; if (proc->cpu_usage<0) { continue; } if (pcpu<0) pcpu = 0; pcpu += proc->cpu_usage; } //adjust work and sleep time slices if (pcpu < 0) { //it's the 1st cycle, initialize workingrate pcpu = limit; workingrate = limit; twork.tv_nsec = TIME_SLOT*limit*1000; } else { //adjust workingrate workingrate = MIN(workingrate / pcpu * limit, 1); twork.tv_nsec = TIME_SLOT*1000*workingrate; } tsleep.tv_nsec = TIME_SLOT*1000-twork.tv_nsec; if (verbose) { if (c%200==0) printf("\n%%CPU\twork quantum\tsleep quantum\tactive rate\n"); if (c%10==0 && c>0) printf("%0.2lf%%\t%6ld us\t%6ld us\t%0.2lf%%\n",pcpu*100,twork.tv_nsec/1000,tsleep.tv_nsec/1000,workingrate*100); } //resume processes node = pf.members.first; while (node!=NULL) { struct process *proc = (struct process*)(node->data); if (kill(proc->pid,SIGCONT)!=0) { //process is dead, remove it from family if (verbose) fprintf(stderr,"Process %d dead!\n", proc->pid); remove_process_from_family(&pf, proc->pid); node = pf.members.first; continue; } node=node->next; } //now processes are free to run (same working slice for all) gettimeofday(&startwork, NULL); nanosleep(&twork,NULL); gettimeofday(&endwork, NULL); workingtime = timediff(&endwork,&startwork); long delay = workingtime-twork.tv_nsec/1000; if (c>0 && delay>10000) { //delay is too much! signal to user? //fprintf(stderr, "%d %ld us\n", c, delay); } if (tsleep.tv_nsec>0) { //stop only if tsleep>0, instead it's useless node=pf.members.first; while (node!=NULL) { struct process *proc = (struct process*)(node->data); if (kill(proc->pid,SIGSTOP)!=0) { //process is dead, remove it from family if (verbose) fprintf(stderr,"Process %d dead!\n", proc->pid); remove_process_from_family(&pf, proc->pid); node=pf.members.first; continue; } node=node->next; } //now the processes are sleeping nanosleep(&tsleep,NULL); } c++; } cleanup_process_family(&pf); }