/*
 * Verify and normalise the parsed module options.
 *
 * Resolves the automatic log mode, opens the logfile when file logging is
 * active, fills in the default result queue and switches on the default
 * queues if nothing was selected by hand.
 *
 * Returns GM_OK on success, GM_ERROR when no server has been configured.
 */
static int verify_options(mod_gm_opt_t *opt) {
    /* automatic log mode: log to file when a logfile is set, to the core otherwise */
    if (opt->logmode == GM_LOG_MODE_AUTO) {
        opt->logmode = opt->logfile ? GM_LOG_MODE_FILE : GM_LOG_MODE_CORE;
    }

    /* open new logfile; a failure is only reported, it is not fatal */
    if (opt->logmode == GM_LOG_MODE_FILE && opt->logfile && opt->debug_level < GM_LOG_STDOUT) {
        opt->logfile_fp = fopen(opt->logfile, "a+");
        if (!opt->logfile_fp) {
            gm_log( GM_LOG_ERROR, "error opening logfile: %s\n", opt->logfile );
        }
    }

    /* at least one server is mandatory */
    if (opt->server_num == 0) {
        gm_log( GM_LOG_ERROR, "please specify at least one server\n" );
        return GM_ERROR;
    }

    if (opt->result_queue == NULL) {
        opt->result_queue = GM_DEFAULT_RESULT_QUEUE;
    }

    /* no queue chosen explicitly -> serve hosts, services and events */
    if (opt->set_queues_by_hand == 0) {
        gm_log( GM_LOG_DEBUG, "starting client with default queues\n" );
        opt->hosts    = GM_ENABLED;
        opt->services = GM_ENABLED;
        opt->events   = GM_ENABLED;
    }

    return GM_OK;
}
/* callback for task completed */ void *result_worker( void * data ) { gearman_worker_st worker; int *worker_num = (int*)data; gm_log( GM_LOG_TRACE, "worker %d started\n", *worker_num ); pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, NULL); set_worker(&worker); pthread_cleanup_push ( cancel_worker_thread, (void*) &worker); while ( 1 ) { gearman_return_t ret; ret = gearman_worker_work( &worker ); if ( ret != GEARMAN_SUCCESS ) { gm_log( GM_LOG_ERROR, "worker error: %s\n", gearman_worker_error( &worker ) ); gearman_job_free_all( &worker ); gearman_worker_free( &worker ); sleep(1); set_worker(&worker ); } } pthread_cleanup_pop(0); return NULL; }
/*
 * Create the gearman client used for the duplicate servers.
 *
 * Ignores SIGPIPE, initialises the client and adds every entry of the
 * NULL-terminated server_list to it. On success the client is stored in
 * current_client_dup.
 *
 * Returns GM_OK on success, GM_ERROR if the client cannot be created or a
 * server cannot be added.
 */
int create_client_dup( gm_server_t * server_list[GM_LISTSIZE], gearman_client_st *client ) {
    int idx;

    gm_log( GM_LOG_TRACE, "create_client_dup()\n" );

    signal(SIGPIPE, SIG_IGN);

    client = gearman_client_create(client);
    if (client == NULL) {
        gm_log( GM_LOG_ERROR, "Memory allocation failure on client creation\n" );
        return GM_ERROR;
    }

    for (idx = 0; server_list[idx] != NULL; idx++) {
        gearman_return_t rc = gearman_client_add_server(client,
                                                        server_list[idx]->host,
                                                        server_list[idx]->port);
        if (rc != GEARMAN_SUCCESS) {
            gm_log( GM_LOG_ERROR, "client error: %s\n", gearman_client_error( client ) );
            return GM_ERROR;
        }
    }

    current_client_dup = client;
    return GM_OK;
}
/*
 * Signal handler / shutdown routine of the parent process.
 *
 * Removes the pidfile, stops all children, detaches and removes the shared
 * memory segment, frees the options and terminates with EXIT_SUCCESS.
 */
void clean_exit(int sig) {
    gm_log( GM_LOG_TRACE, "clean_exit(%d)\n", sig);

    if (mod_gm_opt->pidfile != NULL) {
        unlink(mod_gm_opt->pidfile);
    }

    /* stop all children */
    stop_children(GM_WORKER_STOP);

    /* detach shm */
    if (shmdt(shm) < 0) {
        perror("shmdt");
    }

    /*
     * mark the shared memory for removal,
     * it will be removed when the last client detaches
     */
    if (shmctl(shmid, IPC_RMID, 0) != -1) {
        gm_log( GM_LOG_DEBUG, "shared memory deleted\n");
    } else {
        perror("shmctl");
    }

    gm_log( GM_LOG_INFO, "mod_gearman worker exited\n");
    mod_gm_free_opt(mod_gm_opt);
    exit(EXIT_SUCCESS);
}
/*
 * Calculate the new target number of workers.
 *
 * min/max      configured limits
 * cur_workers  number of workers currently alive
 * cur_jobs     number of workers currently busy
 *
 * Returns the configured minimum when no worker exists, max when the pool
 * is already at max, otherwise min or - under high utilisation - the
 * current number plus the spawn rate, capped at max.
 */
int adjust_number_of_worker(int min, int max, int cur_workers, int cur_jobs) {
    int busy_percent;
    int idle_workers;
    int wanted = min;

    /* nothing running yet: avoid the division by zero below */
    if (cur_workers == 0) {
        gm_log( GM_LOG_TRACE3, "adjust_number_of_worker(min %d, max %d, worker %d, jobs %d) -> %d\n", min, max, cur_workers, cur_jobs, mod_gm_opt->min_worker);
        return mod_gm_opt->min_worker;
    }

    busy_percent = cur_jobs * 100 / cur_workers;
    idle_workers = cur_workers - cur_jobs;

    gm_log( GM_LOG_TRACE3, "adjust_number_of_worker(min %d, max %d, worker %d, jobs %d) = %d%% running\n", min, max, cur_workers, cur_jobs, busy_percent);

    if (cur_workers == max) {
        return max;
    }

    /* more than 90% busy or at most two idle workers left -> grow by spawn rate */
    if (cur_jobs > 0 && (busy_percent > 90 || idle_workers <= 2)) {
        gm_log( GM_LOG_TRACE, "starting %d new workers\n", mod_gm_opt->spawn_rate);
        wanted = cur_workers + mod_gm_opt->spawn_rate;
    }

    /* dont go over the top */
    wanted = (wanted > max) ? max : wanted;

    if (wanted != cur_workers) {
        gm_log( GM_LOG_TRACE3, "adjust_number_of_worker(min %d, max %d, worker %d, jobs %d) = %d%% running -> %d\n", min, max, cur_workers, cur_jobs, busy_percent, wanted);
    }

    return wanted;
}
/* handle process events */ static int handle_process_events( int event_type, void *data ) { int x=0; struct nebstruct_process_struct *ps; gm_log( GM_LOG_TRACE, "handle_process_events(%i, data)\n", event_type ); ps = ( struct nebstruct_process_struct * )data; if ( ps->type == NEBTYPE_PROCESS_EVENTLOOPSTART ) { register_neb_callbacks(); start_threads(); send_now = TRUE; /* verify names of supplied groups * this cannot be done befor naemon has finished reading his config * verify local servicegroups names */ while ( mod_gm_opt->local_servicegroups_list[x] != NULL ) { servicegroup * temp_servicegroup = find_servicegroup( mod_gm_opt->local_servicegroups_list[x] ); if( temp_servicegroup == NULL ) { gm_log( GM_LOG_INFO, "Warning: servicegroup '%s' does not exist, possible typo?\n", mod_gm_opt->local_servicegroups_list[x] ); } x++; } /* verify local hostgroup names */ x = 0; while ( mod_gm_opt->local_hostgroups_list[x] != NULL ) { hostgroup * temp_hostgroup = find_hostgroup( mod_gm_opt->local_hostgroups_list[x] ); if( temp_hostgroup == NULL ) { gm_log( GM_LOG_INFO, "Warning: hostgroup '%s' does not exist, possible typo?\n", mod_gm_opt->local_hostgroups_list[x] ); } x++; } /* verify servicegroups names */ x = 0; while ( mod_gm_opt->servicegroups_list[x] != NULL ) { servicegroup * temp_servicegroup = find_servicegroup( mod_gm_opt->servicegroups_list[x] ); if( temp_servicegroup == NULL ) { gm_log( GM_LOG_INFO, "Warning: servicegroup '%s' does not exist, possible typo?\n", mod_gm_opt->servicegroups_list[x] ); } x++; } /* verify hostgroup names */ x = 0; while ( mod_gm_opt->hostgroups_list[x] != NULL ) { hostgroup * temp_hostgroup = find_hostgroup( mod_gm_opt->hostgroups_list[x] ); if( temp_hostgroup == NULL ) { gm_log( GM_LOG_INFO, "Warning: hostgroup '%s' does not exist, possible typo?\n", mod_gm_opt->hostgroups_list[x] ); } x++; } } return NEB_OK; }
/* deregister all events */ int nebmodule_deinit( int flags, int reason ) { int x; gm_log( GM_LOG_TRACE, "nebmodule_deinit(%i, %i)\n", flags, reason ); /* should be removed already, but just for the case it wasn't */ neb_deregister_callback( NEBCALLBACK_PROCESS_DATA, gearman_module_handle ); neb_deregister_callback( NEBCALLBACK_TIMED_EVENT_DATA, gearman_module_handle ); /* only if we have hostgroups defined or general hosts enabled */ if ( mod_gm_opt->do_hostchecks == GM_ENABLED && ( mod_gm_opt->hostgroups_num > 0 || mod_gm_opt->hosts == GM_ENABLED )) neb_deregister_callback( NEBCALLBACK_HOST_CHECK_DATA, gearman_module_handle ); /* only if we have groups defined or general services enabled */ if ( mod_gm_opt->servicegroups_num > 0 || mod_gm_opt->hostgroups_num > 0 || mod_gm_opt->services == GM_ENABLED ) neb_deregister_callback( NEBCALLBACK_SERVICE_CHECK_DATA, gearman_module_handle ); if ( mod_gm_opt->events == GM_ENABLED ) neb_deregister_callback( NEBCALLBACK_EVENT_HANDLER_DATA, gearman_module_handle ); if ( mod_gm_opt->perfdata == GM_ENABLED ) { neb_deregister_callback( NEBCALLBACK_HOST_CHECK_DATA, gearman_module_handle ); neb_deregister_callback( NEBCALLBACK_SERVICE_CHECK_DATA, gearman_module_handle ); } /* register export callbacks */ for(x=0;x<GM_NEBTYPESSIZE;x++) { if(mod_gm_opt->exports[x]->elem_number > 0) neb_deregister_callback( x, gearman_module_handle ); } neb_deregister_callback( NEBCALLBACK_PROCESS_DATA, gearman_module_handle ); gm_log( GM_LOG_DEBUG, "deregistered callbacks\n" ); /* stop result threads */ for(x = 0; x < result_threads_running; x++) { pthread_cancel(result_thr[x]); pthread_join(result_thr[x], NULL); } /* cleanup */ free_client(&client); /* close old logfile */ if(mod_gm_opt->logfile_fp != NULL) { fclose(mod_gm_opt->logfile_fp); } mod_gm_free_opt(mod_gm_opt); return NEB_OK; }
/* do a clean exit */ void clean_worker_exit(int sig) { int *shm; /* give us 30 seconds to stop */ signal(SIGALRM, exit_sighandler); alarm(30); gm_log( GM_LOG_TRACE, "clean_worker_exit(%d)\n", sig); /* clear gearmans job, otherwise it would be retried and retried */ if(current_gearman_job != NULL) { if(sig == SIGINT) { /* if worker stopped with sigint, let the job retry */ } else { send_failed_result(current_job, sig); gearman_job_send_complete(current_gearman_job, NULL, 0); } /* make sure no processes are left over */ kill_child_checks(); } gm_log( GM_LOG_TRACE, "cleaning worker\n"); gearman_worker_unregister_all(&worker); gearman_job_free_all( &worker ); gm_log( GM_LOG_TRACE, "cleaning client\n"); gearman_client_free( &client ); mod_gm_free_opt(mod_gm_opt); #ifdef EMBEDDEDPERL deinit_embedded_perl(0); #endif if(worker_run_mode == GM_WORKER_STANDALONE) exit( EXIT_SUCCESS ); /* Now we attach the segment to our data space. */ if((shm = shmat(shmid, NULL, 0)) == (int *) -1) { perror("shmat"); gm_log( GM_LOG_TRACE, "worker finished: %d\n", getpid() ); _exit( EXIT_FAILURE ); } /* clean our pid from worker list */ if( shm[shm_index] == current_pid || shm[shm_index] == -current_pid ) { shm[shm_index] = -1; } /* detach from shared memory */ if(shmdt(shm) < 0) perror("shmdt"); _exit( EXIT_SUCCESS ); }
/* start up new worker */ int make_new_child(int mode) { pid_t pid = 0; int next_shm_index; gm_log( GM_LOG_TRACE, "make_new_child(%d)\n", mode); if(mode == GM_WORKER_STATUS) { gm_log( GM_LOG_TRACE, "forking status worker\n"); next_shm_index = 3; } else { gm_log( GM_LOG_TRACE, "forking worker\n"); next_shm_index = get_next_shm_index(); } signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); /* fork a child process */ pid=fork(); /* an error occurred while trying to fork */ if(pid==-1){ perror("fork"); gm_log( GM_LOG_ERROR, "fork error\n" ); return GM_ERROR; } /* we are in the child process */ else if(pid==0){ gm_log( GM_LOG_DEBUG, "child started with pid: %d\n", getpid() ); shm[next_shm_index] = -getpid(); /* do the real work */ #ifdef EMBEDDEDPERL worker_client(mode, next_shm_index, shmid, start_env); #else worker_client(mode, next_shm_index, shmid); #endif exit(EXIT_SUCCESS); } /* parent */ else if(pid > 0){ signal(SIGINT, clean_exit); signal(SIGTERM,clean_exit); shm[next_shm_index] = -pid; } return GM_OK; }
/* start new worker if needed */ void check_worker_population() { int x, now, status, target_number_of_workers; gm_log( GM_LOG_TRACE3, "check_worker_population()\n"); now = (int)time(NULL); /* collect finished workers */ while(waitpid(-1, &status, WNOHANG) > 0) gm_log( GM_LOG_TRACE, "waitpid() worker exited with: %d\n", status); /* set current worker number */ count_current_worker(GM_ENABLED); /* check last check time, force restart all worker if there is no result in 2 minutes */ if( shm[SHM_WORKER_LAST_CHECK] < (now - 120) ) { gm_log( GM_LOG_INFO, "no checks in 2minutes, restarting all workers\n", shm[SHM_WORKER_LAST_CHECK]); shm[SHM_WORKER_LAST_CHECK] = now; for(x=SHM_SHIFT; x < mod_gm_opt->max_worker+SHM_SHIFT; x++) { save_kill(shm[x], SIGINT); } sleep(3); for(x=SHM_SHIFT; x < mod_gm_opt->max_worker+SHM_SHIFT; x++) { save_kill(shm[x], SIGKILL); shm[x] = -1; } } /* check if status worker died */ if( shm[SHM_STATUS_WORKER_PID] == -1 ) { make_new_child(GM_WORKER_STATUS); } /* keep up minimum population */ for (x = current_number_of_workers; x < mod_gm_opt->min_worker; x++) { make_new_child(GM_WORKER_MULTI); current_number_of_workers++; } /* check every second if we need to increase worker population */ if(last_time_increased >= now) return; target_number_of_workers = adjust_number_of_worker(mod_gm_opt->min_worker, mod_gm_opt->max_worker, current_number_of_workers, current_number_of_jobs); for (x = current_number_of_workers; x < target_number_of_workers; x++) { last_time_increased = now; /* top up the worker pool */ make_new_child(GM_WORKER_MULTI); } return; }
/*
 * Gearman worker function answering status queries.
 *
 * job          the status job; its workload is only logged
 * context      unused
 * result_size  out: size of the returned buffer (0 on failure)
 * ret_ptr      out: always set to GEARMAN_SUCCESS
 *
 * Returns a heap buffer of GM_BUFFERSIZE bytes holding the status line
 * built from the shared memory counters, or NULL if the buffer could not
 * be allocated or the shared memory could not be attached. The returned
 * buffer is handed over to the caller.
 *
 * Every call also increments the jobs-done counter in shared memory.
 */
void *return_status( gearman_job_st *job, void *context, size_t *result_size, gearman_return_t *ret_ptr ) {
    char workload[GM_BUFFERSIZE];
    const char *payload;
    size_t wsize;
    int *shm;
    char *result;

    gm_log( GM_LOG_TRACE, "return_status()\n" );

    /* context is unused */
    (void)context;

    /* get the data, never copy more than the local buffer can hold */
    wsize   = (size_t)gearman_job_workload_size(job);
    payload = (const char *)gearman_job_workload(job);
    if (payload == NULL) {
        wsize = 0;
    } else if (wsize >= sizeof workload) {
        wsize = sizeof workload - 1;
    }
    if (wsize > 0) {
        memcpy(workload, payload, wsize);
    }
    workload[wsize] = '\0';

    gm_log( GM_LOG_TRACE, "got status job %s\n", gearman_job_handle( job ) );
    gm_log( GM_LOG_TRACE, "%d +++>\n%s\n<+++\n", (int)strlen(workload), workload );

    /* set result pointer to success */
    *ret_ptr = GEARMAN_SUCCESS;

    /*
     * The whole buffer is reported as result, so it is zero-initialised
     * to avoid sending uninitialised heap memory after the status text.
     */
    result = calloc(1, GM_BUFFERSIZE);
    if (result == NULL) {
        gm_log( GM_LOG_ERROR, "return_status: out of memory\n" );
        *result_size = 0;
        return NULL;
    }
    *result_size = GM_BUFFERSIZE;

    /* give us 10 seconds to get state */
    signal(SIGALRM, exit_sighandler);
    alarm(10);

    /* Now we attach the segment to our data space. */
    if ((shm = shmat(shmid, NULL, 0)) == (int *) -1) {
        perror("shmat");
        *result_size = 0;
        alarm(0);
        free(result);
        return NULL;
    }

    snprintf(result, GM_BUFFERSIZE, "%s has %i worker and is working on %i jobs. Version: %s|worker=%i;;;%i;%i jobs=%ic", hostname, shm[SHM_WORKER_TOTAL], shm[SHM_WORKER_RUNNING], GM_VERSION, shm[SHM_WORKER_TOTAL], mod_gm_opt->min_worker, mod_gm_opt->max_worker, shm[SHM_JOBS_DONE] );

    /* and increase job counter */
    shm[SHM_JOBS_DONE]++;

    /* detach from shared memory */
    if (shmdt(shm) < 0) {
        perror("shmdt");
    }

    alarm(0);

    return (void*)result;
}
/* tell parent our state */ void set_state(int status) { int *shm; gm_log( GM_LOG_TRACE, "set_state(%d)\n", status ); if(worker_run_mode == GM_WORKER_STANDALONE) return; /* give us 10 seconds to set state */ signal(SIGALRM, exit_sighandler); alarm(10); /* Now we attach the segment to our data space. */ if ((shm = shmat(shmid, NULL, 0)) == (int *) -1) { perror("shmat"); gm_log( GM_LOG_TRACE, "worker finished: %d\n", getpid() ); clean_worker_exit(0); _exit( EXIT_FAILURE ); } if(status == GM_JOB_START) shm[shm_index] = current_pid; if(status == GM_JOB_END) { shm[SHM_JOBS_DONE]++; /* increase jobs done */ shm[SHM_WORKER_LAST_CHECK] = (int)time(NULL); /* set last job date */ /* status slot changed to -1 -> exit */ if( shm[shm_index] == -1 ) { gm_log( GM_LOG_TRACE, "worker finished: %d\n", getpid() ); clean_worker_exit(0); _exit( EXIT_SUCCESS ); } /* pid in our status slot changed, this should not happen -> exit */ if( shm[shm_index] != current_pid && shm[shm_index] != -current_pid ) { gm_log( GM_LOG_ERROR, "double used worker slot: %d != %d\n", current_pid, shm[shm_index] ); clean_worker_exit(0); _exit( EXIT_FAILURE ); } shm[shm_index] = -current_pid; } /* detach from shared memory */ if(shmdt(shm) < 0) perror("shmdt"); alarm(0); return; }
void worker_client(int worker_mode, int indx, int shid, char **env) { #else void worker_client(int worker_mode, int indx, int shid) { #endif gm_log( GM_LOG_TRACE, "%s worker client started\n", (worker_mode == GM_WORKER_STATUS ? "status" : "job" )); /* set signal handlers for a clean exit */ signal(SIGINT, clean_worker_exit); signal(SIGTERM,clean_worker_exit); worker_run_mode = worker_mode; shm_index = indx; shmid = shid; current_pid = getpid(); gethostname(hostname, GM_BUFFERSIZE-1); /* create worker */ if(set_worker(&worker) != GM_OK) { gm_log( GM_LOG_ERROR, "cannot start worker\n" ); clean_worker_exit(0); _exit( EXIT_FAILURE ); } /* create client */ if ( create_client( mod_gm_opt->server_list, &client ) != GM_OK ) { gm_log( GM_LOG_ERROR, "cannot start client\n" ); clean_worker_exit(0); _exit( EXIT_FAILURE ); } /* create duplicate client */ if( mod_gm_opt->dupserver_num ) { if ( create_client_dup( mod_gm_opt->dupserver_list, &client_dup ) != GM_OK ) { gm_log( GM_LOG_ERROR, "cannot start client for duplicate server\n" ); _exit( EXIT_FAILURE ); } } #ifdef EMBEDDEDPERL if(init_embedded_perl(env) == GM_ERROR) { _exit( EXIT_FAILURE ); } #endif worker_loop(); return; }
/*
 * SIGALRM handler: no input arrived within the configured timeout.
 * Prints a hint to stdout and exits with STATE_UNKNOWN.
 */
void alarm_sighandler(int sig) {
    gm_log( GM_LOG_TRACE, "alarm_sighandler(%i)\n", sig );

    fprintf(stdout,
            "got no input after %i seconds! Either send plugin output to stdin or use --message=...\n",
            mod_gm_opt->timeout);

    exit(STATE_UNKNOWN);
}
/*
 * Create and initialise the shared memory segment used to communicate
 * with the worker children. The pid of the calling process is used as
 * shm key. Exits the process if the segment cannot be created or attached.
 *
 * Layout: [0] done jobs, [1] total worker, [2] running worker,
 *         [3] status worker pid, [4..] one slot per normal worker.
 */
void setup_child_communicator() {
    int slot;

    gm_log( GM_LOG_TRACE, "setup_child_communicator()\n");

    /* create the segment, our pid is the shm key */
    mod_gm_shm_key = getpid();
    shmid = shmget(mod_gm_shm_key, GM_SHM_SIZE, IPC_CREAT | 0600);
    if (shmid < 0) {
        perror("shmget");
        exit(EXIT_FAILURE);
    }

    /* attach the segment to our data space */
    shm = shmat(shmid, NULL, 0);
    if (shm == (int *) -1) {
        perror("shmat");
        exit(EXIT_FAILURE);
    }

    /* global counters */
    shm[0] = 0;   /* done jobs */
    shm[1] = 0;   /* total worker */
    shm[2] = 0;   /* running worker */
    shm[3] = -1;  /* status worker pid */

    /* -1 marks an unused worker slot */
    for (slot = 4; slot < mod_gm_opt->max_worker + 4; slot++) {
        shm[slot] = -1;
    }

    return;
}
/* start new worker if needed */ void check_worker_population() { int x, now, target_number_of_workers; gm_log( GM_LOG_TRACE3, "check_worker_population()\n"); /* set current worker number */ count_current_worker(GM_ENABLED); /* check if status worker died */ if( shm[3] == -1 ) { make_new_child(GM_WORKER_STATUS); } /* keep up minimum population */ for (x = current_number_of_workers; x < mod_gm_opt->min_worker; x++) { make_new_child(GM_WORKER_MULTI); current_number_of_workers++; } /* check every second */ now = (int)time(NULL); if(last_time_increased >= now) return; target_number_of_workers = adjust_number_of_worker(mod_gm_opt->min_worker, mod_gm_opt->max_worker, current_number_of_workers, current_number_of_jobs); for (x = current_number_of_workers; x < target_number_of_workers; x++) { last_time_increased = now; /* top up the worker pool */ make_new_child(GM_WORKER_MULTI); } return; }
/*
 * SIGALRM handler: no input arrived within the configured timeout.
 * Prints a message to stdout and exits with STATE_CRITICAL.
 */
void alarm_sighandler(int sig) {
    gm_log( GM_LOG_TRACE, "alarm_sighandler(%i)\n", sig );

    fprintf(stdout,
            "Timeout after %d seconds - got no input! Send plugin output to stdin.\n",
            mod_gm_opt->timeout);

    exit(STATE_CRITICAL);
}
/*
 * SIGALRM handler: the timeout was hit while waiting for a server.
 * Prints host and port of the last entry in server_list to stdout and
 * exits with STATE_CRITICAL.
 */
void alarm_sighandler(int sig) {
    gm_log( GM_LOG_TRACE, "alarm_sighandler(%i)\n", sig );

    fprintf(stdout,
            "timeout while waiting for %s:%i\n",
            server_list[server_list_num-1]->host,
            server_list[server_list_num-1]->port);

    exit(STATE_CRITICAL);
}
/* create shared memory segments */ void setup_child_communicator() { int x; int now = (int)time(NULL); gm_log( GM_LOG_TRACE, "setup_child_communicator()\n"); /* Create the segment. */ mod_gm_shm_key = getpid(); /* use pid as shm key */ if ((shmid = shmget(mod_gm_shm_key, GM_SHM_SIZE, IPC_CREAT | 0600)) < 0) { perror("shmget"); exit( EXIT_FAILURE ); } /* Now we attach the segment to our data space. */ if ((shm = shmat(shmid, NULL, 0)) == (int *) -1) { perror("shmat"); exit( EXIT_FAILURE ); } /* change SHM_SHIFT if more global counters are added */ shm[SHM_JOBS_DONE] = 0; /* done jobs */ shm[SHM_WORKER_TOTAL] = 0; /* total worker */ shm[SHM_WORKER_RUNNING] = 0; /* running worker */ shm[SHM_STATUS_WORKER_PID] = -1; /* status worker pid */ shm[SHM_WORKER_LAST_CHECK] = now; /* time of last check */ for(x = 0; x < mod_gm_opt->max_worker; x++) { shm[x+SHM_SHIFT] = -1; /* normal worker */ } return; }
/*
 * SIGALRM handler: the timeout was hit while waiting for opt_server.
 * Prints a message to stdout and exits with STATE_CRITICAL.
 */
void alarm_sighandler(int sig) {
    gm_log( GM_LOG_TRACE, "alarm_sighandler(%i)\n", sig );

    fprintf(stdout, "timeout while waiting for %s\n", opt_server);

    exit(STATE_CRITICAL);
}
/*
 * Extract the content of <element>...</element> from a buffer.
 *
 * bufstart  first byte of the buffer to search
 * bufend    one past the last byte of the buffer
 * element   element name without angle brackets
 *
 * The first byte of the closing tag is overwritten with '\0', i.e. the
 * buffer is modified in place and the returned pointer refers into it.
 *
 * Returns a pointer to the NUL-terminated element content, or NULL if an
 * argument is invalid, the element name does not fit into the tag buffers
 * or the start or end tag is not found.
 */
char *read_multi_attribute(char *bufstart, char *bufend, char *element) {
    char start_element[GM_BUFFERSIZE], end_element[GM_BUFFERSIZE];
    size_t start_len, end_len;
    int written;

    if (bufstart == NULL || bufend == NULL || element == NULL || bufend < bufstart) {
        return NULL;
    }

    /* build the tags, refuse element names that would overflow the buffers */
    written = snprintf(start_element, sizeof start_element, "<%s>", element);
    if (written < 0 || (size_t)written >= sizeof start_element) {
        gm_log( GM_LOG_TRACE, "\tread_multi_attribute: element name too long\n");
        return NULL;
    }
    start_len = (size_t)written;

    written = snprintf(end_element, sizeof end_element, "</%s>", element);
    if (written < 0 || (size_t)written >= sizeof end_element) {
        gm_log( GM_LOG_TRACE, "\tread_multi_attribute: element name too long\n");
        return NULL;
    }
    end_len = (size_t)written;

    /* locate the opening tag and step over it */
    if ((bufstart = (char *)memmem(bufstart, (size_t)(bufend - bufstart), start_element, start_len)) == NULL) {
        gm_log( GM_LOG_TRACE, "\tread_multi_attribute: start element \'%s\' not found\n", start_element);
        return NULL;
    }
    bufstart += start_len;

    /* locate the closing tag behind it */
    if ((bufend = (char *)memmem(bufstart, (size_t)(bufend - bufstart), end_element, end_len)) == NULL) {
        gm_log( GM_LOG_TRACE, "\tread_multi_attribute: end element \'%s\' not found\n", end_element);
        return NULL;
    }

    /* terminate the content where the closing tag starts */
    *bufend = '\0';

    return bufstart;
}
/*
 * Verify and normalise the parsed worker options.
 *
 * Caps the debug level in daemon mode, enables the default queues if none
 * were selected by hand and limits min_worker to max_worker.
 *
 * Returns GM_OK on success, GM_ERROR if no server is configured, if there
 * is no queue to serve, or if encryption is enabled without key or keyfile.
 */
int verify_options(mod_gm_opt_t *opt) {
    /* stdout logging in daemon mode is pointless */
    if (opt->daemon_mode == GM_ENABLED && opt->debug_level > GM_LOG_TRACE) {
        opt->debug_level = GM_LOG_TRACE;
    }

    /* at least one server is mandatory */
    if (opt->server_num == 0) {
        gm_log( GM_LOG_ERROR, "please specify at least one server\n" );
        return GM_ERROR;
    }

    /* no queue chosen explicitly -> serve hosts, services and events */
    if (opt->set_queues_by_hand == 0) {
        gm_log( GM_LOG_DEBUG, "starting client with default queues\n" );
        opt->hosts    = GM_ENABLED;
        opt->services = GM_ENABLED;
        opt->events   = GM_ENABLED;
    }

    /* do we have queues to serve? */
    if (opt->servicegroups_num == 0
        && opt->hostgroups_num == 0
        && opt->hosts    == GM_DISABLED
        && opt->services == GM_DISABLED
        && opt->events   == GM_DISABLED) {
        gm_log( GM_LOG_ERROR, "starting worker without any queues is useless\n" );
        return GM_ERROR;
    }

    /* the minimum can never exceed the maximum */
    if (opt->min_worker > opt->max_worker) {
        opt->min_worker = opt->max_worker;
    }

    /* encryption without key? */
    if (opt->encryption == GM_ENABLED && opt->crypt_key == NULL && opt->keyfile == NULL) {
        gm_log( GM_LOG_ERROR, "no encryption key provided, please use --key=... or keyfile=... or disable encryption\n");
        return GM_ERROR;
    }

    return GM_OK;
}
/*
 * Set up the gearman worker for the result queue.
 *
 * Creates the worker for the configured servers, registers get_results
 * for the result queue and additionally registers a "dummy" function.
 *
 * Returns GM_OK on success, GM_ERROR if no result queue is configured or
 * the result queue function cannot be registered.
 */
int set_worker( gearman_worker_st *worker ) {
    int rc;

    create_worker( mod_gm_opt->server_list, worker );

    if (mod_gm_opt->result_queue == NULL) {
        gm_log( GM_LOG_ERROR, "got no result queue!\n" );
        return GM_ERROR;
    }

    gm_log( GM_LOG_DEBUG, "started result_worker thread for queue: %s\n", mod_gm_opt->result_queue );

    rc = worker_add_function( worker, mod_gm_opt->result_queue, get_results );
    if (rc != GM_OK) {
        return GM_ERROR;
    }

    /* add our dummy queue, gearman sometimes forgets the last added queue */
    worker_add_function( worker, "dummy", dummy );

    return GM_OK;
}
/*
 * Register a function for a queue on a gearman worker.
 *
 * Returns GM_OK on success; on failure the gearman error is logged and
 * GM_ERROR is returned.
 */
int worker_add_function( gearman_worker_st * worker, char * queue, gearman_worker_fn *function) {
    gearman_return_t rc = gearman_worker_add_function( worker, queue, 0, function, NULL );

    if (rc == GEARMAN_SUCCESS) {
        return GM_OK;
    }

    gm_log( GM_LOG_ERROR, "worker error: %s\n", gearman_worker_error( worker ) );
    return GM_ERROR;
}
/* set new number of workers */ int adjust_number_of_worker(int min, int max, int cur_workers, int cur_jobs) { int perc_running; int idle; int target = min; double load[3]; if(cur_workers == 0) { gm_log( GM_LOG_TRACE3, "adjust_number_of_worker(min %d, max %d, worker %d, jobs %d) -> %d\n", min, max, cur_workers, cur_jobs, mod_gm_opt->min_worker); return mod_gm_opt->min_worker; } perc_running = (int)cur_jobs*100/cur_workers; idle = (int)cur_workers - cur_jobs; gm_log( GM_LOG_TRACE3, "adjust_number_of_worker(min %d, max %d, worker %d, jobs %d) = %d%% running\n", min, max, cur_workers, cur_jobs, perc_running); if(cur_workers == max) return max; /* > 90% workers running */ if(cur_jobs > 0 && ( perc_running > 90 || idle <= 2 )) { if (getloadavg(load, 3) == -1) { gm_log( GM_LOG_ERROR, "failed to get current load\n"); perror("getloadavg"); } if(mod_gm_opt->load_limit1 > 0 && load[0] >= mod_gm_opt->load_limit1) { gm_log( GM_LOG_TRACE, "load limit 1min hit, not starting any more workers: %1.2f > %1.2f\n", load[0], mod_gm_opt->load_limit1); return cur_workers; } if(mod_gm_opt->load_limit5 > 0 && load[1] >= mod_gm_opt->load_limit5) { gm_log( GM_LOG_TRACE, "load limit 5min hit, not starting any more workers: %1.2f > %1.2f\n", load[1], mod_gm_opt->load_limit5); return cur_workers; } if(mod_gm_opt->load_limit15 > 0 && load[2] >= mod_gm_opt->load_limit15) { gm_log( GM_LOG_TRACE, "load limit 15min hit, not starting any more workers: %1.2f > %1.2f\n", load[2], mod_gm_opt->load_limit15); return cur_workers; } /* increase target number by spawn rate */ gm_log( GM_LOG_TRACE, "starting %d new workers\n", mod_gm_opt->spawn_rate); target = cur_workers + mod_gm_opt->spawn_rate; } /* dont go over the top */ if(target > max) { target = max; } if(target != cur_workers) gm_log( GM_LOG_TRACE3, "adjust_number_of_worker(min %d, max %d, worker %d, jobs %d) = %d%% running -> %d\n", min, max, cur_workers, cur_jobs, perc_running, target); return target; }
/* try to reload the config */ void reload_config(int sig) { gm_log( GM_LOG_TRACE, "reload_config(%d)\n", sig); if(parse_arguments(orig_argc, orig_argv) != GM_OK) { gm_log( GM_LOG_ERROR, "reload config failed, check your config\n"); return; } /* * restart workers gracefully: * send term signal to our children * children will finish the current job and exit */ stop_children(GM_WORKER_RESTART); /* start status worker */ make_new_child(GM_WORKER_STATUS); gm_log( GM_LOG_INFO, "reloading config was successful\n"); return; }
/*
 * Thread cleanup handler: data points to the gearman worker of the
 * thread, which gets unregistered, disconnected from its servers and freed.
 */
static void cancel_worker_thread (void * data) {
    gearman_worker_st *w = (gearman_worker_st*) data;

    gearman_worker_unregister_all(w);
    gearman_worker_remove_servers(w);
    gearman_worker_free(w);

    gm_log( GM_LOG_DEBUG, "worker thread finished\n" );

    return;
}
/* called when check runs into timeout */ void check_alarm_handler(int sig) { pid_t pid; gm_log( GM_LOG_TRACE, "check_alarm_handler(%i)\n", sig ); pid = getpid(); if(current_job != NULL && mod_gm_opt->fork_on_exec == GM_DISABLED) { /* create a useful log message*/ if ( !strcmp( current_job->type, "service" ) ) { gm_log( GM_LOG_INFO, "timeout (%is) hit for servicecheck: %s - %s\n", current_job->timeout, current_job->host_name, current_job->service_description); } else if ( !strcmp( current_job->type, "host" ) ) { gm_log( GM_LOG_INFO, "timeout (%is) hit for hostcheck: %s\n", current_job->timeout, current_job->host_name); } else if ( !strcmp( current_job->type, "eventhandler" ) ) { gm_log( GM_LOG_INFO, "timeout (%is) hit for eventhandler: %s\n", current_job->timeout, current_job->command_line); } send_timeout_result(current_job); gearman_job_send_complete(current_gearman_job, NULL, 0); } if(current_child_pid > 0) { kill_child_checks(); } else { signal(SIGINT, SIG_IGN); gm_log( GM_LOG_TRACE, "send SIGINT to %d\n", pid); kill(-pid, SIGINT); signal(SIGINT, SIG_DFL); sleep(1); gm_log( GM_LOG_TRACE, "send SIGKILL to %d\n", pid); kill(-pid, SIGKILL); } return; }
/*
 * Create the gearman client.
 *
 * server_list  NULL-terminated list of "host" or "host:port" strings;
 *              entries without port use GM_SERVER_DEFAULT_PORT
 * client       client structure to initialise
 *
 * Ignores SIGPIPE, initialises the client and adds every server to it.
 * On success the client is stored in current_client.
 *
 * Returns GM_OK on success, GM_ERROR on allocation failure or if a
 * server cannot be added. An empty server list trips an assertion.
 */
int create_client( char ** server_list, gearman_client_st *client ) {
    gearman_return_t ret;
    int x = 0;

    gm_log( GM_LOG_TRACE, "create_client()\n" );

    signal(SIGPIPE, SIG_IGN);

    client = gearman_client_create(client);
    if (client == NULL) {
        gm_log( GM_LOG_ERROR, "Memory allocation failure on client creation\n" );
        return GM_ERROR;
    }

    while (server_list[x] != NULL) {
        char *host;
        char *port_val;
        in_port_t port = GM_SERVER_DEFAULT_PORT;
        /* strsep() modifies its input, so work on a private copy */
        char *server   = strdup( server_list[x] );
        char *server_c = server;

        if (server == NULL) {
            gm_log( GM_LOG_ERROR, "Memory allocation failure while parsing server: %s\n", server_list[x] );
            return GM_ERROR;
        }

        /* split at the first colon; the remainder (if any) is the port */
        host     = strsep( &server, ":" );
        port_val = server;
        if (port_val != NULL) {
            port = ( in_port_t ) atoi( port_val );
        }

        ret = gearman_client_add_server( client, host, port );
        if (ret != GEARMAN_SUCCESS) {
            gm_log( GM_LOG_ERROR, "client error: %s\n", gearman_client_error( client ) );
            free(server_c);
            return GM_ERROR;
        }

        free(server_c);
        x++;
    }

    assert(x != 0);

    current_client = client;

    return GM_OK;
}
/* register eventhandler callback */ static void register_neb_callbacks(void) { /* only if we have hostgroups defined or general hosts enabled */ if ( mod_gm_opt->do_hostchecks == GM_ENABLED && ( mod_gm_opt->hostgroups_num > 0 || mod_gm_opt->hosts == GM_ENABLED || mod_gm_opt->queue_cust_var )) neb_register_callback( NEBCALLBACK_HOST_CHECK_DATA, gearman_module_handle, 0, handle_host_check ); /* only if we have groups defined or general services enabled */ if ( mod_gm_opt->servicegroups_num > 0 || mod_gm_opt->hostgroups_num > 0 || mod_gm_opt->services == GM_ENABLED || mod_gm_opt->queue_cust_var ) neb_register_callback( NEBCALLBACK_SERVICE_CHECK_DATA, gearman_module_handle, 0, handle_svc_check ); if ( mod_gm_opt->events == GM_ENABLED ) neb_register_callback( NEBCALLBACK_EVENT_HANDLER_DATA, gearman_module_handle, 0, handle_eventhandler ); if ( mod_gm_opt->perfdata == GM_ENABLED ) { if(process_performance_data == 0) gm_log( GM_LOG_INFO, "Warning: process_performance_data is disabled globally, cannot process performance data\n" ); neb_register_callback( NEBCALLBACK_HOST_CHECK_DATA, gearman_module_handle, 0, handle_perfdata ); neb_register_callback( NEBCALLBACK_SERVICE_CHECK_DATA, gearman_module_handle, 0, handle_perfdata ); } gm_log( GM_LOG_DEBUG, "registered neb callbacks\n" ); }