/*
 * Post-order recursive start: every service that s depends on is
 * started before s itself is started.
 * @param s A Service_T object
 */
static void do_start(Service_T s) {
        Dependant_T dep;

        ASSERT(s);

        /* Skip services that are already marked as visited */
        if (s->visited)
                return;
        s->visited = TRUE;

        /* Walk the dependency list and start each prerequisite first */
        dep = s->dependantlist;
        while (dep) {
                Service_T parent = Util_getService(dep->dependant);
                ASSERT(parent);
                do_start(parent);
                dep = dep->next;
        }

        /* A start method is required; a process is only started when
         * it is not already running */
        if (s->start) {
                if (s->type != TYPE_PROCESS || !Util_isProcessRunning(s)) {
                        LogInfo("'%s' start: %s\n", s->name, s->start->arg[0]);
                        spawn(s, s->start, NULL);
                        /* Only a process type is waited for, other
                         * service types do not have a pid file to
                         * watch */
                        if (s->type == TYPE_PROCESS)
                                wait_start(s);
                }
        }

        /* Monitoring is enabled whether or not anything was spawned */
        Util_monitorSet(s);
}
/*
 * Post-order recursive monitor: monitoring is enabled for every
 * service that s depends on before it is enabled for s itself.
 * @param s A Service_T object
 * @param flag A custom flag; it is only forwarded to the recursive calls
 */
static void do_monitor(Service_T s, int flag) {
        Dependant_T dep;

        ASSERT(s);

        /* Skip services that are already marked as visited */
        if (s->visited)
                return;
        s->visited = TRUE;

        /* Enable monitoring of the prerequisites first */
        dep = s->dependantlist;
        while (dep) {
                Service_T parent = Util_getService(dep->dependant);
                ASSERT(parent);
                do_monitor(parent, flag);
                dep = dep->next;
        }

        Util_monitorSet(s);
}
static void update_v0(int services) { for (int i = 0; i < services; i++) { State0_T state; if (read(file, &state, sizeof(state)) != sizeof(state)) THROW(IOException, "Unable to read service state"); Service_T service; if ((service = Util_getService(state.name))) { service->nstart = state.nstart; service->ncycle = state.ncycle; if (state.monitor == MONITOR_NOT) service->monitor = state.monitor; else if (service->monitor == MONITOR_NOT) service->monitor = MONITOR_INIT; } } }
static void update_v1() { State1_T state; while (read(file, &state, sizeof(state)) == sizeof(state)) { Service_T service; if ((service = Util_getService(state.name)) && service->type == state.type) { service->nstart = state.nstart; service->ncycle = state.ncycle; if (state.monitor == MONITOR_NOT) service->monitor = state.monitor; else if (service->monitor == MONITOR_NOT) service->monitor = MONITOR_INIT; if (service->type == TYPE_FILE) { service->inf->priv.file.st_ino = state.priv.file.st_ino; service->inf->priv.file.readpos = state.priv.file.readpos; } } } }
/** * Update the current service list with data from the state file. We * do *only* change services found in *both* the monitrc file and in * the state file. The algorithm: * * Assume the control file was changed and a new service (B) was added * so the monitrc file now contains the services: A B and C. The * running monit daemon only knows the services A and C. Upon restart * after a crash the monit daemon first read the monitrc file and * creates the service list structure with A B and C. We then read the * state file and update the service A and C since they are found in * the state file, B is not found in this file and therefore not * changed. * * The same strategy is used if a service was removed, e.g. if the * service A was removed from monitrc; when reading the state file, * service A is not found in the current service list (the list is * always generated from monitrc) and therefore A is simply discarded. * * Finally, after the monit service state is updated this function * writes the new state file. */ void State_update() { int i; int l= 0; State_T s; FILE *S= NULL; Service_T service; int has_error= FALSE; if(! (S= open_state("r"))) return; errno= 0; if(fread(&l, 1, sizeof (int), S) != sizeof(int)) { LogError("%s: Unable to read monit state information from '%s'\n", prog, Run.statefile); has_error= TRUE; goto error; } if(l > 0) { for(i=0; i<l; i++) { if(fread(&s, 1, sizeof(State_T), S) != sizeof(State_T)) { LogError("%s: An error occured when updating monit state information\n", prog); has_error= TRUE; goto error; } if((service= Util_getService(s.name))) { update_service_state(service, &s); } } } error: close_state(S); if(!has_error) State_save(); }
/** * Check to see if we should try to start/stop service * @param S A service name as stated in the config file * @param A An action id describing the action to execute * @return FALSE for error, otherwise TRUE */ int control_service(const char *S, int A) { Service_T s = NULL; ASSERT(S); if (! (s = Util_getService(S))) { LogError("%s: service '%s' -- doesn't exist\n", prog, S); return FALSE; } switch(A) { case ACTION_START: if (s->type == TYPE_PROCESS) { if (Util_isProcessRunning(s)) { DEBUG("%s: Process already running -- process %s\n", prog, S); Util_monitorSet(s); return TRUE; } if (!s->start) { LogError("%s: Start method not defined -- process %s\n", prog, S); Util_monitorSet(s); return FALSE; } } do_depend(s, ACTION_STOP); do_start(s); do_depend(s, ACTION_START); break; case ACTION_STOP: if (s->type == TYPE_PROCESS && !s->stop) { LogError("%s: Stop method not defined -- process %s\n", prog, S); Util_monitorUnset(s); return FALSE; } /* soft unmonitor and stop: */ do_depend(s, ACTION_STOP); do_stop(s); /* hard unmonitor - will reset all counters and flags: */ do_depend(s, ACTION_UNMONITOR); do_unmonitor(s); break; case ACTION_RESTART: if (s->type == TYPE_PROCESS && (!s->start || !s->stop)) { LogError("%s: Start or stop method not defined -- process %s\n", prog, S); Util_monitorSet(s); return FALSE; } LogInfo("'%s' trying to restart\n", s->name); do_depend(s, ACTION_STOP); if (do_stop(s)) { /* Only start if stop succeeded */ do_start(s); do_depend(s, ACTION_START); } else { /* enable monitoring of this service again to allow the restart retry * in the next cycle up to timeout limit */ Util_monitorSet(s); } break; case ACTION_MONITOR: /* We only enable monitoring of this service and all prerequisite * services. 
Chain of services which depends on this service keep * its state */ do_monitor(s); break; case ACTION_UNMONITOR: /* We disable monitoring of this service and all services which * depends on it */ do_depend(s, ACTION_UNMONITOR); do_unmonitor(s); break; default: LogError("%s: service '%s' -- invalid action %s\n", prog, S, A); return FALSE; } return TRUE; }
/**
 * Reprocess the partially handled event queue.
 *
 * Every regular file found in Run.eventlist_dir is read back as a
 * queued event (version, event structure, source service name,
 * message and action, in that order) and the handlers still flagged
 * in the event are retried. A queue file is removed once the event's
 * flag equals Handler_Succeeded, and rewritten via _queueUpdate() when
 * at least one handler passed in this run while others remain.
 * Run_HandlerInit is cleared from Run.flags when the directory has
 * been processed.
 */
void Event_queue_process() {
        /* return in the case that the eventqueue is not enabled or empty:
         * no queue directory is configured, or this is not the handler
         * init pass and both handler queue counters are zero */
        if (! Run.eventlist_dir || (! (Run.flags & Run_HandlerInit) && ! Run.handler_queue[Handler_Alert] && ! Run.handler_queue[Handler_Mmonit]))
                return;

        DIR *dir = opendir(Run.eventlist_dir);
        if (! dir) {
                /* A queue directory that does not exist is not reported */
                if (errno != ENOENT)
                        LogError("Cannot open the directory %s -- %s\n", Run.eventlist_dir, STRERROR);
                return;
        }

        struct dirent *de = readdir(dir);
        if (de)
                DEBUG("Processing postponed events queue\n");

        /* a and ea are set up once here, reused for every queued event in
         * the loop below and released after the loop
         * (NEW presumably allocates them -- confirm against its definition) */
        Action_T a;
        NEW(a);

        EventAction_T ea;
        NEW(ea);

        while (de) {
                /* Number of handlers which succeeded for the current event */
                int handlers_passed = 0;

                /* In the case that all handlers failed, skip the further
                 * processing in this cycle. Alert handler is currently
                 * defined anytime (either explicitly or localhost by default).
                 * NOTE(review): the left operand of || also requires the
                 * alert flag, so the whole test appears to reduce to the
                 * alert flag check alone -- confirm FLAG has no side effects */
                if ( (Run.mmonits && FLAG(Run.handler_flag, Handler_Mmonit) && FLAG(Run.handler_flag, Handler_Alert)) || FLAG(Run.handler_flag, Handler_Alert))
                        break;

                char file_name[PATH_MAX];
                snprintf(file_name, sizeof(file_name), "%s/%s", Run.eventlist_dir, de->d_name);

                /* Directory entries which are not files are skipped */
                if (File_isFile(file_name)) {
                        LogInfo("Processing queued event %s\n", file_name);

                        FILE *file = fopen(file_name, "r");
                        if (! file) {
                                LogError("Queued event processing failed - cannot open the file %s -- %s\n", file_name, STRERROR);
                                goto error1;
                        }

                        /* Receives the size reported by each file_readQueue() call */
                        size_t size;

                        /* read event structure version */
                        int *version = file_readQueue(file, &size);
                        if (! version) {
                                LogError("skipping queued event %s - unknown data format\n", file_name);
                                goto error2;
                        }
                        if (size != sizeof(int)) {
                                LogError("Aborting queued event %s - invalid size %lu\n", file_name, (unsigned long)size);
                                goto error3;
                        }
                        if (*version != EVENT_VERSION) {
                                LogError("Aborting queued event %s - incompatible data format version %d\n", file_name, *version);
                                goto error3;
                        }

                        /* read event structure */
                        Event_T e = file_readQueue(file, &size);
                        if (! e)
                                goto error3;
                        if (size != sizeof(*e))
                                goto error4;

                        /* read source: the stored service name is resolved to
                         * a service of the current configuration; the name
                         * buffer itself is released on both paths */
                        char *service = file_readQueue(file, &size);
                        if (! service)
                                goto error4;
                        if (! (e->source = Util_getService(service))) {
                                LogError("Aborting queued event %s - service %s not found in monitor configuration\n", file_name, service);
                                FREE(service);
                                goto error4;
                        }
                        FREE(service);

                        /* read message; on failure jump to error4 so that
                         * e->message is not passed to FREE */
                        if (! (e->message = file_readQueue(file, &size)))
                                goto error4;

                        /* read event action */
                        Action_Type *action = file_readQueue(file, &size);
                        if (! action)
                                goto error5;
                        if (size != sizeof(Action_Type))
                                goto error6;
                        a->id = *action;
                        /* Attach the action to the success or the failure slot
                         * depending on the state recorded in the event */
                        switch (e->state) {
                                case State_Succeeded:
                                case State_ChangedNot:
                                        ea->succeeded = a;
                                        break;
                                case State_Failed:
                                case State_Changed:
                                case State_Init:
                                        ea->failed = a;
                                        break;
                                default:
                                        LogError("Aborting queue event %s -- invalid state: %d\n", file_name, e->state);
                                        goto error6;
                        }
                        e->action = ea;

                        /* Retry all remaining handlers */

                        /* alert: on the handler init pass the event is counted
                         * in the alert queue counter; the handler is only
                         * called while the alert failure flag is not set in
                         * Run.handler_flag */
                        if (e->flag & Handler_Alert) {
                                if (Run.flags & Run_HandlerInit)
                                        Run.handler_queue[Handler_Alert]++;
                                if ((Run.handler_flag & Handler_Alert) != Handler_Alert) {
                                        /* Any return value other than Handler_Alert is treated as success */
                                        if ( handle_alert(e) != Handler_Alert ) {
                                                e->flag &= ~Handler_Alert;
                                                Run.handler_queue[Handler_Alert]--;
                                                handlers_passed++;
                                        } else {
                                                LogError("Alert handler failed, retry scheduled for next cycle\n");
                                                Run.handler_flag |= Handler_Alert;
                                        }
                                }
                        }

                        /* mmonit: same scheme as for the alert handler above */
                        if (e->flag & Handler_Mmonit) {
                                if (Run.flags & Run_HandlerInit)
                                        Run.handler_queue[Handler_Mmonit]++;
                                if ((Run.handler_flag & Handler_Mmonit) != Handler_Mmonit) {
                                        /* Any return value other than Handler_Mmonit is treated as success */
                                        if ( handle_mmonit(e) != Handler_Mmonit ) {
                                                e->flag &= ~Handler_Mmonit;
                                                Run.handler_queue[Handler_Mmonit]--;
                                                handlers_passed++;
                                        } else {
                                                LogError("M/Monit handler failed, retry scheduled for next cycle\n");
                                                Run.handler_flag |= Handler_Mmonit;
                                        }
                                }
                        }

                        /* If no error persists, remove it from the queue;
                         * otherwise rewrite the queue file when at least one
                         * handler passed in this run */
                        if (e->flag == Handler_Succeeded) {
                                DEBUG("Removing queued event %s\n", file_name);
                                if (unlink(file_name) < 0)
                                        LogError("Failed to remove queued event file '%s' -- %s\n", file_name, STRERROR);
                        } else if (handlers_passed > 0) {
                                DEBUG("Updating queued event %s (some handlers passed)\n", file_name);
                                _queueUpdate(e, file_name);
                        }

                        /* Cleanup chain: each label releases one resource and
                         * falls through to the labels below it, so a jump to
                         * errorN releases only what had been obtained at that
                         * point. The normal path falls through all of them. */
                error6:
                        FREE(action);
                error5:
                        FREE(e->message);
                error4:
                        FREE(e);
                error3:
                        FREE(version);
                error2:
                        fclose(file);
                }
        error1:
                /* Advance to the next directory entry */
                de = readdir(dir);
        }
        /* The handler init pass is over once the directory has been walked */
        Run.flags &= ~Run_HandlerInit;
        closedir(dir);
        FREE(a);
        FREE(ea);
}