static void reap_workers(void)
{
    waiter_drain();
    for (;;) {
        int status;
        int pid = waiter_next(&status);
        if (pid == 0)
            return;
        if (pid < 0) {
            if (errno == EINTR)
                continue;
            if (errno != EAGAIN && errno != ECHILD) {
                perror("reap");
                abort();
            }
            break;
        }
        if (WIFSIGNALED(status) && SIGALRM == WTERMSIG(status)) {
            record(INFO, "sub %d retiring", pid);
        } else {
            record(INFO, "sub %d died (exit %d sig %d)",
                   pid, WEXITSTATUS(status), WTERMSIG(status));
        }
        int which;
        for (which = 0; which < numworkers; ++which) {
            if (workers[which].pid == pid) {
                remove_worker(which);
                break;
            }
        }
        // it's okay if the pid wasn't found: the worker may already have been removed
    }
}
int grpc_pollset_work(grpc_pollset *pollset, grpc_pollset_worker *worker,
                      gpr_timespec deadline) {
  gpr_timespec now;
  int added_worker = 0;
  now = gpr_now(GPR_CLOCK_MONOTONIC);
  if (gpr_time_cmp(now, deadline) > 0) {
    return 0 /* GPR_FALSE */;
  }
  worker->next = worker->prev = NULL;
  gpr_cv_init(&worker->cv);
  if (grpc_maybe_call_delayed_callbacks(&pollset->mu, 1 /* GPR_TRUE */)) {
    goto done;
  }
  if (grpc_alarm_check(&pollset->mu, now, &deadline)) {
    goto done;
  }
  if (!pollset->kicked_without_pollers && !pollset->shutting_down) {
    push_front_worker(pollset, worker);
    added_worker = 1;
    gpr_cv_wait(&worker->cv, &pollset->mu, deadline);
  } else {
    pollset->kicked_without_pollers = 0;
  }
done:
  gpr_cv_destroy(&worker->cv);
  if (added_worker) {
    remove_worker(pollset, worker);
  }
  return 1 /* GPR_TRUE */;
}
/* Pop the first worker from the pollset's worker list, or NULL if the list is empty. */
static grpc_pollset_worker *pop_front_worker(grpc_pollset *p) {
  if (grpc_pollset_has_workers(p)) {
    grpc_pollset_worker *w = p->root_worker.next;
    remove_worker(p, w);
    return w;
  } else {
    return NULL;
  }
}
/* Pop the first worker from the given list (pollset-local or global), or NULL if empty. */
static grpc_pollset_worker *pop_front_worker(
    grpc_pollset_worker *root, grpc_pollset_worker_link_type type) {
  if (has_workers(root, type)) {
    grpc_pollset_worker *w = root->links[type].next;
    remove_worker(w, type);
    return w;
  } else {
    return NULL;
  }
}
void grpc_pollset_work(grpc_pollset *pollset, grpc_pollset_worker *worker,
                       gpr_timespec now, gpr_timespec deadline) {
  /* pollset->mu already held */
  int added_worker = 0;
  /* this must happen before we (potentially) drop pollset->mu */
  worker->next = worker->prev = NULL;
  /* TODO(ctiller): pool these */
  grpc_wakeup_fd_init(&worker->wakeup_fd);
  if (grpc_maybe_call_delayed_callbacks(&pollset->mu, 1)) {
    goto done;
  }
  if (grpc_alarm_check(&pollset->mu, now, &deadline)) {
    goto done;
  }
  if (pollset->shutting_down) {
    goto done;
  }
  if (pollset->in_flight_cbs) {
    /* Give do_promote priority so we don't starve it out */
    gpr_mu_unlock(&pollset->mu);
    gpr_mu_lock(&pollset->mu);
    goto done;
  }
  if (!pollset->kicked_without_pollers) {
    push_front_worker(pollset, worker);
    added_worker = 1;
    gpr_tls_set(&g_current_thread_poller, (gpr_intptr)pollset);
    pollset->vtable->maybe_work(pollset, worker, deadline, now, 1);
    gpr_tls_set(&g_current_thread_poller, 0);
  } else {
    pollset->kicked_without_pollers = 0;
  }
done:
  grpc_wakeup_fd_destroy(&worker->wakeup_fd);
  if (added_worker) {
    remove_worker(pollset, worker);
  }
  if (pollset->shutting_down) {
    if (grpc_pollset_has_workers(pollset)) {
      grpc_pollset_kick(pollset, NULL);
    } else if (!pollset->called_shutdown && pollset->in_flight_cbs == 0) {
      pollset->called_shutdown = 1;
      gpr_mu_unlock(&pollset->mu);
      finish_shutdown(pollset);
      /* Continuing to access pollset here is safe -- it is the caller's
       * responsibility to not destroy when it has outstanding calls to
       * grpc_pollset_work.
       * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */
      gpr_mu_lock(&pollset->mu);
    }
  }
}
static int handle_worker_result(int sd, int events, void *arg)
{
    wproc_object_job *oj = NULL;
    char *buf, *error_reason = NULL;
    unsigned long size;
    int ret;
    static struct kvvec kvv = KVVEC_INITIALIZER;
    struct wproc_worker *wp = (struct wproc_worker *)arg;

    if (iocache_capacity(wp->ioc) == 0) {
        logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: iocache_capacity() is 0 for worker %s.\n", wp->name);
    }

    ret = iocache_read(wp->ioc, wp->sd);
    if (ret < 0) {
        logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: iocache_read() from %s returned %d: %s\n",
              wp->name, ret, strerror(errno));
        return 0;
    } else if (ret == 0) {
        logit(NSLOG_INFO_MESSAGE, TRUE, "wproc: Socket to worker %s broken, removing", wp->name);
        wproc_num_workers_online--;
        iobroker_unregister(nagios_iobs, sd);
        if (workers.len <= 0) {
            /* there aren't global workers left, we can't run any more checks
             * we should try respawning a few of the standard ones
             */
            logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: All our workers are dead, we can't do anything!");
        }
        remove_worker(wp);
        fanout_destroy(wp->jobs, fo_reassign_wproc_job);
        wp->jobs = NULL;
        wproc_destroy(wp, 0);
        return 0;
    }

    while ((buf = worker_ioc2msg(wp->ioc, &size, 0))) {
        struct wproc_job *job;
        wproc_result wpres;

        /* log messages are handled first */
        if (size > 5 && !memcmp(buf, "log=", 4)) {
            logit(NSLOG_INFO_MESSAGE, TRUE, "wproc: %s: %s\n", wp->name, buf + 4);
            continue;
        }

        /* for everything else we need to actually parse */
        if (buf2kvvec_prealloc(&kvv, buf, size, '=', '\0', KVVEC_ASSIGN) <= 0) {
            logit(NSLOG_RUNTIME_ERROR, TRUE,
                  "wproc: Failed to parse key/value vector from worker response with len %lu. First kv=%s",
                  size, buf ? buf : "(NULL)");
            continue;
        }

        memset(&wpres, 0, sizeof(wpres));
        wpres.job_id = -1;
        wpres.type = -1;
        wpres.response = &kvv;
        parse_worker_result(&wpres, &kvv);

        job = get_job(wp, wpres.job_id);
        if (!job) {
            logit(NSLOG_RUNTIME_WARNING, TRUE, "wproc: Job with id '%d' doesn't exist on %s.\n",
                  wpres.job_id, wp->name);
            continue;
        }
        if (wpres.type != job->type) {
            logit(NSLOG_RUNTIME_WARNING, TRUE,
                  "wproc: %s claims job %d is type %d, but we think it's type %d\n",
                  wp->name, job->id, wpres.type, job->type);
            break;
        }

        oj = (wproc_object_job *)job->arg;

        /*
         * ETIME ("Timer expired") doesn't really happen
         * on any modern systems, so we reuse it to mean
         * "program timed out"
         */
        if (wpres.error_code == ETIME) {
            wpres.early_timeout = TRUE;
        }
        if (wpres.early_timeout) {
            asprintf(&error_reason, "timed out after %.2fs",
                     tv_delta_f(&wpres.start, &wpres.stop));
        } else if (WIFSIGNALED(wpres.wait_status)) {
            asprintf(&error_reason, "died by signal %d%s after %.2f seconds",
                     WTERMSIG(wpres.wait_status),
                     WCOREDUMP(wpres.wait_status) ? " (core dumped)" : "",
                     tv_delta_f(&wpres.start, &wpres.stop));
        } else if (job->type != WPJOB_CHECK && WEXITSTATUS(wpres.wait_status) != 0) {
            asprintf(&error_reason, "is a non-check helper but exited with return code %d",
                     WEXITSTATUS(wpres.wait_status));
        }
        if (error_reason) {
            logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: %s job %d from worker %s %s",
                  wpjob_type_name(job->type), job->id, wp->name, error_reason);
            logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: command: %s\n", job->command);
            if (job->type != WPJOB_CHECK && oj) {
                logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: host=%s; service=%s; contact=%s\n",
                      oj->host_name ? oj->host_name : "(none)",
                      oj->service_description ? oj->service_description : "(none)",
                      oj->contact_name ? oj->contact_name : "(none)");
            } else if (oj) {
                struct check_result *cr = (struct check_result *)job->arg;
                logit(NSLOG_RUNTIME_ERROR, TRUE, "wproc: host=%s; service=%s;\n",
                      cr->host_name, cr->service_description);
            }
            logit(NSLOG_RUNTIME_ERROR, TRUE,
                  "wproc: early_timeout=%d; exited_ok=%d; wait_status=%d; error_code=%d;\n",
                  wpres.early_timeout, wpres.exited_ok, wpres.wait_status, wpres.error_code);
            wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc: stderr", wpres.outerr);
            wproc_logdump_buffer(NSLOG_RUNTIME_ERROR, TRUE, "wproc: stdout", wpres.outstd);
        }
        my_free(error_reason);

        switch (job->type) {
        case WPJOB_CHECK:
            ret = handle_worker_check(&wpres, wp, job);
            break;
        case WPJOB_NOTIFY:
            if (wpres.early_timeout) {
                if (oj->service_description) {
                    logit(NSLOG_RUNTIME_WARNING, TRUE,
                          "Warning: Notifying contact '%s' of service '%s' on host '%s' by command '%s' timed out after %.2f seconds\n",
                          oj->contact_name, oj->service_description, oj->host_name,
                          job->command, tv2float(&wpres.runtime));
                } else {
                    logit(NSLOG_RUNTIME_WARNING, TRUE,
                          "Warning: Notifying contact '%s' of host '%s' by command '%s' timed out after %.2f seconds\n",
                          oj->contact_name, oj->host_name, job->command,
                          tv2float(&wpres.runtime));
                }
            }
            break;
        case WPJOB_OCSP:
            if (wpres.early_timeout) {
                logit(NSLOG_RUNTIME_WARNING, TRUE,
                      "Warning: OCSP command '%s' for service '%s' on host '%s' timed out after %.2f seconds\n",
                      job->command, oj->service_description, oj->host_name,
                      tv2float(&wpres.runtime));
            }
            break;
        case WPJOB_OCHP:
            if (wpres.early_timeout) {
                logit(NSLOG_RUNTIME_WARNING, TRUE,
                      "Warning: OCHP command '%s' for host '%s' timed out after %.2f seconds\n",
                      job->command, oj->host_name, tv2float(&wpres.runtime));
            }
            break;
        case WPJOB_GLOBAL_SVC_EVTHANDLER:
            if (wpres.early_timeout) {
                logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
                      "Warning: Global service event handler command '%s' timed out after %.2f seconds\n",
                      job->command, tv2float(&wpres.runtime));
            }
            break;
        case WPJOB_SVC_EVTHANDLER:
            if (wpres.early_timeout) {
                logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
                      "Warning: Service event handler command '%s' timed out after %.2f seconds\n",
                      job->command, tv2float(&wpres.runtime));
            }
            break;
        case WPJOB_GLOBAL_HOST_EVTHANDLER:
            if (wpres.early_timeout) {
                logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
                      "Warning: Global host event handler command '%s' timed out after %.2f seconds\n",
                      job->command, tv2float(&wpres.runtime));
            }
            break;
        case WPJOB_HOST_EVTHANDLER:
            if (wpres.early_timeout) {
                logit(NSLOG_EVENT_HANDLER | NSLOG_RUNTIME_WARNING, TRUE,
                      "Warning: Host event handler command '%s' timed out after %.2f seconds\n",
                      job->command, tv2float(&wpres.runtime));
            }
            break;
        case WPJOB_CALLBACK:
            run_job_callback(job, &wpres, 0);
            break;
        default:
            logit(NSLOG_RUNTIME_WARNING, TRUE, "Worker %d: Unknown jobtype: %d\n", wp->pid, job->type);
            break;
        }
        destroy_job(job);
    }
    return 0;
}
void grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                       grpc_pollset_worker **worker_hdl, gpr_timespec now,
                       gpr_timespec deadline) {
  grpc_pollset_worker worker;
  *worker_hdl = &worker;

  /* pollset->mu already held */
  int added_worker = 0;
  int locked = 1;
  int queued_work = 0;
  int keep_polling = 0;
  GPR_TIMER_BEGIN("grpc_pollset_work", 0);
  /* this must happen before we (potentially) drop pollset->mu */
  worker.next = worker.prev = NULL;
  worker.reevaluate_polling_on_wakeup = 0;
  if (pollset->local_wakeup_cache != NULL) {
    worker.wakeup_fd = pollset->local_wakeup_cache;
    pollset->local_wakeup_cache = worker.wakeup_fd->next;
  } else {
    worker.wakeup_fd = gpr_malloc(sizeof(*worker.wakeup_fd));
    grpc_wakeup_fd_init(&worker.wakeup_fd->fd);
  }
  worker.kicked_specifically = 0;
  /* If there's work waiting for the pollset to be idle, and the
     pollset is idle, then do that work */
  if (!grpc_pollset_has_workers(pollset) &&
      !grpc_closure_list_empty(pollset->idle_jobs)) {
    GPR_TIMER_MARK("grpc_pollset_work.idle_jobs", 0);
    grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL);
    goto done;
  }
  /* If we're shutting down then we don't execute any extended work */
  if (pollset->shutting_down) {
    GPR_TIMER_MARK("grpc_pollset_work.shutting_down", 0);
    goto done;
  }
  /* Give do_promote priority so we don't starve it out */
  if (pollset->in_flight_cbs) {
    GPR_TIMER_MARK("grpc_pollset_work.in_flight_cbs", 0);
    gpr_mu_unlock(&pollset->mu);
    locked = 0;
    goto done;
  }
  /* Start polling, and keep doing so while we're being asked to
     re-evaluate our pollers (this allows poll() based pollers to
     ensure they don't miss wakeups) */
  keep_polling = 1;
  while (keep_polling) {
    keep_polling = 0;
    if (!pollset->kicked_without_pollers) {
      if (!added_worker) {
        push_front_worker(pollset, &worker);
        added_worker = 1;
        gpr_tls_set(&g_current_thread_worker, (intptr_t)&worker);
      }
      gpr_tls_set(&g_current_thread_poller, (intptr_t)pollset);
      GPR_TIMER_BEGIN("maybe_work_and_unlock", 0);
      pollset->vtable->maybe_work_and_unlock(exec_ctx, pollset, &worker,
                                             deadline, now);
      GPR_TIMER_END("maybe_work_and_unlock", 0);
      locked = 0;
      gpr_tls_set(&g_current_thread_poller, 0);
    } else {
      GPR_TIMER_MARK("grpc_pollset_work.kicked_without_pollers", 0);
      pollset->kicked_without_pollers = 0;
    }
  /* Finished execution - start cleaning up.
     Note that we may arrive here from outside the enclosing while() loop.
     In that case we won't loop though as we haven't added worker to the
     worker list, which means nobody could ask us to re-evaluate polling). */
  done:
    if (!locked) {
      queued_work |= grpc_exec_ctx_flush(exec_ctx);
      gpr_mu_lock(&pollset->mu);
      locked = 1;
    }
    /* If we're forced to re-evaluate polling (via grpc_pollset_kick with
       GRPC_POLLSET_REEVALUATE_POLLING_ON_WAKEUP) then we land here and force
       a loop */
    if (worker.reevaluate_polling_on_wakeup) {
      worker.reevaluate_polling_on_wakeup = 0;
      pollset->kicked_without_pollers = 0;
      if (queued_work || worker.kicked_specifically) {
        /* If there's queued work on the list, then set the deadline to be
           immediate so we get back out of the polling loop quickly */
        deadline = gpr_inf_past(GPR_CLOCK_MONOTONIC);
      }
      keep_polling = 1;
    }
  }
  if (added_worker) {
    remove_worker(pollset, &worker);
    gpr_tls_set(&g_current_thread_worker, 0);
  }
  /* release wakeup fd to the local pool */
  worker.wakeup_fd->next = pollset->local_wakeup_cache;
  pollset->local_wakeup_cache = worker.wakeup_fd;
  /* check shutdown conditions */
  if (pollset->shutting_down) {
    if (grpc_pollset_has_workers(pollset)) {
      grpc_pollset_kick(pollset, NULL);
    } else if (!pollset->called_shutdown && pollset->in_flight_cbs == 0) {
      pollset->called_shutdown = 1;
      gpr_mu_unlock(&pollset->mu);
      finish_shutdown(exec_ctx, pollset);
      grpc_exec_ctx_flush(exec_ctx);
      /* Continuing to access pollset here is safe -- it is the caller's
       * responsibility to not destroy when it has outstanding calls to
       * grpc_pollset_work.
       * TODO(dklempner): Can we refactor the shutdown logic to avoid this? */
      gpr_mu_lock(&pollset->mu);
    } else if (!grpc_closure_list_empty(pollset->idle_jobs)) {
      grpc_exec_ctx_enqueue_list(exec_ctx, &pollset->idle_jobs, NULL);
      gpr_mu_unlock(&pollset->mu);
      grpc_exec_ctx_flush(exec_ctx);
      gpr_mu_lock(&pollset->mu);
    }
  }
  *worker_hdl = NULL;
  GPR_TIMER_END("grpc_pollset_work", 0);
}
void saltmine_item(int in, int cn)
{
    struct saltmine_worker_data *workerdat;
    struct saltmine_ppd *ppd;
    char *drdata;
    static int laddernumber=0;
    int in2;

    drdata=it[in].drdata;

    if (!cn) {
        // ladder
        if (drdata[0]==1) {
            //xlog("item %d, at %d,%d, nr=%d",in,it[in].x,it[in].y,laddernumber);
            if (laddernumber<MAXLADDER) drdata[1]=laddernumber++;
            else xlog("Too many ladders in %s %d!!!",__FILE__,__LINE__);
        }
        // saltbag
        if (drdata[0]==2) {
            if (saltbag_itemx!=it[in].x || saltbag_itemy!=it[in].y) {
                xlog("wrong coordinates of saltbag item in %s %d!!!",__FILE__,__LINE__);
            }
            saltbag_itemx=it[in].x;
            saltbag_itemy=it[in].y;
        }
        // door
        if (drdata[0]==3) {
            if (door_itemx!=it[in].x || door_itemy!=it[in].y) {
                xlog("wrong coordinates of door item in %s %d!!!",__FILE__,__LINE__);
            }
            door_itemx=it[in].x;
            door_itemy=it[in].y;
        }
    } else {
        // ladder
        if (drdata[0]==1) {
            // player using it
            if (ch[cn].flags&CF_PLAYER) {
                // get data
                if (!(ppd=get_saltmine_ppd(cn))) return;

                // check if this ladder was already used
                if (ppd->laddertime[it[in].drdata[1]]+LADDER_REUSE_TIME>time(NULL)) {
                    log_char(cn,LOG_SYSTEM,0,"Thou already got all the Salt out of this, so thou have to wait until it is refilled again.");
                    return;
                }
                notify_area(it[in].x,it[in].y,NT_NPC,NTID_SALTMINE_USEITEM,cn,in);
                ppd->useitemflag=1;
                return;
            }
            // worker using it
            if (ch[cn].driver==CDR_MONK_WORKER) {
                // get data
                if (!(workerdat=set_data(cn,DRD_SALTMINE_WORKER,sizeof(struct saltmine_worker_data)))) return;
                if (!(ppd=get_saltmine_ppd(workerdat->follow_cn))) return;

                // check if this ladder was already used
                if (ppd->laddertime[it[in].drdata[1]]+LADDER_REUSE_TIME>time(NULL)) {
                    log_area(ch[cn].x,ch[cn].y,LOG_SYSTEM,0,10,"%s shakes his head.",ch[cn].name);
                    workerdat->useitem=0;
                    return;
                }

                // set the time of the ladder
                ppd->laddertime[it[in].drdata[1]]=time(NULL);

                // make him invisible, give him the salt
                ch[cn].flags|=CF_INVISIBLE;
                workerdat->hassalt=1;
                workerdat->useitem=0;
                workerdat->turnvisible=ticker+3*TICKS;
                ch[cn].tmpx=it[in].x;
                ch[cn].tmpy=it[in].y;
                return;
            }
        }
        // saltbag
        if (drdata[0]==2) {
            if (ch[cn].driver==CDR_MONK_WORKER) {
                // get data
                if (!(workerdat=set_data(cn,DRD_SALTMINE_WORKER,sizeof(struct saltmine_worker_data)))) return;
                if (!(ppd=get_saltmine_ppd(workerdat->follow_cn))) return;

                if (workerdat->hassalt) {
                    ppd->salt++;
                    ppd->gatamastate=50;
                    workerdat->hassalt=0;
                    workerdat->useitem=0;
                    log_area(ch[cn].x,ch[cn].y,LOG_SYSTEM,0,10,"%s drops his bag of salt.",ch[cn].name);
                }
                // set the time of the ladder
                //ppd->laddertime[it[in].drdata[1]]=time(NULL); !!!!!!!!!!!
            }
            if (ch[cn].flags&CF_PLAYER) {
                if (ch[cn].citem) return;
                if (!(ppd=get_saltmine_ppd(cn))) return;
                // if (!ppd->trusttime) { log_char(cn,LOG_SYSTEM,0,"Nothing happens."); return; }
                if (!ppd->salt) {
                    log_char(cn,LOG_SYSTEM,0,"Thou feelst thou should bring salt to the monastery, before rewarding thinself.");
                    return;
                }
                in2=create_item("salt");
                if (!in2) return;

                it[in2].value*=ppd->salt*1000;
                *(unsigned int*)(it[in2].drdata)=ppd->salt*1000;
                set_salt_data(in2);
                ppd->salt=0;
                ppd->gatamastate=0;

                ch[cn].citem=in2;
                ch[cn].flags|=CF_ITEMS;
                it[in2].carried=cn;
                log_char(cn,LOG_SYSTEM,0,"Thou took %d units of salt, feeling thou have earned it.",*(unsigned int*)(it[in2].drdata));
            }
        }
        // door
        if (drdata[0]==3) {
            if (ch[cn].driver==CDR_MONK_WORKER) remove_worker(cn);
            else log_char(cn,LOG_SYSTEM,0,"Thou canst not enter there.");
        }
    }
}
void grpc_pollset_work(grpc_exec_ctx *exec_ctx, grpc_pollset *pollset,
                       grpc_pollset_worker *worker, gpr_timespec now,
                       gpr_timespec deadline) {
  int added_worker = 0;
  worker->links[GRPC_POLLSET_WORKER_LINK_POLLSET].next =
      worker->links[GRPC_POLLSET_WORKER_LINK_POLLSET].prev =
          worker->links[GRPC_POLLSET_WORKER_LINK_GLOBAL].next =
              worker->links[GRPC_POLLSET_WORKER_LINK_GLOBAL].prev = NULL;
  worker->kicked = 0;
  worker->pollset = pollset;
  gpr_cv_init(&worker->cv);
  if (grpc_timer_check(exec_ctx, now, &deadline)) {
    goto done;
  }
  if (!pollset->kicked_without_pollers && !pollset->shutting_down) {
    if (g_active_poller == NULL) {
      grpc_pollset_worker *next_worker;
      /* become poller */
      pollset->is_iocp_worker = 1;
      g_active_poller = worker;
      gpr_mu_unlock(&grpc_polling_mu);
      grpc_iocp_work(exec_ctx, deadline);
      grpc_exec_ctx_flush(exec_ctx);
      gpr_mu_lock(&grpc_polling_mu);
      pollset->is_iocp_worker = 0;
      g_active_poller = NULL;
      /* try to get a worker from this pollset's worker list */
      next_worker = pop_front_worker(&pollset->root_worker,
                                     GRPC_POLLSET_WORKER_LINK_POLLSET);
      if (next_worker == NULL) {
        /* try to get a worker from the global list */
        next_worker = pop_front_worker(&g_global_root_worker,
                                       GRPC_POLLSET_WORKER_LINK_GLOBAL);
      }
      if (next_worker != NULL) {
        next_worker->kicked = 1;
        gpr_cv_signal(&next_worker->cv);
      }
      if (pollset->shutting_down && pollset->on_shutdown != NULL) {
        grpc_exec_ctx_enqueue(exec_ctx, pollset->on_shutdown, 1);
        pollset->on_shutdown = NULL;
      }
      goto done;
    }
    push_front_worker(&g_global_root_worker, GRPC_POLLSET_WORKER_LINK_GLOBAL,
                      worker);
    push_front_worker(&pollset->root_worker, GRPC_POLLSET_WORKER_LINK_POLLSET,
                      worker);
    added_worker = 1;
    while (!worker->kicked) {
      if (gpr_cv_wait(&worker->cv, &grpc_polling_mu, deadline)) {
        break;
      }
    }
  } else {
    pollset->kicked_without_pollers = 0;
  }
done:
  if (!grpc_closure_list_empty(exec_ctx->closure_list)) {
    gpr_mu_unlock(&grpc_polling_mu);
    grpc_exec_ctx_flush(exec_ctx);
    gpr_mu_lock(&grpc_polling_mu);
  }
  if (added_worker) {
    remove_worker(worker, GRPC_POLLSET_WORKER_LINK_GLOBAL);
    remove_worker(worker, GRPC_POLLSET_WORKER_LINK_POLLSET);
  }
  gpr_cv_destroy(&worker->cv);
}
/* Forcefully kill a worker process, drop it from the worker table, and reap it. */
void kill_worker(int which)
{
    kill(workers[which].pid, SIGKILL);
    remove_worker(which);
    reap_workers();
}