static void *sched_thread_entrance(void *args) { ScheduleContext *pContext; ScheduleEntry *pPrevious; ScheduleEntry *pCurrent; ScheduleEntry *pSaveNext; ScheduleEntry *pNode; ScheduleEntry *pUntil; int exec_count; int i; pContext = (ScheduleContext *)args; if (sched_init_entries(&(pContext->scheduleArray)) != 0) { free(pContext); return NULL; } sched_make_chain(pContext); g_schedule_flag = true; while (*(pContext->pcontinue_flag)) { sched_check_waiting(pContext); if (pContext->scheduleArray.count == 0) //no schedule entry { sleep(1); g_current_time = time(NULL); continue; } g_current_time = time(NULL); while (pContext->head->next_call_time > g_current_time && *(pContext->pcontinue_flag)) { sleep(1); g_current_time = time(NULL); if (sched_check_waiting(pContext) == 0) { break; } } if (!(*(pContext->pcontinue_flag))) { break; } exec_count = 0; pCurrent = pContext->head; while (*(pContext->pcontinue_flag) && (pCurrent != NULL \ && pCurrent->next_call_time <= g_current_time)) { //fprintf(stderr, "exec task id=%d\n", pCurrent->id); pCurrent->task_func(pCurrent->func_args); do { pCurrent->next_call_time += pCurrent->interval; } while (pCurrent->next_call_time <= g_current_time); pCurrent = pCurrent->next; exec_count++; } if (exec_count == 0 || pContext->scheduleArray.count == 1) { continue; } if (exec_count > pContext->scheduleArray.count / 2) { sched_make_chain(pContext); continue; } pNode = pContext->head; pContext->head = pCurrent; //new chain head for (i=0; i<exec_count; i++) { if (pNode->next_call_time >= pContext->tail->next_call_time) { pContext->tail->next = pNode; pContext->tail = pNode; pNode = pNode->next; pContext->tail->next = NULL; continue; } pPrevious = NULL; pUntil = pContext->head; while (pUntil != NULL && \ pNode->next_call_time > pUntil->next_call_time) { pPrevious = pUntil; pUntil = pUntil->next; } pSaveNext = pNode->next; if (pPrevious == NULL) { pContext->head = pNode; } else { pPrevious->next = pNode; } pNode->next = pUntil; pNode = pSaveNext; } } g_schedule_flag = false; logDebug("file: "__FILE__", line: %d, " \ "schedule thread exit", __LINE__); free(pContext); return NULL; }
int schedule_loop(char * cfgfile, void * shm_addr, void * SOHandle) { int x; int cfg_max_parallel=0; int round_start, round_visitors; char * cfg_sched_pause; char * cfg_g_micros_before_after_check; char * cfg_sched_mode; char * cfg_sched_worker_count; int sched_pause; struct timeval stat_round_start, stat_round_end, run_c_start, run_c_end; char * cfg_mps; struct service * services; struct service_sort * ssort; int cfg_max_load; double current_load[3]; char * cfg_load_max; char * cfg_notification_aggregation; int notification_aggregate_interval; int ct, expt; int worker_slot=0; sched_pid=getpid(); gshm_addr=shm_addr; gSOHandle=SOHandle; gConfig=cfgfile; gshm_hdr=bartlby_SHM_GetHDR(shm_addr); ssort = malloc(sizeof(struct service_sort)*gshm_hdr->svccount); _log(LH_SCHED, B_LOG_INFO,"Scheduler working on %ld Services", gshm_hdr->svccount); cfg_notification_aggregation=getConfigValue("notification_aggregation_interval", cfgfile); if(cfg_notification_aggregation == NULL) { notification_aggregate_interval=0; } else { notification_aggregate_interval=atoi(cfg_notification_aggregation); free(cfg_notification_aggregation); } cfg_mps=getConfigValue("max_concurent_checks", cfgfile); if(cfg_mps == NULL) { _log(LH_SCHED, B_LOG_WARN,"<Warn>Defaulting max_concurent_checks to '20'"); cfg_max_parallel=20; } else { cfg_max_parallel=atoi(cfg_mps); free(cfg_mps); } cfg_load_max=getConfigValue("max_load", cfgfile); if(cfg_load_max == NULL) { cfg_max_load=0; } else { cfg_max_load=atoi(cfg_load_max); free(cfg_load_max); } signal(SIGINT, catch_signal); signal(SIGUSR1, catch_signal); signal(SIGUSR2, catch_signal); signal(SIGTERM, catch_signal); signal(SIGKILL, catch_signal); services=bartlby_SHM_ServiceMap(shm_addr); gservices=services; gshm_hdr->do_reload=0; cfg_sched_pause = getConfigValue("sched_round_pause", cfgfile); if(cfg_sched_pause == NULL) { sched_pause=100; _log(LH_SCHED, B_LOG_INFO,"info: sched_pause defaulted to: %d milli-seconds (set sched_round_pause to modify)", sched_pause); } else { sched_pause=atoi(cfg_sched_pause); free(cfg_sched_pause); if(sched_pause <= 0) { sched_pause=1; _log(LH_SCHED, B_LOG_WARN,"info: sched_pause really low should'nt be less than 1 milliseconds defaulting to it: %d", sched_pause); } } cfg_g_micros_before_after_check = getConfigValue("sched_micros_before_after_check", cfgfile); if(cfg_g_micros_before_after_check == NULL) { _log(LH_SCHED, B_LOG_INFO,"HINT: to tune performance see 'sched_micros_before_after_check' defaults to 700"); } else { g_micros_before_after_check=atoi(cfg_g_micros_before_after_check); _log(LH_SCHED, B_LOG_DEBUG,"micros_before_after=%d", g_micros_before_after_check); free(cfg_g_micros_before_after_check); } long local_svc_count=0; long local_idx=0; //Make a second sortable array for(x=0; x<gshm_hdr->svccount; x++) { if(bartlby_orchestra_belongs_to_orch(&services[x], cfgfile) < 0) { continue; //Kick from sched circle if service never would be checked } ssort[local_idx].svc=&services[x]; local_idx++; local_svc_count++; } _log(LH_SCHED, B_LOG_DEBUG,"Scheduler working on %ld Services after kick: %ld", gshm_hdr->svccount, local_idx); cfg_sched_mode = getConfigValue("sched_mode", cfgfile); if(cfg_sched_mode == NULL) { sched_mode=SCHED_MODE_FORK; _log(LH_SCHED, B_LOG_DEBUG,"Defaulting sched mode to SCHED_MODE_FORK"); } else { sched_mode=atoi(cfg_sched_mode); _log(LH_SCHED, B_LOG_DEBUG,"Set sched_mode to:%d", sched_mode); free(cfg_sched_mode); sched_worker_count=0; if(sched_mode == SCHED_MODE_WORKER) { cfg_sched_worker_count = getConfigValue("sched_worker_count", cfgfile); if(cfg_sched_worker_count == NULL) { sched_worker_count=5; _log(LH_SCHED, B_LOG_INFO,"Defaulting sched_worker_count to 5"); } else { sched_worker_count=atoi(cfg_sched_worker_count); _log(LH_SCHED, B_LOG_INFO,"Using %d workers", sched_worker_count); free(cfg_sched_worker_count); } _log(LH_SCHED, B_LOG_DEBUG,"USING WORKER MODE"); } if(sched_mode == SCHED_MODE_FORK) { _log(LH_SCHED, B_LOG_DEBUG,"using FORK MODE"); } if(sched_mode == SCHED_MODE_PROFILE) { _log(LH_SCHED, B_LOG_DEBUG, "using profile mode"); } } // Check if we should use worker or per check-fork sched_init_workers(); while(1) { if(gshm_hdr->do_reload == 1 || gshm_hdr->do_reload == 2) { _log(LH_SCHED, B_LOG_INFO,"queuing Reload"); sched_wait_open(1, 0); signal(SIGCHLD, SIG_IGN); free(ssort); return -2; } if(do_shutdown == 1) { _log(LH_SCHED, B_LOG_INFO,"Exit recieved"); sched_wait_open(1,0); signal(SIGCHLD, SIG_IGN); free(ssort); break; } if(gshm_hdr->sirene_mode == 1) { //We are in Sirene Mode dont check anything just notifie workers that something b ad is going on bartlby_check_sirene(cfgfile,shm_addr); continue; } round_start=time(NULL); gettimeofday(&stat_round_start,NULL); round_visitors=0; //Sort ascending on delay time so most delayed service will be checked rapidly ;) if(local_svc_count>0) { qsort(ssort, local_svc_count-1, sizeof(struct service_sort), cmpservice); } shortest_intervall=10; getloadavg(current_load, 3); sched_definitiv_running(); for(x=0; x<local_svc_count; x++) { if(do_shutdown == 1 || gshm_hdr->do_reload == 1 || gshm_hdr->do_reload == 2) { break; } if(gshm_hdr->current_running < cfg_max_parallel || (int)current_load[0] < cfg_max_load) { if(sched_check_waiting(shm_addr, ssort[x].svc, cfgfile, SOHandle, sched_pause) == 1) { if(sched_mode == SCHED_MODE_WORKER) { worker_slot=sched_find_open_worker(); if(worker_slot < 0) { sched_check_for_dead_workers(); continue; } } gettimeofday(&run_c_start,NULL); round_visitors++; ct = time(NULL); expt = (ssort[x].svc->last_check+ssort[x].svc->check_interval); if(ct > expt && ssort[x].svc->service_type != SVC_TYPE_PASSIVE) { // service check has delayed ssort[x].svc->delay_time.sum += ct - expt; } ssort[x].svc->delay_time.counter++; //WTF? if(ssort[x].svc->service_type != SVC_TYPE_PASSIVE) { ssort[x].svc->last_check=time(NULL); } bartlby_callback(EXTENSION_CALLBACK_CHECK_WILL_RUN, ssort[x].svc); sched_reschedule(ssort[x].svc); sched_run_check(ssort[x].svc, cfgfile, shm_addr, SOHandle, worker_slot); usleep(g_micros_before_after_check); gettimeofday(&run_c_end,NULL); } } else { sched_wait_for_childs(); sched_wait_open(60,cfg_max_parallel-1); } } if(time(NULL)-round_start > sched_pause*3 && sched_pause > 0) { _log(LH_SCHED, B_LOG_INFO,"Done %d Services in %ld Seconds", round_visitors, time(NULL)-round_start); } //Log Round End gettimeofday(&stat_round_end,NULL); bartlby_core_perf_track(gshm_hdr, &services[x], PERF_TYPE_ROUND_TIME, bartlby_milli_timediff(stat_round_end,stat_round_start)); sched_wait_for_childs(); usleep(sched_pause); if(shortest_intervall > 1) { usleep((shortest_intervall-1)*1000); } if(notification_aggregate_interval > 0 && time(NULL)-gshm_hdr->notification_log_aggregate_last_run >= notification_aggregate_interval) { _log(LH_SCHED, B_LOG_DEBUG,"AGGREGATION RUN"); bartlby_notification_log_aggregate(gshm_hdr, cfgfile); } bartlby_orchestra_check_timeouts(services, gshm_hdr, cfgfile, shm_addr, SOHandle); } return 1; }