// Finalize the stats hierarchy and create all stats backends (periodic,
// eventual, compact, and text). Called once at the end of SimInit, after every
// subsystem has appended its stats to rootStat.
// NOTE(review): perProcessDir is accepted but never used in this body —
// presumably reserved for per-process output directories; confirm with callers.
static void PostInitStats(bool perProcessDir, Config& config) {
    zinfo->rootStat->makeImmutable();  // no stats may be added after this point
    zinfo->trigger = 15000;
    string pathStr = zinfo->outputDir;
    pathStr += "/";
    // Absolute paths for stats files. Note these must be in the global heap.
    const char* pStatsFile = gm_strdup((pathStr + "zsim.h5").c_str());
    const char* evStatsFile = gm_strdup((pathStr + "zsim-ev.h5").c_str());
    const char* cmpStatsFile = gm_strdup((pathStr + "zsim-cmp.h5").c_str());
    const char* statsFile = gm_strdup((pathStr + "zsim.out").c_str());
    if (zinfo->statsPhaseInterval) {
        // Optional regex filter restricts which stats are dumped periodically
        const char* periodicStatsFilter = config.get<const char*>("sim.periodicStatsFilter", "");
        AggregateStat* prStat = (!strlen(periodicStatsFilter))? zinfo->rootStat : FilterStats(zinfo->rootStat, periodicStatsFilter);
        if (!prStat) panic("No stats match sim.periodicStatsFilter regex (%s)! Set interval to 0 to avoid periodic stats", periodicStatsFilter);
        zinfo->periodicStatsBackend = new HDF5Backend(pStatsFile, prStat, (1 << 20) /* 1MB chunks */, zinfo->skipStatsVectors, zinfo->compactPeriodicStats);
        zinfo->periodicStatsBackend->dump(true); //must have a first sample
        // Event that re-dumps the periodic stats every statsPhaseInterval phases
        class PeriodicStatsDumpEvent : public Event {
            public:
                explicit PeriodicStatsDumpEvent(uint32_t period) : Event(period) {}
                void callback() {
                    zinfo->trigger = 10000;
                    zinfo->periodicStatsBackend->dump(true /*buffered*/);
                }
        };
        zinfo->eventQueue->insert(new PeriodicStatsDumpEvent(zinfo->statsPhaseInterval));
    } else {
        zinfo->periodicStatsBackend = NULL;  // periodic dumping disabled
    }
    zinfo->eventualStatsBackend = new HDF5Backend(evStatsFile, zinfo->rootStat, (1 << 17) /* 128KB chunks */, zinfo->skipStatsVectors, false /* don't sum regular aggregates*/);
    zinfo->eventualStatsBackend->dump(true); //must have a first sample
    if (zinfo->maxMinInstrs) {
        warn("maxMinInstrs IS DEPRECATED");
        // One adaptive event per core: dump eventual stats once the core
        // reaches maxMinInstrs instructions
        for (uint32_t i = 0; i < zinfo->numCores; i++) {
            auto getInstrs = [i]() { return zinfo->cores[i]->getInstrs(); };
            auto dumpStats = [i]() {
                info("Dumping eventual stats for core %d", i);
                zinfo->trigger = i;
                zinfo->eventualStatsBackend->dump(true /*buffered*/);
            };
            zinfo->eventQueue->insert(makeAdaptiveEvent(getInstrs, dumpStats, 0, zinfo->maxMinInstrs, MAX_IPC*zinfo->phaseLength));
        }
    }
    zinfo->compactStatsBackend = new HDF5Backend(cmpStatsFile, zinfo->rootStat, 0 /* no aggregation, this is just 1 record */, zinfo->skipStatsVectors, true); //don't dump a first sample.
    zinfo->statsBackend = new TextBackend(statsFile, zinfo->rootStat);
}
/* send gearman admin */ int send2gearmandadmin(char * cmd, char * hostnam, int port, char ** output, char ** error) { int sockfd, n; struct sockaddr_in serv_addr; struct hostent *server; char buf[GM_BUFFERSIZE]; *error = gm_malloc(GM_BUFFERSIZE); snprintf(*error, GM_BUFFERSIZE, "%s", "" ); *output = gm_malloc(GM_BUFFERSIZE); snprintf(*output, GM_BUFFERSIZE, "%s", "" ); sockfd = socket(AF_INET, SOCK_STREAM, 0); if( sockfd < 0 ) { snprintf(*error, GM_BUFFERSIZE, "failed to open socket: %s\n", strerror(errno)); return( STATE_CRITICAL ); } server = gethostbyname(hostnam); if( server == NULL ) { snprintf(*error, GM_BUFFERSIZE, "failed to resolve %s\n", hostnam); close(sockfd); return( STATE_CRITICAL ); } serv_addr.sin_family = AF_INET; bcopy((char *)server->h_addr, (char *)&serv_addr.sin_addr.s_addr, server->h_length); serv_addr.sin_port = htons(port); if (connect(sockfd,(const struct sockaddr *)&serv_addr,sizeof(serv_addr)) < 0) { snprintf(*error, GM_BUFFERSIZE, "failed to connect to %s:%i - %s\n", hostnam, (int)port, strerror(errno)); close(sockfd); return( STATE_CRITICAL ); } n = write(sockfd,cmd,strlen(cmd)); if (n < 0) { snprintf(*error, GM_BUFFERSIZE, "failed to send to %s:%i - %s\n", hostnam, (int)port, strerror(errno)); close(sockfd); return( STATE_CRITICAL ); } n = read( sockfd, buf, GM_BUFFERSIZE-1 ); if (n < 0) { snprintf(*error, GM_BUFFERSIZE, "error reading from %s:%i - %s\n", hostnam, (int)port, strerror(errno)); close(sockfd); return( STATE_CRITICAL ); } buf[n] = '\x0'; free(*output); *output = gm_strdup(buf); close(sockfd); return( STATE_OK ); }
/* parse the module arguments */ static int read_arguments( const char *args_orig ) { int verify; int errors = 0; char *ptr; char *args; char *args_c; if (args_orig == NULL) { gm_log( GM_LOG_ERROR, "error parsing arguments: none provided.\n" ); return GM_ERROR; } args = gm_strdup(args_orig); args_c = args; while ( (ptr = strsep( &args, " " )) != NULL ) { if(parse_args_line(mod_gm_opt, ptr, 0) != GM_OK) { errors++; break; } } verify = verify_options(mod_gm_opt); if(mod_gm_opt->debug_level >= GM_LOG_DEBUG) { dumpconfig(mod_gm_opt, GM_NEB_MODE); } /* read keyfile */ if(mod_gm_opt->keyfile != NULL && read_keyfile(mod_gm_opt) != GM_OK) { errors++; } free(args_c); if(errors > 0) { return(GM_ERROR); } return(verify); }
/* parse command line arguments */ int parse_arguments(int argc, char **argv) { int i; int verify; int errors = 0; mod_gm_opt = gm_malloc(sizeof(mod_gm_opt_t)); set_default_options(mod_gm_opt); /* special default: encryption disabled */ mod_gm_opt->encryption = GM_DISABLED; for(i=1;i<argc;i++) { char * arg = gm_strdup( argv[i] ); char * arg_c = arg; if ( !strcmp( arg, "version" ) || !strcmp( arg, "--version" ) || !strcmp( arg, "-V" ) ) { print_version(); } if ( !strcmp( arg, "help" ) || !strcmp( arg, "--help" ) || !strcmp( arg, "-h" ) ) { print_usage(); } if(parse_args_line(mod_gm_opt, arg, 0) != GM_OK) { errors++; free(arg_c); break; } free(arg_c); } /* verify options */ verify = verify_options(mod_gm_opt); /* read keyfile */ if(mod_gm_opt->keyfile != NULL && read_keyfile(mod_gm_opt) != GM_OK) { errors++; } if(errors > 0 || verify != GM_OK) { return(GM_ERROR); } return(GM_OK); }
// Build per-process replicas of the per-core stats tree. Validates that every
// child of coreStats is a regular aggregate with one element per core, then
// creates maxProcs "procStats-<p>" aggregates mirroring that structure, plus
// the flat buffers used to compute per-phase deltas.
ProcStats::ProcStats(AggregateStat* parentStat, AggregateStat* _coreStats) : coreStats(_coreStats) {
    // NOTE(review): the number of per-process buckets is taken from
    // zinfo->lineSize (the cache line size). That reads like a stand-in for a
    // max-process count rather than a deliberate choice — confirm whether this
    // should be a process-count field instead.
    uint32_t maxProcs = zinfo->lineSize;
    lastUpdatePhase = 0;

    // Check that coreStats are appropriate
    assert(coreStats);
    for (uint32_t i = 0; i < coreStats->size(); i++) {
        Stat* s = coreStats->get(i);
        AggregateStat* as = dynamic_cast<AggregateStat*>(s);
        // err captures s so the panic message can name the offending stat
        auto err = [s](const char* reason) {
            panic("Stat %s is not per-core (%s)", s->name(), reason);
        };
        if (!as) err("not aggregate stat");
        if (!as->isRegular()) err("irregular aggregate");
        if (as->size() != zinfo->numCores) err("elems != cores");
    }

    // Initialize all the buffers (flat snapshots of the whole coreStats tree)
    bufSize = StatSize(coreStats);
    buf = gm_calloc<uint64_t>(bufSize);
    lastBuf = gm_calloc<uint64_t>(bufSize);

    // Create the procStats: one sub-aggregate per process slot, each holding a
    // replica (replStat) of every per-core stat's first element
    procStats = new AggregateStat(true);
    procStats->init("procStats", "Per-process stats");
    for (uint32_t p = 0; p < maxProcs; p++) {
        AggregateStat* ps = new AggregateStat(false);
        // name must live in the global heap, hence gm_strdup
        const char* name = gm_strdup(("procStats-" + Str(p)).c_str());
        ps->init(name, "Per-process stats");
        for (uint32_t i = 0; i < coreStats->size(); i++) {
            AggregateStat* as = dynamic_cast<AggregateStat*>(coreStats->get(i));
            assert(as && as->isRegular());
            ps->append(replStat(as->get(0), as->name(), as->desc()));
        }
        procStats->append(ps);
    }
    parentStat->append(procStats);
}
/* submit result */
/* Assemble a gearman job payload from the global options (host, service,
 * timings, return code, plugin output) and push it onto the result queue,
 * optionally duplicating it to a second server list.
 * Returns STATE_OK on success, STATE_UNKNOWN if the primary submit fails. */
int submit_result() {
    char * buf;
    char * temp_buffer;
    char * result;
    struct timeval now;
    struct timeval starttime;
    struct timeval finishtime;
    int resultsize;

    gettimeofday(&now, NULL);

    /* fall back to "now" for any timing the caller did not supply */
    if(mod_gm_opt->has_starttime == FALSE) {
        starttime = now;
    } else {
        starttime = mod_gm_opt->starttime;
    }
    if(mod_gm_opt->has_finishtime == FALSE) {
        finishtime = now;
    } else {
        finishtime = mod_gm_opt->finishtime;
    }
    if(mod_gm_opt->has_latency == FALSE) {
        mod_gm_opt->latency.tv_sec  = 0;
        mod_gm_opt->latency.tv_usec = 0;
    }

    /* escape newline */
    buf = gm_escape_newlines(mod_gm_opt->message, GM_DISABLED);
    free(mod_gm_opt->message);
    mod_gm_opt->message = gm_strdup(buf);
    free(buf);

    gm_log( GM_LOG_TRACE, "queue: %s\n", mod_gm_opt->result_queue );

    /* payload buffer: message length plus fixed headroom for the header lines.
     * NOTE(review): the strcat()s below also append the service_description,
     * which is not accounted for in resultsize — an extremely long service
     * name could overflow; confirm upstream length limits. */
    resultsize = sizeof(char) * strlen(mod_gm_opt->message) + GM_BUFFERSIZE;
    result = gm_malloc(resultsize);
    snprintf( result, resultsize-1, "type=%s\nhost_name=%s\nstart_time=%Lf\nfinish_time=%Lf\nlatency=%Lf\nreturn_code=%i\nsource=send_gearman\n", mod_gm_opt->active == GM_ENABLED ? "active" : "passive", mod_gm_opt->host, timeval2double(&starttime), timeval2double(&finishtime), timeval2double(&mod_gm_opt->latency), mod_gm_opt->return_code );

    temp_buffer = gm_malloc(resultsize);

    /* optional service_description= line (host results omit it) */
    if(mod_gm_opt->service != NULL) {
        temp_buffer[0]='\x0';
        strcat(temp_buffer, "service_description=");
        strcat(temp_buffer, mod_gm_opt->service);
        strcat(temp_buffer, "\n");
        strcat(result, temp_buffer);
    }

    /* output= line carries the (newline-escaped) plugin output */
    if(mod_gm_opt->message != NULL) {
        temp_buffer[0]='\x0';
        strcat(temp_buffer, "output=");
        strcat(temp_buffer, mod_gm_opt->message);
        strcat(temp_buffer, "\n");
        strcat(result, temp_buffer);
    }
    strcat(result, "\n");

    gm_log( GM_LOG_TRACE, "data:\n%s\n", result);

    if(add_job_to_queue( &client, mod_gm_opt->server_list, mod_gm_opt->result_queue, NULL, result, GM_JOB_PRIO_NORMAL, GM_DEFAULT_JOB_RETRIES, mod_gm_opt->transportmode, TRUE ) == GM_OK) {
        gm_log( GM_LOG_TRACE, "send_result_back() finished successfully\n" );

        /* best-effort duplicate to the secondary server list; its failure does
         * not change the overall return code */
        if( mod_gm_opt->dupserver_num ) {
            if(add_job_to_queue( &client_dup, mod_gm_opt->dupserver_list, mod_gm_opt->result_queue, NULL, result, GM_JOB_PRIO_NORMAL, GM_DEFAULT_JOB_RETRIES, mod_gm_opt->transportmode, TRUE ) == GM_OK) {
                gm_log( GM_LOG_TRACE, "send_result_back() finished successfully for duplicate server.\n" );
            }
            else {
                gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully for duplicate server\n" );
            }
        }
    }
    else {
        gm_log( GM_LOG_TRACE, "send_result_back() finished unsuccessfully\n" );
        free(result);
        free(temp_buffer);
        return( STATE_UNKNOWN );
    }
    free(result);
    free(temp_buffer);
    return( STATE_OK );
}
/* send message to job server */
/* Driver for submitting results. Three modes:
 *  1. no host given: read delimiter-separated result lines from stdin
 *     (host[,service],rc,output) and submit each one;
 *  2. host given but no message: slurp multi-line plugin output from stdin;
 *  3. everything given: submit directly.
 * Returns STATE_OK or STATE_UNKNOWN. */
int send_result() {
    char *ptr1, *ptr2, *ptr3, *ptr4;
    char buffer[GM_BUFFERSIZE];

    gm_log( GM_LOG_TRACE, "send_result()\n" );

    if(mod_gm_opt->result_queue == NULL) {
        printf( "got no result queue, please use --result_queue=...\n" );
        return( STATE_UNKNOWN );
    }

    /* multiple results */
    if(mod_gm_opt->host == NULL) {
        while(fgets(buffer,sizeof(buffer)-1,stdin)) {
            if(feof(stdin))
                break;

            /* disable alarm */
            alarm(0);

            /* read host_name */
            ptr1=strtok(buffer,mod_gm_opt->delimiter);
            if(ptr1==NULL)
                continue;

            /* get the service description or return code */
            ptr2=strtok(NULL,mod_gm_opt->delimiter);
            if(ptr2==NULL)
                continue;

            /* get the return code or plugin output */
            ptr3=strtok(NULL,mod_gm_opt->delimiter);
            if(ptr3==NULL)
                continue;

            /* get the plugin output - if NULL, this is a host check result */
            ptr4=strtok(NULL,"\n");

            /* drop values from the previous iteration before re-filling them */
            free(mod_gm_opt->host);
            if(mod_gm_opt->service != NULL) {
                free(mod_gm_opt->service);
                mod_gm_opt->service = NULL;
            }
            free(mod_gm_opt->message);

            /* host result */
            if(ptr4 == NULL) {
                mod_gm_opt->host        = gm_strdup(ptr1);
                mod_gm_opt->return_code = atoi(ptr2);
                mod_gm_opt->message     = gm_strdup(ptr3);
            }
            else {
                /* service result */
                mod_gm_opt->host        = gm_strdup(ptr1);
                mod_gm_opt->service     = gm_strdup(ptr2);
                mod_gm_opt->return_code = atoi(ptr3);
                mod_gm_opt->message     = gm_strdup(ptr4);
            }
            if(submit_result() == STATE_OK) {
                results_sent++;
            } else {
                printf("failed to send result!\n");
                return(STATE_UNKNOWN);
            }
        }
        printf("%d data packet(s) sent to host successfully.\n",results_sent);
        return(STATE_OK);
    }
    /* multi line plugin output */
    else if(mod_gm_opt->message == NULL) {
        /* get all lines from stdin, guarded by the configured timeout alarm */
        mod_gm_opt->message = gm_malloc(GM_BUFFERSIZE);
        alarm(mod_gm_opt->timeout);
        read_filepointer(&mod_gm_opt->message, stdin);
        alarm(0);
    }

    return(submit_result());
}
/* return human readable name for eventtype */
char * eventtype2str(int i) {
    (void)i;  /* currently unused: every event type maps to the same label */
    return gm_strdup("UNKNOWN");
}
/* return human readable name for nebcallback */
char * nebcallback2str(int i) {
    (void)i;  /* currently unused: every callback id maps to the same label */
    return gm_strdup("UNKNOWN");
}
/* return human readable name for neb type */
char * nebtype2str(int i) {
    (void)i;  /* currently unused: every neb type maps to the same label */
    return gm_strdup("UNKNOWN");
}
/* Parse one child-check record out of a send_multi buffer [bufstart,bufend)
 * into the global mod_gm_opt (service, return code, timings, message).
 * end_time is the wall-clock finish time of send_multi itself; the start time
 * is reconstructed by subtracting the reported runtime from it.
 * Returns 1 when a complete child record was parsed, 0 otherwise (including
 * for the parent record, which has no=="0"). */
int read_child_check(char *bufstart, char *bufend, struct timeval * end_time) {
    char *attribute  = NULL;
    char *attribute2 = NULL;
    char *attribute3 = NULL;
    char *error      = NULL;
    char temp_buffer[GM_BUFFERSIZE];
    double end_time_d;
    struct timeval start_time;

    /* child check number */
    if ((attribute=read_multi_attribute(bufstart,bufend,"no")) == NULL) {
        return 0;
    } else {
        /* skip parent check */
        if (!strcmp(attribute,"0")) {
            return 0;
        }
        gm_log( GM_LOG_TRACE, "child check: %d\n", atoi(attribute));
    }

    /* service description */
    if ((attribute=read_multi_attribute(bufstart,bufend,"name")) == NULL)
        return 0;
    mod_gm_opt->service=gm_strdup(attribute);
    gm_log( GM_LOG_TRACE, "service_description: %s\n", mod_gm_opt->service);

    /* return code */
    if ((attribute=read_multi_attribute(bufstart,bufend,"rc")) == NULL)
        return 0;
    mod_gm_opt->return_code=atoi(attribute);
    gm_log( GM_LOG_TRACE, "mod_gm_opt->return_code: %d\n", mod_gm_opt->return_code);

    /* runtime */
    if ((attribute=read_multi_attribute(bufstart,bufend,"runtime")) == NULL)
        return 0;
    /* start time = end time minus reported runtime */
    end_time_d = timeval2double(end_time);
    double2timeval(end_time_d - atof(attribute), &start_time);
    mod_gm_opt->starttime.tv_sec  = start_time.tv_sec;
    mod_gm_opt->starttime.tv_usec = start_time.tv_usec;
    gm_log( GM_LOG_TRACE, "starttime: %d.%d\n", mod_gm_opt->starttime.tv_sec, mod_gm_opt->starttime.tv_usec);

    /* end time is the execution time of send_multi itself */
    mod_gm_opt->finishtime.tv_sec  = end_time->tv_sec;
    mod_gm_opt->finishtime.tv_usec = end_time->tv_usec;
    gm_log( GM_LOG_TRACE, "endtime: %d.%d\n", mod_gm_opt->finishtime.tv_sec, mod_gm_opt->finishtime.tv_usec);

    /* message */
    if ((attribute=read_multi_attribute(bufstart,bufend,"output")) == NULL)
        return 0;

    /* stderr */
    if ((error=read_multi_attribute(bufstart,bufend,"error")) == NULL) {
        return 0;
    /* if error found: 'error' -> ' [error]' */
    } else if (*error) {
        /* NOTE(review): this writes into the two bytes immediately BEFORE the
         * returned pointer and strcat()s one byte past its end — it relies on
         * read_multi_attribute() returning a pointer into a larger writable
         * buffer with that slack available; confirm against its implementation. */
        *(--error)='[';
        *(--error)=' ';
        strcat(error,"]");
    }

    /* performance data with multi headers */
    if ((attribute2=read_multi_attribute(bufstart,bufend,"performance")) == NULL) {
        /* no perfdata: message is just output plus the (possibly empty) error */
        snprintf( temp_buffer, sizeof( temp_buffer )-1, "%s%s", decode_xml(attribute), decode_xml(error));
    } else if ((attribute3=read_multi_attribute(bufstart,bufend,"pplugin")) == NULL) {
        return 0;
    } else {
        attribute2 = trim(attribute2);
        attribute2 = decode_xml(attribute2);
        /* do we have a single quote performance label? then single quote the whole multi header */
        if (*attribute2 == '\'') {
            attribute2++;
            snprintf( temp_buffer, sizeof( temp_buffer )-1, "%s%s|\'%s::%s::%s", decode_xml(attribute), decode_xml(error), mod_gm_opt->service, decode_xml(attribute3), decode_xml(attribute2));
        /* normal header without single quotes */
        } else {
            snprintf( temp_buffer, sizeof( temp_buffer )-1, "%s%s|%s::%s::%s", decode_xml(attribute), decode_xml(error), mod_gm_opt->service, decode_xml(attribute3), decode_xml(attribute2));
        }
    }
    mod_gm_opt->message=gm_strdup(temp_buffer);
    gm_log( GM_LOG_TRACE, "mod_gm_opt->message: %s\n", mod_gm_opt->message);
    return 1;
}
/* handle service check events */ static int handle_svc_check( int event_type, void *data ) { host * hst = NULL; service * svc = NULL; char *raw_command=NULL; char *processed_command=NULL; nebstruct_service_check_data * svcdata; int prio = GM_JOB_PRIO_LOW; check_result * chk_result; struct timeval core_time; struct tm next_check; char buffer1[GM_BUFFERSIZE]; gettimeofday(&core_time,NULL); gm_log( GM_LOG_TRACE, "handle_svc_check(%i, data)\n", event_type ); svcdata = ( nebstruct_service_check_data * )data; if ( event_type != NEBCALLBACK_SERVICE_CHECK_DATA ) return NEB_OK; /* ignore non-initiate service checks */ if ( svcdata->type != NEBTYPE_SERVICECHECK_ASYNC_PRECHECK ) return NEB_OK; /* get objects and set target function */ if((svc=svcdata->object_ptr)==NULL) { gm_log( GM_LOG_ERROR, "Service handler received NULL service object pointer.\n" ); return NEBERROR_CALLBACKCANCEL; } /* find the host associated with this service */ if((hst=svc->host_ptr)==NULL) { gm_log( GM_LOG_ERROR, "Service handler received NULL host object pointer.\n" ); return NEBERROR_CALLBACKCANCEL; } set_target_queue( hst, svc ); /* local check? 
*/ if(!strcmp( target_queue, "" )) { gm_log( GM_LOG_DEBUG, "passing by local servicecheck: %s - %s\n", svcdata->host_name, svcdata->service_description); return NEB_OK; } gm_log( GM_LOG_DEBUG, "received job for queue %s: %s - %s\n", target_queue, svcdata->host_name, svcdata->service_description ); temp_buffer[0]='\x0'; /* as we have to intercept service checks so early * (we cannot cancel checks otherwise) * we have to do some service check logic here * taken from checks.c: */ /* clear check options - we don't want old check options retained */ svc->check_options=CHECK_OPTION_NONE; /* unset the freshening flag, otherwise only the first freshness check would be run */ svc->is_being_freshened=FALSE; /* grab the host and service macro variables */ clear_volatile_macros(); grab_host_macros(hst); grab_service_macros(svc); /* get the raw command line */ get_raw_command_line(svc->check_command_ptr,svc->check_command,&raw_command,0); if(raw_command==NULL){ gm_log( GM_LOG_ERROR, "Raw check command for service '%s' on host '%s' was NULL - aborting.\n", svc->description, svc->host_name ); return NEBERROR_CALLBACKCANCEL; } /* process any macros contained in the argument */ process_macros(raw_command, &processed_command, 0); if(processed_command==NULL) { gm_log( GM_LOG_ERROR, "Processed check command for service '%s' on host '%s' was NULL - aborting.\n", svc->description, svc->host_name); my_free(raw_command); return NEBERROR_CALLBACKCANCEL; } /* log latency */ if(mod_gm_opt->debug_level >= GM_LOG_DEBUG) { localtime_r(&svc->next_check, &next_check); strftime(buffer1, sizeof(buffer1), "%Y-%m-%d %H:%M:%S", &next_check ); gm_log( GM_LOG_DEBUG, "service: '%s' - '%s', next_check is at %s, latency so far: %i\n", svcdata->host_name, svcdata->service_description, buffer1, ((int)core_time.tv_sec - (int)svc->next_check)); } /* increment number of service checks that are currently running... 
*/ currently_running_service_checks++; /* set the execution flag */ svc->is_executing=TRUE; gm_log( GM_LOG_TRACE, "cmd_line: %s\n", processed_command ); snprintf( temp_buffer,GM_BUFFERSIZE-1,"type=service\nresult_queue=%s\nhost_name=%s\nservice_description=%s\nstart_time=%i.0\nnext_check=%i.0\ncore_time=%i.%i\ntimeout=%d\ncommand_line=%s\n\n\n", mod_gm_opt->result_queue, svcdata->host_name, svcdata->service_description, (int)svc->next_check, (int)svc->next_check, (int)core_time.tv_sec, (int)core_time.tv_usec, service_check_timeout, processed_command ); uniq[0]='\x0'; snprintf( uniq,GM_BUFFERSIZE-1,"%s-%s", svcdata->host_name, svcdata->service_description); /* execute forced checks with high prio as they are propably user requested */ if(svc->check_options & CHECK_OPTION_FORCE_EXECUTION) prio = GM_JOB_PRIO_HIGH; if(add_job_to_queue( &client, mod_gm_opt->server_list, target_queue, (mod_gm_opt->use_uniq_jobs == GM_ENABLED ? uniq : NULL), temp_buffer, prio, GM_DEFAULT_JOB_RETRIES, mod_gm_opt->transportmode, TRUE ) == GM_OK) { gm_log( GM_LOG_TRACE, "handle_svc_check() finished successfully\n" ); } else { my_free(raw_command); my_free(processed_command); /* unset the execution flag */ svc->is_executing=FALSE; /* decrement number of host checks that are currently running */ currently_running_service_checks--; gm_log( GM_LOG_TRACE, "handle_svc_check() finished unsuccessfully\n" ); return NEBERROR_CALLBACKCANCEL; } /* clean up */ my_free(raw_command); my_free(processed_command); /* orphaned check - submit fake result to mark service as orphaned */ if(mod_gm_opt->orphan_service_checks == GM_ENABLED && svc->check_options & CHECK_OPTION_ORPHAN_CHECK) { gm_log( GM_LOG_DEBUG, "service check for %s - %s orphaned\n", svc->host_name, svc->description ); if ( ( chk_result = ( check_result * )gm_malloc( sizeof *chk_result ) ) == 0 ) return NEBERROR_CALLBACKCANCEL; snprintf( temp_buffer,GM_BUFFERSIZE-1,"(service check orphaned, is the mod-gearman worker on queue '%s' running?)\n", 
target_queue); init_check_result(chk_result); chk_result->host_name = gm_strdup( svc->host_name ); chk_result->service_description = gm_strdup( svc->description ); chk_result->scheduled_check = TRUE; chk_result->reschedule_check = TRUE; chk_result->output_file = 0; chk_result->output_file_fp = NULL; chk_result->output = gm_strdup(temp_buffer); chk_result->return_code = mod_gm_opt->orphan_return; chk_result->check_options = CHECK_OPTION_NONE; chk_result->object_check_type = SERVICE_CHECK; chk_result->check_type = SERVICE_CHECK_ACTIVE; chk_result->start_time.tv_sec = (unsigned long)time(NULL); chk_result->finish_time.tv_sec = (unsigned long)time(NULL); chk_result->latency = 0; mod_gm_add_result_to_list( chk_result ); chk_result = NULL; } /* tell naemon to not execute */ gm_log( GM_LOG_TRACE, "handle_svc_check() finished successfully -> %d\n", NEBERROR_CALLBACKOVERRIDE ); return NEBERROR_CALLBACKOVERRIDE; }
/* handle host check events */ static int handle_host_check( int event_type, void *data ) { nebstruct_host_check_data * hostdata; char *raw_command=NULL; char *processed_command=NULL; host * hst; check_result * chk_result; int check_options; struct timeval core_time; struct tm next_check; char buffer1[GM_BUFFERSIZE]; gettimeofday(&core_time,NULL); gm_log( GM_LOG_TRACE, "handle_host_check(%i)\n", event_type ); if ( mod_gm_opt->do_hostchecks != GM_ENABLED ) return NEB_OK; hostdata = ( nebstruct_host_check_data * )data; gm_log( GM_LOG_TRACE, "---------------\nhost Job -> %i, %i\n", event_type, hostdata->type ); if ( event_type != NEBCALLBACK_HOST_CHECK_DATA ) return NEB_OK; /* ignore non-initiate host checks */ if ( hostdata->type != NEBTYPE_HOSTCHECK_ASYNC_PRECHECK && hostdata->type != NEBTYPE_HOSTCHECK_SYNC_PRECHECK) return NEB_OK; /* get objects and set target function */ if((hst=hostdata->object_ptr)==NULL) { gm_log( GM_LOG_ERROR, "Host handler received NULL host object pointer.\n" ); return NEBERROR_CALLBACKCANCEL; } set_target_queue( hst, NULL ); /* local check? 
*/ if(!strcmp( target_queue, "" )) { gm_log( GM_LOG_DEBUG, "passing by local hostcheck: %s\n", hostdata->host_name ); return NEB_OK; } gm_log( GM_LOG_DEBUG, "received job for queue %s: %s\n", target_queue, hostdata->host_name ); /* as we have to intercept host checks so early * (we cannot cancel checks otherwise) * we have to do some host check logic here * taken from checks.c: */ /* clear check options - we don't want old check options retained */ check_options = hst->check_options; hst->check_options = CHECK_OPTION_NONE; /* unset the freshening flag, otherwise only the first freshness check would be run */ hst->is_being_freshened=FALSE; /* adjust host check attempt */ adjust_host_check_attempt(hst,TRUE); temp_buffer[0]='\x0'; /* grab the host macro variables */ clear_volatile_macros(); grab_host_macros(hst); /* get the raw command line */ get_raw_command_line(hst->check_command_ptr,hst->check_command,&raw_command,0); if(raw_command==NULL){ gm_log( GM_LOG_ERROR, "Raw check command for host '%s' was NULL - aborting.\n",hst->name ); return NEBERROR_CALLBACKCANCEL; } /* process any macros contained in the argument */ process_macros(raw_command,&processed_command,0); if(processed_command==NULL){ gm_log( GM_LOG_ERROR, "Processed check command for host '%s' was NULL - aborting.\n",hst->name); return NEBERROR_CALLBACKCANCEL; } /* log latency */ if(mod_gm_opt->debug_level >= GM_LOG_DEBUG) { localtime_r(&hst->next_check, &next_check); strftime(buffer1, sizeof(buffer1), "%Y-%m-%d %H:%M:%S", &next_check ); gm_log( GM_LOG_DEBUG, "host: '%s', next_check is at %s, latency so far: %i\n", hst->name, buffer1, ((int)core_time.tv_sec - (int)hst->next_check)); } /* increment number of host checks that are currently running */ currently_running_host_checks++; /* set the execution flag */ hst->is_executing=TRUE; gm_log( GM_LOG_TRACE, "cmd_line: %s\n", processed_command ); snprintf( 
temp_buffer,GM_BUFFERSIZE-1,"type=host\nresult_queue=%s\nhost_name=%s\nstart_time=%i.0\nnext_check=%i.0\ntimeout=%d\ncore_time=%i.%i\ncommand_line=%s\n\n\n", mod_gm_opt->result_queue, hst->name, (int)hst->next_check, (int)hst->next_check, host_check_timeout, (int)core_time.tv_sec, (int)core_time.tv_usec, processed_command ); if(add_job_to_queue( &client, mod_gm_opt->server_list, target_queue, (mod_gm_opt->use_uniq_jobs == GM_ENABLED ? hst->name : NULL), temp_buffer, GM_JOB_PRIO_NORMAL, GM_DEFAULT_JOB_RETRIES, mod_gm_opt->transportmode, TRUE ) == GM_OK) { } else { my_free(raw_command); my_free(processed_command); /* unset the execution flag */ hst->is_executing=FALSE; /* decrement number of host checks that are currently running */ currently_running_host_checks--; gm_log( GM_LOG_TRACE, "handle_host_check() finished unsuccessfully -> %d\n", NEBERROR_CALLBACKCANCEL ); return NEBERROR_CALLBACKCANCEL; } /* clean up */ my_free(raw_command); my_free(processed_command); /* orphaned check - submit fake result to mark host as orphaned */ if(mod_gm_opt->orphan_host_checks == GM_ENABLED && check_options & CHECK_OPTION_ORPHAN_CHECK) { gm_log( GM_LOG_DEBUG, "host check for %s orphaned\n", hst->name ); if ( ( chk_result = ( check_result * )gm_malloc( sizeof *chk_result ) ) == 0 ) return NEBERROR_CALLBACKCANCEL; snprintf( temp_buffer,GM_BUFFERSIZE-1,"(host check orphaned, is the mod-gearman worker on queue '%s' running?)\n", target_queue); init_check_result(chk_result); chk_result->host_name = gm_strdup( hst->name ); chk_result->scheduled_check = TRUE; chk_result->reschedule_check = TRUE; chk_result->output_file = 0; chk_result->output_file_fp = NULL; chk_result->output = gm_strdup(temp_buffer); chk_result->return_code = mod_gm_opt->orphan_return; chk_result->check_options = CHECK_OPTION_NONE; chk_result->object_check_type = HOST_CHECK; chk_result->check_type = HOST_CHECK_ACTIVE; chk_result->start_time.tv_sec = (unsigned long)time(NULL); chk_result->finish_time.tv_sec = (unsigned 
long)time(NULL); chk_result->latency = 0; mod_gm_add_result_to_list( chk_result ); chk_result = NULL; } /* tell naemon to not execute */ gm_log( GM_LOG_TRACE, "handle_host_check() finished successfully -> %d\n", NEBERROR_CALLBACKOVERRIDE ); return NEBERROR_CALLBACKOVERRIDE; }
// Master initialization for the simulator: allocates the global-heap info
// struct, reads the whole configuration, builds scheduler, process tree and
// memory hierarchy, wires up stats, and finally publishes zinfo so the other
// processes waiting on the global pointer can start. Order of operations here
// is load-bearing (e.g. process tree before memory hierarchy).
void SimInit(const char* configFile, const char* outputDir, uint32_t shmid) {
    zinfo = gm_calloc<GlobSimInfo>();
    zinfo->outputDir = gm_strdup(outputDir);

    Config config(configFile);

    //Debugging
    //NOTE: This should be as early as possible, so that we can attach to the debugger before initialization.
    zinfo->attachDebugger = config.get<bool>("sim.attachDebugger", false);
    zinfo->harnessPid = getppid();
    getLibzsimAddrs(&zinfo->libzsimAddrs);

    if (zinfo->attachDebugger) {
        gm_set_secondary_ptr(&zinfo->libzsimAddrs);
        notifyHarnessForDebugger(zinfo->harnessPid);
    }

    PreInitStats();

    //Get the number of cores
    //TODO: There is some duplication with the core creation code. This should be fixed eventually.
    uint32_t numCores = 0;
    vector<const char*> groups;
    config.subgroups("sys.cores", groups);
    for (const char* group : groups) {
        uint32_t cores = config.get<uint32_t>(string("sys.cores.") + group + ".cores", 1);
        numCores += cores;
    }

    if (numCores == 0) panic("Config must define some core classes in sys.cores; sys.numCores is deprecated");
    zinfo->numCores = numCores;
    assert(numCores <= MAX_THREADS); //TODO: Is there any reason for this limit?

    zinfo->numDomains = config.get<uint32_t>("sim.domains", 1);
    uint32_t numSimThreads = config.get<uint32_t>("sim.contentionThreads", MAX((uint32_t)1, zinfo->numDomains/2)); //gives a bit of parallelism, TODO tune
    zinfo->contentionSim = new ContentionSim(zinfo->numDomains, numSimThreads);
    zinfo->contentionSim->initStats(zinfo->rootStat);
    zinfo->eventRecorders = gm_calloc<EventRecorder*>(numCores);

    // Global simulation values
    zinfo->numPhases = 0;
    zinfo->phaseLength = config.get<uint32_t>("sim.phaseLength", 10000);
    zinfo->statsPhaseInterval = config.get<uint32_t>("sim.statsPhaseInterval", 100);
    zinfo->freqMHz = config.get<uint32_t>("sys.frequency", 2000);

    //Maxima/termination conditions
    zinfo->maxPhases = config.get<uint64_t>("sim.maxPhases", 0);
    zinfo->maxMinInstrs = config.get<uint64_t>("sim.maxMinInstrs", 0);
    zinfo->maxTotalInstrs = config.get<uint64_t>("sim.maxTotalInstrs", 0);

    // maxSimTime is configured in seconds; stored internally in nanoseconds
    uint64_t maxSimTime = config.get<uint32_t>("sim.maxSimTime", 0);
    zinfo->maxSimTimeNs = maxSimTime*1000L*1000L*1000L;

    zinfo->maxProcEventualDumps = config.get<uint32_t>("sim.maxProcEventualDumps", 0);
    zinfo->procEventualDumps = 0;

    zinfo->skipStatsVectors = config.get<bool>("sim.skipStatsVectors", false);
    zinfo->compactPeriodicStats = config.get<bool>("sim.compactPeriodicStats", false);

    //Fast-forwarding and magic ops
    zinfo->ignoreHooks = config.get<bool>("sim.ignoreHooks", false);
    zinfo->ffReinstrument = config.get<bool>("sim.ffReinstrument", false);
    if (zinfo->ffReinstrument) warn("sim.ffReinstrument = true, switching fast-forwarding on a multi-threaded process may be unstable");

    zinfo->registerThreads = config.get<bool>("sim.registerThreads", false);
    zinfo->globalPauseFlag = config.get<bool>("sim.startInGlobalPause", false);

    zinfo->eventQueue = new EventQueue(); //must be instantiated before the memory hierarchy

    //Build the scheduler
    uint32_t parallelism = config.get<uint32_t>("sim.parallelism", 2*sysconf(_SC_NPROCESSORS_ONLN));
    if (parallelism < zinfo->numCores) info("Limiting concurrent threads to %d", parallelism);
    assert(parallelism > 0); //jeez...

    uint32_t schedQuantum = config.get<uint32_t>("sim.schedQuantum", 10000); //phases
    zinfo->sched = new Scheduler(EndOfPhaseActions, parallelism, zinfo->numCores, schedQuantum);

    zinfo->blockingSyscalls = config.get<bool>("sim.blockingSyscalls", false);
    if (zinfo->blockingSyscalls) {
        warn("sim.blockingSyscalls = True, will likely deadlock with multi-threaded apps!");
    }

    InitGlobalStats();

    //Core stats (initialized here for cosmetic reasons, to be above cache stats)
    AggregateStat* allCoreStats = new AggregateStat(false);
    allCoreStats->init("core", "Core stats");
    zinfo->rootStat->append(allCoreStats);

    //Process tree needs this initialized, even though it is part of the memory hierarchy
    zinfo->lineSize = config.get<uint32_t>("sys.lineSize", 64);
    assert(zinfo->lineSize > 0);

    //Address randomization
    zinfo->addressRandomization = config.get<bool>("sys.addressRandomization", false);

    //Port virtualization
    for (uint32_t i = 0; i < MAX_PORT_DOMAINS; i++) zinfo->portVirt[i] = new PortVirtualizer();

    //Process hierarchy
    //NOTE: Due to partitioning, must be done before initializing memory hierarchy
    CreateProcessTree(config);
    zinfo->procArray[0]->notifyStart(); //called here so that we can detect end-before-start races

    zinfo->pinCmd = new PinCmd(&config, NULL /*don't pass config file to children --- can go either way, it's optional*/, outputDir, shmid);

    //Caches, cores, memory controllers
    InitSystem(config);

    //Sched stats (deferred because of circular deps)
    zinfo->sched->initStats(zinfo->rootStat);

    zinfo->processStats = new ProcessStats(zinfo->rootStat); //It's a global stat, but I want it to be last...

    // NOTE(review): the heartbeat vector is sized by zinfo->lineSize (the cache
    // line size) even though it is documented as per-process — this looks like
    // a stand-in for a process count; confirm intent.
    zinfo->profHeartbeats = new VectorCounter();
    zinfo->profHeartbeats->init("heartbeats", "Per-process heartbeats", zinfo->lineSize);
    zinfo->rootStat->append(zinfo->profHeartbeats);

    bool perProcessDir = config.get<bool>("sim.perProcessDir", false);
    PostInitStats(perProcessDir, config);

    zinfo->perProcessCpuEnum = config.get<bool>("sim.perProcessCpuEnum", false);

    //Odds and ends
    bool printMemoryStats = config.get<bool>("sim.printMemoryStats", false);
    if (printMemoryStats) {
        gm_stats();
    }

    //HACK: Read all variables that are read in the harness but not in init
    //This avoids warnings on those elements
    config.get<uint32_t>("sim.gmMBytes", (1 << 10));
    if (!zinfo->attachDebugger) config.get<bool>("sim.deadlockDetection", true);
    config.get<bool>("sim.aslr", false);

    //Write config out
    bool strictConfig = config.get<bool>("sim.strictConfig", true); //if true, panic on unused variables
    config.writeAndClose((string(zinfo->outputDir) + "/out.cfg").c_str(), strictConfig);

    zinfo->contentionSim->postInit();

    info("Initialization complete");

    //Causes every other process to wake up
    gm_set_glob_ptr(zinfo);
}
/* Builds the whole simulated system from the config file:
 *  1. the cache hierarchy (a tree of cache groups rooted at a single LLC),
 *  2. the memory controllers (optionally behind an address splitter),
 *  3. the cores (Simple/Timing/OOO/Null), wired to their terminal i/d-caches,
 * and finally registers per-group stats on zinfo->rootStat.
 * Panics on any structural inconsistency in the config (loops, bad parents,
 * non-divisible parent/child counts, over/under-subscribed terminal caches). */
static void InitSystem(Config& config) {
    unordered_map<string, string> parentMap; //child -> parent
    unordered_map<string, vector<string>> childMap; //parent -> children (a parent may have multiple children, they are ordered by appearance in the file)

    //If a network file is specified, build a Network
    string networkFile = config.get<const char*>("sys.networkFile", "");
    Network* network = (networkFile != "")? new Network(networkFile.c_str()) : NULL;

    //Build the caches
    vector<const char*> cacheGroupNames;
    config.subgroups("sys.caches", cacheGroupNames);
    string prefix = "sys.caches.";

    //First pass: record each group's parent and build the parent -> children map
    for (const char* grp : cacheGroupNames) {
        string group(grp);
        if (group == "mem") panic("'mem' is an invalid cache group name");
        if (parentMap.count(group)) panic("Duplicate cache group %s", (prefix + group).c_str());
        string parent = config.get<const char*>(prefix + group + ".parent");
        parentMap[group] = parent;
        if (!childMap.count(parent)) childMap[parent] = vector<string>();
        childMap[parent].push_back(group);
    }

    //Check that all parents are valid: Either another cache, or "mem"
    for (const char* grp : cacheGroupNames) {
        string group(grp);
        string parent = parentMap[group];
        if (parent != "mem" && !parentMap.count(parent)) panic("%s has invalid parent %s", (prefix + group).c_str(), parent.c_str());
    }

    //Get the (single) LLC: exactly one group must declare "mem" as its parent
    if (!childMap.count("mem")) panic("One cache must have mem as parent, none found");
    if (childMap["mem"].size() != 1) panic("One cache must have mem as parent, multiple found");
    string llc = childMap["mem"][0];

    //Build each of the groups, starting with the LLC (BFS down the tree)
    unordered_map<string, CacheGroup*> cMap;
    list<string> fringe; //FIFO
    fringe.push_back(llc);
    while (!fringe.empty()) {
        string group = fringe.front();
        fringe.pop_front();
        bool isTerminal = (childMap.count(group) == 0); //if no children, connected to cores
        //revisiting a group means the "tree" has a cycle
        if (cMap.count(group)) panic("The cache 'tree' has a loop at %s", group.c_str());
        cMap[group] = BuildCacheGroup(config, group, isTerminal);
        if (!isTerminal) for (string child : childMap[group]) fringe.push_back(child);
    }

    //Check single LLC
    if (cMap[llc]->size() != 1) panic("Last-level cache %s must have caches = 1, but %ld were specified", llc.c_str(), cMap[llc]->size());

    /* Since we have checked for no loops, parent is mandatory, and all parents are checked valid,
     * it follows that we have a fully connected tree finishing at the LLC.
     */

    //Build the memory controllers
    uint32_t memControllers = config.get<uint32_t>("sys.mem.controllers", 1);
    assert(memControllers > 0);

    g_vector<MemObject*> mems;
    mems.resize(memControllers);

    zinfo->numMemoryControllers = memControllers;
    zinfo->hasNVMain = (config.get<const char*>("sys.mem.type", "Simple") == string("NVMain")) ? true : false;
    zinfo->hasDRAMCache = config.get<bool>("sys.mem.hasDRAMCache", false);

    for (uint32_t i = 0; i < memControllers; i++) {
        stringstream ss;
        ss << "mem-" << i;
        g_string name(ss.str().c_str());
        //uint32_t domain = nextDomain(); //i*zinfo->numDomains/memControllers;
        //controllers are spread evenly across contention domains
        uint32_t domain = i*zinfo->numDomains/memControllers;
        mems[i] = BuildMemoryController(config, zinfo->lineSize, zinfo->freqMHz, domain, name);
    }

    zinfo->memoryControllers = mems;

    //With multiple controllers, optionally interpose a splitter so the LLC sees one object
    if (memControllers > 1) {
        bool splitAddrs = config.get<bool>("sys.mem.splitAddrs", true);
        if (splitAddrs) {
            MemObject* splitter = new SplitAddrMemory(mems, "mem-splitter");
            mems.resize(1);
            mems[0] = splitter;
        }
    }

    //Connect everything
    // mem to llc is a bit special, only one llc
    uint32_t childId = 0;
    for (BaseCache* llcBank : (*cMap[llc])[0]) {
        llcBank->setParents(childId++, mems, network);
    }

    // Rest of caches
    for (const char* grp : cacheGroupNames) {
        if (childMap.count(grp) == 0) continue; //skip terminal caches

        CacheGroup& parentCaches = *cMap[grp];
        uint32_t parents = parentCaches.size();
        assert(parents);

        //Concatenation of all child caches
        CacheGroup childCaches;
        for (string child : childMap[grp]) childCaches.insert(childCaches.end(), cMap[child]->begin(), cMap[child]->end());

        uint32_t children = childCaches.size();
        assert(children);

        uint32_t childrenPerParent = children/parents;
        if (children % parents != 0) {
            panic("%s has %d caches and %d children, they are non-divisible. "
                    "Use multiple groups for non-homogeneous children per parent!", grp, parents, children);
        }

        //HACK FIXME: This solves the L1I+D-L2 connection bug, but it's not very clear.
        //A long-term solution is to specify whether the children should be interleaved or concatenated.
        bool terminalChildren = true;
        for (string child : childMap[grp]) terminalChildren &= (childMap.count(child) == 0 || config.get<bool>("sys.caches." + child + ".isPrefetcher", false));
        if (terminalChildren) {
            info("%s's children are all terminal OR PREFETCHERS, interleaving them", grp);
            //reorder so each parent gets one cache from every child group (e.g. L1i+L1d pairs)
            CacheGroup tmp(childCaches);
            uint32_t stride = children/childrenPerParent;
            for (uint32_t i = 0; i < children; i++) childCaches[i] = tmp[(i % childrenPerParent)*stride + i/childrenPerParent];
        }

        for (uint32_t p = 0; p < parents; p++) {
            g_vector<MemObject*> parentsVec;
            parentsVec.insert(parentsVec.end(), parentCaches[p].begin(), parentCaches[p].end()); //BaseCache* to MemObject* is a safe cast

            uint32_t childId = 0; //per-parent child numbering (shadows the outer LLC counter)
            g_vector<BaseCache*> childrenVec;
            for (uint32_t c = p*childrenPerParent; c < (p+1)*childrenPerParent; c++) {
                for (BaseCache* bank : childCaches[c]) {
                    bank->setParents(childId++, parentsVec, network);
                    childrenVec.push_back(bank);
                }
            }

            for (BaseCache* bank : parentCaches[p]) {
                bank->setChildren(childrenVec, network);
            }
        }
    }

    //Check that all the terminal caches have a single bank
    for (const char* grp : cacheGroupNames) {
        if (childMap.count(grp) == 0) {
            uint32_t banks = (*cMap[grp])[0].size();
            if (banks != 1) panic("Terminal cache group %s needs to have a single bank, has %d", grp, banks);
        }
    }

    //Tracks how many terminal caches have been allocated to cores
    unordered_map<string, uint32_t> assignedCaches;
    for (const char* grp : cacheGroupNames) if (childMap.count(grp) == 0) assignedCaches[grp] = 0;

    //Instantiate the cores
    vector<const char*> coreGroupNames;
    unordered_map <string, vector<Core*>> coreMap;
    config.subgroups("sys.cores", coreGroupNames);

    uint32_t coreIdx = 0;
    for (const char* group : coreGroupNames) {
        if (parentMap.count(group)) panic("Core group name %s is invalid, a cache group already has that name", group);

        coreMap[group] = vector<Core*>();

        string prefix = string("sys.cores.") + group + ".";
        uint32_t cores = config.get<uint32_t>(prefix + "cores", 1);
        string type = config.get<const char*>(prefix + "type", "Simple");

        //Build the core group
        //anonymous union: only the pointer matching 'type' is ever used
        union {
            SimpleCore* simpleCores;
            TimingCore* timingCores;
            OOOCore* oooCores;
            NullCore* nullCores;
        };
        if (type == "Simple") {
            simpleCores = gm_memalign<SimpleCore>(CACHE_LINE_BYTES, cores);
        } else if (type == "Timing") {
            timingCores = gm_memalign<TimingCore>(CACHE_LINE_BYTES, cores);
        } else if (type == "OOO") {
            oooCores = gm_memalign<OOOCore>(CACHE_LINE_BYTES, cores);
            zinfo->oooDecode = true; //enable uop decoding, this is false by default, must be true if even one OOO cpu is in the system
        } else if (type == "Null") {
            nullCores = gm_memalign<NullCore>(CACHE_LINE_BYTES, cores);
        } else {
            panic("%s: Invalid core type %s", group, type.c_str());
        }

        if (type != "Null") {
            string icache = config.get<const char*>(prefix + "icache");
            string dcache = config.get<const char*>(prefix + "dcache");
            if (!assignedCaches.count(icache)) panic("%s: Invalid icache parameter %s", group, icache.c_str());
            if (!assignedCaches.count(dcache)) panic("%s: Invalid dcache parameter %s", group, dcache.c_str());

            for (uint32_t j = 0; j < cores; j++) {
                stringstream ss;
                ss << group << "-" << j;
                g_string name(ss.str().c_str());
                Core* core;

                //Get the caches
                CacheGroup& igroup = *cMap[icache];
                CacheGroup& dgroup = *cMap[dcache];

                if (assignedCaches[icache] >= igroup.size()) {
                    panic("%s: icache group %s (%ld caches) is fully used, can't connect more cores to it", name.c_str(), icache.c_str(), igroup.size());
                }
                FilterCache* ic = dynamic_cast<FilterCache*>(igroup[assignedCaches[icache]][0]);
                assert(ic);
                ic->setSourceId(coreIdx);
                ic->setFlags(MemReq::IFETCH | MemReq::NOEXCL); //instruction fetches are never exclusive
                assignedCaches[icache]++;

                if (assignedCaches[dcache] >= dgroup.size()) {
                    panic("%s: dcache group %s (%ld caches) is fully used, can't connect more cores to it", name.c_str(), dcache.c_str(), dgroup.size());
                }
                FilterCache* dc = dynamic_cast<FilterCache*>(dgroup[assignedCaches[dcache]][0]);
                assert(dc);
                dc->setSourceId(coreIdx);
                assignedCaches[dcache]++;

                //Build the core (placement new into the aligned arrays above)
                if (type == "Simple") {
                    core = new (&simpleCores[j]) SimpleCore(ic, dc, name);
                } else if (type == "Timing") {
                    uint32_t domain = j*zinfo->numDomains/cores;
                    TimingCore* tcore = new (&timingCores[j]) TimingCore(ic, dc, domain, name);
                    zinfo->eventRecorders[coreIdx] = tcore->getEventRecorder();
                    zinfo->eventRecorders[coreIdx]->setSourceId(coreIdx);
                    core = tcore;
                } else {
                    assert(type == "OOO");
                    OOOCore* ocore = new (&oooCores[j]) OOOCore(ic, dc, name, j);
                    zinfo->eventRecorders[coreIdx] = ocore->getEventRecorder();
                    zinfo->eventRecorders[coreIdx]->setSourceId(coreIdx);
                    core = ocore;
                }
                coreMap[group].push_back(core);
                coreIdx++;
            }
        } else {
            assert(type == "Null");
            for (uint32_t j = 0; j < cores; j++) {
                stringstream ss;
                ss << group << "-" << j;
                g_string name(ss.str().c_str());
                Core* core = new (&nullCores[j]) NullCore(name);
                coreMap[group].push_back(core);
                coreIdx++;
            }
        }
    }

    //Check that all the terminal caches are fully connected
    for (const char* grp : cacheGroupNames) {
        if (childMap.count(grp) == 0 && assignedCaches[grp] != cMap[grp]->size()) {
            panic("%s: Terminal cache group not fully connected, %ld caches, %d assigned", grp, cMap[grp]->size(), assignedCaches[grp]);
        }
    }

    //Populate global core info
    assert(zinfo->numCores == coreIdx);
    zinfo->cores = gm_memalign<Core*>(CACHE_LINE_BYTES, zinfo->numCores);
    coreIdx = 0;
    for (const char* group : coreGroupNames) for (Core* core : coreMap[group]) zinfo->cores[coreIdx++] = core;

    //Init stats: cores, caches, mem
    for (const char* group : coreGroupNames) {
        AggregateStat* groupStat = new AggregateStat(true);
        groupStat->init(gm_strdup(group), "Core stats");
        for (Core* core : coreMap[group]) core->initStats(groupStat);
        zinfo->rootStat->append(groupStat);
    }
    for (const char* group : cacheGroupNames) {
        AggregateStat* groupStat = new AggregateStat(true);
        groupStat->init(gm_strdup(group), "Cache stats");
        for (vector<BaseCache*>& banks : *cMap[group]) for (BaseCache* bank : banks) bank->initStats(groupStat);
        zinfo->rootStat->append(groupStat);
    }

    //Initialize event recorders
    //for (uint32_t i = 0; i < zinfo->numCores; i++) eventRecorders[i] = new EventRecorder();

    AggregateStat* memStat = new AggregateStat(true);
    memStat->init("mem", "Memory controller stats");
    for (auto mem : mems) mem->initStats(memStat);
    zinfo->rootStat->append(memStat);

    //Odds and ends: BuildCacheGroup new'd the cache groups, we need to delete them
    for (pair<string, CacheGroup*> kv : cMap) delete kv.second;
    cMap.clear();

    info("Initialized system");
}
/* send job to worker and check result */ int check_worker(char * queue, char * to_send, char * expect) { gearman_return_t ret; char * result; size_t result_size; char * job_handle; const char * unique_job_id; if (opt_unique_id == NULL) { unique_job_id = "check"; } else { unique_job_id = opt_unique_id; } /* create client */ if ( create_client( server_list, &client ) != GM_OK ) { printf("%s UNKNOWN - cannot create gearman client\n", PLUGIN_NAME); return( STATE_UNKNOWN ); } gearman_client_set_timeout(&client, (opt_timeout-1)*1000/server_list_num); while (1) { if (send_async) { result = gm_strdup("sending background job succeded"); job_handle = gm_malloc(GEARMAN_JOB_HANDLE_SIZE * sizeof(char)); ret= gearman_client_do_high_background( &client, queue, unique_job_id, (void *)to_send, (size_t)strlen(to_send), job_handle); free(job_handle); } else { result= (char *)gearman_client_do_high( &client, queue, unique_job_id, (void *)to_send, (size_t)strlen(to_send), &result_size, &ret); } if (ret == GEARMAN_WORK_DATA) { free(result); continue; } else if (ret == GEARMAN_WORK_STATUS) { continue; } else if (ret == GEARMAN_SUCCESS) { gearman_client_free(&client); } else if (ret == GEARMAN_WORK_FAIL) { printf("%s CRITICAL - Job failed\n", PLUGIN_NAME); gearman_client_free(&client); return( STATE_CRITICAL ); } else { printf("%s CRITICAL - Job failed: %s\n", PLUGIN_NAME, gearman_client_error(&client)); gearman_client_free(&client); return( STATE_CRITICAL ); } break; } if( !send_async && expect != NULL && result != NULL ) { if( strstr(result, expect) != NULL) { printf("%s OK - send worker '%s' response: '%s'\n", PLUGIN_NAME, to_send, result); return( STATE_OK ); } else { printf("%s CRITICAL - send worker: '%s' response: '%s', expected '%s'\n", PLUGIN_NAME, to_send, result, expect); return( STATE_CRITICAL ); } } printf("%s OK - %s\n", PLUGIN_NAME, result ); return( STATE_OK ); }
/* print stats
 * hostnam: "host" or "host:port" string naming the gearmand to query.
 * Fetches queue/worker statistics via get_gearman_server_data() and renders
 * them as a table through my_printf(); presumably my_printf/refresh come from
 * the curses-based top UI — TODO confirm against the rest of the file. */
void print_stats(char * hostnam) {
    char * hst   = gm_strdup(hostnam);
    char * hst_c = hst;          /* keep original pointer: strsep() advances hst */
    char * server  = NULL;
    char * port_c  = NULL;
    char * message = NULL;
    char * version = NULL;
    char format1[GM_BUFFERSIZE];
    char format2[GM_BUFFERSIZE];
    char cur_time[GM_BUFFERSIZE];
    mod_gm_server_status_t *stats;
    int port = GM_SERVER_DEFAULT_PORT;
    int rc;
    int x;
    int max_length = 12;         /* minimum width of the queue-name column */
    int found      = 0;
    struct tm now;
    time_t t;

    gm_log( GM_LOG_DEBUG, "print_stats()\n");

    /* split "host:port"; keep the default port if none was given */
    server = strsep(&hst, ":");
    port_c = strsep(&hst, "\x0");
    if(port_c != NULL)
        port = atoi(port_c);

    /* get stats */
    stats = (mod_gm_server_status_t*)gm_malloc(sizeof(mod_gm_server_status_t));
    stats->function_num = 0;
    stats->worker_num   = 0;
    rc = get_gearman_server_data(stats, &message, &version, server, port);

    /* header line: timestamp, host:port and (if known) server version */
    t   = time(NULL);
    now = *(localtime(&t));
    strftime(cur_time, sizeof(cur_time), "%Y-%m-%d %H:%M:%S", &now );
    my_printf("%s - %s:%i", cur_time, server, port );
    /* cache the last non-empty version string (version_saved appears to be a
     * file-level global — NOTE(review): confirm) so it survives failed polls */
    if(version != NULL && strcmp(version, "") != 0) {
        if(version_saved != NULL)
            free(version_saved);
        version_saved = gm_strdup(version);
    }
    if(version_saved != NULL && strcmp(version_saved, "") != 0)
        my_printf(" - v%s", version_saved );
    my_printf("\n\n");

    if( rc == STATE_OK ) {
        /* first pass: widen the name column to the longest visible queue name */
        for(x=0; x<stats->function_num;x++) {
            if(opt_quiet == GM_ENABLED && stats->function[x]->worker == 0 && stats->function[x]->total == 0)
                continue;
            if((int)strlen(stats->function[x]->queue) > max_length) {
                max_length = (int)strlen(stats->function[x]->queue);
            }
        }
        /* build row format strings with the computed column width */
        snprintf(format1, sizeof(format1), " %%-%is | %%16s | %%12s | %%12s\n", max_length);
        snprintf(format2, sizeof(format2), " %%-%is |%%16i |%%12i |%%12i \n", max_length);
        my_printf(format1, "Queue Name", "Worker Available", "Jobs Waiting", "Jobs Running");
        for(x=0; x < max_length + 51; x++)
            my_printf("-");
        my_printf("\n");
        /* second pass: one table row per queue (quiet mode hides idle queues) */
        for(x=0; x<stats->function_num;x++) {
            if(opt_quiet == GM_ENABLED && stats->function[x]->worker == 0 && stats->function[x]->total == 0)
                continue;
            my_printf(format2, stats->function[x]->queue,
                               stats->function[x]->worker,
                               stats->function[x]->waiting,
                               stats->function[x]->running);
            found++;
        }
        if(found == 0) {
            for(x=0; x < max_length + 25; x++) {
                my_printf(" ");
            }
            my_printf("no queues found\n");
        }
        for(x=0; x < max_length + 51; x++)
            my_printf("-");
        my_printf("\n");
    }
    else {
        /* polling failed: show the error message from get_gearman_server_data */
        my_printf(" %s\n", message);
    }

    refresh();

    free(hst_c);
    free(message);
    free(version);
    free_mod_gm_status_server(stats);
    return;
}
/* parse command line arguments */ int parse_arguments(int argc, char **argv) { int i; int errors = 0; int verify; mod_gm_opt_t * mod_gm_new_opt; mod_gm_new_opt = gm_malloc(sizeof(mod_gm_opt_t)); set_default_options(mod_gm_new_opt); for(i=1;i<argc;i++) { char * arg = gm_strdup( argv[i] ); char * arg_c = arg; if ( !strcmp( arg, "version" ) || !strcmp( arg, "--version" ) || !strcmp( arg, "-V" ) ) { print_version(); } if ( !strcmp( arg, "help" ) || !strcmp( arg, "--help" ) || !strcmp( arg, "-h" ) ) { print_usage(); } if(parse_args_line(mod_gm_new_opt, arg, 0) != GM_OK) { errors++; free(arg_c); break; } free(arg_c); } /* set identifier to hostname unless specified */ if(mod_gm_new_opt->identifier == NULL) { gethostname(hostname, GM_BUFFERSIZE-1); mod_gm_new_opt->identifier = gm_strdup(hostname); } /* close old logfile */ if(mod_gm_opt->logfile_fp != NULL) { fclose(mod_gm_opt->logfile_fp); mod_gm_opt->logfile_fp = NULL; } /* verify options */ verify = verify_options(mod_gm_new_opt); /* set new options */ if(errors == 0 && verify == GM_OK) { mod_gm_free_opt(mod_gm_opt); mod_gm_opt = mod_gm_new_opt; } /* open new logfile */ if ( mod_gm_new_opt->logmode == GM_LOG_MODE_AUTO && mod_gm_new_opt->logfile ) { mod_gm_opt->logmode = GM_LOG_MODE_FILE; } if(mod_gm_new_opt->logmode == GM_LOG_MODE_FILE && mod_gm_opt->logfile && mod_gm_opt->debug_level < GM_LOG_STDOUT) { mod_gm_opt->logfile_fp = fopen(mod_gm_opt->logfile, "a+"); if(mod_gm_opt->logfile_fp == NULL) { perror(mod_gm_opt->logfile); errors++; } } /* read keyfile */ if(mod_gm_opt->keyfile != NULL && read_keyfile(mod_gm_opt) != GM_OK) { errors++; } if(verify != GM_OK || errors > 0 || mod_gm_new_opt->debug_level >= GM_LOG_DEBUG) { int old_debug = mod_gm_opt->debug_level; mod_gm_opt->debug_level = GM_LOG_DEBUG; dumpconfig(mod_gm_new_opt, GM_WORKER_MODE); mod_gm_opt->debug_level = old_debug; } if(errors > 0 || verify != GM_OK) { mod_gm_free_opt(mod_gm_new_opt); return(GM_ERROR); } return(GM_OK); }
/* get worker/jobs data from gearman server
 * stats:   pre-allocated status struct; function entries are appended and
 *          function_num incremented (capacity of stats->function is assumed
 *          to be large enough — NOTE(review): no bounds check, confirm the
 *          array size vs. the number of queues gearmand can report)
 * message: out-param, error text allocated by send2gearmandadmin
 * version: out-param, allocated here (GM_BUFFERSIZE bytes)
 * hostnam/port: gearmand to query
 * returns STATE_OK on success, otherwise the rc from send2gearmandadmin.
 *
 * Fixes vs. previous version: the raw strncpy(*version, line+3, 10) did not
 * NUL-terminate when the source had >= 10 characters (the rest of the buffer
 * is uninitialized heap memory) — replaced with a bounded snprintf; `output`
 * is initialized to NULL so the early-failure free() never sees an
 * indeterminate pointer. */
int get_gearman_server_data(mod_gm_server_status_t *stats, char ** message, char ** version, char * hostnam, int port) {
    int rc;
    char *total, *running, *worker, *line, *name;
    char *output = NULL, *output_c = NULL;
    mod_gm_status_function_t *func;

    *version = gm_malloc(GM_BUFFERSIZE);
    snprintf(*version, GM_BUFFERSIZE, "%s", "" );

    /* one round-trip: "status" lists queues, "version" the server version */
    rc = send2gearmandadmin("status\nversion\n", hostnam, port, &output, message);
    if(rc != STATE_OK) {
        if(output != NULL)
            free(output);
        return rc;
    }

    output_c = output; /* strsep() advances output; free the original pointer */
    while ( (line = strsep( &output, "\n" )) != NULL ) {
        gm_log( GM_LOG_TRACE, "%s\n", line );
        if(!strcmp( line, ".")) {
            /* "." terminates the status block; the next line is the version reply */
            if((line = strsep( &output, "\n" )) != NULL) {
                gm_log( GM_LOG_TRACE, "%s\n", line );
                if(line[0] == 'O') {
                    /* "OK <version>" reply: copy at most 10 chars, always terminated */
                    snprintf(*version, GM_BUFFERSIZE, "%.10s", line+3);
                } else {
                    snprintf(*version, GM_BUFFERSIZE, "%s", line);
                }
                gm_log( GM_LOG_TRACE, "extracted version: '%s'\n", *version );
            }
            /* sort our array by queue name */
            qsort(stats->function, stats->function_num, sizeof(mod_gm_status_function_t*), struct_cmp_by_queue);
            free(output_c);
            return( STATE_OK );
        }

        /* status line format: "<name>\t<total>\t<running>\t<worker>" */
        name = strsep(&line, "\t");
        if(name == NULL)
            break;
        total = strsep(&line, "\t");
        if(total == NULL)
            break;
        running = strsep(&line, "\t");
        if(running == NULL)
            break;
        worker = strsep(&line, "\x0");
        if(worker == NULL)
            break;

        func = gm_malloc(sizeof(mod_gm_status_function_t));
        func->queue   = gm_strdup(name);
        func->running = atoi(running);
        func->total   = atoi(total);
        func->worker  = atoi(worker);
        func->waiting = func->total - func->running;

        /* skip the dummy queue if its empty */
        if(!strcmp( name, "dummy") && func->total == 0) {
            free(func->queue);
            free(func);
            continue;
        }
        stats->function[stats->function_num++] = func;
        gm_log( GM_LOG_DEBUG, "%i: name:%-20s worker:%-5i waiting:%-5i running:%-5i\n",
                stats->function_num, func->queue, func->worker, func->waiting, func->running );
    }

    /* fell out of the loop without seeing the "." terminator: malformed reply */
    snprintf(*message, GM_BUFFERSIZE, "got no valid data from %s:%i\n", hostnam, (int)port);
    free(output_c);
    return(rc);
}