static void setup_gx(int port, apr_int64_t signature) { int e; apr_pool_t* subpool; /* set up pool */ if (0 != (e = apr_pool_create_alloc(&gx.pool, 0))) { gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed"); } /* set up port, event */ gx.port = port; gx.signature = signature; if (!event_init()) { gpsmon_fatalx(FLINE, APR_FROM_OS_ERROR(errno), "event_init failed"); } if (0 != (e = apr_pool_create_alloc(&subpool, gx.pool))) { gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed"); } /* qexec hash table */ gx.qexectab = apr_hash_make(subpool); CHECKMEM(gx.qexectab); /* qlog hash table */ gx.qlogtab = apr_hash_make(subpool); CHECKMEM(gx.qlogtab); /* segment hash table */ gx.segmenttab = apr_hash_make(subpool); CHECKMEM(gx.segmenttab); /* queryseg hash table */ gx.querysegtab = apr_hash_make(subpool); CHECKMEM(gx.querysegtab); /* pidtab */ gx.pidtab = apr_hash_make(subpool); CHECKMEM(gx.pidtab); /* device metrics hashes */ net_devices = apr_hash_make(gx.pool); CHECKMEM(net_devices); disk_devices = apr_hash_make(gx.pool); CHECKMEM(disk_devices); }
static void gx_gettcpcmd(SOCKET sock, short event, void* arg) { char dump; int n, e; apr_pool_t* oldpool; apr_hash_t* qetab; apr_hash_t* qdtab; apr_hash_t* pidtab; apr_hash_t* segtab; if (event & EV_TIMEOUT) // didn't get command from gpmmon, quit { if(gx.tcp_sock) { close(gx.tcp_sock); gx.tcp_sock=0; } return; } apr_hash_t* querysegtab; n = recv(sock, &dump, 1, 0); if (n == 0) gx_exit("peer closed"); if (n == -1) gx_exit("socket error"); if (dump != 'D') gx_exit("bad data"); TR1(("start dump %c\n", dump)); qetab = gx.qexectab; qdtab = gx.qlogtab; pidtab = gx.pidtab; segtab = gx.segmenttab; querysegtab = gx.querysegtab; oldpool = apr_hash_pool_get(qetab); /* make new hashtabs for next cycle */ { apr_pool_t* newpool; if (0 != (e = apr_pool_create_alloc(&newpool, gx.pool))) { gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed"); } /* qexec hash table */ gx.qexectab = apr_hash_make(newpool); CHECKMEM(gx.qexectab); /* qlog hash table */ gx.qlogtab = apr_hash_make(newpool); CHECKMEM(gx.qlogtab); /* segment hash table */ gx.segmenttab = apr_hash_make(newpool); CHECKMEM(gx.segmenttab); /* queryseg hash table */ gx.querysegtab = apr_hash_make(newpool); CHECKMEM(gx.querysegtab); /* pidtab hash table */ gx.pidtab = apr_hash_make(newpool); CHECKMEM(gx.pidtab); } /* push out a metric of the machine */ send_machine_metrics(sock); send_fsinfo(sock); /* push out records */ { apr_hash_index_t* hi; gp_smon_to_mmon_packet_t* ppkt = 0; gp_smon_to_mmon_packet_t localPacketObject; pidrec_t* pidrec; int count = 0; apr_hash_t* query_cpu_table = NULL; for (hi = apr_hash_first(0, querysegtab); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); ppkt = vptr; if (ppkt->header.pkttype != GPMON_PKTTYPE_QUERYSEG) continue; TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype)); send_smon_to_mon_pkt(sock, ppkt); count++; } for (hi = apr_hash_first(0, segtab); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); ppkt = vptr; if (ppkt->header.pkttype != GPMON_PKTTYPE_SEGINFO) continue; /* fill in hostname */ strncpy(ppkt->u.seginfo.hostname, gx.hostname, sizeof(ppkt->u.seginfo.hostname) - 1); ppkt->u.seginfo.hostname[sizeof(ppkt->u.seginfo.hostname) - 1] = 0; TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype)); send_smon_to_mon_pkt(sock, ppkt); count++; } for (hi = apr_hash_first(0, qdtab); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); ppkt = vptr; if (ppkt->header.pkttype != GPMON_PKTTYPE_QLOG) continue; TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype)); send_smon_to_mon_pkt(sock, ppkt); count++; } for (hi = apr_hash_first(0, qetab); hi; hi = apr_hash_next(hi)) { gpmon_qexec_t* qexec; void *vptr; apr_hash_this(hi, 0, 0, &vptr); qexec = vptr; /* fill in _p_metrics */ pidrec = apr_hash_get(pidtab, &qexec->key.hash_key.pid, sizeof(qexec->key.hash_key.pid)); if (pidrec) { qexec->_p_metrics = pidrec->p_metrics; qexec->_cpu_elapsed = pidrec->cpu_elapsed; } else { memset(&qexec->_p_metrics, 0, sizeof(qexec->_p_metrics)); } /* fill in _hname */ strncpy(qexec->_hname, gx.hostname, sizeof(qexec->_hname) - 1); qexec->_hname[sizeof(qexec->_hname) - 1] = 0; if (0 == create_qexec_packet(qexec, &localPacketObject)) { break; } TR2(("sending qexec, pkttype %d\n", localPacketObject.header.pkttype)); send_smon_to_mon_pkt(sock, &localPacketObject); count++; } // calculate CPU utilization per query for this machine query_cpu_table = apr_hash_make(oldpool); CHECKMEM(query_cpu_table); // loop through PID's and add to Query CPU Hash Table for (hi = apr_hash_first(0, pidtab); hi; hi = apr_hash_next(hi)) { void* vptr; pidrec_t* lookup; apr_hash_this(hi, 0, 0, &vptr); pidrec = vptr; TR2(("tmid %d ssid %d ccnt %d pid %d (CPU elapsed %d CPU Percent %.2f)\n", pidrec->query_key.tmid, pidrec->query_key.ssid, pidrec->query_key.ccnt, pidrec->pid, pidrec->cpu_elapsed, pidrec->p_metrics.cpu_pct)); // table is keyed on query key lookup = apr_hash_get(query_cpu_table, &pidrec->query_key, sizeof(pidrec->query_key)); if (lookup) { // found other pids with same query key so add the metrics to that lookup->cpu_elapsed += pidrec->cpu_elapsed; lookup->p_metrics.cpu_pct += pidrec->p_metrics.cpu_pct; } else { // insert existing pid record into table keyed by query key apr_hash_set(query_cpu_table, &pidrec->query_key, sizeof(pidrec->query_key), pidrec); } } // reset packet to 0 ppkt = &localPacketObject; memset(ppkt, 0, sizeof(gp_smon_to_mmon_packet_t)); gp_smon_to_mmon_set_header(ppkt,GPMON_PKTTYPE_QUERY_HOST_METRICS); // add the hostname into the packet for DEBUGGING purposes only. This is not used strncpy(ppkt->u.qlog.user, gx.hostname, sizeof(ppkt->u.qlog.user) - 1); ppkt->u.qlog.user[sizeof(ppkt->u.qlog.user) - 1] = 0; // loop through the query per cpu table and send the metrics for (hi = apr_hash_first(0, query_cpu_table); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); pidrec = vptr; ppkt->u.qlog.key.tmid = pidrec->query_key.tmid; ppkt->u.qlog.key.ssid = pidrec->query_key.ssid; ppkt->u.qlog.key.ccnt = pidrec->query_key.ccnt; ppkt->u.qlog.cpu_elapsed = pidrec->cpu_elapsed; ppkt->u.qlog.p_metrics.cpu_pct = pidrec->p_metrics.cpu_pct; TR2(("SEND tmid %d ssid %d ccnt %d (CPU elapsed %d CPU Percent %.2f)\n", ppkt->u.qlog.key.tmid, ppkt->u.qlog.key.ssid, ppkt->u.qlog.key.ccnt, ppkt->u.qlog.cpu_elapsed, ppkt->u.qlog.p_metrics.cpu_pct)); send_smon_to_mon_pkt(sock, ppkt); count++; } TR1(("end dump ... sent %d entries\n", count)); } /* get rid of the old pool */ { apr_pool_destroy(oldpool); } struct timeval tv; tv.tv_sec = opt.terminate_timeout; tv.tv_usec = 0; if (event_add(&gx.tcp_event, &tv)) //reset timeout { gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed"); } return; }
static void parse_command_line(int argc, const char* const argv[]) { apr_getopt_t* os; int ch; const char* arg; const char* bin_start = NULL; int e; static apr_getopt_option_t option[] = { { NULL, '?', 0, "print help screen" }, { NULL, 'v', 1, "verbose" }, { NULL, 'D', 0, "debug mode" }, { NULL, 'l', 1, "log directory" }, { NULL, 'm', 1, "max log size" }, { NULL, 't', 1, "terminate timeout" }, { NULL, 'a', 0, "iterator aggregate" }, { NULL, 'i', 0, "ignore qexec packet" }, { NULL, 0, 0, NULL } }; apr_pool_t* pool; if (0 != (e = apr_pool_create_alloc(&pool, 0))) { gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed"); } bin_start = argv[0] + strlen(argv[0]) - 1; while (bin_start != argv[0] && *bin_start != '/') bin_start--; if (bin_start[0] == '/') bin_start++; opt.pname = bin_start; opt.v = opt.D = 0; opt.max_log_size = 0; opt.terminate_timeout = 0; if (0 != (e = apr_getopt_init(&os, pool, argc, argv))) { gpsmon_fatalx(FLINE, e, "apr_getopt_init failed"); } while (0 == (e = apr_getopt_long(os, option, &ch, &arg))) { switch (ch) { case '?': usage(0); break; case 'v': opt.v = atoi(arg); break; case 'D': opt.D = 1; break; case 'l': opt.log_dir = strdup(arg); break; case 'm': opt.max_log_size = apr_atoi64(arg); break; case 't': opt.terminate_timeout = apr_atoi64(arg); break; } } if (e != APR_EOF) usage("Error: illegal arguments"); if (os->ind >= argc) usage("Error: missing port argument"); opt.arg_port = argv[os->ind++]; apr_pool_destroy(pool); verbose = opt.v; very_verbose = opt.V; }
void gpdb_get_hostlist(int* hostcnt, host_t** host_table, apr_pool_t* global_pool, mmon_options_t* opt) { apr_pool_t* pool; PGconn* conn = 0; PGresult* result = 0; int rowcount, i; unsigned int unique_hosts = 0; apr_hash_t* htab; struct hostinfo_holder_t* hostinfo_holder = NULL; host_t* hosts = NULL; int e; // 0 -- hostname, 1 -- address, 2 -- datadir, 3 -- is_master, const char *QUERY = "SELECT distinct hostname, address, case when content < 0 then 1 else 0 end as is_master, MAX(fselocation) as datadir FROM pg_filespace_entry " "JOIN gp_segment_configuration on (dbid = fsedbid) WHERE fsefsoid = (select oid from pg_filespace where fsname='pg_system') " "GROUP BY (hostname, address, is_master) order by hostname"; if (0 != (e = apr_pool_create_alloc(&pool, NULL))) { gpmon_fatalx(FLINE, e, "apr_pool_create_alloc failed"); } const char* errmsg = gpdb_exec(&conn, &result, QUERY); TR2((QUERY)); TR2(("\n")); if (errmsg) { gpmon_warning(FLINE, "GPDB error %s\n\tquery: %s\n", errmsg, QUERY); } else { // hash of hostnames to addresses htab = apr_hash_make(pool); rowcount = PQntuples(result); for (i = 0; i < rowcount; i++) { char* curr_hostname = PQgetvalue(result, i, 0); hostinfo_holder = apr_hash_get(htab, curr_hostname, APR_HASH_KEY_STRING); if (!hostinfo_holder) { hostinfo_holder = apr_pcalloc(pool, sizeof(struct hostinfo_holder_t)); CHECKMEM(hostinfo_holder); apr_hash_set(htab, curr_hostname, APR_HASH_KEY_STRING, hostinfo_holder); hostinfo_holder->hostname = curr_hostname; hostinfo_holder->is_master = atoi(PQgetvalue(result, i, 2)); hostinfo_holder->datadir = PQgetvalue(result, i, 3); // use permenant memory for address list -- stored for duration // populate 1st on list and save to head and tail hostinfo_holder->addressinfo_head = hostinfo_holder->addressinfo_tail = calloc(1, sizeof(addressinfo_holder_t)); CHECKMEM(hostinfo_holder->addressinfo_tail); // first is the hostname hostinfo_holder->addressinfo_tail->address = strdup(hostinfo_holder->hostname); CHECKMEM(hostinfo_holder->addressinfo_tail->address); // add a 2nd to the list hostinfo_holder->addressinfo_tail->next = calloc(1, sizeof(addressinfo_holder_t)); CHECKMEM(hostinfo_holder->addressinfo_tail); hostinfo_holder->addressinfo_tail = hostinfo_holder->addressinfo_tail->next; // second is address hostinfo_holder->addressinfo_tail->address = strdup(PQgetvalue(result, i, 1)); CHECKMEM(hostinfo_holder->addressinfo_tail->address); // one for hostname one for address hostinfo_holder->address_count = 2; } else { // permenant memory for address list -- stored for duration hostinfo_holder->addressinfo_tail->next = calloc(1, sizeof(addressinfo_holder_t)); CHECKMEM(hostinfo_holder->addressinfo_tail); hostinfo_holder->addressinfo_tail = hostinfo_holder->addressinfo_tail->next; // permenant memory for address list -- stored for duration hostinfo_holder->addressinfo_tail->address = strdup(PQgetvalue(result, i, 1)); CHECKMEM(hostinfo_holder->addressinfo_tail->address); hostinfo_holder->address_count++; } } // if we have any appliance specific hosts such as hadoop nodes add them to the hash table if (get_appliance_hosts_and_add_to_hosts(pool, htab)) { TR0(("Not an appliance: checking for SW Only hadoop hosts.\n")); get_hadoop_hosts_and_add_to_hosts(pool, htab, opt); // Not an appliance, so check for SW only hadoop nodes. } unique_hosts = apr_hash_count(htab); // allocate memory for host list (not freed ever) hosts = calloc(unique_hosts, sizeof(host_t)); apr_hash_index_t* hi; void* vptr; int hostcounter = 0; for (hi = apr_hash_first(0, htab); hi; hi = apr_hash_next(hi)) { // sanity check if (hostcounter >= unique_hosts) { gpmon_fatalx(FLINE, 0, "host counter exceeds unique hosts"); } apr_hash_this(hi, 0, 0, &vptr); hostinfo_holder = vptr; hosts[hostcounter].hostname = strdup(hostinfo_holder->hostname); hosts[hostcounter].data_dir = strdup(hostinfo_holder->datadir); if (hostinfo_holder->smon_dir) { hosts[hostcounter].smon_bin_location = strdup(hostinfo_holder->smon_dir); } hosts[hostcounter].is_master = hostinfo_holder->is_master; hosts[hostcounter].addressinfo_head = hostinfo_holder->addressinfo_head; hosts[hostcounter].addressinfo_tail = hostinfo_holder->addressinfo_tail; hosts[hostcounter].address_count = hostinfo_holder->address_count; hosts[hostcounter].connection_hostname.current = hosts[hostcounter].addressinfo_head; hosts[hostcounter].snmp_hostname.current = hosts[hostcounter].addressinfo_head; if (hostinfo_holder->is_hdm) hosts[hostcounter].is_hdm = 1; if (hostinfo_holder->is_hdw) hosts[hostcounter].is_hdw = 1; if (hostinfo_holder->is_etl) hosts[hostcounter].is_etl = 1; if (hostinfo_holder->is_hbw) hosts[hostcounter].is_hbw = 1; if (hostinfo_holder->is_hdc) hosts[hostcounter].is_hdc = 1; apr_thread_mutex_create(&hosts[hostcounter].mutex, APR_THREAD_MUTEX_UNNESTED, global_pool); // use the global pool so the mutexes last beyond this function hostcounter++; } *hostcnt = hostcounter; } apr_pool_destroy(pool); PQclear(result); PQfinish(conn); if (!hosts || *hostcnt < 1) { gpmon_fatalx(FLINE, 0, "no valid hosts found"); } *host_table = hosts; }