static void send_fsinfo(SOCKET sock)
{
    sigar_file_system_usage_t fsusage;
    gp_smon_to_mmon_packet_t pkt;
    const char** fsdir;
    int status = 0;

    memset(&fsusage, 0, sizeof(sigar_file_system_usage_t));

    for (fsdir = gx.fslist; *fsdir; fsdir++)
    {
        status = sigar_file_system_usage_get(gx.sigar, *fsdir, &fsusage);
        if (status == SIGAR_OK)
        {
            TR2(("sigar_file_system_usage_get() succeeded. fsdir: %s total: %lu free: %lu used: %lu \n",
                *fsdir, fsusage.total, fsusage.free, fsusage.used));

            memset(&pkt, 0, sizeof(gp_smon_to_mmon_packet_t));
            gp_smon_to_mmon_set_header(&pkt, GPMON_PKTTYPE_FSINFO);
            strncpy(pkt.u.fsinfo.key.fsname, *fsdir, sizeof(pkt.u.fsinfo.key.fsname) - 1);
            pkt.u.fsinfo.bytes_used = FSUSAGE_TOBYTES(fsusage.used);
            pkt.u.fsinfo.bytes_available = FSUSAGE_TOBYTES(fsusage.free);
            pkt.u.fsinfo.bytes_total = FSUSAGE_TOBYTES(fsusage.total);
            strncpy(pkt.u.fsinfo.key.hostname, gx.hostname, sizeof(pkt.u.fsinfo.key.hostname) - 1);
            send_smon_to_mon_pkt(sock, &pkt);
        }
        else
        {
            TR2(("sigar_file_system_usage_get() failed. fsdir: %s status: %i \n", *fsdir, status));
        }
    }
}
/* Helper function to send the header and then send the union packet */
static void send_smon_to_mon_pkt(SOCKET sock, gp_smon_to_mmon_packet_t* pkt)
{
    send_fully(sock, &pkt->header, sizeof(gp_smon_to_mmon_header_t));
    if (pkt->header.pkttype == GPMON_PKTTYPE_QEXEC)
    {
        send_fully(sock, &pkt->u.qexec_packet.data, sizeof(qexec_packet_data_t));
    }
    else
    {
        send_fully(sock, &pkt->u, get_size_by_pkttype_smon_to_mmon(pkt->header.pkttype));
    }
    TR2(("Sent packet of type %d to mmon\n", pkt->header.pkttype));
}
// Drop history partitions that are older than the configured retention age, if any exist.
static void drop_old_partitions(PGconn* conn, const char* tbl, mmon_options_t *opt)
{
    const int QRYBUFSIZ = 1024;
    PGresult* result = NULL;
    const char* errmsg;
    char qry[QRYBUFSIZ];
    const char* SELECT_QRYFMT = "SELECT partitiontablename, partitionrangestart FROM pg_partitions "
                                "WHERE tablename = '%s_history' "
                                "ORDER BY partitionrangestart DESC OFFSET %d;";
    const char* DROP_QRYFMT = "ALTER TABLE %s_history DROP PARTITION IF EXISTS FOR (%s);";

    int partition_age = opt->partition_age;

    if (partition_age <= 0)
        return;

    // partition_age + 1 because we always add 2 partitions for the boundary case
    snprintf(qry, QRYBUFSIZ, SELECT_QRYFMT, tbl, partition_age + 1);
    TR2(("drop partition: executing select query '%s'\n", qry));
    errmsg = gpdb_exec_only(conn, &result, qry);
    if (errmsg)
    {
        gpmon_warning(FLINE, "drop partition: select query '%s' response from server: %s\n", qry, errmsg);
    }
    else
    {
        int rowcount = PQntuples(result);
        int i = 0;
        for (; i < rowcount; i++)
        {
            PGresult* dropResult = NULL;
            char* partitiontablename = PQgetvalue(result, i, 0);
            char* partitionrangestart = PQgetvalue(result, i, 1);
            snprintf(qry, QRYBUFSIZ, DROP_QRYFMT, tbl, partitionrangestart);
            TR0(("Dropping partition table '%s'\n", partitiontablename));
            errmsg = gpdb_exec_only(conn, &dropResult, qry);
            PQclear(dropResult);
            if (errmsg)
            {
                gpmon_warning(FLINE, "drop partition: drop query '%s' response from server: %s\n", qry, errmsg);
                break;
            }
        }
    }
    PQclear(result);
}
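/*
 * khttpd_malloc/khttpd_strdup/khttpd_realloc below wrap the kernel allocator
 * (malloc(9)/strdup(9)/realloc(9) with the M_KHTTPD malloc type).  When
 * KHTTPD_TRACE_MALLOC is defined, each call also logs the returned pointer
 * and size through the TR1/TR2 trace macros and records the current call
 * stack in the KTR_GEN trace buffer via stack_save()/CTRSTACK(), which is
 * presumably used to pair "alloc"/"free" entries when hunting leaks.
 */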
void *khttpd_malloc(size_t size)
{
#ifdef KHTTPD_TRACE_MALLOC
    struct stack st;
#endif
    void *mem;

    mem = malloc(size, M_KHTTPD, M_WAITOK);
#ifdef KHTTPD_TRACE_MALLOC
    TR2("alloc %p %#lx", mem, size);
    stack_save(&st);
    CTRSTACK(KTR_GEN, &st, 8, 0);
#endif

    return (mem);
}
char *khttpd_strdup(const char *str)
{
#ifdef KHTTPD_TRACE_MALLOC
    struct stack st;
#endif
    char *newstr;

    newstr = strdup(str, M_KHTTPD);
#ifdef KHTTPD_TRACE_MALLOC
    TR2("alloc %p %#lx", newstr, strlen(newstr) + 1);
    stack_save(&st);
    CTRSTACK(KTR_GEN, &st, 8, 0);
#endif

    return (newstr);
}
void *khttpd_realloc(void *mem, size_t size)
{
#ifdef KHTTPD_TRACE_MALLOC
    struct stack st;
#endif
    void *newmem;

    newmem = realloc(mem, size, M_KHTTPD, M_WAITOK);
#ifdef KHTTPD_TRACE_MALLOC
    TR1("free %p", mem);
    TR2("alloc %p %#lx", newmem, size);
    stack_save(&st);
    CTRSTACK(KTR_GEN, &st, 8, 0);
#endif

    return (newmem);
}
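/*
 * Handler for a GPMON_PKTTYPE_QEXEC packet: refresh the OS-level metrics for
 * the sending pid and fold the packet into the per-segment aggregates via
 * extract_segments_exec().
 */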
static void gx_recvqexec(gpmon_packet_t* pkt)
{
    gpmon_qexec_t* p;

    if (pkt->pkttype != GPMON_PKTTYPE_QEXEC)
        gpsmon_fatal(FLINE, "assert failed; expected pkttype qexec");

    TR2(("received qexec packet\n"));

    p = &pkt->u.qexec;
    get_pid_metrics(p->key.hash_key.pid, p->key.tmid, p->key.ssid, p->key.ccnt);

    // Aggregate per-query execution metrics (cpu_elapsed, rows_out, etc.)
    // that later feed the queries_* tables.
    extract_segments_exec(pkt);

    // We deliberately do not call gpmon_warning here: qexec packets arrive
    // in large numbers and warnings would bloat the log.
    return;
}
/* got a packet from peer. put it in the queue */
static void gx_recvqlog(gpmon_packet_t* pkt)
{
    gpmon_qlog_t* p;
    gp_smon_to_mmon_packet_t* rec;

    if (pkt->pkttype != GPMON_PKTTYPE_QLOG)
        gpsmon_fatal(FLINE, "assert failed; expected pkttype qlog");

    p = &pkt->u.qlog;
    TR2(("Received qlog packet for query %d-%d-%d. Status now %d\n", p->key.tmid, p->key.ssid, p->key.ccnt, p->status));

    rec = apr_hash_get(gx.qlogtab, &p->key, sizeof(p->key));
    if (rec)
    {
        memcpy(&rec->u.qlog, p, sizeof(*p));
    }
    else
    {
        rec = gx_pkt_to_smon_to_mmon(apr_hash_pool_get(gx.qlogtab), pkt);
        apr_hash_set(gx.qlogtab, &rec->u.qlog.key, sizeof(rec->u.qlog.key), rec);
    }
}
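/*
 * Handler for a GPMON_PKTTYPE_SEGINFO packet: keep the latest seginfo record
 * per dbid in gx.segmenttab, either overwriting the existing entry in place
 * or inserting a copy allocated from the table's pool.
 */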
static void gx_recvsegment(gpmon_packet_t* pkt)
{
    gpmon_seginfo_t* p;
    gp_smon_to_mmon_packet_t* rec;

    if (pkt->pkttype != GPMON_PKTTYPE_SEGINFO)
        gpsmon_fatal(FLINE, "assert failed; expected pkttype segment");

    p = &pkt->u.seginfo;
    TR2(("Received segment packet for dbid %d (dynamic_memory_used, dynamic_memory_available) (%llu %llu)\n",
        p->dbid, p->dynamic_memory_used, p->dynamic_memory_available));

    rec = apr_hash_get(gx.segmenttab, &p->dbid, sizeof(p->dbid));
    if (rec)
    {
        memcpy(&rec->u.seginfo, p, sizeof(*p));
    }
    else
    {
        rec = gx_pkt_to_smon_to_mmon(apr_hash_pool_get(gx.segmenttab), pkt);
        apr_hash_set(gx.segmenttab, &rec->u.seginfo.dbid, sizeof(rec->u.seginfo.dbid), rec);
    }
}
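/*
 * libevent callback on the TCP listen socket.  On timeout it either re-arms
 * the listen event (when a gpmmon connection is already established) or
 * terminates the process.  Otherwise it accepts a connection from gpmmon,
 * reads one complete hello packet, validates the magic/version/packet type
 * and the shared signature, echoes the hello back with this smon's pid, and
 * arms gx_gettcpcmd() on the new socket with the terminate timeout.  Only
 * one gpmmon connection is served at a time.
 */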
static void gx_accept(SOCKET sock, short event, void* arg)
{
    SOCKET nsock;
    gp_smon_to_mmon_packet_t pkt;
    struct sockaddr_in a;
    socklen_t alen = sizeof(a);
    char* p;
    char* q;

    if (event & EV_TIMEOUT)
    {
        if (gx.tcp_sock)
        {
            /* start watching connect request again */
            if (event_add(&gx.listen_event, 0))
            {
                gpsmon_fatal(FLINE, "event_add failed");
            }
            return;
        }
        gpmon_fatal(FLINE, "smon terminating: no request arrived within %" FMT64 " seconds\n", opt.terminate_timeout);
    }

    if (0 == (event & EV_READ))
        return;

    if (-1 == (nsock = accept(sock, (void*) &a, &alen)))
    {
        gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "accept failed");
        return;
    }

    TR1(("accepted\n"));

    /* we do this one at a time */
    if (gx.tcp_sock)
    {
        gpmon_warning(FLINE, "cannot accept new connection before old one dies");
        close(nsock);
        return;
    }

    /* read one complete hello packet */
    p = (char*) &pkt;
    q = p + sizeof(pkt);
    while (p < q)
    {
        int n = recv(nsock, p, q - p, 0);
        if (n == -1)
        {
            gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "recv failed");
            close(nsock);
            return;
        }
        if (n == 0)
        {
            /* peer closed the connection before sending a full hello packet */
            gpmon_warning(FLINE, "peer closed connection before hello completed");
            close(nsock);
            return;
        }
        p += n;
    }

    if (0 != gpmon_ntohpkt(pkt.header.magic, pkt.header.version, pkt.header.pkttype))
    {
        close(nsock);
        return;
    }

    if (pkt.header.pkttype != GPMON_PKTTYPE_HELLO)
    {
        close(nsock);
        return;
    }

    if (pkt.u.hello.signature != gx.signature)
    {
        gx_exit("bad signature... maybe a new gpmmon has started");
    }

    /* echo the hello */
    pkt.u.hello.pid = getpid();
    TR2(("accepted pkt.magic = %x\n", (int) pkt.header.magic));
    send_smon_to_mon_pkt(nsock, &pkt);

    struct timeval tv;
    tv.tv_sec = opt.terminate_timeout;
    tv.tv_usec = 0;
    event_set(&gx.tcp_event, nsock, EV_READ | EV_PERSIST | EV_TIMEOUT, gx_gettcpcmd, 0);
    if (event_add(&gx.tcp_event, &tv))
    {
        gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed");
        close(nsock);
        return;
    }
    gx.tcp_sock = nsock;
    TR1(("connection established --------------------- \n"));
}
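/*
 * libevent callback on the established gpmmon connection.  A timeout simply
 * closes the connection; otherwise a 'D' (dump) command swaps in fresh hash
 * tables for the next collection cycle, streams machine metrics, filesystem
 * info, and the accumulated queryseg/seginfo/qlog/qexec records to gpmmon,
 * then sends a per-query CPU rollup computed from the pid table, destroys
 * the old tables' pool, and re-arms the timeout.
 */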
static void gx_gettcpcmd(SOCKET sock, short event, void* arg) { char dump; int n, e; apr_pool_t* oldpool; apr_hash_t* qetab; apr_hash_t* qdtab; apr_hash_t* pidtab; apr_hash_t* segtab; if (event & EV_TIMEOUT) // didn't get command from gpmmon, quit { if(gx.tcp_sock) { close(gx.tcp_sock); gx.tcp_sock=0; } return; } apr_hash_t* querysegtab; n = recv(sock, &dump, 1, 0); if (n == 0) gx_exit("peer closed"); if (n == -1) gx_exit("socket error"); if (dump != 'D') gx_exit("bad data"); TR1(("start dump %c\n", dump)); qetab = gx.qexectab; qdtab = gx.qlogtab; pidtab = gx.pidtab; segtab = gx.segmenttab; querysegtab = gx.querysegtab; oldpool = apr_hash_pool_get(qetab); /* make new hashtabs for next cycle */ { apr_pool_t* newpool; if (0 != (e = apr_pool_create_alloc(&newpool, gx.pool))) { gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed"); } /* qexec hash table */ gx.qexectab = apr_hash_make(newpool); CHECKMEM(gx.qexectab); /* qlog hash table */ gx.qlogtab = apr_hash_make(newpool); CHECKMEM(gx.qlogtab); /* segment hash table */ gx.segmenttab = apr_hash_make(newpool); CHECKMEM(gx.segmenttab); /* queryseg hash table */ gx.querysegtab = apr_hash_make(newpool); CHECKMEM(gx.querysegtab); /* pidtab hash table */ gx.pidtab = apr_hash_make(newpool); CHECKMEM(gx.pidtab); } /* push out a metric of the machine */ send_machine_metrics(sock); send_fsinfo(sock); /* push out records */ { apr_hash_index_t* hi; gp_smon_to_mmon_packet_t* ppkt = 0; gp_smon_to_mmon_packet_t localPacketObject; pidrec_t* pidrec; int count = 0; apr_hash_t* query_cpu_table = NULL; for (hi = apr_hash_first(0, querysegtab); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); ppkt = vptr; if (ppkt->header.pkttype != GPMON_PKTTYPE_QUERYSEG) continue; TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype)); send_smon_to_mon_pkt(sock, ppkt); count++; } for (hi = apr_hash_first(0, segtab); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); ppkt = vptr; if (ppkt->header.pkttype != GPMON_PKTTYPE_SEGINFO) continue; /* fill in hostname */ strncpy(ppkt->u.seginfo.hostname, gx.hostname, sizeof(ppkt->u.seginfo.hostname) - 1); ppkt->u.seginfo.hostname[sizeof(ppkt->u.seginfo.hostname) - 1] = 0; TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype)); send_smon_to_mon_pkt(sock, ppkt); count++; } for (hi = apr_hash_first(0, qdtab); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); ppkt = vptr; if (ppkt->header.pkttype != GPMON_PKTTYPE_QLOG) continue; TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype)); send_smon_to_mon_pkt(sock, ppkt); count++; } for (hi = apr_hash_first(0, qetab); hi; hi = apr_hash_next(hi)) { gpmon_qexec_t* qexec; void *vptr; apr_hash_this(hi, 0, 0, &vptr); qexec = vptr; /* fill in _p_metrics */ pidrec = apr_hash_get(pidtab, &qexec->key.hash_key.pid, sizeof(qexec->key.hash_key.pid)); if (pidrec) { qexec->_p_metrics = pidrec->p_metrics; qexec->_cpu_elapsed = pidrec->cpu_elapsed; } else { memset(&qexec->_p_metrics, 0, sizeof(qexec->_p_metrics)); } /* fill in _hname */ strncpy(qexec->_hname, gx.hostname, sizeof(qexec->_hname) - 1); qexec->_hname[sizeof(qexec->_hname) - 1] = 0; if (0 == create_qexec_packet(qexec, &localPacketObject)) { break; } TR2(("sending qexec, pkttype %d\n", localPacketObject.header.pkttype)); send_smon_to_mon_pkt(sock, &localPacketObject); count++; } // calculate CPU utilization per query for this machine query_cpu_table = apr_hash_make(oldpool); 
CHECKMEM(query_cpu_table); // loop through PID's and add to Query CPU Hash Table for (hi = apr_hash_first(0, pidtab); hi; hi = apr_hash_next(hi)) { void* vptr; pidrec_t* lookup; apr_hash_this(hi, 0, 0, &vptr); pidrec = vptr; TR2(("tmid %d ssid %d ccnt %d pid %d (CPU elapsed %d CPU Percent %.2f)\n", pidrec->query_key.tmid, pidrec->query_key.ssid, pidrec->query_key.ccnt, pidrec->pid, pidrec->cpu_elapsed, pidrec->p_metrics.cpu_pct)); // table is keyed on query key lookup = apr_hash_get(query_cpu_table, &pidrec->query_key, sizeof(pidrec->query_key)); if (lookup) { // found other pids with same query key so add the metrics to that lookup->cpu_elapsed += pidrec->cpu_elapsed; lookup->p_metrics.cpu_pct += pidrec->p_metrics.cpu_pct; } else { // insert existing pid record into table keyed by query key apr_hash_set(query_cpu_table, &pidrec->query_key, sizeof(pidrec->query_key), pidrec); } } // reset packet to 0 ppkt = &localPacketObject; memset(ppkt, 0, sizeof(gp_smon_to_mmon_packet_t)); gp_smon_to_mmon_set_header(ppkt,GPMON_PKTTYPE_QUERY_HOST_METRICS); // add the hostname into the packet for DEBUGGING purposes only. This is not used strncpy(ppkt->u.qlog.user, gx.hostname, sizeof(ppkt->u.qlog.user) - 1); ppkt->u.qlog.user[sizeof(ppkt->u.qlog.user) - 1] = 0; // loop through the query per cpu table and send the metrics for (hi = apr_hash_first(0, query_cpu_table); hi; hi = apr_hash_next(hi)) { void* vptr; apr_hash_this(hi, 0, 0, &vptr); pidrec = vptr; ppkt->u.qlog.key.tmid = pidrec->query_key.tmid; ppkt->u.qlog.key.ssid = pidrec->query_key.ssid; ppkt->u.qlog.key.ccnt = pidrec->query_key.ccnt; ppkt->u.qlog.cpu_elapsed = pidrec->cpu_elapsed; ppkt->u.qlog.p_metrics.cpu_pct = pidrec->p_metrics.cpu_pct; TR2(("SEND tmid %d ssid %d ccnt %d (CPU elapsed %d CPU Percent %.2f)\n", ppkt->u.qlog.key.tmid, ppkt->u.qlog.key.ssid, ppkt->u.qlog.key.ccnt, ppkt->u.qlog.cpu_elapsed, ppkt->u.qlog.p_metrics.cpu_pct)); send_smon_to_mon_pkt(sock, ppkt); count++; } TR1(("end dump ... sent %d entries\n", count)); } /* get rid of the old pool */ { apr_pool_destroy(oldpool); } struct timeval tv; tv.tv_sec = opt.terminate_timeout; tv.tv_usec = 0; if (event_add(&gx.tcp_event, &tv)) //reset timeout { gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed"); } return; }
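/*
 * Collect host-wide metrics from sigar (memory, swap, cpu, load average,
 * disk and NIC counters), convert the cumulative disk/NIC counters into
 * deltas against the previously saved values via metric_diff_calc(), and
 * send a GPMON_PKTTYPE_METRICS packet with those deltas scaled to per-second
 * rates over the wall-clock time since the last reading.  On the first
 * sample the disk and NIC deltas are forced to zero so the UI graphs do not
 * show a startup spike.
 */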
static void send_machine_metrics(SOCKET sock)
{
    sigar_mem_t mem;
    sigar_swap_t swap;
    sigar_cpu_t cpu;
    sigar_loadavg_t loadavg;
    sigar_disk_usage_t tdisk;
    sigar_net_interface_stat_t tnet;
    static int first = 1;
    static sigar_cpu_t pcpu = { 0 };
    static sigar_swap_t pswap = { 0 };
    gp_smon_to_mmon_packet_t pkt;
    struct timeval currenttime = { 0 };
    double seconds_duration = 0.0;
    sigar_file_system_usage_t fsusage;
    const char** fsdir;
    const char** netname;
    sigar_net_interface_stat_t netstat;
    int cpu_total_diff;

    /* NIC metrics */
    apr_uint64_t rx_packets = 0;
    apr_uint64_t tx_packets = 0;
    apr_uint64_t rx_bytes = 0;
    apr_uint64_t tx_bytes = 0;

    /* Disk metrics */
    apr_uint64_t reads = 0;
    apr_uint64_t writes = 0;
    apr_uint64_t read_bytes = 0;
    apr_uint64_t write_bytes = 0;

    memset(&mem, 0, sizeof(mem));
    sigar_mem_get(gx.sigar, &mem);
    TR2(("mem ram: %" FMT64 " total: %" FMT64 " used: %" FMT64 " free: %" FMT64 "\n",
        mem.ram, mem.total, mem.used, mem.free));

    memset(&swap, 0, sizeof(swap));
    sigar_swap_get(gx.sigar, &swap);
    TR2(("swap total: %" FMT64 " used: %" FMT64 " page_in: %" FMT64 " page_out: %" FMT64 "\n",
        swap.total, swap.used, swap.page_in, swap.page_out));

    memset(&cpu, 0, sizeof(cpu));
    sigar_cpu_get(gx.sigar, &cpu);
    TR2(("cpu user: %" FMT64 " sys: %" FMT64 " idle: %" FMT64 " wait: %" FMT64 " nice: %" FMT64 " total: %" FMT64 "\n",
        cpu.user, cpu.sys, cpu.idle, cpu.wait, cpu.nice, cpu.total));

    memset(&loadavg, 0, sizeof(loadavg));
    sigar_loadavg_get(gx.sigar, &loadavg);
    TR2(("load_avg: %e %e %e\n", loadavg.loadavg[0], loadavg.loadavg[1], loadavg.loadavg[2]));

    memset(&tdisk, 0, sizeof(tdisk));
    memset(&tnet, 0, sizeof(tnet));

    for (fsdir = gx.fslist; *fsdir; fsdir++)
    {
        int e = sigar_file_system_usage_get(gx.sigar, *fsdir, &fsusage);
        if (0 == e)
        {
            disk_device_t* disk = (disk_device_t*)apr_hash_get(disk_devices, *fsdir, APR_HASH_KEY_STRING);

            /* Check if this is a new device */
            if (!disk)
            {
                disk = (disk_device_t*)apr_palloc(gx.pool, sizeof(disk_device_t));
                disk->name = apr_pstrdup(gx.pool, *fsdir);
                disk->read_bytes = disk->write_bytes = disk->reads = disk->writes = 0;
                apr_hash_set(disk_devices, disk->name, APR_HASH_KEY_STRING, disk);
            }

            reads = disk->reads;
            writes = disk->writes;
            read_bytes = disk->read_bytes;
            write_bytes = disk->write_bytes;

            // DISK READS
            reads = metric_diff_calc(fsusage.disk.reads, disk->reads, disk->name, "disk reads");
            disk->reads = fsusage.disk.reads; // old = new

            // DISK WRITES
            writes = metric_diff_calc(fsusage.disk.writes, disk->writes, disk->name, "disk writes");
            disk->writes = fsusage.disk.writes; // old = new

            // WRITE BYTES
            write_bytes = metric_diff_calc(fsusage.disk.write_bytes, disk->write_bytes, disk->name, "disk write bytes");
            disk->write_bytes = fsusage.disk.write_bytes; // old = new

            // READ BYTES
            read_bytes = metric_diff_calc(fsusage.disk.read_bytes, disk->read_bytes, disk->name, "disk read bytes");
            disk->read_bytes = fsusage.disk.read_bytes; // old = new

            tdisk.reads += reads;
            tdisk.writes += writes;
            tdisk.write_bytes += write_bytes;
            tdisk.read_bytes += read_bytes;
        }
    }
    TR2(("disk reads: %" APR_UINT64_T_FMT " writes: %" APR_UINT64_T_FMT " rbytes: %" APR_UINT64_T_FMT " wbytes: %" APR_UINT64_T_FMT "\n",
        tdisk.reads, tdisk.writes, tdisk.read_bytes, tdisk.write_bytes));

    for (netname = gx.netlist; *netname; netname++)
    {
        int e = sigar_net_interface_stat_get(gx.sigar, *netname, &netstat);
        if (0 == e)
        {
            net_device_t* nic = (net_device_t*)apr_hash_get(net_devices, *netname, APR_HASH_KEY_STRING);

            /* Check if this is a new device */
            if (!nic)
            {
                nic = (net_device_t*)apr_palloc(gx.pool, sizeof(net_device_t));
                nic->name = apr_pstrdup(gx.pool, *netname);
                nic->tx_bytes = nic->rx_bytes = nic->tx_packets = nic->rx_packets = 0;
                apr_hash_set(net_devices, nic->name, APR_HASH_KEY_STRING, nic);
            }

            //////// RECEIVE PACKETS
            rx_packets = metric_diff_calc(netstat.rx_packets, nic->rx_packets, nic->name, "rx packets");
            nic->rx_packets = netstat.rx_packets; // old = new

            //////// RECEIVE BYTES
            rx_bytes = metric_diff_calc(netstat.rx_bytes, nic->rx_bytes, nic->name, "rx bytes");
            nic->rx_bytes = netstat.rx_bytes; // old = new

            //////// SEND PACKETS
            tx_packets = metric_diff_calc(netstat.tx_packets, nic->tx_packets, nic->name, "tx packets");
            nic->tx_packets = netstat.tx_packets; // old = new

            //////// SEND BYTES
            tx_bytes = metric_diff_calc(netstat.tx_bytes, nic->tx_bytes, nic->name, "tx bytes");
            nic->tx_bytes = netstat.tx_bytes; // old = new

            tnet.rx_packets += rx_packets;
            tnet.rx_bytes += rx_bytes;
            tnet.tx_packets += tx_packets;
            tnet.tx_bytes += tx_bytes;
        }
    }
    TR2(("rx: %" APR_UINT64_T_FMT " rx_bytes: %" APR_UINT64_T_FMT "\n", tnet.rx_packets, tnet.rx_bytes));
    TR2(("tx: %" APR_UINT64_T_FMT " tx_bytes: %" APR_UINT64_T_FMT "\n", tnet.tx_packets, tnet.tx_bytes));

    if (first)
    {
        pswap = swap, pcpu = cpu;
        /* We want 0s for these metrics on first pass rather
         * than some possibly huge number that will throw off
         * the UI graphs. */
        memset(&tdisk, 0, sizeof(tdisk));
        memset(&tnet, 0, sizeof(tnet));
    }
    first = 0;

    gp_smon_to_mmon_set_header(&pkt, GPMON_PKTTYPE_METRICS);

    pkt.u.metrics.mem.total = mem.total;
    pkt.u.metrics.mem.used = mem.used;
    pkt.u.metrics.mem.actual_used = mem.actual_used;
    pkt.u.metrics.mem.actual_free = mem.actual_free;

    pkt.u.metrics.swap.total = swap.total;
    pkt.u.metrics.swap.used = swap.used;
    pkt.u.metrics.swap.page_in = swap.page_in - pswap.page_in;
    pkt.u.metrics.swap.page_out = swap.page_out - pswap.page_out;

    cpu_total_diff = cpu.total - pcpu.total;
    if (cpu_total_diff)
    {
        float cpu_user = calc_diff_percentage(cpu.user, pcpu.user, cpu_total_diff, "cpu.user")
                       + calc_diff_percentage(cpu.nice, pcpu.nice, cpu_total_diff, "cpu.nice");
        float cpu_sys = calc_diff_percentage(cpu.sys, pcpu.sys, cpu_total_diff, "cpu.sys")
                      + calc_diff_percentage(cpu.wait, pcpu.wait, cpu_total_diff, "cpu.wait");
        float cpu_idle = calc_diff_percentage(cpu.idle, pcpu.idle, cpu_total_diff, "cpu.idle");
        pkt.u.metrics.cpu.user_pct = cpu_user;
        pkt.u.metrics.cpu.sys_pct = cpu_sys;
        pkt.u.metrics.cpu.idle_pct = cpu_idle;
    }
    else
    {
        pkt.u.metrics.cpu.user_pct = 0;
        pkt.u.metrics.cpu.sys_pct = 0;
        pkt.u.metrics.cpu.idle_pct = 0;
    }

    pkt.u.metrics.load_avg.value[0] = (float) loadavg.loadavg[0];
    pkt.u.metrics.load_avg.value[1] = (float) loadavg.loadavg[1];
    pkt.u.metrics.load_avg.value[2] = (float) loadavg.loadavg[2];

    gettimeofday(&currenttime, NULL);
    seconds_duration = subtractTimeOfDay(&g_time_last_reading, &currenttime);

    pkt.u.metrics.disk.ro_rate = (apr_uint64_t)ceil(tdisk.reads/seconds_duration);
    pkt.u.metrics.disk.wo_rate = (apr_uint64_t)ceil(tdisk.writes/seconds_duration);
    pkt.u.metrics.disk.rb_rate = (apr_uint64_t)ceil(tdisk.read_bytes/seconds_duration);
    pkt.u.metrics.disk.wb_rate = (apr_uint64_t)ceil(tdisk.write_bytes/seconds_duration);
    pkt.u.metrics.net.rp_rate = (apr_uint64_t)ceil(tnet.rx_packets/seconds_duration);
    pkt.u.metrics.net.wp_rate = (apr_uint64_t)ceil(tnet.tx_packets/seconds_duration);
    pkt.u.metrics.net.rb_rate = (apr_uint64_t)ceil(tnet.rx_bytes/seconds_duration);
    pkt.u.metrics.net.wb_rate = (apr_uint64_t)ceil(tnet.tx_bytes/seconds_duration);

    g_time_last_reading = currenttime;

    strncpy(pkt.u.metrics.hname, gx.hostname, sizeof(pkt.u.metrics.hname) - 1);
    pkt.u.metrics.hname[sizeof(pkt.u.metrics.hname) - 1] = 0;

    send_smon_to_mon_pkt(sock, &pkt);

    /* save for next time around */
    pswap = swap, pcpu = cpu;
}
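/*
 * Refresh the pidtab entry for one query-executor pid: create the record on
 * first sight (capturing the executable name and cwd via sigar_proc_exe_get),
 * then sample per-process memory, cpu, and file-descriptor counts.  A pid is
 * sampled at most once per tick; ESRCH errors are ignored since the process
 * may simply have exited, and other sigar errors are only traced at TR2.
 */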
static void get_pid_metrics(apr_int32_t pid, apr_int32_t tmid, apr_int32_t ssid, apr_int32_t ccnt)
{
    apr_int32_t status;
    sigar_proc_cpu_t cpu;
    sigar_proc_mem_t mem;
    sigar_proc_fd_t fd;
    pidrec_t* rec;
    apr_pool_t* pool = apr_hash_pool_get(gx.pidtab);

    rec = apr_hash_get(gx.pidtab, &pid, sizeof(pid));
    if (rec && rec->updated_tick == gx.tick)
        return; /* updated in current cycle */

    memset(&cpu, 0, sizeof(cpu));
    memset(&mem, 0, sizeof(mem));
    memset(&fd, 0, sizeof(fd));

    TR2(("--------------------- starting %d\n", pid));

    if (!rec)
    {
        sigar_proc_exe_t exe;

        /* There might be cases where the pid no longer exists, so we'll just
         * zero out the memory first before doing anything */
        rec = apr_pcalloc(pool, sizeof(*rec));
        CHECKMEM(rec);
        rec->pid = pid;
        rec->query_key.tmid = tmid;
        rec->query_key.ssid = ssid;
        rec->query_key.ccnt = ccnt;
        rec->pname = rec->cwd = 0;

        if (0 == sigar_proc_exe_get(gx.sigar, pid, &exe))
        {
            rec->pname = apr_pstrdup(pool, exe.name);
            rec->cwd = apr_pstrdup(pool, exe.root);
        }
        if (!rec->pname)
            rec->pname = "unknown";
        if (!rec->cwd)
            rec->cwd = "unknown";

        apr_hash_set(gx.pidtab, &rec->pid, sizeof(rec->pid), rec);
    }

    status = sigar_proc_mem_get(gx.sigar, pid, &mem);
    /* ESRCH is error 3: (No such process) */
    if (status != SIGAR_OK)
    {
        if (status != ESRCH)
        {
            TR2(("[WARNING] %s. PID: %d\n", sigar_strerror(gx.sigar, status), pid));
        }
        return;
    }

    status = sigar_proc_cpu_get(gx.sigar, pid, &cpu);
    if (status != SIGAR_OK)
    {
        if (status != ESRCH)
        {
            TR2(("[WARNING] %s. PID: %d\n", sigar_strerror(gx.sigar, status), pid));
        }
        return;
    }

    status = sigar_proc_fd_get(gx.sigar, pid, &fd);
    if (status != SIGAR_OK)
    {
        if (status != ESRCH)
        {
            TR2(("[WARNING] %s. PID: %d\n", sigar_strerror(gx.sigar, status), pid));
        }
        return;
    }

    rec->updated_tick = gx.tick;
    rec->p_metrics.fd_cnt = (apr_uint32_t) fd.total;
    rec->p_metrics.cpu_pct = (float) (cpu.percent * cpu_cores_utilization_multiplier);
    rec->p_metrics.mem.size = mem.size;
    rec->p_metrics.mem.resident = mem.resident;
#ifdef __linux__
    rec->p_metrics.mem.share = mem.share;
#else
    rec->p_metrics.mem.share = 0;
#endif
    rec->cpu_elapsed = cpu.total;
}
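/*
 * Main entry point for gpsmon: redirect stdout to the log file, set up the
 * sigar handle and the UDP/TCP listeners, then loop dispatching libevent
 * every 2 seconds, refreshing pid metrics for active query executors and
 * periodically rotating the log file once it exceeds max_log_size.
 */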
void gx_main(int port, apr_int64_t signature)
{
    /* set up our log files */
    if (opt.log_dir)
    {
        mkdir(opt.log_dir, S_IRWXU | S_IRWXG);
        if (0 != chdir(opt.log_dir))
        {
            /* Invalid dir for log file, try home dir */
            char *home_dir = NULL;
            if (0 == apr_env_get(&home_dir, "HOME", gx.pool))
            {
                if (home_dir)
                    chdir(home_dir);
            }
        }
    }

    update_log_filename();
    freopen(log_filename, "w", stdout);
    setlinebuf(stdout);

    if (!get_and_allocate_hostname())
        gpsmon_fatalx(FLINE, 0, "failed to allocate memory for hostname");

    TR0(("HOSTNAME = '%s'\n", gx.hostname)); // first chance to write to the log file
    TR2(("signature = %" FMT64 "\n", signature));
    TR1(("detected %d cpu cores\n", number_cpu_cores));

    setup_gx(port, signature);
    setup_sigar();
    setup_udp();
    setup_tcp();

    gx.tick = 0;
    for (;;)
    {
        struct timeval tv;
        apr_hash_index_t* hi;

        /* serve events every 2 seconds */
        gx.tick++;
        gx.now = time(NULL);
        tv.tv_sec = 2;
        tv.tv_usec = 0;

        /* event dispatch blocks for a certain time based on the seconds given
         * to event_loopexit */
        if (-1 == event_loopexit(&tv))
        {
            gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_loopexit failed");
        }

        if (-1 == event_dispatch())
        {
            gpsmon_fatalx(FLINE, APR_FROM_OS_ERROR(errno), "event_dispatch failed");
        }

        /* get pid metrics */
        for (hi = apr_hash_first(0, gx.qexectab); hi; hi = apr_hash_next(hi))
        {
            void* vptr;
            gpmon_qexec_t* rec;
            apr_hash_this(hi, 0, 0, &vptr);
            rec = vptr;
            get_pid_metrics(rec->key.hash_key.pid, rec->key.tmid, rec->key.ssid, rec->key.ccnt);
        }

        /* check log size */
        if (gx.tick % 60 == 0)
        {
            apr_finfo_t finfo;
            if (0 == apr_stat(&finfo, log_filename, APR_FINFO_SIZE, gx.pool))
            {
                if (opt.max_log_size != 0 && finfo.size > opt.max_log_size)
                {
                    update_log_filename();
                    freopen(log_filename, "w", stdout);
                    setlinebuf(stdout);
                }
            }
        }
    }
}
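/*
 * Initialize the sigar handle and cache the lists this collector walks on
 * every cycle: network interface names (gx.netlist), local-disk mount points
 * (gx.fslist) with their device names (gx.devlist), and the combined
 * local-plus-network filesystem list (gx.allfslist).
 */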
static void setup_sigar(void) { sigar_file_system_list_t sigar_fslist; sigar_net_interface_list_t sigar_netlist; int i, e, cnt; int do_destroy = 0; /* initialize sigar */ if (0 != (e = sigar_open(&gx.sigar))) { gpsmon_fatalx(FLINE, e, "sigar_open failed"); } TR2(("sigar initialized\n")); do_destroy = 1; if (0 != sigar_net_interface_list_get(gx.sigar, &sigar_netlist)) { memset(&sigar_netlist, 0, sizeof(sigar_netlist)); do_destroy = 0; } gx.netlist = apr_pcalloc(gx.pool, sizeof(const char*) * (1 + sigar_netlist.number)); CHECKMEM(gx.netlist); for (i = 0; i < sigar_netlist.number; i++) { gx.netlist[i] = apr_pstrdup(gx.pool, sigar_netlist.data[i]); CHECKMEM(gx.netlist[i]); TR2(("sigar net %d: %s\n", i, gx.netlist[i])); } if (do_destroy) sigar_net_interface_list_destroy(gx.sigar, &sigar_netlist); do_destroy = 1; if (0 != sigar_file_system_list_get(gx.sigar, &sigar_fslist)) { memset(&sigar_fslist, 0, sizeof(sigar_fslist)); do_destroy = 0; } cnt = 0; TR2(("sigar fsnumber: %d\n", sigar_fslist.number)); for (i = 0; i < sigar_fslist.number; i++) { if (sigar_fslist.data[i].type == SIGAR_FSTYPE_LOCAL_DISK) { TR2(("sigar cnt: %d\n", cnt + 1)); cnt++; } } gx.fslist = apr_pcalloc(gx.pool, sizeof(const char*) * (cnt + 1)); CHECKMEM(gx.fslist); gx.devlist = apr_pcalloc(gx.pool, sizeof(const char*) * (cnt + 1)); CHECKMEM(gx.devlist); cnt = 0; for (i = 0; i < sigar_fslist.number; i++) { if (sigar_fslist.data[i].type == SIGAR_FSTYPE_LOCAL_DISK) { gx.fslist[cnt] = apr_pstrdup(gx.pool, sigar_fslist.data[i].dir_name); CHECKMEM(gx.fslist[cnt]); TR2(("fs: %s\n", gx.fslist[cnt])); gx.devlist[cnt] = apr_pstrdup(gx.pool, sigar_fslist.data[i].dev_name); CHECKMEM(gx.devlist[cnt]); cnt++; } } cnt = 0; for (i = 0; i < sigar_fslist.number; i++) { if (sigar_fslist.data[i].type == SIGAR_FSTYPE_LOCAL_DISK || sigar_fslist.data[i].type == SIGAR_FSTYPE_NETWORK) { TR2(("sigar cnt: %d\n", cnt + 1)); cnt++; } } gx.allfslist = apr_pcalloc(gx.pool, sizeof(const char*) * (cnt + 1)); CHECKMEM(gx.allfslist); cnt = 0; for (i = 0; i < sigar_fslist.number; i++) { if (sigar_fslist.data[i].type == SIGAR_FSTYPE_LOCAL_DISK || sigar_fslist.data[i].type == SIGAR_FSTYPE_NETWORK) { gx.allfslist[cnt] = apr_pstrdup(gx.pool, sigar_fslist.data[i].dir_name); CHECKMEM(gx.allfslist[cnt]); TR2(("allfs: %s\n", gx.allfslist[cnt])); cnt++; } } if (do_destroy) sigar_file_system_list_destroy(gx.sigar, &sigar_fslist); }
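/*
 * Build the monitored-host table from gp_segment_configuration and
 * pg_filespace_entry: one host_t per distinct hostname, carrying its data
 * directory, master flag, and a linked list of addresses (the hostname
 * first, then each address row returned for that host), optionally extended
 * with appliance-specific or SW-only hadoop hosts.  The host array and
 * address lists are heap-allocated and retained for the life of the process.
 */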
void gpdb_get_hostlist(int* hostcnt, host_t** host_table, apr_pool_t* global_pool, mmon_options_t* opt)
{
    apr_pool_t* pool;
    PGconn* conn = 0;
    PGresult* result = 0;
    int rowcount, i;
    unsigned int unique_hosts = 0;
    apr_hash_t* htab;
    struct hostinfo_holder_t* hostinfo_holder = NULL;
    host_t* hosts = NULL;
    int e;

    // 0 -- hostname, 1 -- address, 2 -- is_master, 3 -- datadir
    const char *QUERY = "SELECT distinct hostname, address, case when content < 0 then 1 else 0 end as is_master, MAX(fselocation) as datadir FROM pg_filespace_entry "
                        "JOIN gp_segment_configuration on (dbid = fsedbid) WHERE fsefsoid = (select oid from pg_filespace where fsname='pg_system') "
                        "GROUP BY (hostname, address, is_master) order by hostname";

    if (0 != (e = apr_pool_create_alloc(&pool, NULL)))
    {
        gpmon_fatalx(FLINE, e, "apr_pool_create_alloc failed");
    }

    const char* errmsg = gpdb_exec(&conn, &result, QUERY);

    TR2((QUERY));
    TR2(("\n"));

    if (errmsg)
    {
        gpmon_warning(FLINE, "GPDB error %s\n\tquery: %s\n", errmsg, QUERY);
    }
    else
    {
        // hash of hostnames to addresses
        htab = apr_hash_make(pool);
        rowcount = PQntuples(result);

        for (i = 0; i < rowcount; i++)
        {
            char* curr_hostname = PQgetvalue(result, i, 0);

            hostinfo_holder = apr_hash_get(htab, curr_hostname, APR_HASH_KEY_STRING);

            if (!hostinfo_holder)
            {
                hostinfo_holder = apr_pcalloc(pool, sizeof(struct hostinfo_holder_t));
                CHECKMEM(hostinfo_holder);

                apr_hash_set(htab, curr_hostname, APR_HASH_KEY_STRING, hostinfo_holder);

                hostinfo_holder->hostname = curr_hostname;
                hostinfo_holder->is_master = atoi(PQgetvalue(result, i, 2));
                hostinfo_holder->datadir = PQgetvalue(result, i, 3);

                // use permanent memory for address list -- stored for duration
                // populate 1st on list and save to head and tail
                hostinfo_holder->addressinfo_head = hostinfo_holder->addressinfo_tail = calloc(1, sizeof(addressinfo_holder_t));
                CHECKMEM(hostinfo_holder->addressinfo_tail);

                // first is the hostname
                hostinfo_holder->addressinfo_tail->address = strdup(hostinfo_holder->hostname);
                CHECKMEM(hostinfo_holder->addressinfo_tail->address);

                // add a 2nd to the list
                hostinfo_holder->addressinfo_tail->next = calloc(1, sizeof(addressinfo_holder_t));
                CHECKMEM(hostinfo_holder->addressinfo_tail->next);
                hostinfo_holder->addressinfo_tail = hostinfo_holder->addressinfo_tail->next;

                // second is address
                hostinfo_holder->addressinfo_tail->address = strdup(PQgetvalue(result, i, 1));
                CHECKMEM(hostinfo_holder->addressinfo_tail->address);

                // one for hostname one for address
                hostinfo_holder->address_count = 2;
            }
            else
            {
                // permanent memory for address list -- stored for duration
                hostinfo_holder->addressinfo_tail->next = calloc(1, sizeof(addressinfo_holder_t));
                CHECKMEM(hostinfo_holder->addressinfo_tail->next);
                hostinfo_holder->addressinfo_tail = hostinfo_holder->addressinfo_tail->next;

                // permanent memory for address list -- stored for duration
                hostinfo_holder->addressinfo_tail->address = strdup(PQgetvalue(result, i, 1));
                CHECKMEM(hostinfo_holder->addressinfo_tail->address);

                hostinfo_holder->address_count++;
            }
        }

        // if we have any appliance-specific hosts, such as hadoop nodes, add them to the hash table
        if (get_appliance_hosts_and_add_to_hosts(pool, htab))
        {
            TR0(("Not an appliance: checking for SW Only hadoop hosts.\n"));
            get_hadoop_hosts_and_add_to_hosts(pool, htab, opt); // Not an appliance, so check for SW only hadoop nodes.
        }

        unique_hosts = apr_hash_count(htab);

        // allocate memory for host list (not freed ever)
        hosts = calloc(unique_hosts, sizeof(host_t));

        apr_hash_index_t* hi;
        void* vptr;
        int hostcounter = 0;
        for (hi = apr_hash_first(0, htab); hi; hi = apr_hash_next(hi))
        {
            // sanity check
            if (hostcounter >= unique_hosts)
            {
                gpmon_fatalx(FLINE, 0, "host counter exceeds unique hosts");
            }

            apr_hash_this(hi, 0, 0, &vptr);
            hostinfo_holder = vptr;

            hosts[hostcounter].hostname = strdup(hostinfo_holder->hostname);
            hosts[hostcounter].data_dir = strdup(hostinfo_holder->datadir);
            if (hostinfo_holder->smon_dir)
            {
                hosts[hostcounter].smon_bin_location = strdup(hostinfo_holder->smon_dir);
            }
            hosts[hostcounter].is_master = hostinfo_holder->is_master;
            hosts[hostcounter].addressinfo_head = hostinfo_holder->addressinfo_head;
            hosts[hostcounter].addressinfo_tail = hostinfo_holder->addressinfo_tail;
            hosts[hostcounter].address_count = hostinfo_holder->address_count;
            hosts[hostcounter].connection_hostname.current = hosts[hostcounter].addressinfo_head;
            hosts[hostcounter].snmp_hostname.current = hosts[hostcounter].addressinfo_head;

            if (hostinfo_holder->is_hdm)
                hosts[hostcounter].is_hdm = 1;
            if (hostinfo_holder->is_hdw)
                hosts[hostcounter].is_hdw = 1;
            if (hostinfo_holder->is_etl)
                hosts[hostcounter].is_etl = 1;
            if (hostinfo_holder->is_hbw)
                hosts[hostcounter].is_hbw = 1;
            if (hostinfo_holder->is_hdc)
                hosts[hostcounter].is_hdc = 1;

            // use the global pool so the mutexes last beyond this function
            apr_thread_mutex_create(&hosts[hostcounter].mutex, APR_THREAD_MUTEX_UNNESTED, global_pool);

            hostcounter++;
        }

        *hostcnt = hostcounter;
    }

    apr_pool_destroy(pool);
    PQclear(result);
    PQfinish(conn);

    if (!hosts || *hostcnt < 1)
    {
        gpmon_fatalx(FLINE, 0, "no valid hosts found");
    }

    *host_table = hosts;
}