Beispiel #1
0
/*
 * Report usage of every file system listed in gx.fslist.
 *
 * Each directory is queried through sigar; on success one
 * GPMON_PKTTYPE_FSINFO packet carrying the used/available/total byte counts,
 * the directory name and gx.hostname is sent over `sock`.  Directories whose
 * query fails are traced and skipped.
 */
static void send_fsinfo(SOCKET sock)
{
	sigar_file_system_usage_t usage;
	gp_smon_to_mmon_packet_t  packet;
	const char**              dir;

	memset(&usage, 0, sizeof(usage));

	for (dir = gx.fslist; *dir; dir++)
	{
		int rc = sigar_file_system_usage_get(gx.sigar, *dir, &usage);

		if (rc != SIGAR_OK)
		{
			TR2(("sigar_file_system_usage_get() failed.  fsdir: %s status: %i \n", *dir, rc));
			continue;
		}

		TR2(("sigar_file_system_usage_get() succeeded. fsdir: %s total: %lu free: %lu used: %lu \n", *dir, usage.total, usage.free, usage.used));

		/* Start from an all-zero packet: the strncpy() calls below copy at
		 * most size-1 bytes, so the final byte stays NUL. */
		memset(&packet, 0, sizeof(packet));
		gp_smon_to_mmon_set_header(&packet, GPMON_PKTTYPE_FSINFO);

		strncpy(packet.u.fsinfo.key.fsname, *dir, sizeof(packet.u.fsinfo.key.fsname) - 1);
		strncpy(packet.u.fsinfo.key.hostname, gx.hostname, sizeof(packet.u.fsinfo.key.hostname) - 1);

		packet.u.fsinfo.bytes_used = FSUSAGE_TOBYTES(usage.used);
		packet.u.fsinfo.bytes_available = FSUSAGE_TOBYTES(usage.free);
		packet.u.fsinfo.bytes_total = FSUSAGE_TOBYTES(usage.total);

		send_smon_to_mon_pkt(sock, &packet);
	}
}
Beispiel #2
0
/*
 * Send one smon-to-mmon packet over `sock`: the header first, then the
 * payload.  For QEXEC packets only the qexec data portion is sent; for every
 * other type the union is sent with the length reported by
 * get_size_by_pkttype_smon_to_mmon().
 */
static void send_smon_to_mon_pkt(SOCKET sock, gp_smon_to_mmon_packet_t* pkt)
{
	send_fully(sock, &pkt->header, sizeof(gp_smon_to_mmon_header_t));

	if (pkt->header.pkttype != GPMON_PKTTYPE_QEXEC)
	{
		send_fully(sock, &pkt->u, get_size_by_pkttype_smon_to_mmon(pkt->header.pkttype));
	}
	else
	{
		send_fully(sock, &pkt->u.qexec_packet.data, sizeof(qexec_packet_data_t));
	}

	TR2(("Sent packet of type %d to mmon\n", pkt->header.pkttype));
}
Beispiel #3
0
/*
 * Drop old partitions of "<tbl>_history", if any exist.
 *
 * The partitions are listed newest-first and the first
 * (opt->partition_age + 1) rows are skipped; every remaining partition is
 * dropped.  Nothing is done when opt->partition_age is not positive.
 * Processing stops at the first DROP that fails.
 *
 * conn: open connection used for both the SELECT and the DROP statements.
 * tbl:  table name prefix; "_history" is appended by the query formats.
 * opt:  options; only partition_age is read.
 */
static void drop_old_partitions(PGconn* conn, const char* tbl, mmon_options_t *opt)
{
	/* An enum constant (not a const int) keeps qry a fixed-size array, not a VLA. */
	enum { QRYBUFSIZ = 1024 };
	PGresult* result = NULL;
	const char* errmsg;
	char qry[QRYBUFSIZ];
	int len;

	const char* SELECT_QRYFMT = "SELECT partitiontablename, partitionrangestart FROM pg_partitions "
						        "WHERE tablename = '%s_history' "
								"ORDER BY partitionrangestart DESC OFFSET %d;";
	const char* DROP_QRYFMT   = "ALTER TABLE %s_history DROP PARTITION IF EXISTS FOR (%s);";

	int partition_age = opt->partition_age;

	if (partition_age <= 0)
		return;

	// partition_age + 1 because we always add 2 partitions for the boundary case
	len = snprintf(qry, sizeof(qry), SELECT_QRYFMT, tbl, partition_age + 1);
	if (len < 0 || len >= (int) sizeof(qry))
	{
		// never send a truncated statement to the server
		gpmon_warning(FLINE, "drop partition: select query for table '%s' does not fit in %d bytes\n", tbl, (int) sizeof(qry));
		return;
	}

	TR2(("drop partition: executing select query '%s'\n", qry));
	errmsg = gpdb_exec_only(conn, &result, qry);
	if (errmsg)
	{
		gpmon_warning(FLINE, "drop partition: select query '%s' response from server: %s\n", qry, errmsg);
	}
	else
	{
		int rowcount = PQntuples(result);
		int i;

		for (i = 0; i < rowcount; i++)
		{
			PGresult* dropResult = NULL;
			char* partitiontablename  = PQgetvalue(result, i, 0);
			char* partitionrangestart = PQgetvalue(result, i, 1);

			len = snprintf(qry, sizeof(qry), DROP_QRYFMT, tbl, partitionrangestart);
			if (len < 0 || len >= (int) sizeof(qry))
			{
				gpmon_warning(FLINE, "drop partition: drop query for partition '%s' does not fit in %d bytes\n", partitiontablename, (int) sizeof(qry));
				break;
			}

			errmsg = gpdb_exec_only(conn, &dropResult, qry);
			PQclear(dropResult);
			if (errmsg)
			{
				gpmon_warning(FLINE, "drop partion: drop query '%s' response from server: %s\n", qry, errmsg);
				break;
			}

			// report the drop only once the server has accepted it
			TR0(("Dropped partition table '%s'\n", partitiontablename));
		}
	}
	PQclear(result);
}
Beispiel #4
0
/*
 * Allocate `size` bytes under the M_KHTTPD malloc type with the M_WAITOK
 * flag and return the result.  When KHTTPD_TRACE_MALLOC is defined, the
 * allocation and the current stack are also recorded in the trace.
 */
void *khttpd_malloc(size_t size)
{
	void *ptr = malloc(size, M_KHTTPD, M_WAITOK);

#ifdef KHTTPD_TRACE_MALLOC
	{
		struct stack trace;

		TR2("alloc %p %#lx", ptr, size);
		stack_save(&trace);
		CTRSTACK(KTR_GEN, &trace, 8, 0);
	}
#endif
	return (ptr);
}
Beispiel #5
0
/*
 * Duplicate `str` under the M_KHTTPD malloc type and return the copy.
 * When KHTTPD_TRACE_MALLOC is defined, the copy's address and size
 * (including the terminator) and the current stack are recorded in the
 * trace.
 */
char *khttpd_strdup(const char *str)
{
	char *copy = strdup(str, M_KHTTPD);

#ifdef KHTTPD_TRACE_MALLOC
	{
		struct stack trace;

		TR2("alloc %p %#lx", copy, strlen(copy) + 1);
		stack_save(&trace);
		CTRSTACK(KTR_GEN, &trace, 8, 0);
	}
#endif

	return (copy);
}
Beispiel #6
0
/*
 * Resize the allocation `mem` to `size` bytes under the M_KHTTPD malloc type
 * with the M_WAITOK flag and return the resulting pointer.  When
 * KHTTPD_TRACE_MALLOC is defined, the trace records a free of the old
 * pointer, an alloc of the new one, and the current stack.
 */
void *khttpd_realloc(void *mem, size_t size)
{
	void *resized = realloc(mem, size, M_KHTTPD, M_WAITOK);

#ifdef KHTTPD_TRACE_MALLOC
	{
		struct stack trace;

		TR1("free %p", mem);
		TR2("alloc %p %#lx", resized, size);
		stack_save(&trace);
		CTRSTACK(KTR_GEN, &trace, 8, 0);
	}
#endif

	return (resized);
}
Beispiel #7
0
/*
 * Handle a received qexec packet: refresh the metrics of the process named
 * in the packet key, then pass the packet to extract_segments_exec().
 * Any other packet type is reported through gpsmon_fatal().
 */
static void gx_recvqexec(gpmon_packet_t* pkt)
{
	gpmon_qexec_t* qexec;

	if (GPMON_PKTTYPE_QEXEC != pkt->pkttype)
	{
		gpsmon_fatal(FLINE, "assert failed; expected pkttype qexec");
	}
	TR2(("received qexec packet\n"));

	qexec = &pkt->u.qexec;
	get_pid_metrics(qexec->key.hash_key.pid, qexec->key.tmid, qexec->key.ssid, qexec->key.ccnt);

	/* Keep aggregated information (cpu_elapsed, rows_out, ...) for the
	 * metrics that end up in the queries_* tables. */
	extract_segments_exec(pkt);

	/* No gpmon_warning() here on purpose: these packets arrive in large
	 * numbers and would bloat the log. */
}
Beispiel #8
0
/*
 * Store a qlog packet received from a peer in gx.qlogtab, keyed by the
 * packet's query key.  An existing record for the same key has its qlog
 * payload overwritten; otherwise a new record is built from the packet in
 * the table's pool and inserted.
 */
static void gx_recvqlog(gpmon_packet_t* pkt)
{
	gpmon_qlog_t* qlog;
	gp_smon_to_mmon_packet_t* existing;
	gp_smon_to_mmon_packet_t* fresh;

	if (GPMON_PKTTYPE_QLOG != pkt->pkttype)
	{
		gpsmon_fatal(FLINE, "assert failed; expected pkttype qlog");
	}

	qlog = &pkt->u.qlog;
	TR2(("Received qlog packet for query %d-%d-%d.  Status now %d\n", qlog->key.tmid, qlog->key.ssid, qlog->key.ccnt, qlog->status));

	existing = apr_hash_get(gx.qlogtab, &qlog->key, sizeof(qlog->key));
	if (existing)
	{
		/* already queued this cycle: keep only the latest state */
		memcpy(&existing->u.qlog, qlog, sizeof(*qlog));
		return;
	}

	fresh = gx_pkt_to_smon_to_mmon(apr_hash_pool_get(gx.qlogtab), pkt);
	/* key on the copy's own storage, not on the caller's packet */
	apr_hash_set(gx.qlogtab, &fresh->u.qlog.key, sizeof(fresh->u.qlog.key), fresh);
}
Beispiel #9
0
/*
 * Store a seginfo packet in gx.segmenttab, keyed by dbid.  An existing
 * record for the same dbid has its seginfo payload overwritten; otherwise a
 * new record is built from the packet in the table's pool and inserted.
 */
static void gx_recvsegment(gpmon_packet_t* pkt)
{
	gpmon_seginfo_t* seg;
	gp_smon_to_mmon_packet_t* existing;
	gp_smon_to_mmon_packet_t* fresh;

	if (GPMON_PKTTYPE_SEGINFO != pkt->pkttype)
	{
		gpsmon_fatal(FLINE, "assert failed; expected pkttype segment");
	}

	seg = &pkt->u.seginfo;

	TR2(("Received segment packet for dbid %d (dynamic_memory_used, dynamic_memory_available) (%llu %llu)\n", seg->dbid, seg->dynamic_memory_used, seg->dynamic_memory_available));

	existing = apr_hash_get(gx.segmenttab, &seg->dbid, sizeof(seg->dbid));
	if (existing)
	{
		/* already queued this cycle: keep only the latest values */
		memcpy(&existing->u.seginfo, seg, sizeof(*seg));
		return;
	}

	fresh = gx_pkt_to_smon_to_mmon(apr_hash_pool_get(gx.segmenttab), pkt);
	/* key on the copy's own storage, not on the caller's packet */
	apr_hash_set(gx.segmenttab, &fresh->u.seginfo.dbid, sizeof(fresh->u.seginfo.dbid), fresh);
}
Beispiel #10
0
/*
 * Event callback for the listening socket.
 *
 * EV_TIMEOUT: when a connection is currently established (gx.tcp_sock is
 * set) the listen event is added again; otherwise gpmon_fatal() is called
 * because no request arrived within opt.terminate_timeout seconds.
 *
 * EV_READ: accept the pending connection, read one full packet, and require
 * it to be a HELLO packet carrying gx.signature.  The hello is echoed back
 * with our pid filled in, and gx_gettcpcmd is registered on the new socket
 * with a timeout of opt.terminate_timeout seconds.  Only one connection is
 * served at a time; an additional one is closed immediately.
 *
 * sock:  listening socket.
 * event: event flags that triggered the callback.
 * arg:   unused.
 */
static void gx_accept(SOCKET sock, short event, void* arg)
{
	SOCKET nsock;
	gp_smon_to_mmon_packet_t pkt;
	struct sockaddr_in a;
	socklen_t alen = sizeof(a);
	char* p;
	char* q;

	if (event & EV_TIMEOUT)
	{
		if (gx.tcp_sock)
		{
			/* start watching connect request again */
			if (event_add(&gx.listen_event, 0))
			{
				gpsmon_fatal(FLINE, "event_add failed");
			}
			return;
		}
		gpmon_fatal(FLINE, "smon terminates due to no requests come after %" FMT64 " seconds\n", opt.terminate_timeout);
	}

	if (0 == (event & EV_READ))
		return;

	if (-1 == (nsock = accept(sock, (void*) &a, &alen)))
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "accept failed");
		return;
	}

	TR1(("accepted\n"));

	/* we do this one at a time */
	if (gx.tcp_sock)
	{
		gpmon_warning(FLINE, "cannot accept new connection before old one dies");
		close(nsock);
		return;
	}

	/* read exactly sizeof(pkt) bytes; recv() may return them in pieces */
	p = (char*) &pkt;
	q = p + sizeof(pkt);
	while (p < q)
	{
		int n = recv(nsock, p, q - p, 0);
		if (n == -1)
		{
			gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "recv failed");
			close(nsock);
			return;
		}
		if (n == 0)
		{
			/* The peer closed the connection before a complete packet
			 * arrived.  Without this check p would never advance and the
			 * loop would spin forever. */
			gpmon_warning(FLINE, "peer closed connection before sending a complete hello packet");
			close(nsock);
			return;
		}
		p += n;
	}

	if (0 != gpmon_ntohpkt(pkt.header.magic, pkt.header.version, pkt.header.pkttype))
	{
		close(nsock);
		return;
	}

	if (pkt.header.pkttype != GPMON_PKTTYPE_HELLO)
	{
		close(nsock);
		return;
	}

	if (pkt.u.hello.signature != gx.signature)
	{
		gx_exit("bad signature... maybe a new gpmmon has started");
	}

	/* echo the hello */
	pkt.u.hello.pid = getpid();
	TR2(("accepted pkt.magic = %x\n", (int) pkt.header.magic));
	send_smon_to_mon_pkt(nsock, &pkt);

	struct timeval tv;
	tv.tv_sec = opt.terminate_timeout;
	tv.tv_usec = 0;
	event_set(&gx.tcp_event, nsock, EV_READ | EV_PERSIST | EV_TIMEOUT, gx_gettcpcmd, 0);
	if (event_add(&gx.tcp_event, &tv))
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed");
		close(nsock);
		return;
	}
	/* publish the socket only after the event is registered */
	gx.tcp_sock = nsock;
	TR1(("connection established --------------------- \n"));
}
Beispiel #11
0
/*
 * Event callback for the established TCP connection (registered by
 * gx_accept through event_set on gx.tcp_event).
 *
 * EV_TIMEOUT: close gx.tcp_sock (if set), reset it to 0 and return.
 *
 * Otherwise one command byte is read from `sock`; anything other than a
 * single 'D' ends in gx_exit().  On 'D' the function:
 *   1. remembers the current hash tables and replaces them in gx with empty
 *      tables allocated from a new sub-pool of gx.pool;
 *   2. sends machine metrics and file-system info;
 *   3. sends the remembered queryseg, seginfo, qlog and qexec records;
 *   4. aggregates cpu_elapsed and cpu_pct of the pid records per query key
 *      and sends one QUERY_HOST_METRICS packet per query;
 *   5. destroys the pool of the old qexec table and re-adds gx.tcp_event
 *      with a timeout of opt.terminate_timeout seconds.
 *
 * NOTE(review): only the pool of the old qexec table is destroyed; the
 * other old tables are presumably allocated from that same pool (as the new
 * ones are below) -- confirm against the initial table setup.
 */
static void gx_gettcpcmd(SOCKET sock, short event, void* arg)
{
	char dump;
	int n, e;
	apr_pool_t* oldpool;
	apr_hash_t* qetab;
	apr_hash_t* qdtab;
	apr_hash_t* pidtab;
	apr_hash_t* segtab;
	if (event & EV_TIMEOUT) // didn't get command from gpmmon, quit
	{
		if(gx.tcp_sock)
		{
			close(gx.tcp_sock);
			gx.tcp_sock=0;
		}
		return;
	}
	apr_hash_t* querysegtab;
	/* read the single command byte */
	n = recv(sock, &dump, 1, 0);
	if (n == 0)
		gx_exit("peer closed");

	if (n == -1)
		gx_exit("socket error");

	if (dump != 'D')
		gx_exit("bad data");

	TR1(("start dump %c\n", dump));

	/* remember the tables filled during the cycle that just ended */
	qetab = gx.qexectab;
	qdtab = gx.qlogtab;
	pidtab = gx.pidtab;
	segtab = gx.segmenttab;
	querysegtab = gx.querysegtab;

	oldpool = apr_hash_pool_get(qetab);

	/* make new  hashtabs for next cycle */
	{
		apr_pool_t* newpool;
		if (0 != (e = apr_pool_create_alloc(&newpool, gx.pool)))
		{
			gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed");
		}
		/* qexec hash table */
		gx.qexectab = apr_hash_make(newpool);
		CHECKMEM(gx.qexectab);

		/* qlog hash table */
		gx.qlogtab = apr_hash_make(newpool);
		CHECKMEM(gx.qlogtab);

		/* segment hash table */
		gx.segmenttab = apr_hash_make(newpool);
		CHECKMEM(gx.segmenttab);

		/* queryseg hash table */
		gx.querysegtab = apr_hash_make(newpool);
		CHECKMEM(gx.querysegtab);

		/* pidtab hash table */
		gx.pidtab = apr_hash_make(newpool);
		CHECKMEM(gx.pidtab);
	}

	/* push out a metric of the machine */
	send_machine_metrics(sock);
	send_fsinfo(sock);

	/* push out records */
	{
		apr_hash_index_t* hi;
		gp_smon_to_mmon_packet_t* ppkt = 0;
		gp_smon_to_mmon_packet_t localPacketObject;
		pidrec_t* pidrec;
		int count = 0;
		apr_hash_t* query_cpu_table = NULL;

		/* queryseg records: sent as stored */
		for (hi = apr_hash_first(0, querysegtab); hi; hi = apr_hash_next(hi))
		{
 			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			ppkt = vptr;
			if (ppkt->header.pkttype != GPMON_PKTTYPE_QUERYSEG)
				continue;

			TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}

		/* seginfo records: hostname is filled in just before sending */
		for (hi = apr_hash_first(0, segtab); hi; hi = apr_hash_next(hi))
		{
 			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			ppkt = vptr;
			if (ppkt->header.pkttype != GPMON_PKTTYPE_SEGINFO)
				continue;

			/* fill in hostname */
			strncpy(ppkt->u.seginfo.hostname, gx.hostname, sizeof(ppkt->u.seginfo.hostname) - 1);
			ppkt->u.seginfo.hostname[sizeof(ppkt->u.seginfo.hostname) - 1] = 0;

			TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}


		/* qlog records: sent as stored */
		for (hi = apr_hash_first(0, qdtab); hi; hi = apr_hash_next(hi))
		{
 			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			ppkt = vptr;
			if (ppkt->header.pkttype != GPMON_PKTTYPE_QLOG)
				continue;
			TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}

		/* qexec records: completed with process metrics and hostname, then
		 * converted into a packet in localPacketObject before sending */
		for (hi = apr_hash_first(0, qetab); hi; hi = apr_hash_next(hi))
		{
			gpmon_qexec_t* qexec;
			void *vptr;

			apr_hash_this(hi, 0, 0, &vptr);
            qexec = vptr;
            /* fill in _p_metrics */
            pidrec = apr_hash_get(pidtab, &qexec->key.hash_key.pid, sizeof(qexec->key.hash_key.pid));
            if (pidrec) {
                qexec->_p_metrics = pidrec->p_metrics;
                qexec->_cpu_elapsed = pidrec->cpu_elapsed;
            } else {
                /* no pid record: only _p_metrics is cleared here;
                 * _cpu_elapsed keeps whatever value it already had */
                memset(&qexec->_p_metrics, 0, sizeof(qexec->_p_metrics));
            }

			/* fill in _hname */
			strncpy(qexec->_hname, gx.hostname, sizeof(qexec->_hname) - 1);
			qexec->_hname[sizeof(qexec->_hname) - 1] = 0;

			/* a zero return stops sending the remaining qexec records */
			if (0 == create_qexec_packet(qexec, &localPacketObject)) {
				break;
			}

			TR2(("sending qexec, pkttype %d\n", localPacketObject.header.pkttype));
			send_smon_to_mon_pkt(sock, &localPacketObject);
			count++;
		}

		// calculate CPU utilization per query for this machine
		// (the table lives in oldpool, so it is released with it below)
		query_cpu_table = apr_hash_make(oldpool);
		CHECKMEM(query_cpu_table);

		// loop through PID's and add to Query CPU Hash Table
		for (hi = apr_hash_first(0, pidtab); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			pidrec_t* lookup;

			apr_hash_this(hi, 0, 0, &vptr);
			pidrec = vptr;

			TR2(("tmid %d ssid %d ccnt %d pid %d (CPU elapsed %d CPU Percent %.2f)\n",
				pidrec->query_key.tmid, pidrec->query_key.ssid, pidrec->query_key.ccnt, pidrec->pid,
				pidrec->cpu_elapsed, pidrec->p_metrics.cpu_pct));

			// table is keyed on query key
			lookup = apr_hash_get(query_cpu_table, &pidrec->query_key, sizeof(pidrec->query_key));

			if (lookup)
			{
				// found other pids with same query key so add the metrics to that

				lookup->cpu_elapsed += pidrec->cpu_elapsed;
				lookup->p_metrics.cpu_pct += pidrec->p_metrics.cpu_pct;
			}
			else
			{
				// insert existing pid record into table keyed by query key
				// (the first pid record seen for a query doubles as its accumulator)
				apr_hash_set(query_cpu_table, &pidrec->query_key, sizeof(pidrec->query_key), pidrec);
			}

		}

		// reset packet to 0
		ppkt = &localPacketObject;
		memset(ppkt, 0, sizeof(gp_smon_to_mmon_packet_t));
		gp_smon_to_mmon_set_header(ppkt,GPMON_PKTTYPE_QUERY_HOST_METRICS);

		// add the hostname into the packet for DEBUGGING purposes only.  This is not used
		strncpy(ppkt->u.qlog.user, gx.hostname, sizeof(ppkt->u.qlog.user) - 1);
		ppkt->u.qlog.user[sizeof(ppkt->u.qlog.user) - 1] = 0;

		// loop through the query per cpu table and send the metrics
		// (the same packet object is reused; only key and CPU fields change)
		for (hi = apr_hash_first(0, query_cpu_table); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			pidrec = vptr;

			ppkt->u.qlog.key.tmid = pidrec->query_key.tmid;
			ppkt->u.qlog.key.ssid = pidrec->query_key.ssid;
			ppkt->u.qlog.key.ccnt = pidrec->query_key.ccnt;
			ppkt->u.qlog.cpu_elapsed = pidrec->cpu_elapsed;
			ppkt->u.qlog.p_metrics.cpu_pct = pidrec->p_metrics.cpu_pct;

			TR2(("SEND tmid %d ssid %d ccnt %d (CPU elapsed %d CPU Percent %.2f)\n",
				ppkt->u.qlog.key.tmid, ppkt->u.qlog.key.ssid, ppkt->u.qlog.key.ccnt,
				ppkt->u.qlog.cpu_elapsed, ppkt->u.qlog.p_metrics.cpu_pct));

			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}

		TR1(("end dump ... sent %d entries\n", count));
	}

	/* get rid of the old pool */
	{
		apr_pool_destroy(oldpool);
	}
	struct timeval tv;
	tv.tv_sec = opt.terminate_timeout;
	tv.tv_usec = 0;
	if (event_add(&gx.tcp_event, &tv)) //reset timeout
        {
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed");
        }
	return;
}
Beispiel #12
0
static void send_machine_metrics(SOCKET sock)
{
	sigar_mem_t mem;
	sigar_swap_t swap;
	sigar_cpu_t cpu;
	sigar_loadavg_t loadavg;
	sigar_disk_usage_t tdisk;
	sigar_net_interface_stat_t tnet;
	static int first = 1;
	static sigar_cpu_t pcpu = { 0 };
	static sigar_swap_t pswap = { 0 };
	gp_smon_to_mmon_packet_t pkt;
	struct timeval currenttime = { 0 };
	double seconds_duration = 0.0;
	sigar_file_system_usage_t fsusage;
	const char** fsdir;
	const char** netname;
	sigar_net_interface_stat_t netstat;
	int cpu_total_diff;

	/* NIC metrics */
	apr_uint64_t rx_packets = 0;
	apr_uint64_t tx_packets = 0;
	apr_uint64_t rx_bytes = 0;
	apr_uint64_t tx_bytes = 0;

	/* Disk metrics */
	apr_uint64_t reads = 0;
	apr_uint64_t writes = 0;
	apr_uint64_t read_bytes = 0;
	apr_uint64_t write_bytes = 0;

	memset(&mem, 0, sizeof(mem));
	sigar_mem_get(gx.sigar, &mem);
	TR2(("mem ram: %" FMT64 " total: %" FMT64 " used: %" FMT64 " free: %" FMT64 "\n",
		 mem.ram, mem.total, mem.used, mem.free));

	memset(&swap, 0, sizeof(swap));
	sigar_swap_get(gx.sigar, &swap);
	TR2(("swap total: %" FMT64 " used: %" FMT64 "page_in: %" FMT64 " page_out: %" FMT64 "\n",
		 swap.total, swap.used, swap.page_in, swap.page_out));

	memset(&cpu, 0, sizeof(cpu));
	sigar_cpu_get(gx.sigar, &cpu);
	TR2(("cpu user: %" FMT64 " sys: %" FMT64 " idle: %" FMT64 " wait: %" FMT64 " nice: %" FMT64 " total: %" FMT64 "\n",
			cpu.user, cpu.sys, cpu.idle, cpu.wait, cpu.nice, cpu.total));

	memset(&loadavg, 0, sizeof(loadavg));
	sigar_loadavg_get(gx.sigar, &loadavg);
	TR2(("load_avg: %e %e %e\n", loadavg.loadavg[0], loadavg.loadavg[1], loadavg.loadavg[2]));
	memset(&tdisk, 0, sizeof(tdisk));
	memset(&tnet, 0, sizeof(tnet));

	for (fsdir = gx.fslist; *fsdir; fsdir++)
	{
		int e = sigar_file_system_usage_get(gx.sigar, *fsdir, &fsusage);

		if (0 == e)
		{
			disk_device_t* disk = (disk_device_t*)apr_hash_get(disk_devices, *fsdir, APR_HASH_KEY_STRING);
			/* Check if this is a new device */
			if (!disk)
			{
				disk = (disk_device_t*)apr_palloc(gx.pool, sizeof(disk_device_t));
				disk->name = apr_pstrdup(gx.pool, *fsdir);
				disk->read_bytes = disk->write_bytes = disk->reads = disk->writes = 0;
				apr_hash_set(disk_devices, disk->name, APR_HASH_KEY_STRING, disk);
			}
			reads = disk->reads;
			writes = disk->writes;
			read_bytes = disk->read_bytes;
			write_bytes = disk->write_bytes;

			// DISK READS
			reads = metric_diff_calc(fsusage.disk.reads, disk->reads, disk->name, "disk reads");
			disk->reads = fsusage.disk.reads; // old = new

			// DISK WRITES
			writes = metric_diff_calc(fsusage.disk.writes, disk->writes, disk->name, "disk writes");
			disk->writes = fsusage.disk.writes; // old = new

			// WRITE BYTES
			write_bytes = metric_diff_calc(fsusage.disk.write_bytes, disk->write_bytes, disk->name, "disk write bytes");
			disk->write_bytes = fsusage.disk.write_bytes; // old = new

			// READ BYTES
			read_bytes = metric_diff_calc(fsusage.disk.read_bytes, disk->read_bytes, disk->name, "disk read bytes");
			disk->read_bytes = fsusage.disk.read_bytes; // old = new

			tdisk.reads += reads;
			tdisk.writes += writes;
			tdisk.write_bytes += write_bytes;
			tdisk.read_bytes += read_bytes;
		}
	}
	TR2(("disk reads: %" APR_UINT64_T_FMT " writes: %" APR_UINT64_T_FMT
		 " rbytes: %" APR_UINT64_T_FMT " wbytes: %" APR_UINT64_T_FMT "\n",
		 tdisk.reads, tdisk.writes, tdisk.read_bytes, tdisk.write_bytes));

	for (netname = gx.netlist; *netname; netname++)
	{
		int e = sigar_net_interface_stat_get(gx.sigar, *netname, &netstat);

		if (0 == e)
		{
			net_device_t* nic = (net_device_t*)apr_hash_get(net_devices, *netname, APR_HASH_KEY_STRING);

			/* Check if this is a new device */
			if (!nic)
			{
				nic = (net_device_t*)apr_palloc(gx.pool, sizeof(net_device_t));
				nic->name = apr_pstrdup(gx.pool, *netname);
				nic->tx_bytes = nic->rx_bytes = nic->tx_packets = nic->rx_packets = 0;
				apr_hash_set(net_devices, nic->name, APR_HASH_KEY_STRING, nic);
			}

			//////// RECEIVE PACKEtS
			rx_packets = metric_diff_calc(netstat.rx_packets, nic->rx_packets, nic->name, "rx packets");
			nic->rx_packets = netstat.rx_packets; // old = new

			//////// RECEIVE BYTES
			rx_bytes = metric_diff_calc(netstat.rx_bytes, nic->rx_bytes, nic->name, "rx bytes");
			nic->rx_bytes = netstat.rx_bytes; // old = new

			//////// SEND PACKETS
			tx_packets = metric_diff_calc(netstat.tx_packets, nic->tx_packets, nic->name, "tx packets");
			nic->tx_packets = netstat.tx_packets; // old = new

			//////// SEND BYTES
			tx_bytes = metric_diff_calc(netstat.tx_bytes, nic->tx_bytes, nic->name, "tx bytes");
			nic->tx_bytes = netstat.tx_bytes; // old = new

			tnet.rx_packets += rx_packets;
			tnet.rx_bytes += rx_bytes;
			tnet.tx_packets += tx_packets;
			tnet.tx_bytes += tx_bytes;
		}
	}

	TR2(("rx: %" APR_UINT64_T_FMT " rx_bytes: %" APR_UINT64_T_FMT "\n",
					tnet.rx_packets, tnet.rx_bytes));
	TR2(("tx: %" APR_UINT64_T_FMT " tx_bytes: %" APR_UINT64_T_FMT "\n",
					tnet.tx_packets, tnet.tx_bytes));

	if (first)
	{
		pswap = swap, pcpu = cpu;

		/* We want 0s for these metrics on first pass rather
		 * than some possibly huge number that will throw off
		 * the UI graphs.
		 */
		memset(&tdisk, 0, sizeof(tdisk));
		memset(&tnet, 0, sizeof(tnet));
	}
	first = 0;

	gp_smon_to_mmon_set_header(&pkt,GPMON_PKTTYPE_METRICS);

	pkt.u.metrics.mem.total = mem.total;
	pkt.u.metrics.mem.used = mem.used;
	pkt.u.metrics.mem.actual_used = mem.actual_used;
	pkt.u.metrics.mem.actual_free = mem.actual_free;
	pkt.u.metrics.swap.total = swap.total;
	pkt.u.metrics.swap.used = swap.used;
	pkt.u.metrics.swap.page_in = swap.page_in - pswap.page_in;
	pkt.u.metrics.swap.page_out = swap.page_out - pswap.page_out;
	cpu_total_diff = cpu.total - pcpu.total;
	if (cpu_total_diff)
	{
		float cpu_user = calc_diff_percentage(cpu.user, pcpu.user, cpu_total_diff, "cpu.user") + calc_diff_percentage(cpu.nice, pcpu.nice, cpu_total_diff, "cpu.nice");
		float cpu_sys  = calc_diff_percentage(cpu.sys,  pcpu.sys,  cpu_total_diff, "cpu.sys")  + calc_diff_percentage(cpu.wait, pcpu.wait, cpu_total_diff, "cpu.wait");
		float cpu_idle = calc_diff_percentage(cpu.idle, pcpu.idle, cpu_total_diff, "cpu.idle");


		pkt.u.metrics.cpu.user_pct = cpu_user;
		pkt.u.metrics.cpu.sys_pct = cpu_sys;
		pkt.u.metrics.cpu.idle_pct = cpu_idle;
	}
	else
	{
		pkt.u.metrics.cpu.user_pct = 0;
		pkt.u.metrics.cpu.sys_pct = 0;
		pkt.u.metrics.cpu.idle_pct = 0;
	}
	pkt.u.metrics.load_avg.value[0] = (float) loadavg.loadavg[0];
	pkt.u.metrics.load_avg.value[1] = (float) loadavg.loadavg[1];
	pkt.u.metrics.load_avg.value[2] = (float) loadavg.loadavg[2];

	gettimeofday(&currenttime, NULL);
	seconds_duration = subtractTimeOfDay(&g_time_last_reading, &currenttime);

	pkt.u.metrics.disk.ro_rate = (apr_uint64_t)ceil(tdisk.reads/seconds_duration);
	pkt.u.metrics.disk.wo_rate = (apr_uint64_t)ceil(tdisk.writes/seconds_duration);
	pkt.u.metrics.disk.rb_rate = (apr_uint64_t)ceil(tdisk.read_bytes/seconds_duration);
	pkt.u.metrics.disk.wb_rate = (apr_uint64_t)ceil(tdisk.write_bytes/seconds_duration);
	pkt.u.metrics.net.rp_rate = (apr_uint64_t)ceil(tnet.rx_packets/seconds_duration);
	pkt.u.metrics.net.wp_rate = (apr_uint64_t)ceil(tnet.tx_packets/seconds_duration);
	pkt.u.metrics.net.rb_rate = (apr_uint64_t)ceil(tnet.rx_bytes/seconds_duration);
	pkt.u.metrics.net.wb_rate = (apr_uint64_t)ceil(tnet.tx_bytes/seconds_duration);

	g_time_last_reading = currenttime;

	strncpy(pkt.u.metrics.hname, gx.hostname, sizeof(pkt.u.metrics.hname) - 1);
	pkt.u.metrics.hname[sizeof(pkt.u.metrics.hname) - 1] = 0;
	send_smon_to_mon_pkt(sock, &pkt);

	/* save for next time around */
	pswap = swap, pcpu = cpu;
}
Beispiel #13
0
/*
 * Refresh the metrics record of process `pid` in gx.pidtab.
 *
 * A record already updated in the current tick is left untouched.  A missing
 * record is created in the table's pool, tagged with the query key
 * (tmid, ssid, ccnt) and the executable name / root reported by sigar
 * ("unknown" when unavailable).  Memory, CPU and fd figures are then read
 * from sigar; if any of the three reads fails the record keeps its previous
 * values and is not marked as updated.
 */
static void get_pid_metrics(apr_int32_t pid, apr_int32_t tmid, apr_int32_t ssid, apr_int32_t ccnt)
{
	sigar_proc_cpu_t proc_cpu;
	sigar_proc_mem_t proc_mem;
	sigar_proc_fd_t proc_fd;
	apr_int32_t rc;
	pidrec_t* entry;
	apr_pool_t* tab_pool = apr_hash_pool_get(gx.pidtab);

	entry = apr_hash_get(gx.pidtab, &pid, sizeof(pid));
	if (entry && entry->updated_tick == gx.tick)
	{
		/* updated in current cycle */
		return;
	}

	memset(&proc_cpu, 0, sizeof(proc_cpu));
	memset(&proc_mem, 0, sizeof(proc_mem));
	memset(&proc_fd, 0, sizeof(proc_fd));

	TR2(("--------------------- starting %d\n", pid));

	if (!entry)
	{
		sigar_proc_exe_t exe;

		/* The pid may no longer exist, so start from zeroed memory
		 * before filling anything in. */
		entry = apr_pcalloc(tab_pool, sizeof(*entry));
		CHECKMEM(entry);

		entry->pid = pid;
		entry->query_key.tmid = tmid;
		entry->query_key.ssid = ssid;
		entry->query_key.ccnt = ccnt;

		entry->pname = entry->cwd = 0;
		if (0 == sigar_proc_exe_get(gx.sigar, pid, &exe))
		{
			entry->pname = apr_pstrdup(tab_pool, exe.name);
			entry->cwd = apr_pstrdup(tab_pool, exe.root);
		}
		if (!entry->pname)
		{
			entry->pname = "unknown";
		}
		if (!entry->cwd)
		{
			entry->cwd = "unknown";
		}

		/* key on the record's own pid field, which outlives this call */
		apr_hash_set(gx.pidtab, &entry->pid, sizeof(entry->pid), entry);
	}

	/* ESRCH is error 3: (No such process); it is not worth a warning */
	rc = sigar_proc_mem_get(gx.sigar, pid, &proc_mem);
	if (SIGAR_OK != rc)
	{
		if (ESRCH != rc)
		{
			TR2(("[WARNING] %s. PID: %d\n", sigar_strerror(gx.sigar, rc), pid));
		}
		return;
	}

	rc = sigar_proc_cpu_get(gx.sigar, pid, &proc_cpu);
	if (SIGAR_OK != rc)
	{
		if (ESRCH != rc)
		{
			TR2(("[WARNING] %s. PID: %d\n", sigar_strerror(gx.sigar, rc), pid));
		}
		return;
	}

	rc = sigar_proc_fd_get(gx.sigar, pid, &proc_fd);
	if (SIGAR_OK != rc)
	{
		if (ESRCH != rc)
		{
			TR2(("[WARNING] %s. PID: %d\n", sigar_strerror(gx.sigar, rc), pid));
		}
		return;
	}

	/* all three reads succeeded: publish the new sample */
	entry->updated_tick = gx.tick;
	entry->p_metrics.fd_cnt = (apr_uint32_t) proc_fd.total;
	entry->p_metrics.cpu_pct = (float) (proc_cpu.percent * cpu_cores_utilization_multiplier);
	entry->p_metrics.mem.size = proc_mem.size;
	entry->p_metrics.mem.resident = proc_mem.resident;

#ifdef __linux__
	entry->p_metrics.mem.share = proc_mem.share;
#else
	entry->p_metrics.mem.share = 0;
#endif

	entry->cpu_elapsed = proc_cpu.total;
}
Beispiel #14
0
/*
 * Entry point of the smon agent: set up logging, resolve the hostname,
 * initialise gx, sigar and the UDP/TCP endpoints, then loop forever.
 *
 * Each loop iteration advances gx.tick, dispatches events for 2 seconds,
 * refreshes the pid metrics of every record in gx.qexectab and, every 60th
 * tick, switches to a new log file once the current one exceeds
 * opt.max_log_size (0 disables the check).
 *
 * port, signature: passed through to setup_gx().
 */
void gx_main(int port, apr_int64_t signature)
{
	/* set up our log files */
	if (opt.log_dir)
	{
		mkdir(opt.log_dir, S_IRWXU | S_IRWXG);

		if (0 != chdir(opt.log_dir))
		{
			/* Invalid dir for log file, try home dir */
			char *fallback_dir = NULL;

			if (0 == apr_env_get(&fallback_dir, "HOME", gx.pool) && fallback_dir)
			{
				chdir(fallback_dir);
			}
		}
	}

	update_log_filename();
	freopen(log_filename, "w", stdout);
	setlinebuf(stdout);

	if (!get_and_allocate_hostname())
	{
		gpsmon_fatalx(FLINE, 0, "failed to allocate memory for hostname");
	}
	TR0(("HOSTNAME = '%s'\n", gx.hostname));

	/* first chance to write to the log file */
	TR2(("signature = %" FMT64 "\n", signature));
	TR1(("detected %d cpu cores\n", number_cpu_cores));

	setup_gx(port, signature);
	setup_sigar();
	setup_udp();
	setup_tcp();

	for (gx.tick = 0; ; )
	{
		struct timeval slice;
		apr_hash_index_t* it;
		apr_finfo_t log_info;

		/* serve events every 2 second */
		gx.tick++;
		gx.now = time(NULL);
		slice.tv_sec = 2;
		slice.tv_usec = 0;

		/* event dispatch blocks for a certain time based on the seconds
		 * given to event_loopexit */
		if (-1 == event_loopexit(&slice))
		{
			gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno),
					"event_loopexit failed");
		}

		if (-1 == event_dispatch())
		{
			gpsmon_fatalx(FLINE, APR_FROM_OS_ERROR(errno), "event_dispatch failed");
		}

		/* get pid metrics */
		for (it = apr_hash_first(0, gx.qexectab); it; it = apr_hash_next(it))
		{
			void* value;
			gpmon_qexec_t* qexec;

			apr_hash_this(it, 0, 0, &value);
			qexec = value;
			get_pid_metrics(qexec->key.hash_key.pid,
					qexec->key.tmid,
					qexec->key.ssid,
					qexec->key.ccnt);
		}

		/* check log size, but only on every 60th tick */
		if (gx.tick % 60 != 0)
		{
			continue;
		}

		if (0 == apr_stat(&log_info, log_filename, APR_FINFO_SIZE, gx.pool)
			&& opt.max_log_size != 0
			&& log_info.size > opt.max_log_size)
		{
			update_log_filename();
			freopen(log_filename, "w", stdout);
			setlinebuf(stdout);
		}
	}
}
Beispiel #15
0
/*
 * Open sigar and build the NULL-terminated name lists stored in gx:
 *   gx.netlist   - every network interface sigar reports;
 *   gx.fslist    - directory of every local-disk file system;
 *   gx.devlist   - device name of every local-disk file system (same order);
 *   gx.allfslist - directory of every local-disk or network file system.
 * All strings and arrays are allocated from gx.pool.  A failed list query is
 * treated as an empty list.
 */
static void setup_sigar(void)
{
	sigar_file_system_list_t fs;
	sigar_net_interface_list_t nics;
	int idx, rc, kept;
	int must_destroy;

	/* initialize sigar */
	rc = sigar_open(&gx.sigar);
	if (0 != rc)
	{
		gpsmon_fatalx(FLINE, rc, "sigar_open failed");
	}

	TR2(("sigar initialized\n"));

	/* ---- network interfaces ---- */
	must_destroy = 1;
	if (0 != sigar_net_interface_list_get(gx.sigar, &nics))
	{
		/* nothing was allocated by sigar, so nothing to destroy later */
		memset(&nics, 0, sizeof(nics));
		must_destroy = 0;
	}
	/* one extra zeroed slot terminates the list */
	gx.netlist = apr_pcalloc(gx.pool, sizeof(const char*) * (1
			+ nics.number));
	CHECKMEM(gx.netlist);
	for (idx = 0; idx < nics.number; idx++)
	{
		gx.netlist[idx] = apr_pstrdup(gx.pool, nics.data[idx]);
		CHECKMEM(gx.netlist[idx]);
		TR2(("sigar net %d: %s\n", idx, gx.netlist[idx]));
	}
	if (must_destroy)
	{
		sigar_net_interface_list_destroy(gx.sigar, &nics);
	}

	/* ---- file systems ---- */
	must_destroy = 1;
	if (0 != sigar_file_system_list_get(gx.sigar, &fs))
	{
		memset(&fs, 0, sizeof(fs));
		must_destroy = 0;
	}
	TR2(("sigar fsnumber: %d\n", fs.number));

	/* first pass: count local disks to size fslist / devlist */
	kept = 0;
	for (idx = 0; idx < fs.number; idx++)
	{
		if (fs.data[idx].type != SIGAR_FSTYPE_LOCAL_DISK)
		{
			continue;
		}
		kept++;
		TR2(("sigar cnt: %d\n", kept));
	}
	gx.fslist = apr_pcalloc(gx.pool, sizeof(const char*) * (kept + 1));
	CHECKMEM(gx.fslist);
	gx.devlist = apr_pcalloc(gx.pool, sizeof(const char*) * (kept + 1));
	CHECKMEM(gx.devlist);

	/* second pass: copy directory and device names of the local disks */
	kept = 0;
	for (idx = 0; idx < fs.number; idx++)
	{
		if (fs.data[idx].type != SIGAR_FSTYPE_LOCAL_DISK)
		{
			continue;
		}
		gx.fslist[kept] = apr_pstrdup(gx.pool, fs.data[idx].dir_name);
		CHECKMEM(gx.fslist[kept]);
		TR2(("fs: %s\n", gx.fslist[kept]));
		gx.devlist[kept] = apr_pstrdup(gx.pool, fs.data[idx].dev_name);
		CHECKMEM(gx.devlist[kept]);
		kept++;
	}

	/* same two passes for allfslist, which also includes network file systems */
	kept = 0;
	for (idx = 0; idx < fs.number; idx++)
	{
		if (fs.data[idx].type == SIGAR_FSTYPE_LOCAL_DISK || fs.data[idx].type == SIGAR_FSTYPE_NETWORK)
		{
			kept++;
			TR2(("sigar cnt: %d\n", kept));
		}
	}
	gx.allfslist = apr_pcalloc(gx.pool, sizeof(const char*) * (kept + 1));
	CHECKMEM(gx.allfslist);

	kept = 0;
	for (idx = 0; idx < fs.number; idx++)
	{
		if (fs.data[idx].type == SIGAR_FSTYPE_LOCAL_DISK || fs.data[idx].type == SIGAR_FSTYPE_NETWORK)
		{
			gx.allfslist[kept] = apr_pstrdup(gx.pool, fs.data[idx].dir_name);
			CHECKMEM(gx.allfslist[kept]);
			TR2(("allfs: %s\n", gx.allfslist[kept]));
			kept++;
		}
	}

	if (must_destroy)
	{
		sigar_file_system_list_destroy(gx.sigar, &fs);
	}
}
Beispiel #16
0
/*
 * Query the database for the cluster's hosts and build the host table.
 *
 * Every distinct hostname becomes one host_t entry with a linked list of
 * addresses (the hostname itself first, then each address returned for it).
 * Appliance / hadoop hosts are merged in through
 * get_appliance_hosts_and_add_to_hosts() and
 * get_hadoop_hosts_and_add_to_hosts().
 *
 * hostcnt:     out - number of entries written to *host_table.
 * host_table:  out - heap-allocated array of hosts; it and the strings and
 *              address lists it references are never freed here.
 * global_pool: pool used for the per-host mutexes so that they outlive this
 *              function.
 * opt:         options passed on to the hadoop host lookup.
 *
 * Calls gpmon_fatalx() when no host could be determined.
 */
void gpdb_get_hostlist(int* hostcnt, host_t** host_table, apr_pool_t* global_pool, mmon_options_t* opt)
{
	apr_pool_t* pool;
	PGconn* conn = 0;
	PGresult* result = 0;
	int rowcount, i;
	unsigned int unique_hosts = 0;
	apr_hash_t* htab;
	struct hostinfo_holder_t* hostinfo_holder = NULL;
	host_t* hosts = NULL;
	int e;

	// result columns: 0 -- hostname, 1 -- address, 2 -- is_master, 3 -- datadir
	const char *QUERY = "SELECT distinct hostname, address, case when content < 0 then 1 else 0 end as is_master, MAX(fselocation) as datadir FROM pg_filespace_entry "
			    "JOIN gp_segment_configuration on (dbid = fsedbid) WHERE fsefsoid = (select oid from pg_filespace where fsname='pg_system') "
		  	    "GROUP BY (hostname, address, is_master) order by hostname";

	// scratch pool for the temporary hash table; destroyed before returning
	if (0 != (e = apr_pool_create_alloc(&pool, NULL)))
	{
		gpmon_fatalx(FLINE, e, "apr_pool_create_alloc failed");
	}

	const char* errmsg = gpdb_exec(&conn, &result, QUERY);

	// never pass the query text itself as the format string
	TR2(("%s\n", QUERY));

	if (errmsg)
	{
		gpmon_warning(FLINE, "GPDB error %s\n\tquery: %s\n", errmsg, QUERY);
	}
	else
	{
		// hash of hostnames to addresses
		htab = apr_hash_make(pool);

		rowcount = PQntuples(result);

		for (i = 0; i < rowcount; i++)
		{
			char* curr_hostname = PQgetvalue(result, i, 0);
			addressinfo_holder_t* node;

			hostinfo_holder = apr_hash_get(htab, curr_hostname, APR_HASH_KEY_STRING);

			if (!hostinfo_holder)
			{
				hostinfo_holder = apr_pcalloc(pool, sizeof(struct hostinfo_holder_t));
				CHECKMEM(hostinfo_holder);

				apr_hash_set(htab, curr_hostname, APR_HASH_KEY_STRING, hostinfo_holder);

				// these point into `result`; they are copied before PQclear()
				hostinfo_holder->hostname = curr_hostname;
				hostinfo_holder->is_master = atoi(PQgetvalue(result, i, 2));
				hostinfo_holder->datadir = PQgetvalue(result, i, 3);

				// use permanent memory for address list -- stored for duration

				// populate 1st on list and save to head and tail
				node = calloc(1, sizeof(addressinfo_holder_t));
				CHECKMEM(node);
				hostinfo_holder->addressinfo_head = hostinfo_holder->addressinfo_tail = node;

				// first is the hostname
				node->address = strdup(hostinfo_holder->hostname);
				CHECKMEM(node->address);

				// add a 2nd to the list; check the NEW node, not the old tail
				node = calloc(1, sizeof(addressinfo_holder_t));
				CHECKMEM(node);
				hostinfo_holder->addressinfo_tail->next = node;
				hostinfo_holder->addressinfo_tail = node;

				// second is address
				node->address = strdup(PQgetvalue(result, i, 1));
				CHECKMEM(node->address);

				// one for hostname one for address
				hostinfo_holder->address_count = 2;
			}
			else
			{
				// permanent memory for address list -- stored for duration;
				// check the NEW node, not the old tail
				node = calloc(1, sizeof(addressinfo_holder_t));
				CHECKMEM(node);
				hostinfo_holder->addressinfo_tail->next = node;
				hostinfo_holder->addressinfo_tail = node;

				node->address = strdup(PQgetvalue(result, i, 1));
				CHECKMEM(node->address);

				hostinfo_holder->address_count++;
			}

		}

		// if we have any appliance specific hosts such as hadoop nodes add them to the hash table
		if (get_appliance_hosts_and_add_to_hosts(pool, htab))
		{
			TR0(("Not an appliance: checking for SW Only hadoop hosts.\n"));
			get_hadoop_hosts_and_add_to_hosts(pool, htab, opt); // Not an appliance, so check for SW only hadoop nodes.
		}

		unique_hosts = apr_hash_count(htab);

		// allocate memory for host list (not freed ever); with no hosts at
		// all, leave `hosts` NULL so the "no valid hosts" check below fires
		if (unique_hosts > 0)
		{
			hosts = calloc(unique_hosts, sizeof(host_t));
			CHECKMEM(hosts);
		}

		apr_hash_index_t* hi;
		void* vptr;
		unsigned int hostcounter = 0;
		for (hi = apr_hash_first(0, htab); hi; hi = apr_hash_next(hi))
		{
			// sanity check
			if (hostcounter >= unique_hosts)
			{
				gpmon_fatalx(FLINE, 0, "host counter exceeds unique hosts");
			}

			apr_hash_this(hi, 0, 0, &vptr);
			hostinfo_holder = vptr;

			hosts[hostcounter].hostname = strdup(hostinfo_holder->hostname);
			CHECKMEM(hosts[hostcounter].hostname);
			hosts[hostcounter].data_dir = strdup(hostinfo_holder->datadir);
			CHECKMEM(hosts[hostcounter].data_dir);
			if (hostinfo_holder->smon_dir)
			{
				hosts[hostcounter].smon_bin_location = strdup(hostinfo_holder->smon_dir);
				CHECKMEM(hosts[hostcounter].smon_bin_location);
			}
			hosts[hostcounter].is_master = hostinfo_holder->is_master;
			// the address list is heap-allocated, so the host simply takes it over
			hosts[hostcounter].addressinfo_head = hostinfo_holder->addressinfo_head;
			hosts[hostcounter].addressinfo_tail = hostinfo_holder->addressinfo_tail;
			hosts[hostcounter].address_count = hostinfo_holder->address_count;
			hosts[hostcounter].connection_hostname.current = hosts[hostcounter].addressinfo_head;
			hosts[hostcounter].snmp_hostname.current = hosts[hostcounter].addressinfo_head;

			if (hostinfo_holder->is_hdm)
				hosts[hostcounter].is_hdm = 1;

			if (hostinfo_holder->is_hdw)
				hosts[hostcounter].is_hdw = 1;

			if (hostinfo_holder->is_etl)
				hosts[hostcounter].is_etl = 1;

			if (hostinfo_holder->is_hbw)
				hosts[hostcounter].is_hbw = 1;

			if (hostinfo_holder->is_hdc)
				hosts[hostcounter].is_hdc = 1;

			apr_thread_mutex_create(&hosts[hostcounter].mutex, APR_THREAD_MUTEX_UNNESTED, global_pool); // use the global pool so the mutexes last beyond this function

			hostcounter++;
		}

		*hostcnt = (int) hostcounter;
	}

	apr_pool_destroy(pool);
	PQclear(result);
	PQfinish(conn);

	if (!hosts || *hostcnt < 1)
	{
		gpmon_fatalx(FLINE, 0, "no valid hosts found");
	}

	*host_table = hosts;
}