static bool gpdb_insert_alert_log() { PGconn* conn = 0; PGresult* result = 0; const char* QRY = "insert into log_alert_history select * from log_alert_tail;"; const char* errmsg; errmsg = gpdb_exec(&conn, &result, QRY); bool success = true; if (errmsg) { gpmon_warningx( FLINE, 0, "---- ARCHIVING HISTORICAL ALERT DATA FAILED ---- on query %s with error %s\n", QRY, errmsg); success = false; } else { TR1(("load completed OK: alert_log\n")); } PQclear(result); PQfinish(conn); return success; }
/*
 * Release memory obtained from the M_KHTTPD malloc type,
 * tracing the pointer value when malloc tracing is compiled in.
 */
void khttpd_free(void *mem)
{
#ifdef KHTTPD_TRACE_MALLOC
	/* Only the pointer value is logged; mem is never dereferenced. */
	TR1("free %p", mem);
#endif
	free(mem, M_KHTTPD);
}
/*
 * Import alert-log tail files into the history table.
 *
 * Steps: collect the candidate tail files, concatenate them into a single
 * stage file, load the stage data via gpdb_insert_alert_log(), and on
 * success remove the tail files that were appended and re-truncate the
 * stage file.
 *
 * pool: APR pool used for all temporary allocations in this pass.
 */
void gpdb_import_alert_log(apr_pool_t *pool)
{
	// Get alert log files to be imported.
	apr_array_header_t* tail_files = apr_array_make(pool, 10, sizeof(char*));
	apr_array_header_t* success_append_files = apr_array_make(pool, 10, sizeof(char*));
	get_alert_log_tail_files(tail_files, pool);

	// Create or truncate stage file.
	char *dst_file = apr_pstrcat(pool, GPMON_LOG, "/", GPMON_ALERT_LOG_STAGE, NULL);
	apr_status_t status = truncate_file(dst_file, pool);
	if (status != APR_SUCCESS)
	{
		gpmon_warningx(FLINE, 0, "failed truncating stage file:%s", dst_file);
		return;
	}

	// Append alert log tail file to stage file
	void *tail_file = NULL;
	while ((tail_file = apr_array_pop(tail_files)))
	{
		char *filename = *(char**)tail_file;
		// Reserve the success-list slot up front so it stays in step with
		// the files we attempt; a NULL slot marks a failed append.
		void *success_file_slot = apr_array_push(success_append_files);
		if (!success_file_slot)
		{
			gpmon_warningx(
				FLINE, 0,
				"failed appending file:%s to stage file:%s due to out of memory",
				filename, dst_file);
			continue;
		}
		(*(char**)success_file_slot) = NULL;
		status = apr_file_append(filename, dst_file, APR_FILE_SOURCE_PERMS, pool);
		if (status != APR_SUCCESS)
		{
			gpmon_warningx(FLINE, status, "failed appending file:%s to stage file:%s",
				filename, dst_file);
			continue;
		}
		else
		{
			// Record the filename so it can be deleted after a successful load.
			(*(char**)success_file_slot) = filename;
			TR1(("success appending file:%s to stage file:%s\n", filename, dst_file));
		}
	}

	// Insert tail file to history table.
	if (gpdb_insert_alert_log())
	{
		// Delete tail file(s) that were appended, then reset the stage file.
		gpdb_remove_success_files(success_append_files, pool);
		truncate_file(dst_file, pool);
	}
}
// assumes a valid connection already exists static const char* gpdb_exec_only(PGconn* conn, PGresult** pres, const char* query) { PGresult* res = 0; ExecStatusType status; TR1(("Query: %s\n", query)); res = PQexec(conn, query); status = PQresultStatus(res); if (status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK) return PQerrorMessage(conn); *pres = res; return 0; }
/*
 * Load one stat table's tail data into its history table by running
 * "insert into <tbl>_history select * from _<tbl>_tail;".  The iterators
 * table gets a special column-list query that forces cpu_currpct to 0 so
 * the momentary CPU percentage is not persisted into history.
 *
 * Returns APR_SUCCESS, or 1 if the insert query failed.
 *
 * NOTE(review): the conN parameter appears unused — a fresh connection is
 * opened by gpdb_exec() below and closed at the end.  Confirm whether the
 * caller's connection was meant to be reused here.
 */
static apr_status_t harvest(const char* tbl, apr_pool_t* pool, PGconn* conN)
{
	PGconn* conn = 0;
	PGresult* result = 0;
	const int QRYBUFSIZ = 1792;
	char qrybuf[QRYBUFSIZ];
	const char* QRYFMT = "insert into %s_history select * from _%s_tail;";
	const char* errmsg;
	apr_status_t res = APR_SUCCESS;

	if (strcmp(tbl, "iterators") == 0)
	{
		//this is to leave the cpu percentage out of iterators history
		// (the select supplies a literal 0 in place of cpu_currpct)
		const char* ITERQRYFMT = "insert into %s_history (ctime, tmid, ssid, ccnt, segid, pid, nid, pnid, hostname, ntype, nstatus, tstart, "
			"tduration, pmemsize, pmemmax, memsize, memresid, memshare, cpu_elapsed, cpu_currpct, phase, rows_out, rows_out_est, m0_name, m0_unit, m0_val, "
			"m0_est, m1_name, m1_unit, m1_val, m1_est, m2_name, m2_unit, m2_val, m2_est, m3_name, m3_unit, m3_val, m3_est, m4_name, m4_unit, "
			"m4_val, m4_est, m5_name, m5_unit, m5_val, m5_est, m6_name, m6_unit, m6_val, m6_est, m7_name, m7_unit, m7_val, m7_est, m8_name, "
			"m8_unit, m8_val, m8_est, m9_name, m9_unit, m9_val, m9_est, m10_name, m10_unit, m10_val, m10_est, m11_name, m11_unit, m11_val, "
			"m11_est, m12_name, m12_unit, m12_val, m12_est, m13_name, m13_unit, m13_val, m13_est, m14_name, m14_unit, m14_val, m14_est, m15_name, "
			"m15_unit, m15_val, m15_est, t0_name, t0_val) select ctime, tmid, ssid, ccnt, segid, pid, nid, pnid, hostname, ntype, nstatus, tstart, "
			"tduration, pmemsize, pmemmax, memsize, memresid, memshare, cpu_elapsed, 0, phase, rows_out, rows_out_est, m0_name, m0_unit, m0_val, "
			"m0_est, m1_name, m1_unit, m1_val, m1_est, m2_name, m2_unit, m2_val, m2_est, m3_name, m3_unit, m3_val, m3_est, m4_name, m4_unit, "
			"m4_val, m4_est, m5_name, m5_unit, m5_val, m5_est, m6_name, m6_unit, m6_val, m6_est, m7_name, m7_unit, m7_val, m7_est, m8_name, "
			"m8_unit, m8_val, m8_est, m9_name, m9_unit, m9_val, m9_est, m10_name, m10_unit, m10_val, m10_est, m11_name, m11_unit, m11_val, "
			"m11_est, m12_name, m12_unit, m12_val, m12_est, m13_name, m13_unit, m13_val, m13_est, m14_name, m14_unit, m14_val, m14_est, "
			"m15_name, m15_unit, m15_val, m15_est, t0_name, t0_val from _%s_tail;";
		snprintf(qrybuf, QRYBUFSIZ, ITERQRYFMT, tbl, tbl);
	}
	else
	{
		snprintf(qrybuf, QRYBUFSIZ, QRYFMT, tbl, tbl);
	}

	errmsg = gpdb_exec(&conn, &result, qrybuf);
	if (errmsg)
	{
		res = 1;
		gpmon_warningx(FLINE, 0, "---- HARVEST %s FAILED ---- on query %s with error %s\n",
			tbl, qrybuf, errmsg);
	}
	else
	{
		TR1(("load completed OK: %s\n", tbl));
	}

	PQclear(result);
	PQfinish(conn);
	return res;
}
/*
 * Resize a block allocated from the M_KHTTPD malloc type.
 * With tracing enabled, logs the old block as freed and the new one as
 * allocated, and records a stack trace of the call site.
 */
void *khttpd_realloc(void *mem, size_t size)
{
#ifdef KHTTPD_TRACE_MALLOC
	struct stack st;
#endif
	void *resized;

	/* M_WAITOK lets the allocator sleep instead of failing immediately. */
	resized = realloc(mem, size, M_KHTTPD, M_WAITOK);

#ifdef KHTTPD_TRACE_MALLOC
	/* Only pointer values are logged here; the old block is not touched. */
	TR1("free %p", mem);
	TR2("alloc %p %#lx", resized, size);
	stack_save(&st);
	CTRSTACK(KTR_GEN, &st, 8, 0);
#endif

	return (resized);
}
/* append stage data to _tail file */
static apr_status_t append_to_harvest(const char* tbl, apr_pool_t* pool, PGconn* conn)
{
	char srcfn[PATH_MAX];
	char dstfn[PATH_MAX];
	apr_status_t rc;

	/* build "<dir><tbl>_stage.dat" (source) and "<dir>_<tbl>_tail.dat" (dest) */
	snprintf(srcfn, PATH_MAX, "%s%s_stage.dat", GPMON_DIR, tbl);
	snprintf(dstfn, PATH_MAX, "%s_%s_tail.dat", GPMON_DIR, tbl);

	rc = apr_file_append(srcfn, dstfn, APR_FILE_SOURCE_PERMS, pool);
	if (rc == APR_SUCCESS)
	{
		TR1(("harvest append %s to %s: ok\n", srcfn, dstfn));
	}
	else
	{
		gpmon_warningx(FLINE, rc, "harvest failed appending %s to %s", srcfn, dstfn);
	}
	return rc;
}
/** * This function removes the not null constraint from the segid column so that * we can set it to null when the segment aggregation flag is true */ apr_status_t remove_segid_constraint(void) { PGconn* conn = 0; PGresult* result = 0; const char* ALTERSTR = "alter table iterators_history alter column segid drop not null;"; const char* errmsg; apr_status_t res = APR_SUCCESS; errmsg = gpdb_exec(&conn, &result, ALTERSTR); if (errmsg) { res = 1; gpmon_warningx(FLINE, 0, "---- Alter FAILED ---- on command: %s with error %s\n", ALTERSTR, errmsg); } else { TR1(("remove_segid_constraint: alter completed OK\n")); } PQclear(result); PQfinish(conn); return res; }
apr_status_t gpdb_harvest_healthdata() { PGconn* conn = 0; PGresult* result = 0; const char* QRY = "insert into health_history select * from health_now;"; const char* errmsg; apr_status_t res = APR_SUCCESS; errmsg = gpdb_exec(&conn, &result, QRY); if (errmsg) { res = 1; gpmon_warningx(FLINE, 0, "---- ARCHIVING HISTORICAL HEALTH DATA FAILED ---- on query %s with error %s\n", QRY, errmsg); } else { TR1(("load completed OK: health\n")); } PQclear(result); PQfinish(conn); return res; }
/* rename tail to stage */
static apr_status_t rename_tail_files(const char* tbl, apr_pool_t* pool, PGconn* conn)
{
	char srcfn[PATH_MAX];
	char dstfn[PATH_MAX];

	/* "<dir><tbl>_tail.dat" -> "<dir><tbl>_stage.dat" */
	snprintf(srcfn, PATH_MAX, "%s%s_tail.dat", GPMON_DIR, tbl);
	snprintf(dstfn, PATH_MAX, "%s%s_stage.dat", GPMON_DIR, tbl);

	apr_status_t rc = apr_file_rename(srcfn, dstfn, pool);
	if (rc == APR_SUCCESS)
	{
		TR1(("harvest rename %s to %s success\n", srcfn, dstfn));
	}
	else
	{
		gpmon_warningx(FLINE, rc, "harvest failed renaming %s to %s", srcfn, dstfn);
	}
	return rc;
}
/*
 * Open fn (creating it if necessary) and truncate it to zero length.
 *
 * fn:   path of the file to truncate.
 * pool: APR pool for the file handle.
 * Returns the APR status of the open/truncate sequence.
 */
apr_status_t truncate_file(char* fn, apr_pool_t* pool)
{
	apr_file_t *fp = NULL;
	apr_status_t status;

	status = apr_file_open(&fp, fn, APR_WRITE|APR_CREATE|APR_TRUNCATE,
		APR_UREAD|APR_UWRITE, pool);
	if (status == APR_SUCCESS)
	{
		status = apr_file_trunc(fp, 0);
		apr_file_close(fp);
	}

	if (status != APR_SUCCESS)
	{
		/* CONSISTENCY FIX: pass the APR status (was hard-coded 0) so the
		 * warning carries the OS error text, matching the other harvest
		 * warnings in this file. */
		gpmon_warningx(FLINE, status, "harvest process truncate file %s failed", fn);
	}
	else
	{
		TR1(("harvest truncated file %s: ok\n", fn));
	}
	return status;
}
/*
 * libevent callback for the smon listen socket.
 *
 * Accepts one gpmmon connection at a time: reads a complete HELLO packet,
 * validates magic/version/type and the session signature, echoes the HELLO
 * back with our pid, and registers the new socket for command reads with
 * a terminate timeout.  On an accept timeout with no active connection,
 * the process exits.
 */
static void gx_accept(SOCKET sock, short event, void* arg)
{
	SOCKET nsock;
	gp_smon_to_mmon_packet_t pkt;
	struct sockaddr_in a;
	socklen_t alen = sizeof(a);
	char* p;
	char* q;

	if (event & EV_TIMEOUT)
	{
		if (gx.tcp_sock)
		{
			/* start watching connect request again */
			if (event_add(&gx.listen_event, 0))
			{
				gpsmon_fatal(FLINE, "event_add failed");
			}
			return;
		}
		/* nobody connected within the timeout window: give up */
		gpmon_fatal(FLINE,
			"smon terminates due to no requests come after %" FMT64 " seconds\n",
			opt.terminate_timeout);
	}

	if (0 == (event & EV_READ))
		return;

	if (-1 == (nsock = accept(sock, (void*) &a, &alen)))
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "accept failed");
		return;
	}

	TR1(("accepted\n"));

	/* we do this one at a time */
	if (gx.tcp_sock)
	{
		gpmon_warning(FLINE, "cannot accept new connection before old one dies");
		close(nsock);
		return;
	}

	/* read exactly sizeof(pkt) bytes of the HELLO packet */
	p = (char*) &pkt;
	q = p + sizeof(pkt);
	while (p < q)
	{
		int n = recv(nsock, p, q - p, 0);
		if (n == -1)
		{
			gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "recv failed");
			close(nsock);
			return;
		}
		/* BUGFIX: recv() returns 0 when the peer closes the connection.
		 * The original code looped forever here because p never advanced. */
		if (n == 0)
		{
			gpmon_warning(FLINE, "peer closed connection before hello packet was complete");
			close(nsock);
			return;
		}
		p += n;
	}

	if (0 != gpmon_ntohpkt(pkt.header.magic, pkt.header.version, pkt.header.pkttype))
	{
		close(nsock);
		return;
	}

	if (pkt.header.pkttype != GPMON_PKTTYPE_HELLO)
	{
		close(nsock);
		return;
	}

	if (pkt.u.hello.signature != gx.signature)
	{
		gx_exit("bad signature... maybe a new gpmmon has started");
	}

	/* echo the hello */
	pkt.u.hello.pid = getpid();
	TR2(("accepted pkt.magic = %x\n", (int) pkt.header.magic));
	send_smon_to_mon_pkt(nsock, &pkt);

	struct timeval tv;
	tv.tv_sec = opt.terminate_timeout;
	tv.tv_usec = 0;
	event_set(&gx.tcp_event, nsock, EV_READ | EV_PERSIST | EV_TIMEOUT, gx_gettcpcmd, 0);
	if (event_add(&gx.tcp_event, &tv))
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed");
		close(nsock);
		return;
	}

	gx.tcp_sock = nsock;
	TR1(("connection established --------------------- \n"));
}
/*
 * libevent callback for the established gpmmon command socket.
 *
 * On receipt of a 'D' (dump) command: swap in fresh hash tables for the
 * next collection cycle, then stream the previous cycle's contents —
 * machine metrics, fsinfo, queryseg / seginfo / qlog / qexec packets, and
 * a per-query CPU rollup — to gpmmon, and finally destroy the old pool.
 * On timeout the connection is dropped so gx_accept can take a new one.
 */
static void gx_gettcpcmd(SOCKET sock, short event, void* arg)
{
	char dump;
	int n, e;
	apr_pool_t* oldpool;
	apr_hash_t* qetab;
	apr_hash_t* qdtab;
	apr_hash_t* pidtab;
	apr_hash_t* segtab;

	if (event & EV_TIMEOUT) // didn't get command from gpmmon, quit
	{
		if (gx.tcp_sock)
		{
			close(gx.tcp_sock);
			gx.tcp_sock = 0;
		}
		return;
	}

	apr_hash_t* querysegtab;

	/* read the single-byte command; anything but 'D' is fatal */
	n = recv(sock, &dump, 1, 0);
	if (n == 0)
		gx_exit("peer closed");
	if (n == -1)
		gx_exit("socket error");
	if (dump != 'D')
		gx_exit("bad data");

	TR1(("start dump %c\n", dump));

	/* take ownership of the current cycle's tables */
	qetab = gx.qexectab;
	qdtab = gx.qlogtab;
	pidtab = gx.pidtab;
	segtab = gx.segmenttab;
	querysegtab = gx.querysegtab;

	/* all the old tables share this pool; destroyed at the end */
	oldpool = apr_hash_pool_get(qetab);

	/* make new hashtabs for next cycle */
	{
		apr_pool_t* newpool;
		if (0 != (e = apr_pool_create_alloc(&newpool, gx.pool)))
		{
			gpsmon_fatalx(FLINE, e, "apr_pool_create_alloc failed");
		}
		/* qexec hash table */
		gx.qexectab = apr_hash_make(newpool);
		CHECKMEM(gx.qexectab);
		/* qlog hash table */
		gx.qlogtab = apr_hash_make(newpool);
		CHECKMEM(gx.qlogtab);
		/* segment hash table */
		gx.segmenttab = apr_hash_make(newpool);
		CHECKMEM(gx.segmenttab);
		/* queryseg hash table */
		gx.querysegtab = apr_hash_make(newpool);
		CHECKMEM(gx.querysegtab);
		/* pidtab hash table */
		gx.pidtab = apr_hash_make(newpool);
		CHECKMEM(gx.pidtab);
	}

	/* push out a metric of the machine */
	send_machine_metrics(sock);
	send_fsinfo(sock);

	/* push out records */
	{
		apr_hash_index_t* hi;
		gp_smon_to_mmon_packet_t* ppkt = 0;
		gp_smon_to_mmon_packet_t localPacketObject;
		pidrec_t* pidrec;
		int count = 0;
		apr_hash_t* query_cpu_table = NULL;

		/* per-query-per-segment packets */
		for (hi = apr_hash_first(0, querysegtab); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			ppkt = vptr;
			if (ppkt->header.pkttype != GPMON_PKTTYPE_QUERYSEG)
				continue;
			TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}

		/* segment info packets, stamped with this host's name */
		for (hi = apr_hash_first(0, segtab); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			ppkt = vptr;
			if (ppkt->header.pkttype != GPMON_PKTTYPE_SEGINFO)
				continue;

			/* fill in hostname */
			strncpy(ppkt->u.seginfo.hostname, gx.hostname,
				sizeof(ppkt->u.seginfo.hostname) - 1);
			ppkt->u.seginfo.hostname[sizeof(ppkt->u.seginfo.hostname) - 1] = 0;

			TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}

		/* query log packets */
		for (hi = apr_hash_first(0, qdtab); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			ppkt = vptr;
			if (ppkt->header.pkttype != GPMON_PKTTYPE_QLOG)
				continue;
			TR2(("sending magic %x, pkttype %d\n", ppkt->header.magic, ppkt->header.pkttype));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}

		/* query-executor records, enriched with per-pid metrics */
		for (hi = apr_hash_first(0, qetab); hi; hi = apr_hash_next(hi))
		{
			gpmon_qexec_t* qexec;
			void *vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			qexec = vptr;

			/* fill in _p_metrics */
			pidrec = apr_hash_get(pidtab, &qexec->key.hash_key.pid,
				sizeof(qexec->key.hash_key.pid));
			if (pidrec)
			{
				qexec->_p_metrics = pidrec->p_metrics;
				qexec->_cpu_elapsed = pidrec->cpu_elapsed;
			}
			else
			{
				/* NOTE(review): only _p_metrics is zeroed here; _cpu_elapsed
				 * keeps its previous value — confirm that is intentional. */
				memset(&qexec->_p_metrics, 0, sizeof(qexec->_p_metrics));
			}

			/* fill in _hname */
			strncpy(qexec->_hname, gx.hostname, sizeof(qexec->_hname) - 1);
			qexec->_hname[sizeof(qexec->_hname) - 1] = 0;

			/* packet construction failure aborts the remaining qexec sends */
			if (0 == create_qexec_packet(qexec, &localPacketObject))
			{
				break;
			}
			TR2(("sending qexec, pkttype %d\n", localPacketObject.header.pkttype));
			send_smon_to_mon_pkt(sock, &localPacketObject);
			count++;
		}

		// calculate CPU utilization per query for this machine
		query_cpu_table = apr_hash_make(oldpool);
		CHECKMEM(query_cpu_table);

		// loop through PID's and add to Query CPU Hash Table
		for (hi = apr_hash_first(0, pidtab); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			pidrec_t* lookup;
			apr_hash_this(hi, 0, 0, &vptr);
			pidrec = vptr;
			TR2(("tmid %d ssid %d ccnt %d pid %d (CPU elapsed %d CPU Percent %.2f)\n",
				pidrec->query_key.tmid, pidrec->query_key.ssid,
				pidrec->query_key.ccnt, pidrec->pid,
				pidrec->cpu_elapsed, pidrec->p_metrics.cpu_pct));

			// table is keyed on query key
			lookup = apr_hash_get(query_cpu_table, &pidrec->query_key,
				sizeof(pidrec->query_key));
			if (lookup)
			{
				// found other pids with same query key so add the metrics to that
				lookup->cpu_elapsed += pidrec->cpu_elapsed;
				lookup->p_metrics.cpu_pct += pidrec->p_metrics.cpu_pct;
			}
			else
			{
				// insert existing pid record into table keyed by query key
				apr_hash_set(query_cpu_table, &pidrec->query_key,
					sizeof(pidrec->query_key), pidrec);
			}
		}

		// reset packet to 0
		ppkt = &localPacketObject;
		memset(ppkt, 0, sizeof(gp_smon_to_mmon_packet_t));
		gp_smon_to_mmon_set_header(ppkt, GPMON_PKTTYPE_QUERY_HOST_METRICS);

		// add the hostname into the packet for DEBUGGING purposes only.  This is not used
		strncpy(ppkt->u.qlog.user, gx.hostname, sizeof(ppkt->u.qlog.user) - 1);
		ppkt->u.qlog.user[sizeof(ppkt->u.qlog.user) - 1] = 0;

		// loop through the query per cpu table and send the metrics
		for (hi = apr_hash_first(0, query_cpu_table); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			apr_hash_this(hi, 0, 0, &vptr);
			pidrec = vptr;
			ppkt->u.qlog.key.tmid = pidrec->query_key.tmid;
			ppkt->u.qlog.key.ssid = pidrec->query_key.ssid;
			ppkt->u.qlog.key.ccnt = pidrec->query_key.ccnt;
			ppkt->u.qlog.cpu_elapsed = pidrec->cpu_elapsed;
			ppkt->u.qlog.p_metrics.cpu_pct = pidrec->p_metrics.cpu_pct;
			TR2(("SEND tmid %d ssid %d ccnt %d (CPU elapsed %d CPU Percent %.2f)\n",
				ppkt->u.qlog.key.tmid, ppkt->u.qlog.key.ssid, ppkt->u.qlog.key.ccnt,
				ppkt->u.qlog.cpu_elapsed, ppkt->u.qlog.p_metrics.cpu_pct));
			send_smon_to_mon_pkt(sock, ppkt);
			count++;
		}
		TR1(("end dump ... sent %d entries\n", count));
	}

	/* get rid of the old pool */
	{
		apr_pool_destroy(oldpool);
	}

	struct timeval tv;
	tv.tv_sec = opt.terminate_timeout;
	tv.tv_usec = 0;
	if (event_add(&gx.tcp_event, &tv)) //reset timeout
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed");
	}
	return;
}
/*
 * smon main loop: set up logging, host identity, sigar, UDP/TCP listeners,
 * then run the libevent dispatch loop forever, sampling pid metrics every
 * cycle and rotating the log file when it exceeds the configured size.
 *
 * port:      TCP/UDP port to serve on (0 lets the system pick — see setup_tcp).
 * signature: session signature that connecting gpmmon instances must match.
 */
void gx_main(int port, apr_int64_t signature)
{
	/* set up our log files */
	if (opt.log_dir)
	{
		mkdir(opt.log_dir, S_IRWXU | S_IRWXG);
		if (0 != chdir(opt.log_dir))
		{
			/* Invalid dir for log file, try home dir */
			char *home_dir = NULL;
			if (0 == apr_env_get(&home_dir, "HOME", gx.pool))
			{
				if (home_dir)
					chdir(home_dir);
			}
		}
	}

	/* redirect stdout to the log file, line-buffered */
	update_log_filename();
	freopen(log_filename, "w", stdout);
	setlinebuf(stdout);

	if (!get_and_allocate_hostname())
		gpsmon_fatalx(FLINE, 0, "failed to allocate memory for hostname");

	TR0(("HOSTNAME = '%s'\n", gx.hostname)); // first chance to write to log file
	TR2(("signature = %" FMT64 "\n", signature));
	TR1(("detected %d cpu cores\n", number_cpu_cores));

	setup_gx(port, signature);
	setup_sigar();
	setup_udp();
	setup_tcp();

	gx.tick = 0;
	for (;;)
	{
		struct timeval tv;
		apr_hash_index_t* hi;

		/* serve events every 2 second */
		gx.tick++;
		gx.now = time(NULL);
		tv.tv_sec = 2;
		tv.tv_usec = 0;

		/* event dispatch blocks for a certain time based on the seconds given
		 * to event_loopexit */
		if (-1 == event_loopexit(&tv))
		{
			gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_loopexit failed");
		}
		if (-1 == event_dispatch())
		{
			gpsmon_fatalx(FLINE, APR_FROM_OS_ERROR(errno), "event_dispatch failed");
		}

		/* get pid metrics for every query-executor process we track */
		for (hi = apr_hash_first(0, gx.qexectab); hi; hi = apr_hash_next(hi))
		{
			void* vptr;
			gpmon_qexec_t* rec;
			apr_hash_this(hi, 0, 0, &vptr);
			rec = vptr;
			get_pid_metrics(rec->key.hash_key.pid, rec->key.tmid,
				rec->key.ssid, rec->key.ccnt);
		}

		/* check log size once a minute; rotate when over the limit */
		if (gx.tick % 60 == 0)
		{
			apr_finfo_t finfo;
			if (0 == apr_stat(&finfo, log_filename, APR_FINFO_SIZE, gx.pool))
			{
				if (opt.max_log_size != 0 && finfo.size > opt.max_log_size)
				{
					update_log_filename();
					freopen(log_filename, "w", stdout);
					setlinebuf(stdout);
				}
			}
		}
	}
}
/*
 * Create, bind, and listen on the smon TCP socket, then register the
 * accept handler (gx_accept) with libevent.  Fatal on any failure.
 */
static void setup_tcp(void)
{
	SOCKET sock = 0;
	struct addrinfo hints;
	struct addrinfo *addrs, *rp;
	int s;
	char service[32];

	/*
	 * we let the system pick the TCP port here so we don't have to
	 * manage port resources ourselves.
	 */
	snprintf(service, 32, "%d", gx.port);
	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
	hints.ai_socktype = SOCK_STREAM; /* TCP socket */
	hints.ai_flags = AI_PASSIVE;     /* For wildcard IP address */
	hints.ai_protocol = 0;           /* Any protocol */

	s = getaddrinfo(NULL, service, &hints, &addrs);
	if (s != 0)
		gpsmon_fatalx(FLINE, 0, "getaddrinfo says %s", gai_strerror(s));

	/*
	 * getaddrinfo() returns a list of address structures,
	 * one for each valid address and family we can use.
	 *
	 * Try each address until we successfully bind.
	 * If socket (or bind) fails, we (close the socket
	 * and) try the next address.  This can happen if
	 * the system supports IPv6, but IPv6 is disabled from
	 * working, or if it supports IPv6 and IPv4 is disabled.
	 */

	/*
	 * If there is both an AF_INET6 and an AF_INET choice,
	 * we prefer the AF_INET6, because on UNIX it can receive either
	 * protocol, whereas AF_INET can only get IPv4.  Otherwise we'd need
	 * to bind two sockets, one for each protocol.
	 *
	 * Why not just use AF_INET6 in the hints?  That works perfect
	 * if we know this machine supports IPv6 and IPv6 is enabled,
	 * but we don't know that.
	 */
#ifdef HAVE_IPV6
	if (addrs->ai_family == AF_INET && addrs->ai_next != NULL
			&& addrs->ai_next->ai_family == AF_INET6)
	{
		/*
		 * We got both an INET and INET6 possibility, but we want to prefer the INET6 one if it works.
		 * Reverse the order we got from getaddrinfo so that we try things in our preferred order.
		 * If we got more possibilities (other AFs??), I don't think we care about them, so don't
		 * worry if the list is more that two, we just rearrange the first two.
		 */
		struct addrinfo *temp = addrs->ai_next;     /* second node */
		addrs->ai_next = addrs->ai_next->ai_next;   /* point old first node to third node if any */
		temp->ai_next = addrs;                      /* point second node to first */
		addrs = temp;                               /* start the list with the old second node */
	}
#endif

	for (rp = addrs; rp != NULL; rp = rp->ai_next)
	{
		int on = 1;
		struct linger linger;

		/*
		 * getaddrinfo gives us all the parameters for the socket() call
		 * as well as the parameters for the bind() call.
		 */
		sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (sock == -1)
			continue;

		/* keepalive, address reuse, and a short linger on close */
		setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (void*) &on, sizeof(on));
		setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (void*) &on, sizeof(on));
		linger.l_onoff = 1;
		linger.l_linger = 5;
		setsockopt(sock, SOL_SOCKET, SO_LINGER, (void*) &linger, sizeof(linger));

		if (bind(sock, rp->ai_addr, rp->ai_addrlen) == 0)
			break; /* Success */

		close(sock);
	}

	if (rp == NULL)
	{
		/* No address succeeded */
		gpsmon_fatalx(FLINE, APR_FROM_OS_ERROR(errno), "unable to bind tcp socket");
	}

	freeaddrinfo(addrs);

	if (-1 == listen(sock, 5))
	{
		gpsmon_fatalx(FLINE, APR_FROM_OS_ERROR(errno), "listen failed");
	}

	/* save it */
	gx.listen_sock = sock;
	TR1(("TCP port %d opened\n", gx.port));

	/* set up listen event, and associate with our event_base */
	event_set(&gx.listen_event, sock, EV_READ | EV_PERSIST | EV_TIMEOUT, gx_accept, 0);

	struct timeval accept_timeout;
	accept_timeout.tv_sec = opt.terminate_timeout;
	accept_timeout.tv_usec = 0;

	/* start watching this event; only arm the timeout if one is configured */
	if (event_add(&gx.listen_event, opt.terminate_timeout ? &accept_timeout : 0))
	{
		gpsmon_fatal(FLINE, "event_add failed");
	}
}
/*
 * Parse one line of the hadoop cluster info file and, when it describes a
 * monitored hadoop host type (hdm / hdw / hdc), add a hostinfo record for
 * its primary hostname to htab.  Lines for other categories, comments, and
 * duplicates are skipped with a warning where appropriate.
 *
 * tmp_pool:          pool for the hostinfo allocation.
 * htab:              hash of primary hostname -> hostinfo_holder_t.
 * line:              mutable line text; tokens are null-terminated in place.
 * smon_bin_location: smon binary path recorded in the host record.
 * smon_log_location: smon log path recorded in the host record.
 */
void process_line_in_hadoop_cluster_info(apr_pool_t* tmp_pool, apr_hash_t* htab,
	char* line, char* smon_bin_location, char* smon_log_location)
{
	if (!line)
	{
		gpmon_warningx(FLINE, 0, "Line in hadoop cluster info file is null, skipping");
		return;
	}

	char* host;
	char* category;
	char primary_hostname[64];

	char* location = strchr(line, '#');
	if (location)
	{
		*location = 0; // remove comments from the line
	}

	/* NOTE(review): line cannot become NULL above, so this check looks dead;
	 * kept as-is since this is a documentation-only pass. */
	if (!line)
	{
		gpmon_warningx(FLINE, 0, "Line in devices file is null after removing comments, skipping");
		return;
	}

	// we do these in reverse order so inserting null chars does not prevent finding other tokens
	if (find_token_in_config_string(line, &category, "Categories"))
	{
		return;
	}

	location = strchr(category, ','); //remove the comma and extra categories
	if (location)
	{
		*location = 0;
	}

	if (find_token_in_config_string(line, &host, "Hostname"))
	{
		return;
	}

	TR1(("Found hadoop host %s\n", host));

	// look for the 3 hadoop host types
	int monitored_device = 0;
	int hostType = 0;
	if (strcmp(category, "hdm") == 0)
	{
		monitored_device = 1;
		hostType = GPMON_HOSTTTYPE_HDM;
	}
	if (strcmp(category, "hdw") == 0)
	{
		monitored_device = 1;
		hostType = GPMON_HOSTTTYPE_HDW;
	}
	if (strcmp(category, "hdc") == 0)
	{
		monitored_device = 1;
		hostType = GPMON_HOSTTTYPE_HDC;
	}

	// The below code is the same as the devices file parsing code
	// segment host, switch, etc ... we are only adding additional hosts
	// required for performance monitoring
	if (!monitored_device)
	{
		return;
	}

	/* take the first comma-separated hostname as the primary */
	strncpy(primary_hostname, host, sizeof(primary_hostname));
	primary_hostname[sizeof(primary_hostname) - 1] = 0;
	location = strchr(primary_hostname, ',');
	if (location)
	{
		*location = 0;
	}

	struct hostinfo_holder_t* hostinfo_holder =
		apr_hash_get(htab, primary_hostname, APR_HASH_KEY_STRING);
	if (hostinfo_holder)
	{
		gpmon_warningx(FLINE, 0, "Host '%s' is duplicated in clusterinfo.txt",
			primary_hostname);
		return;
	}

	// OK Lets add this record at this point
	hostinfo_holder = apr_pcalloc(tmp_pool, sizeof(struct hostinfo_holder_t));
	CHECKMEM(hostinfo_holder);
	apr_hash_set(htab, primary_hostname, APR_HASH_KEY_STRING, hostinfo_holder);
	initializeHostInfoDataFromFileEntry(tmp_pool, hostinfo_holder, primary_hostname,
		host, hostType, smon_bin_location, smon_log_location);
}