/*
 * Copy every (session id, query text) row of RESULT into HASH.
 * Both key and value are duplicated into POOL so they outlive RESULT.
 * Rows whose strings cannot be duplicated are logged and skipped.
 */
static void convert_tuples_to_hash(PGresult *result, apr_hash_t *hash, apr_pool_t *pool)
{
	const int nrows = PQntuples(result);

	for (int row = 0; row < nrows; row++)
	{
		/* column 0 = session id (hash key), column 1 = query text (value) */
		char *key = apr_pstrdup(pool, PQgetvalue(result, row, 0));
		char *val = apr_pstrdup(pool, PQgetvalue(result, row, 1));

		if (key == NULL || val == NULL)
		{
			gpmon_warning(FLINE, "Out of memory");
			continue;
		}

		apr_hash_set(hash, key, APR_HASH_KEY_STRING, val);
	}
}
// Whether log_alert_table needs to be recreated. The order command is // EXECUTE 'cat ...' which would crash if gpdb-alert log files contain invalid // character. We use 'iconv' instead of 'cat' to fix it. // returns true if given table exist and uses 'cat' instead of 'iconv', then should be // recreated. // return false otherwise static bool gpdb_should_recreate_log_alert(PGconn *conn, const char *table_name, const char *encoding, int expected_encoding_num, bool script_exist) { ASSERT(conn); ASSERT(strcasecmp(table_name, "log_alert_tail") == 0 || strcasecmp(table_name, "log_alert_now") == 0); PGresult *result = 0; const char* errmsg = NULL; const char* pattern = "select a.command, a.encoding from pg_exttable a, pg_class b " "where a.reloid = b.oid and b.relname='%s'"; const int QRYBUFSIZ = 2000; char query[QRYBUFSIZ]; snprintf(query, QRYBUFSIZ, pattern, table_name); bool ret = true; if (conn) errmsg = gpdb_exec_only(conn, &result, query); if (errmsg) { gpmon_warning(FLINE, "GPDB error %s\n\tquery: %s\n", errmsg, query); } else { ret = should_recreate_from_result(result, encoding, script_exist, expected_encoding_num); } PQclear(result); return ret; }
/*
 * libevent callback for the smon listen socket: accept one gpmmon
 * connection, read and validate its HELLO packet, echo it back, and start
 * watching the new socket for commands. Only one TCP connection is served
 * at a time; a timeout with no established connection terminates smon.
 */
static void gx_accept(SOCKET sock, short event, void* arg)
{
	SOCKET nsock;
	gp_smon_to_mmon_packet_t pkt;
	struct sockaddr_in a;
	socklen_t alen = sizeof(a);
	char* p;
	char* q;

	if (event & EV_TIMEOUT)
	{
		if (gx.tcp_sock)
		{
			/* start watching connect request again */
			if (event_add(&gx.listen_event, 0))
			{
				gpsmon_fatal(FLINE, "event_add failed");
			}
			return;
		}
		gpmon_fatal(FLINE, "smon terminates due to no requests come after %" FMT64 " seconds\n",
				opt.terminate_timeout);
	}

	if (0 == (event & EV_READ))
		return;

	if (-1 == (nsock = accept(sock, (void*) &a, &alen)))
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "accept failed");
		return;
	}

	TR1(("accepted\n"));

	/* we do this one at a time */
	if (gx.tcp_sock)
	{
		gpmon_warning(FLINE, "cannot accept new connection before old one dies");
		close(nsock);
		return;
	}

	/* read exactly sizeof(pkt) bytes of HELLO */
	p = (char*) &pkt;
	q = p + sizeof(pkt);
	while (p < q)
	{
		int n = recv(nsock, p, q - p, 0);
		if (n == -1)
		{
			gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "recv failed");
			close(nsock);
			return;
		}
		/* BUGFIX: recv() returns 0 when the peer closes the connection.
		 * The old code only checked -1, so a half-sent HELLO spun forever
		 * in this loop. Treat EOF as a failed handshake. */
		if (n == 0)
		{
			gpmon_warning(FLINE, "connection closed before complete hello packet");
			close(nsock);
			return;
		}
		p += n;
	}

	if (0 != gpmon_ntohpkt(pkt.header.magic, pkt.header.version, pkt.header.pkttype))
	{
		close(nsock);
		return;
	}

	if (pkt.header.pkttype != GPMON_PKTTYPE_HELLO)
	{
		close(nsock);
		return;
	}

	if (pkt.u.hello.signature != gx.signature)
	{
		gx_exit("bad signature... maybe a new gpmmon has started");
	}

	/* echo the hello */
	pkt.u.hello.pid = getpid();
	TR2(("accepted pkt.magic = %x\n", (int) pkt.header.magic));
	send_smon_to_mon_pkt(nsock, &pkt);

	/* arm a persistent read event (with idle timeout) on the new socket */
	struct timeval tv;
	tv.tv_sec = opt.terminate_timeout;
	tv.tv_usec = 0;
	event_set(&gx.tcp_event, nsock, EV_READ | EV_PERSIST | EV_TIMEOUT, gx_gettcpcmd, 0);
	if (event_add(&gx.tcp_event, &tv))
	{
		gpmon_warningx(FLINE, APR_FROM_OS_ERROR(errno), "event_add failed");
		close(nsock);
		return;
	}
	gx.tcp_sock = nsock;
	TR1(("connection established --------------------- \n"));
}
/*
 * Query GPDB for the cluster host list and build the permanent host table.
 * On success *hostcnt and *host_table are set; the host array and the
 * per-host address lists are malloc'd and never freed (they live for the
 * lifetime of the process). Fatal on pool-creation failure or when no
 * valid hosts are found.
 */
void gpdb_get_hostlist(int* hostcnt, host_t** host_table, apr_pool_t* global_pool, mmon_options_t* opt)
{
	apr_pool_t* pool;
	PGconn* conn = 0;
	PGresult* result = 0;
	int rowcount, i;
	unsigned int unique_hosts = 0;
	apr_hash_t* htab;
	struct hostinfo_holder_t* hostinfo_holder = NULL;
	host_t* hosts = NULL;
	int e;

	// Result columns: 0 -- hostname, 1 -- address, 2 -- is_master, 3 -- datadir
	const char *QUERY = "SELECT distinct hostname, address, case when content < 0 then 1 else 0 end as is_master, MAX(fselocation) as datadir FROM pg_filespace_entry "
			"JOIN gp_segment_configuration on (dbid = fsedbid) WHERE fsefsoid = (select oid from pg_filespace where fsname='pg_system') "
			"GROUP BY (hostname, address, is_master) order by hostname";

	if (0 != (e = apr_pool_create_alloc(&pool, NULL)))
	{
		gpmon_fatalx(FLINE, e, "apr_pool_create_alloc failed");
	}

	const char* errmsg = gpdb_exec(&conn, &result, QUERY);

	TR2((QUERY));
	TR2(("\n"));

	if (errmsg)
	{
		gpmon_warning(FLINE, "GPDB error %s\n\tquery: %s\n", errmsg, QUERY);
	}
	else
	{
		// hash of hostnames to hostinfo holders; keys/values live in 'pool'
		htab = apr_hash_make(pool);
		rowcount = PQntuples(result);

		for (i = 0; i < rowcount; i++)
		{
			char* curr_hostname = PQgetvalue(result, i, 0);
			hostinfo_holder = apr_hash_get(htab, curr_hostname, APR_HASH_KEY_STRING);

			if (!hostinfo_holder)
			{
				/* first row for this hostname: create the holder */
				hostinfo_holder = apr_pcalloc(pool, sizeof(struct hostinfo_holder_t));
				CHECKMEM(hostinfo_holder);

				apr_hash_set(htab, curr_hostname, APR_HASH_KEY_STRING, hostinfo_holder);

				hostinfo_holder->hostname = curr_hostname;
				hostinfo_holder->is_master = atoi(PQgetvalue(result, i, 2));
				hostinfo_holder->datadir = PQgetvalue(result, i, 3);

				// use permanent memory for address list -- stored for duration
				// populate 1st on list and save to head and tail
				hostinfo_holder->addressinfo_head = hostinfo_holder->addressinfo_tail = calloc(1, sizeof(addressinfo_holder_t));
				CHECKMEM(hostinfo_holder->addressinfo_tail);

				// first is the hostname
				hostinfo_holder->addressinfo_tail->address = strdup(hostinfo_holder->hostname);
				CHECKMEM(hostinfo_holder->addressinfo_tail->address);

				// add a 2nd to the list
				hostinfo_holder->addressinfo_tail->next = calloc(1, sizeof(addressinfo_holder_t));
				// BUGFIX: check the freshly allocated node, not the old tail
				CHECKMEM(hostinfo_holder->addressinfo_tail->next);

				hostinfo_holder->addressinfo_tail = hostinfo_holder->addressinfo_tail->next;

				// second is address
				hostinfo_holder->addressinfo_tail->address = strdup(PQgetvalue(result, i, 1));
				CHECKMEM(hostinfo_holder->addressinfo_tail->address);

				// one for hostname one for address
				hostinfo_holder->address_count = 2;
			}
			else
			{
				// permanent memory for address list -- stored for duration
				hostinfo_holder->addressinfo_tail->next = calloc(1, sizeof(addressinfo_holder_t));
				// BUGFIX: check the freshly allocated node, not the old tail
				CHECKMEM(hostinfo_holder->addressinfo_tail->next);

				hostinfo_holder->addressinfo_tail = hostinfo_holder->addressinfo_tail->next;

				// permanent memory for address list -- stored for duration
				hostinfo_holder->addressinfo_tail->address = strdup(PQgetvalue(result, i, 1));
				CHECKMEM(hostinfo_holder->addressinfo_tail->address);

				hostinfo_holder->address_count++;
			}
		}

		// if we have any appliance specific hosts such as hadoop nodes add them to the hash table
		if (get_appliance_hosts_and_add_to_hosts(pool, htab))
		{
			TR0(("Not an appliance: checking for SW Only hadoop hosts.\n"));
			get_hadoop_hosts_and_add_to_hosts(pool, htab, opt); // Not an appliance, so check for SW only hadoop nodes.
		}

		unique_hosts = apr_hash_count(htab);

		// allocate memory for host list (not freed ever)
		hosts = calloc(unique_hosts, sizeof(host_t));

		apr_hash_index_t* hi;
		void* vptr;
		int hostcounter = 0;
		for (hi = apr_hash_first(0, htab); hi; hi = apr_hash_next(hi))
		{
			// sanity check (cast avoids signed/unsigned comparison)
			if ((unsigned int) hostcounter >= unique_hosts)
			{
				gpmon_fatalx(FLINE, 0, "host counter exceeds unique hosts");
			}

			apr_hash_this(hi, 0, 0, &vptr);
			hostinfo_holder = vptr;

			hosts[hostcounter].hostname = strdup(hostinfo_holder->hostname);
			hosts[hostcounter].data_dir = strdup(hostinfo_holder->datadir);
			if (hostinfo_holder->smon_dir)
			{
				hosts[hostcounter].smon_bin_location = strdup(hostinfo_holder->smon_dir);
			}
			hosts[hostcounter].is_master = hostinfo_holder->is_master;
			hosts[hostcounter].addressinfo_head = hostinfo_holder->addressinfo_head;
			hosts[hostcounter].addressinfo_tail = hostinfo_holder->addressinfo_tail;
			hosts[hostcounter].address_count = hostinfo_holder->address_count;
			hosts[hostcounter].connection_hostname.current = hosts[hostcounter].addressinfo_head;
			hosts[hostcounter].snmp_hostname.current = hosts[hostcounter].addressinfo_head;

			if (hostinfo_holder->is_hdm)
				hosts[hostcounter].is_hdm = 1;
			if (hostinfo_holder->is_hdw)
				hosts[hostcounter].is_hdw = 1;
			if (hostinfo_holder->is_etl)
				hosts[hostcounter].is_etl = 1;
			if (hostinfo_holder->is_hbw)
				hosts[hostcounter].is_hbw = 1;
			if (hostinfo_holder->is_hdc)
				hosts[hostcounter].is_hdc = 1;

			// use the global pool so the mutexes last beyond this function
			apr_thread_mutex_create(&hosts[hostcounter].mutex, APR_THREAD_MUTEX_UNNESTED, global_pool);

			hostcounter++;
		}

		*hostcnt = hostcounter;
	}

	apr_pool_destroy(pool);
	PQclear(result);
	PQfinish(conn);

	if (!hosts || *hostcnt < 1)
	{
		gpmon_fatalx(FLINE, 0, "no valid hosts found");
	}

	*host_table = hosts;
}
// to mitigate upgrade hassle. void create_log_alert_table() { PGconn *conn = PQconnectdb(GPDB_CONNECTION_STRING); if (PQstatus(conn) != CONNECTION_OK) { gpmon_warning(FLINE, "error creating gpdb client connection to dynamically " "check/create gpperfmon partitions: %s", PQerrorMessage(conn)); PQfinish(conn); return; } const char *qry= "SELECT tablename FROM pg_tables " "WHERE tablename = 'log_alert_history' " "AND schemaname = 'public' ;"; const bool has_history_table = gpdb_exec_search_for_at_least_one_row(qry, conn); char owner[MAX_OWNER_LENGTH] = {}; bool success_get_owner = gpdb_get_gpperfmon_owner(conn, owner, sizeof(owner)); // log_alert_history: create table if not exist or alter it to use correct // distribution key. if (!has_history_table) { qry = "BEGIN; CREATE TABLE public.log_alert_history (LIKE " "gp_toolkit.__gp_log_master_ext) DISTRIBUTED BY (logtime) " "PARTITION BY range (logtime)(START (date '2010-01-01') " "END (date '2010-02-01') EVERY (interval '1 month')); COMMIT;"; TR0(("sounds like you have just upgraded your database, creating" " newer tables\n")); gpdb_exec_ddl(conn, qry); } else { /* * Upgrade: alter distributed key of log_alert_history from logsegment to logtime */ upgrade_log_alert_table_distributed_key(conn); } // log_alert_now/log_alert_tail: change to use 'gpperfmoncat.sh' from 'iconv/cat' to handle // encoding issue. if (recreate_alert_tables_if_needed(conn, owner)) { if (success_get_owner) { gpdb_change_alert_table_owner(conn, owner); } } else { TR0(("recreate alert_tables failed\n")); } PQfinish(conn); return; }
/*
 * Ensure the next two monthly partitions of <tbl>_history exist and drop
 * partitions older than the configured retention.
 *
 * year[]/month[] hold the current month plus the two following months,
 * computed with year-wrap handling (Nov -> Dec + Jan, Dec -> Jan + Feb).
 *
 * Returns APR_ENOMEM when conn is NULL, APR_EGENERAL on time errors,
 * APR_SUCCESS otherwise.
 */
static apr_status_t check_partition(const char* tbl, apr_pool_t* pool, PGconn* conn, mmon_options_t *opt)
{
	struct tm tm;
	time_t now;

	unsigned short year[3];
	unsigned char month[3];

	TR0(("check partitions on %s_history\n", tbl));

	if (!conn)
		return APR_ENOMEM;

	now = time(NULL);
	if (!localtime_r(&now, &tm))
	{
		gpmon_warning(FLINE, "error in check_partition getting current time\n");
		return APR_EGENERAL;
	}

	year[0] = 1900 + tm.tm_year;
	month[0] = tm.tm_mon + 1;

	if (year[0] < 1 || month[0] < 1 || year[0] > 2030 || month[0] > 12)
	{
		// BUGFIX: the old code passed the arrays themselves (pointers) to the
		// %u/%u format specifiers; pass the current month/year values instead.
		gpmon_warning(FLINE, "invalid current month/year in check_partition %u/%u\n",
				(unsigned) month[0], (unsigned) year[0]);
		return APR_EGENERAL;
	}

	if (month[0] < 11)
	{
		/* both following months stay within this year */
		month[1] = month[0] + 1;
		month[2] = month[0] + 2;
		year[1] = year[0];
		year[2] = year[0];
	}
	else if (month[0] == 11)
	{
		/* Nov: next is Dec, the one after wraps to Jan */
		month[1] = 12;
		month[2] = 1;
		year[1] = year[0];
		year[2] = year[0] + 1;
	}
	else
	{
		/* Dec: both following months are in the next year */
		month[1] = 1;
		month[2] = 2;
		year[1] = year[0] + 1;
		year[2] = year[0] + 1;
	}

	check_and_add_partition(conn, tbl, year[0], month[0], year[1], month[1]);
	check_and_add_partition(conn, tbl, year[1], month[1], year[2], month[2]);

	drop_old_partitions(conn, tbl, opt);

	TR0(("check partitions on %s_history done\n", tbl));
	return APR_SUCCESS;
}