/*
 * Answer one request packet that arrived from another watchdog.
 *
 * sock:      connected socket the request was read from; the response is
 *            written to the same socket
 * recv_pack: the received request (NULL yields WD_NG without sending)
 *
 * If pool_config->wd_authkey is non-empty, the hash carried in the packet is
 * compared with one computed locally; on mismatch the request is not
 * processed and the result of wd_authentication_failed() is returned.
 * Otherwise the request is dispatched on recv_pack->packet_no, a response is
 * built, and the result of wd_send_packet() is returned.
 */
static int
wd_send_response(int sock, WdPacket * recv_pack)
{
	WdPacket	reply;
	WdInfo	   *sender;
	WdInfo	   *master;
	WdNodeInfo *node_info = NULL;
	WdLockInfo *lock_info;
	struct timeval next_tv;
	char		serialized[WD_MAX_PACKET_STRING];
	char		expected_hash[(MD5_PASSWD_LEN+1)*2];
	int			serialized_len;
	int			result;
	bool		notify_node_request = false;

	if (recv_pack == NULL)
		return WD_NG;

	memset(&reply, 0, sizeof(WdPacket));

	/* every watchdog-info request carries the sender's WdInfo here */
	sender = &(recv_pack->wd_body.wd_info);

	/* authentication: only when an auth key is configured */
	if (pool_config->wd_authkey[0] != '\0')
	{
		/* calculate hash from packet */
		serialized_len = wd_packet_to_string(*recv_pack, serialized, sizeof(serialized));
		wd_calc_hash(serialized, serialized_len, expected_hash);

		if (strcmp(recv_pack->hash, expected_hash) != 0)
		{
			pool_log("wd_send_response: watchdog authentication failed");
			return wd_authentication_failed(sock);
		}
	}

	/* build the response according to the request type */
	switch (recv_pack->packet_no)
	{
		/* add request into the watchdog list */
		case WD_ADD_REQ:
			reply.packet_no =
				(wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
								sender->delegate_ip, &(sender->tv), sender->status) > 0)
				? WD_ADD_ACCEPT
				: WD_ADD_REJECT;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* announce candidacy to be the new master */
		case WD_STAND_FOR_MASTER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);

			/* a live master already exists: vote against the candidate */
			master = wd_is_alive_master();
			if (master != NULL)
			{
				reply.packet_no = WD_MASTER_EXIST;
				memcpy(&(reply.wd_body.wd_info), master, sizeof(WdInfo));
				break;
			}

			/*
			 * If our own timestamp is not later than the candidate's, move
			 * it to one second after the candidate's before voting.
			 */
			if (WD_MYSELF->tv.tv_sec <= sender->tv.tv_sec)
			{
				memcpy(&next_tv, &(sender->tv), sizeof(struct timeval));
				next_tv.tv_sec += 1;
				wd_set_myself(&next_tv, WD_NORMAL);
			}

			/* vote for the candidate */
			reply.packet_no = WD_VOTE_YOU;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* announce assumption to be the new master */
		case WD_DECLARE_NEW_MASTER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);

			if (WD_MYSELF->status == WD_MASTER)
			{
				/* resign master server */
				pool_log("wd_declare_new_master: ifconfig down to resign master server");
				wd_IP_down();
				wd_set_myself(NULL, WD_NORMAL);
			}

			reply.packet_no = WD_READY;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* announce candidacy to be the lock holder */
		case WD_STAND_FOR_LOCK_HOLDER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);

			/*
			 * Only the master objects, and only when a lock holder already
			 * exists; in every other case packet_no keeps the zero value
			 * left by the memset above.
			 */
			if (WD_MYSELF->status == WD_MASTER && wd_get_lock_holder() != NULL)
				reply.packet_no = WD_LOCK_HOLDER_EXIST;

			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* announce becoming / resigning the lock holder */
		case WD_DECLARE_LOCK_HOLDER:
		case WD_RESIGN_LOCK_HOLDER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			wd_set_lock_holder(sender, recv_pack->packet_no == WD_DECLARE_LOCK_HOLDER);
			reply.packet_no = WD_READY;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* interlocking start / end; the reply is left zeroed */
		case WD_START_INTERLOCK:
		case WD_END_INTERLOCK:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			wd_set_interlocking(sender, recv_pack->packet_no == WD_START_INTERLOCK);
			break;

		/* announce that server is down */
		case WD_SERVER_DOWN:
			/* record the sender as WD_DOWN regardless of the status it sent */
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), WD_DOWN);
			reply.packet_no = WD_READY;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));

			/* the response body is captured above, before any escalation */
			if (wd_am_I_oldest() == WD_OK && WD_MYSELF->status != WD_MASTER)
				wd_escalation();
			break;

		/* announce start online recovery */
		case WD_START_RECOVERY:
			if (*InRecovery != RECOVERY_INIT)
			{
				reply.packet_no = WD_NODE_FAILED;
				break;
			}

			*InRecovery = RECOVERY_ONLINE;
			reply.packet_no = (wait_connection_closed() != 0)
				? WD_NODE_FAILED
				: WD_NODE_READY;
			break;

		/* announce end of online recovery */
		case WD_END_RECOVERY:
			reply.packet_no = WD_NODE_READY;
			*InRecovery = RECOVERY_INIT;
			kill(wd_ppid, SIGUSR2);
			break;

		/* node requests: record the mask now, signal after replying */
		case WD_FAILBACK_REQUEST:
		case WD_DEGENERATE_BACKEND:
		case WD_PROMOTE_BACKEND:
			node_info = &(recv_pack->wd_body.wd_node_info);
			wd_set_node_mask(recv_pack->packet_no, node_info->node_id_set, node_info->node_num);
			notify_node_request = true;
			reply.packet_no = WD_NODE_READY;
			break;

		case WD_UNLOCK_REQUEST:
			lock_info = &(recv_pack->wd_body.wd_lock_info);
			wd_set_lock(lock_info->lock_id, false);
			reply.packet_no = WD_LOCK_READY;
			break;

		default:
			reply.packet_no = WD_INVALID;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;
	}

	/* send response packet */
	result = wd_send_packet(sock, &reply);

	/*
	 * send node request signal.
	 * wd_node_request_signal() uses a semaphore lock internally, so it should
	 * be called after sending a response packet to prevent dead lock.
	 */
	if (notify_node_request)
		wd_node_request_signal(recv_pack->packet_no, node_info);

	return result;
}
/*
 * Fork the heartbeat sender child process.
 *
 * fork_wait_time: seconds the child sleeps before it starts working
 *                 (no sleep when <= 0)
 * hb_if:          heartbeat interface; hb_if.addr is the destination handed
 *                 to wd_hb_send()
 *
 * In the parent this returns the pid of the child, or -1 (after logging an
 * error) when fork() fails.  The child installs its signal handlers, creates
 * the send socket and then loops forever, sending one heartbeat packet every
 * pool_config->wd_heartbeat_keepalive seconds.
 */
pid_t
wd_hb_sender(int fork_wait_time, WdHbIf hb_if)
{
	int sock;
	pid_t pid = 0;
	WdHbPacket pkt;
	WdInfo * p = WD_List;
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;

	pid = fork();
	if (pid != 0)
	{
		/* parent (or fork failure): hand the pid back to the caller */
		if (pid == -1)
			pool_error("wd_hb_sender: fork() failed.");

		return pid;
	}

	/* ---- child process from here on ---- */

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	myargv = save_ps_display_args(myargc, myargv);

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_sender_exit);
	signal(SIGINT, hb_sender_exit);
	signal(SIGQUIT, hb_sender_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	if ( (sock = wd_create_hb_send_socket(hb_if)) < 0)
	{
		pool_error("wd_hb_sender: socket create failed");
		/* NOTE(review): hb_sender_exit() is expected not to return - confirm */
		hb_sender_exit(SIGTERM);
	}

	set_ps_display("heartbeat sender", false);

	for(;;)
	{
		/*
		 * The whole struct (sizeof(pkt) bytes) goes onto the wire, but only
		 * some members are assigned below: the hash is skipped when no auth
		 * key is configured, and the tail of 'from' plus any padding are
		 * never written.  Zero the packet first so no uninitialised stack
		 * bytes are disclosed to the network.
		 */
		memset(&pkt, 0, sizeof(pkt));

		/* contents of packet */
		gettimeofday(&pkt.send_time, NULL);
		strlcpy(pkt.from, pool_config->wd_hostname, sizeof(pkt.from));

		/* p points at the head of WD_List; its status is what we report */
		pkt.status = p->status;

		/* authentication key */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, pkt.hash);
		}

		/* send heartbeat signal */
		wd_hb_send(sock, &pkt, sizeof(pkt), hb_if.addr);
		pool_debug("wd_hb_sender: send heartbeat signal to %s", hb_if.addr);

		sleep(pool_config->wd_heartbeat_keepalive);
	}

	/* not reached; keeps compilers that want a return value quiet */
	return pid;
}
/*
 * Fork the heartbeat receiver child process.
 *
 * fork_wait_time: seconds the child sleeps before it starts working
 *                 (no sleep when <= 0)
 * hb_if:          heartbeat interface passed to wd_create_hb_recv_socket()
 *
 * In the parent this returns the pid of the child, or -1 (after logging an
 * error) when fork() fails.  The child loops forever: it receives heartbeat
 * packets, verifies their hash when pool_config->wd_authkey is non-empty,
 * and records send/receive times on the WD_List entry whose hostname
 * matches the packet's sender.
 */
pid_t
wd_hb_receiver(int fork_wait_time, WdHbIf hb_if)
{
	int sock;
	pid_t pid = 0;
	WdHbPacket pkt;
	struct timeval tv;
	char from[WD_MAX_HOST_NAMELEN];
	char buf[(MD5_PASSWD_LEN+1)*2];
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	WdInfo * p;

	pid = fork();
	if (pid != 0)
	{
		/* parent (or fork failure): hand the pid back to the caller */
		if (pid == -1)
			pool_error("wd_hb_receiver: fork() failed.");

		return pid;
	}

	/* ---- child process from here on ---- */

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	myargv = save_ps_display_args(myargc, myargv);

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_receiver_exit);
	signal(SIGINT, hb_receiver_exit);
	signal(SIGQUIT, hb_receiver_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	if ( (sock = wd_create_hb_recv_socket(hb_if)) < 0)
	{
		pool_error("wd_hb_receiver: socket create failed");
		/* NOTE(review): hb_receiver_exit() is expected not to return - confirm */
		hb_receiver_exit(SIGTERM);
	}

	set_ps_display("heartbeat receiver", false);

	for(;;)
	{
		/* receive heartbeat signal; skip this round on failure */
		if (wd_hb_recv(sock, &pkt) != WD_OK)
			continue;

		/*
		 * The sender name comes straight off the network.  It is used as a
		 * C string below (hash input, strlcpy source, strcmp), so make sure
		 * it is terminated inside its own array.  A well-formed sender
		 * fills the field with strlcpy(), so valid packets are unchanged.
		 */
		pkt.from[sizeof(pkt.from) - 1] = '\0';

		/* authentication */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, buf);

			if (strcmp(pkt.hash, buf))
			{
				pool_log("wd_hb_receiver: authentication failed");
				continue;
			}
		}

		/* get current time */
		gettimeofday(&tv, NULL);

		/* who send this packet? */
		strlcpy(from, pkt.from, sizeof(from));

		/* WD_List is terminated by an entry whose status is WD_END */
		p = WD_List;
		while (p->status != WD_END)
		{
			if (!strcmp(p->hostname, from))
			{
				/* this is the first packet or the latest packet */
				if (!WD_TIME_ISSET(p->hb_send_time) ||
				    WD_TIME_BEFORE(p->hb_send_time, pkt.send_time))
				{
					pool_debug("wd_hb_receiver: received heartbeat signal from %s", from);
					p->hb_send_time = pkt.send_time;
					p->hb_last_recv_time = tv;
				}
				/* only the first matching entry is considered */
				break;
			}
			p++;
		}
	}

	/* not reached; keeps compilers that want a return value quiet */
	return pid;
}
static void * wd_thread_negotiation(void * arg) { WdPacketThreadArg * thread_arg; int sock; uintptr_t rtn; WdPacket recv_packet; WdInfo * p; char pack_str[WD_MAX_PACKET_STRING]; int pack_str_len; thread_arg = (WdPacketThreadArg *)arg; sock = thread_arg->sock; gettimeofday(&(thread_arg->packet->send_time), NULL); if (strlen(pool_config->wd_authkey)) { /* calculate hash from packet */ pack_str_len = wd_packet_to_string(thread_arg->packet, pack_str, sizeof(pack_str)); wd_calc_hash(pack_str, pack_str_len, thread_arg->packet->hash); } /* packet send to target watchdog */ rtn = (uintptr_t)wd_send_packet(sock, thread_arg->packet); if (rtn != WD_OK) { close(sock); pthread_exit((void *)rtn); } /* receive response packet */ memset(&recv_packet,0,sizeof(WdPacket)); rtn = (uintptr_t)wd_recv_packet(sock, &recv_packet); if (rtn != WD_OK) { close(sock); pthread_exit((void *)rtn); } rtn = WD_OK; switch (thread_arg->packet->packet_no) { case WD_ADD_REQ: if (recv_packet.packet_no == WD_ADD_ACCEPT) { memcpy(thread_arg->target, &(recv_packet.wd_body.wd_info),sizeof(WdInfo)); } else { rtn = WD_NG; } break; case WD_STAND_FOR_MASTER: if (recv_packet.packet_no == WD_MASTER_EXIST) { p = &(recv_packet.wd_body.wd_info); wd_set_wd_info(p); rtn = WD_NG; } break; case WD_STAND_FOR_LOCK_HOLDER: case WD_DECLARE_LOCK_HOLDER: if (recv_packet.packet_no == WD_LOCK_HOLDER_EXIST) { rtn = WD_NG; } break; case WD_DECLARE_NEW_MASTER: case WD_RESIGN_LOCK_HOLDER: if (recv_packet.packet_no != WD_READY) { rtn = WD_NG; } break; case WD_START_RECOVERY: case WD_FAILBACK_REQUEST: case WD_DEGENERATE_BACKEND: case WD_PROMOTE_BACKEND: rtn = (recv_packet.packet_no == WD_NODE_FAILED) ? WD_NG : WD_OK; break; case WD_UNLOCK_REQUEST: rtn = (recv_packet.packet_no == WD_LOCK_FAILED) ? WD_NG : WD_OK; break; case WD_AUTH_FAILED: pool_log("wd_thread_negotiation: watchdog authentication failed"); rtn = WD_NG; break; default: break; } close(sock); pthread_exit((void *)rtn); }
/*
 * fork heartbeat sender child
 *
 * fork_wait_time: seconds the child sleeps before it starts working
 *                 (no sleep when <= 0)
 * hb_if:          heartbeat interface; hb_if->addr and hb_if->dest_port are
 *                 the destination handed to wd_hb_send()
 *
 * In the parent this returns the pid of the child (fork failure is reported
 * with ereport(PANIC)).  The child sets up its signal handlers, a per-loop
 * memory context and the send socket, then loops forever sending one
 * heartbeat packet every pool_config->wd_heartbeat_keepalive seconds.
 */
pid_t
wd_hb_sender(int fork_wait_time, WdHbIf *hb_if)
{
	int sock;
	pid_t pid = 0;
	WdHbPacket pkt;
	WdInfo * p = WD_List;
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	sigjmp_buf	local_sigjmp_buf;

	pid = fork();
	if (pid != 0)
	{
		/* parent (or fork failure): hand the pid back to the caller */
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a heartbeat sender child")));
		return pid;
	}

	/* ---- child process from here on ---- */

	on_exit_reset();
	processType = PT_HB_SENDER;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_sender_exit);
	signal(SIGINT, hb_sender_exit);
	signal(SIGQUIT, hb_sender_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wdhb_sender",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(TopMemoryContext);

	sock = wd_create_hb_send_socket(hb_if);

	set_ps_display("heartbeat sender", false);

	/*
	 * Landing point for the jump buffer installed in PG_exception_stack
	 * below: report the error, clean up, and pause one keepalive interval
	 * before re-entering the send loop.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
		sleep(pool_config->wd_heartbeat_keepalive);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/*
		 * The whole struct (sizeof(pkt) bytes) goes onto the wire, but only
		 * some members are assigned below: the hash is skipped when no auth
		 * key is configured, and the tail of 'from' plus any padding are
		 * never written.  Zero the packet first so no uninitialised stack
		 * bytes are disclosed to the network.
		 */
		memset(&pkt, 0, sizeof(pkt));

		/* contents of packet */
		gettimeofday(&pkt.send_time, NULL);
		strlcpy(pkt.from, pool_config->wd_hostname, sizeof(pkt.from));
		pkt.from_pgpool_port = pool_config->port;

		/* p points at the head of WD_List; its status is what we report */
		pkt.status = p->status;

		/* authentication key */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(&pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, pkt.hash);
		}

		/* send heartbeat signal */
		wd_hb_send(sock, &pkt, sizeof(pkt), hb_if->addr, hb_if->dest_port);
		ereport(DEBUG1,
				(errmsg("watchdog heartbeat: send heartbeat signal to %s:%d", hb_if->addr, hb_if->dest_port)));

		sleep(pool_config->wd_heartbeat_keepalive);
	}

	/* not reached; keeps compilers that want a return value quiet */
	return pid;
}
/*
 * fork heartbeat receiver child
 *
 * fork_wait_time: seconds the child sleeps before it starts working
 *                 (no sleep when <= 0)
 * hb_if:          heartbeat interface passed to wd_create_hb_recv_socket()
 *
 * In the parent this returns the pid of the child (fork failure is reported
 * with ereport(PANIC)).  The child loops forever: it receives heartbeat
 * packets, verifies their hash when pool_config->wd_authkey is non-empty,
 * and records send/receive times on the WD_List entry whose hostname and
 * pgpool port match the packet's sender.
 */
pid_t
wd_hb_receiver(int fork_wait_time, WdHbIf *hb_if)
{
	int sock;
	pid_t pid = 0;
	WdHbPacket pkt;
	struct timeval tv;
	char from[WD_MAX_HOST_NAMELEN];
	int from_pgpool_port;
	char buf[(MD5_PASSWD_LEN+1)*2];
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	sigjmp_buf	local_sigjmp_buf;
	WdInfo * p;

	pid = fork();
	if (pid != 0)
	{
		/* parent (or fork failure): hand the pid back to the caller */
		if (pid == -1)
			ereport(PANIC,
					(errmsg("failed to fork a heartbeat receiver child")));
		return pid;
	}

	/* ---- child process from here on ---- */

	on_exit_reset();
	processType = PT_HB_RECEIVER;

	if (fork_wait_time > 0)
	{
		sleep(fork_wait_time);
	}

	POOL_SETMASK(&UnBlockSig);

	signal(SIGTERM, hb_receiver_exit);
	signal(SIGINT, hb_receiver_exit);
	signal(SIGQUIT, hb_receiver_exit);
	signal(SIGCHLD, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGUSR1, SIG_IGN);
	signal(SIGUSR2, SIG_IGN);
	signal(SIGPIPE, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	init_ps_display("", "", "", "");

	/* Create per loop iteration memory context */
	ProcessLoopContext = AllocSetContextCreate(TopMemoryContext,
											   "wdhb_hb_receiver",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	MemoryContextSwitchTo(TopMemoryContext);

	sock = wd_create_hb_recv_socket(hb_if);

	set_ps_display("heartbeat receiver", false);

	/*
	 * Landing point for the jump buffer installed in PG_exception_stack
	 * below (e.g. the authentication failure raised in the loop): report
	 * the error, clean up, and fall through into the receive loop again.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		EmitErrorReport();
		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	for(;;)
	{
		MemoryContextSwitchTo(ProcessLoopContext);
		MemoryContextResetAndDeleteChildren(ProcessLoopContext);

		/* receive heartbeat signal */
		wd_hb_recv(sock, &pkt);

		/*
		 * The sender name comes straight off the network.  It is used as a
		 * C string below (hash input, strlcpy source, strcmp), so make sure
		 * it is terminated inside its own array.  A well-formed sender
		 * fills the field with strlcpy(), so valid packets are unchanged.
		 */
		pkt.from[sizeof(pkt.from) - 1] = '\0';

		/* authentication */
		if (strlen(pool_config->wd_authkey))
		{
			/* calculate hash from packet */
			pack_str_len = packet_to_string_hb(&pkt, pack_str, sizeof(pack_str));
			wd_calc_hash(pack_str, pack_str_len, buf);

			if (strcmp(pkt.hash, buf))
				ereport(ERROR,
						(errmsg("watchdog heartbeat receive"),
						 errdetail("authentication failed")));
		}

		/* get current time */
		gettimeofday(&tv, NULL);

		/* who send this packet? */
		strlcpy(from, pkt.from, sizeof(from));
		from_pgpool_port = pkt.from_pgpool_port;

		/* WD_List is terminated by an entry whose status is WD_END */
		p = WD_List;
		while (p->status != WD_END)
		{
			if (!strcmp(p->hostname, from) && p->pgpool_port == from_pgpool_port)
			{
				/* ignore the packet from down pgpool */
				if (pkt.status == WD_DOWN)
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal from \"%s:%d\" whose status is down. ignored",
									from, from_pgpool_port)));
					break;
				}

				/* this is the first packet or the latest packet */
				if (!WD_TIME_ISSET(p->hb_send_time) ||
				    WD_TIME_BEFORE(p->hb_send_time, pkt.send_time))
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal from \"%s:%d\"",
									from, from_pgpool_port)));

					p->hb_send_time = pkt.send_time;
					p->hb_last_recv_time = tv;
				}
				else
				{
					ereport(DEBUG1,
							(errmsg("watchdog heartbeat: received heartbeat signal is older than the latest, ignored")));
				}
				/* only the first matching entry is considered */
				break;
			}
			p++;
		}
	}

	/* not reached; keeps compilers that want a return value quiet */
	return pid;
}