/*
 * Run an RPC against the metadata server, failing over and retrying
 * when the connection requires it.
 *
 * rpc_op:            performs the RPC; receives a pointer to a connection
 *                    pointer (reset to NULL before every attempt) and
 *                    `closure'.
 * post_failover_op:  optional; invoked with the connection and `closure'
 *                    after failover() succeeded, before the RPC is retried.
 * exit_op:           optional; invoked exactly once before returning, with
 *                    the connection, the final error code and `closure'.
 * must_be_warned_op: optional; consulted only when the RPC fails after the
 *                    single failover retry has been consumed, to decide
 *                    whether a warning is logged.
 * closure:           opaque pointer handed to every callback.
 *
 * Returns the last error code produced by rpc_op, failover() or
 * post_failover_op.
 */
gfarm_error_t
gfm_client_rpc_with_failover(
	gfarm_error_t (*rpc_op)(struct gfm_connection **, void *),
	gfarm_error_t (*post_failover_op)(struct gfm_connection *, void *),
	void (*exit_op)(struct gfm_connection *, gfarm_error_t, void *),
	int (*must_be_warned_op)(gfarm_error_t, void *),
	void *closure)
{
	struct gfm_connection *gfm_server;
	gfarm_error_t e;
	int nretry = 1;		/* retries left after a successful failover */
	int post_nretry = 1;	/* retries left after post_failover_op fails */

	for (;;) {
		gfm_server = NULL;
		e = rpc_op(&gfm_server, closure);

		/* No failover wanted (or allowed any more): report and stop. */
		if (nretry <= 0 ||
		    !gfm_client_connection_should_failover(gfm_server, e)) {
			if (e != GFARM_ERR_NO_ERROR) {
				gflog_debug(GFARM_MSG_1003867,
				    "gfm_client_rpc_with_failover: rpc_op: %s",
				    gfarm_error_string(e));
				if (nretry == 0 && must_be_warned_op != NULL &&
				    must_be_warned_op(e, closure))
					gflog_warning(GFARM_MSG_1003868,
					    "error ocurred at retry for the operation after "
					    "connection to metadb server was failed over, "
					    "so the operation possibly succeeded in the server."
					    " error='%s'",
					    gfarm_error_string(e));
			}
			break;
		}

		e = failover(gfm_server);
		if (e != GFARM_ERR_NO_ERROR) {
			gflog_debug(GFARM_MSG_1003865,
			    "failover: %s", gfarm_error_string(e));
			break;
		}

		/* Failover succeeded and there is no post step: retry the RPC. */
		if (post_failover_op == NULL) {
			nretry--;
			continue;
		}

		e = post_failover_op(gfm_server, closure);
		if (e == GFARM_ERR_NO_ERROR) {
			nretry--;
			continue;
		}

		gflog_debug(GFARM_MSG_1003866,
		    "post_failover_op: %s", gfarm_error_string(e));

		/*
		 * A connection error here covers the following cases:
		 * - the connection acquired in failover() was created
		 *   before failover().
		 * - a connection error occurred after failover().
		 * Start over once, without consuming nretry.
		 */
		if (gfm_client_is_connection_error(e) && post_nretry > 0) {
			post_nretry--;
			continue;
		}
		break;
	}

	if (exit_op != NULL)
		exit_op(gfm_server, e, closure);
	return (e);
}
/*
 * Fail over the metadata server connection used by the open file `gf'.
 *
 * On failure the error code is also stored in gf->error.
 * Returns the result of failover().
 */
gfarm_error_t
gfs_pio_failover(GFS_File gf)
{
	gfarm_error_t status;

	status = failover(gf->gfm_server);
	if (status == GFARM_ERR_NO_ERROR)
		return (status);

	/* remember the failure on the file handle */
	gf->error = status;
	return (status);
}
/*
 * Parse a string of single-character flags into a bit mask.
 *
 * Every character of param->s (param->len characters) is examined:
 *   ' '       ignored
 *   'f' / 'F' sets DS_FAILOVER_ON
 *   'u' / 'U' sets DS_HASH_USER_ONLY
 *   'd' / 'D' sets DS_USE_DEFAULT
 *   's' / 'S' sets DS_FORCE_DST
 * A flag that is already set in the mask is treated as an error: FLAG_ERR
 * logs "Cannot set <flag> flag" and returns -1.  Any other character logs
 * "Invalid definition" and returns -1.
 *
 * NOTE(review): the text of this function as seen here ends right after the
 * switch statement; the end of the loop and the success return value are
 * not visible, so what is done with the accumulated mask is unconfirmed.
 * NOTE(review): PV_DELIM is defined but not used in the visible part, and
 * FLAG_ERR carries a trailing ';' after its "while(0)".
 */
int fixup_flags(str* param) { #define PV_DELIM ')' #define FLAG_ERR(_flag_msg_)\ do{\ LM_ERR("Cannot set " #_flag_msg_ " flag\n");\ return -1;\ } while(0); int index, ret=0; for(index=0; index < param->len; index++) { switch (param->s[index]) { case ' ': break; case 'f': case 'F': if (ret & DS_FAILOVER_ON) { FLAG_ERR(failover (F)); } else { ret |= DS_FAILOVER_ON; } break; case 'u': case 'U': if (ret & DS_HASH_USER_ONLY) { FLAG_ERR(hash user (U)); } else { ret |= DS_HASH_USER_ONLY; } break; case 'd': case 'D': if (ret & DS_USE_DEFAULT) { FLAG_ERR(use default (D)); } else { ret |= DS_USE_DEFAULT; } break; case 's': case 'S': if (ret & DS_FORCE_DST) { FLAG_ERR(force dst (S)); } else { ret |= DS_FORCE_DST; } break; default : LM_ERR("Invalid definition\n"); return -1; }
/*
 * Public entry point for failing over a metadata server connection.
 *
 * Simply forwards `gfm_server' to failover() and returns its result.
 */
gfarm_error_t
gfm_client_connection_failover(struct gfm_connection *gfm_server)
{
	gfarm_error_t e;

	e = failover(gfm_server);
	return (e);
}
/* * pgpool main program */ int main(int argc, char **argv) { int opt; int i; int pid; int size; int retrycnt; int sys_retrycnt; myargc = argc; myargv = argv; snprintf(conf_file, sizeof(conf_file), "%s/%s", DEFAULT_CONFIGDIR, POOL_CONF_FILE_NAME); snprintf(pcp_conf_file, sizeof(pcp_conf_file), "%s/%s", DEFAULT_CONFIGDIR, PCP_PASSWD_FILE_NAME); snprintf(hba_file, sizeof(hba_file), "%s/%s", DEFAULT_CONFIGDIR, HBA_CONF_FILE_NAME); while ((opt = getopt(argc, argv, "a:cdf:F:hm:nv")) != -1) { switch (opt) { case 'a': /* specify hba configuration file */ if (!optarg) { usage(); exit(1); } strncpy(hba_file, optarg, sizeof(hba_file)); break; case 'c': /* clear cache option */ clear_cache = 1; break; case 'd': /* debug option */ debug = 1; break; case 'f': /* specify configuration file */ if (!optarg) { usage(); exit(1); } strncpy(conf_file, optarg, sizeof(conf_file)); break; case 'F': /* specify PCP password file */ if (!optarg) { usage(); exit(1); } strncpy(pcp_conf_file, optarg, sizeof(pcp_conf_file)); break; case 'h': usage(); exit(0); break; case 'm': /* stop mode */ if (!optarg) { usage(); exit(1); } if (*optarg == 's' || !strcmp("smart", optarg)) stop_sig = SIGTERM; /* smart shutdown */ else if (*optarg == 'f' || !strcmp("fast", optarg)) stop_sig = SIGINT; /* fast shutdown */ else if (*optarg == 'i' || !strcmp("immediate", optarg)) stop_sig = SIGQUIT; /* immediate shutdown */ else { usage(); exit(1); } break; case 'n': /* no detaching control ttys */ not_detach = 1; break; case 'v': show_version(); exit(0); default: usage(); exit(1); } } mypid = getpid(); if (pool_init_config()) exit(1); if (pool_get_config(conf_file, INIT_CONFIG)) { pool_error("Unable to get configuration. Exiting..."); exit(1); } if (pool_config->enable_pool_hba) load_hba(hba_file); /* * If a non-switch argument remains, then it should be either "reload" or "stop". 
*/ if (optind == (argc - 1)) { if (!strcmp(argv[optind], "reload")) { pid_t pid; pid = read_pid_file(); if (pid < 0) { pool_error("could not read pid file"); pool_shmem_exit(1); exit(1); } if (kill(pid, SIGHUP) == -1) { pool_error("could not reload configuration file pid: %d. reason: %s", pid, strerror(errno)); pool_shmem_exit(1); exit(1); } pool_shmem_exit(0); exit(0); } if (!strcmp(argv[optind], "stop")) { stop_me(); pool_shmem_exit(0); exit(0); } else { usage(); pool_shmem_exit(1); exit(1); } } /* * else if no non-switch argument remains, then it should be a start request */ else if (optind == argc) { pid = read_pid_file(); if (pid > 0) { if (kill(pid, 0) == 0) { fprintf(stderr, "pid file found. is another pgpool(%d) is running?\n", pid); exit(1); } else fprintf(stderr, "pid file found but it seems bogus. Trying to start pgpool anyway...\n"); } } /* * otherwise an error... */ else { usage(); exit(1); } /* set signal masks */ poolinitmask(); if (not_detach) write_pid_file(); else daemonize(); if (pool_semaphore_create(MAX_NUM_SEMAPHORES)) { pool_error("Unable to create semaphores. 
Exiting..."); pool_shmem_exit(1); exit(1); } /* * Restore previous backend status if possible */ read_status_file(); /* clear cache */ if (clear_cache && pool_config->enable_query_cache && SYSDB_STATUS == CON_UP) { Interval interval[1]; interval[0].quantity = 0; interval[0].unit = second; pool_clear_cache_by_time(interval, 1); } /* set unix domain socket path */ snprintf(un_addr.sun_path, sizeof(un_addr.sun_path), "%s/.s.PGSQL.%d", pool_config->socket_dir, pool_config->port); /* set up signal handlers */ pool_signal(SIGPIPE, SIG_IGN); /* create unix domain socket */ unix_fd = create_unix_domain_socket(un_addr); /* create inet domain socket if any */ if (pool_config->listen_addresses[0]) { inet_fd = create_inet_domain_socket(pool_config->listen_addresses, pool_config->port); } size = pool_config->num_init_children * pool_config->max_pool * sizeof(ConnectionInfo); con_info = pool_shared_memory_create(size); if (con_info == NULL) { pool_error("failed to allocate connection informations"); myexit(1); } memset(con_info, 0, size); size = pool_config->num_init_children * (sizeof(ProcessInfo)); pids = pool_shared_memory_create(size); if (pids == NULL) { pool_error("failed to allocate pids"); myexit(1); } memset(pids, 0, size); for (i = 0; i < pool_config->num_init_children; i++) { pids[i].connection_info = &con_info[i * pool_config->max_pool]; } /* create fail over/switch over event area */ Req_info = pool_shared_memory_create(sizeof(POOL_REQUEST_INFO)); if (Req_info == NULL) { pool_error("failed to allocate Req_info"); myexit(1); } /* initialize Req_info */ Req_info->kind = NODE_UP_REQUEST; memset(Req_info->node_id, -1, sizeof(int) * MAX_NUM_BACKENDS); Req_info->master_node_id = get_next_master_node(); Req_info->conn_counter = 0; InRecovery = pool_shared_memory_create(sizeof(int)); if (InRecovery == NULL) { pool_error("failed to allocate InRecovery"); myexit(1); } *InRecovery = 0; /* * We need to block signal here. 
Otherwise child might send some * signals, for example SIGUSR1(fail over). Children will inherit * signal blocking but they do unblock signals at the very beginning * of process. So this is harmless. */ POOL_SETMASK(&BlockSig); /* fork the children */ for (i=0;i<pool_config->num_init_children;i++) { pids[i].pid = fork_a_child(unix_fd, inet_fd, i); pids[i].start_time = time(NULL); } /* set up signal handlers */ pool_signal(SIGTERM, exit_handler); pool_signal(SIGINT, exit_handler); pool_signal(SIGQUIT, exit_handler); pool_signal(SIGCHLD, reap_handler); pool_signal(SIGUSR1, failover_handler); pool_signal(SIGUSR2, wakeup_handler); pool_signal(SIGHUP, reload_config_handler); /* create pipe for delivering event */ if (pipe(pipe_fds) < 0) { pool_error("failed to create pipe"); myexit(1); } pool_log("pgpool successfully started"); /* fork a child for PCP handling */ snprintf(pcp_un_addr.sun_path, sizeof(pcp_un_addr.sun_path), "%s/.s.PGSQL.%d", pool_config->pcp_socket_dir, pool_config->pcp_port); pcp_unix_fd = create_unix_domain_socket(pcp_un_addr); /* maybe change "*" to pool_config->pcp_listen_addresses */ pcp_inet_fd = create_inet_domain_socket("*", pool_config->pcp_port); pcp_pid = pcp_fork_a_child(pcp_unix_fd, pcp_inet_fd, pcp_conf_file); retrycnt = 0; /* reset health check retry counter */ sys_retrycnt = 0; /* reset SystemDB health check retry counter */ /* * This is the main loop */ for (;;) { CHECK_REQUEST; /* do we need health checking for PostgreSQL? */ if (pool_config->health_check_period > 0) { int sts; int sys_sts = 0; unsigned int sleep_time; if (retrycnt == 0) { pool_debug("starting health checking"); } else { pool_debug("retrying %d th health checking", retrycnt); } if (pool_config->health_check_timeout > 0) { /* * set health checker timeout. we want to detect * communication path failure much earlier before * TCP/IP stack detects it. */ pool_signal(SIGALRM, health_check_timer_handler); alarm(pool_config->health_check_timeout); } /* * do actual health check. 
trying to connect to the backend */ errno = 0; health_check_timer_expired = 0; POOL_SETMASK(&UnBlockSig); sts = health_check(); POOL_SETMASK(&BlockSig); if (pool_config->parallel_mode || pool_config->enable_query_cache) sys_sts = system_db_health_check(); if ((sts > 0 || sys_sts < 0) && (errno != EINTR || (errno == EINTR && health_check_timer_expired))) { if (sts > 0) { sts--; if (!pool_config->parallel_mode) { pool_log("set %d th backend down status", sts); Req_info->kind = NODE_DOWN_REQUEST; Req_info->node_id[0] = sts; failover(); /* need to distribute this info to children */ } else { retrycnt++; pool_signal(SIGALRM, SIG_IGN); /* Cancel timer */ if (retrycnt > NUM_BACKENDS) { /* retry count over */ pool_log("set %d th backend down status", sts); Req_info->kind = NODE_DOWN_REQUEST; Req_info->node_id[0] = sts; failover(); retrycnt = 0; } else { /* continue to retry */ sleep_time = pool_config->health_check_period/NUM_BACKENDS; pool_debug("retry sleep time: %d seconds", sleep_time); pool_sleep(sleep_time); continue; } } } if (sys_sts < 0) { sys_retrycnt++; pool_signal(SIGALRM, SIG_IGN); if (sys_retrycnt > NUM_BACKENDS) { pool_log("set SystemDB down status"); SYSDB_STATUS = CON_DOWN; sys_retrycnt = 0; } else if (sts == 0) /* goes to sleep only when SystemDB alone was down */ { sleep_time = pool_config->health_check_period/NUM_BACKENDS; pool_debug("retry sleep time: %d seconds", sleep_time); pool_sleep(sleep_time); continue; } } } if (pool_config->health_check_timeout > 0) { /* seems ok. cancel health check timer */ pool_signal(SIGALRM, SIG_IGN); } sleep_time = pool_config->health_check_period; pool_sleep(sleep_time); } else { for (;;) { int r; struct timeval t = {3, 0}; POOL_SETMASK(&UnBlockSig); r = pool_pause(&t); POOL_SETMASK(&BlockSig); if (r > 0) break; } } } pool_shmem_exit(0); }
/* Called under bonding locks (bond_mii_monitor) */ int t3_failover(struct toedev *tdev, struct net_device *bond_dev, struct net_device *slave_dev, int event, struct net_device *last_dev) { struct bonding *bond = (struct bonding *)netdev_priv(bond_dev); int active_ports = 0; struct port *port; int if_port; /* differentiate 4 ports and 2 ports adapters */ if (tdev->nlldev > 2) { four_ports_failover(tdev, bond_dev, slave_dev, event); return 0; } /* Last slave removed. Map the event to a complete release */ if (event == TOE_RELEASE && bond->slave_cnt == 1) event = TOE_RELEASE_ALL; switch (bond->params.mode) { case BOND_MODE_ACTIVEBACKUP: if (event == TOE_ACTIVE_SLAVE) { if (!slave_dev || bond->slave_cnt == 1) tdev->ctl(tdev, FAILOVER_CLEAR, NULL); else { if_port = lookup_port(slave_dev); tdev->ctl(tdev, FAILOVER, &if_port); } } else if (event == TOE_RELEASE_ALL) tdev->ctl(tdev, FAILOVER_CLEAR, NULL); break; case BOND_MODE_8023AD: if (event == TOE_ACTIVE_SLAVE) return 0; for (port = toe_bond_get_first_port(bond); port; port = toe_bond_get_next_port(port)) active_ports += (port->slave->state == BOND_STATE_ACTIVE); /* One port enslaved only. Ignore failover events */ if (bond->slave_cnt == 1) return 0; /* No more active port */ if ((event == TOE_LINK_DOWN || event == TOE_RELEASE) && !active_ports) { tdev->ctl(tdev, FAILOVER_CLEAR, NULL); return 0; } /* Dead port back alive in a already active bond device */ if (event == TOE_LINK_UP && active_ports > 1) { if_port = lookup_port(slave_dev); tdev->ctl(tdev, FAILOVER_DONE, &if_port); return 0; } /* fall through */ case BOND_MODE_XOR: /* One port enslaved only. Ignore failover events */ if (bond->slave_cnt == 1) return 0; failover(tdev, bond_dev, slave_dev, event); } return 0; }