int gtmsource_mode_change(int to_mode) { uint4 savepid; int exit_status; int status, detach_status, remove_status; int log_fd = 0, close_status = 0; char* err_code; int save_errno; sgmnt_addrs *repl_csa; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); repl_log(stdout, TRUE, TRUE, "Initiating %s operation on source server pid [%d] for secondary instance [%s]\n", (GTMSOURCE_MODE_ACTIVE_REQUESTED == to_mode) ? "ACTIVATE" : "DEACTIVATE", jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname); if ((jnlpool.gtmsource_local->mode == GTMSOURCE_MODE_ACTIVE_REQUESTED) || (jnlpool.gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE_REQUESTED)) { repl_log(stderr, FALSE, TRUE, "Source Server %s already requested, not changing mode\n", (to_mode == GTMSOURCE_MODE_ACTIVE_REQUESTED) ? "ACTIVATE" : "DEACTIVATE"); return (ABNORMAL_SHUTDOWN); } if (((GTMSOURCE_MODE_ACTIVE == jnlpool.gtmsource_local->mode) && (GTMSOURCE_MODE_ACTIVE_REQUESTED == to_mode)) || ((GTMSOURCE_MODE_PASSIVE == jnlpool.gtmsource_local->mode) && (GTMSOURCE_MODE_PASSIVE_REQUESTED == to_mode))) { repl_log(stderr, FALSE, TRUE, "Source Server already %s, not changing mode\n", (to_mode == GTMSOURCE_MODE_ACTIVE_REQUESTED) ? "ACTIVE" : "PASSIVE"); return (ABNORMAL_SHUTDOWN); } assert(ROOTPRIMARY_UNSPECIFIED != gtmsource_options.rootprimary); /*check if the new log file is writable*/ if ('\0' != gtmsource_options.log_file[0] && 0 != STRCMP(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file)) { OPENFILE3(gtmsource_options.log_file, O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, log_fd); if (log_fd < 0) { save_errno = ERRNO; err_code = STRERROR(save_errno); gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLLOGOPN, 6, LEN_AND_STR(gtmsource_options.log_file), LEN_AND_STR(err_code), LEN_AND_STR(NULL_DEVICE)); return (ABNORMAL_SHUTDOWN); } CLOSEFILE_IF_OPEN(log_fd, close_status); assert(close_status==0); } if ((GTMSOURCE_MODE_ACTIVE_REQUESTED == to_mode) && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary) && jnlpool.jnlpool_ctl->upd_disabled) { /* ACTIVATE is specified with ROOTPRIMARY on a journal pool that was created with PROPAGATEPRIMARY. This is a * case of transition from propagating primary to root primary. Enable updates in this journal pool and append * a histinfo record to the replication instance file. The function "gtmsource_rootprimary_init" does just that. */ gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno); } DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
int gtmsource_changelog(void) { uint4 changelog_desired = 0, changelog_accepted = 0; int log_fd = 0; /*used to indicate whether the new specified log file is writable*/ int close_status = 0; /*used to indicate if log file is successfully closed*/ char* err_code; int save_errno; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); repl_log(stderr, TRUE, TRUE, "Initiating CHANGELOG operation on source server pid [%d] for secondary instance [%s]\n", jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname); if (0 != jnlpool.gtmsource_local->changelog) { util_out_print("Change log is already in progress. Not initiating change in log file or log interval", TRUE); return (ABNORMAL_SHUTDOWN); } if ('\0' != gtmsource_options.log_file[0]) /* trigger change in log file */ { changelog_desired |= REPLIC_CHANGE_LOGFILE; if (0 != STRCMP(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file)) { /*check if the new log file is writable*/ OPENFILE3_CLOEXEC(gtmsource_options.log_file, O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, log_fd); if (log_fd < 0) { save_errno = ERRNO; err_code = STRERROR(save_errno); gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLLOGOPN, 6, LEN_AND_STR(gtmsource_options.log_file), LEN_AND_STR(err_code), LEN_AND_STR(NULL_DEVICE)); } else { CLOSEFILE_IF_OPEN(log_fd, close_status); assert(close_status==0); changelog_accepted |= REPLIC_CHANGE_LOGFILE; STRCPY(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file); util_out_print("Change log initiated with file !AD", TRUE, LEN_AND_STR(gtmsource_options.log_file)); } } else util_out_print("Log file is already !AD. Not initiating change in log file", TRUE, LEN_AND_STR(gtmsource_options.log_file)); } if (0 != gtmsource_options.src_log_interval) /* trigger change in log interval */ { changelog_desired |= REPLIC_CHANGE_LOGINTERVAL; if (gtmsource_options.src_log_interval != jnlpool.gtmsource_local->log_interval) { changelog_accepted |= REPLIC_CHANGE_LOGINTERVAL; jnlpool.gtmsource_local->log_interval = gtmsource_options.src_log_interval; util_out_print("Change log initiated with interval !UL", TRUE, gtmsource_options.src_log_interval); } else util_out_print("Log interval is already !UL. Not initiating change in log interval", TRUE, gtmsource_options.src_log_interval); } if (0 != changelog_accepted) jnlpool.gtmsource_local->changelog = changelog_accepted; else util_out_print("No change to log file or log interval", TRUE); return ((0 != changelog_accepted && changelog_accepted == changelog_desired) ? NORMAL_SHUTDOWN : ABNORMAL_SHUTDOWN); }
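/* The OPENFILE3_CLOEXEC/CLOSEFILE_IF_OPEN pair above is a write-probe: open the candidate log file with the same flags the
 * server will later use, discard the descriptor immediately, and keep only the verdict. A minimal standalone sketch of that
 * probe in plain POSIX calls (the GT.M macros additionally wrap EINTR retries, omitted here; "probe_log_writable" is a
 * hypothetical name, not part of GT.M).
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

/* Return 0 if 'path' can be opened for append (creating it if needed), else the failing errno. */
static int probe_log_writable(const char *path)
{
	int	fd, save_errno;

	fd = open(path, O_RDWR | O_CREAT | O_APPEND | O_CLOEXEC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
	if (0 > fd)
	{
		save_errno = errno;	/* capture before any later library call can clobber it */
		fprintf(stderr, "cannot open log file %s : %s\n", path, strerror(save_errno));
		return save_errno;
	}
	(void)close(fd);	/* probe only; the real open happens once the log switch is accepted */
	return 0;
}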
int updhelper_reader(void) { uint4 pre_read_offset; int lcnt; boolean_t continue_reading; call_on_signal = updhelper_reader_sigstop; updhelper_init(UPD_HELPER_READER); repl_log(updhelper_log_fp, TRUE, TRUE, "Helper reader started. PID %d [0x%X]\n", process_id, process_id); GVKEY_INIT(gv_currkey, gv_keysize); GVKEY_INIT(gv_altkey, gv_keysize); last_pre_read_offset = 0; continue_reading = TRUE; do { REPL_DPRINT1("Will wait\n"); for (lcnt = 1; continue_reading && last_pre_read_offset == (pre_read_offset = recvpool.upd_helper_ctl->pre_read_offset); ) { SHORT_SLEEP(lcnt); if (lcnt <= MAX_LCNT) lcnt++; continue_reading = NO_SHUTDOWN == helper_entry->helper_shutdown; } last_pre_read_offset = pre_read_offset; } while (continue_reading && updproc_preread()); updhelper_reader_end(); return SS_NORMAL; }
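/* The inner for loop of updhelper_reader is a ramp-up poll: spin on a shared-memory field, sleeping a little longer after
 * every miss (SHORT_SLEEP(lcnt) with lcnt capped at MAX_LCNT) so an idle helper backs off instead of burning CPU. The same
 * shape as a standalone sketch: usleep stands in for SHORT_SLEEP, an arbitrary 50ms cap stands in for MAX_LCNT, and the
 * shutdown test of the original is elided.
 */
#include <unistd.h>

/* Poll *field until it moves off 'last', sleeping lcnt milliseconds per miss, ramping up to a cap. */
static unsigned int poll_until_changed(volatile unsigned int *field, unsigned int last)
{
	int	lcnt = 1;

	while (last == *field)
	{
		usleep((useconds_t)lcnt * 1000);
		if (lcnt <= 50)		/* hypothetical cap (~50ms between polls) */
			lcnt++;
	}
	return *field;
}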
static void updproc_stop(boolean_t exit) { int4 status; int fclose_res; call_on_signal = NULL; /* Don't reenter on error */ if (pool_init) { rel_lock(jnlpool.jnlpool_dummy_reg); /* nullify jnlpool_ctl before detaching from jnlpool since if it is the other way, we might be interrupted * by the periodic timer routines and end up in jnl_write_epoch_rec() routine that dereferences jnlpool_ctl * since it is non-NULL although it has been detached from and is no longer valid memory. */ jnlpool_ctl = NULL; #ifdef UNIX mutex_cleanup(jnlpool.jnlpool_dummy_reg); SHMDT(jnlpool.jnlpool_ctl); #elif defined(VMS) if (SS$_NORMAL != (status = detach_shm(jnlpool.shm_range))) repl_log(stderr, TRUE, TRUE, "Error detaching from jnlpool : %s\n", REPL_STR_ERROR); if (SS$_NORMAL != (status = signoff_from_gsec(jnlpool.shm_lockid))) repl_log(stderr, TRUE, TRUE, "Error dequeueing lock on jnlpool global section : %s\n", REPL_STR_ERROR); #else #error Unsupported Platform #endif jnlpool.jnlpool_ctl = NULL; pool_init = FALSE; } recvpool.upd_proc_local->upd_proc_shutdown = NORMAL_SHUTDOWN; recvpool.upd_proc_local->upd_proc_pid = 0; #ifdef UNIX SHMDT(recvpool.recvpool_ctl); #elif defined(VMS) if(SS$_NORMAL != (status = detach_shm(recvpool.shm_range))) repl_log(stderr, TRUE, TRUE, "Update process could not detach from recvpool : %s\n", REPL_STR_ERROR); if (SS$_NORMAL != (status = signoff_from_gsec(recvpool.shm_lockid))) repl_log(stderr, TRUE, TRUE, "Error dequeueing lock on recvpool global section : %s\n", REPL_STR_ERROR); #else #error Unsupported Platform #endif recvpool.recvpool_ctl = NULL; gtm_event_log_close(); if (exit) mupip_exit(SS_NORMAL); return; }
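/* The teardown order in updproc_stop is deliberate, as its leading comment explains: the global jnlpool_ctl is nulled
 * BEFORE the shared memory is detached, so a timer interrupt landing mid-rundown (e.g. in jnl_write_epoch_rec) sees NULL
 * and bails out instead of dereferencing a pointer into a segment that is no longer mapped. The same discipline in bare
 * System V shared-memory terms, as a sketch:
 */
#include <stddef.h>
#include <sys/shm.h>

struct ctl;				/* opaque control block living in the shared segment */
static struct ctl * volatile g_ctl;	/* global consulted by asynchronous timer handlers */

static void detach_ctl(void)
{
	struct ctl	*base = g_ctl;

	if (NULL == base)
		return;
	g_ctl = NULL;			/* step 1: handlers now see NULL and return early */
	(void)shmdt((const void *)base);	/* step 2: only then unmap; the reverse order opens a window
						 * where a handler follows a stale pointer into unmapped memory */
}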
int gtmsource_needrestart(void) { gtmsource_local_ptr_t gtmsource_local; sgmnt_addrs *repl_csa; error_def(ERR_MUPCLIERR); assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); gtmsource_local = jnlpool.gtmsource_local; if (NULL != gtmsource_local) { assert(!STRCMP(gtmsource_options.secondary_instname, gtmsource_local->secondary_instname)); repl_log(stderr, TRUE, TRUE, "Initiating NEEDRESTART operation on source server pid [%d] for secondary instance [%s]\n", gtmsource_local->gtmsource_pid, gtmsource_options.secondary_instname); } else repl_log(stderr, TRUE, TRUE, "Initiating NEEDRESTART operation for secondary instance [%s]\n", gtmsource_options.secondary_instname); DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
int gtmrecv_endupd(void) { pid_t savepid; int exit_status; pid_t waitpid_res; repl_log(stdout, TRUE, TRUE, "Initiating shut down of Update Process\n"); recvpool.upd_proc_local->upd_proc_shutdown = SHUTDOWN; /* Wait for update process to shut down */ while((SHUTDOWN == recvpool.upd_proc_local->upd_proc_shutdown) && (0 < (savepid = (pid_t)recvpool.upd_proc_local->upd_proc_pid)) && is_proc_alive(savepid, 0)) { SHORT_SLEEP(GTMRECV_WAIT_FOR_UPD_SHUTDOWN); WAITPID(savepid, &exit_status, WNOHANG, waitpid_res); /* Release defunct update process if dead */ } exit_status = recvpool.upd_proc_local->upd_proc_shutdown; if (SHUTDOWN == exit_status) { if (0 == savepid) /* No Update Process */ exit_status = NORMAL_SHUTDOWN; else /* Update Process Crashed */ { repl_log(stderr, TRUE, TRUE, "Update Process exited abnormally, INTEGRITY CHECK might be warranted\n"); exit_status = ABNORMAL_SHUTDOWN; } } /* Wait for the Update Process to detach */ if (0 == grab_sem(RECV, UPD_PROC_COUNT_SEM)) { if(0 != (errno = rel_sem(RECV, UPD_PROC_COUNT_SEM))) repl_log(stderr, TRUE, TRUE, "Error releasing the Update Process Count semaphore : %s\n", REPL_SEM_ERROR); repl_log(stdout, TRUE, TRUE, "Update Process exited\n"); } else { repl_log(stderr, TRUE, TRUE, "Error in update proc count semaphore : %s\n", REPL_SEM_ERROR); exit_status = ABNORMAL_SHUTDOWN; } return (exit_status); }
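/* The shutdown wait in gtmrecv_endupd pairs a liveness check with a WNOHANG waitpid on every pass, so a child that has
 * already exited is reaped on the spot instead of lingering as a zombie while the shared flag still reads SHUTDOWN. A
 * condensed standalone version: kill(pid, 0) stands in for is_proc_alive, and a 100ms nap stands in for
 * SHORT_SLEEP(GTMRECV_WAIT_FOR_UPD_SHUTDOWN).
 */
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Wait for child 'pid' to exit, reaping it so no zombie is left behind. */
static void wait_and_reap(pid_t pid)
{
	int	status;

	while ((0 < pid) && (0 == kill(pid, 0)))	/* signal 0: existence probe only, nothing delivered */
	{
		usleep(100 * 1000);
		(void)waitpid(pid, &status, WNOHANG);	/* reap if already dead; never block */
	}
	(void)waitpid(pid, &status, WNOHANG);		/* final reap attempt once it is gone */
}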
int gtmsource_statslog(void) { assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); repl_log(stderr, TRUE, TRUE, "Initiating STATSLOG operation on source server pid [%d] for secondary instance [%s]\n", jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname); if (gtmsource_options.statslog == jnlpool.gtmsource_local->statslog) { util_out_print("STATSLOG is already !AD. Not initiating change in stats log", TRUE, gtmsource_options.statslog ? strlen("ON") : strlen("OFF"), gtmsource_options.statslog ? "ON" : "OFF"); return (ABNORMAL_SHUTDOWN); } if (!gtmsource_options.statslog) { jnlpool.gtmsource_local->statslog = FALSE; jnlpool.gtmsource_local->statslog_file[0] = '\0'; util_out_print("STATSLOG turned OFF", TRUE); return (NORMAL_SHUTDOWN); } jnlpool.gtmsource_local->statslog = TRUE; util_out_print("Stats log turned on", TRUE); return (NORMAL_SHUTDOWN); }
int gtmsource_losttncomplete(void) { int idx; gtmsource_local_ptr_t gtmsourcelocal_ptr; error_def(ERR_MUPCLIERR); error_def(ERR_TEXT); assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); /* We dont need the access control semaphore here. So release it first and avoid any potential deadlocks. */ if (0 != rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)) rts_error(VARLSTCNT(5) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in source server losttncomplete rel_sem"), REPL_SEM_ERRNO); assert(NULL == jnlpool.gtmsource_local); repl_log(stderr, TRUE, TRUE, "Initiating LOSTTNCOMPLETE operation on instance [%s]\n", jnlpool.repl_inst_filehdr->this_instname); /* If this is a root primary instance, propagate this information to secondaries as well so they reset zqgblmod_seqno to 0. * If propagating primary, no need to send this to tertiaries as the receiver on the tertiary cannot have started with * non-zero "zqgblmod_seqno" to begin with (PRIMARYNOTROOT error would have been issued). */ if (!jnlpool.jnlpool_ctl->upd_disabled) { grab_lock(jnlpool.jnlpool_dummy_reg); jnlpool.jnlpool_ctl->send_losttn_complete = TRUE; gtmsourcelocal_ptr = jnlpool.gtmsource_local_array; for (idx = 0; idx < NUM_GTMSRC_LCL; idx++, gtmsourcelocal_ptr++) { if (('\0' == gtmsourcelocal_ptr->secondary_instname[0]) && (0 == gtmsourcelocal_ptr->read_jnl_seqno) && (0 == gtmsourcelocal_ptr->connect_jnl_seqno)) continue; gtmsourcelocal_ptr->send_losttn_complete = TRUE; } rel_lock(jnlpool.jnlpool_dummy_reg); } /* Reset zqgblmod_seqno and zqgblmod_tn to 0 in this instance as well */ repl_inst_reset_zqgblmod_seqno_and_tn(); return (NORMAL_SHUTDOWN); }
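/* Propagating send_losttn_complete above follows the journal-pool broadcast pattern: take the pool lock, raise the
 * pool-wide flag, then walk every gtmsource_local slot raising the per-connection flag, skipping slots that were never
 * assigned. A pthread-mutex rendition of just that walk; the structure names and 16-slot bound are illustrative, and the
 * original's skip test also consults two seqno fields.
 */
#include <pthread.h>

#define EX_NSLOTS	16			/* stand-in for NUM_GTMSRC_LCL */

struct ex_src_slot { char instname[16]; int send_losttn_complete; };
struct ex_pool
{
	pthread_mutex_t		lock;
	int			send_losttn_complete;
	struct ex_src_slot	slot[EX_NSLOTS];
};

static void broadcast_losttn_complete(struct ex_pool *p)
{
	int	idx;

	pthread_mutex_lock(&p->lock);
	p->send_losttn_complete = 1;			/* pool-wide flag */
	for (idx = 0; idx < EX_NSLOTS; idx++)
	{
		if ('\0' == p->slot[idx].instname[0])	/* slot never assigned: skip */
			continue;
		p->slot[idx].send_losttn_complete = 1;	/* per-connection flag */
	}
	pthread_mutex_unlock(&p->lock);
}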
void mupip_exit_handler(void) { char err_log[1024]; FILE *fp; boolean_t files_closed = TRUE; if (exit_handler_active) /* Don't recurse if exit handler exited */ return; exit_handler_active = TRUE; SET_PROCESS_EXITING_TRUE; if (jgbl.mupip_journal) { files_closed = mur_close_files(); mupip_jnl_recover = FALSE; } jgbl.dont_reset_gbl_jrec_time = jgbl.forw_phase_recovery = FALSE; CANCEL_TIMERS; /* Cancel all unsafe timers - No unpleasant surprises */ secshr_db_clnup(NORMAL_TERMINATION); if (dollar_tlevel) OP_TROLLBACK(0); if (is_updhelper && NULL != helper_entry) /* haven't had a chance to cleanup, must be an abnormal exit */ { helper_entry->helper_shutdown = ABNORMAL_SHUTDOWN; helper_entry->helper_pid = 0; /* vacate my slot */ helper_entry = NULL; } if (recvpool.recvpool_ctl) { SHMDT(recvpool.recvpool_ctl); recvpool.recvpool_ctl = NULL; } gv_rundown(); /* also takes care of detaching from the journal pool */ relinkctl_rundown(TRUE, TRUE); /* decrement relinkctl-attach & rtnobj-reference counts */ /* Log the exit of replication servers. In case they are exiting abnormally, their log file pointers * might not be set up. In that case, use "stderr" for logging. */ if (is_src_server) { fp = (NULL != gtmsource_log_fp) ? gtmsource_log_fp : stderr; repl_log(fp, TRUE, TRUE, "Source server exiting...\n\n"); } else if (is_rcvr_server) { fp = (NULL != gtmrecv_log_fp) ? gtmrecv_log_fp : stderr; repl_log(fp, TRUE, TRUE, "Receiver server exiting...\n\n"); } else if (is_updproc) { fp = (NULL != updproc_log_fp) ? updproc_log_fp : stderr; repl_log(fp, TRUE, TRUE, "Update process exiting...\n\n"); } else if (is_updhelper) { fp = (NULL != updhelper_log_fp) ? updhelper_log_fp : stderr; repl_log(fp, TRUE, TRUE, "Helper exiting...\n\n"); } else mu_reset_term_characterstics(); /* the replication servers use files for output/error, not terminal */ flush_pio(); util_out_close(); close_repl_logfiles(); print_exit_stats(); io_rundown(RUNDOWN_EXCEPT_STD); GTMCRYPT_CLOSE; if (need_core && !created_core) { ++core_in_progress; DUMP_CORE; /* This will not return */ } if (!files_closed) _exit(EXIT_FAILURE); }
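/* mupip_exit_handler opens with the standard reentrancy guard for exit paths: a flag set on first entry, so an error
 * raised while the handler itself is cleaning up cannot recurse into it. Reduced to its skeleton (names are illustrative):
 */
#include <stdlib.h>

static volatile int	ex_exit_active;

static void ex_exit_handler(void)
{
	if (ex_exit_active)	/* an error during cleanup re-entered us: give up quietly */
		return;
	ex_exit_active = 1;
	/* ... cancel timers, roll back open transactions, detach pools, flush and close logs ... */
}

/* Registered once at startup; fatal-signal paths may call ex_exit_handler() directly as well. */
static void install_exit_handler(void)
{
	(void)atexit(ex_exit_handler);
}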
static int helper_init(upd_helper_entry_ptr_t helper, recvpool_user helper_type) { int save_errno, save_shutdown; char helper_cmd[UPDHELPER_CMD_MAXLEN]; int status; int4 i4status; mstr helper_log_cmd, helper_trans_cmd; upd_helper_ctl_ptr_t upd_helper_ctl; #ifdef UNIX pid_t helper_pid, waitpid_res; #elif defined(VMS) uint4 helper_pid, cmd_channel; char mbx_suffix[2 + 1]; /* hex representation of numbers 0 through MAX_UPD_HELPERS-1, +1 for '\0' */ $DESCRIPTOR(cmd_desc_reader, UPDHELPER_READER_CMD_STR); $DESCRIPTOR(cmd_desc_writer, UPDHELPER_WRITER_CMD_STR); #endif upd_helper_ctl = recvpool.upd_helper_ctl; save_shutdown = helper->helper_shutdown; helper->helper_shutdown = NO_SHUTDOWN; #ifdef UNIX if (0 > (helper_pid = fork())) /* BYPASSOK: we exec immediately, no FORK_CLEAN needed */ { save_errno = errno; helper->helper_shutdown = save_shutdown; gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, LEN_AND_LIT("Could not fork update process"), save_errno); repl_errno = EREPL_UPDSTART_FORK; return UPDPROC_START_ERR; } if (0 == helper_pid) { /* helper */ getjobnum(); helper->helper_pid_prev = process_id; /* identify owner of slot */ helper_log_cmd.len = STR_LIT_LEN(UPDHELPER_CMD); helper_log_cmd.addr = UPDHELPER_CMD; if (SS_NORMAL != (i4status = TRANS_LOG_NAME(&helper_log_cmd, &helper_trans_cmd, helper_cmd, SIZEOF(helper_cmd), dont_sendmsg_on_log2long))) { helper->helper_shutdown = save_shutdown; gtm_putmsg(VARLSTCNT(6) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, LEN_AND_LIT("Could not find path of Helper Process. Check value of $gtm_dist")); if (SS_LOG2LONG == i4status) gtm_putmsg(VARLSTCNT(5) ERR_LOGTOOLONG, 3, LEN_AND_LIT(UPDHELPER_CMD), SIZEOF(helper_cmd) - 1); repl_errno = EREPL_UPDSTART_BADPATH; return UPDPROC_START_ERR; } helper_cmd[helper_trans_cmd.len] = '\0'; if (-1 == EXECL(helper_cmd, helper_cmd, UPDHELPER_CMD_ARG1, UPDHELPER_CMD_ARG2, (UPD_HELPER_READER == helper_type) ? UPDHELPER_READER_CMD_ARG3 : UPDHELPER_WRITER_CMD_ARG3, NULL)) { save_errno = errno; helper->helper_shutdown = save_shutdown; gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, LEN_AND_LIT("Could not exec Helper Process"), save_errno); repl_errno = EREPL_UPDSTART_EXEC; return UPDPROC_START_ERR; } } #elif defined(VMS) /* Create detached server and write startup commands to it */ i2hex(helper - upd_helper_ctl->helper_list, LIT_AND_LEN(mbx_suffix)); mbx_suffix[SIZEOF(mbx_suffix) - 1] = '\0'; /* A mailbox is created per helper, and the mailbox name is assigned to a logical. This logical will persist until the * helper terminates. So, we need to assign a unique logical per helper. Hence the suffix. */ if (SS_NORMAL != (status = repl_create_server((UPD_HELPER_READER == helper_type) ? &cmd_desc_reader : &cmd_desc_writer, UPDHELPER_MBX_PREFIX, mbx_suffix, &cmd_channel, &helper->helper_pid_prev, ERR_RECVPOOLSETUP))) { gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, LEN_AND_LIT("Unable to spawn Helper process"), status); helper->helper_shutdown = save_shutdown; repl_errno = EREPL_UPDSTART_FORK; return UPDPROC_START_ERR; } helper_pid = helper->helper_pid_prev; #endif /* Wait for helper to startup */ while (helper_pid != helper->helper_pid && is_proc_alive(helper_pid, 0)) { SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START); UNIX_ONLY(WAITPID(helper_pid, &status, WNOHANG, waitpid_res);) /* Release defunct helper process if dead */ } /* The helper has now gone far enough in the initialization, or died before initialization. Consider startup completed. 
*/ #if defined(VMS) /* Deassign the send-cmd mailbox channel */ if (SS_NORMAL != (status = sys$dassgn(cmd_channel))) { gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Unable to close upd-send-cmd mbox channel"), status); helper->helper_shutdown = save_shutdown; repl_errno = EREPL_UPDSTART_BADPATH; /* Just to make an auto-shutdown */ return UPDPROC_START_ERR; } #endif repl_log(gtmrecv_log_fp, TRUE, TRUE, "Helper %s started. PID %d [0x%X]\n", (UPD_HELPER_READER == helper_type) ? "reader" : "writer", helper_pid, helper_pid); return UPDPROC_STARTED; }
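/* Both helper_init variants finish with the same startup handshake: the parent loops until the child publishes its pid
 * into the shared slot (helper->helper_pid) or dies first, issuing a WNOHANG waitpid per pass to reap an early casualty.
 * Just that handshake as a sketch, assuming the child stores slot->pid once its initialization is far enough along;
 * kill(child, 0) stands in for is_proc_alive.
 */
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

struct ex_slot { volatile pid_t pid; };	/* lives in shared memory, written by the child */

/* Returns 1 if the child checked in, 0 if it died before initializing. */
static int wait_for_child_checkin(pid_t child, struct ex_slot *slot)
{
	int	status;

	while ((slot->pid != child) && (0 == kill(child, 0)))
	{
		usleep(10 * 1000);			/* brief nap between polls */
		(void)waitpid(child, &status, WNOHANG);	/* reap it if it died early */
	}
	return (slot->pid == child);
}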
int gtmrecv_end1(boolean_t auto_shutdown) { int4 strm_idx; int exit_status, idx, status, save_errno; int fclose_res, rc; seq_num log_seqno, log_seqno1, jnlpool_seqno, jnlpool_strm_seqno[MAX_SUPPL_STRMS]; uint4 savepid; exit_status = gtmrecv_end_helpers(TRUE); exit_status = gtmrecv_endupd(); log_seqno = recvpool.recvpool_ctl->jnl_seqno; log_seqno1 = recvpool.upd_proc_local->read_jnl_seqno; strm_idx = recvpool.gtmrecv_local->strm_index; /* Detach from receive pool */ recvpool.gtmrecv_local->shutdown = exit_status; recvpool.gtmrecv_local->recv_serv_pid = 0; if (0 > SHMDT(recvpool.recvpool_ctl)) repl_log(stderr, TRUE, TRUE, "Error detaching from Receive Pool : %s\n", REPL_STR_ERROR); recvpool.recvpool_ctl = NULL; assert((NULL != jnlpool_ctl) && (jnlpool_ctl == jnlpool.jnlpool_ctl)); if (NULL != jnlpool.jnlpool_ctl) { /* Reset fields that might have been initialized by the receiver server after connecting to the primary. * It is ok not to hold the journal pool lock while updating jnlpool_ctl fields since this will be the * only process updating those fields. */ jnlpool.jnlpool_ctl->primary_instname[0] = '\0'; jnlpool.jnlpool_ctl->gtmrecv_pid = 0; jnlpool_seqno = jnlpool.jnlpool_ctl->jnl_seqno; for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) jnlpool_strm_seqno[idx] = jnlpool.jnlpool_ctl->strm_seqno[idx]; /* Also take this opportunity to detach from the journal pool except in the auto_shutdown case. This is because * the fields "jnlpool_ctl->repl_inst_filehdr->recvpool_semid" and "jnlpool_ctl->repl_inst_filehdr->recvpool_shmid" * need to be reset by "gtmrecv_jnlpool_reset" (called from "gtmrecv_shutdown") which is invoked a little later. */ if (!auto_shutdown) { JNLPOOL_SHMDT(status, save_errno); if (0 > status) repl_log(stderr, TRUE, TRUE, "Error detaching from Journal Pool : %s\n", STRERROR(save_errno)); jnlpool.jnlpool_ctl = jnlpool_ctl = NULL; jnlpool.repl_inst_filehdr = NULL; jnlpool.gtmsrc_lcl_array = NULL; jnlpool.gtmsource_local_array = NULL; jnlpool.jnldata_base = NULL; pool_init = FALSE; } } else jnlpool_seqno = 0; gtmrecv_free_msgbuff(); gtmrecv_free_filter_buff(); recvpool.recvpool_ctl = NULL; /* Close the connection with the Receiver */ if (FD_INVALID != gtmrecv_listen_sock_fd) CLOSEFILE_RESET(gtmrecv_listen_sock_fd, rc); /* resets "gtmrecv_listen_sock_fd" to FD_INVALID */ if (FD_INVALID != gtmrecv_sock_fd) CLOSEFILE_RESET(gtmrecv_sock_fd, rc); /* resets "gtmrecv_sock_fd" to FD_INVALID */ repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Current Jnlpool Seqno : %llu\n", jnlpool_seqno); for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) { if (jnlpool_strm_seqno[idx]) repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Stream # %d : Current Jnlpool Stream Seqno : %llu\n", idx, jnlpool_strm_seqno[idx]); } if (0 < strm_idx) repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Receiver server has Stream # %d\n", strm_idx); repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Update process Read Seqno : %llu\n", log_seqno1); repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Receive Pool Seqno : %llu\n", log_seqno); /* If log_seqno/log_seqno1 is 0, then do not decrement it as that will be interpreted as a huge positive seqno. 
Keep it 0 */ if (log_seqno) log_seqno--; if (log_seqno1) log_seqno1--; repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Last Recvd Seqno : %llu Jnl Total : %llu Msg Total : %llu\n", log_seqno, repl_recv_data_processed, repl_recv_data_recvd); repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Last Seqno processed by update process : %llu\n", log_seqno1); gtm_event_log_close(); if (gtmrecv_filter & EXTERNAL_FILTER) repl_stop_filter(); if (auto_shutdown) return (exit_status); else gtmrecv_exit(exit_status - NORMAL_SHUTDOWN); return -1; /* This will never get executed, added to make compiler happy */ }
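/* The "if (log_seqno) log_seqno--;" guard just above is not cosmetic: seq_num is an unsigned 64-bit type, so decrementing
 * 0 would wrap to 2^64 - 1 and the shutdown log would report an absurd last-received sequence number. In miniature:
 */
#include <stdint.h>
#include <stdio.h>

static void seqno_decrement_demo(void)
{
	uint64_t	seqno = 0;

	/* Wrong: unsigned subtraction wraps around. */
	printf("%llu\n", (unsigned long long)(seqno - 1));	/* prints 18446744073709551615 */
	/* Right: only back up past a real value; report 0 as 0. */
	if (seqno)
		seqno--;
	printf("%llu\n", (unsigned long long)seqno);		/* prints 0 */
}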
int gtmrecv_poll_actions1(int *pending_data_len, int *buff_unprocessed, unsigned char *buffp) { static int report_cnt = 1; static int next_report_at = 1; static boolean_t send_xoff = FALSE; static boolean_t xoff_sent = FALSE; static seq_num send_seqno; static boolean_t log_draining_msg = FALSE; static boolean_t send_badtrans = FALSE; static boolean_t send_cmp2uncmp = FALSE; static boolean_t upd_shut_too_early_logged = FALSE; static time_t last_reap_time = 0; repl_msg_t xoff_msg; repl_badtrans_msg_t bad_trans_msg; boolean_t alert = FALSE, info = FALSE; int return_status; gd_region *region_top; unsigned char *msg_ptr; /* needed for REPL_{SEND,RECV}_LOOP */ int tosend_len, sent_len, sent_this_iter; /* needed for REPL_SEND_LOOP */ int torecv_len, recvd_len, recvd_this_iter; /* needed for REPL_RECV_LOOP */ int status, poll_dir; /* needed for REPL_{SEND,RECV}_LOOP */ int temp_len, pending_msg_size; int upd_start_status, upd_start_attempts; int buffered_data_len; int upd_exit_status; seq_num temp_send_seqno; boolean_t bad_trans_detected = FALSE, onln_rlbk_flg_set = FALSE; recvpool_ctl_ptr_t recvpool_ctl; upd_proc_local_ptr_t upd_proc_local; gtmrecv_local_ptr_t gtmrecv_local; upd_helper_ctl_ptr_t upd_helper_ctl; pid_t waitpid_res; int4 msg_type, msg_len; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; recvpool_ctl = recvpool.recvpool_ctl; upd_proc_local = recvpool.upd_proc_local; gtmrecv_local = recvpool.gtmrecv_local; upd_helper_ctl = recvpool.upd_helper_ctl; if (SHUTDOWN == gtmrecv_local->shutdown) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Shutdown signalled\n"); gtmrecv_end(); /* Won't return */ } # ifdef GTM_TLS /* If we sent a REPL_RENEG_ACK, then we cannot afford to send any more asynchronous messages (like XOFF_ACK_ME) until we * receive a REPL_RENEG_COMPLETE from the source server. This ensures that while the source server attempts to do an SSL/TLS * renegotiation, it doesn't have any application data (like XOFF_ACK_ME) sitting in the pipe. */ if (REPLTLS_WAITING_FOR_RENEG_COMPLETE == repl_tls.renegotiate_state) return STOP_POLL; # endif /* Reset report_cnt and next_report_at to 1 when a new upd proc is forked */ if ((1 == report_cnt) || (report_cnt == next_report_at)) { /* A comment on the usage of NO_SHUTDOWN below for the alert variable. Since upd_proc_local->upd_proc_shutdown is * a shared memory field (and could be concurrently changed by either the receiver server or the update process), * we want to make sure it is the same value BEFORE and AFTER checking whether the update process is alive or not. * If it is not NO_SHUTDOWN (i.e. is SHUTDOWN or NORMAL_SHUTDOWN or ABNORMAL_SHUTDOWN) it has shut down due to * an external request so we do not want to send out a false update-process-is-not-alive alert. 
*/ if ((alert = ((NO_SHUTDOWN == upd_proc_local->upd_proc_shutdown) && (SRV_DEAD == is_updproc_alive()) && (NO_SHUTDOWN == upd_proc_local->upd_proc_shutdown))) || (info = (((NORMAL_SHUTDOWN == upd_proc_local->upd_proc_shutdown) || (ABNORMAL_SHUTDOWN == upd_proc_local->upd_proc_shutdown)) && (SRV_DEAD == is_updproc_alive())))) { if (alert) repl_log(gtmrecv_log_fp, TRUE, TRUE, "ALERT : Receiver Server detected that Update Process is not ALIVE\n"); else repl_log(gtmrecv_log_fp, TRUE, TRUE, "INFO : Update process not running due to user initiated shutdown\n"); if (1 == report_cnt) { send_xoff = TRUE; recvpool_ctl->old_jnl_seqno = recvpool_ctl->jnl_seqno; recvpool_ctl->jnl_seqno = 0; /* Even though we have identified that the update process is NOT alive, a waitpid on the update * process PID is necessary so that the system doesn't leave any zombie process lying around. * This is possible since any child process that dies without the parent doing a waitpid on it * will be defunct unless the parent dies at which point the "init" process takes the role of * the parent and invokes waitpid to remove the zombies. * NOTE: It is possible that the update process was killed before the receiver server got a * chance to record its PID in the recvpool.upd_proc_local structure. In such a case, don't * invoke waitpid as that will block us (receiver server) if this instance of the receiver * server was started with helper processes. */ if (0 < upd_proc_local->upd_proc_pid) { WAITPID(upd_proc_local->upd_proc_pid, &upd_exit_status, 0, waitpid_res); /* Since the update process as part of its shutdown does NOT reset the upd_proc_pid, reset * it here ONLY if the update process was NOT kill -9ed. This is needed because receiver * server as part of its shutdown relies on this field (upd_proc_pid) to determine if the * update process was cleanly shutdown or was kill -9ed. */ if (!alert) upd_proc_local->upd_proc_pid = 0; } upd_proc_local->bad_trans = FALSE; /* No point in doing bad transaction processing */ upd_proc_local->onln_rlbk_flg = FALSE; /* No point handling online rollback */ } gtmrecv_wait_for_jnl_seqno = TRUE; REPL_DPRINT1( "gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because of upd crash/shutdown\n"); next_report_at *= GTMRECV_NEXT_REPORT_FACTOR; report_cnt++; } } else report_cnt++; /* Check if REPL_CMP2UNCMP or REPL_BADTRANS message needs to be sent */ if (upd_proc_local->onln_rlbk_flg) { /* Update process detected an online rollback and is requesting us to restart the connection. But before that, send * REPL_XOFF source side and drain the replication pipe */ onln_rlbk_flg_set = TRUE; send_xoff = TRUE; } else if (!send_cmp2uncmp && gtmrecv_send_cmp2uncmp) { send_xoff = TRUE; send_seqno = recvpool_ctl->jnl_seqno; send_cmp2uncmp = TRUE; } else if (!send_badtrans && upd_proc_local->bad_trans) { send_xoff = TRUE; send_seqno = upd_proc_local->read_jnl_seqno; send_badtrans = TRUE; bad_trans_detected = TRUE; } else if (!upd_proc_local->bad_trans && send_badtrans && 1 != report_cnt) { send_badtrans = FALSE; bad_trans_detected = FALSE; } if (send_xoff && !xoff_sent) { /* Send XOFF_ACK_ME if the receiver has a connection to the source. Do not attempt to send it if we dont even * know the endianness of the remote side. In that case, we are guaranteed no initial handshake occurred and * so no point sending the XOFF too. This saves us lots of trouble in case of cross-endian replication connections. 
*/ assert((FD_INVALID != gtmrecv_sock_fd) || repl_connection_reset); if ((FD_INVALID != gtmrecv_sock_fd) && remote_side->endianness_known) { send_seqno = upd_proc_local->read_jnl_seqno; if (!remote_side->cross_endian) { xoff_msg.type = REPL_XOFF_ACK_ME; xoff_msg.len = MIN_REPL_MSGLEN; memcpy((uchar_ptr_t)&xoff_msg.msg[0], (uchar_ptr_t)&send_seqno, SIZEOF(seq_num)); } else { xoff_msg.type = GTM_BYTESWAP_32(REPL_XOFF_ACK_ME); xoff_msg.len = GTM_BYTESWAP_32(MIN_REPL_MSGLEN); temp_send_seqno = GTM_BYTESWAP_64(send_seqno); memcpy((uchar_ptr_t)&xoff_msg.msg[0], (uchar_ptr_t)&temp_send_seqno, SIZEOF(seq_num)); } REPL_SEND_LOOP(gtmrecv_sock_fd, &xoff_msg, MIN_REPL_MSGLEN, REPL_POLL_NOWAIT) ; /* Empty Body */ if (SS_NORMAL != status) { if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending XOFF_ACK_ME. " "Status = %d ; %s\n", status, STRERROR(status)); repl_close(&gtmrecv_sock_fd); repl_connection_reset = TRUE; xoff_sent = FALSE; send_badtrans = FALSE; } else if (EREPL_SEND == repl_errno) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. " "Error in send"), status); else { assert(EREPL_SELECT == repl_errno); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. " "Error in select"), status); } } else { xoff_sent = TRUE; log_draining_msg = TRUE; } repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_XOFF_ACK_ME sent due to upd shutdown/crash or bad trans " "or ONLINE_ROLLBACK\n"); send_xoff = FALSE; } else { /* Connection has been lost OR initial handshake needs to happen again, so no point sending XOFF/BADTRANS */ send_xoff = FALSE; send_badtrans = FALSE; } } /* Drain pipe */ if (xoff_sent) { if (log_draining_msg) { /* avoid multiple logs per instance */ repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Draining replication pipe due to %s\n", send_cmp2uncmp ? "CMP2UNCMP" : (send_badtrans ? "BAD_TRANS" : (onln_rlbk_flg_set ? "ONLINE_ROLLBACK" : "UPD shutdown/crash"))); log_draining_msg = FALSE; } if (0 != *buff_unprocessed) { /* Throw away the current contents of the buffer */ buffered_data_len = ((*pending_data_len <= *buff_unprocessed) ? *pending_data_len : *buff_unprocessed); *buff_unprocessed -= buffered_data_len; buffp += buffered_data_len; *pending_data_len -= buffered_data_len; REPL_DPRINT2("gtmrecv_poll_actions : (1) Throwing away %d bytes from old buffer while draining\n", buffered_data_len); assert(remote_side->endianness_known); /* only then is remote_side->cross_endian reliable */ while (REPL_MSG_HDRLEN <= *buff_unprocessed) { assert(0 == (((unsigned long)buffp) % REPL_MSG_ALIGN)); msg_len = ((repl_msg_ptr_t)buffp)->len; msg_type = ((repl_msg_ptr_t)buffp)->type; if (remote_side->cross_endian) { msg_len = GTM_BYTESWAP_32(msg_len); msg_type = GTM_BYTESWAP_32(msg_type); } msg_type = (msg_type & REPL_TR_CMP_MSG_TYPE_MASK); assert((REPL_TR_CMP_JNL_RECS == msg_type) || (0 == (msg_len % REPL_MSG_ALIGN))); *pending_data_len = ROUND_UP2(msg_len, REPL_MSG_ALIGN); buffered_data_len = ((*pending_data_len <= *buff_unprocessed) ? 
*pending_data_len : *buff_unprocessed); *buff_unprocessed -= buffered_data_len; buffp += buffered_data_len; *pending_data_len -= buffered_data_len; REPL_DPRINT3("gtmrecv_poll_actions : (1) Throwing away message of " "type %d and length %d from old buffer while draining\n", msg_type, buffered_data_len); } if (0 < *buff_unprocessed) { memmove((unsigned char *)gtmrecv_msgp, buffp, *buff_unprocessed); REPL_DPRINT2("gtmrecv_poll_actions : Incomplete header of length %d while draining\n", *buff_unprocessed); } } status = SS_NORMAL; if (0 != *buff_unprocessed || 0 == *pending_data_len) { /* Receive the header of a message */ assert(REPL_MSG_HDRLEN > *buff_unprocessed); /* so we dont pass negative length in REPL_RECV_LOOP */ REPL_RECV_LOOP(gtmrecv_sock_fd, ((unsigned char *)gtmrecv_msgp) + *buff_unprocessed, (REPL_MSG_HDRLEN - *buff_unprocessed), REPL_POLL_WAIT) ; /* Empty Body */ if (SS_NORMAL == status) { assert(remote_side->endianness_known); /* only then is remote_side->cross_endian reliable */ if (!remote_side->cross_endian) { msg_len = gtmrecv_msgp->len; msg_type = gtmrecv_msgp->type; } else { msg_len = GTM_BYTESWAP_32(gtmrecv_msgp->len); msg_type = GTM_BYTESWAP_32(gtmrecv_msgp->type); } msg_type = (msg_type & REPL_TR_CMP_MSG_TYPE_MASK); assert((REPL_TR_CMP_JNL_RECS == msg_type) || (0 == (msg_len % REPL_MSG_ALIGN))); msg_len = ROUND_UP2(msg_len, REPL_MSG_ALIGN); REPL_DPRINT3("gtmrecv_poll_actions : Received message of type %d and length %d while draining\n", msg_type, msg_len); } } if ((SS_NORMAL == status) && (0 != *buff_unprocessed || 0 == *pending_data_len) && (REPL_XOFF_ACK == msg_type)) { /* Receive the rest of the XOFF_ACK msg and signal the drain as complete */ REPL_RECV_LOOP(gtmrecv_sock_fd, gtmrecv_msgp, (MIN_REPL_MSGLEN - REPL_MSG_HDRLEN), REPL_POLL_WAIT) ; /* Empty Body */ if (SS_NORMAL == status) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - XOFF_ACK received. Drained replication pipe completely\n"); upd_shut_too_early_logged = FALSE; xoff_sent = FALSE; return_status = STOP_POLL; } } else if (SS_NORMAL == status) { /* Drain the rest of the message */ if (0 < *pending_data_len) { pending_msg_size = *pending_data_len; REPL_DPRINT2("gtmrecv_poll_actions : (2) Throwing away %d bytes from pipe\n", pending_msg_size); } else { pending_msg_size = msg_len - REPL_MSG_HDRLEN; REPL_DPRINT3("gtmrecv_poll_actions : (2) Throwing away message of " "type %d and length %d from pipe\n", msg_type, msg_len); } for ( ; SS_NORMAL == status && 0 < pending_msg_size; pending_msg_size -= gtmrecv_max_repl_msglen) { temp_len = (pending_msg_size < gtmrecv_max_repl_msglen)? pending_msg_size : gtmrecv_max_repl_msglen; REPL_RECV_LOOP(gtmrecv_sock_fd, gtmrecv_msgp, temp_len, REPL_POLL_WAIT) ; /* Empty Body */ } *buff_unprocessed = 0; *pending_data_len = 0; if (SS_NORMAL == status && info && !upd_shut_too_early_logged) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "ALERT : User initiated shutdown of Update Process done " "when there was data in the replication pipe\n"); upd_shut_too_early_logged = TRUE; } return_status = CONTINUE_POLL; } if (SS_NORMAL != status) { if (EREPL_RECV == repl_errno) { if (REPL_CONN_RESET(status)) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while receiving XOFF_ACK. " "Status = %d ; %s\n", status, STRERROR(status)); repl_close(&gtmrecv_sock_fd); repl_connection_reset = TRUE; xoff_sent = FALSE; send_badtrans = FALSE; return_status = STOP_POLL; } else rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_LIT("Error while draining replication pipe. 
Error in recv"), status); } else { assert(EREPL_SELECT == repl_errno); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_LIT("Error while draining replication pipe. Error in select"), status); } } } else return_status = STOP_POLL; /* Like was done before for the XOFF_ACK_ME message, send a BADTRANS/CMP2UNCMP message only if we know * the endianness of the other side. If not, no point in sending one anyways and saves us trouble in * case of cross-endian replication connections. */ if ((STOP_POLL == return_status) && (send_badtrans || send_cmp2uncmp) && (FD_INVALID != gtmrecv_sock_fd) && remote_side->endianness_known) { /* Send REPL_BADTRANS or REPL_CMP2UNCMP message */ if (!remote_side->cross_endian) { bad_trans_msg.type = send_cmp2uncmp ? REPL_CMP2UNCMP : REPL_BADTRANS; bad_trans_msg.len = MIN_REPL_MSGLEN; bad_trans_msg.start_seqno = send_seqno; } else { bad_trans_msg.type = send_cmp2uncmp ? GTM_BYTESWAP_32(REPL_CMP2UNCMP) : GTM_BYTESWAP_32(REPL_BADTRANS); bad_trans_msg.len = GTM_BYTESWAP_32(MIN_REPL_MSGLEN); bad_trans_msg.start_seqno = GTM_BYTESWAP_64(send_seqno); } REPL_SEND_LOOP(gtmrecv_sock_fd, &bad_trans_msg, bad_trans_msg.len, REPL_POLL_NOWAIT) ; /* Empty Body */ if (SS_NORMAL == status) { if (send_cmp2uncmp) repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_CMP2UNCMP message sent with seqno %llu\n", send_seqno); else repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_BADTRANS message sent with seqno %llu\n", send_seqno); } else { if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno) { if (send_cmp2uncmp) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_CMP2UNCMP. " "Status = %d ; %s\n", status, STRERROR(status)); } else { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_BADTRANS. " "Status = %d ; %s\n", status, STRERROR(status)); } repl_close(>mrecv_sock_fd); repl_connection_reset = TRUE; return_status = STOP_POLL; } else if (EREPL_SEND == repl_errno) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in send"), status); else { assert(EREPL_SELECT == repl_errno); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in select"), status); } } send_badtrans = FALSE; if (send_cmp2uncmp) { REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because this receiver" "server requested a fall-back from compressed to uncompressed operation\n"); gtmrecv_wait_for_jnl_seqno = TRUE;/* set this to TRUE to break out and go back to a fresh "do_main_loop" */ gtmrecv_bad_trans_sent = TRUE; gtmrecv_send_cmp2uncmp = FALSE; send_cmp2uncmp = FALSE; } } if ((upd_proc_local->bad_trans && bad_trans_detected) || onln_rlbk_flg_set || (UPDPROC_START == upd_proc_local->start_upd) && (1 != report_cnt)) { if (UPDPROC_START == upd_proc_local->start_upd) { assert(is_updproc_alive() != SRV_ALIVE); upd_proc_local->upd_proc_shutdown = NO_SHUTDOWN; } recvpool_ctl->wrapped = FALSE; recvpool_ctl->write_wrap = recvpool_ctl->recvpool_size; recvpool_ctl->write = 0; /* Reset last_rcvd_histinfo, last_valid_histinfo etc. as they reflect context from unprocessed data * in the receive pool and those are no longer valid because we have drained the receive pool. 
*/ GTMRECV_CLEAR_CACHED_HISTINFO(recvpool.recvpool_ctl, jnlpool, jnlpool_ctl, INSERT_STRM_HISTINFO_FALSE); if (UPDPROC_START == upd_proc_local->start_upd) { /* Attempt starting the update process */ for (upd_start_attempts = 0; UPDPROC_START_ERR == (upd_start_status = gtmrecv_upd_proc_init(FALSE)) && GTMRECV_MAX_UPDSTART_ATTEMPTS > upd_start_attempts; upd_start_attempts++) { if (EREPL_UPDSTART_SEMCTL == repl_errno || EREPL_UPDSTART_BADPATH == repl_errno) { gtmrecv_autoshutdown(); } else if (EREPL_UPDSTART_FORK == repl_errno) { /* Couldn't start up update now, can try later */ LONG_SLEEP(GTMRECV_WAIT_FOR_PROC_SLOTS); continue; } else if (EREPL_UPDSTART_EXEC == repl_errno) { /* In forked child, could not exec, should exit */ gtmrecv_exit(ABNORMAL_SHUTDOWN); } } if (UPDPROC_STARTED == (upd_proc_local->start_upd = upd_start_status)) { REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because of " "upd restart\n"); gtmrecv_wait_for_jnl_seqno = TRUE; report_cnt = next_report_at = 1; if (send_xoff && (FD_INVALID == gtmrecv_sock_fd)) { /* Update start command was issued before connection was established, * no point in sending XOFF. */ send_xoff = FALSE; } } else { repl_log(gtmrecv_log_fp, TRUE, TRUE, "%d failed attempts to fork update process. Try later\n", upd_start_attempts); } } else { gtmrecv_wait_for_jnl_seqno = TRUE;/* set this to TRUE to break out and go back to a fresh "do_main_loop" */ if (onln_rlbk_flg_set) { assert(NULL != jnlpool_ctl); repl_log(gtmrecv_log_fp, TRUE, TRUE, "Closing connection due to ONLINE ROLLBACK\n"); repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n", jnlpool_ctl->jnl_seqno); repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Receive Pool Seqno : %llu\n", recvpool_ctl->jnl_seqno); repl_close(&gtmrecv_sock_fd); repl_connection_reset = TRUE; xoff_sent = FALSE; send_badtrans = FALSE; upd_proc_local->onln_rlbk_flg = FALSE; /* Before restarting afresh, sync the online rollback cycles. This way any future grab_lock that * we do after restarting should not realize an unhandled online rollback. For receiver, it is * just syncing the journal pool cycles as the databases are not opened. But, to be safe, grab * the lock and sync the cycles. 
*/ grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY); SYNC_ONLN_RLBK_CYCLES; rel_lock(jnlpool.jnlpool_dummy_reg); return_status = STOP_POLL; recvpool_ctl->jnl_seqno = 0; } else { REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because bad trans " "sent\n"); gtmrecv_bad_trans_sent = TRUE; upd_proc_local->bad_trans = FALSE; recvpool_ctl->jnl_seqno = upd_proc_local->read_jnl_seqno; } } } if ((0 == *pending_data_len) && (0 != gtmrecv_local->changelog)) { if (gtmrecv_local->changelog & REPLIC_CHANGE_LOGINTERVAL) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Changing log interval from %u to %u\n", log_interval, gtmrecv_local->log_interval); log_interval = gtmrecv_local->log_interval; gtmrecv_reinit_logseqno(); /* will force a LOG on the first recv following the interval change */ } if (gtmrecv_local->changelog & REPLIC_CHANGE_LOGFILE) { repl_log(gtmrecv_log_fp, TRUE, TRUE, "Changing log file to %s\n", gtmrecv_local->log_file); repl_log_init(REPL_GENERAL_LOG, &gtmrecv_log_fd, gtmrecv_local->log_file); repl_log_fd2fp(&gtmrecv_log_fp, gtmrecv_log_fd); repl_log(gtmrecv_log_fp, TRUE, TRUE, "Change log to %s successful\n", gtmrecv_local->log_file); } /* NOTE: update process and receiver each ignore any setting specific to the other (REPLIC_CHANGE_UPD_LOGINTERVAL, * REPLIC_CHANGE_LOGINTERVAL) */ if (REPLIC_CHANGE_LOGINTERVAL == gtmrecv_local->changelog) upd_proc_local->changelog = 0; else upd_proc_local->changelog = gtmrecv_local->changelog; /* Pass changelog request to the update process */ gtmrecv_local->changelog = 0; } if (0 == *pending_data_len && !gtmrecv_logstats && gtmrecv_local->statslog) { gtmrecv_logstats = TRUE; repl_log(gtmrecv_log_fp, TRUE, TRUE, "Begin statistics logging\n"); } else if (0 == *pending_data_len && gtmrecv_logstats && !gtmrecv_local->statslog) { gtmrecv_logstats = FALSE; /* Force all data out to the file before closing the file */ repl_log(gtmrecv_log_fp, TRUE, TRUE, "End statistics logging\n"); } if (0 == *pending_data_len) { if (upd_helper_ctl->start_helpers) { gtmrecv_helpers_init(upd_helper_ctl->start_n_readers, upd_helper_ctl->start_n_writers); upd_helper_ctl->start_helpers = FALSE; } if (HELPER_REAP_NONE != (status = upd_helper_ctl->reap_helpers) || (double)GTMRECV_REAP_HELPERS_INTERVAL <= difftime(gtmrecv_now, last_reap_time)) { gtmrecv_reap_helpers(HELPER_REAP_WAIT == status); last_reap_time = gtmrecv_now; } } return (return_status); }
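/* A step gtmrecv_poll_actions1 repeats while draining is normalizing a message header: 'len' and 'type' are used as-is
 * when both sides share endianness and byte-swapped otherwise, after which the length is rounded up to the message
 * alignment so whole aligned messages are consumed. A standalone sketch of that normalization: a hand-rolled swap stands
 * in for GTM_BYTESWAP_32, 8-byte alignment for REPL_MSG_ALIGN, and EX_ROUND_UP2 mirrors GT.M's ROUND_UP2 macro.
 */
#include <stdint.h>

#define EX_MSG_ALIGN		8				/* stand-in for REPL_MSG_ALIGN */
#define EX_ROUND_UP2(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* 'a' must be a power of two */

struct ex_msg_hdr { uint32_t type; uint32_t len; };		/* simplified repl_msg header */

static uint32_t ex_bswap32(uint32_t v)
{
	return ((v >> 24) & 0xffU) | ((v >> 8) & 0xff00U) | ((v << 8) & 0xff0000U) | (v << 24);
}

/* Normalize a header read off the wire; 'cross_endian' was settled during the initial handshake. */
static void normalize_hdr(const struct ex_msg_hdr *h, int cross_endian, uint32_t *len, uint32_t *type)
{
	*len = h->len;
	*type = h->type;
	if (cross_endian)
	{
		*len = ex_bswap32(*len);
		*type = ex_bswap32(*type);
	}
	*len = EX_ROUND_UP2(*len, EX_MSG_ALIGN);	/* advance the buffer over whole aligned messages */
}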
int gtmsource_checkhealth(void) { uint4 gtmsource_pid; int status, semval, save_errno; boolean_t srv_alive, all_files_open; gtmsource_local_ptr_t gtmsourcelocal_ptr; int4 index, num_servers; seq_num reg_seqno, jnlseqno; gd_region *reg, *region_top; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; char errtxt[OUT_BUFF_SIZE]; char *modestr; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); if (NULL != jnlpool.gtmsource_local) /* Check health of a specific source server */ gtmsourcelocal_ptr = jnlpool.gtmsource_local; else gtmsourcelocal_ptr = &jnlpool.gtmsource_local_array[0]; num_servers = 0; status = SRV_ALIVE; for (index = 0; index < NUM_GTMSRC_LCL; index++, gtmsourcelocal_ptr++) { if ('\0' == gtmsourcelocal_ptr->secondary_instname[0]) { assert(NULL == jnlpool.gtmsource_local); continue; } gtmsource_pid = gtmsourcelocal_ptr->gtmsource_pid; /* If CHECKHEALTH on a specific secondary instance is requested, print the health information irrespective * of whether a source server for that instance is alive or not. For CHECKHEALTH on ALL secondary instances * print health information only for those instances that have an active or passive source server alive. */ if ((NULL == jnlpool.gtmsource_local) && (0 == gtmsource_pid)) continue; repl_log(stdout, TRUE, TRUE, "Initiating CHECKHEALTH operation on source server pid [%d] for secondary instance" " name [%s]\n", gtmsource_pid, gtmsourcelocal_ptr->secondary_instname); srv_alive = (0 == gtmsource_pid) ? FALSE : is_proc_alive(gtmsource_pid, 0); if (srv_alive) { if (GTMSOURCE_MODE_ACTIVE == gtmsourcelocal_ptr->mode) modestr = "ACTIVE"; else if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsourcelocal_ptr->mode) modestr = "ACTIVE REQUESTED"; else if (GTMSOURCE_MODE_PASSIVE == gtmsourcelocal_ptr->mode) modestr = "PASSIVE"; else if (GTMSOURCE_MODE_PASSIVE_REQUESTED == gtmsourcelocal_ptr->mode) modestr = "PASSIVE REQUESTED"; else { assert(gtmsourcelocal_ptr->mode != gtmsourcelocal_ptr->mode); modestr = "UNKNOWN"; } repl_log(stderr, FALSE, TRUE, FORMAT_STR1, gtmsource_pid, "Source server", "", modestr); status |= SRV_ALIVE; num_servers++; } else { repl_log(stderr, FALSE, TRUE, FORMAT_STR, gtmsource_pid, "Source server", " NOT"); gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_SRCSRVNOTEXIST, 2, LEN_AND_STR(gtmsourcelocal_ptr->secondary_instname)); status |= SRV_DEAD; } if (NULL != jnlpool.gtmsource_local) break; } if (NULL == jnlpool.gtmsource_local) { /* Compare number of servers that were found alive with the current value of the COUNT semaphore. * If they are not equal, report the discrepancy. */ semval = get_sem_info(SOURCE, SRC_SERV_COUNT_SEM, SEM_INFO_VAL); if (-1 == semval) { save_errno = errno; repl_log(stderr, FALSE, TRUE, "Error fetching source server count semaphore value : %s\n", STRERROR(save_errno)); status |= SRV_ERR; } else if (semval != num_servers) { repl_log(stderr, FALSE, FALSE, "Error : Expected %d source server(s) to be alive but found %d actually alive\n", semval, num_servers); repl_log(stderr, FALSE, TRUE, "Error : Check if any pid reported above is NOT a source server process\n"); status |= SRV_ERR; } } /* Check that there are no regions with replication state = WAS_ON (i.e. repl_was_open). If so report that. * But to determine that, we need to attach to all the database regions. */ gvinit(); /* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. 
*/ all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); status |= SRV_ERR; } else { for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++) { csa = &FILE_INFO(reg)->s_addrs; csd = csa->hdr; if (REPL_WAS_ENABLED(csd)) { assert(!JNL_ENABLED(csd) || REPL_ENABLED(csd)); /* || is for turning replication on concurrently */ reg_seqno = csd->reg_seqno; jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO; sgtm_putmsg(errtxt, VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(reg), &reg_seqno, &reg_seqno, &jnlseqno, &jnlseqno); repl_log(stderr, FALSE, TRUE, errtxt); status |= SRV_ERR; } } } if (jnlpool.jnlpool_ctl->freeze) { repl_log(stderr, FALSE, FALSE, "Warning: Instance Freeze is ON\n"); repl_log(stderr, FALSE, TRUE, " Freeze Comment: %s\n", jnlpool.jnlpool_ctl->freeze_comment); status |= SRV_ERR; } return (status + NORMAL_SHUTDOWN); }
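/* gtmsource_checkhealth accumulates everything it finds into one bitmask (SRV_ALIVE, SRV_DEAD, SRV_ERR OR-ed in as
 * conditions are met) and returns that mask biased by NORMAL_SHUTDOWN, so a caller can test individual conditions and
 * still treat the value as an exit status. The shape of that idiom, with illustrative flag values (the real ones come
 * from GT.M's headers):
 */
#define EX_SRV_ALIVE		0x1
#define EX_SRV_DEAD		0x2
#define EX_SRV_ERR		0x4
#define EX_NORMAL_SHUTDOWN	0

static int checkhealth_verdict(int n_alive, int n_dead, int n_errors)
{
	int	status = 0;

	if (0 < n_alive)
		status |= EX_SRV_ALIVE;
	if (0 < n_dead)
		status |= EX_SRV_DEAD;	/* an expected server was not running */
	if (0 < n_errors)
		status |= EX_SRV_ERR;	/* semaphore mismatch, WAS_ON regions, frozen instance, ... */
	return status + EX_NORMAL_SHUTDOWN;	/* doubles as the command's exit status */
}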
int gtmrecv_shutdown(boolean_t auto_shutdown, int exit_status) { uint4 savepid; boolean_t shut_upd_too = FALSE; int status; unix_db_info *udi; error_def(ERR_RECVPOOLSETUP); error_def(ERR_TEXT); repl_log(stdout, TRUE, TRUE, "Initiating shut down\n"); call_on_signal = NULL; /* So we don't reenter on error */ /* assert that auto shutdown should be invoked only if the current process is a receiver server */ assert(!auto_shutdown || gtmrecv_srv_count); if (auto_shutdown) { /* grab the ftok semaphore and recvpool access control lock IN THAT ORDER (to avoid deadlocks) */ repl_inst_ftok_sem_lock(); status = grab_sem(RECV, RECV_POOL_ACCESS_SEM); if (0 > status) { repl_log(stderr, TRUE, TRUE, "Error grabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); return (ABNORMAL_SHUTDOWN); } } else { /* ftok semaphore and recvpool access semaphore should already be held from the previous call to "recvpool_init" */ DEBUG_ONLY(udi = (unix_db_info *)FILE_INFO(recvpool.recvpool_dummy_reg);) assert(udi->grabbed_ftok_sem); assert(holds_sem[RECV][RECV_POOL_ACCESS_SEM]); /* We do not want to hold the options semaphore to avoid deadlocks with receiver server startup (C9F12-002766) */ assert(!holds_sem[RECV][RECV_SERV_OPTIONS_SEM]); recvpool.gtmrecv_local->shutdown = SHUTDOWN; /* Wait for receiver server to die. But release ftok semaphore and recvpool access control semaphore before * waiting as the concurrently running receiver server might need these (e.g. if it is about to call the * function "repl_inst_was_rootprimary"). */ if (0 != rel_sem(RECV, RECV_POOL_ACCESS_SEM)) gtm_putmsg(VARLSTCNT(7) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in receiver server shutdown rel_sem"), REPL_SEM_ERRNO); repl_inst_ftok_sem_release(); /* Wait for receiver server to shut down */ while((SHUTDOWN == recvpool.gtmrecv_local->shutdown) && (0 < (savepid = recvpool.gtmrecv_local->recv_serv_pid)) && is_proc_alive(savepid, 0)) SHORT_SLEEP(GTMRECV_WAIT_FOR_SHUTDOWN); /* (Re)Grab the ftok semaphore and recvpool access control semaphore IN THAT ORDER (to avoid deadlocks) */ repl_inst_ftok_sem_lock(); status = grab_sem(RECV, RECV_POOL_ACCESS_SEM); if (0 > status) { repl_log(stderr, TRUE, TRUE, "Error regrabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); return (ABNORMAL_SHUTDOWN); } exit_status = recvpool.gtmrecv_local->shutdown; if (SHUTDOWN == exit_status) { if (0 == savepid) /* No Receiver Process */ exit_status = NORMAL_SHUTDOWN; else /* Receiver Server Crashed */ { repl_log(stderr, FALSE, TRUE, "Receiver Server exited abnormally\n"); exit_status = ABNORMAL_SHUTDOWN; shut_upd_too = TRUE; } } }
static int helper_init(upd_helper_entry_ptr_t helper, recvpool_user helper_type) { int save_errno, save_shutdown; char helper_cmd[UPDHELPER_CMD_MAXLEN]; int helper_cmd_len; int status; int4 i4status; pid_t helper_pid, waitpid_res; save_shutdown = helper->helper_shutdown; helper->helper_shutdown = NO_SHUTDOWN; if (!gtm_dist_ok_to_use) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_GTMDISTUNVERIF, 4, STRLEN(gtm_dist), gtm_dist, gtmImageNames[image_type].imageNameLen, gtmImageNames[image_type].imageName); if (WBTEST_ENABLED(WBTEST_MAXGTMDIST_HELPER_PROCESS)) { memset(gtm_dist, 'a', GTM_PATH_MAX-2); gtm_dist[GTM_PATH_MAX-1] = '\0'; } helper_cmd_len = SNPRINTF(helper_cmd, UPDHELPER_CMD_MAXLEN, UPDHELPER_CMD, gtm_dist); if ((-1 == helper_cmd_len) || (UPDHELPER_CMD_MAXLEN <= helper_cmd_len)) { helper->helper_shutdown = save_shutdown; gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_HLPPROC, 0, ERR_TEXT, 2, LEN_AND_LIT("Could not find path of Helper Process. Check value of $gtm_dist")); repl_errno = EREPL_UPDSTART_BADPATH; return UPDPROC_START_ERR ; } FORK(helper_pid); if (0 > helper_pid) { save_errno = errno; helper->helper_shutdown = save_shutdown; gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_HLPPROC, 0, ERR_TEXT, 2, LEN_AND_LIT("Could not fork Helper process"), save_errno); repl_errno = EREPL_UPDSTART_FORK; return UPDPROC_START_ERR; } if (0 == helper_pid) { /* helper */ getjobnum(); helper->helper_pid_prev = process_id; /* identify owner of slot */ if (WBTEST_ENABLED(WBTEST_BADEXEC_HELPER_PROCESS)) STRCPY(helper_cmd, "ersatz"); if (-1 == EXECL(helper_cmd, helper_cmd, UPDHELPER_CMD_ARG1, UPDHELPER_CMD_ARG2, (UPD_HELPER_READER == helper_type) ? UPDHELPER_READER_CMD_ARG3 : UPDHELPER_WRITER_CMD_ARG3, NULL)) { save_errno = errno; helper->helper_shutdown = save_shutdown; gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_HLPPROC, 0, ERR_TEXT, 2, LEN_AND_LIT("Could not exec Helper Process"), save_errno); repl_errno = EREPL_UPDSTART_EXEC; UNDERSCORE_EXIT(UPDPROC_START_ERR); } } /* Wait for helper to startup */ while (helper_pid != helper->helper_pid && is_proc_alive(helper_pid, 0)) { SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START); UNIX_ONLY(WAITPID(helper_pid, &status, WNOHANG, waitpid_res);) /* Release defunct helper process if dead */ } /* The helper has now gone far enough in the initialization, or died before initialization. Consider startup completed. */ repl_log(gtmrecv_log_fp, TRUE, TRUE, "Helper %s started. PID %d [0x%X]\n", (UPD_HELPER_READER == helper_type) ? "reader" : "writer", helper_pid, helper_pid); return UPDPROC_STARTED; }
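/* Note the two-sided check on SNPRINTF's return in this helper_init: a negative value flags an encoding error, while a
 * value >= the buffer size means the command path was silently truncated; either way the helper must not be exec'd with a
 * mangled path. The same guard in plain C; the command shape is illustrative.
 */
#include <stdio.h>

#define EX_CMD_MAXLEN	256	/* stand-in for UPDHELPER_CMD_MAXLEN */

/* Build "<dist>/mupip" into cmd; return 0 on success, -1 on error or truncation. */
static int build_helper_cmd(char cmd[EX_CMD_MAXLEN], const char *dist)
{
	int	len;

	len = snprintf(cmd, EX_CMD_MAXLEN, "%s/mupip", dist);
	if ((0 > len) || (EX_CMD_MAXLEN <= len))
		return -1;	/* snprintf returns the needed length, so >= size means truncation */
	return 0;
}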
int gtmsource() { int status, log_init_status, waitpid_res, save_errno; char print_msg[1024], tmpmsg[1024]; gd_region *reg, *region_top; sgmnt_addrs *csa, *repl_csa; boolean_t all_files_open, isalive; pid_t pid, ppid, procgp; seq_num read_jnl_seqno, jnl_seqno; unix_db_info *udi; gtmsource_local_ptr_t gtmsource_local; boolean_t this_side_std_null_coll; int null_fd, rc; memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs)); call_on_signal = gtmsource_sigstop; ESTABLISH_RET(gtmsource_ch, SS_NORMAL); if (-1 == gtmsource_get_opt()) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR); if (gtmsource_options.shut_down) { /* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other * MUPIP REPLIC commands that need these locks for their function. */ if (0 < gtmsource_options.shutdown_time) { repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n", gtmsource_options.shutdown_time); LONG_SLEEP(gtmsource_options.shutdown_time); } else repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n"); } else if (gtmsource_options.start) { repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n", gtmsource_options.secondary_instname); } if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)) { /* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now * in case this is a secondary -> primary transition. This is so we can later switch journal files in all * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic * currently has assumptions that a process holds only one ftok at any point in time. */ assert(NULL == gd_header); gvinit(); all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } } jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator); /* is_jnlpool_creator == TRUE ==> this process created the journal pool * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it. 
	 */
	if (gtmsource_options.shut_down)
		gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.activate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.deactivate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.checkhealth)
		gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.changelog)
		gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showbacklog)
		gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.stopsourcefilter)
		gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.jnlpool)
		gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.losttncomplete)
		gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.needrestart)
		gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showfreeze)
		gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.setfreeze)
		gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN);
	else if (!gtmsource_options.start)
	{
		assert(CLI_PRESENT == cli_present("STATSLOG"));
		gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN);
	}
	assert(gtmsource_options.start);
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* Set "child_server_running" to FALSE before forking off the child. Wait for it to be set to TRUE by the child. */
	gtmsource_local = jnlpool.gtmsource_local;
	gtmsource_local->child_server_running = FALSE;
	FORK(pid);
	if (0 > pid)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Could not fork source server"), save_errno);
	} else if (0 < pid)
	{	/* Parent. Wait until the child sets "child_server_running" to TRUE. That is an indication that the child
		 * source server has completed its initialization phase and is all set, so the parent command can return.
		 */
		while (isalive = is_proc_alive(pid, 0))	/* note : intended assignment */
		{
			if (gtmsource_local->child_server_running)
				break;
			/* To take care of reassignment of PIDs, the while condition should be && with the condition
			 * (PPID of pid == process_id)
			 */
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START);
			WAITPID(pid, &status, WNOHANG, waitpid_res);	/* Release defunct child if dead */
		}
		if (isalive)
		{	/* Child process is alive and started with no issues */
			if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)))
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
						RTS_ERROR_LITERAL("Error in rel_sem"), save_errno);
			ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE);
		} else
		{	/* The child source server process errored out at startup and is no longer alive.
			 * If we were the one who created the journal pool, clean it up.
			 */
			repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n");
			if (is_jnlpool_creator)
				status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN);
		}
		/* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup
		 * is in progress - the parent forks and dies, the system releases sem 0 and 1, and checkhealth might test
		 * the value of sem 1 before the child grabs sem 1.
		 */
		gtmsource_exit(isalive ?
				SRV_ALIVE : SRV_ERR);
	}
	/* Point stdin to /dev/null (see the standalone sketch following this function) */
	OPENFILE("/dev/null", O_RDONLY, null_fd);
	if (0 > null_fd)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0);
	FCNTL3(null_fd, F_DUPFD, 0, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0);
	CLOSEFILE(null_fd, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0);
	/* The parent process (the source server startup command) will be holding the ftok semaphore and jnlpool access
	 * semaphore at this point. The variables that indicate this would have been copied over to the child during the
	 * fork, making the child think it is holding them when it actually is not. Reset those variables in the child to
	 * ensure they do not misrepresent the holder of those semaphores.
	 */
	ftok_sem_reg = NULL;
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(udi->grabbed_ftok_sem);
	udi->grabbed_ftok_sem = FALSE;
	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE;
	assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]);
	/* Start child source server initialization */
	is_src_server = TRUE;
	OPERATOR_LOG_MSG;
	process_id = getpid();
	/* Reinvoke secshr related initialization with the child's pid */
	INVOKE_INIT_SECSHR_ADDRS;
	/* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool.
	 * Note that the initialization would already have been done by the parent source server startup command, but we need
	 * to redo it with the child process id.
	 */
	assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id));
	mutex_per_process_init();
	START_HEARTBEAT_IF_NEEDED;
	ppid = getppid();
	log_init_status = repl_log_init(REPL_GENERAL_LOG, &gtmsource_log_fd, gtmsource_options.log_file);
	assert(SS_NORMAL == log_init_status);
	repl_log_fd2fp(&gtmsource_log_fp, gtmsource_log_fd);
	if (-1 == (procgp = setsid()))
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Source server error in setsid"), errno);
#	endif /* REPL_DEBUG_NOBACKGROUND */
	if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level)
		gtm_zlib_init();	/* Open zlib shared library for compression/decompression */
	REPL_DPRINT1("Setting up regions\n");
	gvinit();
	/* We use the same code dse uses to open all regions, but we must make sure they are all open before proceeding.
	 */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		gtmsource_exit(ABNORMAL_SHUTDOWN);
	}
	/* Determine the primary side null subscripts collation order and check that all regions use the same order */
	this_side_std_null_coll = -1;
	for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
	{
		csa = &FILE_INFO(reg)->s_addrs;
		if (this_side_std_null_coll != csa->hdr->std_null_coll)
		{
			if (-1 == this_side_std_null_coll)
				this_side_std_null_coll = csa->hdr->std_null_coll;
			else
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF);
				gtmsource_exit(ABNORMAL_SHUTDOWN);
			}
		}
		if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		if (reg->read_only && REPL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
					RTS_ERROR_LITERAL("Source Server does not have write permissions to one or "
						"more database files that are replicated"));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	/* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is
	 * released.
	 */
	gtmsource_local->gtmsource_pid = process_id;
	gtmsource_local->gtmsource_state = GTMSOURCE_START;
	if (is_jnlpool_creator)
	{
		DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id);
		gtmsource_seqno_init(this_side_std_null_coll);
		if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)
		{	/* Created the journal pool as a root primary. Append a history record to the replication instance
			 * file. The function "gtmsource_rootprimary_init" does just that.
			 */
			gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
		}
	}
	/* After this point we can no longer have the case where all the regions are unreplicated/non-journaled. */
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used
	 * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0.
	 * Note that the parent source server startup command would have done an increment of the ftok counter semaphore
	 * for the replication instance file. But the source server process (the child) that comes here would not have done
	 * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay.
	 */
	if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP);
	/* Increment the source server count semaphore */
	status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM);
	if (0 != status)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno);
	}
#	else
	if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM)))
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno);
	}
#	endif /* REPL_DEBUG_NOBACKGROUND */
	gtmsource_srv_count++;
	gtmsource_local->child_server_running = TRUE;	/* At this point, the parent startup command stops waiting for the child */
	gtm_event_log_init();
	/* Log the source server startup command line first */
	SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str);
	repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg);
	SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]",
			process_id, gtmsource_local->secondary_instname);
	sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
	repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	if (is_jnlpool_creator)
	{
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n",
				jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	} else
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n",
				jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
#	ifdef GTM_TLS
	if (REPL_TLS_REQUESTED)
	{
		repl_do_tls_init(gtmsource_log_fp);
		assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK);
	}
#	endif
	if (jnlpool.jnlpool_ctl->freeze)
	{
		last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze;
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname);
		repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg);
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment);
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	}
	gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly;
	do
	{	/* If mode is passive, go to sleep. Wake up every now and then and check whether to become active.
		 */
		gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START;
		if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN))
		{
			gtmsource_poll_actions(FALSE);
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE);
			continue;
		}
		if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode)
		{	/* Shutdown initiated */
			assert(gtmsource_local->shutdown == SHUTDOWN);
			sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2,
					RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled"));
			repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
			gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
			break;
		}
		gtmsource_poll_actions(FALSE);
		if (GTMSOURCE_CHANGING_MODE == gtmsource_state)
			continue;
		if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode)
			gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE;
		SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d",
				gtmsource_local->secondary_port);
		sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
		gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
		DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
		assert(!repl_csa->hold_onto_crit);	/* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK);
		if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state)
		{
			repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n");
			repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
					jnlpool.jnlpool_ctl->jnl_seqno);
			continue;
		}
		QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr);
		gtmsource_local->read = jnlpool.jnlpool_ctl->write;
		gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL;
		read_jnl_seqno = gtmsource_local->read_jnl_seqno;
		assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno);
		if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno)
		{	/* We are behind the pool, so start in file-read mode to pick up the latest generation journal
			 * files (see the read-state sketch following this function).
			 */
			gtmsource_local->read_state = READ_FILE;
			QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno);
			gtmsource_pool2file_transition = TRUE;
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
		if (SS_NORMAL != (status = gtmsource_alloc_tcombuff()))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status);
		gtmsource_filter = NO_FILTER;
		if ('\0' != gtmsource_local->filter_cmd[0])
		{
			if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd)))
				gtmsource_filter |= EXTERNAL_FILTER;
			else
				gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		gtmsource_process();
		/* "gtmsource_process" returns only when the mode needs to be changed to PASSIVE */
		assert(gtmsource_state == GTMSOURCE_CHANGING_MODE);
		gtmsource_ctl_close();
		gtmsource_free_msgbuff();
		gtmsource_free_tcombuff();
		gtmsource_free_filter_buff();
		gtmsource_stop_heartbeat();
		if (FD_INVALID != gtmsource_sock_fd)
			repl_close(&gtmsource_sock_fd);
		if (gtmsource_filter & EXTERNAL_FILTER)
			repl_stop_filter();
	} while (TRUE);
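/* A minimal, self-contained sketch (not compiled; "#if 0") of the detach-from-terminal steps the child source server
 * performs above: redirect stdin to /dev/null, then drop the controlling terminal with setsid(). It uses dup2() where
 * the server uses FCNTL3(F_DUPFD); "detach_from_terminal" is an invented name, not a GT.M function.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static int detach_from_terminal(void)
{
	int	null_fd;

	if (0 > (null_fd = open("/dev/null", O_RDONLY)))
		return -1;
	if (0 > dup2(null_fd, STDIN_FILENO))	/* stdin now reads EOF from /dev/null */
	{
		close(null_fd);
		return -1;
	}
	if (0 > close(null_fd))
		return -1;
	return (-1 == setsid()) ? -1 : 0;	/* new session: no controlling terminal */
}
#endif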
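/* Sketch of the pool-vs-file read-state decision made on activation above, with the journal pool reduced to two
 * sequence numbers. The names (choose_read_state, resume_seqno, pool_seqno, read_state_t) are invented for
 * illustration; in the server the same decision also records gtmsource_save_read_jnl_seqno and sets
 * gtmsource_pool2file_transition so the file reader knows when to switch back to the pool.
 */
#if 0
typedef unsigned long long	seqno_t;
typedef enum { READ_FROM_POOL, READ_FROM_FILE } read_state_t;

/* resume_seqno : first seqno this server still has to send to the secondary.
 * pool_seqno   : next seqno the journal pool will hand out (the pool holds only the most recent records).
 */
static read_state_t choose_read_state(seqno_t resume_seqno, seqno_t pool_seqno, int jnlfileonly)
{
	if (jnlfileonly)
		return READ_FROM_FILE;	/* user forced journal-file-only mode */
	if (resume_seqno < pool_seqno)
		return READ_FROM_FILE;	/* behind the pool: catch up from journal files first */
	return READ_FROM_POOL;		/* in step with the pool: stream directly from it */
}
#endif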
int gtmsource_ipc_cleanup(boolean_t auto_shutdown, int *exit_status)
{
	boolean_t	i_am_the_last_user, attempt_ipc_cleanup;
	int		status, detach_status, remove_status, expected_nattach;
	unix_db_info	*udi;
	struct shmid_ds	shm_buf;

	/* Attempt cleaning up the IPCs */
	attempt_ipc_cleanup = TRUE;
	/* Wait for the source server to detach and take over the semaphore */
	if (!auto_shutdown && (0 > grab_sem(SOURCE, SRC_SERV_COUNT_SEM)))
	{
		repl_log(stderr, FALSE, TRUE,
				"Error taking control of source server count semaphore : %s. Shutdown not complete\n",
				REPL_SEM_ERROR);
		*exit_status = ABNORMAL_SHUTDOWN;
		attempt_ipc_cleanup = FALSE;
	}
	udi = (unix_db_info *)FILE_INFO(jnlpool.jnlpool_dummy_reg);
	if (!auto_shutdown || !gtmsource_srv_count)
		expected_nattach = 1;	/* Self, or parent */
	else
		expected_nattach = 0;	/* Source server already detached */
	i_am_the_last_user = ((0 == (status = shmctl(udi->shmid, IPC_STAT, &shm_buf)))
				&& (shm_buf.shm_nattch == expected_nattach));
	if (!i_am_the_last_user)
	{
		if (status < 0)
			repl_log(stderr, FALSE, TRUE, "Error in jnlpool shmctl : %s\n", STRERROR(ERRNO));
		else
			repl_log(stderr, FALSE, TRUE, "Not deleting jnlpool ipcs. %d processes still attached to jnlpool\n",
					(int)(shm_buf.shm_nattch - expected_nattach));
		attempt_ipc_cleanup = FALSE;
		*exit_status = ABNORMAL_SHUTDOWN;
	}
	if (attempt_ipc_cleanup)
	{
		if ((INVALID_SHMID != udi->shmid)
				&& (auto_shutdown || (0 == (detach_status = SHMDT(jnlpool.jnlpool_ctl))))
				&& (0 == (remove_status = shm_rmid(udi->shmid))))
		{
			jnlpool.jnlpool_ctl = jnlpool_ctl = NULL;
			pool_init = FALSE;
			repl_log(stdout, FALSE, FALSE, "Journal pool shared memory removed\n");
			if (0 == (status = remove_sem_set(SOURCE)))
				repl_log(stdout, FALSE, TRUE, "Journal pool semaphore removed\n");
			else
			{
				repl_log(stderr, FALSE, TRUE, "Error removing jnlpool semaphore : %s\n", STRERROR(status));
				*exit_status = ABNORMAL_SHUTDOWN;
			}
		} else if (INVALID_SHMID != udi->shmid)
		{
			if (!auto_shutdown && (detach_status < 0))
				repl_log(stderr, FALSE, FALSE,
						"Error detaching from jnlpool shared memory : %s. Shared memory not removed\n",
						STRERROR(ERRNO));
			else if (0 != remove_status)
			{
				if (!auto_shutdown)
				{	/* Detached successfully */
					jnlpool.jnlpool_ctl = NULL;
					jnlpool_ctl = NULL;
					pool_init = FALSE;
				}
				repl_log(stderr, FALSE, TRUE, "Error removing jnlpool shared memory : %s\n", STRERROR(ERRNO));
			}
			*exit_status = ABNORMAL_SHUTDOWN;
		}
	}
	return attempt_ipc_cleanup;
}
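/* A minimal, self-contained sketch (not compiled; "#if 0") of the last-user test and removal sequence used in
 * "gtmsource_ipc_cleanup" above: IPC_STAT the segment, compare shm_nattch against the attaches we account for
 * ourselves, and only then detach and IPC_RMID. "remove_shm_if_last" is an invented name; the server additionally
 * removes the semaphore set and distinguishes detach failures from removal failures.
 */
#if 0
#include <sys/ipc.h>
#include <sys/shm.h>

/* Returns 1 if the segment was removed, 0 if other processes are still attached, -1 on error.
 * expected_nattach is the number of attaches that are "ours" (self and/or parent), as computed above.
 */
static int remove_shm_if_last(int shmid, void *base, int expected_nattach)
{
	struct shmid_ds	shm_buf;

	if (-1 == shmctl(shmid, IPC_STAT, &shm_buf))
		return -1;
	if ((int)shm_buf.shm_nattch != expected_nattach)
		return 0;	/* someone else is still attached: leave the IPCs alone */
	if ((NULL != base) && (-1 == shmdt(base)))
		return -1;	/* drop our own attach first, as SHMDT() does above */
	return (-1 == shmctl(shmid, IPC_RMID, NULL)) ? -1 : 1;
}
#endif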
int gtmrecv_ipc_cleanup(boolean_t auto_shutdown, int *exit_status)
{
	boolean_t	i_am_the_last_user, attempt_ipc_cleanup;
	int		status, detach_status, remove_status, expected_nattach;
	struct shmid_ds	shm_buf;

	/* Attempt cleaning up the IPCs */
	attempt_ipc_cleanup = TRUE;
	/* Wait for the receiver server and update process to detach and take over the semaphores. Note that the receiver
	 * server has already waited for the update process to detach. It is done here as a precaution against receiver
	 * server crashes.
	 */
	if (!auto_shutdown)
		status = grab_sem(RECV, RECV_SERV_COUNT_SEM);
	else
		status = 0;
	if ((0 == status) && (0 > (status = grab_sem(RECV, UPD_PROC_COUNT_SEM))))
		rel_sem(RECV, RECV_SERV_COUNT_SEM);
	if (status < 0)
	{
		repl_log(stderr, FALSE, TRUE,
				"Error taking control of Receiver Server/Update Process count semaphore : %s. "
				"Shutdown not complete\n", REPL_SEM_ERROR);
		*exit_status = ABNORMAL_SHUTDOWN;
		attempt_ipc_cleanup = FALSE;
	}
	/* Now we have locked out all users from the receive pool */
	if (!auto_shutdown || !gtmrecv_srv_count)
		expected_nattach = 1;	/* Self, or parent */
	else
		expected_nattach = 0;	/* Receiver server already detached */
	i_am_the_last_user = ((0 == (status = shmctl(recvpool_shmid, IPC_STAT, &shm_buf)))
				&& (shm_buf.shm_nattch == expected_nattach));
	if (!i_am_the_last_user)
	{
		if (status < 0)
			repl_log(stderr, FALSE, TRUE, "Error in recvpool shmctl : %s\n", STRERROR(ERRNO));
		else
			repl_log(stderr, FALSE, TRUE,
					"Not deleting receive pool ipcs. %d processes still attached to receive pool\n",
					(int)(shm_buf.shm_nattch - expected_nattach));
		attempt_ipc_cleanup = FALSE;
		*exit_status = ABNORMAL_SHUTDOWN;
	}
	if (attempt_ipc_cleanup)
	{
		if ((INVALID_SHMID != recvpool_shmid)
				&& (auto_shutdown || (0 == (detach_status = SHMDT(recvpool.recvpool_ctl))))
				&& (0 == (remove_status = shm_rmid(recvpool_shmid))))
		{
			recvpool.recvpool_ctl = NULL;
			repl_log(stdout, FALSE, FALSE, "Receive pool shared memory removed\n");
			if (0 == (status = remove_sem_set(RECV)))
				repl_log(stdout, FALSE, TRUE, "Receive pool semaphore removed\n");
			else
			{
				repl_log(stderr, FALSE, TRUE, "Error removing receive pool semaphore : %s\n",
						STRERROR(status));
				*exit_status = ABNORMAL_SHUTDOWN;
			}
		} else if (INVALID_SHMID != recvpool_shmid)
		{
			if (!auto_shutdown && (detach_status < 0))
				repl_log(stderr, FALSE, FALSE,
						"Error detaching from receive pool shared memory : %s. "
						"Shared memory not removed\n", STRERROR(ERRNO));
			else if (0 != remove_status)
			{
				if (!auto_shutdown)
					recvpool.recvpool_ctl = NULL;	/* Detached successfully */
				repl_log(stderr, FALSE, TRUE, "Error removing receive pool shared memory : %s\n",
						STRERROR(ERRNO));
			}
			*exit_status = ABNORMAL_SHUTDOWN;
		}
	}
	return attempt_ipc_cleanup;
}
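/* Sketch (not compiled; "#if 0") of the all-or-nothing acquisition of the two counter semaphores performed in
 * "gtmrecv_ipc_cleanup" above: if the second grab fails, the first is released so a partial lockout never persists.
 * "grab_both", "grab_one" and "release_one" are hypothetical stand-ins for the grab_sem/rel_sem pair.
 */
#if 0
static int grab_both(int (*grab_one)(int which), int (*release_one)(int which), int sem_a, int sem_b)
{
	if (0 > grab_one(sem_a))
		return -1;	/* nothing held, nothing to undo */
	if (0 > grab_one(sem_b))
	{
		release_one(sem_a);	/* undo the first grab so no partial lockout remains */
		return -1;
	}
	return 0;	/* both held: callers may now safely inspect and remove the IPCs */
}
#endif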