int gtmrecv_changelog(void) { uint4 changelog_desired = 0, changelog_accepted = 0; /* Grab the recvpool jnlpool option write lock */ if (0 > grab_sem(RECV, RECV_SERV_OPTIONS_SEM)) { util_out_print("Error grabbing recvpool option write lock. Could not initiate change log", TRUE); return (ABNORMAL_SHUTDOWN); } if (0 != recvpool.gtmrecv_local->changelog || 0 != recvpool.upd_proc_local->changelog) { util_out_print("Change log is already in progress. Not initiating change in log file or log interval", TRUE); rel_sem(RECV, RECV_SERV_OPTIONS_SEM); return (ABNORMAL_SHUTDOWN); } if ('\0' != gtmrecv_options.log_file[0]) /* trigger change in log file (for both receiver and update process) */ { changelog_desired |= REPLIC_CHANGE_LOGFILE; if (0 != strcmp(recvpool.gtmrecv_local->log_file, gtmrecv_options.log_file)) { changelog_accepted |= REPLIC_CHANGE_LOGFILE; strcpy(recvpool.gtmrecv_local->log_file, gtmrecv_options.log_file); util_out_print("Change log initiated with file !AD", TRUE, LEN_AND_STR(gtmrecv_options.log_file)); } else util_out_print("Log file is already !AD. Not initiating change in log file", TRUE, LEN_AND_STR(gtmrecv_options.log_file)); } if (0 != gtmrecv_options.rcvr_log_interval) /* trigger change in receiver log interval */ { changelog_desired |= REPLIC_CHANGE_LOGINTERVAL; if (gtmrecv_options.rcvr_log_interval != recvpool.gtmrecv_local->log_interval) { changelog_accepted |= REPLIC_CHANGE_LOGINTERVAL; recvpool.gtmrecv_local->log_interval = gtmrecv_options.rcvr_log_interval; util_out_print("Change initiated with receiver log interval !UL", TRUE, gtmrecv_options.rcvr_log_interval); } else util_out_print("Receiver log interval is already !UL. Not initiating change in log interval", TRUE, gtmrecv_options.rcvr_log_interval); } if (0 != gtmrecv_options.upd_log_interval) /* trigger change in update process log interval */ { changelog_desired |= REPLIC_CHANGE_UPD_LOGINTERVAL; if (gtmrecv_options.upd_log_interval != recvpool.upd_proc_local->log_interval) { changelog_accepted |= REPLIC_CHANGE_UPD_LOGINTERVAL; recvpool.upd_proc_local->log_interval = gtmrecv_options.upd_log_interval; util_out_print("Change initiated with update process log interval !UL", TRUE, gtmrecv_options.upd_log_interval); } else util_out_print("Update process log interval is already !UL. Not initiating change in log interval", TRUE, gtmrecv_options.upd_log_interval); } if (0 != changelog_accepted) recvpool.gtmrecv_local->changelog = changelog_accepted; else util_out_print("No change to log file or log interval", TRUE); rel_sem(RECV, RECV_SERV_OPTIONS_SEM); return ((0 != changelog_accepted && changelog_accepted == changelog_desired) ? NORMAL_SHUTDOWN : ABNORMAL_SHUTDOWN); }
int gtmsource_secnd_update(boolean_t print_message) { if (grab_sem(SOURCE, SRC_SERV_OPTIONS_SEM) < 0) { util_out_print("Error grabbing jnlpool option write lock. Could not initiate change log", TRUE); return(ABNORMAL_SHUTDOWN); } grab_lock(jnlpool.jnlpool_dummy_reg, ASSERT_NO_ONLINE_ROLLBACK); jnlpool.jnlpool_ctl->upd_disabled = update_disable; rel_lock(jnlpool.jnlpool_dummy_reg); rel_sem(SOURCE, SRC_SERV_OPTIONS_SEM); if (print_message) util_out_print("Updates are now !AZ", TRUE, update_disable ? "disabled" : "enabled"); return(NORMAL_SHUTDOWN); }
int gtmrecv_endupd(void) { pid_t savepid; int exit_status; pid_t waitpid_res; repl_log(stdout, TRUE, TRUE, "Initiating shut down of Update Process\n"); recvpool.upd_proc_local->upd_proc_shutdown = SHUTDOWN; /* Wait for update process to shut down */ while((SHUTDOWN == recvpool.upd_proc_local->upd_proc_shutdown) && (0 < (savepid = (pid_t)recvpool.upd_proc_local->upd_proc_pid)) && is_proc_alive(savepid, 0)) { SHORT_SLEEP(GTMRECV_WAIT_FOR_UPD_SHUTDOWN); WAITPID(savepid, &exit_status, WNOHANG, waitpid_res); /* Release defunct update process if dead */ } exit_status = recvpool.upd_proc_local->upd_proc_shutdown; if (SHUTDOWN == exit_status) { if (0 == savepid) /* No Update Process */ exit_status = NORMAL_SHUTDOWN; else /* Update Process Crashed */ { repl_log(stderr, TRUE, TRUE, "Update Process exited abnormally, INTEGRITY CHECK might be warranted\n"); exit_status = ABNORMAL_SHUTDOWN; } } /* Wait for the Update Process to detach */ if (0 == grab_sem(RECV, UPD_PROC_COUNT_SEM)) { if(0 != (errno = rel_sem(RECV, UPD_PROC_COUNT_SEM))) repl_log(stderr, TRUE, TRUE, "Error releasing the Update Process Count semaphore : %s\n", REPL_SEM_ERROR); repl_log(stdout, TRUE, TRUE, "Update Process exited\n"); } else { repl_log(stderr, TRUE, TRUE, "Error in update proc count semaphore : %s\n", REPL_SEM_ERROR); exit_status = ABNORMAL_SHUTDOWN; } return (exit_status); }
int gtmsource_losttncomplete(void) { int idx; gtmsource_local_ptr_t gtmsourcelocal_ptr; error_def(ERR_MUPCLIERR); error_def(ERR_TEXT); assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); /* We dont need the access control semaphore here. So release it first and avoid any potential deadlocks. */ if (0 != rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)) rts_error(VARLSTCNT(5) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in source server losttncomplete rel_sem"), REPL_SEM_ERRNO); assert(NULL == jnlpool.gtmsource_local); repl_log(stderr, TRUE, TRUE, "Initiating LOSTTNCOMPLETE operation on instance [%s]\n", jnlpool.repl_inst_filehdr->this_instname); /* If this is a root primary instance, propagate this information to secondaries as well so they reset zqgblmod_seqno to 0. * If propagating primary, no need to send this to tertiaries as the receiver on the tertiary cannot have started with * non-zero "zqgblmod_seqno" to begin with (PRIMARYNOTROOT error would have been issued). */ if (!jnlpool.jnlpool_ctl->upd_disabled) { grab_lock(jnlpool.jnlpool_dummy_reg); jnlpool.jnlpool_ctl->send_losttn_complete = TRUE; gtmsourcelocal_ptr = jnlpool.gtmsource_local_array; for (idx = 0; idx < NUM_GTMSRC_LCL; idx++, gtmsourcelocal_ptr++) { if (('\0' == gtmsourcelocal_ptr->secondary_instname[0]) && (0 == gtmsourcelocal_ptr->read_jnl_seqno) && (0 == gtmsourcelocal_ptr->connect_jnl_seqno)) continue; gtmsourcelocal_ptr->send_losttn_complete = TRUE; } rel_lock(jnlpool.jnlpool_dummy_reg); } /* Reset zqgblmod_seqno and zqgblmod_tn to 0 in this instance as well */ repl_inst_reset_zqgblmod_seqno_and_tn(); return (NORMAL_SHUTDOWN); }
/* * This will rundown a replication instance journal (and receiver) pool. * Input Parameter: * replpool_id of the instance. Instance file name must be null terminated in replpool_id. * Returns : * TRUE, if successful. * FALSE, otherwise. */ boolean_t mu_rndwn_repl_instance(replpool_identifier *replpool_id, boolean_t immediate, boolean_t rndwn_both_pools, boolean_t *jnlpool_sem_created) { boolean_t jnlpool_stat = SS_NORMAL, recvpool_stat = SS_NORMAL, decr_cnt, sem_created = FALSE, ipc_rmvd; char *instfilename; unsigned char ipcs_buff[MAX_IPCS_ID_BUF], *ipcs_ptr; gd_region *r_save; repl_inst_hdr repl_instance; static gd_region *reg = NULL; struct semid_ds semstat; struct shmid_ds shmstat; unix_db_info *udi; int save_errno, sem_id, shm_id, status; sgmnt_addrs *repl_csa; boolean_t was_crit; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; if (NULL == reg) { r_save = gv_cur_region; mu_gv_cur_reg_init(); reg = gv_cur_region; gv_cur_region = r_save; } *jnlpool_sem_created = FALSE; /* Assert that the layout of replpool_identifier is identical for all versions going forward as the function * "validate_replpool_shm_entry" (used by the argumentless mupip rundown aka "mupip rundown") relies on this. * This assert is placed here (instead of there) because the automated tests exercise this logic much more * than the argumentless code. If any of these asserts fail, "validate_replpool_shm_entry" needs to change * to handle the old and new layouts. * * Structure ----> replpool_identifier <---- size 312 [0x0138] * * offset = 0000 [0x0000] size = 0012 [0x000c] ----> replpool_identifier.label * offset = 0012 [0x000c] size = 0001 [0x0001] ----> replpool_identifier.pool_type * offset = 0013 [0x000d] size = 0036 [0x0024] ----> replpool_identifier.now_running * offset = 0052 [0x0034] size = 0004 [0x0004] ----> replpool_identifier.repl_pool_key_filler * offset = 0056 [0x0038] size = 0256 [0x0100] ----> replpool_identifier.instfilename */ assert(0 == OFFSETOF(replpool_identifier, label[0])); assert(12 == SIZEOF(((replpool_identifier *)NULL)->label)); assert(12 == OFFSETOF(replpool_identifier, pool_type)); assert(1 == SIZEOF(((replpool_identifier *)NULL)->pool_type)); assert(13 == OFFSETOF(replpool_identifier, now_running[0])); assert(36 == SIZEOF(((replpool_identifier *)NULL)->now_running)); assert(56 == OFFSETOF(replpool_identifier, instfilename[0])); assert(256 == SIZEOF(((replpool_identifier *)NULL)->instfilename)); /* End asserts */ jnlpool.jnlpool_dummy_reg = reg; recvpool.recvpool_dummy_reg = reg; instfilename = replpool_id->instfilename; reg->dyn.addr->fname_len = strlen(instfilename); assert(0 == instfilename[reg->dyn.addr->fname_len]); memcpy((char *)reg->dyn.addr->fname, instfilename, reg->dyn.addr->fname_len + 1); udi = FILE_INFO(reg); udi->fn = (char *)reg->dyn.addr->fname; /* Lock replication instance using ftok semaphore so that no other replication process can startup until we are done with * rundown */ if (!ftok_sem_get(reg, TRUE, REPLPOOL_ID, immediate)) return FALSE; ESTABLISH_RET(mu_rndwn_repl_instance_ch, FALSE); repl_inst_read(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); assert(rndwn_both_pools || JNLPOOL_SEGMENT == replpool_id->pool_type || RECVPOOL_SEGMENT == replpool_id->pool_type); if (rndwn_both_pools || (JNLPOOL_SEGMENT == replpool_id->pool_type)) { /* -------------------------- * First rundown Journal pool * -------------------------- */ shm_id = repl_instance.jnlpool_shmid; if (SS_NORMAL == (jnlpool_stat = mu_replpool_grab_sem(&repl_instance, JNLPOOL_SEGMENT, &sem_created, immediate))) { /* Got JNL_POOL_ACCESS_SEM and incremented SRC_SRV_COUNT_SEM */ assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); assert(holds_sem[SOURCE][SRC_SERV_COUNT_SEM]); sem_id = repl_instance.jnlpool_semid; if ((INVALID_SHMID == shm_id) || (-1 == shmctl(shm_id, IPC_STAT, &shmstat)) || (shmstat.shm_ctime != repl_instance.jnlpool_shmid_ctime)) { repl_instance.jnlpool_shmid = shm_id = INVALID_SHMID; repl_instance.jnlpool_shmid_ctime = 0; } assert((INVALID_SHMID != shm_id) || ((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl))); ipc_rmvd = TRUE; if (INVALID_SHMID != shm_id) { replpool_id->pool_type = JNLPOOL_SEGMENT; jnlpool_stat = mu_rndwn_replpool(replpool_id, &repl_instance, shm_id, &ipc_rmvd); ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools && ((SS_NORMAL != jnlpool_stat) || ipc_rmvd)) gtm_putmsg(VARLSTCNT(6) (jnlpool_stat ? ERR_MUJPOOLRNDWNFL : ERR_MUJPOOLRNDWNSUC), 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } assert(ipc_rmvd || (NULL != jnlpool_ctl)); assert((NULL == jnlpool.jnlpool_ctl) || (SS_NORMAL == jnlpool_stat) || jgbl.onlnrlbk); assert((INVALID_SHMID != repl_instance.jnlpool_shmid) || (0 == repl_instance.jnlpool_shmid_ctime)); assert((INVALID_SHMID == repl_instance.jnlpool_shmid) || (0 != repl_instance.jnlpool_shmid_ctime)); assert(INVALID_SEMID != sem_id); if (!mur_options.rollback) { /* Invoked by MUPIP RUNDOWN in which case the semaphores needs to be removed. But, remove the * semaphore ONLY if we created it here OR the journal pool was successfully removed. */ if (NULL == jnlpool_ctl) { if (((sem_created || (SS_NORMAL == jnlpool_stat)) && (SS_NORMAL == mu_replpool_release_sem(&repl_instance, JNLPOOL_SEGMENT, TRUE)))) { /* Now that semaphores are removed, reset fields in file header */ if (!sem_created) { /* If sem_id was created by mu_replpool_grab_sem then do NOT report the * MURPOOLRNDWNSUC message as it indicates that the semaphore was orphaned * and we removed it when in fact there was no orphaned semaphore and we * created it as part of mu_replpool_grab_sem to get standalone access to * rundown the receiver pool (which may or may not exist) */ ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, sem_id); *ipcs_ptr = '\0'; gtm_putmsg(VARLSTCNT(9) ERR_MUJPOOLRNDWNSUC, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename), ERR_SEMREMOVED, 1, sem_id); } repl_inst_jnlpool_reset(); } } else { /* Anticipatory Freeze scheme is turned ON. So, release just the JNL_POOL_ACCESS_SEM. The * semaphore will be released/removed in the caller (mupip_rundown) */ assert(ANTICIPATORY_FREEZE_AVAILABLE); assertpro(SS_NORMAL == (status = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))); assert(!holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); /* Since we are not resetting the semaphore IDs in the file header, we need to write out * the semaphore IDs in the instance file (if we created them). */ if (sem_created) repl_inst_write(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); } /* If semaphore is not created and the journal pool rundown failed (due to attached processes), * rundown process continues to holds the journal pool access control semaphore. This way, we hold * the semaphore on behalf of the source server (now no longer alive) to prevent mu_rndwn_sem_all * (invoked later) from cleaning up this orphaned semaphore (which causes REPLREQROLLBACK if the * source server is restarted). But, since the semaphore is not released (until the rundown process * dies), holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] continues to remain TRUE. This causes asserts in * ftok_sem_get if mu_rndwn_repl_instance is invoked for a different journal/receive pool. To * workaround it, set holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] to FALSE. This is an interim solution * until we record such semaphores in an ignore-list (or some such) and change mu_rndwn_sem_all to * skip the ones that are present in the ignore list. */ holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE; } } else if (rndwn_both_pools && (INVALID_SHMID != shm_id)) { ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools) gtm_putmsg(VARLSTCNT(6) ERR_MUJPOOLRNDWNFL, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } *jnlpool_sem_created = sem_created; } if (((SS_NORMAL == jnlpool_stat) || !jgbl.mur_rollback) && (rndwn_both_pools || (RECVPOOL_SEGMENT == replpool_id->pool_type))) { /* -------------------------- * Now rundown Receivpool * -------------------------- * Note: RECVPOOL is rundown ONLY if the JNLPOOL rundown was successful. This way, we don't end up * creating new semaphores for the RECVPOOL if ROLLBACK is not going to start anyways because of the failed * JNLPOOL rundown. The only exception is MUPIP RUNDOWN command in which case we try running down the * RECVPOOL even if the JNLPOOL rundown failed. */ shm_id = repl_instance.recvpool_shmid; if (SS_NORMAL == (recvpool_stat = mu_replpool_grab_sem(&repl_instance, RECVPOOL_SEGMENT, &sem_created, immediate))) { sem_id = repl_instance.recvpool_semid; if ((INVALID_SHMID == shm_id) || (-1 == shmctl(shm_id, IPC_STAT, &shmstat)) || (shmstat.shm_ctime != repl_instance.recvpool_shmid_ctime)) { repl_instance.recvpool_shmid = shm_id = INVALID_SHMID; repl_instance.recvpool_shmid_ctime = 0; } ipc_rmvd = TRUE; if (INVALID_SHMID != shm_id) { replpool_id->pool_type = RECVPOOL_SEGMENT; recvpool_stat = mu_rndwn_replpool(replpool_id, &repl_instance, shm_id, &ipc_rmvd); ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools && ((SS_NORMAL != recvpool_stat) || ipc_rmvd)) gtm_putmsg(VARLSTCNT(6) (recvpool_stat ? ERR_MURPOOLRNDWNFL : ERR_MURPOOLRNDWNSUC), 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } assert((TRUE == ipc_rmvd) || (SS_NORMAL != recvpool_stat) || jgbl.onlnrlbk); assert((INVALID_SHMID != repl_instance.recvpool_shmid) || (0 == repl_instance.recvpool_shmid_ctime)); assert((INVALID_SHMID == repl_instance.recvpool_shmid) || (0 != repl_instance.recvpool_shmid_ctime)); assert(INVALID_SEMID != sem_id); if (!mur_options.rollback) { /* Invoked by MUPIP RUNDOWN in which case the semaphores needs to be removed. But, remove the * semaphore ONLY if we created it here OR the receive pool was successfully removed. */ if ((sem_created || (SS_NORMAL == recvpool_stat)) && (SS_NORMAL == mu_replpool_release_sem(&repl_instance, RECVPOOL_SEGMENT, TRUE))) { /* Now that semaphores are removed, reset fields in file header */ if (!sem_created) { /* if sem_id was "created" by mu_replpool_grab_sem then do NOT report the * MURPOOLRNDWNSUC message as it indicates that the semaphore was orphaned and we * removed it when in fact there was no orphaned semaphore and we "created" it as * part of mu_replpool_grab_sem to get standalone access to rundown the receiver * pool (which may or may not exist) */ ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, sem_id); *ipcs_ptr = '\0'; gtm_putmsg(VARLSTCNT(9) ERR_MURPOOLRNDWNSUC, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename), ERR_SEMREMOVED, 1, sem_id); } if (NULL != jnlpool_ctl) { /* Journal pool is not yet removed. So, grab lock before resetting semid/shmid * fields in the file header as the function expects the caller to hold crit * if the journal pool is available */ repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs; assert(!repl_csa->now_crit); assert(!repl_csa->hold_onto_crit); was_crit = repl_csa->now_crit; /* Since we do grab_lock, below, we need to do a per-process initialization. Also, * start heartbeat so that grab_lock can issue MUTEXLCKALERT and get C-stacks if * waiting for crit */ START_HEARTBEAT_IF_NEEDED; mutex_per_process_init(); if (!was_crit) grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY); } repl_inst_recvpool_reset(); if ((NULL != jnlpool_ctl) && !was_crit) rel_lock(jnlpool.jnlpool_dummy_reg); } /* If semaphore is not created and the receive pool rundown failed (due to attached processes), * rundown process continues to holds the receive pool access control semaphore. This way, we hold * the semaphore on behalf of the receiver server (now no longer alive) to prevent mu_rndwn_sem_all * (invoked later) from cleaning up this orphaned semaphore (which causes REPLREQROLLBACK if the * receiver is restarted). But, since the semaphore is not released (until the rundown process * dies), holds_sem[RECV][RECV_POOL_ACCESS_SEM] continues to remain TRUE. This causes asserts in * ftok_sem_get if mu_rndwn_repl_instance is invoked for a different journal/receive pool. To * workaround it, set holds_sem[SOURCE][RECV_POOL_ACCESS_SEM] to FALSE. This is an interim solution * until we record such semaphores in an ignore-list (or some such) and change mu_rndwn_sem_all to * skip the ones that are present in the ignore list. */ assert((sem_created || (SS_NORMAL == recvpool_stat)) || holds_sem[RECV][RECV_POOL_ACCESS_SEM]); DEBUG_ONLY(set_sem_set_recvr(sem_id)); } } else if (rndwn_both_pools && (INVALID_SHMID != shm_id)) { ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools) gtm_putmsg(VARLSTCNT(6) ERR_MURPOOLRNDWNFL, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } } assert(jgbl.onlnrlbk || ANTICIPATORY_FREEZE_AVAILABLE || (NULL == jnlpool.repl_inst_filehdr)); if (mur_options.rollback && (SS_NORMAL == jnlpool_stat) && (SS_NORMAL == recvpool_stat)) { assert(jgbl.onlnrlbk || ANTICIPATORY_FREEZE_AVAILABLE || ((INVALID_SHMID == repl_instance.jnlpool_shmid) && (INVALID_SHMID == repl_instance.recvpool_shmid))); /* Initialize jnlpool.repl_inst_filehdr as it is used later by gtmrecv_fetchresync() */ decr_cnt = FALSE; if (NULL == jnlpool.repl_inst_filehdr) { /* Possible if there is NO journal pool in the first place. In this case, malloc the structure here and * copy the file header from repl_instance structure. */ jnlpool.repl_inst_filehdr = (repl_inst_hdr_ptr_t)malloc(SIZEOF(repl_inst_hdr)); memcpy(jnlpool.repl_inst_filehdr, &repl_instance, SIZEOF(repl_inst_hdr)); } else { assert(repl_instance.jnlpool_semid == jnlpool.repl_inst_filehdr->jnlpool_semid); assert(repl_instance.jnlpool_semid_ctime == jnlpool.repl_inst_filehdr->jnlpool_semid_ctime); assert(repl_instance.jnlpool_shmid == jnlpool.repl_inst_filehdr->jnlpool_shmid); assert(repl_instance.jnlpool_shmid_ctime == jnlpool.repl_inst_filehdr->jnlpool_shmid_ctime); /* If the ONLINE ROLLBACK command is run on the primary when the source server is up and running, * jnlpool.repl_inst_filehdr->recvpool_semid will be INVALID because there is NO receiver server * running. However, ROLLBACK creates semaphores for both journal pool and receive pool and writes * it to the instance file header. Copy this information to the file header copy in the jnlpool * as well */ jnlpool.repl_inst_filehdr->recvpool_semid = repl_instance.recvpool_semid; jnlpool.repl_inst_filehdr->recvpool_semid_ctime = repl_instance.recvpool_semid_ctime; } /* Flush changes to the replication instance file header to disk */ repl_inst_write(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); } else /* for MUPIP RUNDOWN, semid fields in the file header are reset and is written in mu_replpool_release_sem() above */ decr_cnt = (NULL == jnlpool_ctl); /* for anticipatory freeze, mupip_rundown releases the semaphore */ REVERT; /* Release replication instance ftok semaphore lock */ if (!ftok_sem_release(reg, decr_cnt, immediate)) /* Do not decrement the counter if ROLLBACK */ return FALSE; return ((SS_NORMAL == jnlpool_stat) && (SS_NORMAL == recvpool_stat)); }
int decr_sem(int set_index, int sem_num) { return rel_sem(set_index, sem_num); }
int gtmrecv_ipc_cleanup(boolean_t auto_shutdown, int *exit_status) { boolean_t i_am_the_last_user, attempt_ipc_cleanup; int status, detach_status, remove_status, expected_nattach; struct shmid_ds shm_buf; /* Attempt cleaning up the IPCs */ attempt_ipc_cleanup = TRUE; /* * Wait for the Receiver Server and Update Process to detach and * takeover the semaphores. Note that the Receiver Server has already * waited for the Update Process to detach. It is done here as a * precaution against Receiver Server crashes. */ if (!auto_shutdown) status = grab_sem(RECV, RECV_SERV_COUNT_SEM); else status = 0; if (0 == status && 0 > (status = grab_sem(RECV, UPD_PROC_COUNT_SEM))) rel_sem(RECV, RECV_SERV_COUNT_SEM); if (status < 0) { repl_log(stderr, FALSE, TRUE, "Error taking control of Receiver Server/Update Process count semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); *exit_status = ABNORMAL_SHUTDOWN; attempt_ipc_cleanup = FALSE; } /* Now we have locked out all users from the receive pool */ if (!auto_shutdown || !gtmrecv_srv_count) expected_nattach = 1; /* Self, or parent */ else expected_nattach = 0; /* Receiver server already detached */ i_am_the_last_user = (((status = shmctl(recvpool_shmid, IPC_STAT, &shm_buf)) == 0) && (shm_buf.shm_nattch == expected_nattach)); if (!i_am_the_last_user) { if (status < 0) repl_log(stderr, FALSE, TRUE, "Error in jnlpool shmctl : %s\n", STRERROR(ERRNO)); else repl_log(stderr, FALSE, TRUE, "Not deleting receive pool ipcs. %d processes still attached to receive pool\n", shm_buf.shm_nattch - expected_nattach); attempt_ipc_cleanup = FALSE; *exit_status = ABNORMAL_SHUTDOWN; } if (attempt_ipc_cleanup) { if (INVALID_SHMID != recvpool_shmid && (auto_shutdown || (detach_status = SHMDT(recvpool.recvpool_ctl)) == 0) && (remove_status = shm_rmid(recvpool_shmid)) == 0) { recvpool.recvpool_ctl = NULL; repl_log(stdout, FALSE, FALSE, "Receive pool shared memory removed\n"); if (0 == (status = remove_sem_set(RECV))) repl_log(stdout, FALSE, TRUE, "Receive pool semaphore removed\n"); else { repl_log(stderr, FALSE, TRUE, "Error removing receive pool semaphore : %s\n", STRERROR(status)); *exit_status = ABNORMAL_SHUTDOWN; } } else if (INVALID_SHMID != recvpool_shmid) { if (!auto_shutdown && detach_status < 0) repl_log(stderr, FALSE, FALSE, "Error detaching from receive pool shared memory : %s. Shared memory not removed\n", STRERROR(ERRNO)); else if (remove_status != 0) { if (!auto_shutdown) recvpool.recvpool_ctl = NULL; /* Detached successfully */ repl_log(stderr, FALSE, TRUE, "Error removing receive pool shared memory : %s\n", STRERROR(ERRNO)); } *exit_status = ABNORMAL_SHUTDOWN; } } return attempt_ipc_cleanup; }
int gtmsource() { int status, log_init_status, waitpid_res, save_errno; char print_msg[1024], tmpmsg[1024]; gd_region *reg, *region_top; sgmnt_addrs *csa, *repl_csa; boolean_t all_files_open, isalive; pid_t pid, ppid, procgp; seq_num read_jnl_seqno, jnl_seqno; unix_db_info *udi; gtmsource_local_ptr_t gtmsource_local; boolean_t this_side_std_null_coll; int null_fd, rc; memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs)); call_on_signal = gtmsource_sigstop; ESTABLISH_RET(gtmsource_ch, SS_NORMAL); if (-1 == gtmsource_get_opt()) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR); if (gtmsource_options.shut_down) { /* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other * MUPIP REPLIC commands that need these locks for their function. */ if (0 < gtmsource_options.shutdown_time) { repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n", gtmsource_options.shutdown_time); LONG_SLEEP(gtmsource_options.shutdown_time); } else repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n"); } else if (gtmsource_options.start) { repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n", gtmsource_options.secondary_instname); } if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)) { /* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now * in case this is a secondary -> primary transition. This is so we can later switch journal files in all * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic * currently has assumptions that a process holds only one ftok at any point in time. */ assert(NULL == gd_header); gvinit(); all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } } jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator); /* is_jnlpool_creator == TRUE ==> this process created the journal pool * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it. */ if (gtmsource_options.shut_down) gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN); else if (gtmsource_options.activate) gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN); else if (gtmsource_options.deactivate) gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN); else if (gtmsource_options.checkhealth) gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN); else if (gtmsource_options.changelog) gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN); else if (gtmsource_options.showbacklog) gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN); else if (gtmsource_options.stopsourcefilter) gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN); else if (gtmsource_options.jnlpool) gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN); else if (gtmsource_options.losttncomplete) gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN); else if (gtmsource_options.needrestart) gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN); else if (gtmsource_options.showfreeze) gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN); else if (gtmsource_options.setfreeze) gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN); else if (!gtmsource_options.start) { assert(CLI_PRESENT == cli_present("STATSLOG")); gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN); } assert(gtmsource_options.start); # ifndef REPL_DEBUG_NOBACKGROUND /* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */ gtmsource_local = jnlpool.gtmsource_local; gtmsource_local->child_server_running = FALSE; FORK(pid); if (0 > pid) { save_errno = errno; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno); } else if (0 < pid) { /* Parent. Wait until child sets "child_server_running" to FALSE. That is an indication that the child * source server has completed its initialization phase and is all set so the parent command can return. */ while (isalive = is_proc_alive(pid, 0)) /* note : intended assignment */ { if (gtmsource_local->child_server_running) break; /* To take care of reassignment of PIDs, the while condition should be && with the condition * (PPID of pid == process_id) */ SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START); WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */ } if (isalive) { /* Child process is alive and started with no issues */ if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno); ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE); } else { /* Child source server process errored out at startup and is no longer alive. * If we were the one who created the journal pool, let us clean it up. */ repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n"); if (is_jnlpool_creator) status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN); } /* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the * value of sem 1 before the child grabs sem 1. */ gtmsource_exit(isalive ? SRV_ALIVE : SRV_ERR); } /* Point stdin to /dev/null */ OPENFILE("/dev/null", O_RDONLY, null_fd); if (0 > null_fd) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0); FCNTL3(null_fd, F_DUPFD, 0, rc); if (0 > rc) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0); CLOSEFILE(null_fd, rc); if (0 > rc) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0); /* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore * at this point. The variables that indicate this would have been copied over to the child during the fork. This will * make the child think it is actually holding them as well when actually it is not. Reset those variables in the child * to ensure they do not misrepresent the holder of those semaphores. */ ftok_sem_reg = NULL; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->grabbed_ftok_sem); udi->grabbed_ftok_sem = FALSE; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE; assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]); /* Start child source server initialization */ is_src_server = TRUE; OPERATOR_LOG_MSG; process_id = getpid(); /* Reinvoke secshr related initialization with the child's pid */ INVOKE_INIT_SECSHR_ADDRS; /* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool. * Note that the initialization would already have been done by the parent receiver startup command but we need to * redo the initialization with the child process id. */ assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id)); mutex_per_process_init(); START_HEARTBEAT_IF_NEEDED; ppid = getppid(); log_init_status = repl_log_init(REPL_GENERAL_LOG, >msource_log_fd, gtmsource_options.log_file); assert(SS_NORMAL == log_init_status); repl_log_fd2fp(>msource_log_fp, gtmsource_log_fd); if (-1 == (procgp = setsid())) send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Source server error in setsid"), errno); # endif /* REPL_DEBUG_NOBACKGROUND */ if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level) gtm_zlib_init(); /* Open zlib shared library for compression/decompression */ REPL_DPRINT1("Setting up regions\n"); gvinit(); /* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */ all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } /* Determine primary side null subscripts collation order */ /* Also check whether all regions have same null collation order */ this_side_std_null_coll = -1; for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++) { csa = &FILE_INFO(reg)->s_addrs; if (this_side_std_null_coll != csa->hdr->std_null_coll) { if (-1 == this_side_std_null_coll) this_side_std_null_coll = csa->hdr->std_null_coll; else { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF); gtmsource_exit(ABNORMAL_SHUTDOWN); } } if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg)); gtmsource_exit(ABNORMAL_SHUTDOWN); } if (reg->read_only && REPL_ALLOWED(csa)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Source Server does not have write permissions to one or " "more database files that are replicated")); gtmsource_exit(ABNORMAL_SHUTDOWN); } } /* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */ gtmsource_local->gtmsource_pid = process_id; gtmsource_local->gtmsource_state = GTMSOURCE_START; if (is_jnlpool_creator) { DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id); gtmsource_seqno_init(this_side_std_null_coll); if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary) { /* Created the journal pool as a root primary. Append a history record to the replication instance file. * Invoke the function "gtmsource_rootprimary_init" to do that. */ gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno); } } /* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */ # ifndef REPL_DEBUG_NOBACKGROUND /* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0. * Note that the parent source server startup command would have done an increment of the ftok counter semaphore * for the replication instance file. But the source server process (the child) that comes here would not have done * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay. */ if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg)) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP); /* Increment the source server count semaphore */ status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM); if (0 != status) { save_errno = errno; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno); } # else if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM))) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno); } # endif /* REPL_DEBUG_NOBACKGROUND */ gtmsource_srv_count++; gtmsource_local->child_server_running = TRUE; /* At this point, the parent startup command will stop waiting for child */ gtm_event_log_init(); /* Log source server startup command line first */ SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str); repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg); SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]", process_id, gtmsource_local->secondary_instname); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg)); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); if (is_jnlpool_creator) { repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n", jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid); } else repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n", jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); # ifdef GTM_TLS if (REPL_TLS_REQUESTED) { repl_do_tls_init(gtmsource_log_fp); assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK); } # endif if (jnlpool.jnlpool_ctl->freeze) { last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze; sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname); repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg); sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); } gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly; do { /* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. */ gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START; if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN)) { gtmsource_poll_actions(FALSE); SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE); continue; } if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode) { /* Shutdown initiated */ assert(gtmsource_local->shutdown == SHUTDOWN); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled")); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); break; } gtmsource_poll_actions(FALSE); if (GTMSOURCE_CHANGING_MODE == gtmsource_state) continue; if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode) gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE; SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg)); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;) assert(!repl_csa->hold_onto_crit); /* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */ grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK); if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state) { repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n"); repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n", jnlpool.jnlpool_ctl->jnl_seqno); continue; } QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr); gtmsource_local->read = jnlpool.jnlpool_ctl->write; gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL; read_jnl_seqno = gtmsource_local->read_jnl_seqno; assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno); if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno) { gtmsource_local->read_state = READ_FILE; QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno); gtmsource_pool2file_transition = TRUE; /* so that we read the latest gener jnl files */ } rel_lock(jnlpool.jnlpool_dummy_reg); if (SS_NORMAL != (status = gtmsource_alloc_tcombuff())) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status); gtmsource_filter = NO_FILTER; if ('\0' != gtmsource_local->filter_cmd[0]) { if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd))) gtmsource_filter |= EXTERNAL_FILTER; else gtmsource_exit(ABNORMAL_SHUTDOWN); } gtmsource_process(); /* gtmsource_process returns only when mode needs to be changed to PASSIVE */ assert(gtmsource_state == GTMSOURCE_CHANGING_MODE); gtmsource_ctl_close(); gtmsource_free_msgbuff(); gtmsource_free_tcombuff(); gtmsource_free_filter_buff(); gtmsource_stop_heartbeat(); if (FD_INVALID != gtmsource_sock_fd) repl_close(>msource_sock_fd); if (gtmsource_filter & EXTERNAL_FILTER) repl_stop_filter(); } while (TRUE);
int gtmrecv_shutdown(boolean_t auto_shutdown, int exit_status) { uint4 savepid; boolean_t shut_upd_too = FALSE; int status; unix_db_info *udi; error_def(ERR_RECVPOOLSETUP); error_def(ERR_TEXT); repl_log(stdout, TRUE, TRUE, "Initiating shut down\n"); call_on_signal = NULL; /* So we don't reenter on error */ /* assert that auto shutdown should be invoked only if the current process is a receiver server */ assert(!auto_shutdown || gtmrecv_srv_count); if (auto_shutdown) { /* grab the ftok semaphore and recvpool access control lock IN THAT ORDER (to avoid deadlocks) */ repl_inst_ftok_sem_lock(); status = grab_sem(RECV, RECV_POOL_ACCESS_SEM); if (0 > status) { repl_log(stderr, TRUE, TRUE, "Error grabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); return (ABNORMAL_SHUTDOWN); } } else { /* ftok semaphore and recvpool access semaphore should already be held from the previous call to "recvpool_init" */ DEBUG_ONLY(udi = (unix_db_info *)FILE_INFO(recvpool.recvpool_dummy_reg);) assert(udi->grabbed_ftok_sem); assert(holds_sem[RECV][RECV_POOL_ACCESS_SEM]); /* We do not want to hold the options semaphore to avoid deadlocks with receiver server startup (C9F12-002766) */ assert(!holds_sem[RECV][RECV_SERV_OPTIONS_SEM]); recvpool.gtmrecv_local->shutdown = SHUTDOWN; /* Wait for receiver server to die. But release ftok semaphore and recvpool access control semaphore before * waiting as the concurrently running receiver server might need these (e.g. if it is about to call the * function "repl_inst_was_rootprimary"). */ if (0 != rel_sem(RECV, RECV_POOL_ACCESS_SEM)) gtm_putmsg(VARLSTCNT(7) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in receiver server shutdown rel_sem"), REPL_SEM_ERRNO); repl_inst_ftok_sem_release(); /* Wait for receiver server to shut down */ while((SHUTDOWN == recvpool.gtmrecv_local->shutdown) && (0 < (savepid = recvpool.gtmrecv_local->recv_serv_pid)) && is_proc_alive(savepid, 0)) SHORT_SLEEP(GTMRECV_WAIT_FOR_SHUTDOWN); /* (Re)Grab the ftok semaphore and recvpool access control semaphore IN THAT ORDER (to avoid deadlocks) */ repl_inst_ftok_sem_lock(); status = grab_sem(RECV, RECV_POOL_ACCESS_SEM); if (0 > status) { repl_log(stderr, TRUE, TRUE, "Error regrabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); return (ABNORMAL_SHUTDOWN); } exit_status = recvpool.gtmrecv_local->shutdown; if (SHUTDOWN == exit_status) { if (0 == savepid) /* No Receiver Process */ exit_status = NORMAL_SHUTDOWN; else /* Receiver Server Crashed */ { repl_log(stderr, FALSE, TRUE, "Receiver Server exited abnormally\n"); exit_status = ABNORMAL_SHUTDOWN; shut_upd_too = TRUE; } } }