int continue_proc(pid_t pid)
{
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	DEBUG_ONLY(if (!TREF(gtm_usesecshr)))	/* Cause debug builds to talk to gtmsecshr more often */
	{
		if (WBTEST_ENABLED(WBTEST_HOLD_GTMSOURCE_SRV_LATCH))
		{	/* Simulate the kill below, but ignore its return status so that we end up invoking gtmsecshr */
			kill(pid, SIGCONT);
			/* Wait until the target quits so that the kill() call by gtmsecshr fails with ESRCH */
			while (is_proc_alive(pid, 0))
				LONG_SLEEP(1);
		} else if (0 == kill(pid, SIGCONT))
			return 0;
		else if (ESRCH == errno)
		{
			send_msg_csa(CSA_ARG(NULL) VARLSTCNT(5) ERR_NOSUCHPROC, 3, pid, RTS_ERROR_LITERAL("continue"));
			return ESRCH;
		} else
			assert(EINVAL != errno);
	}
	return send_mesg2gtmsecshr(CONTINUE_PROCESS, pid, NULL, 0);
}
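/* Editor's note: every routine collected here leans on is_proc_alive(pid, image_count).
 * For readers without the GT.M sources at hand, a minimal sketch of the Unix idiom such
 * a check is typically built on follows: the kill(pid, 0) existence probe. This is an
 * illustrative simplification; the real routine also takes an image_count argument
 * (meaningful on VMS) that this sketch ignores.
 */
#include <errno.h>
#include <signal.h>
#include <sys/types.h>

static int is_proc_alive_sketch(pid_t pid)
{
	if (0 == kill(pid, 0))		/* signal 0: permission/existence check only, nothing is delivered */
		return 1;
	return (EPERM == errno);	/* EPERM: process exists but belongs to someone else; ESRCH: gone */
}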
/* This function is invoked once all parallel invocations of "mur_forward_multi_proc" are done.
 * It transfers information from shared memory (populated by the parallel processes) into parent process private memory.
 * Note: This function is invoked even if no parallel invocations happen. In that case, it only does a merge-sort of
 * the journal extract/losttrans/brokentrans files if necessary.
 *
 * It returns 0 for normal exit and a non-zero value for an error.
 */
int mur_forward_multi_proc_finish(reg_ctl_list *rctl)
{
	shm_reg_ctl_t		*shm_rctl;
	reg_ctl_list		*rctl_top;
	enum broken_type	recstat;
	sgmnt_addrs		*csa;

	if (multi_proc_in_use)
	{
		multi_proc_key = NULL;	/* reset key now that parallel invocations are done */
		DEBUG_ONLY(multi_proc_key_exception = TRUE;)	/* "multi_proc_in_use" is still TRUE even though all usage of
								 * multiple processes is done, so set this dbg flag to avoid
								 * asserts. Keep this set until we return to "gtm_multi_proc"
								 * (where "multi_proc_in_use" gets reset).
								 */
		assert(multi_proc_shm_hdr->parent_pid == process_id);	/* only the parent process should invoke this function */
		assert(NULL == rctl);
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{	/* Compute murgbl.consist_jnl_seqno as the MAX of the individual process values */
			if (murgbl.consist_jnl_seqno < shm_rctl->consist_jnl_seqno)
				murgbl.consist_jnl_seqno = shm_rctl->consist_jnl_seqno;
			/* Compute murgbl.err_cnt as the SUM of the individual process values. Each parallel process makes
			 * sure it updates only ONE owning region (even if it owns multiple regions) with its murgbl.err_cnt.
			 */
			murgbl.err_cnt += shm_rctl->err_cnt;
			/* Compute murgbl.wrn_count as the SUM of the individual process values, with the same
			 * one-owning-region rule as murgbl.err_cnt above.
			 */
			murgbl.wrn_count += shm_rctl->wrn_count;
			for (recstat = 0; recstat < TOT_EXTR_TYPES; recstat++)
			{
				assert(!rctl->extr_file_created[recstat]);
				rctl->extr_file_created[recstat] = shm_rctl->extr_file_created[recstat];
			}
			if (jgbl.onlnrlbk)
			{	/* The child process borrowed crit from the parent (in "mur_forward_multi_proc"). Reclaim it. */
				csa = rctl->csa;
				assert(csa->hold_onto_crit);	/* would have been set at "mur_open_files" time */
				assert(csa->now_crit);		/* would have been set at "mur_open_files" time */
				assert(csa->nl->in_crit != process_id);
				assert(FALSE == is_proc_alive(shm_rctl->owning_pid, 0));
				assert(csa->nl->in_crit == shm_rctl->owning_pid);
				csa->nl->in_crit = process_id;	/* reset pid back to the parent now that the owning child is done */
				assert(csa->nl->onln_rlbk_pid == shm_rctl->owning_pid);
				csa->nl->onln_rlbk_pid = process_id;
			}
		}
	}
/* Waits for a concurrently running read (from disk into a global buffer) to complete.
 *
 * Returns TRUE if the read completes within a timeout of approximately 1 minute.
 * Returns FALSE otherwise.
 *
 * Similar logic is also present in t_qread and wcs_recover, but they are different enough that
 * they have not been folded into this routine yet.
 */
boolean_t wcs_read_in_progress_wait(cache_rec_ptr_t cr, wbtest_code_t wbox_test_code)
{
	uint4	lcnt, r_epid;
	int4	n;

	for (lcnt = 1; -1 != cr->read_in_progress; lcnt++)
	{
		if (-1 > cr->read_in_progress)
		{	/* outside of design; clear to known state */
			INTERLOCK_INIT(cr);
			assert(0 == cr->r_epid);
			cr->r_epid = 0;
			break;
		}
		wcs_sleep(lcnt);
		GTM_WHITE_BOX_TEST(wbox_test_code, lcnt, (2 * BUF_OWNER_STUCK));
		if (BUF_OWNER_STUCK < lcnt)
		{	/* sick of waiting */
			/* Since cr->r_epid can change concurrently, take a local copy before using it below,
			 * particularly before calling is_proc_alive, as we don't want to call it with a 0 r_epid.
			 */
			r_epid = cr->r_epid;
			if (0 != r_epid)
			{
				if (FALSE == is_proc_alive(r_epid, cr->image_count))
				{	/* process gone; release its lock */
					RELEASE_BUFF_READ_LOCK(cr);
				} else
				{
					assert(gtm_white_box_test_case_enabled);
					return FALSE;
				}
			} else
			{	/* process stopped before it could set r_epid */
				RELEASE_BUFF_READ_LOCK(cr);
				if (-1 > cr->read_in_progress)
				{	/* it was released since the if (cr->r_epid) check; rectify the semaphore */
					LOCK_BUFF_FOR_READ(cr, n);
				}
			}
		}	/* sick of waiting */
	}
	return TRUE;
}
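/* Editor's note: the LOCK_BUFF_FOR_READ / RELEASE_BUFF_READ_LOCK / INTERLOCK_INIT macros
 * above treat cr->read_in_progress as an interlocked counter whose resting value is -1
 * ("no read in progress"); values below -1 are "outside of design" and get reset. A
 * hypothetical model of that convention, using C11 atomics rather than GT.M's interlock
 * macros:
 */
#include <stdatomic.h>

typedef struct
{
	atomic_int	read_in_progress;	/* -1 when idle, >= 0 while a reader holds the buffer */
} cache_rec_model_t;

static void lock_buff_for_read_model(cache_rec_model_t *cr)
{
	atomic_fetch_add(&cr->read_in_progress, 1);	/* -1 -> 0: a read is now in progress */
}

static void release_buff_read_lock_model(cache_rec_model_t *cr)
{
	atomic_fetch_sub(&cr->read_in_progress, 1);	/* 0 -> -1: read complete */
}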
int gtmrecv_endupd(void)
{
	pid_t	savepid;
	int	exit_status;
	pid_t	waitpid_res;

	repl_log(stdout, TRUE, TRUE, "Initiating shut down of Update Process\n");
	recvpool.upd_proc_local->upd_proc_shutdown = SHUTDOWN;
	/* Wait for the update process to shut down */
	while ((SHUTDOWN == recvpool.upd_proc_local->upd_proc_shutdown)
			&& (0 < (savepid = (pid_t)recvpool.upd_proc_local->upd_proc_pid))
			&& is_proc_alive(savepid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_UPD_SHUTDOWN);
		WAITPID(savepid, &exit_status, WNOHANG, waitpid_res);	/* Release defunct update process if dead */
	}
	exit_status = recvpool.upd_proc_local->upd_proc_shutdown;
	if (SHUTDOWN == exit_status)
	{
		if (0 == savepid)	/* No Update Process */
			exit_status = NORMAL_SHUTDOWN;
		else			/* Update Process crashed */
		{
			repl_log(stderr, TRUE, TRUE, "Update Process exited abnormally, INTEGRITY CHECK might be warranted\n");
			exit_status = ABNORMAL_SHUTDOWN;
		}
	}
	/* Wait for the Update Process to detach */
	if (0 == grab_sem(RECV, UPD_PROC_COUNT_SEM))
	{
		if (0 != (errno = rel_sem(RECV, UPD_PROC_COUNT_SEM)))
			repl_log(stderr, TRUE, TRUE, "Error releasing the Update Process Count semaphore : %s\n",
					REPL_SEM_ERROR);
		repl_log(stdout, TRUE, TRUE, "Update Process exited\n");
	} else
	{
		repl_log(stderr, TRUE, TRUE, "Error in update proc count semaphore : %s\n", REPL_SEM_ERROR);
		exit_status = ABNORMAL_SHUTDOWN;
	}
	return (exit_status);
}
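/* Editor's note: the shutdown loop above pairs is_proc_alive() with a non-blocking WAITPID
 * so that an exited-but-unreaped (defunct) child does not read as "alive" forever. A
 * standalone sketch of that idiom with plain waitpid(), outside the GT.M WAITPID retry
 * macro (the helper name is invented for illustration):
 */
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>

static int child_still_running(pid_t pid)
{
	int	status;

	if (waitpid(pid, &status, WNOHANG) == pid)
		return 0;		/* child exited and has now been reaped */
	return 0 == kill(pid, 0);	/* existence probe, as in is_proc_alive */
}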
static void exec_read(BFILE *bf, char *buf, int nbytes)
{
	int	needed, got;
	int4	status;
	char	*curr;
	pid_t	waitpid_res;

	assert(nbytes > 0);
	needed = nbytes;
	curr = buf;
#ifdef DEBUG_ONLINE
	PRINTF("file descriptor is %d and bytes needed is %d\n", bf->fd, needed);
#endif
	while (0 != (got = read(bf->fd, curr, needed)))
	{
		if (got == needed)
			break;
		else if (got > 0)
		{
			needed -= got;
			curr += got;
		}
		/* The check for EINTR below is valid and should not be converted to an EINTR
		 * wrapper macro, since an immediate retry is not attempted. Instead, wcs_sleep
		 * is called.
		 */
		else if ((EINTR != errno) && (EAGAIN != errno))
		{
			gtm_putmsg(VARLSTCNT(1) errno);
			if ((pipe_child > 0) && (FALSE != is_proc_alive(pipe_child, 0)))
				WAITPID(pipe_child, (int *)&status, 0, waitpid_res);
			close(bf->fd);
			restore_read_errno = errno;
			break;
		}
		wcs_sleep(100);
	}
	return;
}
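/* Editor's note: exec_read() is a "read exactly N bytes from a pipe" loop plus GT.M error
 * plumbing. The bare pattern it implements, for reference (a sketch, not the GT.M code):
 * retry on EINTR/EAGAIN, advance on short reads, fail on hard error or premature EOF.
 */
#include <errno.h>
#include <unistd.h>

static int read_full(int fd, char *buf, size_t n)
{
	ssize_t	got;

	while (n > 0)
	{
		got = read(fd, buf, n);
		if (got > 0)
		{	/* short read: advance and keep going */
			buf += got;
			n -= (size_t)got;
		} else if (0 == got)
			return -1;	/* EOF before all n bytes arrived */
		else if ((EINTR != errno) && (EAGAIN != errno))
			return -1;	/* hard error */
	}
	return 0;
}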
static void _sos_VMFaultHandler_reply(void* token, int err){
    dprintf(3, "sos_vmf_reply called\n");
    VMF_cont_t *cont = (VMF_cont_t*)token;
    if (err) {
        dprintf(3, "sos_vmf received an err\n");
    }
    if (!is_proc_alive(cont->pid)) {
        cspace_free_slot(cur_cspace, cont->reply_cap);
        free(cont);
        return;
    }

    /* Flush the i-cache. Ideally, this should only be done when faulting on the text segment. */
    //if (cont->is_code) {
    if (!err) {
        seL4_Word vpage = PAGE_ALIGN(cont->vaddr);
        int x = PT_L1_INDEX(vpage);
        int y = PT_L2_INDEX(vpage);
        seL4_Word kvaddr = (cont->as->as_pd_regs[x][y] & PTE_KVADDR_MASK);
        seL4_CPtr kframe_cap;

        err = frame_get_cap(kvaddr, &kframe_cap);
        //assert(!err); // This kvaddr is ready to use, there should be no error
        seL4_ARM_Page_Unify_Instruction(kframe_cap, 0, PAGESIZE);
    }
    //}

    /* If there is an err here, it is not the process's fault.
     * It is either the kernel running out of memory or swapping not working. */
    seL4_MessageInfo_t reply = seL4_MessageInfo_new(0, 0, 0, 0);
    seL4_Send(cont->reply_cap, reply);
    cspace_free_slot(cur_cspace, cont->reply_cap);
    free(cont);
    set_cur_proc(PROC_NULL);
}
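// Editor's note: the handler above recovers the kernel vaddr of the faulting frame by
// indexing a two-level page directory with PT_L1_INDEX/PT_L2_INDEX. A sketch of what such
// macros typically compute on 32-bit ARM, assuming 4 KiB pages and 10-bit indices per
// level (the project's real constants live in its own headers):
#include <stdint.h>

// assumed vaddr layout: [ L1 index : 10 | L2 index : 10 | page offset : 12 ]
#define PAGE_ALIGN_SKETCH(v)   ((v) & ~((uint32_t)0xfff))
#define PT_L1_INDEX_SKETCH(v)  (((v) >> 22) & 0x3ff)
#define PT_L2_INDEX_SKETCH(v)  (((v) >> 12) & 0x3ff)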
int gtmsource_checkhealth(void)
{
	uint4			gtmsource_pid;
	int			status, semval, save_errno;
	boolean_t		srv_alive, all_files_open;
	gtmsource_local_ptr_t	gtmsourcelocal_ptr;
	int4			index, num_servers;
	seq_num			reg_seqno, jnlseqno;
	gd_region		*reg, *region_top;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			errtxt[OUT_BUFF_SIZE];
	char			*modestr;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	if (NULL != jnlpool.gtmsource_local)	/* check the health of a specific source server */
		gtmsourcelocal_ptr = jnlpool.gtmsource_local;
	else
		gtmsourcelocal_ptr = &jnlpool.gtmsource_local_array[0];
	num_servers = 0;
	status = SRV_ALIVE;
	for (index = 0; index < NUM_GTMSRC_LCL; index++, gtmsourcelocal_ptr++)
	{
		if ('\0' == gtmsourcelocal_ptr->secondary_instname[0])
		{
			assert(NULL == jnlpool.gtmsource_local);
			continue;
		}
		gtmsource_pid = gtmsourcelocal_ptr->gtmsource_pid;
		/* If CHECKHEALTH on a specific secondary instance is requested, print the health information irrespective
		 * of whether a source server for that instance is alive or not. For CHECKHEALTH on ALL secondary instances,
		 * print health information only for those instances that have an active or passive source server alive.
		 */
		if ((NULL == jnlpool.gtmsource_local) && (0 == gtmsource_pid))
			continue;
		repl_log(stdout, TRUE, TRUE, "Initiating CHECKHEALTH operation on source server pid [%d] for secondary instance"
				" name [%s]\n", gtmsource_pid, gtmsourcelocal_ptr->secondary_instname);
		srv_alive = (0 == gtmsource_pid) ? FALSE : is_proc_alive(gtmsource_pid, 0);
		if (srv_alive)
		{
			if (GTMSOURCE_MODE_ACTIVE == gtmsourcelocal_ptr->mode)
				modestr = "ACTIVE";
			else if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsourcelocal_ptr->mode)
				modestr = "ACTIVE REQUESTED";
			else if (GTMSOURCE_MODE_PASSIVE == gtmsourcelocal_ptr->mode)
				modestr = "PASSIVE";
			else if (GTMSOURCE_MODE_PASSIVE_REQUESTED == gtmsourcelocal_ptr->mode)
				modestr = "PASSIVE REQUESTED";
			else
			{
				assert(gtmsourcelocal_ptr->mode != gtmsourcelocal_ptr->mode);
				modestr = "UNKNOWN";
			}
			repl_log(stderr, FALSE, TRUE, FORMAT_STR1, gtmsource_pid, "Source server", "", modestr);
			status |= SRV_ALIVE;
			num_servers++;
		} else
		{
			repl_log(stderr, FALSE, TRUE, FORMAT_STR, gtmsource_pid, "Source server", " NOT");
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_SRCSRVNOTEXIST, 2,
					LEN_AND_STR(gtmsourcelocal_ptr->secondary_instname));
			status |= SRV_DEAD;
		}
		if (NULL != jnlpool.gtmsource_local)
			break;
	}
	if (NULL == jnlpool.gtmsource_local)
	{	/* Compare the number of servers found alive with the current value of the COUNT semaphore.
		 * If they are not equal, report the discrepancy.
		 */
		semval = get_sem_info(SOURCE, SRC_SERV_COUNT_SEM, SEM_INFO_VAL);
		if (-1 == semval)
		{
			save_errno = errno;
			repl_log(stderr, FALSE, TRUE,
					"Error fetching source server count semaphore value : %s\n", STRERROR(save_errno));
			status |= SRV_ERR;
		} else if (semval != num_servers)
		{
			repl_log(stderr, FALSE, FALSE,
					"Error : Expected %d source server(s) to be alive but found %d actually alive\n",
					semval, num_servers);
			repl_log(stderr, FALSE, TRUE,
					"Error : Check if any pid reported above is NOT a source server process\n");
			status |= SRV_ERR;
		}
	}
	/* Check that there are no regions with replication state = WAS_ON (i.e. repl_was_open). If there are, report them.
	 * To determine that, we need to attach to all the database regions.
	 */
	gvinit();
	/* We use the same code dse uses to open all regions, but we must make sure they are all open before proceeding. */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		status |= SRV_ERR;
	} else
	{
		for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
		{
			csa = &FILE_INFO(reg)->s_addrs;
			csd = csa->hdr;
			if (REPL_WAS_ENABLED(csd))
			{
				assert(!JNL_ENABLED(csd) || REPL_ENABLED(csd));	/* the || is for turning replication on concurrently */
				reg_seqno = csd->reg_seqno;
				jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO;
				sgtm_putmsg(errtxt, VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(reg), &reg_seqno, &reg_seqno,
						&jnlseqno, &jnlseqno);
				repl_log(stderr, FALSE, TRUE, errtxt);
				status |= SRV_ERR;
			}
		}
	}
	if (jnlpool.jnlpool_ctl->freeze)
	{
		repl_log(stderr, FALSE, FALSE, "Warning: Instance Freeze is ON\n");
		repl_log(stderr, FALSE, TRUE, "	 Freeze Comment: %s\n", jnlpool.jnlpool_ctl->freeze_comment);
		status |= SRV_ERR;
	}
	return (status + NORMAL_SHUTDOWN);
}
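/* Editor's note: gtmsource_checkhealth() composes its result by OR-ing SRV_* flags together
 * and then biasing the sum by NORMAL_SHUTDOWN; the caller visible later in this collection
 * undoes the bias with "gtmsource_checkhealth() - NORMAL_SHUTDOWN". A hypothetical decode,
 * with illustrative flag values (not the GT.M definitions):
 */
enum { SRV_ALIVE_X = 0x1, SRV_DEAD_X = 0x2, SRV_ERR_X = 0x4 };

static int any_server_problem(int exit_status, int normal_shutdown_bias)
{
	int	flags = exit_status - normal_shutdown_bias;	/* undo the NORMAL_SHUTDOWN bias */

	return 0 != (flags & (SRV_DEAD_X | SRV_ERR_X));
}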
int gtmrecv_upd_proc_init(boolean_t fresh_start)
{
	/* Update Process initialization */
	mstr	upd_proc_log_cmd, upd_proc_trans_cmd;
	char	upd_proc_cmd[UPDPROC_CMD_MAXLEN];
	int	status;
	int	upd_status, save_upd_status;
#ifdef UNIX
	pid_t	upd_pid, waitpid_res;
#elif defined(VMS)
	uint4	upd_pid;
	uint4	cmd_channel;
	$DESCRIPTOR(cmd_desc, UPDPROC_CMD_STR);
#endif

	/* Check if the update process is alive */
	if ((upd_status = is_updproc_alive()) == SRV_ERR)
	{
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Receive pool semctl failure"), REPL_SEM_ERRNO);
		repl_errno = EREPL_UPDSTART_SEMCTL;
		return(UPDPROC_START_ERR);
	} else if (upd_status == SRV_ALIVE && !fresh_start)
	{
		gtm_putmsg(VARLSTCNT(4) ERR_TEXT, 2, RTS_ERROR_LITERAL("Update process already exists. Not starting it"));
		return(UPDPROC_EXISTS);
	} else if (upd_status == SRV_ALIVE)
	{
		gtm_putmsg(VARLSTCNT(6) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Update process already exists. Please kill it before a fresh start"));
		return(UPDPROC_EXISTS);
	}
	save_upd_status = recvpool.upd_proc_local->upd_proc_shutdown;
	recvpool.upd_proc_local->upd_proc_shutdown = NO_SHUTDOWN;
#ifdef UNIX
	if (0 > (upd_pid = fork()))	/* BYPASSOK: we exec immediately, no FORK_CLEAN needed */
	{
		recvpool.upd_proc_local->upd_proc_shutdown = save_upd_status;
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Could not fork update process"), errno);
		repl_errno = EREPL_UPDSTART_FORK;
		return(UPDPROC_START_ERR);
	}
	if (0 == upd_pid)
	{	/* Update Process */
		upd_proc_log_cmd.len = SIZEOF(UPDPROC_CMD) - 1;
		upd_proc_log_cmd.addr = UPDPROC_CMD;
		status = TRANS_LOG_NAME(&upd_proc_log_cmd, &upd_proc_trans_cmd, upd_proc_cmd, SIZEOF(upd_proc_cmd),
				dont_sendmsg_on_log2long);
		if (status != SS_NORMAL)
		{
			gtm_putmsg(VARLSTCNT(6) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
					RTS_ERROR_LITERAL("Could not find path of Update Process. Check value of $gtm_dist"));
			if (SS_LOG2LONG == status)
				gtm_putmsg(VARLSTCNT(5) ERR_LOGTOOLONG, 3, LEN_AND_LIT(UPDPROC_CMD), SIZEOF(upd_proc_cmd) - 1);
			repl_errno = EREPL_UPDSTART_BADPATH;
			return(UPDPROC_START_ERR);
		}
		upd_proc_cmd[upd_proc_trans_cmd.len] = '\0';
		if (EXECL(upd_proc_cmd, upd_proc_cmd, UPDPROC_CMD_ARG1, UPDPROC_CMD_ARG2, NULL) < 0)
		{
			gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
					RTS_ERROR_LITERAL("Could not exec Update Process"), errno);
			repl_errno = EREPL_UPDSTART_EXEC;
			return(UPDPROC_START_ERR);
		}
	}
#elif defined(VMS)
	/* Create a detached server and write startup commands to it */
	status = repl_create_server(&cmd_desc, "GTMU", "", &cmd_channel, &upd_pid, ERR_RECVPOOLSETUP);
	if (SS_NORMAL != status)
	{
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Unable to spawn Update process"), status);
		recvpool.upd_proc_local->upd_proc_shutdown = save_upd_status;
		repl_errno = EREPL_UPDSTART_FORK;
		return(UPDPROC_START_ERR);
	}
#endif
	if (recvpool.upd_proc_local->upd_proc_pid)
		recvpool.upd_proc_local->upd_proc_pid_prev = recvpool.upd_proc_local->upd_proc_pid;
	else
		recvpool.upd_proc_local->upd_proc_pid_prev = upd_pid;
	recvpool.upd_proc_local->upd_proc_pid = upd_pid;
	/* Receiver Server: wait for the update process to start up */
	REPL_DPRINT2("Waiting for update process %d to startup\n", upd_pid);
	while (get_sem_info(RECV, UPD_PROC_COUNT_SEM, SEM_INFO_VAL) == 0 && is_proc_alive(upd_pid, 0))
	{	/* To take care of reassignment of PIDs, the while condition should be &&'ed with the
		 * condition (PPID of pid == process_id).
		 */
		REPL_DPRINT2("Waiting for update process %d to startup\n", upd_pid);
		UNIX_ONLY(WAITPID(upd_pid, &status, WNOHANG, waitpid_res);)	/* Release defunct update process if dead */
		SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START);
	}
static int helper_init(upd_helper_entry_ptr_t helper, recvpool_user helper_type)
{
	int			save_errno, save_shutdown;
	char			helper_cmd[UPDHELPER_CMD_MAXLEN];
	int			status;
	int4			i4status;
	mstr			helper_log_cmd, helper_trans_cmd;
	upd_helper_ctl_ptr_t	upd_helper_ctl;
#ifdef UNIX
	pid_t			helper_pid, waitpid_res;
#elif defined(VMS)
	uint4			helper_pid, cmd_channel;
	char			mbx_suffix[2 + 1];	/* hex representation of numbers 0 through MAX_UPD_HELPERS-1, +1 for '\0' */
	$DESCRIPTOR(cmd_desc_reader, UPDHELPER_READER_CMD_STR);
	$DESCRIPTOR(cmd_desc_writer, UPDHELPER_WRITER_CMD_STR);
#endif

	upd_helper_ctl = recvpool.upd_helper_ctl;
	save_shutdown = helper->helper_shutdown;
	helper->helper_shutdown = NO_SHUTDOWN;
#ifdef UNIX
	if (0 > (helper_pid = fork()))	/* BYPASSOK: we exec immediately, no FORK_CLEAN needed */
	{
		save_errno = errno;
		helper->helper_shutdown = save_shutdown;
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				LEN_AND_LIT("Could not fork update process"), save_errno);
		repl_errno = EREPL_UPDSTART_FORK;
		return UPDPROC_START_ERR;
	}
	if (0 == helper_pid)
	{	/* helper */
		getjobnum();
		helper->helper_pid_prev = process_id;	/* identify owner of slot */
		helper_log_cmd.len = STR_LIT_LEN(UPDHELPER_CMD);
		helper_log_cmd.addr = UPDHELPER_CMD;
		if (SS_NORMAL != (i4status = TRANS_LOG_NAME(&helper_log_cmd, &helper_trans_cmd, helper_cmd,
				SIZEOF(helper_cmd), dont_sendmsg_on_log2long)))
		{
			helper->helper_shutdown = save_shutdown;
			gtm_putmsg(VARLSTCNT(6) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Could not find path of Helper Process. Check value of $gtm_dist"));
			if (SS_LOG2LONG == i4status)
				gtm_putmsg(VARLSTCNT(5) ERR_LOGTOOLONG, 3, LEN_AND_LIT(UPDHELPER_CMD), SIZEOF(helper_cmd) - 1);
			repl_errno = EREPL_UPDSTART_BADPATH;
			return UPDPROC_START_ERR;
		}
		helper_cmd[helper_trans_cmd.len] = '\0';
		if (-1 == EXECL(helper_cmd, helper_cmd, UPDHELPER_CMD_ARG1, UPDHELPER_CMD_ARG2,
				(UPD_HELPER_READER == helper_type) ? UPDHELPER_READER_CMD_ARG3 : UPDHELPER_WRITER_CMD_ARG3, NULL))
		{
			save_errno = errno;
			helper->helper_shutdown = save_shutdown;
			gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Could not exec Helper Process"), save_errno);
			repl_errno = EREPL_UPDSTART_EXEC;
			return UPDPROC_START_ERR;
		}
	}
#elif defined(VMS)
	/* Create a detached server and write startup commands to it */
	i2hex(helper - upd_helper_ctl->helper_list, LIT_AND_LEN(mbx_suffix));
	mbx_suffix[SIZEOF(mbx_suffix) - 1] = '\0';
	/* A mailbox is created per helper, and the mailbox name is assigned to a logical. This logical will persist until the
	 * helper terminates. So, we need to assign a unique logical per helper. Hence the suffix.
	 */
	if (SS_NORMAL != (status = repl_create_server((UPD_HELPER_READER == helper_type) ? &cmd_desc_reader : &cmd_desc_writer,
			UPDHELPER_MBX_PREFIX, mbx_suffix, &cmd_channel, &helper->helper_pid_prev, ERR_RECVPOOLSETUP)))
	{
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				LEN_AND_LIT("Unable to spawn Helper process"), status);
		helper->helper_shutdown = save_shutdown;
		repl_errno = EREPL_UPDSTART_FORK;
		return UPDPROC_START_ERR;
	}
	helper_pid = helper->helper_pid_prev;
#endif
	/* Wait for the helper to start up */
	while (helper_pid != helper->helper_pid && is_proc_alive(helper_pid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START);
		UNIX_ONLY(WAITPID(helper_pid, &status, WNOHANG, waitpid_res);)	/* Release defunct helper process if dead */
	}
	/* The helper has now gone far enough in its initialization, or died before initializing. Consider startup completed. */
#if defined(VMS)
	/* Deassign the send-cmd mailbox channel */
	if (SS_NORMAL != (status = sys$dassgn(cmd_channel)))
	{
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Unable to close upd-send-cmd mbox channel"), status);
		helper->helper_shutdown = save_shutdown;
		repl_errno = EREPL_UPDSTART_BADPATH;	/* just to force an auto-shutdown */
		return UPDPROC_START_ERR;
	}
#endif
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "Helper %s started. PID %d [0x%X]\n",
			(UPD_HELPER_READER == helper_type) ? "reader" : "writer", helper_pid, helper_pid);
	return UPDPROC_STARTED;
}
void op_fngetjpi(mint jpid, mval *kwd, mval *ret)
{
	error_def(ERR_BADJPIPARAM);

	struct tms	proc_times;
	int4		info;
	int		keywd_indx;
	char		upcase[MAX_KEY_LEN];

	assert(stringpool.free >= stringpool.base);
	assert(stringpool.top >= stringpool.free);
	if (stringpool.top - stringpool.free < MAX_STR)
		stp_gcol(MAX_STR);
	MV_FORCE_STR(kwd);
	if (kwd->str.len == 0)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, 4, "Null");
	if (MAX_KEY < kwd->str.len)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, kwd->str.len, kwd->str.addr);
	lower_to_upper((uchar_ptr_t)upcase, (uchar_ptr_t)kwd->str.addr, (int)kwd->str.len);
	keywd_indx = kw_cputim;
	/* Future enhancement:
	 * (i) since the keywords are sorted, we can exit the while loop if 0 < memcmp.
	 * (ii) also, the current comparison relies on kwd->str.len, which means a "C" would imply CPUTIM instead of CSTIME
	 *	or CUTIME; this ambiguity should probably be removed by asking for an exact match of the full keyword.
	 */
	while ((0 != memcmp(upcase, key[keywd_indx], kwd->str.len)) && keywd_indx < MAX_KEY)
		keywd_indx++;
	if (keywd_indx == MAX_KEY)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, kwd->str.len, kwd->str.addr);
	if ((kw_isprocalive != keywd_indx) && ((unsigned int)-1 == times(&proc_times)))
	{
		rts_error(VARLSTCNT(1) errno);	/* need a more specific GTM error message in addition to errno */
		return;
	}
	switch (keywd_indx)
	{
		case kw_cputim:
			info = proc_times.tms_utime + proc_times.tms_stime + proc_times.tms_cutime + proc_times.tms_cstime;
			break;
		case kw_cstime:
			info = proc_times.tms_cstime;
			break;
		case kw_cutime:
			info = proc_times.tms_cutime;
			break;
		case kw_isprocalive:
			info = (0 != jpid) ? is_proc_alive(jpid, 0) : 1;
			break;
		case kw_stime:
			info = proc_times.tms_stime;
			break;
		case kw_utime:
			info = proc_times.tms_utime;
			break;
		case kw_end:
		default:
			rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, kwd->str.len, kwd->str.addr);
			return;
	}
	if (kw_isprocalive != keywd_indx)
		info = (int4)((info * 100) / sysconf(_SC_CLK_TCK));	/* convert to a standard 100 ticks per second */
	i2mval(ret, info);
}
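/* Editor's note: the final rescaling above converts times() ticks to the 1/100-second units
 * the function reports. A worked instance of the same arithmetic (the clk_tck values are
 * illustrative; sysconf(_SC_CLK_TCK) is commonly 100 on Linux, making the conversion an
 * identity):
 */
static long ticks_to_centiseconds(long ticks, long clk_tck)
{
	/* e.g. clk_tck == 1000: 2500 ticks -> 2500 * 100 / 1000 = 250, i.e. 2.5 seconds */
	return (ticks * 100) / clk_tck;
}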
void op_fngetjpi(mint jpid, mval *keyword, mval *ret)
{
	out_struct	out_quad;
	int4		out_long, jpi_code, pid;
	short		index, length, slot, last_slot, out_len;
	uint4		status;
	char		upcase[MAX_KEY_LEN];
	$DESCRIPTOR(out_string, "");

	error_def(ERR_BADJPIPARAM);

	assert(stringpool.free >= stringpool.base);
	assert(stringpool.top >= stringpool.free);
	ENSURE_STP_FREE_SPACE(MAX_JPI_STRLEN);
	MV_FORCE_STR(keyword);
	if (keyword->str.len == 0)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, 4, "Null");
	if (keyword->str.len > MAX_KEY_LEN)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, keyword->str.len, keyword->str.addr);
	lower_to_upper((uchar_ptr_t)upcase, (uchar_ptr_t)keyword->str.addr, keyword->str.len);
	if ((index = upcase[0] - 'A') < MIN_INDEX || index > MAX_INDEX)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, keyword->str.len, keyword->str.addr);
	/* Before checking if it is a VMS JPI attribute, check if it is the GT.M-specific "ISPROCALIVE" attribute */
	if ((keyword->str.len == STR_LIT_LEN("ISPROCALIVE")) && !memcmp(upcase, "ISPROCALIVE", keyword->str.len))
	{
		out_long = (0 != jpid) ? is_proc_alive(jpid, 0) : 1;
		i2mval(ret, out_long);
		return;
	}
	/* Check if it is a VMS JPI attribute */
	slot = jpi_index_table[index].index;
	last_slot = jpi_index_table[index].len;
	jpi_code = 0;
	/* Future enhancement:
	 * (i) since the keywords are sorted, we can exit the for loop if 0 < memcmp.
	 * (ii) also, the current comparison relies on keyword->str.len, which means a "C" would imply CPUTIM instead of
	 *	CSTIME or CUTIME; this ambiguity should probably be removed by asking for an exact match of the full keyword.
	 */
	for ( ; slot < last_slot; slot++)
	{
		if (jpi_param_table[slot].len == keyword->str.len
				&& !(memcmp(jpi_param_table[slot].name, upcase, keyword->str.len)))
		{
			jpi_code = jpi_param_table[slot].item_code;
			break;
		}
	}
	if (!jpi_code)
		rts_error(VARLSTCNT(4) ERR_BADJPIPARAM, 2, keyword->str.len, keyword->str.addr);
	assert(jpid >= 0);
	switch (jpi_code)
	{
		/* **** This is a fall-through for all codes that require a string to be returned **** */
		case JPI$_ACCOUNT:
		case JPI$_AUTHPRIV:
		case JPI$_CLINAME:
		case JPI$_CURPRIV:
		case JPI$_IMAGNAME:
		case JPI$_IMAGPRIV:
		case JPI$_PRCNAM:
		case JPI$_PROCPRIV:
		case JPI$_TABLENAME:
		case JPI$_TERMINAL:
		case JPI$_USERNAME:
			out_string.dsc$a_pointer = stringpool.free;
			out_string.dsc$w_length = MAX_JPI_STRLEN;
			if ((status = lib$getjpi(&jpi_code, &jpid, 0, 0, &out_string, &out_len)) != SS$_NORMAL)
				rts_error(VARLSTCNT(1) status);	/* need a more specific GTM error message here and below */
			ret->str.addr = stringpool.free;
			ret->str.len = out_len;
			ret->mvtype = MV_STR;
			stringpool.free += out_len;
			assert(stringpool.top >= stringpool.free);
			assert(stringpool.free >= stringpool.base);
			return;
		case JPI$_LOGINTIM:
		{
			int4	days;
			int4	seconds;

			if ((status = lib$getjpi(&jpi_code, &jpid, 0, &out_quad, 0, 0)) != SS$_NORMAL)
				rts_error(VARLSTCNT(1) status);
			if ((status = lib$day(&days, &out_quad, &seconds)) != SS$_NORMAL)
				rts_error(VARLSTCNT(1) status);
			days += DAYS;
			seconds /= CENTISECONDS;
			ret->str.addr = stringpool.free;
			stringpool.free = i2s(&days);
			*stringpool.free++ = ',';
			stringpool.free = i2s(&seconds);
			ret->str.len = (char *)stringpool.free - ret->str.addr;
			ret->mvtype = MV_STR;
			return;
		}
		default:
			if ((status = lib$getjpi(&jpi_code, &jpid, 0, &out_long, 0, 0)) != SS$_NORMAL)
				rts_error(VARLSTCNT(1) status);
			i2mval(ret, out_long);
			return;
	}
}
static int helper_init(upd_helper_entry_ptr_t helper, recvpool_user helper_type)
{
	int	save_errno, save_shutdown;
	char	helper_cmd[UPDHELPER_CMD_MAXLEN];
	int	helper_cmd_len;
	int	status;
	int4	i4status;
	pid_t	helper_pid, waitpid_res;

	save_shutdown = helper->helper_shutdown;
	helper->helper_shutdown = NO_SHUTDOWN;
	if (!gtm_dist_ok_to_use)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_GTMDISTUNVERIF, 4, STRLEN(gtm_dist), gtm_dist,
				gtmImageNames[image_type].imageNameLen, gtmImageNames[image_type].imageName);
	if (WBTEST_ENABLED(WBTEST_MAXGTMDIST_HELPER_PROCESS))
	{
		memset(gtm_dist, 'a', GTM_PATH_MAX - 2);
		gtm_dist[GTM_PATH_MAX - 1] = '\0';
	}
	helper_cmd_len = SNPRINTF(helper_cmd, UPDHELPER_CMD_MAXLEN, UPDHELPER_CMD, gtm_dist);
	if ((-1 == helper_cmd_len) || (UPDHELPER_CMD_MAXLEN <= helper_cmd_len))
	{
		helper->helper_shutdown = save_shutdown;
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_HLPPROC, 0, ERR_TEXT, 2,
				LEN_AND_LIT("Could not find path of Helper Process. Check value of $gtm_dist"));
		repl_errno = EREPL_UPDSTART_BADPATH;
		return UPDPROC_START_ERR;
	}
	FORK(helper_pid);
	if (0 > helper_pid)
	{
		save_errno = errno;
		helper->helper_shutdown = save_shutdown;
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_HLPPROC, 0, ERR_TEXT, 2,
				LEN_AND_LIT("Could not fork Helper process"), save_errno);
		repl_errno = EREPL_UPDSTART_FORK;
		return UPDPROC_START_ERR;
	}
	if (0 == helper_pid)
	{	/* helper */
		getjobnum();
		helper->helper_pid_prev = process_id;	/* identify owner of slot */
		if (WBTEST_ENABLED(WBTEST_BADEXEC_HELPER_PROCESS))
			STRCPY(helper_cmd, "ersatz");
		if (-1 == EXECL(helper_cmd, helper_cmd, UPDHELPER_CMD_ARG1, UPDHELPER_CMD_ARG2,
				(UPD_HELPER_READER == helper_type) ? UPDHELPER_READER_CMD_ARG3 : UPDHELPER_WRITER_CMD_ARG3, NULL))
		{
			save_errno = errno;
			helper->helper_shutdown = save_shutdown;
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_HLPPROC, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Could not exec Helper Process"), save_errno);
			repl_errno = EREPL_UPDSTART_EXEC;
			UNDERSCORE_EXIT(UPDPROC_START_ERR);
		}
	}
	/* Wait for the helper to start up */
	while (helper_pid != helper->helper_pid && is_proc_alive(helper_pid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START);
		UNIX_ONLY(WAITPID(helper_pid, &status, WNOHANG, waitpid_res);)	/* Release defunct helper process if dead */
	}
	/* The helper has now gone far enough in the initialization, or died before initialization. Consider startup completed. */
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "Helper %s started. PID %d [0x%X]\n",
			(UPD_HELPER_READER == helper_type) ? "reader" : "writer", helper_pid, helper_pid);
	return UPDPROC_STARTED;
}
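/* Editor's note: this newer helper_init() checks SNPRINTF for both failure modes before
 * exec'ing the result: a negative return (encoding error) and a return greater than or
 * equal to the buffer size (truncation). The bare idiom, as a sketch (the "updhelper"
 * path format is invented for illustration):
 */
#include <stdio.h>

static int build_cmd_sketch(char *buf, size_t bufsz, const char *dist)
{
	int	len = snprintf(buf, bufsz, "%s/updhelper", dist);

	return ((0 > len) || ((size_t)len >= bufsz)) ? -1 : len;	/* never exec a truncated path */
}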
void mupip_restore(void)
{
	static readonly char	label[] = GDS_LABEL;
	char			db_name[MAX_FN_LEN + 1], *inbuf, *p;
	inc_list_struct		*ptr;
	inc_header		*inhead;
	sgmnt_data		*old_data;
	short			iosb[4];
	unsigned short		n_len;
	int4			status, vbn, rsize, temp, save_errno;
	uint4			rest_blks, totblks;
	trans_num		curr_tn;
	uint4			ii;
	block_id		blk_num;
	bool			extend;
	uint4			cli_status;
	BFILE			*in;
	int			i, db_fd;
	uint4			old_blk_size, old_tot_blks, bplmap;
	short			old_start_vbn;
	off_t			new_eof;
	char			buff[DISK_BLOCK_SIZE];
	char			msg_buffer[1024], *newmap, *newmap_bptr;
	mstr			msg_string;
	char			addr[SA_MAXLEN + 1];
	unsigned char		tcp[5];
	backup_type		type;
	unsigned short		port;
	int4			timeout, cut, match;
	char			debug_info[256];
	void			(*common_read)();
	char			*errptr;
	pid_t			waitpid_res;

	error_def(ERR_MUPRESTERR);
	error_def(ERR_MUPCLIERR);
	error_def(ERR_IOEOF);

	extend = TRUE;
	if (CLI_NEGATED == (cli_status = cli_present("EXTEND")))
		extend = FALSE;
	mu_outofband_setup();
	mu_gv_cur_reg_init();
	n_len = sizeof(db_name);
	if (cli_get_str("DATABASE", db_name, &n_len) == FALSE)
		mupip_exit(ERR_MUPCLIERR);
	strcpy((char *)gv_cur_region->dyn.addr->fname, db_name);
	gv_cur_region->dyn.addr->fname_len = n_len;
	if (!mu_rndwn_file(gv_cur_region, TRUE))
	{
		util_out_print("Error securing stand alone access to output file !AD. Aborting restore.", TRUE, n_len, db_name);
		mupip_exit(ERR_MUPRESTERR);
	}
	OPENFILE(db_name, O_RDWR, db_fd);
	if (-1 == db_fd)
	{
		save_errno = errno;
		util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name);
		errptr = (char *)STRERROR(save_errno);
		util_out_print("open : !AZ", TRUE, errptr);
		mupip_exit(save_errno);
	}
	murgetlst();
	inbuf = (char *)malloc(INC_BACKUP_CHUNK_SIZE);
	old_data = (sgmnt_data *)malloc(sizeof(sgmnt_data));
	LSEEKREAD(db_fd, 0, old_data, sizeof(sgmnt_data), save_errno);
	if (0 != save_errno)
	{
		util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name);
		if (-1 != save_errno)
		{
			errptr = (char *)STRERROR(save_errno);
			util_out_print("read : !AZ", TRUE, errptr);
			db_ipcs_reset(gv_cur_region, TRUE);
			mu_gv_cur_reg_free();
			mupip_exit(save_errno);
		} else
		{
			db_ipcs_reset(gv_cur_region, TRUE);
			mu_gv_cur_reg_free();
			mupip_exit(ERR_IOEOF);
		}
	}
	if (memcmp(&old_data->label[0], &label[0], GDS_LABEL_SZ))
	{
		util_out_print("Output file !AD has an unrecognizable format", TRUE, n_len, db_name);
		free(old_data);
		free(inbuf);
		db_ipcs_reset(gv_cur_region, TRUE);
		mu_gv_cur_reg_free();
		mupip_exit(ERR_MUPRESTERR);
	}
	curr_tn = old_data->trans_hist.curr_tn;
	old_blk_size = old_data->blk_size;
	old_tot_blks = old_data->trans_hist.total_blks;
	old_start_vbn = old_data->start_vbn;
	bplmap = old_data->bplmap;
	free(old_data);
	msg_string.addr = msg_buffer;
	msg_string.len = sizeof(msg_buffer);
	inhead = (inc_header *)malloc(sizeof(inc_header) + 8);
	inhead = (inc_header *)((((int4)inhead) + 7) & -8);
	rest_blks = 0;
	for (ptr = in_files.next; ptr; ptr = ptr->next)
	{	/* --- determine source type --- */
		type = backup_to_file;
		if (0 == ptr->input_file.len)
			continue;
		else if ('|' == *(ptr->input_file.addr + ptr->input_file.len - 1))
		{
			type = backup_to_exec;
			ptr->input_file.len--;
			*(ptr->input_file.addr + ptr->input_file.len) = '\0';
		} else if (ptr->input_file.len > 5)
		{
			lower_to_upper(tcp, (uchar_ptr_t)ptr->input_file.addr, 5);
			if (0 == memcmp(tcp, "TCP:/", 5))
			{
				type = backup_to_tcp;
				cut = 5;
				while ('/' == *(ptr->input_file.addr + cut))
					cut++;
				ptr->input_file.len -= cut;
				p = ptr->input_file.addr;
				while (p < ptr->input_file.addr + ptr->input_file.len)
				{
					*p = *(p + cut);
					p++;
				}
				*p = '\0';
			}
		}
		/* --- open the input stream --- */
		restore_read_errno = 0;
		switch (type)
		{
			case backup_to_file:
				common_read = iob_read;
				if ((in = iob_open_rd(ptr->input_file.addr, DISK_BLOCK_SIZE, BLOCKING_FACTOR)) == NULL)
				{
					save_errno = errno;
					util_out_print("Error accessing input file !AD. Aborting restore.", TRUE,
							ptr->input_file.len, ptr->input_file.addr);
					errptr = (char *)STRERROR(save_errno);
					util_out_print("open : !AZ", TRUE, errptr);
					db_ipcs_reset(gv_cur_region, TRUE);
					mu_gv_cur_reg_free();
					mupip_exit(save_errno);
				}
				ESTABLISH(iob_io_error);
				break;
			case backup_to_exec:
				pipe_child = 0;
				common_read = exec_read;
				in = (BFILE *)malloc(sizeof(BFILE));
				if (0 > (in->fd = gtm_pipe(ptr->input_file.addr, input_from_comm)))
				{
					util_out_print("Error creating input pipe from !AD.", TRUE,
							ptr->input_file.len, ptr->input_file.addr);
					db_ipcs_reset(gv_cur_region, TRUE);
					mu_gv_cur_reg_free();
					mupip_exit(ERR_MUPRESTERR);
				}
#ifdef DEBUG_ONLINE
				PRINTF("file descriptor for the opened pipe is %d.\n", in->fd);
				PRINTF("the command passed to gtm_pipe is %s.\n", ptr->input_file.addr);
#endif
				break;
			case backup_to_tcp:
				common_read = tcp_read;
				/* parse the input */
				switch (match = SSCANF(ptr->input_file.addr, "%[^:]:%hu", addr, &port))
				{
					case 1:
						port = DEFAULT_BKRS_PORT;	/* note: fall through to case 2 */
					case 2:
						break;
					default:
						util_out_print("Error : A hostname has to be specified.", TRUE);
						db_ipcs_reset(gv_cur_region, TRUE);
						mu_gv_cur_reg_free();
						mupip_exit(ERR_MUPRESTERR);
				}
				if ((0 == cli_get_int("NETTIMEOUT", &timeout)) || (0 > timeout))
					timeout = DEFAULT_BKRS_TIMEOUT;
				in = (BFILE *)malloc(sizeof(BFILE));
				iotcp_fillroutine();
				if (0 > (in->fd = tcp_open(addr, port, timeout, TRUE)))
				{
					util_out_print("Error establishing TCP connection to !AD.", TRUE,
							ptr->input_file.len, ptr->input_file.addr);
					db_ipcs_reset(gv_cur_region, TRUE);
					mu_gv_cur_reg_free();
					mupip_exit(ERR_MUPRESTERR);
				}
				break;
			default:
				util_out_print("Aborting restore!/", TRUE);
				util_out_print("Unrecognized input format !AD", TRUE,
						ptr->input_file.len, ptr->input_file.addr);
				db_ipcs_reset(gv_cur_region, TRUE);
				mu_gv_cur_reg_free();
				mupip_exit(ERR_MUPRESTERR);
		}
		COMMON_READ(in, inhead, sizeof(inc_header));
		if (memcmp(&inhead->label[0], INC_HEADER_LABEL, INC_HDR_LABEL_SZ))
		{
			util_out_print("Input file !AD has an unrecognizable format", TRUE,
					ptr->input_file.len, ptr->input_file.addr);
			free(inbuf);
			db_ipcs_reset(gv_cur_region, TRUE);
			mu_gv_cur_reg_free();
			mupip_exit(ERR_MUPRESTERR);
		}
		if (curr_tn != inhead->start_tn)
		{
			util_out_print("Transaction in input file !AD does not align with database TN.!/DB: !XL!_Input file: !XL",
					TRUE, ptr->input_file.len, ptr->input_file.addr, curr_tn, inhead->start_tn);
			free(inbuf);
			db_ipcs_reset(gv_cur_region, TRUE);
			mu_gv_cur_reg_free();
			mupip_exit(ERR_MUPRESTERR);
		}
		if (old_blk_size != inhead->blk_size)
		{
			util_out_print("Incompatible block size. Output file !AD has block size !XL,", TRUE, n_len, db_name);
			util_out_print("while input file !AD is from a database with block size !XL,", TRUE,
					ptr->input_file.len, ptr->input_file.addr, inhead->blk_size);
			free(inbuf);
			db_ipcs_reset(gv_cur_region, TRUE);
			mu_gv_cur_reg_free();
			mupip_exit(ERR_MUPRESTERR);
		}
		if (old_tot_blks != inhead->db_total_blks)
		{
			if (old_tot_blks > inhead->db_total_blks || !extend)
			{
				totblks = old_tot_blks - DIVIDE_ROUND_UP(old_tot_blks, DISK_BLOCK_SIZE);
				util_out_print("Incompatible database sizes. Output file !AD has!/ !UL (!XL hex) total blocks,",
						TRUE, n_len, db_name, totblks, totblks);
				totblks = inhead->db_total_blks - DIVIDE_ROUND_UP(inhead->db_total_blks, DISK_BLOCK_SIZE);
				util_out_print("while input file !AD is from a database with!/ !UL (!XL hex) total blocks",
						TRUE, ptr->input_file.len, ptr->input_file.addr, totblks, totblks);
				free(inbuf);
				db_ipcs_reset(gv_cur_region, TRUE);
				mu_gv_cur_reg_free();
				mupip_exit(ERR_MUPRESTERR);
			} else
			{	/* This part of the code is similar to gdsfilext, except that we don't need to do most of
				 * the work gdsfilext does. However, when the database has extended since the last backup
				 * (the beginning of this incremental backup) and there are new bitmaps never touched
				 * afterwards by GT.M, those bitmaps will have tn == 0, which prevents the backup process
				 * from picking those blocks up, so we need to initialize them here.
				 */
				new_eof = ((off_t)(old_start_vbn - 1) * DISK_BLOCK_SIZE)
						+ ((off_t)inhead->db_total_blks * old_blk_size);
				memset(buff, 0, DISK_BLOCK_SIZE);
				LSEEKWRITE(db_fd, new_eof, buff, DISK_BLOCK_SIZE, status);
				if (0 != status)
				{
					util_out_print("Aborting restore!/", TRUE);
					util_out_print("lseek or write error : Unable to extend output file !AD!/",
							TRUE, n_len, db_name);
					util_out_print(" from !UL (!XL hex) total blocks to !UL (!XL hex) total blocks.!/",
							TRUE, old_tot_blks, old_tot_blks,
							inhead->db_total_blks, inhead->db_total_blks);
					util_out_print(" Current input file is !AD with !UL (!XL hex) total blocks!/",
							TRUE, ptr->input_file.len, ptr->input_file.addr,
							inhead->db_total_blks, inhead->db_total_blks);
					gtm_putmsg(VARLSTCNT(1) status);
					free(inbuf);
					db_ipcs_reset(gv_cur_region, TRUE);
					mu_gv_cur_reg_free();
					mupip_exit(ERR_MUPRESTERR);
				}
				/* --- initialize all new bitmaps, just in case they are not touched later --- */
				if (DIVIDE_ROUND_DOWN(inhead->db_total_blks, bplmap) > DIVIDE_ROUND_DOWN(old_tot_blks, bplmap))
				{	/* -- similar logic exists in bml_newmap.c, which needs to pick up any new updates here -- */
					newmap = (char *)malloc(old_blk_size);
					((blk_hdr *)newmap)->bsiz = BM_SIZE(bplmap);
					((blk_hdr *)newmap)->levl = LCL_MAP_LEVL;
					((blk_hdr *)newmap)->tn = curr_tn;
					newmap_bptr = newmap + sizeof(blk_hdr);
					*newmap_bptr++ = THREE_BLKS_FREE;
					memset(newmap_bptr, FOUR_BLKS_FREE, BM_SIZE(bplmap) - sizeof(blk_hdr) - 1);
					for (ii = ROUND_UP(old_tot_blks, bplmap); ii <= inhead->db_total_blks; ii += bplmap)
					{
						new_eof = (off_t)(old_start_vbn - 1) * DISK_BLOCK_SIZE
								+ (off_t)ii * old_blk_size;
						LSEEKWRITE(db_fd, new_eof, newmap, old_blk_size, status);
						if (0 != status)
						{
							util_out_print("Aborting restore!/", TRUE);
							util_out_print("Bitmap 0x!XL initialization error!", TRUE, ii);
							gtm_putmsg(VARLSTCNT(1) status);
							free(inbuf);
							free(newmap);
							db_ipcs_reset(gv_cur_region, TRUE);
							mu_gv_cur_reg_free();
							mupip_exit(ERR_MUPRESTERR);
						}
					}
					free(newmap);
				}
				old_tot_blks = inhead->db_total_blks;
			}
		}
		COMMON_READ(in, &rsize, sizeof(int4));
		for ( ; ; )
		{	/* rsize is the size of the record, including the size field itself; but since the size has
			 * already been read in, this reads the current record plus the size for the next record.
			 */
			/* ensure we have a reasonable record size, at least */
			if (rsize - sizeof(int4) - sizeof(block_id) > old_blk_size)
			{
				util_out_print("Invalid information in restore file !AD. Aborting restore.", TRUE,
						ptr->input_file.len, ptr->input_file.addr);
				iob_close(in);
				db_ipcs_reset(gv_cur_region, TRUE);
				mu_gv_cur_reg_free();
				mupip_exit(ERR_MUPRESTERR);
			}
			COMMON_READ(in, inbuf, rsize);
			if (!memcmp(inbuf, &end_msg[0], sizeof end_msg - 1))
				break;
			rest_blks++;
			blk_num = *(block_id *)inbuf;
			vbn = old_start_vbn - 1 + (old_blk_size / DISK_BLOCK_SIZE * blk_num);
			LSEEKWRITE(db_fd, (off_t)vbn * DISK_BLOCK_SIZE, inbuf + sizeof(block_id),
					rsize - sizeof(block_id) - sizeof(int4), save_errno);
			if (0 != save_errno)
			{
				util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name);
				errptr = (char *)STRERROR(save_errno);
				util_out_print("write : !AZ", TRUE, errptr);
				db_ipcs_reset(gv_cur_region, TRUE);
				mu_gv_cur_reg_free();
				mupip_exit(save_errno);
			}
			GET_LONG(temp, (inbuf + rsize - sizeof(int4)));
			rsize = temp;
		}
		GET_LONG(temp, (inbuf + rsize - sizeof(int4)));
		rsize = temp;
		vbn = 0;
		for (i = 0; ; i++)
		{	/* restore the file header */
			COMMON_READ(in, inbuf, rsize);
			if (!memcmp(inbuf, &hdr_msg[0], sizeof hdr_msg - 1))
				break;
			LSEEKWRITE(db_fd, vbn, inbuf, rsize - sizeof(int4), save_errno);
			if (0 != save_errno)
			{
				util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name);
				errptr = (char *)STRERROR(save_errno);
				util_out_print("write : !AZ", TRUE, errptr);
				db_ipcs_reset(gv_cur_region, TRUE);
				mu_gv_cur_reg_free();
				mupip_exit(save_errno);
			}
			vbn += rsize - sizeof(int4);
			GET_LONG(temp, (inbuf + rsize - sizeof(int4)));
			rsize = temp;
		}
		curr_tn = inhead->end_tn;
		switch (type)
		{
			case backup_to_file:
				REVERT;
				iob_close(in);
				break;
			case backup_to_exec:
				close(in->fd);
				if ((pipe_child > 0) && (FALSE != is_proc_alive(pipe_child, 0)))
					WAITPID(pipe_child, (int *)&status, 0, waitpid_res);
				break;
			case backup_to_tcp:
				break;
		}
	}
	util_out_print("!/RESTORE COMPLETED", TRUE);
	util_out_print("!UL blocks restored", TRUE, rest_blks);
	free(inbuf);
	db_ipcs_reset(gv_cur_region, FALSE);
	mu_gv_cur_reg_free();
	mupip_exit(SS_NORMAL);
}
sm_uc_ptr_t t_qread(block_id blk, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr_out)
	/* cycle is used in t_end to detect if the buffer has been refreshed since the t_qread */
{
	uint4				status, duint4, blocking_pid;
	cache_rec_ptr_t			cr;
	bt_rec_ptr_t			bt;
	bool				clustered, was_crit;
	int				dummy, lcnt, ocnt;
	cw_set_element			*cse;
	off_chain			chain1;
	register sgmnt_addrs		*csa;
	register sgmnt_data_ptr_t	csd;
	int4				dummy_errno;
	boolean_t			already_built, is_mm, reset_first_tp_srch_status, set_wc_blocked;

	error_def(ERR_DBFILERR);
	error_def(ERR_BUFOWNERSTUCK);

	first_tp_srch_status = NULL;
	reset_first_tp_srch_status = FALSE;
	csa = cs_addrs;
	csd = csa->hdr;
	INCR_DB_CSH_COUNTER(csa, n_t_qreads, 1);
	is_mm = (dba_mm == csd->acc_meth);
	assert((t_tries < CDB_STAGNATE) || csa->now_crit);
	if (0 < dollar_tlevel)
	{
		assert(sgm_info_ptr);
		if (0 != sgm_info_ptr->cw_set_depth)
		{
			chain1 = *(off_chain *)&blk;
			if (1 == chain1.flag)
			{
				assert(sgm_info_ptr->cw_set_depth);
				if ((int)chain1.cw_index < sgm_info_ptr->cw_set_depth)
					tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain1.cw_index, &cse);
				else
				{
					assert(FALSE == csa->now_crit);
					rdfail_detail = cdb_sc_blknumerr;
					return (sm_uc_ptr_t)NULL;
				}
			} else
			{
				first_tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use,
						(void *)blk, &duint4);
				ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
				cse = first_tp_srch_status ? first_tp_srch_status->ptr : NULL;
			}
			assert(!cse || !cse->high_tlevel);
			if (cse)
			{	/* the transaction has modified the sought-after block */
				assert(gds_t_writemap != cse->mode);
				if (FALSE == cse->done)
				{	/* out of date, so make it current */
					already_built = (NULL != cse->new_buff);
					gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, 0);
					assert(cse->blk_target);
					if (!already_built && !chain1.flag)
					{
						assert(first_tp_srch_status && (is_mm || first_tp_srch_status->cr)
								&& first_tp_srch_status->buffaddr);
						if (first_tp_srch_status->tn
								<= ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn)
						{
							assert(CDB_STAGNATE > t_tries);
							rdfail_detail = cdb_sc_blkmod;	/* should this be something else? */
							TP_TRACE_HIST_MOD(blk, gv_target, tp_blkmod_t_qread, cs_data,
									first_tp_srch_status->tn,
									((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn,
									((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->levl);
							return (sm_uc_ptr_t)NULL;
						}
						if ((!is_mm) && (first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle
								|| first_tp_srch_status->blk_num != first_tp_srch_status->cr->blk))
						{
							assert(CDB_STAGNATE > t_tries);
							rdfail_detail = cdb_sc_lostcr;	/* should this be something else? */
							return (sm_uc_ptr_t)NULL;
						}
						if (certify_all_blocks && FALSE == cert_blk(gv_cur_region, blk,
								(blk_hdr_ptr_t)cse->new_buff, cse->blk_target->root))
							GTMASSERT;
					}
					cse->done = TRUE;
				}
				*cycle = CYCLE_PVT_COPY;
				*cr_out = 0;
				return (sm_uc_ptr_t)cse->new_buff;
			}
			assert(!chain1.flag);
		} else
			first_tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use,
					(void *)blk, &duint4);
		ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
		if (!is_mm && first_tp_srch_status)
		{
			assert(first_tp_srch_status->cr && !first_tp_srch_status->ptr);
			if (first_tp_srch_status->cycle == first_tp_srch_status->cr->cycle)
			{
				*cycle = first_tp_srch_status->cycle;
				*cr_out = first_tp_srch_status->cr;
				first_tp_srch_status->cr->refer = TRUE;
				if (CDB_STAGNATE <= t_tries)	/* mu_reorg doesn't use TP, else we should have an || for that */
					CWS_INSERT(blk);
				return (sm_uc_ptr_t)first_tp_srch_status->buffaddr;
			} else
			{	/* Block was already part of the read-set of this transaction but got recycled. Allow for
				 * the recycling, but update the first_tp_srch_status (for this blk) in the si->first_tp_hist
				 * array to reflect the new buffer, cycle and cache-record. Since we know those only at the
				 * end of t_qread, set a variable here that will enable the update before returning.
				 */
				reset_first_tp_srch_status = TRUE;
			}
		}
	}
	if ((blk >= csa->ti->total_blks) || (blk < 0))
	{	/* requested block out of range; could occur because of a concurrency conflict */
		if ((&FILE_INFO(gv_cur_region)->s_addrs != csa) || (csd != cs_data))
			GTMASSERT;
		assert(FALSE == csa->now_crit);
		rdfail_detail = cdb_sc_blknumerr;
		return (sm_uc_ptr_t)NULL;
	}
	if (is_mm)
	{
		*cycle = CYCLE_SHRD_COPY;
		*cr_out = 0;
		return (sm_uc_ptr_t)(mm_read(blk));
	}
	assert(dba_bg == csd->acc_meth);
	assert(!first_tp_srch_status || !first_tp_srch_status->cr
			|| first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle);
	if (FALSE == (clustered = csd->clustered))
		bt = NULL;
	was_crit = csa->now_crit;
	ocnt = 0;
	set_wc_blocked = FALSE;	/* indicates whether csd->wc_blocked was set to TRUE by us */
	do
	{
		if (NULL == (cr = db_csh_get(blk)))
		{	/* not in memory */
			if (clustered && (NULL != (bt = bt_get(blk))) && (FALSE == bt->flushing))
				bt = NULL;
			if (FALSE == csa->now_crit)
			{
				if (NULL != bt)
				{	/* at this point, bt is non-NULL only if clustered and flushing - wait without crit */
					assert(clustered);
					wait_for_block_flush(bt, blk);	/* try for no other node currently writing the block */
				}
				if (csd->flush_trigger <= csa->nl->wcs_active_lvl && FALSE == gv_cur_region->read_only)
					JNL_ENSURE_OPEN_WCS_WTSTART(csa, gv_cur_region, 0, dummy_errno);
						/* a macro that dclast's wcs_wtstart() and checks for errors etc. */
				grab_crit(gv_cur_region);
				cr = db_csh_get(blk);	/* in case the block arrived before crit */
			}
			if (clustered && (NULL != (bt = bt_get(blk))) && (TRUE == bt->flushing))
			{	/* Once crit, need to assure that if clustered, flushing is [still] complete.
				 * If it isn't, we missed an entire WM cycle and have to wait for another node to finish.
				 */
				wait_for_block_flush(bt, blk);	/* ensure no other node is currently writing the block */
			}
			if (NULL == cr)
			{	/* really not in memory - must get a new buffer */
				assert(csa->now_crit);
				cr = db_csh_getn(blk);
				if (CR_NOTVALID == (sm_long_t)cr)
				{
					SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE);
					BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_getn_invalid_blk);
					set_wc_blocked = TRUE;
					break;
				}
				assert(0 <= cr->read_in_progress);
				*cycle = cr->cycle;
				cr->tn = csa->ti->curr_tn;
				if (FALSE == was_crit)
					rel_crit(gv_cur_region);
				/* A read outside of crit may be of a stale block, but that should be detected
				 * by t_end or tp_tend.
				 */
				assert(0 == cr->dirty);
				assert(cr->read_in_progress >= 0);
				INCR_DB_CSH_COUNTER(csa, n_dsk_reads, 1);
				if (SS_NORMAL != (status = dsk_read(blk, GDS_REL2ABS(cr->buffaddr))))
				{
					RELEASE_BUFF_READ_LOCK(cr);
					assert(was_crit == csa->now_crit);
					if (FUTURE_READ == status)
					{	/* in a cluster, a block can be in the "future" relative to the local history */
						assert(TRUE == clustered);
						assert(FALSE == csa->now_crit);
						rdfail_detail = cdb_sc_future_read;	/* t_retry forces the history up to date */
						return (sm_uc_ptr_t)NULL;
					}
					rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				}
				assert(0 <= cr->read_in_progress);
				assert(0 == cr->dirty);
				cr->r_epid = 0;
				RELEASE_BUFF_READ_LOCK(cr);
				assert(-1 <= cr->read_in_progress);
				*cr_out = cr;
				assert(was_crit == csa->now_crit);
				if (reset_first_tp_srch_status)
				{	/* keep the parentheses for the if (although single line) since the following is a macro */
					RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
				}
				return (sm_uc_ptr_t)GDS_REL2ABS(cr->buffaddr);
			} else if ((FALSE == was_crit) && (BAD_LUCK_ABOUNDS > ocnt))
			{
				assert(TRUE == csa->now_crit);
				assert(csa->nl->in_crit == process_id);
				rel_crit(gv_cur_region);
			}
		}
		if (CR_NOTVALID == (sm_long_t)cr)
		{
			SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE);
			BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_get_invalid_blk);
			set_wc_blocked = TRUE;
			break;
		}
		for (lcnt = 1; ; lcnt++)
		{
			if (0 > cr->read_in_progress)
			{	/* it's not being read */
				if (clustered && (0 == cr->bt_index)
						&& (cr->tn < ((th_rec *)((uchar_ptr_t)csa->th_base
							+ csa->th_base->tnque.fl))->tn))
				{	/* can't rely on the buffer */
					cr->cycle++;	/* increment cycle whenever the blk number changes (tp_hist depends on this) */
					cr->blk = CR_BLKEMPTY;
					break;
				}
				*cycle = cr->cycle;
				*cr_out = cr;
				VMS_ONLY(
					/* If we were doing the db_csh_get() above (in t_qread itself) and located the
					 * cache-record which, before coming here and taking a copy of cr->cycle a few lines
					 * above, was made an older twin by another process in bg_update (note this can happen
					 * in VMS only) which has already incremented the cycle, we will end up having a copy
					 * of the old cache-record with its incremented cycle number and hence will succeed in
					 * tp_hist validation if we return this <cr,cycle> combination, although we don't want
					 * to since this "cr" is not current for the given block as of now. Note that the
					 * "indexmod" optimization in tp_tend() relies on an accurate intermediate validation
					 * by tp_hist(), which in turn relies on the <cr,cycle> value returned by t_qread()
					 * being accurate for a given blk at the current point in time. We detect the
					 * older-twin case by the following check. Note that here we depend on the fact that
					 * bg_update() sets cr->bt_index to 0 before incrementing cr->cycle. Given that order,
					 * cr->bt_index can be guaranteed to be 0 if we read the incremented cycle.
					 */
					if (cr->twin && (0 == cr->bt_index))
						break;
				)
				if (cr->blk != blk)
					break;
				if (was_crit != csa->now_crit)
					rel_crit(gv_cur_region);
				assert(was_crit == csa->now_crit);
				if (reset_first_tp_srch_status)
				{	/* keep the parentheses for the if (although single line) since the following is a macro */
					RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
				}
				/* Note that at this point we expect t_qread() to return a <cr,cycle> combination that
				 * corresponds to the "blk" passed in. It is crucial to get an accurate value for both
				 * fields since tp_hist() relies on this for its intermediate validation.
				 */
				return (sm_uc_ptr_t)GDS_ANY_REL2ABS(csa, cr->buffaddr);
			}
			if (blk != cr->blk)
				break;
			if (lcnt >= BUF_OWNER_STUCK && (0 == (lcnt % BUF_OWNER_STUCK)))
			{
				if (FALSE == csa->now_crit)
					grab_crit(gv_cur_region);
				if (cr->read_in_progress < -1)
				{	/* outside of design; clear to known state */
					BG_TRACE_PRO(t_qread_out_of_design);
					INTERLOCK_INIT(cr);
					assert(0 == cr->r_epid);
					cr->r_epid = 0;
				} else if (cr->read_in_progress >= 0)
				{
					BG_TRACE_PRO(t_qread_buf_owner_stuck);
					if (0 != (blocking_pid = cr->r_epid))
					{
						if (FALSE == is_proc_alive(blocking_pid, cr->image_count))
						{	/* process gone: release that process's lock */
							assert(0 == cr->bt_index);
							if (cr->bt_index)
							{
								SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
								BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index1);
								set_wc_blocked = TRUE;
								break;
							}
							cr->cycle++;	/* increment cycle for blk number changes (for tp_hist) */
							cr->blk = CR_BLKEMPTY;
							RELEASE_BUFF_READ_LOCK(cr);
						} else
						{
							rel_crit(gv_cur_region);
							send_msg(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region));
							send_msg(VARLSTCNT(9) ERR_BUFOWNERSTUCK, 7, process_id, blocking_pid,
									cr->blk, cr->blk, (lcnt / BUF_OWNER_STUCK),
									cr->read_in_progress, cr->rip_latch.latch_pid);
							if ((4 * BUF_OWNER_STUCK) <= lcnt)
								GTMASSERT;
							/* Kickstart the process taking a long time in case it was suspended */
							UNIX_ONLY(continue_proc(blocking_pid));
						}
					}
				} else
				{	/* process stopped before it could set r_epid */
					assert(0 == cr->bt_index);
					if (cr->bt_index)
					{
						SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
						BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index2);
						set_wc_blocked = TRUE;
						break;
					}
					cr->cycle++;	/* increment cycle for blk number changes (for tp_hist) */
					cr->blk = CR_BLKEMPTY;
					RELEASE_BUFF_READ_LOCK(cr);
					if (cr->read_in_progress < -1)	/* race: it was released since the r_epid check */
						LOCK_BUFF_FOR_READ(cr, dummy);
				}
				if (was_crit != csa->now_crit)
					rel_crit(gv_cur_region);
			} else
				wcs_sleep(lcnt);
		}
		if (set_wc_blocked)	/* cannot use csd->wc_blocked here as we do not necessarily hold crit */
			break;
		ocnt++;
		if (BAD_LUCK_ABOUNDS <= ocnt)
		{
			if (BAD_LUCK_ABOUNDS < ocnt || csa->now_crit)
			{
				rel_crit(gv_cur_region);
				GTMASSERT;
			}
			if (FALSE == csa->now_crit)
				grab_crit(gv_cur_region);
		}
	} while (TRUE);
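/* Editor's note: the <cr,cycle> contract t_qread()'s comments keep referring to boils down
 * to this: a buffer handed out earlier is still trustworthy only if the cache record still
 * maps the same block AND its cycle has not moved, because every recycle bumps cr->cycle
 * first. A hypothetical re-validation in the spirit of tp_hist()/t_end() (the types here
 * are invented for illustration):
 */
struct cr_view
{
	int	blk;	/* block number the cache record mapped */
	int	cycle;	/* cycle captured when t_qread returned */
};

static int cr_view_still_valid(const struct cr_view *now, const struct cr_view *saved)
{
	return (now->blk == saved->blk) && (now->cycle == saved->cycle);
}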
void recover_truncate(sgmnt_addrs *csa, sgmnt_data_ptr_t csd, gd_region *reg)
{
	char		*err_msg;
	uint4		old_total, cur_total, new_total;
	off_t		old_size, cur_size, new_size;
	int		ftrunc_status, status;
	unix_db_info	*udi;
	int		semval;

	if (NULL != csa->nl && csa->nl->trunc_pid && !is_proc_alive(csa->nl->trunc_pid, 0))
		csa->nl->trunc_pid = 0;
	if (!csd->before_trunc_total_blks)
		return;
	assert((GDSVCURR == csd->desired_db_format) && (csd->blks_to_upgrd == 0) && (dba_mm != csd->acc_meth));
	/* If called from db_init, assert that we have grabbed the access semaphore and are the only process attached to
	 * the database. Otherwise, we should have crit, having been called from wcs_recover.
	 */
	udi = FILE_INFO(reg);
	assert((udi->grabbed_access_sem && (1 == (semval = semctl(udi->semid, 1, GETVAL)))) || csa->now_crit);
	/* Interrupted truncate scenario */
	if (NULL != csa->nl)
		csa->nl->root_search_cycle++;
	old_total = csd->before_trunc_total_blks;		/* pre-truncate total_blks */
	old_size = (off_t)SIZEOF_FILE_HDR(csd)			/* pre-truncate file size (in bytes) */
			+ (off_t)old_total * csd->blk_size + DISK_BLOCK_SIZE;
	cur_total = csa->ti->total_blks;			/* actual total_blks right now */
	cur_size = (off_t)gds_file_size(reg->dyn.addr->file_cntl) * DISK_BLOCK_SIZE;
								/* actual file size right now (in bytes) */
	new_total = csd->after_trunc_total_blks;		/* post-truncate total_blks */
	new_size = old_size - (off_t)(old_total - new_total) * csd->blk_size;	/* post-truncate file size (in bytes) */
	/* We don't expect FTRUNCATE to leave the file size in an in-between state, hence the assert below. */
	assert(old_size == cur_size || new_size == cur_size);
	if (new_total == cur_total && old_size == cur_size)
	{	/* Crash after reducing total_blks but before a successful FTRUNCATE. Complete the FTRUNCATE here. */
		DBGEHND((stdout, "DBG:: recover_truncate() -- completing truncate, old_total = [%lu], cur_total = [%lu]\n",
				old_total, new_total));
		assert(csd->before_trunc_free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total));
		csa->ti->free_blocks = csd->before_trunc_free_blocks - DELTA_FREE_BLOCKS(old_total, new_total);
		clear_cache_array(csa, csd, reg, new_total, old_total);
		WRITE_EOF_BLOCK(reg, csd, new_total, status);
		if (status != 0)
		{
			err_msg = (char *)STRERROR(errno);
			rts_error(VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(reg), LEN_AND_STR(err_msg));
			return;
		}
		FTRUNCATE(FILE_INFO(reg)->fd, new_size, ftrunc_status);
		if (ftrunc_status != 0)
		{
			err_msg = (char *)STRERROR(errno);
			rts_error(VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(reg), LEN_AND_STR(err_msg));
			return;
		}
	} else
	{	/* Crash either before csa->ti->total_blks was changed OR after a successful FTRUNCATE.
		 * In either case, the db file is in a consistent state, so there is nothing further to do.
		 */
		assert((old_total == cur_total && old_size == cur_size) || (new_total == cur_total && new_size == cur_size));
		if (!((old_total == cur_total && old_size == cur_size) || (new_total == cur_total && new_size == cur_size)))
			rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
	}
	csd->before_trunc_total_blks = 0;	/* indicate CONSISTENT */
}
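/* Editor's note: recover_truncate() is a two-phase "record intent, then apply" recovery.
 * The header fields before_trunc_total_blks/after_trunc_total_blks are the intent record,
 * and the routine classifies the crash point by comparing them against the live state. A
 * summary (this table is editorial, not GT.M text) of the three cases handled above:
 *
 *	total_blks == old, file size == old  ->  crash before anything happened: nothing to do
 *	total_blks == new, file size == old  ->  crash mid-truncate: redo the FTRUNCATE
 *	total_blks == new, file size == new  ->  truncate had completed: nothing to do
 *
 * Writing before_trunc_total_blks = 0 at the end marks the header CONSISTENT again.
 */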
int gtmsource() { int status, log_init_status, waitpid_res, save_errno; char print_msg[1024], tmpmsg[1024]; gd_region *reg, *region_top; sgmnt_addrs *csa, *repl_csa; boolean_t all_files_open, isalive; pid_t pid, ppid, procgp; seq_num read_jnl_seqno, jnl_seqno; unix_db_info *udi; gtmsource_local_ptr_t gtmsource_local; boolean_t this_side_std_null_coll; int null_fd, rc; memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs)); call_on_signal = gtmsource_sigstop; ESTABLISH_RET(gtmsource_ch, SS_NORMAL); if (-1 == gtmsource_get_opt()) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR); if (gtmsource_options.shut_down) { /* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other * MUPIP REPLIC commands that need these locks for their function. */ if (0 < gtmsource_options.shutdown_time) { repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n", gtmsource_options.shutdown_time); LONG_SLEEP(gtmsource_options.shutdown_time); } else repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n"); } else if (gtmsource_options.start) { repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n", gtmsource_options.secondary_instname); } if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)) { /* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now * in case this is a secondary -> primary transition. This is so we can later switch journal files in all * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic * currently has assumptions that a process holds only one ftok at any point in time. */ assert(NULL == gd_header); gvinit(); all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } } jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator); /* is_jnlpool_creator == TRUE ==> this process created the journal pool * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it. 
*/ if (gtmsource_options.shut_down) gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN); else if (gtmsource_options.activate) gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN); else if (gtmsource_options.deactivate) gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN); else if (gtmsource_options.checkhealth) gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN); else if (gtmsource_options.changelog) gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN); else if (gtmsource_options.showbacklog) gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN); else if (gtmsource_options.stopsourcefilter) gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN); else if (gtmsource_options.jnlpool) gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN); else if (gtmsource_options.losttncomplete) gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN); else if (gtmsource_options.needrestart) gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN); else if (gtmsource_options.showfreeze) gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN); else if (gtmsource_options.setfreeze) gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN); else if (!gtmsource_options.start) { assert(CLI_PRESENT == cli_present("STATSLOG")); gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN); } assert(gtmsource_options.start); # ifndef REPL_DEBUG_NOBACKGROUND /* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */ gtmsource_local = jnlpool.gtmsource_local; gtmsource_local->child_server_running = FALSE; FORK(pid); if (0 > pid) { save_errno = errno; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno); } else if (0 < pid) { /* Parent. Wait until child sets "child_server_running" to TRUE. That is an indication that the child * source server has completed its initialization phase and is all set so the parent command can return. */ while (isalive = is_proc_alive(pid, 0)) /* note : intended assignment */ { if (gtmsource_local->child_server_running) break; /* To take care of reassignment of PIDs, the while condition should be && with the condition * (PPID of pid == process_id) */ SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START); WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */ } if (isalive) { /* Child process is alive and started with no issues */ if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno); ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE); } else { /* Child source server process errored out at startup and is no longer alive. * If we were the one who created the journal pool, let us clean it up. */ repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n"); if (is_jnlpool_creator) status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN); } /* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the * value of sem 1 before the child grabs sem 1. */ gtmsource_exit(isalive ? 
SRV_ALIVE : SRV_ERR); } /* Point stdin to /dev/null */ OPENFILE("/dev/null", O_RDONLY, null_fd); if (0 > null_fd) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0); FCNTL3(null_fd, F_DUPFD, 0, rc); if (0 > rc) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0); CLOSEFILE(null_fd, rc); if (0 > rc) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0); /* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore * at this point. The variables that indicate this would have been copied over to the child during the fork. This will * make the child think it is holding them when it actually is not. Reset those variables in the child * to ensure they do not misrepresent the holder of those semaphores. */ ftok_sem_reg = NULL; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->grabbed_ftok_sem); udi->grabbed_ftok_sem = FALSE; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE; assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]); /* Start child source server initialization */ is_src_server = TRUE; OPERATOR_LOG_MSG; process_id = getpid(); /* Reinvoke secshr related initialization with the child's pid */ INVOKE_INIT_SECSHR_ADDRS; /* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool. * Note that the initialization would already have been done by the parent source server startup command but we need to * redo the initialization with the child process id. */ assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id)); mutex_per_process_init(); START_HEARTBEAT_IF_NEEDED; ppid = getppid(); log_init_status = repl_log_init(REPL_GENERAL_LOG, &gtmsource_log_fd, gtmsource_options.log_file); assert(SS_NORMAL == log_init_status); repl_log_fd2fp(&gtmsource_log_fp, gtmsource_log_fd); if (-1 == (procgp = setsid())) send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Source server error in setsid"), errno); # endif /* REPL_DEBUG_NOBACKGROUND */ if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level) gtm_zlib_init(); /* Open zlib shared library for compression/decompression */ REPL_DPRINT1("Setting up regions\n"); gvinit(); /* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. 
*/ all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } /* Determine primary side null subscripts collation order */ /* Also check whether all regions have same null collation order */ this_side_std_null_coll = -1; for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++) { csa = &FILE_INFO(reg)->s_addrs; if (this_side_std_null_coll != csa->hdr->std_null_coll) { if (-1 == this_side_std_null_coll) this_side_std_null_coll = csa->hdr->std_null_coll; else { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF); gtmsource_exit(ABNORMAL_SHUTDOWN); } } if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg)); gtmsource_exit(ABNORMAL_SHUTDOWN); } if (reg->read_only && REPL_ALLOWED(csa)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Source Server does not have write permissions to one or " "more database files that are replicated")); gtmsource_exit(ABNORMAL_SHUTDOWN); } } /* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */ gtmsource_local->gtmsource_pid = process_id; gtmsource_local->gtmsource_state = GTMSOURCE_START; if (is_jnlpool_creator) { DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id); gtmsource_seqno_init(this_side_std_null_coll); if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary) { /* Created the journal pool as a root primary. Append a history record to the replication instance file. * Invoke the function "gtmsource_rootprimary_init" to do that. */ gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno); } } /* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */ # ifndef REPL_DEBUG_NOBACKGROUND /* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0. * Note that the parent source server startup command would have done an increment of the ftok counter semaphore * for the replication instance file. But the source server process (the child) that comes here would not have done * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay. 
*/ if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg)) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP); /* Increment the source server count semaphore */ status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM); if (0 != status) { save_errno = errno; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno); } # else if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM))) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno); } # endif /* REPL_DEBUG_NOBACKGROUND */ gtmsource_srv_count++; gtmsource_local->child_server_running = TRUE; /* At this point, the parent startup command will stop waiting for child */ gtm_event_log_init(); /* Log source server startup command line first */ SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str); repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg); SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]", process_id, gtmsource_local->secondary_instname); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg)); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); if (is_jnlpool_creator) { repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n", jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid); } else repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n", jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); # ifdef GTM_TLS if (REPL_TLS_REQUESTED) { repl_do_tls_init(gtmsource_log_fp); assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK); } # endif if (jnlpool.jnlpool_ctl->freeze) { last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze; sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname); repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg); sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); } gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly; do { /* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. 
*/ gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START; if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN)) { gtmsource_poll_actions(FALSE); SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE); continue; } if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode) { /* Shutdown initiated */ assert(gtmsource_local->shutdown == SHUTDOWN); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled")); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); break; } gtmsource_poll_actions(FALSE); if (GTMSOURCE_CHANGING_MODE == gtmsource_state) continue; if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode) gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE; SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg)); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;) assert(!repl_csa->hold_onto_crit); /* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */ grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK); if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state) { repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n"); repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n", jnlpool.jnlpool_ctl->jnl_seqno); continue; } QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr); gtmsource_local->read = jnlpool.jnlpool_ctl->write; gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL; read_jnl_seqno = gtmsource_local->read_jnl_seqno; assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno); if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno) { gtmsource_local->read_state = READ_FILE; QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno); gtmsource_pool2file_transition = TRUE; /* so that we read the latest generation jnl files */ } rel_lock(jnlpool.jnlpool_dummy_reg); if (SS_NORMAL != (status = gtmsource_alloc_tcombuff())) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status); gtmsource_filter = NO_FILTER; if ('\0' != gtmsource_local->filter_cmd[0]) { if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd))) gtmsource_filter |= EXTERNAL_FILTER; else gtmsource_exit(ABNORMAL_SHUTDOWN); } gtmsource_process(); /* gtmsource_process returns only when mode needs to be changed to PASSIVE */ assert(gtmsource_state == GTMSOURCE_CHANGING_MODE); gtmsource_ctl_close(); gtmsource_free_msgbuff(); gtmsource_free_tcombuff(); gtmsource_free_filter_buff(); gtmsource_stop_heartbeat(); if (FD_INVALID != gtmsource_sock_fd) repl_close(&gtmsource_sock_fd); if (gtmsource_filter & EXTERNAL_FILTER) repl_stop_filter(); } while (TRUE);
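/* The parent/child startup handshake above (parent polls a shared "child_server_running" flag while also checking
 * child liveness and reaping the child if it dies during startup) in a standalone sketch. An mmap'ed flag and
 * kill(pid, 0) stand in for the journal pool field and is_proc_alive(); all names here are hypothetical, and the
 * PID-reuse caveat noted in the comment above applies to this sketch as well.
 */
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	volatile int	*ready;
	pid_t		pid;
	int		status;

	ready = mmap(NULL, sizeof(*ready), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (MAP_FAILED == (void *)ready)
		return 1;
	*ready = 0;
	pid = fork();
	if (0 > pid)
		return 1;
	if (0 == pid)
	{	/* child: initialize, signal readiness, then act as the server */
		sleep(1);	/* placeholder for initialization work */
		*ready = 1;
		pause();	/* placeholder for the server loop */
		_exit(0);
	}
	while (0 == kill(pid, 0))	/* child still alive? */
	{
		if (*ready)
			break;	/* child finished initialization; parent can return */
		usleep(100 * 1000);
		waitpid(pid, &status, WNOHANG);	/* reap the child if it died at startup */
	}
	if (*ready)
		kill(pid, SIGTERM);	/* demo-only cleanup; a real startup command leaves the server running */
	fprintf(stderr, *ready ? "server started\n" : "server failed at startup\n");
	return *ready ? 0 : 1;
}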
int gtmrecv_shutdown(boolean_t auto_shutdown, int exit_status) { uint4 savepid; boolean_t shut_upd_too = FALSE; int status; unix_db_info *udi; error_def(ERR_RECVPOOLSETUP); error_def(ERR_TEXT); repl_log(stdout, TRUE, TRUE, "Initiating shut down\n"); call_on_signal = NULL; /* So we don't reenter on error */ /* assert that auto shutdown should be invoked only if the current process is a receiver server */ assert(!auto_shutdown || gtmrecv_srv_count); if (auto_shutdown) { /* grab the ftok semaphore and recvpool access control lock IN THAT ORDER (to avoid deadlocks) */ repl_inst_ftok_sem_lock(); status = grab_sem(RECV, RECV_POOL_ACCESS_SEM); if (0 > status) { repl_log(stderr, TRUE, TRUE, "Error grabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); return (ABNORMAL_SHUTDOWN); } } else { /* ftok semaphore and recvpool access semaphore should already be held from the previous call to "recvpool_init" */ DEBUG_ONLY(udi = (unix_db_info *)FILE_INFO(recvpool.recvpool_dummy_reg);) assert(udi->grabbed_ftok_sem); assert(holds_sem[RECV][RECV_POOL_ACCESS_SEM]); /* We do not want to hold the options semaphore to avoid deadlocks with receiver server startup (C9F12-002766) */ assert(!holds_sem[RECV][RECV_SERV_OPTIONS_SEM]); recvpool.gtmrecv_local->shutdown = SHUTDOWN; /* Wait for receiver server to die. But release ftok semaphore and recvpool access control semaphore before * waiting as the concurrently running receiver server might need these (e.g. if it is about to call the * function "repl_inst_was_rootprimary"). */ if (0 != rel_sem(RECV, RECV_POOL_ACCESS_SEM)) gtm_putmsg(VARLSTCNT(7) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in receiver server shutdown rel_sem"), REPL_SEM_ERRNO); repl_inst_ftok_sem_release(); /* Wait for receiver server to shut down */ while((SHUTDOWN == recvpool.gtmrecv_local->shutdown) && (0 < (savepid = recvpool.gtmrecv_local->recv_serv_pid)) && is_proc_alive(savepid, 0)) SHORT_SLEEP(GTMRECV_WAIT_FOR_SHUTDOWN); /* (Re)Grab the ftok semaphore and recvpool access control semaphore IN THAT ORDER (to avoid deadlocks) */ repl_inst_ftok_sem_lock(); status = grab_sem(RECV, RECV_POOL_ACCESS_SEM); if (0 > status) { repl_log(stderr, TRUE, TRUE, "Error regrabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR); return (ABNORMAL_SHUTDOWN); } exit_status = recvpool.gtmrecv_local->shutdown; if (SHUTDOWN == exit_status) { if (0 == savepid) /* No Receiver Process */ exit_status = NORMAL_SHUTDOWN; else /* Receiver Server Crashed */ { repl_log(stderr, FALSE, TRUE, "Receiver Server exited abnormally\n"); exit_status = ABNORMAL_SHUTDOWN; shut_upd_too = TRUE; } } }
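/* The shutdown sequence above releases the ftok and recvpool access semaphores before polling for the receiver's
 * exit and regrabs them afterwards, because the dying server may itself need those locks to finish. A minimal
 * pthread sketch of that release-wait-regrab shape, with hypothetical names (pool_lock, server_running); in a real
 * program a server thread would clear the flag as it exits:
 */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t	pool_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool	server_running = true;	/* cleared by the server as it exits */

static void shutdown_server(void)
{	/* entered with pool_lock held, mirroring the state after recvpool_init */
	pthread_mutex_unlock(&pool_lock);	/* the server may need the lock to shut down cleanly */
	while (server_running)
		sched_yield();			/* poll; the real code sleeps between checks */
	pthread_mutex_lock(&pool_lock);		/* regrab before inspecting the shutdown status */
}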
void erts_whereis_name(Process *c_p, ErtsProcLocks c_p_locks, Eterm name, Process** proc, ErtsProcLocks need_locks, int flags, Port** port, int lock_port) { RegProc* rp = NULL; HashValue hval; int ix; HashBucket* b; #ifdef ERTS_SMP ErtsProcLocks current_c_p_locks; Port *pending_port = NULL; if (!c_p) c_p_locks = 0; current_c_p_locks = c_p_locks; restart: reg_safe_read_lock(c_p, &current_c_p_locks); /* Locked locks: * - port lock on pending_port if pending_port != NULL * - read reg lock * - current_c_p_locks (either c_p_locks or 0) on c_p */ #endif hval = REG_HASH(name); ix = hval % process_reg.size; b = process_reg.bucket[ix]; /* * Note: We have inlined the code from hash.c for speed. */ while (b) { if (((RegProc *) b)->name == name) { rp = (RegProc *) b; break; } b = b->next; } if (proc) { if (!rp) *proc = NULL; else { #ifdef ERTS_SMP if (!rp->p) *proc = NULL; else { if (need_locks) { erts_proc_safelock(c_p, current_c_p_locks, c_p_locks, rp->p, 0, need_locks); current_c_p_locks = c_p_locks; } if ((flags & ERTS_P2P_FLG_ALLOW_OTHER_X) || is_proc_alive(rp->p)) *proc = rp->p; else { if (need_locks) erts_smp_proc_unlock(rp->p, need_locks); *proc = NULL; } } #else if (rp->p && ((flags & ERTS_P2P_FLG_ALLOW_OTHER_X) || is_proc_alive(rp->p))) *proc = rp->p; else *proc = NULL; #endif if (*proc && (flags & ERTS_P2P_FLG_INC_REFC)) erts_proc_inc_refc(*proc); } } if (port) { if (!rp || !rp->pt) *port = NULL; else { #ifdef ERTS_SMP if (lock_port) { if (pending_port == rp->pt) pending_port = NULL; else { if (pending_port) { /* Ahh! Registered port changed while reg lock was unlocked... */ erts_port_release(pending_port); pending_port = NULL; } if (erts_smp_port_trylock(rp->pt) == EBUSY) { Eterm id = rp->pt->common.id; /* id read only... */ /* Unlock all locks, acquire port lock, and restart... */ if (current_c_p_locks) { erts_smp_proc_unlock(c_p, current_c_p_locks); current_c_p_locks = 0; } reg_read_unlock(); pending_port = erts_id2port(id); goto restart; } } ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(rp->pt)); } #endif *port = rp->pt; } } #ifdef ERTS_SMP if (c_p && !current_c_p_locks) erts_smp_proc_lock(c_p, c_p_locks); if (pending_port) erts_port_release(pending_port); #endif reg_read_unlock(); }
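/* erts_whereis_name() avoids deadlock by never blocking on a port lock while holding the registry read lock: on
 * EBUSY it notes the contended port, drops every lock, takes the port lock on its own, and restarts the lookup to
 * revalidate. A standalone pthread sketch of that trylock-and-restart shape follows; all names are hypothetical,
 * and port lifetime management (the real code re-resolves by id via erts_id2port) is elided:
 */
#include <errno.h>
#include <pthread.h>
#include <stddef.h>

struct port { pthread_mutex_t lock; int id; };

static pthread_rwlock_t	registry_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct port	the_port = { PTHREAD_MUTEX_INITIALIZER, 42 };

/* stand-in for the inlined hash lookup; registry read lock must be held */
static struct port *lookup_port_locked(void) { return &the_port; }

static struct port *whereis_port(void)
{
	struct port	*pt, *pending = NULL;

restart:
	pthread_rwlock_rdlock(&registry_lock);
	pt = lookup_port_locked();
	if (pt != pending)
	{
		if (NULL != pending)
		{	/* registered port changed while all locks were dropped */
			pthread_mutex_unlock(&pending->lock);
			pending = NULL;
		}
		if ((NULL != pt) && (EBUSY == pthread_mutex_trylock(&pt->lock)))
		{	/* drop every lock, block on the port lock alone, then restart and revalidate */
			pthread_rwlock_unlock(&registry_lock);
			pthread_mutex_lock(&pt->lock);
			pending = pt;
			goto restart;
		}
	}
	pthread_rwlock_unlock(&registry_lock);
	return pt;	/* when non-NULL, returned with pt->lock held */
}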
int4 gds_rundown(void) { boolean_t canceled_dbsync_timer, canceled_flush_timer, ok_to_write_pfin; boolean_t have_standalone_access, ipc_deleted, err_caught; boolean_t is_cur_process_ss_initiator, remove_shm, vermismatch, we_are_last_user, we_are_last_writer, is_mm; boolean_t unsafe_last_writer; char time_str[CTIME_BEFORE_NL + 2]; /* for GET_CUR_TIME macro */ gd_region *reg; int save_errno, status, rc; int4 semval, ftok_semval, sopcnt, ftok_sopcnt; short crash_count; sm_long_t munmap_len; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; node_local_ptr_t cnl; struct shmid_ds shm_buf; struct sembuf sop[2], ftok_sop[2]; uint4 jnl_status; unix_db_info *udi; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; shm_snapshot_t *ss_shm_ptr; uint4 ss_pid, onln_rlbk_pid, holder_pid; boolean_t was_crit; boolean_t safe_mode; /* Do not flush or take down shared memory. */ boolean_t bypassed_ftok = FALSE, bypassed_access = FALSE, may_bypass_ftok, inst_is_frozen, ftok_counter_halted, access_counter_halted; int secshrstat; intrpt_state_t prev_intrpt_state; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; jnl_status = 0; reg = gv_cur_region; /* Local copy */ /* early out for cluster regions * to avoid tripping the assert below. * Note: * This early out is consistent with VMS. It has been * noted that all of the gtcm assignments * to gv_cur_region should use the TP_CHANGE_REG * macro. This would also avoid the assert problem * and should be done eventually. */ if (dba_cm == reg->dyn.addr->acc_meth) return EXIT_NRM; udi = FILE_INFO(reg); csa = &udi->s_addrs; csd = csa->hdr; assert(csa == cs_addrs && csd == cs_data); if ((reg->open) && (dba_usr == csd->acc_meth)) { change_reg(); gvusr_rundown(); return EXIT_NRM; } /* If the process has standalone access, it has udi->grabbed_access_sem set to TRUE at this point. Note that down in a local * variable as the udi->grabbed_access_sem is set to TRUE even for non-standalone access below and hence we can't rely on * that later to determine if the process had standalone access or not when it entered this function. We need to guarantee * that no one else accesses the database file header when semid/shmid fields are reset. We already have created the ftok * semaphore in db_init or mu_rndwn_file and did not remove it. So just lock it. We do it in blocking mode. */ have_standalone_access = udi->grabbed_access_sem; /* process holds standalone access */ DEFER_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); ESTABLISH_NORET(gds_rundown_ch, err_caught); if (err_caught) { REVERT; WITH_CH(gds_rundown_ch, gds_rundown_err_cleanup(have_standalone_access), 0); ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); DEBUG_ONLY(ok_to_UNWIND_in_exit_handling = FALSE); return EXIT_ERR; } assert(reg->open); /* if we failed to open, dbinit_ch should have taken care of proper clean up */ assert(!reg->opening); /* see comment above */ assert((dba_bg == csd->acc_meth) || (dba_mm == csd->acc_meth)); is_mm = (dba_bg != csd->acc_meth); assert(!csa->hold_onto_crit || (csa->now_crit && jgbl.onlnrlbk)); /* If we are online rollback, we should already be holding crit and should release it only at the end of this module. This * is usually done by noting down csa->now_crit in a local variable (was_crit) and using it whenever we are about to * grab_crit. But, there are instances (like mupip_set_journal.c) where we grab_crit but invoke gds_rundown without any * preceding rel_crit. Such code relies on the fact that gds_rundown does rel_crit unconditionally (to get locks to a known * state). 
So, augment csa->now_crit with jgbl.onlnrlbk to track if we can rel_crit unconditionally or not in gds_rundown. */ was_crit = (csa->now_crit && jgbl.onlnrlbk); /* Cancel any pending flush timer for this region by this task */ canceled_flush_timer = FALSE; canceled_dbsync_timer = FALSE; CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer); we_are_last_user = FALSE; inst_is_frozen = IS_REPL_INST_FROZEN && REPL_ALLOWED(csa->hdr); if (!csa->persistent_freeze) region_freeze(reg, FALSE, FALSE, FALSE); if (!was_crit) { rel_crit(reg); /* get locks to known state */ mutex_cleanup(reg); } /* The only process that can invoke gds_rundown while holding access control semaphore is RECOVER/ROLLBACK. All the others * (like MUPIP SET -FILE/MUPIP EXTEND) would have invoked db_ipcs_reset() before invoking gds_rundown (from * mupip_exit_handler). The only exception is when these processes encounter a terminate signal and they reach * mupip_exit_handler while holding access control semaphore. Assert accordingly. */ assert(!have_standalone_access || mupip_jnl_recover || process_exiting); /* If we have standalone access, then ensure that a concurrent online rollback cannot be running at the same time as it * needs the access control lock as well. The only exception is when we are the online rollback process and are currently * running down. */ cnl = csa->nl; onln_rlbk_pid = cnl->onln_rlbk_pid; assert(!have_standalone_access || mupip_jnl_recover || !onln_rlbk_pid || !is_proc_alive(onln_rlbk_pid, 0)); if (!have_standalone_access) { if (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))) /* Check # of procs counted on FTOK */ { save_errno = errno; assert(FALSE); rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno); } may_bypass_ftok = CAN_BYPASS(ftok_semval, csd, inst_is_frozen); /* Do we need a blocking wait? */ /* We need to guarantee that no one else accesses the database file header when semid/shmid fields are reset. * We already have created ftok semaphore in db_init or mu_rndwn_file and did not remove it. So just lock it. */ if (!ftok_sem_lock(reg, may_bypass_ftok)) { if (may_bypass_ftok) { /* We did a non-blocking wait. It's ok to proceed without locking */ bypassed_ftok = TRUE; holder_pid = semctl(udi->ftok_semid, DB_CONTROL_SEM, GETPID); if ((uint4)-1 == holder_pid) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"), CALLFROM, errno); if (!IS_GTM_IMAGE) /* MUMPS processes should not flood syslog with bypass messages. */ { send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10, LEN_AND_STR(gtmImageNames[image_type].imageName), process_id, LEN_AND_LIT("FTOK"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid); send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("FTOK bypassed at rundown")); } } else { /* We did a blocking wait but something bad happened. 
*/ FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_lock, process_id); rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } } sop[0].sem_num = DB_CONTROL_SEM; sop[0].sem_op = 0; /* Wait for 0 */ sop[1].sem_num = DB_CONTROL_SEM; sop[1].sem_op = 1; /* Lock */ sopcnt = 2; sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | IPC_NOWAIT; /* Don't wait the first time thru */ SEMOP(udi->semid, sop, sopcnt, status, NO_WAIT); if (0 != status) { save_errno = errno; /* Check # of processes counted on access sem. */ if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL))) { assert(FALSE); rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno); } bypassed_access = CAN_BYPASS(semval, csd, inst_is_frozen) || onln_rlbk_pid || csd->file_corrupt; /* Before attempting again in the blocking mode, see if the holding process is an online rollback. * If so, it is likely we won't get the access control semaphore anytime soon. In that case, we * are better off skipping rundown and continuing with sanity cleanup and exit. */ holder_pid = semctl(udi->semid, DB_CONTROL_SEM, GETPID); if ((uint4)-1 == holder_pid) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"), CALLFROM, errno); if (!bypassed_access) { /* We couldn't get it in one shot-- see if we already have it */ if (holder_pid == process_id) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(5) MAKE_MSG_INFO(ERR_CRITSEMFAIL), 2, DB_LEN_STR(reg), ERR_RNDWNSEMFAIL); REVERT; ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); assert(FALSE); return EXIT_ERR; } if (EAGAIN != save_errno) { assert(FALSE); rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"), CALLFROM, save_errno); } sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO; /* Try again - blocking this time */ SEMOP(udi->semid, sop, 2, status, FORCED_WAIT); if (-1 == status) /* We couldn't get it at all.. */ rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"), CALLFROM, errno); } else if (!IS_GTM_IMAGE) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10, LEN_AND_STR(gtmImageNames[image_type].imageName), process_id, LEN_AND_LIT("access control"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid); send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("Access control bypassed at rundown")); } udi->grabbed_access_sem = !bypassed_access; } } /* else we hold the access control semaphore and therefore have standalone access. We do not release it now - we * release it later in mupip_exit_handler.c. Since we already hold the access control semaphore, we don't need the * ftok semaphore and trying to get it could cause a deadlock */ /* Note that in the case of online rollback, "udi->grabbed_access_sem" (and in turn "have_standalone_access") is TRUE. * But there could be other processes still having the database open so we cannot safely reset the halted fields. */ if (have_standalone_access && !jgbl.onlnrlbk) csd->ftok_counter_halted = csd->access_counter_halted = FALSE; ftok_counter_halted = csd->ftok_counter_halted; access_counter_halted = csd->access_counter_halted; /* If we bypassed any of the semaphores, activate safe mode. 
* Also, if the replication instance is frozen and this db has replication turned on (which means * no flushes of dirty buffers to this db can happen while the instance is frozen) activate safe mode. */ ok_to_write_pfin = !(bypassed_access || bypassed_ftok || inst_is_frozen); safe_mode = !ok_to_write_pfin || ftok_counter_halted || access_counter_halted; /* At this point we are guaranteed no one else is doing a db_init/rundown as we hold the access control semaphore */ assert(csa->ref_cnt); /* decrement private ref_cnt before shared ref_cnt decrement. */ csa->ref_cnt--; /* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */ assert(!csa->ref_cnt); --cnl->ref_cnt; if (memcmp(cnl->now_running, gtm_release_name, gtm_release_name_len + 1)) { /* VERMISMATCH condition. Possible only if DSE */ assert(dse_running); vermismatch = TRUE; } else vermismatch = FALSE; if (-1 == shmctl(udi->shmid, IPC_STAT, &shm_buf)) { save_errno = errno; rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown shmctl"), CALLFROM, save_errno); } else we_are_last_user = (1 == shm_buf.shm_nattch) && !vermismatch && !safe_mode; /* recover => one user except ONLINE ROLLBACK, or standalone with frozen instance */ assert(!have_standalone_access || we_are_last_user || jgbl.onlnrlbk || inst_is_frozen); if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno); /* There's one writer left and I am it */ assert(reg->read_only || semval >= 0); unsafe_last_writer = (DB_COUNTER_SEM_INCR == semval) && (FALSE == reg->read_only) && !vermismatch; we_are_last_writer = unsafe_last_writer && !safe_mode; assert(!we_are_last_writer || !safe_mode); assert(!we_are_last_user || !safe_mode); /* recover + R/W region => one writer except ONLINE ROLLBACK, or standalone with frozen instance, leading to safe_mode */ assert(!(have_standalone_access && !reg->read_only) || we_are_last_writer || jgbl.onlnrlbk || inst_is_frozen); GTM_WHITE_BOX_TEST(WBTEST_ANTIFREEZE_JNLCLOSE, we_are_last_writer, 1); /* Assume we are the last writer to invoke wcs_flu */ if (!have_standalone_access && (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL)))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno); if (NULL != csa->ss_ctx) ss_destroy_context(csa->ss_ctx); /* SS_MULTI: If multiple snapshots are supported, then we have to run through each of the snapshots */ assert(1 == MAX_SNAPSHOTS); ss_shm_ptr = (shm_snapshot_ptr_t)SS_GETSTARTPTR(csa); ss_pid = ss_shm_ptr->ss_info.ss_pid; is_cur_process_ss_initiator = (process_id == ss_pid); if (ss_pid && (is_cur_process_ss_initiator || we_are_last_user)) { /* Try getting snapshot crit latch. If we don't get latch, we won't hang for eternity and will skip * doing the orphaned snapshot cleanup. It will be cleaned up eventually either by subsequent MUPIP * INTEG or by a MUPIP RUNDOWN. 
*/ if (ss_get_lock_nowait(reg) && (ss_pid == ss_shm_ptr->ss_info.ss_pid) && (is_cur_process_ss_initiator || !is_proc_alive(ss_pid, 0))) { ss_release(NULL); ss_release_lock(reg); } } /* If cnl->donotflush_dbjnl is set, it means mupip recover/rollback was interrupted and therefore we need not flush * shared memory contents to disk as they might be in an inconsistent state. Moreover, any more flushing will only cause * future rollback to undo more journal records (PBLKs). In this case, we will go ahead and remove shared memory (without * flushing the contents) in this routine. A reissue of the recover/rollback command will restore the database to a * consistent state. */ if (!cnl->donotflush_dbjnl && !reg->read_only && !vermismatch) { /* If we had an orphaned block and were interrupted, set wc_blocked so we can invoke wcs_recover. Do it ONLY * if there is NO concurrent online rollback running (as we need crit to set wc_blocked) */ if (csa->wbuf_dqd && !is_mm) { /* If we had an orphaned block and were interrupted, mupip_exit_handler will invoke secshr_db_clnup which * will clear this field and so we should never come to gds_rundown with a non-zero wbuf_dqd. The only * exception is if we are recover/rollback in which case gds_rundown (from mur_close_files) is invoked * BEFORE secshr_db_clnup in mur_close_files. * Note: It is NOT possible for online rollback to reach here with wbuf_dqd being non-zero. This is because * the moment we apply the first PBLK, we stop all interrupts and hence can never be interrupted in * wcs_wtstart or wcs_get_space. Assert accordingly. */ assert(mupip_jnl_recover && !jgbl.onlnrlbk && !safe_mode); if (!was_crit) grab_crit(reg); SET_TRACEABLE_VAR(cnl->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wcb_gds_rundown); send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_gds_rundown"), process_id, &csa->ti->curr_tn, DB_LEN_STR(reg)); csa->wbuf_dqd = 0; wcs_recover(reg); BG_TRACE_PRO_ANY(csa, lost_block_recovery); if (!was_crit) rel_crit(reg); } if (JNL_ENABLED(csd) && IS_GTCM_GNP_SERVER_IMAGE) originator_prc_vec = NULL; /* If we are the last writing user, then everything must be flushed */ if (we_are_last_writer) { /* Time to flush out all of our buffers */ assert(!safe_mode); if (is_mm) { MM_DBFILEXT_REMAP_IF_NEEDED(csa, reg); cnl->remove_shm = TRUE; } if (cnl->wc_blocked && jgbl.onlnrlbk) { /* if the last update done by online rollback was not committed in the normal code-path but was * completed by secshr_db_clnup, wc_blocked will be set to TRUE. But, since online rollback never * invokes grab_crit (since csa->hold_onto_crit is set to TRUE), wcs_recover is never invoked. This * could result in the last update never getting flushed to the disk and if online rollback happened * to be the last writer then the shared memory will be flushed and removed and the last update will * be lost. So, force wcs_recover if we find ourselves in such a situation. But, wc_blocked is * possible only if phase1 or phase2 errors are induced using white box test cases */ assert(WB_COMMIT_ERR_ENABLED); wcs_recover(reg); } /* Note WCSFLU_SYNC_EPOCH ensures the epoch is synced to the journal and indirectly * also ensures that the db is fsynced. We don't want to use it in the calls to * wcs_flu() from t_end() and tp_tend() since we can defer it to out-of-crit there. * In this case, since we are running down, we don't have any such option. 
*/ cnl->remove_shm = wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); /* Since we_are_last_writer, we should be guaranteed that wcs_flu() did not change csd, (in * case of MM for potential file extension), even if it did a grab_crit(). Therefore, make * sure that's true. */ assert(csd == csa->hdr); assert(0 == memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 1)); } else if (((canceled_flush_timer && (0 > cnl->wcs_timers)) || canceled_dbsync_timer) && !inst_is_frozen) { /* canceled pending db or jnl flush timers - flush database and journal buffers to disk */ if (!was_crit) grab_crit(reg); /* we need to sync the epoch as the fact that there is no active pending flush timer implies * there will be no one else who will flush the dirty buffers and EPOCH to disk in a timely fashion */ wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); if (!was_crit) rel_crit(reg); assert((dba_mm == cs_data->acc_meth) || (csd == cs_data)); csd = cs_data; /* In case this is MM and wcs_flu() remapped an extended database, reset csd */ } /* Do rundown journal processing after buffer flushes since they require jnl to be open */ if (JNL_ENABLED(csd)) { /* the following tp_change_reg() is not needed due to the assert csa == cs_addrs at the beginning * of gds_rundown(), but just to be safe. To be removed by 2002!! --- nars -- 2001/04/25. */ tp_change_reg(); /* call this because jnl_ensure_open checks cs_addrs rather than gv_cur_region */ jpc = csa->jnl; jbp = jpc->jnl_buff; if (jbp->fsync_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->fsync_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if (jbp->io_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->io_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if ((((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc)) || we_are_last_writer && (0 != cnl->jnl_file.u.inode)) && ok_to_write_pfin) { /* We need to close the journal file cleanly if we have the latest generation journal file open * or if we are the last writer and the journal file is open in shared memory (not necessarily * by ourselves e.g. the only process that opened the journal got shot abnormally) * Note: we should not infer anything from the shared memory value of cnl->jnl_file.u.inode * if we are not the last writer as it can be concurrently updated. */ if (!was_crit) grab_crit(reg); if (JNL_ENABLED(csd)) { SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pini/pfin/jnl_file_close all need it */ /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { /* If we_are_last_writer, we would have already done a wcs_flu() which would * have written an epoch record and we are guaranteed no further updates * since we are the last writer. So, just close the journal. * If the freeaddr == post_epoch_freeaddr, wcs_flu may have skipped writing * a pini, so allow for that. */ assert(!jbp->before_images || is_mm || !we_are_last_writer || (0 != jpc->pini_addr) || jgbl.mur_extract || (jpc->jnl_buff->freeaddr == jpc->jnl_buff->post_epoch_freeaddr)); /* If we haven't written a pini, let jnl_file_close write the pini/pfin. 
*/ if (!jgbl.mur_extract && (0 != jpc->pini_addr)) jnl_put_jrt_pfin(csa); /* If not the last writer and no pending flush timer left, do jnl flush now */ if (!we_are_last_writer && (0 > cnl->wcs_timers)) { if (SS_NORMAL == (jnl_status = jnl_flush(reg))) { assert(jbp->freeaddr == jbp->dskaddr); jnl_fsync(reg, jbp->dskaddr); assert(jbp->fsync_dskaddr == jbp->dskaddr); } else { send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush in gds_rundown"), jnl_status); assert(NOJNL == jpc->channel);/* jnl file lost has been triggered */ /* In this routine, all code that follows from here on does not * assume anything about the journaling characteristics of this * database so it is safe to continue execution even though * journaling got closed in the middle. */ } } jnl_file_close(reg, we_are_last_writer, FALSE); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(reg)); } if (!was_crit) rel_crit(reg); } } if (we_are_last_writer) /* Flush the fileheader last and harden the file to disk */ { if (!was_crit) grab_crit(reg); /* To satisfy crit requirement in fileheader_sync() */ memset(csd->machine_name, 0, MAX_MCNAMELEN); /* clear the machine_name field */ if (!have_standalone_access && we_are_last_user) { /* mupip_exit_handler will do this after mur_close_file */ csd->semid = INVALID_SEMID; csd->shmid = INVALID_SHMID; csd->gt_sem_ctime.ctime = 0; csd->gt_shm_ctime.ctime = 0; } fileheader_sync(reg); if (!was_crit) rel_crit(reg); if (!is_mm) { GTM_DB_FSYNC(csa, udi->fd, rc); /* Sync it all */ if (-1 == rc) { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } else { /* Now do final MM file sync before exit */ assert(csa->ti->total_blks == csa->total_blks); #ifdef _AIX GTM_DB_FSYNC(csa, udi->fd, rc); if (-1 == rc) #else if (-1 == MSYNC((caddr_t)csa->db_addrs[0], (caddr_t)csa->db_addrs[1])) #endif { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } } else if (unsafe_last_writer && !cnl->lastwriterbypas_msg_issued) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_LASTWRITERBYPAS, 2, DB_LEN_STR(reg)); cnl->lastwriterbypas_msg_issued = TRUE; } } /* end if (!reg->read_only && !cnl->donotflush_dbjnl) */ /* We had canceled all db timers at start of rundown. In case as part of rundown (wcs_flu above), we had started * any timers, cancel them BEFORE setting reg->open to FALSE (assert in wcs_clean_dbsync relies on this). */ CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer); if (reg->read_only && we_are_last_user && !have_standalone_access && cnl->remove_shm) { /* mupip_exit_handler will do this after mur_close_file */ db_ipcs.semid = INVALID_SEMID; db_ipcs.shmid = INVALID_SHMID; db_ipcs.gt_sem_ctime = 0; db_ipcs.gt_shm_ctime = 0; db_ipcs.fn_len = reg->dyn.addr->fname_len; memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len); db_ipcs.fn[reg->dyn.addr->fname_len] = 0; /* request gtmsecshr to flush. 
read_only cannot flush itself */ WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa); if (!csa->read_only_fs) { secshrstat = send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0); if (0 != secshrstat) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gtmsecshr failed to update database file header")); } } /* Done with file now, close it */ CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */ if (-1 == rc) { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during file close"), errno); } /* Unmap storage if mm mode but only the part that is not the fileheader (so shows up in dumps) */ # if !defined(_AIX) if (is_mm && (NULL != csa->db_addrs[0])) { assert(csa->db_addrs[1] > csa->db_addrs[0]); munmap_len = (sm_long_t)(csa->db_addrs[1] - csa->db_addrs[0]); if (0 < munmap_len) munmap((caddr_t)(csa->db_addrs[0]), (size_t)(munmap_len)); } # endif /* Detach our shared memory while still under lock so reference counts will be correct for the next process to run down * this region. In the process also get the remove_shm status from node_local before detaching. * If cnl->donotflush_dbjnl is TRUE, it means we can safely remove shared memory without compromising data * integrity as a reissue of recover will restore the database to a consistent state. */ remove_shm = !vermismatch && (cnl->remove_shm || cnl->donotflush_dbjnl); /* We are done with online rollback on this region. Indicate to other processes by setting the onln_rlbk_pid to 0. * Do it before releasing crit (t_end relies on this ordering when accessing cnl->onln_rlbk_pid). */ if (jgbl.onlnrlbk) cnl->onln_rlbk_pid = 0; rel_crit(reg); /* Since we are about to detach from the shared memory, release crit and reset onln_rlbk_pid */ /* If we had skipped flushing journal and database buffers due to a concurrent online rollback, increment the counter * indicating that in the shared memory so that online rollback can report the # of such processes when it shuts down. * The same thing is done for both FTOK and access control semaphores when there are too many MUMPS processes. */ if (safe_mode) /* indicates flushing was skipped */ { if (bypassed_access) cnl->dbrndwn_access_skip++; /* Access semaphore can be bypassed during online rollback */ if (bypassed_ftok) cnl->dbrndwn_ftok_skip++; } if (jgbl.onlnrlbk) csa->hold_onto_crit = FALSE; GTM_WHITE_BOX_TEST(WBTEST_HOLD_SEM_BYPASS, cnl->wbox_test_seq_num, 0); status = shmdt((caddr_t)cnl); csa->nl = NULL; /* dereferencing nl after detach is not right, so we set it to NULL so that we can test before dereference*/ /* Note that although csa->nl is NULL, we use CSA_ARG(csa) below (not CSA_ARG(NULL)) to be consistent with similar * usages before csa->nl became NULL. The "is_anticipatory_freeze_needed" function (which is in turn called by the * CHECK_IF_FREEZE_ON_ERROR_NEEDED macro) does a check of csa->nl before dereferencing shared memory contents so * we are safe passing "csa". 
*/ if (-1 == status) send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during shmdt"), errno); REMOVE_CSA_FROM_CSADDRSLIST(csa); /* remove "csa" from list of open regions (cs_addrs_list) */ reg->open = FALSE; /* If file is still not in good shape, die here and now before we get rid of our storage */ assertpro(0 == csa->wbuf_dqd); ipc_deleted = FALSE; /* If we are the very last user, remove shared storage id and the semaphores */ if (we_are_last_user) { /* remove shared storage, only if last writer to rundown did a successful wcs_flu() */ assert(!vermismatch); if (remove_shm) { ipc_deleted = TRUE; if (0 != shm_rmid(udi->shmid)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove shared memory")); /* Note that we no longer have a new shared memory. Currently only used/usable for standalone rollback. */ udi->new_shm = FALSE; /* mupip recover/rollback don't release the semaphore here, but do it later in db_ipcs_reset (invoked from * mur_close_files()) */ if (!have_standalone_access) { if (0 != sem_rmid(udi->semid)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove semaphore")); udi->new_sem = FALSE; /* Note that we no longer have a new semaphore */ udi->grabbed_access_sem = FALSE; udi->counter_acc_incremented = FALSE; } } else if (is_src_server || is_updproc) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else { assert(!have_standalone_access || jgbl.onlnrlbk || safe_mode); if (!jgbl.onlnrlbk && !have_standalone_access) { /* If we were writing, get rid of our writer access count semaphore */ if (!reg->read_only) { if (!access_counter_halted) { save_errno = do_semop(udi->semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO); if (0 != save_errno) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown access control semaphore decrement"), CALLFROM, save_errno); } udi->counter_acc_incremented = FALSE; } assert(safe_mode || !bypassed_access); /* Now remove the rundown lock */ if (!bypassed_access) { if (0 != (save_errno = do_semop(udi->semid, DB_CONTROL_SEM, -1, SEM_UNDO))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown access control semaphore release"), CALLFROM, save_errno); udi->grabbed_access_sem = FALSE; } } /* else access control semaphore will be released in db_ipcs_reset */ } if (!have_standalone_access) { if (bypassed_ftok) { if (!ftok_counter_halted) if (0 != (save_errno = do_semop(udi->ftok_semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } else if (!ftok_sem_release(reg, !ftok_counter_halted, FALSE)) { FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_release, process_id); rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } udi->grabbed_ftok_sem = FALSE; udi->counter_ftok_incremented = FALSE; } ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); if (!ipc_deleted) { GET_CUR_TIME(time_str); if (is_src_server) gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, 
time_str, LEN_AND_LIT("Source server"), REG_LEN_STR(reg)); if (is_updproc) gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Update process"), REG_LEN_STR(reg)); if (mupip_jnl_recover && (!jgbl.onlnrlbk || !we_are_last_user)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); } } REVERT; return EXIT_NRM; }
void jnl_fsync(gd_region *reg, uint4 fsync_addr) { jnl_private_control *jpc; jnl_buffer_ptr_t jb; uint4 lcnt, saved_dsk_addr, saved_status; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; int4 lck_state; int fsync_ret, save_errno; DEBUG_ONLY(uint4 onln_rlbk_pid;) csa = &FILE_INFO(reg)->s_addrs; jpc = csa->jnl; jb = jpc->jnl_buff; /* If a concurrent online rollback is running, we should never be here since online rollback at the start flushes all the * dirty buffers and ensures that the journal buffers are all synced to disk. So, there is no need for GT.M processes to * reach here with a concurrent online rollback. Assert to that effect. */ DEBUG_ONLY(onln_rlbk_pid = csa->nl->onln_rlbk_pid); assert(jgbl.onlnrlbk || !onln_rlbk_pid || !is_proc_alive(onln_rlbk_pid, 0)); if ((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc)) { csd = csa->hdr; for (lcnt = 1; fsync_addr > jb->fsync_dskaddr && !JNL_FILE_SWITCHED(jpc); lcnt++) { if (MAX_FSYNC_WAIT_CNT / 2 == lcnt) /* half way into the wait timeout */ { saved_status = jpc->status; jpc->status = SS_NORMAL; DEBUG_ONLY( if (CURRENT_WRITER) GET_C_STACK_FROM_SCRIPT("JNLFSYNCSTUCK_HALF_TIME", process_id, CURRENT_WRITER, ONCE); ) jnl_send_oper(jpc, ERR_JNLFSYNCLSTCK);
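/* The wait loop above is bounded and emits diagnostics (a C-stack capture of the stuck writer) at half the timeout
 * rather than hanging silently. The same shape in a standalone sketch, assuming hypothetical names (MAX_WAIT_CNT,
 * wait_with_diagnostics) that are not GT.M's:
 */
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_WAIT_CNT	64	/* polling budget; the real code derives it from a timeout */

/* poll until *done is set; report at half the budget, give up when it is exhausted */
static bool wait_with_diagnostics(volatile bool *done)
{
	unsigned int	lcnt;

	for (lcnt = 1; !*done; lcnt++)
	{
		if (MAX_WAIT_CNT / 2 == lcnt)
			fprintf(stderr, "half the wait budget used; capture diagnostics here\n");
		if (MAX_WAIT_CNT < lcnt)
			return false;	/* caller escalates, e.g. operator log plus error */
		usleep(10 * 1000);	/* 10ms between polls */
	}
	return true;
}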
/* Note about usage of this function : Create dummy gd_region, gd_segment, file_control, * unix_db_info, sgmnt_addrs, and allocate mutex_struct (and NUM_CRIT_ENTRY * mutex_que_entry), * mutex_spin_parms_struct, and node_local in shared memory. Initialize the fields as in * jnlpool_init(). Pass the address of the dummy region as argument to this function. */ boolean_t grab_lock(gd_region *reg, boolean_t is_blocking_wait, uint4 onln_rlbk_action) { unix_db_info *udi; sgmnt_addrs *csa; enum cdb_sc status; mutex_spin_parms_ptr_t mutex_spin_parms; char scndry_msg[OUT_BUFF_SIZE]; # ifdef DEBUG DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; # endif udi = FILE_INFO(reg); csa = &udi->s_addrs; assert(!csa->hold_onto_crit); assert(!csa->now_crit); if (!csa->now_crit) { assert(0 == crit_count); crit_count++; /* prevent interrupts */ DEBUG_ONLY(locknl = csa->nl); /* for DEBUG_ONLY LOCK_HIST macro */ mutex_spin_parms = (mutex_spin_parms_ptr_t)((sm_uc_ptr_t)csa->critical + JNLPOOL_CRIT_SPACE); /* This assumes that mutex_spin_parms_t is located immediately after the crit structures */ /* As of 10/07/98, crashcnt field in mutex_struct is not changed by any function for the dummy region */ if (is_blocking_wait) status = mutex_lockw(reg, mutex_spin_parms, 0); else status = mutex_lockwim(reg, mutex_spin_parms, 0); DEBUG_ONLY(locknl = NULL); /* restore "locknl" to default value */ if (status != cdb_sc_normal) { crit_count = 0; switch(status) { case cdb_sc_critreset: /* As of 10/07/98, this return value is not possible */ rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_CRITRESET, 2, REG_LEN_STR(reg)); case cdb_sc_dbccerr: rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_DBCCERR, 2, REG_LEN_STR(reg)); case cdb_sc_nolock: return FALSE; default: assertpro(FALSE && status); } return FALSE; } /* There is only one case we know of when csa->nl->in_crit can be non-zero and that is when a process holding * crit gets kill -9ed and another process ends up invoking "secshr_db_clnup" which in turn clears the * crit semaphore (making it available for waiters) but does not also clear csa->nl->in_crit since it does not * hold crit at that point. But in that case, the pid reported in csa->nl->in_crit should be dead. Check that. */ assert((0 == csa->nl->in_crit) || (FALSE == is_proc_alive(csa->nl->in_crit, 0))); csa->nl->in_crit = process_id; CRIT_TRACE(crit_ops_gw); /* see gdsbt.h for comment on placement */ crit_count = 0; if (jnlpool.repl_inst_filehdr->file_corrupt && !jgbl.onlnrlbk) { /* Journal pool indicates an abnormally terminated online rollback. Cannot continue until the rollback * command is re-run to bring the journal pool/file and instance file to a consistent state. */ SNPRINTF(scndry_msg, OUT_BUFF_SIZE, "Instance file header has file_corrupt field set to TRUE"); /* No need to do rel_lock before rts_error (mupip_exit_handler will do it for us) - BYPASSOK rts_error */ rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLREQROLLBACK, 2, LEN_AND_STR(udi->fn), ERR_TEXT, 2, LEN_AND_STR(scndry_msg)); } /* If ASSERT_NO_ONLINE_ROLLBACK, then no concurrent online rollbacks can happen at this point. So, the jnlpool * should be in sync. There are two exceptions. If this is GT.CM GNP Server and the last client disconnected, the * server invokes gtcmd_rundown which in turn invokes gds_rundown thereby running down all active databases at this * point but leaves the journal pool up and running. Now, if an online rollback is attempted, it increments the * onln_rlbk_cycle in the journal pool, but csa->onln_rlbk_cycle is not synced yet. 
So, the grab_crit done in t_end * will NOT detect a concurrent online rollback and it doesn't need to because the rollback happened AFTER the * rundown. Assert that this is the only case we know of for the cycles to be out-of-sync. In PRO * jnlpool_ctl->onln_rlbk_cycle is used only by the replication servers (which GT.CM is not) and so even if it * continues with an out-of-sync csa->onln_rlbk_cycle, t_end logic does the right thing. The other exception is if * GT.M initialized journal pool while opening database (belonging to a different instance) in gvcst_init (for * anticipatory freeze) followed by an online rollback which increments the jnlpool_ctl->onln_rlbk_cycle but leaves * the repl_csa->onln_rlbk_cycle out-of-sync. At this point, if a replicated database is open for the first time, * we'll reach t_end to commit the update but will end up failing the below assert due to the out-of-sync * onln_rlbk_cycle. So, assert accordingly. Note : even though the cycles are out-of-sync they are not an issue for * GT.M because it always relies on the onln_rlbk_cycle from csa->nl and not from repl_csa. But, we don't remove the * assert as it is valuable for replication servers (Source, Receiver and Update Process). */ assert((ASSERT_NO_ONLINE_ROLLBACK != onln_rlbk_action) || (csa->onln_rlbk_cycle == jnlpool.jnlpool_ctl->onln_rlbk_cycle) || IS_GTCM_GNP_SERVER_IMAGE || (jnlpool_init_needed && INST_FREEZE_ON_ERROR_POLICY)); if ((HANDLE_CONCUR_ONLINE_ROLLBACK == onln_rlbk_action) && (csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle)) { assert(is_src_server); SYNC_ONLN_RLBK_CYCLES; gtmsource_onln_rlbk_clnup(); /* side-effect : sets gtmsource_state */ rel_lock(reg); /* caller knows to disconnect and re-establish the connection */ } } return TRUE; }
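/* grab_lock() detects an online rollback that ran since this process last looked by comparing its private cycle
 * number against the shared one while holding the lock, then resyncing before telling the caller to restart. A
 * minimal standalone sketch of that generation-counter pattern, with hypothetical names (shared_state, my_state):
 */
#include <stdbool.h>

struct shared_state { unsigned int rollback_cycle; };	/* bumped once per online rollback */
struct my_state { unsigned int seen_cycle; };		/* last value this process observed */

/* call with the pool lock held; returns true if a rollback intervened since the last check */
static bool rollback_intervened(struct my_state *me, const struct shared_state *shared)
{
	if (me->seen_cycle == shared->rollback_cycle)
		return false;
	me->seen_cycle = shared->rollback_cycle;	/* resync, as SYNC_ONLN_RLBK_CYCLES does */
	return true;	/* caller releases the lock and restarts from a clean state */
}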