void process_reorg_encrypt_restart(void)
{
	intrpt_state_t	prev_intrpt_state;
	enc_info_t	*encr_ptr;
	int		gtmcrypt_errno;
	gd_segment	*seg;
	sgmnt_addrs	*csa;

	csa = reorg_encrypt_restart_csa;
	assert(NULL != csa);	/* caller should have ensured this */
	/* Opening handles for encryption is a heavyweight operation. Caller should have ensured we are not in crit for
	 * any region when the new key handles are opened for any one region. Assert that.
	 */
	assert(0 == have_crit(CRIT_HAVE_ANY_REG));
	DEFER_INTERRUPTS(INTRPT_IN_CRYPT_RECONFIG, prev_intrpt_state);
	encr_ptr = csa->encr_ptr;
	assert(NULL != encr_ptr);
	DBG_RECORD_CRYPT_RECEIVE(csa->hdr, csa, csa->nl, process_id, encr_ptr);
	seg = csa->region->dyn.addr;
	INIT_DB_OR_JNL_ENCRYPTION(csa, encr_ptr, seg->fname_len, seg->fname, gtmcrypt_errno);
	if (0 != gtmcrypt_errno)
	{
		ENABLE_INTERRUPTS(INTRPT_IN_CRYPT_RECONFIG, prev_intrpt_state);
		GTMCRYPT_REPORT_ERROR(gtmcrypt_errno, rts_error, seg->fname_len, seg->fname);
	}
	reorg_encrypt_restart_csa = NULL;
	ENABLE_INTERRUPTS(INTRPT_IN_CRYPT_RECONFIG, prev_intrpt_state);
}
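/* The DEFER_INTERRUPTS/ENABLE_INTERRUPTS bracketing above is the standard way this codebase keeps a multi-step
 * operation (here: re-opening encryption key handles) from being torn by a MUPIP STOP or timer pop. A minimal
 * sketch of the pattern follows; it assumes only the two macros and an intrpt_state_t variable, as used throughout
 * this module. Note that every early-exit path (including the error path above) must re-enable interrupts before
 * leaving the bracket.
 */
#if 0	/* illustrative sketch only, not compiled */
void bracketed_operation(void)
{
	intrpt_state_t	prev_intrpt_state;

	DEFER_INTERRUPTS(INTRPT_IN_CRYPT_RECONFIG, prev_intrpt_state);	/* hold off async events */
	/* ... non-interruptible work ... */
	ENABLE_INTERRUPTS(INTRPT_IN_CRYPT_RECONFIG, prev_intrpt_state);	/* deferred events are driven here */
}
#endif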
STATICFNDEF int gtm_trigger_invoke(void)
{	/* Invoke trigger M routine. Separate so error returns to gtm_trigger with proper retcode */
	int	rc;

	ESTABLISH_RET(gtm_trigger_ch, mumps_status);
	gtm_trigger_depth++;
	DBGTRIGR((stderr, "gtm_trigger: Dispatching trigger at depth %d\n", gtm_trigger_depth));
	assert(0 < gtm_trigger_depth);
	assert(GTM_TRIGGER_DEPTH_MAX >= gtm_trigger_depth);
	/* Allow interrupts to occur while the trigger is running */
	ENABLE_INTERRUPTS(INTRPT_IN_TRIGGER_NOMANS_LAND);
	rc = dm_start();
	/* Now that we no longer have a trigger stack frame, we are back in trigger no-mans-land */
	DEFER_INTERRUPTS(INTRPT_IN_TRIGGER_NOMANS_LAND);
	gtm_trigger_depth--;
	DBGTRIGR((stderr, "gtm_trigger: Trigger returns with rc %d\n", rc));
	REVERT;
	assert(frame_pointer->type & SFT_TRIGR);
	assert(0 <= gtm_trigger_depth);
	return rc;
}
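/* gtm_trigger_invoke() relies on the ESTABLISH_RET/REVERT condition-handler pairing: any error raised while
 * dm_start() runs is routed to gtm_trigger_ch, which unwinds back to the caller with mumps_status as the return
 * code; REVERT pops the handler on the normal path. A minimal sketch of that pairing, under the same assumptions
 * (handler and status variable already declared as in this module):
 */
#if 0	/* illustrative sketch only, not compiled */
int guarded_call(void)
{
	int	rc;

	ESTABLISH_RET(gtm_trigger_ch, mumps_status);	/* on error, return mumps_status from here */
	rc = dm_start();				/* protected work */
	REVERT;						/* pop the handler on the normal path */
	return rc;
}
#endif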
void tp_unwind(uint4 newlevel, enum tp_unwind_invocation invocation_type, int *tprestart_rc)
{
	mlk_pvtblk	**prior, *mlkp;
	mlk_tp		*oldlock, *nextlock;
	int		tl;
	lv_val		*save_lv, *curr_lv, *lv;
	tp_var		*restore_ent;
	mv_stent	*mvc;
	boolean_t	restore_lv, rollback_locks;
	lvscan_blk	*lvscan, *lvscan_next, first_lvscan;
	int		elemindx, rc;
	lvTree		*lvt_child;

	/* We are about to clean up structures. Defer MUPIP STOP/signal handling until function end. */
	DEFER_INTERRUPTS(INTRPT_IN_TP_UNWIND);
	/* Unwind the requested TP levels */
#	if defined(DEBUG_REFCNT) || defined(DEBUG_ERRHND)
	DBGFPF((stderr, "\ntp_unwind: Beginning TP unwind process\n"));
#	endif
	restore_lv = (RESTART_INVOCATION == invocation_type);
	lvscan = &first_lvscan;
	lvscan->next = NULL;
	lvscan->elemcnt = 0;
	assert((tp_sp <= tpstackbase) && (tp_sp > tpstacktop));
	assert((tp_pointer <= (tp_frame *)tpstackbase) && (tp_pointer > (tp_frame *)tpstacktop));
	for (tl = dollar_tlevel; tl > newlevel; --tl)
	{
		DBGRFCT((stderr, "\ntp_unwind: Unwinding level %d -- tp_pointer: 0x"lvaddr"\n", tl, tp_pointer));
		assertpro(NULL != tp_pointer);
		for (restore_ent = tp_pointer->vars; NULL != restore_ent; restore_ent = tp_pointer->vars)
		{
			/*********************************************************************************/
			/* TP_VAR_CLONE sets the var_cloned flag, showing that the tree has been cloned. */
			/* If var_cloned is not set, it shows that curr_lv and save_lv are still sharing */
			/* the tree, so it should not be killed.                                         */
			/*********************************************************************************/
			curr_lv = restore_ent->current_value;
			save_lv = restore_ent->save_value;
			assert(curr_lv);
			assert(save_lv);
			assert(LV_IS_BASE_VAR(curr_lv));
			assert(LV_IS_BASE_VAR(save_lv));
			assert(0 < curr_lv->stats.trefcnt);
			assert(curr_lv->tp_var);
			assert(curr_lv->tp_var == restore_ent);
			/* In order to restart sub-transactions, this would have to maintain
			 * the chain that currently is not built by op_tstart().
			 */
			if (restore_lv)
			{
				rc = tp_unwind_restlv(curr_lv, save_lv, restore_ent, NULL, tprestart_rc);
#				ifdef GTM_TRIGGER
				if (0 != rc)
				{
					dollar_tlevel = tl;	/* Record fact if we unwound some tp_frames */
					ENABLE_INTERRUPTS(INTRPT_IN_TP_UNWIND);	/* drive any MUPIP STOP/signals deferred
										 * while in this function */
					TPUNWND_WBOX_TEST;	/* Debug-only wbox-test to simulate SIGTERM */
					INVOKE_RESTART;
				}
#				endif
			} else if (restore_ent->var_cloned)
			{	/* curr_lv has been cloned.
				 * Note: LV_CHILD(save_lv) can be non-NULL only if restore_ent->var_cloned is TRUE.
				 */
				DBGRFCT((stderr, "\ntp_unwind: Not restoring curr_lv and is cloned\n"));
				lvt_child = LV_GET_CHILD(save_lv);
				if (NULL != lvt_child)
				{	/* If subtree exists, we have to blow away the cloned tree */
					DBGRFCT((stderr, "\ntp_unwind: save_lv has children\n"));
					assert(save_lv->tp_var);
					DBGRFCT((stderr,"\ntp_unwind: For lv_val 0x"lvaddr": Deleting saved lv_val 0x"lvaddr"\n",
						 curr_lv, save_lv));
					assert(LVT_PARENT(lvt_child) == (lvTreeNode *)save_lv);
					lv_kill(save_lv, DOTPSAVE_FALSE, DO_SUBTREE_TRUE);
				}
				restore_ent->var_cloned = FALSE;
			} else
			{	/* If not cloned, we still have to reduce the reference counts of any
				 * container vars in the untouched tree that were added to keep anything
				 * they referenced from disappearing.
				 */
				DBGRFCT((stderr, "\ntp_unwind: Not restoring curr_lv and is NOT cloned\n"));
				lvt_child = LV_GET_CHILD(curr_lv);
				if (NULL != lvt_child)
				{
					DBGRFCT((stderr, "\ntp_unwind: curr_lv has children and so reducing ref counts\n"));
					TPUNWND_CNTNRS_IN_TREE(curr_lv);
				}
			}
			LV_FREESLOT(save_lv);
			/* Not easy to predict what the trefcnt will be except that it should be greater than zero. In
			 * most cases, it will have its own hash table ref plus the extras we added but it is also
			 * possible that the entry has been kill *'d in which case the ONLY ref that will be left is
			 * our own increment but there is no [quick] way to distinguish this case so we just
			 * test for > 0.
			 */
			assert(0 < curr_lv->stats.trefcnt);
			assert(0 < curr_lv->stats.crefcnt);
			DECR_CREFCNT(curr_lv);	/* Remove the copy refcnt we added in op_tstart() or lv_newname() */
			DECR_BASE_REF_NOSYM(curr_lv, FALSE);
			curr_lv->tp_var = NULL;
			tp_pointer->vars = restore_ent->next;
			free(restore_ent);
		}
		if ((tp_pointer->fp == frame_pointer) && (MVST_TPHOLD == mv_chain->mv_st_type)
		    && (msp == (unsigned char *)mv_chain))
			POP_MV_STENT();
		if (NULL == tp_pointer->old_tp_frame)
			tp_sp = tpstackbase;
		else
			tp_sp = (unsigned char *)tp_pointer->old_tp_frame;
		if (tp_sp > tpstackbase)
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_STACKUNDERFLO);
		if (tp_pointer->tp_save_all_flg)
			--tp_pointer->sym->tp_save_all;
		if ((NULL != (tp_pointer = tp_pointer->old_tp_frame))	/* Note assignment */
		    && ((tp_pointer < (tp_frame *)tp_sp) || (tp_pointer > (tp_frame *)tpstackbase)
			|| (tp_pointer < (tp_frame *)tpstacktop)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_STACKUNDERFLO);
	}
	if ((0 != newlevel) && restore_lv)
	{	/* Restore current context (without releasing) */
		assertpro(NULL != tp_pointer);
		DBGRFCT((stderr, "\n\n** tp_unwind: Newlevel (%d) != 0 loop processing\n", newlevel));
		for (restore_ent = tp_pointer->vars; NULL != restore_ent; restore_ent = restore_ent->next)
		{
			curr_lv = restore_ent->current_value;
			save_lv = restore_ent->save_value;
			assert(curr_lv);
			assert(save_lv);
			assert(LV_IS_BASE_VAR(curr_lv));
			assert(LV_IS_BASE_VAR(save_lv));
			assert(curr_lv->tp_var);
			assert(curr_lv->tp_var == restore_ent);
			assert(0 < curr_lv->stats.trefcnt);
			rc = tp_unwind_restlv(curr_lv, save_lv, restore_ent, &lvscan, tprestart_rc);
#			ifdef GTM_TRIGGER
			if (0 != rc)
			{
				dollar_tlevel = tl;	/* Record fact if we unwound some levels */
				ENABLE_INTERRUPTS(INTRPT_IN_TP_UNWIND);	/* drive any MUPIP STOP/signals deferred while
									 * in this function */
				TPUNWND_WBOX_TEST;	/* Debug-only wbox-test to simulate SIGTERM */
				INVOKE_RESTART;
			}
#			endif
			assert(0 < curr_lv->stats.trefcnt);	/* Should have its own hash table ref plus the extras we added */
			assert(0 < curr_lv->stats.crefcnt);
		}
		/* If we have any lv_vals queued up to be scanned for container vars, do that now */
		DBGRFCT((stderr, "\ntp_unwind: Starting deferred rescan of lv trees needing refcnt processing\n"));
		while (0 < lvscan->elemcnt)
		{
			assert(ARY_SCNCNTNR_DIM >= lvscan->elemcnt);
			for (elemindx = 0; lvscan->elemcnt > elemindx; ++elemindx)
			{
				lv = lvscan->ary_scncntnr[elemindx];
				DBGRFCT((stderr, "\n**tp_unwind_process_lvscan_array: Deferred processing lv 0x"lvaddr"\n", lv));
				assert(LV_IS_BASE_VAR(lv));
				/* This is the final level being restored so redo the counters on these vars */
				TPREST_CNTNRS_IN_TREE(lv);
			}
			/* If we allocated any secondary blocks, we are done with them now so release them. Only the
			 * very last block on the chain is the original block that was automatically allocated which
			 * should not be freed in this fashion.
			 */
			lvscan_next = lvscan->next;
			if (NULL != lvscan_next)
			{	/* There is another block on the chain so this one can be freed */
				free(lvscan);
				DBGRFCT((stderr, "\ntp_unwind_process_lvscan_array: Freeing lvscan array\n"));
				lvscan = lvscan_next;
			} else
			{	/* Since this is the original block allocated on the C stack which we may reuse,
				 * zero the element count.
				 */
				lvscan->elemcnt = 0;
				DBGRFCT((stderr, "\ntp_unwind_process_lvscan_array: Setting elemcnt to 0 in original "
					 "lvscan block\n"));
				assert(lvscan == &first_lvscan);
			}
		}
	}
	assert(0 == lvscan->elemcnt);	/* verify no elements queued that were not scanned */
	rollback_locks = (COMMIT_INVOCATION != invocation_type);
	for (prior = &mlk_pvt_root, mlkp = *prior; NULL != mlkp; mlkp = *prior)
	{
		if (mlkp->granted)
		{	/* This was a pre-existing lock */
			for (oldlock = mlkp->tp; (NULL != oldlock) && ((int)oldlock->tplevel > newlevel); oldlock = nextlock)
			{	/* Remove references to the lock from levels being unwound */
				nextlock = oldlock->next;
				free(oldlock);
			}
			if (rollback_locks)
			{
				if (NULL == oldlock)
				{	/* Lock did not exist at the tp level being unwound to */
					mlk_unlock(mlkp);
					mlk_pvtblk_delete(prior);
					continue;
				} else
				{	/* Lock still exists but restore lock state as it was when the transaction started. */
					mlkp->level = oldlock->level;
					mlkp->zalloc = oldlock->zalloc;
				}
			}
			if ((NULL != oldlock) && (oldlock->tplevel == newlevel))
			{	/* Remove lock reference from level being unwound to,
				 * now that any {level,zalloc} state information has been restored.
				 */
				assert((NULL == oldlock->next) || (oldlock->next->tplevel < newlevel));
				mlkp->tp = oldlock->next;	/* update root reference pointer */
				free(oldlock);
			} else
				mlkp->tp = oldlock;	/* update root reference pointer */
			prior = &mlkp->next;
		} else
			mlk_pvtblk_delete(prior);
	}
	DBGRFCT((stderr, "tp_unwind: Processing complete\n"));
	dollar_tlevel = newlevel;
	ENABLE_INTERRUPTS(INTRPT_IN_TP_UNWIND);	/* check if any MUPIP STOP/signals were deferred while in this function */
}
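/* The lvscan list drained above uses a common overflow idiom: the first fixed-size block lives on the C stack and
 * any further blocks are malloc'd and chained in front of it; on drain, every block except the stack-resident
 * original is freed, and the original just has its count reset for reuse. A hedged sketch (field names match the
 * lvscan_blk usage above; ARY_SCNCNTNR_DIM is the per-block capacity):
 */
#if 0	/* illustrative sketch only, not compiled */
lvscan_blk	first, *lvscan = &first;

first.next = NULL;
first.elemcnt = 0;
/* ... queue elements, chaining in a malloc'd lvscan_blk whenever lvscan->elemcnt reaches ARY_SCNCNTNR_DIM ... */
while (NULL != lvscan->next)
{	/* free every overflow block; only the stack block survives */
	lvscan_blk	*next = lvscan->next;

	free(lvscan);
	lvscan = next;
}
lvscan->elemcnt = 0;	/* original (stack) block is reset, never freed */
#endif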
void deferred_signal_handler(void)
{
	void	(*signal_routine)();
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* To avoid nested calls to this routine, we set forced_exit to FALSE at the very beginning */
	forced_exit = FALSE;
	if (exit_handler_active)
	{
		assert(FALSE);	/* at this point in time (June 2003) there is no way we know of to get here, hence the assert */
		return;		/* since anyway we are exiting currently, resume exit handling instead of reissuing another one */
	}
	/* For signals that get a delayed response so we can get out of crit, we also delay the messages.
	 * This routine will output those delayed messages from the appropriate structures to both the
	 * user and the system console.
	 */
	/* Note: can't use a switch here because the ERR_xxx values are not defined as constants */
	if (ERR_KILLBYSIG == forced_exit_err)
	{
		send_msg(VARLSTCNT(6) ERR_KILLBYSIG, 4, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal);
		gtm_putmsg(VARLSTCNT(6) ERR_KILLBYSIG, 4, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal);
	} else if (ERR_KILLBYSIGUINFO == forced_exit_err)
	{
		send_msg(VARLSTCNT(8) ERR_KILLBYSIGUINFO, 6, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			 signal_info.send_pid, signal_info.send_uid);
		gtm_putmsg(VARLSTCNT(8) ERR_KILLBYSIGUINFO, 6, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			   signal_info.send_pid, signal_info.send_uid);
	} else if (ERR_KILLBYSIGSINFO1 == forced_exit_err)
	{
		send_msg(VARLSTCNT(8) ERR_KILLBYSIGSINFO1, 6, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			 signal_info.int_iadr, signal_info.bad_vadr);
		gtm_putmsg(VARLSTCNT(8) ERR_KILLBYSIGSINFO1, 6, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			   signal_info.int_iadr, signal_info.bad_vadr);
	} else if (ERR_KILLBYSIGSINFO2 == forced_exit_err)
	{
		send_msg(VARLSTCNT(7) ERR_KILLBYSIGSINFO2, 5, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			 signal_info.int_iadr);
		gtm_putmsg(VARLSTCNT(7) ERR_KILLBYSIGSINFO2, 5, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			   signal_info.int_iadr);
	} else if (ERR_KILLBYSIGSINFO3 == forced_exit_err)
	{
		send_msg(VARLSTCNT(7) ERR_KILLBYSIGSINFO3, 5, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			 signal_info.bad_vadr);
		gtm_putmsg(VARLSTCNT(7) ERR_KILLBYSIGSINFO3, 5, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal,
			   signal_info.bad_vadr);
	} else if ((ERR_FORCEDHALT != forced_exit_err) || !gtm_quiet_halt)
	{	/* No HALT messages if quiet halt is requested */
		send_msg(VARLSTCNT(1) forced_exit_err);
		gtm_putmsg(VARLSTCNT(1) forced_exit_err);
	}
	assert(OK_TO_INTERRUPT);
	/* Signal intent to exit BEFORE driving condition handlers. This avoids checks that will otherwise fail (for example
	 * if mdb_condition_handler/preemptive_ch gets called below, that could invoke the RESET_GV_TARGET macro which in turn
	 * would assert that gv_target->gd_csa is equal to cs_addrs. This could not be true in case we were in mainline code
	 * that was interrupted by the flush timer for a different region which in turn was interrupted by an external signal
	 * that would drive us to exit. Setting the "process_exiting" variable causes those csa checks to pass.
	 */
	SET_PROCESS_EXITING_TRUE;
#	ifdef DEBUG
	if (gtm_white_box_test_case_enabled && (WBTEST_DEFERRED_TIMERS == gtm_white_box_test_case_number)
	    && (2 == gtm_white_box_test_case_count))
	{
		DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
		DBGFPF((stderr, "DEFERRED_SIGNAL_HANDLER: will sleep for 20 seconds\n"));
		LONG_SLEEP(20);
		DBGFPF((stderr, "DEFERRED_SIGNAL_HANDLER: done sleeping\n"));
		ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
	}
#	endif
	/* If any special routines are registered to be driven on a signal, drive them now */
	if ((0 != exi_condition) && (NULL != call_on_signal))
	{
		signal_routine = call_on_signal;
		call_on_signal = NULL;	/* So we don't recursively call ourselves */
		(*signal_routine)();
	}
	/* Note: we do not drive create_fatal_error_zshow_dmp() in this routine since any deferrable signals are
	 * by definition not fatal.
	 */
	exit(-exi_condition);
}
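/* deferred_signal_handler() is the delayed half of the usual async-signal pattern: the signal handler proper only
 * records what happened (forced_exit, forced_exit_err, signal_info) and the process drives this routine once it is
 * safely out of crit. A minimal sketch of that division of labor, with hypothetical names (async_handler and
 * safe_point are illustration only, not routines from this codebase):
 */
#if 0	/* illustrative sketch only, not compiled; needs <signal.h> for sig_atomic_t */
static volatile sig_atomic_t	deferred_sig;	/* all a handler may safely do is record state */

static void async_handler(int sig)
{
	deferred_sig = sig;	/* no I/O, no allocation, no locks inside the handler */
}

void safe_point(void)	/* called from mainline code when no critical resource is held */
{
	if (deferred_sig)
	{
		deferred_sig = 0;
		/* ... report the signal and drive exit handling, as the routine above does ... */
	}
}
#endif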
void obj_code(uint4 src_lines, void *checksum_ctx)
{
	int		status;
	rhdtyp		rhead;
	mline		*mlx, *mly;
	var_tabent	*vptr;
	int4		lnr_pad_len;
	intrpt_state_t	prev_intrpt_state;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(!run_time);
	obj_init();
	/* Define the routine name global symbol. */
	define_symbol(GTM_MODULE_DEF_PSECT, (mstr *)&int_module_name, 0);
	memset(&rhead, 0, SIZEOF(rhead));
	alloc_reg();
	jmp_opto();
	curr_addr = SIZEOF(rhdtyp);
	cg_phase = CGP_APPROX_ADDR;
	cg_phase_last = CGP_NOSTATE;
	code_gen();
	code_size = curr_addr;
	cg_phase = CGP_ADDR_OPT;
	shrink_jmps();
	comp_lits(&rhead);
	if ((cmd_qlf.qlf & CQ_MACHINE_CODE))
	{
		cg_phase = CGP_ASSEMBLY;
		code_gen();
	}
	if (!(cmd_qlf.qlf & CQ_OBJECT))
		return;
	rhead.ptext_ptr = SIZEOF(rhead);
	set_rtnhdr_checksum(&rhead, (gtm_rtn_src_chksum_ctx *)checksum_ctx);
	rhead.vartab_ptr = code_size;
	rhead.vartab_len = mvmax;
	code_size += mvmax * SIZEOF(var_tabent);
	rhead.labtab_ptr = code_size;
	rhead.labtab_len = mlmax;
	code_size += mlmax * SIZEOF(lab_tabent);
	rhead.lnrtab_ptr = code_size;
	rhead.lnrtab_len = src_lines;
	rhead.compiler_qlf = cmd_qlf.qlf;
	if (cmd_qlf.qlf & CQ_EMBED_SOURCE)
	{
		rhead.routine_source_offset = TREF(routine_source_offset);
		rhead.routine_source_length = (uint4)(stringpool.free - stringpool.base) - TREF(routine_source_offset);
	}
	rhead.temp_mvals = sa_temps[TVAL_REF];
	rhead.temp_size = sa_temps_offset[TCAD_REF];
	code_size += src_lines * SIZEOF(int4);
	lnr_pad_len = PADLEN(code_size, SECTION_ALIGN_BOUNDARY);
	code_size += lnr_pad_len;
	DEFER_INTERRUPTS(INTRPT_IN_OBJECT_FILE_COMPILE, prev_intrpt_state);
	create_object_file(&rhead);
	ENABLE_INTERRUPTS(INTRPT_IN_OBJECT_FILE_COMPILE, prev_intrpt_state);
	cg_phase = CGP_MACHINE;
	code_gen();
	/* Variable table: */
	vptr = (var_tabent *)mcalloc(mvmax * SIZEOF(var_tabent));
	if (mvartab)
		walktree(mvartab, cg_var, (char *)&vptr);
	else
		assert(0 == mvmax);
	emit_immed((char *)vptr, mvmax * SIZEOF(var_tabent));
	/* Label table: */
	if (mlabtab)
		walktree((mvar *)mlabtab, cg_lab, (char *)rhead.lnrtab_ptr);
	else
		assert(0 == mlmax);
	/* External entry definitions: */
	emit_immed((char *)&(mline_root.externalentry->rtaddr), SIZEOF(mline_root.externalentry->rtaddr));	/* line 0 */
	for (mlx = mline_root.child; mlx; mlx = mly)
	{
		if (mlx->table)
			emit_immed((char *)&(mlx->externalentry->rtaddr), SIZEOF(mlx->externalentry->rtaddr));
		if (0 == (mly = mlx->child))		/* note assignment */
			if (0 == (mly = mlx->sibling))	/* note assignment */
				for (mly = mlx; ; )
				{
					if (0 == (mly = mly->parent))	/* note assignment */
						break;
					if (mly->sibling)
					{
						mly = mly->sibling;
						break;
					}
				}
	}
	if (0 != lnr_pad_len)	/* emit padding so literal text pool starts on proper boundary */
		emit_immed(PADCHARS, lnr_pad_len);
#	if !defined(__MVS__) && !defined(__s390__)	/* assert not valid for instructions on OS390 */
	assert(code_size == psect_use_tab[GTM_CODE]);
#	endif
	emit_literals();
	DEFER_INTERRUPTS(INTRPT_IN_OBJECT_FILE_COMPILE, prev_intrpt_state);
	finish_object_file();
	ENABLE_INTERRUPTS(INTRPT_IN_OBJECT_FILE_COMPILE, prev_intrpt_state);
	CLOSE_OBJECT_FILE(object_file_des, status);
	if (-1 == status)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("close()"), CALLFROM, errno);
	/* Ready to make object visible. Rename from tmp name to real routine name */
	RENAME_TMP_OBJECT_FILE(object_file_name);
}
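/* The lnr_pad_len computation above pads the line-number table so that the literal text pool which follows starts
 * on SECTION_ALIGN_BOUNDARY. PADLEN is the usual round-up-to-boundary arithmetic; assuming a power-of-two boundary
 * it reduces to the sketch below (PAD_TO is a hypothetical name for illustration, not the repo's macro):
 */
#if 0	/* illustrative sketch only, not compiled */
/* bytes needed to round "size" up to the next multiple of "align" (align must be a power of 2) */
#define PAD_TO(size, align)	(((align) - ((size) & ((align) - 1))) & ((align) - 1))
/* e.g. PAD_TO(13, 8) == 3 and PAD_TO(16, 8) == 0 */
#endif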
boolean_t mu_truncate(int4 truncate_percent)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	int			num_local_maps;
	int			lmap_num, lmap_blk_num;
	int			bml_status, sigkill;
	int			save_errno;
	int			ftrunc_status;
	uint4			jnl_status;
	uint4			old_total, new_total;
	uint4			old_free, new_free;
	uint4			end_blocks;
	int4			blks_in_lmap, blk;
	gtm_uint64_t		before_trunc_file_size;
	off_t			trunc_file_size;
	off_t			padding;
	uchar_ptr_t		lmap_addr;
	boolean_t		was_crit;
	uint4			found_busy_blk;
	srch_blk_status		bmphist;
	srch_blk_status		*blkhist;
	srch_hist		alt_hist;
	trans_num		curr_tn;
	blk_hdr_ptr_t		lmap_blk_hdr;
	block_id		*blkid_ptr;
	unix_db_info		*udi;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	char			*err_msg;
	intrpt_state_t		prev_intrpt_state;
	off_t			offset;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csa = cs_addrs;
	csd = cs_data;
	if (dba_mm == csd->acc_meth)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		return TRUE;
	}
	/* already checked for parallel truncates on this region --- see mupip_reorg.c */
	gv_target = NULL;
	assert(csa->nl->trunc_pid == process_id);
	assert(dba_mm != csd->acc_meth);
	old_total = csa->ti->total_blks;
	old_free = csa->ti->free_blocks;
	sigkill = 0;
	found_busy_blk = 0;
	memset(&alt_hist, 0, SIZEOF(alt_hist));	/* null-initialize history */
	assert(csd->bplmap == BLKS_PER_LMAP);
	end_blocks = old_total % BLKS_PER_LMAP;	/* blocks in the last lmap (first one we start scanning) */
	if (0 == end_blocks)
		end_blocks = BLKS_PER_LMAP;
	num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP);
	/* ======================================== PHASE 1 ======================================== */
	for (lmap_num = num_local_maps - 1; (lmap_num > 0) && !found_busy_blk; lmap_num--)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			return TRUE;
		assert(csa->ti->total_blks >= old_total);	/* otherwise, a concurrent truncate happened... */
		if (csa->ti->total_blks != old_total)
		{	/* Extend (likely called by mupip extend) -- don't truncate */
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region),
				       truncate_percent);
			return TRUE;
		}
		lmap_blk_num = lmap_num * BLKS_PER_LMAP;
		if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
		{
			found_busy_blk = lmap_blk_num;
			break;
		}
		blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP;
		/* Loop through non-bitmap blocks of this lmap, do recycled2free */
		DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n",
			 lmap_num, lmap_blk_num, blks_in_lmap));
		for (blk = 1; (blk < blks_in_lmap) && (blk != -1) && !found_busy_blk;)
		{
			t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;)	/* retry loop for recycled to free transactions */
			{
				curr_tn = csd->trans_hist.curr_tn;
				/* Read the nth local bitmap into memory */
				bmphist.blk_num = lmap_blk_num;
				bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr);
				lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr;
				if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
				{	/* Could not read the block successfully. Retry. */
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr);
				/* starting from the hint (blk itself), find the first busy or recycled block */
				blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status);
				assert(blk < BLKS_PER_LMAP);
				if ((blk == -1) || (blk >= blks_in_lmap))
				{	/* done with this lmap, continue to next */
					t_abort(gv_cur_region, csa);
					break;
				} else if ((BLK_BUSY == bml_status) || (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num))
				{	/* stop processing blocks... skip ahead to phase 2 */
					found_busy_blk = lmap_blk_num;
					t_abort(gv_cur_region, csa);
					break;
				} else if (BLK_RECYCLED == bml_status)
				{	/* Write PBLK records for recycled blocks only if before_image journaling is
					 * enabled. t_end() takes care of checking if journaling is enabled and
					 * writing PBLK record. We have to at least mark the recycled block as free.
					 */
					RESET_UPDATE_ARRAY;
					update_trans = UPDTRNS_DB_UPDATED_MASK;
					*((block_id *)update_array_ptr) = blk;
					update_array_ptr += SIZEOF(block_id);
					*(int *)update_array_ptr = 0;
					alt_hist.h[1].blk_num = 0;
					alt_hist.h[0].level = 0;
					alt_hist.h[0].cse = NULL;
					alt_hist.h[0].tn = curr_tn;
					alt_hist.h[0].blk_num = lmap_blk_num + blk;
					alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num, &alt_hist.h[0].cycle,
									 &alt_hist.h[0].cr);
					if (!alt_hist.h[0].buffaddr)
					{
						t_retry((enum cdb_sc)rdfail_detail);
						continue;
					}
					if (!t_recycled2free(&alt_hist.h[0]))
					{
						t_retry(cdb_sc_lostbmlcr);
						continue;
					}
					t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0);
					/* Set the opcode for INCTN record written by t_end() */
					inctn_opcode = inctn_blkmarkfree;
					if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
						continue;
					/* block processed, scan from the next one */
					blk++;
					break;
				} else
				{
					assert(t_tries < CDB_STAGNATE);
					t_retry(cdb_sc_badbitmap);
					continue;
				}
			}	/* END recycled2free retry loop */
		}	/* END scanning blocks of this particular lmap */
		/* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */
		/* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */
		DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num));
		t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
		for (;;)
		{
			RESET_UPDATE_ARRAY;
			BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
			*blkid_ptr = 0;
			update_trans = UPDTRNS_DB_UPDATED_MASK;
			inctn_opcode = inctn_mu_reorg;	/* inctn_mu_truncate */
			curr_tn = csd->trans_hist.curr_tn;
			blkhist = &alt_hist.h[0];
			blkhist->blk_num = lmap_blk_num;
			blkhist->tn = curr_tn;
			blkhist->cse = NULL;	/* start afresh (do not use value from previous retry) */
			/* Read the nth local bitmap into memory */
			blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
			lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr;
			if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
			{	/* Could not read the block successfully. Retry. */
				t_retry((enum cdb_sc)rdfail_detail);
				continue;
			}
			t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
			blkhist->blk_num = 0;	/* create empty history for bitmap block */
			if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
				continue;
			break;
		}
	}	/* END scanning lmaps */
	/* ======================================== PHASE 2 ======================================== */
	assert(!csa->now_crit);
	for (;;)
	{	/* wait for FREEZE, we don't want to truncate a frozen database */
		grab_crit(gv_cur_region);
		if (FROZEN_CHILLED(cs_data))
			DO_CHILLED_AUTORELEASE(csa, cs_data);
		if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
			break;
		rel_crit(gv_cur_region);
		while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
		{
			hiber_start(1000);
			if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
				break;
		}
	}
	assert(csa->nl->trunc_pid == process_id);
	/* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */
	if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB))
	{
		assert(FALSE);
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"),
			       DB_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return FALSE;
	}
	csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk);
	assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk));
	new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		rel_crit(gv_cur_region);
		return TRUE;
	} else if ((csa->ti->total_blks != old_total) || (new_total == old_total))
	{
		assert(csa->ti->total_blks >= old_total);	/* Better have been an extend, not a truncate... */
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		rel_crit(gv_cur_region);
		return TRUE;
	} else if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0) || !csd->fully_upgraded)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (SNAPSHOTS_IN_PROG(csa->nl))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	}
	DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	if (JNL_ENABLED(csa))
	{	/* Write JRT_TRUNC and INCTN records */
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = csa->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, csa);
		if (SS_NORMAL != jnl_status)
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
		else
		{
			if (0 == jpc->pini_addr)
				jnl_put_jrt_pini(csa);
			jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total);
			inctn_opcode = inctn_mu_reorg;
			jnl_write_inctn_rec(csa);
			jnl_status = jnl_flush(gv_cur_region);
			if (SS_NORMAL != jnl_status)
			{
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2,
					     RTS_ERROR_TEXT("Error with journal flush during mu_truncate"), jnl_status);
				assert(NOJNL == jpc->channel);	/* jnl file lost has been triggered */
			}
		}
	}
	/* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */
	curr_tn = csa->ti->curr_tn;
	CHECK_TN(csa, csd, curr_tn);
	udi = FILE_INFO(gv_cur_region);
	/* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */
	trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1));
	csd->after_trunc_total_blks = new_total;
	csd->before_trunc_free_blocks = csa->ti->free_blocks;
	csd->before_trunc_total_blks = old_total;	/* Flags interrupted truncate for recover_truncate */
	/* file size and total blocks: INCONSISTENT */
	csa->ti->total_blks = new_total;
	/* past the point of no return -- shared memory intact */
	assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total));
	csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total);
	new_free = csa->ti->free_blocks;
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1);	/* 55 : Issue a kill -9 before 1st fsync */
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	CHECK_DBSYNC(gv_cur_region, save_errno);
	/* past the point of no return -- shared memory deleted */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2);	/* 56 : Issue a kill -9 after 1st fsync */
	clear_cache_array(csa, csd, gv_cur_region, new_total, old_total);
	offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size;
	save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff)));
	if (0 != save_errno)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		return FALSE;
	}
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3);	/* 57 : Issue a kill -9 after reducing csa->ti->total_blks,
							 * before FTRUNCATE */
	/* Execute an ftruncate() and truncate the DB file.
	 * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS).
	 * It ignores a kill -9 signal till its operation is completed.
	 * So we can safely assume that the result of ftruncate() will be complete.
	 */
	FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status);
	if (0 != ftrunc_status)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		/* should go through recover_truncate now, which will again try to FTRUNCATE */
		return FALSE;
	}
	/* file size and total blocks: CONSISTENT (shrunk) */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4);	/* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */
	csa->nl->root_search_cycle++;	/* Force concurrent processes to restart in t_end/tp_tend to make sure no one
					 * tries to commit updates past the end of the file. Bitmap validations together
					 * with highest_lbm_with_busy_blk should actually be sufficient, so this is
					 * just to be safe.
					 */
	csd->before_trunc_total_blks = 0;	/* indicate CONSISTENT */
	/* Increment TN */
	assert(csa->ti->early_tn == csa->ti->curr_tn);
	csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1;
	INCREMENT_CURR_TN(csd);
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5);	/* 59 : Issue a kill -9 after 2nd fsync */
	CHECK_DBSYNC(gv_cur_region, save_errno);
	ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	curr_tn = csa->ti->curr_tn;
	rel_crit(gv_cur_region);
	send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total,
		     &curr_tn);
	util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].",
		       FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free);
	return TRUE;
}	/* END of mu_truncate() */
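/* The choice of new_total in mu_truncate() keeps every local bitmap that still maps a busy block: the file is cut
 * back to the first lmap boundary past highest_lbm_with_busy_blk, never below it. Since local bitmaps sit at block
 * numbers that are multiples of BLKS_PER_LMAP, the arithmetic reduces to the sketch below (the numeric example
 * assumes BLKS_PER_LMAP == 512, its usual value here):
 */
#if 0	/* illustrative sketch only, not compiled */
uint4	new_total;

assert(0 == (highest_lbm_with_busy_blk % BLKS_PER_LMAP));	/* bitmap blocks are lmap-aligned */
new_total = MIN(old_total, highest_lbm_with_busy_blk + BLKS_PER_LMAP);
/* e.g. a highest busy lmap at block 1024 yields new_total == 1536: blocks 0..1535 are retained
 * and everything after them is truncated away. */
#endif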
cache_rec_ptr_t db_csh_getn(block_id block)
{
	cache_rec_ptr_t		hdr, q0, start_cr, cr;
	bt_rec_ptr_t		bt;
	unsigned int		lcnt, ocnt;
	int			rip, max_ent, pass1, pass2, pass3;
	int4			flsh_trigger;
	uint4			first_r_epid, latest_r_epid;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		*tp_srch_status;
	ht_ent_int4		*tabent;

	csa = cs_addrs;
	csd = csa->hdr;
	assert(csa->now_crit);
	assert(csa == &FILE_INFO(gv_cur_region)->s_addrs);
	max_ent = csd->n_bts;
	cr = (cache_rec_ptr_t)GDS_REL2ABS(csa->nl->cur_lru_cache_rec_off);
	hdr = csa->acc_meth.bg.cache_state->cache_array + (block % csd->bt_buckets);
	start_cr = csa->acc_meth.bg.cache_state->cache_array + csd->bt_buckets;
	pass1 = max_ent;	/* skip referred or dirty or read-into cache records */
	pass2 = 2 * max_ent;	/* skip referred cache records */
	pass3 = 3 * max_ent;	/* skip nothing */
	INCR_DB_CSH_COUNTER(csa, n_db_csh_getns, 1);
	DEFER_INTERRUPTS(INTRPT_IN_DB_CSH_GETN);
	for (lcnt = 0; ; lcnt++)
	{
		if (lcnt > pass3)
		{
			BG_TRACE_PRO(wc_blocked_db_csh_getn_loopexceed);
			assert(FALSE);
			break;
		}
		cr++;
		if (cr == start_cr + max_ent)
			cr = start_cr;
		VMS_ONLY(
			if ((lcnt == pass1) || (lcnt == pass2))
				wcs_wtfini(gv_cur_region);
		)
		if (cr->refer && (lcnt < pass2))
		{	/* in passes 1 & 2, set refer to FALSE and skip; in the third pass attempt reuse even if TRUE == refer */
			cr->refer = FALSE;
			continue;
		}
		if (cr->in_cw_set || cr->in_tend)
		{	/* some process already has this pinned for reading and/or updating. skip it. */
			cr->refer = TRUE;
			continue;
		}
		if ((CDB_STAGNATE <= t_tries) || mu_reorg_process)
		{	/* Prevent stepping on self when crit for entire transaction.
			 * This is done by looking up in sgm_info_ptr->blk_in_use and cw_stagnate for presence of the block.
			 * The following two hashtable lookups are not similar, since in TP, sgm_info_ptr->blks_in_use
			 * is updated to the latest cw_stagnate list of blocks only in "tp_hist".
			 * Also note that the lookup in sgm_info_ptr->blks_in_use reuses blocks that don't have cse's.
			 * This is to allow big-read TP transactions which may use up more than the available global buffers.
			 * There is one issue here in that a block that has been only read till now may be stepped upon here
			 * but may later be needed for update. It is handled by updating the block's corresponding
			 * entry in the set of histories (sgm_info_ptr->first_tp_hist[index] structure) to hold the
			 * "cr" and "cycle" of the t_qread done for the block when it was intended to be changed for the
			 * first time within the transaction since otherwise the transaction would restart due to a
			 * cdb_sc_lostcr status. Note that "tn" (read_tn of the block) in the first_tp_hist will still
			 * remain the "tn" when the block was first read within this transaction to ensure the block
			 * hasn't been modified since the start of the transaction. Once we intend on changing the
			 * block i.e. srch_blk_status->cse is non-NULL, we ensure in the code below not to step on it.
			 * ["tp_hist" is the routine that updates the "cr", "cycle" and "tn" of the block].
			 * Note that usually in a transaction the first_tp_hist[] structure holds the "cr", "cycle", and "tn"
			 * of the first t_qread of the block within that transaction. The above is the only exception.
			 * Also note that for blocks in cw_stagnate (i.e. current TP mini-action), we don't reuse any of
			 * them even if they don't have a cse. This is to ensure that the current action doesn't
			 * encounter a restart due to cdb_sc_lostcr in "tp_hist" even in the fourth-retry.
			 */
			tp_srch_status = NULL;
			if (dollar_tlevel
			    && (NULL != (tabent = lookup_hashtab_int4(sgm_info_ptr->blks_in_use, (uint4 *)&cr->blk)))
			    && (tp_srch_status = (srch_blk_status *)tabent->value) && (tp_srch_status->cse))
			{	/* this process is already using the block - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != lookup_hashtab_int4(&cw_stagnate, (uint4 *)&cr->blk))
			{	/* this process is already using the block for the current gvcst_search - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != tp_srch_status)
			{	/* About to reuse a buffer that is part of the read-set of the current TP transaction.
				 * Reset the clue as otherwise the next global reference of that global will use an out-of-date
				 * clue. Even though tp_srch_status is available after the sgm_info_ptr->blks_in_use hashtable
				 * check, we don't want to reset the clue in case the cw_stagnate hashtable check causes the
				 * same cr to be skipped from reuse. Hence the placement of this reset logic AFTER the
				 * cw_stagnate check.
				 */
				tp_srch_status->blk_target->clue.end = 0;
			}
		}
		if (cr->dirty)
		{	/* Note that in Unix, it is possible that we see a stale value of cr->dirty (possible if a
			 * concurrent "wcs_wtstart" has reset dirty to 0 but that update did not reach us yet). In this
			 * case the call to "wcs_get_space" below will do the necessary memory barrier instructions
			 * (through calls to "aswp") which will allow us to see the non-stale value of cr->dirty.
			 *
			 * It is also possible that cr->dirty is non-zero but < cr->flushed_dirty_tn. In this case, wcs_get_space
			 * done below will return FALSE forcing a cache-rebuild which will fix this situation.
			 *
			 * In VMS, another process cannot be concurrently resetting cr->dirty to 0 as the resetting routine
			 * is "wcs_wtfini" which is executed in crit which another process cannot be in as we are in crit now.
			 */
			if (gv_cur_region->read_only)
				continue;
			if (lcnt < pass1)
			{
				if (!csa->timer && (csa->nl->wcs_timers < 1))
					wcs_timer_start(gv_cur_region, FALSE);
				continue;
			}
			BG_TRACE_PRO(db_csh_getn_flush_dirty);
			if (FALSE == wcs_get_space(gv_cur_region, 0, cr))
			{	/* failed to flush it out - force a rebuild */
				BG_TRACE_PRO(wc_blocked_db_csh_getn_wcsstarvewrt);
				assert(csa->nl->wc_blocked);	/* only reason we currently know why wcs_get_space could fail */
				assert(gtm_white_box_test_case_enabled);
				break;
			}
			assert(0 == cr->dirty);
		}
		UNIX_ONLY(
			/* The cache-record is not free for reuse until the write-latch value becomes LATCH_CLEAR.
			 * In VMS, resetting the write-latch value occurs in "wcs_wtfini" which is in CRIT, so we are fine.
			 * In Unix, this resetting is done by "wcs_wtstart" which is out-of-crit. Therefore, we need to
			 * wait for this value to be LATCH_CLEAR before reusing this cache-record.
			 * Note that we are examining the write-latch-value without holding the interlock. It is ok to do
			 * this because the only two routines that modify the latch value are "bg_update" and
			 * "wcs_wtstart". The former cannot be concurrently executing because we are in crit.
			 * The latter will not update the latch value unless this cache-record is dirty. But in this
			 * case we would have most likely gone through the if (cr->dirty) check above. Most likely
			 * because there is one rare possibility where a concurrent "wcs_wtstart" has set cr->dirty
			 * to 0 but not yet cleared the latch. In that case we wait for the latch to be cleared.
			 * In all other cases, nobody has been modifying the latch since we got crit and therefore
			 * it is safe to observe the value of the latch without holding the interlock.
			 */
			if (LATCH_CLEAR != WRITE_LATCH_VAL(cr))
			{	/* possible if a concurrent "wcs_wtstart" has set cr->dirty to 0 but not yet
				 * cleared the latch. this should be very rare though.
				 */
				if (lcnt < pass2)
					continue;	/* try to find some other cache-record to reuse until the 3rd pass */
				for (ocnt = 1; (MAXWRTLATCHWAIT >= ocnt) && (LATCH_CLEAR != WRITE_LATCH_VAL(cr)); ocnt++)
					wcs_sleep(SLEEP_WRTLATCHWAIT);	/* since it is a short lock, sleep the minimum */
				if (MAXWRTLATCHWAIT <= ocnt)
				{
					BG_TRACE_PRO(db_csh_getn_wrt_latch_stuck);
					assert(FALSE);
					continue;
				}
			}
		)
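/* db_csh_getn()'s three thresholds (pass1/pass2/pass3) implement a second-chance ("clock") sweep over the cache
 * records: pass 1 also skips dirty and pinned records, pass 2 skips only recently-referred ones, and pass 3 takes
 * whatever it finds, clearing the refer bit as the hand moves. A minimal sketch of the underlying clock scheme,
 * with generic names (slot_t, clock_pick are illustration only, not the real cache_rec layout):
 */
#if 0	/* illustrative sketch only, not compiled */
typedef struct { int refer; int busy; } slot_t;

int clock_pick(slot_t *slot, int nslots, int *hand)
{
	int	i, idx;

	for (i = 0; i < 3 * nslots; i++)	/* at most three full sweeps */
	{
		idx = *hand = (*hand + 1) % nslots;
		if (slot[idx].refer && (i < 2 * nslots))
		{	/* first two passes: give recently used slots a second chance */
			slot[idx].refer = 0;
			continue;
		}
		if (slot[idx].busy && (i < nslots))
			continue;	/* first pass only: leave busy (e.g. dirty) slots alone */
		return idx;
	}
	return -1;	/* nothing reusable found */
}
#endif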
/*
 * ------------------------------------------
 * Hang the process for a specified time.
 *
 *	Goes to sleep for a positive value.
 *	Any caught signal will terminate the sleep
 *	following the execution of that signal's catching routine.
 *
 *	The actual hang duration should be NO LESS than the specified
 *	duration for specified durations greater than .001 seconds.
 *	Certain applications depend on this assumption.
 *
 * Arguments:
 *	num - time to sleep
 *
 * Return:
 *	none
 * ------------------------------------------
 */
void op_hang(mval* num)
{
	int		ms;
	double		tmp;
	mv_stent	*mv_zintcmd;
	ABS_TIME	cur_time, end_time;
#	ifdef VMS
	uint4		time[2];
	int4		efn_mask, status;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	ms = 0;
	MV_FORCE_NUM(num);
	if (num->mvtype & MV_INT)
	{
		if (0 < num->m[1])
		{
			assert(MV_BIAS >= 1000);	/* if formats change overflow may need attention */
			ms = num->m[1] * (1000 / MV_BIAS);
		}
	} else if (0 == num->sgn)	/* if sign is not 0 it means num is negative */
	{
		tmp = mval2double(num) * (double)1000;
		ms = ((double)MAXPOSINT4 >= tmp) ? (int)tmp : (int)MAXPOSINT4;
	}
	if (ms)
	{
		if (TREF(tpnotacidtime) * 1000 < ms)
			TPNOTACID_CHECK(HANGSTR);
#		if defined(DEBUG) && defined(UNIX)
		if (WBTEST_ENABLED(WBTEST_DEFERRED_TIMERS) && (3 > gtm_white_box_test_case_count) && (123000 == ms))
		{
			DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			DBGFPF((stderr, "OP_HANG: will sleep for 20 seconds\n"));
			LONG_SLEEP(20);
			DBGFPF((stderr, "OP_HANG: done sleeping\n"));
			ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_BREAKMPC) && (0 == gtm_white_box_test_case_count) && (999 == ms))
		{
			frame_pointer->old_frame_pointer->mpc = (unsigned char *)GTM64_ONLY(0xdeadbeef12345678)
				NON_GTM64_ONLY(0xdead1234);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_UTIL_OUT_BUFFER_PROTECTION) && (0 == gtm_white_box_test_case_count) && (999 == ms))
		{	/* Upon seeing a .999s hang this white-box test launches a timer that pops with a period of
			 * UTIL_OUT_SYSLOG_INTERVAL and prints a long message via util_out_ptr.
			 */
			start_timer((TID)&util_out_syslog_dump, UTIL_OUT_SYSLOG_INTERVAL, util_out_syslog_dump, 0, NULL);
			return;
		}
#		endif
		sys_get_curr_time(&cur_time);
		mv_zintcmd = find_mvstent_cmd(ZINTCMD_HANG, restart_pc, restart_ctxt, FALSE);
		if (!mv_zintcmd)
			add_int_to_abs_time(&cur_time, ms, &end_time);
		else
		{
			end_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
			cur_time = sub_abs_time(&end_time, &cur_time);	/* get remaining time to sleep */
			if (0 <= cur_time.at_sec)
				ms = (int4)(cur_time.at_sec * 1000 + cur_time.at_usec / 1000);
			else
				ms = 0;	/* all done */
			/* restore/pop previous zintcmd_active[ZINTCMD_HANG] hints */
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last
				= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last
				= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).count--;
			assert(0 <= TAREF1(zintcmd_active, ZINTCMD_HANG).count);
			if (mv_chain == mv_zintcmd)
				POP_MV_STENT();	/* just pop if top of stack */
			else
			{	/* flag as not active */
				mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
				mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
			}
			if (0 == ms)
				return;	/* done HANGing */
		}
#		ifdef UNIX
		if (ms < 10)
			SLEEP_USEC(ms * 1000, TRUE);	/* finish the sleep if it is less than 10ms */
		else
			hiber_start(ms);
#		elif defined(VMS)
		time[0] = -time_low_ms(ms);
		time[1] = -time_high_ms(ms) - 1;
		efn_mask = (1 << efn_outofband | 1 << efn_timer);
		if (SS$_NORMAL != (status = sys$setimr(efn_timer, &time, NULL, &time, 0)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$setimr"),
				      CALLFROM, status);
		if (SS$_NORMAL != (status = sys$wflor(efn_outofband, efn_mask)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$wflor"),
				      CALLFROM, status);
		if (outofband)
		{
			if (SS$_WASCLR == (status = sys$readef(efn_timer, &efn_mask)))
			{
				if (SS$_NORMAL != (status = sys$cantim(&time, 0)))
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5,
						      RTS_ERROR_LITERAL("$cantim"), CALLFROM, status);
			} else
				assertpro(SS$_WASSET == status);
		}
#		endif
	} else
		rel_quant();
	if (outofband)
	{
		PUSH_MV_STENT(MVST_ZINTCMD);
		mv_chain->mv_st_cont.mvs_zintcmd.end_or_remain = end_time;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_check = restart_ctxt;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_check = restart_pc;
		/* save current information from zintcmd_active */
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = restart_pc;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last = restart_ctxt;
		TAREF1(zintcmd_active, ZINTCMD_HANG).count++;
		mv_chain->mv_st_cont.mvs_zintcmd.command = ZINTCMD_HANG;
		outofband_action(FALSE);
	}
	return;
}
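/* When a HANG is interrupted (e.g. by MUPIP INTRPT) and later resumed, op_hang() recomputes the remaining sleep
 * from the saved absolute end time rather than restarting the full duration. The conversion from an ABS_TIME
 * difference back to milliseconds, as coded above, is simply:
 */
#if 0	/* illustrative sketch only, not compiled */
ABS_TIME	remain;	/* end_time - cur_time, as returned by sub_abs_time() */
int4		ms;

ms = (0 <= remain.at_sec) ? (int4)(remain.at_sec * 1000 + remain.at_usec / 1000) : 0;
/* e.g. 2 seconds and 500000 microseconds left => ms == 2500; a negative at_sec means the
 * deadline already passed and the HANG is done. */
#endif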
int4 gds_rundown(void)
{
	boolean_t		canceled_dbsync_timer, canceled_flush_timer, ok_to_write_pfin;
	boolean_t		have_standalone_access, ipc_deleted, err_caught;
	boolean_t		is_cur_process_ss_initiator, remove_shm, vermismatch, we_are_last_user, we_are_last_writer,
				is_mm;
	boolean_t		unsafe_last_writer;
	char			time_str[CTIME_BEFORE_NL + 2];	/* for GET_CUR_TIME macro */
	gd_region		*reg;
	int			save_errno, status, rc;
	int4			semval, ftok_semval, sopcnt, ftok_sopcnt;
	short			crash_count;
	sm_long_t		munmap_len;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	struct shmid_ds		shm_buf;
	struct sembuf		sop[2], ftok_sop[2];
	uint4			jnl_status;
	unix_db_info		*udi;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	shm_snapshot_t		*ss_shm_ptr;
	uint4			ss_pid, onln_rlbk_pid, holder_pid;
	boolean_t		was_crit;
	boolean_t		safe_mode;	/* Do not flush or take down shared memory. */
	boolean_t		bypassed_ftok = FALSE, bypassed_access = FALSE, may_bypass_ftok, inst_is_frozen,
				ftok_counter_halted, access_counter_halted;
	int			secshrstat;
	intrpt_state_t		prev_intrpt_state;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	jnl_status = 0;
	reg = gv_cur_region;	/* Local copy */
	/* Early out for cluster regions to avoid tripping the assert below.
	 * Note: this early out is consistent with VMS. It has been noted that all of the gtcm assignments
	 * to gv_cur_region should use the TP_CHANGE_REG macro. This would also avoid the assert problem
	 * and should be done eventually.
	 */
	if (dba_cm == reg->dyn.addr->acc_meth)
		return EXIT_NRM;
	udi = FILE_INFO(reg);
	csa = &udi->s_addrs;
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	if ((reg->open) && (dba_usr == csd->acc_meth))
	{
		change_reg();
		gvusr_rundown();
		return EXIT_NRM;
	}
	/* If the process has standalone access, it has udi->grabbed_access_sem set to TRUE at this point. Note that down in
	 * a local variable as udi->grabbed_access_sem is set to TRUE even for non-standalone access below and hence we can't
	 * rely on that later to determine if the process had standalone access or not when it entered this function. We need
	 * to guarantee that no one else accesses the database file header when the semid/shmid fields are reset. We already
	 * have created the ftok semaphore in db_init or mu_rndwn_file and did not remove it. So just lock it. We do it in
	 * blocking mode.
	 */
	have_standalone_access = udi->grabbed_access_sem;	/* process holds standalone access */
	DEFER_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
	ESTABLISH_NORET(gds_rundown_ch, err_caught);
	if (err_caught)
	{
		REVERT;
		WITH_CH(gds_rundown_ch, gds_rundown_err_cleanup(have_standalone_access), 0);
		ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
		DEBUG_ONLY(ok_to_UNWIND_in_exit_handling = FALSE);
		return EXIT_ERR;
	}
	assert(reg->open);	/* if we failed to open, dbinit_ch should have taken care of proper clean up */
	assert(!reg->opening);	/* see comment above */
	assert((dba_bg == csd->acc_meth) || (dba_mm == csd->acc_meth));
	is_mm = (dba_bg != csd->acc_meth);
	assert(!csa->hold_onto_crit || (csa->now_crit && jgbl.onlnrlbk));
	/* If we are online rollback, we should already be holding crit and should release it only at the end of this module.
	 * This is usually done by noting down csa->now_crit in a local variable (was_crit) and using it whenever we are
	 * about to grab_crit. But there are instances (like mupip_set_journal.c) where we grab_crit but invoke gds_rundown
	 * without any preceding rel_crit. Such code relies on the fact that gds_rundown does rel_crit unconditionally (to
	 * get locks to a known state). So, augment csa->now_crit with jgbl.onlnrlbk to track if we can rel_crit
	 * unconditionally or not in gds_rundown.
	 */
	was_crit = (csa->now_crit && jgbl.onlnrlbk);
	/* Cancel any pending flush timer for this region by this task */
	canceled_flush_timer = FALSE;
	canceled_dbsync_timer = FALSE;
	CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer);
	we_are_last_user = FALSE;
	inst_is_frozen = IS_REPL_INST_FROZEN && REPL_ALLOWED(csa->hdr);
	if (!csa->persistent_freeze)
		region_freeze(reg, FALSE, FALSE, FALSE);
	if (!was_crit)
	{
		rel_crit(reg);	/* get locks to known state */
		mutex_cleanup(reg);
	}
	/* The only process that can invoke gds_rundown while holding the access control semaphore is RECOVER/ROLLBACK. All
	 * the others (like MUPIP SET -FILE/MUPIP EXTEND) would have invoked db_ipcs_reset() before invoking gds_rundown
	 * (from mupip_exit_handler). The only exception is when these processes encounter a terminate signal and they reach
	 * mupip_exit_handler while holding the access control semaphore. Assert accordingly.
	 */
	assert(!have_standalone_access || mupip_jnl_recover || process_exiting);
	/* If we have standalone access, then ensure that a concurrent online rollback cannot be running at the same time as
	 * it needs the access control lock as well. The only exception is when we are online rollback and currently running
	 * down.
	 */
	cnl = csa->nl;
	onln_rlbk_pid = cnl->onln_rlbk_pid;
	assert(!have_standalone_access || mupip_jnl_recover || !onln_rlbk_pid || !is_proc_alive(onln_rlbk_pid, 0));
	if (!have_standalone_access)
	{
		if (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL)))	/* Check # of procs counted on FTOK */
		{
			save_errno = errno;
			assert(FALSE);
			rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
				      RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno);
		}
		may_bypass_ftok = CAN_BYPASS(ftok_semval, csd, inst_is_frozen);	/* Do we need a blocking wait? */
		/* We need to guarantee that no one else accesses the database file header when the semid/shmid fields are
		 * reset. We already have created the ftok semaphore in db_init or mu_rndwn_file and did not remove it.
		 * So just lock it.
		 */
		if (!ftok_sem_lock(reg, may_bypass_ftok))
		{
			if (may_bypass_ftok)
			{	/* We did a non-blocking wait. It's ok to proceed without locking */
				bypassed_ftok = TRUE;
				holder_pid = semctl(udi->ftok_semid, DB_CONTROL_SEM, GETPID);
				if ((uint4)-1 == holder_pid)
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
						      ERR_SYSCALL, 5,
						      RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"),
						      CALLFROM, errno);
				if (!IS_GTM_IMAGE)	/* MUMPS processes should not flood syslog with bypass messages. */
				{
					send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10,
						     LEN_AND_STR(gtmImageNames[image_type].imageName), process_id,
						     LEN_AND_LIT("FTOK"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid);
					send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2,
						     LEN_AND_LIT("FTOK bypassed at rundown"));
				}
			} else
			{	/* We did a blocking wait but something bad happened. */
				FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_lock, process_id);
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
			}
		}
		sop[0].sem_num = DB_CONTROL_SEM;
		sop[0].sem_op = 0;	/* Wait for 0 */
		sop[1].sem_num = DB_CONTROL_SEM;
		sop[1].sem_op = 1;	/* Lock */
		sopcnt = 2;
		sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | IPC_NOWAIT;	/* Don't wait the first time thru */
		SEMOP(udi->semid, sop, sopcnt, status, NO_WAIT);
		if (0 != status)
		{
			save_errno = errno;
			/* Check # of processes counted on access sem. */
			if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL)))
			{
				assert(FALSE);
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
					      RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno);
			}
			bypassed_access = CAN_BYPASS(semval, csd, inst_is_frozen) || onln_rlbk_pid || csd->file_corrupt;
			/* Before attempting again in the blocking mode, see if the holding process is an online rollback.
			 * If so, it is likely we won't get the access control semaphore anytime soon. In that case, we
			 * are better off skipping rundown and continuing with sanity cleanup and exit.
			 */
			holder_pid = semctl(udi->semid, DB_CONTROL_SEM, GETPID);
			if ((uint4)-1 == holder_pid)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
					      RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"), CALLFROM, errno);
			if (!bypassed_access)
			{	/* We couldn't get it in one shot -- see if we already have it */
				if (holder_pid == process_id)
				{
					send_msg_csa(CSA_ARG(csa) VARLSTCNT(5) MAKE_MSG_INFO(ERR_CRITSEMFAIL), 2,
						     DB_LEN_STR(reg), ERR_RNDWNSEMFAIL);
					REVERT;
					ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
					assert(FALSE);
					return EXIT_ERR;
				}
				if (EAGAIN != save_errno)
				{
					assert(FALSE);
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
						      ERR_SYSCALL, 5,
						      RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"),
						      CALLFROM, save_errno);
				}
				sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO;	/* Try again - blocking this time */
				SEMOP(udi->semid, sop, 2, status, FORCED_WAIT);
				if (-1 == status)	/* We couldn't get it at all.. */
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
						      ERR_SYSCALL, 5,
						      RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"),
						      CALLFROM, errno);
			} else if (!IS_GTM_IMAGE)
			{
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10,
					     LEN_AND_STR(gtmImageNames[image_type].imageName), process_id,
					     LEN_AND_LIT("access control"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid);
				send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2,
					     LEN_AND_LIT("Access control bypassed at rundown"));
			}
			udi->grabbed_access_sem = !bypassed_access;
		}
	} /* else we hold the access control semaphore and therefore have standalone access. We do not release it now - we
	   * release it later in mupip_exit_handler.c. Since we already hold the access control semaphore, we don't need the
	   * ftok semaphore and trying it could cause deadlock.
	   */
	/* Note that in the case of online rollback, "udi->grabbed_access_sem" (and in turn "have_standalone_access") is TRUE.
	 * But there could be other processes still having the database open so we cannot safely reset the halted fields.
	 */
	if (have_standalone_access && !jgbl.onlnrlbk)
		csd->ftok_counter_halted = csd->access_counter_halted = FALSE;
	ftok_counter_halted = csd->ftok_counter_halted;
	access_counter_halted = csd->access_counter_halted;
	/* If we bypassed any of the semaphores, activate safe mode.
	 * Also, if the replication instance is frozen and this db has replication turned on (which means
	 * no flushes of dirty buffers to this db can happen while the instance is frozen) activate safe mode.
	 */
	ok_to_write_pfin = !(bypassed_access || bypassed_ftok || inst_is_frozen);
	safe_mode = !ok_to_write_pfin || ftok_counter_halted || access_counter_halted;
	/* At this point we are guaranteed no one else is doing a db_init/rundown as we hold the access control semaphore */
	assert(csa->ref_cnt);	/* decrement private ref_cnt before shared ref_cnt decrement */
	csa->ref_cnt--;		/* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */
	assert(!csa->ref_cnt);
	--cnl->ref_cnt;
	if (memcmp(cnl->now_running, gtm_release_name, gtm_release_name_len + 1))
	{	/* VERMISMATCH condition. Possible only if DSE */
		assert(dse_running);
		vermismatch = TRUE;
	} else
		vermismatch = FALSE;
	if (-1 == shmctl(udi->shmid, IPC_STAT, &shm_buf))
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
			      RTS_ERROR_TEXT("gds_rundown shmctl"), CALLFROM, save_errno);
	} else
		we_are_last_user = (1 == shm_buf.shm_nattch) && !vermismatch && !safe_mode;
	/* recover => one user except ONLINE ROLLBACK, or standalone with frozen instance */
	assert(!have_standalone_access || we_are_last_user || jgbl.onlnrlbk || inst_is_frozen);
	if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL)))
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
			      RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno);
	/* There's one writer left and I am it */
	assert(reg->read_only || semval >= 0);
	unsafe_last_writer = (DB_COUNTER_SEM_INCR == semval) && (FALSE == reg->read_only) && !vermismatch;
	we_are_last_writer = unsafe_last_writer && !safe_mode;
	assert(!we_are_last_writer || !safe_mode);
	assert(!we_are_last_user || !safe_mode);
	/* recover + R/W region => one writer except ONLINE ROLLBACK, or standalone with frozen instance, leading to safe_mode */
	assert(!(have_standalone_access && !reg->read_only) || we_are_last_writer || jgbl.onlnrlbk || inst_is_frozen);
	GTM_WHITE_BOX_TEST(WBTEST_ANTIFREEZE_JNLCLOSE, we_are_last_writer, 1); /* Assume we are the last writer to invoke wcs_flu */
	if (!have_standalone_access && (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))))
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
			      RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno);
	if (NULL != csa->ss_ctx)
		ss_destroy_context(csa->ss_ctx);
	/* SS_MULTI: If multiple snapshots are supported, then we have to run through each of the snapshots */
	assert(1 == MAX_SNAPSHOTS);
	ss_shm_ptr = (shm_snapshot_ptr_t)SS_GETSTARTPTR(csa);
	ss_pid = ss_shm_ptr->ss_info.ss_pid;
	is_cur_process_ss_initiator = (process_id == ss_pid);
	if (ss_pid && (is_cur_process_ss_initiator || we_are_last_user))
	{	/* Try getting the snapshot crit latch. If we don't get the latch, we won't hang for eternity and will skip
		 * doing the orphaned snapshot cleanup. It will be cleaned up eventually either by a subsequent MUPIP
		 * INTEG or by a MUPIP RUNDOWN.
		 */
		if (ss_get_lock_nowait(reg) && (ss_pid == ss_shm_ptr->ss_info.ss_pid)
		    && (is_cur_process_ss_initiator || !is_proc_alive(ss_pid, 0)))
		{
			ss_release(NULL);
			ss_release_lock(reg);
		}
	}
	/* If cnl->donotflush_dbjnl is set, it means mupip recover/rollback was interrupted and therefore we need not flush
	 * shared memory contents to disk as they might be in an inconsistent state. Moreover, any more flushing will only
	 * cause a future rollback to undo more journal records (PBLKs). In this case, we will go ahead and remove shared
	 * memory (without flushing the contents) in this routine. A reissue of the recover/rollback command will restore
	 * the database to a consistent state.
	 */
	if (!cnl->donotflush_dbjnl && !reg->read_only && !vermismatch)
	{	/* If we had an orphaned block and were interrupted, set wc_blocked so we can invoke wcs_recover. Do it ONLY
		 * if there is NO concurrent online rollback running (as we need crit to set wc_blocked).
		 */
		if (csa->wbuf_dqd && !is_mm)
		{	/* If we had an orphaned block and were interrupted, mupip_exit_handler will invoke secshr_db_clnup
			 * which will clear this field and so we should never come to gds_rundown with a non-zero wbuf_dqd.
			 * The only exception is if we are recover/rollback in which case gds_rundown (from mur_close_files)
			 * is invoked BEFORE secshr_db_clnup in mur_close_files.
			 * Note: it is NOT possible for online rollback to reach here with wbuf_dqd being non-zero. This is
			 * because the moment we apply the first PBLK, we stop all interrupts and hence can never be
			 * interrupted in wcs_wtstart or wcs_get_space. Assert accordingly.
			 */
			assert(mupip_jnl_recover && !jgbl.onlnrlbk && !safe_mode);
			if (!was_crit)
				grab_crit(reg);
			SET_TRACEABLE_VAR(cnl->wc_blocked, TRUE);
			BG_TRACE_PRO_ANY(csa, wcb_gds_rundown);
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_gds_rundown"),
				     process_id, &csa->ti->curr_tn, DB_LEN_STR(reg));
			csa->wbuf_dqd = 0;
			wcs_recover(reg);
			BG_TRACE_PRO_ANY(csa, lost_block_recovery);
			if (!was_crit)
				rel_crit(reg);
		}
		if (JNL_ENABLED(csd) && IS_GTCM_GNP_SERVER_IMAGE)
			originator_prc_vec = NULL;
		/* If we are the last writing user, then everything must be flushed */
		if (we_are_last_writer)
		{	/* Time to flush out all of our buffers */
			assert(!safe_mode);
			if (is_mm)
			{
				MM_DBFILEXT_REMAP_IF_NEEDED(csa, reg);
				cnl->remove_shm = TRUE;
			}
			if (cnl->wc_blocked && jgbl.onlnrlbk)
			{	/* If the last update done by online rollback was not committed in the normal code-path but
				 * was completed by secshr_db_clnup, wc_blocked will be set to TRUE. But, since online
				 * rollback never invokes grab_crit (since csa->hold_onto_crit is set to TRUE), wcs_recover
				 * is never invoked. This could result in the last update never getting flushed to the disk
				 * and if online rollback happened to be the last writer then the shared memory will be
				 * flushed and removed and the last update will be lost. So, force wcs_recover if we find
				 * ourselves in such a situation. But, wc_blocked is possible only if phase1 or phase2
				 * errors are induced using white box test cases.
				 */
				assert(WB_COMMIT_ERR_ENABLED);
				wcs_recover(reg);
			}
			/* Note WCSFLU_SYNC_EPOCH ensures the epoch is synced to the journal and indirectly
			 * also ensures that the db is fsynced. We don't want to use it in the calls to
			 * wcs_flu() from t_end() and tp_tend() since we can defer it to out-of-crit there.
			 * In this case, since we are running down, we don't have any such option.
*/ cnl->remove_shm = wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); /* Since we_are_last_writer, we should be guaranteed that wcs_flu() did not change csd (in the * case of MM, for potential file extension), even if it did a grab_crit(). Therefore, make * sure that's true. */ assert(csd == csa->hdr); assert(0 == memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 1)); } else if (((canceled_flush_timer && (0 > cnl->wcs_timers)) || canceled_dbsync_timer) && !inst_is_frozen) { /* canceled pending db or jnl flush timers - flush database and journal buffers to disk */ if (!was_crit) grab_crit(reg); /* we need to sync the epoch as the fact that there is no active pending flush timer implies * there will be no one else who will flush the dirty buffers and EPOCH to disk in a timely fashion */ wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); if (!was_crit) rel_crit(reg); assert((dba_mm == cs_data->acc_meth) || (csd == cs_data)); csd = cs_data; /* In case this is MM and wcs_flu() remapped an extended database, reset csd */ } /* Do rundown journal processing after buffer flushes since they require jnl to be open */ if (JNL_ENABLED(csd)) { /* the following tp_change_reg() is not needed due to the assert csa == cs_addrs at the beginning * of gds_rundown(), but just to be safe. To be removed by 2002!! --- nars -- 2001/04/25. */ tp_change_reg(); /* call this because jnl_ensure_open checks cs_addrs rather than gv_cur_region */ jpc = csa->jnl; jbp = jpc->jnl_buff; if (jbp->fsync_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->fsync_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if (jbp->io_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->io_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if ((((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc)) || (we_are_last_writer && (0 != cnl->jnl_file.u.inode))) && ok_to_write_pfin) { /* We need to close the journal file cleanly if we have the latest generation journal file open * or if we are the last writer and the journal file is open in shared memory (not necessarily * by ourselves, e.g. the only process that opened the journal got shot abnormally) * Note: we should not infer anything from the shared memory value of cnl->jnl_file.u.inode * if we are not the last writer as it can be concurrently updated. */ if (!was_crit) grab_crit(reg); if (JNL_ENABLED(csd)) { SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pini/pfin/jnl_file_close all need it */ /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { /* If we_are_last_writer, we would have already done a wcs_flu() which would * have written an epoch record and we are guaranteed no further updates * since we are the last writer. So, just close the journal. * If the freeaddr == post_epoch_freeaddr, wcs_flu may have skipped writing * a pini, so allow for that. */ assert(!jbp->before_images || is_mm || !we_are_last_writer || (0 != jpc->pini_addr) || jgbl.mur_extract || (jpc->jnl_buff->freeaddr == jpc->jnl_buff->post_epoch_freeaddr)); /* If we haven't written a pini, let jnl_file_close write the pini/pfin.
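* (Editor's note: hence the pini_addr check guarding the jnl_put_jrt_pfin() call just below.)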
*/ if (!jgbl.mur_extract && (0 != jpc->pini_addr)) jnl_put_jrt_pfin(csa); /* If not the last writer and no pending flush timer left, do jnl flush now */ if (!we_are_last_writer && (0 > cnl->wcs_timers)) { if (SS_NORMAL == (jnl_status = jnl_flush(reg))) { assert(jbp->freeaddr == jbp->dskaddr); jnl_fsync(reg, jbp->dskaddr); assert(jbp->fsync_dskaddr == jbp->dskaddr); } else { send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush in gds_rundown"), jnl_status); assert(NOJNL == jpc->channel);/* jnl file lost has been triggered */ /* In this routine, all code that follows from here on does not * assume anything about the journaling characteristics of this * database so it is safe to continue execution even though * journaling got closed in the middle. */ } } jnl_file_close(reg, we_are_last_writer, FALSE); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(reg)); } if (!was_crit) rel_crit(reg); } } if (we_are_last_writer) /* Flush the fileheader last and harden the file to disk */ { if (!was_crit) grab_crit(reg); /* To satisfy crit requirement in fileheader_sync() */ memset(csd->machine_name, 0, MAX_MCNAMELEN); /* clear the machine_name field */ if (!have_standalone_access && we_are_last_user) { /* mupip_exit_handler will do this after mur_close_file */ csd->semid = INVALID_SEMID; csd->shmid = INVALID_SHMID; csd->gt_sem_ctime.ctime = 0; csd->gt_shm_ctime.ctime = 0; } fileheader_sync(reg); if (!was_crit) rel_crit(reg); if (!is_mm) { GTM_DB_FSYNC(csa, udi->fd, rc); /* Sync it all */ if (-1 == rc) { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } else { /* Now do final MM file sync before exit */ assert(csa->ti->total_blks == csa->total_blks); #ifdef _AIX GTM_DB_FSYNC(csa, udi->fd, rc); if (-1 == rc) #else if (-1 == MSYNC((caddr_t)csa->db_addrs[0], (caddr_t)csa->db_addrs[1])) #endif { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } } else if (unsafe_last_writer && !cnl->lastwriterbypas_msg_issued) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_LASTWRITERBYPAS, 2, DB_LEN_STR(reg)); cnl->lastwriterbypas_msg_issued = TRUE; } } /* end if (!reg->read_only && !cnl->donotflush_dbjnl) */ /* We had canceled all db timers at start of rundown. In case as part of rundown (wcs_flu above), we had started * any timers, cancel them BEFORE setting reg->open to FALSE (assert in wcs_clean_dbsync relies on this). */ CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer); if (reg->read_only && we_are_last_user && !have_standalone_access && cnl->remove_shm) { /* mupip_exit_handler will do this after mur_close_file */ db_ipcs.semid = INVALID_SEMID; db_ipcs.shmid = INVALID_SHMID; db_ipcs.gt_sem_ctime = 0; db_ipcs.gt_shm_ctime = 0; db_ipcs.fn_len = reg->dyn.addr->fname_len; memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len); db_ipcs.fn[reg->dyn.addr->fname_len] = 0; /* request gtmsecshr to flush. 
read_only cannot flush itself */ WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa); if (!csa->read_only_fs) { secshrstat = send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0); if (0 != secshrstat) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gtmsecshr failed to update database file header")); } } /* Done with file now, close it */ CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */ if (-1 == rc) { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during file close"), errno); } /* Unmap storage if mm mode but only the part that is not the fileheader (so it shows up in dumps) */ # if !defined(_AIX) if (is_mm && (NULL != csa->db_addrs[0])) { assert(csa->db_addrs[1] > csa->db_addrs[0]); munmap_len = (sm_long_t)(csa->db_addrs[1] - csa->db_addrs[0]); if (0 < munmap_len) munmap((caddr_t)(csa->db_addrs[0]), (size_t)(munmap_len)); } # endif /* Detach our shared memory while still under lock so reference counts will be correct for the next process to run down * this region. In the process also get the remove_shm status from node_local before detaching. * If cnl->donotflush_dbjnl is TRUE, it means we can safely remove shared memory without compromising data * integrity as a reissue of recover will restore the database to a consistent state. */ remove_shm = !vermismatch && (cnl->remove_shm || cnl->donotflush_dbjnl); /* We are done with online rollback on this region. Indicate to other processes by setting the onln_rlbk_pid to 0. * Do it before releasing crit (t_end relies on this ordering when accessing cnl->onln_rlbk_pid). */ if (jgbl.onlnrlbk) cnl->onln_rlbk_pid = 0; rel_crit(reg); /* Since we are about to detach from the shared memory, release crit and reset onln_rlbk_pid */ /* If we had skipped flushing journal and database buffers due to a concurrent online rollback, increment the counter * indicating that in the shared memory so that online rollback can report the # of such processes when it shuts down. * The same thing is done for both FTOK and access control semaphores when there are too many MUMPS processes. */ if (safe_mode) /* indicates flushing was skipped */ { if (bypassed_access) cnl->dbrndwn_access_skip++; /* Access semaphore can be bypassed during online rollback */ if (bypassed_ftok) cnl->dbrndwn_ftok_skip++; } if (jgbl.onlnrlbk) csa->hold_onto_crit = FALSE; GTM_WHITE_BOX_TEST(WBTEST_HOLD_SEM_BYPASS, cnl->wbox_test_seq_num, 0); status = shmdt((caddr_t)cnl); csa->nl = NULL; /* dereferencing nl after detach is not right, so we set it to NULL so that we can test before dereferencing */ /* Note that although csa->nl is NULL, we use CSA_ARG(csa) below (not CSA_ARG(NULL)) to be consistent with similar * usages before csa->nl became NULL. The "is_anticipatory_freeze_needed" function (which is in turn called by the * CHECK_IF_FREEZE_ON_ERROR_NEEDED macro) does a check of csa->nl before dereferencing shared memory contents so * we are safe passing "csa".
*/ if (-1 == status) send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during shmdt"), errno); REMOVE_CSA_FROM_CSADDRSLIST(csa); /* remove "csa" from list of open regions (cs_addrs_list) */ reg->open = FALSE; /* If file is still not in good shape, die here and now before we get rid of our storage */ assertpro(0 == csa->wbuf_dqd); ipc_deleted = FALSE; /* If we are the very last user, remove shared storage id and the semaphores */ if (we_are_last_user) { /* remove shared storage, only if last writer to rundown did a successful wcs_flu() */ assert(!vermismatch); if (remove_shm) { ipc_deleted = TRUE; if (0 != shm_rmid(udi->shmid)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove shared memory")); /* Note that we no longer have a new shared memory. Currently only used/usable for standalone rollback. */ udi->new_shm = FALSE; /* mupip recover/rollback don't release the semaphore here, but do it later in db_ipcs_reset (invoked from * mur_close_files()) */ if (!have_standalone_access) { if (0 != sem_rmid(udi->semid)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove semaphore")); udi->new_sem = FALSE; /* Note that we no longer have a new semaphore */ udi->grabbed_access_sem = FALSE; udi->counter_acc_incremented = FALSE; } } else if (is_src_server || is_updproc) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else { assert(!have_standalone_access || jgbl.onlnrlbk || safe_mode); if (!jgbl.onlnrlbk && !have_standalone_access) { /* If we were writing, get rid of our writer access count semaphore */ if (!reg->read_only) { if (!access_counter_halted) { save_errno = do_semop(udi->semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO); if (0 != save_errno) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown access control semaphore decrement"), CALLFROM, save_errno); } udi->counter_acc_incremented = FALSE; } assert(safe_mode || !bypassed_access); /* Now remove the rundown lock */ if (!bypassed_access) { if (0 != (save_errno = do_semop(udi->semid, DB_CONTROL_SEM, -1, SEM_UNDO))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown access control semaphore release"), CALLFROM, save_errno); udi->grabbed_access_sem = FALSE; } } /* else access control semaphore will be released in db_ipcs_reset */ } if (!have_standalone_access) { if (bypassed_ftok) { if (!ftok_counter_halted) if (0 != (save_errno = do_semop(udi->ftok_semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } else if (!ftok_sem_release(reg, !ftok_counter_halted, FALSE)) { FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_release, process_id); rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } udi->grabbed_ftok_sem = FALSE; udi->counter_ftok_incremented = FALSE; } ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); if (!ipc_deleted) { GET_CUR_TIME(time_str); if (is_src_server) gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, 
time_str, LEN_AND_LIT("Source server"), REG_LEN_STR(reg)); if (is_updproc) gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Update process"), REG_LEN_STR(reg)); if (mupip_jnl_recover && (!jgbl.onlnrlbk || !we_are_last_user)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); } } REVERT; return EXIT_NRM; }
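/* Editor's note: the access control semaphore acquisition in gds_rundown() above follows a classic
 * SysV two-operation pattern -- "wait for zero" plus "increment", issued atomically, first with
 * IPC_NOWAIT and then blocking, both with SEM_UNDO. A minimal standalone sketch of that pattern
 * (illustrative only; acquire_control_sem() is a hypothetical name, not part of this codebase)
 * is kept below under #if 0 so it does not participate in the build.
 */
#if 0
#include <errno.h>
#include <sys/ipc.h>
#include <sys/sem.h>

static int acquire_control_sem(int semid, int sem_num, int nowait)
{
	struct sembuf	sop[2];
	int		rc;

	sop[0].sem_num = sem_num;	sop[0].sem_op = 0;	/* wait for semaphore value 0 */
	sop[1].sem_num = sem_num;	sop[1].sem_op = 1;	/* then atomically take the lock */
	sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | (nowait ? IPC_NOWAIT : 0);
	do
		rc = semop(semid, sop, 2);	/* both operations succeed or neither does */
	while ((-1 == rc) && (EINTR == errno));
	return rc;	/* 0 on success; -1 with errno == EAGAIN means busy in nowait mode */
}
#endif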
/************************************************************************************************** * Routine to perform string-level case conversion to "upper", "lower" and "title" case. * Since ICU only supports an API using UTF-16 representation, case conversion of UTF-8 strings involves * encoding conversion as described below: * 1. First, the UTF-8 source string is converted to UTF-16 representation (u_strFromUTF8()) * which is stored in a local buffer of size MAX_ZCONVBUFF. If this space is not sufficient, * we try to allocate it on the heap. * 2. Since case conversion may expand the string, we compute the space required by * preflighting the ICU case conversion API and then allocate that space before performing * the real conversion. * 3. Translating the converted UTF-16 string back to UTF-8 is done in stringpool (with similar * preflighting to compute the required space). * NOTE: * Malloc is used only if the size exceeds 2K characters (a very unlikely situation esp. with * case conversion). * ***************************************************************************************************/ void op_fnzconvert2(mval *src, mval *kase, mval *dst) { int index; int32_t src_ustr_len, src_chlen, dst_chlen, ulen, dstlen = 0; UErrorCode status; char *dstbase; UChar src_ustr[MAX_ZCONVBUFF], dst_ustr[MAX_ZCONVBUFF], *src_ustr_ptr, *dst_ustr_ptr; intrpt_state_t prev_intrpt_state; DEFER_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); MV_FORCE_STR(kase); if (-1 == (index = verify_case(&kase->str))) { ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_BADCASECODE, 2, kase->str.len, kase->str.addr); } MV_FORCE_STR(src); /* allocate stringpool */ if (!gtm_utf8_mode) { dstlen = src->str.len; ENSURE_STP_FREE_SPACE(dstlen); dstbase = (char *)stringpool.free; assert(NULL != casemaps[index].m); (*casemaps[index].m)((unsigned char *)dstbase, (unsigned char *)src->str.addr, dstlen); } else if (0 != src->str.len) { MV_FORCE_LEN_STRICT(src); if (2 * src->str.char_len <= MAX_ZCONVBUFF) { /* Check if the stack buffer is sufficient considering the worst case where all characters are surrogate pairs, each of which needs 2 UChars */ src_ustr_ptr = src_ustr; src_ustr_len = MAX_ZCONVBUFF; } else { /* To avoid a preflight, allocate (2 * src->str.char_len) UChars.
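* (Editor's note: two UChars per character covers the worst case where every character needs a surrogate pair, so no preflight call is required here.)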
*/ src_ustr_len = 2 * src->str.char_len; src_ustr_ptr = (UChar*)malloc(src_ustr_len * SIZEOF(UChar)); } /* Convert UTF-8 src to UTF-16 (UChar*) representation */ status = U_ZERO_ERROR; u_strFromUTF8(src_ustr_ptr, src_ustr_len, &src_chlen, src->str.addr, src->str.len, &status); if (U_FAILURE(status)) { RELEASE_IF_NOT_LOCAL(src_ustr_ptr, src_ustr); if (U_ILLEGAL_CHAR_FOUND == status || U_INVALID_CHAR_FOUND == status) utf8_len_strict((unsigned char *)src->str.addr, src->str.len); /* to report BADCHAR error */ ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_ICUERROR, 1, status); /* ICU said bad; we say good or don't recognize the error */ } status = U_ZERO_ERROR; dst_ustr_ptr = dst_ustr; dst_chlen = (*casemaps[index].u)(dst_ustr_ptr, MAX_ZCONVBUFF, src_ustr_ptr, src_chlen, NULL, &status); if (U_BUFFER_OVERFLOW_ERROR == status) { status = U_ZERO_ERROR; dst_ustr_ptr = (UChar*)malloc(dst_chlen * SIZEOF(UChar)); /* Now, perform the real conversion with sufficient buffers */ dst_chlen = (*casemaps[index].u)(dst_ustr_ptr, dst_chlen, src_ustr_ptr, src_chlen, NULL, &status); } else if (U_FILE_ACCESS_ERROR == status) { RELEASE_IF_NOT_LOCAL(src_ustr_ptr, src_ustr); ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_ICUERROR, 1, status); } RELEASE_IF_NOT_LOCAL(src_ustr_ptr, src_ustr); /* Preflight the conversion from UTF-16 to UTF-8 to compute the required buffer size */ status = U_ZERO_ERROR; dstlen = 0; u_strToUTF8(NULL, 0, &dstlen, dst_ustr_ptr, dst_chlen, &status); assert(U_BUFFER_OVERFLOW_ERROR == status || U_SUCCESS(status)); if (MAX_STRLEN < dstlen) { RELEASE_IF_NOT_LOCAL(dst_ustr_ptr, dst_ustr); ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MAXSTRLEN); } ENSURE_STP_FREE_SPACE(dstlen); dstbase = (char *)stringpool.free; status = U_ZERO_ERROR; u_strToUTF8(dstbase, dstlen, &ulen, dst_ustr_ptr, dst_chlen, &status); if (U_FAILURE(status)) { RELEASE_IF_NOT_LOCAL(src_ustr_ptr, src_ustr); RELEASE_IF_NOT_LOCAL(dst_ustr_ptr, dst_ustr); ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_ICUERROR, 1, status); /* ICU said bad, but the same call above just returned OK */ } assertpro(ulen == dstlen); RELEASE_IF_NOT_LOCAL(dst_ustr_ptr, dst_ustr); } MV_INIT_STRING(dst, dstlen, dstbase); stringpool.free += dstlen; ENABLE_INTERRUPTS(INTRPT_IN_FUNC_WITH_MALLOC, prev_intrpt_state); }
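/* Editor's note: op_fnzconvert2() above leans on ICU's standard "preflight" idiom -- call a
 * converter with no (or too small a) destination buffer, expect U_BUFFER_OVERFLOW_ERROR along
 * with the computed length, then allocate and convert for real. A self-contained sketch of the
 * idiom (illustrative only; utf16_to_utf8_dup() is a hypothetical helper using malloc instead
 * of the stringpool) is kept below under #if 0 so it does not participate in the build.
 */
#if 0
#include <stdlib.h>
#include <unicode/ustring.h>

static char *utf16_to_utf8_dup(const UChar *src, int32_t src_len, int32_t *out_len)
{
	UErrorCode	status = U_ZERO_ERROR;
	int32_t		need = 0;
	char		*buf;

	u_strToUTF8(NULL, 0, &need, src, src_len, &status);	/* preflight: only computes "need" */
	if ((U_BUFFER_OVERFLOW_ERROR != status) && U_FAILURE(status))
		return NULL;
	status = U_ZERO_ERROR;	/* the overflow "error" was expected; clear it */
	if (NULL == (buf = malloc(need + 1)))
		return NULL;
	u_strToUTF8(buf, need + 1, out_len, src, src_len, &status);	/* real conversion */
	if (U_FAILURE(status))
	{
		free(buf);
		return NULL;
	}
	return buf;	/* NUL-terminated UTF-8; caller frees */
}
#endif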
int gtm_trigger(gv_trigger_t *trigdsc, gtm_trigger_parms *trigprm) { mval *lvvalue; lnr_tabent *lbl_offset_p; uchar_ptr_t transfer_addr; lv_val *lvval; mname_entry *mne_p; uint4 *indx_p; ht_ent_mname *tabent; boolean_t added; int clrlen, rc, i, unwinds; mval **lvvalarray; mv_stent *mv_st_ent; symval *new_symval; uint4 dollar_tlevel_start; stack_frame *fp, *fpprev; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(!skip_dbtriggers); /* should not come here if triggers are not supposed to be invoked */ assert(trigdsc); assert(trigprm); assert((NULL != trigdsc->rtn_desc.rt_adr) || ((MV_STR & trigdsc->xecute_str.mvtype) && (0 != trigdsc->xecute_str.str.len) && (NULL != trigdsc->xecute_str.str.addr))); assert(dollar_tlevel); /* Determine if trigger needs to be compiled */ if (NULL == trigdsc->rtn_desc.rt_adr) { /* No routine hdr addr exists. Need to do compile */ if (0 != gtm_trigger_complink(trigdsc, TRUE)) { PRN_ERROR; /* Leave record of what error caused the compilation failure if any */ rts_error(VARLSTCNT(4) ERR_TRIGCOMPFAIL, 2, trigdsc->rtn_desc.rt_name.len, trigdsc->rtn_desc.rt_name.addr); } } assert(trigdsc->rtn_desc.rt_adr); assert(trigdsc->rtn_desc.rt_adr == CURRENT_RHEAD_ADR(trigdsc->rtn_desc.rt_adr)); /* Setup trigger environment stack frame(s) for execution */ if (!(frame_pointer->type & SFT_TRIGR)) { /* Create new trigger base frame first that back-stops stack unrolling and returns to us */ if (GTM_TRIGGER_DEPTH_MAX < (gtm_trigger_depth + 1)) /* Verify we won't nest too deep */ rts_error(VARLSTCNT(3) ERR_MAXTRIGNEST, 1, GTM_TRIGGER_DEPTH_MAX); DBGTRIGR((stderr, "gtm_trigger: PUSH: frame_pointer 0x%016lx ctxt value: 0x%016lx\n", frame_pointer, ctxt)); /* Protect against interrupts while we have only a trigger base frame on the stack */ DEFER_INTERRUPTS(INTRPT_IN_TRIGGER_NOMANS_LAND); /* The current frame invoked a trigger. We cannot return to it for a TP restart or other reason unless * either the total operation (including trigger) succeeds and we unwind normally or unless the mpc is reset * (like what happens in various error or restart conditions) because right now it returns to where a database * command (KILL, SET or ZTRIGGER) was entered. Set a flag in the frame to prevent MUM_TSTART unless the frame gets * reset. */ frame_pointer->flags |= SFF_TRIGR_CALLD; /* Do not return to this frame via MUM_TSTART */ DBGTRIGR((stderr, "gtm_trigger: Setting SFF_TRIGR_CALLD in frame 0x"lvaddr"\n", frame_pointer)); base_frame(trigdsc->rtn_desc.rt_adr); /* Finish base frame initialization - reset mpc/context to return to us without unwinding base frame */ frame_pointer->type |= SFT_TRIGR; # if defined(__hpux) && defined(__hppa) /* For HPUX-HPPA (PA-RISC), we use longjmp() to return to gtm_trigger() to avoid some space register * corruption issues. Use call-in's already existing mechanism for doing this. Although we no longer support * HPUX-HPPA for triggers due to some unlocated space register error, this code (effectively always ifdef'd * out) is left in in case it gets resurrected in the future (01/2010 SE). */ frame_pointer->mpc = CODE_ADDRESS(ci_ret_code); frame_pointer->ctxt = GTM_CONTEXT(ci_ret_code); # else frame_pointer->mpc = CODE_ADDRESS(gtm_levl_ret_code); frame_pointer->ctxt = GTM_CONTEXT(gtm_levl_ret_code); # endif /* This base stack frame is also where we save environmental info for all triggers invoked at this stack level. * Subsequent triggers fired at this level in this trigger invocation need only reinitialize a few things but * can avoid "the big save".
*/ if (NULL == trigr_symval_list) { /* No available symvals for use with this trigger, create one */ symbinit(); /* Initialize a symbol table the trigger will use */ curr_symval->trigr_symval = TRUE; /* Mark as trigger symval so will be saved not decommissioned */ } else { /* Trigger symval is available for reuse */ new_symval = trigr_symval_list; assert(new_symval->trigr_symval); trigr_symval_list = new_symval->last_tab; /* dequeue new curr_symval from list */ REINIT_SYMVAL_BLK(new_symval, curr_symval); curr_symval = new_symval; PUSH_MV_STENT(MVST_STAB); mv_chain->mv_st_cont.mvs_stab = new_symval; /* So unw_mv_ent() can requeue it for later use */ } /* Push our trigger environment save mv_stent onto the chain */ PUSH_MV_STENT(MVST_TRIGR); mv_st_ent = mv_chain; /* Initialize the mv_stent elements processed by stp_gcol which can be called by either op_gvsavtarg() or * by the extnam saving code below. This initialization keeps stp_gcol - should it be called - from attempting * to process unset fields filled with garbage in them as valid mstr address/length pairs. */ mv_st_ent->mv_st_cont.mvs_trigr.savtarg.str.len = 0; mv_st_ent->mv_st_cont.mvs_trigr.savextref.len = 0; mv_st_ent->mv_st_cont.mvs_trigr.dollar_etrap_save.str.len = 0; mv_st_ent->mv_st_cont.mvs_trigr.dollar_ztrap_save.str.len = 0; mv_st_ent->mv_st_cont.mvs_trigr.saved_dollar_truth = dollar_truth; op_gvsavtarg(&mv_st_ent->mv_st_cont.mvs_trigr.savtarg); if (extnam_str.len) { ENSURE_STP_FREE_SPACE(extnam_str.len); mv_st_ent->mv_st_cont.mvs_trigr.savextref.addr = (char *)stringpool.free; memcpy(mv_st_ent->mv_st_cont.mvs_trigr.savextref.addr, extnam_str.addr, extnam_str.len); stringpool.free += extnam_str.len; assert(stringpool.free <= stringpool.top); } mv_st_ent->mv_st_cont.mvs_trigr.savextref.len = extnam_str.len; mv_st_ent->mv_st_cont.mvs_trigr.ztname_save = dollar_ztname; mv_st_ent->mv_st_cont.mvs_trigr.ztdata_save = dollar_ztdata; mv_st_ent->mv_st_cont.mvs_trigr.ztoldval_save = dollar_ztoldval; mv_st_ent->mv_st_cont.mvs_trigr.ztriggerop_save = dollar_ztriggerop; mv_st_ent->mv_st_cont.mvs_trigr.ztupdate_save = dollar_ztupdate; mv_st_ent->mv_st_cont.mvs_trigr.ztvalue_save = dollar_ztvalue; mv_st_ent->mv_st_cont.mvs_trigr.ztvalue_changed_ptr = ztvalue_changed_ptr; # ifdef DEBUG /* In a debug process, these fields give clues of what trigger we are working on */ mv_st_ent->mv_st_cont.mvs_trigr.gtm_trigdsc_last_save = trigdsc; mv_st_ent->mv_st_cont.mvs_trigr.gtm_trigprm_last_save = trigprm; # endif assert(((0 == gtm_trigger_depth) && (ch_at_trigger_init == ctxt->ch)) || ((0 < gtm_trigger_depth) && (&mdb_condition_handler == ctxt->ch))); mv_st_ent->mv_st_cont.mvs_trigr.ctxt_save = ctxt; mv_st_ent->mv_st_cont.mvs_trigr.gtm_trigger_depth_save = gtm_trigger_depth; if (0 == gtm_trigger_depth) { /* Only back up $*trap settings when initiating the first trigger level */ mv_st_ent->mv_st_cont.mvs_trigr.dollar_etrap_save = dollar_etrap; mv_st_ent->mv_st_cont.mvs_trigr.dollar_ztrap_save = dollar_ztrap; mv_st_ent->mv_st_cont.mvs_trigr.ztrap_explicit_null_save = ztrap_explicit_null; dollar_ztrap.str.len = 0; ztrap_explicit_null = FALSE; if (NULL != gtm_trigger_etrap.str.addr) /* An etrap was defined for the trigger environment - Else existing $etrap persists */ dollar_etrap = gtm_trigger_etrap; } mv_st_ent->mv_st_cont.mvs_trigr.mumps_status_save = mumps_status; mv_st_ent->mv_st_cont.mvs_trigr.run_time_save = run_time; /* See if a MERGE launched the trigger. 
If yes, save some state so ZWRITE, ZSHOW and/or MERGE can be * run in the trigger we dispatch. */ if ((0 != merge_args) || TREF(in_zwrite)) PUSH_MVST_MRGZWRSV; mumps_status = 0; run_time = TRUE; /* Previous value saved just above restored when frame pops */ } else { /* Trigger base frame exists so reinitialize the symbol table for new trigger invocation */ REINIT_SYMVAL_BLK(curr_symval, curr_symval->last_tab); /* Locate the MVST_TRIGR mv_stent containing the backed up values. Some of those values need * to be restored so the 2nd trigger has the same environment as the previous trigger at this level */ for (mv_st_ent = mv_chain; (NULL != mv_st_ent) && (MVST_TRIGR != mv_st_ent->mv_st_type); mv_st_ent = (mv_stent *)(mv_st_ent->mv_st_next + (char *)mv_st_ent)) ; assert(NULL != mv_st_ent); assert((char *)mv_st_ent < (char *)frame_pointer); /* Ensure mv_stent associated this trigger frame */ /* Reinit backed up values from the trigger environment backup */ dollar_truth = mv_st_ent->mv_st_cont.mvs_trigr.saved_dollar_truth; op_gvrectarg(&mv_st_ent->mv_st_cont.mvs_trigr.savtarg); extnam_str.len = mv_st_ent->mv_st_cont.mvs_trigr.savextref.len; if (extnam_str.len) memcpy(extnam_str.addr, mv_st_ent->mv_st_cont.mvs_trigr.savextref.addr, extnam_str.len); mumps_status = 0; assert(run_time); /* Note we do not reset the handlers for parallel triggers - set one time only when enter first level * trigger. After that, whatever happens in trigger world, stays in trigger world. */ } assert(frame_pointer->type & SFT_TRIGR); # ifdef DEBUG gtm_trigdsc_last = trigdsc; gtm_trigprm_last = trigprm; # endif /* Set new value of trigger ISVs. Previous values already saved in trigger base frame */ dollar_ztname = &trigdsc->rtn_desc.rt_name; dollar_ztdata = (mval *)trigprm->ztdata_new; dollar_ztoldval = trigprm->ztoldval_new; dollar_ztriggerop = (mval *)trigprm->ztriggerop_new; dollar_ztupdate = trigprm->ztupdate_new; dollar_ztvalue = trigprm->ztvalue_new; ztvalue_changed_ptr = &trigprm->ztvalue_changed; /* Set values associated with trigger into symbol table */ lvvalarray = trigprm->lvvalarray; for (i = 0, mne_p = trigdsc->lvnamearray, indx_p = trigdsc->lvindexarray; i < trigdsc->numlvsubs; ++i, ++mne_p, ++indx_p) { /* Once thru for each subscript we are to set */ lvvalue = lvvalarray[*indx_p]; /* Locate mval that contains value */ assert(NULL != lvvalue); assert(MV_DEFINED(lvvalue)); /* No sense in defining the undefined */ lvval = lv_getslot(curr_symval); /* Allocate an lvval to put into symbol table */ LVVAL_INIT(lvval, curr_symval); lvval->v = *lvvalue; /* Copy mval into lvval */ added = add_hashtab_mname_symval(&curr_symval->h_symtab, mne_p, lvval, &tabent); assert(added); assert(NULL != tabent); } /* While the routine header is available in trigdsc, we also need the <null> label address associated with * the first (and only) line of code. 
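* (Editor's note: LNRTAB_ADR() below locates the line number table in the routine header, and LINE_NUMBER_ADDR() turns its first entry into the transfer address.)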
*/ lbl_offset_p = LNRTAB_ADR(trigdsc->rtn_desc.rt_adr); transfer_addr = (uchar_ptr_t)LINE_NUMBER_ADDR(trigdsc->rtn_desc.rt_adr, lbl_offset_p); /* Create new stack frame for invoked trigger in same fashion as gtm_init_env() creates its 2ndary frame */ # ifdef HAS_LITERAL_SECT new_stack_frame(trigdsc->rtn_desc.rt_adr, (unsigned char *)LINKAGE_ADR(trigdsc->rtn_desc.rt_adr), transfer_addr); # else /* Any platform that does not follow pv-based linkage model either * (1) uses the following calculation to determine the context pointer value, or * (2) doesn't need a context pointer */ new_stack_frame(trigdsc->rtn_desc.rt_adr, PTEXT_ADR(trigdsc->rtn_desc.rt_adr), transfer_addr); # endif dollar_tlevel_start = dollar_tlevel; assert(gv_target->gd_csa == cs_addrs); gv_target->trig_local_tn = local_tn; /* Record trigger being driven for this global */ /* Invoke trigger generated code */ rc = gtm_trigger_invoke(); if (1 == rc) { /* Normal return code (from dm_start). Check if TP has been unwound or not */ assert(dollar_tlevel <= dollar_tlevel_start); /* Bigger would be quite the surprise */ if (dollar_tlevel < dollar_tlevel_start) { /* Our TP level was unwound during the trigger so throw an error */ DBGTRIGR((stderr, "gtm_trigger: $TLEVEL less than at start - throwing TRIGTLVLCHNG\n")); gtm_trigger_fini(TRUE, FALSE); /* dump this trigger level */ rts_error(VARLSTCNT(4) ERR_TRIGTLVLCHNG, 2, trigdsc->rtn_desc.rt_name.len, trigdsc->rtn_desc.rt_name.addr); } rc = 0; /* Be polite and return 0 for the (hopefully common) success case */ } else if (ERR_TPRETRY == rc) { /* We are restarting the entire transaction. There are two possibilities here: * 1) This is a nested trigger level in which case we need to unwind further or * the outer trigger level was created by M code. If either is true, just * rethrow the TPRETRY error. * 2) This is the outer trigger and the call to op_tstart() was done by our caller. * In this case, we just return to our caller with a code signifying they need * to restart the implied transaction. */ assert(dollar_tlevel && (tstart_trigger_depth <= gtm_trigger_depth)); if ((tstart_trigger_depth < gtm_trigger_depth) || !tp_pointer->implicit_tstart || !tp_pointer->implicit_trigger) { /* Unwind a trigger level to restart level or to next trigger boundary */ gtm_trigger_fini(FALSE, FALSE); /* Get rid of this trigger level - we won't be returning */ DBGTRIGR((stderr, "gtm_trigger: dm_start returned rethrow code - rethrowing ERR_TPRETRY\n")); INVOKE_RESTART; } else { /* It is possible we are restarting a transaction that never got around to creating a base * frame yet the implicit TStart was done. So if there is no trigger base frame, do not * run gtm_trigger_fini() but instead do the one piece of cleanup it does that we still need. */ assert(donot_INVOKE_MUMTSTART); if (SFT_TRIGR & frame_pointer->type) { /* Normal case when TP restart unwinding back to implicit beginning */ gtm_trigger_fini(FALSE, FALSE); DBGTRIGR((stderr, "gtm_trigger: dm_start returned rethrow code - returning to gvcst_<caller>\n")); } else { /* Unusual case of trigger that died in no-mans-land before trigger base frame established. 
* Remove the "do not return to me" flag only on non-error unwinds */ assert(tp_pointer->implicit_tstart); assert(SFF_TRIGR_CALLD & frame_pointer->flags); frame_pointer->flags &= SFF_TRIGR_CALLD_OFF; DBGTRIGR((stderr, "gtm_trigger: turning off SFF_TRIGR_CALLD (1) in frame 0x"lvaddr"\n", frame_pointer)); DBGTRIGR((stderr, "gtm_trigger: unwinding no-base-frame trigger for TP restart\n")); } } /* Fall out and return ERR_TPRETRY to caller */ } else if (0 == rc) /* We should never get a return code of 0. This would be out-of-design and a signal that something * is quite broken. We cannot "rethrow" outside the trigger because it was not initially an error so * mdb_condition_handler would have no record of it (rethrown errors must have originally occurred in * or have been rethrown by mdb_condition_handler) and assert fail at best. */ GTMASSERT; else { /* We have an unexpected return code due to some error during execution of the trigger that tripped * gtm_trigger's safety handler (i.e. an error occurred in mdb_condition_handler() established by * dm_start()). Since we are going to unwind the trigger frame and rethrow the error, we also have * to unwind all the stack frames on top of the trigger frame. Figure out how many frames that is, * unwind them all plus the trigger base frame before rethrowing the error. */ for (unwinds = 0, fp = frame_pointer; (NULL != fp) && !(SFT_TRIGR & fp->type); fp = fp->old_frame_pointer) unwinds++; assert((NULL != fp) && (SFT_TRIGR & fp->type)); GOFRAMES(unwinds, TRUE, FALSE); assert((NULL != frame_pointer) && !(SFT_TRIGR & frame_pointer->type)); DBGTRIGR((stderr, "gtm_trigger: Unsupported return code (%d) - unwound %d frames and now rethrowing error\n", rc, unwinds)); rts_error(VARLSTCNT(1) ERR_REPEATERROR); } return rc; }
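/* Editor's note: summarizing the return protocol implemented above, gtm_trigger() yields
 *	0            - the trigger ran to completion (dm_start() returned 1 and $TLEVEL is intact)
 *	ERR_TPRETRY  - the caller must restart the implied transaction
 * and any other nonzero code from the trigger is never returned: the frames above the trigger
 * base frame are unwound and the error is rethrown as ERR_REPEATERROR.
 */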