void iosocket_destroy (io_desc *ciod) { io_log_name **lpp, *lp; /* logical name pointers */ d_socket_struct *dsocketptr; assertpro(ciod->type == gtmsocket); assertpro(ciod->state == dev_closed); dsocketptr = (d_socket_struct *) ciod->dev_sp; assertpro(dsocketptr != NULL); for (lpp = &io_root_log_name, lp = *lpp; lp; lp = *lpp) { if (lp->iod->pair.in == ciod) { /* The only device that may be "split" is the principal device. Since it is permanently open, * it will never get here. */ assert(lp->iod == ciod); assert(lp->iod->pair.out == ciod); *lpp = (*lpp)->next; free(lp); } else lpp = &lp->next; } free(dsocketptr); free(ciod); }
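/* Illustrative sketch (not part of the GT.M sources): iosocket_destroy() above walks io_root_log_name with a
 * pointer-to-pointer ("lpp") so that a matching entry can be unlinked without keeping a separate "previous"
 * pointer.  The standalone example below shows the same idiom on a hypothetical list type; "node" and
 * "remove_matching" are invented names for illustration only.
 */
#include <stdlib.h>

typedef struct node
{
    int         key;
    struct node *next;
} node;

/* Remove (and free) every element whose key matches "key"; the head pointer itself may be updated. */
static void remove_matching(node **head, int key)
{
    node    **lpp, *lp;

    for (lpp = head, lp = *lpp; NULL != lp; lp = *lpp)
    {
        if (key == lp->key)
        {   /* Redirect whatever pointed at "lp" (the head or a next field) past it, then release it. */
            *lpp = lp->next;
            free(lp);
        } else
            lpp = &lp->next;    /* advance: the next candidate is reached through lp->next */
    }
}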
/* Halt the process similar to op_halt but allow a return code to be specified. If no return code * is specified, return code 0 is used as a default (making it identical to op_halt). */ int m_zhalt(void) { triple *triptr; oprtype ot; int status; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; /* Let m_halt() handle the case of the missing return code */ if ((TK_SPACE == TREF(window_token)) || (TK_EOL == TREF(window_token))) return m_halt(); switch (status = expr(&ot, MUMPS_NUM)) /* NOTE assignment */ { case EXPR_FAIL: return FALSE; case EXPR_GOOD: triptr = newtriple(OC_ZHALT); triptr->operand[0] = ot; return TRUE; case EXPR_INDR: make_commarg(&ot, indir_zhalt); return TRUE; default: assertpro(FALSE); } return FALSE; /* This should never get executed, added to make compiler happy */ }
int fd_ioready(int sock_fd, int poll_direction, int timeout) { int save_errno, status, EAGAIN_cnt = 0; # ifdef USE_POLL struct pollfd fds; # else fd_set fds, *readfds, *writefds; struct timeval timeout_spec; # endif assert(timeout < MILLISECS_IN_SEC); SELECT_ONLY(timeout = timeout * 1000); /* Convert to microseconds (~ 1sec) */ assert((timeout >= 0) && (timeout < POLL_ONLY(MILLISECS_IN_SEC) SELECT_ONLY(MICROSEC_IN_SEC))); # ifdef USE_POLL fds.fd = sock_fd; fds.events = (REPL_POLLIN == poll_direction) ? POLLIN : POLLOUT; # else readfds = writefds = NULL; timeout_spec.tv_sec = 0; timeout_spec.tv_usec = timeout; assertpro(FD_SETSIZE > sock_fd); FD_ZERO(&fds); FD_SET(sock_fd, &fds); writefds = (REPL_POLLOUT == poll_direction) ? &fds : NULL; readfds = (REPL_POLLIN == poll_direction) ? &fds : NULL; # endif POLL_ONLY(while (-1 == (status = poll(&fds, 1, timeout)))) SELECT_ONLY(while (-1 == (status = select(sock_fd + 1, readfds, writefds, NULL, &timeout_spec)))) { save_errno = ERRNO; if (EINTR == save_errno) { /* Give it another shot. But, halve the timeout so we don't keep doing this forever. */ timeout = timeout >> 1; } else if (EAGAIN == save_errno)
static char *encode_os() { unsigned char *p; int count, osidx; count = 0; p = (unsigned char *)gtm_release_name; /* third arg in release name string */ while (*p && count < 2) { if (*p == ' ') count++; p++; } if (count == 2) { for (osidx = 0; osidx < SIZEOF(gtcm_proto_os_info)/SIZEOF(gtcm_proto_os_info_t) - 1; osidx++) { if (0 == memcmp(p, gtcm_proto_os_info[osidx].os_in_rel_str, gtcm_proto_os_info[osidx].size_of_os_in_rel_str)) return gtcm_proto_os_info[osidx].proto_os; } } assertpro(FALSE && "os match"); return NULL; /* Added to make compiler happy and not throw warning */ }
static char *encode_cpu() { unsigned char *p; int count, cpuidx; count = 0; p = (unsigned char *)gtm_release_name; /* fourth arg in release name string */ while (*p && count < 3) { if (*p == ' ') count++; p++; } if (count == 3) { for (cpuidx = 0; cpuidx < SIZEOF(gtcm_proto_cpu_info)/SIZEOF(gtcm_proto_cpu_info_t) - 1; cpuidx++) { if (0 == memcmp(p, gtcm_proto_cpu_info[cpuidx].cpu_in_rel_str, gtcm_proto_cpu_info[cpuidx].size_of_cpu_in_rel_str)) return gtcm_proto_cpu_info[cpuidx].proto_cpu; } } assertpro(FALSE && "cpu match"); return NULL; /* Added to make compiler happy and not throw warning */ }
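/* Illustrative sketch (not part of the GT.M sources): encode_os() and encode_cpu() above locate a token in
 * gtm_release_name by counting separator spaces (two spaces skipped => third token, three => fourth) and then
 * compare what follows against a lookup table.  The hypothetical helper below shows only the token-location
 * step on a plain NUL-terminated string.
 */
#include <stddef.h>

/* Return a pointer to the start of the token that follows the nth space of "str", or NULL if the string
 * contains fewer than n spaces. */
static const char *token_after_n_spaces(const char *str, int n)
{
    int count;

    for (count = 0; *str && (count < n); str++)
    {
        if (' ' == *str)
            count++;
    }
    return (count == n) ? str : NULL;
}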
void mubexpfilnam(char *dirname, unsigned int dirlen, backup_reg_list *list) { char *c1; mstr file; char tmp_mstr_addr[MAX_FN_LEN]; file.len = MAX_FN_LEN; file.addr = tmp_mstr_addr; if (list != mu_repl_inst_reg_list) { /* Database region */ if (!mupfndfil(list->reg, &file, LOG_ERROR_TRUE)) { util_out_print("Backup not finished because of the above error.", TRUE); error_mupip = TRUE; return; } } else { /* Replication instance region */ if (!repl_inst_get_name(file.addr, (unsigned int *)&file.len, MAX_FN_LEN, issue_rts_error)) assertpro(FALSE); /* rts_error should have been issued by repl_inst_get_name */ } for (c1 = file.addr + file.len; (*c1 != '/') && (c1 != file.addr); c1--) ; list->backup_file.len = INTCAST(dirlen + (file.len - (c1 - file.addr))); list->backup_file.addr = (char *)malloc(list->backup_file.len + 1); memcpy(list->backup_file.addr, dirname, dirlen); memcpy(list->backup_file.addr + dirlen, c1, (file.len - (c1 - file.addr))); list->backup_file.addr[list->backup_file.len] = '\0'; return; }
unsigned char i386_reg(unsigned char vax_reg) { unsigned char reg; switch (vax_reg & 0xf) /* mask out VAX register mode field */ { case 0: reg = I386_REG_EAX; break; case 1: reg = I386_REG_EDX; break; case 8: reg = I386_REG_ESI; break; case 9: reg = I386_REG_EDI; break; case 11: reg = I386_REG_EBX; break; default: assertpro(FALSE && (vax_reg & 0xf)); break; } return reg; }
/* This routine finds the position of a region in the global directory which we use to index arrays of region information */ int find_reg_hash_idx(gd_region *reg) { gd_region *regl; int index; for (index = gd_header->n_regions-1, regl = gd_header->regions + index; reg != regl; regl--, index--) assertpro(0 <= index); return index; }
/* ------------------------------------------------------------------ * Perform action corresponding to the first async event that * was logged. * ------------------------------------------------------------------ */ void async_action(bool lnfetch_or_start) { /* Double-check that we should be here: */ assert(0 < num_deferred); switch(first_event) { case (outofband_event): /* This function can be invoked only by an op_*intrrpt* transfer table function. Those transfer table * functions should be active only for a short duration between the occurrence of an outofband event * and the handling of it at a logical boundary (next M-line). We don't expect to be running with * those transfer table functions for more than one M-line. If "outofband" is set to 0, the call to * "outofband_action" below will do nothing and we will end up running with the op_*intrrpt* transfer * table functions indefinitely. In this case M-FOR loops are known to return incorrect results which * might lead to application integrity issues. It is therefore considered safer to GTMASSERT as we * will at least have the core for analysis. */ assertpro(0 != outofband); outofband_action(lnfetch_or_start); break; case (tt_write_error_event): # ifdef UNIX xfer_reset_if_setter(tt_write_error_event); iott_wrterr(); # endif /* VMS tt error processing is done in op_*intrrpt */ break; case (network_error_event): /* ------------------------------------------------------- * Network error not implemented here yet. Need to move * from mdb_condition_handler after review. * ------------------------------------------------------- */ case (zstp_or_zbrk_event): /* ------------------------------------------------------- * ZStep/Zbreak events not implemented here yet. Need to * move here after review. * ------------------------------------------------------- */ default: assertpro(FALSE); /* see above assertpro() for comment as to why this is needed */ } }
int trigger_fill_xecute_buffer(gv_trigger_t *trigdsc) { int src_fetch_status; assert(!dollar_tlevel || (tstart_trigger_depth <= gtm_trigger_depth)); /* We have 3 cases to consider - all of which REQUIRE a TP fence to already be in effect. The reason for this is, if we * detect a restartable condition, we are going to cause this region's triggers to be unloaded which destroys the block * our parameter is pointing to so the restart logic MUST take place outside of this routine. * * 1. We have an active transaction due to an IMPLICIT TSTART done by trigger handling but the trigger level has not yet * been created. We don't need another TP wrapper in this case but we do need a condition handler to trap the thrown * retry to again prevent C stack unwind and return to the caller in the same shape that gtm_trigger would return. * 2. We have an active transaction due to an EXPLICIT M-code TSTART command. For this case, the trigger loads proceed * as normal with restarts handled in the regular automatic fashion. Note this case also covers the tp restarts done * by both the update process and mupip recover forward since those functions have their own way of intercepting and * dealing with restarts. To cover those cases, tp->implicit_tstart can be TRUE but tp_implicit_trigger MUST be * FALSE. * 3. We have an active transaction due to an IMPLICIT TSTART done by trigger handling and one or more triggers are * running. This becomes like case 2 since the restart will be handled by gtm_trigger and the proper thing will * be done. * * An extra note about case 3. Case 3 can be the identified case if in a nested trigger we are in trigger-no-mans-land * with a base frame for the nested trigger (having driven one of a set of parallel nested triggers) but no actual trigger * execution frame yet exists. This is really a case 1 situation with a nested trigger but it turns out that dealing with it * like case 3 does the right thing because if/when mdb_condition_handler catches a thrown TPRETRY error, mdb_condition * handler will peel the nested trigger frame off before doing the restart which works for us and avoids issues of * multi-level implicit restarts we would otherwise have to handle. * * Note, this routine is for loading trigger source when the trigger is to be driven. The trigger_source_read_andor_verify() * routine should be used when fetching trigger source for reasons other than driving the triggers. This routine is lighter * weight but has a dependence on the restartability of the trigger-drive logic for getting the triggers reloaded as * necessary. */ assertpro(0 < dollar_tlevel); if (!tp_pointer->implicit_trigger /* Case 2 */ || (tp_pointer->implicit_tstart && tp_pointer->implicit_trigger && (tstart_trigger_depth != gtm_trigger_depth))) /* Case 3 */ { /* Test for Case 2/3 where we get to do very little: */ DBGTRIGR((stderr, "trigger_fill_xecute_buffer: Case 2/3\n")); assert((!tp_pointer->implicit_trigger) || (0 < gtm_trigger_depth)); trigger_fill_xecute_buffer_read_trigger_source(trigdsc); } else { /* Test for Case 1 where we only need a condition handler */ DBGTRIGR((stderr, "trigger_fill_xecute_buffer: Case 1\n")); assert(tp_pointer->implicit_tstart && tp_pointer->implicit_trigger); assert(tstart_trigger_depth == gtm_trigger_depth); ESTABLISH_RET(trigger_fill_xecute_buffer_ch, SIGNAL); trigger_fill_xecute_buffer_read_trigger_source(trigdsc); REVERT; } /* return our bounty to caller */ trigdsc->xecute_str.mvtype = MV_STR; return 0; /* Could return ERR_TPRETRY if return is via our condition handler */ }
boolean_t op_gvqueryget(mval *key, mval *val) { boolean_t gotit; gv_key *save_key; gvnh_reg_t *gvnh_reg; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; if (TREF(gv_last_subsc_null) && NEVER == gv_cur_region->null_subs) sgnl_gvnulsubsc(); switch (REG_ACC_METH(gv_cur_region)) { case dba_bg: case dba_mm: gvnh_reg = TREF(gd_targ_gvnh_reg); if (NULL == gvnh_reg) gotit = ((0 != gv_target->root) ? gvcst_queryget(val) : FALSE); /* global does not exist if root is 0 */ else INVOKE_GVCST_SPR_XXX(gvnh_reg, gotit = gvcst_spr_queryget(val)); break; case dba_cm: gotit = gvcmx_query(val); break; case dba_usr: save_key = gv_currkey; gv_currkey = gv_altkey; /* We rely on the fact that *gv_altkey area is not modified by gvusr_queryget, and don't change gv_altkey. * If and when *gv_altkey area is modified by gvusr_queryget, we have to set up a spare key area * (apart from gv_altkey and gv_currkey), and make gv_altkey point to the spare area before calling gvusr_queryget */ memcpy(gv_currkey, save_key, SIZEOF(*save_key) + save_key->end); gotit = gvusr_queryget(val); gv_altkey = gv_currkey; gv_currkey = save_key; break; default: assertpro(FALSE && REG_ACC_METH(gv_cur_region)); } if (gotit) { key->mvtype = MV_STR; key->str.addr = (char *)gv_altkey->base; key->str.len = gv_altkey->end + 1; s2pool(&key->str); } else { *key = literal_null; *val = literal_null; } return gotit; }
void print_destination_operand() { switch(instruction.destination_operand_class) { case undefined_class : assertpro(FALSE); break; case register_class : assert(instruction.destination_operand_reg != NULL); *obpt++ = '%'; *obpt++ = instruction.reg_prefix; SET_OBPT_STR(instruction.destination_operand_reg, STRLEN(instruction.destination_operand_reg)); break; case memory_class : assert(instruction.source_operand_class != memory_class); if (instruction.destination_operand_reg != NULL) { SET_OBPT_INT4(instruction.offset); *obpt++ = '('; *obpt++ = '%'; *obpt++ = instruction.reg_prefix; SET_OBPT_STR(instruction.destination_operand_reg, STRLEN(instruction.destination_operand_reg)); *obpt++ = ')'; } else { SET_OBPT_INT4(instruction.offset); } break; case immediate_class : *obpt++ = '0'; *obpt++ = 'x'; SET_OBPT_INT8(instruction.immediate); break; default : assertpro(FALSE); } }
void print_instruction() { list_chkpage(); obpt = &outbuf[0]; memset(obpt, SP, ASM_OUT_BUFF); obpt += 10; i2hex((curr_addr - PTEXT_OFFSET), obpt, 8); curr_addr += (instidx - prev_idx); obpt += 10; for( ; prev_idx < instidx; prev_idx++) { i2hex(code_buf[prev_idx], obpt, 2); obpt += 2; } obpt += 10; *obpt++ = '\n'; *obpt++ = '\t'; *obpt++ = '\t'; *obpt++ = '\t'; *obpt++ = '\t'; *obpt++ = '\t'; *obpt++ = '\t'; assert( instruction.opcode_mnemonic != NULL ); SET_OBPT_STR(instruction.opcode_mnemonic, STRLEN(instruction.opcode_mnemonic)); *obpt++ = instruction.opcode_suffix; *obpt++ = '\t'; instruction.num_operands = (instruction.num_operands > grp_prefix) ? (instruction.num_operands - grp_prefix) : instruction.num_operands; switch (instruction.num_operands) { case 0 : break; case 1 : /* single operand assumed to be in the source operand only.. */ assert(instruction.destination_operand_class == undefined_class); print_source_operand(); break; case 2 : print_source_operand(); *obpt++ = ','; print_destination_operand(); break; default : assertpro(FALSE); } /* Now reset the instruction structure */ emit_eoi(); reset_instruction(); }
int gtmrecv_helpers_init(int n_readers, int n_writers) { /* Receiver server interface to start n_readers and n_writers helper processes */ upd_helper_ctl_ptr_t upd_helper_ctl; upd_helper_entry_ptr_t helper, helper_top; int reader_count, writer_count, error_count, avail_slots, status; assert(0 != n_readers || 0 != n_writers); upd_helper_ctl = recvpool.upd_helper_ctl; for (avail_slots = 0, helper = upd_helper_ctl->helper_list, helper_top = helper + MAX_UPD_HELPERS; helper < helper_top; helper++) { if (0 == helper->helper_pid) avail_slots++; } if (n_readers + n_writers > avail_slots) { /* adjust reader/writer count for available slots according to the percentage specified by user */ n_writers = (int)(((float)n_writers/(n_readers + n_writers)) * (float)avail_slots); /* may round down */ n_readers = avail_slots - n_writers; /* preference to readers, writer count may round down */ } /* Start helpers, readers first */ for (helper = upd_helper_ctl->helper_list, helper_top = helper + MAX_UPD_HELPERS, reader_count = 0, writer_count = 0, error_count = 0; (reader_count + writer_count + error_count) < (n_readers + n_writers); ) { for (; (helper < helper_top) && (0 != helper->helper_pid); helper++) /* find next vacant slot; bounds check first to avoid reading past the list */ ; assertpro(helper != helper_top); status = helper_init(helper, ((reader_count + error_count) < n_readers) ? UPD_HELPER_READER : UPD_HELPER_WRITER); if (UPDPROC_STARTED == status) { if ((reader_count + error_count) < n_readers) reader_count++; else writer_count++; } else /* UPDPROC_START_ERR == status */ { if ((EREPL_UPDSTART_BADPATH == repl_errno) /* receiver server lost gtm_dist environment, bad situation */ || (EREPL_UPDSTART_EXEC == repl_errno)) /* in forked child, could not exec, should exit */ gtmrecv_exit(ABNORMAL_SHUTDOWN); error_count++; } } upd_helper_ctl->start_n_readers = reader_count; upd_helper_ctl->start_n_writers = writer_count; SHM_WRITE_MEMORY_BARRIER; upd_helper_ctl->start_helpers = FALSE; return ((0 == error_count) ? NORMAL_SHUTDOWN : ABNORMAL_SHUTDOWN); }
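/* Illustrative sketch (not part of the GT.M sources): when fewer helper slots are free than the number of
 * readers plus writers requested, gtmrecv_helpers_init() above prorates the writer count by the requested
 * writer percentage (rounding down) and gives the remaining slots to readers.  The hypothetical helper below
 * isolates that arithmetic; for example, 6 readers + 4 writers requested with only 5 free slots yields
 * 2 writers and 3 readers.
 */
static void prorate_helpers(int n_readers, int n_writers, int avail_slots, int *use_readers, int *use_writers)
{
    if ((n_readers + n_writers) > avail_slots)
    {   /* writer share of the available slots, rounded down; readers get whatever is left */
        n_writers = (int)(((float)n_writers / (n_readers + n_writers)) * (float)avail_slots);
        n_readers = avail_slots - n_writers;
    }
    *use_readers = n_readers;
    *use_writers = n_writers;
}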
void set_memory_reg() { instruction.reg_prefix = 'r'; if (instruction.source_operand_class == memory_class) instruction.source_operand_reg = (char *)register_list[modrm_byte.modrm.r_m + 8 * rex_prefix.Base]; else if (instruction.destination_operand_class == memory_class) instruction.destination_operand_reg = (char *)register_list[modrm_byte.modrm.r_m + 8 * rex_prefix.Base]; /* Printing of RIP has to be handled differently */ if (instruction.reg_rip) if (instruction.source_operand_class == memory_class) instruction.source_operand_reg = (char *)register_list[REG_RIP]; else if (instruction.destination_operand_class == memory_class) instruction.destination_operand_reg = (char *)register_list[REG_RIP]; else assertpro(FALSE); }
/* The routine that does the actual work of determining the length and responding appropriately in the event an invalid * UTF8 character is detected. */ STATICFNDEF int utf8_len_real(utf8_err_type err_type, mstr* str) { int charlen, bytelen; char *ptrtop, *ptr; boolean_t err_raised; assert(gtm_utf8_mode); ptr = str->addr; ptrtop = ptr + str->len; charlen = 0; err_raised = FALSE; if (!badchar_inhibit) { for (; ptr < ptrtop; charlen++, ptr += bytelen) { if (!UTF8_VALID(ptr, ptrtop, bytelen)) { switch(err_type) { case err_rts: UTF8_BADCHAR(0, ptr, ptrtop, 0, NULL); break; /* Never get here but keeps compiler happy */ case err_stx: UTF8_BADCHAR_STX(0, ptr, ptrtop, 0, NULL); return -1; case err_dec: if (!err_raised) { UTF8_BADCHAR_DEC(0, ptr, ptrtop, 0, NULL); err_raised = TRUE; } bytelen = 1; /* Assume only one char is broken */ break; default: assertpro(FALSE /* Invalid error type */); } } } } else { for (; ptr < ptrtop; charlen++) ptr = (char *)UTF8_MBNEXT(ptr, ptrtop); } assert(ptr == ptrtop); str->char_len = charlen; return charlen; }
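/* Illustrative sketch (not part of the GT.M sources): utf8_len_real() above counts characters by stepping
 * through the buffer one multi-byte sequence at a time, validating each sequence and, depending on err_type,
 * raising an error or counting the bad byte as a single character.  The standalone counter below substitutes
 * a simplified lead-byte length table for UTF8_VALID and simply treats an invalid or truncated sequence as a
 * one-byte character (similar in spirit to the err_dec path); it does not check continuation bytes.
 */
static int utf8_char_count(const char *buf, int len)
{
    const unsigned char *ptr, *ptrtop;
    int                 charlen, bytelen;
    unsigned char       lead;

    ptr = (const unsigned char *)buf;
    ptrtop = ptr + len;
    for (charlen = 0; ptr < ptrtop; charlen++, ptr += bytelen)
    {
        lead = *ptr;
        if (lead < 0x80)
            bytelen = 1;    /* ASCII */
        else if (0xC0 == (lead & 0xE0))
            bytelen = 2;    /* lead byte of a 2-byte sequence */
        else if (0xE0 == (lead & 0xF0))
            bytelen = 3;    /* lead byte of a 3-byte sequence */
        else if (0xF0 == (lead & 0xF8))
            bytelen = 4;    /* lead byte of a 4-byte sequence */
        else
            bytelen = 1;    /* invalid lead byte: count it as one (broken) character */
        if (bytelen > (int)(ptrtop - ptr))
            bytelen = (int)(ptrtop - ptr);  /* sequence truncated at the end of the buffer */
    }
    return charlen;
}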
void op_gvincr(mval *increment, mval *result) { unsigned char buff[MAX_ZWR_KEY_SZ], *end; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; /* If specified var name is global ^%Y*, the name is illegal to use in a SET or KILL command, only GETs are allowed */ if ((RESERVED_NAMESPACE_LEN <= gv_currkey->end) && (0 == MEMCMP_LIT(gv_currkey->base, RESERVED_NAMESPACE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_PCTYRESERVED); if (gv_cur_region->read_only) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region)); if ((TREF(gv_last_subsc_null) || TREF(gv_some_subsc_null)) && (ALWAYS != gv_cur_region->null_subs)) sgnl_gvnulsubsc(); assert(gv_currkey->end + 1 <= gv_cur_region->max_key_size); MV_FORCE_NUM(increment); switch (gv_cur_region->dyn.addr->acc_meth) { case dba_bg: case dba_mm: gvcst_incr(increment, result); break; case dba_cm: gvcmx_increment(increment, result); break; case dba_usr: /* $INCR not supported for DDP/USR access method */ if (0 == (end = format_targ_key(buff, MAX_ZWR_KEY_SZ, gv_currkey, TRUE))) end = &buff[MAX_ZWR_KEY_SZ - 1]; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(10) ERR_UNIMPLOP, 0, ERR_TEXT, 2, LEN_AND_LIT("GTCM DDP server does not support $INCREMENT"), ERR_GVIS, 2, end - buff, buff, ERR_TEXT, 2, REG_LEN_STR(gv_cur_region)); break; default: assertpro(FALSE); } assert(MV_DEFINED(result)); }
void job_addr(mstr *rtn, mstr *label, int4 offset, char **hdr, char **labaddr) { rhdtyp *rt_hdr; int4 *lp; mval rt; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; if (NULL == (rt_hdr = find_rtn_hdr(rtn))) { rt.mvtype = MV_STR; rt.str = *rtn; op_zlink(&rt, NULL); assertpro(NULL != (rt_hdr = find_rtn_hdr(rtn))); } lp = NULL; if ((rt_hdr->compiler_qlf & CQ_LINE_ENTRY) || (0 == offset)) /* Label offset with routine compiled with NOLINE_ENTRY should cause error. */ lp = find_line_addr(rt_hdr, label, offset, NULL); if (!lp) rts_error(VARLSTCNT(1) ERR_JOBLABOFF); /* Set the pointer to address / offset for line number entry storage in lab_proxy. */ USHBIN_ONLY((TREF(lab_proxy)).lnr_adr = lp;)
int x86_64_arg_reg(int indx) { switch(indx) { case 0: return I386_REG_RDI; case 1: return I386_REG_RSI; case 2: return I386_REG_RDX; case 3: return I386_REG_RCX; case 4: return I386_REG_R8; case 5: return I386_REG_R9; default: assertpro(FALSE); break ; } /* Control will never reach here */ return -1 ; }
void op_zut(mval *s) { struct timeval tv; gtm_int8 microseconds, msectmp; int numdigs; int4 pwr; assertpro(-1 != gettimeofday(&tv, NULL)); microseconds = (1LL * MICROSEC_IN_SEC * tv.tv_sec) + tv.tv_usec; if ((microseconds < 0) || (microseconds > E_18)) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_WEIRDSYSTIME); if (microseconds < E_6) { s->m[1] = ((int4)microseconds * 1000); s->mvtype = MV_INT | MV_NM; } else { msectmp = microseconds; /* Count the number of digits */ for (numdigs = 0; msectmp; numdigs++, msectmp /= 10); if (numdigs <= NUM_DEC_DG_1L) { s->m[0] = 0; s->m[1] = (int4)microseconds * ten_pwr[NUM_DEC_DG_1L - numdigs]; } else { pwr = ten_pwr[numdigs - NUM_DEC_DG_1L]; s->m[0] = (microseconds % pwr) * ten_pwr[NUM_DEC_DG_2L - numdigs]; s->m[1] = microseconds / pwr; } s->mvtype = MV_NM; s->e = MV_XBIAS + numdigs; } s->sgn = 0; return; }
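/* Illustrative sketch (not part of the GT.M sources): op_zut() above stores the microsecond count in GT.M's
 * packed numeric mval form, where m[1] holds the most significant (up to) nine decimal digits and m[0] holds
 * the remaining digits left-justified in a nine-digit field.  The standalone function below performs the same
 * digit-count and split on a plain 64-bit value; the names and the DIGITS_PER_LIMB constant are local to this
 * example (DIGITS_PER_LIMB plays the role of NUM_DEC_DG_1L), and the MV_INT fast path for small values is not
 * shown.
 */
#include <stdint.h>
#include <stdio.h>

#define DIGITS_PER_LIMB 9

static void split_decimal(int64_t value, int32_t *hi, int32_t *lo, int *numdigs)
{
    int64_t tmp, pwr;
    int     n, i;

    for (n = 0, tmp = value; tmp; n++)
        tmp /= 10;                      /* count decimal digits, as op_zut() does */
    if (n <= DIGITS_PER_LIMB)
    {   /* everything fits in the high limb, left-justified to nine digits */
        for (pwr = 1, i = n; i < DIGITS_PER_LIMB; i++)
            pwr *= 10;
        *hi = (int32_t)(value * pwr);
        *lo = 0;
    } else
    {   /* high limb gets the top nine digits; low limb gets the rest, left-justified */
        for (pwr = 1, i = DIGITS_PER_LIMB; i < n; i++)
            pwr *= 10;
        *hi = (int32_t)(value / pwr);
        for (*lo = (int32_t)(value % pwr), i = n; i < (2 * DIGITS_PER_LIMB); i++)
            *lo *= 10;
    }
    *numdigs = n;
}

int main(void)
{
    int32_t hi, lo;
    int     numdigs;

    split_decimal(1234567890123456LL, &hi, &lo, &numdigs);          /* a 16-digit value */
    printf("digits=%d hi=%d lo=%d\n", numdigs, (int)hi, (int)lo);   /* digits=16 hi=123456789 lo=12345600 */
    return 0;
}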
void tp_unwind(uint4 newlevel, enum tp_unwind_invocation invocation_type, int *tprestart_rc) { mlk_pvtblk **prior, *mlkp; mlk_tp *oldlock, *nextlock; int tl; lv_val *save_lv, *curr_lv, *lv; tp_var *restore_ent; mv_stent *mvc; boolean_t restore_lv, rollback_locks; lvscan_blk *lvscan, *lvscan_next, first_lvscan; int elemindx, rc; lvTree *lvt_child; /* We are about to clean up structures. Defer MUPIP STOP/signal handling until function end. */ DEFER_INTERRUPTS(INTRPT_IN_TP_UNWIND); /* Unwind the requested TP levels */ # if defined(DEBUG_REFCNT) || defined(DEBUG_ERRHND) DBGFPF((stderr, "\ntp_unwind: Beginning TP unwind process\n")); # endif restore_lv = (RESTART_INVOCATION == invocation_type); lvscan = &first_lvscan; lvscan->next = NULL; lvscan->elemcnt = 0; assert((tp_sp <= tpstackbase) && (tp_sp > tpstacktop)); assert((tp_pointer <= (tp_frame *)tpstackbase) && (tp_pointer > (tp_frame *)tpstacktop)); for (tl = dollar_tlevel; tl > newlevel; --tl) { DBGRFCT((stderr, "\ntp_unwind: Unwinding level %d -- tp_pointer: 0x"lvaddr"\n", tl, tp_pointer)); assertpro(NULL != tp_pointer); for (restore_ent = tp_pointer->vars; NULL != restore_ent; restore_ent = tp_pointer->vars) { /*********************************************************************************/ /* TP_VAR_CLONE sets the var_cloned flag, showing that the tree has been cloned */ /* If var_cloned is not set, it shows that curr_lv and save_lv are still sharing */ /* the tree, so it should not be killed. */ /*********************************************************************************/ curr_lv = restore_ent->current_value; save_lv = restore_ent->save_value; assert(curr_lv); assert(save_lv); assert(LV_IS_BASE_VAR(curr_lv)); assert(LV_IS_BASE_VAR(save_lv)); assert(0 < curr_lv->stats.trefcnt); assert(curr_lv->tp_var); assert(curr_lv->tp_var == restore_ent); /* In order to restart sub-transactions, this would have to maintain * the chain that currently is not built by op_tstart() */ if (restore_lv) { rc = tp_unwind_restlv(curr_lv, save_lv, restore_ent, NULL, tprestart_rc); # ifdef GTM_TRIGGER if (0 != rc) { dollar_tlevel = tl; /* Record fact if we unwound some tp_frames */ ENABLE_INTERRUPTS(INTRPT_IN_TP_UNWIND); /* drive any MUPIP STOP/signals deferred * while in this function */ TPUNWND_WBOX_TEST; /* Debug-only wbox-test to simulate SIGTERM */ INVOKE_RESTART; } # endif } else if (restore_ent->var_cloned) { /* curr_lv has been cloned. * Note: LV_CHILD(save_lv) can be non-NULL only if restore_ent->var_cloned is TRUE */ DBGRFCT((stderr, "\ntp_unwind: Not restoring curr_lv and is cloned\n")); lvt_child = LV_GET_CHILD(save_lv); if (NULL != lvt_child) { /* If subtree exists, we have to blow away the cloned tree */ DBGRFCT((stderr, "\ntp_unwind: save_lv has children\n")); assert(save_lv->tp_var); DBGRFCT((stderr,"\ntp_unwind: For lv_val 0x"lvaddr": Deleting saved lv_val 0x"lvaddr"\n", curr_lv, save_lv)); assert(LVT_PARENT(lvt_child) == (lvTreeNode *)save_lv); lv_kill(save_lv, DOTPSAVE_FALSE, DO_SUBTREE_TRUE); } restore_ent->var_cloned = FALSE; } else { /* If not cloned, we still have to reduce the reference counts of any * container vars in the untouched tree that were added to keep anything * they referenced from disappearing. 
*/ DBGRFCT((stderr, "\ntp_unwind: Not restoring curr_lv and is NOT cloned\n")); lvt_child = LV_GET_CHILD(curr_lv); if (NULL != lvt_child) { DBGRFCT((stderr, "\ntp_unwind: curr_lv has children and so reducing ref counts\n")); TPUNWND_CNTNRS_IN_TREE(curr_lv); } } LV_FREESLOT(save_lv); /* Not easy to predict what the trefcnt will be except that it should be greater than zero. In * most cases, it will have its own hash table ref plus the extras we added but it is also * possible that the entry has been kill *'d in which case the ONLY ref that will be left is * our own increment but there is no [quick] way to distinguish this case so we just * test for > 0. */ assert(0 < curr_lv->stats.trefcnt); assert(0 < curr_lv->stats.crefcnt); DECR_CREFCNT(curr_lv); /* Remove the copy refcnt we added in op_tstart() or lv_newname() */ DECR_BASE_REF_NOSYM(curr_lv, FALSE); curr_lv->tp_var = NULL; tp_pointer->vars = restore_ent->next; free(restore_ent); } if ((tp_pointer->fp == frame_pointer) && (MVST_TPHOLD == mv_chain->mv_st_type) && (msp == (unsigned char *)mv_chain)) POP_MV_STENT(); if (NULL == tp_pointer->old_tp_frame) tp_sp = tpstackbase; else tp_sp = (unsigned char *)tp_pointer->old_tp_frame; if (tp_sp > tpstackbase) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_STACKUNDERFLO); if (tp_pointer->tp_save_all_flg) --tp_pointer->sym->tp_save_all; if ((NULL != (tp_pointer = tp_pointer->old_tp_frame)) /* Note assignment */ && ((tp_pointer < (tp_frame *)tp_sp) || (tp_pointer > (tp_frame *)tpstackbase) || (tp_pointer < (tp_frame *)tpstacktop))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_STACKUNDERFLO); } if ((0 != newlevel) && restore_lv) { /* Restore current context (without releasing) */ assertpro(NULL != tp_pointer); DBGRFCT((stderr, "\n\n** tp_unwind: Newlevel (%d) != 0 loop processing\n", newlevel)); for (restore_ent = tp_pointer->vars; NULL != restore_ent; restore_ent = restore_ent->next) { curr_lv = restore_ent->current_value; save_lv = restore_ent->save_value; assert(curr_lv); assert(save_lv); assert(LV_IS_BASE_VAR(curr_lv)); assert(LV_IS_BASE_VAR(save_lv)); assert(curr_lv->tp_var); assert(curr_lv->tp_var == restore_ent); assert(0 < curr_lv->stats.trefcnt); rc = tp_unwind_restlv(curr_lv, save_lv, restore_ent, &lvscan, tprestart_rc); # ifdef GTM_TRIGGER if (0 != rc) { dollar_tlevel = tl; /* Record fact if we unwound some levels */ ENABLE_INTERRUPTS(INTRPT_IN_TP_UNWIND); /* drive any MUPIP STOP/signals deferred while * in this function */ TPUNWND_WBOX_TEST; /* Debug-only wbox-test to simulate SIGTERM */ INVOKE_RESTART; } # endif assert(0 < curr_lv->stats.trefcnt); /* Should have its own hash table ref plus the extras we added */ assert(0 < curr_lv->stats.crefcnt); } /* If we have any lv_vals queued up to be scanned for container vars, do that now */ DBGRFCT((stderr, "\ntp_unwind: Starting deferred rescan of lv trees needing refcnt processing\n")); while (0 < lvscan->elemcnt) { assert(ARY_SCNCNTNR_DIM >= lvscan->elemcnt); for (elemindx = 0; lvscan->elemcnt > elemindx; ++elemindx) { lv = lvscan->ary_scncntnr[elemindx]; DBGRFCT((stderr, "\n**tp_unwind_process_lvscan_array: Deferred processing lv 0x"lvaddr"\n", lv)); assert(LV_IS_BASE_VAR(lv)); /* This is the final level being restored so redo the counters on these vars */ TPREST_CNTNRS_IN_TREE(lv); } /* If we allocated any secondary blocks, we are done with them now so release them. Only the * very last block on the chain is the original block that was automatically allocated which * should not be freed in this fashion.
*/ lvscan_next = lvscan->next; if (NULL != lvscan_next) { /* There is another block on the chain so this one can be freed */ free(lvscan); DBGRFCT((stderr, "\ntp_unwind_process_lvscan_array: Freeing lvscan array\n")); lvscan = lvscan_next; } else { /* Since this is the original block allocated on the C stack which we may reuse, * zero the element count. */ lvscan->elemcnt = 0; DBGRFCT((stderr, "\ntp_unwind_process_lvscan_array: Setting elemcnt to 0 in original " "lvscan block\n")); assert(lvscan == &first_lvscan); } } } assert(0 == lvscan->elemcnt); /* verify no elements queued that were not scanned */ rollback_locks = (COMMIT_INVOCATION != invocation_type); for (prior = &mlk_pvt_root, mlkp = *prior; NULL != mlkp; mlkp = *prior) { if (mlkp->granted) { /* This was a pre-existing lock */ for (oldlock = mlkp->tp; (NULL != oldlock) && ((int)oldlock->tplevel > newlevel); oldlock = nextlock) { /* Remove references to the lock from levels being unwound */ nextlock = oldlock->next; free(oldlock); } if (rollback_locks) { if (NULL == oldlock) { /* Lock did not exist at the tp level being unwound to */ mlk_unlock(mlkp); mlk_pvtblk_delete(prior); continue; } else { /* Lock still exists but restore lock state as it was when the transaction started. */ mlkp->level = oldlock->level; mlkp->zalloc = oldlock->zalloc; } } if ((NULL != oldlock) && (oldlock->tplevel == newlevel)) { /* Remove lock reference from level being unwound to, * now that any {level,zalloc} state information has been restored. */ assert((NULL == oldlock->next) || (oldlock->next->tplevel < newlevel)); mlkp->tp = oldlock->next; /* update root reference pointer */ free(oldlock); } else mlkp->tp = oldlock; /* update root reference pointer */ prior = &mlkp->next; } else mlk_pvtblk_delete(prior); } DBGRFCT((stderr, "tp_unwind: Processing complete\n")); dollar_tlevel = newlevel; ENABLE_INTERRUPTS(INTRPT_IN_TP_UNWIND); /* check if any MUPIP STOP/signals were deferred while in this function */ }
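/* Illustrative sketch (not part of the GT.M sources): for each held lock, tp_unwind() above walks the
 * per-TP-level saved-state chain (newest first), frees entries belonging to levels deeper than the target
 * level, and then either restores the state saved at the target level or, if nothing survives, knows the lock
 * was acquired inside the rolled-back levels and must be released.  The hypothetical types below model only
 * that chain walk.
 */
#include <stdlib.h>

typedef struct saved_state
{
    int                 tplevel;    /* TP level at which this state was saved */
    int                 level;      /* incremental lock count saved for that level */
    struct saved_state  *next;      /* next-older saved state */
} saved_state;

/* Drop saved states for levels deeper than newlevel; return the newest surviving entry, or NULL to tell the
 * caller that the lock did not exist at newlevel and should be released. */
static saved_state *unwind_lock_states(saved_state **chain, int newlevel)
{
    saved_state *oldstate, *nextstate;

    for (oldstate = *chain; (NULL != oldstate) && (oldstate->tplevel > newlevel); oldstate = nextstate)
    {
        nextstate = oldstate->next;
        free(oldstate);
    }
    *chain = oldstate;
    return oldstate;
}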
/* Restore given local variable from supplied TP restore entry into given symval. Note lvscan_anchor will only be non-NULL * for the final level we are restoring (but not unwinding). We don't need to restore counters for any vars except the * very last level. * * The return code is only used when unrolling the M stack runs into a trigger base frame which must be unrolled * by gtm_trigger. A non-zero return code signals to tp_unwind() that it needs to rethrow the tprestart error. */ int tp_unwind_restlv(lv_val *curr_lv, lv_val *save_lv, tp_var *restore_ent, lvscan_blk **lvscan_anchor, int *tprestart_rc) { ht_ent_mname *tabent; lv_val *inuse_lv; int elemindx; mv_stent *mvc; lvscan_blk *lvscan, *newlvscan; lvTree *lvt_child; boolean_t var_cloned; assert(curr_lv); assert(LV_IS_BASE_VAR(curr_lv)); assert(curr_lv->tp_var); DBGRFCT((stderr, "\ntp_unwind_restlv: Entered for varname: '%.*s' curr_lv: 0x"lvaddr" save_lv: 0x"lvaddr"\n", restore_ent->key.var_name.len, restore_ent->key.var_name.addr, curr_lv, save_lv)); DBGRFCT((stderr, "tp_unwind_restlv: tp_pointer/current: fp: 0x"lvaddr"/0x"lvaddr" mvc: 0x"lvaddr"/0x"lvaddr " symval: 0x"lvaddr"/0x"lvaddr"\n", tp_pointer->fp, frame_pointer, tp_pointer->mvc, mv_chain, tp_pointer->sym, curr_symval)); /* First get the stack in the position where we can actually process this entry. Need to make sure we are processing * the symbol table we need to be processing so unwind enough stuff to get there. */ if (curr_symval != tp_pointer->sym) { /* Unwind as many stackframes as are necessary up to the max */ while((curr_symval != tp_pointer->sym) && (frame_pointer < tp_pointer->fp)) { # ifdef GTM_TRIGGER if (SFT_TRIGR & frame_pointer->type) { /* We have encountered a trigger base frame. We cannot unroll it because there are C frames * associated with it so we must interrupt this tp_restart and return to gtm_trigger() so * it can unroll the base frame and rethrow the error to properly unroll the C stack. */ *tprestart_rc = ERR_TPRETRY; tprestart_state = TPRESTART_STATE_TPUNW; DBGTRIGR((stderr, "tp_unwind: Encountered trigger base frame during M-stack unwind - " "rethrowing\n")); return -1; } # endif op_unwind(); } if (curr_symval != tp_pointer->sym) { /* Unwind as many mv_stents as are necessary up to the max */ mvc = mv_chain; while((curr_symval != tp_pointer->sym) && (mvc < tp_pointer->mvc)) { unw_mv_ent(mvc); mvc = (mv_stent *)(mvc->mv_st_next + (char *)mvc); } mv_chain = mvc; /* Final check */ assertpro(curr_symval == tp_pointer->sym); } } var_cloned = curr_lv->tp_var->var_cloned; if (var_cloned) { /* Var/tree has been copied (and modified) -- see about restoring it */ DBGRFCT((stderr, "\ntp_unwind_restlv: curr_lv was modified and cloned -- needs restoration\n")); if (NULL != restore_ent->key.var_name.addr) { /* Restore data into a named variable (hash table entry) * Step 1 -- find its hash table address to see what lv_val is there now. */ tabent = lookup_hashtab_mname(&((tp_pointer->sym)->h_symtab), &restore_ent->key); assert(tabent); /* Step 2 -- If lv_val is NOT the same as it was, then we must replace the lv_val * currently in use. Decrement its use count (which will delete it and the tree if * it is no longer used) and replace with desired previous lv_val whose use count * was incremented when it was saved. 
*/ if (curr_lv != (inuse_lv = (lv_val *)tabent->value)) /* Note assignment */ { if (inuse_lv) DECR_BASE_REF_RQ(tabent, inuse_lv, FALSE); DBGRFCT((stderr, "tp_unwind: hte 0x"lvaddr" being reset from 0x"lvaddr" to 0x"lvaddr"\n", tabent, tabent->value, curr_lv)); tabent->value = (void *)curr_lv; INCR_TREFCNT(curr_lv); /* Back in the hash table, bump its reference */ } } /* Else, if restoring orphaned data, just prune the old var and copy in the saved tree (if one existed) */ /* Step 3 -- We have the correct lv_val in the hash table now but it has the wrong value. * Get rid of its current tree if any. */ if (lvt_child = LV_GET_CHILD(curr_lv)) /* Note assignment */ { DBGRFCT((stderr, "\ntp_unwind_restlv: Killing children of curr_lv 0x"lvaddr"\n", curr_lv)); assert((lvTreeNode *)curr_lv == LVT_PARENT(lvt_child)); LV_CHILD(curr_lv) = NULL; /* prevent recursion due to alias containers */ lv_killarray(lvt_child, FALSE); } /* Step 4: Copy in the needed fields from the saved flavor lv_val back to curr_lv. * Preserve the ref counts of the current var since the copy's ref counts have not been kept up to date. */ DBGRFCT((stderr, "\ntp_unwind_restlv: Restoring value of lv 0x"lvaddr" back into lv 0x"lvaddr"\n", save_lv, curr_lv)); /* The following is optimized to do the initialization of just the needed structure members. For that it assumes a * particular "lv_val" structure layout. The assumed layout is asserted so any changes to the layout will * automatically show an issue here and cause the below initialization to be accordingly reworked. */ assert(0 == OFFSETOF(lv_val, v)); assert(OFFSETOF(lv_val, v) + SIZEOF(curr_lv->v) == OFFSETOF(lv_val, ptrs)); assert(OFFSETOF(lv_val, ptrs) + SIZEOF(curr_lv->ptrs) == OFFSETOF(lv_val, stats)); assert(OFFSETOF(lv_val, stats) + SIZEOF(curr_lv->stats) == OFFSETOF(lv_val, has_aliascont)); assert(OFFSETOF(lv_val, has_aliascont) + SIZEOF(curr_lv->has_aliascont) == OFFSETOF(lv_val, lvmon_mark)); assert(OFFSETOF(lv_val, lvmon_mark) + SIZEOF(curr_lv->lvmon_mark) == OFFSETOF(lv_val, tp_var)); assert(OFFSETOF(lv_val, tp_var) + SIZEOF(curr_lv->tp_var) == SIZEOF(lv_val)); /* save_lv -> curr_lv Copy begin */ curr_lv->v = save_lv->v; curr_lv->ptrs = save_lv->ptrs; assert(0 < curr_lv->stats.trefcnt); /* No need to copy "stats" as curr_lv is more uptodate */ assert(0 < curr_lv->stats.crefcnt); assert(8 == (OFFSETOF(lv_val, tp_var) - OFFSETOF(lv_val, has_aliascont))); curr_lv->has_aliascont = save_lv->has_aliascont; DBGALS_ONLY(curr_lv->lvmon_mark = save_lv->lvmon_mark); assert(save_lv->tp_var == curr_lv->tp_var); /* no need to copy this field */ /* save_lv -> curr_lv Copy done */ /* Some fixup may need to be done if the variable was cloned (and thus moved around) */ curr_lv->tp_var->var_cloned = FALSE; if (lvt_child = LV_GET_CHILD(curr_lv)) { /* Some pointer fix up needs to be done since the owner of the restored tree changed */ assert(LVT_PARENT(lvt_child) == ((lvTreeNode *)curr_lv->tp_var->save_value)); LV_CHILD(save_lv) = NULL; /* now that curr_lv->tp_var->var_cloned has been reset */ LVT_PARENT(lvt_child) = (lvTreeNode *)curr_lv; if (curr_lv->has_aliascont && (NULL != lvscan_anchor)) { /* Some ref counts need to be restored for arrays this tree points to -- but only if the * array contains pointers (alias containers). */ DBGRFCT((stderr, "\ntp_unwind_restlv: Putting lv 0x:"lvaddr" on the lvscan list\n", curr_lv)); /* This array needs to have container pointer target reference counts reestablished. Record * the lv so this can happen after all vars are restored.
*/ lvscan = *lvscan_anchor; elemindx = lvscan->elemcnt++; /* Note post increment so elemindx has minus-one value */ if (ARY_SCNCNTNR_MAX < elemindx) { /* Present block is full so allocate a new one and chain it on */ lvscan->elemcnt--; /* New element ended up not being in that block.. */ newlvscan = (lvscan_blk *)malloc(SIZEOF(lvscan_blk)); newlvscan->next = lvscan; newlvscan->elemcnt = 1; /* Going to use first one *now* */ elemindx = 0; *lvscan_anchor = newlvscan; lvscan = newlvscan; } assert((ARY_SCNCNTNR_MAX >= elemindx) && (0 <= elemindx)); lvscan->ary_scncntnr[elemindx] = curr_lv; } } } else { DBGRFCT((stderr, "\ntp_unwind_restlv: curr_lv was NOT modified or cloned\n")); assert(NULL == LV_CHILD(save_lv)); assert(!save_lv->tp_var->var_cloned); /* We know that the subscript array underneath curr_lv did not change since saving it into save_lv. But the * unsubscripted lv could have changed (have no way of checking if that is the case) so restore it unconditionally. */ curr_lv->v = save_lv->v; /* No need to copy "save_lv->ptrs" as "ptrs" contains 2 fields both of which are already correct in "curr_lv" */ assert(save_lv->ptrs.val_ent.parent.sym == curr_lv->ptrs.val_ent.parent.sym); assert(NULL == save_lv->ptrs.val_ent.children); /* No need to copy "save_lv->stats" as "curr_lv->stats" is more uptodate */ assert(save_lv->has_aliascont == curr_lv->has_aliascont); /* no need to copy this field */ assert(save_lv->lvmon_mark == curr_lv->lvmon_mark); /* no need to copy this field */ assert(save_lv->tp_var == curr_lv->tp_var); /* no need to copy this field */ } if (NULL == lvscan_anchor) /* Means this is completely unwinding a nested level so we need to reset the tstartcycle in this * lvval so it gets handled correctly when this lv is encountered again after the restart completes. */ curr_lv->stats.tstartcycle = 0; return 0; }
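/* Illustrative sketch (not part of the GT.M sources): lv_vals needing deferred container-refcount processing
 * are queued in fixed-size lvscan_blk arrays; when the block being filled overflows, tp_unwind_restlv() above
 * allocates a new block and pushes it on the front of the chain so the anchor always points at the block
 * currently being filled.  The hypothetical structures below reproduce just that append-with-overflow pattern
 * (SCAN_BLK_SLOTS stands in for ARY_SCNCNTNR_DIM).
 */
#include <stdlib.h>

#define SCAN_BLK_SLOTS 32

typedef struct scan_blk
{
    int             elemcnt;
    void            *elems[SCAN_BLK_SLOTS];
    struct scan_blk *next;      /* older (already full) block */
} scan_blk;

/* Append "item" to the block at *anchor, allocating and front-chaining a new block if the current one is full. */
static void scan_blk_append(scan_blk **anchor, void *item)
{
    scan_blk    *blk, *newblk;

    blk = *anchor;
    if (SCAN_BLK_SLOTS == blk->elemcnt)
    {   /* current block is full: start a fresh one and keep the full block chained behind it */
        newblk = (scan_blk *)malloc(sizeof(scan_blk));
        newblk->elemcnt = 0;
        newblk->next = blk;
        *anchor = newblk;
        blk = newblk;
    }
    blk->elems[blk->elemcnt++] = item;
}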
void bx_boollit_tail(triple *t, boolean_t jmp_type_one, boolean_t jmp_to_next, boolean_t sense, oprtype *addr) /* search the Boolean in t (recursively) for literal leaves; the logic is similar to bx_tail * the rest of the arguments parallel those in bx_boolop and are used primarily for handling basic Boolean operations (OR, NOR, AND, NAND) * to get the jump target and sense right for the left-hand operand of the operation * jmp_type_one gives the sense of the jump associated with the first operand * jmp_to_next gives whether we need a second jump to complete the operation * sense gives the sense of the requested operation * *addr points to the operand for the jump and is eventually used by logic back in the invocation stack to fill in a target location */ { boolean_t sin[ARRAYSIZE(t->operand)], tv[ARRAYSIZE(t->operand)]; int com, comval, dummy, j, neg, num, tvr; mval *mv, *v[ARRAYSIZE(t->operand)]; opctype c; oprtype *i, *p; triple *cob[ARRAYSIZE(t->operand)], *ref0, *tl[ARRAYSIZE(t->operand)]; assert(OCT_BOOL & oc_tab[t->opcode].octype); assert(TRIP_REF == t->operand[0].oprclass); assert((OC_COBOOL != t->opcode) && (OC_COM != t->opcode) || (TRIP_REF == t->operand[1].oprclass)); for (i = t->operand, j = 0; i < ARRAYTOP(t->operand); i++, j++) { /* check out an operand to see if we can simplify it */ p = i; com = 0; for (tl[j] = i->oprval.tref; OCT_UNARY & oc_tab[(c = tl[j]->opcode)].octype; tl[j] = p->oprval.tref) { /* find the real object of affection; WARNING assignment above */ assert((TRIP_REF == tl[j]->operand[0].oprclass) && (NO_REF == tl[j]->operand[1].oprclass)); com ^= (OC_COM == c); /* if we make a recursive call below, COM matters, but NEG and FORCENUM don't */ p = &tl[j]->operand[0]; } if (OCT_ARITH & oc_tab[c].octype) ex_tail(p); /* chained arithmetic */ else if (OCT_BOOL & oc_tab[c].octype) { /* recursively check an operand */ sin[j] = sense; p = addr; if (!j && !(OCT_REL & oc_tab[t->opcode].octype)) { /* left hand operand of parent */ sin[j] = jmp_type_one; if (jmp_to_next) { /* left operands need extra attention to decide between jump next or to the end */ p = (oprtype *)mcalloc(SIZEOF(oprtype)); *p = put_tjmp(t); } } bx_boollit(tl[j], sin[j] ^ com, p); } if ((OC_JMPTRUE != tl[j]->opcode) && (OC_JMPFALSE != tl[j]->opcode) && (OC_LIT != tl[j]->opcode)) return; /* this operation doesn't qualify */ com = comval = neg = num = 0; cob[j] = NULL; for (ref0 = i->oprval.tref; OCT_UNARY & oc_tab[(c = ref0->opcode)].octype; ref0 = ref0->operand[0].oprval.tref) { /* we may be able to clean up this operand; WARNING assignment above */ assert((TRIP_REF == ref0->operand[0].oprclass) && (NO_REF == ref0->operand[1].oprclass)); num += (OC_FORCENUM == c); com += (OC_COM == c); if (!com) /* "outside" com renders neg mute */ neg ^= (OC_NEG == c); if (!comval && (NULL == cob[j])) { if (comval = (OC_COMVAL == c)) /* WARNING assignment */ { if (ref0 != t->operand[j].oprval.tref) dqdel(t->operand[j].oprval.tref, exorder); t->operand[j].oprval.tref = tl[j]; /* need mval: no COBOOL needed */ } else if (OC_COBOOL == c) { /* the operand needs a COBOOL in case its operator remains unresolved */ cob[j] = t->operand[j].oprval.tref; if (ref0 == cob[j]) continue; /* already where it belongs */ cob[j]->opcode = OC_COBOOL; cob[j]->operand[0].oprval.tref = tl[j]; } else if (ref0 == t->operand[j].oprval.tref) continue; } dqdel(ref0, exorder); } assert(ref0 == tl[j]); if (!comval && (NULL == cob[j]) && (tl[j] != t->operand[j].oprval.tref)) { /* left room for a COBOOL, but there's no need */ dqdel(t->operand[j].oprval.tref,
exorder); t->operand[j].oprval.tref = tl[j]; } if ((OC_JMPTRUE == ref0->opcode) || (OC_JMPFALSE == ref0->opcode)) { /* switch to a literal representation of TRUE / FALSE */ assert(INDR_REF == ref0->operand[0].oprclass); ref0->operand[1] = ref0->operand[0]; /* track info as we switch opcode */ PUT_LITERAL_TRUTH((sin[j] ? OC_JMPFALSE : OC_JMPTRUE) == ref0->opcode, ref0); ref0->opcode = OC_LIT; com = 0; /* already accounted for by sin */ } assert((OC_LIT == ref0->opcode) && (MLIT_REF == ref0->operand[0].oprclass)); v[j] = &ref0->operand[0].oprval.mlit->v; if (com) { /* any complement reduces the literal value to [unsigned] 1 or 0 */ unuse_literal(v[j]); tv[j] = (0 == v[j]->m[1]); assert(ref0 == tl[j]); PUT_LITERAL_TRUTH(tv[j], ref0); v[j] = &ref0->operand[0].oprval.mlit->v; num = 0; /* any complement trumps num */ } if (neg || num) { /* get literal into uniform state */ unuse_literal(v[j]); mv = (mval *)mcalloc(SIZEOF(mval)); *mv = *v[j]; if (neg) { if (MV_INT & mv->mvtype) { if (0 != mv->m[1]) mv->m[1] = -mv->m[1]; else mv->sgn = 0; } else if (MV_NM & mv->mvtype) mv->sgn = !mv->sgn; } else s2n(mv); n2s(mv); v[j] = mv; assert(ref0 == tl[j]); put_lit_s(v[j], ref0); } } assert(tl[0] != tl[1]); /* start processing a live one */ for (tvr = j, j = 0; j < tvr; j++) { /* both arguments are literals, so do the operation at compile time */ if (NULL != cob[j]) dqdel(cob[j], exorder); v[j] = &tl[j]->operand[0].oprval.mlit->v; tv[j] = (0 != v[j]->m[1]); unuse_literal(v[j]); tl[j]->opcode = OC_NOOP; tl[j]->operand[0].oprclass = NO_REF; } t->operand[1].oprclass = NO_REF; switch (c = t->opcode) /* WARNING assignment */ { /* optimize the Boolean operations here */ case OC_NAND: case OC_AND: tvr = (tv[0] && tv[1]); break; case OC_NOR: case OC_OR: tvr = (tv[0] || tv[1]); break; case OC_NCONTAIN: case OC_CONTAIN: tvr = 1; (void)matchc(v[1]->str.len, (unsigned char *)v[1]->str.addr, v[0]->str.len, (unsigned char *)v[0]->str.addr, &dummy, &tvr); tvr ^= 1; break; case OC_NEQU: case OC_EQU: tvr = is_equ(v[0], v[1]); break; case OC_NFOLLOW: case OC_FOLLOW: tvr = 0 < memvcmp(v[0]->str.addr, v[0]->str.len, v[1]->str.addr, v[1]->str.len); break; case OC_NGT: case OC_GT: tvr = 0 < numcmp(v[0], v[1]); break; case OC_NLT: case OC_LT: tvr = 0 > numcmp(v[0], v[1]); break; case OC_NPATTERN: case OC_PATTERN: tvr = !(*(uint4 *)v[1]->str.addr) ? do_pattern(v[0], v[1]) : do_patfixed(v[0], v[1]); break; case OC_NSORTS_AFTER: case OC_SORTS_AFTER: tvr = 0 < sorts_after(v[0], v[1]); break; default: assertpro(FALSE); } tvr ^= !sense; t->operand[0] = put_indr(addr); t->opcode = tvr ? OC_JMPFALSE : OC_JMPTRUE; return; }
uint4 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog) { sm_uc_ptr_t old_base[2], mmap_retaddr; boolean_t was_crit, is_mm; int result, save_errno, status; DEBUG_ONLY(int first_save_errno); uint4 new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total; uint4 jnl_status; gtm_uint64_t avail_blocks, mmap_sz; off_t new_eof, new_size; trans_num curr_tn; unix_db_info *udi; inctn_opcode_t save_inctn_opcode; int4 prev_extend_blks_to_upgrd; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; cache_rec_ptr_t cr; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(!IS_DSE_IMAGE); assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */ assert(!process_exiting); DEBUG_ONLY(old_base[0] = old_base[1] = NULL); assert(!gv_cur_region->read_only); udi = FILE_INFO(gv_cur_region); is_mm = (dba_mm == cs_addrs->hdr->acc_meth); # if !defined(MM_FILE_EXT_OK) if (!udi->grabbed_access_sem && is_mm) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */ # endif /* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will overflow and end up doing silly things. */ assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR)); # if defined(__sun) || defined(__hpux) cs_data->defer_allocate = TRUE; # endif if (!blocks && (cs_data->defer_allocate || (TRANS_IN_PROG_TRUE == trans_in_prog))) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */ bplmap = cs_data->bplmap; /* New total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this * manner as every non-bitmap block must have an associated bitmap) * Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap. * Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed. */ new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1) - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap); new_blocks = blocks + new_bit_maps; assert((0 < (int)new_blocks) || (!cs_data->defer_allocate && (0 == new_blocks))); if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data)) { assert(WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR)); send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX); return (uint4)(NO_FREE_SPACE); } if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE))) { send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); } else { if (!(gtmDebugLevel & GDL_IgnoreAvailSpace)) { /* Bypass this space check if debug flag above is on. Allows us to create a large sparse DB * in space it could never fit in if it weren't sparse. Needed for some tests.
*/ avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE); if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks) { if (blocks > (uint4)avail_blocks) { if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs)) return (uint4)(NO_FREE_SPACE); else send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4, DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks); } else send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region), (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0))); } } } # ifdef DEBUG if (WBTEST_ENABLED(WBTEST_MM_CONCURRENT_FILE_EXTEND) && dollar_tlevel && !MEMCMP_LIT(gv_cur_region->rname, "DEFAULT")) { SYSTEM("$gtm_dist/mumps -run $gtm_wbox_mrtn"); assert(1 == cs_addrs->nl->wbox_test_seq_num); /* should have been set by mubfilcpy */ cs_addrs->nl->wbox_test_seq_num = 2; /* signal mupip backup to stop sleeping in mubfilcpy */ } # endif /* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */ was_crit = cs_addrs->now_crit; assert(!cs_addrs->hold_onto_crit || was_crit); /* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur. * If we are coming from online rollback (when that feature is available), we will come in holding crit and in * the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself. * Therefore if we are coming in holding crit from MUPIP, we expect the db to be unfrozen so no need to wait for * freeze. * If we are coming from GT.M and final retry (in which case we come in holding crit) we expect to have waited * for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this * function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that * at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region * to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused * op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which * we hold crit. */ assert(!was_crit || !FROZEN_HARD(cs_data) || (dollar_tlevel && (CDB_STAGNATE <= t_tries))); /* * If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt" * transaction numbers without correspondingly updating the history transaction numbers (effectively causing * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover) * if we already hold crit. */ if (!was_crit) { for ( ; ; ) { grab_crit(gv_cur_region); if (FROZEN_CHILLED(cs_data)) DO_CHILLED_AUTORELEASE(cs_addrs, cs_data); if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN) break; rel_crit(gv_cur_region); while (FROZEN(cs_data) || IS_REPL_INST_FROZEN) { hiber_start(1000); if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data)) break; } } } else if (FROZEN_HARD(cs_data) && dollar_tlevel) { /* We don't want to continue with file extension as explained above. Hence return with an error code which * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly. 
*/ assert(CDB_STAGNATE <= t_tries); GDSFILEXT_CLNUP; return (uint4)FINAL_RETRY_FREEZE_PROG; } else WAIT_FOR_REGION_TO_UNCHILL(cs_addrs, cs_data); if (IS_REPL_INST_FROZEN && trans_in_prog) { assert(CDB_STAGNATE <= t_tries); GDSFILEXT_CLNUP; return (uint4)FINAL_RETRY_INST_FREEZE; } assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks); old_total = cs_data->trans_hist.total_blks; if (old_total != filesize) { /* Somebody else has already extended it, since we are in crit, this is trust-worthy. However, in case of MM, * we still need to remap the database */ assert((old_total > filesize) || !is_mm); /* For BG, someone else could have truncated or extended - we have no idea */ GDSFILEXT_CLNUP; return (SS_NORMAL); } if (trans_in_prog && SUSPICIOUS_EXTEND) { if (!was_crit) { GDSFILEXT_CLNUP; return (uint4)(EXTEND_SUSPECT); } /* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then it means we held * crit through bm_getfree into gdsfilext and still didn't get it right. */ assertpro(!is_free_blks_ctr_ok() && !SUSPICIOUS_EXTEND); } if (JNL_ENABLED(cs_data)) { if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = cs_addrs->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(gv_cur_region, cs_addrs); if (jnl_status) { GDSFILEXT_CLNUP; send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); return (uint4)(NO_FREE_SPACE); /* should have better return status */ } } if (is_mm) { cs_addrs->nl->mm_extender_pid = process_id; status = wcs_wtstart(gv_cur_region, 0, NULL, NULL); cs_addrs->nl->mm_extender_pid = 0; assertpro(SS_NORMAL == status); old_base[0] = cs_addrs->db_addrs[0]; old_base[1] = cs_addrs->db_addrs[1]; cs_addrs->db_addrs[0] = NULL; /* don't rely on it until the mmap below */ # ifdef _AIX status = shmdt(old_base[0] - BLK_ZERO_OFF(cs_data->start_vbn)); # else status = munmap((caddr_t)old_base[0], (size_t)(old_base[1] - old_base[0])); # endif if (0 != status) { save_errno = errno; GDSFILEXT_CLNUP; send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(12) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), ERR_SYSCALL, 5, LEN_AND_STR(MEM_UNMAP_SYSCALL), CALLFROM, save_errno); return (uint4)(NO_FREE_SPACE); } } else { /* Due to concurrency issues, it is possible some process had issued a disk read of the GDS block# corresponding * to "old_total" right after a truncate wrote a GDS-block of zeros on disk (to signal end of the db file). * If so, the global buffer containing this block needs to be invalidated now as part of the extend. If not, it is * possible the EOF block on disk is now going to be overwritten by a properly initialized bitmap block (as part * of the gdsfilext below) while the global buffer continues to have an incorrect copy of that bitmap block and * this in turn would cause XXXX failures due to a bad bitmap block in shared memory. 
(GTM-7519) */ cr = db_csh_get((block_id)old_total); if ((NULL != cr) && ((cache_rec_ptr_t)CR_NOTVALID != cr)) { assert((0 == cr->dirty) && (0 == cr->bt_index) && !cr->stopped); cr->cycle++; cr->blk = CR_BLKEMPTY; } } CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ new_total = old_total + new_blocks; new_eof = BLK_ZERO_OFF(cs_data->start_vbn) + ((off_t)new_total * cs_data->blk_size); # if !defined(__sun) && !defined(__hpux) if (!cs_data->defer_allocate) { new_size = new_eof + cs_data->blk_size; save_errno = posix_fallocate(udi->fd, 0, new_size); DEBUG_ONLY(first_save_errno = save_errno); if ((ENOSPC == save_errno) && IS_GTM_IMAGE) save_errno = extend_wait_for_fallocate(udi, new_size); if (0 != save_errno) { GDSFILEXT_CLNUP; assert(ENOSPC == save_errno); if (ENOSPC != save_errno) send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_PREALLOCATEFAIL, 2, DB_LEN_STR(gv_cur_region), save_errno); return (uint4)(NO_FREE_SPACE); } } # endif save_errno = db_write_eof_block(udi, udi->fd, cs_data->blk_size, new_eof, &(TREF(dio_buff))); if ((ENOSPC == save_errno) && IS_GTM_IMAGE) save_errno = extend_wait_for_write(udi, cs_data->blk_size, new_eof); if (0 != save_errno) { GDSFILEXT_CLNUP; if (ENOSPC != save_errno) send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); return (uint4)(NO_FREE_SPACE); } if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_1)) { LONG_SLEEP(600); assert(FALSE); } /* Ensure the EOF and metadata get to disk BEFORE any bitmap writes. Otherwise, the file size could no longer reflect * a proper extent and subsequent invocations of gdsfilext could corrupt the database. */ if (!IS_STATSDB_CSA(cs_addrs)) { GTM_DB_FSYNC(cs_addrs, udi->fd, status); assert(0 == status); if (0 != status) { GDSFILEXT_CLNUP; send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(8) ERR_DBFILERR, 5, RTS_ERROR_LITERAL("fsync1()"), CALLFROM, status); return (uint4)(NO_FREE_SPACE); } } if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_2)) { LONG_SLEEP(600); assert(FALSE); /* Should be killed before that */ } DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
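/* Illustrative sketch (not part of the GT.M sources): the new_bit_maps computation near the top of gdsfilext()
 * follows from the GDS layout rule that every group of bplmap blocks starts with a local bitmap, i.e. each
 * (bplmap - 1) data blocks need one bitmap block.  The standalone function below repeats that arithmetic with
 * plain unsigned ints; for example, with bplmap = 512, 1000 existing blocks and a request for 5000 more data
 * blocks it reports 10 additional bitmaps.
 */
static unsigned int divide_round_up(unsigned int n, unsigned int d)
{
    return (n + d - 1) / d;
}

/* Number of additional local bitmap blocks needed to add "blocks" data blocks to a file of total_blks blocks. */
static unsigned int extra_bit_maps(unsigned int total_blks, unsigned int blocks, unsigned int bplmap)
{
    unsigned int    cur_maps, cur_data, new_maps;

    cur_maps = divide_round_up(total_blks, bplmap);             /* bitmaps already in the file */
    cur_data = total_blks - cur_maps;                           /* non-bitmap blocks already in the file */
    new_maps = divide_round_up(cur_data + blocks, bplmap - 1);  /* bitmaps needed after the extension */
    return new_maps - cur_maps;
}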
void view_arg_convert(viewtab_entry *vtp, int vtp_parm, mval *parm, viewparm *parmblk, boolean_t is_dollar_view) { static int4 first_time = TRUE; char *cptr; char *strtokptr; gd_binding *gd_map; gd_region *gd_reg_start, *r_ptr, *r_top; gvnh_reg_t *gvnh_reg; gvnh_spanreg_t *gvspan; gv_namehead *tmp_gvt; ht_ent_mname *tabent; int n, reg_index; mident_fixed lcl_buff; mname_entry gvent, lvent; mstr namestr, tmpstr; unsigned char *c, *c_top, *dst, *dst_top, global_names[1024], *nextsrc, *src, *src_top, stashed, y; switch (vtp_parm) { case VTP_NULL: if (parm != 0) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); break; case (VTP_NULL | VTP_VALUE): if (NULL == parm) { parmblk->value = (mval *)&literal_one; break; } /* caution: fall through */ case VTP_VALUE: if (NULL == parm) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); parmblk->value = parm; break; case (VTP_NULL | VTP_DBREGION): if (!is_dollar_view && ((NULL == parm) || ((1 == parm->str.len) && ('*' == *parm->str.addr)))) { parmblk->gv_ptr = NULL; break; } /* caution: fall through */ case VTP_DBREGION: if (NULL == parm) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); if (!gd_header) /* IF GD_HEADER ==0 THEN OPEN GBLDIR */ gvinit(); r_ptr = gd_header->regions; if (!parm->str.len && vtp->keycode == VTK_GVNEXT) /* "" => 1st region */ parmblk->gv_ptr = r_ptr; else { for (cptr = parm->str.addr, n = 0; n < parm->str.len; cptr++, n++) lcl_buff.c[n] = TOUPPER(*cptr); /* Region names are upper-case ASCII */ namestr.len = n; namestr.addr = &lcl_buff.c[0]; for (r_top = r_ptr + gd_header->n_regions; ; r_ptr++) { if (r_ptr >= r_top) { format2zwr((sm_uc_ptr_t)parm->str.addr, parm->str.len, global_names, &n); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_NOREGION,2, n, global_names); } tmpstr.len = r_ptr->rname_len; tmpstr.addr = (char *)r_ptr->rname; MSTR_CMP(tmpstr, namestr, n); if (0 == n) break; } parmblk->gv_ptr = r_ptr; } break; case VTP_DBKEY: if (NULL == parm) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); if (!parm->str.len) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_NOTGBL, 2, parm->str.len, NULL); if (!gd_header) /* IF GD_HEADER ==0 THEN OPEN GBLDIR */ gvinit(); c = (unsigned char *)parm->str.addr; if ('^' != *c) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_NOTGBL, 2, parm->str.len, c); c_top = c + parm->str.len; c++; /* skip initial '^' */ parmblk->str.addr = (char *)c; for ( ; (c < c_top) && ('(' != *c); c++) ; parmblk->str.len = (char *)c - parmblk->str.addr; if (MAX_MIDENT_LEN < parmblk->str.len) parmblk->str.len = MAX_MIDENT_LEN; if (!valid_mname(&parmblk->str)) { format2zwr((sm_uc_ptr_t)parm->str.addr, parm->str.len, global_names, &n); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWGVN, 2, n, global_names); } break; case VTP_RTNAME: if (NULL == parm) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); memset(&parmblk->ident.c[0], 0, SIZEOF(parmblk->ident)); if (parm->str.len > 0) memcpy(&parmblk->ident.c[0], parm->str.addr, (parm->str.len <= MAX_MIDENT_LEN ? 
parm->str.len : MAX_MIDENT_LEN)); break; case VTP_NULL | VTP_DBKEYLIST: if (NULL == parm || 0 == parm->str.len) { parmblk->ni_list.gvnh_list = NULL; parmblk->ni_list.type = NOISOLATION_NULL; break; } /* caution : explicit fall through */ case VTP_DBKEYLIST: if (NULL == parm) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); if (!gd_header) gvinit(); if (first_time) { noisolation_buddy_list = (buddy_list *)malloc(SIZEOF(buddy_list)); initialize_list(noisolation_buddy_list, SIZEOF(noisolation_element), NOISOLATION_INIT_ALLOC); gvt_pending_buddy_list = (buddy_list *)malloc(SIZEOF(buddy_list)); initialize_list(gvt_pending_buddy_list, SIZEOF(gvt_container), NOISOLATION_INIT_ALLOC); first_time = FALSE; } assertpro(SIZEOF(global_names) > parm->str.len); tmpstr.len = parm->str.len; /* we need to change len and should not change parm->str, so take a copy */ tmpstr.addr = parm->str.addr; if (0 != tmpstr.len) { switch (*tmpstr.addr) { case '+' : parmblk->ni_list.type = NOISOLATION_PLUS; tmpstr.addr++; tmpstr.len--; break; case '-' : parmblk->ni_list.type = NOISOLATION_MINUS; tmpstr.addr++; tmpstr.len--; break; default : parmblk->ni_list.type = NOISOLATION_NULL; break; } if (!tmpstr.len) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWGVN, 2, tmpstr.len, NULL); memcpy(global_names, tmpstr.addr, tmpstr.len); global_names[tmpstr.len] = '\0'; src = (unsigned char *)STRTOK_R((char *)global_names, ",", &strtokptr); REINITIALIZE_LIST(noisolation_buddy_list); /* reinitialize the noisolation buddy_list */ parmblk->ni_list.gvnh_list = NULL; for ( ; src < &global_names[tmpstr.len + 1]; src = nextsrc) { nextsrc = (unsigned char *)STRTOK_R(NULL, ",", &strtokptr); if (NULL == nextsrc) nextsrc = &global_names[tmpstr.len + 1]; if (nextsrc - src >= 2 && '^' == *src) { namestr.addr = (char *)src + 1; /* skip initial '^' */ namestr.len = INTCAST(nextsrc - src - 2); /* don't count initial ^ and trailing 0 */ if (namestr.len > MAX_MIDENT_LEN) namestr.len = MAX_MIDENT_LEN; if (valid_mname(&namestr)) { memcpy(&lcl_buff.c[0], namestr.addr, namestr.len); gvent.var_name.len = namestr.len; } else { memcpy(&lcl_buff.c[0], src, nextsrc - src - 1); format2zwr((sm_uc_ptr_t)&lcl_buff.c, nextsrc - src - 1, global_names, &n); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWGVN, 2, n, global_names); } } else { memcpy(&lcl_buff.c[0], src, nextsrc - src - 1); format2zwr((sm_uc_ptr_t)&lcl_buff.c, nextsrc - src - 1, global_names, &n); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWGVN, 2, n, global_names); } tmp_gvt = NULL; gvent.var_name.addr = &lcl_buff.c[0]; COMPUTE_HASH_MNAME(&gvent); if (NULL != (tabent = lookup_hashtab_mname(gd_header->tab_ptr, &gvent))) { gvnh_reg = (gvnh_reg_t *)tabent->value; assert(NULL != gvnh_reg); tmp_gvt = gvnh_reg->gvt; } else { gd_map = gv_srch_map(gd_header, gvent.var_name.addr, gvent.var_name.len, SKIP_BASEDB_OPEN_FALSE); r_ptr = gd_map->reg.addr; tmp_gvt = (gv_namehead *)targ_alloc(r_ptr->max_key_size, &gvent, r_ptr); GVNH_REG_INIT(gd_header, gd_header->tab_ptr, gd_map, tmp_gvt, r_ptr, gvnh_reg, tabent); /* In case of a global spanning multiple regions, the gvt pointer corresponding to * the region where the unsubscripted global reference maps to is stored in TWO * locations (one in gvnh_reg->gvspan->gvt_array[index] and one in gvnh_reg->gvt. * So pass in both these pointer addresses to be stored in the pending list in * case this gvt gets reallocated (due to different keysizes between gld and db). 
*/ if (NULL == (gvspan = gvnh_reg->gvspan)) { ADD_TO_GVT_PENDING_LIST_IF_REG_NOT_OPEN(r_ptr, &gvnh_reg->gvt, NULL); } else { gd_reg_start = &gd_header->regions[0]; GET_REG_INDEX(gd_header, gd_reg_start, r_ptr, reg_index); /* the above sets "reg_index" */ assert(reg_index >= gvspan->min_reg_index); assert(reg_index <= gvspan->max_reg_index); reg_index -= gvspan->min_reg_index; ADD_TO_GVT_PENDING_LIST_IF_REG_NOT_OPEN(r_ptr, &gvspan->gvt_array[reg_index], &gvnh_reg->gvt); } } ADD_GVT_TO_VIEW_NOISOLATION_LIST(tmp_gvt, parmblk); if (!is_dollar_view && (NULL != gvnh_reg->gvspan)) { /* Global spans multiple regions. Make sure gv_targets corresponding to ALL * spanned regions are allocated so NOISOLATION status can be set in all of * them even if the corresponding regions are not open yet. Do this only for * VIEW "NOISOLATION" commands which change the noisolation characteristic. * $VIEW("NOISOLATION") only examines the characteristics and so no need to * allocate all the gv-targets in that case. Just one is enough. */ gvnh_spanreg_subs_gvt_init(gvnh_reg, gd_header, parmblk); } } } else rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWGVN, 2, tmpstr.len, tmpstr.addr); break; case VTP_LVN: if (NULL == parm) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWARGCNT, 2, strlen((const char *)vtp->keyword), vtp->keyword); if (0 < parm->str.len) { lvent.var_name.addr = parm->str.addr; lvent.var_name.len = parm->str.len; if (lvent.var_name.len > MAX_MIDENT_LEN) lvent.var_name.len = MAX_MIDENT_LEN; if (!valid_mname(&lvent.var_name)) { format2zwr((sm_uc_ptr_t)parm->str.addr, parm->str.len, global_names, &n); rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWLVN, 2, n, global_names); } } else rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWLVN, 2, parm->str.len, parm->str.addr); /* Now look up the name.. */ COMPUTE_HASH_MNAME(&lvent); if ((tabent = lookup_hashtab_mname(&curr_symval->h_symtab, &lvent)) && (NULL != tabent->value)) parmblk->value = (mval *)tabent->value; /* Return lv_val ptr */ else rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_VIEWLVN, 2, parm->str.len, parm->str.addr); break; default: assertpro(FALSE && vtp_parm); } }
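/* Illustrative stand-alone sketch (not the routine above) of the VTP_DBKEYLIST parsing idea: take a list such
 * as "+^A,^B,^C", record the optional leading +/- qualifier, split the remainder on commas with strtok_r, and
 * require each item to name a global (leading '^').  The sample names and qualifier values are assumptions. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <string.h>

int main(void)
{
	char	list[] = "+^ACN,^ORD,^HIST";	/* hypothetical input list */
	char	*item, *savep, *p = list;
	int	qualifier = 0;			/* +1, -1 or 0 when no qualifier is present */

	if ('+' == *p)
	{
		qualifier = 1;
		p++;
	} else if ('-' == *p)
	{
		qualifier = -1;
		p++;
	}
	for (item = strtok_r(p, ",", &savep); NULL != item; item = strtok_r(NULL, ",", &savep))
	{
		if (('^' != item[0]) || ('\0' == item[1]))
		{	/* the real code reports ERR_VIEWGVN here; the sketch just complains */
			fprintf(stderr, "not a global name: %s\n", item);
			return 1;
		}
		printf("qualifier %+d applies to global %s\n", qualifier, item + 1);
	}
	return 0;
}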
uint4 jnl_file_lost(jnl_private_control *jpc, uint4 jnl_stat) { /* Notify operator and terminate journaling */ unsigned int status; sgmnt_addrs *csa; seq_num reg_seqno, jnlseqno; boolean_t was_lockid = FALSE, instfreeze_environ; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; switch(jpc->region->dyn.addr->acc_meth) { case dba_mm: case dba_bg: csa = &FILE_INFO(jpc->region)->s_addrs; break; default: assertpro(FALSE && jpc->region->dyn.addr->acc_meth); } # ifdef VMS /* The following assert has been removed as it could be FALSE if the caller is "jnl_file_extend" * assert(0 != memcmp(csa->nl->jnl_file.jnl_file_id.fid, zero_fid, SIZEOF(zero_fid))); */ # endif assert(csa->now_crit); /* We issue an rts_error (instead of shutting off journaling) in the following cases : {BYPASSOK} * 1) $gtm_error_on_jnl_file_lost is set to issue runtime error (if not already issued) in case of journaling issues. * 2) The process has the given message set in $gtm_custom_errors (indicative of instance freeze on error setup) * in which case the goal is to never shut-off journaling */ UNIX_ONLY(assert(jnlpool.jnlpool_ctl == jnlpool_ctl)); UNIX_ONLY(instfreeze_environ = INST_FREEZE_ON_MSG_ENABLED(csa, jnl_stat)); VMS_ONLY(instfreeze_environ = FALSE); if ((JNL_FILE_LOST_ERRORS == TREF(error_on_jnl_file_lost)) || instfreeze_environ) { VMS_ONLY(assert(FALSE)); /* Not fully implemented / supported on VMS. */ if (!process_exiting || instfreeze_environ || !csa->jnl->error_reported) { csa->jnl->error_reported = TRUE; in_wcs_recover = FALSE; /* in case we're called in wcs_recover() */ if (SS_NORMAL != jpc->status) rts_error_csa(CSA_ARG(csa) VARLSTCNT(7) jnl_stat, 4, JNL_LEN_STR(csa->hdr), DB_LEN_STR(gv_cur_region), jpc->status); else rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_stat, 4, JNL_LEN_STR(csa->hdr), DB_LEN_STR(gv_cur_region)); } return jnl_stat; } if (0 != jnl_stat) jnl_send_oper(jpc, jnl_stat); csa->hdr->jnl_state = jnl_closed; jpc->jnl_buff->cycle++; /* increment shared cycle so all future callers of jnl_ensure_open recognize journal switch */ assert(jpc->cycle < jpc->jnl_buff->cycle); if (REPL_ENABLED(csa->hdr)) { csa->hdr->repl_state = repl_was_open; reg_seqno = csa->hdr->reg_seqno; jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO; send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(jpc->region), ®_seqno, ®_seqno, &jnlseqno, &jnlseqno); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_JNLCLOSED, 3, DB_LEN_STR(jpc->region), &csa->ti->curr_tn); #ifdef VMS /* We can get a jnl_file_lost before the file is even created, so locking is done only if the lock exist */ if (0 != csa->jnl->jnllsb->lockid) { was_lockid = TRUE; status = gtm_enqw(EFN$C_ENF, LCK$K_EXMODE, csa->jnl->jnllsb, LCK$M_CONVERT | LCK$M_NODLCKBLK, NULL, 0, NULL, 0, NULL, PSL$C_USER, 0); if (SS$_NORMAL == status) status = csa->jnl->jnllsb->cond; } jnl_file_close(jpc->region, FALSE, FALSE); if (was_lockid) { if (SS$_NORMAL == status) status = gtm_deq(csa->jnl->jnllsb->lockid, NULL, PSL$C_USER, 0); assertpro(SS$_NORMAL == status); } # else jnl_file_close(jpc->region, FALSE, FALSE); #endif return EXIT_NRM; }
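/* Generic sketch of the "cycle" technique used above (jpc->jnl_buff->cycle is incremented so that every future
 * caller of jnl_ensure_open notices the journal switch): a shared generation counter is bumped whenever the
 * underlying resource changes, and each user re-opens when the generation it last saw no longer matches.
 * The types and the reopen_resource() helper below are illustrative assumptions, not GT.M code. */
#include <stdatomic.h>
#include <stdio.h>

typedef struct { atomic_uint cycle; } shared_state_t;		/* lives in shared memory in practice */
typedef struct { unsigned cycle_seen; int handle; } per_process_t;

static int reopen_resource(void) { return 42; }			/* stand-in for re-opening the journal file */

static int get_handle(shared_state_t *shared, per_process_t *mine)
{
	unsigned current = atomic_load(&shared->cycle);

	if (current != mine->cycle_seen)	/* someone switched or closed the resource since we last looked */
	{
		mine->handle = reopen_resource();
		mine->cycle_seen = current;
	}
	return mine->handle;
}

int main(void)
{
	shared_state_t	shared = { .cycle = 0 };
	per_process_t	me = { .cycle_seen = (unsigned)-1, .handle = -1 };

	printf("handle %d\n", get_handle(&shared, &me));
	atomic_fetch_add(&shared.cycle, 1);	/* another process "loses" the journal file */
	printf("handle %d after switch\n", get_handle(&shared, &me));
	return 0;
}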
/* * This will rundown a replication instance journal (and receiver) pool. * Input Parameter: * replpool_id of the instance. Instance file name must be null terminated in replpool_id. * Returns : * TRUE, if successful. * FALSE, otherwise. */ boolean_t mu_rndwn_repl_instance(replpool_identifier *replpool_id, boolean_t immediate, boolean_t rndwn_both_pools, boolean_t *jnlpool_sem_created) { boolean_t jnlpool_stat = SS_NORMAL, recvpool_stat = SS_NORMAL, decr_cnt, sem_created = FALSE, ipc_rmvd; char *instfilename; unsigned char ipcs_buff[MAX_IPCS_ID_BUF], *ipcs_ptr; gd_region *r_save; repl_inst_hdr repl_instance; static gd_region *reg = NULL; struct semid_ds semstat; struct shmid_ds shmstat; unix_db_info *udi; int save_errno, sem_id, shm_id, status; sgmnt_addrs *repl_csa; boolean_t was_crit; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; if (NULL == reg) { r_save = gv_cur_region; mu_gv_cur_reg_init(); reg = gv_cur_region; gv_cur_region = r_save; } *jnlpool_sem_created = FALSE; /* Assert that the layout of replpool_identifier is identical for all versions going forward as the function * "validate_replpool_shm_entry" (used by the argumentless mupip rundown aka "mupip rundown") relies on this. * This assert is placed here (instead of there) because the automated tests exercise this logic much more * than the argumentless code. If any of these asserts fail, "validate_replpool_shm_entry" needs to change * to handle the old and new layouts. * * Structure ----> replpool_identifier <---- size 312 [0x0138] * * offset = 0000 [0x0000] size = 0012 [0x000c] ----> replpool_identifier.label * offset = 0012 [0x000c] size = 0001 [0x0001] ----> replpool_identifier.pool_type * offset = 0013 [0x000d] size = 0036 [0x0024] ----> replpool_identifier.now_running * offset = 0052 [0x0034] size = 0004 [0x0004] ----> replpool_identifier.repl_pool_key_filler * offset = 0056 [0x0038] size = 0256 [0x0100] ----> replpool_identifier.instfilename */ assert(0 == OFFSETOF(replpool_identifier, label[0])); assert(12 == SIZEOF(((replpool_identifier *)NULL)->label)); assert(12 == OFFSETOF(replpool_identifier, pool_type)); assert(1 == SIZEOF(((replpool_identifier *)NULL)->pool_type)); assert(13 == OFFSETOF(replpool_identifier, now_running[0])); assert(36 == SIZEOF(((replpool_identifier *)NULL)->now_running)); assert(56 == OFFSETOF(replpool_identifier, instfilename[0])); assert(256 == SIZEOF(((replpool_identifier *)NULL)->instfilename)); /* End asserts */ jnlpool.jnlpool_dummy_reg = reg; recvpool.recvpool_dummy_reg = reg; instfilename = replpool_id->instfilename; reg->dyn.addr->fname_len = strlen(instfilename); assert(0 == instfilename[reg->dyn.addr->fname_len]); memcpy((char *)reg->dyn.addr->fname, instfilename, reg->dyn.addr->fname_len + 1); udi = FILE_INFO(reg); udi->fn = (char *)reg->dyn.addr->fname; /* Lock replication instance using ftok semaphore so that no other replication process can startup until we are done with * rundown */ if (!ftok_sem_get(reg, TRUE, REPLPOOL_ID, immediate)) return FALSE; ESTABLISH_RET(mu_rndwn_repl_instance_ch, FALSE); repl_inst_read(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); assert(rndwn_both_pools || JNLPOOL_SEGMENT == replpool_id->pool_type || RECVPOOL_SEGMENT == replpool_id->pool_type); if (rndwn_both_pools || (JNLPOOL_SEGMENT == replpool_id->pool_type)) { /* -------------------------- * First rundown Journal pool * -------------------------- */ shm_id = repl_instance.jnlpool_shmid; if (SS_NORMAL == (jnlpool_stat = mu_replpool_grab_sem(&repl_instance, 
JNLPOOL_SEGMENT, &sem_created, immediate))) { /* Got JNL_POOL_ACCESS_SEM and incremented SRC_SRV_COUNT_SEM */ assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); assert(holds_sem[SOURCE][SRC_SERV_COUNT_SEM]); sem_id = repl_instance.jnlpool_semid; if ((INVALID_SHMID == shm_id) || (-1 == shmctl(shm_id, IPC_STAT, &shmstat)) || (shmstat.shm_ctime != repl_instance.jnlpool_shmid_ctime)) { repl_instance.jnlpool_shmid = shm_id = INVALID_SHMID; repl_instance.jnlpool_shmid_ctime = 0; } assert((INVALID_SHMID != shm_id) || ((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl))); ipc_rmvd = TRUE; if (INVALID_SHMID != shm_id) { replpool_id->pool_type = JNLPOOL_SEGMENT; jnlpool_stat = mu_rndwn_replpool(replpool_id, &repl_instance, shm_id, &ipc_rmvd); ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools && ((SS_NORMAL != jnlpool_stat) || ipc_rmvd)) gtm_putmsg(VARLSTCNT(6) (jnlpool_stat ? ERR_MUJPOOLRNDWNFL : ERR_MUJPOOLRNDWNSUC), 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } assert(ipc_rmvd || (NULL != jnlpool_ctl)); assert((NULL == jnlpool.jnlpool_ctl) || (SS_NORMAL == jnlpool_stat) || jgbl.onlnrlbk); assert((INVALID_SHMID != repl_instance.jnlpool_shmid) || (0 == repl_instance.jnlpool_shmid_ctime)); assert((INVALID_SHMID == repl_instance.jnlpool_shmid) || (0 != repl_instance.jnlpool_shmid_ctime)); assert(INVALID_SEMID != sem_id); if (!mur_options.rollback) { /* Invoked by MUPIP RUNDOWN in which case the semaphores needs to be removed. But, remove the * semaphore ONLY if we created it here OR the journal pool was successfully removed. */ if (NULL == jnlpool_ctl) { if (((sem_created || (SS_NORMAL == jnlpool_stat)) && (SS_NORMAL == mu_replpool_release_sem(&repl_instance, JNLPOOL_SEGMENT, TRUE)))) { /* Now that semaphores are removed, reset fields in file header */ if (!sem_created) { /* If sem_id was created by mu_replpool_grab_sem then do NOT report the * MURPOOLRNDWNSUC message as it indicates that the semaphore was orphaned * and we removed it when in fact there was no orphaned semaphore and we * created it as part of mu_replpool_grab_sem to get standalone access to * rundown the receiver pool (which may or may not exist) */ ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, sem_id); *ipcs_ptr = '\0'; gtm_putmsg(VARLSTCNT(9) ERR_MUJPOOLRNDWNSUC, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename), ERR_SEMREMOVED, 1, sem_id); } repl_inst_jnlpool_reset(); } } else { /* Anticipatory Freeze scheme is turned ON. So, release just the JNL_POOL_ACCESS_SEM. The * semaphore will be released/removed in the caller (mupip_rundown) */ assert(ANTICIPATORY_FREEZE_AVAILABLE); assertpro(SS_NORMAL == (status = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))); assert(!holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); /* Since we are not resetting the semaphore IDs in the file header, we need to write out * the semaphore IDs in the instance file (if we created them). */ if (sem_created) repl_inst_write(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); } /* If semaphore is not created and the journal pool rundown failed (due to attached processes), * rundown process continues to holds the journal pool access control semaphore. This way, we hold * the semaphore on behalf of the source server (now no longer alive) to prevent mu_rndwn_sem_all * (invoked later) from cleaning up this orphaned semaphore (which causes REPLREQROLLBACK if the * source server is restarted). 
But, since the semaphore is not released (until the rundown process * dies), holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] continues to remain TRUE. This causes asserts in * ftok_sem_get if mu_rndwn_repl_instance is invoked for a different journal/receive pool. To * workaround it, set holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] to FALSE. This is an interim solution * until we record such semaphores in an ignore-list (or some such) and change mu_rndwn_sem_all to * skip the ones that are present in the ignore list. */ holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE; } } else if (rndwn_both_pools && (INVALID_SHMID != shm_id)) { ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools) gtm_putmsg(VARLSTCNT(6) ERR_MUJPOOLRNDWNFL, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } *jnlpool_sem_created = sem_created; } if (((SS_NORMAL == jnlpool_stat) || !jgbl.mur_rollback) && (rndwn_both_pools || (RECVPOOL_SEGMENT == replpool_id->pool_type))) { /* -------------------------- * Now rundown Receivpool * -------------------------- * Note: RECVPOOL is rundown ONLY if the JNLPOOL rundown was successful. This way, we don't end up * creating new semaphores for the RECVPOOL if ROLLBACK is not going to start anyways because of the failed * JNLPOOL rundown. The only exception is MUPIP RUNDOWN command in which case we try running down the * RECVPOOL even if the JNLPOOL rundown failed. */ shm_id = repl_instance.recvpool_shmid; if (SS_NORMAL == (recvpool_stat = mu_replpool_grab_sem(&repl_instance, RECVPOOL_SEGMENT, &sem_created, immediate))) { sem_id = repl_instance.recvpool_semid; if ((INVALID_SHMID == shm_id) || (-1 == shmctl(shm_id, IPC_STAT, &shmstat)) || (shmstat.shm_ctime != repl_instance.recvpool_shmid_ctime)) { repl_instance.recvpool_shmid = shm_id = INVALID_SHMID; repl_instance.recvpool_shmid_ctime = 0; } ipc_rmvd = TRUE; if (INVALID_SHMID != shm_id) { replpool_id->pool_type = RECVPOOL_SEGMENT; recvpool_stat = mu_rndwn_replpool(replpool_id, &repl_instance, shm_id, &ipc_rmvd); ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools && ((SS_NORMAL != recvpool_stat) || ipc_rmvd)) gtm_putmsg(VARLSTCNT(6) (recvpool_stat ? ERR_MURPOOLRNDWNFL : ERR_MURPOOLRNDWNSUC), 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } assert((TRUE == ipc_rmvd) || (SS_NORMAL != recvpool_stat) || jgbl.onlnrlbk); assert((INVALID_SHMID != repl_instance.recvpool_shmid) || (0 == repl_instance.recvpool_shmid_ctime)); assert((INVALID_SHMID == repl_instance.recvpool_shmid) || (0 != repl_instance.recvpool_shmid_ctime)); assert(INVALID_SEMID != sem_id); if (!mur_options.rollback) { /* Invoked by MUPIP RUNDOWN in which case the semaphores needs to be removed. But, remove the * semaphore ONLY if we created it here OR the receive pool was successfully removed. 
*/ if ((sem_created || (SS_NORMAL == recvpool_stat)) && (SS_NORMAL == mu_replpool_release_sem(&repl_instance, RECVPOOL_SEGMENT, TRUE))) { /* Now that semaphores are removed, reset fields in file header */ if (!sem_created) { /* if sem_id was "created" by mu_replpool_grab_sem then do NOT report the * MURPOOLRNDWNSUC message as it indicates that the semaphore was orphaned and we * removed it when in fact there was no orphaned semaphore and we "created" it as * part of mu_replpool_grab_sem to get standalone access to rundown the receiver * pool (which may or may not exist) */ ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, sem_id); *ipcs_ptr = '\0'; gtm_putmsg(VARLSTCNT(9) ERR_MURPOOLRNDWNSUC, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename), ERR_SEMREMOVED, 1, sem_id); } if (NULL != jnlpool_ctl) { /* Journal pool is not yet removed. So, grab lock before resetting semid/shmid * fields in the file header as the function expects the caller to hold crit * if the journal pool is available */ repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs; assert(!repl_csa->now_crit); assert(!repl_csa->hold_onto_crit); was_crit = repl_csa->now_crit; /* Since we do grab_lock, below, we need to do a per-process initialization. Also, * start heartbeat so that grab_lock can issue MUTEXLCKALERT and get C-stacks if * waiting for crit */ START_HEARTBEAT_IF_NEEDED; mutex_per_process_init(); if (!was_crit) grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY); } repl_inst_recvpool_reset(); if ((NULL != jnlpool_ctl) && !was_crit) rel_lock(jnlpool.jnlpool_dummy_reg); } /* If semaphore is not created and the receive pool rundown failed (due to attached processes), * rundown process continues to holds the receive pool access control semaphore. This way, we hold * the semaphore on behalf of the receiver server (now no longer alive) to prevent mu_rndwn_sem_all * (invoked later) from cleaning up this orphaned semaphore (which causes REPLREQROLLBACK if the * receiver is restarted). But, since the semaphore is not released (until the rundown process * dies), holds_sem[RECV][RECV_POOL_ACCESS_SEM] continues to remain TRUE. This causes asserts in * ftok_sem_get if mu_rndwn_repl_instance is invoked for a different journal/receive pool. To * workaround it, set holds_sem[SOURCE][RECV_POOL_ACCESS_SEM] to FALSE. This is an interim solution * until we record such semaphores in an ignore-list (or some such) and change mu_rndwn_sem_all to * skip the ones that are present in the ignore list. */ assert((sem_created || (SS_NORMAL == recvpool_stat)) || holds_sem[RECV][RECV_POOL_ACCESS_SEM]); DEBUG_ONLY(set_sem_set_recvr(sem_id)); } } else if (rndwn_both_pools && (INVALID_SHMID != shm_id)) { ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id); *ipcs_ptr = '\0'; if (rndwn_both_pools) gtm_putmsg(VARLSTCNT(6) ERR_MURPOOLRNDWNFL, 4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename)); } } assert(jgbl.onlnrlbk || ANTICIPATORY_FREEZE_AVAILABLE || (NULL == jnlpool.repl_inst_filehdr)); if (mur_options.rollback && (SS_NORMAL == jnlpool_stat) && (SS_NORMAL == recvpool_stat)) { assert(jgbl.onlnrlbk || ANTICIPATORY_FREEZE_AVAILABLE || ((INVALID_SHMID == repl_instance.jnlpool_shmid) && (INVALID_SHMID == repl_instance.recvpool_shmid))); /* Initialize jnlpool.repl_inst_filehdr as it is used later by gtmrecv_fetchresync() */ decr_cnt = FALSE; if (NULL == jnlpool.repl_inst_filehdr) { /* Possible if there is NO journal pool in the first place. In this case, malloc the structure here and * copy the file header from repl_instance structure. 
*/ jnlpool.repl_inst_filehdr = (repl_inst_hdr_ptr_t)malloc(SIZEOF(repl_inst_hdr)); memcpy(jnlpool.repl_inst_filehdr, &repl_instance, SIZEOF(repl_inst_hdr)); } else { assert(repl_instance.jnlpool_semid == jnlpool.repl_inst_filehdr->jnlpool_semid); assert(repl_instance.jnlpool_semid_ctime == jnlpool.repl_inst_filehdr->jnlpool_semid_ctime); assert(repl_instance.jnlpool_shmid == jnlpool.repl_inst_filehdr->jnlpool_shmid); assert(repl_instance.jnlpool_shmid_ctime == jnlpool.repl_inst_filehdr->jnlpool_shmid_ctime); /* If the ONLINE ROLLBACK command is run on the primary when the source server is up and running, * jnlpool.repl_inst_filehdr->recvpool_semid will be INVALID because there is NO receiver server * running. However, ROLLBACK creates semaphores for both journal pool and receive pool and writes * it to the instance file header. Copy this information to the file header copy in the jnlpool * as well */ jnlpool.repl_inst_filehdr->recvpool_semid = repl_instance.recvpool_semid; jnlpool.repl_inst_filehdr->recvpool_semid_ctime = repl_instance.recvpool_semid_ctime; } /* Flush changes to the replication instance file header to disk */ repl_inst_write(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr)); } else /* for MUPIP RUNDOWN, semid fields in the file header are reset and is written in mu_replpool_release_sem() above */ decr_cnt = (NULL == jnlpool_ctl); /* for anticipatory freeze, mupip_rundown releases the semaphore */ REVERT; /* Release replication instance ftok semaphore lock */ if (!ftok_sem_release(reg, decr_cnt, immediate)) /* Do not decrement the counter if ROLLBACK */ return FALSE; return ((SS_NORMAL == jnlpool_stat) && (SS_NORMAL == recvpool_stat)); }
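/* Minimal sketch of the layout-pinning technique used at the top of the routine above: compile-time asserts
 * freeze a structure's offsets and total size so code that inspects another process's (possibly older) shared
 * memory keeps working.  The structure and numbers below are illustrative, not the real replpool_identifier. */
#include <assert.h>
#include <stddef.h>

typedef struct
{
	char	label[12];
	char	pool_type;
	char	now_running[36];
	char	filler[7];
	char	instfilename[256];
} pool_id_t;

static_assert(0 == offsetof(pool_id_t, label), "label must stay first");
static_assert(12 == offsetof(pool_id_t, pool_type), "pool_type offset is part of the shared layout");
static_assert(13 == offsetof(pool_id_t, now_running), "now_running offset is part of the shared layout");
static_assert(56 == offsetof(pool_id_t, instfilename), "instfilename offset is part of the shared layout");
static_assert(312 == sizeof(pool_id_t), "total size is part of the shared layout");

int main(void) { return 0; }	/* nothing to run; the checks happen at compile time */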
void ojparams (char *p, job_params_type *job_params) { unsigned char ch; int4 status; mstr_len_t handle_len; /* Initializations */ job_params->baspri = 0; job_params->input.len = 0; job_params->output.len = 0; job_params->error.len = 0; job_params->gbldir.len = 0; job_params->startup.len = 0; job_params->directory.len = 0; job_params->directory.addr = 0; job_params->cmdline.len = 0; job_params->cmdline.addr = 0; job_params->passcurlvn = FALSE; /* Process parameter list */ while (*p != jp_eol) { switch (ch = *p++) { case jp_default: if (*p != 0) { job_params->directory.len = (int)((unsigned char) *p); job_params->directory.addr = (p + 1); } break; case jp_error: if (*p != 0) { job_params->error.len = (int)((unsigned char) *p); job_params->error.addr = (p + 1); } break; case jp_gbldir: if (*p != 0) { job_params->gbldir.len = (int)((unsigned char) *p); job_params->gbldir.addr = (p + 1); } break; case jp_input: if (*p != 0) { job_params->input.len = (int)((unsigned char) *p); job_params->input.addr = p + 1; } break; case jp_output: if (*p != 0) { job_params->output.len = (int)((unsigned char) *p); job_params->output.addr = p + 1; } break; case jp_priority: job_params->baspri = (int4)(*((int4 *)p)); break; case jp_startup: if (*p != 0) { job_params->startup.len = (int)((unsigned char) *p); job_params->startup.addr = p + 1; } break; case jp_cmdline: if(*p != 0) { job_params->cmdline.len = (int)((unsigned char) *p); job_params->cmdline.addr = p + 1; } break; case jp_passcurlvn: job_params->passcurlvn = TRUE; break; case jp_account: case jp_detached: case jp_image: case jp_logfile: case jp_noaccount: case jp_nodetached: case jp_noswapping: case jp_process_name: case jp_schedule: case jp_swapping: break; default: assertpro(ch != ch); } switch (job_param_datatypes[ch]) { case jpdt_nul: break; case jpdt_num: p += SIZEOF(int4); break; case jpdt_str: p += ((int)((unsigned char)*p)) + 1; break; default: assertpro((jpdt_nul == job_param_datatypes[ch]) || (jpdt_num == job_param_datatypes[ch]) || (jpdt_str == job_param_datatypes[ch])); } } /* Defaults and Checks */ /* * Input file */ if (job_params->input.len == 0) { job_params->input.len = STRLEN(definput); job_params->input.addr = definput; } else if (IS_JOB_SOCKET(job_params->input.addr, job_params->input.len)) { handle_len = JOB_SOCKET_HANDLE_LEN(job_params->input.len); if ((NULL == socket_pool) || (-1 == iosocket_handle(JOB_SOCKET_HANDLE(job_params->input.addr), &handle_len, FALSE, socket_pool))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 5, "INPUT", job_params->input.len, job_params->input.addr); } else if (!(status = ojchkfs (job_params->input.addr, job_params->input.len, TRUE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 5, "INPUT", job_params->input.len, job_params->input.addr); /* * Output file */ if (job_params->output.len == 0) { if (!defoutbuf) defoutbuf = malloc(MAX_FILSPC_LEN); memcpy (&defoutbuf[0], job_params->routine.addr, job_params->routine.len); memcpy (&defoutbuf[job_params->routine.len], defoutext.addr, defoutext.len); if (*defoutbuf == '%') *defoutbuf = '_'; job_params->output.len = job_params->routine.len + defoutext.len; job_params->output.addr = &defoutbuf[0]; } else if (IS_JOB_SOCKET(job_params->output.addr, job_params->output.len)) { handle_len = JOB_SOCKET_HANDLE_LEN(job_params->output.len); if ((NULL == socket_pool) || (-1 == iosocket_handle(JOB_SOCKET_HANDLE(job_params->output.addr), &handle_len, FALSE, socket_pool))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 5, "OUTPUT", 
job_params->output.len, job_params->output.addr); } else if (!(status = ojchkfs (job_params->output.addr, job_params->output.len, FALSE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 6, "OUTPUT", job_params->output.len, job_params->output.addr); /* * Error file */ if (job_params->error.len == 0) { if (!deferrbuf) deferrbuf = malloc(MAX_FILSPC_LEN); memcpy (&deferrbuf[0], job_params->routine.addr, job_params->routine.len); memcpy (&deferrbuf[job_params->routine.len], deferrext.addr, deferrext.len); if (*deferrbuf == '%') *deferrbuf = '_'; job_params->error.len = job_params->routine.len + deferrext.len; job_params->error.addr = &deferrbuf[0]; } else if (IS_JOB_SOCKET(job_params->error.addr, job_params->error.len)) { handle_len = JOB_SOCKET_HANDLE_LEN(job_params->error.len); if ((NULL == socket_pool) || (-1 == iosocket_handle(JOB_SOCKET_HANDLE(job_params->error.addr), &handle_len, FALSE, socket_pool))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 5, "ERROR", job_params->error.len, job_params->error.addr); } else if (!(status = ojchkfs (job_params->error.addr, job_params->error.len, FALSE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 5, "ERROR", job_params->error.len, job_params->error.addr); /* * Global Directory */ if (job_params->gbldir.len) if (!(status = ojchkfs (job_params->gbldir.addr, job_params->gbldir.len, FALSE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 6, "GBLDIR", job_params->gbldir.len, job_params->gbldir.addr); /* * Startup */ if (job_params->startup.len) if (!(status = ojchkfs (job_params->startup.addr, job_params->startup.len, TRUE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 7, "STARTUP", job_params->startup.len, job_params->startup.addr); /* * Default Directory */ if (job_params->directory.len) if (!(status = ojchkfs (job_params->directory.addr, job_params->directory.len, FALSE))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_PARFILSPC, 4, 7, "DEFAULT", job_params->directory.len, job_params->directory.addr); }
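/* Stand-alone sketch of walking a parameter stream like the one ojparams consumes: each entry is a one-byte
 * code followed by nothing, a 4-byte integer, or a counted string, and the stream ends with a terminator byte.
 * The P_* codes and the sample stream are illustrative assumptions, not the real jp_* values. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

enum { P_EOL = 0, P_FLAG = 1, P_NUM = 2, P_STR = 3 };	/* hypothetical parameter codes */

static void walk_params(const unsigned char *p)
{
	while (P_EOL != *p)
	{
		unsigned char code = *p++;

		switch (code)
		{
		case P_FLAG:				/* no payload */
			printf("flag parameter\n");
			break;
		case P_NUM:				/* 4-byte native-endian payload */
		{
			int32_t val;

			memcpy(&val, p, sizeof(val));
			p += sizeof(val);
			printf("numeric parameter %d\n", (int)val);
			break;
		}
		case P_STR:				/* counted-string payload: length byte then text */
			printf("string parameter \"%.*s\"\n", (int)*p, (const char *)(p + 1));
			p += 1 + *p;
			break;
		default:
			printf("unknown code %u, stopping\n", code);
			return;
		}
	}
}

int main(void)
{
	/* flag, number 7 (on a little-endian machine), string "OUT.LOG", end */
	unsigned char stream[] = { P_FLAG, P_NUM, 7, 0, 0, 0, P_STR, 7, 'O', 'U', 'T', '.', 'L', 'O', 'G', P_EOL };

	walk_params(stream);
	return 0;
}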
/* ------------------------------------------------------------------ * Reset transfer table to normal settings. * * - Intent: Put back all state that was or could have been changed * due to prior deferral(s). * - Would be easier to implement this assumption if this routine * were changed to delegate responsibility as does the * corresponding set routine. * - Note that all events are reenabled before user's handler * would be executed (assuming one is appropriate for this event * and has been specified) * => It's possible to have handler-in-handler execution. * => If no handler executed, would lose other deferred events due * to reset of all pending. * - If M profiling is active, some entries should be set to the * op_mprof* routines. * - Return value indicates whether reset type matches set type. * If it does not, this indicates an "abnormal" path. * - Should still reset the table in this case. * - BUT: Consider also calling a reset routine for all setters * that have been logged, to allow them to reset themselves, * (for example, to reset TP timer & flags, or anything else * that could cause unintended effects if left set after * deferred events have been cleared). * - May need to update behavior to ensure it doesn't miss a * critical event between registration of first event * and clearing of all events. This seems problematic only if * the following are true: * - Two events are deferred at one time (call them A and B). * - An M exception handler (ZTRAP or device) is required to * execute due to B and perform action X. * - Either no handler executes due to A, or the handler that * does execute does not perform action X in response to B * (this includes the possibility of performing X but not * as needed by B, e.g. perhaps it should happen for both * A and B but only happens for A). * Seems like most or all of these can be addressed by carefully * specifying coding requirements on M handlers. * ------------------------------------------------------------------ */ boolean_t xfer_reset_handlers(int4 event_type) { int4 e_type; boolean_t reset_type_is_set_type; int4 status; int e, ei, e_tot = 0; /* ------------------------------------------------------------------ * Note: If reset routine can preempt path from handler to * set routine (e.g. clearing event before acting on it), * these assertions can fail. * Should not happen in current design. * ------------------------------------------------------------------ */ assert(0 < num_deferred); assert(0 < xfer_table_events[event_type]); if (is_tracing_on) { FIX_XFER_ENTRY(xf_linefetch, op_mproflinefetch); FIX_XFER_ENTRY(xf_linestart, op_mproflinestart); FIX_XFER_ENTRY(xf_forchk1, op_mprofforchk1); } else { FIX_XFER_ENTRY(xf_linefetch, op_linefetch); FIX_XFER_ENTRY(xf_linestart, op_linestart); FIX_XFER_ENTRY(xf_forchk1, op_forchk1); } FIX_XFER_ENTRY(xf_forloop, op_forloop); FIX_XFER_ENTRY(xf_zbfetch, op_zbfetch); FIX_XFER_ENTRY(xf_zbstart, op_zbstart); FIX_XFER_ENTRY(xf_ret, opp_ret); FIX_XFER_ENTRY(xf_retarg, op_retarg); DBGDFRDEVNT((stderr, "xfer_reset_handlers: Reset xfer_table for event type %d.\n", event_type)); reset_type_is_set_type = (event_type == first_event); # ifdef DEBUG if (!reset_type_is_set_type) rts_error(VARLSTCNT(4) ERR_DEFEREVENT, 2, event_type, first_event); # endif # ifdef DEBUG_DEFERRED_EVENT /* Note: concurrent modification of array elements means events that occur during this section will * cause inconsistent totals. 
*/ for (ei = no_event; ei < DEFERRED_EVENTS; ei++) e_tot += xfer_table_events[ei]; if (1 < e_tot) { DBGDFRDEVNT((stderr, "xfer_reset_handlers: Event Log:\n")); for (ei=no_event; ei<DEFERRED_EVENTS; ei++) DBGDFRDEVNT((stderr, "xfer_reset_handlers: Event type %d: count was %d.\n", ei, xfer_table_events[ei])); } # endif /* ------------------------------------------------------------------------- * Kluge(?): set all locations to nonzero value to * prevent interleaving with reset activities. * * Would be better to aswp with 0: * - Won't lose any new events that way. * ------------------------------------------------------------------------- */ for (e_type = 1; DEFERRED_EVENTS > e_type; e_type++) { xfer_table_events[e_type] = 1; } /* ------------------------------------------------------------------------- * Reset external event modules that need it. * (Should do this in a more modular fashion.) * None * ------------------------------------------------------------------------- */ /* -------------------------------------------- * Reset private variables. * -------------------------------------------- */ first_event = no_event; num_deferred = 0; ctrap_action_is = 0; outofband = 0; # ifdef VMS status = sys$clref(efn_outofband); assert(SS$_WASSET == status); assertpro((SS$_WASSET == status) || (SS$_WASCLR == status)); # endif /* ****************************************************************** * There is a race here: * If a new event interrupts after previous line and before * corresponding assignment in next loop, it will be missed. * For most events, we're going to an M handler anyway, so it won't * matter (assuming the handler would handle all pending events). * But if not going to an M handler (e.g. if resetting zbreak/zstep), * could miss another event. * * Better (to avoid missing any events): * aswp xfer_table_events elements (as described above), and * check here if still zero. If not, must have missed that event * since aswp, possibly before num_deferred was reset => never set * xfer_table => should do that now. * If more than one is nonzero, choose first arbitrarily * unless first_event is now set -- unless it is, we've lost track of * which event was first. * ****************************************************************** */ /* Clear to allow new events to be reset only after we're all done. */ for (e_type = 1; DEFERRED_EVENTS > e_type; e_type++) xfer_table_events[e_type] = FALSE; return reset_type_is_set_type; }
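/* Sketch of the "aswp with 0" improvement the comments above propose: atomically exchanging each event counter
 * with zero returns whatever was pending, so an event posted between the read and the clear cannot be lost.
 * The array size, event ids and function names are illustrative assumptions, not the xfer_table code. */
#include <stdatomic.h>
#include <stdio.h>

#define N_EVENTS	5

static atomic_uint event_pending[N_EVENTS];	/* set from signal handlers / other threads */

static void post_event(int e) { atomic_fetch_add(&event_pending[e], 1); }

static void drain_events(void)
{
	for (int e = 0; e < N_EVENTS; e++)
	{
		unsigned seen = atomic_exchange(&event_pending[e], 0);	/* read and clear in one step */

		if (seen)
			printf("event %d had %u pending occurrence(s)\n", e, seen);
	}
}

int main(void)
{
	post_event(2);
	post_event(2);
	post_event(4);
	drain_events();	/* anything posted after the exchange is seen on the next drain, never dropped */
	return 0;
}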
int mur_forward_multi_proc(reg_ctl_list *rctl) { boolean_t multi_proc, this_reg_stuck, release_latch, ok_to_play; boolean_t cancelled_dbsync_timer, cancelled_timer; reg_ctl_list *rctl_top, *prev_rctl; jnl_ctl_list *jctl; gd_region *reg; sgmnt_addrs *csa; seq_num rec_token_seq; jnl_tm_t rec_time; enum broken_type recstat; jnl_record *rec; enum jnl_record_type rectype; char errstr[256]; int i, rctl_index, save_errno, num_procs_stuck, num_reg_stuck; uint4 status, regcnt_stuck, num_partners, start_hrtbt_cntr; forw_multi_struct *forw_multi; shm_forw_multi_t *sfm; multi_struct *multi; jnl_tm_t adjusted_resolve_time; shm_reg_ctl_t *shm_rctl_start, *shm_rctl, *first_shm_rctl; size_t shm_size, reccnt, copy_size; int4 *size_ptr; char *shmPtr; /* not using "shm_ptr" since it is already used in an AIX include file */ int shmid; multi_proc_shm_hdr_t *mp_hdr; /* Pointer to "multi_proc_shm_hdr_t" structure in shared memory */ status = 0; /* Although we made sure the # of tasks is the same as the # of processes forked off (in the "gtm_multi_proc" * invocation in "mur_forward"), it is possible one of the forked process finishes one invocation of * "mur_forward_multi_proc" before even another forked process gets assigned one task in "gtm_multi_proc_helper". * In this case, we would be invoked more than once. But the first invocation would have done all the needed stuff * so return for later invocations. */ if (mur_forward_multi_proc_done) return 0; mur_forward_multi_proc_done = TRUE; /* Note: "rctl" is unused. But cannot avoid passing it since "gtm_multi_proc" expects something */ prev_rctl = NULL; rctl_start = NULL; adjusted_resolve_time = murgbl.adjusted_resolve_time; assert(0 == murgbl.regcnt_remaining); multi_proc = multi_proc_in_use; /* cache value in "local" to speed up access inside loops below */ if (multi_proc) { mp_hdr = multi_proc_shm_hdr; shm_rctl_start = mur_shm_hdr->shm_rctl_start; if (jgbl.onlnrlbk) { for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { assert(rctl->csa->hold_onto_crit); /* would have been set in parent process */ rctl->csa->hold_onto_crit = FALSE; /* reset since we dont own this region */ assert(rctl->csa->now_crit); /* would have been set in parent process */ rctl->csa->now_crit = FALSE; /* reset since we dont own this region */ } } START_HEARTBEAT_IF_NEEDED; /* heartbeat timer needed later (in case not already started by "gtm_multi_proc") */ } first_shm_rctl = NULL; /* Phase1 of forward recovery starts */ for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { /* Check if "rctl" is available for us or if some other concurrent process has taken it */ if (multi_proc) { rctl_index = rctl - &mur_ctl[0]; shm_rctl = &shm_rctl_start[rctl_index]; if (shm_rctl->owning_pid) { assert(process_id != shm_rctl->owning_pid); continue; } GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch); assert(release_latch); for ( ; rctl < rctl_top; rctl++, shm_rctl++) { if (shm_rctl->owning_pid) { assert(process_id != shm_rctl->owning_pid); continue; } shm_rctl->owning_pid = process_id; /* Declare ownership */ rctl->this_pid_is_owner = TRUE; if (jgbl.onlnrlbk) { /* This is an online rollback and crit was grabbed on all regions by the parent rollback * process. But this child process now owns this region and does the actual rollback on * this region so borrow crit for the duration of this child process. 
*/ csa = rctl->csa; csa->hold_onto_crit = TRUE; csa->now_crit = TRUE; assert(csa->nl->in_crit == mp_hdr->parent_pid); csa->nl->in_crit = process_id; assert(csa->nl->onln_rlbk_pid == mp_hdr->parent_pid); csa->nl->onln_rlbk_pid = process_id; } if (NULL == first_shm_rctl) first_shm_rctl = shm_rctl; break; } REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch); if (rctl >= rctl_top) { assert(rctl == rctl_top); break; } /* Set key to print this rctl'ss region-name as prefix in case this forked off process prints any output */ MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key); # ifdef MUR_DEBUG fprintf(stderr, "pid = %d : Owns region %s\n", process_id, multi_proc_key); # endif } else rctl->this_pid_is_owner = TRUE; if (mur_options.forward) { assert(NULL == rctl->jctl_turn_around); jctl = rctl->jctl = rctl->jctl_head; assert(jctl->reg_ctl == rctl); jctl->rec_offset = JNL_HDR_LEN; jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; /* initialized to reflect journaling is not enabled */ if (mur_options.rollback) jgbl.mur_jrec_seqno = jctl->jfh->start_seqno; } else { jctl = rctl->jctl = (NULL == rctl->jctl_turn_around) ? rctl->jctl_head : rctl->jctl_turn_around; assert(jctl->reg_ctl == rctl); jctl->rec_offset = jctl->turn_around_offset; jgbl.mur_jrec_seqno = jctl->turn_around_seqno; assert((NULL != rctl->jctl_turn_around) || (0 == jctl->rec_offset)); } if (mur_options.rollback) { if (murgbl.consist_jnl_seqno < jgbl.mur_jrec_seqno) { /* Assert that murgbl.losttn_seqno is never lesser than jgbl.mur_jrec_seqno (the turnaround * point seqno) as this is what murgbl.consist_jnl_seqno is going to be set to and will * eventually be the post-rollback seqno. If this condition is violated, the result of the * recovery is a compromised database (the file header will indicate a Region Seqno which * is not necessarily correct since seqnos prior to it might be absent in the database). * Therefore, this is an out-of-design situation with respect to rollback and so stop it. */ assert(murgbl.losttn_seqno >= jgbl.mur_jrec_seqno); murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno; } assert(murgbl.consist_jnl_seqno <= murgbl.losttn_seqno); } if (mur_options.update || mur_options.extr[GOOD_TN]) { reg = rctl->gd; gv_cur_region = reg; tp_change_reg(); /* note : sets cs_addrs to non-NULL value even if gv_cur_region->open is FALSE * (cs_data could still be NULL). */ rctl->csa = cs_addrs; cs_addrs->miscptr = (void *)rctl; rctl->csd = cs_data; rctl->sgm_info_ptr = cs_addrs->sgm_info_ptr; assert(!reg->open || (NULL != cs_addrs->dir_tree)); gv_target = cs_addrs->dir_tree; } jctl->after_end_of_data = FALSE; status = mur_next(jctl, jctl->rec_offset); assert(ERR_JNLREADEOF != status); /* cannot get EOF at start of forward processing */ if (SS_NORMAL != status) goto finish; PRINT_VERBOSE_STAT(jctl, "mur_forward:at the start"); rctl->process_losttn = FALSE; /* Any multi-region TP transaction will be processed as multiple single-region TP transactions up * until the tp-resolve-time is reached. From then on, they will be treated as one multi-region TP * transaction. This is needed for proper lost-tn determination (any multi-region transaction that * gets played in a region AFTER it has already encountered a broken tn should treat this as a lost tn). */ do { if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr)) { /* We are at a logical point. 
So exit if signaled by parent */ status = ERR_FORCEDHALT; goto finish; } assert(jctl == rctl->jctl); rec = rctl->mur_desc->jnlrec; rec_time = rec->prefix.time; if (rec_time > mur_options.before_time) break; /* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */ if (rec_time < mur_options.after_time) { status = mur_next_rec(&jctl); continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */ } if (rec_time >= adjusted_resolve_time) break; /* Records after this adjusted resolve_time will be processed below in phase2 */ /* Note: Since we do hashtable token processing only for records from tp_resolve_time onwards, * it is possible that if we encounter any broken transactions here we wont know they are broken * but will play them as is. That is unavoidable. Specify -SINCE_TIME (for -BACKWARD rollback/recover) * and -VERIFY (for -FORWARD rollback/recover) to control tp_resolve_time (and in turn more * effective broken tn determination). */ status = mur_forward_play_cur_jrec(rctl); if (SS_NORMAL != status) break; status = mur_next_rec(&jctl); } while (SS_NORMAL == status); CHECK_IF_EOF_REACHED(rctl, status); /* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */ if (SS_NORMAL != status) { /* ERR_FILENOTCREATE is possible from "mur_cre_file_extfmt" OR ERR_FORCEDHALT is possible * from "mur_forward_play_cur_jrec". No other errors are known to occur here. Assert accordingly. */ assert((ERR_FILENOTCREATE == status) || (ERR_FORCEDHALT == status)); goto finish; } if (rctl->forw_eof_seen) { PRINT_VERBOSE_STAT(jctl, "mur_forward:Reached EOF before tp_resolve_time"); continue; /* Reached EOF before even getting to tp_resolve_time. * Do not even consider region for next processing loop */ } rctl->last_tn = 0; murgbl.regcnt_remaining++; /* # of regions participating in recovery at this point */ if (NULL == rctl_start) rctl_start = rctl; if (NULL != prev_rctl) { prev_rctl->next_rctl = rctl; rctl->prev_rctl = prev_rctl; } prev_rctl = rctl; assert(murgbl.ok_to_update_db || !rctl->db_updated); PRINT_VERBOSE_STAT(jctl, "mur_forward:at tp_resolve_time"); } if (multi_proc) multi_proc_key = NULL; /* reset key until it can be set to rctl's region-name again */ /* Note that it is possible for rctl_start to be NULL at this point. That is there is no journal record in any region * AFTER the calculated tp-resolve-time. This is possible if for example -AFTER_TIME was used and has a time later * than any journal record in all journal files. If rctl_start is NULL, prev_rctl should also be NULL and vice versa. */ if (NULL != rctl_start) { assert(NULL != prev_rctl); prev_rctl->next_rctl = rctl_start; rctl_start->prev_rctl = prev_rctl; } rctl = rctl_start; regcnt_stuck = 0; /* # of regions we are stuck in waiting for other regions to resolve a multi-region TP transaction */ assert((NULL == rctl) || (NULL == rctl->forw_multi)); gv_cur_region = NULL; /* clear out any previous value to ensure gv_cur_region/cs_addrs/cs_data * all get set in sync by the MUR_CHANGE_REG macro below. 
*/ /* Phase2 of forward recovery starts */ while (NULL != rctl) { /* while there is at least one region remaining with unprocessed journal records */ assert(NULL != rctl_start); assert(0 < murgbl.regcnt_remaining); if (NULL != rctl->forw_multi) { /* This region's current journal record is part of a TP transaction waiting for other regions */ regcnt_stuck++; assert(regcnt_stuck <= murgbl.regcnt_remaining); if (regcnt_stuck == murgbl.regcnt_remaining) { assertpro(multi_proc_in_use); /* Else : Out-of-design situation. Stuck in ALL regions. */ /* Check one last time if all regions are stuck waiting for another process to resolve the * multi-region TP transaction. If so, wait in a sleep loop. If not, we can proceed. */ rctl = rctl_start; start_hrtbt_cntr = heartbeat_counter; do { if (IS_FORCED_MULTI_PROC_EXIT(mp_hdr)) { /* We are at a logical point. So exit if signaled by parent */ status = ERR_FORCEDHALT; goto finish; } forw_multi = rctl->forw_multi; assert(NULL != forw_multi); sfm = forw_multi->shm_forw_multi; assert(NULL != sfm); assert(sfm->num_reg_seen_forward <= sfm->num_reg_seen_backward); # ifdef MUR_DEBUG fprintf(stderr, "Pid = %d : Line %d : token = %llu : forward = %d : backward = %d\n", process_id, __LINE__, (long long int)sfm->token, sfm->num_reg_seen_forward, sfm->num_reg_seen_backward); # endif if (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward) { /* We are no longer stuck in this region */ assert(!forw_multi->no_longer_stuck); forw_multi->no_longer_stuck = TRUE; break; } rctl = rctl->next_rctl; /* Move on to the next available region */ assert(NULL != rctl); if (rctl == rctl_start) { /* We went through all regions once and are still stuck. * Sleep until at leat TWO heartbeats have elapsed after which check for deadlock. * Do this only in the child process that owns the FIRST region in the region list. * This way we dont have contention for the GRAB_MULTI_PROC_LATCH from * all children at more or less the same time. */ if ((rctl == mur_ctl) && (heartbeat_counter > (start_hrtbt_cntr + 2))) { /* Check if all processes are stuck for a while. If so assertpro */ GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch); assert(release_latch); shm_rctl_start = mur_shm_hdr->shm_rctl_start; num_reg_stuck = 0; for (i = 0; i < murgbl.reg_total; i++) { shm_rctl = &shm_rctl_start[i]; sfm = shm_rctl->shm_forw_multi; if (NULL != sfm) { if (sfm->num_reg_seen_forward != sfm->num_reg_seen_backward) num_reg_stuck++; } } REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch); /* If everyone is stuck at this point, it is an out-of-design situation */ assertpro(num_reg_stuck < murgbl.reg_total); start_hrtbt_cntr = heartbeat_counter; } else { /* Sleep and recheck if any region we are stuck in got resolved. * To minimize time spent sleeping, we just yield our timeslice. */ rel_quant(); continue; } } } while (TRUE); } else { rctl = rctl->next_rctl; /* Move on to the next available region */ assert(NULL != rctl); continue; } } regcnt_stuck = 0; /* restart the counter now that we found at least one non-stuck region */ MUR_CHANGE_REG(rctl); jctl = rctl->jctl; this_reg_stuck = FALSE; for ( status = SS_NORMAL; SS_NORMAL == status; ) { if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr)) { /* We are at a logical point. 
So exit if signaled by parent */ status = ERR_FORCEDHALT; goto finish; } assert(jctl == rctl->jctl); rec = rctl->mur_desc->jnlrec; rec_time = rec->prefix.time; if (rec_time > mur_options.before_time) break; /* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */ assert((rec_time >= adjusted_resolve_time) || (mur_options.notncheck && !mur_options.verify)); assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated)); if (rec_time < mur_options.after_time) { status = mur_next_rec(&jctl); continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */ } /* Check if current journal record can be played right away or need to wait for corresponding journal * records from other participating TP regions to be reached. A non-TP or ZTP transaction can be played * without issues (i.e. has no dependencies with any other regions). A single-region TP transaction too * falls in the same category. A multi-region TP transaction needs to wait until all participating regions * have played all journal records BEFORE this TP in order to ensure recover plays records in the exact * same order that GT.M performed them in. */ /* If FENCE_NONE is specified, we would not have maintained any multi hashtable in mur_back_process for * broken transaction processing. So we process multi-region TP transactions as multiple single-region * TP transactions in forward phase. */ if (FENCE_NONE != mur_options.fences) { rectype = (enum jnl_record_type)rec->prefix.jrec_type; if (IS_TP(rectype) && IS_TUPD(rectype)) { assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype)); assert(&rec->jrec_set_kill.num_participants == &rec->jrec_ztworm.num_participants); assert(&rec->jrec_set_kill.num_participants == &rec->jrec_lgtrig.num_participants); num_partners = rec->jrec_set_kill.num_participants; assert(0 < num_partners); if (1 < num_partners) { this_reg_stuck = TRUE; assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num); assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num); } } } if (this_reg_stuck) { rec_token_seq = GET_JNL_SEQNO(rec); MUR_FORW_TOKEN_LOOKUP(forw_multi, rec_token_seq, rec_time); if (NULL != forw_multi) { /* This token has already been seen in another region in forward processing. * Add current region as well. If all regions have been resolved, then play * the entire transaction maintaining the exact same order of updates within. */ if (!forw_multi->no_longer_stuck) MUR_FORW_TOKEN_ONE_MORE_REG(forw_multi, rctl); } else { /* First time we are seeing this token in forward processing. Check if this * has already been determined to be a broken transaction. */ recstat = GOOD_TN; multi = NULL; if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq)) { multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, TPFENCE); if ((NULL != multi) && (0 < multi->partner)) recstat = BROKEN_TN; } MUR_FORW_TOKEN_ADD(forw_multi, rec_token_seq, rec_time, rctl, num_partners, recstat, multi); } /* Check that "tabent" field has been initialized above (by either the MUR_FORW_TOKEN_LOOKUP * or MUR_FORW_TOKEN_ADD macros). This is relied upon by "mur_forward_play_multireg_tp" below. 
*/ assert(NULL != forw_multi->u.tabent); assert(forw_multi->num_reg_seen_forward <= forw_multi->num_reg_seen_backward); if (multi_proc) { sfm = forw_multi->shm_forw_multi; ok_to_play = (NULL == sfm) || (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward); } else ok_to_play = (forw_multi->num_reg_seen_forward == forw_multi->num_reg_seen_backward); assert(ok_to_play || !forw_multi->no_longer_stuck); if (ok_to_play ) { /* We have enough information to proceed with playing this multi-region TP in * forward processing (even if we might not have seen all needed regions). Now play it. * Note that the TP could be BROKEN_TN or GOOD_TN. The callee handles it. */ assert(forw_multi == rctl->forw_multi); status = mur_forward_play_multireg_tp(forw_multi, rctl); this_reg_stuck = FALSE; /* Note that as part of playing the TP transaction, we could have reached * the EOF of rctl. In this case, we need to break out of the loop. */ if ((SS_NORMAL != status) || rctl->forw_eof_seen) break; assert(NULL == rctl->forw_multi); assert(!dollar_tlevel); jctl = rctl->jctl; /* In case the first record after the most recently processed * TP transaction is in the next generation journal file */ continue; } break; } else { status = mur_forward_play_cur_jrec(rctl); if (SS_NORMAL != status) break; } assert(!this_reg_stuck); status = mur_next_rec(&jctl); } assert((NULL == rctl->forw_multi) || this_reg_stuck); assert((NULL != rctl->forw_multi) || !this_reg_stuck); if (!this_reg_stuck) { /* We are not stuck in this region (to resolve a multi-region TP). * This means we are done processing all the records of this region. */ assert(NULL == rctl->forw_multi); if (!rctl->forw_eof_seen) { CHECK_IF_EOF_REACHED(rctl, status); /* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */ if (SS_NORMAL != status) { assert(ERR_FILENOTCREATE == status); goto finish; } assert(!dollar_tlevel); DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl); /* since all of its records should have been processed */ } else { /* EOF was seen in rctl inside "mur_forward_play_multireg_tp" and it was removed * from the unprocessed list of rctls. At the time rctl was removed, its "next_rctl" * field could have been pointing to another <rctl> that has since then also been * removed inside the same function. Therefore the "next_rctl" field is not reliable * in this case but instead we should rely on the global variable "rctl_start" which * points to the list of unprocessed rctls. Set "next_rctl" accordingly. */ rctl->next_rctl = rctl_start; if (ERR_JNLREADEOF == status) status = SS_NORMAL; } assert(rctl->deleted_from_unprocessed_list); } assert(SS_NORMAL == status); assert(!this_reg_stuck || !rctl->forw_eof_seen); assert((NULL == rctl->next_rctl) || (NULL != rctl_start)); assert((NULL == rctl->next_rctl) || (0 < murgbl.regcnt_remaining)); rctl = rctl->next_rctl; /* Note : even though "rctl" could have been deleted from the doubly linked list above, * rctl->next_rctl is not touched so we can still use it to get to the next element. 
					 */
	}
	assert(0 == murgbl.regcnt_remaining);
	jgbl.mur_pini_addr_reset_fnptr = NULL;	/* No more simulation of GT.M activity for any region */
	prc_vec = murgbl.prc_vec;	/* Use process-vector of MUPIP RECOVER (not any simulating GT.M process) from now onwards */
	assert(0 == dollar_tlevel);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		if (!rctl->this_pid_is_owner)
		{
			assert(multi_proc_in_use);
			continue;	/* in a parallel processing environment, process only regions we own */
		}
		if (multi_proc)
		{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
		}
		PRINT_VERBOSE_STAT(rctl->jctl, "mur_forward:at the end");
		assert(!mur_options.rollback || (0 != murgbl.consist_jnl_seqno));
		assert(mur_options.rollback || (0 == murgbl.consist_jnl_seqno));
		assert(!dollar_tlevel);	/* In case it applied a broken TUPD */
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		rctl->mur_plst = NULL;	/* reset now that simulation of GT.M updates is done */
		/* Ensure mur_block_count_correct is called if updates are allowed */
		if (murgbl.ok_to_update_db && (SS_NORMAL != mur_block_count_correct(rctl)))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_BLKCNTEDITFAIL, 2, DB_LEN_STR(rctl->gd));
			murgbl.wrn_count++;
		}
	}
finish:
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	if ((SS_NORMAL == status) && mur_options.show)
		mur_output_show();
	if (NULL != first_shm_rctl)
	{	/* Transfer needed process-private information to shared memory so the parent process can later inherit this. */
		first_shm_rctl->err_cnt = murgbl.err_cnt;
		first_shm_rctl->wrn_count = murgbl.wrn_count;
		first_shm_rctl->consist_jnl_seqno = murgbl.consist_jnl_seqno;
		/* If extract files were created by this process for one or more regions, then copy that information to
		 * shared memory so the parent process can use this information to do a merge sort.
		 */
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{
			assert(multi_proc_in_use);
			if (!rctl->this_pid_is_owner)
				continue;	/* in a parallel processing environment, process only regions we own */
			/* Cancel any flush/dbsync timers by this child process for this region. This is because the
			 * child is not going to go through exit handling code (no gds_rundown etc.) and we need to
			 * clean up csa->nl->wcs_timers (normally done by gds_rundown).
			 */
			if (NULL != rctl->csa)	/* rctl->csa can be NULL in case of "mupip journal -extract" etc. */
				CANCEL_DB_TIMERS(rctl->gd, rctl->csa, cancelled_timer, cancelled_dbsync_timer);
			reccnt = 0;
			for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0; recstat < TOT_EXTR_TYPES; recstat++, size_ptr++)
			{	/* Assert "extr_file_created" information is in sync between rctl and shm_rctl.
				 * This was done at the end of "mur_cre_file_extfmt".
				 */
				assert(shm_rctl->extr_file_created[recstat] == rctl->extr_file_created[recstat]);
				/* Assert that if *size_ptr is non-zero, then we better have created an extract file.
				 * Note that the converse is not true. It is possible we created a file, for example to
				 * write an INCTN record, but decided not to write anything because it was not a -detail
				 * type of extract. So *size_ptr could be 0 even though we created the extract file.
				 */
				assert(!*size_ptr || rctl->extr_file_created[recstat]);
				shm_rctl->jnlext_list_size[recstat] = *size_ptr;
				reccnt += *size_ptr;
			}
			assert(INVALID_SHMID == shm_rctl->jnlext_shmid);
			shm_size = reccnt * SIZEOF(jnlext_multi_t);
			/* If we are quitting because of an abnormal status OR a forced signal to terminate
			 * OR if the parent is dead (kill -9), don't bother creating a shmid to communicate back with the parent.
			 */
			if (mp_hdr->parent_pid != getppid())
			{
				SET_FORCED_MULTI_PROC_EXIT;	/* Also signal sibling children to stop processing */
				if (SS_NORMAL != status)
					status = ERR_FORCEDHALT;
			}
			if ((SS_NORMAL == status) && shm_size)
			{
				shmid = shmget(IPC_PRIVATE, shm_size, 0600 | IPC_CREAT);
				if (-1 == shmid)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr), "shmget() : shmsize=0x%llx", shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, LEN_AND_STR(errstr),
							CALLFROM, save_errno);
				}
				shmPtr = (char *)do_shmat(shmid, 0, 0);
				if (-1 == (sm_long_t)shmPtr)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr), "shmat() : shmid=%d shmsize=0x%llx", shmid, shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, LEN_AND_STR(errstr),
							CALLFROM, save_errno);
				}
				shm_rctl->jnlext_shmid = shmid;
				shm_rctl->jnlext_shm_size = shm_size;
				for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
						recstat < TOT_EXTR_TYPES; recstat++, size_ptr++)
				{
					shm_size = *size_ptr;
					if (shm_size)
					{
						copy_size = copy_list_to_buf(rctl->jnlext_multi_list[recstat],
										(int4)shm_size, shmPtr);
						assert(copy_size == (shm_size * SIZEOF(jnlext_multi_t)));
						shmPtr += copy_size;
					}
				}
			}
		}
	}
	mur_close_file_extfmt(IN_MUR_CLOSE_FILES_FALSE);	/* Need to flush buffered extract/losttrans/brokentrans files */
	return (int)status;
}
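
/* Illustrative sketch only -- NOT part of the original source and never compiled (guarded by the made-up macro
 * MUR_FORWARD_SHM_HANDOFF_DEMO). It shows, with plain POSIX calls, the shared-memory handoff pattern used above:
 * a child creates a private SysV segment with shmget(IPC_PRIVATE), copies its records into it, and publishes the
 * shmid so the parent can attach, read the records (e.g. as merge-sort input) and remove the segment. The names
 * record_t and the pipe used to carry the shmid are stand-ins for illustration; the real code stores the shmid in
 * shm_rctl->jnlext_shmid inside already-shared memory and copies jnlext_multi_t entries.
 */
#ifdef MUR_FORWARD_SHM_HANDOFF_DEMO
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/ipc.h>
#include <sys/shm.h>

typedef struct
{
	long	token;		/* stand-in for a journal token/sequence number */
	int	child_pid;	/* which child produced the record */
} record_t;			/* stand-in for jnlext_multi_t */

int main(void)
{
	int		pfd[2], shmid, i, status;
	pid_t		pid;
	record_t	*recs;

	if (0 != pipe(pfd))
		return 1;
	pid = fork();
	if (0 == pid)
	{	/* child : create a private segment, fill it, publish the shmid, then detach and exit */
		shmid = shmget(IPC_PRIVATE, 4 * sizeof(record_t), 0600 | IPC_CREAT);
		recs = (record_t *)shmat(shmid, NULL, 0);
		for (i = 0; i < 4; i++)
		{
			recs[i].token = 100 + i;
			recs[i].child_pid = (int)getpid();
		}
		shmdt(recs);
		write(pfd[1], &shmid, sizeof(shmid));	/* demo transport; the real code uses pre-existing shared memory */
		_exit(0);
	}
	/* parent : wait for the child, attach to the published segment, consume it and finally remove it */
	waitpid(pid, &status, 0);
	read(pfd[0], &shmid, sizeof(shmid));
	recs = (record_t *)shmat(shmid, NULL, 0);
	for (i = 0; i < 4; i++)
		printf("record %d : token=%ld from child pid %d\n", i, recs[i].token, recs[i].child_pid);
	shmdt(recs);
	shmctl(shmid, IPC_RMID, NULL);	/* the segment persists until explicitly removed */
	return 0;
}
#endif	/* MUR_FORWARD_SHM_HANDOFF_DEMO */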