void gvzwrite_clnup(void)
{
	gv_key	*old;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	gv_cur_region = gvzwrite_block->gd_reg;
	change_reg();
	assert(reset_gv_target == ((gv_namehead *)gvzwrite_block->old_targ));
	if (NULL != gvzwrite_block->old_key)
	{
		old = (gv_key *)gvzwrite_block->old_key;
		memcpy(&gv_currkey->base[0], &old->base[0], old->end + 1);
		gv_currkey->end = old->end;
		gv_currkey->prev = old->prev;
		gd_map = gvzwrite_block->old_map;
		gd_map_top = gvzwrite_block->old_map_top;
		free(gvzwrite_block->old_key);
		gvzwrite_block->old_key = gvzwrite_block->old_targ = (unsigned char *)NULL;
		gvzwrite_block->subsc_count = 0;
		TREF(gv_last_subsc_null) = gvzwrite_block->gv_last_subsc_null;
		TREF(gv_some_subsc_null) = gvzwrite_block->gv_some_subsc_null;
	}
	RESET_GV_TARGET(DO_GVT_GVKEY_CHECK);
}
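/* op_gvkill: KILL the database node(s) identified by gv_currkey in the current region. Dispatches to the
 * access-method-specific kill routine (gvcst_kill for BG/MM, gvcmx_kill for CM, gvusr_kill otherwise) and
 * then propagates the kill to any replicated segments chained off the region's repl_list.
 */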
void op_gvkill(void)
{
	gd_region	*reg;

	error_def(ERR_DBPRIVERR);

	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (gv_curr_subsc_null && gv_cur_region->null_subs == FALSE)
		sgnl_gvnulsubsc();
	if (gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm)
	{
		if (gv_target->root)
			gvcst_kill(TRUE);
	} else if (gv_cur_region->dyn.addr->acc_meth == dba_cm)
		gvcmx_kill(TRUE);
	else
		gvusr_kill(TRUE);
	if (gv_cur_region->dyn.addr->repl_list)
	{
		gv_replication_error = gv_replopen_error;
		gv_replopen_error = FALSE;
		reg = gv_cur_region;
		while (gv_cur_region = gv_cur_region->dyn.addr->repl_list)	/* set replicated segments */
		{
			if (gv_cur_region->open)
			{
				change_reg();
				kill_var();
			} else
				gv_replication_error = TRUE;
		}
		gv_cur_region = reg;
		change_reg();
		if (gv_replication_error)
			sgnl_gvreplerr();
	}
}
void op_gvkill(void)
{
	gd_region	*reg;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (TREF(gv_last_subsc_null) && NEVER == gv_cur_region->null_subs)
		sgnl_gvnulsubsc();
	if (gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm)
	{
		if (IS_OK_TO_INVOKE_GVCST_KILL(gv_target))
			gvcst_kill(TRUE);
	} else if (gv_cur_region->dyn.addr->acc_meth == dba_cm)
		gvcmx_kill(TRUE);
	else
		gvusr_kill(TRUE);
	if (gv_cur_region->dyn.addr->repl_list)
	{
		gv_replication_error = gv_replopen_error;
		gv_replopen_error = FALSE;
		reg = gv_cur_region;
		while (gv_cur_region = gv_cur_region->dyn.addr->repl_list)	/* set replicated segments */
		{
			if (gv_cur_region->open)
			{
				change_reg();
				kill_var();
			} else
				gv_replication_error = TRUE;
		}
		gv_cur_region = reg;
		change_reg();
		if (gv_replication_error)
			sgnl_gvreplerr();
	}
}
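/* region_init: open every not-yet-open BG/MM region in the current global directory (remote CM regions are
 * included only when cm_regions is TRUE), make the first successfully opened region the current one, and
 * return TRUE only if every eligible file opened. Raises DBNOREGION if no file could be opened at all.
 */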
boolean_t region_init(bool cm_regions)
{
	gd_region	*region_top;
	boolean_t	file_open, is_cm, all_files_open;

	error_def(ERR_DBNOREGION);

	file_open = FALSE;
	all_files_open = TRUE;
	region_top = gd_header->regions + gd_header->n_regions;
	for (gv_cur_region = gd_header->regions; gv_cur_region < region_top; gv_cur_region++)
	{
		if (gv_cur_region->open == FALSE
			&& (gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm))
		{
			is_cm = reg_cmcheck(gv_cur_region);
			if (!is_cm || cm_regions)
			{
				region_open();
				if (gv_cur_region->open)
					file_open = TRUE;
				else
					all_files_open = FALSE;
			}
		}
	}
	if (!file_open)
		rts_error(VARLSTCNT(1) ERR_DBNOREGION);
	/* arbitrary assignment of the first region */
	for (gv_cur_region = gd_header->regions; gv_cur_region < region_top; gv_cur_region++)
	{
		if (gv_cur_region->open)
		{
			change_reg();
			break;
		}
	}
	return all_files_open;
}
/* Upgrade ^#t global in "reg" region */ void trigger_upgrade(gd_region *reg) { boolean_t est_first_pass, do_upgrade, is_defined; boolean_t was_null = FALSE, is_null = FALSE; int seq_num, trig_seq_num; int currlabel; mval tmpmval, xecuteimval, *gvname, *tmpmv, *tmpmv2; int4 result, tmpint4; uint4 curend, gvname_prev, xecute_curend; uint4 hash_code, kill_hash_code; int count, i, xecutei, tncount; char *trigname, *trigindex, *ptr; char name_and_index[MAX_MIDENT_LEN + 1 + MAX_DIGITS_IN_INT]; char trigvn[MAX_MIDENT_LEN + 1 + MAX_DIGITS_IN_INT], nullbyte[1]; uint4 trigname_len, name_index_len; int ilen; sgmnt_addrs *csa; jnl_private_control *jpc; uint4 sts; int close_res; hash128_state_t hash_state, kill_hash_state; uint4 hash_totlen, kill_hash_totlen; int trig_protected_mval_push_count; # ifdef DEBUG int save_dollar_tlevel; # endif DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(gv_cur_region == reg); assert(!dollar_tlevel); /* caller should have ensured this. this is needed as otherwise things get complicated. */ assert(!is_replicator); /* caller should have ensured this. this is needed so we dont bump jnl_seqno (if replicating) */ csa = &FILE_INFO(reg)->s_addrs; assert(csa->hdr->hasht_upgrade_needed); /* If before-image journaling is turned on in this region (does not matter if replication is turned on or not), * once this transaction is done, we need to switch to new journal file and cut the back link because * otherwise it is possible for backward journal recovery (or rollback) or source server to encounter * the journal records generated in this ^#t-upgrade-transaction in which case they dont know to handle * it properly (e.g. rollback or backward recovery does not know to restore csa->hdr->hasht_upgrade_needed * if it rolls back this transaction). To achieve this, we set hold_onto_crit to TRUE and do the jnl link * cut AFTER the transaction commits but before anyone else can sneak in to do any more updates. * Since most often we expect databases to be journaled, we do this hold_onto_crit even for the non-journaled case. */ grab_crit(reg); csa->hold_onto_crit = TRUE; DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel); assert(!donot_INVOKE_MUMTSTART); DEBUG_ONLY(donot_INVOKE_MUMTSTART = TRUE); op_tstart(IMPLICIT_TSTART, TRUE, &literal_batch, 0); /* 0 ==> save no locals but RESTART OK */ ESTABLISH_NORET(trigger_upgrade_ch, est_first_pass); /* On a TP restart anywhere down below, this line is where the restart resumes execution from */ assert(donot_INVOKE_MUMTSTART); /* Make sure still set for every try/retry of TP transaction */ change_reg(); /* TP_CHANGE_REG wont work as we need to set sgm_info_ptr */ assert(NULL != cs_addrs); assert(csa == cs_addrs); SET_GVTARGET_TO_HASHT_GBL(csa); /* sets up gv_target */ assert(NULL != gv_target); INITIAL_HASHT_ROOT_SEARCH_IF_NEEDED; /* Needed to do every retry in case restart was due to an online rollback. * This also sets up gv_currkey */ /* Do actual upgrade of ^#t global. 
* * Below is a sample layout of the label 2 ^#t global * ------------------------------------------------------- * ^#t("#TNAME","x")="a"_$C(0)_"1" (present in DEFAULT only) * ^#t("#TRHASH",89771515,1)="a"_$C(0)_"1" (present in DEFAULT only) * ^#t("#TRHASH",106937755,1)="a"_$C(0)_"1" (present in DEFAULT only) * ^#t("a",1,"BHASH")="106937755" * ^#t("a",1,"CHSET")="M" * ^#t("a",1,"CMD")="S" * ^#t("a",1,"LHASH")="89771515" * ^#t("a",1,"TRIGNAME")="x#" * ^#t("a",1,"XECUTE")=" do ^twork" * ^#t("a","#COUNT")="1" * ^#t("a","#CYCLE")="1" * ^#t("a","#LABEL")="2" * * Below is a sample layout of the label 3 ^#t global * ------------------------------------------------------- * ^#t("#LABEL")="3" (present only after upgrade, not regular trigger load) * ^#t("#TNAME","x")="a"_$C(0)_"1" (present in CURRENT region) * ^#t("a",1,"BHASH")="71945627" * ^#t("a",1,"CHSET")="M" * ^#t("a",1,"CMD")="S" * ^#t("a",1,"LHASH")="71945627" * ^#t("a",1,"TRIGNAME")="x#" * ^#t("a",1,"XECUTE")=" do ^twork" * ^#t("a","#COUNT")="1" * ^#t("a","#CYCLE")="2" * ^#t("a","#LABEL")="3" * ^#t("a","#TRHASH",71945627,1)="a"_$C(0)_"1" * * Key aspects of the format change * ---------------------------------- * 1) New ^#t("#LABEL")="3" to indicate the format of the ^#t global. This is in addition to * ^#t("a","#LABEL") etc. which is already there. This way we have a #LABEL for not just the installed * triggers but also for the name information stored in the #TNAME nodes. * 2) In the BHASH and LHASH fields. The hash computation is different so there are more chances of BHASH and LHASH * matching in which case we store only one #TRHASH entry (instead of two). So thre is fewer ^#t records in the new * format in most cases. * 3) ^#t("a","#LABEL") bumps from 2 to 3. Similarly ^#t("a","#CYCLE") bumps by one (to make sure triggers for this * global get re-read if and when we implement an -ONLINE upgrade). * 4) DEFAULT used to have ^#t("#TNAME",...) nodes corresponding to triggers across ALL regions in the gbldir and * other regions used to have NO ^#t("#TNAME",...) nodes whereas after the upgrade every region have * ^#t("#TNAME",...) nodes corresponding to triggers installed in that region. So it is safer to kill ^#t("#TNAME") * nodes and add them as needed. * 5) #TRHASH has moved from ^#t() to ^#t(<gbl>). So it is safer to kill ^#t("#TRHASH") nodes and add them as needed. * * Below is a sample layout of the label 4 ^#t global * ------------------------------------------------------- * ^#t("#TNAME","x")="a"_$C(0)_"1" (present in CURRENT region) * ^#t("a",1,"BHASH")="71945627" * ^#t("a",1,"CHSET")="M" * ^#t("a",1,"CMD")="S" * ^#t("a",1,"LHASH")="71945627" * ^#t("a",1,"TRIGNAME")="x#" * ^#t("a",1,"XECUTE")=" do ^twork" * ^#t("a","#COUNT")="1" * ^#t("a","#CYCLE")="2" * ^#t("a","#LABEL")="4" * ^#t("a","#TRHASH",71945627,1)="a"_$C(0)_"1" * * Key aspects of the format change * ---------------------------------- * 1) Removed ^#t("#LABEL") as it is redundant information and trigger load does not include it * 2) Multiline triggers were incorrectly processed resulting in incorrect BHASH and LHASH values. Upgrade fixes this * 3) ^#t("a","#LABEL") bumps from 3 to 4. Similarly ^#t("a","#CYCLE") bumps by one (to make sure * triggers for this global get re-read if and when we implement an -ONLINE upgrade). */ tmpmv = &tmpmval; /* At all points maintain this relationship. 
The two are used interchangeably below */ if (gv_target->root) do_upgrade = TRUE; /* The below logic assumes ^#t global does not have any integrity errors */ assert(do_upgrade); /* caller should have not invoked us otherwise */ if (do_upgrade) { /* kill ^#t("#TRHASH"), ^#t("#TNAME") and ^#t("#LABEL") first. Regenerate each again as we process ^#t(<gbl>,...) */ csa->incr_db_trigger_cycle = TRUE; /* so that we increment csd->db_trigger_cycle at commit time. * this forces concurrent processes to read upgraded triggers. */ if (JNL_WRITE_LOGICAL_RECS(csa)) { /* Note that the ^#t upgrade is a physical layout change. But it has no logical change (i.e. users * see the same MUPIP TRIGGER -SELECT output as before). So write only a dummy LGTRIG journal * record for this operation. Hence write a string that starts with a trigger comment character ";". */ assert(!gv_cur_region->read_only); jnl_format(JNL_LGTRIG, NULL, (mval *)&literal_trigjnlrec, 0); } /* KILL ^#t("#LABEL") unconditionally */ BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHLABEL, STRLEN(LITERAL_HASHLABEL)); if (0 != gvcst_data()) gvcst_kill(TRUE); /* KILL ^#t("#TNAME") unconditionally and regenerate */ BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHTNAME, STRLEN(LITERAL_HASHTNAME)); if (0 != gvcst_data()) gvcst_kill(TRUE); /* KILL ^#t("#TRHASH") unconditionally and regenerate */ BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHTRHASH, STRLEN(LITERAL_HASHTRHASH)); if (0 != gvcst_data()) gvcst_kill(TRUE); /* Loop through all global names for which ^#t(<gvn>) exists. The only first-level subscripts of ^#t starting * with # are #TNAME and #TRHASH in collation order. So after #TRHASH we expect to find subscripts that are * global names. Hence the HASHTRHASH code is placed AFTER the HASHTNAME code above. */ TREF(gd_targ_gvnh_reg) = NULL; /* needed so op_gvorder below goes through gvcst_order (i.e. focuses only * on the current region) and NOT through gvcst_spr_order (which does not * apply anyways in the case of ^#t). */ nullbyte[0] = '\0'; trig_protected_mval_push_count = 0; INCR_AND_PUSH_MV_STENT(gvname); /* Protect gvname from garbage collection */ do { op_gvorder(gvname); if (0 == gvname->str.len) break; assert(ARRAYSIZE(trigvn) > gvname->str.len); memcpy(&trigvn[0], gvname->str.addr, gvname->str.len); gvname->str.addr = &trigvn[0]; /* point away from stringpool to avoid stp_gcol issues */ /* Save gv_currkey->prev so it is restored before next call to op_gvorder (which cares about this field). * gv_currkey->prev gets tampered with in the for loop below (e.g. BUILD_HASHT_SUB_CURRKEY macro). * No need to do this for gv_currkey->end since the body of the for loop takes care of restoring it. */ gvname_prev = gv_currkey->prev; BUILD_HASHT_SUB_CURRKEY(gvname->str.addr, gvname->str.len); /* At this point, gv_currkey is ^#t(<gvn>) */ /* Increment ^#t(<gvn>,"#CYCLE") */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_hashcycle, tmpmv); assert(is_defined); tmpint4 = mval2i(tmpmv); tmpint4++; i2mval(tmpmv, tmpint4); gvtr_set_hasht_gblsubs((mval *)&literal_hashcycle, tmpmv); /* Read ^#t(<gvn>,"#COUNT") */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_hashcount, tmpmv); if (is_defined) { tmpint4 = mval2i(tmpmv); count = tmpint4; /* Get ^#t(<gvn>,"#LABEL"), error out for invalid values. 
Upgrade disallowed for label 1 triggers */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_hashlabel, tmpmv); assert(is_defined); currlabel = mval2i(tmpmv); if ((V19_HASHT_GBL_LABEL_INT >= currlabel) || (HASHT_GBL_CURLABEL_INT <= currlabel)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_TRIGUPBADLABEL, 6, currlabel, HASHT_GBL_CURLABEL_INT, gvname->str.len, gvname->str.addr, REG_LEN_STR(reg)); /* Set ^#t(<gvn>,"#LABEL")=HASHT_GBL_CURLABEL */ gvtr_set_hasht_gblsubs((mval *)&literal_hashlabel, (mval *)&literal_curlabel); } else count = 0; /* Kill ^#t(<gvn>,"#TRHASH") unconditionally and regenerate */ gvtr_kill_hasht_gblsubs((mval *)&literal_hashtrhash, TRUE); /* At this point, gv_currkey is ^#t(<gvn>) */ for (i = 1; i <= count; i++) { /* At this point, gv_currkey is ^#t(<gvn>) */ curend = gv_currkey->end; /* note gv_currkey->end before changing it so we can restore it later */ assert(KEY_DELIMITER == gv_currkey->base[curend]); assert(gv_target->gd_csa == cs_addrs); i2mval(tmpmv, i); COPY_SUBS_TO_GVCURRKEY(tmpmv, gv_cur_region, gv_currkey, was_null, is_null); /* At this point, gv_currkey is ^#t(<gvn>,i) */ /* Compute new LHASH and BHASH hash values. * LHASH uses : GVSUBS, XECUTE * BHASH uses : GVSUBS, DELIM, ZDELIM, PIECES, XECUTE * So reach each of these pieces and compute hash along the way. */ STR_PHASH_INIT(hash_state, hash_totlen); STR_PHASH_PROCESS(hash_state, hash_totlen, gvname->str.addr, gvname->str.len); STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1); /* Read in ^#t(<gvn>,i,"GVSUBS") */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_gvsubs, tmpmv); if (is_defined) { STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len); STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1); } /* Copy over SET hash state (2-tuple <state,totlen>) to KILL hash state before adding * the PIECES, DELIM, ZDELIM portions (those are only part of the SET hash). */ kill_hash_state = hash_state; kill_hash_totlen = hash_totlen; /* Read in ^#t(<gvn>,i,"PIECES") */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_pieces, tmpmv); if (is_defined) { STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len); STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1); } /* Read in ^#t(<gvn>,i,"DELIM") */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_delim, tmpmv); if (is_defined) { STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len); STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1); } /* Read in ^#t(<gvn>,i,"ZDELIM") */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_zdelim, tmpmv); if (is_defined) { STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len); STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1); } /* Read in ^#t(<gvn>,i,"XECUTE"). * Note: The XECUTE portion of the trigger definition is used in SET and KILL hash. * But since we have started maintaining "hash_state" and "kill_hash_state" separately * (due to PIECES, DELIM, ZDELIM) we need to update the hash for both using same input string. 
*/ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_xecute, tmpmv); if (is_defined) { STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len); STR_PHASH_PROCESS(kill_hash_state, kill_hash_totlen, tmpmval.str.addr, tmpmval.str.len); } else { /* Multi-record XECUTE string */ /* At this point, gv_currkey is ^#t(<gvn>,i) */ xecute_curend = gv_currkey->end; /* note gv_currkey->end so we can restore it later */ assert(KEY_DELIMITER == gv_currkey->base[xecute_curend]); tmpmv2 = (mval *)&literal_xecute; COPY_SUBS_TO_GVCURRKEY(tmpmv2, gv_cur_region, gv_currkey, was_null, is_null); xecutei = 1; do { i2mval(&xecuteimval, xecutei); is_defined = gvtr_get_hasht_gblsubs(&xecuteimval, tmpmv); if (!is_defined) break; STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len); STR_PHASH_PROCESS(kill_hash_state, kill_hash_totlen, tmpmval.str.addr, tmpmval.str.len); xecutei++; } while (TRUE); /* Restore gv_currkey to ^#t(<gvn>,i) */ gv_currkey->end = xecute_curend; gv_currkey->base[xecute_curend] = KEY_DELIMITER; } STR_PHASH_RESULT(hash_state, hash_totlen, hash_code); STR_PHASH_RESULT(kill_hash_state, kill_hash_totlen, kill_hash_code); /* Set ^#t(<gvn>,i,"LHASH") */ MV_FORCE_UMVAL(tmpmv, kill_hash_code); gvtr_set_hasht_gblsubs((mval *)&literal_lhash, tmpmv); /* Set ^#t(<gvn>,i,"BHASH") */ MV_FORCE_UMVAL(tmpmv, hash_code); gvtr_set_hasht_gblsubs((mval *)&literal_bhash, tmpmv); /* Read in ^#t(<gvn>,i,"TRIGNAME") to determine if #SEQNUM/#TNCOUNT needs to be maintained */ is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_trigname, tmpmv); assert(is_defined); assert('#' == tmpmval.str.addr[tmpmval.str.len - 1]); tmpmval.str.len--; if ((tmpmval.str.len <= ARRAYSIZE(name_and_index)) && (NULL != (ptr = memchr(tmpmval.str.addr, '#', tmpmval.str.len)))) { /* Auto-generated name. Need to maintain #SEQNUM/#TNCOUNT */ /* Take copy of trigger name into non-stringpool location to avoid stp_gcol issues */ trigname_len = ptr - tmpmval.str.addr; ptr++; name_index_len = (tmpmval.str.addr + tmpmval.str.len) - ptr; assert(ARRAYSIZE(name_and_index) >= (trigname_len + 1 + name_index_len)); trigname = &name_and_index[0]; trigindex = ptr; memcpy(trigname, tmpmval.str.addr, tmpmval.str.len); A2I(ptr, ptr + name_index_len, trig_seq_num); /* At this point, gv_currkey is ^#t(<gvn>,i) */ /* $get(^#t("#TNAME",<trigger name>,"#SEQNUM")) */ BUILD_HASHT_SUB_SUB_SUB_CURRKEY(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME), trigname, trigname_len, LITERAL_HASHSEQNUM, STR_LIT_LEN(LITERAL_HASHSEQNUM)); seq_num = gvcst_get(tmpmv) ? mval2i(tmpmv) : 0; if (trig_seq_num > seq_num) { /* Set ^#t("#TNAME",<trigger name>,"#SEQNUM") = trig_seq_num */ SET_TRIGGER_GLOBAL_SUB_SUB_SUB_STR(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME), trigname, trigname_len, LITERAL_HASHSEQNUM, STR_LIT_LEN(LITERAL_HASHSEQNUM), trigindex, name_index_len, result); assert(PUT_SUCCESS == result); } /* set ^#t("#TNAME",<trigger name>,"#TNCOUNT")++ */ BUILD_HASHT_SUB_SUB_SUB_CURRKEY(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME), trigname, trigname_len, LITERAL_HASHTNCOUNT, STR_LIT_LEN(LITERAL_HASHTNCOUNT)); tncount = gvcst_get(tmpmv) ? 
mval2i(tmpmv) + 1 : 1; i2mval(tmpmv, tncount); SET_TRIGGER_GLOBAL_SUB_SUB_SUB_MVAL(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME), trigname, trigname_len, LITERAL_HASHTNCOUNT, STR_LIT_LEN(LITERAL_HASHTNCOUNT), tmpmval, result); trigname_len += 1 + name_index_len; /* in preparation for ^#t("#TNAME") set below */ assert(PUT_SUCCESS == result); BUILD_HASHT_SUB_CURRKEY(gvname->str.addr, gvname->str.len); /* At this point, gv_currkey is ^#t(<gvn>) */ } else { /* Take copy of trigger name into non-stringpool location to avoid stp_gcol issues */ trigname = &name_and_index[0]; /* in preparation for ^#t("#TNAME") set below */ trigname_len = MIN(tmpmval.str.len, ARRAYSIZE(name_and_index)); assert(ARRAYSIZE(name_and_index) >= trigname_len); memcpy(trigname, tmpmval.str.addr, trigname_len); /* Restore gv_currkey to what it was at beginning of for loop iteration */ gv_currkey->end = curend; gv_currkey->base[curend] = KEY_DELIMITER; } /* At this point, gv_currkey is ^#t(<gvn>) */ if (kill_hash_code != hash_code) gvtr_set_hashtrhash(gvname->str.addr, gvname->str.len, kill_hash_code, i); /* Set ^#t(<gvn>,"#TRHASH",hash_code,i) */ gvtr_set_hashtrhash(gvname->str.addr, gvname->str.len, hash_code, i); /* Set ^#t("#TNAME",<trigname>)=<gvn>_$c(0)_<trigindx> */ /* The upgrade assumes that the region does not contain two triggers with the same name. * V62000 and before could potentially have this out of design case. Once implemented * the trigger integrity check will warn users of this edge case */ ptr = &trigvn[gvname->str.len]; *ptr++ = '\0'; ilen = 0; I2A(ptr, ilen, i); ptr += ilen; assert(ptr <= ARRAYTOP(trigvn)); SET_TRIGGER_GLOBAL_SUB_SUB_STR(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME), trigname, trigname_len, trigvn, ptr - gvname->str.addr, result); assert(PUT_SUCCESS == result); BUILD_HASHT_SUB_CURRKEY(gvname->str.addr, gvname->str.len); /* At this point, gv_currkey is ^#t(<gvn>) */ } /* At this point, gv_currkey is ^#t(<gvn>) i.e. gv_currkey->end is correct but gv_currkey->prev * might have been tampered with. Restore it to proper value first. */ gv_currkey->prev = gvname_prev; gvname->mvtype = 0; /* can now be garbage collected in the next iteration */ } while (TRUE); } op_tcommit(); REVERT; /* remove our condition handler */ DEBUG_ONLY(donot_INVOKE_MUMTSTART = FALSE;) if (csa->hold_onto_crit)
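/* trigger_delete_all: delete every installed trigger. Kills the ^#t("#TRHASH") and ^#t("#TNAME") index nodes
 * in the DEFAULT region, then, for each updatable region whose ^#t global exists, kills each ^#t(<gvn>) subtree
 * that has a #COUNT node while incrementing its #CYCLE node so that concurrent processes re-read the (now
 * deleted) trigger definitions. Must be called inside a TP transaction that has not yet driven any trigger.
 */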
void trigger_delete_all(void)
{
	int			count;
	char			count_str[MAX_DIGITS_IN_INT + 1];
	sgmnt_addrs		*csa;
	mval			curr_gbl_name;
	int			cycle;
	mstr			gbl_name;
	mname_entry		gvent;
	gv_namehead		*hasht_tree, *gvt;
	mval			*mv_count_ptr;
	mval			*mv_cycle_ptr;
	mval			mv_indx;
	gd_region		*reg;
	int			reg_indx;
	int4			result;
	char			save_currkey[SIZEOF(gv_key) + DBKEYSIZE(MAX_KEY_SZ)];
	gv_key			*save_gv_currkey;
	gd_region		*save_gv_cur_region;
	gv_namehead		*save_gv_target;
	sgm_info		*save_sgm_info_ptr;
	int			trig_indx;
	mval			trigger_cycle;
	mval			trigger_count;
	mval			val;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(0 < dollar_tlevel);
	/* Before we delete any triggers, verify that none of the triggers have been fired in this transaction. If they
	 * have, this creates an un-commitable transaction that will end in a TPFAIL error. Since that error indicates
	 * database damage, we'd rather detect this avoidable condition and give a descriptive error instead
	 * (TRIGMODINTP).
	 */
	for (gvt = gv_target_list; NULL != gvt; gvt = gvt->next_gvnh)
	{
		if (gvt->trig_local_tn == local_tn)
			rts_error(VARLSTCNT(1) ERR_TRIGMODINTP);
	}
	SWITCH_TO_DEFAULT_REGION;
	INITIAL_HASHT_ROOT_SEARCH_IF_NEEDED;
	if (0 != gv_target->root)
	{	/* kill ^#t("#TRHASH") */
		BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHTRHASH, STRLEN(LITERAL_HASHTRHASH));
		gvcst_kill(TRUE);
		/* kill ^#t("#TNAME") */
		BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHTNAME, STRLEN(LITERAL_HASHTNAME));
		gvcst_kill(TRUE);
	}
	for (reg_indx = 0, reg = gd_header->regions; reg_indx < gd_header->n_regions; reg_indx++, reg++)
	{
		if (!reg->open)
			gv_init_reg(reg);
		if (!reg->read_only)
		{
			gv_cur_region = reg;
			change_reg();
			csa = cs_addrs;
			SETUP_TRIGGER_GLOBAL;
			INITIAL_HASHT_ROOT_SEARCH_IF_NEEDED;
			/* There might not be any ^#t in this region, so check */
			if (0 != gv_target->root)
			{	/* Kill all descendents of ^#t(trigvn, indx) where trigvn is any global with a trigger,
				 * but skip the "#XYZ" entries. Set up ^#t(trigvn,"$") as the PREV key for op_gvorder.
				 */
				BUILD_HASHT_SUB_CURRKEY(LITERAL_MAXHASHVAL, STRLEN(LITERAL_MAXHASHVAL));
				TREF(gv_last_subsc_null) = FALSE;	/* We know it's not null, but prior state is
									 * unreliable */
				while (TRUE)
				{
					op_gvorder(&curr_gbl_name);	/* quit:$length(curr_gbl_name)=0 */
					if (0 == curr_gbl_name.str.len)
						break;
					/* $get(^#t(curr_gbl_name,#COUNT)) */
					BUILD_HASHT_SUB_SUB_CURRKEY(curr_gbl_name.str.addr, curr_gbl_name.str.len,
						LITERAL_HASHCOUNT, STRLEN(LITERAL_HASHCOUNT));
					if (gvcst_get(&trigger_count))
					{
						mv_count_ptr = &trigger_count;
						count = MV_FORCE_INT(mv_count_ptr);
						/* $get(^#t(curr_gbl_name,#CYCLE)) */
						BUILD_HASHT_SUB_SUB_CURRKEY(curr_gbl_name.str.addr,
							curr_gbl_name.str.len, LITERAL_HASHCYCLE,
							STRLEN(LITERAL_HASHCYCLE));
						if (!gvcst_get(&trigger_cycle))
							assert(FALSE);	/* Found #COUNT, there must be #CYCLE */
						mv_cycle_ptr = &trigger_cycle;
						cycle = MV_FORCE_INT(mv_cycle_ptr);
						/* kill ^#t(curr_gbl_name) */
						BUILD_HASHT_SUB_CURRKEY(curr_gbl_name.str.addr, curr_gbl_name.str.len);
						gvcst_kill(TRUE);
						cycle++;
						MV_FORCE_MVAL(&trigger_cycle, cycle);
						/* set ^#t(curr_gbl_name,#CYCLE)=trigger_cycle */
						SET_TRIGGER_GLOBAL_SUB_SUB_MVAL(curr_gbl_name.str.addr,
							curr_gbl_name.str.len, LITERAL_HASHCYCLE,
							STRLEN(LITERAL_HASHCYCLE), trigger_cycle, result);
						assert(PUT_SUCCESS == result);
					}	/* else there is no #COUNT, then no triggers, leave #CYCLE alone */
					/* get ready for op_gvorder() call for next trigger under ^#t */
					BUILD_HASHT_SUB_CURRKEY(curr_gbl_name.str.addr, curr_gbl_name.str.len);
				}
				csa->incr_db_trigger_cycle = TRUE;
				if (dollar_ztrigger_invoked)
				{	/* increment db_dztrigger_cycle so that next gvcst_put/gvcst_kill in this
					 * transaction, on this region, will re-read. See trigger_update.c for a comment
					 * on why it is okay for db_dztrigger_cycle to be incremented more than once in
					 * the same transaction.
					 */
					csa->db_dztrigger_cycle++;
				}
			}
		}
	}
	util_out_print_gtmio("All existing triggers deleted", FLUSH);
}
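/* mupip_set_file: implementation of MUPIP SET -FILE/-REGION. Validates the command-line qualifiers
 * (WAIT_DISK, GLOBAL_BUFFERS, EXTENSION_COUNT, LOCK_SPACE, RESERVED_BYTES, ACCESS_METHOD, VERSION, DEFER_TIME,
 * PARTIAL_RECOV_BYPASS) and then applies them file by file. Qualifiers that do not need standalone access
 * (EXTENSION_COUNT, VERSION) are applied through a normal database open under crit; the rest acquire
 * standalone access via mu_rndwn_file and edit the file header directly with LSEEKREAD/LSEEKWRITE.
 */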
int4 mupip_set_file(int db_fn_len, char *db_fn)
{
	bool			got_standalone;
	boolean_t		bypass_partial_recov, need_standalone = FALSE;
	char			acc_spec[MAX_ACC_METH_LEN], ver_spec[MAX_DB_VER_LEN], exit_stat, *fn;
	unsigned short		acc_spec_len = MAX_ACC_METH_LEN, ver_spec_len = MAX_DB_VER_LEN;
	int			fd, fn_len;
	int4			status;
	int4			status1;
	int			glbl_buff_status, defer_status, rsrvd_bytes_status,
				extn_count_status, lock_space_status, disk_wait_status;
	int4			new_disk_wait, new_cache_size, new_extn_count, new_lock_space, reserved_bytes,
				defer_time;
	sgmnt_data_ptr_t	csd;
	tp_region		*rptr, single;
	enum db_acc_method	access, access_new;
	enum db_ver		desired_dbver;
	gd_region		*temp_cur_region;
	char			*errptr, *command = "MUPIP SET VERSION";
	int			save_errno;

	error_def(ERR_DBPREMATEOF);
	error_def(ERR_DBRDERR);
	error_def(ERR_DBRDONLY);
	error_def(ERR_INVACCMETHOD);
	error_def(ERR_MUNOACTION);
	error_def(ERR_RBWRNNOTCHG);
	error_def(ERR_WCERRNOTCHG);
	error_def(ERR_WCWRNNOTCHG);
	error_def(ERR_MMNODYNDWNGRD);

	exit_stat = EXIT_NRM;
	defer_status = cli_present("DEFER_TIME");
	if (defer_status)
		need_standalone = TRUE;
	bypass_partial_recov = cli_present("PARTIAL_RECOV_BYPASS") == CLI_PRESENT;
	if (bypass_partial_recov)
		need_standalone = TRUE;
	if (disk_wait_status = cli_present("WAIT_DISK"))
	{
		if (cli_get_int("WAIT_DISK", &new_disk_wait))
		{
			if (new_disk_wait < 0)
			{
				util_out_print("!UL negative, minimum WAIT_DISK allowed is 0.", TRUE, new_disk_wait);
				return (int4)ERR_WCWRNNOTCHG;
			}
			need_standalone = TRUE;
		} else
		{
			util_out_print("Error getting WAIT_DISK qualifier value", TRUE);
			return (int4)ERR_WCWRNNOTCHG;
		}
	}
	if (glbl_buff_status = cli_present("GLOBAL_BUFFERS"))
	{
		if (cli_get_int("GLOBAL_BUFFERS", &new_cache_size))
		{
			if (new_cache_size > WC_MAX_BUFFS)
			{
				util_out_print("!UL too large, maximum write cache buffers allowed is !UL", TRUE,
					new_cache_size, WC_MAX_BUFFS);
				return (int4)ERR_WCWRNNOTCHG;
			}
			if (new_cache_size < WC_MIN_BUFFS)
			{
				util_out_print("!UL too small, minimum cache buffers allowed is !UL", TRUE,
					new_cache_size, WC_MIN_BUFFS);
				return (int4)ERR_WCWRNNOTCHG;
			}
		} else
		{
			util_out_print("Error getting GLOBAL BUFFER qualifier value", TRUE);
			return (int4)ERR_WCWRNNOTCHG;
		}
		need_standalone = TRUE;
	}
	/* EXTENSION_COUNT does not require standalone access and hence need_standalone will not be set to TRUE for
	 * this. */
	if (extn_count_status = cli_present("EXTENSION_COUNT"))
	{
		if (cli_get_int("EXTENSION_COUNT", &new_extn_count))
		{
			if (new_extn_count > MAX_EXTN_COUNT)
			{
				util_out_print("!UL too large, maximum extension count allowed is !UL", TRUE,
					new_extn_count, MAX_EXTN_COUNT);
				return (int4)ERR_WCWRNNOTCHG;
			}
			if (new_extn_count < MIN_EXTN_COUNT)
			{
				util_out_print("!UL too small, minimum extension count allowed is !UL", TRUE,
					new_extn_count, MIN_EXTN_COUNT);
				return (int4)ERR_WCWRNNOTCHG;
			}
		} else
		{
			util_out_print("Error getting EXTENSION COUNT qualifier value", TRUE);
			return (int4)ERR_WCWRNNOTCHG;
		}
	}
	if (lock_space_status = cli_present("LOCK_SPACE"))
	{
		if (cli_get_int("LOCK_SPACE", &new_lock_space))
		{
			if (new_lock_space > MAX_LOCK_SPACE)
			{
				util_out_print("!UL too large, maximum lock space allowed is !UL", TRUE,
					new_lock_space, MAX_LOCK_SPACE);
				return (int4)ERR_WCWRNNOTCHG;
			} else if (new_lock_space < MIN_LOCK_SPACE)
			{
				util_out_print("!UL too small, minimum lock space allowed is !UL", TRUE,
					new_lock_space, MIN_LOCK_SPACE);
				return (int4)ERR_WCWRNNOTCHG;
			}
		} else
		{
			util_out_print("Error getting LOCK_SPACE qualifier value", TRUE);
			return (int4)ERR_WCWRNNOTCHG;
		}
		need_standalone = TRUE;
	}
	if (rsrvd_bytes_status = cli_present("RESERVED_BYTES"))
	{
		if (!cli_get_int("RESERVED_BYTES", &reserved_bytes))
		{
			util_out_print("Error getting RESERVED BYTES qualifier value", TRUE);
			return (int4)ERR_RBWRNNOTCHG;
		}
		need_standalone = TRUE;
	}
	if (cli_present("ACCESS_METHOD"))
	{
		cli_get_str("ACCESS_METHOD", acc_spec, &acc_spec_len);
		cli_strupper(acc_spec);
		if (0 == memcmp(acc_spec, "MM", acc_spec_len))
			access = dba_mm;
		else if (0 == memcmp(acc_spec, "BG", acc_spec_len))
			access = dba_bg;
		else
			mupip_exit(ERR_INVACCMETHOD);
		need_standalone = TRUE;
	} else
		access = n_dba;	/* really want to keep current method, which has not yet been read */
	if (cli_present("VERSION"))
	{
		assert(!need_standalone);
		cli_get_str("VERSION", ver_spec, &ver_spec_len);
		cli_strupper(ver_spec);
		if (0 == memcmp(ver_spec, "V4", ver_spec_len))
			desired_dbver = GDSV4;
		else if (0 == memcmp(ver_spec, "V5", ver_spec_len))
			desired_dbver = GDSV5;
		else
			GTMASSERT;	/* CLI should prevent us ever getting here */
	} else
		desired_dbver = GDSVLAST;	/* really want to keep version, which has not yet been read */
	if (region)
		rptr = grlist;
	else
	{
		rptr = &single;
		memset(&single, 0, sizeof(single));
	}
	csd = (sgmnt_data *)malloc(ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE));
	in_backup = FALSE;	/* Only want yes/no from mupfndfil, not an address */
	for (; rptr != NULL; rptr = rptr->fPtr)
	{
		if (region)
		{
			if (dba_usr == rptr->reg->dyn.addr->acc_meth)
			{
				util_out_print("!/Region !AD is not a GDS access type", TRUE, REG_LEN_STR(rptr->reg));
				exit_stat |= EXIT_WRN;
				continue;
			}
			if (!mupfndfil(rptr->reg, NULL))
				continue;
			fn = (char *)rptr->reg->dyn.addr->fname;
			fn_len = rptr->reg->dyn.addr->fname_len;
		} else
		{
			fn = db_fn;
			fn_len = db_fn_len;
		}
		mu_gv_cur_reg_init();
		strcpy((char *)gv_cur_region->dyn.addr->fname, fn);
		gv_cur_region->dyn.addr->fname_len = fn_len;
		if (!need_standalone)
		{
			gvcst_init(gv_cur_region);
			change_reg();	/* sets cs_addrs and cs_data */
			if (gv_cur_region->read_only)
			{
				gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
				exit_stat |= EXIT_ERR;
				gds_rundown();
				mu_gv_cur_reg_free();
				continue;
			}
			grab_crit(gv_cur_region);
			status = EXIT_NRM;
			access_new = (n_dba == access ? cs_data->acc_meth : access);
				/* recalculate; n_dba is a proxy for no change */
			change_fhead_timer("FLUSH_TIME", cs_data->flush_time,
				(dba_bg == access_new ? TIM_FLU_MOD_BG : TIM_FLU_MOD_MM), FALSE);
			if (GDSVLAST != desired_dbver)
			{
				if ((dba_mm != access_new) || (GDSV4 != desired_dbver))
					status1 = desired_db_format_set(gv_cur_region, desired_dbver, command);
				else
				{
					status1 = ERR_MMNODYNDWNGRD;
					gtm_putmsg(VARLSTCNT(4) status1, 2, REG_LEN_STR(gv_cur_region));
				}
				if (SS_NORMAL != status1)
				{	/* "desired_db_format_set" would have printed appropriate error messages */
					if (ERR_MUNOACTION != status1)
					{	/* real error occurred while setting the db format. skip to next region */
						status = EXIT_ERR;
					}
				}
			}
			if (EXIT_NRM == status)
			{
				if (extn_count_status)
					cs_data->extension_size = (uint4)new_extn_count;
				wcs_flu(WCSFLU_FLUSH_HDR);
				if (extn_count_status)
					util_out_print("Database file !AD now has extension count !UL", TRUE,
						fn_len, fn, cs_data->extension_size);
				if (GDSVLAST != desired_dbver)
					util_out_print("Database file !AD now has desired DB format !AD", TRUE,
						fn_len, fn,
						LEN_AND_STR(gtm_dbversion_table[cs_data->desired_db_format]));
			} else
				exit_stat |= status;
			rel_crit(gv_cur_region);
			gds_rundown();
		} else
		{	/* Following part needs standalone access */
			assert(GDSVLAST == desired_dbver);
			got_standalone = mu_rndwn_file(gv_cur_region, TRUE);
			if (FALSE == got_standalone)
				return (int4)ERR_WCERRNOTCHG;
			/* we should open it (for changing) after mu_rndwn_file, since mu_rndwn_file changes the file
			 * header too */
			if (-1 == (fd = OPEN(fn, O_RDWR)))
			{
				save_errno = errno;
				errptr = (char *)STRERROR(save_errno);
				util_out_print("open : !AZ", TRUE, errptr);
				exit_stat |= EXIT_ERR;
				db_ipcs_reset(gv_cur_region, FALSE);
				mu_gv_cur_reg_free();
				continue;
			}
			LSEEKREAD(fd, 0, csd, sizeof(sgmnt_data), status);
			if (0 != status)
			{
				save_errno = errno;
				PERROR("Error reading header of file");
				errptr = (char *)STRERROR(save_errno);
				util_out_print("read : !AZ", TRUE, errptr);
				util_out_print("Error reading header of file", TRUE);
				util_out_print("Database file !AD not changed: ", TRUE, fn_len, fn);
				if (-1 != status)
					rts_error(VARLSTCNT(4) ERR_DBRDERR, 2, fn_len, fn);
				else
					rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
			}
			if (rsrvd_bytes_status)
			{
				if (reserved_bytes > MAX_RESERVE_B(csd))
				{
					util_out_print("!UL too large, maximum reserved bytes allowed is !UL for"
						" database file !AD", TRUE, reserved_bytes, MAX_RESERVE_B(csd),
						fn_len, fn);
					close(fd);
					db_ipcs_reset(gv_cur_region, FALSE);
					return (int4)ERR_RBWRNNOTCHG;
				}
				csd->reserved_bytes = reserved_bytes;
			}
			access_new = (n_dba == access ? csd->acc_meth : access);
				/* recalculate; n_dba is a proxy for no change */
			change_fhead_timer("FLUSH_TIME", csd->flush_time,
				(dba_bg == access_new ? TIM_FLU_MOD_BG : TIM_FLU_MOD_MM), FALSE);
			if ((n_dba != access) && (csd->acc_meth != access))	/* n_dba is a proxy for no change */
			{
				if (dba_mm == access)
					csd->defer_time = 1;	/* defer defaults to 1 */
				csd->acc_meth = access;
				if (0 == csd->n_bts)
				{
					csd->n_bts = WC_DEF_BUFFS;
					csd->bt_buckets = getprime(csd->n_bts);
				}
			}
			if (glbl_buff_status)
			{
				csd->n_bts = BT_FACTOR(new_cache_size);
				csd->bt_buckets = getprime(csd->n_bts);
				csd->n_wrt_per_flu = 7;
				csd->flush_trigger = FLUSH_FACTOR(csd->n_bts);
			}
			if (disk_wait_status)
				csd->wait_disk_space = new_disk_wait;
			if (extn_count_status)
				csd->extension_size = (uint4)new_extn_count;
			if (lock_space_status)
				csd->lock_space_size = (uint4)new_lock_space * OS_PAGELET_SIZE;
			if (bypass_partial_recov)
			{
				csd->file_corrupt = FALSE;
				util_out_print("Database file !AD now has partial recovery flag set to !UL(FALSE) ",
					TRUE, fn_len, fn, csd->file_corrupt);
			}
			if (dba_mm == access_new)
			{
				if (CLI_NEGATED == defer_status)
					csd->defer_time = 0;
				else if (CLI_PRESENT == defer_status)
				{
					if (!cli_get_num("DEFER_TIME", &defer_time))
					{
						util_out_print("Error getting DEFER_TIME qualifier value", TRUE);
						db_ipcs_reset(gv_cur_region, FALSE);
						return (int4)ERR_RBWRNNOTCHG;
					}
					if (-1 > defer_time)
					{
						util_out_print("DEFER_TIME cannot take negative values less than -1",
							TRUE);
						util_out_print("Database file !AD not changed", TRUE, fn_len, fn);
						exit_stat |= EXIT_WRN;
						db_ipcs_reset(gv_cur_region, FALSE);
						mu_gv_cur_reg_free();
						continue;
					}
					csd->defer_time = defer_time;
				}
				if (csd->blks_to_upgrd)
				{
					util_out_print("MM access method cannot be set if there are blocks to upgrade",
						TRUE);
					util_out_print("Database file !AD not changed", TRUE, fn_len, fn);
					exit_stat |= EXIT_WRN;
					db_ipcs_reset(gv_cur_region, FALSE);
					mu_gv_cur_reg_free();
					continue;
				}
				if (GDSVCURR != csd->desired_db_format)
				{
					util_out_print("MM access method cannot be set in DB compatibility mode", TRUE);
					util_out_print("Database file !AD not changed", TRUE, fn_len, fn);
					exit_stat |= EXIT_WRN;
					db_ipcs_reset(gv_cur_region, FALSE);
					mu_gv_cur_reg_free();
					continue;
				}
				if (JNL_ENABLED(csd) && csd->jnl_before_image)
				{
					util_out_print("MM access method cannot be set with BEFORE image journaling",
						TRUE);
					util_out_print("Database file !AD not changed", TRUE, fn_len, fn);
					exit_stat |= EXIT_WRN;
					db_ipcs_reset(gv_cur_region, FALSE);
					mu_gv_cur_reg_free();
					continue;
				}
				csd->jnl_before_image = FALSE;
			} else
			{
				if (defer_status)
				{
					util_out_print("DEFER cannot be specified with BG access method.", TRUE);
					util_out_print("Database file !AD not changed", TRUE, fn_len, fn);
					exit_stat |= EXIT_WRN;
					db_ipcs_reset(gv_cur_region, FALSE);
					mu_gv_cur_reg_free();
					continue;
				}
			}
			LSEEKWRITE(fd, 0, csd, sizeof(sgmnt_data), status);
			if (0 != status)
			{
				save_errno = errno;
				errptr = (char *)STRERROR(save_errno);
				util_out_print("write : !AZ", TRUE, errptr);
				util_out_print("Error writing header of file", TRUE);
				util_out_print("Database file !AD not changed: ", TRUE, fn_len, fn);
				rts_error(VARLSTCNT(4) ERR_DBRDERR, 2, fn_len, fn);
			}
			close(fd);
			/* --------------------- report results ------------------------- */
			if (glbl_buff_status)
				util_out_print("Database file !AD now has !UL global buffers", TRUE,
					fn_len, fn, csd->n_bts);
			if (defer_status && (dba_mm == csd->acc_meth))
				util_out_print("Database file !AD now has defer_time set to !SL", TRUE,
					fn_len, fn, csd->defer_time);
			if (rsrvd_bytes_status)
				util_out_print("Database file !AD now has !UL reserved bytes", TRUE,
					fn_len, fn, csd->reserved_bytes);
			if (extn_count_status)
				util_out_print("Database file !AD now has extension count !UL", TRUE,
					fn_len, fn, csd->extension_size);
			if (lock_space_status)
				util_out_print("Database file !AD now has lock space !UL pages", TRUE,
					fn_len, fn, csd->lock_space_size / OS_PAGELET_SIZE);
			if (disk_wait_status)
				util_out_print("Database file !AD now has wait disk set to !UL seconds", TRUE,
					fn_len, fn, csd->wait_disk_space);
			db_ipcs_reset(gv_cur_region, FALSE);
		}	/* end of else part of if (!need_standalone) */
		mu_gv_cur_reg_free();
	}
	free(csd);
	assert(!(exit_stat & EXIT_INF));
	return (exit_stat & EXIT_ERR ? (int4)ERR_WCERRNOTCHG
		: (exit_stat & EXIT_WRN ? (int4)ERR_WCWRNNOTCHG : SS_NORMAL));
}
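/* mu_int_reg: prepare a single region for MUPIP INTEG. Skips non-GDS and remote (CM) regions, opens the
 * database file, and then either sets up an online integ or freezes the region (falling back from online to
 * freeze-based integ when the file still has partial V4 blocks and -ONLINE was only implied, not specified).
 */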
void mu_int_reg(gd_region *reg, boolean_t *return_value)
{
	boolean_t		read_only, was_crit;
	freeze_status		status;
	node_local_ptr_t	cnl;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
#	ifdef DEBUG
	boolean_t		need_to_wait = FALSE;
	int			trynum;
	uint4			curr_wbox_seq_num;
#	endif
	sgmnt_data		*csd_copy_ptr;
	gd_segment		*seg;
	int			gtmcrypt_errno;

	*return_value = FALSE;
	UNIX_ONLY(jnlpool_init_needed = TRUE);
	ESTABLISH(mu_int_reg_ch);
	if (dba_usr == reg->dyn.addr->acc_meth)
	{
		util_out_print("!/Can't integ region !AD; not GDS format", TRUE, REG_LEN_STR(reg));
		mu_int_skipreg_cnt++;
		return;
	}
	gv_cur_region = reg;
	if (reg_cmcheck(reg))
	{
		util_out_print("!/Can't integ region across network", TRUE);
		mu_int_skipreg_cnt++;
		return;
	}
	gvcst_init(gv_cur_region);
	if (gv_cur_region->was_open)
	{	/* already open under another name */
		gv_cur_region->open = FALSE;
		return;
	}
	change_reg();
	csa = &FILE_INFO(gv_cur_region)->s_addrs;
	cnl = csa->nl;
	csd = csa->hdr;
	read_only = gv_cur_region->read_only;
	assert(NULL != mu_int_master);
	/* Ensure that we don't see an increase in the file header and master map size compared to their maximum values */
	assert(SGMNT_HDR_LEN >= SIZEOF(sgmnt_data) && (MASTER_MAP_SIZE_MAX >= MASTER_MAP_SIZE(csd)));
	/* ONLINE INTEG if asked for explicitly by specifying -ONLINE is an error if the db has partial V4 blocks.
	 * However, if -ONLINE is not explicitly specified but rather assumed implicitly (as default for -REG)
	 * then turn off ONLINE INTEG for this region and continue as if -NOONLINE was specified.
	 */
#	ifdef GTM_SNAPSHOT
	if (!csd->fully_upgraded)
	{
		ointeg_this_reg = FALSE;	/* Turn off ONLINE INTEG for this region */
		if (online_specified)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_SSV4NOALLOW, 2, DB_LEN_STR(gv_cur_region));
			util_out_print(NO_ONLINE_ERR_MSG, TRUE);
			mu_int_skipreg_cnt++;
			return;
		}
	}
#	endif
	if (!ointeg_this_reg || read_only)
	{
		status = region_freeze(gv_cur_region, TRUE, FALSE, TRUE);
		switch (status)
		{
			case REG_ALREADY_FROZEN:
				UNIX_ONLY(if (csa->read_only_fs) break);
				util_out_print("!/Database for region !AD is already frozen, not integing",
					TRUE, REG_LEN_STR(gv_cur_region));
				mu_int_skipreg_cnt++;
				return;
			case REG_HAS_KIP:
				/* We have already waited for KIP to reset. This time do not wait for KIP */
				status = region_freeze(gv_cur_region, TRUE, FALSE, FALSE);
				if (REG_ALREADY_FROZEN == status)
				{
					UNIX_ONLY(if (csa->read_only_fs) break);
					util_out_print("!/Database for region !AD is already frozen, not integing",
						TRUE, REG_LEN_STR(gv_cur_region));
					mu_int_skipreg_cnt++;
					return;
				}
				break;
			case REG_FREEZE_SUCCESS:
				break;
			default:
				assert(FALSE);
		}
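/* gds_rundown: detach this process from the current region. Cancels db timers, acquires (or, where permitted,
 * bypasses) the ftok and access control semaphores, flushes dirty buffers, the journal and the file header if
 * this process is the last writer, closes the journal file cleanly, cleans up orphaned snapshots, and removes
 * the shared memory and semaphores if this process is the last user. Returns EXIT_NRM or EXIT_ERR.
 */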
int4 gds_rundown(void) { boolean_t canceled_dbsync_timer, canceled_flush_timer, ok_to_write_pfin; boolean_t have_standalone_access, ipc_deleted, err_caught; boolean_t is_cur_process_ss_initiator, remove_shm, vermismatch, we_are_last_user, we_are_last_writer, is_mm; boolean_t unsafe_last_writer; char time_str[CTIME_BEFORE_NL + 2]; /* for GET_CUR_TIME macro */ gd_region *reg; int save_errno, status, rc; int4 semval, ftok_semval, sopcnt, ftok_sopcnt; short crash_count; sm_long_t munmap_len; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; node_local_ptr_t cnl; struct shmid_ds shm_buf; struct sembuf sop[2], ftok_sop[2]; uint4 jnl_status; unix_db_info *udi; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; shm_snapshot_t *ss_shm_ptr; uint4 ss_pid, onln_rlbk_pid, holder_pid; boolean_t was_crit; boolean_t safe_mode; /* Do not flush or take down shared memory. */ boolean_t bypassed_ftok = FALSE, bypassed_access = FALSE, may_bypass_ftok, inst_is_frozen, ftok_counter_halted, access_counter_halted; int secshrstat; intrpt_state_t prev_intrpt_state; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; jnl_status = 0; reg = gv_cur_region; /* Local copy */ /* early out for cluster regions * to avoid tripping the assert below. * Note: * This early out is consistent with VMS. It has been * noted that all of the gtcm assignments * to gv_cur_region should use the TP_CHANGE_REG * macro. This would also avoid the assert problem * and should be done eventually. */ if (dba_cm == reg->dyn.addr->acc_meth) return EXIT_NRM; udi = FILE_INFO(reg); csa = &udi->s_addrs; csd = csa->hdr; assert(csa == cs_addrs && csd == cs_data); if ((reg->open) && (dba_usr == csd->acc_meth)) { change_reg(); gvusr_rundown(); return EXIT_NRM; } /* If the process has standalone access, it has udi->grabbed_access_sem set to TRUE at this point. Note that down in a local * variable as the udi->grabbed_access_sem is set to TRUE even for non-standalone access below and hence we can't rely on * that later to determine if the process had standalone access or not when it entered this function. We need to guarantee * that none else access database file header when semid/shmid fields are reset. We already have created ftok semaphore in * db_init or, mu_rndwn_file and did not remove it. So just lock it. We do it in blocking mode. */ have_standalone_access = udi->grabbed_access_sem; /* process holds standalone access */ DEFER_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); ESTABLISH_NORET(gds_rundown_ch, err_caught); if (err_caught) { REVERT; WITH_CH(gds_rundown_ch, gds_rundown_err_cleanup(have_standalone_access), 0); ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); DEBUG_ONLY(ok_to_UNWIND_in_exit_handling = FALSE); return EXIT_ERR; } assert(reg->open); /* if we failed to open, dbinit_ch should have taken care of proper clean up */ assert(!reg->opening); /* see comment above */ assert((dba_bg == csd->acc_meth) || (dba_mm == csd->acc_meth)); is_mm = (dba_bg != csd->acc_meth); assert(!csa->hold_onto_crit || (csa->now_crit && jgbl.onlnrlbk)); /* If we are online rollback, we should already be holding crit and should release it only at the end of this module. This * is usually done by noting down csa->now_crit in a local variable (was_crit) and using it whenever we are about to * grab_crit. But, there are instances (like mupip_set_journal.c) where we grab_crit but invoke gds_rundown without any * preceeding rel_crit. Such code relies on the fact that gds_rundown does rel_crit unconditionally (to get locks to a known * state). 
So, augment csa->now_crit with jgbl.onlnrlbk to track if we can rel_crit unconditionally or not in gds_rundown. */ was_crit = (csa->now_crit && jgbl.onlnrlbk); /* Cancel any pending flush timer for this region by this task */ canceled_flush_timer = FALSE; canceled_dbsync_timer = FALSE; CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer); we_are_last_user = FALSE; inst_is_frozen = IS_REPL_INST_FROZEN && REPL_ALLOWED(csa->hdr); if (!csa->persistent_freeze) region_freeze(reg, FALSE, FALSE, FALSE); if (!was_crit) { rel_crit(reg); /* get locks to known state */ mutex_cleanup(reg); } /* The only process that can invoke gds_rundown while holding access control semaphore is RECOVER/ROLLBACK. All the others * (like MUPIP SET -FILE/MUPIP EXTEND would have invoked db_ipcs_reset() before invoking gds_rundown (from * mupip_exit_handler). The only exception is when these processes encounter a terminate signal and they reach * mupip_exit_handler while holding access control semaphore. Assert accordingly. */ assert(!have_standalone_access || mupip_jnl_recover || process_exiting); /* If we have standalone access, then ensure that a concurrent online rollback cannot be running at the same time as it * needs the access control lock as well. The only expection is we are online rollback and currently running down. */ cnl = csa->nl; onln_rlbk_pid = cnl->onln_rlbk_pid; assert(!have_standalone_access || mupip_jnl_recover || !onln_rlbk_pid || !is_proc_alive(onln_rlbk_pid, 0)); if (!have_standalone_access) { if (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))) /* Check # of procs counted on FTOK */ { save_errno = errno; assert(FALSE); rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno); } may_bypass_ftok = CAN_BYPASS(ftok_semval, csd, inst_is_frozen); /* Do we need a blocking wait? */ /* We need to guarantee that no one else access database file header when semid/shmid fields are reset. * We already have created ftok semaphore in db_init or mu_rndwn_file and did not remove it. So just lock it. */ if (!ftok_sem_lock(reg, may_bypass_ftok)) { if (may_bypass_ftok) { /* We did a non-blocking wait. It's ok to proceed without locking */ bypassed_ftok = TRUE; holder_pid = semctl(udi->ftok_semid, DB_CONTROL_SEM, GETPID); if ((uint4)-1 == holder_pid) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"), CALLFROM, errno); if (!IS_GTM_IMAGE) /* MUMPS processes should not flood syslog with bypass messages. */ { send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10, LEN_AND_STR(gtmImageNames[image_type].imageName), process_id, LEN_AND_LIT("FTOK"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid); send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("FTOK bypassed at rundown")); } } else { /* We did a blocking wait but something bad happened. 
*/ FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_lock, process_id); rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } } sop[0].sem_num = DB_CONTROL_SEM; sop[0].sem_op = 0; /* Wait for 0 */ sop[1].sem_num = DB_CONTROL_SEM; sop[1].sem_op = 1; /* Lock */ sopcnt = 2; sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | IPC_NOWAIT; /* Don't wait the first time thru */ SEMOP(udi->semid, sop, sopcnt, status, NO_WAIT); if (0 != status) { save_errno = errno; /* Check # of processes counted on access sem. */ if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL))) { assert(FALSE); rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno); } bypassed_access = CAN_BYPASS(semval, csd, inst_is_frozen) || onln_rlbk_pid || csd->file_corrupt; /* Before attempting again in the blocking mode, see if the holding process is an online rollback. * If so, it is likely we won't get the access control semaphore anytime soon. In that case, we * are better off skipping rundown and continuing with sanity cleanup and exit. */ holder_pid = semctl(udi->semid, DB_CONTROL_SEM, GETPID); if ((uint4)-1 == holder_pid) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"), CALLFROM, errno); if (!bypassed_access) { /* We couldn't get it in one shot-- see if we already have it */ if (holder_pid == process_id) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(5) MAKE_MSG_INFO(ERR_CRITSEMFAIL), 2, DB_LEN_STR(reg), ERR_RNDWNSEMFAIL); REVERT; ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); assert(FALSE); return EXIT_ERR; } if (EAGAIN != save_errno) { assert(FALSE); rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"), CALLFROM, save_errno); } sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO; /* Try again - blocking this time */ SEMOP(udi->semid, sop, 2, status, FORCED_WAIT); if (-1 == status) /* We couldn't get it at all.. */ rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"), CALLFROM, errno); } else if (!IS_GTM_IMAGE) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10, LEN_AND_STR(gtmImageNames[image_type].imageName), process_id, LEN_AND_LIT("access control"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid); send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2, LEN_AND_LIT("Access control bypassed at rundown")); } udi->grabbed_access_sem = !bypassed_access; } } /* else we we hold the access control semaphore and therefore have standalone access. We do not release it now - we * release it later in mupip_exit_handler.c. Since we already hold the access control semaphore, we don't need the * ftok semaphore and trying it could cause deadlock */ /* Note that in the case of online rollback, "udi->grabbed_access_sem" (and in turn "have_standalone_access") is TRUE. * But there could be other processes still having the database open so we cannot safely reset the halted fields. */ if (have_standalone_access && !jgbl.onlnrlbk) csd->ftok_counter_halted = csd->access_counter_halted = FALSE; ftok_counter_halted = csd->ftok_counter_halted; access_counter_halted = csd->access_counter_halted; /* If we bypassed any of the semaphores, activate safe mode. 
* Also, if the replication instance is frozen and this db has replication turned on (which means * no flushes of dirty buffers to this db can happen while the instance is frozen) activate safe mode. */ ok_to_write_pfin = !(bypassed_access || bypassed_ftok || inst_is_frozen); safe_mode = !ok_to_write_pfin || ftok_counter_halted || access_counter_halted; /* At this point we are guaranteed no one else is doing a db_init/rundown as we hold the access control semaphore */ assert(csa->ref_cnt); /* decrement private ref_cnt before shared ref_cnt decrement. */ csa->ref_cnt--; /* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */ assert(!csa->ref_cnt); --cnl->ref_cnt; if (memcmp(cnl->now_running, gtm_release_name, gtm_release_name_len + 1)) { /* VERMISMATCH condition. Possible only if DSE */ assert(dse_running); vermismatch = TRUE; } else vermismatch = FALSE; if (-1 == shmctl(udi->shmid, IPC_STAT, &shm_buf)) { save_errno = errno; rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown shmctl"), CALLFROM, save_errno); } else we_are_last_user = (1 == shm_buf.shm_nattch) && !vermismatch && !safe_mode; /* recover => one user except ONLINE ROLLBACK, or standalone with frozen instance */ assert(!have_standalone_access || we_are_last_user || jgbl.onlnrlbk || inst_is_frozen); if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno); /* There's one writer left and I am it */ assert(reg->read_only || semval >= 0); unsafe_last_writer = (DB_COUNTER_SEM_INCR == semval) && (FALSE == reg->read_only) && !vermismatch; we_are_last_writer = unsafe_last_writer && !safe_mode; assert(!we_are_last_writer || !safe_mode); assert(!we_are_last_user || !safe_mode); /* recover + R/W region => one writer except ONLINE ROLLBACK, or standalone with frozen instance, leading to safe_mode */ assert(!(have_standalone_access && !reg->read_only) || we_are_last_writer || jgbl.onlnrlbk || inst_is_frozen); GTM_WHITE_BOX_TEST(WBTEST_ANTIFREEZE_JNLCLOSE, we_are_last_writer, 1); /* Assume we are the last writer to invoke wcs_flu */ if (!have_standalone_access && (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL)))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno); if (NULL != csa->ss_ctx) ss_destroy_context(csa->ss_ctx); /* SS_MULTI: If multiple snapshots are supported, then we have to run through each of the snapshots */ assert(1 == MAX_SNAPSHOTS); ss_shm_ptr = (shm_snapshot_ptr_t)SS_GETSTARTPTR(csa); ss_pid = ss_shm_ptr->ss_info.ss_pid; is_cur_process_ss_initiator = (process_id == ss_pid); if (ss_pid && (is_cur_process_ss_initiator || we_are_last_user)) { /* Try getting snapshot crit latch. If we don't get latch, we won't hang for eternity and will skip * doing the orphaned snapshot cleanup. It will be cleaned up eventually either by subsequent MUPIP * INTEG or by a MUPIP RUNDOWN. 
*/ if (ss_get_lock_nowait(reg) && (ss_pid == ss_shm_ptr->ss_info.ss_pid) && (is_cur_process_ss_initiator || !is_proc_alive(ss_pid, 0))) { ss_release(NULL); ss_release_lock(reg); } } /* If cnl->donotflush_dbjnl is set, it means mupip recover/rollback was interrupted and therefore we need not flush * shared memory contents to disk as they might be in an inconsistent state. Moreover, any more flushing will only cause * future rollback to undo more journal records (PBLKs). In this case, we will go ahead and remove shared memory (without * flushing the contents) in this routine. A reissue of the recover/rollback command will restore the database to a * consistent state. */ if (!cnl->donotflush_dbjnl && !reg->read_only && !vermismatch) { /* If we had an orphaned block and were interrupted, set wc_blocked so we can invoke wcs_recover. Do it ONLY * if there is NO concurrent online rollback running (as we need crit to set wc_blocked) */ if (csa->wbuf_dqd && !is_mm) { /* If we had an orphaned block and were interrupted, mupip_exit_handler will invoke secshr_db_clnup which * will clear this field and so we should never come to gds_rundown with a non-zero wbuf_dqd. The only * exception is if we are recover/rollback in which case gds_rundown (from mur_close_files) is invoked * BEFORE secshr_db_clnup in mur_close_files. * Note: It is NOT possible for online rollback to reach here with wbuf_dqd being non-zero. This is because * the moment we apply the first PBLK, we stop all interrupts and hence can never be interrupted in * wcs_wtstart or wcs_get_space. Assert accordingly. */ assert(mupip_jnl_recover && !jgbl.onlnrlbk && !safe_mode); if (!was_crit) grab_crit(reg); SET_TRACEABLE_VAR(cnl->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wcb_gds_rundown); send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_gds_rundown"), process_id, &csa->ti->curr_tn, DB_LEN_STR(reg)); csa->wbuf_dqd = 0; wcs_recover(reg); BG_TRACE_PRO_ANY(csa, lost_block_recovery); if (!was_crit) rel_crit(reg); } if (JNL_ENABLED(csd) && IS_GTCM_GNP_SERVER_IMAGE) originator_prc_vec = NULL; /* If we are the last writing user, then everything must be flushed */ if (we_are_last_writer) { /* Time to flush out all of our buffers */ assert(!safe_mode); if (is_mm) { MM_DBFILEXT_REMAP_IF_NEEDED(csa, reg); cnl->remove_shm = TRUE; } if (cnl->wc_blocked && jgbl.onlnrlbk) { /* if the last update done by online rollback was not committed in the normal code-path but was * completed by secshr_db_clnup, wc_blocked will be set to TRUE. But, since online rollback never * invokes grab_crit (since csa->hold_onto_crit is set to TRUE), wcs_recover is never invoked. This * could result in the last update never getting flushed to the disk and if online rollback happened * to be the last writer then the shared memory will be flushed and removed and the last update will * be lost. So, force wcs_recover if we find ourselves in such a situation. But, wc_blocked is * possible only if phase1 or phase2 errors are induced using white box test cases */ assert(WB_COMMIT_ERR_ENABLED); wcs_recover(reg); } /* Note WCSFLU_SYNC_EPOCH ensures the epoch is synced to the journal and indirectly * also ensures that the db is fsynced. We don't want to use it in the calls to * wcs_flu() from t_end() and tp_tend() since we can defer it to out-of-crit there. * In this case, since we are running down, we don't have any such option. 
*/ cnl->remove_shm = wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); /* Since we_are_last_writer, we should be guaranteed that wcs_flu() did not change csd (in * case of MM for potential file extension), even if it did a grab_crit(). Therefore, make * sure that's true. */ assert(csd == csa->hdr); assert(0 == memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 1)); } else if (((canceled_flush_timer && (0 > cnl->wcs_timers)) || canceled_dbsync_timer) && !inst_is_frozen) { /* canceled pending db or jnl flush timers - flush database and journal buffers to disk */ if (!was_crit) grab_crit(reg); /* we need to sync the epoch as the fact that there is no active pending flush timer implies * there will be no one else who will flush the dirty buffers and EPOCH to disk in a timely fashion */ wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); if (!was_crit) rel_crit(reg); assert((dba_mm == cs_data->acc_meth) || (csd == cs_data)); csd = cs_data; /* In case this is MM and wcs_flu() remapped an extended database, reset csd */ } /* Do rundown journal processing after buffer flushes since they require jnl to be open */ if (JNL_ENABLED(csd)) { /* the following tp_change_reg() is not needed due to the assert csa == cs_addrs at the beginning * of gds_rundown(), but just to be safe. To be removed by 2002!! --- nars -- 2001/04/25. */ tp_change_reg(); /* call this because jnl_ensure_open checks cs_addrs rather than gv_cur_region */ jpc = csa->jnl; jbp = jpc->jnl_buff; if (jbp->fsync_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->fsync_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if (jbp->io_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->io_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if ((((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc)) || we_are_last_writer && (0 != cnl->jnl_file.u.inode)) && ok_to_write_pfin) { /* We need to close the journal file cleanly if we have the latest generation journal file open * or if we are the last writer and the journal file is open in shared memory (not necessarily * by ourselves e.g. the only process that opened the journal got shot abnormally) * Note: we should not infer anything from the shared memory value of cnl->jnl_file.u.inode * if we are not the last writer as it can be concurrently updated. */ if (!was_crit) grab_crit(reg); if (JNL_ENABLED(csd)) { SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pini/pfin/jnl_file_close all need it */ /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { /* If we_are_last_writer, we would have already done a wcs_flu() which would * have written an epoch record and we are guaranteed no further updates * since we are the last writer. So, just close the journal. * If the freeaddr == post_epoch_freeaddr, wcs_flu may have skipped writing * a pini, so allow for that. */ assert(!jbp->before_images || is_mm || !we_are_last_writer || (0 != jpc->pini_addr) || jgbl.mur_extract || (jpc->jnl_buff->freeaddr == jpc->jnl_buff->post_epoch_freeaddr)); /* If we haven't written a pini, let jnl_file_close write the pini/pfin.
*/ if (!jgbl.mur_extract && (0 != jpc->pini_addr)) jnl_put_jrt_pfin(csa); /* If not the last writer and no pending flush timer left, do jnl flush now */ if (!we_are_last_writer && (0 > cnl->wcs_timers)) { if (SS_NORMAL == (jnl_status = jnl_flush(reg))) { assert(jbp->freeaddr == jbp->dskaddr); jnl_fsync(reg, jbp->dskaddr); assert(jbp->fsync_dskaddr == jbp->dskaddr); } else { send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush in gds_rundown"), jnl_status); assert(NOJNL == jpc->channel);/* jnl file lost has been triggered */ /* In this routine, all code that follows from here on does not * assume anything about the journaling characteristics of this * database so it is safe to continue execution even though * journaling got closed in the middle. */ } } jnl_file_close(reg, we_are_last_writer, FALSE); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(reg)); } if (!was_crit) rel_crit(reg); } } if (we_are_last_writer) /* Flush the fileheader last and harden the file to disk */ { if (!was_crit) grab_crit(reg); /* To satisfy crit requirement in fileheader_sync() */ memset(csd->machine_name, 0, MAX_MCNAMELEN); /* clear the machine_name field */ if (!have_standalone_access && we_are_last_user) { /* mupip_exit_handler will do this after mur_close_file */ csd->semid = INVALID_SEMID; csd->shmid = INVALID_SHMID; csd->gt_sem_ctime.ctime = 0; csd->gt_shm_ctime.ctime = 0; } fileheader_sync(reg); if (!was_crit) rel_crit(reg); if (!is_mm) { GTM_DB_FSYNC(csa, udi->fd, rc); /* Sync it all */ if (-1 == rc) { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } else { /* Now do final MM file sync before exit */ assert(csa->ti->total_blks == csa->total_blks); #ifdef _AIX GTM_DB_FSYNC(csa, udi->fd, rc); if (-1 == rc) #else if (-1 == MSYNC((caddr_t)csa->db_addrs[0], (caddr_t)csa->db_addrs[1])) #endif { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } } else if (unsafe_last_writer && !cnl->lastwriterbypas_msg_issued) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_LASTWRITERBYPAS, 2, DB_LEN_STR(reg)); cnl->lastwriterbypas_msg_issued = TRUE; } } /* end if (!reg->read_only && !cnl->donotflush_dbjnl) */ /* We had canceled all db timers at start of rundown. In case as part of rundown (wcs_flu above), we had started * any timers, cancel them BEFORE setting reg->open to FALSE (assert in wcs_clean_dbsync relies on this). */ CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer); if (reg->read_only && we_are_last_user && !have_standalone_access && cnl->remove_shm) { /* mupip_exit_handler will do this after mur_close_file */ db_ipcs.semid = INVALID_SEMID; db_ipcs.shmid = INVALID_SHMID; db_ipcs.gt_sem_ctime = 0; db_ipcs.gt_shm_ctime = 0; db_ipcs.fn_len = reg->dyn.addr->fname_len; memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len); db_ipcs.fn[reg->dyn.addr->fname_len] = 0; /* request gtmsecshr to flush. 
read_only cannot flush itself */ WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa); if (!csa->read_only_fs) { secshrstat = send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0); if (0 != secshrstat) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gtmsecshr failed to update database file header")); } } /* Done with file now, close it */ CLOSEFILE_RESET(udi->fd, rc); /* resets "udi->fd" to FD_INVALID */ if (-1 == rc) { rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during file close"), errno); } /* Unmap storage if mm mode but only the part that is not the fileheader (so shows up in dumps) */ # if !defined(_AIX) if (is_mm && (NULL != csa->db_addrs[0])) { assert(csa->db_addrs[1] > csa->db_addrs[0]); munmap_len = (sm_long_t)(csa->db_addrs[1] - csa->db_addrs[0]); if (0 < munmap_len) munmap((caddr_t)(csa->db_addrs[0]), (size_t)(munmap_len)); } # endif /* Detach our shared memory while still under lock so reference counts will be correct for the next process to run down * this region. In the process also get the remove_shm status from node_local before detaching. * If cnl->donotflush_dbjnl is TRUE, it means we can safely remove shared memory without compromising data * integrity as a reissue of recover will restore the database to a consistent state. */ remove_shm = !vermismatch && (cnl->remove_shm || cnl->donotflush_dbjnl); /* We are done with online rollback on this region. Indicate to other processes by setting the onln_rlbk_pid to 0. * Do it before releasing crit (t_end relies on this ordering when accessing cnl->onln_rlbk_pid). */ if (jgbl.onlnrlbk) cnl->onln_rlbk_pid = 0; rel_crit(reg); /* Since we are about to detach from the shared memory, release crit and reset onln_rlbk_pid */ /* If we had skipped flushing journal and database buffers due to a concurrent online rollback, increment the counter * indicating that in the shared memory so that online rollback can report the # of such processes when it shuts down. * The same thing is done for both FTOK and access control semaphores when there are too many MUMPS processes. */ if (safe_mode) /* indicates flushing was skipped */ { if (bypassed_access) cnl->dbrndwn_access_skip++; /* Access semaphore can be bypassed during online rollback */ if (bypassed_ftok) cnl->dbrndwn_ftok_skip++; } if (jgbl.onlnrlbk) csa->hold_onto_crit = FALSE; GTM_WHITE_BOX_TEST(WBTEST_HOLD_SEM_BYPASS, cnl->wbox_test_seq_num, 0); status = shmdt((caddr_t)cnl); csa->nl = NULL; /* dereferencing nl after detach is not right, so we set it to NULL so that we can test before dereference*/ /* Note that although csa->nl is NULL, we use CSA_ARG(csa) below (not CSA_ARG(NULL)) to be consistent with similar * usages before csa->nl became NULL. The "is_anticipatory_freeze_needed" function (which is in turn called by the * CHECK_IF_FREEZE_ON_ERROR_NEEDED macro) does a check of csa->nl before dereferencing shared memory contents so * we are safe passing "csa". 
*/ if (-1 == status) send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during shmdt"), errno); REMOVE_CSA_FROM_CSADDRSLIST(csa); /* remove "csa" from list of open regions (cs_addrs_list) */ reg->open = FALSE; /* If file is still not in good shape, die here and now before we get rid of our storage */ assertpro(0 == csa->wbuf_dqd); ipc_deleted = FALSE; /* If we are the very last user, remove shared storage id and the semaphores */ if (we_are_last_user) { /* remove shared storage, only if last writer to rundown did a successful wcs_flu() */ assert(!vermismatch); if (remove_shm) { ipc_deleted = TRUE; if (0 != shm_rmid(udi->shmid)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove shared memory")); /* Note that we no longer have a new shared memory. Currently only used/usable for standalone rollback. */ udi->new_shm = FALSE; /* mupip recover/rollback don't release the semaphore here, but do it later in db_ipcs_reset (invoked from * mur_close_files()) */ if (!have_standalone_access) { if (0 != sem_rmid(udi->semid)) rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove semaphore")); udi->new_sem = FALSE; /* Note that we no longer have a new semaphore */ udi->grabbed_access_sem = FALSE; udi->counter_acc_incremented = FALSE; } } else if (is_src_server || is_updproc) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else { assert(!have_standalone_access || jgbl.onlnrlbk || safe_mode); if (!jgbl.onlnrlbk && !have_standalone_access) { /* If we were writing, get rid of our writer access count semaphore */ if (!reg->read_only) { if (!access_counter_halted) { save_errno = do_semop(udi->semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO); if (0 != save_errno) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown access control semaphore decrement"), CALLFROM, save_errno); } udi->counter_acc_incremented = FALSE; } assert(safe_mode || !bypassed_access); /* Now remove the rundown lock */ if (!bypassed_access) { if (0 != (save_errno = do_semop(udi->semid, DB_CONTROL_SEM, -1, SEM_UNDO))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5, RTS_ERROR_TEXT("gds_rundown access control semaphore release"), CALLFROM, save_errno); udi->grabbed_access_sem = FALSE; } } /* else access control semaphore will be released in db_ipcs_reset */ } if (!have_standalone_access) { if (bypassed_ftok) { if (!ftok_counter_halted) if (0 != (save_errno = do_semop(udi->ftok_semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } else if (!ftok_sem_release(reg, !ftok_counter_halted, FALSE)) { FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_release, process_id); rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); } udi->grabbed_ftok_sem = FALSE; udi->counter_ftok_incremented = FALSE; } ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state); if (!ipc_deleted) { GET_CUR_TIME(time_str); if (is_src_server) gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, 
time_str, LEN_AND_LIT("Source server"), REG_LEN_STR(reg)); if (is_updproc) gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Update process"), REG_LEN_STR(reg)); if (mupip_jnl_recover && (!jgbl.onlnrlbk || !we_are_last_user)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); } } REVERT; return EXIT_NRM; }
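/* The rundown above derives "last user" from the shared memory attach count and "last writer" from
 * the access-control counter semaphore, both sampled only while the access control semaphore is
 * held so no concurrent db_init/rundown can race the test. A minimal standalone sketch of the
 * shared-memory half of that check (hypothetical helper, not part of this codebase; requires
 * <sys/ipc.h> and <sys/shm.h>):
 */
static int sketch_is_last_attacher(int shmid)
{
	struct shmid_ds shm_buf;

	if (-1 == shmctl(shmid, IPC_STAT, &shm_buf))
		return -1;			/* caller reports CRITSEMFAIL, as gds_rundown() does */
	return (1 == shm_buf.shm_nattch);	/* our attachment is the only one left */
}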
void gds_rundown(void) { bool is_mm, we_are_last_user, we_are_last_writer; boolean_t ipc_deleted, remove_shm, cancelled_timer, cancelled_dbsync_timer, vermismatch; now_t now; /* for GET_CUR_TIME macro */ char *time_ptr, time_str[CTIME_BEFORE_NL + 2]; /* for GET_CUR_TIME macro */ gd_region *reg; int save_errno, status; int4 semval, ftok_semval, sopcnt, ftok_sopcnt; short crash_count; sm_long_t munmap_len; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; struct shmid_ds shm_buf; struct sembuf sop[2], ftok_sop[2]; uint4 jnl_status; unix_db_info *udi; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; error_def(ERR_CRITSEMFAIL); error_def(ERR_DBCCERR); error_def(ERR_DBFILERR); error_def(ERR_DBRNDWNWRN); error_def(ERR_ERRCALL); error_def(ERR_GBLOFLOW); error_def(ERR_GTMASSERT); error_def(ERR_IPCNOTDEL); error_def(ERR_JNLFLUSH); error_def(ERR_RNDWNSEMFAIL); error_def(ERR_TEXT); error_def(ERR_WCBLOCKED); forced_exit = FALSE; /* Okay, we're dying already -- let rel_crit live in peace now. * If coming through a DAL, not necessarily dying. What to do then? -- nars -- 8/15/2001 */ grabbed_access_sem = FALSE; jnl_status = 0; reg = gv_cur_region; /* Local copy */ /* * early out for cluster regions * to avoid tripping the assert below. * Note: * This early out is consistent with VMS. It has been * noted that all of the gtcm assignments * to gv_cur_region should use the TP_CHANGE_REG * macro. This would also avoid the assert problem * and should be done eventually. */ if (dba_cm == reg->dyn.addr->acc_meth) return; udi = FILE_INFO(reg); csa = &udi->s_addrs; csd = csa->hdr; assert(csa == cs_addrs && csd == cs_data); if ((reg->open) && (dba_usr == csd->acc_meth)) { change_reg(); gvusr_rundown(); return; } ESTABLISH(gds_rundown_ch); if (!reg->open) /* Not open, no point to rundown */ { if (reg->opening) /* Died partway open, kill rest of way */ { rel_crit(reg); mutex_cleanup(reg); /* revisit this to handle MM properly SMW 98/12/16 if (NULL != csa->nl) { status = shmdt((caddr_t)csa->nl); if (-1 == status) send_msg(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during shmdt"), errno); } */ shmdt((caddr_t)csa->nl); csa->nl = NULL; } REVERT; return; } switch(csd->acc_meth) { /* Pass mm and bg through */ case dba_bg: is_mm = FALSE; break; case dba_mm: is_mm = TRUE; break; case dba_usr: assert(FALSE); default: REVERT; return; } /* Cancel any pending flush timer for this region by this task */ CANCEL_DB_TIMERS(reg, cancelled_timer, cancelled_dbsync_timer); we_are_last_user = FALSE; if (!csa->persistent_freeze) region_freeze(reg, FALSE, FALSE, FALSE); assert(!csa->read_lock); rel_crit(reg); /* get locks to known state */ mutex_cleanup(reg); /* * We need to guarantee that no one else accesses the database file header when the semid/shmid fields are reset. * We have already created the ftok semaphore in db_init or mu_rndwn_file and did not remove it. * So just lock it. We do it in blocking mode. */ if (!ftok_sem_lock(reg, FALSE, FALSE)) rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); /* * For mupip_jnl_recover we already have the database access control semaphore. * We do not release it. We release it from mur_close_files.
*/ if (!mupip_jnl_recover) { sop[0].sem_num = 0; sop[0].sem_op = 0; /* Wait for 0 */ sop[1].sem_num = 0; sop[1].sem_op = 1; /* Lock */ sopcnt = 2; sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | IPC_NOWAIT; /* Don't wait the first time thru */ SEMOP(udi->semid, sop, sopcnt, status); if (-1 == status) /* We couldn't get it in one shot -- see if we already have it */ { save_errno = errno; /* see comment about Linux specific difference in behaviour of semctl() with GETPID in gds_rundown_ch() */ if (semctl(udi->semid, 0, GETPID) == process_id) { send_msg(VARLSTCNT(5) MAKE_MSG_INFO(ERR_CRITSEMFAIL), 2, DB_LEN_STR(reg), ERR_RNDWNSEMFAIL); REVERT; return; /* Already in rundown for this region */ } if (EAGAIN != save_errno) { assert(FALSE); rts_error(VARLSTCNT(9) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gds_rundown first semop/semctl"), save_errno); } sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO; /* Try again - blocking this time */ SEMOP(udi->semid, sop, 2, status); if (-1 == status) /* We couldn't get it at all.. */ rts_error(VARLSTCNT(5) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), errno); } } grabbed_access_sem = TRUE; /* * We now have the dbinit/rundown lock, so we are alone in this code for this region * and nobody else can attach. * See if we are all alone in accessing this database shared memory. */ assert(csa->ref_cnt); /* decrement private ref_cnt before shared ref_cnt decrement. */ csa->ref_cnt--; /* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */ assert(!csa->ref_cnt); --csa->nl->ref_cnt; if (memcmp(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1)) { /* VERMISMATCH condition. Possible only if DSE */ assert(dse_running); vermismatch = TRUE; } else vermismatch = FALSE; if (-1 == shmctl(udi->shmid, IPC_STAT, &shm_buf)) { save_errno = errno; rts_error(VARLSTCNT(9) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gds_rundown shmctl"), save_errno); } else we_are_last_user = (1 == shm_buf.shm_nattch) && !vermismatch; assert(!mupip_jnl_recover || we_are_last_user); /* recover => one user */ if (-1 == (semval = semctl(udi->semid, 1, GETVAL))) rts_error(VARLSTCNT(5) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), errno); we_are_last_writer = (1 == semval) && (FALSE == reg->read_only) && !vermismatch;/* There's one writer left and I am it */ assert(!(mupip_jnl_recover && !reg->read_only) || we_are_last_writer); /* recover + R/W region => one writer */ if (-1 == (ftok_semval = semctl(udi->ftok_semid, 1, GETVAL))) rts_error(VARLSTCNT(5) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), errno); /* If csa->nl->donotflush_dbjnl is set, it means mupip recover/rollback was interrupted and therefore we should * not flush shared memory contents to disk as they might be in an inconsistent state. * In this case, we will go ahead and remove shared memory (without flushing the contents) in this routine. * A reissue of the recover/rollback command will restore the database to a consistent state. * Otherwise, if we have write access to this region, let us perform a few writing tasks. 
*/ if (csa->nl->donotflush_dbjnl) csa->wbuf_dqd = 0; /* ignore csa->wbuf_dqd status as we do not care about the cache contents */ else if (!reg->read_only && !vermismatch) { /* If we had an orphaned block and were interrupted, set wc_blocked so we can invoke wcs_recover */ if (csa->wbuf_dqd) { grab_crit(reg); SET_TRACEABLE_VAR(csd->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wcb_gds_rundown); send_msg(VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_gds_rundown"), process_id, &csa->ti->curr_tn, DB_LEN_STR(reg)); csa->wbuf_dqd = 0; wcs_recover(reg); if (is_mm) { assert(FALSE); csd = csa->hdr; } BG_TRACE_PRO_ANY(csa, lost_block_recovery); rel_crit(reg); } if (JNL_ENABLED(csd) && (GTCM_GNP_SERVER_IMAGE == image_type)) originator_prc_vec = NULL; /* If we are the last writing user, then everything must be flushed */ if (we_are_last_writer) { /* Time to flush out all of our buffers */ if (is_mm) { if (csa->total_blks != csa->ti->total_blks) /* do remap if file had been extended */ { grab_crit(reg); wcs_mm_recover(reg); csd = csa->hdr; rel_crit(reg); } csa->nl->remove_shm = TRUE; } /* Note WCSFLU_SYNC_EPOCH ensures the epoch is synced to the journal and indirectly * also ensures that the db is fsynced. We don't want to use it in the calls to * wcs_flu() from t_end() and tp_tend() since we can defer it to out-of-crit there. * In this case, since we are running down, we don't have any such option. */ csa->nl->remove_shm = wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); /* Since we_are_last_writer, we should be guaranteed that wcs_flu() did not change csd (in * case of MM for potential file extension), even if it did a grab_crit(). Therefore, make * sure that's true. */ assert(csd == csa->hdr); assert(0 == memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 1)); csd->trans_hist.header_open_tn = csd->trans_hist.curr_tn; } else if ((cancelled_timer && (0 > csa->nl->wcs_timers)) || cancelled_dbsync_timer) { /* cancelled pending db or jnl flush timers - flush database and journal buffers to disk */ grab_crit(reg); /* we need to sync the epoch as the fact that there is no active pending flush timer implies * there will be no one else who will flush the dirty buffers and EPOCH to disk in a timely fashion */ wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH); rel_crit(reg); assert((dba_mm == cs_data->acc_meth) || (csd == cs_data)); csd = cs_data; /* In case this is MM and wcs_flu() remapped an extended database, reset csd */ } /* Do rundown journal processing after buffer flushes since they require jnl to be open */ if (JNL_ENABLED(csd)) { /* the following tp_change_reg() is not needed due to the assert csa == cs_addrs at the beginning * of gds_rundown(), but just to be safe. To be removed by 2002!! --- nars -- 2001/04/25.
*/ tp_change_reg(); /* call this because jnl_ensure_open checks cs_addrs rather than gv_cur_region */ jpc = csa->jnl; jbp = jpc->jnl_buff; if (jbp->fsync_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->fsync_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if (jbp->io_in_prog_latch.u.parts.latch_pid == process_id) { assert(FALSE); COMPSWAP_UNLOCK(&jbp->io_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0); } if (((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc)) || we_are_last_writer && (0 != csa->nl->jnl_file.u.inode)) { /* We need to close the journal file cleanly if we have the latest generation journal file open * or if we are the last writer and the journal file is open in shared memory (not necessarily * by ourselves e.g. the only process that opened the journal got shot abnormally) * Note: we should not infer anything from the shared memory value of csa->nl->jnl_file.u.inode * if we are not the last writer as it can be concurrently updated. */ grab_crit(reg); if (JNL_ENABLED(csd)) { SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pini/pfin/jnl_file_close all need it */ /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { /* If we_are_last_writer, we would have already done a wcs_flu() which would * have written an epoch record and we are guaranteed no further updates * since we are the last writer. So, just close the journal. * Although we assert pini_addr should be non-zero for last_writer, we * play it safe in PRO and write a PINI record if not written already. */ assert(!jbp->before_images || is_mm || !we_are_last_writer || 0 != jpc->pini_addr); if (we_are_last_writer && 0 == jpc->pini_addr) jnl_put_jrt_pini(csa); if (0 != jpc->pini_addr) jnl_put_jrt_pfin(csa); /* If not the last writer and no pending flush timer left, do jnl flush now */ if (!we_are_last_writer && (0 > csa->nl->wcs_timers)) { if (SS_NORMAL == (jnl_status = jnl_flush(reg))) { assert(jbp->freeaddr == jbp->dskaddr); jnl_fsync(reg, jbp->dskaddr); assert(jbp->fsync_dskaddr == jbp->dskaddr); } else { send_msg(VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush in gds_rundown"), jnl_status); assert(NOJNL == jpc->channel);/* jnl file lost has been triggered */ /* In this routine, all code that follows from here on does not * assume anything about the journaling characteristics of this * database so it is safe to continue execution even though * journaling got closed in the middle. 
*/ } } jnl_file_close(reg, we_are_last_writer, FALSE); } else send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(reg)); } rel_crit(reg); } } if (we_are_last_writer) /* Flush the fileheader last and harden the file to disk */ { grab_crit(reg); /* To satisfy crit requirement in fileheader_sync() */ memset(csd->machine_name, 0, MAX_MCNAMELEN); /* clear the machine_name field */ if (!mupip_jnl_recover && we_are_last_user) { /* mupip_jnl_recover will do this after mur_close_file */ csd->semid = INVALID_SEMID; csd->shmid = INVALID_SHMID; csd->gt_sem_ctime.ctime = 0; csd->gt_shm_ctime.ctime = 0; } fileheader_sync(reg); rel_crit(reg); if (FALSE == is_mm) { if (-1 == fsync(udi->fd)) /* Sync it all */ { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } } else { /* Now do final MM file sync before exit */ #if !defined(TARGETED_MSYNC) && !defined(NO_MSYNC) if (-1 == fsync(udi->fd)) /* Sync it all */ { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno); } #else if (-1 == msync((caddr_t)csa->db_addrs[0], (size_t)(csa->db_addrs[1] - csa->db_addrs[0]), MS_SYNC)) { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file msync at close"), errno); } #endif } } } /* end if (!reg->read_only && !csa->nl->donotflush_dbjnl) */ if (reg->read_only && we_are_last_user && !mupip_jnl_recover) { /* mupip_jnl_recover will do this after mur_close_file */ db_ipcs.semid = INVALID_SEMID; db_ipcs.shmid = INVALID_SHMID; db_ipcs.gt_sem_ctime = 0; db_ipcs.gt_shm_ctime = 0; db_ipcs.fn_len = reg->dyn.addr->fname_len; memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len); db_ipcs.fn[reg->dyn.addr->fname_len] = 0; /* request gtmsecshr to flush. read_only cannot flush itself */ if (0 != send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0)) rts_error(VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gtmsecshr failed to update database file header")); } /* Done with file now, close it */ if (-1 == close(udi->fd)) { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during file close"), errno); } /* Unmap storage if mm mode but only the part that is not the fileheader (so shows up in dumps) */ if (is_mm) { munmap_len = (sm_long_t)((csa->db_addrs[1] - csa->db_addrs[0]) - ROUND_UP(SIZEOF_FILE_HDR(csa->hdr), MSYNC_ADDR_INCS)); if (munmap_len > 0) { munmap((caddr_t)(csa->db_addrs[0] + ROUND_UP(SIZEOF_FILE_HDR(csa->hdr), MSYNC_ADDR_INCS)), (size_t)(munmap_len)); #ifdef DEBUG_DB64 rel_mmseg((caddr_t)csa->db_addrs[0]); #endif } } /* Detach our shared memory while still under lock so reference counts will be * correct for the next process to run down this region. * In the process also get the remove_shm status from node_local before detaching. * If csa->nl->donotflush_dbjnl is TRUE, it means we can safely remove shared memory without compromising data * integrity as a reissue of recover will restore the database to a consistent state. 
*/ remove_shm = !vermismatch && (csa->nl->remove_shm || csa->nl->donotflush_dbjnl); status = shmdt((caddr_t)csa->nl); csa->nl = NULL; /* dereferencing nl after detach is not right, so we set it to NULL so that we can test before dereference*/ if (-1 == status) send_msg(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error during shmdt"), errno); reg->open = FALSE; /* If file is still not in good shape, die here and now before we get rid of our storage */ if (csa->wbuf_dqd) GTMASSERT; ipc_deleted = FALSE; /* If we are the very last user, remove shared storage id and the semaphores */ if (we_are_last_user) { /* remove shared storage, only if last writer to rundown did a successful wcs_flu() */ assert(!vermismatch); if (remove_shm) { ipc_deleted = TRUE; if (0 != shm_rmid(udi->shmid)) rts_error(VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove shared memory")); } else if (is_src_server || is_updproc) { gtm_putmsg(VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); send_msg(VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); } else send_msg(VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id); /* * Don't release semaphore in case of mupip recover/rollback; since it has standalone access. * It will release the semaphore in mur_close_files. */ if (!mupip_jnl_recover) { if (0 != sem_rmid(udi->semid)) rts_error(VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove semaphore")); grabbed_access_sem = FALSE; } } else { assert(!mupip_jnl_recover); /* If we were writing, get rid of our writer access count semaphore */ if (!reg->read_only) if (0 != (save_errno = do_semop(udi->semid, 1, -1, SEM_UNDO))) rts_error(VARLSTCNT(9) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gds_rundown write semaphore release"), save_errno); /* Now remove the rundown lock */ if (0 != (save_errno = do_semop(udi->semid, 0, -1, SEM_UNDO))) rts_error(VARLSTCNT(9) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_TEXT, 2, RTS_ERROR_TEXT("gds_rundown rundown semaphore release"), save_errno); grabbed_access_sem = FALSE; } if (!ftok_sem_release(reg, !mupip_jnl_recover, FALSE)) rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); if (!ipc_deleted) { GET_CUR_TIME; if (is_src_server) gtm_putmsg(VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_ptr, LEN_AND_LIT("Source server"), REG_LEN_STR(reg)); if (is_updproc) gtm_putmsg(VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_ptr, LEN_AND_LIT("Update process"), REG_LEN_STR(reg)); if (mupip_jnl_recover) { gtm_putmsg(VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_ptr, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); send_msg(VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_ptr, LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg)); } } REVERT; }
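/* gds_rundown() above takes the rundown lock with a single atomic semop of two operations:
 * wait-for-zero then increment, with SEM_UNDO so the kernel undoes the lock if the process dies,
 * and IPC_NOWAIT on the first try so an in-progress rundown surfaces as EAGAIN instead of a hang.
 * A standalone sketch of that first non-blocking attempt (hypothetical helper, not part of this
 * codebase; requires <sys/ipc.h> and <sys/sem.h>):
 */
static int sketch_try_rundown_lock(int semid)
{
	struct sembuf sop[2];

	sop[0].sem_num = 0;	sop[0].sem_op = 0;	/* wait for semaphore 0 to be zero */
	sop[1].sem_num = 0;	sop[1].sem_op = 1;	/* then lock it by incrementing */
	sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | IPC_NOWAIT;
	return semop(semid, sop, 2);	/* -1 with errno EAGAIN => held elsewhere; retry blocking or bail */
}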
void op_gvorder (mval *v) { int4 n; gd_binding *map; mstr name; enum db_acc_method acc_meth; boolean_t found, ok_to_change_currkey; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; acc_meth = gv_cur_region->dyn.addr->acc_meth; /* Modify gv_currkey such that a gvcst_search of the resulting gv_currkey will find the next available subscript. * But in case of dba_usr (the custom implementation of $ORDER which is overloaded for DDP but could be more in the * future) it is better to hand over gv_currkey as it is so the custom implementation can decide what to do with it. */ ok_to_change_currkey = (dba_usr != acc_meth); if (ok_to_change_currkey) { /* Modify gv_currkey to reflect the next possible key value in collating order */ if (!TREF(gv_last_subsc_null) || gv_cur_region->std_null_coll) { *(&gv_currkey->base[0] + gv_currkey->end - 1) = 1; *(&gv_currkey->base[0] + gv_currkey->end + 1) = 0; gv_currkey->end += 1; } else { assert(STR_SUB_PREFIX == gv_currkey->base[gv_currkey->prev]); assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end]); assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end - 1]); assert(2 == (gv_currkey->end - gv_currkey->prev)); *(&gv_currkey->base[0] + gv_currkey->prev) = 01; } } if (gv_currkey->prev) { if (acc_meth == dba_bg || acc_meth == dba_mm) { if (gv_target->root == 0) /* global does not exist */ found = FALSE; else found = gvcst_order(); } else if (acc_meth == dba_cm) found = gvcmx_order(); else found = gvusr_order(); v->mvtype = 0; /* so stp_gcol (if invoked below) can free up space currently occupied by * this to-be-overwritten mval */ if (found) { gv_altkey->prev = gv_currkey->prev; if (!(IS_STP_SPACE_AVAILABLE(MAX_KEY_SZ))) { if (*(&gv_altkey->base[0] + gv_altkey->prev) != 0xFF) n = MAX_FORM_NUM_SUBLEN; else { n = gv_altkey->end - gv_altkey->prev; assert (n > 0); } ENSURE_STP_FREE_SPACE(n); } v->str.addr = (char *)stringpool.free; stringpool.free = gvsub2str (&gv_altkey->base[0] + gv_altkey->prev, stringpool.free, FALSE); v->str.len = INTCAST((char *)stringpool.free - v->str.addr); assert (v->str.addr < (char *)stringpool.top && v->str.addr >= (char *)stringpool.base); assert (v->str.addr + v->str.len <= (char *)stringpool.top && v->str.addr + v->str.len >= (char *)stringpool.base); } else v->str.len = 0; v->mvtype = MV_STR; /* initialize mvtype now that mval has been otherwise completely set up */ if (ok_to_change_currkey) { /* Restore gv_currkey to what it was at function entry time */ if (!TREF(gv_last_subsc_null) || gv_cur_region->std_null_coll) { assert(1 == gv_currkey->base[gv_currkey->end - 2]); assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end-1]); assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end]); gv_currkey->base[gv_currkey->end - 2] = KEY_DELIMITER; gv_currkey->end--; } else { assert(01 == gv_currkey->base[gv_currkey->prev]); gv_currkey->base[gv_currkey->prev] = STR_SUB_PREFIX; } } } else /* the following section is for $O(^gname) */ { assert (2 < gv_currkey->end); assert (gv_currkey->end < (MAX_MIDENT_LEN + 3)); /* until names are not in midents */ map = gd_map + 1; while (map < gd_map_top && (memcmp(gv_currkey->base, map->name, gv_currkey->end == (MAX_MIDENT_LEN + 2) ? 
MAX_MIDENT_LEN : gv_currkey->end - 1) >= 0)) { map++; } for (; map < gd_map_top; ++map) { gv_cur_region = map->reg.addr; if (!gv_cur_region->open) gv_init_reg(gv_cur_region); change_reg(); acc_meth = gv_cur_region->dyn.addr->acc_meth; for (; ;) /* search region, entries in directory tree could be empty */ { if (acc_meth == dba_bg || acc_meth == dba_mm) { gv_target = cs_addrs->dir_tree; found = gvcst_order (); } else if (acc_meth == dba_cm) found = gvcmx_order (); else found = gvusr_order(); if (!found) break; assert (1 < gv_altkey->end); assert (gv_altkey->end < (MAX_MIDENT_LEN + 2)); /* until names are not in midents */ if (memcmp(gv_altkey->base, map->name, gv_altkey->end - 1) > 0) { found = FALSE; break; } name.addr = (char *)&gv_altkey->base[0]; name.len = gv_altkey->end - 1; if (acc_meth == dba_cm) break; GV_BIND_NAME_AND_ROOT_SEARCH(gd_header, &name); if (gv_cur_region != map->reg.addr) { found = FALSE; break; } if ((gv_target->root != 0) && (gvcst_data() != 0)) break; *(&gv_currkey->base[0] + gv_currkey->end - 1) = 1; *(&gv_currkey->base[0] + gv_currkey->end + 1) = 0; gv_currkey->end += 1; } if (found) break; else { assert(SIZEOF(map->name) == SIZEOF(mident_fixed)); gv_currkey->end = mid_len((mident_fixed *)map->name); memcpy(&gv_currkey->base[0], map->name, gv_currkey->end); gv_currkey->base[ gv_currkey->end - 1 ] -= 1; gv_currkey->base[ gv_currkey->end ] = 0xFF; /* back off 1 spot from map */ gv_currkey->base[ gv_currkey->end + 1] = 0; gv_currkey->base[ gv_currkey->end + 2] = 0; gv_currkey->end += 2; } } /* Reset gv_currkey as we have potentially skipped one or more regions so we no * longer can expect gv_currkey/gv_cur_region/gv_target to match each other. */ gv_currkey->end = 0; gv_currkey->base[0] = 0; v->mvtype = 0; /* so stp_gcol (if invoked below) can free up space currently occupied by * this to-be-overwritten mval */ if (found) { if (!IS_STP_SPACE_AVAILABLE(name.len + 1)) { v->str.len = 0; /* so stp_gcol ignores otherwise incompletely setup mval */ INVOKE_STP_GCOL(name.len + 1); } #ifdef mips /* the following line works around a tandem compiler bug. */ v->str.addr = (char *)0; #endif v->str.addr = (char *)stringpool.free; *stringpool.free++ = '^'; memcpy (stringpool.free, name.addr, name.len); stringpool.free += name.len; v->str.len = name.len + 1; assert (v->str.addr < (char *)stringpool.top && v->str.addr >= (char *)stringpool.base); assert (v->str.addr + v->str.len <= (char *)stringpool.top && v->str.addr + v->str.len >= (char *)stringpool.base); } else v->str.len = 0; v->mvtype = MV_STR; /* initialize mvtype now that mval has been otherwise completely set up */ } return; }
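/* op_gvorder() above turns the current key into the smallest key that collates strictly after it:
 * a GDS key ends in two 0x00 delimiters, and overwriting the first with 0x01 while extending the
 * key by one byte makes a forward tree search land on the next existing subscript. A standalone
 * sketch of that transformation (hypothetical buffer layout: key[end-1] and key[end] are the two
 * 0x00 terminators on entry, and the buffer has spare room, as gv_currkey->top guarantees in the
 * real code):
 */
static void sketch_order_successor(unsigned char *key, unsigned int *end)
{
	key[*end - 1] = 1;	/* smallest non-terminator byte: collates after every shorter match */
	key[*end + 1] = 0;	/* new second terminator; key[*end] is already 0x00 */
	*end += 1;
}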
short rc_fnd_file(rc_xdsid *xdsid) { gv_namehead *g; short dsid, node; gd_binding *map; char buff[1024], *cp, *cp1; mstr fpath1, fpath2; mval v; int i, keysize; int len, node2; GET_SHORT(dsid, &xdsid->dsid.value); GET_SHORT(node, &xdsid->node.value); if (!dsid_list) { /* open special database, set up entry */ dsid_list = (rc_dsid_list *)malloc(SIZEOF(rc_dsid_list)); dsid_list->dsid = RC_NSPACE_DSID; dsid_list->next = NULL; fpath1.addr = RC_NSPACE_PATH; fpath1.len = SIZEOF(RC_NSPACE_PATH); if (SS_NORMAL != TRANS_LOG_NAME(&fpath1, &fpath2, buff, SIZEOF(buff), do_sendmsg_on_log2long)) { char msg[256]; SPRINTF(msg, "Invalid DB filename, \"%s\"", fpath1.addr); gtcm_rep_err(msg, errno); return RC_BADFILESPEC; } if (fpath2.len > MAX_FN_LEN) return RC_BADFILESPEC; dsid_list->fname = (char *)malloc(fpath2.len + 1); memcpy(dsid_list->fname, fpath2.addr, fpath2.len); *((char*)(dsid_list->fname + fpath2.len)) = 0; gv_cur_region = (gd_region *)malloc(SIZEOF(gd_region)); memset(gv_cur_region, 0, SIZEOF(gd_region)); gv_cur_region->dyn.addr = (gd_segment *)malloc(SIZEOF(gd_segment)); memset(gv_cur_region->dyn.addr, 0, SIZEOF(gd_segment)); memcpy(gv_cur_region->dyn.addr->fname, fpath2.addr, fpath2.len); gv_cur_region->dyn.addr->fname_len = fpath2.len; gv_cur_region->dyn.addr->acc_meth = dba_bg; ESTABLISH_RET(rc_fnd_file_ch1, RC_SUCCESS); gvcst_init(gv_cur_region); REVERT; change_reg(); /* check to see if this DB has the reserved bytes field set * correctly. Global pages must always have some extra unused * space left in them (RC_RESERVED bytes) so that the page * will fit into the client buffer when unpacked by the * client. */ if (cs_data->reserved_bytes < RC_RESERVED) { OMI_DBG((omi_debug, "Unable to access database file: \"%s\"\nReserved_bytes field in the file header is too small for GT.CM\n", fpath2.addr)); free(dsid_list->fname); dsid_list->fname = NULL; free(dsid_list); dsid_list = NULL; free(gv_cur_region->dyn.addr); gv_cur_region->dyn.addr = NULL; free(gv_cur_region); gv_cur_region = NULL; return RC_FILEACCESS; } gv_keysize = DBKEYSIZE(gv_cur_region->max_key_size); GVKEY_INIT(gv_currkey, gv_keysize); GVKEY_INIT(gv_altkey, gv_keysize); cs_addrs->dir_tree = (gv_namehead *)malloc(SIZEOF(gv_namehead) + 2 * SIZEOF(gv_key) + 3 * (gv_keysize - 1)); g = cs_addrs->dir_tree; g->first_rec = (gv_key*)(g->clue.base + gv_keysize); g->last_rec = (gv_key*)(g->first_rec->base + gv_keysize); g->clue.top = g->last_rec->top = g->first_rec->top = gv_keysize; g->clue.prev = g->clue.end = 0; g->root = DIR_ROOT; dsid_list->gda = (gd_addr*)malloc(SIZEOF(gd_addr) + 3 * SIZEOF(gd_binding)); dsid_list->gda->n_maps = 3; dsid_list->gda->n_regions = 1; dsid_list->gda->n_segments = 1; dsid_list->gda->maps = (gd_binding*)((char*)dsid_list->gda + SIZEOF(gd_addr)); dsid_list->gda->max_rec_size = gv_cur_region->max_rec_size; map = dsid_list->gda->maps; map ++; memset(map->name, 0, SIZEOF(map->name)); map->name[0] = '%'; map->reg.addr = gv_cur_region; map++; map->reg.addr = gv_cur_region; memset(map->name, -1, SIZEOF(map->name)); dsid_list->gda->tab_ptr = (hash_table_mname *)malloc(SIZEOF(hash_table_mname)); init_hashtab_mname(dsid_list->gda->tab_ptr, 0, HASHTAB_NO_COMPACT, HASHTAB_NO_SPARE_TABLE); change_reg(); if (rc_overflow->top < cs_addrs->hdr->blk_size) { if (rc_overflow->buff) free(rc_overflow->buff); rc_overflow->top = cs_addrs->hdr->blk_size; rc_overflow->buff = (char*)malloc(rc_overflow->top); if (rc_overflow_size < rc_overflow->top) rc_overflow_size = rc_overflow->top; } } for (fdi_ptr = dsid_list; fdi_ptr && 
(fdi_ptr->dsid != dsid); fdi_ptr = fdi_ptr->next) ; if (!fdi_ptr) { /* need to open new database, add to list, set fdi_ptr */ gd_header = dsid_list->gda; gv_currkey->end = 0; v.mvtype = MV_STR; v.str.len = RC_NSPACE_GLOB_LEN-1; v.str.addr = RC_NSPACE_GLOB; GV_BIND_NAME_AND_ROOT_SEARCH(gd_header, &v.str); if (!gv_target->root) /* No namespace global */ return RC_UNDEFNAMSPC; v.mvtype = MV_STR; v.str.len = SIZEOF(RC_NSPACE_DSI_SUB)-1; v.str.addr = RC_NSPACE_DSI_SUB; mval2subsc(&v,gv_currkey); node2 = node; MV_FORCE_MVAL(&v,node2); mval2subsc(&v,gv_currkey); i = dsid / 256; MV_FORCE_MVAL(&v,i); mval2subsc(&v,gv_currkey); if (gvcst_get(&v)) return RC_UNDEFNAMSPC; for (cp = v.str.addr, i = 1; i < RC_FILESPEC_PIECE; i++) for (; *cp++ != RC_FILESPEC_DELIM; ) ; for (cp1 = cp; *cp1++ != RC_FILESPEC_DELIM; ) ; cp1--; len = (int)(cp1 - cp); if (len > MAX_FN_LEN) return RC_BADFILESPEC; fdi_ptr = (rc_dsid_list *)malloc(SIZEOF(rc_dsid_list)); fdi_ptr->fname = (char *)malloc(len+1); fdi_ptr->dsid = dsid; memcpy(fdi_ptr->fname, cp, len); *(fdi_ptr->fname + (len)) = 0; gv_cur_region = (gd_region *)malloc(SIZEOF(gd_region)); memset(gv_cur_region, 0, SIZEOF(gd_region)); gv_cur_region->dyn.addr = (gd_segment *)malloc(SIZEOF(gd_segment)); memset(gv_cur_region->dyn.addr, 0, SIZEOF(gd_segment)); memcpy(gv_cur_region->dyn.addr->fname, cp, len); gv_cur_region->dyn.addr->fname_len = len; gv_cur_region->dyn.addr->acc_meth = dba_bg; ESTABLISH_RET(rc_fnd_file_ch2, RC_SUCCESS); gvcst_init(gv_cur_region); REVERT; change_reg(); /* check to see if this DB has the reserved bytes field set * correctly. Global pages must always have some extra unused * space left in them (RC_RESERVED bytes) so that the page * will fit into the client buffer when unpacked by the * client. */ if (cs_data->reserved_bytes < RC_RESERVED) { OMI_DBG((omi_debug, "Unable to access database file: \"%s\"\nReserved_bytes field in the file header is too small for GT.CM\n", fdi_ptr->fname)); free(dsid_list->fname); dsid_list->fname = NULL; free(dsid_list); dsid_list = NULL; free(gv_cur_region->dyn.addr); gv_cur_region->dyn.addr = NULL; free(gv_cur_region); gv_cur_region = NULL; return RC_FILEACCESS; } assert(!cs_addrs->hold_onto_crit); /* this ensures we can safely do unconditional grab_crit and rel_crit */ grab_crit(gv_cur_region); cs_data->rc_srv_cnt++; if (!cs_data->dsid) { cs_data->dsid = dsid; cs_data->rc_node = node; } else if (cs_data->dsid != dsid || cs_data->rc_node != node) { cs_data->rc_srv_cnt--; rel_crit(gv_cur_region); OMI_DBG((omi_debug, "Dataset ID/RC node mismatch")); OMI_DBG((omi_debug, "DB file: \"%s\"\n", dsid_list->fname)); OMI_DBG((omi_debug, "Stored DSID: %d\tRC Node: %d\n", cs_data->dsid, cs_data->rc_node)); OMI_DBG((omi_debug, "RC Rq DSID: %d\tRC Node: %d\n", dsid,node)); free(fdi_ptr->fname); fdi_ptr->fname = NULL; free(fdi_ptr); fdi_ptr = NULL; free(gv_cur_region->dyn.addr); gv_cur_region->dyn.addr = NULL; free(gv_cur_region); gv_cur_region = NULL; return RC_FILEACCESS; } rel_crit(gv_cur_region); keysize = DBKEYSIZE(gv_cur_region->max_key_size); GVKEYSIZE_INCREASE_IF_NEEDED(keysize); cs_addrs->dir_tree = (gv_namehead *)malloc(SIZEOF(gv_namehead) + 2 * SIZEOF(gv_key) + 3 * (keysize - 1)); g = cs_addrs->dir_tree; g->first_rec = (gv_key*)(g->clue.base + keysize); g->last_rec = (gv_key*)(g->first_rec->base + keysize); g->clue.top = g->last_rec->top = g->first_rec->top = keysize; g->clue.prev = g->clue.end = 0; g->root = DIR_ROOT; fdi_ptr->gda = (gd_addr*)malloc(SIZEOF(gd_addr) + 3 * SIZEOF(gd_binding)); fdi_ptr->gda->n_maps = 3; 
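/* The three map entries initialized below mirror the dsid_list bootstrap earlier in this routine:
 * maps[0] is the reserved local-lock binding, maps[1] (name "%") is the lowest global name and
 * maps[2] (name bytes all 0xFF) is the upper sentinel, so every global name falls between them and
 * binds to the single region just opened. */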
fdi_ptr->gda->n_regions = 1; fdi_ptr->gda->n_segments = 1; fdi_ptr->gda->maps = (gd_binding*)((char*)fdi_ptr->gda + SIZEOF(gd_addr)); fdi_ptr->gda->max_rec_size = gv_cur_region->max_rec_size; map = fdi_ptr->gda->maps; map ++; memset(map->name, 0, SIZEOF(map->name)); map->name[0] = '%'; map->reg.addr = gv_cur_region; map++; map->reg.addr = gv_cur_region; memset(map->name, -1, SIZEOF(map->name)); fdi_ptr->gda->tab_ptr = (hash_table_mname *)malloc(SIZEOF(hash_table_mname)); init_hashtab_mname(fdi_ptr->gda->tab_ptr, 0, HASHTAB_NO_COMPACT, HASHTAB_NO_SPARE_TABLE); fdi_ptr->next = dsid_list->next; dsid_list->next = fdi_ptr; } gv_cur_region = fdi_ptr->gda->maps[1].reg.addr; change_reg(); if (rc_overflow->top < cs_addrs->hdr->blk_size) { if (rc_overflow->buff) free(rc_overflow->buff); rc_overflow->top = cs_addrs->hdr->blk_size; rc_overflow->buff = (char*)malloc(rc_overflow->top); if (rc_overflow_size < rc_overflow->top) rc_overflow_size = rc_overflow->top; } if (!rc_overflow -> top) { rc_overflow -> top = rc_overflow_size; rc_overflow->buff = (char *)malloc(rc_overflow->top); } gd_header = fdi_ptr->gda; return RC_SUCCESS; }
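/* rc_fnd_file() above sizes rc_overflow with a grow-only policy: the buffer is reallocated only
 * when a region's block size exceeds the current capacity, and a separate high-water mark
 * (rc_overflow_size) remembers the largest capacity ever needed so a zero-sized buffer can later
 * be restored to it. A standalone sketch (hypothetical helper; requires <stdlib.h>; real code
 * would also check malloc's return value):
 */
static void sketch_grow_only(char **buff, unsigned int *top, unsigned int need, unsigned int *high_water)
{
	if (*top < need)
	{
		if (*buff)
			free(*buff);		/* discard the smaller buffer */
		*buff = (char *)malloc(need);
		*top = need;
		if (*high_water < need)
			*high_water = need;	/* remember the largest size ever used */
	}
}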
void gv_bind_name(gd_addr *addr, mstr *targ) { gd_binding *map; ht_ent_mname *tabent; mname_entry gvent; int res; boolean_t added; enum db_acc_method acc_meth; gd_region *reg; gvnh_reg_t *gvnh_reg; int keylen; char format_key[MAX_MIDENT_LEN + 1]; /* max key length + 1 byte for '^' */ gv_namehead *tmp_gvt; sgmnt_addrs *csa; gd_map = addr->maps; gd_map_top = gd_map + addr->n_maps; gvent.var_name.addr = targ->addr; gvent.var_name.len = MIN(targ->len, MAX_MIDENT_LEN); COMPUTE_HASH_MNAME(&gvent); if ((NULL != (tabent = lookup_hashtab_mname((hash_table_mname *)addr->tab_ptr, &gvent))) && (NULL != (gvnh_reg = (gvnh_reg_t *)tabent->value))) { reg = gvnh_reg->gd_reg; if (!reg->open) { gv_init_reg(reg); /* could modify gvnh_reg->gvt if multiple regions map to same db file */ assert(0 == gvnh_reg->gvt->clue.end); } gv_target = gvnh_reg->gvt; gv_cur_region = reg; acc_meth = gv_cur_region->dyn.addr->acc_meth; } else { map = gd_map + 1; /* get past local locks */ for (; (res = memcmp(gvent.var_name.addr, &(map->name[0]), gvent.var_name.len)) >= 0; map++) { assert(map < gd_map_top); if (0 == res && 0 != map->name[gvent.var_name.len]) break; } if (!map->reg.addr->open) gv_init_reg(map->reg.addr); gv_cur_region = map->reg.addr; acc_meth = gv_cur_region->dyn.addr->acc_meth; if ((dba_cm == acc_meth) || (dba_usr == acc_meth)) { tmp_gvt = malloc(SIZEOF(gv_namehead) + gvent.var_name.len); memset(tmp_gvt, 0, SIZEOF(gv_namehead) + gvent.var_name.len); tmp_gvt->gvname.var_name.addr = (char *)tmp_gvt + SIZEOF(gv_namehead); tmp_gvt->nct = 0; tmp_gvt->collseq = NULL; tmp_gvt->regcnt = 1; memcpy(tmp_gvt->gvname.var_name.addr, gvent.var_name.addr, gvent.var_name.len); tmp_gvt->gvname.var_name.len = gvent.var_name.len; tmp_gvt->gvname.hash_code = gvent.hash_code; } else { assert(gv_cur_region->max_key_size <= MAX_KEY_SZ); tmp_gvt = (gv_namehead *)targ_alloc(gv_cur_region->max_key_size, &gvent, gv_cur_region); } gvnh_reg = (gvnh_reg_t *)malloc(SIZEOF(gvnh_reg_t)); gvnh_reg->gvt = tmp_gvt; gvnh_reg->gd_reg = gv_cur_region; if (NULL != tabent) { /* Since the global name was found but its gv_target was null, and we have now created a new gv_target, * the hash table key must point to the newly created gv_target->gvname. */ tabent->key = tmp_gvt->gvname; tabent->value = (char *)gvnh_reg; } else { added = add_hashtab_mname((hash_table_mname *)addr->tab_ptr, &tmp_gvt->gvname, gvnh_reg, &tabent); assert(added); } gv_target = tmp_gvt; /* now that any error possibilities (out-of-memory issues in malloc/add_hashtab_mname) * are all done, it is safe to set gv_target. Setting it before could cause gv_target * and gv_currkey to get out of sync in case of an error condition. */ } if ((keylen = gvent.var_name.len + 2) > gv_cur_region->max_key_size) /* caution: embedded assignment of "keylen" */ { assert(ARRAYSIZE(format_key) >= (1 + gvent.var_name.len)); format_key[0] = '^'; memcpy(&format_key[1], gvent.var_name.addr, gvent.var_name.len); csa = &FILE_INFO(gv_cur_region)->s_addrs; rts_error_csa(CSA_ARG(csa) VARLSTCNT(10) ERR_KEY2BIG, 4, keylen, (int4)gv_cur_region->max_key_size, REG_LEN_STR(gv_cur_region), ERR_GVIS, 2, 1 + gvent.var_name.len, format_key); } memcpy(gv_currkey->base, gvent.var_name.addr, gvent.var_name.len); gv_currkey->base[gvent.var_name.len] = 0; gvent.var_name.len++; gv_currkey->base[gvent.var_name.len] = 0; gv_currkey->end = gvent.var_name.len; gv_currkey->prev = 0; change_reg(); return; }
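/* When the name is not already in the hash table, gv_bind_name() above resolves it by scanning the
 * sorted map array: skip maps[0] (local locks), then stop at the first map whose name collates
 * strictly after the global name — an equal-length prefix with a longer map name counts as after,
 * while an exact match falls through to the next map. A standalone sketch of that scan
 * (hypothetical fixed-width name array; requires <string.h>):
 */
static int sketch_find_binding(const char names[][32], int n_maps, const char *gvname, int len)
{
	int i, res;

	for (i = 1; i < n_maps; i++)	/* maps[0] is reserved for local locks */
	{
		res = memcmp(gvname, names[i], len);
		if ((res < 0) || ((0 == res) && ('\0' != names[i][len])))
			return i;	/* first map collating after the name owns it */
	}
	return -1;	/* unreachable when the last map name is all 0xFF, as in the real tables */
}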
static void process_input(void) { char buffer[1024]; char command; char param1[1024]; char param2[1024]; int num; unsigned ucurrent_seg, ucurrent_off; unsigned dcurrent_seg, dcurrent_off; unsigned ecurrent_seg, ecurrent_off; unsigned next_ip; int count; unsigned temp; ucurrent_seg = sregs[CS]; ucurrent_off = ip; ecurrent_seg = dcurrent_seg = sregs[DS]; ecurrent_off = dcurrent_off = 0; print_regs(); next_ip = disassemble(sregs[CS], ip, 1); for(;;) { #ifdef __hpux sigset_t newmask, oldmask; #endif fputc('-', stdout); fflush(stdout); fflush(stdin); #ifdef __hpux sigfillset(&newmask); sigprocmask(SIG_SETMASK, &newmask, &oldmask); #endif if (fgets(buffer, sizeof buffer, stdin) == NULL) exit_emu(); #ifdef __hpux sigprocmask(SIG_SETMASK, &oldmask, NULL); #endif debug_abort = FALSE; strlwr(buffer); num = sscanf(buffer," %c %s %s \n", &command, param1, param2); if (num >= 1) { switch(command) { case 'x': printf("memory = %p\n", memory); printf("c_es = %p / %04X\n", c_es, (c_es-memory) >> 4); printf("c_cs = %p / %04X\n", c_cs, (c_cs-memory) >> 4); printf("c_ds = %p / %04X\n", c_ds, (c_ds-memory) >> 4); printf("c_ss = %p / %04X\n", c_ss, (c_ss-memory) >> 4); printf("c_stack = %p / %04X\n", c_stack, (c_stack-memory) >> 4); break; case 'q': exit_emu(); break; case 'g': if (num == 1) { running = TRUE; return; } else { unsigned seg,off; seg = sregs[CS]; if (get_address(param1,&seg,&off) >= 0) { breakpoint = TRUE; bpoint = &memory[(seg << 4) + off]; return; } } break; case 't': return; case 'r': if (num == 1) { print_regs(); next_ip = disassemble(sregs[CS],ip,1); ucurrent_seg = sregs[CS]; ucurrent_off = ip; } else change_reg(param1); break; case 'p': for (temp = ip;; temp = (WORD)(temp+1)) { num = memory[(sregs[CS] << 4) + temp]; if (num==0x26 || num==0x2e || num==0x36 || num==0x3e) continue; else break; } switch(num) { case 0xff: num = memory[(sregs[CS] << 4) + (WORD)(temp+1)]; switch (num & 0x38) { case 0x10: case 0x18: break; default: return; } /* FALL THROUGH */ case 0x9a: case 0xcc: case 0xcd: case 0xce: case 0xe0: case 0xe1: case 0xe2: case 0xe8: running = FALSE; breakpoint = TRUE; bpoint = &c_cs[next_ip]; break; } return; case 's': pcemu_refresh(); break; case 'u': count = 16; if (num > 1) { ucurrent_seg = sregs[CS]; if (get_address(param1,&ucurrent_seg, &ucurrent_off) < 0) break; if (num > 2) { count = get_number(param2); if (count < 0) break; } } ucurrent_off = disassemble(ucurrent_seg, ucurrent_off, count); break; case 'd': count = ((dcurrent_off + 16*8) & 0xfff0)-dcurrent_off; if (num > 1) { dcurrent_seg = sregs[DS]; if (get_address(param1,&dcurrent_seg, &dcurrent_off) < 0) break; if (num > 2) { count = get_number(param2); if (count < 0) break; } else count = ((dcurrent_off + 16*8) & 0xfff0)-dcurrent_off; } dcurrent_off = hexdump(dcurrent_seg, dcurrent_off, count); break; case 'e': if (num > 1) { ecurrent_seg = sregs[DS]; if (get_address(param1,&ecurrent_seg, &ecurrent_off) < 0) break; enter_bytes(ecurrent_seg, ecurrent_off); } break; case 'b': if (num == 2 && (param1[0] == 'd' || param1[0] == 'h')) numbase = param1[0] == 'd' ? 0 : 16; else printf("Parameter must be either 'd' or 'h'\n"); break; default: printf("Unrecognised command\n"); break; } } } }
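/* The 'p' (proceed) command above steps over rather than into an instruction: it first skips x86
 * segment override prefixes (0x26/0x2E/0x36/0x3E), then, if the opcode is a CALL, software
 * interrupt or LOOP-family instruction, plants a breakpoint at the following instruction instead
 * of single-stepping. A standalone sketch of the opcode classification (0xFF is handled separately
 * above because only its ModRM /2 and /3 encodings are indirect calls):
 */
static int sketch_is_step_over_opcode(unsigned char op)
{
	switch (op)
	{
	case 0x9a:				/* CALL far */
	case 0xcc: case 0xcd: case 0xce:	/* INT3, INT imm8, INTO */
	case 0xe0: case 0xe1: case 0xe2:	/* LOOPNZ, LOOPZ, LOOP */
	case 0xe8:				/* CALL near */
		return 1;
	default:
		return 0;
	}
}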
void mu_int_reg(gd_region *reg, boolean_t *return_value, boolean_t return_after_open) { boolean_t read_only, was_crit; freeze_status status; node_local_ptr_t cnl; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; sgmnt_data *csd_copy_ptr; gd_segment *seg; int gtmcrypt_errno; # ifdef DEBUG boolean_t need_to_wait = FALSE; int trynum; uint4 curr_wbox_seq_num; # endif *return_value = FALSE; jnlpool_init_needed = TRUE; ESTABLISH(mu_int_reg_ch); if (dba_usr == reg->dyn.addr->acc_meth) { util_out_print("!/Can't integ region !AD; not GDS format", TRUE, REG_LEN_STR(reg)); mu_int_skipreg_cnt++; return; } gv_cur_region = reg; if (reg_cmcheck(reg)) { util_out_print("!/Can't integ region across network", TRUE); mu_int_skipreg_cnt++; return; } gvcst_init(gv_cur_region); if (gv_cur_region->was_open) { /* already open under another name */ gv_cur_region->open = FALSE; return; } if (return_after_open) { *return_value = TRUE; return; } change_reg(); csa = &FILE_INFO(gv_cur_region)->s_addrs; cnl = csa->nl; csd = csa->hdr; read_only = gv_cur_region->read_only; assert(NULL != mu_int_master); /* Ensure that we don't see an increase in the file header and master map size compared to their maximum values */ assert(SGMNT_HDR_LEN >= SIZEOF(sgmnt_data) && (MASTER_MAP_SIZE_MAX >= MASTER_MAP_SIZE(csd))); /* ONLINE INTEG, if asked for explicitly by specifying -ONLINE, is an error if the db has partial V4 blocks. * However, if -ONLINE is not explicitly specified but rather assumed implicitly (as default for -REG), * then turn off ONLINE INTEG for this region and continue as if -NOONLINE were specified */ if (!csd->fully_upgraded) { ointeg_this_reg = FALSE; /* Turn off ONLINE INTEG for this region */ if (online_specified) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_SSV4NOALLOW, 2, DB_LEN_STR(gv_cur_region)); util_out_print(NO_ONLINE_ERR_MSG, TRUE); mu_int_skipreg_cnt++; return; } } if (!ointeg_this_reg || read_only) { status = region_freeze(gv_cur_region, TRUE, FALSE, TRUE, FALSE, !read_only); switch (status) { case REG_ALREADY_FROZEN: if (csa->read_only_fs) break; util_out_print("!/Database for region !AD is already frozen, not integing", TRUE, REG_LEN_STR(gv_cur_region)); mu_int_skipreg_cnt++; return; case REG_FLUSH_ERROR: gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT(MUPIP_INTEG), DB_LEN_STR(gv_cur_region)); mu_int_skipreg_cnt++; return; case REG_HAS_KIP: /* We have already waited for KIP to reset. This time do not wait for KIP */ status = region_freeze(gv_cur_region, TRUE, FALSE, FALSE, FALSE, !read_only); if (REG_ALREADY_FROZEN == status) { if (csa->read_only_fs) break; util_out_print("!/Database for region !AD is already frozen, not integing", TRUE, REG_LEN_STR(gv_cur_region)); mu_int_skipreg_cnt++; return; } else if (REG_FLUSH_ERROR == status) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT(MUPIP_INTEG), DB_LEN_STR(gv_cur_region)); mu_int_skipreg_cnt++; return; } assert(REG_FREEZE_SUCCESS == status); /* no break */ case REG_FREEZE_SUCCESS: break; default: assert(FALSE); /* no break */ } if (read_only && (dba_bg == csa->hdr->acc_meth) && !mu_int_wait_rdonly(csa, MUPIP_INTEG)) { mu_int_skipreg_cnt++; return; } } if (!ointeg_this_reg) { /* Take a copy of the file-header. To ensure it is consistent, do it while holding crit.
*/ was_crit = csa->now_crit; if (!was_crit) grab_crit(gv_cur_region); memcpy((uchar_ptr_t)&mu_int_data, (uchar_ptr_t)csd, SIZEOF(sgmnt_data)); if (!was_crit) rel_crit(gv_cur_region); memcpy(mu_int_master, MM_ADDR(csd), MASTER_MAP_SIZE(csd)); csd_copy_ptr = &mu_int_data; } else { if (!ss_initiate(gv_cur_region, util_ss_ptr, &csa->ss_ctx, preserve_snapshot, MUPIP_INTEG)) { mu_int_skipreg_cnt++; assert(NULL != csa->ss_ctx); ss_release(&csa->ss_ctx); ointeg_this_reg = FALSE; /* Turn off ONLINE INTEG for this region */ assert(process_id != cnl->in_crit); /* Ensure ss_initiate released the crit before returning */ assert(!FROZEN_HARD(csd)); /* Ensure region is unfrozen before returning from ss_initiate */ assert(INTRPT_IN_SS_INITIATE != intrpt_ok_state); /* Ensure ss_initiate released intrpt_ok_state */ return; } assert(process_id != cnl->in_crit); /* Ensure ss_initiate released the crit before returning */ assert(INTRPT_IN_SS_INITIATE != intrpt_ok_state); /* Ensure ss_initiate released intrpt_ok_state */ csd_copy_ptr = &csa->ss_ctx->ss_shm_ptr->shadow_file_header; # if defined(DEBUG) curr_wbox_seq_num = 1; cnl->wbox_test_seq_num = curr_wbox_seq_num; /* indicate we took the next step */ GTM_WHITE_BOX_TEST(WBTEST_OINTEG_WAIT_ON_START, need_to_wait, TRUE); if (need_to_wait) /* wait for them to take next step */ { trynum = 30; /* given 30 cycles to tell you to go */ while ((curr_wbox_seq_num == cnl->wbox_test_seq_num) && trynum--) LONG_SLEEP(1); cnl->wbox_test_seq_num++; /* let them know we took the next step */ assert(trynum); } # endif } if (USES_ANY_KEY(csd_copy_ptr)) { /* Initialize mu_int_encrypt_key_handle to be used in mu_int_read */ seg = gv_cur_region->dyn.addr; INIT_DB_OR_JNL_ENCRYPTION(&mu_int_encr_handles, csd_copy_ptr, seg->fname_len, (char *)seg->fname, gtmcrypt_errno); if (0 != gtmcrypt_errno) { GTMCRYPT_REPORT_ERROR(gtmcrypt_errno, gtm_putmsg, seg->fname_len, seg->fname); mu_int_skipreg_cnt++; return; } } *return_value = mu_int_fhead(); REVERT; return; }
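/* mu_int_reg() above freezes the region with one retry: the first region_freeze() call waits for
 * kills-in-progress (KIP) to drain; if it reports REG_HAS_KIP the freeze is retried once without
 * waiting, and only REG_ALREADY_FROZEN (on a writable filesystem) or REG_FLUSH_ERROR cause the
 * region to be skipped. A simplified standalone sketch of that policy (hypothetical callback;
 * reuses the freeze_status values seen above and ignores the read-only-filesystem special case):
 */
static int sketch_freeze_with_kip_retry(int (*try_freeze)(int wait_for_kip))
{
	int status;

	status = try_freeze(1);			/* first attempt: wait for KIP to clear */
	if (REG_HAS_KIP == status)
		status = try_freeze(0);		/* second attempt: do not wait for KIP */
	return (REG_FREEZE_SUCCESS == status);	/* anything else => caller skips the region */
}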
void op_zprevious(mval *v)
{
	int4			n;
	int			min_reg_index, reg_index, res;
	mname_entry		gvname;
	mval			tmpmval, *datamval;
	enum db_acc_method	acc_meth;
	boolean_t		found, ok_to_change_currkey;
	gd_binding		*gd_map_start, *map, *prev_map;
	gd_addr			*gd_targ;
	gvnh_reg_t		*gvnh_reg;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gv_currkey->prev || !TREF(gv_last_subsc_null));
	if (gv_currkey->prev)
	{	/* If the last subscript is a null subscript, modify gv_currkey so that a gvcst_search of the resulting
		 * gv_currkey will find the last available subscript. But in the case of dba_usr (the custom implementation
		 * of $ZPREVIOUS, which is overloaded for DDP now but could be more in the future), it is better to hand
		 * over gv_currkey as is so the custom implementation can decide what to do with it.
		 */
		acc_meth = REG_ACC_METH(gv_cur_region);
		ok_to_change_currkey = (dba_usr != acc_meth);
		if (TREF(gv_last_subsc_null) && ok_to_change_currkey)
		{	/* Replace the last subscript with the highest possible subscript value, i.e. the byte sequence
			 * 0xFF (STR_SUB_MAXVAL), 0xFF, 0xFF ... for as long as gv_currkey->top permits. This subscript
			 * is guaranteed NOT to be present in the database, since a user who tried to set this exact
			 * subscripted global would have gotten a GVSUBOFLOW error (because GT.M sets aside a few bytes
			 * of padding space). And yet it is guaranteed to collate AFTER any existing subscript. Therefore
			 * we can safely do a gvcst_zprevious on this key to get at the last existing key in the database.
			 *
			 * With standard null collation, the last subscript will be 0x01.
			 * Without standard null collation, the last subscript will be 0xFF.
			 * Assert that this is indeed the case, as it will be used to restore the replaced subscript
			 * at the end.
			 */
			assert(gv_cur_region->std_null_coll || (STR_SUB_PREFIX == gv_currkey->base[gv_currkey->prev]));
			assert(!gv_cur_region->std_null_coll
				|| (SUBSCRIPT_STDCOL_NULL == gv_currkey->base[gv_currkey->prev]));
			assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->prev + 1]);
			assert(gv_currkey->end == gv_currkey->prev + 2);
			assert(gv_currkey->end < gv_currkey->top); /* need "<" (not "<=") to account for terminating 0x00 */
			GVZPREVIOUS_APPEND_MAX_SUBS_KEY(gv_currkey, gv_target);
		}
		if ((dba_bg == acc_meth) || (dba_mm == acc_meth))
		{
			gvnh_reg = TREF(gd_targ_gvnh_reg);
			if (NULL == gvnh_reg)
				found = (gv_target->root ? gvcst_zprevious() : FALSE);
			else
				INVOKE_GVCST_SPR_XXX(gvnh_reg, found = gvcst_spr_zprevious());
		} else if (dba_cm == acc_meth)
			found = gvcmx_zprevious();
		else
			found = gvusr_zprevious();
		v->mvtype = 0; /* so stp_gcol (if invoked below) can free up space currently occupied (BYPASSOK)
				* by this to-be-overwritten mval */
		if (found)
		{
			gv_altkey->prev = gv_currkey->prev;
			if (!IS_STP_SPACE_AVAILABLE(MAX_KEY_SZ))
			{
				if ((0xFF != gv_altkey->base[gv_altkey->prev])
						&& (SUBSCRIPT_STDCOL_NULL != gv_altkey->base[gv_altkey->prev]))
					n = MAX_FORM_NUM_SUBLEN;
				else
				{
					n = gv_altkey->end - gv_altkey->prev;
					assert(n > 0);
				}
				v->str.len = 0; /* so stp_gcol (if invoked) can free up space currently occupied by this
						 * (BYPASSOK) to-be-overwritten mval */
				ENSURE_STP_FREE_SPACE(n);
			}
			v->str.addr = (char *)stringpool.free;
			v->str.len = MAX_KEY_SZ;
			stringpool.free = gvsub2str(&gv_altkey->base[gv_altkey->prev], &(v->str), FALSE);
			v->str.len = INTCAST((char *)stringpool.free - v->str.addr);
			assert(v->str.addr < (char *)stringpool.top && v->str.addr >= (char *)stringpool.base);
			assert(v->str.addr + v->str.len <= (char *)stringpool.top
				&& v->str.addr + v->str.len >= (char *)stringpool.base);
		} else
			v->str.len = 0;
		v->mvtype = MV_STR; /* initialize mvtype now that the mval has been otherwise completely set up */
		if (TREF(gv_last_subsc_null) && ok_to_change_currkey)
		{	/* Restore gv_currkey to what it was at function entry time */
			gv_currkey->base[gv_currkey->prev + 1] = KEY_DELIMITER;
			if (gv_cur_region->std_null_coll)
				gv_currkey->base[gv_currkey->prev] = SUBSCRIPT_STDCOL_NULL;
			assert(gv_cur_region->std_null_coll || (STR_SUB_PREFIX == gv_currkey->base[gv_currkey->prev]));
			gv_currkey->end = gv_currkey->prev + 2;
			gv_currkey->base[gv_currkey->end] = KEY_DELIMITER;
		}
		assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end]);
	} else
	{	/* the following section is for $ZPREVIOUS(^gname) */
		assert(2 <= gv_currkey->end);
		assert(gv_currkey->end < (MAX_MIDENT_LEN + 2)); /* until names are not in midents */
		assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end]);
		assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end - 1]);
		gd_targ = TREF(gd_targ_addr);
		gd_map_start = gd_targ->maps;
		map = gv_srch_map(gd_targ, (char *)&gv_currkey->base[0], gv_currkey->end - 1);
		assert(map > (gd_map_start + 1));
		/* If ^gname starts at "map", start the search from map - 1 since $ZPREVIOUS(^gname) is sought */
		BACK_OFF_ONE_MAP_ENTRY_IF_EDGECASE(gv_currkey->base, gv_currkey->end - 1, map);
		found = FALSE;
		/* The first map entry corresponds to local locks and the second map entry does not contain any globals,
		 * so any search for globals needs to look only at the maps after these two. Hence the "gd_map_start + 1"
		 * below.
		 */
		for ( ; map > gd_map_start + 1; map = prev_map)
		{
			prev_map = map - 1;
			gv_cur_region = map->reg.addr;
			if (!gv_cur_region->open)
				gv_init_reg(gv_cur_region);
			change_reg();
			acc_meth = REG_ACC_METH(gv_cur_region);
			/* Search the region. Entries in the directory tree could have an empty GVT, in which case
			 * move on to the previous entry. */
			for ( ; ; )
			{
				assert(0 == gv_currkey->prev); /* or else gvcst_zprevious could get confused */
				if ((dba_bg == acc_meth) || (dba_mm == acc_meth))
				{
					gv_target = cs_addrs->dir_tree;
					found = gvcst_zprevious();
				} else if (dba_cm == acc_meth)
					found = gvcmx_zprevious();
				else
					found = gvusr_zprevious();
				if ('#' == gv_altkey->base[0]) /* don't return any hidden ^#* global, e.g. "^#t" */
					found = FALSE;
				if (!found)
					break;
				assert(1 < gv_altkey->end);
				assert(gv_altkey->end < (MAX_MIDENT_LEN + 2)); /* until names are not in midents */
				res = memcmp(gv_altkey->base, prev_map->gvkey.addr, gv_altkey->end);
				assert((0 != res) || (gv_altkey->end <= prev_map->gvkey_len));
				if (0 > res)
				{	/* The global name we found is less than the maximum value in the previous map,
					 * so this name is definitely not part of the current map. Move on to the
					 * previous map.
					 */
					found = FALSE;
					break;
				}
				gvname.var_name.addr = (char *)gv_altkey->base;
				gvname.var_name.len = gv_altkey->end - 1;
				if (dba_cm == acc_meth)
					break;
				COMPUTE_HASH_MNAME(&gvname);
				GV_BIND_NAME_AND_ROOT_SEARCH(gd_targ, &gvname, gvnh_reg); /* updates "gv_currkey" */
				assert((NULL != gvnh_reg->gvspan) || (gv_cur_region == map->reg.addr));
				if (NULL != gvnh_reg->gvspan)
				{	/* gv_target would NOT have been initialized by GV_BIND_NAME in this case,
					 * so finish that initialization.
					 */
					datamval = &tmpmval;
					/* The macro below finishes the task of GV_BIND_NAME_AND_ROOT_SEARCH
					 * (e.g. setting gv_cur_region for spanning globals). */
					GV_BIND_SUBSNAME_IF_GVSPAN(gvnh_reg, gd_targ, gv_currkey, gvnh_reg->gd_reg);
					op_gvdata(datamval);
					if (MV_FORCE_INT(datamval))
						break;
				} else
				{	/* else gv_target->root would have been initialized by
					 * GV_BIND_NAME_AND_ROOT_SEARCH */
					if ((0 != gv_target->root) && (0 != gvcst_data()))
						break;
				}
			}
			if (found)
				break;
			/* If the previous map corresponds to a spanning global, do not update gv_currkey, as that
			 * would effectively cause the spanning global to be skipped. If gvkey_len == gvname_len + 1,
			 * it is NOT a spanning-global map entry.
			 */
			assert(prev_map->gvkey_len >= (prev_map->gvname_len + 1));
			if ((prev_map > (gd_map_start + 1)) && (prev_map->gvkey_len == (prev_map->gvname_len + 1)))
			{
				assert(strlen(prev_map->gvkey.addr) == prev_map->gvname_len);
				gv_currkey->end = prev_map->gvname_len + 1;
				assert(gv_currkey->end <= (MAX_MIDENT_LEN + 1));
				memcpy(gv_currkey->base, prev_map->gvkey.addr, gv_currkey->end);
				assert(KEY_DELIMITER == gv_currkey->base[gv_currkey->end - 1]);
				gv_currkey->base[gv_currkey->end] = KEY_DELIMITER;
				assert(gv_currkey->top > gv_currkey->end); /* ensure we are within allocated bounds */
			}
		}
		/* Reset gv_currkey since we have potentially skipped one or more regions, so we can no longer
		 * expect gv_currkey/gv_cur_region/gv_target to match each other.
		 */
		gv_currkey->end = 0;
		gv_currkey->base[0] = KEY_DELIMITER;
		v->mvtype = 0; /* so stp_gcol (if invoked below) can free up space currently occupied (BYPASSOK)
				* by this to-be-overwritten mval */
		if (found)
		{
			if (!IS_STP_SPACE_AVAILABLE(gvname.var_name.len + 1))
			{
				v->str.len = 0; /* so stp_gcol ignores the otherwise incompletely set up mval (BYPASSOK) */
				INVOKE_STP_GCOL(gvname.var_name.len + 1);
			}
			v->str.addr = (char *)stringpool.free;
			*stringpool.free++ = '^';
			memcpy(stringpool.free, gvname.var_name.addr, gvname.var_name.len);
			stringpool.free += gvname.var_name.len;
			v->str.len = gvname.var_name.len + 1;
			assert(v->str.addr < (char *)stringpool.top && v->str.addr >= (char *)stringpool.base);
			assert(v->str.addr + v->str.len <= (char *)stringpool.top
				&& v->str.addr + v->str.len >= (char *)stringpool.base);
		} else
			v->str.len = 0;
		v->mvtype = MV_STR; /* initialize mvtype now that the mval has been otherwise completely set up */
		/* No need to restore gv_currkey (to what it was at function entry) as it is already set to NULL */
	}
	return;
}
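/* A minimal standalone sketch (not GT.M source) of the key surgery op_zprevious performs on a trailing null
 * subscript: the last subscript is overwritten in place with a run of 0xFF bytes (which collates after every
 * legal subscript) so a reverse search lands on the last real key, and the original bytes are restored
 * afterwards. key_sketch_t and the constants below are hypothetical stand-ins for gv_key, STR_SUB_MAXVAL and
 * KEY_DELIMITER, assuming standard null collation.
 */
#include <stdio.h>

#define KEY_DELIM	0x00
#define SUB_MAXVAL	0xFF
#define KEY_TOP		16

typedef struct
{
	unsigned short	top;	/* allocated size of base[] */
	unsigned short	end;	/* offset of the terminating delimiter */
	unsigned short	prev;	/* offset where the last subscript begins */
	unsigned char	base[KEY_TOP];
} key_sketch_t;

int main(void)
{
	/* key for ^A("") under standard null collation: name, delimiter, 0x01 subscript, double delimiter */
	key_sketch_t	k = { KEY_TOP, 4, 2, { 'A', KEY_DELIM, 0x01, KEY_DELIM, KEY_DELIM } };
	int		i;

	/* Replace the null last subscript with as many 0xFF bytes as k.top permits,
	 * leaving room for the terminating delimiter. */
	for (i = k.prev; i < k.top - 1; i++)
		k.base[i] = SUB_MAXVAL;
	k.base[k.top - 1] = KEY_DELIM;
	k.end = k.top - 1;
	/* ... a reverse-order search against this key would run here ... */
	/* Restore the original null subscript, mirroring the restore step in op_zprevious. */
	k.base[k.prev] = 0x01;		/* SUBSCRIPT_STDCOL_NULL under standard null collation */
	k.base[k.prev + 1] = KEY_DELIM;
	k.end = k.prev + 2;
	k.base[k.end] = KEY_DELIM;
	printf("restored key end=%u\n", (unsigned)k.end);
	return 0;
}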
/* This function is called primarily to append a new histinfo record to the replication instance file by one of the following:
 *	1) MUPIP REPLIC -SOURCE -START -ROOTPRIMARY command (after forking the child source server) if it created the
 *	   journal pool.
 *	2) MUPIP REPLIC -SOURCE -ACTIVATE -ROOTPRIMARY command if this is a propagating primary to root primary transition.
 * In addition, this function also initializes the "lms_group_info" field in the instance file (from the "inst_info" field)
 * if the current value is NULL.
 */
void gtmsource_rootprimary_init(seq_num start_seqno)
{
	unix_db_info		*udi;
	repl_histinfo		histinfo;
	boolean_t		was_crit, switch_jnl;
	gd_region		*reg, *region_top;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	uint4			jnl_status;

	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(NULL != jnlpool.repl_inst_filehdr);
	/* Update journal pool fields to reflect that this is a root primary startup and updates are enabled */
	assert(!udi->s_addrs.hold_onto_crit || jgbl.onlnrlbk);
	was_crit = udi->s_addrs.now_crit;
	if (!was_crit)
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, ASSERT_NO_ONLINE_ROLLBACK);
	jnlpool.repl_inst_filehdr->root_primary_cycle++;
	/* If this instance is transitioning from a non-rootprimary to a rootprimary, switch journal files.
	 * This helps maintain an accurate value of csd->zqgblmod_tn when the former primary connects
	 * to the current primary through a fetchresync-rollback or receiver-server-autorollback.
	 */
	switch_jnl = (!jnlpool.repl_inst_filehdr->was_rootprimary && (0 < jnlpool.repl_inst_filehdr->num_histinfo));
	jnlpool.repl_inst_filehdr->was_rootprimary = TRUE;
	assert(start_seqno >= jnlpool.jnlpool_ctl->start_jnl_seqno);
	assert(start_seqno == jnlpool.jnlpool_ctl->jnl_seqno);
	jnlpool.repl_inst_filehdr->jnl_seqno = start_seqno;
	assert(jgbl.onlnrlbk || jnlpool.jnlpool_ctl->upd_disabled);
	if (!jgbl.onlnrlbk)
		jnlpool.jnlpool_ctl->upd_disabled = FALSE;
	if (IS_REPL_INST_UUID_NULL(jnlpool.repl_inst_filehdr->lms_group_info))
	{	/* This is the first time this instance is being brought up, either as a root primary or as a propagating
		 * primary. Initialize the "lms_group_info" fields in the instance file header in journal pool shared
		 * memory. They will be flushed to the instance file as part of the
		 * "repl_inst_histinfo_add -> repl_inst_flush_filehdr" function invocation below.
		 */
		assert('\0' == jnlpool.repl_inst_filehdr->lms_group_info.created_nodename[0]);
		assert('\0' == jnlpool.repl_inst_filehdr->lms_group_info.this_instname[0]);
		assert(!jnlpool.repl_inst_filehdr->lms_group_info.creator_pid);
		jnlpool.repl_inst_filehdr->lms_group_info = jnlpool.repl_inst_filehdr->inst_info;
		assert('\0' != jnlpool.repl_inst_filehdr->lms_group_info.created_nodename[0]);
		DBG_CHECK_CREATED_NODENAME(jnlpool.repl_inst_filehdr->lms_group_info.created_nodename);
		assert('\0' != jnlpool.repl_inst_filehdr->lms_group_info.this_instname[0]);
		assert(jnlpool.repl_inst_filehdr->lms_group_info.created_time);
		assert(jnlpool.repl_inst_filehdr->lms_group_info.creator_pid);
	}
	/* Initialize histinfo fields */
	memcpy(histinfo.root_primary_instname, jnlpool.repl_inst_filehdr->inst_info.this_instname, MAX_INSTNAME_LEN - 1);
	histinfo.root_primary_instname[MAX_INSTNAME_LEN - 1] = '\0';
	assert('\0' != histinfo.root_primary_instname[0]);
	histinfo.start_seqno = start_seqno;
	assert(jnlpool.jnlpool_ctl->strm_seqno[0] == jnlpool.repl_inst_filehdr->strm_seqno[0]);
	assert(jnlpool.repl_inst_filehdr->is_supplementary || (0 == jnlpool.jnlpool_ctl->strm_seqno[0]));
	histinfo.strm_seqno = (!jnlpool.repl_inst_filehdr->is_supplementary) ? 0 : jnlpool.jnlpool_ctl->strm_seqno[0];
	histinfo.root_primary_cycle = jnlpool.repl_inst_filehdr->root_primary_cycle;
	assert(process_id == getpid());
	histinfo.creator_pid = process_id;
	JNL_SHORT_TIME(histinfo.created_time);
	histinfo.strm_index = 0;
	histinfo.history_type = HISTINFO_TYPE_NORMAL;
	NULL_INITIALIZE_REPL_INST_UUID(histinfo.lms_group);
	/* The following fields will be initialized in the "repl_inst_histinfo_add" function call below:
	 *	histinfo.histinfo_num
	 *	histinfo.prev_histinfo_num
	 *	histinfo.last_histinfo_num[]
	 */
	/* Add the histinfo record to the instance file and flush the changes in the journal pool to the file header */
	repl_inst_histinfo_add(&histinfo);
	if (!was_crit)
		rel_lock(jnlpool.jnlpool_dummy_reg);
	if (switch_jnl)
	{
		SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_file_extend and their callees assume jgbl.gbl_jrec_time is set */
		for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions;
				reg < region_top; reg++)
		{
			gv_cur_region = reg;
			change_reg(); /* sets cs_addrs/cs_data (needed by jnl_ensure_open) */
			if (!JNL_ENABLED(cs_addrs))
				continue;
			grab_crit(gv_cur_region);
			jpc = cs_addrs->jnl;
			/* Before writing to the jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain the time
			 * order of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
			 * journal records (if it decides to switch to a new journal file).
			 */
			jbp = jpc->jnl_buff;
			ADJUST_GBL_JREC_TIME(jgbl, jbp);
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{
				if (EXIT_ERR == SWITCH_JNL_FILE(jpc))
					rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(4) ERR_JNLEXTEND, 2,
						JNL_LEN_STR(cs_data));
			} else
			{
				if (SS_NORMAL != jpc->status)
					rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(cs_data),
						DB_LEN_STR(gv_cur_region), jpc->status);
				else
					rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data),
						DB_LEN_STR(gv_cur_region));
			}
			rel_crit(gv_cur_region);
		}
	}
}
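/* A minimal standalone sketch (not GT.M source) in the spirit of repl_inst_histinfo_add above: a fixed-size
 * history record is populated and appended durably to an instance-like file. The record layout, file name
 * and histinfo_append helper are all hypothetical; the real code also updates the instance file header and
 * coordinates through the journal pool lock, which this sketch omits.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#define MAX_INSTNAME	16

typedef struct
{
	char			root_primary_instname[MAX_INSTNAME];
	unsigned long long	start_seqno;
	unsigned int		root_primary_cycle;
	unsigned int		creator_pid;
	time_t			created_time;
} histinfo_sketch_t;

static int histinfo_append(const char *path, const histinfo_sketch_t *rec)
{
	int	fd;
	ssize_t	nwritten;

	fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0644);
	if (0 > fd)
		return -1;
	nwritten = write(fd, rec, sizeof(*rec));
	if ((ssize_t)sizeof(*rec) == nwritten)
		fsync(fd);	/* make the new record durable before any lock would be released */
	close(fd);
	return ((ssize_t)sizeof(*rec) == nwritten) ? 0 : -1;
}

int main(void)
{
	histinfo_sketch_t	rec;

	memset(&rec, 0, sizeof(rec));
	strncpy(rec.root_primary_instname, "INSTA", MAX_INSTNAME - 1);	/* hypothetical instance name */
	rec.start_seqno = 1;
	rec.root_primary_cycle = 1;
	rec.creator_pid = (unsigned int)getpid();
	rec.created_time = time(NULL);
	return histinfo_append("histinfo.dat", &rec);	/* hypothetical file name */
}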
void gvcst_spr_kill(void)
{
	boolean_t	spr_tpwrapped;
	boolean_t	est_first_pass;
	int		reg_index;
	gd_binding	*start_map, *end_map, *map;
	gd_region	*reg, *gd_reg_start;
	gd_addr		*addr;
	gv_namehead	*start_map_gvt;
	gvnh_reg_t	*gvnh_reg;
	trans_num	gd_targ_tn, *tn_array;
#	ifdef DEBUG
	int		save_dollar_tlevel;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	start_map = TREF(gd_targ_map); /* set up by the op_gvname/op_gvnaked/op_gvextnam done just before op_gvkill */
	start_map_gvt = gv_target; /* save gv_target corresponding to start_map so we can restore it at the end */
	/* Find out if the next (in terms of $order) key maps to the same map as gv_currkey. If so, no spanning
	 * activity is needed. */
	GVKEY_INCREMENT_ORDER(gv_currkey);
	end_map = gv_srch_map_linear(start_map, (char *)&gv_currkey->base[0], gv_currkey->end - 1);
	BACK_OFF_ONE_MAP_ENTRY_IF_EDGECASE(gv_currkey->base, gv_currkey->end - 1, end_map);
	GVKEY_UNDO_INCREMENT_ORDER(gv_currkey);
	if (start_map == end_map)
	{
		assert(gv_target == start_map_gvt);
		if (IS_OK_TO_INVOKE_GVCST_KILL(start_map_gvt))
			gvcst_kill(TRUE);
		return;
	}
	/* Do any initialization that is independent of retries BEFORE the op_tstart */
	addr = TREF(gd_targ_addr);
	assert(NULL != addr);
	gd_reg_start = &addr->regions[0];
	tn_array = TREF(gd_targ_reg_array);
	gvnh_reg = TREF(gd_targ_gvnh_reg);
	assert(NULL != gvnh_reg);
	assert(NULL != gvnh_reg->gvspan);
	/* Now that we know the key range maps to more than one region, go through each of them and do the kill.
	 * Since multiple regions are potentially involved, we need a TP fence.
	 */
	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	if (!dollar_tlevel)
	{
		spr_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_spr_kill_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
	} else
		spr_tpwrapped = FALSE;
	assert(gv_cur_region == start_map->reg.addr);
	DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC(CHECK_CSA_TRUE);
	/* Do any initialization that is dependent on retries AFTER the op_tstart */
	map = start_map;
	INCREMENT_GD_TARG_TN(gd_targ_tn); /* copies the incremented "TREF(gd_targ_tn)" into local variable "gd_targ_tn" */
	/* Verify that initializations that happened before op_tstart are still unchanged */
	assert(addr == TREF(gd_targ_addr));
	assert(tn_array == TREF(gd_targ_reg_array));
	assert(gvnh_reg == TREF(gd_targ_gvnh_reg));
	for ( ; map <= end_map; map++)
	{
		reg = map->reg.addr;
		GET_REG_INDEX(addr, gd_reg_start, reg, reg_index); /* sets "reg_index" */
		assert((map != start_map) || (tn_array[reg_index] != gd_targ_tn));
		assert(TREF(gd_targ_reg_array_size) > reg_index);
		if (tn_array[reg_index] == gd_targ_tn)
			continue; /* this region was already covered by an earlier map entry */
		if (map != start_map)
			GV_BIND_SUBSREG(addr, reg, gvnh_reg); /* sets gv_target/gv_cur_region/cs_addrs */
		assert(reg->open);
		if (IS_OK_TO_INVOKE_GVCST_KILL(gv_target))
			gvcst_kill(TRUE);
		tn_array[reg_index] = gd_targ_tn;
	}
	if (gv_target != start_map_gvt)
	{	/* Restore gv_cur_region/gv_target etc. */
		gv_target = start_map_gvt;
		gv_cur_region = start_map->reg.addr;
		change_reg();
	}
	DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC(CHECK_CSA_TRUE);
	if (spr_tpwrapped)
	{
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	assert(save_dollar_tlevel == dollar_tlevel);
	return;
}
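/* A minimal standalone sketch (not GT.M source) of the generation-stamp deduplication gvcst_spr_kill uses
 * above: each region touched during the current spanning operation is stamped with the current generation
 * number, so a region mapped by several map entries is processed exactly once per operation. region_stamp,
 * generation, process_region and NREGIONS are hypothetical stand-ins for TREF(gd_targ_reg_array),
 * TREF(gd_targ_tn) and the per-region kill.
 */
#include <stdio.h>

#define NREGIONS	4

static unsigned long	region_stamp[NREGIONS];	/* analogous to TREF(gd_targ_reg_array) */
static unsigned long	generation;		/* analogous to TREF(gd_targ_tn) */

static void process_region(int reg_index)
{
	printf("processing region %d\n", reg_index);
}

static void spanning_op(const int *map_to_region, int nmaps)
{
	int	i, reg;

	generation++;	/* analogous to INCREMENT_GD_TARG_TN: a fresh stamp per spanning operation */
	for (i = 0; i < nmaps; i++)
	{
		reg = map_to_region[i];
		if (region_stamp[reg] == generation)
			continue;	/* this region was already covered by an earlier map entry */
		process_region(reg);
		region_stamp[reg] = generation;
	}
}

int main(void)
{
	const int	maps[] = { 0, 1, 1, 2, 0 };	/* several map entries can share a region */

	spanning_op(maps, 5);	/* processes regions 0, 1 and 2 exactly once each */
	return 0;
}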