/* add_inter - atomically add "val" to the integer pointed to by "addr" using a compare-and-swap retry loop.
 *
 * Arguments:
 *	val   - signed amount to add (may be negative for a decrement)
 *	addr  - address of the shared counter to modify
 *	latch - NOTE(review): unused in this implementation; presumably kept for interface
 *	        compatibility with other inter-process add routines — confirm against callers.
 *
 * Return:
 *	the post-add value of the counter on success.
 *	On persistent CAS failure (LOCK_TRIES exhausted), raises ERR_DBCCERR via rts_error_csa
 *	and the trailing "return 0" is never reached (it only quiets the compiler).
 *
 * Side effects: increments the global fast_lock_count for the duration of the operation
 * (decremented on both the success and error exits).
 */
int4 add_inter(int val, sm_int_ptr_t addr, sm_global_latch_ptr_t latch)
{
	int4			cntrval, newcntrval, spins, maxspins, retries;
	boolean_t		cswpsuccess;
	sm_int_ptr_t volatile	cntrval_p;

	++fast_lock_count;
	/* On multi-processor boxes spin a while before yielding; on uniprocessor spinning is pointless */
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	cntrval_p = addr;	/* Need volatile context especially on Itanium */
	for (retries = LOCK_TRIES - 1; 0 < retries; retries--)	/* - 1 so do rel_quant 3 times first */
	{	/* seems like a legitimate spin which could take advantage of transactional memory */
		for (spins = maxspins; 0 < spins; spins--)
		{
			cntrval = *cntrval_p;
			newcntrval = cntrval + val;
			/* This is (currently as of 08/2007) the only non-locking usage of compswap in GT.M.
			 * We are not passing compswap an actual sm_global_latch_ptr_t addr like its function
			 * would normally dictate. However, since the address of the field we want to deal with
			 * is the first int in the global_latch_t, we just pass our int address properly cast
			 * to the type that compswap is expecting. The assert below verifies that this
			 * assumption has not changed (SE 08/2007)
			 */
			assert(0 == OFFSETOF(global_latch_t, u.parts.latch_pid));
			IA64_ONLY(cswpsuccess = compswap_unlock(RECAST(sm_global_latch_ptr_t)cntrval_p, cntrval, newcntrval));
			NON_IA64_ONLY(cswpsuccess = compswap((sm_global_latch_ptr_t)cntrval_p, cntrval, newcntrval));
			if (cswpsuccess)
			{	/* CAS succeeded -- counter updated; undo the fast_lock_count bump and return new value */
				--fast_lock_count;
				assert(0 <= fast_lock_count);
				return newcntrval;
			}
		}
		if (retries & 0x3)
			/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();	/* Release processor to holder of lock (hopefully) */
		else
		{	/* On every 4th pass, we bide for awhile */
			wcs_sleep(LOCK_SLEEP);
			assert(0 == (LOCK_TRIES % 4));	/* assures there are 3 rel_quants prior to first wcs_sleep() */
		}
	}
	/* All retries exhausted -- should not happen for a simple CAS add; treat as a fatal error */
	--fast_lock_count;
	assert(FALSE);
	rts_error_csa(CSA_ARG(NULL) VARLSTCNT(9) ERR_DBCCERR, 2, LEN_AND_LIT("*unknown*"), ERR_ERRCALL, 3, CALLFROM);
	return 0;	/* To keep the compiler quiet */
}
/*
 * -----------------------------------------------
 * Maintain in parallel with op_zalloc2
 * Arguments:
 *	timeout - max. time to wait for locks before giving up
 *	laflag  - passed to gvcmx* routines as "laflag" argument;
 *		  originally indicated the request was a Lock or
 *		  zAllocate request (hence the name "laflag"), but
 *		  now capable of holding more values signifying
 *		  additional information
 *
 * Return:
 *	1 - if no timeout specified
 *	if timeout specified:
 *		!= 0 - all the locks in the list obtained, or
 *		0    - blocked
 *	The return result is suited to be placed directly into
 *	the $T variable by the caller if timeout is specified.
 * -----------------------------------------------
 */
int	op_lock2(int4 timeout, unsigned char laflag)	/* timeout is in seconds */
{
	boolean_t	blocked, timer_on;
	signed char	gotit;
	unsigned short	locks_bckout, locks_done;
	int4		msec_timeout;	/* timeout in milliseconds */
	mlk_pvtblk	*pvt_ptr1, *pvt_ptr2, **prior;
	unsigned char	action;
	ABS_TIME	cur_time, end_time, remain_time;
	mv_stent	*mv_zintcmd;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	gotit = -1;
	cm_action = laflag;
	out_of_time = FALSE;
	if (timeout < 0)
		timeout = 0;
	else if (TREF(tpnotacidtime) < timeout)
		TPNOTACID_CHECK(LOCKTIMESTR);
	if (!(timer_on = (NO_M_TIMEOUT != timeout)))	/* NOTE assignment */
		msec_timeout = NO_M_TIMEOUT;
	else
	{
		msec_timeout = timeout2msec(timeout);
		if (0 == msec_timeout)
		{	/* immediate (zero) timeout: no timer needed, treat as already expired */
			out_of_time = TRUE;
			timer_on = FALSE;
		} else
		{	/* if this LOCK was interrupted earlier (job interrupt), resume with the remaining time */
			mv_zintcmd = find_mvstent_cmd(ZINTCMD_LOCK, restart_pc, restart_ctxt, FALSE);
			if (mv_zintcmd)
			{
				remain_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
				if (0 <= remain_time.at_sec)
					msec_timeout = (int4)(remain_time.at_sec * 1000 + remain_time.at_usec / 1000);
				else
					msec_timeout = 0;
				/* restore the prior restart bookkeeping saved when the interrupt was taken */
				TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_pc_last
					= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
				TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_ctxt_last
					= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
				TAREF1(zintcmd_active, ZINTCMD_LOCK).count--;
				assert(0 <= TAREF1(zintcmd_active, ZINTCMD_LOCK).count);
				if (mv_chain == mv_zintcmd)
					POP_MV_STENT();	/* just pop if top of stack */
				else
				{	/* flag as not active */
					mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
					mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
				}
			}
			if (0 < msec_timeout)
			{	/* arm the timeout timer; end_time lets us compute remaining time on interrupt */
				sys_get_curr_time(&cur_time);
				add_int_to_abs_time(&cur_time, msec_timeout, &end_time);
				start_timer((TID)&timer_on, msec_timeout, wake_alarm, 0, NULL);
			} else
			{
				out_of_time = TRUE;
				timer_on = FALSE;
			}
		}
	}
	lckclr();
	for (blocked = FALSE; !blocked;)
	{	/* if this is a request for a remote node */
		if (remlkreq)
		{
			if (gotit >= 0)
				gotit = gvcmx_resremlk(cm_action);
			else
				gotit = gvcmx_reqremlk(cm_action, msec_timeout);	/* REQIMMED if 2nd arg == 0 */
			if (!gotit)
			{	/* only REQIMMED returns false */
				blocked = TRUE;
				break;
			}
		}
		for (pvt_ptr1 = mlk_pvt_root, locks_done = 0; locks_done < lks_this_cmd; pvt_ptr1 = pvt_ptr1->next, locks_done++)
		{	/* Go thru the list of all locks to be obtained attempting to lock
			 * each one. If any lock could not be obtained, break out of the loop
			 */
			if (!mlk_lock(pvt_ptr1, 0, TRUE))
			{	/* If lock is obtained */
				pvt_ptr1->granted = TRUE;
				switch (laflag)
				{
				case CM_LOCKS:
					pvt_ptr1->level = 1;
					break;
				case INCREMENTAL:
					if (pvt_ptr1->level < 511)	/* The same lock can not be incremented
								 	 * more than 511 times.
								 	 */
						pvt_ptr1->level += pvt_ptr1->translev;
					else
						level_err(pvt_ptr1);
					break;
				default:
					GTMASSERT;
					break;
				}
			} else
			{
				blocked = TRUE;
				break;
			}
		}
		/* If we did not get blocked, we are all done */
		if (!blocked)
			break;
		/* We got blocked and need to keep retrying after some time interval */
		if (remlkreq)
			gvcmx_susremlk(cm_action);
		switch (cm_action)
		{
		case CM_LOCKS:
			action = LOCKED;
			break;
		case INCREMENTAL:
			action = INCREMENTAL;
			break;
		default:
			GTMASSERT;
			break;
		}
		/* back out every lock granted so far (pvt_ptr1 is the one that blocked and was never granted) */
		for (pvt_ptr2 = mlk_pvt_root, locks_bckout = 0; locks_bckout < locks_done;
			pvt_ptr2 = pvt_ptr2->next, locks_bckout++)
		{
			assert(pvt_ptr2->granted && (pvt_ptr2 != pvt_ptr1));
			mlk_bckout(pvt_ptr2, action);
		}
		if (dollar_tlevel && (CDB_STAGNATE <= t_tries))
		{	/* upper TPNOTACID_CHECK conditioned on no short timeout; this one rel_crits to avoid potential deadlock */
			assert(TREF(tpnotacidtime) >= timeout);
			TPNOTACID_CHECK(LOCKTIMESTR);
		}
		for (;;)
		{	/* wait loop: retry the blocking lock until granted, timed out, or interrupted */
			if (out_of_time || outofband)
			{	/* if time expired || control-c, tptimeout, or jobinterrupt encountered */
				if (outofband || !lk_check_own(pvt_ptr1))
				{	/* If CTL-C, check lock owner */
					if (pvt_ptr1->nodptr)	/* Get off pending list to be sent a wake */
						mlk_unpend(pvt_ptr1);
					/* Cancel all remote locks obtained so far */
					if (remlkreq)
					{
						gvcmx_canremlk();
						gvcmz_clrlkreq();
						remlkreq = FALSE;
					}
					if (outofband)
					{
						if (timer_on && !out_of_time)
						{
							cancel_timer((TID)&timer_on);
							timer_on = FALSE;
						}
						if (!out_of_time && (NO_M_TIMEOUT != timeout))
						{	/* get remain = end_time - cur_time */
							sys_get_curr_time(&cur_time);
							remain_time = sub_abs_time(&end_time, &cur_time);
							if (0 <= remain_time.at_sec)
								msec_timeout = (int4)(remain_time.at_sec * 1000
									+ remain_time.at_usec / 1000);
							else
								msec_timeout = 0;	/* treat as out_of_time */
							if (0 >= msec_timeout)
							{
								out_of_time = TRUE;
								timer_on = FALSE;	/* as if LOCK :0 */
								break;
							}
							/* save remaining time on the mv_stent chain so a restarted
							 * LOCK can pick up where this one left off
							 */
							PUSH_MV_STENT(MVST_ZINTCMD);
							mv_chain->mv_st_cont.mvs_zintcmd.end_or_remain = remain_time;
							mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_check = restart_ctxt;
							mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_check = restart_pc;
							/* save current information from zintcmd_active */
							mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_prior
								= TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_ctxt_last;
							mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_prior
								= TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_pc_last;
							TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_pc_last = restart_pc;
							TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_ctxt_last = restart_ctxt;
							TAREF1(zintcmd_active, ZINTCMD_LOCK).count++;
							mv_chain->mv_st_cont.mvs_zintcmd.command = ZINTCMD_LOCK;
							outofband_action(FALSE);	/* no return */
						}
					}
					break;
				}
			}
			if (!mlk_lock(pvt_ptr1, 0, FALSE))
			{	/* If we got the lock, break out of timer loop */
				blocked = FALSE;
				if (pvt_ptr1 != mlk_pvt_root)
				{
					rel_quant();	/* attempt to get a full timeslice for maximum chance to get all */
					mlk_unlock(pvt_ptr1);
				}
				break;
			}
			if (pvt_ptr1->nodptr)
				lk_check_own(pvt_ptr1);	/* clear an abandoned owner */
			hiber_start_wait_any(LOCK_SELF_WAKE);
		}
		if (blocked && out_of_time)
			break;
	}
	if (remlkreq)
	{
		gvcmz_clrlkreq();
		remlkreq = FALSE;
	}
	if (NO_M_TIMEOUT != timeout)
	{	/* was timed or immediate */
		if (timer_on && !out_of_time)
			cancel_timer((TID)&timer_on);
		if (blocked)
		{
			for (prior = &mlk_pvt_root; *prior;)
			{
				if (!(*prior)->granted)
				{	/* if entry was never granted, delete list entry */
					mlk_pvtblk_delete(prior);
				} else
					prior = &((*prior)->next);
			}
			mlk_stats.n_user_locks_fail++;
			return (FALSE);
		}
	}
	mlk_stats.n_user_locks_success++;
	return (TRUE);
}
/* mur_forward_multi_proc - forward phase of journal recovery/rollback, run once per forked-off
 * child process (or once in the parent when multi-processing is not in use).
 *
 * Phase1 scans each owned region's journal up to the tp-resolve-time playing records as
 * single-region updates. Phase2 walks the remaining regions in a circular list, resolving
 * multi-region TP transactions in the exact order GT.M originally committed them.
 * On completion, per-process results (error/warning counts, extract-file sizes) are written
 * into shared memory so the parent can inherit them.
 *
 * Arguments:
 *	rctl - unused as an input (immediately reassigned below); present only because
 *	       "gtm_multi_proc" requires this signature.
 *
 * Return:
 *	0/SS_NORMAL on success, or an error status (e.g. ERR_FORCEDHALT, ERR_FILENOTCREATE).
 */
int mur_forward_multi_proc(reg_ctl_list *rctl)
{
	boolean_t		multi_proc, this_reg_stuck, release_latch, ok_to_play;
	boolean_t		cancelled_dbsync_timer, cancelled_timer;
	reg_ctl_list		*rctl_top, *prev_rctl;
	jnl_ctl_list		*jctl;
	gd_region		*reg;
	sgmnt_addrs		*csa;
	seq_num			rec_token_seq;
	jnl_tm_t		rec_time;
	enum broken_type	recstat;
	jnl_record		*rec;
	enum jnl_record_type	rectype;
	char			errstr[256];
	int			i, rctl_index, save_errno, num_procs_stuck, num_reg_stuck;
				/* NOTE(review): num_procs_stuck appears unused in this function -- confirm and remove */
	uint4			status, regcnt_stuck, num_partners, start_hrtbt_cntr;
	forw_multi_struct	*forw_multi;
	shm_forw_multi_t	*sfm;
	multi_struct		*multi;
	jnl_tm_t		adjusted_resolve_time;
	shm_reg_ctl_t		*shm_rctl_start, *shm_rctl, *first_shm_rctl;
	size_t			shm_size, reccnt, copy_size;
	int4			*size_ptr;
	char			*shmPtr;	/* not using "shm_ptr" since it is already used in an AIX include file */
	int			shmid;
	multi_proc_shm_hdr_t	*mp_hdr;	/* Pointer to "multi_proc_shm_hdr_t" structure in shared memory */

	status = 0;
	/* Although we made sure the # of tasks is the same as the # of processes forked off (in the "gtm_multi_proc"
	 * invocation in "mur_forward"), it is possible one of the forked process finishes one invocation of
	 * "mur_forward_multi_proc" before even another forked process gets assigned one task in "gtm_multi_proc_helper".
	 * In this case, we would be invoked more than once. But the first invocation would have done all the needed stuff
	 * so return for later invocations.
	 */
	if (mur_forward_multi_proc_done)
		return 0;
	mur_forward_multi_proc_done = TRUE;
	/* Note: "rctl" is unused. But cannot avoid passing it since "gtm_multi_proc" expects something */
	prev_rctl = NULL;
	rctl_start = NULL;
	adjusted_resolve_time = murgbl.adjusted_resolve_time;
	assert(0 == murgbl.regcnt_remaining);
	multi_proc = multi_proc_in_use;	/* cache value in "local" to speed up access inside loops below */
	if (multi_proc)
	{
		mp_hdr = multi_proc_shm_hdr;
		shm_rctl_start = mur_shm_hdr->shm_rctl_start;
		if (jgbl.onlnrlbk)
		{	/* parent owned crit on all regions; this child only owns the regions it claims below */
			for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
			{
				assert(rctl->csa->hold_onto_crit);	/* would have been set in parent process */
				rctl->csa->hold_onto_crit = FALSE;	/* reset since we dont own this region */
				assert(rctl->csa->now_crit);	/* would have been set in parent process */
				rctl->csa->now_crit = FALSE;	/* reset since we dont own this region */
			}
		}
		START_HEARTBEAT_IF_NEEDED; /* heartbeat timer needed later (in case not already started by "gtm_multi_proc") */
	}
	first_shm_rctl = NULL;
	/* Phase1 of forward recovery starts */
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{	/* Check if "rctl" is available for us or if some other concurrent process has taken it */
		if (multi_proc)
		{
			rctl_index = rctl - &mur_ctl[0];
			shm_rctl = &shm_rctl_start[rctl_index];
			if (shm_rctl->owning_pid)
			{	/* unlatched peek: some other child already owns this region */
				assert(process_id != shm_rctl->owning_pid);
				continue;
			}
			/* claim the next free region under the multi-proc latch */
			GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			assert(release_latch);
			for ( ; rctl < rctl_top; rctl++, shm_rctl++)
			{
				if (shm_rctl->owning_pid)
				{
					assert(process_id != shm_rctl->owning_pid);
					continue;
				}
				shm_rctl->owning_pid = process_id;	/* Declare ownership */
				rctl->this_pid_is_owner = TRUE;
				if (jgbl.onlnrlbk)
				{	/* This is an online rollback and crit was grabbed on all regions by the parent
					 * rollback process. But this child process now owns this region and does the
					 * actual rollback on this region so borrow crit for the duration of this child
					 * process.
					 */
					csa = rctl->csa;
					csa->hold_onto_crit = TRUE;
					csa->now_crit = TRUE;
					assert(csa->nl->in_crit == mp_hdr->parent_pid);
					csa->nl->in_crit = process_id;
					assert(csa->nl->onln_rlbk_pid == mp_hdr->parent_pid);
					csa->nl->onln_rlbk_pid = process_id;
				}
				if (NULL == first_shm_rctl)
					first_shm_rctl = shm_rctl;
				break;
			}
			REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			if (rctl >= rctl_top)
			{	/* no unowned region left -- phase1 done for this process */
				assert(rctl == rctl_top);
				break;
			}
			/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
#			ifdef MUR_DEBUG
			fprintf(stderr, "pid = %d : Owns region %s\n", process_id, multi_proc_key);
#			endif
		} else
			rctl->this_pid_is_owner = TRUE;
		if (mur_options.forward)
		{	/* forward recovery: start at the head journal file, right after the header */
			assert(NULL == rctl->jctl_turn_around);
			jctl = rctl->jctl = rctl->jctl_head;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = JNL_HDR_LEN;
			jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END;	/* initialized to reflect journaling is not enabled */
			if (mur_options.rollback)
				jgbl.mur_jrec_seqno = jctl->jfh->start_seqno;
		} else
		{	/* backward recovery/rollback: resume from the turn-around point (if any) */
			jctl = rctl->jctl = (NULL == rctl->jctl_turn_around) ? rctl->jctl_head : rctl->jctl_turn_around;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = jctl->turn_around_offset;
			jgbl.mur_jrec_seqno = jctl->turn_around_seqno;
			assert((NULL != rctl->jctl_turn_around) || (0 == jctl->rec_offset));
		}
		if (mur_options.rollback)
		{
			if (murgbl.consist_jnl_seqno < jgbl.mur_jrec_seqno)
			{
				/* Assert that murgbl.losttn_seqno is never lesser than jgbl.mur_jrec_seqno (the turnaround
				 * point seqno) as this is what murgbl.consist_jnl_seqno is going to be set to and will
				 * eventually be the post-rollback seqno. If this condition is violated, the result of the
				 * recovery is a compromised database (the file header will indicate a Region Seqno which
				 * is not necessarily correct since seqnos prior to it might be absent in the database).
				 * Therefore, this is an out-of-design situation with respect to rollback and so stop it.
				 */
				assert(murgbl.losttn_seqno >= jgbl.mur_jrec_seqno);
				murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno;
			}
			assert(murgbl.consist_jnl_seqno <= murgbl.losttn_seqno);
		}
		if (mur_options.update || mur_options.extr[GOOD_TN])
		{
			reg = rctl->gd;
			gv_cur_region = reg;
			tp_change_reg();	/* note : sets cs_addrs to non-NULL value even if gv_cur_region->open is FALSE
						 * (cs_data could still be NULL).
						 */
			rctl->csa = cs_addrs;
			cs_addrs->miscptr = (void *)rctl;
			rctl->csd = cs_data;
			rctl->sgm_info_ptr = cs_addrs->sgm_info_ptr;
			assert(!reg->open || (NULL != cs_addrs->dir_tree));
			gv_target = cs_addrs->dir_tree;
		}
		jctl->after_end_of_data = FALSE;
		status = mur_next(jctl, jctl->rec_offset);
		assert(ERR_JNLREADEOF != status);	/* cannot get EOF at start of forward processing */
		if (SS_NORMAL != status)
			goto finish;
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at the start");
		rctl->process_losttn = FALSE;
		/* Any multi-region TP transaction will be processed as multiple single-region TP transactions up
		 * until the tp-resolve-time is reached. From then on, they will be treated as one multi-region TP
		 * transaction. This is needed for proper lost-tn determination (any multi-region transaction that
		 * gets played in a region AFTER it has already encountered a broken tn should treat this as a lost tn).
		 */
		do
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue;	/* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans
						 * files */
			}
			if (rec_time >= adjusted_resolve_time)
				break;	/* Records after this adjusted resolve_time will be processed below in phase2 */
			/* Note: Since we do hashtable token processing only for records from tp_resolve_time onwards,
			 * it is possible that if we encounter any broken transactions here we wont know they are broken
			 * but will play them as is. That is unavoidable. Specify -SINCE_TIME (for -BACKWARD rollback/recover)
			 * and -VERIFY (for -FORWARD rollback/recover) to control tp_resolve_time (and in turn more
			 * effective broken tn determination).
			 */
			status = mur_forward_play_cur_jrec(rctl);
			if (SS_NORMAL != status)
				break;
			status = mur_next_rec(&jctl);
		} while (SS_NORMAL == status);
		CHECK_IF_EOF_REACHED(rctl, status); /* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
		if (SS_NORMAL != status)
		{	/* ERR_FILENOTCREATE is possible from "mur_cre_file_extfmt" OR ERR_FORCEDHALT is possible
			 * from "mur_forward_play_cur_jrec". No other errors are known to occur here. Assert accordingly.
			 */
			assert((ERR_FILENOTCREATE == status) || (ERR_FORCEDHALT == status));
			goto finish;
		}
		if (rctl->forw_eof_seen)
		{
			PRINT_VERBOSE_STAT(jctl, "mur_forward:Reached EOF before tp_resolve_time");
			continue;	/* Reached EOF before even getting to tp_resolve_time.
					 * Do not even consider region for next processing loop */
		}
		rctl->last_tn = 0;
		murgbl.regcnt_remaining++;	/* # of regions participating in recovery at this point */
		if (NULL == rctl_start)
			rctl_start = rctl;
		if (NULL != prev_rctl)
		{	/* thread this region onto the doubly-linked list used by phase2 */
			prev_rctl->next_rctl = rctl;
			rctl->prev_rctl = prev_rctl;
		}
		prev_rctl = rctl;
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at tp_resolve_time");
	}
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	/* Note that it is possible for rctl_start to be NULL at this point. That is there is no journal record in any region
	 * AFTER the calculated tp-resolve-time. This is possible if for example -AFTER_TIME was used and has a time later
	 * than any journal record in all journal files. If rctl_start is NULL, prev_rctl should also be NULL and vice versa.
	 */
	if (NULL != rctl_start)
	{	/* close the list into a circle so phase2 can round-robin the regions */
		assert(NULL != prev_rctl);
		prev_rctl->next_rctl = rctl_start;
		rctl_start->prev_rctl = prev_rctl;
	}
	rctl = rctl_start;
	regcnt_stuck = 0; /* # of regions we are stuck in waiting for other regions to resolve a multi-region TP transaction */
	assert((NULL == rctl) || (NULL == rctl->forw_multi));
	gv_cur_region = NULL;	/* clear out any previous value to ensure gv_cur_region/cs_addrs/cs_data
				 * all get set in sync by the MUR_CHANGE_REG macro below.
				 */
	/* Phase2 of forward recovery starts */
	while (NULL != rctl)
	{	/* while there is at least one region remaining with unprocessed journal records */
		assert(NULL != rctl_start);
		assert(0 < murgbl.regcnt_remaining);
		if (NULL != rctl->forw_multi)
		{	/* This region's current journal record is part of a TP transaction waiting for other regions */
			regcnt_stuck++;
			assert(regcnt_stuck <= murgbl.regcnt_remaining);
			if (regcnt_stuck == murgbl.regcnt_remaining)
			{
				assertpro(multi_proc_in_use); /* Else : Out-of-design situation. Stuck in ALL regions. */
				/* Check one last time if all regions are stuck waiting for another process to resolve the
				 * multi-region TP transaction. If so, wait in a sleep loop. If not, we can proceed.
				 */
				rctl = rctl_start;
				start_hrtbt_cntr = heartbeat_counter;
				do
				{
					if (IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
					{	/* We are at a logical point. So exit if signaled by parent */
						status = ERR_FORCEDHALT;
						goto finish;
					}
					forw_multi = rctl->forw_multi;
					assert(NULL != forw_multi);
					sfm = forw_multi->shm_forw_multi;
					assert(NULL != sfm);
					assert(sfm->num_reg_seen_forward <= sfm->num_reg_seen_backward);
#					ifdef MUR_DEBUG
					fprintf(stderr, "Pid = %d : Line %d : token = %llu : forward = %d : backward = %d\n",
						process_id, __LINE__, (long long int)sfm->token,
						sfm->num_reg_seen_forward, sfm->num_reg_seen_backward);
#					endif
					if (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward)
					{	/* We are no longer stuck in this region */
						assert(!forw_multi->no_longer_stuck);
						forw_multi->no_longer_stuck = TRUE;
						break;
					}
					rctl = rctl->next_rctl;	/* Move on to the next available region */
					assert(NULL != rctl);
					if (rctl == rctl_start)
					{	/* We went through all regions once and are still stuck.
						 * Sleep until at least TWO heartbeats have elapsed after which check for
						 * deadlock. Do this only in the child process that owns the FIRST region
						 * in the region list. This way we dont have contention for the
						 * GRAB_MULTI_PROC_LATCH from all children at more or less the same time.
						 */
						if ((rctl == mur_ctl) && (heartbeat_counter > (start_hrtbt_cntr + 2)))
						{	/* Check if all processes are stuck for a while. If so assertpro */
							GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							assert(release_latch);
							shm_rctl_start = mur_shm_hdr->shm_rctl_start;
							num_reg_stuck = 0;
							for (i = 0; i < murgbl.reg_total; i++)
							{
								shm_rctl = &shm_rctl_start[i];
								sfm = shm_rctl->shm_forw_multi;
								if (NULL != sfm)
								{
									if (sfm->num_reg_seen_forward
											!= sfm->num_reg_seen_backward)
										num_reg_stuck++;
								}
							}
							REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							/* If everyone is stuck at this point, it is an out-of-design
							 * situation */
							assertpro(num_reg_stuck < murgbl.reg_total);
							start_hrtbt_cntr = heartbeat_counter;
						} else
						{	/* Sleep and recheck if any region we are stuck in got resolved.
							 * To minimize time spent sleeping, we just yield our timeslice.
							 */
							rel_quant();
							continue;
						}
					}
				} while (TRUE);
			} else
			{
				rctl = rctl->next_rctl;	/* Move on to the next available region */
				assert(NULL != rctl);
				continue;
			}
		}
		regcnt_stuck = 0;	/* restart the counter now that we found at least one non-stuck region */
		MUR_CHANGE_REG(rctl);
		jctl = rctl->jctl;
		this_reg_stuck = FALSE;
		for ( status = SS_NORMAL; SS_NORMAL == status; )
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			assert((rec_time >= adjusted_resolve_time) || (mur_options.notncheck && !mur_options.verify));
			assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated));
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue;	/* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans
						 * files */
			}
			/* Check if current journal record can be played right away or need to wait for corresponding journal
			 * records from other participating TP regions to be reached. A non-TP or ZTP transaction can be
			 * played without issues (i.e. has no dependencies with any other regions). A single-region TP
			 * transaction too falls in the same category. A multi-region TP transaction needs to wait until all
			 * participating regions have played all journal records BEFORE this TP in order to ensure recover
			 * plays records in the exact same order that GT.M performed them in.
			 */
			/* If FENCE_NONE is specified, we would not have maintained any multi hashtable in mur_back_process
			 * for broken transaction processing. So we process multi-region TP transactions as multiple
			 * single-region TP transactions in forward phase.
			 */
			if (FENCE_NONE != mur_options.fences)
			{
				rectype = (enum jnl_record_type)rec->prefix.jrec_type;
				if (IS_TP(rectype) && IS_TUPD(rectype))
				{
					assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_ztworm.num_participants);
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_lgtrig.num_participants);
					num_partners = rec->jrec_set_kill.num_participants;
					assert(0 < num_partners);
					if (1 < num_partners)
					{	/* multi-region TP: must coordinate with the other regions */
						this_reg_stuck = TRUE;
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
					}
				}
			}
			if (this_reg_stuck)
			{
				rec_token_seq = GET_JNL_SEQNO(rec);
				MUR_FORW_TOKEN_LOOKUP(forw_multi, rec_token_seq, rec_time);
				if (NULL != forw_multi)
				{	/* This token has already been seen in another region in forward processing.
					 * Add current region as well. If all regions have been resolved, then play
					 * the entire transaction maintaining the exact same order of updates within.
					 */
					if (!forw_multi->no_longer_stuck)
						MUR_FORW_TOKEN_ONE_MORE_REG(forw_multi, rctl);
				} else
				{	/* First time we are seeing this token in forward processing. Check if this
					 * has already been determined to be a broken transaction.
					 */
					recstat = GOOD_TN;
					multi = NULL;
					if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq))
					{
						multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, TPFENCE);
						if ((NULL != multi) && (0 < multi->partner))
							recstat = BROKEN_TN;
					}
					MUR_FORW_TOKEN_ADD(forw_multi, rec_token_seq, rec_time, rctl, num_partners,
								recstat, multi);
				}
				/* Check that "tabent" field has been initialized above (by either the MUR_FORW_TOKEN_LOOKUP
				 * or MUR_FORW_TOKEN_ADD macros). This is relied upon by "mur_forward_play_multireg_tp" below.
				 */
				assert(NULL != forw_multi->u.tabent);
				assert(forw_multi->num_reg_seen_forward <= forw_multi->num_reg_seen_backward);
				if (multi_proc)
				{
					sfm = forw_multi->shm_forw_multi;
					ok_to_play = (NULL == sfm)
						|| (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward);
				} else
					ok_to_play = (forw_multi->num_reg_seen_forward == forw_multi->num_reg_seen_backward);
				assert(ok_to_play || !forw_multi->no_longer_stuck);
				if (ok_to_play )
				{	/* We have enough information to proceed with playing this multi-region TP in
					 * forward processing (even if we might not have seen all needed regions). Now play
					 * it. Note that the TP could be BROKEN_TN or GOOD_TN. The callee handles it.
					 */
					assert(forw_multi == rctl->forw_multi);
					status = mur_forward_play_multireg_tp(forw_multi, rctl);
					this_reg_stuck = FALSE;
					/* Note that as part of playing the TP transaction, we could have reached
					 * the EOF of rctl. In this case, we need to break out of the loop.
					 */
					if ((SS_NORMAL != status) || rctl->forw_eof_seen)
						break;
					assert(NULL == rctl->forw_multi);
					assert(!dollar_tlevel);
					jctl = rctl->jctl;	/* In case the first record after the most recently processed
								 * TP transaction is in the next generation journal file */
					continue;
				}
				break;
			} else
			{
				status = mur_forward_play_cur_jrec(rctl);
				if (SS_NORMAL != status)
					break;
			}
			assert(!this_reg_stuck);
			status = mur_next_rec(&jctl);
		}
		assert((NULL == rctl->forw_multi) || this_reg_stuck);
		assert((NULL != rctl->forw_multi) || !this_reg_stuck);
		if (!this_reg_stuck)
		{	/* We are not stuck in this region (to resolve a multi-region TP).
			 * This means we are done processing all the records of this region.
			 */
			assert(NULL == rctl->forw_multi);
			if (!rctl->forw_eof_seen)
			{
				CHECK_IF_EOF_REACHED(rctl, status);	/* sets rctl->forw_eof_seen if needed;
									 * resets "status" to SS_NORMAL */
				if (SS_NORMAL != status)
				{
					assert(ERR_FILENOTCREATE == status);
					goto finish;
				}
				assert(!dollar_tlevel);
				DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl); /* since all of its records should have been
									  * processed */
			} else
			{	/* EOF was seen in rctl inside "mur_forward_play_multireg_tp" and it was removed
				 * from the unprocessed list of rctls. At the time rctl was removed, its "next_rctl"
				 * field could have been pointing to another <rctl> that has since then also been
				 * removed inside the same function. Therefore the "next_rctl" field is not reliable
				 * in this case but instead we should rely on the global variable "rctl_start" which
				 * points to the list of unprocessed rctls. Set "next_rctl" accordingly.
				 */
				rctl->next_rctl = rctl_start;
				if (ERR_JNLREADEOF == status)
					status = SS_NORMAL;
			}
			assert(rctl->deleted_from_unprocessed_list);
		}
		assert(SS_NORMAL == status);
		assert(!this_reg_stuck || !rctl->forw_eof_seen);
		assert((NULL == rctl->next_rctl) || (NULL != rctl_start));
		assert((NULL == rctl->next_rctl) || (0 < murgbl.regcnt_remaining));
		rctl = rctl->next_rctl;	/* Note : even though "rctl" could have been deleted from the doubly linked list
					 * above, rctl->next_rctl is not touched so we can still use it to get to the next
					 * element. */
	}
	assert(0 == murgbl.regcnt_remaining);
	jgbl.mur_pini_addr_reset_fnptr = NULL;	/* No more simulation of GT.M activity for any region */
	prc_vec = murgbl.prc_vec;	/* Use process-vector of MUPIP RECOVER (not any simulating GT.M process) now onwards */
	assert(0 == dollar_tlevel);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{	/* per-region wrap-up for the regions this process owns */
		if (!rctl->this_pid_is_owner)
		{
			assert(multi_proc_in_use);
			continue;	/* in a parallel processing environment, process only regions we own */
		}
		if (multi_proc)
		{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any
			 * output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
		}
		PRINT_VERBOSE_STAT(rctl->jctl, "mur_forward:at the end");
		assert(!mur_options.rollback || (0 != murgbl.consist_jnl_seqno));
		assert(mur_options.rollback || (0 == murgbl.consist_jnl_seqno));
		assert(!dollar_tlevel);	/* In case it applied a broken TUPD */
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		rctl->mur_plst = NULL;	/* reset now that simulation of GT.M updates is done */
		/* Ensure mur_block_count_correct is called if updates allowed */
		if (murgbl.ok_to_update_db && (SS_NORMAL != mur_block_count_correct(rctl)))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_BLKCNTEDITFAIL, 2, DB_LEN_STR(rctl->gd));
			murgbl.wrn_count++;
		}
	}
finish:
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	if ((SS_NORMAL == status) && mur_options.show)
		mur_output_show();
	if (NULL != first_shm_rctl)
	{	/* Transfer needed process-private information to shared memory so parent process can later inherit this. */
		first_shm_rctl->err_cnt = murgbl.err_cnt;
		first_shm_rctl->wrn_count = murgbl.wrn_count;
		first_shm_rctl->consist_jnl_seqno = murgbl.consist_jnl_seqno;
		/* If extract files were created by this process for one or more regions, then copy that information to
		 * shared memory so parent process can use this information to do a merge sort.
		 */
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{
			assert(multi_proc_in_use);
			if (!rctl->this_pid_is_owner)
				continue;	/* in a parallel processing environment, process only regions we own */
			/* Cancel any flush/dbsync timers by this child process for this region. This is because the
			 * child is not going to go through exit handling code (no gds_rundown etc.). And we need to
			 * clear up csa->nl->wcs_timers. (normally done by gds_rundown).
			 */
			if (NULL != rctl->csa)	/* rctl->csa can be NULL in case of "mupip journal -extract" etc. */
				CANCEL_DB_TIMERS(rctl->gd, rctl->csa, cancelled_timer, cancelled_dbsync_timer);
			reccnt = 0;
			for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
				recstat < TOT_EXTR_TYPES;
				recstat++, size_ptr++)
			{	/* Assert "extr_file_created" information is in sync between rctl and shm_rctl.
				 * This was done at the end of "mur_cre_file_extfmt".
				 */
				assert(shm_rctl->extr_file_created[recstat] == rctl->extr_file_created[recstat]);
				/* Assert that if *size_ptr is non-zero, then we better have created an extract file.
				 * Note that the converse is not true. It is possible we created a file for example to
				 * write an INCTN record but decided to not write anything because it was not a -detail
				 * type of extract. So *sizeptr could be 0 even though we created the extract file.
				 */
				assert(!*size_ptr || rctl->extr_file_created[recstat]);
				shm_rctl->jnlext_list_size[recstat] = *size_ptr;
				reccnt += *size_ptr;
			}
			assert(INVALID_SHMID == shm_rctl->jnlext_shmid);
			shm_size = reccnt * SIZEOF(jnlext_multi_t);
			/* If we are quitting because of an abnormal status OR a forced signal to terminate
			 * OR if the parent is dead (kill -9) dont bother creating shmid to communicate back with parent.
			 */
			if (mp_hdr->parent_pid != getppid())
			{
				SET_FORCED_MULTI_PROC_EXIT;	/* Also signal sibling children to stop processing */
				if (SS_NORMAL != status)
					status = ERR_FORCEDHALT;
			}
			if ((SS_NORMAL == status) && shm_size)
			{	/* create a private shm segment holding this region's extract-record list for the parent */
				shmid = shmget(IPC_PRIVATE, shm_size, 0600 | IPC_CREAT);
				if (-1 == shmid)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmget() : shmsize=0x%llx", shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
						ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shmPtr = (char *)do_shmat(shmid, 0, 0);
				if (-1 == (sm_long_t)shmPtr)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmat() : shmid=%d shmsize=0x%llx", shmid, shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
						ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shm_rctl->jnlext_shmid = shmid;
				shm_rctl->jnlext_shm_size = shm_size;
				for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
					recstat < TOT_EXTR_TYPES;
					recstat++, size_ptr++)
				{
					shm_size = *size_ptr;
					if (shm_size)
					{
						copy_size = copy_list_to_buf(rctl->jnlext_multi_list[recstat],
											(int4)shm_size, shmPtr);
						assert(copy_size == (shm_size * SIZEOF(jnlext_multi_t)));
						shmPtr += copy_size;
					}
				}
			}
		}
	}
	mur_close_file_extfmt(IN_MUR_CLOSE_FILES_FALSE);	/* Need to flush buffered extract/losttrans/brokentrans files */
	return (int)status;
}
/*
 * -----------------------------------------------
 * Maintain in parallel with op_zalloc2
 * Arguments:
 *	timeout - max. time to wait for locks before giving up
 *	laflag  - passed to gvcmx* routines as "laflag" argument;
 *		  originally indicated the request was a Lock or
 *		  zAllocate request (hence the name "laflag"), but
 *		  now capable of holding more values signifying
 *		  additional information
 *
 * Return:
 *	1 - if no timeout specified
 *	if timeout specified:
 *		!= 0 - all the locks in the list obtained, or
 *		0 - blocked
 *	The return result is suited to be placed directly into
 *	the $T variable by the caller if timeout is specified.
 * -----------------------------------------------
 */
int	op_lock2(int4 timeout, unsigned char laflag)	/* timeout is in seconds */
{
	boolean_t	blocked, timer_on;	/* boolean_t (not bool) for consistency with the rest of this code base */
	signed char	gotit;
	unsigned short	locks_bckout, locks_done;
	int4		msec_timeout;		/* timeout in milliseconds */
	mlk_pvtblk	*pvt_ptr1, *pvt_ptr2, **prior;
	unsigned char	action;

	gotit = -1;
	cm_action = laflag;
	if (0 > timeout)	/* treat a negative timeout as "try once, no wait" rather than feeding it to the timer */
		timeout = 0;
	timer_on = (NO_M_TIMEOUT != timeout);
	out_of_time = FALSE;
	if (!timer_on)
		msec_timeout = NO_M_TIMEOUT;
	else
	{
		msec_timeout = timeout2msec(timeout);
		if (0 == msec_timeout)
			out_of_time = TRUE;	/* zero timeout: single attempt only, never sleep */
		else	/* wake_alarm sets out_of_time when the user-specified interval expires */
			start_timer((TID)&timer_on, msec_timeout, wake_alarm, 0, NULL);
	}
	lckclr();
	for (blocked = FALSE; !blocked;)
	{
		/* if this is a request for a remote node */
		if (remlkreq)
		{
			if (gotit >= 0)
				gotit = gvcmx_resremlk(cm_action);
			else
				gotit = gvcmx_reqremlk(cm_action, timeout);
			if (!gotit)
			{	/* only REQIMMED returns false */
				blocked = TRUE;
				break;
			}
		}
		for (pvt_ptr1 = mlk_pvt_root, locks_done = 0;  locks_done < lks_this_cmd;  pvt_ptr1 = pvt_ptr1->next, locks_done++)
		{	/* Go thru the list of all locks to be obtained attempting to lock
			 * each one. If any lock could not be obtained, break out of the loop
			 */
			if (!mlk_lock(pvt_ptr1, 0, TRUE))
			{	/* If lock is obtained */
				pvt_ptr1->granted = TRUE;
				switch (laflag)
				{
				case CM_LOCKS:
					pvt_ptr1->level = 1;
					break;
				case INCREMENTAL:
					pvt_ptr1->level += pvt_ptr1->translev;
					break;
				default:
					GTMASSERT;
					break;
				}
			} else
			{
				blocked = TRUE;
				break;
			}
		}
		/* If we did not get blocked, we are all done */
		if (!blocked)
			break;
		/* We got blocked and need to keep retrying after some time interval.
		 * First back out every lock granted so far in this pass so we don't deadlock other processes.
		 */
		if (remlkreq)
			gvcmx_susremlk(cm_action);
		switch (cm_action)
		{
		case CM_LOCKS:
			action = LOCKED;
			break;
		case INCREMENTAL:
			action = INCREMENTAL;
			break;
		default:
			GTMASSERT;
			break;
		}
		for (pvt_ptr2 = mlk_pvt_root, locks_bckout = 0;  locks_bckout < locks_done;
			pvt_ptr2 = pvt_ptr2->next, locks_bckout++)
		{
			assert(pvt_ptr2->granted && (pvt_ptr2 != pvt_ptr1));
			mlk_bckout(pvt_ptr2, action);
		}
		if (dollar_tlevel && (CDB_STAGNATE <= t_tries))
		{	/* Final TP retry holds crit; waiting here for a lock would deadlock, so restart the transaction */
			mlk_unpend(pvt_ptr1);	/* Eliminate the dangling request block */
			if (timer_on)
				cancel_timer((TID)&timer_on);
			t_retry(cdb_sc_needlock);	/* release crit to prevent a deadlock */
		}
		for (;;)
		{	/* Wait for the blocking lock (pvt_ptr1) to become available or for timeout/interrupt */
			if (out_of_time || outofband)
			{	/* if time expired || control-c encountered */
				if (outofband || !lk_check_own(pvt_ptr1))
				{	/* If CTL-C, check lock owner */
					if (pvt_ptr1->nodptr)	/* Get off pending list to be sent a wake */
						mlk_unpend(pvt_ptr1);
					/* Cancel all remote locks obtained so far */
					if (remlkreq)
					{
						gvcmx_canremlk();
						gvcmz_clrlkreq();
						remlkreq = FALSE;
					}
					if (outofband)
					{
						cancel_timer((TID)&timer_on);
						outofband_action(FALSE);
					}
					break;
				}
			}
			if (!mlk_lock(pvt_ptr1, 0, FALSE))
			{	/* If we got the lock, break out of timer loop */
				blocked = FALSE;
				if (pvt_ptr1 != mlk_pvt_root)
				{	/* Not the first lock of the command: release it and retry the whole list from the top */
					rel_quant();	/* attempt to get a full timeslice for maximum chance to get all */
					mlk_unlock(pvt_ptr1);
				}
				break;
			}
			if (pvt_ptr1->nodptr)
				lk_check_own(pvt_ptr1);		/* clear an abandoned owner */
			hiber_start_wait_any(LOCK_SELF_WAKE);
		}
		if (blocked && out_of_time)
			break;
	}
	if (remlkreq)
	{
		gvcmz_clrlkreq();
		remlkreq = FALSE;
	}
	if (timer_on)
	{
		cancel_timer((TID)&timer_on);
		if (blocked)
		{	/* Timed out: discard every list entry that was never granted, report failure via $T */
			for (prior = &mlk_pvt_root;  *prior; )
			{
				if (!(*prior)->granted)
				{	/* if entry was never granted, delete list entry */
					mlk_pvtblk_delete(prior);
				} else
					prior = &((*prior)->next);
			}
			mlk_stats.n_user_locks_fail++;
			return (FALSE);
		}
	}
	mlk_stats.n_user_locks_success++;
	return (TRUE);
}
/*
 * ------------------------------------------
 * Hang the process for a specified time.
 *
 * Goes to sleep for a positive value.
 * Any caught signal will terminate the sleep
 * following the execution of that signal's catching routine.
 *
 * The actual hang duration should be NO LESS than the specified
 * duration for specified durations greater than .001 seconds.
 * Certain applications depend on this assumption.
 *
 * Arguments:
 *	num - time to sleep
 *
 * Return:
 *	none
 * ------------------------------------------
 */
void op_hang(mval* num)
{
	int		ms;		/* requested sleep in milliseconds (0 means just yield the processor) */
	double		tmp;
	mv_stent	*mv_zintcmd;
	ABS_TIME	cur_time, end_time;
#	ifdef VMS
	uint4		time[2];
	int4		efn_mask, status;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	ms = 0;
	MV_FORCE_NUM(num);
	/* Convert the mval argument into milliseconds; negative values fall through with ms == 0 */
	if (num->mvtype & MV_INT)
	{
		if (0 < num->m[1])
		{
			assert(MV_BIAS >= 1000);	/* if formats change overflow may need attention */
			ms = num->m[1] * (1000 / MV_BIAS);
		}
	} else if (0 == num->sgn)	/* if sign is not 0 it means num is negative */
	{
		tmp = mval2double(num) * (double)1000;
		/* clamp to MAXPOSINT4 so a huge HANG argument cannot overflow the int */
		ms = ((double)MAXPOSINT4 >= tmp) ? (int)tmp : (int)MAXPOSINT4;
	}
	if (ms)
	{
		if (TREF(tpnotacidtime) * 1000 < ms)
			TPNOTACID_CHECK(HANGSTR);	/* long hang inside a transaction: warn about non-ACID behavior */
#		if defined(DEBUG) && defined(UNIX)
		/* White-box hooks below recognize specific magic durations and divert for test purposes only */
		if (WBTEST_ENABLED(WBTEST_DEFERRED_TIMERS) && (3 > gtm_white_box_test_case_count) && (123000 == ms))
		{
			DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			DBGFPF((stderr, "OP_HANG: will sleep for 20 seconds\n"));
			LONG_SLEEP(20);
			DBGFPF((stderr, "OP_HANG: done sleeping\n"));
			ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_BREAKMPC)&& (0 == gtm_white_box_test_case_count) && (999 == ms))
		{	/* deliberately corrupt the caller frame's mpc to exercise stack-corruption recovery */
			frame_pointer->old_frame_pointer->mpc = (unsigned char *)GTM64_ONLY(0xdeadbeef12345678)
				NON_GTM64_ONLY(0xdead1234);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_UTIL_OUT_BUFFER_PROTECTION) && (0 == gtm_white_box_test_case_count) && (999 == ms))
		{	/* Upon seeing a .999s hang this white-box test launches a timer that pops with a period of
			 * UTIL_OUT_SYSLOG_INTERVAL and prints a long message via util_out_ptr.
			 */
			start_timer((TID)&util_out_syslog_dump, UTIL_OUT_SYSLOG_INTERVAL, util_out_syslog_dump, 0, NULL);
			return;
		}
#		endif
		sys_get_curr_time(&cur_time);
		/* If this HANG was previously interrupted by MUPIP INTRPT ($ZINTERRUPT), resume for only the
		 * remaining portion of the original interval instead of restarting the full sleep.
		 */
		mv_zintcmd = find_mvstent_cmd(ZINTCMD_HANG, restart_pc, restart_ctxt, FALSE);
		if (!mv_zintcmd)
			add_int_to_abs_time(&cur_time, ms, &end_time);
		else
		{
			end_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
			cur_time = sub_abs_time(&end_time, &cur_time);	/* get remaining time to sleep */
			if (0 <= cur_time.at_sec)
				ms = (int4)(cur_time.at_sec * 1000 + cur_time.at_usec / 1000);
			else
				ms = 0;		/* all done */
			/* restore/pop previous zintcmd_active[ZINTCMD_HANG] hints */
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last = mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).count--;
			assert(0 <= TAREF1(zintcmd_active, ZINTCMD_HANG).count);
			if (mv_chain == mv_zintcmd)
				POP_MV_STENT();	/* just pop if top of stack */
			else
			{	/* flag as not active */
				mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
				mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
			}
			if (0 == ms)
				return;		/* done HANGing */
		}
#		ifdef UNIX
		if (ms < 10)
			SLEEP_USEC(ms * 1000, TRUE);	/* Finish the sleep if it is less than 10ms. */
		else
			hiber_start(ms);	/* longer sleeps use the hibernation timer so outofband can interrupt */
#		elif defined(VMS)
		/* VMS: arm a delta-time timer and wait on either the timer or the outofband event flag */
		time[0] = -time_low_ms(ms);
		time[1] = -time_high_ms(ms) - 1;
		efn_mask = (1 << efn_outofband | 1 << efn_timer);
		if (SS$_NORMAL != (status = sys$setimr(efn_timer, &time, NULL, &time, 0)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$setimr"), CALLFROM,
				status);
		if (SS$_NORMAL != (status = sys$wflor(efn_outofband, efn_mask)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$wflor"), CALLFROM,
				status);
		if (outofband)
		{	/* woke for an interrupt, not the timer: cancel the pending timer if it has not fired */
			if (SS$_WASCLR == (status = sys$readef(efn_timer, &efn_mask)))
			{
				if (SS$_NORMAL != (status = sys$cantim(&time, 0)))
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5,
						RTS_ERROR_LITERAL("$cantim"), CALLFROM, status);
			} else
				assertpro(SS$_WASSET == status);
		}
#		endif
	} else
		rel_quant();	/* zero (or negative) HANG: just give up the timeslice */
	if (outofband)
	{	/* Interrupted: push an MVST_ZINTCMD mv_stent recording the intended end time so a restarted
		 * HANG (above) can sleep only for the remainder.
		 * NOTE(review): on the ms == 0 path end_time is never assigned before being saved here —
		 * looks benign if callers only resume interrupted non-zero HANGs, but confirm.
		 */
		PUSH_MV_STENT(MVST_ZINTCMD);
		mv_chain->mv_st_cont.mvs_zintcmd.end_or_remain = end_time;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_check = restart_ctxt;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_check = restart_pc;
		/* save current information from zintcmd_active */
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = restart_pc;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last = restart_ctxt;
		TAREF1(zintcmd_active, ZINTCMD_HANG).count++;
		mv_chain->mv_st_cont.mvs_zintcmd.command = ZINTCMD_HANG;
		outofband_action(FALSE);
	}
	return;
}
void iosocket_tls(mval *optionmval, int4 timeoutarg, mval *tlsid, mval *password, mval *extraarg) { /* note extraarg is not currently used */ int4 length, flags, timeout, msec_timeout, status, status2, len, errlen, devlen, tls_errno, save_errno; io_desc *iod; d_socket_struct *dsocketptr; socket_struct *socketptr; char optionstr[MAX_TLSOPTION], idstr[MAX_TLSID_LEN], passwordstr[GTM_PASSPHRASE_MAX_ASCII + 1]; const char *errp; tls_option option; gtm_tls_socket_t *tlssocket; ABS_TIME cur_time, end_time; # ifdef USE_POLL struct pollfd fds; # else fd_set fds, *readfds, *writefds; struct timeval timeout_spec, *timeout_ptr; # endif iod = io_curr_device.out; assert(gtmsocket == iod->type); dsocketptr = (d_socket_struct *)iod->dev_sp; if (0 >= dsocketptr->n_socket) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOSOCKETINDEV); return; } if (dsocketptr->n_socket <= dsocketptr->current_socket) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_CURRSOCKOFR, 2, dsocketptr->current_socket, dsocketptr->n_socket); return; } if (dsocketptr->mupintr) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_ZINTRECURSEIO); socketptr = dsocketptr->socket[dsocketptr->current_socket]; ENSURE_DATA_SOCKET(socketptr); if (socket_tcpip != socketptr->protocol) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, RTS_ERROR_MVAL(optionmval), LEN_AND_LIT("but socket is not TCP")); return; } if (socket_connected != socketptr->state) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_LIT("/TLS"), LEN_AND_LIT("but socket not connected")); return; } if (NULL != tlsid) { length = tlsid->str.len; if (MAX_TLSID_LEN < (length + 1)) /* for null */ { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_LIT("TLSID"), LEN_AND_LIT("too long")); return; } STRNCPY_STR(idstr, tlsid->str.addr, length); idstr[length] = '\0'; } else idstr[0] = '\0'; if (NULL != password) { length = password->str.len; if (GTM_PASSPHRASE_MAX_ASCII < length) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) 
ERR_TLSPARAM, 4, LEN_AND_LIT("passphrase"), LEN_AND_LIT("too long")); return; } STRNCPY_STR(passwordstr, password->str.addr, length); passwordstr[length] = '\0'; } else passwordstr[0] = '\0'; length = MIN(MAX_TLSOPTION, optionmval->str.len); lower_to_upper((uchar_ptr_t)optionstr, (uchar_ptr_t)optionmval->str.addr, length); if (0 == memcmp(optionstr, "CLIENT", length)) option = tlsopt_client; else if (0 == memcmp(optionstr, "SERVER", length)) option = tlsopt_server; else if (0 == memcmp(optionstr, "RENEGOTIATE", length)) option = tlsopt_renegotiate; else option = tlsopt_invalid; memcpy(iod->dollar.device, "0", SIZEOF("0")); if (NO_M_TIMEOUT != timeoutarg) { msec_timeout = timeout2msec(timeoutarg); sys_get_curr_time(&cur_time); add_int_to_abs_time(&cur_time, msec_timeout, &end_time); } else msec_timeout = -1; if ((tlsopt_client == option) || (tlsopt_server == option)) { /* most of the setup is common */ if (socketptr->tlsenabled) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_STR(optionstr), LEN_AND_LIT("but TLS already enabled")); return; } assertpro((0 >= socketptr->buffered_length) && (0 >= socketptr->obuffer_length)); if (NULL == tls_ctx) { /* first use of TLS */ if (-1 == gtm_tls_loadlibrary()) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSDLLNOOPEN, 0, ERR_TEXT, 2, LEN_AND_STR(dl_err)); return; } if (NULL == (tls_ctx = (gtm_tls_init(GTM_TLS_API_VERSION, GTMTLS_OP_INTERACTIVE_MODE)))) { errp = gtm_tls_get_error(); len = SIZEOF(ONE_COMMA) - 1; memcpy(iod->dollar.device, ONE_COMMA, len); errlen = STRLEN(errp); devlen = MIN((SIZEOF(iod->dollar.device) - len - 1), errlen); memcpy(&iod->dollar.device[len], errp, devlen + 1); if (devlen < errlen) iod->dollar.device[SIZEOF(iod->dollar.device) - 1] = '\0'; if (socketptr->ioerror) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSINIT, 0, ERR_TEXT, 2, errlen, errp); if (NO_M_TIMEOUT != timeoutarg) dollar_truth = FALSE; return; } } socketptr->tlsenabled = TRUE; flags = GTMTLS_OP_SOCKET_DEV | 
((tlsopt_client == option) ? GTMTLS_OP_CLIENT_MODE : 0); socketptr->tlssocket = gtm_tls_socket(tls_ctx, NULL, socketptr->sd, idstr, flags); if (NULL == socketptr->tlssocket) { socketptr->tlsenabled = FALSE; errp = gtm_tls_get_error(); len = SIZEOF(ONE_COMMA) - 1; memcpy(iod->dollar.device, ONE_COMMA, len); errlen = STRLEN(errp); devlen = MIN((SIZEOF(iod->dollar.device) - len - 1), errlen); memcpy(&iod->dollar.device[len], errp, devlen + 1); if (devlen < errlen) iod->dollar.device[SIZEOF(iod->dollar.device) - 1] = '\0'; if (socketptr->ioerror) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSCONVSOCK, 0, ERR_TEXT, 2, errlen, errp); if (NO_M_TIMEOUT != timeoutarg) dollar_truth = FALSE; return; } status = 0; # ifndef USE_POLL if (NO_M_TIMEOUT == timeoutarg) timeout_ptr = NULL; else { timeout_spec.tv_sec = msec_timeout / 1000; timeout_spec.tv_usec = (msec_timeout % 1000) * 1000; /* remainder in millsecs to microsecs */ timeout_ptr = &timeout_spec; } # endif do { status2 = 0; if (0 != status) { # ifdef USE_POLL fds.fd = socketptr->sd; fds.events = (GTMTLS_WANT_READ == status) ? POLLIN : POLLOUT; # else readfds = writefds = NULL; assertpro(FD_SETSIZE > socketptr->sd); FD_ZERO(&fds); FD_SET(socketptr->sd, &fds); writefds = (GTMTLS_WANT_WRITE == status) ? &fds : NULL; readfds = (GTMTLS_WANT_READ == status) ? 
&fds : NULL; # endif POLL_ONLY(if (-1 == (status2 = poll(&fds, 1, msec_timeout)))) SELECT_ONLY(if (-1 == (status2 = select(socketptr->sd + 1, readfds, writefds, NULL, timeout_ptr)))) { save_errno = errno; if (EAGAIN == save_errno) { rel_quant(); /* allow resources to become available */ status2 = 0; /* treat as timeout */ } else if (EINTR == save_errno) status2 = 0; } } else status2 = 1; /* do accept/connect first time */ if (0 < status2) { if (tlsopt_server == option) status = gtm_tls_accept((gtm_tls_socket_t *)socketptr->tlssocket); else status = gtm_tls_connect((gtm_tls_socket_t *)socketptr->tlssocket); } if ((0 > status2) || ((status != 0) && ((GTMTLS_WANT_READ != status) && (GTMTLS_WANT_WRITE != status)))) { if (0 != status) { tls_errno = gtm_tls_errno(); if (0 > tls_errno) errp = gtm_tls_get_error(); else errp = STRERROR(tls_errno); } else errp = STRERROR(save_errno); socketptr->tlsenabled = FALSE; len = SIZEOF(ONE_COMMA) - 1; memcpy(iod->dollar.device, ONE_COMMA, len); errlen = STRLEN(errp); devlen = MIN((SIZEOF(iod->dollar.device) - len - 1), errlen); memcpy(&iod->dollar.device[len], errp, devlen + 1); if (devlen < errlen) iod->dollar.device[SIZEOF(iod->dollar.device) - 1] = '\0'; if (socketptr->ioerror) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSHANDSHAKE, 0, ERR_TEXT, 2, errlen, errp); return; } if ((0 != status) && (0 <= status2)) /* not accepted/connected and not error */ { /* check for timeout if not error or want read or write */ if ((0 != timeoutarg) && (NO_M_TIMEOUT != timeoutarg)) { sys_get_curr_time(&cur_time); cur_time = sub_abs_time(&end_time, &cur_time); if (0 >= cur_time.at_sec) { /* no more time */ gtm_tls_session_close((gtm_tls_socket_t **)&socketptr->tlssocket); socketptr->tlsenabled = FALSE; dollar_truth = FALSE; return; } else { /* adjust msec_timeout for poll/select */ # ifdef USE_POLL msec_timeout = (cur_time.at_sec * 1000) + (cur_time.at_usec / 1000); # else timeout_spec.tv_sec = cur_time.at_sec; timeout_spec.tv_usec = 
(gtm_tv_usec_t)cur_time.at_usec; # endif } } else if (0 == timeoutarg) { /* only one chance */ gtm_tls_session_close((gtm_tls_socket_t **)&socketptr->tlssocket); socketptr->tlsenabled = FALSE; dollar_truth = FALSE; return; } continue; } } while ((GTMTLS_WANT_READ == status) || (GTMTLS_WANT_WRITE == status)); /* turn on output buffering */ if (0 == socketptr->obuffer_size) socketptr->obuffer_size = socketptr->buffer_size; socketptr->obuffer_length = socketptr->obuffer_offset = 0; socketptr->obuffer_wait_time = DEFAULT_WRITE_WAIT; socketptr->obuffer_flush_time = DEFAULT_WRITE_WAIT * 2; /* until add device parameter */ socketptr->obuffer = malloc(socketptr->obuffer_size); } else if (tlsopt_renegotiate == option)
/* Note we don't increment fast_lock_count as part of getting the latch and decrement it when releasing it because ROLLBACK
 * can hold onto this latch for a long while and can do updates in this duration and we should NOT have a non-zero
 * fast_lock_count as many places like t_begin/dsk_read have asserts to this effect. It is okay to NOT increment
 * fast_lock_count as ROLLBACK anyways have logic to disable interrupts the moment it starts doing database updates.
 */
boolean_t	grab_gtmsource_srv_latch(sm_global_latch_ptr_t latch, uint4 max_timeout_in_secs, uint4 onln_rlbk_action)
{
	int		spin, spins_per_pass, passes_left, total_passes;
	unix_db_info	*udi;
	sgmnt_addrs	*repl_csa;
	boolean_t	rlbk_cycle_differs;

	assert(!have_crit(CRIT_HAVE_ANY_REG));
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	repl_csa = &udi->s_addrs;
	spins_per_pass = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	/* Outer loop spans roughly max_timeout_in_secs : 1 pass in 4 sleeps ~1ms, the rest just yield */
	total_passes = max_timeout_in_secs * 4 * 1000;
	for (passes_left = total_passes - 1; 0 < passes_left; passes_left--)
	{
		for (spin = spins_per_pass; 0 < spin; spin--)
		{
			assert(latch->u.parts.latch_pid != process_id);	/* We better not hold it if trying to get it */
			if (GET_SWAPLOCK(latch))
			{	/* Got the latch; record the fact in the lock history (debug builds track it via locknl) */
				DEBUG_ONLY(locknl = repl_csa->nl);	/* Use the journal pool to maintain lock history */
				LOCK_HIST("OBTN", latch, process_id, passes_left);
				DEBUG_ONLY(locknl = NULL);
				if (jnlpool.repl_inst_filehdr->file_corrupt && !jgbl.onlnrlbk)
				{	/* Journal pool indicates an abnormally terminated online rollback. Cannot continue
					 * until the rollback command is re-run to bring the journal pool/file and instance
					 * file to a consistent state. No need to release the latch before rts_error
					 * (mupip_exit_handler will do it for us).
					 */
					rts_error(VARLSTCNT(8) ERR_REPLREQROLLBACK, 2, LEN_AND_STR(udi->fn), ERR_TEXT, 2,
						LEN_AND_LIT("file_corrupt field in instance file header is set to" " TRUE"));
				}
				rlbk_cycle_differs = (repl_csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle);
				assert((ASSERT_NO_ONLINE_ROLLBACK != onln_rlbk_action) || !rlbk_cycle_differs);
				if (rlbk_cycle_differs && (HANDLE_CONCUR_ONLINE_ROLLBACK == onln_rlbk_action))
				{	/* A concurrent online rollback happened; let the source server clean up */
					assert(is_src_server);
					SYNC_ONLN_RLBK_CYCLES;
					gtmsource_onln_rlbk_clnup();	/* side-effect : sets gtmsource_state */
					rel_gtmsource_srv_latch(latch);
				}
				return TRUE;
			}
		}
		if (0 == (passes_left & 0x3))
		{	/* Every 4th pass, bide for awhile */
			wcs_sleep(LOCK_SLEEP);
			/* Periodically make sure the holder of the latch is still alive */
			if (RETRY_CASLATCH_CUTOFF == (passes_left % LOCK_TRIES))
				performCASLatchCheck(latch, TRUE);
		} else
			rel_quant();	/* On all other passes, do a simple rel_quant */
	}
	/* Could not get the latch within the allotted time */
	DUMP_LOCKHIST();
	assert(FALSE);
	assert(jnlpool.gtmsource_local && jnlpool.gtmsource_local->gtmsource_pid);
	rts_error(VARLSTCNT(5) ERR_SRVLCKWT2LNG, 2, max_timeout_in_secs, jnlpool.gtmsource_local->gtmsource_pid);
	return FALSE;	/* to keep the compiler happy */
}