bool gtcmtr_kill(void) { cm_region_list *reg_ref; unsigned char *ptr, regnum; unsigned short len; static readonly gds_file_id file; error_def(ERR_DBPRIVERR); ptr = curr_entry->clb_ptr->mbf; assert(*ptr == CMMS_Q_KILL); ptr++; GET_SHORT(len, ptr); ptr += sizeof(unsigned short); regnum = *ptr++; reg_ref = gtcm_find_region(curr_entry,regnum); len--; /* subtract size of regnum */ CM_GET_GVCURRKEY(ptr, len); gtcm_bind_name(reg_ref->reghead, TRUE); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region)); if (JNL_ALLOWED(cs_addrs)) { /* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines * do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not * easy to re-execute the following two assignments in case gvcst_kill()'s call to t_end() encounters a * cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment * if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE. * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED * might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead. * this approach ensures that the jnl_put_jrt_pini() gets the appropriate prc_vec for writing into the * journal record in case JNL_ENABLED turns out to be TRUE in t_end() time. * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access * and hence the correctness of prc_vec whenever it turns out necessary is guaranteed. 
*/ originator_prc_vec = curr_entry->pvec; cs_addrs->jnl->pini_addr = reg_ref->pini_addr; } if (gv_target->root) gvcst_kill(TRUE); if (JNL_ALLOWED(cs_addrs)) reg_ref->pini_addr = cs_addrs->jnl->pini_addr; /* In case journal switch occurred */ ptr = curr_entry->clb_ptr->mbf; *ptr++ = CMMS_R_KILL; curr_entry->clb_ptr->cbl = S_HDRSIZE; return TRUE; }
void db_init(gd_region *reg, sgmnt_data_ptr_t tsd) { static boolean_t mutex_init_done = FALSE; boolean_t is_bg, read_only; char machine_name[MAX_MCNAMELEN]; file_control *fc; int gethostname_res, stat_res, mm_prot; int4 status, semval, dblksize, fbwsize; sm_long_t status_l; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; struct sembuf sop[3]; struct stat stat_buf; union semun semarg; struct semid_ds semstat; struct shmid_ds shmstat; struct statvfs dbvfs; uint4 sopcnt; unix_db_info *udi; #ifdef periodic_timer_removed void periodic_flush_check(); #endif error_def(ERR_CLSTCONFLICT); error_def(ERR_CRITSEMFAIL); error_def(ERR_DBNAMEMISMATCH); error_def(ERR_DBIDMISMATCH); error_def(ERR_NLMISMATCHCALC); error_def(ERR_REQRUNDOWN); error_def(ERR_SYSCALL); assert(tsd->acc_meth == dba_bg || tsd->acc_meth == dba_mm); is_bg = (dba_bg == tsd->acc_meth); read_only = reg->read_only; new_dbinit_ipc = FALSE; /* we did not create a new ipc resource */ udi = FILE_INFO(reg); memset(machine_name, 0, sizeof(machine_name)); if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res)) rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno); assert(strlen(machine_name) < MAX_MCNAMELEN); csa = &udi->s_addrs; csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL; /* to help in dbinit_ch and gds_rundown */ reg->opening = TRUE; /* * Create ftok semaphore for this region. * We do not want to make ftok counter semaphore to be 2 for on mupip journal recover process. */ if (!ftok_sem_get(reg, !mupip_jnl_recover, GTM_ID, FALSE)) rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg)); /* * At this point we have ftok_semid sempahore based on ftok key. * Any ftok conflicted region will block at this point. * Say, a.dat and b.dat both has same ftok and we have process A to access a.dat and * process B to access b.dat. 
In this case only one can continue to do db_init() */ fc = reg->dyn.addr->file_cntl; fc->file_type = reg->dyn.addr->acc_meth; fc->op = FC_READ; fc->op_buff = (sm_uc_ptr_t)tsd; fc->op_len = sizeof(*tsd); fc->op_pos = 1; dbfilop(fc); /* Read file header */ udi->shmid = tsd->shmid; udi->semid = tsd->semid; udi->sem_ctime = tsd->sem_ctime.ctime; udi->shm_ctime = tsd->shm_ctime.ctime; dbsecspc(reg, tsd); /* Find db segment size */ if (!mupip_jnl_recover) { if (INVALID_SEMID == udi->semid) { if (0 != udi->sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime) /* We must have somthing wrong in protocol or, code, if this happens */ GTMASSERT; /* * Create new semaphore using IPC_PRIVATE. System guarantees a unique id. */ if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT))) { udi->semid = INVALID_SEMID; rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno); } udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */ new_dbinit_ipc = TRUE; tsd->semid = udi->semid; semarg.val = GTM_ID; /* * Following will set semaphore number 2 (=FTOK_SEM_PER_ID - 1) value as GTM_ID. * In case we have orphaned semaphore for some reason, mupip rundown will be * able to identify GTM semaphores from the value and can remove. */ if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno); /* * Warning: We must read the sem_ctime using IPC_STAT after SETVAL, which changes it. * We must NOT do any more SETVAL after this. Our design is to use * sem_ctime as creation time of semaphore. 
*/ semarg.buf = &semstat; if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT"), errno); tsd->sem_ctime.ctime = udi->sem_ctime = semarg.buf->sem_ctime; } else { if (INVALID_SHMID == udi->shmid) /* if mu_rndwn_file gets standalone access of this region and * somehow mupip process crashes, we can have semid != -1 but shmid == -1 */ rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name), ERR_TEXT, 2, LEN_AND_LIT("semid is valid but shmid is invalid")); semarg.buf = &semstat; if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg)) /* file header has valid semid but semaphore does not exists */ rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name)); else if (semarg.buf->sem_ctime != tsd->sem_ctime.ctime) rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name), ERR_TEXT, 2, LEN_AND_LIT("sem_ctime does not match")); if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno); else if (shmstat.shm_ctime != tsd->shm_ctime.ctime) rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name), ERR_TEXT, 2, LEN_AND_LIT("shm_ctime does not match")); } /* We already have ftok semaphore of this region, so just plainly do semaphore operation */ /* This is the database access control semaphore for any region */ sop[0].sem_num = 0; sop[0].sem_op = 0; /* Wait for 0 */ sop[1].sem_num = 0; sop[1].sem_op = 1; /* Lock */ sopcnt = 2; if (!read_only) { sop[2].sem_num = 1; sop[2].sem_op = 1; /* increment r/w access counter */ sopcnt = 3; } sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = SEM_UNDO | IPC_NOWAIT; SEMOP(udi->semid, sop, sopcnt, status); if (-1 == status) { errno_save = errno; 
gtm_putmsg(VARLSTCNT(4) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg)); rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM, errno_save); } } else /* for mupip_jnl_recover we were already in mu_rndwn_file and got "semid" semaphore */ { if (INVALID_SEMID == udi->semid || 0 == udi->sem_ctime) /* make sure mu_rndwn_file() has reset created semaphore for standalone access */ GTMASSERT; if (INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime) /* make sure mu_rndwn_file() has reset shared memory */ GTMASSERT; udi->shmid = INVALID_SHMID; /* reset shmid so dbinit_ch does not get confused in case we go there */ new_dbinit_ipc = TRUE; } sem_incremented = TRUE; if (new_dbinit_ipc) { /* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */ #ifdef __MVS__ if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, ROUND_UP(reg->sec_size, MEGA_BOUND), __IPC_MEGA | IPC_CREAT | RWDALL))) #else if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, reg->sec_size, RWDALL | IPC_CREAT))) #endif { udi->shmid = status_l = INVALID_SHMID; rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno); } tsd->shmid = udi->shmid; if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat)) rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno); tsd->shm_ctime.ctime = udi->shm_ctime = shmstat.shm_ctime; } #ifdef DEBUG_DB64 status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, next_smseg, SHM_RND)); next_smseg = (sm_uc_ptr_t)ROUND_UP((sm_long_t)(next_smseg + reg->sec_size), SHMAT_ADDR_INCS); #else status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND)); #endif if (-1 == status_l) { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno); } csa->nl = (node_local_ptr_t)csa->db_addrs[0]; csa->critical = 
(mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE); assert(((int)csa->critical & 0xf) == 0); /* critical should be 16-byte aligned */ #ifdef CACHELINE_SIZE assert(0 == ((int)csa->critical & (CACHELINE_SIZE - 1))); #endif /* Note: Here we check jnl_sate from database file and its value cannot change without standalone access. * The jnl_buff buffer should be initialized irrespective of read/write process */ JNL_INIT(csa, reg, tsd); csa->backup_buffer = (backup_buff_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd)); csa->lock_addrs[0] = (sm_uc_ptr_t)csa->backup_buffer + BACKUP_BUFFER_SIZE + 1; csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1; csa->total_blks = tsd->trans_hist.total_blks; /* For test to see if file has extended */ if (new_dbinit_ipc) { memset(csa->nl, 0, sizeof(*csa->nl)); /* We allocated shared storage -- we have to init it */ if (JNL_ALLOWED(csa)) { /* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not * necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch * check in order to determine whether to call jnl_file_open() or not. this is in preparation for that. */ csa->jnl->jnl_buff->cycle = 1; } } if (is_bg) csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd)); else { csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1); FSTAT_FILE(udi->fd, &stat_buf, stat_res); if (-1 == stat_res) rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno); mm_prot = read_only ? 
PROT_READ : (PROT_READ | PROT_WRITE); #ifdef DEBUG_DB64 if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)get_mmseg((size_t)stat_buf.st_size), (size_t)stat_buf.st_size, mm_prot, GTM_MM_FLAGS, udi->fd, (off_t)0))) rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno); put_mmseg((caddr_t)(csa->db_addrs[0]), (size_t)stat_buf.st_size); #else if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL, (size_t)stat_buf.st_size, mm_prot, GTM_MM_FLAGS, udi->fd, (off_t)0))) rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno); #endif csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1; csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0]; } if (!csa->nl->glob_sec_init) { assert(new_dbinit_ipc); if (is_bg) *csd = *tsd; if (csd->machine_name[0]) /* crash occured */ { if (0 != memcmp(csd->machine_name, machine_name, MAX_MCNAMELEN)) /* crashed on some other node */ rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name)); else rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name)); } if (is_bg) { bt_malloc(csa); csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd); db_csh_ini(csa); } db_csh_ref(csa); strcpy(csa->nl->machine_name, machine_name); /* machine name */ assert(MAX_REL_NAME > gtm_release_name_len); memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1); /* GT.M release name */ memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1); /* GDS label */ memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len); /* database filename */ csa->nl->creation_date_time = csd->creation.date_time; csa->nl->highest_lbm_blk_changed = -1; csa->nl->wcs_timers = -1; csa->nl->nbb = BACKUP_NOT_IN_PROGRESS; csa->nl->unique_id.uid = FILE_INFO(reg)->fileid; /* save what file we initialized this storage for */ /* save pointers in csa to access shared memory */ csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl); if 
(JNL_ALLOWED(csa)) csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl); csa->nl->backup_buffer = (sm_off_t)((sm_uc_ptr_t)csa->backup_buffer - (sm_uc_ptr_t)csa->nl); csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl); csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl); if (!read_only || is_bg) { csd->trans_hist.early_tn = csd->trans_hist.curr_tn; csd->max_update_array_size = csd->max_non_bm_update_array_size = ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE); csd->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE); /* add current db_csh counters into the cumulative counters and reset the current counters */ #define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2) \ csd->COUNTER.cumul_count += csd->COUNTER.curr_count; \ csd->COUNTER.curr_count = 0; #include "tab_db_csh_acct_rec.h" #undef TAB_DB_CSH_ACCT_REC } if (!read_only) { if (is_bg) { assert(memcmp(csd, GDS_LABEL, GDS_LABEL_SZ - 1) == 0); LSEEKWRITE(udi->fd, (off_t)0, (sm_uc_ptr_t)csd, sizeof(sgmnt_data), errno_save); if (0 != errno_save) { rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2, LEN_AND_LIT("Error with database write"), errno_save); } } } reg->dyn.addr->ext_blk_count = csd->extension_size; mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only)); DEBUG_ONLY(locknl = csa->nl;) /* for DEBUG_ONLY LOCK_HIST macro */
/*
 * gvcst_tp_init - prepare the TP (transaction processing) structures of region "greg" for the
 * TP operations that follow.
 *
 * When the region's sgmnt_addrs has no sgm_info yet, one is allocated, zeroed and populated with
 * its history array, blks_in_use hash table, buddy lists and (for dba_bg regions only) cr_array.
 * On every call the region/csa/csd back-pointers and start_tn are refreshed.
 *
 * Journal buffers are created the first time the region is seen with JNL_ALLOWED true, and are
 * released if journaling is found disallowed while buffers from an earlier use still exist.
 */
void gvcst_tp_init(gd_region *greg)
{
	sgm_info	*si;
	sgmnt_addrs	*csa;

	csa = (sgmnt_addrs *)&FILE_INFO(greg)->s_addrs;
	si = csa->sgm_info_ptr;
	if (NULL == si)
	{	/* First use of this region in TP: build the sgm_info from scratch */
		si = (sgm_info *)malloc(sizeof(sgm_info));
		csa->sgm_info_ptr = si;
		assert(32768 > sizeof(sgm_info));
		memset(si, 0, sizeof(sgm_info));
		si->tp_hist_size = TP_MAX_MM_TRANSIZE;
		/* The initial history size should be very much less than si->tp_hist_size */
		si->cur_tp_hist_size = INIT_CUR_TP_HIST_SIZE;
		assert(si->cur_tp_hist_size <= si->tp_hist_size);
		si->blks_in_use = (hash_table_int4 *)malloc(sizeof(hash_table_int4));
		init_hashtab_int4(si->blks_in_use, BLKS_IN_USE_INIT_ELEMS);
		/* See comment in tp.h about cur_tp_hist_size for details */
		si->last_tp_hist = (srch_blk_status *)malloc(sizeof(srch_blk_status) * si->cur_tp_hist_size);
		si->first_tp_hist = si->last_tp_hist;
		si->cw_set_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->cw_set_list, sizeof(cw_set_element), CW_SET_LIST_INIT_ALLOC);
		si->tlvl_cw_set_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->tlvl_cw_set_list, sizeof(cw_set_element), TLVL_CW_SET_LIST_INIT_ALLOC);
		si->tlvl_info_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->tlvl_info_list, sizeof(tlevel_info), TLVL_INFO_LIST_INIT_ALLOC);
		si->new_buff_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->new_buff_list, SIZEOF(que_ent) + csa->hdr->blk_size, NEW_BUFF_LIST_INIT_ALLOC);
		si->recompute_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->recompute_list, sizeof(key_cum_value), RECOMPUTE_LIST_INIT_ALLOC);
		/* si->cr_array may grow as large as TP_MAX_MM_TRANSIZE but usually stays far smaller, so
		 * start with a small array and expand it later as needed.  Only dba_bg regions get one.
		 */
		if (dba_bg != greg->dyn.addr->acc_meth)
		{
			si->cr_array_size = 0;
			si->cr_array = NULL;
		} else
		{
			si->cr_array_size = si->cur_tp_hist_size;
			si->cr_array = (cache_rec_ptr_ptr_t)malloc(sizeof(cache_rec_ptr_t) * si->cr_array_size);
		}
		si->fresh_start = TRUE;
	}
	/* Fields refreshed on every call, whether or not the sgm_info was just created */
	si->gv_cur_region = greg;
	si->tp_csa = csa;
	si->tp_csd = csa->hdr;
	si->start_tn = csa->ti->curr_tn;
	if (!JNL_ALLOWED(csa))
	{
		if (NULL != si->jnl_tail)
		{	/* Journaling is disallowed now but was allowed (non-NULL si->jnl_tail) the last time this
			 * region was used.  Release the region-specific structures that are no longer needed.
			 */
			FREEUP_BUDDY_LIST(si->jnl_list);
			FREEUP_BUDDY_LIST(si->format_buff_list);
			si->jnl_tail = NULL;
		}
		return;
	}
	si->total_jnl_rec_size = csa->min_total_tpjnl_rec_size;	/* Reinitialize total_jnl_rec_size */
	/* The journal allocations below do not depend on any dynamically changeable database parameter,
	 * so structures left over from an earlier use can simply be kept.
	 */
	if (NULL != si->jnl_tail)
		return;
	si->jnl_tail = &si->jnl_head;
	si->jnl_list = (buddy_list *)malloc(sizeof(buddy_list));
	initialize_list(si->jnl_list, sizeof(jnl_format_buffer), JNL_LIST_INIT_ALLOC);
	si->format_buff_list = (buddy_list *)malloc(sizeof(buddy_list));
	/* The smallest permitted elemSize is 8 because of the alignment requirements of the returned memory.
	 * The format buffer is therefore requested in 8-byte elements, and however many bytes are needed get
	 * converted into that many 8-byte segments (see code in jnl_format).
	 */
	initialize_list(si->format_buff_list, JFB_ELE_SIZE, DIVIDE_ROUND_UP(JNL_FORMAT_BUFF_INIT_ALLOC, JFB_ELE_SIZE));
}
bool gtcmtr_increment(void) { cm_region_list *reg_ref; mval incr_delta, post_incr; unsigned char buff[MAX_ZWR_KEY_SZ], *end; unsigned char *ptr, regnum; short n; unsigned short top, len, temp_short; static readonly gds_file_id file; error_def(ERR_KEY2BIG); error_def(ERR_GVIS); error_def(ERR_DBPRIVERR); ptr = curr_entry->clb_ptr->mbf; assert(*ptr == CMMS_Q_INCREMENT); ptr++; GET_USHORT(len, ptr); ptr += SIZEOF(unsigned short); regnum = *ptr++; reg_ref = gtcm_find_region(curr_entry,regnum); len--; /* subtract size of regnum */ CM_GET_GVCURRKEY(ptr, len); gtcm_bind_name(reg_ref->reghead, TRUE); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region)); if (JNL_ALLOWED(cs_addrs)) { /* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines * do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not * easy to re-execute the following two assignments in case gvcst_incr's call to t_end encounters a * cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment * if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE. * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED * might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead. * this approach ensures that the jnl_put_jrt_pini gets the appropriate prc_vec for writing into the * journal record in case JNL_ENABLED turns out to be TRUE in t_end time. * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access * and hence the correctness of prc_vec (whenever it turns out necessary) is guaranteed. 
*/ originator_prc_vec = curr_entry->pvec; cs_addrs->jnl->pini_addr = reg_ref->pini_addr; } GET_USHORT(len, ptr); ptr += SIZEOF(unsigned short); incr_delta.mvtype = MV_STR; incr_delta.str.len = len; incr_delta.str.addr = (char *)ptr; if ((n = gv_currkey->end + 1) > gv_cur_region->max_key_size) { if ((end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)) == 0) end = &buff[MAX_ZWR_KEY_SZ - 1]; rts_error(VARLSTCNT(11) ERR_KEY2BIG, 4, n, (int4)gv_cur_region->max_key_size, REG_LEN_STR(gv_cur_region), 0, ERR_GVIS, 2, end - buff, buff); } MV_FORCE_NUMD(&incr_delta); gvcst_incr(&incr_delta, &post_incr); if (JNL_ALLOWED(cs_addrs)) reg_ref->pini_addr = cs_addrs->jnl->pini_addr; /* In case journal switch occurred */ ptr = curr_entry->clb_ptr->mbf; if (MV_DEFINED(&post_incr)) { temp_short = (unsigned short)post_incr.str.len; assert((int4)temp_short == post_incr.str.len); /* ushort <- int4 assignment lossy? */ if (curr_entry->clb_ptr->mbl < 1 + /* msg header */ SIZEOF(temp_short) + /* size of length of $INCR return value */ temp_short) /* length of $INCR return value */ { /* resize buffer */ cmi_realloc_mbf(curr_entry->clb_ptr, 1 + SIZEOF(temp_short) + temp_short); ptr = curr_entry->clb_ptr->mbf; } *ptr++ = CMMS_R_INCREMENT; PUT_USHORT(ptr, temp_short); ptr += SIZEOF(unsigned short); memcpy(ptr, post_incr.str.addr, temp_short); ptr += temp_short; } else
void gtcmd_rundown(connection_struct *cnx, bool clean_exit) { int4 link; cm_region_list *ptr, *last, *que_next, *que_last; cm_region_head *region; uint4 jnl_status; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; int refcnt; boolean_t was_crit; int4 rundown_status = EXIT_NRM; /* if gds_rundown went smoothly */ for (ptr = cnx->region_root; ptr;) { region = ptr->reghead; TP_CHANGE_REG(region->reg); jpc = cs_addrs->jnl; if (ptr->pini_addr && clean_exit && JNL_ENABLED(cs_data) && (NOJNL != jpc->channel)) { was_crit = cs_addrs->now_crit; if (!was_crit) grab_crit(gv_cur_region); if (JNL_ENABLED(cs_data)) { jpc->pini_addr = ptr->pini_addr; SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pfin needs this to be set */ jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { if (0 != jpc->pini_addr) jnl_put_jrt_pfin(cs_addrs); } else send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); } if (!was_crit) rel_crit(gv_cur_region); } refcnt = --region->refcnt; /* Dont know how refcnt can become negative but in pro handle it by bypassing this region. The reason is the * following. refcnt should have originally been a positive value. Every time this function is invoked, it would * be decremented by one. There should have been one invocation that saw refcnt to be zero. That would have * done the rundown of the region or if it is still in the stack the rundown is still in progress. Therefore * it is not a good idea to try running down this region when we see refcnt to be negative (as otherwise we * will get confused and could potentially end up with SIG-11 or ACCVIO errors). 
The worst case is that we * would not have rundown the region in which case an externally issued MUPIP RUNDOWN would be enough. */ assert(0 <= refcnt); if (0 == refcnt) { /* free up only as little as needed to facilitate structure reuse when the region is opened again */ assert(region->head.fl == region->head.bl); VMS_ONLY(gtcm_ast_avail++); if (JNL_ALLOWED(cs_data)) jpc->pini_addr = 0; UNIX_ONLY(rundown_status |=) gds_rundown(); gd_ht_kill(region->reg_hash, TRUE); /* TRUE to free up the table and the gv_targets it holds too */ FREE_CSA_DIR_TREE(cs_addrs); cm_del_gdr_ptr(gv_cur_region); } que_next = (cm_region_list *)((unsigned char *)ptr + ptr->regque.fl); que_last = (cm_region_list *)((unsigned char *)ptr + ptr->regque.bl); link = (int4)((unsigned char *)que_next - (unsigned char *)que_last); que_last->regque.fl = link; que_next->regque.bl = -link; last = ptr; ptr = ptr->next; free(last); }
void mupip_upgrade(void) { bool rbno; unsigned char *upgrd_buff[2], upgrd_label[GDS_LABEL_SZ]="UPGRADE0304"; char fn[256]; char answer[4]; unsigned short fn_len; int4 fd, save_errno, old_hdr_size, new_hdr_size, status, bufsize, dsize, datasize[2]; int4 old_hdr_size_vbn, new_hdr_size_vbn; int fstat_res; off_t last_full_grp_startoff, old_file_len, old_file_len2, read_off, write_off, old_start_vbn_off; block_id last_full_grp_startblk; v3_sgmnt_data old_head_data, *old_head; sgmnt_data new_head_data, *new_head; struct stat stat_buf; error_def(ERR_MUNODBNAME); error_def(ERR_MUNOUPGRD); error_def(ERR_DBOPNERR); error_def(ERR_DBRDONLY); error_def(ERR_DBFILOPERR); error_def(ERR_DBPREMATEOF); ESTABLISH(mupip_upgrade_ch); fn_len = sizeof(fn); if (!cli_get_str("FILE", fn, &fn_len)) rts_error(VARLSTCNT(1) ERR_MUNODBNAME); if (!(mupip_upgrade_standalone(fn, &upgrade_standalone_sems))) rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); if (-1 == (fd = OPEN(fn, O_RDWR))) { save_errno = errno; if (-1 != (fd = OPEN(fn, O_RDONLY))) { util_out_print("Cannot update read-only database.", FLUSH); rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, errno); } rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, save_errno); } /* Confirm before proceed */ if (!mu_upgrd_confirmed(TRUE)) { util_out_print("Upgrade canceled by user", FLUSH); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } util_out_print("Do not interrupt to avoid damage in database!!", FLUSH); util_out_print("Mupip upgrade started ...!/", FLUSH); mu_upgrd_sig_init(); /* get file status */ FSTAT_FILE(fd, &stat_buf, fstat_res); if (-1 == fstat_res) rts_error(VARLSTCNT(5) ERR_DBOPNERR, 2, fn_len, fn, errno); old_file_len = stat_buf.st_size; /* Prepare v3.x file header buffer */ old_hdr_size = sizeof(*old_head); old_head = &old_head_data; /* Prepare v4.x file header buffer */ new_hdr_size = sizeof(*new_head); new_head = &new_head_data; memset(new_head, 0, new_hdr_size); old_hdr_size_vbn = DIVIDE_ROUND_UP(old_hdr_size, DISK_BLOCK_SIZE); 
new_hdr_size_vbn = DIVIDE_ROUND_UP(new_hdr_size, DISK_BLOCK_SIZE); /* READ header from V3.x file */ LSEEKREAD(fd, 0, old_head, old_hdr_size, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* Check version */ if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 1)) { if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 3)) { /* it is not a GTM database */ close(fd); util_out_print("File !AD is not a GT.M database.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); }else { /* it is GTM database */ /* is it not v3.x database? */ if (memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_V30,2) !=0 && memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_ALT_V30,2) != 0) { close(fd); util_out_print("File !AD has an unrecognized database version!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } } } else { /* Note: We assume that if the V4.x header and current GT.M file header * has same field names, they are at same offset */ /* READ the header from file again as V4.x header */ LSEEKREAD(fd, 0, new_head, new_hdr_size, status); if (0 != status) if (-1 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); else rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); if (QWNE(new_head->reg_seqno, seq_num_zero) || QWNE(new_head->resync_seqno, seq_num_zero) || (new_head->resync_tn != 0) || new_head->repl_state != repl_closed) { util_out_print("!AD might already have been upgraded", FLUSH, fn_len, fn); util_out_print("Do you wish to continue with the upgrade? 
[y/n] ", FLUSH); SCANF("%s", answer); if (answer[0] != 'y' && answer[0] != 'Y') { close(fd); util_out_print("Upgrade canceled by user", FLUSH); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } } init_replication(new_head); new_head->max_update_array_size = new_head->max_non_bm_update_array_size = ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(new_head), UPDATE_ARRAY_ALIGN_SIZE); new_head->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE); new_head->mutex_spin_parms.mutex_hard_spin_count = MUTEX_HARD_SPIN_COUNT; new_head->mutex_spin_parms.mutex_sleep_spin_count = MUTEX_SLEEP_SPIN_COUNT; new_head->mutex_spin_parms.mutex_spin_sleep_mask = MUTEX_SPIN_SLEEP_MASK; new_head->semid = INVALID_SEMID; new_head->shmid = INVALID_SHMID; if (JNL_ALLOWED(new_head)) { /* Following 3 are new fields starting from V43001. * Initialize them appropriately. */ new_head->epoch_interval = DEFAULT_EPOCH_INTERVAL; new_head->alignsize = DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE; if (!new_head->jnl_alq) new_head->jnl_alq = JNL_ALLOC_DEF; /* note new_head->jnl_deq is carried over without any change even if it is zero since a zero * jnl file extension size is supported starting V43001 */ new_head->autoswitchlimit = ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, new_head->jnl_alq, new_head->jnl_deq); /* following field is assumed as non-zero by set_jnl_info starting V43001A */ if (JNL_ALLOWED(new_head) && !new_head->jnl_buffer_size) new_head->jnl_buffer_size = JNL_BUFFER_DEF; } else { new_head->epoch_interval = 0; new_head->alignsize = 0; new_head->autoswitchlimit = 0; } new_head->yield_lmt = DEFAULT_YIELD_LIMIT; /* writing header */ LSEEKWRITE(fd, 0, new_head, new_hdr_size, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); close(fd); util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn); if (0 != sem_rmid(upgrade_standalone_sems)) { util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, 
upgrade_standalone_sems); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mupip_exit(SS_NORMAL); } util_out_print("Old header size: !SL", FLUSH, old_hdr_size); util_out_print("New header size: !SL", FLUSH, new_hdr_size); if (old_head->createinprogress) { close(fd); util_out_print("Database creation in progress on file !AD.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if (old_head->file_corrupt) { close(fd); util_out_print("Database !AD is corrupted.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if ((((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)DISK_BLOCK_SIZE != old_file_len) && (((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)old_head->blk_size != old_file_len)) { util_out_print("Incorrect start_vbn !SL or, block size !SL or, total blocks !SL", FLUSH, old_head->start_vbn, old_head->blk_size, old_head->trans_hist.total_blks); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if (ROUND_DOWN(old_head->blk_size, DISK_BLOCK_SIZE) != old_head->blk_size) { util_out_print("Database block size !SL is not divisible by DISK_BLOCK_SIZE", FLUSH, old_head->blk_size); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mu_upgrd_header(old_head, new_head); /* Update header from v3.x to v4.x */ new_head->start_vbn = new_hdr_size_vbn + 1; new_head->free_space = 0; new_head->wc_blocked_t_end_hist.evnt_cnt = old_head->wc_blocked_t_end_hist2.evnt_cnt; new_head->wc_blocked_t_end_hist.evnt_tn = old_head->wc_blocked_t_end_hist2.evnt_tn; init_replication(new_head); /* A simple way of doing mupip upgrade is to move all the data after file header towards the eof to make space and write down the header. This does not need any computation or, change in data/index blocks. This is a slow process because it has mainly I/O, though no manipulation of database structures. or index blocks. This is okay for small database. 
A time efficient way is to physically move second group of BLKS_PER_LMAP number of blocks towards the eof and move first group of BLKS_PER_LMAP number of blocks in place of 2nd group. Finally adjust all indices to point to the blocks correctly. Also adjust master bit map. (note: we cannot move first group from the beginning). Detail algorithm as follows: --------------------------- // Allocate two buffers each to hold one group of data. Read v3.x header and upgrade to v4.x if file is big enough read group 1 in buff[0] read_off = offset of starting block of 2nd group. read group 2 in buff[1] write buff[0] at offset read_off last_full_grp_startblk = points to the block where 2nd group of 512 blocks of old file will be written back. //Instead of searching for a free group we will write at the last full group //Say, we have 3000 blocks. last_full_grp_startblk = 2048 // (not 2560, because it is not full) //All data from that point upto eof will be read and saved in buffer read all remaining data from the point last_full_grp_startblk upto eof in buff[0] write buff[1] at the point of last_full_grp_startblk Now write buff[0] at the end of last write //Graphical Example: Each letter corresponds to a group of 512 blocks where first block // is local bit map. Last group U may be a group of less than 512 blocks. // Extend towards right -------------------------------------------------------> // old permutation: [v3 head] A B C D E F G H I J K L M N O P Q R S T U // new permutation: [v4 head ] A C D E F G H I J K L M N O P Q R S T B U Finally traverse the tree and adjust block pointers Adjust master map write new v4.x header at bof else bufsize = size of data for a group rbno = 0 // read buffer no. 
This switches between 0 and 1 read_off = 0 write_off = 0 upgrd_buff[rbno] = new header data_size[rbno] = new header size rbno = INVERT(rbno); do while not eof data_size[rbno] = MIN(bufsize, remaining_data_size) Read data of size data_size[rbno] in upgrd_buff[rbno] and adjust read_off rbno = INVERT(rbno); Write upgrd_buff[rbno] of datasize[rbno] at write_off and increase write_off Enddo rbno = INVERT(rbno) Write upgrd_buff[rbno] of datasize[rbno] at write_off endif */ bufsize = old_head->blk_size * BLKS_PER_LMAP; upgrd_buff[0] = (unsigned char*) malloc(bufsize); upgrd_buff[1] = (unsigned char*) malloc(bufsize); read_off = old_start_vbn_off = (off_t)(old_head->start_vbn - 1) * DISK_BLOCK_SIZE; /* start vbn offset in bytes */ last_full_grp_startblk = ROUND_DOWN(new_head->trans_hist.total_blks, BLKS_PER_LMAP); /* in block_id */ last_full_grp_startoff = old_start_vbn_off + (off_t)last_full_grp_startblk * new_head->blk_size; /* offset in bytes */ /* this calculation is used because some 3.2x database has GDS blk_size bytes at the end instead of DISK_BLOCK_SIZE bytes. 
*/ old_file_len2 = old_head->start_vbn * DISK_BLOCK_SIZE + (off_t)old_head->blk_size * old_head->trans_hist.total_blks; /* Change Label to a temporary dummy value, so that other GTM process does not come while doing upgrade and corrupts database */ LSEEKWRITE(fd, 0, upgrd_label, GDS_LABEL_SZ - 1, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); if (old_head->trans_hist.total_blks > BLKS_PER_LMAP * 2) { /* recalculate start_vbn and free space, because there will be a gap after header */ new_head->start_vbn = old_head->start_vbn + bufsize / DISK_BLOCK_SIZE; new_head->free_space = bufsize - (new_hdr_size_vbn - old_hdr_size_vbn) * DISK_BLOCK_SIZE; util_out_print("New starting VBN is: !SL !/", FLUSH, new_head->start_vbn); /* read 1st group of blocks */ LSEEKREAD(fd, read_off, upgrd_buff[0], bufsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); read_off = read_off + bufsize; /* read 2nd group of blocks */ LSEEKREAD(fd, read_off, upgrd_buff[1], bufsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write 1st group of blocks in place of 2nd group */ write_off = old_start_vbn_off + bufsize; LSEEKWRITE(fd, write_off, upgrd_buff[0], bufsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* read last group (# of blks <= BLKS_PER_LMAP) */ dsize = old_file_len2 - last_full_grp_startoff; assert (dsize <= bufsize); LSEEKREAD(fd, last_full_grp_startoff, upgrd_buff[0], dsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write 2nd group of blocks */ LSEEKWRITE(fd, last_full_grp_startoff, upgrd_buff[1], bufsize, status); if (0 != 
status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write last group read from old file */ LSEEKWRITE(fd, last_full_grp_startoff + bufsize, upgrd_buff[0], dsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); util_out_print("Please wait while index is being adjusted...!/", FLUSH); mu_upgrd_adjust_blkptr(1L, TRUE, new_head, fd, fn, fn_len); mu_upgrd_adjust_mm(new_head->master_map, DIVIDE_ROUND_UP(new_head->trans_hist.total_blks+1,BLKS_PER_LMAP)); /* writing header */ LSEEKWRITE(fd, 0, new_head, new_hdr_size, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); } else /* very small database */ { rbno = 0; write_off = 0; datasize[rbno] = new_hdr_size; memcpy(upgrd_buff[0], new_head, new_hdr_size); rbno = INVERT(rbno); while(read_off < old_file_len2) { datasize[rbno] = MIN (old_file_len2 - read_off, bufsize); LSEEKREAD(fd, read_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); read_off += datasize[rbno]; rbno = INVERT(rbno); LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); write_off+= datasize[rbno]; } rbno = INVERT(rbno); LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); } /* end if small database */ free(upgrd_buff[0]); free(upgrd_buff[1]); close(fd); util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn); REVERT; if (0 != sem_rmid(upgrade_standalone_sems)) { util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mupip_exit(SS_NORMAL); }
void mucregini(int4 blk_init_size) { int4 status; int4 i; th_index_ptr_t th; collseq *csp; uint4 ustatus; mstr jnlfile, jnldef, tmpjnlfile; time_t ctime; MEMCPY_LIT(cs_data->label, GDS_LABEL); cs_data->desired_db_format = GDSVCURR; cs_data->fully_upgraded = TRUE; cs_data->db_got_to_v5_once = TRUE; /* no V4 format blocks that are non-upgradeable */ cs_data->minor_dbver = GDSMVCURR; cs_data->certified_for_upgrade_to = GDSVCURR; cs_data->creation_db_ver = GDSVCURR; cs_data->creation_mdb_ver = GDSMVCURR; cs_data->master_map_len = MASTER_MAP_SIZE_DFLT; cs_data->bplmap = BLKS_PER_LMAP; assert(BLK_SIZE <= MAX_DB_BLK_SIZE); cs_data->blk_size = BLK_SIZE; i = cs_data->trans_hist.total_blks; cs_data->trans_hist.free_blocks = i - DIVIDE_ROUND_UP(i, BLKS_PER_LMAP) - 2; cs_data->max_rec_size = gv_cur_region->max_rec_size; cs_data->max_key_size = gv_cur_region->max_key_size; cs_data->null_subs = gv_cur_region->null_subs; cs_data->std_null_coll = gv_cur_region->std_null_coll; #ifdef UNIX cs_data->freeze_on_fail = gv_cur_region->freeze_on_fail; cs_data->mumps_can_bypass = gv_cur_region->mumps_can_bypass; #endif cs_data->reserved_bytes = gv_cur_region->dyn.addr->reserved_bytes; cs_data->clustered = FALSE; cs_data->file_corrupt = 0; if (gv_cur_region->dyn.addr->lock_space) cs_data->lock_space_size = gv_cur_region->dyn.addr->lock_space * OS_PAGELET_SIZE; else cs_data->lock_space_size = DEF_LOCK_SIZE; cs_data->staleness[0] = -300000000; /* staleness timer = 30 seconds */ cs_data->staleness[1] = -1; cs_data->ccp_quantum_interval[0] = -20000000; /* 2 sec */ cs_data->ccp_quantum_interval[1] = -1; cs_data->ccp_response_interval[0] = -600000000; /* 1 min */ cs_data->ccp_response_interval[1] = -1; cs_data->ccp_tick_interval[0] = -1000000; /* 1/10 sec */ cs_data->ccp_tick_interval[1] = -1; cs_data->last_com_backup = 1; cs_data->last_inc_backup = 1; cs_data->last_rec_backup = 1; cs_data->defer_time = gv_cur_region->dyn.addr->defer_time; cs_data->jnl_alq = gv_cur_region->jnl_alq; if 
(cs_data->jnl_state && !cs_data->jnl_alq) cs_data->jnl_alq = JNL_ALLOC_DEF; cs_data->jnl_deq = gv_cur_region->jnl_deq; cs_data->jnl_before_image = gv_cur_region->jnl_before_image; cs_data->jnl_state = gv_cur_region->jnl_state; cs_data->epoch_interval = JNL_ALLOWED(cs_data) ? DEFAULT_EPOCH_INTERVAL : 0; cs_data->alignsize = JNL_ALLOWED(cs_data) ? (DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE) : 0; ROUND_UP_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, gv_cur_region->jnl_buffer_size, cs_data); #ifdef UNIX if (JNL_ALLOWED(cs_data)) { if (cs_data->jnl_alq + cs_data->jnl_deq > gv_cur_region->jnl_autoswitchlimit) { cs_data->autoswitchlimit = gv_cur_region->jnl_autoswitchlimit; cs_data->jnl_alq = cs_data->autoswitchlimit; } else cs_data->autoswitchlimit = ALIGNED_ROUND_DOWN(gv_cur_region->jnl_autoswitchlimit, cs_data->jnl_alq, cs_data->jnl_deq); } else cs_data->autoswitchlimit = 0; assert(!(MAX_IO_BLOCK_SIZE % DISK_BLOCK_SIZE)); if (cs_data->jnl_alq + cs_data->jnl_deq > cs_data->autoswitchlimit) cs_data->jnl_alq = cs_data->autoswitchlimit; #else cs_data->autoswitchlimit = JNL_ALLOWED(cs_data) ? 
ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, cs_data->jnl_alq, cs_data->jnl_deq) : 0; #endif if (!cs_data->jnl_buffer_size) ROUND_UP_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, JNL_BUFFER_DEF, cs_data); if (JNL_ALLOWED(cs_data)) if (cs_data->jnl_buffer_size < JNL_BUFF_PORT_MIN(cs_data)) { ROUND_UP_MIN_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, cs_data); } else if (cs_data->jnl_buffer_size > JNL_BUFFER_MAX) { ROUND_DOWN_MAX_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, cs_data); } cs_data->def_coll = gv_cur_region->def_coll; if (cs_data->def_coll) { if (csp = ready_collseq((int)(cs_data->def_coll))) { cs_data->def_coll_ver = (csp->version)(cs_data->def_coll); if (!do_verify(csp, cs_data->def_coll, cs_data->def_coll_ver)) { gtm_putmsg(VARLSTCNT(4) ERR_COLLTYPVERSION, 2, cs_data->def_coll, cs_data->def_coll_ver); mupip_exit(ERR_MUNOACTION); } } else { gtm_putmsg(VARLSTCNT(3) ERR_COLLATIONUNDEF, 1, cs_data->def_coll); mupip_exit(ERR_MUNOACTION); } } /* mupip_set_journal() relies on cs_data->jnl_file_len being 0 if cs_data->jnl_state is jnl_notallowed. * Note that even though gv_cur_region->jnl_state is jnl_notallowed, gv_cur_region->jnl_file_len can be non-zero */ cs_data->jnl_file_len = JNL_ALLOWED(cs_data) ? gv_cur_region->jnl_file_len : 0; cs_data->reg_seqno = 1; VMS_ONLY( cs_data->resync_seqno = 1; cs_data->old_resync_seqno = 1; cs_data->resync_tn = 1; )
int gtmsource() { int status, log_init_status, waitpid_res, save_errno; char print_msg[1024], tmpmsg[1024]; gd_region *reg, *region_top; sgmnt_addrs *csa, *repl_csa; boolean_t all_files_open, isalive; pid_t pid, ppid, procgp; seq_num read_jnl_seqno, jnl_seqno; unix_db_info *udi; gtmsource_local_ptr_t gtmsource_local; boolean_t this_side_std_null_coll; int null_fd, rc; memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs)); call_on_signal = gtmsource_sigstop; ESTABLISH_RET(gtmsource_ch, SS_NORMAL); if (-1 == gtmsource_get_opt()) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR); if (gtmsource_options.shut_down) { /* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other * MUPIP REPLIC commands that need these locks for their function. */ if (0 < gtmsource_options.shutdown_time) { repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n", gtmsource_options.shutdown_time); LONG_SLEEP(gtmsource_options.shutdown_time); } else repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n"); } else if (gtmsource_options.start) { repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n", gtmsource_options.secondary_instname); } if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)) { /* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now * in case this is a secondary -> primary transition. This is so we can later switch journal files in all * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. 
Although we * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic * currently has assumptions that a process holds only one ftok at any point in time. */ assert(NULL == gd_header); gvinit(); all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } } jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator); /* is_jnlpool_creator == TRUE ==> this process created the journal pool * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it. */ if (gtmsource_options.shut_down) gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN); else if (gtmsource_options.activate) gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN); else if (gtmsource_options.deactivate) gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN); else if (gtmsource_options.checkhealth) gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN); else if (gtmsource_options.changelog) gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN); else if (gtmsource_options.showbacklog) gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN); else if (gtmsource_options.stopsourcefilter) gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN); else if (gtmsource_options.jnlpool) gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN); else if (gtmsource_options.losttncomplete) gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN); else if (gtmsource_options.needrestart) gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN); else if (gtmsource_options.showfreeze) gtmsource_exit(gtmsource_showfreeze() - 
NORMAL_SHUTDOWN); else if (gtmsource_options.setfreeze) gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN); else if (!gtmsource_options.start) { assert(CLI_PRESENT == cli_present("STATSLOG")); gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN); } assert(gtmsource_options.start); # ifndef REPL_DEBUG_NOBACKGROUND /* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */ gtmsource_local = jnlpool.gtmsource_local; gtmsource_local->child_server_running = FALSE; FORK(pid); if (0 > pid) { save_errno = errno; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno); } else if (0 < pid) { /* Parent. Wait until child sets "child_server_running" to FALSE. That is an indication that the child * source server has completed its initialization phase and is all set so the parent command can return. */ while (isalive = is_proc_alive(pid, 0)) /* note : intended assignment */ { if (gtmsource_local->child_server_running) break; /* To take care of reassignment of PIDs, the while condition should be && with the condition * (PPID of pid == process_id) */ SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START); WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */ } if (isalive) { /* Child process is alive and started with no issues */ if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno); ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE); } else { /* Child source server process errored out at startup and is no longer alive. * If we were the one who created the journal pool, let us clean it up. */ repl_log(stdout, TRUE, TRUE, "Source server startup failed. 
See source server log file\n"); if (is_jnlpool_creator) status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN); } /* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the * value of sem 1 before the child grabs sem 1. */ gtmsource_exit(isalive ? SRV_ALIVE : SRV_ERR); } /* Point stdin to /dev/null */ OPENFILE("/dev/null", O_RDONLY, null_fd); if (0 > null_fd) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0); FCNTL3(null_fd, F_DUPFD, 0, rc); if (0 > rc) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0); CLOSEFILE(null_fd, rc); if (0 > rc) rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0); /* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore * at this point. The variables that indicate this would have been copied over to the child during the fork. This will * make the child think it is actually holding them as well when actually it is not. Reset those variables in the child * to ensure they do not misrepresent the holder of those semaphores. */ ftok_sem_reg = NULL; udi = FILE_INFO(jnlpool.jnlpool_dummy_reg); assert(udi->grabbed_ftok_sem); udi->grabbed_ftok_sem = FALSE; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE; assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]); /* Start child source server initialization */ is_src_server = TRUE; OPERATOR_LOG_MSG; process_id = getpid(); /* Reinvoke secshr related initialization with the child's pid */ INVOKE_INIT_SECSHR_ADDRS; /* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool. 
* Note that the initialization would already have been done by the parent receiver startup command but we need to * redo the initialization with the child process id. */ assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id)); mutex_per_process_init(); START_HEARTBEAT_IF_NEEDED; ppid = getppid(); log_init_status = repl_log_init(REPL_GENERAL_LOG, >msource_log_fd, gtmsource_options.log_file); assert(SS_NORMAL == log_init_status); repl_log_fd2fp(>msource_log_fp, gtmsource_log_fd); if (-1 == (procgp = setsid())) send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Source server error in setsid"), errno); # endif /* REPL_DEBUG_NOBACKGROUND */ if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level) gtm_zlib_init(); /* Open zlib shared library for compression/decompression */ REPL_DPRINT1("Setting up regions\n"); gvinit(); /* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */ all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); gtmsource_exit(ABNORMAL_SHUTDOWN); } /* Determine primary side null subscripts collation order */ /* Also check whether all regions have same null collation order */ this_side_std_null_coll = -1; for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++) { csa = &FILE_INFO(reg)->s_addrs; if (this_side_std_null_coll != csa->hdr->std_null_coll) { if (-1 == this_side_std_null_coll) this_side_std_null_coll = csa->hdr->std_null_coll; else { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF); gtmsource_exit(ABNORMAL_SHUTDOWN); } } if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg)); gtmsource_exit(ABNORMAL_SHUTDOWN); } if (reg->read_only && REPL_ALLOWED(csa)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, 
RTS_ERROR_LITERAL("Source Server does not have write permissions to one or " "more database files that are replicated")); gtmsource_exit(ABNORMAL_SHUTDOWN); } } /* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */ gtmsource_local->gtmsource_pid = process_id; gtmsource_local->gtmsource_state = GTMSOURCE_START; if (is_jnlpool_creator) { DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id); gtmsource_seqno_init(this_side_std_null_coll); if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary) { /* Created the journal pool as a root primary. Append a history record to the replication instance file. * Invoke the function "gtmsource_rootprimary_init" to do that. */ gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno); } } /* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */ # ifndef REPL_DEBUG_NOBACKGROUND /* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0. * Note that the parent source server startup command would have done an increment of the ftok counter semaphore * for the replication instance file. But the source server process (the child) that comes here would not have done * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay. 
*/ if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg)) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP); /* Increment the source server count semaphore */ status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM); if (0 != status) { save_errno = errno; rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno); } # else if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM))) { rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno); } # endif /* REPL_DEBUG_NOBACKGROUND */ gtmsource_srv_count++; gtmsource_local->child_server_running = TRUE; /* At this point, the parent startup command will stop waiting for child */ gtm_event_log_init(); /* Log source server startup command line first */ SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str); repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg); SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]", process_id, gtmsource_local->secondary_instname); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg)); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); if (is_jnlpool_creator) { repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n", jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid); } else repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n", jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); # ifdef GTM_TLS if (REPL_TLS_REQUESTED) { repl_do_tls_init(gtmsource_log_fp); assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK); } # endif if (jnlpool.jnlpool_ctl->freeze) { last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze; 
sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname); repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg); sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); } gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly; do { /* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. */ gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START; if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN)) { gtmsource_poll_actions(FALSE); SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE); continue; } if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode) { /* Shutdown initiated */ assert(gtmsource_local->shutdown == SHUTDOWN); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled")); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); break; } gtmsource_poll_actions(FALSE); if (GTMSOURCE_CHANGING_MODE == gtmsource_state) continue; if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode) gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE; SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port); sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg)); repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg); gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg); DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;) assert(!repl_csa->hold_onto_crit); /* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */ grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK); if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state) { repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting 
afresh due to ONLINE ROLLBACK\n"); repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n", jnlpool.jnlpool_ctl->jnl_seqno); continue; } QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr); gtmsource_local->read = jnlpool.jnlpool_ctl->write; gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL; read_jnl_seqno = gtmsource_local->read_jnl_seqno; assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno); if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno) { gtmsource_local->read_state = READ_FILE; QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno); gtmsource_pool2file_transition = TRUE; /* so that we read the latest gener jnl files */ } rel_lock(jnlpool.jnlpool_dummy_reg); if (SS_NORMAL != (status = gtmsource_alloc_tcombuff())) rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status); gtmsource_filter = NO_FILTER; if ('\0' != gtmsource_local->filter_cmd[0]) { if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd))) gtmsource_filter |= EXTERNAL_FILTER; else gtmsource_exit(ABNORMAL_SHUTDOWN); } gtmsource_process(); /* gtmsource_process returns only when mode needs to be changed to PASSIVE */ assert(gtmsource_state == GTMSOURCE_CHANGING_MODE); gtmsource_ctl_close(); gtmsource_free_msgbuff(); gtmsource_free_tcombuff(); gtmsource_free_filter_buff(); gtmsource_stop_heartbeat(); if (FD_INVALID != gtmsource_sock_fd) repl_close(>msource_sock_fd); if (gtmsource_filter & EXTERNAL_FILTER) repl_stop_filter(); } while (TRUE);
bool gtcmtr_put(void) { cm_region_list *reg_ref; mval v; unsigned char buff[MAX_ZWR_KEY_SZ], *end; unsigned char *ptr, regnum; short n; unsigned short top, len; static readonly gds_file_id file; error_def(ERR_KEY2BIG); error_def(ERR_REC2BIG); error_def(ERR_GVIS); error_def(ERR_DBPRIVERR); ptr = curr_entry->clb_ptr->mbf; assert(*ptr == CMMS_Q_PUT); ptr++; GET_USHORT(len, ptr); ptr += SIZEOF(unsigned short); regnum = *ptr++; reg_ref = gtcm_find_region(curr_entry,regnum); len--; /* subtract size of regnum */ CM_GET_GVCURRKEY(ptr, len); gtcm_bind_name(reg_ref->reghead, TRUE); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region)); if (JNL_ALLOWED(cs_addrs)) { /* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines * do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not * easy to re-execute the following two assignments in case gvcst_put()'s call to t_end() encounters a * cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment * if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE. * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED * might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead. * this approach ensures that the jnl_put_jrt_pini() gets the appropriate prc_vec for writing into the * journal record in case JNL_ENABLED turns out to be TRUE in t_end() time. * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access * and hence the correctness of prc_vec (whenever it turns out necessary) is guaranteed. 
*/ originator_prc_vec = curr_entry->pvec; cs_addrs->jnl->pini_addr = reg_ref->pini_addr; } GET_USHORT(len, ptr); ptr += SIZEOF(unsigned short); v.mvtype = MV_STR; v.str.len = len; v.str.addr = (char *)ptr; if ((n = gv_currkey->end + 1) > gv_cur_region->max_key_size) { if ((end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)) == 0) end = &buff[MAX_ZWR_KEY_SZ - 1]; rts_error(VARLSTCNT(11) ERR_KEY2BIG, 4, n, (int4)gv_cur_region->max_key_size, REG_LEN_STR(gv_cur_region), 0, ERR_GVIS, 2, end - buff, buff); } if (n + v.str.len + SIZEOF(rec_hdr) > gv_cur_region->max_rec_size) { if ((end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)) == 0) end = &buff[MAX_ZWR_KEY_SZ - 1]; rts_error(VARLSTCNT(10) ERR_REC2BIG, 4, n + v.str.len + SIZEOF(rec_hdr), (int4)gv_cur_region->max_rec_size, REG_LEN_STR(gv_cur_region), ERR_GVIS, 2, end - buff, buff); } gvcst_put(&v); if (JNL_ALLOWED(cs_addrs)) reg_ref->pini_addr = cs_addrs->jnl->pini_addr; /* In case journal switch occurred */ ptr = curr_entry->clb_ptr->mbf; *ptr++ = CMMS_R_PUT; curr_entry->clb_ptr->cbl = S_HDRSIZE; return TRUE; }