static inline void nrf24_ce(uint8_t level)
{
	/*
	 * Make sure the minimum time period has passed since the previous
	 * CE edge for the new edge to be detected.  The spec doesn't say
	 * what the actual CE sampling frequency is, but the minimum period
	 * for a rising edge on Tx start to be detected is specified to be
	 * 10us.  However this doesn't seem to work in our configuration: a
	 * minimum of ~150-200us is required, so we use 200us for a rising
	 * edge and 10us for a falling edge, which seems to be enough.
	 *
	 * Falling edges are important because there's no direct transition
	 * between the Tx and Rx states in the nRF24L01+ state diagram; the
	 * Standby-I state is always necessary.  If we attempt to go from
	 * Tx (or Rx) to Standby-I and then immediately go to Rx (or Tx),
	 * the low CE period could be too short.
	 */
#ifdef TIMER
	static uint32_t prev_ce_edge;

	if (level)
		while (timer_read() - prev_ce_edge <= DIVIDE_ROUND_UP(F_TMR, 100000));
	else
		while (timer_read() - prev_ce_edge <= DIVIDE_ROUND_UP(F_TMR, 5000));
#else
	/* This should take at least 10us (rising) or 200us (falling) */
	if (level)
		my_delay(0.01);
	else
		my_delay(0.2);
#endif
	digitalWrite(ce_pin, level);
#ifdef TIMER
	prev_ce_edge = timer_read();
#endif
}
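/* Illustrative aside (not part of the driver above): DIVIDE_ROUND_UP is assumed to be the usual
 * ceiling-division macro defined elsewhere in the project, and F_TMR an assumed 16 MHz timer clock.
 * This standalone sketch just shows how the two guard thresholds above translate into timer ticks:
 * F_TMR/100000 ticks enforce a ~10us CE-low period before a rising edge, and F_TMR/5000 ticks
 * enforce a ~200us CE-high period before a falling edge.
 */
#include <stdio.h>
#include <stdint.h>

#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))	/* assumed definition */
#define F_TMR			16000000u		/* assumed timer frequency, Hz */

int main(void)
{
	uint32_t rise_guard = DIVIDE_ROUND_UP(F_TMR, 100000u);	/* min ticks CE was low:  160 (~10us)  */
	uint32_t fall_guard = DIVIDE_ROUND_UP(F_TMR, 5000u);	/* min ticks CE was high: 3200 (~200us) */

	printf("rising-edge guard:  %u ticks\n", rise_guard);
	printf("falling-edge guard: %u ticks\n", fall_guard);
	return 0;
}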
/* This routine returns whether the free_blocks counter in the file-header is ok (TRUE) or not (FALSE).
 * If not, it corrects it. This assumes cs_addrs, cs_data and gv_cur_region to point to the region of interest.
 * It also assumes that the master-map is correct and finds out non-full local bitmaps and counts the number of
 * free blocks in each of them and sums them up to determine the perceived correct free_blocks count.
 * The reason why this is ok is that even if the master-map incorrectly reports a local bitmap as full, our new free_blocks
 * count will effectively make the free space in that local-bitmap invisible and make a gdsfilext necessary and valid.
 * A later mupip integ will scavenge that invisible space for us. The worst that can therefore happen is that we will
 * transiently not be using up existing space. But we will always ensure that the free_blocks counter goes in sync
 * with the master-map.
 */
boolean_t is_free_blks_ctr_ok(void)
{
	boolean_t	blk_used;
	block_id	bml, free_bit, free_bml, maxbitsthismap;
	cache_rec_ptr_t	cr;
	int		cycle;
	sm_uc_ptr_t	bmp;
	unsigned int	local_maps, total_blks, free_blocks;

	error_def(ERR_DBBADFREEBLKCTR);

	assert(&FILE_INFO(gv_cur_region)->s_addrs == cs_addrs && cs_addrs->hdr == cs_data && cs_addrs->now_crit);
	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	for (free_blocks = 0, free_bml = 0; free_bml < local_maps; free_bml++)
	{
		bml = bmm_find_free((uint4)free_bml, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if (bml < free_bml)
			break;
		free_bml = bml;
		bml *= BLKS_PER_LMAP;
		if (!(bmp = t_qread(bml, (sm_int_ptr_t)&cycle, &cr))
			|| (BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz)
			|| (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
		{
			assert(FALSE);	/* In pro, we will simply skip counting this local bitmap. */
			continue;
		}
		assert(free_bml <= (local_maps - 1));
		maxbitsthismap = (free_bml != (local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bml;
		for (free_bit = 0; free_bit < maxbitsthismap; free_bit++)
		{
			free_bit = bm_find_blk(free_bit, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), maxbitsthismap, &blk_used);
			assert(NO_FREE_SPACE <= free_bit);
			if (0 > free_bit)
				break;
			free_blocks++;
		}
	}
	assert(cs_addrs->ti->free_blocks == free_blocks);
	if (cs_addrs->ti->free_blocks != free_blocks)
	{
		send_msg(VARLSTCNT(6) ERR_DBBADFREEBLKCTR, 4, DB_LEN_STR(gv_cur_region),
			cs_addrs->ti->free_blocks, free_blocks);
		cs_addrs->ti->free_blocks = free_blocks;
		return FALSE;
	}
	return TRUE;
}
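/* Worked aside (a minimal sketch, not project code): with an assumed BLKS_PER_LMAP of 512, this shows
 * how the routine above sizes its per-bitmap scan. Every local map covers up to BLKS_PER_LMAP blocks;
 * only the last map can be partially populated, which is what the maxbitsthismap computation accounts for.
 */
#include <stdio.h>

#define BLKS_PER_LMAP		512				/* assumed value */
#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))		/* assumed definition */

int main(void)
{
	unsigned int	total_blks = 1300;	/* example database size in blocks */
	unsigned int	local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);	/* 3 */
	unsigned int	map, base, bits;

	for (map = 0; map < local_maps; map++)
	{
		base = map * BLKS_PER_LMAP;
		bits = (map != (local_maps - 1)) ? BLKS_PER_LMAP : total_blks - base;
		printf("local map %u: blocks %u..%u (%u bits scanned)\n", map, base, base + bits - 1, bits);
	}
	return 0;
}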
STATICFNDEF int extend_wait_for_fallocate(unix_db_info *udi, off_t new_size)
{
	int	to_wait, to_msg, wait_period, save_errno;

	/* Attempt to fallocate every second, and send message to operator every 1/20 of cs_data->wait_disk_space */
	wait_period = to_wait = DIVIDE_ROUND_UP(cs_data->wait_disk_space, CDB_STAGNATE + 1);
	to_msg = (to_wait / 8) ? (to_wait / 8) : 1;	/* send around 8 messages during 1 wait_period */
	do
	{
		if ((to_wait == cs_data->wait_disk_space) || (to_wait % to_msg == 0))
			ISSUE_WAITDSKSPACE(to_wait, wait_period, send_msg_csa);
		hiber_start(1000);
		to_wait--;
		save_errno = posix_fallocate(udi->fd, 0, new_size);
	} while ((to_wait > 0) && (ENOSPC == save_errno));
	return save_errno;
}
STATICFNDEF int extend_wait_for_write(unix_db_info *udi, int blk_size, off_t new_eof)
{
	int	to_wait, to_msg, wait_period, save_errno;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* Attempt to write every second, and send message to operator every 1/20 of cs_data->wait_disk_space */
	wait_period = to_wait = DIVIDE_ROUND_UP(cs_data->wait_disk_space, CDB_STAGNATE + 1);
	to_msg = (to_wait / 8) ? (to_wait / 8) : 1;	/* send around 8 messages during 1 wait_period */
	do
	{
		if ((to_wait == cs_data->wait_disk_space) || (to_wait % to_msg == 0))
			ISSUE_WAITDSKSPACE(to_wait, wait_period, send_msg_csa);
		hiber_start(1000);
		to_wait--;
		save_errno = db_write_eof_block(udi, udi->fd, blk_size, new_eof, &(TREF(dio_buff)));
	} while ((to_wait > 0) && (ENOSPC == save_errno));
	return save_errno;
}
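/* Illustrative aside on the retry cadence shared by both extend_wait_for_* routines above.
 * CDB_STAGNATE is assumed to be 4 (its customary value), so the configured wait_disk_space is
 * split into five wait periods of one-second retries, with roughly eight operator messages per
 * period. The numbers below are examples only.
 */
#include <stdio.h>

#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))	/* assumed definition */
#define CDB_STAGNATE		4			/* assumed value */

int main(void)
{
	int	wait_disk_space = 120;	/* example: seconds to keep retrying overall */
	int	wait_period, to_msg, to_wait;

	wait_period = DIVIDE_ROUND_UP(wait_disk_space, CDB_STAGNATE + 1);	/* 24 one-second retries */
	to_msg = (wait_period / 8) ? (wait_period / 8) : 1;			/* message every 3 retries */
	for (to_wait = wait_period; to_wait > 0; to_wait--)
		if ((to_wait == wait_disk_space) || (0 == to_wait % to_msg))
			printf("operator message: %d seconds left in this wait period\n", to_wait);
	return 0;
}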
int4 disk_block_available(int fd, GTM_BAVAIL_TYPE *ret, boolean_t fill_unix_holes)
{
	struct stat	fstat_buf;
	struct statvfs	fstatvfs_buf;
	int		status;

	FSTATVFS_FILE(fd, &fstatvfs_buf, status);
	if (-1 == status)
		return errno;
	*ret = (GTM_BAVAIL_TYPE)((fstatvfs_buf.f_frsize / DISK_BLOCK_SIZE) * fstatvfs_buf.f_bavail);
	if (fill_unix_holes)
	{
		FSTAT_FILE(fd, &fstat_buf, status);
		if (-1 == status)
			return errno;
		*ret -= (GTM_BAVAIL_TYPE)(DEV_BSIZE / DISK_BLOCK_SIZE
			* (DIVIDE_ROUND_UP(fstat_buf.st_size, DEV_BSIZE) - fstat_buf.st_blocks));
	}
	return 0;
}
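/* Standalone POSIX sketch (plain fstatvfs/fstat rather than the GT.M wrapper macros) of the same
 * computation: available space in 512-byte units from statvfs, optionally reduced by the blocks
 * needed to fill holes in a sparse file, i.e. the file's size rounded up to whole 512-byte blocks
 * minus the blocks the filesystem has actually allocated (st_blocks is in 512-byte units per POSIX).
 * HOLE_BLOCK_SIZE stands in for DEV_BSIZE and is assumed to be 512 here.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/statvfs.h>

#define DISK_BLOCK_SIZE		512
#define HOLE_BLOCK_SIZE		512	/* stands in for DEV_BSIZE (assumed) */
#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))

static long long avail_after_filling_holes(int fd)
{
	struct statvfs	vfs;
	struct stat	st;
	long long	avail;

	if ((0 != fstatvfs(fd, &vfs)) || (0 != fstat(fd, &st)))
		return -1;
	avail = (long long)(vfs.f_frsize / DISK_BLOCK_SIZE) * vfs.f_bavail;
	avail -= (long long)(HOLE_BLOCK_SIZE / DISK_BLOCK_SIZE)
		* (DIVIDE_ROUND_UP(st.st_size, HOLE_BLOCK_SIZE) - st.st_blocks);
	return avail;
}

int main(int argc, char **argv)
{
	int	fd;

	if ((2 != argc) || (0 > (fd = open(argv[1], O_RDONLY))))
		return 1;
	printf("%lld disk blocks available after filling holes\n", avail_after_filling_holes(fd));
	close(fd);
	return 0;
}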
int jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size) { file_control *fc; boolean_t need_extend; jnl_buffer_ptr_t jb; jnl_create_info jnl_info; jnl_file_header header; uint4 new_alq; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; char prev_jnl_fn[JNL_NAME_SIZE]; uint4 jnl_status = 0, status; int new_blocks, result; GTM_BAVAIL_TYPE avail_blocks; uint4 aligned_tot_jrec_size, count; switch(jpc->region->dyn.addr->acc_meth) { case dba_mm: case dba_bg: csa = &FILE_INFO(jpc->region)->s_addrs; break; default: GTMASSERT; } csd = csa->hdr; assert(csa == cs_addrs && csd == cs_data); assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state))); assert(jpc->region == gv_cur_region); assert(csa->jnl_state == csd->jnl_state); if (!JNL_ENABLED(csa) || (NOJNL == jpc->channel) || (JNL_FILE_SWITCHED(jpc))) GTMASSERT; /* crit and messing with the journal file - how could it have vanished? */ if (!csd->jnl_deq) { assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq); assert(csd->jnl_alq == csd->autoswitchlimit); new_blocks = csd->jnl_alq; } else /* May cause extension of csd->jnl_deq * n blocks where n > 0 */ new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq); jpc->status = SS_NORMAL; jb = jpc->jnl_buff; assert(0 <= new_blocks); DEBUG_ONLY(count = 0); for (need_extend = (0 != new_blocks); need_extend; ) { DEBUG_ONLY(count++); /* usually we will do the loop just once where we do the file extension. * rarely we might need to do an autoswitch instead after which again rarely * we might need to do an extension on the new journal to fit in the transaction's journal requirements. * therefore we should do this loop a maximum of twice. hence the assert below. */ assert(count <= 2); need_extend = FALSE; if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE))) { if ((new_blocks * EXTEND_WARNING_FACTOR) > avail_blocks) { if (new_blocks > avail_blocks) { /* if we cannot satisfy the request, it is an error */ send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd), new_blocks, avail_blocks); new_blocks = 0; jpc->status = SS_NORMAL; break; } else send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd), (avail_blocks - new_blocks)); } } else send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status); new_alq = jb->filesize + new_blocks; /* ensure current journal file size is well within autoswitchlimit --> design constraint */ assert(csd->autoswitchlimit >= jb->filesize); if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks))) /* close to max */ send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize); if (csd->autoswitchlimit < new_alq) { /* Reached max, need to autoswitch */ /* Ensure new journal file can hold the entire current transaction's journal record requirements */ assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size)); memset(&jnl_info, 0, sizeof(jnl_info)); jnl_info.prev_jnl = &prev_jnl_fn[0]; set_jnl_info(gv_cur_region, &jnl_info); assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc))); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { /* flush the cache and jnl-buffer-contents to current journal file before * switching to a new journal. 
*/ wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH); jnl_file_close(gv_cur_region, TRUE, TRUE); } else rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region), jpc->status); assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr)); if (jgbl.forw_phase_recovery && (NULL != jgbl.mur_pini_addr_reset_fnptr)) (*jgbl.mur_pini_addr_reset_fnptr)(); assert(!jnl_info.no_rename); assert(!jnl_info.no_prev_link); if (EXIT_NRM == cre_jnl_file(&jnl_info)) { assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len)); assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0'); assert(csd->jnl_file_len == jnl_info.jnl_len); assert(csd->jnl_buffer_size == jnl_info.buffer); assert(csd->jnl_alq == jnl_info.alloc); assert(csd->jnl_deq == jnl_info.extend); assert(csd->jnl_before_image == jnl_info.before_images); csd->trans_hist.header_open_tn = jnl_info.tn; /* needed for successful jnl_file_open() */ send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREATE, 2, JNL_LEN_STR(csd)); fc = gv_cur_region->dyn.addr->file_cntl; fc->op = FC_WRITE; fc->op_buff = (sm_uc_ptr_t)csd; status = dbfilop(fc); if (SS_NORMAL != status) send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status); assert(JNL_ENABLED(csa)); /* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */ jnl_status = jnl_ensure_open(); /* sets jpc->status */ if (0 != jnl_status) rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region)); assert(jb->filesize == csd->jnl_alq); aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size), csd->jnl_alq, csd->jnl_deq); if (aligned_tot_jrec_size > csd->jnl_alq) { /* need to extend more than initial allocation in the new journal file * to accommodate the current transaction. */ new_blocks = aligned_tot_jrec_size - csd->jnl_alq; assert(new_blocks); assert(0 == new_blocks % csd->jnl_deq); need_extend = TRUE; } } else { send_msg(VARLSTCNT(4) ERR_JNLCREATERR, 2, JNL_LEN_STR(csd)); jpc->status = ERR_JNLNOCREATE; new_blocks = -1; } } else { assert(!need_extend); /* ensure we won't go through the for loop again */ /* Virtually extend currently used journal file */ jb->filesize = new_alq; /* Actually this is virtual file size blocks */ DO_FILE_READ(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2); if (SS_NORMAL != jpc->status) rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status); assert((header.virtual_size + new_blocks) == new_alq); header.virtual_size = new_alq; DO_FILE_WRITE(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2); if (SS_NORMAL != jpc->status) rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status); } if (0 >= new_blocks) break; } if (0 >= new_blocks) { jpc->status = ERR_JNLREADEOF; jnl_file_lost(jpc, ERR_JNLEXTEND); new_blocks = -1; } return new_blocks; }
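/* Worked aside (illustrative values only) on the extension sizing at the top of jnl_file_extend():
 * the transaction's journal requirement is converted to 512-byte disk blocks and then rounded up to
 * a whole number of extension quanta (csd->jnl_deq). ROUND_UP is assumed to be the usual
 * round-up-to-multiple macro defined in the project headers.
 */
#include <stdio.h>

#define DISK_BLOCK_SIZE		512
#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))		/* assumed definition */
#define ROUND_UP(x, y)		(DIVIDE_ROUND_UP(x, y) * (y))	/* assumed definition */

int main(void)
{
	unsigned int	total_jnl_rec_size = 70000;	/* bytes this transaction needs to journal */
	unsigned int	jnl_deq = 100;			/* extension quantum, in disk blocks */
	unsigned int	blocks, new_blocks;

	blocks = DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE);	/* 137 blocks */
	new_blocks = ROUND_UP(blocks, jnl_deq);				/* 200 blocks = 2 * jnl_deq */
	printf("need %u blocks, extend journal by %u blocks\n", blocks, new_blocks);
	return 0;
}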
unsigned char mu_cre_file(void) { unsigned char *inadr[2], *c, exit_stat; enum db_acc_method temp_acc_meth; uint4 lcnt, retadr[2]; int4 blk_init_size, initial_alq, free_blocks; gtm_uint64_t free_blocks_ll, blocks_for_extension; char buff[GLO_NAME_MAXLEN], fn_buff[MAX_FN_LEN]; unsigned int status; int free_space; struct FAB *fcb; struct NAM nam; gds_file_id new_id; io_status_block_disk iosb; char node[16]; short len; struct { short blen; short code; char *buf; short *len; int4 terminator; } item = {15, SYI$_NODENAME, &node, &len, 0}; $DESCRIPTOR(desc, buff); exit_stat = EXIT_NRM; /* The following calculations should duplicate the BT_SIZE macro from GDSBT and the LOCK_BLOCK macro from GDSFHEAD.H, * but without using a sgmnt_data which is not yet set up at this point */ #ifdef GT_CX_DEF /* This section needs serious chnages for the fileheader changes in V5 if it is ever resurrected */ over_head = DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT + (WC_MAX_BUFFS + getprime(WC_MAX_BUFFS) + 1) * SIZEOF(bt_rec), DISK_BLOCK_SIZE); if (gv_cur_region->dyn.addr->acc_meth == dba_bg) { free_space = over_head - DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT + (gv_cur_region->dyn.addr->global_buffers + getprime(gv_cur_region->dyn.addr->global_buffers) + 1) * SIZEOF(bt_rec), DISK_BLOCK_SIZE); over_head += gv_cur_region->dyn.addr->lock_space ? gv_cur_region->dyn.addr->lock_space : DEF_LOCK_SIZE / OS_PAGELET_SIZE; } else if (gv_cur_region->dyn.addr->acc_meth == dba_mm) { free_space = over_head - DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT, DISK_BLOCK_SIZE); if (gv_cur_region->dyn.addr->lock_space) { over_head += gv_cur_region->dyn.addr->lock_space; free_space += gv_cur_region->dyn.addr->lock_space; } else { over_head += DEF_LOCK_SIZE / OS_PAGELET_SIZE; free_space += DEF_LOCK_SIZE / OS_PAGELET_SIZE; } } free_space *= DISK_BLOCK_SIZE; #else assert(START_VBN_CURRENT > DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT, DISK_BLOCK_SIZE)); free_space = ((START_VBN_CURRENT - 1) * DISK_BLOCK_SIZE) - SIZEOF_FILE_HDR_DFLT; #endif switch (gv_cur_region->dyn.addr->acc_meth) { case dba_bg: case dba_mm: mu_cre_vms_structs(gv_cur_region); fcb = ((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->fab; cs_addrs = &((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->s_addrs; fcb->fab$b_shr &= FAB$M_NIL; /* No access to this file while it is created */ fcb->fab$l_nam = &nam; nam = cc$rms_nam; /* There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. 
(must round up in this * manner as every non-bitmap block must have an associated bitmap) */ fcb->fab$l_alq += DIVIDE_ROUND_UP(fcb->fab$l_alq, BLKS_PER_LMAP - 1); /* Bitmaps */ blk_init_size = fcb->fab$l_alq; fcb->fab$l_alq *= BLK_SIZE / DISK_BLOCK_SIZE; fcb->fab$l_alq += START_VBN_CURRENT - 1; initial_alq = fcb->fab$l_alq; fcb->fab$w_mrs = 512; /* no longer a relevent field to us */ break; case dba_usr: util_out_print("Database file for region !AD not created; access method is not GDS.", TRUE, REG_LEN_STR(gv_cur_region)); return EXIT_WRN; default: gtm_putmsg(VARLSTCNT(1) ERR_BADACCMTHD); return EXIT_ERR; } nam.nam$b_ess = SIZEOF(fn_buff); nam.nam$l_esa = fn_buff; nam.nam$b_nop |= NAM$M_SYNCHK; status = sys$parse(fcb, 0, 0); if (RMS$_NORMAL != status) { gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0); return EXIT_ERR; } if (nam.nam$b_node != 0) { status = sys$getsyiw(EFN$C_ENF, 0, 0, &item, &iosb, 0, 0); if (SS$_NORMAL == status) status = iosb.cond; if (SS$_NORMAL == status) { if (len == nam.nam$b_node-2 && !memcmp(nam.nam$l_esa, node, len)) { fcb->fab$l_fna = nam.nam$l_esa + nam.nam$b_node; fcb->fab$b_fns = nam.nam$b_esl - nam.nam$b_node; } } else { util_out_print("Could not get node for !AD.", TRUE, REG_LEN_STR(gv_cur_region)); exit_stat = EXIT_WRN; } } assert(gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm); nam.nam$l_esa = NULL; nam.nam$b_esl = 0; status = sys$create(fcb); if (status != RMS$_CREATED && status != RMS$_FILEPURGED) { switch(status) { case RMS$_FLK: util_out_print("Database file for region !AD not created; currently locked by another user.", TRUE, REG_LEN_STR(gv_cur_region)); exit_stat = EXIT_INF; break; case RMS$_NORMAL: util_out_print("Database file for region !AD not created; already exists.", TRUE, REG_LEN_STR(gv_cur_region)); exit_stat = EXIT_INF; break; case RMS$_SUPPORT: util_out_print("Database file for region !AD not created; cannot create across network.", TRUE, REG_LEN_STR(gv_cur_region)); exit_stat = EXIT_WRN; break; case RMS$_FUL: send_msg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0); /* intentionally falling through */ default: gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0); exit_stat = EXIT_ERR; } sys$dassgn(fcb->fab$l_stv); return exit_stat; } memcpy(new_id.dvi, nam.nam$t_dvi, SIZEOF(nam.nam$t_dvi)); memcpy(new_id.did, nam.nam$w_did, SIZEOF(nam.nam$w_did)); memcpy(new_id.fid, nam.nam$w_fid, SIZEOF(nam.nam$w_fid)); global_name("GT$S", &new_id, buff); /* 2nd parm is actually a gds_file_id * in global_name */ desc.dsc$w_length = buff[0]; /* By definition, a gds_file_id is dvi,fid,did from nam */ desc.dsc$a_pointer = &buff[1]; cs_addrs->db_addrs[0] = cs_addrs->db_addrs[1] = inadr[0] = inadr[1] = inadr; /* used to determine p0 or p1 allocation */ status = init_sec(cs_addrs->db_addrs, &desc, fcb->fab$l_stv, (START_VBN_CURRENT - 1), SEC$M_DZRO|SEC$M_GBL|SEC$M_WRT|SEC$M_EXPREG); if ((SS$_CREATED != status) && (SS$_NORMAL != status)) { gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0); sys$dassgn(fcb->fab$l_stv); return EXIT_ERR; } cs_data = (sgmnt_data *)cs_addrs->db_addrs[0]; memset(cs_data, 0, SIZEOF_FILE_HDR_DFLT); cs_data->createinprogress = TRUE; cs_data->trans_hist.total_blks = (initial_alq - (START_VBN_CURRENT - 1)) / (BLK_SIZE / DISK_BLOCK_SIZE); /* assert that total_blks stored in file-header = non-bitmap 
blocks (initial allocation) + bitmap blocks */ assert(cs_data->trans_hist.total_blks == gv_cur_region->dyn.addr->allocation + DIVIDE_ROUND_UP(gv_cur_region->dyn.addr->allocation, BLKS_PER_LMAP - 1)); cs_data->start_vbn = START_VBN_CURRENT; temp_acc_meth = gv_cur_region->dyn.addr->acc_meth; cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth = dba_bg; cs_data->extension_size = gv_cur_region->dyn.addr->ext_blk_count; mucregini(blk_init_size); cs_addrs->hdr->free_space = free_space; #ifndef GT_CX_DEF cs_addrs->hdr->unbacked_cache = TRUE; #endif cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth = temp_acc_meth; cs_data->createinprogress = FALSE; if (SS$_NORMAL == (status = disk_block_available(fcb->fab$l_stv, &free_blocks))) { blocks_for_extension = (cs_data->blk_size / DISK_BLOCK_SIZE * (DIVIDE_ROUND_UP(EXTEND_WARNING_FACTOR * (gtm_uint64_t)cs_data->extension_size, BLKS_PER_LMAP - 1) + EXTEND_WARNING_FACTOR * (gtm_uint64_t)cs_data->extension_size)); if ((gtm_uint64_t)free_blocks < blocks_for_extension) { free_blocks_ll = (gtm_uint64_t)free_blocks; gtm_putmsg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, fcb->fab$b_fns, fcb->fab$l_fna, EXTEND_WARNING_FACTOR, &blocks_for_extension, DISK_BLOCK_SIZE, &free_blocks_ll); send_msg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, fcb->fab$b_fns, fcb->fab$l_fna, EXTEND_WARNING_FACTOR, &blocks_for_extension, DISK_BLOCK_SIZE, &free_blocks_ll); } } if (SS$_NORMAL == (status = sys$updsec(((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->s_addrs.db_addrs, NULL, PSL$C_USER, 0, efn_immed_wait, &iosb, NULL, 0))) { status = sys$synch(efn_immed_wait, &iosb); if (SS$_NORMAL == status) status = iosb.cond; } else if (SS$_NOTMODIFIED == status) status = SS$_NORMAL; if (SS$_NORMAL == status) status = del_sec(SEC$M_GBL, &desc, 0); if (SS$_NORMAL == status) status = sys$deltva(cs_addrs->db_addrs, retadr, PSL$C_USER); if (SS$_NORMAL == status) status = sys$dassgn(fcb->fab$l_stv); if (SS$_NORMAL == status) { util_out_print("Database file for region !AD created.", TRUE, REG_LEN_STR(gv_cur_region)); /* the open and close are an attempt to ensure that the file is available, not under the control of an ACP, * before MUPIP exits */ fcb->fab$b_shr = FAB$M_SHRPUT | FAB$M_SHRGET | FAB$M_UPI; fcb->fab$l_fop = 0; for (lcnt = 1; (60 * MAX_OPEN_RETRY) >= lcnt; lcnt++) { /* per VMS engineering a delay is expected. We will wait up to an hour as a * Delete Global Section operation is essentially and inherently asynchronous in nature * and could take an arbitrary amount of time. */ if (RMS$_FLK != (status = sys$open(fcb, NULL, NULL))) break; wcs_sleep(lcnt); } assert(RMS$_NORMAL == status); if (RMS$_NORMAL == status) { status = sys$close(fcb); assert(RMS$_NORMAL == status); } if (RMS$_NORMAL != status) exit_stat = EXIT_WRN; } else exit_stat = EXIT_ERR; if (RMS$_NORMAL != status) gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0); if ((MAX_RMS_RECORDSIZE - SIZEOF(shmpool_blk_hdr)) < cs_data->blk_size) gtm_putmsg(VARLSTCNT(5) ERR_MUNOSTRMBKUP, 3, fcb->fab$b_fns, fcb->fab$l_fna, 32 * 1024 - DISK_BLOCK_SIZE); return exit_stat; }
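/* Worked aside on the bitmap accounting in mu_cre_file() above. With an assumed BLKS_PER_LMAP of 512,
 * each local bitmap block governs itself plus (BLKS_PER_LMAP - 1) data blocks, so the number of bitmaps
 * needed for n data blocks is ceil(n / (BLKS_PER_LMAP - 1)); the total_blks assert near the end of the
 * routine checks exactly this identity. Values below are examples only.
 */
#include <stdio.h>

#define BLKS_PER_LMAP		512				/* assumed value */
#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))		/* assumed definition */

int main(void)
{
	unsigned int	allocation = 1000;	/* requested data (non-bitmap) blocks */
	unsigned int	bitmaps, total_blks;

	bitmaps = DIVIDE_ROUND_UP(allocation, BLKS_PER_LMAP - 1);	/* 2 bitmap blocks */
	total_blks = allocation + bitmaps;				/* 1002 GDS blocks in the file */
	printf("%u data blocks need %u bitmaps -> %u total blocks\n", allocation, bitmaps, total_blks);
	return 0;
}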
int gtmsource_get_opt(void) { char *connect_parm_token_str, *connect_parm; char *connect_parms_str, tmp_connect_parms_str[GTMSOURCE_CONN_PARMS_LEN + 1]; char secondary_sys[MAX_SECONDARY_LEN], *c, inst_name[MAX_FN_LEN + 1]; char statslog_val[SIZEOF("OFF")]; /* "ON" or "OFF" */ char update_val[SIZEOF("DISABLE")]; /* "ENABLE" or "DISABLE" */ char freeze_val[SIZEOF("OFF")]; /* "ON" or "OFF" */ char freeze_comment[SIZEOF(gtmsource_options.freeze_comment)]; int tries, index = 0, timeout_status, connect_parms_index, status, renegotiate_interval; struct hostent *sec_hostentry; unsigned short log_file_len, filter_cmd_len; unsigned short secondary_len, inst_name_len, statslog_val_len, update_val_len, connect_parms_str_len; unsigned short freeze_val_len, freeze_comment_len, tlsid_len; int errcode; int port_len; char *ip_end; mstr log_nam, trans_name; boolean_t secondary, dotted_notation, log, log_interval_specified, connect_parms_badval, plaintext_fallback; memset((char *)>msource_options, 0, SIZEOF(gtmsource_options)); gtmsource_options.start = (CLI_PRESENT == cli_present("START")); gtmsource_options.shut_down = (CLI_PRESENT == cli_present("SHUTDOWN")); gtmsource_options.activate = (CLI_PRESENT == cli_present("ACTIVATE")); gtmsource_options.deactivate = (CLI_PRESENT == cli_present("DEACTIVATE")); gtmsource_options.checkhealth = (CLI_PRESENT == cli_present("CHECKHEALTH")); gtmsource_options.statslog = (CLI_PRESENT == cli_present("STATSLOG")); gtmsource_options.showbacklog = (CLI_PRESENT == cli_present("SHOWBACKLOG")); gtmsource_options.changelog = (CLI_PRESENT == cli_present("CHANGELOG")); gtmsource_options.stopsourcefilter = (CLI_PRESENT == cli_present("STOPSOURCEFILTER")); gtmsource_options.needrestart = (CLI_PRESENT == cli_present("NEEDRESTART")); gtmsource_options.losttncomplete = (CLI_PRESENT == cli_present("LOSTTNCOMPLETE")); gtmsource_options.jnlpool = (CLI_PRESENT == cli_present("JNLPOOL")); secondary = (CLI_PRESENT == cli_present("SECONDARY")); gtmsource_options.rootprimary = ROOTPRIMARY_UNSPECIFIED; /* to indicate unspecified state */ if ((CLI_PRESENT == cli_present("ROOTPRIMARY")) || (CLI_PRESENT == cli_present("UPDOK"))) gtmsource_options.rootprimary = ROOTPRIMARY_SPECIFIED; else if ((CLI_PRESENT == cli_present("PROPAGATEPRIMARY")) || (CLI_PRESENT == cli_present("UPDNOTOK"))) gtmsource_options.rootprimary = PROPAGATEPRIMARY_SPECIFIED; else { /* Neither ROOTPRIMARY (or UPDOK) nor PROPAGATEPRIMARY (or UPDNOTOK) specified. Assume default values. * Assume ROOTPRIMARY for -START -SECONDARY (active source server start) and -ACTIVATE commands. * Assume PROPAGATEPRIMARY for -START -PASSIVE (passive source server start) and -DEACTIVATE commands. */ if ((gtmsource_options.start && secondary) || gtmsource_options.activate) gtmsource_options.rootprimary = ROOTPRIMARY_SPECIFIED; if ((gtmsource_options.start && !secondary) || gtmsource_options.deactivate) gtmsource_options.rootprimary = PROPAGATEPRIMARY_SPECIFIED; } gtmsource_options.instsecondary = (CLI_PRESENT == cli_present("INSTSECONDARY")); if (gtmsource_options.instsecondary) { /* -INSTSECONDARY is specified in the command line. */ inst_name_len = SIZEOF(inst_name);; if (!cli_get_str("INSTSECONDARY", &inst_name[0], &inst_name_len)) { util_out_print("Error parsing INSTSECONDARY qualifier", TRUE); return(-1); } } else { /* Check if environment variable "gtm_repl_instsecondary" is defined. * Do that only if any of the following qualifiers is present as these are the only ones that honour it. 
* Mandatory : START, ACTIVATE, DEACTIVATE, STOPSOURCEFILTER, CHANGELOG, STATSLOG, NEEDRESTART, * Optional : CHECKHEALTH, SHOWBACKLOG or SHUTDOWN */ if (gtmsource_options.start || gtmsource_options.activate || gtmsource_options.deactivate || gtmsource_options.stopsourcefilter || gtmsource_options.changelog || gtmsource_options.statslog || gtmsource_options.needrestart || gtmsource_options.checkhealth || gtmsource_options.showbacklog || gtmsource_options.shut_down) { log_nam.addr = GTM_REPL_INSTSECONDARY; log_nam.len = SIZEOF(GTM_REPL_INSTSECONDARY) - 1; trans_name.addr = &inst_name[0]; if (SS_NORMAL == (status = TRANS_LOG_NAME(&log_nam, &trans_name, inst_name, SIZEOF(inst_name), do_sendmsg_on_log2long))) { gtmsource_options.instsecondary = TRUE; inst_name_len = trans_name.len; } else if (!gtmsource_options.checkhealth && !gtmsource_options.showbacklog && !gtmsource_options.shut_down) { if (SS_LOG2LONG == status) gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(5) ERR_LOGTOOLONG, 3, log_nam.len, log_nam.addr, SIZEOF(inst_name) - 1); gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REPLINSTSECUNDF); return (-1); } } } if (gtmsource_options.instsecondary) { /* Secondary instance name specified either through -INSTSECONDARY or "gtm_repl_instsecondary" */ inst_name[inst_name_len] = '\0'; if ((MAX_INSTNAME_LEN <= inst_name_len) || (0 == inst_name_len)) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLINSTSECLEN, 2, inst_name_len, inst_name); return (-1); } assert((inst_name_len + 1) <= MAX_INSTNAME_LEN); memcpy(gtmsource_options.secondary_instname, inst_name, inst_name_len + 1); /* copy terminating '\0' as well */ } if (gtmsource_options.start || gtmsource_options.activate) { if (secondary) { secondary_len = MAX_SECONDARY_LEN; if (!cli_get_str("SECONDARY", secondary_sys, &secondary_len)) { util_out_print("Error parsing SECONDARY qualifier", TRUE); return(-1); } /* Parse secondary_sys into secondary_host * and secondary_port */ c = secondary_sys; dotted_notation = TRUE; if ('[' == *c) { ip_end = strchr(++c, ']'); if (NULL == ip_end || 0 == (index = ip_end - c)) { util_out_print("Invalid IP address !AD", TRUE, LEN_AND_STR(secondary_sys)); return(-1); } memcpy(gtmsource_options.secondary_host, c, index); gtmsource_options.secondary_host[index] = '\0'; c = ip_end + 1; } else { while(*c && (':' != *c)) gtmsource_options.secondary_host[index++] = *c++; gtmsource_options.secondary_host[index] = '\0'; } if (':' != *c) { util_out_print("Secondary port number should be specified", TRUE); return(-1); } port_len = strlen(++c); errno = 0; if (((0 == (gtmsource_options.secondary_port = ATOI(c))) && (0 != errno)) || (0 >= gtmsource_options.secondary_port)) { util_out_print("Error parsing secondary port number !AD", TRUE, LEN_AND_STR(c)); return(-1); } } if (CLI_PRESENT == cli_present("CONNECTPARAMS")) { connect_parms_str_len = GTMSOURCE_CONN_PARMS_LEN + 1; if (!cli_get_str("CONNECTPARAMS", tmp_connect_parms_str, &connect_parms_str_len)) { util_out_print("Error parsing CONNECTPARAMS qualifier", TRUE); return(-1); } #ifdef VMS /* strip the quotes around the string. 
(DCL doesn't do it) */ assert('"' == tmp_connect_parms_str[0]); assert('"' == tmp_connect_parms_str[connect_parms_str_len - 1]); connect_parms_str = &tmp_connect_parms_str[1]; tmp_connect_parms_str[connect_parms_str_len - 1] = '\0'; #else connect_parms_str = &tmp_connect_parms_str[0]; #endif for (connect_parms_index = GTMSOURCE_CONN_HARD_TRIES_COUNT, connect_parms_badval = FALSE, connect_parm_token_str = connect_parms_str; !connect_parms_badval && connect_parms_index < GTMSOURCE_CONN_PARMS_COUNT && (connect_parm = strtok(connect_parm_token_str, GTMSOURCE_CONN_PARMS_DELIM)) != NULL; connect_parms_index++, connect_parm_token_str = NULL) { errno = 0; if ((0 == (gtmsource_options.connect_parms[connect_parms_index] = ATOI(connect_parm)) && 0 != errno) || 0 >= gtmsource_options.connect_parms[connect_parms_index]) connect_parms_badval = TRUE; } if (connect_parms_badval) { util_out_print("Error parsing or invalid value parameter in CONNECTPARAMS", TRUE); return(-1); } if (GTMSOURCE_CONN_PARMS_COUNT != connect_parms_index) { util_out_print( "All CONNECTPARAMS - HARD TRIES, HARD TRIES PERIOD, " "SOFT TRIES PERIOD, " "ALERT TIME, HEARTBEAT INTERVAL, " "MAX HEARBEAT WAIT should be specified", TRUE); return(-1); } } else { gtmsource_options.connect_parms[GTMSOURCE_CONN_HARD_TRIES_COUNT] = REPL_CONN_HARD_TRIES_COUNT; gtmsource_options.connect_parms[GTMSOURCE_CONN_HARD_TRIES_PERIOD] = REPL_CONN_HARD_TRIES_PERIOD; gtmsource_options.connect_parms[GTMSOURCE_CONN_SOFT_TRIES_PERIOD] = REPL_CONN_SOFT_TRIES_PERIOD; gtmsource_options.connect_parms[GTMSOURCE_CONN_ALERT_PERIOD] = REPL_CONN_ALERT_ALERT_PERIOD; gtmsource_options.connect_parms[GTMSOURCE_CONN_HEARTBEAT_PERIOD] = REPL_CONN_HEARTBEAT_PERIOD; gtmsource_options.connect_parms[GTMSOURCE_CONN_HEARTBEAT_MAX_WAIT] = REPL_CONN_HEARTBEAT_MAX_WAIT; } if (gtmsource_options.connect_parms[GTMSOURCE_CONN_ALERT_PERIOD]< gtmsource_options.connect_parms[GTMSOURCE_CONN_SOFT_TRIES_PERIOD]) gtmsource_options.connect_parms[GTMSOURCE_CONN_ALERT_PERIOD] = gtmsource_options.connect_parms[GTMSOURCE_CONN_SOFT_TRIES_PERIOD]; if (gtmsource_options.connect_parms[GTMSOURCE_CONN_HEARTBEAT_MAX_WAIT] < gtmsource_options.connect_parms[GTMSOURCE_CONN_HEARTBEAT_PERIOD]) gtmsource_options.connect_parms[GTMSOURCE_CONN_HEARTBEAT_MAX_WAIT] = gtmsource_options.connect_parms[GTMSOURCE_CONN_HEARTBEAT_PERIOD]; } if (gtmsource_options.start || gtmsource_options.statslog || gtmsource_options.changelog || gtmsource_options.activate) { log = (cli_present("LOG") == CLI_PRESENT); log_interval_specified = (CLI_PRESENT == cli_present("LOG_INTERVAL")); if (log) { log_file_len = MAX_FN_LEN + 1; if (!cli_get_str("LOG", gtmsource_options.log_file, &log_file_len)) { util_out_print("Error parsing LOG qualifier", TRUE); return(-1); } } else gtmsource_options.log_file[0] = '\0'; gtmsource_options.src_log_interval = 0; if (log_interval_specified) { if (!cli_get_num("LOG_INTERVAL", (int4 *)>msource_options.src_log_interval)) { util_out_print("Error parsing LOG_INTERVAL qualifier", TRUE); return (-1); } } if (gtmsource_options.start && 0 == gtmsource_options.src_log_interval) gtmsource_options.src_log_interval = LOGTRNUM_INTERVAL; /* For changelog/activate, interval == 0 implies don't change log interval already established */ /* We ignore interval specification for statslog, Vinaya 2005/02/07 */ } if (gtmsource_options.start) { assert(secondary || CLI_PRESENT == cli_present("PASSIVE")); gtmsource_options.mode = ((secondary) ? 
GTMSOURCE_MODE_ACTIVE : GTMSOURCE_MODE_PASSIVE); if (CLI_PRESENT == cli_present("BUFFSIZE")) { if (!cli_get_int("BUFFSIZE", >msource_options.buffsize)) { util_out_print("Error parsing BUFFSIZE qualifier", TRUE); return(-1); } if (MIN_JNLPOOL_SIZE > gtmsource_options.buffsize) gtmsource_options.buffsize = MIN_JNLPOOL_SIZE; } else gtmsource_options.buffsize = DEFAULT_JNLPOOL_SIZE; /* Round up buffsize to the nearest (~JNL_WRT_END_MASK + 1) multiple */ gtmsource_options.buffsize = ((gtmsource_options.buffsize + ~JNL_WRT_END_MASK) & JNL_WRT_END_MASK); if (CLI_PRESENT == cli_present("FILTER")) { filter_cmd_len = MAX_FILTER_CMD_LEN; if (!cli_get_str("FILTER", gtmsource_options.filter_cmd, &filter_cmd_len)) { util_out_print("Error parsing FILTER qualifier", TRUE); return(-1); } } else gtmsource_options.filter_cmd[0] = '\0'; /* Check if compression level is specified */ if (CLI_PRESENT == cli_present("CMPLVL")) { if (!cli_get_int("CMPLVL", >msource_options.cmplvl)) { util_out_print("Error parsing CMPLVL qualifier", TRUE); return(-1); } if (GTM_CMPLVL_OUT_OF_RANGE(gtmsource_options.cmplvl)) gtmsource_options.cmplvl = ZLIB_CMPLVL_MIN; /* no compression in this case */ /* CMPLVL qualifier should override any value specified in the environment variable gtm_zlib_cmp_level */ gtm_zlib_cmp_level = gtmsource_options.cmplvl; } else gtmsource_options.cmplvl = ZLIB_CMPLVL_MIN; /* no compression in this case */ /* Check if SSL/TLS secure communication is requested. */ # ifdef GTM_TLS if (CLI_PRESENT == cli_present("TLSID")) { tlsid_len = MAX_TLSID_LEN; if (!cli_get_str("TLSID", repl_tls.id, &tlsid_len)) { util_out_print("Error parsing TLSID qualifier", TRUE); return -1; } assert(0 < tlsid_len); if (CLI_PRESENT == cli_present("RENEGOTIATE_INTERVAL")) { if (!cli_get_int("RENEGOTIATE_INTERVAL", &renegotiate_interval)) { util_out_print("Error parsing RENEGOTIATE_INTERVAL qualifier", TRUE); return -1; } if (0 > renegotiate_interval) { util_out_print("Negative values are not allowed for RENEGOTIATE_INTERVAL qualifier", TRUE); return -1; } else if ((0 < renegotiate_interval) && (renegotiate_interval < MIN_RENEGOTIATE_TIMEOUT)) renegotiate_interval = MIN_RENEGOTIATE_TIMEOUT; renegotiate_interval = renegotiate_interval * 60; /* Convert to seconds. */ } else renegotiate_interval = DEFAULT_RENEGOTIATE_TIMEOUT * 60; /* Convert to seconds. */ /* Convert renegotiate_interval to heartbeat units (# of 8 second intervals). */ renegotiate_interval = DIVIDE_ROUND_UP(renegotiate_interval, HEARTBEAT_INTERVAL_IN_SECS); gtmsource_options.renegotiate_interval = renegotiate_interval; /* Check if plaintext-fallback mode is specified. Default option is NOPLAINTEXTFALLBACK. 
*/ if (CLI_PRESENT == (plaintext_fallback = cli_present("PLAINTEXTFALLBACK"))) repl_tls.plaintext_fallback = (plaintext_fallback != CLI_NEGATED); else repl_tls.plaintext_fallback = FALSE; } # endif } if (gtmsource_options.shut_down) { if ((timeout_status = cli_present("TIMEOUT")) == CLI_PRESENT) { if (!cli_get_int("TIMEOUT", >msource_options.shutdown_time)) { util_out_print("Error parsing TIMEOUT qualifier", TRUE); return(-1); } if (DEFAULT_SHUTDOWN_TIMEOUT < gtmsource_options.shutdown_time || 0 > gtmsource_options.shutdown_time) { gtmsource_options.shutdown_time = DEFAULT_SHUTDOWN_TIMEOUT; util_out_print("shutdown TIMEOUT changed to !UL", TRUE, gtmsource_options.shutdown_time); } } else if (CLI_NEGATED == timeout_status) gtmsource_options.shutdown_time = -1; else /* TIMEOUT not specified */ gtmsource_options.shutdown_time = DEFAULT_SHUTDOWN_TIMEOUT; } if (gtmsource_options.statslog) { statslog_val_len = 4; /* max(strlen("ON"), strlen("OFF")) + 1 */ if (!cli_get_str("STATSLOG", statslog_val, &statslog_val_len)) { util_out_print("Error parsing STATSLOG qualifier", TRUE); return(-1); } UNIX_ONLY(cli_strupper(statslog_val);) if (0 == STRCMP(statslog_val, "ON"))
short iorm_open(io_log_name *iol, mval *pp, int fd, mval *mspace, int4 timeout) { int4 status; io_desc *iod; /* local pointer to io_curr_device */ d_rm_struct *d_rm; struct XABFHC xabfhc; struct XABPRO xabpro; struct RAB *r; struct FAB *f; struct NAM *nam; mstr newtln; struct dsc$descriptor_s devname, outname; uint4 width; uint4 acebin[128]; /* needs to be big enough for any other ACLs on file */ uint4 *acebinptr; struct acedef *aceptr; char *acetop; boolean_t acefound = FALSE, created = FALSE, isdisk = FALSE, noacl = FALSE; unsigned int devclass, devchar, devchar2, devtype, dvistat, iosb[2]; short devclassret, devcharret, devchar2ret, devtyperet; struct { item_list_3 item[4]; int terminator; } item_list; unsigned char resultant_name[255]; unsigned char tmpfdns; /** unsigned char resultant_name[MAX_TRANS_NAME_LEN]; THIS WOULD BE RIGHT BUT MAX_TRANS_NAME_LEN MUST BE <= 255 **/ /* while sr_unix/iorm_open.c prefixes errors with ERR_DEVOPENFAIL and it might be nice to be consistent */ /* changing VMS after all this time could break user programs */ /* An exception is being made for the extremely unlikely problem creating a GTM ACE so it stands out */ iod = iol->iod; assert(*(pp->str.addr) < n_iops); assert(iod); assert(iod->state >= 0 && iod->state < n_io_dev_states); assert(rm == iod->type); if (dev_never_opened == iod->state) { iod->dev_sp = (d_rm_struct *)(malloc(SIZEOF(d_rm_struct))); d_rm = (d_rm_struct *)iod->dev_sp; memset(d_rm, 0, SIZEOF(*d_rm)); iod->width = DEF_RM_WIDTH; iod->length = DEF_RM_LENGTH; r = &d_rm->r; f = &d_rm->f; *r = cc$rms_rab; *f = cc$rms_fab; r->rab$l_fab = f; r->rab$w_usz = d_rm->l_usz = DEF_RM_WIDTH; f->fab$w_mrs = d_rm->l_mrs = DEF_RM_WIDTH; f->fab$b_rfm = d_rm->b_rfm = FAB$C_VAR; /* default is variable record format */ f->fab$l_fop = FAB$M_CIF | FAB$M_SQO | FAB$M_CBT | FAB$M_NAM; f->fab$b_fac = FAB$M_GET | FAB$M_PUT | FAB$M_TRN; /* TRN allows truncate option to be specified in RAB later */ f->fab$b_rat = FAB$M_CR; f->fab$l_dna = DFLT_FILE_EXT; f->fab$b_dns = SIZEOF(DFLT_FILE_EXT) - 1; d_rm->f.fab$l_nam = nam = malloc(SIZEOF(*nam)); *nam = cc$rms_nam; nam->nam$l_esa = resultant_name; nam->nam$b_ess = SIZEOF(resultant_name); nam->nam$b_nop = NAM$M_NOCONCEAL; r->rab$l_rop = RAB$M_TMO | RAB$M_WBH | RAB$M_RAH; d_rm->promask = 0xFFFF; } else { d_rm = (d_rm_struct *)iod->dev_sp; if (dev_closed == iod->state) d_rm->f.fab$w_bls = 0; /* Reset the block size to pass the block-record check below. * The FAB initialization sets the block size later so it's OK to zero it here. 
*/ nam = d_rm->f.fab$l_nam; nam->nam$l_esa = 0; nam->nam$b_ess = 0; nam->nam$b_esl = 0; } iorm_use(iod, pp); if (dev_open != iod->state) { if (!d_rm->largerecord && (d_rm->f.fab$w_bls > 0) && (FAB$C_FIX != d_rm->f.fab$b_rfm) && (d_rm->f.fab$w_bls < (d_rm->r.rab$w_usz + VREC_HDR_LEN))) rts_error(VARLSTCNT(1) ERR_VARRECBLKSZ); d_rm->r.rab$l_ctx = FAB$M_GET; d_rm->f.fab$l_fna = iol->dollar_io; d_rm->f.fab$b_fns = iol->len; /* smw next overrides any xab set by iorm_use */ xabpro = cc$rms_xabpro; d_rm->f.fab$l_xab = &xabpro; memset(acebin, 0, SIZEOF(acebin)); status = sys$parse(&d_rm->f); /* to get device for getdvi */ if ((1 & status)) { devname.dsc$w_length = nam->nam$b_dev; devname.dsc$a_pointer = nam->nam$l_dev; devname.dsc$b_dtype = DSC$K_DTYPE_T; devname.dsc$b_class = DSC$K_CLASS_S; item_list.item[0].item_code = DVI$_DEVCLASS; item_list.item[0].buffer_length = SIZEOF(devclass); item_list.item[0].buffer_address = &devclass; item_list.item[0].return_length_address = &devclassret; item_list.item[1].item_code = DVI$_DEVCHAR; item_list.item[1].buffer_length = SIZEOF(devchar); item_list.item[1].buffer_address = &devchar; item_list.item[1].return_length_address = &devcharret; item_list.item[2].item_code = DVI$_DEVCHAR2; item_list.item[2].buffer_length = SIZEOF(devchar2); item_list.item[2].buffer_address = &devchar2; item_list.item[2].return_length_address = &devchar2ret; item_list.item[3].item_code = DVI$_DEVTYPE; item_list.item[3].buffer_length = SIZEOF(devtype); item_list.item[3].buffer_address = &devtype; item_list.item[3].return_length_address = &devtyperet; item_list.terminator = 0; dvistat = sys$getdviw(EFN$C_ENF, NULL, &devname, &item_list, iosb, NULL, 0, 0); if (SS$_NORMAL == dvistat) dvistat = iosb[0]; if (SS$_NONLOCAL == dvistat || (SS$_NORMAL == dvistat && ((DC$_DISK != devclass || (DEV$M_NET & devchar) || (DEV$M_DAP | DEV$M_DFS) & devchar2) || /* UCX NFS sets DFS */ (DT$_FD1 <= devtype && DT$_FD8 >= devtype) ))) /* but not tcpware so check foreign disk */ { /* if not disk, dfs/nfs, or non local, create gets BADATTRIB in stv if acl buf and siz set */ noacl = TRUE; } } else /* let create/open report the problem */ noacl = TRUE; if (DEV$M_NET & d_rm->f.fab$l_dev) { /* need to release sys$parse channel if DECnet */ tmpfdns = d_rm->f.fab$b_dns; d_rm->f.fab$b_dns = 0; assert(0 == nam->nam$l_rlf); nam->nam$l_rlf = 0; nam->nam$b_nop |= NAM$M_SYNCHK; status = sys$parse(&d_rm->f); /* give up channel */ d_rm->f.fab$b_dns = tmpfdns; /* restore */ nam->nam$b_nop &= ~NAM$M_SYNCHK; } if (noacl) { if (d_rm->largerecord && MAX_RMS_RECORDSIZE < d_rm->l_mrs) rts_error(VARLSTCNT(1) ERR_RMWIDTHTOOBIG); d_rm->largerecord = FALSE; } if (d_rm->largerecord && FAB$M_GET != d_rm->f.fab$b_fac) { /* if readonly use format from existing file */ aceptr = acebin; aceptr->ace$b_size = GTM_ACE_SIZE * SIZEOF(uint4); aceptr->ace$b_type = ACE$C_INFO; /* without NOPROPAGATE, new versions will get ACE, PROTECTED prevents set acl /dele unless =all */ aceptr->ace$w_flags = ACE$M_NOPROPAGATE | ACE$M_PROTECTED; /* if HIDDEN, dir/sec does not display which may make it harder to check if problems aceptr->ace$w_flags |= ACE$M_HIDDEN; */ aceptr->ace$v_info_type = ACE$C_CUST; /* must be after flags */ aceptr->ace$w_application_facility = GTM_ACE_FAC; /* GTM error fac */ aceptr->ace$w_application_flags = GTM_ACE_BIGREC; assert(SIZEOF(uint4) * GTM_ACE_LAB_OFF == (&aceptr->ace$t_info_start - (char *)aceptr)); acebin[GTM_ACE_LAB_OFF] = GTM_ACE_LABEL; acebin[GTM_ACE_RFM_OFF] = d_rm->b_rfm; acebin[GTM_ACE_MRS_OFF] = d_rm->l_mrs; 
acebin[GTM_ACE_SIZE] = 0; /* terminate */ d_rm->f.fab$b_rfm = FAB$C_UDF; d_rm->f.fab$w_mrs = 0; } if (!noacl) { /* tape gets BADATTRIB in stv if acl buf and siz set */ xabpro.xab$l_aclbuf = acebin; xabpro.xab$w_aclsiz = SIZEOF(acebin); } if (FAB$M_GET == d_rm->f.fab$b_fac) { xabfhc = cc$rms_xabfhc; xabpro.xab$l_nxt = &xabfhc; status = sys$open(&d_rm->f); } else { xabpro.xab$w_pro = d_rm->promask; status = sys$create(&d_rm->f); } nam->nam$l_esa = 0; nam->nam$b_ess = 0; nam->nam$b_esl = 0; d_rm->f.fab$l_xab = 0; switch (status) { case RMS$_NORMAL: if (d_rm->f.fab$l_fop & FAB$M_MXV) created = iod->dollar.zeof = TRUE; break; case RMS$_CRE_STM: case RMS$_CREATED: case RMS$_SUPERSEDE: case RMS$_FILEPURGED: if (d_rm->f.fab$l_dev & DEV$M_FOD) created = iod->dollar.zeof = TRUE; break; case RMS$_ACT: case RMS$_FLK: return(FALSE); default: rts_error(VARLSTCNT(2) status, d_rm->f.fab$l_stv); } if (!noacl && (DEV$M_RND & d_rm->f.fab$l_dev) && !(DEV$M_NET & d_rm->f.fab$l_dev)) isdisk = TRUE; /* local disk */ else if (created && d_rm->largerecord && MAX_RMS_RECORDSIZE < d_rm->l_mrs) rts_error(VARLSTCNT(1) ERR_RMWIDTHTOOBIG); /* $create does not return the ACE: if a new file is created aclsts is IVACL */ /* if CIF and existing file has no acl aclsts ACLEMPTY */ /* if CIF and existing file has acl aclsts is NORMAL */ if (isdisk && ((created && SS$_IVACL == xabpro.xab$l_aclsts) || (0 != xabpro.xab$l_aclsts && (FAB$M_GET != d_rm->f.fab$b_fac && ((1 & xabpro.xab$l_aclsts) || SS$_ACLEMPTY != xabpro.xab$l_aclsts))))) { xabpro.xab$l_aclctx = 0; /* reset context */ d_rm->f.fab$l_xab = &xabpro; status = sys$display(&d_rm->f); d_rm->f.fab$l_xab = 0; /* prevent close error */ if (!(1 & status)) rts_error(VARLSTCNT(2) status, d_rm->f.fab$l_stv); if (0 != xabpro.xab$l_aclsts && !(1 & xabpro.xab$l_aclsts) && SS$_ACLEMPTY != xabpro.xab$l_aclsts) rts_error(VARLSTCNT(1) xabpro.xab$l_aclsts); } if (isdisk && (1 & status) && 0 != xabpro.xab$l_aclsts && !(1 & xabpro.xab$l_aclsts) && SS$_ACLEMPTY != xabpro.xab$l_aclsts) rts_error(VARLSTCNT(1) xabpro.xab$l_aclsts); if (isdisk && 0 != xabpro.xab$w_acllen && (1 & status)) /* acl and success */ { if (SIZEOF(acebin) < xabpro.xab$w_acllen) { /* get a new buffer big enough */ xabpro.xab$l_aclbuf = malloc(xabpro.xab$w_acllen); xabpro.xab$w_aclsiz = xabpro.xab$w_acllen; xabpro.xab$l_aclctx = 0; /* reset context */ d_rm->f.fab$l_xab = &xabpro; status = sys$display(&d_rm->f); d_rm->f.fab$l_xab = 0; if (!(1 & status)) rts_error(VARLSTCNT(2) status, d_rm->f.fab$l_stv); if (!(1 & xabpro.xab$l_aclsts)) rts_error(VARLSTCNT(1) xabpro.xab$l_aclsts); } acetop = (char *)xabpro.xab$l_aclbuf + xabpro.xab$w_acllen; for (aceptr = xabpro.xab$l_aclbuf; aceptr < acetop; aceptr = (char *)aceptr + aceptr->ace$b_size) { if (0 == aceptr->ace$b_size) break; if (ACE$C_INFO == aceptr->ace$b_type && ACE$C_CUST == aceptr->ace$v_info_type && GTM_ACE_FAC == aceptr->ace$w_application_facility && GTM_ACE_BIGREC == aceptr->ace$w_application_flags) { /* info for large records */ acebinptr = aceptr; assert(GTM_ACE_LABEL == acebinptr[GTM_ACE_LAB_OFF]); d_rm->largerecord = TRUE; d_rm->b_rfm = (unsigned char)acebinptr[GTM_ACE_RFM_OFF]; d_rm->l_mrs = acebinptr[GTM_ACE_MRS_OFF]; acefound = TRUE; break; } } if (acebin != xabpro.xab$l_aclbuf) { /* free larger buffer now */ free(xabpro.xab$l_aclbuf); xabpro.xab$l_aclbuf = acebin; xabpro.xab$w_aclsiz = SIZEOF(acebin); } } if (!acefound) { if (!created) { /* copy from exisiting file */ if (isdisk && d_rm->largerecord && FAB$C_UDF == d_rm->f.fab$b_rfm) rts_error(VARLSTCNT(1) 
ERR_BIGNOACL); /* maybe lost in copy */ d_rm->b_rfm = d_rm->f.fab$b_rfm; d_rm->l_mrs = d_rm->f.fab$w_mrs; } else if (isdisk && d_rm->largerecord) rts_error(VARLSTCNT(8) ERR_DEVOPENFAIL, 2, iol->len, iol->dollar_io, ERR_TEXT, 2, LEN_AND_LIT("GTM ACE on new file disappeared - possible VMS problem")); d_rm->largerecord = FALSE; } /* smw does next overwriting of mrs make sense to RMS */ /* if not largerecord, read only, sequential, not magtape ... */ if (!d_rm->largerecord && (FAB$M_GET == d_rm->f.fab$b_fac) && (0 == d_rm->f.fab$w_mrs) && xabfhc.xab$w_lrl) d_rm->l_mrs = d_rm->l_usz = d_rm->r.rab$w_usz = d_rm->f.fab$w_mrs = xabfhc.xab$w_lrl; if (d_rm->largerecord) { /* guess at a good blocks per IO */ uint4 blocksperrec; blocksperrec = DIVIDE_ROUND_UP(d_rm->l_mrs, RMS_DISK_BLOCK); if (RMS_MAX_MBC <= blocksperrec * 2) d_rm->r.rab$b_mbc = RMS_MAX_MBC; else if (RMS_DEF_MBC < blocksperrec * 2) d_rm->r.rab$b_mbc = blocksperrec * 2; } status = sys$connect(&d_rm->r); if (RMS$_NORMAL != status) rts_error(VARLSTCNT(2) status, d_rm->r.rab$l_stv); if (d_rm->r.rab$l_rop & RAB$M_EOF) iod->dollar.zeof = TRUE; if (ESC == iod->trans_name->dollar_io[0]) { /* process permanent file...get real name */ status = sys$display(&d_rm->f); if (status & 1) { devname.dsc$w_length = nam->nam$t_dvi[0]; devname.dsc$b_dtype = DSC$K_DTYPE_T; devname.dsc$b_class = DSC$K_CLASS_S; devname.dsc$a_pointer = &nam->nam$t_dvi[1]; outname.dsc$w_length = SIZEOF(resultant_name); outname.dsc$b_dtype = DSC$K_DTYPE_T; outname.dsc$b_class = DSC$K_CLASS_S; outname.dsc$a_pointer = resultant_name; status = lib$fid_to_name(&devname, &nam->nam$w_fid, & outname, &newtln.len, 0, 0); if ((status & 1) && (0 != newtln.len)) { newtln.addr = resultant_name; iod->trans_name = get_log_name(&newtln, INSERT); iod->trans_name->iod = iod; } } } else { /* smw since esl zeroed above this is dead code since early days */ if (nam->nam$b_esl && (iod->trans_name->len != nam->nam$b_esl || memcmp(&iod->trans_name->dollar_io[0], resultant_name, nam->nam$b_esl))) { newtln.addr = resultant_name; newtln.len = nam->nam$b_esl; iod->trans_name = get_log_name(&newtln, INSERT); iod->trans_name->iod = iod; } } if (0 == d_rm->l_mrs) d_rm->l_mrs = iod->width; iod->width = d_rm->l_usz = d_rm->l_mrs; if (!d_rm->largerecord) { d_rm->r.rab$w_usz = d_rm->f.fab$w_mrs = d_rm->l_mrs; if (FAB$C_VFC == d_rm->f.fab$b_rfm) /* have to leave two bytes for the fixed control */ iod->width = MIN(iod->width, VFC_MAX_RECLEN); } width = iod->width; if (d_rm->largerecord) { width = ROUND_UP(width, SIZEOF(uint4)); if (FAB$C_VAR == d_rm->b_rfm) width += SIZEOF(uint4); /* for count */ } d_rm->bufsize = width + 1; d_rm->inbuf = (char*)malloc(width + 1); d_rm->outbuf_start = (char*)malloc(width + 1); d_rm->inbuf_pos = d_rm->inbuf; d_rm->inbuf_top = d_rm->inbuf + iod->width; d_rm->outbuf_pos = d_rm->outbuf = d_rm->outbuf_start + (d_rm->largerecord && FAB$C_VAR == d_rm->b_rfm ? SIZEOF(uint4) : 0); d_rm->outbuf_top = d_rm->outbuf + iod->width; d_rm->promask = xabpro.xab$w_pro; iod->state = dev_open; } return TRUE; }
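/* Hedged aside on the multi-block-count heuristic iorm_open() uses for large-record files: aim for
 * roughly two records per RMS I/O, bounded below by the default and above by the maximum multi-block
 * count. All three constants here are assumptions (512-byte RMS disk blocks, a default of 8 and a
 * ceiling of 127); the real values come from the project headers.
 */
#include <stdio.h>

#define RMS_DISK_BLOCK		512	/* assumed */
#define RMS_DEF_MBC		8	/* assumed default multi-block count */
#define RMS_MAX_MBC		127	/* assumed ceiling */
#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))

static unsigned int pick_mbc(unsigned int max_rec_size)
{
	unsigned int	blocksperrec = DIVIDE_ROUND_UP(max_rec_size, RMS_DISK_BLOCK);

	if (RMS_MAX_MBC <= (blocksperrec * 2))
		return RMS_MAX_MBC;		/* cap at the RMS maximum */
	if (RMS_DEF_MBC < (blocksperrec * 2))
		return blocksperrec * 2;	/* about two records per I/O */
	return RMS_DEF_MBC;			/* small records: keep the default */
}

int main(void)
{
	printf("mrs=2048  -> mbc=%u\n", pick_mbc(2048));	/* 8   */
	printf("mrs=16384 -> mbc=%u\n", pick_mbc(16384));	/* 64  */
	printf("mrs=65535 -> mbc=%u\n", pick_mbc(65535));	/* 127 */
	return 0;
}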
uint4 mur_process_intrpt_recov() { jnl_ctl_list *jctl, *last_jctl; reg_ctl_list *rctl, *rctl_top; int rename_fn_len, save_name_len; char prev_jnl_fn[MAX_FN_LEN + 1], rename_fn[MAX_FN_LEN + 1], save_name[MAX_FN_LEN + 1]; jnl_create_info jnl_info; uint4 status, status2; uint4 max_autoswitchlimit, max_jnl_alq, max_jnl_deq, freeblks; sgmnt_data_ptr_t csd; #if defined(VMS) io_status_block_disk iosb; #endif boolean_t jfh_changed; for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { gv_cur_region = rctl->gd; /* wcs_flu requires this to be set */ cs_addrs = rctl->csa; csd = cs_data = rctl->csd; /* MM logic after wcs_flu call requires this to be set */ assert(csd == rctl->csa->hdr); jctl = rctl->jctl_turn_around; max_jnl_alq = max_jnl_deq = max_autoswitchlimit = 0; for (last_jctl = NULL ; (NULL != jctl); last_jctl = jctl, jctl = jctl->next_gen) { if (max_autoswitchlimit < jctl->jfh->autoswitchlimit) { /* Note that max_jnl_alq, max_jnl_deq are not the maximum journal allocation/extensions across * generations, but rather the allocation/extension corresponding to the maximum autoswitchlimit. */ max_autoswitchlimit = jctl->jfh->autoswitchlimit; max_jnl_alq = jctl->jfh->jnl_alq; max_jnl_deq = jctl->jfh->jnl_deq; } /* Until now, "rctl->blks_to_upgrd_adjust" holds the number of V4 format newly created bitmap blocks * seen in INCTN records in backward processing. It is possible that backward processing might have * missed out on seeing those INCTN records which are part of virtually-truncated or completely-rolled-bak * journal files. The journal file-header has a separate field "prev_recov_blks_to_upgrd_adjust" which * maintains exactly this count. Therefore adjust the rctl counter accordingly. */ assert(!jctl->jfh->prev_recov_blks_to_upgrd_adjust || !jctl->jfh->recover_interrupted); assert(!jctl->jfh->prev_recov_blks_to_upgrd_adjust || jctl->jfh->prev_recov_end_of_data); rctl->blks_to_upgrd_adjust += jctl->jfh->prev_recov_blks_to_upgrd_adjust; } if (max_autoswitchlimit > last_jctl->jfh->autoswitchlimit) { csd->jnl_alq = max_jnl_alq; csd->jnl_deq = max_jnl_deq; csd->autoswitchlimit = max_autoswitchlimit; } else { assert(csd->jnl_alq == last_jctl->jfh->jnl_alq); assert(csd->jnl_deq == last_jctl->jfh->jnl_deq); assert(csd->autoswitchlimit == last_jctl->jfh->autoswitchlimit); } /* now that rctl->blks_to_upgrd_adjust is completely computed, use that to increment filehdr blks_to_upgrd. */ csd->blks_to_upgrd += rctl->blks_to_upgrd_adjust; if (csd->blks_to_upgrd) csd->fully_upgraded = FALSE; jctl = rctl->jctl_turn_around; csd->trans_hist.early_tn = jctl->turn_around_tn; csd->trans_hist.curr_tn = csd->trans_hist.early_tn; /* INCREMENT_CURR_TN macro not used but noted in comment * to identify all places that set curr_tn */ csd->jnl_eovtn = csd->trans_hist.curr_tn; csd->turn_around_point = TRUE; /* MUPIP REORG UPGRADE/DOWNGRADE stores its partially processed state in the database file header. * It is difficult for recovery to restore those fields to a correct partial value. * Hence reset the related fields as if the desired_db_format got set just ONE tn BEFORE the EPOCH record * and that there was no more processing that happened. * This might potentially mean some duplicate processing for MUPIP REORG UPGRADE/DOWNGRADE after the recovery. * But that will only be the case as long as the database is in compatibility (mixed) mode (hopefully not long). 
*/ if (csd->desired_db_format_tn >= jctl->turn_around_tn) csd->desired_db_format_tn = jctl->turn_around_tn - 1; if (csd->reorg_db_fmt_start_tn >= jctl->turn_around_tn) csd->reorg_db_fmt_start_tn = jctl->turn_around_tn - 1; if (csd->tn_upgrd_blks_0 > jctl->turn_around_tn) csd->tn_upgrd_blks_0 = (trans_num)-1; csd->reorg_upgrd_dwngrd_restart_block = 0; /* Compute current value of "free_blocks" based on the value of "free_blocks" at the turnaround point epoch * record and the change in "total_blks" since that epoch to the present form of the database. Any difference * in "total_blks" implies database file extensions happened since the turnaround point. A backward rollback * undoes everything (including all updates) except file extensions (it does not truncate the file size). * Therefore every block that was newly allocated as part of those file extensions should be considered FREE * for the current calculations except for the local bitmap blocks which are BUSY the moment they are created. */ assert(rctl->trnarnd_total_blks <= csd->trans_hist.total_blks); csd->trans_hist.free_blocks = rctl->trnarnd_free_blocks + (csd->trans_hist.total_blks - rctl->trnarnd_total_blks) - DIVIDE_ROUND_UP(csd->trans_hist.total_blks, BLKS_PER_LMAP) + DIVIDE_ROUND_UP(rctl->trnarnd_total_blks, BLKS_PER_LMAP); assert((freeblks = mur_blocks_free(rctl)) == csd->trans_hist.free_blocks); if (dba_bg == csd->acc_meth) /* This is taken from bt_refresh() */ ((th_rec *)((uchar_ptr_t)cs_addrs->th_base + cs_addrs->th_base->tnque.fl))->tn = jctl->turn_around_tn - 1; wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_FSYNC_DB); csd->turn_around_point = FALSE; /* In case this is MM and wcs_flu() remapped an extended database, reset rctl->csd */ assert((dba_mm == cs_data->acc_meth) || (rctl->csd == cs_data)); rctl->csd = cs_data; } for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { if (!rctl->jfh_recov_interrupted) jctl = rctl->jctl_turn_around; else { DEBUG_ONLY( for (jctl = rctl->jctl_turn_around; NULL != jctl->next_gen; jctl = jctl->next_gen) ; /* check that latest gener file name does not match db header */ assert((rctl->csd->jnl_file_len != jctl->jnl_fn_len) || (0 != memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len))); ) jctl = rctl->jctl_alt_head; } assert(NULL != jctl); for ( ; NULL != jctl->next_gen; jctl = jctl->next_gen) ; assert(rctl->csd->jnl_file_len == jctl->jnl_fn_len); /* latest gener file name */ assert(0 == memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)); /* should match db header */ if (SS_NORMAL != (status = prepare_unique_name((char *)jctl->jnl_fn, jctl->jnl_fn_len, "", "", rename_fn, &rename_fn_len, &status2))) return status; jctl->jnl_fn_len = rename_fn_len; /* change the name in memory to the proposed name */ memcpy(jctl->jnl_fn, rename_fn, rename_fn_len + 1); /* Rename hasn't happened yet at the filesystem level. In case current recover command is interrupted, * we need to update jfh->next_jnl_file_name before mur_forward(). Update jfh->next_jnl_file_name for * all journal files from which PBLK records were applied. Create new journal files for forward play. 
*/ assert(NULL != rctl->jctl_turn_around); jctl = rctl->jctl_turn_around; /* points to journal file which has current recover's turn around point */ assert(0 != jctl->turn_around_offset); jctl->jfh->turn_around_offset = jctl->turn_around_offset; /* save progress in file header for */ jctl->jfh->turn_around_time = jctl->turn_around_time; /* possible re-issue of recover */ jfh_changed = TRUE; for ( ; NULL != jctl; jctl = jctl->next_gen) { /* setup the next_jnl links. note that in the case of interrupted recovery, next_jnl links * would have been already set starting from the turn-around point journal file of the * interrupted recovery but the new recovery MIGHT have taken us to a still previous * generation journal file that needs its next_jnl link set. this is why we do the next_jnl * link setup even in the case of interrupted recovery although in most cases it is unnecessary. */ if (NULL != jctl->next_gen) { jctl->jfh->next_jnl_file_name_length = jctl->next_gen->jnl_fn_len; memcpy(jctl->jfh->next_jnl_file_name, jctl->next_gen->jnl_fn, jctl->next_gen->jnl_fn_len); jfh_changed = TRUE; } else assert(0 == jctl->jfh->next_jnl_file_name_length); /* null link from latest generation */ if (jctl->jfh->turn_around_offset && (jctl != rctl->jctl_turn_around)) { /* It is possible that the current recovery has a turn-around-point much before the * previously interrupted recovery. If it happens to be a previous generation journal * file then we have to reset the original turn-around-point to be zero in the journal * file header in order to ensure if this recovery gets interrupted we do interrupted * recovery processing until the new turn-around-point instead of stopping incorrectly * at the original turn-around-point itself. Note that there could be more than one * journal file with a non-zero turn_around_offset (depending on how many previous * recoveries got interrupted in this loop) that need to be reset. 
*/ assert(!jctl->turn_around_offset); assert(rctl->recov_interrupted); /* rctl->jfh_recov_interrupted can fail */ jctl->jfh->turn_around_offset = 0; jctl->jfh->turn_around_time = 0; jfh_changed = TRUE; } if (jfh_changed) { DO_FILE_WRITE(jctl->channel, 0, jctl->jfh, REAL_JNL_HDR_LEN, jctl->status, jctl->status2); if (SS_NORMAL != jctl->status) { assert(FALSE); if (SS_NORMAL == jctl->status2) gtm_putmsg(VARLSTCNT(5) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status); else gtm_putmsg(VARLSTCNT1(6) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status, PUT_SYS_ERRNO(jctl->status2)); return jctl->status; } UNIX_ONLY( GTM_FSYNC(jctl->channel, jctl->status); if (-1 == jctl->status) { jctl->status2 = errno; assert(FALSE); gtm_putmsg(VARLSTCNT(9) ERR_JNLFSYNCERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), jctl->status2); return ERR_JNLFSYNCERR; } ) } jfh_changed = FALSE; } memset(&jnl_info, 0, SIZEOF(jnl_info)); jnl_info.status = jnl_info.status2 = SS_NORMAL; jnl_info.prev_jnl = &prev_jnl_fn[0]; set_jnl_info(rctl->gd, &jnl_info); jnl_info.prev_jnl_len = rctl->jctl_turn_around->jnl_fn_len; memcpy(jnl_info.prev_jnl, rctl->jctl_turn_around->jnl_fn, rctl->jctl_turn_around->jnl_fn_len); jnl_info.prev_jnl[jnl_info.prev_jnl_len] = 0; jnl_info.jnl_len = rctl->csd->jnl_file_len; memcpy(jnl_info.jnl, rctl->csd->jnl_file_name, jnl_info.jnl_len); jnl_info.jnl[jnl_info.jnl_len] = 0; assert(!mur_options.rollback || jgbl.mur_rollback); jnl_info.reg_seqno = rctl->jctl_turn_around->turn_around_seqno; jgbl.gbl_jrec_time = rctl->jctl_turn_around->turn_around_time; /* time needed for cre_jnl_file_common() */ if (EXIT_NRM != cre_jnl_file_common(&jnl_info, rename_fn, rename_fn_len)) { gtm_putmsg(VARLSTCNT(4) ERR_JNLNOCREATE, 2, jnl_info.jnl_len, jnl_info.jnl); return jnl_info.status; } if (NULL != rctl->jctl_alt_head) /* remove the journal files created by last interrupted recover process */ { mur_rem_jctls(rctl); rctl->jctl_alt_head = NULL; } /* From this point on, journal records are written into the newly created journal file. However, we still read * from old journal files. */ }
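/* Worked aside on the free_blocks adjustment in mur_process_intrpt_recov() above: every block added
 * by file extensions since the turnaround point is treated as free, except the new local bitmap
 * blocks, which are busy from the moment they are created. BLKS_PER_LMAP of 512 is assumed and the
 * figures are illustrative only.
 */
#include <stdio.h>

#define BLKS_PER_LMAP		512				/* assumed value */
#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))		/* assumed definition */

int main(void)
{
	unsigned int	trnarnd_total_blks = 1000, trnarnd_free_blocks = 50;	/* state at the epoch */
	unsigned int	total_blks = 2100;	/* current size, after extensions since the epoch */
	unsigned int	free_blocks;

	free_blocks = trnarnd_free_blocks
		+ (total_blks - trnarnd_total_blks)			/* blocks added by extensions */
		- DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP)		/* minus bitmaps needed now    */
		+ DIVIDE_ROUND_UP(trnarnd_total_blks, BLKS_PER_LMAP);	/* plus bitmaps counted then   */
	printf("free_blocks after rollback: %u\n", free_blocks);	/* 50 + 1100 - 5 + 2 = 1147 */
	return 0;
}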
boolean_t mu_truncate(int4 truncate_percent) { sgmnt_addrs *csa; sgmnt_data_ptr_t csd; int num_local_maps; int lmap_num, lmap_blk_num; int bml_status, sigkill; int save_errno; int ftrunc_status; uint4 jnl_status; uint4 old_total, new_total; uint4 old_free, new_free; uint4 end_blocks; int4 blks_in_lmap, blk; gtm_uint64_t before_trunc_file_size; off_t trunc_file_size; off_t padding; uchar_ptr_t lmap_addr; boolean_t was_crit; uint4 found_busy_blk; srch_blk_status bmphist; srch_blk_status *blkhist; srch_hist alt_hist; trans_num curr_tn; blk_hdr_ptr_t lmap_blk_hdr; block_id *blkid_ptr; unix_db_info *udi; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; char *err_msg; intrpt_state_t prev_intrpt_state; off_t offset; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csa = cs_addrs; csd = cs_data; if (dba_mm == csd->acc_meth) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region)); return TRUE; } if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region)); return TRUE; } if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); return TRUE; } /* already checked for parallel truncates on this region --- see mupip_reorg.c */ gv_target = NULL; assert(csa->nl->trunc_pid == process_id); assert(dba_mm != csd->acc_meth); old_total = csa->ti->total_blks; old_free = csa->ti->free_blocks; sigkill = 0; found_busy_blk = 0; memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */ assert(csd->bplmap == BLKS_PER_LMAP); end_blocks = old_total % BLKS_PER_LMAP; /* blocks in the last lmap (first one we start scanning) */ if (0 == end_blocks) end_blocks = BLKS_PER_LMAP; num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP); /* ======================================== PHASE 1 ======================================== */ for (lmap_num = num_local_maps - 1; (lmap_num > 0 && !found_busy_blk); lmap_num--) { if (mu_ctrly_occurred || mu_ctrlc_occurred) return TRUE; assert(csa->ti->total_blks >= old_total); /* otherwise, a concurrent truncate happened... */ if (csa->ti->total_blks != old_total) /* Extend (likely called by mupip extend) -- don't truncate */ { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); return TRUE; } lmap_blk_num = lmap_num * BLKS_PER_LMAP; if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num) { found_busy_blk = lmap_blk_num; break; } blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP; /* Loop through non-bitmap blocks of this lmap, do recycled2free */ DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n", lmap_num, lmap_blk_num, blks_in_lmap)); for (blk = 1; blk < blks_in_lmap && blk != -1 && !found_busy_blk;) { t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) /* retry loop for recycled to free transactions */ { curr_tn = csd->trans_hist.curr_tn; /* Read the nth local bitmap into memory */ bmphist.blk_num = lmap_blk_num; bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr); lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr; if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz)) { /* Could not read the block successfully. Retry. 
*/ t_retry((enum cdb_sc)rdfail_detail); continue; } lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr); /* starting from the hint (blk itself), find the first busy or recycled block */ blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status); assert(blk < BLKS_PER_LMAP); if (blk == -1 || blk >= blks_in_lmap) { /* done with this lmap, continue to next */ t_abort(gv_cur_region, csa); break; } else if (BLK_BUSY == bml_status || csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num) { /* stop processing blocks... skip ahead to phase 2 */ found_busy_blk = lmap_blk_num; t_abort(gv_cur_region, csa); break; } else if (BLK_RECYCLED == bml_status) { /* Write PBLK records for recycled blocks only if before_image journaling is * enabled. t_end() takes care of checking if journaling is enabled and * writing PBLK record. We have to at least mark the recycled block as free. */ RESET_UPDATE_ARRAY; update_trans = UPDTRNS_DB_UPDATED_MASK; *((block_id *)update_array_ptr) = blk; update_array_ptr += SIZEOF(block_id); *(int *)update_array_ptr = 0; alt_hist.h[1].blk_num = 0; alt_hist.h[0].level = 0; alt_hist.h[0].cse = NULL; alt_hist.h[0].tn = curr_tn; alt_hist.h[0].blk_num = lmap_blk_num + blk; alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num, &alt_hist.h[0].cycle, &alt_hist.h[0].cr); if (!alt_hist.h[0].buffaddr) { t_retry((enum cdb_sc)rdfail_detail); continue; } if (!t_recycled2free(&alt_hist.h[0])) { t_retry(cdb_sc_lostbmlcr); continue; } t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0); /* Set the opcode for INCTN record written by t_end() */ inctn_opcode = inctn_blkmarkfree; if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) continue; /* block processed, scan from the next one */ blk++; break; } else { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); continue; } } /* END recycled2free retry loop */ } /* END scanning blocks of this particular lmap */ /* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */ /* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */ DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num)); t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) { RESET_UPDATE_ARRAY; BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id); *blkid_ptr = 0; update_trans = UPDTRNS_DB_UPDATED_MASK; inctn_opcode = inctn_mu_reorg; /* inctn_mu_truncate */ curr_tn = csd->trans_hist.curr_tn; blkhist = &alt_hist.h[0]; blkhist->blk_num = lmap_blk_num; blkhist->tn = curr_tn; blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */ /* Read the nth local bitmap into memory */ blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr); lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr; if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz)) { /* Could not read the block successfully. Retry. 
*/ t_retry((enum cdb_sc)rdfail_detail); continue; } t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0); blkhist->blk_num = 0; /* create empty history for bitmap block */ if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) continue; break; } } /* END scanning lmaps */ /* ======================================== PHASE 2 ======================================== */ assert(!csa->now_crit); for (;;) { /* wait for FREEZE, we don't want to truncate a frozen database */ grab_crit(gv_cur_region); if (FROZEN_CHILLED(cs_data)) DO_CHILLED_AUTORELEASE(csa, cs_data); if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN) break; rel_crit(gv_cur_region); while (FROZEN(cs_data) || IS_REPL_INST_FROZEN) { hiber_start(1000); if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data)) break; } } assert(csa->nl->trunc_pid == process_id); /* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */ if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB)) { assert(FALSE); gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"), DB_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return FALSE; } csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk); assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk)); new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP); if (mu_ctrly_occurred || mu_ctrlc_occurred) { rel_crit(gv_cur_region); return TRUE; } else if (csa->ti->total_blks != old_total || new_total == old_total) { assert(csa->ti->total_blks >= old_total); /* Better have been an extend, not a truncate... */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); rel_crit(gv_cur_region); return TRUE; } else if (GDSVCURR != csd->desired_db_format || csd->blks_to_upgrd != 0 || !csd->fully_upgraded) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } else if (SNAPSHOTS_IN_PROG(csa->nl)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state); if (JNL_ENABLED(csa)) { /* Write JRT_TRUNC and INCTN records */ if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = csa->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). 
*/ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(gv_cur_region, csa); if (SS_NORMAL != jnl_status) send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region)); else { if (0 == jpc->pini_addr) jnl_put_jrt_pini(csa); jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total); inctn_opcode = inctn_mu_reorg; jnl_write_inctn_rec(csa); jnl_status = jnl_flush(gv_cur_region); if (SS_NORMAL != jnl_status) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush during mu_truncate"), jnl_status); assert(NOJNL == jpc->channel); /* jnl file lost has been triggered */ } } } /* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */ curr_tn = csa->ti->curr_tn; CHECK_TN(csa, csd, curr_tn); udi = FILE_INFO(gv_cur_region); /* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */ trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1)); csd->after_trunc_total_blks = new_total; csd->before_trunc_free_blocks = csa->ti->free_blocks; csd->before_trunc_total_blks = old_total; /* Flags interrupted truncate for recover_truncate */ /* file size and total blocks: INCONSISTENT */ csa->ti->total_blks = new_total; /* past the point of no return -- shared memory intact */ assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total)); csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total); new_free = csa->ti->free_blocks; KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1); /* 55 : Issue a kill -9 before 1st fsync */ fileheader_sync(gv_cur_region); DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno); CHECK_DBSYNC(gv_cur_region, save_errno); /* past the point of no return -- shared memory deleted */ KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2); /* 56 : Issue a kill -9 after 1st fsync */ clear_cache_array(csa, csd, gv_cur_region, new_total, old_total); offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size; save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff))); if (0 != save_errno) { err_msg = (char *)STRERROR(errno); rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg)); return FALSE; } KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3); /* 57 : Issue a kill -9 after reducing csa->ti->total_blks, before FTRUNCATE */ /* Execute an ftruncate() and truncate the DB file * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS) * It ignores kill -9 signal till its operation is completed. * So we can safely assume that the result of ftruncate() will be complete. */ FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status); if (0 != ftrunc_status) { err_msg = (char *)STRERROR(errno); rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg)); /* should go through recover_truncate now, which will again try to FTRUNCATE */ return FALSE; } /* file size and total blocks: CONSISTENT (shrunk) */ KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4); /* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */ csa->nl->root_search_cycle++; /* Force concurrent processes to restart in t_end/tp_tend to make sure no one * tries to commit updates past the end of the file. Bitmap validations together * with highest_lbm_with_busy_blk should actually be sufficient, so this is * just to be safe. 
*/ csd->before_trunc_total_blks = 0; /* indicate CONSISTENT */ /* Increment TN */ assert(csa->ti->early_tn == csa->ti->curr_tn); csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1; INCREMENT_CURR_TN(csd); fileheader_sync(gv_cur_region); DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno); KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5); /* 59 : Issue a kill -9 after 2nd fsync */ CHECK_DBSYNC(gv_cur_region, save_errno); ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state); curr_tn = csa->ti->curr_tn; rel_crit(gv_cur_region); send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total, &curr_tn); util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].", FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free); return TRUE; } /* END of mu_truncate() */
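/* A minimal sketch of mu_truncate's crash-safety ordering, assuming plain POSIX calls: flag the intent in
 * the header and fsync (so recovery can tell "file size and block count INCONSISTENT" from a clean state),
 * ftruncate, then clear the flag and fsync again. The struct and function names here are invented; the real
 * code routes this through its header fields, fileheader_sync and DB_FSYNC, and recover_truncate finishes or
 * redoes an interrupted shrink.
 */
#define _XOPEN_SOURCE 500	/* for pwrite */
#include <errno.h>
#include <unistd.h>

struct trunc_intent
{
	unsigned int	before_total;	/* nonzero == truncate in flight (INCONSISTENT) */
	unsigned int	after_total;
};

static int write_intent_durably(int fd, const struct trunc_intent *it)
{
	if ((ssize_t)sizeof(*it) != pwrite(fd, it, sizeof(*it), 0))
		return errno ? errno : EIO;
	return (0 == fsync(fd)) ? 0 : errno;
}

static int shrink_with_intent(int db_fd, int hdr_fd, unsigned int old_total, unsigned int new_total, off_t new_len)
{
	struct trunc_intent	it = {old_total, new_total};
	int			err;

	if (0 != (err = write_intent_durably(hdr_fd, &it)))	/* 1: record the intent durably */
		return err;
	if (0 != ftruncate(db_fd, new_len))			/* 2: the shrink itself */
		return errno;
	it.before_total = 0;					/* 3: zero means CONSISTENT, as above */
	return write_intent_durably(hdr_fd, &it);
}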
uint4 gdsfilext (uint4 blocks, uint4 filesize) { sm_uc_ptr_t old_base[2]; boolean_t was_crit, need_to_restore_mask = FALSE; char *buff; int mm_prot, result, save_errno, status; uint4 new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks; uint4 jnl_status, to_wait, to_msg, wait_period; GTM_BAVAIL_TYPE avail_blocks; sgmnt_data_ptr_t tmp_csd; off_t new_eof; trans_num curr_tn; unix_db_info *udi; sigset_t savemask; inctn_opcode_t save_inctn_opcode; int4 prev_extend_blks_to_upgrd; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; error_def(ERR_DBFILERR); error_def(ERR_DBFILEXT); error_def(ERR_DSKSPACEFLOW); error_def(ERR_JNLFLUSH); error_def(ERR_TEXT); error_def(ERR_TOTALBLKMAX); error_def(ERR_WAITDSKSPACE); #ifdef __hppa if (dba_mm == cs_addrs->hdr->acc_meth) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */ #endif /* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will overflow and end up doing silly things. */ assert(blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)); if (!blocks) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */ bplmap = cs_data->bplmap; /* new total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this manner as every non-bitmap block must have an associated bitmap) Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap. Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed. */ new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1) - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap); new_blocks = blocks + new_bit_maps; assert(0 < (int)new_blocks); udi = FILE_INFO(gv_cur_region); if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE))) { send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); } else { avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE); if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks) { send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region), (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0))); #ifndef __MVS__ if (blocks > (uint4)avail_blocks) return (uint4)(NO_FREE_SPACE); #endif } } cs_addrs->extending = TRUE; was_crit = cs_addrs->now_crit; /* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur */ assert(!was_crit || CDB_STAGNATE == t_tries || FALSE == cs_data->freeze); for ( ; ; ) { /* If we are in the final retry and already hold crit, it is possible that csd->wc_blocked is also set to TRUE * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt" * transaction numbers without correspondingly updating the history transaction numbers (effectively causing * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover) * if we already hold crit. 
*/ if (!was_crit) grab_crit(gv_cur_region); if (FALSE == cs_data->freeze) break; rel_crit(gv_cur_region); if (was_crit) { /* Two cases. * (i) Final retry and in TP. We might be holding crit in other regions too. * We can't do a grab_crit() on this region again unless it is deadlock-safe. * To be on the safer side, we do a restart. The tp_restart() logic will wait * for this region's freeze to be removed before grabbing crit. * (ii) Final retry and not in TP. In that case too, it is better to restart in case there is * some validation code that shortcuts the checking for the final retry assuming we were * in crit from t_begin() to t_end(). t_retry() has logic that will wait for unfreeze. * In either case, we need to restart. Returning EXTEND_UNFREEZECRIT will cause one in t_end/tp_tend. */ return (uint4)(EXTEND_UNFREEZECRIT); } while (cs_data->freeze) hiber_start(1000); } assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks); if (cs_data->trans_hist.total_blks != filesize) { /* somebody else has already extended it, since we are in crit, this is trust-worthy * however, in case of MM, we still need to remap the database */ assert(cs_data->trans_hist.total_blks > filesize); GDSFILEXT_CLNUP; return (SS_NORMAL); } if (run_time && (2 * ((0 < dollar_tlevel) ? sgm_info_ptr->cw_set_depth : cw_set_depth) < cs_addrs->ti->free_blocks)) { if (FALSE == was_crit) { rel_crit(gv_cur_region); return (uint4)(EXTEND_SUSPECT); } /* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then GTMASSERT. */ if (is_free_blks_ctr_ok() || (2 * ((0 < dollar_tlevel) ? sgm_info_ptr->cw_set_depth : cw_set_depth) < cs_addrs->ti->free_blocks)) GTMASSERT; /* held crit through bm_getfree into gdsfilext and still didn't get it right */ } if (JNL_ENABLED(cs_data)) { if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = cs_addrs->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). 
*/ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (jnl_status) { GDSFILEXT_CLNUP; send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); return (uint4)(NO_FREE_SPACE); /* should have better return status */ } } if (dba_mm == cs_addrs->hdr->acc_meth) { #if defined(UNTARGETED_MSYNC) status = msync((caddr_t)cs_addrs->db_addrs[0], (size_t)(cs_addrs->db_addrs[1] - cs_addrs->db_addrs[0]), MS_SYNC); #else cs_addrs->nl->mm_extender_pid = process_id; status = wcs_wtstart(gv_cur_region, 0); cs_addrs->nl->mm_extender_pid = 0; if (0 != cs_addrs->acc_meth.mm.mmblk_state->mmblkq_active.fl) GTMASSERT; status = 0; #endif if (0 == status) { /* Block SIGALRM for the duration when cs_data and cs_addrs are out of sync */ sigprocmask(SIG_BLOCK, &blockalrm, &savemask); need_to_restore_mask = TRUE; tmp_csd = cs_data; cs_data = (sgmnt_data_ptr_t)malloc(sizeof(*cs_data)); memcpy((sm_uc_ptr_t)cs_data, (uchar_ptr_t)tmp_csd, sizeof(*cs_data)); status = munmap((caddr_t)cs_addrs->db_addrs[0], (size_t)(cs_addrs->db_addrs[1] - cs_addrs->db_addrs[0])); #ifdef DEBUG_DB64 if (-1 != status) rel_mmseg((caddr_t)cs_addrs->db_addrs[0]); #endif } else tmp_csd = NULL; if (0 != status) { if (tmp_csd) { free(cs_data); cs_data = tmp_csd; } GDSFILEXT_CLNUP; send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status); return (uint4)(NO_FREE_SPACE); } cs_addrs->hdr = cs_data; cs_addrs->ti = &cs_data->trans_hist; } if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data)) { GDSFILEXT_CLNUP; send_msg(VARLSTCNT(1) ERR_TOTALBLKMAX); return (uint4)(NO_FREE_SPACE); } CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(0 < (int)new_blocks); new_total = cs_data->trans_hist.total_blks + new_blocks; new_eof = ((off_t)(cs_data->start_vbn - 1) * DISK_BLOCK_SIZE) + ((off_t)new_total * cs_data->blk_size); buff = (char *)malloc(DISK_BLOCK_SIZE); memset(buff, 0, DISK_BLOCK_SIZE); LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno); if ((ENOSPC == save_errno) && run_time) { /* try to write it every second, and send message to operator * log every 1/20 of cs_data->wait_disk_space */ wait_period = to_wait = DIVIDE_ROUND_UP(cs_data->wait_disk_space, CDB_STAGNATE + 1); to_msg = (to_wait / 8) ? (to_wait / 8) : 1; /* send around 8 messages during 1 wait_period */ while ((to_wait > 0) && (ENOSPC == save_errno)) { if ((to_wait == cs_data->wait_disk_space) || (to_wait % to_msg == 0)) { send_msg(VARLSTCNT(11) ERR_WAITDSKSPACE, 4, process_id, to_wait + (CDB_STAGNATE - t_tries) * wait_period, DB_LEN_STR(gv_cur_region), ERR_TEXT, 2, RTS_ERROR_TEXT("Please make more disk space available or shutdown GT.M to avoid data loss"), save_errno); gtm_putmsg(VARLSTCNT(11) ERR_WAITDSKSPACE, 4, process_id, to_wait + (CDB_STAGNATE - t_tries) * wait_period, DB_LEN_STR(gv_cur_region), ERR_TEXT, 2, RTS_ERROR_TEXT("Please make more disk space available or shutdown GT.M to avoid data loss"), save_errno); } if (!was_crit) rel_crit(gv_cur_region); hiber_start(1000); to_wait--; if (!was_crit) grab_crit(gv_cur_region); LSEEKWRITE(udi->fd, new_eof, buff, DISK_BLOCK_SIZE, save_errno); } } free(buff); if (0 != save_errno) { GDSFILEXT_CLNUP; if (ENOSPC == save_errno) return (uint4)(NO_FREE_SPACE); send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); return (uint4)(NO_FREE_SPACE); } DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
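/* The new_bit_maps arithmetic used by this extension code is easy to get wrong, so here is a self-contained
 * sketch that mirrors it, assuming the usual rounding for DIVIDE_ROUND_UP (re-defined locally) and invented
 * example numbers, and checks the invariant that every block of the grown file is still covered by a bitmap.
 */
#include <assert.h>
#include <stdio.h>

#define DIVIDE_ROUND_UP(x, y)	(((x) + (y) - 1) / (y))

int main(void)
{
	unsigned int	total_blks = 1024, bplmap = 512, blocks = 10;	/* example values */
	unsigned int	cur_maps, new_bit_maps, new_blocks, new_total;

	cur_maps = DIVIDE_ROUND_UP(total_blks, bplmap);	/* bitmaps already in the file */
	/* non-bitmap blocks after the extend, spread over the (bplmap - 1) data blocks each bitmap covers */
	new_bit_maps = DIVIDE_ROUND_UP(total_blks - cur_maps + blocks, bplmap - 1) - cur_maps;
	new_blocks = blocks + new_bit_maps;
	new_total = total_blks + new_blocks;
	/* the grown file must need exactly the bitmaps it will have */
	assert(DIVIDE_ROUND_UP(new_total, bplmap) == cur_maps + new_bit_maps);
	printf("extending by %u data blocks adds %u bitmap block(s), %u new blocks in all\n",
		blocks, new_bit_maps, new_blocks);
	return 0;
}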
block_id bm_getfree(block_id orig_hint, boolean_t *blk_used, unsigned int cw_work, cw_set_element *cs, int *cw_depth_ptr) { cw_set_element *cs1; sm_uc_ptr_t bmp; block_id bml, hint, hint_cycled, hint_limit; block_id_ptr_t b_ptr; int cw_set_top, depth, lcnt; unsigned int local_maps, map_size, n_decrements = 0, total_blks; trans_num ctn; int4 free_bit, offset; uint4 space_needed; uint4 status; srch_blk_status blkhist; total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks; if (orig_hint >= total_blks) /* for TP, hint can be > total_blks */ orig_hint = 1; hint = orig_hint; hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); hint_limit = DIVIDE_ROUND_DOWN(orig_hint, BLKS_PER_LMAP); local_maps = hint_cycled + 2; /* for (up to) 2 wraps */ for (lcnt = 0; lcnt <= local_maps; lcnt++) { bml = bmm_find_free(hint / BLKS_PER_LMAP, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps); if ((NO_FREE_SPACE == bml) || (bml >= hint_cycled)) { /* if no free space or might have looped to original map, extend */ if ((NO_FREE_SPACE != bml) && (hint_limit < hint_cycled)) { hint_cycled = hint_limit; hint = 1; continue; } if (SS_NORMAL != (status = gdsfilext(cs_data->extension_size, total_blks))) return (status); if (dba_mm == cs_data->acc_meth) return (FILE_EXTENDED); hint = total_blks; total_blks = cs_addrs->ti->total_blks; hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); local_maps = hint_cycled + 2; /* for (up to) 2 wraps */ /* * note that you can make an optimization of not going back over the whole database and going over * only the extended section. but since it is very unlikely that a free block won't be found * in the extended section and the fact that we are starting from the extended section in either * approach and the fact that we have a GTMASSERT to check that we don't have a lot of * free blocks while doing an extend and the fact that it is very easy to make the change to do * a full-pass, the full-pass solution is currently being implemented */ lcnt = -1; /* allow it one extra pass to ensure that it can take advantage of the extension */ n_decrements++; /* used only for debugging purposes */ continue; } bml *= BLKS_PER_LMAP; if (ROUND_DOWN2(hint, BLKS_PER_LMAP) != bml) { /* not within requested map */ if ((bml < hint) && (hint_cycled)) /* wrap? - second one should force an extend for sure */ hint_cycled = (hint_limit < hint_cycled) ?
hint_limit: 0; hint = bml + 1; /* start at beginning */ } if (ROUND_DOWN2(total_blks, BLKS_PER_LMAP) == bml) map_size = (total_blks - bml); else map_size = BLKS_PER_LMAP; if (0 != dollar_tlevel) { depth = cw_work; cw_set_top = *cw_depth_ptr; if (depth < cw_set_top) tp_get_cw(cs, cw_work, &cs1); for (; depth < cw_set_top; depth++, cs1 = cs1->next_cw_set) { /* do tp front to back because list is more efficient than tp_get_cw and forward pointers exist */ if (bml == cs1->blk) { TRAVERSE_TO_LATEST_CSE(cs1); break; } } if (depth >= cw_set_top) { assert(cw_set_top == depth); depth = 0; } } else { for (depth = *cw_depth_ptr - 1; depth >= cw_work; depth--) { /* do non-tp back to front, because of adjacency */ if (bml == (cs + depth)->blk) { cs1 = cs + depth; break; } } if (depth < cw_work) { assert(cw_work - 1 == depth); depth = 0; } } if (0 == depth) { ctn = cs_addrs->ti->curr_tn; if (!(bmp = t_qread(bml, (sm_int_ptr_t)&blkhist.cycle, &blkhist.cr))) return MAP_RD_FAIL; if ((BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz) || (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl)) { assert(CDB_STAGNATE > t_tries); rdfail_detail = cdb_sc_badbitmap; return MAP_RD_FAIL; } offset = 0; } else { bmp = cs1->old_block; b_ptr = (block_id_ptr_t)(cs1->upd_addr); b_ptr += cs1->reference_cnt - 1; offset = *b_ptr + 1; } if (offset < map_size) { free_bit = bm_find_blk(offset, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), map_size, blk_used); if (MAP_RD_FAIL == free_bit) return MAP_RD_FAIL; } else free_bit = NO_FREE_SPACE; if (NO_FREE_SPACE != free_bit) break; if ((hint = bml + BLKS_PER_LMAP) >= total_blks) /* if map is full, start at 1st blk in next map */ { /* wrap - second one should force an extend for sure */ hint = 1; if (hint_cycled) hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0; } if ((0 == depth) && (FALSE != cs_addrs->now_crit)) /* if it's from the cw_set, its state is murky */ bit_clear(bml / BLKS_PER_LMAP, MM_ADDR(cs_data)); /* if crit, repair master map error */ } /* If not in the final retry, it is possible that free_bit is >= map_size (e.g. if bitmap block gets recycled). */ if (map_size <= (uint4)free_bit && CDB_STAGNATE <= t_tries) { /* bad free bit */ assert((NO_FREE_SPACE == free_bit) && (lcnt > local_maps)); /* All maps full, should have extended */ GTMASSERT; } if (0 != depth) { b_ptr = (block_id_ptr_t)(cs1->upd_addr); b_ptr += cs1->reference_cnt++; *b_ptr = free_bit; } else { space_needed = (BLKS_PER_LMAP + 1) * sizeof(block_id); if (dollar_tlevel) { ENSURE_UPDATE_ARRAY_SPACE(space_needed); /* have brackets for "if" for macros */ } BLK_ADDR(b_ptr, space_needed, block_id); memset(b_ptr, 0, space_needed); *b_ptr = free_bit; blkhist.blk_num = bml; blkhist.buffaddr = bmp; /* cycle and cr have already been assigned from t_qread */ t_write_map(&blkhist, (uchar_ptr_t)b_ptr, ctn, 1); /* last parameter 1 is what cs->reference_cnt gets set to */ } return bml + free_bit; }
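/* Stripped of the transaction machinery, bm_getfree's outer loop is a wrap-around scan: start at the
 * caller's hint, walk the master map past the end and around once, and treat running out of candidates as
 * "no space anywhere, extend". A minimal sketch of just that wrap-around core, with an invented helper name
 * and a byte-per-map stand-in for the real master map:
 */
#include <stdio.h>

#define NO_FREE_SPACE	(-1)

/* maps[i] is nonzero when local map i is believed to have a free block (a master-map analogue) */
static int find_map_with_free(const unsigned char *maps, int nmaps, int hint_map)
{
	int	i, map;

	for (i = 0; i < nmaps; i++)
	{	/* exactly one full cycle, wrapping past the last map back to 0 */
		map = (hint_map + i) % nmaps;
		if (maps[map])
			return map;
	}
	return NO_FREE_SPACE;	/* this is the point where the real code calls gdsfilext */
}

int main(void)
{
	unsigned char	maps[] = {0, 0, 1, 0};

	/* starting at map 3, the scan wraps around and lands on map 2 */
	printf("first map with free space: %d\n", find_map_with_free(maps, 4, 3));
	return 0;
}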
void mupip_upgrade(void) { bool rbno; unsigned char *upgrd_buff[2], upgrd_label[GDS_LABEL_SZ]="UPGRADE0304"; char fn[256]; char answer[4]; unsigned short fn_len; int4 fd, save_errno, old_hdr_size, new_hdr_size, status, bufsize, dsize, datasize[2]; int4 old_hdr_size_vbn, new_hdr_size_vbn; int fstat_res; off_t last_full_grp_startoff, old_file_len, old_file_len2, read_off, write_off, old_start_vbn_off; block_id last_full_grp_startblk; v3_sgmnt_data old_head_data, *old_head; sgmnt_data new_head_data, *new_head; struct stat stat_buf; error_def(ERR_MUNODBNAME); error_def(ERR_MUNOUPGRD); error_def(ERR_DBOPNERR); error_def(ERR_DBRDONLY); error_def(ERR_DBFILOPERR); error_def(ERR_DBPREMATEOF); ESTABLISH(mupip_upgrade_ch); fn_len = sizeof(fn); if (!cli_get_str("FILE", fn, &fn_len)) rts_error(VARLSTCNT(1) ERR_MUNODBNAME); if (!(mupip_upgrade_standalone(fn, &upgrade_standalone_sems))) rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); if (-1 == (fd = OPEN(fn, O_RDWR))) { save_errno = errno; if (-1 != (fd = OPEN(fn, O_RDONLY))) { util_out_print("Cannot update read-only database.", FLUSH); rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, errno); } rts_error(VARLSTCNT(5) ERR_DBOPNERR, 2, fn_len, fn, save_errno); } /* Confirm before proceeding */ if (!mu_upgrd_confirmed(TRUE)) { util_out_print("Upgrade canceled by user", FLUSH); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } util_out_print("Do not interrupt to avoid damage in database!!", FLUSH); util_out_print("Mupip upgrade started ...!/", FLUSH); mu_upgrd_sig_init(); /* get file status */ FSTAT_FILE(fd, &stat_buf, fstat_res); if (-1 == fstat_res) rts_error(VARLSTCNT(5) ERR_DBOPNERR, 2, fn_len, fn, errno); old_file_len = stat_buf.st_size; /* Prepare v3.x file header buffer */ old_hdr_size = sizeof(*old_head); old_head = &old_head_data; /* Prepare v4.x file header buffer */ new_hdr_size = sizeof(*new_head); new_head = &new_head_data; memset(new_head, 0, new_hdr_size); old_hdr_size_vbn = DIVIDE_ROUND_UP(old_hdr_size, DISK_BLOCK_SIZE); new_hdr_size_vbn = DIVIDE_ROUND_UP(new_hdr_size, DISK_BLOCK_SIZE); /* READ header from V3.x file */ LSEEKREAD(fd, 0, old_head, old_hdr_size, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* Check version */ if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 1)) { if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 3)) { /* it is not a GTM database */ close(fd); util_out_print("File !AD is not a GT.M database.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } else { /* it is a GT.M database */ /* is it a v3.x database?
*/ if (memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_V30,2) !=0 && memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_ALT_V30,2) != 0) { close(fd); util_out_print("File !AD has an unrecognized database version!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } } } else { /* Note: We assume that if the V4.x header and current GT.M file header * has same field names, they are at same offset */ /* READ the header from file again as V4.x header */ LSEEKREAD(fd, 0, new_head, new_hdr_size, status); if (0 != status) if (-1 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); else rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); if (QWNE(new_head->reg_seqno, seq_num_zero) || QWNE(new_head->resync_seqno, seq_num_zero) || (new_head->resync_tn != 0) || new_head->repl_state != repl_closed) { util_out_print("!AD might already have been upgraded", FLUSH, fn_len, fn); util_out_print("Do you wish to continue with the upgrade? [y/n] ", FLUSH); SCANF("%s", answer); if (answer[0] != 'y' && answer[0] != 'Y') { close(fd); util_out_print("Upgrade canceled by user", FLUSH); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } } init_replication(new_head); new_head->max_update_array_size = new_head->max_non_bm_update_array_size = ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(new_head), UPDATE_ARRAY_ALIGN_SIZE); new_head->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE); new_head->mutex_spin_parms.mutex_hard_spin_count = MUTEX_HARD_SPIN_COUNT; new_head->mutex_spin_parms.mutex_sleep_spin_count = MUTEX_SLEEP_SPIN_COUNT; new_head->mutex_spin_parms.mutex_spin_sleep_mask = MUTEX_SPIN_SLEEP_MASK; new_head->semid = INVALID_SEMID; new_head->shmid = INVALID_SHMID; if (JNL_ALLOWED(new_head)) { /* Following 3 are new fields starting from V43001. * Initialize them appropriately. 
*/ new_head->epoch_interval = DEFAULT_EPOCH_INTERVAL; new_head->alignsize = DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE; if (!new_head->jnl_alq) new_head->jnl_alq = JNL_ALLOC_DEF; /* note new_head->jnl_deq is carried over without any change even if it is zero since a zero * jnl file extension size is supported starting V43001 */ new_head->autoswitchlimit = ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, new_head->jnl_alq, new_head->jnl_deq); /* following field is assumed as non-zero by set_jnl_info starting V43001A */ if (JNL_ALLOWED(new_head) && !new_head->jnl_buffer_size) new_head->jnl_buffer_size = JNL_BUFFER_DEF; } else { new_head->epoch_interval = 0; new_head->alignsize = 0; new_head->autoswitchlimit = 0; } new_head->yield_lmt = DEFAULT_YIELD_LIMIT; /* writing header */ LSEEKWRITE(fd, 0, new_head, new_hdr_size, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); close(fd); util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn); if (0 != sem_rmid(upgrade_standalone_sems)) { util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mupip_exit(SS_NORMAL); } util_out_print("Old header size: !SL", FLUSH, old_hdr_size); util_out_print("New header size: !SL", FLUSH, new_hdr_size); if (old_head->createinprogress) { close(fd); util_out_print("Database creation in progress on file !AD.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if (old_head->file_corrupt) { close(fd); util_out_print("Database !AD is corrupted.!/", FLUSH, fn_len, fn); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if ((((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)DISK_BLOCK_SIZE != old_file_len) && (((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)old_head->blk_size != old_file_len)) { util_out_print("Incorrect start_vbn !SL or, block size !SL or, total blocks !SL", FLUSH, old_head->start_vbn, old_head->blk_size, old_head->trans_hist.total_blks); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } if (ROUND_DOWN(old_head->blk_size, DISK_BLOCK_SIZE) != old_head->blk_size) { util_out_print("Database block size !SL is not divisible by DISK_BLOCK_SIZE", FLUSH, old_head->blk_size); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mu_upgrd_header(old_head, new_head); /* Update header from v3.x to v4.x */ new_head->start_vbn = new_hdr_size_vbn + 1; new_head->free_space = 0; new_head->wc_blocked_t_end_hist.evnt_cnt = old_head->wc_blocked_t_end_hist2.evnt_cnt; new_head->wc_blocked_t_end_hist.evnt_tn = old_head->wc_blocked_t_end_hist2.evnt_tn; init_replication(new_head);
/* A simple way of doing mupip upgrade is to move all the data after the file header towards the eof to make space
   and write down the header. This does not need any computation or change in data/index blocks. This is a slow
   process because it is mainly I/O, with no manipulation of database structures or index blocks. This is okay for
   a small database. A time-efficient way is to physically move the second group of BLKS_PER_LMAP number of blocks
   towards the eof and move the first group of BLKS_PER_LMAP number of blocks in place of the 2nd group. Finally
   adjust all indices to point to the blocks correctly. Also adjust the master bit map. (note: we cannot move the
   first group from the beginning). Detailed algorithm as follows:
   ---------------------------
   // Allocate two buffers each to hold one group of data.
Read v3.x header and upgrade to v4.x
   if file is big enough
	read group 1 in buff[0]
	read_off = offset of starting block of 2nd group.
	read group 2 in buff[1]
	write buff[0] at offset read_off
	last_full_grp_startblk = points to the block where 2nd group of 512 blocks of old file will be written back.
		// Instead of searching for a free group we will write at the last full group
		// Say, we have 3000 blocks. last_full_grp_startblk = 2048 // (not 2560, because it is not full)
		// All data from that point upto eof will be read and saved in buffer
	read all remaining data from the point last_full_grp_startblk upto eof in buff[0]
	write buff[1] at the point of last_full_grp_startblk
	Now write buff[0] at the end of last write
		// Graphical Example: Each letter corresponds to a group of 512 blocks where first block
		// is local bit map. Last group U may be a group of less than 512 blocks.
		// Extend towards right ------------------------------------------------------->
		// old permutation: [v3 head] A B C D E F G H I J K L M N O P Q R S T U
		// new permutation: [v4 head ] A C D E F G H I J K L M N O P Q R S T B U
	Finally traverse the tree and adjust block pointers
	Adjust master map
	write new v4.x header at bof
   else
	bufsize = size of data for a group
	rbno = 0	// read buffer no. This switches between 0 and 1
	read_off = 0
	write_off = 0
	upgrd_buff[rbno] = new header
	data_size[rbno] = new header size
	rbno = INVERT(rbno);
	do while not eof
		data_size[rbno] = MIN(bufsize, remaining_data_size)
		Read data of size data_size[rbno] in upgrd_buff[rbno] and adjust read_off
		rbno = INVERT(rbno);
		Write upgrd_buff[rbno] of datasize[rbno] at write_off and increase write_off
	Enddo
	rbno = INVERT(rbno)
	Write upgrd_buff[rbno] of datasize[rbno] at write_off
   endif */
bufsize = old_head->blk_size * BLKS_PER_LMAP;
upgrd_buff[0] = (unsigned char*) malloc(bufsize);
upgrd_buff[1] = (unsigned char*) malloc(bufsize);
read_off = old_start_vbn_off = (off_t)(old_head->start_vbn - 1) * DISK_BLOCK_SIZE;	/* start vbn offset in bytes */
last_full_grp_startblk = ROUND_DOWN(new_head->trans_hist.total_blks, BLKS_PER_LMAP);	/* in block_id */
last_full_grp_startoff = old_start_vbn_off + (off_t)last_full_grp_startblk * new_head->blk_size;	/* offset in bytes */
/* this calculation is used because some 3.2x databases have GDS blk_size bytes at the end instead of DISK_BLOCK_SIZE bytes.
*/ old_file_len2 = old_head->start_vbn * DISK_BLOCK_SIZE + (off_t)old_head->blk_size * old_head->trans_hist.total_blks; /* Change Label to a temporary dummy value, so that other GTM process does not come while doing upgrade and corrupts database */ LSEEKWRITE(fd, 0, upgrd_label, GDS_LABEL_SZ - 1, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); if (old_head->trans_hist.total_blks > BLKS_PER_LMAP * 2) { /* recalculate start_vbn and free space, because there will be a gap after header */ new_head->start_vbn = old_head->start_vbn + bufsize / DISK_BLOCK_SIZE; new_head->free_space = bufsize - (new_hdr_size_vbn - old_hdr_size_vbn) * DISK_BLOCK_SIZE; util_out_print("New starting VBN is: !SL !/", FLUSH, new_head->start_vbn); /* read 1st group of blocks */ LSEEKREAD(fd, read_off, upgrd_buff[0], bufsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); read_off = read_off + bufsize; /* read 2nd group of blocks */ LSEEKREAD(fd, read_off, upgrd_buff[1], bufsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write 1st group of blocks in place of 2nd group */ write_off = old_start_vbn_off + bufsize; LSEEKWRITE(fd, write_off, upgrd_buff[0], bufsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* read last group (# of blks <= BLKS_PER_LMAP) */ dsize = old_file_len2 - last_full_grp_startoff; assert (dsize <= bufsize); LSEEKREAD(fd, last_full_grp_startoff, upgrd_buff[0], dsize, status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write 2nd group of blocks */ LSEEKWRITE(fd, last_full_grp_startoff, upgrd_buff[1], bufsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); /* write last group read from old file */ LSEEKWRITE(fd, last_full_grp_startoff + bufsize, upgrd_buff[0], dsize, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); util_out_print("Please wait while index is being adjusted...!/", FLUSH); mu_upgrd_adjust_blkptr(1L, TRUE, new_head, fd, fn, fn_len); mu_upgrd_adjust_mm(new_head->master_map, DIVIDE_ROUND_UP(new_head->trans_hist.total_blks+1,BLKS_PER_LMAP)); /* writing header */ LSEEKWRITE(fd, 0, new_head, new_hdr_size, status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); } else /* very small database */ { rbno = 0; write_off = 0; datasize[rbno] = new_hdr_size; memcpy(upgrd_buff[0], new_head, new_hdr_size); rbno = INVERT(rbno); while(read_off < old_file_len2) { datasize[rbno] = MIN (old_file_len2 - read_off, bufsize); LSEEKREAD(fd, read_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) if (-1 == status) rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn); else rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); read_off += datasize[rbno]; rbno = INVERT(rbno); LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); write_off+= datasize[rbno]; } rbno = INVERT(rbno); LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status); if (0 != status) rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status); } /* end if small 
database */ free(upgrd_buff[0]); free(upgrd_buff[1]); close(fd); util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn); REVERT; if (0 != sem_rmid(upgrade_standalone_sems)) { util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems); rts_error(VARLSTCNT(1) ERR_MUNOUPGRD); } mupip_exit(SS_NORMAL); }
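/* The small-database path above is a classic two-buffer in-place shift: always read the next chunk before
 * the trailing write can land on it, toggling buffers with INVERT. A standalone sketch of the same technique,
 * assuming plain POSIX pread/pwrite; the function name, BUFSIZE, and the local INVERT stand-in are invented
 * for illustration. The read-ahead makes the shift safe as long as the growth delta fits in one buffer, which
 * holds above because each buffer spans a whole BLKS_PER_LMAP group while the header grows by far less.
 */
#define _XOPEN_SOURCE 500	/* for pread/pwrite */
#include <errno.h>
#include <string.h>
#include <unistd.h>

#define BUFSIZE		8192
#define INVERT(b)	(1 - (b))	/* local stand-in for the ping-pong toggle used above */

/* grow a file's header in place from old_len to new_len bytes, shifting the payload toward eof */
static int prepend_grow_header(int fd, const void *new_hdr, size_t new_len, size_t old_len, off_t file_len)
{
	static unsigned char	buf[2][BUFSIZE];
	size_t			dsize[2];
	off_t			read_off = (off_t)old_len, write_off = 0;
	ssize_t			n;
	int			rbno = 0;

	if ((new_len < old_len) || (new_len > BUFSIZE))
		return EINVAL;			/* the shift delta must fit in one buffer to be safe */
	memcpy(buf[rbno], new_hdr, new_len);	/* the new header is simply the first chunk written out */
	dsize[rbno] = new_len;
	rbno = INVERT(rbno);
	while (read_off < file_len)
	{	/* read ahead into one buffer ... */
		dsize[rbno] = ((file_len - read_off) > BUFSIZE) ? BUFSIZE : (size_t)(file_len - read_off);
		if ((ssize_t)dsize[rbno] != (n = pread(fd, buf[rbno], dsize[rbno], read_off)))
			return (0 > n) ? errno : EIO;
		read_off += dsize[rbno];
		rbno = INVERT(rbno);
		/* ... then write the other; it only touches bytes that have already been read */
		if ((ssize_t)dsize[rbno] != (n = pwrite(fd, buf[rbno], dsize[rbno], write_off)))
			return (0 > n) ? errno : EIO;
		write_off += dsize[rbno];
	}
	rbno = INVERT(rbno);	/* one buffer is still unwritten when the loop ends */
	if ((ssize_t)dsize[rbno] != (n = pwrite(fd, buf[rbno], dsize[rbno], write_off)))
		return (0 > n) ? errno : EIO;
	return 0;
}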
errval_t flounder_stub_lmp_send_buf(struct lmp_chan *chan, lmp_send_flags_t flags, const void *bufp, size_t len, size_t *pos) { errval_t err; const uint8_t *buf = bufp; do { // compute number of words for this message size_t msg_words = DIVIDE_ROUND_UP(len - *pos, sizeof(uintptr_t)); if (*pos == 0) { // space for header msg_words += 1; } if (msg_words > LMP_MSG_LENGTH) msg_words = LMP_MSG_LENGTH; // store initial position for retry size_t restartpos = *pos; // is this the start of the string? uintptr_t w1; if (*pos == 0) { // if so, send the length in the first word w1 = len; } else { // otherwise use it for payload w1 = getword(buf, pos, len); } // get the rest of the message, painfully #if LMP_MSG_LENGTH > 1 uintptr_t w2 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 2 uintptr_t w3 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 3 uintptr_t w4 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 4 uintptr_t w5 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 5 uintptr_t w6 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 6 uintptr_t w7 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 7 uintptr_t w8 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 8 uintptr_t w9 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 9 uintptr_t w10 = getword(buf, pos, len); #endif #if LMP_MSG_LENGTH > 10 #error Need to unroll message send loop further #endif // only set the sync flag if this is the last fragment lmp_send_flags_t f = flags; if (*pos < len) { f &= ~LMP_FLAG_SYNC; } // try to send err = lmp_chan_send(chan, f, NULL_CAP, msg_words, w1 #if LMP_MSG_LENGTH > 1 , w2 #endif #if LMP_MSG_LENGTH > 2 , w3 #endif #if LMP_MSG_LENGTH > 3 , w4 #endif #if LMP_MSG_LENGTH > 4 , w5 #endif #if LMP_MSG_LENGTH > 5 , w6 #endif #if LMP_MSG_LENGTH > 6 , w7 #endif #if LMP_MSG_LENGTH > 7 , w8 #endif #if LMP_MSG_LENGTH > 8 , w9 #endif #if LMP_MSG_LENGTH > 9 , w10 #endif ); if (err_is_fail(err)) { *pos = restartpos; } } while (err_is_ok(err) && *pos < len); // do we need to send more? if not, zero out our state for the next send if (*pos >= len) { *pos = 0; } return err; }
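/* The unrolled loop above leans on getword() to pull successive machine words out of the byte buffer,
 * zero-padded past the end of the payload. That helper is not part of this excerpt, so the version below is
 * an assumed implementation of its contract, not the actual flounder-generated code, together with a tiny
 * round-trip that unpacks the words the way a receive stub would.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* assumed contract: copy up to one word's worth of bytes from buf at *pos, zero-pad, advance *pos */
static uintptr_t getword(const uint8_t *buf, size_t *pos, size_t len)
{
	uintptr_t	word = 0;
	size_t		n = len - *pos;

	if (n > sizeof(word))
		n = sizeof(word);
	memcpy(&word, &buf[*pos], n);	/* native byte order; both ends of an LMP channel share it */
	*pos += n;
	return word;
}

int main(void)
{
	const uint8_t	msg[] = "payload crossing several words";
	uint8_t		out[sizeof(msg)];
	size_t		rpos = 0, wpos = 0;

	while (rpos < sizeof(msg))
	{	/* pack into words exactly as the send loop does ... */
		uintptr_t	w = getword(msg, &rpos, sizeof(msg));
		size_t		n = sizeof(msg) - wpos;

		if (n > sizeof(w))
			n = sizeof(w);
		memcpy(&out[wpos], &w, n);	/* ... and peel the bytes back off on the "receive" side */
		wpos += n;
	}
	assert(0 == memcmp(msg, out, sizeof(msg)));
	return 0;
}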
void dse_maps(void) { block_id blk, bml_blk; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT, BLK_SEG and BLK_FINI macros */ sm_uc_ptr_t bp; char util_buff[MAX_UTIL_LEN]; int4 bml_size, bml_list_size, blk_index, bml_index; int4 total_blks, blks_in_bitmap; int4 bplmap, dummy_int; unsigned char *bml_list; cache_rec_ptr_t cr, dummy_cr; bt_rec_ptr_t btr; int util_len; uchar_ptr_t blk_ptr; boolean_t was_crit; uint4 jnl_status; srch_blk_status blkhist; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; if (CLI_PRESENT == cli_present("BUSY") || CLI_PRESENT == cli_present("FREE") || CLI_PRESENT == cli_present("MASTER") || CLI_PRESENT == cli_present("RESTORE_ALL")) { if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); } CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ csa = cs_addrs; assert(&FILE_INFO(gv_cur_region)->s_addrs == csa); was_crit = csa->now_crit; if (csa->critical) crash_count = csa->critical->crashcnt; csd = csa->hdr; bplmap = csd->bplmap; if (CLI_PRESENT == cli_present("BLOCK")) { if (!cli_get_hex("BLOCK", (uint4 *)&blk)) return; if (blk < 0 || blk >= csa->ti->total_blks) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } else blk = patch_curr_blk; if (CLI_PRESENT == cli_present("FREE")) { if (0 == bplmap) { util_out_print("Cannot perform map updates: bplmap field of file header is zero.", TRUE); return; } if (blk / bplmap * bplmap == blk) { util_out_print("Cannot perform action on a map block.", TRUE); return; } bml_blk = blk / bplmap * bplmap; bm_setmap(bml_blk, blk, FALSE); return; } if (CLI_PRESENT == cli_present("BUSY")) { if (0 == bplmap) { util_out_print("Cannot perform map updates: bplmap field of file header is zero.", TRUE); return; } if (blk / bplmap * bplmap == blk) { util_out_print("Cannot perform action on a map block.", TRUE); return; } bml_blk = blk / bplmap * bplmap; bm_setmap(bml_blk, blk, TRUE); return; } blk_size = csd->blk_size; if (CLI_PRESENT == cli_present("MASTER")) { if (0 == bplmap) { util_out_print("Cannot perform maps updates: bplmap field of file header is zero.", TRUE); return; } if (!was_crit) grab_crit(gv_cur_region); bml_blk = blk / bplmap * bplmap; if (dba_mm == csd->acc_meth) bp = MM_BASE_ADDR(csa) + (off_t)bml_blk * blk_size; else { assert(dba_bg == csd->acc_meth); if (!(bp = t_qread(bml_blk, &dummy_int, &dummy_cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); } if ((csa->ti->total_blks / bplmap) * bplmap == bml_blk) total_blks = (csa->ti->total_blks - bml_blk); else total_blks = bplmap; if (NO_FREE_SPACE == bml_find_free(0, bp + SIZEOF(blk_hdr), total_blks)) bit_clear(bml_blk / bplmap, csa->bmm); else bit_set(bml_blk / bplmap, csa->bmm); if (bml_blk > csa->nl->highest_lbm_blk_changed) csa->nl->highest_lbm_blk_changed = bml_blk; if (!was_crit) rel_crit(gv_cur_region); return; } if (CLI_PRESENT == cli_present("RESTORE_ALL")) { if (0 == bplmap) { util_out_print("Cannot perform maps updates: bplmap field of file header is zero.", TRUE); return; } total_blks = csa->ti->total_blks; assert(ROUND_DOWN2(blk_size, 2 * SIZEOF(int4)) == blk_size); bml_size = BM_SIZE(bplmap); bml_list_size = (total_blks + bplmap - 1) / bplmap * bml_size; bml_list = (unsigned char *)malloc(bml_list_size); for (blk_index = 0, bml_index = 0; blk_index < total_blks; blk_index += bplmap, bml_index++) bml_newmap((blk_hdr_ptr_t)(bml_list + bml_index * bml_size), bml_size, csa->ti->curr_tn); if (!was_crit) { 
grab_crit(gv_cur_region); csa->hold_onto_crit = TRUE; /* need to do this AFTER grab_crit */ } blk = get_dir_root(); assert(blk < bplmap); csa->ti->free_blocks = total_blks - DIVIDE_ROUND_UP(total_blks, bplmap); bml_busy(blk, bml_list + SIZEOF(blk_hdr)); csa->ti->free_blocks = csa->ti->free_blocks - 1; dse_m_rest(blk, bml_list, bml_size, &csa->ti->free_blocks, TRUE); for (blk_index = 0, bml_index = 0; blk_index < total_blks; blk_index += bplmap, bml_index++) { t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ CWS_RESET; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ assert(csa->ti->early_tn == csa->ti->curr_tn); blk_ptr = bml_list + bml_index * bml_size; blkhist.blk_num = blk_index; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blk_ptr + SIZEOF(blk_hdr), bml_size - SIZEOF(blk_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, LCL_MAP_LEVL, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn); t_end(&dummy_hist, NULL, csa->ti->curr_tn); } /* Fill in master map */ for (blk_index = 0, bml_index = 0; blk_index < total_blks; blk_index += bplmap, bml_index++) { blks_in_bitmap = (blk_index + bplmap <= total_blks) ? bplmap : total_blks - blk_index; assert(1 < blks_in_bitmap); /* the last valid block in the database should never be a bitmap block */ if (NO_FREE_SPACE != bml_find_free(0, (bml_list + bml_index * bml_size) + SIZEOF(blk_hdr), blks_in_bitmap)) bit_set(blk_index / bplmap, csa->bmm); else bit_clear(blk_index / bplmap, csa->bmm); if (blk_index > csa->nl->highest_lbm_blk_changed) csa->nl->highest_lbm_blk_changed = blk_index; } if (!was_crit) { csa->hold_onto_crit = FALSE; /* need to do this before the rel_crit */ rel_crit(gv_cur_region); } if (unhandled_stale_timer_pop) process_deferred_stale(); free(bml_list); csd->kill_in_prog = csd->abandoned_kills = 0; return; } MEMCPY_LIT(util_buff, "!/Block "); util_len = SIZEOF("!/Block ") - 1; util_len += i2hex_nofill(blk, (uchar_ptr_t)&util_buff[util_len], 8); memcpy(&util_buff[util_len], " is marked !AD in its local bit map.!/", SIZEOF(" is marked !AD in its local bit map.!/") - 1); util_len += SIZEOF(" is marked !AD in its local bit map.!/") - 1; util_buff[util_len] = 0; if (!was_crit) grab_crit(gv_cur_region); util_out_print(util_buff, TRUE, 4, dse_is_blk_free(blk, &dummy_int, &dummy_cr) ? "free" : "busy"); if (!was_crit) rel_crit(gv_cur_region); return; }
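/* Both the MASTER and RESTORE_ALL paths above reduce to one rule: after scanning a local bitmap, set its
 * master-map bit if any free block was seen, else clear it. GT.M's bit_set/bit_clear operate on its own
 * shared-memory layout; the stand-ins below are a minimal illustrative analogue of that decision, with a
 * toy byte array for the master map.
 */
#include <stdio.h>
#include <string.h>

static void bit_set(unsigned int i, unsigned char *map)		{ map[i / 8] |= (unsigned char)(1u << (i % 8)); }
static void bit_clear(unsigned int i, unsigned char *map)	{ map[i / 8] &= (unsigned char)~(1u << (i % 8)); }
static int bit_test(unsigned int i, const unsigned char *map)	{ return (map[i / 8] >> (i % 8)) & 1; }

int main(void)
{
	unsigned char	bmm[4];				/* toy master map: one bit per local map */
	int		has_free[] = {1, 0, 1, 1, 0};	/* pretend per-bitmap scan results */
	unsigned int	i;

	memset(bmm, 0, sizeof(bmm));
	for (i = 0; i < sizeof(has_free) / sizeof(has_free[0]); i++)
	{	/* the same decision dse_maps makes per local bitmap */
		if (has_free[i])
			bit_set(i, bmm);
		else
			bit_clear(i, bmm);
	}
	for (i = 0; i < sizeof(has_free) / sizeof(has_free[0]); i++)
		printf("local map %u: %s\n", i, bit_test(i, bmm) ? "has free blocks" : "full");
	return 0;
}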
uint4 mur_block_count_correct(reg_ctl_list *rctl) { gtm_uint64_t native_size, size; sgmnt_data_ptr_t mu_data; int4 mu_int_ovrhd; uint4 total_blks; uint4 status; uint4 new_bit_maps, bplmap, new_blocks, tmpcnt; enum db_acc_method acc_meth; MUR_CHANGE_REG(rctl); mu_data = cs_data; acc_meth = mu_data->acc_meth; switch (acc_meth) { case dba_bg: case dba_mm: mu_int_ovrhd = (int4)DIVIDE_ROUND_UP(SIZEOF_FILE_HDR(mu_data) + mu_data->free_space, DISK_BLOCK_SIZE); break; default: assertpro(FALSE && acc_meth); } mu_int_ovrhd += 1; assert(mu_int_ovrhd == mu_data->start_vbn); size = mu_int_ovrhd + (off_t)(mu_data->blk_size / DISK_BLOCK_SIZE) * mu_data->trans_hist.total_blks; native_size = gds_file_size(gv_cur_region->dyn.addr->file_cntl); /* In the following tests, the EOF block should always be 1 greater than the actual size of the file. * This is due to the GDS being allocated in even DISK_BLOCK_SIZE-byte blocks. */ if (native_size && (size < native_size)) { total_blks = (dba_mm == acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks; if (JNL_ENABLED(cs_addrs)) cs_addrs->jnl->pini_addr = 0; /* Stop simulation of GTM process journal record writing (if any active)*/ /* If journaling, gdsfilext will need to write an inctn record. The timestamp of that journal record will * need to be adjusted to the current system time to reflect that it is recovery itself writing that record * instead of simulating GT.M activity. Since the variable jgbl.dont_reset_gbl_jrec_time is still set, gdsfilext * will NOT modify jgbl.gbl_jrec_time. Temporarily reset it to allow for adjustments to gbl_jrec_time. */ assert(jgbl.dont_reset_gbl_jrec_time); jgbl.dont_reset_gbl_jrec_time = FALSE; /* Calculate the number of blocks to add based on the difference between the real file size and the file size * computed from the header->total_blks. Takes into account that gdsfilext() will automatically add new_bit_maps * to the amount of blocks we request. */ bplmap = cs_data->bplmap; new_blocks = (native_size - size)/(mu_data->blk_size / DISK_BLOCK_SIZE); new_bit_maps = DIVIDE_ROUND_UP(total_blks + new_blocks, bplmap) - DIVIDE_ROUND_UP(total_blks, bplmap); tmpcnt = new_blocks - new_bit_maps; /* Call GDSFILEXT only if the no of blocks by which DB needs to be extended is not '0' since GDSFILEXT() treats * extension by count 0 as unavailability of space(NO_FREE_SPACE error). And in the following case, tmpcnt could * be '0' on AIX because in MM mode AIX increases the native_size to the nearest multiple of OS_PAGE_SIZE. * And this increase could be less than GT.M block size.*/ if (tmpcnt && SS_NORMAL != (status = GDSFILEXT(new_blocks - new_bit_maps, total_blks, TRANS_IN_PROG_FALSE))) { jgbl.dont_reset_gbl_jrec_time = TRUE; return (status); } jgbl.dont_reset_gbl_jrec_time = TRUE; # ifdef DEBUG /* Check that the filesize and blockcount in the fileheader match now after the extend */ size = mu_int_ovrhd + (off_t)(mu_data->blk_size / DISK_BLOCK_SIZE) * mu_data->trans_hist.total_blks; native_size = gds_file_size(gv_cur_region->dyn.addr->file_cntl); ALIGN_DBFILE_SIZE_IF_NEEDED(size, native_size); assert(size == native_size); # endif } return SS_NORMAL; }
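/* The size check above is pure arithmetic: the expected size in DISK_BLOCK_SIZE units is the header overhead
 * (start_vbn) plus blk_size/DISK_BLOCK_SIZE units per counted block, and anything beyond that in the real
 * file is blocks the header does not know about. A standalone mirror of that correction, with invented
 * example numbers in place of real header fields:
 */
#include <stdio.h>

#define DISK_BLOCK_SIZE	512

int main(void)
{
	unsigned long long	native_size = 41536;	/* file size in DISK_BLOCK_SIZE units (example) */
	unsigned long long	size, units_per_blk;
	unsigned int		start_vbn = 49, blk_size = 4096, total_blks = 5000;	/* example header fields */
	unsigned int		new_blocks;

	units_per_blk = blk_size / DISK_BLOCK_SIZE;
	size = start_vbn + units_per_blk * total_blks;	/* what the header claims the file holds */
	if (size < native_size)
	{	/* the same correction mur_block_count_correct derives before calling GDSFILEXT */
		new_blocks = (unsigned int)((native_size - size) / units_per_blk);
		printf("header counts %u blocks but the file holds %u more; extend by that count\n",
			total_blks, new_blocks);
	}
	return 0;
}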
uint4 jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size) { file_control *fc; boolean_t need_extend; jnl_buffer_ptr_t jb; jnl_create_info jnl_info; jnl_file_header *header; unsigned char hdr_buff[REAL_JNL_HDR_LEN + MAX_IO_BLOCK_SIZE]; uint4 new_alq; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; char prev_jnl_fn[JNL_NAME_SIZE]; uint4 jnl_status = 0, status; int new_blocks, warn_blocks, result; gtm_uint64_t avail_blocks; uint4 aligned_tot_jrec_size, count; uint4 jnl_fs_block_size, read_write_size; DCL_THREADGBL_ACCESS; switch(jpc->region->dyn.addr->acc_meth) { case dba_mm: case dba_bg: csa = &FILE_INFO(jpc->region)->s_addrs; break; default: GTMASSERT; } csd = csa->hdr; assert(csa == cs_addrs && csd == cs_data); assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state))); assert(&FILE_INFO(jpc->region)->s_addrs == csa); assert(csa->jnl_state == csd->jnl_state); assertpro(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && (!JNL_FILE_SWITCHED(jpc))); /* crit and messing with the journal file - how could it have vanished? */ if (!csd->jnl_deq || (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit)) { assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq); assert(csd->jnl_alq == csd->autoswitchlimit); new_blocks = csd->jnl_alq; } else /* May cause extension of csd->jnl_deq * n blocks where n > 0 */ new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq); jpc->status = SS_NORMAL; jb = jpc->jnl_buff; assert(0 <= new_blocks); DEBUG_ONLY(count = 0); for (need_extend = (0 != new_blocks); need_extend; ) { DEBUG_ONLY(count++); /* usually we will do the loop just once where we do the file extension. * rarely we might need to do an autoswitch instead after which again rarely * we might need to do an extension on the new journal to fit in the transaction's journal requirements. * therefore we should do this loop a maximum of twice. hence the assert below. */ assert(count <= 2); need_extend = FALSE; if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE))) { warn_blocks = (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit) ? ((csd->jnl_deq > csd->autoswitchlimit) ? csd->jnl_deq : csd->autoswitchlimit) : new_blocks; if ((warn_blocks * EXTEND_WARNING_FACTOR) > avail_blocks) { if (new_blocks > avail_blocks) { /* If we cannot satisfy the request, it is an error, unless the anticipatory freeze * scheme is in effect in which case, we will assume space is available even if * it is not and go ahead with writes to the disk. If the writes fail with ENOSPC * we will freeze the instance and wait for space to become available and keep * retrying the writes. Therefore, we make the NOSPACEEXT a warning in this case. 
*/ SETUP_THREADGBL_ACCESS; if (!ANTICIPATORY_FREEZE_ENABLED(csa)) { send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd), new_blocks, avail_blocks); new_blocks = 0; jpc->status = SS_NORMAL; break; } else send_msg(VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4, JNL_LEN_STR(csd), new_blocks, avail_blocks); } else send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd), (avail_blocks - warn_blocks)); } } else send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status); new_alq = jb->filesize + new_blocks; /* ensure current journal file size is well within autoswitchlimit --> design constraint */ assert(csd->autoswitchlimit >= jb->filesize); if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks))) /* close to max */ send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize); if (csd->autoswitchlimit < new_alq) { /* Reached max, need to autoswitch */ /* Ensure new journal file can hold the entire current transaction's journal record requirements */ assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size)); memset(&jnl_info, 0, SIZEOF(jnl_info)); jnl_info.prev_jnl = &prev_jnl_fn[0]; set_jnl_info(gv_cur_region, &jnl_info); assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc))); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { /* flush the cache and jnl-buffer-contents to current journal file before * switching to a new journal. Set a global variable in_jnl_file_autoswitch * so jnl_write can know not to do the padding check. But because this is a global * variable, we also need to make sure it is reset in case of errors during the * autoswitch (or else calls to jnl_write after we are out of the autoswitch logic * will continue to incorrectly not do the padding check). Hence a condition handler. */ assert(!in_jnl_file_autoswitch); in_jnl_file_autoswitch = TRUE; /* Also make sure time is not changed. This way if "jnl_write" as part of writing a * journal record invokes jnl_file_extend, when the autoswitch is done and writing * of the parent jnl_write resumes, we want it to continue with the same timestamp * and not have to reset its time (non-trivial task) to reflect any changes since then. */ assert(!jgbl.save_dont_reset_gbl_jrec_time); jgbl.save_dont_reset_gbl_jrec_time = jgbl.dont_reset_gbl_jrec_time; jgbl.dont_reset_gbl_jrec_time = TRUE; /* Establish a condition handler so we reset a few global variables that have * temporarily been modified in case of errors inside wcs_flu/jnl_file_close. */ ESTABLISH_RET(jnl_file_autoswitch_ch, EXIT_ERR); /* It is possible we still have not written a PINI record in this journal file * (e.g. mupip extend saw the need to do jnl_file_extend inside jnl_write while * trying to write a PINI record). Write a PINI record in that case before closing * the journal file, so that the EOF record will have a non-zero pini_addr.
*/ if (0 == jpc->pini_addr) jnl_put_jrt_pini(csa); wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SPEEDUP_NOBEFORE); jnl_file_close(gv_cur_region, TRUE, TRUE); REVERT; in_jnl_file_autoswitch = FALSE; jgbl.dont_reset_gbl_jrec_time = jgbl.save_dont_reset_gbl_jrec_time; DEBUG_ONLY(jgbl.save_dont_reset_gbl_jrec_time = FALSE); assert((dba_mm == cs_data->acc_meth) || (csd == cs_data)); csd = cs_data; /* In MM, wcs_flu() can remap an extended DB, so reset csd to be sure */ } else { if (SS_NORMAL != jpc->status) rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region), jpc->status); else rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region)); } assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr)); assert(jgbl.forw_phase_recovery || (NULL == jgbl.mur_pini_addr_reset_fnptr)); if (NULL != jgbl.mur_pini_addr_reset_fnptr) (*jgbl.mur_pini_addr_reset_fnptr)(csa); assert(!jnl_info.no_rename); assert(!jnl_info.no_prev_link); if (EXIT_NRM == cre_jnl_file(&jnl_info)) { assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len)); assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0'); assert(csd->jnl_file_len == jnl_info.jnl_len); assert(csd->jnl_buffer_size == jnl_info.buffer); assert(csd->jnl_alq == jnl_info.alloc); assert(csd->jnl_deq == jnl_info.extend); assert(csd->jnl_before_image == jnl_info.before_images); csd->jnl_checksum = jnl_info.checksum; csd->jnl_eovtn = csd->trans_hist.curr_tn; send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREAT, 2, JNL_LEN_STR(csd)); fc = gv_cur_region->dyn.addr->file_cntl; fc->op = FC_WRITE; fc->op_buff = (sm_uc_ptr_t)csd; fc->op_len = SGMNT_HDR_LEN; fc->op_pos = 1; status = dbfilop(fc); if (SS_NORMAL != status) send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status); assert(JNL_ENABLED(csa)); /* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */ jnl_status = jnl_ensure_open(); /* sets jpc->status */ if (0 != jnl_status) { if (jpc->status) rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region), jpc->status); else rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region)); } assert(jb->filesize == csd->jnl_alq); if (csd->jnl_alq + csd->jnl_deq <= csd->autoswitchlimit) { aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size), csd->jnl_alq, csd->jnl_deq); if (aligned_tot_jrec_size > csd->jnl_alq) { /* need to extend more than initial allocation in the new journal file * to accommodate the current transaction. 
*/ new_blocks = aligned_tot_jrec_size - csd->jnl_alq; assert(new_blocks); assert(0 == new_blocks % csd->jnl_deq); need_extend = TRUE; } } } else { send_msg(VARLSTCNT(4) ERR_JNLNOCREATE, 2, JNL_LEN_STR(csd)); jpc->status = ERR_JNLNOCREATE; new_blocks = -1; } } else { assert(!need_extend); /* ensure we won't go through the for loop again */ /* Virtually extend currently used journal file */ jnl_fs_block_size = jb->fs_block_size; header = (jnl_file_header *)(ROUND_UP2((uintszofptr_t)hdr_buff, jnl_fs_block_size)); read_write_size = ROUND_UP2(REAL_JNL_HDR_LEN, jnl_fs_block_size); assert((unsigned char *)header + read_write_size <= ARRAYTOP(hdr_buff)); DO_FILE_READ(jpc->channel, 0, header, read_write_size, jpc->status, jpc->status2); if (SS_NORMAL != jpc->status) { assert(FALSE); rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status); } assert((header->virtual_size + new_blocks) == new_alq); jb->filesize = new_alq; /* Actually this is virtual file size blocks */ header->virtual_size = new_alq; JNL_DO_FILE_WRITE(csa, csd->jnl_file_name, jpc->channel, 0, header, read_write_size, jpc->status, jpc->status2); if (SS_NORMAL != jpc->status) { assert(FALSE); rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status); } } if (0 >= new_blocks) break; } if (0 < new_blocks) { INCR_GVSTATS_COUNTER(csa, csa->nl, n_jnl_extends, 1); return EXIT_NRM; } jpc->status = ERR_JNLREADEOF; jnl_file_lost(jpc, ERR_JNLEXTEND); return EXIT_ERR; }
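/* Editor's note: a minimal, self-contained sketch of the extension-sizing decision at the top of jnl_file_extend()
 * above: round the pending journal-record bytes up to 512-byte disk blocks, then up to a whole number of jnl_deq
 * extension quanta, falling back to a plain jnl_alq-sized extension when no quantum is configured or when
 * alloc + quantum would overshoot the autoswitch limit. jnl_extend_blocks() is a made-up helper and the values in
 * main() are hypothetical; only DISK_BLOCK_SIZE and the rounding macros mirror the real definitions. */

#include <stdio.h>
#include <stdint.h>

#define DISK_BLOCK_SIZE 512u
#define DIVIDE_ROUND_UP(x, y) (((x) + (y) - 1) / (y))
#define ROUND_UP(x, y)        (DIVIDE_ROUND_UP(x, y) * (y))

static uint32_t jnl_extend_blocks(uint32_t total_jnl_rec_size,	/* bytes of pending journal records */
				  uint32_t jnl_alq,		/* initial allocation, in disk blocks */
				  uint32_t jnl_deq,		/* extension quantum, in disk blocks */
				  uint32_t autoswitchlimit)	/* maximum journal file size, in disk blocks */
{
	if (!jnl_deq || (jnl_alq + jnl_deq > autoswitchlimit))
		return jnl_alq;		/* extend by the allocation size only */
	return ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), jnl_deq);
}

int main(void)
{
	printf("%u blocks\n", jnl_extend_blocks(70000u, 2048u, 2048u, 8386560u));	/* 137 blocks needed -> 2048 */
	printf("%u blocks\n", jnl_extend_blocks(70000u, 2048u, 100u, 8386560u));	/* 137 blocks needed -> 200 */
	return 0;
}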
void cce_dbdump(void) { uint4 channel, status, flags, pid, real_size, req_size, size, addrs[2], sec_addrs[2]; int i, j, k, l; gds_file_id file; m_iosb stat_blk; sgmnt_data *sd; unsigned char mbuff[512], *c, *cptr, *ctop; $DESCRIPTOR(d_sec,mbuff); static readonly $DESCRIPTOR(d_cmd,"install lis/glo"); static readonly $DESCRIPTOR(d_mnam,"CCE$DBDUMPMBX"); static readonly $DESCRIPTOR(d_pnam,"CCE$DBDUMPPRC"); char filename[]="SYS$LOGIN:CCE_DBDUMP.DMP", buff[RECORD_SIZE], id_lab[]=" FILE ID:"; struct FAB fab; struct RAB rab; error_def(ERR_CCEDBDUMP); error_def(ERR_CCEDBNODUMP); util_out_open(0); status = sys$crembx(0, &channel, 512, 0, 0, PSL$C_USER, &d_mnam); if (status != SS$_NORMAL) sys$exit(status); flags = CLI$M_NOWAIT | CLI$M_NOLOGNAM; status = lib$spawn(&d_cmd, 0, &d_mnam, &flags, &d_pnam, &pid); if (status != SS$_NORMAL) { if (status == SS$_DUPLNAM) { util_out_print("Spawned process CCE$DBDUMPPRC already exists, cannot continue rundown",TRUE); } sys$exit(status); } /* the following guess at the dump file size is modeled on the calculation for a section */ size = DIVIDE_ROUND_UP((SIZEOF(sgmnt_data) + (WC_MAX_BUFFS + getprime(WC_MAX_BUFFS) + 1) * SIZEOF(bt_rec) + (DEF_LOCK_SIZE / OS_PAGELET_SIZE) + (WC_MAX_BUFFS + getprime(WC_MAX_BUFFS)) * SIZEOF(cache_rec) + SIZEOF(cache_que_heads)), OS_PAGELET_SIZE); size += EXTRA_SPACE; fab = cc$rms_fab; fab.fab$b_fac = FAB$M_PUT; fab.fab$l_fop = FAB$M_CBT | FAB$M_MXV | FAB$M_TEF; fab.fab$l_fna = filename; fab.fab$b_fns = SIZEOF(filename); fab.fab$b_rfm = FAB$C_FIX; fab.fab$w_mrs = RECORD_SIZE; fab.fab$w_deq = size; fab.fab$l_alq = size; switch (status = sys$create(&fab)) { case RMS$_NORMAL: case RMS$_CREATED: case RMS$_SUPERSEDE: case RMS$_FILEPURGED: break; default: util_out_print("Error: Cannot create dump file !AD.",TRUE,fab.fab$b_fns,fab.fab$l_fna); sys$exit(status); } rab = cc$rms_rab; rab.rab$l_fab = &fab; status = sys$connect(&rab); if (status != RMS$_NORMAL) { util_out_print("Error: Cannot connect to dump file !AD.",TRUE,fab.fab$b_fns,fab.fab$l_fna); sys$exit(status); } rab.rab$w_rsz = SIZEOF(buff); for (; ;) { status = sys$qiow (0, channel,IO$_READVBLK ,&stat_blk, 0, 0, mbuff, 512,0,0,0,0); if (status != SS$_NORMAL) { sys$exit(status); break; } if (stat_blk.status == SS$_ENDOFFILE) break; if (stat_blk.status != SS$_NORMAL) { sys$exit(stat_blk.status); break; } if (!memcmp("GT$S",mbuff,4)) { for ( c = mbuff; *c > 32 ; c++) ; d_sec.dsc$w_length = c - mbuff; flags = SEC$M_GBL | SEC$M_WRT | SEC$M_SYSGBL | SEC$M_PAGFIL | SEC$M_DZRO | SEC$M_PERM; addrs[0] = addrs[1] = 0; fid_from_sec(&d_sec,&file); real_size = cce_sec_size(&file); if (real_size == 0) real_size = size; real_size += 1; assert(OS_PAGE_SIZE % OS_PAGELET_SIZE == 0); /* Request enough pagelets to ensure enough contiguous pages to contain desired number of pagelets. */ req_size = ROUND_UP(real_size, OS_PAGE_SIZE); lib$get_vm_page(&req_size, &addrs[0]); /* addrs will hold addresses of start and end of contiguous block of pagelets for use by $deltva. */ assert((addrs[0] + (req_size * OS_PAGELET_SIZE) - 1) == addrs[1]); /* $get_vm_page returns pagelets; we must align to integral page boundary. */ /* sec_addrs will contain the starting and ending addresses of the mapped section. 
*/ sec_addrs[0] = ROUND_UP(addrs[0], OS_PAGE_SIZE); /* align to first integral page boundary */ sec_addrs[1] = addrs[0] + (real_size * OS_PAGELET_SIZE); sec_addrs[1] = ROUND_UP(addrs[1], OS_PAGE_SIZE) - 1; /* A(last byte of last page) */ status = init_sec(sec_addrs, &d_sec, 0, real_size, flags); if (status & 1) { sd = sec_addrs[0]; memset(buff, 0, RECORD_SIZE); memcpy(buff, d_sec.dsc$a_pointer, d_sec.dsc$w_length); cptr = &buff[0] + d_sec.dsc$w_length; memcpy(cptr,id_lab,SIZEOF(id_lab)); cptr += SIZEOF(id_lab); memcpy(cptr,&file,SIZEOF(file)); rab.rab$l_rbf = buff; status = sys$put(&rab); if (status != RMS$_NORMAL) { util_out_print("Error writing to dump file !AD.",TRUE,fab.fab$b_fns,fab.fab$l_fna); util_out_print("Status code is !UL.",TRUE,status); break; } for (c = sd, i = 0; (real_size + EXTRA_SPACE) >= i; c += RECORD_SIZE, i += (RECORD_SIZE / DISK_BLOCK_SIZE)) { rab.rab$l_rbf = c; status = sys$put(&rab); if (status != RMS$_NORMAL) { util_out_print("Error writing to dump file !AD.",TRUE,fab.fab$b_fns,fab.fab$l_fna); util_out_print("Status code is !UL.",TRUE,status); break; } } lib$signal(ERR_CCEDBDUMP,2,d_sec.dsc$w_length,d_sec.dsc$a_pointer); }else { lib$signal(ERR_CCEDBNODUMP,2,d_sec.dsc$w_length,d_sec.dsc$a_pointer,status,0); } gtm_deltva(&addrs[0],0,0); lib$free_vm_page(&real_size,&addrs[0]); } } sys$exit(SS$_NORMAL); }
static errval_t do_map(struct pmap_arm *pmap, genvaddr_t vaddr, struct capref frame, size_t offset, size_t size, vregion_flags_t flags, size_t *retoff, size_t *retsize) { errval_t err; size = ROUND_UP(size, BASE_PAGE_SIZE); size_t pte_count = DIVIDE_ROUND_UP(size, BASE_PAGE_SIZE); genvaddr_t vend = vaddr + size; if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend-1)) { // fast path err = do_single_map(pmap, vaddr, vend, frame, offset, pte_count, flags); if (err_is_fail(err)) { DEBUG_ERR(err, "[do_map] in fast path"); return err_push(err, LIB_ERR_PMAP_DO_MAP); } } else { // multiple leaf page tables // first leaf uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr); genvaddr_t temp_end = vaddr + c * BASE_PAGE_SIZE; err = do_single_map(pmap, vaddr, temp_end, frame, offset, c, flags); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } // map full leaves while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend)) { // update vars vaddr = temp_end; temp_end = vaddr + ARM_L2_MAX_ENTRIES * BASE_PAGE_SIZE; offset += c * BASE_PAGE_SIZE; c = ARM_L2_MAX_ENTRIES; // copy cap struct capref next; err = slot_alloc(&next); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } err = cap_copy(next, frame); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } frame = next; // do mapping err = do_single_map(pmap, vaddr, temp_end, frame, offset, ARM_L2_MAX_ENTRIES, flags); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } } // map remaining part offset += c * BASE_PAGE_SIZE; c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end); if (c) { // copy cap struct capref next; err = slot_alloc(&next); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } err = cap_copy(next, frame); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } // do mapping err = do_single_map(pmap, temp_end, vend, next, offset, c, flags); if (err_is_fail(err)) { return err_push(err, LIB_ERR_PMAP_DO_MAP); } } } if (retoff) { *retoff = offset; } if (retsize) { *retsize = size; } //has_vnode_debug = false; return SYS_ERR_OK; #if 0 errval_t err; uintptr_t pmap_flags = vregion_flags_to_kpi_paging_flags(flags); for (size_t i = offset; i < offset + size; i += BASE_PAGE_SIZE) { vaddr += BASE_PAGE_SIZE; } if (retoff) { *retoff = offset; } if (retsize) { *retsize = size; } return SYS_ERR_OK; #endif }
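/* Editor's note: a small standalone model of how do_map() above splits a mapping that spans more than one L2 page
 * table: a partial first leaf, zero or more full leaves, and a partial trailing leaf. It assumes 4 KiB base pages
 * and 256-entry L2 tables (1 MiB per L1 slot), which may differ from the actual Barrelfish constants; show_split()
 * is a made-up helper that only prints the page count do_map() would hand to each do_single_map() call. */

#include <stdio.h>
#include <stdint.h>

#define BASE_PAGE_SIZE     4096u
#define ARM_L2_MAX_ENTRIES 256u					/* one L2 table maps 256 * 4 KiB = 1 MiB */
#define ARM_L1_OFFSET(a)   ((uint32_t)((a) >> 20))		/* which 1 MiB L1 slot */
#define ARM_L2_OFFSET(a)   ((uint32_t)(((a) >> 12) & 0xffu))	/* page slot within the L2 table */

static void show_split(uint32_t vaddr, uint32_t size)		/* vaddr and size assumed page aligned */
{
	uint32_t vend = vaddr + size;
	if (ARM_L1_OFFSET(vaddr) == ARM_L1_OFFSET(vend - 1))
	{
		printf("single leaf: %u pages\n", size / BASE_PAGE_SIZE);
		return;
	}
	uint32_t c = ARM_L2_MAX_ENTRIES - ARM_L2_OFFSET(vaddr);		/* first (partial) leaf */
	uint32_t temp_end = vaddr + c * BASE_PAGE_SIZE;
	printf("first leaf: %u pages\n", c);
	while (ARM_L1_OFFSET(temp_end) < ARM_L1_OFFSET(vend))		/* full leaves */
	{
		temp_end += ARM_L2_MAX_ENTRIES * BASE_PAGE_SIZE;
		printf("full leaf: %u pages\n", ARM_L2_MAX_ENTRIES);
	}
	c = ARM_L2_OFFSET(vend) - ARM_L2_OFFSET(temp_end);		/* trailing remainder */
	if (c)
		printf("last leaf: %u pages\n", c);
}

int main(void)
{
	show_split(0x000ff000u, 3u * 1024u * 1024u);	/* 768 pages crossing several 1 MiB slots */
	return 0;
}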
/* Initialize the TP structures we will be using for the successive TP operations */ void gvcst_tp_init(gd_region *greg) { sgm_info *si; sgmnt_addrs *csa; csa = (sgmnt_addrs *)&FILE_INFO(greg)->s_addrs; if (NULL == csa->sgm_info_ptr) { si = csa->sgm_info_ptr = (sgm_info *)malloc(sizeof(sgm_info)); assert(32768 > sizeof(sgm_info)); memset(si, 0, sizeof(sgm_info)); si->tp_hist_size = TP_MAX_MM_TRANSIZE; si->cur_tp_hist_size = INIT_CUR_TP_HIST_SIZE; /* should be very much less than si->tp_hist_size */ assert(si->cur_tp_hist_size <= si->tp_hist_size); si->blks_in_use = (hash_table_int4 *)malloc(sizeof(hash_table_int4)); init_hashtab_int4(si->blks_in_use, BLKS_IN_USE_INIT_ELEMS); /* See comment in tp.h about cur_tp_hist_size for details */ si->first_tp_hist = si->last_tp_hist = (srch_blk_status *)malloc(sizeof(srch_blk_status) * si->cur_tp_hist_size); si->cw_set_list = (buddy_list *)malloc(sizeof(buddy_list)); initialize_list(si->cw_set_list, sizeof(cw_set_element), CW_SET_LIST_INIT_ALLOC); si->tlvl_cw_set_list = (buddy_list *)malloc(sizeof(buddy_list)); initialize_list(si->tlvl_cw_set_list, sizeof(cw_set_element), TLVL_CW_SET_LIST_INIT_ALLOC); si->tlvl_info_list = (buddy_list *)malloc(sizeof(buddy_list)); initialize_list(si->tlvl_info_list, sizeof(tlevel_info), TLVL_INFO_LIST_INIT_ALLOC); si->new_buff_list = (buddy_list *)malloc(sizeof(buddy_list)); initialize_list(si->new_buff_list, SIZEOF(que_ent) + csa->hdr->blk_size, NEW_BUFF_LIST_INIT_ALLOC); si->recompute_list = (buddy_list *)malloc(sizeof(buddy_list)); initialize_list(si->recompute_list, sizeof(key_cum_value), RECOMPUTE_LIST_INIT_ALLOC); /* The size of the si->cr_array can go up to TP_MAX_MM_TRANSIZE, but usually is quite less. * Therefore, initially allocate a small array and expand as needed later. */ if (dba_bg == greg->dyn.addr->acc_meth) { si->cr_array_size = si->cur_tp_hist_size; si->cr_array = (cache_rec_ptr_ptr_t)malloc(sizeof(cache_rec_ptr_t) * si->cr_array_size); } else { si->cr_array_size = 0; si->cr_array = NULL; } si->fresh_start = TRUE; } else si = csa->sgm_info_ptr; si->gv_cur_region = greg; si->tp_csa = csa; si->tp_csd = csa->hdr; si->start_tn = csa->ti->curr_tn; if (JNL_ALLOWED(csa)) { si->total_jnl_rec_size = csa->min_total_tpjnl_rec_size; /* Reinitialize total_jnl_rec_size */ /* Since the following jnl-mallocs are independent of any dynamically-changeable parameter of the * database, we can as well use the existing malloced jnl structures if at all they exist. */ if (NULL == si->jnl_tail) { si->jnl_tail = &si->jnl_head; si->jnl_list = (buddy_list *)malloc(sizeof(buddy_list)); initialize_list(si->jnl_list, sizeof(jnl_format_buffer), JNL_LIST_INIT_ALLOC); si->format_buff_list = (buddy_list *)malloc(sizeof(buddy_list)); /* Minimum value of elemSize is 8 due to alignment requirements of the returned memory location. * Therefore, we request an elemSize of 8 bytes for the format-buffer and will convert as much * bytes as we need into as many 8-byte multiple segments (see code in jnl_format). */ initialize_list(si->format_buff_list, JFB_ELE_SIZE, DIVIDE_ROUND_UP(JNL_FORMAT_BUFF_INIT_ALLOC, JFB_ELE_SIZE)); } } else if (NULL != si->jnl_tail) { /* journaling is currently disallowed although it was allowed (non-zero si->jnl_tail) * during the prior use of this region. Free up unnecessary region-specific structures now. */ FREEUP_BUDDY_LIST(si->jnl_list); FREEUP_BUDDY_LIST(si->format_buff_list); si->jnl_tail = NULL; } }
uint4 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog) { sm_uc_ptr_t old_base[2], mmap_retaddr; boolean_t was_crit, is_mm; int result, save_errno, status; DEBUG_ONLY(int first_save_errno); uint4 new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total; uint4 jnl_status; gtm_uint64_t avail_blocks, mmap_sz; off_t new_eof, new_size; trans_num curr_tn; unix_db_info *udi; inctn_opcode_t save_inctn_opcode; int4 prev_extend_blks_to_upgrd; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; cache_rec_ptr_t cr; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(!IS_DSE_IMAGE); assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */ assert(!process_exiting); DEBUG_ONLY(old_base[0] = old_base[1] = NULL); assert(!gv_cur_region->read_only); udi = FILE_INFO(gv_cur_region); is_mm = (dba_mm == cs_addrs->hdr->acc_meth); # if !defined(MM_FILE_EXT_OK) if (!udi->grabbed_access_sem && is_mm) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */ # endif /* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will overflow and end up doing silly things. */ assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR)); # if defined(__sun) || defined(__hpux) cs_data->defer_allocate = TRUE; # endif if (!blocks && (cs_data->defer_allocate || (TRANS_IN_PROG_TRUE == trans_in_prog))) return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */ bplmap = cs_data->bplmap; /* New total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this * manner as every non-bitmap block must have an associated bitmap) * Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap. * Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed. */ new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1) - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap); new_blocks = blocks + new_bit_maps; assert((0 < (int)new_blocks) || (!cs_data->defer_allocate && (0 == new_blocks))); if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data)) { assert(WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR)); send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX); return (uint4)(NO_FREE_SPACE); } if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE))) { send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); } else { if (!(gtmDebugLevel & GDL_IgnoreAvailSpace)) { /* Bypass this space check if debug flag above is on. Allows us to create a large sparce DB * in space it could never fit it if wasn't sparse. Needed for some tests. 
*/ avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE); if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks) { if (blocks > (uint4)avail_blocks) { if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs)) return (uint4)(NO_FREE_SPACE); else send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4, DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks); } else send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region), (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0))); } } } # ifdef DEBUG if (WBTEST_ENABLED(WBTEST_MM_CONCURRENT_FILE_EXTEND) && dollar_tlevel && !MEMCMP_LIT(gv_cur_region->rname, "DEFAULT")) { SYSTEM("$gtm_dist/mumps -run $gtm_wbox_mrtn"); assert(1 == cs_addrs->nl->wbox_test_seq_num); /* should have been set by mubfilcpy */ cs_addrs->nl->wbox_test_seq_num = 2; /* signal mupip backup to stop sleeping in mubfilcpy */ } # endif /* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */ was_crit = cs_addrs->now_crit; assert(!cs_addrs->hold_onto_crit || was_crit); /* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur. * If we are coming from online rollback (when that feature is available), we will come in holding crit and in * the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself. * Therefore if we are coming in holding crit from MUPIP, we expect the db to be unfrozen so no need to wait for * freeze. * If we are coming from GT.M and final retry (in which case we come in holding crit) we expect to have waited * for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this * function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that * at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region * to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused * op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which * we hold crit. */ assert(!was_crit || !FROZEN_HARD(cs_data) || (dollar_tlevel && (CDB_STAGNATE <= t_tries))); /* * If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt" * transaction numbers without correspondingly updating the history transaction numbers (effectively causing * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover) * if we already hold crit. */ if (!was_crit) { for ( ; ; ) { grab_crit(gv_cur_region); if (FROZEN_CHILLED(cs_data)) DO_CHILLED_AUTORELEASE(cs_addrs, cs_data); if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN) break; rel_crit(gv_cur_region); while (FROZEN(cs_data) || IS_REPL_INST_FROZEN) { hiber_start(1000); if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data)) break; } } } else if (FROZEN_HARD(cs_data) && dollar_tlevel) { /* We don't want to continue with file extension as explained above. Hence return with an error code which * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly. 
*/ assert(CDB_STAGNATE <= t_tries); GDSFILEXT_CLNUP; return (uint4)FINAL_RETRY_FREEZE_PROG; } else WAIT_FOR_REGION_TO_UNCHILL(cs_addrs, cs_data); if (IS_REPL_INST_FROZEN && trans_in_prog) { assert(CDB_STAGNATE <= t_tries); GDSFILEXT_CLNUP; return (uint4)FINAL_RETRY_INST_FREEZE; } assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks); old_total = cs_data->trans_hist.total_blks; if (old_total != filesize) { /* Somebody else has already extended it, since we are in crit, this is trust-worthy. However, in case of MM, * we still need to remap the database */ assert((old_total > filesize) || !is_mm); /* For BG, someone else could have truncated or extended - we have no idea */ GDSFILEXT_CLNUP; return (SS_NORMAL); } if (trans_in_prog && SUSPICIOUS_EXTEND) { if (!was_crit) { GDSFILEXT_CLNUP; return (uint4)(EXTEND_SUSPECT); } /* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then it means we held * crit through bm_getfree into gdsfilext and still didn't get it right. */ assertpro(!is_free_blks_ctr_ok() && !SUSPICIOUS_EXTEND); } if (JNL_ENABLED(cs_data)) { if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = cs_addrs->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(gv_cur_region, cs_addrs); if (jnl_status) { GDSFILEXT_CLNUP; send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); return (uint4)(NO_FREE_SPACE); /* should have better return status */ } } if (is_mm) { cs_addrs->nl->mm_extender_pid = process_id; status = wcs_wtstart(gv_cur_region, 0, NULL, NULL); cs_addrs->nl->mm_extender_pid = 0; assertpro(SS_NORMAL == status); old_base[0] = cs_addrs->db_addrs[0]; old_base[1] = cs_addrs->db_addrs[1]; cs_addrs->db_addrs[0] = NULL; /* don't rely on it until the mmap below */ # ifdef _AIX status = shmdt(old_base[0] - BLK_ZERO_OFF(cs_data->start_vbn)); # else status = munmap((caddr_t)old_base[0], (size_t)(old_base[1] - old_base[0])); # endif if (0 != status) { save_errno = errno; GDSFILEXT_CLNUP; send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(12) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), ERR_SYSCALL, 5, LEN_AND_STR(MEM_UNMAP_SYSCALL), CALLFROM, save_errno); return (uint4)(NO_FREE_SPACE); } } else { /* Due to concurrency issues, it is possible some process had issued a disk read of the GDS block# corresponding * to "old_total" right after a truncate wrote a GDS-block of zeros on disk (to signal end of the db file). * If so, the global buffer containing this block needs to be invalidated now as part of the extend. If not, it is * possible the EOF block on disk is now going to be overwritten by a properly initialized bitmap block (as part * of the gdsfilext below) while the global buffer continues to have an incorrect copy of that bitmap block and * this in turn would cause XXXX failures due to a bad bitmap block in shared memory. 
(GTM-7519) */ cr = db_csh_get((block_id)old_total); if ((NULL != cr) && ((cache_rec_ptr_t)CR_NOTVALID != cr)) { assert((0 == cr->dirty) && (0 == cr->bt_index) && !cr->stopped); cr->cycle++; cr->blk = CR_BLKEMPTY; } } CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ new_total = old_total + new_blocks; new_eof = BLK_ZERO_OFF(cs_data->start_vbn) + ((off_t)new_total * cs_data->blk_size); # if !defined(__sun) && !defined(__hpux) if (!cs_data->defer_allocate) { new_size = new_eof + cs_data->blk_size; save_errno = posix_fallocate(udi->fd, 0, new_size); DEBUG_ONLY(first_save_errno = save_errno); if ((ENOSPC == save_errno) && IS_GTM_IMAGE) save_errno = extend_wait_for_fallocate(udi, new_size); if (0 != save_errno) { GDSFILEXT_CLNUP; assert(ENOSPC == save_errno); if (ENOSPC != save_errno) send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_PREALLOCATEFAIL, 2, DB_LEN_STR(gv_cur_region), save_errno); return (uint4)(NO_FREE_SPACE); } } # endif save_errno = db_write_eof_block(udi, udi->fd, cs_data->blk_size, new_eof, &(TREF(dio_buff))); if ((ENOSPC == save_errno) && IS_GTM_IMAGE) save_errno = extend_wait_for_write(udi, cs_data->blk_size, new_eof); if (0 != save_errno) { GDSFILEXT_CLNUP; if (ENOSPC != save_errno) send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno); return (uint4)(NO_FREE_SPACE); } if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_1)) { LONG_SLEEP(600); assert(FALSE); } /* Ensure the EOF and metadata get to disk BEFORE any bitmap writes. Otherwise, the file size could no longer reflect * a proper extent and subsequent invocations of gdsfilext could corrupt the database. */ if (!IS_STATSDB_CSA(cs_addrs)) { GTM_DB_FSYNC(cs_addrs, udi->fd, status); assert(0 == status); if (0 != status) { GDSFILEXT_CLNUP; send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(8) ERR_DBFILERR, 5, RTS_ERROR_LITERAL("fsync1()"), CALLFROM, status); return (uint4)(NO_FREE_SPACE); } } if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_2)) { LONG_SLEEP(600); assert(FALSE); /* Should be killed before that */ } DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
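/* Editor's note: a worked, standalone example of the bitmap arithmetic described in the new_bit_maps comment in
 * gdsfilext() above: each local bitmap block serves (bplmap - 1) data blocks, so a request for N data blocks may
 * also require additional bitmap blocks. new_bit_maps_for() is a made-up helper; the 10240-block database and the
 * 1000-block extension in main() are hypothetical numbers. */

#include <stdio.h>
#include <stdint.h>

#define DIVIDE_ROUND_UP(x, y) (((x) + (y) - 1) / (y))

static uint32_t new_bit_maps_for(uint32_t total_blks, uint32_t blocks, uint32_t bplmap)
{
	uint32_t cur_maps = DIVIDE_ROUND_UP(total_blks, bplmap);		/* bitmaps already in the file */
	uint32_t cur_data = total_blks - cur_maps;				/* non-bitmap blocks already in the file */
	uint32_t new_maps = DIVIDE_ROUND_UP(cur_data + blocks, bplmap - 1);	/* bitmaps needed afterwards */
	return new_maps - cur_maps;
}

int main(void)
{
	uint32_t nbm = new_bit_maps_for(10240u, 1000u, 512u);
	printf("new bitmaps: %u, total blocks added: %u\n", nbm, 1000u + nbm);	/* 2 and 1002 */
	return 0;
}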
void mupip_restore(void) { static readonly char label[] = GDS_LABEL; char db_name[MAX_FN_LEN + 1], *inbuf, *p; inc_list_struct *ptr; inc_header *inhead; sgmnt_data *old_data; short iosb[4]; unsigned short n_len; int4 status, vbn, rsize, temp, save_errno; uint4 rest_blks, totblks; trans_num curr_tn; uint4 ii; block_id blk_num; bool extend; uint4 cli_status; BFILE *in; int i, db_fd; uint4 old_blk_size, old_tot_blks, bplmap; short old_start_vbn; off_t new_eof; char buff[DISK_BLOCK_SIZE]; char msg_buffer[1024], *newmap, *newmap_bptr; mstr msg_string; char addr[SA_MAXLEN+1]; unsigned char tcp[5]; backup_type type; unsigned short port; int4 timeout, cut, match; char debug_info[256]; void (*common_read)(); char *errptr; pid_t waitpid_res; error_def(ERR_MUPRESTERR); error_def(ERR_MUPCLIERR); error_def(ERR_IOEOF); extend = TRUE; if (CLI_NEGATED == (cli_status = cli_present("EXTEND"))) extend = FALSE; mu_outofband_setup(); mu_gv_cur_reg_init(); n_len = sizeof(db_name); if (cli_get_str("DATABASE", db_name, &n_len) == FALSE) mupip_exit(ERR_MUPCLIERR); strcpy((char *)gv_cur_region->dyn.addr->fname, db_name); gv_cur_region->dyn.addr->fname_len = n_len; if (!mu_rndwn_file(gv_cur_region, TRUE)) { util_out_print("Error securing stand alone access to output file !AD. Aborting restore.", TRUE, n_len, db_name); mupip_exit(ERR_MUPRESTERR); } OPENFILE(db_name, O_RDWR, db_fd); if (-1 == db_fd) { save_errno = errno; util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); errptr = (char *)STRERROR(save_errno); util_out_print("open : !AZ", TRUE, errptr); mupip_exit(save_errno); } murgetlst(); inbuf = (char*)malloc(INC_BACKUP_CHUNK_SIZE); old_data = (sgmnt_data*)malloc(sizeof(sgmnt_data)); LSEEKREAD(db_fd, 0, old_data, sizeof(sgmnt_data), save_errno); if (0 != save_errno) { util_out_print("Error accessing output file !AD. 
Aborting restore.", TRUE, n_len, db_name); if (-1 != save_errno) { errptr = (char *)STRERROR(save_errno); util_out_print("read : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } else { db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_IOEOF); } } if (memcmp(&old_data->label[0], &label[0], GDS_LABEL_SZ)) { util_out_print("Output file !AD has an unrecognizable format", TRUE, n_len, db_name); free(old_data); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } curr_tn = old_data->trans_hist.curr_tn; old_blk_size = old_data->blk_size; old_tot_blks = old_data->trans_hist.total_blks; old_start_vbn = old_data->start_vbn; bplmap = old_data->bplmap; free(old_data); msg_string.addr = msg_buffer; msg_string.len = sizeof(msg_buffer); inhead = (inc_header *)malloc(sizeof(inc_header) + 8); inhead = (inc_header *)((((int4)inhead) + 7) & -8); rest_blks = 0; for (ptr = in_files.next; ptr; ptr = ptr->next) { /* --- determine source type --- */ type = backup_to_file; if (0 == ptr->input_file.len) continue; else if ('|' == *(ptr->input_file.addr + ptr->input_file.len - 1)) { type = backup_to_exec; ptr->input_file.len--; *(ptr->input_file.addr + ptr->input_file.len) = '\0'; } else if (ptr->input_file.len > 5) { lower_to_upper(tcp, (uchar_ptr_t)ptr->input_file.addr, 5); if (0 == memcmp(tcp, "TCP:/", 5)) { type = backup_to_tcp; cut = 5; while ('/' == *(ptr->input_file.addr + cut)) cut++; ptr->input_file.len -= cut; p = ptr->input_file.addr; while (p < ptr->input_file.addr + ptr->input_file.len) { *p = *(p + cut); p++; } *p = '\0'; } } /* --- open the input stream --- */ restore_read_errno = 0; switch(type) { case backup_to_file: common_read = iob_read; if ((in = iob_open_rd(ptr->input_file.addr, DISK_BLOCK_SIZE, BLOCKING_FACTOR)) == NULL) { save_errno = errno; util_out_print("Error accessing input file !AD. 
Aborting restore.", TRUE, ptr->input_file.len, ptr->input_file.addr); errptr = (char *)STRERROR(save_errno); util_out_print("open : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } ESTABLISH(iob_io_error); break; case backup_to_exec: pipe_child = 0; common_read = exec_read; in = (BFILE *)malloc(sizeof(BFILE)); if (0 > (in->fd = gtm_pipe(ptr->input_file.addr, input_from_comm))) { util_out_print("Error creating input pipe from !AD.", TRUE, ptr->input_file.len, ptr->input_file.addr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } #ifdef DEBUG_ONLINE PRINTF("file descriptor for the openned pipe is %d.\n", in->fd); PRINTF("the command passed to gtm_pipe is %s.\n", ptr->input_file.addr); #endif break; case backup_to_tcp: common_read = tcp_read; /* parse the input */ switch (match = SSCANF(ptr->input_file.addr, "%[^:]:%hu", addr, &port)) { case 1 : port = DEFAULT_BKRS_PORT; case 2 : break; default : util_out_print("Error : A hostname has to be specified.", TRUE); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if ((0 == cli_get_int("NETTIMEOUT", &timeout)) || (0 > timeout)) timeout = DEFAULT_BKRS_TIMEOUT; in = (BFILE *)malloc(sizeof(BFILE)); iotcp_fillroutine(); if (0 > (in->fd = tcp_open(addr, port, timeout, TRUE))) { util_out_print("Error establishing TCP connection to !AD.", TRUE, ptr->input_file.len, ptr->input_file.addr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } break; default: util_out_print("Aborting restore!/", TRUE); util_out_print("Unrecognized input format !AD", TRUE, ptr->input_file.len, ptr->input_file.addr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } COMMON_READ(in, inhead, sizeof(inc_header)); if (memcmp(&inhead->label[0], INC_HEADER_LABEL, INC_HDR_LABEL_SZ)) { util_out_print("Input file !AD has an unrecognizable format", TRUE, ptr->input_file.len, ptr->input_file.addr); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if (curr_tn != inhead->start_tn) { util_out_print("Transaction in input file !AD does not align with database TN.!/DB: !XL!_Input file: !XL", TRUE, ptr->input_file.len, ptr->input_file.addr, curr_tn, inhead->start_tn); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if (old_blk_size != inhead->blk_size) { util_out_print("Incompatable block size. Output file !AD has block size !XL,", TRUE, n_len, db_name); util_out_print("while input file !AD is from a database with block size !XL,", TRUE, ptr->input_file.len, ptr->input_file.addr, inhead->blk_size); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } if (old_tot_blks != inhead->db_total_blks) { if (old_tot_blks > inhead->db_total_blks || !extend) { totblks = old_tot_blks - DIVIDE_ROUND_UP(old_tot_blks, DISK_BLOCK_SIZE); util_out_print("Incompatable database sizes. 
Output file !AD has!/ !UL (!XL hex) total blocks,", TRUE, n_len, db_name, totblks, totblks); totblks = inhead->db_total_blks - DIVIDE_ROUND_UP(inhead->db_total_blks, DISK_BLOCK_SIZE); util_out_print("while input file !AD is from a database with!/ !UL (!XL hex) total blocks", TRUE, ptr->input_file.len, ptr->input_file.addr, totblks, totblks); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } else { /* this part of the code is similar to gdsfilext except that you don't need to do * most of the work that gdsfilext does. However, for situations where the database * extended since the last backup (the beginning of this incremental backup), and * there are new bitmaps that are never touched later on by GT.M, these bitmaps * will have tn == 0, which prevents the backup process to pick up these blocks, * so, we need to initialize these bitmaps here */ new_eof = ((off_t)(old_start_vbn - 1) * DISK_BLOCK_SIZE) + ((off_t)inhead->db_total_blks * old_blk_size); memset(buff, 0, DISK_BLOCK_SIZE); LSEEKWRITE(db_fd, new_eof, buff, DISK_BLOCK_SIZE, status); if (0 != status) { util_out_print("Aborting restore!/", TRUE); util_out_print("lseek or write error : Unable to extend output file !AD!/", TRUE, n_len, db_name); util_out_print(" from !UL (!XL hex) total blocks to !UL (!XL hex) total blocks.!/", TRUE, old_tot_blks, old_tot_blks, inhead->db_total_blks, inhead->db_total_blks); util_out_print(" Current input file is !AD with !UL (!XL hex) total blocks!/", TRUE, ptr->input_file.len, ptr->input_file.addr, inhead->db_total_blks, inhead->db_total_blks); gtm_putmsg(VARLSTCNT(1) status); free(inbuf); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } /* --- initialize all new bitmaps, just in case they are not touched later --- */ if (DIVIDE_ROUND_DOWN(inhead->db_total_blks, bplmap) > DIVIDE_ROUND_DOWN(old_tot_blks, bplmap)) { /* -- similar logic exist in bml_newmap.c, which need to pick up any new updates here -- */ newmap = (char *)malloc(old_blk_size); ((blk_hdr *)newmap)->bsiz = BM_SIZE(bplmap); ((blk_hdr *)newmap)->levl = LCL_MAP_LEVL; ((blk_hdr *)newmap)->tn = curr_tn; newmap_bptr = newmap + sizeof(blk_hdr); *newmap_bptr++ = THREE_BLKS_FREE; memset(newmap_bptr, FOUR_BLKS_FREE, BM_SIZE(bplmap) - sizeof(blk_hdr) - 1); for (ii = ROUND_UP(old_tot_blks, bplmap); ii <= inhead->db_total_blks; ii += bplmap) { new_eof = (off_t)(old_start_vbn - 1) * DISK_BLOCK_SIZE + (off_t)ii * old_blk_size; LSEEKWRITE(db_fd, new_eof, newmap, old_blk_size, status); if (0 != status) { util_out_print("Aborting restore!/", TRUE); util_out_print("Bitmap 0x!XL initialization error!", TRUE, ii); gtm_putmsg(VARLSTCNT(1) status); free(inbuf); free(newmap); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } } free(newmap); } old_tot_blks = inhead->db_total_blks; } } COMMON_READ(in, &rsize, sizeof(int4)); for ( ; ;) { /* rsize is the size of the record, including the size, but, since the size has already been read in, this will read in the current record and the size for the next record */ /* ensure we have a reasonable record size, at least */ if (rsize - sizeof(int4) - sizeof(block_id) > old_blk_size) { util_out_print("Invalid information in restore file !AD. 
Aborting restore.", TRUE, ptr->input_file.len, ptr->input_file.addr); iob_close(in); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(ERR_MUPRESTERR); } COMMON_READ(in, inbuf, rsize); if (!memcmp(inbuf, &end_msg[0], sizeof end_msg - 1)) break; rest_blks++; blk_num = *(block_id*)inbuf; vbn = old_start_vbn - 1 + (old_blk_size / DISK_BLOCK_SIZE * blk_num); LSEEKWRITE(db_fd, (off_t)vbn * DISK_BLOCK_SIZE, inbuf + sizeof(block_id), rsize - sizeof(block_id) - sizeof(int4), save_errno); if (0 != save_errno) { util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); errptr = (char *)STRERROR(save_errno); util_out_print("write : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } GET_LONG(temp, (inbuf + rsize - sizeof(int4))); rsize = temp; } GET_LONG(temp, (inbuf + rsize - sizeof(int4))); rsize = temp; vbn = 0; for (i = 0; ; i++) /* Restore file header */ { COMMON_READ(in, inbuf, rsize); if (!memcmp(inbuf, &hdr_msg[0], sizeof hdr_msg - 1)) break; LSEEKWRITE(db_fd, vbn, inbuf, rsize - sizeof(int4), save_errno); if (0 != save_errno) { util_out_print("Error accessing output file !AD. Aborting restore.", TRUE, n_len, db_name); errptr = (char *)STRERROR(save_errno); util_out_print("write : !AZ", TRUE, errptr); db_ipcs_reset(gv_cur_region, TRUE); mu_gv_cur_reg_free(); mupip_exit(save_errno); } vbn += rsize - sizeof(int4); GET_LONG(temp, (inbuf + rsize - sizeof(int4))); rsize = temp; } curr_tn = inhead->end_tn; switch (type) { case backup_to_file: REVERT; iob_close(in); break; case backup_to_exec: close(in->fd); if ((pipe_child > 0) && (FALSE != is_proc_alive(pipe_child, 0))) WAITPID(pipe_child, (int *)&status, 0, waitpid_res); break; case backup_to_tcp: break; } } util_out_print("!/RESTORE COMPLETED", TRUE); util_out_print("!UL blocks restored", TRUE, rest_blks); free(inbuf); db_ipcs_reset(gv_cur_region, FALSE); mu_gv_cur_reg_free(); mupip_exit(SS_NORMAL); }
unsigned char mu_cre_file(void) { char *cc = NULL, path[MAX_FBUFF + 1], errbuff[512]; unsigned char buff[DISK_BLOCK_SIZE]; int fd = -1, i, lower, upper, status, padded_len, padded_vbn, norm_vbn; uint4 raw_dev_size; /* size of a raw device, in bytes */ int4 blocks_for_create, blocks_for_extension, save_errno; GTM_BAVAIL_TYPE avail_blocks; file_control fc; mstr file; parse_blk pblk; unix_db_info udi_struct, *udi; char *fgets_res; gd_segment *seg; error_def(ERR_NOSPACECRE); error_def(ERR_LOWSPACECRE); assert((-(sizeof(uint4) * 2) & sizeof(sgmnt_data)) == sizeof(sgmnt_data)); cs_addrs = &udi_struct.s_addrs; cs_data = (sgmnt_data_ptr_t)NULL; /* for CLEANUP */ memset(&pblk, 0, sizeof(pblk)); pblk.fop = (F_SYNTAXO | F_PARNODE); pblk.buffer = path; pblk.buff_size = MAX_FBUFF; file.addr = (char*)gv_cur_region->dyn.addr->fname; file.len = gv_cur_region->dyn.addr->fname_len; strncpy(path,file.addr,file.len); *(path+file.len) = '\0'; if (is_raw_dev(path)) { /* do not use a default extension for raw device files */ pblk.def1_buf = DEF_NODBEXT; pblk.def1_size = sizeof(DEF_NODBEXT) - 1; } else { pblk.def1_buf = DEF_DBEXT; pblk.def1_size = sizeof(DEF_DBEXT) - 1; } if (1 != (parse_file(&file, &pblk) & 1)) { PRINTF("Error translating filename %s.\n", gv_cur_region->dyn.addr->fname); return EXIT_ERR; } path[pblk.b_esl] = 0; if (pblk.fnb & F_HAS_NODE) { /* Remote node specification given */ assert(pblk.b_node); PRINTF("Database file for region %s not created; cannot create across network.\n", path); return EXIT_WRN; } udi = &udi_struct; udi->raw = is_raw_dev(pblk.l_dir); if (udi->raw) { fd = OPEN(pblk.l_dir,O_EXCL | O_RDWR); if (-1 == fd) { SPRINTF_AND_PERROR("Error opening file %s\n"); return EXIT_ERR; } if (-1 != (status = (ssize_t)lseek(fd, 0, SEEK_SET))) { DOREADRC(fd, buff, sizeof(buff), status); } else status = errno; if (0 != status) { SPRINTF_AND_PERROR("Error reading header for file %s\n"); return EXIT_ERR; } if (!memcmp(buff, GDS_LABEL, STR_LIT_LEN(GDS_LABEL))) { char rsp[80]; PRINTF("Database already exists on device %s\n", path); PRINTF("Do you wish to re-initialize (all current data will be lost) [y/n] ? 
"); FGETS(rsp, 79, stdin, fgets_res); if ('y' != *rsp) return EXIT_NRM; } PRINTF("Determining size of raw device...\n"); for(i = 1; read(fd, buff, sizeof(buff)) == sizeof(buff);) { i *= 2; lseek(fd, (off_t)i * BUFSIZ, SEEK_SET); } lower = i / 2; upper = i; while ((lower + upper) / 2 != lower) { i = (lower + upper) / 2; lseek(fd, (off_t)i * BUFSIZ, SEEK_SET); if (read(fd, buff, sizeof(buff)) == sizeof(buff)) lower = i; else upper = i; } raw_dev_size = i * BUFSIZ; } else { fd = OPEN3(pblk.l_dir, O_CREAT | O_EXCL | O_RDWR, 0600); if (-1 == fd) { SPRINTF_AND_PERROR("Error opening file %s\n"); return EXIT_ERR; } if (0 != (save_errno = disk_block_available(fd, &avail_blocks, FALSE))) { errno = save_errno; SPRINTF_AND_PERROR("Error checking available disk space for %s\n"); CLEANUP(EXIT_ERR); return EXIT_ERR; } seg = gv_cur_region->dyn.addr; /* blocks_for_create is in the unit of DISK_BLOCK_SIZE */ blocks_for_create = DIVIDE_ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE) + 1 + (seg->blk_size / DISK_BLOCK_SIZE * ((DIVIDE_ROUND_UP(seg->allocation, BLKS_PER_LMAP - 1)) + seg->allocation)); if ((uint4)avail_blocks < blocks_for_create) { gtm_putmsg(VARLSTCNT(6) ERR_NOSPACECRE, 4, LEN_AND_STR(path), blocks_for_create, (uint4)avail_blocks); send_msg(VARLSTCNT(6) ERR_NOSPACECRE, 4, LEN_AND_STR(path), blocks_for_create, (uint4)avail_blocks); CLEANUP(EXIT_ERR); return EXIT_ERR; } blocks_for_extension = (seg->blk_size / DISK_BLOCK_SIZE * ((DIVIDE_ROUND_UP(EXTEND_WARNING_FACTOR * seg->ext_blk_count, BLKS_PER_LMAP - 1)) + EXTEND_WARNING_FACTOR * seg->ext_blk_count)); if ((uint4)(avail_blocks - blocks_for_create) < blocks_for_extension) { gtm_putmsg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, LEN_AND_STR(path), EXTEND_WARNING_FACTOR, blocks_for_extension, DISK_BLOCK_SIZE, (uint4)(avail_blocks - blocks_for_create)); send_msg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, LEN_AND_STR(path), EXTEND_WARNING_FACTOR, blocks_for_extension, DISK_BLOCK_SIZE, (uint4)(avail_blocks - blocks_for_create)); } } gv_cur_region->dyn.addr->file_cntl = &fc; fc.file_info = (void*)&udi_struct; udi->fd = fd; cs_data = (sgmnt_data_ptr_t)malloc(sizeof(sgmnt_data)); memset(cs_data, 0, sizeof(*cs_data)); cs_data->createinprogress = TRUE; cs_data->semid = INVALID_SEMID; cs_data->shmid = INVALID_SHMID; /* We want our datablocks to start on what would be a block boundary within the file so pad the fileheader if necessary to make this happen. */ padded_len = ROUND_UP(sizeof(sgmnt_data), BLK_SIZE); padded_vbn = DIVIDE_ROUND_UP(padded_len, DISK_BLOCK_SIZE) + 1; norm_vbn = DIVIDE_ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE) + 1; cs_data->start_vbn = padded_vbn; cs_data->free_space += (padded_vbn - norm_vbn) * DISK_BLOCK_SIZE; cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth; if (udi->raw) { /* calculate total blocks, reduce to make room for the * database header (size rounded up to a block), then * make into a multiple of BLKS_PER_LMAP to have a complete bitmap * for each set of blocks. 
*/ cs_data->trans_hist.total_blks = raw_dev_size - ROUND_UP(sizeof(sgmnt_data), DISK_BLOCK_SIZE); cs_data->trans_hist.total_blks /= (uint4)(((gd_segment *)gv_cur_region->dyn.addr)->blk_size); if (0 == (cs_data->trans_hist.total_blks - DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, BLKS_PER_LMAP - 1) % (BLKS_PER_LMAP - 1))) cs_data->trans_hist.total_blks -= 1; /* don't create a bitmap with no data blocks */ cs_data->extension_size = 0; PRINTF("Raw device size is %dK, %d GDS blocks\n", raw_dev_size / 1000, cs_data->trans_hist.total_blks); } else { cs_data->trans_hist.total_blks = gv_cur_region->dyn.addr->allocation; /* There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this * manner as every non-bitmap block must have an associated bitmap) */ cs_data->trans_hist.total_blks += DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, BLKS_PER_LMAP - 1); cs_data->extension_size = gv_cur_region->dyn.addr->ext_blk_count; } mucregini(cs_data->trans_hist.total_blks); cs_data->createinprogress = FALSE; LSEEKWRITE(udi->fd, 0, cs_data, sizeof(sgmnt_data), status); if (0 != status) { SPRINTF_AND_PERROR("Error writing out header for file %s\n"); CLEANUP(EXIT_ERR); return EXIT_ERR; } cc = (char*)malloc(DISK_BLOCK_SIZE); memset(cc, 0, DISK_BLOCK_SIZE); LSEEKWRITE(udi->fd, (cs_data->start_vbn - 1) * DISK_BLOCK_SIZE + ((off_t)(cs_data->trans_hist.total_blks) * cs_data->blk_size), cc, DISK_BLOCK_SIZE, status); if (0 != status) { SPRINTF_AND_PERROR("Error writing out end of file %s\n"); CLEANUP(EXIT_ERR); return EXIT_ERR; } if ((!udi->raw) && (-1 == CHMOD(pblk.l_dir, 0666))) { SPRINTF_AND_PERROR("Error changing file mode on file %s\n"); CLEANUP(EXIT_WRN); return EXIT_WRN; } CLEANUP(EXIT_NRM); PRINTF("Created file %s\n", path); return EXIT_NRM; }
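/* Editor's note: a standalone illustration of the start_vbn padding mu_cre_file() above performs so that GDS blocks
 * begin on a blk_size boundary within the file; the disk blocks gained by padding are credited back to free_space.
 * The 9000-byte header length and 4 KiB block size are hypothetical values chosen so the padding is visible. */

#include <stdio.h>
#include <stdint.h>

#define DISK_BLOCK_SIZE 512u
#define DIVIDE_ROUND_UP(x, y) (((x) + (y) - 1) / (y))
#define ROUND_UP(x, y)        (DIVIDE_ROUND_UP(x, y) * (y))

int main(void)
{
	uint32_t hdr_len  = 9000u;	/* hypothetical stand-in for sizeof(sgmnt_data) */
	uint32_t blk_size = 4096u;

	uint32_t padded_len = ROUND_UP(hdr_len, blk_size);			/* header padded to a block multiple */
	uint32_t padded_vbn = DIVIDE_ROUND_UP(padded_len, DISK_BLOCK_SIZE) + 1;	/* 1-based vbn of GDS block 0 */
	uint32_t norm_vbn   = DIVIDE_ROUND_UP(hdr_len, DISK_BLOCK_SIZE) + 1;	/* vbn without padding */

	printf("start_vbn = %u (unpadded would be %u)\n", padded_vbn, norm_vbn);		/* 25 vs 19 */
	printf("free_space credit = %u bytes\n", (padded_vbn - norm_vbn) * DISK_BLOCK_SIZE);	/* 3072 */
	return 0;
}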
/* Finds a free block and adds information to update array and cw_set */ block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id, sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn) { sgmnt_data_ptr_t csd; sgmnt_addrs *csa; node_local_ptr_t cnl; srch_blk_status bmlhist, freeblkhist; block_id hint_blk_num, free_blk_id, parent_blk_id; boolean_t free_blk_recycled; int4 master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap; uint4 total_blks; int blk_seg_cnt, blk_size; sm_uc_ptr_t parent_blk_ptr, bn_ptr, saved_blk; blk_segment *bs1, *bs_ptr; int parent_blk_size, child_blk_size, bsiz; int rec_size1, curr_offset, bpntr_end, hdr_len; int tmp_cmpc; cw_set_element *tmpcse; jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */ unsigned short temp_ushort; unsigned long temp_long; unsigned char save_cw_set_depth; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csd = cs_data; csa = cs_addrs; cnl = csa->nl; blk_size = csd->blk_size; /* Find a free/recycled block for new block location. */ hint_blk_num = 0; total_blks = csa->ti->total_blks; num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps); if ((NO_FREE_SPACE == master_bit)) { t_abort(gv_cur_region, csa); return ABORT_SWAP; } bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP; if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } hint_bit = 0; maxbitsthismap = (master_bit != (num_local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bmlhist.blk_num; free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled); free_blk_id = bmlhist.blk_num + free_bit; if (DIR_ROOT >= free_blk_id) { /* Bitmap block 0 and directory tree root block 1 should always be marked busy. */ assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); return RETRY_SWAP; } if (child_blk_id <= free_blk_id) { /* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to check * if the block is to be swapped into a higher block number and if so do not swap */ t_abort(gv_cur_region, csa); return ABORT_SWAP; } /* ====== begin update array ====== * Four blocks get changed. * 1. Free block becomes busy and gains the contents of child (root block/directory tree block) * 2. Parent block in directory tree remains busy, but points to new root block location. * 3. Free block's corresponding bitmap reflects above change. * 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */ parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num; CHECK_AND_RESET_UPDATE_ARRAY; if (free_blk_recycled) { /* Otherwise, it's a completely free block, in which case no need to read. 
*/ freeblkhist.blk_num = (block_id)free_blk_id; if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } } child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz; BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, child_blk_size, unsigned char); memcpy(saved_blk, child_blk_ptr, child_blk_size); BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr)); assert(blk_seg_cnt == child_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } tmpcse = &cw_set[cw_set_depth]; (free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state) : BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state); t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl); tmpcse->mode = gds_t_acquired; if (!free_blk_recycled || !cs_data->db_got_to_v5_once) tmpcse->old_block = NULL; else { tmpcse->old_block = freeblkhist.buffaddr; tmpcse->cr = freeblkhist.cr; tmpcse->cycle = freeblkhist.cycle; jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL; if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn)) { bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz; if (bsiz > blk_size) { assert(CDB_STAGNATE > t_tries); t_retry(cdb_sc_lostbmlcr); return RETRY_SWAP; } JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz); } } /* 2. Parent block in directory tree remains busy, but points to new child block location. */ curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset; parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz; GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset)); if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } BLK_INIT(bs_ptr, bs1); if (0 == parent_blk_lvl) /* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */ hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset)); else hdr_len = rec_size1 - SIZEOF(block_id); bpntr_end = curr_offset + hdr_len + SIZEOF(block_id); BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, free_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end); assert(blk_seg_cnt == parent_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN); /* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */ BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state); /* 3. Free block's corresponding bitmap reflects above change. 
*/ PUT_LONG(update_array_ptr, free_bit); save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */ assert(!cw_map_depth); t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1); cw_map_depth = cw_set_depth; cw_set_depth = save_cw_set_depth; update_array_ptr += SIZEOF(block_id); temp_long = 0; PUT_LONG(update_array_ptr, temp_long); update_array_ptr += SIZEOF(block_id); assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ kill_set_list->blk[kill_set_list->used].flag = 0; kill_set_list->blk[kill_set_list->used].level = 0; kill_set_list->blk[kill_set_list->used++].block = child_blk_id; return free_blk_id; }
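/* Editor's note: a simplified, self-contained model of the two-level free-block search that
 * swap_root_or_directory_block() above performs before the swap: the master map identifies a local bitmap with
 * free space, that local bitmap is then scanned, and the last local map only covers the tail of the database
 * (maxbitsthismap). Real local bitmaps use two bits per block and distinguish free from recycled; here both levels
 * are modelled as plain byte arrays and find_free_block() is a made-up helper. */

#include <stdio.h>
#include <stdint.h>

#define BLKS_PER_LMAP 512u
#define NO_FREE_SPACE (-1)
#define DIVIDE_ROUND_UP(x, y) (((x) + (y) - 1) / (y))

static int32_t find_free_block(const uint8_t *master, uint8_t *const lmaps[], uint32_t total_blks)
{
	uint32_t num_lmaps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	for (uint32_t m = 0; m < num_lmaps; m++)
	{
		if (!master[m])
			continue;					/* this local map is marked full */
		uint32_t maxbitsthismap = (m != num_lmaps - 1) ? BLKS_PER_LMAP : total_blks - m * BLKS_PER_LMAP;
		for (uint32_t bit = 0; bit < maxbitsthismap; bit++)	/* last map covers only the tail */
			if (lmaps[m][bit])
				return (int32_t)(m * BLKS_PER_LMAP + bit);
	}
	return NO_FREE_SPACE;
}

int main(void)
{
	static uint8_t lmap0[BLKS_PER_LMAP], lmap1[BLKS_PER_LMAP];
	uint8_t *lmaps[] = { lmap0, lmap1 };
	uint8_t master[] = { 0, 1 };	/* local map 0 full, local map 1 has space */
	lmap1[3] = 1;			/* block 512 + 3 is free */
	printf("free block: %d\n", find_free_block(master, lmaps, 700u));	/* 515 */
	return 0;
}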
OS_PAGE_SIZE_DECLARE

uint4 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog)
{
	sm_uc_ptr_t		old_base[2], mmap_retaddr;
	boolean_t		was_crit, is_mm;
	char			buff[DISK_BLOCK_SIZE];
	int			result, save_errno, status;
	uint4			new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total;
	uint4			jnl_status, to_wait, to_msg, wait_period;
	gtm_uint64_t		avail_blocks, mmap_sz;
	off_t			new_eof;
	trans_num		curr_tn;
	unix_db_info		*udi;
	inctn_opcode_t		save_inctn_opcode;
	int4			prev_extend_blks_to_upgrd;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	cache_rec_ptr_t		cr;
	DCL_THREADGBL_ACCESS;

	assert(!IS_DSE_IMAGE);
	assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */
	assert(!process_exiting);
	DEBUG_ONLY(old_base[0] = old_base[1] = NULL);
	assert(!gv_cur_region->read_only);
	udi = FILE_INFO(gv_cur_region);
	is_mm = (dba_mm == cs_addrs->hdr->acc_meth);
#	if !defined(MM_FILE_EXT_OK)
	if (!udi->grabbed_access_sem && is_mm)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */
#	endif
	/* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will
	 * overflow and end up doing silly things.
	 */
	assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
	if (!blocks)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */
	bplmap = cs_data->bplmap;
	/* The new total of non-bitmap blocks will be the number of current non-bitmap blocks plus the new blocks desired.
	 * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to the number of non-bitmap blocks
	 * and divide by (bplmap - 1) to get the total number of bitmaps for the expanded database. (We must round up in
	 * this manner as every non-bitmap block must have an associated bitmap.)
	 * The current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap.
	 * Subtract the current number of bitmaps from the number needed for the expanded database to get the number of
	 * new bitmaps needed.
	 */
	new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1)
		- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap);
	new_blocks = blocks + new_bit_maps;
	assert(0 < (int)new_blocks);
	if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data))
	{
		assert(FALSE);
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX);
		return (uint4)(NO_FREE_SPACE);
	}
	if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE)))
	{
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
	} else
	{
		if (!(gtmDebugLevel & GDL_IgnoreAvailSpace))
		{	/* Bypass this space check if the debug flag above is on. Allows us to create a large sparse DB
			 * in space it could never fit in if it weren't sparse. Needed for some tests.
			 */
			avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE);
			if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (blocks > (uint4)avail_blocks)
				{
					SETUP_THREADGBL_ACCESS;
					if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs))
						return (uint4)(NO_FREE_SPACE);
					else
						send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks);
				} else
					send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region),
						(uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0)));
			}
		}
	}
	/* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */
	was_crit = cs_addrs->now_crit;
	assert(!cs_addrs->hold_onto_crit || was_crit);
	/* If we are coming from mupip_extend (which gets crit itself) we had better have waited for any unfreezes to occur.
	 * If we are coming from online rollback (when that feature is available), we will come in holding crit and in
	 * the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself.
	 * Therefore, if we are coming in holding crit from MUPIP, we expect the db to be unfrozen, so there is no need to
	 * wait for a freeze.
	 * If we are coming from GT.M and the final retry (in which case we come in holding crit) we expect to have waited
	 * for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this
	 * function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that
	 * at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region
	 * to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused
	 * op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which
	 * we hold crit.
	 */
	assert(!was_crit || !cs_data->freeze || (dollar_tlevel && (CDB_STAGNATE <= t_tries)));
	/* If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE
	 * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup
	 * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt"
	 * transaction numbers without correspondingly updating the history transaction numbers (effectively causing
	 * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover)
	 * if we already hold crit.
	 */
	if (!was_crit)
	{
		for ( ; ; )
		{
			grab_crit(gv_cur_region);
			if (!cs_data->freeze && !IS_REPL_INST_FROZEN)
				break;
			rel_crit(gv_cur_region);
			while (cs_data->freeze || IS_REPL_INST_FROZEN)
				hiber_start(1000);
		}
	} else if (cs_data->freeze && dollar_tlevel)
	{	/* We don't want to continue with the file extension as explained above. Hence return with an error code which
		 * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly.
		 */
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_FREEZE_PROG;
	}
	if (IS_REPL_INST_FROZEN && trans_in_prog)
	{
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_INST_FREEZE;
	}
	assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks);
	old_total = cs_data->trans_hist.total_blks;
	if (old_total != filesize)
	{	/* Somebody else has already extended it; since we are in crit, this is trustworthy. However, in the case of MM,
		 * we still need to remap the database.
		 */
		assert((old_total > filesize) GTM_TRUNCATE_ONLY( || !is_mm));
		/* For BG, someone else could have truncated or extended - we have no idea */
		GDSFILEXT_CLNUP;
		return (SS_NORMAL);
	}
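
The new-bitmap arithmetic above is easy to check with a small standalone program. This is a minimal sketch, assuming the usual ceiling-division definition of DIVIDE_ROUND_UP and an illustrative bplmap of 512; neither value is taken from the surrounding headers, the point is only to show how the number of new bitmaps falls out of the formula.

#include <stdio.h>

#define DIVIDE_ROUND_UP(v, m)	(((v) + (m) - 1) / (m))	/* assumed ceiling-division definition */
#define BPLMAP			512			/* illustrative blocks-per-local-map value */

/* number of additional bitmap blocks an extension by 'blocks' data blocks requires */
static unsigned int new_bitmaps_needed(unsigned int total_blks, unsigned int blocks)
{
	unsigned int cur_maps, cur_data_blks, new_maps;

	cur_maps = DIVIDE_ROUND_UP(total_blks, BPLMAP);			/* bitmaps currently in the file */
	cur_data_blks = total_blks - cur_maps;				/* current non-bitmap blocks */
	new_maps = DIVIDE_ROUND_UP(cur_data_blks + blocks, BPLMAP - 1);	/* each bitmap covers (bplmap - 1) data blocks */
	return new_maps - cur_maps;
}

int main(void)
{
	/* a 1000-block file has 2 bitmaps covering up to 1022 data blocks; 998 are in use */
	printf("%u\n", new_bitmaps_needed(1000, 20));	/* 1018 data blocks still fit under 2 bitmaps: prints 0 */
	printf("%u\n", new_bitmaps_needed(1000, 100));	/* 1098 data blocks need a 3rd bitmap: prints 1 */
	/* gdsfilext then grows the file by blocks + new_bit_maps, i.e. 100 + 1 = 101 blocks in the second case */
	return 0;
}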
uint4 mur_block_count_correct(reg_ctl_list *rctl)
{
	unsigned int		native_size, size;
	sgmnt_data_ptr_t	mu_data;
	int4			mu_int_ovrhd;
	uint4			total_blks;
	uint4			status;
	uint4			new_bit_maps, bplmap, new_blocks;

	MUR_CHANGE_REG(rctl);
	mu_data = cs_data;
	switch (mu_data->acc_meth)
	{
	default:
		GTMASSERT;
		break;
#if defined(VMS) && defined(GT_CX_DEF)
	case dba_bg:
		/* necessary to do the calculation in this manner to prevent double rounding causing an error */
		if (mu_data->unbacked_cache)
			mu_int_ovrhd = DIVIDE_ROUND_UP(SIZEOF_FILE_HDR(mu_data) + mu_data->free_space
				+ mu_data->lock_space_size, DISK_BLOCK_SIZE);
		else
			mu_int_ovrhd = DIVIDE_ROUND_UP(SIZEOF_FILE_HDR(mu_data) + BT_SIZE(mu_data) + mu_data->free_space
				+ mu_data->lock_space_size, DISK_BLOCK_SIZE);
		break;
#else
	case dba_bg:
#endif
	case dba_mm:
		mu_int_ovrhd = (int4)DIVIDE_ROUND_UP(SIZEOF_FILE_HDR(mu_data) + mu_data->free_space, DISK_BLOCK_SIZE);
		break;
	}
	mu_int_ovrhd += 1;
	assert(mu_int_ovrhd == mu_data->start_vbn);
	size = mu_int_ovrhd + (mu_data->blk_size / DISK_BLOCK_SIZE) * mu_data->trans_hist.total_blks;
	native_size = gds_file_size(gv_cur_region->dyn.addr->file_cntl);
	/* In the following tests, the EOF block should always be 1 greater than the actual size of the file.
	 * This is due to the GDS being allocated in even DISK_BLOCK_SIZE-byte blocks.
	 */
	if (native_size && (size < native_size))
	{
		total_blks = (dba_mm == mu_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
		if (JNL_ENABLED(cs_addrs))
			cs_addrs->jnl->pini_addr = 0; /* Stop simulation of GT.M process journal record writing (if any is active) */
		/* If journaling, gdsfilext will need to write an inctn record. The timestamp of that journal record will
		 * need to be adjusted to the current system time to reflect that it is recovery itself writing that record
		 * instead of simulating GT.M activity. Since the variable jgbl.dont_reset_gbl_jrec_time is still set, gdsfilext
		 * will NOT modify jgbl.gbl_jrec_time. Temporarily reset it to allow for adjustments to gbl_jrec_time.
		 */
		assert(jgbl.dont_reset_gbl_jrec_time);
		jgbl.dont_reset_gbl_jrec_time = FALSE;
		/* Calculate the number of blocks to add based on the difference between the real file size and the file size
		 * computed from the file header's total_blks. Take into account that gdsfilext() will automatically add
		 * new_bit_maps to the number of blocks we request.
		 */
		bplmap = cs_data->bplmap;
		new_blocks = (native_size - size) / (mu_data->blk_size / DISK_BLOCK_SIZE);
		new_bit_maps = DIVIDE_ROUND_UP(total_blks + new_blocks, bplmap) - DIVIDE_ROUND_UP(total_blks, bplmap);
		if (SS_NORMAL != (status = gdsfilext(new_blocks - new_bit_maps, total_blks, FALSE)))
		{
			jgbl.dont_reset_gbl_jrec_time = TRUE;
			return (status);
		}
		jgbl.dont_reset_gbl_jrec_time = TRUE;
		DEBUG_ONLY(
			/* Check that the filesize and blockcount in the fileheader match now after the extend */
			size = mu_int_ovrhd + (mu_data->blk_size / DISK_BLOCK_SIZE) * mu_data->trans_hist.total_blks;
			native_size = gds_file_size(gv_cur_region->dyn.addr->file_cntl);
			assert(size == native_size);
		)
	}
	return SS_NORMAL;
}
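
As a worked example of the reconciliation above (all numbers are illustrative, not taken from any real database): with 4KiB GDS blocks over 512-byte disk blocks, a start_vbn of 513 and a header that claims 1000 blocks, a file whose real size is 9000 disk blocks is 60 GDS blocks larger than the header accounts for, one of which must become a new local bitmap, so recovery asks gdsfilext for 59 data blocks. A minimal sketch of that arithmetic:

#include <stdio.h>

#define DISK_BLOCK_SIZE		512
#define DIVIDE_ROUND_UP(v, m)	(((v) + (m) - 1) / (m))

int main(void)
{
	unsigned int blk_size = 4096;		/* bytes per GDS block (illustrative) */
	unsigned int start_vbn = 513;		/* header overhead + 1 (illustrative) */
	unsigned int total_blks = 1000;		/* block count recorded in the file header */
	unsigned int native_size = 9000;	/* actual file size in 512-byte disk blocks (illustrative) */
	unsigned int bplmap = 512;		/* blocks per local bitmap (illustrative) */
	unsigned int factor = blk_size / DISK_BLOCK_SIZE;		/* 8 disk blocks per GDS block */
	unsigned int size = start_vbn + factor * total_blks;		/* 8513: what the header accounts for */
	unsigned int new_blocks = (native_size - size) / factor;	/* 60 blocks present but unaccounted for */
	unsigned int new_bit_maps = DIVIDE_ROUND_UP(total_blks + new_blocks, bplmap)
					- DIVIDE_ROUND_UP(total_blks, bplmap);	/* 3 - 2 = 1 */

	/* request only the data blocks; gdsfilext adds the bitmap block(s) itself */
	printf("gdsfilext request: %u blocks\n", new_blocks - new_bit_maps);	/* 59 */
	return 0;
}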
void mucregini(int4 blk_init_size)
{
	int4		status;
	int4		i;
	th_index_ptr_t	th;
	collseq		*csp;
	uint4		ustatus;
	mstr		jnlfile, jnldef, tmpjnlfile;
	time_t		ctime;

	MEMCPY_LIT(cs_data->label, GDS_LABEL);
	cs_data->desired_db_format = GDSVCURR;
	cs_data->fully_upgraded = TRUE;
	cs_data->db_got_to_v5_once = TRUE; /* no V4 format blocks that are non-upgradeable */
	cs_data->minor_dbver = GDSMVCURR;
	cs_data->certified_for_upgrade_to = GDSVCURR;
	cs_data->creation_db_ver = GDSVCURR;
	cs_data->creation_mdb_ver = GDSMVCURR;
	cs_data->master_map_len = MASTER_MAP_SIZE_DFLT;
	cs_data->bplmap = BLKS_PER_LMAP;
	assert(BLK_SIZE <= MAX_DB_BLK_SIZE);
	cs_data->blk_size = BLK_SIZE;
	i = cs_data->trans_hist.total_blks;
	cs_data->trans_hist.free_blocks = i - DIVIDE_ROUND_UP(i, BLKS_PER_LMAP) - 2;
	cs_data->max_rec_size = gv_cur_region->max_rec_size;
	cs_data->max_key_size = gv_cur_region->max_key_size;
	cs_data->null_subs = gv_cur_region->null_subs;
	cs_data->std_null_coll = gv_cur_region->std_null_coll;
#ifdef UNIX
	cs_data->freeze_on_fail = gv_cur_region->freeze_on_fail;
	cs_data->mumps_can_bypass = gv_cur_region->mumps_can_bypass;
#endif
	cs_data->reserved_bytes = gv_cur_region->dyn.addr->reserved_bytes;
	cs_data->clustered = FALSE;
	cs_data->file_corrupt = 0;
	if (gv_cur_region->dyn.addr->lock_space)
		cs_data->lock_space_size = gv_cur_region->dyn.addr->lock_space * OS_PAGELET_SIZE;
	else
		cs_data->lock_space_size = DEF_LOCK_SIZE;
	cs_data->staleness[0] = -300000000; /* staleness timer = 30 seconds */
	cs_data->staleness[1] = -1;
	cs_data->ccp_quantum_interval[0] = -20000000; /* 2 sec */
	cs_data->ccp_quantum_interval[1] = -1;
	cs_data->ccp_response_interval[0] = -600000000; /* 1 min */
	cs_data->ccp_response_interval[1] = -1;
	cs_data->ccp_tick_interval[0] = -1000000; /* 1/10 sec */
	cs_data->ccp_tick_interval[1] = -1;
	cs_data->last_com_backup = 1;
	cs_data->last_inc_backup = 1;
	cs_data->last_rec_backup = 1;
	cs_data->defer_time = gv_cur_region->dyn.addr->defer_time;
	cs_data->jnl_alq = gv_cur_region->jnl_alq;
	if (cs_data->jnl_state && !cs_data->jnl_alq)
		cs_data->jnl_alq = JNL_ALLOC_DEF;
	cs_data->jnl_deq = gv_cur_region->jnl_deq;
	cs_data->jnl_before_image = gv_cur_region->jnl_before_image;
	cs_data->jnl_state = gv_cur_region->jnl_state;
	cs_data->epoch_interval = JNL_ALLOWED(cs_data) ? DEFAULT_EPOCH_INTERVAL : 0;
	cs_data->alignsize = JNL_ALLOWED(cs_data) ? (DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE) : 0;
	ROUND_UP_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, gv_cur_region->jnl_buffer_size, cs_data);
#ifdef UNIX
	if (JNL_ALLOWED(cs_data))
	{
		if (cs_data->jnl_alq + cs_data->jnl_deq > gv_cur_region->jnl_autoswitchlimit)
		{
			cs_data->autoswitchlimit = gv_cur_region->jnl_autoswitchlimit;
			cs_data->jnl_alq = cs_data->autoswitchlimit;
		} else
			cs_data->autoswitchlimit = ALIGNED_ROUND_DOWN(gv_cur_region->jnl_autoswitchlimit,
				cs_data->jnl_alq, cs_data->jnl_deq);
	} else
		cs_data->autoswitchlimit = 0;
	assert(!(MAX_IO_BLOCK_SIZE % DISK_BLOCK_SIZE));
	if (cs_data->jnl_alq + cs_data->jnl_deq > cs_data->autoswitchlimit)
		cs_data->jnl_alq = cs_data->autoswitchlimit;
#else
	cs_data->autoswitchlimit = JNL_ALLOWED(cs_data)
		? ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, cs_data->jnl_alq, cs_data->jnl_deq) : 0;
#endif
	if (!cs_data->jnl_buffer_size)
		ROUND_UP_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, JNL_BUFFER_DEF, cs_data);
	if (JNL_ALLOWED(cs_data))
		if (cs_data->jnl_buffer_size < JNL_BUFF_PORT_MIN(cs_data))
		{
			ROUND_UP_MIN_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, cs_data);
		} else if (cs_data->jnl_buffer_size > JNL_BUFFER_MAX)
		{
			ROUND_DOWN_MAX_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, cs_data);
		}
	cs_data->def_coll = gv_cur_region->def_coll;
	if (cs_data->def_coll)
	{
		if (csp = ready_collseq((int)(cs_data->def_coll)))
		{
			cs_data->def_coll_ver = (csp->version)(cs_data->def_coll);
			if (!do_verify(csp, cs_data->def_coll, cs_data->def_coll_ver))
			{
				gtm_putmsg(VARLSTCNT(4) ERR_COLLTYPVERSION, 2, cs_data->def_coll, cs_data->def_coll_ver);
				mupip_exit(ERR_MUNOACTION);
			}
		} else
		{
			gtm_putmsg(VARLSTCNT(3) ERR_COLLATIONUNDEF, 1, cs_data->def_coll);
			mupip_exit(ERR_MUNOACTION);
		}
	}
	/* mupip_set_journal() relies on cs_data->jnl_file_len being 0 if cs_data->jnl_state is jnl_notallowed.
	 * Note that even though gv_cur_region->jnl_state is jnl_notallowed, gv_cur_region->jnl_file_len can be non-zero.
	 */
	cs_data->jnl_file_len = JNL_ALLOWED(cs_data) ? gv_cur_region->jnl_file_len : 0;
	cs_data->reg_seqno = 1;
	VMS_ONLY(
		cs_data->resync_seqno = 1;
		cs_data->old_resync_seqno = 1;
		cs_data->resync_tn = 1;
	)
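
A minimal sketch of the initial free-block calculation above, assuming BLKS_PER_LMAP is 512 (an illustrative value) and reading the "- 2" as the two non-bitmap blocks a freshly created database already has in use:

#define DIVIDE_ROUND_UP(v, m)	(((v) + (m) - 1) / (m))
#define BLKS_PER_LMAP		512	/* illustrative */

/* free blocks in a freshly created database: everything except the local bitmap
 * blocks and the two blocks that are already in use at creation time */
unsigned int initial_free_blocks(unsigned int total_blks)
{
	return total_blks - DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP) - 2;
}
/* e.g. initial_free_blocks(1000) == 1000 - 2 - 2 == 996 */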
/* This function is part of the MUMPS compiler. It adds one pattern atom to the string of compiled pattern atoms.
 * If the atom to be added can be "compressed" with the previous one, this function will allow pat_compress() to do so.
 */
boolean_t add_atom(int *count, uint4 pattern_mask, pat_strlit *strlit_buff, boolean_t infinite, int *min, int *max,
	int *size, int *total_min, int *total_max, int lower_bound, int upper_bound, int altmin, int altmax,
	boolean_t *last_infinite_ptr, uint4 **fstchar_ptr, uint4 **outchar_ptr, uint4 **lastpatptr_ptr)
{
	uint4		*patmaskptr;
	gtm_uint64_t	bound;
	int4		bytelen;

	if ((pattern_mask & PATM_STRLIT) && !strlit_buff->bytelen && *count)
	{	/* A special case is a pattern like xxx?1N5.7""2A . Since there is an infinite number of empty strings between
		 * any two characters in a string, a pattern atom that counts repetitions of the fixed string "" can be ignored.
		 * Note that such an atom can only be ignored if it is not the only one in the pattern...
		 */
		return TRUE;
	}
	if (*count && !*(size - 1))
	{	/* If the previous atom was an n.m"", it should be removed. In such a case, the last four values
		 * in the 'outchar' array are PATM_STRLIT (pattern mask), 0 (bytelen), 0 (charlen), flags (ASCII and no BADCHAR).
		 */
		assert(3 == PAT_STRLIT_PADDING);
		assert(PATM_STRLIT == *(*outchar_ptr - (PAT_STRLIT_PADDING + 1)));
		assert(0 == *(*outchar_ptr - 3)); /* bytelen */
		assert(0 == *(*outchar_ptr - 2)); /* charlen */
		assert(!((*(*outchar_ptr - 1)) & PATM_STRLIT_NONASCII)); /* flags - ascii */
		assert(!((*(*outchar_ptr - 1)) & PATM_STRLIT_BADCHAR)); /* flags - no badchar */
		*outchar_ptr -= (PAT_STRLIT_PADDING + 1);
		(*count)--;
		assert(0 == *count);
		min--;
		max--;
		size--;
	}
	if (pattern_mask & PATM_ALT)
	{
		lower_bound = BOUND_MULTIPLY(lower_bound, altmin, bound);
		upper_bound = BOUND_MULTIPLY(upper_bound, altmax, bound);
	}
	if (*count && pat_compress(pattern_mask, strlit_buff, infinite, *last_infinite_ptr, *lastpatptr_ptr))
	{
		min--;
		max--;
		size--;
		*min = MIN(*min + lower_bound, PAT_MAX_REPEAT);
		*max = MIN(*max + upper_bound, PAT_MAX_REPEAT);
	} else
	{
		*min = MIN(lower_bound, PAT_MAX_REPEAT);
		*max = MIN(upper_bound, PAT_MAX_REPEAT);
		*lastpatptr_ptr = patmaskptr = *outchar_ptr;
		*last_infinite_ptr = infinite;
		(*outchar_ptr)++;
		if (*outchar_ptr - *fstchar_ptr > MAX_PATTERN_LENGTH)
			return FALSE;
		if ((pattern_mask & PATM_ALT) || !(pattern_mask & PATM_STRLIT))
		{
			*patmaskptr++ = pattern_mask;
			*size = 1;
		} else
		{
			bytelen = strlit_buff->bytelen;
			*outchar_ptr += DIVIDE_ROUND_UP(bytelen, SIZEOF(uint4)) + PAT_STRLIT_PADDING;
			if (*outchar_ptr - *fstchar_ptr > MAX_PATTERN_LENGTH)
				return FALSE;
			*patmaskptr++ = pattern_mask;
			memcpy(patmaskptr, strlit_buff, bytelen + PAT_STRLIT_PADDING * SIZEOF(uint4));
			*size = strlit_buff->charlen;
		}
		(*count)++;
	}
	*total_min += BOUND_MULTIPLY(*size, lower_bound, bound);
	if (*total_min > PAT_MAX_REPEAT)
		*total_min = PAT_MAX_REPEAT;
	*total_max += BOUND_MULTIPLY(*size, upper_bound, bound);
	if (*total_max > PAT_MAX_REPEAT)
		*total_max = PAT_MAX_REPEAT;
	return TRUE;
}
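
The word layout implied by the asserts and the PAT_STRLIT_PADDING arithmetic above can be summarized with a small sketch. The helper below is illustrative only; it simply recomputes how many uint4 slots one string-literal atom occupies in the 'outchar' array (mask word, then bytelen/charlen/flags padding, then the literal rounded up to whole words).

#include <stdio.h>

#define DIVIDE_ROUND_UP(v, m)	(((v) + (m) - 1) / (m))
#define PAT_STRLIT_PADDING	3	/* bytelen, charlen, flags - per the asserts above */

/* uint4 words occupied in 'outchar' by one string-literal atom of the given byte length */
static unsigned int strlit_atom_words(unsigned int bytelen)
{
	/* 1 word for the pattern mask, 3 padding words, then the literal rounded up to whole words */
	return 1 + PAT_STRLIT_PADDING + DIVIDE_ROUND_UP(bytelen, sizeof(unsigned int));
}

int main(void)
{
	printf("%u\n", strlit_atom_words(0));	/* 4: the n.m"" case the code above removes and rewinds past */
	printf("%u\n", strlit_atom_words(5));	/* 6: e.g. an atom such as 1"HELLO" */
	return 0;
}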
/* go after a specific number of buffers or a particular buffer */
bool wcs_get_space(gd_region *reg, int needed, cache_rec *cr)
{
	unsigned int		lcnt, ocnt, status;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	que_ent_ptr_t		base, q0;
	int4			dummy_errno;
	boolean_t		is_mm;

	assert((0 != needed) || (NULL != cr));
	csa = &(FILE_INFO(reg)->s_addrs);
	assert(csa == cs_addrs);
	csd = csa->hdr;
	is_mm = (dba_mm == csd->acc_meth);
	assert(is_mm || (dba_bg == csd->acc_meth));
	cnl = csa->nl;
	if (FALSE == csa->now_crit)
	{
		assert(0 != needed); /* if needed == 0, then we should be in crit */
		for (lcnt = DIVIDE_ROUND_UP(needed, csd->n_wrt_per_flu); 0 < lcnt; lcnt--)
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
				/* a macro that ensures jnl is open, dclast's wcs_wtstart and checks for errors etc. */
		return TRUE;
	}
	if (FALSE == wcs_wtfini(reg))
		return FALSE;
	/* while calculating flush_trigger, the decrement should be at least 1 if we have still not reached the minimum allowed */
	csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger / STEP_FACTOR, 1), MIN_FLUSH_TRIGGER(csd->n_bts));
	if (0 == needed)
	{
		if (!is_mm)
		{	/* If another process is concurrently finishing up phase2 of commit, wait for that to complete first. */
			if (cr->in_tend && !wcs_phase2_commit_wait(csa, cr))
				return FALSE; /* assumption is that caller will set wc_blocked and trigger cache recovery */
		}
		for (lcnt = 1; (MAXGETSPACEWAIT > lcnt) && (0 != cr->dirty); lcnt++)
		{	/* We want to flush a specific cache-record. We speed up the wait by moving the dirty cache-record
			 * to the head of the active queue. But to do this, we need exclusive access to the active queue.
			 * The only other processes outside of crit that can be touching this concurrently are wcs_wtstart
			 * (which can remove entries from the queue) and bg_update_phase2 (which can add entries to the queue).
			 * In the case of writers, we can wait for those to complete (by setting cnl->wc_blocked to TRUE)
			 * and then play with the queue. But in the case of bg_update_phase2, it is not easily possible to
			 * do a similar wait so in this case we choose to do plain wcs_wtstart (which uses interlocked
			 * queue operations and hence can work well with concurrent bg_update_phase2) and wait until the
			 * cache record of interest becomes non-dirty. The consequence is we might wait a little longer than
			 * necessary but that is considered acceptable for now.
			 */
			/* Check if cache recovery is needed (could be set by another process in
			 * secshr_db_clnup finishing off a phase2 commit). If so, there is no point invoking
			 * wcs_wtstart as it will return right away. Instead return FALSE so
			 * cache-recovery can be triggered by the caller.
			 */
			if (cnl->wc_blocked)
			{
				assert(gtm_white_box_test_case_enabled);
				return FALSE;
			}
			if (!is_mm && cnl->wcs_phase2_commit_pidcnt)
			{
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart and checks for errors etc. */
				wcs_sleep(lcnt);
			} else if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
			{
				SIGNAL_WRITERS_TO_STOP(cnl); /* to stop all active writers */
				WAIT_FOR_WRITERS_TO_STOP(cnl, ocnt, MAXGETSPACEWAIT);
				if (MAXGETSPACEWAIT <= ocnt)
				{
					assert(FALSE);
					return FALSE;
				}
				if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
				{	/* Check if the cache-record is part of the active queue. If so, then remove it from the
					 * tail of the active queue and move it to the head to try and speed up the flush.
					 * If not and cr->dirty is non-zero, then the only way we know this is possible
					 * is if a concurrent process encountered an error in the midst of commit in phase2
					 * of bg_update and finished the update but did not reinsert the cache-record in the
					 * active queue (see comment in secshr_db_clnup about why INSQ*I macros are not used
					 * in VMS). In this case, return FALSE as wcs_get_space cannot flush this cache-record.
					 * The caller will trigger appropriate error handling. We are guaranteed that cr cannot
					 * be part of the wip queue because WRITE_LATCH_VAL(cr) is LATCH_CLEAR (in the wip queue
					 * it will be > LATCH_CLEAR).
					 */
					if (0 != cr->state_que.fl)
					{	/* We are about to play with the queues without using interlocks.
						 * Assert no one else could be concurrently playing with the queue.
						 */
						assert(!cnl->wcs_phase2_commit_pidcnt && !cnl->in_wtstart);
						base = &csa->acc_meth.bg.cache_state->cacheq_active;
						q0 = (que_ent_ptr_t)((sm_uc_ptr_t)&cr->state_que + cr->state_que.fl);
						shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)base);
					} else if (cr->dirty)
					{
						assert(gtm_white_box_test_case_enabled);
						return FALSE;
					}
				}
				SIGNAL_WRITERS_TO_RESUME(cnl);
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart and checks for errors etc. */
				wcs_sleep(lcnt);
			} else if ((0 == cr->iosb.cond) || (WRT_STRT_PNDNG == cr->iosb.cond))
			{
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart and checks for errors etc. */
				wcs_sleep(lcnt);
			}
			if (FALSE == wcs_wtfini(reg))
				return FALSE;
		}
		if (0 == cr->dirty)
			return TRUE;
		assert(FALSE);
		return FALSE;
	}
	for (lcnt = 1; ((cnl->wc_in_free < needed) && (MAXGETSPACEWAIT > lcnt)); lcnt++)
	{
		DCLAST_WCS_WTSTART(reg, 0, dummy_errno); /* a macro that dclast's wcs_wtstart and checks for errors etc. */
		wcs_sleep(lcnt);
		if (FALSE == wcs_wtfini(reg))
			return FALSE;
	}
	if (cnl->wc_in_free < needed)
	{
		assert(FALSE);
		return FALSE;
	}
	return TRUE;
}
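
The flush_trigger adjustment near the top of wcs_get_space is a simple geometric back-off with a floor: each call lowers the trigger by roughly 1/STEP_FACTOR of its current value (at least 1) but never below the minimum. The sketch below shows that behaviour with made-up values for STEP_FACTOR and the minimum trigger; the real constants come from the build headers, and MIN_FLUSH_TRIGGER(csd->n_bts) is replaced here by a plain parameter.

#include <stdio.h>

#define STEP_FACTOR	16	/* example divisor, not the real constant */
#define MAX(a, b)	((a) > (b) ? (a) : (b))

/* one application of the back-off used in wcs_get_space */
static unsigned int decay_flush_trigger(unsigned int trigger, unsigned int min_trigger)
{
	return MAX(trigger - MAX(trigger / STEP_FACTOR, 1), min_trigger);
}

int main(void)
{
	unsigned int trigger = 1000, min_trigger = 256;
	int i;

	/* repeated cache pressure walks the trigger down geometrically until it hits the floor */
	for (i = 0; i < 50; i++)
		trigger = decay_flush_trigger(trigger, min_trigger);
	printf("%u\n", trigger);	/* settles at 256 and stays there */
	return 0;
}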