/*
 * Marshal the header into buf while keeping the timestamp already held in
 * this message (sec and subsec are written out as they are, not refreshed).
 *
 * Fields are written at fixed byte offsets of buf:
 *   0 - MsgName, 1 - sec, 5 - subsec, 9 - msg_length.
 *
 * Returns HEADER_SIZE.
 */
long SDMmessage::MarshalHeaderOldTimeStamp(char* buf)
{
	/* Byte offsets of the individual header fields inside buf */
	enum
	{
		hdr_name_offset   = 0,
		hdr_sec_offset    = 1,
		hdr_subsec_offset = 5,
		hdr_length_offset = 9
	};

	buf[hdr_name_offset] = MsgName;
	PUT_LONG(&buf[hdr_sec_offset], sec);
	PUT_LONG(&buf[hdr_subsec_offset], subsec);
	PUT_USHORT(&buf[hdr_length_offset], msg_length);

	return HEADER_SIZE;
}
long SDMmessage::MarshalHeader(char* buf) { #ifdef __VXWORKS__ struct timespec time; clock_gettime(CLOCK_REALTIME, &time); sec = time.tv_sec; subsec = time.tv_nsec/1000; #else struct timeval time; gettimeofday(&time,NULL); sec = time.tv_sec; subsec = time.tv_usec; #endif buf[0] = MsgName; PUT_LONG(&buf[1],sec); PUT_LONG(&buf[5],subsec); PUT_USHORT(&buf[9],msg_length); return HEADER_SIZE; }
/* **************************************************************** * Atualiza o bloco de informações da FAT32 * **************************************************************** */ void update_fat32_info (void) { UNI *up = &uni; FSINFO fsinfo; /* * Só é chamada se for RW */ if (up->u_fs_info == 0) return; #undef DEBUG #ifdef DEBUG printf ( "u_disk_infree = %d, u_infree = %d\n", up->u_disk_infree, up->u_infree ); #endif DEBUG if (up->u_disk_infree == up->u_infree) return; /* * Atualiza o bloco de informações */ #ifdef DEBUG printf ("Atualizando ...\n"); #endif DEBUG bread (up->u_fs_info, &fsinfo); /*** bread (up->u_fs_info + 1, (void *)&fsinfo + BLSZ); ***/ PUT_LONG (up->u_infree, &fsinfo.fs_infree); bwrite (up->u_fs_info, &fsinfo); /*** bwrite (up->u_fs_info + 1, (void *)&fsinfo + BLSZ); ***/ up->u_disk_infree = up->u_infree; } /* update_fat32_info */
/* Finds a free block and adds information to update array and cw_set
 *
 * The contents of the child block are copied into a free (or recycled) block located through the master map and
 * a local bitmap, the parent record is rebuilt to point at the new block, the bitmap update for the acquired block
 * is queued and the old child block number is appended to the kill set.
 *
 * Input Parameters:
 *	parent_blk_lvl	: index into dir_hist_ptr->h[] of the parent block (0 iff a gvt root block is being moved)
 *	child_blk_lvl	: level handed to t_create() for the new copy of the child
 *	dir_hist_ptr	: search history supplying the parent's buffer, block number and current record offset
 *	child_blk_id	: block number of the child block being moved
 *	child_blk_ptr	: buffer holding the child block; its header supplies the size that is copied
 *	curr_tn		: transaction number handed to t_write_map()
 * Input/Output Parameters:
 *	kill_set_list	: kill set that receives child_blk_id (its "used" count is incremented)
 * Returns:
 *	the block number of the acquired block on success,
 *	ABORT_SWAP (after t_abort()) when the master map has no free space or the free block found is not lower
 *		than child_blk_id,
 *	RETRY_SWAP (after t_retry()) when a read or a consistency check fails.
 */
block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id,
		sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn)
{
	sgmnt_data_ptr_t csd;
	sgmnt_addrs *csa;
	node_local_ptr_t cnl;
	srch_blk_status bmlhist, freeblkhist;
	block_id hint_blk_num, free_blk_id, parent_blk_id;
	boolean_t free_blk_recycled;
	int4 master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap;
	uint4 total_blks;
	int blk_seg_cnt, blk_size;
	sm_uc_ptr_t parent_blk_ptr, bn_ptr, saved_blk;
	blk_segment *bs1, *bs_ptr;
	int parent_blk_size, child_blk_size, bsiz;
	int rec_size1, curr_offset, bpntr_end, hdr_len;
	int tmp_cmpc;
	cw_set_element *tmpcse;
	jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned short temp_ushort;
	unsigned long temp_long;
	unsigned char save_cw_set_depth;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csd = cs_data;
	csa = cs_addrs;
	cnl = csa->nl;
	blk_size = csd->blk_size;
	/* Find a free/recycled block for new block location. The search starts from block 0 (hint_blk_num is 0). */
	hint_blk_num = 0;
	total_blks = csa->ti->total_blks;
	num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps);
	if ((NO_FREE_SPACE == master_bit))
	{
		t_abort(gv_cur_region, csa);
		return ABORT_SWAP;
	}
	/* Read the local bitmap block selected by the master map */
	bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP;
	if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr)))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry((enum cdb_sc)rdfail_detail);
		return RETRY_SWAP;
	}
	hint_bit = 0;
	/* The last local map covers only the blocks that remain up to total_blks */
	maxbitsthismap = (master_bit != (num_local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bmlhist.blk_num;
	free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled);
	free_blk_id = bmlhist.blk_num + free_bit;
	if (DIR_ROOT >= free_blk_id)
	{	/* Bitmap block 0 and directory tree root block 1 should always be marked busy. */
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_badbitmap);
		return RETRY_SWAP;
	}
	if (child_blk_id <= free_blk_id)
	{	/* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to
		 * check if the block is to be swapped into a higher block number and if so do not swap
		 */
		t_abort(gv_cur_region, csa);
		return ABORT_SWAP;
	}
	/* ====== begin update array ======
	 * Four blocks get changed.
	 * 	1. Free block becomes busy and gains the contents of child (root block/directory tree block)
	 * 	2. Parent block in directory tree remains busy, but points to new root block location.
	 * 	3. Free block's corresponding bitmap reflects above change.
	 * 	4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE)
	 */
	parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */
	parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num;
	CHECK_AND_RESET_UPDATE_ARRAY;
	if (free_blk_recycled)
	{	/* Otherwise, it's a completely free block, in which case no need to read. */
		freeblkhist.blk_num = (block_id)free_blk_id;
		if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr)))
		{
			assert(t_tries < CDB_STAGNATE);
			t_retry((enum cdb_sc)rdfail_detail);
			return RETRY_SWAP;
		}
	}
	/* 1. Free block becomes busy and gains the contents of child: copy the child and build the new block from the
	 * copy (everything after the block header).
	 */
	child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz;
	BLK_INIT(bs_ptr, bs1);
	BLK_ADDR(saved_blk, child_blk_size, unsigned char);
	memcpy(saved_blk, child_blk_ptr, child_blk_size);
	BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr));
	assert(blk_seg_cnt == child_blk_size);
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	/* tmpcse addresses the cw_set slot at the current depth; it is taken before the t_create() call below and is
	 * then marked gds_t_acquired. Its prior state records whether the block was recycled or free.
	 */
	tmpcse = &cw_set[cw_set_depth];
	(free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state)
			: BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state);
	t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl);
	tmpcse->mode = gds_t_acquired;
	if (!free_blk_recycled || !cs_data->db_got_to_v5_once)
		tmpcse->old_block = NULL;
	else
	{	/* freeblkhist was filled in above because free_blk_recycled is set */
		tmpcse->old_block = freeblkhist.buffaddr;
		tmpcse->cr = freeblkhist.cr;
		tmpcse->cycle = freeblkhist.cycle;
		jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL;
		if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
		{
			bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
			if (bsiz > blk_size)
			{	/* size in the old block's header exceeds the database block size: restart */
				assert(CDB_STAGNATE > t_tries);
				t_retry(cdb_sc_lostbmlcr);
				return RETRY_SWAP;
			}
			JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz);
		}
	}
	/* 2. Parent block in directory tree remains busy, but points to new child block location. */
	curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset;
	parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz;
	GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset));
	if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1))
	{	/* record extends past the end of the block or is smaller than a *-record: restart */
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	BLK_INIT(bs_ptr, bs1);
	if (0 == parent_blk_lvl)
		/* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */
		hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset));
	else
		hdr_len = rec_size1 - SIZEOF(block_id);
	/* The rebuilt parent is: everything up to the block pointer, the new block number, everything after it */
	bpntr_end = curr_offset + hdr_len + SIZEOF(block_id);
	BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr));
	BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
	PUT_LONG(bn_ptr, free_blk_id);
	BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
	BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end);
	assert(blk_seg_cnt == parent_blk_size);
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN);
	/* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */
	BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
	/* 3. Free block's corresponding bitmap reflects above change. */
	PUT_LONG(update_array_ptr, free_bit);
	save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */
	assert(!cw_map_depth);
	t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1);
	/* cw_map_depth takes the depth reached after t_write_map(); cw_set_depth is put back to its saved value */
	cw_map_depth = cw_set_depth;
	cw_set_depth = save_cw_set_depth;
	update_array_ptr += SIZEOF(block_id);
	/* a zero entry follows the single bit offset in the update array */
	temp_long = 0;
	PUT_LONG(update_array_ptr, temp_long);
	update_array_ptr += SIZEOF(block_id);
	assert(1 == cw_set[cw_map_depth - 1].reference_cnt);
	/* 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */
	kill_set_list->blk[kill_set_list->used].flag = 0;
	kill_set_list->blk[kill_set_list->used].level = 0;
	kill_set_list->blk[kill_set_list->used++].block = child_blk_id;
	return free_blk_id;
}
/* Removes a range of records from one block and queues the rebuilt block through t_write().
 *
 * Input Parameters:
 *	blkhist		: history entry of the block (block number and buffer); also handed to t_write()
 *	level		: level of the block; when non-zero the block pointer stored at the end of every deleted record
 *			  is handed to gvcst_delete_blk() with level - 1
 *	search_key	: key whose bytes (starting at targ_base) are copied into the record that follows the deleted
 *			  range when that record needs more of its key stored
 *	low, high	: offset/match of the left and right boundary of the delete
 *	right_extra	: when set and high.offset lies before the top of the block, the right boundary is moved one
 *			  record further to the right
 * Output Parameters:
 *	*cseptr		: cw_set_element returned by t_write(); NULL on every path that returns before t_write()
 * Returns:
 *	cdb_sc_normal		nothing to delete, or the block update was queued
 *	cdb_sc_delete_parent	the whole block is being deleted and a parent exists in gv_target->hist
 *	cdb_sc_rmisalign	an offset/record size consistency check failed
 *	cdb_sc_blknumerr	(TP only) a chain block number refers to a cw_index beyond the current cw_set_depth
 *	cdb_sc_mkblk		BLK_FINI failed
 * The global "horiz_growth" is reset to FALSE before the final return.
 */
enum cdb_sc gvcst_kill_blk(srch_blk_status *blkhist, char level, gv_key *search_key, srch_rec_status low,
		srch_rec_status high, boolean_t right_extra, cw_set_element **cseptr)
{
	typedef sm_uc_ptr_t bytptr;

	unsigned short temp_ushort;
	int4 temp_long;
	int tmp_cmpc;
	int blk_size, blk_seg_cnt, lmatch, rmatch, targ_len, prev_len, targ_base, next_rec_shrink, temp_int, blkseglen;
	bool kill_root, first_copy;
	blk_hdr_ptr_t old_blk_hdr;
	rec_hdr_ptr_t left_ptr; /*pointer to record before first record to delete*/
	rec_hdr_ptr_t del_ptr; /*pointer to first record to delete*/
	rec_hdr_ptr_t right_ptr; /*pointer to record after last record to delete*/
	rec_hdr_ptr_t right_prev_ptr;
	rec_hdr_ptr_t rp, rp1; /*scratch record pointer*/
	rec_hdr_ptr_t first_in_blk, top_of_block, new_rec_hdr, star_rec_hdr;
	blk_segment *bs1, *bs_ptr;
	block_index new_block_index;
	unsigned char *skb;
	static readonly block_id zeroes = 0;
	cw_set_element *cse, *old_cse;
	bytptr curr, prev, right_bytptr;
	off_chain chain1, curr_chain, prev_chain;
	block_id blk;
	sm_uc_ptr_t buffer;
	srch_blk_status *t1;

	*cseptr = NULL;
	if (low.offset == high.offset)
		return cdb_sc_normal;	/* both boundaries are at the same offset: nothing to do */
	blk = blkhist->blk_num;
	if (dollar_tlevel)
	{	/* In TP, a block number with the chain flag set carries a cw_set index; it must be below the current depth */
		PUT_LONG(&chain1, blk);
		if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth))
		{
			assert(sgm_info_ptr->tp_csa == cs_addrs);
			assert(FALSE == cs_addrs->now_crit);
			return cdb_sc_blknumerr;
		}
	}
	buffer = blkhist->buffaddr;
	old_blk_hdr = (blk_hdr_ptr_t)buffer;
	kill_root = FALSE;
	blk_size = cs_data->blk_size;
	first_in_blk = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + SIZEOF(blk_hdr));
	top_of_block = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + old_blk_hdr->bsiz);
	left_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + low.offset);
	right_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + high.offset);
	if (right_extra && right_ptr < top_of_block)
	{	/* remember the record at high.offset and step right_ptr over it */
		right_prev_ptr = right_ptr;
		GET_USHORT(temp_ushort, &right_ptr->rsiz);
		right_ptr = (rec_hdr_ptr_t)((bytptr)right_ptr + temp_ushort);
	}
	if ((bytptr)left_ptr < (bytptr)old_blk_hdr
		|| (bytptr)right_ptr > (bytptr)top_of_block
		|| (bytptr)left_ptr >= (bytptr)right_ptr)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_rmisalign;
	}
	if ((bytptr)left_ptr == (bytptr)old_blk_hdr)
	{	/* low.offset is 0 : the delete starts at the first record of the block */
		if ((bytptr)right_ptr == (bytptr)top_of_block)
		{	/* ... and extends to the top of the block : the entire block is affected */
			if ((bytptr)first_in_blk == (bytptr)top_of_block)
			{	/* the block holds no records at all; that is accepted only at level 0 */
				if (0 != level)
				{
					assert(CDB_STAGNATE > t_tries);
					return cdb_sc_rmisalign;
				}
				return cdb_sc_normal;
			}
			if (!gv_target->hist.h[level + 1].blk_num)
				kill_root = TRUE;	/* no block recorded at the next higher level of the history */
			else
			{	/* We are about to free up the contents of this entire block. If this block corresponded to
				 * a global that has NOISOLATION turned on and has a non-zero recompute list (i.e. some SETs
				 * already happened in this same TP transaction), make sure we disable the NOISOLATION
				 * optimization in this case as that is applicable only if one or more SETs happened in this
				 * data block and NOT if a KILL happens. Usually this is done by a t_write(GDS_WRITE_KILLTN)
				 * call but since in this case the entire block is being freed, "t_write" wont be invoked
				 * so we need to explicitly set GDS_WRITE_KILLTN like t_write would have (GTM-8269).
				 * Note: blkhist->first_tp_srch_status is not reliable outside of TP. Thankfully the recompute
				 * list is also maintained only in case of TP so a check of dollar_tlevel is enough to
				 * dereference both "first_tp_srch_status" and "recompute_list_head".
				 */
				if (dollar_tlevel)
				{
					t1 = blkhist->first_tp_srch_status ? blkhist->first_tp_srch_status : blkhist;
					cse = t1->cse;
					if ((NULL != cse) && cse->recompute_list_head)
						cse->write_type |= GDS_WRITE_KILLTN;
				}
				return cdb_sc_delete_parent;
			}
		}
		del_ptr = first_in_blk;
	} else
	{	/* the first record to delete is the one following left_ptr */
		GET_USHORT(temp_ushort, &left_ptr->rsiz);
		del_ptr = (rec_hdr_ptr_t)((bytptr)left_ptr + temp_ushort);
		if ((bytptr)del_ptr <= (bytptr)(left_ptr + 1) || (bytptr)del_ptr > (bytptr)right_ptr)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_rmisalign;
		}
	}
	if ((bytptr)del_ptr == (bytptr)right_ptr)
		return cdb_sc_normal;	/* no record lies between the two boundaries */
	lmatch = low.match;
	rmatch = high.match;
	if (level)
	{	/* index block : every deleted record ends in a block pointer; hand each one to gvcst_delete_blk() */
		for (rp = del_ptr ; rp < right_ptr ; rp = rp1)
		{
			GET_USHORT(temp_ushort, &rp->rsiz);
			rp1 = (rec_hdr_ptr_t)((bytptr)rp + temp_ushort);
			if (((bytptr)rp1 < (bytptr)(rp + 1) + SIZEOF(block_id))
				|| ((bytptr)rp1 < buffer) || ((bytptr)rp1 > (buffer + blk_size)))
			{	/* record too small to hold a block pointer, or next record lies outside the buffer */
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_rmisalign;
			}
			GET_LONG(temp_long, ((bytptr)rp1 - SIZEOF(block_id)));
			if (dollar_tlevel)
			{	/* same chain-index validation as done for "blk" above */
				chain1 = *(off_chain *)&temp_long;
				if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth))
				{
					assert(sgm_info_ptr->tp_csa == cs_addrs);
					assert(FALSE == cs_addrs->now_crit);
					return cdb_sc_blknumerr;
				}
			}
			gvcst_delete_blk(temp_long, level - 1, FALSE);
		}
	}
	if (kill_root)
	{	/* create an empty data block */
		BLK_INIT(bs_ptr, bs1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_mkblk;
		}
		new_block_index = t_create(blk, (uchar_ptr_t)bs1, 0, 0, 0);
		/* create index block : a single record made of a record header followed by a zero block pointer;
		 * new_block_index and the offset of that pointer are handed to t_write()
		 */
		BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
		new_rec_hdr->rsiz = SIZEOF(rec_hdr) + SIZEOF(block_id);
		SET_CMPC(new_rec_hdr, 0);
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr));
		BLK_SEG(bs_ptr, (bytptr)&zeroes, SIZEOF(block_id));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_mkblk;
		}
		cse = t_write(blkhist, (unsigned char *)bs1, SIZEOF(blk_hdr) + SIZEOF(rec_hdr), new_block_index, 1,
				TRUE, FALSE, GDS_WRITE_KILLTN);
		assert(!dollar_tlevel || !cse->high_tlevel);
		*cseptr = cse;
		if (NULL != cse)
			cse->first_off = 0;
		return cdb_sc_normal;
	}
	/* At this point next_rec_shrink is used to hold the size of the block once the records are removed */
	next_rec_shrink = (int)(old_blk_hdr->bsiz + ((bytptr)del_ptr - (bytptr)right_ptr));
	if (SIZEOF(blk_hdr) >= next_rec_shrink)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_rmisalign;
	}
	if ((bytptr)right_ptr == (bytptr)top_of_block)
	{
		if (level)
		{	/* the record at left_ptr is replaced by a header + block pointer record (built below) */
			GET_USHORT(temp_ushort, &left_ptr->rsiz);
			next_rec_shrink += SIZEOF(rec_hdr) + SIZEOF(block_id) - temp_ushort;
		}
	} else
	{	/* A record survives to the right of the deleted range. Work out how many key bytes it has to gain:
		 * targ_len bytes taken from search_key and prev_len bytes taken from the record at right_prev_ptr.
		 */
		targ_base = (rmatch < lmatch) ? rmatch : lmatch;
		prev_len = 0;
		if (right_extra)
		{
			EVAL_CMPC2(right_prev_ptr, tmp_cmpc);
			targ_len = tmp_cmpc - targ_base;
			if (targ_len < 0)
				targ_len = 0;
			temp_int = tmp_cmpc - EVAL_CMPC(right_ptr);
			if (0 >= temp_int)
				prev_len = - temp_int;
			else
			{
				if (temp_int < targ_len)
					targ_len -= temp_int;
				else
					targ_len = 0;
			}
		} else
		{
			targ_len = EVAL_CMPC(right_ptr) - targ_base;
			if (targ_len < 0)
				targ_len = 0;
		}
		next_rec_shrink += targ_len + prev_len;
	}
	/* Build the new block : first the part to the left of the deleted range ... */
	BLK_INIT(bs_ptr, bs1);
	first_copy = TRUE;
	blkseglen = (int)((bytptr)del_ptr - (bytptr)first_in_blk);
	if (0 < blkseglen)
	{
		if (((bytptr)right_ptr != (bytptr)top_of_block) || (0 == level))
		{	/* records up to del_ptr are kept unchanged */
			BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen);
			first_copy = FALSE;
		} else
		{	/* index block losing its right end : keep the records before left_ptr and replace the record at
			 * left_ptr by a record header (compression count 0) followed by left_ptr's block pointer
			 */
			blkseglen = (int)((bytptr)left_ptr - (bytptr)first_in_blk);
			if (0 < blkseglen)
			{
				BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen);
				first_copy = FALSE;
			}
			BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
			SET_CMPC(star_rec_hdr, 0);
			star_rec_hdr->rsiz = (unsigned short)(SIZEOF(rec_hdr) + SIZEOF(block_id));
			BLK_SEG(bs_ptr, (bytptr)star_rec_hdr, SIZEOF(rec_hdr));
			GET_USHORT(temp_ushort, &left_ptr->rsiz);
			BLK_SEG(bs_ptr, ((bytptr)left_ptr + temp_ushort - SIZEOF(block_id)), SIZEOF(block_id));
		}
	}
	/* ... then the part to the right of the deleted range */
	blkseglen = (int)((bytptr)top_of_block - (bytptr)right_ptr);
	assert(0 <= blkseglen);
	if (0 != blkseglen)
	{	/* from here on next_rec_shrink is the number of key bytes added to the record at right_ptr */
		next_rec_shrink = targ_len + prev_len;
		if (0 >= next_rec_shrink)
		{
			BLK_SEG(bs_ptr, (bytptr)right_ptr, blkseglen);
		} else
		{	/* new header with a reduced compression count and a larger size, followed by the added key bytes
			 * and then the remainder of the block after right_ptr's original header
			 */
			BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
			SET_CMPC(new_rec_hdr, EVAL_CMPC(right_ptr) - next_rec_shrink);
			GET_USHORT(temp_ushort, &right_ptr->rsiz);
			new_rec_hdr->rsiz = temp_ushort + next_rec_shrink;
			BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr));
			if (targ_len)
			{
				BLK_ADDR(skb, targ_len, unsigned char);
				memcpy(skb, &search_key->base[targ_base], targ_len);
				BLK_SEG(bs_ptr, skb, targ_len);
			}
			if (prev_len)
				BLK_SEG(bs_ptr, (bytptr)(right_prev_ptr + 1) , prev_len);
			right_bytptr = (bytptr)(right_ptr + 1);
			blkseglen = (int)((bytptr)top_of_block - right_bytptr);
			if (0 < blkseglen)
			{
				BLK_SEG(bs_ptr, right_bytptr, blkseglen);
			} else
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_rmisalign;
			}
		}
	}
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_mkblk;
	}
	cse = t_write(blkhist, (unsigned char *)bs1, 0, 0, level, first_copy, TRUE, GDS_WRITE_KILLTN);
	assert(!dollar_tlevel || !cse->high_tlevel);
	*cseptr = cse;
	if (horiz_growth)
	{	/* old_cse (the lower t_level element) receives the undo information recorded below */
		old_cse = cse->low_tlevel;
		assert(old_cse && old_cse->done);
		assert(2 == (SIZEOF(old_cse->undo_offset) / SIZEOF(old_cse->undo_offset[0])));
		assert(2 == (SIZEOF(old_cse->undo_next_off) / SIZEOF(old_cse->undo_next_off[0])));
		assert(!old_cse->undo_next_off[0] && !old_cse->undo_offset[0]);
		assert(!old_cse->undo_next_off[1] && !old_cse->undo_offset[1]);
	}
	if ((NULL != cse) && (0 != cse->first_off))
	{	/* fix up chains in the block to account for deleted records */
		prev = NULL;
		curr = buffer + cse->first_off;
		GET_LONGP(&curr_chain, curr);
		while (curr < (bytptr)del_ptr)
		{	/* follow chain to first deleted record */
			if (0 == curr_chain.next_off)
				break;
			if (right_ptr == top_of_block && (bytptr)del_ptr - curr == SIZEOF(off_chain))
				break; /* special case described below: stop just before the first deleted record */
			prev = curr;
			curr += curr_chain.next_off;
			GET_LONGP(&curr_chain, curr);
		}
		if (right_ptr == top_of_block && (bytptr)del_ptr - curr == SIZEOF(off_chain))
		{	/* if the right side of the block is gone and our last chain is in the last record,
			 * terminate the chain and adjust the previous entry to point at the new *-key
			 * NOTE: this assumes there's NEVER a TP delete of records in the GVT
			 */
			assert(0 != level);
			/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
			if (horiz_growth)
			{
				old_cse->undo_next_off[0] = curr_chain.next_off;
				old_cse->undo_offset[0] = (block_offset)(curr - buffer);
				assert(old_cse->undo_offset[0]);
			}
			curr_chain.next_off = 0;
			GET_LONGP(curr, &curr_chain);	/* copies the modified chain entry back into the buffer */
			if (NULL != prev)
			{	/* adjust previous chain next_off to reflect the fact that the record it refers to is now
				 * a *-key
				 */
				GET_LONGP(&prev_chain, prev);
				/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
				if (horiz_growth)
				{
					old_cse->undo_next_off[1] = prev_chain.next_off;
					old_cse->undo_offset[1] = (block_offset)(prev - buffer);
					assert(old_cse->undo_offset[1]);
				}
				prev_chain.next_off = (unsigned int)((bytptr)left_ptr - prev + (unsigned int)(SIZEOF(rec_hdr)));
				GET_LONGP(prev, &prev_chain);
			} else /* it's the first (and only) one */
				cse->first_off = (block_offset)((bytptr)left_ptr - buffer + SIZEOF(rec_hdr));
		} else if (curr >= (bytptr)del_ptr)
		{	/* may be more records on the right that aren't deleted */
			while (curr < (bytptr)right_ptr)
			{	/* follow chain past last deleted record */
				if (0 == curr_chain.next_off)
					break;
				curr += curr_chain.next_off;
				GET_LONGP(&curr_chain, curr);
			}
			/* prev :   ptr to chain record immediately preceding the deleted area,
			 *	    or 0 if none.
			 *
			 * curr :   ptr to chain record immediately following the deleted area,
			 *	    or to last chain record.
			 */
			if (curr < (bytptr)right_ptr)
			{	/* the former end of the chain is going, going, gone */
				if (NULL != prev)
				{	/* terminate the chain before the delete */
					GET_LONGP(&prev_chain, prev);
					/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = prev_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(prev - buffer);
						assert(old_cse->undo_offset[0]);
					}
					prev_chain.next_off = 0;
					GET_LONGP(prev, &prev_chain);
				} else
					cse->first_off = 0; /* the whole chain is gone */
			} else
			{	/* stitch up the left and right to account for the hole in the middle */
				/* next_rec_shrink is the change in record size due to the new compression count */
				if (NULL != prev)
				{
					GET_LONGP(&prev_chain, prev);
					/* ??? new compression may be less (ie +) so why are negative shrinks ignored? */
					/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = prev_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(prev - buffer);
						assert(old_cse->undo_offset[0]);
					}
					prev_chain.next_off = (unsigned int)(curr - prev - ((bytptr)right_ptr - (bytptr)del_ptr)
						+ (next_rec_shrink > 0 ? next_rec_shrink : 0));
					GET_LONGP(prev, &prev_chain);
				} else /* curr remains first: adjust the head */
					cse->first_off = (block_offset)(curr - buffer - ((bytptr)right_ptr - (bytptr)del_ptr)
						+ (next_rec_shrink > 0 ? next_rec_shrink : 0));
			}
		}
	}
	horiz_growth = FALSE;
	return cdb_sc_normal;
}
/* Records a block in a kill set so that it can be freed later.
 *
 * Outside of TP (dollar_tlevel is 0) the block number is appended to the kill set at "kill_set_tail".
 * In TP the cw_set_element of the block (if there is one) is located first and its mode is changed to the
 * corresponding kill_t_* value; the entry then goes into the kill set of sgm_info_ptr.
 *
 * Input Parameters:
 *	blk		: block number; in TP it may be a chain (flag set) carrying a cw_set index instead
 *	level		: level stored with the kill set entry
 *	committed	: when FALSE the cw_set_element's mode is updated (and a new element is created if the existing
 *			  one belongs to a different t_level); when TRUE the existing mode is only examined
 * Side effects visible here: "horiz_growth" is reset to FALSE on entry and set to TRUE when a new cw_set_element
 * is created for the current t_level; kill sets are allocated with malloc() as needed.
 */
void gvcst_delete_blk(block_id blk, int level, boolean_t committed)
{
	cw_set_element *cse, *old_cse;
	kill_set *ks;
	off_chain chain;
	srch_blk_status *tp_srch_status;
	uint4 dummy, iter;

	/* an assert to verify the validity of the block number was removed
	 * because it could be triggered by a concurrency conflict
	 */
	horiz_growth = FALSE;
	if (dollar_tlevel == 0)
		ks = kill_set_tail;
	else
	{
		PUT_LONG(&chain, blk);
		tp_srch_status = NULL;
		if (chain.flag == 1)
			/* chain : look the element up by its cw_set index */
			tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain.cw_index, &cse);
		else
		{	/* real block number : look it up among the blocks this transaction has in use */
			tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)blk, &dummy);
			cse = tp_srch_status ? tp_srch_status->ptr : NULL;
		}
		assert(!cse || !cse->high_tlevel);
		if (cse)
		{
			if (!committed)
			{
				assert(dollar_tlevel >= cse->t_level);
				if (cse->t_level != dollar_tlevel)
				{	/* this part of the code is almost similar to that in t_write(),
					 * any changes in one should be reflected in the other
					 */
					horiz_growth = TRUE;
					old_cse = cse;
					/* create a copy of the element for the current t_level and link the two */
					cse = (cw_set_element *)get_new_free_element(sgm_info_ptr->tlvl_cw_set_list);
					memcpy(cse, old_cse, sizeof(cw_set_element));
					cse->low_tlevel = old_cse;
					cse->high_tlevel = NULL;
					old_cse->high_tlevel = cse;
					cse->t_level = dollar_tlevel;
					assert(2 == (sizeof(cse->undo_offset) / sizeof(cse->undo_offset[0])));
					assert(2 == (sizeof(cse->undo_next_off) / sizeof(cse->undo_next_off[0])));
					for (iter = 0; iter < 2; iter++)
						cse->undo_next_off[iter] = cse->undo_offset[iter] = 0;
					/* NOTE(review): the build below runs only when old_cse->new_buff is NULL and passes
					 * that same pointer as the destination - confirm gvcst_blk_build() copes with this.
					 */
					if (!old_cse->new_buff) /* it's possible to arrive here with an unbuilt block */
						gvcst_blk_build(old_cse, (uchar_ptr_t)old_cse->new_buff, 0);
					old_cse->done = TRUE;	/* set unconditionally, it is not part of the "if" above */
					/* the new element gets its own buffer holding a copy of the old element's block */
					cse->new_buff = ((new_buff_buddy_list *) get_new_free_element(sgm_info_ptr->new_buff_list))->new_buff;
					memcpy(cse->new_buff, old_cse->new_buff, ((blk_hdr_ptr_t)old_cse->new_buff)->bsiz);
					/* tp_srch_status->ptr has to be updated here, since gvcst_kill() does
					 * not call tp_hist() at the end as in gvcst_put_blk()
					 */
					if (tp_srch_status)
						tp_srch_status->ptr = (void *)cse;
				}
				switch (cse->mode)
				{
				case gds_t_create:
					cse->mode = kill_t_create;
					if (level == 0)
						return;		/* no kill set entry is made in this case */
					break;
				case gds_t_write:
					cse->mode = kill_t_write;
					break;
				default:
					;
				}
			} else
			{
				switch(cse->mode)
				{
				case kill_t_create:
					if (level == 0)
						return;		/* no kill set entry is made in this case */
					break;
				default:
					if (chain.flag)
					{	/* replace the chain by the block number stored in the element */
						chain.flag = 0;
						blk = cse->blk;
					}
					break;
				}
			}
		}
		ks = sgm_info_ptr->kill_set_tail;
		if (NULL == ks) /* Allocate first kill set to sgm_info_ptr block */
		{
			ks = sgm_info_ptr->kill_set_tail = sgm_info_ptr->kill_set_head = (kill_set *)malloc(sizeof(kill_set));
			ks->used = 0;
			ks->next_kill_set = NULL;
		}
	}
	/* Move on to (allocating if necessary) a kill set that still has room.
	 * NOTE(review): the global "kill_set_tail" is assigned here on the TP path as well, while
	 * sgm_info_ptr->kill_set_tail is left unchanged - confirm this is intended.
	 */
	while (ks->used >= BLKS_IN_KILL_SET)
	{
		if (ks->next_kill_set == NULL)
		{
			ks->next_kill_set = (kill_set *)malloc(sizeof(kill_set));
			ks->next_kill_set->used = 0;
			ks->next_kill_set->next_kill_set = NULL;
		}
		ks = kill_set_tail = ks->next_kill_set;
	}
	ks->blk[ks->used].level = level;
	/* "chain" is only initialized in TP; the dollar_tlevel test is evaluated first */
	if (dollar_tlevel == 0 || chain.flag == 0)
	{
		ks->blk[ks->used].block = blk;
		ks->blk[ks->used].flag = 0;
	} else
	{	/* still a chain : store the cw_set index together with the flag */
		ks->blk[ks->used].block = chain.cw_index;
		ks->blk[ks->used].flag = chain.flag;
	}
	++ks->used;
	assert(ks->used <= BLKS_IN_KILL_SET);
}
/****************************************************************************************** Input Parameters: level: level of working block dest_blk_id: last destination used for swap Output Parameters: kill_set_ptr: Kill set to be freed *exclude_glist_ptr: List of globals not to be moved for a swap destination Input/Output Parameters: gv_target : as working block's history reorg_gv_target->hist : as desitnitions block's history ******************************************************************************************/ enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr) { unsigned char x_blk_lmap; unsigned short temp_ushort; int rec_size1, rec_size2; int wlevel, nslevel, dest_blk_level; int piece_len1, piece_len2, first_offset, second_offset, work_blk_size, work_parent_size, dest_blk_size, dest_parent_size; int dest_child_cycle; int blk_seg_cnt, blk_size; trans_num ctn; int key_len, key_len_dir; block_id dest_blk_id, work_blk_id, child1, child2; enum cdb_sc status; srch_hist *dest_hist_ptr, *dir_hist_ptr; cache_rec_ptr_t dest_child_cr; blk_segment *bs1, *bs_ptr; sm_uc_ptr_t saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr, bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1; boolean_t gbl_target_was_set, blk_was_free, deleted; gv_namehead *save_targ; srch_blk_status bmlhist, destblkhist, *hist_ptr; unsigned char save_cw_set_depth; cw_set_element *tmpcse; jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */ unsigned int bsiz; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; dest_blk_id = *pdest_blk_id; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ if (NULL == TREF(gv_reorgkey)) GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ)); dest_hist_ptr = &(reorg_gv_target->hist); dir_hist_ptr = reorg_gv_target->alt_hist; blk_size = cs_data->blk_size; work_parent_ptr = gv_target->hist.h[level+1].buffaddr; work_parent_size = 
((blk_hdr_ptr_t)work_parent_ptr)->bsiz; work_blk_ptr = gv_target->hist.h[level].buffaddr; work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz; work_blk_id = gv_target->hist.h[level].blk_num; if (blk_size < work_blk_size) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } cws_reorg_remove_index = 0; /*===== Infinite loop to find the destination block =====*/ for ( ; ; ) { blk_was_free = FALSE; INCR_BLK_NUM(dest_blk_id); /* A Pre-order traversal should not cause a child block to go to its parent. * However, in case it happens because already the organization was like that or for any other reason, skip swap. * If we decide to swap, code below should be changed to take care of the special case. * Still a grand-child can go to its grand-parent. This is rare and following code can handle it. */ if (dest_blk_id == gv_target->hist.h[level+1].blk_num) continue; if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id) { *pdest_blk_id = dest_blk_id; return cdb_sc_oprnotneeded; } ctn = cs_addrs->ti->curr_tn; /* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk") * through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below). * This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to * choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not * deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and * a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon * the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not * be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing * a buffer in case of MUPIP REORG). 
To delete these previous iteration blocks, we use the "cws_reorg_remove_array" * variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread * in one iteration down below. And that number is the sum of * + MAX_BT_DEPTH : for the t_qread while loop down the tree done below * + 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below * + 2 : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a * bitmap block done inside the call to get_lmap below * = 3 * MAX_BT_DEPTH + 2 * To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2. * This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h */ /* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */ for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--) { deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]); assert(deleted); } /* read corresponding bitmap block before attempting to read destination block. * if bitmap indicates block is free, we will not read the destination block */ bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr); if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap || ((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) || ((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL) { assert(CDB_STAGNATE > t_tries); return cdb_sc_badbitmap; } if (BLK_FREE != x_blk_lmap) { /* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block * in case we later detect that the before-image needs to be written. 
*/ if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr))) { assert(t_tries < CDB_STAGNATE); return (enum cdb_sc)rdfail_detail; } destblkhist.blk_num = dest_blk_id; destblkhist.buffaddr = dest_blk_ptr; destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl; } if (BLK_BUSY != x_blk_lmap) { /* x_blk_map is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */ blk_was_free = TRUE; break; } /* dest_blk_id might contain a *-record only. * So follow the pointer to go to the data/index block, which has a non-* key to search. */ nslevel = dest_blk_level; if (MAX_BT_DEPTH <= nslevel) { assert(CDB_STAGNATE > t_tries); return cdb_sc_maxlvl; } rec_base = dest_blk_ptr + SIZEOF(blk_hdr); GET_RSIZ(rec_size1, rec_base); tblk_ptr = dest_blk_ptr; while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel)) { GET_LONG(child1, (rec_base + SIZEOF(rec_hdr))); if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_rdfail; } if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr))) { assert(t_tries < CDB_STAGNATE); return (enum cdb_sc)rdfail_detail; } /* leaf of a killed GVT can have block header only. Skip those blocks */ if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz) break; nslevel--; rec_base = tblk_ptr + SIZEOF(blk_hdr); GET_RSIZ(rec_size1, rec_base); } /* leaf of a killed GVT can have block header only. Skip those blocks */ if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz) continue; /* get length of global variable name (do not read subscript) for dest_blk_id */ GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr)); /* key_len = length of 1st key value (including subscript) for dest_blk_id */ GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr)); if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len)) { /* Earlier used to restart here always. 
But dest_blk_id can be a block, * which is just killed and still marked busy. Skip it, if we are in last retry. */ if (CDB_STAGNATE <= t_tries) continue; else return cdb_sc_blkmod; } memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir); (TREF(gv_reorgkey))->base[key_len_dir] = 0; (TREF(gv_reorgkey))->end = key_len_dir; if (exclude_glist_ptr->next) { /* exclude blocks for globals in the list of EXCLUDE option */ if (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr)) continue; } save_targ = gv_target; if (INVALID_GV_TARGET != reset_gv_target) gbl_target_was_set = TRUE; else { gbl_target_was_set = FALSE; reset_gv_target = save_targ; } gv_target = reorg_gv_target; gv_target->root = cs_addrs->dir_tree->root; gv_target->clue.end = 0; /* assign Directory tree path to find dest_blk_id in dir_hist_ptr */ status = gvcst_search(TREF(gv_reorgkey), dir_hist_ptr); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); return status; } if (dir_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1) { /* may be in a kill_set of another process */ RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); continue; } for (wlevel = 0; wlevel <= dir_hist_ptr->depth && dir_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++); if (dir_hist_ptr->h[wlevel].blk_num == dest_blk_id) { /* do not swap a dir_tree block */ RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); continue; } /* gv_reorgkey will now have the first key from dest_blk_id, * or, from a descendant of dest_blk_id (in case it had a *-key only). 
*/ memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len); (TREF(gv_reorgkey))->end = key_len - 1; GET_KEY_LEN(key_len_dir, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr)); /* Get root of GVT for dest_blk_id */ GET_LONG(gv_target->root, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir); if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1))) { assert(t_tries < CDB_STAGNATE); RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); return cdb_sc_blkmod; } /* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */ gv_target->clue.end = 0; status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr); RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */ dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */ continue; if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1))) { assert(t_tries < CDB_STAGNATE); return (cdb_sc_normal != status ? status : cdb_sc_blkmod); } for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++) dest_hist_ptr->h[wlevel].tn = ctn; dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr; dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz; dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr; dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz; break; } /*===== End of infinite loop to find the destination block =====*/ /*----------------------------------------------------- Now modify blocks for swapping. Maximum of 4 blocks. 
-----------------------------------------------------*/ if (!blk_was_free) { /* 1: dest_blk_id into work_blk_id */ BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr)); if (!BLK_FINI (bs_ptr,bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(gv_target->hist.h[level].blk_num == work_blk_id); assert(gv_target->hist.h[level].buffaddr == work_blk_ptr); t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN); } /* 2: work_blk_id into dest_blk_id */ if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* work_blk_id will be swapped with its child. * This is the only vertical swap. Here working block goes to its child. * Working block cannot goto its parent because of traversal */ if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, dest_parent_size, unsigned char); memcpy(saved_blk, dest_parent_ptr, dest_parent_size); first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset; GET_RSIZ(rec_size1, saved_blk + first_offset); if (work_blk_size < first_offset + rec_size1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } piece_len1 = first_offset + rec_size1; BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */ BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num); assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr); t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, 
level, TRUE, TRUE, GDS_WRITE_KILLTN); } else /* free block or, when working block does not move vertically (swap with parent/child) */ { BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, work_blk_size, unsigned char); memcpy(saved_blk, work_blk_ptr, work_blk_size); BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (blk_was_free) { tmpcse = &cw_set[cw_set_depth]; t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level); /* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode * from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here. */ assert(dest_blk_id == tmpcse->blk); tmpcse->mode = gds_t_acquired; /* If snapshots are in progress, we might want to read the before images of the FREE blocks also. * Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence * will not read the before images of the FREE blocks in t_end. To workaround this, set * cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of * the FREE blocks if needed. */ (BLK_FREE == x_blk_lmap) ? SET_FREE(tmpcse) : SET_NFREE(tmpcse); /* No need to write before-image in case the block is FREE. In case the database had never been fully * upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered * FREE (i.e. no need to write before-images since backward journal recovery will never be expected * to take the database to a point BEFORE the mupip upgrade). */ if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once) tmpcse->old_block = NULL; else { /* Destination is a recycled block that needs a before image */ tmpcse->old_block = destblkhist.buffaddr; /* Record cr,cycle. 
This is used later in t_end to determine if checksums need to be recomputed */ tmpcse->cr = destblkhist.cr; tmpcse->cycle = destblkhist.cycle; jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL; if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn)) { /* Compute CHECKSUM for writing PBLK record before getting crit. * It is possible that we are reading a block that is actually marked free in * the bitmap (due to concurrency issues at this point). Therefore we might be * actually reading uninitialized block headers and in turn a bad value of * "old_block->bsiz". Restart if we ever access a buffer whose size is greater * than the db block size. */ bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz; if (bsiz > blk_size) { assert(CDB_STAGNATE > t_tries); return cdb_sc_lostbmlcr; } JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz); } } assert(GDSVCURR == tmpcse->ondsk_blkver); /* should have been set by t_create above */ } else { hist_ptr = &dest_hist_ptr->h[dest_blk_level]; assert(dest_blk_id == hist_ptr->blk_num); assert(dest_blk_ptr == hist_ptr->buffaddr); t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN); } } if (!blk_was_free) { /* 3: Parent of destination block (may be parent of working block too) */ if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* dest parent == work_blk parent */ BLK_INIT(bs_ptr, bs1); /* Interchange pointer to dest_blk_id and work_blk_id */ if (level != dest_blk_level || gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset) { first_offset = gv_target->hist.h[level+1].curr_rec.offset; second_offset = dest_hist_ptr->h[level+1].curr_rec.offset; } else { first_offset = 
dest_hist_ptr->h[level+1].curr_rec.offset; second_offset = gv_target->hist.h[level+1].curr_rec.offset; } GET_RSIZ(rec_size1, dest_parent_ptr + first_offset); GET_RSIZ(rec_size2, dest_parent_ptr + second_offset); if (dest_parent_size < first_offset + rec_size1 || dest_parent_size < second_offset + rec_size2 || BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } piece_len1 = first_offset + rec_size1 - SIZEOF(block_id); piece_len2 = second_offset + rec_size2 - SIZEOF(block_id); GET_LONG(child1, dest_parent_ptr + piece_len1); GET_LONG(child2, dest_parent_ptr + piece_len2); BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, child2); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1, second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, child1); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2, dest_parent_size - second_offset - rec_size2); if (!BLK_FINI(bs_ptr,bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(level == dest_blk_level); assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr); t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN); } else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* Destination block moved in the position of working block. 
* So destination block's parent's pointer should be changed to work_blk_id */ BLK_INIT(bs_ptr, bs1); GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset); if (dest_parent_size < rec_size1 + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, work_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1, dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset - rec_size1); if (!BLK_FINI(bs_ptr,bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr); t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1, FALSE, TRUE, GDS_WRITE_KILLTN); } } /* 4: Parent of working block, if different than destination's parent or, destination was a free block */ if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* Parent block of working blk should correctly point the working block. 
Working block went to dest_blk_id */ GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset)); if (work_parent_size < rec_size1 + gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr), gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, dest_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1, work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr); t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN); } /* else already taken care of, when dest_blk_id moved */ if (blk_was_free) { /* A free/recycled block will become busy block. * So the local bitmap must be updated. * Local bit map block will be added in the list of update arrray for concurrency check and * also the cw_set element will be created to mark the free/recycled block as free. * kill_set_ptr will save the block which will become free. */ child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */ bmlhist.buffaddr = bmp_buff; bmlhist.blk_num = child1; child1 = dest_blk_id - child1; assert(child1); PUT_LONG(update_array_ptr, child1); /* Need to put bit maps on the end of the cw set for concurrency checking. * We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth". * Hence the save and restore logic (for "cw_set_depth") below. 
*/ save_cw_set_depth = cw_set_depth; assert(!cw_map_depth); t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1); /* will increment cw_set_depth */ cw_map_depth = cw_set_depth; /* set cw_map_depth to the latest cw_set_depth */ cw_set_depth = save_cw_set_depth; /* restore cw_set_depth */ /* t_write_map simulation end */ update_array_ptr += SIZEOF(block_id); child1 = 0; PUT_LONG(update_array_ptr, child1); update_array_ptr += SIZEOF(block_id); assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 1 free block is now becoming BLK_USED in the bitmap */ /* working block will be removed */ kill_set_ptr->blk[kill_set_ptr->used].flag = 0; kill_set_ptr->blk[kill_set_ptr->used].level = 0; kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id; } *pdest_blk_id = dest_blk_id; return cdb_sc_normal; }