/* tp_get_cw: return (via *cs1) the most recent cw_set_element at position "depth"
 * in the current region's cw-set list.  "cs" is unused here beyond the interface
 * shared with the non-TP caller; the element is located through sgm_info_ptr.
 */
void tp_get_cw (cw_set_element *cs, int depth, cw_set_element **cs1)
{
	cw_set_element	*curr;	/* local cursor: the TRAVERSE macro would otherwise double-dereference *cs1 */

	assert (depth < sgm_info_ptr->cw_set_depth);
	curr = (cw_set_element *)find_element(sgm_info_ptr->cw_set_list, depth);
	assert(curr);
	/* find_element() hands back the head of the horizontal list (the least t_level).
	 * Walk forward to the latest element.  Average transaction depth is small (~2),
	 * so a linear walk is cheap enough that no head/tail links are maintained for
	 * the horizontal list of cw_set_elements.
	 */
	TRAVERSE_TO_LATEST_CSE(curr);
	*cs1 = curr;
}
/* tp_incr_clean_up: roll back all transaction-level-specific state created at
 * t_levels greater than "newlevel" (an incremental rollback, TROLLBACK with an
 * argument).  Walks every region's cw-set vertical list and prunes horizontal
 * list members whose t_level exceeds newlevel.
 * NOTE(review): the visible source is truncated -- the "else" branch at the end
 * of this fragment (and the remainder of the function) is missing.
 */
void tp_incr_clean_up(short newlevel)
{
	uint4		num_free;	/* number of cw-set-elements whose entire horizontal list was removed */
	boolean_t	freed;		/* NOTE(review): unused in the visible fragment */
	sgm_info	*si;
	cw_set_element	*cse, *next_cse, *tmp_cse;
	cw_set_element	*cse_newlvl;	/* pointer to that cse in a given horizontal list closest to "newlevel" */
	srch_blk_status	*tp_srch_status;
	int		min_t_level;	/* t_level of the head of the horizontal-list of a given cw-set-element */
	gd_region	*tmp_gv_cur_region;
	ht_ent_int4	*tabent;

	assert(newlevel > 0);
	if (JNL_FENCE_LIST_END != jnl_fence_ctl.fence_list)	/* currently global_tlvl_info struct holds only jnl related info */
		rollbk_gbl_tlvl_info(newlevel);
	tmp_gv_cur_region = gv_cur_region;	/* save region and associated pointers to restore them later */
	for (si = first_sgm_info; si != NULL; si = si->next_sgm_info)
	{
		num_free = 0;
		sgm_info_ptr = si;	/* maintain sgm_info_ptr & gv_cur_region binding whenever doing TP_CHANGE_REG */
		TP_CHANGE_REG_IF_NEEDED(si->gv_cur_region);
		rollbk_sgm_tlvl_info(newlevel, si);	/* rollback all the tlvl specific info */
		cse = si->first_cw_set;
		DEBUG_ONLY(min_t_level = 1);
		/* A property that will help a lot in understanding this algorithm is the following.
		 * All cse's in a given horizontal list will have their "next_cw_set" pointing to the same cse
		 * which is guaranteed to be the head of the horizontal list of the next cw-set-element in the vertical list.
		 */
		while (NULL != cse)
		{
			assert(NULL == cse->low_tlevel);	/* "cse" is always the head (lowest t_level) of its horizontal list */
			next_cse = cse->next_cw_set;
			/* Note down tp_srch_status corresponding to cse (in case it exists). Need to later reset "->cse" field
			 * of this structure to point to the new cse for this block. Note that if cse->mode is gds_t_create,
			 * there will be no tp_srch_status entry allotted for cse->blk (one will be there only for the chain.flag
			 * representation of this to-be-created block). Same case with mode of kill_t_create as it also corresponds
			 * to a non-existent block#. Therefore dont try looking up the hashtable for this block in those cases.
			 */
			tp_srch_status = NULL;
			assert((gds_t_create == cse->mode) || (kill_t_create == cse->mode)
				|| (gds_t_write == cse->mode) || (kill_t_write == cse->mode));
			if ((gds_t_create != cse->mode) && (kill_t_create != cse->mode)
				&& (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use, (uint4 *)&cse->blk))))
				tp_srch_status = tabent->value;
			DEBUG_ONLY(
				/* the hashtable entry (if any) must point at the latest element of the horizontal list */
				tmp_cse = cse;
				TRAVERSE_TO_LATEST_CSE(tmp_cse);
				assert((NULL == tp_srch_status) || (tp_srch_status->cse == tmp_cse));
			)
			if (newlevel < cse->t_level)
			{	/* delete the entire horizontal list for this cw-set-element.
				 * And because of the following assert, we will be deleting the entire horizontal list for
				 * all cw-set-elements following the current one in the vertical list.
				 */
				assert(min_t_level <= cse->t_level);
				DEBUG_ONLY(min_t_level = cse->t_level;)
				if (!num_free)
				{	/* first time an entire cw-set-element's horizontal-list needs to be removed.
					 * reset si->first_cw_set or si->last_cw_set pointers as appropriate.
					 * the actual free up of the cw-set-elements will occur later in this loop
					 */
					tmp_cse = cse->prev_cw_set;
					assert(((NULL == tmp_cse) && (cse == si->first_cw_set))
						|| ((NULL != tmp_cse) && (cse != si->first_cw_set)));
					if (cse == si->first_cw_set)
						si->first_cw_set = NULL;
					si->last_cw_set = tmp_cse;
					while (NULL != tmp_cse)
					{	/* reset forward-link of horizontal-list of the previous cw_set_element */
						assert(tmp_cse->next_cw_set == cse);
						tmp_cse->next_cw_set = NULL;
						tmp_cse = tmp_cse->high_tlevel;
					}
				}
				num_free++;	/* count of number of elements whose vertical list has been completely removed */
				cse_newlvl = NULL;
			} else
			/* NOTE(review): source truncated here; the else branch body is not visible */
/* bm_getfree: find a free block in the database, searching local bit-maps starting
 * near "orig_hint", extending the file when no free space remains.
 *
 * Parameters:
 *	orig_hint	- preferred block number to start searching from (reset to 1 if past EOF)
 *	blk_used	- out parameter filled in by bm_find_blk (presumably whether the found
 *			  block was previously used -- TODO confirm against bm_find_blk)
 *	cw_work		- lower bound of the cw-set region to scan for pending bitmap updates
 *	cs		- base of the cw-set array (non-TP) / anchor for tp_get_cw (TP)
 *	cw_depth_ptr	- pointer to the current cw-set depth (top of the scan range)
 *
 * Returns: the allocated block number (bml + free_bit) on success, or one of:
 *	- a gdsfilext() status code if the file extension failed
 *	- FILE_EXTENDED for MM access method after a successful extension
 *	- MAP_RD_FAIL if a bitmap block could not be read or is corrupt
 */
block_id bm_getfree(block_id orig_hint, boolean_t *blk_used, unsigned int cw_work, cw_set_element *cs, int *cw_depth_ptr)
{
	cw_set_element	*cs1;
	sm_uc_ptr_t	bmp;		/* buffer address of the local bitmap block being examined */
	block_id	bml, hint, hint_cycled, hint_limit;
	block_id_ptr_t	b_ptr;
	int		cw_set_top, depth, lcnt;
	unsigned int	local_maps, map_size, n_decrements = 0, total_blks;
	trans_num	ctn;
	int4		free_bit, offset;
	uint4		space_needed;
	uint4		status;
	srch_blk_status	blkhist;

	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	if (orig_hint >= total_blks)		/* for TP, hint can be > total_blks */
		orig_hint = 1;
	hint = orig_hint;
	hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);	/* local-map index at which the scan has wrapped */
	hint_limit = DIVIDE_ROUND_DOWN(orig_hint, BLKS_PER_LMAP);	/* local-map index containing the original hint */
	local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
	for (lcnt = 0; lcnt <= local_maps; lcnt++)
	{
		bml = bmm_find_free(hint / BLKS_PER_LMAP, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if ((NO_FREE_SPACE == bml) || (bml >= hint_cycled))
		{	/* if no free space or might have looped to original map, extend */
			if ((NO_FREE_SPACE != bml) && (hint_limit < hint_cycled))
			{	/* first wrap: restart the search from block 1 up to the original hint's map */
				hint_cycled = hint_limit;
				hint = 1;
				continue;
			}
			if (SS_NORMAL != (status = gdsfilext(cs_data->extension_size, total_blks)))
				return (status);
			if (dba_mm == cs_data->acc_meth)
				return (FILE_EXTENDED);	/* MM caller must remap before the new blocks are usable */
			/* extension succeeded: recompute the search bounds over the enlarged file and
			 * continue scanning starting at the newly added section */
			hint = total_blks;
			total_blks = cs_addrs->ti->total_blks;
			hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
			local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
			/* note that you can make an optimization of not going back over the whole database and going over
			 * only the extended section.  but since it is very unlikely that a free block won't be found
			 * in the extended section and the fact that we are starting from the extended section in either
			 * approach and the fact that we have a GTMASSERT to check that we don't have a lot of
			 * free blocks while doing an extend and the fact that it is very easy to make the change to do
			 * a full-pass, the full-pass solution is currently being implemented
			 */
			lcnt = -1;	/* allow it one extra pass to ensure that it can take advantage of the entension */
			n_decrements++;	/* used only for debugging purposes */
			continue;
		}
		bml *= BLKS_PER_LMAP;	/* convert local-map index to the bitmap's block number */
		if (ROUND_DOWN2(hint, BLKS_PER_LMAP) != bml)
		{	/* not within requested map */
			if ((bml < hint) && (hint_cycled))	/* wrap? - second one should force an extend for sure */
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
			hint = bml + 1;				/* start at beginning */
		}
		if (ROUND_DOWN2(total_blks, BLKS_PER_LMAP) == bml)
			map_size = (total_blks - bml);	/* last (partial) local map covers fewer blocks */
		else
			map_size = BLKS_PER_LMAP;
		/* Determine whether this transaction already holds an update to this bitmap in its cw-set;
		 * if so, "depth" ends up non-zero and cs1 points at the latest cse for this bitmap.
		 */
		if (0 != dollar_tlevel)
		{
			depth = cw_work;
			cw_set_top = *cw_depth_ptr;
			if (depth < cw_set_top)
				tp_get_cw(cs, cw_work, &cs1);
			for (; depth < cw_set_top; depth++, cs1 = cs1->next_cw_set)
			{	/* do tp front to back because list is more efficient than tp_get_cw and forward pointers exist */
				if (bml == cs1->blk)
				{
					TRAVERSE_TO_LATEST_CSE(cs1);
					break;
				}
			}
			if (depth >= cw_set_top)
			{	/* not found: depth == 0 signals "read the bitmap from the database" below */
				assert(cw_set_top == depth);
				depth = 0;
			}
		} else
		{
			for (depth = *cw_depth_ptr - 1; depth >= cw_work; depth--)
			{	/* do non-tp back to front, because of adjacency */
				if (bml == (cs + depth)->blk)
				{
					cs1 = cs + depth;
					break;
				}
			}
			if (depth < cw_work)
			{
				assert(cw_work - 1 == depth);
				depth = 0;
			}
		}
		if (0 == depth)
		{	/* bitmap not in the cw-set: read it from the database and validate its header */
			ctn = cs_addrs->ti->curr_tn;
			if (!(bmp = t_qread(bml, (sm_int_ptr_t)&blkhist.cycle, &blkhist.cr)))
				return MAP_RD_FAIL;
			if ((BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz) || (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
			{	/* block does not look like a local bitmap: force a restart (or assert in dbg) */
				assert(CDB_STAGNATE > t_tries);
				rdfail_detail = cdb_sc_badbitmap;
				return MAP_RD_FAIL;
			}
			offset = 0;
		} else
		{	/* bitmap already updated in this transaction: use its old_block image and resume
			 * the search just past the last bit this transaction already allocated */
			bmp = cs1->old_block;
			b_ptr = (block_id_ptr_t)(cs1->upd_addr);
			b_ptr += cs1->reference_cnt - 1;
			offset = *b_ptr + 1;
		}
		if (offset < map_size)
		{
			free_bit = bm_find_blk(offset, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), map_size, blk_used);
			if (MAP_RD_FAIL == free_bit)
				return MAP_RD_FAIL;
		} else
			free_bit = NO_FREE_SPACE;
		if (NO_FREE_SPACE != free_bit)
			break;	/* found a free block in this map */
		if ((hint = bml + BLKS_PER_LMAP) >= total_blks)	/* if map is full, start at 1st blk in next map */
		{	/* wrap - second one should force an extend for sure */
			hint = 1;
			if (hint_cycled)
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
		}
		if ((0 == depth) && (FALSE != cs_addrs->now_crit))	/* if it's from the cw_set, its state is murky */
			bit_clear(bml / BLKS_PER_LMAP, MM_ADDR(cs_data));	/* if crit, repair master map error */
	}
	/* If not in the final retry, it is possible that free_bit is >= map_size (e.g. if bitmap block gets recycled). */
	if (map_size <= (uint4)free_bit && CDB_STAGNATE <= t_tries)
	{	/* bad free bit */
		assert((NO_FREE_SPACE == free_bit) && (lcnt > local_maps));	/* All maps full, should have extended */
		GTMASSERT;
	}
	if (0 != depth)
	{	/* append the newly allocated bit to the existing bitmap update in the cw-set */
		b_ptr = (block_id_ptr_t)(cs1->upd_addr);
		b_ptr += cs1->reference_cnt++;
		*b_ptr = free_bit;
	} else
	{	/* create a fresh bitmap-update entry in the cw-set */
		space_needed = (BLKS_PER_LMAP + 1) * sizeof(block_id);
		if (dollar_tlevel)
		{
			ENSURE_UPDATE_ARRAY_SPACE(space_needed);	/* have brackets for "if" for macros */
		}
		BLK_ADDR(b_ptr, space_needed, block_id);
		memset(b_ptr, 0, space_needed);
		*b_ptr = free_bit;
		blkhist.blk_num = bml;
		blkhist.buffaddr = bmp;	/* cycle and cr have already been assigned from t_qread */
		t_write_map(&blkhist, (uchar_ptr_t)b_ptr, ctn, 1); /* last parameter 1 is what cs->reference_cnt gets set to */
	}
	return bml + free_bit;
}
void tp_incr_commit(void) { sgm_info *si; cw_set_element *cse, *orig_cse, *prev_cse, *next_cse, *low_cse, *lower_cse; tlevel_info *tli, *prev_tli = NULL, *last_prev_tli = NULL; global_tlvl_info *gtli, *prev_gtli; srch_blk_status *tp_srch_status; ht_ent_int4 *tabent; for (si = first_sgm_info; si != NULL; si = si->next_sgm_info) { for (cse = si->first_cw_set; cse; cse = orig_cse->next_cw_set) { orig_cse = cse; TRAVERSE_TO_LATEST_CSE(cse); assert(dollar_tlevel >= cse->t_level); if (dollar_tlevel == cse->t_level) { cse->t_level--; low_cse = cse->low_tlevel; if (low_cse && low_cse->t_level == cse->t_level) /* delete the duplicate link */ { lower_cse = low_cse->low_tlevel; assert((low_cse->done && low_cse->new_buff) || (n_gds_t_op < cse->mode)); if (lower_cse) { assert(lower_cse->t_level < cse->t_level); lower_cse->high_tlevel = cse; cse->low_tlevel = lower_cse; if (!cse->new_buff) { /* if we never needed to build in the new level, copy the built copy * (if any) of the older level before going back to that level */ assert(!cse->done); cse->new_buff = low_cse->new_buff; } else if (low_cse->new_buff) free_element(si->new_buff_list, (char *)low_cse->new_buff); free_element(si->tlvl_cw_set_list, (char *)low_cse); orig_cse = cse; } else { /* In this case, there are only two elements in the horizontal list out of * which we are going to delete one. We prefer to copy the second link into * the first and delete the second (rather than simply deleting the first), since * the first element may be an intermediate element in the vertical list and * buddy list wont permit use of both free_element() and free_last_n_elements() * with a given list together. This might disturb the tp_srch_status->cse, so * reset it properly. Note that if cse->mode is gds_t_create, there will be no * tp_srch_status entry allotted for cse->blk (one will be there only for the * chain.flag representation of this to-be-created block). 
Same case with mode of * kill_t_create as it also corresponds to a non-existent block#. Therefore dont * try looking up the hashtable for this block in those cases. */ assert((gds_t_create == cse->mode) || (kill_t_create == cse->mode) || (gds_t_write == cse->mode) || (kill_t_write == cse->mode)); if ((gds_t_create != cse->mode) && (kill_t_create != cse->mode)) { if (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use, (uint4 *)&cse->blk))) tp_srch_status = tabent->value; else tp_srch_status = NULL; assert(!tp_srch_status || tp_srch_status->cse == cse); if (tp_srch_status) tp_srch_status->cse = low_cse; } assert(low_cse == orig_cse); /* Members that may not be uptodate in cse need to be copied back from low_cse. * They are next_cw_set, prev_cw_set, new_buff and done. */ prev_cse = low_cse->prev_cw_set; next_cse = low_cse->next_cw_set; if (!cse->new_buff) { /* if we never needed to build in the new level, copy the * built copy of the older level before going back to that level */ assert(!cse->done); cse->new_buff = low_cse->new_buff; } else if (low_cse->new_buff) free_element(si->new_buff_list, (char *)low_cse->new_buff); memcpy(low_cse, cse, SIZEOF(cw_set_element)); low_cse->next_cw_set = next_cse; low_cse->prev_cw_set = prev_cse; low_cse->high_tlevel = NULL; low_cse->low_tlevel = NULL; free_element(si->tlvl_cw_set_list, (char *)cse); orig_cse = low_cse; } } else assert(low_cse || orig_cse == cse); } }/* for (cse) */ /* delete the tlvl_info for this t_level */ for (tli = si->tlvl_info_head; tli; tli = tli->next_tlevel_info) { if (tli->t_level >= dollar_tlevel) break; prev_tli = tli; } assert(!tli || !tli->next_tlevel_info); if (prev_tli) prev_tli->next_tlevel_info = NULL; else si->tlvl_info_head = NULL; if (tli) free_last_n_elements(si->tlvl_info_list, 1); }/* for (si) */ /* delete the global (across all segments) tlvl info for this t_level */ for (prev_gtli = NULL, gtli = global_tlvl_info_head; gtli; gtli = gtli->next_global_tlvl_info) { if (dollar_tlevel <= 
gtli->t_level) break; prev_gtli = gtli; } assert(!global_tlvl_info_head || gtli); assert(!gtli || !gtli->next_global_tlvl_info); assert(!prev_gtli || (gtli && (dollar_tlevel == gtli->t_level))); FREE_GBL_TLVL_INFO(gtli); if (prev_gtli) prev_gtli->next_global_tlvl_info = NULL; else global_tlvl_info_head = NULL; }
/* tp_incr_commit (older revision): commit the innermost transaction level by folding
 * cw-set-elements at t_level == dollar_tlevel into the level below, then deleting the
 * per-region and global tlvl info for that level.
 * NOTE(review): this is a second definition of tp_incr_commit in the same file -- it
 * appears to be an older revision (uses lookup_hashtab_ent/->ptr instead of
 * lookup_hashtab_int4/->cse) concatenated alongside the newer one; the file cannot
 * link with both.
 * NOTE(review): prev_tli is initialized only at declaration and is not reset per
 * region, so a stale pointer from a previous si can be written through when a later
 * region's tlvl_info list is empty -- verify against the newer revision.
 */
void tp_incr_commit(void)
{
	uint4		duint4;		/* scratch out-parameter for lookup_hashtab_ent */
	sgm_info	*si;
	cw_set_element	*cse, *orig_cse, *prev_cse, *next_cse, *low_cse, *lower_cse;
	tlevel_info	*tli, *prev_tli = NULL, *last_prev_tli = NULL;	/* NOTE(review): last_prev_tli is never used */
	global_tlvl_info	*gtli, *prev_gtli;
	srch_blk_status	*tp_srch_status;

	for (si = first_sgm_info; si != NULL; si = si->next_sgm_info)
	{
		for (cse = si->first_cw_set; cse; cse = orig_cse->next_cw_set)
		{
			orig_cse = cse;			/* head of the horizontal list */
			TRAVERSE_TO_LATEST_CSE(cse);	/* operate on the latest (highest t_level) element */
			assert(dollar_tlevel >= cse->t_level);
			if (dollar_tlevel == cse->t_level)
			{
				cse->t_level--;
				low_cse = cse->low_tlevel;
				if (low_cse && low_cse->t_level == cse->t_level)
				{	/* delete the duplicate link */
					lower_cse = low_cse->low_tlevel;
					if (lower_cse)
					{	/* three or more elements: unlink and free low_cse */
						assert(lower_cse->t_level < cse->t_level);
						lower_cse->high_tlevel = cse;
						cse->low_tlevel = lower_cse;
						assert(low_cse->new_buff);
						if (!cse->new_buff)
						{	/* if we never needed to build in the new level, copy the
							 * built copy of the older level before going back to that level */
							assert(!cse->done && low_cse->done);
							cse->new_buff = low_cse->new_buff;
						} else
							free_element(si->new_buff_list,
								(char *)low_cse->new_buff - sizeof(que_ent));
						free_element(si->tlvl_cw_set_list, (char *)low_cse);
						orig_cse = cse;
					} else
					{	/* In this case, there are only two elements in the horizontal list out of
						 * which we are going to delete one. We prefer to copy the second link into
						 * the first and delete the second (rather than simply deleting the first), since
						 * the first element may be an intermediate element in the vertical list and
						 * buddy list wont permit use of both free_element() and free_last_n_elements()
						 * with a given list together.
						 * This might disturb the tp_srch_status->ptr, so reset it properly.
						 */
						tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(si->blks_in_use,
							(void *)cse->blk, &duint4);
						assert(!tp_srch_status || tp_srch_status->ptr == cse);
						if (tp_srch_status)
							tp_srch_status->ptr = low_cse;
						assert(low_cse == orig_cse);
						/* Members that may not be uptodate in cse need to be copied back from low_cse.
						 * They are next_cw_set, prev_cw_set, new_buff and done.
						 */
						prev_cse = low_cse->prev_cw_set;
						next_cse = low_cse->next_cw_set;
						assert(low_cse->new_buff);
						if (!cse->new_buff)
						{	/* if we never needed to build in the new level, copy the
							 * built copy of the older level before going back to that level */
							assert(!cse->done && low_cse->done);
							cse->new_buff = low_cse->new_buff;
						} else
							free_element(si->new_buff_list,
								(char *)low_cse->new_buff - sizeof(que_ent));
						memcpy(low_cse, cse, sizeof(cw_set_element));
						low_cse->next_cw_set = next_cse;
						low_cse->prev_cw_set = prev_cse;
						low_cse->high_tlevel = NULL;
						low_cse->low_tlevel = NULL;
						free_element(si->tlvl_cw_set_list, (char *)cse);
						orig_cse = low_cse;
					}
				} else
					assert(low_cse || orig_cse == cse);
			}
		}/* for (cse) */
		/* delete the tlvl_info for this t_level */
		for (tli = si->tlvl_info_head; tli; tli = tli->next_tlevel_info)
		{
			if (tli->t_level >= dollar_tlevel)
				break;
			prev_tli = tli;
		}
		assert(!tli || !tli->next_tlevel_info);
		if (prev_tli)
			prev_tli->next_tlevel_info = NULL;
		else
			si->tlvl_info_head = NULL;
		if (tli)
			free_last_n_elements(si->tlvl_info_list, 1);
	}/* for (si) */
	/* delete the global (across all segments) tlvl info for this t_level */
	for (prev_gtli = NULL, gtli = global_tlvl_info_head; gtli; gtli = gtli->next_global_tlvl_info)
	{
		if (dollar_tlevel <= gtli->t_level)
			break;
		prev_gtli = gtli;
	}
	assert(!global_tlvl_info_head || gtli);
	assert(!gtli || !gtli->next_global_tlvl_info);
	assert(!prev_gtli || (gtli && (dollar_tlevel == gtli->t_level)));
	FREE_GBL_TLVL_INFO(gtli);
	if (prev_gtli)
		prev_gtli->next_global_tlvl_info = NULL;
	else
		global_tlvl_info_head = NULL;
}