/*
  Free the fixed-size record that holds an index node.

  SYNOPSIS
    fragPtrI     i-value of the fragment record owning the node
    pageId       page number of the record (stored into Local_key::m_page_no)
    pageOffset   page index of the record (stored into Local_key::m_page_idx)
    node         address the caller holds for the node data; it is verified
                 against the address computed from the table descriptor

  The fragment and table records are guard-checked, the record is located
  with get_ptr(), and the node address is required to equal the record start
  plus the data offset of attribute 0.  The record is then handed to
  free_fix_rec().
*/
void
Dbtup::tuxFreeNode(Uint32 fragPtrI,
                   Uint32 pageId,
                   Uint32 pageOffset,
                   Uint32* node)
{
  jamEntry();

  // Resolve and validate the fragment record.
  FragrecordPtr fragPtr;
  fragPtr.i = fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);

  // Resolve and validate the table record the fragment belongs to.
  TablerecPtr tablePtr;
  tablePtr.i = fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  // Address of the record on its page.
  Local_key key;
  key.m_page_no = pageId;
  key.m_page_idx = pageOffset;

  PagePtr pagePtr;
  Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &key, tablePtr.p);

  // Descriptor slot of attribute 0; the word after it carries the offset
  // of the attribute data inside the record.
  const Uint32 firstAttrId = 0;
  const Uint32 attrDescIndex =
    tablePtr.p->tabDescriptor + (firstAttrId << ZAD_LOG_SIZE);
  const Uint32 attrDataOffset =
    AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr);

  // The caller's node pointer must address exactly that attribute data.
  ndbrequire(node == (Uint32*)ptr + attrDataOffset);

  free_fix_rec(fragPtr.p, tablePtr.p, &key, (Fix_page*)pagePtr.p);
}
/*
  Allocator for tuples with a variable sized part.

  Allocates the fixed-size part of a tuple with alloc_fix_rec() and then a
  variable sized part with alloc_var_part(); the reference to the variable
  part is stored in the fixed part.  If the variable part cannot be
  allocated, the fixed part is freed again.

  SYNOPSIS
    fragPtr           A pointer to the fragment description
    tabPtr            A pointer to the table description
    alloc_size        Total size of the record; the fixed header size of
                      the table (m_offsets[MM].m_fix_header_size) is
                      subtracted to get the size of the variable part
    key               Filled in by alloc_fix_rec(); its m_page_no is used
                      here to find the page when rolling back
    out_frag_page_id  Passed through to alloc_fix_rec()

  RETURN VALUES
    Pointer to the fixed part of the tuple on success, 0 if either
    allocation failed.
*/
Uint32*
Dbtup::alloc_var_rec(Fragrecord* fragPtr,
                     Tablerec* tabPtr,
                     Uint32 alloc_size,
                     Local_key* key,
                     Uint32 * out_frag_page_id)
{
  /**
   * TODO alloc fix+var part
   */
  Uint32* const fixPart = alloc_fix_rec(fragPtr, tabPtr, key, out_frag_page_id);
  if (unlikely(fixPart == 0))
  {
    return 0;
  }

  ndbassert(alloc_size >= tabPtr->m_offsets[MM].m_fix_header_size);

  // What remains after the fixed header is the variable part.
  Uint32 varPartSize = alloc_size;
  varPartSize -= tabPtr->m_offsets[MM].m_fix_header_size;

  Local_key varref;
  if (unlikely(alloc_var_part(fragPtr, tabPtr, varPartSize, &varref) == 0))
  {
    // No variable part available: give the fixed part back.
    PagePtr fixPagePtr;
    c_page_pool.getPtr(fixPagePtr, key->m_page_no);
    free_fix_rec(fragPtr, tabPtr, key, (Fix_page*)fixPagePtr.p);
    return 0;
  }

  // Link the variable part from the fixed part.
  Tuple_header* const tuple = (Tuple_header*)fixPart;
  tuple->get_var_part_ref_ptr(tabPtr)->assign(&varref);
  return fixPart;
}
/*
  Signal handler for TUP_DEALLOCREQ: release the storage of a row.

  Signal words read:
    theData[0]  fragment id
    theData[1]  table record i-value
    theData[2]  fragment (logical) page id
    theData[3]  page index

  Nothing is freed when the (page id, page index) pair is invalid according
  to Local_key::isInvalid().  Otherwise the tuple must carry the FREED
  header bit, and it is released with free_var_rec() when the table has any
  varsize or dynamic main-memory attributes, else with free_fix_rec().
*/
void
Dbtup::execTUP_DEALLOCREQ(Signal* signal)
{
  jamEntry();

  // Unpack the request.
  const Uint32 frag_id = signal->theData[0];
  TablerecPtr regTabPtr;
  regTabPtr.i = signal->theData[1];
  const Uint32 frag_page_id = signal->theData[2];
  const Uint32 page_index = signal->theData[3];

  ptrCheckGuard(regTabPtr, cnoOfTablerec, tablerec);

  FragrecordPtr regFragPtr;
  getFragmentrec(regFragPtr, frag_id, regTabPtr.p);
  ndbassert(regFragPtr.p != NULL);

  if (Local_key::isInvalid(frag_page_id, page_index))
  {
    // No row to release.
    return;
  }

  // Translate the logical page id to the real page id.
  Local_key tmp;
  tmp.m_page_no = getRealpid(regFragPtr.p, frag_page_id);
  tmp.m_page_idx = page_index;

  PagePtr pagePtr;
  Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &tmp, regTabPtr.p);

  ndbrequire(ptr->m_header_bits & Tuple_header::FREED);

  const Uint32 varOrDynAttrs =
    regTabPtr.p->m_attributes[MM].m_no_of_varsize +
    regTabPtr.p->m_attributes[MM].m_no_of_dynamic;

  if (varOrDynAttrs == 0)
  {
    // Fixed-size only row.
    free_fix_rec(regFragPtr.p, regTabPtr.p, &tmp, (Fix_page*)pagePtr.p);
    return;
  }

  jam();
  free_var_rec(regFragPtr.p, regTabPtr.p, &tmp, pagePtr);
}
/* Deallocator for variable sized segments Part of the external interface for variable sized segments SYNOPSIS fragPtr A pointer to the fragment description tabPtr A pointer to the table description signal The signal object to be used if a signal needs to be sent page_ptr A reference to the page of the variable sized segment free_page_index Page index on page of variable sized segment which is freed RETURN VALUES Returns true if deallocation was successful otherwise false */ void Dbtup::free_var_rec(Fragrecord* fragPtr, Tablerec* tabPtr, Local_key* key, Ptr<Page> pagePtr) { /** * TODO free fix + var part */ Uint32 *ptr = ((Fix_page*)pagePtr.p)->get_ptr(key->m_page_idx, 0); Tuple_header* tuple = (Tuple_header*)ptr; Local_key ref; Var_part_ref * varref = tuple->get_var_part_ref_ptr(tabPtr); varref->copyout(&ref); free_fix_rec(fragPtr, tabPtr, key, (Fix_page*)pagePtr.p); c_page_pool.getPtr(pagePtr, ref.m_page_no); ((Var_page*)pagePtr.p)->free_record(ref.m_page_idx, Var_page::CHAIN); ndbassert(pagePtr.p->free_space <= Var_page::DATA_WORDS); if (pagePtr.p->free_space == Var_page::DATA_WORDS - 1) { jam(); /* This code could be used when we release pages. remove_free_page(signal,fragPtr,page_header,page_header->list_index); return_empty_page(fragPtr, page_header); */ update_free_page_list(fragPtr, pagePtr); } else { jam(); update_free_page_list(fragPtr, pagePtr); } return; }
/*
  Advance a TUP scan to its next position.

  The scan position (scan.m_scanPos) is a small state machine driven by
  pos.m_get.  Each pass through the main loop performs one step: selecting
  the next page (main memory or disk), fetching the page, or examining the
  next fixed-size tuple slot on the page.  At most 32 steps that "break" out
  of the switch are performed per call; after that a CONTINUEB(ZTUP_SCAN)
  signal is sent to this block so that the scan resumes later.

  SYNOPSIS
    signal    Signal object used for PGMAN/TSMAN requests, the direct
              NEXT_SCANCONF and the CONTINUEB
    scanPtr   The scan operation record

  RETURN VALUES
    true   scan.m_state was set to ScanOp::Last (no more pages/extents) or
           to ScanOp::Current (a candidate tuple was found; its main-memory
           key is in pos.m_key_mm)
    false  in all other cases: a disk page request was queued, a
           NEXT_SCANCONF was already sent with EXECUTE_DIRECT (deleted rowid
           or LCP keep-list entry), or a CONTINUEB was sent

  Changes relative to the previous version of this function:
    - the LCP keep-list path frees rows of tables with dynamic attributes
      through free_var_rec(), using the same test as execTUP_DEALLOCREQ
    - the disk read-ahead loop tracks the extent it is currently reading
      ahead in instead of shadowing the outer extent pointer
    - the page pointer in Get_page_mm is declared and nulled before the
      "goto nopage" so that it is never read uninitialised
*/
bool
Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr)
{
  ScanOp& scan = *scanPtr.p;
  ScanPos& pos = scan.m_scanPos;
  Local_key& key = pos.m_key;
  const Uint32 bits = scan.m_bits;
  // table
  TablerecPtr tablePtr;
  tablePtr.i = scan.m_tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  Tablerec& table = *tablePtr.p;
  // fragment
  FragrecordPtr fragPtr;
  fragPtr.i = scan.m_fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  Fragrecord& frag = *fragPtr.p;
  // tuple found
  Tuple_header* th = 0;
  Uint32 thbits = 0;
  Uint32 loop_count = 0;
  Uint32 scanGCI = scanPtr.p->m_scanGCI;
  Uint32 foundGCI;

  // NOTE(review): despite its name, 'mm' is true for a disk (SCAN_DD) scan;
  // it is only used as the index into table.m_offsets[] below.
  const bool mm = (bits & ScanOp::SCAN_DD);
  const bool lcp = (bits & ScanOp::SCAN_LCP);

  Uint32 lcp_list = fragPtr.p->m_lcp_keep_list;
  Uint32 size = table.m_offsets[mm].m_fix_header_size;

  // An LCP scan first drains the fragment's keep list, one entry per call.
  if (lcp && lcp_list != RNIL)
    goto found_lcp_keep;

  switch(pos.m_get){
  case ScanPos::Get_next_tuple:
  case ScanPos::Get_next_tuple_fs:
    jam();
    key.m_page_idx += size;
    // fall through
  case ScanPos::Get_tuple:
  case ScanPos::Get_tuple_fs:
    jam();
    /**
     * We need to refetch page after timeslice
     */
    pos.m_get = ScanPos::Get_page;
    break;
  default:
    break;
  }

  while (true) {
    switch (pos.m_get) {
    case ScanPos::Get_next_page:
      // move to next page
      jam();
      {
        if (! (bits & ScanOp::SCAN_DD))
          pos.m_get = ScanPos::Get_next_page_mm;
        else
          pos.m_get = ScanPos::Get_next_page_dd;
      }
      continue;
    case ScanPos::Get_page:
      // get real page
      jam();
      {
        if (! (bits & ScanOp::SCAN_DD))
          pos.m_get = ScanPos::Get_page_mm;
        else
          pos.m_get = ScanPos::Get_page_dd;
      }
      continue;
    case ScanPos::Get_next_page_mm:
      // move to next logical TUP page
      jam();
      {
        key.m_page_no++;
        if (key.m_page_no >= frag.noOfPages) {
          jam();

          // A SCAN_NR scan with an end page set keeps going past the
          // fragment's last page until scan.m_endPage is reached.
          if ((bits & ScanOp::SCAN_NR) && (scan.m_endPage != RNIL))
          {
            jam();
            if (key.m_page_no < scan.m_endPage)
            {
              jam();
              ndbout_c("scanning page %u", key.m_page_no);
              goto cont;
            }
          }
          // no more pages, scan ends
          pos.m_get = ScanPos::Get_undef;
          scan.m_state = ScanOp::Last;
          return true;
        }
    cont:
        key.m_page_idx = 0;
        pos.m_get = ScanPos::Get_page_mm;
        // clear cached value
        pos.m_realpid_mm = RNIL;
      }
      /*FALLTHRU*/
    case ScanPos::Get_page_mm:
      // get TUP real page
      jam();
      {
        // Declared and nulled before the 'goto nopage' below: that jump
        // used to skip the declaration, leaving pagePtr.p uninitialised
        // when it was copied into pos.m_page.
        PagePtr pagePtr;
        pagePtr.p = 0;

        if (pos.m_realpid_mm == RNIL) {
          jam();
          if (key.m_page_no < frag.noOfPages)
            pos.m_realpid_mm = getRealpid(fragPtr.p, key.m_page_no);
          else
          {
            // Page beyond the fragment: there is no real page to fetch.
            ndbassert(bits & ScanOp::SCAN_NR);
            goto nopage;
          }
        }
        c_page_pool.getPtr(pagePtr, pos.m_realpid_mm);

        if (pagePtr.p->page_state == ZEMPTY_MM) {
          // skip empty page
          jam();
          if (! (bits & ScanOp::SCAN_NR))
          {
            pos.m_get = ScanPos::Get_next_page_mm;
            break; // incr loop count
          }
          else
          {
            // SCAN_NR still walks the slots of an empty page; RNIL makes
            // Get_tuple report each of them as a deleted rowid.
            jam();
            pos.m_realpid_mm = RNIL;
          }
        }
    nopage:
        pos.m_page = pagePtr.p;
        pos.m_get = ScanPos::Get_tuple;
      }
      continue;
    case ScanPos::Get_next_page_dd:
      // move to next disk page
      jam();
      {
        Disk_alloc_info& alloc = frag.m_disk_alloc_info;
        Local_fragment_extent_list list(c_extent_pool, alloc.m_extent_list);
        Ptr<Extent_info> ext_ptr;
        c_extent_pool.getPtr(ext_ptr, pos.m_extent_info_ptr_i);
        Extent_info* ext = ext_ptr.p;
        key.m_page_no++;
        if (key.m_page_no >= ext->m_first_page_no + alloc.m_extent_size) {
          // no more pages in this extent
          jam();
          if (! list.next(ext_ptr)) {
            // no more extents, scan ends
            jam();
            pos.m_get = ScanPos::Get_undef;
            scan.m_state = ScanOp::Last;
            return true;
          } else {
            // move to next extent
            jam();
            pos.m_extent_info_ptr_i = ext_ptr.i;
            ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i);
            key.m_file_no = ext->m_key.m_file_no;
            key.m_page_no = ext->m_first_page_no;
          }
        }
        key.m_page_idx = 0;
        pos.m_get = ScanPos::Get_page_dd;
        /*
          read ahead for scan in disk order
          do read ahead every 8:th page
        */
        if ((bits & ScanOp::SCAN_DD) &&
            (((key.m_page_no - ext->m_first_page_no) & 7) == 0))
        {
          jam();
          // initialize PGMAN request
          Page_cache_client::Request preq;
          preq.m_page = pos.m_key;
          preq.m_callback = TheNULLCallback;

          // set maximum read ahead
          Uint32 read_ahead = m_max_page_read_ahead;

          // Extent the read ahead is currently working in.  It starts as
          // the scan's extent and follows list.next(); the extent limit
          // below must be taken from this one, not from the scan's extent.
          Extent_info* ra_ext = ext;

          while (true)
          {
            // prepare page read ahead in current extent
            Uint32 page_no = preq.m_page.m_page_no;
            Uint32 page_no_limit = page_no + read_ahead;
            Uint32 limit = ra_ext->m_first_page_no + alloc.m_extent_size;
            if (page_no_limit > limit)
            {
              jam();
              // read ahead crosses extent, set limit for this extent
              read_ahead = page_no_limit - limit;
              page_no_limit = limit;
              // and make sure we only read one extra extent next time around
              if (read_ahead > alloc.m_extent_size)
                read_ahead = alloc.m_extent_size;
            }
            else
            {
              jam();
              read_ahead = 0; // no more to read ahead after this
            }
            // do read ahead pages for this extent
            while (page_no < page_no_limit)
            {
              // page request to PGMAN
              jam();
              preq.m_page.m_page_no = page_no;
              int flags = 0;
              // ignore result
              m_pgman.get_page(signal, preq, flags);
              jamEntry();
              page_no++;
            }
            if (!read_ahead || !list.next(ext_ptr))
            {
              // no more extents after this or read ahead done
              jam();
              break;
            }
            // move to next extent and initialize PGMAN request accordingly
            ra_ext = c_extent_pool.getPtr(ext_ptr.i);
            preq.m_page.m_file_no = ra_ext->m_key.m_file_no;
            preq.m_page.m_page_no = ra_ext->m_first_page_no;
          }
        } // if ScanOp::SCAN_DD read ahead
      }
      /*FALLTHRU*/
    case ScanPos::Get_page_dd:
      // get global page in PGMAN cache
      jam();
      {
        // check if page is un-allocated or empty
        if (likely(! (bits & ScanOp::SCAN_NR)))
        {
          Tablespace_client tsman(signal, c_tsman,
                                  frag.fragTableId,
                                  frag.fragmentId,
                                  frag.m_tablespace_id);
          unsigned uncommitted, committed;
          uncommitted = committed = ~(unsigned)0;
          int ret = tsman.get_page_free_bits(&key, &uncommitted, &committed);
          ndbrequire(ret == 0);
          if (committed == 0 && uncommitted == 0) {
            // skip empty page
            jam();
            pos.m_get = ScanPos::Get_next_page_dd;
            break; // incr loop count
          }
        }
        // page request to PGMAN
        Page_cache_client::Request preq;
        preq.m_page = pos.m_key;
        preq.m_callback.m_callbackData = scanPtr.i;
        preq.m_callback.m_callbackFunction =
          safe_cast(&Dbtup::disk_page_tup_scan_callback);
        int flags = 0;
        int res = m_pgman.get_page(signal, preq, flags);
        jamEntry();
        if (res == 0) {
          jam();
          // request queued
          pos.m_get = ScanPos::Get_tuple;
          return false;
        }
        ndbrequire(res > 0);
        pos.m_page = (Page*)m_pgman.m_ptr.p;
      }
      pos.m_get = ScanPos::Get_tuple;
      continue;
      // get tuple
      // move to next tuple
    case ScanPos::Get_next_tuple:
    case ScanPos::Get_next_tuple_fs:
      // move to next fixed size tuple
      jam();
      {
        key.m_page_idx += size;
        pos.m_get = ScanPos::Get_tuple_fs;
      }
      /*FALLTHRU*/
    case ScanPos::Get_tuple:
    case ScanPos::Get_tuple_fs:
      // get fixed size tuple
      jam();
      {
        Fix_page* page = (Fix_page*)pos.m_page;
        if (key.m_page_idx + size <= Fix_page::DATA_WORDS) {
          pos.m_get = ScanPos::Get_next_tuple_fs;
          th = (Tuple_header*)&page->m_data[key.m_page_idx];

          if (likely(! (bits & ScanOp::SCAN_NR)))
          {
            jam();
            thbits = th->m_header_bits;
            if (! (thbits & Tuple_header::FREE))
            {
              goto found_tuple;
            }
          }
          else
          {
            // No real page behind this position: report the rowid as
            // deleted without touching 'th'.
            if (pos.m_realpid_mm == RNIL)
            {
              jam();
              foundGCI = 0;
              goto found_deleted_rowid;
            }
            thbits = th->m_header_bits;
            if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI ||
                foundGCI == 0)
            {
              if (! (thbits & Tuple_header::FREE))
              {
                jam();
                goto found_tuple;
              }
              else
              {
                goto found_deleted_rowid;
              }
            }
            else if (thbits != Fix_page::FREE_RECORD &&
                     th->m_operation_ptr_i != RNIL)
            {
              jam();
              goto found_tuple; // Locked tuple...
              // skip free tuple
            }
          }
        } else {
          jam();
          // no more tuples on this page
          pos.m_get = ScanPos::Get_next_page;
        }
      }
      break; // incr loop count
  found_tuple:
      // found possible tuple to return
      jam();
      {
        // caller has already set pos.m_get to next tuple
        if (! (bits & ScanOp::SCAN_LCP && thbits & Tuple_header::LCP_SKIP)) {
          Local_key& key_mm = pos.m_key_mm;
          if (! (bits & ScanOp::SCAN_DD)) {
            key_mm = pos.m_key;
            // real page id is already set
          } else {
            key_mm.assref(th->m_base_record_ref);
            // recompute for each disk tuple
            pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no);
          }
          // TUPKEYREQ handles savepoint stuff
          scan.m_state = ScanOp::Current;
          return true;
        } else {
          jam();
          // clear it so that it will show up in next LCP
          th->m_header_bits = thbits & ~(Uint32)Tuple_header::LCP_SKIP;
          if (tablePtr.p->m_bits & Tablerec::TR_Checksum) {
            jam();
            setChecksum(th, tablePtr.p);
          }
        }
      }
      break;
  found_deleted_rowid:
      jam();
      {
        ndbassert(bits & ScanOp::SCAN_NR);
        Local_key& key_mm = pos.m_key_mm;
        if (! (bits & ScanOp::SCAN_DD)) {
          key_mm = pos.m_key;
          // caller has already set pos.m_get to next tuple
          // real page id is already set
        } else {
          key_mm.assref(th->m_base_record_ref);
          // recompute for each disk tuple
          pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no);

          Fix_page *mmpage = (Fix_page*)c_page_pool.getPtr(pos.m_realpid_mm);
          th = (Tuple_header*)(mmpage->m_data + key_mm.m_page_idx);
          if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI ||
              foundGCI == 0)
          {
            // NOTE(review): thbits still holds the header bits read from
            // the disk tuple, not from the main-memory tuple fetched just
            // above - confirm that this is intended.
            if (! (thbits & Tuple_header::FREE))
              break;
          }
        }

        // Report the rowid directly to the scan user.
        NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
        conf->scanPtr = scan.m_userPtr;
        conf->accOperationPtr = RNIL;
        conf->fragId = frag.fragmentId;
        conf->localKey[0] = pos.m_key_mm.ref();
        conf->localKey[1] = 0;
        conf->localKeyLength = 1;
        conf->gci = foundGCI;
        Uint32 blockNo = refToBlock(scan.m_userRef);
        EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7);
        jamEntry();

        // TUPKEYREQ handles savepoint stuff
        loop_count = 32;
        scan.m_state = ScanOp::Next;
        return false;
      }
      break; // incr loop count
    default:
      ndbrequire(false);
      break;
    }
    if (++loop_count >= 32)
      break;
  }
  // TODO: at drop table we have to flush and terminate these
  // Step budget used up: continue the scan in a later CONTINUEB.
  jam();
  signal->theData[0] = ZTUP_SCAN;
  signal->theData[1] = scanPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
  return false;

found_lcp_keep:
  // Head of the fragment's LCP keep list; entries are chained through
  // m_operation_ptr_i of the tuple header.
  Local_key tmp;
  tmp.assref(lcp_list);
  tmp.m_page_no = getRealpid(fragPtr.p, tmp.m_page_no);

  Ptr<Page> pagePtr;
  c_page_pool.getPtr(pagePtr, tmp.m_page_no);
  Tuple_header* ptr = (Tuple_header*)
    ((Fix_page*)pagePtr.p)->get_ptr(tmp.m_page_idx, 0);
  Uint32 headerbits = ptr->m_header_bits;
  ndbrequire(headerbits & Tuple_header::LCP_KEEP);

  // Unlink the entry and clear the FREE bits while the row is reported.
  Uint32 next = ptr->m_operation_ptr_i;
  ptr->m_operation_ptr_i = RNIL;
  ptr->m_header_bits = headerbits & ~(Uint32)Tuple_header::FREE;

  if (tablePtr.p->m_bits & Tablerec::TR_Checksum) {
    jam();
    setChecksum(ptr, tablePtr.p);
  }

  NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
  conf->scanPtr = scan.m_userPtr;
  conf->accOperationPtr = (Uint32)-1;
  conf->fragId = frag.fragmentId;
  conf->localKey[0] = lcp_list;
  conf->localKey[1] = 0;
  conf->localKeyLength = 1;
  conf->gci = 0;
  Uint32 blockNo = refToBlock(scan.m_userRef);
  EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7);

  fragPtr.p->m_lcp_keep_list = next;
  ptr->m_header_bits |= Tuple_header::FREED; // RESTORE free flag
  if (headerbits & Tuple_header::FREED)
  {
    // Same test as execTUP_DEALLOCREQ: a table with dynamic attributes is
    // also released through free_var_rec(), so that the record referenced
    // by the tuple's var part reference is freed together with the fixed
    // part.
    if (tablePtr.p->m_attributes[MM].m_no_of_varsize +
        tablePtr.p->m_attributes[MM].m_no_of_dynamic)
    {
      jam();
      free_var_rec(fragPtr.p, tablePtr.p, &tmp, pagePtr);
    } else {
      jam();
      free_fix_rec(fragPtr.p, tablePtr.p, &tmp, (Fix_page*)pagePtr.p);
    }
  }
  return false;
}