/**
 * execTUP_DEALLOCREQ - release the storage of one tuple.
 *
 * Signal data layout: theData[0] = fragment id, theData[1] = table record
 * index, theData[2] = fragment-local page id, theData[3] = page index of
 * the tuple within the page.
 *
 * The tuple header must already carry the FREED bit (enforced by
 * ndbrequire below); this handler only returns the physical record to
 * the proper free list (varsize/dynamic vs fixed-size page).
 */
void Dbtup::execTUP_DEALLOCREQ(Signal* signal)
{
  TablerecPtr regTabPtr;
  FragrecordPtr regFragPtr;
  Uint32 frag_page_id, frag_id;

  jamEntry();

  // Unpack the request: fragment, table, logical page and page index.
  frag_id= signal->theData[0];
  regTabPtr.i= signal->theData[1];
  frag_page_id= signal->theData[2];
  Uint32 page_index= signal->theData[3];

  ptrCheckGuard(regTabPtr, cnoOfTablerec, tablerec);

  getFragmentrec(regFragPtr, frag_id, regTabPtr.p);
  ndbassert(regFragPtr.p != NULL);

  // An invalid (page, index) pair means there is nothing to free.
  if (! Local_key::isInvalid(frag_page_id, page_index))
  {
    Local_key tmp;
    // Translate the fragment-local page id into the real page id.
    tmp.m_page_no= getRealpid(regFragPtr.p, frag_page_id);
    tmp.m_page_idx= page_index;

    PagePtr pagePtr;
    Tuple_header* ptr= (Tuple_header*)get_ptr(&pagePtr, &tmp, regTabPtr.p);

    // The tuple must already have been logically freed by the caller.
    ndbrequire(ptr->m_header_bits & Tuple_header::FREED);

    if (regTabPtr.p->m_attributes[MM].m_no_of_varsize +
        regTabPtr.p->m_attributes[MM].m_no_of_dynamic)
    {
      jam();
      // Table has varsize/dynamic columns: record lives on a var page.
      free_var_rec(regFragPtr.p, regTabPtr.p, &tmp, pagePtr);
    } else {
      // Pure fixed-size record.
      free_fix_rec(regFragPtr.p, regTabPtr.p, &tmp, (Fix_page*)pagePtr.p);
    }
  }
}
/**
 * scanNext - advance a TUP fragment scan to the next row.
 *
 * Driven by the state machine in pos.m_get: page-selection states
 * (Get_next_page / Get_page, with _mm main-memory and _dd disk variants)
 * feed into tuple states (Get_tuple / Get_next_tuple), each loop
 * iteration handling one state transition, capped at 32 iterations per
 * real-time break.
 *
 * Returns true when the scan is positioned on a tuple
 * (scan.m_state = Current) or has finished (scan.m_state = Last).
 * Returns false when the scan is suspended: a CONTINUEB was sent to
 * ourselves, a disk page request was queued in PGMAN, or a
 * NEXT_SCANCONF was delivered directly (deleted rowid for NR scans, or
 * an LCP keep-list entry).
 *
 * Scan flavors seen in this code: SCAN_DD (disk data), SCAN_LCP (local
 * checkpoint, must drain the fragment's lcp_keep_list first), SCAN_NR
 * (node recovery, also reports deleted rowids and may scan beyond
 * frag.noOfPages up to scan.m_endPage).
 */
bool Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr)
{
  ScanOp& scan = *scanPtr.p;
  ScanPos& pos = scan.m_scanPos;
  Local_key& key = pos.m_key;
  const Uint32 bits = scan.m_bits;
  // table
  TablerecPtr tablePtr;
  tablePtr.i = scan.m_tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  Tablerec& table = *tablePtr.p;
  // fragment
  FragrecordPtr fragPtr;
  fragPtr.i = scan.m_fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  Fragrecord& frag = *fragPtr.p;
  // tuple found
  Tuple_header* th = 0;
  Uint32 thbits = 0;
  Uint32 loop_count = 0;
  Uint32 scanGCI = scanPtr.p->m_scanGCI;
  Uint32 foundGCI;

  // 'mm' selects the offsets entry: disk-data scans use index 1.
  const bool mm = (bits & ScanOp::SCAN_DD);
  const bool lcp = (bits & ScanOp::SCAN_LCP);

  Uint32 lcp_list = fragPtr.p->m_lcp_keep_list;
  Uint32 size = table.m_offsets[mm].m_fix_header_size;

  // LCP scans first drain the keep-list of rows that were deleted while
  // the checkpoint was in progress.
  if (lcp && lcp_list != RNIL)
    goto found_lcp_keep;

  // On re-entry after a timeslice the cached page pointer may be stale,
  // so any tuple state is demoted to Get_page to refetch it.
  switch(pos.m_get){
  case ScanPos::Get_next_tuple:
  case ScanPos::Get_next_tuple_fs:
    jam();
    key.m_page_idx += size;
    // fall through
  case ScanPos::Get_tuple:
  case ScanPos::Get_tuple_fs:
    jam();
    /**
     * We need to refetch page after timeslice
     */
    pos.m_get = ScanPos::Get_page;
    break;
  default:
    break;
  }

  while (true) {
    switch (pos.m_get) {
    case ScanPos::Get_next_page:
      // move to next page - dispatch to the mm or dd variant
      jam();
      {
        if (! (bits & ScanOp::SCAN_DD))
          pos.m_get = ScanPos::Get_next_page_mm;
        else
          pos.m_get = ScanPos::Get_next_page_dd;
      }
      continue;
    case ScanPos::Get_page:
      // get real page - dispatch to the mm or dd variant
      jam();
      {
        if (! (bits & ScanOp::SCAN_DD))
          pos.m_get = ScanPos::Get_page_mm;
        else
          pos.m_get = ScanPos::Get_page_dd;
      }
      continue;
    case ScanPos::Get_next_page_mm:
      // move to next logical TUP page
      jam();
      {
        key.m_page_no++;
        if (key.m_page_no >= frag.noOfPages) {
          jam();
          // NR scans may continue past the allocated pages up to
          // m_endPage so deleted rowids can still be reported.
          if ((bits & ScanOp::SCAN_NR) && (scan.m_endPage != RNIL))
          {
            jam();
            if (key.m_page_no < scan.m_endPage)
            {
              jam();
              ndbout_c("scanning page %u", key.m_page_no);
              goto cont;
            }
          }
          // no more pages, scan ends
          pos.m_get = ScanPos::Get_undef;
          scan.m_state = ScanOp::Last;
          return true;
        }
    cont:
        key.m_page_idx = 0;
        pos.m_get = ScanPos::Get_page_mm;
        // clear cached value
        pos.m_realpid_mm = RNIL;
      }
      /*FALLTHRU*/
    case ScanPos::Get_page_mm:
      // get TUP real page
      jam();
      {
        if (pos.m_realpid_mm == RNIL) {
          jam();
          if (key.m_page_no < frag.noOfPages)
            pos.m_realpid_mm = getRealpid(fragPtr.p, key.m_page_no);
          else
          {
            // only NR scans can be positioned beyond the allocated pages
            ndbassert(bits & ScanOp::SCAN_NR);
            goto nopage;
          }
        }
        PagePtr pagePtr;
        c_page_pool.getPtr(pagePtr, pos.m_realpid_mm);

        if (pagePtr.p->page_state == ZEMPTY_MM) {
          // skip empty page
          jam();
          if (! (bits & ScanOp::SCAN_NR))
          {
            pos.m_get = ScanPos::Get_next_page_mm;
            break; // incr loop count
          }
          else
          {
            jam();
            // NR scan still walks the empty page to report deleted rowids
            pos.m_realpid_mm = RNIL;
          }
        }
        // NOTE(review): when this label is reached via the goto above,
        // pagePtr was never assigned, so pos.m_page below is set from an
        // indeterminate pointer; the NR tuple path guards on
        // pos.m_realpid_mm == RNIL before dereferencing - confirm.
    nopage:
        pos.m_page = pagePtr.p;
        pos.m_get = ScanPos::Get_tuple;
      }
      continue;
    case ScanPos::Get_next_page_dd:
      // move to next disk page
      jam();
      {
        Disk_alloc_info& alloc = frag.m_disk_alloc_info;
        Local_fragment_extent_list list(c_extent_pool, alloc.m_extent_list);
        Ptr<Extent_info> ext_ptr;
        c_extent_pool.getPtr(ext_ptr, pos.m_extent_info_ptr_i);
        Extent_info* ext = ext_ptr.p;
        key.m_page_no++;
        if (key.m_page_no >= ext->m_first_page_no + alloc.m_extent_size) {
          // no more pages in this extent
          jam();
          if (! list.next(ext_ptr)) {
            // no more extents, scan ends
            jam();
            pos.m_get = ScanPos::Get_undef;
            scan.m_state = ScanOp::Last;
            return true;
          } else {
            // move to next extent
            jam();
            pos.m_extent_info_ptr_i = ext_ptr.i;
            ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i);
            key.m_file_no = ext->m_key.m_file_no;
            key.m_page_no = ext->m_first_page_no;
          }
        }
        key.m_page_idx = 0;
        pos.m_get = ScanPos::Get_page_dd;
        /*
          read ahead for scan in disk order
          do read ahead every 8:th page
        */
        if ((bits & ScanOp::SCAN_DD) &&
            (((key.m_page_no - ext->m_first_page_no) & 7) == 0))
        {
          jam();
          // initialize PGMAN request
          Page_cache_client::Request preq;
          preq.m_page = pos.m_key;
          preq.m_callback = TheNULLCallback;

          // set maximum read ahead
          Uint32 read_ahead = m_max_page_read_ahead;
          while (true)
          {
            // prepare page read ahead in current extent
            Uint32 page_no = preq.m_page.m_page_no;
            Uint32 page_no_limit = page_no + read_ahead;
            Uint32 limit = ext->m_first_page_no + alloc.m_extent_size;
            if (page_no_limit > limit)
            {
              jam();
              // read ahead crosses extent, set limit for this extent
              read_ahead = page_no_limit - limit;
              page_no_limit = limit;
              // and make sure we only read one extra extent next time around
              if (read_ahead > alloc.m_extent_size)
                read_ahead = alloc.m_extent_size;
            }
            else
            {
              jam();
              read_ahead = 0; // no more to read ahead after this
            }
            // do read ahead pages for this extent
            while (page_no < page_no_limit)
            {
              // page request to PGMAN
              jam();
              preq.m_page.m_page_no = page_no;
              int flags = 0;
              // ignore result
              m_pgman.get_page(signal, preq, flags);
              jamEntry();
              page_no++;
            }
            if (!read_ahead || !list.next(ext_ptr))
            {
              // no more extents after this or read ahead done
              jam();
              break;
            }
            // move to next extent and initialize PGMAN request accordingly
            // NOTE(review): this declaration shadows the outer 'ext'; the
            // outer pointer deliberately stays on the scan's own extent.
            Extent_info* ext = c_extent_pool.getPtr(ext_ptr.i);
            preq.m_page.m_file_no = ext->m_key.m_file_no;
            preq.m_page.m_page_no = ext->m_first_page_no;
          }
        } // if ScanOp::SCAN_DD read ahead
      }
      /*FALLTHRU*/
    case ScanPos::Get_page_dd:
      // get global page in PGMAN cache
      jam();
      {
        // check if page is un-allocated or empty
        if (likely(! (bits & ScanOp::SCAN_NR)))
        {
          Tablespace_client tsman(signal, c_tsman,
                                  frag.fragTableId,
                                  frag.fragmentId,
                                  frag.m_tablespace_id);
          unsigned uncommitted, committed;
          uncommitted = committed = ~(unsigned)0;
          int ret = tsman.get_page_free_bits(&key, &uncommitted, &committed);
          ndbrequire(ret == 0);
          if (committed == 0 && uncommitted == 0) {
            // skip empty page
            jam();
            pos.m_get = ScanPos::Get_next_page_dd;
            break; // incr loop count
          }
        }
        // page request to PGMAN
        Page_cache_client::Request preq;
        preq.m_page = pos.m_key;
        preq.m_callback.m_callbackData = scanPtr.i;
        preq.m_callback.m_callbackFunction =
          safe_cast(&Dbtup::disk_page_tup_scan_callback);
        int flags = 0;
        int res = m_pgman.get_page(signal, preq, flags);
        jamEntry();
        if (res == 0) {
          jam();
          // request queued - the callback resumes the scan at Get_tuple
          pos.m_get = ScanPos::Get_tuple;
          return false;
        }
        ndbrequire(res > 0);
        pos.m_page = (Page*)m_pgman.m_ptr.p;
      }
      pos.m_get = ScanPos::Get_tuple;
      continue;
      // get tuple
      // move to next tuple
    case ScanPos::Get_next_tuple:
    case ScanPos::Get_next_tuple_fs:
      // move to next fixed size tuple
      jam();
      {
        key.m_page_idx += size;
        pos.m_get = ScanPos::Get_tuple_fs;
      }
      /*FALLTHRU*/
    case ScanPos::Get_tuple:
    case ScanPos::Get_tuple_fs:
      // get fixed size tuple
      jam();
      {
        Fix_page* page = (Fix_page*)pos.m_page;
        if (key.m_page_idx + size <= Fix_page::DATA_WORDS)
        {
          pos.m_get = ScanPos::Get_next_tuple_fs;
          th = (Tuple_header*)&page->m_data[key.m_page_idx];

          if (likely(! (bits & ScanOp::SCAN_NR)))
          {
            // normal scan: return any tuple that is not free
            jam();
            thbits = th->m_header_bits;
            if (! (thbits & Tuple_header::FREE))
            {
              goto found_tuple;
            }
          }
          else
          {
            // NR scan: also report rowids deleted since scanGCI
            if (pos.m_realpid_mm == RNIL)
            {
              // page not allocated here - report rowid as deleted
              jam();
              foundGCI = 0;
              goto found_deleted_rowid;
            }
            thbits = th->m_header_bits;
            if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI ||
                foundGCI == 0)
            {
              // row changed after scanGCI (or GCI unknown)
              if (! (thbits & Tuple_header::FREE))
              {
                jam();
                goto found_tuple;
              }
              else
              {
                goto found_deleted_rowid;
              }
            }
            else if (thbits != Fix_page::FREE_RECORD &&
                     th->m_operation_ptr_i != RNIL)
            {
              jam();
              goto found_tuple; // Locked tuple...
              // skip free tuple
            }
          }
        }
        else
        {
          jam();
          // no more tuples on this page
          pos.m_get = ScanPos::Get_next_page;
        }
      }
      break; // incr loop count
  found_tuple:
      // found possible tuple to return
      jam();
      {
        // caller has already set pos.m_get to next tuple
        if (! (bits & ScanOp::SCAN_LCP && thbits & Tuple_header::LCP_SKIP))
        {
          Local_key& key_mm = pos.m_key_mm;
          if (! (bits & ScanOp::SCAN_DD)) {
            key_mm = pos.m_key;
            // real page id is already set
          } else {
            // disk tuple: main-memory key comes from the base record ref
            key_mm.assref(th->m_base_record_ref);
            // recompute for each disk tuple
            pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no);
          }
          // TUPKEYREQ handles savepoint stuff
          scan.m_state = ScanOp::Current;
          return true;
        }
        else
        {
          // LCP scan and tuple flagged LCP_SKIP: skip it this round
          jam();
          // clear it so that it will show up in next LCP
          th->m_header_bits = thbits & ~(Uint32)Tuple_header::LCP_SKIP;
          if (tablePtr.p->m_bits & Tablerec::TR_Checksum) {
            jam();
            setChecksum(th, tablePtr.p);
          }
        }
      }
      break;
  found_deleted_rowid:
      jam();
      {
        // only node-recovery scans report deleted rowids
        ndbassert(bits & ScanOp::SCAN_NR);
        Local_key& key_mm = pos.m_key_mm;
        if (! (bits & ScanOp::SCAN_DD)) {
          key_mm = pos.m_key;
          // caller has already set pos.m_get to next tuple
          // real page id is already set
        } else {
          key_mm.assref(th->m_base_record_ref);
          // recompute for each disk tuple
          pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no);

          Fix_page *mmpage = (Fix_page*)c_page_pool.getPtr(pos.m_realpid_mm);
          th = (Tuple_header*)(mmpage->m_data + key_mm.m_page_idx);
          // NOTE(review): thbits below still holds the disk tuple's header
          // bits read earlier, not the re-fetched mm tuple's - confirm
          // that this is the intended check.
          if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI ||
              foundGCI == 0)
          {
            if (! (thbits & Tuple_header::FREE))
              break;
          }
        }

        // report the deleted rowid directly to the requesting block
        NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
        conf->scanPtr = scan.m_userPtr;
        conf->accOperationPtr = RNIL;
        conf->fragId = frag.fragmentId;
        conf->localKey[0] = pos.m_key_mm.ref();
        conf->localKey[1] = 0;
        conf->localKeyLength = 1;
        conf->gci = foundGCI;
        Uint32 blockNo = refToBlock(scan.m_userRef);
        EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7);
        jamEntry();

        // TUPKEYREQ handles savepoint stuff
        loop_count = 32;
        scan.m_state = ScanOp::Next;
        return false;
      }
      break; // incr loop count
    default:
      ndbrequire(false);
      break;
    }
    // real-time break: yield after 32 state transitions
    if (++loop_count >= 32)
      break;
  }

  // TODO: at drop table we have to flush and terminate these
  jam();
  // continue the scan via CONTINUEB to ourselves
  signal->theData[0] = ZTUP_SCAN;
  signal->theData[1] = scanPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
  return false;

found_lcp_keep:
  // deliver one entry from the LCP keep-list: a row deleted during the
  // LCP whose pre-delete image must still be checkpointed
  Local_key tmp;
  tmp.assref(lcp_list);
  tmp.m_page_no = getRealpid(fragPtr.p, tmp.m_page_no);

  Ptr<Page> pagePtr;
  c_page_pool.getPtr(pagePtr, tmp.m_page_no);
  Tuple_header* ptr = (Tuple_header*)
    ((Fix_page*)pagePtr.p)->get_ptr(tmp.m_page_idx, 0);
  Uint32 headerbits = ptr->m_header_bits;
  ndbrequire(headerbits & Tuple_header::LCP_KEEP);

  // unlink from the keep-list (next entry is chained via operation ptr)
  Uint32 next = ptr->m_operation_ptr_i;
  ptr->m_operation_ptr_i = RNIL;
  // temporarily clear FREE so the row reads as live while handed over
  ptr->m_header_bits = headerbits & ~(Uint32)Tuple_header::FREE;
  if (tablePtr.p->m_bits & Tablerec::TR_Checksum) {
    jam();
    setChecksum(ptr, tablePtr.p);
  }

  NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
  conf->scanPtr = scan.m_userPtr;
  conf->accOperationPtr = (Uint32)-1;
  conf->fragId = frag.fragmentId;
  conf->localKey[0] = lcp_list;
  conf->localKey[1] = 0;
  conf->localKeyLength = 1;
  conf->gci = 0;
  Uint32 blockNo = refToBlock(scan.m_userRef);
  EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7);

  fragPtr.p->m_lcp_keep_list = next;
  ptr->m_header_bits |= Tuple_header::FREED; // RESTORE free flag
  if (headerbits & Tuple_header::FREED)
  {
    // the row had already been deallocated: free the record now
    if (tablePtr.p->m_attributes[MM].m_no_of_varsize)
    {
      jam();
      free_var_rec(fragPtr.p, tablePtr.p, &tmp, pagePtr);
    } else {
      jam();
      free_fix_rec(fragPtr.p, tablePtr.p, &tmp, (Fix_page*)pagePtr.p);
    }
  }
  return false;
}