Esempio n. 1
0
void
Dbtup::tuxFreeNode(Uint32 fragPtrI,
                   Uint32 pageId,
                   Uint32 pageOffset,
                   Uint32* node)
{
  jamEntry();
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  Local_key key;
  key.m_page_no = pageId;
  key.m_page_idx = pageOffset;
  PagePtr pagePtr;
  Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &key, tablePtr.p);

  Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr);
  ndbrequire(node == (Uint32*)ptr + attrDataOffset);

  free_fix_rec(fragPtr.p, tablePtr.p, &key, (Fix_page*)pagePtr.p);
}
Esempio n. 2
0
/*
  Allocator for variable sized segments
  Part of the external interface for variable sized segments

  This method is used to allocate and free variable sized tuples and
  parts of tuples. This part can be used to implement variable sized
  attributes without wasting memory. It can be used to support small
  BLOB's attached to the record. It can also be used to support adding
  and dropping attributes without the need to copy the entire table.

  SYNOPSIS
    fragPtr         A pointer to the fragment description
    tabPtr          A pointer to the table description
    alloc_size       Size of the allocated record
    signal           The signal object to be used if a signal needs to
                     be sent
  RETURN VALUES
    Returns true if allocation was successful otherwise false

    page_offset      Page offset of allocated record
    page_index       Page index of allocated record
    page_ptr         The i and p value of the page where the record was
                     allocated
*/
Uint32* Dbtup::alloc_var_rec(Fragrecord* fragPtr,
			     Tablerec* tabPtr,
			     Uint32 alloc_size,
			     Local_key* key,
			     Uint32 * out_frag_page_id)
{
  /**
   * TODO alloc fix+var part
   */
  Uint32 *ptr = alloc_fix_rec(fragPtr, tabPtr, key, out_frag_page_id);
  if (unlikely(ptr == 0))
  {
    return 0;
  }

  ndbassert(alloc_size >= tabPtr->m_offsets[MM].m_fix_header_size);
  
  alloc_size -= tabPtr->m_offsets[MM].m_fix_header_size;

  
  Local_key varref;
  if (likely(alloc_var_part(fragPtr, tabPtr, alloc_size, &varref) != 0))
  {
    Tuple_header* tuple = (Tuple_header*)ptr;
    Var_part_ref* dst = tuple->get_var_part_ref_ptr(tabPtr);
    dst->assign(&varref);
    return ptr;
  }
  
  PagePtr pagePtr;
  c_page_pool.getPtr(pagePtr, key->m_page_no);
  free_fix_rec(fragPtr, tabPtr, key, (Fix_page*)pagePtr.p);
  return 0;
}
void Dbtup::execTUP_DEALLOCREQ(Signal* signal)
{
  TablerecPtr regTabPtr;
  FragrecordPtr regFragPtr;
  Uint32 frag_page_id, frag_id;

  jamEntry();

  frag_id= signal->theData[0];
  regTabPtr.i= signal->theData[1];
  frag_page_id= signal->theData[2];
  Uint32 page_index= signal->theData[3];

  ptrCheckGuard(regTabPtr, cnoOfTablerec, tablerec);
  
  getFragmentrec(regFragPtr, frag_id, regTabPtr.p);
  ndbassert(regFragPtr.p != NULL);
  
  if (! Local_key::isInvalid(frag_page_id, page_index))
  {
    Local_key tmp;
    tmp.m_page_no= getRealpid(regFragPtr.p, frag_page_id); 
    tmp.m_page_idx= page_index;
    
    PagePtr pagePtr;
    Tuple_header* ptr= (Tuple_header*)get_ptr(&pagePtr, &tmp, regTabPtr.p);

    ndbrequire(ptr->m_header_bits & Tuple_header::FREED);

    if (regTabPtr.p->m_attributes[MM].m_no_of_varsize +
        regTabPtr.p->m_attributes[MM].m_no_of_dynamic)
    {
      jam();
      free_var_rec(regFragPtr.p, regTabPtr.p, &tmp, pagePtr);
    } else {
      free_fix_rec(regFragPtr.p, regTabPtr.p, &tmp, (Fix_page*)pagePtr.p);
    }
  }
}
Esempio n. 4
0
/*
  Deallocator for variable sized segments
  Part of the external interface for variable sized segments

  SYNOPSIS
    fragPtr         A pointer to the fragment description
    tabPtr          A pointer to the table description
    signal           The signal object to be used if a signal needs to
                     be sent
    page_ptr         A reference to the page of the variable sized
                     segment
    free_page_index  Page index on page of variable sized segment
                     which is freed
  RETURN VALUES
    Returns true if deallocation was successful otherwise false
*/
void Dbtup::free_var_rec(Fragrecord* fragPtr,
			 Tablerec* tabPtr,
			 Local_key* key,
			 Ptr<Page> pagePtr)
{
  /**
   * TODO free fix + var part
   */
  Uint32 *ptr = ((Fix_page*)pagePtr.p)->get_ptr(key->m_page_idx, 0);
  Tuple_header* tuple = (Tuple_header*)ptr;

  Local_key ref;
  Var_part_ref * varref = tuple->get_var_part_ref_ptr(tabPtr);
  varref->copyout(&ref);

  free_fix_rec(fragPtr, tabPtr, key, (Fix_page*)pagePtr.p);

  c_page_pool.getPtr(pagePtr, ref.m_page_no);
  ((Var_page*)pagePtr.p)->free_record(ref.m_page_idx, Var_page::CHAIN);
  
  ndbassert(pagePtr.p->free_space <= Var_page::DATA_WORDS);
  if (pagePtr.p->free_space == Var_page::DATA_WORDS - 1)
  {
    jam();
    /*
      This code could be used when we release pages.
      remove_free_page(signal,fragPtr,page_header,page_header->list_index);
      return_empty_page(fragPtr, page_header);
    */
    update_free_page_list(fragPtr, pagePtr);
  } else {
    jam();
    update_free_page_list(fragPtr, pagePtr);
  }
  return;
}
Esempio n. 5
0
bool
Dbtup::scanNext(Signal* signal, ScanOpPtr scanPtr)
{
  ScanOp& scan = *scanPtr.p;
  ScanPos& pos = scan.m_scanPos;
  Local_key& key = pos.m_key;
  const Uint32 bits = scan.m_bits;
  // table
  TablerecPtr tablePtr;
  tablePtr.i = scan.m_tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  Tablerec& table = *tablePtr.p;
  // fragment
  FragrecordPtr fragPtr;
  fragPtr.i = scan.m_fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  Fragrecord& frag = *fragPtr.p;
  // tuple found
  Tuple_header* th = 0;
  Uint32 thbits = 0;
  Uint32 loop_count = 0;
  Uint32 scanGCI = scanPtr.p->m_scanGCI;
  Uint32 foundGCI;
 
  const bool mm = (bits & ScanOp::SCAN_DD);
  const bool lcp = (bits & ScanOp::SCAN_LCP);
  
  Uint32 lcp_list = fragPtr.p->m_lcp_keep_list;
  Uint32 size = table.m_offsets[mm].m_fix_header_size;

  if (lcp && lcp_list != RNIL)
    goto found_lcp_keep;

  switch(pos.m_get){
  case ScanPos::Get_next_tuple:
  case ScanPos::Get_next_tuple_fs:
    jam();
    key.m_page_idx += size;
    // fall through
  case ScanPos::Get_tuple:
  case ScanPos::Get_tuple_fs:
    jam();
    /**
     * We need to refetch page after timeslice
     */
    pos.m_get = ScanPos::Get_page;
    break;
  default:
    break;
  }
  
  while (true) {
    switch (pos.m_get) {
    case ScanPos::Get_next_page:
      // move to next page
      jam();
      {
        if (! (bits & ScanOp::SCAN_DD))
          pos.m_get = ScanPos::Get_next_page_mm;
        else
          pos.m_get = ScanPos::Get_next_page_dd;
      }
      continue;
    case ScanPos::Get_page:
      // get real page
      jam();
      {
        if (! (bits & ScanOp::SCAN_DD))
          pos.m_get = ScanPos::Get_page_mm;
        else
          pos.m_get = ScanPos::Get_page_dd;
      }
      continue;
    case ScanPos::Get_next_page_mm:
      // move to next logical TUP page
      jam();
      {
        key.m_page_no++;
        if (key.m_page_no >= frag.noOfPages) {
          jam();

          if ((bits & ScanOp::SCAN_NR) && (scan.m_endPage != RNIL))
          {
            jam();
            if (key.m_page_no < scan.m_endPage)
            {
              jam();
              ndbout_c("scanning page %u", key.m_page_no);
              goto cont;
            }
          }
          // no more pages, scan ends
          pos.m_get = ScanPos::Get_undef;
          scan.m_state = ScanOp::Last;
          return true;
        }
    cont:
        key.m_page_idx = 0;
        pos.m_get = ScanPos::Get_page_mm;
        // clear cached value
        pos.m_realpid_mm = RNIL;
      }
      /*FALLTHRU*/
    case ScanPos::Get_page_mm:
      // get TUP real page
      jam();
      {
        if (pos.m_realpid_mm == RNIL) {
          jam();
          if (key.m_page_no < frag.noOfPages)
            pos.m_realpid_mm = getRealpid(fragPtr.p, key.m_page_no);
          else
          {
            ndbassert(bits & ScanOp::SCAN_NR);
            goto nopage;
          }
        }
        PagePtr pagePtr;
	c_page_pool.getPtr(pagePtr, pos.m_realpid_mm);

        if (pagePtr.p->page_state == ZEMPTY_MM) {
          // skip empty page
          jam();
          if (! (bits & ScanOp::SCAN_NR))
          {
            pos.m_get = ScanPos::Get_next_page_mm;
            break; // incr loop count
          }
          else
          {
            jam();
            pos.m_realpid_mm = RNIL;
          }
        }
    nopage:
        pos.m_page = pagePtr.p;
        pos.m_get = ScanPos::Get_tuple;
      }
      continue;
    case ScanPos::Get_next_page_dd:
      // move to next disk page
      jam();
      {
        Disk_alloc_info& alloc = frag.m_disk_alloc_info;
        Local_fragment_extent_list list(c_extent_pool, alloc.m_extent_list);
        Ptr<Extent_info> ext_ptr;
        c_extent_pool.getPtr(ext_ptr, pos.m_extent_info_ptr_i);
        Extent_info* ext = ext_ptr.p;
        key.m_page_no++;
        if (key.m_page_no >= ext->m_first_page_no + alloc.m_extent_size) {
          // no more pages in this extent
          jam();
          if (! list.next(ext_ptr)) {
            // no more extents, scan ends
            jam();
            pos.m_get = ScanPos::Get_undef;
            scan.m_state = ScanOp::Last;
            return true;
          } else {
            // move to next extent
            jam();
            pos.m_extent_info_ptr_i = ext_ptr.i;
            ext = c_extent_pool.getPtr(pos.m_extent_info_ptr_i);
            key.m_file_no = ext->m_key.m_file_no;
            key.m_page_no = ext->m_first_page_no;
          }
        }
        key.m_page_idx = 0;
        pos.m_get = ScanPos::Get_page_dd;
        /*
          read ahead for scan in disk order
          do read ahead every 8:th page
        */
        if ((bits & ScanOp::SCAN_DD) &&
            (((key.m_page_no - ext->m_first_page_no) & 7) == 0))
        {
          jam();
          // initialize PGMAN request
          Page_cache_client::Request preq;
          preq.m_page = pos.m_key;
          preq.m_callback = TheNULLCallback;

          // set maximum read ahead
          Uint32 read_ahead = m_max_page_read_ahead;

          while (true)
          {
            // prepare page read ahead in current extent
            Uint32 page_no = preq.m_page.m_page_no;
            Uint32 page_no_limit = page_no + read_ahead;
            Uint32 limit = ext->m_first_page_no + alloc.m_extent_size;
            if (page_no_limit > limit)
            {
              jam();
              // read ahead crosses extent, set limit for this extent
              read_ahead = page_no_limit - limit;
              page_no_limit = limit;
              // and make sure we only read one extra extent next time around
              if (read_ahead > alloc.m_extent_size)
                read_ahead = alloc.m_extent_size;
            }
            else
            {
              jam();
              read_ahead = 0; // no more to read ahead after this
            }
            // do read ahead pages for this extent
            while (page_no < page_no_limit)
            {
              // page request to PGMAN
              jam();
              preq.m_page.m_page_no = page_no;
              int flags = 0;
              // ignore result
              m_pgman.get_page(signal, preq, flags);
              jamEntry();
              page_no++;
            }
            if (!read_ahead || !list.next(ext_ptr))
            {
              // no more extents after this or read ahead done
              jam();
              break;
            }
            // move to next extent and initialize PGMAN request accordingly
            Extent_info* ext = c_extent_pool.getPtr(ext_ptr.i);
            preq.m_page.m_file_no = ext->m_key.m_file_no;
            preq.m_page.m_page_no = ext->m_first_page_no;
          }
        } // if ScanOp::SCAN_DD read ahead
      }
      /*FALLTHRU*/
    case ScanPos::Get_page_dd:
      // get global page in PGMAN cache
      jam();
      {
        // check if page is un-allocated or empty
	if (likely(! (bits & ScanOp::SCAN_NR)))
	{
	  Tablespace_client tsman(signal, c_tsman,
				  frag.fragTableId, 
				  frag.fragmentId, 
				  frag.m_tablespace_id);
	  unsigned uncommitted, committed;
	  uncommitted = committed = ~(unsigned)0;
	  int ret = tsman.get_page_free_bits(&key, &uncommitted, &committed);
	  ndbrequire(ret == 0);
	  if (committed == 0 && uncommitted == 0) {
	    // skip empty page
	    jam();
	    pos.m_get = ScanPos::Get_next_page_dd;
	    break; // incr loop count
	  }
	}
        // page request to PGMAN
        Page_cache_client::Request preq;
        preq.m_page = pos.m_key;
        preq.m_callback.m_callbackData = scanPtr.i;
        preq.m_callback.m_callbackFunction =
          safe_cast(&Dbtup::disk_page_tup_scan_callback);
        int flags = 0;
        int res = m_pgman.get_page(signal, preq, flags);
        jamEntry();
        if (res == 0) {
          jam();
          // request queued
          pos.m_get = ScanPos::Get_tuple;
          return false;
        }
        ndbrequire(res > 0);
        pos.m_page = (Page*)m_pgman.m_ptr.p;
      }
      pos.m_get = ScanPos::Get_tuple;
      continue;
      // get tuple
      // move to next tuple
    case ScanPos::Get_next_tuple:
    case ScanPos::Get_next_tuple_fs:
      // move to next fixed size tuple
      jam();
      {
        key.m_page_idx += size;
        pos.m_get = ScanPos::Get_tuple_fs;
      }
      /*FALLTHRU*/
    case ScanPos::Get_tuple:
    case ScanPos::Get_tuple_fs:
      // get fixed size tuple
      jam();
      {
        Fix_page* page = (Fix_page*)pos.m_page;
        if (key.m_page_idx + size <= Fix_page::DATA_WORDS) 
	{
	  pos.m_get = ScanPos::Get_next_tuple_fs;
          th = (Tuple_header*)&page->m_data[key.m_page_idx];
	  
	  if (likely(! (bits & ScanOp::SCAN_NR)))
	  {
	    jam();
            thbits = th->m_header_bits;
	    if (! (thbits & Tuple_header::FREE))
	    {
              goto found_tuple;
	    } 
	  }
	  else
	  {
            if (pos.m_realpid_mm == RNIL)
            {
              jam();
              foundGCI = 0;
              goto found_deleted_rowid;
            }
            thbits = th->m_header_bits;
	    if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI ||
                foundGCI == 0)
	    {
	      if (! (thbits & Tuple_header::FREE))
	      {
		jam();
		goto found_tuple;
	      }
	      else
	      {
		goto found_deleted_rowid;
	      }
	    }
	    else if (thbits != Fix_page::FREE_RECORD && 
		     th->m_operation_ptr_i != RNIL)
	    {
	      jam();
	      goto found_tuple; // Locked tuple...
	      // skip free tuple
	    }
	  }
        } else {
          jam();
          // no more tuples on this page
          pos.m_get = ScanPos::Get_next_page;
        }
      }
      break; // incr loop count
  found_tuple:
      // found possible tuple to return
      jam();
      {
        // caller has already set pos.m_get to next tuple
        if (! (bits & ScanOp::SCAN_LCP && thbits & Tuple_header::LCP_SKIP)) {
          Local_key& key_mm = pos.m_key_mm;
          if (! (bits & ScanOp::SCAN_DD)) {
            key_mm = pos.m_key;
            // real page id is already set
          } else {
	    key_mm.assref(th->m_base_record_ref);
            // recompute for each disk tuple
            pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no);
          }
          // TUPKEYREQ handles savepoint stuff
          scan.m_state = ScanOp::Current;
          return true;
        } else {
          jam();
          // clear it so that it will show up in next LCP
          th->m_header_bits = thbits & ~(Uint32)Tuple_header::LCP_SKIP;
	  if (tablePtr.p->m_bits & Tablerec::TR_Checksum) {
	    jam();
	    setChecksum(th, tablePtr.p);
	  }
        }
      }
      break;
  found_deleted_rowid:
      jam();
      {
	ndbassert(bits & ScanOp::SCAN_NR);
	Local_key& key_mm = pos.m_key_mm;
	if (! (bits & ScanOp::SCAN_DD)) {
	  key_mm = pos.m_key;
	  // caller has already set pos.m_get to next tuple
	  // real page id is already set
	} else {
	  key_mm.assref(th->m_base_record_ref);
	  // recompute for each disk tuple
	  pos.m_realpid_mm = getRealpid(fragPtr.p, key_mm.m_page_no);
	  
	  Fix_page *mmpage = (Fix_page*)c_page_pool.getPtr(pos.m_realpid_mm);
	  th = (Tuple_header*)(mmpage->m_data + key_mm.m_page_idx);
	  if ((foundGCI = *th->get_mm_gci(tablePtr.p)) > scanGCI ||
              foundGCI == 0)
	  {
	    if (! (thbits & Tuple_header::FREE))
	      break;
	  }
	}
	
	NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
	conf->scanPtr = scan.m_userPtr;
	conf->accOperationPtr = RNIL;
	conf->fragId = frag.fragmentId;
	conf->localKey[0] = pos.m_key_mm.ref();
	conf->localKey[1] = 0;
	conf->localKeyLength = 1;
	conf->gci = foundGCI;
	Uint32 blockNo = refToBlock(scan.m_userRef);
	EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7);
	jamEntry();

	// TUPKEYREQ handles savepoint stuff
	loop_count = 32;
	scan.m_state = ScanOp::Next;
	return false;
      }
      break; // incr loop count
    default:
      ndbrequire(false);
      break;
    }
    if (++loop_count >= 32)
      break;
  }
  // TODO: at drop table we have to flush and terminate these
  jam();
  signal->theData[0] = ZTUP_SCAN;
  signal->theData[1] = scanPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
  return false;

found_lcp_keep:
  Local_key tmp;
  tmp.assref(lcp_list);
  tmp.m_page_no = getRealpid(fragPtr.p, tmp.m_page_no);
  
  Ptr<Page> pagePtr;
  c_page_pool.getPtr(pagePtr, tmp.m_page_no);
  Tuple_header* ptr = (Tuple_header*)
    ((Fix_page*)pagePtr.p)->get_ptr(tmp.m_page_idx, 0);
  Uint32 headerbits = ptr->m_header_bits;
  ndbrequire(headerbits & Tuple_header::LCP_KEEP);
  
  Uint32 next = ptr->m_operation_ptr_i;
  ptr->m_operation_ptr_i = RNIL;
  ptr->m_header_bits = headerbits & ~(Uint32)Tuple_header::FREE;
  
  if (tablePtr.p->m_bits & Tablerec::TR_Checksum) {
    jam();
    setChecksum(ptr, tablePtr.p);
  }
  
  NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
  conf->scanPtr = scan.m_userPtr;
  conf->accOperationPtr = (Uint32)-1;
  conf->fragId = frag.fragmentId;
  conf->localKey[0] = lcp_list;
  conf->localKey[1] = 0;
  conf->localKeyLength = 1;
  conf->gci = 0;
  Uint32 blockNo = refToBlock(scan.m_userRef);
  EXECUTE_DIRECT(blockNo, GSN_NEXT_SCANCONF, signal, 7);
  
  fragPtr.p->m_lcp_keep_list = next;
  ptr->m_header_bits |= Tuple_header::FREED; // RESTORE free flag
  if (headerbits & Tuple_header::FREED)
  {
    if (tablePtr.p->m_attributes[MM].m_no_of_varsize)
    {
      jam();
      free_var_rec(fragPtr.p, tablePtr.p, &tmp, pagePtr);
    } else {
      jam();
      free_fix_rec(fragPtr.p, tablePtr.p, &tmp, (Fix_page*)pagePtr.p);
    }
  }
  return false;
}