/*
 *	_hash_getovflpage()
 *
 *	Find an available overflow page and return it.  The returned buffer
 *	is pinned and write-locked, and has had _hash_pageinit() applied,
 *	but it is the caller's responsibility to fill the special space.
 *
 * The caller must hold a pin, but no lock, on the metapage buffer.
 * That buffer is left in the same state at exit.
 */
static Buffer
_hash_getovflpage(Relation rel, Buffer metabuf)
{
    HashMetaPage metap;
    Buffer		mapbuf = 0;
    Buffer		newbuf;
    BlockNumber blkno;
    uint32		orig_firstfree;
    uint32		splitnum;
    uint32	   *freep = NULL;
    uint32		max_ovflpg;
    uint32		bit;
    uint32		first_page;
    uint32		last_bit;
    uint32		last_page;
    uint32		i,
                j;

    /* Get exclusive lock on the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));

    /* start search at hashm_firstfree */
    orig_firstfree = metap->hashm_firstfree;
    first_page = orig_firstfree >> BMPG_SHIFT(metap);
    bit = orig_firstfree & BMPG_MASK(metap);
    i = first_page;
    j = bit / BITS_PER_MAP;
    bit &= ~(BITS_PER_MAP - 1);

    /* outer loop iterates once per bitmap page */
    for (;;)
    {
        BlockNumber mapblkno;
        Page		mappage;
        uint32		last_inpage;

        /* want to end search with the last existing overflow page */
        splitnum = metap->hashm_ovflpoint;
        max_ovflpg = metap->hashm_spares[splitnum] - 1;
        last_page = max_ovflpg >> BMPG_SHIFT(metap);
        last_bit = max_ovflpg & BMPG_MASK(metap);

        if (i > last_page)
            break;

        Assert(i < metap->hashm_nmaps);
        mapblkno = metap->hashm_mapp[i];

        if (i == last_page)
            last_inpage = last_bit;
        else
            last_inpage = BMPGSZ_BIT(metap) - 1;

        /* Release exclusive lock on metapage while reading bitmap page */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

        mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
        mappage = BufferGetPage(mapbuf);
        freep = HashPageGetBitmap(mappage);

        for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
        {
            if (freep[j] != ALL_SET)
                goto found;
        }

        /* No free space here, try to advance to next map page */
        _hash_relbuf(rel, mapbuf);
        i++;
        j = 0;					/* scan from start of next map page */
        bit = 0;

        /* Reacquire exclusive lock on the meta page */
        _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
    }

    /*
     * No free pages --- have to extend the relation to add an overflow page.
     * First, check to see if we have to add a new bitmap page too.
     */
    if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
    {
        /*
         * We create the new bitmap page with all pages marked "in use".
         * Actually two pages in the new bitmap's range will exist
         * immediately: the bitmap page itself, and the following page which
     * is the one we return to the caller.  Both of these are correctly
         * marked "in use".  Subsequent pages do not exist yet, but it is
         * convenient to pre-mark them as "in use" too.
         */
        bit = metap->hashm_spares[splitnum];
        _hash_initbitmap(rel, metap, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
        metap->hashm_spares[splitnum]++;
    }
    else
    {
        /*
         * Nothing to do here; since the page will be past the last used page,
         * we know its bitmap bit was preinitialized to "in use".
         */
    }

    /* Calculate address of the new overflow page */
    bit = metap->hashm_spares[splitnum];
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     * relation length stays in sync with ours.  XXX It's annoying to do this
     * with metapage write lock held; would be better to use a lock that
     * doesn't block incoming searches.
     */
    newbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);

    metap->hashm_spares[splitnum]++;

    /*
     * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
        metap->hashm_firstfree = bit + 1;

    /* Write updated metapage and release lock, but not pin */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    return newbuf;

found:
    /* convert bit to bit number within page */
    bit += _hash_firstfreebit(freep[j]);

    /* mark page "in use" in the bitmap */
    SETBIT(freep, bit);
    _hash_wrtbuf(rel, mapbuf);

    /* Reacquire exclusive lock on the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /* convert bit to absolute bit number */
    bit += (i << BMPG_SHIFT(metap));

    /* Calculate address of the recycled overflow page */
    blkno = bitno_to_blkno(metap, bit);

    /*
     * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     * changing it if someone moved it while we were searching bitmap pages.
     */
    if (metap->hashm_firstfree == orig_firstfree)
    {
        metap->hashm_firstfree = bit + 1;

        /* Write updated metapage and release lock, but not pin */
        _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
    }
    else
    {
        /* We didn't change the metapage, so no need to write */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
    }

    /* Fetch, init, and return the recycled page */
    return _hash_getinitbuf(rel, blkno);
}
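
The free-bit search above hinges on decomposing an overflow bit number into a bitmap page index, a uint32 word within that page, and a bit within that word. Below is a minimal standalone sketch of that arithmetic, assuming the stock 8 kB page layout (hashm_bmsize = 4096 bytes, so one bitmap page covers 2^15 bits and each bitmap word holds 32 bits); the constants are illustrative assumptions, not values read from a real metapage.

#include <stdio.h>
#include <stdint.h>

#define DEMO_BMPG_SHIFT   15	/* log2(bits per bitmap page), assumed 8 kB layout */
#define DEMO_BMPG_MASK    ((1U << DEMO_BMPG_SHIFT) - 1)
#define DEMO_BITS_PER_MAP 32	/* bits in one uint32 bitmap word */

int
main(void)
{
	uint32_t	firstfree = 70000;	/* hypothetical hashm_firstfree value */

	uint32_t	page = firstfree >> DEMO_BMPG_SHIFT;	/* which bitmap page (i) */
	uint32_t	bit = firstfree & DEMO_BMPG_MASK;		/* bit within that page */
	uint32_t	word = bit / DEMO_BITS_PER_MAP;			/* uint32 word to test (j) */

	/* Round down to a word boundary, exactly as the search loop seeds "bit" */
	bit &= ~(DEMO_BITS_PER_MAP - 1);

	printf("bitmap page %u, word %u, word-aligned bit %u\n", page, word, bit);
	return 0;
}

With firstfree = 70000 this prints bitmap page 2, word 139, word-aligned bit 4448, which is exactly how i, j, and bit are seeded before the outer loop.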
Example #2
/*
 *	_hash_metapinit() -- Initialize the metadata page of a hash index,
 *				the initial buckets, and the initial bitmap page.
 *
 * The initial number of buckets is dependent on num_tuples, an estimate
 * of the number of tuples to be loaded into the index initially.  The
 * chosen number of buckets is returned.
 *
 * We are fairly cavalier about locking here, since we know that no one else
 * could be accessing this index.  In particular the rule about not holding
 * multiple buffer locks is ignored.
 */
uint32
_hash_metapinit(Relation rel, double num_tuples, ForkNumber forkNum)
{
    HashMetaPage metap;
    HashPageOpaque pageopaque;
    Buffer		metabuf;
    Buffer		buf;
    Page		pg;
    int32		data_width;
    int32		item_width;
    int32		ffactor;
    double		dnumbuckets;
    uint32		num_buckets;
    uint32		log2_num_buckets;
    uint32		i;

    /* safety check */
    if (RelationGetNumberOfBlocksInFork(rel, forkNum) != 0)
        elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
             RelationGetRelationName(rel));

    /*
     * Determine the target fill factor (in tuples per bucket) for this index.
     * The idea is to make the fill factor correspond to pages about as full
     * as the user-settable fillfactor parameter says.  We can compute it
     * exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
     */
    data_width = sizeof(uint32);
    item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
                 sizeof(ItemIdData);		/* include the line pointer */
    ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
    /* keep to a sane range */
    if (ffactor < 10)
        ffactor = 10;

    /*
     * Choose the number of initial bucket pages to match the fill factor
     * given the estimated number of tuples.  We round up the result to the
     * next power of 2, however, and always force at least 2 bucket pages. The
     * upper limit is determined by considerations explained in
     * _hash_expandtable().
     */
    dnumbuckets = num_tuples / ffactor;
    if (dnumbuckets <= 2.0)
        num_buckets = 2;
    else if (dnumbuckets >= (double) 0x40000000)
        num_buckets = 0x40000000;
    else
        num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets);

    log2_num_buckets = _hash_log2(num_buckets);
    Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
    Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);

    /*
     * We initialize the metapage, the first N bucket pages, and the first
     * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
     * calls to occur.  This ensures that the smgr level has the right idea of
     * the physical index length.
     */
    metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, forkNum);
    pg = BufferGetPage(metabuf);

    pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
    pageopaque->hasho_prevblkno = InvalidBlockNumber;
    pageopaque->hasho_nextblkno = InvalidBlockNumber;
    pageopaque->hasho_bucket = -1;
    pageopaque->hasho_flag = LH_META_PAGE;
    pageopaque->hasho_page_id = HASHO_PAGE_ID;

    metap = HashPageGetMeta(pg);

    metap->hashm_magic = HASH_MAGIC;
    metap->hashm_version = HASH_VERSION;
    metap->hashm_ntuples = 0;
    metap->hashm_nmaps = 0;
    metap->hashm_ffactor = ffactor;
    metap->hashm_bsize = HashGetMaxBitmapSize(pg);
    /* find largest bitmap array size that will fit in page size */
    for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
    {
        if ((1 << i) <= metap->hashm_bsize)
            break;
    }
    Assert(i > 0);
    metap->hashm_bmsize = 1 << i;
    metap->hashm_bmshift = i + BYTE_TO_BIT;
    Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));

    /*
     * Label the index with its primary hash support function's OID.  This is
     * pretty useless for normal operation (in fact, hashm_procid is not used
     * anywhere), but it might be handy for forensic purposes so we keep it.
     */
    metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

    /*
     * We initialize the index with N buckets, 0 .. N-1, occupying physical
     * blocks 1 to N.  The first freespace bitmap page is in block N+1. Since
     * N is a power of 2, we can set the masks this way:
     */
    metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
    metap->hashm_highmask = (num_buckets << 1) - 1;

    MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
    MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

    /* Set up mapping for one spare page after the initial splitpoints */
    metap->hashm_spares[log2_num_buckets] = 1;
    metap->hashm_ovflpoint = log2_num_buckets;
    metap->hashm_firstfree = 0;

    /*
     * Release buffer lock on the metapage while we initialize buckets.
     * Otherwise, we'll be in interrupt holdoff and the CHECK_FOR_INTERRUPTS
     * won't accomplish anything.  It's a bad idea to hold buffer locks for
     * long intervals in any case, since that can block the bgwriter.
     */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    /*
     * Initialize the first N buckets
     */
    for (i = 0; i < num_buckets; i++)
    {
        /* Allow interrupts, in case N is huge */
        CHECK_FOR_INTERRUPTS();

        buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), forkNum);
        pg = BufferGetPage(buf);
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
        pageopaque->hasho_prevblkno = InvalidBlockNumber;
        pageopaque->hasho_nextblkno = InvalidBlockNumber;
        pageopaque->hasho_bucket = i;
        pageopaque->hasho_flag = LH_BUCKET_PAGE;
        pageopaque->hasho_page_id = HASHO_PAGE_ID;
        _hash_wrtbuf(rel, buf);
    }

    /* Now reacquire buffer lock on metapage */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /*
     * Initialize first bitmap page
     */
    _hash_initbitmap(rel, metap, num_buckets + 1, forkNum);

    /* all done */
    _hash_wrtbuf(rel, metabuf);

    return num_buckets;
}
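
The sizing arithmetic at the top of _hash_metapinit is easy to check by hand. Here is a self-contained sketch, assuming an 8 kB page at the default 75% fillfactor (6144 usable bytes) and an assumed MAXALIGN'd item width of 20 bytes; the real values come from RelationGetTargetPageUsage and sizeof the actual structs, so these figures are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* Smallest i such that 2^i >= num, mirroring the loop shape of _hash_log2 */
static uint32_t
demo_log2(uint32_t num)
{
	uint32_t	i,
				limit;

	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
		;
	return i;
}

int
main(void)
{
	double		num_tuples = 100000.0;	/* hypothetical load estimate */
	int			ffactor = 6144 / 20;	/* assumed usable bytes / item width = 307 */
	double		dnumbuckets;
	uint32_t	num_buckets;

	if (ffactor < 10)			/* keep to a sane range, as above */
		ffactor = 10;

	dnumbuckets = num_tuples / ffactor;
	if (dnumbuckets <= 2.0)
		num_buckets = 2;
	else if (dnumbuckets >= (double) 0x40000000)
		num_buckets = 0x40000000;
	else
		num_buckets = ((uint32_t) 1) << demo_log2((uint32_t) dnumbuckets);

	/* 100000 / 307 = 325.7, rounded up to the next power of 2 */
	printf("ffactor = %d, num_buckets = %u (2^%u)\n",
		   ffactor, num_buckets, demo_log2(num_buckets));
	return 0;
}

This prints ffactor = 307 and num_buckets = 512 (2^9), comfortably inside the Assert that log2_num_buckets stays below HASH_MAX_SPLITPOINTS.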
Example #3
/*
 *	_hash_metapinit() -- Initialize the metadata page of a hash index,
 *				the two buckets that we begin with and the initial
 *				bitmap page.
 *
 * We are fairly cavalier about locking here, since we know that no one else
 * could be accessing this index.  In particular the rule about not holding
 * multiple buffer locks is ignored.
 */
void
_hash_metapinit(Relation rel)
{
	HashMetaPage metap;
	HashPageOpaque pageopaque;
	Buffer		metabuf;
	Buffer		buf;
	Page		pg;
	int32		data_width;
	int32		item_width;
	int32		ffactor;
	uint16		i;

	/* safety check */
	if (RelationGetNumberOfBlocks(rel) != 0)
		elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
			 RelationGetRelationName(rel));

	/*
	 * Determine the target fill factor (tuples per bucket) for this index.
	 * The idea is to make the fill factor correspond to pages about
	 * three-quarters full.  We can compute it exactly if the index datatype
	 * is fixed-width, but for var-width there's some guessing involved.
	 */
	data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
								 RelationGetDescr(rel)->attrs[0]->atttypmod);
	item_width = MAXALIGN(sizeof(HashItemData)) + MAXALIGN(data_width) +
		sizeof(ItemIdData);		/* include the line pointer */
	ffactor = (BLCKSZ * 3 / 4) / item_width;
	/* keep to a sane range */
	if (ffactor < 10)
		ffactor = 10;

	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
	pg = BufferGetPage(metabuf);
	_hash_pageinit(pg, BufferGetPageSize(metabuf));

	pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
	pageopaque->hasho_prevblkno = InvalidBlockNumber;
	pageopaque->hasho_nextblkno = InvalidBlockNumber;
	pageopaque->hasho_bucket = -1;
	pageopaque->hasho_flag = LH_META_PAGE;
	pageopaque->hasho_filler = HASHO_FILL;

	metap = (HashMetaPage) pg;

	metap->hashm_magic = HASH_MAGIC;
	metap->hashm_version = HASH_VERSION;
	metap->hashm_ntuples = 0;
	metap->hashm_nmaps = 0;
	metap->hashm_ffactor = ffactor;
	metap->hashm_bsize = BufferGetPageSize(metabuf);
	/* find largest bitmap array size that will fit in page size */
	for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
	{
		if ((1 << i) <= (metap->hashm_bsize -
						 (MAXALIGN(sizeof(PageHeaderData)) +
						  MAXALIGN(sizeof(HashPageOpaqueData)))))
			break;
	}
	Assert(i > 0);
	metap->hashm_bmsize = 1 << i;
	metap->hashm_bmshift = i + BYTE_TO_BIT;
	Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));

	metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

	/*
	 * We initialize the index with two buckets, 0 and 1, occupying physical
	 * blocks 1 and 2.  The first freespace bitmap page is in block 3.
	 */
	metap->hashm_maxbucket = metap->hashm_lowmask = 1;	/* nbuckets - 1 */
	metap->hashm_highmask = 3;	/* (nbuckets << 1) - 1 */

	MemSet((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
	MemSet((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

	metap->hashm_spares[1] = 1;	/* the first bitmap page is the only spare */
	metap->hashm_ovflpoint = 1;
	metap->hashm_firstfree = 0;

	/*
	 * Initialize the first two buckets
	 */
	for (i = 0; i <= 1; i++)
	{
		buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(metap, i), HASH_WRITE);
		pg = BufferGetPage(buf);
		_hash_pageinit(pg, BufferGetPageSize(buf));
		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
		pageopaque->hasho_prevblkno = InvalidBlockNumber;
		pageopaque->hasho_nextblkno = InvalidBlockNumber;
		pageopaque->hasho_bucket = i;
		pageopaque->hasho_flag = LH_BUCKET_PAGE;
		pageopaque->hasho_filler = HASHO_FILL;
		_hash_wrtbuf(rel, buf);
	}

	/*
	 * Initialize first bitmap page.  Can't do this until we
	 * create the first two buckets, else smgr will complain.
	 */
	_hash_initbitmap(rel, metap, 3);

	/* all done */
	_hash_wrtbuf(rel, metabuf);
}
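
All of the versions above contain the same "largest bitmap array size" loop. Here is a standalone sketch of what it computes, assuming an 8192-byte page and assumed MAXALIGN'd overheads of 24 bytes for the page header and 16 for the hash opaque area (illustrative figures, not sizeof the real structs):

#include <stdio.h>
#include <stdint.h>

#define DEMO_BYTE_TO_BIT 3		/* log2(bits per byte), as in the hash code */

/* Smallest i such that 2^i >= num, mirroring the loop shape of _hash_log2 */
static uint32_t
demo_log2(uint32_t num)
{
	uint32_t	i,
				limit;

	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
		;
	return i;
}

int
main(void)
{
	uint32_t	bsize = 8192 - (24 + 16);	/* assumed usable bytes per page */
	uint32_t	i;

	/* Same loop shape as above: largest power of 2 that fits in bsize */
	for (i = demo_log2(bsize); i > 0; --i)
	{
		if ((1U << i) <= bsize)
			break;
	}

	printf("bmsize = %u bytes, bmshift = %u, bits per bitmap page = %u\n",
		   1U << i, i + DEMO_BYTE_TO_BIT, 1U << (i + DEMO_BYTE_TO_BIT));
	return 0;
}

This prints bmsize = 4096, bmshift = 15, and 32768 bits per bitmap page, which is where the 2^15 shift assumed in the first sketch comes from.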
Example #4
/*
 *	_hash_metapinit() -- Initialize the metadata page of a hash index,
 *				the two buckets that we begin with and the initial
 *				bitmap page.
 *
 * We are fairly cavalier about locking here, since we know that no one else
 * could be accessing this index.  In particular the rule about not holding
 * multiple buffer locks is ignored.
 */
void
_hash_metapinit(Relation rel)
{
	MIRROREDLOCK_BUFMGR_DECLARE;

	HashMetaPage metap;
	HashPageOpaque pageopaque;
	Buffer		metabuf;
	Buffer		buf;
	Page		pg;
	int32		data_width;
	int32		item_width;
	int32		ffactor;
	uint16		i;

	/* safety check */
	if (RelationGetNumberOfBlocks(rel) != 0)
		elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
			 RelationGetRelationName(rel));

	/*
	 * Determine the target fill factor (in tuples per bucket) for this index.
	 * The idea is to make the fill factor correspond to pages about as full
	 * as the user-settable fillfactor parameter says.  We can compute it
	 * exactly if the index datatype is fixed-width, but for var-width there's
	 * some guessing involved.
	 */
	data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
								 RelationGetDescr(rel)->attrs[0]->atttypmod);
	item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
		sizeof(ItemIdData);		/* include the line pointer */
	ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
	/* keep to a sane range */
	if (ffactor < 10)
		ffactor = 10;

	/*
	 * We initialize the metapage, the first two bucket pages, and the
	 * first bitmap page in sequence, using _hash_getnewbuf to cause
	 * smgrextend() calls to occur.  This ensures that the smgr level
	 * has the right idea of the physical index length.
	 */
	
	// -------- MirroredLock ----------
	MIRROREDLOCK_BUFMGR_LOCK;
	
	metabuf = _hash_getnewbuf(rel, HASH_METAPAGE, HASH_WRITE);
	pg = BufferGetPage(metabuf);
	_hash_pageinit(pg, BufferGetPageSize(metabuf));

	pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
	pageopaque->hasho_prevblkno = InvalidBlockNumber;
	pageopaque->hasho_nextblkno = InvalidBlockNumber;
	pageopaque->hasho_bucket = -1;
	pageopaque->hasho_flag = LH_META_PAGE;
	pageopaque->hasho_filler = HASHO_FILL;

	metap = (HashMetaPage) pg;

	metap->hashm_magic = HASH_MAGIC;
	metap->hashm_version = HASH_VERSION;
	metap->hashm_ntuples = 0;
	metap->hashm_nmaps = 0;
	metap->hashm_ffactor = ffactor;
	metap->hashm_bsize = BufferGetPageSize(metabuf);
	/* find largest bitmap array size that will fit in page size */
	for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
	{
		if ((1 << i) <= (metap->hashm_bsize -
						 (MAXALIGN(sizeof(PageHeaderData)) +
						  MAXALIGN(sizeof(HashPageOpaqueData)))))
			break;
	}
	Assert(i > 0);
	metap->hashm_bmsize = 1 << i;
	metap->hashm_bmshift = i + BYTE_TO_BIT;
	Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));

	metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

	/*
	 * We initialize the index with two buckets, 0 and 1, occupying physical
	 * blocks 1 and 2.  The first freespace bitmap page is in block 3.
	 */
	metap->hashm_maxbucket = metap->hashm_lowmask = 1;	/* nbuckets - 1 */
	metap->hashm_highmask = 3;	/* (nbuckets << 1) - 1 */

	MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
	MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

	metap->hashm_spares[1] = 1; /* the first bitmap page is the only spare */
	metap->hashm_ovflpoint = 1;
	metap->hashm_firstfree = 0;

	/*
	 * Initialize the first two buckets
	 */
	for (i = 0; i <= 1; i++)
	{
		buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i), HASH_WRITE);
		pg = BufferGetPage(buf);
		_hash_pageinit(pg, BufferGetPageSize(buf));
		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
		pageopaque->hasho_prevblkno = InvalidBlockNumber;
		pageopaque->hasho_nextblkno = InvalidBlockNumber;
		pageopaque->hasho_bucket = i;
		pageopaque->hasho_flag = LH_BUCKET_PAGE;
		pageopaque->hasho_filler = HASHO_FILL;
		_hash_wrtbuf(rel, buf);
	}

	/*
	 * Initialize first bitmap page
	 */
	_hash_initbitmap(rel, metap, 3);

	/* all done */
	_hash_wrtbuf(rel, metabuf);
	
	MIRROREDLOCK_BUFMGR_UNLOCK;
	// -------- MirroredLock ----------
	
}
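
The maxbucket/lowmask/highmask triple set up by every _hash_metapinit variant drives the linear-hashing reduction of a hash key to a bucket number. Here is a sketch modeled on _hash_hashkey2bucket (the body below is a paraphrase under the conventions above, not the original source):

#include <stdio.h>
#include <stdint.h>

/* Map a hash key to a bucket, linear-hashing style (sketch) */
static uint32_t
demo_key2bucket(uint32_t hashkey, uint32_t maxbucket,
				uint32_t highmask, uint32_t lowmask)
{
	uint32_t	bucket = hashkey & highmask;

	if (bucket > maxbucket)
		bucket &= lowmask;		/* target bucket not split off yet */
	return bucket;
}

int
main(void)
{
	/* Two initial buckets, as in the metapage setup above */
	uint32_t	maxbucket = 1;	/* nbuckets - 1 */
	uint32_t	lowmask = 1;	/* nbuckets - 1 */
	uint32_t	highmask = 3;	/* (nbuckets << 1) - 1 */

	/* 0xDEADBEEF & 3 = 3 > maxbucket, so fall back to 0xDEADBEEF & 1 = 1 */
	printf("bucket = %u\n",
		   demo_key2bucket(0xDEADBEEFu, maxbucket, highmask, lowmask));
	return 0;
}

Once enough splits have raised maxbucket to 3, the same key maps straight to bucket 3; only the tuples of the bucket being split ever move, which is the point of the two-mask scheme.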