Esempio n. 1
0
/*
 * Given two deformed tuples, adjust the first one so that it's consistent
 * with the summary values in both.
 */
static void
union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
{
	int			keyno;
	BrinMemTuple *db;
	MemoryContext cxt;
	MemoryContext oldcxt;

	/* Use our own memory context to avoid retail pfree */
	cxt = AllocSetContextCreate(CurrentMemoryContext,
								"brin union",
								ALLOCSET_DEFAULT_SIZES);
	oldcxt = MemoryContextSwitchTo(cxt);
	db = brin_deform_tuple(bdesc, b);
	MemoryContextSwitchTo(oldcxt);

	for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
	{
		FmgrInfo   *unionFn;
		BrinValues *col_a = &a->bt_columns[keyno];
		BrinValues *col_b = &db->bt_columns[keyno];

		unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
									BRIN_PROCNUM_UNION);
		FunctionCall3Coll(unionFn,
						  bdesc->bd_index->rd_indcollation[keyno],
						  PointerGetDatum(bdesc),
						  PointerGetDatum(col_a),
						  PointerGetDatum(col_b));
	}

	MemoryContextDelete(cxt);
}
Esempio n. 2
0
/*
 * Extract all item values from a BRIN index page
 *
 * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
 */
Datum
brin_page_items(PG_FUNCTION_ARGS)
{
    bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
    Oid			indexRelid = PG_GETARG_OID(1);
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    TupleDesc	tupdesc;
    MemoryContext oldcontext;
    Tuplestorestate *tupstore;
    Relation	indexRel;
    brin_column_state **columns;
    BrinDesc   *bdesc;
    BrinMemTuple *dtup;
    Page		page;
    OffsetNumber offset;
    AttrNumber	attno;
    bool		unusedItem;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to use raw page functions"))));

    /* check to see if caller supports us returning a tuplestore */
    if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("set-valued function called in context that cannot accept a set")));
    if (!(rsinfo->allowedModes & SFRM_Materialize) ||
            rsinfo->expectedDesc == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("materialize mode required, but it is not allowed in this context")));

    /* Build a tuple descriptor for our result type */
    if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    /* Build tuplestore to hold the result rows */
    oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);

    tupstore = tuplestore_begin_heap(true, false, work_mem);
    rsinfo->returnMode = SFRM_Materialize;
    rsinfo->setResult = tupstore;
    rsinfo->setDesc = tupdesc;

    MemoryContextSwitchTo(oldcontext);

    indexRel = index_open(indexRelid, AccessShareLock);
    bdesc = brin_build_desc(indexRel);

    /* minimally verify the page we got */
    page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");

    /*
     * Initialize output functions for all indexed datatypes; simplifies
     * calling them later.
     */
    columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
    for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
    {
        Oid			output;
        bool		isVarlena;
        BrinOpcInfo *opcinfo;
        int			i;
        brin_column_state *column;

        opcinfo = bdesc->bd_info[attno - 1];
        column = palloc(offsetof(brin_column_state, outputFn) +
                        sizeof(FmgrInfo) * opcinfo->oi_nstored);

        column->nstored = opcinfo->oi_nstored;
        for (i = 0; i < opcinfo->oi_nstored; i++)
        {
            getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
            fmgr_info(output, &column->outputFn[i]);
        }

        columns[attno - 1] = column;
    }

    offset = FirstOffsetNumber;
    unusedItem = false;
    dtup = NULL;
    for (;;)
    {
        Datum		values[7];
        bool		nulls[7];

        /*
         * This loop is called once for every attribute of every tuple in the
         * page.  At the start of a tuple, we get a NULL dtup; that's our
         * signal for obtaining and decoding the next one.  If that's not the
         * case, we output the next attribute.
         */
        if (dtup == NULL)
        {
            ItemId		itemId;

            /* verify item status: if there's no data, we can't decode */
            itemId = PageGetItemId(page, offset);
            if (ItemIdIsUsed(itemId))
            {
                dtup = brin_deform_tuple(bdesc,
                                         (BrinTuple *) PageGetItem(page, itemId));
                attno = 1;
                unusedItem = false;
            }
            else
                unusedItem = true;
        }
        else
            attno++;

        MemSet(nulls, 0, sizeof(nulls));

        if (unusedItem)
        {
            values[0] = UInt16GetDatum(offset);
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
        }
        else
        {
            int			att = attno - 1;

            values[0] = UInt16GetDatum(offset);
            values[1] = UInt32GetDatum(dtup->bt_blkno);
            values[2] = UInt16GetDatum(attno);
            values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
            values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
            values[5] = BoolGetDatum(dtup->bt_placeholder);
            if (!dtup->bt_columns[att].bv_allnulls)
            {
                BrinValues *bvalues = &dtup->bt_columns[att];
                StringInfoData s;
                bool		first;
                int			i;

                initStringInfo(&s);
                appendStringInfoChar(&s, '{');

                first = true;
                for (i = 0; i < columns[att]->nstored; i++)
                {
                    char	   *val;

                    if (!first)
                        appendStringInfoString(&s, " .. ");
                    first = false;
                    val = OutputFunctionCall(&columns[att]->outputFn[i],
                                             bvalues->bv_values[i]);
                    appendStringInfoString(&s, val);
                    pfree(val);
                }
                appendStringInfoChar(&s, '}');

                values[6] = CStringGetTextDatum(s.data);
                pfree(s.data);
            }
            else
            {
                nulls[6] = true;
            }
        }

        tuplestore_putvalues(tupstore, tupdesc, values, nulls);

        /*
         * If the item was unused, jump straight to the next one; otherwise,
         * the only cleanup needed here is to set our signal to go to the next
         * tuple in the following iteration, by freeing the current one.
         */
        if (unusedItem)
            offset = OffsetNumberNext(offset);
        else if (attno >= bdesc->bd_tupdesc->natts)
        {
            pfree(dtup);
            dtup = NULL;
            offset = OffsetNumberNext(offset);
        }

        /*
         * If we're beyond the end of the page, we're done.
         */
        if (offset > PageGetMaxOffsetNumber(page))
            break;
    }

    /* clean up and return the tuplestore */
    brin_free_desc(bdesc);
    tuplestore_donestoring(tupstore);
    index_close(indexRel, AccessShareLock);

    return (Datum) 0;
}
Esempio n. 3
0
/*
 * Execute the index scan.
 *
 * This works by reading index TIDs from the revmap, and obtaining the index
 * tuples pointed to by them; the summary values in the index tuples are
 * compared to the scan keys.  We return into the TID bitmap all the pages in
 * ranges corresponding to index tuples that match the scan keys.
 *
 * If a TID from the revmap is read as InvalidTID, we know that range is
 * unsummarized.  Pages in those ranges need to be returned regardless of scan
 * keys.
 */
int64
bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
	Relation	idxRel = scan->indexRelation;
	Buffer		buf = InvalidBuffer;
	BrinDesc   *bdesc;
	Oid			heapOid;
	Relation	heapRel;
	BrinOpaque *opaque;
	BlockNumber nblocks;
	BlockNumber heapBlk;
	int			totalpages = 0;
	FmgrInfo   *consistentFn;
	MemoryContext oldcxt;
	MemoryContext perRangeCxt;

	opaque = (BrinOpaque *) scan->opaque;
	bdesc = opaque->bo_bdesc;
	pgstat_count_index_scan(idxRel);

	/*
	 * We need to know the size of the table so that we know how long to
	 * iterate on the revmap.
	 */
	heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
	heapRel = heap_open(heapOid, AccessShareLock);
	nblocks = RelationGetNumberOfBlocks(heapRel);
	heap_close(heapRel, AccessShareLock);

	/*
	 * Make room for the consistent support procedures of indexed columns.  We
	 * don't look them up here; we do that lazily the first time we see a scan
	 * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
	 */
	consistentFn = palloc0(sizeof(FmgrInfo) * bdesc->bd_tupdesc->natts);

	/*
	 * Setup and use a per-range memory context, which is reset every time we
	 * loop below.  This avoids having to free the tuples within the loop.
	 */
	perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
										"bringetbitmap cxt",
										ALLOCSET_DEFAULT_SIZES);
	oldcxt = MemoryContextSwitchTo(perRangeCxt);

	/*
	 * Now scan the revmap.  We start by querying for heap page 0,
	 * incrementing by the number of pages per range; this gives us a full
	 * view of the table.
	 */
	for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
	{
		bool		addrange;
		BrinTuple  *tup;
		OffsetNumber off;
		Size		size;

		CHECK_FOR_INTERRUPTS();

		MemoryContextResetAndDeleteChildren(perRangeCxt);

		tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
									   &off, &size, BUFFER_LOCK_SHARE,
									   scan->xs_snapshot);
		if (tup)
		{
			tup = brin_copy_tuple(tup, size);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}

		/*
		 * For page ranges with no indexed tuple, we must return the whole
		 * range; otherwise, compare it to the scan keys.
		 */
		if (tup == NULL)
		{
			addrange = true;
		}
		else
		{
			BrinMemTuple *dtup;

			dtup = brin_deform_tuple(bdesc, tup);
			if (dtup->bt_placeholder)
			{
				/*
				 * Placeholder tuples are always returned, regardless of the
				 * values stored in them.
				 */
				addrange = true;
			}
			else
			{
				int			keyno;

				/*
				 * Compare scan keys with summary values stored for the range.
				 * If scan keys are matched, the page range must be added to
				 * the bitmap.  We initially assume the range needs to be
				 * added; in particular this serves the case where there are
				 * no keys.
				 */
				addrange = true;
				for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
				{
					ScanKey		key = &scan->keyData[keyno];
					AttrNumber	keyattno = key->sk_attno;
					BrinValues *bval = &dtup->bt_columns[keyattno - 1];
					Datum		add;

					/*
					 * The collation of the scan key must match the collation
					 * used in the index column (but only if the search is not
					 * IS NULL/ IS NOT NULL).  Otherwise we shouldn't be using
					 * this index ...
					 */
					Assert((key->sk_flags & SK_ISNULL) ||
						   (key->sk_collation ==
					  bdesc->bd_tupdesc->attrs[keyattno - 1]->attcollation));

					/* First time this column? look up consistent function */
					if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
					{
						FmgrInfo   *tmp;

						tmp = index_getprocinfo(idxRel, keyattno,
												BRIN_PROCNUM_CONSISTENT);
						fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
									   CurrentMemoryContext);
					}

					/*
					 * Check whether the scan key is consistent with the page
					 * range values; if so, have the pages in the range added
					 * to the output bitmap.
					 *
					 * When there are multiple scan keys, failure to meet the
					 * criteria for a single one of them is enough to discard
					 * the range as a whole, so break out of the loop as soon
					 * as a false return value is obtained.
					 */
					add = FunctionCall3Coll(&consistentFn[keyattno - 1],
											key->sk_collation,
											PointerGetDatum(bdesc),
											PointerGetDatum(bval),
											PointerGetDatum(key));
					addrange = DatumGetBool(add);
					if (!addrange)
						break;
				}
			}
		}

		/* add the pages in the range to the output bitmap, if needed */
		if (addrange)
		{
			BlockNumber pageno;

			for (pageno = heapBlk;
				 pageno <= heapBlk + opaque->bo_pagesPerRange - 1;
				 pageno++)
			{
				MemoryContextSwitchTo(oldcxt);
				tbm_add_page(tbm, pageno);
				totalpages++;
				MemoryContextSwitchTo(perRangeCxt);
			}
		}
	}

	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(perRangeCxt);

	if (buf != InvalidBuffer)
		ReleaseBuffer(buf);

	/*
	 * XXX We have an approximation of the number of *pages* that our scan
	 * returns, but we don't have a precise idea of the number of heap tuples
	 * involved.
	 */
	return totalpages * 10;
}
Esempio n. 4
0
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do.
 */
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
		   ItemPointer heaptid, Relation heapRel,
		   IndexUniqueCheck checkUnique)
{
	BlockNumber pagesPerRange;
	BrinDesc   *bdesc = NULL;
	BrinRevmap *revmap;
	Buffer		buf = InvalidBuffer;
	MemoryContext tupcxt = NULL;
	MemoryContext oldcxt = NULL;

	revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);

	for (;;)
	{
		bool		need_insert = false;
		OffsetNumber off;
		BrinTuple  *brtup;
		BrinMemTuple *dtup;
		BlockNumber heapBlk;
		int			keyno;

		CHECK_FOR_INTERRUPTS();

		heapBlk = ItemPointerGetBlockNumber(heaptid);
		/* normalize the block number to be the first block in the range */
		heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
		brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
										 BUFFER_LOCK_SHARE, NULL);

		/* if range is unsummarized, there's nothing to do */
		if (!brtup)
			break;

		/* First time through? */
		if (bdesc == NULL)
		{
			bdesc = brin_build_desc(idxRel);
			tupcxt = AllocSetContextCreate(CurrentMemoryContext,
										   "brininsert cxt",
										   ALLOCSET_DEFAULT_SIZES);
			oldcxt = MemoryContextSwitchTo(tupcxt);
		}

		dtup = brin_deform_tuple(bdesc, brtup);

		/*
		 * Compare the key values of the new tuple to the stored index values;
		 * our deformed tuple will get updated if the new tuple doesn't fit
		 * the original range (note this means we can't break out of the loop
		 * early). Make a note of whether this happens, so that we know to
		 * insert the modified tuple later.
		 */
		for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
		{
			Datum		result;
			BrinValues *bval;
			FmgrInfo   *addValue;

			bval = &dtup->bt_columns[keyno];
			addValue = index_getprocinfo(idxRel, keyno + 1,
										 BRIN_PROCNUM_ADDVALUE);
			result = FunctionCall4Coll(addValue,
									   idxRel->rd_indcollation[keyno],
									   PointerGetDatum(bdesc),
									   PointerGetDatum(bval),
									   values[keyno],
									   nulls[keyno]);
			/* if that returned true, we need to insert the updated tuple */
			need_insert |= DatumGetBool(result);
		}

		if (!need_insert)
		{
			/*
			 * The tuple is consistent with the new values, so there's nothing
			 * to do.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}
		else
		{
			Page		page = BufferGetPage(buf);
			ItemId		lp = PageGetItemId(page, off);
			Size		origsz;
			BrinTuple  *origtup;
			Size		newsz;
			BrinTuple  *newtup;
			bool		samepage;

			/*
			 * Make a copy of the old tuple, so that we can compare it after
			 * re-acquiring the lock.
			 */
			origsz = ItemIdGetLength(lp);
			origtup = brin_copy_tuple(brtup, origsz);

			/*
			 * Before releasing the lock, check if we can attempt a same-page
			 * update.  Another process could insert a tuple concurrently in
			 * the same page though, so downstream we must be prepared to cope
			 * if this turns out to not be possible after all.
			 */
			newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
			samepage = brin_can_do_samepage_update(buf, origsz, newsz);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/*
			 * Try to update the tuple.  If this doesn't work for whatever
			 * reason, we need to restart from the top; the revmap might be
			 * pointing at a different tuple for this block now, so we need to
			 * recompute to ensure both our new heap tuple and the other
			 * inserter's are covered by the combined tuple.  It might be that
			 * we don't need to update at all.
			 */
			if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
							   buf, off, origtup, origsz, newtup, newsz,
							   samepage))
			{
				/* no luck; start over */
				MemoryContextResetAndDeleteChildren(tupcxt);
				continue;
			}
		}

		/* success! */
		break;
	}

	brinRevmapTerminate(revmap);
	if (BufferIsValid(buf))
		ReleaseBuffer(buf);
	if (bdesc != NULL)
	{
		brin_free_desc(bdesc);
		MemoryContextSwitchTo(oldcxt);
		MemoryContextDelete(tupcxt);
	}

	return false;
}
Esempio n. 5
0
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do.
 */
Datum
brininsert(PG_FUNCTION_ARGS)
{
	Relation	idxRel = (Relation) PG_GETARG_POINTER(0);
	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
	bool	   *nulls = (bool *) PG_GETARG_POINTER(2);
	ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3);

	/* we ignore the rest of our arguments */
	BlockNumber pagesPerRange;
	BrinDesc   *bdesc = NULL;
	BrinRevmap *revmap;
	Buffer		buf = InvalidBuffer;
	MemoryContext tupcxt = NULL;
	MemoryContext oldcxt = NULL;

	revmap = brinRevmapInitialize(idxRel, &pagesPerRange);

	for (;;)
	{
		bool		need_insert = false;
		OffsetNumber off;
		BrinTuple  *brtup;
		BrinMemTuple *dtup;
		BlockNumber heapBlk;
		int			keyno;
#ifdef USE_ASSERT_CHECKING
		BrinTuple  *tmptup;
		BrinMemTuple *tmpdtup;
		Size 		tmpsiz;
#endif

		CHECK_FOR_INTERRUPTS();

		heapBlk = ItemPointerGetBlockNumber(heaptid);
		/* normalize the block number to be the first block in the range */
		heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
		brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL,
										 BUFFER_LOCK_SHARE);

		/* if range is unsummarized, there's nothing to do */
		if (!brtup)
			break;

		/* First time through? */
		if (bdesc == NULL)
		{
			bdesc = brin_build_desc(idxRel);
			tupcxt = AllocSetContextCreate(CurrentMemoryContext,
										   "brininsert cxt",
										   ALLOCSET_DEFAULT_MINSIZE,
										   ALLOCSET_DEFAULT_INITSIZE,
										   ALLOCSET_DEFAULT_MAXSIZE);
			oldcxt = MemoryContextSwitchTo(tupcxt);
		}

		dtup = brin_deform_tuple(bdesc, brtup);

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * When assertions are enabled, we use this as an opportunity to
			 * test the "union" method, which would otherwise be used very
			 * rarely: first create a placeholder tuple, and addValue the
			 * value we just got into it.  Then union the existing index tuple
			 * with the updated placeholder tuple.  The tuple resulting from
			 * that union should be identical to the one resulting from the
			 * regular operation (straight addValue) below.
			 *
			 * Here we create the tuple to compare with; the actual comparison
			 * is below.
			 */
			tmptup = brin_form_placeholder_tuple(bdesc, heapBlk, &tmpsiz);
			tmpdtup = brin_deform_tuple(bdesc, tmptup);
			for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
			{
				BrinValues *bval;
				FmgrInfo   *addValue;

				bval = &tmpdtup->bt_columns[keyno];
				addValue = index_getprocinfo(idxRel, keyno + 1,
											 BRIN_PROCNUM_ADDVALUE);
				FunctionCall4Coll(addValue,
								  idxRel->rd_indcollation[keyno],
								  PointerGetDatum(bdesc),
								  PointerGetDatum(bval),
								  values[keyno],
								  nulls[keyno]);
			}

			union_tuples(bdesc, tmpdtup, brtup);

			tmpdtup->bt_placeholder = dtup->bt_placeholder;
			tmptup = brin_form_tuple(bdesc, heapBlk, tmpdtup, &tmpsiz);
		}
#endif

		/*
		 * Compare the key values of the new tuple to the stored index values;
		 * our deformed tuple will get updated if the new tuple doesn't fit
		 * the original range (note this means we can't break out of the loop
		 * early). Make a note of whether this happens, so that we know to
		 * insert the modified tuple later.
		 */
		for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
		{
			Datum		result;
			BrinValues *bval;
			FmgrInfo   *addValue;

			bval = &dtup->bt_columns[keyno];
			addValue = index_getprocinfo(idxRel, keyno + 1,
										 BRIN_PROCNUM_ADDVALUE);
			result = FunctionCall4Coll(addValue,
									   idxRel->rd_indcollation[keyno],
									   PointerGetDatum(bdesc),
									   PointerGetDatum(bval),
									   values[keyno],
									   nulls[keyno]);
			/* if that returned true, we need to insert the updated tuple */
			need_insert |= DatumGetBool(result);
		}

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * Now we can compare the tuple produced by the union function
			 * with the one from plain addValue.
			 */
			BrinTuple  *cmptup;
			Size		cmpsz;

			cmptup = brin_form_tuple(bdesc, heapBlk, dtup, &cmpsz);
			Assert(brin_tuples_equal(tmptup, tmpsiz, cmptup, cmpsz));
		}
#endif

		if (!need_insert)
		{
			/*
			 * The tuple is consistent with the new values, so there's nothing
			 * to do.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}
		else
		{
			Page		page = BufferGetPage(buf);
			ItemId		lp = PageGetItemId(page, off);
			Size		origsz;
			BrinTuple  *origtup;
			Size		newsz;
			BrinTuple  *newtup;
			bool		samepage;

			/*
			 * Make a copy of the old tuple, so that we can compare it after
			 * re-acquiring the lock.
			 */
			origsz = ItemIdGetLength(lp);
			origtup = brin_copy_tuple(brtup, origsz);

			/*
			 * Before releasing the lock, check if we can attempt a same-page
			 * update.  Another process could insert a tuple concurrently in
			 * the same page though, so downstream we must be prepared to cope
			 * if this turns out to not be possible after all.
			 */
			newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
			samepage = brin_can_do_samepage_update(buf, origsz, newsz);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/*
			 * Try to update the tuple.  If this doesn't work for whatever
			 * reason, we need to restart from the top; the revmap might be
			 * pointing at a different tuple for this block now, so we need to
			 * recompute to ensure both our new heap tuple and the other
			 * inserter's are covered by the combined tuple.  It might be that
			 * we don't need to update at all.
			 */
			if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
							   buf, off, origtup, origsz, newtup, newsz,
							   samepage))
			{
				/* no luck; start over */
				MemoryContextResetAndDeleteChildren(tupcxt);
				continue;
			}
		}

		/* success! */
		break;
	}

	brinRevmapTerminate(revmap);
	if (BufferIsValid(buf))
		ReleaseBuffer(buf);
	if (bdesc != NULL)
	{
		brin_free_desc(bdesc);
		MemoryContextSwitchTo(oldcxt);
		MemoryContextDelete(tupcxt);
	}

	return BoolGetDatum(false);
}
Esempio n. 6
0
/*
 * Extract all item values from a BRIN index page
 *
 * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
 */
Datum
brin_page_items(PG_FUNCTION_ARGS)
{
	brin_page_state *state;
	FuncCallContext *fctx;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use raw page functions"))));

	if (SRF_IS_FIRSTCALL())
	{
		bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
		Oid			indexRelid = PG_GETARG_OID(1);
		Page		page;
		TupleDesc	tupdesc;
		MemoryContext mctx;
		Relation	indexRel;
		AttrNumber	attno;

		/* minimally verify the page we got */
		page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");

		/* create a function context for cross-call persistence */
		fctx = SRF_FIRSTCALL_INIT();

		/* switch to memory context appropriate for multiple function calls */
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		indexRel = index_open(indexRelid, AccessShareLock);

		state = palloc(offsetof(brin_page_state, columns) +
			  sizeof(brin_column_state) * RelationGetDescr(indexRel)->natts);

		state->bdesc = brin_build_desc(indexRel);
		state->page = page;
		state->offset = FirstOffsetNumber;
		state->unusedItem = false;
		state->done = false;
		state->dtup = NULL;

		/*
		 * Initialize output functions for all indexed datatypes; simplifies
		 * calling them later.
		 */
		for (attno = 1; attno <= state->bdesc->bd_tupdesc->natts; attno++)
		{
			Oid			output;
			bool		isVarlena;
			BrinOpcInfo *opcinfo;
			int			i;
			brin_column_state *column;

			opcinfo = state->bdesc->bd_info[attno - 1];
			column = palloc(offsetof(brin_column_state, outputFn) +
							sizeof(FmgrInfo) * opcinfo->oi_nstored);

			column->nstored = opcinfo->oi_nstored;
			for (i = 0; i < opcinfo->oi_nstored; i++)
			{
				getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
				fmgr_info(output, &column->outputFn[i]);
			}

			state->columns[attno - 1] = column;
		}

		index_close(indexRel, AccessShareLock);

		fctx->user_fctx = state;
		fctx->tuple_desc = BlessTupleDesc(tupdesc);

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	state = fctx->user_fctx;

	if (!state->done)
	{
		HeapTuple	result;
		Datum		values[7];
		bool		nulls[7];

		/*
		 * This loop is called once for every attribute of every tuple in the
		 * page.  At the start of a tuple, we get a NULL dtup; that's our
		 * signal for obtaining and decoding the next one.  If that's not the
		 * case, we output the next attribute.
		 */
		if (state->dtup == NULL)
		{
			BrinTuple  *tup;
			MemoryContext mctx;
			ItemId		itemId;

			/* deformed tuple must live across calls */
			mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

			/* verify item status: if there's no data, we can't decode */
			itemId = PageGetItemId(state->page, state->offset);
			if (ItemIdIsUsed(itemId))
			{
				tup = (BrinTuple *) PageGetItem(state->page,
												PageGetItemId(state->page,
															  state->offset));
				state->dtup = brin_deform_tuple(state->bdesc, tup);
				state->attno = 1;
				state->unusedItem = false;
			}
			else
				state->unusedItem = true;

			MemoryContextSwitchTo(mctx);
		}
		else
			state->attno++;

		MemSet(nulls, 0, sizeof(nulls));

		if (state->unusedItem)
		{
			values[0] = UInt16GetDatum(state->offset);
			nulls[1] = true;
			nulls[2] = true;
			nulls[3] = true;
			nulls[4] = true;
			nulls[5] = true;
			nulls[6] = true;
		}
		else
		{
			int			att = state->attno - 1;

			values[0] = UInt16GetDatum(state->offset);
			values[1] = UInt32GetDatum(state->dtup->bt_blkno);
			values[2] = UInt16GetDatum(state->attno);
			values[3] = BoolGetDatum(state->dtup->bt_columns[att].bv_allnulls);
			values[4] = BoolGetDatum(state->dtup->bt_columns[att].bv_hasnulls);
			values[5] = BoolGetDatum(state->dtup->bt_placeholder);
			if (!state->dtup->bt_columns[att].bv_allnulls)
			{
				BrinValues *bvalues = &state->dtup->bt_columns[att];
				StringInfoData s;
				bool		first;
				int			i;

				initStringInfo(&s);
				appendStringInfoChar(&s, '{');

				first = true;
				for (i = 0; i < state->columns[att]->nstored; i++)
				{
					char	   *val;

					if (!first)
						appendStringInfoString(&s, " .. ");
					first = false;
					val = OutputFunctionCall(&state->columns[att]->outputFn[i],
											 bvalues->bv_values[i]);
					appendStringInfoString(&s, val);
					pfree(val);
				}
				appendStringInfoChar(&s, '}');

				values[6] = CStringGetTextDatum(s.data);
				pfree(s.data);
			}
			else
			{
				nulls[6] = true;
			}
		}

		result = heap_form_tuple(fctx->tuple_desc, values, nulls);

		/*
		 * If the item was unused, jump straight to the next one; otherwise,
		 * the only cleanup needed here is to set our signal to go to the next
		 * tuple in the following iteration, by freeing the current one.
		 */
		if (state->unusedItem)
			state->offset = OffsetNumberNext(state->offset);
		else if (state->attno >= state->bdesc->bd_tupdesc->natts)
		{
			pfree(state->dtup);
			state->dtup = NULL;
			state->offset = OffsetNumberNext(state->offset);
		}

		/*
		 * If we're beyond the end of the page, set flag to end the function
		 * in the following iteration.
		 */
		if (state->offset > PageGetMaxOffsetNumber(state->page))
			state->done = true;

		SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(result));
	}

	brin_free_desc(state->bdesc);

	SRF_RETURN_DONE(fctx);
}
Esempio n. 7
0
/*
 * A tuple in the heap is being inserted.  To keep a brin index up to date,
 * we need to obtain the relevant index tuple and compare its stored values
 * with those of the new tuple.  If the tuple values are not consistent with
 * the summary tuple, we need to update the index tuple.
 *
 * If autosummarization is enabled, check if we need to summarize the previous
 * page range.
 *
 * If the range is not currently summarized (i.e. the revmap returns NULL for
 * it), there's nothing to do for this tuple.
 */
bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
		   ItemPointer heaptid, Relation heapRel,
		   IndexUniqueCheck checkUnique,
		   IndexInfo *indexInfo)
{
	BlockNumber pagesPerRange;
	BlockNumber origHeapBlk;
	BlockNumber heapBlk;
	BrinDesc   *bdesc = (BrinDesc *) indexInfo->ii_AmCache;
	BrinRevmap *revmap;
	Buffer		buf = InvalidBuffer;
	MemoryContext tupcxt = NULL;
	MemoryContext oldcxt = CurrentMemoryContext;
	bool		autosummarize = BrinGetAutoSummarize(idxRel);

	revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);

	/*
	 * origHeapBlk is the block number where the insertion occurred.  heapBlk
	 * is the first block in the corresponding page range.
	 */
	origHeapBlk = ItemPointerGetBlockNumber(heaptid);
	heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;

	for (;;)
	{
		bool		need_insert = false;
		OffsetNumber off;
		BrinTuple  *brtup;
		BrinMemTuple *dtup;
		int			keyno;

		CHECK_FOR_INTERRUPTS();

		/*
		 * If auto-summarization is enabled and we just inserted the first
		 * tuple into the first block of a new non-first page range, request a
		 * summarization run of the previous range.
		 */
		if (autosummarize &&
			heapBlk > 0 &&
			heapBlk == origHeapBlk &&
			ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
		{
			BlockNumber lastPageRange = heapBlk - 1;
			BrinTuple  *lastPageTuple;

			lastPageTuple =
				brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
										 NULL, BUFFER_LOCK_SHARE, NULL);
			if (!lastPageTuple)
			{
				bool		recorded;

				recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange,
												 RelationGetRelid(idxRel),
												 lastPageRange);
				if (!recorded)
					ereport(LOG,
							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
							 errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
									RelationGetRelationName(idxRel),
									lastPageRange)));
			}
			else
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}

		brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
										 NULL, BUFFER_LOCK_SHARE, NULL);

		/* if range is unsummarized, there's nothing to do */
		if (!brtup)
			break;

		/* First time through in this statement? */
		if (bdesc == NULL)
		{
			MemoryContextSwitchTo(indexInfo->ii_Context);
			bdesc = brin_build_desc(idxRel);
			indexInfo->ii_AmCache = (void *) bdesc;
			MemoryContextSwitchTo(oldcxt);
		}
		/* First time through in this brininsert call? */
		if (tupcxt == NULL)
		{
			tupcxt = AllocSetContextCreate(CurrentMemoryContext,
										   "brininsert cxt",
										   ALLOCSET_DEFAULT_SIZES);
			MemoryContextSwitchTo(tupcxt);
		}

		dtup = brin_deform_tuple(bdesc, brtup, NULL);

		/*
		 * Compare the key values of the new tuple to the stored index values;
		 * our deformed tuple will get updated if the new tuple doesn't fit
		 * the original range (note this means we can't break out of the loop
		 * early). Make a note of whether this happens, so that we know to
		 * insert the modified tuple later.
		 */
		for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
		{
			Datum		result;
			BrinValues *bval;
			FmgrInfo   *addValue;

			bval = &dtup->bt_columns[keyno];
			addValue = index_getprocinfo(idxRel, keyno + 1,
										 BRIN_PROCNUM_ADDVALUE);
			result = FunctionCall4Coll(addValue,
									   idxRel->rd_indcollation[keyno],
									   PointerGetDatum(bdesc),
									   PointerGetDatum(bval),
									   values[keyno],
									   nulls[keyno]);
			/* if that returned true, we need to insert the updated tuple */
			need_insert |= DatumGetBool(result);
		}

		if (!need_insert)
		{
			/*
			 * The tuple is consistent with the new values, so there's nothing
			 * to do.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		}
		else
		{
			Page		page = BufferGetPage(buf);
			ItemId		lp = PageGetItemId(page, off);
			Size		origsz;
			BrinTuple  *origtup;
			Size		newsz;
			BrinTuple  *newtup;
			bool		samepage;

			/*
			 * Make a copy of the old tuple, so that we can compare it after
			 * re-acquiring the lock.
			 */
			origsz = ItemIdGetLength(lp);
			origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);

			/*
			 * Before releasing the lock, check if we can attempt a same-page
			 * update.  Another process could insert a tuple concurrently in
			 * the same page though, so downstream we must be prepared to cope
			 * if this turns out to not be possible after all.
			 */
			newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
			samepage = brin_can_do_samepage_update(buf, origsz, newsz);
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);

			/*
			 * Try to update the tuple.  If this doesn't work for whatever
			 * reason, we need to restart from the top; the revmap might be
			 * pointing at a different tuple for this block now, so we need to
			 * recompute to ensure both our new heap tuple and the other
			 * inserter's are covered by the combined tuple.  It might be that
			 * we don't need to update at all.
			 */
			if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
							   buf, off, origtup, origsz, newtup, newsz,
							   samepage))
			{
				/* no luck; start over */
				MemoryContextResetAndDeleteChildren(tupcxt);
				continue;
			}
		}

		/* success! */
		break;
	}

	brinRevmapTerminate(revmap);
	if (BufferIsValid(buf))
		ReleaseBuffer(buf);
	MemoryContextSwitchTo(oldcxt);
	if (tupcxt != NULL)
		MemoryContextDelete(tupcxt);

	return false;
}