/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
hashbuildCallback(Relation index,
                  HeapTuple htup,
                  Datum *values,
                  bool *isnull,
                  bool tupleIsAlive,
                  void *state)
{
    HashBuildState *buildstate = (HashBuildState *) state;
    IndexTuple  itup;

    /* form an index tuple and point it at the heap tuple */
    itup = index_form_tuple(RelationGetDescr(index), values, isnull);
    itup->t_tid = htup->t_self;

    /* Hash indexes don't index nulls, see notes in hashinsert */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        return;
    }

    _hash_doinsert(index, itup);

    buildstate->indtuples += 1;

    pfree(itup);
}
/*
 * _hash_formitem -- construct a hash index entry
 */
HashItem
_hash_formitem(IndexTuple itup)
{
    int         nbytes_hitem;
    HashItem    hitem;
    Size        tuplen;

    /* disallow nulls in hash keys */
    if (IndexTupleHasNulls(itup))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("hash indexes cannot contain null keys")));

    /*
     * make a copy of the index tuple (XXX do we still need to copy?)
     *
     * HashItemData used to have more fields than IndexTupleData, but no
     * longer...
     */
    tuplen = IndexTupleSize(itup);
    nbytes_hitem = tuplen + (sizeof(HashItemData) - sizeof(IndexTupleData));

    hitem = (HashItem) palloc(nbytes_hitem);
    memcpy((char *) &(hitem->hash_itup), (char *) itup, tuplen);

    return hitem;
}
/*
 * Extract the label datums of the nodes within innerTuple
 *
 * Returns NULL if label datums are NULLs
 */
Datum *
spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
{
    Datum      *nodeLabels;
    int         nullcount = 0;
    int         i;
    SpGistNodeTuple node;

    nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
    SGITITERATE(innerTuple, i, node)
    {
        if (IndexTupleHasNulls(node))
            nullcount++;
        else
            nodeLabels[i] = SGNTDATUM(node, state);
    }
    if (nullcount == innerTuple->nNodes)
    {
        /* They're all null, so just return NULL */
        pfree(nodeLabels);
        return NULL;
    }
    if (nullcount != 0)
        elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
    return nodeLabels;
}
/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
hashbuildCallback(Relation index,
                  HeapTuple htup,
                  Datum *values,
                  bool *isnull,
                  bool tupleIsAlive,
                  void *state)
{
    HashBuildState *buildstate = (HashBuildState *) state;
    IndexTuple  itup;

    /* form an index tuple and point it at the heap tuple */
    itup = _hash_form_tuple(index, values, isnull);
    itup->t_tid = htup->t_self;

    /* Hash indexes don't index nulls, see notes in hashinsert */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        return;
    }

    /* Either spool the tuple for sorting, or just put it into the index */
    if (buildstate->spool)
        _h_spool(itup, buildstate->spool);
    else
        _hash_doinsert(index, itup);

    buildstate->indtuples += 1;

    pfree(itup);
}
/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
rtbuildCallback(Relation index,
                HeapTuple htup,
                Datum *values,
                bool *isnull,
                bool tupleIsAlive,
                void *state)
{
    RTBuildState *buildstate = (RTBuildState *) state;
    IndexTuple  itup;

    /* form an index tuple and point it at the heap tuple */
    itup = index_form_tuple(RelationGetDescr(index), values, isnull);
    itup->t_tid = htup->t_self;

    /* rtree indexes don't index nulls, see notes in rtinsert */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        return;
    }

    /*
     * Since we already have the index relation locked, we call rtdoinsert
     * directly.  Normal access method calls dispatch through rtinsert,
     * which locks the relation for write.  This is the right thing to do
     * if you're inserting single tups, but not when you're initializing
     * the whole index at once.
     */
    rtdoinsert(index, itup, &buildstate->rtState);

    buildstate->indtuples += 1;

    pfree(itup);
}
/*-------------------------------------------------------
 * bt_page_print_tuples()
 *
 * Form a tuple describing index tuple at a given offset
 * ------------------------------------------------------
 */
static Datum
bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
{
    char       *values[6];
    HeapTuple   tuple;
    ItemId      id;
    IndexTuple  itup;
    int         j;
    int         off;
    int         dlen;
    char       *dump;
    char       *ptr;

    id = PageGetItemId(page, offset);

    if (!ItemIdIsValid(id))
        elog(ERROR, "invalid ItemId");

    itup = (IndexTuple) PageGetItem(page, id);

    j = 0;
    values[j++] = psprintf("%d", offset);
    values[j++] = psprintf("(%u,%u)",
                           ItemPointerGetBlockNumberNoCheck(&itup->t_tid),
                           ItemPointerGetOffsetNumberNoCheck(&itup->t_tid));
    values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
    values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
    values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');

    ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
    dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
    dump = palloc0(dlen * 3 + 1);
    values[j] = dump;
    for (off = 0; off < dlen; off++)
    {
        if (off > 0)
            *dump++ = ' ';
        sprintf(dump, "%02x", *(ptr + off) & 0xff);
        dump += 2;
    }

    tuple = BuildTupleFromCStrings(fctx->attinmeta, values);

    return HeapTupleGetDatum(tuple);
}
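The hex-dump loop above emits each data byte as two hex digits, with a single space between bytes. Below is a self-contained sketch of just that loop as a standalone C program, outside the server (names are illustrative, not part of pageinspect):

#include <stdio.h>
#include <stdlib.h>

/* Render dlen bytes as "de ad be ef"-style hex, like the loop above. */
static char *
hex_dump(const unsigned char *ptr, int dlen)
{
    /* 2 hex digits + 1 separator per byte, plus the terminating NUL */
    char   *out = calloc(dlen * 3 + 1, 1);
    char   *dump = out;
    int     off;

    for (off = 0; off < dlen; off++)
    {
        if (off > 0)
            *dump++ = ' ';
        sprintf(dump, "%02x", ptr[off] & 0xff);
        dump += 2;
    }
    return out;
}

int
main(void)
{
    unsigned char data[] = {0xde, 0xad, 0xbe, 0xef};
    char       *s = hex_dump(data, (int) sizeof(data));

    puts(s);                    /* prints "de ad be ef" */
    free(s);
    return 0;
}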
/*
 * hashinsert() -- insert an index tuple into a hash table.
 *
 * Hash on the index tuple's key, find the appropriate location
 * for the new tuple, put it there, and return an InsertIndexResult
 * to the caller.
 */
Datum
hashinsert(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    Datum      *datum = (Datum *) PG_GETARG_POINTER(1);
    char       *nulls = (char *) PG_GETARG_POINTER(2);
    ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);

#ifdef NOT_USED
    Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
    bool        checkUnique = PG_GETARG_BOOL(5);
#endif
    InsertIndexResult res;
    HashItem    hitem;
    IndexTuple  itup;

    /* generate an index tuple */
    itup = index_formtuple(RelationGetDescr(rel), datum, nulls);
    itup->t_tid = *ht_ctid;

    /*
     * If the single index key is null, we don't insert it into the index.
     * Hash tables support scans on '='.  Relational algebra says that A =
     * B returns null if either A or B is null.  This means that no
     * qualification used in an index scan could ever return true on a
     * null attribute.  It also means that indices can't be used by ISNULL
     * or NOTNULL scans, but that's an artifact of the strategy map
     * architecture chosen in 1986, not of the way nulls are handled here.
     */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        PG_RETURN_POINTER((InsertIndexResult) NULL);
    }

    hitem = _hash_formitem(itup);

    res = _hash_doinsert(rel, hitem);

    pfree(hitem);
    pfree(itup);

    PG_RETURN_POINTER(res);
}
/*
 * Extract the label datums of the nodes within innerTuple
 *
 * Returns NULL if label datums are NULLs
 */
Datum *
spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
{
    Datum      *nodeLabels;
    int         i;
    SpGistNodeTuple node;

    /* Either all the labels must be NULL, or none. */
    node = SGITNODEPTR(innerTuple);
    if (IndexTupleHasNulls(node))
    {
        SGITITERATE(innerTuple, i, node)
        {
            if (!IndexTupleHasNulls(node))
                elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
        }
        /* They're all null, so just return NULL */
        return NULL;
    }
    else
    {
        nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
        SGITITERATE(innerTuple, i, node)
        {
            if (IndexTupleHasNulls(node))
                elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
            nodeLabels[i] = SGNTDATUM(node, state);
        }
        return nodeLabels;
    }
}
/*
 * rtinsert -- wrapper for rtree tuple insertion.
 *
 * This is the public interface routine for tuple insertion in rtrees.
 * It doesn't do any work; just locks the relation and passes the buck.
 */
Datum
rtinsert(PG_FUNCTION_ARGS)
{
    Relation    r = (Relation) PG_GETARG_POINTER(0);
    Datum      *values = (Datum *) PG_GETARG_POINTER(1);
    bool       *isnull = (bool *) PG_GETARG_POINTER(2);
    ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);

#ifdef NOT_USED
    Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
    bool        checkUnique = PG_GETARG_BOOL(5);
#endif
    IndexTuple  itup;
    RTSTATE     rtState;

    /* generate an index tuple */
    itup = index_form_tuple(RelationGetDescr(r), values, isnull);
    itup->t_tid = *ht_ctid;

    /*
     * Currently, rtrees do not support indexing NULLs; considerable
     * infrastructure work would have to be done to do anything reasonable
     * with a NULL.
     */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        PG_RETURN_BOOL(false);
    }

    initRtstate(&rtState, r);

    /*
     * Since rtree is not marked "amconcurrent" in pg_am, caller should
     * have acquired exclusive lock on index relation.  We need no locking
     * here.
     */
    rtdoinsert(r, itup, &rtState);

    PG_RETURN_BOOL(true);
}
/*
 * hashinsert() -- insert an index tuple into a hash table.
 *
 * Hash on the heap tuple's key, form an index tuple with hash code.
 * Find the appropriate location for the new tuple, and put it there.
 */
Datum
hashinsert(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    Datum      *values = (Datum *) PG_GETARG_POINTER(1);
    bool       *isnull = (bool *) PG_GETARG_POINTER(2);
    ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);

#ifdef NOT_USED
    Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
    IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
#endif
    IndexTuple  itup;

    /* generate an index tuple */
    itup = _hash_form_tuple(rel, values, isnull);
    itup->t_tid = *ht_ctid;

    /*
     * If the single index key is null, we don't insert it into the index.
     * Hash tables support scans on '='.  Relational algebra says that A = B
     * returns null if either A or B is null.  This means that no
     * qualification used in an index scan could ever return true on a null
     * attribute.  It also means that indices can't be used by ISNULL or
     * NOTNULL scans, but that's an artifact of the strategy map architecture
     * chosen in 1986, not of the way nulls are handled here.
     */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        PG_RETURN_BOOL(false);
    }

    _hash_doinsert(rel, itup);

    pfree(itup);

    PG_RETURN_BOOL(false);
}
/*
 * Form a non-leaf entry tuple by copying the key data from the given tuple,
 * which can be either a leaf or non-leaf entry tuple.
 *
 * Any posting list in the source tuple is not copied.  The specified child
 * block number is inserted into t_tid.
 */
static IndexTuple
RumFormInteriorTuple(RumBtree btree, IndexTuple itup, Page page,
                     BlockNumber childblk)
{
    IndexTuple  nitup;
    RumNullCategory category;

    if (RumPageIsLeaf(page) && !RumIsPostingTree(itup))
    {
        /* Tuple contains a posting list, just copy stuff before that */
        uint32      origsize = RumGetPostingOffset(itup);

        origsize = MAXALIGN(origsize);
        nitup = (IndexTuple) palloc(origsize);
        memcpy(nitup, itup, origsize);
        /* ... be sure to fix the size header field ... */
        nitup->t_info &= ~INDEX_SIZE_MASK;
        nitup->t_info |= origsize;
    }
    else
    {
        /* Copy the tuple as-is */
        nitup = (IndexTuple) palloc(IndexTupleSize(itup));
        memcpy(nitup, itup, IndexTupleSize(itup));
    }

    /* Now insert the correct downlink */
    RumSetDownlink(nitup, childblk);

    rumtuple_get_key(btree->rumstate, itup, &category);

    if (category != RUM_CAT_NORM_KEY)
    {
        Assert(IndexTupleHasNulls(itup));
        nitup->t_info |= INDEX_NULL_MASK;
        RumSetNullCategory(nitup, category);
    }

    return nitup;
}
/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
hashbuildCallback(Relation index,
                  HeapTuple htup,
                  Datum *attdata,
                  char *nulls,
                  bool tupleIsAlive,
                  void *state)
{
    HashBuildState *buildstate = (HashBuildState *) state;
    IndexTuple  itup;
    HashItem    hitem;
    InsertIndexResult res;

    /* form an index tuple and point it at the heap tuple */
    itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
    itup->t_tid = htup->t_self;

    /* Hash indexes don't index nulls, see notes in hashinsert */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        return;
    }

    hitem = _hash_formitem(itup);

    res = _hash_doinsert(index, hitem);

    if (res)
        pfree(res);

    buildstate->indtuples += 1;

    pfree(hitem);
    pfree(itup);
}
/* checks the individual attributes of the tuple */
uint32
check_index_tuple_attributes(Relation rel, PageHeader header, int block,
                             int i, char *buffer)
{
    IndexTuple  tuple;
    uint32      nerrs = 0;
    int         j,
                off;

    bits8      *bitmap;
    BTPageOpaque opaque;

    ereport(DEBUG2,
            (errmsg("[%d:%d] checking attributes for the tuple", block, i)));

    /* get the index tuple and info about the page */
    tuple = (IndexTuple) (buffer + header->pd_linp[i].lp_off);
    opaque = (BTPageOpaque) (buffer + header->pd_special);

    /* current attribute offset - always starts at (buffer + off) */
    off = header->pd_linp[i].lp_off + IndexInfoFindDataOffset(tuple->t_info);

    ereport(DEBUG3,
            (errmsg("[%d:%d] tuple has %d attributes", block, (i + 1),
                    RelationGetNumberOfAttributes(rel))));

    bitmap = (bits8 *) (buffer + header->pd_linp[i].lp_off + sizeof(IndexTupleData));

    /*
     * TODO This is mostly copy'n'paste from check_heap_tuple_attributes,
     * so maybe it could be refactored to share the code.
     */

    /*
     * For the leftmost tuple on a non-leaf page, there are no data at all
     * (see src/backend/access/nbtree/README, last paragraph in the section
     * "Notes About Data Representation").  Use P_LEFTMOST/P_ISLEAF to
     * identify such cases (for the leftmost item only) and skip the checks.
     */
    if (P_LEFTMOST(opaque) && (!P_ISLEAF(opaque)) && (i == 0))
    {
        ereport(DEBUG3,
                (errmsg("[%d:%d] leftmost tuple on non-leaf block => no data, skipping",
                        block, i)));
        return nerrs;
    }

    /* check all the index attributes */
    for (j = 0; j < rel->rd_att->natts; j++)
    {
        /* default length of the attribute */
        int         len = rel->rd_att->attrs[j]->attlen;

        /* copy from src/backend/commands/analyze.c */
        bool        is_varlena = (!rel->rd_att->attrs[j]->attbyval && len == -1);
        bool        is_varwidth = (!rel->rd_att->attrs[j]->attbyval && len < 0); /* thus it's "len = -2" */

        /*
         * If the attribute is marked as NULL (in the tuple header), skip
         * to the next attribute.
         */
        if (IndexTupleHasNulls(tuple) && att_isnull(j, bitmap))
        {
            ereport(DEBUG3,
                    (errmsg("[%d:%d] attribute '%s' is NULL (skipping)",
                            block, (i + 1),
                            rel->rd_att->attrs[j]->attname.data)));
            continue;
        }

        /* fix the alignment (see src/include/access/tupmacs.h) */
        off = att_align_pointer(off, rel->rd_att->attrs[j]->attalign,
                                rel->rd_att->attrs[j]->attlen, buffer + off);

        if (is_varlena)
        {
            /*
             * other interesting macros (see postgres.h) - should do
             * something about those ...
             *
             * VARATT_IS_COMPRESSED(PTR)  VARATT_IS_4B_C(PTR)
             * VARATT_IS_EXTERNAL(PTR)    VARATT_IS_1B_E(PTR)
             * VARATT_IS_SHORT(PTR)       VARATT_IS_1B(PTR)
             * VARATT_IS_EXTENDED(PTR)    (!VARATT_IS_4B_U(PTR))
             */
            len = VARSIZE_ANY(buffer + off);

            if (len < 0)
            {
                ereport(WARNING,
                        (errmsg("[%d:%d] attribute '%s' has negative length (%d)",
                                block, (i + 1),
                                rel->rd_att->attrs[j]->attname.data, len)));
                ++nerrs;
                break;
            }

            if (VARATT_IS_COMPRESSED(buffer + off))
            {
                /* the raw length should be less than 1G (and positive) */
                if ((VARRAWSIZE_4B_C(buffer + off) < 0) ||
                    (VARRAWSIZE_4B_C(buffer + off) > 1024 * 1024 * 1024))
                {
                    ereport(WARNING,
                            (errmsg("[%d:%d] attribute '%s' has invalid length %d (should be between 0 and 1G)",
                                    block, (i + 1),
                                    rel->rd_att->attrs[j]->attname.data,
                                    VARRAWSIZE_4B_C(buffer + off))));
                    ++nerrs;

                    /*
                     * No break here: this does not break the page
                     * structure, so we may check the other attributes.
                     */
                }
            }

            /* FIXME Check if the varlena value may be detoasted. */
        }
        else if (is_varwidth)
        {
            /*
             * Get the C-string length (at most to the end of the tuple);
             * +1 because strnlen does not count the '\0' terminator.  If
             * the string is not properly terminated, this returns
             * 'remaining space + 1', so the overflow check below detects it.
             */
            len = strnlen(buffer + off,
                          header->pd_linp[i].lp_off + len +
                          header->pd_linp[i].lp_len - off) + 1;
        }

        /*
         * Check that the length makes sense: if it overflows the tuple
         * end, stop validating the remaining attributes (we don't know
         * where to continue anyway).
         */
        if (off + len > (header->pd_linp[i].lp_off + header->pd_linp[i].lp_len))
        {
            ereport(WARNING,
                    (errmsg("[%d:%d] attribute '%s' (off=%d len=%d) overflows tuple end (off=%d, len=%d)",
                            block, (i + 1),
                            rel->rd_att->attrs[j]->attname.data,
                            off, len,
                            header->pd_linp[i].lp_off,
                            header->pd_linp[i].lp_len)));
            ++nerrs;
            break;
        }

        /* skip to the next attribute */
        off += len;

        ereport(DEBUG3,
                (errmsg("[%d:%d] attribute '%s' len=%d",
                        block, (i + 1),
                        rel->rd_att->attrs[j]->attname.data, len)));
    }

    ereport(DEBUG3,
            (errmsg("[%d:%d] last attribute ends at %d, tuple ends at %d",
                    block, (i + 1), off,
                    header->pd_linp[i].lp_off + header->pd_linp[i].lp_len)));

    /*
     * After the last attribute, the (aligned) offset should be exactly the
     * end of the tuple.
     */
    if (MAXALIGN(off) != header->pd_linp[i].lp_off + header->pd_linp[i].lp_len)
    {
        ereport(WARNING,
                (errmsg("[%d:%d] the last attribute ends at %d but the tuple ends at %d",
                        block, (i + 1), off,
                        header->pd_linp[i].lp_off + header->pd_linp[i].lp_len)));
        ++nerrs;
    }

    return nerrs;
}
/*
 * Form a tuple for entry tree.
 *
 * If the tuple would be too big to be stored, function throws a suitable
 * error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE.
 *
 * See src/backend/access/gin/README for a description of the index tuple
 * format that is being built here.  We build on the assumption that we
 * are making a leaf-level key entry containing a posting list of nipd items.
 * If the caller is actually trying to make a posting-tree entry, non-leaf
 * entry, or pending-list entry, it should pass dataSize = 0 and then
 * overwrite the t_tid fields as necessary.  In any case, 'data' can be NULL
 * to skip filling in the posting list; the caller is responsible for filling
 * it afterwards if data = NULL and nipd > 0.
 */
IndexTuple
GinFormTuple(GinState *ginstate,
             OffsetNumber attnum, Datum key, GinNullCategory category,
             Pointer data, Size dataSize, int nipd,
             bool errorTooBig)
{
    Datum       datums[2];
    bool        isnull[2];
    IndexTuple  itup;
    uint32      newsize;

    /* Build the basic tuple: optional column number, plus key datum */
    if (ginstate->oneCol)
    {
        datums[0] = key;
        isnull[0] = (category != GIN_CAT_NORM_KEY);
    }
    else
    {
        datums[0] = UInt16GetDatum(attnum);
        isnull[0] = false;
        datums[1] = key;
        isnull[1] = (category != GIN_CAT_NORM_KEY);
    }

    itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull);

    /*
     * Determine and store offset to the posting list, making sure there is
     * room for the category byte if needed.
     *
     * Note: because index_form_tuple MAXALIGNs the tuple size, there may
     * well be some wasted pad space.  Is it worth recomputing the data
     * length to prevent that?  That would also allow us to Assert that the
     * real data doesn't overlap the GinNullCategory byte, which this code
     * currently takes on faith.
     */
    newsize = IndexTupleSize(itup);

    if (IndexTupleHasNulls(itup))
    {
        uint32      minsize;

        Assert(category != GIN_CAT_NORM_KEY);
        minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory);
        newsize = Max(newsize, minsize);
    }

    newsize = SHORTALIGN(newsize);

    GinSetPostingOffset(itup, newsize);
    GinSetNPosting(itup, nipd);

    /*
     * Add space needed for posting list, if any.  Then check that the
     * tuple won't be too big to store.
     */
    newsize += dataSize;

    newsize = MAXALIGN(newsize);

    if (newsize > GinMaxItemSize)
    {
        if (errorTooBig)
            ereport(ERROR,
                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                     errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
                            (Size) newsize, (Size) GinMaxItemSize,
                            RelationGetRelationName(ginstate->index))));
        pfree(itup);
        return NULL;
    }

    /*
     * Resize tuple if needed
     */
    if (newsize != IndexTupleSize(itup))
    {
        itup = repalloc(itup, newsize);

        /*
         * PostgreSQL 9.3 and earlier did not clear this new space, so we
         * might find uninitialized padding when reading tuples from disk.
         */
        memset((char *) itup + IndexTupleSize(itup),
               0, newsize - IndexTupleSize(itup));

        /* set new size in tuple header */
        itup->t_info &= ~INDEX_SIZE_MASK;
        itup->t_info |= newsize;
    }

    /*
     * Copy in the posting list, if provided
     */
    if (data)
    {
        char       *ptr = GinGetPosting(itup);

        memcpy(ptr, data, dataSize);
    }

    /*
     * Insert category byte, if needed
     */
    if (category != GIN_CAT_NORM_KEY)
    {
        Assert(IndexTupleHasNulls(itup));
        GinSetNullCategory(itup, ginstate, category);
    }
    return itup;
}
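A hypothetical caller sketch (not taken from the source above; the wrapper name and surrounding context are assumptions): forming a single-column leaf entry whose posting list was built earlier, passing errorTooBig = false so an oversized entry is reported by the caller rather than inside GinFormTuple:

static IndexTuple
form_leaf_entry(GinState *ginstate, Datum key,
                Pointer plist, Size plistSize, int nitems)
{
    /* attribute 1, normal (non-null) key; GinFormTuple copies the list */
    IndexTuple  itup = GinFormTuple(ginstate, (OffsetNumber) 1, key,
                                    GIN_CAT_NORM_KEY,
                                    plist, plistSize, nitems,
                                    false);

    if (itup == NULL)           /* would exceed GinMaxItemSize */
        elog(ERROR, "GIN entry would be too large for an index page");
    return itup;
}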
/* ----------------
 *      nocache_index_getattr
 *
 *      This gets called from index_getattr() macro, and only in cases
 *      where we can't use cacheoffset and the value is not null.
 *
 *      This caches attribute offsets in the attribute descriptor.
 *
 *      An alternative way to speed things up would be to cache offsets
 *      with the tuple, but that seems more difficult unless you take
 *      the storage hit of actually putting those offsets into the
 *      tuple you send to disk.  Yuck.
 *
 *      This scheme will be slightly slower than that, but should
 *      perform well for queries which hit large #'s of tuples.  After
 *      you cache the offsets once, examining all the other tuples using
 *      the same attribute descriptor will go much quicker. -cim 5/4/91
 * ----------------
 */
Datum
nocache_index_getattr(IndexTuple tup,
                      int attnum,
                      TupleDesc tupleDesc)
{
    Form_pg_attribute *att = tupleDesc->attrs;
    char       *tp;             /* ptr to data part of tuple */
    bits8      *bp = NULL;      /* ptr to null bitmap in tuple */
    bool        slow = false;   /* do we have to walk attrs? */
    int         data_off;       /* tuple data offset */
    int         off;            /* current offset within data */

    /* ----------------
     *   Three cases:
     *
     *   1: No nulls and no variable-width attributes.
     *   2: Has a null or a var-width AFTER att.
     *   3: Has nulls or var-widths BEFORE att.
     * ----------------
     */

    data_off = IndexInfoFindDataOffset(tup->t_info);

    attnum--;

    if (IndexTupleHasNulls(tup))
    {
        /*
         * there's a null somewhere in the tuple
         *
         * check to see if desired att is null
         */

        /* XXX "knows" t_bits are just after fixed tuple header! */
        bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

        /*
         * Now check to see if any preceding bits are null...
         */
        {
            int         byte = attnum >> 3;
            int         finalbit = attnum & 0x07;

            /* check for nulls "before" final bit of last byte */
            if ((~bp[byte]) & ((1 << finalbit) - 1))
                slow = true;
            else
            {
                /* check for nulls in any "earlier" bytes */
                int         i;

                for (i = 0; i < byte; i++)
                {
                    if (bp[i] != 0xFF)
                    {
                        slow = true;
                        break;
                    }
                }
            }
        }
    }

    tp = (char *) tup + data_off;

    if (!slow)
    {
        /*
         * If we get here, there are no nulls up to and including the target
         * attribute.  If we have a cached offset, we can use it.
         */
        if (att[attnum]->attcacheoff >= 0)
        {
            return fetchatt(att[attnum],
                            tp + att[attnum]->attcacheoff);
        }

        /*
         * Otherwise, check for non-fixed-length attrs up to and including
         * target.  If there aren't any, it's safe to cheaply initialize
         * the cached offsets for these attrs.
         */
        if (IndexTupleHasVarwidths(tup))
        {
            int         j;

            for (j = 0; j <= attnum; j++)
            {
                if (att[j]->attlen <= 0)
                {
                    slow = true;
                    break;
                }
            }
        }
    }

    if (!slow)
    {
        int         natts = tupleDesc->natts;
        int         j = 1;

        /*
         * If we get here, we have a tuple with no nulls or var-widths up to
         * and including the target attribute, so we can use the cached
         * offset ... only we don't have it yet, or we'd not have got here.
         * Since it's cheap to compute offsets for fixed-width columns, we
         * take the opportunity to initialize the cached offsets for *all*
         * the leading fixed-width columns, in hope of avoiding future
         * visits to this routine.
         */
        att[0]->attcacheoff = 0;

        /* we might have set some offsets in the slow path previously */
        while (j < natts && att[j]->attcacheoff > 0)
            j++;

        off = att[j - 1]->attcacheoff + att[j - 1]->attlen;

        for (; j < natts; j++)
        {
            if (att[j]->attlen <= 0)
                break;

            off = att_align_nominal(off, att[j]->attalign);

            att[j]->attcacheoff = off;

            off += att[j]->attlen;
        }

        Assert(j > attnum);

        off = att[attnum]->attcacheoff;
    }
    else
    {
        bool        usecache = true;
        int         i;

        /*
         * Now we know that we have to walk the tuple CAREFULLY.  But we
         * still might be able to cache some offsets for next time.
         *
         * Note - This loop is a little tricky.  For each non-null
         * attribute, we have to first account for alignment padding before
         * the attr, then advance over the attr based on its length.  Nulls
         * have no storage and no alignment padding either.  We can use/set
         * attcacheoff until we reach either a null or a var-width
         * attribute.
         */
        off = 0;
        for (i = 0;; i++)       /* loop exit is at "break" */
        {
            if (IndexTupleHasNulls(tup) && att_isnull(i, bp))
            {
                usecache = false;
                continue;       /* this cannot be the target att */
            }

            /* If we know the next offset, we can skip the rest */
            if (usecache && att[i]->attcacheoff >= 0)
                off = att[i]->attcacheoff;
            else if (att[i]->attlen == -1)
            {
                /*
                 * We can only cache the offset for a varlena attribute if
                 * the offset is already suitably aligned, so that there
                 * would be no pad bytes in any case: then the offset will
                 * be valid for either an aligned or unaligned value.
                 */
                if (usecache &&
                    off == att_align_nominal(off, att[i]->attalign))
                    att[i]->attcacheoff = off;
                else
                {
                    off = att_align_pointer(off, att[i]->attalign, -1,
                                            tp + off);
                    usecache = false;
                }
            }
            else
            {
                /* not varlena, so safe to use att_align_nominal */
                off = att_align_nominal(off, att[i]->attalign);

                if (usecache)
                    att[i]->attcacheoff = off;
            }

            if (i == attnum)
                break;

            off = att_addlength_pointer(off, att[i]->attlen, tp + off);

            if (usecache && att[i]->attlen <= 0)
                usecache = false;
        }
    }

    return fetchatt(att[attnum], tp + off);
}
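The "any preceding attribute null?" bitmap test above is easy to get wrong, so here is a self-contained sketch of the same bit arithmetic as a standalone C program (illustrative names only; attribute numbers are 0-based, as in the code after attnum--; a 0 bit in the bitmap means NULL):

#include <stdio.h>

typedef unsigned char bits8;

/* Return 1 if any attribute before attnum (0-based) is null. */
static int
any_preceding_null(const bits8 *bp, int attnum)
{
    int     byte = attnum >> 3;         /* byte holding our bit */
    int     finalbit = attnum & 0x07;   /* bit position within it */
    int     i;

    /* nulls "before" the final bit of the last byte */
    if ((~bp[byte]) & ((1 << finalbit) - 1))
        return 1;

    /* nulls in any "earlier" bytes */
    for (i = 0; i < byte; i++)
        if (bp[i] != 0xFF)
            return 1;
    return 0;
}

int
main(void)
{
    bits8   bitmap[2] = {0xFD, 0xFF};   /* attribute 1 (0-based) is NULL */

    printf("%d\n", any_preceding_null(bitmap, 0));  /* 0: nothing before att 0 */
    printf("%d\n", any_preceding_null(bitmap, 5));  /* 1: att 1 is NULL */
    return 0;
}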
/* ----------------
 *      nocache_index_getattr
 *
 *      This gets called from index_getattr() macro, and only in cases
 *      where we can't use cacheoffset and the value is not null.
 *
 *      This caches attribute offsets in the attribute descriptor.
 *
 *      An alternate way to speed things up would be to cache offsets
 *      with the tuple, but that seems more difficult unless you take
 *      the storage hit of actually putting those offsets into the
 *      tuple you send to disk.  Yuck.
 *
 *      This scheme will be slightly slower than that, but should
 *      perform well for queries which hit large #'s of tuples.  After
 *      you cache the offsets once, examining all the other tuples using
 *      the same attribute descriptor will go much quicker. -cim 5/4/91
 * ----------------
 */
Datum
nocache_index_getattr(IndexTuple tup,
                      int attnum,
                      TupleDesc tupleDesc,
                      bool *isnull)
{
    Form_pg_attribute *att = tupleDesc->attrs;
    char       *tp;             /* ptr to att in tuple */
    bits8      *bp = NULL;      /* ptr to null bitmask in tuple */
    bool        slow = false;   /* do we have to walk nulls? */
    int         data_off;       /* tuple data offset */

    (void) isnull;              /* not used */

    /*
     * sanity checks
     */

    /* ----------------
     *   Three cases:
     *
     *   1: No nulls and no variable-width attributes.
     *   2: Has a null or a var-width AFTER att.
     *   3: Has nulls or var-widths BEFORE att.
     * ----------------
     */

#ifdef IN_MACRO
    /* This is handled in the macro */
    Assert(PointerIsValid(isnull));
    Assert(attnum > 0);

    *isnull = false;
#endif

    data_off = IndexInfoFindDataOffset(tup->t_info);

    attnum--;

    if (!IndexTupleHasNulls(tup))
    {
#ifdef IN_MACRO
        /* This is handled in the macro */
        if (att[attnum]->attcacheoff != -1)
        {
            return fetchatt(att[attnum],
                            (char *) tup + data_off +
                            att[attnum]->attcacheoff);
        }
#endif
    }
    else
    {
        /*
         * there's a null somewhere in the tuple
         *
         * check to see if desired att is null
         */

        /* XXX "knows" t_bits are just after fixed tuple header! */
        bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

#ifdef IN_MACRO
        /* This is handled in the macro */

        if (att_isnull(attnum, bp))
        {
            *isnull = true;
            return (Datum) NULL;
        }
#endif

        /*
         * Now check to see if any preceding bits are null...
         */
        {
            int         byte = attnum >> 3;
            int         finalbit = attnum & 0x07;

            /* check for nulls "before" final bit of last byte */
            if ((~bp[byte]) & ((1 << finalbit) - 1))
                slow = true;
            else
            {
                /* check for nulls in any "earlier" bytes */
                int         i;

                for (i = 0; i < byte; i++)
                {
                    if (bp[i] != 0xFF)
                    {
                        slow = true;
                        break;
                    }
                }
            }
        }
    }

    tp = (char *) tup + data_off;

    /*
     * now check for any non-fixed length attrs before our attribute
     */
    if (!slow)
    {
        if (att[attnum]->attcacheoff != -1)
        {
            return fetchatt(att[attnum],
                            tp + att[attnum]->attcacheoff);
        }
        else if (IndexTupleHasVarwidths(tup))
        {
            int         j;

            for (j = 0; j < attnum; j++)
            {
                if (att[j]->attlen <= 0)
                {
                    slow = true;
                    break;
                }
            }
        }
    }

    /*
     * If slow is false, and we got here, we know that we have a tuple with
     * no nulls or var-widths before the target attribute.  If possible, we
     * also want to initialize the remainder of the attribute cached offset
     * values.
     */
    if (!slow)
    {
        int         j = 1;
        long        off;

        /*
         * need to set cache for some atts
         */

        att[0]->attcacheoff = 0;

        while (j < attnum && att[j]->attcacheoff > 0)
            j++;

        off = att[j - 1]->attcacheoff + att[j - 1]->attlen;

        for (; j <= attnum; j++)
        {
            off = att_align(off, att[j]->attalign);

            att[j]->attcacheoff = off;

            off += att[j]->attlen;
        }

        return fetchatt(att[attnum], tp + att[attnum]->attcacheoff);
    }
    else
    {
        bool        usecache = true;
        int         off = 0;
        int         i;

        /*
         * Now we know that we have to walk the tuple CAREFULLY.
         */
        for (i = 0; i < attnum; i++)
        {
            if (IndexTupleHasNulls(tup))
            {
                if (att_isnull(i, bp))
                {
                    usecache = false;
                    continue;
                }
            }

            /* If we know the next offset, we can skip the rest */
            if (usecache && att[i]->attcacheoff != -1)
                off = att[i]->attcacheoff;
            else
            {
                off = att_align(off, att[i]->attalign);

                if (usecache)
                    att[i]->attcacheoff = off;
            }

            off = att_addlength(off, att[i]->attlen, tp + off);

            if (usecache && att[i]->attlen <= 0)
                usecache = false;
        }

        off = att_align(off, att[attnum]->attalign);

        return fetchatt(att[attnum], tp + off);
    }
}
Datum
bt_page_items(PG_FUNCTION_ARGS)
{
    text       *relname = PG_GETARG_TEXT_P(0);
    uint32      blkno = PG_GETARG_UINT32(1);
    Datum       result;
    char       *values[6];
    HeapTuple   tuple;
    FuncCallContext *fctx;
    MemoryContext mctx;
    struct user_args *uargs;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to use pageinspect functions"))));

    if (SRF_IS_FIRSTCALL())
    {
        RangeVar   *relrv;
        Relation    rel;
        Buffer      buffer;
        BTPageOpaque opaque;
        TupleDesc   tupleDesc;

        fctx = SRF_FIRSTCALL_INIT();

        relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
        rel = relation_openrv(relrv, AccessShareLock);

        if (!IS_INDEX(rel) || !IS_BTREE(rel))
            elog(ERROR, "relation \"%s\" is not a btree index",
                 RelationGetRelationName(rel));

        /*
         * Reject attempts to read non-local temporary relations; we would
         * be likely to get wrong data since we have no visibility into the
         * owning session's local buffers.
         */
        if (RELATION_IS_OTHER_TEMP(rel))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("cannot access temporary tables of other sessions")));

        if (blkno == 0)
            elog(ERROR, "block 0 is a meta page");

        CHECK_RELATION_BLOCK_RANGE(rel, blkno);

        buffer = ReadBuffer(rel, blkno);
        LockBuffer(buffer, BUFFER_LOCK_SHARE);

        /*
         * We copy the page into local storage to avoid holding pin on the
         * buffer longer than we must, and possibly failing to release it
         * at all if the calling query doesn't fetch all rows.
         */
        mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

        uargs = palloc(sizeof(struct user_args));

        uargs->page = palloc(BLCKSZ);
        memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);

        UnlockReleaseBuffer(buffer);
        relation_close(rel, AccessShareLock);

        uargs->offset = FirstOffsetNumber;

        opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);

        if (P_ISDELETED(opaque))
            elog(NOTICE, "page is deleted");

        fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);

        /* Build a tuple descriptor for our result type */
        if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);

        fctx->user_fctx = uargs;

        MemoryContextSwitchTo(mctx);
    }

    fctx = SRF_PERCALL_SETUP();
    uargs = fctx->user_fctx;

    if (fctx->call_cntr < fctx->max_calls)
    {
        ItemId      id;
        IndexTuple  itup;
        int         j;
        int         off;
        int         dlen;
        char       *dump;
        char       *ptr;

        id = PageGetItemId(uargs->page, uargs->offset);

        if (!ItemIdIsValid(id))
            elog(ERROR, "invalid ItemId");

        itup = (IndexTuple) PageGetItem(uargs->page, id);

        j = 0;
        values[j++] = psprintf("%d", uargs->offset);
        values[j++] = psprintf("(%u,%u)",
                               BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
                               itup->t_tid.ip_posid);
        values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
        values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
        values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');

        ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
        dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
        dump = palloc0(dlen * 3 + 1);
        values[j] = dump;
        for (off = 0; off < dlen; off++)
        {
            if (off > 0)
                *dump++ = ' ';
            sprintf(dump, "%02x", *(ptr + off) & 0xff);
            dump += 2;
        }

        tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
        result = HeapTupleGetDatum(tuple);

        uargs->offset = uargs->offset + 1;

        SRF_RETURN_NEXT(fctx, result);
    }
    else
    {
        pfree(uargs->page);
        pfree(uargs);
        SRF_RETURN_DONE(fctx);
    }
}
/*
 * Convert an index tuple into Datum/isnull arrays.
 *
 * The caller must allocate sufficient storage for the output arrays.
 * (INDEX_MAX_KEYS entries should be enough.)
 *
 * This is nearly the same as heap_deform_tuple(), but for IndexTuples.
 * One difference is that the tuple should never have any missing columns.
 */
void
index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor,
                   Datum *values, bool *isnull)
{
    int         hasnulls = IndexTupleHasNulls(tup);
    int         natts = tupleDescriptor->natts; /* number of atts to extract */
    int         attnum;
    char       *tp;             /* ptr to tuple data */
    int         off;            /* offset in tuple data */
    bits8      *bp;             /* ptr to null bitmap in tuple */
    bool        slow = false;   /* can we use/set attcacheoff? */

    /* Assert to protect callers who allocate fixed-size arrays */
    Assert(natts <= INDEX_MAX_KEYS);

    /* XXX "knows" t_bits are just after fixed tuple header! */
    bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

    tp = (char *) tup + IndexInfoFindDataOffset(tup->t_info);
    off = 0;

    for (attnum = 0; attnum < natts; attnum++)
    {
        Form_pg_attribute thisatt = TupleDescAttr(tupleDescriptor, attnum);

        if (hasnulls && att_isnull(attnum, bp))
        {
            values[attnum] = (Datum) 0;
            isnull[attnum] = true;
            slow = true;        /* can't use attcacheoff anymore */
            continue;
        }

        isnull[attnum] = false;

        if (!slow && thisatt->attcacheoff >= 0)
            off = thisatt->attcacheoff;
        else if (thisatt->attlen == -1)
        {
            /*
             * We can only cache the offset for a varlena attribute if the
             * offset is already suitably aligned, so that there would be
             * no pad bytes in any case: then the offset will be valid for
             * either an aligned or unaligned value.
             */
            if (!slow &&
                off == att_align_nominal(off, thisatt->attalign))
                thisatt->attcacheoff = off;
            else
            {
                off = att_align_pointer(off, thisatt->attalign, -1,
                                        tp + off);
                slow = true;
            }
        }
        else
        {
            /* not varlena, so safe to use att_align_nominal */
            off = att_align_nominal(off, thisatt->attalign);

            if (!slow)
                thisatt->attcacheoff = off;
        }

        values[attnum] = fetchatt(thisatt, tp + off);

        off = att_addlength_pointer(off, thisatt->attlen, tp + off);

        if (thisatt->attlen <= 0)
            slow = true;        /* can't use attcacheoff anymore */
    }
}
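A minimal usage sketch (hypothetical caller; `irel` and `itup` are assumed to already exist): deforming into fixed-size arrays sized by INDEX_MAX_KEYS, as the Assert above expects, then reforming an equivalent tuple with index_form_tuple:

    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    IndexTuple  newtup;

    /* split the tuple into per-attribute datums and null flags */
    index_deform_tuple(itup, RelationGetDescr(irel), values, isnull);

    /* rebuild; index_form_tuple does not set t_tid, so copy it over */
    newtup = index_form_tuple(RelationGetDescr(irel), values, isnull);
    newtup->t_tid = itup->t_tid;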
Datum
bt_page_items(PG_FUNCTION_ARGS)
{
    text       *relname = PG_GETARG_TEXT_P(0);
    uint32      blkno = PG_GETARG_UINT32(1);
    RangeVar   *relrv;
    Datum       result;
    char       *values[BTPAGEITEMS_NCOLUMNS];
    BTPageOpaque opaque;
    HeapTuple   tuple;
    ItemId      id;
    FuncCallContext *fctx;
    MemoryContext mctx;
    struct user_args *uargs = NULL;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to use pgstattuple functions"))));

    if (blkno == 0)
        elog(ERROR, "Block 0 is a meta page.");

    if (SRF_IS_FIRSTCALL())
    {
        fctx = SRF_FIRSTCALL_INIT();
        mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

        uargs = palloc(sizeof(struct user_args));

        uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE);
        uargs->offset = FirstOffsetNumber;

        relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
        uargs->rel = relation_openrv(relrv, AccessShareLock);

        CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno);

        uargs->buffer = ReadBuffer(uargs->rel, blkno);

        if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel))
            elog(ERROR, "bt_page_items() can be used only on b-tree index.");

        uargs->page = BufferGetPage(uargs->buffer);

        opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);

        if (P_ISDELETED(opaque))
            elog(NOTICE, "bt_page_items(): this page is deleted.");

        fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
        fctx->user_fctx = uargs;

        MemoryContextSwitchTo(mctx);
    }

    fctx = SRF_PERCALL_SETUP();
    uargs = fctx->user_fctx;

    if (fctx->call_cntr < fctx->max_calls)
    {
        IndexTuple  itup;

        id = PageGetItemId(uargs->page, uargs->offset);

        if (!ItemIdIsValid(id))
            elog(ERROR, "Invalid ItemId.");

        itup = (IndexTuple) PageGetItem(uargs->page, id);

        {
            int         j = 0;
            BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid));

            values[j] = palloc(32);
            snprintf(values[j++], 32, "%d", uargs->offset);
            values[j] = palloc(32);
            snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid);
            values[j] = palloc(32);
            snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
            values[j] = palloc(32);
            snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
            values[j] = palloc(32);
            snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');

            {
                int         off;
                char       *dump;
                char       *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);

                dump = palloc(IndexTupleSize(itup) * 3);
                memset(dump, 0, IndexTupleSize(itup) * 3);

                for (off = 0;
                     off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
                     off++)
                {
                    if (dump[0] == '\0')
                        sprintf(dump, "%02x", *(ptr + off) & 0xff);
                    else
                    {
                        char        buf[4];

                        sprintf(buf, " %02x", *(ptr + off) & 0xff);
                        strcat(dump, buf);
                    }
                }
                values[j] = dump;
            }

            tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values);
            result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple);
        }

        uargs->offset = uargs->offset + 1;

        SRF_RETURN_NEXT(fctx, result);
    }
    else
    {
        ReleaseBuffer(uargs->buffer);
        relation_close(uargs->rel, AccessShareLock);

        SRF_RETURN_DONE(fctx);
    }
}
SGITITERATE(innerTuple, i, node)
{
    if (IndexTupleHasNulls(node))
        elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
    nodeLabels[i] = SGNTDATUM(node, state);
}