/*
 * Determine size of a large object
 *
 * NOTE: LOs can contain gaps, just like Unix files.  We actually return
 * the offset of the last byte + 1.
 */
static uint32
inv_getsize(LargeObjectDesc *obj_desc)
{
    uint32      lastbyte = 0;
    ScanKeyData skey[1];
    SysScanDesc sd;
    HeapTuple   tuple;

    Assert(PointerIsValid(obj_desc));

    open_lo_relation();

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 1, skey);

    /*
     * Because the pg_largeobject index is on both loid and pageno, but we
     * constrain only loid, a backwards scan should visit all pages of the
     * large object in reverse pageno order.  So, it's sufficient to examine
     * the first valid tuple (== last valid page).
     */
    tuple = systable_getnext_ordered(sd, BackwardScanDirection);
    if (HeapTupleIsValid(tuple))
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HeapTupleHasNulls(tuple))   /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GETSTRUCT(tuple);
        datafield = &(data->data);      /* see note at top of file */
        pfreeit = false;
        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }
        lastbyte = data->pageno * LOBLKSIZE + getbytealen(datafield);
        if (pfreeit)
            pfree(datafield);
    }

    systable_endscan_ordered(sd);

    return lastbyte;
}
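/*
 * Illustrative sketch, not part of the original source: the size computed
 * above is plain arithmetic -- full pages before the last one, times the
 * page size, plus the valid length of the last page.  TOY_LOBLKSIZE is an
 * assumed stand-in for the real build-time LOBLKSIZE constant.
 */
#include <stdio.h>

#define TOY_LOBLKSIZE 2048

int
main(void)
{
    unsigned    pageno = 5;     /* last stored page number */
    unsigned    pagelen = 100;  /* valid bytes in that page */

    /* pages 0..4 fully span 5 * 2048 = 10240 bytes; last byte + 1 = 10340 */
    unsigned    lastbyte = pageno * TOY_LOBLKSIZE + pagelen;

    printf("size = %u\n", lastbyte);
    return 0;
}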
/* ----------
 * toast_delete_datum -
 *
 *  Delete a single external stored value.
 * ----------
 */
static void
toast_delete_datum(Relation rel, Datum value)
{
    struct varlena *attr = (struct varlena *) DatumGetPointer(value);
    struct varatt_external toast_pointer;
    Relation    toastrel;
    Relation    toastidx;
    ScanKeyData toastkey;
    SysScanDesc toastscan;
    HeapTuple   toasttup;

    if (!VARATT_IS_EXTERNAL(attr))
        return;

    /* Must copy to access aligned fields */
    VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

    /*
     * Open the toast relation and its index
     */
    toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock);
    toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock);

    /*
     * Setup a scan key to find chunks with matching va_valueid
     */
    ScanKeyInit(&toastkey,
                (AttrNumber) 1,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(toast_pointer.va_valueid));

    /*
     * Find all the chunks.  (We don't actually care whether we see them in
     * sequence or not, but since we've already locked the index we might as
     * well use systable_beginscan_ordered.)
     */
    toastscan = systable_beginscan_ordered(toastrel, toastidx,
                                           SnapshotToast, 1, &toastkey);
    while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
         * Have a chunk, delete it
         */
        simple_heap_delete(toastrel, &toasttup->t_self);
    }

    /*
     * End scan and close relations
     */
    systable_endscan_ordered(toastscan);
    index_close(toastidx, RowExclusiveLock);
    heap_close(toastrel, RowExclusiveLock);
}
/*
 * Determine size of a large object
 *
 * NOTE: LOs can contain gaps, just like Unix files.  We actually return
 * the offset of the last byte + 1.
 */
static uint32
inv_getsize(struct lobj *obj_desc)
{
    uint32      lastbyte = 0;
    struct scankey skey[1];
    struct sys_scan *sd;
    struct heap_tuple *tuple;

    ASSERT(PTR_VALID(obj_desc));

    open_lo_relation();

    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 1, skey);

    /*
     * Because the pg_largeobject index is on both loid and pageno, but we
     * constrain only loid, a backwards scan should visit all pages of the
     * large object in reverse pageno order.  So, it's sufficient to examine
     * the first valid tuple (== last valid page).
     */
    tuple = systable_getnext_ordered(sd, BACKWARD_SCANDIR);
    if (HT_VALID(tuple))
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HT_HAS_NULLS(tuple))        /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GET_STRUCT(tuple);
        datafield = &(data->data);      /* see note at top of file */
        pfreeit = false;
        if (VLA_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct vla *) datafield);
            pfreeit = true;
        }
        lastbyte = data->pageno * LO_BLK_SIZE + getbytealen(datafield);
        if (pfreeit)
            pfree(datafield);
    }

    systable_endscan_ordered(sd);

    return lastbyte;
}
/*
 * enum_endpoint: common code for enum_first/enum_last
 */
static Oid
enum_endpoint(Oid enumtypoid, ScanDirection direction)
{
    Relation    enum_rel;
    Relation    enum_idx;
    SysScanDesc enum_scan;
    HeapTuple   enum_tuple;
    ScanKeyData skey;
    Oid         minmax;

    /*
     * Find the first/last enum member using pg_enum_typid_sortorder_index.
     * Note we must not use the syscache.  See comments for RenumberEnumType
     * in catalog/pg_enum.c for more info.
     */
    ScanKeyInit(&skey,
                Anum_pg_enum_enumtypid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(enumtypoid));

    enum_rel = heap_open(EnumRelationId, AccessShareLock);
    enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock);
    enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL,
                                           1, &skey);

    enum_tuple = systable_getnext_ordered(enum_scan, direction);
    if (HeapTupleIsValid(enum_tuple))
    {
        /* check it's safe to use in SQL */
        check_safe_enum_use(enum_tuple);
        minmax = HeapTupleGetOid(enum_tuple);
    }
    else
    {
        /* should only happen with an empty enum */
        minmax = InvalidOid;
    }

    systable_endscan_ordered(enum_scan);
    index_close(enum_idx, AccessShareLock);
    heap_close(enum_rel, AccessShareLock);

    return minmax;
}
/*
 * enum_endpoint: common code for enum_first/enum_last
 */
static Oid
enum_endpoint(Oid enumtypoid, ScanDirection direction)
{
    Relation    enum_rel;
    Relation    enum_idx;
    SysScanDesc enum_scan;
    HeapTuple   enum_tuple;
    ScanKeyData skey;
    Oid         minmax;

    /*
     * Find the first/last enum member using pg_enum_typid_sortorder_index.
     * Note we must not use the syscache, and must use an MVCC snapshot here.
     * See comments for RenumberEnumType in catalog/pg_enum.c for more info.
     */
    ScanKeyInit(&skey,
                Anum_pg_enum_enumtypid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(enumtypoid));

    enum_rel = heap_open(EnumRelationId, AccessShareLock);
    enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock);
    enum_scan = systable_beginscan_ordered(enum_rel, enum_idx,
                                           GetTransactionSnapshot(),
                                           1, &skey);

    enum_tuple = systable_getnext_ordered(enum_scan, direction);
    if (HeapTupleIsValid(enum_tuple))
        minmax = HeapTupleGetOid(enum_tuple);
    else
        minmax = InvalidOid;

    systable_endscan_ordered(enum_scan);
    index_close(enum_idx, AccessShareLock);
    heap_close(enum_rel, AccessShareLock);

    return minmax;
}
static ArrayType *
enum_range_internal(Oid enumtypoid, Oid lower, Oid upper)
{
    ArrayType  *result;
    Relation    enum_rel;
    Relation    enum_idx;
    SysScanDesc enum_scan;
    HeapTuple   enum_tuple;
    ScanKeyData skey;
    Datum      *elems;
    int         max,
                cnt;
    bool        left_found;

    /*
     * Scan the enum members in order using pg_enum_typid_sortorder_index.
     * Note we must not use the syscache.  See comments for RenumberEnumType
     * in catalog/pg_enum.c for more info.
     */
    ScanKeyInit(&skey,
                Anum_pg_enum_enumtypid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(enumtypoid));

    enum_rel = heap_open(EnumRelationId, AccessShareLock);
    enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock);
    enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey);

    max = 64;
    elems = (Datum *) palloc(max * sizeof(Datum));
    cnt = 0;
    left_found = !OidIsValid(lower);

    while (HeapTupleIsValid(enum_tuple = systable_getnext_ordered(enum_scan, ForwardScanDirection)))
    {
        Oid         enum_oid = HeapTupleGetOid(enum_tuple);

        if (!left_found && lower == enum_oid)
            left_found = true;

        if (left_found)
        {
            /* check it's safe to use in SQL */
            check_safe_enum_use(enum_tuple);

            if (cnt >= max)
            {
                max *= 2;
                elems = (Datum *) repalloc(elems, max * sizeof(Datum));
            }
            elems[cnt++] = ObjectIdGetDatum(enum_oid);
        }

        if (OidIsValid(upper) && upper == enum_oid)
            break;
    }

    systable_endscan_ordered(enum_scan);
    index_close(enum_idx, AccessShareLock);
    heap_close(enum_rel, AccessShareLock);

    /* and build the result array */
    /* note this hardwires some details about the representation of Oid */
    result = construct_array(elems, cnt, enumtypoid,
                             sizeof(Oid), true, 'i');

    pfree(elems);

    return result;
}
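/*
 * Illustrative sketch, not part of the original source: the repalloc
 * doubling above is the classic dynamic-array growth pattern, which keeps
 * the amortized cost of each append constant.  Shown standalone with plain
 * malloc/realloc instead of the palloc family (error checks omitted).
 */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    int         max = 4;        /* initial capacity */
    int         cnt = 0;
    int        *elems = malloc(max * sizeof(int));

    for (int i = 0; i < 100; i++)
    {
        if (cnt >= max)
        {
            max *= 2;           /* double, as enum_range_internal does */
            elems = realloc(elems, max * sizeof(int));
        }
        elems[cnt++] = i;
    }

    /* prints: stored 100 elements, capacity 128 */
    printf("stored %d elements, capacity %d\n", cnt, max);
    free(elems);
    return 0;
}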
void
inv_truncate(LargeObjectDesc *obj_desc, int len)
{
    int32       pageno = (int32) (len / LOBLKSIZE);
    int         off;
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   oldtuple;
    Form_pg_largeobject olddata;
    struct
    {
        bytea       hdr;
        char        data[LOBLKSIZE];    /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VARDATA(&workbuf.hdr);
    HeapTuple   newtup;
    Datum       values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    Assert(PointerIsValid(obj_desc));

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!LargeObjectExists(obj_desc->id))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    open_lo_relation();

    indstate = CatalogOpenIndexes(lo_heap_r);

    /*
     * Set up to find all pages with desired loid and pageno >= target
     */
    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    /*
     * If possible, get the page the truncation point is in.  The truncation
     * point may be beyond the end of the LO or in a hole.
     */
    olddata = NULL;
    if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
    {
        if (HeapTupleHasNulls(oldtuple))        /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
        Assert(olddata->pageno >= pageno);
    }

    /*
     * If we found the page of the truncation point we need to truncate the
     * data in it.  Otherwise if we're in a hole, we need to create a page to
     * mark the end of data.
     */
    if (olddata != NULL && olddata->pageno == pageno)
    {
        /* First, load old data into workbuf */
        bytea      *datafield = &(olddata->data);       /* see note at top of
                                                         * file */
        bool        pfreeit = false;
        int         pagelen;

        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }
        pagelen = getbytealen(datafield);
        Assert(pagelen <= LOBLKSIZE);
        memcpy(workb, VARDATA(datafield), pagelen);
        if (pfreeit)
            pfree(datafield);

        /*
         * Fill any hole
         */
        off = len % LOBLKSIZE;
        if (off > pagelen)
            MemSet(workb + pagelen, 0, off - pagelen);

        /* compute length of new page */
        SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

        /*
         * Form and insert updated tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        memset(replace, false, sizeof(replace));
        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
        replace[Anum_pg_largeobject_data - 1] = true;
        newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
                                   values, nulls, replace);
        simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
        CatalogIndexInsert(indstate, newtup);
        heap_freetuple(newtup);
    }
    else
    {
        /*
         * If the first page we found was after the truncation point, we're
         * in a hole that we'll fill, but we need to delete the later page
         * because the loop below won't visit it again.
         */
        if (olddata != NULL)
        {
            Assert(olddata->pageno > pageno);
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }

        /*
         * Write a brand new page.
         *
         * Fill the hole up to the truncation point
         */
        off = len % LOBLKSIZE;
        if (off > 0)
            MemSet(workb, 0, off);

        /* compute length of new page */
        SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

        /*
         * Form and insert new tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
        values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
        newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
        simple_heap_insert(lo_heap_r, newtup);
        CatalogIndexInsert(indstate, newtup);
        heap_freetuple(newtup);
    }

    /*
     * Delete any pages after the truncation point.  If the initial search
     * didn't find a page, then of course there's nothing more to do.
     */
    if (olddata != NULL)
    {
        while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
        {
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }
    }

    systable_endscan_ordered(sd);

    CatalogCloseIndexes(indstate);

    /*
     * Advance command counter so that tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    CommandCounterIncrement();
}
int
inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
{
    int         nwritten = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LOBLKSIZE);
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   oldtuple;
    Form_pg_largeobject olddata;
    bool        neednextpage;
    bytea      *datafield;
    bool        pfreeit;
    struct
    {
        bytea       hdr;
        char        data[LOBLKSIZE];    /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VARDATA(&workbuf.hdr);
    HeapTuple   newtup;
    Datum       values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    Assert(PointerIsValid(obj_desc));
    Assert(buf != NULL);

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!LargeObjectExists(obj_desc->id))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    indstate = CatalogOpenIndexes(lo_heap_r);

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    oldtuple = NULL;
    olddata = NULL;
    neednextpage = true;

    while (nwritten < nbytes)
    {
        /*
         * If possible, get next pre-existing page of the LO.  We expect the
         * indexscan will deliver these in order --- but there may be holes.
         */
        if (neednextpage)
        {
            if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
            {
                if (HeapTupleHasNulls(oldtuple))        /* paranoia */
                    elog(ERROR, "null field found in pg_largeobject");
                olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
                Assert(olddata->pageno >= pageno);
            }
            neednextpage = false;
        }

        /*
         * If we have a pre-existing page, see if it is the page we want to
         * write, or a later one.
         */
        if (olddata != NULL && olddata->pageno == pageno)
        {
            /*
             * Update an existing page with fresh data.
             *
             * First, load old data into workbuf
             */
            datafield = &(olddata->data);       /* see note at top of file */
            pfreeit = false;
            if (VARATT_IS_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct varlena *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            Assert(len <= LOBLKSIZE);
            memcpy(workb, VARDATA(datafield), len);
            if (pfreeit)
                pfree(datafield);

            /*
             * Fill any hole
             */
            off = (int) (obj_desc->offset % LOBLKSIZE);
            if (off > len)
                MemSet(workb + len, 0, off - len);

            /*
             * Insert appropriate portion of new data
             */
            n = LOBLKSIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            off += n;
            /* compute valid length of new page */
            len = (len >= off) ? len : off;
            SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            memset(replace, false, sizeof(replace));
            values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
            replace[Anum_pg_largeobject_data - 1] = true;
            newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
                                       values, nulls, replace);
            simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
            CatalogIndexInsert(indstate, newtup);
            heap_freetuple(newtup);

            /*
             * We're done with this old page.
             */
            oldtuple = NULL;
            olddata = NULL;
            neednextpage = true;
        }
        else
        {
            /*
             * Write a brand new page.
             *
             * First, fill any hole
             */
            off = (int) (obj_desc->offset % LOBLKSIZE);
            if (off > 0)
                MemSet(workb, 0, off);

            /*
             * Insert appropriate portion of new data
             */
            n = LOBLKSIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            /* compute valid length of new page */
            len = off + n;
            SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
            values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
            values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
            newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
            simple_heap_insert(lo_heap_r, newtup);
            CatalogIndexInsert(indstate, newtup);
            heap_freetuple(newtup);
        }
        pageno++;
    }

    systable_endscan_ordered(sd);

    CatalogCloseIndexes(indstate);

    /*
     * Advance command counter so that my tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    CommandCounterIncrement();

    return nwritten;
}
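/*
 * Illustrative sketch, not part of the original source: inv_write's
 * positioning logic is plain integer arithmetic on the byte offset.  A
 * standalone rendering, with TOY_LOBLKSIZE as an assumed stand-in for the
 * real LOBLKSIZE constant.
 */
#include <stdio.h>

#define TOY_LOBLKSIZE 2048

int
main(void)
{
    unsigned    offset = 5000;  /* current write position in the LO */

    /* which page, and where inside it, the next byte lands */
    int         pageno = (int) (offset / TOY_LOBLKSIZE);        /* 2 */
    int         off = (int) (offset % TOY_LOBLKSIZE);           /* 904 */

    /* at most this many bytes fit in the remainder of the page */
    int         room = TOY_LOBLKSIZE - off;                     /* 1144 */

    printf("pageno=%d off=%d room=%d\n", pageno, off, room);
    return 0;
}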
/*
 * Fetch configuration cache entry
 */
TSConfigCacheEntry *
lookup_ts_config_cache(Oid cfgId)
{
    TSConfigCacheEntry *entry;

    if (TSConfigCacheHash == NULL)
    {
        /* First time through: initialize the hash table */
        init_ts_config_cache();
    }

    /* Check single-entry cache */
    if (lastUsedConfig && lastUsedConfig->cfgId == cfgId &&
        lastUsedConfig->isvalid)
        return lastUsedConfig;

    /* Try to look up an existing entry */
    entry = (TSConfigCacheEntry *) hash_search(TSConfigCacheHash,
                                               (void *) &cfgId,
                                               HASH_FIND, NULL);
    if (entry == NULL || !entry->isvalid)
    {
        /*
         * If we didn't find one, we want to make one.  But first look up the
         * object to be sure the OID is real.
         */
        HeapTuple   tp;
        Form_pg_ts_config cfg;
        Relation    maprel;
        Relation    mapidx;
        ScanKeyData mapskey;
        SysScanDesc mapscan;
        HeapTuple   maptup;
        ListDictionary maplists[MAXTOKENTYPE + 1];
        Oid         mapdicts[MAXDICTSPERTT];
        int         maxtokentype;
        int         ndicts;
        int         i;

        tp = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgId));
        if (!HeapTupleIsValid(tp))
            elog(ERROR, "cache lookup failed for text search configuration %u",
                 cfgId);
        cfg = (Form_pg_ts_config) GETSTRUCT(tp);

        /*
         * Sanity checks
         */
        if (!OidIsValid(cfg->cfgparser))
            elog(ERROR, "text search configuration %u has no parser", cfgId);

        if (entry == NULL)
        {
            bool        found;

            /* Now make the cache entry */
            entry = (TSConfigCacheEntry *)
                hash_search(TSConfigCacheHash,
                            (void *) &cfgId,
                            HASH_ENTER, &found);
            Assert(!found);     /* it wasn't there a moment ago */
        }
        else
        {
            /* Cleanup old contents */
            if (entry->map)
            {
                for (i = 0; i < entry->lenmap; i++)
                    if (entry->map[i].dictIds)
                        pfree(entry->map[i].dictIds);
                pfree(entry->map);
            }
        }

        MemSet(entry, 0, sizeof(TSConfigCacheEntry));
        entry->cfgId = cfgId;
        entry->prsId = cfg->cfgparser;

        ReleaseSysCache(tp);

        /*
         * Scan pg_ts_config_map to gather dictionary list for each token type
         *
         * Because the index is on (mapcfg, maptokentype, mapseqno), we will
         * see the entries in maptokentype order, and in mapseqno order for
         * each token type, even though we didn't explicitly ask for that.
         */
        MemSet(maplists, 0, sizeof(maplists));
        maxtokentype = 0;
        ndicts = 0;

        ScanKeyInit(&mapskey,
                    Anum_pg_ts_config_map_mapcfg,
                    BTEqualStrategyNumber, F_OIDEQ,
                    ObjectIdGetDatum(cfgId));

        maprel = heap_open(TSConfigMapRelationId, AccessShareLock);
        mapidx = index_open(TSConfigMapIndexId, AccessShareLock);
        mapscan = systable_beginscan_ordered(maprel, mapidx,
                                             NULL, 1, &mapskey);

        while ((maptup = systable_getnext_ordered(mapscan, ForwardScanDirection)) != NULL)
        {
            Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup);
            int         toktype = cfgmap->maptokentype;

            if (toktype <= 0 || toktype > MAXTOKENTYPE)
                elog(ERROR, "maptokentype value %d is out of range", toktype);
            if (toktype < maxtokentype)
                elog(ERROR, "maptokentype entries are out of order");
            if (toktype > maxtokentype)
            {
                /* starting a new token type, but first save the prior data */
                if (ndicts > 0)
                {
                    maplists[maxtokentype].len = ndicts;
                    maplists[maxtokentype].dictIds = (Oid *)
                        MemoryContextAlloc(CacheMemoryContext,
                                           sizeof(Oid) * ndicts);
                    memcpy(maplists[maxtokentype].dictIds, mapdicts,
                           sizeof(Oid) * ndicts);
                }
                maxtokentype = toktype;
                mapdicts[0] = cfgmap->mapdict;
                ndicts = 1;
            }
            else
            {
                /* continuing data for current token type */
                if (ndicts >= MAXDICTSPERTT)
                    elog(ERROR, "too many pg_ts_config_map entries for one token type");
                mapdicts[ndicts++] = cfgmap->mapdict;
            }
        }

        systable_endscan_ordered(mapscan);
        index_close(mapidx, AccessShareLock);
        heap_close(maprel, AccessShareLock);

        if (ndicts > 0)
        {
            /* save the last token type's dictionaries */
            maplists[maxtokentype].len = ndicts;
            maplists[maxtokentype].dictIds = (Oid *)
                MemoryContextAlloc(CacheMemoryContext,
                                   sizeof(Oid) * ndicts);
            memcpy(maplists[maxtokentype].dictIds, mapdicts,
                   sizeof(Oid) * ndicts);
            /* and save the overall map */
            entry->lenmap = maxtokentype + 1;
            entry->map = (ListDictionary *)
                MemoryContextAlloc(CacheMemoryContext,
                                   sizeof(ListDictionary) * entry->lenmap);
            memcpy(entry->map, maplists,
                   sizeof(ListDictionary) * entry->lenmap);
        }

        entry->isvalid = true;
    }

    lastUsedConfig = entry;

    return entry;
}
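/*
 * Illustrative sketch, not part of the original source: the scan loop
 * above relies on index order to group rows by token type, which is the
 * same pattern as grouping consecutive runs in a sorted array.  A minimal
 * standalone version of that pattern:
 */
#include <stdio.h>

int
main(void)
{
    /* sorted "maptokentype" keys, as the ordered index would return them */
    int         keys[] = {1, 1, 2, 2, 2, 5};
    int         nkeys = 6;
    int         cur = -1;
    int         run = 0;

    for (int i = 0; i < nkeys; i++)
    {
        if (keys[i] != cur)
        {
            if (run > 0)
                printf("key %d: %d entries\n", cur, run);
            cur = keys[i];      /* start a new group */
            run = 1;
        }
        else
            run++;              /* continue the current group */
    }
    if (run > 0)
        printf("key %d: %d entries\n", cur, run);       /* flush last group */
    return 0;
}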
void
inv_truncate(struct lobj *obj_desc, int len)
{
    int32       pageno = (int32) (len / LO_BLK_SIZE);
    int         off;
    struct scankey skey[2];
    struct sys_scan *sd;
    struct heap_tuple *oldtuple;
    Form_pg_largeobject olddata;
    struct
    {
        bytea       hdr;
        char        data[LO_BLK_SIZE];  /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VLA_DATA(&workbuf.hdr);
    struct heap_tuple *newtup;
    datum_t     values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    ASSERT(PTR_VALID(obj_desc));

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(E_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!large_obj_exists(obj_desc->id))
        ereport(ERROR,
                (errcode(E_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    open_lo_relation();

    indstate = cat_open_indexes(lo_heap_r);

    /*
     * Set up to find all pages with desired loid and pageno >= target
     */
    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    scankey_init(&skey[1],
                 Anum_pg_largeobject_pageno,
                 BT_GE_STRAT_NR, F_INT4GE,
                 INT32_TO_D(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    /*
     * If possible, get the page the truncation point is in.  The truncation
     * point may be beyond the end of the LO or in a hole.
     */
    olddata = NULL;
    if ((oldtuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
    {
        if (HT_HAS_NULLS(oldtuple))     /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        olddata = (Form_pg_largeobject) GET_STRUCT(oldtuple);
        ASSERT(olddata->pageno >= pageno);
    }

    /*
     * If we found the page of the truncation point we need to truncate the
     * data in it.  Otherwise if we're in a hole, we need to create a page to
     * mark the end of data.
     */
    if (olddata != NULL && olddata->pageno == pageno)
    {
        /* First, load old data into workbuf */
        bytea      *datafield = &(olddata->data);       /* see note at top of
                                                         * file */
        bool        pfreeit = false;
        int         pagelen;

        if (VLA_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct vla *) datafield);
            pfreeit = true;
        }
        pagelen = getbytealen(datafield);
        ASSERT(pagelen <= LO_BLK_SIZE);
        memcpy(workb, VLA_DATA(datafield), pagelen);
        if (pfreeit)
            pfree(datafield);

        /*
         * Fill any hole
         */
        off = len % LO_BLK_SIZE;
        if (off > pagelen)
            pg_memset(workb + pagelen, 0, off - pagelen);

        /* compute length of new page */
        VLA_SET_SZ_STND(&workbuf.hdr, off + VAR_HDR_SZ);

        /*
         * Form and insert updated tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        memset(replace, false, sizeof(replace));
        values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
        replace[Anum_pg_largeobject_data - 1] = true;
        newtup = heap_modify_tuple(oldtuple, REL_DESC(lo_heap_r),
                                   values, nulls, replace);
        simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
        cat_index_insert(indstate, newtup);
        heap_free_tuple(newtup);
    }
    else
    {
        /*
         * If the first page we found was after the truncation point, we're
         * in a hole that we'll fill, but we need to delete the later page
         * because the loop below won't visit it again.
         */
        if (olddata != NULL)
        {
            ASSERT(olddata->pageno > pageno);
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }

        /*
         * Write a brand new page.
         *
         * Fill the hole up to the truncation point
         */
        off = len % LO_BLK_SIZE;
        if (off > 0)
            pg_memset(workb, 0, off);

        /* compute length of new page */
        VLA_SET_SZ_STND(&workbuf.hdr, off + VAR_HDR_SZ);

        /*
         * Form and insert new tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        values[Anum_pg_largeobject_loid - 1] = OID_TO_D(obj_desc->id);
        values[Anum_pg_largeobject_pageno - 1] = INT32_TO_D(pageno);
        values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
        newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
        simple_heap_insert(lo_heap_r, newtup);
        cat_index_insert(indstate, newtup);
        heap_free_tuple(newtup);
    }

    /*
     * Delete any pages after the truncation point.  If the initial search
     * didn't find a page, then of course there's nothing more to do.
     */
    if (olddata != NULL)
    {
        while ((oldtuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
        {
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }
    }

    systable_endscan_ordered(sd);

    cat_close_indexes(indstate);

    /*
     * Advance command counter so that tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    cmd_count_incr();
}
int
inv_write(struct lobj *obj_desc, const char *buf, int nbytes)
{
    int         nwritten = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LO_BLK_SIZE);
    struct scankey skey[2];
    struct sys_scan *sd;
    struct heap_tuple *oldtuple;
    Form_pg_largeobject olddata;
    bool        neednextpage;
    bytea      *datafield;
    bool        pfreeit;
    struct
    {
        bytea       hdr;
        char        data[LO_BLK_SIZE];  /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VLA_DATA(&workbuf.hdr);
    struct heap_tuple *newtup;
    datum_t     values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    ASSERT(PTR_VALID(obj_desc));
    ASSERT(buf != NULL);

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(E_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!large_obj_exists(obj_desc->id))
        ereport(ERROR,
                (errcode(E_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    indstate = cat_open_indexes(lo_heap_r);

    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    scankey_init(&skey[1],
                 Anum_pg_largeobject_pageno,
                 BT_GE_STRAT_NR, F_INT4GE,
                 INT32_TO_D(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    oldtuple = NULL;
    olddata = NULL;
    neednextpage = true;

    while (nwritten < nbytes)
    {
        /*
         * If possible, get next pre-existing page of the LO.  We expect the
         * indexscan will deliver these in order --- but there may be holes.
         */
        if (neednextpage)
        {
            if ((oldtuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
            {
                if (HT_HAS_NULLS(oldtuple))     /* paranoia */
                    elog(ERROR, "null field found in pg_largeobject");
                olddata = (Form_pg_largeobject) GET_STRUCT(oldtuple);
                ASSERT(olddata->pageno >= pageno);
            }
            neednextpage = false;
        }

        /*
         * If we have a pre-existing page, see if it is the page we want to
         * write, or a later one.
         */
        if (olddata != NULL && olddata->pageno == pageno)
        {
            /*
             * Update an existing page with fresh data.
             *
             * First, load old data into workbuf
             */
            datafield = &(olddata->data);       /* see note at top of file */
            pfreeit = false;
            if (VLA_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct vla *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            ASSERT(len <= LO_BLK_SIZE);
            memcpy(workb, VLA_DATA(datafield), len);
            if (pfreeit)
                pfree(datafield);

            /*
             * Fill any hole
             */
            off = (int) (obj_desc->offset % LO_BLK_SIZE);
            if (off > len)
                pg_memset(workb + len, 0, off - len);

            /*
             * Insert appropriate portion of new data
             */
            n = LO_BLK_SIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            off += n;
            /* compute valid length of new page */
            len = (len >= off) ? len : off;
            VLA_SET_SZ_STND(&workbuf.hdr, len + VAR_HDR_SZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            memset(replace, false, sizeof(replace));
            values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
            replace[Anum_pg_largeobject_data - 1] = true;
            newtup = heap_modify_tuple(oldtuple, REL_DESC(lo_heap_r),
                                       values, nulls, replace);
            simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
            cat_index_insert(indstate, newtup);
            heap_free_tuple(newtup);

            /*
             * We're done with this old page.
             */
            oldtuple = NULL;
            olddata = NULL;
            neednextpage = true;
        }
        else
        {
            /*
             * Write a brand new page.
             *
             * First, fill any hole
             */
            off = (int) (obj_desc->offset % LO_BLK_SIZE);
            if (off > 0)
                pg_memset(workb, 0, off);

            /*
             * Insert appropriate portion of new data
             */
            n = LO_BLK_SIZE - off;
            n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
            memcpy(workb + off, buf + nwritten, n);
            nwritten += n;
            obj_desc->offset += n;
            /* compute valid length of new page */
            len = off + n;
            VLA_SET_SZ_STND(&workbuf.hdr, len + VAR_HDR_SZ);

            /*
             * Form and insert updated tuple
             */
            memset(values, 0, sizeof(values));
            memset(nulls, false, sizeof(nulls));
            values[Anum_pg_largeobject_loid - 1] = OID_TO_D(obj_desc->id);
            values[Anum_pg_largeobject_pageno - 1] = INT32_TO_D(pageno);
            values[Anum_pg_largeobject_data - 1] = PTR_TO_D(&workbuf);
            newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
            simple_heap_insert(lo_heap_r, newtup);
            cat_index_insert(indstate, newtup);
            heap_free_tuple(newtup);
        }
        pageno++;
    }

    systable_endscan_ordered(sd);

    cat_close_indexes(indstate);

    /*
     * Advance command counter so that my tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    cmd_count_incr();

    return nwritten;
}
int
inv_read(struct lobj *obj_desc, char *buf, int nbytes)
{
    int         nread = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LO_BLK_SIZE);
    uint32      pageoff;
    struct scankey skey[2];
    struct sys_scan *sd;
    struct heap_tuple *tuple;

    ASSERT(PTR_VALID(obj_desc));
    ASSERT(buf != NULL);

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    scankey_init(&skey[0],
                 Anum_pg_largeobject_loid,
                 BT_EQ_STRAT_NR, F_OIDEQ,
                 OID_TO_D(obj_desc->id));

    scankey_init(&skey[1],
                 Anum_pg_largeobject_pageno,
                 BT_GE_STRAT_NR, F_INT4GE,
                 INT32_TO_D(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    while ((tuple = systable_getnext_ordered(sd, FORWARD_SCANDIR)) != NULL)
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HT_HAS_NULLS(tuple))        /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GET_STRUCT(tuple);

        /*
         * We expect the indexscan will deliver pages in order.  However,
         * there may be missing pages if the LO contains unwritten "holes".
         * We want missing sections to read out as zeroes.
         */
        pageoff = ((uint32) data->pageno) * LO_BLK_SIZE;
        if (pageoff > obj_desc->offset)
        {
            n = pageoff - obj_desc->offset;
            n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
            pg_memset(buf + nread, 0, n);
            nread += n;
            obj_desc->offset += n;
        }

        if (nread < nbytes)
        {
            ASSERT(obj_desc->offset >= pageoff);
            off = (int) (obj_desc->offset - pageoff);
            ASSERT(off >= 0 && off < LO_BLK_SIZE);

            datafield = &(data->data);  /* see note at top of file */
            pfreeit = false;
            if (VLA_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct vla *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            if (len > off)
            {
                n = len - off;
                n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
                memcpy(buf + nread, VLA_DATA(datafield) + off, n);
                nread += n;
                obj_desc->offset += n;
            }
            if (pfreeit)
                pfree(datafield);
        }

        if (nread >= nbytes)
            break;
    }

    systable_endscan_ordered(sd);

    return nread;
}
/*
 * Rebuild the event trigger cache.
 */
static void
BuildEventTriggerCache(void)
{
    HASHCTL     ctl;
    HTAB       *cache;
    MemoryContext oldcontext;
    Relation    rel;
    Relation    irel;
    SysScanDesc scan;

    if (EventTriggerCacheContext != NULL)
    {
        /*
         * Free up any memory already allocated in EventTriggerCacheContext.
         * This can happen either because a previous rebuild failed, or
         * because an invalidation happened before the rebuild was complete.
         */
        MemoryContextResetAndDeleteChildren(EventTriggerCacheContext);
    }
    else
    {
        /*
         * This is our first time attempting to build the cache, so we need
         * to set up the memory context and register a syscache callback to
         * capture future invalidation events.
         */
        if (CacheMemoryContext == NULL)
            CreateCacheMemoryContext();
        EventTriggerCacheContext =
            AllocSetContextCreate(CacheMemoryContext,
                                  "EventTriggerCache",
                                  ALLOCSET_DEFAULT_MINSIZE,
                                  ALLOCSET_DEFAULT_INITSIZE,
                                  ALLOCSET_DEFAULT_MAXSIZE);
        CacheRegisterSyscacheCallback(EVENTTRIGGEROID,
                                      InvalidateEventCacheCallback,
                                      (Datum) 0);
    }

    /* Switch to correct memory context. */
    oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext);

    /* Prevent the memory context from being nuked while we're rebuilding. */
    EventTriggerCacheState = ETCS_REBUILD_STARTED;

    /* Create new hash table. */
    MemSet(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(EventTriggerEvent);
    ctl.entrysize = sizeof(EventTriggerCacheEntry);
    ctl.hash = tag_hash;
    ctl.hcxt = EventTriggerCacheContext;
    cache = hash_create("Event Trigger Cache", 32, &ctl,
                        HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

    /*
     * Prepare to scan pg_event_trigger in name order.  We use an MVCC
     * snapshot to avoid getting inconsistent results if the table is being
     * concurrently updated.
     */
    rel = relation_open(EventTriggerRelationId, AccessShareLock);
    irel = index_open(EventTriggerNameIndexId, AccessShareLock);
    scan = systable_beginscan_ordered(rel, irel, GetLatestSnapshot(), 0, NULL);

    /*
     * Build a cache item for each pg_event_trigger tuple, and append each
     * one to the appropriate cache entry.
     */
    for (;;)
    {
        HeapTuple   tup;
        Form_pg_event_trigger form;
        char       *evtevent;
        EventTriggerEvent event;
        EventTriggerCacheItem *item;
        Datum       evttags;
        bool        evttags_isnull;
        EventTriggerCacheEntry *entry;
        bool        found;

        /* Get next tuple. */
        tup = systable_getnext_ordered(scan, ForwardScanDirection);
        if (!HeapTupleIsValid(tup))
            break;

        /* Skip trigger if disabled. */
        form = (Form_pg_event_trigger) GETSTRUCT(tup);
        if (form->evtenabled == TRIGGER_DISABLED)
            continue;

        /* Decode event name. */
        evtevent = NameStr(form->evtevent);
        if (strcmp(evtevent, "ddl_command_start") == 0)
            event = EVT_DDLCommandStart;
        else
            continue;

        /* Allocate new cache item. */
        item = palloc0(sizeof(EventTriggerCacheItem));
        item->fnoid = form->evtfoid;
        item->enabled = form->evtenabled;

        /* Decode and sort tags array. */
        evttags = heap_getattr(tup, Anum_pg_event_trigger_evttags,
                               RelationGetDescr(rel), &evttags_isnull);
        if (!evttags_isnull)
        {
            item->ntags = DecodeTextArrayToCString(evttags, &item->tag);
            qsort(item->tag, item->ntags, sizeof(char *), pg_qsort_strcmp);
        }

        /* Add to cache entry. */
        entry = hash_search(cache, &event, HASH_ENTER, &found);
        if (found)
            entry->triggerlist = lappend(entry->triggerlist, item);
        else
            entry->triggerlist = list_make1(item);
    }

    /* Done with pg_event_trigger scan. */
    systable_endscan_ordered(scan);
    index_close(irel, AccessShareLock);
    relation_close(rel, AccessShareLock);

    /* Restore previous memory context. */
    MemoryContextSwitchTo(oldcontext);

    /* Install new cache. */
    EventTriggerCache = cache;

    /*
     * If the cache has been invalidated since we entered this routine, we
     * still use and return the cache we just finished constructing, to avoid
     * infinite loops, but we leave the cache marked stale so that we'll
     * rebuild it again on next access.  Otherwise, we mark the cache valid.
     */
    if (EventTriggerCacheState == ETCS_REBUILD_STARTED)
        EventTriggerCacheState = ETCS_VALID;
}
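/*
 * Illustrative sketch, not part of the original source: the
 * ETCS_REBUILD_STARTED / ETCS_VALID handshake above avoids an infinite
 * rebuild loop when invalidations race with the rebuild.  A standalone
 * rendering with invented enum and function names:
 */
#include <stdio.h>

enum cache_state
{
    STALE,
    REBUILD_STARTED,
    VALID
};

static enum cache_state state = STALE;

/* stand-in flag for an invalidation callback firing mid-rebuild */
static int  invalidated_during_rebuild = 0;

static void
rebuild_cache(void)
{
    state = REBUILD_STARTED;

    /* ... scan catalogs and build the new table here ... */
    if (invalidated_during_rebuild)
        state = STALE;          /* the real callback would do this */

    /*
     * Only mark the cache valid if nothing invalidated it mid-rebuild;
     * otherwise leave it stale so the next access rebuilds again, while
     * the caller still uses the copy built this time (avoiding a loop).
     */
    if (state == REBUILD_STARTED)
        state = VALID;
}

int
main(void)
{
    rebuild_cache();
    printf("state = %d (2 == VALID)\n", state);
    return 0;
}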
/* ----------
 * toast_fetch_datum -
 *
 *  Reconstruct an in memory Datum from the chunks saved
 *  in the toast relation
 * ----------
 */
static struct varlena *
toast_fetch_datum(struct varlena * attr)
{
    Relation    toastrel;
    Relation    toastidx;
    ScanKeyData toastkey;
    SysScanDesc toastscan;
    HeapTuple   ttup;
    TupleDesc   toasttupDesc;
    struct varlena *result;
    struct varatt_external toast_pointer;
    int32       ressize;
    int32       residx,
                nextidx;
    int32       numchunks;
    Pointer     chunk;
    bool        isnull;
    char       *chunkdata;
    int32       chunksize;

    /* Must copy to access aligned fields */
    VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

    ressize = toast_pointer.va_extsize;
    numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

    result = (struct varlena *) palloc(ressize + VARHDRSZ);

    if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
        SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
    else
        SET_VARSIZE(result, ressize + VARHDRSZ);

    /*
     * Open the toast relation and its index
     */
    toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
    toasttupDesc = toastrel->rd_att;
    toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);

    /*
     * Setup a scan key to fetch from the index by va_valueid
     */
    ScanKeyInit(&toastkey,
                (AttrNumber) 1,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(toast_pointer.va_valueid));

    /*
     * Read the chunks by index
     *
     * Note that because the index is actually on (valueid, chunkidx) we will
     * see the chunks in chunkidx order, even though we didn't explicitly ask
     * for it.
     */
    nextidx = 0;

    toastscan = systable_beginscan_ordered(toastrel, toastidx,
                                           SnapshotToast, 1, &toastkey);
    while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
         * Have a chunk, extract the sequence number and the data
         */
        residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
        Assert(!isnull);
        chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
        Assert(!isnull);
        if (!VARATT_IS_EXTENDED(chunk))
        {
            chunksize = VARSIZE(chunk) - VARHDRSZ;
            chunkdata = VARDATA(chunk);
        }
        else if (VARATT_IS_SHORT(chunk))
        {
            /* could happen due to heap_form_tuple doing its thing */
            chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
            chunkdata = VARDATA_SHORT(chunk);
        }
        else
        {
            /* should never happen */
            elog(ERROR, "found toasted toast chunk for toast value %u in %s",
                 toast_pointer.va_valueid,
                 RelationGetRelationName(toastrel));
            chunksize = 0;      /* keep compiler quiet */
            chunkdata = NULL;
        }

        /*
         * Some checks on the data we've found
         */
        if (residx != nextidx)
            elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
                 residx, nextidx,
                 toast_pointer.va_valueid,
                 RelationGetRelationName(toastrel));
        if (residx < numchunks - 1)
        {
            if (chunksize != TOAST_MAX_CHUNK_SIZE)
                elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
                     chunksize, (int) TOAST_MAX_CHUNK_SIZE,
                     residx, numchunks,
                     toast_pointer.va_valueid,
                     RelationGetRelationName(toastrel));
        }
        else if (residx == numchunks - 1)
        {
            if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
                elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
                     chunksize,
                     (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
                     residx,
                     toast_pointer.va_valueid,
                     RelationGetRelationName(toastrel));
        }
        else
            elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
                 residx,
                 0, numchunks - 1,
                 toast_pointer.va_valueid,
                 RelationGetRelationName(toastrel));

        /*
         * Copy the data into proper place in our result
         */
        memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
               chunkdata,
               chunksize);

        nextidx++;
    }

    /*
     * Final checks that we successfully fetched the datum
     */
    if (nextidx != numchunks)
        elog(ERROR, "missing chunk number %d for toast value %u in %s",
             nextidx,
             toast_pointer.va_valueid,
             RelationGetRelationName(toastrel));

    /*
     * End scan and close relations
     */
    systable_endscan_ordered(toastscan);
    index_close(toastidx, AccessShareLock);
    heap_close(toastrel, AccessShareLock);

    return result;
}
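/*
 * Illustrative sketch, not part of the original source: the chunk count
 * and final-chunk size checks above are ceiling-division arithmetic.
 * TOY_CHUNK_SIZE is an assumed stand-in for the build-time
 * TOAST_MAX_CHUNK_SIZE constant.
 */
#include <stdio.h>

#define TOY_CHUNK_SIZE 1996

int
main(void)
{
    int         ressize = 5000; /* total external size of the datum */

    /* ceiling division: 5000 bytes -> 3 chunks */
    int         numchunks = ((ressize - 1) / TOY_CHUNK_SIZE) + 1;

    /* every chunk but the last is full; the last holds the remainder */
    int         lastsize = ressize - (numchunks - 1) * TOY_CHUNK_SIZE;

    /* prints: numchunks=3 lastsize=1008 */
    printf("numchunks=%d lastsize=%d\n", numchunks, lastsize);
    return 0;
}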
int
inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
    int         nread = 0;
    int         n;
    int         off;
    int         len;
    int32       pageno = (int32) (obj_desc->offset / LOBLKSIZE);
    uint32      pageoff;
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   tuple;

    Assert(PointerIsValid(obj_desc));
    Assert(buf != NULL);

    if (nbytes <= 0)
        return 0;

    open_lo_relation();

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    while ((tuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit;

        if (HeapTupleHasNulls(tuple))   /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        data = (Form_pg_largeobject) GETSTRUCT(tuple);

        /*
         * We expect the indexscan will deliver pages in order.  However,
         * there may be missing pages if the LO contains unwritten "holes".
         * We want missing sections to read out as zeroes.
         */
        pageoff = ((uint32) data->pageno) * LOBLKSIZE;
        if (pageoff > obj_desc->offset)
        {
            n = pageoff - obj_desc->offset;
            n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
            MemSet(buf + nread, 0, n);
            nread += n;
            obj_desc->offset += n;
        }

        if (nread < nbytes)
        {
            Assert(obj_desc->offset >= pageoff);
            off = (int) (obj_desc->offset - pageoff);
            Assert(off >= 0 && off < LOBLKSIZE);

            datafield = &(data->data);  /* see note at top of file */
            pfreeit = false;
            if (VARATT_IS_EXTENDED(datafield))
            {
                datafield = (bytea *)
                    heap_tuple_untoast_attr((struct varlena *) datafield);
                pfreeit = true;
            }
            len = getbytealen(datafield);
            if (len > off)
            {
                n = len - off;
                n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
                memcpy(buf + nread, VARDATA(datafield) + off, n);
                nread += n;
                obj_desc->offset += n;
            }
            if (pfreeit)
                pfree(datafield);
        }

        if (nread >= nbytes)
            break;
    }

    systable_endscan_ordered(sd);

    return nread;
}
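/*
 * Illustrative sketch, not part of the original source: inv_read's hole
 * handling just compares the next stored page's starting offset with the
 * current read position and emits zeroes for the gap.  A standalone
 * rendering with an assumed toy page size:
 */
#include <stdio.h>
#include <string.h>

#define TOY_LOBLKSIZE 8

int
main(void)
{
    /* pages 0 and 2 exist; page 1 is an unwritten hole */
    int         pagenos[] = {0, 2};
    char        buf[24];
    unsigned    offset = 0;
    int         nread = 0;

    for (int i = 0; i < 2; i++)
    {
        unsigned    pageoff = pagenos[i] * TOY_LOBLKSIZE;

        if (pageoff > offset)
        {
            /* zero-fill the hole, as the real code does with MemSet */
            memset(buf + nread, 0, pageoff - offset);
            nread += pageoff - offset;
            offset = pageoff;
        }
        /* pretend the whole page holds the byte 'A' + pageno */
        memset(buf + nread, 'A' + pagenos[i], TOY_LOBLKSIZE);
        nread += TOY_LOBLKSIZE;
        offset += TOY_LOBLKSIZE;
    }

    /* prints: read 24 bytes, byte 8 = 0 (zero from the hole) */
    printf("read %d bytes, byte 8 = %d (zero from the hole)\n",
           nread, buf[8]);
    return 0;
}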
/* ----------
 * toast_fetch_datum_slice -
 *
 *  Reconstruct a segment of a Datum from the chunks saved
 *  in the toast relation
 * ----------
 */
static struct varlena *
toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
{
    Relation    toastrel;
    Relation    toastidx;
    ScanKeyData toastkey[3];
    int         nscankeys;
    SysScanDesc toastscan;
    HeapTuple   ttup;
    TupleDesc   toasttupDesc;
    struct varlena *result;
    struct varatt_external toast_pointer;
    int32       attrsize;
    int32       residx;
    int32       nextidx;
    int         numchunks;
    int         startchunk;
    int         endchunk;
    int32       startoffset;
    int32       endoffset;
    int         totalchunks;
    Pointer     chunk;
    bool        isnull;
    char       *chunkdata;
    int32       chunksize;
    int32       chcpystrt;
    int32       chcpyend;

    Assert(VARATT_IS_EXTERNAL(attr));

    /* Must copy to access aligned fields */
    VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

    /*
     * It's nonsense to fetch slices of a compressed datum -- this isn't
     * lo_*; we can't return a compressed datum which is meaningful to toast
     * later.
     */
    Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));

    attrsize = toast_pointer.va_extsize;
    totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

    if (sliceoffset >= attrsize)
    {
        sliceoffset = 0;
        length = 0;
    }

    if (((sliceoffset + length) > attrsize) || length < 0)
        length = attrsize - sliceoffset;

    result = (struct varlena *) palloc(length + VARHDRSZ);

    if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
        SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
    else
        SET_VARSIZE(result, length + VARHDRSZ);

    if (length == 0)
        return result;          /* Can save a lot of work at this point! */

    startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
    endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
    numchunks = (endchunk - startchunk) + 1;

    startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
    endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;

    /*
     * Open the toast relation and its index
     */
    toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
    toasttupDesc = toastrel->rd_att;
    toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);

    /*
     * Setup a scan key to fetch from the index.  This is either two keys or
     * three depending on the number of chunks.
     */
    ScanKeyInit(&toastkey[0],
                (AttrNumber) 1,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(toast_pointer.va_valueid));

    /*
     * Use equality condition for one chunk, a range condition otherwise:
     */
    if (numchunks == 1)
    {
        ScanKeyInit(&toastkey[1],
                    (AttrNumber) 2,
                    BTEqualStrategyNumber, F_INT4EQ,
                    Int32GetDatum(startchunk));
        nscankeys = 2;
    }
    else
    {
        ScanKeyInit(&toastkey[1],
                    (AttrNumber) 2,
                    BTGreaterEqualStrategyNumber, F_INT4GE,
                    Int32GetDatum(startchunk));
        ScanKeyInit(&toastkey[2],
                    (AttrNumber) 2,
                    BTLessEqualStrategyNumber, F_INT4LE,
                    Int32GetDatum(endchunk));
        nscankeys = 3;
    }

    /*
     * Read the chunks by index
     *
     * The index is on (valueid, chunkidx) so they will come in order
     */
    nextidx = startchunk;
    toastscan = systable_beginscan_ordered(toastrel, toastidx,
                                           SnapshotToast, nscankeys, toastkey);
    while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
         * Have a chunk, extract the sequence number and the data
         */
        residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
        Assert(!isnull);
        chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
        Assert(!isnull);
        if (!VARATT_IS_EXTENDED(chunk))
        {
            chunksize = VARSIZE(chunk) - VARHDRSZ;
            chunkdata = VARDATA(chunk);
        }
        else if (VARATT_IS_SHORT(chunk))
        {
            /* could happen due to heap_form_tuple doing its thing */
            chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
            chunkdata = VARDATA_SHORT(chunk);
        }
        else
        {
            /* should never happen */
            elog(ERROR, "found toasted toast chunk for toast value %u in %s",
                 toast_pointer.va_valueid,
                 RelationGetRelationName(toastrel));
            chunksize = 0;      /* keep compiler quiet */
            chunkdata = NULL;
        }

        /*
         * Some checks on the data we've found
         */
        if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
            elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
                 residx, nextidx,
                 toast_pointer.va_valueid,
                 RelationGetRelationName(toastrel));
        if (residx < totalchunks - 1)
        {
            if (chunksize != TOAST_MAX_CHUNK_SIZE)
                elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
                     chunksize, (int) TOAST_MAX_CHUNK_SIZE,
                     residx, totalchunks,
                     toast_pointer.va_valueid,
                     RelationGetRelationName(toastrel));
        }
        else if (residx == totalchunks - 1)
        {
            if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
                elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
                     chunksize,
                     (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
                     residx,
                     toast_pointer.va_valueid,
                     RelationGetRelationName(toastrel));
        }
        else
            elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
                 residx,
                 0, totalchunks - 1,
                 toast_pointer.va_valueid,
                 RelationGetRelationName(toastrel));

        /*
         * Copy the data into proper place in our result
         */
        chcpystrt = 0;
        chcpyend = chunksize - 1;
        if (residx == startchunk)
            chcpystrt = startoffset;
        if (residx == endchunk)
            chcpyend = endoffset;

        memcpy(VARDATA(result) +
               (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
               chunkdata + chcpystrt,
               (chcpyend - chcpystrt) + 1);

        nextidx++;
    }

    /*
     * Final checks that we successfully fetched the datum
     */
    if (nextidx != (endchunk + 1))
        elog(ERROR, "missing chunk number %d for toast value %u in %s",
             nextidx,
             toast_pointer.va_valueid,
             RelationGetRelationName(toastrel));

    /*
     * End scan and close relations
     */
    systable_endscan_ordered(toastscan);
    index_close(toastidx, AccessShareLock);
    heap_close(toastrel, AccessShareLock);

    return result;
}
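/*
 * Illustrative sketch, not part of the original source: the slice bounds
 * above map a byte range onto a chunk range plus intra-chunk offsets.
 * TOY_CHUNK_SIZE is an assumed stand-in for TOAST_MAX_CHUNK_SIZE.
 */
#include <stdio.h>

#define TOY_CHUNK_SIZE 100

int
main(void)
{
    int         sliceoffset = 250;      /* first byte wanted */
    int         length = 120;           /* number of bytes wanted */

    int         startchunk = sliceoffset / TOY_CHUNK_SIZE;              /* 2 */
    int         endchunk = (sliceoffset + length - 1) / TOY_CHUNK_SIZE; /* 3 */
    int         startoffset = sliceoffset % TOY_CHUNK_SIZE;             /* 50 */
    int         endoffset = (sliceoffset + length - 1) % TOY_CHUNK_SIZE;        /* 69 */

    /*
     * Destination offset in the result buffer for the first chunk: the
     * chunk's base minus the slice start, plus the intra-chunk start,
     * i.e. (200 - 250) + 50 = 0, so the first copy lands at offset 0.
     */
    int         dst = (startchunk * TOY_CHUNK_SIZE - sliceoffset) + startoffset;

    printf("chunks %d..%d, offsets %d and %d, first copy lands at %d\n",
           startchunk, endchunk, startoffset, endoffset, dst);
    return 0;
}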
/*
 * Rebuild the event trigger cache.
 */
static void
BuildEventTriggerCache(void)
{
    HASHCTL     ctl;
    HTAB       *cache;
    MemoryContext oldcontext;
    Relation    rel;
    Relation    irel;
    SysScanDesc scan;

    if (EventTriggerCacheContext != NULL)
    {
        /*
         * The cache has been previously built, and subsequently invalidated,
         * and now we're trying to rebuild it.  Normally, there won't be
         * anything in the context at this point, because the invalidation
         * will have already reset it.  But if the previous attempt to
         * rebuild the cache failed, then there might be some junk lying
         * around that we want to reclaim.
         */
        MemoryContextReset(EventTriggerCacheContext);
    }
    else
    {
        /*
         * This is our first time attempting to build the cache, so we need
         * to set up the memory context and register a syscache callback to
         * capture future invalidation events.
         */
        if (CacheMemoryContext == NULL)
            CreateCacheMemoryContext();
        EventTriggerCacheContext =
            AllocSetContextCreate(CacheMemoryContext,
                                  "EventTriggerCache",
                                  ALLOCSET_DEFAULT_MINSIZE,
                                  ALLOCSET_DEFAULT_INITSIZE,
                                  ALLOCSET_DEFAULT_MAXSIZE);
        CacheRegisterSyscacheCallback(EVENTTRIGGEROID,
                                      InvalidateEventCacheCallback,
                                      (Datum) 0);
    }

    /* Switch to correct memory context. */
    oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext);

    /*
     * Create a new hash table, but don't assign it to the global variable
     * until it accurately represents the state of the catalogs, so that if
     * we fail midway through this we won't end up with incorrect cache
     * contents.
     */
    MemSet(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(EventTriggerEvent);
    ctl.entrysize = sizeof(EventTriggerCacheEntry);
    ctl.hash = tag_hash;
    cache = hash_create("Event Trigger Cache", 32, &ctl,
                        HASH_ELEM | HASH_FUNCTION);

    /*
     * Prepare to scan pg_event_trigger in name order.  We use an MVCC
     * snapshot to avoid getting inconsistent results if the table is being
     * concurrently updated.
     */
    rel = relation_open(EventTriggerRelationId, AccessShareLock);
    irel = index_open(EventTriggerNameIndexId, AccessShareLock);
    scan = systable_beginscan_ordered(rel, irel, GetLatestSnapshot(), 0, NULL);

    /*
     * Build a cache item for each pg_event_trigger tuple, and append each
     * one to the appropriate cache entry.
     */
    for (;;)
    {
        HeapTuple   tup;
        Form_pg_event_trigger form;
        char       *evtevent;
        EventTriggerEvent event;
        EventTriggerCacheItem *item;
        Datum       evttags;
        bool        evttags_isnull;
        EventTriggerCacheEntry *entry;
        bool        found;

        /* Get next tuple. */
        tup = systable_getnext_ordered(scan, ForwardScanDirection);
        if (!HeapTupleIsValid(tup))
            break;

        /* Skip trigger if disabled. */
        form = (Form_pg_event_trigger) GETSTRUCT(tup);
        if (form->evtenabled == TRIGGER_DISABLED)
            continue;

        /* Decode event name. */
        evtevent = NameStr(form->evtevent);
        if (strcmp(evtevent, "ddl_command_start") == 0)
            event = EVT_DDLCommandStart;
        else
            continue;

        /* Allocate new cache item. */
        item = palloc0(sizeof(EventTriggerCacheItem));
        item->fnoid = form->evtfoid;
        item->enabled = form->evtenabled;

        /* Decode and sort tags array. */
        evttags = heap_getattr(tup, Anum_pg_event_trigger_evttags,
                               RelationGetDescr(rel), &evttags_isnull);
        if (!evttags_isnull)
        {
            item->ntags = DecodeTextArrayToCString(evttags, &item->tag);
            qsort(item->tag, item->ntags, sizeof(char *), pg_qsort_strcmp);
        }

        /* Add to cache entry. */
        entry = hash_search(cache, &event, HASH_ENTER, &found);
        if (found)
            entry->triggerlist = lappend(entry->triggerlist, item);
        else
            entry->triggerlist = list_make1(item);
    }

    /* Done with pg_event_trigger scan. */
    systable_endscan_ordered(scan);
    index_close(irel, AccessShareLock);
    relation_close(rel, AccessShareLock);

    /* Restore previous memory context. */
    MemoryContextSwitchTo(oldcontext);

    /* Cache is now valid. */
    EventTriggerCache = cache;
}