/*
 * Fetch the data column of a pg_largeobject row.
 *
 * The field is detoasted first when it is stored in extended form, and the
 * payload length is then checked against the range 0..LOBLKSIZE.
 *
 * Outputs:
 *  *pdatafield - the (possibly detoasted) bytea
 *  *plen       - payload length in bytes, varlena header excluded
 *  *pfreeit    - true when *pdatafield is a detoasted copy that the caller
 *                should pfree once done with it
 *
 * A length outside the allowed range is reported through ereport(ERROR)
 * with ERRCODE_DATA_CORRUPTED; the outputs are only stored after that check.
 */
static void
getdatafield(Form_pg_largeobject tuple,
             bytea **pdatafield,
             int *plen,
             bool *pfreeit)
{
    bytea      *field = &(tuple->data);     /* see note at top of file */
    bool        is_copy = false;
    int         payload;

    if (VARATT_IS_EXTENDED(field))
    {
        field = (bytea *) heap_tuple_untoast_attr((struct varlena *) field);
        is_copy = true;
    }

    payload = VARSIZE(field) - VARHDRSZ;
    if (!(payload >= 0 && payload <= LOBLKSIZE))
        ereport(ERROR,
                (errcode(ERRCODE_DATA_CORRUPTED),
                 errmsg("pg_largeobject entry for OID %u, page %d has invalid data field size %d",
                        tuple->loid, tuple->pageno, payload)));

    *pdatafield = field;
    *plen = payload;
    *pfreeit = is_copy;
}
/*
 * Determine size of a large object
 *
 * NOTE: LOs can contain gaps, just like Unix files.  We actually return
 * the offset of the last byte + 1.
 *
 * Only the page with the highest pageno is examined: its starting offset
 * (pageno * LOBLKSIZE) plus the length of its data field is the result.
 *
 * Reports ERRCODE_UNDEFINED_OBJECT via ereport(ERROR) when the index scan
 * finds no page at all for obj_desc->id, and elog(ERROR) when the page
 * tuple contains a null field.
 */
static uint32
inv_getsize(LargeObjectDesc *obj_desc)
{
    bool        found = false;
    uint32      lastbyte = 0;
    ScanKeyData skey[1];
    IndexScanDesc sd;
    HeapTuple   tuple;

    Assert(PointerIsValid(obj_desc));

    open_lo_relation();

    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    sd = index_beginscan(lo_heap_r, lo_index_r,
                         obj_desc->snapshot, 1, skey);

    /*
     * Because the pg_largeobject index is on both loid and pageno, but we
     * constrain only loid, a backwards scan should visit all pages of the
     * large object in reverse pageno order.  So, it's sufficient to examine
     * the first valid tuple (== last valid page).
     */
    tuple = index_getnext(sd, BackwardScanDirection);
    if (tuple != NULL)
    {
        Form_pg_largeobject data;
        bytea      *datafield;
        bool        pfreeit = false;

        found = true;

        if (HeapTupleHasNulls(tuple))   /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");

        data = (Form_pg_largeobject) GETSTRUCT(tuple);
        datafield = &(data->data);      /* see note at top of file */
        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }

        /*
         * Do the arithmetic in uint32, the type we return.  A plain
         * "pageno * LOBLKSIZE" is a signed int multiplication, which
         * overflows (undefined behavior) once the object grows past 2GB.
         */
        lastbyte = (uint32) data->pageno * LOBLKSIZE + getbytealen(datafield);

        if (pfreeit)
            pfree(datafield);
    }

    index_endscan(sd);

    if (!found)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u does not exist", obj_desc->id)));

    return lastbyte;
}
/*
 * Return the payload length of a bytea: VARSIZE minus the varlena header.
 *
 * The value must not be in extended form (checked by Assert only).  A
 * VARSIZE smaller than VARHDRSZ is reported through elog(ERROR).
 */
static int32
getbytealen(bytea *data)
{
    int32       payload;

    Assert(!VARATT_IS_EXTENDED(data));

    if (VARSIZE(data) < VARHDRSZ)
        elog(ERROR, "invalid VARSIZE(data)");

    payload = (VARSIZE(data) - VARHDRSZ);
    return payload;
}
/**
 * Locate the payload bytes of a varlena datum, untoasting it when needed.
 *
 * Outputs:
 *   *datastart - first payload byte
 *   *len       - payload length in bytes
 *   *tofree    - NULL, or a palloc'd buffer backing *datastart that was
 *                created here (by toast_fetch_datum or by decompression)
 *
 * A NULL datum pointer is reported through ereport(ERROR).
 *
 * If this function is changed then update varattrib_untoast_len as well
 */
void
varattrib_untoast_ptr_len(Datum d, char **datastart, int *len, void **tofree)
{
    varattrib  *attr;

    if (DatumGetPointer(d) == NULL)
    {
        ereport(ERROR,
                (errcode(ERRCODE_INTERNAL_ERROR),
                 errmsg(" Unable to detoast datum "),
                 errprintstack(true)));
    }

    attr = (varattrib *) DatumGetPointer(d);

    /* -1 means "length not determined yet"; see the final step below */
    *len = -1;
    *tofree = NULL;

    if (VARATT_IS_EXTENDED(attr))
    {
        if (VARATT_IS_EXTERNAL(attr))
        {
            /* toast_fetch_datum will palloc, so remember the copy */
            attr = (varattrib *) toast_fetch_datum((struct varlena *) attr);
            *tofree = attr;
        }

        if (VARATT_IS_COMPRESSED(attr))
        {
            PGLZ_Header *packed = (PGLZ_Header *) attr;

            attr = (varattrib *) palloc(PGLZ_RAW_SIZE(packed) + VARHDRSZ);
            SET_VARSIZE(attr, PGLZ_RAW_SIZE(packed) + VARHDRSZ);
            pglz_decompress(packed, VARDATA(attr));

            /*
             * A non-NULL *tofree here is the buffer obtained from
             * toast_fetch_datum above; the decompressed copy replaces it.
             */
            if (*tofree)
                pfree(*tofree);
            *tofree = attr;
        }
        else if (VARATT_IS_SHORT(attr))
        {
            /* Warning! Return unaligned pointer! */
            *len = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
            *datastart = VARDATA_SHORT(attr);
            attr = NULL;
        }
    }

    /* Every case except the short-header one ends with a regular header */
    if (*len == -1)
    {
        *datastart = VARDATA(attr);
        *len = VARSIZE(attr) - VARHDRSZ;
    }

    Assert(*len >= 0);
}
/*
 * Report the size of a large object.
 *
 * Large objects may contain gaps, the way Unix files can, so the value
 * returned is the offset of the last stored byte + 1.  When the scan finds
 * no page for the object the result is 0.
 */
static uint64
inv_getsize(LargeObjectDesc *obj_desc)
{
    uint64      endpos = 0;
    ScanKeyData key[1];
    SysScanDesc scan;
    HeapTuple   lasttup;

    Assert(PointerIsValid(obj_desc));

    open_lo_relation();

    ScanKeyInit(&key[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    scan = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                      obj_desc->snapshot, 1, key);

    /*
     * The pg_largeobject index covers (loid, pageno) and only loid is
     * constrained, so scanning backwards should deliver the object's pages
     * in descending pageno order.  The first tuple returned is therefore
     * the last page, which is all we need.
     */
    lasttup = systable_getnext_ordered(scan, BackwardScanDirection);
    if (HeapTupleIsValid(lasttup))
    {
        Form_pg_largeobject page;
        bytea      *field;
        bool        detoasted = false;

        if (HeapTupleHasNulls(lasttup))     /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");

        page = (Form_pg_largeobject) GETSTRUCT(lasttup);
        field = &(page->data);              /* see note at top of file */
        if (VARATT_IS_EXTENDED(field))
        {
            field = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) field);
            detoasted = true;
        }

        /* start offset of the last page, then the bytes stored in it */
        endpos = (uint64) page->pageno * LOBLKSIZE;
        endpos += getbytealen(field);

        if (detoasted)
            pfree(field);
    }

    systable_endscan_ordered(scan);

    return endpos;
}
/**
 * Peek into a #GSERIALIZED datum to find the bounding box.  If the
 * box is there, copy it out into *box2df and return 1.
 *
 * If the serialized form carries no box, 0 is returned and *box2df is left
 * untouched.  (The original description names the two results #LW_SUCCESS
 * and #LW_FAILURE -- presumably 1 and 0; confirm against liblwgeom.)
 *
 * TODO: the intended fallback for the no-box case is still disabled.  It
 * would compute the box from the full object:
 *     g      = PG_DETOAST_DATUM(gsdatum)
 *     lwgeom = lwgeom_from_gserialized(g)
 *     lwgeom_calculate_gbox(lwgeom, &gbox)   (return LW_FAILURE on failure)
 *     lwgeom_free(lwgeom)
 *     result = box2df_from_gbox_p(&gbox, box2df)
 */
static int
gserialized_datum_get_box2df_p(Datum gsdatum, BOX2DF *box2df)
{
    GSERIALIZED *gpart;
    uint8_t     flags;
    int         result = 0;

    /*
     * The most info we need is the 8 bytes of serialized header plus the
     * floats necessary to hold the bounding box, so a toasted value is only
     * fetched up to that point.
     */
    if (VARATT_IS_EXTENDED(gsdatum))
        gpart = (GSERIALIZED *) PG_DETOAST_DATUM_SLICE(gsdatum, 0, 8 + sizeof(BOX2DF));
    else
        gpart = (GSERIALIZED *) PG_DETOAST_DATUM(gsdatum);

    flags = gpart->flags;

    /* Do we even have a serialized bounding box? */
    if (FLAGS_GET_BBOX(flags))
    {
        /* Yes! Copy it out into the box! */
        memcpy(box2df, gpart->data, sizeof(BOX2DF));
        result = 1;

        /*
         * Debug trace.  Emitted only here, where *box2df has just been
         * filled in: printing it on the no-box path would read whatever
         * uninitialized contents the caller's buffer happens to hold.
         */
        printf("BOX2DF(%f %f, %f %f)\n",
               box2df->xmin, box2df->ymin, box2df->xmax, box2df->ymax);
    }

    /*
     * No box: nothing is computed yet (see TODO above).  We deliberately do
     * not detoast the full object here, since nothing would consume it.
     */
    return result;
}
/**
 * Return the payload length, in bytes, of a varlena datum, looking through
 * external, compressed and short-header representations.
 *
 * An externally stored value is fetched with toast_fetch_datum in order to
 * inspect it; that temporary copy is released before returning.  A NULL
 * datum pointer is reported through ereport(ERROR).
 *
 * If this function is changed then update varattrib_untoast_ptr_len as well
 */
int
varattrib_untoast_len(Datum d)
{
    varattrib  *attr;
    void       *fetched = NULL;
    int         len = -1;       /* -1 means "not determined yet" */

    if (DatumGetPointer(d) == NULL)
    {
        ereport(ERROR,
                (errcode(ERRCODE_INTERNAL_ERROR),
                 errmsg(" Unable to detoast datum "),
                 errprintstack(true)));
    }

    attr = (varattrib *) DatumGetPointer(d);

    if (VARATT_IS_EXTENDED(attr))
    {
        if (VARATT_IS_EXTERNAL(attr))
        {
            /* toast_fetch_datum will palloc, so remember the copy */
            attr = (varattrib *) toast_fetch_datum((struct varlena *) attr);
            fetched = attr;
        }

        if (VARATT_IS_COMPRESSED(attr))
        {
            PGLZ_Header *packed = (PGLZ_Header *) attr;

            /* the compressed header records the uncompressed size */
            len = PGLZ_RAW_SIZE(packed);
        }
        else if (VARATT_IS_SHORT(attr))
            len = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
    }

    /* anything not handled above carries a regular header */
    if (len == -1)
        len = VARSIZE(attr) - VARHDRSZ;

    if (fetched)
        pfree(fetched);

    Assert(len >= 0);

    return len;
}
/*
 * Decode char(N), varchar(N), text, json or xml types and pass data out.
 *
 *  buffer         - start of the varlena value
 *  buff_size      - number of readable bytes at buffer
 *  processed_size - out: bytes consumed from buffer; 0 on failure
 *  out_data       - out: receives the payload; at most
 *                   buff_size - VARHDRSZ bytes are written
 *  out_length     - out: payload length in bytes; 0 on failure
 *
 * Returns 0 on success and -1 on failure.  Failure means the value is
 * TOASTed/extended (not decodable here), or its length header does not fit
 * in buff_size.  Both output counters are always initialized, so a caller
 * never sees garbage sizes after a failed decode.
 */
static int
DecodeBytesBinary(const char *buffer,
                  unsigned int buff_size,
                  unsigned int *processed_size,
                  char *out_data,
                  unsigned int *out_length)
{
    unsigned int total;
    unsigned int payload;

    *processed_size = 0;
    *out_length = 0;

    /* The first byte is needed just to classify the header */
    if (buff_size < 1)
    {
        printf("Error: unable to read value, buffer is empty.\n");
        return -1;
    }

    if (VARATT_IS_EXTENDED(buffer))
    {
        printf("Error: unable read TOAST value.\n");
        return -1;
    }

    /* A regular header must be fully present before VARSIZE is read */
    if (buff_size < (unsigned int) VARHDRSZ)
    {
        printf("Error: unable to read value, buffer too small for header.\n");
        return -1;
    }

    /*
     * Reject lengths smaller than the header (which would underflow the
     * unsigned payload length) or larger than what the buffer holds (which
     * would read past its end).
     */
    total = VARSIZE(buffer);
    if (total < (unsigned int) VARHDRSZ || total > buff_size)
    {
        printf("Error: unable to read value, invalid length %u.\n", total);
        return -1;
    }

    payload = total - (unsigned int) VARHDRSZ;
    memcpy(out_data, VARDATA(buffer), payload);

    *out_length = payload;
    *processed_size = total;

    return 0;
}
/* * This function accepts an array, and returns one item for each entry in the * array */ Datum int_enum(PG_FUNCTION_ARGS) { PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0); CTX *pc; ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo; if (!rsi || !IsA(rsi, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("int_enum called in context that cannot accept a set"))); if (!p) { elog(WARNING, "no data sent"); PG_RETURN_NULL(); } if (!fcinfo->flinfo->fn_extra) { /* Allocate working state */ MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt); pc = (CTX *) palloc(sizeof(CTX)); /* Don't copy attribute if you don't need to */ if (VARATT_IS_EXTENDED(p)) { /* Toasted!!! */ pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p); pc->flags = TOASTED; } else { /* Untoasted */ pc->p = p; pc->flags = 0; } /* Now that we have a detoasted array, verify dimensions */ /* We'll treat a zero-D array as empty, below */ if (pc->p->a.ndim > 1) elog(ERROR, "int_enum only accepts 1-D arrays"); pc->num = 0; fcinfo->flinfo->fn_extra = (void *) pc; MemoryContextSwitchTo(oldcontext); } else /* use existing working state */ pc = (CTX *) fcinfo->flinfo->fn_extra; /* Are we done yet? */ if (pc->p->a.ndim < 1 || pc->num >= pc->p->items) { /* We are done */ if (pc->flags & TOASTED) pfree(pc->p); pfree(pc); fcinfo->flinfo->fn_extra = NULL; rsi->isDone = ExprEndResult; } else { /* nope, return the next value */ int val = pc->p->array[pc->num++]; rsi->isDone = ExprMultipleResult; PG_RETURN_INT32(val); } PG_RETURN_NULL(); }
/*
 * Build a geohash string from the bounding box stored in a GiST entry's
 * serialized geometry key.
 *
 *  entry - GiST entry whose key is a GSERIALIZED datum
 *  size  - number of bytes used for each geohash buffer and passed on to
 *          geohash_encode / convert_GeoHash_from_box2d
 *
 * Returns whatever convert_GeoHash_from_box2d produces for the encoded
 * min/max corners of the box, or NULL when the serialized key carries no
 * bounding box.  (Computing the box from the full geometry in that case is
 * not implemented yet.)
 *
 * The printf calls are debug traces kept from the original code.
 */
char *
WTBtree_convertGeoHash(GISTENTRY *entry, int size)
{
    GSERIALIZED *gpart;
    uint8_t     flags;
    BOX2DF      box2df;         /* on the stack: only needed locally */
    char       *minPnt;
    char       *maxPnt;
    char       *cvtGeoHash = NULL;

    if (VARATT_IS_EXTENDED(entry->key))
    {
        printf("true\n");
        /* only the header plus the box are needed, so fetch just a slice */
        gpart = (GSERIALIZED *) PG_DETOAST_DATUM_SLICE(entry->key, 0, 8 + sizeof(BOX2DF));
        printf("gpart->size : %d\n", gpart->size);
        printf("gpart->flags : %d\n", gpart->flags);
    }
    else
    {
        printf("false\n");

        /*
         * The key is already in plain form.  It still has to be picked up
         * here: otherwise gpart and flags would be read uninitialized below.
         */
        gpart = (GSERIALIZED *) PG_DETOAST_DATUM(entry->key);
    }

    flags = gpart->flags;

    if (FLAGS_GET_BBOX(flags))
    {
        printf("IsFlags : %d\n", flags);

        memcpy(&box2df, gpart->data, sizeof(BOX2DF));

        printf("----------------: %f\n", box2df.xmin);
        printf("----------------: %f\n", box2df.ymin);
        printf("----------------: %f\n", box2df.xmax);
        printf("----------------: %f\n", box2df.ymax);

        minPnt = (char *) palloc(size);
        maxPnt = (char *) palloc(size);

        /*
         * NOTE(review): this copies exactly `size` bytes out of the
         * geohash_encode result, which assumes that result is at least that
         * long and that no terminator is required -- confirm against
         * geohash_encode and convert_GeoHash_from_box2d.
         */
        memcpy(minPnt, geohash_encode((double) box2df.ymin, (double) box2df.xmin, size), size);
        memcpy(maxPnt, geohash_encode((double) box2df.ymax, (double) box2df.xmax, size), size);

        /*
         * The converter supplies the result buffer itself, so no separate
         * allocation is made for cvtGeoHash (it would only be overwritten).
         */
        cvtGeoHash = convert_GeoHash_from_box2d(minPnt, maxPnt, size);

        printf("-----------geohash_encode : %s\n", cvtGeoHash);
    }
    else
    {
        /*
         * TODO: compute the box from the full geometry, i.e.
         *   g = PG_DETOAST_DATUM(entry->key); lwgeom = lwgeom_from_gserialized(g);
         *   lwgeom_calculate_gbox(lwgeom, &gbox); lwgeom_free(lwgeom);
         *   box2df_from_gbox_p(&gbox, &box2df);
         * Until then the caller gets NULL.
         */
        printf("NotFlags : %d\n", flags);
    }

    return cvtGeoHash;
}
/* ----------
 * toast_fetch_datum_slice -
 *
 *	Reconstruct a segment of a Datum from the chunks saved
 *	in the toast relation
 *
 *	attr        - external toast pointer describing the stored value
 *	sliceoffset - first byte of the stored value to return; an offset at or
 *	              past the end of the value yields an empty result
 *	length      - number of bytes wanted; a negative value, or one reaching
 *	              past the end, means "up to the end of the value"
 *
 *	Returns a freshly palloc'd varlena holding the requested bytes.  It is
 *	flagged compressed when the stored value is compressed.  Inconsistent
 *	chunk numbers or sizes are reported through elog(ERROR).
 * ----------
 */
static struct varlena *
toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
{
    Relation    toastrel;
    Relation    toastidx;
    ScanKeyData toastkey[3];
    int         nscankeys;
    IndexScanDesc toastscan;
    HeapTuple   ttup;
    TupleDesc   toasttupDesc;
    varattrib  *result;
    int32       attrsize;
    int32       residx;
    int32       nextidx;
    int         numchunks;
    int         startchunk;
    int         endchunk;
    int32       startoffset;
    int32       endoffset;
    int         totalchunks;
    Pointer     chunk;
    bool        isnull;
    int32       chunksize;
    char       *chunkdata;
    int32       chcpystrt;
    int32       chcpyend;

    attrsize = ((varattrib *) attr)->va_external.va_extsize;
    totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

    if (sliceoffset >= attrsize)
    {
        sliceoffset = 0;
        length = 0;
    }

    if (((sliceoffset + length) > attrsize) || length < 0)
        length = attrsize - sliceoffset;

    result = (varattrib *) palloc(length + VARHDRSZ);
    SET_VARSIZE(result, length + VARHDRSZ);

    if (VARATT_EXTERNAL_IS_COMPRESSED(attr))
        VARATT_SET_COMPRESSED(result);

    if (length == 0)
        return (struct varlena *) result;   /* Can save a lot of work at this point! */

    startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
    endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
    numchunks = (endchunk - startchunk) + 1;

    startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
    endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;

    /*
     * Open the toast relation and its index
     */
    toastrel = heap_open(((varattrib *) attr)->va_external.va_toastrelid, AccessShareLock);
    toasttupDesc = toastrel->rd_att;
    toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);

    /*
     * Setup a scan key to fetch from the index. This is either two keys or
     * three depending on the number of chunks.
     */
    ScanKeyInit(&toastkey[0],
                (AttrNumber) 1,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(((varattrib *) attr)->va_external.va_valueid));

    /*
     * Use equality condition for one chunk, a range condition otherwise:
     */
    if (numchunks == 1)
    {
        ScanKeyInit(&toastkey[1],
                    (AttrNumber) 2,
                    BTEqualStrategyNumber, F_INT4EQ,
                    Int32GetDatum(startchunk));
        nscankeys = 2;
    }
    else
    {
        ScanKeyInit(&toastkey[1],
                    (AttrNumber) 2,
                    BTGreaterEqualStrategyNumber, F_INT4GE,
                    Int32GetDatum(startchunk));
        ScanKeyInit(&toastkey[2],
                    (AttrNumber) 2,
                    BTLessEqualStrategyNumber, F_INT4LE,
                    Int32GetDatum(endchunk));
        nscankeys = 3;
    }

    /*
     * Read the chunks by index
     *
     * The index is on (valueid, chunkidx) so they will come in order
     */
    nextidx = startchunk;

    toastscan = index_beginscan(toastrel, toastidx,
                                SnapshotToast, nscankeys, toastkey);
    while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
         * Have a chunk, extract the sequence number and the data
         */
        residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
        Assert(!isnull);
        chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
        Assert(!isnull);

        /*
         * A chunk may carry either a short or a regular varlena header.  The
         * payload starts at a different offset in each case, so the data
         * pointer must be chosen together with the size; using VARDATA() on
         * a short-header chunk would copy from the wrong position.
         */
        if (VARATT_IS_SHORT((varattrib *) chunk))
        {
            chunksize = VARSIZE_SHORT((varattrib *) chunk) - VARHDRSZ_SHORT;
            chunkdata = (char *) VARDATA_SHORT((varattrib *) chunk);
        }
        else if (!VARATT_IS_EXTENDED((varattrib *) chunk))
        {
            chunksize = VARSIZE((varattrib *) chunk) - VARHDRSZ;
            chunkdata = (char *) VARDATA((varattrib *) chunk);
        }
        else
        {
            elog(ERROR, "found toasted toast chunk?");
            chunksize = 0;      /* shut compiler up */
            chunkdata = NULL;
        }

        /*
         * Some checks on the data we've found
         */
        if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
            elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
                 residx, nextidx,
                 ((varattrib *) attr)->va_external.va_valueid);

        if (residx < totalchunks - 1)
        {
            if (chunksize != TOAST_MAX_CHUNK_SIZE)
                elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u of %d when fetching slice (expected %d)",
                     chunksize, residx,
                     ((varattrib *) attr)->va_external.va_valueid,
                     totalchunks - 1,
                     (int) TOAST_MAX_CHUNK_SIZE);
        }
        else if (residx == totalchunks - 1)
        {
            if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
                elog(ERROR, "unexpected chunk size %d in chunk %d for final toast value %u when fetching slice (expected %d)",
                     chunksize, residx,
                     ((varattrib *) attr)->va_external.va_valueid,
                     attrsize - residx * (int) TOAST_MAX_CHUNK_SIZE);
        }
        else
        {
            elog(ERROR, "unexpected chunk");
        }

        /*
         * Copy the data into proper place in our result.  The first and
         * last chunks of the slice may be used only partially.
         */
        chcpystrt = 0;
        chcpyend = chunksize - 1;
        if (residx == startchunk)
            chcpystrt = startoffset;
        if (residx == endchunk)
            chcpyend = endoffset;

        memcpy(((char *) VARDATA(result)) +
               (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
               chunkdata + chcpystrt,
               (chcpyend - chcpystrt) + 1);

        nextidx++;
    }

    /*
     * Final checks that we successfully fetched the datum
     */
    if (nextidx != (endchunk + 1))
        elog(ERROR, "missing chunk number %d for toast value %u",
             nextidx,
             ((varattrib *) attr)->va_external.va_valueid);

    /*
     * End scan and close relations
     */
    index_endscan(toastscan);
    index_close(toastidx, AccessShareLock);
    heap_close(toastrel, AccessShareLock);

    return (struct varlena *) result;
}
/* ---------- * toast_fetch_datum - * * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum(struct varlena *attr) { Relation toastrel; Relation toastidx; ScanKeyData toastkey; IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; varattrib *result; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; int32 chunksize; void *chunkdata; ressize = ((varattrib *)attr)->va_external.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; result = (varattrib *) palloc(ressize + VARHDRSZ); SET_VARSIZE(result, ressize + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(attr)) VARATT_SET_COMPRESSED(result); /* * Open the toast relation and its index */ toastrel = heap_open(((varattrib *)attr)->va_external.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index by va_valueid */ ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(((varattrib *)attr)->va_external.va_valueid)); /* * Read the chunks by index * * Note that because the index is actually on (valueid, chunkidx) we will * see the chunks in chunkidx order, even though we didn't explicitly ask * for it. 
*/ nextidx = 0; toastscan = index_beginscan(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (VARATT_IS_SHORT(chunk)) { chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else { elog(ERROR, "found toasted toast chunk?"); chunksize = 0; /* shut compiler up */ chunkdata = NULL; } /* * Some checks on the data we've found */ if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, ((varattrib *)attr)->va_external.va_valueid); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d in chunk %d of %d for toast value %u (expected %d)", chunksize, residx, ((varattrib *)attr)->va_external.va_valueid, numchunks-1, (int)TOAST_MAX_CHUNK_SIZE); } else if (residx == numchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) elog(ERROR, "unexpected chunk size %d in final chunk %d for toast value %u (expected %d)", chunksize, residx, ((varattrib *)attr)->va_external.va_valueid, ressize - residx*(int)TOAST_MAX_CHUNK_SIZE); } else elog(ERROR, "unexpected chunk number %d for toast value %u (expected in %d..%d)", residx, ((varattrib *)attr)->va_external.va_valueid, 0, numchunks-1); /* * Copy the data into proper place in our result */ memcpy(((char *) VARDATA(result)) + residx * TOAST_MAX_CHUNK_SIZE, chunkdata, chunksize); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, ((varattrib 
*)attr)->va_external.va_valueid); /* * End scan and close relations */ index_endscan(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return (struct varlena *)result; }
/*
 * Truncate (or extend with a hole) a large object so that it ends at byte
 * offset len.
 *
 *  obj_desc - descriptor of the large object; it must have been opened for
 *             writing (IFS_WRLOCK set in obj_desc->flags)
 *  len      - new end offset in bytes; must not be negative
 *
 * The page containing the truncation point is rewritten (or created when it
 * falls in a hole or past the current end), and every later page is
 * deleted.  Errors are reported through ereport/elog(ERROR): object not
 * opened for writing, object no longer existing, negative len, or a null
 * field found in the page tuple.
 */
void
inv_truncate(LargeObjectDesc *obj_desc, int len)
{
    int32       pageno;
    int         off;
    ScanKeyData skey[2];
    SysScanDesc sd;
    HeapTuple   oldtuple;
    Form_pg_largeobject olddata;
    struct
    {
        bytea       hdr;
        char        data[LOBLKSIZE];    /* make struct big enough */
        int32       align_it;   /* ensure struct is aligned well enough */
    }           workbuf;
    char       *workb = VARDATA(&workbuf.hdr);
    HeapTuple   newtup;
    Datum       values[Natts_pg_largeobject];
    bool        nulls[Natts_pg_largeobject];
    bool        replace[Natts_pg_largeobject];
    CatalogIndexState indstate;

    Assert(PointerIsValid(obj_desc));

    /* enforce writability because snapshot is probably wrong otherwise */
    if ((obj_desc->flags & IFS_WRLOCK) == 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("large object %u was not opened for writing",
                        obj_desc->id)));

    /* check existence of the target largeobject */
    if (!LargeObjectExists(obj_desc->id))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("large object %u was already dropped", obj_desc->id)));

    /*
     * Reject a negative target up front.  It would otherwise produce a
     * negative page number and in-page offset, and the page written below
     * would get a bogus (negative) varlena size.
     */
    if (len < 0)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("invalid large object truncation target: %d", len)));

    pageno = (int32) (len / LOBLKSIZE);

    open_lo_relation();

    indstate = CatalogOpenIndexes(lo_heap_r);

    /*
     * Set up to find all pages with desired loid and pageno >= target
     */
    ScanKeyInit(&skey[0],
                Anum_pg_largeobject_loid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(obj_desc->id));

    ScanKeyInit(&skey[1],
                Anum_pg_largeobject_pageno,
                BTGreaterEqualStrategyNumber, F_INT4GE,
                Int32GetDatum(pageno));

    sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
                                    obj_desc->snapshot, 2, skey);

    /*
     * If possible, get the page the truncation point is in. The truncation
     * point may be beyond the end of the LO or in a hole.
     */
    olddata = NULL;
    if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
    {
        if (HeapTupleHasNulls(oldtuple))    /* paranoia */
            elog(ERROR, "null field found in pg_largeobject");
        olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
        Assert(olddata->pageno >= pageno);
    }

    /*
     * If we found the page of the truncation point we need to truncate the
     * data in it.  Otherwise if we're in a hole, we need to create a page to
     * mark the end of data.
     */
    if (olddata != NULL && olddata->pageno == pageno)
    {
        /* First, load old data into workbuf */
        bytea      *datafield = &(olddata->data);   /* see note at top of file */
        bool        pfreeit = false;
        int         pagelen;

        if (VARATT_IS_EXTENDED(datafield))
        {
            datafield = (bytea *)
                heap_tuple_untoast_attr((struct varlena *) datafield);
            pfreeit = true;
        }
        pagelen = getbytealen(datafield);
        Assert(pagelen <= LOBLKSIZE);
        memcpy(workb, VARDATA(datafield), pagelen);
        if (pfreeit)
            pfree(datafield);

        /*
         * Fill any hole
         */
        off = len % LOBLKSIZE;
        if (off > pagelen)
            MemSet(workb + pagelen, 0, off - pagelen);

        /* compute length of new page */
        SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

        /*
         * Form and insert updated tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        memset(replace, false, sizeof(replace));
        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
        replace[Anum_pg_largeobject_data - 1] = true;
        newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
                                   values, nulls, replace);
        simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
        CatalogIndexInsert(indstate, newtup);
        heap_freetuple(newtup);
    }
    else
    {
        /*
         * If the first page we found was after the truncation point, we're in
         * a hole that we'll fill, but we need to delete the later page
         * because the loop below won't visit it again.
         */
        if (olddata != NULL)
        {
            Assert(olddata->pageno > pageno);
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }

        /*
         * Write a brand new page.
         *
         * Fill the hole up to the truncation point
         */
        off = len % LOBLKSIZE;
        if (off > 0)
            MemSet(workb, 0, off);

        /* compute length of new page */
        SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

        /*
         * Form and insert new tuple
         */
        memset(values, 0, sizeof(values));
        memset(nulls, false, sizeof(nulls));
        values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
        values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
        newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
        simple_heap_insert(lo_heap_r, newtup);
        CatalogIndexInsert(indstate, newtup);
        heap_freetuple(newtup);
    }

    /*
     * Delete any pages after the truncation point.  If the initial search
     * didn't find a page, then of course there's nothing more to do.
     */
    if (olddata != NULL)
    {
        while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
        {
            simple_heap_delete(lo_heap_r, &oldtuple->t_self);
        }
    }

    systable_endscan_ordered(sd);

    CatalogCloseIndexes(indstate);

    /*
     * Advance command counter so that tuple updates will be seen by later
     * large-object operations in this transaction.
     */
    CommandCounterIncrement();
}
/* ---------- * toast_fetch_datum - * * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum(struct varlena * attr) { Relation toastrel; Relation toastidx; ScanKeyData toastkey; SysScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; struct varlena *result; struct varatt_external toast_pointer; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; char *chunkdata; int32 chunksize; /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); ressize = toast_pointer.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; result = (struct varlena *) palloc(ressize + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ); else SET_VARSIZE(result, ressize + VARHDRSZ); /* * Open the toast relation and its index */ toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index by va_valueid */ ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Read the chunks by index * * Note that because the index is actually on (valueid, chunkidx) we will * see the chunks in chunkidx order, even though we didn't explicitly ask * for it. 
*/ nextidx = 0; toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else if (VARATT_IS_SHORT(chunk)) { /* could happen due to heap_form_tuple doing its thing */ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else { /* should never happen */ elog(ERROR, "found toasted toast chunk for toast value %u in %s", toast_pointer.va_valueid, RelationGetRelationName(toastrel)); chunksize = 0; /* keep compiler quiet */ chunkdata = NULL; } /* * Some checks on the data we've found */ if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s", residx, nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s", chunksize, (int) TOAST_MAX_CHUNK_SIZE, residx, numchunks, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else if (residx == numchunks - 1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s", chunksize, (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE), residx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s", residx, 0, numchunks - 1, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * Copy the data into proper place in our 
result */ memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE, chunkdata, chunksize); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u in %s", nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * End scan and close relations */ systable_endscan_ordered(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return result; }
/* ----------------
 *		index_form_tuple
 *
 *		Build an index tuple from per-column values and null flags.
 *
 *		The result is a single palloc0'd block whose t_info carries the
 *		tuple size together with the "has nulls" / "has variable-width
 *		attributes" bits.  Too many columns, or a size that does not fit
 *		in the t_info size field, are reported through ereport(ERROR).
 *
 *		This shouldn't leak any memory; otherwise, callers such as
 *		tuplesort_putindextuplevalues() will be very unhappy.
 * ----------------
 */
IndexTuple
index_form_tuple(TupleDesc tupleDescriptor,
                 Datum *values,
                 bool *isnull)
{
    int         natts = tupleDescriptor->natts;
    Datum      *fill_values = values;   /* values actually stored */
    char       *tp;             /* tuple pointer */
    IndexTuple  tuple;          /* return tuple */
    Size        size,
                data_size,
                hoff;
    int         i;
    unsigned short infomask = 0;
    bool        hasnull = false;
    uint16      tupmask = 0;

#ifdef TOAST_INDEX_HACK
    Datum       untoasted_values[INDEX_MAX_KEYS];
    bool        untoasted_free[INDEX_MAX_KEYS];
#endif

    if (natts > INDEX_MAX_KEYS)
        ereport(ERROR,
                (errcode(ERRCODE_TOO_MANY_COLUMNS),
                 errmsg("number of index columns (%d) exceeds limit (%d)",
                        natts, INDEX_MAX_KEYS)));

#ifdef TOAST_INDEX_HACK
    for (i = 0; i < natts; i++)
    {
        Form_pg_attribute att = tupleDescriptor->attrs[i];

        untoasted_values[i] = values[i];
        untoasted_free[i] = false;

        /* Do nothing if value is NULL or not of varlena type */
        if (isnull[i] || att->attlen != -1)
            continue;

        /*
         * If value is stored EXTERNAL, must fetch it so we are not depending
         * on outside storage.  This should be improved someday.
         */
        if (VARATT_IS_EXTERNAL(DatumGetPointer(values[i])))
        {
            untoasted_values[i] =
                PointerGetDatum(heap_tuple_fetch_attr((struct varlena *)
                                                      DatumGetPointer(values[i])));
            untoasted_free[i] = true;
        }

        /*
         * If value is above size target, and is of a compressible datatype,
         * try to compress it in-line.
         */
        if (!VARATT_IS_EXTENDED(DatumGetPointer(untoasted_values[i])) &&
            VARSIZE(DatumGetPointer(untoasted_values[i])) > TOAST_INDEX_TARGET &&
            (att->attstorage == 'x' || att->attstorage == 'm'))
        {
            Datum       cvalue = toast_compress_datum(untoasted_values[i]);

            if (DatumGetPointer(cvalue) != NULL)
            {
                /* successful compression: swap in the compressed copy */
                if (untoasted_free[i])
                    pfree(DatumGetPointer(untoasted_values[i]));
                untoasted_values[i] = cvalue;
                untoasted_free[i] = true;
            }
        }
    }

    /* from here on, work with the fetched/compressed copies */
    fill_values = untoasted_values;
#endif

    /* One null column is enough to require the null bitmap */
    for (i = 0; i < natts; i++)
    {
        if (isnull[i])
        {
            hasnull = true;
            break;
        }
    }

    if (hasnull)
        infomask |= INDEX_NULL_MASK;

    hoff = IndexInfoFindDataOffset(infomask);
    data_size = heap_compute_data_size(tupleDescriptor, fill_values, isnull);
    size = MAXALIGN(hoff + data_size);  /* be conservative */

    tp = (char *) palloc0(size);
    tuple = (IndexTuple) tp;

    heap_fill_tuple(tupleDescriptor,
                    fill_values,
                    isnull,
                    (char *) tp + hoff,
                    data_size,
                    &tupmask,
                    (hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL));

#ifdef TOAST_INDEX_HACK
    /* release the temporary copies made above */
    for (i = 0; i < natts; i++)
    {
        if (untoasted_free[i])
            pfree(DatumGetPointer(untoasted_values[i]));
    }
#endif

    /*
     * heap_fill_tuple wants to initialize a "tupmask" which is used for
     * HeapTuples, but we want an indextuple infomask.  The only relevant
     * info is the "has variable attributes" field; the hasnull bit has
     * already been set above.
     */
    if (tupmask & HEAP_HASVARWIDTH)
        infomask |= INDEX_VAR_MASK;

    /* Also assert we got rid of external attributes */
#ifdef TOAST_INDEX_HACK
    Assert((tupmask & HEAP_HASEXTERNAL) == 0);
#endif

    /*
     * Make sure that the size will fit in the field reserved for it in
     * t_info.
     */
    if ((size & INDEX_SIZE_MASK) != size)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row requires %zu bytes, maximum size is %zu",
                        size, (Size) INDEX_SIZE_MASK)));

    infomask |= size;

    /*
     * initialize metadata
     */
    tuple->t_info = infomask;
    return tuple;
}
/* ----------
 * toast_flatten_tuple_attribute -
 *
 *	"Flatten" a composite-type Datum so that none of its fields is toasted.
 *	This must be invoked on any potentially-composite field that is to be
 *	inserted into a tuple; doing so preserves the invariant that toasting
 *	goes only one level deep in a tuple.
 *
 *	The input Datum is returned unchanged when typeId/typeMod is not a
 *	composite type or when no field is in extended form.  Otherwise a newly
 *	palloc'd tuple with every varlena field untoasted is returned.
 * ----------
 */
Datum
toast_flatten_tuple_attribute(Datum value,
                              Oid typeId, int32 typeMod)
{
    TupleDesc   tupleDesc;
    Form_pg_attribute *att;
    int         numAttrs;
    HeapTupleHeader olddata;
    HeapTupleHeader new_data;
    HeapTupleData tmptup;
    int32       new_len;
    int         attno;
    bool        need_change = false;
    bool        has_nulls = false;
    Datum       toast_values[MaxTupleAttributeNumber];
    bool        toast_isnull[MaxTupleAttributeNumber];
    bool        toast_free[MaxTupleAttributeNumber];

    /* Not a composite type?  Then there is nothing to flatten. */
    tupleDesc = lookup_rowtype_tupdesc_noerror(typeId, typeMod, true);
    if (tupleDesc == NULL)
        return value;

    /* Work on a private copy of the descriptor */
    tupleDesc = CreateTupleDescCopy(tupleDesc);
    att = tupleDesc->attrs;
    numAttrs = tupleDesc->natts;

    /*
     * Break the tuple down into fields, going through a temporary HeapTuple
     * control structure wrapped around the datum.
     */
    olddata = DatumGetHeapTupleHeader(value);
    Assert(typeId == HeapTupleHeaderGetTypeId(olddata));
    Assert(typeMod == HeapTupleHeaderGetTypMod(olddata));

    tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata);
    ItemPointerSetInvalid(&(tmptup.t_self));
    tmptup.t_tableOid = InvalidOid;
    tmptup.t_data = olddata;

    Assert(numAttrs <= MaxTupleAttributeNumber);
    heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);

    memset(toast_free, 0, numAttrs * sizeof(bool));

    /* Untoast every non-null varlena field that is in extended form */
    for (attno = 0; attno < numAttrs; attno++)
    {
        varattrib  *field;

        if (toast_isnull[attno])
        {
            has_nulls = true;
            continue;
        }

        if (att[attno]->attlen != -1)
            continue;           /* not a varlena attribute */

        field = (varattrib *) DatumGetPointer(toast_values[attno]);
        if (VARATT_IS_EXTENDED(field))
        {
            field = heap_tuple_untoast_attr(field);
            toast_values[attno] = PointerGetDatum(field);
            toast_free[attno] = true;
            need_change = true;
        }
    }

    /* Nothing needed untoasting: hand back the original tuple */
    if (!need_change)
    {
        FreeTupleDesc(tupleDesc);
        return value;
    }

    /*
     * Work out the size of the new tuple.  The header size should not
     * change, but the data size might.
     */
    new_len = offsetof(HeapTupleHeaderData, t_bits);
    if (has_nulls)
        new_len += BITMAPLEN(numAttrs);
    if (olddata->t_infomask & HEAP_HASOID)
        new_len += sizeof(Oid);
    new_len = MAXALIGN(new_len);
    Assert(new_len == olddata->t_hoff);
    new_len += heap_compute_data_size(tupleDesc, toast_values, toast_isnull);

    new_data = (HeapTupleHeader) palloc0(new_len);

    /* Copy the old header, then lay down the (changed) field values */
    memcpy(new_data, olddata, olddata->t_hoff);
    HeapTupleHeaderSetDatumLength(new_data, new_len);

    heap_fill_tuple(tupleDesc,
                    toast_values,
                    toast_isnull,
                    (char *) new_data + olddata->t_hoff,
                    &(new_data->t_infomask),
                    has_nulls ? new_data->t_bits : NULL);

    /* Release the temporary untoasted copies */
    for (attno = 0; attno < numAttrs; attno++)
    {
        if (toast_free[attno])
            pfree(DatumGetPointer(toast_values[attno]));
    }
    FreeTupleDesc(tupleDesc);

    return PointerGetDatum(new_data);
}
/*
 * inv_write
 *
 * Copy nbytes bytes from buf into the large object described by obj_desc,
 * starting at obj_desc->offset and working one LOBLKSIZE page at a time.
 * A page already present in the index scan is read, patched and updated;
 * a missing page is created, with any bytes before the write position
 * zero-filled.  obj_desc->offset is advanced by the amount written.
 *
 * Returns the number of bytes written, or 0 immediately if nbytes <= 0.
 */
int
inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
	int			done = 0;		/* bytes consumed from buf so far */
	int			chunk;			/* bytes going into the current page */
	int			pageoff;		/* write position inside the current page */
	int			pagelen;		/* valid data length of the current page */
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	ScanKeyData skey[2];
	IndexScanDesc scan;
	HeapTuple	curtuple;
	Form_pg_largeobject curpage;
	bool		fetchnext;
	bytea	   *olddatum;
	bool		mustfree;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];
	}			workbuf;
	char	   *page = VARATT_DATA(&workbuf.hdr);
	HeapTuple	newtup;
	Datum		values[Natts_pg_largeobject];
	char		nulls[Natts_pg_largeobject];
	char		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	if (nbytes <= 0)
		return 0;

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/* Scan this object's pages, beginning with the first page we touch. */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	scan = index_beginscan(lo_heap_r, lo_index_r,
						   SnapshotNow, 2, skey);

	curtuple = NULL;
	curpage = NULL;
	fetchnext = true;

	for (; done < nbytes; pageno++)
	{
		/*
		 * Pull the next pre-existing page when the previous one has been
		 * consumed.  Pages are assumed to arrive in order, possibly with
		 * holes between them.
		 */
		if (fetchnext)
		{
			curtuple = index_getnext(scan, ForwardScanDirection);
			if (curtuple != NULL)
			{
				curpage = (Form_pg_largeobject) GETSTRUCT(curtuple);
				Assert(curpage->pageno >= pageno);
			}
			fetchnext = false;
		}

		/*
		 * Both branches below write at the same in-page position and clamp
		 * the amount the same way: to the room left in the page and to the
		 * bytes still owed to the caller.
		 */
		pageoff = (int) (obj_desc->offset % LOBLKSIZE);
		chunk = LOBLKSIZE - pageoff;
		if (chunk > nbytes - done)
			chunk = nbytes - done;

		if (curpage != NULL && curpage->pageno == pageno)
		{
			/*
			 * The page exists: load its current contents into workbuf
			 * first, so the new bytes overwrite only their own range.
			 */
			olddatum = &(curpage->data);
			mustfree = false;
			if (VARATT_IS_EXTENDED(olddatum))
			{
				olddatum = (bytea *)
					heap_tuple_untoast_attr((varattrib *) olddatum);
				mustfree = true;
			}
			pagelen = getbytealen(olddatum);
			Assert(pagelen <= LOBLKSIZE);
			memcpy(page, VARDATA(olddatum), pagelen);
			if (mustfree)
				pfree(olddatum);

			/* Zero the gap between the old end of data and our start. */
			if (pageoff > pagelen)
				MemSet(page + pagelen, 0, pageoff - pagelen);

			/* Splice in the caller's bytes. */
			memcpy(page + pageoff, buf + done, chunk);
			done += chunk;
			obj_desc->offset += chunk;

			/* The page grows only if we wrote past its previous end. */
			if (pageoff + chunk > pagelen)
				pagelen = pageoff + chunk;
			VARATT_SIZEP(&workbuf.hdr) = pagelen + VARHDRSZ;

			/* Replace just the data column of the existing tuple. */
			memset(values, 0, sizeof(values));
			memset(nulls, ' ', sizeof(nulls));
			memset(replace, ' ', sizeof(replace));
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			replace[Anum_pg_largeobject_data - 1] = 'r';
			newtup = heap_modifytuple(curtuple, lo_heap_r,
									  values, nulls, replace);
			simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);

			/* This scanned page is used up; fetch another next time. */
			curtuple = NULL;
			curpage = NULL;
			fetchnext = true;
		}
		else
		{
			/*
			 * No tuple for this page number: build the page from scratch,
			 * zero-filling everything before the write position.
			 */
			if (pageoff > 0)
				MemSet(page, 0, pageoff);

			memcpy(page + pageoff, buf + done, chunk);
			done += chunk;
			obj_desc->offset += chunk;

			pagelen = pageoff + chunk;
			VARATT_SIZEP(&workbuf.hdr) = pagelen + VARHDRSZ;

			/* Form and insert a complete new tuple. */
			memset(values, 0, sizeof(values));
			memset(nulls, ' ', sizeof(nulls));
			values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
			values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls);
			simple_heap_insert(lo_heap_r, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);
		}
	}

	index_endscan(scan);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance the command counter so later large-object operations in this
	 * transaction see the tuples written above.
	 */
	CommandCounterIncrement();

	return done;
}
/*
 *		heap_formtuple
 *
 *		construct a tuple from the given values[] and nulls[] arrays
 *
 *		Null attributes are indicated by a 'n' in the appropriate byte
 *		of nulls[]. Non-null attributes are indicated by a ' ' (space).
 *
 * OLD API with char 'n'/' ' convention for indicating nulls.
 * This is deprecated and should not be used in new code, but we keep it
 * around for use by old add-on modules.
 *
 * tupleDescriptor: row layout; its natts gives the length of both arrays
 * values:          attribute values; entries may be replaced in place by
 *                  their flattened form
 * nulls:           per-attribute flags; any byte other than ' ' is
 *                  treated as "null"
 *
 * Returns a palloc'd tuple (management struct and body in one chunk).
 * Raises ERRCODE_TOO_MANY_COLUMNS if natts exceeds MaxTupleAttributeNumber.
 */
HeapTuple
heap_formtuple(TupleDesc tupleDescriptor,
			   Datum *values,
			   char *nulls)
{
	HeapTuple	tuple;			/* return tuple */
	HeapTupleHeader td;			/* tuple data */
	Size		len,
				data_len;
	int			hoff;
	bool		hasnull = false;
	bool		isnull[MaxTupleAttributeNumber];	/* nulls[] as booleans */
	Form_pg_attribute *att = tupleDescriptor->attrs;
	int			numberOfAttributes = tupleDescriptor->natts;
	int			i;

	/* This check also guarantees isnull[] is large enough. */
	if (numberOfAttributes > MaxTupleAttributeNumber)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_COLUMNS),
				 errmsg("number of columns (%d) exceeds limit (%d)",
						numberOfAttributes, MaxTupleAttributeNumber)));

	/*
	 * Check for nulls and embedded tuples; expand any toasted attributes in
	 * embedded tuples.  This preserves the invariant that toasting can only
	 * go one level deep.
	 *
	 * We can skip calling toast_flatten_tuple_attribute() if the attribute
	 * couldn't possibly be of composite type.  All composite datums are
	 * varlena and have alignment 'd'; furthermore they aren't arrays. Also,
	 * if an attribute is already toasted, it must have been sent to disk
	 * already and so cannot contain toasted attributes.
	 *
	 * While scanning, translate the legacy char flags into a bool array.
	 * heap_fill_tuple() tests its null array for truth, and ' ' is a
	 * non-zero byte, so handing it nulls[] directly would make every
	 * column look null whenever a null bitmap is present.
	 */
	for (i = 0; i < numberOfAttributes; i++)
	{
		isnull[i] = (nulls[i] != ' ');

		if (isnull[i])
			hasnull = true;
		else if (att[i]->attlen == -1 &&
				 att[i]->attalign == 'd' &&
				 att[i]->attndims == 0 &&
				 !VARATT_IS_EXTENDED(values[i]))
		{
			values[i] = toast_flatten_tuple_attribute(values[i],
													  att[i]->atttypid,
													  att[i]->atttypmod);
		}
	}

	/*
	 * Determine total space needed
	 */
	len = offsetof(HeapTupleHeaderData, t_bits);

	if (hasnull)
		len += BITMAPLEN(numberOfAttributes);

	if (tupleDescriptor->tdhasoid)
		len += sizeof(Oid);

	hoff = len = MAXALIGN(len); /* align user data safely */

	/* The legacy size helper takes the char flags, so it keeps nulls[]. */
	data_len = ComputeDataSize(tupleDescriptor, values, nulls);

	len += data_len;

	/*
	 * Allocate and zero the space needed.  Note that the tuple body and
	 * HeapTupleData management structure are allocated in one chunk.
	 */
	tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len);
	tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);

	/*
	 * And fill in the information.  Note we fill the Datum fields even though
	 * this tuple may never become a Datum.
	 */
	tuple->t_len = len;
	ItemPointerSetInvalid(&(tuple->t_self));

	HeapTupleHeaderSetDatumLength(td, len);
	HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid);
	HeapTupleHeaderSetTypMod(td, tupleDescriptor->tdtypmod);

	HeapTupleHeaderSetNatts(td, numberOfAttributes);
	td->t_hoff = hoff;

	if (tupleDescriptor->tdhasoid)		/* else leave infomask = 0 */
		td->t_infomask = HEAP_HASOID;

	/* Pass the boolean null flags, not the char-coded ones. */
	heap_fill_tuple(tupleDescriptor,
					values,
					isnull,
					(char *) td + hoff,
					data_len,
					&td->t_infomask,
					(hasnull ? td->t_bits : NULL));

	return tuple;
}
/*
 * heap_form_minimal_tuple
 *		Build a MinimalTuple from values[] and isnull[], each holding
 *		tupleDescriptor->natts entries.
 *
 * Works like heap_form_tuple(), but the result is a "minimal" tuple: it has
 * no HeapTupleData header and no room for system columns.
 *
 * Entries of values[] that could be composite and are not already extended
 * are replaced in place by their flattened form.
 *
 * The result is allocated in the current memory context.  Raises
 * ERRCODE_TOO_MANY_COLUMNS when natts exceeds MaxTupleAttributeNumber.
 */
MinimalTuple
heap_form_minimal_tuple(TupleDesc tupleDescriptor,
						Datum *values,
						bool *isnull)
{
	Form_pg_attribute *attrs = tupleDescriptor->attrs;
	int			natts = tupleDescriptor->natts;
	MinimalTuple result;
	Size		total_len;
	Size		body_len;
	int			header_len;
	bool		any_null = false;
	int			attno;

	if (natts > MaxTupleAttributeNumber)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_COLUMNS),
				 errmsg("number of columns (%d) exceeds limit (%d)",
						natts, MaxTupleAttributeNumber)));

	/*
	 * Note which attributes are null, and flatten any embedded tuple so
	 * that toasting never goes more than one level deep.
	 *
	 * Only a varlena attribute with 'd' alignment that is not an array can
	 * be composite, so everything else is skipped.  A value that is already
	 * extended has been sent to disk and cannot contain toasted fields.
	 */
	for (attno = 0; attno < natts; attno++)
	{
		if (isnull[attno])
		{
			any_null = true;
			continue;
		}

		if (attrs[attno]->attlen != -1 ||
			attrs[attno]->attalign != 'd' ||
			attrs[attno]->attndims != 0 ||
			VARATT_IS_EXTENDED(values[attno]))
			continue;

		values[attno] = toast_flatten_tuple_attribute(values[attno],
													  attrs[attno]->atttypid,
													  attrs[attno]->atttypmod);
	}

	/*
	 * Header size: fixed part, optional null bitmap, optional OID, rounded
	 * up so the user data starts on an aligned boundary.
	 */
	total_len = offsetof(MinimalTupleData, t_bits);
	if (any_null)
		total_len += BITMAPLEN(natts);
	if (tupleDescriptor->tdhasoid)
		total_len += sizeof(Oid);
	total_len = MAXALIGN(total_len);
	header_len = total_len;

	body_len = heap_compute_data_size(tupleDescriptor, values, isnull);
	total_len += body_len;

	/* Allocate zeroed space, then fill in header fields and data. */
	result = (MinimalTuple) palloc0(total_len);

	result->t_len = total_len;
	HeapTupleHeaderSetNatts(result, natts);
	result->t_hoff = header_len + MINIMAL_TUPLE_OFFSET;

	if (tupleDescriptor->tdhasoid)		/* otherwise infomask stays 0 */
		result->t_infomask = HEAP_HASOID;

	heap_fill_tuple(tupleDescriptor,
					values,
					isnull,
					(char *) result + header_len,
					body_len,
					&result->t_infomask,
					(any_null ? result->t_bits : NULL));

	return result;
}
/*
 * make_variant_int: Converts our external (Variant) representation to a
 * VariantInt.
 *
 * v:       the stored variant; must already be fully detoasted (asserted)
 * fcinfo:  call info, passed through to get_cache()
 * func:    I/O function selector, passed through to get_cache()
 *
 * Returns a palloc0'd VariantInt.  For by-value types the datum is fetched
 * directly from the stored bytes; for varlena, cstring and fixed-size
 * by-reference types the payload is copied into newly palloc'd memory.
 * Raises an ERROR if the computed payload length is negative.
 */
static VariantInt
make_variant_int(Variant v, FunctionCallInfo fcinfo, IOFuncSelector func)
{
	VariantCache	*cache;
	VariantInt		vi;
	long			data_length; /* long instead of size_t because we're subtracting */
	Pointer			ptr;
	uint			flags;

	/* Ensure v is fully detoasted */
	Assert(!VARATT_IS_EXTENDED(v));

	/* May need to be careful about what context this stuff is palloc'd in */
	vi = palloc0(sizeof(VariantDataInt));

	vi->typid = get_oid(v, &flags);

#ifdef VARIANT_TEST_OID
	vi->typid -= OID_MASK;
#endif

	vi->typmod = v->typmod;
	vi->isnull = (flags & VAR_ISNULL ? true : false);

	cache = get_cache(fcinfo, vi, func);

	/*
	 * by-value type. We do special things with all pass-by-reference when we
	 * store, so we only use this for typbyval even though fetch_att supports
	 * pass-by-reference.
	 *
	 * Note that fetch_att sanity-checks typlen for us (because we're only
	 * passing typbyval).
	 */
	if(cache->typbyval)
	{
		if(!vi->isnull)
		{
			Pointer p = VDATAPTR_ALIGN(v, cache->typalign);
			vi->data = fetch_att(p, cache->typbyval, cache->typlen);
		}
		return vi;
	}

	/*
	 * We don't store a varlena header for varlena data; instead we compute
	 * its size based on ours:
	 *
	 * Our size - our header size - overflow byte (if present)
	 *
	 * For cstring, we don't store the trailing NUL
	 */
	data_length = VARSIZE(v) - VHDRSZ - (flags & VAR_OVERFLOW ? 1 : 0);
	if( data_length < 0 )
		elog(ERROR, "Negative data_length %li", data_length);

	if (cache->typlen == -1) /* varlena */
	{
		/* Rebuild a regular varlena: fresh header followed by the payload. */
		ptr = palloc0(data_length + VARHDRSZ);
		SET_VARSIZE(ptr, data_length + VARHDRSZ);
		memcpy(VARDATA(ptr), VDATAPTR(v), data_length);
	}
	else if(cache->typlen == -2) /* cstring */
	{
		ptr = palloc(data_length + 1); /* Need space for NUL terminator */
		memcpy(ptr, VDATAPTR(v), data_length);

		/*
		 * The buffer holds data_length + 1 bytes, so the terminator goes at
		 * index data_length, directly after the copied characters.  Index
		 * data_length + 1 would lie outside the allocation and would leave
		 * the string itself unterminated.
		 */
		*(ptr + data_length) = '\0';
	}
	else /* Fixed size, pass by reference */
	{
		if(vi->isnull)
		{
			vi->data = (Datum) 0;
			return vi;
		}

		Assert(data_length == cache->typlen);
		ptr = palloc0(data_length);
		Assert(ptr == (char *) att_align_nominal(ptr, cache->typalign));
		memcpy(ptr, VDATAPTR(v), data_length);
	}
	vi->data = PointerGetDatum(ptr);

	return vi;
}
/*
 * datumstreamwrite_lob
 *
 * Write a single variable-length datum as its own AOCSBK_BLOB content
 * block (row count 1) through acc->ao_write, then record that block in the
 * block directory.
 *
 * acc:            datum stream writer; its type must be variable length
 *                 (typeInfo.datumlen < 0) or an ERROR is raised
 * d:              the datum; an ERROR is raised if VARATT_IS_EXTENDED
 *                 reports it as extended
 * blockDirectory, colGroupNo, addColAction:
 *                 passed through to AppendOnlyBlockDirectory_InsertEntry()
 *
 * Returns the datum's size as reported by VARSIZE_ANY().
 */
int64
datumstreamwrite_lob(DatumStreamWrite * acc,
					 Datum d,
					 AppendOnlyBlockDirectory *blockDirectory,
					 int colGroupNo,
					 bool addColAction)
{
	uint8	   *lobBytes;
	int32		lobLen;

	Assert(acc);
	Assert(acc->datumStreamVersion == DatumStreamVersion_Original ||
		   acc->datumStreamVersion == DatumStreamVersion_Dense ||
		   acc->datumStreamVersion == DatumStreamVersion_Dense_Enhanced);

	/* Only varlena types may take this path. */
	if (acc->typeInfo.datumlen >= 0)
		elog(ERROR, "Large object must be variable length objects (varlena)");

	/* The caller is expected to hand us a plain datum. */
	if (VARATT_IS_EXTENDED(DatumGetPointer(d)))
		elog(ERROR, "Expected large object / variable length objects (varlena) to be de-toasted and/or de-compressed at this point");

	/*
	 * Fetch an externally stored datum.
	 *
	 * NOTE(review): if VARATT_IS_EXTERNAL implies VARATT_IS_EXTENDED, the
	 * check above already errored out and this branch cannot run -- confirm.
	 */
	if (VARATT_IS_EXTERNAL(DatumGetPointer(d)))
		d = PointerGetDatum(heap_tuple_fetch_attr(DatumGetPointer(d)));

	lobBytes = (uint8 *) DatumGetPointer(d);
	lobLen = VARSIZE_ANY(lobBytes);

	if (Debug_datumstream_write_print_large_varlena_info)
		datumstreamwrite_print_large_varlena_info(acc, lobBytes);

	/* Set the BlockFirstRowNum */
	AppendOnlyStorageWrite_SetFirstRowNum(&acc->ao_write,
										  acc->blockFirstRowNum);

	/* Emit the whole datum as one blob block holding a single row. */
	AppendOnlyStorageWrite_Content(&acc->ao_write,
								   lobBytes,
								   lobLen,
								   AOCSBK_BLOB,
								    /* rowCount */ 1);

	/* Insert an entry to the block directory */
	AppendOnlyBlockDirectory_InsertEntry(blockDirectory,
										 colGroupNo,
										 acc->blockFirstRowNum,
										 AppendOnlyStorageWrite_LogicalBlockStartOffset(&acc->ao_write),
										 1,	/* itemCount -- always just the lob just inserted */
										 addColAction);

	return lobLen;
}
/* ----------
 * toast_insert_or_update -
 *
 *	Delete no-longer-used toast-entries and create new ones to
 *	make the new tuple fit on INSERT or UPDATE
 *
 * Inputs:
 *	rel: the relation the tuple belongs to; supplies the tuple descriptor
 *		and the toast relation OID (rd_rel->reltoastrelid)
 *	newtup: the candidate new tuple to be inserted
 *	oldtup: the old row version for UPDATE, or NULL for INSERT
 * Result:
 *	either newtup if no toasting is needed, or a palloc'd modified tuple
 *	that is what should actually get stored
 *
 * NOTE: neither newtup nor oldtup will be modified.  This is a change
 * from the pre-8.1 API of this routine.
 *
 * Outline: deform the tuple(s); classify each attribute; then, while the
 * data area is larger than the target, run up to four passes (compress
 * 'x', externalize 'x'/'e', compress 'm', externalize 'm'), each pass
 * always picking the biggest eligible attribute; finally rebuild the tuple
 * if anything changed, free temporaries, and delete superseded external
 * values of the old tuple.
 * ----------
 */
HeapTuple
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
{
	HeapTuple	result_tuple;
	TupleDesc	tupleDesc;
	Form_pg_attribute *att;
	int			numAttrs;
	int			i;

	bool		need_change = false;	/* some value was replaced */
	bool		need_free = false;		/* some toast_values[] entry is ours to pfree */
	bool		need_delold = false;	/* some old external value must be deleted */
	bool		has_nulls = false;		/* new tuple has at least one NULL */

	Size		maxDataLen;

	/* Per-attribute working state, indexed by attribute number - 1 */
	char		toast_action[MaxHeapAttributeNumber];
	bool		toast_isnull[MaxHeapAttributeNumber];
	bool		toast_oldisnull[MaxHeapAttributeNumber];
	Datum		toast_values[MaxHeapAttributeNumber];
	Datum		toast_oldvalues[MaxHeapAttributeNumber];
	int32		toast_sizes[MaxHeapAttributeNumber];
	bool		toast_free[MaxHeapAttributeNumber];
	bool		toast_delold[MaxHeapAttributeNumber];

	/*
	 * Get the tuple descriptor and break down the tuple(s) into fields.
	 */
	tupleDesc = rel->rd_att;
	att = tupleDesc->attrs;
	numAttrs = tupleDesc->natts;

	Assert(numAttrs <= MaxHeapAttributeNumber);
	heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
	if (oldtup != NULL)
		heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);

	/* ----------
	 * Then collect information about the values given
	 *
	 * NOTE: toast_action[i] can have these values:
	 *		' '		default handling
	 *		'p'		already processed --- don't touch it
	 *		'x'		incompressible, but OK to move off
	 *
	 * NOTE: toast_sizes[i] is only made valid for varlena attributes with
	 *		toast_action[i] different from 'p'.
	 * ----------
	 */
	memset(toast_action, ' ', numAttrs * sizeof(char));
	memset(toast_free, 0, numAttrs * sizeof(bool));
	memset(toast_delold, 0, numAttrs * sizeof(bool));

	for (i = 0; i < numAttrs; i++)
	{
		varattrib  *old_value;
		varattrib  *new_value;

		if (oldtup != NULL)
		{
			/*
			 * For UPDATE get the old and new values of this attribute
			 */
			old_value = (varattrib *) DatumGetPointer(toast_oldvalues[i]);
			new_value = (varattrib *) DatumGetPointer(toast_values[i]);

			/*
			 * If the old value is an external stored one, check if it has
			 * changed so we have to delete it later.
			 */
			if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
				VARATT_IS_EXTERNAL(old_value))
			{
				/*
				 * "Changed" means: new value is NULL, or not external, or
				 * points at a different value id or toast relation.
				 */
				if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
					old_value->va_content.va_external.va_valueid !=
					new_value->va_content.va_external.va_valueid ||
					old_value->va_content.va_external.va_toastrelid !=
					new_value->va_content.va_external.va_toastrelid)
				{
					/*
					 * The old external stored value isn't needed any more
					 * after the update
					 */
					toast_delold[i] = true;
					need_delold = true;
				}
				else
				{
					/*
					 * This attribute isn't changed by this update so we reuse
					 * the original reference to the old value in the new
					 * tuple.
					 */
					toast_action[i] = 'p';
					toast_sizes[i] = VARATT_SIZE(toast_values[i]);
					continue;
				}
			}
		}
		else
		{
			/*
			 * For INSERT simply get the new value
			 */
			new_value = (varattrib *) DatumGetPointer(toast_values[i]);
		}

		/*
		 * Handle NULL attributes
		 */
		if (toast_isnull[i])
		{
			toast_action[i] = 'p';
			has_nulls = true;
			continue;
		}

		/*
		 * Now look at varlena attributes
		 */
		if (att[i]->attlen == -1)
		{
			/*
			 * If the table's attribute says PLAIN always, force it so.
			 */
			if (att[i]->attstorage == 'p')
				toast_action[i] = 'p';

			/*
			 * We took care of UPDATE above, so any external value we find
			 * still in the tuple must be someone else's we cannot reuse.
			 * Expand it to plain (and, probably, toast it again below).
			 */
			if (VARATT_IS_EXTERNAL(new_value))
			{
				new_value = heap_tuple_untoast_attr(new_value);
				toast_values[i] = PointerGetDatum(new_value);
				toast_free[i] = true;
				need_change = true;
				need_free = true;
			}

			/*
			 * Remember the size of this attribute
			 */
			toast_sizes[i] = VARATT_SIZE(new_value);
		}
		else
		{
			/*
			 * Not a varlena attribute, plain storage always
			 */
			toast_action[i] = 'p';
		}
	}

	/* ----------
	 * Compress and/or save external until data fits into target length
	 *
	 *	1: Inline compress attributes with attstorage 'x'
	 *	2: Store attributes with attstorage 'x' or 'e' external
	 *	3: Inline compress attributes with attstorage 'm'
	 *	4: Store attributes with attstorage 'm' external
	 * ----------
	 */

	/* compute header overhead --- this should match heap_form_tuple() */
	maxDataLen = offsetof(HeapTupleHeaderData, t_bits);
	if (has_nulls)
		maxDataLen += BITMAPLEN(numAttrs);
	if (newtup->t_data->t_infomask & HEAP_HASOID)
		maxDataLen += sizeof(Oid);
	maxDataLen = MAXALIGN(maxDataLen);
	Assert(maxDataLen == newtup->t_data->t_hoff);
	/* now convert to a limit on the tuple data size */
	maxDataLen = TOAST_TUPLE_TARGET - maxDataLen;

	/*
	 * Look for attributes with attstorage 'x' to compress
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen)
	{
		int			biggest_attno = -1;
		/* attributes no bigger than this are never selected */
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
		Datum		new_value;

		/*
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] != ' ')
				continue;
			if (VARATT_IS_EXTENDED(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'x')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		/* no candidate left for this pass */
		if (biggest_attno < 0)
			break;

		/*
		 * Attempt to compress it inline
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);

		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARATT_SIZE(toast_values[i]);
			need_change = true;
			need_free = true;
		}
		else
		{
			/*
			 * incompressible data, ignore on subsequent compression passes
			 */
			toast_action[i] = 'x';
		}
	}

	/*
	 * Second we look for attributes of attstorage 'x' or 'e' that are still
	 * inline.  This pass is skipped when the relation has no toast relation.
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen &&
		   rel->rd_rel->reltoastrelid != InvalidOid)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;

		/*------
		 * Search for the biggest yet inlined attribute with
		 * attstorage equals 'x' or 'e'
		 *------
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Store this external
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel, toast_values[i]);
		/* the inline copy is released only after the save has returned */
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));

		toast_free[i] = true;
		toast_sizes[i] = VARATT_SIZE(toast_values[i]);

		need_change = true;
		need_free = true;
	}

	/*
	 * Round 3 - this time we take attributes with storage 'm' into
	 * compression
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
		Datum		new_value;

		/*
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] != ' ')
				continue;
			if (VARATT_IS_EXTENDED(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Attempt to compress it inline
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);

		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARATT_SIZE(toast_values[i]);
			need_change = true;
			need_free = true;
		}
		else
		{
			/*
			 * incompressible data, ignore on subsequent compression passes
			 */
			toast_action[i] = 'x';
		}
	}

	/*
	 * Finally we store attributes of type 'm' external
	 */
	while (heap_compute_data_size(tupleDesc,
								  toast_values, toast_isnull) > maxDataLen &&
		   rel->rd_rel->reltoastrelid != InvalidOid)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;

		/*--------
		 * Search for the biggest yet inlined attribute with
		 * attstorage = 'm'
		 *--------
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Store this external
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel, toast_values[i]);
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));

		toast_free[i] = true;
		toast_sizes[i] = VARATT_SIZE(toast_values[i]);

		need_change = true;
		need_free = true;
	}

	/*
	 * In the case we toasted any values, we need to build a new heap tuple
	 * with the changed values.
	 */
	if (need_change)
	{
		HeapTupleHeader olddata = newtup->t_data;
		HeapTupleHeader new_data;
		int32		new_len;

		/*
		 * Calculate the new size of the tuple.  Header size should not
		 * change, but data size might.
		 */
		new_len = offsetof(HeapTupleHeaderData, t_bits);
		if (has_nulls)
			new_len += BITMAPLEN(numAttrs);
		if (olddata->t_infomask & HEAP_HASOID)
			new_len += sizeof(Oid);
		new_len = MAXALIGN(new_len);
		Assert(new_len == olddata->t_hoff);
		new_len += heap_compute_data_size(tupleDesc,
										  toast_values, toast_isnull);

		/*
		 * Allocate and zero the space needed, and fill HeapTupleData fields.
		 * Management struct and tuple body share one palloc'd chunk.
		 */
		result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_len);
		result_tuple->t_len = new_len;
		result_tuple->t_self = newtup->t_self;
		result_tuple->t_tableOid = newtup->t_tableOid;
		new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
		result_tuple->t_data = new_data;

		/*
		 * Put the existing tuple header and the changed values into place
		 */
		memcpy(new_data, olddata, olddata->t_hoff);

		heap_fill_tuple(tupleDesc,
						toast_values,
						toast_isnull,
						(char *) new_data + olddata->t_hoff,
						&(new_data->t_infomask),
						has_nulls ? new_data->t_bits : NULL);
	}
	else
		result_tuple = newtup;

	/*
	 * Free allocated temp values
	 */
	if (need_free)
		for (i = 0; i < numAttrs; i++)
			if (toast_free[i])
				pfree(DatumGetPointer(toast_values[i]));

	/*
	 * Delete external values from the old tuple
	 */
	if (need_delold)
		for (i = 0; i < numAttrs; i++)
			if (toast_delold[i])
				toast_delete_datum(rel, toast_oldvalues[i]);

	return result_tuple;
}
/* ----------
 * toast_insert_or_update -
 *
 *	Delete no-longer-used toast-entries and create new ones to
 *	make the new tuple fit on INSERT or UPDATE
 *
 *	rel:	the relation the tuple belongs to; supplies the tuple
 *			descriptor and the toast relation OID
 *	newtup:	the tuple about to be stored.  It is modified in place: when
 *			any value is changed, newtup->t_data and newtup->t_len are
 *			replaced with a newly allocated tuple body.
 *	oldtup:	the old row version for UPDATE, or NULL for INSERT
 *
 *	Attributes are fetched one by one with heap_getattr(), and null state
 *	is tracked with the char ' ' / 'n' convention in toast_nulls[].
 * ----------
 */
static void
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
{
	TupleDesc	tupleDesc;
	Form_pg_attribute *att;
	int			numAttrs;
	int			i;
	bool		old_isnull;
	bool		new_isnull;

	bool		need_change = false;	/* some value was replaced */
	bool		need_free = false;		/* some toast_values[] entry is ours to pfree */
	bool		need_delold = false;	/* some old external value must be deleted */
	bool		has_nulls = false;		/* new tuple has at least one NULL */

	Size		maxDataLen;

	/* Per-attribute working state, indexed by attribute number - 1 */
	char		toast_action[MaxHeapAttributeNumber];
	char		toast_nulls[MaxHeapAttributeNumber];
	Datum		toast_values[MaxHeapAttributeNumber];
	int32		toast_sizes[MaxHeapAttributeNumber];
	bool		toast_free[MaxHeapAttributeNumber];
	bool		toast_delold[MaxHeapAttributeNumber];

	/*
	 * Get the tuple descriptor, the number of and attribute descriptors
	 * and the location of the tuple values.
	 */
	tupleDesc = rel->rd_att;
	numAttrs = tupleDesc->natts;
	att = tupleDesc->attrs;

	/* ----------
	 * Then collect information about the values given
	 *
	 * NOTE: toast_action[i] can have these values:
	 *		' '		default handling
	 *		'p'		already processed --- don't touch it
	 *		'x'		incompressible, but OK to move off
	 * ----------
	 */
	memset(toast_action, ' ', numAttrs * sizeof(char));
	memset(toast_nulls, ' ', numAttrs * sizeof(char));
	memset(toast_free, 0, numAttrs * sizeof(bool));
	memset(toast_delold, 0, numAttrs * sizeof(bool));

	for (i = 0; i < numAttrs; i++)
	{
		varattrib  *old_value;
		varattrib  *new_value;

		if (oldtup != NULL)
		{
			/*
			 * For UPDATE get the old and new values of this attribute
			 */
			old_value = (varattrib *) DatumGetPointer(
					heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
			toast_values[i] =
				heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
			new_value = (varattrib *) DatumGetPointer(toast_values[i]);

			/*
			 * If the old value is an external stored one, check if it has
			 * changed so we have to delete it later.
			 */
			if (!old_isnull && att[i]->attlen == -1 &&
				VARATT_IS_EXTERNAL(old_value))
			{
				/*
				 * "Changed" means: new value is NULL, or not external, or
				 * points at a different value id or toast relation.
				 */
				if (new_isnull || !VARATT_IS_EXTERNAL(new_value) ||
					old_value->va_content.va_external.va_valueid !=
					new_value->va_content.va_external.va_valueid ||
					old_value->va_content.va_external.va_toastrelid !=
					new_value->va_content.va_external.va_toastrelid)
				{
					/*
					 * The old external store value isn't needed any more
					 * after the update
					 */
					toast_delold[i] = true;
					need_delold = true;
				}
				else
				{
					/*
					 * This attribute isn't changed by this update so we
					 * reuse the original reference to the old value in
					 * the new tuple.
					 */
					toast_action[i] = 'p';
					toast_sizes[i] = VARATT_SIZE(toast_values[i]);
					continue;
				}
			}
		}
		else
		{
			/*
			 * For INSERT simply get the new value
			 */
			toast_values[i] =
				heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
		}

		/*
		 * Handle NULL attributes
		 */
		if (new_isnull)
		{
			toast_action[i] = 'p';
			toast_nulls[i] = 'n';
			has_nulls = true;
			continue;
		}

		/*
		 * Now look at varsize attributes
		 */
		if (att[i]->attlen == -1)
		{
			/*
			 * If the table's attribute says PLAIN always, force it so.
			 */
			if (att[i]->attstorage == 'p')
				toast_action[i] = 'p';

			/*
			 * We took care of UPDATE above, so any external value we find
			 * still in the tuple must be someone else's we cannot reuse.
			 * Expand it to plain (and, probably, toast it again below).
			 */
			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
			{
				toast_values[i] = PointerGetDatum(heap_tuple_untoast_attr(
						(varattrib *) DatumGetPointer(toast_values[i])));
				toast_free[i] = true;
				need_change = true;
				need_free = true;
			}

			/*
			 * Remember the size of this attribute
			 */
			toast_sizes[i] = VARATT_SIZE(DatumGetPointer(toast_values[i]));
		}
		else
		{
			/*
			 * Not a variable size attribute, plain storage always
			 */
			toast_action[i] = 'p';
			toast_sizes[i] = att[i]->attlen;
		}
	}

	/* ----------
	 * Compress and/or save external until data fits into target length
	 *
	 *	1: Inline compress attributes with attstorage 'x'
	 *	2: Store attributes with attstorage 'x' or 'e' external
	 *	3: Inline compress attributes with attstorage 'm'
	 *	4: Store attributes with attstorage 'm' external
	 * ----------
	 */

	/* data-area budget: target size minus the aligned header/bitmap size */
	maxDataLen = offsetof(HeapTupleHeaderData, t_bits);
	if (has_nulls)
		maxDataLen += BITMAPLEN(numAttrs);
	maxDataLen = TOAST_TUPLE_TARGET - MAXALIGN(maxDataLen);

	/*
	 * Look for attributes with attstorage 'x' to compress
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
		   maxDataLen)
	{
		int			biggest_attno = -1;
		/* attributes no bigger than this are never selected */
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
		Datum		new_value;

		/*
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] != ' ')
				continue;
			if (VARATT_IS_EXTENDED(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'x')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		/* no candidate left for this pass */
		if (biggest_attno < 0)
			break;

		/*
		 * Attempt to compress it inline
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);

		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARATT_SIZE(toast_values[i]);
			need_change = true;
			need_free = true;
		}
		else
		{
			/*
			 * incompressible data, ignore on subsequent compression
			 * passes
			 */
			toast_action[i] = 'x';
		}
	}

	/*
	 * Second we look for attributes of attstorage 'x' or 'e' that are
	 * still inline.  This pass is skipped when the relation has no toast
	 * relation.
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
		   maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;

		/*------
		 * Search for the biggest yet inlined attribute with
		 * attstorage equals 'x' or 'e'
		 *------
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Store this external
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel, toast_values[i]);
		/* the inline copy is released only after the save has returned */
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));

		toast_free[i] = true;
		toast_sizes[i] = VARATT_SIZE(toast_values[i]);

		need_change = true;
		need_free = true;
	}

	/*
	 * Round 3 - this time we take attributes with storage 'm' into
	 * compression
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
		   maxDataLen)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
		Datum		new_value;

		/*
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] != ' ')
				continue;
			if (VARATT_IS_EXTENDED(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Attempt to compress it inline
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);

		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARATT_SIZE(toast_values[i]);
			need_change = true;
			need_free = true;
		}
		else
		{
			/*
			 * incompressible data, ignore on subsequent compression
			 * passes
			 */
			toast_action[i] = 'x';
		}
	}

	/*
	 * Finally we store attributes of type 'm' external
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
		   maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
	{
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;

		/*--------
		 * Search for the biggest yet inlined attribute with
		 * attstorage = 'm'
		 *--------
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
				biggest_size = toast_sizes[i];
			}
		}

		if (biggest_attno < 0)
			break;

		/*
		 * Store this external
		 */
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel, toast_values[i]);
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));

		toast_free[i] = true;
		toast_sizes[i] = VARATT_SIZE(toast_values[i]);

		need_change = true;
		need_free = true;
	}

	/*
	 * In the case we toasted any values, we need to build a new heap
	 * tuple with the changed values.
	 */
	if (need_change)
	{
		HeapTupleHeader olddata = newtup->t_data;
		char	   *new_data;
		int32		new_len;

		/*
		 * Calculate the new size of the tuple.  Header size should not
		 * change, but data size might.
		 */
		new_len = offsetof(HeapTupleHeaderData, t_bits);
		if (has_nulls)
			new_len += BITMAPLEN(numAttrs);
		if (olddata->t_infomask & HEAP_HASOID)
			new_len += sizeof(Oid);
		new_len = MAXALIGN(new_len);
		Assert(new_len == olddata->t_hoff);
		new_len += ComputeDataSize(tupleDesc, toast_values, toast_nulls);

		/*
		 * Allocate new tuple in same context as old one.
		 */
		new_data = (char *) MemoryContextAlloc(newtup->t_datamcxt, new_len);
		newtup->t_data = (HeapTupleHeader) new_data;
		newtup->t_len = new_len;

		/*
		 * Put the tuple header and the changed values into place
		 */
		memcpy(new_data, olddata, olddata->t_hoff);

		DataFill((char *) new_data + olddata->t_hoff,
				 tupleDesc,
				 toast_values,
				 toast_nulls,
				 &(newtup->t_data->t_infomask),
				 has_nulls ? newtup->t_data->t_bits : NULL);

		/*
		 * In the case we modified a previously modified tuple again, free
		 * the memory from the previous run.  A body located directly after
		 * the HeapTupleData struct is not separately freed.
		 */
		if ((char *) olddata != ((char *) newtup + HEAPTUPLESIZE))
			pfree(olddata);
	}

	/*
	 * Free allocated temp values
	 */
	if (need_free)
		for (i = 0; i < numAttrs; i++)
			if (toast_free[i])
				pfree(DatumGetPointer(toast_values[i]));

	/*
	 * Delete external values from the old tuple
	 */
	if (need_delold)
		for (i = 0; i < numAttrs; i++)
			if (toast_delold[i])
				toast_delete_datum(rel,
					heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
}
/*
 * inv_read -- copy up to nbytes of a large object's contents into buf
 *
 * Reading begins at obj_desc->offset; the offset is advanced by the number
 * of bytes delivered.  Pages that are absent from pg_largeobject ("holes")
 * between stored pages are returned as zeroes.  The scan stops when either
 * nbytes bytes have been produced or no further stored page exists, so the
 * return value (the count of bytes placed in buf) may be less than nbytes.
 * A request with nbytes <= 0 returns 0 without touching the relation.
 */
int
inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
{
	int			nread = 0;
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	tuple;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	if (nbytes <= 0)
		return 0;

	open_lo_relation();

	/* Scan condition: loid = this object AND pageno >= first wanted page */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									obj_desc->snapshot, 2, skey);

	for (;;)
	{
		Form_pg_largeobject data;
		uint32		pageoff;
		int			n;

		tuple = systable_getnext_ordered(sd, ForwardScanDirection);
		if (tuple == NULL)
			break;				/* ran out of stored pages */

		if (HeapTupleHasNulls(tuple))	/* paranoia */
			elog(ERROR, "null field found in pg_largeobject");
		data = (Form_pg_largeobject) GETSTRUCT(tuple);

		/*
		 * The ordered scan hands back pages in ascending pageno order, but
		 * some page numbers may be missing.  If this page starts beyond the
		 * current read position, the gap is a hole: emit zeroes for it.
		 */
		pageoff = ((uint32) data->pageno) * LOBLKSIZE;
		if (pageoff > obj_desc->offset)
		{
			n = pageoff - obj_desc->offset;
			if (n > (nbytes - nread))
				n = nbytes - nread;
			MemSet(buf + nread, 0, n);
			nread += n;
			obj_desc->offset += n;
		}

		/* If the hole did not satisfy the request, copy from this page */
		if (nread < nbytes)
		{
			bytea	   *datafield;
			bool		pfreeit = false;
			int			off;
			int			len;

			Assert(obj_desc->offset >= pageoff);
			off = (int) (obj_desc->offset - pageoff);
			Assert(off >= 0 && off < LOBLKSIZE);

			datafield = &(data->data);	/* see note at top of file */
			if (VARATT_IS_EXTENDED(datafield))
			{
				/* work on a detoasted copy, which we must release below */
				datafield = (bytea *)
					heap_tuple_untoast_attr((struct varlena *) datafield);
				pfreeit = true;
			}

			len = getbytealen(datafield);
			if (len > off)
			{
				/* bytes available on this page past our position */
				n = len - off;
				if (n > (nbytes - nread))
					n = nbytes - nread;
				memcpy(buf + nread, VARDATA(datafield) + off, n);
				nread += n;
				obj_desc->offset += n;
			}

			if (pfreeit)
				pfree(datafield);
		}

		if (nread >= nbytes)
			break;				/* request fully satisfied */
	}

	systable_endscan_ordered(sd);

	return nread;
}
/* ----------
 * toast_fetch_datum_slice -
 *
 *	Reconstruct a segment of a Datum from the chunks saved
 *	in the toast relation
 *
 *	attr		- the external (out-of-line) toast pointer to read from
 *	sliceoffset - byte offset of the first wanted byte within the value;
 *				  an offset at or beyond the end yields an empty result
 *	length		- number of bytes wanted; a negative value, or one that
 *				  would run past the end, means "through end of value"
 *
 *	Returns a freshly palloc'd varlena holding the requested bytes.
 *	Raises an error if the stored chunks are missing, out of order, or
 *	have unexpected sizes.
 * ----------
 */
static struct varlena *
toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
{
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData toastkey[3];
	int			nscankeys;
	SysScanDesc toastscan;
	HeapTuple	ttup;
	TupleDesc	toasttupDesc;
	struct varlena *result;
	struct varatt_external toast_pointer;
	int32		attrsize;
	int32		residx;
	int32		nextidx;
	int			numchunks;
	int			startchunk;
	int			endchunk;
	int32		startoffset;
	int32		endoffset;
	int			totalchunks;
	Pointer		chunk;
	bool		isnull;
	char	   *chunkdata;
	int32		chunksize;
	int32		chcpystrt;
	int32		chcpyend;

	Assert(VARATT_IS_EXTERNAL(attr));

	/* Must copy to access aligned fields */
	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);

	/*
	 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
	 * we can't return a compressed datum which is meaningful to toast later
	 */
	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));

	attrsize = toast_pointer.va_extsize;
	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

	if (sliceoffset >= attrsize)
	{
		sliceoffset = 0;
		length = 0;
	}

	/*
	 * Clamp the request to the end of the value.  The comparison is written
	 * as "length > attrsize - sliceoffset" rather than
	 * "sliceoffset + length > attrsize" so that a very large length cannot
	 * overflow the signed addition.  For a non-negative sliceoffset the
	 * subtraction is safe: after the adjustment above sliceoffset is either
	 * below attrsize or zero.
	 */
	if (length < 0 || length > attrsize - sliceoffset)
		length = attrsize - sliceoffset;

	result = (struct varlena *) palloc(length + VARHDRSZ);

	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
	else
		SET_VARSIZE(result, length + VARHDRSZ);

	if (length == 0)
		return result;			/* Can save a lot of work at this point! */

	/* Work out which chunks, and which bytes of the edge chunks, we need */
	startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
	endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
	numchunks = (endchunk - startchunk) + 1;

	startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
	endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock);

	/*
	 * Setup a scan key to fetch from the index. This is either two keys or
	 * three depending on the number of chunks.
	 */
	ScanKeyInit(&toastkey[0],
				(AttrNumber) 1,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(toast_pointer.va_valueid));

	/*
	 * Use equality condition for one chunk, a range condition otherwise:
	 */
	if (numchunks == 1)
	{
		ScanKeyInit(&toastkey[1],
					(AttrNumber) 2,
					BTEqualStrategyNumber, F_INT4EQ,
					Int32GetDatum(startchunk));
		nscankeys = 2;
	}
	else
	{
		ScanKeyInit(&toastkey[1],
					(AttrNumber) 2,
					BTGreaterEqualStrategyNumber, F_INT4GE,
					Int32GetDatum(startchunk));
		ScanKeyInit(&toastkey[2],
					(AttrNumber) 2,
					BTLessEqualStrategyNumber, F_INT4LE,
					Int32GetDatum(endchunk));
		nscankeys = 3;
	}

	/*
	 * Read the chunks by index
	 *
	 * The index is on (valueid, chunkidx) so they will come in order
	 */
	nextidx = startchunk;

	toastscan = systable_beginscan_ordered(toastrel, toastidx,
										   SnapshotToast, nscankeys, toastkey);
	while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
	{
		/*
		 * Have a chunk, extract the sequence number and the data
		 */
		residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
		Assert(!isnull);
		chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
		Assert(!isnull);
		if (!VARATT_IS_EXTENDED(chunk))
		{
			chunksize = VARSIZE(chunk) - VARHDRSZ;
			chunkdata = VARDATA(chunk);
		}
		else if (VARATT_IS_SHORT(chunk))
		{
			/* could happen due to heap_form_tuple doing its thing */
			chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
			chunkdata = VARDATA_SHORT(chunk);
		}
		else
		{
			/* should never happen */
			elog(ERROR, "found toasted toast chunk for toast value %u in %s",
				 toast_pointer.va_valueid,
				 RelationGetRelationName(toastrel));
			chunksize = 0;		/* keep compiler quiet */
			chunkdata = NULL;
		}

		/*
		 * Some checks on the data we've found
		 */
		if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
			elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
				 residx, nextidx,
				 toast_pointer.va_valueid,
				 RelationGetRelationName(toastrel));
		if (residx < totalchunks - 1)
		{
			/* every chunk but the last must be completely full */
			if (chunksize != TOAST_MAX_CHUNK_SIZE)
				elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
					 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
					 residx, totalchunks,
					 toast_pointer.va_valueid,
					 RelationGetRelationName(toastrel));
		}
		else if (residx == totalchunks - 1)
		{
			/* the last chunk must end exactly at the value's total size */
			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
				elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
					 chunksize,
					 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
					 residx,
					 toast_pointer.va_valueid,
					 RelationGetRelationName(toastrel));
		}
		else
			elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
				 residx,
				 0, totalchunks - 1,
				 toast_pointer.va_valueid,
				 RelationGetRelationName(toastrel));

		/*
		 * Copy the data into proper place in our result
		 *
		 * Only the first and last chunks of the slice can be partial; any
		 * chunk in between is copied whole.
		 */
		chcpystrt = 0;
		chcpyend = chunksize - 1;
		if (residx == startchunk)
			chcpystrt = startoffset;
		if (residx == endchunk)
			chcpyend = endoffset;

		memcpy(VARDATA(result) +
			   (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
			   chunkdata + chcpystrt,
			   (chcpyend - chcpystrt) + 1);

		nextidx++;
	}

	/*
	 * Final checks that we successfully fetched the datum
	 */
	if (nextidx != (endchunk + 1))
		elog(ERROR, "missing chunk number %d for toast value %u in %s",
			 nextidx,
			 toast_pointer.va_valueid,
			 RelationGetRelationName(toastrel));

	/*
	 * End scan and close relations
	 */
	systable_endscan_ordered(toastscan);
	index_close(toastidx, AccessShareLock);
	heap_close(toastrel, AccessShareLock);

	return result;
}
/*
 * inv_write -- write nbytes from buf into a large object
 *
 * Writing begins at obj_desc->offset, and the offset is advanced past the
 * data written.  Existing pages that overlap the written range are updated
 * in place (old contents outside the written range are preserved); pages
 * that do not exist yet are inserted.  Any gap between the end of existing
 * data on a page and the write position is filled with zeroes.
 *
 * The descriptor must have been opened for writing (IFS_WRLOCK) and the
 * large object must still exist; otherwise an error is raised.  An error is
 * also raised if an existing page's data field has an impossible length.
 *
 * Returns the number of bytes written: nbytes, or 0 when nbytes <= 0.
 */
int
inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
{
	int			nwritten = 0;
	int			n;
	int			off;
	int			len;
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	Form_pg_largeobject olddata;
	bool		neednextpage;
	bytea	   *datafield;
	bool		pfreeit;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			workbuf;
	char	   *workb = VARDATA(&workbuf.hdr);
	HeapTuple	newtup;
	Datum		values[Natts_pg_largeobject];
	bool		nulls[Natts_pg_largeobject];
	bool		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	/* enforce writability because snapshot is probably wrong otherwise */
	if ((obj_desc->flags & IFS_WRLOCK) == 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("large object %u was not opened for writing",
						obj_desc->id)));

	/* check existence of the target largeobject */
	if (!LargeObjectExists(obj_desc->id))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("large object %u was already dropped", obj_desc->id)));

	if (nbytes <= 0)
		return 0;

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/* Scan condition: loid = this object AND pageno >= first page to write */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									obj_desc->snapshot, 2, skey);

	oldtuple = NULL;
	olddata = NULL;
	neednextpage = true;

	/* Each iteration writes (part of) exactly one page, then moves on */
	while (nwritten < nbytes)
	{
		/*
		 * If possible, get next pre-existing page of the LO.  We expect the
		 * indexscan will deliver these in order --- but there may be holes.
		 */
		if (neednextpage)
		{
			if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
			{
				if (HeapTupleHasNulls(oldtuple))		/* paranoia */
					elog(ERROR, "null field found in pg_largeobject");
				olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
				Assert(olddata->pageno >= pageno);
			}
			neednextpage = false;
		}

		/*
		 * If we have a pre-existing page, see if it is the page we want to
		 * write, or a later one.
		 */
		if (olddata != NULL && olddata->pageno == pageno)
		{
			/*
			 * Update an existing page with fresh data.
			 *
			 * First, load old data into workbuf
			 */
			datafield = &(olddata->data);		/* see note at top of file */
			pfreeit = false;
			if (VARATT_IS_EXTENDED(datafield))
			{
				datafield = (bytea *)
					heap_tuple_untoast_attr((struct varlena *) datafield);
				pfreeit = true;
			}
			len = getbytealen(datafield);

			/*
			 * Verify the stored length before copying into the fixed-size
			 * work buffer.  This must be a run-time check, not an Assert:
			 * a corrupt row would otherwise overrun workbuf on the stack in
			 * builds where assertions are compiled out.
			 */
			if (len < 0 || len > LOBLKSIZE)
				elog(ERROR, "pg_largeobject entry for OID %u, page %d has invalid data field size %d",
					 olddata->loid, olddata->pageno, len);

			memcpy(workb, VARDATA(datafield), len);
			if (pfreeit)
				pfree(datafield);

			/*
			 * Fill any hole
			 */
			off = (int) (obj_desc->offset % LOBLKSIZE);
			if (off > len)
				MemSet(workb + len, 0, off - len);

			/*
			 * Insert appropriate portion of new data
			 */
			n = LOBLKSIZE - off;
			n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
			memcpy(workb + off, buf + nwritten, n);
			nwritten += n;
			obj_desc->offset += n;
			off += n;
			/* compute valid length of new page */
			len = (len >= off) ? len : off;
			SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

			/*
			 * Form and insert updated tuple
			 */
			memset(values, 0, sizeof(values));
			memset(nulls, false, sizeof(nulls));
			memset(replace, false, sizeof(replace));
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			replace[Anum_pg_largeobject_data - 1] = true;
			newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
									   values, nulls, replace);
			simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);

			/*
			 * We're done with this old page.
			 */
			oldtuple = NULL;
			olddata = NULL;
			neednextpage = true;
		}
		else
		{
			/*
			 * Write a brand new page.
			 *
			 * First, fill any hole
			 */
			off = (int) (obj_desc->offset % LOBLKSIZE);
			if (off > 0)
				MemSet(workb, 0, off);

			/*
			 * Insert appropriate portion of new data
			 */
			n = LOBLKSIZE - off;
			n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
			memcpy(workb + off, buf + nwritten, n);
			nwritten += n;
			obj_desc->offset += n;
			/* compute valid length of new page */
			len = off + n;
			SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

			/*
			 * Form and insert updated tuple
			 */
			memset(values, 0, sizeof(values));
			memset(nulls, false, sizeof(nulls));
			values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
			values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
			simple_heap_insert(lo_heap_r, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);
		}
		pageno++;
	}

	systable_endscan_ordered(sd);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that my tuple updates will be seen by later
	 * large-object operations in this transaction.
	 */
	CommandCounterIncrement();

	return nwritten;
}