/* ---------- * toast_raw_datum_size - * * Return the raw (detoasted) size of a varlena datum * (including the VARHDRSZ header) * ---------- */ Size toast_raw_datum_size(Datum value) { varattrib *attr = (varattrib *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) { /* va_rawsize is the size of the original datum -- including header */ result = attr->va_external.va_rawsize; } else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ result = attr->va_compressed.va_rawsize + VARHDRSZ; } else if (VARATT_IS_SHORT(attr)) { /* * we have to normalize the header length to VARHDRSZ or else the * callers of this function will be confused. */ result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; } else { /* plain untoasted datum */ result = VARSIZE(attr); } return result; }
/* ---------- * toast_datum_size * * Return the physical storage size (possibly compressed) of a varlena datum * ---------- */ Size toast_datum_size(Datum value) { struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) { /* * Attribute is stored externally - return the extsize whether * compressed or not. We do not count the size of the toast pointer * ... should we? */ struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_extsize; } else if (VARATT_IS_SHORT(attr)) { result = VARSIZE_SHORT(attr); } else { /* * Attribute is stored inline either compressed or not, just calculate * the size of the datum in either case. */ result = VARSIZE(attr); } return result; }
/* ---------- * toast_raw_datum_size - * * Return the raw (detoasted) size of a varlena datum * (including the VARHDRSZ header) * ---------- */ Size toast_raw_datum_size(Datum value) { struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) { /* va_rawsize is the size of the original datum -- including header */ struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_rawsize; } else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ result = VARRAWSIZE_4B_C(attr) + VARHDRSZ; } else if (VARATT_IS_SHORT(attr)) { /* * we have to normalize the header length to VARHDRSZ or else the * callers of this function will be confused. */ result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; } else { /* plain untoasted datum */ result = VARSIZE(attr); } return result; }
/** * If this function is changed then update varattrib_untoast_len as well */ void varattrib_untoast_ptr_len(Datum d, char **datastart, int *len, void **tofree) { if (DatumGetPointer(d) == NULL) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg(" Unable to detoast datum "), errprintstack(true))); } struct varlena *va = (struct varlena *) DatumGetPointer(d); varattrib *attr = (varattrib *) va; *len = -1; *tofree = NULL; if(VARATT_IS_EXTENDED(attr)) { if(VARATT_IS_EXTERNAL(attr)) { attr = (varattrib *)toast_fetch_datum((struct varlena *)attr); /* toast_fetch_datum will palloc, so set it up for free */ *tofree = attr; } if(VARATT_IS_COMPRESSED(attr)) { PGLZ_Header *tmp = (PGLZ_Header *) attr; attr = (varattrib *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); pglz_decompress(tmp, VARDATA(attr)); /* If tofree is set, that is, we get it from toast_fetch_datum. * We need to free it here */ if(*tofree) pfree(*tofree); *tofree = attr; } else if(VARATT_IS_SHORT(attr)) { /* Warning! Return unaligned pointer! */ *len = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; *datastart = VARDATA_SHORT(attr); attr = NULL; } } if(*len == -1) { *datastart = VARDATA(attr); *len = VARSIZE(attr) - VARHDRSZ; } Assert(*len >= 0); }
/* ---------- * heap_tuple_untoast_attr - * * Public entry point to get back a toasted value from compression * or external storage. * ---------- */ struct varlena * heap_tuple_untoast_attr(struct varlena * attr) { if (VARATT_IS_EXTERNAL(attr)) { /* * This is an externally stored datum --- fetch it back from there */ attr = toast_fetch_datum(attr); /* If it's compressed, decompress it */ if (VARATT_IS_COMPRESSED(attr)) { PGLZ_Header *tmp = (PGLZ_Header *) attr; attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); pglz_decompress(tmp, VARDATA(attr)); pfree(tmp); } } else if (VARATT_IS_COMPRESSED(attr)) { /* * This is a compressed value inside of the main tuple */ PGLZ_Header *tmp = (PGLZ_Header *) attr; attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); pglz_decompress(tmp, VARDATA(attr)); } else if (VARATT_IS_SHORT(attr)) { /* * This is a short-header varlena --- convert to 4-byte header format */ Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; Size new_size = data_size + VARHDRSZ; struct varlena *new_attr; new_attr = (struct varlena *) palloc(new_size); SET_VARSIZE(new_attr, new_size); memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); attr = new_attr; } return attr; }
/** * If this function is changed then update varattrib_untoast_ptr_len as well */ int varattrib_untoast_len(Datum d) { if (DatumGetPointer(d) == NULL) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg(" Unable to detoast datum "), errprintstack(true))); } struct varlena *va = (struct varlena *) DatumGetPointer(d); varattrib *attr = (varattrib *) va; int len = -1; void *toFree = NULL; if(VARATT_IS_EXTENDED(attr)) { if(VARATT_IS_EXTERNAL(attr)) { attr = (varattrib *)toast_fetch_datum((struct varlena *)attr); /* toast_fetch_datum will palloc, so set it up for free */ toFree = attr; } if(VARATT_IS_COMPRESSED(attr)) { PGLZ_Header *tmp = (PGLZ_Header *) attr; len = PGLZ_RAW_SIZE(tmp); } else if(VARATT_IS_SHORT(attr)) { len = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; } } if(len == -1) { len = VARSIZE(attr) - VARHDRSZ; } if ( toFree) pfree(toFree); Assert(len >= 0); return len; }
/* ---------- * heap_tuple_untoast_attr - * * Public entry point to get back a toasted value from compression * or external storage. * ---------- */ struct varlena * heap_tuple_untoast_attr(struct varlena *attr) { if (VARATT_IS_EXTERNAL(attr)) { /* * This is an externally stored datum --- fetch it back from there */ attr = toast_fetch_datum(attr); /* fall through to IS_COMPRESSED if it's a compressed external datum */ } if (VARATT_IS_COMPRESSED(attr)) { /* * This is a compressed value inside of the main tuple */ PGLZ_Header *tmp = (PGLZ_Header *) attr; attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); pglz_decompress(tmp, VARDATA(attr)); } else if (VARATT_IS_SHORT(attr)) { /* * This is a short-header varlena --- convert to 4-byte header format */ Size data_size = VARSIZE_SHORT(attr); Size new_size = data_size - VARHDRSZ_SHORT + VARHDRSZ; varattrib *tmp = (varattrib *)attr; /* This is a "short" varlena header but is otherwise a normal varlena */ attr = (struct varlena *) palloc(new_size); SET_VARSIZE(attr, new_size); memcpy(VARDATA(attr), VARDATA_SHORT(tmp), data_size - VARHDRSZ_SHORT); } return attr; }
/* * heap_deform_tuple * Given a tuple, extract data into values/isnull arrays; this is * the inverse of heap_form_tuple. * * Storage for the values/isnull arrays is provided by the caller; * it should be sized according to tupleDesc->natts not tuple->t_natts. * * Note that for pass-by-reference datatypes, the pointer placed * in the Datum will point into the given tuple. * * When all or most of a tuple's fields need to be extracted, * this routine will be significantly quicker than a loop around * heap_getattr; the loop will become O(N^2) as soon as any * noncacheable attribute offsets are involved. */ void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull) { HeapTupleHeader tup = tuple->t_data; bool hasnulls = HeapTupleHasNulls(tuple); Form_pg_attribute *att = tupleDesc->attrs; int tdesc_natts = tupleDesc->natts; int natts; /* number of atts to extract */ int attnum; char *tp; /* ptr to tuple data */ long off; /* offset in tuple data */ bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */ bool slow = false; /* can we use/set attcacheoff? */ Assert(!is_heaptuple_memtuple(tuple)); natts = HeapTupleHeaderGetNatts(tup); /* * In inheritance situations, it is possible that the given tuple actually * has more fields than the caller is expecting. Don't run off the end of * the caller's arrays. */ natts = Min(natts, tdesc_natts); tp = (char *) tup + tup->t_hoff; off = 0; for (attnum = 0; attnum < natts; attnum++) { Form_pg_attribute thisatt = att[attnum]; if (hasnulls && att_isnull(attnum, bp)) { values[attnum] = (Datum) 0; isnull[attnum] = true; slow = true; /* can't use attcacheoff anymore */ continue; } isnull[attnum] = false; if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; else if (thisatt->attlen == -1) { /* * We can only cache the offset for a varlena attribute if the * offset is already suitably aligned, so that there would be no * pad bytes in any case: then the offset will be valid for either * an aligned or unaligned value. */ if (!slow && off == att_align_nominal(off, thisatt->attalign)) thisatt->attcacheoff = off; else { off = att_align_pointer(off, thisatt->attalign, -1, tp + off); slow = true; } } else { /* not varlena, so safe to use att_align_nominal */ off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; } if (!slow && thisatt->attlen < 0) slow = true; values[attnum] = fetchatt(thisatt, tp + off); #ifdef USE_ASSERT_CHECKING /* Ignore attributes with dropped types */ if (thisatt->attlen == -1 && !thisatt->attisdropped) { Assert(VARATT_IS_SHORT(DatumGetPointer(values[attnum])) || !VARATT_CAN_MAKE_SHORT(DatumGetPointer(values[attnum])) || thisatt->atttypid == OIDVECTOROID || thisatt->atttypid == INT2VECTOROID || thisatt->atttypid >= FirstNormalObjectId); } #endif off = att_addlength_pointer(off, thisatt->attlen, tp + off); } /* * If tuple doesn't have all the atts indicated by tupleDesc, read the * rest as null */ for (; attnum < tdesc_natts; attnum++) { values[attnum] = (Datum) 0; isnull[attnum] = true; } }
/* ---------- * heap_tuple_untoast_attr_slice - * * Public entry point to get back part of a toasted value * from compression or external storage. * ---------- */ struct varlena * heap_tuple_untoast_attr_slice(struct varlena * attr, int32 sliceoffset, int32 slicelength) { struct varlena *preslice; struct varlena *result; char *attrdata; int32 attrsize; if (VARATT_IS_EXTERNAL(attr)) { struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* fast path for non-compressed external datums */ if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); /* fetch it back (compressed marker will get set automatically) */ preslice = toast_fetch_datum(attr); } else preslice = attr; if (VARATT_IS_COMPRESSED(preslice)) { PGLZ_Header *tmp = (PGLZ_Header *) preslice; Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; preslice = (struct varlena *) palloc(size); SET_VARSIZE(preslice, size); pglz_decompress(tmp, VARDATA(preslice)); if (tmp != (PGLZ_Header *) attr) pfree(tmp); } if (VARATT_IS_SHORT(preslice)) { attrdata = VARDATA_SHORT(preslice); attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; } else { attrdata = VARDATA(preslice); attrsize = VARSIZE(preslice) - VARHDRSZ; } /* slicing of datum for compressed cases and plain value */ if (sliceoffset >= attrsize) { sliceoffset = 0; slicelength = 0; } if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) slicelength = attrsize - sliceoffset; result = (struct varlena *) palloc(slicelength + VARHDRSZ); SET_VARSIZE(result, slicelength + VARHDRSZ); memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); if (preslice != attr) pfree(preslice); return result; }
/* ---------- * toast_fetch_datum_slice - * * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) { Relation toastrel; Relation toastidx; ScanKeyData toastkey[3]; int nscankeys; SysScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; struct varlena *result; struct varatt_external toast_pointer; int32 attrsize; int32 residx; int32 nextidx; int numchunks; int startchunk; int endchunk; int32 startoffset; int32 endoffset; int totalchunks; Pointer chunk; bool isnull; char *chunkdata; int32 chunksize; int32 chcpystrt; int32 chcpyend; Assert(VARATT_IS_EXTERNAL(attr)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* * we can't return a compressed datum which is meaningful to toast later */ Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; if (sliceoffset >= attrsize) { sliceoffset = 0; length = 0; } if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; result = (struct varlena *) palloc(length + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); else SET_VARSIZE(result, length + VARHDRSZ); if (length == 0) return result; /* Can save a lot of work at this point! */ startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE; endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE; numchunks = (endchunk - startchunk) + 1; startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE; endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE; /* * Open the toast relation and its index */ toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index. This is either two keys or * three depending on the number of chunks. */ ScanKeyInit(&toastkey[0], (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Use equality condition for one chunk, a range condition otherwise: */ if (numchunks == 1) { ScanKeyInit(&toastkey[1], (AttrNumber) 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(startchunk)); nscankeys = 2; } else { ScanKeyInit(&toastkey[1], (AttrNumber) 2, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(startchunk)); ScanKeyInit(&toastkey[2], (AttrNumber) 2, BTLessEqualStrategyNumber, F_INT4LE, Int32GetDatum(endchunk)); nscankeys = 3; } /* * Read the chunks by index * * The index is on (valueid, chunkidx) so they will come in order */ nextidx = startchunk; toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, nscankeys, toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else if (VARATT_IS_SHORT(chunk)) { /* could happen due to heap_form_tuple doing its thing */ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else { /* should never happen */ elog(ERROR, "found toasted toast chunk for toast value %u in %s", toast_pointer.va_valueid, RelationGetRelationName(toastrel)); chunksize = 0; /* keep compiler quiet */ chunkdata = NULL; } /* * Some checks on the data we've found */ if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s", residx, nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); if (residx < totalchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice", chunksize, (int) TOAST_MAX_CHUNK_SIZE, residx, totalchunks, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else if (residx == totalchunks - 1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice", chunksize, (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE), residx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s", residx, 0, totalchunks - 1, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * Copy the data into proper place in our result */ chcpystrt = 0; chcpyend = chunksize - 1; if (residx == startchunk) chcpystrt = startoffset; if (residx == endchunk) chcpyend = endoffset; memcpy(VARDATA(result) + (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, chunkdata + chcpystrt, (chcpyend - chcpystrt) + 1); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != (endchunk + 1)) elog(ERROR, "missing chunk number %d for toast value %u in %s", nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * End scan and close relations */ systable_endscan_ordered(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return result; }
/* ---------- * toast_fetch_datum - * * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum(struct varlena * attr) { Relation toastrel; Relation toastidx; ScanKeyData toastkey; SysScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; struct varlena *result; struct varatt_external toast_pointer; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; char *chunkdata; int32 chunksize; /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); ressize = toast_pointer.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; result = (struct varlena *) palloc(ressize + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ); else SET_VARSIZE(result, ressize + VARHDRSZ); /* * Open the toast relation and its index */ toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index by va_valueid */ ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Read the chunks by index * * Note that because the index is actually on (valueid, chunkidx) we will * see the chunks in chunkidx order, even though we didn't explicitly ask * for it. */ nextidx = 0; toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else if (VARATT_IS_SHORT(chunk)) { /* could happen due to heap_form_tuple doing its thing */ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else { /* should never happen */ elog(ERROR, "found toasted toast chunk for toast value %u in %s", toast_pointer.va_valueid, RelationGetRelationName(toastrel)); chunksize = 0; /* keep compiler quiet */ chunkdata = NULL; } /* * Some checks on the data we've found */ if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s", residx, nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s", chunksize, (int) TOAST_MAX_CHUNK_SIZE, residx, numchunks, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else if (residx == numchunks - 1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s", chunksize, (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE), residx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s", residx, 0, numchunks - 1, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * Copy the data into proper place in our result */ memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE, chunkdata, chunksize); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u in %s", nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * End scan and close relations */ systable_endscan_ordered(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return result; }
/* ---------- * toast_save_datum - * * Save one single datum into the secondary relation and return * a Datum reference for it. * ---------- */ static Datum toast_save_datum(Relation rel, Datum value, int options) { Relation toastrel; Relation toastidx; HeapTuple toasttup; TupleDesc toasttupDesc; Datum t_values[3]; bool t_isnull[3]; CommandId mycid = GetCurrentCommandId(true); struct varlena *result; struct varatt_external toast_pointer; struct { struct varlena hdr; char data[TOAST_MAX_CHUNK_SIZE]; /* make struct big enough */ int32 align_it; /* ensure struct is aligned well enough */ } chunk_data; int32 chunk_size; int32 chunk_seq = 0; char *data_p; int32 data_todo; Pointer dval = DatumGetPointer(value); /* * Open the toast relation and its index. We can use the index to check * uniqueness of the OID we assign to the toasted item, even though it has * additional columns besides OID. */ toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock); /* * Get the data pointer and length, and compute va_rawsize and va_extsize. * * va_rawsize is the size of the equivalent fully uncompressed datum, so * we have to adjust for short headers. * * va_extsize is the actual size of the data payload in the toast records. */ if (VARATT_IS_SHORT(dval)) { data_p = VARDATA_SHORT(dval); data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT; toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */ toast_pointer.va_extsize = data_todo; } else if (VARATT_IS_COMPRESSED(dval)) { data_p = VARDATA(dval); data_todo = VARSIZE(dval) - VARHDRSZ; /* rawsize in a compressed datum is just the size of the payload */ toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ; toast_pointer.va_extsize = data_todo; /* Assert that the numbers look like it's compressed */ Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } else { data_p = VARDATA(dval); data_todo = VARSIZE(dval) - VARHDRSZ; toast_pointer.va_rawsize = VARSIZE(dval); toast_pointer.va_extsize = data_todo; } /* * Insert the correct table OID into the result TOAST pointer. * * Normally this is the actual OID of the target toast table, but during * table-rewriting operations such as CLUSTER, we have to insert the OID * of the table's real permanent toast table instead. rd_toastoid is set * if we have to substitute such an OID. */ if (OidIsValid(rel->rd_toastoid)) toast_pointer.va_toastrelid = rel->rd_toastoid; else toast_pointer.va_toastrelid = RelationGetRelid(toastrel); /* * Choose an unused OID within the toast table for this toast value. */ toast_pointer.va_valueid = GetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber) 1); /* * Initialize constant parts of the tuple data */ t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid); t_values[2] = PointerGetDatum(&chunk_data); t_isnull[0] = false; t_isnull[1] = false; t_isnull[2] = false; /* * Split up the item into chunks */ while (data_todo > 0) { /* * Calculate the size of this chunk */ chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo); /* * Build a tuple and store it */ t_values[1] = Int32GetDatum(chunk_seq++); SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ); memcpy(VARDATA(&chunk_data), data_p, chunk_size); toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull); heap_insert(toastrel, toasttup, mycid, options, NULL); /* * Create the index entry. We cheat a little here by not using * FormIndexDatum: this relies on the knowledge that the index columns * are the same as the initial columns of the table. * * Note also that there had better not be any user-created index on * the TOAST table, since we don't bother to update anything else. */ index_insert(toastidx, t_values, t_isnull, &(toasttup->t_self), toastrel, toastidx->rd_index->indisunique ? UNIQUE_CHECK_YES : UNIQUE_CHECK_NO); /* * Free memory */ heap_freetuple(toasttup); /* * Move on to next chunk */ data_todo -= chunk_size; data_p += chunk_size; } /* * Done - close toast relation */ index_close(toastidx, RowExclusiveLock); heap_close(toastrel, RowExclusiveLock); /* * Create the TOAST pointer value that we'll return */ result = (struct varlena *) palloc(TOAST_POINTER_SIZE); SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE); memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer)); return PointerGetDatum(result); }
/* * heap_fill_tuple * Load data portion of a tuple from values/isnull arrays * * We also fill the null bitmap (if any) and set the infomask bits * that reflect the tuple's data contents. * * NOTE: it is now REQUIRED that the caller have pre-zeroed the data area. */ void heap_fill_tuple(TupleDesc tupleDesc, Datum *values, bool *isnull, char *data, Size data_size, uint16 *infomask, bits8 *bit) { bits8 *bitP; int bitmask; int i; int numberOfAttributes = tupleDesc->natts; Form_pg_attribute *att = tupleDesc->attrs; #ifdef USE_ASSERT_CHECKING char *start = data; #endif if (bit != NULL) { bitP = &bit[-1]; bitmask = HIGHBIT; } else { /* just to keep compiler quiet */ bitP = NULL; bitmask = 0; } *infomask &= ~(HEAP_HASNULL | HEAP_HASVARWIDTH | HEAP_HASEXTERNAL); for (i = 0; i < numberOfAttributes; i++) { Size data_length; if (bit != NULL) { if (bitmask != HIGHBIT) bitmask <<= 1; else { bitP += 1; *bitP = 0x0; bitmask = 1; } if (isnull[i]) { *infomask |= HEAP_HASNULL; continue; } *bitP |= bitmask; } /* * XXX we use the att_align macros on the pointer value itself, not on * an offset. This is a bit of a hack. */ if (att[i]->attbyval) { /* pass-by-value */ data = (char *) att_align_nominal(data, att[i]->attalign); store_att_byval(data, values[i], att[i]->attlen); data_length = att[i]->attlen; } else if (att[i]->attlen == -1) { /* varlena */ Pointer val = DatumGetPointer(values[i]); *infomask |= HEAP_HASVARWIDTH; if (VARATT_IS_EXTERNAL(val)) { *infomask |= HEAP_HASEXTERNAL; /* no alignment, since it's short by definition */ data_length = VARSIZE_EXTERNAL(val); memcpy(data, val, data_length); } else if (VARATT_IS_SHORT(val)) { /* no alignment for short varlenas */ data_length = VARSIZE_SHORT(val); memcpy(data, val, data_length); } else if (VARLENA_ATT_IS_PACKABLE(att[i]) && VARATT_CAN_MAKE_SHORT(val)) { /* convert to short varlena -- no alignment */ data_length = VARATT_CONVERTED_SHORT_SIZE(val); SET_VARSIZE_SHORT(data, data_length); memcpy(data + 1, VARDATA(val), data_length - 1); } else { /* full 4-byte header varlena */ data = (char *) att_align_nominal(data, att[i]->attalign); data_length = VARSIZE(val); memcpy(data, val, data_length); } } else if (att[i]->attlen == -2) { /* cstring ... never needs alignment */ *infomask |= HEAP_HASVARWIDTH; Assert(att[i]->attalign == 'c'); data_length = strlen(DatumGetCString(values[i])) + 1; memcpy(data, DatumGetPointer(values[i]), data_length); } else { /* fixed-length pass-by-reference */ data = (char *) att_align_nominal(data, att[i]->attalign); Assert(att[i]->attlen > 0); data_length = att[i]->attlen; memcpy(data, DatumGetPointer(values[i]), data_length); } data += data_length; } Assert((data - start) == data_size); }
/* * Per-attribute helper for heap_fill_tuple and other routines building tuples. * * Fill in either a data value or a bit in the null bitmask */ static inline void fill_val(Form_pg_attribute att, bits8 **bit, int *bitmask, char **dataP, uint16 *infomask, Datum datum, bool isnull) { Size data_length; char *data = *dataP; /* * If we're building a null bitmap, set the appropriate bit for the * current column value here. */ if (bit != NULL) { if (*bitmask != HIGHBIT) *bitmask <<= 1; else { *bit += 1; **bit = 0x0; *bitmask = 1; } if (isnull) { *infomask |= HEAP_HASNULL; return; } **bit |= *bitmask; } /* * XXX we use the att_align macros on the pointer value itself, not on an * offset. This is a bit of a hack. */ if (att->attbyval) { /* pass-by-value */ data = (char *) att_align_nominal(data, att->attalign); store_att_byval(data, datum, att->attlen); data_length = att->attlen; } else if (att->attlen == -1) { /* varlena */ Pointer val = DatumGetPointer(datum); *infomask |= HEAP_HASVARWIDTH; if (VARATT_IS_EXTERNAL(val)) { if (VARATT_IS_EXTERNAL_EXPANDED(val)) { /* * we want to flatten the expanded value so that the * constructed tuple doesn't depend on it */ ExpandedObjectHeader *eoh = DatumGetEOHP(datum); data = (char *) att_align_nominal(data, att->attalign); data_length = EOH_get_flat_size(eoh); EOH_flatten_into(eoh, data, data_length); } else { *infomask |= HEAP_HASEXTERNAL; /* no alignment, since it's short by definition */ data_length = VARSIZE_EXTERNAL(val); memcpy(data, val, data_length); } } else if (VARATT_IS_SHORT(val)) { /* no alignment for short varlenas */ data_length = VARSIZE_SHORT(val); memcpy(data, val, data_length); } else if (VARLENA_ATT_IS_PACKABLE(att) && VARATT_CAN_MAKE_SHORT(val)) { /* convert to short varlena -- no alignment */ data_length = VARATT_CONVERTED_SHORT_SIZE(val); SET_VARSIZE_SHORT(data, data_length); memcpy(data + 1, VARDATA(val), data_length - 1); } else { /* full 4-byte header varlena */ data = (char *) att_align_nominal(data, att->attalign); data_length = VARSIZE(val); memcpy(data, val, data_length); } } else if (att->attlen == -2) { /* cstring ... never needs alignment */ *infomask |= HEAP_HASVARWIDTH; Assert(att->attalign == 'c'); data_length = strlen(DatumGetCString(datum)) + 1; memcpy(data, DatumGetPointer(datum), data_length); } else { /* fixed-length pass-by-reference */ data = (char *) att_align_nominal(data, att->attalign); Assert(att->attlen > 0); data_length = att->attlen; memcpy(data, DatumGetPointer(datum), data_length); } data += data_length; *dataP = data; }
/* ---------- * heap_tuple_untoast_attr_slice - * * Public entry point to get back part of a toasted value * from compression or external storage. * ---------- */ struct varlena * heap_tuple_untoast_attr_slice(struct varlena * attr, int32 sliceoffset, int32 slicelength) { varattrib *preslice; varattrib *result; char *attrdata; int32 attrsize; if (VARATT_IS_EXTERNAL(attr)) { /* fast path for non-compressed external datums */ if (!VARATT_EXTERNAL_IS_COMPRESSED(attr)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); /* this automatically sets the compressed flag if appropriate */ preslice = (varattrib *)toast_fetch_datum(attr); } else preslice = (varattrib *)attr; if (VARATT_IS_COMPRESSED(preslice)) { unsigned size; PGLZ_Header *tmp; tmp = (PGLZ_Header *) preslice; size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; preslice = (varattrib *) palloc(size); SET_VARSIZE(preslice, size); pglz_decompress(tmp, VARDATA(preslice)); if (tmp != (PGLZ_Header *) attr) pfree(tmp); } if (VARATT_IS_SHORT(preslice)) { attrdata = VARDATA_SHORT(preslice); attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; } else { attrdata = VARDATA(preslice); attrsize = VARSIZE(preslice) - VARHDRSZ; } /* slicing of datum for compressed cases and plain value */ if (sliceoffset >= attrsize) { sliceoffset = 0; slicelength = 0; } if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) slicelength = attrsize - sliceoffset; result = (varattrib *) palloc(slicelength + VARHDRSZ); SET_VARSIZE(result, slicelength + VARHDRSZ); memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); if ((struct varlena *)preslice != (struct varlena *)attr) pfree(preslice); return (struct varlena *)result; }
/* ---------- * toast_fetch_datum_slice - * * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) { Relation toastrel; Relation toastidx; ScanKeyData toastkey[3]; int nscankeys; IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; varattrib *result; int32 attrsize; int32 residx; int32 nextidx; int numchunks; int startchunk; int endchunk; int32 startoffset; int32 endoffset; int totalchunks; Pointer chunk; bool isnull; int32 chunksize; int32 chcpystrt; int32 chcpyend; attrsize = ((varattrib *)attr)->va_external.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; if (sliceoffset >= attrsize) { sliceoffset = 0; length = 0; } if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; result = (varattrib *) palloc(length + VARHDRSZ); SET_VARSIZE(result, length + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(attr)) VARATT_SET_COMPRESSED(result); if (length == 0) return (struct varlena *)result; /* Can save a lot of work at this point! */ startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE; endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE; numchunks = (endchunk - startchunk) + 1; startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE; endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE; /* * Open the toast relation and its index */ toastrel = heap_open(((varattrib *)attr)->va_external.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index. This is either two keys or * three depending on the number of chunks. */ ScanKeyInit(&toastkey[0], (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(((varattrib *)attr)->va_external.va_valueid)); /* * Use equality condition for one chunk, a range condition otherwise: */ if (numchunks == 1) { ScanKeyInit(&toastkey[1], (AttrNumber) 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(startchunk)); nscankeys = 2; } else { ScanKeyInit(&toastkey[1], (AttrNumber) 2, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(startchunk)); ScanKeyInit(&toastkey[2], (AttrNumber) 2, BTLessEqualStrategyNumber, F_INT4LE, Int32GetDatum(endchunk)); nscankeys = 3; } /* * Read the chunks by index * * The index is on (valueid, chunkidx) so they will come in order */ nextidx = startchunk; toastscan = index_beginscan(toastrel, toastidx, SnapshotToast, nscankeys, toastkey); while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (VARATT_IS_SHORT((varattrib *)chunk)) chunksize = VARSIZE_SHORT((varattrib *)chunk) - VARHDRSZ_SHORT; else if (!VARATT_IS_EXTENDED((varattrib *)chunk)) chunksize = VARSIZE((varattrib *)chunk) - VARHDRSZ; else { elog(ERROR, "found toasted toast chunk?"); chunksize = 0; /* shut compiler up */ } /* * Some checks on the data we've found */ if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, ((varattrib *)attr)->va_external.va_valueid); if (residx < totalchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u of %d when fetching slice (expected %d)", chunksize, residx, ((varattrib *)attr)->va_external.va_valueid, totalchunks-1, (int)TOAST_MAX_CHUNK_SIZE); } else if (residx == totalchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) elog(ERROR, "unexpected chunk size %d in chunk %d for final toast value %u when fetching slice (expected %d)", chunksize, residx, ((varattrib *)attr)->va_external.va_valueid, attrsize - residx * (int)TOAST_MAX_CHUNK_SIZE); } else { elog(ERROR, "unexpected chunk"); } /* * Copy the data into proper place in our result */ chcpystrt = 0; chcpyend = chunksize - 1; if (residx == startchunk) chcpystrt = startoffset; if (residx == endchunk) chcpyend = endoffset; memcpy(((char *) VARDATA(result)) + (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, VARDATA((varattrib *)chunk) + chcpystrt, (chcpyend - chcpystrt) + 1); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != (endchunk + 1)) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, ((varattrib *)attr)->va_external.va_valueid); /* * End scan and close relations */ index_endscan(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return (struct varlena *)result; }
/* ---------- * toast_fetch_datum - * * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum(struct varlena *attr) { Relation toastrel; Relation toastidx; ScanKeyData toastkey; IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; varattrib *result; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; int32 chunksize; void *chunkdata; ressize = ((varattrib *)attr)->va_external.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; result = (varattrib *) palloc(ressize + VARHDRSZ); SET_VARSIZE(result, ressize + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(attr)) VARATT_SET_COMPRESSED(result); /* * Open the toast relation and its index */ toastrel = heap_open(((varattrib *)attr)->va_external.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index by va_valueid */ ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(((varattrib *)attr)->va_external.va_valueid)); /* * Read the chunks by index * * Note that because the index is actually on (valueid, chunkidx) we will * see the chunks in chunkidx order, even though we didn't explicitly ask * for it. */ nextidx = 0; toastscan = index_beginscan(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (VARATT_IS_SHORT(chunk)) { chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else { elog(ERROR, "found toasted toast chunk?"); chunksize = 0; /* shut compiler up */ chunkdata = NULL; } /* * Some checks on the data we've found */ if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, ((varattrib *)attr)->va_external.va_valueid); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d in chunk %d of %d for toast value %u (expected %d)", chunksize, residx, ((varattrib *)attr)->va_external.va_valueid, numchunks-1, (int)TOAST_MAX_CHUNK_SIZE); } else if (residx == numchunks-1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) elog(ERROR, "unexpected chunk size %d in final chunk %d for toast value %u (expected %d)", chunksize, residx, ((varattrib *)attr)->va_external.va_valueid, ressize - residx*(int)TOAST_MAX_CHUNK_SIZE); } else elog(ERROR, "unexpected chunk number %d for toast value %u (expected in %d..%d)", residx, ((varattrib *)attr)->va_external.va_valueid, 0, numchunks-1); /* * Copy the data into proper place in our result */ memcpy(((char *) VARDATA(result)) + residx * TOAST_MAX_CHUNK_SIZE, chunkdata, chunksize); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, ((varattrib *)attr)->va_external.va_valueid); /* * End scan and close relations */ index_endscan(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return (struct varlena *)result; }