/* ---------- * toast_datum_size * * Return the physical storage size (possibly compressed) of a varlena datum * ---------- */ Size toast_datum_size(Datum value) { struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) { /* * Attribute is stored externally - return the extsize whether * compressed or not. We do not count the size of the toast pointer * ... should we? */ struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_extsize; } else if (VARATT_IS_SHORT(attr)) { result = VARSIZE_SHORT(attr); } else { /* * Attribute is stored inline either compressed or not, just calculate * the size of the datum in either case. */ result = VARSIZE(attr); } return result; }
/* ---------- * toast_raw_datum_size - * * Return the raw (detoasted) size of a varlena datum * (including the VARHDRSZ header) * ---------- */ Size toast_raw_datum_size(Datum value) { struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; if (VARATT_IS_EXTERNAL(attr)) { /* va_rawsize is the size of the original datum -- including header */ struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_rawsize; } else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ result = VARRAWSIZE_4B_C(attr) + VARHDRSZ; } else if (VARATT_IS_SHORT(attr)) { /* * we have to normalize the header length to VARHDRSZ or else the * callers of this function will be confused. */ result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; } else { /* plain untoasted datum */ result = VARSIZE(attr); } return result; }
/* ---------- * toast_delete_datum - * * Delete a single external stored value. * ---------- */ static void toast_delete_datum(Relation rel, Datum value) { struct varlena *attr = (struct varlena *) DatumGetPointer(value); struct varatt_external toast_pointer; Relation toastrel; Relation toastidx; ScanKeyData toastkey; SysScanDesc toastscan; HeapTuple toasttup; if (!VARATT_IS_EXTERNAL(attr)) return; /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* * Open the toast relation and its index */ toastrel = heap_open(toast_pointer.va_toastrelid, RowExclusiveLock); toastidx = index_open(toastrel->rd_rel->reltoastidxid, RowExclusiveLock); /* * Setup a scan key to find chunks with matching va_valueid */ ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Find all the chunks. (We don't actually care whether we see them in * sequence or not, but since we've already locked the index we might as * well use systable_beginscan_ordered.) */ toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, delete it */ simple_heap_delete(toastrel, &toasttup->t_self); } /* * End scan and close relations */ systable_endscan_ordered(toastscan); index_close(toastidx, RowExclusiveLock); heap_close(toastrel, RowExclusiveLock); }
/* ---------- * heap_tuple_untoast_attr_slice - * * Public entry point to get back part of a toasted value * from compression or external storage. * ---------- */ struct varlena * heap_tuple_untoast_attr_slice(struct varlena * attr, int32 sliceoffset, int32 slicelength) { struct varlena *preslice; struct varlena *result; char *attrdata; int32 attrsize; if (VARATT_IS_EXTERNAL(attr)) { struct varatt_external toast_pointer; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* fast path for non-compressed external datums */ if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); /* fetch it back (compressed marker will get set automatically) */ preslice = toast_fetch_datum(attr); } else preslice = attr; if (VARATT_IS_COMPRESSED(preslice)) { PGLZ_Header *tmp = (PGLZ_Header *) preslice; Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; preslice = (struct varlena *) palloc(size); SET_VARSIZE(preslice, size); pglz_decompress(tmp, VARDATA(preslice)); if (tmp != (PGLZ_Header *) attr) pfree(tmp); } if (VARATT_IS_SHORT(preslice)) { attrdata = VARDATA_SHORT(preslice); attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; } else { attrdata = VARDATA(preslice); attrsize = VARSIZE(preslice) - VARHDRSZ; } /* slicing of datum for compressed cases and plain value */ if (sliceoffset >= attrsize) { sliceoffset = 0; slicelength = 0; } if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) slicelength = attrsize - sliceoffset; result = (struct varlena *) palloc(slicelength + VARHDRSZ); SET_VARSIZE(result, slicelength + VARHDRSZ); memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); if (preslice != attr) pfree(preslice); return result; }
/* ---------- * toast_fetch_datum_slice - * * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) { Relation toastrel; Relation toastidx; ScanKeyData toastkey[3]; int nscankeys; SysScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; struct varlena *result; struct varatt_external toast_pointer; int32 attrsize; int32 residx; int32 nextidx; int numchunks; int startchunk; int endchunk; int32 startoffset; int32 endoffset; int totalchunks; Pointer chunk; bool isnull; char *chunkdata; int32 chunksize; int32 chcpystrt; int32 chcpyend; Assert(VARATT_IS_EXTERNAL(attr)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* * we can't return a compressed datum which is meaningful to toast later */ Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; if (sliceoffset >= attrsize) { sliceoffset = 0; length = 0; } if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; result = (struct varlena *) palloc(length + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); else SET_VARSIZE(result, length + VARHDRSZ); if (length == 0) return result; /* Can save a lot of work at this point! */ startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE; endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE; numchunks = (endchunk - startchunk) + 1; startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE; endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE; /* * Open the toast relation and its index */ toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index. This is either two keys or * three depending on the number of chunks. */ ScanKeyInit(&toastkey[0], (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Use equality condition for one chunk, a range condition otherwise: */ if (numchunks == 1) { ScanKeyInit(&toastkey[1], (AttrNumber) 2, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(startchunk)); nscankeys = 2; } else { ScanKeyInit(&toastkey[1], (AttrNumber) 2, BTGreaterEqualStrategyNumber, F_INT4GE, Int32GetDatum(startchunk)); ScanKeyInit(&toastkey[2], (AttrNumber) 2, BTLessEqualStrategyNumber, F_INT4LE, Int32GetDatum(endchunk)); nscankeys = 3; } /* * Read the chunks by index * * The index is on (valueid, chunkidx) so they will come in order */ nextidx = startchunk; toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, nscankeys, toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else if (VARATT_IS_SHORT(chunk)) { /* could happen due to heap_form_tuple doing its thing */ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else { /* should never happen */ elog(ERROR, "found toasted toast chunk for toast value %u in %s", toast_pointer.va_valueid, RelationGetRelationName(toastrel)); chunksize = 0; /* keep compiler quiet */ chunkdata = NULL; } /* * Some checks on the data we've found */ if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s", residx, nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); if (residx < totalchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice", chunksize, (int) TOAST_MAX_CHUNK_SIZE, residx, totalchunks, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else if (residx == totalchunks - 1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice", chunksize, (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE), residx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s", residx, 0, totalchunks - 1, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * Copy the data into proper place in our result */ chcpystrt = 0; chcpyend = chunksize - 1; if (residx == startchunk) chcpystrt = startoffset; if (residx == endchunk) chcpyend = endoffset; memcpy(VARDATA(result) + (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, chunkdata + chcpystrt, (chcpyend - chcpystrt) + 1); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != (endchunk + 1)) elog(ERROR, "missing chunk number %d for toast value %u in %s", nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * End scan and close relations */ systable_endscan_ordered(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return result; }
/* ---------- * toast_fetch_datum - * * Reconstruct an in memory Datum from the chunks saved * in the toast relation * ---------- */ static struct varlena * toast_fetch_datum(struct varlena * attr) { Relation toastrel; Relation toastidx; ScanKeyData toastkey; SysScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; struct varlena *result; struct varatt_external toast_pointer; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; char *chunkdata; int32 chunksize; /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); ressize = toast_pointer.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; result = (struct varlena *) palloc(ressize + VARHDRSZ); if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ); else SET_VARSIZE(result, ressize + VARHDRSZ); /* * Open the toast relation and its index */ toastrel = heap_open(toast_pointer.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid, AccessShareLock); /* * Setup a scan key to fetch from the index by va_valueid */ ScanKeyInit(&toastkey, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(toast_pointer.va_valueid)); /* * Read the chunks by index * * Note that because the index is actually on (valueid, chunkidx) we will * see the chunks in chunkidx order, even though we didn't explicitly ask * for it. */ nextidx = 0; toastscan = systable_beginscan_ordered(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); if (!VARATT_IS_EXTENDED(chunk)) { chunksize = VARSIZE(chunk) - VARHDRSZ; chunkdata = VARDATA(chunk); } else if (VARATT_IS_SHORT(chunk)) { /* could happen due to heap_form_tuple doing its thing */ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; chunkdata = VARDATA_SHORT(chunk); } else { /* should never happen */ elog(ERROR, "found toasted toast chunk for toast value %u in %s", toast_pointer.va_valueid, RelationGetRelationName(toastrel)); chunksize = 0; /* keep compiler quiet */ chunkdata = NULL; } /* * Some checks on the data we've found */ if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s", residx, nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s", chunksize, (int) TOAST_MAX_CHUNK_SIZE, residx, numchunks, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else if (residx == numchunks - 1) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s", chunksize, (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE), residx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); } else elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s", residx, 0, numchunks - 1, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * Copy the data into proper place in our result */ memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE, chunkdata, chunksize); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u in %s", nextidx, toast_pointer.va_valueid, RelationGetRelationName(toastrel)); /* * End scan and close relations */ systable_endscan_ordered(toastscan); index_close(toastidx, AccessShareLock); heap_close(toastrel, AccessShareLock); return result; }
/* * Write a tuple to the outputstream, in the most efficient format possible. */ static void pglogical_write_tuple(StringInfo out, PGLogicalOutputData *data, Relation rel, HeapTuple tuple) { TupleDesc desc; Datum values[MaxTupleAttributeNumber]; bool isnull[MaxTupleAttributeNumber]; int i; uint16 nliveatts = 0; desc = RelationGetDescr(rel); pq_sendbyte(out, 'T'); /* sending TUPLE */ for (i = 0; i < desc->natts; i++) { if (desc->attrs[i]->attisdropped) continue; nliveatts++; } pq_sendint(out, nliveatts, 2); /* try to allocate enough memory from the get go */ enlargeStringInfo(out, tuple->t_len + nliveatts * (1 + 4)); /* * XXX: should this prove to be a relevant bottleneck, it might be * interesting to inline heap_deform_tuple() here, we don't actually need * the information in the form we get from it. */ heap_deform_tuple(tuple, desc, values, isnull); for (i = 0; i < desc->natts; i++) { HeapTuple typtup; Form_pg_type typclass; Form_pg_attribute att = desc->attrs[i]; char transfer_type; /* skip dropped columns */ if (att->attisdropped) continue; if (isnull[i]) { pq_sendbyte(out, 'n'); /* null column */ continue; } else if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i])) { pq_sendbyte(out, 'u'); /* unchanged toast column */ continue; } typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(att->atttypid)); if (!HeapTupleIsValid(typtup)) elog(ERROR, "cache lookup failed for type %u", att->atttypid); typclass = (Form_pg_type) GETSTRUCT(typtup); transfer_type = decide_datum_transfer(att, typclass, data->allow_internal_basetypes, data->allow_binary_basetypes); pq_sendbyte(out, transfer_type); switch (transfer_type) { case 'b': /* internal-format binary data follows */ /* pass by value */ if (att->attbyval) { pq_sendint(out, att->attlen, 4); /* length */ enlargeStringInfo(out, att->attlen); store_att_byval(out->data + out->len, values[i], att->attlen); out->len += att->attlen; out->data[out->len] = '\0'; } /* fixed length non-varlena pass-by-reference type */ else if (att->attlen > 0) { pq_sendint(out, att->attlen, 4); /* length */ appendBinaryStringInfo(out, DatumGetPointer(values[i]), att->attlen); } /* varlena type */ else if (att->attlen == -1) { char *data = DatumGetPointer(values[i]); /* send indirect datums inline */ if (VARATT_IS_EXTERNAL_INDIRECT(values[i])) { struct varatt_indirect redirect; VARATT_EXTERNAL_GET_POINTER(redirect, data); data = (char *) redirect.pointer; } Assert(!VARATT_IS_EXTERNAL(data)); pq_sendint(out, VARSIZE_ANY(data), 4); /* length */ appendBinaryStringInfo(out, data, VARSIZE_ANY(data)); } else elog(ERROR, "unsupported tuple type"); break; case 's': /* binary send/recv data follows */ { bytea *outputbytes; int len; outputbytes = OidSendFunctionCall(typclass->typsend, values[i]); len = VARSIZE(outputbytes) - VARHDRSZ; pq_sendint(out, len, 4); /* length */ pq_sendbytes(out, VARDATA(outputbytes), len); /* data */ pfree(outputbytes); } break; default: { char *outputstr; int len; outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]); len = strlen(outputstr) + 1; pq_sendint(out, len, 4); /* length */ appendBinaryStringInfo(out, outputstr, len); /* data */ pfree(outputstr); } } ReleaseSysCache(typtup); } }
static int ReadStringFromToast(const char *buffer, unsigned int buff_size, unsigned int* out_size) { int result = 0; /* If toasted value is on disk, we'll try to restore it. */ if (VARATT_IS_EXTERNAL_ONDISK(buffer)) { varatt_external toast_ptr; char *toast_data = NULL; /* Number of chunks the TOAST data is divided into */ int32 num_chunks; /* Actual size of external TOASTed value */ int32 toast_ext_size; /* Path to directory with TOAST realtion file */ char *toast_relation_path; /* Filename of TOAST relation file */ char toast_relation_filename[MAXPGPATH]; FILE *toast_rel_fp; unsigned int block_options = 0; unsigned int control_options = 0; VARATT_EXTERNAL_GET_POINTER(toast_ptr, buffer); printf(" TOAST value. Raw size: %8d, external size: %8d, " "value id: %6d, toast relation id: %6d\n", toast_ptr.va_rawsize, toast_ptr.va_extsize, toast_ptr.va_valueid, toast_ptr.va_toastrelid); /* Extract TOASTed value */ toast_ext_size = toast_ptr.va_extsize; num_chunks = (toast_ext_size - 1) / TOAST_MAX_CHUNK_SIZE + 1; printf(" Number of chunks: %d\n", num_chunks); /* Open TOAST relation file */ toast_relation_path = strdup(fileName); get_parent_directory(toast_relation_path); sprintf(toast_relation_filename, "%s/%d", toast_relation_path, toast_ptr.va_toastrelid); printf(" Read TOAST relation %s\n", toast_relation_filename); toast_rel_fp = fopen(toast_relation_filename, "rb"); if (!toast_rel_fp) { printf("Cannot open TOAST relation %s\n", toast_relation_filename); result = -1; } if (result == 0) { unsigned int toast_relation_block_size = GetBlockSize(toast_rel_fp); fseek(toast_rel_fp, 0, SEEK_SET); toast_data = malloc(toast_ptr.va_rawsize); result = DumpFileContents(block_options, control_options, toast_rel_fp, toast_relation_block_size, -1, /* no start block */ -1, /* no end block */ true, /* is toast relation */ toast_ptr.va_valueid, toast_ptr.va_extsize, toast_data); if (result == 0) { if (VARATT_EXTERNAL_IS_COMPRESSED(toast_ptr)) result = DumpCompressedString(toast_data, toast_ext_size); else CopyAppendEncode(toast_data, toast_ext_size); } else { printf("Error in TOAST file.\n"); } free(toast_data); } fclose(toast_rel_fp); free(toast_relation_path); } /* If tag is indirect or expanded, it was stored in memory. */ else { CopyAppend("(TOASTED IN MEMORY)"); } return result; }