/* ---------- * toast_compress_datum - * * Create a compressed version of a varlena datum * * If we fail (ie, compressed result is actually bigger than original) * then return NULL. We must not use compressed data if it'd expand * the tuple! * ---------- */ Datum toast_compress_datum(Datum value) { varattrib *tmp; tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value)); pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ, (PGLZ_Header *) tmp, PGLZ_strategy_default); if (VARATT_SIZE(tmp) < VARATT_SIZE(value)) { /* successful compression */ VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED; return PointerGetDatum(tmp); } else { /* incompressible data */ pfree(tmp); return PointerGetDatum(NULL); } }
/*------------------------------------------------------------------------- * datumGetSize * * Find the "real" size of a datum, given the datum value, * whether it is a "by value", and the declared type length. * * This is essentially an out-of-line version of the att_addlength() * macro in access/tupmacs.h. We do a tad more error checking though. *------------------------------------------------------------------------- */ Size datumGetSize(Datum value, bool typByVal, int typLen) { Size size; if (typByVal) { /* Pass-by-value types are always fixed-length */ Assert(typLen > 0 && typLen <= sizeof(Datum)); size = (Size) typLen; } else { if (typLen > 0) { /* Fixed-length pass-by-ref type */ size = (Size) typLen; } else if (typLen == -1) { /* It is a varlena datatype */ struct varlena *s = (struct varlena *) DatumGetPointer(value); if (!PointerIsValid(s)) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("invalid Datum pointer"))); size = (Size) VARATT_SIZE(s); } else if (typLen == -2) { /* It is a cstring datatype */ char *s = (char *) DatumGetPointer(value); if (!PointerIsValid(s)) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("invalid Datum pointer"))); size = (Size) (strlen(s) + 1); } else { elog(ERROR, "invalid typLen: %d", typLen); size = 0; /* keep compiler quiet */ } } return size; }
/* ---------- * toast_save_datum - * * Save one single datum into the secondary relation and return * a varattrib reference for it. * ---------- */ static Datum toast_save_datum(Relation rel, Datum value) { Relation toastrel; Relation toastidx; HeapTuple toasttup; InsertIndexResult idxres; TupleDesc toasttupDesc; Datum t_values[3]; char t_nulls[3]; varattrib *result; struct { struct varlena hdr; char data[TOAST_MAX_CHUNK_SIZE]; } chunk_data; int32 chunk_size; int32 chunk_seq = 0; char *data_p; int32 data_todo; /* * Create the varattrib reference */ result = (varattrib *) palloc(sizeof(varattrib)); result->va_header = sizeof(varattrib) | VARATT_FLAG_EXTERNAL; if (VARATT_IS_COMPRESSED(value)) { result->va_header |= VARATT_FLAG_COMPRESSED; result->va_content.va_external.va_rawsize = ((varattrib *) value)->va_content.va_compressed.va_rawsize; } else result->va_content.va_external.va_rawsize = VARATT_SIZE(value); result->va_content.va_external.va_extsize = VARATT_SIZE(value) - VARHDRSZ; result->va_content.va_external.va_valueid = newoid(); result->va_content.va_external.va_toastrelid = rel->rd_rel->reltoastrelid; /* * Initialize constant parts of the tuple data */ t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid); t_values[2] = PointerGetDatum(&chunk_data); t_nulls[0] = ' '; t_nulls[1] = ' '; t_nulls[2] = ' '; /* * Get the data to process */ data_p = VARATT_DATA(value); data_todo = VARATT_SIZE(value) - VARHDRSZ; /* * Open the toast relation */ toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid); /* * Split up the item into chunks */ while (data_todo > 0) { /* * Calculate the size of this chunk */ chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo); /* * Build a tuple and store it */ t_values[1] = Int32GetDatum(chunk_seq++); VARATT_SIZEP(&chunk_data) = chunk_size + VARHDRSZ; memcpy(VARATT_DATA(&chunk_data), data_p, chunk_size); toasttup = heap_formtuple(toasttupDesc, t_values, t_nulls); if (!HeapTupleIsValid(toasttup)) elog(ERROR, "failed to build TOAST tuple"); simple_heap_insert(toastrel, toasttup); /* * Create the index entry. We cheat a little here by not using * FormIndexDatum: this relies on the knowledge that the index * columns are the same as the initial columns of the table. * * Note also that there had better not be any user-created index on * the TOAST table, since we don't bother to update anything else. */ idxres = index_insert(toastidx, t_values, t_nulls, &(toasttup->t_self), toastrel, toastidx->rd_index->indisunique); if (idxres == NULL) elog(ERROR, "failed to insert index entry for TOAST tuple"); /* * Free memory */ pfree(idxres); heap_freetuple(toasttup); /* * Move on to next chunk */ data_todo -= chunk_size; data_p += chunk_size; } /* * Done - close toast relation and return the reference */ index_close(toastidx); heap_close(toastrel, RowExclusiveLock); return PointerGetDatum(result); }
/* ---------- * toast_insert_or_update - * * Delete no-longer-used toast-entries and create new ones to * make the new tuple fit on INSERT or UPDATE * ---------- */ static void toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) { TupleDesc tupleDesc; Form_pg_attribute *att; int numAttrs; int i; bool old_isnull; bool new_isnull; bool need_change = false; bool need_free = false; bool need_delold = false; bool has_nulls = false; Size maxDataLen; char toast_action[MaxHeapAttributeNumber]; char toast_nulls[MaxHeapAttributeNumber]; Datum toast_values[MaxHeapAttributeNumber]; int32 toast_sizes[MaxHeapAttributeNumber]; bool toast_free[MaxHeapAttributeNumber]; bool toast_delold[MaxHeapAttributeNumber]; /* * Get the tuple descriptor, the number of and attribute descriptors * and the location of the tuple values. */ tupleDesc = rel->rd_att; numAttrs = tupleDesc->natts; att = tupleDesc->attrs; /* ---------- * Then collect information about the values given * * NOTE: toast_action[i] can have these values: * ' ' default handling * 'p' already processed --- don't touch it * 'x' incompressible, but OK to move off * ---------- */ memset(toast_action, ' ', numAttrs * sizeof(char)); memset(toast_nulls, ' ', numAttrs * sizeof(char)); memset(toast_free, 0, numAttrs * sizeof(bool)); memset(toast_delold, 0, numAttrs * sizeof(bool)); for (i = 0; i < numAttrs; i++) { varattrib *old_value; varattrib *new_value; if (oldtup != NULL) { /* * For UPDATE get the old and new values of this attribute */ old_value = (varattrib *) DatumGetPointer( heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull)); toast_values[i] = heap_getattr(newtup, i + 1, tupleDesc, &new_isnull); new_value = (varattrib *) DatumGetPointer(toast_values[i]); /* * If the old value is an external stored one, check if it has * changed so we have to delete it later. */ if (!old_isnull && att[i]->attlen == -1 && VARATT_IS_EXTERNAL(old_value)) { if (new_isnull || !VARATT_IS_EXTERNAL(new_value) || old_value->va_content.va_external.va_valueid != new_value->va_content.va_external.va_valueid || old_value->va_content.va_external.va_toastrelid != new_value->va_content.va_external.va_toastrelid) { /* * The old external store value isn't needed any more * after the update */ toast_delold[i] = true; need_delold = true; } else { /* * This attribute isn't changed by this update so we * reuse the original reference to the old value in * the new tuple. */ toast_action[i] = 'p'; toast_sizes[i] = VARATT_SIZE(toast_values[i]); continue; } } } else { /* * For INSERT simply get the new value */ toast_values[i] = heap_getattr(newtup, i + 1, tupleDesc, &new_isnull); } /* * Handle NULL attributes */ if (new_isnull) { toast_action[i] = 'p'; toast_nulls[i] = 'n'; has_nulls = true; continue; } /* * Now look at varsize attributes */ if (att[i]->attlen == -1) { /* * If the table's attribute says PLAIN always, force it so. */ if (att[i]->attstorage == 'p') toast_action[i] = 'p'; /* * We took care of UPDATE above, so any external value we find * still in the tuple must be someone else's we cannot reuse. * Expand it to plain (and, probably, toast it again below). */ if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i]))) { toast_values[i] = PointerGetDatum(heap_tuple_untoast_attr( (varattrib *) DatumGetPointer(toast_values[i]))); toast_free[i] = true; need_change = true; need_free = true; } /* * Remember the size of this attribute */ toast_sizes[i] = VARATT_SIZE(DatumGetPointer(toast_values[i])); } else { /* * Not a variable size attribute, plain storage always */ toast_action[i] = 'p'; toast_sizes[i] = att[i]->attlen; } } /* ---------- * Compress and/or save external until data fits into target length * * 1: Inline compress attributes with attstorage 'x' * 2: Store attributes with attstorage 'x' or 'e' external * 3: Inline compress attributes with attstorage 'm' * 4: Store attributes with attstorage 'm' external * ---------- */ maxDataLen = offsetof(HeapTupleHeaderData, t_bits); if (has_nulls) maxDataLen += BITMAPLEN(numAttrs); maxDataLen = TOAST_TUPLE_TARGET - MAXALIGN(maxDataLen); /* * Look for attributes with attstorage 'x' to compress */ while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) > maxDataLen) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; Datum new_value; /* * Search for the biggest yet uncompressed internal attribute */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] != ' ') continue; if (VARATT_IS_EXTENDED(toast_values[i])) continue; if (att[i]->attstorage != 'x') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Attempt to compress it inline */ i = biggest_attno; old_value = toast_values[i]; new_value = toast_compress_datum(old_value); if (DatumGetPointer(new_value) != NULL) { /* successful compression */ if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_values[i] = new_value; toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } else { /* * incompressible data, ignore on subsequent compression * passes */ toast_action[i] = 'x'; } } /* * Second we look for attributes of attstorage 'x' or 'e' that are * still inline. */ while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) > maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; /*------ * Search for the biggest yet inlined attribute with * attstorage equals 'x' or 'e' *------ */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] == 'p') continue; if (VARATT_IS_EXTERNAL(toast_values[i])) continue; if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Store this external */ i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; toast_values[i] = toast_save_datum(rel, toast_values[i]); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } /* * Round 3 - this time we take attributes with storage 'm' into * compression */ while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) > maxDataLen) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; Datum new_value; /* * Search for the biggest yet uncompressed internal attribute */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] != ' ') continue; if (VARATT_IS_EXTENDED(toast_values[i])) continue; if (att[i]->attstorage != 'm') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Attempt to compress it inline */ i = biggest_attno; old_value = toast_values[i]; new_value = toast_compress_datum(old_value); if (DatumGetPointer(new_value) != NULL) { /* successful compression */ if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_values[i] = new_value; toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } else { /* * incompressible data, ignore on subsequent compression * passes */ toast_action[i] = 'x'; } } /* * Finally we store attributes of type 'm' external */ while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) > maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; /*-------- * Search for the biggest yet inlined attribute with * attstorage = 'm' *-------- */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] == 'p') continue; if (VARATT_IS_EXTERNAL(toast_values[i])) continue; if (att[i]->attstorage != 'm') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Store this external */ i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; toast_values[i] = toast_save_datum(rel, toast_values[i]); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } /* * In the case we toasted any values, we need to build a new heap * tuple with the changed values. */ if (need_change) { HeapTupleHeader olddata = newtup->t_data; char *new_data; int32 new_len; /* * Calculate the new size of the tuple. Header size should not * change, but data size might. */ new_len = offsetof(HeapTupleHeaderData, t_bits); if (has_nulls) new_len += BITMAPLEN(numAttrs); if (olddata->t_infomask & HEAP_HASOID) new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); new_len += ComputeDataSize(tupleDesc, toast_values, toast_nulls); /* * Allocate new tuple in same context as old one. */ new_data = (char *) MemoryContextAlloc(newtup->t_datamcxt, new_len); newtup->t_data = (HeapTupleHeader) new_data; newtup->t_len = new_len; /* * Put the tuple header and the changed values into place */ memcpy(new_data, olddata, olddata->t_hoff); DataFill((char *) new_data + olddata->t_hoff, tupleDesc, toast_values, toast_nulls, &(newtup->t_data->t_infomask), has_nulls ? newtup->t_data->t_bits : NULL); /* * In the case we modified a previously modified tuple again, free * the memory from the previous run */ if ((char *) olddata != ((char *) newtup + HEAPTUPLESIZE)) pfree(olddata); } /* * Free allocated temp values */ if (need_free) for (i = 0; i < numAttrs; i++) if (toast_free[i]) pfree(DatumGetPointer(toast_values[i])); /* * Delete external values from the old tuple */ if (need_delold) for (i = 0; i < numAttrs; i++) if (toast_delold[i]) toast_delete_datum(rel, heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull)); }
/* ---------- * toast_fetch_datum_slice - * * Reconstruct a segment of a varattrib from the chunks saved * in the toast relation * ---------- */ static varattrib * toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length) { Relation toastrel; Relation toastidx; ScanKeyData toastkey[3]; int nscankeys; IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; varattrib *result; int32 attrsize; int32 residx; int32 nextidx; int numchunks; int startchunk; int endchunk; int32 startoffset; int32 endoffset; int totalchunks; Pointer chunk; bool isnull; int32 chunksize; int32 chcpystrt; int32 chcpyend; attrsize = attr->va_content.va_external.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; if (sliceoffset >= attrsize) { sliceoffset = 0; length = 0; } if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; result = (varattrib *) palloc(length + VARHDRSZ); VARATT_SIZEP(result) = length + VARHDRSZ; if (VARATT_IS_COMPRESSED(attr)) VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED; if (length == 0) return (result); /* Can save a lot of work at this point! */ startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE; endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE; numchunks = (endchunk - startchunk) + 1; startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE; endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE; /* * Open the toast relation and it's index */ toastrel = heap_open(attr->va_content.va_external.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid); /* * Setup a scan key to fetch from the index. This is either two keys * or three depending on the number of chunks. */ ScanKeyEntryInitialize(&toastkey[0], (bits16) 0, (AttrNumber) 1, (RegProcedure) F_OIDEQ, ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); /* * Now dependent on number of chunks: */ if (numchunks == 1) { ScanKeyEntryInitialize(&toastkey[1], (bits16) 0, (AttrNumber) 2, (RegProcedure) F_INT4EQ, Int32GetDatum(startchunk)); nscankeys = 2; } else { ScanKeyEntryInitialize(&toastkey[1], (bits16) 0, (AttrNumber) 2, (RegProcedure) F_INT4GE, Int32GetDatum(startchunk)); ScanKeyEntryInitialize(&toastkey[2], (bits16) 0, (AttrNumber) 2, (RegProcedure) F_INT4LE, Int32GetDatum(endchunk)); nscankeys = 3; } /* * Read the chunks by index * * The index is on (valueid, chunkidx) so they will come in order */ nextidx = startchunk; toastscan = index_beginscan(toastrel, toastidx, SnapshotToast, nscankeys, toastkey); while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(heap_getattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(heap_getattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); chunksize = VARATT_SIZE(chunk) - VARHDRSZ; /* * Some checks on the data we've found */ if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, attr->va_content.va_external.va_valueid); if (residx < totalchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", chunksize, residx, attr->va_content.va_external.va_valueid); } else { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", chunksize, residx, attr->va_content.va_external.va_valueid); } /* * Copy the data into proper place in our result */ chcpystrt = 0; chcpyend = chunksize - 1; if (residx == startchunk) chcpystrt = startoffset; if (residx == endchunk) chcpyend = endoffset; memcpy(((char *) VARATT_DATA(result)) + (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, VARATT_DATA(chunk) + chcpystrt, (chcpyend - chcpystrt) + 1); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != (endchunk + 1)) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, attr->va_content.va_external.va_valueid); /* * End scan and close relations */ index_endscan(toastscan); index_close(toastidx); heap_close(toastrel, AccessShareLock); return result; }
/* ---------- * toast_fetch_datum - * * Reconstruct an in memory varattrib from the chunks saved * in the toast relation * ---------- */ static varattrib * toast_fetch_datum(varattrib *attr) { Relation toastrel; Relation toastidx; ScanKeyData toastkey; IndexScanDesc toastscan; HeapTuple ttup; TupleDesc toasttupDesc; varattrib *result; int32 ressize; int32 residx, nextidx; int32 numchunks; Pointer chunk; bool isnull; int32 chunksize; ressize = attr->va_content.va_external.va_extsize; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; result = (varattrib *) palloc(ressize + VARHDRSZ); VARATT_SIZEP(result) = ressize + VARHDRSZ; if (VARATT_IS_COMPRESSED(attr)) VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED; /* * Open the toast relation and its index */ toastrel = heap_open(attr->va_content.va_external.va_toastrelid, AccessShareLock); toasttupDesc = toastrel->rd_att; toastidx = index_open(toastrel->rd_rel->reltoastidxid); /* * Setup a scan key to fetch from the index by va_valueid */ ScanKeyEntryInitialize(&toastkey, (bits16) 0, (AttrNumber) 1, (RegProcedure) F_OIDEQ, ObjectIdGetDatum(attr->va_content.va_external.va_valueid)); /* * Read the chunks by index * * Note that because the index is actually on (valueid, chunkidx) we will * see the chunks in chunkidx order, even though we didn't explicitly * ask for it. */ nextidx = 0; toastscan = index_beginscan(toastrel, toastidx, SnapshotToast, 1, &toastkey); while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL) { /* * Have a chunk, extract the sequence number and the data */ residx = DatumGetInt32(heap_getattr(ttup, 2, toasttupDesc, &isnull)); Assert(!isnull); chunk = DatumGetPointer(heap_getattr(ttup, 3, toasttupDesc, &isnull)); Assert(!isnull); chunksize = VARATT_SIZE(chunk) - VARHDRSZ; /* * Some checks on the data we've found */ if (residx != nextidx) elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u", residx, nextidx, attr->va_content.va_external.va_valueid); if (residx < numchunks - 1) { if (chunksize != TOAST_MAX_CHUNK_SIZE) elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", chunksize, residx, attr->va_content.va_external.va_valueid); } else if (residx < numchunks) { if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", chunksize, residx, attr->va_content.va_external.va_valueid); } else elog(ERROR, "unexpected chunk number %d for toast value %u", residx, attr->va_content.va_external.va_valueid); /* * Copy the data into proper place in our result */ memcpy(((char *) VARATT_DATA(result)) + residx * TOAST_MAX_CHUNK_SIZE, VARATT_DATA(chunk), chunksize); nextidx++; } /* * Final checks that we successfully fetched the datum */ if (nextidx != numchunks) elog(ERROR, "missing chunk number %d for toast value %u", nextidx, attr->va_content.va_external.va_valueid); /* * End scan and close relations */ index_endscan(toastscan); index_close(toastidx); heap_close(toastrel, AccessShareLock); return result; }
/* ---------------- * index_form_tuple * ---------------- */ IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull) { char *tp; /* tuple pointer */ IndexTuple tuple; /* return tuple */ Size size, hoff; int i; unsigned short infomask = 0; bool hasnull = false; uint16 tupmask = 0; int numberOfAttributes = tupleDescriptor->natts; #ifdef TOAST_INDEX_HACK Datum untoasted_values[INDEX_MAX_KEYS]; bool untoasted_free[INDEX_MAX_KEYS]; #endif if (numberOfAttributes > INDEX_MAX_KEYS) ereport(ERROR, (errcode(ERRCODE_TOO_MANY_COLUMNS), errmsg("number of index columns (%d) exceeds limit (%d)", numberOfAttributes, INDEX_MAX_KEYS))); #ifdef TOAST_INDEX_HACK for (i = 0; i < numberOfAttributes; i++) { Form_pg_attribute att = tupleDescriptor->attrs[i]; untoasted_values[i] = values[i]; untoasted_free[i] = false; /* Do nothing if value is NULL or not of varlena type */ if (isnull[i] || att->attlen != -1) continue; /* * If value is stored EXTERNAL, must fetch it so we are not depending * on outside storage. This should be improved someday. */ if (VARATT_IS_EXTERNAL(values[i])) { untoasted_values[i] = PointerGetDatum( heap_tuple_fetch_attr( (varattrib *) DatumGetPointer(values[i]))); untoasted_free[i] = true; } /* * If value is above size target, and is of a compressible datatype, * try to compress it in-line. */ if (VARATT_SIZE(untoasted_values[i]) > TOAST_INDEX_TARGET && !VARATT_IS_EXTENDED(untoasted_values[i]) && (att->attstorage == 'x' || att->attstorage == 'm')) { Datum cvalue = toast_compress_datum(untoasted_values[i]); if (DatumGetPointer(cvalue) != NULL) { /* successful compression */ if (untoasted_free[i]) pfree(DatumGetPointer(untoasted_values[i])); untoasted_values[i] = cvalue; untoasted_free[i] = true; } } } #endif for (i = 0; i < numberOfAttributes; i++) { if (isnull[i]) { hasnull = true; break; } } if (hasnull) infomask |= INDEX_NULL_MASK; hoff = IndexInfoFindDataOffset(infomask); #ifdef TOAST_INDEX_HACK size = hoff + heap_compute_data_size(tupleDescriptor, untoasted_values, isnull); #else size = hoff + heap_compute_data_size(tupleDescriptor, values, isnull); #endif size = MAXALIGN(size); /* be conservative */ tp = (char *) palloc0(size); tuple = (IndexTuple) tp; heap_fill_tuple(tupleDescriptor, #ifdef TOAST_INDEX_HACK untoasted_values, #else values, #endif isnull, (char *) tp + hoff, &tupmask, (hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL)); #ifdef TOAST_INDEX_HACK for (i = 0; i < numberOfAttributes; i++) { if (untoasted_free[i]) pfree(DatumGetPointer(untoasted_values[i])); } #endif /* * We do this because heap_fill_tuple wants to initialize a "tupmask" * which is used for HeapTuples, but we want an indextuple infomask. The * only relevant info is the "has variable attributes" field. We have * already set the hasnull bit above. */ if (tupmask & HEAP_HASVARWIDTH) infomask |= INDEX_VAR_MASK; /* * Here we make sure that the size will fit in the field reserved for it * in t_info. */ if ((size & INDEX_SIZE_MASK) != size) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row requires %lu bytes, maximum size is %lu", (unsigned long) size, (unsigned long) INDEX_SIZE_MASK))); infomask |= size; /* * initialize metadata */ tuple->t_info = infomask; return tuple; }
/* ---------- * pglz_decompress - * * Decompresses source into dest. * ---------- */ int pglz_decompress(PGLZ_Header *source, char *dest) { unsigned char *dp; unsigned char *dend; unsigned char *bp; unsigned char ctrl; int32 ctrlc; int32 len; int32 off; dp = ((unsigned char *) source) + sizeof(PGLZ_Header); dend = ((unsigned char *) source) + VARATT_SIZE(source); bp = (unsigned char *) dest; if (VARATT_SIZE(source) == source->rawsize + sizeof(PGLZ_Header)) { memcpy(dest, dp, source->rawsize); return source->rawsize; } while (dp < dend) { /* * Read one control byte and process the next 8 items. */ ctrl = *dp++; for (ctrlc = 0; ctrlc < 8 && dp < dend; ctrlc++) { if (ctrl & 1) { /* * Otherwise it contains the match length minus 3 and the * upper 4 bits of the offset. The next following byte * contains the lower 8 bits of the offset. If the length is * coded as 18, another extension tag byte tells how much * longer the match really was (0-255). */ len = (dp[0] & 0x0f) + 3; off = ((dp[0] & 0xf0) << 4) | dp[1]; dp += 2; if (len == 18) len += *dp++; /* * Now we copy the bytes specified by the tag from OUTPUT to * OUTPUT. It is dangerous and platform dependent to use * memcpy() here, because the copied areas could overlap * extremely! */ while (len--) { *bp = bp[-off]; bp++; } } else { /* * An unset control bit means LITERAL BYTE. So we just copy * one from INPUT to OUTPUT. */ *bp++ = *dp++; } /* * Advance the control bit */ ctrl >>= 1; } } /* * That's it. */ return (char *) bp - dest; }
/* ---------- * pglz_compress - * * Compresses source into dest using strategy. * ---------- */ int pglz_compress(char *source, int32 slen, PGLZ_Header *dest, PGLZ_Strategy *strategy) { unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header); unsigned char *bstart = bp; int hist_next = 0; bool hist_recycle = false; char *dp = source; char *dend = source + slen; unsigned char ctrl_dummy = 0; unsigned char *ctrlp = &ctrl_dummy; unsigned char ctrlb = 0; unsigned char ctrl = 0; int32 match_len; int32 match_off; int32 good_match; int32 good_drop; int32 do_compress = 1; int32 result_size = -1; int32 result_max; int32 need_rate; /* * Our fallback strategy is the default. */ if (strategy == NULL) strategy = PGLZ_strategy_default; /* * Save the original source size in the header. */ dest->rawsize = slen; /* * If the strategy forbids compression (at all or if source chunk too * small), copy input to output without compression. */ if (strategy->match_size_good == 0) { memcpy(bstart, source, slen); return (dest->varsize = slen + sizeof(PGLZ_Header)); } else { if (slen < strategy->min_input_size) { memcpy(bstart, source, slen); return (dest->varsize = slen + sizeof(PGLZ_Header)); } } /* * Limit the match size to the maximum implementation allowed value */ if ((good_match = strategy->match_size_good) > PGLZ_MAX_MATCH) good_match = PGLZ_MAX_MATCH; if (good_match < 17) good_match = 17; if ((good_drop = strategy->match_size_drop) < 0) good_drop = 0; if (good_drop > 100) good_drop = 100; /* * Initialize the history lists to empty. We do not need to zero the * hist_entries[] array; its entries are initialized as they are used. */ memset((void *) hist_start, 0, sizeof(hist_start)); /* * Compute the maximum result size allowed by the strategy. If the input * size exceeds force_input_size, the max result size is the input size * itself. Otherwise, it is the input size minus the minimum wanted * compression rate. */ if (slen >= strategy->force_input_size) result_max = slen; else { need_rate = strategy->min_comp_rate; if (need_rate < 0) need_rate = 0; else if (need_rate > 99) need_rate = 99; result_max = slen - ((slen * need_rate) / 100); } /* * Compress the source directly into the output buffer. */ while (dp < dend) { /* * If we already exceeded the maximum result size, set no compression * flag and stop this. But don't check too often. */ if (bp - bstart >= result_max) { do_compress = 0; break; } /* * Try to find a match in the history */ if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) { /* * Create the tag and add history entries for all matched * characters. */ pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); while (match_len--) { pglz_hist_add(hist_start, hist_entries, hist_next, hist_recycle, dp, dend); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } } else { /* * No match found. Copy one literal byte. */ pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); pglz_hist_add(hist_start, hist_entries, hist_next, hist_recycle, dp, dend); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } } /* * If we are still in compressing mode, write out the last control byte * and determine if the compression gained the rate requested by the * strategy. */ if (do_compress) { *ctrlp = ctrlb; result_size = bp - bstart; if (result_size >= result_max) do_compress = 0; } /* * Done - if we successfully compressed and matched the strategy's * constraints, return the compressed result. Otherwise copy the original * source over it and return the original length. */ if (do_compress) { dest->varsize = result_size + sizeof(PGLZ_Header); return VARATT_SIZE(dest); } else { memcpy(((char *) dest) + sizeof(PGLZ_Header), source, slen); dest->varsize = slen + sizeof(PGLZ_Header); return VARATT_SIZE(dest); } }
/* ---------- * toast_insert_or_update - * * Delete no-longer-used toast-entries and create new ones to * make the new tuple fit on INSERT or UPDATE * * Inputs: * newtup: the candidate new tuple to be inserted * oldtup: the old row version for UPDATE, or NULL for INSERT * Result: * either newtup if no toasting is needed, or a palloc'd modified tuple * that is what should actually get stored * * NOTE: neither newtup nor oldtup will be modified. This is a change * from the pre-8.1 API of this routine. * ---------- */ HeapTuple toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup) { HeapTuple result_tuple; TupleDesc tupleDesc; Form_pg_attribute *att; int numAttrs; int i; bool need_change = false; bool need_free = false; bool need_delold = false; bool has_nulls = false; Size maxDataLen; char toast_action[MaxHeapAttributeNumber]; bool toast_isnull[MaxHeapAttributeNumber]; bool toast_oldisnull[MaxHeapAttributeNumber]; Datum toast_values[MaxHeapAttributeNumber]; Datum toast_oldvalues[MaxHeapAttributeNumber]; int32 toast_sizes[MaxHeapAttributeNumber]; bool toast_free[MaxHeapAttributeNumber]; bool toast_delold[MaxHeapAttributeNumber]; /* * Get the tuple descriptor and break down the tuple(s) into fields. */ tupleDesc = rel->rd_att; att = tupleDesc->attrs; numAttrs = tupleDesc->natts; Assert(numAttrs <= MaxHeapAttributeNumber); heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull); if (oldtup != NULL) heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull); /* ---------- * Then collect information about the values given * * NOTE: toast_action[i] can have these values: * ' ' default handling * 'p' already processed --- don't touch it * 'x' incompressible, but OK to move off * * NOTE: toast_sizes[i] is only made valid for varlena attributes with * toast_action[i] different from 'p'. * ---------- */ memset(toast_action, ' ', numAttrs * sizeof(char)); memset(toast_free, 0, numAttrs * sizeof(bool)); memset(toast_delold, 0, numAttrs * sizeof(bool)); for (i = 0; i < numAttrs; i++) { varattrib *old_value; varattrib *new_value; if (oldtup != NULL) { /* * For UPDATE get the old and new values of this attribute */ old_value = (varattrib *) DatumGetPointer(toast_oldvalues[i]); new_value = (varattrib *) DatumGetPointer(toast_values[i]); /* * If the old value is an external stored one, check if it has * changed so we have to delete it later. */ if (att[i]->attlen == -1 && !toast_oldisnull[i] && VARATT_IS_EXTERNAL(old_value)) { if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) || old_value->va_content.va_external.va_valueid != new_value->va_content.va_external.va_valueid || old_value->va_content.va_external.va_toastrelid != new_value->va_content.va_external.va_toastrelid) { /* * The old external stored value isn't needed any more * after the update */ toast_delold[i] = true; need_delold = true; } else { /* * This attribute isn't changed by this update so we reuse * the original reference to the old value in the new * tuple. */ toast_action[i] = 'p'; toast_sizes[i] = VARATT_SIZE(toast_values[i]); continue; } } } else { /* * For INSERT simply get the new value */ new_value = (varattrib *) DatumGetPointer(toast_values[i]); } /* * Handle NULL attributes */ if (toast_isnull[i]) { toast_action[i] = 'p'; has_nulls = true; continue; } /* * Now look at varlena attributes */ if (att[i]->attlen == -1) { /* * If the table's attribute says PLAIN always, force it so. */ if (att[i]->attstorage == 'p') toast_action[i] = 'p'; /* * We took care of UPDATE above, so any external value we find * still in the tuple must be someone else's we cannot reuse. * Expand it to plain (and, probably, toast it again below). */ if (VARATT_IS_EXTERNAL(new_value)) { new_value = heap_tuple_untoast_attr(new_value); toast_values[i] = PointerGetDatum(new_value); toast_free[i] = true; need_change = true; need_free = true; } /* * Remember the size of this attribute */ toast_sizes[i] = VARATT_SIZE(new_value); } else { /* * Not a varlena attribute, plain storage always */ toast_action[i] = 'p'; } } /* ---------- * Compress and/or save external until data fits into target length * * 1: Inline compress attributes with attstorage 'x' * 2: Store attributes with attstorage 'x' or 'e' external * 3: Inline compress attributes with attstorage 'm' * 4: Store attributes with attstorage 'm' external * ---------- */ /* compute header overhead --- this should match heap_form_tuple() */ maxDataLen = offsetof(HeapTupleHeaderData, t_bits); if (has_nulls) maxDataLen += BITMAPLEN(numAttrs); if (newtup->t_data->t_infomask & HEAP_HASOID) maxDataLen += sizeof(Oid); maxDataLen = MAXALIGN(maxDataLen); Assert(maxDataLen == newtup->t_data->t_hoff); /* now convert to a limit on the tuple data size */ maxDataLen = TOAST_TUPLE_TARGET - maxDataLen; /* * Look for attributes with attstorage 'x' to compress */ while (heap_compute_data_size(tupleDesc, toast_values, toast_isnull) > maxDataLen) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; Datum new_value; /* * Search for the biggest yet uncompressed internal attribute */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] != ' ') continue; if (VARATT_IS_EXTENDED(toast_values[i])) continue; if (att[i]->attstorage != 'x') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Attempt to compress it inline */ i = biggest_attno; old_value = toast_values[i]; new_value = toast_compress_datum(old_value); if (DatumGetPointer(new_value) != NULL) { /* successful compression */ if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_values[i] = new_value; toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } else { /* * incompressible data, ignore on subsequent compression passes */ toast_action[i] = 'x'; } } /* * Second we look for attributes of attstorage 'x' or 'e' that are still * inline. */ while (heap_compute_data_size(tupleDesc, toast_values, toast_isnull) > maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; /*------ * Search for the biggest yet inlined attribute with * attstorage equals 'x' or 'e' *------ */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] == 'p') continue; if (VARATT_IS_EXTERNAL(toast_values[i])) continue; if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Store this external */ i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; toast_values[i] = toast_save_datum(rel, toast_values[i]); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } /* * Round 3 - this time we take attributes with storage 'm' into * compression */ while (heap_compute_data_size(tupleDesc, toast_values, toast_isnull) > maxDataLen) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; Datum new_value; /* * Search for the biggest yet uncompressed internal attribute */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] != ' ') continue; if (VARATT_IS_EXTENDED(toast_values[i])) continue; if (att[i]->attstorage != 'm') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Attempt to compress it inline */ i = biggest_attno; old_value = toast_values[i]; new_value = toast_compress_datum(old_value); if (DatumGetPointer(new_value) != NULL) { /* successful compression */ if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_values[i] = new_value; toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } else { /* * incompressible data, ignore on subsequent compression passes */ toast_action[i] = 'x'; } } /* * Finally we store attributes of type 'm' external */ while (heap_compute_data_size(tupleDesc, toast_values, toast_isnull) > maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid) { int biggest_attno = -1; int32 biggest_size = MAXALIGN(sizeof(varattrib)); Datum old_value; /*-------- * Search for the biggest yet inlined attribute with * attstorage = 'm' *-------- */ for (i = 0; i < numAttrs; i++) { if (toast_action[i] == 'p') continue; if (VARATT_IS_EXTERNAL(toast_values[i])) continue; if (att[i]->attstorage != 'm') continue; if (toast_sizes[i] > biggest_size) { biggest_attno = i; biggest_size = toast_sizes[i]; } } if (biggest_attno < 0) break; /* * Store this external */ i = biggest_attno; old_value = toast_values[i]; toast_action[i] = 'p'; toast_values[i] = toast_save_datum(rel, toast_values[i]); if (toast_free[i]) pfree(DatumGetPointer(old_value)); toast_free[i] = true; toast_sizes[i] = VARATT_SIZE(toast_values[i]); need_change = true; need_free = true; } /* * In the case we toasted any values, we need to build a new heap tuple * with the changed values. */ if (need_change) { HeapTupleHeader olddata = newtup->t_data; HeapTupleHeader new_data; int32 new_len; /* * Calculate the new size of the tuple. Header size should not * change, but data size might. */ new_len = offsetof(HeapTupleHeaderData, t_bits); if (has_nulls) new_len += BITMAPLEN(numAttrs); if (olddata->t_infomask & HEAP_HASOID) new_len += sizeof(Oid); new_len = MAXALIGN(new_len); Assert(new_len == olddata->t_hoff); new_len += heap_compute_data_size(tupleDesc, toast_values, toast_isnull); /* * Allocate and zero the space needed, and fill HeapTupleData fields. */ result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_len); result_tuple->t_len = new_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = new_data; /* * Put the existing tuple header and the changed values into place */ memcpy(new_data, olddata, olddata->t_hoff); heap_fill_tuple(tupleDesc, toast_values, toast_isnull, (char *) new_data + olddata->t_hoff, &(new_data->t_infomask), has_nulls ? new_data->t_bits : NULL); } else result_tuple = newtup; /* * Free allocated temp values */ if (need_free) for (i = 0; i < numAttrs; i++) if (toast_free[i]) pfree(DatumGetPointer(toast_values[i])); /* * Delete external values from the old tuple */ if (need_delold) for (i = 0; i < numAttrs; i++) if (toast_delold[i]) toast_delete_datum(rel, toast_oldvalues[i]); return result_tuple; }