/* ----------
 * toast_compress_datum -
 *
 *	Produce a compressed copy of a varlena datum.
 *
 *	Returns a pointer-Datum for the palloc'd compressed value, or NULL if
 *	compression failed or would not shrink the value.  We must never use
 *	compressed data if it'd expand the tuple!
 *
 *	We use the VAR{SIZE,DATA}_ANY macros so short-header varlenas are
 *	handled without copying.  External or already-compressed inputs are
 *	not allowed (asserted below).
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	varattrib  *compressed;
	int32		rawsize = VARSIZE_ANY_EXHDR_D(value);

	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));

	/*
	 * Don't waste a palloc cycle on values outside the size window the
	 * default strategy is willing to compress.
	 */
	if (rawsize < PGLZ_strategy_default->min_input_size ||
		rawsize > PGLZ_strategy_default->max_input_size)
		return PointerGetDatum(NULL);

	compressed = (varattrib *) palloc(PGLZ_MAX_OUTPUT(rawsize));

	/*
	 * Give up unless pglz succeeded AND the result is strictly smaller than
	 * the original representation.
	 */
	if (!pglz_compress(VARDATA_ANY_D(value), rawsize,
					   (PGLZ_Header *) compressed, PGLZ_strategy_default) ||
		VARSIZE(compressed) >= VARSIZE_ANY_D(value))
	{
		/* incompressible data */
		pfree(compressed);
		return PointerGetDatum(NULL);
	}

	/* successful compression */
	VARATT_SET_COMPRESSED(compressed);
	return PointerGetDatum(compressed);
}
/* ----------
 * toast_compress_datum -
 *
 *	Produce a compressed copy of a varlena datum.
 *
 *	Returns a pointer-Datum for the palloc'd compressed value, or NULL if
 *	compression failed or would not shrink the value.  We must never use
 *	compressed data if it'd expand the tuple!
 *
 *	We use the VAR{SIZE,DATA}_ANY macros so short-header varlenas are
 *	handled without copying.  External or already-compressed inputs are
 *	not allowed (asserted below).
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	struct varlena *compressed;
	int32		rawsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
	bool		ok;

	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));

	/*
	 * Don't waste a palloc cycle on values outside the size window the
	 * default strategy is willing to compress.
	 */
	if (rawsize < PGLZ_strategy_default->min_input_size ||
		rawsize > PGLZ_strategy_default->max_input_size)
		return PointerGetDatum(NULL);

	compressed = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(rawsize));

	/*
	 * We recheck the actual size even when pglz_compress() reports success,
	 * because it may be satisfied with saving as little as one byte -- which
	 * could become a net loss after header and alignment padding.  Worst
	 * case, the compressed format needs three padding bytes (plus header,
	 * included in VARSIZE), while the uncompressed form could fit in a
	 * one-byte header with no padding.  Hence we demand a saving of more
	 * than 2 bytes to guarantee a real gain.
	 */
	ok = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), rawsize,
					   (PGLZ_Header *) compressed, PGLZ_strategy_default);
	if (ok && VARSIZE(compressed) < rawsize - 2)
	{
		/* successful compression */
		return PointerGetDatum(compressed);
	}

	/* incompressible data */
	pfree(compressed);
	return PointerGetDatum(NULL);
}
/*
 * Create a compressed version of a backup block image.
 *
 * Returns FALSE if compression fails (i.e., the compressed result would be
 * at least as large as the original once overhead is counted).  Otherwise
 * returns TRUE and sets *dlen to the compressed image length.
 */
static bool
XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
						char *dest, uint16 *dlen)
{
	int32		uncompressed_len = BLCKSZ - hole_length;
	int32		compressed_len;
	int32		header_overhead;
	char	   *input;
	char		scratch[BLCKSZ];

	if (hole_length == 0)
	{
		/* no hole: compress the page in place, no extra header needed */
		input = page;
		header_overhead = 0;
	}
	else
	{
		/*
		 * Squeeze the hole out of the page into a scratch copy.  The
		 * compressed record then needs an extra header to describe the
		 * hole, which counts against the space savings below.
		 */
		input = scratch;
		memcpy(input, page, hole_offset);
		memcpy(input + hole_offset,
			   page + hole_offset + hole_length,
			   BLCKSZ - (hole_offset + hole_length));
		header_overhead = SizeOfXLogRecordBlockCompressHeader;
	}

	compressed_len = pglz_compress(input, uncompressed_len, dest,
								   PGLZ_strategy_default);

	/*
	 * Even when pglz_compress() reports success, insist that the bytes
	 * saved exceed the extra header bytes the compressed form requires.
	 */
	if (compressed_len < 0 ||
		compressed_len + header_overhead >= uncompressed_len)
		return false;

	*dlen = (uint16) compressed_len;	/* successful compression */
	return true;
}
/* * compress_data * * Compress the bytea buffer and return the result as bytea. */ Datum compress_data(PG_FUNCTION_ARGS) { bytea *raw_data = PG_GETARG_BYTEA_P(0); bytea *res; int32 compressed_len; char *compressed_data; PGLZ_Strategy strategy; memcpy(&strategy, (PGLZ_Strategy *) PGLZ_strategy_always, sizeof(PGLZ_Strategy)); /* Get custom values if specified by user */ if (PG_NARGS() == 7) { strategy.min_input_size = PG_GETARG_INT32(1); strategy.max_input_size = PG_GETARG_INT32(2); strategy.min_comp_rate = PG_GETARG_INT32(3); strategy.first_success_by = PG_GETARG_INT32(4); strategy.match_size_good = PG_GETARG_INT32(5); strategy.match_size_drop = PG_GETARG_INT32(6); } /* Compress data in build */ compressed_data = palloc(PGLZ_MAX_OUTPUT(VARSIZE(raw_data) - VARHDRSZ)); compressed_len = pglz_compress(VARDATA(raw_data), VARSIZE(raw_data) - VARHDRSZ, compressed_data, &strategy); /* if compression failed return the original data */ if (compressed_len < 0) PG_RETURN_BYTEA_P(raw_data); /* Build result */ res = (bytea *) palloc(VARHDRSZ + compressed_len); SET_VARSIZE(res, compressed_len + VARHDRSZ); memcpy(VARDATA(res), compressed_data, compressed_len); pfree(compressed_data); PG_RETURN_BYTEA_P(res); }
/* ----------
 * toast_compress_datum -
 *
 *	Create a compressed version of a varlena datum
 *
 *	If we fail (ie, compressed result is actually bigger than original)
 *	then return NULL.  We must not use compressed data if it'd expand
 *	the tuple!
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	varattrib  *tmp;

	/*
	 * Worst-case output buffer: the pglz header plus the full original
	 * size.  NOTE(review): the return value of pglz_compress() is not
	 * checked here -- this legacy API presumably always fills in tmp's
	 * header so the size comparison below is valid even for incompressible
	 * input; confirm against the pg_lzcompress version this was built with.
	 */
	tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value));
	pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ,
				  (PGLZ_Header *) tmp, PGLZ_strategy_default);

	/* keep the result only if it is strictly smaller than the original */
	if (VARATT_SIZE(tmp) < VARATT_SIZE(value))
	{
		/* successful compression: flag the varlena header as compressed */
		VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
		return PointerGetDatum(tmp);
	}
	else
	{
		/* incompressible data */
		pfree(tmp);
		return PointerGetDatum(NULL);
	}
}
/*
 * cfs_compress - compress src_size bytes from src into dst with pglz,
 * using the "always" strategy, and return the result of pglz_compress()
 * converted to size_t.
 *
 * NOTE(review): dst_size is accepted but never checked here -- the caller
 * presumably guarantees dst can hold PGLZ_MAX_OUTPUT(src_size) bytes;
 * confirm at call sites.
 *
 * NOTE(review): if pglz_compress() signals failure with a negative value,
 * the implicit conversion to size_t yields a huge number rather than an
 * error code -- callers presumably treat any result >= src_size as "not
 * compressed"; verify against the callers.
 */
size_t
cfs_compress(void* dst, size_t dst_size, void const* src, size_t src_size)
{
	return pglz_compress(src, src_size, dst, PGLZ_strategy_always);
}
/*
 * FlushStripe compresses the data in the current stripe, flushes the compressed
 * data into the file, and returns the stripe metadata. To do this, the function
 * first creates the data buffers, and then updates position and length statistics
 * in stripe's skip list. Then, the function creates the skip list and footer
 * buffers. Finally, the function flushes the skip list, data, and footer buffers
 * to the file.
 */
static StripeMetadata
FlushStripe(TableWriteState *writeState)
{
	StripeMetadata stripeMetadata = {0, 0, 0, 0};
	uint64 skipListLength = 0;
	uint64 dataLength = 0;
	StringInfo **existsBufferArray = NULL;
	StringInfo **valueBufferArray = NULL;
	CompressionType **valueCompressionTypeArray = NULL;
	StringInfo *skipListBufferArray = NULL;
	StripeFooter *stripeFooter = NULL;
	StringInfo stripeFooterBuffer = NULL;
	uint32 columnIndex = 0;
	uint32 blockIndex = 0;

	FILE *tableFile = writeState->tableFile;
	StripeData *stripeData = writeState->stripeData;
	StripeSkipList *stripeSkipList = writeState->stripeSkipList;
	CompressionType compressionType = writeState->compressionType;
	TupleDesc tupleDescriptor = writeState->tupleDescriptor;
	uint32 columnCount = tupleDescriptor->natts;
	uint32 blockCount = stripeSkipList->blockCount;

	/* create "exists" and "value" buffers */
	existsBufferArray = CreateExistsBufferArray(stripeData->columnDataArray,
												stripeSkipList);
	valueBufferArray = CreateValueBufferArray(stripeData->columnDataArray,
											  stripeSkipList, tupleDescriptor);

	/*
	 * Per-column, per-block record of which compression was actually applied,
	 * so the skip list can later describe each block accurately.
	 */
	valueCompressionTypeArray = palloc0(columnCount * sizeof(CompressionType *));

	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		CompressionType *blockCompressionTypeArray =
			palloc0(blockCount * sizeof(CompressionType));
		valueCompressionTypeArray[columnIndex] = blockCompressionTypeArray;

		for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
		{
			StringInfo valueBuffer = NULL;
			uint64 maximumLength = 0;
			PGLZ_Header *compressedData = NULL;
			bool compressable = false;

			if (compressionType == COMPRESSION_NONE)
			{
				blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE;
				continue;
			}

			/* the only other supported compression type is pg_lz for now */
			Assert(compressionType == COMPRESSION_PG_LZ);

			valueBuffer = valueBufferArray[columnIndex][blockIndex];
			maximumLength = PGLZ_MAX_OUTPUT(valueBuffer->len);
			compressedData = palloc0(maximumLength);
			compressable = pglz_compress((const char *) valueBuffer->data,
										 valueBuffer->len, compressedData,
										 PGLZ_strategy_always);
			if (compressable)
			{
				/*
				 * Swap the block's buffer to the compressed copy in place.
				 * NOTE(review): the new len is VARSIZE(compressedData), which
				 * presumably includes the pglz header bytes -- the reader
				 * side must use the same convention; verify against the
				 * corresponding read path.
				 */
				pfree(valueBuffer->data);
				valueBuffer->data = (char *) compressedData;
				valueBuffer->len = VARSIZE(compressedData);
				valueBuffer->maxlen = maximumLength;
				blockCompressionTypeArray[blockIndex] = COMPRESSION_PG_LZ;
			}
			else
			{
				/* incompressible block: keep the raw buffer */
				pfree(compressedData);
				blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE;
			}
		}
	}

	/* update buffer sizes and positions in stripe skip list */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray;
		ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex];
		uint32 blockCount = stripeSkipList->blockCount;
		uint32 blockIndex = 0;
		uint64 currentExistsBlockOffset = 0;
		uint64 currentValueBlockOffset = 0;

		for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
		{
			uint64 existsBufferSize = existsBufferArray[columnIndex][blockIndex]->len;
			uint64 valueBufferSize = valueBufferArray[columnIndex][blockIndex]->len;
			CompressionType valueCompressionType =
				valueCompressionTypeArray[columnIndex][blockIndex];
			ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex];

			/* offsets are running totals within this column's section */
			blockSkipNode->existsBlockOffset = currentExistsBlockOffset;
			blockSkipNode->existsLength = existsBufferSize;
			blockSkipNode->valueBlockOffset = currentValueBlockOffset;
			blockSkipNode->valueLength = valueBufferSize;
			blockSkipNode->valueCompressionType = valueCompressionType;

			currentExistsBlockOffset += existsBufferSize;
			currentValueBlockOffset += valueBufferSize;
		}
	}

	/* create skip list and footer buffers */
	skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor);
	stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray);
	stripeFooterBuffer = SerializeStripeFooter(stripeFooter);

	/*
	 * Each stripe has three sections:
	 * (1) Skip list, which contains statistics for each column block, and can
	 * be used to skip reading row blocks that are refuted by WHERE clause list,
	 * (2) Data section, in which we store data for each column continuously.
	 * We store data for each column in blocks. For each block, we store two
	 * buffers: "exists" buffer, and "value" buffer. "exists" buffer tells
	 * which values are not NULL. "value" buffer contains values for present
	 * values. For each column, we first store all "exists" buffers, and then
	 * all "value" buffers.
	 * (3) Stripe footer, which contains the skip list buffer size, exists buffer
	 * size, and value buffer size for each of the columns.
	 *
	 * We start by flushing the skip list buffers.
	 */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		StringInfo skipListBuffer = skipListBufferArray[columnIndex];
		WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len);
	}

	/* then, we flush the data buffers */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		uint32 blockIndex = 0;

		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			StringInfo existsBuffer = existsBufferArray[columnIndex][blockIndex];
			WriteToFile(tableFile, existsBuffer->data, existsBuffer->len);
		}

		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			StringInfo valueBuffer = valueBufferArray[columnIndex][blockIndex];
			WriteToFile(tableFile, valueBuffer->data, valueBuffer->len);
		}
	}

	/* finally, we flush the footer buffer */
	WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len);

	/* set stripe metadata: totals are summed from the footer's per-column sizes */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		skipListLength += stripeFooter->skipListSizeArray[columnIndex];
		dataLength += stripeFooter->existsSizeArray[columnIndex];
		dataLength += stripeFooter->valueSizeArray[columnIndex];
	}

	stripeMetadata.fileOffset = writeState->currentFileOffset;
	stripeMetadata.skipListLength = skipListLength;
	stripeMetadata.dataLength = dataLength;
	stripeMetadata.footerLength = stripeFooterBuffer->len;

	/* advance current file offset */
	writeState->currentFileOffset += skipListLength;
	writeState->currentFileOffset += dataLength;
	writeState->currentFileOffset += stripeFooterBuffer->len;

	return stripeMetadata;
}