示例#1
0
文件: tuptoaster.c 项目: nskyzh/gpdb
/* ----------
 * toast_compress_datum -
 *
 *	Create a compressed version of a varlena datum
 *
 *	If we fail (ie, compressed result is actually bigger than original)
 *	then return NULL.  We must not use compressed data if it'd expand
 *	the tuple!
 *
 *	We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
 *	copying them.  But we can't handle external or compressed datums.
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	varattrib  *tmp;
	int32		valsize = VARSIZE_ANY_EXHDR_D(value);

	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));

	/*
	 * No point in wasting a palloc cycle if value size is out of the allowed
	 * range for compression
	 */
	if (valsize < PGLZ_strategy_default->min_input_size ||
		valsize > PGLZ_strategy_default->max_input_size)
		return PointerGetDatum(NULL);

	tmp = (varattrib *) palloc(PGLZ_MAX_OUTPUT(valsize));

	/*
	 * We recheck the actual size even if pglz_compress() reports success,
	 * because it might be satisfied with having saved as little as one byte
	 * in the compressed data --- which could turn into a net loss once you
	 * consider header and alignment padding.  Worst case, the compressed
	 * format might require three padding bytes (plus header, which is
	 * included in VARSIZE(tmp)), whereas the uncompressed format would take
	 * only one header byte and no padding if the value is short enough.  So
	 * we insist on a savings of more than 2 bytes to ensure we have a gain.
	 */
	if (pglz_compress(VARDATA_ANY_D(value), valsize,
					  (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
		VARSIZE(tmp) < valsize - 2)
	{
		/* successful compression */
		VARATT_SET_COMPRESSED(tmp);
		return PointerGetDatum(tmp);
	}
	else
	{
		/* incompressible data */
		pfree(tmp);
		return PointerGetDatum(NULL);
	}
}
示例#2
0
/* ----------
 * toast_compress_datum -
 *
 *	Produce a pglz-compressed copy of a varlena datum, or return NULL
 *	(as a Datum) when compression would not actually shrink the value.
 *	Callers must never store compressed data that would expand the tuple.
 *
 *	VAR{SIZE,DATA}_ANY is used so short varlenas are handled without an
 *	extra copy; external and already-compressed inputs are not accepted.
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	struct varlena *compressed;
	int32		rawSize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));

	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));

	/*
	 * Skip the palloc cycle entirely when the input size falls outside the
	 * strategy's allowed compression window.
	 */
	if (rawSize < PGLZ_strategy_default->min_input_size ||
		rawSize > PGLZ_strategy_default->max_input_size)
		return PointerGetDatum(NULL);

	compressed = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(rawSize));

	/*
	 * Even when pglz_compress() claims success, re-verify the size: it may
	 * have saved as little as one byte, which can become a net loss once
	 * varlena header bytes and alignment padding are accounted for.  The
	 * compressed form may need up to three padding bytes plus its header
	 * (counted in VARSIZE), while a short uncompressed value needs only a
	 * single header byte and no padding.  Hence we demand a saving of more
	 * than 2 bytes before keeping the compressed copy.
	 */
	if (!pglz_compress(VARDATA_ANY(DatumGetPointer(value)), rawSize,
					   (PGLZ_Header *) compressed, PGLZ_strategy_default) ||
		VARSIZE(compressed) >= rawSize - 2)
	{
		/* incompressible data: discard the scratch buffer */
		pfree(compressed);
		return PointerGetDatum(NULL);
	}

	/* successful compression */
	return PointerGetDatum(compressed);
}
示例#3
0
/*
 * compress_data
 *
 * Compress the bytea buffer and return the result as bytea.
 */
Datum
compress_data(PG_FUNCTION_ARGS)
{
	bytea	*raw_data = PG_GETARG_BYTEA_P(0);
	bytea   *res;
	int32	compressed_len;
	char	*compressed_data;
	PGLZ_Strategy strategy;

	memcpy(&strategy, (PGLZ_Strategy *) PGLZ_strategy_always,
		   sizeof(PGLZ_Strategy));

	/* Get custom values if specified by user */
	if (PG_NARGS() == 7)
	{
		strategy.min_input_size = PG_GETARG_INT32(1);
		strategy.max_input_size = PG_GETARG_INT32(2);
		strategy.min_comp_rate = PG_GETARG_INT32(3);
		strategy.first_success_by = PG_GETARG_INT32(4);
		strategy.match_size_good = PG_GETARG_INT32(5);
		strategy.match_size_drop = PG_GETARG_INT32(6);
	}

	/* Compress data in build */
	compressed_data = palloc(PGLZ_MAX_OUTPUT(VARSIZE(raw_data) - VARHDRSZ));
	compressed_len = pglz_compress(VARDATA(raw_data),
								   VARSIZE(raw_data) - VARHDRSZ,
								   compressed_data,
								   &strategy);

	/* if compression failed return the original data */
	if (compressed_len < 0)
		PG_RETURN_BYTEA_P(raw_data);

	/* Build result */
	res = (bytea *) palloc(VARHDRSZ + compressed_len);
	SET_VARSIZE(res, compressed_len + VARHDRSZ);
	memcpy(VARDATA(res), compressed_data, compressed_len);
	pfree(compressed_data);
	PG_RETURN_BYTEA_P(res);
}
/* ----------
 * toast_compress_datum -
 *
 *	Create a compressed version of a varlena datum
 *
 *	If we fail (ie, compressed result is actually bigger than original)
 *	then return NULL.  We must not use compressed data if it'd expand
 *	the tuple!
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	varattrib  *tmp;
	/* payload size excluding the 4-byte varlena header */
	int32		valsize = VARATT_SIZE(value) - VARHDRSZ;

	/*
	 * Allocate a worst-case output buffer; PGLZ_MAX_OUTPUT accounts for the
	 * PGLZ header plus any possible expansion of incompressible input.
	 */
	tmp = (varattrib *) palloc(PGLZ_MAX_OUTPUT(valsize));
	/*
	 * NOTE(review): this appears to be the old boolean-returning pglz API
	 * that writes into a PGLZ_Header -- confirm against the project's
	 * pg_lzcompress.h.  Even on reported success the result is kept only
	 * when strictly smaller than the original datum, since equal-or-larger
	 * output would expand the tuple.
	 */
	if (pglz_compress(VARATT_DATA(value), valsize,
					  (PGLZ_Header *) tmp, PGLZ_strategy_default) &&
		VARATT_SIZE(tmp) < VARATT_SIZE(value))
	{
		/* successful compression: set the compressed flag in the size word */
		VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
		return PointerGetDatum(tmp);
	}
	else
	{
		/* incompressible data: free the buffer and signal failure via NULL */
		pfree(tmp);
		return PointerGetDatum(NULL);
	}
}
示例#5
0
/*
 * SerializeBlockData serializes and compresses block data at the given block
 * index, using the requested compression type, for every column in the
 * stripe.  Results are stored into each column's ColumnBlockBuffers.
 */
static void
SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCount)
{
	uint32 columnIndex = 0;
	StripeBuffers *stripeBuffers = writeState->stripeBuffers;
	ColumnBlockData **blockDataArray = writeState->blockDataArray;
	CompressionType requestedCompressionType = writeState->compressionType;
	const uint32 columnCount = stripeBuffers->columnCount;
	StringInfo compressionBuffer = writeState->compressionBuffer;

	/* serialize "exists" values; data values are already serialized */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex];
		ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex];
		ColumnBlockData *blockData = blockDataArray[columnIndex];

		blockBuffers->existsBuffer = SerializeBoolArray(blockData->existsArray, rowCount);
	}

	/*
	 * Check and compress the value buffers.  If a value buffer is not
	 * compressible, keep it uncompressed; either way, record the compression
	 * type that was actually used.
	 */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		uint64 maximumLength = 0;
		bool compressable = false;
		ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex];
		ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex];
		ColumnBlockData *blockData = blockDataArray[columnIndex];
		StringInfo serializedValueBuffer = NULL;
		CompressionType actualCompressionType = COMPRESSION_NONE;

		serializedValueBuffer = blockData->valueBuffer;

		/* the only other supported compression type is pg_lz for now */
		Assert(requestedCompressionType == COMPRESSION_NONE ||
			   requestedCompressionType == COMPRESSION_PG_LZ);

		/*
		 * If serializedValueBuffer can be compressed, point it at the shared
		 * scratch buffer holding the compressed bytes and record the
		 * compression type actually used.
		 */
		if (requestedCompressionType == COMPRESSION_PG_LZ)
		{
			maximumLength = PGLZ_MAX_OUTPUT(serializedValueBuffer->len);

			/* reuse the write state's scratch buffer, grown to worst case */
			resetStringInfo(compressionBuffer);
			enlargeStringInfo(compressionBuffer, maximumLength);

			compressable = cstore_pglz_compress((const char *) serializedValueBuffer->data,
										  serializedValueBuffer->len,
										  (PGLZ_Header*)compressionBuffer->data,
										  PGLZ_strategy_always);

			if (compressable)
			{
				serializedValueBuffer = compressionBuffer;
				/*
				 * NOTE(review): assumes cstore_pglz_compress wrote a
				 * varlena-style header so VARSIZE yields the total
				 * compressed length -- confirm against its definition.
				 */
				serializedValueBuffer->len = VARSIZE(compressionBuffer->data);
				actualCompressionType = COMPRESSION_PG_LZ;
			}
		}

		/* store the (possibly compressed) value buffer; CopyStringInfo
		 * snapshots it, since the scratch buffer is reused per block */
		blockBuffers->valueCompressionType = actualCompressionType;
		blockBuffers->valueBuffer = CopyStringInfo(serializedValueBuffer);

		/* valueBuffer needs to be reset for next block's data */
		resetStringInfo(blockData->valueBuffer);
	}
}
/*
 * FlushStripe compresses the data in the current stripe, flushes the compressed
 * data into the file, and returns the stripe metadata. To do this, the function
 * first creates the data buffers, and then updates position and length statistics
 * in stripe's skip list. Then, the function creates the skip list and footer
 * buffers. Finally, the function flushes the skip list, data, and footer buffers
 * to the file.
 */
static StripeMetadata
FlushStripe(TableWriteState *writeState)
{
	StripeMetadata stripeMetadata = {0, 0, 0, 0};
	uint64 skipListLength = 0;
	uint64 dataLength = 0;
	StringInfo **existsBufferArray = NULL;
	StringInfo **valueBufferArray = NULL;
	CompressionType **valueCompressionTypeArray = NULL;
	StringInfo *skipListBufferArray = NULL;
	StripeFooter *stripeFooter = NULL;
	StringInfo stripeFooterBuffer = NULL;
	uint32 columnIndex = 0;
	uint32 blockIndex = 0;

	FILE *tableFile = writeState->tableFile;
	StripeData *stripeData = writeState->stripeData;
	StripeSkipList *stripeSkipList = writeState->stripeSkipList;
	CompressionType compressionType = writeState->compressionType;
	TupleDesc tupleDescriptor = writeState->tupleDescriptor;
	uint32 columnCount = tupleDescriptor->natts;
	uint32 blockCount = stripeSkipList->blockCount;

	/* create "exists" and "value" buffers */
	existsBufferArray = CreateExistsBufferArray(stripeData->columnDataArray,
												stripeSkipList);
	valueBufferArray = CreateValueBufferArray(stripeData->columnDataArray,
											  stripeSkipList, tupleDescriptor);

	/*
	 * Try to compress each block's value buffer in place, recording per
	 * block which compression type was actually used.
	 */
	valueCompressionTypeArray = palloc0(columnCount * sizeof(CompressionType *));
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		CompressionType *blockCompressionTypeArray =
			palloc0(blockCount * sizeof(CompressionType));
		valueCompressionTypeArray[columnIndex] = blockCompressionTypeArray;

		for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
		{
			StringInfo valueBuffer = NULL;
			uint64 maximumLength = 0;
			PGLZ_Header *compressedData = NULL;
			bool compressable = false;

			if (compressionType == COMPRESSION_NONE)
			{
				blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE;
				continue;
			}

			/* the only other supported compression type is pg_lz for now */
			Assert(compressionType == COMPRESSION_PG_LZ);

			valueBuffer = valueBufferArray[columnIndex][blockIndex];
			maximumLength = PGLZ_MAX_OUTPUT(valueBuffer->len);
			compressedData = palloc0(maximumLength);
			compressable = pglz_compress((const char *) valueBuffer->data,
										 valueBuffer->len, compressedData,
										 PGLZ_strategy_always);
			if (compressable)
			{
				/* swap the uncompressed data for the compressed copy */
				pfree(valueBuffer->data);

				valueBuffer->data = (char *) compressedData;
				/*
				 * NOTE(review): assumes the PGLZ header is varlena-compatible
				 * so VARSIZE gives the total compressed length -- confirm.
				 */
				valueBuffer->len = VARSIZE(compressedData);
				valueBuffer->maxlen = maximumLength;

				blockCompressionTypeArray[blockIndex] = COMPRESSION_PG_LZ;
			}
			else
			{
				pfree(compressedData);
				blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE;
			}
		}
	}

	/* update buffer sizes and positions in stripe skip list */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray;
		ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex];
		uint32 blockCount = stripeSkipList->blockCount;
		uint32 blockIndex = 0;
		uint64 currentExistsBlockOffset = 0;
		uint64 currentValueBlockOffset = 0;

		for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
		{
			uint64 existsBufferSize = existsBufferArray[columnIndex][blockIndex]->len;
			uint64 valueBufferSize = valueBufferArray[columnIndex][blockIndex]->len;
			CompressionType valueCompressionType =
				valueCompressionTypeArray[columnIndex][blockIndex];
			ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex];

			blockSkipNode->existsBlockOffset = currentExistsBlockOffset;
			blockSkipNode->existsLength = existsBufferSize;
			blockSkipNode->valueBlockOffset = currentValueBlockOffset;
			blockSkipNode->valueLength = valueBufferSize;
			blockSkipNode->valueCompressionType = valueCompressionType;

			currentExistsBlockOffset += existsBufferSize;
			currentValueBlockOffset += valueBufferSize;
		}
	}

	/* create skip list and footer buffers */
	skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor);
	stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray);
	stripeFooterBuffer = SerializeStripeFooter(stripeFooter);

	/*
	 * Each stripe has three sections:
	 * (1) Skip list, which contains statistics for each column block, and can
	 * be used to skip reading row blocks that are refuted by WHERE clause list,
	 * (2) Data section, in which we store data for each column continuously.
	 * We store data for each column in blocks. For each block, we store two
	 * buffers: "exists" buffer, and "value" buffer. "exists" buffer tells
	 * which values are not NULL. "value" buffer contains values for present
	 * values. For each column, we first store all "exists" buffers, and then
	 * all "value" buffers.
	 * (3) Stripe footer, which contains the skip list buffer size, exists buffer
	 * size, and value buffer size for each of the columns.
	 *
	 * We start by flushing the skip list buffers.
	 */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		StringInfo skipListBuffer = skipListBufferArray[columnIndex];
		WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len);
	}

	/* then, we flush the data buffers */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		uint32 blockIndex = 0;
		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			StringInfo existsBuffer = existsBufferArray[columnIndex][blockIndex];
			WriteToFile(tableFile, existsBuffer->data, existsBuffer->len);
		}

		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			StringInfo valueBuffer = valueBufferArray[columnIndex][blockIndex];
			WriteToFile(tableFile, valueBuffer->data, valueBuffer->len);
		}
	}

	/* finally, we flush the footer buffer */
	WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len);

	/* set stripe metadata: sum per-column sizes into section totals */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		skipListLength += stripeFooter->skipListSizeArray[columnIndex];
		dataLength += stripeFooter->existsSizeArray[columnIndex];
		dataLength += stripeFooter->valueSizeArray[columnIndex];
	}

	stripeMetadata.fileOffset = writeState->currentFileOffset;
	stripeMetadata.skipListLength = skipListLength;
	stripeMetadata.dataLength = dataLength;
	stripeMetadata.footerLength = stripeFooterBuffer->len;

	/* advance current file offset past everything just written */
	writeState->currentFileOffset += skipListLength;
	writeState->currentFileOffset += dataLength;
	writeState->currentFileOffset += stripeFooterBuffer->len;

	return stripeMetadata;
}