/* ----------
 * toast_compress_datum -
 *
 *	Produce a compressed copy of a varlena datum.
 *
 *	Returns a pointer-Datum for the palloc'd compressed value, or NULL if
 *	compression failed or would not shrink the value.  We must never use
 *	compressed data if it'd expand the tuple!
 *
 *	We use the VAR{SIZE,DATA}_ANY macros so short-header varlenas are
 *	handled without copying.  External or already-compressed inputs are
 *	not allowed (asserted below).
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	varattrib  *compressed;
	int32		rawsize = VARSIZE_ANY_EXHDR_D(value);

	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));

	/*
	 * Don't waste a palloc cycle on values outside the size window the
	 * default strategy is willing to compress.
	 */
	if (rawsize < PGLZ_strategy_default->min_input_size ||
		rawsize > PGLZ_strategy_default->max_input_size)
		return PointerGetDatum(NULL);

	compressed = (varattrib *) palloc(PGLZ_MAX_OUTPUT(rawsize));

	/*
	 * Give up unless pglz succeeded AND the result is strictly smaller than
	 * the original representation.
	 */
	if (!pglz_compress(VARDATA_ANY_D(value), rawsize,
					   (PGLZ_Header *) compressed, PGLZ_strategy_default) ||
		VARSIZE(compressed) >= VARSIZE_ANY_D(value))
	{
		/* incompressible data */
		pfree(compressed);
		return PointerGetDatum(NULL);
	}

	/* successful compression */
	VARATT_SET_COMPRESSED(compressed);
	return PointerGetDatum(compressed);
}
/* ----------
 * toast_compress_datum -
 *
 *	Produce a compressed copy of a varlena datum.
 *
 *	Returns a pointer-Datum for the palloc'd compressed value, or NULL if
 *	compression failed or would not shrink the value.  We must never use
 *	compressed data if it'd expand the tuple!
 *
 *	We use the VAR{SIZE,DATA}_ANY macros so short-header varlenas are
 *	handled without copying.  External or already-compressed inputs are
 *	not allowed (asserted below).
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	struct varlena *compressed;
	int32		rawsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
	bool		ok;

	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));

	/*
	 * Don't waste a palloc cycle on values outside the size window the
	 * default strategy is willing to compress.
	 */
	if (rawsize < PGLZ_strategy_default->min_input_size ||
		rawsize > PGLZ_strategy_default->max_input_size)
		return PointerGetDatum(NULL);

	compressed = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(rawsize));

	/*
	 * We recheck the actual size even when pglz_compress() reports success,
	 * because it may be satisfied with saving as little as one byte -- which
	 * could become a net loss after header and alignment padding.  Worst
	 * case, the compressed format needs three padding bytes (plus header,
	 * included in VARSIZE), while the uncompressed form could fit in a
	 * one-byte header with no padding.  Hence we demand a saving of more
	 * than 2 bytes to guarantee a real gain.
	 */
	ok = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), rawsize,
					   (PGLZ_Header *) compressed, PGLZ_strategy_default);
	if (ok && VARSIZE(compressed) < rawsize - 2)
	{
		/* successful compression */
		return PointerGetDatum(compressed);
	}

	/* incompressible data */
	pfree(compressed);
	return PointerGetDatum(NULL);
}
/*
 * Create a compressed version of a backup block image.
 *
 * Returns FALSE if compression fails (i.e., the compressed result would be
 * at least as large as the original once overhead is counted).  Otherwise
 * returns TRUE and sets *dlen to the compressed image length.
 */
static bool
XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
						char *dest, uint16 *dlen)
{
	int32		uncompressed_len = BLCKSZ - hole_length;
	int32		compressed_len;
	int32		header_overhead;
	char	   *input;
	char		scratch[BLCKSZ];

	if (hole_length == 0)
	{
		/* no hole: compress the page in place, no extra header needed */
		input = page;
		header_overhead = 0;
	}
	else
	{
		/*
		 * Squeeze the hole out of the page into a scratch copy.  The
		 * compressed record then needs an extra header to describe the
		 * hole, which counts against the space savings below.
		 */
		input = scratch;
		memcpy(input, page, hole_offset);
		memcpy(input + hole_offset,
			   page + hole_offset + hole_length,
			   BLCKSZ - (hole_offset + hole_length));
		header_overhead = SizeOfXLogRecordBlockCompressHeader;
	}

	compressed_len = pglz_compress(input, uncompressed_len, dest,
								   PGLZ_strategy_default);

	/*
	 * Even when pglz_compress() reports success, insist that the bytes
	 * saved exceed the extra header bytes the compressed form requires.
	 */
	if (compressed_len < 0 ||
		compressed_len + header_overhead >= uncompressed_len)
		return false;

	*dlen = (uint16) compressed_len;	/* successful compression */
	return true;
}
/* * compress_data * * Compress the bytea buffer and return the result as bytea. */ Datum compress_data(PG_FUNCTION_ARGS) { bytea *raw_data = PG_GETARG_BYTEA_P(0); bytea *res; int32 compressed_len; char *compressed_data; PGLZ_Strategy strategy; memcpy(&strategy, (PGLZ_Strategy *) PGLZ_strategy_always, sizeof(PGLZ_Strategy)); /* Get custom values if specified by user */ if (PG_NARGS() == 7) { strategy.min_input_size = PG_GETARG_INT32(1); strategy.max_input_size = PG_GETARG_INT32(2); strategy.min_comp_rate = PG_GETARG_INT32(3); strategy.first_success_by = PG_GETARG_INT32(4); strategy.match_size_good = PG_GETARG_INT32(5); strategy.match_size_drop = PG_GETARG_INT32(6); } /* Compress data in build */ compressed_data = palloc(PGLZ_MAX_OUTPUT(VARSIZE(raw_data) - VARHDRSZ)); compressed_len = pglz_compress(VARDATA(raw_data), VARSIZE(raw_data) - VARHDRSZ, compressed_data, &strategy); /* if compression failed return the original data */ if (compressed_len < 0) PG_RETURN_BYTEA_P(raw_data); /* Build result */ res = (bytea *) palloc(VARHDRSZ + compressed_len); SET_VARSIZE(res, compressed_len + VARHDRSZ); memcpy(VARDATA(res), compressed_data, compressed_len); pfree(compressed_data); PG_RETURN_BYTEA_P(res); }
/* ----------
 * toast_compress_datum -
 *
 *	Create a compressed version of a varlena datum
 *
 *	If we fail (ie, compressed result is actually bigger than original)
 *	then return NULL.  We must not use compressed data if it'd expand
 *	the tuple!
 * ----------
 */
Datum
toast_compress_datum(Datum value)
{
	varattrib  *tmp;

	/*
	 * Worst-case output buffer: the pglz header plus the full original
	 * size.  NOTE(review): the return value of pglz_compress() is not
	 * checked here -- this legacy API presumably always fills in tmp's
	 * header so the size comparison below is valid even for incompressible
	 * input; confirm against the pg_lzcompress version this was built with.
	 */
	tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value));
	pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ,
				  (PGLZ_Header *) tmp, PGLZ_strategy_default);

	/* keep the result only if it is strictly smaller than the original */
	if (VARATT_SIZE(tmp) < VARATT_SIZE(value))
	{
		/* successful compression: flag the varlena header as compressed */
		VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
		return PointerGetDatum(tmp);
	}
	else
	{
		/* incompressible data */
		pfree(tmp);
		return PointerGetDatum(NULL);
	}
}
/*
 * cfs_compress - compress src_size bytes from src into dst with pglz,
 * using the "always" strategy, and return the result of pglz_compress()
 * converted to size_t.
 *
 * NOTE(review): dst_size is accepted but never checked here -- the caller
 * presumably guarantees dst can hold PGLZ_MAX_OUTPUT(src_size) bytes;
 * confirm at call sites.
 *
 * NOTE(review): if pglz_compress() signals failure with a negative value,
 * the implicit conversion to size_t yields a huge number rather than an
 * error code -- callers presumably treat any result >= src_size as "not
 * compressed"; verify against the callers.
 */
size_t
cfs_compress(void* dst, size_t dst_size, void const* src, size_t src_size)
{
	return pglz_compress(src, src_size, dst, PGLZ_strategy_always);
}
/*
 * FlushStripe compresses the data in the current stripe, flushes the compressed
 * data into the file, and returns the stripe metadata. To do this, the function
 * first creates the data buffers, and then updates position and length statistics
 * in stripe's skip list. Then, the function creates the skip list and footer
 * buffers. Finally, the function flushes the skip list, data, and footer buffers
 * to the file.
 */
static StripeMetadata
FlushStripe(TableWriteState *writeState)
{
	StripeMetadata stripeMetadata = {0, 0, 0, 0};
	uint64 skipListLength = 0;
	uint64 dataLength = 0;
	StringInfo **existsBufferArray = NULL;
	StringInfo **valueBufferArray = NULL;
	CompressionType **valueCompressionTypeArray = NULL;
	StringInfo *skipListBufferArray = NULL;
	StripeFooter *stripeFooter = NULL;
	StringInfo stripeFooterBuffer = NULL;
	uint32 columnIndex = 0;
	uint32 blockIndex = 0;

	FILE *tableFile = writeState->tableFile;
	StripeData *stripeData = writeState->stripeData;
	StripeSkipList *stripeSkipList = writeState->stripeSkipList;
	CompressionType compressionType = writeState->compressionType;
	TupleDesc tupleDescriptor = writeState->tupleDescriptor;
	uint32 columnCount = tupleDescriptor->natts;
	uint32 blockCount = stripeSkipList->blockCount;

	/* create "exists" and "value" buffers */
	existsBufferArray = CreateExistsBufferArray(stripeData->columnDataArray,
												stripeSkipList);
	valueBufferArray = CreateValueBufferArray(stripeData->columnDataArray,
											  stripeSkipList, tupleDescriptor);

	/*
	 * Per-column, per-block record of which compression was actually applied,
	 * so the skip list can later describe each block accurately.
	 */
	valueCompressionTypeArray = palloc0(columnCount * sizeof(CompressionType *));

	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		CompressionType *blockCompressionTypeArray =
			palloc0(blockCount * sizeof(CompressionType));
		valueCompressionTypeArray[columnIndex] = blockCompressionTypeArray;

		for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
		{
			StringInfo valueBuffer = NULL;
			uint64 maximumLength = 0;
			PGLZ_Header *compressedData = NULL;
			bool compressable = false;

			if (compressionType == COMPRESSION_NONE)
			{
				blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE;
				continue;
			}

			/* the only other supported compression type is pg_lz for now */
			Assert(compressionType == COMPRESSION_PG_LZ);

			valueBuffer = valueBufferArray[columnIndex][blockIndex];
			maximumLength = PGLZ_MAX_OUTPUT(valueBuffer->len);
			compressedData = palloc0(maximumLength);
			compressable = pglz_compress((const char *) valueBuffer->data,
										 valueBuffer->len, compressedData,
										 PGLZ_strategy_always);
			if (compressable)
			{
				/*
				 * Swap the block's buffer to the compressed copy in place.
				 * NOTE(review): the new len is VARSIZE(compressedData), which
				 * presumably includes the pglz header bytes -- the reader
				 * side must use the same convention; verify against the
				 * corresponding read path.
				 */
				pfree(valueBuffer->data);
				valueBuffer->data = (char *) compressedData;
				valueBuffer->len = VARSIZE(compressedData);
				valueBuffer->maxlen = maximumLength;
				blockCompressionTypeArray[blockIndex] = COMPRESSION_PG_LZ;
			}
			else
			{
				/* incompressible block: keep the raw buffer */
				pfree(compressedData);
				blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE;
			}
		}
	}

	/* update buffer sizes and positions in stripe skip list */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray;
		ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex];
		uint32 blockCount = stripeSkipList->blockCount;
		uint32 blockIndex = 0;
		uint64 currentExistsBlockOffset = 0;
		uint64 currentValueBlockOffset = 0;

		for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
		{
			uint64 existsBufferSize = existsBufferArray[columnIndex][blockIndex]->len;
			uint64 valueBufferSize = valueBufferArray[columnIndex][blockIndex]->len;
			CompressionType valueCompressionType =
				valueCompressionTypeArray[columnIndex][blockIndex];
			ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex];

			/* offsets are running totals within this column's section */
			blockSkipNode->existsBlockOffset = currentExistsBlockOffset;
			blockSkipNode->existsLength = existsBufferSize;
			blockSkipNode->valueBlockOffset = currentValueBlockOffset;
			blockSkipNode->valueLength = valueBufferSize;
			blockSkipNode->valueCompressionType = valueCompressionType;

			currentExistsBlockOffset += existsBufferSize;
			currentValueBlockOffset += valueBufferSize;
		}
	}

	/* create skip list and footer buffers */
	skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor);
	stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray);
	stripeFooterBuffer = SerializeStripeFooter(stripeFooter);

	/*
	 * Each stripe has three sections:
	 * (1) Skip list, which contains statistics for each column block, and can
	 * be used to skip reading row blocks that are refuted by WHERE clause list,
	 * (2) Data section, in which we store data for each column continuously.
	 * We store data for each column in blocks. For each block, we store two
	 * buffers: "exists" buffer, and "value" buffer. "exists" buffer tells
	 * which values are not NULL. "value" buffer contains values for present
	 * values. For each column, we first store all "exists" buffers, and then
	 * all "value" buffers.
	 * (3) Stripe footer, which contains the skip list buffer size, exists buffer
	 * size, and value buffer size for each of the columns.
	 *
	 * We start by flushing the skip list buffers.
	 */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		StringInfo skipListBuffer = skipListBufferArray[columnIndex];
		WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len);
	}

	/* then, we flush the data buffers */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		uint32 blockIndex = 0;

		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			StringInfo existsBuffer = existsBufferArray[columnIndex][blockIndex];
			WriteToFile(tableFile, existsBuffer->data, existsBuffer->len);
		}

		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			StringInfo valueBuffer = valueBufferArray[columnIndex][blockIndex];
			WriteToFile(tableFile, valueBuffer->data, valueBuffer->len);
		}
	}

	/* finally, we flush the footer buffer */
	WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len);

	/* set stripe metadata: totals are summed from the footer's per-column sizes */
	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		skipListLength += stripeFooter->skipListSizeArray[columnIndex];
		dataLength += stripeFooter->existsSizeArray[columnIndex];
		dataLength += stripeFooter->valueSizeArray[columnIndex];
	}

	stripeMetadata.fileOffset = writeState->currentFileOffset;
	stripeMetadata.skipListLength = skipListLength;
	stripeMetadata.dataLength = dataLength;
	stripeMetadata.footerLength = stripeFooterBuffer->len;

	/* advance current file offset */
	writeState->currentFileOffset += skipListLength;
	writeState->currentFileOffset += dataLength;
	writeState->currentFileOffset += stripeFooterBuffer->len;

	return stripeMetadata;
}