/* * FlushStripe flushes current stripe data into the file. The function first ensures * the last data block for each column is properly serialized and compressed. Then, * the function creates the skip list and footer buffers. Finally, the function * flushes the skip list, data, and footer buffers to the file. */ static StripeMetadata FlushStripe(TableWriteState *writeState) { StripeMetadata stripeMetadata = {0, 0, 0, 0}; uint64 skipListLength = 0; uint64 dataLength = 0; StringInfo *skipListBufferArray = NULL; StripeFooter *stripeFooter = NULL; StringInfo stripeFooterBuffer = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; TableFooter *tableFooter = writeState->tableFooter; FILE *tableFile = writeState->tableFile; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; TupleDesc tupleDescriptor = writeState->tupleDescriptor; uint32 columnCount = tupleDescriptor->natts; uint32 blockCount = stripeSkipList->blockCount; uint32 blockRowCount = tableFooter->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; /* * check if the last block needs serialization , the last block was not serialized * if it was not full yet, e.g. (rowCount > 0) */ if (lastBlockRowCount > 0) { SerializeBlockData(writeState, lastBlockIndex, lastBlockRowCount); } /* update buffer sizes and positions in stripe skip list */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; uint64 currentExistsBlockOffset = 0; uint64 currentValueBlockOffset = 0; ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; uint64 existsBufferSize = blockBuffers->existsBuffer->len; uint64 valueBufferSize = blockBuffers->valueBuffer->len; CompressionType valueCompressionType = blockBuffers->valueCompressionType; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; blockSkipNode->existsBlockOffset = currentExistsBlockOffset; blockSkipNode->existsLength = existsBufferSize; blockSkipNode->valueBlockOffset = currentValueBlockOffset; blockSkipNode->valueLength = valueBufferSize; blockSkipNode->valueCompressionType = valueCompressionType; currentExistsBlockOffset += existsBufferSize; currentValueBlockOffset += valueBufferSize; } } /* create skip list and footer buffers */ skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); stripeFooterBuffer = SerializeStripeFooter(stripeFooter); /* * Each stripe has three sections: * (1) Skip list, which contains statistics for each column block, and can * be used to skip reading row blocks that are refuted by WHERE clause list, * (2) Data section, in which we store data for each column continuously. * We store data for each for each column in blocks. For each block, we * store two buffers: "exists" buffer, and "value" buffer. "exists" buffer * tells which values are not NULL. "value" buffer contains values for * present values. For each column, we first store all "exists" buffers, * and then all "value" buffers. * (3) Stripe footer, which contains the skip list buffer size, exists buffer * size, and value buffer size for each of the columns. * * We start by flushing the skip list buffers. */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { StringInfo skipListBuffer = skipListBufferArray[columnIndex]; WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len); } /* then, we flush the data buffers */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; uint32 blockIndex = 0; for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); } for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); } } /* finally, we flush the footer buffer */ WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len); /* set stripe metadata */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { skipListLength += stripeFooter->skipListSizeArray[columnIndex]; dataLength += stripeFooter->existsSizeArray[columnIndex]; dataLength += stripeFooter->valueSizeArray[columnIndex]; } stripeMetadata.fileOffset = writeState->currentFileOffset; stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; stripeMetadata.footerLength = stripeFooterBuffer->len; /* advance current file offset */ writeState->currentFileOffset += skipListLength; writeState->currentFileOffset += dataLength; writeState->currentFileOffset += stripeFooterBuffer->len; return stripeMetadata; }
/* * FlushStripe compresses the data in the current stripe, flushes the compressed * data into the file, and returns the stripe metadata. To do this, the function * first creates the data buffers, and then updates position and length statistics * in stripe's skip list. Then, the function creates the skip list and footer * buffers. Finally, the function flushes the skip list, data, and footer buffers * to the file. */ static StripeMetadata FlushStripe(TableWriteState *writeState) { StripeMetadata stripeMetadata = {0, 0, 0, 0}; uint64 skipListLength = 0; uint64 dataLength = 0; StringInfo **existsBufferArray = NULL; StringInfo **valueBufferArray = NULL; CompressionType **valueCompressionTypeArray = NULL; StringInfo *skipListBufferArray = NULL; StripeFooter *stripeFooter = NULL; StringInfo stripeFooterBuffer = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; FILE *tableFile = writeState->tableFile; StripeData *stripeData = writeState->stripeData; StripeSkipList *stripeSkipList = writeState->stripeSkipList; CompressionType compressionType = writeState->compressionType; TupleDesc tupleDescriptor = writeState->tupleDescriptor; uint32 columnCount = tupleDescriptor->natts; uint32 blockCount = stripeSkipList->blockCount; /* create "exists" and "value" buffers */ existsBufferArray = CreateExistsBufferArray(stripeData->columnDataArray, stripeSkipList); valueBufferArray = CreateValueBufferArray(stripeData->columnDataArray, stripeSkipList, tupleDescriptor); valueCompressionTypeArray = palloc0(columnCount * sizeof(CompressionType *)); for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { CompressionType *blockCompressionTypeArray = palloc0(blockCount * sizeof(CompressionType)); valueCompressionTypeArray[columnIndex] = blockCompressionTypeArray; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { StringInfo valueBuffer = NULL; uint64 maximumLength = 0; PGLZ_Header *compressedData = NULL; bool compressable = false; if (compressionType == COMPRESSION_NONE) { blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE; continue; } /* the only other supported compression type is pg_lz for now */ Assert(compressionType == COMPRESSION_PG_LZ); valueBuffer = valueBufferArray[columnIndex][blockIndex]; maximumLength = PGLZ_MAX_OUTPUT(valueBuffer->len); compressedData = palloc0(maximumLength); compressable = pglz_compress((const char *) valueBuffer->data, valueBuffer->len, compressedData, PGLZ_strategy_always); if (compressable) { pfree(valueBuffer->data); valueBuffer->data = (char *) compressedData; valueBuffer->len = VARSIZE(compressedData); valueBuffer->maxlen = maximumLength; blockCompressionTypeArray[blockIndex] = COMPRESSION_PG_LZ; } else { pfree(compressedData); blockCompressionTypeArray[blockIndex] = COMPRESSION_NONE; } } } /* update buffer sizes and positions in stripe skip list */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; uint32 blockCount = stripeSkipList->blockCount; uint32 blockIndex = 0; uint64 currentExistsBlockOffset = 0; uint64 currentValueBlockOffset = 0; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { uint64 existsBufferSize = existsBufferArray[columnIndex][blockIndex]->len; uint64 valueBufferSize = valueBufferArray[columnIndex][blockIndex]->len; CompressionType valueCompressionType = valueCompressionTypeArray[columnIndex][blockIndex]; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; blockSkipNode->existsBlockOffset = currentExistsBlockOffset; blockSkipNode->existsLength = existsBufferSize; blockSkipNode->valueBlockOffset = currentValueBlockOffset; blockSkipNode->valueLength = valueBufferSize; blockSkipNode->valueCompressionType = valueCompressionType; currentExistsBlockOffset += existsBufferSize; currentValueBlockOffset += valueBufferSize; } } /* create skip list and footer buffers */ skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); stripeFooterBuffer = SerializeStripeFooter(stripeFooter); /* * Each stripe has three sections: * (1) Skip list, which contains statistics for each column block, and can * be used to skip reading row blocks that are refuted by WHERE clause list, * (2) Data section, in which we store data for each column continuously. * We store data for each for each column in blocks. For each block, we * store two buffers: "exists" buffer, and "value" buffer. "exists" buffer * tells which values are not NULL. "value" buffer contains values for * present values. For each column, we first store all "exists" buffers, * and then all "value" buffers. * (3) Stripe footer, which contains the skip list buffer size, exists buffer * size, and value buffer size for each of the columns. * * We start by flushing the skip list buffers. */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { StringInfo skipListBuffer = skipListBufferArray[columnIndex]; WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len); } /* then, we flush the data buffers */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { uint32 blockIndex = 0; for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { StringInfo existsBuffer = existsBufferArray[columnIndex][blockIndex]; WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); } for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { StringInfo valueBuffer = valueBufferArray[columnIndex][blockIndex]; WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); } } /* finally, we flush the footer buffer */ WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len); /* set stripe metadata */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { skipListLength += stripeFooter->skipListSizeArray[columnIndex]; dataLength += stripeFooter->existsSizeArray[columnIndex]; dataLength += stripeFooter->valueSizeArray[columnIndex]; } stripeMetadata.fileOffset = writeState->currentFileOffset; stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; stripeMetadata.footerLength = stripeFooterBuffer->len; /* advance current file offset */ writeState->currentFileOffset += skipListLength; writeState->currentFileOffset += dataLength; writeState->currentFileOffset += stripeFooterBuffer->len; return stripeMetadata; }