/*
 * Flush and close the current segment file under a transaction.
 *
 * Handles mirror loss end transaction work.
 *
 * Reports the committed logical EOF and the uncompressed length through
 * the out parameters.  No error if the current file is already closed
 * (both lengths are reported as 0 in that case).
 */
static void TransactionFlushAndCloseFile(
	ParquetInsertDesc parquetInsertDesc,
	int64 *newLogicalEof,			/* The new EOF for the segment file. */
	int64 *fileLen_uncompressed)
{
	int		mirrorError = 0;

	Assert(parquetInsertDesc != NULL);

	/* Already closed: nothing to flush, report empty lengths. */
	if (parquetInsertDesc->parquet_file == -1)
	{
		*newLogicalEof = 0;
		*fileLen_uncompressed = 0;
		return;
	}

	/* The current write position becomes the new logical EOF. */
	*newLogicalEof = FileNonVirtualTell(parquetInsertDesc->parquet_file);
	if (*newLogicalEof < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("file tell position error in file '%s' for relation '%s': %s",
						parquetInsertDesc->parquetFilePathName,
						parquetInsertDesc->relname,
						strerror(errno))));

	/* Our in-memory byte count must agree with the file position. */
	Assert(parquetInsertDesc->fileLen == *newLogicalEof);

	*fileLen_uncompressed = parquetInsertDesc->fileLen_uncompressed;

	/* Flush (fsync) and close through the mirrored append-only layer. */
	MirroredAppendOnly_FlushAndClose(parquetInsertDesc->mirroredOpen,
									 &mirrorError);
	if (mirrorError != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("file flush error when flushing (fsync) segment file '%s' to "
						"disk for relation '%s': %s",
						parquetInsertDesc->parquetFilePathName,
						parquetInsertDesc->relname,
						strerror(mirrorError))));

	parquetInsertDesc->parquet_file = -1;

	/* Also release the handle used for reading the previous metadata, if open. */
	if (parquetInsertDesc->file_previousmetadata != -1)
	{
		FileClose(parquetInsertDesc->file_previousmetadata);
		parquetInsertDesc->file_previousmetadata = -1;
	}
}
/*
 * Opens the next segment file to write.  The file must already exist.
 * This routine is responsible for seeking to the proper write location
 * given the logical EOF.
 *
 * The file is opened twice: a first (read) pass to load the existing
 * parquet footer metadata, and a second (write) pass that positions the
 * handle for appending.  An aborted previous transaction may have left
 * bytes past the committed logical EOF; those are truncated away.
 *
 * @filePathName: The name of the segment file to open.
 * @logicalEof: The last committed write transaction's EOF
 * value to use as the end of the segment file.
 * @parquet_file The file handler of segment file
 */
static void OpenSegmentFile(
	MirroredAppendOnlyOpen *mirroredOpen,
	char *filePathName,
	int64 logicalEof,
	RelFileNode *relFileNode,
	int32 segmentFileNum,
	char *relname,
	File *parquet_file,
	File *parquet_file_previous,
	CompactProtocol **protocol_read,
	TupleDesc tableAttrs,
	ParquetMetadata *parquetMetadata,
	int64 *fileLen,
	int64 *fileLen_uncompressed,
	int *previous_rowgroupcnt)
{
	int			primaryError;
	File		file;
	int64		seekResult;

	Assert(filePathName != NULL);

	bool		metadataExist = false;

	/*
	 * Open the file for metadata reading.
	 *
	 * NOTE(review): the boolean argument differs between the two
	 * MirroredAppendOnly_OpenReadWrite calls (true here, false below) —
	 * presumably a read-only flag; confirm against the callee.
	 */
	MirroredAppendOnly_OpenReadWrite(mirroredOpen,
									 relFileNode,
									 segmentFileNum,
									 relname,
									 logicalEof,
									 true,
									 &primaryError);
	if (primaryError != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("file open error when opening file "
						"'%s' for relation '%s': %s",
						filePathName,
						relname,
						strerror(primaryError))));

	*parquet_file_previous = mirroredOpen->primaryFile;

	/* Measure the physical file size by seeking to its end. */
	int64		fileSize = FileSeek(*parquet_file_previous, 0, SEEK_END);

	if (fileSize < 0)
	{
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("file seek error in file '%s' for relation "
						"'%s'", filePathName, relname)));
	}

	/* The committed EOF can never exceed what is physically on disk. */
	if (logicalEof > fileSize)
	{
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("logical eof exceeds file size in file '%s' for relation '%s'",
						filePathName,
						relname)));
	}

	/* read parquet footer, get metadata information before rowgroup metadata */
	metadataExist = readParquetFooter(*parquet_file_previous,
									  parquetMetadata,
									  protocol_read,
									  logicalEof,
									  filePathName);

	*previous_rowgroupcnt = (*parquetMetadata)->blockCount;

	/*
	 * Open the file for writing.
	 */
	MirroredAppendOnly_OpenReadWrite(mirroredOpen,
									 relFileNode,
									 segmentFileNum,
									 relname,
									 logicalEof,
									 false,
									 &primaryError);
	if (primaryError != 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("file open error when opening file '%s' "
						"for relation '%s': %s",
						filePathName,
						relname,
						strerror(primaryError))));

	file = mirroredOpen->primaryFile;

	seekResult = FileNonVirtualTell(file);
	if (seekResult != logicalEof)
	{
		/* previous transaction is aborted truncate file */
		/*
		 * NOTE(review): after a successful truncate there is no explicit
		 * re-seek to logicalEof here — presumably FileTruncate (or the
		 * write-mode open above) leaves the position correct; verify.
		 */
		if (FileTruncate(file, logicalEof))
		{
			/* Close the mirrored handle before erroring out to avoid a leak. */
			MirroredAppendOnly_Close(mirroredOpen);
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("file truncate error in file '%s' for relation "
							"'%s' to position " INT64_FORMAT ": %s",
							filePathName,
							relname,
							logicalEof,
							strerror(errno))));
		}
	}

	*parquet_file = file;

	/* if metadata not exist, should initialize the metadata, and write out file header */
	if (metadataExist == false)
	{
		/*
		 * init parquet metadata information, init schema information using
		 * table attributes, and may get existing information from data file
		 */
		initparquetMetadata(*parquetMetadata, tableAttrs, *parquet_file);

		/* should judge whether file already exists, if a new file, should write header out */
		writeParquetHeader(*parquet_file, filePathName, fileLen, fileLen_uncompressed);
	}
	else
	{
		/* Existing metadata must be compatible with the table's schema. */
		if (!checkAndSyncMetadata(*parquetMetadata, tableAttrs))
		{
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERNAL_ERROR),
					 errmsg("parquet storage write file's metadata incompatible "
							"with table's schema for relation '%s'.",
							relname)));
		}
	}
}
/*
 * Perform a large read i/o.
 *
 * Fills bufferedRead->largeReadMemory with bufferedRead->largeReadLen bytes
 * starting at the file's current position.  FileRead may return short reads,
 * so the read is retried in a loop until the full length is consumed.
 * Errors out (ereport ERROR) on end-of-file or on a read failure.
 */
static void BufferedReadIo(
	BufferedRead *bufferedRead)
{
	int32		largeReadLen;		/* bytes still to read */
	uint8	   *largeReadMemory;	/* next destination byte */
	int32		offset;				/* bytes read so far (for logging) */

	largeReadLen = bufferedRead->largeReadLen;
	Assert(bufferedRead->largeReadLen > 0);
	largeReadMemory = bufferedRead->largeReadMemory;

#ifdef USE_ASSERT_CHECKING
	{
		int64		currentReadPosition;

		/* Sanity check: the OS file position must match our bookkeeping. */
		currentReadPosition = FileNonVirtualTell(bufferedRead->file);
		if (currentReadPosition < 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("unable to get current position for table \"%s\" in file \"%s\" (errcode %d)",
							bufferedRead->relationName,
							bufferedRead->filePathName,
							errno),
					 errdetail("%s", HdfsGetLastError())));

		if (currentReadPosition != bufferedRead->largeReadPosition)
		{
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("Current position mismatch actual " INT64_FORMAT ", expected " INT64_FORMAT " for table \"%s\" in file \"%s\"",
							currentReadPosition, bufferedRead->largeReadPosition,
							bufferedRead->relationName,
							bufferedRead->filePathName),
					 errdetail("%s", HdfsGetLastError())));
		}
	}
#endif

	offset = 0;
	while (largeReadLen > 0)
	{
		int			actualLen = FileRead(bufferedRead->file,
										 (char *) largeReadMemory,
										 largeReadLen);

		if (actualLen == 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("read beyond eof in table \"%s\" in file \"%s\"",
							bufferedRead->relationName,
							bufferedRead->filePathName)));
		else if (actualLen < 0)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("unable to read table \"%s\" file \"%s\" (errcode %d)",
							bufferedRead->relationName,
							bufferedRead->filePathName,
							errno),
					 errdetail("%s", HdfsGetLastError())));

		if (Debug_appendonly_print_read_block)
		{
			/* Log message typo fixed: "postition" -> "position". */
			elog(LOG,
				 "Append-Only storage read: table '%s', segment file '%s', read position " INT64_FORMAT " (small offset %d), "
				 "actual read length %d (equals large read length %d is %s)",
				 bufferedRead->relationName,
				 bufferedRead->filePathName,
				 bufferedRead->largeReadPosition,
				 offset,
				 actualLen,
				 bufferedRead->largeReadLen,
				 (actualLen == bufferedRead->largeReadLen ? "true" : "false"));
		}

		/* Advance past the bytes just read and continue until done. */
		largeReadLen -= actualLen;
		largeReadMemory += actualLen;
		offset += actualLen;
	}
}