/*
 * AppendOnlySegmentFileTruncateToEOF()
 *
 * Truncate the physical segment file of an AO row-oriented relation back
 * to the logical EOF recorded in its pg_aoseg entry, discarding any bytes
 * written past that point (e.g. by an aborted transaction).
 *
 * Assumes that the segment file lock is already held by the caller.
 */
static void
AppendOnlySegmentFileTruncateToEOF(Relation aorel, FileSegInfo *fsinfo)
{
	const char *relname = RelationGetRelationName(aorel);
	MirroredAppendOnlyOpen mirroredOpened;
	int32		fileSegNo;
	char		filenamepath[MAXPGPATH];
	int			segno;
	int64		segeof;

	Assert(fsinfo);
	Assert(RelationIsAoRows(aorel));

	segno = fsinfo->segno;
	segeof = (int64) fsinfo->eof;

	/* Build the physical path for this logical segment (-1: no column) */
	MakeAOSegmentFileName(aorel, segno, -1, &fileSegNo, filenamepath);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Opening AO relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
		   get_namespace_name(RelationGetNamespace(aorel)),
		   relname,
		   aorel->rd_id,
		   aorel->rd_node.relNode,
		   segno,
		   segeof);

	if (OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof, &mirroredOpened))
	{
		TruncateAOSegmentFile(&mirroredOpened, aorel, segeof, ERROR);
		CloseAOSegmentFile(&mirroredOpened);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Successfully truncated AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   segno,
			   segeof);
	}
	else
	{
		/* No underlying file to truncate; nothing to do */
		elogif(Debug_appendonly_print_compaction, LOG,
			   "No gp_relation_node entry for AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   segno,
			   segeof);
	}
}
/*
 * Unit test for MakeAOSegmentFileName().
 *
 * Checks the mapping from (segno, column) to a physical file path and
 * pseudo segment number: with no column (-1) the pseudo segno equals the
 * logical segno and the path gets a ".<segno>" suffix (none for segno 0);
 * with a column, the visible cases show pseudoSegNo = column * 128 + segno.
 *
 * NOTE(review): the whole body is compiled out behind FIX_UNIT_TEST —
 * it references `rnode`, which is not declared here, and relies on the
 * cmocka mock of relpath(); it will not compile until that is fixed.
 */
void
test__MakeAOSegmentFileName(void **state)
{
#ifdef FIX_UNIT_TEST
	char	   *basepath = "base/21381/123";
	int32		fileSegNo;
	char		filepathname[256];
	RelationData reldata;

	/* relpath() is mocked; each will_return feeds one call's result */
	expect_any_count(relpath, &rnode, -1);

	// seg 0, no columns
	will_return(relpath, strdup(basepath));
	MakeAOSegmentFileName(&reldata, 0, -1, &fileSegNo, filepathname);
	assert_string_equal(filepathname, "base/21381/123");
	assert_int_equal(fileSegNo, 0);

	// seg 1, no columns
	will_return(relpath, strdup(basepath));
	MakeAOSegmentFileName(&reldata, 1, -1, &fileSegNo, filepathname);
	assert_string_equal(filepathname, "base/21381/123.1");
	assert_int_equal(fileSegNo, 1);

	// seg 0, column 1
	will_return(relpath, strdup(basepath));
	MakeAOSegmentFileName(&reldata, 0, 1, &fileSegNo, filepathname);
	assert_string_equal(filepathname, "base/21381/123.128");
	assert_int_equal(fileSegNo, 128);

	// seg 1, column 1
	will_return(relpath, strdup(basepath));
	MakeAOSegmentFileName(&reldata, 1, 1, &fileSegNo, filepathname);
	assert_string_equal(filepathname, "base/21381/123.129");
	assert_int_equal(fileSegNo, 129);

	// seg 0, column 2
	will_return(relpath, strdup(basepath));
	MakeAOSegmentFileName(&reldata, 0, 2, &fileSegNo, filepathname);
	assert_string_equal(filepathname, "base/21381/123.256");
	assert_int_equal(fileSegNo, 256);
#endif							/* FIX_UNIT_TEST */
}
/*
 * Drops a segment file.
 *
 * Actually, we just truncate the segfile to 0 bytes, to reclaim the space.
 * Before GPDB 6, we used to remove the file, but with WAL replication, we
 * no longer have a convenient function to remove a single segment of a
 * relation. An empty file is as almost as good as a non-existent file. If
 * the relation is dropped later, the code in mdunlink() will remove all
 * segments, including any empty ones we've left behind.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel, int segno)
{
	int			natts = RelationGetNumberOfAttributes(aorel);
	int			attno;

	Assert(RelationIsAoCols(aorel));

	/* Column-oriented: one physical file per (segment, column) pair */
	for (attno = 0; attno < natts; attno++)
	{
		char		segpath[MAXPGPATH];
		int			pseudoSegNo;
		File		segfd;

		/* Open and truncate the relation segfile */
		MakeAOSegmentFileName(aorel, segno, attno, &pseudoSegNo, segpath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: segno %d",
			   pseudoSegNo);

		segfd = OpenAOSegmentFile(aorel, segpath, pseudoSegNo, 0);
		if (segfd < 0)
		{
			/*
			 * The file we were about to drop/truncate didn't exist. That's normal,
			 * for example, if a column is added with ALTER TABLE ADD COLUMN.
			 */
			elog(DEBUG1, "could not truncate segfile %s, because it does not exist", segpath);
			continue;
		}

		TruncateAOSegmentFile(segfd, aorel, pseudoSegNo, 0);
		CloseAOSegmentFile(segfd);
	}
}
/*
 * Open the next file segment for write.
 *
 * Builds the physical file path for parquetInsertDesc->cur_segno, takes the
 * transaction-scope segment-file lock, copies the dispatched segment-file
 * info (tuple count, compressed/uncompressed EOFs) into a freshly allocated
 * ParquetFileSegInfo, and opens the file positioned for appending.
 */
static void
SetCurrentFileSegForWrite(ParquetInsertDesc parquetInsertDesc,
						  ResultRelSegFileInfo *segfileinfo)
{
	ParquetFileSegInfo *fsinfo;
	int32		fileSegNo;

	/* Make the 'segment' file name (-1: no per-column suffix) */
	MakeAOSegmentFileName(parquetInsertDesc->parquet_rel,
						  parquetInsertDesc->cur_segno, -1,
						  &fileSegNo,
						  parquetInsertDesc->parquetFilePathName);
	Assert(strlen(parquetInsertDesc->parquetFilePathName) + 1 <=
		   parquetInsertDesc->parquetFilePathNameMaxLen);

	/*
	 * In order to append to this file segment entry we must first
	 * acquire the relation parquet segment file (transaction-scope) lock (tag
	 * LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE) in order to guarantee
	 * stability of the pg_aoseg information on this segment file and exclusive right
	 * to append data to the segment file.
	 *
	 * NOTE: This is a transaction scope lock that must be held until commit / abort.
	 */
	LockRelationAppendOnlySegmentFile(&parquetInsertDesc->parquet_rel->rd_node,
									  parquetInsertDesc->cur_segno,
									  AccessExclusiveLock,
									  /* dontWait */ false);

	/* Now, get the information for the file segment we are going to append to. */
	parquetInsertDesc->fsInfo = (ParquetFileSegInfo *) palloc0(sizeof(ParquetFileSegInfo));

	/*
	 * in hawq, we cannot insert a new catalog entry and then update,
	 * since we cannot get the tid of added tuple.
	 * we should add the new catalog entry on master and then dispatch it to segments for update.
	 */
	Assert(parquetInsertDesc->fsInfo != NULL);
	/* A parquet relation is a single file per segment, hence exactly one entry */
	Assert(segfileinfo->numfiles == 1);

	/* Copy the dispatched segment-file state into our descriptor */
	fsinfo = parquetInsertDesc->fsInfo;
	fsinfo->segno = segfileinfo->segno;
	fsinfo->tupcount = segfileinfo->tupcount;
	fsinfo->eof = segfileinfo->eof[0];
	fsinfo->eof_uncompressed = segfileinfo->uncompressed_eof[0];

	parquetInsertDesc->fileLen = (int64) fsinfo->eof;
	parquetInsertDesc->fileLen_uncompressed = (int64) fsinfo->eof_uncompressed;
	parquetInsertDesc->rowCount = fsinfo->tupcount;

	/*
	 * Open the existing file for write. Also reads back the existing
	 * footer metadata so new row groups can be appended after it.
	 */
	OpenSegmentFile(
					parquetInsertDesc->mirroredOpen,
					parquetInsertDesc->parquetFilePathName,
					fsinfo->eof,
					&parquetInsertDesc->parquet_rel->rd_node,
					parquetInsertDesc->cur_segno,
					parquetInsertDesc->relname,
					&parquetInsertDesc->parquet_file,
					&parquetInsertDesc->file_previousmetadata,
					&parquetInsertDesc->protocol_read,
					parquetInsertDesc->parquet_rel->rd_att,
					&parquetInsertDesc->parquetMetadata,
					&parquetInsertDesc->fileLen,
					&parquetInsertDesc->fileLen_uncompressed,
					&parquetInsertDesc->previous_rowgroupcnt);

	/* Prepare the protocol used to serialize the (rewritten) footer on close */
	initSerializeFooter(&(parquetInsertDesc->footerProtocol),
						parquetInsertDesc->parquetFilePathName);
}
/*
 * Open the next file segment to scan and allocate all resources needed for it.
 *
 * Iterates over the scan's list of FileSplits, advancing
 * pqs_splits_processed.  Consecutive splits in the same physical segment
 * file reuse the already-open file (toOpenFile = false) and the file is
 * kept open while the next split is still in it (toCloseFile = false).
 * Splits with eof == 0 are skipped (see special case below).
 *
 * Returns true if a split was selected and the scan is positioned on it,
 * false when all splits have been consumed (pqs_done_all_splits is set).
 */
static bool
SetNextFileSegForRead(ParquetScanDesc scan)
{
	Relation	reln = scan->pqs_rd;
	int			segno = -1;
	int64		eof = 0;
	bool		finished_all_splits = true; /* assume */
	int32		fileSegNo;
	bool		parquetMetadataCorrect;
	bool		toOpenFile = true;	// by default need to open segment file to read

	Assert(scan->pqs_need_new_split); /* only call me when last segfile completed */
	Assert(!scan->pqs_done_all_splits); /* don't call me if I told you to stop */

	/*
	 * There is no guarantee that the current memory context will be preserved between calls,
	 * so switch to a safe memory context for retrieving compression information.
	 */
	MemoryContext oldMemoryContext = MemoryContextSwitchTo(scan->parquetScanInitContext);

	/* One-time initialization of the storage-read and row-group readers */
	if (!scan->initedStorageRoutines)
	{
		ParquetStorageRead_Init(
								&scan->storageRead,
								scan->parquetScanInitContext,
								NameStr(scan->pqs_rd->rd_rel->relname),
								&scan->storageAttributes);

		ParquetRowGroupReader_Init(
								   &scan->rowGroupReader,
								   scan->pqs_rd,
								   &scan->storageRead);

		scan->bufferDone = true; /* so we read a new buffer right away */

		scan->initedStorageRoutines = true;
	}

	/*
	 * Do we have more segment files to read or are we done?
	 */
	while (scan->pqs_splits_processed < list_length(scan->splits))
	{
		/* still have more segment files to read. get info of the next one */
		FileSplit	split = (FileSplitNode *) list_nth(scan->splits, scan->pqs_splits_processed);

		/* For splits within the same segment file, no need to reopen file */
		if (scan->pqs_splits_processed > 0)
		{
			FileSplit	lastSplit = (FileSplitNode *) list_nth(
									scan->splits, scan->pqs_splits_processed - 1);

			if (split->segno == lastSplit->segno)
			{
				/*
				 * if all rowgroups already processed, omit the remaining splits
				 */
				if (scan->storageRead.rowGroupCount == scan->storageRead.rowGroupProcessedCount)
				{
					scan->pqs_splits_processed++;
					continue;
				}
				else
				{
					toOpenFile = false;
				}
			}
		}

		/* Keep the file open if the NEXT split is in the same segment file */
		scan->toCloseFile = true;
		if (scan->pqs_splits_processed + 1 < list_length(scan->splits))
		{
			FileSplit	nextSplit = (FileSplitNode *) list_nth(
									scan->splits, scan->pqs_splits_processed + 1);

			if (split->segno == nextSplit->segno)
				scan->toCloseFile = false;
		}

		segno = split->segno;
		eof = split->logiceof;
		scan->pqs_splits_processed++;

		/*
		 * special case: we are the QD reading from a parquet table in utility mode
		 * (gp_dump). We see entries in the parquetseg table but no files or data
		 * actually exist. If we try to open this file we'll get an error, so
		 * we must skip to the next. For now, we can test if the file exists by
		 * looking at the eof value - it's always 0 on the QD.
		 */
		if (eof > 0)
		{
			finished_all_splits = false;
			break;
		}
	}

	if (finished_all_splits)
	{
		/* finished reading all segment files */
		scan->pqs_need_new_split = false;
		scan->pqs_done_all_splits = true;

		return false;
	}

	/* Build the physical path for the chosen segment (-1: no column suffix) */
	MakeAOSegmentFileName(reln, segno, -1, &fileSegNo, scan->pqs_filenamepath);
	Assert(strlen(scan->pqs_filenamepath) + 1 <= scan->pqs_filenamepath_maxlen);

	Assert(scan->initedStorageRoutines);

	if (toOpenFile)
	{
		/**need open files here*/
		ParquetStorageRead_OpenFile(
									&scan->storageRead,
									scan->pqs_filenamepath,
									eof,
									scan->pqs_rd->rd_att);

		/* Cross-check the parquet footer against the catalog tuple descriptor */
		parquetMetadataCorrect = ValidateParquetSegmentFile(scan->pqs_tupDesc,
															scan->hawqAttrToParquetColChunks,
															scan->storageRead.parquetMetadata);

		if (!parquetMetadataCorrect)
		{
			elog(ERROR, "parquet metadata information conflicts with hawq table information");
		}
	}

	scan->pqs_need_new_split = false;

	if (Debug_appendonly_print_scan)
		elog(LOG, "Parquet scan initialize for table '%s', %u/%u/%u, segment file %u, EOF " INT64_FORMAT ", "
			 "(compression = %s)",
			 NameStr(scan->pqs_rd->rd_rel->relname),
			 scan->pqs_rd->rd_node.spcNode,
			 scan->pqs_rd->rd_node.dbNode,
			 scan->pqs_rd->rd_node.relNode,
			 segno,
			 eof,
			 (scan->storageAttributes.compress ? "true" : "false"));

	/* Switch back to caller's memory context. */
	MemoryContextSwitchTo(oldMemoryContext);

	return true;
}
/*
 * AOCSSegmentFileTruncateToEOF()
 *
 * Truncate every column's physical file of an AO column-oriented segment
 * back to the logical EOF recorded in its vpinfo entry, discarding any
 * bytes written past that point.
 *
 * Assumes that the segment file lock is already held by the caller.
 */
static void
AOCSSegmentFileTruncateToEOF(Relation aorel, AOCSFileSegInfo *fsinfo)
{
	const char *relname = RelationGetRelationName(aorel);
	int			segno;
	int			j;

	Assert(fsinfo);
	Assert(RelationIsAoCols(aorel));

	segno = fsinfo->segno;

	/* Column-oriented: one physical file per column, each with its own EOF */
	for (j = 0; j < fsinfo->vpinfo.nEntry; ++j)
	{
		int64		segeof;
		char		filenamepath[MAXPGPATH];
		AOCSVPInfoEntry *entry;
		File		fd;
		int32		fileSegNo;

		entry = getAOCSVPEntry(fsinfo, j);
		segeof = entry->eof;

		/* Open and truncate the relation segfile to its eof */
		MakeAOSegmentFileName(aorel, segno, j, &fileSegNo, filenamepath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Opening AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   j,
			   segno,
			   fileSegNo,
			   segeof);

		fd = OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof);
		if (fd >= 0)
		{
			TruncateAOSegmentFile(fd, aorel, fileSegNo, segeof);
			CloseAOSegmentFile(fd);

			elogif(Debug_appendonly_print_compaction, LOG,
				   "Successfully truncated AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   j,
				   segno,
				   fileSegNo,
				   segeof);
		}
		else
		{
			/* No underlying file for this column; nothing to truncate */
			elogif(Debug_appendonly_print_compaction, LOG,
				   "No gp_relation_node entry for AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   j,
				   segno,
				   fileSegNo,
				   segeof);
		}
	}
}