예제 #1
0
/*
 * AppendOnlySegmentFileTruncateToEOF()
 *
 * Truncates the physical segment file of a row-oriented append-only
 * relation to the logical EOF recorded in its FileSegInfo.
 *
 * aorel  - the row-oriented append-only relation owning the segment file.
 * fsinfo - segment file info; supplies the segment number and logical EOF.
 *
 * Assumes that the segment file lock is already held.
 *
 * When OpenAOSegmentFile() reports that the file could not be opened,
 * nothing is truncated; the condition is only logged, and only if
 * Debug_appendonly_print_compaction is enabled.
 */
static void
AppendOnlySegmentFileTruncateToEOF(Relation aorel,
		FileSegInfo *fsinfo)
{
	const char *relname;
	MirroredAppendOnlyOpen mirroredOpened;
	int32		fileSegNo;
	char		filenamepath[MAXPGPATH];
	int			segno;
	int64		segeof;

	Assert(fsinfo);
	Assert(RelationIsAoRows(aorel));

	segno = fsinfo->segno;
	relname = RelationGetRelationName(aorel);
	segeof = (int64) fsinfo->eof;

	/*
	 * Build the path of the segment file.  The column argument is -1
	 * because a row-oriented relation has no per-column files.
	 */
	MakeAOSegmentFileName(aorel, segno, -1, &fileSegNo, filenamepath);

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Opening AO relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
		   get_namespace_name(RelationGetNamespace(aorel)),
		   relname,
		   aorel->rd_id,
		   aorel->rd_node.relNode,
		   segno,
		   segeof);

	/* Open the segment file and cut off everything beyond its logical EOF */
	if (OpenAOSegmentFile(aorel, filenamepath, fileSegNo, segeof, &mirroredOpened))
	{
		/* NOTE(review): ERROR is presumably the elevel raised on failure -- confirm */
		TruncateAOSegmentFile(&mirroredOpened, aorel, segeof, ERROR);
		CloseAOSegmentFile(&mirroredOpened);

		elogif(Debug_appendonly_print_compaction, LOG,
				 "Successfully truncated AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				 get_namespace_name(RelationGetNamespace(aorel)),
				 relname,
				 aorel->rd_id,
				 aorel->rd_node.relNode,
				 segno,
				 segeof);
	}
	else
	{
		/* The file could not be opened: leave it alone and only report it */
		elogif(Debug_appendonly_print_compaction, LOG,
				 "No gp_relation_node entry for AO ROW relation \"%s.%s\", relation id %u, relfilenode %u (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				 get_namespace_name(RelationGetNamespace(aorel)),
				 relname,
				 aorel->rd_id,
				 aorel->rd_node.relNode,
				 segno,
				 segeof);
	}
}
예제 #2
0
/*
 * Unit test for MakeAOSegmentFileName().
 *
 * For several (segment, column) combinations, checks both the generated
 * file path and the physical file segment number.  The whole body is
 * compiled only when FIX_UNIT_TEST is defined; otherwise the test is a
 * no-op.
 */
void
test__MakeAOSegmentFileName(void **state)
{
#ifdef FIX_UNIT_TEST
	/* One entry per (segment, column) combination to verify */
	static const struct
	{
		int			segno;
		int			column;		/* -1 means "no columns" */
		const char *expectedPath;
		int			expectedFileSegNo;
	}			cases[] =
	{
		{0, -1, "base/21381/123", 0},		/* seg 0, no columns */
		{1, -1, "base/21381/123.1", 1},		/* seg 1, no columns */
		{0, 1, "base/21381/123.128", 128},	/* seg 0, column 1 */
		{1, 1, "base/21381/123.129", 129},	/* seg 1, column 1 */
		{0, 2, "base/21381/123.256", 256}	/* seg 0, column 2 */
	};
	char	   *relationPath = "base/21381/123";
	int32		actualFileSegNo;
	char		actualPath[256];
	RelationData relation;
	unsigned int i;

	expect_any_count(relpath, &rnode, -1);

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
	{
		/* Queue a fresh copy of the base path as relpath()'s next return value */
		will_return(relpath, strdup(relationPath));
		MakeAOSegmentFileName(&relation, cases[i].segno, cases[i].column,
							  &actualFileSegNo, actualPath);
		assert_string_equal(actualPath, cases[i].expectedPath);
		assert_int_equal(actualFileSegNo, cases[i].expectedFileSegNo);
	}
#endif /* FIX_UNIT_TEST */
}
예제 #3
0
/*
 * Drops a segment file of a column-oriented append-only relation.
 *
 * "Dropping" here means truncating each column's file for the segment to
 * 0 bytes, which reclaims the space.  Before GPDB 6 the file used to be
 * removed, but with WAL replication there is no longer a convenient
 * function to remove a single segment of a relation.  An empty file is
 * almost as good as a non-existent one: if the relation is dropped later,
 * the code in mdunlink() will remove all segments, including any empty
 * ones left behind here.
 *
 * aorel - the column-oriented append-only relation.
 * segno - logical segment number whose per-column files are truncated.
 */
static void
AOCSCompaction_DropSegmentFile(Relation aorel,
							   int segno)
{
	int			attno;

	Assert(RelationIsAoCols(aorel));

	/* Visit the file of every column for this segment */
	for (attno = 0; attno < RelationGetNumberOfAttributes(aorel); attno++)
	{
		char		segpath[MAXPGPATH];
		int			physSegNo;
		File		segfd;

		/* Work out the file name and physical segment number of this column */
		MakeAOSegmentFileName(aorel, segno, attno, &physSegNo, segpath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: segno %d",
			   physSegNo);

		segfd = OpenAOSegmentFile(aorel, segpath, physSegNo, 0);
		if (segfd < 0)
		{
			/*
			 * Nothing to truncate: the file does not exist.  That is
			 * normal, for example, if a column is added with ALTER TABLE
			 * ADD COLUMN.
			 */
			elog(DEBUG1, "could not truncate segfile %s, because it does not exist", segpath);
			continue;
		}

		/* Shrink the file to zero length, then release the descriptor */
		TruncateAOSegmentFile(segfd, aorel, physSegNo, 0);
		CloseAOSegmentFile(segfd);
	}
}
예제 #4
0
/*
 * Open the next file segment for write.
 *
 * Builds the segment file name for parquetInsertDesc->cur_segno, takes the
 * append-only segment file lock, fills a fresh ParquetFileSegInfo from the
 * caller-supplied segfileinfo, and opens the segment file so that rows can
 * be appended to it.
 *
 * parquetInsertDesc - insert descriptor whose per-segment state is set up.
 * segfileinfo       - segment number, tuple count and EOF values for the
 *                     segment; must describe exactly one file.
 */
static void
SetCurrentFileSegForWrite(ParquetInsertDesc parquetInsertDesc, ResultRelSegFileInfo *segfileinfo)
{
	ParquetFileSegInfo *segInfo;
	int32		physicalSegNo;

	/* Derive the 'segment' file name; -1 is passed as the column number */
	MakeAOSegmentFileName(parquetInsertDesc->parquet_rel,
						  parquetInsertDesc->cur_segno,
						  -1,
						  &physicalSegNo,
						  parquetInsertDesc->parquetFilePathName);
	Assert(strlen(parquetInsertDesc->parquetFilePathName) + 1 <=
		   parquetInsertDesc->parquetFilePathNameMaxLen);

	/*
	 * Before appending to this file segment entry we must acquire the
	 * relation parquet segment file lock (tag
	 * LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE).  It guarantees stability
	 * of the pg_aoseg information on this segment file and gives us the
	 * exclusive right to append data to the segment file.
	 *
	 * NOTE: This is a transaction scope lock that must be held until
	 * commit / abort.
	 */
	LockRelationAppendOnlySegmentFile(&parquetInsertDesc->parquet_rel->rd_node,
									  parquetInsertDesc->cur_segno,
									  AccessExclusiveLock,
									  /* dontWait */false);

	/* Allocate zeroed bookkeeping for the file segment we will append to */
	segInfo = (ParquetFileSegInfo *) palloc0(sizeof(ParquetFileSegInfo));
	parquetInsertDesc->fsInfo = segInfo;

	/*
	 * In hawq we cannot insert a new catalog entry and then update it,
	 * because the tid of the added tuple is not available.  The new
	 * catalog entry is added on the master and then dispatched to the
	 * segments for update, which is why the values are copied from
	 * segfileinfo here.
	 */
	Assert(parquetInsertDesc->fsInfo != NULL);
	Assert(segfileinfo->numfiles == 1);
	segInfo->segno = segfileinfo->segno;
	segInfo->tupcount = segfileinfo->tupcount;
	segInfo->eof = segfileinfo->eof[0];
	segInfo->eof_uncompressed = segfileinfo->uncompressed_eof[0];

	/* Seed the descriptor's running totals from the existing segment state */
	parquetInsertDesc->fileLen = (int64) segInfo->eof;
	parquetInsertDesc->fileLen_uncompressed = (int64) segInfo->eof_uncompressed;
	parquetInsertDesc->rowCount = segInfo->tupcount;

	/* Open the existing file for write. */
	OpenSegmentFile(parquetInsertDesc->mirroredOpen,
					parquetInsertDesc->parquetFilePathName,
					segInfo->eof,
					&parquetInsertDesc->parquet_rel->rd_node,
					parquetInsertDesc->cur_segno,
					parquetInsertDesc->relname,
					&parquetInsertDesc->parquet_file,
					&parquetInsertDesc->file_previousmetadata,
					&parquetInsertDesc->protocol_read,
					parquetInsertDesc->parquet_rel->rd_att,
					&parquetInsertDesc->parquetMetadata,
					&parquetInsertDesc->fileLen,
					&parquetInsertDesc->fileLen_uncompressed,
					&parquetInsertDesc->previous_rowgroupcnt);

	initSerializeFooter(&(parquetInsertDesc->footerProtocol),
						parquetInsertDesc->parquetFilePathName);
}
예제 #5
0
/*
 * Open the next file segment to scan and allocate all resources needed for it.
 *
 * Advances scan->pqs_splits_processed past splits that need no work and
 * prepares the scan for the next split that has data (logical EOF > 0).
 *
 * Returns true when a split is ready to be read.  Returns false when all
 * splits have been consumed; in that case scan->pqs_done_all_splits is set.
 *
 * The work is done in scan->parquetScanInitContext; the caller's memory
 * context is restored on every return path.
 */
static bool
SetNextFileSegForRead(ParquetScanDesc scan)
{
	Relation		reln = scan->pqs_rd;
	int				segno = -1;
	int64			eof = 0;
	bool			finished_all_splits = true; /* assume */
	int32			fileSegNo;
	bool			parquetMetadataCorrect;
	bool			toOpenFile = true; /* by default need to open segment file to read */

	Assert(scan->pqs_need_new_split);   /* only call me when last segfile completed */
	Assert(!scan->pqs_done_all_splits); /* don't call me if I told you to stop */

	/*
	 * There is no guarantee that the current memory context will be preserved between calls,
	 * so switch to a safe memory context for retrieving compression information.
	 */
	MemoryContext oldMemoryContext = MemoryContextSwitchTo(scan->parquetScanInitContext);
	if (!scan->initedStorageRoutines)
	{
		ParquetStorageRead_Init(
							&scan->storageRead,
							scan->parquetScanInitContext,
							NameStr(scan->pqs_rd->rd_rel->relname),
							&scan->storageAttributes);

		ParquetRowGroupReader_Init(
							&scan->rowGroupReader,
							scan->pqs_rd,
							&scan->storageRead);

		scan->bufferDone = true; /* so we read a new buffer right away */

		scan->initedStorageRoutines = true;
	}

	/*
	 * Do we have more segment files to read or are we done?
	 */
	while (scan->pqs_splits_processed < list_length(scan->splits))
	{
		/* still have more segment files to read. get info of the next one */
		FileSplit split =
			(FileSplitNode *)list_nth(scan->splits, scan->pqs_splits_processed);

		/* For splits within the same segment file, no need to reopen file */
		if (scan->pqs_splits_processed > 0)
		{
			FileSplit lastSplit = (FileSplitNode *)list_nth(
				scan->splits, scan->pqs_splits_processed - 1);

			if (split->segno == lastSplit->segno)
			{
				/*
				 * if all rowgroups already processed, omit the remaining splits
				 */
				if (scan->storageRead.rowGroupCount == scan->storageRead.rowGroupProcessedCount)
				{
					scan->pqs_splits_processed++;
					continue;
				}
				else
				{
					toOpenFile = false;
				}
			}
		}

		/*
		 * The file is flagged for closing unless the following split
		 * belongs to the same segment file.
		 */
		scan->toCloseFile = true;
		if (scan->pqs_splits_processed + 1 < list_length(scan->splits))
		{
			FileSplit nextSplit = (FileSplitNode *)list_nth(
				scan->splits, scan->pqs_splits_processed + 1);

			if (split->segno == nextSplit->segno)
				scan->toCloseFile = false;
		}

		segno = split->segno;
		eof = split->logiceof;

		scan->pqs_splits_processed++;

		/*
		 * special case: we are the QD reading from a parquet table in utility mode
		 * (gp_dump). We see entries in the parquetseg table but no files or data
		 * actually exist. If we try to open this file we'll get an error, so
		 * we must skip to the next. For now, we can test if the file exists by
		 * looking at the eof value - it's always 0 on the QD.
		 */
		if (eof > 0)
		{
			finished_all_splits = false;
			break;
		}
	}

	if (finished_all_splits)
	{
		/* finished reading all segment files */
		scan->pqs_need_new_split = false;
		scan->pqs_done_all_splits = true;

		/*
		 * Restore the caller's memory context on this exit path too;
		 * otherwise the caller would keep running (and allocating) in the
		 * scan-init context.
		 */
		MemoryContextSwitchTo(oldMemoryContext);
		return false;
	}

	MakeAOSegmentFileName(reln, segno, -1, &fileSegNo, scan->pqs_filenamepath);
	Assert(strlen(scan->pqs_filenamepath) + 1 <= scan->pqs_filenamepath_maxlen);

	Assert(scan->initedStorageRoutines);

	if (toOpenFile)
	{
		/* need to open the segment file here */
		ParquetStorageRead_OpenFile(
							&scan->storageRead,
							scan->pqs_filenamepath,
							eof,
							scan->pqs_rd->rd_att);

		/* Cross-check the file's parquet metadata against the table definition */
		parquetMetadataCorrect = ValidateParquetSegmentFile(scan->pqs_tupDesc,
				scan->hawqAttrToParquetColChunks, scan->storageRead.parquetMetadata);

		if (!parquetMetadataCorrect)
		{
			elog(ERROR, "parquet metadata information conflicts with hawq table information");
		}
	}

	scan->pqs_need_new_split = false;

	if (Debug_appendonly_print_scan)
		elog(LOG,"Parquet scan initialize for table '%s', %u/%u/%u, segment file %u, EOF " INT64_FORMAT ", "
			 "(compression = %s)",
			 NameStr(scan->pqs_rd->rd_rel->relname),
			 scan->pqs_rd->rd_node.spcNode,
			 scan->pqs_rd->rd_node.dbNode,
			 scan->pqs_rd->rd_node.relNode,
			 segno,
			 eof,
			 (scan->storageAttributes.compress ? "true" : "false"));

	/* Switch back to caller's memory context. */
	MemoryContextSwitchTo(oldMemoryContext);

	return true;
}
예제 #6
0
/*
 * AOCSSegmentFileTruncateToEOF()
 *
 * Truncates every column file of one segment of a column-oriented
 * append-only relation to that column's logical EOF, as recorded in the
 * segment's AOCSFileSegInfo.
 *
 * aorel  - the column-oriented append-only relation.
 * fsinfo - segment file info; supplies the segment number and the
 *          per-column EOF entries.
 *
 * Assumes that the segment file lock is already held.
 */
static void
AOCSSegmentFileTruncateToEOF(Relation aorel,
							 AOCSFileSegInfo *fsinfo)
{
	const char *relname;
	int			segno;
	int			colno;

	Assert(fsinfo);
	Assert(RelationIsAoCols(aorel));

	relname = RelationGetRelationName(aorel);
	segno = fsinfo->segno;

	/* One pass per column entry recorded for this segment */
	for (colno = 0; colno < fsinfo->vpinfo.nEntry; colno++)
	{
		AOCSVPInfoEntry *vpentry = getAOCSVPEntry(fsinfo, colno);
		int64		logicalEof = vpentry->eof;
		char		segpath[MAXPGPATH];
		int32		physSegNo;
		File		segfd;

		/* Locate the column's physical file for this segment */
		MakeAOSegmentFileName(aorel, segno, colno, &physSegNo, segpath);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Opening AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   colno,
			   segno,
			   physSegNo,
			   logicalEof);

		segfd = OpenAOSegmentFile(aorel, segpath, physSegNo, logicalEof);
		if (segfd < 0)
		{
			/* The file could not be opened: report it and move on */
			elogif(Debug_appendonly_print_compaction, LOG,
				   "No gp_relation_node entry for AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   aorel->rd_id,
				   aorel->rd_node.relNode,
				   colno,
				   segno,
				   physSegNo,
				   logicalEof);
			continue;
		}

		/* Cut the file back to its logical EOF, then release the descriptor */
		TruncateAOSegmentFile(segfd, aorel, physSegNo, logicalEof);
		CloseAOSegmentFile(segfd);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Successfully truncated AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   aorel->rd_id,
			   aorel->rd_node.relNode,
			   colno,
			   segno,
			   physSegNo,
			   logicalEof);
	}
}