/*
 * Write the closing part of the parquet file metadata: the key/value
 * metadata list (field 5) holding a single "hawq.schema" entry, followed by
 * the stop marker and struct end of the file metadata itself.
 *
 * parquetMetadata: supplies hawqschemastr, the value stored under the key.
 * prot:            compact protocol the bytes are written through.
 *
 * Returns the sum of the values returned by the individual write calls.
 */
int
writeEndofParquetMetadata(
		ParquetMetadata parquetMetadata,
		CompactProtocol *prot)
{
	uint32_t written = 0;

	/*
	 * Hack: the field written before this point is the row group list, so
	 * the protocol's last field id is forced to 4 before field 5 is begun.
	 */
	setLastFieldId(prot, 4);

	/* field 5: key/value metadata, a list with exactly one struct element */
	written += writeFieldBegin(prot, T_LIST, 5);
	written += writeListBegin(prot, T_STRUCT, 1);

	/* the single key/value struct */
	written += writeStructBegin(prot);

	/* key (field 1) */
	written += writeFieldBegin(prot, T_STRING, 1);
	written += writeString(prot, "hawq.schema", strlen("hawq.schema"));

	/* value (field 2) */
	written += writeFieldBegin(prot, T_STRING, 2);
	written += writeString(prot, parquetMetadata->hawqschemastr,
						   strlen(parquetMetadata->hawqschemastr));

	/* close the key/value struct */
	written += writeFieldStop(prot);
	written += writeStructEnd(prot);

	/* close the enclosing file metadata struct */
	written += writeFieldStop(prot);
	written += writeStructEnd(prot);

	return written;
}
/*
 * Write one row group struct: the list of its column chunks (field 1), its
 * total byte size (field 2) and its row count (field 3).
 *
 * rowGroupInfo: row group whose ColChunkCount entries of columns[] are
 *               written, along with totalByteSize and rowCount.
 * prot:         compact protocol the bytes are written through.
 *
 * Returns the sum of the values returned by the individual write calls.
 */
int
writeRowGroupInfo(
		struct BlockMetadata_4C* rowGroupInfo,
		CompactProtocol *prot)
{
	uint32_t written = 0;
	int chunkIdx = 0;

	written += writeStructBegin(prot);

	/* field 1: column chunk list, one struct per chunk */
	written += writeFieldBegin(prot, T_LIST, 1);
	written += writeListBegin(prot, T_STRUCT, rowGroupInfo->ColChunkCount);
	while (chunkIdx < rowGroupInfo->ColChunkCount)
	{
		written += writeColumnChunk(&(rowGroupInfo->columns[chunkIdx]), prot);
		chunkIdx++;
	}

	/* field 2: total byte size */
	written += writeFieldBegin(prot, T_I64, 2);
	written += writeI64(prot, rowGroupInfo->totalByteSize);

	/* field 3: number of rows */
	written += writeFieldBegin(prot, T_I64, 3);
	written += writeI64(prot, rowGroupInfo->rowCount);

	/* close the row group struct */
	written += writeFieldStop(prot);
	written += writeStructEnd(prot);

	return written;
}
/*
 * Write one column chunk struct: an optional path (field 1), the file offset
 * (field 2) and the nested column metadata struct (field 3).
 *
 * columnInfo: column chunk description; path is written only when non-NULL.
 * prot:       compact protocol the bytes are written through.
 *
 * Returns the sum of the values returned by the individual write calls.
 */
int
writeColumnChunk(
		struct ColumnChunkMetadata_4C *columnInfo,
		CompactProtocol *prot)
{
	uint32_t written = 0;

	written += writeStructBegin(prot);

	/* field 1: column path, skipped entirely when no path is set */
	if (columnInfo->path)
	{
		written += writeFieldBegin(prot, T_STRING, 1);
		written += writeString(prot, columnInfo->path, strlen(columnInfo->path));
	}

	/* field 2: file offset */
	written += writeFieldBegin(prot, T_I64, 2);
	written += writeI64(prot, columnInfo->file_offset);

	/* field 3: column metadata, serialized as a nested struct */
	written += writeFieldBegin(prot, T_STRUCT, 3);
	written += writeColumnMetadata(columnInfo, prot);

	/* close the column chunk struct */
	written += writeFieldStop(prot);
	written += writeStructEnd(prot);

	return written;
}
Esempio n. 4
0
// Starts a struct: prints the opening lines of the struct declaration to
// stdout, then calls writeStructBegin() to do the actual work.
// NOTE(review): "$(name)" is presumably a placeholder that writeRep expands
// to the current struct name; writeRep is not visible here -- confirm.
void CGenerator::BeginStruct()
{
  // verbose output
  writeRep("struct $(name)\n", stdout);
  writeStr("{\n", stdout);
  // do the work
  writeStructBegin();
}
/**
 * Write out the beginning of the parquet file metadata (the part before the
 * new row groups): version, schema, number of rows, the row group list
 * header, and the row group metadata that already existed.
 *
 * parquetMetadata: source of version, schema fields, num_rows and blockCount.
 * fileName, file:  target file, handed to the temporary write protocol.
 * rowgroupCnt:     number of previously existing row groups; when non-zero
 *                  the footer deserializer behind read_prot is ended.
 * read_prot:       protocol the previous row group metadata is read from.
 *
 * Returns the value reported by appendFooterBufferTempData().
 */
int
writePreviousParquetFileMetadata(
		ParquetMetadata parquetMetadata,
		char *fileName,
		File file,
		int rowgroupCnt,
		CompactProtocol **read_prot)
{
	uint32_t footerBytes = 0;
	CompactProtocol *writer = palloc0(sizeof(*writer));

	initCompactProtocol(writer, file, fileName, -1,
			PARQUET_FOOTER_BUFFERMODE_WRITE);

	footerBytes += writeStructBegin(writer);

	/* field 1: version */
	footerBytes += writeFieldBegin(writer, T_I32, 1);
	footerBytes += writeI32(writer, (int32_t)parquetMetadata->version);

	/* field 2: schema, a list of schemaTreeNodeCount + 1 elements */
	footerBytes += writeFieldBegin(writer, T_LIST, 2);
	footerBytes += writeListBegin(writer, T_STRUCT,
			parquetMetadata->schemaTreeNodeCount + 1);
	footerBytes += writeSchemaElement(parquetMetadata->pfield,
			parquetMetadata->fieldCount,
			parquetMetadata->schemaTreeNodeCount,
			writer);

	/* field 3: number of rows */
	footerBytes += writeFieldBegin(writer, T_I64, 3);
	footerBytes += writeI64(writer, (int64_t)parquetMetadata->num_rows);

	/* field 4: row group list header, sized by blockCount */
	footerBytes += writeFieldBegin(writer, T_LIST, 4);
	footerBytes += writeListBegin(writer, T_STRUCT, parquetMetadata->blockCount);

	/* copy over the metadata of the row groups that existed before */
	writePerviousRowGroupMetadata(rowgroupCnt, parquetMetadata, *read_prot, writer);

	/*
	 * Append this first part of the footer to the file.
	 * NOTE(review): this assignment replaces the count accumulated above, so
	 * only the value from appendFooterBufferTempData() is returned -- confirm
	 * that this is intended.
	 */
	footerBytes = appendFooterBufferTempData(file, writer->footerProcessor);

	/* the temporary write protocol is no longer needed */
	freeCompactProtocol(writer);
	pfree(writer);

	/* with previous metadata present, the footer deserializer must be ended */
	if (rowgroupCnt)
	{
		endDeserializerFooter(parquetMetadata, read_prot);
	}

	return footerBytes;
}
/*
 * Write a single schema element struct.
 *
 * prot:            compact protocol the bytes are written through.
 * fieldType:       primitive type (field 1); omitted when NULL.
 * type_length:     type length (field 2); omitted when 0, otherwise written
 *                  multiplied by 8 (presumably bytes to bits -- confirm).
 * repetition_type: repetition type (field 3); omitted when NULL.
 * fieldName:       element name (field 4); always written, must not be NULL.
 * num_children:    child count (field 5); omitted when 0.
 *
 * Returns the sum of the values returned by the individual write calls.
 */
int
writeSchemaElement_Single(
		CompactProtocol *prot,
		PrimitiveTypeName *fieldType,
		int32_t type_length,
		RepetitionType *repetition_type,
		char *fieldName,
		int32_t num_children)
{
	uint32_t xfer = 0;
	xfer += writeStructBegin(prot);

	/*write out type*/
	if(fieldType){
		xfer += writeFieldBegin(prot, T_I32, 1);
		/*
		 * Convert the value rather than reinterpreting the pointer: reading
		 * through an int32_t pointer assumes PrimitiveTypeName is exactly 32
		 * bits wide. This also matches how repetition_type is read below.
		 */
		xfer += writeI32(prot, (int32_t) *fieldType);
	}

	/*write out type length*/
	if(type_length != 0){
		xfer += writeFieldBegin(prot, T_I32, 2);
		xfer += writeI32(prot, type_length * 8);
	}

	/*write out repetition type. Is there repetition type for root??? Need verify*/
	if(repetition_type)
	{
		xfer += writeFieldBegin(prot, T_I32, 3);
		xfer += writeI32(prot, *repetition_type);
	}

	/*write out name*/
	xfer += writeFieldBegin(prot, T_STRING, 4);
	xfer += writeString(prot, fieldName, strlen(fieldName));

	/*write out number of children*/
	if(num_children != 0)
	{
		xfer += writeFieldBegin(prot, T_I32, 5);
		xfer += writeI32(prot, num_children);
	}

	/*no need to write out converted type, since there is no converted type
	 *in hawq parquet implementation.*/

	xfer += writeFieldStop(prot);
	xfer += writeStructEnd(prot);
	return xfer;
}
/**
 * Write part functions
 */
/*
 * Write the column metadata struct for one column chunk: type (field 1),
 * encodings (field 2), path_in_schema (field 3), codec (field 4), value
 * count (field 5), uncompressed size (field 6), compressed size (field 7)
 * and first data page offset (field 9).
 *
 * columnInfo: column chunk description; pathInSchema must be non-NULL and is
 *             split on ':' into path elements.
 * prot:       compact protocol the bytes are written through.
 *
 * Reports an ERROR through ereport when a path element is missing.
 * Returns the sum of the values returned by the individual write calls.
 */
int
writeColumnMetadata(
		struct ColumnChunkMetadata_4C *columnInfo,
		CompactProtocol *prot)
{
	uint32_t written = 0;
	char *segment = NULL;
	char *tokenSource = NULL;
	const char *separator = ":";
	int encodingIdx = 0;
	int segmentIdx = 0;
	Assert(NULL != columnInfo->pathInSchema);
	/* scratch copy of the path, because strtok modifies its input */
	char path[strlen(columnInfo->pathInSchema) + 1];

	written += writeStructBegin(prot);

	/* field 1: type */
	written += writeFieldBegin(prot, T_I32, 1);
	written += writeI32(prot, columnInfo->type);

	/* field 2: encodings, a list of EncodingCount i32 values */
	written += writeFieldBegin(prot, T_LIST, 2);
	written += writeListBegin(prot, T_I32, columnInfo->EncodingCount);
	while (encodingIdx < columnInfo->EncodingCount)
	{
		written += writeI32(prot, (int32_t)(columnInfo->pEncodings[encodingIdx]));
		encodingIdx++;
	}

	/* field 3: path_in_schema, a list declared with depth string elements */
	written += writeFieldBegin(prot, T_LIST, 3);
	written += writeListBegin(prot, T_STRING, columnInfo->depth);
	strcpy(path, columnInfo->pathInSchema);

	/*
	 * The first element is always written; further elements are written
	 * while the element index stays below depth. strtok gets the buffer on
	 * the first call and NULL on every later call.
	 */
	tokenSource = path;
	do
	{
		segment = strtok(tokenSource, separator);
		if (segment == NULL)
		{
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERNAL_ERROR),
							errmsg("file metadata column metadata(path_in_schema) not correct")));
		}
		written += writeString(prot, segment, strlen(segment));
		tokenSource = NULL;
		segmentIdx++;
	} while (segmentIdx < columnInfo->depth);

	/* field 4: codec */
	written += writeFieldBegin(prot, T_I32, 4);
	written += writeI32(prot, (int32_t)columnInfo->codec);

	/* field 5: number of values */
	written += writeFieldBegin(prot, T_I64, 5);
	written += writeI64(prot, (int64_t)columnInfo->valueCount);

	/* field 6: total uncompressed size */
	written += writeFieldBegin(prot, T_I64, 6);
	written += writeI64(prot, columnInfo->totalUncompressedSize);

	/* field 7: total compressed size */
	written += writeFieldBegin(prot, T_I64, 7);
	written += writeI64(prot, columnInfo->totalSize);

	/*
	 * Key value metadata is not written: there is none for parquet storage.
	 */

	/* field 9: data page offset */
	written += writeFieldBegin(prot, T_I64, 9);
	written += writeI64(prot, columnInfo->firstDataPage);

	/* index page offset and dictionary page offset are currently not written */

	/* close the column metadata struct */
	written += writeFieldStop(prot);
	written += writeStructEnd(prot);

	return written;
}