/**
 * Write the tail of the parquet file metadata: the key/value metadata list,
 * which holds a single "hawq.schema" entry, followed by the stop marker that
 * closes the file metadata struct.
 *
 * @param parquetMetadata  metadata whose hawqschemastr is written as the value
 * @param prot             compact protocol the fields are written through
 * @return                 sum of the values returned by the individual writes
 */
int
writeEndofParquetMetadata(
        ParquetMetadata parquetMetadata,
        CompactProtocol *prot)
{
    uint32_t nbytes = 0;
    char *schemaKey = "hawq.schema";

    /*
     * Hack: the field written before this one was the row group list, so
     * the protocol's last field id is forced to 4 before field 5 is begun.
     */
    setLastFieldId(prot, 4);

    /* field 5: key/value metadata, a list holding exactly one struct */
    nbytes += writeFieldBegin(prot, T_LIST, 5);
    nbytes += writeListBegin(prot, T_STRUCT, 1);
    nbytes += writeStructBegin(prot);

    /* key */
    nbytes += writeFieldBegin(prot, T_STRING, 1);
    nbytes += writeString(prot, schemaKey, strlen(schemaKey));

    /* value */
    nbytes += writeFieldBegin(prot, T_STRING, 2);
    nbytes += writeString(prot,
                          parquetMetadata->hawqschemastr,
                          strlen(parquetMetadata->hawqschemastr));

    /* close the key/value struct */
    nbytes += writeFieldStop(prot);
    nbytes += writeStructEnd(prot);

    /* close the file metadata struct itself */
    nbytes += writeFieldStop(prot);
    nbytes += writeStructEnd(prot);

    return nbytes;
}
/**
 * Write one row group struct: the list of its column chunks (field 1), the
 * total byte size (field 2) and the row count (field 3).
 *
 * @param rowGroupInfo  row group whose metadata is written
 * @param prot          compact protocol the fields are written through
 * @return              sum of the values returned by the individual writes
 */
int
writeRowGroupInfo(
        struct BlockMetadata_4C* rowGroupInfo,
        CompactProtocol *prot)
{
    uint32_t nbytes = 0;
    int chunkIdx = 0;

    nbytes += writeStructBegin(prot);

    /* field 1: column chunk metadata, one struct per column chunk */
    nbytes += writeFieldBegin(prot, T_LIST, 1);
    nbytes += writeListBegin(prot, T_STRUCT, rowGroupInfo->ColChunkCount);
    while (chunkIdx < rowGroupInfo->ColChunkCount)
    {
        nbytes += writeColumnChunk(rowGroupInfo->columns + chunkIdx, prot);
        chunkIdx++;
    }

    /* field 2: total byte size */
    nbytes += writeFieldBegin(prot, T_I64, 2);
    nbytes += writeI64(prot, rowGroupInfo->totalByteSize);

    /* field 3: num_rows */
    nbytes += writeFieldBegin(prot, T_I64, 3);
    nbytes += writeI64(prot, rowGroupInfo->rowCount);

    nbytes += writeFieldStop(prot);
    nbytes += writeStructEnd(prot);

    return nbytes;
}
/**
 * Write one column chunk struct: the optional path (field 1), the file
 * offset (field 2) and the nested column metadata struct (field 3).
 *
 * @param columnInfo  column chunk whose metadata is written
 * @param prot        compact protocol the fields are written through
 * @return            sum of the values returned by the individual writes
 */
int
writeColumnChunk(
        struct ColumnChunkMetadata_4C *columnInfo,
        CompactProtocol *prot)
{
    uint32_t nbytes = 0;

    nbytes += writeStructBegin(prot);

    /* field 1: column path, left out entirely when no path is set */
    if (NULL != columnInfo->path)
    {
        nbytes += writeFieldBegin(prot, T_STRING, 1);
        nbytes += writeString(prot, columnInfo->path, strlen(columnInfo->path));
    }

    /* field 2: file offset */
    nbytes += writeFieldBegin(prot, T_I64, 2);
    nbytes += writeI64(prot, columnInfo->file_offset);

    /* field 3: column metadata */
    nbytes += writeFieldBegin(prot, T_STRUCT, 3);
    nbytes += writeColumnMetadata(columnInfo, prot);

    nbytes += writeFieldStop(prot);
    nbytes += writeStructEnd(prot);

    return nbytes;
}
void CGenerator::BeginStruct() { // verbose output writeRep("struct $(name)\n", stdout); writeStr("{\n", stdout); // do the work writeStructBegin(); }
/**
 * Write out the beginning of the parquet file metadata (the part before the
 * row groups): version, schema and num_rows, then the row group list header
 * and the previously existing row group metadata.
 *
 * A temporary write protocol is allocated for this part, its buffered footer
 * data is appended to the file, and the protocol is freed again.
 *
 * @param parquetMetadata  metadata supplying version, schema and counts
 * @param fileName         file name handed to initCompactProtocol
 * @param file             file the buffered footer data is appended to
 * @param rowgroupCnt      previous row group count; when non-zero the footer
 *                         deserializer behind read_prot is ended afterwards
 * @param read_prot        protocol the previous row group metadata comes from
 * @return                 the value returned by appendFooterBufferTempData
 */
int
writePreviousParquetFileMetadata(
        ParquetMetadata parquetMetadata,
        char *fileName,
        File file,
        int rowgroupCnt,
        CompactProtocol **read_prot)
{
    uint32_t nbytes = 0;
    CompactProtocol *footerWriter;

    footerWriter = (struct CompactProtocol *) palloc0(sizeof(struct CompactProtocol));
    initCompactProtocol(footerWriter, file, fileName, -1,
                        PARQUET_FOOTER_BUFFERMODE_WRITE);

    nbytes += writeStructBegin(footerWriter);

    /* field 1: version */
    nbytes += writeFieldBegin(footerWriter, T_I32, 1);
    nbytes += writeI32(footerWriter, (int32_t) parquetMetadata->version);

    /* field 2: schema; the list is announced with schemaTreeNodeCount + 1 entries */
    nbytes += writeFieldBegin(footerWriter, T_LIST, 2);
    nbytes += writeListBegin(footerWriter, T_STRUCT,
                             parquetMetadata->schemaTreeNodeCount + 1);
    nbytes += writeSchemaElement(parquetMetadata->pfield,
                                 parquetMetadata->fieldCount,
                                 parquetMetadata->schemaTreeNodeCount,
                                 footerWriter);

    /* field 3: number of rows */
    nbytes += writeFieldBegin(footerWriter, T_I64, 3);
    nbytes += writeI64(footerWriter, (int64_t) parquetMetadata->num_rows);

    /* field 4: row group list header, sized by blockCount */
    nbytes += writeFieldBegin(footerWriter, T_LIST, 4);
    nbytes += writeListBegin(footerWriter, T_STRUCT, parquetMetadata->blockCount);

    /* copy over the row group metadata that existed before this write */
    writePerviousRowGroupMetadata(rowgroupCnt, parquetMetadata, *read_prot,
                                  footerWriter);

    /*
     * Append the first part of the footer to the file. This is a plain
     * assignment: the running total gathered above is replaced by whatever
     * appendFooterBufferTempData returns, and that is what gets returned.
     */
    nbytes = appendFooterBufferTempData(file, footerWriter->footerProcessor);

    /* the write protocol was only needed for this first part */
    freeCompactProtocol(footerWriter);
    pfree(footerWriter);

    /* if there was previous metadata, the footer deserializer must be ended */
    if (rowgroupCnt != 0)
    {
        endDeserializerFooter(parquetMetadata, read_prot);
    }

    return nbytes;
}
/**
 * Write a single schema element struct.
 *
 * Optional fields are left out entirely when their argument is NULL / zero:
 * type (field 1), type length (field 2), repetition type (field 3) and
 * number of children (field 5). The name (field 4) is always written.
 *
 * @param prot             compact protocol the fields are written through
 * @param fieldType        primitive type, or NULL to skip the type field
 * @param type_length      type length; written as type_length * 8 when
 *                         non-zero (presumably bytes to bits -- confirm)
 * @param repetition_type  repetition type, or NULL to skip the field
 * @param fieldName        NUL-terminated element name; must not be NULL
 * @param num_children     child count; skipped when zero
 * @return                 sum of the values returned by the individual writes
 */
int
writeSchemaElement_Single(
        CompactProtocol *prot,
        PrimitiveTypeName *fieldType,
        int32_t type_length,
        RepetitionType *repetition_type,
        char *fieldName,
        int32_t num_children)
{
    uint32_t xfer = 0;

    xfer += writeStructBegin(prot);

    /* write out type */
    if (fieldType)
    {
        xfer += writeFieldBegin(prot, T_I32, 1);
        /*
         * Convert the value instead of reinterpreting the pointer as
         * int32_t *: the result no longer depends on the size or
         * representation the compiler picked for the enum.
         */
        xfer += writeI32(prot, (int32_t) *fieldType);
    }

    /* write out type length */
    if (type_length != 0)
    {
        xfer += writeFieldBegin(prot, T_I32, 2);
        xfer += writeI32(prot, type_length * 8);
    }

    /* write out repetition type. Is there a repetition type for root? Needs verifying. */
    if (repetition_type)
    {
        xfer += writeFieldBegin(prot, T_I32, 3);
        xfer += writeI32(prot, *repetition_type);
    }

    /* write out name */
    xfer += writeFieldBegin(prot, T_STRING, 4);
    xfer += writeString(prot, fieldName, strlen(fieldName));

    /* write out number of children */
    if (num_children != 0)
    {
        xfer += writeFieldBegin(prot, T_I32, 5);
        xfer += writeI32(prot, num_children);
    }

    /*
     * No converted type is written out, since there is no converted type in
     * the hawq parquet implementation.
     */
    xfer += writeFieldStop(prot);
    xfer += writeStructEnd(prot);

    return xfer;
}
/**
 * Write part functions
 */

/**
 * Return the next non-empty token in *cursor, where tokens are separated by
 * any character of delim. The token is NUL-terminated in place and *cursor
 * is advanced past it. Returns NULL when no token is left.
 *
 * Splits exactly like strtok() (leading and repeated delimiters are
 * skipped), but all state lives in the caller's cursor rather than in a
 * hidden static variable.
 */
static char *
nextPathElement(char **cursor, const char *delim)
{
    char *token = *cursor + strspn(*cursor, delim);
    char *end;

    if (*token == '\0')
    {
        *cursor = token;
        return NULL;
    }

    end = token + strcspn(token, delim);
    if (*end != '\0')
    {
        *end = '\0';
        end++;
    }
    *cursor = end;
    return token;
}

/**
 * Write the column metadata struct of one column chunk: type (field 1),
 * encodings (field 2), path_in_schema (field 3), codec (field 4), value
 * count (field 5), total uncompressed size (field 6), total compressed size
 * (field 7) and first data page offset (field 9). Key/value metadata and the
 * index/dictionary page offsets are not written.
 *
 * pathInSchema is split on ':' and must yield at least `depth` elements;
 * otherwise an ERROR is reported.
 *
 * @param columnInfo  column chunk whose metadata is written; its
 *                    pathInSchema must not be NULL
 * @param prot        compact protocol the fields are written through
 * @return            sum of the values returned by the individual writes
 */
int
writeColumnMetadata(
        struct ColumnChunkMetadata_4C *columnInfo,
        CompactProtocol *prot)
{
    uint32_t xfer = 0;
    char *elemPath = NULL;
    char *pathCursor = NULL;
    const char *delim = ":";

    Assert(NULL != columnInfo->pathInSchema);
    /* private, writable copy: tokenizing overwrites delimiters with NULs */
    char path[strlen(columnInfo->pathInSchema) + 1];

    xfer += writeStructBegin(prot);

    /* write out type */
    xfer += writeFieldBegin(prot, T_I32, 1);
    xfer += writeI32(prot, columnInfo->type);

    /* write out encoding */
    xfer += writeFieldBegin(prot, T_LIST, 2);
    xfer += writeListBegin(prot, T_I32, columnInfo->EncodingCount);
    for (int i = 0; i < columnInfo->EncodingCount; i++)
    {
        xfer += writeI32(prot, (int32_t) (columnInfo->pEncodings[i]));
    }

    /* write out path_in_schema */
    xfer += writeFieldBegin(prot, T_LIST, 3);
    xfer += writeListBegin(prot, T_STRING, columnInfo->depth);
    strcpy(path, columnInfo->pathInSchema);
    pathCursor = path;
    /*
     * One loop for every element, so the number of strings written always
     * equals the list size announced just above.
     */
    for (int i = 0; i < columnInfo->depth; i++)
    {
        elemPath = nextPathElement(&pathCursor, delim);
        if (elemPath == NULL)
        {
            ereport(ERROR,
                    (errcode(ERRCODE_GP_INTERNAL_ERROR),
                     errmsg("file metadata column metadata(path_in_schema) not correct")));
        }
        xfer += writeString(prot, elemPath, strlen(elemPath));
    }

    /* write out codec */
    xfer += writeFieldBegin(prot, T_I32, 4);
    xfer += writeI32(prot, (int32_t) columnInfo->codec);

    /* write out num of values */
    xfer += writeFieldBegin(prot, T_I64, 5);
    xfer += writeI64(prot, (int64_t) columnInfo->valueCount);

    /* write total uncompressed size */
    xfer += writeFieldBegin(prot, T_I64, 6);
    xfer += writeI64(prot, columnInfo->totalUncompressedSize);

    /* write out total compressed size */
    xfer += writeFieldBegin(prot, T_I64, 7);
    xfer += writeI64(prot, columnInfo->totalSize);

    /*
     * Key value metadata: there is none for parquet storage, so it is not
     * written out.
     */

    /* write out data page offset */
    xfer += writeFieldBegin(prot, T_I64, 9);
    xfer += writeI64(prot, columnInfo->firstDataPage);

    /* index page offset and dictionary page offset are currently not written */

    /* write out field stop identifier */
    xfer += writeFieldStop(prot);
    xfer += writeStructEnd(prot);

    return xfer;
}