/**
 * Serialize a single parquet schema element as one struct on `prot`.
 *
 * Fields written, in order:
 *   1 (I32)    type            - only when fieldType is non-NULL
 *   2 (I32)    type length     - only when type_length is non-zero
 *   3 (I32)    repetition type - only when repetition_type is non-NULL
 *   4 (STRING) name            - always; fieldName must be a valid
 *                                NUL-terminated string (it is passed to strlen)
 *   5 (I32)    num_children    - only when num_children is non-zero
 *
 * Returns the sum of the values returned by the individual write calls
 * (presumably the number of bytes written - confirm against the protocol
 * implementation).
 */
int
writeSchemaElement_Single(
		CompactProtocol *prot,
		PrimitiveTypeName *fieldType,
		int32_t type_length,
		RepetitionType *repetition_type,
		char *fieldName,
		int32_t num_children)
{
	uint32_t xfer = 0;
	xfer += writeStructBegin(prot);

	/*write out type*/
	if(fieldType){
		xfer += writeFieldBegin(prot, T_I32, 1);
		/*
		 * Dereference first, then convert the value.  Reading the enum
		 * through an int32_t pointer would violate strict aliasing and
		 * over-read if the enum is narrower than 32 bits; this also matches
		 * how repetition_type is read below.
		 */
		xfer += writeI32(prot, (int32_t) *fieldType);
	}

	/*write out type length*/
	if(type_length != 0){
		xfer += writeFieldBegin(prot, T_I32, 2);
		/* NOTE(review): the length is scaled by 8 before being written,
		 * presumably bytes -> bits; confirm against the reader. */
		xfer += writeI32(prot, type_length * 8);
	}

	/*write out repetition type. Is there repetition type for root??? Need verify*/
	if(repetition_type)
	{
		xfer += writeFieldBegin(prot, T_I32, 3);
		xfer += writeI32(prot, *repetition_type);
	}

	/*write out name*/
	xfer += writeFieldBegin(prot, T_STRING, 4);
	xfer += writeString(prot, fieldName, strlen(fieldName));

	/*write out number of children*/
	if(num_children != 0)
	{
		xfer += writeFieldBegin(prot, T_I32, 5);
		xfer += writeI32(prot, num_children);
	}

	/*no need to write out converted type, since there is no converted type
	 *in hawq parquet implemention.*/

	xfer += writeFieldStop(prot);
	xfer += writeStructEnd(prot);
	return xfer;
}
Пример #2
0
/**
 * Write a set header: the element type as a single byte, then the element
 * count as a 32-bit integer.
 *
 * @return the sum of the values returned by writeByte and writeI32.
 */
uint32_t TBinaryProtocol::writeSetBegin(const TType elemType,
                                        const uint32_t size) {
  // Two separate statements keep the type tag ahead of the count on the wire.
  const uint32_t typeBytes = writeByte((int8_t)elemType);
  const uint32_t countBytes = writeI32((int32_t)size);
  return typeBytes + countBytes;
}
Пример #3
0
/**
 * Write a string as a 32-bit length prefix followed by its raw bytes.
 * Nothing is sent to the transport after the prefix when the string is empty.
 *
 * @return the value returned by writeI32 for the prefix plus the string
 *         length.
 */
uint32_t TBinaryProtocol::writeString(const string& str) {
  const uint32_t length = str.size();
  const uint32_t prefixBytes = writeI32((int32_t)length);
  if (length != 0) {
    trans_->write((uint8_t*)str.data(), length);
  }
  return prefixBytes + length;
}
Пример #4
0
/**
 * Write a message header.
 *
 * When strict_write_ is set: a 32-bit word combining VERSION_1 with the
 * message type, then the name, then the sequence id.
 * Otherwise: the name, the message type as a single byte, then the
 * sequence id.
 *
 * @return the sum of the values returned by the individual write calls.
 */
uint32_t TBinaryProtocol::writeMessageBegin(const std::string& name,
                                            const TMessageType messageType,
                                            const int32_t seqid) {
  uint32_t written = 0;

  if (!strict_write_) {
    // Non-strict layout: no version word, type travels as its own byte.
    written += writeString(name);
    written += writeByte((int8_t)messageType);
    written += writeI32(seqid);
    return written;
  }

  // Strict layout: version and message type share the leading 32-bit word.
  const int32_t versionAndType = (VERSION_1) | ((int32_t)messageType);
  written += writeI32(versionAndType);
  written += writeString(name);
  written += writeI32(seqid);
  return written;
}
Пример #5
0
/**
 * Write a map header: key type byte, value type byte, then the entry count
 * as a 32-bit integer.
 *
 * @return the sum of the values returned by the three write calls.
 */
uint32_t TBinaryProtocol::writeMapBegin(const TType keyType,
                                        const TType valType,
                                        const uint32_t size) {
  // Separate statements pin the on-wire order: key tag, value tag, count.
  const uint32_t keyTagBytes = writeByte((int8_t)keyType);
  const uint32_t valTagBytes = writeByte((int8_t)valType);
  const uint32_t countBytes = writeI32((int32_t)size);
  return keyTagBytes + valTagBytes + countBytes;
}
/**
 * Write out the leading part of the parquet file metadata (everything that
 * precedes newly added row groups): version, schema, num_rows, the row group
 * list header and the previously existing row group metadata.  The buffered
 * footer data is then appended to the file.
 *
 * A temporary write protocol is allocated for this and released before
 * returning.  When rowgroupCnt is non-zero the footer deserializer behind
 * read_prot is ended as well.
 *
 * Returns the value reported by appendFooterBufferTempData(); the counts
 * accumulated from the individual write calls are not part of the result.
 */
int
writePreviousParquetFileMetadata(
		ParquetMetadata parquetMetadata,
		char *fileName,
		File file,
		int rowgroupCnt,
		CompactProtocol **read_prot)
{
	uint32_t written = 0;
	CompactProtocol *footerWriter;

	footerWriter = (struct CompactProtocol *) palloc0(sizeof(struct CompactProtocol));
	initCompactProtocol(footerWriter, file, fileName, -1,
			PARQUET_FOOTER_BUFFERMODE_WRITE);

	written += writeStructBegin(footerWriter);

	/* field 1: version */
	written += writeFieldBegin(footerWriter, T_I32, 1);
	written += writeI32(footerWriter, (int32_t)parquetMetadata->version);

	/* field 2: schema, a list of structs holding schemaTreeNodeCount + 1 entries */
	written += writeFieldBegin(footerWriter, T_LIST, 2);
	written += writeListBegin(footerWriter, T_STRUCT,
			parquetMetadata->schemaTreeNodeCount + 1);
	written += writeSchemaElement(parquetMetadata->pfield,
			parquetMetadata->fieldCount,
			parquetMetadata->schemaTreeNodeCount,
			footerWriter);

	/* field 3: number of rows */
	written += writeFieldBegin(footerWriter, T_I64, 3);
	written += writeI64(footerWriter, (int64_t)parquetMetadata->num_rows);

	/* field 4: row group list header, sized by blockCount */
	written += writeFieldBegin(footerWriter, T_LIST, 4);
	written += writeListBegin(footerWriter, T_STRUCT, parquetMetadata->blockCount);

	/* copy the already existing row group metadata from the read side */
	writePerviousRowGroupMetadata(rowgroupCnt, parquetMetadata, *read_prot, footerWriter);

	/* append this first part of the footer to the file; its result is what we return */
	written = appendFooterBufferTempData(file, footerWriter->footerProcessor);

	/* the temporary write protocol is no longer needed */
	freeCompactProtocol(footerWriter);
	pfree(footerWriter);

	/* previous metadata was being deserialized: finish that deserializer */
	if (rowgroupCnt != 0)
	{
		endDeserializerFooter(parquetMetadata, read_prot);
	}

	return written;
}
/**
 * Serialize one column chunk's metadata as a struct on `prot`.
 *
 * Fields written, in order: 1 type, 2 encodings (list of I32),
 * 3 path_in_schema (list of strings obtained by splitting
 * columnInfo->pathInSchema on ':'), 4 codec, 5 value count,
 * 6 total uncompressed size, 7 total compressed size, 9 data page offset.
 * Key/value metadata (field 8) and the index/dictionary page offsets are
 * intentionally not written.
 *
 * Reports an ERROR through ereport when pathInSchema yields fewer
 * components than required.
 *
 * Returns the sum of the values returned by the individual write calls.
 */
int
writeColumnMetadata(
		struct ColumnChunkMetadata_4C *columnInfo,
		CompactProtocol *prot)
{
	uint32_t written = 0;
	const char *separator = ":";
	char *component = NULL;
	int pathIdx = 0;
	Assert(NULL != columnInfo->pathInSchema);
	/* strtok modifies its input, so tokenize a private copy of the path */
	char pathCopy[strlen(columnInfo->pathInSchema) + 1];

	written += writeStructBegin(prot);

	/* field 1: type */
	written += writeFieldBegin(prot, T_I32, 1);
	written += writeI32(prot, columnInfo->type);

	/* field 2: encodings */
	written += writeFieldBegin(prot, T_LIST, 2);
	written += writeListBegin(prot, T_I32, columnInfo->EncodingCount);
	for (int enc = 0; enc < columnInfo->EncodingCount; enc++)
	{
		written += writeI32(prot, (int32_t)(columnInfo->pEncodings[enc]));
	}

	/* field 3: path_in_schema */
	written += writeFieldBegin(prot, T_LIST, 3);
	written += writeListBegin(prot, T_STRING, columnInfo->depth);
	strcpy(pathCopy, columnInfo->pathInSchema);

	/*
	 * The first component is always written; further components follow
	 * until `depth` of them have been emitted.  Only the first strtok call
	 * receives the buffer, later calls continue from its saved position.
	 */
	do
	{
		component = strtok(pathIdx == 0 ? pathCopy : NULL, separator);
		if (component == NULL)
		{
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERNAL_ERROR),
							errmsg("file metadata column metadata(path_in_schema) not correct")));
		}
		written += writeString(prot, component, strlen(component));
		pathIdx++;
	} while (pathIdx < columnInfo->depth);

	/* field 4: codec */
	written += writeFieldBegin(prot, T_I32, 4);
	written += writeI32(prot, (int32_t)columnInfo->codec);

	/* field 5: number of values */
	written += writeFieldBegin(prot, T_I64, 5);
	written += writeI64(prot, (int64_t)columnInfo->valueCount);

	/* field 6: total uncompressed size */
	written += writeFieldBegin(prot, T_I64, 6);
	written += writeI64(prot, columnInfo->totalUncompressedSize);

	/* field 7: total compressed size */
	written += writeFieldBegin(prot, T_I64, 7);
	written += writeI64(prot, columnInfo->totalSize);

	/* field 8 (key value metadata) is skipped: parquet storage has none */

	/* field 9: data page offset */
	written += writeFieldBegin(prot, T_I64, 9);
	written += writeI64(prot, columnInfo->firstDataPage);

	/* index page offset and dictionary page offset are not written currently */

	/* close the struct */
	written += writeFieldStop(prot);
	written += writeStructEnd(prot);

	return written;
}
Пример #8
0
/*
 * Serialize `value` into `output` according to the Thrift type `type`.
 *
 * output   - buffer object the write helpers (writeByte, writeI32,
 *            PycStringIO->cwrite, ...) append to.
 * value    - Python object to encode; the caller must hold a reference.
 * type     - Thrift type tag selecting the encoding.
 * typeargs - type-specific arguments, parsed per case for strings,
 *            containers and structs.
 *
 * Returns true on success.  Returns false on failure; in the failure paths
 * visible here a Python exception is either set explicitly or expected to
 * have been set by the failing helper / C-API call.
 */
static bool
output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
    /*
     * Refcounting Strategy:
     *
     * We assume that elements of the thrift_spec tuple are not going to be
     * mutated, so we don't ref count those at all. Other than that, we try to
     * keep a reference to all the user-created objects while we work with them.
     * output_val assumes that a reference is already held. The *caller* is
     * responsible for handling references
     */

    switch (type) {

    case T_BOOL: {
        int v = PyObject_IsTrue(value);
        if (v == -1) {
            return false;
        }

        writeByte(output, (int8_t) v);
        break;
    }
    case T_I08: {
        int32_t val;

        if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
            return false;
        }

        writeByte(output, (int8_t) val);
        break;
    }
    case T_I16: {
        int32_t val;

        if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
            return false;
        }

        writeI16(output, (int16_t) val);
        break;
    }
    case T_I32: {
        int32_t val;

        if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
            return false;
        }

        writeI32(output, val);
        break;
    }
    case T_I64: {
        int64_t nval = PyLong_AsLongLong(value);

        if (INT_CONV_ERROR_OCCURRED(nval)) {
            return false;
        }

        if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
            PyErr_SetString(PyExc_OverflowError, "int out of range");
            return false;
        }

        writeI64(output, nval);
        break;
    }

    case T_DOUBLE: {
        double nval = PyFloat_AsDouble(value);
        /* -1.0 is a legal value, so it only signals failure when an
         * exception is pending as well. */
        if (nval == -1.0 && PyErr_Occurred()) {
            return false;
        }

        writeDouble(output, nval);
        break;
    }

    case T_STRING: {
        Py_ssize_t len = 0;
        /*
         * Owned UTF-8 encoding of a unicode value, or NULL when `value` is
         * written as-is.  PyUnicode_AsUTF8String returns a NEW reference, so
         * it has to be released on every exit path of this case.
         */
        PyObject* encoded = NULL;

        if (is_utf8(typeargs) && PyUnicode_Check(value)) {
            encoded = PyUnicode_AsUTF8String(value);
            if (encoded == NULL) {
                /* encoding failed; the exception is already set */
                return false;
            }
            value = encoded;
        }
        len = PyString_Size(value);

        if (!check_ssize_t_32(len)) {
            Py_XDECREF(encoded);
            return false;
        }

        /* wire format: 32-bit length followed by the raw bytes */
        writeI32(output, (int32_t) len);
        PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
        Py_XDECREF(encoded);
        break;
    }

    case T_LIST:
    case T_SET: {
        Py_ssize_t len;
        SetListTypeArgs parsedargs;
        PyObject *item;
        PyObject *iterator;

        if (!parse_set_list_args(&parsedargs, typeargs)) {
            return false;
        }

        len = PyObject_Length(value);

        if (!check_ssize_t_32(len)) {
            return false;
        }

        /* container header: element type byte, then element count */
        writeByte(output, parsedargs.element_type);
        writeI32(output, (int32_t) len);

        iterator =  PyObject_GetIter(value);
        if (iterator == NULL) {
            return false;
        }

        while ((item = PyIter_Next(iterator))) {
            if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
                Py_DECREF(item);
                Py_DECREF(iterator);
                return false;
            }
            Py_DECREF(item);
        }

        Py_DECREF(iterator);

        /* PyIter_Next returns NULL both at exhaustion and on error;
         * a pending exception tells the two apart. */
        if (PyErr_Occurred()) {
            return false;
        }

        break;
    }

    case T_MAP: {
        PyObject *k, *v;
        Py_ssize_t pos = 0;
        Py_ssize_t len;

        MapTypeArgs parsedargs;

        len = PyDict_Size(value);
        if (!check_ssize_t_32(len)) {
            return false;
        }

        if (!parse_map_args(&parsedargs, typeargs)) {
            return false;
        }

        /* map header: key type byte, value type byte, then entry count */
        writeByte(output, parsedargs.ktag);
        writeByte(output, parsedargs.vtag);
        writeI32(output, (int32_t) len);

        // TODO(bmaurer): should support any mapping, not just dicts
        while (PyDict_Next(value, &pos, &k, &v)) {
            // TODO(dreiss): Think hard about whether these INCREFs actually
            //               turn any unsafe scenarios into safe scenarios.
            Py_INCREF(k);
            Py_INCREF(v);

            if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
                    || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
                Py_DECREF(k);
                Py_DECREF(v);
                return false;
            }
            Py_DECREF(k);
            Py_DECREF(v);
        }
        break;
    }

    // TODO(dreiss): Consider breaking this out as a function
    //               the way we did for decode_struct.
    case T_STRUCT: {
        StructTypeArgs parsedargs;
        Py_ssize_t nspec;
        Py_ssize_t i;

        if (!parse_struct_args(&parsedargs, typeargs)) {
            return false;
        }

        nspec = PyTuple_Size(parsedargs.spec);

        if (nspec == -1) {
            return false;
        }

        for (i = 0; i < nspec; i++) {
            StructItemSpec parsedspec;
            PyObject* spec_tuple;
            PyObject* instval = NULL;

            /* spec entries that are None are skipped */
            spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
            if (spec_tuple == Py_None) {
                continue;
            }

            if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
                return false;
            }

            instval = PyObject_GetAttr(value, parsedspec.attrname);

            if (!instval) {
                return false;
            }

            /* attributes whose value is None are not written at all */
            if (instval == Py_None) {
                Py_DECREF(instval);
                continue;
            }

            /* field header: type byte, then 16-bit field tag */
            writeByte(output, (int8_t) parsedspec.type);
            writeI16(output, parsedspec.tag);

            if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
                Py_DECREF(instval);
                return false;
            }

            Py_DECREF(instval);
        }

        /* a struct is terminated by a T_STOP byte */
        writeByte(output, (int8_t)T_STOP);
        break;
    }

    case T_STOP:
    case T_VOID:
    case T_UTF16:
    case T_UTF8:
    case T_U64:
    default:
        PyErr_SetString(PyExc_TypeError, "Unexpected TType");
        return false;

    }

    return true;
}