static void read_using_writer_schema(const char *filename) { avro_file_reader_t file; avro_schema_t writer_schema; avro_value_iface_t *writer_iface; avro_value_t writer_value; // Open an Avro file and grab the writer schema that was used to create the // file. check_i(avro_file_reader(filename, &file)); writer_schema = avro_file_reader_get_writer_schema(file); // Then create a value that is an instance of the writer schema. As above, // we use the built-in "generic" value implementation for the value instance // that will actually store the data. check_p(writer_iface = avro_generic_class_from_schema(writer_schema)); check_i(avro_generic_value_new(writer_iface, &writer_value)); // Read values from the file until we run out, printing the contents of each // one. Here, we can read directly into `writer_value` since we know that // it's an instance of the schema that was used to create the file. while (avro_file_reader_read_value(file, &writer_value) == 0) { avro_value_t field; int32_t a; int32_t b; check_i(avro_value_get_by_name(&writer_value, "a", &field, NULL)); check_i(avro_value_get_int(&field, &a)); check_i(avro_value_get_by_name(&writer_value, "b", &field, NULL)); check_i(avro_value_get_int(&field, &b)); printf(" a: %" PRId32 ", b: %" PRId32 "\n", a, b); } // Close the file and clean up after ourselves. avro_file_reader_close(file); avro_value_decref(&writer_value); avro_value_iface_decref(writer_iface); avro_schema_decref(writer_schema); }
static int read_data() { int rval; int records_read = 0; avro_file_reader_t reader; avro_value_iface_t *iface; avro_value_t value; fprintf(stderr, "\nReading...\n"); rval = avro_file_reader(filename, &reader); if (rval) { fprintf(stderr, "Error: %s\n", avro_strerror()); return -1; } avro_schema_t schema = avro_file_reader_get_writer_schema(reader); iface = avro_generic_class_from_schema(schema); avro_generic_value_new(iface, &value); while ((rval = avro_file_reader_read_value(reader, &value)) == 0) { avro_value_t field; int32_t val; avro_value_get_by_index(&value, 0, &field, NULL); avro_value_get_int(&field, &val); fprintf(stderr, "value = %d\n", val); records_read++; avro_value_reset(&value); } avro_value_decref(&value); avro_value_iface_decref(iface); avro_schema_decref(schema); avro_file_reader_close(reader); fprintf(stderr, "read %d records.\n", records_read); if (rval != EOF) { fprintf(stderr, "Error: %s\n", avro_strerror()); return -1; } return records_read; }
rkv_error_t r_kv_avro_value_get_int(avro_value_t *avro_value, const char *name, int *ret_value) { avro_value_t field; rkv_error_t ret = RKV_SUCCESS; int value = 0; if (!avro_value || !name || !ret_value) { return RKV_INVALID_ARGUEMENTS; } if (avro_value_get_by_name(avro_value, name, &field, NULL) != 0) { return RKV_INVALID_ARGUEMENTS; } if (avro_value_get_int(&field, &value) == EINVAL) { ret = RKV_INVALID_AVRO_SET_OP; } *ret_value = value; //avro_value_decref(&field); return ret; }
/* returns new reference */ PyObject * avro_to_python(ConvertInfo *info, avro_value_t *value) { avro_type_t type = avro_value_get_type(value); switch (type) { case AVRO_BOOLEAN: { int val; avro_value_get_boolean(value, &val); return PyBool_FromLong(val); } case AVRO_BYTES: { const void *buf; size_t size; avro_value_get_bytes(value, &buf, &size); /* got pointer into underlying value. no need to free */ return PyString_FromStringAndSize(buf, size); } case AVRO_DOUBLE: { double val; avro_value_get_double(value, &val); return PyFloat_FromDouble(val); } case AVRO_FLOAT: { float val; avro_value_get_float(value, &val); return PyFloat_FromDouble(val); } case AVRO_INT32: { int32_t val; avro_value_get_int(value, &val); return PyInt_FromLong(val); } case AVRO_INT64: { int64_t val; avro_value_get_long(value, &val); return PyLong_FromLongLong((PY_LONG_LONG)val); } case AVRO_NULL: { avro_value_get_null(value); Py_INCREF(Py_None); return Py_None; } case AVRO_STRING: { /* TODO: Convert the UTF-8 to the current * locale's character set */ const char *buf; size_t size; avro_value_get_string(value, &buf, &size); /* For strings, size includes the NUL terminator. */ return PyString_FromStringAndSize(buf, size - 1); } case AVRO_ARRAY: return array_to_python(info, value); case AVRO_ENUM: if (info->types == NULL) { /* just an int */ return enum_to_python(info, value); } else { /* a fancy object */ return enum_to_python_object(info, value); } case AVRO_FIXED: { const void *buf; size_t size; avro_value_get_fixed(value, &buf, &size); return PyString_FromStringAndSize((const char *)buf, size); } case AVRO_MAP: return map_to_python(info, value); case AVRO_RECORD: if (info->types == NULL) { /* just a dictionary */ return record_to_python(info, value); } else { /* a fancy object */ return record_to_python_object(info, value); } case AVRO_UNION: return union_to_python(info, value); default: return NULL; } }
static void read_with_schema_resolution(const char *filename, const char *reader_schema_json, const char *field_name) { avro_file_reader_t file; avro_schema_error_t error; avro_schema_t reader_schema; avro_schema_t writer_schema; avro_value_iface_t *writer_iface; avro_value_iface_t *reader_iface; avro_value_t writer_value; avro_value_t reader_value; // Open an Avro file and grab the writer schema that was used to create the // file. check_i(avro_file_reader(filename, &file)); writer_schema = avro_file_reader_get_writer_schema(file); // Create a value instance that we want to read the data into. Note that // this is *not* the writer schema! check_i(avro_schema_from_json (reader_schema_json, 0, &reader_schema, &error)); check_p(reader_iface = avro_generic_class_from_schema(reader_schema)); check_i(avro_generic_value_new(reader_iface, &reader_value)); // Create a resolved writer that will perform the schema resolution for us. // If the two schemas aren't compatible, this function will return an error, // and the error text should describe which parts of the schemas are // incompatible. check_p(writer_iface = avro_resolved_writer_new(writer_schema, reader_schema)); // Create an instance of the resolved writer, and tell it to wrap our reader // value instance. check_i(avro_resolved_writer_new_value(writer_iface, &writer_value)); avro_resolved_writer_set_dest(&writer_value, &reader_value); // Now we've got the same basic loop as above. But we've got two value // instances floating around! Which do we use? We have the file reader // fill in `writer_value`, since that's the value that is an instance of the // file's writer schema. Since it's an instance of a resolved writer, // though, it doesn't actually store any data itself. Instead, it will // perform schema resolution on the data read from the file, and fill in its // wrapped value (which in our case is `reader_value`). That means that // once the data has been read, we can get its (schema-resolved) contents // via `reader_value`. while (avro_file_reader_read_value(file, &writer_value) == 0) { avro_value_t field; int32_t value; check_i(avro_value_get_by_name(&reader_value, field_name, &field, NULL)); check_i(avro_value_get_int(&field, &value)); printf(" %s: %" PRId32 "\n", field_name, value); } // Close the file and clean up after ourselves. avro_file_reader_close(file); avro_value_decref(&writer_value); avro_value_iface_decref(writer_iface); avro_schema_decref(writer_schema); avro_value_decref(&reader_value); avro_value_iface_decref(reader_iface); avro_schema_decref(reader_schema); }
int avro_value_equal_fast(avro_value_t *val1, avro_value_t *val2) { avro_type_t type1 = avro_value_get_type(val1); avro_type_t type2 = avro_value_get_type(val2); if (type1 != type2) { return 0; } switch (type1) { case AVRO_BOOLEAN: { int v1; int v2; check_return(0, avro_value_get_boolean(val1, &v1)); check_return(0, avro_value_get_boolean(val2, &v2)); return (v1 == v2); } case AVRO_BYTES: { const void *buf1; const void *buf2; size_t size1; size_t size2; check_return(0, avro_value_get_bytes(val1, &buf1, &size1)); check_return(0, avro_value_get_bytes(val2, &buf2, &size2)); if (size1 != size2) { return 0; } return (memcmp(buf1, buf2, size1) == 0); } case AVRO_DOUBLE: { double v1; double v2; check_return(0, avro_value_get_double(val1, &v1)); check_return(0, avro_value_get_double(val2, &v2)); return (v1 == v2); } case AVRO_FLOAT: { float v1; float v2; check_return(0, avro_value_get_float(val1, &v1)); check_return(0, avro_value_get_float(val2, &v2)); return (v1 == v2); } case AVRO_INT32: { int32_t v1; int32_t v2; check_return(0, avro_value_get_int(val1, &v1)); check_return(0, avro_value_get_int(val2, &v2)); return (v1 == v2); } case AVRO_INT64: { int64_t v1; int64_t v2; check_return(0, avro_value_get_long(val1, &v1)); check_return(0, avro_value_get_long(val2, &v2)); return (v1 == v2); } case AVRO_NULL: { check_return(0, avro_value_get_null(val1)); check_return(0, avro_value_get_null(val2)); return 1; } case AVRO_STRING: { const char *buf1; const char *buf2; size_t size1; size_t size2; check_return(0, avro_value_get_string(val1, &buf1, &size1)); check_return(0, avro_value_get_string(val2, &buf2, &size2)); if (size1 != size2) { return 0; } return (memcmp(buf1, buf2, size1) == 0); } case AVRO_ARRAY: { size_t count1; size_t count2; check_return(0, avro_value_get_size(val1, &count1)); check_return(0, avro_value_get_size(val2, &count2)); if (count1 != count2) { return 0; } size_t i; for (i = 0; i < count1; i++) { avro_value_t child1; avro_value_t child2; check_return(0, avro_value_get_by_index (val1, i, &child1, NULL)); check_return(0, avro_value_get_by_index (val2, i, &child2, NULL)); if (!avro_value_equal_fast(&child1, &child2)) { return 0; } } return 1; } case AVRO_ENUM: { int v1; int v2; check_return(0, avro_value_get_enum(val1, &v1)); check_return(0, avro_value_get_enum(val2, &v2)); return (v1 == v2); } case AVRO_FIXED: { const void *buf1; const void *buf2; size_t size1; size_t size2; check_return(0, avro_value_get_fixed(val1, &buf1, &size1)); check_return(0, avro_value_get_fixed(val2, &buf2, &size2)); if (size1 != size2) { return 0; } return (memcmp(buf1, buf2, size1) == 0); } case AVRO_MAP: { size_t count1; size_t count2; check_return(0, avro_value_get_size(val1, &count1)); check_return(0, avro_value_get_size(val2, &count2)); if (count1 != count2) { return 0; } size_t i; for (i = 0; i < count1; i++) { avro_value_t child1; avro_value_t child2; const char *key1; check_return(0, avro_value_get_by_index (val1, i, &child1, &key1)); check_return(0, avro_value_get_by_name (val2, key1, &child2, NULL)); if (!avro_value_equal_fast(&child1, &child2)) { return 0; } } return 1; } case AVRO_RECORD: { size_t count1; check_return(0, avro_value_get_size(val1, &count1)); size_t i; for (i = 0; i < count1; i++) { avro_value_t child1; avro_value_t child2; check_return(0, avro_value_get_by_index (val1, i, &child1, NULL)); check_return(0, avro_value_get_by_index (val2, i, &child2, NULL)); if (!avro_value_equal_fast(&child1, &child2)) { return 0; } } return 1; } case AVRO_UNION: { int disc1; int disc2; check_return(0, avro_value_get_discriminant(val1, &disc1)); check_return(0, avro_value_get_discriminant(val2, &disc2)); if (disc1 != disc2) { return 0; } avro_value_t branch1; avro_value_t branch2; check_return(0, avro_value_get_current_branch(val1, &branch1)); check_return(0, avro_value_get_current_branch(val2, &branch2)); return avro_value_equal_fast(&branch1, &branch2); } default: return 0; } }
int avro_value_copy_fast(avro_value_t *dest, const avro_value_t *src) { avro_type_t dest_type = avro_value_get_type(dest); avro_type_t src_type = avro_value_get_type(src); if (dest_type != src_type) { return 0; } int rval; check(rval, avro_value_reset(dest)); switch (dest_type) { case AVRO_BOOLEAN: { int val; check(rval, avro_value_get_boolean(src, &val)); return avro_value_set_boolean(dest, val); } case AVRO_BYTES: { avro_wrapped_buffer_t val; check(rval, avro_value_grab_bytes(src, &val)); return avro_value_give_bytes(dest, &val); } case AVRO_DOUBLE: { double val; check(rval, avro_value_get_double(src, &val)); return avro_value_set_double(dest, val); } case AVRO_FLOAT: { float val; check(rval, avro_value_get_float(src, &val)); return avro_value_set_float(dest, val); } case AVRO_INT32: { int32_t val; check(rval, avro_value_get_int(src, &val)); return avro_value_set_int(dest, val); } case AVRO_INT64: { int64_t val; check(rval, avro_value_get_long(src, &val)); return avro_value_set_long(dest, val); } case AVRO_NULL: { check(rval, avro_value_get_null(src)); return avro_value_set_null(dest); } case AVRO_STRING: { avro_wrapped_buffer_t val; check(rval, avro_value_grab_string(src, &val)); return avro_value_give_string_len(dest, &val); } case AVRO_ARRAY: { size_t count; check(rval, avro_value_get_size(src, &count)); size_t i; for (i = 0; i < count; i++) { avro_value_t src_child; avro_value_t dest_child; check(rval, avro_value_get_by_index (src, i, &src_child, NULL)); check(rval, avro_value_append (dest, &dest_child, NULL)); check(rval, avro_value_copy_fast (&dest_child, &src_child)); } return 0; } case AVRO_ENUM: { int val; check(rval, avro_value_get_enum(src, &val)); return avro_value_set_enum(dest, val); } case AVRO_FIXED: { avro_wrapped_buffer_t val; check(rval, avro_value_grab_fixed(src, &val)); return avro_value_give_fixed(dest, &val); } case AVRO_MAP: { size_t count; check(rval, avro_value_get_size(src, &count)); size_t i; for (i = 0; i < count; i++) { avro_value_t src_child; avro_value_t dest_child; const char *key; check(rval, avro_value_get_by_index (src, i, &src_child, &key)); check(rval, avro_value_add (dest, key, &dest_child, NULL, NULL)); check(rval, avro_value_copy_fast (&dest_child, &src_child)); } return 0; } case AVRO_RECORD: { size_t count; check(rval, avro_value_get_size(src, &count)); size_t i; for (i = 0; i < count; i++) { avro_value_t src_child; avro_value_t dest_child; check(rval, avro_value_get_by_index (src, i, &src_child, NULL)); check(rval, avro_value_get_by_index (dest, i, &dest_child, NULL)); check(rval, avro_value_copy_fast (&dest_child, &src_child)); } return 0; } case AVRO_UNION: { int disc; check(rval, avro_value_get_discriminant(src, &disc)); avro_value_t src_branch; avro_value_t dest_branch; check(rval, avro_value_get_current_branch(src, &src_branch)); check(rval, avro_value_set_branch(dest, disc, &dest_branch)); return avro_value_copy_fast(&dest_branch, &src_branch); } default: return 0; } }
int avro_value_cmp_fast(avro_value_t *val1, avro_value_t *val2) { avro_type_t type1 = avro_value_get_type(val1); avro_type_t type2 = avro_value_get_type(val2); if (type1 != type2) { return -1; } switch (type1) { case AVRO_BOOLEAN: { int v1; int v2; check_return(0, avro_value_get_boolean(val1, &v1)); check_return(0, avro_value_get_boolean(val2, &v2)); return cmp(!!v1, !!v2); } case AVRO_BYTES: { const void *buf1; const void *buf2; size_t size1; size_t size2; size_t min_size; int result; check_return(0, avro_value_get_bytes(val1, &buf1, &size1)); check_return(0, avro_value_get_bytes(val2, &buf2, &size2)); min_size = (size1 < size2)? size1: size2; result = memcmp(buf1, buf2, min_size); if (result != 0) { return result; } else { return cmp(size1, size2); } } case AVRO_DOUBLE: { double v1; double v2; check_return(0, avro_value_get_double(val1, &v1)); check_return(0, avro_value_get_double(val2, &v2)); return cmp(v1, v2); } case AVRO_FLOAT: { float v1; float v2; check_return(0, avro_value_get_float(val1, &v1)); check_return(0, avro_value_get_float(val2, &v2)); return cmp(v1, v2); } case AVRO_INT32: { int32_t v1; int32_t v2; check_return(0, avro_value_get_int(val1, &v1)); check_return(0, avro_value_get_int(val2, &v2)); return cmp(v1, v2); } case AVRO_INT64: { int64_t v1; int64_t v2; check_return(0, avro_value_get_long(val1, &v1)); check_return(0, avro_value_get_long(val2, &v2)); return cmp(v1, v2); } case AVRO_NULL: { check_return(0, avro_value_get_null(val1)); check_return(0, avro_value_get_null(val2)); return 0; } case AVRO_STRING: { const char *buf1; const char *buf2; size_t size1; size_t size2; size_t min_size; int result; check_return(0, avro_value_get_string(val1, &buf1, &size1)); check_return(0, avro_value_get_string(val2, &buf2, &size2)); min_size = (size1 < size2)? size1: size2; result = memcmp(buf1, buf2, min_size); if (result != 0) { return result; } else { return cmp(size1, size2); } } case AVRO_ARRAY: { size_t count1; size_t count2; size_t min_count; size_t i; check_return(0, avro_value_get_size(val1, &count1)); check_return(0, avro_value_get_size(val2, &count2)); min_count = (count1 < count2)? count1: count2; for (i = 0; i < min_count; i++) { avro_value_t child1; avro_value_t child2; int result; check_return(0, avro_value_get_by_index (val1, i, &child1, NULL)); check_return(0, avro_value_get_by_index (val2, i, &child2, NULL)); result = avro_value_cmp_fast(&child1, &child2); if (result != 0) { return result; } } return cmp(count1, count2); } case AVRO_ENUM: { int v1; int v2; check_return(0, avro_value_get_enum(val1, &v1)); check_return(0, avro_value_get_enum(val2, &v2)); return cmp(v1, v2); } case AVRO_FIXED: { const void *buf1; const void *buf2; size_t size1; size_t size2; check_return(0, avro_value_get_fixed(val1, &buf1, &size1)); check_return(0, avro_value_get_fixed(val2, &buf2, &size2)); if (size1 != size2) { return -1; } return memcmp(buf1, buf2, size1); } case AVRO_MAP: { return -1; } case AVRO_RECORD: { size_t count1; check_return(0, avro_value_get_size(val1, &count1)); size_t i; for (i = 0; i < count1; i++) { avro_value_t child1; avro_value_t child2; int result; check_return(0, avro_value_get_by_index (val1, i, &child1, NULL)); check_return(0, avro_value_get_by_index (val2, i, &child2, NULL)); result = avro_value_cmp_fast(&child1, &child2); if (result != 0) { return result; } } return 0; } case AVRO_UNION: { int disc1; int disc2; check_return(0, avro_value_get_discriminant(val1, &disc1)); check_return(0, avro_value_get_discriminant(val2, &disc2)); if (disc1 == disc2) { avro_value_t branch1; avro_value_t branch2; check_return(0, avro_value_get_current_branch(val1, &branch1)); check_return(0, avro_value_get_current_branch(val2, &branch2)); return avro_value_cmp_fast(&branch1, &branch2); } else { return cmp(disc1, disc2); } } default: return 0; } }