/*
 * Find the branch of a union schema by its name.
 *
 * unionp        union schema to search
 * branch_index  optional out-parameter; when non-NULL it receives the index
 *               of the matching branch
 * name          branch name used as the lookup key
 *
 * Returns the result of avro_schema_union_branch() for the matching index,
 * or NULL (after calling avro_set_error) when no branch has that name.
 */
avro_schema_t
avro_schema_union_branch_by_name(avro_schema_t unionp, int *branch_index,
                                 const char *name)
{
    /* The lookup fills the st_data_t member; the index is then read back
     * through the int member that shares the same storage. */
    union {
        st_data_t data;
        int branch_index;
    } found;

    if (st_lookup(avro_schema_to_union(unionp)->branches_byname,
                  (st_data_t) name, &found.data)) {
        if (branch_index != NULL) {
            *branch_index = found.branch_index;
        }
        return avro_schema_union_branch(unionp, found.branch_index);
    }

    avro_set_error("No union branch named %s", name);
    return NULL;
}
/*
 * Recursively fill an Avro value from a JSON value, guided by the schema.
 *
 * schema       Avro schema describing current_val
 * json         JSON value to convert; when NULL, dft is used instead
 * dft          fallback JSON value (a record field's default), may be NULL
 * current_val  Avro value to populate
 * quiet        when non-zero, type-mismatch diagnostics are suppressed
 *              (used while probing union branches); a few messages
 *              (missing JSON, AVRO_LINK, no matching union branch, fixed
 *              failure, unknown type) are printed regardless
 * strjson      when non-zero, a non-string JSON value destined for an Avro
 *              string is serialized and stored as text instead of failing
 * max_str_sz   when non-zero, strings longer than this many bytes are
 *              truncated to it
 *
 * Returns 0 on success, 1 on any mismatch or failure.
 */
int schema_traverse(const avro_schema_t schema, json_t *json, json_t *dft,
                    avro_value_t *current_val, int quiet, int strjson,
                    size_t max_str_sz)
{
    json = json ? json : dft;
    if (!json) {
        fprintf(stderr, "ERROR: Avro schema does not match JSON\n");
        return 1;
    }

    switch (schema->type) {
    case AVRO_RECORD:
    {
        if (!json_is_object(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON object for Avro record, got something else\n");
            return 1;
        }

        int len = avro_schema_record_size(schema), i;
        for (i = 0; i < len; i++) {
            const char *name = avro_schema_record_field_name(schema, i);
            avro_schema_t field_schema = avro_schema_record_field_get_by_index(schema, i);

            json_t *json_val = json_object_get(json, name);
            /* Named field_dft so it does not shadow the dft parameter. */
            json_t *field_dft = avro_schema_record_field_default_get_by_index(schema, i);

            avro_value_t field;
            /* Do not recurse into an uninitialised child value. */
            if (avro_value_get_by_index(current_val, i, &field, NULL)) {
                if (!quiet)
                    fprintf(stderr, "ERROR: Cannot access Avro record field %s\n", name);
                return 1;
            }

            if (schema_traverse(field_schema, json_val, field_dft, &field, quiet, strjson, max_str_sz))
                return 1;
        }
    } break;

    case AVRO_LINK:
        /* TODO */
        fprintf(stderr, "ERROR: AVRO_LINK is not implemented\n");
        return 1;
        break;

    case AVRO_STRING:
        if (!json_is_string(json)) {
            if (strjson) {
                /* -j specified, just dump the remaining json as string */
                char *js = json_dumps(json, JSON_COMPACT|JSON_SORT_KEYS|JSON_ENCODE_ANY);
                if (!js) {
                    if (!quiet)
                        fprintf(stderr, "ERROR: Cannot serialize JSON value to a string\n");
                    return 1;
                }
                if (max_str_sz && (strlen(js) > max_str_sz))
                    js[max_str_sz] = 0; /* truncate the string - this will result in invalid JSON! */
                avro_value_set_string(current_val, js);
                free(js);
                break;
            }
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON string for Avro string, got something else\n");
            return 1;
        } else {
            const char *js = json_string_value(json);
            if (max_str_sz && (strlen(js) > max_str_sz)) {
                /* Truncate into a private copy: max_str_sz bytes plus the
                 * terminator. The cut is byte-based, so it may land inside
                 * a multi-byte character. */
                char *jst = malloc(max_str_sz + 1);
                if (!jst) {
                    fprintf(stderr, "ERROR: Out of memory while truncating string\n");
                    return 1;
                }
                memcpy(jst, js, max_str_sz);
                jst[max_str_sz] = 0;
                avro_value_set_string(current_val, jst);
                free(jst);
            } else
                avro_value_set_string(current_val, js);
        }
        break;

    case AVRO_BYTES:
    {
        if (!json_is_string(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON string for Avro bytes, got something else\n");
            return 1;
        }
        /* NB: Jansson uses null-terminated strings, so embedded nulls are NOT
           supported, not even escaped ones */
        const char *s = json_string_value(json);
        avro_value_set_bytes(current_val, (void *)s, strlen(s));
    } break;

    case AVRO_INT32:
        if (!json_is_integer(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON integer for Avro int, got something else\n");
            return 1;
        }
        avro_value_set_int(current_val, json_integer_value(json));
        break;

    case AVRO_INT64:
        if (!json_is_integer(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON integer for Avro long, got something else\n");
            return 1;
        }
        avro_value_set_long(current_val, json_integer_value(json));
        break;

    case AVRO_FLOAT:
        if (!json_is_number(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON number for Avro float, got something else\n");
            return 1;
        }
        avro_value_set_float(current_val, json_number_value(json));
        break;

    case AVRO_DOUBLE:
        if (!json_is_number(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON number for Avro double, got something else\n");
            return 1;
        }
        avro_value_set_double(current_val, json_number_value(json));
        break;

    case AVRO_BOOLEAN:
        if (!json_is_boolean(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON boolean for Avro boolean, got something else\n");
            return 1;
        }
        avro_value_set_boolean(current_val, json_is_true(json));
        break;

    case AVRO_NULL:
        if (!json_is_null(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON null for Avro null, got something else\n");
            return 1;
        }
        avro_value_set_null(current_val);
        break;

    case AVRO_ENUM:
        // TODO ???
        /* NOTE(review): enums are accepted without setting current_val. */
        break;

    case AVRO_ARRAY:
        if (!json_is_array(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON array for Avro array, got something else\n");
            return 1;
        } else {
            size_t i, len = json_array_size(json);
            avro_schema_t items = avro_schema_array_items(schema);
            avro_value_t val;
            for (i = 0; i < len; i++) {
                if (avro_value_append(current_val, &val, NULL)) {
                    if (!quiet)
                        fprintf(stderr, "ERROR: Cannot append element to Avro array\n");
                    return 1;
                }
                if (schema_traverse(items, json_array_get(json, i), NULL, &val, quiet, strjson, max_str_sz))
                    return 1;
            }
        }
        break;

    case AVRO_MAP:
        if (!json_is_object(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON object for Avro map, got something else\n");
            return 1;
        } else {
            avro_schema_t values = avro_schema_map_values(schema);
            void *iter = json_object_iter(json);
            avro_value_t val;
            while (iter) {
                if (avro_value_add(current_val, json_object_iter_key(iter), &val, 0, 0)) {
                    if (!quiet)
                        fprintf(stderr, "ERROR: Cannot add entry to Avro map\n");
                    return 1;
                }
                if (schema_traverse(values, json_object_iter_value(iter), NULL, &val, quiet, strjson, max_str_sz))
                    return 1;
                iter = json_object_iter_next(json, iter);
            }
        }
        break;

    case AVRO_UNION:
    {
        /* Try each branch in order, quietly, and keep the first one the
         * JSON value converts into. */
        int i;
        int branches = (int) avro_schema_union_size(schema);
        avro_value_t branch;
        for (i = 0; i < branches; i++) {
            /* A branch that cannot be selected is treated as a non-match. */
            if (avro_value_set_branch(current_val, i, &branch))
                continue;
            avro_schema_t type = avro_schema_union_branch(schema, i);
            if (!schema_traverse(type, json, NULL, &branch, 1, strjson, max_str_sz))
                break;
        }
        if (i == branches) {
            fprintf(stderr, "ERROR: No type in the Avro union matched the JSON type we got\n");
            return 1;
        }
        break;
    }

    case AVRO_FIXED:
    {
        if (!json_is_string(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON string for Avro fixed, got something else\n");
            return 1;
        }
        /* NB: Jansson uses null-terminated strings, so embedded nulls are NOT
           supported, not even escaped ones */
        const char *f = json_string_value(json);
        if (avro_value_set_fixed(current_val, (void *)f, strlen(f))) {
            fprintf(stderr, "ERROR: Setting Avro fixed value FAILED\n");
            return 1;
        }
    } break;

    default:
        fprintf(stderr, "ERROR: Unknown type: %d\n", schema->type);
        return 1;
    }
    return 0;
}
/*
 * Build an Avro schema from its parsed JSON representation.
 *
 * json           JSON node describing the schema
 * schema         out-parameter receiving the constructed schema
 * named_schemas  table of named types seen so far; record, enum and fixed
 *                schemas are registered in it via save_named_schemas()
 *
 * Returns 0 on success, or a non-zero errno-style code (EINVAL, ENOMEM, or
 * whatever a nested call returned) on failure. On the failure paths that
 * release the partially built schema, *schema is left pointing at the
 * released object and must not be used by the caller.
 */
static int
avro_schema_from_json_t(json_t *json, avro_schema_t *schema,
                        st_table *named_schemas)
{
#ifdef _WIN32
 #pragma message("#warning: Bug: '0' is not of type avro_type_t.")
#else
 #warning "Bug: '0' is not of type avro_type_t."
#endif
    /* We should really have an "AVRO_INVALID" type in
     * avro_type_t. Suppress warning below in which we set type to 0.
     */
    avro_type_t type = (avro_type_t) 0;
    unsigned int i;
    avro_schema_t named_type = NULL;

    if (avro_type_from_json_t(json, &type, named_schemas, &named_type)) {
        return EINVAL;
    }

    switch (type) {
    case AVRO_LINK:
        *schema = avro_schema_link(named_type);
        break;

    case AVRO_STRING:
        *schema = avro_schema_string();
        break;

    case AVRO_BYTES:
        *schema = avro_schema_bytes();
        break;

    case AVRO_INT32:
        *schema = avro_schema_int();
        break;

    case AVRO_INT64:
        *schema = avro_schema_long();
        break;

    case AVRO_FLOAT:
        *schema = avro_schema_float();
        break;

    case AVRO_DOUBLE:
        *schema = avro_schema_double();
        break;

    case AVRO_BOOLEAN:
        *schema = avro_schema_boolean();
        break;

    case AVRO_NULL:
        *schema = avro_schema_null();
        break;

    case AVRO_RECORD:
        {
            json_t *json_name = json_object_get(json, "name");
            json_t *json_namespace = json_object_get(json, "namespace");
            json_t *json_fields = json_object_get(json, "fields");
            unsigned int num_fields;
            const char *record_name;
            const char *record_namespace;

            if (!json_is_string(json_name)) {
                avro_set_error("Record type must have a \"name\"");
                return EINVAL;
            }
            if (!json_is_array(json_fields)) {
                avro_set_error("Record type must have \"fields\"");
                return EINVAL;
            }
            num_fields = json_array_size(json_fields);
            if (num_fields == 0) {
                avro_set_error("Record type must have at least one field");
                return EINVAL;
            }
            record_name = json_string_value(json_name);
            if (!record_name) {
                avro_set_error("Record type must have a \"name\"");
                return EINVAL;
            }
            if (json_is_string(json_namespace)) {
                record_namespace = json_string_value(json_namespace);
            } else {
                record_namespace = NULL;
            }

            *schema = avro_schema_record(record_name, record_namespace);
            if (*schema == NULL) {
                /* NOTE(review): assumes the constructor reports failure
                 * with NULL and records its own error message. */
                return EINVAL;
            }
            if (save_named_schemas(record_name, *schema, named_schemas)) {
                /* NOTE(review): *schema is not released here because the
                 * ownership rules of save_named_schemas() are not visible
                 * from this function. */
                avro_set_error("Cannot save record schema");
                return ENOMEM;
            }

            for (i = 0; i < num_fields; i++) {
                json_t *json_field = json_array_get(json_fields, i);
                json_t *json_field_name;
                json_t *json_field_type;
                json_t *json_default_value;
                avro_schema_t json_field_type_schema;
                int field_rval;

                if (!json_is_object(json_field)) {
                    avro_set_error("Record field %u must be an object", i);
                    avro_schema_decref(*schema);
                    return EINVAL;
                }
                json_field_name = json_object_get(json_field, "name");
                /* The name is later read with json_string_value(), so it
                 * must be a JSON string, not merely present. */
                if (!json_is_string(json_field_name)) {
                    avro_set_error("Record field %u must have a \"name\"", i);
                    avro_schema_decref(*schema);
                    return EINVAL;
                }
                json_field_type = json_object_get(json_field, "type");
                if (!json_field_type) {
                    avro_set_error("Record field %u must have a \"type\"", i);
                    avro_schema_decref(*schema);
                    return EINVAL;
                }
                field_rval =
                    avro_schema_from_json_t(json_field_type,
                                            &json_field_type_schema,
                                            named_schemas);
                if (field_rval) {
                    avro_schema_decref(*schema);
                    return field_rval;
                }

                json_default_value = json_object_get(json_field, "default");
                avro_datum_t default_value = NULL;
                if (json_default_value) {
                    avro_schema_t default_schema = json_field_type_schema;
                    if (json_field_type_schema->type == AVRO_UNION) {
                        // From the spec: "Default values for union fields correspond
                        // to the first schema in the union."
                        default_schema =
                            avro_schema_union_branch(json_field_type_schema, 0);
                    }
                    default_value =
                        json_t_to_avro_value(default_schema, json_default_value);
                    if (default_value == NULL) {
                        /* Release the field schema too; it has not been
                         * handed to the record yet. */
                        avro_schema_decref(json_field_type_schema);
                        avro_schema_decref(*schema);
                        return EINVAL;
                    }
                }

                field_rval =
                    avro_schema_record_field_append(*schema,
                                                    json_string_value(json_field_name),
                                                    json_field_type_schema,
                                                    default_value);
                avro_schema_decref(json_field_type_schema);
                if (field_rval != 0) {
                    avro_schema_decref(*schema);
                    return field_rval;
                }
            }
        }
        break;

    case AVRO_ENUM:
        {
            json_t *json_name = json_object_get(json, "name");
            json_t *json_symbols = json_object_get(json, "symbols");
            const char *name;
            unsigned int num_symbols;

            if (!json_is_string(json_name)) {
                avro_set_error("Enum type must have a \"name\"");
                return EINVAL;
            }
            if (!json_is_array(json_symbols)) {
                avro_set_error("Enum type must have \"symbols\"");
                return EINVAL;
            }

            name = json_string_value(json_name);
            if (!name) {
                avro_set_error("Enum type must have a \"name\"");
                return EINVAL;
            }
            num_symbols = json_array_size(json_symbols);
            if (num_symbols == 0) {
                avro_set_error("Enum type must have at least one symbol");
                return EINVAL;
            }

            *schema = avro_schema_enum(name);
            if (*schema == NULL) {
                /* NOTE(review): assumes NULL signals constructor failure. */
                return EINVAL;
            }
            if (save_named_schemas(name, *schema, named_schemas)) {
                avro_set_error("Cannot save enum schema");
                return ENOMEM;
            }

            for (i = 0; i < num_symbols; i++) {
                int enum_rval;
                json_t *json_symbol = json_array_get(json_symbols, i);
                const char *symbol;

                if (!json_is_string(json_symbol)) {
                    avro_set_error("Enum symbol %u must be a string", i);
                    avro_schema_decref(*schema);
                    return EINVAL;
                }
                symbol = json_string_value(json_symbol);
                enum_rval = avro_schema_enum_symbol_append(*schema, symbol);
                if (enum_rval != 0) {
                    avro_schema_decref(*schema);
                    return enum_rval;
                }
            }
        }
        break;

    case AVRO_ARRAY:
        {
            int items_rval;
            json_t *json_items = json_object_get(json, "items");
            avro_schema_t items_schema;

            if (!json_items) {
                avro_set_error("Array type must have \"items\"");
                return EINVAL;
            }
            items_rval =
                avro_schema_from_json_t(json_items, &items_schema,
                                        named_schemas);
            if (items_rval) {
                return items_rval;
            }
            *schema = avro_schema_array(items_schema);
            avro_schema_decref(items_schema);
        }
        break;

    case AVRO_MAP:
        {
            int values_rval;
            json_t *json_values = json_object_get(json, "values");
            avro_schema_t values_schema;

            if (!json_values) {
                avro_set_error("Map type must have \"values\"");
                return EINVAL;
            }
            values_rval =
                avro_schema_from_json_t(json_values, &values_schema,
                                        named_schemas);
            if (values_rval) {
                return values_rval;
            }
            *schema = avro_schema_map(values_schema);
            avro_schema_decref(values_schema);
        }
        break;

    case AVRO_UNION:
        {
            unsigned int num_schemas = json_array_size(json);
            avro_schema_t s;

            if (num_schemas == 0) {
                avro_set_error("Union type must have at least one branch");
                return EINVAL;
            }

            *schema = avro_schema_union();
            if (*schema == NULL) {
                /* NOTE(review): assumes NULL signals constructor failure. */
                return ENOMEM;
            }

            for (i = 0; i < num_schemas; i++) {
                int schema_rval;
                json_t *schema_json = json_array_get(json, i);

                if (!schema_json) {
                    avro_set_error("Cannot retrieve branch JSON");
                    /* Release the partially built union, as the other
                     * failure paths in this loop do. */
                    avro_schema_decref(*schema);
                    return EINVAL;
                }
                schema_rval =
                    avro_schema_from_json_t(schema_json, &s, named_schemas);
                if (schema_rval != 0) {
                    avro_schema_decref(*schema);
                    return schema_rval;
                }
                schema_rval = avro_schema_union_append(*schema, s);
                avro_schema_decref(s);
                if (schema_rval != 0) {
                    avro_schema_decref(*schema);
                    return schema_rval;
                }
            }
        }
        break;

    case AVRO_FIXED:
        {
            json_t *json_size = json_object_get(json, "size");
            json_t *json_name = json_object_get(json, "name");
            json_int_t size;
            const char *name;

            if (!json_is_integer(json_size)) {
                avro_set_error("Fixed type must have a \"size\"");
                return EINVAL;
            }
            if (!json_is_string(json_name)) {
                avro_set_error("Fixed type must have a \"name\"");
                return EINVAL;
            }
            size = json_integer_value(json_size);
            if (size < 0) {
                avro_set_error("Fixed type must have a non-negative \"size\"");
                return EINVAL;
            }
            name = json_string_value(json_name);
            *schema = avro_schema_fixed(name, (int64_t) size);
            if (save_named_schemas(name, *schema, named_schemas)) {
                avro_set_error("Cannot save fixed schema");
                return ENOMEM;
            }
        }
        break;

    default:
        avro_set_error("Unknown schema type");
        return EINVAL;
    }
    return 0;
}
/*
 * Map an Avro schema to the Python type object that represents it,
 * declaring Python types for any record schemas reachable from it.
 *
 * info    conversion context; info->types is the Python object on which
 *         record types are looked up by schema name
 * schema  Avro schema to map
 *
 * Returns a Python type object, or NULL when the schema type is not handled
 * or a Python call failed. For built-in types the pointer is the address of
 * the static type object; for an already-declared record the looked-up
 * reference is released before returning, so the caller does not receive a
 * reference of its own in that path.
 */
PyObject *
declare_types(ConvertInfo *info, avro_schema_t schema)
{
    avro_type_t type = schema->type;

    switch (type) {
    case AVRO_NULL:
        /* PyNone_Type is not publicly visible */
        return (PyObject *)Py_TYPE(Py_None);
    case AVRO_BOOLEAN:
        return (PyObject *)&PyBool_Type;
    case AVRO_BYTES:
    case AVRO_STRING:
    case AVRO_FIXED:
        return (PyObject *)&PyString_Type;
    case AVRO_DOUBLE:
    case AVRO_FLOAT:
        return (PyObject *)&PyFloat_Type;
    case AVRO_INT32:
        return (PyObject *)&PyInt_Type;
    case AVRO_ENUM:
        return get_python_enum_type(info->types, schema);
    case AVRO_INT64:
        return (PyObject *)&PyLong_Type;
    case AVRO_ARRAY:
        /* Only the side effect (declaring nested records) matters here,
         * but a raised Python exception must not be swallowed. */
        if (declare_types(info, avro_schema_array_items(schema)) == NULL
            && PyErr_Occurred()) {
            return NULL;
        }
        return (PyObject *)&PyList_Type;
    case AVRO_MAP:
        if (declare_types(info, avro_schema_map_values(schema)) == NULL
            && PyErr_Occurred()) {
            return NULL;
        }
        return (PyObject *)&PyDict_Type;
    case AVRO_UNION:
        {
            size_t union_size = avro_schema_union_size(schema);
            size_t i;

            for (i = 0; i < union_size; i++) {
                if (declare_types(info, avro_schema_union_branch(schema, i)) == NULL
                    && PyErr_Occurred()) {
                    return NULL;
                }
            }
        }
        return (PyObject *)&PyBaseObject_Type;  /* XXX list of types might be better */
    case AVRO_RECORD:
        {
            size_t field_count = avro_schema_record_size(schema);
            size_t i;
            const char *record_name = avro_schema_name(schema);
            PyObject *record_type = PyObject_GetAttrString(info->types, record_name);
            PyObject *field_types;

            if (record_type != NULL) {
                /* already declared this record type */
                Py_DECREF(record_type);
                return record_type;
            }
            /* A missing attribute just means "not declared yet"; clear that
             * exception before making further C-API calls. Anything else is
             * a real error and is propagated. */
            if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
                return NULL;
            }
            PyErr_Clear();

            /* create the Python type for this schema */
            record_type = get_python_obj_type(info->types, schema);
            if (record_type == NULL) {
                return NULL;
            }
            field_types = PyObject_GetAttrString(record_type, "_fieldtypes");
            if (field_types == NULL) {
                return NULL;
            }

            for (i = 0; i < field_count; i++) {
                PyObject *field_type =
                    declare_types(info, avro_schema_record_field_get_by_index(schema, i));

                if (field_type == NULL) {
                    if (!PyErr_Occurred()) {
                        PyErr_SetString(PyExc_TypeError,
                                        "unsupported Avro field type");
                    }
                    Py_DECREF(field_types);
                    return NULL;
                }
                /* this will INCREF, so takes hold of the object */
                if (PyMapping_SetItemString(field_types,
                                            (char *)avro_schema_record_field_name(schema, i),
                                            field_type) < 0) {
                    Py_DECREF(field_types);
                    return NULL;
                }
            }
            Py_DECREF(field_types);
            return record_type;
        }
    case AVRO_LINK:
        return declare_types(info, avro_schema_link_target(schema));
    default:
        /* other types don't hold records */
        break;
    }
    return NULL;
}