예제 #1
0
파일: schema.c 프로젝트: AlexChen12/avro
avro_schema_t avro_schema_union_branch_by_name
(avro_schema_t unionp, int *branch_index, const char *name)
{
	union {
		st_data_t data;
		int  branch_index;
	} val;

	if (!st_lookup(avro_schema_to_union(unionp)->branches_byname,
		       (st_data_t) name, &val.data)) {
		avro_set_error("No union branch named %s", name);
		return NULL;
	}

	if (branch_index != NULL) {
		*branch_index = val.branch_index;
	}
	return avro_schema_union_branch(unionp, val.branch_index);
}
예제 #2
0
int schema_traverse(const avro_schema_t schema, json_t *json, json_t *dft,
                    avro_value_t *current_val, int quiet, int strjson, size_t max_str_sz) {

    json = json ? json : dft;
    if (!json) {
        fprintf(stderr, "ERROR: Avro schema does not match JSON\n");
        return 1;
    }

    switch (schema->type) {
    case AVRO_RECORD:
    {
        if (!json_is_object(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON object for Avro record, got something else\n");
            return 1;
        }

        int len = avro_schema_record_size(schema), i;
        for (i=0; i<len; i++) {

            const char *name = avro_schema_record_field_name(schema, i);
            avro_schema_t field_schema = avro_schema_record_field_get_by_index(schema, i);

            json_t *json_val = json_object_get(json, name);
            json_t *dft = avro_schema_record_field_default_get_by_index(schema, i);

            avro_value_t field;
            avro_value_get_by_index(current_val, i, &field, NULL);

            if (schema_traverse(field_schema, json_val, dft, &field, quiet, strjson, max_str_sz))
                return 1;
        }
    } break;

    case AVRO_LINK:
        /* TODO */
        fprintf(stderr, "ERROR: AVRO_LINK is not implemented\n");
        return 1;
        break;

    case AVRO_STRING:
        if (!json_is_string(json)) {
            if (json && strjson) {
                /* -j specified, just dump the remaining json as string */
                char * js = json_dumps(json, JSON_COMPACT|JSON_SORT_KEYS|JSON_ENCODE_ANY);
                if (max_str_sz && (strlen(js) > max_str_sz))
                    js[max_str_sz] = 0; /* truncate the string - this will result in invalid JSON! */
                avro_value_set_string(current_val, js);
                free(js);
                break;
            }
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON string for Avro string, got something else\n");
            return 1;
        } else {
            const char *js = json_string_value(json);
            if (max_str_sz && (strlen(js) > max_str_sz)) {
                /* truncate the string */
                char *jst = malloc(strlen(js));
                strcpy(jst, js);
                jst[max_str_sz] = 0;
                avro_value_set_string(current_val, jst);
                free(jst);
            } else
                avro_value_set_string(current_val, js);
        }
        break;

    case AVRO_BYTES:
        if (!json_is_string(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON string for Avro string, got something else\n");
            return 1;
        }
        /* NB: Jansson uses null-terminated strings, so embedded nulls are NOT
           supported, not even escaped ones */
        const char *s = json_string_value(json);
        avro_value_set_bytes(current_val, (void *)s, strlen(s));
        break;

    case AVRO_INT32:
        if (!json_is_integer(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON integer for Avro int, got something else\n");
            return 1;
        }
        avro_value_set_int(current_val, json_integer_value(json));
        break;

    case AVRO_INT64:
        if (!json_is_integer(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON integer for Avro long, got something else\n");
            return 1;
        }
        avro_value_set_long(current_val, json_integer_value(json));
        break;

    case AVRO_FLOAT:
        if (!json_is_number(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON number for Avro float, got something else\n");
            return 1;
        }
        avro_value_set_float(current_val, json_number_value(json));
        break;

    case AVRO_DOUBLE:
        if (!json_is_number(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON number for Avro double, got something else\n");
            return 1;
        }
        avro_value_set_double(current_val, json_number_value(json));
        break;

    case AVRO_BOOLEAN:
        if (!json_is_boolean(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON boolean for Avro boolean, got something else\n");
            return 1;
        }
        avro_value_set_boolean(current_val, json_is_true(json));
        break;

    case AVRO_NULL:
        if (!json_is_null(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON null for Avro null, got something else\n");
            return 1;
        }
        avro_value_set_null(current_val);
        break;

    case AVRO_ENUM:
        // TODO ???
        break;

    case AVRO_ARRAY:
        if (!json_is_array(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON array for Avro array, got something else\n");
            return 1;
        } else {
            int i, len = json_array_size(json);
            avro_schema_t items = avro_schema_array_items(schema);
            avro_value_t val;
            for (i=0; i<len; i++) {
                avro_value_append(current_val, &val, NULL);
                if (schema_traverse(items, json_array_get(json, i), NULL, &val, quiet, strjson, max_str_sz))
                    return 1;
            }
        }
        break;

    case AVRO_MAP:
        if (!json_is_object(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON object for Avro map, got something else\n");
            return 1;
        } else {
            avro_schema_t values = avro_schema_map_values(schema);
            void *iter = json_object_iter(json);
            avro_value_t val;
            while (iter) {
                avro_value_add(current_val, json_object_iter_key(iter), &val, 0, 0);
                if (schema_traverse(values, json_object_iter_value(iter), NULL, &val, quiet, strjson, max_str_sz))
                    return 1;
                iter = json_object_iter_next(json, iter);
            }
        }
        break;

    case AVRO_UNION:
    {
        int i;
        avro_value_t branch;
        for (i=0; i<avro_schema_union_size(schema); i++) {
            avro_value_set_branch(current_val, i, &branch);
            avro_schema_t type = avro_schema_union_branch(schema, i);
            if (!schema_traverse(type, json, NULL, &branch, 1, strjson, max_str_sz))
                break;
        }
        if (i==avro_schema_union_size(schema)) {
            fprintf(stderr, "ERROR: No type in the Avro union matched the JSON type we got\n");
            return 1;
        }
        break;
    }
    case AVRO_FIXED:
        if (!json_is_string(json)) {
            if (!quiet)
                fprintf(stderr, "ERROR: Expecting JSON string for Avro fixed, got something else\n");
            return 1;
        }
        /* NB: Jansson uses null-terminated strings, so embedded nulls are NOT
           supported, not even escaped ones */
        const char *f = json_string_value(json);
        if (avro_value_set_fixed(current_val, (void *)f, strlen(f))) {
            fprintf(stderr, "ERROR: Setting Avro fixed value FAILED\n");
            return 1;
        }
        break;

    default:
        fprintf(stderr, "ERROR: Unknown type: %d\n", schema->type);
        return 1;
    }
    return 0;
}
예제 #3
0
파일: schema.c 프로젝트: 7472741/impala
static int
avro_schema_from_json_t(json_t *json, avro_schema_t *schema,
			st_table *named_schemas)
{
#ifdef _WIN32
 #pragma message("#warning: Bug: '0' is not of type avro_type_t.")
#else
 #warning "Bug: '0' is not of type avro_type_t."
#endif
  /* We should really have an "AVRO_INVALID" type in
   * avro_type_t. Suppress warning below in which we set type to 0.
   */
	avro_type_t type = (avro_type_t) 0;
	unsigned int i;
	avro_schema_t named_type = NULL;

	if (avro_type_from_json_t(json, &type, named_schemas, &named_type)) {
		return EINVAL;
	}

	switch (type) {
	case AVRO_LINK:
		*schema = avro_schema_link(named_type);
		break;

	case AVRO_STRING:
		*schema = avro_schema_string();
		break;

	case AVRO_BYTES:
		*schema = avro_schema_bytes();
		break;

	case AVRO_INT32:
		*schema = avro_schema_int();
		break;

	case AVRO_INT64:
		*schema = avro_schema_long();
		break;

	case AVRO_FLOAT:
		*schema = avro_schema_float();
		break;

	case AVRO_DOUBLE:
		*schema = avro_schema_double();
		break;

	case AVRO_BOOLEAN:
		*schema = avro_schema_boolean();
		break;

	case AVRO_NULL:
		*schema = avro_schema_null();
		break;

	case AVRO_RECORD:
		{
			json_t *json_name = json_object_get(json, "name");
			json_t *json_namespace =
			    json_object_get(json, "namespace");
			json_t *json_fields = json_object_get(json, "fields");
			unsigned int num_fields;
			const char *record_name;
			const char *record_namespace;

			if (!json_is_string(json_name)) {
				avro_set_error("Record type must have a \"name\"");
				return EINVAL;
			}
			if (!json_is_array(json_fields)) {
				avro_set_error("Record type must have \"fields\"");
				return EINVAL;
			}
			num_fields = json_array_size(json_fields);
			if (num_fields == 0) {
				avro_set_error("Record type must have at least one field");
				return EINVAL;
			}
			record_name = json_string_value(json_name);
			if (!record_name) {
				avro_set_error("Record type must have a \"name\"");
				return EINVAL;
			}
			if (json_is_string(json_namespace)) {
				record_namespace =
				    json_string_value(json_namespace);
			} else {
				record_namespace = NULL;
			}
			*schema =
			    avro_schema_record(record_name, record_namespace);
			if (save_named_schemas(record_name, *schema, named_schemas)) {
				avro_set_error("Cannot save record schema");
				return ENOMEM;
			}
			for (i = 0; i < num_fields; i++) {
				json_t *json_field =
				    json_array_get(json_fields, i);
				json_t *json_field_name;
				json_t *json_field_type;
				json_t *json_default_value;
				avro_schema_t json_field_type_schema;
				int field_rval;

				if (!json_is_object(json_field)) {
					avro_set_error("Record field %d must be an array", i);
					avro_schema_decref(*schema);
					return EINVAL;
				}
				json_field_name =
				    json_object_get(json_field, "name");
				if (!json_field_name) {
					avro_set_error("Record field %d must have a \"name\"", i);
					avro_schema_decref(*schema);
					return EINVAL;
				}
				json_field_type =
				    json_object_get(json_field, "type");
				if (!json_field_type) {
					avro_set_error("Record field %d must have a \"type\"", i);
					avro_schema_decref(*schema);
					return EINVAL;
				}
				field_rval =
				    avro_schema_from_json_t(json_field_type,
							    &json_field_type_schema,
							    named_schemas);
				if (field_rval) {
					avro_schema_decref(*schema);
					return field_rval;
				}
				json_default_value =
						json_object_get(json_field, "default");
				avro_datum_t default_value = NULL;
				if (json_default_value) {
					avro_schema_t default_schema = json_field_type_schema;
					if (json_field_type_schema->type == AVRO_UNION) {
						// From the spec: "Default values for union fields correspond
						// to the first schema in the union."
						default_schema =
								avro_schema_union_branch(json_field_type_schema, 0);
					}
					default_value =
							json_t_to_avro_value(default_schema, json_default_value);
					if (default_value == NULL) {
						avro_schema_decref(*schema);
						return EINVAL;
					}
				}
				field_rval =
				    avro_schema_record_field_append(*schema,
								    json_string_value
								    (json_field_name),
										json_field_type_schema,
										default_value);
				avro_schema_decref(json_field_type_schema);
				if (field_rval != 0) {
					avro_schema_decref(*schema);
					return field_rval;
				}
			}
		}
		break;

	case AVRO_ENUM:
		{
			json_t *json_name = json_object_get(json, "name");
			json_t *json_symbols = json_object_get(json, "symbols");
			const char *name;
			unsigned int num_symbols;

			if (!json_is_string(json_name)) {
				avro_set_error("Enum type must have a \"name\"");
				return EINVAL;
			}
			if (!json_is_array(json_symbols)) {
				avro_set_error("Enum type must have \"symbols\"");
				return EINVAL;
			}

			name = json_string_value(json_name);
			if (!name) {
				avro_set_error("Enum type must have a \"name\"");
				return EINVAL;
			}
			num_symbols = json_array_size(json_symbols);
			if (num_symbols == 0) {
				avro_set_error("Enum type must have at least one symbol");
				return EINVAL;
			}
			*schema = avro_schema_enum(name);
			if (save_named_schemas(name, *schema, named_schemas)) {
				avro_set_error("Cannot save enum schema");
				return ENOMEM;
			}
			for (i = 0; i < num_symbols; i++) {
				int enum_rval;
				json_t *json_symbol =
				    json_array_get(json_symbols, i);
				const char *symbol;
				if (!json_is_string(json_symbol)) {
					avro_set_error("Enum symbol %d must be a string", i);
					avro_schema_decref(*schema);
					return EINVAL;
				}
				symbol = json_string_value(json_symbol);
				enum_rval =
				    avro_schema_enum_symbol_append(*schema,
								   symbol);
				if (enum_rval != 0) {
					avro_schema_decref(*schema);
					return enum_rval;
				}
			}
		}
		break;

	case AVRO_ARRAY:
		{
			int items_rval;
			json_t *json_items = json_object_get(json, "items");
			avro_schema_t items_schema;
			if (!json_items) {
				avro_set_error("Array type must have \"items\"");
				return EINVAL;
			}
			items_rval =
			    avro_schema_from_json_t(json_items, &items_schema,
						    named_schemas);
			if (items_rval) {
				return items_rval;
			}
			*schema = avro_schema_array(items_schema);
			avro_schema_decref(items_schema);
		}
		break;

	case AVRO_MAP:
		{
			int values_rval;
			json_t *json_values = json_object_get(json, "values");
			avro_schema_t values_schema;

			if (!json_values) {
				avro_set_error("Map type must have \"values\"");
				return EINVAL;
			}
			values_rval =
			    avro_schema_from_json_t(json_values, &values_schema,
						    named_schemas);
			if (values_rval) {
				return values_rval;
			}
			*schema = avro_schema_map(values_schema);
			avro_schema_decref(values_schema);
		}
		break;

	case AVRO_UNION:
		{
			unsigned int num_schemas = json_array_size(json);
			avro_schema_t s;
			if (num_schemas == 0) {
				avro_set_error("Union type must have at least one branch");
				return EINVAL;
			}
			*schema = avro_schema_union();
			for (i = 0; i < num_schemas; i++) {
				int schema_rval;
				json_t *schema_json = json_array_get(json, i);
				if (!schema_json) {
					avro_set_error("Cannot retrieve branch JSON");
					return EINVAL;
				}
				schema_rval =
				    avro_schema_from_json_t(schema_json, &s,
							    named_schemas);
				if (schema_rval != 0) {
					avro_schema_decref(*schema);
					return schema_rval;
				}
				schema_rval =
				    avro_schema_union_append(*schema, s);
				avro_schema_decref(s);
				if (schema_rval != 0) {
					avro_schema_decref(*schema);
					return schema_rval;
				}
			}
		}
		break;

	case AVRO_FIXED:
		{
			json_t *json_size = json_object_get(json, "size");
			json_t *json_name = json_object_get(json, "name");
			json_int_t size;
			const char *name;
			if (!json_is_integer(json_size)) {
				avro_set_error("Fixed type must have a \"size\"");
				return EINVAL;
			}
			if (!json_is_string(json_name)) {
				avro_set_error("Fixed type must have a \"name\"");
				return EINVAL;
			}
			size = json_integer_value(json_size);
			name = json_string_value(json_name);
			*schema = avro_schema_fixed(name, (int64_t) size);
			if (save_named_schemas(name, *schema, named_schemas)) {
				avro_set_error("Cannot save fixed schema");
				return ENOMEM;
			}
		}
		break;

	default:
		avro_set_error("Unknown schema type");
		return EINVAL;
	}
	return 0;
}
예제 #4
0
PyObject *
declare_types(ConvertInfo *info, avro_schema_t schema)
{
    avro_type_t type = schema->type;

    switch (type) {
    case AVRO_NULL:
        /* PyNone_Type is not publicly visible */
        return (PyObject *)Py_TYPE(Py_None);
    case AVRO_BOOLEAN:
        return (PyObject *)&PyBool_Type;
    case AVRO_BYTES:
    case AVRO_STRING:
    case AVRO_FIXED:
        return (PyObject *)&PyString_Type;
    case AVRO_DOUBLE:
    case AVRO_FLOAT:
        return (PyObject *)&PyFloat_Type;
    case AVRO_INT32:
        return (PyObject *)&PyInt_Type;
    case AVRO_ENUM:
        return get_python_enum_type(info->types, schema);
    case AVRO_INT64:
        return (PyObject *)&PyLong_Type;
    case AVRO_ARRAY:
        declare_types(info, avro_schema_array_items(schema));
        return (PyObject *)&PyList_Type;
    case AVRO_MAP:
        declare_types(info, avro_schema_map_values(schema));
        return (PyObject *)&PyDict_Type;
    case AVRO_UNION:
        {
            size_t union_size = avro_schema_union_size(schema);
            size_t i;
            for (i = 0; i < union_size; i++) {
                declare_types(info, avro_schema_union_branch(schema, i));
            }
        }
        return (PyObject *)&PyBaseObject_Type;  /* XXX list of types might be better */
    case AVRO_RECORD:
        {
            size_t field_count = avro_schema_record_size(schema);
            size_t i;
            const char *record_name = avro_schema_name(schema);
            PyObject *record_type = PyObject_GetAttrString(info->types, record_name);
            PyObject *field_types;
            if (record_type != NULL) {
                /* already declared this record type */
                Py_DECREF(record_type);
                return record_type;
            }
            /* create the Python type for this schema */
            record_type = get_python_obj_type(info->types, schema);
            field_types = PyObject_GetAttrString(record_type, "_fieldtypes");
            for (i = 0; i < field_count; i++) {
                PyObject *field_type = declare_types(info, avro_schema_record_field_get_by_index(schema, i));
                /* this will INCREF, so takes hold of the object */
                PyMapping_SetItemString(field_types, (char*)avro_schema_record_field_name(schema, i), field_type);
            }
            Py_DECREF(field_types);
            return record_type;
        }
    case AVRO_LINK:
        return declare_types(info, avro_schema_link_target(schema));
    default:
        /* other types don't hold records */
        break;
    }
    return NULL;
}