/* Handles a table-schema frame.
 *
 * Reads four fields from record_val by position: 0 = relation id (long),
 * 1 = schema hash (fixed), 2 = key schema (union; a nonzero discriminant
 * means a JSON string is present), 3 = row schema (JSON string). The row
 * schema (and key schema, if present) is parsed, the reader's schema list
 * entry for the relation is replaced and repopulated, and finally the
 * reader's on_table_schema callback is invoked if one is set.
 *
 * Returns err, which stays 0 unless a checked call reports a failure.
 * check() is defined outside this block; presumably it stores the call's
 * result in err and returns early when it is nonzero. */
int process_frame_table_schema(avro_value_t *record_val, frame_reader_t reader, uint64_t wal_pos) {
    int err = 0;
    int has_key_schema;

    avro_value_t relid_field, hash_field, key_field, row_field, key_branch;

    int64_t relid;
    const void *hash_bytes;
    size_t hash_size;

    const char *key_json = NULL;
    const char *row_json;
    /* Starts at 1 so that (key_json_size - 1) is 0 when no key schema exists. */
    size_t key_json_size = 1;
    size_t row_json_size;

    avro_schema_t key_schema = NULL;
    avro_schema_t row_schema;

    /* Locate the four record fields by position. */
    check(err, avro_value_get_by_index(record_val, 0, &relid_field, NULL));
    check(err, avro_value_get_by_index(record_val, 1, &hash_field, NULL));
    check(err, avro_value_get_by_index(record_val, 2, &key_field, NULL));
    check(err, avro_value_get_by_index(record_val, 3, &row_field, NULL));

    /* Extract their contents. */
    check(err, avro_value_get_long(&relid_field, &relid));
    check(err, avro_value_get_fixed(&hash_field, &hash_bytes, &hash_size));
    check(err, avro_value_get_discriminant(&key_field, &has_key_schema));
    check(err, avro_value_get_string(&row_field, &row_json, &row_json_size));

    /* The reported string size is reduced by one before parsing
     * (presumably it counts a trailing NUL -- confirm against the Avro C API). */
    check(err, avro_schema_from_json_length(row_json, row_json_size - 1, &row_schema));

    schema_list_entry *entry = schema_list_replace(reader, relid);
    entry->relid = relid;
    /* NOTE(review): reads 8 bytes from the fixed value without consulting
     * hash_size or alignment -- confirm the frame schema guarantees this. */
    entry->hash = *((uint64_t *) hash_bytes);

    entry->row_schema = row_schema;
    entry->row_iface = avro_generic_class_from_schema(row_schema);
    avro_generic_value_new(entry->row_iface, &entry->row_value);
    avro_generic_value_new(entry->row_iface, &entry->old_value);
    entry->avro_reader = avro_reader_memory(NULL, 0);

    if (!has_key_schema) {
        entry->key_schema = NULL;
    } else {
        check(err, avro_value_get_current_branch(&key_field, &key_branch));
        check(err, avro_value_get_string(&key_branch, &key_json, &key_json_size));
        check(err, avro_schema_from_json_length(key_json, key_json_size - 1, &key_schema));

        entry->key_schema = key_schema;
        entry->key_iface = avro_generic_class_from_schema(key_schema);
        avro_generic_value_new(entry->key_iface, &entry->key_value);
    }

    /* Without a registered callback there is nothing more to do. */
    if (!reader->on_table_schema) {
        return err;
    }

    check(err, reader->on_table_schema(reader->cb_context, wal_pos, relid,
                                       key_json, key_json_size - 1, key_schema,
                                       row_json, row_json_size - 1, row_schema));
    return err;
}
/* Applies a newly seen key schema (is_key nonzero) or row schema (is_key
 * zero) to a table's metadata. schema_json may be NULL, in which case the
 * stored schema is cleared.
 *
 * Returns 0 on success. On failure, reports the problem through
 * mapper_error() and returns the nonzero error code. */
int table_metadata_update_schema(table_mapper_t mapper, table_metadata_t table, int is_key, const char* schema_json, size_t schema_len) {
    const char *kind = is_key ? "key" : "row";
    int old_id = is_key ? table->key_schema_id : table->row_schema_id;
    int new_id = TABLE_MAPPER_SCHEMA_ID_MISSING;

    if (mapper->registry) {
        int reg_err = schema_registry_request(mapper->registry,
                                              rd_kafka_topic_name(table->topic),
                                              is_key, schema_json, schema_len,
                                              &new_id);
        if (reg_err) {
            mapper_error(mapper, "Failed to register %s schema: %s", kind, mapper->registry->error);
            return reg_err;
        }

        table_metadata_set_schema_id(table, is_key, new_id);
    }

    /* With a schema registry, an unchanged id means an unchanged schema (the
     * registry guarantees to return the same id for identical schemas), so
     * the previously parsed schema can be kept and re-parsing skipped.
     *
     * Without a registry both ids are TABLE_MAPPER_SCHEMA_ID_MISSING, so this
     * shortcut never triggers and the schema is always re-parsed. Caching the
     * previous JSON and comparing it would be possible, but would likely save
     * little over parsing, as this is not a hot code path. */
    if (old_id != TABLE_MAPPER_SCHEMA_ID_MISSING && old_id == new_id) {
        return 0;
    }

    avro_schema_t parsed = NULL;
    if (schema_json) {
        int parse_err = avro_schema_from_json_length(schema_json, schema_len, &parsed);
        if (parse_err) {
            mapper_error(mapper, "Could not parse %s schema: %s", kind, avro_strerror());
            return parse_err;
        }
    }

    table_metadata_set_schema(table, is_key, parsed);

    /* Drop the local reference to the parsed schema, if there is one. */
    if (parsed) {
        avro_schema_decref(parsed);
    }

    return 0;
}
int main(int argc, char *argv[]) { FILE *input; avro_schema_t schema; avro_file_writer_t out; const char *key; int opt, opterr = 0, verbose = 0, memstat = 0, errabort = 0, strjson = 0; char *schema_arg = NULL; char *codec = NULL; char *endptr = NULL; char *outpath = NULL; size_t block_sz = 0; size_t max_str_sz = 0; extern char *optarg; extern int optind, optopt; while ((opt = getopt(argc, argv, "c:s:S:b:z:dmxjh")) != -1) { switch (opt) { case 's': schema_arg = optarg; break; case 'S': schema_arg = read_schema_file(optarg); break; case 'b': block_sz = strtol(optarg, &endptr, 0); if (*endptr) { fprintf(stderr, "ERROR: Invalid block size for -b: %s\n", optarg); opterr++; } break; case 'z': max_str_sz = strtol(optarg, &endptr, 0); if (*endptr) { fprintf(stderr, "ERROR: Invalid maximum string size for -z: %s\n", optarg); opterr++; } break; case 'c': codec = optarg; break; case 'd': verbose = 1; break; case 'x': errabort = 1; break; case 'j': strjson = 1; break; case 'm': #if defined(__linux__) memstat = 1; #else usage_error(argv[0], "Memory stats is a Linux-only feature!"); #endif break; case 'h': print_help(argv[0]); exit(0); case ':': fprintf(stderr, "ERROR: Option -%c requires an operand\n", optopt); opterr++; break; case '?': fprintf(stderr, "ERROR: Unrecognized option: -%c\n", optopt); opterr++; } } int file_args_cnt = (argc - optind); if (file_args_cnt == 0) { usage_error(argv[0], "Please provide at least one file name argument"); } if (file_args_cnt > 2) { fprintf(stderr, "Too many file name arguments: %d!\n", file_args_cnt); usage_error(argv[0], 0); } if (opterr) usage_error(argv[0], 0); if (!schema_arg) usage_error(argv[0], "Please provide correct schema!"); if (!codec) codec = "null"; else if (strcmp(codec, "snappy") && strcmp(codec, "deflate") && strcmp(codec, "lzma") && strcmp(codec, "null")) { fprintf(stderr, "ERROR: Invalid codec %s, valid codecs: snappy, deflate, lzma, null\n", codec); exit(EXIT_FAILURE); } if ((argc - optind) == 1) { input = stdin; 
outpath = argv[optind]; } else { outpath = argv[optind+1]; input = fopen(argv[optind], "rb"); if ( errno != 0 ) { fprintf(stderr, "ERROR: Cannot open input file: %s: ", argv[optind]); perror(0); exit(EXIT_FAILURE); } } if (avro_schema_from_json_length(schema_arg, strlen(schema_arg), &schema)) { fprintf(stderr, "ERROR: Unable to parse schema: '%s'\n", schema_arg); exit(EXIT_FAILURE); } if (!strcmp(outpath, "-")) { if (avro_file_writer_create_with_codec_fp(stdout, outpath, 0, schema, &out, codec, block_sz)) { fprintf(stderr, "ERROR: avro_file_writer_create_with_codec_fp FAILED: %s\n", avro_strerror()); exit(EXIT_FAILURE); } } else { remove(outpath); if (avro_file_writer_create_with_codec(outpath, schema, &out, codec, block_sz)) { fprintf(stderr, "ERROR: avro_file_writer_create_with_codec FAILED: %s\n", avro_strerror()); exit(EXIT_FAILURE); } } if (verbose) fprintf(stderr, "Using codec: %s\n", codec); process_file(input, out, schema, verbose, memstat, errabort, strjson, max_str_sz); if (verbose) printf("Closing writer....\n"); avro_file_writer_close(out); }