int process_frame_table_schema(avro_value_t *record_val, frame_reader_t reader, uint64_t wal_pos) {
    int err = 0, key_schema_present;
    avro_value_t relid_val, hash_val, key_schema_val, row_schema_val, branch_val;
    int64_t relid;
    const void *hash;
    const char *key_schema_json = NULL, *row_schema_json;
    size_t hash_len, key_schema_len = 1, row_schema_len; /* 1 so that key_schema_len - 1 == 0 below when no key schema is present */
    avro_schema_t key_schema = NULL, row_schema;

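    /* The table-schema record carries four fields: the relation id, a 64-bit
     * schema hash, an optional key schema, and the row schema; the two
     * schemas are transmitted as JSON strings. */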
    check(err, avro_value_get_by_index(record_val, 0, &relid_val,      NULL));
    check(err, avro_value_get_by_index(record_val, 1, &hash_val,       NULL));
    check(err, avro_value_get_by_index(record_val, 2, &key_schema_val, NULL));
    check(err, avro_value_get_by_index(record_val, 3, &row_schema_val, NULL));
    check(err, avro_value_get_long(&relid_val, &relid));
    check(err, avro_value_get_fixed(&hash_val, &hash, &hash_len));
    check(err, avro_value_get_discriminant(&key_schema_val, &key_schema_present));
    check(err, avro_value_get_string(&row_schema_val, &row_schema_json, &row_schema_len));
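    /* String lengths reported by Avro include the terminating NUL byte,
     * hence the "- 1" when handing the JSON to the parser. */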
    check(err, avro_schema_from_json_length(row_schema_json, row_schema_len - 1, &row_schema));

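    /* (Re)create the cached entry for this relation: remember the schema hash,
     * keep the parsed row schema, and pre-allocate generic values and a memory
     * reader for decoding rows later. */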
    schema_list_entry *entry = schema_list_replace(reader, relid);
    entry->relid = relid;
    entry->hash = *((uint64_t *) hash);
    entry->row_schema = row_schema;
    entry->row_iface = avro_generic_class_from_schema(row_schema);
    avro_generic_value_new(entry->row_iface, &entry->row_value);
    avro_generic_value_new(entry->row_iface, &entry->old_value);
    entry->avro_reader = avro_reader_memory(NULL, 0);

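    /* The key schema field is a union; parse a key schema only when a branch
     * carrying the JSON string is present. */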
    if (key_schema_present) {
        check(err, avro_value_get_current_branch(&key_schema_val, &branch_val));
        check(err, avro_value_get_string(&branch_val, &key_schema_json, &key_schema_len));
        check(err, avro_schema_from_json_length(key_schema_json, key_schema_len - 1, &key_schema));
        entry->key_schema = key_schema;
        entry->key_iface = avro_generic_class_from_schema(key_schema);
        avro_generic_value_new(entry->key_iface, &entry->key_value);
    } else {
        entry->key_schema = NULL;
    }

    if (reader->on_table_schema) {
        check(err, reader->on_table_schema(reader->cb_context, wal_pos, relid,
                                           key_schema_json, key_schema_len - 1, key_schema,
                                           row_schema_json, row_schema_len - 1, row_schema));
    }
    return err;
}
/* Returns 0 on success.  On failure, sets mapper->error and returns nonzero. */
int table_metadata_update_schema(table_mapper_t mapper, table_metadata_t table, int is_key, const char* schema_json, size_t schema_len) {
    int prev_schema_id = is_key ? table->key_schema_id : table->row_schema_id;
    int schema_id = TABLE_MAPPER_SCHEMA_ID_MISSING;

    int err;

    if (mapper->registry) {
        err = schema_registry_request(mapper->registry, rd_kafka_topic_name(table->topic), is_key,
                schema_json, schema_len,
                &schema_id);
        if (err) {
            mapper_error(mapper, "Failed to register %s schema: %s",
                    is_key ? "key" : "row", mapper->registry->error);
            return err;
        }

        table_metadata_set_schema_id(table, is_key, schema_id);
    }

    avro_schema_t schema;

    /* If running with a schema registry, we can use the registry to detect
     * if the schema we just saw is the same as the one we remembered
     * previously (since the registry guarantees to return the same id for
     * identical schemas).  If the registry returns the same id as before, we
     * can skip parsing the new schema and just keep the previous one.
     *
     * However, if we're running without a registry, it's not so easy to detect
     * whether or not the schema changed, so in that case we just always parse
     * the new schema.  (We could store the previous schema JSON and strcmp()
     * it with the new JSON, but that probably wouldn't save much over just
     * parsing the JSON, given this isn't a hot code path.) */
    if (prev_schema_id == TABLE_MAPPER_SCHEMA_ID_MISSING || prev_schema_id != schema_id) {
        if (schema_json) {
            err = avro_schema_from_json_length(schema_json, schema_len, &schema);

            if (err) {
                mapper_error(mapper, "Could not parse %s schema: %s",
                        is_key ? "key" : "row", avro_strerror());
                return err;
            }
        } else {
            schema = NULL;
        }

        table_metadata_set_schema(table, is_key, schema);

        if (schema) avro_schema_decref(schema);
    }

    return 0;
}
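Since the comment above argues that simply re-parsing the schema JSON is cheap, the following self-contained sketch shows just that parsing step using the public avro-c API; the record definition is only an illustrative placeholder, not taken from the project above.

/* Minimal sketch: parse an Avro schema from a JSON string, then release it. */
#include <avro.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    const char *json = "{\"type\": \"record\", \"name\": \"r\", "
                       "\"fields\": [{\"name\": \"id\", \"type\": \"long\"}]}";
    avro_schema_t schema;
    if (avro_schema_from_json_length(json, strlen(json), &schema)) {
        fprintf(stderr, "Could not parse schema: %s\n", avro_strerror());
        return 1;
    }
    avro_schema_decref(schema);
    return 0;
}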
Example #3
int main(int argc, char *argv[]) {
    FILE *input;

    avro_schema_t schema;
    avro_file_writer_t out;
    const char *key;

    int opt, opterr = 0, verbose = 0, memstat = 0, errabort = 0, strjson = 0;
    char *schema_arg = NULL;
    char *codec = NULL;
    char *endptr = NULL;
    char *outpath = NULL;
    size_t block_sz = 0;
    size_t max_str_sz = 0;
    extern char *optarg;
    extern int optind, optopt;

    /* The leading ':' makes getopt() return ':' for a missing option argument
     * (handled below) instead of printing its own diagnostic. */
    while ((opt = getopt(argc, argv, ":c:s:S:b:z:dmxjh")) != -1) {
        switch (opt) {
        case 's':
            schema_arg = optarg;
            break;
        case 'S':
            schema_arg = read_schema_file(optarg);
            break;
        case 'b':
            block_sz = strtol(optarg, &endptr, 0);
            if (*endptr) {
                fprintf(stderr, "ERROR: Invalid block size for -b: %s\n", optarg);
                opterr++;
            }
            break;
        case 'z':
            max_str_sz = strtol(optarg, &endptr, 0);
            if (*endptr) {
                fprintf(stderr, "ERROR: Invalid maximum string size for -z: %s\n", optarg);
                opterr++;
            }
            break;
        case 'c':
            codec = optarg;
            break;
        case 'd':
            verbose = 1;
            break;
        case 'x':
            errabort = 1;
            break;
        case 'j':
            strjson = 1;
            break;
        case 'm':
            #if defined(__linux__)
              memstat = 1;
            #else
              usage_error(argv[0], "Memory stats is a Linux-only feature!");
            #endif
            break;
        case 'h':
            print_help(argv[0]);
            exit(0);
        case ':':
            fprintf(stderr, "ERROR: Option -%c requires an operand\n", optopt);
            opterr++;
            break;
        case '?':
            fprintf(stderr, "ERROR: Unrecognized option: -%c\n", optopt);
            opterr++;
        }
    }

    int file_args_cnt = (argc - optind);
    if (file_args_cnt == 0) {
        usage_error(argv[0], "Please provide at least one file name argument");
    }
    if (file_args_cnt > 2) {
        fprintf(stderr, "Too many file name arguments: %d!\n", file_args_cnt);
        usage_error(argv[0], 0);
    }

    if (opterr) usage_error(argv[0], 0);
    if (!schema_arg) usage_error(argv[0], "Please provide correct schema!");

    if (!codec) codec = "null";
    else if (strcmp(codec, "snappy") && strcmp(codec, "deflate") && strcmp(codec, "lzma") && strcmp(codec, "null")) {
        fprintf(stderr, "ERROR: Invalid codec %s, valid codecs: snappy, deflate, lzma, null\n", codec);
        exit(EXIT_FAILURE);
    }

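    /* With one file argument, JSON is read from stdin and the argument names
     * the output file; with two, the first is the input file and the second
     * the output file. */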
    if (file_args_cnt == 1) {
        input = stdin;
        outpath = argv[optind];
    } else {
        outpath = argv[optind+1];
        input = fopen(argv[optind], "rb");
        if (input == NULL) {
            fprintf(stderr, "ERROR: Cannot open input file: %s: ", argv[optind]);
            perror(0);
            exit(EXIT_FAILURE);
        }
    }

    if (avro_schema_from_json_length(schema_arg, strlen(schema_arg), &schema)) {
        fprintf(stderr, "ERROR: Unable to parse schema: '%s'\n", schema_arg);
        exit(EXIT_FAILURE);
    }

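    /* An output path of "-" writes the Avro container file to stdout. */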
    if (!strcmp(outpath, "-")) {
        if (avro_file_writer_create_with_codec_fp(stdout, outpath, 0, schema, &out, codec, block_sz)) {
            fprintf(stderr, "ERROR: avro_file_writer_create_with_codec_fp FAILED: %s\n", avro_strerror());
            exit(EXIT_FAILURE);
        }

    } else {
        remove(outpath);
        if (avro_file_writer_create_with_codec(outpath, schema, &out, codec, block_sz)) {
            fprintf(stderr, "ERROR: avro_file_writer_create_with_codec FAILED: %s\n", avro_strerror());
            exit(EXIT_FAILURE);
        }
    }

    if (verbose)
        fprintf(stderr, "Using codec: %s\n", codec);

    process_file(input, out, schema, verbose, memstat, errabort, strjson, max_str_sz);

    if (verbose)
        printf("Closing writer....\n");
    avro_file_writer_close(out);
    return EXIT_SUCCESS;
}