Beispiel #1
0
/*
 * Allocates a new rs_ctx_t and attaches a label-set hash table created with
 * ck_hash_table_init(1024).
 *
 * Returns the new context, or NULL if the context could not be allocated.
 * Only label_set_dict is assigned here; no other field of the context is
 * initialized by this function.
 */
rs_ctx_t *ctx_init() {
    rs_ctx_t *ctx = malloc(sizeof *ctx);
    if (ctx == NULL)
        return NULL; /* out of memory: nothing to set up */

    ctx->label_set_dict = ck_hash_table_init(1024);
    return ctx;
}
Beispiel #2
0
/*
 * Reads the data section row by row and forwards each value to
 * ctx->value_handler (when one is installed).
 *
 * For every observation, one value is read per variable in ctx->varinfo:
 * string variables via read_string(), double variables via read_double()
 * followed by spss_tag_missing_double(). After each full row, obs_count is
 * incremented and por_update_progress() is called.
 *
 * A result of 1 from the reader while fetching the FIRST variable of a row
 * is treated as the normal end of the data and yields READSTAT_OK. Any other
 * non-zero reader result (including 1 in the middle of a row, which would
 * mean a truncated observation) is reported through ctx->error_handler and
 * yields READSTAT_ERROR_PARSE.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_PARSE on a read failure, or
 * whatever non-OK code por_update_progress() returned.
 */
static readstat_error_t read_por_file_data(readstat_por_ctx_t *ctx) {
    int i;
    char string[256];
    char error_buf[1024];
    readstat_error_t rs_retval = READSTAT_OK;

    /* Without variables the inner loop would never consume input, so the
     * outer loop could only end via the progress callback. */
    if (ctx->var_count <= 0)
        return READSTAT_OK;

    while (1) {
        for (i=0; i<ctx->var_count; i++) {
            spss_varinfo_t *info = &ctx->varinfo[i];
            readstat_value_t value = { .type = info->type };
            int retval = 0;

            if (info->type == READSTAT_TYPE_STRING) {
                retval = read_string(ctx, string, sizeof(string));
            } else if (info->type == READSTAT_TYPE_DOUBLE) {
                retval = read_double(ctx, &value.v.double_value);
            }

            /* End-of-data marker is only legitimate at a row boundary. */
            if (i == 0 && retval == 1)
                return READSTAT_OK;

            if (retval != 0) {
                if (ctx->error_handler) {
                    snprintf(error_buf, sizeof(error_buf), "Error in %s\n", info->name);
                    ctx->error_handler(error_buf, ctx->user_ctx);
                }
                rs_retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }

            if (info->type == READSTAT_TYPE_STRING) {
                /* The handler receives a pointer into the local buffer; it is
                 * overwritten by the next string read. */
                value.v.string_value = string;
            } else if (info->type == READSTAT_TYPE_DOUBLE) {
                spss_tag_missing_double(&value, &info->missingness);
            }

            /* NOTE(review): the handler's result (if it has one) is not
             * inspected here, matching the previous behavior. */
            if (ctx->value_handler)
                ctx->value_handler(ctx->obs_count, i, value, ctx->user_ctx);
        }
        ctx->obs_count++;

        rs_retval = por_update_progress(ctx);
        if (rs_retval != READSTAT_OK)
            break;
    }
cleanup:
    return rs_retval;
}

/*
 * Parses the SPSS portable file at `filename`, invoking the callbacks
 * registered on `parser` and passing `user_ctx` through to each of them.
 *
 * Steps performed, in order:
 *   1. Open the file and determine its size with lseek().
 *   2. Read a 200-byte header block, then a 256-byte table used to build
 *      ctx->lookup (file byte -> unicode_lookup entry) and to pick the
 *      file's space character (table entry 126).
 *   3. Read an 8-byte signature that must translate to "SPSSPORT".
 *   4. Read a one-byte version and two strings (creation date and time).
 *   5. Process tagged records until the 'F' (file data) record. On 'F' the
 *      variable handler is called once per variable, the frequency-weight
 *      handler is called for the variable named ctx->fweight_name (if any),
 *      the data rows are read, and finally the info handler is called.
 *
 * Returns READSTAT_OK on success; READSTAT_ERROR_OPEN if the file cannot be
 * opened (or the parsing context cannot be allocated); READSTAT_ERROR_READ
 * on short reads or seek failures; READSTAT_ERROR_PARSE on malformed
 * content or an unknown record tag; READSTAT_ERROR_USER_ABORT when a
 * callback returns non-zero; or the error returned by a record reader.
 */
readstat_error_t readstat_parse_por(readstat_parser_t *parser, const char *filename, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    unsigned char reverse_lookup[256];
    char vanity[200];
    unsigned char check[9];
    char tr_check[9];
    unsigned char version;
    char string[256];
    int i;

    readstat_por_ctx_t *ctx = calloc(1, sizeof(readstat_por_ctx_t));
    if (ctx == NULL) {
        /* NOTE(review): no dedicated out-of-memory code is visible in this
         * part of the file; switch to one if the error enum provides it. */
        return READSTAT_ERROR_OPEN;
    }

    ctx->space = ' ';
    ctx->var_dict = ck_hash_table_init(1024);
    ctx->info_handler = parser->info_handler;
    ctx->variable_handler = parser->variable_handler;
    ctx->value_handler = parser->value_handler;
    ctx->value_label_handler = parser->value_label_handler;
    ctx->error_handler = parser->error_handler;
    ctx->progress_handler = parser->progress_handler;
    ctx->user_ctx = user_ctx;

    if ((ctx->fd = readstat_open(filename)) == -1) {
        /* Release the context the same way the cleanup path does, so that
         * resources already attached to it (var_dict) are not leaked. */
        por_ctx_free(ctx);
        return READSTAT_ERROR_OPEN;
    }

    if ((ctx->file_size = lseek(ctx->fd, 0, SEEK_END)) == -1) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (lseek(ctx->fd, 0, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* The leading 200 bytes are read and discarded. */
    if (read_bytes(ctx, vanity, sizeof(vanity)) != sizeof(vanity)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (read_bytes(ctx, reverse_lookup, sizeof(reverse_lookup)) != sizeof(reverse_lookup)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx->space = reverse_lookup[126];

    /* Invert the table: index by the byte found in the file. */
    for (i=0; i<256; i++)
        ctx->lookup[reverse_lookup[i]] = unicode_lookup[i];

    if (read_bytes(ctx, check, sizeof(check)-1) != sizeof(check)-1) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    check[8] = '\0';

    utf8_encode(check, sizeof(check), tr_check, sizeof(tr_check), ctx->lookup);

    if (strcmp("SPSSPORT", tr_check) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    ctx->var_offset = -1;

    if (read_bytes(ctx, &version, sizeof(version)) != sizeof(version)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (read_string(ctx, string, sizeof(string)) == -1) { /* creation date */
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (read_string(ctx, string, sizeof(string)) == -1) { /* creation time */
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    while (1) {
        uint16_t tr_tag = read_tag(ctx);
        switch (tr_tag) {
        case '1': /* product ID */
        case '2': /* author ID */
        case '3': /* sub-product ID */
            if (read_string(ctx, string, sizeof(string)) == -1)
                retval = READSTAT_ERROR_PARSE;
            break;
        case '4': /* variable count */
            retval = read_variable_count_record(ctx);
            break;
        case '6': /* case weight */
            retval = read_case_weight_record(ctx);
            break;
        case '7': /* variable */
            retval = read_variable_record(ctx);
            break;
        case '8': /* missing value */
            retval = read_missing_value_record(ctx);
            break;
        case 'B': /* missing value range */
            retval = read_missing_value_range_record(ctx);
            break;
        case '9': /* LO THRU x */
            retval = read_missing_value_lo_range_record(ctx);
            break;
        case 'A': /* x THRU HI */
            retval = read_missing_value_hi_range_record(ctx);
            break;
        case 'C': /* variable label */
            retval = read_variable_label_record(ctx);
            break;
        case 'D': /* value label */
            retval = read_value_label_record(ctx);
            break;
        case 'E': /* document record */
            retval = read_document_record(ctx);
            break;
        case 'F': /* file data */
            /* Every declared variable must have been seen before the data. */
            if (ctx->var_offset != ctx->var_count - 1) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            for (i=0; i<ctx->var_count; i++) {
                spss_varinfo_t *info = &ctx->varinfo[i];
                /* Needed by the data reader even when no handler is set. */
                info->missingness = spss_missingness_for_info(info);

                if (ctx->variable_handler) {
                    char label_name_buf[256];
                    readstat_variable_t *variable = spss_init_variable_for_info(info);

                    snprintf(label_name_buf, sizeof(label_name_buf), POR_LABEL_NAME_PREFIX "%d", info->labels_index);

                    /* A labels_index of -1 means no label set name is passed. */
                    int cb_retval = ctx->variable_handler(i, variable,
                                                          info->labels_index == -1 ? NULL : label_name_buf,
                                                          user_ctx);

                    spss_free_variable(variable);

                    if (cb_retval) {
                        retval = READSTAT_ERROR_USER_ABORT;
                        goto cleanup;
                    }
                }
            }
            if (parser->fweight_handler && ctx->fweight_name[0]) {
                for (i=0; i<ctx->var_count; i++) {
                    spss_varinfo_t *info = &ctx->varinfo[i];
                    if (strcmp(info->name, ctx->fweight_name) == 0) {
                        if (parser->fweight_handler(i, user_ctx)) {
                            retval = READSTAT_ERROR_USER_ABORT;
                            goto cleanup;
                        }
                        break;
                    }
                }
            }
            retval = read_por_file_data(ctx);
            if (retval != READSTAT_OK)
                goto cleanup;

            if (parser->info_handler) {
                if (parser->info_handler(ctx->obs_count, ctx->var_count, ctx->user_ctx)) {
                    retval = READSTAT_ERROR_USER_ABORT;
                }
            }
            /* The data record is the last one processed. */
            goto cleanup;
        default:
            retval = READSTAT_ERROR_PARSE;
            break;
        }

        /* Single exit check for every record reader above. */
        if (retval != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    readstat_close(ctx->fd);
    por_ctx_free(ctx);

    return retval;
}