Ejemplo n.º 1
0
static readstat_error_t sav_submit_value_labels(value_label_t *value_labels, int32_t label_count, 
        readstat_type_t value_type, sav_ctx_t *ctx) {
    char label_name_buf[256];
    readstat_error_t retval = READSTAT_OK;
    int32_t i;

    snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", ctx->value_labels_count);

    for (i=0; i<label_count; i++) {
        value_label_t *vlabel = &value_labels[i];
        readstat_value_t value = { .type = value_type };
        if (value_type == READSTAT_TYPE_DOUBLE) {
            double val_d = 0.0;
            memcpy(&val_d, vlabel->value, 8);
            if (ctx->bswap)
                val_d = byteswap_double(val_d);

            value.v.double_value = val_d;
            sav_tag_missing_double(&value, ctx);
        } else {
            char unpadded_val[8*4+1];
            retval = readstat_convert(unpadded_val, sizeof(unpadded_val), vlabel->value, 8, ctx->converter);
            if (retval != READSTAT_OK)
                break;

            value.v.string_value = unpadded_val;
        }
        ctx->value_label_handler(label_name_buf, value, vlabel->label, ctx->user_ctx);
    }
    return retval;
}
Ejemplo n.º 2
0
sav_ctx_t *sav_ctx_init(sav_file_header_record_t *header, readstat_io_t *io) {
    sav_ctx_t *ctx = NULL;
    if ((ctx = malloc(sizeof(sav_ctx_t))) == NULL) {
        return NULL;
    }
    memset(ctx, 0, sizeof(sav_ctx_t));
    
    ctx->bswap = !(header->layout_code == 2 || header->layout_code == 3);

    ctx->data_is_compressed = (header->compressed != 0);
    ctx->record_count = ctx->bswap ? byteswap4(header->ncases) : header->ncases;
    ctx->fweight_index = ctx->bswap ? byteswap4(header->weight_index) : header->weight_index;

    ctx->missing_double = SAV_MISSING_DOUBLE;
    ctx->lowest_double = SAV_LOWEST_DOUBLE;
    ctx->highest_double = SAV_HIGHEST_DOUBLE;
    
    double bias = ctx->bswap ? byteswap_double(header->bias) : header->bias;
    
    if (bias != 100.0) {
        sav_ctx_free(ctx);
        return NULL;
    }
    
    ctx->varinfo_capacity = SAV_VARINFO_INITIAL_CAPACITY;
    
    if ((ctx->varinfo = calloc(ctx->varinfo_capacity, sizeof(spss_varinfo_t))) == NULL) {
        sav_ctx_free(ctx);
        return NULL;
    }

    ctx->io = io;
    
    return ctx;
}
Ejemplo n.º 3
0
static readstat_error_t read_value_vector(rdata_sexptype_header_t header, const char *name, rdata_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int32_t length;
    size_t input_elem_size = 0;
    void *vals = NULL;
    size_t buf_len = 0;
    int output_data_type;
    int i;
    
    switch (header.type) {
        case RDATA_SEXPTYPE_REAL_VECTOR:
            input_elem_size = sizeof(double);
            output_data_type = READSTAT_TYPE_DOUBLE;
            break;
        case RDATA_SEXPTYPE_INTEGER_VECTOR:
            input_elem_size = sizeof(int32_t);
            output_data_type = READSTAT_TYPE_DOUBLE;
            break;
        case RDATA_SEXPTYPE_LOGICAL_VECTOR:
            input_elem_size = sizeof(int32_t);
            output_data_type = READSTAT_TYPE_DOUBLE;
            break;
        default:
            retval = READSTAT_ERROR_PARSE;
            break;
    }
    if (retval != READSTAT_OK)
        goto cleanup;

    if ((retval = read_length(&length, ctx)) != READSTAT_OK)
        goto cleanup;

    buf_len = length * input_elem_size;
    
    vals = malloc(buf_len);
    if (vals == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    
    if (read_st(ctx, vals, buf_len) != buf_len) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    
    if (ctx->machine_needs_byteswap) {
        if (input_elem_size == sizeof(double)) {
            double *d_vals = (double *)vals;
            for (i=0; i<buf_len/sizeof(double); i++) {
                d_vals[i] = byteswap_double(d_vals[i]);
            }
        } else {
            uint32_t *i_vals = (uint32_t *)vals;
            for (i=0; i<buf_len/sizeof(uint32_t); i++) {
                i_vals[i] = byteswap4(i_vals[i]);
            }
        }
    }
    
    ctx->class_is_posixct = 0;
    if (header.attributes) {
        if ((retval = read_attributes(&handle_vector_attribute, ctx)) != READSTAT_OK)
            goto cleanup;
    }
    
    if (ctx->column_handler) {
        if (header.type == RDATA_SEXPTYPE_LOGICAL_VECTOR ||
                header.type == RDATA_SEXPTYPE_INTEGER_VECTOR) {
            double *real_vals = malloc(length * sizeof(double));
            int32_t *i_vals = (int32_t *)vals;
            for (i=0; i<length; i++) {
                if (i_vals[i] == INT32_MIN) {
                    real_vals[i] = NAN;
                } else {
                    real_vals[i] = i_vals[i];
                }
            }
            if (ctx->column_handler(name, output_data_type, NULL, real_vals, length, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
            free(real_vals);
        } else {
            if (ctx->column_handler(name, output_data_type, ctx->class_is_posixct ? "%ts" : NULL, vals, length, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
    }

cleanup:
    
    return retval;
}
Ejemplo n.º 4
0
static readstat_error_t sav_process_row(unsigned char *buffer, size_t buffer_len, sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    double fp_value;
    int offset = 0;
    readstat_off_t data_offset = 0;
    size_t raw_str_used = 0;
    int segment_offset = 0;
    int var_index = 0, col = 0;

    while (data_offset < buffer_len && col < ctx->var_index) {
        spss_varinfo_t *col_info = &ctx->varinfo[col];
        spss_varinfo_t *var_info = &ctx->varinfo[var_index];
        readstat_value_t value = { .type = var_info->type };
        if (offset > 31) {
            retval = READSTAT_ERROR_PARSE;
            goto done;
        }
        if (var_info->type == READSTAT_TYPE_STRING) {
            if (raw_str_used + 8 <= ctx->raw_string_len) {
                memcpy(ctx->raw_string + raw_str_used, &buffer[data_offset], 8);
                raw_str_used += 8;
            }
            if (++offset == col_info->width) {
                if (++segment_offset < var_info->n_segments) {
                    raw_str_used--;
                }
                offset = 0;
                col++;
            }
            if (segment_offset == var_info->n_segments) {
                retval = readstat_convert(ctx->utf8_string, ctx->utf8_string_len, 
                        ctx->raw_string, raw_str_used, ctx->converter);
                if (retval != READSTAT_OK)
                    goto done;
                value.v.string_value = ctx->utf8_string;
                if (ctx->value_handler(ctx->current_row, ctx->variables[var_info->index],
                            value, ctx->user_ctx)) {
                    retval = READSTAT_ERROR_USER_ABORT;
                    goto done;
                }
                raw_str_used = 0;
                segment_offset = 0;
                var_index += var_info->n_segments;
            }
        } else if (var_info->type == READSTAT_TYPE_DOUBLE) {
            memcpy(&fp_value, &buffer[data_offset], 8);
            if (ctx->bswap) {
                fp_value = byteswap_double(fp_value);
            }
            value.v.double_value = fp_value;
            sav_tag_missing_double(&value, ctx);
            if (ctx->value_handler(ctx->current_row, ctx->variables[var_info->index],
                        value, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto done;
            }
            var_index += var_info->n_segments;
            col++;
        }
        data_offset += 8;
    }
    ctx->current_row++;
done:
    return retval;
}

static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    unsigned char *buffer = NULL;
    size_t bytes_read = 0;
    size_t buffer_len = ctx->var_offset * 8;

    buffer = malloc(buffer_len);

    while (ctx->current_row < ctx->row_limit) {
        retval = sav_update_progress(ctx);
        if (retval != READSTAT_OK)
            goto done;

        if ((bytes_read = io->read(buffer, buffer_len, io->io_ctx)) != buffer_len)
            goto done;

        retval = sav_process_row(buffer, buffer_len, ctx);
        if (retval != READSTAT_OK)
            goto done;
    }
done:
    if (buffer)
        free(buffer);

    return retval;
}
Ejemplo n.º 5
0
static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    sav_variable_record_t variable;
    readstat_error_t retval = READSTAT_OK;
    if (ctx->var_index == ctx->varinfo_capacity) {
        if ((ctx->varinfo = realloc(ctx->varinfo, (ctx->varinfo_capacity *= 2) * sizeof(spss_varinfo_t))) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
    }
    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) < sizeof(sav_variable_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    variable.print = ctx->bswap ? byteswap4(variable.print) : variable.print;
    variable.write = ctx->bswap ? byteswap4(variable.write) : variable.write;

    readstat_type_t dta_type = READSTAT_TYPE_DOUBLE;
    int32_t type = ctx->bswap ? byteswap4(variable.type) : variable.type;
    int i;
    if (type < 0) {
        if (ctx->var_index == 0) {
            return READSTAT_ERROR_PARSE;
        }
        ctx->var_offset++;
        spss_varinfo_t *prev = &ctx->varinfo[ctx->var_index-1];
        prev->width++;
        return 0;
    }
    if (type > 0) {
        dta_type = READSTAT_TYPE_STRING;
        // len = type;
    }
    spss_varinfo_t *info = &ctx->varinfo[ctx->var_index];
    memset(info, 0, sizeof(spss_varinfo_t));
    info->width = 1;
    info->n_segments = 1;
    info->index = ctx->var_index;
    info->offset = ctx->var_offset;
    info->type = dta_type;

    retval = readstat_convert(info->name, sizeof(info->name),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = readstat_convert(info->longname, sizeof(info->longname), 
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    info->print_format.decimal_places = (variable.print & 0x000000FF);
    info->print_format.width = (variable.print & 0x0000FF00) >> 8;
    info->print_format.type = (variable.print  & 0x00FF0000) >> 16;

    info->write_format.decimal_places = (variable.write & 0x000000FF);
    info->write_format.width = (variable.write & 0x0000FF00) >> 8;
    info->write_format.type = (variable.write  & 0x00FF0000) >> 16;
    
    if (variable.has_var_label) {
        int32_t label_len;
        if (io->read(&label_len, sizeof(int32_t), io->io_ctx) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        label_len = ctx->bswap ? byteswap4(label_len) : label_len;
        int32_t label_capacity = (label_len + 3) / 4 * 4;
        char *label_buf = malloc(label_capacity);
        size_t out_label_len = label_len*4+1;
        info->label = malloc(out_label_len);
        if (label_buf == NULL || info->label == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if (io->read(label_buf, label_capacity, io->io_ctx) < label_capacity) {
            retval = READSTAT_ERROR_READ;
            free(label_buf);
            free(info->label);
            info->label = NULL;
            goto cleanup;
        }
        retval = readstat_convert(info->label, out_label_len, label_buf, label_len, ctx->converter);
        free(label_buf);
        if (retval != READSTAT_OK)
            goto cleanup;
    }
    
    ctx->varinfo[ctx->var_index].labels_index = -1;
    
    if (variable.n_missing_values) {
        info->n_missing_values = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;
        if (info->n_missing_values < 0) {
            info->missing_range = 1;
            info->n_missing_values = abs(info->n_missing_values);
        } else {
            info->missing_range = 0;
        }
        if (info->n_missing_values > 3) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(info->missing_values, info->n_missing_values * sizeof(double), io->io_ctx) < info->n_missing_values * sizeof(double)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        for (i=0; i<info->n_missing_values; i++) {
            if (ctx->bswap) {
                info->missing_values[i] = byteswap_double(info->missing_values[i]);
            }

            uint64_t long_value = 0;
            memcpy(&long_value, &info->missing_values[i], 8);

            if (long_value == ctx->missing_double)
                info->missing_values[i] = NAN;
            if (long_value == ctx->lowest_double)
                info->missing_values[i] = -HUGE_VAL;
            if (long_value == ctx->highest_double)
                info->missing_values[i] = HUGE_VAL;
        }
    }
    
    ctx->var_index++;
    ctx->var_offset++;
    
cleanup:
    
    return retval;
}
Ejemplo n.º 6
0
readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, 
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t  header_start;
    sas_header_end_t    header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];
    struct tm epoch_tm = { .tm_year = 60, .tm_mday = 1 };
    time_t epoch = mktime(&epoch_tm);

    if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->u64 = 1;
    }
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        hinfo->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        hinfo->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            hinfo->encoding = _charset_table[i].name;
            break;
        }
    }
    if (hinfo->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }
    memcpy(hinfo->file_label, header_start.file_label, sizeof(header_start.file_label));
    if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    double creation_time, modification_time;
    if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    hinfo->creation_time = bswap ? byteswap_double(creation_time) + epoch : creation_time + epoch;
    hinfo->modification_time = bswap ? byteswap_double(creation_time) + epoch : creation_time + epoch;

    if (io->seek(16, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    hinfo->header_size = bswap ? byteswap4(header_size) : header_size;

    hinfo->page_size = bswap ? byteswap4(page_size) : page_size;

    if (hinfo->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (hinfo->u64) {
        uint64_t page_count;
        if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap4(page_count) : page_count;
    }
    
    if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }
    if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    int major, minor, revision;
    if (sscanf(header_end.release, "%1d.%04dM%1d", &major, &minor, &revision) == 3) {
        hinfo->major_version = major;
        hinfo->minor_version = minor;
        hinfo->revision = revision;
    }
    if (major == 9 && minor == 0 && revision == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        hinfo->vendor = READSTAT_VENDOR_SAS;
    }
    if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), 
                    "ReadStat: Failed to seek to position %" PRId64 "\n", hinfo->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}