Пример #1
0
/*
 * Parse one catalog block: read the label counts and the format name from the
 * block header, then pass the remainder to sas7bcat_parse_value_labels().
 *
 * data      - start of the block
 * data_size - number of readable bytes at `data`
 * ctx       - parser context (supplies `bswap` and `converter`)
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_PARSE when the block is too
 * short for the fields read here, or the error reported by readstat_convert()
 * or sas7bcat_parse_value_labels().
 */
static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    const size_t payload_offset = 106; /* value labels start here before padding is added */
    size_t pad = 0;
    int label_count_capacity = 0;
    int label_count_used = 0;
    char name[4*32+1];

    /* All fixed-offset header reads below (flags, counts, 8-byte name) end
     * before byte 106, so this single check covers them. */
    if (data_size < payload_offset) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure

    label_count_capacity = sas_read4(&data[38+pad], ctx->bswap);
    label_count_used = sas_read4(&data[42+pad], ctx->bswap);

    if ((retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter)) != READSTAT_OK)
        goto cleanup;

    if (pad) {
        pad += 16;
    }

    if ((data[2] & 0x80)) { // has long name
        /* The 32-byte long name sits at the payload offset; make sure it is present. */
        if (data_size < payload_offset + pad + 32) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        retval = readstat_convert(name, sizeof(name), &data[payload_offset+pad], 32, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
        pad += 32;
    }

    /* Guard the subtraction below against wrapping around. */
    if (data_size < payload_offset + pad) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
            label_count_used, label_count_capacity, name, ctx);

cleanup:
    return retval;
}
Пример #2
0
/*
 * Read `label_count` name/label definitions and store the converted strings
 * on the matching entries of ctx->variables, then skip the rest of the record
 * and read the observation header record.
 *
 * Each definition starts with three 16-bit fields (variable index, name
 * length, label length) followed by the name bytes and the label bytes.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ on a short read,
 * READSTAT_ERROR_PARSE when the index is not below ctx->var_count,
 * READSTAT_ERROR_MALLOC when a scratch buffer cannot be allocated, or the
 * error reported by a helper.
 */
static readstat_error_t xport_read_labels_v8(xport_ctx_t *ctx, int label_count) {
    readstat_error_t retval = READSTAT_OK;
    uint16_t labeldef[3];
    char *name = NULL;
    char *label = NULL;
    int i;
    for (i=0; i<label_count; i++) {
        int index, name_len, label_len;
        char *grown;
        readstat_variable_t *variable;

        if (read_bytes(ctx, labeldef, sizeof(labeldef)) != sizeof(labeldef)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        /* The three fields are byte-swapped on little-endian hosts. */
        if (machine_is_little_endian()) {
            index = byteswap2(labeldef[0]);
            name_len = byteswap2(labeldef[1]);
            label_len = byteswap2(labeldef[2]);
        } else {
            index = labeldef[0];
            name_len = labeldef[1];
            label_len = labeldef[2];
        }

        if (index >= ctx->var_count) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* Heap scratch buffers, reused across iterations. The lengths come
         * straight from the file: a variable-length array of size zero is
         * undefined, and large ones would be placed on the stack unchecked.
         * The +1 keeps the request non-zero. */
        grown = realloc(name, (size_t)name_len + 1);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        name = grown;

        grown = realloc(label, (size_t)label_len + 1);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        label = grown;

        variable = ctx->variables[index];

        if (read_bytes(ctx, name, name_len) != name_len ||
                read_bytes(ctx, label, label_len) != label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(variable->name, sizeof(variable->name),
                name, name_len,  NULL);
        if (retval != READSTAT_OK)
            goto cleanup;

        retval = readstat_convert(variable->label, sizeof(variable->label),
                label, label_len,  NULL);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    retval = xport_skip_rest_of_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = xport_read_obs_header_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

cleanup:
    free(name);
    free(label);
    return retval;
}
Пример #3
0
/*
 * Report `label_count` value labels to ctx->value_label_handler.
 *
 * The label set name is SAV_LABEL_NAME_PREFIX followed by
 * ctx->value_labels_count. For READSTAT_TYPE_DOUBLE the 8 raw value bytes are
 * copied into a double (byte-swapped when ctx->bswap is set) and passed
 * through sav_tag_missing_double(); for any other type the 8 raw bytes are
 * converted as a string with ctx->converter.
 *
 * Returns READSTAT_OK, the error from readstat_convert(), or
 * READSTAT_ERROR_USER_ABORT when the handler returns non-zero.
 */
static readstat_error_t sav_submit_value_labels(value_label_t *value_labels, int32_t label_count,
        readstat_type_t value_type, sav_ctx_t *ctx) {
    char label_name_buf[256];
    /* Declared at function scope: value.v.string_value points at this buffer
     * when the handler is called, so it must outlive the else-branch below. */
    char unpadded_val[8*4+1];
    readstat_error_t retval = READSTAT_OK;
    int32_t i;

    snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", ctx->value_labels_count);

    for (i=0; i<label_count; i++) {
        value_label_t *vlabel = &value_labels[i];
        readstat_value_t value = { .type = value_type };
        if (value_type == READSTAT_TYPE_DOUBLE) {
            double val_d = 0.0;
            memcpy(&val_d, vlabel->value, 8);
            if (ctx->bswap)
                val_d = byteswap_double(val_d);

            value.v.double_value = val_d;
            sav_tag_missing_double(&value, ctx);
        } else {
            retval = readstat_convert(unpadded_val, sizeof(unpadded_val), vlabel->value, 8, ctx->converter);
            if (retval != READSTAT_OK)
                break;

            value.v.string_value = unpadded_val;
        }
        /* Honour an abort request from the handler, as the other handler
         * call sites in this code base do. */
        if (ctx->value_label_handler(label_name_buf, value, vlabel->label, ctx->user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            break;
        }
    }
    return retval;
}
Пример #4
0
/*
 * Allocate and fill a readstat_variable_t for column `i`.
 *
 * The name, label and format are converted from the i-th fixed-width entries
 * of ctx->varlist, ctx->variable_labels and ctx->fmtlist. Label and format
 * are skipped when the entry's first byte is zero. When a format is present,
 * the alignment is derived from the character after a leading '%', and the
 * display width is parsed from formats shaped like "%<n>s" or "%-<n>s".
 *
 * Returns the new variable, which the caller owns, or NULL if the allocation
 * fails.
 */
static readstat_variable_t *dta_init_variable(dta_ctx_t *ctx, int i, readstat_type_t type, size_t max_len) {
    readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t));

    if (variable == NULL)
        return NULL;

    variable->type = type;
    variable->index = i;
    variable->storage_width = max_len;

    readstat_convert(variable->name, sizeof(variable->name),
            &ctx->varlist[ctx->variable_name_len*i],
            ctx->variable_name_len, ctx->converter);

    if (ctx->variable_labels[ctx->variable_labels_entry_len*i]) {
        readstat_convert(variable->label, sizeof(variable->label),
                &ctx->variable_labels[ctx->variable_labels_entry_len*i],
                ctx->variable_labels_entry_len, ctx->converter);
    }

    if (ctx->fmtlist[ctx->fmtlist_entry_len*i]) {
        readstat_convert(variable->format, sizeof(variable->format),
                &ctx->fmtlist[ctx->fmtlist_entry_len*i],
                ctx->fmtlist_entry_len, ctx->converter);
        if (variable->format[0] == '%') {
            /* '-' selects left, '~' selects center, anything else right. */
            if (variable->format[1] == '-') {
                variable->alignment = READSTAT_ALIGNMENT_LEFT;
            } else if (variable->format[1] == '~') {
                variable->alignment = READSTAT_ALIGNMENT_CENTER;
            } else {
                variable->alignment = READSTAT_ALIGNMENT_RIGHT;
            }
        }
        int display_width;
        if (sscanf(variable->format, "%%%ds", &display_width) == 1 ||
                sscanf(variable->format, "%%-%ds", &display_width) == 1) {
            variable->display_width = display_width;
        }
    }

    return variable;
}
Пример #5
0
/*
 * Decode one column value from `col_data` according to `col_info`.
 *
 * Strings are converted into ctx->scratch_buffer; on conversion failure the
 * error handler (if any) is called with a message and the error is returned.
 * Doubles are rebuilt from `col_info->width` bytes.
 *
 * NOTE: this definition is truncated in the visible source; only the part
 * shown here is documented.
 */
static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable, 
        col_info_t *col_info, const char *col_data, sas7bdat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    char error_buf[ERROR_BUF_SIZE];
    int cb_retval = 0;
    readstat_value_t value;
    memset(&value, 0, sizeof(readstat_value_t));

    value.type = col_info->type;

    if (col_info->type == READSTAT_TYPE_STRING) {
        retval = readstat_convert(ctx->scratch_buffer, ctx->scratch_buffer_len,
                col_data, col_info->width, ctx->converter);
        if (retval != READSTAT_OK) {
            if (ctx->error_handler) {
                snprintf(error_buf, sizeof(error_buf),
                        "ReadStat: Error converting string to specified encoding: %.*s\n",
                        col_info->width, col_data);
                ctx->error_handler(error_buf, ctx->user_ctx);
            }
            goto cleanup;
        }

        value.v.string_value = ctx->scratch_buffer;
    } else if (col_info->type == READSTAT_TYPE_DOUBLE) {
        uint64_t  val = 0;
        double dval = NAN;
        /* Accumulate the stored bytes most-significant first: little-endian
         * data is walked from its last byte backwards, big-endian forwards. */
        if (ctx->little_endian) {
            int k;
            for (k=0; k<col_info->width; k++) {
                val = (val << 8) | (unsigned char)col_data[col_info->width-1-k];
            }
        } else {
            int k;
            for (k=0; k<col_info->width; k++) {
                val = (val << 8) | (unsigned char)col_data[k];
            }
        }
        /* Move the bytes read into the high-order end of the 64-bit word.
         * NOTE(review): a width of 0 makes this a shift by 64, which is
         * undefined; confirm width is always in 1..8 here. */
        val <<= (8-col_info->width)*8;

        memcpy(&dval, &val, 8);

        if (isnan(dval)) {
            value.v.double_value = NAN;
            /* The tag is the bitwise complement of bits 40-47 of the raw
             * word; a zero tag is reported as system-missing, any other tag
             * as tagged-missing. */
            value.tag = ~((val >> 40) & 0xFF);
            if (value.tag) {
                value.is_tagged_missing = 1;
            } else {
                value.is_system_missing = 1;
            }
        } else {
Пример #6
0
/*
 * Allocate and fill a readstat_variable_t for column `i`.
 *
 * The name, label and format are converted from the i-th fixed-width entries
 * of ctx->varlist, ctx->variable_labels and ctx->fmtlist. Label and format
 * are skipped when the entry's first byte is zero.
 *
 * Returns the new variable, which the caller owns, or NULL if the allocation
 * fails.
 */
static readstat_variable_t *dta_init_variable(dta_ctx_t *ctx, int i, readstat_types_t type) {
    readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t));

    if (variable == NULL)
        return NULL;

    variable->type = type;
    variable->index = i;

    readstat_convert(variable->name, sizeof(variable->name),
            &ctx->varlist[ctx->variable_name_len*i],
            ctx->variable_name_len, ctx->converter);

    if (ctx->variable_labels[ctx->variable_labels_entry_len*i]) {
        readstat_convert(variable->label, sizeof(variable->label),
                &ctx->variable_labels[ctx->variable_labels_entry_len*i],
                ctx->variable_labels_entry_len, ctx->converter);
    }

    if (ctx->fmtlist[ctx->fmtlist_entry_len*i]) {
        readstat_convert(variable->format, sizeof(variable->format),
                &ctx->fmtlist[ctx->fmtlist_entry_len*i],
                ctx->fmtlist_entry_len, ctx->converter);
    }

    return variable;
}
Пример #7
0
static readstat_error_t xport_construct_format(char *dst, size_t dst_len,
        const char *src, size_t src_len, int width, int decimals) {
    char format[4*src_len+1];
    readstat_error_t retval = readstat_convert(format, sizeof(format), src, src_len, NULL);

    if (decimals) {
        snprintf(dst, dst_len, "%s%d.%d",
                format, width, decimals);
    } else if (width) {
        snprintf(dst, dst_len, "%s%d",
                format, width);
    } else {
        strcpy(dst, format);
    }

    return retval;
}
Пример #8
0
/*
 * Decode one column value from `col_data` according to `col_info`.
 *
 * Strings are converted into ctx->scratch_buffer. Doubles are rebuilt from
 * `col_info->width` bytes.
 *
 * NOTE: this definition is truncated in the visible source; only the part
 * shown here is documented.
 */
static readstat_error_t handle_data_value(const char *col_data, col_info_t *col_info, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int cb_retval = 0;
    readstat_value_t value;
    memset(&value, 0, sizeof(readstat_value_t));

    value.type = col_info->type;

    if (col_info->type == READSTAT_TYPE_STRING) {
        retval = readstat_convert(ctx->scratch_buffer, ctx->scratch_buffer_len,
                col_data, col_info->width, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        value.v.string_value = ctx->scratch_buffer;
    } else if (col_info->type == READSTAT_TYPE_DOUBLE) {
        uint64_t  val = 0;
        double dval = NAN;
        /* Accumulate the stored bytes most-significant first: little-endian
         * data is walked from its last byte backwards, big-endian forwards. */
        if (ctx->little_endian) {
            int k;
            for (k=0; k<col_info->width; k++) {
                val = (val << 8) | (unsigned char)col_data[col_info->width-1-k];
            }
        } else {
            int k;
            for (k=0; k<col_info->width; k++) {
                val = (val << 8) | (unsigned char)col_data[k];
            }
        }
        /* Move the bytes read into the high-order end of the 64-bit word.
         * NOTE(review): a width of 0 makes this a shift by 64, which is
         * undefined; confirm width is always in 1..8 here. */
        val <<= (8-col_info->width)*8;

        memcpy(&dval, &val, 8);

        if (isnan(dval)) {
            value.v.double_value = NAN;
            /* The tag is the bitwise complement of bits 40-47 of the raw
             * word; a zero tag is reported as system-missing, any other tag
             * as tagged-missing. */
            value.tag = ~((val >> 40) & 0xFF);
            if (value.tag) {
                value.is_tagged_missing = 1;
            } else {
                value.is_system_missing = 1;
            }
        } else {
Пример #9
0
/*
 * Convert the text addressed by `text_ref` into `out_buffer`.
 *
 * The reference names a blob (index) and a byte range (offset, length) inside
 * it. A zero length yields an empty string. Returns READSTAT_ERROR_PARSE when
 * the index is out of range, when offset or length is negative, or when the
 * range extends past the blob's recorded length; otherwise returns the result
 * of readstat_convert().
 */
static readstat_error_t copy_text_ref(char *out_buffer, size_t out_buffer_len, sas_text_ref_t text_ref, sas_ctx_t *ctx) {
    if (!(text_ref.index >= 0 && text_ref.index < ctx->text_blob_count))
        return READSTAT_ERROR_PARSE;

    /* Nothing to copy: hand back an empty string. */
    if (text_ref.length == 0) {
        *out_buffer = '\0';
        return READSTAT_OK;
    }

    /* Short-circuit order matters: the sum is only evaluated once both
     * operands are known to be non-negative. */
    if (text_ref.offset < 0 || text_ref.length < 0 ||
            text_ref.offset + text_ref.length > ctx->text_blob_lengths[text_ref.index])
        return READSTAT_ERROR_PARSE;

    char *text_start = ctx->text_blobs[text_ref.index] + text_ref.offset;

    return readstat_convert(out_buffer, out_buffer_len, text_start, text_ref.length,
            ctx->converter);
}
Пример #10
0
/*
 * Read one record, convert the 40 bytes starting at offset 32 into the file
 * label, and report it through ctx->metadata_handler when one is set.
 *
 * Returns READSTAT_OK, the error from xport_read_record() or
 * readstat_convert(), or READSTAT_ERROR_USER_ABORT when the handler returns
 * anything other than READSTAT_HANDLER_OK.
 */
static readstat_error_t xport_read_file_label_record(xport_ctx_t *ctx) {
    char record[LINE_LEN+1];
    char file_label[40*4+1];
    readstat_error_t status;

    status = xport_read_record(ctx, record);
    if (status != READSTAT_OK)
        return status;

    status = readstat_convert(file_label, sizeof(file_label), record + 32, 40, NULL);
    if (status != READSTAT_OK)
        return status;

    if (ctx->metadata_handler &&
            ctx->metadata_handler(file_label, ctx->timestamp, ctx->version, ctx->user_ctx) != READSTAT_HANDLER_OK)
        return READSTAT_ERROR_USER_ABORT;

    return READSTAT_OK;
}
Пример #11
0
/*
 * Read a document record and pass each line to ctx->note_handler.
 *
 * When no note handler is installed the record is skipped with
 * sav_skip_document_record(). Otherwise a 32-bit line count is read
 * (byte-swapped when ctx->bswap is set), followed by that many lines of
 * SPSS_DOC_LINE_SIZE bytes, each converted with ctx->converter before being
 * handed to the handler together with its index.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ on an incomplete read, the error
 * from readstat_convert(), or READSTAT_ERROR_USER_ABORT when the handler
 * returns non-zero.
 */
static readstat_error_t sav_read_document_record(sav_ctx_t *ctx) {
    if (!ctx->note_handler)
        return sav_skip_document_record(ctx);

    int32_t n_lines = 0;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    /* Compare with != rather than <: against the unsigned sizeof, a negative
     * return value would be converted to a huge number and slip past a
     * less-than test, leaving n_lines unread. */
    if (io->read(&n_lines, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        n_lines = byteswap4(n_lines);

    char raw_buffer[SPSS_DOC_LINE_SIZE];
    char utf8_buffer[4*SPSS_DOC_LINE_SIZE+1];
    int i;
    for (i=0; i<n_lines; i++) {
        if (io->read(raw_buffer, SPSS_DOC_LINE_SIZE, io->io_ctx) != sizeof(raw_buffer)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(utf8_buffer, sizeof(utf8_buffer),
                raw_buffer, sizeof(raw_buffer), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (ctx->note_handler(i, utf8_buffer, ctx->user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

cleanup:
    return retval;
}
Пример #12
0
/*
 * Decode one row held in `buffer` and report every value through
 * ctx->value_handler, then advance ctx->current_row.
 *
 * The buffer is consumed in 8-byte steps. `col` indexes ctx->varinfo for the
 * 8-byte column currently being read, while `var_index` indexes the variable
 * whose value is being assembled (a string variable can span several columns
 * and several segments).
 *
 * Returns READSTAT_OK, READSTAT_ERROR_PARSE when `offset` exceeds 31, the
 * error from readstat_convert(), or READSTAT_ERROR_USER_ABORT when the value
 * handler returns non-zero.
 */
static readstat_error_t sav_process_row(unsigned char *buffer, size_t buffer_len, sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    double fp_value;
    int offset = 0;                 /* 8-byte units consumed within the current column */
    readstat_off_t data_offset = 0; /* byte position within buffer */
    size_t raw_str_used = 0;        /* bytes accumulated in ctx->raw_string */
    int segment_offset = 0;         /* segments completed for the current variable */
    int var_index = 0, col = 0;

    while (data_offset < buffer_len && col < ctx->var_index) {
        spss_varinfo_t *col_info = &ctx->varinfo[col];
        spss_varinfo_t *var_info = &ctx->varinfo[var_index];
        readstat_value_t value = { .type = var_info->type };
        if (offset > 31) {
            retval = READSTAT_ERROR_PARSE;
            goto done;
        }
        if (var_info->type == READSTAT_TYPE_STRING) {
            /* Append this 8-byte chunk if it still fits; otherwise it is dropped. */
            if (raw_str_used + 8 <= ctx->raw_string_len) {
                memcpy(ctx->raw_string + raw_str_used, &buffer[data_offset], 8);
                raw_str_used += 8;
            }
            /* The column is complete once `width` chunks have been consumed. */
            if (++offset == col_info->width) {
                /* When further segments follow, discard the last byte
                 * accumulated so far before continuing with the next one. */
                if (++segment_offset < var_info->n_segments) {
                    raw_str_used--;
                }
                offset = 0;
                col++;
            }
            /* All segments gathered: convert and emit the whole string. */
            if (segment_offset == var_info->n_segments) {
                retval = readstat_convert(ctx->utf8_string, ctx->utf8_string_len, 
                        ctx->raw_string, raw_str_used, ctx->converter);
                if (retval != READSTAT_OK)
                    goto done;
                value.v.string_value = ctx->utf8_string;
                if (ctx->value_handler(ctx->current_row, ctx->variables[var_info->index],
                            value, ctx->user_ctx)) {
                    retval = READSTAT_ERROR_USER_ABORT;
                    goto done;
                }
                raw_str_used = 0;
                segment_offset = 0;
                var_index += var_info->n_segments;
            }
        } else if (var_info->type == READSTAT_TYPE_DOUBLE) {
            /* memcpy avoids reading a double through an unaligned pointer. */
            memcpy(&fp_value, &buffer[data_offset], 8);
            if (ctx->bswap) {
                fp_value = byteswap_double(fp_value);
            }
            value.v.double_value = fp_value;
            sav_tag_missing_double(&value, ctx);
            if (ctx->value_handler(ctx->current_row, ctx->variables[var_info->index],
                        value, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto done;
            }
            var_index += var_info->n_segments;
            col++;
        }
        data_offset += 8;
    }
    /* Only reached when the row was processed without error. */
    ctx->current_row++;
done:
    return retval;
}

/*
 * Read uncompressed rows until ctx->row_limit is reached or the input runs
 * short, passing each row to sav_process_row().
 *
 * One row occupies ctx->var_offset * 8 bytes. A read that does not return a
 * complete row ends the loop without changing the return value.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_MALLOC when the row buffer cannot be
 * allocated, or the error from sav_update_progress() or sav_process_row().
 */
static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    unsigned char *buffer = NULL;
    size_t bytes_read = 0;
    size_t buffer_len = ctx->var_offset * 8;

    buffer = malloc(buffer_len);
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * that fails is treated as an error. */
    if (buffer == NULL && buffer_len > 0) {
        retval = READSTAT_ERROR_MALLOC;
        goto done;
    }

    while (ctx->current_row < ctx->row_limit) {
        retval = sav_update_progress(ctx);
        if (retval != READSTAT_OK)
            goto done;

        if ((bytes_read = io->read(buffer, buffer_len, io->io_ctx)) != buffer_len)
            goto done;

        retval = sav_process_row(buffer, buffer_len, ctx);
        if (retval != READSTAT_OK)
            goto done;
    }
done:
    free(buffer);

    return retval;
}
Пример #13
0
/*
 * Parse the Stata file at `filename`, reporting its contents through the
 * handlers installed on `parser`.
 *
 * Visible steps: open the file and read 4 magic bytes; determine the file
 * size by seeking to the end and rewind; read either the XML-ish preamble
 * (magic "<sta") or a raw dta_header_t; build the context; call the info
 * handler; skip the data label and time stamp; read the map and descriptors;
 * call the variable handler for every variable; then read ctx->nobs records
 * and decode each column according to its type.
 *
 * NOTE: this definition is truncated in the visible source; only the part
 * shown here is documented.
 */
readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *filename, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    size_t  record_len = 0;
    int fd = -1;
    char *buf = NULL;
    dta_header_t  header;
    dta_ctx_t    *ctx = NULL;
    char  str_buf[2048];
    char *long_string = NULL;
    size_t file_size = 0;

    if ((fd = readstat_open(filename)) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    char magic[4];
    if (read(fd, magic, 4) != 4) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    file_size = readstat_lseek(fd, 0, SEEK_END);
    if (file_size == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (readstat_lseek(fd, 0, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    /* NOTE(review): ctx is still NULL when passed to
     * dta_read_xmlish_preamble(), and retval is not checked before `header`
     * is used below -- confirm this is intended. */
    if (strncmp(magic, "<sta", 4) == 0) {
        retval = dta_read_xmlish_preamble(fd, ctx, &header);
    } else {
        if (read(fd, &header, sizeof(header)) != sizeof(header)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
    }

    if ((ctx = dta_ctx_init(header.nvar, header.nobs, header.byteorder, header.ds_format)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    ctx->user_ctx = user_ctx;
    ctx->file_size = file_size;
    ctx->progress_handler = parser->progress_handler;

    retval = dta_update_progress(fd, ctx);
    if (retval != READSTAT_OK)
        goto cleanup;
    
    if (parser->info_handler) {
        if (parser->info_handler(ctx->nobs, ctx->nvar, user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }
    
    /* The data label and time stamp are skipped, not reported. */
    if (ctx->file_is_xmlish) {
        uint16_t label_len = 0;
        unsigned char timestamp_len;

        if ((retval = dta_read_tag(fd, ctx, "<label>")) != READSTAT_OK) {
            goto cleanup;
        }
        
        /* The label length prefix is either 2 bytes or 1 byte wide. */
        if (ctx->data_label_len_len == 2) {
            if (read(fd, &label_len, sizeof(uint16_t)) != sizeof(uint16_t)) {
                retval = READSTAT_ERROR_READ;
                goto cleanup;
            }
            label_len = ctx->machine_needs_byte_swap ? byteswap2(label_len) : label_len;
        } else if (ctx->data_label_len_len == 1) {
            unsigned char label_len_char;
            if (read(fd, &label_len_char, sizeof(unsigned char)) != sizeof(unsigned char)) {
                retval = READSTAT_ERROR_READ;
                goto cleanup;
            }
            label_len = label_len_char;
        }
        
        if (readstat_lseek(fd, label_len, SEEK_CUR) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        
        if ((retval = dta_read_tag(fd, ctx, "</label>")) != READSTAT_OK) {
            goto cleanup;
        }
        
        if ((retval = dta_read_tag(fd, ctx, "<timestamp>")) != READSTAT_OK) {
            goto cleanup;
        }
        
        if (read(fd, &timestamp_len, 1) != 1) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        
        if (readstat_lseek(fd, timestamp_len, SEEK_CUR) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }

        if ((retval = dta_read_tag(fd, ctx, "</timestamp>")) != READSTAT_OK) {
            goto cleanup;
        }
    } else {
        if (readstat_lseek(fd, ctx->data_label_len, SEEK_CUR) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        
        if (ctx->time_stamp_len) {
            if (readstat_lseek(fd, ctx->time_stamp_len, SEEK_CUR) == -1) {
                retval = READSTAT_ERROR_SEEK;
                goto cleanup;
            }
        }
    }
    
    if ((retval = dta_read_tag(fd, ctx, "</header>")) != READSTAT_OK) {
        goto cleanup;
    }

    /* Any failure from the next two helpers is reported as READSTAT_ERROR_READ. */
    if (dta_read_map(fd, ctx) != READSTAT_OK) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (dta_read_descriptors(fd, ctx) != READSTAT_OK) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* Sum the per-variable storage widths into record_len and announce each
     * variable to the handler. */
    for (i=0; i<ctx->nvar; i++) {
        size_t      max_len;
        readstat_types_t type = dta_type_info(ctx->typlist[i], &max_len, ctx);

        record_len += max_len;

        if (type == READSTAT_TYPE_STRING)
            max_len++; /* might append NULL */

        if (parser->variable_handler) {
            readstat_variable_t *variable = dta_init_variable(ctx, i, type);

            const char *value_labels = NULL;

            if (ctx->lbllist[ctx->lbllist_entry_len*i])
                value_labels = &ctx->lbllist[ctx->lbllist_entry_len*i];

            int cb_retval = parser->variable_handler(i, variable, value_labels, user_ctx);

            free(variable);

            if (cb_retval) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
    }

    if ((retval = dta_skip_expansion_fields(fd, ctx)) != READSTAT_OK) {
        goto cleanup;
    }
    
    if (record_len == 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((retval = dta_read_tag(fd, ctx, "<data>")) != READSTAT_OK) {
        goto cleanup;
    }

    if ((retval = dta_update_progress(fd, ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    if ((buf = malloc(record_len)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    /* One fixed-length record per observation. */
    for (i=0; i<ctx->nobs; i++) {
        if (read(fd, buf, record_len) != record_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        int j;
        off_t offset = 0;
        for (j=0; j<ctx->nvar; j++) {
            size_t max_len;
            readstat_value_t value;
            memset(&value, 0, sizeof(readstat_value_t));

            value.type = dta_type_info(ctx->typlist[j], &max_len, ctx);

            /* NOTE(review): the numeric branches below read through cast
             * pointers at an arbitrary byte offset into buf; alignment is
             * not guaranteed by anything visible here. */
            if (value.type == READSTAT_TYPE_STRING) {
                /* NOTE(review): the conversion result is not checked. */
                readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter);
                value.v.string_value = str_buf;
            } else if (value.type == READSTAT_TYPE_LONG_STRING) {
                /* The record holds two 32-bit numbers (v, o) that are passed
                 * to dta_read_long_string(); the file position is saved
                 * before and restored after that call. */
                uint32_t v, o;
                v = *((uint32_t *)&buf[offset]);
                o = *((uint32_t *)&buf[offset+4]);
                if (ctx->machine_needs_byte_swap) {
                    v = byteswap4(v);
                    o = byteswap4(o);
                }
                if (v > 0 && o > 0) {
                    off_t cur_pos = readstat_lseek(fd, 0, SEEK_CUR);
                    if (cur_pos == -1) {
                        retval = READSTAT_ERROR_SEEK;
                        goto cleanup;
                    }
                    retval = dta_read_long_string(fd, ctx, v, o, &long_string);
                    if (retval != READSTAT_OK) {
                        goto cleanup;
                    }
                    value.v.string_value = long_string;
                    if (readstat_lseek(fd, cur_pos, SEEK_SET) == -1) {
                        retval = READSTAT_ERROR_SEEK;
                        goto cleanup;
                    }
                }
            } else if (value.type == READSTAT_TYPE_CHAR) {
                /* For the integer types, values above the type's DTA_MAX_*
                 * constant are flagged system-missing, and values above the
                 * DTA_MISSING_* constant also get a letter tag starting at 'a'. */
                char byte = buf[offset];
                if (ctx->machine_is_twos_complement) {
                    byte = ones_to_twos_complement1(byte);
                }
                if (byte > DTA_MAX_CHAR) {
                    value.is_system_missing = 1;
                    if (byte > DTA_MISSING_CHAR) {
                        value.tag = 'a' + (byte - DTA_MISSING_CHAR_A);
                    }
                }
                value.v.char_value = byte;
            } else if (value.type == READSTAT_TYPE_INT16) {
                int16_t num = *((int16_t *)&buf[offset]);
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap2(num);
                }
                if (ctx->machine_is_twos_complement) {
                    num = ones_to_twos_complement2(num);
                }
                if (num > DTA_MAX_INT16) {
                    value.is_system_missing = 1;
                    if (num > DTA_MISSING_INT16) {
                        value.tag = 'a' + (num - DTA_MISSING_INT16_A);
                    }
                }
                value.v.i16_value = num;
            } else if (value.type == READSTAT_TYPE_INT32) {
                int32_t num = *((int32_t *)&buf[offset]);
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap4(num);
                }
                if (ctx->machine_is_twos_complement) {
                    num = ones_to_twos_complement4(num);
                }
                if (num > DTA_MAX_INT32) {
                    value.is_system_missing = 1;
                    if (num > DTA_MISSING_INT32) {
                        value.tag = 'a' + (num - DTA_MISSING_INT32_A);
                    }
                }
                value.v.i32_value = num;
            } else if (value.type == READSTAT_TYPE_FLOAT) {
                /* The raw 32-bit pattern is compared as an unsigned integer
                 * to detect missing values. */
                uint32_t num = *((uint32_t *)&buf[offset]);
                float f_num = NAN;
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap4(num);
                }
                if (num > DTA_MAX_FLOAT) {
                    value.is_system_missing = 1;
                    if (num > DTA_MISSING_FLOAT) {
                        value.tag = 'a' + ((num - DTA_MISSING_FLOAT_A) >> 11);
                    }
                } else {
Пример #14
0
/*
 * Parse the value/label pairs of one format and report them through
 * ctx->value_label_handler.
 *
 * value_start          - first byte of the value entries
 * value_labels_len     - number of bytes available from value_start
 * label_count_used     - number of entries that carry a label
 * label_count_capacity - number of value entries present
 * name                 - format name; a leading '$' selects string values
 *
 * NOTE: this definition is truncated in the visible source; only the part
 * shown here is documented.
 */
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, 
        int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    const char *lbp1 = value_start;
    /* NOTE(review): the allocation result is not checked before use below. */
    uint32_t *value_offset = calloc(label_count_used, sizeof(uint32_t));
    /* Doubles appear to be stored as big-endian, always */
    int bswap_doubles = machine_is_little_endian();
    int is_string = (name[0] == '$');

    /* Pass 1 -- find out the offset of the labels */
    for (i=0; i<label_count_capacity; i++) {
        /* NOTE(review): only the byte at lbp1[2] is bounds-checked here; the
         * 4-byte read at 10+pad1 below is not covered by this test. */
        if (&lbp1[2] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (i<label_count_used) {
            /* Record where this entry starts, keyed by the label position
             * stored inside the entry. */
            uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap);
            if (label_pos >= label_count_used) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            value_offset[label_pos] = lbp1 - value_start;
        }
        /* Each entry is 6 bytes plus the length stored in its third byte. */
        lbp1 += 6 + lbp1[2];
    }

    /* The labels begin right after the last value entry. */
    const char *lbp2 = lbp1;

    /* Pass 2 -- parse pairs of values & labels */
    for (i=0; i<label_count_used && i<label_count_capacity; i++) {
        lbp1 = value_start + value_offset[i];

        if (&lbp1[30] - value_start > value_labels_len ||
                &lbp2[10] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        size_t label_len = sas_read2(&lbp2[8], ctx->bswap);
        size_t value_entry_len = 6 + lbp1[2];
        const char *label = &lbp2[10];
        readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE };
        if (is_string) {
            /* The string value is taken from the last 16 bytes of the entry.
             * NOTE(review): `val` goes out of scope at the end of this
             * branch, yet value.v.string_value still points at it when the
             * handler is called below. */
            char val[4*16+1];
            retval = readstat_convert(val, sizeof(val), &lbp1[value_entry_len-16], 16, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            value.v.string_value = val;
        } else {
            uint64_t val = sas_read8(&lbp1[22], bswap_doubles);
            double dval = NAN;
            /* A missing value matches 0xFFFFFFFFFFFF once bits 40-47 are
             * OR-ed in; those bits supply the tag. Any other pattern is
             * reinterpreted as a double and multiplied by -1.0. */
            if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) {
                value.tag = (val >> 40);
                if (value.tag) {
                    value.is_tagged_missing = 1;
                } else {
                    value.is_system_missing = 1;
                }
            } else {
                memcpy(&dval, &val, 8);
                dval *= -1.0;
            }

            value.v.double_value = dval;
        }
        if (ctx->value_label_handler) {
            if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }

        /* Advance past the 8 + 2 header bytes, the label text, and one more byte. */
        lbp2 += 8 + 2 + label_len + 1;
    }
Пример #15
0
/*
 * Parse the SAS catalog file at `path`, reporting metadata and value labels
 * through the handlers installed on `parser`.
 *
 * Steps: read the header; set up the character converter when input and
 * output encodings differ; report the file label; read the first index page
 * and every page after SAS_CATALOG_USELESS_PAGES whose bytes at offset 16 are
 * "XLSR" to collect block pointers; sort and de-duplicate them; then read
 * and parse each referenced block.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_MALLOC when an allocation
 * fails, READSTAT_ERROR_PARSE for u64 files, or the OPEN/SEEK/READ/
 * UNSUPPORTED_CHARSET/USER_ABORT error of the step that failed.
 */
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas7bcat_ctx_t *ctx = calloc(1, sizeof(sas7bcat_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    /* Nothing has been opened and ctx owns nothing yet, so a plain free is
     * all the cleanup these early failures need. */
    if (ctx == NULL || hinfo == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->block_pointers = malloc((ctx->block_pointers_capacity = 200) * sizeof(uint64_t));
    if (ctx->block_pointers == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->value_label_handler = parser->value_label_handler;
    ctx->metadata_handler = parser->metadata_handler;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->io = io;

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(io, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;
    /* An encoding set on the parser takes precedence over the file's own. */
    if (ctx->input_encoding == NULL) {
        ctx->input_encoding = hinfo->encoding;
    }

    /* u64 catalogs are rejected. */
    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) {
        iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if (parser->metadata_handler) {
        char file_label[4*64+1];
        retval = readstat_convert(file_label, sizeof(file_label),
                hinfo->file_label, sizeof(hinfo->file_label), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (ctx->metadata_handler(file_label, hinfo->modification_time,
                    10000 * hinfo->major_version + hinfo->minor_version, ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    if (io->seek(ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas7bcat_augment_index(&page[856+2*ctx->pad1], ctx->page_size - 856 - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (io->seek(ctx->header_size+i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) {
            sas7bcat_augment_index(&page[16], ctx->page_size - 16, ctx);
        }
    }

    sas7bcat_sort_index(ctx);
    sas7bcat_uniq_index(ctx);

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        /* A block pointer carries the page number in its upper 32 bits and
         * the position within the page in its lowest 16 bits. */
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas7bcat_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }

        /* Grow through a temporary so the old buffer is neither leaked nor
         * replaced by NULL when realloc fails. */
        char *grown = realloc(buffer, buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        buffer = grown;

        if ((retval = sas7bcat_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas7bcat_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    io->close(io->io_ctx);
    free(page);
    free(buffer);
    sas7bcat_ctx_free(ctx);
    free(hinfo);

    return retval;
}
Пример #16
0
/*
 * Read one NAMESTR record per variable and populate ctx->variables, then
 * consume the record(s) that follow (the observation header for version 5
 * files; OBSV8 / LABELV8 / LABELV9 otherwise), and finally announce every
 * variable to the variable handler while accumulating ctx->row_length.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ on a failed or short
 * read, READSTAT_ERROR_MALLOC if a variable cannot be allocated,
 * READSTAT_ERROR_USER_ABORT if the handler asks to abort, or the error
 * reported by one of the helpers.
 */
static readstat_error_t xport_read_variables(xport_ctx_t *ctx) {
    int i;
    readstat_error_t retval = READSTAT_OK;
    for (i=0; i<ctx->var_count; i++) {
        xport_namestr_t namestr;
        ssize_t bytes_read = read_bytes(ctx, &namestr, sizeof(xport_namestr_t));
        /* Test the sign first: comparing a negative ssize_t directly with
         * sizeof converts it to a huge unsigned value and hides the error. */
        if (bytes_read < 0 || (size_t)bytes_read < sizeof(xport_namestr_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        xport_namestr_bswap(&namestr);

        readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t));
        if (variable == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }

        variable->index = i;
        variable->type = namestr.ntype == SAS_COLUMN_TYPE_CHR ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE;
        variable->storage_width = namestr.nlng;
        variable->display_width = namestr.nfl;
        variable->decimals = namestr.nfd;
        variable->alignment = namestr.nfj ? READSTAT_ALIGNMENT_RIGHT : READSTAT_ALIGNMENT_LEFT;

        /* The variable is not yet stored in ctx, so it must be released
         * here if a conversion fails. */
        retval = readstat_convert(variable->name, sizeof(variable->name),
                namestr.nname, sizeof(namestr.nname), NULL);
        if (retval != READSTAT_OK) {
            free(variable);
            goto cleanup;
        }

        retval = readstat_convert(variable->label, sizeof(variable->label),
                namestr.nlabel, sizeof(namestr.nlabel), NULL);
        if (retval != READSTAT_OK) {
            free(variable);
            goto cleanup;
        }

        /* NOTE(review): the result of xport_construct_format was never
         * captured here; if it returns a readstat_error_t it should be
         * checked like the conversions above -- confirm its signature. */
        xport_construct_format(variable->format, sizeof(variable->format),
                namestr.nform, sizeof(namestr.nform),
                variable->display_width, variable->decimals);

        ctx->variables[i] = variable;
    }

    retval = xport_skip_rest_of_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    if (ctx->version == 5) {
        retval = xport_read_obs_header_record(ctx);
        if (retval != READSTAT_OK)
            goto cleanup;
    } else {
        xport_header_record_t xrecord;
        retval = xport_read_header_record(ctx, &xrecord);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (strcmp(xrecord.name, "OBSV8") == 0) {
            /* observation header: nothing further to read here */
        } else if (strcmp(xrecord.name, "LABELV8") == 0) {
            retval = xport_read_labels_v8(ctx, xrecord.num1);
        } else if (strcmp(xrecord.name, "LABELV9") == 0) {
            retval = xport_read_labels_v9(ctx, xrecord.num1);
        }
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    ctx->row_length = 0;

    int index_after_skipping = 0;

    for (i=0; i<ctx->var_count; i++) {
        readstat_variable_t *variable = ctx->variables[i];
        variable->index_after_skipping = index_after_skipping;

        int cb_retval = READSTAT_HANDLER_OK;
        if (ctx->variable_handler) {
            cb_retval = ctx->variable_handler(i, variable, variable->format, ctx->user_ctx);
        }
        if (cb_retval == READSTAT_HANDLER_ABORT) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
        if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) {
            variable->skip = 1;
        } else {
            index_after_skipping++;
        }

        /* Skipped variables still occupy space in every data row. */
        ctx->row_length += variable->storage_width;
    }

cleanup:
    return retval;
}
Пример #17
0
/*
 * Decode one data row and pass each variable's value to ctx->value_handler.
 *
 * row / row_length describe the raw row bytes; variables are laid out back
 * to back, each occupying storage_width bytes.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_PARSE if a variable would
 * extend past row_length, READSTAT_ERROR_MALLOC if the string scratch
 * buffer cannot be grown, READSTAT_ERROR_CONVERT if a numeric value cannot
 * be converted, READSTAT_ERROR_USER_ABORT if the handler returns anything
 * other than READSTAT_HANDLER_OK, or the error from readstat_convert.
 */
static readstat_error_t xport_process_row(xport_ctx_t *ctx, const char *row, size_t row_length) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    off_t pos = 0;
    char *string = NULL;
    for (i=0; i<ctx->var_count; i++) {
        readstat_variable_t *variable = ctx->variables[i];
        readstat_value_t value = { .type = variable->type };

        /* Never read beyond the bytes the caller actually handed us. */
        if ((size_t)pos + variable->storage_width > row_length) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        if (variable->type == READSTAT_TYPE_STRING) {
            /* Output buffer is sized at 4 bytes per input byte plus a
             * terminator. Grow through a temporary so the old buffer is
             * still freed at cleanup if realloc fails. */
            size_t string_len = 4*variable->storage_width+1;
            char *grown = realloc(string, string_len);
            if (grown == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }
            string = grown;

            retval = readstat_convert(string, string_len,
                    &row[pos], variable->storage_width, NULL);
            if (retval != READSTAT_OK)
                goto cleanup;

            value.v.string_value = string;
        } else {
            double dval = NAN;
            if (variable->storage_width <= XPORT_MAX_DOUBLE_SIZE &&
                    variable->storage_width >= XPORT_MIN_DOUBLE_SIZE) {
                char full_value[8] = { 0 };
                /* A missing value is a marker byte ('.', '_' or 'A'-'Z')
                 * followed exclusively by zero bytes. */
                if (memcmp(&full_value[1], &row[pos+1], variable->storage_width - 1) == 0 &&
                        (row[pos] == '_' || row[pos] == '.' || (row[pos] >= 'A' && row[pos] <= 'Z'))) {
                    if (row[pos] == '.') {
                        value.is_system_missing = 1;
                    } else {
                        value.tag = row[pos];
                        value.is_tagged_missing = 1;
                    }
                } else {
                    /* Values narrower than 8 bytes are zero-padded on the
                     * right before conversion. */
                    memcpy(full_value, &row[pos], variable->storage_width);
                    int rc = cnxptiee(full_value, CN_TYPE_XPORT, &dval, CN_TYPE_NATIVE);
                    if (rc != 0) {
                        retval = READSTAT_ERROR_CONVERT;
                        goto cleanup;
                    }
                }
            }

            value.v.double_value = dval;
        }
        pos += variable->storage_width;

        if (ctx->value_handler(ctx->parsed_row_count, variable, value, ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

cleanup:
    free(string);
    return retval;
}

/*
 * Read data rows of ctx->row_length bytes until a short read or until
 * ctx->row_limit rows have been parsed, sending each row through
 * xport_process_row.
 *
 * Rows consisting only of spaces are counted rather than emitted at once:
 * they are replayed only when a later non-blank row follows, so a trailing
 * run of blank rows at the end of the input is never reported.
 *
 * Does nothing (returns READSTAT_OK) when the row length is zero or no
 * value handler is installed. Returns READSTAT_ERROR_MALLOC if the row
 * buffers cannot be allocated, READSTAT_ERROR_READ if read_bytes reports
 * -1, or the error from row processing / progress reporting.
 */
static readstat_error_t xport_read_data(xport_ctx_t *ctx) {
    if (!ctx->row_length)
        return READSTAT_OK;

    if (!ctx->value_handler)
        return READSTAT_OK;

    readstat_error_t retval = READSTAT_OK;
    char *row = malloc(ctx->row_length);
    char *blank_row = malloc(ctx->row_length);
    int num_blank_rows = 0;

    if (row == NULL || blank_row == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    memset(blank_row, ' ', ctx->row_length);

    while (1) {
        ssize_t bytes_read = read_bytes(ctx, row, ctx->row_length);
        if (bytes_read == -1) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        } else if (bytes_read < ctx->row_length) {
            /* A partial row marks the end of the data. */
            break;
        }

        off_t pos = 0;

        int row_is_blank = 1;

        for (pos=0; pos<ctx->row_length; pos++) {
            if (row[pos] != ' ') {
                row_is_blank = 0;
                break;
            }
        }

        if (row_is_blank) {
            num_blank_rows++;
            continue;
        }

        /* A real row follows, so the pending blank rows were data too. */
        while (num_blank_rows) {
            retval = xport_process_row(ctx, blank_row, ctx->row_length);
            if (retval != READSTAT_OK)
                goto cleanup;

            if (++(ctx->parsed_row_count) == ctx->row_limit)
                goto cleanup;

            num_blank_rows--;
        }

        retval = xport_process_row(ctx, row, ctx->row_length);
        if (retval != READSTAT_OK)
            goto cleanup;

        retval = xport_update_progress(ctx);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (++(ctx->parsed_row_count) == ctx->row_limit)
            break;
    }

cleanup:
    free(row);
    free(blank_row);
    return retval;
}

/*
 * Entry point for parsing an XPORT file.
 *
 * Copies the handlers and row limit from `parser` into a fresh context,
 * opens `path` through the parser's I/O callbacks, measures the file by
 * seeking to its end, and then walks the records in order: library record,
 * one skipped record, timestamp, MEMBER/MEMBV8 header, DSCRPTR/DSCPTV8
 * header, one skipped record, file label, NAMESTR header, the variables,
 * and finally the data rows (only when the row length is non-zero).
 *
 * The stream is closed and the context freed on every path. The first
 * error encountered is returned; READSTAT_OK otherwise.
 */
readstat_error_t readstat_parse_xport(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_io_t *io = parser->io;
    readstat_error_t retval = READSTAT_OK;
    xport_ctx_t *ctx = xport_ctx_init();

    /* I/O plumbing and caller-supplied state */
    ctx->io = io;
    ctx->user_ctx = user_ctx;
    ctx->row_limit = parser->row_limit;

    /* Callbacks */
    ctx->info_handler = parser->info_handler;
    ctx->metadata_handler = parser->metadata_handler;
    ctx->note_handler = parser->note_handler;
    ctx->variable_handler = parser->variable_handler;
    ctx->value_handler = parser->value_handler;
    ctx->value_label_handler = parser->value_label_handler;
    ctx->error_handler = parser->error_handler;
    ctx->progress_handler = parser->progress_handler;

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    /* Seek to the end to learn the file size, then rewind. */
    ctx->file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx);
    if (ctx->file_size == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    /* File-level records */
    if ((retval = xport_read_library_record(ctx)) != READSTAT_OK)
        goto cleanup;
    if ((retval = xport_skip_record(ctx)) != READSTAT_OK)
        goto cleanup;
    if ((retval = xport_read_timestamp_record(ctx)) != READSTAT_OK)
        goto cleanup;

    /* Member-level records */
    if ((retval = xport_expect_header_record(ctx, "MEMBER", "MEMBV8")) != READSTAT_OK)
        goto cleanup;
    if ((retval = xport_expect_header_record(ctx, "DSCRPTR", "DSCPTV8")) != READSTAT_OK)
        goto cleanup;
    if ((retval = xport_skip_record(ctx)) != READSTAT_OK)
        goto cleanup;
    if ((retval = xport_read_file_label_record(ctx)) != READSTAT_OK)
        goto cleanup;

    /* Variable descriptions */
    if ((retval = xport_read_namestr_header_record(ctx)) != READSTAT_OK)
        goto cleanup;
    if ((retval = xport_read_variables(ctx)) != READSTAT_OK)
        goto cleanup;

    /* Data rows; a zero row length means there is nothing to read. */
    if (ctx->row_length)
        retval = xport_read_data(ctx);

cleanup:
    io->close(io->io_ctx);
    xport_ctx_free(ctx);

    return retval;
}
Пример #18
0
/*
 * Fill ctx->timestamp from the creation time and creation date fields of
 * the file header.
 *
 * Both fields are parsed into one struct tm (tm_isdst = -1, so mktime
 * decides whether daylight saving applies) which is then converted with
 * mktime(). Returns the first parse error, or READSTAT_OK.
 */
readstat_error_t sav_parse_timestamp(sav_ctx_t *ctx, sav_file_header_record_t *header) {
    struct tm timestamp = { .tm_isdst = -1 };
    readstat_error_t retval;

    retval = sav_parse_time(header->creation_time, sizeof(header->creation_time), &timestamp, ctx);
    if (retval != READSTAT_OK)
        return retval;

    retval = sav_parse_date(header->creation_date, sizeof(header->creation_date), &timestamp, ctx);
    if (retval != READSTAT_OK)
        return retval;

    ctx->timestamp = mktime(&timestamp);
    return retval;
}

/*
 * Entry point for parsing a SAV file.
 *
 * Opens `path` through the parser's I/O callbacks, reads the file header,
 * builds a context from it, makes two passes over the records, reports the
 * row/variable counts and metadata to the corresponding handlers, and
 * finally reads the data when a value handler is installed.
 *
 * Returns READSTAT_ERROR_OPEN without further cleanup if the open fails.
 * On every other path the stream is closed and the context (if any) freed
 * before the first error encountered, or READSTAT_OK, is returned.
 */
readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    sav_file_header_record_t header;
    sav_ctx_t *ctx = NULL;
    size_t file_size = 0;

    if (io->open(path, io->io_ctx) == -1) {
        return READSTAT_ERROR_OPEN;
    }

    /* file_size is unsigned, so the -1 below is converted to SIZE_MAX for
     * the comparison; a seek failure is still detected. */
    file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx);
    if (file_size == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    /* Compare with != rather than <: if the read callback reports failure
     * with a negative value, "<" against sizeof would convert it to a huge
     * unsigned number and let the error slip through. */
    if (io->read(&header, sizeof(sav_file_header_record_t), io->io_ctx) != sizeof(sav_file_header_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx = sav_ctx_init(&header, io);
    if (ctx == NULL) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    ctx->progress_handler = parser->progress_handler;
    ctx->error_handler = parser->error_handler;
    ctx->note_handler = parser->note_handler;
    ctx->value_handler = parser->value_handler;
    ctx->value_label_handler = parser->value_label_handler;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->file_size = file_size;

    /* Use the caller's limit when the header's record count is -1 or when
     * the caller asked for fewer rows than the header declares; otherwise
     * read every record the header declares. */
    if (ctx->record_count == -1 ||
            (parser->row_limit > 0 && parser->row_limit < ctx->record_count)) {
        ctx->row_limit = parser->row_limit;
    } else {
        ctx->row_limit = ctx->record_count;
    }

    if ((retval = sav_parse_timestamp(ctx, &header)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_parse_records_pass1(ctx)) != READSTAT_OK)
        goto cleanup;

    /* Rewind to just past the file header before the second pass. */
    if (io->seek(sizeof(sav_file_header_record_t), READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if ((retval = sav_update_progress(ctx)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_parse_records_pass2(ctx)) != READSTAT_OK)
        goto cleanup;

    sav_set_n_segments_and_var_count(ctx);

    if (parser->info_handler) {
        /* Any non-zero handler result aborts the parse. */
        if (parser->info_handler(ctx->record_count == -1 ? -1 : ctx->row_limit,
                    ctx->var_count, ctx->user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    if (parser->metadata_handler) {
        if ((retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label),
                        header.file_label, sizeof(header.file_label), ctx->converter)) != READSTAT_OK)
            goto cleanup;

        if (parser->metadata_handler(ctx->file_label, ctx->timestamp, 2, ctx->user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    sav_parse_variable_display_parameter_record(ctx);

    if ((retval = sav_handle_variables(parser, ctx)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_handle_fweight(parser, ctx)) != READSTAT_OK)
        goto cleanup;

    if (ctx->value_handler) {
        retval = sav_read_data(ctx);
    }

cleanup:
    io->close(io->io_ctx);
    if (ctx)
        sav_ctx_free(ctx);

    return retval;
}
Пример #19
0
static readstat_error_t dta_handle_rows(dta_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    char *buf = NULL;
    char  str_buf[2048];
    int i;
    readstat_error_t retval = READSTAT_OK;

    if ((buf = malloc(ctx->record_len)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    for (i=0; i<ctx->row_limit; i++) {
        if (io->read(buf, ctx->record_len, io->io_ctx) != ctx->record_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        int j;
        off_t offset = 0;
        for (j=0; j<ctx->nvar; j++) {
            size_t max_len;
            readstat_value_t value;
            memset(&value, 0, sizeof(readstat_value_t));

            value.type = dta_type_info(ctx->typlist[j], &max_len, ctx);

            if (value.type == READSTAT_TYPE_STRING) {
                readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter);
                value.v.string_value = str_buf;
            } else if (value.type == READSTAT_TYPE_STRING_REF) {
                dta_strl_t key;
                dta_interpret_strl_vo_bytes(ctx, (unsigned char *)&buf[offset], &key);

                dta_strl_t **found = bsearch(&key, ctx->strls, ctx->strls_count, sizeof(dta_strl_t *), &dta_compare_strls);

                if (found) {
                    value.v.string_value = (*found)->data;
                }
                value.type = READSTAT_TYPE_STRING;
            } else if (value.type == READSTAT_TYPE_INT8) {
                int8_t byte = buf[offset];
                if (ctx->machine_is_twos_complement) {
                    byte = ones_to_twos_complement1(byte);
                }
                if (byte > ctx->max_int8) {
                    if (ctx->supports_tagged_missing && byte > DTA_113_MISSING_INT8) {
                        value.tag = 'a' + (byte - DTA_113_MISSING_INT8_A);
                        value.is_tagged_missing = 1;
                    } else {
                        value.is_system_missing = 1;
                    }
                }
                value.v.i8_value = byte;
            } else if (value.type == READSTAT_TYPE_INT16) {
                int16_t num = *((int16_t *)&buf[offset]);
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap2(num);
                }
                if (ctx->machine_is_twos_complement) {
                    num = ones_to_twos_complement2(num);
                }
                if (num > ctx->max_int16) {
                    if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT16) {
                        value.tag = 'a' + (num - DTA_113_MISSING_INT16_A);
                        value.is_tagged_missing = 1;
                    } else {
                        value.is_system_missing = 1;
                    }
                }
                value.v.i16_value = num;
            } else if (value.type == READSTAT_TYPE_INT32) {
                int32_t num = *((int32_t *)&buf[offset]);
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap4(num);
                }
                if (ctx->machine_is_twos_complement) {
                    num = ones_to_twos_complement4(num);
                }
                if (num > ctx->max_int32) {
                    if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT32) {
                        value.tag = 'a' + (num - DTA_113_MISSING_INT32_A);
                        value.is_tagged_missing = 1;
                    } else {
                        value.is_system_missing = 1;
                    }
                }
                value.v.i32_value = num;
            } else if (value.type == READSTAT_TYPE_FLOAT) {
                int32_t num = *((int32_t *)&buf[offset]);
                float f_num = NAN;
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap4(num);
                }
                if (num > ctx->max_float) {
                    if (ctx->supports_tagged_missing && num > DTA_113_MISSING_FLOAT) {
                        value.tag = 'a' + ((num - DTA_113_MISSING_FLOAT_A) >> 11);
                        value.is_tagged_missing = 1;
                    } else {
                        value.is_system_missing = 1;
                    }
                } else {
Пример #20
0
/*
 * Read one variable record from the stream and record it in ctx->varinfo.
 *
 * A record whose type is negative does not start a new variable: it only
 * widens the previously recorded variable by one and advances
 * ctx->var_offset. Otherwise a new spss_varinfo_t slot is filled with the
 * name, print/write formats, optional label and optional missing values,
 * and ctx->var_index / ctx->var_offset are advanced.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_MALLOC on allocation
 * failure, READSTAT_ERROR_READ on a failed or short read,
 * READSTAT_ERROR_PARSE on an impossible field value, or the error from
 * readstat_convert.
 */
static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    sav_variable_record_t variable;
    readstat_error_t retval = READSTAT_OK;

    if (ctx->var_index == ctx->varinfo_capacity) {
        /* Grow through a temporary and commit only on success, so a failed
         * realloc neither loses the existing array nor leaves the recorded
         * capacity larger than the real one. */
        size_t new_capacity = ctx->varinfo_capacity ? 2 * (size_t)ctx->varinfo_capacity : 1;
        spss_varinfo_t *grown = realloc(ctx->varinfo, new_capacity * sizeof(spss_varinfo_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->varinfo = grown;
        ctx->varinfo_capacity = new_capacity;
    }

    /* Reads are checked with != so that a negative result from the read
     * callback is not hidden by conversion to unsigned. */
    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) != sizeof(sav_variable_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    variable.print = ctx->bswap ? byteswap4(variable.print) : variable.print;
    variable.write = ctx->bswap ? byteswap4(variable.write) : variable.write;

    readstat_type_t dta_type = READSTAT_TYPE_DOUBLE;
    int32_t type = ctx->bswap ? byteswap4(variable.type) : variable.type;
    int i;
    if (type < 0) {
        /* Continuation of the previous variable; there must be one. */
        if (ctx->var_index == 0) {
            return READSTAT_ERROR_PARSE;
        }
        ctx->var_offset++;
        spss_varinfo_t *prev = &ctx->varinfo[ctx->var_index-1];
        prev->width++;
        return 0;
    }
    if (type > 0) {
        dta_type = READSTAT_TYPE_STRING;
    }
    spss_varinfo_t *info = &ctx->varinfo[ctx->var_index];
    memset(info, 0, sizeof(spss_varinfo_t));
    info->width = 1;
    info->n_segments = 1;
    info->index = ctx->var_index;
    info->offset = ctx->var_offset;
    info->type = dta_type;

    retval = readstat_convert(info->name, sizeof(info->name),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    /* The long name starts out identical to the short name. */
    retval = readstat_convert(info->longname, sizeof(info->longname),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    /* Formats are packed as: byte 0 decimals, byte 1 width, byte 2 type. */
    info->print_format.decimal_places = (variable.print & 0x000000FF);
    info->print_format.width = (variable.print & 0x0000FF00) >> 8;
    info->print_format.type = (variable.print  & 0x00FF0000) >> 16;

    info->write_format.decimal_places = (variable.write & 0x000000FF);
    info->write_format.width = (variable.write & 0x0000FF00) >> 8;
    info->write_format.type = (variable.write  & 0x00FF0000) >> 16;

    if (variable.has_var_label) {
        int32_t label_len;
        if (io->read(&label_len, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        label_len = ctx->bswap ? byteswap4(label_len) : label_len;

        /* The length comes straight from the file: reject negative values
         * and anything whose 4x expansion would overflow size_t. */
        if (label_len < 0 || (size_t)label_len > (SIZE_MAX - 1) / 4) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* The stored label is padded to a multiple of 4 bytes; the output
         * buffer allows 4 bytes per input byte plus a terminator. */
        size_t label_capacity = ((size_t)label_len + 3) / 4 * 4;
        size_t out_label_len = (size_t)label_len*4+1;
        /* Never request zero bytes: malloc(0) may legitimately return NULL. */
        char *label_buf = malloc(label_capacity ? label_capacity : 1);
        info->label = malloc(out_label_len);
        if (label_buf == NULL || info->label == NULL) {
            /* Release whichever of the two allocations succeeded. */
            free(label_buf);
            free(info->label);
            info->label = NULL;
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if (io->read(label_buf, label_capacity, io->io_ctx) != label_capacity) {
            retval = READSTAT_ERROR_READ;
            free(label_buf);
            free(info->label);
            info->label = NULL;
            goto cleanup;
        }
        retval = readstat_convert(info->label, out_label_len, label_buf, label_len, ctx->converter);
        free(label_buf);
        if (retval != READSTAT_OK) {
            /* This slot is not committed (var_index is not advanced), so
             * release the label here rather than leave it dangling. */
            free(info->label);
            info->label = NULL;
            goto cleanup;
        }
    }

    ctx->varinfo[ctx->var_index].labels_index = -1;

    if (variable.n_missing_values) {
        int32_t n_missing = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;
        /* A negative count flags a range; at most 3 values are accepted.
         * Validating before negating avoids negating INT32_MIN. */
        if (n_missing < -3 || n_missing > 3) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        info->missing_range = (n_missing < 0) ? 1 : 0;
        info->n_missing_values = (n_missing < 0) ? -n_missing : n_missing;

        size_t missing_bytes = info->n_missing_values * sizeof(double);
        if (io->read(info->missing_values, missing_bytes, io->io_ctx) != missing_bytes) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        for (i=0; i<info->n_missing_values; i++) {
            if (ctx->bswap) {
                info->missing_values[i] = byteswap_double(info->missing_values[i]);
            }

            /* Compare bit patterns against the file's sentinel values. */
            uint64_t long_value = 0;
            memcpy(&long_value, &info->missing_values[i], 8);

            if (long_value == ctx->missing_double)
                info->missing_values[i] = NAN;
            if (long_value == ctx->lowest_double)
                info->missing_values[i] = -HUGE_VAL;
            if (long_value == ctx->highest_double)
                info->missing_values[i] = HUGE_VAL;
        }
    }

    ctx->var_index++;
    ctx->var_offset++;

cleanup:

    return retval;
}
Пример #21
0
/*
 * Parse a long-value-labels record held in memory and report every
 * (value, label) pair to ctx->value_label_handler.
 *
 * Layout as consumed here: int32 name length, variable name, one int32
 * that is skipped, int32 label count, then per label: int32 value length,
 * value bytes, int32 label length, label bytes.
 *
 * data / data_len delimit the record. Every length read from the record is
 * validated against the bytes that remain before it is used.
 *
 * Returns READSTAT_OK (immediately, if no handler is installed),
 * READSTAT_ERROR_PARSE for a truncated or inconsistent record or an
 * unknown variable name, READSTAT_ERROR_MALLOC on allocation failure, or
 * the error from readstat_convert.
 */
static readstat_error_t sav_parse_long_value_labels_record(const void *data, size_t data_len, sav_ctx_t *ctx) {
    if (!ctx->value_label_handler)
        return READSTAT_OK;

    readstat_error_t retval = READSTAT_OK;
    int32_t label_name_len = 0;
    int32_t label_count = 0;
    int32_t i = 0;
    const char *data_ptr = data;
    const char *data_end = data_ptr + data_len;
    char var_name_buf[256*4+1];
    char label_name_buf[256];
    char *value_buffer = NULL;
    char *label_buffer = NULL;

    memset(label_name_buf, '\0', sizeof(label_name_buf));

    /* Bounds are checked as "bytes remaining" so that no pointer beyond
     * the end of the record is ever formed. */
    if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    memcpy(&label_name_len, data_ptr, sizeof(int32_t));
    if (ctx->bswap)
        label_name_len = byteswap4(label_name_len);

    data_ptr += sizeof(int32_t);

    if (label_name_len < 0 || (size_t)label_name_len > (size_t)(data_end - data_ptr)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    retval = readstat_convert(var_name_buf, sizeof(var_name_buf), data_ptr, label_name_len, ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    data_ptr += label_name_len;

    /* Find the variable by long name, stepping over continuation segments,
     * and assign it the next label-set index. */
    for (i=0; i<ctx->var_index;) {
        spss_varinfo_t *info = &ctx->varinfo[i];
        if (strcmp(var_name_buf, info->longname) == 0) {
            info->labels_index = ctx->value_labels_count++;
            snprintf(label_name_buf, sizeof(label_name_buf),
                     SAV_LABEL_NAME_PREFIX "%d", info->labels_index);
            break;
        }
        i += info->n_segments;
    }

    /* An empty label-set name means no variable matched. */
    if (label_name_buf[0] == '\0') {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* Two int32 fields follow: one that is skipped unread, then the
     * label count. */
    if ((size_t)(data_end - data_ptr) < 2 * sizeof(int32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    data_ptr += sizeof(int32_t);

    memcpy(&label_count, data_ptr, sizeof(int32_t));
    if (ctx->bswap)
        label_count = byteswap4(label_count);

    data_ptr += sizeof(int32_t);

    for (i=0; i<label_count; i++) {
        int32_t value_len = 0, label_len = 0;
        size_t value_buffer_len = 0, label_buffer_len = 0;
        char *grown = NULL;

        if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        memcpy(&value_len, data_ptr, sizeof(int32_t));
        if (ctx->bswap)
            value_len = byteswap4(value_len);

        data_ptr += sizeof(int32_t);

        /* Validate before allocating: the length must be non-negative, fit
         * in what is left of the record, and survive the 4x+1 expansion. */
        if (value_len < 0 || (size_t)value_len > (size_t)(data_end - data_ptr) ||
                (size_t)value_len > (SIZE_MAX - 1) / 4) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        value_buffer_len = (size_t)value_len*4+1;
        /* Grow via a temporary so the old buffer is still freed at cleanup
         * if realloc fails. */
        grown = realloc(value_buffer, value_buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        value_buffer = grown;

        retval = readstat_convert(value_buffer, value_buffer_len, data_ptr, value_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        data_ptr += value_len;

        if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        memcpy(&label_len, data_ptr, sizeof(int32_t));
        if (ctx->bswap)
            label_len = byteswap4(label_len);

        data_ptr += sizeof(int32_t);

        if (label_len < 0 || (size_t)label_len > (size_t)(data_end - data_ptr) ||
                (size_t)label_len > (SIZE_MAX - 1) / 4) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        label_buffer_len = (size_t)label_len*4+1;
        grown = realloc(label_buffer, label_buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        label_buffer = grown;

        retval = readstat_convert(label_buffer, label_buffer_len, data_ptr, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        data_ptr += label_len;

        readstat_value_t value = { .type = READSTAT_TYPE_STRING };
        value.v.string_value = value_buffer;

        ctx->value_label_handler(label_name_buf, value, label_buffer, ctx->user_ctx);
    }

cleanup:
    free(value_buffer);
    free(label_buffer);
    return retval;
}
Пример #22
0
/*
 * Read a value-label record and the variable-index record (type 4) that
 * must follow it.
 *
 * Each label entry is read as 9 bytes into a value_label_t followed by the
 * padded label text. The listed variables are then looked up by offset and
 * tagged with the current label-set index, and the labels are submitted to
 * the value-label handler when one is installed.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ on a failed or short
 * read, READSTAT_ERROR_PARSE on a negative count, an oversized label or a
 * missing type-4 record, READSTAT_ERROR_MALLOC on allocation failure, or
 * the error from readstat_convert.
 */
static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx) {
    int32_t label_count;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int32_t *vars = NULL;
    int32_t rec_type;
    int32_t var_count;
    size_t vars_bytes = 0;
    readstat_type_t value_type = READSTAT_TYPE_STRING;
    char label_buf[256];
    value_label_t *value_labels = NULL;

    /* Reads are checked with != so that a negative result from the read
     * callback is not hidden by conversion to unsigned. */
    if (io->read(&label_count, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        label_count = byteswap4(label_count);

    /* The count comes from the file; a negative value would turn into an
     * enormous allocation request. */
    if (label_count < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* calloc checks the multiplication for overflow; at least one element
     * is requested because a zero-size allocation may return NULL. */
    if ((value_labels = calloc(label_count ? (size_t)label_count : 1, sizeof(value_label_t))) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    int i;
    for (i=0; i<label_count; i++) {
        value_label_t *vlabel = &value_labels[i];
        if (io->read(vlabel, 9, io->io_ctx) != 9) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        /* Those 9 bytes plus the label text are padded to a multiple of 8,
         * which gives the number of text bytes that follow. */
        size_t label_len = (vlabel->label_len + 8) / 8 * 8 - 1;
        if (label_len > sizeof(label_buf)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(label_buf, label_len, io->io_ctx) != label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        retval = readstat_convert(vlabel->label, sizeof(vlabel->label), label_buf, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        rec_type = byteswap4(rec_type);

    /* The labels must be followed by a record of type 4 listing the
     * variables they apply to. */
    if (rec_type != 4) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (io->read(&var_count, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        var_count = byteswap4(var_count);

    if (var_count < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((vars = calloc(var_count ? (size_t)var_count : 1, sizeof(int32_t))) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    vars_bytes = (size_t)var_count * sizeof(int32_t);
    if (io->read(vars, vars_bytes, io->io_ctx) != vars_bytes) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    for (i=0; i<var_count; i++) {
        int var_offset = vars[i];
        if (ctx->bswap)
            var_offset = byteswap4(var_offset);

        /* Presumably the file numbers variables from 1 while the stored
         * offsets start at 0 -- TODO confirm against the format notes. */
        var_offset--;
        spss_varinfo_t *var = bsearch(&var_offset, ctx->varinfo, ctx->var_index, sizeof(spss_varinfo_t),
                &spss_varinfo_compare);
        if (var) {
            var->labels_index = ctx->value_labels_count;

            /* The type of the last variable found decides how the label
             * values are interpreted. */
            value_type = var->type;
        }
    }
    if (ctx->value_label_handler) {
        sav_submit_value_labels(value_labels, label_count, value_type, ctx);
    }
    ctx->value_labels_count++;
cleanup:
    free(vars);
    free(value_labels);

    return retval;
}