/*
 * Parse one catalog block: read the label counts and the format name from the
 * block header, then hand the value-label payload that follows the header to
 * sas7bcat_parse_value_labels().
 *
 *   data       raw block bytes
 *   data_size  number of valid bytes in `data`
 *   ctx        parser context (supplies the byte-swap flag and the converter)
 *
 * Returns READSTAT_OK on success, and also when the block is too short to hold
 * the header (or the long name its flags announce) -- such a block is skipped
 * instead of being read out of bounds. Otherwise returns the first error
 * reported by readstat_convert() or sas7bcat_parse_value_labels().
 */
static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    const size_t payload_offset = 106;
    size_t pad = 0;
    int label_count_capacity = 0;
    int label_count_used = 0;
    char name[4*32+1];

    /* Every fixed header field read below lies inside the first 106 bytes. */
    if (data_size < payload_offset)
        goto cleanup;

    pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure

    label_count_capacity = sas_read4(&data[38+pad], ctx->bswap);
    label_count_used = sas_read4(&data[42+pad], ctx->bswap);

    if ((retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter)) != READSTAT_OK)
        goto cleanup;

    if (pad) {
        pad += 16;
    }

    if ((data[2] & 0x80)) { // has long name
        /* The 32-byte long name must fit before it is converted. */
        if (data_size < payload_offset + pad + 32)
            goto cleanup;

        retval = readstat_convert(name, sizeof(name), &data[payload_offset+pad], 32, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        pad += 32;
    }

    /* Guarantees that the length handed down cannot wrap around. */
    if (data_size < payload_offset + pad)
        goto cleanup;

    retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
            label_count_used, label_count_capacity, name, ctx);

cleanup:
    return retval;
}
/*
 * Read `label_count` V8 label definitions. Each definition starts with three
 * big-endian 16-bit fields (variable index, name length, label length)
 * followed by the name bytes and the label bytes; these overwrite the name and
 * label of ctx->variables[index]. Afterwards the rest of the current record is
 * skipped and the observation header record is read.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ on a short read,
 * READSTAT_ERROR_PARSE when the index is not below ctx->var_count,
 * READSTAT_ERROR_MALLOC when a scratch buffer cannot be allocated, or the
 * error reported by readstat_convert() / the record helpers.
 */
static readstat_error_t xport_read_labels_v8(xport_ctx_t *ctx, int label_count) {
    readstat_error_t retval = READSTAT_OK;
    uint16_t labeldef[3];
    char *name = NULL;
    char *label = NULL;
    int i;

    for (i=0; i<label_count; i++) {
        int index, name_len, label_len;

        if (read_bytes(ctx, labeldef, sizeof(labeldef)) != sizeof(labeldef)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        if (machine_is_little_endian()) {
            index = byteswap2(labeldef[0]);
            name_len = byteswap2(labeldef[1]);
            label_len = byteswap2(labeldef[2]);
        } else {
            index = labeldef[0];
            name_len = labeldef[1];
            label_len = labeldef[2];
        }

        if (index >= ctx->var_count) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* Heap buffers instead of variable-length arrays: the lengths come
         * from the file, may be zero (a zero-size VLA is undefined) and may
         * be as large as 65535. The +1 keeps the request non-zero. */
        free(name);
        free(label);
        name = malloc((size_t)name_len + 1);
        label = malloc((size_t)label_len + 1);
        if (name == NULL || label == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }

        readstat_variable_t *variable = ctx->variables[index];

        if (read_bytes(ctx, name, name_len) != name_len ||
                read_bytes(ctx, label, label_len) != label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(variable->name, sizeof(variable->name), name, name_len, NULL);
        if (retval != READSTAT_OK)
            goto cleanup;

        retval = readstat_convert(variable->label, sizeof(variable->label), label, label_len, NULL);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    retval = xport_skip_rest_of_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = xport_read_obs_header_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

cleanup:
    free(name);
    free(label);
    return retval;
}
/*
 * Report `label_count` value labels to ctx->value_label_handler under a label
 * set name built from SAV_LABEL_NAME_PREFIX and ctx->value_labels_count.
 *
 * For READSTAT_TYPE_DOUBLE the 8 raw value bytes are reinterpreted as a double
 * (byte-swapped when ctx->bswap is set) and passed through
 * sav_tag_missing_double(); for any other type the 8 bytes are converted with
 * readstat_convert() and reported as a string.
 *
 * Returns READSTAT_OK, the error from readstat_convert(), or
 * READSTAT_ERROR_USER_ABORT when the handler returns non-zero.
 */
static readstat_error_t sav_submit_value_labels(value_label_t *value_labels, int32_t label_count,
        readstat_type_t value_type, sav_ctx_t *ctx) {
    char label_name_buf[256];
    /* Declared at function scope: value.v.string_value points into this
     * buffer while the handler runs, so it must outlive the branch that
     * fills it. */
    char unpadded_val[8*4+1];
    readstat_error_t retval = READSTAT_OK;
    int32_t i;

    snprintf(label_name_buf, sizeof(label_name_buf),
            SAV_LABEL_NAME_PREFIX "%d", ctx->value_labels_count);

    for (i=0; i<label_count; i++) {
        value_label_t *vlabel = &value_labels[i];
        readstat_value_t value = { .type = value_type };

        if (value_type == READSTAT_TYPE_DOUBLE) {
            double val_d = 0.0;
            memcpy(&val_d, vlabel->value, 8);
            if (ctx->bswap)
                val_d = byteswap_double(val_d);

            value.v.double_value = val_d;
            sav_tag_missing_double(&value, ctx);
        } else {
            retval = readstat_convert(unpadded_val, sizeof(unpadded_val),
                    vlabel->value, 8, ctx->converter);
            if (retval != READSTAT_OK)
                break;

            value.v.string_value = unpadded_val;
        }

        /* A non-zero handler result aborts, as for the other SAV handlers. */
        if (ctx->value_label_handler(label_name_buf, value, vlabel->label, ctx->user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            break;
        }
    }

    return retval;
}
/*
 * Allocate and fill a readstat_variable_t for column `i`.
 *
 * The name always comes from ctx->varlist; the label and the format are only
 * converted when their entry in ctx->variable_labels / ctx->fmtlist starts
 * with a non-zero byte. For a format starting with '%', the next character
 * selects the alignment ('-' left, '~' center, anything else right), and a
 * "%<n>s" or "%-<n>s" format supplies the display width.
 *
 * Returns the calloc'd variable, or NULL when the allocation fails.
 * NOTE(review): callers presumably release the result with free() -- confirm.
 */
static readstat_variable_t *dta_init_variable(dta_ctx_t *ctx, int i, readstat_type_t type, size_t max_len) {
    readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t));
    if (variable == NULL)
        return NULL;

    variable->type = type;
    variable->index = i;
    variable->storage_width = max_len;

    readstat_convert(variable->name, sizeof(variable->name),
            &ctx->varlist[ctx->variable_name_len*i],
            ctx->variable_name_len, ctx->converter);

    if (ctx->variable_labels[ctx->variable_labels_entry_len*i]) {
        readstat_convert(variable->label, sizeof(variable->label),
                &ctx->variable_labels[ctx->variable_labels_entry_len*i],
                ctx->variable_labels_entry_len, ctx->converter);
    }

    if (ctx->fmtlist[ctx->fmtlist_entry_len*i]) {
        readstat_convert(variable->format, sizeof(variable->format),
                &ctx->fmtlist[ctx->fmtlist_entry_len*i],
                ctx->fmtlist_entry_len, ctx->converter);

        if (variable->format[0] == '%') {
            if (variable->format[1] == '-') {
                variable->alignment = READSTAT_ALIGNMENT_LEFT;
            } else if (variable->format[1] == '~') {
                variable->alignment = READSTAT_ALIGNMENT_CENTER;
            } else {
                variable->alignment = READSTAT_ALIGNMENT_RIGHT;
            }
        }

        /* Only string formats ("%12s", "%-12s") carry a display width here. */
        int display_width;
        if (sscanf(variable->format, "%%%ds", &display_width) == 1 ||
                sscanf(variable->format, "%%-%ds", &display_width) == 1) {
            variable->display_width = display_width;
        }
    }

    return variable;
}
static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable, col_info_t *col_info, const char *col_data, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; char error_buf[ERROR_BUF_SIZE]; int cb_retval = 0; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = col_info->type; if (col_info->type == READSTAT_TYPE_STRING) { retval = readstat_convert(ctx->scratch_buffer, ctx->scratch_buffer_len, col_data, col_info->width, ctx->converter); if (retval != READSTAT_OK) { if (ctx->error_handler) { snprintf(error_buf, sizeof(error_buf), "ReadStat: Error converting string to specified encoding: %.*s\n", col_info->width, col_data); ctx->error_handler(error_buf, ctx->user_ctx); } goto cleanup; } value.v.string_value = ctx->scratch_buffer; } else if (col_info->type == READSTAT_TYPE_DOUBLE) { uint64_t val = 0; double dval = NAN; if (ctx->little_endian) { int k; for (k=0; k<col_info->width; k++) { val = (val << 8) | (unsigned char)col_data[col_info->width-1-k]; } } else { int k; for (k=0; k<col_info->width; k++) { val = (val << 8) | (unsigned char)col_data[k]; } } val <<= (8-col_info->width)*8; memcpy(&dval, &val, 8); if (isnan(dval)) { value.v.double_value = NAN; value.tag = ~((val >> 40) & 0xFF); if (value.tag) { value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else {
/*
 * Allocate and fill a readstat_variable_t for column `i`.
 *
 * The name always comes from ctx->varlist; the label and the format are only
 * converted when their entry in ctx->variable_labels / ctx->fmtlist starts
 * with a non-zero byte.
 *
 * Returns the calloc'd variable, or NULL when the allocation fails.
 * NOTE(review): callers presumably release the result with free() -- confirm.
 */
static readstat_variable_t *dta_init_variable(dta_ctx_t *ctx, int i, readstat_types_t type) {
    readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t));
    if (variable == NULL)
        return NULL;

    variable->type = type;
    variable->index = i;

    readstat_convert(variable->name, sizeof(variable->name),
            &ctx->varlist[ctx->variable_name_len*i],
            ctx->variable_name_len, ctx->converter);

    if (ctx->variable_labels[ctx->variable_labels_entry_len*i]) {
        readstat_convert(variable->label, sizeof(variable->label),
                &ctx->variable_labels[ctx->variable_labels_entry_len*i],
                ctx->variable_labels_entry_len, ctx->converter);
    }

    if (ctx->fmtlist[ctx->fmtlist_entry_len*i]) {
        readstat_convert(variable->format, sizeof(variable->format),
                &ctx->fmtlist[ctx->fmtlist_entry_len*i],
                ctx->fmtlist_entry_len, ctx->converter);
    }

    return variable;
}
static readstat_error_t xport_construct_format(char *dst, size_t dst_len, const char *src, size_t src_len, int width, int decimals) { char format[4*src_len+1]; readstat_error_t retval = readstat_convert(format, sizeof(format), src, src_len, NULL); if (decimals) { snprintf(dst, dst_len, "%s%d.%d", format, width, decimals); } else if (width) { snprintf(dst, dst_len, "%s%d", format, width); } else { strcpy(dst, format); } return retval; }
static readstat_error_t handle_data_value(const char *col_data, col_info_t *col_info, sas_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int cb_retval = 0; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = col_info->type; if (col_info->type == READSTAT_TYPE_STRING) { retval = readstat_convert(ctx->scratch_buffer, ctx->scratch_buffer_len, col_data, col_info->width, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = ctx->scratch_buffer; } else if (col_info->type == READSTAT_TYPE_DOUBLE) { uint64_t val = 0; double dval = NAN; if (ctx->little_endian) { int k; for (k=0; k<col_info->width; k++) { val = (val << 8) | (unsigned char)col_data[col_info->width-1-k]; } } else { int k; for (k=0; k<col_info->width; k++) { val = (val << 8) | (unsigned char)col_data[k]; } } val <<= (8-col_info->width)*8; memcpy(&dval, &val, 8); if (isnan(dval)) { value.v.double_value = NAN; value.tag = ~((val >> 40) & 0xFF); if (value.tag) { value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else {
/*
 * Copy the text addressed by `text_ref` (blob index, offset, length) into
 * `out_buffer`, converting it with ctx->converter.
 *
 * A zero-length reference yields an empty string. Returns READSTAT_ERROR_PARSE
 * when the blob index, the offset or the length falls outside the blob;
 * otherwise returns the result of readstat_convert().
 */
static readstat_error_t copy_text_ref(char *out_buffer, size_t out_buffer_len, sas_text_ref_t text_ref, sas_ctx_t *ctx) {
    int index_in_range = (text_ref.index >= 0 && text_ref.index < ctx->text_blob_count);

    if (!index_in_range) {
        return READSTAT_ERROR_PARSE;
    }

    /* An empty reference is valid whatever its offset says. */
    if (text_ref.length == 0) {
        *out_buffer = '\0';
        return READSTAT_OK;
    }

    if (text_ref.length < 0 || text_ref.offset < 0) {
        return READSTAT_ERROR_PARSE;
    }

    if (text_ref.offset + text_ref.length > ctx->text_blob_lengths[text_ref.index]) {
        return READSTAT_ERROR_PARSE;
    }

    char *text_start = ctx->text_blobs[text_ref.index] + text_ref.offset;

    return readstat_convert(out_buffer, out_buffer_len, text_start, text_ref.length, ctx->converter);
}
/*
 * Read the next record, convert the 40 bytes starting at offset 32 into the
 * file label, and report label, timestamp and version to
 * ctx->metadata_handler when one is installed.
 *
 * Returns READSTAT_OK, the error from xport_read_record() or
 * readstat_convert(), or READSTAT_ERROR_USER_ABORT when the handler returns
 * anything other than READSTAT_HANDLER_OK.
 */
static readstat_error_t xport_read_file_label_record(xport_ctx_t *ctx) {
    char line[LINE_LEN+1];
    char label[40*4+1];
    readstat_error_t status;

    status = xport_read_record(ctx, line);
    if (status != READSTAT_OK) {
        return status;
    }

    status = readstat_convert(label, sizeof(label), &line[32], 40, NULL);
    if (status != READSTAT_OK) {
        return status;
    }

    /* Without a handler there is nothing to report. */
    if (!ctx->metadata_handler) {
        return status;
    }

    if (ctx->metadata_handler(label, ctx->timestamp, ctx->version, ctx->user_ctx) != READSTAT_HANDLER_OK) {
        status = READSTAT_ERROR_USER_ABORT;
    }

    return status;
}
/*
 * Read a document record: a 32-bit line count followed by that many lines of
 * SPSS_DOC_LINE_SIZE bytes. Each line is converted with ctx->converter and
 * passed to ctx->note_handler together with its index. When no note handler
 * is installed the record is skipped through sav_skip_document_record().
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ on a failed or short read, the
 * error from readstat_convert(), or READSTAT_ERROR_USER_ABORT when the
 * handler returns non-zero.
 */
static readstat_error_t sav_read_document_record(sav_ctx_t *ctx) {
    if (!ctx->note_handler)
        return sav_skip_document_record(ctx);

    int32_t n_lines = 0;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;

    /* Compare with `!=`, not `<`: if read() reports failure with a negative
     * value, that value converts to a huge unsigned number next to sizeof
     * and a `<` test would let the error through. */
    if (io->read(&n_lines, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        n_lines = byteswap4(n_lines);

    char raw_buffer[SPSS_DOC_LINE_SIZE];
    char utf8_buffer[4*SPSS_DOC_LINE_SIZE+1];

    int i;
    for (i=0; i<n_lines; i++) {
        if (io->read(raw_buffer, SPSS_DOC_LINE_SIZE, io->io_ctx) != sizeof(raw_buffer)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(utf8_buffer, sizeof(utf8_buffer),
                raw_buffer, sizeof(raw_buffer), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (ctx->note_handler(i, utf8_buffer, ctx->user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

cleanup:
    return retval;
}
/*
 * Decode one uncompressed row held in `buffer` and report each value to
 * ctx->value_handler, then advance ctx->current_row.
 *
 * The row is walked in 8-byte steps. Numeric cells are copied into a double
 * (byte-swapped when ctx->bswap is set) and passed through
 * sav_tag_missing_double(). String cells are accumulated in ctx->raw_string
 * until all n_segments segments of the variable have been seen, then
 * converted into ctx->utf8_string and reported.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_PARSE when more than 32 consecutive
 * 8-byte steps are spent inside one column, the error from
 * readstat_convert(), or READSTAT_ERROR_USER_ABORT when the handler returns
 * non-zero.
 */
static readstat_error_t sav_process_row(unsigned char *buffer, size_t buffer_len, sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    double fp_value;
    int offset = 0;
    readstat_off_t data_offset = 0;
    size_t raw_str_used = 0;
    int segment_offset = 0;
    int var_index = 0, col = 0;

    while (data_offset < buffer_len && col < ctx->var_index) {
        spss_varinfo_t *col_info = &ctx->varinfo[col];
        spss_varinfo_t *var_info = &ctx->varinfo[var_index];
        readstat_value_t value = { .type = var_info->type };

        if (offset > 31) {
            retval = READSTAT_ERROR_PARSE;
            goto done;
        }

        if (var_info->type == READSTAT_TYPE_STRING) {
            /* Bytes that do not fit in the raw string buffer are dropped. */
            if (raw_str_used + 8 <= ctx->raw_string_len) {
                memcpy(ctx->raw_string + raw_str_used, &buffer[data_offset], 8);
                raw_str_used += 8;
            }
            if (++offset == col_info->width) {
                /* One byte is given back between segments of the same variable. */
                if (++segment_offset < var_info->n_segments) {
                    raw_str_used--;
                }
                offset = 0;
                col++;
            }
            if (segment_offset == var_info->n_segments) {
                retval = readstat_convert(ctx->utf8_string, ctx->utf8_string_len,
                        ctx->raw_string, raw_str_used, ctx->converter);
                if (retval != READSTAT_OK)
                    goto done;

                value.v.string_value = ctx->utf8_string;
                if (ctx->value_handler(ctx->current_row, ctx->variables[var_info->index],
                            value, ctx->user_ctx)) {
                    retval = READSTAT_ERROR_USER_ABORT;
                    goto done;
                }
                raw_str_used = 0;
                segment_offset = 0;
                var_index += var_info->n_segments;
            }
        } else if (var_info->type == READSTAT_TYPE_DOUBLE) {
            memcpy(&fp_value, &buffer[data_offset], 8);
            if (ctx->bswap) {
                fp_value = byteswap_double(fp_value);
            }
            value.v.double_value = fp_value;
            sav_tag_missing_double(&value, ctx);
            if (ctx->value_handler(ctx->current_row, ctx->variables[var_info->index],
                        value, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto done;
            }
            var_index += var_info->n_segments;
            col++;
        }
        data_offset += 8;
    }
    ctx->current_row++;

done:
    return retval;
}

/*
 * Read uncompressed rows of ctx->var_offset * 8 bytes each and feed them to
 * sav_process_row() until ctx->current_row reaches ctx->row_limit.
 *
 * A read that does not deliver a full row ends the loop without an error.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_MALLOC when the row buffer cannot be
 * allocated, or the error from sav_update_progress() / sav_process_row().
 */
static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    unsigned char *buffer = NULL;
    size_t bytes_read = 0;
    size_t buffer_len = ctx->var_offset * 8;

    buffer = malloc(buffer_len);
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * that fails counts as an allocation error. */
    if (buffer == NULL && buffer_len > 0) {
        retval = READSTAT_ERROR_MALLOC;
        goto done;
    }

    while (ctx->current_row < ctx->row_limit) {
        retval = sav_update_progress(ctx);
        if (retval != READSTAT_OK)
            goto done;

        if ((bytes_read = io->read(buffer, buffer_len, io->io_ctx)) != buffer_len)
            goto done;

        retval = sav_process_row(buffer, buffer_len, ctx);
        if (retval != READSTAT_OK)
            goto done;
    }

done:
    free(buffer);
    return retval;
}
readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *filename, void *user_ctx) { readstat_error_t retval = READSTAT_OK; int i; size_t record_len = 0; int fd = -1; char *buf = NULL; dta_header_t header; dta_ctx_t *ctx = NULL; char str_buf[2048]; char *long_string = NULL; size_t file_size = 0; if ((fd = readstat_open(filename)) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } char magic[4]; if (read(fd, magic, 4) != 4) { retval = READSTAT_ERROR_READ; goto cleanup; } file_size = readstat_lseek(fd, 0, SEEK_END); if (file_size == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (readstat_lseek(fd, 0, SEEK_SET) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (strncmp(magic, "<sta", 4) == 0) { retval = dta_read_xmlish_preamble(fd, ctx, &header); } else { if (read(fd, &header, sizeof(header)) != sizeof(header)) { retval = READSTAT_ERROR_READ; goto cleanup; } } if ((ctx = dta_ctx_init(header.nvar, header.nobs, header.byteorder, header.ds_format)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } ctx->user_ctx = user_ctx; ctx->file_size = file_size; ctx->progress_handler = parser->progress_handler; retval = dta_update_progress(fd, ctx); if (retval != READSTAT_OK) goto cleanup; if (parser->info_handler) { if (parser->info_handler(ctx->nobs, ctx->nvar, user_ctx)) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if (ctx->file_is_xmlish) { uint16_t label_len = 0; unsigned char timestamp_len; if ((retval = dta_read_tag(fd, ctx, "<label>")) != READSTAT_OK) { goto cleanup; } if (ctx->data_label_len_len == 2) { if (read(fd, &label_len, sizeof(uint16_t)) != sizeof(uint16_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } label_len = ctx->machine_needs_byte_swap ? 
byteswap2(label_len) : label_len; } else if (ctx->data_label_len_len == 1) { unsigned char label_len_char; if (read(fd, &label_len_char, sizeof(unsigned char)) != sizeof(unsigned char)) { retval = READSTAT_ERROR_READ; goto cleanup; } label_len = label_len_char; } if (readstat_lseek(fd, label_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "</label>")) != READSTAT_OK) { goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "<timestamp>")) != READSTAT_OK) { goto cleanup; } if (read(fd, ×tamp_len, 1) != 1) { retval = READSTAT_ERROR_READ; goto cleanup; } if (readstat_lseek(fd, timestamp_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "</timestamp>")) != READSTAT_OK) { goto cleanup; } } else { if (readstat_lseek(fd, ctx->data_label_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (ctx->time_stamp_len) { if (readstat_lseek(fd, ctx->time_stamp_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } } if ((retval = dta_read_tag(fd, ctx, "</header>")) != READSTAT_OK) { goto cleanup; } if (dta_read_map(fd, ctx) != READSTAT_OK) { retval = READSTAT_ERROR_READ; goto cleanup; } if (dta_read_descriptors(fd, ctx) != READSTAT_OK) { retval = READSTAT_ERROR_READ; goto cleanup; } for (i=0; i<ctx->nvar; i++) { size_t max_len; readstat_types_t type = dta_type_info(ctx->typlist[i], &max_len, ctx); record_len += max_len; if (type == READSTAT_TYPE_STRING) max_len++; /* might append NULL */ if (parser->variable_handler) { readstat_variable_t *variable = dta_init_variable(ctx, i, type); const char *value_labels = NULL; if (ctx->lbllist[ctx->lbllist_entry_len*i]) value_labels = &ctx->lbllist[ctx->lbllist_entry_len*i]; int cb_retval = parser->variable_handler(i, variable, value_labels, user_ctx); free(variable); if (cb_retval) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } if ((retval = dta_skip_expansion_fields(fd, ctx)) != 
READSTAT_OK) { goto cleanup; } if (record_len == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "<data>")) != READSTAT_OK) { goto cleanup; } if ((retval = dta_update_progress(fd, ctx)) != READSTAT_OK) { goto cleanup; } if ((buf = malloc(record_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } for (i=0; i<ctx->nobs; i++) { if (read(fd, buf, record_len) != record_len) { retval = READSTAT_ERROR_READ; goto cleanup; } int j; off_t offset = 0; for (j=0; j<ctx->nvar; j++) { size_t max_len; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = dta_type_info(ctx->typlist[j], &max_len, ctx); if (value.type == READSTAT_TYPE_STRING) { readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter); value.v.string_value = str_buf; } else if (value.type == READSTAT_TYPE_LONG_STRING) { uint32_t v, o; v = *((uint32_t *)&buf[offset]); o = *((uint32_t *)&buf[offset+4]); if (ctx->machine_needs_byte_swap) { v = byteswap4(v); o = byteswap4(o); } if (v > 0 && o > 0) { off_t cur_pos = readstat_lseek(fd, 0, SEEK_CUR); if (cur_pos == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } retval = dta_read_long_string(fd, ctx, v, o, &long_string); if (retval != READSTAT_OK) { goto cleanup; } value.v.string_value = long_string; if (readstat_lseek(fd, cur_pos, SEEK_SET) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } } else if (value.type == READSTAT_TYPE_CHAR) { char byte = buf[offset]; if (ctx->machine_is_twos_complement) { byte = ones_to_twos_complement1(byte); } if (byte > DTA_MAX_CHAR) { value.is_system_missing = 1; if (byte > DTA_MISSING_CHAR) { value.tag = 'a' + (byte - DTA_MISSING_CHAR_A); } } value.v.char_value = byte; } else if (value.type == READSTAT_TYPE_INT16) { int16_t num = *((int16_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap2(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement2(num); } if (num > DTA_MAX_INT16) { 
value.is_system_missing = 1; if (num > DTA_MISSING_INT16) { value.tag = 'a' + (num - DTA_MISSING_INT16_A); } } value.v.i16_value = num; } else if (value.type == READSTAT_TYPE_INT32) { int32_t num = *((int32_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement4(num); } if (num > DTA_MAX_INT32) { value.is_system_missing = 1; if (num > DTA_MISSING_INT32) { value.tag = 'a' + (num - DTA_MISSING_INT32_A); } } value.v.i32_value = num; } else if (value.type == READSTAT_TYPE_FLOAT) { uint32_t num = *((uint32_t *)&buf[offset]); float f_num = NAN; if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (num > DTA_MAX_FLOAT) { value.is_system_missing = 1; if (num > DTA_MISSING_FLOAT) { value.tag = 'a' + ((num - DTA_MISSING_FLOAT_A) >> 11); } } else {
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; const char *lbp1 = value_start; uint32_t *value_offset = calloc(label_count_used, sizeof(uint32_t)); /* Doubles appear to be stored as big-endian, always */ int bswap_doubles = machine_is_little_endian(); int is_string = (name[0] == '$'); /* Pass 1 -- find out the offset of the labels */ for (i=0; i<label_count_capacity; i++) { if (&lbp1[2] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (i<label_count_used) { uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap); if (label_pos >= label_count_used) { retval = READSTAT_ERROR_PARSE; goto cleanup; } value_offset[label_pos] = lbp1 - value_start; } lbp1 += 6 + lbp1[2]; } const char *lbp2 = lbp1; /* Pass 2 -- parse pairs of values & labels */ for (i=0; i<label_count_used && i<label_count_capacity; i++) { lbp1 = value_start + value_offset[i]; if (&lbp1[30] - value_start > value_labels_len || &lbp2[10] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } size_t label_len = sas_read2(&lbp2[8], ctx->bswap); size_t value_entry_len = 6 + lbp1[2]; const char *label = &lbp2[10]; readstat_value_t value = { .type = is_string ? 
READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE }; if (is_string) { char val[4*16+1]; retval = readstat_convert(val, sizeof(val), &lbp1[value_entry_len-16], 16, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = val; } else { uint64_t val = sas_read8(&lbp1[22], bswap_doubles); double dval = NAN; if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) { value.tag = (val >> 40); if (value.tag) { value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else { memcpy(&dval, &val, 8); dval *= -1.0; } value.v.double_value = dval; } if (ctx->value_label_handler) { if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } lbp2 += 8 + 2 + label_len + 1; }
/*
 * Parse a SAS catalog file.
 *
 * Steps, in order: read the header; open an iconv converter when input and
 * output encodings are both known and differ; report the file label to the
 * metadata handler; build the block index from the first index page and from
 * every page whose bytes at offset 16 read "XLSR"; sort and de-duplicate the
 * index; then read and parse every indexed block.
 *
 *   parser    supplies the I/O callbacks, handlers and encodings
 *   path      passed to io->open()
 *   user_ctx  opaque pointer handed back to the handlers
 *
 * Returns READSTAT_OK or the first error encountered. Allocation failures
 * are reported as READSTAT_ERROR_MALLOC. Files whose header sets u64 are
 * rejected with READSTAT_ERROR_PARSE.
 */
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas7bcat_ctx_t *ctx = calloc(1, sizeof(sas7bcat_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));
    /* Nothing has been opened yet and both objects are still empty, so they
     * are released directly instead of going through the cleanup label. */
    if (ctx == NULL || hinfo == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->block_pointers_capacity = 200;
    ctx->block_pointers = malloc(ctx->block_pointers_capacity * sizeof(uint64_t));
    if (ctx->block_pointers == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->value_label_handler = parser->value_label_handler;
    ctx->metadata_handler = parser->metadata_handler;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->io = io;

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(io, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;
    if (ctx->input_encoding == NULL) {
        ctx->input_encoding = hinfo->encoding;
    }

    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->input_encoding && ctx->output_encoding &&
            strcmp(ctx->input_encoding, ctx->output_encoding) != 0) {
        iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if (parser->metadata_handler) {
        char file_label[4*64+1];
        retval = readstat_convert(file_label, sizeof(file_label),
                hinfo->file_label, sizeof(hinfo->file_label), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (ctx->metadata_handler(file_label, hinfo->modification_time,
                    10000 * hinfo->major_version + hinfo->minor_version,
                    ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    if (io->seek(ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size,
                READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas7bcat_augment_index(&page[856+2*ctx->pad1], ctx->page_size - 856 - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (io->seek(ctx->header_size+i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) {
            sas7bcat_augment_index(&page[16], ctx->page_size - 16, ctx);
        }
    }

    sas7bcat_sort_index(ctx);
    sas7bcat_uniq_index(ctx);

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas7bcat_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }

        /* Grow through a temporary so the old buffer is still freed at
         * cleanup if realloc() fails. */
        char *grown = realloc(buffer, buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        buffer = grown;

        if ((retval = sas7bcat_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas7bcat_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    io->close(io->io_ctx);
    free(page);
    free(buffer);
    sas7bcat_ctx_free(ctx);
    free(hinfo);

    return retval;
}
/*
 * Read ctx->var_count NAMESTR records, build a readstat_variable_t for each
 * one in ctx->variables[], read the record(s) that follow the NAMESTR block,
 * then report every variable to ctx->variable_handler and accumulate
 * ctx->row_length from the storage widths.
 *
 * For version 5 files the observation header follows directly. Otherwise the
 * next header record selects what comes next: "OBSV8" (nothing more to read),
 * "LABELV8" or "LABELV9" (label definitions); any other name is ignored.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ on a failed or short NAMESTR read,
 * READSTAT_ERROR_MALLOC when a variable cannot be allocated,
 * READSTAT_ERROR_USER_ABORT when the handler returns READSTAT_HANDLER_ABORT,
 * or the error from a conversion or record helper.
 */
static readstat_error_t xport_read_variables(xport_ctx_t *ctx) {
    int i;
    readstat_error_t retval = READSTAT_OK;
    /* Variable under construction; owned here until stored in ctx->variables. */
    readstat_variable_t *pending = NULL;

    for (i=0; i<ctx->var_count; i++) {
        xport_namestr_t namestr;
        ssize_t bytes_read = read_bytes(ctx, &namestr, sizeof(xport_namestr_t));
        /* Check the sign first: a negative count compared against sizeof
         * would be converted to a huge unsigned value and slip through. */
        if (bytes_read < 0 || (size_t)bytes_read < sizeof(xport_namestr_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        xport_namestr_bswap(&namestr);

        pending = calloc(1, sizeof(readstat_variable_t));
        if (pending == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }

        pending->index = i;
        pending->type = namestr.ntype == SAS_COLUMN_TYPE_CHR ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE;
        pending->storage_width = namestr.nlng;
        pending->display_width = namestr.nfl;
        pending->decimals = namestr.nfd;
        pending->alignment = namestr.nfj ? READSTAT_ALIGNMENT_RIGHT : READSTAT_ALIGNMENT_LEFT;

        retval = readstat_convert(pending->name, sizeof(pending->name),
                namestr.nname, sizeof(namestr.nname), NULL);
        if (retval != READSTAT_OK)
            goto cleanup;

        retval = readstat_convert(pending->label, sizeof(pending->label),
                namestr.nlabel, sizeof(namestr.nlabel), NULL);
        if (retval != READSTAT_OK)
            goto cleanup;

        retval = xport_construct_format(pending->format, sizeof(pending->format),
                namestr.nform, sizeof(namestr.nform),
                pending->display_width, pending->decimals);
        if (retval != READSTAT_OK)
            goto cleanup;

        ctx->variables[i] = pending;
        pending = NULL;
    }

    retval = xport_skip_rest_of_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    if (ctx->version == 5) {
        retval = xport_read_obs_header_record(ctx);
        if (retval != READSTAT_OK)
            goto cleanup;
    } else {
        xport_header_record_t xrecord;
        retval = xport_read_header_record(ctx, &xrecord);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (strcmp(xrecord.name, "OBSV8") == 0) {
            /* void */
        } else if (strcmp(xrecord.name, "LABELV8") == 0) {
            retval = xport_read_labels_v8(ctx, xrecord.num1);
        } else if (strcmp(xrecord.name, "LABELV9") == 0) {
            retval = xport_read_labels_v9(ctx, xrecord.num1);
        }
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    ctx->row_length = 0;

    int index_after_skipping = 0;

    for (i=0; i<ctx->var_count; i++) {
        readstat_variable_t *variable = ctx->variables[i];
        variable->index_after_skipping = index_after_skipping;

        int cb_retval = READSTAT_HANDLER_OK;
        if (ctx->variable_handler) {
            cb_retval = ctx->variable_handler(i, variable, variable->format, ctx->user_ctx);
        }
        if (cb_retval == READSTAT_HANDLER_ABORT) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }

        if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) {
            variable->skip = 1;
        } else {
            index_after_skipping++;
        }

        /* Skipped variables still occupy space in each row. */
        ctx->row_length += variable->storage_width;
    }

cleanup:
    free(pending);
    return retval;
}
static readstat_error_t xport_process_row(xport_ctx_t *ctx, const char *row, size_t row_length) { readstat_error_t retval = READSTAT_OK; int i; off_t pos = 0; char *string = NULL; for (i=0; i<ctx->var_count; i++) { readstat_variable_t *variable = ctx->variables[i]; readstat_value_t value = { .type = variable->type }; if (variable->type == READSTAT_TYPE_STRING) { string = realloc(string, 4*variable->storage_width+1); retval = readstat_convert(string, 4*variable->storage_width+1, &row[pos], variable->storage_width, NULL); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = string; } else { double dval = NAN; if (variable->storage_width <= XPORT_MAX_DOUBLE_SIZE && variable->storage_width >= XPORT_MIN_DOUBLE_SIZE) { char full_value[8] = { 0 }; if (memcmp(&full_value[1], &row[pos+1], variable->storage_width - 1) == 0 && (row[pos] == '_' || row[pos] == '.' || (row[pos] >= 'A' && row[pos] <= 'Z'))) { if (row[pos] == '.') { value.is_system_missing = 1; } else { value.tag = row[pos]; value.is_tagged_missing = 1; } } else { memcpy(full_value, &row[pos], variable->storage_width); int rc = cnxptiee(full_value, CN_TYPE_XPORT, &dval, CN_TYPE_NATIVE); if (rc != 0) { retval = READSTAT_ERROR_CONVERT; goto cleanup; } } } value.v.double_value = dval; } pos += variable->storage_width; if (ctx->value_handler(ctx->parsed_row_count, variable, value, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } cleanup: free(string); return retval; } static readstat_error_t xport_read_data(xport_ctx_t *ctx) { if (!ctx->row_length) return READSTAT_OK; if (!ctx->value_handler) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; char *row = malloc(ctx->row_length); char *blank_row = malloc(ctx->row_length); memset(blank_row, ' ', ctx->row_length); int num_blank_rows = 0; while (1) { ssize_t bytes_read = read_bytes(ctx, row, ctx->row_length); if (bytes_read == -1) { retval = READSTAT_ERROR_READ; goto cleanup; } else if (bytes_read < 
ctx->row_length) { break; } off_t pos = 0; int row_is_blank = 1; for (pos=0; pos<ctx->row_length; pos++) { if (row[pos] != ' ') { row_is_blank = 0; break; } } if (row_is_blank) { num_blank_rows++; continue; } while (num_blank_rows) { retval = xport_process_row(ctx, blank_row, ctx->row_length); if (retval != READSTAT_OK) goto cleanup; if (++(ctx->parsed_row_count) == ctx->row_limit) goto cleanup; num_blank_rows--; } retval = xport_process_row(ctx, row, ctx->row_length); if (retval != READSTAT_OK) goto cleanup; retval = xport_update_progress(ctx); if (retval != READSTAT_OK) goto cleanup; if (++(ctx->parsed_row_count) == ctx->row_limit) break; } cleanup: free(row); return retval; } readstat_error_t readstat_parse_xport(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; xport_ctx_t *ctx = xport_ctx_init(); ctx->info_handler = parser->info_handler; ctx->metadata_handler = parser->metadata_handler; ctx->note_handler = parser->note_handler; ctx->variable_handler = parser->variable_handler; ctx->value_handler = parser->value_handler; ctx->value_label_handler = parser->value_label_handler; ctx->error_handler = parser->error_handler; ctx->progress_handler = parser->progress_handler; ctx->user_ctx = user_ctx; ctx->io = io; ctx->row_limit = parser->row_limit; if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } if ((ctx->file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx)) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } retval = xport_read_library_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_skip_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_timestamp_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_expect_header_record(ctx, "MEMBER", "MEMBV8"); if (retval != READSTAT_OK) goto 
cleanup; retval = xport_expect_header_record(ctx, "DSCRPTR", "DSCPTV8"); if (retval != READSTAT_OK) goto cleanup; retval = xport_skip_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_file_label_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_namestr_header_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_variables(ctx); if (retval != READSTAT_OK) goto cleanup; if (ctx->row_length) { retval = xport_read_data(ctx); if (retval != READSTAT_OK) goto cleanup; } cleanup: io->close(io->io_ctx); xport_ctx_free(ctx); return retval; }
readstat_error_t sav_parse_timestamp(sav_ctx_t *ctx, sav_file_header_record_t *header) { readstat_error_t retval = READSTAT_OK; struct tm timestamp = { .tm_isdst = -1 }; if ((retval = sav_parse_time(header->creation_time, sizeof(header->creation_time), ×tamp, ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_parse_date(header->creation_date, sizeof(header->creation_date), ×tamp, ctx)) != READSTAT_OK) goto cleanup; ctx->timestamp = mktime(×tamp); cleanup: return retval; } readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; sav_file_header_record_t header; sav_ctx_t *ctx = NULL; size_t file_size = 0; if (io->open(path, io->io_ctx) == -1) { return READSTAT_ERROR_OPEN; } file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx); if (file_size == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(&header, sizeof(sav_file_header_record_t), io->io_ctx) < sizeof(sav_file_header_record_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } ctx = sav_ctx_init(&header, io); if (ctx == NULL) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->progress_handler = parser->progress_handler; ctx->error_handler = parser->error_handler; ctx->note_handler = parser->note_handler; ctx->value_handler = parser->value_handler; ctx->value_label_handler = parser->value_label_handler; ctx->input_encoding = parser->input_encoding; ctx->output_encoding = parser->output_encoding; ctx->user_ctx = user_ctx; ctx->file_size = file_size; if (ctx->record_count == -1 || (parser->row_limit > 0 && parser->row_limit < ctx->record_count)) { ctx->row_limit = parser->row_limit; } else { ctx->row_limit = ctx->record_count; } if ((retval = sav_parse_timestamp(ctx, &header)) != READSTAT_OK) goto cleanup; if ((retval = sav_parse_records_pass1(ctx)) != READSTAT_OK) goto cleanup; if 
(io->seek(sizeof(sav_file_header_record_t), READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = sav_update_progress(ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_parse_records_pass2(ctx)) != READSTAT_OK) goto cleanup; sav_set_n_segments_and_var_count(ctx); if (parser->info_handler) { if (parser->info_handler(ctx->record_count == -1 ? -1 : ctx->row_limit, ctx->var_count, ctx->user_ctx)) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if (parser->metadata_handler) { if ((retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), header.file_label, sizeof(header.file_label), ctx->converter)) != READSTAT_OK) goto cleanup; if (parser->metadata_handler(ctx->file_label, ctx->timestamp, 2, ctx->user_ctx)) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } sav_parse_variable_display_parameter_record(ctx); if ((retval = sav_handle_variables(parser, ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_handle_fweight(parser, ctx)) != READSTAT_OK) goto cleanup; if (ctx->value_handler) { retval = sav_read_data(ctx); } cleanup: io->close(io->io_ctx); if (ctx) sav_ctx_free(ctx); return retval; }
/*
 * Read and decode the data rows of a DTA file.
 *
 * Allocates a buffer of ctx->record_len bytes, then for each of up to
 * ctx->row_limit rows reads one record into it and walks the ctx->nvar
 * variables, asking dta_type_info() for each variable's type:
 *   - strings are converted into the local 2048-byte str_buf;
 *   - string references are looked up in ctx->strls with bsearch() and then
 *     reported as plain strings;
 *   - 8/16/32-bit integers are byte-swapped and complement-converted as the
 *     ctx flags request, and values above ctx->max_int8/16/32 are flagged as
 *     tagged missing (when ctx->supports_tagged_missing allows) or system
 *     missing;
 *   - floats are read as a 32-bit pattern, byte-swapped if needed, and
 *     patterns above ctx->max_float are flagged as missing in the same way.
 *
 * Fails with READSTAT_ERROR_MALLOC when the row buffer cannot be allocated
 * and READSTAT_ERROR_READ on a short read.
 *
 * NOTE(review): the definition is cut off in the reviewed excerpt (it stops
 * inside the float branch), so the rest of the per-value handling and the
 * cleanup path are not described here.
 */
static readstat_error_t dta_handle_rows(dta_ctx_t *ctx) { readstat_io_t *io = ctx->io; char *buf = NULL; char str_buf[2048]; int i; readstat_error_t retval = READSTAT_OK; if ((buf = malloc(ctx->record_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } for (i=0; i<ctx->row_limit; i++) { if (io->read(buf, ctx->record_len, io->io_ctx) != ctx->record_len) { retval = READSTAT_ERROR_READ; goto cleanup; } int j; off_t offset = 0; for (j=0; j<ctx->nvar; j++) { size_t max_len; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = dta_type_info(ctx->typlist[j], &max_len, ctx); if (value.type == READSTAT_TYPE_STRING) { readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter); value.v.string_value = str_buf; } else if (value.type == READSTAT_TYPE_STRING_REF) { dta_strl_t key; dta_interpret_strl_vo_bytes(ctx, (unsigned char *)&buf[offset], &key); dta_strl_t **found = bsearch(&key, ctx->strls, ctx->strls_count, sizeof(dta_strl_t *), &dta_compare_strls); if (found) { value.v.string_value = (*found)->data; } value.type = READSTAT_TYPE_STRING; } else if (value.type == READSTAT_TYPE_INT8) { int8_t byte = buf[offset]; if (ctx->machine_is_twos_complement) { byte = ones_to_twos_complement1(byte); } if (byte > ctx->max_int8) { if (ctx->supports_tagged_missing && byte > DTA_113_MISSING_INT8) { value.tag = 'a' + (byte - DTA_113_MISSING_INT8_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i8_value = byte; } else if (value.type == READSTAT_TYPE_INT16) { int16_t num = *((int16_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap2(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement2(num); } if (num > ctx->max_int16) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT16) { value.tag = 'a' + (num - DTA_113_MISSING_INT16_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i16_value = num; } else if (value.type 
== READSTAT_TYPE_INT32) { int32_t num = *((int32_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement4(num); } if (num > ctx->max_int32) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT32) { value.tag = 'a' + (num - DTA_113_MISSING_INT32_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i32_value = num; } else if (value.type == READSTAT_TYPE_FLOAT) { int32_t num = *((int32_t *)&buf[offset]); float f_num = NAN; if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (num > ctx->max_float) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_FLOAT) { value.tag = 'a' + ((num - DTA_113_MISSING_FLOAT_A) >> 11); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else {
/*
 * Read one variable record from the SAV stream into ctx->varinfo.
 *
 * A record with a negative type is a continuation of the previous variable:
 * that variable's width and ctx->var_offset are bumped and no new entry is
 * created. Otherwise a new spss_varinfo_t is filled in at ctx->var_index
 * (name, print/write formats, optional label, optional missing values) and
 * both ctx->var_index and ctx->var_offset are advanced.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_MALLOC on allocation
 * failure, READSTAT_ERROR_READ on a short read, READSTAT_ERROR_PARSE on
 * malformed input (continuation with no previous variable, negative label
 * length, more than three missing values), or a readstat_convert() error.
 * On failure any label allocated for the partially built entry is released.
 */
static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    sav_variable_record_t variable;
    readstat_error_t retval = READSTAT_OK;
    readstat_type_t dta_type = READSTAT_TYPE_DOUBLE;
    spss_varinfo_t *info = NULL;
    char *label_buf = NULL;
    int32_t type;
    int i;

    if (ctx->var_index == ctx->varinfo_capacity) {
        /* Grow through a temporary so the existing array (and the recorded
         * capacity) stay valid if realloc() fails. */
        void *grown = realloc(ctx->varinfo,
                (size_t)ctx->varinfo_capacity * 2 * sizeof(spss_varinfo_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->varinfo = grown;
        ctx->varinfo_capacity *= 2;
    }

    /* "!=" rather than "<": compared against a size_t, a negative error
     * return would be converted to a huge value and slip past "<". */
    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) != sizeof(sav_variable_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    variable.print = ctx->bswap ? byteswap4(variable.print) : variable.print;
    variable.write = ctx->bswap ? byteswap4(variable.write) : variable.write;
    type = ctx->bswap ? byteswap4(variable.type) : variable.type;

    if (type < 0) {
        /* Continuation record: widen the previous variable instead of
         * creating a new entry. */
        if (ctx->var_index == 0) {
            return READSTAT_ERROR_PARSE;
        }
        ctx->var_offset++;
        ctx->varinfo[ctx->var_index - 1].width++;
        return READSTAT_OK;
    }

    if (type > 0) {
        /* Positive type marks a string variable (the value is presumably
         * its length, which is not used here). */
        dta_type = READSTAT_TYPE_STRING;
    }

    info = &ctx->varinfo[ctx->var_index];
    memset(info, 0, sizeof(spss_varinfo_t));
    info->width = 1;
    info->n_segments = 1;
    info->index = ctx->var_index;
    info->offset = ctx->var_offset;
    info->type = dta_type;

    /* The long name starts out identical to the short name. */
    retval = readstat_convert(info->name, sizeof(info->name),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = readstat_convert(info->longname, sizeof(info->longname),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    /* Formats are packed one field per byte: decimals, width, type. */
    info->print_format.decimal_places = (variable.print & 0x000000FF);
    info->print_format.width = (variable.print & 0x0000FF00) >> 8;
    info->print_format.type = (variable.print & 0x00FF0000) >> 16;

    info->write_format.decimal_places = (variable.write & 0x000000FF);
    info->write_format.width = (variable.write & 0x0000FF00) >> 8;
    info->write_format.type = (variable.write & 0x00FF0000) >> 16;

    if (variable.has_var_label) {
        const size_t max_label_len = ((size_t)-1 - 1) / 4;
        int32_t label_len;
        size_t label_capacity;
        size_t out_label_len;

        if (io->read(&label_len, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        label_len = ctx->bswap ? byteswap4(label_len) : label_len;

        /* The length comes from the file: reject values that would wrap the
         * size computations below. */
        if (label_len < 0 || (size_t)label_len > max_label_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* The label occupies a multiple of 4 bytes in the file. */
        label_capacity = ((size_t)label_len + 3) / 4 * 4;
        /* Output gets 4 bytes per input byte plus a terminator. */
        out_label_len = (size_t)label_len * 4 + 1;

        /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
        label_buf = malloc(label_capacity > 0 ? label_capacity : 1);
        info->label = malloc(out_label_len);
        if (label_buf == NULL || info->label == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }

        if (io->read(label_buf, label_capacity, io->io_ctx) != label_capacity) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(info->label, out_label_len, label_buf, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    info->labels_index = -1;

    if (variable.n_missing_values) {
        int32_t n_missing = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;
        size_t missing_size;

        /* Range-check before negating: negating INT32_MIN would overflow. */
        if (n_missing < -3 || n_missing > 3) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* A negative count flags a missing-value range. */
        if (n_missing < 0) {
            info->missing_range = 1;
            n_missing = -n_missing;
        } else {
            info->missing_range = 0;
        }
        info->n_missing_values = n_missing;

        missing_size = (size_t)n_missing * sizeof(double);
        if (io->read(info->missing_values, missing_size, io->io_ctx) != missing_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        for (i = 0; i < info->n_missing_values; i++) {
            if (ctx->bswap) {
                info->missing_values[i] = byteswap_double(info->missing_values[i]);
            }

            /* Compare the raw bit pattern against the file's sentinel values. */
            uint64_t long_value = 0;
            memcpy(&long_value, &info->missing_values[i], 8);
            if (long_value == ctx->missing_double)
                info->missing_values[i] = NAN;
            if (long_value == ctx->lowest_double)
                info->missing_values[i] = -HUGE_VAL;
            if (long_value == ctx->highest_double)
                info->missing_values[i] = HUGE_VAL;
        }
    }

    ctx->var_index++;
    ctx->var_offset++;

cleanup:
    free(label_buf);
    if (retval != READSTAT_OK && info != NULL) {
        /* The entry was not committed (var_index unchanged): drop its label. */
        free(info->label);
        info->label = NULL;
    }
    return retval;
}
/*
 * Parse a long-string value labels record held in memory and report each
 * (value, label) pair through ctx->value_label_handler.
 *
 * data     - start of the record payload.
 * data_len - number of bytes available at `data`.
 * ctx      - parse context; does nothing when no value label handler is set.
 *
 * The record names a variable, which is matched against the long names in
 * ctx->varinfo. The matching variable receives a fresh labels_index (taken
 * from ctx->value_labels_count, which is incremented), and the label set is
 * named SAV_LABEL_NAME_PREFIX followed by that index.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_PARSE when the record is
 * truncated, contains a negative length, or names an unknown variable,
 * READSTAT_ERROR_MALLOC on allocation failure, or a readstat_convert() error.
 */
static readstat_error_t sav_parse_long_value_labels_record(const void *data, size_t data_len, sav_ctx_t *ctx) {
    if (!ctx->value_label_handler)
        return READSTAT_OK;

    readstat_error_t retval = READSTAT_OK;
    int32_t label_name_len = 0;
    int32_t label_count = 0;
    int32_t i = 0;
    const char *data_ptr = data;
    const char *data_end = data_ptr + data_len;
    char var_name_buf[256*4+1];
    char label_name_buf[256];
    char *value_buffer = NULL;
    char *label_buffer = NULL;

    memset(label_name_buf, '\0', sizeof(label_name_buf));

    /*
     * All bounds checks compare against the bytes remaining
     * (data_end - data_ptr) instead of forming data_ptr + len, so neither a
     * negative nor an oversized length read from the file can move the
     * pointer outside the buffer.
     */
    if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    memcpy(&label_name_len, data_ptr, sizeof(int32_t));
    if (ctx->bswap)
        label_name_len = byteswap4(label_name_len);
    data_ptr += sizeof(int32_t);

    if (label_name_len < 0 || (size_t)label_name_len > (size_t)(data_end - data_ptr)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    retval = readstat_convert(var_name_buf, sizeof(var_name_buf), data_ptr, label_name_len, ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;
    data_ptr += label_name_len;

    /* Find the variable by long name, stepping over each variable's segments. */
    for (i = 0; i < ctx->var_index;) {
        spss_varinfo_t *info = &ctx->varinfo[i];
        if (strcmp(var_name_buf, info->longname) == 0) {
            info->labels_index = ctx->value_labels_count++;
            snprintf(label_name_buf, sizeof(label_name_buf),
                    SAV_LABEL_NAME_PREFIX "%d", info->labels_index);
            break;
        }
        i += info->n_segments;
    }

    /* No match: the label name was never filled in. */
    if (label_name_buf[0] == '\0') {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* A 4-byte field that this parser does not use, then the label count. */
    if ((size_t)(data_end - data_ptr) < 2 * sizeof(int32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    data_ptr += sizeof(int32_t);
    memcpy(&label_count, data_ptr, sizeof(int32_t));
    if (ctx->bswap)
        label_count = byteswap4(label_count);
    data_ptr += sizeof(int32_t);

    for (i = 0; i < label_count; i++) {
        int32_t value_len = 0, label_len = 0;
        size_t value_buffer_len = 0, label_buffer_len = 0;
        char *grown = NULL;

        /* --- value --- */
        if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        memcpy(&value_len, data_ptr, sizeof(int32_t));
        if (ctx->bswap)
            value_len = byteswap4(value_len);
        data_ptr += sizeof(int32_t);

        if (value_len < 0 || (size_t)value_len > (size_t)(data_end - data_ptr)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* 4 output bytes per input byte plus a terminator. */
        value_buffer_len = (size_t)value_len * 4 + 1;
        /* Grow through a temporary so the old buffer is still freed on failure. */
        grown = realloc(value_buffer, value_buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        value_buffer = grown;

        retval = readstat_convert(value_buffer, value_buffer_len, data_ptr, value_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
        data_ptr += value_len;

        /* --- label --- */
        if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        memcpy(&label_len, data_ptr, sizeof(int32_t));
        if (ctx->bswap)
            label_len = byteswap4(label_len);
        data_ptr += sizeof(int32_t);

        if (label_len < 0 || (size_t)label_len > (size_t)(data_end - data_ptr)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        label_buffer_len = (size_t)label_len * 4 + 1;
        grown = realloc(label_buffer, label_buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        label_buffer = grown;

        retval = readstat_convert(label_buffer, label_buffer_len, data_ptr, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
        data_ptr += label_len;

        readstat_value_t value = { .type = READSTAT_TYPE_STRING };
        value.v.string_value = value_buffer;
        ctx->value_label_handler(label_name_buf, value, label_buffer, ctx->user_ctx);
    }

cleanup:
    free(value_buffer);
    free(label_buffer);
    return retval;
}
/*
 * Read a value label record, and the variable-index record that must follow
 * it, from the SAV stream.
 *
 * The labels are read first, then a record of type 4 listing the variables
 * they apply to. Every listed variable found in ctx->varinfo has its
 * labels_index set to the current ctx->value_labels_count. When a value label
 * handler is registered the labels are submitted through
 * sav_submit_value_labels(), typed after the last matched variable (string
 * when none matched). ctx->value_labels_count is then incremented.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ on a short read,
 * READSTAT_ERROR_MALLOC on allocation failure, READSTAT_ERROR_PARSE when a
 * count is negative, a label does not fit the local buffer, or the following
 * record is not of type 4, or a readstat_convert() error.
 */
static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx) {
    int32_t label_count;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int32_t *vars = NULL;
    int32_t rec_type;
    int32_t var_count;
    readstat_type_t value_type = READSTAT_TYPE_STRING;
    char label_buf[256];
    value_label_t *value_labels = NULL;
    size_t vars_size;
    int i;

    if (io->read(&label_count, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        label_count = byteswap4(label_count);

    /* The count comes from the file; a negative value must not reach the
     * allocation size. */
    if (label_count < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* calloc() checks the multiplication for overflow. Always request at
     * least one element so a zero count is not mistaken for out-of-memory. */
    value_labels = calloc(label_count > 0 ? (size_t)label_count : 1, sizeof(value_label_t));
    if (value_labels == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    for (i = 0; i < label_count; i++) {
        value_label_t *vlabel = &value_labels[i];

        /* First 9 bytes of the entry; after this read vlabel->label_len is
         * valid (presumably an 8-byte value plus a 1-byte length). */
        if (io->read(vlabel, 9, io->io_ctx) < 9) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        /* Label text is padded so that length byte + text is a multiple of 8. */
        size_t label_len = (vlabel->label_len + 8) / 8 * 8 - 1;
        if (label_len > sizeof(label_buf)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(label_buf, label_len, io->io_ctx) != label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(vlabel->label, sizeof(vlabel->label),
                label_buf, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    /* The labels must be followed by a type-4 record. */
    if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        rec_type = byteswap4(rec_type);

    if (rec_type != 4) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (io->read(&var_count, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        var_count = byteswap4(var_count);

    if (var_count < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    vars = calloc(var_count > 0 ? (size_t)var_count : 1, sizeof(int32_t));
    if (vars == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    vars_size = (size_t)var_count * sizeof(int32_t);
    if (io->read(vars, vars_size, io->io_ctx) != vars_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    for (i = 0; i < var_count; i++) {
        int var_offset = vars[i];
        if (ctx->bswap)
            var_offset = byteswap4(var_offset);

        /* NOTE(review): the stored positions are presumably 1-based while
         * the varinfo offsets are 0-based - confirm. */
        var_offset--;

        spss_varinfo_t *var = bsearch(&var_offset, ctx->varinfo, ctx->var_index,
                sizeof(spss_varinfo_t), &spss_varinfo_compare);
        if (var) {
            var->labels_index = ctx->value_labels_count;
            value_type = var->type;
        }
    }

    if (ctx->value_label_handler) {
        sav_submit_value_labels(value_labels, label_count, value_type, ctx);
    }
    ctx->value_labels_count++;

cleanup:
    free(vars);
    free(value_labels);
    return retval;
}