/*
 * Decode the 8 raw bytes of a strL reference into strl->v and strl->o.
 *
 * When ctx->strl_v_len is 2, v is assembled from bytes 0-1 and o from
 * bytes 2-7, using the file's byte order (derived from the host byte order
 * and ctx->machine_needs_byte_swap).  When ctx->strl_v_len is 4, v and o are
 * the 32-bit words at bytes 0-3 and 4-7, byte-swapped when
 * ctx->machine_needs_byte_swap is set.  Any other strl_v_len leaves *strl
 * untouched.
 */
static void dta_interpret_strl_vo_bytes(dta_ctx_t *ctx, unsigned char *vo_bytes, dta_strl_t *strl) {
    int file_is_big_endian = (!machine_is_little_endian() ^ ctx->machine_needs_byte_swap);

    if (ctx->strl_v_len == 2) {
        /*
         * Every byte of "o" is widened to 64 bits before shifting.  An
         * unsigned char promotes to int, so a plain `byte << 24` with
         * byte >= 0x80 lands in the sign bit and then sign-extends when it
         * is added to the 64-bit terms, corrupting the upper bits of o.
         */
        if (file_is_big_endian) {
            strl->v = (vo_bytes[0] << 8) + vo_bytes[1];
            strl->o = (((uint64_t)vo_bytes[2] << 40)
                    + ((uint64_t)vo_bytes[3] << 32)
                    + ((uint64_t)vo_bytes[4] << 24)
                    + ((uint64_t)vo_bytes[5] << 16)
                    + ((uint64_t)vo_bytes[6] << 8)
                    + (uint64_t)vo_bytes[7]);
        } else {
            strl->v = vo_bytes[0] + (vo_bytes[1] << 8);
            strl->o = ((uint64_t)vo_bytes[2]
                    + ((uint64_t)vo_bytes[3] << 8)
                    + ((uint64_t)vo_bytes[4] << 16)
                    + ((uint64_t)vo_bytes[5] << 24)
                    + ((uint64_t)vo_bytes[6] << 32)
                    + ((uint64_t)vo_bytes[7] << 40));
        }
    } else if (ctx->strl_v_len == 4) {
        uint32_t v, o;

        /* memcpy avoids unaligned / aliasing-unsafe loads from the byte buffer. */
        memcpy(&v, &vo_bytes[0], sizeof(uint32_t));
        memcpy(&o, &vo_bytes[4], sizeof(uint32_t));

        strl->v = ctx->machine_needs_byte_swap ? byteswap4(v) : v;
        strl->o = ctx->machine_needs_byte_swap ? byteswap4(o) : o;
    }
}
/*
 * Allocate a zeroed sav_ctx_t and populate it from the file header.
 *
 * Byte swapping is enabled unless header->layout_code is 2 or 3.  Returns
 * NULL when an allocation fails or when the (possibly swapped) header bias
 * is not 100.0; in those cases any partially built context is released with
 * sav_ctx_free().
 */
sav_ctx_t *sav_ctx_init(sav_file_header_record_t *header, readstat_io_t *io) {
    sav_ctx_t *ctx = malloc(sizeof(sav_ctx_t));
    if (ctx == NULL)
        return NULL;
    memset(ctx, 0, sizeof(sav_ctx_t));

    ctx->bswap = !(header->layout_code == 2 || header->layout_code == 3);
    ctx->data_is_compressed = (header->compressed != 0);

    ctx->record_count = ctx->bswap ? byteswap4(header->ncases) : header->ncases;
    ctx->fweight_index = ctx->bswap ? byteswap4(header->weight_index) : header->weight_index;

    ctx->missing_double = SAV_MISSING_DOUBLE;
    ctx->lowest_double = SAV_LOWEST_DOUBLE;
    ctx->highest_double = SAV_HIGHEST_DOUBLE;

    /* The bias field doubles as a sanity check on the header. */
    double bias = ctx->bswap ? byteswap_double(header->bias) : header->bias;
    if (bias != 100.0) {
        sav_ctx_free(ctx);
        return NULL;
    }

    ctx->varinfo_capacity = SAV_VARINFO_INITIAL_CAPACITY;
    ctx->varinfo = calloc(ctx->varinfo_capacity, sizeof(spss_varinfo_t));
    if (ctx->varinfo == NULL) {
        sav_ctx_free(ctx);
        return NULL;
    }

    ctx->io = io;

    return ctx;
}
/*
 * Read one variable record and seek past its optional trailing data
 * (variable label and missing-value slots) without interpreting it.
 *
 * Returns READSTAT_ERROR_READ on a short read, READSTAT_ERROR_SEEK when a
 * seek fails, READSTAT_OK otherwise.
 */
static readstat_error_t sav_skip_variable_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    sav_variable_record_t variable;

    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) < sizeof(sav_variable_record_t))
        return READSTAT_ERROR_READ;

    if (variable.has_var_label) {
        int32_t stored_len;
        if (io->read(&stored_len, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
            return READSTAT_ERROR_READ;

        stored_len = ctx->bswap ? byteswap4(stored_len) : stored_len;

        /* The label occupies its length rounded up to a multiple of 4 bytes. */
        int32_t padded_len = (stored_len + 3) / 4 * 4;
        if (io->seek(padded_len, READSTAT_SEEK_CUR, io->io_ctx) == -1)
            return READSTAT_ERROR_SEEK;
    }

    if (variable.n_missing_values) {
        int missing_count = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;

        /* The count may be negative; only its magnitude sizes the skip. */
        if (io->seek(abs(missing_count) * sizeof(double), READSTAT_SEEK_CUR, io->io_ctx) == -1)
            return READSTAT_ERROR_SEEK;
    }

    return READSTAT_OK;
}
static readstat_error_t read_sexptype_header(rdata_sexptype_info_t *header_info, rdata_ctx_t *ctx) { uint32_t sexptype; rdata_sexptype_header_t header; readstat_error_t retval = READSTAT_OK; if (read_st(ctx, &sexptype, sizeof(sexptype)) != sizeof(sexptype)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) sexptype = byteswap4(sexptype); memcpy(&header, &sexptype, sizeof(sexptype)); uint32_t attributes = 0, tag = 0, ref = 0; if (header.type == RDATA_SEXPTYPE_PAIRLIST) { if (header.attributes) { if (read_st(ctx, &attributes, sizeof(attributes)) != sizeof(attributes)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) header_info->attributes = byteswap4(header_info->attributes); } if (header.tag) { if (read_st(ctx, &tag, sizeof(tag)) != sizeof(tag)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) tag = byteswap4(tag); } if (tag == 1) { rdata_sexptype_info_t key_info; if ((retval = read_sexptype_header(&key_info, ctx)) != READSTAT_OK) goto cleanup; if (key_info.header.type != RDATA_SEXPTYPE_CHARACTER_STRING) { retval = READSTAT_ERROR_PARSE; goto cleanup; } char key[RDATA_ATOM_LEN]; if ((retval = read_character_string(key, RDATA_ATOM_LEN, ctx)) != READSTAT_OK) goto cleanup; ref = atom_table_add(ctx->atom_table, key); } else if ((tag & 0xFF) == RDATA_PSEUDO_SXP_REF) { ref = (tag >> 8); } }
/*
 * Seek past a value-label record and the record that must follow it.
 *
 * Layout consumed: a 32-bit label count, then per label 9 bytes read into a
 * value_label_t followed by padded label text, then a 32-bit record type
 * that must equal 4, a 32-bit variable count, and that many 32-bit entries.
 *
 * Returns READSTAT_ERROR_READ / READSTAT_ERROR_SEEK on I/O failure and
 * READSTAT_ERROR_PARSE when the follow-up record type is not 4.
 */
static readstat_error_t sav_skip_value_label_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    int32_t n_labels;
    int32_t next_rec_type;
    int32_t n_vars;
    int idx;

    if (io->read(&n_labels, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
        return READSTAT_ERROR_READ;
    if (ctx->bswap)
        n_labels = byteswap4(n_labels);

    for (idx = 0; idx < n_labels; idx++) {
        value_label_t vlabel;
        if (io->read(&vlabel, 9, io->io_ctx) < 9)
            return READSTAT_ERROR_READ;

        /* Length byte plus text is padded to a multiple of 8; one of those
         * bytes (the length) has already been consumed by the read above. */
        size_t skip_len = (vlabel.label_len + 8) / 8 * 8 - 1;
        if (io->seek(skip_len, READSTAT_SEEK_CUR, io->io_ctx) == -1)
            return READSTAT_ERROR_SEEK;
    }

    if (io->read(&next_rec_type, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
        return READSTAT_ERROR_READ;
    if (ctx->bswap)
        next_rec_type = byteswap4(next_rec_type);

    if (next_rec_type != 4)
        return READSTAT_ERROR_PARSE;

    if (io->read(&n_vars, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
        return READSTAT_ERROR_READ;
    if (ctx->bswap)
        n_vars = byteswap4(n_vars);

    if (io->seek(n_vars * sizeof(int32_t), READSTAT_SEEK_CUR, io->io_ctx) == -1)
        return READSTAT_ERROR_SEEK;

    return READSTAT_OK;
}
/*
 * Read a length-prefixed string from the stream into key (capacity keylen)
 * and NUL-terminate it.
 *
 * A stored length of -1 yields an empty string.  A string that would not fit
 * in keylen bytes including the terminator is rejected with
 * READSTAT_ERROR_PARSE; short reads give READSTAT_ERROR_READ.
 */
static readstat_error_t read_character_string(char *key, size_t keylen, rdata_ctx_t *ctx) {
    uint32_t length;

    if (read_st(ctx, &length, sizeof(length)) != sizeof(length))
        return READSTAT_ERROR_READ;

    if (ctx->machine_needs_byteswap)
        length = byteswap4(length);

    /* -1 converts to the all-ones value of the unsigned length here. */
    if (length == -1) {
        key[0] = '\0';
        return 0;
    }

    if (length + 1 > keylen)
        return READSTAT_ERROR_PARSE;

    if (read_st(ctx, key, length) != length)
        return READSTAT_ERROR_READ;

    key[length] = '\0';

    return READSTAT_OK;
}
/*
 * Advance fd past the expansion-field section.
 *
 * For "xmlish" files this is a direct seek to ctx->data_offset.  Otherwise
 * fields are read one header at a time (2-byte or 4-byte length, selected by
 * ctx->expansion_len_len) and skipped until a header with data_type 0 and
 * length 0 is found.  Any data_type other than 1 in a non-terminating header
 * is a parse error.  Nothing is consumed when ctx->expansion_len_len is 0.
 */
static readstat_error_t dta_skip_expansion_fields(int fd, dta_ctx_t *ctx) {
    if (ctx->file_is_xmlish) {
        if (readstat_lseek(fd, ctx->data_offset, SEEK_SET) == -1)
            return READSTAT_ERROR_SEEK;
        return READSTAT_OK;
    }

    if (ctx->expansion_len_len == 0)
        return READSTAT_OK;

    for (;;) {
        size_t field_len;
        char field_type;

        if (ctx->expansion_len_len == 2) {
            dta_short_expansion_field_t short_field;
            if (read(fd, &short_field, sizeof(short_field)) != sizeof(short_field))
                return READSTAT_ERROR_READ;

            if (ctx->machine_needs_byte_swap) {
                field_len = byteswap2(short_field.len);
            } else {
                field_len = short_field.len;
            }
            field_type = short_field.data_type;
        } else {
            dta_expansion_field_t long_field;
            if (read(fd, &long_field, sizeof(long_field)) != sizeof(long_field))
                return READSTAT_ERROR_READ;

            if (ctx->machine_needs_byte_swap) {
                field_len = byteswap4(long_field.len);
            } else {
                field_len = long_field.len;
            }
            field_type = long_field.data_type;
        }

        /* An all-zero header terminates the section. */
        if (field_type == 0 && field_len == 0)
            return READSTAT_OK;

        if (field_type != 1)
            return READSTAT_ERROR_PARSE;

        if (readstat_lseek(fd, field_len, SEEK_CUR) == -1)
            return READSTAT_ERROR_SEEK;
    }

    /* Not reached: the loop above only exits by returning. */
    return READSTAT_ERROR_PARSE;
}
/* We don't yet know how many real variables there are, so store the values in the record
 * and make sense of them later.
 *
 * Copies `count` 32-bit values from `data` into ctx->variable_display_values
 * (byte-swapping each when ctx->bswap is set) and records the count.
 *
 * Returns READSTAT_ERROR_MALLOC if the buffer cannot be resized; in that case
 * the previously stored buffer and count in ctx are left unchanged. */
static readstat_error_t sav_store_variable_display_parameter_record(const void *data, int count, sav_ctx_t *ctx) {
    const int32_t *data_ptr = data;
    int i;

    /* Resize through a temporary: assigning realloc()'s result straight back
     * to the field would leak the old buffer (and leave a NULL pointer with a
     * stale count) whenever the allocation fails. */
    void *resized = realloc(ctx->variable_display_values, count * sizeof(int32_t));
    if (resized == NULL)
        return READSTAT_ERROR_MALLOC;

    ctx->variable_display_values = resized;
    ctx->variable_display_values_count = count;
    for (i=0; i<count; i++) {
        ctx->variable_display_values[i] = ctx->bswap ? byteswap4(data_ptr[i]) : data_ptr[i];
    }
    return READSTAT_OK;
}
/* {{{ void byteswapbuffer(void *pData, unsigned int uElements, unsigned int uElementSize) */
/*
 * Byte-swap, in place, uElements consecutive elements starting at pData.
 * Only element sizes of 2 and 4 bytes are handled; for any other size the
 * buffer is left untouched.
 */
void byteswapbuffer(void *pData, unsigned int uElements, unsigned int uElementSize)
{
	unsigned char *pbyCursor = (unsigned char *)pData;
	unsigned int uIndex;

	if(uElementSize != 2 && uElementSize != 4)
		return;

	for(uIndex = 0; uIndex < uElements; uIndex++)
	{
		if(uElementSize == 2)
			byteswap2(pbyCursor);
		else
			byteswap4(pbyCursor);

		pbyCursor += uElementSize;
	}
}
/*
 * Read one strL header from the stream and fill in *strl: the (v, o) pair is
 * decoded by dta_interpret_strl_vo_bytes(), the type is copied as-is, and the
 * length is byte-swapped when ctx->machine_needs_byte_swap is set.
 *
 * Returns READSTAT_ERROR_READ if the full header could not be read.
 */
static readstat_error_t dta_read_strl(dta_ctx_t *ctx, dta_strl_t *strl) {
    readstat_io_t *io = ctx->io;
    dta_strl_header_t header;

    if (io->read(&header, sizeof(header), io->io_ctx) != sizeof(dta_strl_header_t))
        return READSTAT_ERROR_READ;

    dta_interpret_strl_vo_bytes(ctx, header.vo_bytes, strl);
    strl->type = header.type;
    strl->len = ctx->machine_needs_byte_swap ? byteswap4(header.len) : header.len;

    return READSTAT_OK;
}
/*
 * Seek past a document record: a 32-bit line count followed by that many
 * lines of SPSS_DOC_LINE_SIZE bytes each.
 *
 * Returns READSTAT_ERROR_READ on a short read of the count and
 * READSTAT_ERROR_SEEK if the seek fails.
 */
static readstat_error_t sav_skip_document_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    int32_t line_count;

    if (io->read(&line_count, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
        return READSTAT_ERROR_READ;

    if (ctx->bswap)
        line_count = byteswap4(line_count);

    if (io->seek(line_count * SPSS_DOC_LINE_SIZE, READSTAT_SEEK_CUR, io->io_ctx) == -1)
        return READSTAT_ERROR_SEEK;

    return READSTAT_OK;
}
/*
 * Byte-swap every multi-byte integer field of a namestr record in place.
 * Does nothing unless machine_is_little_endian() reports a little-endian
 * host.
 */
void xport_namestr_bswap(xport_namestr_t *namestr) {
    if (!machine_is_little_endian())
        return;

    namestr->ntype = byteswap2(namestr->ntype);
    namestr->nhfun = byteswap2(namestr->nhfun);
    namestr->nlng = byteswap2(namestr->nlng);
    /* Swap nvar0 from its own value; it was previously derived from nlng,
     * which overwrote nvar0 with the wrong field. */
    namestr->nvar0 = byteswap2(namestr->nvar0);

    namestr->nfl = byteswap2(namestr->nfl);
    namestr->nfd = byteswap2(namestr->nfd);
    namestr->nfj = byteswap2(namestr->nfj);

    namestr->nifl = byteswap2(namestr->nifl);
    namestr->nifd = byteswap2(namestr->nifd);
    namestr->npos = byteswap4(namestr->npos);

    namestr->labeln = byteswap2(namestr->labeln);
}
/*
 * Read a 32-bit length from the stream, byte-swapping it when
 * ctx->machine_needs_byteswap is set, and store it in *outLength if that
 * pointer is non-null.
 *
 * Returns READSTAT_ERROR_READ on a short read (leaving *outLength untouched).
 */
static readstat_error_t read_length(int32_t *outLength, rdata_ctx_t *ctx) {
    int32_t value;

    if (read_st(ctx, &value, sizeof(value)) != sizeof(value))
        return READSTAT_ERROR_READ;

    if (ctx->machine_needs_byteswap)
        value = byteswap4(value);

    if (!outLength)
        return READSTAT_OK;

    *outLength = value;
    return READSTAT_OK;
}
/*
 * Function to read header structure from file.
 *
 * fd is the file descriptor; head_data receives the header.  Ten strings
 * are read with gstring(), followed by six floats and four ints read raw.
 * The process exits with status 1 if the file is smaller than 10 ints, if
 * its size cannot be determined, or if the final read fails.
 */
void rdat(int fd,HEADER *head_data)
{
  long nobytes;

  /* Seek to the end (whence 2) to learn the file size.  The sign is tested
     before the size comparison: comparing a negative result directly with
     the unsigned sizeof expression would convert it to a huge value and let
     an lseek failure slip through. */
  nobytes = lseek(fd, 0L, 2);
  if(nobytes < 0 || (unsigned long)nobytes < 10*sizeof(int))
  {
    P("Analysis file header is invalid. Abort\n");
    fflush(stdout);
    exit(1);
  }
  lseek(fd, 0L, 0);                       /* rewind to start (whence 0) */

  head_data->performer= gstring(fd);
  head_data->instrument= gstring(fd);
  head_data->date= gstring(fd);
  head_data->pitch= gstring(fd);
  head_data->dyn= gstring(fd);
  head_data->vibra= gstring(fd);
  head_data->part= gstring(fd);
  head_data->type= gstring(fd);
  head_data->comments= gstring(fd);
  head_data->andate= gstring(fd);

  read(fd,&head_data->interpval,sizeof(float));
  read(fd,&head_data->sr,sizeof(float));
  read(fd,&head_data->tl,sizeof(float));
  read(fd,&head_data->smax,sizeof(float));
  read(fd,&head_data->fa,sizeof(float));
  read(fd,&head_data->dt,sizeof(float));
  read(fd,&head_data->fftlen,sizeof(int));
  read(fd,&head_data->nhar,sizeof(int));
  read(fd,&head_data->nchans,sizeof(int));
  /* Only this last read is checked. */
  if( read(fd,&head_data->npts,sizeof(int)) <=0 )
  {
    P("File read error in rdat()\n");
    exit(1);
  }

  /* Swap the int & float bytes after reading them in.  The swap is
     unconditional: the former "#ifdef __alpha" guard is disabled.
     `data` and `i` are not declared in this function -- presumably
     file-scope variables; verify.  The loop assumes the ten members from
     interpval through npts are laid out contiguously in HEADER -- TODO
     confirm. */
  data = (int*)&(head_data->interpval);
  for(i=0;i<10;i++) byteswap4(data+i);
} /* end rdat() */
/*
 * Interpret a 32-byte machine-integer-info record and, when needed, open an
 * iconv converter from the file's character set to ctx->output_encoding.
 *
 * The source charset is ctx->input_encoding when set; otherwise it is looked
 * up in _charset_table by the record's character code (no lookup is done for
 * SAV_CHARSET_UTF8, which leaves the source charset unset).  A converter is
 * opened only when both charsets are known and differ.
 *
 * Returns READSTAT_ERROR_PARSE if data_len is not 32, and
 * READSTAT_ERROR_UNSUPPORTED_CHARSET if the code is unknown (after notifying
 * ctx->error_handler, if any) or iconv_open() fails.
 */
static readstat_error_t sav_parse_machine_integer_info_record(const void *data, size_t data_len, sav_ctx_t *ctx) {
    if (data_len != 32)
        return READSTAT_ERROR_PARSE;

    const char *from_charset = NULL;
    const char *to_charset = ctx->output_encoding;

    sav_machine_integer_info_record_t record;
    memcpy(&record, data, data_len);
    if (ctx->bswap) {
        record.character_code = byteswap4(record.character_code);
    }

    if (ctx->input_encoding) {
        from_charset = ctx->input_encoding;
    } else if (record.character_code != SAV_CHARSET_UTF8) {
        size_t n_entries = sizeof(_charset_table)/sizeof(_charset_table[0]);
        size_t k;
        for (k = 0; k < n_entries; k++) {
            if (record.character_code == _charset_table[k].code) {
                from_charset = _charset_table[k].name;
                break;
            }
        }

        if (from_charset == NULL) {
            if (ctx->error_handler) {
                char error_buf[1024];
                snprintf(error_buf, sizeof(error_buf), "Unsupported character set: %d\n", record.character_code);
                ctx->error_handler(error_buf, ctx->user_ctx);
            }
            return READSTAT_ERROR_UNSUPPORTED_CHARSET;
        }
    }

    /* No conversion needed when either side is unknown or both are the same. */
    if (from_charset == NULL || to_charset == NULL || strcmp(from_charset, to_charset) == 0)
        return READSTAT_OK;

    iconv_t converter = iconv_open(to_charset, from_charset);
    if (converter == (iconv_t)-1)
        return READSTAT_ERROR_UNSUPPORTED_CHARSET;

    ctx->converter = converter;
    return READSTAT_OK;
}
/*
 * Read a document record and hand each line to ctx->note_handler.
 *
 * When no note handler is installed the record is skipped via
 * sav_skip_document_record().  Otherwise each SPSS_DOC_LINE_SIZE-byte line
 * is converted with readstat_convert() and passed to the handler together
 * with its zero-based index.
 *
 * Returns READSTAT_ERROR_READ on a short read, the conversion error if
 * readstat_convert() fails, and READSTAT_ERROR_USER_ABORT when the handler
 * returns non-zero.
 */
static readstat_error_t sav_read_document_record(sav_ctx_t *ctx) {
    if (!ctx->note_handler)
        return sav_skip_document_record(ctx);

    readstat_io_t *io = ctx->io;
    int32_t line_count;

    if (io->read(&line_count, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
        return READSTAT_ERROR_READ;

    if (ctx->bswap)
        line_count = byteswap4(line_count);

    char raw_line[SPSS_DOC_LINE_SIZE];
    char utf8_line[4*SPSS_DOC_LINE_SIZE+1];
    int line_no;

    for (line_no = 0; line_no < line_count; line_no++) {
        if (io->read(raw_line, SPSS_DOC_LINE_SIZE, io->io_ctx) < SPSS_DOC_LINE_SIZE)
            return READSTAT_ERROR_READ;

        readstat_error_t status = readstat_convert(utf8_line, sizeof(utf8_line),
                raw_line, sizeof(raw_line), ctx->converter);
        if (status != READSTAT_OK)
            return status;

        if (ctx->note_handler(line_no, utf8_line, ctx->user_ctx))
            return READSTAT_ERROR_USER_ABORT;
    }

    return READSTAT_OK;
}
/*
 * Read a real, integer or logical vector from the stream and pass it to
 * ctx->column_handler (if set) as an array of doubles.
 *
 * Integer and logical elements are widened to double, with INT32_MIN mapped
 * to NAN.  Real vectors are passed through as read; when attribute handling
 * sets ctx->class_is_posixct they are reported with the "%ts" format.
 *
 * Returns READSTAT_ERROR_PARSE for an unsupported vector type or a negative
 * length, READSTAT_ERROR_MALLOC / READSTAT_ERROR_READ on allocation or I/O
 * failure, and READSTAT_ERROR_USER_ABORT when the handler returns non-zero.
 * All temporary buffers are released before returning.
 */
static readstat_error_t read_value_vector(rdata_sexptype_header_t header, const char *name, rdata_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int32_t length;
    size_t input_elem_size = 0;
    void *vals = NULL;
    double *real_vals = NULL;
    size_t buf_len = 0;
    int output_data_type;
    int i;

    switch (header.type) {
        case RDATA_SEXPTYPE_REAL_VECTOR:
            input_elem_size = sizeof(double);
            output_data_type = READSTAT_TYPE_DOUBLE;
            break;
        case RDATA_SEXPTYPE_INTEGER_VECTOR:
            input_elem_size = sizeof(int32_t);
            output_data_type = READSTAT_TYPE_DOUBLE;
            break;
        case RDATA_SEXPTYPE_LOGICAL_VECTOR:
            input_elem_size = sizeof(int32_t);
            output_data_type = READSTAT_TYPE_DOUBLE;
            break;
        default:
            retval = READSTAT_ERROR_PARSE;
            break;
    }
    if (retval != READSTAT_OK)
        goto cleanup;

    if ((retval = read_length(&length, ctx)) != READSTAT_OK)
        goto cleanup;

    /* The length comes from the file; a negative value would turn into an
     * enormous unsigned buffer size below. */
    if (length < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    buf_len = length * input_elem_size;

    vals = malloc(buf_len);
    if (vals == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    if (read_st(ctx, vals, buf_len) != buf_len) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (ctx->machine_needs_byteswap) {
        if (input_elem_size == sizeof(double)) {
            double *d_vals = (double *)vals;
            for (i=0; i<buf_len/sizeof(double); i++) {
                d_vals[i] = byteswap_double(d_vals[i]);
            }
        } else {
            uint32_t *i_vals = (uint32_t *)vals;
            for (i=0; i<buf_len/sizeof(uint32_t); i++) {
                i_vals[i] = byteswap4(i_vals[i]);
            }
        }
    }

    /* Reset before reading attributes; handle_vector_attribute may set it. */
    ctx->class_is_posixct = 0;
    if (header.attributes) {
        if ((retval = read_attributes(&handle_vector_attribute, ctx)) != READSTAT_OK)
            goto cleanup;
    }

    if (ctx->column_handler) {
        if (header.type == RDATA_SEXPTYPE_LOGICAL_VECTOR ||
                header.type == RDATA_SEXPTYPE_INTEGER_VECTOR) {
            int32_t *i_vals = (int32_t *)vals;

            real_vals = malloc(length * sizeof(double));
            if (real_vals == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }

            for (i=0; i<length; i++) {
                if (i_vals[i] == INT32_MIN) {
                    real_vals[i] = NAN;
                } else {
                    real_vals[i] = i_vals[i];
                }
            }
            if (ctx->column_handler(name, output_data_type, NULL, real_vals, length, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        } else {
            if (ctx->column_handler(name, output_data_type,
                        ctx->class_is_posixct ? "%ts" : NULL, vals, length, ctx->user_ctx)) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
    }

cleanup:
    /* free(NULL) is a no-op, so both buffers can be released on every path. */
    free(real_vals);
    free(vals);

    return retval;
}
/*
 * Parse the fixed header of a SAS7BDAT / SAS7BCAT file into *hinfo and leave
 * the stream positioned at hinfo->header_size.
 *
 * Fills in alignment padding, 64-bit flag, endianness, encoding name, file
 * label, creation and modification times (offset by an epoch built with
 * mktime() from tm_year = 60, tm_mday = 1), header size, page size, page
 * count, release version and vendor.
 *
 * Returns READSTAT_ERROR_READ / READSTAT_ERROR_SEEK on I/O failure,
 * READSTAT_ERROR_PARSE for a bad magic number, unknown endianness marker or
 * a header smaller than 1024 bytes, and READSTAT_ERROR_UNSUPPORTED_CHARSET
 * for an unknown encoding code.  error_handler, when non-null, receives a
 * message for some of these failures.
 */
readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo,
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t header_start;
    sas_header_end_t header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];
    struct tm epoch_tm = { .tm_year = 60, .tm_mday = 1 };
    time_t epoch = mktime(&epoch_tm);

    if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->u64 = 1;
    }

    /* Swap whenever the file's byte order differs from the host's. */
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        hinfo->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        hinfo->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            hinfo->encoding = _charset_table[i].name;
            break;
        }
    }
    if (hinfo->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }

    memcpy(hinfo->file_label, header_start.file_label, sizeof(header_start.file_label));

    if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    double creation_time, modification_time;

    if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    hinfo->creation_time = bswap ? byteswap_double(creation_time) + epoch : creation_time + epoch;
    /* Use the modification timestamp that was just read; it was previously
     * computed from creation_time, so both fields always came out equal. */
    hinfo->modification_time = bswap ? byteswap_double(modification_time) + epoch : modification_time + epoch;

    if (io->seek(16, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    hinfo->header_size = bswap ? byteswap4(header_size) : header_size;
    hinfo->page_size = bswap ? byteswap4(page_size) : page_size;

    if (hinfo->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* The page count is 8 bytes wide in the u64 layout, 4 bytes otherwise. */
    if (hinfo->u64) {
        uint64_t page_count;
        if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap4(page_count) : page_count;
    }

    if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

    if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* Start from values that can never match a real release so the vendor
     * test below is well-defined even when the release string does not
     * parse (the locals used to be read uninitialized in that case). */
    int major = -1, minor = -1, revision = -1;
    if (sscanf(header_end.release, "%1d.%04dM%1d", &major, &minor, &revision) == 3) {
        hinfo->major_version = major;
        hinfo->minor_version = minor;
        hinfo->revision = revision;
    }
    if (major == 9 && minor == 0 && revision == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        hinfo->vendor = READSTAT_VENDOR_SAS;
    }

    if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf),
                    "ReadStat: Failed to seek to position %" PRId64 "\n", hinfo->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}
/*
 * First pass over the dictionary records: skip variable, value-label and
 * document records, and parse only the machine-integer-info extension
 * record (the one that feeds sav_parse_machine_integer_info_record()).
 * The pass ends at the dictionary-termination record.
 *
 * Returns the first error from a helper, READSTAT_ERROR_READ /
 * READSTAT_ERROR_SEEK on I/O failure, and READSTAT_ERROR_PARSE for an
 * unknown record type or an integer-info payload that is larger than the
 * local 4096-byte buffer or cannot be read in full.
 */
static readstat_error_t sav_parse_records_pass1(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    readstat_error_t retval = READSTAT_OK;
    char data_buf[4096];
    int finished = 0;

    while (!finished) {
        int32_t rec_type;
        int32_t extra_info[3];
        size_t data_len = 0;
        int i;

        if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
            return READSTAT_ERROR_READ;

        if (ctx->bswap) {
            rec_type = byteswap4(rec_type);
        }

        switch (rec_type) {
            case SAV_RECORD_TYPE_VARIABLE:
                retval = sav_skip_variable_record(ctx);
                if (retval)
                    return retval;
                break;

            case SAV_RECORD_TYPE_VALUE_LABEL:
                retval = sav_skip_value_label_record(ctx);
                if (retval)
                    return retval;
                break;

            case SAV_RECORD_TYPE_DOCUMENT:
                retval = sav_skip_document_record(ctx);
                if (retval)
                    return retval;
                break;

            case SAV_RECORD_TYPE_DICT_TERMINATION:
                finished = 1;
                break;

            case SAV_RECORD_TYPE_HAS_DATA: {
                /* Extension record header: subtype, element size, element count. */
                if (io->read(extra_info, sizeof(extra_info), io->io_ctx) < sizeof(extra_info))
                    return READSTAT_ERROR_READ;

                if (ctx->bswap) {
                    for (i=0; i<3; i++)
                        extra_info[i] = byteswap4(extra_info[i]);
                }

                int subtype = extra_info[0];
                int size = extra_info[1];
                int count = extra_info[2];
                data_len = size * count;

                if (subtype != SAV_RECORD_SUBTYPE_INTEGER_INFO) {
                    if (io->seek(data_len, READSTAT_SEEK_CUR, io->io_ctx) == -1)
                        return READSTAT_ERROR_SEEK;
                    break;
                }

                if (data_len > sizeof(data_buf))
                    return READSTAT_ERROR_PARSE;

                if (io->read(data_buf, data_len, io->io_ctx) < data_len)
                    return READSTAT_ERROR_PARSE;

                retval = sav_parse_machine_integer_info_record(data_buf, data_len, ctx);
                if (retval != READSTAT_OK)
                    return retval;
                break;
            }

            default:
                return READSTAT_ERROR_PARSE;
        }
    }

    return retval;
}
/*
 * Parse the file at `filename`, dispatching to the callbacks registered on
 * `parser`; `user_ctx` is stored on the context for those callbacks.
 *
 * A file whose first five bytes are "RDX2\n" is read as an environment;
 * otherwise the stream is reset and a single top-level object is read.
 * In both cases a version header is read first.
 *
 * Returns READSTAT_ERROR_OPEN if the context cannot be created,
 * READSTAT_ERROR_READ on short reads, any error from the stream or object
 * readers, and READSTAT_ERROR_PARSE if data remains after the last object.
 * The context is always released before returning.
 */
readstat_error_t rdata_parse(rdata_parser_t *parser, const char *filename, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    rdata_v2_header_t v2_header;
    char header_line[5];
    char trailing_byte;
    int is_rdata = 0;

    rdata_ctx_t *ctx = init_rdata_ctx(filename);
    if (ctx == NULL) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    ctx->user_ctx = user_ctx;
    ctx->table_handler = parser->table_handler;
    ctx->column_handler = parser->column_handler;
    ctx->column_name_handler = parser->column_name_handler;
    ctx->text_value_handler = parser->text_value_handler;
    ctx->value_label_handler = parser->value_label_handler;
    ctx->error_handler = parser->error_handler;

    retval = init_stream(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    if (read_st(ctx, &header_line, sizeof(header_line)) != sizeof(header_line)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (strncmp("RDX2\n", header_line, sizeof(header_line)) == 0) {
        is_rdata = 1;
    } else {
        /* No magic line: rewind so the version header is read from the start. */
        reset_stream(ctx);
    }

    if (read_st(ctx, &v2_header, sizeof(v2_header)) != sizeof(v2_header)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (ctx->machine_needs_byteswap) {
        v2_header.format_version = byteswap4(v2_header.format_version);
        v2_header.writer_version = byteswap4(v2_header.writer_version);
        v2_header.reader_version = byteswap4(v2_header.reader_version);
    }

    if (is_rdata) {
        retval = read_environment(NULL, ctx);
    } else {
        retval = read_toplevel_object(NULL, NULL, ctx);
    }
    if (retval != READSTAT_OK)
        goto cleanup;

    /* Being able to read one more byte means unparsed data is left over. */
    if (read_st(ctx, &trailing_byte, 1) == 1) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

cleanup:
    if (ctx) {
        free_rdata_ctx(ctx);
    }

    return retval;
}
readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *filename, void *user_ctx) { readstat_error_t retval = READSTAT_OK; int i; size_t record_len = 0; int fd = -1; char *buf = NULL; dta_header_t header; dta_ctx_t *ctx = NULL; char str_buf[2048]; char *long_string = NULL; size_t file_size = 0; if ((fd = readstat_open(filename)) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } char magic[4]; if (read(fd, magic, 4) != 4) { retval = READSTAT_ERROR_READ; goto cleanup; } file_size = readstat_lseek(fd, 0, SEEK_END); if (file_size == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (readstat_lseek(fd, 0, SEEK_SET) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (strncmp(magic, "<sta", 4) == 0) { retval = dta_read_xmlish_preamble(fd, ctx, &header); } else { if (read(fd, &header, sizeof(header)) != sizeof(header)) { retval = READSTAT_ERROR_READ; goto cleanup; } } if ((ctx = dta_ctx_init(header.nvar, header.nobs, header.byteorder, header.ds_format)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } ctx->user_ctx = user_ctx; ctx->file_size = file_size; ctx->progress_handler = parser->progress_handler; retval = dta_update_progress(fd, ctx); if (retval != READSTAT_OK) goto cleanup; if (parser->info_handler) { if (parser->info_handler(ctx->nobs, ctx->nvar, user_ctx)) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if (ctx->file_is_xmlish) { uint16_t label_len = 0; unsigned char timestamp_len; if ((retval = dta_read_tag(fd, ctx, "<label>")) != READSTAT_OK) { goto cleanup; } if (ctx->data_label_len_len == 2) { if (read(fd, &label_len, sizeof(uint16_t)) != sizeof(uint16_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } label_len = ctx->machine_needs_byte_swap ? 
byteswap2(label_len) : label_len; } else if (ctx->data_label_len_len == 1) { unsigned char label_len_char; if (read(fd, &label_len_char, sizeof(unsigned char)) != sizeof(unsigned char)) { retval = READSTAT_ERROR_READ; goto cleanup; } label_len = label_len_char; } if (readstat_lseek(fd, label_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "</label>")) != READSTAT_OK) { goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "<timestamp>")) != READSTAT_OK) { goto cleanup; } if (read(fd, ×tamp_len, 1) != 1) { retval = READSTAT_ERROR_READ; goto cleanup; } if (readstat_lseek(fd, timestamp_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "</timestamp>")) != READSTAT_OK) { goto cleanup; } } else { if (readstat_lseek(fd, ctx->data_label_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (ctx->time_stamp_len) { if (readstat_lseek(fd, ctx->time_stamp_len, SEEK_CUR) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } } if ((retval = dta_read_tag(fd, ctx, "</header>")) != READSTAT_OK) { goto cleanup; } if (dta_read_map(fd, ctx) != READSTAT_OK) { retval = READSTAT_ERROR_READ; goto cleanup; } if (dta_read_descriptors(fd, ctx) != READSTAT_OK) { retval = READSTAT_ERROR_READ; goto cleanup; } for (i=0; i<ctx->nvar; i++) { size_t max_len; readstat_types_t type = dta_type_info(ctx->typlist[i], &max_len, ctx); record_len += max_len; if (type == READSTAT_TYPE_STRING) max_len++; /* might append NULL */ if (parser->variable_handler) { readstat_variable_t *variable = dta_init_variable(ctx, i, type); const char *value_labels = NULL; if (ctx->lbllist[ctx->lbllist_entry_len*i]) value_labels = &ctx->lbllist[ctx->lbllist_entry_len*i]; int cb_retval = parser->variable_handler(i, variable, value_labels, user_ctx); free(variable); if (cb_retval) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } if ((retval = dta_skip_expansion_fields(fd, ctx)) != 
READSTAT_OK) { goto cleanup; } if (record_len == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if ((retval = dta_read_tag(fd, ctx, "<data>")) != READSTAT_OK) { goto cleanup; } if ((retval = dta_update_progress(fd, ctx)) != READSTAT_OK) { goto cleanup; } if ((buf = malloc(record_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } for (i=0; i<ctx->nobs; i++) { if (read(fd, buf, record_len) != record_len) { retval = READSTAT_ERROR_READ; goto cleanup; } int j; off_t offset = 0; for (j=0; j<ctx->nvar; j++) { size_t max_len; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = dta_type_info(ctx->typlist[j], &max_len, ctx); if (value.type == READSTAT_TYPE_STRING) { readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter); value.v.string_value = str_buf; } else if (value.type == READSTAT_TYPE_LONG_STRING) { uint32_t v, o; v = *((uint32_t *)&buf[offset]); o = *((uint32_t *)&buf[offset+4]); if (ctx->machine_needs_byte_swap) { v = byteswap4(v); o = byteswap4(o); } if (v > 0 && o > 0) { off_t cur_pos = readstat_lseek(fd, 0, SEEK_CUR); if (cur_pos == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } retval = dta_read_long_string(fd, ctx, v, o, &long_string); if (retval != READSTAT_OK) { goto cleanup; } value.v.string_value = long_string; if (readstat_lseek(fd, cur_pos, SEEK_SET) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } } else if (value.type == READSTAT_TYPE_CHAR) { char byte = buf[offset]; if (ctx->machine_is_twos_complement) { byte = ones_to_twos_complement1(byte); } if (byte > DTA_MAX_CHAR) { value.is_system_missing = 1; if (byte > DTA_MISSING_CHAR) { value.tag = 'a' + (byte - DTA_MISSING_CHAR_A); } } value.v.char_value = byte; } else if (value.type == READSTAT_TYPE_INT16) { int16_t num = *((int16_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap2(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement2(num); } if (num > DTA_MAX_INT16) { 
value.is_system_missing = 1; if (num > DTA_MISSING_INT16) { value.tag = 'a' + (num - DTA_MISSING_INT16_A); } } value.v.i16_value = num; } else if (value.type == READSTAT_TYPE_INT32) { int32_t num = *((int32_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement4(num); } if (num > DTA_MAX_INT32) { value.is_system_missing = 1; if (num > DTA_MISSING_INT32) { value.tag = 'a' + (num - DTA_MISSING_INT32_A); } } value.v.i32_value = num; } else if (value.type == READSTAT_TYPE_FLOAT) { uint32_t num = *((uint32_t *)&buf[offset]); float f_num = NAN; if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (num > DTA_MAX_FLOAT) { value.is_system_missing = 1; if (num > DTA_MISSING_FLOAT) { value.tag = 'a' + ((num - DTA_MISSING_FLOAT_A) >> 11); } } else {
/*
 * Parse the fixed header of a SAS7BDAT / SAS7BCAT file from descriptor fd
 * into *ctx and leave the descriptor positioned at ctx->header_size.
 *
 * Fills in alignment padding, 64-bit flag, endianness, encoding name, header
 * size, page size, page count and vendor.
 *
 * Returns READSTAT_ERROR_READ / READSTAT_ERROR_SEEK on I/O failure,
 * READSTAT_ERROR_PARSE for a bad magic number, unknown endianness marker or
 * a header smaller than 1024 bytes, and READSTAT_ERROR_UNSUPPORTED_CHARSET
 * for an unknown encoding code.  error_handler, when non-null, receives a
 * message for some of these failures.
 *
 * Every read() result is compared with `!=` rather than `<`: read() returns
 * a signed count, and `-1 < sizeof(...)` is false once -1 is converted to
 * the unsigned type of sizeof, so a failed read would pass unnoticed.
 */
readstat_error_t sas_read_header(int fd, sas_header_info_t *ctx,
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t header_start;
    sas_header_end_t header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];

    if (read(fd, &header_start, sizeof(sas_header_start_t)) != sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->u64 = 1;
    }

    /* Swap whenever the file's byte order differs from the host's. */
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        ctx->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        ctx->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            ctx->encoding = _charset_table[i].name;
            break;
        }
    }
    if (ctx->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }

    if (readstat_lseek(fd, 196 + ctx->pad1, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %d\n", 196 + ctx->pad1);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (read(fd, &header_size, sizeof(uint32_t)) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (read(fd, &page_size, sizeof(uint32_t)) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx->header_size = bswap ? byteswap4(header_size) : header_size;
    ctx->page_size = bswap ? byteswap4(page_size) : page_size;

    if (ctx->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* The page count is 8 bytes wide in the u64 layout, 4 bytes otherwise. */
    if (ctx->u64) {
        uint64_t page_count;
        if (read(fd, &page_count, sizeof(uint64_t)) != sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (read(fd, &page_count, sizeof(uint32_t)) != sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap4(page_count) : page_count;
    }

    if (readstat_lseek(fd, 8, SEEK_CUR) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

    if (read(fd, &header_end, sizeof(sas_header_end_t)) != sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (strncmp(header_end.release, "9.0000M0", sizeof(header_end.release)) == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        ctx->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        ctx->vendor = READSTAT_VENDOR_SAS;
    }

    if (readstat_lseek(fd, ctx->header_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %lld\n", ctx->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}
/*
 * Load a 32-bit value from the (possibly unaligned) bytes at `data`,
 * byte-swapping it when `bswap` is non-zero.
 */
uint32_t sas_read4(const char *data, int bswap) {
    uint32_t word;

    /* memcpy keeps the load safe regardless of the alignment of `data`. */
    memcpy(&word, data, 4);

    if (bswap)
        return byteswap4(word);

    return word;
}
static readstat_error_t dta_handle_rows(dta_ctx_t *ctx) { readstat_io_t *io = ctx->io; char *buf = NULL; char str_buf[2048]; int i; readstat_error_t retval = READSTAT_OK; if ((buf = malloc(ctx->record_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } for (i=0; i<ctx->row_limit; i++) { if (io->read(buf, ctx->record_len, io->io_ctx) != ctx->record_len) { retval = READSTAT_ERROR_READ; goto cleanup; } int j; off_t offset = 0; for (j=0; j<ctx->nvar; j++) { size_t max_len; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = dta_type_info(ctx->typlist[j], &max_len, ctx); if (value.type == READSTAT_TYPE_STRING) { readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter); value.v.string_value = str_buf; } else if (value.type == READSTAT_TYPE_STRING_REF) { dta_strl_t key; dta_interpret_strl_vo_bytes(ctx, (unsigned char *)&buf[offset], &key); dta_strl_t **found = bsearch(&key, ctx->strls, ctx->strls_count, sizeof(dta_strl_t *), &dta_compare_strls); if (found) { value.v.string_value = (*found)->data; } value.type = READSTAT_TYPE_STRING; } else if (value.type == READSTAT_TYPE_INT8) { int8_t byte = buf[offset]; if (ctx->machine_is_twos_complement) { byte = ones_to_twos_complement1(byte); } if (byte > ctx->max_int8) { if (ctx->supports_tagged_missing && byte > DTA_113_MISSING_INT8) { value.tag = 'a' + (byte - DTA_113_MISSING_INT8_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i8_value = byte; } else if (value.type == READSTAT_TYPE_INT16) { int16_t num = *((int16_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap2(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement2(num); } if (num > ctx->max_int16) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT16) { value.tag = 'a' + (num - DTA_113_MISSING_INT16_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i16_value = num; } else if (value.type 
== READSTAT_TYPE_INT32) { int32_t num = *((int32_t *)&buf[offset]); if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement4(num); } if (num > ctx->max_int32) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT32) { value.tag = 'a' + (num - DTA_113_MISSING_INT32_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i32_value = num; } else if (value.type == READSTAT_TYPE_FLOAT) { int32_t num = *((int32_t *)&buf[offset]); float f_num = NAN; if (ctx->machine_needs_byte_swap) { num = byteswap4(num); } if (num > ctx->max_float) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_FLOAT) { value.tag = 'a' + ((num - DTA_113_MISSING_FLOAT_A) >> 11); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else {
/*
 * Read one SAV variable record from ctx->io and register it in ctx->varinfo.
 *
 * A record whose type field is negative does not create a new entry: it
 * widens the previous entry by one slot and advances ctx->var_offset
 * (presumably a continuation slot of a long string variable - TODO confirm).
 * Otherwise a fresh spss_varinfo_t is filled in with name, print/write
 * formats, optional label and optional missing values, and ctx->var_index
 * and ctx->var_offset are advanced.
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_MALLOC / _READ / _PARSE, or whatever
 * readstat_convert() reports.  On failure ctx->var_index is not advanced.
 */
static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    sav_variable_record_t variable;
    readstat_error_t retval = READSTAT_OK;
    char *label_buf = NULL;
    int i;

    if (ctx->var_index == ctx->varinfo_capacity) {
        /* Grow through a temporary so the existing array (and the recorded
         * capacity) stay valid if realloc fails. */
        spss_varinfo_t *grown = realloc(ctx->varinfo,
                (ctx->varinfo_capacity * 2) * sizeof(spss_varinfo_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->varinfo = grown;
        ctx->varinfo_capacity *= 2;
    }

    /* "!=" rather than "<": a negative error return compared against an
     * unsigned sizeof would otherwise be converted and slip through. */
    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) != sizeof(sav_variable_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    variable.print = ctx->bswap ? byteswap4(variable.print) : variable.print;
    variable.write = ctx->bswap ? byteswap4(variable.write) : variable.write;

    readstat_type_t var_type = READSTAT_TYPE_DOUBLE;
    int32_t type = ctx->bswap ? byteswap4(variable.type) : variable.type;

    if (type < 0) {
        /* Extends the previous variable; there must be one to extend. */
        if (ctx->var_index == 0) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        ctx->var_offset++;
        ctx->varinfo[ctx->var_index - 1].width++;
        goto cleanup;
    }
    if (type > 0) {
        var_type = READSTAT_TYPE_STRING;
    }

    spss_varinfo_t *info = &ctx->varinfo[ctx->var_index];
    memset(info, 0, sizeof(spss_varinfo_t));
    info->width = 1;
    info->n_segments = 1;
    info->index = ctx->var_index;
    info->offset = ctx->var_offset;
    info->type = var_type;
    info->labels_index = -1;

    /* The long name starts out identical to the short name. */
    retval = readstat_convert(info->name, sizeof(info->name),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = readstat_convert(info->longname, sizeof(info->longname),
            variable.name, sizeof(variable.name), ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

    /* Format words: byte 0 = decimal places, byte 1 = width, byte 2 = type. */
    info->print_format.decimal_places = (variable.print & 0x000000FF);
    info->print_format.width = (variable.print & 0x0000FF00) >> 8;
    info->print_format.type = (variable.print & 0x00FF0000) >> 16;

    info->write_format.decimal_places = (variable.write & 0x000000FF);
    info->write_format.width = (variable.write & 0x0000FF00) >> 8;
    info->write_format.type = (variable.write & 0x00FF0000) >> 16;

    if (variable.has_var_label) {
        int32_t label_len;
        size_t label_capacity;
        size_t out_label_len;

        if (io->read(&label_len, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        label_len = ctx->bswap ? byteswap4(label_len) : label_len;

        /* The length comes straight from the file: reject values that would
         * go negative or overflow the size computations below. */
        if (label_len < 0 || (size_t)label_len > (SIZE_MAX - 1) / 4) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        /* On disk the label is padded to a multiple of 4 bytes; the output
         * buffer allows 4 bytes per input byte plus a terminator. */
        label_capacity = ((size_t)label_len + 3) / 4 * 4;
        out_label_len = (size_t)label_len * 4 + 1;

        /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
        label_buf = malloc(label_capacity ? label_capacity : 1);
        info->label = malloc(out_label_len);

        if (label_buf == NULL || info->label == NULL) {
            retval = READSTAT_ERROR_MALLOC;
        } else if (io->read(label_buf, label_capacity, io->io_ctx) != label_capacity) {
            retval = READSTAT_ERROR_READ;
        } else {
            retval = readstat_convert(info->label, out_label_len,
                    label_buf, label_len, ctx->converter);
        }
        if (retval != READSTAT_OK) {
            free(info->label);
            info->label = NULL;
            goto cleanup;
        }
    }

    if (variable.n_missing_values) {
        int32_t n_missing = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;
        size_t missing_bytes;

        /* Validate before abs(): abs(INT_MIN) is undefined, and only up to
         * three values are accepted.  A negative count flags a range. */
        if (n_missing < -3 || n_missing > 3) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        info->missing_range = (n_missing < 0) ? 1 : 0;
        info->n_missing_values = abs(n_missing);

        missing_bytes = (size_t)info->n_missing_values * sizeof(double);
        if (io->read(info->missing_values, missing_bytes, io->io_ctx) != missing_bytes) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        for (i = 0; i < info->n_missing_values; i++) {
            if (ctx->bswap) {
                info->missing_values[i] = byteswap_double(info->missing_values[i]);
            }

            /* Compare raw bit patterns against the file's sentinels. */
            uint64_t long_value = 0;
            memcpy(&long_value, &info->missing_values[i], 8);

            if (long_value == ctx->missing_double)
                info->missing_values[i] = NAN;
            if (long_value == ctx->lowest_double)
                info->missing_values[i] = -HUGE_VAL;
            if (long_value == ctx->highest_double)
                info->missing_values[i] = HUGE_VAL;
        }
    }

    ctx->var_index++;
    ctx->var_offset++;

cleanup:
    free(label_buf);
    return retval;
}
/*
 * Read a SAV value-label record together with the variable-index record
 * (record type 4) that must follow it.
 *
 * The labels are read into a temporary array, each label text converted with
 * ctx->converter.  Every listed variable that can be found in ctx->varinfo
 * gets its labels_index set to the current ctx->value_labels_count.  If a
 * value_label_handler is installed the labels are handed to
 * sav_submit_value_labels().  ctx->value_labels_count is then incremented.
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_READ / _MALLOC / _PARSE, or whatever
 * readstat_convert() reports.
 */
static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx) {
    int32_t label_count;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int32_t *vars = NULL;
    int32_t rec_type;
    int32_t var_count;
    readstat_type_t value_type = READSTAT_TYPE_STRING;
    char label_buf[256];
    value_label_t *value_labels = NULL;
    int i;

    /* "!=" rather than "<": a negative error return compared against an
     * unsigned sizeof would otherwise be converted and slip through. */
    if (io->read(&label_count, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        label_count = byteswap4(label_count);

    /* The count comes from the file: a negative value is malformed. */
    if (label_count < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    /* calloc checks count * size for overflow, unlike a manual multiply. */
    if ((value_labels = calloc(label_count, sizeof(value_label_t))) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    for (i = 0; i < label_count; i++) {
        value_label_t *vlabel = &value_labels[i];

        /* 9 bytes are read into the start of the struct.
         * NOTE(review): presumably an 8-byte value followed by a 1-byte
         * label length - confirm against the value_label_t layout. */
        if (io->read(vlabel, 9, io->io_ctx) != 9) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        /* Length byte plus text is padded to a multiple of 8 bytes. */
        size_t label_len = (vlabel->label_len + 8) / 8 * 8 - 1;
        if (label_len > sizeof(label_buf)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(label_buf, label_len, io->io_ctx) != label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        retval = readstat_convert(vlabel->label, sizeof(vlabel->label),
                label_buf, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    /* A record of type 4 listing the affected variables must come next. */
    if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        rec_type = byteswap4(rec_type);

    if (rec_type != 4) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (io->read(&var_count, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        var_count = byteswap4(var_count);

    if (var_count < 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if ((vars = calloc(var_count, sizeof(int32_t))) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    size_t vars_bytes = (size_t)var_count * sizeof(int32_t);
    if (io->read(vars, vars_bytes, io->io_ctx) != vars_bytes) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    for (i = 0; i < var_count; i++) {
        int var_offset = vars[i];
        if (ctx->bswap)
            var_offset = byteswap4(var_offset);

        /* NOTE(review): the stored position is decremented before lookup,
         * presumably because the file counts from 1 while varinfo offsets
         * count from 0 - the original author was unsure as well. */
        var_offset--;

        spss_varinfo_t *var = bsearch(&var_offset, ctx->varinfo, ctx->var_index,
                sizeof(spss_varinfo_t), &spss_varinfo_compare);
        if (var) {
            var->labels_index = ctx->value_labels_count;
            /* The last variable found decides how the values are typed. */
            value_type = var->type;
        }
    }

    if (ctx->value_label_handler) {
        sav_submit_value_labels(value_labels, label_count, value_type, ctx);
    }
    ctx->value_labels_count++;

cleanup:
    free(vars);
    free(value_labels);
    return retval;
}
static readstat_error_t recursive_discard(rdata_sexptype_header_t sexptype_header, rdata_ctx_t *ctx);

/* Read the next object header from the stream and discard the object it introduces. */
static readstat_error_t discard_next_sexp(rdata_ctx_t *ctx) {
    rdata_sexptype_info_t info;
    readstat_error_t error = read_sexptype_header(&info, ctx);

    if (error != READSTAT_OK)
        return error;

    return recursive_discard(info.header, ctx);
}

/*
 * Consume and throw away the serialized object whose header has already
 * been read into sexptype_header, recursing into any nested objects so that
 * the stream ends up positioned just past it.
 *
 * Returns READSTAT_OK on success.  Any failure from a nested read or discard
 * is propagated; an unrecognised type yields READSTAT_ERROR_READ and a
 * character vector holding a non-string element yields READSTAT_ERROR_PARSE.
 */
static readstat_error_t recursive_discard(rdata_sexptype_header_t sexptype_header, rdata_ctx_t *ctx) {
    uint32_t length;
    uint32_t i;
    rdata_sexptype_info_t info;
    readstat_error_t error = READSTAT_OK;

    switch (sexptype_header.type) {
        case RDATA_SEXPTYPE_SYMBOL:
        case RDATA_PSEUDO_SXP_PERSIST:
        case RDATA_PSEUDO_SXP_NAMESPACE:
        case RDATA_PSEUDO_SXP_PACKAGE:
            /* Each of these is followed by exactly one nested object. */
            error = discard_next_sexp(ctx);
            break;
        case RDATA_SEXPTYPE_BUILTIN_FUNCTION:
        case RDATA_SEXPTYPE_SPECIAL_FUNCTION:
            error = discard_character_string(0, ctx);
            break;
        case RDATA_SEXPTYPE_PAIRLIST:
            error = discard_pairlist(sexptype_header, ctx);
            break;
        case RDATA_SEXPTYPE_CHARACTER_STRING:
            error = discard_character_string(1, ctx);
            break;
        case RDATA_SEXPTYPE_RAW_VECTOR:
            error = discard_vector(sexptype_header, 1, ctx);
            break;
        case RDATA_SEXPTYPE_LOGICAL_VECTOR:
        case RDATA_SEXPTYPE_INTEGER_VECTOR:
            error = discard_vector(sexptype_header, 4, ctx);
            break;
        case RDATA_SEXPTYPE_REAL_VECTOR:
            error = discard_vector(sexptype_header, 8, ctx);
            break;
        case RDATA_SEXPTYPE_COMPLEX_VECTOR:
            error = discard_vector(sexptype_header, 16, ctx);
            break;
        case RDATA_SEXPTYPE_CHARACTER_VECTOR:
            if (read_st(ctx, &length, sizeof(length)) != sizeof(length)) {
                error = READSTAT_ERROR_READ;
                goto cleanup;
            }
            if (ctx->machine_needs_byteswap)
                length = byteswap4(length);

            /* Every element must itself be a character string. */
            for (i = 0; i < length; i++) {
                error = read_sexptype_header(&info, ctx);
                if (error != READSTAT_OK)
                    goto cleanup;
                if (info.header.type != RDATA_SEXPTYPE_CHARACTER_STRING) {
                    error = READSTAT_ERROR_PARSE;
                    goto cleanup;
                }
                error = discard_character_string(0, ctx);
                if (error != READSTAT_OK)
                    goto cleanup;
            }
            break;
        case RDATA_SEXPTYPE_GENERIC_VECTOR:
        case RDATA_SEXPTYPE_EXPRESSION_VECTOR:
            if (read_st(ctx, &length, sizeof(length)) != sizeof(length)) {
                error = READSTAT_ERROR_READ;
                goto cleanup;
            }
            if (ctx->machine_needs_byteswap)
                length = byteswap4(length);

            for (i = 0; i < length; i++) {
                if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                    goto cleanup;
            }
            if (sexptype_header.attributes) {
                if ((error = read_attributes(NULL, ctx)) != READSTAT_OK)
                    goto cleanup;
            }
            break;
        case RDATA_SEXPTYPE_DOT_DOT_DOT:
        case RDATA_SEXPTYPE_PROMISE:
        case RDATA_SEXPTYPE_LANGUAGE_OBJECT:
        case RDATA_SEXPTYPE_CLOSURE:
            /* Optional attributes and tag objects come first. */
            if (sexptype_header.attributes) {
                if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                    goto cleanup;
            }
            if (sexptype_header.tag) {
                if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                    goto cleanup;
            }
            /* CAR */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            /* CDR */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            break;
        case RDATA_SEXPTYPE_EXTERNAL_POINTER:
            /* Failures here used to be ignored, which let the code recurse
             * on an uninitialized header and still report success. */
            /* prot */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            /* tag */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            break;
        case RDATA_SEXPTYPE_ENVIRONMENT:
            /* locked */
            if (lseek_st(ctx, sizeof(uint32_t)) == -1) {
                error = READSTAT_ERROR_READ;
                goto cleanup;
            }
            /* enclosure */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            /* frame */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            /* hash table */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            /* attributes */
            if ((error = discard_next_sexp(ctx)) != READSTAT_OK)
                goto cleanup;
            break;
        case RDATA_PSEUDO_SXP_REF:
        case RDATA_PSEUDO_SXP_NIL:
        case RDATA_PSEUDO_SXP_GLOBAL_ENVIRONMENT:
        case RDATA_PSEUDO_SXP_UNBOUND_VALUE:
        case RDATA_PSEUDO_SXP_MISSING_ARGUMENT:
        case RDATA_PSEUDO_SXP_BASE_NAMESPACE:
        case RDATA_PSEUDO_SXP_EMPTY_ENVIRONMENT:
        case RDATA_PSEUDO_SXP_BASE_ENVIRONMENT:
            /* Nothing follows the header for these types. */
            break;
        default:
            error = READSTAT_ERROR_READ;
            break;
    }

cleanup:
    return error;
}
/*
 * Parse an in-memory "long value labels" extension record and report each
 * (value, label) pair through ctx->value_label_handler.
 *
 * Layout as consumed here:
 *   int32 name length, name bytes, one int32 that is skipped, int32 label
 *   count, then for each label: int32 value length, value bytes, int32 label
 *   length, label bytes.
 *
 * data / data_len - the record payload.
 * ctx             - supplies bswap, converter, the variable table and the
 *                   handler.  The matching variable's labels_index is set
 *                   and ctx->value_labels_count is incremented.
 *
 * Does nothing and returns READSTAT_OK when no value_label_handler is set.
 * Returns READSTAT_ERROR_PARSE for truncated or negative lengths or an
 * unknown variable name, READSTAT_ERROR_MALLOC on allocation failure, or
 * whatever readstat_convert() reports.
 */
static readstat_error_t sav_parse_long_value_labels_record(const void *data, size_t data_len, sav_ctx_t *ctx) {
    if (!ctx->value_label_handler)
        return READSTAT_OK;

    readstat_error_t retval = READSTAT_OK;
    int32_t label_name_len = 0;
    int32_t label_count = 0;
    int32_t i = 0;
    const char *data_ptr = data;
    const char *data_end = data_ptr + data_len;
    char var_name_buf[256*4+1];
    char label_name_buf[256];
    char *value_buffer = NULL;
    char *label_buffer = NULL;

    memset(label_name_buf, '\0', sizeof(label_name_buf));

    /* All bounds checks compare against the bytes remaining instead of
     * forming "ptr + len", which is undefined once it leaves the buffer and
     * is fooled by negative lengths. */
    if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    memcpy(&label_name_len, data_ptr, sizeof(int32_t));
    if (ctx->bswap)
        label_name_len = byteswap4(label_name_len);
    data_ptr += sizeof(int32_t);

    if (label_name_len < 0 || (size_t)(data_end - data_ptr) < (size_t)label_name_len) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    retval = readstat_convert(var_name_buf, sizeof(var_name_buf),
            data_ptr, label_name_len, ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;
    data_ptr += label_name_len;

    /* Find the variable by long name, stepping over multi-segment entries. */
    for (i = 0; i < ctx->var_index;) {
        spss_varinfo_t *info = &ctx->varinfo[i];
        if (strcmp(var_name_buf, info->longname) == 0) {
            info->labels_index = ctx->value_labels_count++;
            snprintf(label_name_buf, sizeof(label_name_buf),
                    SAV_LABEL_NAME_PREFIX "%d", info->labels_index);
            break;
        }
        i += info->n_segments;
    }

    /* Still empty means no variable matched the name. */
    if (label_name_buf[0] == '\0') {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* One int32 is skipped unread, then the label count follows. */
    if ((size_t)(data_end - data_ptr) < 2 * sizeof(int32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    data_ptr += sizeof(int32_t);
    memcpy(&label_count, data_ptr, sizeof(int32_t));
    if (ctx->bswap)
        label_count = byteswap4(label_count);
    data_ptr += sizeof(int32_t);

    for (i = 0; i < label_count; i++) {
        int32_t value_len = 0, label_len = 0;
        size_t value_buffer_len = 0, label_buffer_len = 0;
        char *grown = NULL;

        /* Value */
        if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        memcpy(&value_len, data_ptr, sizeof(int32_t));
        if (ctx->bswap)
            value_len = byteswap4(value_len);
        data_ptr += sizeof(int32_t);

        /* Validate before allocating so a bogus length cannot trigger a
         * giant allocation. */
        if (value_len < 0 || (size_t)(data_end - data_ptr) < (size_t)value_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        /* Allow 4 output bytes per input byte plus a terminator. */
        value_buffer_len = (size_t)value_len * 4 + 1;
        /* Keep the old buffer reachable if realloc fails so cleanup frees it. */
        grown = realloc(value_buffer, value_buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        value_buffer = grown;

        retval = readstat_convert(value_buffer, value_buffer_len,
                data_ptr, value_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
        data_ptr += value_len;

        /* Label */
        if ((size_t)(data_end - data_ptr) < sizeof(int32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        memcpy(&label_len, data_ptr, sizeof(int32_t));
        if (ctx->bswap)
            label_len = byteswap4(label_len);
        data_ptr += sizeof(int32_t);

        if (label_len < 0 || (size_t)(data_end - data_ptr) < (size_t)label_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        label_buffer_len = (size_t)label_len * 4 + 1;
        grown = realloc(label_buffer, label_buffer_len);
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        label_buffer = grown;

        retval = readstat_convert(label_buffer, label_buffer_len,
                data_ptr, label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
        data_ptr += label_len;

        readstat_value_t value = { .type = READSTAT_TYPE_STRING };
        value.v.string_value = value_buffer;
        ctx->value_label_handler(label_name_buf, value, label_buffer, ctx->user_ctx);
    }

cleanup:
    free(value_buffer);
    free(label_buffer);
    return retval;
}
/*
 * Second pass over the SAV dictionary: read records one after another from
 * ctx->io and dispatch on their type until the dictionary termination
 * record has been processed.
 *
 * Variable, value-label, document and termination records go to their own
 * readers.  For SAV_RECORD_TYPE_HAS_DATA records the three-int32 header
 * (subtype, element size, element count) is read, the size*count payload is
 * loaded into a reusable buffer, and the payload is handed to the parser
 * for that subtype; unrecognised subtypes are read and ignored.
 *
 * Returns READSTAT_OK once the termination record is handled, otherwise the
 * first error: READSTAT_ERROR_MALLOC, _READ, _PARSE (including an unknown
 * record type or a short payload), or whatever a sub-parser reports.
 */
static readstat_error_t sav_parse_records_pass2(sav_ctx_t *ctx) {
    void *data_buf = NULL;
    size_t data_buf_capacity = 4096;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;

    if ((data_buf = malloc(data_buf_capacity)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    while (1) {
        int32_t rec_type;
        int32_t extra_info[3];
        size_t data_len = 0;
        int i;
        int done = 0;

        /* "!=" rather than "<": a negative error return compared against an
         * unsigned sizeof would otherwise be converted and slip through. */
        if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) != sizeof(int32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (ctx->bswap) {
            rec_type = byteswap4(rec_type);
        }

        switch (rec_type) {
            case SAV_RECORD_TYPE_VARIABLE:
                retval = sav_read_variable_record(ctx);
                if (retval)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_VALUE_LABEL:
                retval = sav_read_value_label_record(ctx);
                if (retval)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_DOCUMENT:
                retval = sav_read_document_record(ctx);
                if (retval)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_DICT_TERMINATION:
                retval = sav_read_dictionary_termination_record(ctx);
                if (retval)
                    goto cleanup;
                done = 1;
                break;
            case SAV_RECORD_TYPE_HAS_DATA: {
                if (io->read(extra_info, sizeof(extra_info), io->io_ctx) != sizeof(extra_info)) {
                    retval = READSTAT_ERROR_READ;
                    goto cleanup;
                }
                if (ctx->bswap) {
                    for (i = 0; i < 3; i++)
                        extra_info[i] = byteswap4(extra_info[i]);
                }
                int subtype = extra_info[0];
                int size = extra_info[1];
                int count = extra_info[2];

                /* size and count come from the file: reject negatives and a
                 * product that does not fit in size_t rather than letting a
                 * signed multiply overflow. */
                if (size < 0 || count < 0 ||
                        (count != 0 && (size_t)size > SIZE_MAX / (size_t)count)) {
                    retval = READSTAT_ERROR_PARSE;
                    goto cleanup;
                }
                data_len = (size_t)size * (size_t)count;

                if (data_buf_capacity < data_len) {
                    /* Grow via a temporary so the old buffer is still freed
                     * at cleanup if realloc fails. */
                    void *grown = realloc(data_buf, data_len);
                    if (grown == NULL) {
                        retval = READSTAT_ERROR_MALLOC;
                        goto cleanup;
                    }
                    data_buf = grown;
                    data_buf_capacity = data_len;
                }
                if (io->read(data_buf, data_len, io->io_ctx) != data_len) {
                    retval = READSTAT_ERROR_PARSE;
                    goto cleanup;
                }

                switch (subtype) {
                    case SAV_RECORD_SUBTYPE_INTEGER_INFO:
                        /* parsed in pass 1 */
                        break;
                    case SAV_RECORD_SUBTYPE_FP_INFO:
                        retval = sav_parse_machine_floating_point_record(data_buf, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_VAR_DISPLAY:
                        retval = sav_store_variable_display_parameter_record(data_buf, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_LONG_VAR_NAME:
                        retval = sav_parse_long_variable_names_record(data_buf, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_VERY_LONG_STR:
                        retval = sav_parse_very_long_string_record(data_buf, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_LONG_VALUE_LABELS:
                        retval = sav_parse_long_value_labels_record(data_buf, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    default: /* misc. info */
                        break;
                }
                break;
            }
            default:
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
        }
        if (done)
            break;
    }

cleanup:
    free(data_buf);
    return retval;
}