/*
 * Read the "<map>" section and record the section offsets it carries.
 *
 * Does nothing and returns 0 when ctx->file_is_xmlish is not set.
 * Otherwise consumes the "<map>" tag, fourteen 64-bit entries and the
 * "</map>" tag, in that order. Entries 9, 10 and 11 are stored in
 * ctx->data_offset, ctx->strls_offset and ctx->value_labels_offset,
 * byte-swapped first when ctx->machine_needs_byte_swap is set.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ when the entries
 * cannot be read in full, or the error reported by dta_read_tag().
 */
static readstat_error_t dta_read_map(int fd, dta_ctx_t *ctx) {
    enum { MAP_ENTRY_COUNT = 14, MAP_DATA = 9, MAP_STRLS = 10, MAP_VALUE_LABELS = 11 };
    uint64_t entries[MAP_ENTRY_COUNT];
    readstat_error_t status;

    if (!ctx->file_is_xmlish)
        return 0;

    status = dta_read_tag(fd, ctx, "<map>");
    if (status != READSTAT_OK)
        return status;

    if (read(fd, entries, sizeof(entries)) != sizeof(entries))
        return READSTAT_ERROR_READ;

    /* Offsets are recorded before the closing tag is validated. */
    ctx->data_offset = ctx->machine_needs_byte_swap
        ? byteswap8(entries[MAP_DATA])
        : entries[MAP_DATA];
    ctx->strls_offset = ctx->machine_needs_byte_swap
        ? byteswap8(entries[MAP_STRLS])
        : entries[MAP_STRLS];
    ctx->value_labels_offset = ctx->machine_needs_byte_swap
        ? byteswap8(entries[MAP_VALUE_LABELS])
        : entries[MAP_VALUE_LABELS];

    return dta_read_tag(fd, ctx, "</map>");
}
/*
 * Copy a machine floating-point info record out of `data` and store its
 * sysmis / highest / lowest fields in ctx->missing_double,
 * ctx->highest_double and ctx->lowest_double. Each field is passed
 * through byteswap8() first when ctx->bswap is set.
 *
 * `data` must point to at least
 * sizeof(sav_machine_floating_point_info_record_t) readable bytes.
 * Always returns READSTAT_OK.
 */
static readstat_error_t sav_parse_machine_floating_point_record(const void *data, sav_ctx_t *ctx) {
    sav_machine_floating_point_info_record_t record;

    /* memcpy rather than a pointer cast: `data` carries no alignment guarantee. */
    memcpy(&record, data, sizeof(record));

    ctx->missing_double = ctx->bswap
        ? byteswap8(record.sysmis)
        : record.sysmis;
    ctx->highest_double = ctx->bswap
        ? byteswap8(record.highest)
        : record.highest;
    ctx->lowest_double = ctx->bswap
        ? byteswap8(record.lowest)
        : record.lowest;

    return READSTAT_OK;
}
/*
 * Read the "<map>" chunk and record the section offsets it carries.
 *
 * Returns READSTAT_OK immediately when ctx->file_is_xmlish is not set.
 * Otherwise asks dta_read_chunk() for the fourteen 64-bit entries found
 * between "<map>" and "</map>". Entries 9, 10 and 11 are stored in
 * ctx->data_offset, ctx->strls_offset and ctx->value_labels_offset,
 * byte-swapped first when ctx->machine_needs_byte_swap is set.
 *
 * Returns READSTAT_OK on success or the error from dta_read_chunk().
 */
static readstat_error_t dta_read_map(dta_ctx_t *ctx) {
    enum { MAP_ENTRY_COUNT = 14, MAP_DATA = 9, MAP_STRLS = 10, MAP_VALUE_LABELS = 11 };
    uint64_t entries[MAP_ENTRY_COUNT];
    readstat_error_t status;

    if (!ctx->file_is_xmlish)
        return READSTAT_OK;

    status = dta_read_chunk(ctx, "<map>", entries, sizeof(entries), "</map>");
    if (status != READSTAT_OK)
        return status;

    ctx->data_offset = ctx->machine_needs_byte_swap
        ? byteswap8(entries[MAP_DATA])
        : entries[MAP_DATA];
    ctx->strls_offset = ctx->machine_needs_byte_swap
        ? byteswap8(entries[MAP_STRLS])
        : entries[MAP_STRLS];
    ctx->value_labels_offset = ctx->machine_needs_byte_swap
        ? byteswap8(entries[MAP_VALUE_LABELS])
        : entries[MAP_VALUE_LABELS];

    return READSTAT_OK;
}
/*
 * Build the catalog block for one label set.
 *
 * Layout written into block->data, as established by the code below:
 *   - a 106-byte header: byte 2 is set to 0x80 for long names, the name
 *     (space padded to 8 bytes, or its first 8 bytes if long) sits at
 *     offset 8, and the label count is stored twice, at offsets 38 and 42;
 *   - for names longer than 8 bytes, a 32-byte space-padded long name at
 *     offset 106 (names are truncated to 32 bytes);
 *   - one 30-byte value entry per label: byte 2 = 24, the entry index at
 *     offset 10, then either a 16-byte space-padded string key at offset
 *     14 or the negated double key, big-endian, at offset 22;
 *   - one label entry per label: a 16-bit length at offset 8, the label
 *     bytes at offset 10, and one trailing zero byte left by calloc.
 * The count, index and length integers are copied in host byte order.
 *
 * Returns a block allocated with calloc() (this function never frees it),
 * or NULL when the allocation fails.
 */
static sas7bcat_block_t *sas7bcat_block_for_label_set(readstat_label_set_t *r_label_set) {
    size_t len = 0;
    size_t name_len = strlen(r_label_set->name);
    int j;
    char name[32];

    len += 106;

    if (name_len > 8) {
        len += 32; // long name
        if (name_len > 32) {
            name_len = 32;
        }
    }

    memcpy(&name[0], r_label_set->name, name_len);

    /* First pass: compute the total payload size. */
    for (j=0; j<r_label_set->value_labels_count; j++) {
        readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j);
        len += 30; // Value: 14-byte header + 16-byte padded value

        len += 8 + 2 + value_label->label_len + 1;
    }

    sas7bcat_block_t *block = calloc(1, sizeof(sas7bcat_block_t) + len);
    if (block == NULL) {
        /* Previously dereferenced unconditionally; report failure instead. */
        return NULL;
    }
    block->len = len;

    off_t begin = 106;
    int32_t count = r_label_set->value_labels_count;
    memcpy(&block->data[38], &count, sizeof(int32_t));
    memcpy(&block->data[42], &count, sizeof(int32_t));
    if (name_len > 8) {
        block->data[2] = (char)0x80;
        memcpy(&block->data[8], name, 8);

        memset(&block->data[106], ' ', 32);
        memcpy(&block->data[106], name, name_len);

        begin += 32;
    } else {
        memset(&block->data[8], ' ', 8);
        memcpy(&block->data[8], name, name_len);
    }

    /* lbp1 walks the fixed-size value entries, lbp2 the variable-size
     * label entries that follow all of the value entries. */
    char *lbp1 = &block->data[begin];
    char *lbp2 = &block->data[begin+r_label_set->value_labels_count*30];

    /* Second pass: fill in both tables. */
    for (j=0; j<r_label_set->value_labels_count; j++) {
        readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j);
        lbp1[2] = 24; // size - 6
        int32_t index = j;
        memcpy(&lbp1[10], &index, sizeof(int32_t));
        if (r_label_set->type == READSTAT_TYPE_STRING) {
            size_t string_len = value_label->string_key_len;
            if (string_len > 16)
                string_len = 16;
            memset(&lbp1[14], ' ', 16);
            memcpy(&lbp1[14], value_label->string_key, string_len);
        } else {
            uint64_t big_endian_value;
            double double_value = -1.0 * value_label->double_key;
            memcpy(&big_endian_value, &double_value, sizeof(double));
            if (machine_is_little_endian()) {
                big_endian_value = byteswap8(big_endian_value);
            }
            memcpy(&lbp1[22], &big_endian_value, sizeof(uint64_t));
        }

        /* NOTE(review): the length is narrowed to int16_t here, while the
         * buffer above was sized from the full label_len. Presumably labels
         * longer than INT16_MAX are rejected before this point - confirm. */
        int16_t label_len = value_label->label_len;
        memcpy(&lbp2[8], &label_len, sizeof(int16_t));
        memcpy(&lbp2[10], value_label->label, label_len);

        lbp1 += 30;
        lbp2 += 8 + 2 + value_label->label_len + 1;
    }

    return block;
}
/*
 * Read and validate the header of a SAS data or catalog file.
 *
 * Fills in ctx->pad1, ctx->u64, ctx->little_endian, ctx->encoding,
 * ctx->header_size, ctx->page_size, ctx->page_count and ctx->vendor, then
 * leaves the file positioned at offset ctx->header_size.
 *
 * fd            descriptor to read from
 * ctx           header info to populate. NOTE(review): pad1, u64 and
 *               encoding are only written on some paths, so this appears
 *               to assume the caller zero-initialised *ctx - confirm.
 * error_handler optional callback that receives a formatted message for
 *               charset and seek failures; may be NULL
 * user_ctx      passed through to error_handler
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_READ / READSTAT_ERROR_PARSE /
 * READSTAT_ERROR_SEEK / READSTAT_ERROR_UNSUPPORTED_CHARSET.
 *
 * All read() results are compared with != rather than <: read() returns a
 * signed value, and "-1 < sizeof(x)" is false after the usual arithmetic
 * conversions, so a failed read used to be treated as success.
 */
readstat_error_t sas_read_header(int fd, sas_header_info_t *ctx,
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t  header_start;
    sas_header_end_t    header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];

    if (read(fd, &header_start, sizeof(sas_header_start_t)) != sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    /* Accept either of the two known magic numbers. */
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->u64 = 1;
    }

    /* bswap is set when the file's byte order differs from the host's. */
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        ctx->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        ctx->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            ctx->encoding = _charset_table[i].name;
            break;
        }
    }
    if (ctx->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }

    /* The header and page sizes are read from absolute offset 196 + pad1. */
    if (readstat_lseek(fd, 196 + ctx->pad1, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %d\n", 196 + ctx->pad1);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (read(fd, &header_size, sizeof(uint32_t)) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (read(fd, &page_size, sizeof(uint32_t)) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx->header_size = bswap ? byteswap4(header_size) : header_size;
    ctx->page_size = bswap ? byteswap4(page_size) : page_size;

    if (ctx->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* The page count is 8 bytes wide when u64 is set, 4 bytes otherwise. */
    if (ctx->u64) {
        uint64_t page_count;
        if (read(fd, &page_count, sizeof(uint64_t)) != sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (read(fd, &page_count, sizeof(uint32_t)) != sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap4(page_count) : page_count;
    }

    if (readstat_lseek(fd, 8, SEEK_CUR) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }
    if (read(fd, &header_end, sizeof(sas_header_end_t)) != sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (strncmp(header_end.release, "9.0000M0", sizeof(header_end.release)) == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        ctx->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        ctx->vendor = READSTAT_VENDOR_SAS;
    }

    /* Leave the file positioned just past the header. */
    if (readstat_lseek(fd, ctx->header_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            /* Cast so the argument always matches the %lld specifier. */
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %lld\n",
                    (long long)ctx->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}
/*
 * Load 8 bytes from `data` into a uint64_t without requiring alignment.
 * The value is passed through byteswap8() when `bswap` is non-zero and
 * returned unchanged otherwise.
 */
uint64_t sas_read8(const char *data, int bswap) {
    uint64_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap) {
        return byteswap8(value);
    }
    return value;
}
/*
 * Read and validate the header of a SAS data or catalog file through the
 * io callbacks.
 *
 * Fills in hinfo->pad1, u64, little_endian, encoding, file_label,
 * creation_time, modification_time, header_size, page_size, page_count,
 * the release version fields (when the release string parses) and vendor,
 * then seeks to offset hinfo->header_size.
 *
 * io            read/seek callbacks plus their io_ctx
 * hinfo         header info to populate. NOTE(review): pad1, u64 and
 *               encoding are only written on some paths, so this appears
 *               to assume the caller zero-initialised *hinfo - confirm.
 * error_handler optional callback that receives a formatted message for
 *               charset and some seek failures; may be NULL
 * user_ctx      passed through to error_handler
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_READ / READSTAT_ERROR_PARSE /
 * READSTAT_ERROR_SEEK / READSTAT_ERROR_UNSUPPORTED_CHARSET.
 *
 * Read results are compared with != rather than <: if io->read returns a
 * signed type, a negative error value compared against sizeof() with <
 * is promoted to a large unsigned number and would pass as success.
 */
readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo,
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t  header_start;
    sas_header_end_t    header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];
    /* Timestamps are offsets from 1 January 1960 00:00; mktime() interprets
     * the broken-down time as local time. */
    struct tm epoch_tm = { .tm_year = 60, .tm_mday = 1 };
    time_t epoch = mktime(&epoch_tm);

    if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) != sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    /* Accept either of the two known magic numbers. */
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->u64 = 1;
    }

    /* bswap is set when the file's byte order differs from the host's. */
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        hinfo->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        hinfo->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            hinfo->encoding = _charset_table[i].name;
            break;
        }
    }
    if (hinfo->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }

    memcpy(hinfo->file_label, header_start.file_label, sizeof(header_start.file_label));

    if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    double creation_time, modification_time;

    if (io->read(&creation_time, sizeof(double), io->io_ctx) != sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&modification_time, sizeof(double), io->io_ctx) != sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    hinfo->creation_time = bswap ? byteswap_double(creation_time) + epoch : creation_time + epoch;
    /* Fixed: this was previously derived from creation_time. */
    hinfo->modification_time = bswap ? byteswap_double(modification_time) + epoch : modification_time + epoch;

    if (io->seek(16, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    hinfo->header_size = bswap ? byteswap4(header_size) : header_size;
    hinfo->page_size = bswap ? byteswap4(page_size) : page_size;

    if (hinfo->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* The page count is 8 bytes wide when u64 is set, 4 bytes otherwise. */
    if (hinfo->u64) {
        uint64_t page_count;
        if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) != sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) != sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap4(page_count) : page_count;
    }

    if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }
    if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) != sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* Initialised so they are never read indeterminate; the vendor test
     * below is only meaningful when all three fields were parsed. */
    int major = 0, minor = 0, revision = 0;
    int release_parsed =
        (sscanf(header_end.release, "%1d.%04dM%1d", &major, &minor, &revision) == 3);
    if (release_parsed) {
        hinfo->major_version = major;
        hinfo->minor_version = minor;
        hinfo->revision = revision;
    }
    if (release_parsed && major == 9 && minor == 0 && revision == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        hinfo->vendor = READSTAT_VENDOR_SAS;
    }

    /* Leave the stream positioned just past the header. */
    if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf),
                    "ReadStat: Failed to seek to position %" PRId64 "\n", hinfo->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}