/*
 * Parse one catalog block: decode the format name from the block header and
 * hand the value-label payload that follows it to
 * sas7bcat_parse_value_labels().
 *
 * data       -- start of the block
 * data_size  -- number of readable bytes at data
 * ctx        -- catalog context (byte order, string converter, handlers)
 *
 * Returns READSTAT_OK on success or when the block is too short to reach the
 * label payload (such a block cannot contain any labels, so it is skipped).
 * Otherwise returns the error reported by readstat_convert() or
 * sas7bcat_parse_value_labels().
 */
static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    /* Offset of the payload when no padding and no long name are present. */
    const size_t payload_offset = 106;
    size_t pad = 0;
    int label_count_capacity = 0;
    int label_count_used = 0;
    char name[4*32+1];

    /* Every fixed header field read below (flag byte at 2, short name at
     * 8..15, label counts up to 42+4+4) lies before payload_offset, so this
     * single check guards all of them. */
    if (data_size < payload_offset) {
        goto cleanup;
    }

    pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure
    label_count_capacity = sas_read4(&data[38+pad], ctx->bswap);
    label_count_used = sas_read4(&data[42+pad], ctx->bswap);

    /* Short (8-byte) name; replaced below if a long name is present. */
    retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter);
    if (retval != READSTAT_OK) {
        goto cleanup;
    }

    if (pad) {
        pad += 16;
    }

    if ((data[2] & 0x80)) { // has long name
        /* The 32-byte long name sits where the payload would otherwise start. */
        if (data_size < payload_offset + pad + 32) {
            goto cleanup;
        }
        retval = readstat_convert(name, sizeof(name), &data[payload_offset+pad], 32, ctx->converter);
        if (retval != READSTAT_OK) {
            goto cleanup;
        }
        pad += 32;
    }

    /* Without this check the length passed below would wrap around to a
     * huge size_t value. */
    if (data_size < payload_offset + pad) {
        goto cleanup;
    }

    retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
            label_count_used, label_count_capacity, name, ctx);

cleanup:
    return retval;
}
/*
 * Walk the fixed-size "XLSR" records stored in an index buffer and append a
 * block pointer to ctx->block_pointers for every record whose byte at
 * ctx->xlsr_O_offset is 'O'.
 *
 * index -- start of the index data
 * len   -- number of bytes at index
 * ctx   -- catalog context; block_pointers, block_pointers_used and
 *          block_pointers_capacity are updated in place
 *
 * Each stored pointer packs the page number into the upper 32 bits and the
 * position within the page into the lower 32 bits.
 *
 * Scanning stops quietly at the first record that does not carry the "XLSR"
 * signature. Returns READSTAT_ERROR_MALLOC if the pointer array cannot be
 * grown, READSTAT_OK otherwise.
 */
static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sas7bcat_ctx_t *ctx) {
    const char *end = index + len;
    const char *record;

    for (record = index; record + ctx->xlsr_size <= end; record += ctx->xlsr_size) {
        if (memcmp(record, "XLSR", 4) != 0) {
            // some block pointers seem to have 8 bytes of extra padding
            record += 8;
            if (memcmp(record, "XLSR", 4) != 0) {
                return READSTAT_OK;
            }
        }

        if (record[ctx->xlsr_O_offset] == 'O') {
            uint32_t page_number = 0;
            uint32_t page_position = 0;

            if (ctx->u64) {
                page_number = sas_read4(&record[8], ctx->bswap);
                page_position = sas_read4(&record[16], ctx->bswap);
            } else {
                page_number = sas_read2(&record[4], ctx->bswap);
                page_position = sas_read2(&record[8], ctx->bswap);
            }

            ctx->block_pointers[ctx->block_pointers_used] = ((uint64_t)page_number << 32) + page_position;
            ctx->block_pointers_used++;
        }

        /* Grow the array as soon as it is full so the next append has room. */
        if (ctx->block_pointers_used == ctx->block_pointers_capacity) {
            ctx->block_pointers_capacity *= 2;
            ctx->block_pointers = readstat_realloc(ctx->block_pointers,
                    ctx->block_pointers_capacity * sizeof(uint64_t));
            if (ctx->block_pointers == NULL) {
                return READSTAT_ERROR_MALLOC;
            }
        }
    }

    return READSTAT_OK;
}
/*
 * Copy the payload of a chained block into buffer.
 *
 * A block is stored as a chain of links. Each link starts with a header
 * (32 bytes when ctx->u64 is set, 16 otherwise) that names the page and
 * position of the next link and the length of the payload that follows the
 * header. Payloads are concatenated into buffer in chain order.
 *
 * buffer          -- destination
 * buffer_len      -- capacity of buffer in bytes
 * start_page      -- 1-based page number of the first link
 * start_page_pos  -- byte position of the first link within that page
 * ctx             -- catalog context (I/O callbacks, page geometry)
 *
 * The walk ends when the next page or position is not positive, when the
 * next page exceeds ctx->page_count, or after ctx->page_count links.
 *
 * Returns READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O failure,
 * READSTAT_ERROR_PARSE if the payloads would not fit in buffer, and
 * READSTAT_OK otherwise.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len,
        int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    char header[32];
    int header_len = 16;
    int position_offset = 4;
    int length_offset = 6;
    int page = start_page;
    int page_pos = start_page_pos;
    int payload_len = 0;
    int written = 0;
    int links_followed = 0;

    /* The 64-bit layout uses a longer header with the fields further apart. */
    if (ctx->u64) {
        header_len = 32;
        position_offset = 8;
        length_offset = 10;
    }

    for (;;) {
        if (page <= 0 || page_pos <= 0 || page > ctx->page_count) {
            break;
        }
        if (links_followed++ >= ctx->page_count) {
            break;
        }

        if (io->seek(ctx->header_size+(page-1)*ctx->page_size+page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            return READSTAT_ERROR_SEEK;
        }
        if (io->read(header, header_len, io->io_ctx) < header_len) {
            return READSTAT_ERROR_READ;
        }

        page = sas_read4(&header[0], ctx->bswap);
        page_pos = sas_read2(&header[position_offset], ctx->bswap);
        payload_len = sas_read2(&header[length_offset], ctx->bswap);

        /* Refuse to write past the end of the caller's buffer. */
        if (written + payload_len > buffer_len) {
            return READSTAT_ERROR_PARSE;
        }
        if (io->read(buffer + written, payload_len, io->io_ctx) < payload_len) {
            return READSTAT_ERROR_READ;
        }
        written += payload_len;
    }

    return READSTAT_OK;
}
/*
 * Parse a column-attributes subheader and record offset, width and type for
 * each column entry it contains.
 *
 * subheader -- start of the subheader
 * len       -- number of bytes in the subheader
 * ctx       -- parsing context; col_info is grown as needed and
 *              col_attrs_count / col_info_count / max_col_width are updated
 *
 * Layout used here: a signature (8 bytes when ctx->u64 is set, 4 otherwise),
 * a 2-byte "remainder" length, then fixed-size entries starting at
 * signature_len+8. Each entry is 16 bytes (u64) or 12 bytes and holds the
 * column offset, a 4-byte width and, 6 bytes after the width, a type byte.
 *
 * Returns READSTAT_ERROR_PARSE if the subheader is too short, its remainder
 * field disagrees with len, or a column has an unknown type byte;
 * READSTAT_ERROR_MALLOC if col_info cannot be grown; READSTAT_OK otherwise.
 */
static readstat_error_t sas_parse_column_attributes_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    const size_t signature_len = ctx->u64 ? 8 : 4;
    /* Bytes of the subheader that are not column entries. */
    const size_t fixed_len = ctx->u64 ? 28 : 20;
    const size_t entry_len = ctx->u64 ? 16 : 12;
    /* The width field follows the 8- or 4-byte column offset. */
    const size_t width_offset = ctx->u64 ? 8 : 4;
    const char *cap = NULL;
    uint16_t remainder = 0;
    int cmax = 0;
    int first_col = 0;
    int new_attrs_count = 0;
    int i;

    /* Without this check, len - fixed_len would wrap around and the
     * remainder field could be read past the end of a short subheader. */
    if (len < fixed_len) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    cmax = (len - fixed_len) / entry_len;
    cap = &subheader[signature_len+8];

    remainder = sas_read2(&subheader[signature_len], ctx->bswap);
    if (remainder != len - (4+2*signature_len)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    first_col = ctx->col_attrs_count;
    new_attrs_count = first_col + cmax;

    if (ctx->col_info_count < new_attrs_count) {
        /* Go through a temporary so the existing array is neither leaked
         * nor dereferenced as NULL if the allocation fails. */
        void *grown = realloc(ctx->col_info, (size_t)new_attrs_count * sizeof(col_info_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->col_info = grown;
        ctx->col_info_count = new_attrs_count;
    }
    ctx->col_attrs_count = new_attrs_count;

    for (i=first_col; i<new_attrs_count; i++) {
        if (ctx->u64) {
            ctx->col_info[i].offset = sas_read8(&cap[0], ctx->bswap);
        } else {
            ctx->col_info[i].offset = sas_read4(&cap[0], ctx->bswap);
        }

        ctx->col_info[i].width = sas_read4(&cap[width_offset], ctx->bswap);
        if (ctx->col_info[i].width > ctx->max_col_width) {
            ctx->max_col_width = ctx->col_info[i].width;
        }

        if (cap[width_offset+6] == SAS_COLUMN_TYPE_NUM) {
            ctx->col_info[i].type = READSTAT_TYPE_DOUBLE;
        } else if (cap[width_offset+6] == SAS_COLUMN_TYPE_CHR) {
            ctx->col_info[i].type = READSTAT_TYPE_STRING;
        } else {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        ctx->col_info[i].index = i;

        cap += entry_len;
    }

cleanup:
    return retval;
}
/*
 * Copy the payload of a chained block into buffer.
 *
 * A block is stored as a chain of links. Each link starts with a 16-byte
 * header holding the next page (4 bytes at offset 0), the next position
 * (2 bytes at offset 4) and the payload length (2 bytes at offset 6); the
 * payload follows the header directly.
 *
 * buffer          -- destination
 * buffer_len      -- capacity of buffer in bytes
 * start_page      -- 1-based page number of the first link
 * start_page_pos  -- byte position of the first link within that page
 * ctx             -- catalog context (I/O callbacks, page geometry)
 *
 * The walk ends when the next page or position is not positive, when the
 * next page exceeds ctx->page_count, or after ctx->page_count links (so a
 * cyclic chain cannot loop forever).
 *
 * Returns READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O failure,
 * READSTAT_ERROR_PARSE if the payloads would not fit in buffer, and
 * READSTAT_OK otherwise.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int link_count = 0;
    int chain_link_len = 0;
    size_t buffer_offset = 0;
    char chain_link[16];
    /* Kept as a signed int so a negative result from io->read is compared
     * as negative instead of being converted to a huge unsigned value. */
    const int chain_link_header_len = sizeof(chain_link);

    while (next_page > 0 && next_page_pos > 0 &&
            next_page <= ctx->page_count && link_count++ < ctx->page_count) {
        if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&chain_link[0], ctx->bswap);
        next_page_pos = sas_read2(&chain_link[4], ctx->bswap);
        chain_link_len = sas_read2(&chain_link[6], ctx->bswap);

        /* The lengths come from the file; never write past the caller's
         * buffer. buffer_offset <= buffer_len holds on every iteration. */
        if (chain_link_len < 0 || (size_t)chain_link_len > buffer_len - buffer_offset) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(buffer + buffer_offset, chain_link_len, io->io_ctx) < chain_link_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        buffer_offset += chain_link_len;
    }

cleanup:
    return retval;
}
/*
 * Compute the total payload size of a chained block by walking its links
 * and summing the length field of each 16-byte link header.
 *
 * start_page      -- 1-based page number of the first link
 * start_page_pos  -- byte position of the first link within that page
 * ctx             -- catalog context (I/O callbacks, page geometry)
 * outError        -- if non-NULL, receives READSTAT_OK or the error code
 *
 * The walk ends when the next page or position is not positive, when the
 * next page exceeds ctx->page_count, or after ctx->page_count links (so a
 * cyclic chain cannot loop forever).
 *
 * Returns the summed length in bytes, or -1 if a seek or read failed.
 */
static int sas7bcat_block_size(int start_page, int start_page_pos, sas7bcat_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int link_count = 0;
    int buffer_len = 0;
    int chain_link_len = 0;
    char chain_link[16];
    /* Kept as a signed int so a negative result from io->read is compared
     * as negative instead of being converted to a huge unsigned value. */
    const int chain_link_header_len = sizeof(chain_link);

    // calculate buffer size needed
    while (next_page > 0 && next_page_pos > 0 &&
            next_page <= ctx->page_count && link_count++ < ctx->page_count) {
        if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&chain_link[0], ctx->bswap);
        next_page_pos = sas_read2(&chain_link[4], ctx->bswap);
        chain_link_len = sas_read2(&chain_link[6], ctx->bswap);

        buffer_len += chain_link_len;
    }

cleanup:
    if (outError) {
        *outError = retval;
    }

    return retval == READSTAT_OK ? buffer_len : -1;
}
/*
 * Copy the payload of a chained catalog block into buffer.
 *
 * A block is stored as a chain of links. Each link starts with a 16-byte
 * header holding the next page (4 bytes at offset 0), the next position
 * (2 bytes at offset 4) and the payload length (2 bytes at offset 6); the
 * payload follows the header directly.
 *
 * buffer          -- destination
 * buffer_len      -- capacity of buffer in bytes
 * start_page      -- 1-based page number of the first link
 * start_page_pos  -- byte position of the first link within that page
 * ctx             -- catalog context (file descriptor, page geometry)
 *
 * The walk ends when the next page or position is not positive.
 *
 * Returns READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O failure,
 * READSTAT_ERROR_PARSE if the payloads would not fit in buffer, and
 * READSTAT_OK otherwise.
 */
static readstat_error_t sas_catalog_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas_catalog_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int block_len = 0;
    size_t buffer_offset = 0;
    /* The link header is small and fixed-size, so it lives on the stack;
     * this removes an allocation that could fail unchecked. */
    char link_header[16];
    const int link_header_len = sizeof(link_header);

    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, link_header, link_header_len) < link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&link_header[0], ctx->bswap);
        next_page_pos = sas_read2(&link_header[4], ctx->bswap);
        block_len = sas_read2(&link_header[6], ctx->bswap);

        /* The lengths come from the file; never write past the caller's
         * buffer. buffer_offset <= buffer_len holds on every iteration. */
        if (block_len < 0 || (size_t)block_len > buffer_len - buffer_offset) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (read(ctx->fd, buffer + buffer_offset, block_len) < block_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        buffer_offset += block_len;
    }

cleanup:
    return retval;
}
/*
 * Compute the total payload size of a chained catalog block by walking its
 * links and summing the length field of each 16-byte link header.
 *
 * start_page      -- 1-based page number of the first link
 * start_page_pos  -- byte position of the first link within that page
 * ctx             -- catalog context (file descriptor, page geometry)
 * outError        -- if non-NULL, receives READSTAT_OK or the error code
 *
 * The walk ends when the next page or position is not positive.
 *
 * Returns the summed length in bytes, or -1 if a seek or read failed.
 */
static int sas_catalog_block_size(int start_page, int start_page_pos, sas_catalog_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int buffer_len = 0;
    int block_len = 0;
    /* The link header is small and fixed-size, so it lives on the stack;
     * this removes an allocation that could fail unchecked. */
    char link_header[16];
    const int link_header_len = sizeof(link_header);

    // calculate buffer size needed
    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, link_header, link_header_len) < link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&link_header[0], ctx->bswap);
        next_page_pos = sas_read2(&link_header[4], ctx->bswap);
        block_len = sas_read2(&link_header[6], ctx->bswap);

        buffer_len += block_len;
    }

cleanup:
    if (outError) {
        *outError = retval;
    }

    return retval == READSTAT_OK ? buffer_len : -1;
}
/*
 * Parse a row-size subheader: store the row length and rows-per-page in ctx
 * and clamp ctx->row_limit to the total row count.
 *
 * subheader -- start of the subheader
 * len       -- number of bytes in the subheader
 * ctx       -- parsing context; row_length, page_row_count and row_limit
 *              are updated
 *
 * Field positions depend on ctx->u64: 8-byte fields at offsets 40, 48 and
 * 120 when set, 4-byte fields at offsets 20, 24 and 60 otherwise.
 *
 * A row_limit of 0 is treated as "no limit" and replaced by the total row
 * count; a non-zero limit is only ever lowered.
 *
 * Returns READSTAT_ERROR_PARSE if the subheader is too short to contain
 * those fields, READSTAT_OK otherwise.
 */
static readstat_error_t sas_parse_row_size_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    uint64_t total_row_count;
    uint64_t row_length, page_row_count;
    /* The last field read ends at byte 128 (u64) or byte 64. */
    const size_t min_len = ctx->u64 ? 128 : 64;

    if (len < min_len) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->u64) {
        row_length = sas_read8(&subheader[40], ctx->bswap);
        total_row_count = sas_read8(&subheader[48], ctx->bswap);
        page_row_count = sas_read8(&subheader[120], ctx->bswap);
    } else {
        row_length = sas_read4(&subheader[20], ctx->bswap);
        total_row_count = sas_read4(&subheader[24], ctx->bswap);
        page_row_count = sas_read4(&subheader[60], ctx->bswap);
    }

    ctx->row_length = row_length;
    ctx->page_row_count = page_row_count;
    if (ctx->row_limit == 0 || total_row_count < ctx->row_limit) {
        ctx->row_limit = total_row_count;
    }

cleanup:
    return retval;
}
/*
 * Parse a column-size subheader and store the column count in
 * ctx->column_count.
 *
 * subheader -- start of the subheader
 * len       -- number of bytes in the subheader
 * ctx       -- parsing context; column_count is updated
 *
 * The count is an 8-byte field at offset 8 when ctx->u64 is set, and a
 * 4-byte field at offset 4 otherwise.
 *
 * Returns READSTAT_ERROR_PARSE if the subheader is too short to contain
 * that field, READSTAT_OK otherwise.
 */
static readstat_error_t sas_parse_column_size_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    uint64_t col_count;
    /* The count field ends at byte 16 (u64) or byte 8. */
    const size_t min_len = ctx->u64 ? 16 : 8;

    if (len < min_len) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->u64) {
        col_count = sas_read8(&subheader[8], ctx->bswap);
    } else {
        col_count = sas_read4(&subheader[4], ctx->bswap);
    }

    ctx->column_count = col_count;

cleanup:
    return retval;
}
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; const char *lbp1 = value_start; uint32_t *value_offset = calloc(label_count_used, sizeof(uint32_t)); /* Doubles appear to be stored as big-endian, always */ int bswap_doubles = machine_is_little_endian(); int is_string = (name[0] == '$'); /* Pass 1 -- find out the offset of the labels */ for (i=0; i<label_count_capacity; i++) { if (&lbp1[2] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (i<label_count_used) { uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap); if (label_pos >= label_count_used) { retval = READSTAT_ERROR_PARSE; goto cleanup; } value_offset[label_pos] = lbp1 - value_start; } lbp1 += 6 + lbp1[2]; } const char *lbp2 = lbp1; /* Pass 2 -- parse pairs of values & labels */ for (i=0; i<label_count_used && i<label_count_capacity; i++) { lbp1 = value_start + value_offset[i]; if (&lbp1[30] - value_start > value_labels_len || &lbp2[10] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } size_t label_len = sas_read2(&lbp2[8], ctx->bswap); size_t value_entry_len = 6 + lbp1[2]; const char *label = &lbp2[10]; readstat_value_t value = { .type = is_string ? 
READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE }; if (is_string) { char val[4*16+1]; retval = readstat_convert(val, sizeof(val), &lbp1[value_entry_len-16], 16, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = val; } else { uint64_t val = sas_read8(&lbp1[22], bswap_doubles); double dval = NAN; if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) { value.tag = (val >> 40); if (value.tag) { value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else { memcpy(&dval, &val, 8); dval *= -1.0; } value.v.double_value = dval; } if (ctx->value_label_handler) { if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } lbp2 += 8 + 2 + label_len + 1; }