/*
 * Follow the chain of block links that starts at (start_page, start_page_pos)
 * and add up the payload length stored in each link header.
 *
 * start_page      1-based page number of the first link (<= 0 ends the walk)
 * start_page_pos  byte position of the first link inside that page (<= 0 ends the walk)
 * ctx             catalog context; supplies the I/O handlers, header/page sizes,
 *                 page count, byte-swap flag and the u64 layout flag
 * outError        optional; receives READSTAT_OK or the seek/read error
 *
 * Returns the summed payload length, or -1 if a seek or read failed.
 */
static int sas7bcat_block_size(int start_page, int start_page_pos, sas7bcat_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int link_count = 0;
    int buffer_len = 0;
    int chain_link_len = 0;
    char chain_link[32];
    /* Same header layout as sas7bcat_read_block: 32 bytes when ctx->u64 is set, else 16. */
    int chain_link_header_len = ctx->u64 ? 32 : 16;

    // calculate buffer size needed
    /* link_count caps the walk at page_count links so a chain that points
     * back at itself cannot loop forever. */
    while (next_page > 0 && next_page_pos > 0 &&
            next_page <= ctx->page_count && link_count++ < ctx->page_count) {
        if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        /* Compare against an int, not sizeof(): with a size_t on the right a
         * negative (error) return would be converted to a huge unsigned value
         * and slip past the check. */
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&chain_link[0], ctx->bswap);
        if (ctx->u64) {
            next_page_pos = sas_read2(&chain_link[8], ctx->bswap);
            chain_link_len = sas_read2(&chain_link[10], ctx->bswap);
        } else {
            next_page_pos = sas_read2(&chain_link[4], ctx->bswap);
            chain_link_len = sas_read2(&chain_link[6], ctx->bswap);
        }

        buffer_len += chain_link_len;
    }

cleanup:
    if (outError)
        *outError = retval;

    return retval == READSTAT_OK ? buffer_len : -1;
}
/*
 * Copy the payload of every link in the block chain that starts at
 * (start_page, start_page_pos) into buffer, one link after another.
 *
 * buffer          destination for the concatenated payloads
 * buffer_len      capacity of buffer in bytes
 * start_page      1-based page number of the first link (<= 0 ends the walk)
 * start_page_pos  byte position of the first link inside that page (<= 0 ends the walk)
 * ctx             catalog context; supplies the I/O handlers, header/page sizes,
 *                 page count, byte-swap flag and the u64 layout flag
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O
 * failure, or READSTAT_ERROR_PARSE if the chain holds more data than
 * buffer_len bytes.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int link_count = 0;
    int chain_link_len = 0;
    size_t buffer_offset = 0;
    char chain_link[32];
    /* Link headers are 32 bytes when ctx->u64 is set, else 16. */
    int chain_link_header_len = ctx->u64 ? 32 : 16;

    /* Stop on out-of-range pages and after page_count links so a malformed
     * (e.g. cyclic) chain terminates. */
    while (next_page > 0 && next_page_pos > 0 &&
            next_page <= ctx->page_count && link_count++ < ctx->page_count) {
        if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        /* int on the right-hand side keeps the comparison signed, so an
         * error return of -1 is caught. */
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&chain_link[0], ctx->bswap);
        if (ctx->u64) {
            next_page_pos = sas_read2(&chain_link[8], ctx->bswap);
            chain_link_len = sas_read2(&chain_link[10], ctx->bswap);
        } else {
            next_page_pos = sas_read2(&chain_link[4], ctx->bswap);
            chain_link_len = sas_read2(&chain_link[6], ctx->bswap);
        }

        /* Never write past the caller's buffer, whatever the file claims. */
        if (chain_link_len < 0 || (size_t)chain_link_len > buffer_len - buffer_offset) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(buffer + buffer_offset, chain_link_len, io->io_ctx) < chain_link_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        buffer_offset += chain_link_len;
    }

cleanup:
    return retval;
}
/*
 * Copy the payload of every link in the block chain that starts at
 * (start_page, start_page_pos) into buffer, reading from ctx->fd.
 *
 * buffer          destination for the concatenated payloads
 * buffer_len      capacity of buffer in bytes
 * start_page      1-based page number of the first link (<= 0 ends the walk)
 * start_page_pos  byte position of the first link inside that page (<= 0 ends the walk)
 * ctx             catalog context; supplies the file descriptor, header/page
 *                 sizes and the byte-swap flag
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O
 * failure, or READSTAT_ERROR_PARSE if the chain holds more data than
 * buffer_len bytes.
 *
 * NOTE(review): nothing here bounds the number of links followed; a chain of
 * zero-length links that points back at itself would still loop. Confirm
 * whether the context exposes a page count that could cap the walk.
 */
static readstat_error_t sas_catalog_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas_catalog_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int block_len = 0;
    size_t buffer_offset = 0;
    /* The 16-byte link header lives on the stack: no allocation to fail or free. */
    char page[16];

    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, 16) < 16) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&page[0], ctx->bswap);
        next_page_pos = sas_read2(&page[4], ctx->bswap);
        block_len = sas_read2(&page[6], ctx->bswap);

        /* Never write past the caller's buffer, whatever the file claims. */
        if (block_len < 0 || (size_t)block_len > buffer_len - buffer_offset) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (read(ctx->fd, buffer + buffer_offset, block_len) < block_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        buffer_offset += block_len;
    }

cleanup:
    return retval;
}
/*
 * Scan an index buffer for "XLSR" records and append a block pointer to
 * ctx->block_pointers for every record whose byte at ctx->xlsr_O_offset is 'O'.
 *
 * Each stored pointer packs the record's page number into the upper 32 bits
 * and its position into the lower 32 bits. Scanning stops at the first record
 * that does not carry the "XLSR" signature (after allowing for 8 bytes of
 * padding) or when fewer than ctx->xlsr_size bytes remain.
 *
 * index  start of the index data
 * len    number of valid bytes at index
 * ctx    catalog context; supplies the record size, field offsets, layout
 *        flags and the growable block_pointers array
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_MALLOC if growing the array fails.
 */
static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    const size_t record_len = ctx->xlsr_size;
    size_t offset = 0;

    while (offset <= len && len - offset >= record_len) {
        const char *xlsr = index + offset;

        if (memcmp(xlsr, "XLSR", 4) != 0) {
            // some block pointers seem to have 8 bytes of extra padding
            /* The loop condition only vouched for a record at the unpadded
             * position; make sure the shifted record still fits in the
             * buffer before looking at it. */
            if (len - offset < 8 + record_len)
                break;
            offset += 8;
            xlsr += 8;
        }
        if (memcmp(xlsr, "XLSR", 4) != 0)
            break;

        if (xlsr[ctx->xlsr_O_offset] == 'O') {
            uint32_t page = 0, pos = 0;
            if (ctx->u64) {
                page = sas_read4(&xlsr[8], ctx->bswap);
                pos = sas_read4(&xlsr[16], ctx->bswap);
            } else {
                page = sas_read2(&xlsr[4], ctx->bswap);
                pos = sas_read2(&xlsr[8], ctx->bswap);
            }
            ctx->block_pointers[ctx->block_pointers_used++] = ((uint64_t)page << 32) + pos;
        }

        /* Grow right after an append fills the array so the next append always has room. */
        if (ctx->block_pointers_used == ctx->block_pointers_capacity) {
            ctx->block_pointers_capacity *= 2;
            /* NOTE(review): the result is assigned straight back because the
             * ownership rules of readstat_realloc on failure are not visible
             * here -- confirm before switching to a temporary. */
            ctx->block_pointers = readstat_realloc(ctx->block_pointers,
                    ctx->block_pointers_capacity * sizeof(uint64_t));
            if (ctx->block_pointers == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }
        }

        offset += record_len;
    }

cleanup:
    return retval;
}
/*
 * Follow the chain of block links that starts at (start_page, start_page_pos)
 * and add up the payload length stored in each 16-byte link header, reading
 * from ctx->fd.
 *
 * start_page      1-based page number of the first link (<= 0 ends the walk)
 * start_page_pos  byte position of the first link inside that page (<= 0 ends the walk)
 * ctx             catalog context; supplies the file descriptor, header/page
 *                 sizes and the byte-swap flag
 * outError        optional; receives READSTAT_OK or the seek/read error
 *
 * Returns the summed payload length, or -1 if a seek or read failed.
 *
 * NOTE(review): nothing here bounds the number of links followed, so a chain
 * that points back at itself keeps looping -- confirm whether the context
 * exposes a page count that could cap the walk.
 */
static int sas_catalog_block_size(int start_page, int start_page_pos, sas_catalog_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int buffer_len = 0;
    int block_len = 0;
    /* The link header is a fixed 16 bytes, so keep it on the stack instead of
     * using a heap allocation that was never checked for failure. */
    char page[16];

    // calculate buffer size needed
    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, 16) < 16) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        next_page = sas_read4(&page[0], ctx->bswap);
        next_page_pos = sas_read2(&page[4], ctx->bswap);
        block_len = sas_read2(&page[6], ctx->bswap);

        buffer_len += block_len;
    }

cleanup:
    if (outError)
        *outError = retval;

    return retval == READSTAT_OK ? buffer_len : -1;
}
/*
 * Decode a text reference from three consecutive 2-byte fields at data:
 * index at byte 0, offset at byte 2, length at byte 4. Each field is read
 * with sas_read2 using ctx->bswap.
 */
static sas_text_ref_t sas_parse_text_ref(const char *data, sas_ctx_t *ctx) {
    sas_text_ref_t ref = {
        .index  = sas_read2(data,     ctx->bswap),
        .offset = sas_read2(data + 2, ctx->bswap),
        .length = sas_read2(data + 4, ctx->bswap)
    };

    return ref;
}
/*
 * Copy the payload of every link in the block chain that starts at
 * (start_page, start_page_pos) into buffer, one link after another.
 *
 * A link header is 32 bytes when ctx->u64 is set and 16 bytes otherwise. The
 * next page number is always at byte 0; the next position and the payload
 * length sit at bytes 8/10 (u64) or 4/6.
 *
 * The walk ends when the next page or position is <= 0, when the next page
 * exceeds ctx->page_count, or after ctx->page_count links.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O
 * failure, or READSTAT_ERROR_PARSE if a payload would not fit in buffer_len.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    char header[32];
    const int header_len = ctx->u64 ? 32 : 16;
    const int pos_field = ctx->u64 ? 8 : 4;
    const int len_field = ctx->u64 ? 10 : 6;
    int page = start_page;
    int page_pos = start_page_pos;
    int filled = 0;
    int links;

    for (links = 0;
            page > 0 && page_pos > 0 && page <= ctx->page_count && links < ctx->page_count;
            links++) {
        int payload_len;

        if (io->seek(ctx->header_size+(page-1)*ctx->page_size+page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1)
            return READSTAT_ERROR_SEEK;

        if (io->read(header, header_len, io->io_ctx) < header_len)
            return READSTAT_ERROR_READ;

        page = sas_read4(&header[0], ctx->bswap);
        page_pos = sas_read2(&header[pos_field], ctx->bswap);
        payload_len = sas_read2(&header[len_field], ctx->bswap);

        /* Refuse payloads that would run past the end of the caller's buffer. */
        if (filled + payload_len > buffer_len)
            return READSTAT_ERROR_PARSE;

        if (io->read(buffer + filled, payload_len, io->io_ctx) < payload_len)
            return READSTAT_ERROR_READ;

        filled += payload_len;
    }

    return READSTAT_OK;
}
/*
 * Parse a column-name subheader: record one text reference (the column's
 * name) in ctx->col_info for each 8-byte entry found after the subheader
 * prefix.
 *
 * subheader  start of the subheader bytes
 * len        number of valid bytes at subheader
 * ctx        parser context; col_names_count is advanced by the number of
 *            entries and col_info is grown when it is too small
 *
 * Returns READSTAT_OK, READSTAT_ERROR_PARSE if len is too small or does not
 * match the length recorded in the subheader, or READSTAT_ERROR_MALLOC if
 * col_info cannot be grown (ctx is left unchanged in that case).
 */
static readstat_error_t sas_parse_column_name_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    size_t signature_len = ctx->u64 ? 8 : 4;
    /* Bytes that are not part of the entry array: 28 in the u64 layout, 20 otherwise. */
    size_t overhead = ctx->u64 ? 28 : 20;
    int cmax = 0;
    int first = 0;
    int new_count = 0;
    int i;
    const char *cnp = NULL;
    uint16_t remainder = 0;

    /* Reject short subheaders before touching them: the reads below would run
     * past the buffer and len - overhead would wrap around. */
    if (len < overhead) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    remainder = sas_read2(&subheader[signature_len], ctx->bswap);
    if (remainder != len - (4+2*signature_len)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    cmax = (int)((len - overhead) / 8);
    first = ctx->col_names_count;
    new_count = first + cmax;

    if (ctx->col_info_count < new_count) {
        /* Keep the old array reachable if realloc fails so its owner can still free it. */
        void *grown = realloc(ctx->col_info, new_count * sizeof(col_info_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->col_info = grown;
        ctx->col_info_count = new_count;
    }
    ctx->col_names_count = new_count;

    cnp = &subheader[signature_len+8];
    for (i=first; i<new_count; i++) {
        ctx->col_info[i].name_ref = sas_parse_text_ref(cnp, ctx);
        cnp += 8;
    }

cleanup:
    return retval;
}
/*
 * Parse a column-text subheader: copy everything after the signature into a
 * freshly allocated blob and append it (with its length) to ctx->text_blobs
 * and ctx->text_blob_lengths.
 *
 * subheader  start of the subheader bytes
 * len        number of valid bytes at subheader
 * ctx        parser context; text_blob_count is incremented only once the
 *            blob has actually been stored
 *
 * Returns READSTAT_OK; READSTAT_ERROR_PARSE if len is too small or does not
 * match the length recorded in the subheader; READSTAT_ERROR_MALLOC if any
 * allocation fails (no blob is added); or
 * READSTAT_ERROR_UNSUPPORTED_COMPRESSION if the stored blob carries the RDC
 * compression signature at byte 12 (the blob stays stored in that case).
 */
static readstat_error_t sas_parse_column_text_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    size_t signature_len = ctx->u64 ? 8 : 4;
    size_t blob_len = 0;
    uint16_t remainder = 0;
    char *blob = NULL;          /* owned here until handed over to ctx */
    const char *stored = NULL;
    void *grown = NULL;

    /* The length check below needs len >= 4 + 2*signature_len anyway; testing
     * it first keeps the sas_read2 call inside the buffer. */
    if (len < 4+2*signature_len) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    remainder = sas_read2(&subheader[signature_len], ctx->bswap);
    if (remainder != len - (4+2*signature_len)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    blob_len = len - signature_len;
    if ((blob = malloc(blob_len)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    memcpy(blob, subheader+signature_len, blob_len);

    /* Grow both arrays before bumping the count, so a failed allocation never
     * leaves an uninitialized slot that text_blob_count claims is valid. */
    grown = realloc(ctx->text_blobs, (ctx->text_blob_count + 1) * sizeof(char *));
    if (grown == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    ctx->text_blobs = grown;

    grown = realloc(ctx->text_blob_lengths,
            (ctx->text_blob_count + 1) * sizeof(ctx->text_blob_lengths[0]));
    if (grown == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    ctx->text_blob_lengths = grown;

    ctx->text_blob_lengths[ctx->text_blob_count] = blob_len;
    ctx->text_blobs[ctx->text_blob_count] = blob;
    ctx->text_blob_count++;
    stored = blob;
    blob = NULL;                /* ctx owns the blob from here on */

    /* another bit of a hack */
    if (blob_len > 12 + sizeof(SAS_COMPRESSION_SIGNATURE_RDC)-1 &&
            strncmp(stored + 12, SAS_COMPRESSION_SIGNATURE_RDC, sizeof(SAS_COMPRESSION_SIGNATURE_RDC)-1) == 0) {
        retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION;
        goto cleanup;
    }

cleanup:
    free(blob);                 /* non-NULL only when the blob was never stored */
    return retval;
}
/*
 * Parse a column-attributes subheader: for each entry after the subheader
 * prefix, record the column's offset, width, type and index in ctx->col_info.
 *
 * Entries are 16 bytes in the u64 layout (8-byte offset) and 12 bytes
 * otherwise (4-byte offset); the 4-byte width follows the offset and the type
 * byte sits 6 bytes after the start of the width.
 *
 * subheader  start of the subheader bytes
 * len        number of valid bytes at subheader
 * ctx        parser context; col_attrs_count is advanced by the number of
 *            entries, col_info is grown when too small, and max_col_width is
 *            raised to the widest column seen
 *
 * Returns READSTAT_OK; READSTAT_ERROR_PARSE if len is too small, does not
 * match the length recorded in the subheader, or an entry has an unknown
 * column type; READSTAT_ERROR_MALLOC if col_info cannot be grown (ctx is left
 * unchanged in that case).
 */
static readstat_error_t sas_parse_column_attributes_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    size_t signature_len = ctx->u64 ? 8 : 4;
    /* Bytes that are not part of the entry array: 28 in the u64 layout, 20 otherwise. */
    size_t overhead = ctx->u64 ? 28 : 20;
    size_t entry_len = ctx->u64 ? 16 : 12;
    /* Position of the width field inside an entry (right after the offset field). */
    size_t off = ctx->u64 ? 8 : 4;
    int cmax = 0;
    int first = 0;
    int new_count = 0;
    int i;
    const char *cap = NULL;
    uint16_t remainder = 0;

    /* Reject short subheaders before touching them: the reads below would run
     * past the buffer and len - overhead would wrap around. */
    if (len < overhead) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    remainder = sas_read2(&subheader[signature_len], ctx->bswap);
    if (remainder != len - (4+2*signature_len)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    cmax = (int)((len - overhead) / entry_len);
    first = ctx->col_attrs_count;
    new_count = first + cmax;

    if (ctx->col_info_count < new_count) {
        /* Keep the old array reachable if realloc fails so its owner can still free it. */
        void *grown = realloc(ctx->col_info, new_count * sizeof(col_info_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->col_info = grown;
        ctx->col_info_count = new_count;
    }
    ctx->col_attrs_count = new_count;

    cap = &subheader[signature_len+8];
    for (i=first; i<new_count; i++) {
        if (ctx->u64) {
            ctx->col_info[i].offset = sas_read8(&cap[0], ctx->bswap);
        } else {
            ctx->col_info[i].offset = sas_read4(&cap[0], ctx->bswap);
        }

        ctx->col_info[i].width = sas_read4(&cap[off], ctx->bswap);
        if (ctx->col_info[i].width > ctx->max_col_width)
            ctx->max_col_width = ctx->col_info[i].width;

        if (cap[off+6] == SAS_COLUMN_TYPE_NUM) {
            ctx->col_info[i].type = READSTAT_TYPE_DOUBLE;
        } else if (cap[off+6] == SAS_COLUMN_TYPE_CHR) {
            ctx->col_info[i].type = READSTAT_TYPE_STRING;
        } else {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        ctx->col_info[i].index = i;

        cap += entry_len;
    }

cleanup:
    return retval;
}
/*
 * Double the capacity of ctx->block_pointers (or make room for one entry if
 * the capacity is currently zero).
 *
 * Returns 0 on success. Returns -1 if realloc fails, in which case the array
 * and its capacity are left exactly as they were.
 */
static int sas7bcat_grow_block_pointers(sas7bcat_ctx_t *ctx) {
    size_t new_capacity = ctx->block_pointers_capacity > 0
        ? 2 * (size_t)ctx->block_pointers_capacity
        : 1;
    void *grown = realloc(ctx->block_pointers, new_capacity * sizeof(uint64_t));

    if (grown == NULL)
        return -1;

    ctx->block_pointers = grown;
    ctx->block_pointers_capacity = new_capacity;
    return 0;
}

/*
 * Scan an index buffer for "XLSR" records and append a block pointer to
 * ctx->block_pointers for every record whose byte at 50 + ctx->pad1 is 'O'.
 *
 * Each stored pointer packs the 2-byte value at record byte 4 into the upper
 * 32 bits and the 2-byte value at record byte 8 into the lower 32 bits.
 * Scanning stops at the first record without the "XLSR" signature (after
 * allowing for 8 bytes of padding), when fewer than 212 bytes remain, or when
 * the pointer array cannot be grown.
 *
 * The function returns void, so an allocation failure cannot be reported: the
 * scan simply stops and the pointers collected so far stay valid.
 */
static void sas7bcat_augment_index(const char *index, size_t len, sas7bcat_ctx_t *ctx) {
    enum { XLSR_MIN_LEN = 212, XLSR_PADDING_LEN = 8 };
    size_t offset = 0;

    while (offset <= len && len - offset >= XLSR_MIN_LEN) {
        const char *xlsr = index + offset;

        if (memcmp(xlsr, "XLSR", 4) != 0) {
            // some block pointers seem to have 8 bytes of extra padding
            /* The loop condition only vouched for a record at the unpadded
             * position; make sure the shifted record still fits in the
             * buffer before looking at it. */
            if (len - offset < XLSR_PADDING_LEN + XLSR_MIN_LEN)
                break;
            offset += XLSR_PADDING_LEN;
            xlsr += XLSR_PADDING_LEN;
        }
        if (memcmp(xlsr, "XLSR", 4) != 0)
            break;

        if (xlsr[50+ctx->pad1] == 'O') {
            /* Normally there is already room (see the growth step below); this
             * covers a full array left behind by an earlier failed growth. */
            if (ctx->block_pointers_used >= ctx->block_pointers_capacity &&
                    sas7bcat_grow_block_pointers(ctx) != 0)
                return;
            ctx->block_pointers[ctx->block_pointers_used++] =
                ((uint64_t)sas_read2(&xlsr[4], ctx->bswap) << 32) + sas_read2(&xlsr[8], ctx->bswap);
        }

        /* Grow as soon as the array is full so the next append has room. */
        if (ctx->block_pointers_used == ctx->block_pointers_capacity &&
                sas7bcat_grow_block_pointers(ctx) != 0)
            return;

        offset += XLSR_MIN_LEN + ctx->pad1;
    }
}
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; const char *lbp1 = value_start; uint32_t *value_offset = calloc(label_count_used, sizeof(uint32_t)); /* Doubles appear to be stored as big-endian, always */ int bswap_doubles = machine_is_little_endian(); int is_string = (name[0] == '$'); /* Pass 1 -- find out the offset of the labels */ for (i=0; i<label_count_capacity; i++) { if (&lbp1[2] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (i<label_count_used) { uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap); if (label_pos >= label_count_used) { retval = READSTAT_ERROR_PARSE; goto cleanup; } value_offset[label_pos] = lbp1 - value_start; } lbp1 += 6 + lbp1[2]; } const char *lbp2 = lbp1; /* Pass 2 -- parse pairs of values & labels */ for (i=0; i<label_count_used && i<label_count_capacity; i++) { lbp1 = value_start + value_offset[i]; if (&lbp1[30] - value_start > value_labels_len || &lbp2[10] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } size_t label_len = sas_read2(&lbp2[8], ctx->bswap); size_t value_entry_len = 6 + lbp1[2]; const char *label = &lbp2[10]; readstat_value_t value = { .type = is_string ? 
READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE }; if (is_string) { char val[4*16+1]; retval = readstat_convert(val, sizeof(val), &lbp1[value_entry_len-16], 16, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = val; } else { uint64_t val = sas_read8(&lbp1[22], bswap_doubles); double dval = NAN; if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) { value.tag = (val >> 40); if (value.tag) { value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else { memcpy(&dval, &val, 8); dval *= -1.0; } value.v.double_value = dval; } if (ctx->value_label_handler) { if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } lbp2 += 8 + 2 + label_len + 1; }