/*
 * Parse a SAS catalog (sas7bcat) file.
 *
 * The file is opened through the parser's I/O callbacks. After the header is
 * read, the file label, modification time and version are reported to
 * parser->metadata_handler (when one is set). An index of block pointers is
 * then collected from the first index page and from every page tagged "XLSR",
 * sorted and de-duplicated, and each referenced block is read and parsed.
 *
 * parser:   supplies the I/O callbacks, the handlers and optional
 *           input/output encoding overrides.
 * path:     forwarded unchanged to io->open.
 * user_ctx: opaque pointer forwarded to the handlers.
 *
 * Returns READSTAT_OK on success, otherwise one of:
 *   READSTAT_ERROR_MALLOC              an allocation failed
 *   READSTAT_ERROR_OPEN / _SEEK / _READ  an I/O callback failed or came up short
 *   READSTAT_ERROR_PARSE               the header has the u64 flag set, or the
 *                                      page size is too small to hold an index
 *   READSTAT_ERROR_UNSUPPORTED_CHARSET iconv_open rejected the encoding pair
 *   READSTAT_ERROR_USER_ABORT          the metadata handler asked to stop
 * or any error propagated from the header/block helper routines.
 */
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx) {
    enum {
        INITIAL_BLOCK_POINTERS = 200, /* starting capacity of the pointer index */
        FIRST_INDEX_OFFSET = 856,     /* index offset within the first index page (before padding) */
        XLSR_OFFSET = 16              /* offset of the "XLSR" tag within a page */
    };

    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas7bcat_ctx_t *ctx = calloc(1, sizeof(sas7bcat_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    /* Nothing has been opened and ctx owns nothing yet, so a plain free()
     * is enough to unwind an allocation failure at this point. */
    if (ctx == NULL || hinfo == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->block_pointers_capacity = INITIAL_BLOCK_POINTERS;
    ctx->block_pointers = malloc(ctx->block_pointers_capacity * sizeof(uint64_t));
    if (ctx->block_pointers == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->value_label_handler = parser->value_label_handler;
    ctx->metadata_handler = parser->metadata_handler;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->io = io;

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(io, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;

    /* A caller-supplied input encoding takes precedence over the header's. */
    if (ctx->input_encoding == NULL) {
        ctx->input_encoding = hinfo->encoding;
    }

    /* Files whose header sets the u64 flag are rejected. */
    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* A converter is only needed when both encodings are known and differ. */
    if (ctx->input_encoding && ctx->output_encoding &&
            strcmp(ctx->input_encoding, ctx->output_encoding) != 0) {
        iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if (parser->metadata_handler) {
        char file_label[4*64+1];
        retval = readstat_convert(file_label, sizeof(file_label),
                hinfo->file_label, sizeof(hinfo->file_label), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (ctx->metadata_handler(file_label, hinfo->modification_time,
                    10000 * hinfo->major_version + hinfo->minor_version,
                    ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    /* The page size comes from the file. A page shorter than the first-index
     * offset would make the length passed to sas7bcat_augment_index negative
     * and the page[] accesses below out of bounds. */
    if (ctx->page_size < FIRST_INDEX_OFFSET + 2*ctx->pad1) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    if (io->seek(ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size,
                READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas7bcat_augment_index(&page[FIRST_INDEX_OFFSET+2*ctx->pad1],
            ctx->page_size - FIRST_INDEX_OFFSET - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (io->seek(ctx->header_size+i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[XLSR_OFFSET], "XLSR", sizeof("XLSR")-1) == 0) {
            sas7bcat_augment_index(&page[XLSR_OFFSET], ctx->page_size - XLSR_OFFSET, ctx);
        }
    }

    sas7bcat_sort_index(ctx);
    sas7bcat_uniq_index(ctx);

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        /* Each pointer carries the page number above bit 32 and the
         * position within that page in its low 16 bits. */
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas7bcat_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }

        /* Grow through a temporary so the old buffer is still freed in
         * cleanup if realloc fails. */
        char *resized = realloc(buffer, buffer_len);
        if (resized == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        buffer = resized;

        if ((retval = sas7bcat_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas7bcat_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    /* As before, close is invoked even when open itself failed. */
    io->close(io->io_ctx);
    free(page);
    free(buffer);
    sas7bcat_ctx_free(ctx);
    free(hinfo);

    return retval;
}
/*
 * Parse a SAS catalog (sas7bcat) file using a plain file descriptor.
 *
 * Opens `filename`, reads the file header, sets up a converter to UTF-8 when
 * the header's encoding is neither "UTF-8" nor "US-ASCII", collects an index
 * of block pointers from the first index page and from every page tagged
 * "XLSR", and then reads and parses each referenced block.
 *
 * parser:   supplies the value-label and error handlers.
 * filename: forwarded unchanged to readstat_open.
 * user_ctx: opaque pointer forwarded to the handlers.
 *
 * Returns READSTAT_OK on success, otherwise one of:
 *   READSTAT_ERROR_MALLOC              an allocation failed
 *   READSTAT_ERROR_OPEN / _SEEK / _READ  an I/O call failed or came up short
 *   READSTAT_ERROR_PARSE               the header has the u64 flag set, or the
 *                                      page size is too small to hold an index
 *   READSTAT_ERROR_UNSUPPORTED_CHARSET iconv_open rejected the encoding
 * or any error propagated from the header/block helper routines.
 */
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *filename, void *user_ctx) {
    enum {
        INITIAL_BLOCK_POINTERS = 200, /* starting capacity of the pointer index */
        FIRST_INDEX_OFFSET = 856,     /* index offset within the first index page (before padding) */
        XLSR_OFFSET = 16              /* offset of the "XLSR" tag within a page */
    };

    readstat_error_t retval = READSTAT_OK;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas_catalog_ctx_t *ctx = calloc(1, sizeof(sas_catalog_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    /* No file is open and ctx owns nothing yet, so a plain free() is enough
     * to unwind an allocation failure at this point. */
    if (ctx == NULL || hinfo == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->block_pointers_capacity = INITIAL_BLOCK_POINTERS;
    ctx->block_pointers = malloc(ctx->block_pointers_capacity * sizeof(uint64_t));
    if (ctx->block_pointers == NULL) {
        free(ctx);
        free(hinfo);
        return READSTAT_ERROR_MALLOC;
    }

    ctx->value_label_handler = parser->value_label_handler;
    ctx->user_ctx = user_ctx;

    if ((ctx->fd = readstat_open(filename)) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(ctx->fd, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;

    /* Files whose header sets the u64 flag are rejected. */
    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* Text is converted to UTF-8 unless the header already declares
     * UTF-8 or US-ASCII. */
    if (strcmp(hinfo->encoding, "UTF-8") != 0 &&
            strcmp(hinfo->encoding, "US-ASCII") != 0) {
        iconv_t converter = iconv_open("UTF-8", hinfo->encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    /* The page size comes from the file. A page shorter than the first-index
     * offset would make the length passed to sas_catalog_augment_index
     * negative and the page[] accesses below out of bounds. */
    if (ctx->page_size < FIRST_INDEX_OFFSET + 2*ctx->pad1) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    if (readstat_lseek(ctx->fd, ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (read(ctx->fd, page, ctx->page_size) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas_catalog_augment_index(&page[FIRST_INDEX_OFFSET+2*ctx->pad1],
            ctx->page_size - FIRST_INDEX_OFFSET - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (readstat_lseek(ctx->fd, ctx->header_size+i*ctx->page_size, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, ctx->page_size) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[XLSR_OFFSET], "XLSR", sizeof("XLSR")-1) == 0) {
            sas_catalog_augment_index(&page[XLSR_OFFSET], ctx->page_size - XLSR_OFFSET, ctx);
        }
    }

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        /* Each pointer carries the page number above bit 32 and the
         * position within that page in its low 16 bits. */
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas_catalog_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }

        /* Grow through a temporary so the old buffer is still freed in
         * cleanup if realloc fails. */
        char *resized = realloc(buffer, buffer_len);
        if (resized == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        buffer = resized;

        if ((retval = sas_catalog_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas_catalog_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    /* NOTE(review): ctx->fd is not closed in this function; presumably
     * sas_catalog_ctx_free releases it -- confirm, otherwise the descriptor
     * leaks on every call. */
    free(page);
    free(buffer);
    sas_catalog_ctx_free(ctx);
    free(hinfo);

    return retval;
}