Esempio n. 1
0
/*
 * Move the file position past the expansion-field section of a DTA file.
 *
 * - Tagged ("xmlish") files: seek straight to ctx->data_offset.
 * - ctx->expansion_len_len == 0: there is nothing to skip.
 * - Otherwise records are consumed one at a time. The short record layout is
 *   used when expansion_len_len is 2, the regular layout in every other case.
 *   A record whose type and length are both zero terminates the list; any
 *   other record must have type 1, and its payload is skipped with a
 *   relative seek.
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_SEEK or READSTAT_ERROR_READ
 * on I/O failure, and READSTAT_ERROR_PARSE when a record has an unexpected
 * type.
 */
static readstat_error_t dta_skip_expansion_fields(int fd, dta_ctx_t *ctx) {
    if (ctx->file_is_xmlish) {
        return (readstat_lseek(fd, ctx->data_offset, SEEK_SET) == -1)
            ? READSTAT_ERROR_SEEK
            : READSTAT_OK;
    }

    if (ctx->expansion_len_len == 0)
        return READSTAT_OK;

    for (;;) {
        size_t payload_len;
        char field_type;

        if (ctx->expansion_len_len == 2) {
            dta_short_expansion_field_t short_field;

            if (read(fd, &short_field, sizeof(short_field)) != sizeof(short_field))
                return READSTAT_ERROR_READ;

            field_type = short_field.data_type;
            if (ctx->machine_needs_byte_swap) {
                payload_len = byteswap2(short_field.len);
            } else {
                payload_len = short_field.len;
            }
        } else {
            dta_expansion_field_t long_field;

            if (read(fd, &long_field, sizeof(long_field)) != sizeof(long_field))
                return READSTAT_ERROR_READ;

            field_type = long_field.data_type;
            if (ctx->machine_needs_byte_swap) {
                payload_len = byteswap4(long_field.len);
            } else {
                payload_len = long_field.len;
            }
        }

        /* An all-zero record marks the end of the expansion fields. */
        if (field_type == 0 && payload_len == 0)
            break;

        if (field_type != 1)
            return READSTAT_ERROR_PARSE;

        if (readstat_lseek(fd, payload_len, SEEK_CUR) == -1)
            return READSTAT_ERROR_SEEK;
    }

    return READSTAT_OK;
}
Esempio n. 2
0
/*
 * Copy the payload of a chain of catalog blocks into `buffer`.
 *
 * Starting at (start_page, start_page_pos), each link is located at
 * header_size + (page - 1) * page_size + page_pos. A 16-byte link header is
 * read there; the next page is decoded from offset 0, the next page position
 * from offset 4 and the payload length from offset 6. The payload that
 * follows the link header is appended to `buffer`. The walk stops as soon as
 * either the next page or the next page position is not positive.
 *
 * buffer      destination, provided by the caller
 * buffer_len  capacity of `buffer` in bytes; a chain whose payload would not
 *             fit is rejected instead of being written past the end
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK, READSTAT_ERROR_READ, or
 * READSTAT_ERROR_PARSE when a payload length is negative or exceeds the
 * remaining capacity.
 *
 * NOTE(review): a chain that loops back on itself using zero-length blocks
 * would still never terminate; no link-count limit is enforced here.
 */
static readstat_error_t sas_catalog_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas_catalog_ctx_t *ctx) {
    enum { LINK_HEADER_LEN = 16 };

    int next_page = start_page;
    int next_page_pos = start_page_pos;

    int block_len = 0;
    size_t buffer_offset = 0;

    /* Small fixed-size scratch area: no heap allocation that could fail. */
    char page[LINK_HEADER_LEN];

    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            return READSTAT_ERROR_SEEK;
        }
        if (read(ctx->fd, page, LINK_HEADER_LEN) < LINK_HEADER_LEN) {
            return READSTAT_ERROR_READ;
        }
        next_page = sas_read4(&page[0], ctx->bswap);
        next_page_pos = sas_read2(&page[4], ctx->bswap);
        block_len = sas_read2(&page[6], ctx->bswap);

        /* buffer_offset never exceeds buffer_len, so the subtraction cannot wrap. */
        if (block_len < 0 || (size_t)block_len > buffer_len - buffer_offset) {
            return READSTAT_ERROR_PARSE;
        }
        if (read(ctx->fd, buffer + buffer_offset, block_len) < block_len) {
            return READSTAT_ERROR_READ;
        }
        buffer_offset += (size_t)block_len;
    }

    return READSTAT_OK;
}
Esempio n. 3
0
/*
 * Compute the total payload size of a chain of catalog blocks.
 *
 * Starting at (start_page, start_page_pos), each link is located at
 * header_size + (page - 1) * page_size + page_pos. A 16-byte link header is
 * read there; the next page is decoded from offset 0, the next page position
 * from offset 4 and the payload length from offset 6. The lengths are summed
 * until either the next page or the next page position is not positive.
 *
 * outError  optional; when non-NULL it receives READSTAT_OK,
 *           READSTAT_ERROR_SEEK or READSTAT_ERROR_READ
 *
 * Returns the summed length on success and -1 on failure.
 *
 * NOTE(review): the sum is kept in an int and no limit is placed on the
 * number of links, so a chain that loops back on itself is not detected.
 */
static int sas_catalog_block_size(int start_page, int start_page_pos, sas_catalog_ctx_t *ctx, readstat_error_t *outError) {
    enum { LINK_HEADER_LEN = 16 };

    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;

    int buffer_len = 0;
    int block_len = 0;

    /* Small fixed-size scratch area: no heap allocation that could fail. */
    char page[LINK_HEADER_LEN];

    // calculate buffer size needed
    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, LINK_HEADER_LEN) < LINK_HEADER_LEN) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        next_page = sas_read4(&page[0], ctx->bswap);
        next_page_pos = sas_read2(&page[4], ctx->bswap);
        block_len = sas_read2(&page[6], ctx->bswap);

        buffer_len += block_len;
    }

cleanup:
    if (outError)
        *outError = retval;

    return retval == READSTAT_OK ? buffer_len : -1;
}
Esempio n. 4
0
readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *filename, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    size_t  record_len = 0;
    int fd = -1;
    char *buf = NULL;
    dta_header_t  header;
    dta_ctx_t    *ctx = NULL;
    char  str_buf[2048];
    char *long_string = NULL;
    size_t file_size = 0;

    if ((fd = readstat_open(filename)) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    char magic[4];
    if (read(fd, magic, 4) != 4) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    file_size = readstat_lseek(fd, 0, SEEK_END);
    if (file_size == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (readstat_lseek(fd, 0, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (strncmp(magic, "<sta", 4) == 0) {
        retval = dta_read_xmlish_preamble(fd, ctx, &header);
    } else {
        if (read(fd, &header, sizeof(header)) != sizeof(header)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
    }

    if ((ctx = dta_ctx_init(header.nvar, header.nobs, header.byteorder, header.ds_format)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    ctx->user_ctx = user_ctx;
    ctx->file_size = file_size;
    ctx->progress_handler = parser->progress_handler;

    retval = dta_update_progress(fd, ctx);
    if (retval != READSTAT_OK)
        goto cleanup;
    
    if (parser->info_handler) {
        if (parser->info_handler(ctx->nobs, ctx->nvar, user_ctx)) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }
    
    if (ctx->file_is_xmlish) {
        uint16_t label_len = 0;
        unsigned char timestamp_len;

        if ((retval = dta_read_tag(fd, ctx, "<label>")) != READSTAT_OK) {
            goto cleanup;
        }
        
        if (ctx->data_label_len_len == 2) {
            if (read(fd, &label_len, sizeof(uint16_t)) != sizeof(uint16_t)) {
                retval = READSTAT_ERROR_READ;
                goto cleanup;
            }
            label_len = ctx->machine_needs_byte_swap ? byteswap2(label_len) : label_len;
        } else if (ctx->data_label_len_len == 1) {
            unsigned char label_len_char;
            if (read(fd, &label_len_char, sizeof(unsigned char)) != sizeof(unsigned char)) {
                retval = READSTAT_ERROR_READ;
                goto cleanup;
            }
            label_len = label_len_char;
        }
        
        if (readstat_lseek(fd, label_len, SEEK_CUR) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        
        if ((retval = dta_read_tag(fd, ctx, "</label>")) != READSTAT_OK) {
            goto cleanup;
        }
        
        if ((retval = dta_read_tag(fd, ctx, "<timestamp>")) != READSTAT_OK) {
            goto cleanup;
        }
        
        if (read(fd, &timestamp_len, 1) != 1) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        
        if (readstat_lseek(fd, timestamp_len, SEEK_CUR) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }

        if ((retval = dta_read_tag(fd, ctx, "</timestamp>")) != READSTAT_OK) {
            goto cleanup;
        }
    } else {
        if (readstat_lseek(fd, ctx->data_label_len, SEEK_CUR) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        
        if (ctx->time_stamp_len) {
            if (readstat_lseek(fd, ctx->time_stamp_len, SEEK_CUR) == -1) {
                retval = READSTAT_ERROR_SEEK;
                goto cleanup;
            }
        }
    }
    
    if ((retval = dta_read_tag(fd, ctx, "</header>")) != READSTAT_OK) {
        goto cleanup;
    }

    if (dta_read_map(fd, ctx) != READSTAT_OK) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    if (dta_read_descriptors(fd, ctx) != READSTAT_OK) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    for (i=0; i<ctx->nvar; i++) {
        size_t      max_len;
        readstat_types_t type = dta_type_info(ctx->typlist[i], &max_len, ctx);

        record_len += max_len;

        if (type == READSTAT_TYPE_STRING)
            max_len++; /* might append NULL */

        if (parser->variable_handler) {
            readstat_variable_t *variable = dta_init_variable(ctx, i, type);

            const char *value_labels = NULL;

            if (ctx->lbllist[ctx->lbllist_entry_len*i])
                value_labels = &ctx->lbllist[ctx->lbllist_entry_len*i];

            int cb_retval = parser->variable_handler(i, variable, value_labels, user_ctx);

            free(variable);

            if (cb_retval) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
    }

    if ((retval = dta_skip_expansion_fields(fd, ctx)) != READSTAT_OK) {
        goto cleanup;
    }
    
    if (record_len == 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((retval = dta_read_tag(fd, ctx, "<data>")) != READSTAT_OK) {
        goto cleanup;
    }

    if ((retval = dta_update_progress(fd, ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    if ((buf = malloc(record_len)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    for (i=0; i<ctx->nobs; i++) {
        if (read(fd, buf, record_len) != record_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        int j;
        off_t offset = 0;
        for (j=0; j<ctx->nvar; j++) {
            size_t max_len;
            readstat_value_t value;
            memset(&value, 0, sizeof(readstat_value_t));

            value.type = dta_type_info(ctx->typlist[j], &max_len, ctx);

            if (value.type == READSTAT_TYPE_STRING) {
                readstat_convert(str_buf, sizeof(str_buf), &buf[offset], max_len, ctx->converter);
                value.v.string_value = str_buf;
            } else if (value.type == READSTAT_TYPE_LONG_STRING) {
                uint32_t v, o;
                v = *((uint32_t *)&buf[offset]);
                o = *((uint32_t *)&buf[offset+4]);
                if (ctx->machine_needs_byte_swap) {
                    v = byteswap4(v);
                    o = byteswap4(o);
                }
                if (v > 0 && o > 0) {
                    off_t cur_pos = readstat_lseek(fd, 0, SEEK_CUR);
                    if (cur_pos == -1) {
                        retval = READSTAT_ERROR_SEEK;
                        goto cleanup;
                    }
                    retval = dta_read_long_string(fd, ctx, v, o, &long_string);
                    if (retval != READSTAT_OK) {
                        goto cleanup;
                    }
                    value.v.string_value = long_string;
                    if (readstat_lseek(fd, cur_pos, SEEK_SET) == -1) {
                        retval = READSTAT_ERROR_SEEK;
                        goto cleanup;
                    }
                }
            } else if (value.type == READSTAT_TYPE_CHAR) {
                char byte = buf[offset];
                if (ctx->machine_is_twos_complement) {
                    byte = ones_to_twos_complement1(byte);
                }
                if (byte > DTA_MAX_CHAR) {
                    value.is_system_missing = 1;
                    if (byte > DTA_MISSING_CHAR) {
                        value.tag = 'a' + (byte - DTA_MISSING_CHAR_A);
                    }
                }
                value.v.char_value = byte;
            } else if (value.type == READSTAT_TYPE_INT16) {
                int16_t num = *((int16_t *)&buf[offset]);
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap2(num);
                }
                if (ctx->machine_is_twos_complement) {
                    num = ones_to_twos_complement2(num);
                }
                if (num > DTA_MAX_INT16) {
                    value.is_system_missing = 1;
                    if (num > DTA_MISSING_INT16) {
                        value.tag = 'a' + (num - DTA_MISSING_INT16_A);
                    }
                }
                value.v.i16_value = num;
            } else if (value.type == READSTAT_TYPE_INT32) {
                int32_t num = *((int32_t *)&buf[offset]);
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap4(num);
                }
                if (ctx->machine_is_twos_complement) {
                    num = ones_to_twos_complement4(num);
                }
                if (num > DTA_MAX_INT32) {
                    value.is_system_missing = 1;
                    if (num > DTA_MISSING_INT32) {
                        value.tag = 'a' + (num - DTA_MISSING_INT32_A);
                    }
                }
                value.v.i32_value = num;
            } else if (value.type == READSTAT_TYPE_FLOAT) {
                uint32_t num = *((uint32_t *)&buf[offset]);
                float f_num = NAN;
                if (ctx->machine_needs_byte_swap) {
                    num = byteswap4(num);
                }
                if (num > DTA_MAX_FLOAT) {
                    value.is_system_missing = 1;
                    if (num > DTA_MISSING_FLOAT) {
                        value.tag = 'a' + ((num - DTA_MISSING_FLOAT_A) >> 11);
                    }
                } else {
Esempio n. 5
0
/*
 * Parse the opening of a tagged ("xmlish") DTA file and fill in `header`.
 *
 * The following items are consumed in order:
 *   <stata_dta> <header>
 *   <release>   three ASCII digits, stored as a number in header->ds_format
 *   <byteorder> "MSF" (header->byteorder = DTA_HILO) or "LSF" (DTA_LOHI)
 *   <K>         2 bytes copied verbatim into header->nvar
 *   <N>         4 bytes copied verbatim into header->nobs; for release 118
 *               and later the field occupies 8 bytes, of which only 4 are
 *               kept (the trailing 4 for MSF files, the leading 4 for LSF)
 *
 * The nvar / nobs bytes are stored exactly as read; no byte swapping is done
 * in this function. `ctx` is not touched here, it is only forwarded to
 * dta_read_tag.
 *
 * Returns READSTAT_OK on success, the error reported by dta_read_tag when a
 * tag does not match, READSTAT_ERROR_READ / READSTAT_ERROR_SEEK on I/O
 * failure, and READSTAT_ERROR_PARSE for a non-numeric release or an unknown
 * byte order.
 */
readstat_error_t dta_read_xmlish_preamble(int fd, dta_ctx_t *ctx, dta_header_t *header) {
    readstat_error_t retval = READSTAT_OK;
    
    if ((retval = dta_read_tag(fd, ctx, "<stata_dta>")) != READSTAT_OK) {
        goto cleanup;
    }
    if ((retval = dta_read_tag(fd, ctx, "<header>")) != READSTAT_OK) {
        goto cleanup;
    }

    char ds_format[3];
    if ((retval = dta_read_tag(fd, ctx, "<release>")) != READSTAT_OK) {
        goto cleanup;
    }
    if (read(fd, ds_format, sizeof(ds_format)) != sizeof(ds_format)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    /* Refuse anything that is not a decimal digit rather than deriving a
     * meaningless release number from it. */
    size_t digit;
    for (digit = 0; digit < sizeof(ds_format); digit++) {
        if (ds_format[digit] < '0' || ds_format[digit] > '9') {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
    }

    header->ds_format = 100 * (ds_format[0] - '0') + 10 * (ds_format[1] - '0') + (ds_format[2] - '0');

    if ((retval = dta_read_tag(fd, ctx, "</release>")) != READSTAT_OK) {
        goto cleanup;
    }

    char byteorder[3];
    if ((retval = dta_read_tag(fd, ctx, "<byteorder>")) != READSTAT_OK) {
        goto cleanup;
    }
    if (read(fd, byteorder, sizeof(byteorder)) != sizeof(byteorder)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (strncmp(byteorder, "MSF", 3) == 0) {
        header->byteorder = DTA_HILO;
    } else if (strncmp(byteorder, "LSF", 3) == 0) {
        header->byteorder = DTA_LOHI;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if ((retval = dta_read_tag(fd, ctx, "</byteorder>")) != READSTAT_OK) {
        goto cleanup;
    }

    if ((retval = dta_read_tag(fd, ctx, "<K>")) != READSTAT_OK) {
        goto cleanup;
    }
    if (read(fd, &header->nvar, sizeof(int16_t)) != sizeof(int16_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if ((retval = dta_read_tag(fd, ctx, "</K>")) != READSTAT_OK) {
        goto cleanup;
    }

    if ((retval = dta_read_tag(fd, ctx, "<N>")) != READSTAT_OK) {
        goto cleanup;
    }
    if (read(fd, &header->nobs, sizeof(int32_t)) != sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (header->ds_format >= 118) {
        /* Only support files < 4 billion rows for now */
        if (header->byteorder == DTA_HILO) {
            /* The second read overwrites nobs with the trailing 4 bytes. */
            if (read(fd, &header->nobs, sizeof(int32_t)) != sizeof(int32_t)) {
                retval = READSTAT_ERROR_READ;
                goto cleanup;
            }
        } else {
            /* The leading 4 bytes are already in nobs; skip the other 4. */
            if (readstat_lseek(fd, 4, SEEK_CUR) == -1) {
                retval = READSTAT_ERROR_SEEK;
                goto cleanup;
            }
        }
    }
    if ((retval = dta_read_tag(fd, ctx, "</N>")) != READSTAT_OK) {
        goto cleanup;
    }

cleanup:
    return retval;
}
Esempio n. 6
0
/*
 * Look up one long-string value in the <strls> section of a DTA file.
 *
 * The file is positioned at ctx->strls_offset, the "<strls>" tag is
 * consumed, and GSO records are then read one after another until one whose
 * (v, o) pair equals the requested pair is found. Records that do not match
 * are skipped by seeking over their payload.
 *
 * For the matching record:
 *   - DTA_GSO_TYPE_BINARY: *long_string_out is set to NULL.
 *   - DTA_GSO_TYPE_ASCII:  the payload is read into a freshly malloc'ed
 *     buffer that must end in a NUL byte; the buffer is handed to the caller
 *     through *long_string_out and the caller becomes responsible for
 *     freeing it.
 *   - any other type is a parse error.
 *
 * *long_string_out is left untouched on every error path.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK, READSTAT_ERROR_READ,
 * READSTAT_ERROR_MALLOC, READSTAT_ERROR_PARSE (bad "GSO" marker,
 * non-positive length, unknown type, missing terminator), or the error from
 * dta_read_tag.
 *
 * NOTE(review): header.v, header.o and header.len are used exactly as read;
 * no byte swapping is applied in this function -- confirm this is right for
 * files where ctx->machine_needs_byte_swap is set.
 */
static readstat_error_t dta_read_long_string(int fd, dta_ctx_t *ctx, int v, int o, char **long_string_out) {
    readstat_error_t retval = READSTAT_OK;
    if (readstat_lseek(fd, ctx->strls_offset, SEEK_SET) != ctx->strls_offset) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    retval = dta_read_tag(fd, ctx, "<strls>");
    if (retval != READSTAT_OK)
        goto cleanup;

    dta_gso_header_t header;

    while (1) {
        if (read(fd, &header, sizeof(dta_gso_header_t)) != sizeof(dta_gso_header_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        if (strncmp(header.gso, "GSO", sizeof("GSO")-1) != 0) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        
        if (header.len <= 0) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        if (header.v == v && header.o == o) {
            if (header.t == DTA_GSO_TYPE_BINARY) {
                *long_string_out = NULL;
            } else if (header.t == DTA_GSO_TYPE_ASCII) {
                char *string_buf = malloc(header.len);
                /* Report allocation failure as such instead of handing a
                 * NULL buffer to read(). */
                if (string_buf == NULL) {
                    retval = READSTAT_ERROR_MALLOC;
                    goto cleanup;
                }
                if (read(fd, string_buf, header.len) != header.len) {
                    free(string_buf);
                    retval = READSTAT_ERROR_READ;
                    goto cleanup;
                }
                /* header.len > 0 was checked above, so len-1 is in range. */
                if (string_buf[header.len-1] != '\0') {
                    free(string_buf);
                    retval = READSTAT_ERROR_PARSE;
                    goto cleanup;
                }
                *long_string_out = string_buf;
            } else {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            break;
        } else {
            if (readstat_lseek(fd, header.len, SEEK_CUR) == -1) {
                retval = READSTAT_ERROR_SEEK;
                goto cleanup;
            }
        }
    }

cleanup:
    return retval;
}
Esempio n. 7
0
/*
 * Read the fixed header of a SAS file (sas7bdat or sas7bcat) into `ctx` and
 * leave the file positioned at ctx->header_size, i.e. just past the header.
 *
 * Fields written on success: little_endian, encoding, header_size,
 * page_size, page_count and vendor. pad1 (set to 4) and u64 (set to 1) are
 * written only when the corresponding alignment byte equals
 * SAS_ALIGNMENT_OFFSET_4, and encoding only when the character-set code is
 * found in _charset_table. The function therefore relies on the caller
 * passing in a structure in which those fields start out zero / NULL.
 *
 * error_handler  optional; receives a formatted message for an unsupported
 *                character set and for failed seeks
 * user_ctx       passed through to error_handler unchanged
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ, READSTAT_ERROR_SEEK,
 * READSTAT_ERROR_PARSE (bad magic number, unknown endianness marker, header
 * smaller than 1024 bytes) or READSTAT_ERROR_UNSUPPORTED_CHARSET.
 *
 * Every read() result is compared with `!=` against the expected size: a
 * `<` comparison against a sizeof expression converts the -1 error return to
 * a huge unsigned value and silently lets read failures through.
 */
readstat_error_t sas_read_header(int fd, sas_header_info_t *ctx, 
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t  header_start;
    sas_header_end_t    header_end;
    readstat_error_t retval = READSTAT_OK;
    char error_buf[1024];
    if (read(fd, &header_start, sizeof(sas_header_start_t)) != sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->u64 = 1;
    }
    /* Swap bytes whenever the file's byte order differs from the host's. */
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        ctx->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        ctx->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    size_t i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            ctx->encoding = _charset_table[i].name;
            break;
        }
    }
    if (ctx->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }
    /* The size fields sit at offset 196, shifted by the alignment padding. */
    if (readstat_lseek(fd, 196 + ctx->pad1, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %d\n", 196 + ctx->pad1);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (read(fd, &header_size, sizeof(uint32_t)) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (read(fd, &page_size, sizeof(uint32_t)) != sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx->header_size = bswap ? byteswap4(header_size) : header_size;

    ctx->page_size = bswap ? byteswap4(page_size) : page_size;

    if (ctx->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* The page count is 8 bytes wide when u64 is set, 4 bytes otherwise. */
    if (ctx->u64) {
        uint64_t page_count;
        if (read(fd, &page_count, sizeof(uint64_t)) != sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (read(fd, &page_count, sizeof(uint32_t)) != sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap4(page_count) : page_count;
    }
    
    if (readstat_lseek(fd, 8, SEEK_CUR) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }
    if (read(fd, &header_end, sizeof(sas_header_end_t)) != sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (strncmp(header_end.release, "9.0000M0", sizeof(header_end.release)) == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        ctx->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        ctx->vendor = READSTAT_VENDOR_SAS;
    }
    if (readstat_lseek(fd, ctx->header_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            /* Cast so the argument always matches %lld, whatever the field's type. */
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %lld\n", (long long)ctx->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}
Esempio n. 8
0
/*
 * Parse a SAS catalog (sas7bcat) file.
 *
 * Steps:
 *   1. Open `filename` and read its header with sas_read_header. Files that
 *      report the u64 flag are rejected with READSTAT_ERROR_PARSE.
 *   2. Unless the encoding is UTF-8 or US-ASCII, open an iconv converter to
 *      UTF-8 and store it in the context.
 *   3. Feed the first index page, then every later page whose bytes 16..19
 *      read "XLSR", to sas_catalog_augment_index to collect block pointers.
 *   4. For each collected block pointer, size the block chain, read it into
 *      a reusable buffer and hand it to sas_catalog_parse_block.
 *
 * parser    supplies value_label_handler and error_handler
 * user_ctx  opaque pointer stored in the context for the handlers
 *
 * Returns READSTAT_OK on success, otherwise the first error encountered
 * (open, malloc, seek, read, parse, unsupported character set, or whatever
 * the helper routines report). Everything allocated here is released before
 * returning.
 */
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *filename, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas_catalog_ctx_t *ctx = calloc(1, sizeof(sas_catalog_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    /* Nothing is open yet. -1 is the value the context already carries into
     * cleanup when readstat_open fails, so it is used as the "no descriptor"
     * state for the early-exit paths below as well. */
    if (ctx != NULL)
        ctx->fd = -1;

    if (ctx == NULL || hinfo == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    ctx->block_pointers = malloc((ctx->block_pointers_capacity = 200) * sizeof(uint64_t));
    if (ctx->block_pointers == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    ctx->value_label_handler = parser->value_label_handler;
    ctx->user_ctx = user_ctx;

    if ((ctx->fd = readstat_open(filename)) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(ctx->fd, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;

    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* The page buffer is indexed at offset 16 and at 856 + 2*pad1 below; a
     * page smaller than that cannot be a valid catalog page. */
    if (ctx->page_size < 856 + 2*ctx->pad1) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (strcmp(hinfo->encoding, "UTF-8") != 0 &&
            strcmp(hinfo->encoding, "US-ASCII") != 0) {
        iconv_t converter = iconv_open("UTF-8", hinfo->encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    if (readstat_lseek(ctx->fd, ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (read(ctx->fd, page, ctx->page_size) != ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas_catalog_augment_index(&page[856+2*ctx->pad1], ctx->page_size - 856 - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (readstat_lseek(ctx->fd, ctx->header_size+i*ctx->page_size, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, ctx->page_size) != ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) {
            sas_catalog_augment_index(&page[16], ctx->page_size - 16, ctx);
        }
    }

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        /* Each pointer packs the page number in its upper 32 bits and the
         * position within the page in its lowest 16 bits. */
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas_catalog_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }

        /* Grow through a temporary so the old buffer is still owned (and
         * freed in cleanup) if realloc fails. */
        char *resized = realloc(buffer, buffer_len);
        if (resized == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        buffer = resized;

        if ((retval = sas_catalog_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas_catalog_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    free(page);
    free(buffer);
    if (ctx)
        sas_catalog_ctx_free(ctx);
    free(hinfo);

    return retval;
}