/*
 * Parse one catalog block: decode the name stored in the block header and
 * pass the remainder of the block to sas7bcat_parse_value_labels().
 *
 * data       - start of the block
 * data_size  - number of bytes available at data
 * ctx        - parser context (supplies bswap and the character converter)
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_PARSE when the block is too
 * short for the fields read here, or the first error reported by
 * readstat_convert() / sas7bcat_parse_value_labels().
 */
static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    const size_t payload_offset = 106;
    size_t pad = 0;
    int label_count_capacity = 0;
    int label_count_used = 0;
    char name[4*32+1];

    /* Every fixed-offset read below (flag byte, the two counts, the 8-byte
     * name) lies inside the first 106 bytes, so reject shorter blocks
     * before touching the data. */
    if (data_size < payload_offset) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure

    label_count_capacity = sas_read4(&data[38+pad], ctx->bswap);
    label_count_used = sas_read4(&data[42+pad], ctx->bswap);

    if ((retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter)) != READSTAT_OK)
        goto cleanup;

    if (pad) {
        pad += 16;
    }

    if ((data[2] & 0x80)) { // has long name
        /* The 32-byte long name sits at payload_offset + pad; make sure it
         * is fully contained in the block. */
        if (data_size - payload_offset < pad + 32) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        /* The long name replaces the 8-byte name converted above. */
        retval = readstat_convert(name, sizeof(name), &data[payload_offset+pad], 32, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
        pad += 32;
    }

    /* Guard the subtraction below against wrapping around. */
    if (data_size - payload_offset < pad) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if ((retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad,
                    label_count_used, label_count_capacity, name, ctx)) != READSTAT_OK)
        goto cleanup;

cleanup:
    return retval;
}
/*
 * Scan an index region for "XLSR" records and append a block pointer to
 * ctx->block_pointers for every record whose byte at ctx->xlsr_O_offset is
 * 'O'.  Each pointer is stored as (page << 32) + pos.
 *
 * index - start of the index region
 * len   - number of bytes available at index
 * ctx   - parser context; block_pointers / block_pointers_used /
 *         block_pointers_capacity are updated in place
 *
 * Scanning stops at the first position that does not hold an "XLSR"
 * signature (after allowing for 8 bytes of padding) or when fewer than
 * ctx->xlsr_size bytes remain.
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_MALLOC if growing the pointer
 * array fails.
 */
static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sas7bcat_ctx_t *ctx) {
    const char *end = index + len;
    const char *xlsr = index;
    readstat_error_t retval = READSTAT_OK;
    while ((size_t)(end - xlsr) >= (size_t)ctx->xlsr_size) {
        if (memcmp(xlsr, "XLSR", 4) != 0) { // some block pointers seem to have 8 bytes of extra padding
            /* Only skip the padding if a whole record still fits behind it;
             * otherwise the reads below would run past the region. */
            if ((size_t)(end - xlsr) < 8 + (size_t)ctx->xlsr_size)
                break;
            xlsr += 8;
        }
        if (memcmp(xlsr, "XLSR", 4) != 0)
            break;

        if (xlsr[ctx->xlsr_O_offset] == 'O') {
            uint32_t page = 0, pos = 0;
            if (ctx->u64) {
                page = sas_read4(&xlsr[8], ctx->bswap);
                pos = sas_read4(&xlsr[16], ctx->bswap);
            } else {
                page = sas_read2(&xlsr[4], ctx->bswap);
                pos = sas_read2(&xlsr[8], ctx->bswap);
            }
            ctx->block_pointers[ctx->block_pointers_used++] = ((uint64_t)page << 32) + pos;
        }

        /* Grow after the append so there is always room for the next one. */
        if (ctx->block_pointers_used == ctx->block_pointers_capacity) {
            /* NOTE(review): the result is stored straight into ctx because
             * readstat_realloc's behavior on failure (whether it releases
             * the old block) is not visible here -- confirm before changing. */
            ctx->block_pointers = readstat_realloc(ctx->block_pointers, (ctx->block_pointers_capacity *= 2) * sizeof(uint64_t));
            if (ctx->block_pointers == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }
        }

        xlsr += ctx->xlsr_size;
    }
cleanup:
    return retval;
}
/*
 * Follow a chain of links starting at (start_page, start_page_pos) and copy
 * each link's payload, back to back, into buffer.
 *
 * Each link begins with a header (32 bytes when ctx->u64 is set, otherwise
 * 16) holding the next page, the next position within that page, and the
 * payload length; the payload follows the header directly.
 *
 * The walk ends when the next page or position is not positive, when the
 * next page exceeds ctx->page_count, or after ctx->page_count links.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O
 * failure, or READSTAT_ERROR_PARSE if the payloads would not fit in
 * buffer_len bytes.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len,
        int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    char header[32];
    int header_len = ctx->u64 ? 32 : 16;
    /* Field offsets inside the link header differ between the two layouts. */
    int pos_field = ctx->u64 ? 8 : 4;
    int len_field = ctx->u64 ? 10 : 6;

    int page = start_page;
    int page_pos = start_page_pos;
    int links_followed = 0;
    int written = 0;

    for (;;) {
        int payload_len = 0;

        if (page <= 0 || page_pos <= 0 || page > ctx->page_count)
            break;
        /* Cap the number of links so a cyclic chain cannot loop forever. */
        if (links_followed++ >= ctx->page_count)
            break;

        if (io->seek(ctx->header_size+(page-1)*ctx->page_size+page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1)
            return READSTAT_ERROR_SEEK;

        if (io->read(header, header_len, io->io_ctx) < header_len)
            return READSTAT_ERROR_READ;

        page = sas_read4(&header[0], ctx->bswap);
        page_pos = sas_read2(&header[pos_field], ctx->bswap);
        payload_len = sas_read2(&header[len_field], ctx->bswap);

        if (written + payload_len > buffer_len)
            return READSTAT_ERROR_PARSE;

        if (io->read(buffer + written, payload_len, io->io_ctx) < payload_len)
            return READSTAT_ERROR_READ;

        written += payload_len;
    }

    return READSTAT_OK;
}
Exemple #4
0
/*
 * Parse a column-attributes subheader and record, for each column entry it
 * contains, the column's offset, width and type in ctx->col_info.
 *
 * subheader - start of the subheader
 * len       - number of bytes available at subheader
 * ctx       - parser context; col_info is grown as needed, and
 *             col_attrs_count, col_info_count and max_col_width are updated
 *
 * Entries are 16 bytes when ctx->u64 is set and 12 bytes otherwise, and
 * start 8 bytes after the signature.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_PARSE if the subheader is too short,
 * its stored remainder does not match len, or an entry has an unknown
 * column type, or READSTAT_ERROR_MALLOC if col_info cannot be grown.
 */
static readstat_error_t sas_parse_column_attributes_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    size_t signature_len = ctx->u64 ? 8 : 4;
    size_t overhead = ctx->u64 ? 28 : 20;
    size_t entry_len = ctx->u64 ? 16 : 12;
    /* Offset of the width field inside an entry; the offset field before it
     * is 8 bytes wide in the u64 layout and 4 bytes otherwise. */
    size_t off = ctx->u64 ? 8 : 4;
    int cmax;
    int first;
    int i;
    const char *cap;
    uint16_t remainder;

    /* Without this check len - overhead would wrap around and the reads
     * below would run past the subheader. */
    if (len < overhead) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    remainder = sas_read2(&subheader[signature_len], ctx->bswap);
    if (remainder != len - (4+2*signature_len)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    /* remainder is 16 bits wide, so after the check above len (and with it
     * cmax) is small enough to fit an int. */
    cmax = (len - overhead) / entry_len;
    cap = &subheader[signature_len+8];

    first = ctx->col_attrs_count;
    if (ctx->col_info_count < first + cmax) {
        /* Keep the old array reachable if realloc fails. */
        col_info_t *grown = realloc(ctx->col_info, (first + cmax) * sizeof(col_info_t));
        if (grown == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        ctx->col_info = grown;
        ctx->col_info_count = first + cmax;
    }
    ctx->col_attrs_count = first + cmax;

    for (i=first; i<ctx->col_attrs_count; i++) {
        if (ctx->u64) {
            ctx->col_info[i].offset = sas_read8(&cap[0], ctx->bswap);
        } else {
            ctx->col_info[i].offset = sas_read4(&cap[0], ctx->bswap);
        }

        ctx->col_info[i].width = sas_read4(&cap[off], ctx->bswap);
        if (ctx->col_info[i].width > ctx->max_col_width)
            ctx->max_col_width = ctx->col_info[i].width;

        if (cap[off+6] == SAS_COLUMN_TYPE_NUM) {
            ctx->col_info[i].type = READSTAT_TYPE_DOUBLE;
        } else if (cap[off+6] == SAS_COLUMN_TYPE_CHR) {
            ctx->col_info[i].type = READSTAT_TYPE_STRING;
        } else {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        ctx->col_info[i].index = i;
        cap += off+8;
    }

cleanup:

    return retval;
}
/*
 * Follow a chain of links starting at (start_page, start_page_pos) and copy
 * each link's payload, back to back, into buffer.
 *
 * Each link begins with a 16-byte header holding the next page, the next
 * position within that page, and the payload length; the payload follows
 * the header directly.  The walk ends when the next page or position is
 * not positive.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O
 * failure, or READSTAT_ERROR_PARSE if the payloads would not fit in
 * buffer_len bytes.
 */
static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len,
        int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int next_page = start_page;
    int next_page_pos = start_page_pos;

    int chain_link_len = 0;
    int buffer_offset = 0;

    char chain_link[16];
    /* Signed copy of the header size.  Comparing the read result against
     * sizeof() directly is an unsigned comparison, which would let a
     * negative return value pass as "enough bytes" (assuming io->read
     * reports failure with a negative value, like io->seek -- TODO confirm). */
    const int chain_link_header_len = sizeof(chain_link);

    while (next_page > 0 && next_page_pos > 0) {
        if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        next_page = sas_read4(&chain_link[0], ctx->bswap);
        next_page_pos = sas_read2(&chain_link[4], ctx->bswap);
        chain_link_len = sas_read2(&chain_link[6], ctx->bswap);
        /* The lengths come from the file; never write past the caller's
         * buffer. */
        if (buffer_offset + chain_link_len > buffer_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->read(buffer + buffer_offset, chain_link_len, io->io_ctx) < chain_link_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        buffer_offset += chain_link_len;
    }
cleanup:

    return retval;
}
/*
 * Walk the chain of links starting at (start_page, start_page_pos) and add
 * up the payload lengths stored in the 16-byte link headers, i.e. the
 * buffer size needed to hold the whole block.
 *
 * The walk ends when the next page or position is not positive, when the
 * next page exceeds ctx->page_count, or after ctx->page_count links.
 *
 * outError - if not NULL, receives READSTAT_OK or the I/O error hit
 *
 * Returns the total payload length, or -1 on a seek/read failure.
 */
static int sas7bcat_block_size(int start_page, int start_page_pos, sas7bcat_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    int next_page = start_page;
    int next_page_pos = start_page_pos;
    int link_count = 0;

    int buffer_len = 0;
    int chain_link_len = 0;

    char chain_link[16];
    /* Signed copy of the header size.  Comparing the read result against
     * sizeof() directly is an unsigned comparison, which would let a
     * negative return value pass as "enough bytes" (assuming io->read
     * reports failure with a negative value, like io->seek -- TODO confirm). */
    const int chain_link_header_len = sizeof(chain_link);

    // calculate buffer size needed
    /* link_count caps the walk at page_count links, so a chain that points
     * back at itself cannot loop forever or overflow buffer_len. */
    while (next_page > 0 && next_page_pos > 0 && next_page <= ctx->page_count && link_count++ < ctx->page_count) {
        if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        next_page = sas_read4(&chain_link[0], ctx->bswap);
        next_page_pos = sas_read2(&chain_link[4], ctx->bswap);
        chain_link_len = sas_read2(&chain_link[6], ctx->bswap);

        buffer_len += chain_link_len;
    }

cleanup:
    if (outError)
        *outError = retval;

    return retval == READSTAT_OK ? buffer_len : -1;
}
/*
 * Follow a chain of links starting at (start_page, start_page_pos) in the
 * file ctx->fd and copy each link's payload, back to back, into buffer.
 *
 * Each link begins with a 16-byte header holding the next page, the next
 * position within that page, and the payload length; the payload follows
 * the header directly.  The walk ends when the next page or position is
 * not positive.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_SEEK / READSTAT_ERROR_READ on I/O
 * failure, or READSTAT_ERROR_PARSE if the payloads would not fit in
 * buffer_len bytes.
 */
static readstat_error_t sas_catalog_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas_catalog_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;

    int block_len = 0;
    int buffer_offset = 0;

    /* Fixed-size scratch for the link header; a stack array avoids an
     * allocation that could fail and would then need freeing. */
    char page[16];

    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, 16) < 16) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        next_page = sas_read4(&page[0], ctx->bswap);
        next_page_pos = sas_read2(&page[4], ctx->bswap);
        block_len = sas_read2(&page[6], ctx->bswap);
        /* The lengths come from the file; never write past the caller's
         * buffer. */
        if (buffer_offset + block_len > buffer_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (read(ctx->fd, buffer + buffer_offset, block_len) < block_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        buffer_offset += block_len;
    }
cleanup:

    return retval;
}
/*
 * Walk the chain of links starting at (start_page, start_page_pos) in the
 * file ctx->fd and add up the payload lengths stored in the 16-byte link
 * headers, i.e. the buffer size needed to hold the whole block.
 *
 * The walk ends when the next page or position is not positive.
 *
 * outError - if not NULL, receives READSTAT_OK or the I/O error hit
 *
 * Returns the total payload length, or -1 on a seek/read failure.
 */
static int sas_catalog_block_size(int start_page, int start_page_pos, sas_catalog_ctx_t *ctx, readstat_error_t *outError) {
    readstat_error_t retval = READSTAT_OK;
    int next_page = start_page;
    int next_page_pos = start_page_pos;

    int buffer_len = 0;
    int block_len = 0;

    /* Fixed-size scratch for the link header; a stack array avoids an
     * allocation that could fail and would then need freeing. */
    char page[16];

    // calculate buffer size needed
    while (next_page > 0 && next_page_pos > 0) {
        if (readstat_lseek(ctx->fd, ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, 16) < 16) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        next_page = sas_read4(&page[0], ctx->bswap);
        next_page_pos = sas_read2(&page[4], ctx->bswap);
        block_len = sas_read2(&page[6], ctx->bswap);

        buffer_len += block_len;
    }

cleanup:
    if (outError)
        *outError = retval;

    return retval == READSTAT_OK ? buffer_len : -1;
}
Exemple #9
0
/*
 * Parse a row-size subheader: store the row length and rows-per-page in
 * ctx, and lower ctx->row_limit to the file's total row count when no
 * limit is set (0) or the file has fewer rows than the limit.
 *
 * subheader - start of the subheader
 * len       - number of bytes available at subheader
 * ctx       - parser context (u64 selects the 8-byte field layout)
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_PARSE if the subheader is too
 * short to contain the fields read here.
 */
static readstat_error_t sas_parse_row_size_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    uint64_t total_row_count;
    uint64_t row_length, page_row_count;

    /* The last field read ends at byte 128 (u64 layout) or byte 64. */
    if (len < (size_t)(ctx->u64 ? 128 : 64)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->u64) {
        row_length = sas_read8(&subheader[40], ctx->bswap);
        total_row_count = sas_read8(&subheader[48], ctx->bswap);
        page_row_count = sas_read8(&subheader[120], ctx->bswap);
    } else {
        row_length = sas_read4(&subheader[20], ctx->bswap);
        total_row_count = sas_read4(&subheader[24], ctx->bswap);
        page_row_count = sas_read4(&subheader[60], ctx->bswap);
    }

    ctx->row_length = row_length;
    ctx->page_row_count = page_row_count;
    if (ctx->row_limit == 0 || total_row_count < ctx->row_limit)
        ctx->row_limit = total_row_count;

cleanup:
    return retval;
}
Exemple #10
0
/*
 * Parse a column-size subheader and store the column count in
 * ctx->column_count.
 *
 * subheader - start of the subheader
 * len       - number of bytes available at subheader
 * ctx       - parser context (u64 selects the 8-byte field layout)
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_PARSE if the subheader is too
 * short to contain the count field.
 */
static readstat_error_t sas_parse_column_size_subheader(const char *subheader, size_t len, sas_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;

    uint64_t col_count;

    /* The count field ends at byte 16 (u64 layout) or byte 8. */
    if (len < (size_t)(ctx->u64 ? 16 : 8)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->u64) {
        col_count = sas_read8(&subheader[8], ctx->bswap);
    } else {
        col_count = sas_read4(&subheader[4], ctx->bswap);
    }

    ctx->column_count = col_count;

cleanup:
    return retval;
}
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, 
        int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    const char *lbp1 = value_start;
    uint32_t *value_offset = calloc(label_count_used, sizeof(uint32_t));
    /* Doubles appear to be stored as big-endian, always */
    int bswap_doubles = machine_is_little_endian();
    int is_string = (name[0] == '$');

    /* Pass 1 -- find out the offset of the labels */
    for (i=0; i<label_count_capacity; i++) {
        if (&lbp1[2] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (i<label_count_used) {
            uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap);
            if (label_pos >= label_count_used) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            value_offset[label_pos] = lbp1 - value_start;
        }
        lbp1 += 6 + lbp1[2];
    }

    const char *lbp2 = lbp1;

    /* Pass 2 -- parse pairs of values & labels */
    for (i=0; i<label_count_used && i<label_count_capacity; i++) {
        lbp1 = value_start + value_offset[i];

        if (&lbp1[30] - value_start > value_labels_len ||
                &lbp2[10] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        size_t label_len = sas_read2(&lbp2[8], ctx->bswap);
        size_t value_entry_len = 6 + lbp1[2];
        const char *label = &lbp2[10];
        readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE };
        if (is_string) {
            char val[4*16+1];
            retval = readstat_convert(val, sizeof(val), &lbp1[value_entry_len-16], 16, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            value.v.string_value = val;
        } else {
            uint64_t val = sas_read8(&lbp1[22], bswap_doubles);
            double dval = NAN;
            if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) {
                value.tag = (val >> 40);
                if (value.tag) {
                    value.is_tagged_missing = 1;
                } else {
                    value.is_system_missing = 1;
                }
            } else {
                memcpy(&dval, &val, 8);
                dval *= -1.0;
            }

            value.v.double_value = dval;
        }
        if (ctx->value_label_handler) {
            if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }

        lbp2 += 8 + 2 + label_len + 1;
    }