コード例 #1
0
ファイル: readstat_dta_read.c プロジェクト: krlmlr/haven
static void dta_interpret_strl_vo_bytes(dta_ctx_t *ctx, unsigned char *vo_bytes, dta_strl_t *strl) {
    int file_is_big_endian = (!machine_is_little_endian() ^ ctx->machine_needs_byte_swap);

    if (ctx->strl_v_len == 2) {
        if (file_is_big_endian) {
            strl->v = (vo_bytes[0] << 8) + vo_bytes[1];
            strl->o = (((uint64_t)vo_bytes[2] << 40) 
                    + ((uint64_t)vo_bytes[3] << 32)
                    + (vo_bytes[4] << 24) 
                    + (vo_bytes[5] << 16)
                    + (vo_bytes[6] << 8) 
                    + vo_bytes[7]);
        } else {
            strl->v = vo_bytes[0] + (vo_bytes[1] << 8);
            strl->o = (vo_bytes[2] + (vo_bytes[3] << 8)
                    + (vo_bytes[4] << 16) + (vo_bytes[5] << 24)
                    + ((uint64_t)vo_bytes[6] << 32) 
                    + ((uint64_t)vo_bytes[7] << 40));
        }
    } else if (ctx->strl_v_len == 4) {
        uint32_t v, o;

        memcpy(&v, &vo_bytes[0], sizeof(uint32_t));
        memcpy(&o, &vo_bytes[4], sizeof(uint32_t));

        strl->v = ctx->machine_needs_byte_swap ? byteswap4(v) : v;
        strl->o = ctx->machine_needs_byte_swap ? byteswap4(o) : o;
    }
}
コード例 #2
0
ファイル: readstat_xport_read.c プロジェクト: ngr-t/smuggler
static readstat_error_t xport_read_labels_v8(xport_ctx_t *ctx, int label_count) {
    readstat_error_t retval = READSTAT_OK;
    uint16_t labeldef[3];
    int i;
    for (i=0; i<label_count; i++) {
        int index, name_len, label_len;
        if (read_bytes(ctx, labeldef, sizeof(labeldef)) != sizeof(labeldef)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        if (machine_is_little_endian()) {
            index = byteswap2(labeldef[0]);
            name_len = byteswap2(labeldef[1]);
            label_len = byteswap2(labeldef[2]);
        } else {
            index = labeldef[0];
            name_len = labeldef[1];
            label_len = labeldef[2];
        }

        if (index >= ctx->var_count) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        char name[name_len];
        char label[label_len];
        readstat_variable_t *variable = ctx->variables[index];

        if (read_bytes(ctx, name, name_len) != name_len ||
                read_bytes(ctx, label, label_len) != label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        retval = readstat_convert(variable->name, sizeof(variable->name),
                name, name_len,  NULL);
        if (retval != READSTAT_OK)
            goto cleanup;

        retval = readstat_convert(variable->label, sizeof(variable->label),
                label, label_len,  NULL);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    retval = xport_skip_rest_of_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = xport_read_obs_header_record(ctx);
    if (retval != READSTAT_OK)
        goto cleanup;

cleanup:
    return retval;
}
コード例 #3
0
ファイル: readstat_dta_write.c プロジェクト: scottme/ReadStat
static readstat_error_t dta_begin_data(void *writer_ctx) {
    readstat_writer_t *writer = (readstat_writer_t *)writer_ctx;
    readstat_error_t error = READSTAT_OK;
    
    dta_ctx_t *ctx = dta_ctx_alloc(NULL);
    dta_header_t header;
    memset(&header, 0, sizeof(dta_header_t));

    header.ds_format = 111;
    header.byteorder = machine_is_little_endian() ? DTA_LOHI : DTA_HILO;
    header.filetype  = 0x01;
    header.unused    = 0x00;
    header.nvar      = writer->variables_count;
    header.nobs      = writer->row_count;

    error = readstat_write_bytes(writer, &header, sizeof(dta_header_t));
    if (error != READSTAT_OK)
        goto cleanup;
    
    error = dta_ctx_init(ctx, header.nvar, header.nobs, header.byteorder, header.ds_format);
    if (error != READSTAT_OK)
        goto cleanup;
    
    error = dta_emit_header_data_label(writer);
    if (error != READSTAT_OK)
        goto cleanup;

    error = dta_emit_header_time_stamp(writer);
    if (error != READSTAT_OK)
        goto cleanup;

    error = dta_emit_descriptors(writer, ctx);
    if (error != READSTAT_OK)
        goto cleanup;

    error = dta_emit_variable_labels(writer, ctx);
    if (error != READSTAT_OK)
        goto cleanup;

    error = dta_emit_expansion_fields(writer, ctx);
    if (error != READSTAT_OK)
        goto cleanup;

cleanup:
    if (error != READSTAT_OK) {
        dta_ctx_free(ctx);
    } else {
        writer->module_ctx = ctx;
    }
    
    return error;
}
コード例 #4
0
void xport_namestr_bswap(xport_namestr_t *namestr) {
    if (!machine_is_little_endian())
        return;

    namestr->ntype = byteswap2(namestr->ntype);
    namestr->nhfun = byteswap2(namestr->nhfun);
    namestr->nlng = byteswap2(namestr->nlng);
    namestr->nvar0 = byteswap2(namestr->nlng);

    namestr->nfl = byteswap2(namestr->nfl);
    namestr->nfd = byteswap2(namestr->nfd);
    namestr->nfj = byteswap2(namestr->nfj);

    namestr->nifl = byteswap2(namestr->nifl);
    namestr->nifd = byteswap2(namestr->nifd);
    namestr->npos = byteswap4(namestr->npos);

    namestr->labeln  = byteswap2(namestr->labeln);
}
コード例 #5
0
ファイル: readstat_rdata.c プロジェクト: karissa/ReadStat
rdata_ctx_t *init_rdata_ctx(const char *filename) {
    int fd = readstat_open(filename);
    if (fd == -1) {
        return NULL;
    }
    rdata_ctx_t *ctx = calloc(1, sizeof(rdata_ctx_t));
    rdata_atom_table_t *atom_table = malloc(sizeof(rdata_atom_table_t));
    
    atom_table->count = 0;
    atom_table->data = NULL;

    ctx->atom_table = atom_table;

    ctx->machine_needs_byteswap = 0;
    if (machine_is_little_endian()) {
        ctx->machine_needs_byteswap = 1;
    }

    ctx->fd = fd;
    
    return ctx;
}
コード例 #6
0
ファイル: readstat_dta.c プロジェクト: WizardMac/ReadStat
readstat_error_t dta_ctx_init(dta_ctx_t *ctx, uint32_t nvar, uint64_t nobs,
        unsigned char byteorder, unsigned char ds_format,
        const char *input_encoding, const char *output_encoding) {
    readstat_error_t retval = READSTAT_OK;
    int machine_byteorder = DTA_HILO;
    if (ds_format < DTA_MIN_VERSION || ds_format > DTA_MAX_VERSION)
        return READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION;

    if (machine_is_little_endian()) {
        machine_byteorder = DTA_LOHI;
    }

    ctx->bswap = (byteorder != machine_byteorder);
    ctx->ds_format = ds_format;
    ctx->endianness = byteorder == DTA_LOHI ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG;

    ctx->nvar = nvar;
    ctx->nobs = nobs;

    if (ctx->nvar) {
        if ((ctx->variables = readstat_calloc(ctx->nvar, sizeof(readstat_variable_t *))) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
    }

    ctx->machine_is_twos_complement = READSTAT_MACHINE_IS_TWOS_COMPLEMENT;

    if (ds_format < 105) {
        ctx->fmtlist_entry_len = 7;
    } else if (ds_format < 114) {
        ctx->fmtlist_entry_len = 12;
    } else if (ds_format < 118) {
        ctx->fmtlist_entry_len = 49;
    } else {
        ctx->fmtlist_entry_len = 57;
    }
    
    if (ds_format >= 117) {
        ctx->typlist_version = 117;
    } else if (ds_format >= 111) {
        ctx->typlist_version = 111;
    } else {
        ctx->typlist_version = 0;
    }

    if (ds_format >= 118) {
        ctx->data_label_len_len = 2;
        ctx->strl_v_len = 2;
        ctx->strl_o_len = 6;
    } else if (ds_format >= 117) {
        ctx->data_label_len_len = 1;
        ctx->strl_v_len = 4;
        ctx->strl_o_len = 4;
    }

    if (ds_format < 105) {
        ctx->expansion_len_len = 0;
    } else if (ds_format < 110) {
        ctx->expansion_len_len = 2;
    } else {
        ctx->expansion_len_len = 4;
    }
    
    if (ds_format < 110) {
        ctx->lbllist_entry_len = 9;
        ctx->variable_name_len = 9;
        ctx->ch_metadata_len = 9;
    } else if (ds_format < 118) {
        ctx->lbllist_entry_len = 33;
        ctx->variable_name_len = 33;
        ctx->ch_metadata_len = 33;
    } else {
        ctx->lbllist_entry_len = 129;
        ctx->variable_name_len = 129;
        ctx->ch_metadata_len = 129;
    }

    if (ds_format < 108) {
        ctx->variable_labels_entry_len = 32;
        ctx->data_label_len = 32;
    } else if (ds_format < 118) {
        ctx->variable_labels_entry_len = 81;
        ctx->data_label_len = 81;
    } else {
        ctx->variable_labels_entry_len = 321;
        ctx->data_label_len = 321;
    }

    if (ds_format < 105) {
        ctx->timestamp_len = 0;
        ctx->value_label_table_len_len = 2;
        ctx->value_label_table_labname_len = 12;
        ctx->value_label_table_padding_len = 2;
    } else {
        ctx->timestamp_len = 18;
        ctx->value_label_table_len_len = 4;
        if (ds_format < 118) {
            ctx->value_label_table_labname_len = 33;
        } else {
            ctx->value_label_table_labname_len = 129;
        }
        ctx->value_label_table_padding_len = 3;
    }

    if (ds_format < 117) {
        ctx->typlist_entry_len = 1;
        ctx->file_is_xmlish = 0;
    } else {
        ctx->typlist_entry_len = 2;
        ctx->file_is_xmlish = 1;
    }

    if (ds_format < 113) {
        ctx->max_int8 = DTA_OLD_MAX_INT8;
        ctx->max_int16 = DTA_OLD_MAX_INT16;
        ctx->max_int32 = DTA_OLD_MAX_INT32;
        ctx->max_float = DTA_OLD_MAX_FLOAT;
        ctx->max_double = DTA_OLD_MAX_DOUBLE;
    } else {
        ctx->max_int8 = DTA_113_MAX_INT8;
        ctx->max_int16 = DTA_113_MAX_INT16;
        ctx->max_int32 = DTA_113_MAX_INT32;
        ctx->max_float = DTA_113_MAX_FLOAT;
        ctx->max_double = DTA_113_MAX_DOUBLE;

        ctx->supports_tagged_missing = 1;
    }

    if (output_encoding) {
        if (input_encoding) {
            ctx->converter = iconv_open(output_encoding, input_encoding);
        } else if (ds_format < 118) {
            ctx->converter = iconv_open(output_encoding, "WINDOWS-1252");
        } else if (strcmp(output_encoding, "UTF-8") != 0) {
            ctx->converter = iconv_open(output_encoding, "UTF-8");
        }
        if (ctx->converter == (iconv_t)-1) {
            ctx->converter = NULL;
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
    }

    ctx->srtlist_len = (ctx->nvar + 1) * sizeof(int16_t);
    if ((ctx->srtlist = readstat_malloc(ctx->srtlist_len)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    if (ctx->nvar > 0) {
        ctx->typlist_len = ctx->nvar * sizeof(uint16_t);
        ctx->varlist_len = ctx->variable_name_len * ctx->nvar * sizeof(char);
        ctx->fmtlist_len = ctx->fmtlist_entry_len * ctx->nvar * sizeof(char);
        ctx->lbllist_len = ctx->lbllist_entry_len * ctx->nvar * sizeof(char);
        ctx->variable_labels_len = ctx->variable_labels_entry_len * ctx->nvar * sizeof(char);

        if ((ctx->typlist = readstat_malloc(ctx->typlist_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if ((ctx->varlist = readstat_malloc(ctx->varlist_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if ((ctx->fmtlist = readstat_malloc(ctx->fmtlist_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if ((ctx->lbllist = readstat_malloc(ctx->lbllist_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
        if ((ctx->variable_labels = readstat_malloc(ctx->variable_labels_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
    }

    ctx->initialized = 1;

cleanup:
    return retval;
}
コード例 #7
0
static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, 
        int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    const char *lbp1 = value_start;
    uint32_t *value_offset = calloc(label_count_used, sizeof(uint32_t));
    /* Doubles appear to be stored as big-endian, always */
    int bswap_doubles = machine_is_little_endian();
    int is_string = (name[0] == '$');

    /* Pass 1 -- find out the offset of the labels */
    for (i=0; i<label_count_capacity; i++) {
        if (&lbp1[2] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (i<label_count_used) {
            uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap);
            if (label_pos >= label_count_used) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }
            value_offset[label_pos] = lbp1 - value_start;
        }
        lbp1 += 6 + lbp1[2];
    }

    const char *lbp2 = lbp1;

    /* Pass 2 -- parse pairs of values & labels */
    for (i=0; i<label_count_used && i<label_count_capacity; i++) {
        lbp1 = value_start + value_offset[i];

        if (&lbp1[30] - value_start > value_labels_len ||
                &lbp2[10] - value_start > value_labels_len) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        size_t label_len = sas_read2(&lbp2[8], ctx->bswap);
        size_t value_entry_len = 6 + lbp1[2];
        const char *label = &lbp2[10];
        readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE };
        if (is_string) {
            char val[4*16+1];
            retval = readstat_convert(val, sizeof(val), &lbp1[value_entry_len-16], 16, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            value.v.string_value = val;
        } else {
            uint64_t val = sas_read8(&lbp1[22], bswap_doubles);
            double dval = NAN;
            if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) {
                value.tag = (val >> 40);
                if (value.tag) {
                    value.is_tagged_missing = 1;
                } else {
                    value.is_system_missing = 1;
                }
            } else {
                memcpy(&dval, &val, 8);
                dval *= -1.0;
            }

            value.v.double_value = dval;
        }
        if (ctx->value_label_handler) {
            if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }

        lbp2 += 8 + 2 + label_len + 1;
    }
コード例 #8
0
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas7bcat_ctx_t *ctx = calloc(1, sizeof(sas7bcat_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    ctx->block_pointers = malloc((ctx->block_pointers_capacity = 200) * sizeof(uint64_t));

    ctx->value_label_handler = parser->value_label_handler;
    ctx->metadata_handler = parser->metadata_handler;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->io = io;

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(io, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;
    if (ctx->input_encoding == NULL) {
        ctx->input_encoding = hinfo->encoding;
    }

    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) {
        iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if (parser->metadata_handler) {
        char file_label[4*64+1];
        retval = readstat_convert(file_label, sizeof(file_label), 
                hinfo->file_label, sizeof(hinfo->file_label), ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (ctx->metadata_handler(file_label, hinfo->modification_time, 
                    10000 * hinfo->major_version + hinfo->minor_version, ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    if (io->seek(ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas7bcat_augment_index(&page[856+2*ctx->pad1], ctx->page_size - 856 - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (io->seek(ctx->header_size+i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) {
            sas7bcat_augment_index(&page[16], ctx->page_size - 16, ctx);
        }
    }

    sas7bcat_sort_index(ctx);
    sas7bcat_uniq_index(ctx);

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas7bcat_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }
        buffer = realloc(buffer, buffer_len);
        if ((retval = sas7bcat_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas7bcat_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    io->close(io->io_ctx);
    if (page)
        free(page);
    if (buffer)
        free(buffer);
    if (ctx)
        sas7bcat_ctx_free(ctx);
    if (hinfo)
        free(hinfo);

    return retval;
}
コード例 #9
0
static sas7bcat_block_t *sas7bcat_block_for_label_set(readstat_label_set_t *r_label_set) {
    size_t len = 0;
    size_t name_len = strlen(r_label_set->name);
    int j;
    char name[32];

    len += 106;

    if (name_len > 8) {
        len += 32; // long name
        if (name_len > 32) {
            name_len = 32;
        }
    }

    memcpy(&name[0], r_label_set->name, name_len);

    for (j=0; j<r_label_set->value_labels_count; j++) {
        readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j);
        len += 30; // Value: 14-byte header + 16-byte padded value

        len += 8 + 2 + value_label->label_len + 1;
    }

    sas7bcat_block_t *block = calloc(1, sizeof(sas7bcat_block_t) + len);
    block->len = len;

    off_t begin = 106;
    int32_t count = r_label_set->value_labels_count;
    memcpy(&block->data[38], &count, sizeof(int32_t));
    memcpy(&block->data[42], &count, sizeof(int32_t));
    if (name_len > 8) {
        block->data[2] = (char)0x80;
        memcpy(&block->data[8], name, 8);

        memset(&block->data[106], ' ', 32);
        memcpy(&block->data[106], name, name_len);

        begin += 32;
    } else {
        memset(&block->data[8], ' ', 8);
        memcpy(&block->data[8], name, name_len);
    }

    char *lbp1 = &block->data[begin];
    char *lbp2 = &block->data[begin+r_label_set->value_labels_count*30];

    for (j=0; j<r_label_set->value_labels_count; j++) {
        readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j);
        lbp1[2] = 24; // size - 6
        int32_t index = j;
        memcpy(&lbp1[10], &index, sizeof(int32_t));
        if (r_label_set->type == READSTAT_TYPE_STRING) {
            size_t string_len = value_label->string_key_len;
            if (string_len > 16)
                string_len = 16;
            memset(&lbp1[14], ' ', 16);
            memcpy(&lbp1[14], value_label->string_key, string_len);
        } else {
            uint64_t big_endian_value;
            double double_value = -1.0 * value_label->double_key;
            memcpy(&big_endian_value, &double_value, sizeof(double));
            if (machine_is_little_endian()) {
                big_endian_value = byteswap8(big_endian_value);
            }
            memcpy(&lbp1[22], &big_endian_value, sizeof(uint64_t));
        }

        int16_t label_len = value_label->label_len;
        memcpy(&lbp2[8], &label_len, sizeof(int16_t));
        memcpy(&lbp2[10], value_label->label, label_len);

        lbp1 += 30;
        lbp2 += 8 + 2 + value_label->label_len + 1;
    }

    return block;
}
コード例 #10
0
        int16_t label_len = value_label->label_len;
        memcpy(&lbp2[8], &label_len, sizeof(int16_t));
        memcpy(&lbp2[10], value_label->label, label_len);

        lbp1 += 30;
        lbp2 += 8 + 2 + value_label->label_len + 1;
    }

    return block;
}

static readstat_error_t sas7bcat_emit_header(readstat_writer_t *writer, sas_header_info_t *hinfo) {
    sas_header_start_t header_start = {
        .a2 = hinfo->u64 ? SAS_ALIGNMENT_OFFSET_4 : SAS_ALIGNMENT_OFFSET_0,
        .a1 = SAS_ALIGNMENT_OFFSET_0,
        .endian = machine_is_little_endian() ? SAS_ENDIAN_LITTLE : SAS_ENDIAN_BIG,
        .file_format = SAS_FILE_FORMAT_UNIX,
        .encoding = 20, /* UTF-8 */
        .file_type = "SAS FILE",
        .file_info = "CATALOG "
    };

    memcpy(&header_start.magic, sas7bcat_magic_number, sizeof(header_start.magic));
    strncpy(header_start.file_label, writer->file_label, sizeof(header_start.file_label));

    return sas_write_header(writer, hinfo, header_start);
}

static readstat_error_t sas7bcat_begin_data(void *writer_ctx) {
    readstat_writer_t *writer = (readstat_writer_t *)writer_ctx;
    readstat_error_t retval = READSTAT_OK;
コード例 #11
0
ファイル: readstat_sas.c プロジェクト: Jiangtang/haven
readstat_error_t sas_read_header(int fd, sas_header_info_t *ctx, 
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t  header_start;
    sas_header_end_t    header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];
    if (read(fd, &header_start, sizeof(sas_header_start_t)) < sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        ctx->u64 = 1;
    }
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        ctx->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        ctx->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            ctx->encoding = _charset_table[i].name;
            break;
        }
    }
    if (ctx->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }
    if (readstat_lseek(fd, 196 + ctx->pad1, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %d\n", 196 + ctx->pad1);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (read(fd, &header_size, sizeof(uint32_t)) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (read(fd, &page_size, sizeof(uint32_t)) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx->header_size = bswap ? byteswap4(header_size) : header_size;

    ctx->page_size = bswap ? byteswap4(page_size) : page_size;

    if (ctx->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->u64) {
        uint64_t page_count;
        if (read(fd, &page_count, sizeof(uint64_t)) < sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (read(fd, &page_count, sizeof(uint32_t)) < sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        ctx->page_count = bswap ? byteswap4(page_count) : page_count;
    }
    
    if (readstat_lseek(fd, 8, SEEK_CUR) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }
    if (read(fd, &header_end, sizeof(sas_header_end_t)) < sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (strncmp(header_end.release, "9.0000M0", sizeof(header_end.release)) == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        ctx->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        ctx->vendor = READSTAT_VENDOR_SAS;
    }
    if (readstat_lseek(fd, ctx->header_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %lld\n", ctx->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}
コード例 #12
0
ファイル: readstat_xport_write.c プロジェクト: ngr-t/smuggler
static readstat_error_t xport_write_variables(readstat_writer_t *writer) {
    readstat_error_t retval = READSTAT_OK;
    int i;
    long offset = 0;
    int num_long_labels = 0;
    int any_has_long_format = 0;
    for (i=0; i<writer->variables_count; i++) {
        int needs_long_record = 0;
        readstat_variable_t *variable = readstat_get_variable(writer, i);
        size_t width = xport_variable_width(variable->type, variable->user_width);
        xport_namestr_t namestr = { 
            .nvar0 = i,
            .nlng = width,
            .npos = offset
        };
        if (readstat_variable_get_type_class(variable) == READSTAT_TYPE_CLASS_STRING) {
            namestr.ntype = SAS_COLUMN_TYPE_CHR;
        } else {
            namestr.ntype = SAS_COLUMN_TYPE_NUM;
        }

        copypad(namestr.nname, sizeof(namestr.nname), variable->name);
        copypad(namestr.nlabel, sizeof(namestr.nlabel), variable->label);

        if (variable->format[0]) {
            int decimals = 0;
            int width = 0;
            char name[24];

            sscanf(variable->format, "%s%d.%d", name, &width, &decimals);

            copypad(namestr.nform, sizeof(namestr.nform), name);
            namestr.nfl = width;
            namestr.nfd = decimals;

            copypad(namestr.niform, sizeof(namestr.niform), name);
            namestr.nifl = width;
            namestr.nifd = decimals;

            if (strlen(name) > 8) {
                any_has_long_format = 1;
                needs_long_record = 1;
            }
        }

        namestr.nfj = (variable->alignment == READSTAT_ALIGNMENT_RIGHT);

        if (writer->version == 8) {
            copypad(namestr.longname, sizeof(namestr.longname), variable->name);

            size_t label_len = strlen(variable->label);
            if (label_len > 40) {
                needs_long_record = 1;
            }
            namestr.labeln = label_len;
        }

        if (needs_long_record) {
            num_long_labels++;
        }

        offset += width;

        xport_namestr_bswap(&namestr);

        retval = xport_write_bytes(writer, &namestr, sizeof(xport_namestr_t));
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    retval = xport_finish_record(writer);
    if (retval != READSTAT_OK)
        goto cleanup;

    if (writer->version == 8 && num_long_labels) {
        xport_header_record_t header = { 
            .name = "LABELV8",
            .num1 = num_long_labels };
        if (any_has_long_format) {
            strcpy(header.name, "LABELV9");
        }
        retval = xport_write_header_record_v8(writer, &header);
        if (retval != READSTAT_OK)
            goto cleanup;

        for (i=0; i<writer->variables_count; i++) {
            readstat_variable_t *variable = readstat_get_variable(writer, i);
            size_t label_len = strlen(variable->label);
            size_t name_len = strlen(variable->name);
            int has_long_label = 0;
            int has_long_format = 0;
            int format_len = 0;
            char format_name[24];
            memset(format_name, 0, sizeof(format_name));

            has_long_label = (label_len > 40);

            if (variable->format[0]) {
                int decimals = 2;
                int width = 8;

                int matches = sscanf(variable->format, "%s%d.%d", format_name, &width, &decimals);
                if (matches < 1) {
                    retval = READSTAT_ERROR_BAD_FORMAT_STRING;
                    goto cleanup;
                }
                format_len = strlen(format_name);
                if (format_len > 8) {
                    has_long_format = 1;
                }
            }

            if (has_long_format) {
                uint16_t labeldef[5] = { i, name_len, format_len, format_len, label_len };

                if (machine_is_little_endian()) {
                    labeldef[0] = byteswap2(labeldef[0]);
                    labeldef[1] = byteswap2(labeldef[1]);
                    labeldef[2] = byteswap2(labeldef[2]);
                    labeldef[3] = byteswap2(labeldef[3]);
                    labeldef[4] = byteswap2(labeldef[4]);
                }

                retval = readstat_write_bytes(writer, labeldef, sizeof(labeldef));
                if (retval != READSTAT_OK)
                    goto cleanup;

                retval = readstat_write_string(writer, variable->name);
                if (retval != READSTAT_OK)
                    goto cleanup;

                retval = readstat_write_string(writer, format_name);
                if (retval != READSTAT_OK)
                    goto cleanup;

                retval = readstat_write_string(writer, format_name);
                if (retval != READSTAT_OK)
                    goto cleanup;

                retval = readstat_write_string(writer, variable->label);
                if (retval != READSTAT_OK)
                    goto cleanup;

            } else if (has_long_label) {
                uint16_t labeldef[3] = { i, name_len, label_len };

                if (machine_is_little_endian()) {
                    labeldef[0] = byteswap2(labeldef[0]);
                    labeldef[1] = byteswap2(labeldef[1]);
                    labeldef[2] = byteswap2(labeldef[2]);
                }

                retval = readstat_write_bytes(writer, labeldef, sizeof(labeldef));
                if (retval != READSTAT_OK)
                    goto cleanup;

                retval = readstat_write_string(writer, variable->name);
                if (retval != READSTAT_OK)
                    goto cleanup;

                retval = readstat_write_string(writer, variable->label);
                if (retval != READSTAT_OK)
                    goto cleanup;
            }
        }

        retval = xport_finish_record(writer);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

cleanup:

    return retval;
}
コード例 #13
0
readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *filename, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    int64_t i;
    char *page = NULL;
    char *buffer = NULL;

    sas_catalog_ctx_t *ctx = calloc(1, sizeof(sas_catalog_ctx_t));
    sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t));

    ctx->block_pointers = malloc((ctx->block_pointers_capacity = 200) * sizeof(uint64_t));

    ctx->value_label_handler = parser->value_label_handler;
    ctx->user_ctx = user_ctx;

    if ((ctx->fd = readstat_open(filename)) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    if ((retval = sas_read_header(ctx->fd, hinfo, parser->error_handler, user_ctx)) != READSTAT_OK) {
        goto cleanup;
    }

    ctx->u64 = hinfo->u64;
    ctx->pad1 = hinfo->pad1;
    ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian;
    ctx->header_size = hinfo->header_size;
    ctx->page_count = hinfo->page_count;
    ctx->page_size = hinfo->page_size;

    if (ctx->u64) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (strcmp(hinfo->encoding, "UTF-8") != 0 &&
            strcmp(hinfo->encoding, "US-ASCII") != 0) {
        iconv_t converter = iconv_open("UTF-8", hinfo->encoding);
        if (converter == (iconv_t)-1) {
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
            goto cleanup;
        }
        ctx->converter = converter;
    }

    if ((page = malloc(ctx->page_size)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    if (readstat_lseek(ctx->fd, ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, SEEK_SET) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (read(ctx->fd, page, ctx->page_size) < ctx->page_size) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    sas_catalog_augment_index(&page[856+2*ctx->pad1], ctx->page_size - 856 - 2*ctx->pad1, ctx);

    // Pass 1 -- find the XLSR entries
    for (i=SAS_CATALOG_USELESS_PAGES; i<ctx->page_count; i++) {
        if (readstat_lseek(ctx->fd, ctx->header_size+i*ctx->page_size, SEEK_SET) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (read(ctx->fd, page, ctx->page_size) < ctx->page_size) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) {
            sas_catalog_augment_index(&page[16], ctx->page_size - 16, ctx);
        }
    }

    // Pass 2 -- look up the individual block pointers
    for (i=0; i<ctx->block_pointers_used; i++) {
        int start_page = ctx->block_pointers[i] >> 32;
        int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF;

        int buffer_len = sas_catalog_block_size(start_page, start_page_pos, ctx, &retval);
        if (buffer_len == -1) {
            goto cleanup;
        } else if (buffer_len == 0) {
            continue;
        }
        buffer = realloc(buffer, buffer_len);
        if ((retval = sas_catalog_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK)
            goto cleanup;
        if ((retval = sas_catalog_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK)
            goto cleanup;
    }

cleanup:
    if (page)
        free(page);
    if (buffer)
        free(buffer);
    if (ctx)
        sas_catalog_ctx_free(ctx);
    if (hinfo)
        free(hinfo);

    return retval;
}
コード例 #14
0
ファイル: readstat_sas.c プロジェクト: davidanthoff/ReadStat
readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, 
        readstat_error_handler error_handler, void *user_ctx) {
    sas_header_start_t  header_start;
    sas_header_end_t    header_end;
    int retval = READSTAT_OK;
    char error_buf[1024];
    struct tm epoch_tm = { .tm_year = 60, .tm_mday = 1 };
    time_t epoch = mktime(&epoch_tm);

    if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->pad1 = 4;
    }
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
        hinfo->u64 = 1;
    }
    int bswap = 0;
    if (header_start.endian == SAS_ENDIAN_BIG) {
        bswap = machine_is_little_endian();
        hinfo->little_endian = 0;
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
        bswap = !machine_is_little_endian();
        hinfo->little_endian = 1;
    } else {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    int i;
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
        if (header_start.encoding == _charset_table[i].code) {
            hinfo->encoding = _charset_table[i].name;
            break;
        }
    }
    if (hinfo->encoding == NULL) {
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d\n", header_start.encoding);
            error_handler(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
        goto cleanup;
    }
    memcpy(hinfo->file_label, header_start.file_label, sizeof(header_start.file_label));
    if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    double creation_time, modification_time;
    if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    hinfo->creation_time = bswap ? byteswap_double(creation_time) + epoch : creation_time + epoch;
    hinfo->modification_time = bswap ? byteswap_double(creation_time) + epoch : creation_time + epoch;

    if (io->seek(16, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    uint32_t header_size, page_size;

    if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    hinfo->header_size = bswap ? byteswap4(header_size) : header_size;

    hinfo->page_size = bswap ? byteswap4(page_size) : page_size;

    if (hinfo->header_size < 1024) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (hinfo->u64) {
        uint64_t page_count;
        if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap8(page_count) : page_count;
    } else {
        uint32_t page_count;
        if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        hinfo->page_count = bswap ? byteswap4(page_count) : page_count;
    }
    
    if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d\n", 8);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }
    if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    int major, minor, revision;
    if (sscanf(header_end.release, "%1d.%04dM%1d", &major, &minor, &revision) == 3) {
        hinfo->major_version = major;
        hinfo->minor_version = minor;
        hinfo->revision = revision;
    }
    if (major == 9 && minor == 0 && revision == 0) {
        /* A bit of a hack, but most SAS installations are running a minor update */
        hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER;
    } else {
        hinfo->vendor = READSTAT_VENDOR_SAS;
    }
    if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        if (error_handler) {
            snprintf(error_buf, sizeof(error_buf), 
                    "ReadStat: Failed to seek to position %" PRId64 "\n", hinfo->header_size);
            error_handler(error_buf, user_ctx);
        }
        goto cleanup;
    }

cleanup:
    return retval;
}