Example #1
0
// Encodes the data from infile into the ALFileStore outfile, closing both afterward.
//
// The input is consumed in chunks of at most OPTIONS.part_size_in_elem elements; each
// chunk is encoded into one partition and appended to outfile. When OPTIONS.index_form
// is ALCompressedInvertedIndex or later, the chunk is first encoded with a plain
// inverted index and then converted with ALConvertIndexForm.
//
// Side effects: resets and accumulates the encode_time / compress_time / write_time /
// total_time counters and prints a timing summary to stdout.
//
// Returns 0 on success and 1 on allocation failure, read error, or failure to close
// outfile. A failed partition write calls abort(). On the error returns the input
// buffer is released and infile is closed; outfile is left untouched
// (NOTE(review): confirm whether ALStoreClose should also run on those paths).
int encodeCommandEncode(FILE *infile, uint64_t infile_len, ALStore *outfile) {
    const int datatypeLen = ALDatatypeGetSize(OPTIONS.datatype);

    // Size the input buffer: one full partition, or the whole file if it is smaller
    uint64_t buflen;
    if (infile_len > (OPTIONS.part_size_in_elem * datatypeLen))
        buflen = (OPTIONS.part_size_in_elem * datatypeLen);
    else
        buflen = infile_len;

    void *inbuf = malloc(buflen); // Input buffer
    // malloc(0) may legitimately return NULL, so only a non-empty request can fail
    if (inbuf == NULL && buflen != 0) {
        fprintf(stderr, "Error allocating %llu byte input buffer, aborting\n", (unsigned long long)buflen);
        fclose(infile);
        return 1;
    }

    // Prepare the output buffer (encoder)
    ALIndexForm index_form = OPTIONS.index_form;
    if (index_form >= ALCompressedInvertedIndex) // all other compressed index based on inverted index
        index_form = ALInvertedIndex;

    ALEncoderConfig econfig;
    ALEncoderConfigure(&econfig, OPTIONS.significant_bits, OPTIONS.datatype, index_form);
    ALPartitionData partdata;   // Output buffer

    size_t bytesread;
    int i = 0;
    double s;
    encode_time = 0;
    compress_time = 0;
    write_time = 0;
    total_time = 0;
    double ss = dclock();
    while (!feof(infile) && !ferror(infile) && (bytesread = fread(inbuf, 1, buflen, infile)) != 0) {
        i++;
        // Cast so the argument really matches %llu (size_t is not unsigned long long everywhere)
        dbprintf("Encoding partition %d with size %llu...\n", i, (unsigned long long)bytesread);

        // Integer division: trailing bytes that do not fill a whole element are not encoded
        s = dclock();
        ALEncode(&econfig, inbuf, bytesread / datatypeLen, &partdata);
        encode_time = encode_time + (dclock() - s);

        s = dclock();
        if (OPTIONS.index_form >= ALCompressedInvertedIndex) // all other compressed index based on inverted index
            ALConvertIndexForm(&partdata.metadata, &partdata.index, OPTIONS.index_form);
        compress_time = compress_time + (dclock() - s);

        s = dclock();
        if (ALStoreWritePartition(outfile, &partdata) != ALErrorNone) {
            fprintf(stderr, "Error appending ALACRITY partition to file, aborting\n");
            abort(); // does not return
        }
        write_time = write_time + (dclock() - s);

        ALPartitionDataDestroy(&partdata); // TODO: make this automatic when encoding over an existing partition data
        dbprintf("Partition %d done!\n", i);
    }
    total_time = dclock() - ss;
    // Read time is whatever part of the loop was not spent encoding, compressing or writing
    printf("[read: %9.3lf] [encode: %9.3lf] [compress: %9.3lf] [write: %9.3lf] [total: %9.3lf]\n", total_time - (encode_time + compress_time + write_time ),  encode_time, compress_time, write_time, total_time);
    if (ferror(infile)) {
        fprintf(stderr, "Error reading from input file, aborting\n");
        free(inbuf);
        fclose(infile);
        return 1;
    }

    // Cleanup
    free(inbuf);
    fclose(infile);

    printf("Encoding complete, %llu bytes of input data successfully encoded into %llu partitions\n", (unsigned long long)infile_len, (unsigned long long)outfile->cur_partition);

    dbprintf("Closing ALACRITY output file...\n");
    if (ALStoreClose(outfile) != ALErrorNone) {
        fprintf(stderr, "Error closing ALACRITY output file, aborting\n");
        return 1;
    }

    return 0;
}
Example #2
0
// Joins base and suffix into dst, which holds dstlen bytes.
// Returns 0 on success, or -1 when the result does not fit (dst is then truncated).
static int buildFilename(char *dst, size_t dstlen, const char *base, const char *suffix) {
    const int n = snprintf(dst, dstlen, "%s%s", base, suffix);
    return (n < 0 || (size_t)n >= dstlen) ? -1 : 0;
}

// Encodes a file of doubles as a single partition and writes its index and data
// buffers to "<output base>-index.dat" (or "-query_index.dat" when the index is
// inverted) and "<output base>-compressed_data.dat".
//
// Arguments: <input filename> <output filename base> <invert index?>
//            [comparison filename base]
// When the optional fourth argument is given, the freshly encoded buffers are
// compared byte-for-byte against the files with that base name.
//
// Returns 0 on success and 1 on any usage, I/O, allocation or comparison failure.
int main(int argc, char **argv) {
    // The comparison base is optional, so both 4 and 5 arguments are valid
    if (argc != 4 && argc != 5) {
        fprintf(stderr, "Usage: %s <input filename> <output filename base> <invert index?> [comparison filename base]\n", argv[0]);
        return 1;
    }

    const char *infilename = argv[1];
    const char *outfilenamebase = argv[2];
    _Bool invertIndex = atoi(argv[3]) > 0;
    const char *indexsuffix = invertIndex ? "-query_index.dat" : "-index.dat";
    const char *datasuffix = "-compressed_data.dat";

    char outindexfilename[256];
    char outdatafilename[256];
    if (buildFilename(outindexfilename, sizeof outindexfilename, outfilenamebase, indexsuffix) != 0 ||
        buildFilename(outdatafilename, sizeof outdatafilename, outfilenamebase, datasuffix) != 0) {
        fprintf(stderr, "Output filename base %s is too long\n", outfilenamebase);
        return 1;
    }

    // Everything released at the cleanup label starts out empty
    int rc = 1;
    void *data = NULL;
    void *compdata = NULL;
    void *compindex = NULL;
    FILE *infile = NULL;
    FILE *outindexfile = NULL;
    FILE *outdatafile = NULL;

    struct stat st;
    if (stat(infilename, &st) != 0) {
        fprintf(stderr, "Error opening input file %s\n", infilename);
        goto cleanup;
    }

    // Trailing bytes that do not form a whole double are ignored
    const uint64_t numDoubles = (uint64_t)st.st_size / sizeof(double);
    data = malloc(numDoubles * sizeof(double));
    // malloc(0) may return NULL, so only a non-empty request counts as a failure
    if (data == NULL && numDoubles != 0) {
        fprintf(stderr, "Error allocating buffer for %llu doubles\n", (unsigned long long)numDoubles);
        goto cleanup;
    }

    // The input is validated before the output files are created; all streams are binary
    infile = fopen(infilename, "rb");
    if (infile == NULL) {
        fprintf(stderr, "Error opening input file %s\n", infilename);
        goto cleanup;
    }
    outindexfile = fopen(outindexfilename, "wb");
    if (outindexfile == NULL) {
        fprintf(stderr, "Error opening output file %s\n", outindexfilename);
        goto cleanup;
    }
    outdatafile = fopen(outdatafilename, "wb");
    if (outdatafile == NULL) {
        fprintf(stderr, "Error opening output file %s\n", outdatafilename);
        goto cleanup;
    }

    const size_t rcount = fread(data, sizeof(double), numDoubles, infile);
    if (rcount != numDoubles) {
        fprintf(stderr, "Expected %llu doubles, read %llu\n", (unsigned long long)numDoubles, (unsigned long long)rcount);
        goto cleanup;
    }

    ALEncoderConfig config;
    ALEncoderConfigure(&config, 16, DATATYPE_FLOAT64, invertIndex ? ALInvertedIndex : ALCompressionIndex);

    // NOTE(review): output is never released here; confirm whether a destroy call is needed
    ALPartitionData output;
    ALEncode(&config, data, numDoubles, &output);

    // NOTE(review): significantBits is used as a per-element byte width below, as in the
    // original code; confirm the unit against the ALACRITY metadata definition
    const int insigbytes = output.metadata.elementSize - output.metadata.significantBits;
    const size_t indexelemsize = invertIndex ? sizeof(rid_t) : output.metadata.significantBits;

    size_t wcount = fwrite(output.index, indexelemsize, numDoubles, outindexfile);
    if (wcount != numDoubles) {
        fprintf(stderr, "Expected write %llu index elements, wrote %llu\n", (unsigned long long)numDoubles, (unsigned long long)wcount);
        goto cleanup;
    }

    wcount = fwrite(output.data, insigbytes, numDoubles, outdatafile);
    if (wcount != numDoubles) {
        fprintf(stderr, "Expected write %llu data elements, wrote %llu\n", (unsigned long long)numDoubles, (unsigned long long)wcount);
        goto cleanup;
    }

    if (argc == 5) {
        const char *compfilenamebase = argv[4];
        char compindexfilename[256];
        char compdatafilename[256];
        if (buildFilename(compindexfilename, sizeof compindexfilename, compfilenamebase, indexsuffix) != 0 ||
            buildFilename(compdatafilename, sizeof compdatafilename, compfilenamebase, datasuffix) != 0) {
            fprintf(stderr, "Comparison filename base %s is too long\n", compfilenamebase);
            goto cleanup;
        }
        printf("Reading comparison files %s and %s\n", compdatafilename, compindexfilename);

        if (stat(compdatafilename, &st) != 0) {
            fprintf(stderr, "Error opening comparison file %s\n", compdatafilename);
            goto cleanup;
        }
        if (insigbytes <= 0) {
            fprintf(stderr, "Cannot size comparison data with %d bytes per element\n", insigbytes);
            goto cleanup;
        }

        // Element count is derived from the comparison data file, not from the input
        const uint64_t compCount = (uint64_t)st.st_size / (uint64_t)insigbytes;
        if (compCount > numDoubles) {
            // Comparing more elements than were encoded would read past the output buffers
            fprintf(stderr, "Comparison file %s holds %llu elements, but only %llu were encoded\n", compdatafilename, (unsigned long long)compCount, (unsigned long long)numDoubles);
            goto cleanup;
        }

        const uint64_t datasize = compCount * (uint64_t)insigbytes;
        const uint64_t indexsize = compCount * indexelemsize;
        compdata = malloc(datasize);
        compindex = malloc(indexsize);
        if ((compdata == NULL && datasize != 0) || (compindex == NULL && indexsize != 0)) {
            fprintf(stderr, "Error allocating comparison buffers\n");
            goto cleanup;
        }

        FILE *cdf = fopen(compdatafilename, "rb");
        FILE *cif = fopen(compindexfilename, "rb");
        if (cdf == NULL || cif == NULL) {
            fprintf(stderr, "Error opening comparison files %s and %s\n", compdatafilename, compindexfilename);
            if (cdf != NULL)
                fclose(cdf);
            if (cif != NULL)
                fclose(cif);
            goto cleanup;
        }

        const size_t r1 = fread(compdata, 1, datasize, cdf);
        const size_t r2 = fread(compindex, 1, indexsize, cif);
        fclose(cdf);
        fclose(cif);

        if (r1 != datasize || r2 != indexsize) {
            fprintf(stderr, "Short read from comparison files %s and %s\n", compdatafilename, compindexfilename);
            goto cleanup;
        }

        // Explicit checks rather than assert(), so the comparison still runs under NDEBUG
        printf("Testing against comparison data file %s...\n", compdatafilename);
        if (datasize != 0 && memcmp(output.data, compdata, datasize) != 0) {
            fprintf(stderr, "Encoded data differs from comparison file %s\n", compdatafilename);
            goto cleanup;
        }
        printf("Success!\n");
        printf("Testing against comparison index file %s...\n", compindexfilename);
        if (indexsize != 0 && memcmp(output.index, compindex, indexsize) != 0) {
            fprintf(stderr, "Encoded index differs from comparison file %s\n", compindexfilename);
            goto cleanup;
        }
        printf("Success!\n");
    }

    rc = 0;

cleanup:
    free(compindex);
    free(compdata);
    free(data);
    if (infile != NULL)
        fclose(infile);
    // fclose flushes buffered writes, so a failure here means the output is incomplete
    if (outindexfile != NULL && fclose(outindexfile) != 0) {
        fprintf(stderr, "Error closing output file %s\n", outindexfilename);
        rc = 1;
    }
    if (outdatafile != NULL && fclose(outdatafile) != 0) {
        fprintf(stderr, "Error closing output file %s\n", outdatafilename);
        rc = 1;
    }

    return rc;
}