Esempio n. 1
0
/*
 * Populates an encoder configuration from the caller-supplied settings.
 *
 * config          - configuration structure to fill in
 * significantBits - number of significant bits; asserted to be in 1..31
 * datatype        - element datatype; asserted to be a defined datatype
 * indexForm       - index form; asserted to be ALCompressionIndex,
 *                   ALInvertedIndex or ALCompressedInvertedIndex
 *
 * Invalid arguments are caught by assert() rather than reported through the
 * return value, so the function always returns ALErrorNone.
 */
ALError ALEncoderConfigure(ALEncoderConfig *config, int significantBits,
                           ALDatatype datatype, ALIndexForm indexForm) {
	// Argument validation
	assert(significantBits > 0 && significantBits < 32); // the current code cannot handle more than 31 bits
	assert(ALDatatypeIsDefined(datatype));
	assert(indexForm == ALCompressionIndex || indexForm == ALInvertedIndex || indexForm == ALCompressedInvertedIndex);

	// Settings copied straight from the arguments
	config->indexForm = indexForm;
	config->datatype = datatype;
	config->significantBits = significantBits;

	// The element size is derived from the datatype
	config->elementSize = ALDatatypeGetSize(datatype);

	return ALErrorNone;
}
Esempio n. 2
0
// Encodes the data from infile into the ALStore outfile, writing one partition
// per chunk read from the input, and closes both when encoding succeeds.
//
//   infile     - open input stream to read the raw elements from
//   infile_len - length of the input in bytes; caps the size of the read
//                buffer and is reported in the final summary message
//   outfile    - open store that receives the encoded partitions
//
// Time spent in each phase (as measured by dclock()) is accumulated into
// encode_time, compress_time, write_time and total_time, which are declared
// outside this function, and a summary line is printed once the input is
// exhausted.
//
// Returns 0 on success and 1 on failure. On an allocation failure or a read
// error the input buffer is released and infile is closed, but outfile is left
// open, exactly as the original read-error path left it. A failed partition
// write terminates the process via abort().
int encodeCommandEncode(FILE *infile, uint64_t infile_len, ALStore *outfile) {
    const int datatypeLen = ALDatatypeGetSize(OPTIONS.datatype);

    // The input buffer holds one partition's worth of bytes, or the whole
    // input when that is smaller
    const uint64_t partBytes = OPTIONS.part_size_in_elem * datatypeLen;
    const uint64_t buflen = (infile_len > partBytes) ? partBytes : infile_len;

    // malloc(0) is allowed to return NULL, so always request at least one byte
    // to keep an empty input distinguishable from an allocation failure
    void *inbuf = malloc(buflen > 0 ? buflen : 1);
    if (inbuf == NULL) {
        fprintf(stderr, "Error allocating %llu bytes for the input buffer, aborting\n", (unsigned long long)buflen);
        fclose(infile);
        return 1;
    }

    // Prepare the output buffer (encoder). Every compressed index form is
    // built on the inverted index, so the encoder always produces an inverted
    // index in that case and the partition is converted afterwards.
    ALIndexForm index_form = OPTIONS.index_form;
    if (index_form >= ALCompressedInvertedIndex)
        index_form = ALInvertedIndex;

    ALEncoderConfig econfig;
    ALEncoderConfigure(&econfig, OPTIONS.significant_bits, OPTIONS.datatype, index_form);
    ALPartitionData partdata;   // Output buffer

    size_t bytesread;
    int i = 0;
    double s;
    encode_time = 0;
    compress_time = 0;
    write_time = 0;
    total_time = 0;
    double ss = dclock();
    while (!feof(infile) && !ferror(infile) && (bytesread = fread(inbuf, 1, buflen, infile)) != 0) {
        i++;
        dbprintf("Encoding partition %d with size %llu...\n", i, (unsigned long long)bytesread);

        // Phase 1: encode the chunk into a partition
        s = dclock();
        ALEncode(&econfig, inbuf, bytesread / datatypeLen, &partdata);
        encode_time = encode_time + (dclock() - s);

        // Phase 2: convert to the requested compressed index form, if any
        s = dclock();
        if (OPTIONS.index_form >= ALCompressedInvertedIndex)
            ALConvertIndexForm(&partdata.metadata, &partdata.index, OPTIONS.index_form);
        compress_time = compress_time + (dclock() - s);

        // Phase 3: append the partition to the output store
        s = dclock();
        if (ALStoreWritePartition(outfile, &partdata) != ALErrorNone) {
            fprintf(stderr, "Error appending ALACRITY partition to file, aborting\n");
            abort(); // does not return
        }
        write_time = write_time + (dclock() - s);

        ALPartitionDataDestroy(&partdata); // TODO: make this automatic when encoding over an existing partition data
        dbprintf("Partition %d done!\n", i);
    }
    total_time = dclock() - ss;

    // Whatever is not accounted for by the three measured phases is reported as read time
    printf("[read: %9.3lf] [encode: %9.3lf] [compress: %9.3lf] [write: %9.3lf] [total: %9.3lf]\n", total_time - (encode_time + compress_time + write_time ),  encode_time, compress_time, write_time, total_time);

    // Record the stream's error state before closing it, then release the
    // input-side resources on both the success and the failure path
    const int read_failed = ferror(infile);
    free(inbuf);
    fclose(infile);

    if (read_failed) {
        fprintf(stderr, "Error reading from input file, aborting\n");
        return 1;
    }

    printf("Encoding complete, %llu bytes of input data successfully encoded into %llu partitions\n",
           (unsigned long long)infile_len, (unsigned long long)outfile->cur_partition);

    dbprintf("Closing ALACRITY output file...\n");
    if (ALStoreClose(outfile) != ALErrorNone) {
        fprintf(stderr, "Error closing ALACRITY output file, aborting\n");
        return 1;
    }

    return 0;
}
Esempio n. 3
0
// Program entry point: parses the global options, then dispatches to the
// command named by the first non-option argument.
//
// Options handled here:
//   -p SIZE   partition size. Trailing suffix letters are consumed right to
//             left: k/m/g/t scale by powers of 1000, or of 1024 when an 'i'
//             suffix has already been seen (e.g. "Mi"), and 'e' marks the
//             size as a count of elements rather than bytes.
//   -i -c -x -h -k -m -r   index form; at most one may be given
//   -l        legacy format
//   -e TYPE   element type, "float" or "double" (case-insensitive)
//   -s BITS   number of significant bits, 1..31
//
// Returns the value returned by the selected command's handler.
int main(int argc, char **argv) {
    cmdstr = argv[0];

    init_options();

    _Bool is_part_size_in_elem = false;

    int c;
    while ((c = getopt(argc, argv, ":p:icle:s:xhkmr")) != -1) {
        switch (c) {
        case 'p':
        {
            uint64_t multi = 1;
            size_t arglen = strlen(optarg);
            _Bool part_size_suffix_power_2 = false;

            // Strip suffix characters from the end of the argument until a
            // non-suffix character (normally a digit) is reached
            while (arglen > 0) {
                switch (optarg[arglen - 1]) {
                case 'e':
                case 'E':
                    // We must wait until we know the element size before we can scale
                    is_part_size_in_elem = true;
                    break;
                case 'i':
                case 'I':
                    part_size_suffix_power_2 = true;
                    break;

                    // NOTE: intentional fall-throughs, each larger unit adds
                    // one more factor of 1000 (or 1024)
                case 't':
                case 'T':
                    multi *= part_size_suffix_power_2 ? 1024 : 1000;
                    /* fallthrough */
                case 'g':
                case 'G':
                    multi *= part_size_suffix_power_2 ? 1024 : 1000;
                    /* fallthrough */
                case 'm':
                case 'M':
                    multi *= part_size_suffix_power_2 ? 1024 : 1000;
                    /* fallthrough */
                case 'k':
                case 'K':
                    multi *= part_size_suffix_power_2 ? 1024 : 1000;
                    break;

                default:
                    // Not a suffix: zero the length so the enclosing while
                    // loop ends, leaving this character in place
                    arglen = 0;
                    continue;
                }

                // Drop the suffix character that was just consumed
                optarg[arglen - 1] = 0;
                arglen--;
            }
            OPTIONS.part_size_in_elem = (uint64_t)atoll(optarg) * multi;
            if (OPTIONS.part_size_in_elem < MIN_PART_SIZE) {
                // Cast explicitly so the arguments match %llu on every platform
                fprintf(stderr, "Error: partition size of %llu specified, but must be at least %llu\n",
                        (unsigned long long)OPTIONS.part_size_in_elem, (unsigned long long)MIN_PART_SIZE);
                exit(1);
            }
            break;
        }

        case 'i':
        case 'c':
        case 'x':
        case 'h': // hybrid index = p4d + rle
        case 'k': // skipping index = p4d + skipping
        case 'm': // skipping hybrid index = p4d + rle + skipping
        case 'r': // expansion/relaxing method
            if (OPTIONS.index_form_set) {
                fprintf(stderr, "Error: options -i, -c, -x, -h, -k, -m and -r are mutually exclusive\n");
                usage_and_exit();
            }
            OPTIONS.index_form_set = true;
            if (c == 'i')        OPTIONS.index_form = ALInvertedIndex;
            else if (c == 'c')   OPTIONS.index_form = ALCompressionIndex;
            else if (c == 'h')   OPTIONS.index_form = ALCompressedHybridInvertedIndex; // rpfd
            else if (c == 'k')   OPTIONS.index_form = ALCompressedSkipInvertedIndex;
            else if (c == 'm')   OPTIONS.index_form = ALCompressedMixInvertedIndex;
            else if (c == 'r')   OPTIONS.index_form = ALCompressedExpansionII; // epfd
            else                 OPTIONS.index_form = ALCompressedInvertedIndex; // x pfd
            break;

        case 'l':
            OPTIONS.legacy_format = true;
            break;

        case 'e':
            if (strcasecmp(optarg, "float") == 0)
                OPTIONS.datatype = DATATYPE_FLOAT32;
            else if (strcasecmp(optarg, "double") == 0)
                OPTIONS.datatype = DATATYPE_FLOAT64;
            else {
                fprintf(stderr, "Error: element type must be one of { float | double }, but is %s\n", optarg);
                usage_and_exit();
            }
            break;

        case 's':
            OPTIONS.significant_bits = atoi(optarg);
            // The upper bound is 31, not 32: ALEncoderConfigure asserts
            // significantBits < 32, so accepting 32 here would only defer the
            // failure to an assertion during encoding
            if (OPTIONS.significant_bits < 1 || OPTIONS.significant_bits > 31) {
                fprintf(stderr, "Error: significant bit count must be between 1 and 31, inclusive, but is %d\n", OPTIONS.significant_bits);
                usage_and_exit();
            }
            break;

        case ':':
            // Reported by getopt because the option string starts with ':'
            fprintf(stderr, "Option %c missing required argument\n", optopt);
            usage_and_exit();
            break;
        case '?':
        default:
            fprintf(stderr, "Unknown option %c\n", optopt);
            usage_and_exit();
            break;
        }
    }

    // Do some post-calculations based on all options: a partition size given
    // in bytes is converted to elements once the element type is known
    if (!is_part_size_in_elem) {
        if (OPTIONS.datatype != DATATYPE_UNDEFINED)
            OPTIONS.part_size_in_elem /= ALDatatypeGetSize(OPTIONS.datatype);
    }

    // Now that they've been parsed, skip over the options, to
    // leave only non-option args
    argc -= optind;
    argv += optind;

    // Make sure there's at least one argument for the command, then capture
    // it and advance past it
    if (argc < 1) usage_and_exit();
    const char *cmd = argv[0];
    argc--;
    argv++;

    // Find the matching command (if any), call it, and return the value it returns
    for (int i = 0; i < NUM_COMMANDS; i++)
        if (strcmp(COMMANDS[i].name, cmd) == 0)
            return COMMANDS[i].func(argc, argv);

    // If no command matches, print an error message
    fprintf(stderr, "Error: command %s unrecognized\n", cmd);
    usage_and_exit();

    // At the compiler's complaint...
    return 0;
}