/*
 * Fills in an encoder configuration from the caller-supplied settings.
 *
 * config           configuration record to populate
 * significantBits  number of significant bits; must lie in 1..31 (the upper
 *                  bound of 31 is a limitation of the current code)
 * datatype         element datatype; must satisfy ALDatatypeIsDefined()
 * indexForm        one of ALCompressionIndex, ALInvertedIndex or
 *                  ALCompressedInvertedIndex
 *
 * Invalid arguments trip an assert(). Always returns ALErrorNone.
 */
ALError ALEncoderConfigure(ALEncoderConfig *config, int significantBits, ALDatatype datatype, ALIndexForm indexForm)
{
    // Argument validation (compiled out when NDEBUG is defined)
    assert(significantBits > 0 && significantBits < 32);
    assert(ALDatatypeIsDefined(datatype));
    assert(indexForm == ALCompressionIndex ||
           indexForm == ALInvertedIndex ||
           indexForm == ALCompressedInvertedIndex);

    // Record the settings; the element size is derived from the datatype
    config->indexForm = indexForm;
    config->datatype = datatype;
    config->elementSize = ALDatatypeGetSize(datatype);
    config->significantBits = significantBits;

    return ALErrorNone;
}
// Encodes the data from infile into the ALFileStore outfile, closing both afterward int encodeCommandEncode(FILE *infile, uint64_t infile_len, ALStore *outfile) { const int datatypeLen = ALDatatypeGetSize(OPTIONS.datatype); // Allocate the input buffer uint64_t buflen; if (infile_len > (OPTIONS.part_size_in_elem * datatypeLen)) buflen = (OPTIONS.part_size_in_elem * datatypeLen); else buflen = infile_len; void *inbuf = malloc(buflen); // Input buffer // Prepare the output buffer (encoder) ALIndexForm index_form = OPTIONS.index_form; if (index_form >= ALCompressedInvertedIndex) // all other compressed index based on inverted index index_form = ALInvertedIndex; ALEncoderConfig econfig; ALEncoderConfigure(&econfig, OPTIONS.significant_bits, OPTIONS.datatype, index_form); ALPartitionData partdata; // Output buffer size_t bytesread; int i = 0; double s ; encode_time = 0; compress_time = 0; write_time = 0; total_time = 0; double ss = dclock(); while (!feof(infile) && !ferror(infile) && (bytesread = fread(inbuf, 1, buflen, infile)) != 0) { i++; dbprintf("Encoding partition %d with size %llu...\n", i, bytesread); s = dclock(); ALEncode(&econfig, inbuf, bytesread / datatypeLen, &partdata); encode_time = encode_time + (dclock() - s); s = dclock(); if (OPTIONS.index_form >= ALCompressedInvertedIndex) // all other compressed index based on inverted index ALConvertIndexForm(&partdata.metadata, &partdata.index, OPTIONS.index_form); compress_time = compress_time + (dclock() - s); s = dclock(); if (ALStoreWritePartition(outfile, &partdata) != ALErrorNone) { fprintf(stderr, "Error appending ALACRITY partition to file, aborting\n"); abort(); return 1; } write_time = write_time + (dclock() - s); ALPartitionDataDestroy(&partdata); // TODO: make this automatic when encoding over an existing partition data dbprintf("Partition %d done!\n", i); } total_time = dclock() - ss; printf("[read: %9.3lf] [encode: %9.3lf] [compress: %9.3lf] [write: %9.3lf] [total: %9.3lf]\n", total_time - 
(encode_time + compress_time + write_time ), encode_time, compress_time, write_time, total_time); if (ferror(infile)) { fprintf(stderr, "Error reading from input file, aborting\n"); return 1; } // Cleanup free(inbuf); fclose(infile); printf("Encoding complete, %llu bytes of input data successfully encoded into %llu partitions\n", infile_len, outfile->cur_partition); dbprintf("Closing ALACRITY output file...\n"); if (ALStoreClose(outfile) != ALErrorNone) { fprintf(stderr, "Error closing ALACRITY output file, aborting\n"); return 1; } return 0; }
int main(int argc, char **argv) { cmdstr = argv[0]; init_options(); _Bool is_part_size_in_elem = false; int c; while ((c = getopt(argc, argv, ":p:icle:s:xhkmr")) != -1) { switch (c) { case 'p': { uint64_t multi = 1; int arglen = strlen(optarg); _Bool part_size_suffix_power_2 = false; while (arglen > 0) { switch (optarg[arglen - 1]) { case 'e': case 'E': // We must wait until we know the element size before we can scale is_part_size_in_elem = true; break; case 'i': case 'I': part_size_suffix_power_2 = true; break; // NOTE: intentional fall-throughs case 't': case 'T': multi *= part_size_suffix_power_2 ? 1024 : 1000; case 'g': case 'G': multi *= part_size_suffix_power_2 ? 1024 : 1000; case 'm': case 'M': multi *= part_size_suffix_power_2 ? 1024 : 1000; case 'k': case 'K': multi *= part_size_suffix_power_2 ? 1024 : 1000; break; default: arglen = 0; continue; } optarg[arglen-1] = 0; arglen--; } OPTIONS.part_size_in_elem = (uint64_t)atoll(optarg) * multi; if (OPTIONS.part_size_in_elem < MIN_PART_SIZE) { fprintf(stderr, "Error: partition size of %llu specified, but must be at least %llu\n", OPTIONS.part_size_in_elem, MIN_PART_SIZE); exit(1); } // printf("Using partition size of %llu(not accounting for element size, but %s later)\n", OPTIONS.part_size_in_elem, is_part_size_in_elem ? 
"WILL" : "WON'T"); break; } case 'i': case 'c': case 'x': case 'h': //hybrid index = p4d + rle case 'k': //skipping index = p4d + skipping case 'm': //skipping hybrid index = p4d + rle + skipping case 'r': // expansion/relaxing method if (OPTIONS.index_form_set) { fprintf(stderr, "Error: options -i, -c and -x are mutually exclusive\n"); usage_and_exit(); } OPTIONS.index_form_set = true; if (c == 'i') OPTIONS.index_form = ALInvertedIndex; else if (c == 'c') OPTIONS.index_form = ALCompressionIndex; else if (c == 'h') OPTIONS.index_form = ALCompressedHybridInvertedIndex; //rpfd else if (c == 'k') OPTIONS.index_form = ALCompressedSkipInvertedIndex; else if (c == 'm') OPTIONS.index_form = ALCompressedMixInvertedIndex; else if (c == 'r') OPTIONS.index_form = ALCompressedExpansionII; // epfd else OPTIONS.index_form = ALCompressedInvertedIndex; //x pfd break; case 'l': OPTIONS.legacy_format = true; break; case 'e': if (strcasecmp(optarg, "float") == 0) OPTIONS.datatype = DATATYPE_FLOAT32; else if (strcasecmp(optarg, "double") == 0) OPTIONS.datatype = DATATYPE_FLOAT64; else { fprintf(stderr, "Error: element type must be one of { float | double }, but is %s\n", optarg); usage_and_exit(); } break; case 's': OPTIONS.significant_bits = atoi(optarg); if (OPTIONS.significant_bits < 1 || OPTIONS.significant_bits > 32) { fprintf(stderr, "Error: significant byte count must be between 1 and 32, inclusive, but is %d\n", OPTIONS.significant_bits); usage_and_exit(); } break; case ':': fprintf(stderr, "Option %c missing required argument\n", optopt); usage_and_exit(); break; case '?': default: fprintf(stderr, "Unknown option %c\n", optopt); usage_and_exit(); break; } } // Do some post-calculations based on all options if (!is_part_size_in_elem) { if (OPTIONS.datatype != DATATYPE_UNDEFINED) OPTIONS.part_size_in_elem /= ALDatatypeGetSize(OPTIONS.datatype); } // Now that they've been parsed, skip over the options, to // leave only non-option args argc -= optind; argv += optind; // Make sure 
there's at least one argument for the command, then capture // it and advance past it if (argc < 1) usage_and_exit(); const char *cmd = argv[0]; argc--; argv++; // Find the matching command (if any), call it, and return the value it returns for (int i = 0; i < NUM_COMMANDS; i++) if (strcmp(COMMANDS[i].name, cmd) == 0) return COMMANDS[i].func(argc, argv); // If no command matches, print an error message fprintf(stderr, "Error: command %s unrecognized\n", cmd); usage_and_exit(); // At the compiler's complaint... return 0; }