// Encodes the data from infile into the ALFileStore outfile, closing both afterward int decodeCommandDecode(ALStore *infile, FILE *outfile) { ALPartitionData partdata; // Output buffer void *outbuf; uint64_t outbufCount; uint64_t totalElemsWritten = 0; ALError err; size_t writelen; while (!ALStoreEOF(infile)) { err = ALStoreReadPartition(infile, &partdata); if (err != ALErrorNone) { fprintf(stderr, "Error reading the next partition from the input files\n"); return 1; } outbuf = malloc(ALGetDecodeLength(&partdata)); if (!outbuf) { fprintf(stderr, "Error allocating %llu btyes of temporary memory for decoding\n", ALGetDecodeLength(&partdata)); return 1; } err = ALDecode(&partdata, outbuf, &outbufCount); if (err != ALErrorNone) { fprintf(stderr, "Error decoding partition from input files\n"); return 1; } writelen = fwrite(outbuf, partdata.metadata.elementSize, outbufCount, outfile); if (writelen != outbufCount) { fprintf(stderr, "Error writing decoded elements to file (wrote %llu, expected to write %llu)\n", (uint64_t)writelen, (uint64_t)partdata.metadata.elementSize); return 1; } totalElemsWritten += outbufCount; free(outbuf); ALPartitionDataDestroy(&partdata); } // Cleanup fclose(outfile); printf("Decoding complete, wrote out %llu elements\n", totalElemsWritten); dbprintf("Closing ALACRITY input files...\n"); if (ALStoreClose(infile) != ALErrorNone) { fprintf(stderr, "Error closing ALACRITY input file, aborting (output should still be correct)\n"); return 1; } return 0; }
// Lists the percentage of bins whose length is less than PFD chunk size (32,64,or 128) in the given ALStore, partition by partition int showbinlensCommandShowBins(ALStore *infile) { ALPartitionStore pstore; ALMetadata meta; ALError err; int PFD_chunksizes[3] = {32,64,128}; bin_id_t bin_lens_counts[3] = {0}; // the number of bins whose length is less than 32, 64, or 128 bin_id_t i; double *d = malloc(sizeof(double)); uint64_t zero = 0; while (!ALStoreEOF(infile)) { ALStoreOpenPartition(infile, &pstore, true); ALPartitionStoreReadMetadata(&pstore, &meta); for(int k = 0; k < 3 ; k ++) bin_lens_counts[k]=0; assert(meta.datatype == DATATYPE_FLOAT64); bin_id_t total_bins = meta.binLayout.numBins; for (i = 0; i < total_bins ; i++) { REJOIN_DATUM_BITS(d, 8, meta.significantBits, meta.binLayout.binValues[i], 0); bin_offset_t bin_len = meta.binLayout.binStartOffsets[i+1] - meta.binLayout.binStartOffsets[i]; for (int k = 0 ; k < 3 ; k++){ if (bin_len < PFD_chunksizes[k]){ bin_lens_counts[k] ++; } } } printf("Partition %llu has %lu bins and %llu RIDs, # of bins whose length < 32,64,128 =" "{[%lu, %lu, %lu],[%4.2lf,%4.2lf,%4.2lf]}\n", pstore.partition_num, total_bins , meta.partitionLength , bin_lens_counts[0], bin_lens_counts[1], bin_lens_counts[2] , bin_lens_counts[0]*100.0/total_bins, bin_lens_counts[1]*100.0/total_bins, bin_lens_counts[2]*100.0/total_bins); ALPartitionStoreClose(&pstore); free(meta.binLayout.binStartOffsets); free(meta.binLayout.binValues); if (meta.indexMeta.indexForm == ALCompressedInvertedIndex) free(meta.indexMeta.u.ciim.indexBinStartOffsets); } free(d); dbprintf("Closing ALACRITY input files...\n"); if (ALStoreClose(infile) != ALErrorNone) { fprintf(stderr, "Error closing ALACRITY input file, aborting (output should still be correct)\n"); return 1; } return 0; }
// Lists the bins and their boundaries in the given ALStore, partition by partition int bidtovCommandConvert(ALStore *infile) { ALPartitionStore pstore; ALMetadata meta; ALError err; bin_id_t i; double *d = calloc(1, sizeof(double)); uint64_t zero = 0; while (!ALStoreEOF(infile)) { ALStoreOpenPartition(infile, &pstore, true); ALPartitionStoreReadMetadata(&pstore, &meta); assert(meta.datatype == DATATYPE_FLOAT64); const int insigbits = (meta.elementSize<<3) - meta.significantBits; const uint64_t lomask = (1ULL << (insigbits - 2)); printf("Partition %llu has %lu bins and %llu RIDs\n", pstore.partition_num, meta.binLayout.numBins, meta.partitionLength); for (i = 0; i < meta.binLayout.numBins; i++) { REJOIN_DATUM_BITS(d, 8, meta.significantBits, meta.binLayout.binValues[i], lomask); //*(uint64_t*)&d = meta.binLayout.binValues[i] << (64 - meta.significantBits); printf("%10lu -> %+.10e\n", i, *d); } ALPartitionStoreClose(&pstore); free(meta.binLayout.binStartOffsets); free(meta.binLayout.binValues); if (meta.indexMeta.indexForm == ALCompressedInvertedIndex) free(meta.indexMeta.u.ciim.indexBinStartOffsets); } free(d); dbprintf("Closing ALACRITY input files...\n"); if (ALStoreClose(infile) != ALErrorNone) { fprintf(stderr, "Error closing ALACRITY input file, aborting (output should still be correct)\n"); return 1; } return 0; }
// Lists the bins and their boundaries in the given ALStore, partition by partition int showbinsCommandShowBins(ALStore *infile) { ALPartitionStore pstore; ALMetadata meta; ALError err; bin_id_t i; double *d = malloc(sizeof(double)); uint64_t zero = 0; while (!ALStoreEOF(infile)) { ALStoreOpenPartition(infile, &pstore, true); ALPartitionStoreReadMetadata(&pstore, &meta); assert(meta.datatype == DATATYPE_FLOAT64); printf("Partition %llu has %lu bins and %llu RIDs\n", pstore.partition_num, meta.binLayout.numBins, meta.partitionLength); for (i = 0; i < meta.binLayout.numBins; i++) { REJOIN_DATUM_BITS(d, 8, meta.significantBits, meta.binLayout.binValues[i], 0); //*(uint64_t*)&d = meta.binLayout.binValues[i] << (64 - meta.significantBits); printf("%+.6e[%10lu] = %10lu (frac. %.6e) (cum.frac. %.6e)\n", *d, i, meta.binLayout.binStartOffsets[i+1] - meta.binLayout.binStartOffsets[i], (double)(meta.binLayout.binStartOffsets[i+1] - meta.binLayout.binStartOffsets[i]) / meta.partitionLength, (double)(meta.binLayout.binStartOffsets[i+1] - meta.binLayout.binStartOffsets[0]) / meta.partitionLength); } ALPartitionStoreClose(&pstore); free(meta.binLayout.binStartOffsets); free(meta.binLayout.binValues); if (meta.indexMeta.indexForm == ALCompressedInvertedIndex) free(meta.indexMeta.u.ciim.indexBinStartOffsets); } free(d); dbprintf("Closing ALACRITY input files...\n"); if (ALStoreClose(infile) != ALErrorNone) { fprintf(stderr, "Error closing ALACRITY input file, aborting (output should still be correct)\n"); return 1; } return 0; }
int main(int argc, char **argv) { if (argc < 4 || argc > 5) { fprintf(stderr, "Usage: %s <input path & base name> <low val> <high val> [<use CII? default false>]\n", argv[0]); return 1; } int i = 1; const char *infilename = argv[i++]; // const char *outfilenamebase = argv[2]; double lval = atof(argv[i++]); double uval = atof(argv[i++]); _Bool useCII = argc >= 5 ? atoi(argv[i++]) > 0 : false; ALStore store; ALStoreOpenPOSIX(&store,infilename , "r", USE_LEGACY_FORMAT); ALGlobalMetadata gmeta; ALStoreGetGlobalMetadata(&store, &gmeta); // printf("number of partition %llu \n", gmeta.num_partitions); ALQueryEngine qe; ALQueryEngineInit(&qe, &store, true); printf("Performing query for values in range %lf to %lf...\n", lval, uval); ALUnivariateQuery uniquery; ALUnivariateQueryResult result; double s_time , e_time; ALQueryEngineStartUnivariateDoubleQuery(&qe, lval, uval, VALUE_RETRIEVAL_QUERY_TYPE, &uniquery); s_time = dclock1(); while (ALQueryNextResult(&uniquery, &result)) { // printf("Read %llu results\n", result.resultCount); ALQueryResultDestroy(&result); } e_time = dclock1(); ALStoreClose(&store); printf("Uniquery time performance %f \n ", e_time - s_time); return 0; }
// Encodes the data from infile into the ALFileStore outfile, closing both afterward int encodeCommandEncode(FILE *infile, uint64_t infile_len, ALStore *outfile) { const int datatypeLen = ALDatatypeGetSize(OPTIONS.datatype); // Allocate the input buffer uint64_t buflen; if (infile_len > (OPTIONS.part_size_in_elem * datatypeLen)) buflen = (OPTIONS.part_size_in_elem * datatypeLen); else buflen = infile_len; void *inbuf = malloc(buflen); // Input buffer // Prepare the output buffer (encoder) ALIndexForm index_form = OPTIONS.index_form; if (index_form >= ALCompressedInvertedIndex) // all other compressed index based on inverted index index_form = ALInvertedIndex; ALEncoderConfig econfig; ALEncoderConfigure(&econfig, OPTIONS.significant_bits, OPTIONS.datatype, index_form); ALPartitionData partdata; // Output buffer size_t bytesread; int i = 0; double s ; encode_time = 0; compress_time = 0; write_time = 0; total_time = 0; double ss = dclock(); while (!feof(infile) && !ferror(infile) && (bytesread = fread(inbuf, 1, buflen, infile)) != 0) { i++; dbprintf("Encoding partition %d with size %llu...\n", i, bytesread); s = dclock(); ALEncode(&econfig, inbuf, bytesread / datatypeLen, &partdata); encode_time = encode_time + (dclock() - s); s = dclock(); if (OPTIONS.index_form >= ALCompressedInvertedIndex) // all other compressed index based on inverted index ALConvertIndexForm(&partdata.metadata, &partdata.index, OPTIONS.index_form); compress_time = compress_time + (dclock() - s); s = dclock(); if (ALStoreWritePartition(outfile, &partdata) != ALErrorNone) { fprintf(stderr, "Error appending ALACRITY partition to file, aborting\n"); abort(); return 1; } write_time = write_time + (dclock() - s); ALPartitionDataDestroy(&partdata); // TODO: make this automatic when encoding over an existing partition data dbprintf("Partition %d done!\n", i); } total_time = dclock() - ss; printf("[read: %9.3lf] [encode: %9.3lf] [compress: %9.3lf] [write: %9.3lf] [total: %9.3lf]\n", total_time - (encode_time + compress_time + write_time ), encode_time, compress_time, write_time, total_time); if (ferror(infile)) { fprintf(stderr, "Error reading from input file, aborting\n"); return 1; } // Cleanup free(inbuf); fclose(infile); printf("Encoding complete, %llu bytes of input data successfully encoded into %llu partitions\n", infile_len, outfile->cur_partition); dbprintf("Closing ALACRITY output file...\n"); if (ALStoreClose(outfile) != ALErrorNone) { fprintf(stderr, "Error closing ALACRITY output file, aborting\n"); return 1; } return 0; }