bool DataOStream::_disable() { if( !_enabled ) return false; if( _dataSent ) { _dataSize = _buffer.getSize(); if( !_connections.empty( )) { void* ptr = _buffer.getData() + _bufferStart; const uint64_t size = _buffer.getSize() - _bufferStart; if( size == 0 && _bufferStart == _dataSize && _compressorState == STATE_PARTIAL ) { // OPT: all data has been sent in one compressed chunk _compressorState = STATE_COMPLETE; #ifndef CO_AGGRESSIVE_CACHING _buffer.clear(); #endif } else { _compressorState = STATE_UNCOMPRESSED; _compress( ptr, size, STATE_PARTIAL ); } sendData( ptr, size, true ); // always send to finalize istream } } else if( _buffer.getSize() > 0 ) { _dataSize = _buffer.getSize(); _dataSent = true; EQASSERT( _bufferStart == 0 ); if( !_connections.empty( )) { _compressorState = STATE_UNCOMPRESSED; _compress( _buffer.getData(), _dataSize, STATE_COMPLETE ); sendData( _buffer.getData(), _dataSize, true ); } } #ifndef CO_AGGRESSIVE_CACHING if( !_save ) _buffer.clear(); #endif _enabled = false; return true; }
// Remove any id groups that contain less than the minimum number of ids // and clean up the rule void Rule::_eval () { int start, last, count; start = 0; last = m_addr[start]; count = 0; for (int id = 0; id < m_ids; id++) { int addr = m_addr[id]; // If address if ingored then skip it if (m_flags[addr] == IGNORED) continue; // Address distance must not be greater the the rule allows // else we have moved into another group if (addr <= (last + RULE_MAX_ERROR + 1)) { last = addr; count++; continue; } // End of a match group, check if it meets requirements if (count < RULE_MIN_MATCH) { // Remove this group as is too short while (count > 0) { int a = m_addr[start++]; if (m_flags[a] != IGNORED) { m_flags[a] = IGNORED; count--; } } } last = addr; start = id; count = 1; } // End of a match group, check if it meets requirements if (count < RULE_MIN_MATCH) { // Remove this group as is too short while (count > 0) { int a = m_addr[start++]; if (m_flags[a] != IGNORED) { m_flags[a] = IGNORED; count--; } } } _compress (); }
/**
 * Compress the current contents in place.
 *
 * Does nothing when the data is already flagged as compressed, when there is
 * no data, or when @p level is greater than 9. If _compress() reports a
 * resulting size of 0 the contents are left untouched.
 *
 * @param level compression level forwarded to _compress(); values above 9
 *              are rejected.
 */
void ZCompressor::Deflate(uint8 level)
{
    if( _iscompressed || (!size()) || level>9 )
        return;

    char *buf;
    buf=new char[size()+8];
    uint32 newsize=size(),oldsize=size();
    reserve(size()+8);

    _compress((void*)buf, &newsize, (void*)contents(),size(),level);
    if(!newsize)
    {
        // Compression produced nothing: release the scratch buffer before
        // bailing out (it used to be leaked on this path).
        delete [] buf;
        return;
    }

    // Replace the contents with the compressed bytes.
    resize(newsize);
    rpos(0);
    wpos(0);
    append(buf,newsize);
    delete [] buf;

    _iscompressed=true;
    _real_size=oldsize; // remember the uncompressed size
}
/// Archive a docker volume.
///
/// Reports a fatal error when utils::dockerVolExists() does not find
/// @p volumename, then forwards to _compress() with "/" appended to
/// @p archivefolder.
///
/// @return whatever _compress() returns.
bool compress_volume(std::string password, std::string volumename, std::string archivefolder, std::string archivename, const params & p)
{
    const bool volumePresent = utils::dockerVolExists(volumename);
    if (!volumePresent)
    {
        fatal("Can't compress non-existant volume " + volumename);
    }

    return _compress(password,
                     volumename,
                     archivefolder + "/",
                     archivename,
                     p);
}
void DataOStream::_resend() { EQASSERT( !_enabled ); EQASSERT( !_connections.empty( )); EQASSERT( _save ); _compress( _buffer.getData(), _dataSize, STATE_COMPLETE ); sendData( _buffer.getData(), _dataSize, true ); }
/// Archive a folder.
///
/// Reports a fatal error when utils::fileexists() does not find
/// @p foldername. The folder is then resolved with
/// utils::getcanonicalpath() and handed to _compress(); both the source
/// path and @p archivefolder get a trailing "/".
///
/// @return whatever _compress() returns.
bool compress_folder(std::string password, std::string foldername, std::string archivefolder, std::string archivename, const params & p)
{
    const bool folderPresent = utils::fileexists(foldername);
    if (!folderPresent)
    {
        fatal("Can't archive non-existant folder " + foldername);
    }

    std::string canonical = utils::getcanonicalpath(foldername);

    return _compress(password,
                     canonical + "/",
                     archivefolder + "/",
                     archivename,
                     p);
}
// Load the string from an in-memory ASCII buffer and compress it.
//
// A compression agent handler is created first; which kind depends on
// whether the storage's oid carries MEM_STORAGE_CODE. The data is then read
// through pstring::asciiIn() and _compress() is run on it.
//
// Always returns done.
io_status compressed_pstring::_asciiIn(const char* buf, int size)
{
   const bool mem_backed =
      (storage_ptr -> my_oid()).ccode() == MEM_STORAGE_CODE;

   if ( !mem_backed ) {
      agent = new compress_agent_handler(compress_agent_id, storage_ptr);
   } else {
      // memory storage: use a handler built from DICT_AGENT_CODE
      agent = (compress_agent_handler*)
                 (new handler(DICT_AGENT_CODE, storage_ptr));
   }

   // read the raw data part, then compress it
   pstring::asciiIn(buf, size);
   _compress();

   return done;
}
/**
 * Send the part of the buffer accumulated since _bufferStart as one partial
 * chunk and reset the buffer.
 *
 * Nothing is compressed or sent when there are no connections, but the
 * stream is still marked as having sent data and the buffer is still reset.
 */
void DataOStream::_flush()
{
    EQASSERT( _enabled );

    if( !_connections.empty( ))
    {
        void* chunk = _buffer.getData() + _bufferStart;
        const uint64_t chunkSize = _buffer.getSize() - _bufferStart;

        _compressorState = STATE_UNCOMPRESSED;
        _compress( chunk, chunkSize, STATE_PARTIAL );

        // 'false': more data follows, in contrast to the 'true' used by the
        // final send when the stream is disabled.
        sendData( chunk, chunkSize, false );
    }

    _dataSent = true;
    _resetBuffer();
}
// Load the string from an input stream and compress it.
//
// A compression agent handler is created first; which kind depends on
// whether the storage's oid carries MEM_STORAGE_CODE. The data is then read
// through pstring::_asciiIn() and _compress() is run on it.
//
// Always returns done.
io_status compressed_pstring::_asciiIn(istream& in)
{
   const bool mem_backed =
      (storage_ptr -> my_oid()).ccode() == MEM_STORAGE_CODE;

   if ( !mem_backed ) {
      agent = new compress_agent_handler(compress_agent_id, storage_ptr);
   } else {
      // memory storage: use a handler built from DICT_AGENT_CODE
      agent = (compress_agent_handler*)
                 (new handler(DICT_AGENT_CODE, storage_ptr));
   }

   // read the raw data part, then compress it
   pstring::_asciiIn(in);
   _compress();

   return done;
}
/*
 * Python binding: compress(src, srcpath, dst, pass, level)
 *
 * Compresses the single file `src` into the archive `dst`.
 *
 *   src      source file name; must not be None/empty
 *   srcpath  optional path associated with src; only forwarded when non-empty
 *   dst      destination archive name; must not be None/empty
 *   pass     optional password; None/empty means no password
 *   level    1..9, anything else selects Z_DEFAULT_COMPRESSION
 *
 * Returns None on success. Raises ValueError for bad arguments; when
 * _compress() does not return ZIP_OK, pyerr_msg is returned.
 */
static PyObject *py_compress(PyObject *self, PyObject *args)
{
    const char * src;
    const char * srcpath;
    const char * dst;
    const char * pass;
    int src_len, srcpath_len, dst_len, pass_len;
    int level;
    int srcpath_count;
    int status;

    if (!PyArg_ParseTuple(args, "z#z#z#z#i",
                                &src, &src_len,
                                &srcpath, &srcpath_len,
                                &dst, &dst_len,
                                &pass, &pass_len,
                                &level)) {
        return PyErr_Format(PyExc_ValueError,
            "expected arguments are compress(src, srcpath, dst, pass, level)");
    }

    /* mandatory arguments */
    if (src_len < 1) {
        return PyErr_Format(PyExc_ValueError, "compress src file is None");
    }
    if (dst_len < 1) {
        return PyErr_Format(PyExc_ValueError, "compress dst file is None");
    }

    /* _compress() takes a count of path entries, not a string length */
    srcpath_count = (srcpath_len > 0) ? 1 : srcpath_len;

    /* anything outside 1..9 falls back to the zlib default */
    if (!(1 <= level && level <= 9)) {
        level = Z_DEFAULT_COMPRESSION;
    }

    /* an empty password means "no password" */
    if (pass_len < 1) {
        pass = NULL;
    }

    status = _compress(&src, 1, &srcpath, srcpath_count,
                       dst, level, pass, 1, NULL);
    if (status != ZIP_OK) {
        return pyerr_msg;
    }

    Py_RETURN_NONE;
}
int main(const int argc, const char **argv){ initstdio(); #else int zlibrawstdio2(const int argc, const char **argv){ #endif char mode,level; if(argc<2)goto argerror; mode=argv[1][0],level=argv[1][1]; if(!mode)goto argerror; if(mode=='-')mode=argv[1][1],level=argv[1][2]; if(mode!='e'&&mode!='c'&&mode!='d')goto argerror; if(isatty(fileno(stdin))&&isatty(fileno(stdout)))goto argerror; return mode=='d'?_decompress(stdin,stdout):_compress(stdin,stdout,level?level-'0':9); argerror: fprintf(stderr,"zlibrawstdio2 e/d < in > out\nYou can also use -e,-c,-d.\n"); if(!lzmaOpen7z())fprintf(stderr,"\nNote: 7-zip is AVAILABLE.\n"),lzmaClose7z(); else fprintf(stderr,"\nNote: 7-zip is NOT available.\n"); return -1; }
/**
 * Compress the packet payload in place and switch the packet to @p opcode.
 *
 * After a successful compression the packet holds the original payload
 * length (as uint32) followed by the compressed bytes. The packet is left
 * unchanged when @p opcode is UNKNOWN_OPCODE or when _compress() reports a
 * compressed size of 0.
 *
 * @param opcode opcode to set on the compressed packet.
 */
void WorldPacket::compress(Opcodes opcode)
{
    if (opcode == UNKNOWN_OPCODE)
        return;

    const Opcodes previousOpcode = GetOpcode();
    const uint32 rawSize = wpos();

    // Scratch buffer sized for the worst case reported by compressBound()
    uint32 packedSize = compressBound(rawSize);
    std::vector<uint8> packed(packedSize);

    void* const target = static_cast<void*>(&packed[0]);
    const void* const source = static_cast<const void*>(contents());
    _compress(target, &packedSize, source, rawSize);
    if (packedSize == 0)
        return;

    // Rebuild the packet: uncompressed length first, then the packed data
    clear();
    reserve(packedSize + sizeof(uint32));
    *this << uint32(rawSize);
    append(&packed[0], packedSize);
    SetOpcode(opcode);

    sLog->outStaticDebug("Successfully compressed opcode %u (len %u) to %u (len %u)", previousOpcode, rawSize, opcode, packedSize);
}
/**
 * Compress the packet payload in place and switch the packet to @p opcode.
 *
 * After a successful compression the packet holds the original payload
 * length (as uint32) followed by the compressed bytes. The packet is left
 * unchanged when @p opcode is OPCODE_NOT_FOUND or when _compress() reports a
 * compressed size of 0.
 *
 * @param opcode opcode to set on the compressed packet.
 */
void WorldPacket::compress(uint32 opcode)
{
    // NOTE(review): the original author doubted that comparing against this
    // define is the right check here; kept as is.
    if (opcode == OPCODE_NOT_FOUND)
        return;

    const uint32 previousOpcode = GetOpcode();
    const uint32 rawSize = wpos();

    // Scratch buffer sized for the worst case reported by compressBound()
    uint32 packedSize = compressBound(rawSize);
    std::vector<uint8> packed(packedSize);

    void* const target = static_cast<void*>(&packed[0]);
    const void* const source = static_cast<const void*>(contents());
    _compress(target, &packedSize, source, rawSize);
    if (packedSize == 0)
        return;

    // Rebuild the packet: uncompressed length first, then the packed data
    clear();
    reserve(packedSize + sizeof(uint32));
    *this << uint32(rawSize);
    append(&packed[0], packedSize);
    SetOpcode(opcode);

    sLog->outStaticDebug("Successfully compressed opcode %u (len %u) to %u (len %u)", previousOpcode, rawSize, opcode, packedSize);
}
/*
 * Python binding: compress_multiple([src], [srcpath], dst, pass, level[, progress])
 *
 * Compresses every file named in the list `src` into the archive `dst`.
 *
 *   src       non-empty list of strings (source file names)
 *   srcpath   list of strings; either empty (ignored) or exactly as long
 *             as src
 *   dst       destination archive name; must not be None/empty
 *   pass      optional password; None/empty means no password
 *   level     1..9, anything else selects Z_DEFAULT_COMPRESSION
 *   progress  optional Python function, or None
 *
 * Returns None on success. Raises ValueError for bad arguments and
 * MemoryError when the temporary pointer arrays cannot be allocated; when
 * _compress() does not return ZIP_OK, pyerr_msg is returned.
 *
 * Both lists are validated completely before anything is allocated, and
 * every exit after an allocation releases it again.
 *
 * NOTE(review): the "z#" lengths are received into int variables. That only
 * matches the C API when PY_SSIZE_T_CLEAN is not defined before Python.h is
 * included -- confirm against the top of the file.
 */
static PyObject *py_compress_multiple(PyObject *self, PyObject *args)
{
    int i;
    int src_len, srcpath_len, dst_len, pass_len, level, res;
    PyObject * src, * srcpath;
    char ** srcs = NULL, ** srcspath = NULL;
    const char * dst;
    const char * pass;

    PyObject * str_obj;                /* current element of src */
    PyObject * strpath_obj;            /* current element of srcpath */
    PyObject * progress_cb_obj = NULL;

    if (!PyArg_ParseTuple(args, "O!O!z#z#i|O",
                                &PyList_Type, &src,
                                &PyList_Type, &srcpath,
                                &dst, &dst_len,
                                &pass, &pass_len,
                                &level,
                                &progress_cb_obj)) {
        return PyErr_Format(PyExc_ValueError,
            "expected arguments are "
            "compress_multiple([src], [srcpath], dst, pass, level)");
    }

    src_len = PyList_Size(src);

    if (src_len < 1) {
        return PyErr_Format(PyExc_ValueError, "compress src file is None");
    }

    srcpath_len = PyList_Size(srcpath);
    if (srcpath_len < 1) {
        srcpath = NULL;
    } else if (srcpath_len != src_len) {
        return PyErr_Format(PyExc_ValueError, "compress src file list has different length "
                                              "than src file path list");
    }

    if (dst_len < 1) {
        return PyErr_Format(PyExc_ValueError, "compress dst file is None");
    }

    if (level < 1 || 9 < level) {
        level = Z_DEFAULT_COMPRESSION;
    }

    if (pass_len < 1) {
        pass = NULL;
    }

    /* An explicit None means "no callback", as the error text below promises. */
    if (progress_cb_obj == Py_None) {
        progress_cb_obj = NULL;
    }
    if (progress_cb_obj != NULL) {
        if (!PyFunction_Check(progress_cb_obj)) {
            return PyErr_Format(PyExc_ValueError, "progress must be function or None");
        }
    }

    /* Validate both lists up front, while nothing is allocated yet. */
    for (i = 0; i < src_len; i++) {
        str_obj = PyList_GetItem(src, i);
#if PY_MAJOR_VERSION >= 3
        if (!PyUnicode_Check(str_obj))
#else
        if (!PyString_Check(str_obj) && !PyUnicode_Check(str_obj))
#endif
        {
            return PyErr_Format(PyExc_ValueError, "[src] elements must be strings");
        }
    }

    if (srcpath) {
        for (i = 0; i < srcpath_len; i++) {
            strpath_obj = PyList_GetItem(srcpath, i);
#if PY_MAJOR_VERSION >= 3
            if (!PyUnicode_Check(strpath_obj))
#else
            if (!PyString_Check(strpath_obj) && !PyUnicode_Check(strpath_obj))
#endif
            {
                return PyErr_Format(PyExc_ValueError, "[srcpath] elements must be strings");
            }
        }
    }

    /* The arrays only hold pointers borrowed from the Python string objects. */
    srcs = (char **)malloc(src_len * sizeof(char *));
    if (srcs == NULL) {
        return PyErr_NoMemory();
    }

    for (i = 0; i < src_len; i++) {
        str_obj = PyList_GetItem(src, i);
#if PY_MAJOR_VERSION >= 3
        srcs[i] = (char *)PyUnicode_AsUTF8(str_obj);
#else
        srcs[i] = PyString_AsString(str_obj);
#endif
        if (srcs[i] == NULL) {
            /* conversion failed; the Python exception is already set */
            free(srcs);
            return NULL;
        }
    }

    if (srcpath) {
        srcspath = (char **)malloc(srcpath_len * sizeof(char *));
        if (srcspath == NULL) {
            free(srcs);
            return PyErr_NoMemory();
        }

        for (i = 0; i < srcpath_len; i++) {
            strpath_obj = PyList_GetItem(srcpath, i);
#if PY_MAJOR_VERSION >= 3
            srcspath[i] = (char *)PyUnicode_AsUTF8(strpath_obj);
#else
            srcspath[i] = PyString_AsString(strpath_obj);
#endif
            if (srcspath[i] == NULL) {
                /* conversion failed; the Python exception is already set */
                free(srcspath);
                free(srcs);
                return NULL;
            }
        }
    }

    res = _compress((const char **)srcs, src_len,
                    (const char **)srcspath, srcpath_len,
                    dst, level, pass, 1, progress_cb_obj);

    /* release the temporary pointer arrays (free(NULL) is a no-op) */
    free(srcs);
    free(srcspath);

    if (res != ZIP_OK) {
        return pyerr_msg;
    }

    Py_RETURN_NONE;
}
std::shared_ptr<BaseEncodedSegment> _on_encode(const AnySegmentIterable<pmr_string> segment_iterable, const PolymorphicAllocator<pmr_string>& allocator) { /** * First iterate over the values for two reasons. * 1) If all the strings are empty LZ4 will try to compress an empty vector which will cause a segmentation fault. * In this case we can and need to do an early exit. * 2) Sum the length of the strings to improve the performance when copying the data to the char vector. */ auto num_chars = size_t{0u}; segment_iterable.with_iterators([&](auto it, auto end) { for (; it != end; ++it) { if (!it->is_null()) { num_chars += it->value().size(); } } }); // copy values and null flags from value segment auto values = pmr_vector<char>{allocator}; values.reserve(num_chars); auto null_values = pmr_vector<bool>{allocator}; /** * If the null value vector only contains the value false, then the value segment does not have any row value that * is null. In that case, we don't store the null value vector to reduce the LZ4 segment's memory footprint. */ auto segment_contains_null = false; /** * These offsets mark the beginning of strings (and therefore end of the previous string) in the data vector. * These offsets are character offsets. The string at position 0 starts at the offset stored at position 0, which * will always be 0. * Its exclusive end is the offset stored at position 1 (i.e., offsets[1] - 1 is the last character of the string * at position 0). * In case of the last string its end is determined by the end of the data vector. * * The offsets are stored as 32 bit unsigned integer as opposed to 64 bit (size_t) so that they can later be * compressed via vector compression. */ auto offsets = pmr_vector<uint32_t>{allocator}; /** * These are the lengths of each string. They are needed to train the zstd dictionary. 
*/ auto string_samples_lengths = pmr_vector<size_t>{allocator}; segment_iterable.with_iterators([&](auto it, auto end) { const auto segment_size = std::distance(it, end); null_values.resize(segment_size); offsets.resize(segment_size); string_samples_lengths.resize(segment_size); auto offset = uint32_t{0u}; // iterate over the iterator to access the values and increment the row index to write to the values and null // values vectors auto row_index = size_t{0}; for (; it != end; ++it) { const auto segment_element = *it; const auto contains_null = segment_element.is_null(); null_values[row_index] = contains_null; segment_contains_null = segment_contains_null || contains_null; offsets[row_index] = offset; auto sample_size = size_t{0u}; if (!contains_null) { const auto value = segment_element.value(); const auto string_length = value.size(); values.insert(values.cend(), value.begin(), value.end()); Assert(string_length <= std::numeric_limits<uint32_t>::max(), "The size of string row value exceeds the maximum of uint32 in LZ4 encoding."); offset += static_cast<uint32_t>(string_length); sample_size = string_length; } string_samples_lengths[row_index] = sample_size; ++row_index; } }); auto optional_null_values = segment_contains_null ? std::optional<pmr_vector<bool>>{null_values} : std::nullopt; /** * If the input only contained null values and/or empty strings we don't need to compress anything (and LZ4 will * cause an error). We can also throw away the offsets, since they won't be used for decompression. * We can do an early exit and return the (not encoded) segment. 
*/ if (num_chars == 0) { auto empty_blocks = pmr_vector<pmr_vector<char>>{allocator}; auto empty_dictionary = pmr_vector<char>{}; return std::allocate_shared<LZ4Segment<pmr_string>>(allocator, std::move(empty_blocks), std::move(optional_null_values), std::move(empty_dictionary), nullptr, _block_size, 0u, 0u, null_values.size()); } // Compress the offsets with a vector compression method to reduce the memory footprint of the LZ4 segment. auto compressed_offsets = compress_vector(offsets, vector_compression_type(), allocator, {offsets.back()}); /** * Pre-compute a zstd dictionary if the input data is split among multiple blocks. This dictionary allows * independent compression of the blocks, while maintaining a good compression ratio. * If the input data fits into a single block, training of a dictionary is skipped. */ const auto input_size = values.size(); auto dictionary = pmr_vector<char>{allocator}; if (input_size > _block_size) { dictionary = _train_dictionary(values, string_samples_lengths); } /** * Compress the data and calculate the last block size (which may vary from the block size of the previous blocks) * and the total compressed size. The size of the last block is needed for decompression. The total compressed size * is pre-calculated instead of iterating over all blocks when the memory consumption of the LZ4 segment is * estimated. */ auto lz4_blocks = pmr_vector<pmr_vector<char>>{allocator}; _compress(values, lz4_blocks, dictionary); auto last_block_size = input_size % _block_size != 0 ? input_size % _block_size : _block_size; auto total_compressed_size = size_t{0u}; for (const auto& compressed_block : lz4_blocks) { total_compressed_size += compressed_block.size(); } return std::allocate_shared<LZ4Segment<pmr_string>>( allocator, std::move(lz4_blocks), std::move(optional_null_values), std::move(dictionary), std::move(compressed_offsets), _block_size, last_block_size, total_compressed_size, null_values.size()); }
std::shared_ptr<BaseEncodedSegment> _on_encode(const AnySegmentIterable<T> segment_iterable, const PolymorphicAllocator<T>& allocator) { // TODO(anyone): when value segments switch to using pmr_vectors, the data can be copied directly instead of // copying it element by element auto values = pmr_vector<T>{allocator}; auto null_values = pmr_vector<bool>{allocator}; /** * If the null value vector only contains the value false, then the value segment does not have any row value that * is null. In that case, we don't store the null value vector to reduce the LZ4 segment's memory footprint. */ auto segment_contains_null = false; segment_iterable.with_iterators([&](auto it, auto end) { const auto segment_size = static_cast<size_t>(std::distance(it, end)); values.resize(segment_size); null_values.resize(segment_size); // iterate over the segment to access the values and increment the row index to copy values and null flags auto row_index = size_t{0u}; for (; it != end; ++it) { const auto segment_value = *it; const auto contains_null = segment_value.is_null(); values[row_index] = segment_value.value(); null_values[row_index] = contains_null; segment_contains_null = segment_contains_null || contains_null; ++row_index; } }); auto optional_null_values = segment_contains_null ? std::optional<pmr_vector<bool>>{null_values} : std::nullopt; /** * Pre-compute a zstd dictionary if the input data is split among multiple blocks. This dictionary allows * independent compression of the blocks, while maintaining a good compression ratio. * If the input data fits into a single block, training of a dictionary is skipped. */ const auto input_size = values.size() * sizeof(T); auto dictionary = pmr_vector<char>{}; if (input_size > _block_size) { dictionary = _train_dictionary(values); } /** * Compress the data and calculate the last block size (which may vary from the block size of the previous blocks) * and the total compressed size. The size of the last block is needed for decompression. 
The total compressed * size is pre-calculated instead of iterating over all blocks when the memory consumption of the LZ4 segment is * estimated. */ auto lz4_blocks = pmr_vector<pmr_vector<char>>{allocator}; auto total_compressed_size = size_t{0u}; auto last_block_size = size_t{0u}; if (!values.empty()) { _compress(values, lz4_blocks, dictionary); last_block_size = input_size % _block_size != 0 ? input_size % _block_size : _block_size; for (const auto& compressed_block : lz4_blocks) { total_compressed_size += compressed_block.size(); } } return std::allocate_shared<LZ4Segment<T>>(allocator, std::move(lz4_blocks), std::move(optional_null_values), std::move(dictionary), _block_size, last_block_size, total_compressed_size, values.size()); }
int main(const int argc, const char **argv){ initstdio(); #else int _7ciso(const int argc, const char **argv){ #endif int cmode=0,mode=0; int zlib=0,sevenzip=0,zopfli=0,miniz=0,slz=0,libdeflate=0; int threshold=100; poptContext optCon; int optc; struct poptOption optionsTable[] = { //{ "longname", "shortname", argInfo, *arg, int val, description, argment description} { "stdout", 'c', POPT_ARG_NONE, &cmode, 0, "stdout (currently ignored)", NULL }, { "zlib", 'z', POPT_ARG_INT|POPT_ARGFLAG_OPTIONAL, NULL, 'z', "1-9 (default 6) zlib", "level" }, { "miniz", 'm', POPT_ARG_INT|POPT_ARGFLAG_OPTIONAL, NULL, 'm', "1-2 (default 1) miniz", "level" }, { "slz", 's', POPT_ARG_INT|POPT_ARGFLAG_OPTIONAL, NULL, 's', "1-1 (default 1) slz", "level" }, { "libdeflate", 'l', POPT_ARG_INT|POPT_ARGFLAG_OPTIONAL, NULL, 'l', "1-12 (default 6) libdeflate", "level" }, { "7zip", 'S', POPT_ARG_INT|POPT_ARGFLAG_OPTIONAL, NULL, 'S', "1-9 (default 2) 7zip", "level" }, { "zopfli", 'Z', POPT_ARG_INT, &zopfli, 0, "zopfli", "numiterations" }, { "threshold", 't', POPT_ARG_INT, &threshold, 0, "compression threshold (in %, 10-100)", "threshold" }, { "decompress", 'd', POPT_ARG_NONE, &mode, 0, "decompress", NULL }, POPT_AUTOHELP, POPT_TABLEEND, }; optCon = poptGetContext(argv[0], argc, argv, optionsTable, 0); poptSetOtherOptionHelp(optCon, "{-z9 dec.iso enc.cso} or {-cd <enc.cso >dec.iso}"); for(;(optc=poptGetNextOpt(optCon))>=0;){ switch(optc){ case 'z':{ char *arg=poptGetOptArg(optCon); if(arg)zlib=strtol(arg,NULL,10),free(arg); else zlib=6; break; } case 'm':{ char *arg=poptGetOptArg(optCon); if(arg)miniz=strtol(arg,NULL,10),free(arg); else miniz=1; break; } case 's':{ char *arg=poptGetOptArg(optCon); if(arg)slz=strtol(arg,NULL,10),free(arg); else slz=1; break; } case 'l':{ char *arg=poptGetOptArg(optCon); if(arg)libdeflate=strtol(arg,NULL,10),free(arg); else libdeflate=1; break; } case 'S':{ char *arg=poptGetOptArg(optCon); if(arg)sevenzip=strtol(arg,NULL,10),free(arg); else sevenzip=2; break; } } } 
int level_sum=zlib+sevenzip+zopfli+miniz+slz+libdeflate; if( optc<-1 || (!mode&&!zlib&&!sevenzip&&!zopfli&&!miniz&&!slz&&!libdeflate) || (mode&&(zlib||sevenzip||zopfli||miniz||slz||libdeflate)) || (!mode&&(level_sum==zlib)+(level_sum==sevenzip)+(level_sum==zopfli)+(level_sum==miniz)+(level_sum==slz)+(level_sum==libdeflate)!=1) ){ poptPrintHelp(optCon, stderr, 0); poptFreeContext(optCon); if(!lzmaOpen7z())fprintf(stderr,"\nNote: 7-zip is AVAILABLE.\n"),lzmaClose7z(); else fprintf(stderr,"\nNote: 7-zip is NOT available.\n"); return 1; } if(mode){ if(isatty(fileno(stdin))||isatty(fileno(stdout))) {poptPrintHelp(optCon, stderr, 0);poptFreeContext(optCon);return -1;} poptFreeContext(optCon); //lzmaOpen7z(); int ret=_decompress(stdin,stdout); //lzmaClose7z(); return ret; }else{ if(threshold<10)threshold=10; if(threshold>100)threshold=100; const char *fname=poptGetArg(optCon); if(!fname){poptPrintHelp(optCon, stderr, 0);poptFreeContext(optCon);return -1;} FILE *in=fopen(fname,"rb"); if(!in){fprintf(stderr,"failed to open %s\n",fname);poptFreeContext(optCon);return 2;} fname=poptGetArg(optCon); if(!fname){poptPrintHelp(optCon, stderr, 0);poptFreeContext(optCon);return -1;} FILE *out=fopen(fname,"wb"); if(!out){fclose(in);fprintf(stderr,"failed to open %s\n",fname);poptFreeContext(optCon);return 2;} poptFreeContext(optCon); fprintf(stderr,"compression level = %d ",level_sum); int ret=0; if(zlib){ fprintf(stderr,"(zlib)\n"); ret=_compress(in,out,zlib,DEFLATE_ZLIB,threshold); }else if(sevenzip){ fprintf(stderr,"(7zip)\n"); if(lzmaOpen7z()){ fprintf(stderr,"7-zip is NOT available.\n"); return -1; } ret=_compress(in,out,sevenzip,DEFLATE_7ZIP,threshold); lzmaClose7z(); }else if(zopfli){ fprintf(stderr,"(zopfli)\n"); ret=_compress(in,out,zopfli,DEFLATE_ZOPFLI,threshold); }else if(miniz){ fprintf(stderr,"(miniz)\n"); ret=_compress(in,out,miniz,DEFLATE_MINIZ,threshold); }else if(slz){ fprintf(stderr,"(slz)\n"); ret=_compress(in,out,slz,DEFLATE_SLZ,threshold); }else if(libdeflate){ 
fprintf(stderr,"(libdeflate)\n"); ret=_compress(in,out,libdeflate,DEFLATE_LIBDEFLATE,threshold); } fclose(in),fclose(out); return ret; } }