int ppmd_state_init(void **data, int *level, int alloc) { CPpmd8 *_ppmd; pthread_mutex_lock(&mem_init_lock); if (!mem_inited) { slab_cache_add(sizeof (CPpmd8)); slab_cache_add(ppmd8_mem_sz[*level]); } pthread_mutex_unlock(&mem_init_lock); _ppmd = (CPpmd8 *)slab_alloc(NULL, sizeof (CPpmd8)); if (!_ppmd) return (-1); /* Levels 0 - 14 correspond to PPMd model orders 0 - 14. */ if (*level > 14) *level = 14; _ppmd->Order = *level; _ppmd->Base = 0; _ppmd->Size = 0; *data = _ppmd; if (*level > 9) *level = 9; if (alloc) return (ppmd_alloc(*data)); return (0); }
/*
 * Compress one buffer with a backend selected from the data type flags and
 * the adaptive mode stored in the adapt_data context.
 *
 * src, srclen - input buffer and its length in bytes.
 * dst, dstlen - output buffer and length pointer, passed through to the
 *               selected backend.
 * level, chdr - passed through unchanged to the selected backend.
 * btype       - data type flags. TYPE_UNKNOWN triggers a scan of the buffer
 *               that classifies it as binary or text (optionally markup).
 * data        - pointer to a struct adapt_data holding the per-backend state
 *               pointers and adapt_mode.
 *
 * Returns the negative value reported by the backend (or by ppmd_alloc())
 * on failure. On success returns the ADAPT_COMPRESS_* code identifying the
 * backend that was used.
 */
int
adapt_compress(void *src, uint64_t srclen, void *dst,
    uint64_t *dstlen, int level, uchar_t chdr, int btype, void *data)
{
	struct adapt_data *adat = (struct adapt_data *)(data);
	uchar_t *src1 = (uchar_t *)src;
	int rv = 0, bsc_type = 0;

	if (btype == TYPE_UNKNOWN) {
		uint64_t i, tot8b, tag1, tag2, tag3;
		double tagcnt;
		uchar_t cur_byte, prev_byte;

		/*
		 * Count number of 8-bit binary bytes and XML tags in source.
		 *   tag1 - number of '<' bytes
		 *   tag2 - number of '>' bytes
		 *   tag3 - number of "</" and "/>" sequences; spaces between the
		 *          two characters are skipped because prev_byte is not
		 *          updated on a space.
		 */
		tot8b = 0;
		tag1 = 0;
		tag2 = 0;
		tag3 = 0;
		prev_byte = cur_byte = 0;
		for (i = 0; i < srclen; i++) {
			cur_byte = src1[i];
			/* Branch-free accumulation for possible auto-vectorization. */
			tot8b += (cur_byte & 0x80);
			tag1 += (cur_byte == '<');
			tag2 += (cur_byte == '>');
			tag3 += ((prev_byte == '<') & (cur_byte == '/'));
			tag3 += ((prev_byte == '/') & (cur_byte == '>'));
			if (cur_byte != ' ')
				prev_byte = cur_byte;
		}

		/* Each high-bit byte contributed 0x80; convert the sum to a count. */
		tot8b /= 0x80;
		tagcnt = tag1 + tag2 + tag3;

		if (adat->adapt_mode == 2 && tot8b > FORTY_PCT(srclen)) {
			btype = TYPE_BINARY;
		} else if (adat->adapt_mode == 1 && tot8b > FIFTY_PCT(srclen)) {
			btype = TYPE_BINARY;
		} else {
			btype = TYPE_TEXT;
			/*
			 * Markup heuristic: the '<' and '>' counts differ by less
			 * than 4, tag3 exceeds 40% of the '<' count, and the tag
			 * counters together exceed 0.1% of the buffer length.
			 *
			 * The lower bound is written as tag1 + 4 > tag2 rather than
			 * tag1 > tag2 - 4 because the counters are unsigned: the
			 * subtraction would wrap around for buffers containing
			 * fewer than four '>' bytes and the test could never pass.
			 */
			if (tag1 + 4 > tag2 && tag1 < tag2 + 4 &&
			    tag3 > (double)tag1 * 0.40 &&
			    tagcnt > (double)srclen * 0.001)
				btype |= TYPE_MARKUP;
		}
	}

	/*
	 * Use PPMd if some percentage of source is 7-bit textual bytes, otherwise
	 * use Bzip2 or LZMA. For totally incompressible data we always use LZ4.
	 * There is no point trying to compress such data, like JPEGs. However,
	 * some archive headers and zero paddings can exist which LZ4 can easily
	 * take care of very fast.
	 */
#ifdef ENABLE_PC_LIBBSC
	bsc_type = is_bsc_type(btype);
#endif
	if (is_incompressible(btype)) {
		rv = lz4_compress(src, srclen, dst, dstlen, level, chdr, btype,
		    adat->lz4_data);
		if (rv < 0)
			return (rv);
		rv = ADAPT_COMPRESS_LZ4;
		lz4_count++;

	} else if (adat->adapt_mode == 2 && PC_TYPE(btype) == TYPE_BINARY &&
	    !bsc_type) {
		rv = lzma_compress(src, srclen, dst, dstlen, level, chdr, btype,
		    adat->lzma_data);
		if (rv < 0)
			return (rv);
		rv = ADAPT_COMPRESS_LZMA;
		lzma_count++;

	} else if (adat->adapt_mode == 1 && PC_TYPE(btype) == TYPE_BINARY &&
	    !bsc_type) {
		rv = bzip2_compress(src, srclen, dst, dstlen, level, chdr, btype,
		    NULL);
		if (rv < 0)
			return (rv);
		rv = ADAPT_COMPRESS_BZIP2;
		bzip2_count++;

	} else {
#ifdef ENABLE_PC_LIBBSC
		if (adat->bsc_data && bsc_type) {
			rv = libbsc_compress(src, srclen, dst, dstlen, level, chdr,
			    btype, adat->bsc_data);
			if (rv < 0)
				return (rv);
			rv = ADAPT_COMPRESS_BSC;
			bsc_count++;
		} else {
#endif
			/*
			 * The PPMd working memory is set up just for this call and
			 * released again before the result is inspected, so it is
			 * freed on both the success and the failure path.
			 */
			rv = ppmd_alloc(adat->ppmd_data);
			if (rv < 0)
				return (rv);
			rv = ppmd_compress(src, srclen, dst, dstlen, level, chdr,
			    btype, adat->ppmd_data);
			ppmd_free(adat->ppmd_data);
			if (rv < 0)
				return (rv);
			rv = ADAPT_COMPRESS_PPMD;
			ppmd_count++;
#ifdef ENABLE_PC_LIBBSC
		}
#endif
	}

	return (rv);
}