/**
 * Reports the buffer size associated with an input of `len` values.
 *
 * The value returned is currently just `len` itself; see the FIXME below.
 *
 * @param len number of input values
 * @return `len`, unchanged
 * @throws via THROW_ENCODING_EXCEPTION when `len` does not fit in 32 bits
 */
uint64_t require(uint64_t len) const {
  const bool exceeds32bit = UINT32_MAX < len;
  if (exceeds32bit)
    THROW_ENCODING_EXCEPTION(
        "BinaryInterpolative only supports 32-bit length");

  /* FIXME: Fill correct the required size */
  const uint64_t required = len;
  return required;
}
/**
 * Decodes a stream produced by the matching encodeArray().
 *
 * Layout consumed here: in[0] is passed to the bit reader as the last
 * argument of intrpolatvArray(); the bit stream itself starts at in[1] and
 * spans `len - 1` words.
 *
 * @param in     encoded words (must not be null)
 * @param len    number of words available at `in` (must be >= 1, because
 *               the header word in[0] is always read)
 * @param out    destination buffer (must not be null)
 * @param nvalue number of values to decode into `out`
 * @throws via THROW_ENCODING_EXCEPTION when `len` or `nvalue` exceed 32 bits,
 *         when a pointer is null, or when `len` is zero
 */
void decodeArray(const uint32_t *in, uint64_t len, uint32_t *out,
                 uint64_t nvalue) const {
  if (len > UINT32_MAX || nvalue > UINT32_MAX)
    THROW_ENCODING_EXCEPTION(
        "BinaryInterpolative only supports 32-bit length");

  if (!in)
    THROW_ENCODING_EXCEPTION("Invalid input: in");
  /* Without this guard `*in` is read out of bounds and `len - 1` wraps. */
  if (len == 0)
    THROW_ENCODING_EXCEPTION("Invalid input: len");
  if (!out)
    THROW_ENCODING_EXCEPTION("Invalid input: out");

  /* Header word, then the bit stream that follows it */
  const uint32_t header = *in;
  BitsReader rd(in + 1, len - 1);
  rd.intrpolatvArray(out, nvalue, 0, 0, header);
}
/**
 * Encodes `len` values with the binary interpolative writer.
 *
 * Layout produced here: out[0] receives in[len - 1] (the original comment
 * calls it "a maximum value"), followed by the bits emitted by
 * BitsWriter::intrpolatvArray().
 *
 * @param in     values to encode (must not be null)
 * @param len    number of values at `in` (must be >= 1, because in[len - 1]
 *               is always read)
 * @param out    destination buffer (must not be null)
 * @param nvalue in: must be non-zero, since out[0] is always written;
 *               out: set to `wt.size() + 1`, i.e. the writer's reported size
 *               plus the header word
 * @throws via THROW_ENCODING_EXCEPTION when `len` or `*nvalue` exceed
 *         32 bits, when a pointer is null, or when `len` / `*nvalue` is zero
 */
void encodeArray(const uint32_t *in, uint64_t len, uint32_t *out,
                 uint64_t *nvalue) const {
  if (!nvalue)
    THROW_ENCODING_EXCEPTION("Invalid input: nvalue");
  if (len > UINT32_MAX || *nvalue > UINT32_MAX)
    THROW_ENCODING_EXCEPTION(
        "BinaryInterpolative only supports 32-bit length");

  if (!in)
    THROW_ENCODING_EXCEPTION("Invalid input: in");
  /* Without this guard in[len - 1] indexes before the buffer. */
  if (len == 0)
    THROW_ENCODING_EXCEPTION("Invalid input: len");
  if (!out)
    THROW_ENCODING_EXCEPTION("Invalid input: out");
  /* out[0] is written unconditionally, so at least one word is required. */
  if (*nvalue == 0)
    THROW_ENCODING_EXCEPTION("Invalid input: nvalue");

  /* Write a maximum value in the head of out */
  const uint32_t last = in[len - 1];
  out[0] = last;

  /*
   * Do actual binary interpolative code.
   * NOTE(review): the writer is bounded by `len - 1` words rather than by the
   * caller-supplied capacity `*nvalue - 1` -- confirm this is intended.
   */
  BitsWriter wt(out + 1, len - 1);
  wt.intrpolatvArray(in, len, 0, 0, last);
  wt.flush_bits();

  *nvalue = wt.size() + 1;
}
/**
 * Builds the encoder that corresponds to a policy identifier.
 *
 * Each recognised E_* constant maps to one concrete class in `internals`;
 * the freshly allocated object is handed to an EncodingPtr as an
 * internals::EncodingBase pointer.
 *
 * @param policy one of the E_* policy constants
 * @return an EncodingPtr wrapping the newly created encoder
 * @throws via THROW_ENCODING_EXCEPTION when `policy` matches no known value
 */
EncodingPtr EncodingFactory::create(const int policy) {
  internals::EncodingBase *codec = nullptr;

  switch (policy) {
    /* Gamma family */
    case E_N_GAMMA:
      codec = new internals::N_Gamma();
      break;
    case E_F_GAMMA:
      codec = new internals::F_Gamma();
      break;
    case E_FU_GAMMA:
      codec = new internals::FU_Gamma();
      break;

    /* Delta family */
    case E_N_DELTA:
      codec = new internals::N_Delta();
      break;
    case E_F_DELTA:
      codec = new internals::F_Delta();
      break;
    case E_FU_DELTA:
      codec = new internals::FU_Delta();
      break;
    case E_FG_DELTA:
      codec = new internals::FG_Delta();
      break;

    case E_VARIABLEBYTE:
      codec = new internals::VariableByte();
      break;
    case E_BINARYIPL:
      codec = new internals::BinaryInterpolative();
      break;

    /* Simple family */
    case E_SIMPLE9:
      codec = new internals::Simple9();
      break;
    case E_SIMPLE16:
      codec = new internals::Simple16();
      break;

    /* PForDelta family */
    case E_P4D:
      codec = new internals::PForDelta();
      break;
    case E_OPTP4D:
      codec = new internals::OPTPForDelta();
      break;

    /* VSEncoding family */
    case E_VSEBLOCKS:
      codec = new internals::VSEncodingBlocks();
      break;
    case E_VSER:
      codec = new internals::VSE_R();
      break;
    case E_VSEREST:
      codec = new internals::VSEncodingRest();
      break;
    case E_VSEHYB:
      codec = new internals::VSEncodingBlocksHybrid();
      break;
    case E_VSESIMPLE:
      codec = new internals::VSEncodingSimple();
      break;

    default:
      THROW_ENCODING_EXCEPTION("Invalid value: policy");
  }

  return EncodingPtr(codec);
}
void VSEncodingNaive::encodeArray(const uint32_t *in, uint64_t len, uint32_t *out, uint64_t *nvalue) const { if (in == NULL) THROW_ENCODING_EXCEPTION("Invalid input: in"); if (len == 0) THROW_ENCODING_EXCEPTION("Invalid input: len"); if (out == NULL) THROW_ENCODING_EXCEPTION("Invalid input: out"); if (*nvalue == 0) THROW_ENCODING_EXCEPTION("Invalid input: nvalue"); ASSERT_ADDR(in, len);ASSERT_ADDR(out, *nvalue); /* Compute optimal partition */ std::vector<uint32_t> logs; for (uint64_t i = 0; i < len; i++) { // MSB的作用是返回左起第一个1之前的0的个数, // 32-MSB(x)并不表示ceiling(log(x)),而是表示二进制x需要多少位描述的意思 // uint32_t msb = MSB32(in[i]); logs.push_back(VSENAIVE_REMAPLOGS[32 - MSB32(in[i])]); } ASSERT(logs.size() == len); #ifdef PARTITIONWITHOP //使用近似划分 // 不同于DP版本的(k,b) tuple,这里k和b分开存储 optimal_partition op(logs, 64); std::vector<uint64_t> parts = op.partition; std::vector<uint32_t> bParts = op.Bs; /* for (int i = 0; i < op.Bs.size(); i++) { std::cout << parts[i] << std::endl; std::cout << op.Ks[i] << std::endl; std::cout << bParts[i] << std::endl; } std::cout << parts[parts.size() - 1] << std::endl;*/ uint64_t csize = *nvalue - 2; // 使用Simple16来压缩所有的K值 Simple16 simp; /* Write the values of K */ simp.encodeArray(op.Ks.data(), op.Ks.size(), out + 2, &csize); //分别Simple16的存储压缩大小和原始大小 BYTEORDER_FREE_STORE32(out, csize); BYTEORDER_FREE_STORE32(out + 1, op.Ks.size()); /* for (int i = 0; i < csize + 2; i++) { std::cout << out[i] << std::endl; }*/ out += csize + 2; /* std::cout << out[0] << std::endl;*/ BitsWriter wt(out, *nvalue - csize - 2); uint64_t num = parts.size() - 1; for (uint64_t i = 0; i < num; i++) { /* Write the value of B*/ wt.write_bits(VSENAIVE_CODELOGS[bParts[i]], VSENAIVE_LOGLOG); /* Write integers */ for (uint64_t j = parts[i]; j < parts[i + 1]; j++) wt.write_bits(in[j], bParts[i]); } wt.flush_bits(); *nvalue = wt.size() + 2 + csize; #else //使用动态规划 std::vector<uint32_t> parts; ASSERT(parts.size() == 0); // 计算出每一个block中的k值,存储在parts链表中 vdp_->computePartition(logs, &parts, 
VSENAIVE_LOGLEN + VSENAIVE_LOGLOG); ASSERT(parts.size() > 1); /* Ready to write data */ BitsWriter wt(out, *nvalue); uint64_t num = parts.size() - 1; for (uint64_t i = 0; i < num; i++) { /* Compute max B in the block */ /*在每个block中计算出所需要的b的值*/ uint32_t maxB = 0; for (auto j = parts[i]; j < parts[i + 1]; j++) { if (maxB < logs[j]) maxB = logs[j]; } /* Write the value of B and K */ wt.write_bits(VSENAIVE_CODELOGS[maxB], VSENAIVE_LOGLOG); wt.write_bits(VSENAIVE_CODELENS[parts[i + 1] - parts[i]], VSENAIVE_LOGLEN); /* Write integers */ for (uint64_t j = parts[i]; j < parts[i + 1]; j++) wt.write_bits(in[j], maxB); } wt.flush_bits(); *nvalue = wt.size(); #endif }
void VSEncodingNaive::decodeArray(const uint32_t *in, uint64_t len, uint32_t *out, uint64_t nvalue) const { if (in == NULL) THROW_ENCODING_EXCEPTION("Invalid input: in"); if (len == 0) THROW_ENCODING_EXCEPTION("Invalid input: len"); if (out == NULL) THROW_ENCODING_EXCEPTION("Invalid input: out"); if (nvalue == 0) THROW_ENCODING_EXCEPTION("Invalid input: nvalue"); ASSERT_ADDR(in, len);ASSERT_ADDR(out, nvalue); uint32_t *oterm = out + nvalue; #ifdef PARTITIONWITHOP //使用近似划分 //读取Simple16的压缩大小和原始大小 uint32_t cmpSize = BYTEORDER_FREE_LOAD32(in); uint32_t leng = BYTEORDER_FREE_LOAD32(in + 1); // for (int i = 0; i < cmpSize + 2; i++) { // std::cout << in[i] << std::endl; // } //读取Simple16压缩的Ks并另外存储 Simple16 simp; // uint32_t Ks[leng]; uint32_t* Ks = (uint32_t*) malloc(leng * 4); simp.decodeArray(in + 2, cmpSize, Ks, nvalue); FILE* stat = fopen("./share/vserOPstatistics", "a"); fwrite(Ks, 4, leng, stat);//每个分块长度 fflush(stat); fclose(stat); // // memcpy(Ks, out, leng * 4); in += cmpSize + 2; BitsReader rd(in, len - cmpSize - 2); uint32_t i = 0; // int count = 0; while (LIKELY(out < oterm)) { uint32_t B = VSENAIVE_LOGS[rd.read_bits(VSENAIVE_LOGLOG)]; for (uint32_t j = 0; j < Ks[i]; j++) { out[j] = (B != 0) ? rd.read_bits(B) : 0; // std::cout << count++ << ":" << out[j] << std::endl; } out += Ks[i++]; } #else BitsReader rd(in, len); FILE* stat = fopen("./share/vserDPstatistics", "a"); while (LIKELY(out < oterm)) { uint32_t B = VSENAIVE_LOGS[rd.read_bits(VSENAIVE_LOGLOG)]; uint32_t K = VSENAIVE_LENS[rd.read_bits(VSENAIVE_LOGLEN)]; fwrite(&K, 4, 1, stat); for (uint32_t i = 0; i < K; i++) { out[i] = (B != 0) ? rd.read_bits(B) : 0; // printf("out[%d]:%d\cmpSize",i,out[i]); } out += K; } fflush(stat); fclose(stat); #endif }