int _colenc(struct BITSTREAM *bs, int n, const double *store, int col, double scale, int Ml2) { #if 0 double h[2]; double emax = 0.0; #endif int i; int nbits = bufferSize(bs); struct DELTACODE dc; writeManyBits(bs, (int)scale, 32); writeBits(bs, Ml2, 8); #if 1 initDeltaCode(&dc, scale, Ml2); for (i=0; i<n; i++) { deltaEncode(&dc, bs, store[6*i+col]); } #else h[0] = h[1] = 0 ; for (i=0; i<n; i++) { double p = 2.0*h[0]-h[1]; double d = floor(0.5+scale*(store[6*i+col]-p)); double e = fabs(p+d/scale-store[6*i+col]); printf("p = %1.16lf, writ d=%1.0lf, Ml2=%d\n",p,d,Ml2); grsCode(bs, d, Ml2); h[1] = h[0]; h[0] = p+d/scale; if (e>emax) emax = e; } printf("emax = %le\n",emax); #endif return bufferSize(bs) - nbits; }
/**
 * Write this list to `of` in compressed binary form.
 *
 * Bytes are emitted in this order:
 *   1. uid                      (sizeof(lemur::api::TERMID_T) bytes)
 *   2. df                       (LOC_Tsize bytes)
 *   3. lastid - begin           (LOC_Tsize bytes)
 *   4. compressed byte count    (LOC_Tsize bytes)
 *   5. the compressed payload   (that many bytes)
 *
 * deltaEncode() is called on the list before compression and nothing in
 * this method reverses it afterwards.
 */
void lemur::index::InvDocList::binWriteC(ofstream& of) {
  // Both extents are captured before deltaEncode() runs.
  lemur::api::COUNT_T entryCount = end - begin;
  lemur::api::COUNT_T lastOffset = lastid - begin;

  // Fixed-size header fields.
  of.write(reinterpret_cast<const char*>(&uid), sizeof(lemur::api::TERMID_T));
  of.write(reinterpret_cast<const char*>(&df), LOC_Tsize);
  of.write(reinterpret_cast<const char*>(&lastOffset), LOC_Tsize);

  deltaEncode();

  // Scratch buffer sized like the uncompressed data; the original author's
  // assumption is that the compressed form never exceeds that size.
  // new[] (rather than malloc) is used so allocation failure throws.
  unsigned char* packed = new unsigned char[entryCount * LOC_Tsize];
  int packedBytes =
      lemur::utility::RVLCompress::compress_ints((int *)begin, packed, entryCount);

  // Length prefix followed by the compressed bytes themselves.
  of.write(reinterpret_cast<const char*>(&packedBytes), LOC_Tsize);
  of.write(reinterpret_cast<const char*>(packed), packedBytes);

  delete[] packed;
}
/**
 * Compress the inverted link-document list with the RVL algorithm.
 *
 * deltaEncode() is applied to the list first, then the entries in
 * [begin, end) are compressed into a freshly allocated buffer that starts
 * with a four-slot LOC_T header:
 *   [0] ulid, [1] df, [2] lastid - begin, [3] compressed payload size.
 *
 * @param vecLength  output: payload size returned by compress_ints plus
 *                   the 4*LOC_Tsize header bytes.
 * @return pointer to the start of the buffer (header first). The storage
 *         comes from `new unsigned char[]` and is not released here.
 */
lemur::api::LOC_T* link::api::InvLinkDocList::compInvLinkDocList(lemur::api::COUNT_T &vecLength) {
  const int kHeaderSlots = 4;

  // Extents are taken before deltaEncode() runs.
  int entryCount = end - begin;
  int lastOffset = lastid - begin;

  deltaEncode();

  // Room for the header slots plus one LOC_T-sized cell per entry.
  unsigned char* raw =
      new unsigned char[(entryCount + kHeaderSlots) * sizeof(lemur::api::LOC_T)];

  // The payload is written just past the header area.
  vecLength = lemur::utility::RVLCompress::compress_ints(
      (int *)begin, raw + kHeaderSlots * LOC_Tsize, entryCount);

  lemur::api::LOC_T* header = reinterpret_cast<lemur::api::LOC_T*>(raw);
  // NOTE(review): original author flagged that ulid is a 64-bit value being
  // narrowed into a LOC_T slot here, with possible loss of data; unresolved.
  header[0] = ulid;
  header[1] = df;
  header[2] = lastOffset;
  header[3] = vecLength;   // payload size only, header not yet included

  vecLength += kHeaderSlots * LOC_Tsize;
  return header;
}