/**
 * Compress one piece of input against an existing dictionary and extend that
 * dictionary with newly discovered rules.
 *
 * Steps, in order:
 *   1. Build the working structure (RDS) from `input`, using
 *      createRDSwithsampling() when `sample` is non-zero and createRDS()
 *      otherwise.
 *   2. Clamp dict->num_rules so that (num_rules - CHAR_SIZE + ut->size) does
 *      not exceed `shared_dictsize`.
 *   3. For every rule already in the dictionary (indices CHAR_SIZE and up),
 *      look its pair up in the RDS and replace its occurrences.
 *   4. While getMaxPair() still returns a pair and the number of codes in use
 *      is below 2^codewordlength, add the pair to the dictionary and replace
 *      its occurrences.
 *   5. Hand the result to getCompSeq(rds, dict) and release the RDS.
 *
 * @param dict            Dictionary to start from; modified in place.
 * @param input           Stream the RDS is built from.
 * @param length          Forwarded unchanged to the RDS constructor.
 * @param shared_dictsize Upper bound applied to the pre-existing rules.
 * @param codewordlength  Code width in bits; bounds the total number of codes.
 * @param ut              Used-character table; only ut->size is read here.
 * @param chunk_size      Forwarded to createRDSwithsampling() when sampling.
 * @param sample          Non-zero selects the sampling RDS constructor.
 * @return `dict` on success. NULL if `dict` or `ut` is NULL, or if the RDS
 *         could not be created; `dict` is not freed in that case and stays
 *         owned by the caller.
 */
DICT *RunRepair(DICT *dict, FILE *input, int length, unsigned int shared_dictsize,
                unsigned int codewordlength, USEDCHARTABLE *ut,
                unsigned int chunk_size, unsigned int sample)
{
  RDS *rds;
  PAIR *max_pair;
  PAIR *target;
  CODE new_code;
  unsigned int code_limit;
  uint i;

  /* Both are dereferenced below; reject them before touching the input. */
  if (dict == NULL || ut == NULL) {
    return NULL;
  }

  /*
   * Number of distinct codes a codeword can express. Shifting by the full
   * width of the type (or more) is undefined, so saturate in that case.
   * The factor 8 assumes 8-bit bytes.
   */
  if (codewordlength < sizeof(unsigned int) * 8) {
    code_limit = 1U << codewordlength;
  } else {
    code_limit = ~0U;
  }

  if (sample) {
    rds = createRDSwithsampling(input, length, chunk_size);
  } else {
    rds = createRDS(input, length);
  }
  if (!rds) {
    return NULL;
  }

  /* pqidx is defined outside this function; it is reset for every run. */
  pqidx = 0;

  /* Keep at most shared_dictsize rules from the incoming dictionary. */
  if (dict->num_rules - CHAR_SIZE + ut->size > shared_dictsize) {
    dict->num_rules = shared_dictsize + CHAR_SIZE - ut->size;
  }

  /* Convert the text using the rules the dictionary already contains. */
  for (i = CHAR_SIZE; i < dict->num_rules; i++) {
    target = locatePair(rds, dict->rule[i].left, dict->rule[i].right);
    if (target) {
      /* The value returned by replacePairs() is not needed here. */
      replacePairs(rds, target, i);
    }
  }

  /*
   * Add new rules. getMaxPair() is evaluated before the capacity test, in the
   * same order as before, so it is still called once when the code space is
   * already full.
   */
  while ((max_pair = getMaxPair(rds)) != NULL &&
         (unsigned int)(dict->num_rules + ut->size - CHAR_SIZE) < code_limit) {
    new_code = addNewPair(dict, max_pair);
    replacePairs(rds, max_pair, new_code);
  }

  getCompSeq(rds, dict);
  destructRDS(rds);
  return dict;
}
/*
 * Initialisation hook, called when the service is loaded.
 *
 * Configures what the service advertises to ICAP clients (preview size,
 * 204 support, which files get a preview) and fills the search_engine table
 * with the baidu and google entries.
 *
 * srv_xdata   - service data the ci_service_* settings are applied to.
 * server_conf - not used by this function.
 *
 * Always returns CI_OK.
 */
int search_record_init_service(ci_service_xdata_t * srv_xdata, struct ci_server_conf *server_conf)
{
    /* Preview size announced to ICAP clients. */
    enum { PREVIEW_SIZE = 128 };

    (void)server_conf;

    ci_debug_printf(5, "Initialization of Search Record module......\n");

    /* Tell the ICAP clients how much preview data we accept. */
    ci_service_set_preview(srv_xdata, PREVIEW_SIZE);

    /* Tell the ICAP clients that 204 responses are supported. */
    ci_service_enable_204(srv_xdata);

    /* Ask the ICAP clients to send preview data for all files. */
    ci_service_set_transfer_preview(srv_xdata, "*");

    /*
     * Requesting the X-Authenticated-User / X-Authenticated-Groups headers
     * (ci_service_set_xopts with CI_XAUTHENTICATEDUSER|CI_XAUTHENTICATEDGROUPS)
     * is intentionally left disabled.
     */

    /* Initialise the search-engine table, then add one entry per engine. */
    initPairs(&search_engine);
    addNewPair(&search_engine, baidu_matchurl, baidu_wordlist, baidu_site);
    addNewPair(&search_engine, google_matchurl, google_wordlist, google_site);

    return CI_OK;
}
/**
 * Build a dictionary and compressed sequence for `input`, handing out codes
 * until the code space of `code_len` bits is used up or no pair is left.
 *
 * Every round of the main loop:
 *   - asks getMaxPair() for one pair per context,
 *   - sorts those pairs with comparePair(),
 *   - registers every non-NULL pair in the dictionary under the SAME new code
 *     and replaces its occurrences,
 *   - destroys the replaced pairs and calls deletePQ() for queue indices
 *     1 .. THRESHOLD-1 of every context,
 *   - moves on to the next code.
 *
 * @param input      Stream the working structure (CRDS) is built from.
 * @param code_len   Code width in bits; codes stay below 2^code_len.
 * @param cont_len   Forwarded to createCRDS().
 * @param mchar_size Forwarded to createCRDS().
 * @return The dictionary produced by createDict() after copyCompSeq() has
 *         been applied to it, or NULL if the CRDS or the scratch array could
 *         not be allocated.
 */
DICT *RunCodeRepair(FILE *input, uint code_len, uint cont_len, uint mchar_size)
{
  uint i, j;
  CRDS *crds;
  DICT *dict;
  CODE new_code;
  PAIR **mp_ary;
  uint limit;
  uint t_num_rules, c_seq_len;
#ifdef DISPLAY
  double comp_ratio;
#endif

  /*
   * limit = 2^code_len, computed with integers. Shifting by the full width of
   * the type is undefined, so saturate there (the factor 8 assumes 8-bit
   * bytes).
   */
  if (code_len < sizeof(uint) * 8) {
    limit = (uint)1 << code_len;
  } else {
    limit = ~(uint)0;
  }

  //initialization
#ifdef DISPLAY
  printf("\n");
  printf("Initializing ...\n");
#endif
  crds = createCRDS(input, cont_len, mchar_size);
  if (crds == NULL) {
    /* In case createCRDS() reports failure by returning NULL. */
    return NULL;
  }

  /*
   * One slot per context plus a trailing NULL sentinel. Allocated before the
   * dictionary so that a failure here leaves nothing but crds to release.
   */
  mp_ary = malloc(sizeof *mp_ary * ((size_t)crds->num_contexts + 1));
  if (mp_ary == NULL) {
    destructCRDS(crds);
    return NULL;
  }

  dict = createDict(crds, code_len);

#ifdef DISPLAY
  printf("///////////////////////////////////////\n");
  printf(" Input text size = %d (bytes).\n", crds->txt_len);
  printf(" Alphabet size = %d.\n", crds->char_size);
  printf(" # of contexts = %d.\n", crds->num_contexts);
  printf(" Code length = %d (bits).\n", code_len);
  printf(" # of new_code = %d.\n", limit - crds->char_size);
  printf("///////////////////////////////////////\n");
  printf("\n");
  printf("Compressing text ...\n");
#endif

  new_code = crds->char_size;
  t_num_rules = 0;
  c_seq_len = crds->txt_len;

  //select replaced pairs
  while (new_code < limit) {
    for (i = 0; i < crds->num_contexts; i++) {
      mp_ary[i] = getMaxPair(crds, i);
    }
    mp_ary[crds->num_contexts] = NULL;

    //sort mp_ary by frequencies.
    //The loops below stop at the first NULL entry, so comparePair is assumed
    //to order NULL entries after all real pairs.
    qsort(mp_ary, crds->num_contexts + 1, sizeof(PAIR *),
          (int(*)(const void *, const void *))comparePair);

    //if mp_ary is empty, then break.
    if (mp_ary[0] == NULL) break;

    //replace pairs: all contexts share the same new_code in this round
    for (i = 0; mp_ary[i] != NULL; i++) {
      addNewPair(dict, new_code, mp_ary[i]);
      c_seq_len -= replacePairs(crds, mp_ary[i], new_code);
      t_num_rules++;
    }

#ifdef DISPLAY
    comp_ratio = calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
                              crds->num_contexts, c_seq_len, t_num_rules,
                              code_len, false);
    printf("\r");
    printf("new_code = [%5d], Comp.ratio = %0.3f %%.",new_code, comp_ratio);
    fflush(stdout);
#endif

    //free replaced pairs
    for (i = 0; mp_ary[i] != NULL; i++) {
      destructPair(crds, mp_ary[i]);
    }

    //free unused pairs
    for (i = 0; i < crds->num_contexts; i++) {
      for (j = 1; j < THRESHOLD; j++) {
        deletePQ(crds, j, i);
      }
    }

    new_code++;
  }

#ifdef DISPLAY
  printf("\n");
  calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
               crds->num_contexts, c_seq_len, t_num_rules, code_len, true);
#endif

  //post processing
  copyCompSeq(crds, dict);
  free(mp_ary);
  destructCRDS(crds);

  return dict;
}