Пример #1
0
/*
 * Applies the rules already stored in `dict` to the text read from `input`,
 * then keeps adding new rules for the pairs returned by getMaxPair() until no
 * pair is left or the code space is exhausted, and finally stores the
 * resulting sequence into `dict` via getCompSeq().
 *
 * dict             dictionary to apply and extend; its num_rules field may be
 *                  lowered first so that
 *                  num_rules - CHAR_SIZE + ut->size <= shared_dictsize.
 * input, length    forwarded to createRDS() / createRDSwithsampling().
 * shared_dictsize  cap applied to the rules inherited from `dict` (see above).
 * codewordlength   new rules are added only while
 *                  num_rules + ut->size - CHAR_SIZE < 2^codewordlength.
 * ut               used-character table; only ut->size is read here.
 * chunk_size       forwarded to createRDSwithsampling() when `sample` is set.
 * sample           non-zero selects createRDSwithsampling() over createRDS().
 *
 * Returns `dict`, or NULL when `dict` or `ut` is NULL or the RDS could not be
 * created. Also resets `pqidx`, which is declared outside this function.
 */
DICT *RunRepair(DICT *dict, FILE *input, int length, unsigned int shared_dictsize, unsigned int codewordlength, USEDCHARTABLE *ut, unsigned int chunk_size, unsigned int sample)
{
  RDS  *rds;
  PAIR *max_pair;
  PAIR *target;
  CODE new_code;
  unsigned int code_limit;
  uint i;

  if (dict == NULL || ut == NULL) return NULL;

  // Shifting by the full width of the type (or more) is undefined, so
  // saturate the bound instead of evaluating 1U << codewordlength blindly.
  if (codewordlength < sizeof(unsigned int) * 8)
    code_limit = 1U << codewordlength;
  else
    code_limit = ~0U;

  if (sample)
    rds = createRDSwithsampling(input, length, chunk_size);
  else
    rds = createRDS(input, length);
  if (!rds) return NULL;
  pqidx = 0;

  // Drop inherited rules that do not fit into the shared dictionary budget.
  // NOTE(review): if shared_dictsize + CHAR_SIZE < ut->size this expression
  // goes below CHAR_SIZE (or wraps, depending on the field types) - confirm
  // that callers never pass such a combination.
  if (dict->num_rules - CHAR_SIZE + ut->size > shared_dictsize) {
    dict->num_rules = shared_dictsize + CHAR_SIZE - ut->size;
  }

  // Convert the text using the rules of the current dictionary.
  for (i = CHAR_SIZE; i < dict->num_rules; i++) {
    target = locatePair(rds, dict->rule[i].left, dict->rule[i].right);
    if (target) {
      // The value returned by replacePairs() is not needed here.
      (void)replacePairs(rds, target, i);
    }
  }

  // Extend the dictionary. getMaxPair() is evaluated before the capacity
  // test on every iteration, exactly as in the original condition.
  while ((max_pair = getMaxPair(rds)) != NULL && (unsigned int)(dict->num_rules + ut->size - CHAR_SIZE) < code_limit) {
    new_code = addNewPair(dict, max_pair);
    (void)replacePairs(rds, max_pair, new_code);
  }

  getCompSeq(rds, dict);
  destructRDS(rds);

  return dict;
}
/*
 * Service initialisation hook: called when the service is loaded.
 *
 * Announces the service's preview/204 capabilities through srv_xdata and
 * fills the global search_engine table with the Baidu and Google entries.
 * server_conf is not used here. Always returns CI_OK.
 */
int search_record_init_service(ci_service_xdata_t * srv_xdata,
                      struct ci_server_conf *server_conf)
{
    ci_debug_printf(5, "Initialization of Search Record module......\n");

    /* ICAP capabilities advertised to clients: */
    ci_service_set_preview(srv_xdata, 128);            /* preview size of 128 */
    ci_service_enable_204(srv_xdata);                  /* 204 responses are supported */
    ci_service_set_transfer_preview(srv_xdata, "*");   /* send preview data for all files */

    /*
     * Requesting the X-Authenticated-User / X-Authenticated-Groups headers
     * through ci_service_set_xopts() is currently disabled.
     */

    /* Start from an initialised table, then register one entry per search engine. */
    initPairs(&search_engine);
    addNewPair(&search_engine, baidu_matchurl, baidu_wordlist, baidu_site);
    addNewPair(&search_engine, google_matchurl, google_wordlist, google_site);

    return CI_OK;
}
Пример #3
0
/*
 * Builds a dictionary for the text read from `input`.
 *
 * In every round the pair returned by getMaxPair() for each context is
 * collected, the collected pairs are sorted, and all of them are replaced by
 * the same new code. Rounds continue until no pair is returned any more or
 * the code space of 2^code_len codes is used up. The final sequence is copied
 * into the dictionary with copyCompSeq().
 *
 * input       text to process; forwarded to createCRDS().
 * code_len    code length in bits; limits the number of codes to 2^code_len.
 * cont_len    forwarded to createCRDS().
 * mchar_size  forwarded to createCRDS().
 *
 * Returns the dictionary produced by createDict(), or NULL when the CRDS, the
 * dictionary or the work array could not be created.
 */
DICT *RunCodeRepair(FILE *input, uint code_len, uint cont_len, uint mchar_size)
{
  uint i, j;
  CRDS *crds;
  DICT *dict;
  CODE new_code;
  PAIR **mp_ary;
  uint limit;
  uint t_num_rules, c_seq_len;
#ifdef DISPLAY
  double comp_ratio;
#endif

  // 2^code_len as an integer shift; saturate when code_len reaches the word
  // width, where the shift itself would be undefined.
  if (code_len < sizeof(uint) * 8)
    limit = (uint)1 << code_len;
  else
    limit = ~(uint)0;

  //initialization
#ifdef DISPLAY
  printf("\n");
  printf("Initializing ...\n");
#endif
  crds = createCRDS(input, cont_len, mchar_size);
  if (crds == NULL) return NULL;

  dict = createDict(crds, code_len);
  if (dict == NULL) {
    destructCRDS(crds);
    return NULL;
  }

#ifdef DISPLAY
  printf("///////////////////////////////////////\n");
  printf(" Input text size = %d (bytes).\n", crds->txt_len);
  printf(" Alphabet size   = %d.\n", crds->char_size);
  printf(" # of contexts   = %d.\n", crds->num_contexts);
  printf(" Code length     = %d (bits).\n", code_len);
  printf(" # of new_code   = %d.\n", limit - crds->char_size);
  printf("///////////////////////////////////////\n");
  printf("\n");
  printf("Compressing text ...\n");
#endif

  // One slot per context plus a trailing slot that always stays NULL and
  // terminates the "mp_ary[i] != NULL" loops below.
  mp_ary = malloc(sizeof *mp_ary * (crds->num_contexts + 1));
  if (mp_ary == NULL) {
    // NOTE(review): dict is not released on this path because no dictionary
    // destructor is visible from here - confirm and add one if it exists.
    destructCRDS(crds);
    return NULL;
  }

  new_code = crds->char_size;
  t_num_rules = 0;
  c_seq_len = crds->txt_len;

  //select replaced pairs
  while (new_code < limit) {
    for (i = 0; i < crds->num_contexts; i++) {
      mp_ary[i] = getMaxPair(crds, i);
    }
    mp_ary[crds->num_contexts] = NULL;

    //sort mp_ary by frequencies.
    // NOTE(review): the loops below rely on comparePair ordering NULL entries
    // after all non-NULL ones; comparePair is defined elsewhere - confirm.
    qsort(mp_ary, crds->num_contexts + 1, sizeof(PAIR *),
          (int(*)(const void *, const void *))comparePair);

    //if mp_ary is empty, then break.
    if (mp_ary[0] == NULL) break;

    //replace pairs: every pair of this round is mapped to the same new_code
    for (i = 0; mp_ary[i] != NULL; i++) {
      addNewPair(dict, new_code, mp_ary[i]);
      c_seq_len -= replacePairs(crds, mp_ary[i], new_code);
      t_num_rules++;
    }

#ifdef DISPLAY
    comp_ratio =
      calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
                   crds->num_contexts, c_seq_len, t_num_rules, code_len, false);
    printf("\r");
    printf("new_code = [%5d], Comp.ratio = %0.3f %%.",new_code, comp_ratio);
    fflush(stdout);
#endif

    //free replaced pairs
    for (i = 0; mp_ary[i] != NULL; i++) {
      destructPair(crds, mp_ary[i]);
    }
    //free unused pairs
    for (i = 0; i < crds->num_contexts; i++) {
      for (j = 1; j < THRESHOLD; j++) {
        deletePQ(crds, j, i);
      }
    }
    new_code++;
  }

#ifdef DISPLAY
  printf("\n");
  calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
               crds->num_contexts, c_seq_len, t_num_rules, code_len, true);
#endif

  //post processing
  copyCompSeq(crds, dict);
  free(mp_ary);
  destructCRDS(crds);

  return dict;
}