Пример #1
0
/*
 * RunRepair - run Re-Pair over `input`, seeded with an existing dictionary.
 *
 * Steps, in order:
 *   1. Build the working structure (RDS) from `input`; the sampling variant
 *      createRDSwithsampling() is used when `sample` is non-zero, createRDS()
 *      otherwise.
 *   2. Clamp dict->num_rules so that
 *      (num_rules - CHAR_SIZE + ut->size) does not exceed `shared_dictsize`.
 *   3. Re-apply every rule already in `dict` (indices CHAR_SIZE and up) to
 *      the new text.
 *   4. Keep adding the pair returned by getMaxPair() as a new rule until
 *      either no pair is returned or
 *      (num_rules + ut->size - CHAR_SIZE) reaches 2^codewordlength.
 *   5. Store the compressed sequence via getCompSeq() and release the RDS.
 *
 * dict            dictionary to extend; modified in place, must not be NULL.
 * input           stream the text is read from.
 * length          forwarded to the RDS constructor.
 * shared_dictsize upper bound used for the clamp in step 2.
 * codewordlength  code width in bits; bounds the number of rules in step 4.
 * ut              used-character table; only ut->size is read here.
 * chunk_size      forwarded to createRDSwithsampling() when sampling.
 * sample          non-zero selects the sampling constructor.
 *
 * Returns `dict`, or NULL when the RDS could not be created (in which case
 * `dict` is left untouched).
 *
 * Side effect: resets `pqidx` (declared outside this function) to 0.
 */
DICT *RunRepair(DICT *dict, FILE *input, int length, unsigned int shared_dictsize, unsigned int codewordlength, USEDCHARTABLE *ut, unsigned int chunk_size, unsigned int sample)
{
  RDS  *rds;
  PAIR *max_pair;
  PAIR *target;
  CODE new_code;
  unsigned int code_limit;
  uint i;

  if (sample)
    rds = createRDSwithsampling(input, length, chunk_size);
  else
    rds = createRDS(input, length);
  if (!rds) return NULL;
  pqidx = 0;

  /* Never carry over more shared rules than the caller allows. */
  if (dict->num_rules - CHAR_SIZE + ut->size > shared_dictsize) {
    dict->num_rules = shared_dictsize + CHAR_SIZE - ut->size;
  }

  /* Transform the text using the rules already present in the dictionary. */
  for (i = CHAR_SIZE; i < dict->num_rules; i++) {
    target = locatePair(rds, dict->rule[i].left, dict->rule[i].right);
    if (target) {
      replacePairs(rds, target, i);
    }
  }

  /*
   * Number of distinct codes a codeword can express.  Shifting by the full
   * width of the type (or more) is undefined behavior, so saturate instead.
   * The width is computed assuming 8-bit bytes.
   */
  if (codewordlength < sizeof(unsigned int) * 8)
    code_limit = 1U << codewordlength;
  else
    code_limit = ~0U;

  /* getMaxPair() is deliberately evaluated before the limit check. */
  while ((max_pair = getMaxPair(rds)) != NULL && (unsigned int)(dict->num_rules + ut->size - CHAR_SIZE) < code_limit) {
    new_code = addNewPair(dict, max_pair);
    replacePairs(rds, max_pair, new_code);
  }

  getCompSeq(rds, dict);
  destructRDS(rds);

  return dict;
}
Пример #2
0
void RePair::compress(uint nodes) {
  uint i = 1; uint lasti=1;
  
  double prevRatio=100.0;
  
  while(true) {

    cout << "m vale = " << m << endl;
    while(replacePairs()>0) {  //** mientras se hayan hecho reemplazos...
      //cout.setw(2);
      //cout << "Repair.compress: It: " << i++ << " compressed: " << 100.*m/n << "%" << " m=" << m << endl;
      cout << "Repair.compress: It: " << i++ << " compressed: " << 100.*m/n << "%" << " m=" << m <<  ", diff prev iter =" << (prevRatio - (100.0*m/n)) << endl;

   	  //cout << "Repair.compress: It: " << i++ << " compressed: " << 100.*m/n << "%" << " m=" << m << endl;
	  
	  //if ((100.*m/n)<43.0) break;	  
//	  if ((100.*m/n)<60.0) break;	  
	  //if ((100.*m/n)<46.0) break;
	  //if ((100.*m/n)<44.0) break;
	  //if ((100.*m/n)<45.0) break;
	  //if ((100.*m/n)<47.0) break;
	//  if ((100.*m/n)< CORTE_REPAIR) break;	    //con break_bitmap=8
	//  if  (  ((prevRatio - (100.*m/n)) < 0.0005 )  ||  ((100.*m/n)< CORTE_REPAIR) ) break;
	//  if  (  ((prevRatio - (100.*m/n)) < 0.00000005 )  ||  (((prevRatio - (100.*m/n)) < CORTE_REPAIR) ) ) break;
	  	  
	  if  (((prevRatio - (100.*m/n)) < CORTE_REPAIR) )  break;
	  prevRatio = (100.*m/n);

    }
    //  cout << "Repair.compress: It: " << i++ << " compressed: " << 100.*m/n << "%" << " m=" << m <<  ", diff prev iter =" << (prevRatio - (100.0*m/n)) << endl;
    
    cout << "lasti vale " << lasti << " i vale " << i ;  //** fari **/
    if(lasti<=i) break;

    	
    	
    uint sorted=0;                           //** Normalmente no entra aqu� !!! **/
    cout << "Reordering symbols: ";
    cout.flush();
    for(uint j=0;j<m;j++) {
      if(data[j]<=nodes) {   
        for(uint k=j+1;k<=m;k++)
          if(data[k]<=nodes) {
            sort(data+j+1,data+k);
            sorted++;
            break;
          }
      }
    } 
    cout << sorted << endl;
    lasti = i;    
  }
}
Пример #3
0
/*
 * RunCodeRepair - context-aware Re-Pair compression of `input`.
 *
 * Each round asks getMaxPair() for one candidate pair per context, sorts the
 * candidates with comparePair(), and replaces every non-NULL candidate with
 * the same `new_code`.  Rounds continue until `new_code` reaches
 * 2^code_len or a round yields no candidate at all.
 *
 * input       stream the text is read from (passed to createCRDS()).
 * code_len    code width in bits; bounds the number of codes handed out.
 * cont_len    forwarded to createCRDS().
 * mchar_size  forwarded to createCRDS().
 *
 * Returns the dictionary produced by createDict() after copyCompSeq() has
 * stored the compressed sequence in it, or NULL if the CRDS, the candidate
 * array, or the dictionary could not be created.
 *
 * When DISPLAY is defined, progress and statistics are printed to stdout.
 */
DICT *RunCodeRepair(FILE *input, uint code_len, uint cont_len, uint mchar_size) 
{
  uint i, j;
  CRDS *crds;
  DICT *dict;
  CODE new_code;
  PAIR **mp_ary;
  uint limit;
  uint t_num_rules, c_seq_len;
#ifdef DISPLAY
  double comp_ratio;
#endif

  /*
   * 2^code_len computed with integers: a pow() round trip through double can
   * truncate to limit - 1 if the result is inexact, and converting 2^32 to a
   * 32-bit unsigned is undefined.  Saturate when the shift would be as wide
   * as the type (width computed assuming 8-bit bytes).
   */
  if (code_len < sizeof(uint) * 8)
    limit = (uint)1 << code_len;
  else
    limit = ~(uint)0;

  //initialization
#ifdef DISPLAY
  printf("\n");
  printf("Initializing ...\n");
#endif
  crds = createCRDS(input, cont_len, mchar_size);
  if (crds == NULL) return NULL;

  /*
   * One slot per context plus one extra slot that is never filled, so the
   * array always contains at least one NULL for the NULL-terminated loops
   * below.  Allocated before the dictionary so that a failure here has
   * nothing but the CRDS to release.
   */
  mp_ary = (PAIR **)malloc(sizeof(PAIR *) * (crds->num_contexts + 1));
  if (mp_ary == NULL) {
    destructCRDS(crds);
    return NULL;
  }

  dict = createDict(crds, code_len);
  if (dict == NULL) {
    free(mp_ary);
    destructCRDS(crds);
    return NULL;
  }

#ifdef DISPLAY
  printf("///////////////////////////////////////\n");
  printf(" Input text size = %d (bytes).\n", crds->txt_len);
  printf(" Alphabet size   = %d.\n", crds->char_size);
  printf(" # of contexts   = %d.\n", crds->num_contexts);
  printf(" Code length     = %d (bits).\n", code_len);
  printf(" # of new_code   = %d.\n", limit - crds->char_size);
  printf("///////////////////////////////////////\n");
  printf("\n");
  printf("Compressing text ...\n");
#endif

  new_code = crds->char_size;
  t_num_rules = 0;
  c_seq_len = crds->txt_len;

  //select replaced pairs
  while (new_code < limit) {
    for (i = 0; i <= crds->num_contexts; i++) {
      mp_ary[i] = NULL;
    }
    for (i = 0; i < crds->num_contexts; i++) {
      mp_ary[i] = getMaxPair(crds, i);
    }

    //sort mp_ary by frequencies.
    //NOTE(review): the loops below assume comparePair() orders NULL entries
    //after all non-NULL ones -- confirm against its definition.
    qsort(mp_ary, crds->num_contexts + 1, sizeof(PAIR *), 
          (int(*)(const void *, const void *))comparePair);

    //if mp_ary is empty, then break.
    if (mp_ary[0] == NULL) break;

    //replace pairs: every context's candidate maps to the same new_code
    for (i = 0; mp_ary[i] != NULL; i++) {
      addNewPair(dict, new_code, mp_ary[i]);
      c_seq_len -= replacePairs(crds, mp_ary[i], new_code);
      t_num_rules++;
    }

#ifdef DISPLAY
    comp_ratio = 
      calCompRatio(crds->txt_len, crds->char_size, crds->cont_len,
                   crds->num_contexts, c_seq_len, t_num_rules, code_len, false);
    printf("\r");
    printf("new_code = [%5d], Comp.ratio = %0.3f %%.",new_code, comp_ratio);
    fflush(stdout);
#endif

    //free replaced pairs
    for (i = 0; mp_ary[i] != NULL; i++) {
      destructPair(crds, mp_ary[i]);
    }
    //free unused pairs
    for (i = 0; i < crds->num_contexts; i++) {
      for (j = 1; j < THRESHOLD; j++) {
        deletePQ(crds, j, i);
      }
    }
    new_code++;
  }

#ifdef DISPLAY
  printf("\n");
  calCompRatio(crds->txt_len, crds->char_size, crds->cont_len, 
               crds->num_contexts, c_seq_len, t_num_rules, code_len, true);
#endif

  //post processing
  copyCompSeq(crds, dict);
  free(mp_ary);
  destructCRDS(crds);

  return dict;
}