Code Example #1
File: _packrec.c Project: NickeyWoo/mysql-3.23.49
int _nisam_pack_rec_unpack(register N_INFO *info, register byte *to,
			   byte *from, uint reclength)
{
  byte *end_field;
  reg3 N_RECINFO *end;
  N_RECINFO *current_field;
  ISAM_SHARE *share=info->s;
  DBUG_ENTER("_nisam_pack_rec_unpack");

  init_bit_buffer(&info->bit_buff,from,reclength);

  for (current_field=share->rec, end=current_field+share->base.fields ;
       current_field < end ;
       current_field++,to=end_field)
  {
    end_field=to+current_field->base.length;
    (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to,
			     (uchar*) end_field);
  }
  if (! info->bit_buff.error &&
      info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end)
    DBUG_RETURN(0);
  my_errno=HA_ERR_WRONG_IN_RECORD;
  info->update&= ~HA_STATE_AKTIV;
  DBUG_RETURN(-1);
} /* _nisam_pack_rec_unpack */
Code Example #2
File: bitblt_g4.c Project: brouhaha/tumble
static void flush_bits (struct bit_buffer *buf)
{
  size_t s;
  if (buf->bit_idx != 8)
    {
      buf->byte_idx++;
      buf->bit_idx = 8;
    }
  s = fwrite (& buf->data [0], 1, buf->byte_idx, buf->f);
  /* $$$ should check result */
  init_bit_buffer (buf);
}
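The "$$$ should check result" note above is the interesting part: fwrite() can write fewer bytes than requested, and the excerpt silently ignores that. Below is a minimal sketch of a checked flush. The struct layout is hypothetical (only the fields the excerpt touches), not the actual definition from bitblt_g4.c:

#include <stdio.h>
#include <string.h>

/* hypothetical layout -- only the fields the excerpt uses */
struct bit_buffer
{
  FILE *f;
  size_t byte_idx;            /* next free byte in data[]           */
  int bit_idx;                /* free bits left in the current byte */
  unsigned char data [4096];
};

static void init_bit_buffer (struct bit_buffer *buf)
{
  buf->byte_idx = 0;
  buf->bit_idx = 8;
  memset (buf->data, 0, sizeof (buf->data));
}

static int flush_bits_checked (struct bit_buffer *buf)
{
  size_t s;
  if (buf->bit_idx != 8)      /* a partially filled byte counts as used */
    {
      buf->byte_idx++;
      buf->bit_idx = 8;
    }
  s = fwrite (& buf->data [0], 1, buf->byte_idx, buf->f);
  if (s != buf->byte_idx)     /* the "$$$ should check result" part */
    return -1;
  init_bit_buffer (buf);
  return 0;
}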
Code Example #3
File: suftest2.c Project: grinner/fmindex-py
/* **********************************************************
   open filename and write p[0] .. p[n-1] using log(n) bits
   ********************************************************** */
void write_sa(char *filename, int *p, int n)
{
  int int_log2(int);
  void init_bit_buffer(void);
  void fbit_write(FILE *,int,int), fbit_flush( FILE * );
  FILE *sa;
  Int32 psize, i;

  if(_ds_Verbose)
    fprintf(stderr,"Writing sa to file %s\n",filename);
  if((sa=fopen(filename,"wb"))==NULL) {
    perror(filename);
    exit(1);                      /* cannot continue without the output file */
  }

  init_bit_buffer();
  psize = int_log2(n);
  for(i=0;i<n;i++)
    fbit_write(sa,psize,p[i]);
  fbit_flush(sa);
  fclose(sa);
}
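write_sa() only declares init_bit_buffer(), fbit_write() and fbit_flush(); the real routines live elsewhere in this codebase. A minimal, hypothetical file-level bit buffer that matches how they are called here (global state, bits packed MSB-first) might look like the following; the actual implementation may differ:

#include <stdio.h>
#include <stdint.h>

static uint32_t Bit_buffer;      /* pending bits, packed from the MSB down              */
static int Bit_buffer_size;      /* number of pending bits (always < 8 between calls)   */

void init_bit_buffer(void)
{
  Bit_buffer = 0;
  Bit_buffer_size = 0;
}

/* append the n low-order bits of v to file f */
void fbit_write(FILE *f, int n, int v)
{
  if (n > 24) {                  /* split wide writes so the 32-bit buffer never overflows */
    fbit_write(f, n - 24, (v >> 24) & 0xFF);
    n = 24;
    v &= 0xFFFFFF;
  }
  Bit_buffer |= ((uint32_t) v & ((1u << n) - 1)) << (32 - Bit_buffer_size - n);
  Bit_buffer_size += n;
  while (Bit_buffer_size >= 8) { /* emit complete bytes */
    putc((int) (Bit_buffer >> 24), f);
    Bit_buffer <<= 8;
    Bit_buffer_size -= 8;
  }
}

/* zero-pad the last partial byte and write it out */
void fbit_flush(FILE *f)
{
  if (Bit_buffer_size > 0)
    putc((int) (Bit_buffer >> 24), f);
  init_bit_buffer();
}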
Code Example #4
File: compr_main.c Project: peper/pizza
void build_sa(bwi_input *s)
{
  int scmp3(unsigned char *p, unsigned char *q, int maxl);
  void init_bit_buffer(void);
  int fbit_read(FILE *,int);
  int *larsson_sada_sufsort(uchar *, int, int);
  int *suffixsort5n(uchar *, int);
  void out_of_mem(char *s);
  int int_log2(int);  
  int i, n, pointer_size,q,r,sa_size;
  FILE *safile;
  
  /* ------------ check sa file ---------------- */
  n=0;
  safile = fopen(Safile_name,"rb");
  if(safile!=NULL) {
    fseek(safile,0L,SEEK_END);
    n=ftell(safile);
  }

  if (n==0) { 
    // ------- build sa using larsson-sada or 5n
    if(Verbose)  fprintf(stderr, " from scratch ");
    if(Use_larsson_sada) {
      if(Verbose)  fprintf(stderr, "(using ls) ... ");
      s->sa = larsson_sada_sufsort(s->text,s->text_size,s->alpha_size);
    }
    else {
      if(Verbose)  fprintf(stderr, "(using 5n) ... ");
      s->sa = suffixsort5n(s->text,s->text_size);
    }
  } 
  else {     
    // ------ read sa from file --------     
    pointer_size = int_log2(s->text_size);
    // --- compute  sa_size = (s->text_size * pointer_size + 7)/8
    // --- use q and r to avoid overflow
    q = s->text_size/8; r = s->text_size % 8;
    sa_size = (q*pointer_size) + (r*pointer_size+7)/8; 
    if (n != sa_size)
      fatal_error("Invalid .sa file\n");
    if(Verbose) fprintf(stderr, " by reading it from file... ");
    // allocate space for the suffix array
    s->sa = (int *) malloc(s->text_size * sizeof(int));
    if(s->sa==NULL) out_of_mem("build_sa");
    rewind(safile);
    init_bit_buffer();
    for(i=0; i<s->text_size; i++)// read one suffix-array pointer at a time
      s->sa[i] = fbit_read(safile,pointer_size);
    fclose(safile);
  }
  // check the suffix array
#if 0
   for (i=0; i<s->text_size-1; ++i)
     if (scmp3(s->text+s->sa[i], s->text+s->sa[i+1], 
                MIN(s->text_size-s->sa[i], s->text_size-s->sa[i+1]))>=0) {
       fprintf(stderr, "Suffix array check failed at position %d\n", i);
       exit(1);
     }
#endif
}
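The q/r detour in the size check above is easy to verify: with text_size = 8*q + r, the term 8*q*pointer_size is already a multiple of 8, so (text_size*pointer_size + 7)/8 equals q*pointer_size + (r*pointer_size + 7)/8, and the large intermediate product never has to be formed. A small standalone check (the numbers are made up for illustration):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int text_size = 300000000;            /* 300M suffixes (illustrative)      */
  int pointer_size = 29;                /* bits per suffix-array entry       */

  /* direct formula, computed in 64 bits so the product cannot overflow */
  int64_t exact = ((int64_t) text_size * pointer_size + 7) / 8;

  /* the q/r trick from build_sa(): stays within 32-bit arithmetic,
     even though text_size * pointer_size (about 8.7e9) would not      */
  int q = text_size / 8, r = text_size % 8;
  int sa_size = q * pointer_size + (r * pointer_size + 7) / 8;

  assert((int64_t) sa_size == exact);
  printf("expected .sa file size: %d bytes\n", sa_size);
  return 0;
}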
Code Example #5
File: compr_main.c Project: peper/pizza
/* *********************************************************
   The current format of the prologue is the following:
         8 bits      type of compression (2=Hier, 4=Multi Table Huff)
         1 int       size of input file
         1 int       position of eof in s->bw
         1 uint16    size of a super bucket divided by 1024
         1 uchar     size of a bucket divided by 1024 (divides the previous)
	 1 uchar     size-1 mtf_list stored in each bucket  
	 1 uchar     size-1 of the compacted alphabet of the text
	 1 uchar     remapped char selected for occurrence list
	 1 int       # skipped occ of chosen_char in bwt
	 1 int       starting byte of occ-explicit list
       256 bits      boolean map of chars in the text (S = # of 1)
         S int       prefix sum of character occurrences

      for each superbucket
	 S' bytes    map of compact_alph chars occurring in THIS superbucket
	             (S' = (S+7)/8 -- it is byte aligned)  ****FLUSH****
         S int       # occ of all compact_alphabet chars in prev superbuckets

      finally:
        NB x L    starting position of each bucket in the compressed file
                  NB is the number of buckets and L is the number of bits
		  sufficient to represent that length (byte_aligned)


     -------- Body of the compressed file [byte-aligned]   -------------

     for each bucket (let R be the # of distinct chars in the superbucket)

         R 7x8val   # of occ of each char in the previous buckets of the
                    same superbucket. Each value is represented with 
                    the 7x8 encoding. This information is missing for the 
                    first bucket of each superbucket

         R  bits    map of chars appearing in this bucket. 
                    Let R' be the # of distinct chars in this bucket 
                    and L' the # of bits required to represent R'

       L' x M bits  Initial move to front list for this bucket
                    M = min(R',Mtf_save)         

	 ... bits   needed to byte-align in case ONLY of Arith-coding	    

         ??? bits   compressed bucket in mtf + rle + [Ari|Hier|Una] format 

         --- bits   ****FLUSH**** to have byte alignment

     -------- Body of the occ explicit list [byte-aligned]   -------------
     ---------------------------------------------------------------------
     --- URL: we have occ for text positions and rows ---

         ... L bits  list of positions where character ch occurs in
	             the original text. ch = character that occurs
		     close to Marked_char_freq times in the text.
  **************************************************************** */
void write_prologue(bwi_input *s)
{
  void init_bit_buffer(void);
  int int_log2(int);
  void uint_write(int);
  void bit_write(int,int);
  void bit_flush(void);
  void write7x8(int);
  bucket_lev1 sb;
  int i,len,k;

  /* ----- write file and bucket size ------ */
  init_bit_buffer();
  bit_write(8,Type_compression);
  uint_write(s->text_size);
  uint_write(s->bwt_eof_pos);

  assert(Bucket_size_lev1>>10<65536);
  assert((Bucket_size_lev1 & 0x3ff) == 0);
  bit_write(16,Bucket_size_lev1>>10);

  assert(Bucket_size_lev2>>10<256);
  assert((Bucket_size_lev2 & 0x3ff) == 0);
  bit_write(8,Bucket_size_lev2>>10);

  // ---- mtf and alphabet information
  assert(Mtf_save>0 && Mtf_save<=256);
  bit_write(8,Mtf_save-1);

  assert(s->alpha_size>0 && s->alpha_size<=256);
  bit_write(8,s->alpha_size-1);   

  // ---- write chosen_char & starting byte of occ-list
  bit_write(8,s->chosen_char);
  uint_write(s->skip);
  uint_write(0);
  
  // ---- boolean alphabet char map
  for(i=0;i<256;i++)
   if(s->bool_char_map[i]) bit_write(1,1);
   else bit_write(1,0);  

  // ---- write prefix sum of char occ
  for(i=0; i<s->alpha_size; i++)
    uint_write(s->pfx_char_occ[i]);

  // ----- process superbuckets
  for(i=0;i<Num_bucs_lev1;i++) {
    sb = s->buclist_lev1[i];

    for(k=0;k<s->alpha_size;k++)     // boolean char_map
      if(sb.bool_char_map[k]) bit_write(1,1);
      else bit_write(1,0);
    bit_flush();                    // we keep everything byte aligned

    if(i>0)                          // write prefix-occ 
      for(k=0;k<s->alpha_size;k++)
        uint_write(sb.occ[k]);
  }

  // ----- leave space for storing the start positions of buckets
  len = (int_log2(s->text_size)+7)/8;   //it's byte-aligned
  for(i=0;i<Num_bucs_lev2;i++)
    bit_write(len * 8,0);

}
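The format comment mentions that per-bucket occurrence counts use a "7x8" encoding, and write_prologue() declares write7x8() without showing it. One plausible reading of "7x8" is a variable-length code with 7 payload bits per 8-bit byte and the high bit as a continuation flag; the sketch below follows that assumption and is not the actual routine from compr_main.c:

#include <stdio.h>

/* hypothetical 7-bits-per-byte writer: low 7 bits are payload,
   the high bit says "another byte follows" */
static void write7x8_sketch(FILE *f, unsigned int v)
{
  while (v >= 128) {
    putc((int) (0x80 | (v & 0x7F)), f);
    v >>= 7;
  }
  putc((int) v, f);                 /* last byte: continuation bit clear */
}

/* matching reader (assumes well-formed input) */
static unsigned int read7x8_sketch(FILE *f)
{
  unsigned int v = 0, shift = 0;
  int c;
  while ((c = getc(f)) >= 128) {    /* continuation bit set */
    v |= (unsigned int) (c & 0x7F) << shift;
    shift += 7;
  }
  return v | ((unsigned int) c << shift);
}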
Code Example #6
File: compr_main.c Project: peper/pizza
/* ************************************************************
   *                                                          *
   * main compression routine                                 *
   *                                                          *
   ********************************************************** */
void compress_file(void)
{
  void read_text(FILE *, bwi_input *s);
  void remap_alphabet(bwi_input *s);
  void build_sa(bwi_input *s);
  void compute_bwt(bwi_input *s);
  void compute_info_superbuckets(bwi_input *s);
  void compute_info_buckets(bwi_input *s);
  void write_prologue(bwi_input *s);
  void compress_superbucket(bwi_input *s, int);
  int compute_locations(bwi_input *s);
  int compute_locations_dict(bwi_input *s, int*);
  int compute_ranks_dict(bwi_input *s, int*);
  int compute_locations_huffword(bwi_input *s, int *);
  void bit_flush( void );
  void bit_write(int,int);  
  void init_bit_buffer(void);
  void write_susp_infos(bwi_input *s);
  bwi_input s;
  int i,len, retr_occ, retr_occ2, loc_occ_range;
  int Start_prologue_ranks;

  /* --------- Load the text file from disk ------- */  
  if(Verbose) fprintf(stderr,"Reading input file... ");  
  read_text(Infile, &s);
  if(Verbose) fprintf(stderr,"done! (%f seconds)\n",getTime());
  
  /* --------- Compact alphabet ------- */  
  if(Verbose>1) fprintf(stderr,"Remapping alphabet... ");  
  remap_alphabet(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds). ",getTime());
  if(Verbose>1) fprintf(stderr,"Compact alphabet size = %d\n",s.alpha_size);

  /* --------- Build suffix array ------- */  
  if(Verbose) fprintf(stderr,"Building suffix array");  
  build_sa(&s);
  if(Verbose) fprintf(stderr,"done! (%f seconds)\n",getTime());

  /* --------- Compute BWT ------- */  
  if(Verbose>1) fprintf(stderr,"Computing BWT... ");
  compute_bwt(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds)\n",getTime());

  /* ------- mark chars and compute locations ----- */ 
  if (Is_dictionary)
    retr_occ = compute_locations_dict(&s,&loc_occ_range);    // dictionary
  else if (Is_huffword)
    retr_occ = compute_locations_huffword(&s,&loc_occ_range);// huffword 
  else if (Is_URL)
    retr_occ = compute_ranks_dict(&s,&loc_occ_range);        // URL
  else
    retr_occ = compute_locations(&s);                        // standard


  /* --------- Compute various infos for each superbucket ------- */  
  if(Verbose>1) fprintf(stderr,"Computing infos superbukets... ");
  compute_info_superbuckets(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds)\n", getTime());

  /* --------- Compute various infos for each bucket ------- */  
  if(Verbose>1) fprintf(stderr,"Computing infos buckets... ");
  compute_info_buckets(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds)\n", getTime());

  /* --------- Writing the compressed file ------- */
  Infile_size = s.text_size; 
  Outfile_size=0;

  write_prologue(&s);
  if(Verbose) fprintf(stderr,"Prologue --> %d bytes!\n",Outfile_size);

  for(i=0;i<Num_bucs_lev1;i++)
    compress_superbucket(&s,i);

  /* ---- keep starting positions of occ-explicit list ---- */
  Start_prologue_occ = Outfile_size;

  /* -- write the starting position of buckets -- */
  write_susp_infos(&s);

  if (fseek(Outfile,Start_prologue_occ,SEEK_SET)) {
    fprintf(stderr, "Seek error on output file -compress_file-\n");
    exit(1);
  }


  /* -- write the position of the marked chars ---- */
  init_bit_buffer();
  if(Is_dictionary || Is_huffword || Is_URL)
    len = int_log2(loc_occ_range);     // bits required for each rank
  else  
    len = int_log2(s.text_size);       // bits required for each pos 

  for(i=0; i < retr_occ; i++)
    bit_write(len,s.loc_occ[i]);

  bit_flush();

  Start_prologue_ranks = (int)ftell(Outfile);

  if(Verbose)  
    fprintf(stderr,"List of %d marked ranks --> %d bytes!\n",
	    retr_occ,Start_prologue_ranks-Start_prologue_occ);

  /* -- in the case of URL we also store the DICT info -- */
  /* It should be put together with the computation above --*/
  /* Thus removing these differences in the code --*/
  /* Hence Start_prologue_occ indicates the starting position of RANKS. */
  /* After retr_occ RANKS start the LOCATIONS, which are again retr_occ */
  /* in number. The value of retr_occ can be computed at decompression time */
  /* by using the same formula adopted in compute_ranks_dict() */
  if (Is_URL) {
    retr_occ2 = compute_locations_dict(&s,&loc_occ_range);  // DICT
    
    if (retr_occ != retr_occ2)
      out_of_mem("Unequal number of sampled NULLs\n");

    for(i=0; i < retr_occ; i++)
      bit_write(len,s.loc_occ[i]);
    
    bit_flush();

    if(Verbose)
      fprintf(stderr,"List of %d marked locations --> %d bytes!\n",
	      retr_occ2,(int)ftell(Outfile) - Start_prologue_ranks);
  }
}
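compress_file() sizes each stored entry with int_log2(): len = int_log2(s.text_size) bits per position, or int_log2(loc_occ_range) bits per rank. Judging from the call sites here and in write_sa() above, the helper returns the number of bits needed to write any value in [0, n); a plausible sketch with that meaning follows, though the real int_log2() in this codebase may handle edge cases differently:

/* hypothetical: smallest b such that every value in [0, n) fits in b bits */
static int int_log2_sketch(int n)
{
  int bits = 1;                        /* report at least one bit */
  while ((1u << bits) < (unsigned) n)
    bits++;
  return bits;
}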
Code Example #7
File: compr_main.c Project: peper/pizza
/* **********************************************************************
   compress and write to file a bucket of length "len" starting at in[0].
   the compression is done as follows:
   first the characters are remapped (we expect only a few distinct chars
   in a single bucket)  then we use mtf (with a list of size Mtf_save)
   then we rle and compress using a unary code. 
   ********************************************************************** */ 
void compress_bucket(uchar *in, int len, int alpha_size)
{
  int int_log2(int);
  void init_bit_buffer(void);
  void bit_write(int,int);
  void bit_flush( void );
  void out_of_mem(char *);
  int mtf_string(uchar *, uchar *, uchar *, int);
  void rle_hierarchical(uchar *, int, int);  
  void multihuf_compr(uchar *, int, int);
  int k,j,bits_x_char,local_alpha_size,mtf_len;
  uchar c,mtf[256],local_bool_map[256], local_map[256]; 
  uchar *mtf_seq;

  /* ---------- init ------------ */
  init_bit_buffer();

  /* ---------- compute and write local boolean map ------ */
  for(k=0;k<alpha_size;k++)     
    local_bool_map[k]=local_map[k]=0;
  local_alpha_size=0;

  for(j=0;j<len;j++) {             // compute local boolean map
    c=in[j];                       // remapped char
    assert(c<alpha_size);                              
    local_bool_map[c]=1;     
  }

  for(k=0;k<alpha_size;k++)      // compute local map
    if(local_bool_map[k])
      local_map[k]=local_alpha_size++;  

  for(j=0;j<len;j++)             // remap bucket
    in[j]=local_map[in[j]];       
  
  for(k=0;k<alpha_size;k++)     // write bool char map to file 
    if(local_bool_map[k]) bit_write(1,1); 
    else bit_write(1,0);

  /* ----------- compute and write mtf picture ------------- */
  mtf_seq = (uchar *) malloc(2*len*sizeof(uchar)); // mtf temporary buffer
  if(mtf_seq==NULL) out_of_mem("compress_bucket (mtf_seq)");
  mtf_len = mtf_string(in,mtf_seq,mtf,len);  // mtf_seq=mtf(in), init mtf-list
  bits_x_char = int_log2(local_alpha_size);   // write mtf to file
  for(k=0;k<MIN(Mtf_save,local_alpha_size);k++) {
    bit_write(bits_x_char,mtf[k]);  
  }


  // -- Applies the proper compression routine --
  switch (Type_compression) 
    {
    case ARITH:  // ---- Arithmetic compression of the bucket -----
      fatal_error("Arithmetic coding no longer available -compress_bucket-\n");
      exit(1);
    case HIER3:  // ---- three-leveled model: Fenwick's proposal -----
      rle_hierarchical(mtf_seq, mtf_len,local_alpha_size);
      break;
    case UNARY:  // ---- Unary compression of mtf-ranks with escape -----
      fatal_error("Unary coding no longer available -compress_bucket-\n");
      exit(1);
    case MULTIH: // ---- RLE + MultiHuffman compression of the bucket -----
      multihuf_compr(mtf_seq,mtf_len,local_alpha_size);  
      break;
    default:
      fprintf(stderr,"\n Compression algorithm unknown! ");
      fprintf(stderr,"-compress_superbucket-\n");
      exit(1);
    }
  bit_flush();         // Byte-align the next compressed bucket
  free(mtf_seq);
}
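The comment above describes the bucket pipeline as remap, then move-to-front, then RLE plus the final coder. For reference, here is a bare-bones move-to-front pass in the spirit of the mtf_string() call (hypothetical; the real routine also honours the Mtf_save window and feeds the RLE stage, which this sketch omits):

#include <string.h>

typedef unsigned char uchar;

/* Encode in[0..len-1] (symbols already remapped to 0..alpha_size-1) into
   out[]; mtf[] ends up holding the final move-to-front list.  Returns the
   number of symbols written (== len in this simplified version).          */
static int mtf_sketch(const uchar *in, uchar *out, uchar *mtf,
                      int len, int alpha_size)
{
  int i, j;

  for (j = 0; j < alpha_size; j++)      /* start from the identity list */
    mtf[j] = (uchar) j;

  for (i = 0; i < len; i++) {
    uchar c = in[i];
    for (j = 0; mtf[j] != c; j++)       /* rank of c in the current list */
      ;
    out[i] = (uchar) j;
    memmove(mtf + 1, mtf, (size_t) j);  /* shift the first j symbols up  */
    mtf[0] = c;                         /* ...and move c to the front    */
  }
  return len;
}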
Code Example #8
File: _packrec.c Project: NickeyWoo/mysql-3.23.49
my_bool _nisam_read_pack_info(N_INFO *info, pbool fix_keys)
{
  File file;
  int diff_length;
  uint i,trees,huff_tree_bits,rec_reflength,length;
  uint16 *decode_table,*tmp_buff;
  ulong elements,intervall_length;
  char *disk_cache,*intervall_buff;
  uchar header[32];
  ISAM_SHARE *share=info->s;
  BIT_BUFF bit_buff;
  DBUG_ENTER("_nisam_read_pack_info");

  if (nisam_quick_table_bits < 4)
    nisam_quick_table_bits=4;
  else if (nisam_quick_table_bits > MAX_QUICK_TABLE_BITS)
    nisam_quick_table_bits=MAX_QUICK_TABLE_BITS;

  file=info->dfile;
  my_errno=0;
  if (my_read(file,(byte*) header,sizeof(header),MYF(MY_NABP)))
  {
    if (!my_errno)
      my_errno=HA_ERR_END_OF_FILE;
    DBUG_RETURN(1);
  }
  if (memcmp((byte*) header,(byte*) nisam_pack_file_magic,4))
  {
    my_errno=HA_ERR_WRONG_IN_RECORD;
    DBUG_RETURN(1);
  }
  share->pack.header_length=uint4korr(header+4);
  share->min_pack_length=(uint) uint4korr(header+8);
  share->max_pack_length=(uint) uint4korr(header+12);
  set_if_bigger(share->base.pack_reclength,share->max_pack_length);
  elements=uint4korr(header+16);
  intervall_length=uint4korr(header+20);
  trees=uint2korr(header+24);
  share->pack.ref_length=header[26];
  rec_reflength=header[27];
  diff_length=(int) rec_reflength - (int) share->base.rec_reflength;
  if (fix_keys)
    share->rec_reflength=rec_reflength;
  share->base.min_block_length=share->min_pack_length+share->pack.ref_length;

  if (!(share->decode_trees=(DECODE_TREE*)
	my_malloc((uint) (trees*sizeof(DECODE_TREE)+
			  intervall_length*sizeof(byte)),
		  MYF(MY_WME))))
    DBUG_RETURN(1);
  intervall_buff=(byte*) (share->decode_trees+trees);

  length=(uint) (elements*2+trees*(1 << nisam_quick_table_bits));
  if (!(share->decode_tables=(uint16*)
	my_malloc((length+512)*sizeof(uint16)+
		  (uint) (share->pack.header_length+7),
		  MYF(MY_WME | MY_ZEROFILL))))
  {
    my_free((gptr) share->decode_trees,MYF(0));
    DBUG_RETURN(1);
  }
  tmp_buff=share->decode_tables+length;
  disk_cache=(byte*) (tmp_buff+512);

  if (my_read(file,disk_cache,
	      (uint) (share->pack.header_length-sizeof(header)),
	      MYF(MY_NABP)))
  {
    my_free((gptr) share->decode_trees,MYF(0));
    my_free((gptr) share->decode_tables,MYF(0));
    DBUG_RETURN(1);
  }

  huff_tree_bits=max_bit(trees ? trees-1 : 0);
  init_bit_buffer(&bit_buff,disk_cache,
		  (uint) (share->pack.header_length-sizeof(header)));
	/* Read new info for each field */
  for (i=0 ; i < share->base.fields ; i++)
  {
    share->rec[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,4);
    share->rec[i].pack_type=(uint) get_bits(&bit_buff,4);
    share->rec[i].space_length_bits=get_bits(&bit_buff,4);
    share->rec[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff,
								huff_tree_bits);
    share->rec[i].unpack=get_unpack_function(share->rec+i);
  }
  skipp_to_next_byte(&bit_buff);
  decode_table=share->decode_tables;
  for (i=0 ; i < trees ; i++)
    read_huff_table(&bit_buff,share->decode_trees+i,&decode_table,
		    &intervall_buff,tmp_buff);
  decode_table=(uint16*)
    my_realloc((gptr) share->decode_tables,
	       (uint) ((byte*) decode_table - (byte*) share->decode_tables),
	       MYF(MY_HOLD_ON_ERROR));
  {
    my_ptrdiff_t diff=PTR_BYTE_DIFF(decode_table,share->decode_tables);
    share->decode_tables=decode_table;
    for (i=0 ; i < trees ; i++)
      share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table,
					      diff, uint16*);
  }

	/* Fix record-ref-length for keys */
  if (fix_keys)
  {
    for (i=0 ; i < share->base.keys ; i++)
    {
      share->keyinfo[i].base.keylength+=(uint16) diff_length;
      share->keyinfo[i].base.minlength+=(uint16) diff_length;
      share->keyinfo[i].base.maxlength+=(uint16) diff_length;
      share->keyinfo[i].seg[share->keyinfo[i].base.keysegs].base.length=
	(uint16) rec_reflength;
    }
  }

  if (bit_buff.error || bit_buff.pos < bit_buff.end)
  {					/* info_length was wrong */
    my_errno=HA_ERR_WRONG_IN_RECORD;
    my_free((gptr) share->decode_trees,MYF(0));
    my_free((gptr) share->decode_tables,MYF(0));
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}