Esempio n. 1
0
/* ********************************************************************
   rle+compression of a string: Fenwick's three-level model 
   input
     int len         size of mtf sequence
     uchar *in         input mtf sequence
     int alpha_size   size of the alphabet
   output
     the compressed string is written in the output file 
     (through bit_write(); nothing is returned)
   note 
     Runs of zero ranks are run-length encoded: a run is emitted as the
     2-bit code 00 followed by an 8-bit value z, where z is the number
     of zeroes in the run minus one. A run is flushed as soon as it
     reaches 256 zeroes (z == 255).
     A nonzero mtf rank is coded as follows: if <=2 then the first level
     codes the rank using 2 bits. In case (>2 and <=9) then we 
     write 11 as escape code and go to the second level where the rank
     is coded using 3 bits as (rank - 3). If rank > 9, then five 1 bits
     are written (the 2-bit level-1 escape followed by the 3-bit value
     7, which no (rank - 3) can produce), and (rank - 10) is represented
     using int_log2(mtf_size-10) bits. One important point is that, when
     the rank is equal to mtf_size = MIN(Mtf_save,alpha_size) then the
     next value denotes a character and thus it is coded in binary using
     int_log2(alpha_size) bits.
   ******************************************************************* */
void rle_hierarchical(uchar *in, int len, int alpha_size)
{
  int int_log2(int);
  void bit_write(int,int);
  int bits_x_char,i,z;
  uchar c;
  int mtf_size;

  mtf_size = MIN(Mtf_save,alpha_size);
  bits_x_char = int_log2(alpha_size);
  z=-1;                       // (# of pending zeroes) - 1; -1 means no pending run
  for(i=0; i<len;i++) {
    assert(in[i]<alpha_size);

    if(in[i]==0) {
	 
      // extend the current run; flush it once it holds 256 zeroes
      if(++z==255) {
	 bit_write(2,0);     // 2-bit code 00 announces a run of zeroes
         bit_write(8,255);   // write 255 using 8 bits
	 z=-1;
      }
    }
    else {
      /* ----- check if there are pending zeroes ---- */
      if(z>=0) {
	bit_write(2,0);     // 2-bit code 00 announces a run of zeroes
	bit_write(8,z);     // write z using 8 bits
        z=-1;
      }
      // ---- write a nonzero mtf rank ----- 
      // NOTE: the four tests below are independent (not else-if): the
      // last one can fire in addition to one of the first three.
      if(in[i]<=2) {
	bit_write(2,in[i]);   // binary coding
      }
      if((in[i]>2) && (in[i] <= 9)){
	bit_write(2,3);   // escape level 1
	bit_write(3,in[i]-3);   // binary coding of second level
      }
      if((in[i] > 9) && (in[i] <= mtf_size)){
	bit_write(5,31);           // escape code level 2 (five 1 bits)
	bit_write(int_log2(mtf_size-10),in[i]-10);
      }
      if(in[i] == mtf_size){
	// the element following this rank is a literal character, not a rank
	// NOTE(review): in[++i] is read without checking i+1<len; this
	// assumes the producer always appends the character -- confirm.
	c=in[++i];                      // get actual char
        assert(c<alpha_size); 
	bit_write(bits_x_char,c);      // write remapped char
      }
    }
  }
  // ---- there could be some pending zeroes
  if(z>=0) {
     bit_write(2,0);     // 2-bit code 00 announces a run of zeroes
     bit_write(8,z);     // write z using 8 bits
  }
}
// ===========================================================================
// x=in, y=out, w=exp(2*pi*i*k/n), k=0..n/2-1
//
// Driver that applies fft_myrmics_step() repeatedly, alternating the roles
// of the x and y buffers between passes:
//   - one initial pass with mj = 1 (x -> y),
//   - m-2 middle passes with mj doubling each time, m = int_log2(n),
//   - one final pass with mj = n/2 (x -> y).
// The second and fourth pointer arguments of every call are offsets into the
// same buffers (x + n or y + n, and + 2*mj), so both buffers are presumably
// at least 2*n floats long -- TODO confirm against fft_myrmics_step().
// ===========================================================================
void fft_myrmics_cfft2(int n, float *x, float *y, float *w) {

  int m, j, mj, tgle, i;

  m = int_log2(n); //(int) (log ((float) n) / log (1.99));
  mj = 1;
  tgle = 1;   // 1: the next middle pass goes y -> x; 0: it goes x -> y

  // First pass: x -> y
  fft_myrmics_step(n, mj, x, x + n, y, y + 2 * mj, w);

  // Middle passes: ping-pong between the two buffers
  for (j = 0; j < m - 2; j++) {
    mj *= 2;
    if (tgle) {
      fft_myrmics_step(n, mj, y, y + n, x, x + 2 * mj, w);
      tgle = 0;
    }
    else {
      fft_myrmics_step(n, mj, x, x + n, y, y + 2 * mj, w);
      tgle = 1;
    }
  }
  // NOTE(review): when tgle is still 1 the most recent pass wrote into y,
  // while the final pass below reads from x. This loop copies x into y
  // (and only the first n floats), which looks like the reverse of what
  // the final pass needs -- confirm the intended direction and length.
  if (tgle) {
    for (i = 0; i < n; i++) {
      y[i] = x[i];
    }
  }
  
  mj = n / 2;
  
  // Last pass: x -> y
  fft_myrmics_step(n, mj, x, x + n, y, y + 2 * mj, w);
}
Esempio n. 3
0
/* *****************************************************************
   Back-patch two pieces of information into the already written
   prologue of the output file:
     1. the starting position of the occ-explicit list
        (Start_prologue_occ), stored at byte offset 19;
     2. the starting position of each of the Num_bucs_lev2 buckets,
        stored in the table that ends right where the first bucket
        begins (s->start_lev2[0]).
   Each bucket position takes (int_log2(text_size)+7)/8 bytes.
   The process exits with status 1 if a seek on Outfile fails.
   ***************************************************************** */ 
void write_susp_infos(bwi_input *s)
{
  void bit_write(int,int);
  void uint_write(int);
  void bit_flush(void);
  int int_log2(int);
  int bucket,table_start,bytes_per_pos;

  /* -- starting position of the occ-explicit list -- */
  // warning! the constant 19 depends on the structure of the prologue!!!
  if (fseek(Outfile,19,SEEK_SET) != 0) {
    fprintf(stderr,"seek error on output file -write_susp_infos-\n");
    exit(1);
  }
  uint_write(Start_prologue_occ);
  bit_flush();

  /* -- table of bucket starting positions -- */
  // Warning: the table offset heavily depends on the structure of the
  //          prologue. start_lev2[0] must already hold its final value
  //          (the original comment says it is set in
  //          compress_superbucket()).
  bytes_per_pos = (int_log2(s->text_size)+7)/8;   // byte-aligned width
  table_start = s->start_lev2[0] - Num_bucs_lev2*bytes_per_pos;
  if (fseek(Outfile,table_start,SEEK_SET) != 0) {
    fprintf(stderr,"seek error on output file -write_susp_infos-\n");
    exit(1);
  }

  bucket = 0;
  while (bucket < Num_bucs_lev2) {
    bit_write(bytes_per_pos*8,s->start_lev2[bucket]);
    bucket++;
  }
  bit_flush();

  // after the table we must be exactly at the start of the first bucket
  assert(ftell(Outfile)==(int)s->start_lev2[0]);
}
    // Set up the generator state and eagerly produce the first edge.
    //   gen               source random generator; a uniform_01 wrapper
    //                     around a copy of it is stored in this->gen
    //   n, m              vertex count and number of edges to produce
    //   a, b, c, d        R-MAT parameters, asserted to sum to 1
    //                     (fraction tolerance 1e-5)
    //   permute_vertices  when true, generated endpoints are mapped
    //                     through a permutation vector built here
    rmat_iterator(RandomGenerator& gen, vertices_size_type n,
                  edges_size_type m, double a, double b, double c,
                  double d, bool permute_vertices = true)
      : gen(), n(n), a(a), b(b), c(c), d(d), edge(m),
        permute_vertices(permute_vertices),
        SCALE(int_log2(n))

    {
      this->gen.reset(new uniform_01<RandomGenerator>(gen));

      assert(boost::test_tools::check_is_close(a + b + c + d, 1., boost::test_tools::fraction_tolerance(1.e-5)));

      if (permute_vertices)
        generate_permutation_vector(gen, vertexPermutation, n);

      // TODO: Generate the entire adjacency matrix then "Clip and flip" if undirected graph

      // First edge: draw the endpoints, then store them (permuted or raw)
      vertices_size_type src, dst;
      boost::tie(src, dst) = generate_edge(this->gen, n, SCALE, a, b, c, d);

      if (!permute_vertices)
        current = std::make_pair(src, dst);
      else
        current = std::make_pair(vertexPermutation[src],
                                 vertexPermutation[dst]);

      // one of the m requested edges is now held in `current`
      --edge;
    }
Esempio n. 5
0
File: FFT.c Progetto: cran/rscimark
/*
 * Return the value (5*N - 2) * int_log2(N) + 2 * (N + 1), computed in
 * double precision. Judging by the name this is the operation-count
 * estimate for an FFT of size N -- the formula's derivation is not
 * visible here.
 */
double FFT_num_flops(int N)
{
     const double log2_of_N = (double) int_log2(N);
     const double size = (double) N;

     return (5.0*size-2)*log2_of_N + 2*(size+1);
}
Esempio n. 6
0
/*
 * Decode the prefix of a bucket up to the position corresponding to the
 * absolute position k, storing the decoded chars in s->mtf_seq[] and
 * updating occ[] for every decoded char. Returns the char decoded at that
 * position.
 *
 *   k       absolute position; its offset inside the bucket is
 *           k % s->bucket_size_lev2
 *   occ     occurrence counters, indexed by decoded char
 *   s       index state (alphabet size, inverse map, output buffer)
 *   is_odd  when nonzero the in-bucket offset is mirrored
 *           (bucket_size_lev2 - offset - 1) and occ[] is decremented
 *           instead of incremented; one increment is added back at the end
 *
 * The encoded stream read here is: one symbol of `bits` bits, then for each
 * further position a 1-bit flag followed, only when the flag is set, by a
 * new symbol of `bits` bits (flag clear = repeat the previous char).
 * Symbols are translated through s->inv_map_b[].
 * NOTE(review): fm_bit_read24(a, b) is presumably a macro that stores the
 * value it reads into b -- confirm against its definition.
 */
uchar
get_b_multihuf(ulong k, ulong * occ, fm_index * s, int is_odd)
{
	
	int bit, bits = int_log2(s->alpha_size_b);
	ulong bpos, j;
	uchar prev;

	/* offset of k inside its bucket */
	bpos = k % s->bucket_size_lev2;

	if (is_odd) bpos = s->bucket_size_lev2 - bpos - 1;

	if (s->alpha_size_b == 1)
	{			/* special case bucket with only one char */
		/* nothing is read from the stream: every position holds the same char */
		prev = s->inv_map_b[0];
		if(is_odd) {
			for (j=0; j <= bpos; j++)
			{
				s->mtf_seq[j] = prev;
				occ[prev]--;
				
			}
			/* give back one of the bpos+1 decrements */
			occ[prev]++;
	    } else {	
			for (j = 0; j <= bpos; j++)
			{
				s->mtf_seq[j] = prev;
				occ[prev]++;
				
			}
		}
		return prev;
	}

	/* first symbol is always stored explicitly */
	fm_bit_read24(bits, prev);
	
	s->mtf_seq[0] = prev = s->inv_map_b[prev];
	if (is_odd) occ[prev]--;
	else occ[prev]++;
		
	for(j=1; j<=bpos; j++) {
			  	/* flag: 1 = a new symbol follows, 0 = same char as before */
			  	fm_bit_read24(1, bit);
			 	if(bit){
					fm_bit_read24(bits, prev);
					s->mtf_seq[j] = prev = s->inv_map_b[prev];
				} else s->mtf_seq[j] = prev;
			
			if (is_odd) occ[prev]--;
			else occ[prev]++;			
	}
		
	/* give back the decrement made for the last decoded position */
	if (is_odd) occ[prev]++;
			
	return prev;
}
Esempio n. 7
0
/*
 * Prepare an index for use: read its basic prologue and derive the
 * fields computed from it.
 * Returns the error code of fm_read_basic_prologue() when that is
 * nonzero, FM_OK otherwise.
 */
int fm_use_index (fm_index *fmindex) {
	int error = fm_read_basic_prologue (fmindex);
	if (error)
		return error;

	/*
	 * Small-text indexes need none of the derived fields below; texts
	 * of at least SMALLSMALLFILESIZE are re-tagged with smalltext = 2.
	 */
	if(fmindex->smalltext) {
		fmindex->skip = 0;
		if(fmindex->text_size >= SMALLSMALLFILESIZE)
			fmindex->smalltext = 2;
		return FM_OK;
	}

	/* log of the log of the difference between the two bucket sizes */
	fmindex->int_dec_bits =
		int_log2 (int_log2 (fmindex->bucket_size_lev1 - fmindex->bucket_size_lev2));

	/*
	 * Marked rows: with skip > 1 they are the rows between the bwt_occ
	 * entry of specialchar and that of the next char (or text_size-1
	 * when specialchar is the last char of the alphabet).
	 */
	if(fmindex->skip <= 1) {
		fmindex->num_marked_rows = 0;
	} else {
		fmindex->occcharinf = fmindex->bwt_occ[fmindex->specialchar];
		if(fmindex->specialchar != fmindex->alpha_size-1)
			fmindex->occcharsup = fmindex->bwt_occ[fmindex->specialchar+1];
		else
			fmindex->occcharsup = fmindex->text_size-1;

		fmindex->num_marked_rows = fmindex->occcharsup-fmindex->occcharinf;
	}

	fmindex->log2_row = int_log2(fmindex->text_size);
	/*
	 * NOTE(review): this reads log2textsize, not the log2_row set just
	 * above; presumably log2textsize is filled in while reading the
	 * prologue -- confirm.
	 */
	fmindex->var_byte_rappr = ((fmindex->log2textsize + 7) / 8)*8;

	return FM_OK;

}
    // Generate the whole list of distinct edges up front.
    //   gen               source random generator; a uniform_01 wrapper
    //                     around a copy of it is stored in this->gen
    //   n, m              vertex count and number of distinct edges to draw
    //   a, b, c, d        R-MAT parameters, asserted to sum to 1
    //                     (fraction tolerance 1e-5)
    //   permute_vertices  when true, endpoints are mapped through a
    //                     permutation vector before being stored
    //   ep                predicate deciding whether a distinct edge is kept
    // Edges rejected by `ep` still count towards m.
    unique_rmat_iterator(RandomGenerator& gen, vertices_size_type n,
                         edges_size_type m, double a, double b, double c,
                         double d, bool permute_vertices = true,
                         EdgePredicate ep = keep_all_edges())
      : gen(), done(false)

    {
      assert(boost::test_tools::check_is_close(a + b + c + d, 1., boost::test_tools::fraction_tolerance(1.e-5)));

      this->gen.reset(new uniform_01<RandomGenerator>(gen));

      std::vector<vertices_size_type> vertexPermutation;
      if (permute_vertices)
        generate_permutation_vector(gen, vertexPermutation, n);

      int scale = int_log2(n);

      // (u, v) pairs generated so far, before permutation
      std::map<value_type, bool> seen_edges;

      edges_size_type distinct = 0;
      do {
        vertices_size_type u, v;
        boost::tie(u, v) = generate_edge(this->gen, n, scale, a, b, c, d);

        // For undirected graphs store the lower vertex number first, so
        // that i->j and j->i are recognised as the same edge.
        if (is_same<directed_category, undirected_tag>::value && u > v)
          std::swap(u, v);

        // Duplicate: draw again without counting it
        if (seen_edges.find(std::make_pair(u, v)) != seen_edges.end())
          continue;

        seen_edges[std::make_pair(u, v)] = true;

        if (!permute_vertices) {
          if (ep(u, v))
            values.push_back(std::make_pair(u, v));
        } else {
          if (ep(vertexPermutation[u], vertexPermutation[v]))
            values.push_back(std::make_pair(vertexPermutation[u], vertexPermutation[v]));
        }

        ++distinct;
      } while (distinct < m);
      // NGE - Asking for more than n^2 edges will result in an infinite loop here
      //       Asking for a value too close to n^2 edges may as well

      // NOTE(review): if `ep` rejected every edge, `values` is empty at
      // this point -- confirm callers never hit that case.
      current = values.back();
      values.pop_back();
    }
Esempio n. 9
0
/* **********************************************************
   open filename and write p[0] .. p[n-1], each one using
   int_log2(n) bits.
   input
     char *filename   name of the file to create/overwrite
     int *p           array of n values to write
     int n            number of values
   If the file cannot be opened the error is reported with
   perror() and nothing is written.
   ********************************************************** */
void write_sa(char *filename, int *p, int n)
{
  int int_log2(int);
  void init_bit_buffer(void);
  void fbit_write(FILE *,int,int), fbit_flush( FILE * );
  FILE *sa;
  Int32 psize, i;

  if(_ds_Verbose)
    fprintf(stderr,"Writing sa to file %s\n",filename);
  if((sa=fopen(filename,"wb"))==NULL) {
    perror(filename);
    return;            // never hand a NULL stream to fbit_write/fclose
  }

  init_bit_buffer();
  psize = int_log2(n);   // bits used for each value
  for(i=0;i<n;i++)
    fbit_write(sa,psize,p[i]);
  fbit_flush(sa);        // write out any partially filled byte
  fclose(sa);
}
    // Generate all m edges up front and keep the accepted ones in the
    // `values` container (ordered by sort_pair), then pop the first one.
    //   gen               source random generator; a uniform_01 wrapper
    //                     around a copy of it is stored in this->gen
    //   n, m              vertex count and number of edges to draw
    //   a, b, c, d        R-MAT parameters, asserted to sum to 1
    //                     (fraction tolerance 1e-5)
    //   permute_vertices  when true, endpoints are mapped through a
    //                     permutation vector before being stored
    //   ep                predicate deciding whether an edge is kept
    sorted_rmat_iterator(RandomGenerator& gen, vertices_size_type n,
                         edges_size_type m, double a, double b, double c,
                         double d, bool permute_vertices = true,
                         EdgePredicate ep = keep_all_edges())
      : gen(), permute_vertices(permute_vertices),
        values(sort_pair<vertices_size_type>()), done(false)

    {
      assert(boost::test_tools::check_is_close(a + b + c + d, 1., boost::test_tools::fraction_tolerance(1.e-5)));

      this->gen.reset(new uniform_01<RandomGenerator>(gen));

      std::vector<vertices_size_type> vertexPermutation;
      if (permute_vertices)
        generate_permutation_vector(gen, vertexPermutation, n);

      // TODO: "Clip and flip" if undirected graph
      int scale = int_log2(n);

      for (edges_size_type drawn = 0; drawn < m; ++drawn) {

        vertices_size_type u, v;
        boost::tie(u, v) = generate_edge(this->gen, n, scale, a, b, c, d);

        if (!permute_vertices) {
          if (ep(u, v))
            values.push(std::make_pair(u, v));
        } else {
          if (ep(vertexPermutation[u], vertexPermutation[v]))
            values.push(std::make_pair(vertexPermutation[u], vertexPermutation[v]));
        }

      }

      // NOTE(review): if `ep` rejected every edge, `values` is empty at
      // this point -- confirm callers never hit that case.
      current = values.top();
      values.pop();
    }
Esempio n. 11
0
/* 
   Compress and write to file a bucket of length "len" starting at in[0].
   The compression is done as follows:
   first the characters are remapped onto the chars that actually occur
   in this bucket (we expect only a few distinct chars in a single
   bucket) and the boolean map of those chars is written, one bit per
   char of the incoming alphabet. Then, for MULTIH compression, the
   remapped bucket is written as: the first symbol, followed for every
   further position by a 0 bit (same symbol as before) or a 1 bit plus
   the new symbol. Symbols take int_log2(local_alpha_size) bits. A
   bucket with a single distinct char stores nothing after the map.

   input
     fm_index *s      index state; only s->type_compression is read here
     uchar *in        bucket to compress; it is remapped IN PLACE
     ulong len        number of chars in the bucket
     suint alphasize  size of the alphabet the chars of in[] belong to
   output
     FM_OK on success, FM_COMPNOTSUP when s->type_compression is not
     MULTIH (the boolean map has already been written in that case).
*/ 
int compress_bucket(fm_index *s, uchar *in, ulong len, suint alphasize) {
	
  int fm_multihuf_compr(uchar *, int, int);
	
  suint local_alpha_size, j;
  ulong i;    /* position inside the bucket: same width as len, so the
                 loops over the bucket cannot wrap if suint is narrower */
  uchar c, local_bool_map[256], local_map[256]; 
 
  /* ---------- compute and write local boolean map ------ */
  for(j=0; j<alphasize; j++){     
    local_bool_map[j]=0;
	local_map[j]=0;
  }
  local_alpha_size=0;
  
  for(i=0;i<len;i++) {             // compute local boolean map
    c = in[i];                     // remapped char
    assert(c<alphasize);                              
    local_bool_map[c] = 1;     
  }

  for(j=0; j<alphasize; j++)      // compute local map
    if(local_bool_map[j])
      local_map[j] = local_alpha_size++; 
	
  for(j=0;j<alphasize;j++)     // write bool char map to file 
   		if(local_bool_map[j]) {fm_bit_write24(1,1);}
   		else {fm_bit_write24(1,0);} 
  
  for(i=0;i<len;i++)             // remap bucket
    in[i]=local_map[in[i]];
  
  switch ( s->type_compression ) {
	  case ( MULTIH ): {
		   if (local_alpha_size == 1) { 
					/* the boolean map alone identifies the bucket content */
					fm_bit_flush(); 
					return FM_OK;
					}
			int bit = int_log2(local_alpha_size);
			/* uchar, not char: with a signed char a symbol >= 128
			   would never compare equal to in[i] and every repeat
			   would be written out in full */
			uchar prev = in[0];
			fm_bit_write(bit, in[0]);
			for(i=1;i<len;i++) {
				if(prev==in[i]){
					fm_bit_write(1, 0);
				} else {
					fm_bit_write(1, 1);
					fm_bit_write(bit, in[i]);
				}
				prev = in[i];
								
			}
		    /* disabled alternative: mtf picture + multi-table Huffman */
  			/*mtf_string(in, s->mtf_seq, len, local_alpha_size);
			error = fm_multihuf_compr(s->mtf_seq, len, local_alpha_size);
			if ( error < 0 ) return error;*/
			fm_bit_flush(); 
			break;
	  }
	  default: 
	  		return FM_COMPNOTSUP;
  	}
 
  return FM_OK;

}
Esempio n. 12
0
/*
 * Print a fragmentation report for one file: the number of extents found
 * and, on ext2-like filesystems without the extents flag, the number of
 * extents a perfectly laid out file would have.
 *
 * The extent count comes from filefrag_fiemap() unless force_bmap is set or
 * that call fails; in that case every logical block is mapped with
 * get_bmap() and discontinuities between consecutive physical blocks are
 * counted. Errors from open/statfs/stat/FIGETBSZ are reported with perror()
 * and end the report for this file; the descriptor is closed on every path
 * after a successful open.
 *
 * Side effects: sets the file-scope physical_width, logical_width and
 * filesize variables, and prints the filesystem-wide lines only on the
 * first call (static `once`).
 */
static void frag_report(const char *filename)
{
	struct statfs	fsinfo;
#ifdef HAVE_FSTAT64
	struct stat64	fileinfo;
#else
	struct stat	fileinfo;
#endif
	int		bs;
	long		fd;
	/* count must start at 0: the bmap loop that sets it is skipped
	 * entirely when the FIEMAP path succeeds, yet it is read below */
	unsigned long	block, last_block = 0, numblocks, i, count = 0;
	long		bpib;	/* Blocks per indirect block */
	long		cylgroups;
	int		num_extents = 0, expected;
	int		is_ext2 = 0;
	static int	once = 1;
	unsigned int	flags;
	int rc;

#ifdef HAVE_OPEN64
	fd = open64(filename, O_RDONLY);
#else
	fd = open(filename, O_RDONLY);
#endif
	if (fd < 0) {
		perror("open");
		return;
	}

	if (statfs(filename, &fsinfo) < 0) {
		perror("statfs");
		close(fd);
		return;
	}
#ifdef HAVE_FSTAT64
	if (stat64(filename, &fileinfo) < 0) {
#else
	if (stat(filename, &fileinfo) < 0) {
#endif
		perror("stat");
		close(fd);
		return;
	}
	/* a file is treated as ext2-style (indirect blocks) only when it does
	 * not carry the extents flag and the filesystem magic matches */
	if (ioctl(fd, EXT3_IOC_GETFLAGS, &flags) < 0)
		flags = 0;
	if (!(flags & EXT4_EXTENTS_FL) &&
	    ((fsinfo.f_type == 0xef51) || (fsinfo.f_type == 0xef52) ||
	     (fsinfo.f_type == 0xef53)))
		is_ext2++;
	if (verbose && once)
		printf("Filesystem type is: %lx\n",
		       (unsigned long) fsinfo.f_type);

	cylgroups = div_ceil(fsinfo.f_blocks, fsinfo.f_bsize*8);
	if (verbose && is_ext2 && once)
		printf("Filesystem cylinder groups is approximately %ld\n",
		       cylgroups);

	/* column widths, with a lower bound of 8 and 7 digits respectively */
	physical_width = int_log10(fsinfo.f_blocks);
	if (physical_width < 8)
		physical_width = 8;

	if (ioctl(fd, FIGETBSZ, &bs) < 0) { /* FIGETBSZ takes an int */
		perror("FIGETBSZ");
		close(fd);
		return;
	}

	if (no_bs)
		bs = 1024;

	bpib = bs / 4;
	numblocks = (fileinfo.st_size + (bs-1)) / bs;
	logical_width = int_log10(numblocks);
	if (logical_width < 7)
		logical_width = 7;
	filesize = (long long)fileinfo.st_size;
	if (verbose)
		printf("File size of %s is %lld (%ld block%s, blocksize %d)\n",
		       filename, (long long) fileinfo.st_size, numblocks,
		       numblocks == 1 ? "" : "s", bs);
	if (force_bmap ||
	    filefrag_fiemap(fd, int_log2(bs), &num_extents) != 0) {
		/* fallback: map every logical block and count discontinuities */
		for (i = 0, count = 0; i < numblocks; i++) {
			if (is_ext2 && last_block) {
				/* step over the (double/triple) indirect blocks
				 * that ext2 places between data blocks */
				if (((i-EXT2_DIRECT) % bpib) == 0)
					last_block++;
				if (((i-EXT2_DIRECT-bpib) % (bpib*bpib)) == 0)
					last_block++;
				if (((i-EXT2_DIRECT-bpib-bpib*bpib) %
							(bpib*bpib*bpib)) == 0)
					last_block++;
			}
			/* NOTE(review): rc is not checked; this assumes
			 * get_bmap() always stores a value in block -- confirm */
			rc = get_bmap(fd, i, &block);
			if (block == 0)
				continue;
			if (!num_extents)
				num_extents++;
			count++;
			if (last_block && (block != last_block+1) ) {
				if (verbose)
					printf("Discontinuity: Block %ld is at "
					       "%lu (was %lu)\n",
					       i, block, last_block+1);
				num_extents++;
			}
			last_block = block;
		}
	}
	if (num_extents == 1)
		printf("%s: 1 extent found", filename);
	else
		printf("%s: %d extents found", filename, num_extents);
	expected = (count/((bs*8)-(fsinfo.f_files/8/cylgroups)-3))+1;
	if (is_ext2 && expected < num_extents)
		printf(", perfection would be %d extent%s\n", expected,
			(expected>1) ? "s" : "");
	else
		fputc('\n', stdout);
	close(fd);
	once = 0;
}

/*
 * Print the command-line synopsis for `progname` on stderr and terminate
 * the process with exit status 1. Does not return.
 */
static void usage(const char *progname)
{
	FILE *out = stderr;

	fprintf(out, "Usage: %s [-Bbvsx] file ...\n", progname);
	exit(1);
}
Esempio n. 13
0
/* **********************************************************
   Fill s->sa with the suffix array of s->text.
   If the file Safile_name exists and is not empty, the suffix
   array is read from it: the file must hold s->text_size
   pointers of int_log2(s->text_size) bits each, otherwise
   fatal_error() is called. If the file is missing or empty the
   suffix array is built from scratch with larsson_sada_sufsort()
   (when Use_larsson_sada is set) or suffixsort5n().
   ********************************************************** */
void build_sa(bwi_input *s)
{
  int scmp3(unsigned char *p, unsigned char *q, int maxl);
  void init_bit_buffer(void);
  int fbit_read(FILE *,int);
  int *larsson_sada_sufsort(uchar *, int, int);
  int *suffixsort5n(uchar *, int);
  void out_of_mem(char *s);
  int int_log2(int);  
  int i, n, pointer_size,q,r,sa_size;
  FILE *safile;
  
  /* ------------ check sa file ---------------- */
  n=0;                       // size in bytes of the sa file, 0 if unusable
  safile = fopen(Safile_name,"rb");
  if(safile!=NULL) {
    fseek(safile,0L,SEEK_END);
    n=ftell(safile);
    if(n==0) {
      // an empty file is treated like a missing one: close it here,
      // since the build-from-scratch path never touches safile again
      fclose(safile);
      safile = NULL;
    }
  }

  if (n==0) { 
    // ------- build sa using larsson-sada or 5n
    if(Verbose)  fprintf(stderr, " from scratch ");
    if(Use_larsson_sada) {
      if(Verbose)  fprintf(stderr, "(using ls) ... ");
      s->sa = larsson_sada_sufsort(s->text,s->text_size,s->alpha_size);
    }
    else {
      if(Verbose)  fprintf(stderr, "(using 5n) ... ");
      s->sa = suffixsort5n(s->text,s->text_size);
    }
  } 
  else {     
    // ------ read sa from file --------     
    pointer_size = int_log2(s->text_size);
    // --- compute  sa_size = (s->text_size * pointer_size + 7)/8
    // --- use q and r to avoid overflow
    q = s->text_size/8; r = s->text_size % 8;
    sa_size = (q*pointer_size) + (r*pointer_size+7)/8; 
    if (n != sa_size)
      fatal_error("Invalid .sa file\n");
    if(Verbose) fprintf(stderr, " by reading it from file... ");
    // allocate space for the suffix array
    s->sa = (int *) malloc(s->text_size * sizeof(int));
    if(s->sa==NULL) out_of_mem("build_sa");
    rewind(safile);
    init_bit_buffer();
    for(i=0; i<s->text_size; i++)// read one suffix-array pointer at a time
      s->sa[i] = fbit_read(safile,pointer_size);
    fclose(safile);
  }
  // check the suffix array
#if 0
   for (i=0; i<s->text_size-1; ++i)
     if (scmp3(s->text+s->sa[i], s->text+s->sa[i+1], 
                MIN(s->text_size-s->sa[i], s->text_size-s->sa[i+1]))>=0) {
       fprintf(stderr, "Suffix array check failed at position %d\n", i);
       exit(1);
     }
#endif
}
Esempio n. 14
0
/* *********************************************************
   The current format of the prologue is the following:
         8 bits      type of compression (2=Hier, 4=Multi Table Huff)
         1 int       size of input file
         1 int       position of eof in s->bw
         1 uint16    size of a super bucket divided by 1024
         1 uchar     size of a bucket divided by 1024 (divides the previous)
	 1 uchar     size-1 mtf_list stored in each bucket  
	 1 uchar     size-1 of the compacted alphabet of the text
	 1 uchar     remapped char selected for occurrence list
	 1 int       # skipped occ of chosen_char in bwt
	 1 int       starting byte of occ-explicit list
       256 bits      boolean map of chars in the text (S = # of 1)
         S int       prefix sum of character occurrences

      for each superbucket
	 S' bytes    map of compact_alph chars occurring in THIS superbucket
	             (S' = (S+7)/8 -- it is byte aligned)  ****FLUSH****
         S int       # occ of all compact_alphabet chars in prev superbuckets

      finally:
        NB x L    starting position of each bucket in the compressed file
                  NB is the number of buckets and L is the number of bits
		  sufficient to represent that length (byte_aligned)


     -------- Body of the compressed file [byte-aligned]   -------------

     for each bucket (let R be the # of distinct chars in the superbucket)

         R 7x8val   # of occ of each char in the previous buckets of the
                    same superbucket. Each value is represented with 
                    the 7x8 encoding. This information is missing for the 
                    first bucket of each superbucket

         R  bits    map of chars appearing in this bucket. 
                    Let R' be the # of distinct chars in this bucket 
                    and L' the # of bits required to represent R'

       L' x M bits  Initial move to front list for this bucket
                    M = min(R',Mtf_save)         

	 ... bits   needed to byte-align in case ONLY of Arith-coding	    

         ??? bits   compressed bucket in mtf + rle + [Ari|Hier|Una] format 

         --- bits   ****FLUSH**** to have byte alignment

     -------- Body of the occ explicit list [byte-aligned]   -------------
     ---------------------------------------------------------------------
     --- URL: we have occ for text positions and rows ---

         ... L bits  list of positions where character ch occurs in
	             the original text. ch = character that occurs
		     close to Marked_char_freq times in the text.
  **************************************************************** */
void write_prologue(bwi_input *s)
{
  void init_bit_buffer(void);
  int int_log2(int);
  void uint_write(int);
  void bit_write(int,int);
  void bit_flush(void);
  void write7x8(int);
  bucket_lev1 superbucket;
  int sb_idx,ch,pos_bytes,slot;

  /* ----- compression type, text size, eof position ------ */
  init_bit_buffer();
  bit_write(8,Type_compression);
  uint_write(s->text_size);
  uint_write(s->bwt_eof_pos);

  /* ----- bucket sizes, stored divided by 1024 ------ */
  // each size must be a multiple of 1024 and fit its 16/8-bit field
  assert(Bucket_size_lev1>>10<65536);
  assert((Bucket_size_lev1 & 0x3ff) == 0);
  bit_write(16,Bucket_size_lev1>>10);

  assert(Bucket_size_lev2>>10<256);
  assert((Bucket_size_lev2 & 0x3ff) == 0);
  bit_write(8,Bucket_size_lev2>>10);

  /* ----- mtf list size and alphabet size, both stored minus one ----- */
  assert(Mtf_save>0 && Mtf_save<=256);
  bit_write(8,Mtf_save-1);

  assert(s->alpha_size>0 && s->alpha_size<=256);
  bit_write(8,s->alpha_size-1);   

  /* ----- chosen char, skip value, placeholder for occ-list start ----- */
  bit_write(8,s->chosen_char);
  uint_write(s->skip);
  uint_write(0);               // a zero is written in this slot for now
  
  /* ----- 256-bit boolean map of the chars of the text ----- */
  for(ch=0; ch<256; ch++)
    bit_write(1, s->bool_char_map[ch] ? 1 : 0);

  /* ----- prefix sum of char occurrences ----- */
  for(ch=0; ch<s->alpha_size; ch++)
    uint_write(s->pfx_char_occ[ch]);

  /* ----- one record per superbucket ----- */
  for(sb_idx=0; sb_idx<Num_bucs_lev1; sb_idx++) {
    superbucket = s->buclist_lev1[sb_idx];

    // boolean char map of this superbucket, padded to a byte boundary
    for(ch=0; ch<s->alpha_size; ch++)
      bit_write(1, superbucket.bool_char_map[ch] ? 1 : 0);
    bit_flush();

    // the first superbucket has no occurrence counts written
    if(sb_idx==0)
      continue;

    for(ch=0; ch<s->alpha_size; ch++)
      uint_write(superbucket.occ[ch]);
  }

  /* ----- zero-filled table, one byte-aligned slot per level-2 bucket ----- */
  // reserves space for the start positions of the buckets
  pos_bytes = (int_log2(s->text_size)+7)/8;
  for(slot=0; slot<Num_bucs_lev2; slot++)
    bit_write(pos_bytes * 8,0);

}
Esempio n. 15
0
/* ************************************************************
   *                                                          *
   * main compression routine                                 *
   *                                                          *
   * Reads the text from Infile, runs the pipeline            *
   *   remap alphabet -> suffix array -> BWT -> marked        *
   *   locations -> superbucket/bucket infos                  *
   * and writes to Outfile, in this order: the prologue, the  *
   * compressed superbuckets, and the list of marked          *
   * positions (or ranks). For URL input a second list with   *
   * the dictionary locations follows.                        *
   * Takes no parameters and returns nothing: it works on the *
   * globals Infile, Outfile, Verbose, Is_dictionary,         *
   * Is_huffword, Is_URL, Num_bucs_lev1 and sets Infile_size, *
   * Outfile_size and Start_prologue_occ.                     *
   *                                                          *
   ********************************************************** */
void compress_file(void)
{
  void read_text(FILE *, bwi_input *s);
  void remap_alphabet(bwi_input *s);
  void build_sa(bwi_input *s);
  void compute_bwt(bwi_input *s);
  void compute_info_superbuckets(bwi_input *s);
  void compute_info_buckets(bwi_input *s);
  void write_prologue(bwi_input *s);
  void compress_superbucket(bwi_input *s, int);
  int compute_locations(bwi_input *s);
  int compute_locations_dict(bwi_input *s, int*);
  int compute_ranks_dict(bwi_input *s, int*);
  int compute_locations_huffword(bwi_input *s, int *);
  void bit_flush( void );
  void bit_write(int,int);  
  void init_bit_buffer(void);
  void write_susp_infos(bwi_input *s);
  bwi_input s;
  int i,len, retr_occ, retr_occ2, loc_occ_range;
  int Start_prologue_ranks;

  /* --------- Load the text file from disk ------- */  
  if(Verbose) fprintf(stderr,"Reading input file... ");  
  read_text(Infile, &s);
  if(Verbose) fprintf(stderr,"done! (%f seconds)\n",getTime());
  
  /* --------- Compact alphabet ------- */  
  if(Verbose>1) fprintf(stderr,"Remapping alphabet... ");  
  remap_alphabet(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds). ",getTime());
  if(Verbose>1) fprintf(stderr,"Compact alphabet size = %d\n",s.alpha_size);

  /* --------- Build suffix array ------- */  
  if(Verbose) fprintf(stderr,"Building suffix array");  
  build_sa(&s);
  if(Verbose) fprintf(stderr,"done! (%f seconds)\n",getTime());

  /* --------- Compute BWT ------- */  
  if(Verbose>1) fprintf(stderr,"Computing BWT... ");
  compute_bwt(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds)\n",getTime());

  /* ------- mark chars and compute locations ----- */ 
  /* retr_occ = number of entries written later from s.loc_occ[].    */
  /* loc_occ_range is passed by address only on the first three      */
  /* paths; it is left untouched by this function on the standard    */
  /* path, where it is also never read.                              */
  if (Is_dictionary)
    retr_occ = compute_locations_dict(&s,&loc_occ_range);    // dictionary
  else if (Is_huffword)
    retr_occ = compute_locations_huffword(&s,&loc_occ_range);// huffword 
  else if (Is_URL)
    retr_occ = compute_ranks_dict(&s,&loc_occ_range);        // URL
  else
    retr_occ = compute_locations(&s);                        // standard


  /* --------- Compute various infos for each superbucket ------- */  
  if(Verbose>1) fprintf(stderr,"Computing infos superbukets... ");
  compute_info_superbuckets(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds)\n", getTime());

  /* --------- Compute various infos for each bucket ------- */  
  if(Verbose>1) fprintf(stderr,"Computing infos buckets... ");
  compute_info_buckets(&s);
  if(Verbose>1) fprintf(stderr,"done! (%f seconds)\n", getTime());

  /* --------- Writing the compressed file ------- */
  Infile_size = s.text_size; 
  Outfile_size=0;

  write_prologue(&s);
  if(Verbose) fprintf(stderr,"Prologue --> %d bytes!\n",Outfile_size);

  for(i=0;i<Num_bucs_lev1;i++)
    compress_superbucket(&s,i);

  /* ---- keep starting positions of occ-explicit list ---- */
  /* presumably Outfile_size is advanced by the writing routines -- */
  /* it is only reset to 0 in this function                         */
  Start_prologue_occ = Outfile_size;

  /* -- write the starting position of buckets -- */
  write_susp_infos(&s);

  /* write_susp_infos() seeks back into the prologue, so return to  */
  /* the end of the compressed body before appending the lists      */
  if (fseek(Outfile,Start_prologue_occ,SEEK_SET)) {
    fprintf(stderr, "Seek error on output file -compress_file-\n");
    exit(1);
  }


  /* -- write the position of the marked chars ---- */
  init_bit_buffer();
  if(Is_dictionary || Is_huffword || Is_URL)
    len = int_log2(loc_occ_range);     // bits required for each rank
  else  
    len = int_log2(s.text_size);       // bits required for each pos 

  for(i=0; i < retr_occ; i++)
    bit_write(len,s.loc_occ[i]);

  bit_flush();

  /* end of the first list = start of the (optional) second list */
  Start_prologue_ranks = (int)ftell(Outfile);

  if(Verbose)  
    fprintf(stderr,"List of %d marked ranks --> %d bytes!\n",
	    retr_occ,Start_prologue_ranks-Start_prologue_occ);

  /* -- in the case of URL we also store the DICT info -- */
  /* It should be put together with the computation above --*/
  /* Thus removing these differences in the code --*/
  /* Hence Start_prologue_occ indicates the starting position of RANKS. */
  /* After retr_occ RANKS start the LOCATIONS, which are again retr_occ */
  /* in number. The value of retr_occ can be computed at decompression time */
  /* by using the same formula adopted in compute_ranks_dict() */
  if (Is_URL) {
    retr_occ2 = compute_locations_dict(&s,&loc_occ_range);  // DICT
    
    /* the mismatch is reported through out_of_mem(), which is used */
    /* here only as a way to emit the message                       */
    if (retr_occ != retr_occ2)
      out_of_mem("Unequal number of sampled NULLs\n");

    /* NOTE(review): len is still the width computed from the first  */
    /* loc_occ_range; it is not recomputed after the call above,     */
    /* which receives &loc_occ_range again -- confirm this is meant  */
    for(i=0; i < retr_occ; i++)
      bit_write(len,s.loc_occ[i]);
    
    bit_flush();
  
    
  if(Verbose) 
    fprintf(stderr,"List of %d marked locations --> %d bytes!\n",
	    retr_occ2,(int)ftell(Outfile) - Start_prologue_ranks);
  }
}
Esempio n. 16
0
/* **********************************************************************
   Compress one bucket of "len" chars starting at in[0] and write it to
   the output through the bit buffer.
   Steps:
     1. remap the chars of the bucket IN PLACE onto 0..R'-1, where R' is
        the number of distinct chars occurring in it, and write the
        boolean map of the occurring chars (alpha_size bits);
     2. compute the mtf sequence and write the first
        MIN(Mtf_save,R') entries of the initial mtf list, each one with
        int_log2(R') bits;
     3. encode the mtf sequence with the routine selected by
        Type_compression (HIER3 or MULTIH; ARITH and UNARY abort);
     4. flush the bit buffer so that the next bucket is byte aligned.
   ********************************************************************** */ 
void compress_bucket(uchar *in, int len, int alpha_size)
{
  int int_log2(int);
  void init_bit_buffer(void);
  void bit_write(int,int);
  void bit_flush( void );
  void out_of_mem(char *);
  int mtf_string(uchar *, uchar *, uchar *, int);
  void rle_hierarchical(uchar *, int, int);  
  void multihuf_compr(uchar *, int, int);
  int sym,pos,bits_x_char,local_alpha_size,mtf_len;
  uchar mtf[256],local_bool_map[256], local_map[256]; 
  uchar *mtf_seq;

  /* ---------- init ------------ */
  init_bit_buffer();

  /* ---------- step 1: local alphabet ------ */
  for(sym=0; sym<alpha_size; sym++) {
    local_bool_map[sym]=0;
    local_map[sym]=0;
  }

  for(pos=0; pos<len; pos++) {       // mark the chars that occur
    assert(in[pos]<alpha_size);
    local_bool_map[in[pos]]=1;
  }

  local_alpha_size=0;
  for(sym=0; sym<alpha_size; sym++)  // number them in increasing order
    if(local_bool_map[sym])
      local_map[sym]=local_alpha_size++;

  for(pos=0; pos<len; pos++)         // remap bucket
    in[pos]=local_map[in[pos]];

  for(sym=0; sym<alpha_size; sym++)  // write bool char map to file
    bit_write(1, local_bool_map[sym] ? 1 : 0);

  /* ----------- step 2: mtf picture ------------- */
  mtf_seq = (uchar *) malloc(2*len*sizeof(uchar)); // mtf temporary buffer
  if(mtf_seq==NULL) out_of_mem("compress_bucket (mtf_seq)");
  mtf_len = mtf_string(in,mtf_seq,mtf,len);  // mtf_seq=mtf(in), init mtf-list
  bits_x_char = int_log2(local_alpha_size);
  for(sym=0; sym<MIN(Mtf_save,local_alpha_size); sym++)
    bit_write(bits_x_char,mtf[sym]);         // initial mtf list

  /* ----------- step 3: proper compression routine ------------- */
  if (Type_compression == HIER3) {
    // three-leveled model: Fenwick's proposal
    rle_hierarchical(mtf_seq, mtf_len,local_alpha_size);
  }
  else if (Type_compression == MULTIH) {
    // RLE + MultiHuffman compression of the bucket
    multihuf_compr(mtf_seq,mtf_len,local_alpha_size);
  }
  else if (Type_compression == ARITH) {
    fatal_error("Arithmetic coding no longer available -compress_bucket-\n");
    exit(1);
  }
  else if (Type_compression == UNARY) {
    fatal_error("Unary coding no longer available -compress_bucket-\n");
    exit(1);
  }
  else {
    fprintf(stderr,"\n Compression algorithm unknown! ");
    fprintf(stderr,"-compress_superbucket-\n");
    exit(1);
  }

  /* ----------- step 4 ------------- */
  bit_flush();         // Byte-align the next compressed bucket
  free(mtf_seq);
}
Esempio n. 17
0
File: FFT.c Progetto: cran/rscimark
/*
 * In-place radix-2 FFT kernel (decimation in time).
 *
 * N         : number of doubles in data; the buffer is treated as N/2
 *             complex values stored as interleaved (real, imaginary) pairs.
 * data      : buffer that is bit-reversed and then transformed in place.
 * direction : multiplier applied to the twiddle angle theta, so its sign
 *             selects the rotation direction of the twiddle factors.
 *
 * Returns immediately when N == 0 (nothing to do) or when there is a
 * single complex value (the transform is the identity).
 */
static void FFT_transform_internal (int N, double *data, int direction) {
    int n = N/2;
    int bit = 0;
    int logn;
    int dual = 1;

    /* Empty input: bail out before int_log2() is handed n == 0. */
    if (N == 0) return;
    if (n == 1) return;         /* Identity operation! */
    logn = int_log2(n);

    /* bit reverse the input data for decimation in time algorithm */
    FFT_bitreverse(N, data) ;

    /* apply fft recursion */
    /* this loop is executed logn = int_log2(n) times; dual doubles each pass */
    for (bit = 0; bit < logn; bit++, dual *= 2) {
      double w_real = 1.0;
      double w_imag = 0.0;
      int a;
      int b;

      /* s = sin(theta) and s2 = 2*sin^2(theta/2) = 1 - cos(theta) drive
         the twiddle recurrence below */
      double theta = 2.0 * direction * PI / (2.0 * (double) dual);
      double s = sin(theta);
      double t = sin(theta / 2.0);
      double s2 = 2.0 * t * t;

      /* a = 0: the twiddle factor is 1, so the butterfly needs no multiply */
      for (b = 0; b < n; b += 2 * dual) {
        int i = 2*b ;
        int j = 2*(b + dual);

        double wd_real = data[j] ;
        double wd_imag = data[j+1] ;

        data[j]   = data[i]   - wd_real;
        data[j+1] = data[i+1] - wd_imag;
        data[i]  += wd_real;
        data[i+1]+= wd_imag;
      }

      /* a = 1 .. (dual-1) */
      for (a = 1; a < dual; a++) {
        /* trigonometric recurrence for w-> exp(i theta) w */
        {
          double tmp_real = w_real - s * w_imag - s2 * w_real;
          double tmp_imag = w_imag + s * w_real - s2 * w_imag;
          w_real = tmp_real;
          w_imag = tmp_imag;
        }
        for (b = 0; b < n; b += 2 * dual) {
          int i = 2*(b + a);
          int j = 2*(b + a + dual);

          double z1_real = data[j];
          double z1_imag = data[j+1];

          /* wd = w * z1 (complex product) */
          double wd_real = w_real * z1_real - w_imag * z1_imag;
          double wd_imag = w_real * z1_imag + w_imag * z1_real;

          data[j]   = data[i]   - wd_real;
          data[j+1] = data[i+1] - wd_imag;
          data[i]  += wd_real;
          data[i+1]+= wd_imag;
        }
      }
    }
}
Esempio n. 18
0
/*
 * Read the basic prologue from the compressed index in s->compress.
 *
 * Fields are consumed in the order they appear in the stream: text
 * size, compression type, bwt EOF position, the two bucket sizes,
 * alphabet size, marked-char information, prologue offsets, the
 * ALPHASIZE-bit character bitmap and the per-character prefix sums.
 *
 * For texts smaller than SMALLFILESIZE only s->text_size and
 * s->smalltext are set and the function returns right away.
 *
 * Returns FM_OK on success, or FM_COMPNOTCORR when the header is
 * inconsistent (EOF position beyond the text, a zero bucket size, or
 * a first-level bucket size that is not a multiple of the second).
 */
static int
fm_read_basic_prologue (fm_index * s)
{
	int i;
	ulong size;

	fm_init_bit_reader (s->compress);
	s->text_size = fm_uint_read ();
	if (s->text_size < SMALLFILESIZE) {
		/* small texts carry no further prologue fields */
		s->smalltext = 1;
		return FM_OK;
	}
	s->smalltext = 0;
	s->type_compression = fm_bit_read (8);
	s->log2textsize = int_log2 (s->text_size - 1);
	s->bwt_eof_pos = fm_uint_read ();
	if (s->bwt_eof_pos > s->text_size)
		return FM_COMPNOTCORR;

	s->bucket_size_lev1 = fm_bit_read (16) << 10;
	s->bucket_size_lev2 = fm_bit_read (16);

	/*
	 * Both sizes come straight from the file and are used as divisors
	 * below; a zero here means the header is corrupt.
	 */
	if (s->bucket_size_lev1 == 0 || s->bucket_size_lev2 == 0)
		return FM_COMPNOTCORR;

	if (s->bucket_size_lev1 % s->bucket_size_lev2)
		return FM_COMPNOTCORR;

	/* number of buckets at each level, rounded up */
	s->num_bucs_lev1 =
		(s->text_size + s->bucket_size_lev1 - 1) / s->bucket_size_lev1;
	s->num_bucs_lev2 =
		(s->text_size + s->bucket_size_lev2 - 1) / s->bucket_size_lev2;

	/* mtf & alphabet information */
	s->alpha_size = fm_bit_read (8) + 1;


	/* read Mark mode & starting position of occ list */
	s->specialchar = (uchar) fm_bit_read (8);
	s->skip =  fm_bit_read (32);
	uint start = fm_uint_read ();

	s->start_prologue_occ = s->compress + start;
	s->start_prologue_info_sb = fm_uint_read ();
	s->pos_marked_row_extr = fm_uint_read ();

	s->subchar = (uchar) fm_bit_read (8);	/* remapped compress alphabet */

	/* some information for the user */
	#if 0
	fprintf (stdout, "Compression type %d\n", s->type_compression);
	fprintf (stdout, "Text Size %lu\n", s->text_size);
	fprintf (stdout, "Bwt EOF %lu\n",s->bwt_eof_pos);
	fprintf (stdout, "alphasize %d\n",s->alpha_size);
	fprintf(stdout, "start prologue %lu\n", s->start_prologue_occ);
	fprintf (stdout, "Compression method: ");
	switch (s->type_compression)
		{
		case MULTIH:
			fprintf (stdout, "Huffman with multiple tables.\n");
			break;
	
		default:
		return FM_COMPNOTSUP;
	}
	#endif

	/* alphabet info and inverse char maps */
	for (i = 0; i < ALPHASIZE; i++)
		s->bool_char_map[i] = fm_bit_read (1);

	/* assign consecutive codes to the characters flagged in the bitmap */
	for (i = 0, size = 0; i < ALPHASIZE; i++)
		if (s->bool_char_map[i])
		{
			s->char_map[i] = size;
			s->inv_char_map[size++] = (uchar) i;
		}
	assert (size == s->alpha_size);

	/* prefix-summed char-occ info, stored using s->log2textsize bits each */

	for (i = 0; i < s->alpha_size; i++)
	{			/* read the prefix sums of the character occurrences */
		s->bwt_occ[i] = fm_bit_read (s->log2textsize);
	}

	/*
	 * compute the occurrences of each character in the text as the
	 * difference of consecutive prefix sums
	 */
	for (i = 1; i < s->alpha_size; i++)
		s->char_occ[i - 1] = (s->bwt_occ[i]) - (s->bwt_occ[i - 1]);

	/* the last character takes whatever remains up to text_size */
	s->char_occ[(s->alpha_size) - 1] =
		(s->text_size) - (s->bwt_occ[(s->alpha_size) - 1]);

	/*
	 * Compute the start position of the bucket info
	 */
	s->sb_bitmap_size = (s->alpha_size+7)/8;
	
	s->start_prologue_info_b = s->start_prologue_info_sb + 
		(s->sb_bitmap_size*s->num_bucs_lev1)
		+ (s->alpha_size * sizeof(ulong) * (s->num_bucs_lev1 - 1));

	return FM_OK;
}
Esempio n. 19
0
/* *****************************************************************
   compute locations of "marked" occurrences. This procedure does
   the following: 
     1) compute the desired # of marked chars (=desired_marked_chars)
     2) compute the best pair i,j such that (occ[i]/2^j) is as close
        as possible (but <= ) to desired_marked_chars
        write  i in s->chosen_char and 2^j in s->skip 
     3) scan s->bwt[] and "select" one out of s->skip occurrences of 
        s->chosen_char. For each selected occurrence write in s->loc_occ
        its position in the original text.

   returns the number of marked chars written in s->loc_occ; when
   Marked_char_freq is 0 nothing is marked, s->skip and s->chosen_char
   are set to 0 and the return value is 0.
   s->loc_occ is allocated here; if the allocation fails the program
   prints a message on stderr and exits.

   I think this procedure could be improved (that is, simplified 
   and faster in doing the search with bwhuffw) using the ideas
   introduced in compute_locations_dict() and compute_locations_huffword()
   more precisely
    1) remove s->chosen_char and mark simply one row every s->skip
       (this would make the marked chars more evenly distributed in the
        text).
    2) consider the row starting with a "marked char" rather than ending
       (this would simplify the code)
   **************************************************************** */
int compute_locations(bwi_input *s)
{
  int i,max,j,count,chosen_occ;
  int ch_occ[256],rescaled,skip;
  int exponent, desired_marked_chars, marked_chars;
  
  if(Marked_char_freq==0) {
    s->skip=0; s->chosen_char = 0; 
    return 0;
  }
  /* ------ compute the desired number of marked chars ------ */
  desired_marked_chars =  (int) (s->text_size * Marked_char_freq);
  if(desired_marked_chars==0)
    desired_marked_chars=1;
      
  // ---- Count occurrences for each character (from the prefix sums)
  ch_occ[s->alpha_size-1]= s->text_size-s->pfx_char_occ[s->alpha_size-1];
  for(i=0;i<s->alpha_size-1;i++)
    ch_occ[i]=s->pfx_char_occ[i+1]-s->pfx_char_occ[i];
  
  // ----- select best (char,skip) pair
  for(i=0, max=-1; i<s->alpha_size; i++){
    if(i==s->bwt[0]) continue;  // Exclude bwt-first-char (see below)
    /* --- determine the number of skipped char for i */
    if (ch_occ[i] > desired_marked_chars) {
      exponent = int_log2(ch_occ[i]/desired_marked_chars);
      assert(exponent > 0);
      skip = int_pow2(exponent);
    }
    else
      skip = 1;
    /* --- check if this is the best choice seen so far --- */
    rescaled = ch_occ[i] / skip;
    if(rescaled>max && rescaled <= desired_marked_chars) {
      max = rescaled;
      s->chosen_char = i;
      s->skip = skip;
    }
  }
  assert(max > 0);
  assert(s->skip>0);
  
  if(Verbose>1) {
    // look up the original ascii code mapped to the chosen char
    for(i=0;i<256;i++)
      if(s->char_map[i]==s->chosen_char) break;
    fprintf(stderr,"Marked char is ascii %d; ", i); 
    fprintf(stderr,"one occ every %d is marked; ",s->skip);
  }

  // ------- compute number of marked chars: ceil(chosen_occ/skip)
  chosen_occ = ch_occ[s->chosen_char];
  if(chosen_occ % s->skip)
    marked_chars = chosen_occ/s->skip + 1;
  else
    marked_chars = chosen_occ/s->skip;
  // -------- alloc s->loc_occ
  s->loc_occ = (int *) malloc(sizeof(int) * (marked_chars));
  if(s->loc_occ==NULL) {     // the loop below writes through loc_occ
    fprintf(stderr,"Out of memory -compute_locations-\n");
    exit(1);
  }

  // write the text location of the ROWS ending with ch
  for(i=1,j=0,count=0; i<s->text_size; i++) 
    {                         // bwt[0] is not the marked char (see above) 
      if (s->bwt[i] == s->chosen_char) {
        if ((count % s->skip) == 0) {
          // rows up to bwt_eof_pos read sa[i-1], later rows read sa[i]
          // NOTE(review): presumably the shift accounts for the EOF
          // entry in the bwt -- confirm against the code building s->bwt
          if (i <= s->bwt_eof_pos) { 
            s->loc_occ[j] = (int) s->sa[i-1];
            assert(s->text[s->loc_occ[j]-1] == s->chosen_char);
          }
          else {
            s->loc_occ[j] = s->sa[i]; 
            assert(s->text[s->loc_occ[j]-1] == s->chosen_char);
          }
          j++;
        } 
        count++;
      }
    }
  // j is the number of marked chars
  if(Verbose>1) 
    fprintf(stderr,"%d chars marked.\n", j); 
  assert(j == marked_chars);
  assert(count == chosen_occ);  
  return j;
}       
Esempio n. 20
0
/*
 * Load an index from filename and hand it back through *index.
 *
 * Allocates an fm_index, reads the file via open_file() and parses the
 * basic prologue. Small texts are handled on a separate early path;
 * otherwise the derived fields and the mtf_seq buffer are initialised.
 *
 * Returns FM_OK on success. On failure returns FM_OUTMEM or the error
 * reported by the callee; *index is left untouched and the fm_index
 * allocated here is released.
 */
int
load_index (char * filename, void ** index)
{
	
	int error;
	fm_index *fmindex;

	fmindex = (fm_index *) malloc (sizeof (fm_index));
	if (fmindex == NULL)
		return FM_OUTMEM;
	fmindex->compress_owner = 1;
	fmindex->owner = 0;
	
	/*
	 * Load index file 
	 */
	error =
		open_file (filename, &(fmindex->compress),
			   &(fmindex->compress_size));
	if (error) {
		free (fmindex);
		return error;
	}

	/*
	 * NOTE(review): on the failure paths below the buffer returned by
	 * open_file() in fmindex->compress is not released, because how it
	 * was obtained is not visible here -- confirm and release it too.
	 */
	error = fm_read_basic_prologue (fmindex);
	if (error) {
		free (fmindex);
		return error;
	}

	if(fmindex->smalltext) {
		fmindex->skip = 0;
		if(fmindex->text_size<SMALLSMALLFILESIZE) {
			/* the text is read directly from the file buffer, 4 bytes in */
			fmindex->text = fmindex->compress+4;
			*index = fmindex;
			return FM_OK;
		}
		fmindex->owner = 1;
		fmindex->smalltext = 2;
		error = fm_bwt_uncompress(fmindex);
		if (error < 0) {
			free (fmindex);
			return error;
		}
		*index = fmindex;
		return FM_OK;
	}
	
	/*
	 * init some var 
	 */
	fmindex->int_dec_bits =
		int_log2 (int_log2
			  (fmindex->bucket_size_lev1 -
			   fmindex->bucket_size_lev2));
	
	
	if(fmindex->skip >1) {
		/* range of bwt_occ values delimiting the special (marked) char */
		fmindex->occcharinf = fmindex->bwt_occ[fmindex->specialchar];
		if(fmindex->specialchar==fmindex->alpha_size-1)
			fmindex->occcharsup = fmindex->text_size-1;
		else 
			fmindex->occcharsup = fmindex->bwt_occ[fmindex->specialchar+1];
	
		fmindex->num_marked_rows = fmindex->occcharsup-fmindex->occcharinf;
	} else  fmindex->num_marked_rows = 0;
	
	fmindex->log2_row = int_log2(fmindex->text_size);
	
	fmindex->mtf_seq =
		(uchar *) malloc (fmindex->bucket_size_lev2 * sizeof (uchar));
	if (fmindex->mtf_seq == NULL) {
		free (fmindex);
		return FM_OUTMEM;
	}
	
	/* log2textsize rounded up to a whole number of bytes, in bits */
	fmindex->var_byte_rappr = ((fmindex->log2textsize + 7) / 8)*8;
	
	*index = fmindex;
	return FM_OK;
}
Esempio n. 21
0
/* Tell whether shifting 1 left by int_log2(n) gives back n exactly. */
bool int_is_pow2( int n )
{
    const int exponent = int_log2( n );
    const int rebuilt = 1 << exponent;

    if (rebuilt == n)
        return true;
    return false;
}