Ejemplo n.º 1
0
/* ********************************************************
   write lcp statistic to filename (plain ascii format)

   x        text buffer
   p        suffix array of x (n entries, offsets into x)
   n        number of entries in p

   For every pair of adjacent entries p[i], p[i+1] the two
   suffixes are compared with scmp3(); the value it stores
   through its third argument is used as the lcp of the pair.
   The file receives the average lcp, the maximum lcp and one
   "<lcp> <count>" line for every lcp value that occurs.

   Error handling:
    - if the counters cannot be allocated, or scmp3() reports
      a pair that is not in increasing order, or the maximum
      lcp does not fit in MAX_LCP_SIZE counters, or the
      counters do not add up to n-1, a message is printed on
      stderr and the program exits with status 1;
    - if the output file cannot be opened the error is
      reported with perror() and the function returns without
      computing anything.
   ******************************************************** */
void write_lcp(char *filename, UChar *x, int *p, int n)
{
  FILE *lcp;
  Int32 *stat, i, j, max_lcp=0, sum=0;
  unsigned long long sum_lcp=0;

  stat = (Int32 *) calloc(MAX_LCP_SIZE,sizeof(Int32)); // initialized to 0
  if(stat==NULL) {
    fprintf(stderr, "calloc failed (stat)\n");
    exit(1);
  }
  if(_ds_Verbose)
    fprintf(stderr,"Writing lcp stats to file %s\n",filename);
  if((lcp = fopen(filename,"w"))==NULL) {
    // without a stream every fprintf/fclose below would be handed NULL
    perror(filename);
    free(stat);
    return;
  }

  // computes lcp
  for(i=0;i<n-1;i++) {
    if (scmp3(x+p[i], x+p[i+1], & j, MIN(n-p[i], n-p[i+1]))>=0) {
      fprintf(stderr,"Error in sa file!\n");
      exit(1);
    }
    else {
      max_lcp = MAX(max_lcp,j);
      sum_lcp += j;
      if(j<MAX_LCP_SIZE)
        stat[j]++;   // one more lcp of length j
    }
  }
  // output lcp statistics
  // with fewer than two suffixes there are no pairs: report 0 instead of 0/0
  fprintf(lcp,"Average lcp: %.2f\n",
          (n>1) ? ((double) sum_lcp)/(n-1) : 0.0);
  fprintf(lcp,"Maximum lcp: %d\n",max_lcp);
  if(max_lcp<MAX_LCP_SIZE) {
    for(i=0;i<=max_lcp;i++)
      if(stat[i]) {
        fprintf(lcp,"%10d %10d\n",i,stat[i]);
        sum += stat[i];
      }
    // every one of the n-1 adjacent pairs must have been counted once
    if(sum+1!=n) {
      fprintf(stderr,"Fatal error! Invalid lcp stats!\n");
      exit(1);
    }
  }
  else {
    fprintf(stderr,"Unable to compute lcp stats. ");
    fprintf(stderr,"Please set MAX_LCP_SIZE to %d\n",max_lcp+1);
    exit(1);
  }
  // fclose flushes buffered output, so a failure here means lost data
  if(fclose(lcp)!=0)
    perror(filename);
  free(stat);
}
Ejemplo n.º 2
0
// Sanity check of a suffix array (very slow).
// Each pair of adjacent entries p[k], p[k+1] is compared with scmp3();
// a non-negative result is counted as a pair that is out of order.
// If verbose>1 every offending pair is printed as it is found.
// A one-line summary is printed on stdout at the end.
void check_sa_ordering(UChar *x, int *p, int n, int verbose)
{
  int k = 0;
  int ignored;        // written by scmp3(), not needed here
  int bad = 0;

  printf("Checking...\n");
  while (k < n-1) {
    int cur = p[k];
    int nxt = p[k+1];
    int limit = MIN(n-cur, n-nxt);

    if (scmp3(x+cur, x+nxt, &ignored, limit) >= 0) {
      bad++;
      if (verbose > 1)
        printf("---> i=%d  p[i]=%d  p[i+1]=%d\n", k, cur, nxt);
    }
    k++;
  }

  if (bad == 0)
    printf("done.\n");
  else
    printf("%d suffixes out of order!\n", bad);
}
Ejemplo n.º 3
0
/*
 * Fill s->sa with the suffix array of s->text.
 *
 * If the file named by Safile_name can be opened and is not empty, the
 * suffix array is read from it: the file must contain s->text_size
 * pointers of int_log2(s->text_size) bits each, packed and rounded up to
 * a whole number of bytes; any other size is reported through
 * fatal_error(). Otherwise the suffix array is computed from scratch,
 * with larsson_sada_sufsort() when Use_larsson_sada is set and with
 * suffixsort5n() when it is not.
 *
 * The stream opened on Safile_name is closed on every path.
 */
void build_sa(bwi_input *s)
{
  int scmp3(unsigned char *p, unsigned char *q, int maxl);
  void init_bit_buffer(void);
  int fbit_read(FILE *,int);
  int *larsson_sada_sufsort(uchar *, int, int);
  int *suffixsort5n(uchar *, int);
  void out_of_mem(char *s);
  int int_log2(int);
  int i, pointer_size,q,r,sa_size;
  long n;                  // file length as returned by ftell (long, not int)
  FILE *safile;

  /* ------------ check sa file ---------------- */
  n=0;
  safile = fopen(Safile_name,"rb");
  if(safile!=NULL) {
    fseek(safile,0L,SEEK_END);
    n=ftell(safile);
  }

  if (n==0) {
    // the file may exist but be empty: it is of no use, release it
    if(safile!=NULL) {
      fclose(safile);
      safile = NULL;
    }
    // ------- build sa using larsson-sada or 5n
    if(Verbose)  fprintf(stderr, " from scratch ");
    if(Use_larsson_sada) {
      if(Verbose)  fprintf(stderr, "(using ls) ... ");
      s->sa = larsson_sada_sufsort(s->text,s->text_size,s->alpha_size);
    }
    else {
      if(Verbose)  fprintf(stderr, "(using 5n) ... ");
      s->sa = suffixsort5n(s->text,s->text_size);
    }
  }
  else {
    // ------ read sa from file --------
    pointer_size = int_log2(s->text_size);
    // --- compute  sa_size = s->text_size * pointer_size + 7)/8
    // --- use q and r to avoid overflow
    q = s->text_size/8; r = s->text_size % 8;
    sa_size = (q*pointer_size) + (r*pointer_size+7)/8;
    // also catches n == -1, i.e. ftell() failure
    if (n != (long) sa_size) {
      fclose(safile);
      fatal_error("Invalid .sa file\n");
    }
    if(Verbose) fprintf(stderr, " by reading it from file... ");
    // allocate space for the suffix array
    s->sa = (int *) malloc(s->text_size * sizeof(int));
    if(s->sa==NULL) out_of_mem("build_sa");
    rewind(safile);
    init_bit_buffer();
    for(i=0; i<s->text_size; i++)// read one suffix-array pointer at a time
      s->sa[i] = fbit_read(safile,pointer_size);
    fclose(safile);
  }
  // check the suffix array
#if 0
   for (i=0; i<s->text_size-1; ++i)
     if (scmp3(s->text+s->sa[i], s->text+s->sa[i+1],
                MIN(s->text_size-s->sa[i], s->text_size-s->sa[i+1]))>=0) {
       fprintf(stderr, "Suffix array check failed at position %d\n", i);
       exit(1);
     }
#endif
}