/* ********************************************************
   write lcp statistic to filename (plain ascii format)

   x is the text, p the suffix array (offsets into x) and n the
   number of entries of p. For every pair of adjacent entries
   p[i], p[i+1] the two suffixes are compared with scmp3(), which
   stores the length of their common prefix in its third argument.

   The output file contains the average lcp, the maximum lcp and
   one line "length count" for every lcp length that occurs.

   The process is terminated with exit(1) when:
     - memory for the histogram cannot be allocated;
     - the output file cannot be opened;
     - two adjacent suffixes are not in strictly increasing order
       (scmp3 returns a value >= 0);
     - the maximum lcp does not fit in MAX_LCP_SIZE counters;
     - the histogram does not account for every adjacent pair.
   ******************************************************** */
void write_lcp(char *filename, UChar *x, int *p, int n)
{
  FILE *lcp;
  Int32 *lcp_count, i, j, max_lcp=0, sum=0;
  Int32 pairs = (n>1) ? n-1 : 0;     // number of adjacent suffix pairs
  unsigned long long sum_lcp=0;

  // one counter per lcp length, initialized to 0 by calloc
  lcp_count = calloc(MAX_LCP_SIZE, sizeof *lcp_count);
  if(lcp_count==NULL) {
    fprintf(stderr, "calloc failed (stat)\n");
    exit(1);
  }

  if(_ds_Verbose)
    fprintf(stderr,"Writing lcp stats to file %s\n",filename);

  // without an output stream nothing below can work: writing to a
  // NULL FILE* is undefined behavior, so stop here like every other
  // failure in this function
  if((lcp = fopen(filename,"w"))==NULL) {
    perror(filename);
    free(lcp_count);
    exit(1);
  }

  // computes lcp
  for(i=0;i<n-1;i++) {
    if (scmp3(x+p[i], x+p[i+1], & j, MIN(n-p[i], n-p[i+1]))>=0) {
      fprintf(stderr,"Error in sa file!\n");
      exit(1);
    }
    max_lcp = MAX(max_lcp,j);
    sum_lcp += j;
    if(j<MAX_LCP_SIZE)
      lcp_count[j]++;                // one more lcp of length j
  }

  // output lcp statistics
  // with fewer than two suffixes there is no pair: report 0 instead
  // of dividing by zero
  fprintf(lcp,"Average lcp: %.2f\n",
          pairs>0 ? ((double) sum_lcp)/pairs : 0.0);
  fprintf(lcp,"Maximum lcp: %d\n",max_lcp);

  if(max_lcp>=MAX_LCP_SIZE) {
    // some lcp values were not counted: the histogram is incomplete
    fprintf(stderr,"Unable to compute lcp stats. ");
    fprintf(stderr,"Please set MAX_LCP_SIZE to %d\n",max_lcp+1);
    exit(1);
  }

  for(i=0;i<=max_lcp;i++)
    if(lcp_count[i]) {
      fprintf(lcp,"%10d %10d\n",i,lcp_count[i]);
      sum += lcp_count[i];
    }

  // every adjacent pair must have been counted exactly once
  if(sum!=pairs) {
    fprintf(stderr,"Fatal error! Invalid lcp stats!\n");
    exit(1);
  }

  // fclose flushes the buffered output: report a failed write
  if(fclose(lcp)!=0)
    perror(filename);
  free(lcp_count);
}
// function for checking the sa (very slow) // if verbose>1 prints which suffixes are out of order void check_sa_ordering(UChar *x, int *p, int n, int verbose) { int i,j,wrong=0; printf("Checking...\n"); for (i=0; i<n-1; ++i) { if (scmp3(x+p[i], x+p[i+1], & j, MIN(n-p[i], n-p[i+1]))>=0) { wrong++; if(verbose>1) { printf("---> i=%d p[i]=%d p[i+1]=%d\n", i, p[i], p[i+1]); } } } if(wrong) printf("%d suffixes out of order!\n",wrong); else printf("done.\n"); }
/* ********************************************************
   Fill s->sa with the suffix array of s->text.

   If the file Safile_name can be opened and is not empty, the
   suffix array is read from it: the file must contain exactly
   s->text_size entries of int_log2(s->text_size) bits each, rounded
   up to a whole number of bytes, otherwise fatal_error() is called.
   Otherwise the suffix array is built from scratch with
   larsson_sada_sufsort() (when Use_larsson_sada is set) or
   suffixsort5n().

   In the read-from-file case s->sa is allocated here with malloc();
   out_of_mem() is called when the allocation fails.
   ******************************************************** */
void build_sa(bwi_input *s)
{
  // NOTE(review): this 3-argument prototype is only used by the
  // disabled check at the end of the function -- confirm it matches
  // the real definition of scmp3 before re-enabling that check
  int scmp3(unsigned char *p, unsigned char *q, int maxl);
  void init_bit_buffer(void);
  int fbit_read(FILE *,int);
  int *larsson_sada_sufsort(uchar *, int, int);
  int *suffixsort5n(uchar *, int);
  void out_of_mem(char *s);
  int int_log2(int);
  int i, pointer_size,q,r,sa_size;
  long n;                  // size of the sa file; long as returned by ftell
  FILE *safile;

  /* ------------ check sa file ---------------- */
  n=0;
  safile = fopen(Safile_name,"rb");
  if(safile!=NULL) {
    fseek(safile,0L,SEEK_END);
    n=ftell(safile);
  }

  if (n==0) {
    // the file is missing or empty: if it was opened it is of no
    // use, so release it instead of leaking the handle
    if(safile!=NULL)
      fclose(safile);

    // ------- build sa using larsson-sada or 5n
    if(Verbose) fprintf(stderr, " from scratch ");
    if(Use_larsson_sada) {
      if(Verbose) fprintf(stderr, "(using ls) ... ");
      s->sa = larsson_sada_sufsort(s->text,s->text_size,s->alpha_size);
    }
    else {
      if(Verbose) fprintf(stderr, "(using 5n) ... ");
      s->sa = suffixsort5n(s->text,s->text_size);
    }
  }
  else {
    // ------ read sa from file --------
    pointer_size = int_log2(s->text_size);
    // --- compute sa_size = (s->text_size * pointer_size + 7)/8
    // --- use q and r to avoid overflow
    q = s->text_size/8;
    r = s->text_size % 8;
    sa_size = (q*pointer_size) + (r*pointer_size+7)/8;
    // a failed ftell (-1) also ends up here and is rejected
    if (n != sa_size)
      fatal_error("Invalid .sa file\n");
    if(Verbose) fprintf(stderr, " by reading it from file... ");
    // allocate space for the suffix array
    s->sa = (int *) malloc(s->text_size * sizeof(int));
    if(s->sa==NULL) out_of_mem("build_sa");
    rewind(safile);
    init_bit_buffer();
    for(i=0; i<s->text_size; i++)  // read one suffix-array pointer at a time
      s->sa[i] = fbit_read(safile,pointer_size);
    fclose(safile);
  }

  // check the suffix array
#if 0
  for (i=0; i<s->text_size-1; ++i)
    if (scmp3(s->text+s->sa[i], s->text+s->sa[i+1],
              MIN(s->text_size-s->sa[i], s->text_size-s->sa[i+1]))>=0) {
      fprintf(stderr, "Suffix array check failed at position %d\n", i);
      exit(1);
    }
#endif
}