/* *******************************************************************
   Procedure to be called by an external program before calling
   ds_ssort(). Through this procedure external programs can choose the
   parameters Anchor_dist and Blind_sort_ratio; Shallow_limit is then
   derived as Anchor_dist + 50.

   adist     value stored into Anchor_dist
   bs_ratio  value stored into Blind_sort_ratio

   Returns 0 if something goes wrong (check_global_variables() reports
   a nonzero status); otherwise returns the overshoot, that is, the
   amount of extra space required at the end of the array containing
   the text.
   ******************************************************************* */
int init_ds_ssort(int adist, int bs_ratio)
{
  /* reset the globals first (presumably to their defaults), then
     override the two parameters chosen by the caller */
  set_global_variables();
  Blind_sort_ratio = bs_ratio;
  Anchor_dist = adist;
  Shallow_limit = Anchor_dist + 50;

  /* a nonzero status from the checker means the settings are rejected */
  return check_global_variables() ? 0 : compute_overshoot();
}
//int init_ds_ssort(int adist, int bs_ratio) Int64 init_ds_ssort(Int64 adist, Int64 bs_ratio) { set_global_variables(); Anchor_dist = adist; Blind_sort_ratio = bs_ratio; Shallow_limit = Anchor_dist + 50; //Shallow_limit = Anchor_dist + 1000000; // Shallow_limit = the longest of the lcp-s! if (check_global_variables()) { return 0; } return compute_overshoot(); }
int main(int argc, char *argv[]) { void write_sa(char *filename, int *p, int n); void write_lcp(char *filename, UChar *x,int *p, int n); void write_bwt(char *filename, UChar *x,int *p, int n); void check_sa_ordering(UChar *x,int *p, int n, int); void print_sa_onscreen(UChar *x,int *p, int n, int); int c, *p, n; int print_sa, check_sa, num_opt,overshoot; UChar *x; clock_t end,start, end_real, start_real; struct tms r; double tot_time = 0.0; double tot_time_real = 0.0; extern char *optarg; extern int optind, opterr, optopt; char *fnam, *sa_filename; char *lcp_filename,*bwt_filename; // names for (optional) lcp and bwt files FILE *f; /* ------------ set default values ------------- */ set_global_variables(); print_sa=check_sa=0; sa_filename = NULL; lcp_filename = NULL; bwt_filename = NULL; /* ------------- read options from command line ----------- */ num_opt = opterr = 0; while ((c=getopt(argc, argv, "b:d:l:p:r:w:cvux:f:T:W:B:")) != -1) { switch (c) { case 'b': bwt_filename = optarg; break; case 'c': check_sa++; break; case 'd': Anchor_dist = atoi(optarg); break; case 'l': Shallow_limit = atoi(optarg); break; case 'p': print_sa = atoi(optarg); break; case 'x': _ds_Word_size = atoi(optarg); break; case 'v': _ds_Verbose++; break; case 'w': sa_filename = optarg; break; case 'f': Max_pseudo_anchor_offset = atoi(optarg); break; case 'r': B2g_ratio = atoi(optarg); break; case 'u': Update_anchor_ranks = 1; break; case 'T': Mk_qs_thresh = atoi(optarg); break; case 'W': lcp_filename = optarg; break; case 'B': Blind_sort_ratio = atoi(optarg); break; case '?': fprintf(stderr,"Unknown option: %c -main-\n", optopt); exit(1); } num_opt++; } if(optind<argc) fnam=argv[optind]; else { fprintf(stderr, "Usage:\n\t%s [-b bwtfile][-cuv][-d dist]",argv[0]); fprintf(stderr, "[-l len][-p num][-f maxoff][-r ratio]\n"); fprintf(stderr, "\t [-T thresh][-w safile][-W lcpfile][-x wsize][-B ratio]"); fprintf(stderr, " file\n\n"); fprintf(stderr,"\t-b bwtfile write bwt to bwtfile\n"); 
fprintf(stderr, "\t-B ratio blind_sort ratio [def. %d]\n",Blind_sort_ratio); fprintf(stderr,"\t-c check the sa (could be very slow)\n"); fprintf(stderr,"\t-d dist anchor distance [def. %d]\n",Anchor_dist); fprintf(stderr,"\t-f maxoff Maximum offset for forward "); fprintf(stderr,"pseudo-anchors [def. %d]\n",Max_pseudo_anchor_offset); fprintf(stderr, "\t-l len shallow sort limit [def. %d]\n",Shallow_limit); fprintf(stderr, "\t-r ratio bucket to group max ratio [def. %d]\n",B2g_ratio); fprintf(stderr,"\t-p num print num char of each suffix [def. 0]\n"); fprintf(stderr, "\t-T thresh Threshold for mk-qs [def. %d]\n", Mk_qs_thresh); fprintf(stderr,"\t-u updates anchor ranks in get_rank()\n"); fprintf(stderr,"\t-v produces a verbose output\n"); fprintf(stderr,"\t-w safile write sa to safile\n"); fprintf(stderr, "\t-W lcpfile check sa and write lcp to lcpfile (very slow)\n"); fprintf(stderr, "\t-x wsize word size in mkqs (default %d)\n\n",_ds_Word_size); return 0; } if(_ds_Verbose) { fprintf(stderr,"Command line: "); for(c=0;c<argc;c++) fprintf(stderr,"%s ",argv[c]); fprintf(stderr,"\n"); } /* -------- check parameters ------------- */ if(check_global_variables()) { exit(1); } /* ---------- open file and read text ----------- */ if (! (f=fopen(fnam, "rb"))) { perror(fnam); return 1; } if (fseek(f, 0L, SEEK_END)) { perror(fnam); return 1; } n=ftell(f); if (n==0) { fprintf(stderr, "%s: file empty\n", fnam); return 0; } // ------ allocate memory for text and sa ------- overshoot = compute_overshoot(); p=malloc((n)*sizeof *p); // sa x=malloc((n+overshoot)*sizeof *x); // text if (! p || ! 
x) { fprintf(stderr, "malloc failed\n"); return 1; } // ------------ read input text --------------- rewind(f); c=fread(x, (size_t) 1, (size_t) n, f); // lseek(fileno(f),0,SEEK_SET); // c=read(fileno(f), x, (size_t) n); if(c!=n) { fprintf(stderr,"Error in read() (%d vs %d) (main)\n",c,n); perror(fnam); return 1; } fclose(f); /* --------- start measuring time ------------- */ if(_ds_Verbose) fprintf(stderr,"Starting sa construction ... \n"); start_real = times(&r); start = (r.tms_utime+r.tms_stime); /* user + system */ ds_ssort(x, p, n); end_real = times(&r); end = (r.tms_utime+r.tms_stime); /* user + system */ // tot_time = ((double) (end-start))/CLK_TCK; //tot_time_real = ((double) (end_real-start_real))/CLK_TCK; printf("Elapsed time: %.2f seconds (user+sys). Total real time: %.2f.\n", tot_time, tot_time_real); // --------------- write bwt to a file if(bwt_filename!=NULL) write_bwt(bwt_filename,x,p,n); // --------------- write sa to a file if(sa_filename!=NULL) write_sa(sa_filename,p,n); // --------------- write lcp to a file if(lcp_filename!=NULL) write_lcp(lcp_filename,x,p,n); // ------------ check sa -------- if(check_sa) check_sa_ordering(x,p,n,check_sa); // ----- display sa ------- if(print_sa) print_sa_onscreen(x,p,n,print_sa); // deallocate and exit free(x); free(p); return 0; }