Esempio n. 1
0
/* *******************************************************************
   Procedure to be called by external programs before calling ds_ssort().
   Using this procedure external programs can choose
   the parameters Anchor_dist and Blind_sort_ratio.
   The procedure returns 0 if something goes wrong, otherwise
   it returns the overshoot, that is the amount of extra space
   required at the end of the array containing the text.
   ******************************************************************** */
int init_ds_ssort(int adist, int bs_ratio)
{
    /* Start from the default settings, then apply the caller's values. */
    set_global_variables();

    Anchor_dist      = adist;
    Blind_sort_ratio = bs_ratio;
    Shallow_limit    = Anchor_dist + 50;   /* tied to the anchor distance */

    /* A non-zero result from the parameter check is reported as 0;
       otherwise the caller gets the overshoot. */
    return check_global_variables() ? 0 : compute_overshoot();
}
Esempio n. 2
0
//int init_ds_ssort(int adist, int bs_ratio)
/*
 * Set up the sorter's global parameters before ds_ssort() is used
 * (variant taking and returning Int64).
 *
 *   adist     value stored in Anchor_dist
 *   bs_ratio  value stored in Blind_sort_ratio
 *
 * Returns 0 when check_global_variables() reports a problem (non-zero
 * result), otherwise the value of compute_overshoot().
 */
Int64 init_ds_ssort(Int64 adist, Int64 bs_ratio)
{
  set_global_variables();

  Anchor_dist      = adist;
  Blind_sort_ratio = bs_ratio;

  /* The original author also experimented with Anchor_dist + 1000000,
     i.e. a Shallow_limit as long as the longest of the lcp-s. */
  Shallow_limit = Anchor_dist + 50;

  return check_global_variables() ? 0 : compute_overshoot();
}
Esempio n. 3
0
/*
 * Command-line driver for ds_ssort().
 *
 * Parses the tuning options, reads the whole input file into memory
 * (with compute_overshoot() extra bytes after the text), builds the
 * suffix array with ds_ssort(), reports the elapsed time and then,
 * depending on the options, writes the bwt / sa / lcp to files, checks
 * the suffix ordering and prints the suffix array.
 *
 * Returns 0 on success, and also after printing the usage text (no input
 * file given) or when the input file is empty.  Returns 1 on any I/O,
 * size or allocation failure.  Calls exit(1) on an unknown option or when
 * check_global_variables() rejects the parameters.
 */
int main(int argc, char *argv[])
{
  void write_sa(char *filename, int *p, int n);
  void write_lcp(char *filename, UChar *x,int *p, int n);
  void write_bwt(char *filename, UChar *x,int *p, int n);
  void check_sa_ordering(UChar *x,int *p, int n, int);
  void print_sa_onscreen(UChar *x,int *p, int n, int);
  int c, n;
  int *p = NULL;                 // suffix array
  UChar *x = NULL;               // text (plus overshoot)
  int print_sa, check_sa, overshoot;
  clock_t end, start, end_real, start_real;
  struct tms r;
  double tot_time = 0.0;
  double tot_time_real = 0.0;
  long ticks;                    // clock ticks per second for times()
  long flen;                     // file length as reported by ftell()
  size_t nread;
  // INT_MAX without relying on <limits.h> being included by the file
  const long int_max = (long) (~0U >> 1);
  extern char *optarg;
  extern int optind, opterr, optopt;
  char *fnam, *sa_filename;
  char *lcp_filename, *bwt_filename;   // names for (optional) lcp and bwt files
  FILE *f = NULL;

  /* ------------ set default values ------------- */
  set_global_variables();
  print_sa = check_sa = 0;
  sa_filename = NULL;
  lcp_filename = NULL;
  bwt_filename = NULL;

  /* ------------- read options from command line ----------- */
  opterr = 0;   // we print our own message for unknown options
  while ((c = getopt(argc, argv, "b:d:l:p:r:w:cvux:f:T:W:B:")) != -1) {
    switch (c)
      {
      case 'b':
        bwt_filename = optarg; break;
      case 'c':
        check_sa++; break;
      case 'd':
        Anchor_dist = atoi(optarg); break;
      case 'l':
        Shallow_limit = atoi(optarg); break;
      case 'p':
        print_sa = atoi(optarg); break;
      case 'x':
        _ds_Word_size = atoi(optarg); break;
      case 'v':
        _ds_Verbose++; break;
      case 'w':
        sa_filename = optarg; break;
      case 'f':
        Max_pseudo_anchor_offset = atoi(optarg); break;
      case 'r':
        B2g_ratio = atoi(optarg); break;
      case 'u':
        Update_anchor_ranks = 1; break;
      case 'T':
        Mk_qs_thresh = atoi(optarg); break;
      case 'W':
        lcp_filename = optarg; break;
      case 'B':
        Blind_sort_ratio = atoi(optarg); break;
      case '?':
        fprintf(stderr,"Unknown option: %c -main-\n", optopt);
        exit(1);
      default:
        break;
      }
  }

  if (optind < argc)
    fnam = argv[optind];
  else {
    /* no input file: print the usage text with the current defaults */
    fprintf(stderr, "Usage:\n\t%s [-b bwtfile][-cuv][-d dist]",argv[0]);
    fprintf(stderr, "[-l len][-p num][-f maxoff][-r ratio]\n");
    fprintf(stderr,
            "\t   [-T thresh][-w safile][-W lcpfile][-x wsize][-B ratio]");
    fprintf(stderr, " file\n\n");
    fprintf(stderr,"\t-b bwtfile  write bwt to bwtfile\n");
    fprintf(stderr,
            "\t-B ratio    blind_sort ratio [def. %d]\n",Blind_sort_ratio);
    fprintf(stderr,"\t-c          check the sa (could be very slow)\n");
    fprintf(stderr,"\t-d dist     anchor distance [def. %d]\n",Anchor_dist);
    fprintf(stderr,"\t-f maxoff   Maximum offset for forward ");
    fprintf(stderr,"pseudo-anchors [def. %d]\n",Max_pseudo_anchor_offset);
    fprintf(stderr,
            "\t-l len      shallow sort limit [def. %d]\n",Shallow_limit);
    fprintf(stderr,
            "\t-r ratio    bucket to group max ratio [def. %d]\n",B2g_ratio);
    fprintf(stderr,"\t-p num      print num char of each suffix [def. 0]\n");
    fprintf(stderr,
            "\t-T thresh   Threshold for mk-qs [def. %d]\n", Mk_qs_thresh);
    fprintf(stderr,"\t-u          updates anchor ranks in get_rank()\n");
    fprintf(stderr,"\t-v          produces a verbose output\n");
    fprintf(stderr,"\t-w safile   write sa to safile\n");
    fprintf(stderr,
            "\t-W lcpfile  check sa and write lcp to lcpfile (very slow)\n");
    fprintf(stderr,
            "\t-x wsize    word size in mkqs (default %d)\n\n",_ds_Word_size);
    return 0;
  }

  if (_ds_Verbose) {
    fprintf(stderr,"Command line: ");
    for (c = 0; c < argc; c++)
      fprintf(stderr,"%s ",argv[c]);
    fprintf(stderr,"\n");
  }

  /* -------- check parameters ------------- */
  if (check_global_variables()) {
    exit(1);
  }

  /* ---------- open file and find its length ----------- */
  if (! (f = fopen(fnam, "rb"))) {
    perror(fnam);
    return 1;
  }
  if (fseek(f, 0L, SEEK_END)) {
    perror(fnam);
    goto fail;
  }
  flen = ftell(f);
  if (flen < 0) {                // ftell() reports failure with -1
    perror(fnam);
    goto fail;
  }
  if (flen == 0) {
    fprintf(stderr, "%s: file empty\n", fnam);
    fclose(f);
    return 0;
  }

  // ------ allocate memory for text and sa -------
  overshoot = compute_overshoot();
  // n and n+overshoot are handled as int everywhere below (ds_ssort()
  // takes an int length), so refuse inputs that would not fit
  if (overshoot < 0 || flen > int_max - overshoot) {
    fprintf(stderr, "%s: cannot process file (size %ld, overshoot %d)\n",
            fnam, flen, overshoot);
    goto fail;
  }
  n = (int) flen;
  // guard the byte-count multiplication against size_t wrap-around
  if ((size_t) n > ((size_t) -1) / sizeof *p) {
    fprintf(stderr, "malloc failed\n");
    goto fail;
  }
  p = malloc(((size_t) n) * sizeof *p);                          // sa
  x = malloc(((size_t) n + (size_t) overshoot) * sizeof *x);     // text
  if (! p || ! x) {
    fprintf(stderr, "malloc failed\n");
    goto fail;
  }

  // ------------ read input text ---------------
  rewind(f);
  nread = fread(x, (size_t) 1, (size_t) n, f);
  if (nread != (size_t) n) {
    fprintf(stderr,"Error in read() (%d vs %d) (main)\n",(int) nread,n);
    perror(fnam);
    goto fail;
  }
  fclose(f);
  f = NULL;

  /* ---------  start measuring time ------------- */
  if (_ds_Verbose)
    fprintf(stderr,"Starting sa construction ... \n");
  start_real = times(&r);
  start = (r.tms_utime + r.tms_stime);     /* user + system */
  ds_ssort(x, p, n);
  end_real = times(&r);
  end = (r.tms_utime + r.tms_stime);       /* user + system */

  /* times() counts in clock ticks; convert to seconds.  The tick rate is
     taken from sysconf() when <unistd.h> is visible, from the legacy
     CLK_TCK macro otherwise; if neither is available the times stay 0. */
#if defined(_SC_CLK_TCK)
  ticks = sysconf(_SC_CLK_TCK);
#elif defined(CLK_TCK)
  ticks = (long) CLK_TCK;
#else
  ticks = 0;
#endif
  if (ticks > 0) {
    tot_time = ((double) (end - start)) / (double) ticks;
    tot_time_real = ((double) (end_real - start_real)) / (double) ticks;
  }
  printf("Elapsed time: %.2f seconds (user+sys). Total real time: %.2f.\n",
         tot_time, tot_time_real);

  // --------------- write bwt to a file
  if (bwt_filename != NULL)
    write_bwt(bwt_filename,x,p,n);

  // --------------- write sa to a file
  if (sa_filename != NULL)
    write_sa(sa_filename,p,n);

  // --------------- write lcp to a file
  if (lcp_filename != NULL)
    write_lcp(lcp_filename,x,p,n);

  // ------------ check sa --------
  if (check_sa)
    check_sa_ordering(x,p,n,check_sa);

  // ----- display sa -------
  if (print_sa)
    print_sa_onscreen(x,p,n,print_sa);

  // deallocate and exit
  free(x); free(p);
  return 0;

fail:
  /* common error exit: release whatever was acquired so far */
  free(x);
  free(p);
  if (f != NULL)
    fclose(f);
  return 1;
}