/*
 * Build an index for text[0..length-1]:
 *   1. parse build_options,
 *   2. compute the suffix array with suffixsort(),
 *   3. hand it to csa_new(), which is given the two file names
 *      "<filename>.psi" and "<filename>.idx",
 *   4. call load_index(filename, index).
 *
 * build_options is split on ' ', '=' and ';'.  Recognised tokens:
 *   samplerate <int>   forwarded to csa_new() (default 16)
 *   samplepsi  <int>   forwarded to csa_new() (default 128)
 *   filename   <name>  base name of the two output files (required,
 *                      at most 255 characters)
 *   free_text          free(text) once csa_new() has returned
 *
 * Returns 0 on success and non-zero on failure:
 *   1  out of memory
 *   2  no usable "filename" option (missing, empty or too long)
 *   3  length is too large for the int-based suffix sorter
 * On failure text is never freed, even if free_text was requested.
 *
 * NOTE(review): codes 2 and 3 are new; previously these situations were
 * undefined behaviour.  Confirm they fit the error table used by callers.
 * NOTE(review): the results of csa_new() and load_index() are not checked
 * because their return types are not visible from this function.
 */
int build_index(UCHAR *text, ULONG length, char *build_options, void **index){
  char delimiters[] = " =;";
  char filename[256] = "";
  /* Room for the longest accepted filename plus ".psi"/".idx". */
  char fname1[sizeof filename + 4], fname2[sizeof filename + 4];
  int rankb_w=16,rankb_w2=128;
  int free_text=false; /* don't free text by default */
  int bad_filename=0;
  int n, i;
  int *x, *p;

  if (build_options != NULL) {
    int j,num_parameters;
    char ** parameters;
    parse_parameters(build_options,&num_parameters, &parameters, delimiters);
    for (j=0; j<num_parameters;j++) {
      if ((strcmp(parameters[j], "samplerate") == 0 ) && (j < num_parameters-1) ) {
        rankb_w=atoi(parameters[j+1]);
        j++;
      } else if  ((strcmp(parameters[j], "samplepsi") == 0 ) && (j < num_parameters-1) ) {
        rankb_w2=atoi(parameters[j+1]);
        j++;
      } else if  ((strcmp(parameters[j], "filename") == 0 ) && (j < num_parameters-1) ) {
        /* Bounded copy; remember truncation instead of overflowing. */
        int len=snprintf(filename,sizeof filename,"%s",parameters[j+1]);
        bad_filename = (len < 0 || (size_t)len >= sizeof filename);
        j++;
      } else if (strcmp(parameters[j], "free_text") == 0 ) {
        free_text=true;
      }
    }
    free_parameters(num_parameters, &parameters);
  }

  /* Without a base name the output files cannot be named. */
  if (bad_filename || filename[0] == '\0') {
    return 2;
  }

  /* The sorter works on int indices and stores n+1 in the array, so both
     n+1 and the allocation size (n+1)*sizeof(int) must be representable. */
  if (length >= (ULONG)INT_MAX ||
      (size_t)length + 1 > (size_t)-1 / sizeof *p) {
    return 3;
  }
  n=(int)length;

  /* make the SA */
  p= (int *)malloc(((size_t)n+1)*sizeof *p);
  x= (int *)malloc(((size_t)n+1)*sizeof *x);
  if (! p || ! x) {
    /* free(NULL) is a no-op, so this releases whichever one succeeded. */
    free(p);
    free(x);
    return 1;
  }
  for ( i=0; i<n; i++) {
    x[i]=text[i];
  }

  /* Alphabet bound UCHAR_MAX+1, lower bound 0. */
  suffixsort(x, p, n, UCHAR_MAX+1, 0);
  free(x);
  p[0] = n;
  /* End Make SA */

  /* The original string occupies 0..n-1 and position n holds a 0.
     p[0] is always n.  p[1..n] contain 0..n-1. */
  for (i=0; i<=n; ++i) p[i]++;  /* now p[1..n] contain 1..n and p[0]=n+1 */

  snprintf(fname1,sizeof fname1,"%s.psi",filename);
  snprintf(fname2,sizeof fname2,"%s.idx",filename);
  csa_new(n,p,text,fname1,fname2,rankb_w,rankb_w2);
  free(p);
  if (free_text) free(text);
  load_index(filename, index);
  return 0;
}
/* Example #2
   0 */
/* ---------------------------------------------------------------
   Suffix-array construction: a thin driver around the
   Larsson-Sadakane suffixsort() routine.

     s           input bytes s[0..size-1]
     size        number of input bytes
     alpha_size  forwarded to suffixsort() as its k argument

   Two int arrays of size+1 entries are allocated.  One receives a
   widened copy of the input and is released before returning; the
   other is passed to suffixsort() as its second argument.  The
   returned pointer addresses the SECOND entry of that array, so the
   address originally obtained from malloc() is (result - 1).

   On allocation failure out_of_mem() is called.
   NOTE(review): presumably out_of_mem() does not return; otherwise
   the code below would dereference a null pointer -- confirm.
   --------------------------------------------------------------- */
int *larsson_sada_sufsort(uchar * s, int size, int alpha_size)
{
  void out_of_mem(char *s);
  void suffixsort(int *x, int *p, int n, int k, int l);
  int *suffixes;
  int *work;
  int pos;

  suffixes = (int *) malloc((1 + size) * sizeof *suffixes);
  work = (int *) malloc((1 + size) * sizeof *work);
  if (suffixes == NULL || work == NULL) {
    out_of_mem("larsson_sada_sufsort");
  }

  /* widen each input byte to an int in the scratch array */
  pos = 0;
  while (pos < size) {
    work[pos] = s[pos];
    pos++;
  }

  /* sort, then drop the scratch array */
  suffixsort(work, suffixes, size, alpha_size, 0);
  free(work);

  /* skip entry 0 of the output array */
  return &suffixes[1];
}