Beispiel #1
0
/*
 * build_index - build a plain suffix-array index over text[0..length-1].
 *
 * build_options is an optional string of tokens separated by ' ', '=' or ';':
 *   copy_text  - index a private copy of text instead of the caller's buffer
 *   free_text  - free() the caller's text once it has been copied
 *                (only accepted together with copy_text)
 *
 * On success the new TSA_Un is stored in *index and 0 is returned.
 * Returns 1 when an allocation fails and 5 when free_text is given without
 * copy_text.  On failure *index is left untouched and everything allocated
 * by this function is released again.  Note that with free_text the caller's
 * buffer has already been freed by the time a later allocation can fail.
 */
int build_index(uchar *text, ulong length, char *build_options, void **index){
/*if (text[length-1]!='\0') return 2;*/
  TSA_Un *_index= (TSA_Un *) malloc(sizeof(TSA_Un));
  ulong *p = NULL;
  uchar *x = NULL;
  long overshoot;
  char delimiters[] = " =;";
  int j,num_parameters;
  char ** parameters;
  int copy_text=false; /* don't copy text by default */
  int free_text=false; /* don't free text by default */
  int error = 1;       /* every failure below except the option check is OOM */

  if (!_index) return 1;
  /* Known state for the cleanup path. */
  _index->text = NULL;
  _index->own = false;

  if (build_options != NULL) {
    parse_parameters(build_options,&num_parameters, &parameters, delimiters);
    for (j=0; j<num_parameters;j++) {
      if (strcmp(parameters[j], "copy_text") == 0 )
        copy_text=true;
      else if (strcmp(parameters[j], "free_text") == 0 )
        free_text=true;
    }
    free_parameters(num_parameters, &parameters);
  }

  /* Consistency of parameters: freeing a text we still point at is invalid. */
  if ((!copy_text) && (free_text)) {
    error = 5;
    goto fail;
  }

  if ( !copy_text ) {
    _index->text = text;
  } else {
    _index->text = (uchar *) malloc(sizeof(uchar)*length);
    if (!_index->text) goto fail;
    memcpy(_index->text, text, length);
    _index->own=true;
  }
  if ( free_text )
    free(text);

  _index->n=length;

  /* Make suffix array.  ds_ssort works on a scratch copy that is `overshoot`
     bytes longer than the text.
     NOTE(review): other callers treat overshoot == 0 as an initialisation
     failure; this function has no error code for it, so it is not checked. */
  overshoot = init_ds_ssort(500, 2000);
  p= (ulong *) malloc (sizeof(ulong)*(length));
  if (!p) goto fail;
  x= (uchar *) malloc (sizeof(uchar)*(length+overshoot));
  if (!x) goto fail;
  memcpy(x, _index->text, length);
  ds_ssort( x, p, _index->n);
  free(x);

  _index->pos = p;
  (*index) = _index;
  return 0;

fail:
  free(x);
  free(p);
  if (_index->own)
    free(_index->text);   /* only the private copy is ours to release */
  free(_index);
  return error;
}
Beispiel #2
0
/*
 * getSuffixArray: build the suffix array of seq->seq with the deep-shallow
 * sorter (cf. DeepShallow/testlcp.c).
 *
 * seq->seq is reallocated to seq->len plus the overshoot reported by
 * init_ds_ssort, so the caller's pointer to the sequence text may change.
 * The returned array has seq->len + 1 slots; ds_ssort is handed the array
 * starting at slot 1, and slot 0 is not written here.
 */
Int64 *getSuffixArray(Sequence *seq){
  Int64 *suffixes;
  Int64 padding = init_ds_ssort(500, 2000);

  if (!padding)
    eprintf("ERROR: ds initialization failed.\n");

  suffixes = (Int64 *)emalloc((size_t)(seq->len + 1) * sizeof(Int64));

  /* grow the text in place so the sorter can read past its end */
  seq->seq = (char *)erealloc(seq->seq, (size_t)(seq->len + padding) * sizeof(char));

  ds_ssort((UChar *)seq->seq, suffixes + 1, seq->len);
  return suffixes;
}
Beispiel #3
0
/* ***************************************************************
   compress_file: analyse the input held in the global Infile
   (Infile_size bytes) and report on stdout/stderr.
      1. Read Infile and store it to text[].
      2. Compute the bwt and the lcp arrays
      3. Compute an optimal partitioning, print it, and print
         size estimates for the resulting segments.
   No output file is written in this function body; allocation and
   read failures are routed to out_of_mem()/fatal_error().
   *************************************************************** */
void compress_file(void)
{
  /* block-scope prototypes for helpers defined elsewhere */
  double bwt_partition1(bwt_data *b, int *lcp);
  double pseudo_compr(uint8 *t, int size);
  uint8 *text;
  int *sa, n, overshoot, i, k, extra_bytes;
  int occ[ALPHA_SIZE], *lcp=NULL;
  bwt_data b;
  double start, end, estimate, apost_est=0, apost_est_tot=0;
  int bloque1,bloque2;

  // ----- init ds suffix sort routine -----
  overshoot=init_ds_ssort(500,2000);
  if(overshoot==0)
    fatal_error("ds initialization failed! (compress_file)\n");
  // ----- allocate text and suffix array -----
  n = Infile_size;                               // length of input text
  sa=malloc((n+1)*sizeof *sa);                   // suffix array
  text=malloc((n+overshoot)*sizeof *text);       // text
  if (! sa || ! text) out_of_mem("compress_file");
  // ----- read text and build suffix array ------
  rewind(Infile); 
  i=fread(text, (size_t) 1, (size_t) n, Infile);
  if(i!=n) fatal_error("Error reading the input file!");
  fprintf(stdout,"File size: %d bytes\n",n);
  // ----- build suffix array ----------------
  start = getTime();
  // the sorter is handed sa+1, so slot sa[0] is not part of its output
  ds_ssort(text,sa+1,n);                         // sort suffixes
  end=getTime();
  fprintf(stdout,"Suffix array construction: %.2f seconds\n",end-start);
  // ---- compute lcp using 6n algorithm ---------
  start = getTime();
  // occ[c] = number of occurrences of byte c in the text
  for(i=0;i<ALPHA_SIZE;i++) occ[i]=0;
  for(i=0;i<n;i++) occ[text[i]]++;
  if( (b.bwt = (uint8 *) malloc(n+1)) == NULL)
    out_of_mem("bwtopt1_file");
  _bw_sa2bwt(text, n, sa, &b);
  extra_bytes = _lcp_sa2lcp_6n(text,&b,sa,occ);
  // from here on the sa buffer is read as the lcp array; no separate
  // lcp buffer is allocated (presumably _lcp_sa2lcp_6n overwrote sa
  // in place -- confirm against its definition)
  lcp = sa;
  end=getTime();
  fprintf(stdout,"lcp6 construction: %.2f seconds\n",end-start);
  fprintf(stdout,"Total memory for lcp6: %.2fn bytes\n",
	  6+(4.0*extra_bytes)/n);
  // ---- compute the optimal partition ---------
  start = getTime();
  estimate = bwt_partition1(&b,lcp);
  end=getTime();
  // After bwt_partition1 the code below reads lcp[] as a chain of segment
  // boundaries: lcp[s] is the start of the segment following the one that
  // starts at s, and the chain is expected to end at n+1.
  // Dump every segment [bloque1, bloque2-1] together with its bwt bytes.
  bloque1 = 0;
  bloque2 = lcp[0];
  while ( bloque2 != n+1 ) {
    printf("[%7d,%7d]\n", bloque1, bloque2-1);
    for (i=bloque1; i<=bloque2-1; i++)
      printf("%d ", b.bwt[i]);
    printf("\n");
    bloque1 = bloque2;
    bloque2 = lcp[bloque2];
  }
  // last segment: only its bounds are printed, not its bwt bytes
  printf("[%7d,%7d]\n", bloque1,bloque2-1);
  fprintf(stdout,"Optimal partition computation: %.2f seconds\n",end-start);
  // ---- compress --------
  // Walk the same chain again, counting segments in k; with Verbose>0 each
  // segment is also passed to pseudo_compr and the results are summed.
  for(k=i=0;i<=n; ) { 
    assert(lcp[i]>i);   // bwt[i] -> bwt[lcp[i]-1] is a segment
    if(Verbose>0) {
      apost_est_tot += apost_est = pseudo_compr(b.bwt+i,lcp[i]-i);
      if(Verbose>1)
	fprintf(stderr,"%d) %d <-> %d: %f bits\n",k,i,lcp[i]-1,apost_est);
    }
    i = lcp[i];      // starting point of next segment
    k++;             // increase # of segment 
  } 
  assert(i==n+1);
  fprintf(stdout, "Number of partitions: %d\n",k);
  fprintf(stdout, "Estimated compressed size: %lf (not reliable)\n",estimate);
  if(Verbose>0) {
    fprintf(stdout, "A posteriori estimate: %lf  ",apost_est_tot);
    fprintf(stdout, "Delta %lf (should be zero)\n",apost_est_tot-estimate);
  }
  free(b.bwt);
  free(text);
  free(sa);             // also releases lcp, which aliases sa
}
Beispiel #4
0
/*
 * read_ESA_from_file - load an enhanced suffix array written to pFileName.
 *
 * The file is read in this order: a struct ESAFileFormat header, size+1
 * bytes of text, size ints of suffix positions, size bytes of lcp values
 * and, optionally, nExtraData bytes of extra data.  The skip column is not
 * stored; it is recomputed with calcSkip().
 *
 * pFileName    path of the file to read.
 * ppExtraData  if non-NULL, receives a malloc'ed buffer with the extra data
 *              (caller frees), or NULL when the header's nExtraData is -1.
 * pDataRead    if non-NULL and ppExtraData is non-NULL, receives nExtraData.
 *
 * Returns the new ESA, or NULL on failure.  On failure the reason is
 * reported through setError() (except when calcSkip() fails, which adds no
 * message here) and every buffer allocated by this function is released.
 *
 * NOTE(review): alphabetSize, ignoreAlphabetSize and size come straight from
 * the file; only a negative size is rejected here.
 */
ESA read_ESA_from_file(char *pFileName, unsigned char **ppExtraData, int *pDataRead)
{
	struct ESAFileFormat ff;
	unsigned char *text = NULL;
	FILE *f = NULL;
	int overshoot;
	int n;
	ESA esa = malloc(sizeof(*esa));

	if(!esa)
	{
		setError("Couldn't allocate memory for ESA.\n");
		return NULL;
	}

	/* Give the shared cleanup path a known state: free(NULL) is a no-op. */
	esa->alphabet = NULL;
	esa->ignoreAlphabet = NULL;
	esa->suf = NULL;
	esa->lcp = NULL;
	esa->skip = NULL;

	/* The file holds raw structs and arrays, so open it in binary mode. */
	f = fopen(pFileName, "rb");
	if(!f)
	{
		char st[512];
		/* bounded: pFileName may be longer than the message buffer */
		snprintf(st, sizeof st, "Could not open '%s' for reading.", pFileName);
		setError(st);
		goto fail;
	}

	if(fread(&ff, sizeof(struct ESAFileFormat), 1, f) != 1)
	{
		setError("An error occurred reading the file.");
		goto fail;
	}

	if(strncmp(ff.ID, HEADERNAME, HEADERLENGTH) != 0)
	{
		setError("Header name mismatch in ESA structure file.");
		goto fail;
	}

	if(ff.major != MAJOR_VERSION)
	{
		setError("Incompatible version of the ESA structure file.");
		goto fail;
	}

	esa->alphabetSize = ff.alphabetSize;
	esa->alphabet = malloc( (esa->alphabetSize + 1) * sizeof(char));
	if(!esa->alphabet)
	{
		setError("Couldn't allocate space for alphabet.");
		goto fail;
	}
	strncpy(esa->alphabet, ff.alphabet, ff.alphabetSize);
	esa->alphabet[esa->alphabetSize] = '\0';

	esa->ignoreAlphabetSize = ff.ignoreAlphabetSize;
	esa->ignoreAlphabet = malloc( (esa->ignoreAlphabetSize + 1) * sizeof(char));
	if(!esa->ignoreAlphabet)
	{
		setError("Couldn't allocate space for ignore alphabet.");
		goto fail;
	}
	strncpy(esa->ignoreAlphabet, ff.ignoreAlphabet, ff.ignoreAlphabetSize);
	esa->ignoreAlphabet[esa->ignoreAlphabetSize] = '\0';

	n = esa->size = ff.size;
	if(n < 0)
	{
		setError("Invalid size in ESA structure file.");
		goto fail;
	}

	/* size+1 bytes are read into text below, so reserve that much plus the
	   sorter's overshoot; n + overshoot alone is too small if overshoot is 0. */
	overshoot=init_ds_ssort(500,2000);
	text=malloc(((size_t)n + 1 + (size_t)overshoot)*sizeof *text);
	if(!text)
	{
		setError("Couldn't allocate space for text.");
		goto fail;
	}

	if(fread(text, sizeof(unsigned char), ff.size+1, f) != ff.size+1)
	{
		setError("Couldn't read text from file.");
		goto fail;
	}

	esa->pStr = text;

	esa->suf = malloc(sizeof(int) * n);
	if(!esa->suf)
	{
		setError("Couldn't allocate memory for suf column in suffix array.");
		goto fail;
	}

	if(fread(esa->suf, sizeof(int), ff.size, f) != ff.size)
	{
		setError("Couldn't read suffix column from file.");
		goto fail;
	}

	esa->lcp = malloc(sizeof(unsigned char) * n);
	if(!esa->lcp)
	{
		setError("Couldn't allocate memory for lcp column in suffix array.");
		goto fail;
	}

	if(fread(esa->lcp, sizeof(unsigned char), ff.size, f) != ff.size)
	{
		setError("Couldn't read lcp from file.");
		goto fail;
	}

	esa->skip = malloc(sizeof(int) * n);
	if(!esa->skip)
	{
		setError("Couldn't allocate space for skip column in suffix array.");
		goto fail;
	}

	if(calcSkip(esa) == 0)
		goto fail;

	// Read extra data if it was asked for and if it is present.
	if(ppExtraData != NULL) {
		if(pDataRead != NULL)
			*pDataRead = ff.nExtraData;
		if(ff.nExtraData != -1)
		{
			*ppExtraData = malloc(sizeof(unsigned char) * ff.nExtraData);
			if(*ppExtraData == NULL)
			{
				setError("Error - couldn't allocate space for extra data.\n");
				goto fail;
			}
			if(fread(*ppExtraData, sizeof(unsigned char), ff.nExtraData, f) != (unsigned int)ff.nExtraData)
			{
				setError("Error reading extra data.\n");
				free(*ppExtraData);
				*ppExtraData = NULL;	/* don't hand back a dangling pointer */
				goto fail;
			}
		} else {
			*ppExtraData = NULL;
		}
	}

	fclose(f);

	return esa;

fail:
	/* Single exit for every error: release whatever was acquired so far. */
	if(f)
		fclose(f);
	free(esa->skip);
	free(esa->lcp);
	free(esa->suf);
	free(text);		/* same buffer as esa->pStr once that was set */
	free(esa->ignoreAlphabet);
	free(esa->alphabet);
	free(esa);
	return NULL;
}
Beispiel #5
0
/*
 * build_ESA - build an enhanced suffix array (suf, lcp and skip columns)
 * for the string pStr.
 *
 * pStr       input string; pStr[size] must be '\0'.
 * size       number of characters before the terminator.  The ESA is built
 *            over size + 1 symbols, i.e. including the terminator.
 * pAlphabet  alphabet passed to translate() and stored in the ESA.
 * pIgnore    ignore-alphabet passed to translate() and stored in the ESA.
 * free_pStr  non-zero: this function free()s pStr, on success and on failure.
 *
 * Returns the new ESA, or NULL on failure.  Allocation failures and the
 * missing-terminator case are reported through setError(); failures of
 * translate() and calcSkip() add no message here.  The timer started by
 * initTimer() is released on every path that started it.
 *
 * NOTE(review): whether setAlphabet()/setIgnoreAlphabet() allocate is not
 * visible here; if they do, the later failure paths do not release that.
 */
ESA build_ESA(char *pStr, int size, char *pAlphabet, char *pIgnore, int free_pStr) {
	ESA esa = NULL;
	unsigned char *text = NULL;
	int overshoot;
	int n;

	// Check if the string includes a zero termination
	if(pStr[size] != '\0') {
		setError("The string MUST include a zero termination within the size\n");
		if(free_pStr)
			free(pStr);
		return NULL;	/* timer not started yet, nothing else to undo */
	}

	initTimer();

	esa = malloc(sizeof(*esa));
	if(!esa)
	{
		setError("Couldn't allocate memory for ESA.\n");
		goto fail;
	}
	/* Known state for the cleanup path. */
	esa->suf = NULL;
	esa->lcp = NULL;
	esa->skip = NULL;

	n = size + 1; // Include the zero termination in the string

	// Calculate the overshoot
	overshoot=init_ds_ssort(500,2000);

	text = malloc((n + overshoot)*sizeof *text);
	if(!text)
	{
		setError("Couldn't allocate memory for translated text.\n");
		goto fail;
	}

	// Translate the text and stop if it fails
	if(! translate(text, pStr, n-1, pAlphabet, pIgnore) )
		goto fail;

	// pStr is no longer needed; release it now and make sure the
	// cleanup path does not release it a second time.
	if(free_pStr) {
		free(pStr);
		free_pStr = 0;
	}

	// Save the text, alphabet and size in the esa structure
	setStr(esa, text);
	setSize(esa, n);
	setAlphabetSize(esa, strlen(pAlphabet));
	setIgnoreAlphabetSize(esa, strlen(pIgnore));
	setAlphabet(esa, pAlphabet);
	setIgnoreAlphabet(esa, pIgnore);

	addTimer("Initializing");

	// Do the sorting, calc. lcp and calc. skip
	esa->suf = malloc(sizeof(int) * n);
	if(!esa->suf)
	{
		setError("Couldn't allocate memory for suffix column in suffix array.\n");
		goto fail;
	}

	ds_ssort(esa->pStr, esa->suf, n, MAXPSSMSIZE);
	addTimer("DS-Sort");

	esa->lcp = malloc(sizeof(unsigned char) * n);
	if(!esa->lcp)
	{
		setError("Couldn't allocate memory for LCP column in suffix array.\n");
		goto fail;
	}

	calcLcpNaiv(esa);
	addTimer("Calc Lcp");

	// The line below can be commented in to verify that there are "errors" in the suffix array
	// it will scan the array for errors and report the minimum depth at which an error was found
	// (the last parameter specifies the max depth to search to).
	// As a side effect it calculates lcp (when used for this purpose the depth parameter should equal
	// that used when calling ds_ssort).
	// verifyNaively(esa, n, MAX_DEPTH);

	esa->skip = malloc(sizeof(int) * n);
	if(!esa->skip)
	{
		setError("Couldn't allocate memory for SKIP column in suffix array.\n");
		goto fail;
	}

	if(calcSkip(esa) == 0)
		goto fail;

	addTimer("Calc Skip");
	printTimer();
	freeTimer();

	return esa;

fail:
	/* Single exit for every error after initTimer(). */
	if(esa) {
		free(esa->skip);
		free(esa->lcp);
		free(esa->suf);
	}
	free(text);
	free(esa);
	if(free_pStr)
		free(pStr);
	freeTimer();	/* always, not only when pStr was ours to free */
	return NULL;
}
Beispiel #6
0
/*
 * token <datadir> <min_token_len> <min_occ_ratio>
 *
 * Loads <datadir>/data and <datadir>/offsets into memory, runs
 * TokenExtraction() over the samples and prints every extracted token to
 * stdout as 'token'<TAB>.
 *
 * The minimum occurrence count handed to TokenExtraction() is
 * numSamples * min_occ_ratio, truncated to an unsigned int.
 *
 * Returns 0 on success and -1 on a usage, allocation or load error.
 */
int main(int argc, char* argv[])
{
	char datafname[FNAMEBUFSIZE];
	char offsetfname[FNAMEBUFSIZE];
	int dataLen;
	int numSamples;
	int overshoot;
	int status = -1;

	unsigned char* data = NULL;
	int* offsets = NULL;
	TokenVectorHndl tvHead, tvTemp;


	if( argc < 4 ){
		fprintf( stderr, "Error: Usage 'token <datadir> <min_token_len> <min_occ_ratio>'\n" );
		exit(-1);
	}

	snprintf(datafname,FNAMEBUFSIZE,"%s/data",argv[1]);
	snprintf(offsetfname,FNAMEBUFSIZE,"%s/offsets",argv[1]);

	unsigned int min_token_len=atoi(argv[2]);
	double min_occ_ratio=atof(argv[3]);

	dataLen = get_filesize(datafname);
	numSamples = (int)(get_filesize(offsetfname) / sizeof(int)) ;

	/* the data buffer is padded by the sorter's overshoot */
	overshoot = init_ds_ssort( 500, 2000 );

	data = (unsigned char*)calloc( dataLen+overshoot, sizeof(unsigned char) );
	/* one extra slot: offsets[numSamples] marks the end of the data */
	offsets = (int*)calloc( numSamples + 1, sizeof(int) );
	if( data == NULL || offsets == NULL ){
		fprintf(stderr, "%s: Error allocating memory for samples.\n", __FUNCTION__);
		goto done;
	}

	if( !loadSamplesDataIntoMemory( numSamples, data, offsets, datafname, offsetfname ) ){
		fprintf(stderr, "%s: Error loading samples into memory.\n", __FUNCTION__);
		goto done;
	}

	offsets[numSamples] = dataLen;

	tvHead = TokenExtraction( data, dataLen, offsets, numSamples, min_token_len, (unsigned int)(numSamples*min_occ_ratio));

	for( tvTemp = tvHead; tvTemp != NULL; tvTemp = tvTemp->next ){
		printf("'");
		print_str( tvTemp->string, tvTemp->strLen );
		printf("'\t");
	}

	DeleteTokenVectorList(tvHead);
	status = 0;

done:
	/* released only after the token list is gone */
	free(offsets);
	free(data);
	return status;
}
Beispiel #7
0
/*////////////////////
//Building the Index//
////////////////////*/
int build_index(uchar *text, ulong length, char *build_options, void **index) {
    /*if (text[length-1]!='\0') return 2;*/
    ulong i, *p, *sa_diff;
    long overshoot;
    TSA_Un *_index= (TSA_Un *) malloc(sizeof(TSA_Un));
    uchar *x;
    FILE *f;
    char fnamext[1024];
    char fnameaux[1024];
    char delimiters[] = " =;";
    int j,num_parameters;
    char ** parameters;
    int copy_text=false;           /* don't copy text by default */
    int free_text=false;           /* don't free text by default */
    int withload=false;           /* don't load SA and BPE by default */
    int samplerate=64;             /* samplerate for bpe */
    int max_phrase=256;
    bool verbose=false;
    double cutoff=100.0;
    bool SA_treap=true,SA_psi=false;
    if (!_index) return 1;
    if (build_options != NULL) {
        parse_parameters(build_options,&num_parameters, &parameters, delimiters);
        for (j=0; j<num_parameters; j++) {
            if (strcmp(parameters[j], "copy_text") == 0 )
                copy_text=true;
            else if (strcmp(parameters[j], "withload") == 0 )
                withload=true;
            else if (strcmp(parameters[j], "filename") == 0 ) {
                strcpy(fnamext,parameters[j+1]);
                j++;
            } else if ((strcmp(parameters[j], "samplerate") == 0 ) && (j < num_parameters-1) ) {
                samplerate=atoi(parameters[j+1]);
                j++;
            } else if ((strcmp(parameters[j], "max_phrase") == 0 ) && (j < num_parameters-1) ) {
                max_phrase=atoi(parameters[j+1]);
                j++;
            } else if ((strcmp(parameters[j], "cutoff") == 0 ) && (j < num_parameters-1) ) {
                cutoff=atof(parameters[j+1]);
                j++;
            } else if (strcmp(parameters[j], "free_text") == 0 )
                free_text=true;
            else if (strcmp(parameters[j], "verbose") == 0 )
                verbose=true;
            else if (strcmp(parameters[j], "SA_treap") == 0 ) {
                SA_treap=true;
                SA_psi=false;
            } else if (strcmp(parameters[j], "SA_psi") == 0 ) {
                SA_treap=false;
                SA_psi=true;
            }
        }
        free_parameters(num_parameters, &parameters);
    }
    //printf("samplerate = %lu\n",samplerate);
    /* Consistence of parameters  */
    if ((!copy_text) && (free_text))
        return 5;
    /*                            */
    if ( !copy_text ) {
        _index->text = text;
        _index->own=false;
    }
    else {
        _index->text = (uchar *) malloc(sizeof(uchar)*length);
        if (!_index->text) return 1;
        for (i=0; i<length; i++) _index->text[i]=text[i];
        _index->own=true;
    }
    if ( free_text )
        free(text);

    _index->n=length;

    /* Make suffix array */
    if (withload) {
        ulong filename_len;
        p= (ulong *) malloc (sizeof(ulong)*(length));
        if (!p) return 1;
        sprintf (fnameaux,"%s.sa",fnamext);
        f = fopen (fnameaux,"r");
        if (fread (&filename_len,sizeof(ulong),1,f) != 1) return 25;
        assert(filename_len==_index->n);
        if (fread (p,sizeof(ulong),filename_len,f) != filename_len) return 25;
        if (fclose(f) != 0) return 28;
    } else {
        overshoot = init_ds_ssort(500, 2000);
        p= (ulong *) malloc (sizeof(ulong)*(length));
        if (!p) return 1;
        x= (uchar *) malloc (sizeof(uchar)*(length+overshoot));
        if (!x) return 1;
        for (i=0; i<length; i++) x[i]=_index->text[i];
        ds_ssort( x, p, _index->n);
        free(x);
    }

    /* Make bpe */
    if (withload && false ) {
        int error;
        sprintf (fnameaux,"%s.bpe",fnamext);
        f = fopen (fnameaux,"r");
        _index->bpe = new BPE(f,&error);
        if (error !=0) return error;
        if (fclose(f) != 0) return 28;
    } else {
        if (SA_treap) {
            sa_diff= (ulong *) malloc (sizeof(ulong)*(length+3));
            if (!sa_diff) return 1;
            for (i=0; i<length-1; i++) {
                assert(p[i+1]-p[i]+length>0);
                sa_diff[i+1]=p[i+1]-p[i]+length;
            }
            free(p);
            ulong maximo=0;
            for (i=0; i<length-1; i++) {
                if (maximo < sa_diff[i+1]) maximo=sa_diff[i+1];
            }
            sa_diff[0]=maximo+1;
            sa_diff[length+1]=maximo+2;
            sa_diff[length+2]=maximo+3;

            _index->bpe = new BPE(sa_diff,length-1+3, max_phrase, cutoff, verbose);
        }
        if (SA_psi) {
            ulong *ip= (ulong *) malloc (sizeof(ulong)*(length));
            for (i=0; i<length; i++) ip[p[i]] = i;
            for (i=0; i<length; i++) assert(ip[p[i]] == i);
            ulong *Psi= (ulong *) malloc (sizeof(ulong)*(length));
            for (i=0; i<length; i++) if (p[i] == length-1) Psi[i] = ip[0];
                else Psi[i] = ip[p[i]+1];

            ulong ini=ip[0];
            free(ip);

            sa_diff= (ulong *) malloc (sizeof(ulong)*length);
            if (!sa_diff) return 1;
            for (i=0; i<length-1; i++) {
                assert(p[i+1]-p[i]+length>0);
                sa_diff[i]=p[i+1]-p[i]+length;
            }
            free(p);
            _index->bpe = new BPE(sa_diff,Psi,ini,length-1,verbose);
        }
    }

    /* Make suffix array again */
    if (withload) {
        ulong filename_len;
        p= (ulong *) malloc (sizeof(ulong)*(length));
        if (!p) return 1;
        sprintf (fnameaux,"%s.sa",fnamext);
        f = fopen (fnameaux,"r");
        if (fread (&filename_len,sizeof(ulong),1,f) != 1) return 25;
        assert(filename_len==_index->n);
        if (fread (p,sizeof(ulong),filename_len,f) != filename_len) return 25;
        if (fclose(f) != 0) return 28;
    } else {
        overshoot = init_ds_ssort(500, 2000);
        p= (ulong *) malloc (sizeof(ulong)*(length));
        if (!p) return 1;
        x= (uchar *) malloc (sizeof(uchar)*(length+overshoot));
        if (!x) return 1;
        for (i=0; i<length; i++) x[i]=_index->text[i];
        ds_ssort( x, p, _index->n);
        free(x);
    }

    /*
    ////////////////////////////////////////////////////
          sa_diff= (ulong *) malloc (sizeof(ulong)*length);
          for (i=0;i<length-1;i++){
             assert(p[i+1]-p[i]+length>0);
             sa_diff[i]=p[i+1]-p[i]+length;
          }
          ulong *z2;
          z2=_index->bpe->dispairall();
          printf("Check SA_diff todo\n");
          for (i=0;i<length-1;i++){
            if (z2[i]-sa_diff[i] !=0) {printf("%lu, %lu         %lu,%lu\n",i, z2[i]-sa_diff[i],z2[i],sa_diff[i]);fflush(stdout);}
          }
          printf("End Check SA_diff todo\n");
          free(z2);

          printf("End Check SA_diff 2\n");
          for (ulong mmm=1; mmm < 5000; mmm++) {
            printf("Check SA_diff %lu ",mmm);
            for (i=0;i<length-1-(mmm-1);i++){
              z2=_index->bpe->dispair(i,mmm);
              //if (i % (n/10) == 0)  {printf("C2 %lu\n",i);fflush(stdout);}
              for (ulong mm =1 ; mm <= mmm; mm++)
                    if (z2[mm]-sa_diff[i+mm-1] !=0) {printf("T%lu %lu, %lu         %lu,%lu\n",mm,i, z2[mm]-sa_diff[i+mm-1],z2[mm],sa_diff[i+mm-1]);fflush(stdout);}
              free(z2);
            }
            printf("End Check SA_diff %lu\n",mmm);fflush(stdout);
          }

          free(sa_diff);



    /////////////////////////////////////////////////////////
    */
    /* Make samplerate */

    _index->samplerate = samplerate;
    _index->ns = (length-1)/samplerate+1;
    if (((length-1) % samplerate) != 0) _index->ns++;
    _index->pos = (ulong *) malloc (sizeof(ulong)*_index->ns);
    //_index->pos[0]=p[0];
    j=0;
    for (i=0; i < length ; i+=samplerate) {
        if (i != length-1) {
            //if (p[_index->bpe->BR->prev(i)] != _index->pos[j-1]) {
            _index->pos[j]=p[_index->bpe->BR->prev(i)];
            j++;
            // }
        } else {
            _index->pos[j]=p[i];
            j++;
        }
    }
    if (((length-1) % samplerate) != 0) _index->pos[j]=p[length-1];
    _index->ns=j+1;


    /*
      _index->samplerate = samplerate;
      _index->ns = (length-1)/samplerate+1;
      if (((length-1) % samplerate) != 0) _index->ns++;
      _index->pos = (ulong *) malloc (sizeof(ulong)*_index->ns);
      j=0;
      for (i=0; i < length ; i+=samplerate) {
        _index->pos[j]=p[i];
        j++;
      }
      if (((length-1) % samplerate) != 0) _index->pos[j]=p[length-1];

      assert(j+1==_index->ns);
    */

    free(p);
    (*index) = _index;
    return 0;
}