Exemplo n.º 1
0
/*
 * Build the on-disk psi index for the text whose base file name is `fname`
 * and load the result back into `csa`.
 *
 * Input files:  "<fname>.bw" and "<fname>.lst" (handed to bw_to_psi()).
 * Output files: "<fname>.pxi" (encoded psi differences) and
 *               "<fname>.pxd" (header plus per-block samples/pointers).
 *
 * The psi values are first materialised in a temporary file via bw_to_psi(),
 * then processed in blocks of L = ps->L entries.  For every block the first
 * value psi[j*L] is stored verbatim as a sample together with the word
 * offset of the block in the .pxi stream; the remaining values of the block
 * are stored as differences, with runs of difference 1 collapsed into a
 * run length.  When ID_COMPPTR is set in ps->id the samples and offsets are
 * kept in two SPARSEARRAYs instead of fixed-width integers.
 *
 * Only the ID_DIFF_GAMMA_RR encoding (low 6 bits of ps->id) is supported;
 * any other id terminates the program with an error.
 *
 * Returns the total number of bytes written to both output files.
 * Terminates the process on unsupported id, L >= n, or if an output file
 * cannot be created.
 */
i64 psi12_makeindex(CSA *csa, char *fname)
{
  i64 psize,psize1,psize2;
  i64 b, b2;
  i64 i,j,x,xx;
  i64 y,d,w;
  int k;
  FILE *f1,*f2;
  char *fpsi, *fpsd;
  i64 runlen;
  i64 n,L;
  psi1 *ps;
  int id,id2;
  FILE *out;
  diskbuf *psi;
  char *fbw, *flst;
  SPARSEARRAY sx, sb;
  int mm;

  ps = (psi1 *)csa->psi_struc;
  id = ps->id;
  id2 = id & 0x3f;  /* low 6 bits select the encoding; higher bits are flags */

  /* The longest suffix appended below is 4 characters plus the terminator. */
  k = strlen(fname);
  fbw = mymalloc(k+5);
  flst = mymalloc(k+5);
  fpsi = mymalloc(k+5);
  fpsd = mymalloc(k+5);
  sprintf(fbw,"%s.bw",fname);
  sprintf(flst,"%s.lst",fname);

  switch (id2) {
  case ID_DIFF_GAMMA_RR:
    sprintf(fpsi,"%s.pxi",fname);
    sprintf(fpsd,"%s.pxd",fname);
    break;
  default:
    /* Without this, fpsi/fpsd would be used uninitialised by fopen(). */
    fprintf(stderr,"psi12_makeindex: unsupported psi id %d\n",id2);
    exit(1);
  }

  /* Materialise psi in a temporary file; bw_to_psi() stores into k the
     value that open_diskbuf() needs to read it back. */
  out = create_tmp(0);
  bw_to_psi(out,csa,fbw,flst,&k);
  psi = open_diskbuf(out,k);
  ps->last = getint_diskbuf(psi,0);
  printf("last = %ld\n",ps->last);

  n = csa->n;
  L = ps->L;
  if (L >= n) {
    /* NOTE(review): exits with status 0 although this looks like an error
       condition -- confirm whether callers rely on that. */
    printf("L=%ld >= n=%ld\n",L,n);
    exit(0);
  }

  mkdecodetable();

  f1 = fopen(fpsi,"wb");
  if (f1 == NULL) {
    perror(fpsi);
    exit(1);
  }
  psize1 = 0;

  f2 = fopen(fpsd,"wb");
  if (f2 == NULL) {
    perror(fpsd);
    exit(1);
  }
  psize2 = 0;

  /* k is reused from here on as the byte width of the integers in f2. */
  ps->k = k = (blog(n+1)+1+8-1)/8;

  /* Header of the directory file. */
  writeint(1,ID_PSI,f2);
  writeint(1,k,f2); /* #bytes of integer */
  writeint(k,n,f2);
  writeint(k,L,f2);
  psize2 += 1+1+2*k;

  writeint(1,id,f2);
  psize2 += 1;

  if (id & ID_COMPPTR) {
    /* mm = number of characters with a non-zero entry in csa->C */
    mm = 0;
    for (i=0; i<SIGMA; i++) {
      if (csa->C[i] > 0) mm++;
    }
    SPARSEARRAY_construct_init(&sx, (mm+1)*(n+1), n/L+1);
    SPARSEARRAY_construct_init(&sb, n, n/L+1);
  }

  /* b  : offset of the current block in f1, in 16-bit words
     b2 : bit cursor inside Btmp for the current block
     mm : number of times the sampled value failed to increase so far
     xx : previous sampled value */
  b = b2 = 0;
  mm = 0;  xx = 0;
  for (j=0; j<=n/L; j++) {
    if (j % 100000 == 0) {
      printf("%ld %1.3f bpc\r",j,(double)psize2*8/(j+1)/L);  fflush(stdout);
    }
    y = getint_diskbuf(psi,j*L);

    if (id & ID_COMPPTR) {
      /* Adding mm*(n+1) keeps the values stored in sx strictly increasing
         even when the sampled psi value drops. */
      if (y <= xx) {
        mm++;
      }
      SPARSEARRAY_construct_set(&sx, j, mm*(n+1) + y);
      SPARSEARRAY_construct_set(&sb, j, b);
      xx = y;
    } else {
      writeint(k,y,f2);
      writeint(k,b,f2);
      psize2 += 2*k;
    }

    x = y;
    runlen = 0;
    b2 = 0;
    for (i=j*L+1; i<(j+1)*L && i <= n; i++) { /* psi[j*L] are not encoded */
      y = getint_diskbuf(psi,i);
      d = y - x;
      if (d <= 0) {
        d += n+1;  /* wrap non-positive differences into 1..n+1 */
      }
      if (i == j*L+1) {
        /* One flag bit per block: does the block start with a run of 1s? */
        if (d == 1) {setbit(Btmp,b2+1,1);  runlen = 1;}
        else {setbit(Btmp,b2+1,0);  runlen = 0;}
        b2++;
      }
      if (d > 1) {
        if (runlen>0) {
          /* flush the pending run before emitting the difference */
          w = ENCODENUM(Btmp,b2,runlen);
          b2 += w;
          runlen = 0;
        }
        w = ENCODENUM(Btmp,b2,d-1);
        b2 += w;
        runlen = 1;
      } else {
        runlen++;
      }
      x = y;
    }
    if (runlen>0) {
      w = ENCODENUM(Btmp,b2,runlen);
      b2 += w;
      runlen = 0;
    }
    /* Every block is padded to a whole number of 16-bit words. */
    fwrite(Btmp,(b2+15) / 16,sizeof(short),f1);
    psize1 += (b2+15)/16*sizeof(short);
    b += (b2+15) / 16;
    b2 = 0;
  }
  /* One padding word, because getbitD reads one extra word past the data. */
  fwrite(Btmp,1,sizeof(short),f1);
  psize1 += 1*sizeof(short);

  if (id & ID_COMPPTR) {
    SPARSEARRAY_construct_end(&sx, SDARRAY_SELECT1);
    SPARSEARRAY_construct_end(&sb, SDARRAY_SELECT1);
    SPARSEARRAY_write(&sx, f2);
    SPARSEARRAY_write(&sb, f2);
  }

  psize = psize1 + psize2;
  printf("size %ld (%1.3f bpc)\n",psize,(double)psize*8 / n);

  fclose(f1);
  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  /* Load the freshly written index back into csa. */
  psi1_read(csa, fpsd);

  free(fpsi);
  free(fpsd);
  free(fbw);
  free(flst);

  return psize;
}
Exemplo n.º 2
0
	/*
	 * Write a compressed suffix array to two files.
	 *
	 *   n        number of characters in s
	 *   p        array indexed 1..n (read as p[i] for i in 1..n); the
	 *            comments in the code label the sampled entries "SA"
	 *   s        the text, s[0..n-1]
	 *   fname1   output file receiving the encoded psi differences
	 *   fname2   output file receiving the header and the directories
	 *   rankb_w  sampling step for the entries of p written to fname2
	 *            (rankb_w*16 is the step of the inverse samples)
	 *   rankb_w2 sampling step for the psi directory (R/P pairs)
	 *
	 * Terminates the process if a step is not positive, if either file
	 * cannot be created, or if an allocation fails.
	 */
	void csa_new(int n, int *p, unsigned char *s, char *fname1, char *fname2, int rankb_w, int rankb_w2) {
		int i,v,b,x,b2,d,w,m;
		int *I,*J;
		int K[SIGMA+2],C[SIGMA+1],C2[SIGMA+1];
		unsigned short Btmp[64];
		FILE *f1,*f2;

		/* Both steps are used as divisor, modulus and loop stride below. */
		if (rankb_w <= 0 || rankb_w2 <= 0) {
			fprintf(stderr, "csa_new: rankb_w and rankb_w2 must be positive\n");
			exit(1);
		}

		/* Check each open separately so the reported errno and file name
		   belong to the call that actually failed. */
		f1 = fopen(fname1,"wb"); /* psi */
		if (f1 == NULL) {
			perror(fname1);
			exit(1);
		}
		f2 = fopen(fname2,"wb"); /* directory */
		if (f2 == NULL) {
			perror(fname2);
			exit(1);
		}

		/* C[c] = number of occurrences of character c in s */
		for (i=0; i<SIGMA; ++i) {
			C[i] = 0;
		}
		for (i=0; i<n; ++i) {
			C[s[i]]++;
		}

		/* m      = number of distinct characters,
		   C2[j]  = j-th distinct character (1-based),
		   K[j]   = 1 + number of characters smaller than C2[j],
		   K[m+1] = n + 1 */
		for (m=0,v=1,i=0; i<SIGMA; i++) {
			if (C[i]>0) {
				m++;
				C2[m] = i;
				K[m] = v;
				v += C[i];
			}
		}
		K[m+1] = v;

		/* Turn C into inclusive cumulative counts. */
		for (v=0,i=0; i<SIGMA; i++) {
			v = v + C[i];
			C[i] = v;
		}

		/* Header: n, both sampling steps, rankb_w*16, SIGMA, m. */
		writeint(n,f2);
		writeint(rankb_w2,f2);
		writeint(rankb_w,f2);
		writeint((rankb_w*16),f2);
		writeint(SIGMA,f2);
		writeint(m,f2);

		for (i = 0; i < SIGMA; i++) {
			writeint(C[i],f2);	 /* cumulative counts */
		}
		for (i = 1; i <= m+1; i++) {
			writeint(K[i],f2);	 /* start position of each distinct character */
		}
		for (i = 1; i <= m; i++) {
			writeint(C2[i],f2);	 /* code of each distinct character */
		}

		I=(int *)malloc((n+2) * sizeof(*I));
		if (I==NULL) {
			fprintf(stderr, "psi_new2 malloc I failed\n");
			exit(1);
		}

		/* Fills I[1..n]; s-1 is passed so the callee can index the text
		   from 1 (presumably -- confirm against psisort2). */
		psisort2(p,I,s-1,n);

		writeint(-1,f2);		 /* R[0] */
		writeint(0,f2);			 /* P[0] */

		/* x  : previously encoded value (-1 right after a reset)
		   b  : total number of bits emitted so far
		   b2 : number of bits pending in Btmp */
		x = -1;  b = b2 = 0;
		for (i=1; i<=n; i++) {
			if (I[i] < x) {
				/* Value decreased: emit an escape difference that
				   cannot occur for a real value, then redo i. */
				d = (n+65536) - x;
			}
			else {
				d = I[i] - x;
			}
			w = ENCODENUM(Btmp,b2,d);
			b += w;  b2 += w;
			if (b2 >= 16) {
				/* Flush the complete words and keep the partial one. */
				fwrite(Btmp,b2 / 16,sizeof(short),f1);
				Btmp[0] = Btmp[b2 / 16];
				b2 = b2 % 16;
			}
			if (I[i] < x) {
				x = -1;
				i--;	 /* encode the same I[i] again relative to -1 */
			}
			else {
				x = I[i];
				if (i % rankb_w2 == 0) {
								 /* R[i / L] */
					writeint(I[i],f2);
								 /* P[i / L] */
					writeint(b,f2);
				}
			}
		}
		if (b2 > 0) {
			fwrite(Btmp,(b2+15) / 16,sizeof(short),f1);
		}

		writeint(n+1,f2);		 /* SA[0] */
		for (i=rankb_w; i<=n; i+=rankb_w) {
			writeint(p[i],f2);
		}

		/* J[q] = index i with p[i] == q*(rankb_w*16) + 1 */
		J = (int *)malloc(((n-1)/(rankb_w*16)+1)*sizeof(*J));
		if (J==NULL) {
			fprintf(stderr, "csa_new malloc J failed\n");
			exit(1);
		}
		for (i=1; i<=n; i++) {
			if ((p[i]-1) % (rankb_w*16) == 0) {
				J[(p[i]-1) / (rankb_w*16)] = i;
			}
		}
		for (i = 0; i <= (n-1)/(rankb_w*16); i++) {
			writeint(J[i],f2);
		}
		fclose(f1);
		fclose(f2);

		free(I);
		free(J);

	}