Пример #1
0
/*
 * Converts the BW sequence stored in file `fbw` into psi values, written
 * through disk buffers on `out`.
 *
 *   out  - stream handed to open_diskbuf() for every per-character buffer
 *   csa  - csa->C[] receives the per-character counts, csa->n the length
 *   fbw  - path of the BW file, read one byte per symbol
 *   flst - path of a text file holding one decimal integer (`last`)
 *   k    - out: integer width (in bytes) used for the disk buffers
 *
 * Any I/O failure prints a message and terminates the process with exit(1).
 *
 * NOTE(review): `psi` is not declared in this function; presumably it is a
 * file-scope array of at least SIGMA diskbuf pointers -- confirm.
 * NOTE(review): bytes are used directly as indices into csa->C[], C2[] and
 * psi[], so SIGMA is assumed to be at least 256 -- confirm.
 */
void bw_to_psi(FILE *out, CSA *csa, char *fbw, char *flst, int *k)
{
	FILE *in;
	i64 last,i,j;
	i64 C2[SIGMA];
	i64 c;

	in = fopen(flst,"r");
	if (in == NULL) {
		perror("bw_to_psi:");  exit(1);
	}
	/* A malformed file would otherwise leave `last` uninitialised. */
	if (fscanf(in,"%ld",&last) != 1) {
		fprintf(stderr,"bw_to_psi: cannot read last from %s\n",flst);
		exit(1);
	}
	fclose(in);

	for (c=0; c<SIGMA; c++) {
		csa->C[c] = 0;
	}

	/* Binary mode: the file holds raw bytes, not text. */
	in = fopen(fbw,"rb");
	if (in == NULL) {
		perror("bw_to_psi:");  exit(1);
	}

	/* Pass 1: count the occurrences of every byte and the total length. */
	csa->n = 0;
	while (1) {
		display_progressbar("reading ",csa->n,0L);
		c = fgetc(in);
		if (c == EOF) break;
		csa->C[c]++;
		csa->n++;
	}
	rewind(in);
	printf("n = %ld last = %ld\n",csa->n,last);

	/* Smallest byte count that holds blog(n+1)+1 bits. */
	*k = (blog(csa->n+1)+1+8-1)/8;
	for (c=0; c<SIGMA; c++) {
		psi[c] = open_diskbuf(out,*k);
	}

	/* C2[c] = next free slot for character c; slot 0 is reserved for `last`. */
	for (j=1,c=0; c<SIGMA; c++) {
		C2[c] = j;
		j += csa->C[c];
	}

	/* Pass 2: n+1 positions, one of which (i == last) consumes no input byte. */
	for (i = 0; i<=csa->n; i++) {
		display_progressbar("computing psi ",i,csa->n);
		if (i == last) {
			setint_diskbuf(psi[0], 0, i);
		} else {
			c = fgetc(in);
			/*
			 * EOF here means `last` is outside 0..n or the file shrank
			 * between the passes; indexing psi[EOF] would be out of bounds.
			 */
			if (c == EOF) {
				fprintf(stderr,"bw_to_psi: unexpected end of %s (last = %ld)\n",fbw,last);
				exit(1);
			}
			setint_diskbuf(psi[c], C2[c]++, i);
		}
	}
	fclose(in);
	for (c=0; c<SIGMA; c++) {
		close_diskbuf(psi[c]);
	}
}
Пример #2
0
/*
 * Converts the BW sequence stored in file `fbw` into psi values, written
 * through disk buffers on `out`.
 *
 *   out  - stream handed to open_diskbuf() for every per-symbol buffer
 *   csa  - csa->sigma and csa->k2 are read; csa->C is (re)allocated with
 *          sigma counters and filled, csa->n receives the sequence length
 *   fbw  - path of the BW file, read with readint(csa->k2, ...) per symbol
 *   flst - path of a text file holding one decimal integer (`last`)
 *   k    - out: integer width (in bytes) used for the disk buffers
 *
 * Any failure prints a message and terminates the process with exit(1).
 * csa->C is allocated here and left for the caller; the temporary arrays
 * are freed before returning.
 *
 * NOTE(review): end of input is detected by readint() returning EOF; whether
 * that holds for every k2 depends on readint(), which is not visible here.
 */
void bw_to_psi(FILE *out, CSA *csa, char *fbw, char *flst, int *k)
{
  FILE *in;
  i64 last,i,j;
  i64 *C2;
  i64 c;
  diskbuf **psi;
  int sigma;
  int k2;

  in = fopen(flst,"r");
  if (in == NULL) {
    perror("bw_to_psi:");  exit(1);
  }
  /* A malformed file would otherwise leave `last` uninitialised. */
  if (fscanf(in,"%ld",&last) != 1) {
    fprintf(stderr,"bw_to_psi: cannot read last from %s\n",flst);
    exit(1);
  }
  fclose(in);

  sigma = csa->sigma;
  k2 = csa->k2;
  csa->C = mymalloc(sizeof(*csa->C)*sigma);

  C2 = mymalloc(sizeof(*C2)*sigma);

  for (c=0; c<sigma; c++) {
    csa->C[c] = 0;
  }

  /* Binary mode: the file holds raw integers, not text. */
  in = fopen(fbw,"rb");
  if (in == NULL) {
    perror("bw_to_psi:");  exit(1);
  }

  /* Pass 1: count the occurrences of every symbol and the total length. */
  csa->n = 0;
  while (1) {
    display_progressbar("reading ",csa->n,0L);
    c = readint(k2,in);
    if (c == EOF) break;
    if (c < 0 || c >= sigma) {
      /* c is i64, so it must be printed with %ld like n and last below. */
      printf("bw_to_psi: c = %ld sigma = %d\n", c, sigma);
      exit(1);
    }
    csa->C[c]++;
    csa->n++;
  }
  rewind(in);
  printf("n = %ld last = %ld\n",csa->n,last);

  /* psi is an array of pointers, so size it by element, not by struct. */
  psi = mymalloc(sizeof(*psi)*sigma);

  /* Smallest byte count that holds blog(n+1)+1 bits. */
  *k = (blog(csa->n+1)+1+8-1)/8;
  for (c=0; c<sigma; c++) {
    psi[c] = open_diskbuf(out,*k);
  }

  /* C2[c] = next free slot for symbol c; slot 0 is reserved for `last`. */
  for (j=1,c=0; c<sigma; c++) {
    C2[c] = j;
    j += csa->C[c];
  }

  /* Pass 2: n+1 positions, one of which (i == last) consumes no input. */
  for (i = 0; i<=csa->n; i++) {
    display_progressbar("computing psi ",i,csa->n);
    if (i == last) {
      setint_diskbuf(psi[0], 0, i);
    } else {
      c = readint(k2,in);
      /*
       * An out-of-range value here means `last` is outside 0..n or the file
       * changed between the passes; using it as an index would be out of
       * bounds.
       */
      if (c < 0 || c >= sigma) {
        fprintf(stderr,"bw_to_psi: unexpected symbol %ld at position %ld (last = %ld)\n",c,i,last);
        exit(1);
      }
      setint_diskbuf(psi[c], C2[c]++, i);
    }
  }
  fclose(in);
  for (c=0; c<sigma; c++) {
    close_diskbuf(psi[c]);
  }
  free(psi);
  free(C2);
}
Пример #3
0
/*
 * Builds the compressed psi index files for `fname` and loads the result
 * into `csa` via psi1_read().
 *
 * Steps performed:
 *   1. bw_to_psi() converts "<fname>.bw" / "<fname>.lst" into psi values
 *      stored in a temporary file.
 *   2. The values are processed in blocks of L = ps->L entries. The first
 *      value of each block (index j*L) is kept as a sample together with
 *      the 16-bit-word offset `b` of the block's encoded data; the other
 *      values are encoded as differences (mod n+1) with ENCODENUM(), with
 *      consecutive differences of 1 folded into run lengths.
 *   3. Encoded blocks go to "<fname>.pxi"; the header (ID_PSI, k, n, L, id)
 *      and the samples go to "<fname>.pxd". With ID_COMPPTR set in the id
 *      the samples are stored in two sparse arrays instead of plain
 *      integers.
 *
 *   csa   - csa->psi_struc must point to a psi1 whose id and L are set
 *   fname - base name of the input and output files
 *
 * Returns the number of bytes accounted for in both output files. Bytes
 * written by SPARSEARRAY_write() are not included in that figure.
 *
 * Only ID_DIFF_GAMMA_RR (low 6 bits of the id) is supported; any other id
 * and any failure to open an output file terminate the process.
 *
 * NOTE(review): `Btmp` is not declared in this function; presumably a
 * file-scope bit buffer large enough for one encoded block -- confirm.
 */
i64 psi12_makeindex(CSA *csa, char *fname)
{
i64 psize,psize1,psize2;
i64 b, b2;
i64 i,j,x,xx;
i64 y,d,w;
int k;
FILE *f1,*f2;
char *fpsi, *fpsd;
i64 runlen;
i64 n,L;
psi1 *ps;
int id,id2;
FILE *out;
diskbuf *psi;
char *fbw, *flst;
SPARSEARRAY sx, sb;
int mm;

  ps = (psi1 *)csa->psi_struc;
  id = ps->id;
  id2 = id & 0x3f;

  /* Every suffix below is 4 characters at most, hence k+5 bytes. */
  k = strlen(fname);
  fbw = mymalloc(k+5);
  flst = mymalloc(k+5);
  fpsi = mymalloc(k+5);
  fpsd = mymalloc(k+5);
  sprintf(fbw,"%s.bw",fname);
  sprintf(flst,"%s.lst",fname);

  switch (id2) {
  case ID_DIFF_GAMMA_RR:
    sprintf(fpsi,"%s.pxi",fname);
    sprintf(fpsd,"%s.pxd",fname);
    break;
  default:
    /* fpsi/fpsd would stay uninitialised and be passed to fopen(). */
    fprintf(stderr,"psi12_makeindex: unsupported id %d\n",id2);
    exit(1);
  }

  out = create_tmp(0);
  bw_to_psi(out,csa,fbw,flst,&k);
  psi = open_diskbuf(out,k);
  ps->last = getint_diskbuf(psi,0);
  printf("last = %ld\n",ps->last);

  n = csa->n;
  L = ps->L;
  /* L is used as a divisor below. */
  if (L <= 0) {
    fprintf(stderr,"psi12_makeindex: invalid L=%ld\n",L);
    exit(1);
  }
  if (L >= n) {
    printf("L=%ld >= n=%ld\n",L,n);
    exit(0);
  }

  mkdecodetable();

  f1 = fopen(fpsi,"wb");
  if (f1 == NULL) {
    perror("psi12_makeindex:");  exit(1);
  }
  psize1 = 0;

  f2 = fopen(fpsd,"wb");
  if (f2 == NULL) {
    perror("psi12_makeindex:");  exit(1);
  }
  psize2 = 0;

  ps->k = k = (blog(n+1)+1+8-1)/8;

  /* Header of the sample file. */
  writeint(1,ID_PSI,f2);
  writeint(1,k,f2); /* #bytes of integer */
  writeint(k,n,f2);
  writeint(k,L,f2);
  psize2 += 1+1+2*k;

  writeint(1,id,f2);
  psize2 += 1;

  if (id & ID_COMPPTR) {
    /* mm = number of characters that actually occur. */
    mm = 0;
    for (i=0; i<SIGMA; i++) {
      if (csa->C[i] > 0) mm++;
    }
    SPARSEARRAY_construct_init(&sx, (mm+1)*(n+1), n/L+1);
    SPARSEARRAY_construct_init(&sb, n, n/L+1);
  }

  b = b2 = 0;
  mm = 0;  xx = 0;
  for (j=0; j<=n/L; j++) {
    if (j % 100000 == 0) {
      printf("%ld %1.3f bpc\r",j,(double)psize2*8/(j+1)/L);  fflush(stdout);
    }
    /* Sample: first psi value of the block. */
    y = getint_diskbuf(psi,j*L);

    if (id & ID_COMPPTR) {
      /* mm counts the samples that do not exceed their predecessor. */
      if (y <= xx) {
        mm++;
      }
      SPARSEARRAY_construct_set(&sx, j, mm*(n+1) + y);
      SPARSEARRAY_construct_set(&sb, j, b);
      xx = y;
    } else {
      writeint(k,y,f2);
      writeint(k,b,f2);
      psize2 += 2*k;
    }

    x = y;
    runlen = 0;
    b2 = 0;
    for (i=j*L+1; i<(j+1)*L && i <= n; i++) { /* psi[j*L] are not encoded */
      y = getint_diskbuf(psi,i);
      d = y - x;
      if (d <= 0) {
        d += n+1;
      }
      if (i == j*L+1) {
        /* Flag bit: does the block start with a difference of 1? */
        if (d == 1) {setbit(Btmp,b2+1,1);  runlen = 1;}
        else {setbit(Btmp,b2+1,0);  runlen = 0;}
        b2++;
      }
      if (d > 1) {
        /* Flush the pending run, then encode the difference itself. */
        if (runlen>0) {
          w = ENCODENUM(Btmp,b2,runlen);
          b2 += w;
          runlen = 0;
        }
        w = ENCODENUM(Btmp,b2,d-1);
        b2 += w;
        runlen = 1;
      } else {
        runlen++;
      }
      x = y;
    }
    if (runlen>0) {
      w = ENCODENUM(Btmp,b2,runlen);
      b2 += w;
      runlen = 0;
    }
    /* Blocks are padded to whole 16-bit words; b is counted in words. */
    fwrite(Btmp,(b2+15) / 16,sizeof(short),f1);
    psize1 += (b2+15)/16*sizeof(short);
    b += (b2+15) / 16;
    b2 = 0;
  }
  /*
   * b2 is always 0 here (initialised to 0 and reset at the end of every
   * block), so this flush writes nothing; it is kept as a safeguard.
   */
  if (b2 > 0) {
    fwrite(Btmp,(b2+15) / 16,sizeof(short),f1);
    psize1 += (b2+15)/16*sizeof(short);
  }
  fwrite(Btmp,1,sizeof(short),f1); /* getbitD reads one extra word */
  psize1 += 1*sizeof(short);

  if (id & ID_COMPPTR) {
    SPARSEARRAY_construct_end(&sx, SDARRAY_SELECT1);
    SPARSEARRAY_construct_end(&sb, SDARRAY_SELECT1);
    SPARSEARRAY_write(&sx, f2);
    SPARSEARRAY_write(&sb, f2);
  }

  psize = psize1 + psize2;
  printf("size %ld (%1.3f bpc)\n",psize,(double)psize*8 / n);

  fclose(f1);
  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  /* Load the index that was just written. */
  psi1_read(csa, fpsd);

  free(fpsi);
  free(fpsd);
  free(fbw);
  free(flst);

  return psize;
}
Пример #4
0
/*
 * Builds the sparse-array psi index file "<fname>.psa" and loads it into
 * `csa` via psi2_read().
 *
 * bw_to_psi() first converts "<fname>.bw" / "<fname>.lst" into psi values
 * held in a temporary file. Each value x at position j is then stored in a
 * sparsearray4 as d*(n+1)+x, where d counts how many values so far did not
 * exceed their predecessor. The output file holds a header (ID_PSI, k, n,
 * id) followed by the serialised sparse array.
 *
 *   csa   - csa->psi_struc must point to a psi2; csa->id is written to the
 *           header
 *   fname - base name of the input and output files
 *
 * Returns the number of bytes written to "<fname>.psa" (header plus the
 * value returned by sparsearray4_write()). Failure to open the output file
 * terminates the process with exit(1).
 */
i64 psi2_makeindex(CSA *csa, char *fname)
{
i64 psize;
i64 i,j,x;
i64 y,d;
int k;
FILE *f2;
char *fpsi;
i64 n,m;
i64 nn, mm;
psi2 *ps;
int id;
FILE *out;
diskbuf *psi;
char *fbw, *flst;
sparsearray4 sa;

  ps = (psi2 *)csa->psi_struc;
  id = csa->id;

  /* Every suffix below is 4 characters at most, hence k+5 bytes. */
  k = strlen(fname);
  fbw = (char *) mymalloc(k+5);
  flst = (char *) mymalloc(k+5);
  fpsi = (char *) mymalloc(k+5);
  sprintf(fbw,"%s.bw",fname);
  sprintf(flst,"%s.lst",fname);

  sprintf(fpsi,"%s.psa",fname);

  out = create_tmp(0);
  bw_to_psi(out,csa,fbw,flst,&k);

  /* m = number of characters that actually occur. */
  m = 0;
  for (i=0; i<SIGMA; i++) {
    if (csa->C[i] > 0) m++;
  }

  psi = open_diskbuf(out,k);
  ps->last = getint_diskbuf(psi,0);
  printf("last = %ld\n",ps->last);

  n = csa->n;

  /* n+1 entries drawn from a universe of (m+1)*(n+1) values. */
  mm = n+1;  nn = (m+1) * (n+1);
  sparsearray4_construct_init(&sa, nn, mm);

  y = 0;  d = 0;
  for (j=0; j<=n; j++) {
    display_progressbar("compressing psi ",j,n);
    x = getint_diskbuf(psi,j);
    /* A value that does not exceed its predecessor bumps the offset. */
    if (x <= y) d++;
    sparsearray4_construct_set(&sa, j, d*(n+1)+x);
    y = x;
  }
  sparsearray4_construct_end(&sa,0, SDARRAY_SELECT1);

  f2 = fopen(fpsi,"wb");
  if (f2 == NULL) {
    perror("psi2_makeindex:");  exit(1);
  }
  psize = 0;

  ps->k = k = (blog(n+1)+1+8-1)/8;

  writeint(1,ID_PSI,f2);
  writeint(1,k,f2); /* #bytes of integer */
  writeint(k,n,f2);
  psize += 1+1+k;

  writeint(1,id,f2);
  psize += 1;

  psize += sparsearray4_write(&sa, f2);

  printf("size %ld (%1.3f bpc)\n",psize,(double)psize*8 / n);

  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  /* Load the index that was just written. */
  psi2_read(csa, fpsi);

  free(fpsi);
  free(fbw);
  free(flst);

  return psize;
}