Ejemplo n.º 1
0
	/*
	 * csa_load: read a CSA structure from the stream `fp`.
	 *
	 * Fields are read in this fixed order: m, two, two2, l, n, then the K table
	 * (SIGMA+2 entries) and the C and C2 tables (SIGMA+1 entries each).  When
	 * USE_MMAP is defined, p_size/B, i_size, r_size/R, sa_size/SA and
	 * isa_size/ISA follow (i_size is read as a size only; no array is loaded
	 * for it here), and the mapp/mapi handles are set to NULL.
	 *
	 * The rank and decode tables are (re)initialised as a side effect.
	 *
	 * Returns the newly malloc'ed CSA, or NULL if the allocation failed.
	 */
	CSA *csa_load(ifstream & fp) {
		CSA *SA = (CSA *) malloc(sizeof(CSA));
		if (SA == NULL)
			return NULL;	/* nothing to initialise or fill in without memory */

		csa_init(SA);
		initranktables();
		mkdecodetable();

		/* Scalar header fields. */
		SA->m = loadValue<int>(fp);
		SA->two = loadValue<int>(fp);
		SA->two2 = loadValue<int>(fp);
		SA->l = loadValue<int>(fp);
		SA->n = loadValue<int>(fp);

		/* Fixed-size tables stored element by element. */
		for(int i=0; i<(SIGMA+2); i++)
			SA->K[i] = loadValue<int>(fp);
		for(int i=0; i<(SIGMA+1); i++)
			SA->C[i] = loadValue<int>(fp);
		for(int i=0; i<(SIGMA+1); i++)
			SA->C2[i] = loadValue<int>(fp);

		#ifdef USE_MMAP
		/* Each variable-length array is preceded by its element count.
		 * NOTE(review): the two-argument loadValue presumably allocates and
		 * returns the array -- confirm against its definition. */
		SA->p_size = loadValue<int>(fp);
		SA->B = loadValue<unsigned short>(fp, SA->p_size);
		SA->i_size = loadValue<int>(fp);
		SA->r_size = loadValue<int>(fp);
		SA->R = loadValue<int>(fp, SA->r_size);
		SA->sa_size = loadValue<int>(fp);
		SA->SA = loadValue<int>(fp, SA->sa_size);
		SA->isa_size = loadValue<int>(fp);
		SA->ISA = loadValue<int>(fp, SA->isa_size);
		SA->mapp = NULL;
		SA->mapi = NULL;
		#endif
		return SA;
	}
/*
 * load_index: allocate a CSA and fill it from "<filename>.psi" and
 * "<filename>.idx" through csa_read().
 *
 * filename  base path of the index, without extension
 * index     out-parameter; receives the newly allocated CSA on success and
 *           is left untouched on failure
 *
 * The rank and decode tables are (re)initialised as a side effect.
 *
 * Returns 0 on success, 1 if a derived file name does not fit in the local
 * buffers or if the CSA cannot be allocated.
 */
int load_index(char *filename, void **index){
   char fname1[128],fname2[128];
   CSA *SA;
   int len1,len2;

   /* Build both names before allocating so the failure path has nothing to
      undo; snprintf never writes past the buffer and reports truncation. */
   len1 = snprintf(fname1,sizeof(fname1),"%s.psi",filename);
   len2 = snprintf(fname2,sizeof(fname2),"%s.idx",filename);
   if (len1 < 0 || (size_t)len1 >= sizeof(fname1) ||
       len2 < 0 || (size_t)len2 >= sizeof(fname2)) {
      fprintf(stderr,"load_index: file name too long: %s\n",filename);
      return 1;
   }

   SA = (CSA *) malloc(sizeof(CSA));
   if (SA == NULL) {
      fprintf(stderr,"load_index: out of memory\n");
      return 1;
   }

   initranktables();
   mkdecodetable();
   csa_read(SA,fname1,fname2);
   (*index) = SA;
   return 0;
}
Ejemplo n.º 3
0
/*
 * psi12_makeindex: write the psi index files for the GAMMA_RR layout.
 *
 * csa    suffix array whose psi_struc (a psi1) supplies id and L and
 *        receives last and k; csa->n and csa->C are read
 * fname  base file name; "<fname>.bw" and "<fname>.lst" are handed to
 *        bw_to_psi(), and "<fname>.pxi" / "<fname>.pxd" are created
 *
 * Steps:
 *   1. bw_to_psi() fills a temporary file that is then read back through a
 *      diskbuf as the psi values.
 *   2. A header (ID_PSI, k, n, L, id) is written to the .pxd file, where k is
 *      the byte width used for the integers that follow.
 *   3. For every block j of L positions, the first value psi[j*L] and the
 *      current word offset b into the .pxi file are recorded: in two sparse
 *      arrays when ID_COMPPTR is set in id, otherwise as two k-byte integers
 *      in the .pxd file.
 *   4. The remaining values of the block are encoded into Btmp from their
 *      differences to the previous value (taken modulo n+1); differences of 1
 *      are counted in runlen and emitted as a run length.  The block is
 *      written to the .pxi file rounded up to whole 16-bit words.
 *   5. psi1_read() is called on the .pxd file once both files are closed.
 *
 * Progress and size statistics are printed to stdout.  The process is
 * terminated with exit() when L >= n, when id does not select the GAMMA_RR
 * layout, or when an output file cannot be created.
 *
 * Returns psize1 + psize2, the byte counts accumulated for the two files.
 * NOTE(review): psize2 is not increased for the sparse arrays written in the
 * ID_COMPPTR case, so the total presumably under-counts there -- confirm.
 */
i64 psi12_makeindex(CSA *csa, char *fname)
{
i64 psize,psize1,psize2;
i64 b, b2;
i64 i,j,x,xx;
i64 y,d,w;
int k;
FILE *f1,*f2;
char *fpsi, *fpsd;
i64 runlen;
i64 n,L;
psi1 *ps;
int id,id2;
FILE *out;
diskbuf *psi;
char *fbw, *flst;
SPARSEARRAY sx, sb;
int mm;

  ps = (psi1 *)csa->psi_struc;
  id = ps->id;
  id2 = id & 0x3f;

  /* Every suffix used below is at most four characters plus the NUL. */
  k = strlen(fname);
  fbw = mymalloc(k+5);
  flst = mymalloc(k+5);
  fpsi = mymalloc(k+5);
  fpsd = mymalloc(k+5);
  sprintf(fbw,"%s.bw",fname);
  sprintf(flst,"%s.lst",fname);

  switch (id2) {
  case ID_DIFF_GAMMA_RR:
    sprintf(fpsi,"%s.pxi",fname);
    sprintf(fpsd,"%s.pxd",fname);
    break;
  default:
    /* No output names exist for any other layout; continuing would pass
       unfilled buffers to fopen(). */
    printf("psi12_makeindex: ID %d is not supported.\n",id);
    exit(1);
  }

  out = create_tmp(0);
  bw_to_psi(out,csa,fbw,flst,&k);
  psi = open_diskbuf(out,k);
  ps->last = getint_diskbuf(psi,0);
  printf("last = %ld\n",ps->last);

  n = csa->n;
  L = ps->L;
  if (L >= n) {
    printf("L=%ld >= n=%ld\n",L,n);
    exit(0);
  }

  mkdecodetable();

  f1 = fopen(fpsi,"wb");
  if (f1 == NULL) {
    perror("psi12_makeindex: fopen pxi");
    exit(1);
  }
  psize1 = 0;

  f2 = fopen(fpsd,"wb");
  if (f2 == NULL) {
    perror("psi12_makeindex: fopen pxd");
    exit(1);
  }
  psize2 = 0;

  ps->k = k = (blog(n+1)+1+8-1)/8;

  writeint(1,ID_PSI,f2);
  writeint(1,k,f2); /* #bytes of integer */
  writeint(k,n,f2);
  writeint(k,L,f2);
  psize2 += 1+1+2*k;

  writeint(1,id,f2);
  psize2 += 1;

  if (id & ID_COMPPTR) {
    /* mm = number of characters with a non-zero count; it sizes the value
       range of the sample array. */
    mm = 0;
    for (i=0; i<SIGMA; i++) {
      if (csa->C[i] > 0) mm++;
    }
    SPARSEARRAY_construct_init(&sx, (mm+1)*(n+1), n/L+1);
    SPARSEARRAY_construct_init(&sb, n, n/L+1);
  }

  b = b2 = 0;
  mm = 0;  xx = 0;
  for (j=0; j<=n/L; j++) {
    if (j % 100000 == 0) {
      printf("%ld %1.3f bpc\r",j,(double)psize2*8/(j+1)/L);  fflush(stdout);
    }
    y = getint_diskbuf(psi,j*L);

    if (id & ID_COMPPTR) {
      /* mm is bumped whenever the sample does not increase, which keeps the
         stored keys mm*(n+1)+y increasing across such drops. */
      if (y <= xx) {
        mm++;
      }
      SPARSEARRAY_construct_set(&sx, j, mm*(n+1) + y);
      SPARSEARRAY_construct_set(&sb, j, b);
      xx = y;
    } else {
      writeint(k,y,f2);
      writeint(k,b,f2);
      psize2 += 2*k;
    }

    x = y;
    runlen = 0;
    b2 = 0;
    for (i=j*L+1; i<(j+1)*L && i <= n; i++) { /* psi[j*L] are not encoded */
      y = getint_diskbuf(psi,i);
      d = y - x;
      if (d <= 0) {
        d += n+1;
      }
      if (i == j*L+1) {
        /* One flag bit per block records whether it starts with a run.
           NOTE(review): the bit index is b2+1 while b2 advances by one --
           confirm against setbit's indexing convention. */
        if (d == 1) {setbit(Btmp,b2+1,1);  runlen = 1;}
        else {setbit(Btmp,b2+1,0);  runlen = 0;}
        b2++;
      }
      if (d > 1) {
        if (runlen>0) {
          w = ENCODENUM(Btmp,b2,runlen);
          b2 += w;
          runlen = 0;
        }
        w = ENCODENUM(Btmp,b2,d-1);
        b2 += w;
        runlen = 1;
      } else {
        runlen++;
      }
      x = y;
    }
    if (runlen>0) {
      w = ENCODENUM(Btmp,b2,runlen);
      b2 += w;
      runlen = 0;
    }
    /* Blocks are padded to whole 16-bit words; b counts words, not bits. */
    fwrite(Btmp,(b2+15) / 16,sizeof(short),f1);
    psize1 += (b2+15)/16*sizeof(short);
    b += (b2+15) / 16;
    b2 = 0;
  }
  fwrite(Btmp,1,sizeof(short),f1); // padding word: getbitD reads one extra word
  psize1 += 1*sizeof(short);

  if (id & ID_COMPPTR) {
    SPARSEARRAY_construct_end(&sx, SDARRAY_SELECT1);
    SPARSEARRAY_construct_end(&sb, SDARRAY_SELECT1);
    SPARSEARRAY_write(&sx, f2);
    SPARSEARRAY_write(&sb, f2);
  }

  psize = psize1 + psize2;
  printf("size %ld (%1.3f bpc)\n",psize,(double)psize*8 / n);

  fclose(f1);
  fclose(f2);

  close_diskbuf(psi);
  fclose(out);
  remove_tmp(0);

  psi1_read(csa, fpsd);

  free(fpsi);
  free(fpsd);
  free(fbw);
  free(flst);

  return psize;
}
Ejemplo n.º 4
0
/*
 * psi1_read: map a psi index and install it in `csa`.
 *
 * csa    receives a freshly allocated psi1 in psi_struc, the format id, and
 *        the function pointers set at the end of this routine
 * fname  path of the directory file; its last four characters are taken to
 *        be the extension and are replaced to derive the data file name
 *        (".psi", ".pri" or ".pxi", depending on the format id)
 *
 * The directory file starts with: 1 byte ID_PSI, 1 byte k (integer width in
 * bytes), k bytes n, k bytes L, 1 byte format id.  What follows is either two
 * sparse arrays (ID_COMPPTR set in id) or a table that ps->R is pointed at.
 * Both files are mapped with mymmap() and stay mapped on return.
 *
 * The rank and decode tables are (re)initialised as a side effect, and the
 * detected format is printed to stdout.  The process is terminated with
 * exit(1) when fname is shorter than an extension, when a mapping fails, or
 * when either id in the header is not supported.
 *
 * Returns the combined length in bytes of the two mappings.
 */
i64 psi1_read(CSA *csa, char *fname)
{
  i64 psize1,psize2;
  i64 n;
  int k,l,id,id2;
  char *fpsi, *fpsd, *fname2;
  psi1 *ps;
  uchar *p;

  k = strlen(fname);
  if (k < 4) {
    /* The base name is obtained by cutting four characters off the end. */
    printf("psi1_read: file name %s is too short.\n",fname);
    exit(1);
  }

  csa->psi_struc = ps = mymalloc(sizeof(psi1));

  /* fname2 = fname without its four-character extension. */
  fname2 = mymalloc(k-4+1);
  strncpy(fname2,fname,k-4);
  fname2[k-4] = 0;

  initranktables();
  mkdecodetable();

  /* Base name + four-character extension + NUL. */
  fpsi = mymalloc((k-4)+4+1);
  fpsd = fname;

  ps->mappsd = mymmap(fpsd);
  if (ps->mappsd->addr==NULL) {
    perror("psi1_read: mmap2\n");
    exit(1);
  }
  p = (uchar *)ps->mappsd->addr;
  psize1 = ps->mappsd->len;

  id = getuint(p,0,1);  p += 1;
  if (id != ID_PSI) {
    printf("read_psi: id = %d is not supported.\n",id);
    exit(1);
  }
  ps->k = k = getuint(p,0,1);  p += 1;
  ps->n = n = getuint(p,0,k);  p += k;
  ps->L = l = getuint(p,0,k);  p += k;

  id = getuint(p,0,1);  p += 1;
  csa->id = ps->id = id;
  id2 = id & 0x3f;
  switch (id2) {
    case ID_DIFF_GAMMA:
      printf("#psi format = GAMMA L=%d C=%d\n",l,(id>>7));
      sprintf(fpsi,"%s.psi",fname2);
      break;
    case ID_DIFF_GAMMA_RL:
      printf("#psi format = GAMMA_RL L=%d C=%d\n",l,(id>>7));
      sprintf(fpsi,"%s.pri",fname2);
      break;
    case ID_DIFF_GAMMA_SPARSE:
      printf("#psi format = GAMMA_SPARSE L=%d C=%d\n",l,(id>>7));
      sprintf(fpsi,"%s.psi",fname2);
      break;
    case ID_DIFF_GAMMA_RL_SPARSE:
      printf("#psi format = GAMMA_RL_SPARSE L=%d C=%d\n",l,(id>>7));
      sprintf(fpsi,"%s.pri",fname2);
      break;
    case ID_DIFF_GAMMA_RR:
      printf("#psi format = GAMMA_RR L=%d C=%d\n",l,(id>>7));
      sprintf(fpsi,"%s.pxi",fname2);
      break;
    default:
      /* fpsi has not been filled in, so there is no data file to map. */
      printf("read_csa: ID %d is not supported.\n",id);
      exit(1);
  }

  if (id & ID_COMPPTR) {
    printf("COMPPTR\n");
    ps->sx = mymalloc(sizeof(SPARSEARRAY));
    ps->sb = mymalloc(sizeof(SPARSEARRAY));
    SPARSEARRAY_read(ps->sx, &p);
    SPARSEARRAY_read(ps->sb, &p);
  } else {
    /* R points straight into the mapping, just past the header. */
    ps->R = p;
  }

  /* Map the data file holding the encoded psi values. */
  ps->mappsi = mymmap(fpsi);
  if (ps->mappsi->addr==NULL) {
    perror("psi1_read: mmap1\n");
    exit(1);
  }
  ps->B = (unsigned short *)ps->mappsi->addr;
  psize2 = ps->mappsi->len;

  ps->psize = psize1 + psize2;

  free(fpsi);
  free(fname2);

  /* Format-specific functions: GAMMA_RR has its own psi accessor, and both
     GAMMA_RR and ID_COMPPTR fall back to the naive pred/succ. */
  csa->psi = psi1_psi;
  if (id2 == ID_DIFF_GAMMA_RR) {
    csa->psi = psi12_psi;
    csa->psi_pred = csa_psi_pred_naive;
    csa->psi_succ = csa_psi_succ_naive;
  } else if (id & ID_COMPPTR) {
    csa->psi_pred = csa_psi_pred_naive;
    csa->psi_succ = csa_psi_succ_naive;
  } else {
    csa->psi_succ = psi1_succ_tmp;
    csa->psi_pred = psi1_pred_tmp;
  }

  /* Default functions shared by all formats. */
  csa->LF = csa_LF_by_psi;
  csa->lookup = csa_lookup;
  csa->inverse = csa_inverse;
  csa->text = csa_text;
  csa->substring = csa_substring;
  csa->T = csa_T;
  csa->head = csa_head_rank;
  csa->search = csa_search;
  csa->searchsub = csa_searchsub;

  return psize1 + psize2;
}