int main(int argc,char * argv[]) { const char * s1="xml.50MB"; const char * sa="xml.csa"; const char * s2="xml.txt"; if(argc==2) { CSA csa(atoi(argv[1])); csa.Compress(s1,sa); cout<<"compress is ok"<<endl; csa.Decompress(sa,s2); cout<<"decompress is ok"<<endl; } else { CSA csa; csa.Compress(s1,sa); cout<<"compress is ok"<<endl; // time_t t1=clock(); csa.Decompress(sa,s2); // time_t t2=clock(); // cout<<(t2-t1)/1000000.0<<endl; cout<<"decompress is ok"<<endl; } return 0; }
/*
 * Return the child of `node` that follows `child`.
 *
 * The candidate rank is child.r + 1.  When that rank lies beyond node.r
 * there is no further child and a sentinel node is returned with
 * depth = -1, l = 1, r = 0.
 *
 * Otherwise psi is applied node.depth times to the candidate rank and the
 * head character at the resulting rank selects the child via cst_child().
 * A head value of -1 yields a node of depth node.depth + 1 whose range is
 * the single rank node.l.
 *
 * The csa pointer is set on every returned node, including the sentinel
 * (it used to be left uninitialised there).
 */
cst_node cst_nextchild(cst_node node, cst_node child)
{
  CSA *csa = node.csa;
  i64 depth = node.depth;
  i64 l = child.r + 1;          /* first rank after the given child */
  i64 i;
  int c;
  cst_node newnode;

  newnode.csa = csa;

  if (l > node.r) {
    /* ran off the end of the parent's range: no next child */
    newnode.depth = -1;
    newnode.l = 1;
    newnode.r = 0;
    return newnode;
  }

  /* follow psi `depth` times from the candidate rank */
  for (i = 0; i < depth; i++) {
    l = csa->psi(csa, l);
  }

  c = csa->head(csa, l);
  if (c == -1) {
    newnode.depth = depth + 1;
    newnode.l = newnode.r = node.l;
    return newnode;
  }
  return cst_child(node, csa->AtoC[c]);
}
static VALUE // bug? rb_csa_search_l(VALUE self, VALUE oc, VALUE range) { CSA *sa = csa_ptr(self); i64 ll,rr; int c; i64 ret; c = FIX2INT(oc); #if USE_RANGE ll = FIX2LONG(range_first(range)); rr = FIX2LONG(range_last(range)); if (range_exclude_end_p(range) == Qtrue) rr--; #else if (RALEN(range) != 2) { return Qnil; } ll = FIX2LONG(RAPTR(range)[0]); rr = FIX2LONG(RAPTR(range)[1]); #endif ret = sa->searchsub(c, sa, &ll, &rr); if (ret == -1) return Qnil; #if USE_RANGE return rb_range_new(LONG2FIX(ll), LONG2FIX(rr), Qnil); #else return rb_ary_new3(2, LONG2FIX(ll), LONG2FIX(rr)); #endif }
/*
 * Return a copy of `node` whose depth has been extended for as long as
 * the two boundary suffixes (ranks node.l and node.r) keep the same head
 * character.
 *
 * Both ranks are first advanced node.depth times through psi; afterwards
 * each further step on which the head characters agree increases the
 * depth by one.  Only the depth field of the copy is changed.
 *
 * NOTE(review): the extension loop stops only when the two head values
 * differ; for a node with l == r this may never happen - confirm.
 */
cst_node cst_canonize(cst_node node)
{
  CSA *csa = node.csa;
  cst_node result = node;
  i64 lo = node.l;
  i64 hi = node.r;
  i64 depth = node.depth;
  i64 remaining;

  /* skip the part of the path label that is already accounted for */
  for (remaining = depth; remaining > 0; remaining--) {
    lo = csa->psi(csa, lo);
    hi = csa->psi(csa, hi);
  }

  for (;;) {
    int head_lo = csa->head(csa, lo);
    int head_hi = csa->head(csa, hi);

    if (head_lo != head_hi) {
      break;
    }
    lo = csa->psi(csa, lo);
    hi = csa->psi(csa, hi);
    depth++;
  }

  result.depth = depth;
  return result;
}
static VALUE //rb_csa_text(VALUE self, VALUE oi, VALUE oj) rb_csa_text(VALUE self, VALUE range) { CSA *sa = csa_ptr(self); i64 i,j,n; uchar *buf; #if USE_RANGE i = FIX2LONG(range_first(range)); j = FIX2LONG(range_last(range)); if (range_exclude_end_p(range) == Qtrue) j--; #else // i = FIX2LONG(oi); // j = FIX2LONG(oj); if (RALEN(range) != 2) { return Qnil; } i = FIX2LONG(RAPTR(range)[0]); j = FIX2LONG(RAPTR(range)[1]); #endif n = sa->n; if (i < 0 || i > n || j < 0 || j > n) { // error return Qnil; } buf = (uchar *)alloca(j-i+1+1); sa->text(buf, sa, i, j); buf[j-i+1] = 0; return rb_str_new(buf, j-i+1); }
/*
 * Find every occurrence of `pattern` (of `length` bytes) in the index.
 *
 * index  - pointer to a CSA.
 * occ    - out: malloc'ed array with one entry per occurrence, filled
 *          from csa->lookup() for each rank of the matching interval; the
 *          caller owns and frees it.  Set to NULL when nothing matches.
 * numocc - out: number of entries in *occ.
 *
 * Always returns 0.  On allocation failure a message is printed and the
 * process exits with status 1.
 *
 * The result buffer is sized from the number of matches; it used to be
 * sized from the not-yet-assigned output parameter *numocc.
 */
int locate(void *index, uchar *pattern, ulong length, ulong **occ, ulong *numocc)
{
  CSA *csa = (CSA *)index;
  i64 l, r;
  i64 mlen;
  ulong i, oc;
  ulong *buf;

  mlen = csa->search(pattern, length, csa, &l, &r);
  if (mlen < length) {
    /* the pattern was not matched in full */
    *numocc = 0;
    *occ = NULL;
    return 0;
  }

  oc = (ulong)(r - l + 1);
  buf = malloc(oc * sizeof(*buf));
  if (buf == NULL) {
    printf("locate: not enough mem.\n");
    exit(1);
  }

  for (i = 0; i < oc; i++) {
    buf[i] = csa->lookup(csa, l + i);
  }

  *numocc = oc;
  *occ = buf;
  return 0;
}
/*
 * Return the first child of `node`.
 *
 * psi is applied node.depth times starting from rank node.l, and the head
 * character at the resulting rank selects the child through cst_child().
 * A head value of -1 instead yields a node with depth and depth2 equal to
 * node.depth + 1 whose range is the single rank node.l.
 */
cst_node cst_firstchild(cst_node node)
{
  CSA *csa = node.csa;
  i64 depth = node.depth;
  i64 rank = node.l;
  i64 remaining;
  int head;

  for (remaining = depth; remaining > 0; remaining--) {
    rank = csa->psi(csa, rank);
  }

  head = csa->head(csa, rank);
  if (head != -1) {
    return cst_child(node, csa->AtoC[head]);
  }

  {
    cst_node leaf;

    leaf.csa = csa;
    leaf.depth = depth + 1;
    leaf.depth2 = depth + 1;
    leaf.l = node.l;
    leaf.r = node.l;
    return leaf;
  }
}
/*
 * Follow the suffix link of `node`.
 *
 * A temporary node is built on the same index with both range endpoints
 * mapped through psi and the depth left unchanged; the result is the
 * cst_parent() of that temporary node.
 */
cst_node cst_suflink(cst_node node)
{
  CSA *csa = node.csa;
  cst_node shifted;

  shifted.csa = csa;
  shifted.l = csa->psi(csa, node.l);
  shifted.r = csa->psi(csa, node.r);
  shifted.depth = node.depth;

  return cst_parent(shifted);
}
/*
 * Count the occurrences of `pattern` (of `length` bytes) in the index.
 *
 * index  - pointer to a CSA.
 * numocc - out: 0 when csa->search() matched fewer than `length`
 *          characters, otherwise the size of the returned rank interval.
 *
 * Always returns 0.
 */
int count(void *index, uchar *pattern, ulong length, ulong *numocc)
{
  CSA *csa = (CSA *)index;
  i64 lo, hi;
  i64 matched = csa->search(pattern, length, csa, &lo, &hi);

  if (matched < length) {
    *numocc = 0;
  } else {
    *numocc = (ulong)(hi - lo + 1);
  }
  return 0;
}
/*
 * Ruby binding for the index's inverse callback.
 *
 * oi - Fixnum argument; nil is returned when it lies outside 0..sa->n.
 *
 * Returns the callback's result as a Fixnum.
 */
static VALUE
rb_csa_inverse(VALUE self, VALUE oi)
{
  CSA *sa = csa_ptr(self);
  i64 pos = FIX2LONG(oi);
  i64 result;

  if (pos < 0 || pos > sa->n) {
    return Qnil;
  }

  result = sa->inverse(sa, pos);
  return LONG2FIX(result);
}
/*
 * Ruby binding: for every character of the index alphabet, try to narrow
 * the given rank interval with searchsub and report each character for
 * which searchsub returns 0.
 *
 * range - with USE_RANGE, a Range (an end-exclusive range has its upper
 *         bound decremented); otherwise a two-element array [l, r].  Any
 *         other array length yields nil.
 *
 * With a block, each hit is yielded as [c, interval]; without one, the
 * hits are appended to an array.  The interval is a Range under USE_RANGE
 * and a two-element array otherwise.
 *
 * NOTE(review): the definition is cut off in this view - the function
 * body is not closed and no return statement is visible.
 */
static VALUE
rb_csa_child_l(VALUE self, VALUE range)
{
  CSA *sa = csa_ptr(self);
  i64 l,r,ll,rr;
  int c,i;
  VALUE charset;
  i64 ret;

#if USE_RANGE
  ll = FIX2LONG(range_first(range));
  rr = FIX2LONG(range_last(range));
  if (range_exclude_end_p(range) == Qtrue) rr--;
#else
  if (RALEN(range) != 2) {
    return Qnil;
  }
  ll = FIX2LONG(RAPTR(range)[0]);
  rr = FIX2LONG(RAPTR(range)[1]);
#endif
  l = r = -1;

  /* the result array is only created when no block was supplied */
  if (!rb_block_given_p()) charset = rb_ary_new();
  for (i=0; i<sa->m; i++) {
    c = sa->AtoC[i];
    /* every attempt starts again from the caller's interval */
    l = ll; r = rr;
    ret = sa->searchsub(c, sa, &l, &r);
    if (ret == 0) {
      if (rb_block_given_p()) {
        rb_yield(rb_ary_new3(2,INT2FIX(c),
#if USE_RANGE
          rb_range_new(LONG2FIX(l), LONG2FIX(r), Qnil)));
#else
          rb_ary_new3(2,LONG2FIX(l),LONG2FIX(r))));
#endif
      } else {
        rb_ary_push(charset, rb_ary_new3(2,INT2FIX(c),
#if USE_RANGE
          rb_range_new(LONG2FIX(l), LONG2FIX(r), Qnil)));
#else
          rb_ary_new3(2,LONG2FIX(l),LONG2FIX(r))));
#endif
      }
    }
  }
/*
 * Ruby binding for the index's T callback.
 *
 * oi - Fixnum argument; nil is returned when it lies outside 0..sa->n.
 *
 * Returns the callback's int result as a Fixnum.
 */
static VALUE
rb_csa_T(VALUE self, VALUE oi)
{
  CSA *sa = csa_ptr(self);
  i64 pos = FIX2LONG(oi);
  int ch;

  if (pos < 0 || pos > sa->n) {
    return Qnil;
  }

  ch = sa->T(sa, pos);
  return INT2FIX(ch);
}
/*
 * Ruby binding for the index's LF callback.
 *
 * oi - Fixnum argument; nil is returned when it lies outside 0..sa->n.
 *
 * Returns the callback's result as a Fixnum.
 */
static VALUE
rb_csa_lf(VALUE self, VALUE oi)
{
  CSA *sa = csa_ptr(self);
  i64 rank = FIX2LONG(oi);
  i64 mapped;

  if (rank < 0 || rank > sa->n) {
    return Qnil;
  }

  mapped = sa->LF(sa, rank);
  return LONG2FIX(mapped);
}
/*
 * Ruby binding: search the index for a key string.
 *
 * okey - Ruby string; its C string and length are passed to sa->search().
 *
 * Returns nil when fewer than keylen characters were matched; otherwise
 * the interval reported by the search, as a Range under USE_RANGE or as
 * a two-element array.
 */
static VALUE
rb_csa_search(VALUE self, VALUE okey)
{
  CSA *sa = csa_ptr(self);
  i64 bounds[2];
  i64 keylen;
  uchar *key;

  key = StringValueCStr(okey);
  keylen = RSLEN(okey);

  if (sa->search(key, keylen, sa, &bounds[0], &bounds[1]) < keylen) {
    return Qnil;
  }

#if USE_RANGE
  return rb_range_new(LONG2FIX(bounds[0]), LONG2FIX(bounds[1]), Qnil);
#else
  return rb_ary_new3(2, LONG2FIX(bounds[0]), LONG2FIX(bounds[1]));
#endif
}
/*
 * Ruby binding for the index's substring callback.
 *
 * orank - Fixnum rank; must lie in 0..sa->n.
 * olen  - Fixnum requested length; must be non-negative.
 *
 * Returns nil on an invalid argument; otherwise a Ruby string holding the
 * number of bytes the callback reports having produced.
 *
 * NOTE(review): the scratch buffer comes from alloca() and is sized by
 * the caller-supplied length with no upper bound.
 */
static VALUE
rb_csa_substring(VALUE self, VALUE orank, VALUE olen)
{
  CSA *sa = csa_ptr(self);
  i64 start = FIX2LONG(orank);
  i64 wanted = FIX2LONG(olen);
  i64 produced;
  uchar *out;

  if (start < 0 || start > sa->n || wanted < 0) {
    return Qnil;
  }

  out = (uchar *)alloca(wanted + 1);   /* +1 for the terminator */
  produced = sa->substring(out, sa, start, wanted);
  out[produced] = 0;

  return rb_str_new(out, produced);
}
/*
 * Extract a span of the indexed text into a freshly allocated buffer.
 *
 * index          - pointer to a CSA.
 * from, to       - span bounds; both are incremented by one before use,
 *                  and the upper bound is then clamped to csa->n.
 * snippet        - out: malloc'ed buffer with the extracted bytes (not
 *                  NUL-terminated); the caller owns and frees it.  NULL
 *                  when the span is empty.
 * snippet_length - out: number of bytes in *snippet.
 *
 * Always returns 0.  On allocation failure a message is printed and the
 * process exits with status 1, as locate() does.
 *
 * An empty span (from > to after clamping) and a failed allocation are
 * now handled explicitly; previously both ended in csa->text() writing
 * through an invalid buffer.
 */
int extract(void *index, ulong from, ulong to, uchar **snippet, ulong *snippet_length)
{
  CSA *csa = (CSA *)index;
  uchar *text;
  i64 len;

  from++;
  to++;
  if (to > csa->n) to = csa->n;

  if (from > to) {
    /* nothing to extract */
    *snippet = NULL;
    *snippet_length = 0;
    return 0;
  }

  len = to - from + 1;
  text = malloc(len);
  if (text == NULL) {
    printf("extract: not enough mem.\n");
    exit(1);
  }

  csa->text(text, csa, from, to);

  *snippet = text;
  *snippet_length = (ulong)len;
  return 0;
}
/*
 * Report whether the two boundary suffixes of `node` continue with the
 * same character.
 *
 * Both endpoints of the node's range are advanced node.depth times
 * through psi; the function returns non-zero exactly when the head
 * characters at the two resulting ranks are equal.
 */
int cst_isunary(cst_node node)
{
  CSA *csa = node.csa;
  i64 lo = node.l;
  i64 hi = node.r;
  i64 remaining;
  int head_lo, head_hi;

  for (remaining = node.depth; remaining > 0; remaining--) {
    lo = csa->psi(csa, lo);
    hi = csa->psi(csa, hi);
  }

  head_lo = csa->head(csa, lo);
  head_hi = csa->head(csa, hi);
  return head_lo == head_hi;
}
/*
 * Return the node reached by searching for the first node.depth - 1
 * characters of the path label of `node`.
 *
 * The label comes from cst_pathlabel() and is freed here.  The resulting
 * node has depth node.depth - 1 and the rank interval reported by
 * csa->search().  A diagnostic is printed when the search does not report
 * exactly that many matched characters, and the interval is used anyway.
 */
cst_node cst_parent(cst_node node)
{
  CSA *csa = node.csa;
  cst_node up;
  uchar *path;
  i64 lo, hi;

  up.csa = csa;
  up.depth = node.depth - 1;

  path = cst_pathlabel(node);
  lo = node.l;
  hi = node.r;
  if (csa->search(path, up.depth, csa, &lo, &hi) != up.depth) {
    printf("cst_parent: ???\n");
  }
  up.l = lo;
  up.r = hi;

  free(path);
  return up;
}
/*
 * Follow the Weiner link of `node` for character `c`.
 *
 * The node's rank interval is narrowed with csa->searchsub().  When that
 * call returns 0, the result is a node one level deeper covering the
 * narrowed interval; otherwise a sentinel with depth = -1, l = 1, r = 0
 * is returned.  The csa pointer is set in both cases.
 */
cst_node cst_weiner_link(cst_node node, int c)
{
  CSA *csa = node.csa;
  cst_node linked;
  i64 lo = node.l;
  i64 hi = node.r;

  linked.csa = csa;

  if (csa->searchsub(c, node.csa, &lo, &hi) == 0) {
    linked.depth = node.depth + 1;
    linked.l = lo;
    linked.r = hi;
  } else {
    linked.depth = -1;
    linked.l = 1;
    linked.r = 0;
  }
  return linked;
}
/*
 * Decode driver: loads an index with csa_read(), extracts the text in
 * chunks of at most PAGE bytes through csa.text(), writes it to
 * "output.dec" and reports the elapsed time on stderr.
 *
 * `buf` and PAGE are defined outside this function.
 *
 * Returns 1 on a usage error or when the output file cannot be opened or
 * written, 0 otherwise.  The output is opened in binary mode so that the
 * bytes reach the file unchanged on every platform, and the results of
 * fopen() and fwrite() are checked.
 */
int main(int argc, char *argv[])
{
  i64 i,n;
  CSA csa;
  mytimestruct before,after;
  double t;
  FILE *out;
  int m;

  if (argc<2) {
    fprintf(stderr, "syntax: suftest file\n");
    return 1;
  }

  csa_read(&csa,argc,argv);
  n = csa.n;

  mygettime(&before);

  out = fopen("output.dec","wb");
  if (out == NULL) {
    perror("output.dec");
    return 1;
  }

  i = 0;
  while (i < n) {
    /* occasional progress indicator */
    if ((i/PAGE) % PAGE == 0) {
      fprintf(stderr,"%ld \r",i/PAGE);
      fflush(stderr);
    }
    m = PAGE;
    if (i+m >= n) m = n-i;   /* last chunk may be shorter */
    csa.text(buf,&csa,i,i+m-1);
    if (fwrite(buf,1,m,out) != (size_t)m) {
      perror("output.dec");
      fclose(out);
      return 1;
    }
    i += m;
  }
  fclose(out);

  mygettime(&after);
  t = mylaptime(&before,&after);
  fprintf(stderr,"time %f sec\n",t);
  return 0;
}
/*
 * Build a compressed index from the command line.
 *
 * Arguments starting with '-' are options, dispatched on the (upper-cased)
 * letter that follows the dash:
 *   -I...  enable creation of the .idx file; the rest goes to csa_options()
 *   -P...  the rest goes to psi_options()
 *   -C...  the rest goes to sigma_options()
 * Any other option prints a message and exits with status 1.  A
 * non-option argument is taken as the input file name, and the idx file
 * name is formed by appending ".idx" to it.
 *
 * The function then
 *   1. calls the *_makeindex routine selected by (csa.id & 0x3f),
 *   2. fills csa.CtoA / csa.AtoC / csa.K / csa.m from the counts in csa.C,
 *   3. if -I was given, fills the sampled csa.SA / csa.ISA arrays and
 *      writes the idx file (version, header, then the SA and ISA sections
 *      when D / D2 are positive).
 *
 * Progress and sizes are printed on stdout; several error paths call
 * exit().
 *
 * NOTE(review): csa.id is read without having been assigned in this
 * function - presumably psi_options() sets it; confirm what happens when
 * no -P option is given.
 * NOTE(review): the allocations of csa.CtoA, csa.AtoC and csa.K are
 * commented out here - presumably they are provided elsewhere; confirm.
 */
void csa_new_from_bwt(int argc, char *argv[])
{
  i64 i,j,v,m;
  FILE *f2;
  i64 psize,isize;
  i64 n;
  int k;
  char *fname,*fidx;
  char *p;
  int psi_id, idx_id;
  CSA csa;
  int sigma;

  csa.sigma = 256; /* default alphabet size */
  csa.k2 = 1;

//  for (i=0; i<SIGMA+2; i++) csa.C[i] = 0;
//  for (i=0; i<SIGMA; i++) csa.C[i] = 0;

  fname = NULL;  fidx = NULL;
  psi_id = idx_id = -1;
  /* ---- command-line parsing ---- */
  for (i=1; i<argc; i++) {
    p = argv[i];
    if (p[0] == '-') {
      p++;
      switch (toupper(p[0])) {
      case 'I':  // -I[n]:[D]:[D2]
        p++;
        idx_id = 0;
        csa_options(&csa, p);
        break;
      case 'P':  // -P[n]:[L]
        p++;
        psi_id = 0;
        psi_options(&csa, p);
        break;
      case 'C':  // -C[s]
        p++;
        sigma_options(&csa, p);
        break;
      default:
        printf("??? no such option %s\n",argv[i]);
        exit(1);
      }
    } else {
      fname = argv[i];
      k = strlen(fname);
      /* room for the name, ".idx" and the terminator */
      fidx = mymalloc(k+5);
      sprintf(fidx,"%s.idx",fname);
    }
  }
  if (fname == NULL) {
    printf("no input file.\n");
    exit(0);
  }

  printf("sigma = %d k2 = %d\n", csa.sigma, csa.k2);
  sigma = csa.sigma;
  csa.C = mymalloc(sizeof(*csa.C)*sigma);
//  csa.CtoA = mymalloc(sizeof(*csa.CtoA)*sigma);
//  csa.AtoC = mymalloc(sizeof(*csa.AtoC)*sigma);
//  csa.K = mymalloc(sizeof(*csa.K)*(sigma+2));
//  for (i=0; i<sigma; i++) csa.C[i] = 0;

  /* from here on the id stored in the CSA decides which encoder runs */
  psi_id = csa.id;
  if (psi_id >= 0) {
    printf("create psi: id=%d\n",psi_id);
  }
  if (idx_id >= 0) {
    printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2);
  }

  /* ---- step 1: build the psi / BWT structure ---- */
  psize = 0;
  if (psi_id >= 0) {
    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
      psize = psi1_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("Psi %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_DIFF_GAMMA_RR:
      psize = psi12_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("Psi %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA:
      psize = lf_dna_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_DNA2:
      psize = lf_dna2_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_BIT:
      psize = lf_bit_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
      psize = lf_wt_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
#if 0
    case ID_BWT_HUF:
      psize = lf_bwt_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("BW %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
#endif
    case ID_SPARSE4:
      psize = psi2_makeindex(&csa, fname);
      printf("n %ld\n",csa.n);
      printf("Psi %ld bytes (%1.3f bpc)\n",
              psize,(double)psize*8/csa.n);
      break;
    default:
      printf("psi_id = %d\n",psi_id);
      exit(1);
    }
  }

  /* width passed to putuint() below; presumably the number of bytes
     needed to store a value up to n - confirm against blog() */
  csa.k = (blog(csa.n+1)+1+8-1)/8;

  /* ---- step 2: alphabet tables ----
     Characters with a positive count get consecutive codes 0..m-1;
     K[code+1] holds the running total of the counts, starting at 1. */
  for (i=0; i<sigma; i++) csa.CtoA[i] = -1;
  csa.K[-1+1] = 1;
  for (m=0,v=1,i=0; i<sigma; i++) {
    if (csa.C[i]>0) {
      csa.AtoC[m] = i;
      csa.CtoA[i] = m;
      csa.K[m+1] = v;
//      printf("i=%ld v = %ld C[i] = %ld\n",i,v,csa.C[i]);
      v += csa.C[i];
      m++;
    }
  }
  csa.K[m+1] = v;
  csa.m = m;

  /* the sampling intervals must be smaller than the text length */
  if (csa.D >= csa.n) {
    printf("D=%d >= n=%ld\n",csa.D,csa.n);
    exit(0);
  }
  if (csa.D2 >= csa.n) {
    printf("D2=%d >= n=%ld\n",csa.D2,csa.n);
    exit(0);
  }

  /* ---- step 3: sampled SA / ISA and the idx file ---- */
  if (idx_id >= 0) {
    n = csa.n;
    k = csa.k;
//// compute SA and ISA
    if (csa.D > 0) csa.SA = mymalloc(((n-1)/csa.D+1+1)*k);
    if (csa.D2 > 0) csa.ISA = mymalloc(((n-1)/csa.D2+1+1)*k);
    if (csa.D == 0 && csa.D2 == 0) goto brk;

    switch (psi_id & 0x3f) {
    case ID_DIFF_GAMMA:
    case ID_DIFF_GAMMA_RL:
    case ID_DIFF_GAMMA_SPARSE:
    case ID_DIFF_GAMMA_RL_SPARSE:
    case ID_SPARSE4:
    case ID_DIFF_GAMMA_RR:
      /* psi-based encodings: step forward with psi from rank 0 */
      j = 0;
      for (i=0; i<=n; i++) {
        display_progressbar("making sa ",i,n);
        j = csa.psi(&csa,j);
  //  sa[j] = i;
        if (csa.D > 0 && j % csa.D == 0) {
          putuint(csa.SA,j / csa.D,i,k);
        }
        if (csa.D2 > 0 && i % csa.D2 == 0) {
          putuint(csa.ISA,i / csa.D2,j,k);
        }
      }
//      putuint(csa.SA,0,n,k);
      break;
    case ID_BWT_DNA:
    case ID_BWT_DNA2:
    case ID_BWT_BIT:
    case ID_BWT_WT:
    case ID_BWT_WT_HUF:
    case ID_BWT_WT_DENSE:
    case ID_BWT_WT_SPARSE4:
    case ID_BWT_WT_RR:
    case ID_BWT_HUF:
      /* BWT-based encodings: step with LF from rank 0, i counting down */
      j = 0;
      for (i=n-1; i>=0; i--) {
        display_progressbar("making sa ",i,n);
        v = csa.LF(&csa,j);
//        printf("LF[%ld] = %ld\n",j,v);
        j = v;
        if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k);
        if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k);
      }
//      putuint(csa.SA,0,n,k);
      /* entry 0 of the SA samples is set to n, but only when the SA
         array was actually allocated */
      if (csa.D > 0) putuint(csa.SA,0,n,k); // 2011-12-20
      break;
    default:
      break;
    }
brk:
    //// write idx
    f2 = fopen(fidx,"wb"); /* directory */
    if (f2 == NULL) {
      perror("csa2_new1: ");
      exit(1);
    }

    /* isize tracks the number of bytes written to the idx file */
    isize = 0;

    writeint(4,VERSION,f2); /* version */
    isize += 4;

    writeint(1,ID_HEADER,f2); // header ID
    isize += 1;
    isize = write_header(&csa, f2, isize);

    if (csa.D > 0) {
      writeint(1,ID_SA,f2);
      isize += 1;
      isize = write_sa(&csa, f2, isize);
    }

    if (csa.D2 > 0) {
      writeint(1,ID_ISA,f2);
      isize += 1;
      isize = write_isa(&csa, f2, isize);
    }

    fclose(f2);

    if (csa.D > 0) free(csa.SA);
    if (csa.D2 > 0) free(csa.ISA);

    printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize),
                (double)(psize+isize)*8/csa.n);
  }
  free(fidx);
}
void csa_new_from_bwt(CSA csa, char *fname, char *fidx, int psi_id, int idx_id, bool coded) { int k; i64 i,j,v,m; FILE *f2; i64 psize,isize; i64 n; psi_id = csa.id; if (psi_id >= 0) { printf("create psi: id=%d\n",psi_id); } if (idx_id >= 0) { printf("create idx: id=%d D=%d D2=%d\n",idx_id,csa.D,csa.D2); } psize = 0; if (psi_id >= 0) { switch (psi_id & 0x3f) { case ID_DIFF_GAMMA: case ID_DIFF_GAMMA_RL: case ID_DIFF_GAMMA_SPARSE: case ID_DIFF_GAMMA_RL_SPARSE: psize = psi1_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_DIFF_GAMMA_RR: psize = psi12_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_DNA: psize = lf_dna_makeindex(&csa, fname, coded); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_BIT: psize = lf_bit_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; case ID_BWT_WT: case ID_BWT_WT_HUF: case ID_BWT_WT_DENSE: case ID_BWT_WT_SPARSE4: case ID_BWT_WT_RR: psize = lf_wt_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; #if 0 case ID_BWT_HUF: psize = lf_bwt_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("BW %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; #endif case ID_SPARSE4: psize = psi2_makeindex(&csa, fname); printf("n %ld\n",csa.n); printf("Psi %ld bytes (%1.3f bpc)\n", psize,(double)psize*8/csa.n); break; default: printf("psi_id = %d\n",psi_id); exit(1); } } csa.k = (blog(csa.n+1)+1+8-1)/8; for (i=0; i<SIGMA; i++) csa.CtoA[i] = -1; // csa.K[-1+1] = 0; csa.K[-1+1] = 1; for (m=0,v=1,i=0; i<SIGMA; i++) { if (csa.C[i]>0) { csa.AtoC[m] = i; csa.CtoA[i] = m; csa.K[m+1] = v; // printf("i=%ld v = %ld C[i] = %ld\n",i,v,csa.C[i]); v += csa.C[i]; m++; } } csa.K[m+1] = v; 
csa.m = m; if (csa.D >= csa.n) { printf("D=%d >= n=%ld\n",csa.D,csa.n); exit(0); } if (csa.D2 >= csa.n) { printf("D2=%d >= n=%ld\n",csa.D2,csa.n); exit(0); } if (idx_id >= 0) { n = csa.n; k = csa.k; //// compute SA and ISA if (csa.D > 0) csa.SA = (uchar *) mymalloc(((n-1)/csa.D+1+1)*k); if (csa.D2 > 0) csa.ISA = (uchar *) mymalloc(((n-1)/csa.D2+1+1)*k); if (csa.D == 0 && csa.D2 == 0) goto brk; switch (psi_id & 0x3f) { case ID_DIFF_GAMMA: case ID_DIFF_GAMMA_RL: case ID_DIFF_GAMMA_SPARSE: case ID_DIFF_GAMMA_RL_SPARSE: case ID_SPARSE4: case ID_DIFF_GAMMA_RR: j = 0; for (i=0; i<=n; i++) { display_progressbar("making sa ",i,n); j = csa.psi(&csa,j); // sa[j] = i; if (csa.D > 0 && j % csa.D == 0) { putuint(csa.SA,j / csa.D,i,k); } if (csa.D2 > 0 && i % csa.D2 == 0) { putuint(csa.ISA,i / csa.D2,j,k); } } // putuint(csa.SA,0,n,k); break; case ID_BWT_DNA: case ID_BWT_BIT: case ID_BWT_WT: case ID_BWT_WT_HUF: case ID_BWT_WT_DENSE: case ID_BWT_WT_SPARSE4: case ID_BWT_WT_RR: case ID_BWT_HUF: j = 0; for (i=n-1; i>=0; i--) { display_progressbar("making sa ",i,n); v = csa.LF(&csa,j); // printf("LF[%ld] = %ld\n",j,v); j = v; if (csa.D > 0 && j % csa.D == 0) putuint(csa.SA, j/csa.D , i, k); if (csa.D2 > 0 && i % csa.D2 == 0) putuint(csa.ISA, i/csa.D2, j, k); } putuint(csa.SA,0,n,k); break; default: break; } brk: //// write idx f2 = fopen(fidx,"wb"); /* directory */ if (f2 == NULL) { perror("csa2_new1: "); exit(1); } isize = 0; writeint(4,VERSION,f2); /* version */ isize += 4; writeint(1,ID_HEADER,f2); // header ID isize += 1; isize = write_header(&csa, f2, isize); if (csa.D > 0) { writeint(1,ID_SA,f2); isize += 1; isize = write_sa(&csa, f2, isize); } if (csa.D2 > 0) { writeint(1,ID_ISA,f2); isize += 1; isize = write_isa(&csa, f2, isize); } fclose(f2); if (csa.D > 0) free(csa.SA); if (csa.D2 > 0) free(csa.ISA); printf("Total %ld bytes (%1.3f bpc)\n",(psize+isize), (double)(psize+isize)*8/csa.n); } free(fidx); }
int main(int argc, char *argv[]) { i64 n; CSA SA; if (argc<3) { fprintf(stderr, "syntax: %s {indexfiles}\n", argv[0]); return 1; } csa_read(&SA,argc-1, argv+1); n = SA.n; #if 0 { int i; rank_t x; unicode_t code; uchar buf[6], *p; x = SA.inverse(&SA, 0); for (i = 0; i < 1000; i++) { if (csa_utf8_T_psi(&SA, &x, &code) == -1) { printf("???\n"); } p = &buf[0]; unicode_to_string(&p, code); buf[unicode_len(code)] = 0; printf("%d code = %d (%s) rank = %ld\n", i, code, &buf[0], x); } for (i = 0; i < 1000; i++) { if (csa_utf8_BW_LF(&SA, &x, &code) == -1) { printf("???\n"); } p = &buf[0]; unicode_to_string(&p, code); buf[unicode_len(code)] = 0; printf("%d code = %d (%s) rank = %ld\n", i, code, &buf[0], x); } } #endif #if 1 { int i; rank_t x; unicode_t code; uchar buf[6], *p; CSAFILE *csafile; csafile = csa_fdopen(&SA, NULL); for (i = 0; i < 1000; i++) { code = csa_fgetwc(csafile); x = csafile->rank; p = &buf[0]; unicode_to_string(&p, code); buf[unicode_len(code)] = 0; printf("%d code = %d (%s) rank = %ld\n", i, code, &buf[0], x); } for (i = 0; i < 1000; i++) { code = csa_fgetwbw(csafile); x = csafile->rank; p = &buf[0]; unicode_to_string(&p, code); buf[unicode_len(code)] = 0; printf("%d code = %d (%s) rank = %ld\n", i, code, &buf[0], x); } } #endif return 0; }