/*
 * ML-callable wrapper around bdd_restrict.
 *
 * Both arguments are unwrapped with Bdd_val, passed to bdd_restrict in the
 * order (r, var), and the result is wrapped again with mlbdd_make before
 * being returned.
 */
/* ML type: bdd -> bdd -> bdd */
EXTERNML value
mlbdd_bdd_restrict(value r, value var) /* ML */
{
  return mlbdd_make(
           bdd_restrict(Bdd_val(r),
                        Bdd_val(var)));
}
int main(int argc,char **argv) { char ambiguous_treatment='A'; char linebuff[1024]; char *parseptr; PARSE_STATE ps; uint32_t low_code,high_code; int width,i,j,vi,vj; FILE *unicode_db; BDD x,y,child[8]; BDD *queue; int queue_low,queue_high,queue_max; puts("/*\n" " * GENERATED CODE - DO NOT EDIT!\n" " * Edit mkwcw.c, which generates this, or the input to that\n" " * program, instead. Distributions of IDSgrep will nonetheless\n" " * usually include a ready-made copy of this file because\n" " * compiling and running mkwcw.c requires a library and data\n" " * file that, although free, not everyone is expected to have.\n" " */\n\n" "#include \"_stdint.h\"\n" ); if (argc>1) ambiguous_treatment=argv[1][0]&~32; bdd_init(1000000,15625); bdd_setcacheratio(64); bdd_setvarnum(32); bdd_gbc_hook(NULL); defined_codes=bddfalse; zero_codes=bddfalse; wide_codes=bddfalse; /* yes, unfortunately UnicodeData.txt and EastAsianWidth.txt are just * different enough to need separate parsers, at least if the parsers * are as stupid as I'd like these ones to be */ if (argc>2) { unicode_db=fopen(argv[2],"rt"); while (1) { fgets(linebuff,sizeof(linebuff),unicode_db); if (feof(unicode_db)) break; ps=psLOW; linebuff[sizeof(linebuff)-1]='\0'; low_code=0; width=-1; for (parseptr=linebuff;(*parseptr) && (ps!=psSTOP);parseptr++) switch (ps) { case psLOW: if ((*parseptr>='0') && (*parseptr<='9')) low_code=(low_code<<4)+(*parseptr-'0'); else if ((*parseptr>='a') && (*parseptr<='f')) low_code=(low_code<<4)+(*parseptr-'a'+10); else if ((*parseptr>='A') && (*parseptr<='F')) low_code=(low_code<<4)+(*parseptr-'A'+10); else if (*parseptr==';') ps=psSEMI; else if ((*parseptr==' ') || (*parseptr=='\t')) { /* skip spaces and tabs */ } else ps=psSTOP; /* this catches comment lines */ break; case psSEMI: if (*parseptr==';') ps=psWIDTH; break; case psWIDTH: if (((parseptr[0]=='M') && ((parseptr[1]=='e') || (parseptr[1]=='n'))) || ((parseptr[0]=='C') && (parseptr[1]=='f'))) width=0; /* FALL THROUGH */ default: 
ps=psSTOP; break; } if (width==0) set_range_width(low_code,low_code,0); } fclose(unicode_db); } while (1) { fgets(linebuff,sizeof(linebuff),stdin); if (feof(stdin)) break; ps=psLOW; linebuff[sizeof(linebuff)-1]='\0'; low_code=0; high_code=0; width=-1; for (parseptr=linebuff;(*parseptr) && (ps!=psSTOP);parseptr++) switch (ps) { case psLOW: if ((*parseptr>='0') && (*parseptr<='9')) low_code=(low_code<<4)+(*parseptr-'0'); else if ((*parseptr>='a') && (*parseptr<='f')) low_code=(low_code<<4)+(*parseptr-'a'+10); else if ((*parseptr>='A') && (*parseptr<='F')) low_code=(low_code<<4)+(*parseptr-'A'+10); else if (*parseptr=='.') ps=psHIGH; else if (*parseptr==';') { high_code=low_code; ps=psWIDTH; } else if ((*parseptr==' ') || (*parseptr=='\t')) { /* skip spaces and tabs */ } else ps=psSTOP; /* this catches comment lines */ break; case psHIGH: if ((*parseptr>='0') && (*parseptr<='9')) high_code=(high_code<<4)+(*parseptr-'0'); else if ((*parseptr>='a') && (*parseptr<='f')) high_code=(high_code<<4)+(*parseptr-'a'+10); else if ((*parseptr>='A') && (*parseptr<='F')) high_code=(high_code<<4)+(*parseptr-'A'+10); else if ((*parseptr=='.') || (*parseptr==' ') || (*parseptr=='\t')) { /* skip spaces, tabs, and dots */ } else if (*parseptr==';') ps=psWIDTH; else ps=psSTOP; break; case psWIDTH: if (*parseptr=='A') *parseptr=ambiguous_treatment; switch (*parseptr) { case 'F': /* full-width treated as wide */ case 'W': /* wide */ width=2; break; case 'H': /* half-width treated as narrow */ case 'N': /* narrow or neutral */ width=1; break; case '0': /* zero-width - should only appear in user database */ width=0; break; default: /* ignore all others */ break; } /* FALL THROUGH */ default: ps=psSTOP; break; } if (width>=0) set_range_width(low_code,high_code,width); } printf("/* node counts before simplification: %d %d %d */\n", bdd_nodecount(defined_codes), bdd_nodecount(zero_codes), bdd_nodecount(wide_codes)); x=bdd_addref(bdd_simplify(wide_codes,defined_codes)); bdd_delref(wide_codes); 
wide_codes=x; x=bdd_addref(bdd_apply(defined_codes,wide_codes,bddop_diff)); bdd_delref(defined_codes); defined_codes=x; x=bdd_addref(bdd_simplify(zero_codes,defined_codes)); bdd_delref(zero_codes); zero_codes=x; printf("/* node counts after simplification: %d %d %d */\n\n", bdd_nodecount(defined_codes), bdd_nodecount(zero_codes), bdd_nodecount(wide_codes)); bdd_varblockall(); bdd_intaddvarblock(0,7,0); bdd_intaddvarblock(8,15,0); bdd_intaddvarblock(16,23,0); bdd_intaddvarblock(24,31,0); bdd_intaddvarblock(0,31,1); bdd_reorder_probe(&reordering_size_callback); puts("typedef struct _WIDTH_BBD_ENT {\n" " int16_t child[8];\n" " char byte,shift;\n" "} WIDTH_BDD_ENT;\n\n" "static WIDTH_BDD_ENT width_bdd[]={"); queue=(BDD *)malloc(sizeof(BDD)*1000); queue_max=1000; queue_low=2; queue_high=4; queue[0]=bddfalse; queue[1]=bddtrue; queue[2]=wide_codes; queue[3]=zero_codes; while (queue_low<queue_high) { if (queue_high+8>queue_max) { queue_max/=3; queue_max*=4; queue=(BDD *)realloc(queue,sizeof(BDD)*queue_max); } reorder_focus=queue[queue_low]; bdd_reorder(BDD_REORDER_WIN2ITE); vj=bdd_var(queue[queue_low]); vi=(vj/8)*8; vj=((vj-vi+1)/3)*3-1; if (vj<0) vj=0; x=bdd_addref(bdd_restrict(queue[queue_low],bdd_nithvar(vi+vj))); y=bdd_addref(bdd_restrict(x,bdd_nithvar(vi+vj+1))); child[0]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2))); child[1]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2))); bdd_delref(y); y=bdd_addref(bdd_restrict(x,bdd_ithvar(vi+vj+1))); child[2]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2))); child[3]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2))); bdd_delref(y); bdd_delref(x); x=bdd_addref(bdd_restrict(queue[queue_low],bdd_ithvar(vi+vj))); y=bdd_addref(bdd_restrict(x,bdd_nithvar(vi+vj+1))); child[4]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2))); child[5]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2))); bdd_delref(y); y=bdd_addref(bdd_restrict(x,bdd_ithvar(vi+vj+1))); child[6]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2))); 
child[7]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2))); bdd_delref(y); bdd_delref(x); fputs(" {{",stdout); for (i=0;i<8;i++) { queue[queue_high]=child[i]; for (j=0;queue[j]!=child[i];j++); if (j==queue_high) queue_high++; else bdd_delref(child[i]); printf("%d",j-2); if (i<7) putchar(','); } printf("},%d,%d},\n",vi/8,5-vj); queue_low++; } puts("};\n\n" "int idsgrep_utf8cw(char *);\n" "\n" "#define WBS width_bdd[search]\n" "\n" "int idsgrep_utf8cw(char *cp) {\n" " int search;\n" "\n" " for (search=0;search>=0;)\n" " search=WBS.child[(cp[WBS.byte]>>WBS.shift)&7];\n" " if (search==-1)\n" " return 2;\n" " for (search=1;search>=0;)\n" " search=WBS.child[(cp[WBS.byte]>>WBS.shift)&7];\n" " return ((-1)-search);\n" "}\n"); bdd_done(); exit(0); }