Example #1
0
File: muddy.c Project: Armael/HOL
/* ML type: bdd -> bdd -> bdd
 *
 * Unwraps both arguments with Bdd_val, hands them to bdd_restrict (first
 * argument r, second argument var) and returns the result wrapped by
 * mlbdd_make.
 */
EXTERNML value mlbdd_bdd_restrict(value r, value var) /* ML */
{
  value wrapped;

  wrapped = mlbdd_make(bdd_restrict(Bdd_val(r), Bdd_val(var)));
  return wrapped;
}
Example #2
0
/* Value (0-15) of the hexadecimal digit c, or -1 if c is not a hex digit. */
static int mkwcw_hex_digit(int c) {
   if ((c>='0') && (c<='9'))
     return c-'0';
   if ((c>='a') && (c<='f'))
     return c-'a'+10;
   if ((c>='A') && (c<='F'))
     return c-'A'+10;
   return -1;
}

/*
 * Table generator entry point.
 *
 * Usage: prog [ambiguous-treatment [unicode-data-file]] < width-data
 *
 *   argv[1]  optional; its first character, with bit 5 cleared (which
 *            upper-cases ASCII letters), replaces the width class 'A'
 *            wherever it appears in the stdin data.  Defaults to 'A',
 *            which is not a recognised class, so such ranges are ignored.
 *   argv[2]  optional; a UnicodeData.txt-style file.  Every code point
 *            whose third field starts with "Me", "Mn" or "Cf" is passed
 *            to set_range_width() with width 0.
 *   stdin    lines of the form "LOW[..HIGH];CLASS" (the EastAsianWidth.txt
 *            layout, going by the comment below); ranges with a recognised
 *            class are passed to set_range_width().
 *
 * The accumulated sets (defined_codes, zero_codes, wide_codes - file-level
 * BDDs maintained by set_range_width(), which is defined outside this
 * block) are simplified and then written to stdout as a C source file
 * containing a table of 8-way decision nodes and a lookup function.
 *
 * Exits with status 0 on success and status 1 if the data file cannot be
 * opened, an input stream reports a read error, or memory runs out.
 */
int main(int argc,char **argv) {
   char ambiguous_treatment='A';
   char linebuff[1024];
   char *parseptr;
   char width_class;
   PARSE_STATE ps;
   uint32_t low_code,high_code;
   int width,digit,i,j,vi,vj;
   FILE *unicode_db;
   BDD x,y,child[8];
   BDD *queue,*grown_queue;
   int queue_low,queue_high,queue_max;

   puts("/*\n"
        " * GENERATED CODE - DO NOT EDIT!\n"
        " * Edit mkwcw.c, which generates this, or the input to that\n"
        " * program, instead.  Distributions of IDSgrep will nonetheless\n"
        " * usually include a ready-made copy of this file because\n"
        " * compiling and running mkwcw.c requires a library and data\n"
        " * file that, although free, not everyone is expected to have.\n"
        " */\n\n"
        "#include \"_stdint.h\"\n"
       );

   if (argc>1)
     ambiguous_treatment=argv[1][0]&~32;

   bdd_init(1000000,15625);
   bdd_setcacheratio(64);
   bdd_setvarnum(32);
   bdd_gbc_hook(NULL);

   defined_codes=bddfalse;
   zero_codes=bddfalse;
   wide_codes=bddfalse;

   /* yes, unfortunately UnicodeData.txt and EastAsianWidth.txt are just
    * different enough to need separate parsers, at least if the parsers
    * are as stupid as I'd like these ones to be */

   if (argc>2) {
      unicode_db=fopen(argv[2],"rt");
      if (unicode_db==NULL) {
         perror(argv[2]);
         exit(1);
      }

      /* Looping on the fgets() result (rather than testing feof() after
       * the call) keeps a final line that lacks a trailing newline and
       * also terminates on read errors. */
      while (fgets(linebuff,sizeof(linebuff),unicode_db)!=NULL) {
         ps=psLOW;
         low_code=0;
         width=-1;

         for (parseptr=linebuff;(*parseptr) && (ps!=psSTOP);parseptr++)
           switch (ps) {

            case psLOW:
              /* first field: the code point in hex */
              digit=mkwcw_hex_digit(*parseptr);
              if (digit>=0)
                low_code=(low_code<<4)+(uint32_t)digit;
              else if (*parseptr==';')
                ps=psSEMI;
              else if ((*parseptr==' ') || (*parseptr=='\t'))
                { /* skip spaces and tabs */ }
              else
                ps=psSTOP; /* this catches comment lines */
              break;

            case psSEMI:
              /* second field is skipped; wait for its terminating ';' */
              if (*parseptr==';')
                ps=psWIDTH;
              break;

            case psWIDTH:
              /* parseptr[0] is non-NUL here (loop condition), so reading
               * parseptr[1] touches at most the string terminator */
              if (((parseptr[0]=='M') && ((parseptr[1]=='e') ||
                                          (parseptr[1]=='n'))) ||
                  ((parseptr[0]=='C') && (parseptr[1]=='f')))
                width=0;
              /* FALL THROUGH */

            default:
              ps=psSTOP;
              break;
           }

         if (width==0)
           set_range_width(low_code,low_code,0);
      }

      if (ferror(unicode_db)) {
         perror(argv[2]);
         exit(1);
      }

      fclose(unicode_db);
   }

   while (fgets(linebuff,sizeof(linebuff),stdin)!=NULL) {
      ps=psLOW;
      low_code=0;
      high_code=0;
      width=-1;

      for (parseptr=linebuff;(*parseptr) && (ps!=psSTOP);parseptr++)
        switch (ps) {

         case psLOW:
           /* start of range, in hex */
           digit=mkwcw_hex_digit(*parseptr);
           if (digit>=0)
             low_code=(low_code<<4)+(uint32_t)digit;
           else if (*parseptr=='.')
             ps=psHIGH;
           else if (*parseptr==';') {
              /* no ".." part: single code point */
              high_code=low_code;
              ps=psWIDTH;
           } else if ((*parseptr==' ') || (*parseptr=='\t'))
             { /* skip spaces and tabs */ }
           else
             ps=psSTOP; /* this catches comment lines */
           break;

         case psHIGH:
           /* end of range, in hex */
           digit=mkwcw_hex_digit(*parseptr);
           if (digit>=0)
             high_code=(high_code<<4)+(uint32_t)digit;
           else if ((*parseptr=='.') || (*parseptr==' ') || (*parseptr=='\t'))
             { /* skip spaces, tabs, and dots */ }
           else if (*parseptr==';')
             ps=psWIDTH;
           else
             ps=psSTOP;
           break;

         case psWIDTH:
           /* only the first character of the class field is examined */
           width_class=*parseptr;
           if (width_class=='A')
             width_class=ambiguous_treatment;
           switch (width_class) {
            case 'F': /* full-width treated as wide */
            case 'W': /* wide */
              width=2;
              break;

            case 'H': /* half-width treated as narrow */
            case 'N': /* narrow or neutral */
              width=1;
              break;

            case '0': /* zero-width - should only appear in user database */
              width=0;
              break;

            default:
              /* ignore all others */
              break;
           }
           /* FALL THROUGH */

         default:
           ps=psSTOP;
           break;
        }

      if (width>=0)
        set_range_width(low_code,high_code,width);
   }

   if (ferror(stdin)) {
      perror("stdin");
      exit(1);
   }

   printf("/* node counts before simplification: %d %d %d */\n",
          bdd_nodecount(defined_codes),
          bdd_nodecount(zero_codes),
          bdd_nodecount(wide_codes));

   /* Each replacement BDD is referenced before the old one is released. */
   x=bdd_addref(bdd_simplify(wide_codes,defined_codes));
   bdd_delref(wide_codes);
   wide_codes=x;

   x=bdd_addref(bdd_apply(defined_codes,wide_codes,bddop_diff));
   bdd_delref(defined_codes);
   defined_codes=x;

   x=bdd_addref(bdd_simplify(zero_codes,defined_codes));
   bdd_delref(zero_codes);
   zero_codes=x;

   printf("/* node counts after simplification: %d %d %d */\n\n",
          bdd_nodecount(defined_codes),
          bdd_nodecount(zero_codes),
          bdd_nodecount(wide_codes));

   /* One variable block per group of eight variables, all inside one
    * outer block covering variables 0-31. */
   bdd_varblockall();
   bdd_intaddvarblock(0,7,0);
   bdd_intaddvarblock(8,15,0);
   bdd_intaddvarblock(16,23,0);
   bdd_intaddvarblock(24,31,0);
   bdd_intaddvarblock(0,31,1);

   bdd_reorder_probe(&reordering_size_callback);

   /* NOTE(review): the struct tag below is spelled "_WIDTH_BBD_ENT" while
    * the typedef is "WIDTH_BDD_ENT"; the tag is not used by the emitted
    * code, so the text is left as is to keep the output unchanged. */
   puts("typedef struct _WIDTH_BBD_ENT {\n"
        "  int16_t child[8];\n"
        "  char byte,shift;\n"
        "} WIDTH_BDD_ENT;\n\n"
        "static WIDTH_BDD_ENT width_bdd[]={");

   /* Work queue of BDDs still to be emitted.  Slots 0 and 1 hold the
    * constants and are never emitted; since the printed child index is
    * (queue position - 2), false prints as -2, true as -1, wide_codes is
    * table entry 0 and zero_codes is table entry 1. */
   queue_max=1000;
   queue=malloc(sizeof(*queue)*queue_max);
   if (queue==NULL) {
      fputs("out of memory\n",stderr);
      exit(1);
   }
   queue_low=2;
   queue_high=4;
   queue[0]=bddfalse;
   queue[1]=bddtrue;
   queue[2]=wide_codes;
   queue[3]=zero_codes;

   while (queue_low<queue_high) {
      /* Each pass can append up to eight entries; make room first. */
      if (queue_high+8>queue_max) {
         queue_max/=3;
         queue_max*=4;
         grown_queue=realloc(queue,sizeof(*queue)*queue_max);
         if (grown_queue==NULL) {
            fputs("out of memory\n",stderr);
            free(queue);
            exit(1);
         }
         queue=grown_queue;
      }

      /* reorder_focus is a file-level variable, presumably read by
       * reordering_size_callback - both are defined outside this block. */
      reorder_focus=queue[queue_low];
      bdd_reorder(BDD_REORDER_WIN2ITE);

      /* vi: first variable of the 8-variable group containing the top
       * variable.  vj: offset within the group of the first of the three
       * variables to branch on; the arithmetic yields 0, 2 or 5. */
      vj=bdd_var(queue[queue_low]);
      vi=(vj/8)*8;
      vj=((vj-vi+1)/3)*3-1;
      if (vj<0) vj=0;

      /* child[k] is the node restricted on variables vi+vj, vi+vj+1 and
       * vi+vj+2, taken positive where bits 4, 2 and 1 of k are set and
       * negative otherwise. */
      x=bdd_addref(bdd_restrict(queue[queue_low],bdd_nithvar(vi+vj)));
      y=bdd_addref(bdd_restrict(x,bdd_nithvar(vi+vj+1)));
      child[0]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2)));
      child[1]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2)));
      bdd_delref(y);
      y=bdd_addref(bdd_restrict(x,bdd_ithvar(vi+vj+1)));
      child[2]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2)));
      child[3]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2)));
      bdd_delref(y);
      bdd_delref(x);
      x=bdd_addref(bdd_restrict(queue[queue_low],bdd_ithvar(vi+vj)));
      y=bdd_addref(bdd_restrict(x,bdd_nithvar(vi+vj+1)));
      child[4]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2)));
      child[5]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2)));
      bdd_delref(y);
      y=bdd_addref(bdd_restrict(x,bdd_ithvar(vi+vj+1)));
      child[6]=bdd_addref(bdd_restrict(y,bdd_nithvar(vi+vj+2)));
      child[7]=bdd_addref(bdd_restrict(y,bdd_ithvar(vi+vj+2)));
      bdd_delref(y);
      bdd_delref(x);

      fputs("  {{",stdout);
      for (i=0;i<8;i++) {
         /* Sentinel search: storing the child at the end guarantees the
          * scan stops.  Stopping at queue_high means the child is new and
          * keeps its slot (and its reference); otherwise the duplicate's
          * extra reference is dropped. */
         queue[queue_high]=child[i];
         for (j=0;queue[j]!=child[i];j++);
         if (j==queue_high)
           queue_high++;
         else
           bdd_delref(child[i]);
         printf("%d",j-2);
         if (i<7) putchar(',');
      }
      /* byte index within the input, and right-shift that brings the
       * three tested bits to the bottom of that byte */
      printf("},%d,%d},\n",vi/8,5-vj);

      queue_low++;
   }

   puts("};\n\n"
"int idsgrep_utf8cw(char *);\n"
"\n"
"#define WBS width_bdd[search]\n"
"\n"
"int idsgrep_utf8cw(char *cp) {\n"
"   int search;\n"
"\n"
"   for (search=0;search>=0;)\n"
"     search=WBS.child[(cp[WBS.byte]>>WBS.shift)&7];\n"
"   if (search==-1)\n"
"     return 2;\n"
"   for (search=1;search>=0;)\n"
"     search=WBS.child[(cp[WBS.byte]>>WBS.shift)&7];\n"
"   return ((-1)-search);\n"
"}\n");

   free(queue);
   bdd_done();

   exit(0);
}