/* * The state machine looks for (approximately) these Perl regular expressions: * * m|\/\*.*?\*\/| * m|\/\/.*| * m|'.*?'| * m|".*?"| * m|#\s*include\s*"(.*?)"| * m|#\s*include\s*<(.*?>"| * * About 98% of the CPU time is spent here, and most of that is in * the 'start' paragraph. Because the current characters are * in a register, the start loop usually eats 4 or 8 characters * per memory read. The MAX4 and MIN4 tests dispose of most * input characters with 1 or 2 comparisons. */ void state_machine(const char * map, const char * end) { const char * next = map; const char * map_dot; unsigned long __buf = 0; for (;;) { start: GETNEXT __start: if (current > MAX4('/','\'','"','#')) goto start; if (current < MIN4('/','\'','"','#')) goto start; CASE('/', slash); CASE('\'', squote); CASE('"', dquote); CASE('#', pound); goto start; /* // */ slash_slash: GETNEXT CASE('\n', start); NOTCASE('\\', slash_slash); GETNEXT goto slash_slash; /* / */ slash: GETNEXT CASE('/', slash_slash); NOTCASE('*', __start); slash_star_dot_star: GETNEXT __slash_star_dot_star: NOTCASE('*', slash_star_dot_star); GETNEXT NOTCASE('/', __slash_star_dot_star); goto start; /* '.*?' */ squote: GETNEXT CASE('\'', start); NOTCASE('\\', squote); GETNEXT goto squote; /* ".*?" */ dquote: GETNEXT CASE('"', start); NOTCASE('\\', dquote); GETNEXT goto dquote; /* #\s* */ pound: GETNEXT CASE(' ', pound); CASE('\t', pound); CASE('i', pound_i); goto __start; /* #\s*i */ pound_i: GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('c', __start); GETNEXT NOTCASE('l', __start); GETNEXT NOTCASE('u', __start); GETNEXT NOTCASE('d', __start); GETNEXT NOTCASE('e', __start); goto pound_include; /* #\s*include\s* */ pound_include: GETNEXT CASE(' ', pound_include); CASE('\t', pound_include); map_dot = next; CASE('"', pound_include_dquote); CASE('<', pound_include_langle); goto __start; /* #\s*include\s*"(.*)" */ pound_include_dquote: GETNEXT CASE('\n', start); NOTCASE('"', pound_include_dquote); handle_include(0, map_dot, next - map_dot - 1); goto start; /* #\s*include\s*<(.*)> */ pound_include_langle: GETNEXT CASE('\n', start); NOTCASE('>', pound_include_langle); handle_include(1, map_dot, next - map_dot - 1); goto start; } }
/* * The state machine looks for (approximately) these Perl regular expressions: * * m|\/\*.*?\*\/| * m|\/\/.*| * m|'.*?'| * m|".*?"| * m|#\s*include\s*"(.*?)"| * m|#\s*include\s*<(.*?>"| * m|#\s*(?define|undef)\s*CONFIG_(\w*)| * m|(?!\w)CONFIG_| * * About 98% of the CPU time is spent here, and most of that is in * the 'start' paragraph. Because the current characters are * in a register, the start loop usually eats 4 or 8 characters * per memory read. The MAX5 and MIN5 tests dispose of most * input characters with 1 or 2 comparisons. */ void state_machine(const char * map, const char * end) { const char * next = map; const char * map_dot; unsigned long __buf = 0; for (;;) { start: GETNEXT __start: if (current > MAX5('/','\'','"','#','C')) goto start; if (current < MIN5('/','\'','"','#','C')) goto start; CASE('/', slash); CASE('\'', squote); CASE('"', dquote); CASE('#', pound); CASE('C', cee); goto start; /* // */ slash_slash: GETNEXT CASE('\n', start); NOTCASE('\\', slash_slash); GETNEXT goto slash_slash; /* / */ slash: GETNEXT CASE('/', slash_slash); NOTCASE('*', __start); slash_star_dot_star: GETNEXT __slash_star_dot_star: NOTCASE('*', slash_star_dot_star); GETNEXT NOTCASE('/', __slash_star_dot_star); goto start; /* '.*?' */ squote: GETNEXT CASE('\'', start); NOTCASE('\\', squote); GETNEXT goto squote; /* ".*?" */ dquote: GETNEXT CASE('"', start); NOTCASE('\\', dquote); GETNEXT goto dquote; /* #\s* */ pound: GETNEXT CASE(' ', pound); CASE('\t', pound); CASE('i', pound_i); CASE('d', pound_d); CASE('u', pound_u); goto __start; /* #\s*i */ pound_i: GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('c', __start); GETNEXT NOTCASE('l', __start); GETNEXT NOTCASE('u', __start); GETNEXT NOTCASE('d', __start); GETNEXT NOTCASE('e', __start); goto pound_include; /* #\s*include\s* */ pound_include: GETNEXT CASE(' ', pound_include); CASE('\t', pound_include); map_dot = next; CASE('"', pound_include_dquote); CASE('<', pound_include_langle); goto __start; /* #\s*include\s*"(.*)" */ pound_include_dquote: GETNEXT CASE('\n', start); NOTCASE('"', pound_include_dquote); handle_include(0, map_dot, next - map_dot - 1); goto start; /* #\s*include\s*<(.*)> */ pound_include_langle: GETNEXT CASE('\n', start); NOTCASE('>', pound_include_langle); handle_include(1, map_dot, next - map_dot - 1); goto start; /* #\s*d */ pound_d: GETNEXT NOTCASE('e', __start); GETNEXT NOTCASE('f', __start); GETNEXT NOTCASE('i', __start); GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('e', __start); goto pound_define_undef; /* #\s*u */ pound_u: GETNEXT NOTCASE('n', __start); GETNEXT NOTCASE('d', __start); GETNEXT NOTCASE('e', __start); GETNEXT NOTCASE('f', __start); goto pound_define_undef; /* * #\s*(define|undef)\s*CONFIG_(\w*) * * this does not define the word, because it could be inside another * conditional (#if 0). But I do parse the word so that this instance * does not count as a use. -- mec */ pound_define_undef: GETNEXT CASE(' ', pound_define_undef); CASE('\t', pound_define_undef); NOTCASE('C', __start); GETNEXT NOTCASE('O', __start); GETNEXT NOTCASE('N', __start); GETNEXT NOTCASE('F', __start); GETNEXT NOTCASE('I', __start); GETNEXT NOTCASE('G', __start); GETNEXT NOTCASE('_', __start); map_dot = next; pound_define_undef_CONFIG_word: GETNEXT if (isalnum(current) || current == '_') goto pound_define_undef_CONFIG_word; goto __start; /* \<CONFIG_(\w*) */ cee: if (next >= map+2 && (isalnum(next[-2]) || next[-2] == '_')) goto start; GETNEXT NOTCASE('O', __start); GETNEXT NOTCASE('N', __start); GETNEXT NOTCASE('F', __start); GETNEXT NOTCASE('I', __start); GETNEXT NOTCASE('G', __start); GETNEXT NOTCASE('_', __start); map_dot = next; cee_CONFIG_word: GETNEXT if (isalnum(current) || current == '_') goto cee_CONFIG_word; use_config(map_dot, next - map_dot - 1); goto __start; } }