int html_src_parse_tagspec( char *ts, HTlexeme lexeme, BOOLEAN checkonly, BOOLEAN isstart ) { BOOLEAN stop = 0; BOOLEAN code = 0; char *p = ts; char *tagstart = 0; char *tagend = 0; char *classstart; char *classend; char save, save1; char after_excl = 0; html_src_check_state state = HTSRC_CK_normal; HT_tagspec *head = 0; HT_tagspec *tail = 0; HT_tagspec **slot = lexeme_end; for ( ; stop == 0; p++ ) { if ( state == HTSRC_CK_after_tagname ) { switch ( p[0] ) { case 0: stop = 1; code = 1; save = tagend[0]; tagend[0] = 0; classstart = 0; if ( checkonly ) { int idx = html_src_tag_index( tagstart ); if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "tag index(%s) = %d\n", tagstart, idx ); } tagend[0] = save; if ( idx == -1 ) stop = 1; } else if ( after_excl ) append_close_tag( tagstart, &head, &tail ); else append_open_tag( tagstart, 0, &head, &tail ); state = HTSRC_CK_normal; after_excl = 0; break; case 9: case 32: save = tagend[0]; tagend[0] = 0; classstart = 0; break; case 46: if ( after_excl ) { if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "dot after '!' 
at column %d:\n\t%s\n", p[1] - ts, ts ); } stop = 1; } else state = HTSRC_CK_seen_dot; break; default: if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "unexpected char '%c' after tagname at column %d:\n\t%s\n", p[0], p[1] - ts, ts ); } stop = 1; break; } } else { if ( state >= 2 ) { if ( state == HTSRC_CK_seen_dot && p[0] != 9 && p[0] != 32 ) { if ( p[0] == 0 ) { if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "expected text after dot at column %d:\n\t%s\n", p[1] - ts, ts ); } stop = 1; } else { if ( !( *(short*)(*(int*)(__ctype_b_loc( )) + ( p[0] * 2 )) & 1024 ) && p[0] != '_' ) { if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "no name starting at column %d:\n\t%s\n", p[1] - ts, ts ); } stop = 1; } else { classstart = p; for ( ; p[0] && ( ( *(short*)(*(int*)(__ctype_b_loc( )) + ( p[0] * 2 )) & 8 ) || p[0] == '_' ); p++ ) { // p++; } classend = p; p = &p[ -1 ]; save = classend[0]; classend[0] = 0; save1 = tagend[0]; tagend[0] = 0; if ( checkonly ) { int idx = html_src_tag_index( tagstart ); tagend[0] = save1; classend[0] = save; if ( idx == -1 ) { return 0; } } else append_open_tag( tagstart, classstart, &head, &tail ); state = HTSRC_CK_normal; after_excl = 0; } } } } else { switch ( p[0] ) { case 0: stop = 1; code = 1; break; case 33: if ( state == HTSRC_CK_seen_excl ) { if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "second '!' 
at column %d:\n\t%s\n", p[1] - ts, ts ); } stop = 1; } else { state = HTSRC_CK_seen_excl; after_excl = 1; } break; default: if ( !( *(short*)(*(int*)(__ctype_b_loc( )) + ( p[0] * 2 )) & 1024 ) && p[0] != '_' ) { if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) ) { fprintf( TraceFP( ), "no name starting at column %d:\n\t%s\n", p[1] - ts, ts ); } stop = 1; } else { tagstart = p; for ( ; p[0] && ( ( *(short*)(*(int*)(__ctype_b_loc( )) + ( p[0] * 2 )) & 8 ) || p[0] == '_' ); p++ ) { // p++; } tagend = p; p = &p[ -1 ]; state = HTSRC_CK_after_tagname; } break; } } } // p++; } if ( code && checkonly == 0 ) slot[0] = head; return code; }
/*
 * Parse the tag specification in `ts`, or only validate it when `checkonly`
 * is set.
 *
 * Accepted items, separated by blanks or tabs:
 *     name         passed to append_open_tag() with a NULL class name
 *     !name        passed to append_close_tag()
 *     name.class   passed to append_open_tag() together with the class name
 * A name starts with a letter or '_' and continues with letters, digits or
 * '_'.  Blanks are allowed between the '.' and the class name.  A second
 * '!' in a row, or a '.' after a '!'-prefixed name, is an error.
 *
 * Check mode: every tag name is looked up with html_src_tag_index(); a
 * result of -1 is an error.  Characters temporarily overwritten with NUL
 * are put back, so `ts` is unchanged on return.
 *
 * Store mode: the items are appended to a list whose head is finally
 * written to lexeme_start[lexeme] (isstart set) or lexeme_end[lexeme].
 * The NUL terminators written into `ts` stay in place.  When an error is
 * detected the slot is not written.
 *
 * Store mode used to stop silently after the first "name.class" item: the
 * NUL written over the character following the class name was re-read as
 * the end of the string.  The displaced character is now replayed, so both
 * modes accept and process the same input.
 *
 * returns 1 if incorrect, 0 otherwise
 */
PUBLIC int html_src_parse_tagspec ARGS4(
	char*,		ts,
	HTlexeme,	lexeme,
	BOOL,		checkonly,
	BOOL,		isstart)
{
    char *p = ts;
    char *tagstart = NULL;
    char *tagend = NULL;
    char *classstart;
    char *classend;
    char c;			/* character examined in this iteration */
    char held = '\0';		/* character that a NUL terminator replaced at p */
    BOOL have_held = FALSE;	/* TRUE while `held` must be examined instead of *p */
    BOOL stop = FALSE;
    BOOL after_excl = FALSE;
    html_src_check_state state = HTSRC_CK_normal;
    HT_tagspec *head = NULL;
    HT_tagspec *tail = NULL;
    HT_tagspec **slot = (isstart ? lexeme_start : lexeme_end) + lexeme;

    while (!stop) {
	if (have_held) {
	    c = held;
	    have_held = FALSE;
	} else {
	    c = *p;
	}

	switch (state) {
	case HTSRC_CK_normal:
	case HTSRC_CK_seen_excl:
	    switch (c) {
	    case '\0':
		stop = TRUE;
		break;
	    case ' ':
	    case '\t':
		break;
	    case '!':
		if (state == HTSRC_CK_seen_excl)
		    return 1;	/* second '!' */
		state = HTSRC_CK_seen_excl;
		after_excl = TRUE;
		break;
	    default:
		/* A held character is never a name character, so when this
		 * test passes c is the character actually stored at p. */
		if (!(isalpha(UCH(c)) || c == '_'))
		    return 1;
		tagstart = p;
		while (*p && (isalnum(UCH(*p)) || *p == '_'))
		    ++p;
		tagend = p;
		state = HTSRC_CK_after_tagname;
		continue;	/* p already rests on the character after the name */
	    }
	    break;

	case HTSRC_CK_after_tagname:
	    switch (c) {
	    case '\0':
		stop = TRUE;
		/* FALLTHRU */
	    case ' ':
		/* FALLTHRU */
	    case '\t':
		{
		    /* Terminate the name in place so it can be used as a string. */
		    char save = *tagend;

		    *tagend = '\0';
		    if (checkonly) {
			int idx = html_src_tag_index(tagstart);

			*tagend = save;
			if (idx == -1)
			    return 1;
		    } else if (after_excl) {
			append_close_tag(tagstart, &head, &tail);
		    } else {
			append_open_tag(tagstart, NULL, &head, &tail);
		    }
		    state = HTSRC_CK_normal;
		    after_excl = FALSE;
		}
		break;
	    case '.':
		if (after_excl)
		    return 1;	/* '!' cannot be combined with a class */
		state = HTSRC_CK_seen_dot;
		break;
	    default:
		return 1;
	    }
	    break;

	case HTSRC_CK_seen_dot:
	    if (c == ' ' || c == '\t')
		break;
	    /* Also rejects '\0': a class name must follow the dot. */
	    if (!(isalpha(UCH(c)) || c == '_'))
		return 1;
	    {
		char save, save1;

		classstart = p;
		while (*p && (isalnum(UCH(*p)) || *p == '_'))
		    ++p;
		classend = p;

		/* Terminate both the class name and the tag name (at the '.'). */
		save = *classend;
		*classend = '\0';
		save1 = *tagend;
		*tagend = '\0';
		if (checkonly) {
		    int idx = html_src_tag_index(tagstart);

		    *tagend = save1;
		    *classend = save;
		    if (idx == -1)
			return 1;
		} else {
		    append_open_tag(tagstart, classstart, &head, &tail);
		    /* The terminator at p stays in the buffer; replay the
		     * character it replaced so that scanning goes on instead
		     * of mistaking the terminator for the end of the input. */
		    held = save;
		    have_held = TRUE;
		}
		state = HTSRC_CK_normal;
		after_excl = FALSE;
	    }
	    continue;		/* p already rests on the character after the class */

	default:
	    break;
	}
	++p;
    }

    if (!checkonly)
	*slot = head;
    return 0;
}