Пример #1
0
/*
 * Parse the tag specification string `ts`.
 *
 * The string is read as a sequence of entries separated by spaces or tabs.
 * Each entry is a name (a letter or '_' followed by letters, digits or '_'),
 * optionally preceded by '!' and optionally followed by ".classname".
 * A '!' entry is passed to append_close_tag(); any other entry is passed to
 * append_open_tag() (with the class name, or 0 when there is none).
 * Combining '!' with ".classname" is rejected.
 *
 * When `checkonly` is non-zero nothing is appended: each tag name is only
 * looked up with html_src_tag_index(), and every byte temporarily overwritten
 * in `ts` is restored.  When `checkonly` is zero the terminators written into
 * `ts` are left in place.
 *
 * Returns 1 when the whole string was accepted, 0 otherwise.  On success in
 * non-check mode the head of the list that was built is stored through `slot`.
 */
int html_src_parse_tagspec( char *ts, HTlexeme lexeme, BOOLEAN checkonly, BOOLEAN isstart )
{
    BOOLEAN stop = 0;
    BOOLEAN code = 0;
    char *p = ts;
    char *tagstart = 0;
    char *tagend = 0;
    char *classstart;
    char *classend;
    char save, save1;
    char after_excl = 0;
    html_src_check_state state = HTSRC_CK_normal;
    HT_tagspec *head = 0;
    HT_tagspec *tail = 0;
    /* NOTE(review): `lexeme` and `isstart` are not consulted; the result is
     * always stored through lexeme_end.  Confirm this is intended. */
    HT_tagspec **slot = lexeme_end;

    while ( !stop )
    {
        switch ( state )
        {
        case HTSRC_CK_normal:
        case HTSRC_CK_seen_excl:
            switch ( p[0] )
            {
            case '\0':
                stop = 1;
                code = 1;
                break;
            case ' ':
            case '\t':
                /* Separators between entries are skipped. */
                break;
            case '!':
                if ( state == HTSRC_CK_seen_excl )
                {
                    if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                    {
                        fprintf( TraceFP( ), "second '!' at column %d:\n\t%s\n", (int)( p - ts ) + 1, ts );
                    }
                    stop = 1;
                }
                else
                {
                    state = HTSRC_CK_seen_excl;
                    after_excl = 1;
                }
                break;
            default:
                if ( !isalpha( (unsigned char)p[0] ) && p[0] != '_' )
                {
                    if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                    {
                        fprintf( TraceFP( ), "no name starting at column %d:\n\t%s\n", (int)( p - ts ) + 1, ts );
                    }
                    stop = 1;
                    break;
                }
                tagstart = p;
                while ( isalnum( (unsigned char)p[0] ) || p[0] == '_' )
                {
                    p++;
                }
                tagend = p;
                state = HTSRC_CK_after_tagname;
                /* p already points at the character after the name. */
                continue;
            }
            break;

        case HTSRC_CK_after_tagname:
            switch ( p[0] )
            {
            case '\0':
                stop = 1;
                code = 1;
                /* fallthrough: end of string also terminates the entry */
            case ' ':
            case '\t':
                /* Temporarily terminate the tag name in place. */
                save = tagend[0];
                tagend[0] = 0;
                if ( checkonly )
                {
                    int idx = html_src_tag_index( tagstart );
                    if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                    {
                        fprintf( TraceFP( ), "tag index(%s) = %d\n", tagstart, idx );
                    }
                    tagend[0] = save;
                    if ( idx == -1 )
                    {
                        /* Unknown tag: report failure, as the ".class"
                         * path below does. */
                        stop = 1;
                        code = 0;
                        break;
                    }
                }
                else if ( after_excl )
                    append_close_tag( tagstart, &head, &tail );
                else
                    append_open_tag( tagstart, 0, &head, &tail );
                state = HTSRC_CK_normal;
                after_excl = 0;
                break;
            case '.':
                if ( after_excl )
                {
                    if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                    {
                        fprintf( TraceFP( ), "dot after '!' at column %d:\n\t%s\n", (int)( p - ts ) + 1, ts );
                    }
                    stop = 1;
                }
                else
                    state = HTSRC_CK_seen_dot;
                break;
            default:
                if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                {
                    fprintf( TraceFP( ), "unexpected char '%c' after tagname at column %d:\n\t%s\n", p[0], (int)( p - ts ) + 1, ts );
                }
                stop = 1;
                break;
            }
            break;

        case HTSRC_CK_seen_dot:
            switch ( p[0] )
            {
            case ' ':
            case '\t':
                break;
            case '\0':
                if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                {
                    fprintf( TraceFP( ), "expected text after dot at column %d:\n\t%s\n", (int)( p - ts ) + 1, ts );
                }
                stop = 1;
                break;
            default:
                if ( !isalpha( (unsigned char)p[0] ) && p[0] != '_' )
                {
                    if ( WWW_TraceFlag && ( WWW_TraceMask & 8 ) )
                    {
                        fprintf( TraceFP( ), "no name starting at column %d:\n\t%s\n", (int)( p - ts ) + 1, ts );
                    }
                    stop = 1;
                    break;
                }
                classstart = p;
                while ( isalnum( (unsigned char)p[0] ) || p[0] == '_' )
                {
                    p++;
                }
                classend = p;
                /* Temporarily terminate both the class and the tag name. */
                save = classend[0];
                classend[0] = 0;
                save1 = tagend[0];
                tagend[0] = 0;
                if ( checkonly )
                {
                    int idx = html_src_tag_index( tagstart );
                    tagend[0] = save1;
                    classend[0] = save;
                    if ( idx == -1 )
                    {
                        return 0;
                    }
                }
                else
                    append_open_tag( tagstart, classstart, &head, &tail );
                /* NOTE(review): in non-check mode the terminator written at
                 * classend is not restored, so the next iteration sees end of
                 * string and parsing stops after this entry.  Confirm whether
                 * append_open_tag() copies its arguments before restoring. */
                state = HTSRC_CK_normal;
                after_excl = 0;
                /* p already points at the character after the class name. */
                continue;
            }
            break;

        default:
            break;
        }
        p++;
    }
    if ( code && !checkonly )
        slot[0] = head;
    return code;
}
Пример #2
0
/*
 * Parse the tag specification string `ts`.
 *
 * Entries are separated by spaces or tabs.  An entry is a name (a letter or
 * '_' followed by letters, digits or '_'), optionally preceded by '!' and
 * optionally followed by ".classname"; '!' and ".classname" cannot be
 * combined.  '!' entries go to append_close_tag(), all others to
 * append_open_tag().
 *
 * With `checkonly` set, nothing is appended: each tag name is only looked up
 * with html_src_tag_index() and the bytes overwritten in `ts` are restored.
 * Otherwise the list that was built is stored in the slot selected by
 * `isstart` and `lexeme`.
 *
 * returns 0 if the specification is accepted, 1 if incorrect
 */
PUBLIC int html_src_parse_tagspec ARGS4(
	char*,		ts,
	HTlexeme,	lexeme,
	BOOL,		checkonly,
	BOOL,		isstart)
{
    char *cur = ts;
    char *name_begin = 0;
    char *name_end = 0;
    char done = FALSE;
    char negated = FALSE;	/* a '!' precedes the current entry */
    html_src_check_state state = HTSRC_CK_normal;
    HT_tagspec *first = NULL;
    HT_tagspec *last = NULL;
    HT_tagspec **slot;

    if (isstart)
	slot = lexeme_start + lexeme;
    else
	slot = lexeme_end + lexeme;

    while (!done) {
	char ch = *cur;

	if (state == HTSRC_CK_after_tagname) {
	    if (ch == '.') {
		if (negated)
		    return 1;
		state = HTSRC_CK_seen_dot;
	    } else if (ch == '\0' || ch == ' ' || ch == '\t') {
		/* end of an entry without a class name */
		char saved = *name_end;

		if (ch == '\0')
		    done = TRUE;
		*name_end = '\0';
		if (checkonly) {
		    int idx = html_src_tag_index(name_begin);

		    *name_end = saved;
		    if (idx == -1)
			return 1;
		} else if (negated) {
		    append_close_tag(name_begin, &first, &last);
		} else {
		    append_open_tag(name_begin, NULL, &first, &last);
		}
		state = HTSRC_CK_normal;
		negated = FALSE;
	    } else {
		return 1;
	    }
	} else if (state == HTSRC_CK_seen_dot) {
	    if (ch == '\0')
		return 1;
	    if (ch != ' ' && ch != '\t') {
		char *class_begin = cur;
		char *class_end;
		char saved_class, saved_name;

		if (!isalpha(UCH(ch)) && ch != '_')
		    return 1;
		while (*cur && (isalnum(UCH(*cur)) || *cur == '_'))
		    ++cur;
		class_end = cur;
		saved_class = *class_end;
		*class_end = '\0';
		saved_name = *name_end;
		*name_end = '\0';
		if (checkonly) {
		    int idx = html_src_tag_index(name_begin);

		    *name_end = saved_name;
		    *class_end = saved_class;
		    if (idx == -1)
			return 1;
		} else {
		    append_open_tag(name_begin, class_begin, &first, &last);
		}
		state = HTSRC_CK_normal;
		negated = FALSE;
		/* cur already sits just past the class name */
		continue;
	    }
	} else if (state == HTSRC_CK_normal || state == HTSRC_CK_seen_excl) {
	    if (ch == '\0') {
		done = TRUE;
	    } else if (ch == '!') {
		if (state == HTSRC_CK_seen_excl)
		    return 1;	/* second '!' */
		state = HTSRC_CK_seen_excl;
		negated = TRUE;
	    } else if (ch != ' ' && ch != '\t') {
		if (!isalpha(UCH(ch)) && ch != '_')
		    return 1;
		name_begin = cur;
		while (*cur && (isalnum(UCH(*cur)) || *cur == '_'))
		    ++cur;
		name_end = cur;
		state = HTSRC_CK_after_tagname;
		/* cur already sits just past the tag name */
		continue;
	    }
	}
	++cur;
    }

    if (!checkonly)
	*slot = first;
    return 0;
}