Example #1
0
///////////////////////////////////////////////////////////////////////////
//
//  Parsing Interface
//
/**
 * Produce the next token from the tokenizer's input stream.
 *
 * The upcoming input is classified by the `is*Start` predicates, tried in a
 * fixed order (whitespace, string, comment, URL, SGML comment, identifier,
 * @keyword, #keyword, number); the first predicate that matches selects the
 * parser.  Anything that matches none of them is handed to
 * parseEverythingElse().
 *
 * @param tokenizer  Tokenizer whose `ss_` stream is read.
 * @return The token built by the selected parser, or NULL when the stream is
 *         already at end of input (ss_peek() yields WEOF).
 */
Token *tokenizer_next( Tokenizer *tokenizer ) {
    Token *token = NULL;

    // The lookahead is re-read on every pass.  If a parser ever hands back
    // NULL, the loop tries again, and it must be able to notice that the
    // stream has meanwhile reached end of input instead of testing a stale
    // character forever.
    while ( !token && ss_peek( tokenizer->ss_ ) != WEOF ) {
        if ( isWhitespaceStart( tokenizer->ss_, 0 ) ) {
            token = parseWhitespace( tokenizer );
        } else if ( isStringStart( tokenizer->ss_, 0 ) ) {
            token = parseString( tokenizer );
        } else if ( isCommentStart( tokenizer->ss_, 0 ) ) {
            token = parseComment( tokenizer );
        } else if ( isUrlStart( tokenizer->ss_, 0 ) ) {
            token = parseUrl( tokenizer );
        } else if ( isSGMLCommentStart( tokenizer->ss_, 0 ) ) {
            token = parseSGMLComment( tokenizer );
        } else if ( isIdentifierStart( tokenizer->ss_, 0 ) ) {
            token = parseIdentifier( tokenizer );
        } else if ( isAtkeywordStart( tokenizer->ss_, 0 ) ) {
            token = parseAtkeyword( tokenizer );
        } else if ( isHashkeywordStart( tokenizer->ss_, 0 ) ) {
            token = parseHashkeyword( tokenizer );
        } else if ( isNumberStart( tokenizer->ss_, 0 ) ) {
            token = parseNumber( tokenizer );
        } else {
            // Operators & delimiters (everything else)
            token = parseEverythingElse( tokenizer );
        }
    }

    return token;
}
Example #2
0
/**
 * Consume a slash-star comment from the tokenizer's stream and wrap it in a
 * COMMENT token.
 *
 * The stream must be positioned at the opening delimiter (asserted via
 * isCommentStart()).  Characters are consumed up to and including the closing
 * delimiter.  A backslash also consumes the character that follows it, so an
 * escaped `*` cannot begin the closing delimiter.
 *
 * If end of input is reached before the closing delimiter, the token covers
 * everything that was read and an error is reported through
 * tokenizer_error().
 *
 * @param tokenizer  Tokenizer whose `ss_` stream is read.
 * @return The token created by token_new() for the consumed text.
 */
Token *parseComment( Tokenizer *tokenizer ) {
    StatefulString *ss = tokenizer->ss_;
    assert( isCommentStart( ss, 0 ) );

    wchar_t                *error  = NULL;  // allocated only if something goes wrong
    int                     start  = ss->next_index;
    int                     length = 2;     // the opening `/*`
    StatefulStringPosition  pos1   = ss->next_position;
    StatefulStringPosition  pos2;
    Token                  *token;

    ss_getchar( ss );   ss_getchar( ss );   // Throw away `/*`
    while (
        ss_peek( ss ) != WEOF               &&
        !( ss_peek( ss ) == L'*' && ss_peekx( ss, 1 ) == L'/' )
    ) {
        length++;
        // Only consume (and count) an escaped character if there is one:
        // a trailing backslash right before end of input must not inflate
        // `length` past the text that actually exists.
        if ( ss_getchar( ss ) == L'\\' && ss_peek( ss ) != WEOF ) {
            ss_getchar( ss );
            length++;
        }
    }

    if ( ss_peek( ss ) == WEOF ) {
        // If the allocation fails the error simply goes unreported; the
        // token is still returned.
        error = malloc( 201 * sizeof *error );
        if ( error ) {
            swprintf( error, 200, L"Encountered end-of-file while parsing a comment.  Probably a forgotten `*/`." );
        }
    } else {
        ss_getchar( ss ); ss_getchar( ss ); // Throw away `*/`
        length += 2;
    }

    // Return the token.
    pos2    = ss->next_position;
    token   = token_new( ss_substr( ss, start, length ), length, COMMENT, pos1, pos2 );

    if ( error ) {
        // NOTE(review): the message buffer is not freed here, matching the
        // previous behaviour -- presumably tokenizer_error() takes ownership;
        // confirm against its implementation.
        tokenizer_error( tokenizer, error, token );
    }
    return token;
}
Example #3
0
File: html.c Project: eldar/ldc
// Walk the HTML text starting at `p` and copy into `buf` only what should
// reach the code lexer:
//   - bytes that appear while `inCode` is non-zero,
//   - character entities (translated through charEntity()) while in code,
//   - every line break, in or out of code, so that line numbers stay aligned.
// Comments, CDATA sections and tags are handed to their scanners/skippers.
// Scanning stops at a 0 or 0x1a byte, after which a 0 sentinel is appended.
void Html::extractCode(OutBuffer *buf)
{
    dbuf = buf;                 // remembered for the other routines
    buf->reserve(end - p);
    inCode = 0;

    for (;;)
    {
        // End of file
        if (*p == 0 || *p == 0x1a)
            break;

        // Note: quoted strings are deliberately not recognized outside of
        // tags, so '"' and '\'' are treated as ordinary characters here.

        if (*p == '<')
        {
            if (p[1] == '!' && isCommentStart())
            {   // Comments start with <!--
                scanComment();
                continue;
            }
            if (p[1] == '!' && isCDATAStart())
            {
                scanCDATA();
                continue;
            }
            if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
            {
                skipTag();
                continue;
            }
            if (istagstart(*skipWhite(p + 1)))
            {
                skipTag();
                continue;
            }
            // Not markup after all: handled as an ordinary character below.
        }
        else if (*p == '&')
        {
            if (inCode)
            {   // Translate character entity for the code parser
                int entity = charEntity();
                buf->writeUTF8(entity);
            }
            else
            {
                p++;
            }
            continue;
        }
        else if (*p == '\n' || (*p == '\r' && p[1] != '\n'))
        {
            // A lone '\r' or any '\n' counts as one line break.  The '\r' of
            // a "\r\n" pair is an ordinary character (handled below) so the
            // pair is counted only once.
            linnum++;
            buf->writeByte(*p);
            p++;
            continue;
        }

        // Ordinary character: kept only while inside code.
        if (inCode)
            buf->writeByte(*p);
        p++;
    }

    buf->writeByte(0);          // ending sentinel
}
/* Small FSM. States indicate what the machine is looking for *next*,
 * so e.g. _cfgKEYSTART means "looking for the token that indicates the start
 * of a key".
 *
 * The buffer cfg->bbdg (cfg->bbdgSize elements) is parsed in place: section
 * names, keys and values are terminated by overwriting the delimiter that
 * follows them with '\0', and the hash tables are handed pointers into the
 * buffer rather than copies.
 *
 * A section named "DEFAULT" is created up front; keys seen before any
 * "[section]" header go there.  Further section tables are created lazily,
 * when the first key/value of that section is stored.
 *
 * Returns cfg.
 *
 * NOTE(review): a value is only stored when a '\n' or a comment start is
 * seen after it, so a final "key = value" with no trailing newline is
 * dropped when the loop runs off the end of the buffer.  Fixing that needs a
 * writable element at bbdg[bbdgSize] for the terminator -- confirm how the
 * buffer is allocated before changing it.
 */
struct configFile *_cfgParseConfigFile (struct configFile *cfg)
{
	char *currentSectionString="DEFAULT";
	char *currentStringStart=NULL;
	char *currentKey=NULL;
	unsigned int filePos=0, state=_cfgKEYSTART;
	hash_table *tempHash;
	
	/* Create the default section. */
	tempHash=hashConstructTable (31);
	hashInsert (currentSectionString, tempHash, cfg->sections);
	
	while (filePos < cfg->bbdgSize) {
		switch (state) {
			case _cfgKEYSTART:
				if (cfg->bbdg[filePos]=='[') {
					/* Section header: the name starts right after '['. */
					filePos++;
					currentStringStart=(char *) &(cfg->bbdg[filePos]);
					state=_cfgSECTIONEND;
					break;
				}
				if (isCommentStart(cfg->bbdg[filePos])) {
					filePos++;
					state=_cfgCOMMENTEND;
					break;
				}
				/* <ctype.h> functions require a value representable as
				 * unsigned char (or EOF); a negative plain char is undefined
				 * behaviour, hence the cast. */
				if ( !isspace ((unsigned char) cfg->bbdg[filePos]) ) {
					/* First character of a key; _cfgKEYEND re-examines it,
					 * so the position is not advanced here. */
					currentStringStart=(char *) &(cfg->bbdg[filePos]);
					state=_cfgKEYEND;
				} else {
					filePos ++;
				}
				break;
			case _cfgCOMMENTEND:
				/* Skip to the end of the line. */
				if (cfg->bbdg[filePos]=='\n') {
					state=_cfgKEYSTART;
				}
				filePos++;
				break;
			case _cfgSECTIONEND:
				if (cfg->bbdg[filePos]==']') {
					cfg->bbdg[filePos]='\0';
					currentSectionString=currentStringStart;
					state=_cfgKEYSTART;
				}
				filePos++;
				break;
			case _cfgKEYEND:
				if (isspace ((unsigned char) cfg->bbdg[filePos]) || isKeyValSep(cfg->bbdg[filePos])) {
					/* Classify the delimiter before it is overwritten. */
					int atSeparator=isKeyValSep(cfg->bbdg[filePos]);

					cfg->bbdg[filePos]='\0';
					if (!atSeparator) {
						filePos++;
					}
					/* When the key ended directly on the separator, the
					 * position stays put: _cfgCOLON then sees the '\0' just
					 * written and treats it as the separator. */
					currentKey=currentStringStart;
					state=_cfgCOLON;
				} else {
					/* Case folding is left to the lookup routine. */
					filePos++;
				}
				break;
			case _cfgCOLON:
				if (isKeyValSep(cfg->bbdg[filePos]) || cfg->bbdg[filePos]=='\0') {
					state=_cfgVALSTART;
				}
				filePos++;
				break;
			case _cfgVALSTART:
				if (!myisblank(cfg->bbdg[filePos])) {
					/* First character of the value; _cfgVALEND re-examines
					 * it, so the position is not advanced here. */
					currentStringStart=(char *) &(cfg->bbdg[filePos]);
					state=_cfgVALEND;
				} else {
					filePos ++;
				}
				break;
			case _cfgVALEND:
				if (cfg->bbdg[filePos]=='\n' || isCommentStart(cfg->bbdg[filePos])) {
					int atComment;

					/* First see if the current section exists. */
					tempHash=hashLookup (currentSectionString, cfg->sections);
					if (tempHash==NULL) {
						tempHash=hashConstructTable (31);
						hashInsert (currentSectionString, tempHash, cfg->sections);
					}
					/* Now stick it in the table.  Classify the delimiter
					 * before it is overwritten with the terminator. */
					atComment=isCommentStart(cfg->bbdg[filePos]);
					cfg->bbdg[filePos]='\0';
					hashInsert (currentKey, currentStringStart, tempHash);
					/* A trailing comment still has to be skipped to the end
					 * of its line. */
					state=atComment ? _cfgCOMMENTEND : _cfgKEYSTART;
				}
				filePos++;
				break;
		}
		
	}
	return cfg;
}
Example #5
0
File: html.c Project: eldar/ldc
// Skip over one HTML tag.  On entry `p` is at the tag's '<'; on exit it is
// just past the closing '>' (or at the 0 / 0x1a end-of-file byte if the tag
// is never closed, in which case an error is reported).
//
// While skipping:
//   - quoted strings are skipped with skipString(),
//   - comments inside the tag are scanned and otherwise ignored,
//   - a tag opening inside the tag is reported as "nested tag" and skipped
//     recursively,
//   - line breaks are counted in `linnum` and copied to `dbuf` so the code
//     lexer's line numbers stay aligned.
//
// If the tag's name is exactly "code" (case-insensitive) and the tag is not
// self-closing, `inCode` is incremented for an opening tag and decremented
// (never below zero) for a closing one.
void Html::skipTag()
{
    enum TagState       // what parsing state we're in
    {
        TStagstart,     // start of tag name
        TStag,          // in a tag name
        TSrest,         // following tag name
    };
    enum TagState state = TStagstart;
    int inot;                           // non-zero for a closing tag
    unsigned char *tagstart = NULL;     // first character of the tag name
    int taglen = 0;                     // number of characters in the tag name

    p++;
    inot = 0;
    if (*p == '/')
    {   inot = 1;
        p++;
    }
    while (1)
    {
        switch (*p)
        {
            case '>':           // found end of tag
                p++;
                break;

            case '"':
            case '\'':
                state = TSrest;
                skipString();
                continue;

            case '<':
                if (p[1] == '!' && isCommentStart())
                {   // Comments start with <!--
                    scanComment();
                }
                else if (p[1] == '/' && istagstart(*skipWhite(p + 2)))
                {   error("nested tag");
                    skipTag();
                }
                else if (istagstart(*skipWhite(p + 1)))
                {   error("nested tag");
                    skipTag();
                }
                else
                {   // A stray '<' that starts neither a comment nor a tag.
                    // Step over it; otherwise the loop would re-examine the
                    // same character forever.
                    p++;
                }
                // Treat comments as if they were whitespace
                state = TSrest;
                continue;

            case 0:
            case 0x1a:
                error("end of file before end of tag");
                break;          // end of file

            case '\r':
                if (p[1] == '\n')
                    goto Ldefault;      // count the pair once, at the '\n'
                // fall through: a lone '\r' is a line break
            case '\n':
                linnum++;
                // Always extract new lines, so that code lexer counts the
                // lines right.
                dbuf->writeByte(*p);
                state = TSrest;                 // end of tag
                p++;
                continue;

            case ' ':
            case '\t':
            case '\f':
            case '\v':
                if (state == TStagstart)
                {   p++;                // blanks before the tag name
                    continue;
                }
                // fall through: a blank after the name ends the name
            default:
            Ldefault:
                switch (state)
                {
                    case TStagstart:            // start of tag name
                        assert(istagstart(*p));
                        state = TStag;
                        tagstart = p;
                        taglen = 1;             // the first character counts
                        break;

                    case TStag:
                        if (istag(*p))
                        {   // Continuing tag name
                            taglen++;
                        }
                        else
                        {   // End of tag name
                            state = TSrest;
                        }
                        break;

                    case TSrest:
                        break;
                }
                p++;
                continue;
        }
        break;
    }

    // See if we parsed a <code> or </code> tag.  The length must match
    // exactly: comparing only a prefix would also accept names such as
    // "co", "col" or "codex".
    if (taglen == 4 && memicmp((char *) tagstart, (char *) "CODE", 4) == 0
        && *(p - 2) != '/') // ignore "<code />" (XHTML)
    {
        if (inot)
        {   inCode--;
            if (inCode < 0)
                inCode = 0;             // ignore extra </code>'s
        }
        else
            inCode++;
    }
}