Exemple #1
0
/*
 * Split the NUL-terminated text "src" into tokens and append them to "self"
 * (the lexer is reset first).
 *
 * flags:
 *   DAO_LEX_ESCAPE  - replace escape sequences inside quoted strings by the
 *                     characters they denote (otherwise they are kept verbatim);
 *   DAO_LEX_COMMENT - append comment tokens instead of dropping them;
 *   DAO_LEX_SPACE   - append white space tokens instead of dropping them.
 *
 * When the source contains non-ASCII bytes and daoConfig.mbs is zero, the text
 * is first normalized: typographic and full-width quotation marks around
 * strings become ASCII quotes, and full-width characters outside of strings
 * become their ASCII counterparts.
 *
 * Returns the number of the last line reached (counting from 1) on success,
 * or 0 if a string ends in the middle of an escape sequence.
 */
int DaoLexer_Tokenize( DaoLexer *self, const char *src, int flags )
{
	DString *source = DString_New(1);
	DVector *lexenvs = DVector_New( sizeof(int) );
	DaoToken *token = DaoToken_New();
	DString *literal = & token->string;
	char ch, *ss, hex[11] = "0x00000000";
	int replace = flags & DAO_LEX_ESCAPE;
	int comment = flags & DAO_LEX_COMMENT;
	int space = flags & DAO_LEX_SPACE;
	int srcSize = (int)strlen( src );
	int old=0, state = TOK_START;
	int lexenv = LEX_ENV_NORMAL;
	int unicoded = 0;
	int line = 1;
	int cpos = 0;
	int ret = 1;
	int it = 0;
	int i, m = 4;

	DString_SetSharing( literal, 0 );
	/* Any byte with the high bit set means the source is not pure ASCII. */
	for(it=0; it<srcSize; it++){
		if( (signed char) src[it] < 0 ){
			unicoded = 1;
			break;
		}
	}
	if( unicoded && daoConfig.mbs == 0 ){
		DString *wcs = DString_New(0);
		/* http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html */
		/* Triples of: opening quote, closing quote, ASCII replacement. */
		wchar_t quotes[] = {
			0x27 , 0x27 , 0x27, /* single q.m. */
			0x22 , 0x22 , 0x22, /* double q.m. */
			0x27 + 0xfee0 , 0x27 + 0xfee0 , 0x27 , /* single q.m. unicode */
			0x22 + 0xfee0 , 0x22 + 0xfee0 , 0x22 , /* double q.m. unicode */
			0x60 , 0x27 , 0x27, /* grave accent */
			0x2018 , 0x2019 , 0x27 , /* left/right single q.m. */
			0x201C , 0x201D , 0x22   /* left/right double q.m. */
		};
		wchar_t sl = L'\\' + 0xfee0; /* full-width backslash */
		wchar_t stop = 0;
		int q, N = 21;
		it = 0;
		DString_SetMBS( wcs, src );
		while( it < wcs->size ){ /* TODO: handle verbatim string! */
			for( q=0; q<N; q+=3 ){
				if( wcs->wcs[it] == quotes[q] ){
					stop = quotes[q+1];
					wcs->wcs[it] = quotes[q+2];
					it ++;
					/* Scan to the matching closing quote; string content is left untouched. */
					while( it < wcs->size && wcs->wcs[it] != stop ){
						/* Step over the backslash AND the character it escapes,
						 * so that an escaped quote does not end the string. */
						if( wcs->wcs[it] == sl || wcs->wcs[it] == L'\\' ) it ++;
						it ++;
					}
					if( it < wcs->size ) wcs->wcs[it] = quotes[q+2];
					break;
				}
			}
			if( it >= wcs->size ) break;
			if( wcs->wcs[it] == 0x3000 ){
				wcs->wcs[it] = 32; /* blank space */
			}else if( wcs->wcs[it] > 0xff00 && wcs->wcs[it] < 0xff5f ){
				wcs->wcs[it] -= 0xfee0; /* DBC to SBC */
			}
			it ++;
		}
		if( wcs->size ){
			/* Continue with the normalized text; "source" keeps it alive until the end. */
			DString_SetWCS( source, wcs->wcs );
			src = source->mbs;
			srcSize = source->size;
		}
		DString_Delete( wcs );
	}
	DaoLexer_Reset( self );

	DVector_PushInt( lexenvs, LEX_ENV_NORMAL );
	it = 0;
	token->cpos = 0;
	while( it < srcSize ){
		token->type = state;
		token->name = 0;
		token->line = line;
		ch = src[it];
		cpos += ch == '\t' ? daoConfig.tabspace : 1;
		if( ch == '\n' ) cpos = 0, line ++;
		if( literal->size == 0 ) token->cpos = cpos;
		if( state == TOK_STRING_MBS || state == TOK_STRING_WCS ){
			/* Inside a quoted string. */
			if( ch == '\\' ){
				it ++;
				if( replace == 0 ){
					/* Keep the escape sequence as written. */
					DString_AppendChar( literal, ch );
					if( it < srcSize ){
						if( src[it] == '\n' ) cpos = 0, line ++;
						DString_AppendChar( literal, src[it] );
					}
					it ++;
					continue;
				}
				if( it >= srcSize ){
					ret = 0;
					printf( "error: incomplete string at line %i.\n", line );
					break;
				}
				if( src[it] == '\n' ) cpos = 0, line ++;
				switch( src[it] ){
				case '0' : case '1' : case '2' : case '3' :
				case '4' : case '5' : case '6' : case '7' : /* \ooo */
					i = 2;
					while( i < 5 && it < srcSize && src[it] >= '0' && src[it] < '8' ){
						hex[i] = src[it++];
						hex[++i] = 0;
					}
					DString_AppendChar( literal, (char) strtol( hex+2, NULL, 8 ) );
					it --; /* compensate for the increment at the end of the loop */
					break;
				case '8' : case '9' :
					DString_AppendChar( literal, (char) (src[it] - '0') );
					break;
				case 'x' :
				case 'u' :
				case 'U' :
					i = 2;
					/* Drop digits left over from a previous escape, so that an
					 * escape without any hex digit does not reuse them. */
					hex[2] = 0;
					switch( src[it] ){
					case 'x' : m = 4;  break; /* \xhh: max 2 hex digit; */
					case 'u' : m = 6;  break; /* \uhhhh: max 4 hex digit; */
					case 'U' : m = 10; break; /* \Uhhhhhhhh: max 8 hex digit; */
					}
					/* <ctype.h> functions require a value representable as unsigned char. */
					while( i < m && (it+1) < srcSize && isxdigit( (unsigned char) src[it+1] ) ){
						hex[i] = src[++it];
						hex[++i] = 0;
					}
					DString_AppendWChar( literal, (wchar_t) strtol( hex, NULL, 0 ) );
					break;
				case 't' : DString_AppendChar( literal, '\t' ); break;
				case 'n' : DString_AppendChar( literal, '\n' ); break;
				case 'r' : DString_AppendChar( literal, '\r' ); break;
				case '\'' : DString_AppendChar( literal, '\'' ); break;
				case '\"' : DString_AppendChar( literal, '\"' ); break;
				default : DString_AppendChar( literal, src[it] ); break;
				}
			}else if( ch == '\'' && state == TOK_STRING_MBS ){
				/* Closing single quote. */
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_MBS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else if( ch == '\"' && state == TOK_STRING_WCS ){
				/* Closing double quote. */
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_WCS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( ch == ']' && state == TOK_VERBATIM ){
			/* The opening tag of a verbatim string is complete: the string runs
			 * up to and including the next occurrence of the same tag, or to the
			 * end of the source if the tag never reappears. */
			int len = srcSize - it - 1;
			DString_AppendChar( literal, ']' );
			token->type = token->name = DTOK_VBT_OPEN;
			if( (ss = strstr( src + it + 1, literal->mbs )) != NULL ){
				len = (ss - src) - it - 1 + literal->size;
				token->type = token->name = DTOK_VERBATIM;
			}
			for(i=0; i<len; i++) if( src[it+1+i] == '\n' ) line += 1;
			DString_AppendDataMBS( literal, src + it + 1, len );
			state = TOK_RESTART;
			DaoLexer_AppendToken( self, token );
			DString_Clear( literal );
			it += len;
		}else if( lexenv == LEX_ENV_NORMAL ){
			old = state;
			/* Only 7-bit characters are looked up in the transition table. The
			 * explicit cast keeps this true where plain char is unsigned. */
			if( (signed char) ch >=0 ){
				state = daoLexTable[ state ][ (int)ch ];
			}else if( state <= TOK_START ){
				state = TOK_RESTART;
			}else if( state != TOK_IDENTIFIER && state != TOK_STRING_MBS
					&& state != TOK_STRING_WCS
					&& state != TOK_COMT_LINE && state != TOK_COMT_OPEN ){
				state = TOK_RESTART;
			}
			if( state >= TOK_END ){
				/* The current character completes a token. */
				DString_AppendChar( literal, ch );
				token->type = token->name = daoTokenMap[ state ];
				if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
					token->type = DTOK_IDENTIFIER;
				if( space || comment || token->type != DTOK_COMMENT ){
					if( isspace( (unsigned char) token->string.mbs[0] ) )
						token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
					DaoLexer_AppendToken( self, token );
				}
				/* may be a token before the line break; */
				DString_Clear( literal );
				state = TOK_START;
			}else if( state == TOK_RESTART ){
				/* The current character does not belong to the pending token:
				 * flush what was collected and start a new token with it. */
				if( literal->size ){
					if( old == TOK_IDENTIFIER ){
						token->name = dao_key_hash( literal->mbs, literal->size );
						token->type = DTOK_IDENTIFIER;
						if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( old > TOK_RESTART && old != TOK_END ){
						token->type = token->name = daoTokenMap[ old ];
						if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
							token->type = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( space ){
						if( isspace( (unsigned char) token->string.mbs[0] ) )
							token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
						DaoLexer_AppendToken( self, token );
					}
					DString_Clear( literal );
					token->cpos = cpos;
				}
				DString_AppendChar( literal, ch );
				if( (signed char) ch >=0 )
					state = daoLexTable[ TOK_START ][ (int)ch ];
				else
					state = TOK_IDENTIFIER; /* non-ASCII bytes start an identifier */

			}else if( state == TOK_COMT_OPEN ){
				DString_AppendChar( literal, ch );
				lexenv = LEX_ENV_COMMENT;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( lexenv == LEX_ENV_COMMENT ){
			/* Inside a block comment; "#{" nests and "#}" closes one level. */
			DString_AppendChar( literal, ch );
			if( ch == '#' ){
				state = TOK_OP_SHARP;
			}else if( ch == '{' && state == TOK_OP_SHARP ){
				state = TOK_COMT_OPEN;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else if( ch == '}' && state == TOK_OP_SHARP ){
				state = TOK_COMT_CLOSE;
				DVector_Pop( lexenvs );
				lexenv = lexenvs->data.ints[lexenvs->size-1];
				if( lexenv != LEX_ENV_COMMENT ){
					/* The outermost comment is closed. */
					token->type = token->name = DTOK_COMMENT;
					if( comment ) DaoLexer_AppendToken( self, token );
					DString_Clear( literal );
					state = TOK_RESTART;
				}
			}else{
				state = TOK_START;
			}
		}
		it ++;
	}
	/* Flush whatever is still pending at the end of the source. */
	if( literal->size ){
		token->type = token->name = daoTokenMap[ state ];
		if( lexenv == LEX_ENV_COMMENT ) token->type = token->name = DTOK_CMT_OPEN;
		switch( state ){
		case TOK_STRING_MBS : token->type = token->name = DTOK_MBS_OPEN; break;
		case TOK_STRING_WCS : token->type = token->name = DTOK_WCS_OPEN; break;
		}
		if( token->type == DTOK_IDENTIFIER ){
			token->name = dao_key_hash( literal->mbs, literal->size );
			if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
		}else if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL ){
			token->type = DTOK_IDENTIFIER;
		}
		if( token->type || space ){
			if( isspace( (unsigned char) token->string.mbs[0] ) )
				token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
			DaoLexer_AppendToken( self, token );
		}
	}
	DaoToken_Delete( token );
	DVector_Delete( lexenvs );
	DString_Delete( source );
	return ret ? line : 0;
}
Exemple #2
0
/*
 * Build the macro pattern units for the tokens in [from,to) of self->tokens
 * and append them to "group".
 *
 * Bracketed token ranges "( )", "[ ]" and "{ }" become nested groups; "[ ]"
 * groups default to zero-or-one and "{ }" groups to zero-or-more repetition.
 * A group may be followed by "!", "?", "*" or "+" to set its repetition
 * explicitly, and top-level "|" tokens inside a group split it into alternatives.
 * Every other token becomes a single unit:
 *   - tokens starting with '$' are special variables ($EXP, $VAR, $ID, $OP, $BL);
 *   - single-quoted strings are markers; a quoted opening bracket with a
 *     matching quoted closing bracket additionally opens a DMACRO_AUTO group.
 *
 * parent:  stored as the parent of groups created for bracketed ranges.
 * vars:    if not NULL, counts the occurrences of each special variable.
 * markers: if not NULL, counts the occurrences of each marker.
 *
 * Returns 1 on success and 0 on failure.
 */
static int DaoParser_MakeMacroGroup( DaoParser *self, DMacroGroup *group, DMacroGroup *parent, int from, int to, DMap *vars, DMap *markers )
{
	int i, sep, rb, prev;
	DaoToken **toks = self->tokens->items.pToken;
	DMacroGroup *grp, *group2; /* mingw don't like grp2 ?! */
	DMacroUnit *unit;
	DNode *it;

	i = from;
	while( i < to ){
		DaoToken *tok = toks[i];
		char *chs = tok->string.mbs;
		int tk = tok->name;

		self->curLine = tok->line;
		if( tk == DTOK_LB || tk == DTOK_LSB || tk == DTOK_LCB ){
			/* Bracketed range: create a nested group and locate its closing bracket. */
			grp = DMacroGroup_New();
			grp->cpos = tok->cpos;
			grp->parent = parent;
			DArray_Append( group->units, (void*)grp );
			switch( tk ){
			case DTOK_LB :
				rb = DaoParser_FindPairToken( self, DTOK_LB, DTOK_RB, i, to );
				break;
			case DTOK_LSB :
				rb = DaoParser_FindPairToken( self, DTOK_LSB, DTOK_RSB, i, to );
				grp->repeat = DMACRO_ZERO_OR_ONE;
				break;
			case DTOK_LCB :
				rb = DaoParser_FindPairToken( self, DTOK_LCB, DTOK_RCB, i, to );
				grp->repeat = DMACRO_ZERO_OR_MORE;
				break;
			default :
				rb = -1;
				DaoParser_Error( self, DAO_CTW_INV_MAC_OPEN, & tok->string );
				break;
			}
			if( rb <0 ) return 0;

			prev = i+1;
			sep = DaoParser_FindOpenToken( self, DTOK_PIPE, i+1, rb, 0 );
			if( sep >=0 ){
				/* Alternatives: one sub-group per "|"-separated range. */
				while( sep >=0 ){
					group2 = DMacroGroup_New();
					group2->parent = grp;
					/* Attach before recursing (as done for grp above), so that
					 * group2 stays reachable from grp if the recursion fails. */
					DArray_Append( grp->units, (void*)group2 );
					if( DaoParser_MakeMacroGroup( self, group2, group2, prev, sep, vars, markers ) == 0 )
						return 0;
					prev = sep +1;
					sep = DaoParser_FindOpenToken( self, DTOK_PIPE, prev, rb, 0 );
					/* The last alternative ends at the closing bracket. */
					if( prev < rb && sep <0 ) sep = rb;
				}
				grp->type = DMACRO_ALT;
			}else if( DaoParser_MakeMacroGroup( self, grp, grp, i+1, rb, vars, markers ) == 0 ){
				return 0;
			}
			i = rb +1;
			/* An optional repetition suffix may follow the closing bracket. It is
			 * only looked for inside [from,to): toks[to] is outside the requested
			 * range and may lie past the end of the token array. */
			if( i < to ){
				tok = toks[i];
				self->curLine = tok->line;
				switch( tok->name ){
				case DTOK_NOT   : grp->repeat = DMACRO_ZERO; i++; break;
				case DTOK_QUERY : grp->repeat = DMACRO_ZERO_OR_ONE; i++; break;
				case DTOK_MUL   : grp->repeat = DMACRO_ZERO_OR_MORE; i++; break;
				case DTOK_ADD   : grp->repeat = DMACRO_ONE_OR_MORE; i++; break;
				default : break;
				}
			}
			continue;
		}

		/* Any other token becomes a single unit whose marker is a copy of the token. */
		self->curLine = tok->line;
		unit = DMacroUnit_New();
		DaoToken_Assign( unit->marker, tok );
		DArray_Append( group->units, (void*)unit );
		if( chs[0] == '$' ){
			/* Special variable: its kind is given by the text right after '$'. */
			if( DString_FindMBS( & tok->string, "EXP", 0 ) == 1 ){
				unit->type = DMACRO_EXP;
			}else if( DString_FindMBS( & tok->string, "VAR", 0 ) == 1 ){
				unit->type = DMACRO_VAR;
			}else if( DString_FindMBS( & tok->string, "ID", 0 ) == 1 ){
				unit->type = DMACRO_ID;
			}else if( DString_FindMBS( & tok->string, "OP", 0 ) == 1 ){
				unit->type = DMACRO_OP;
			}else if( DString_FindMBS( & tok->string, "BL", 0 ) == 1 ){
				unit->type = DMACRO_BL;
			}else{
				DaoParser_Error( self, DAO_CTW_INV_MAC_SPECTOK, & tok->string );
				return 0;
			}
			if( vars != NULL ){
				it = DMap_Find( vars, & unit->marker->string );
				if( it == NULL ) it = DMap_Insert( vars, & unit->marker->string, 0 );
				it->value.pInt += 1;
			}
		}else if( tk == DTOK_MBS ){
			/* Quoted marker: tokenize the text after the opening quote. When this
			 * yields exactly two tokens, the first one replaces the unit's marker. */
			DaoLexer_Reset( self->wlexer );
			DaoLexer_Tokenize( self->wlexer, chs + 1, 0 );
			if( self->wlexer->tokens->size == 2 ){
				DaoToken_Assign( unit->marker, self->wlexer->tokens->items.pToken[0] );
				if( markers != NULL ){
					it = DMap_Find( markers, & unit->marker->string );
					if( it == NULL ) it = DMap_Insert( markers, & unit->marker->string, 0 );
					it->value.pInt += 1;
				}
			}
			DaoLexer_Reset( self->wlexer );

			/* A quoted opening bracket with a matching quoted closing bracket
			 * encloses a group of its own. */
			rb = -1;
			if( tok->string.size == 3 ){
				switch( chs[1] ){
				case '(': rb = DaoParser_FindPair( self, "'('", "')'", i, to ); break;
				case '[': rb = DaoParser_FindPair( self, "'['", "']'", i, to ); break;
				case '{': rb = DaoParser_FindPair( self, "'{'", "'}'", i, to ); break;
				default : break;
				}
			}
			if( rb >= 0 ){
				grp = DMacroGroup_New();
				grp->parent = group;
				grp->repeat = DMACRO_AUTO;
				DArray_Append( group->units, (void*)grp );
				if( DaoParser_MakeMacroGroup( self, grp, parent, i+1, rb, vars, markers ) == 0 ) return 0;
				/* Resume at the closing marker, which is then handled as a normal unit. */
				i = rb;
				continue;
			}
		}
		i ++;
	}
	return 1;
}