Beispiel #1
0
DMacroUnit* DMacroUnit_New()
{
	DMacroUnit *self = (DMacroUnit*) dao_malloc( sizeof(DMacroUnit) );
	self->type = DMACRO_TOK;
	self->stops = DArray_New(D_TOKEN);
	self->marker = DaoToken_New();
	return self;
}
Beispiel #2
0
DaoToken* DaoToken_Copy( DaoToken *self )
{
	DaoToken* copy = DaoToken_New();
	DaoToken_Assign( copy, self );
	return copy;
}
Beispiel #3
0
int DaoLexer_Tokenize( DaoLexer *self, const char *src, int flags )
{
	DString *source = DString_New(1);
	DVector *lexenvs = DVector_New( sizeof(int) );
	DaoToken *token = DaoToken_New();
	DString *literal = & token->string;
	char ch, *ss, hex[11] = "0x00000000";
	int replace = flags & DAO_LEX_ESCAPE;
	int comment = flags & DAO_LEX_COMMENT;
	int space = flags & DAO_LEX_SPACE;
	int srcSize = (int)strlen( src );
	int old=0, state = TOK_START;
	int lexenv = LEX_ENV_NORMAL;
	int unicoded = 0;
	int line = 1;
	int cpos = 0;
	int ret = 1;
	int it = 0;
	int i, m = 4;

	DString_SetSharing( literal, 0 );
	for(it=0; it<srcSize; it++){
		if( (signed char) src[it] < 0 ){
			unicoded = 1;
			break;
		}
	}
	if( unicoded && daoConfig.mbs == 0 ){
		DString *wcs = DString_New(0);
		/* http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html */
		wchar_t quotes[] = {
			0x27 , 0x27 , 0x27, /* single q.m. */
			0x22 , 0x22 , 0x22, /* double q.m. */
			0x27 + 0xfee0 , 0x27 + 0xfee0 , 0x27 , /* single q.m. unicode */
			0x22 + 0xfee0 , 0x22 + 0xfee0 , 0x22 , /* double q.m. unicode */
			0x60 , 0x27 , 0x27, /* grave accent */
			0x2018 , 0x2019 , 0x27 , /* left/right single q.m. */
			0x201C , 0x201D , 0x22   /* left/right double q.m. */
		};
		wchar_t sl = L'\\' + 0xfee0;
		wchar_t stop;
		int i, N = 21;
		it = 0;
		DString_SetMBS( wcs, src );
		while( it < wcs->size ){ // TODO: handle verbatim string!
			for( i=0; i<N; i+=3 ){
				if( wcs->wcs[it] == quotes[i] ){
					stop = quotes[i+1];
					wcs->wcs[it] = quotes[i+2];
					it ++;
					while( it < wcs->size && wcs->wcs[it] != stop ){
						if( wcs->wcs[it] == sl || wcs->wcs[it] == L'\\' ){
							it ++;
							continue;
						}
						it ++;
					}
					if( it < wcs->size ) wcs->wcs[it] = quotes[i+2];
					break;
				}
			}
			if( it >= wcs->size ) break;
			if( wcs->wcs[it] == 0x3000 ){
				wcs->wcs[it] = 32; /* blank space */
			}else if( wcs->wcs[it] > 0xff00 && wcs->wcs[it] < 0xff5f ){
				wcs->wcs[it] -= 0xfee0; /* DBC to SBC */
			}
			it ++;
		}
		if( wcs->size ){
			DString_SetWCS( source, wcs->wcs );
			src = source->mbs;
			srcSize = source->size;
		}
		DString_Delete( wcs );
	}
	DaoLexer_Reset( self );

	DVector_PushInt( lexenvs, LEX_ENV_NORMAL );
	it = 0;
	token->cpos = 0;
	while( it < srcSize ){
#if 0
		printf( "tok: %i %i  %i  %c    %s\n", srcSize, it, ch, ch, literal->mbs );
#endif
		token->type = state;
		token->name = 0;
		token->line = line;
		ch = src[it];
		cpos += ch == '\t' ? daoConfig.tabspace : 1;
		if( ch == '\n' ) cpos = 0, line ++;
		if( literal->size == 0 ) token->cpos = cpos;
		if( state == TOK_STRING_MBS || state == TOK_STRING_WCS ){
			if( ch == '\\' ){
				it ++;
				if( replace == 0 ){
					DString_AppendChar( literal, ch );
					if( it < srcSize ){
						if( src[it] == '\n' ) cpos = 0, line ++;
						DString_AppendChar( literal, src[it] );
					}
					it ++;
					continue;
				}
				if( it >= srcSize ){
					ret = 0;
					printf( "error: incomplete string at line %i.\n", line );
					break;
				}
				if( src[it] == '\n' ) cpos = 0, line ++;
				switch( src[it] ){
				case '0' : case '1' : case '2' : case '3' :
				case '4' : case '5' : case '6' : case '7' : /* \ooo */
					i = 2;
					while( i < 5 && it < srcSize && src[it] >= '0' && src[it] < '8' ){
						hex[i] = src[it++];
						hex[++i] = 0;
					}
					DString_AppendChar( literal, (char) strtol( hex+2, NULL, 8 ) );
					it --;
					break;
				case '8' : case '9' :
					DString_AppendChar( literal, (char) (src[it] - '0') );
					break;
				case 'x' :
				case 'u' :
				case 'U' :
					i = 2;
					switch( src[it] ){
					case 'x' : m = 4;  break; /* \xhh: max 2 hex digit; */
					case 'u' : m = 6;  break; /* \uhhhh: max 4 hex digit; */
					case 'U' : m = 10; break; /* \Uhhhhhhhh: max 8 hex digit; */
					}
					while( i < m && (it+1) < srcSize && isxdigit( src[it+1] ) ){
						hex[i] = src[++it];
						hex[++i] = 0;
					}
					DString_AppendWChar( literal, (wchar_t) strtol( hex, NULL, 0 ) );
					break;
				case 't' : DString_AppendChar( literal, '\t' ); break;
				case 'n' : DString_AppendChar( literal, '\n' ); break;
				case 'r' : DString_AppendChar( literal, '\r' ); break;
				case '\'' : DString_AppendChar( literal, '\'' ); break;
				case '\"' : DString_AppendChar( literal, '\"' ); break;
				default : DString_AppendChar( literal, src[it] ); break;
				}
			}else if( ch == '\'' && state == TOK_STRING_MBS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_MBS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else if( ch == '\"' && state == TOK_STRING_WCS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_WCS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( ch == ']' && state == TOK_VERBATIM ){
			int len = srcSize - it - 1;
			DString_AppendChar( literal, ']' );
			token->type = token->name = DTOK_VBT_OPEN;
			if( (ss = strstr( src + it + 1, literal->mbs )) != NULL ){
				len = (ss - src) - it - 1 + literal->size;
				token->type = token->name = DTOK_VERBATIM;
			}
			for(i=0; i<len; i++) if( src[it+1+i] == '\n' ) line += 1;
			DString_AppendDataMBS( literal, src + it + 1, len );
			state = TOK_RESTART;
			DaoLexer_AppendToken( self, token );
			DString_Clear( literal );
			it += len;
		}else if( lexenv == LEX_ENV_NORMAL ){
			old = state;
			if( ch >=0 ){
				state = daoLexTable[ state ][ (int)ch ];
			}else if( state <= TOK_START ){
				state = TOK_RESTART;
			}else if( state != TOK_IDENTIFIER && state != TOK_STRING_MBS
					&& state != TOK_STRING_WCS
					&& state != TOK_COMT_LINE && state != TOK_COMT_OPEN ){
				state = TOK_RESTART;
			}
			if( state >= TOK_END ){
				DString_AppendChar( literal, ch );
				token->type = token->name = daoTokenMap[ state ];
				if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
					token->type = DTOK_IDENTIFIER;
				if( space || comment || token->type != DTOK_COMMENT ){
					if( isspace( token->string.mbs[0] ) )
						token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
					DaoLexer_AppendToken( self, token );
				}
				/* may be a token before the line break; */
				DString_Clear( literal );
				state = TOK_START;
			}else if( state == TOK_RESTART ){
				if( literal->size ){
					if( old == TOK_IDENTIFIER ){
						token->name = dao_key_hash( literal->mbs, literal->size );
						token->type = DTOK_IDENTIFIER;
						if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( old > TOK_RESTART && old != TOK_END ){
						token->type = token->name = daoTokenMap[ old ];
						if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
							token->type = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( space ){
						if( isspace( token->string.mbs[0] ) )
							token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
						DaoLexer_AppendToken( self, token );
					}
					DString_Clear( literal );
					token->cpos = cpos;
				}
				DString_AppendChar( literal, ch );
				if( ch >=0 )
					state = daoLexTable[ TOK_START ][ (int)ch ];
				else
					state = TOK_IDENTIFIER;

			}else if( state == TOK_COMT_OPEN ){
				DString_AppendChar( literal, ch );
				lexenv = LEX_ENV_COMMENT;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( lexenv == LEX_ENV_COMMENT ){
			DString_AppendChar( literal, ch );
			if( ch == '#' ){
				state = TOK_OP_SHARP;
			}else if( ch == '{' && state == TOK_OP_SHARP ){
				state = TOK_COMT_OPEN;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else if( ch == '}' && state == TOK_OP_SHARP ){
				state = TOK_COMT_CLOSE;
				DVector_Pop( lexenvs );
				lexenv = lexenvs->data.ints[lexenvs->size-1];
				if( lexenv != LEX_ENV_COMMENT ){
					token->type = token->name = DTOK_COMMENT;
					if( comment ) DaoLexer_AppendToken( self, token );
					DString_Clear( literal );
					state = TOK_RESTART;
				}
			}else{
				state = TOK_START;
			}
		}
		it ++;
	}
	if( literal->size ){
		token->type = token->name = daoTokenMap[ state ];
		if( lexenv == LEX_ENV_COMMENT ) token->type = token->name = DTOK_CMT_OPEN;
		switch( state ){
		case TOK_STRING_MBS : token->type = token->name = DTOK_MBS_OPEN; break;
		case TOK_STRING_WCS : token->type = token->name = DTOK_WCS_OPEN; break;
		}
		if( token->type == DTOK_IDENTIFIER ){
			token->name = dao_key_hash( literal->mbs, literal->size );
			if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
		}else if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL ){
			token->type = DTOK_IDENTIFIER;
		}
		if( token->type || space ){
			if( isspace( token->string.mbs[0] ) )
				token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
			DaoLexer_AppendToken( self, token );
		}
	}
	DaoToken_Delete( token );
	DVector_Delete( lexenvs );
	DString_Delete( source );
#if 0
	for(i=0; i<self->tokens->size; i++){
		DaoToken *tk = self->tokens->items.pToken[i];
		printf( "%4i: %4i %4i , %4i,  %s\n", i, tk->type, tk->name, tk->cpos, tk->string.mbs );
	}
#endif
	return ret ? line : 0;
}
Beispiel #4
0
static int DaoParser_MacroApply( DaoParser *self, DArray *tokens,
		DMacroGroup *group, DMap *tokMap, DMap *used,
		int level, DString *tag, int pos0, int adjust )
{
	DMacroUnit **units = (DMacroUnit**) group->units->items.pVoid;
	DMacroUnit  *unit;
	DMacroGroup *grp;
	DMacroNode *node, *node2;
	DArray *toks = DArray_New(D_TOKEN);
	DaoToken *tk = DaoToken_New();
	DaoToken *tt = NULL;
	DNode  *kwnode = NULL;
	DMap *check = NULL;
	DMap one = { NULL, 0, 0, 0 };
	int M, N = group->units->size;
	int i, j, gid = -1;
	int repeated = 0;
	int start_mbs = -1;
	int start_wcs = -1;
	int squote, dquote;

	if( group->repeat != DMACRO_AUTO ) level ++;

	for( i=0; i<N; i++ ){
		unit = units[i];
		if( tokens->size >0 ) pos0 = tokens->items.pToken[ tokens->size -1 ]->line;
		self->curLine = pos0;
		/*
		   printf( "apply unit %i: %i\n", i, unit->type );
		 */
		switch( unit->type ){
		case DMACRO_TOK :
			squote = unit->marker->type == DTOK_ESC_SQUO;
			dquote = unit->marker->type == DTOK_ESC_DQUO;
			if( (squote && start_mbs >=0) || (dquote && start_wcs >=0) ){
				int qstart = squote ? start_mbs : start_wcs;
				tt = tokens->items.pToken[ qstart ];
				for(j=qstart+1,M=tokens->size; j<M; j++){
					DaoToken *jtok = tokens->items.pToken[j];
					int t = j ? tokens->items.pToken[j-1]->type : 0;
					if( t == DTOK_IDENTIFIER && jtok->type == t )
						DString_AppendChar( & tt->string, ' ' );
					DString_Append( & tt->string, & jtok->string );
				}
				if( squote ){
					DString_AppendChar( & tt->string, '\'' );
					DArray_Erase( tokens, start_mbs+1, tokens->size );
				}else{
					DString_AppendChar( & tt->string, '\"' );
					DArray_Erase( tokens, start_wcs+1, tokens->size );
				}
				start_mbs = -1;
				break;
			}else if( squote ){
				start_mbs = tokens->size;
				DArray_Append( tokens, unit->marker );
				tt = tokens->items.pToken[ start_mbs ];
				tt->type = tt->name = DTOK_MBS;
				DString_SetMBS( & tt->string, "\'" );
				break;
			}else if( dquote ){
				start_wcs = tokens->size;
				DArray_Append( tokens, unit->marker );
				tt = tokens->items.pToken[ start_wcs ];
				tt->type = tt->name = DTOK_WCS;
				DString_SetMBS( & tt->string, "\"" );
				break;
			}
			DArray_Append( tokens, unit->marker );
			tokens->items.pToken[ tokens->size-1 ]->cpos += adjust;
			break;
		case DMACRO_VAR :
			DaoToken_Assign( tk, unit->marker );
			DString_Append( & tk->string, tag );
			DArray_Append( tokens, tk );
			break;
		case DMACRO_EXP :
		case DMACRO_ID :
		case DMACRO_OP :
		case DMACRO_BL :
		case DMACRO_IBL :

			kwnode = MAP_Find( tokMap, & unit->marker->string );
			if( kwnode ==NULL ){
				DaoParser_Error( self, DAO_CTW_UNDEF_MAC_MARKER, & unit->marker->string );
				goto Failed;
			}
			node = (DMacroNode*) kwnode->value.pVoid;
			kwnode = MAP_Find( used, unit );
			if( kwnode == NULL ){
				DMap_Insert( used, unit, & one );
				kwnode = MAP_Find( used, unit );
			}
			check = (DMap*) kwnode->value.pVoid;
			repeated = 1;

			/*
			   printf( ">>>\n%s level %i: \n", unit->marker->string.mbs, level );
			   DMacroNode_Print( node );
			   printf( "\n" );
			 */
			/* search a leaf */
			node2 = DMacroNode_FindLeaf( node, check, level );
			if( node2 ){
				/*
				   printf( "appending tokens\n" );
				   DMacroNode_Print( node2 );
				   printf( "\n" );
				 */
				DArray_InsertArray( tokens, tokens->size, node2->leaves, 0, -1 );
				DMap_Insert( check, node2, NULL );
				/* DArray_Clear( node2->leaves ); */
			}else{
				DMacroNode_RemoveEmptyLeftBranch( node, level );
				goto Failed;
			}
			break;
		case DMACRO_GRP :
		case DMACRO_ALT :
			grp = (DMacroGroup*) unit;
			DArray_Clear( toks );
			j = DaoParser_MacroApply( self, toks, grp, tokMap, used, level, tag, pos0, adjust );
			switch( grp->repeat ){
			case DMACRO_AUTO :
			case DMACRO_ONE :
				if( j <0 && group->type != DMACRO_ALT ) goto Failed;
				repeated = (j>0);
				if( j >=0 ){
					gid = i;
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				break;
			case DMACRO_ZERO_OR_ONE :
				gid = i;
				repeated = (j>0);
				if( j >=0 ){
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				break;
			case DMACRO_ZERO_OR_MORE :
				gid = i;
				repeated = (j>0);
				if( j >=0 ){
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				while( j >0 ){
					DArray_Clear( toks );
					j = DaoParser_MacroApply( self, toks, grp, tokMap, used, level, tag, pos0, adjust );
					if( j >0 ){
						DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
					}
				}
				break;
			case DMACRO_ONE_OR_MORE :
				if( j <0 && group->type != DMACRO_ALT ) goto Failed;
				repeated = (j>0);
				if( j >=0 ){
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}

				while( j >0 ){
					gid = i;
					DArray_Clear( toks );
					j = DaoParser_MacroApply( self, toks, grp, tokMap, used, level, tag, pos0, adjust );
					if( j >0 ){
						DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
					}
				}
				break;
			}
			break;
		default : goto Failed;
		}
		if( group->type == DMACRO_ALT && gid >=0 ) break;
	}
	if( group->repeat != DMACRO_AUTO ) level --;
	if( group->type == DMACRO_ALT && gid <0 ) goto Failed;
	DaoToken_Delete( tk );
	DArray_Delete( toks );
	return repeated;
Failed :
	DaoToken_Delete( tk );
	DArray_Delete( toks );
	return -1;
}