Esempio n. 1
0
void DaoStream_WriteWCS( DaoStream *self, const wchar_t *val )
{
	const char *format = self->format;
	if( format == NULL ) format = "%ls";
	if( self->redirect && self->redirect->StdioWrite ){
		DString *mbs = DString_New(1);
		DString_SetWCS( mbs, val );
		self->redirect->StdioWrite( self->redirect, mbs );
		DString_Delete( mbs );
	}else if( self->file ){
		fprintf( self->file, format, val );
	}else if( self->attribs & DAO_IO_STRING ){
		DString_AppendWCS( self->streamString, val );
	}else{
		printf( format, val );
	}
}
Esempio n. 2
0
int DaoLexer_Tokenize( DaoLexer *self, const char *src, int flags )
{
	DString *source = DString_New(1);
	DVector *lexenvs = DVector_New( sizeof(int) );
	DaoToken *token = DaoToken_New();
	DString *literal = & token->string;
	char ch, *ss, hex[11] = "0x00000000";
	int replace = flags & DAO_LEX_ESCAPE;
	int comment = flags & DAO_LEX_COMMENT;
	int space = flags & DAO_LEX_SPACE;
	int srcSize = (int)strlen( src );
	int old=0, state = TOK_START;
	int lexenv = LEX_ENV_NORMAL;
	int unicoded = 0;
	int line = 1;
	int cpos = 0;
	int ret = 1;
	int it = 0;
	int i, m = 4;

	DString_SetSharing( literal, 0 );
	for(it=0; it<srcSize; it++){
		if( (signed char) src[it] < 0 ){
			unicoded = 1;
			break;
		}
	}
	if( unicoded && daoConfig.mbs == 0 ){
		DString *wcs = DString_New(0);
		/* http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html */
		wchar_t quotes[] = {
			0x27 , 0x27 , 0x27, /* single q.m. */
			0x22 , 0x22 , 0x22, /* double q.m. */
			0x27 + 0xfee0 , 0x27 + 0xfee0 , 0x27 , /* single q.m. unicode */
			0x22 + 0xfee0 , 0x22 + 0xfee0 , 0x22 , /* double q.m. unicode */
			0x60 , 0x27 , 0x27, /* grave accent */
			0x2018 , 0x2019 , 0x27 , /* left/right single q.m. */
			0x201C , 0x201D , 0x22   /* left/right double q.m. */
		};
		wchar_t sl = L'\\' + 0xfee0;
		wchar_t stop;
		int i, N = 21;
		it = 0;
		DString_SetMBS( wcs, src );
		while( it < wcs->size ){ // TODO: handle verbatim string!
			for( i=0; i<N; i+=3 ){
				if( wcs->wcs[it] == quotes[i] ){
					stop = quotes[i+1];
					wcs->wcs[it] = quotes[i+2];
					it ++;
					while( it < wcs->size && wcs->wcs[it] != stop ){
						if( wcs->wcs[it] == sl || wcs->wcs[it] == L'\\' ){
							it ++;
							continue;
						}
						it ++;
					}
					if( it < wcs->size ) wcs->wcs[it] = quotes[i+2];
					break;
				}
			}
			if( it >= wcs->size ) break;
			if( wcs->wcs[it] == 0x3000 ){
				wcs->wcs[it] = 32; /* blank space */
			}else if( wcs->wcs[it] > 0xff00 && wcs->wcs[it] < 0xff5f ){
				wcs->wcs[it] -= 0xfee0; /* DBC to SBC */
			}
			it ++;
		}
		if( wcs->size ){
			DString_SetWCS( source, wcs->wcs );
			src = source->mbs;
			srcSize = source->size;
		}
		DString_Delete( wcs );
	}
	DaoLexer_Reset( self );

	DVector_PushInt( lexenvs, LEX_ENV_NORMAL );
	it = 0;
	token->cpos = 0;
	while( it < srcSize ){
#if 0
		printf( "tok: %i %i  %i  %c    %s\n", srcSize, it, ch, ch, literal->mbs );
#endif
		token->type = state;
		token->name = 0;
		token->line = line;
		ch = src[it];
		cpos += ch == '\t' ? daoConfig.tabspace : 1;
		if( ch == '\n' ) cpos = 0, line ++;
		if( literal->size == 0 ) token->cpos = cpos;
		if( state == TOK_STRING_MBS || state == TOK_STRING_WCS ){
			if( ch == '\\' ){
				it ++;
				if( replace == 0 ){
					DString_AppendChar( literal, ch );
					if( it < srcSize ){
						if( src[it] == '\n' ) cpos = 0, line ++;
						DString_AppendChar( literal, src[it] );
					}
					it ++;
					continue;
				}
				if( it >= srcSize ){
					ret = 0;
					printf( "error: incomplete string at line %i.\n", line );
					break;
				}
				if( src[it] == '\n' ) cpos = 0, line ++;
				switch( src[it] ){
				case '0' : case '1' : case '2' : case '3' :
				case '4' : case '5' : case '6' : case '7' : /* \ooo */
					i = 2;
					while( i < 5 && it < srcSize && src[it] >= '0' && src[it] < '8' ){
						hex[i] = src[it++];
						hex[++i] = 0;
					}
					DString_AppendChar( literal, (char) strtol( hex+2, NULL, 8 ) );
					it --;
					break;
				case '8' : case '9' :
					DString_AppendChar( literal, (char) (src[it] - '0') );
					break;
				case 'x' :
				case 'u' :
				case 'U' :
					i = 2;
					switch( src[it] ){
					case 'x' : m = 4;  break; /* \xhh: max 2 hex digit; */
					case 'u' : m = 6;  break; /* \uhhhh: max 4 hex digit; */
					case 'U' : m = 10; break; /* \Uhhhhhhhh: max 8 hex digit; */
					}
					while( i < m && (it+1) < srcSize && isxdigit( src[it+1] ) ){
						hex[i] = src[++it];
						hex[++i] = 0;
					}
					DString_AppendWChar( literal, (wchar_t) strtol( hex, NULL, 0 ) );
					break;
				case 't' : DString_AppendChar( literal, '\t' ); break;
				case 'n' : DString_AppendChar( literal, '\n' ); break;
				case 'r' : DString_AppendChar( literal, '\r' ); break;
				case '\'' : DString_AppendChar( literal, '\'' ); break;
				case '\"' : DString_AppendChar( literal, '\"' ); break;
				default : DString_AppendChar( literal, src[it] ); break;
				}
			}else if( ch == '\'' && state == TOK_STRING_MBS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_MBS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else if( ch == '\"' && state == TOK_STRING_WCS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_WCS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( ch == ']' && state == TOK_VERBATIM ){
			int len = srcSize - it - 1;
			DString_AppendChar( literal, ']' );
			token->type = token->name = DTOK_VBT_OPEN;
			if( (ss = strstr( src + it + 1, literal->mbs )) != NULL ){
				len = (ss - src) - it - 1 + literal->size;
				token->type = token->name = DTOK_VERBATIM;
			}
			for(i=0; i<len; i++) if( src[it+1+i] == '\n' ) line += 1;
			DString_AppendDataMBS( literal, src + it + 1, len );
			state = TOK_RESTART;
			DaoLexer_AppendToken( self, token );
			DString_Clear( literal );
			it += len;
		}else if( lexenv == LEX_ENV_NORMAL ){
			old = state;
			if( ch >=0 ){
				state = daoLexTable[ state ][ (int)ch ];
			}else if( state <= TOK_START ){
				state = TOK_RESTART;
			}else if( state != TOK_IDENTIFIER && state != TOK_STRING_MBS
					&& state != TOK_STRING_WCS
					&& state != TOK_COMT_LINE && state != TOK_COMT_OPEN ){
				state = TOK_RESTART;
			}
			if( state >= TOK_END ){
				DString_AppendChar( literal, ch );
				token->type = token->name = daoTokenMap[ state ];
				if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
					token->type = DTOK_IDENTIFIER;
				if( space || comment || token->type != DTOK_COMMENT ){
					if( isspace( token->string.mbs[0] ) )
						token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
					DaoLexer_AppendToken( self, token );
				}
				/* may be a token before the line break; */
				DString_Clear( literal );
				state = TOK_START;
			}else if( state == TOK_RESTART ){
				if( literal->size ){
					if( old == TOK_IDENTIFIER ){
						token->name = dao_key_hash( literal->mbs, literal->size );
						token->type = DTOK_IDENTIFIER;
						if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( old > TOK_RESTART && old != TOK_END ){
						token->type = token->name = daoTokenMap[ old ];
						if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
							token->type = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( space ){
						if( isspace( token->string.mbs[0] ) )
							token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
						DaoLexer_AppendToken( self, token );
					}
					DString_Clear( literal );
					token->cpos = cpos;
				}
				DString_AppendChar( literal, ch );
				if( ch >=0 )
					state = daoLexTable[ TOK_START ][ (int)ch ];
				else
					state = TOK_IDENTIFIER;

			}else if( state == TOK_COMT_OPEN ){
				DString_AppendChar( literal, ch );
				lexenv = LEX_ENV_COMMENT;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( lexenv == LEX_ENV_COMMENT ){
			DString_AppendChar( literal, ch );
			if( ch == '#' ){
				state = TOK_OP_SHARP;
			}else if( ch == '{' && state == TOK_OP_SHARP ){
				state = TOK_COMT_OPEN;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else if( ch == '}' && state == TOK_OP_SHARP ){
				state = TOK_COMT_CLOSE;
				DVector_Pop( lexenvs );
				lexenv = lexenvs->data.ints[lexenvs->size-1];
				if( lexenv != LEX_ENV_COMMENT ){
					token->type = token->name = DTOK_COMMENT;
					if( comment ) DaoLexer_AppendToken( self, token );
					DString_Clear( literal );
					state = TOK_RESTART;
				}
			}else{
				state = TOK_START;
			}
		}
		it ++;
	}
	if( literal->size ){
		token->type = token->name = daoTokenMap[ state ];
		if( lexenv == LEX_ENV_COMMENT ) token->type = token->name = DTOK_CMT_OPEN;
		switch( state ){
		case TOK_STRING_MBS : token->type = token->name = DTOK_MBS_OPEN; break;
		case TOK_STRING_WCS : token->type = token->name = DTOK_WCS_OPEN; break;
		}
		if( token->type == DTOK_IDENTIFIER ){
			token->name = dao_key_hash( literal->mbs, literal->size );
			if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
		}else if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL ){
			token->type = DTOK_IDENTIFIER;
		}
		if( token->type || space ){
			if( isspace( token->string.mbs[0] ) )
				token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
			DaoLexer_AppendToken( self, token );
		}
	}
	DaoToken_Delete( token );
	DVector_Delete( lexenvs );
	DString_Delete( source );
#if 0
	for(i=0; i<self->tokens->size; i++){
		DaoToken *tk = self->tokens->items.pToken[i];
		printf( "%4i: %4i %4i , %4i,  %s\n", i, tk->type, tk->name, tk->cpos, tk->string.mbs );
	}
#endif
	return ret ? line : 0;
}