/*
// Write a wide-character string to the stream.
// The output sink is chosen in priority order: a redirect with a StdioWrite
// hook, then an attached file, then an internal string buffer, then stdout.
// self->format, when set, overrides the default "%ls" format for the
// file/stdout paths; the redirect path always receives a multi-byte copy.
*/
void DaoStream_WriteWCS( DaoStream *self, const wchar_t *val )
{
	const char *format = self->format ? self->format : "%ls";
	if( self->redirect && self->redirect->StdioWrite ){
		/* Convert to a multi-byte string and hand it to the redirect hook: */
		DString *mbs = DString_New(1);
		DString_SetWCS( mbs, val );
		self->redirect->StdioWrite( self->redirect, mbs );
		DString_Delete( mbs );
		return;
	}
	if( self->file ){
		fprintf( self->file, format, val );
	}else if( self->attribs & DAO_IO_STRING ){
		DString_AppendWCS( self->streamString, val );
	}else{
		printf( format, val );
	}
}
/*
// Tokenize "src" into the lexer's token list.
//
// Parameters:
//   self  : target lexer; its previous tokens are discarded via DaoLexer_Reset();
//   src   : NUL-terminated source text (multi-byte encoding);
//   flags : bit-or of DAO_LEX_ESCAPE (decode \-escapes inside string literals),
//           DAO_LEX_COMMENT (keep comment tokens) and DAO_LEX_SPACE (keep
//           whitespace tokens).
//
// Returns the number of the last line scanned on success, or 0 if an
// incomplete (unterminated escape in a) string literal was found.
//
// Fix in this revision: token->string.mbs[0] is a plain "char", which may be
// negative for bytes >= 0x80 (this function explicitly supports such input,
// see the "unicoded" pre-pass).  Passing a negative value to isspace() is
// undefined behavior (CERT STR37-C), and using it as an index into
// daoSpaceType[] is a negative array index.  All three call sites now cast
// through "unsigned char" first; behavior for plain ASCII is unchanged.
*/
int DaoLexer_Tokenize( DaoLexer *self, const char *src, int flags )
{
	DString *source = DString_New(1);
	DVector *lexenvs = DVector_New( sizeof(int) );
	DaoToken *token = DaoToken_New();
	DString *literal = & token->string; /* accumulates the current token's text */
	char ch, *ss, hex[11] = "0x00000000"; /* scratch buffer for \x/\u/\U/\ooo escapes */
	int replace = flags & DAO_LEX_ESCAPE;
	int comment = flags & DAO_LEX_COMMENT;
	int space = flags & DAO_LEX_SPACE;
	int srcSize = (int)strlen( src );
	int old=0, state = TOK_START;
	int lexenv = LEX_ENV_NORMAL;
	int unicoded = 0;
	int line = 1;
	int cpos = 0;
	int ret = 1;
	int it = 0;
	int i, m = 4;

	DString_SetSharing( literal, 0 );
	/* Detect any byte >= 0x80; only then is the unicode pre-pass needed: */
	for(it=0; it<srcSize; it++){
		if( (signed char) src[it] < 0 ){
			unicoded = 1;
			break;
		}
	}
	if( unicoded && daoConfig.mbs == 0 ){
		/*
		// Pre-pass: normalize full-width (DBC) punctuation and fancy quote
		// marks to their ASCII equivalents, skipping over quoted spans so
		// that string literal contents are left untouched.
		*/
		DString *wcs = DString_New(0);
		/* http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html */
		/* Triples: { opening mark, matching closing mark, replacement }: */
		wchar_t quotes[] = {
			0x27 , 0x27 , 0x27, /* single q.m. */
			0x22 , 0x22 , 0x22, /* double q.m. */
			0x27 + 0xfee0 , 0x27 + 0xfee0 , 0x27 , /* single q.m. unicode */
			0x22 + 0xfee0 , 0x22 + 0xfee0 , 0x22 , /* double q.m. unicode */
			0x60 , 0x27 , 0x27, /* grave accent */
			0x2018 , 0x2019 , 0x27 , /* left/right single q.m. */
			0x201C , 0x201D , 0x22  /* left/right double q.m. */
		};
		wchar_t sl = L'\\' + 0xfee0; /* full-width backslash */
		wchar_t stop;
		int i, N = 21;
		it = 0;
		DString_SetMBS( wcs, src );
		while( it < wcs->size ){
			// TODO: handle verbatim string!
			for( i=0; i<N; i+=3 ){
				if( wcs->wcs[it] == quotes[i] ){
					stop = quotes[i+1];
					wcs->wcs[it] = quotes[i+2];
					it ++;
					/* Skip to the matching closing mark; honor escapes: */
					while( it < wcs->size && wcs->wcs[it] != stop ){
						if( wcs->wcs[it] == sl || wcs->wcs[it] == L'\\' ){
							it ++;
							continue;
						}
						it ++;
					}
					if( it < wcs->size ) wcs->wcs[it] = quotes[i+2];
					break;
				}
			}
			if( it >= wcs->size ) break;
			if( wcs->wcs[it] == 0x3000 ){
				wcs->wcs[it] = 32; /* blank space */
			}else if( wcs->wcs[it] > 0xff00 && wcs->wcs[it] < 0xff5f ){
				wcs->wcs[it] -= 0xfee0; /* DBC to SBC */
			}
			it ++;
		}
		if( wcs->size ){
			DString_SetWCS( source, wcs->wcs );
			src = source->mbs;
			srcSize = source->size;
		}
		DString_Delete( wcs );
	}
	DaoLexer_Reset( self );

	DVector_PushInt( lexenvs, LEX_ENV_NORMAL );
	it = 0;
	token->cpos = 0;
	/* Main scan: one byte per iteration, driven by the daoLexTable DFA: */
	while( it < srcSize ){
#if 0
		printf( "tok: %i %i %i %c %s\n", srcSize, it, ch, ch, literal->mbs );
#endif
		token->type = state;
		token->name = 0;
		token->line = line;
		ch = src[it];
		cpos += ch == '\t' ? daoConfig.tabspace : 1;
		if( ch == '\n' ) cpos = 0, line ++;
		if( literal->size == 0 ) token->cpos = cpos;
		if( state == TOK_STRING_MBS || state == TOK_STRING_WCS ){
			/* Inside a quoted string literal: */
			if( ch == '\\' ){
				it ++;
				if( replace == 0 ){
					/* Keep escape sequences verbatim: */
					DString_AppendChar( literal, ch );
					if( it < srcSize ){
						if( src[it] == '\n' ) cpos = 0, line ++;
						DString_AppendChar( literal, src[it] );
					}
					it ++;
					continue;
				}
				if( it >= srcSize ){
					ret = 0;
					printf( "error: incomplete string at line %i.\n", line );
					break;
				}
				if( src[it] == '\n' ) cpos = 0, line ++;
				switch( src[it] ){
				case '0' : case '1' : case '2' : case '3' :
				case '4' : case '5' : case '6' : case '7' :
					/* \ooo : up to 3 octal digits, parsed past the "0x" prefix */
					i = 2;
					while( i < 5 && it < srcSize && src[it] >= '0' && src[it] < '8' ){
						hex[i] = src[it++];
						hex[++i] = 0;
					}
					DString_AppendChar( literal, (char) strtol( hex+2, NULL, 8 ) );
					it --;
					break;
				case '8' : case '9' :
					DString_AppendChar( literal, (char) (src[it] - '0') );
					break;
				case 'x' : case 'u' : case 'U' :
					i = 2;
					switch( src[it] ){
					case 'x' : m = 4;  break; /* \xhh: max 2 hex digit; */
					case 'u' : m = 6;  break; /* \uhhhh: max 4 hex digit; */
					case 'U' : m = 10; break; /* \Uhhhhhhhh: max 8 hex digit; */
					}
					while( i < m && (it+1) < srcSize && isxdigit( src[it+1] ) ){
						hex[i] = src[++it];
						hex[++i] = 0;
					}
					/* base 0: strtol sees the "0x" prefix and parses as hex */
					DString_AppendWChar( literal, (wchar_t) strtol( hex, NULL, 0 ) );
					break;
				case 't' : DString_AppendChar( literal, '\t' ); break;
				case 'n' : DString_AppendChar( literal, '\n' ); break;
				case 'r' : DString_AppendChar( literal, '\r' ); break;
				case '\'' : DString_AppendChar( literal, '\'' ); break;
				case '\"' : DString_AppendChar( literal, '\"' ); break;
				default : DString_AppendChar( literal, src[it] ); break;
				}
			}else if( ch == '\'' && state == TOK_STRING_MBS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_MBS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else if( ch == '\"' && state == TOK_STRING_WCS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_WCS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( ch == ']' && state == TOK_VERBATIM ){
			/*
			// Verbatim string: "literal" holds the opening delimiter; search
			// for its first repetition to find the closing delimiter.  If no
			// match is found the rest of the source becomes a DTOK_VBT_OPEN.
			*/
			int len = srcSize - it - 1;
			DString_AppendChar( literal, ']' );
			token->type = token->name = DTOK_VBT_OPEN;
			if( (ss = strstr( src + it + 1, literal->mbs )) != NULL ){
				len = (ss - src) - it - 1 + literal->size;
				token->type = token->name = DTOK_VERBATIM;
			}
			for(i=0; i<len; i++) if( src[it+1+i] == '\n' ) line += 1;
			DString_AppendDataMBS( literal, src + it + 1, len );
			state = TOK_RESTART;
			DaoLexer_AppendToken( self, token );
			DString_Clear( literal );
			it += len;
		}else if( lexenv == LEX_ENV_NORMAL ){
			old = state;
			if( ch >=0 ){
				state = daoLexTable[ state ][ (int)ch ];
			}else if( state <= TOK_START ){
				state = TOK_RESTART;
			}else if( state != TOK_IDENTIFIER && state != TOK_STRING_MBS
					&& state != TOK_STRING_WCS && state != TOK_COMT_LINE
					&& state != TOK_COMT_OPEN ){
				/* Non-ASCII byte outside an identifier/string/comment: */
				state = TOK_RESTART;
			}
			if( state >= TOK_END ){
				/* The DFA reached an accepting state including this byte: */
				DString_AppendChar( literal, ch );
				token->type = token->name = daoTokenMap[ state ];
				if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
					token->type = DTOK_IDENTIFIER;
				if( space || comment || token->type != DTOK_COMMENT ){
					/* cast: mbs[0] may be negative; see header comment */
					if( isspace( (unsigned char) token->string.mbs[0] ) )
						token->type = token->name = daoSpaceType[ (unsigned char)token->string.mbs[0] ];
					DaoLexer_AppendToken( self, token );
				}
				/* may be a token before the line break; */
				DString_Clear( literal );
				state = TOK_START;
			}else if( state == TOK_RESTART ){
				/* Current byte cannot extend the token; flush and restart: */
				if( literal->size ){
					if( old == TOK_IDENTIFIER ){
						token->name = dao_key_hash( literal->mbs, literal->size );
						token->type = DTOK_IDENTIFIER;
						if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( old > TOK_RESTART && old != TOK_END ){
						token->type = token->name = daoTokenMap[ old ];
						if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
							token->type = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( space ){
						/* cast: mbs[0] may be negative; see header comment */
						if( isspace( (unsigned char) token->string.mbs[0] ) )
							token->type = token->name = daoSpaceType[ (unsigned char)token->string.mbs[0] ];
						DaoLexer_AppendToken( self, token );
					}
					DString_Clear( literal );
					token->cpos = cpos;
				}
				DString_AppendChar( literal, ch );
				if( ch >=0 )
					state = daoLexTable[ TOK_START ][ (int)ch ];
				else
					state = TOK_IDENTIFIER; /* non-ASCII bytes start identifiers */
			}else if( state == TOK_COMT_OPEN ){
				/* Entering a #{ ... }# block comment (may nest): */
				DString_AppendChar( literal, ch );
				lexenv = LEX_ENV_COMMENT;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( lexenv == LEX_ENV_COMMENT ){
			/* Inside a (possibly nested) #{ ... }# block comment: */
			DString_AppendChar( literal, ch );
			if( ch == '#' ){
				state = TOK_OP_SHARP;
			}else if( ch == '{' && state == TOK_OP_SHARP ){
				state = TOK_COMT_OPEN;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else if( ch == '}' && state == TOK_OP_SHARP ){
				state = TOK_COMT_CLOSE;
				DVector_Pop( lexenvs );
				lexenv = lexenvs->data.ints[lexenvs->size-1];
				if( lexenv != LEX_ENV_COMMENT ){
					/* Outermost comment closed; emit it if requested: */
					token->type = token->name = DTOK_COMMENT;
					if( comment ) DaoLexer_AppendToken( self, token );
					DString_Clear( literal );
					state = TOK_RESTART;
				}
			}else{
				state = TOK_START;
			}
		}
		it ++;
	}
	if( literal->size ){
		/* Flush the trailing (possibly unterminated) token: */
		token->type = token->name = daoTokenMap[ state ];
		if( lexenv == LEX_ENV_COMMENT ) token->type = token->name = DTOK_CMT_OPEN;
		switch( state ){
		case TOK_STRING_MBS : token->type = token->name = DTOK_MBS_OPEN; break;
		case TOK_STRING_WCS : token->type = token->name = DTOK_WCS_OPEN; break;
		}
		if( token->type == DTOK_IDENTIFIER ){
			token->name = dao_key_hash( literal->mbs, literal->size );
			if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
		}else if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL ){
			token->type = DTOK_IDENTIFIER;
		}
		if( token->type || space ){
			/* cast: mbs[0] may be negative; see header comment */
			if( isspace( (unsigned char) token->string.mbs[0] ) )
				token->type = token->name = daoSpaceType[ (unsigned char)token->string.mbs[0] ];
			DaoLexer_AppendToken( self, token );
		}
	}
	DaoToken_Delete( token );
	DVector_Delete( lexenvs );
	DString_Delete( source );
#if 0
	for(i=0; i<self->tokens->size; i++){
		DaoToken *tk = self->tokens->items.pToken[i];
		printf( "%4i: %4i %4i , %4i, %s\n", i, tk->type, tk->name, tk->cpos, tk->string.mbs );
	}
#endif
	return ret ? line : 0;
}