DMacroUnit* DMacroUnit_New()
{
	DMacroUnit *self = (DMacroUnit*) dao_malloc( sizeof(DMacroUnit) );
	self->type = DMACRO_TOK;
	self->stops = DArray_New(D_TOKEN);
	self->marker = DaoToken_New();
	return self;
}
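/*
** A minimal destructor sketch for symmetry with DMacroUnit_New(): both owned
** members must be released before the unit itself. This assumes dao_free()
** as the counterpart of dao_malloc(); the actual DMacroUnit_Delete() is not
** part of this excerpt.
*/
#if 0
void DMacroUnit_Delete( DMacroUnit *self )
{
	DArray_Delete( self->stops );    /* token array created with DArray_New(D_TOKEN) */
	DaoToken_Delete( self->marker ); /* marker token created with DaoToken_New() */
	dao_free( self );
}
#endif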
DaoToken* DaoToken_Copy( DaoToken *self )
{
	DaoToken *copy = DaoToken_New();
	DaoToken_Assign( copy, self );
	return copy;
}
/*
** Tokenize "src" into the token list of "self". Returns the number of the
** last scanned line on success, zero on error (an incomplete string escape).
** "flags" is a bit mask: DAO_LEX_ESCAPE interprets string escapes,
** DAO_LEX_COMMENT keeps comment tokens, DAO_LEX_SPACE keeps whitespace tokens.
*/
int DaoLexer_Tokenize( DaoLexer *self, const char *src, int flags )
{
	DString *source = DString_New(1);
	DVector *lexenvs = DVector_New( sizeof(int) );
	DaoToken *token = DaoToken_New();
	DString *literal = & token->string;
	char ch, *ss, hex[11] = "0x00000000";
	int replace = flags & DAO_LEX_ESCAPE;
	int comment = flags & DAO_LEX_COMMENT;
	int space = flags & DAO_LEX_SPACE;
	int srcSize = (int)strlen( src );
	int old = 0, state = TOK_START;
	int lexenv = LEX_ENV_NORMAL;
	int unicoded = 0;
	int line = 1;
	int cpos = 0;
	int ret = 1;
	int it = 0;
	int i, m = 4;

	DString_SetSharing( literal, 0 );
	for(it=0; it<srcSize; it++){
		if( (signed char) src[it] < 0 ){
			unicoded = 1;
			break;
		}
	}
	if( unicoded && daoConfig.mbs == 0 ){
		/* Non-ASCII input: normalize full-width punctuation to ASCII before lexing. */
		DString *wcs = DString_New(0);
		/* http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html */
		/* Each triple: opening character, closing character, ASCII replacement. */
		wchar_t quotes[] = {
			0x27 , 0x27 , 0x27 , /* single quotation mark */
			0x22 , 0x22 , 0x22 , /* double quotation mark */
			0x27 + 0xfee0 , 0x27 + 0xfee0 , 0x27 , /* full-width single quotation mark */
			0x22 + 0xfee0 , 0x22 + 0xfee0 , 0x22 , /* full-width double quotation mark */
			0x60 , 0x27 , 0x27 , /* grave accent */
			0x2018 , 0x2019 , 0x27 , /* left/right single quotation marks */
			0x201C , 0x201D , 0x22   /* left/right double quotation marks */
		};
		wchar_t sl = L'\\' + 0xfee0; /* full-width backslash */
		wchar_t stop;
		int i, N = 21;
		it = 0;
		DString_SetMBS( wcs, src );
		while( it < wcs->size ){
			/* TODO: handle verbatim string! */
			for(i=0; i<N; i+=3){
				if( wcs->wcs[it] == quotes[i] ){
					stop = quotes[i+1];
					wcs->wcs[it] = quotes[i+2];
					it ++;
					/* Skip to the closing quote, stepping over escaped characters: */
					while( it < wcs->size && wcs->wcs[it] != stop ){
						if( wcs->wcs[it] == sl || wcs->wcs[it] == L'\\' ){
							it ++;
							continue;
						}
						it ++;
					}
					if( it < wcs->size ) wcs->wcs[it] = quotes[i+2];
					break;
				}
			}
			if( it >= wcs->size ) break;
			if( wcs->wcs[it] == 0x3000 ){
				wcs->wcs[it] = 32; /* ideographic space to blank space */
			}else if( wcs->wcs[it] > 0xff00 && wcs->wcs[it] < 0xff5f ){
				wcs->wcs[it] -= 0xfee0; /* DBC to SBC (full-width to half-width) */
			}
			it ++;
		}
		if( wcs->size ){
			DString_SetWCS( source, wcs->wcs );
			src = source->mbs;
			srcSize = source->size;
		}
		DString_Delete( wcs );
	}
	DaoLexer_Reset( self );

	DVector_PushInt( lexenvs, LEX_ENV_NORMAL );
	it = 0;
	token->cpos = 0;
	while( it < srcSize ){
#if 0
		printf( "tok: %i %i %i %c %s\n", srcSize, it, ch, ch, literal->mbs );
#endif
		token->type = state;
		token->name = 0;
		token->line = line;
		ch = src[it];
		cpos += ch == '\t' ? daoConfig.tabspace : 1;
		if( ch == '\n' ) cpos = 0, line ++;
		if( literal->size == 0 ) token->cpos = cpos;
		if( state == TOK_STRING_MBS || state == TOK_STRING_WCS ){
			if( ch == '\\' ){
				it ++;
				if( replace == 0 ){
					/* Keep the escape sequence verbatim: */
					DString_AppendChar( literal, ch );
					if( it < srcSize ){
						if( src[it] == '\n' ) cpos = 0, line ++;
						DString_AppendChar( literal, src[it] );
					}
					it ++;
					continue;
				}
				if( it >= srcSize ){
					ret = 0;
					printf( "error: incomplete string at line %i.\n", line );
					break;
				}
				if( src[it] == '\n' ) cpos = 0, line ++;
				switch( src[it] ){
				case '0' : case '1' : case '2' : case '3' :
				case '4' : case '5' : case '6' : case '7' :
					/* \ooo: up to 3 octal digits; */
					i = 2;
					while( i < 5 && it < srcSize && src[it] >= '0' && src[it] < '8' ){
						hex[i] = src[it++];
						hex[++i] = 0;
					}
					DString_AppendChar( literal, (char) strtol( hex+2, NULL, 8 ) );
					it --;
					break;
				case '8' : case '9' :
					DString_AppendChar( literal, (char) (src[it] - '0') );
					break;
				case 'x' : case 'u' : case 'U' :
					i = 2;
					switch( src[it] ){
					case 'x' : m = 4;  break; /* \xhh: at most 2 hex digits; */
					case 'u' : m = 6;  break; /* \uhhhh: at most 4 hex digits; */
					case 'U' : m = 10; break; /* \Uhhhhhhhh: at most 8 hex digits; */
					}
					while( i < m && (it+1) < srcSize && isxdigit( src[it+1] ) ){
						hex[i] = src[++it];
						hex[++i] = 0;
					}
					DString_AppendWChar( literal, (wchar_t) strtol( hex, NULL, 0 ) );
					break;
				case 't' : DString_AppendChar( literal, '\t' ); break;
				case 'n' : DString_AppendChar( literal, '\n' ); break;
				case 'r' : DString_AppendChar( literal, '\r' ); break;
				case '\'' : DString_AppendChar( literal, '\'' ); break;
				case '\"' : DString_AppendChar( literal, '\"' ); break;
				default : DString_AppendChar( literal, src[it] ); break;
				}
			}else if( ch == '\'' && state == TOK_STRING_MBS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_MBS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else if( ch == '\"' && state == TOK_STRING_WCS ){
				DString_AppendChar( literal, ch );
				state = TOK_RESTART;
				token->type = token->name = DTOK_WCS;
				DaoLexer_AppendToken( self, token );
				DString_Clear( literal );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( ch == ']' && state == TOK_VERBATIM ){
			/* Scan ahead for a matching closing delimiter of the verbatim string: */
			int len = srcSize - it - 1;
			DString_AppendChar( literal, ']' );
			token->type = token->name = DTOK_VBT_OPEN;
			if( (ss = strstr( src + it + 1, literal->mbs )) != NULL ){
				len = (ss - src) - it - 1 + literal->size;
				token->type = token->name = DTOK_VERBATIM;
			}
			for(i=0; i<len; i++) if( src[it+1+i] == '\n' ) line += 1;
			DString_AppendDataMBS( literal, src + it + 1, len );
			state = TOK_RESTART;
			DaoLexer_AppendToken( self, token );
			DString_Clear( literal );
			it += len;
		}else if( lexenv == LEX_ENV_NORMAL ){
			old = state;
			if( ch >= 0 ){
				state = daoLexTable[ state ][ (int)ch ];
			}else if( state <= TOK_START ){
				state = TOK_RESTART;
			}else if( state != TOK_IDENTIFIER && state != TOK_STRING_MBS
					&& state != TOK_STRING_WCS
					&& state != TOK_COMT_LINE && state != TOK_COMT_OPEN ){
				state = TOK_RESTART;
			}
			if( state >= TOK_END ){
				DString_AppendChar( literal, ch );
				token->type = token->name = daoTokenMap[ state ];
				if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
					token->type = DTOK_IDENTIFIER;
				if( space || comment || token->type != DTOK_COMMENT ){
					if( isspace( token->string.mbs[0] ) )
						token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
					DaoLexer_AppendToken( self, token );
				}
				/* may be a token before the line break; */
				DString_Clear( literal );
				state = TOK_START;
			}else if( state == TOK_RESTART ){
				if( literal->size ){
					if( old == TOK_IDENTIFIER ){
						token->name = dao_key_hash( literal->mbs, literal->size );
						token->type = DTOK_IDENTIFIER;
						if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( old > TOK_RESTART && old != TOK_END ){
						token->type = token->name = daoTokenMap[ old ];
						if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL )
							token->type = DTOK_IDENTIFIER;
						DaoLexer_AppendToken( self, token );
					}else if( space ){
						if( isspace( token->string.mbs[0] ) )
							token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
						DaoLexer_AppendToken( self, token );
					}
					DString_Clear( literal );
					token->cpos = cpos;
				}
				DString_AppendChar( literal, ch );
				if( ch >= 0 )
					state = daoLexTable[ TOK_START ][ (int)ch ];
				else
					state = TOK_IDENTIFIER;
			}else if( state == TOK_COMT_OPEN ){
				DString_AppendChar( literal, ch );
				lexenv = LEX_ENV_COMMENT;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else{
				DString_AppendChar( literal, ch );
			}
		}else if( lexenv == LEX_ENV_COMMENT ){
			/* Inside a (possibly nested) #{ ... #} block comment: */
			DString_AppendChar( literal, ch );
			if( ch == '#' ){
				state = TOK_OP_SHARP;
			}else if( ch == '{' && state == TOK_OP_SHARP ){
				state = TOK_COMT_OPEN;
				DVector_PushInt( lexenvs, LEX_ENV_COMMENT );
			}else if( ch == '}' && state == TOK_OP_SHARP ){
				state = TOK_COMT_CLOSE;
				DVector_Pop( lexenvs );
				lexenv = lexenvs->data.ints[ lexenvs->size-1 ];
				if( lexenv != LEX_ENV_COMMENT ){
					token->type = token->name = DTOK_COMMENT;
					if( comment ) DaoLexer_AppendToken( self, token );
					DString_Clear( literal );
					state = TOK_RESTART;
				}
			}else{
				state = TOK_START;
			}
		}
		it ++;
	}
	if( literal->size ){
		/* Flush the trailing token, marking unterminated constructs as such: */
		token->type = token->name = daoTokenMap[ state ];
		if( lexenv == LEX_ENV_COMMENT ) token->type = token->name = DTOK_CMT_OPEN;
		switch( state ){
		case TOK_STRING_MBS : token->type = token->name = DTOK_MBS_OPEN; break;
		case TOK_STRING_WCS : token->type = token->name = DTOK_WCS_OPEN; break;
		}
		if( token->type == DTOK_IDENTIFIER ){
			token->name = dao_key_hash( literal->mbs, literal->size );
			if( token->name == 0 ) token->name = DTOK_IDENTIFIER;
		}else if( token->type == DTOK_ID_THTYPE || token->type == DTOK_ID_SYMBOL ){
			token->type = DTOK_IDENTIFIER;
		}
		if( token->type || space ){
			if( isspace( token->string.mbs[0] ) )
				token->type = token->name = daoSpaceType[ (int)token->string.mbs[0] ];
			DaoLexer_AppendToken( self, token );
		}
	}
	DaoToken_Delete( token );
	DVector_Delete( lexenvs );
	DString_Delete( source );
#if 0
	for(i=0; i<self->tokens->size; i++){
		DaoToken *tk = self->tokens->items.pToken[i];
		printf( "%4i: %4i %4i , %4i, %s\n", i, tk->type, tk->name, tk->cpos, tk->string.mbs );
	}
#endif
	return ret ? line : 0;
}
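/*
** A minimal usage sketch for DaoLexer_Tokenize(), assuming DaoLexer_New() and
** DaoLexer_Delete() as the lexer's constructor and destructor (neither is
** part of this excerpt). It requests comment and whitespace tokens and walks
** the token list, mirroring the disabled debug loop above.
*/
#if 0
static void Example_PrintTokens( const char *src )
{
	DaoLexer *lexer = DaoLexer_New();
	int i, last = DaoLexer_Tokenize( lexer, src, DAO_LEX_COMMENT | DAO_LEX_SPACE );
	if( last == 0 ) printf( "tokenization failed\n" ); /* zero signals an error */
	for(i=0; i<lexer->tokens->size; i++){
		DaoToken *tk = lexer->tokens->items.pToken[i];
		printf( "%4i: %4i %4i, %s\n", i, tk->type, tk->name, tk->string.mbs );
	}
	DaoLexer_Delete( lexer );
}
#endif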
/*
** Apply the units of a macro group, appending the produced tokens to
** "tokens". Returns -1 when the group fails to match; otherwise returns
** whether a repeatable sub-pattern was applied, so callers expanding
** repeated groups know to iterate again.
*/
static int DaoParser_MacroApply( DaoParser *self, DArray *tokens,
		DMacroGroup *group, DMap *tokMap, DMap *used,
		int level, DString *tag, int pos0, int adjust )
{
	DMacroUnit **units = (DMacroUnit**) group->units->items.pVoid;
	DMacroUnit *unit;
	DMacroGroup *grp;
	DMacroNode *node, *node2;
	DArray *toks = DArray_New(D_TOKEN);
	DaoToken *tk = DaoToken_New();
	DaoToken *tt = NULL;
	DNode *kwnode = NULL;
	DMap *check = NULL;
	DMap one = { NULL, 0, 0, 0 };
	int M, N = group->units->size;
	int i, j, gid = -1;
	int repeated = 0;
	int start_mbs = -1;
	int start_wcs = -1;
	int squote, dquote;

	if( group->repeat != DMACRO_AUTO ) level ++;
	for(i=0; i<N; i++){
		unit = units[i];
		if( tokens->size > 0 ) pos0 = tokens->items.pToken[ tokens->size-1 ]->line;
		self->curLine = pos0;
		/* printf( "apply unit %i: %i\n", i, unit->type ); */
		switch( unit->type ){
		case DMACRO_TOK :
			/* A literal token, or the \' / \" markers that stringify the enclosed tokens: */
			squote = unit->marker->type == DTOK_ESC_SQUO;
			dquote = unit->marker->type == DTOK_ESC_DQUO;
			if( (squote && start_mbs >= 0) || (dquote && start_wcs >= 0) ){
				/* Closing quote: merge the tokens collected since the opening
				 * quote into a single string token. */
				int qstart = squote ? start_mbs : start_wcs;
				tt = tokens->items.pToken[ qstart ];
				for(j=qstart+1,M=tokens->size; j<M; j++){
					DaoToken *jtok = tokens->items.pToken[j];
					int t = j ? tokens->items.pToken[j-1]->type : 0;
					/* Keep adjacent identifiers separated inside the string: */
					if( t == DTOK_IDENTIFIER && jtok->type == t )
						DString_AppendChar( & tt->string, ' ' );
					DString_Append( & tt->string, & jtok->string );
				}
				if( squote ){
					DString_AppendChar( & tt->string, '\'' );
					DArray_Erase( tokens, start_mbs+1, tokens->size );
					start_mbs = -1; /* reset the open-quote index */
				}else{
					DString_AppendChar( & tt->string, '\"' );
					DArray_Erase( tokens, start_wcs+1, tokens->size );
					start_wcs = -1; /* reset the open-quote index */
				}
				break;
			}else if( squote ){
				/* Opening single quote: start collecting an MBS string token. */
				start_mbs = tokens->size;
				DArray_Append( tokens, unit->marker );
				tt = tokens->items.pToken[ start_mbs ];
				tt->type = tt->name = DTOK_MBS;
				DString_SetMBS( & tt->string, "\'" );
				break;
			}else if( dquote ){
				/* Opening double quote: start collecting a WCS string token. */
				start_wcs = tokens->size;
				DArray_Append( tokens, unit->marker );
				tt = tokens->items.pToken[ start_wcs ];
				tt->type = tt->name = DTOK_WCS;
				DString_SetMBS( & tt->string, "\"" );
				break;
			}
			DArray_Append( tokens, unit->marker );
			tokens->items.pToken[ tokens->size-1 ]->cpos += adjust;
			break;
		case DMACRO_VAR :
			/* A macro-local variable: rename it by appending the expansion tag. */
			DaoToken_Assign( tk, unit->marker );
			DString_Append( & tk->string, tag );
			DArray_Append( tokens, tk );
			break;
		case DMACRO_EXP :
		case DMACRO_ID :
		case DMACRO_OP :
		case DMACRO_BL :
		case DMACRO_IBL :
			/* A pattern marker: substitute the tokens captured for it. */
			kwnode = MAP_Find( tokMap, & unit->marker->string );
			if( kwnode == NULL ){
				DaoParser_Error( self, DAO_CTW_UNDEF_MAC_MARKER, & unit->marker->string );
				goto Failed;
			}
			node = (DMacroNode*) kwnode->value.pVoid;
			kwnode = MAP_Find( used, unit );
			if( kwnode == NULL ){
				DMap_Insert( used, unit, & one );
				kwnode = MAP_Find( used, unit );
			}
			check = (DMap*) kwnode->value.pVoid;
			repeated = 1;
			/*
			printf( ">>>\n%s level %i: \n", unit->marker->string.mbs, level );
			DMacroNode_Print( node );
			printf( "\n" );
			*/
			/* search a leaf holding the tokens captured for this marker */
			node2 = DMacroNode_FindLeaf( node, check, level );
			if( node2 ){
				/*
				printf( "appending tokens\n" );
				DMacroNode_Print( node2 );
				printf( "\n" );
				*/
				DArray_InsertArray( tokens, tokens->size, node2->leaves, 0, -1 );
				DMap_Insert( check, node2, NULL );
				/* DArray_Clear( node2->leaves ); */
			}else{
				DMacroNode_RemoveEmptyLeftBranch( node, level );
				goto Failed;
			}
			break;
		case DMACRO_GRP :
		case DMACRO_ALT :
			/* A sub-group or alternation: apply recursively per its repeat kind. */
			grp = (DMacroGroup*) unit;
			DArray_Clear( toks );
			j = DaoParser_MacroApply( self, toks, grp, tokMap, used, level, tag, pos0, adjust );
			switch( grp->repeat ){
			case DMACRO_AUTO :
			case DMACRO_ONE :
				if( j < 0 && group->type != DMACRO_ALT ) goto Failed;
				repeated = (j > 0);
				if( j >= 0 ){
					gid = i;
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				break;
			case DMACRO_ZERO_OR_ONE :
				gid = i;
				repeated = (j > 0);
				if( j >= 0 ){
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				break;
			case DMACRO_ZERO_OR_MORE :
				gid = i;
				repeated = (j > 0);
				if( j >= 0 ){
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				while( j > 0 ){
					DArray_Clear( toks );
					j = DaoParser_MacroApply( self, toks, grp, tokMap, used, level, tag, pos0, adjust );
					if( j > 0 ){
						DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
					}
				}
				break;
			case DMACRO_ONE_OR_MORE :
				if( j < 0 && group->type != DMACRO_ALT ) goto Failed;
				repeated = (j > 0);
				if( j >= 0 ){
					DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
				}
				while( j > 0 ){
					gid = i;
					DArray_Clear( toks );
					j = DaoParser_MacroApply( self, toks, grp, tokMap, used, level, tag, pos0, adjust );
					if( j > 0 ){
						DArray_InsertArray( tokens, tokens->size, toks, 0, -1 );
					}
				}
				break;
			}
			break;
		default : goto Failed;
		}
		if( group->type == DMACRO_ALT && gid >= 0 ) break;
	}
	if( group->repeat != DMACRO_AUTO ) level --;
	if( group->type == DMACRO_ALT && gid < 0 ) goto Failed;
	DaoToken_Delete( tk );
	DArray_Delete( toks );
	return repeated;
Failed :
	DaoToken_Delete( tk );
	DArray_Delete( toks );
	return -1;
}
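/*
** Worked example of the DMACRO_TOK stringification above (illustrative; the
** marker names are hypothetical). Expanding the transformation units
**     \'  $ID1  $ID2  \'
** with $ID1 captured as "foo" and $ID2 as "bar" first emits an opening
** DTOK_MBS token holding "'", then appends "foo" and "bar" (separated by a
** blank, since two identifier tokens are adjacent), and on the closing \'
** merges everything into the single string token 'foo bar', erasing the
** intermediate tokens.
*/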