/*
 * Tries to sweep all the junk out of the raw input text.
 *
 * The text is trimmed, double newlines are collapsed, apostrophe look-alikes
 * are mapped onto the plain apostrophe, and then the text is copied character
 * by character into a fresh string. Each character is kept, replaced or
 * dropped depending on its class and on the previously emitted character
 * and/or the following input character. The first matching rule wins, so the
 * order of the checks below matters.
 *
 * Returns the cleaned, trimmed text.
 */
CFSWString DealWithText(CFSWString text) {
    static const wchar_t *const apostrophe_variants[] = {L"‘", L"`", L"´", L"’"};
    CFSWString cleaned;

    text.Trim();
    text.Replace(L"\n\n", L"\n", 1);
    for (unsigned k = 0; k < sizeof(apostrophe_variants) / sizeof(apostrophe_variants[0]); k++) {
        text.Replace(apostrophe_variants[k], L"'", 1);
    }

    for (INTPTR i = 0; i < text.GetLength(); i++) {
        CFSWString cur = text.GetAt(i);
        // "prev" is the last character already emitted, not the previous input character.
        CFSWString prev = cleaned.GetAt(cleaned.GetLength() - 1);
        CFSWString next = text.GetAt(i + 1);

        // An apostrophe right after a vowel is emitted as "q", otherwise kept.
        if (cur == L"'") {
            if (is_vowel(prev)) {
                cleaned += L"q";
            } else {
                cleaned += cur;
            }
            continue;
        }

        // Letters and digits pass through unchanged.
        if (is_char(cur) || is_digit(cur)) {
            cleaned += cur;
            continue;
        }

        // A hyphen between two letters is replaced by sp.
        if (is_hyphen(cur) && is_char(prev) && is_char(next)) {
            cleaned += sp;
            continue;
        }

        // Symbols are always kept; colons and opening brackets are kept
        // unless the previously emitted character is of the same class.
        if (is_symbol(cur)
                || (is_colon(cur) && !is_colon(prev))
                || (is_bbracket(cur) && !is_bbracket(prev))) {
            cleaned += cur;
            continue;
        }

        // A closing bracket directly followed by an ending character is dropped.
        if (is_ebracket(cur) && is_ending(next)) {
            continue;
        }

        // Closing brackets and commas are kept unless they repeat the
        // previously emitted character class.
        if ((is_ebracket(cur) && !is_ebracket(prev))
                || (is_comma(cur) && !is_comma(prev))) {
            cleaned += cur;
            continue;
        }

        // Characters recognised by is_fchar are swapped for their replacement.
        if (is_fchar(cur)) {
            cleaned += replace_fchar(cur);
            continue;
        }

        // Spaces are kept only when no whitespace was emitted just before.
        if (is_space(cur) && !is_whitespace(prev)) {
            cleaned += cur;
            continue;
        }

        // Breaks are kept unless they repeat (marked as questionable by the
        // original author).
        if (is_break(cur) && !is_break(prev)) {
            cleaned += cur;
            continue;
        }

        // Tabs follow the same rule as spaces.
        if (is_tab(cur) && !is_whitespace(prev)) {
            cleaned += cur;
            continue;
        }

        // Ending characters are kept unless they follow another ending
        // character or whitespace.
        if (is_ending(cur) && !is_ending(prev) && !is_whitespace(prev)) {
            cleaned += cur;
        }
        // Anything else is silently discarded.
    }

    cleaned.Trim();
    return cleaned;
}
INTPTR do_phrases(utterance_struct &u) { phrase_struct p; CFSWString res; p.phone_c = 0; p.syl_c = 0; p.word_c = 0; for (INTPTR i = 0; i < u.s.GetLength(); i++) { CFSWString c = u.s.GetAt(i); CFSWString pc = res.GetAt(res.GetLength() - 1); CFSWString nc = u.s.GetAt(i + 1); CFSWString nnc = u.s.GetAt(i + 2); if ((is_comma(c) || is_colon(c) || is_semicolon(c)) && is_space(nc) && is_char(nnc)) { res.Trim(); if (res.GetLength() > 0) { push_ph_res(u, p, res); } } else if (is_bbracket(c)) { res.Trim(); if (res.GetLength() > 0) { push_ph_res(u, p, res); } p.s = L"sulgudes"; u.phr_vector.AddItem(p); } else if (is_ebracket(c)) { res.Trim(); if (res.GetLength() > 0) { push_ph_res(u, p, res); } } else if (is_space(c)) { // komatud sidesõnad CFSWString tempm = u.s.Mid(i + 1, -1); res.Trim(); if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) { push_ph_res(u, p, res); } else res += c; } else if (is_bhyphen(c)) { res.Trim(); if (res.GetLength() > 0 && ((is_char(pc) && is_space(nc)) || (is_space(nc) && is_char(nnc)) || (is_space(pc) && is_char(nc)))) { push_ph_res(u, p, res); } else res += c; } else res += c; } if (res.GetLength() > 0) { // if (is_ending(res.GetAt(res.GetLength() - 1))) { // res.Delete(res.GetLength() - 1, 1); // } push_ph_res(u, p, res); } return u.phr_vector.GetSize(); }
/*
 * Macro-expands a token sequence.
 *
 * ts         first token to process; may be NULL
 * recursive  when false, additional tokens are fetched with next_tokenseq()
 *            whenever the current sequence runs out; when true only the
 *            tokens reachable from ts are considered
 *
 * Returns the resulting tokens as a newly constructed sequence in their
 * original order.
 */
TokenSeq *glsl_expand(TokenSeq *ts, bool recursive)
{
   TokenSeq *out = NULL;   /* built back to front, reversed on return */

   STACK_CHECK();

   /* Keep going while there is input, or until at least one token was produced. */
   while (ts != NULL || out == NULL) {
      Macro *macro = NULL;

      if (ts == NULL && !recursive)
         ts = next_tokenseq();

      if (ts == NULL)
         break;

      /* A token listed in its own hide list is not looked up as a macro. */
      if (!glsl_tokenlist_contains(ts->hide, ts->token))
         macro = glsl_macrolist_find(directive_macros, ts->token);

      if (macro != NULL) {
         switch (macro->type) {
         case MACRO_OBJECT:
         {
            /* Substitute the body and rescan it in front of the remaining input. */
            TokenSeq *body = subst(macro->body, NULL, NULL, glsl_tokenlist_construct(ts->token, ts->hide), NULL);

            ts = glsl_tokenseq_destructive_reverse(body, ts->next);
            continue;   /* effective tail call */
         }

         case MACRO_FUNCTION:
            if (ts->next == NULL && !recursive)
               ts->next = next_tokenseq();

            /* Only an invocation followed by '(' is expanded; otherwise the
               name is emitted like an ordinary token. */
            if (ts->next && is_lparen(ts->next->token)) {
               const int expected = glsl_tokenlist_length(macro->args);
               int nesting = 0;    /* depth of nested parentheses */
               int supplied = 0;   /* number of actual arguments seen */

               TokenSeq *cursor = ts->next->next;
               TokenSeq *arg = NULL;   /* current argument, in reverse order */
               TokenSeqList *actuals = NULL;

               /* gather up actual parameters */
               for (;;) {
                  if (cursor == NULL && !recursive)
                     cursor = next_tokenseq();

                  /* NOTE(review): cursor is dereferenced right after this
                     call, so glsl_compile_error presumably does not
                     return - confirm. */
                  if (cursor == NULL)
                     glsl_compile_error(ERROR_PREPROCESSOR, 1, g_LineNumber, "mismatched parenthesis in macro invocation");

                  if (is_lparen(cursor->token))
                     nesting++;

                  if (is_rparen(cursor->token)) {
                     if (nesting == 0)
                        break;   /* closing parenthesis of the invocation */
                     nesting--;
                  }

                  if (is_comma(cursor->token) && nesting == 0) {
                     /* top-level comma: finish the current argument */
                     actuals = glsl_tokenseqlist_construct(glsl_tokenseq_destructive_reverse(arg, NULL), actuals);
                     supplied++;
                     arg = NULL;
                  } else {
                     arg = glsl_tokenseq_construct(cursor->token, cursor->hide, arg);
                  }

                  cursor = cursor->next;
               }

               /* The last argument is recorded when a comma was seen, when
                  the macro takes exactly one parameter, or when any tokens
                  were collected. */
               if (supplied > 0 || expected == 1 || arg != NULL) {
                  actuals = glsl_tokenseqlist_construct(glsl_tokenseq_destructive_reverse(arg, NULL), actuals);
                  supplied++;
               }

               /* check against arity of macro */
               if (supplied != expected) {
                  glsl_compile_error(ERROR_PREPROCESSOR, 1, g_LineNumber, "arity mismatch in macro invocation");
               } else {
                  ts = glsl_tokenseq_destructive_reverse(subst(macro->body, macro->args, actuals, glsl_tokenlist_construct(ts->token, glsl_tokenlist_intersect(ts->hide, cursor->hide)), NULL), cursor->next);
                  continue;   /* effective tail call */
               }
            }
            break;

         case MACRO_LINE:
            /* Replace the token with a number token built from g_LineNumber. */
            ts = glsl_tokenseq_construct(glsl_token_construct_ppnumberi(g_LineNumber), NULL, ts->next);
            break;

         case MACRO_FILE:
            /* Replace the token with a number token built from g_FileNumber. */
            ts = glsl_tokenseq_construct(glsl_token_construct_ppnumberi(g_FileNumber), NULL, ts->next);
            break;

         default:
            UNREACHABLE();
            break;
         }
      }

      /* Emit the current token and advance. */
      out = glsl_tokenseq_construct(ts->token, ts->hide, out);
      ts = ts->next;
   }

   return glsl_tokenseq_destructive_reverse(out, NULL);
}
char *get_token(char *lexeme , int mode){ char *token=(char*)calloc(strlen(lexeme)+50,sizeof(char)); //printf("Getting token\n"); if(is_long(lexeme)){ sprintf(token,"%d",LONG); } else if(is_static(lexeme)){ sprintf(token,"%d",STATIC); } else if(is_union(lexeme)){ sprintf(token,"%d",UNION); } else if(is_default(lexeme)){ sprintf(token,"%d",DEFAULT); } else if(is_break(lexeme)){ sprintf(token,"%d",BREAK); } else if(is_case(lexeme)){ sprintf(token,"%d",CASE); } else if(is_continue(lexeme)){ sprintf(token,"%d",CONTINUE); } else if(is_goto(lexeme)){ sprintf(token,"%d",GOTO); } else if(is_struct(lexeme)){ sprintf(token,"%d",STRUCT); } else if(is_const(lexeme)){ sprintf(token,"%d",CONST); } else if(is_void(lexeme)){ sprintf(token,"%d",VOID); } else if(is_switch(lexeme)){ sprintf(token,"%d",SWITCH); } else if(is_for(lexeme)){ sprintf(token,"%d",FOR); } else if(is_while(lexeme)){ sprintf(token,"%d",WHILE); } else if(is_do(lexeme)){ sprintf(token,"%d",DO); } else if(is_return(lexeme)){ sprintf(token,"%d",RETURN); } else if(is_bool(lexeme)){ sprintf(token,"%d",BOOL); } else if(is_char(lexeme)){ sprintf(token,"%d",CHAR); } else if(is_signed(lexeme)){ sprintf(token,"%d",SIGNED); } else if(is_unsigned(lexeme)){ sprintf(token,"%d",UNSIGNED); } else if(is_short(lexeme)){ sprintf(token,"%d",SHORT); } else if(is_int(lexeme)){ sprintf(token,"%d",INT); } else if(is_float(lexeme)){ sprintf(token,"%d",FLOAT); } else if(is_double(lexeme)){ sprintf(token,"%d",DOUBLE); } else if(is_l_square(lexeme)){ sprintf(token,"%d",L_SQUARE); } else if(is_r_square(lexeme)){ sprintf(token,"%d",R_SQUARE); } else if(is_l_paraen(lexeme)){ sprintf(token,"%d",L_PARAEN); } else if(is_r_paraen(lexeme)){ sprintf(token,"%d",R_PARAEN); } else if(is_l_cbrace(lexeme)){ sprintf(token,"%d",L_CBRACE); } else if(is_r_cbrace(lexeme)){ sprintf(token,"%d",R_CBRACE); } else if(is_comma(lexeme)){ sprintf(token,"%d",COMMA); } else if(is_semicol(lexeme)){ sprintf(token,"%d",SEMICOL); } else if(is_eq_eq(lexeme)){ 
sprintf(token,"%d",EQ_EQ); } else if(is_lesser(lexeme)){ sprintf(token,"%d",LESSER); } else if(is_less_eq(lexeme)){ sprintf(token,"%d",LESS_EQ); } else if(is_div(lexeme)){ sprintf(token,"%d",DIV); } else if(is_greater(lexeme)){ sprintf(token,"%d",GREATER); } else if(is_great_eq(lexeme)){ sprintf(token,"%d",GREAT_EQ); } else if(is_plus_eq(lexeme)){ sprintf(token,"%d",PLUS_EQ); } else if(is_minus_eq(lexeme)){ sprintf(token,"%d",MINUS_EQ); } else if(is_div_eq(lexeme)){ sprintf(token,"%d",DIV_EQ); } else if(is_mult_eq(lexeme)){ sprintf(token,"%d",MULT_EQ); } else if(is_minus_minus(lexeme)){ sprintf(token,"%d",MINUS_MINUS); } else if(is_plus_plus(lexeme)){ sprintf(token,"%d",PLUS_PLUS); } else if(is_percent(lexeme)){ sprintf(token,"%d",PERCENT); } else if(is_div(lexeme)){ sprintf(token,"%d",DIV); } else if(is_mult(lexeme)){ sprintf(token,"%d",MULT); } else if(is_minus(lexeme)){ sprintf(token,"%d",MINUS); } else if(is_plus(lexeme)){ sprintf(token,"%d",PLUS); } else if(is_int_const(lexeme)){ printf("int"); sprintf(token,"%d\t%s",INT_CONST,lexeme); } else if(is_flo_const(lexeme)){ printf("float"); sprintf(token,"%d\t%s",FLO_CONST,lexeme); } else if(is_comment_start(lexeme)){ sprintf(token,"$start"); } else if(is_comment_end(lexeme)){ sprintf(token,"$end"); } else if(is_identifier(lexeme)){ printf("Identifier"); if(mode==1) ht_set( symboltable, lexeme, "1"); sprintf(token,"%d\t%s",IDNTIFIER,lexeme); } else sprintf(token,"%d",NOTOK); return token; }
// 私有读取下一个词语的方法 Token TokenAnalyze::_next_token() { if(!filein) // 输入流打开错误 { return Token("",token_type::ENDOFFILE,line_number); } char ch; // 首先过滤空白符 while(filein.get(ch) && is_space(ch)) if(ch == '\n') line_number ++ ; if(is_letter(ch)) { return identifier(ch); } else if(is_digit(ch)) { return number(ch); } else { switch(ch) { case '\'': return char_con(ch);break; case '"': return string_con(ch);break; case '+': case '-': return add_operator(ch);break; case '*': case '/': return multiply_operator(ch);break; case '=': return assign_equal_operator(ch);break; case '<': case '>': case '!': return relation_operator(ch);break; case ';': return is_semicn(ch);break; case ',': return is_comma(ch);break; case '(': return left_parent(ch);break; case ')': return right_parent(ch);break; case '[': return left_brack(ch);break; case ']': return right_brack(ch);break; case '{': return left_brace(ch);break; case '}': return right_brace(ch);break; default: return Token(""+ch,token_type::UNKNOWN,line_number); } } }