Exemple #1
0
// Normalizes raw input text before further processing: trims it,
// canonicalizes apostrophe-like characters, then copies it character by
// character while filtering out sequences the pipeline cannot handle
// (doubled punctuation, stray hyphens, redundant whitespace, ...).
// Returns the cleaned string.
// NOTE(review): the predicates (is_vowel, is_char, is_fchar, ...) and the
// constant `sp` are defined elsewhere in the project; the branch comments
// below are inferred from their names — confirm against their definitions.
CFSWString DealWithText(CFSWString text) {
    /* Trying to throw all the garbage away */
    CFSWString res;
    text.Trim();
    // Collapse doubled newlines and unify all apostrophe/accent variants
    // to a plain ASCII quote so the loop below only has to test L"'".
    // NOTE(review): the third argument `1` is presumed to mean
    // "replace all occurrences" — confirm against CFSWString::Replace.
    text.Replace(L"\n\n", L"\n", 1);
    text.Replace(L"‘", L"'", 1);
    text.Replace(L"`", L"'", 1);
    text.Replace(L"´", L"'", 1);
    text.Replace(L"’", L"'", 1);

    for (INTPTR i = 0; i < text.GetLength(); i++) {
        // c  = current input char, pc = last char already emitted to res,
        // nc = next input char (lookahead).
        // NOTE(review): GetAt(-1) on an empty `res` and GetAt(GetLength())
        // at the last index are relied on to return an empty/safe value —
        // confirm CFSWString::GetAt semantics.
        CFSWString c = text.GetAt(i);
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = text.GetAt(i + 1);
        if (c == L"'") {
            // Apostrophe after a vowel is rewritten as 'q'
            // (presumably a palatalization marker — verify with caller).
            if (is_vowel(pc)) 
                res += L"q";
            else
                res += c;
        }
        else
        // Letters and digits pass through unchanged.
        if (is_char(c)) res += c;
        else
            if (is_digit(c)) res += c;
        else
            // A hyphen between two letters becomes a separator (`sp`).
            if (is_hyphen(c) && is_char(pc) && is_char(nc)) res += sp;
        else
            if (is_symbol(c)) res += c;
        else
            // Punctuation classes below are de-duplicated: only emit when
            // the previously emitted char is not already of the same class.
            if (is_colon(c) && !is_colon(pc)) res += c;
        else
            if (is_bbracket(c) && !is_bbracket(pc)) res += c;
        else
            // Closing bracket directly before sentence-ending punctuation
            // is dropped (empty append).
            if (is_ebracket(c) && is_ending(nc)) res += L"";
        else
            if (is_ebracket(c) && !is_ebracket(pc)) res += c;
        else
            if (is_comma(c) && !is_comma(pc)) res += c;
        else
            // "Foreign" characters get mapped to a local replacement.
            if (is_fchar(c)) res += replace_fchar(c);
        else
            // Whitespace is collapsed: never emit after existing whitespace.
            if (is_space(c) && !is_whitespace(pc)) res += c;
        else
            if (is_break(c) && !is_break(pc)) { 
                
                res += c;   
            } // dubious
        else
            if (is_tab(c) && !is_whitespace(pc)) res += c;
        else            
            // Sentence-ending punctuation: only after a non-ending,
            // non-whitespace character. Anything matching no branch above
            // is silently dropped.
            if (is_ending(c) && !is_ending(pc) && !is_whitespace(pc)) res += c;

    }
    res.Trim();        
    return res;

}
Exemple #2
0
// Splits the utterance text u.s into phrases, pushing each completed
// phrase into u.phr_vector via push_ph_res(). Phrase boundaries are:
// comma/colon/semicolon followed by a space and a letter, brackets,
// conjunctions not preceded by a comma, and free-standing hyphens.
// Returns the resulting number of phrases in u.phr_vector.
// NOTE(review): push_ph_res() is presumed to append `res` as a phrase and
// reset the counters in `p` — confirm against its definition.
INTPTR do_phrases(utterance_struct &u) {
    phrase_struct p;
    CFSWString res;
    p.phone_c = 0;
    p.syl_c = 0;
    p.word_c = 0;

    for (INTPTR i = 0; i < u.s.GetLength(); i++) {
        // c = current char; pc = last char accumulated into the current
        // phrase; nc/nnc = one and two characters of lookahead.
        // NOTE(review): out-of-range GetAt is relied on to return an
        // empty/safe value — confirm CFSWString::GetAt semantics.
        CFSWString c = u.s.GetAt(i);
        CFSWString pc = res.GetAt(res.GetLength() - 1);
        CFSWString nc = u.s.GetAt(i + 1);
        CFSWString nnc = u.s.GetAt(i + 2);
        // Clause punctuation followed by " <letter>" ends the phrase;
        // the punctuation character itself is discarded.
        if ((is_comma(c) || is_colon(c) || is_semicolon(c)) && is_space(nc) && is_char(nnc)) {
            res.Trim();
            if (res.GetLength() > 0) {
                push_ph_res(u, p, res);
            }
        } else
            // Opening bracket: flush the pending phrase, then push a
            // marker phrase (L"sulgudes" = "in brackets") for the
            // parenthetical that follows.
            if (is_bbracket(c)) {
               res.Trim();
            if (res.GetLength() > 0) {                
                push_ph_res(u, p, res);
            }
               p.s = L"sulgudes";
               u.phr_vector.AddItem(p);

        } else
            // Closing bracket: flush the parenthetical content.
            if (is_ebracket(c)) {
             res.Trim();
            if (res.GetLength() > 0) {                
                push_ph_res(u, p, res);
            }
        } else
            if (is_space(c)) { // conjunctions without a comma
            // If the next word is a conjunction, treat this space as a
            // phrase boundary; otherwise keep the space in the phrase.
            CFSWString tempm = u.s.Mid(i + 1, -1);
            res.Trim();
            if (is_conju(tempm.Left(tempm.Find(sp))) && res.GetLength() > 0) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            // Free-standing hyphen (dash) between words is a boundary;
            // a hyphen inside a word is kept.
            if (is_bhyphen(c)) {
            res.Trim();
            if (res.GetLength() > 0 && ((is_char(pc) && is_space(nc)) || (is_space(nc) && is_char(nnc)) || (is_space(pc) && is_char(nc)))) {
                push_ph_res(u, p, res);
            } else
                res += c;
        } else
            res += c;
    }

    // Flush the trailing phrase, if any.
    if (res.GetLength() > 0) {
        //        if (is_ending(res.GetAt(res.GetLength() - 1))) {
        //            res.Delete(res.GetLength() - 1, 1);
        //        }        
        push_ph_res(u, p, res);
    }        
    return u.phr_vector.GetSize();
}
/*
   Macro-expands the token sequence ts and returns the expanded sequence.
   Uses per-token hide sets to prevent infinite recursion (a token carrying
   its own macro name in its hide set is not re-expanded) — this matches
   the structure of the classic hide-set preprocessor expansion algorithm.
   When !recursive, the input is pulled lazily from next_tokenseq() as the
   current sequence runs out; when recursive, only ts itself is consumed.
   Results are accumulated in reverse and un-reversed on return.
*/
TokenSeq *glsl_expand(TokenSeq *ts, bool recursive)
{
   TokenSeq *res = NULL;

   STACK_CHECK();

   /* Loop until input is exhausted; "!res" forces at least one fetch in
      the non-recursive (streaming) mode. */
   while (ts || !res) {
      if (!recursive && ts == NULL)
         ts = next_tokenseq();

      if (ts == NULL)
         break;

      /* Only attempt expansion if this token's hide set does not already
         contain the token itself (i.e. it wasn't produced by expanding
         the same macro). */
      if (!glsl_tokenlist_contains(ts->hide, ts->token)) {
         Macro *m = glsl_macrolist_find(directive_macros, ts->token);

         if (m != NULL) {
            switch (m->type) {
            case MACRO_OBJECT:
               /* Object-like macro: substitute the body (with the macro
                  name added to the hide set) in front of the remaining
                  input, then re-scan. */
               ts = glsl_tokenseq_destructive_reverse(subst(m->body, NULL, NULL, glsl_tokenlist_construct(ts->token, ts->hide), NULL), ts->next);
               continue;   // effective tail call
            case MACRO_FUNCTION:
               if (!recursive && ts->next == NULL)
                  ts->next = next_tokenseq();

               /* A function-like macro only expands when followed by '('. */
               if (ts->next && is_lparen(ts->next->token)) {
                  int formal_count = glsl_tokenlist_length(m->args);

                  int depth = 0;
                  int count = 0;

                  /*
                     gather up actual parameters
                  */

                  TokenSeq *tail = ts->next->next;
                  TokenSeq *curr = NULL;

                  TokenSeqList *actuals = NULL;

                  while (true) {
                     if (!recursive && tail == NULL)
                        tail = next_tokenseq();

                     if (tail == NULL)
                        glsl_compile_error(ERROR_PREPROCESSOR, 1, g_LineNumber, "mismatched parenthesis in macro invocation");

                     /* Track nesting so commas inside nested parens do
                        not split arguments; depth 0 ')' ends the call. */
                     if (is_lparen(tail->token))
                        depth++;
                     if (is_rparen(tail->token))
                     {
                        if (depth == 0)
                           break;
                        else
                           depth--;
                     }
                     /* Top-level comma: finish the current argument
                        (built in reverse, so un-reverse it). */
                     if (is_comma(tail->token))
                        if (depth == 0) {
                           actuals = glsl_tokenseqlist_construct(glsl_tokenseq_destructive_reverse(curr, NULL), actuals);
                           count++;

                           tail = tail->next;
                           curr = NULL;

                           continue;
                        }

                     curr = glsl_tokenseq_construct(tail->token, tail->hide, curr);      

                     tail = tail->next;
                  }

                  /* Flush the final argument. The formal_count == 1 case
                     admits a single empty argument, e.g. M(). */
                  if (count > 0 || formal_count == 1 || curr != NULL) {
                     actuals = glsl_tokenseqlist_construct(glsl_tokenseq_destructive_reverse(curr, NULL), actuals);
                     count++;
                  }  

                  /*
                     check against arity of macro
                  */

                  if (count == formal_count) {
                     /* Substitute with the hide set intersected between
                        the macro name and the closing paren (tail still
                        points at the ')'), then re-scan from there. */
                     ts = glsl_tokenseq_destructive_reverse(subst(m->body, m->args, actuals, glsl_tokenlist_construct(ts->token, glsl_tokenlist_intersect(ts->hide, tail->hide)), NULL), tail->next);
                     continue;   // effective tail call
                  } else 
                     glsl_compile_error(ERROR_PREPROCESSOR, 1, g_LineNumber, "arity mismatch in macro invocation");
               }
               break;
            case MACRO_LINE:
               /* __LINE__ / __FILE__ expand to pp-number tokens. */
               ts = glsl_tokenseq_construct(glsl_token_construct_ppnumberi(g_LineNumber), NULL, ts->next);
               break;
            case MACRO_FILE:
               ts = glsl_tokenseq_construct(glsl_token_construct_ppnumberi(g_FileNumber), NULL, ts->next);
               break;
            default:
               UNREACHABLE();
               break;
            }
         }
      }

      /* Token survived unexpanded: prepend to the (reversed) result. */
      res = glsl_tokenseq_construct(ts->token, ts->hide, res);
      ts = ts->next;
   }

   return glsl_tokenseq_destructive_reverse(res, NULL);
}
/*
 * Maps a lexeme to its token representation as a freshly allocated string.
 * Keywords/operators yield just the numeric token code; constants and
 * identifiers yield "code\tlexeme". Identifiers are also inserted into the
 * symbol table when mode == 1. Falls through to NOTOK for anything else.
 *
 * Returns a heap-allocated string the caller must free(), or NULL if
 * allocation fails.
 *
 * Fixes vs. previous version:
 *  - removed the duplicate (unreachable) is_div() branch that appeared a
 *    second time after is_percent();
 *  - added a NULL check on calloc() so an allocation failure no longer
 *    crashes inside sprintf().
 */
char *get_token(char *lexeme , int mode){
	/* +50 leaves room for the token code, tab, and terminator. */
	char *token=(char*)calloc(strlen(lexeme)+50,sizeof(char));
	if(token==NULL)
		return NULL;
	//printf("Getting token\n");
	if(is_long(lexeme)){
		sprintf(token,"%d",LONG);
	}
	else if(is_static(lexeme)){
		sprintf(token,"%d",STATIC);
	}
	else if(is_union(lexeme)){
		sprintf(token,"%d",UNION);
	}
	else if(is_default(lexeme)){
		sprintf(token,"%d",DEFAULT);
	}
	else if(is_break(lexeme)){
		sprintf(token,"%d",BREAK);
	}
	else if(is_case(lexeme)){
		sprintf(token,"%d",CASE);
	}
	else if(is_continue(lexeme)){
		sprintf(token,"%d",CONTINUE);
	}
	else if(is_goto(lexeme)){
		sprintf(token,"%d",GOTO);
	}
	else if(is_struct(lexeme)){
		sprintf(token,"%d",STRUCT);
	}
	else if(is_const(lexeme)){
		sprintf(token,"%d",CONST);
	}
	else if(is_void(lexeme)){
		sprintf(token,"%d",VOID);
	}
	else if(is_switch(lexeme)){
		sprintf(token,"%d",SWITCH);
	}
	else if(is_for(lexeme)){
		sprintf(token,"%d",FOR);
	}
	else if(is_while(lexeme)){
		sprintf(token,"%d",WHILE);
	}
	else if(is_do(lexeme)){
		sprintf(token,"%d",DO);
	}
	else if(is_return(lexeme)){
		sprintf(token,"%d",RETURN);
	}
	else if(is_bool(lexeme)){
		sprintf(token,"%d",BOOL);
	}
	else if(is_char(lexeme)){
		sprintf(token,"%d",CHAR);
	}
	else if(is_signed(lexeme)){
		sprintf(token,"%d",SIGNED);
	}
	else if(is_unsigned(lexeme)){
		sprintf(token,"%d",UNSIGNED);
	}
	else if(is_short(lexeme)){
		sprintf(token,"%d",SHORT);
	}
	else if(is_int(lexeme)){
		sprintf(token,"%d",INT);
	}
	else if(is_float(lexeme)){
		sprintf(token,"%d",FLOAT);
	}
	else if(is_double(lexeme)){
		sprintf(token,"%d",DOUBLE);
	}
	else if(is_l_square(lexeme)){
		sprintf(token,"%d",L_SQUARE);
	}
	else if(is_r_square(lexeme)){
		sprintf(token,"%d",R_SQUARE);
	}
	else if(is_l_paraen(lexeme)){
		sprintf(token,"%d",L_PARAEN);
	}
	else if(is_r_paraen(lexeme)){
		sprintf(token,"%d",R_PARAEN);
	}
	else if(is_l_cbrace(lexeme)){
		sprintf(token,"%d",L_CBRACE);
	}
	else if(is_r_cbrace(lexeme)){
		sprintf(token,"%d",R_CBRACE);
	}
	else if(is_comma(lexeme)){
		sprintf(token,"%d",COMMA);
	}
	else if(is_semicol(lexeme)){
		sprintf(token,"%d",SEMICOL);
	}
	/* Multi-char operators must be tested before their single-char
	   prefixes (e.g. "==" before "=", "<=" before "<"). */
	else if(is_eq_eq(lexeme)){
		sprintf(token,"%d",EQ_EQ);
	}
	else if(is_lesser(lexeme)){
		sprintf(token,"%d",LESSER);
	}
	else if(is_less_eq(lexeme)){
		sprintf(token,"%d",LESS_EQ);
	}
	else if(is_div(lexeme)){
		sprintf(token,"%d",DIV);
	}
	else if(is_greater(lexeme)){
		sprintf(token,"%d",GREATER);
	}
	else if(is_great_eq(lexeme)){
		sprintf(token,"%d",GREAT_EQ);
	}
	else if(is_plus_eq(lexeme)){
		sprintf(token,"%d",PLUS_EQ);
	}
	else if(is_minus_eq(lexeme)){
		sprintf(token,"%d",MINUS_EQ);
	}
	else if(is_div_eq(lexeme)){
		sprintf(token,"%d",DIV_EQ);
	}
	else if(is_mult_eq(lexeme)){
		sprintf(token,"%d",MULT_EQ);
	}
	else if(is_minus_minus(lexeme)){
		sprintf(token,"%d",MINUS_MINUS);
	}
	else if(is_plus_plus(lexeme)){
		sprintf(token,"%d",PLUS_PLUS);
	}
	else if(is_percent(lexeme)){
		sprintf(token,"%d",PERCENT);
	}
	else if(is_mult(lexeme)){
		sprintf(token,"%d",MULT);
	}
	else if(is_minus(lexeme)){
		sprintf(token,"%d",MINUS);
	}
	else if(is_plus(lexeme)){
		sprintf(token,"%d",PLUS);
	}
	/* Constants and identifiers carry the lexeme text after a tab. */
	else if(is_int_const(lexeme)){
		printf("int");
		sprintf(token,"%d\t%s",INT_CONST,lexeme);
	}
	else if(is_flo_const(lexeme)){
		printf("float");
		sprintf(token,"%d\t%s",FLO_CONST,lexeme);
	}
	/* Comment delimiters get sentinel strings rather than token codes. */
	else if(is_comment_start(lexeme)){
		sprintf(token,"$start");
	}
	else if(is_comment_end(lexeme)){
		sprintf(token,"$end");
	}
	else if(is_identifier(lexeme)){
		printf("Identifier");
		if(mode==1) ht_set( symboltable, lexeme, "1");
		sprintf(token,"%d\t%s",IDNTIFIER,lexeme);
	}
	else sprintf(token,"%d",NOTOK);
	return token;
}
Exemple #5
0
// Private helper: reads the next token from the input stream.
// Skips leading whitespace (counting '\n' for line numbering), then
// dispatches on the first significant character to the matching
// sub-scanner. Returns an ENDOFFILE token when the stream is bad or
// exhausted, and an UNKNOWN token for unrecognized characters.
//
// Fixes vs. previous version:
//  - after the whitespace-skip loop, a failed get() (EOF) now returns an
//    ENDOFFILE token; previously the stale `ch` was tokenized as garbage;
//  - the UNKNOWN case used ""+ch, which is pointer arithmetic on a string
//    literal (indexing past ""), not concatenation — replaced with a
//    proper one-character string;
//  - removed dead `break` statements after `return`.
Token TokenAnalyze::_next_token()
{
	if(!filein)  // input stream failed to open
	{
		return Token("",token_type::ENDOFFILE,line_number);
	}

	char ch;
	// Skip whitespace, tracking line numbers.
	while(filein.get(ch) && is_space(ch))
		if(ch == '\n')
			line_number ++ ;

	// get() failed above => end of input; `ch` holds no valid character.
	if(!filein)
	{
		return Token("",token_type::ENDOFFILE,line_number);
	}

	if(is_letter(ch))
	{
		return identifier(ch);
	}
	else if(is_digit(ch))
	{
		return number(ch);
	}
	else
	{
		switch(ch)
		{
		case '\'':
			return char_con(ch);
		case '"':
			return string_con(ch);
		case '+':
		case '-':
			return add_operator(ch);
		case '*':
		case '/':
			return multiply_operator(ch);
		case '=':
			return assign_equal_operator(ch);
		case '<':
		case '>':
		case '!':
			return relation_operator(ch);
		case ';':
			return is_semicn(ch);
		case ',':
			return is_comma(ch);
		case '(':
			return left_parent(ch);
		case ')':
			return right_parent(ch);
		case '[':
			return left_brack(ch);
		case ']':
			return right_brack(ch);
		case '{':
			return left_brace(ch);
		case '}':
			return right_brace(ch);
		default:
			{
				// Build a one-character C string for the unknown token.
				char unknown[2] = { ch, '\0' };
				return Token(unknown,token_type::UNKNOWN,line_number);
			}
		}
	}
}