Beispiel #1
0
/*!
Appends a pseudo-random, pronounceable "word" of exactly len characters to target.

The word opens with one upper-cased consonant and then alternates between a
run of vowels and a single consonant until len characters have been written.
A vowel run holds at least one vowel and at most three; each extra vowel is
only attempted when a random byte is above 225.

A freshly drawn letter that equals the previously appended one is re-drawn
only when a further random byte exceeds a threshold (220 for consonants,
130 for vowels); otherwise the repeat is accepted.

\param target output buffer that receives the generated characters
\param len    number of characters to generate; 0 produces no output
*/
void ATextGenerator::generateRandomWord(AOutputBuffer& target, size_t len /* = 0x1 */)
{
  //a_Nothing to do for an empty request
  if (0 == len)
    return;

  AString letter;
  //a_Seeded with "-"; the leading capital is never stored here, so the first
  //a_vowel is compared against this placeholder
  AString previous("-", 1);

  //a_Leading letter: one consonant, upper-cased
  generateRandomString(letter, 1, AConstant::CHARSET_LOWERCASE_CONSONANTS);
  letter.makeUpper();
  target.append(letter);

  size_t remaining = len - 1;
  bool consonantNext = false;
  while (remaining > 0)
  {
    if (!consonantNext)
    {
      //a_Vowel run: one vowel for sure, then possibly up to two more
      int extraVowels = 0;
      for (;;)
      {
        //a_Draw a vowel; a repeat of the previous letter is sometimes re-drawn
        for (;;)
        {
          letter.clear();
          generateRandomString(letter, 1, AConstant::CHARSET_LOWERCASE_VOWELS);
          const bool redraw = (letter.at(0) == previous.at(0))
                              && (ARandomNumberGenerator::get().nextU1() > 130);
          if (!redraw)
            break;
        }

        target.append(letter);
        --remaining;
        previous = letter;

        //a_The checks below run in this exact order so that the counter and
        //a_the random source are only touched when the earlier checks pass
        if (0 == remaining)
          break;
        if (extraVowels++ >= 2)
          break;
        const bool anotherVowel = (ARandomNumberGenerator::get().nextU1() > 225);
        if (!anotherVowel)
          break;
      }

      consonantNext = true;
    }
    else
    {
      //a_Single consonant; a repeat of the previous letter is sometimes re-drawn
      for (;;)
      {
        letter.clear();
        generateRandomString(letter, 1, AConstant::CHARSET_LOWERCASE_CONSONANTS);
        const bool redraw = (letter.at(0) == previous.at(0))
                            && (ARandomNumberGenerator::get().nextU1() > 220);
        if (!redraw)
          break;
      }

      target.append(letter);
      --remaining;
      previous = letter;
      consonantNext = false;
    }
  }
}
Beispiel #2
0
/*
 * Pushes the character that was just read back onto the input and undoes the
 * row/col bookkeeping that was done for it, so that re-reading the character
 * does not count it twice.
 *
 * c          - character returned by the last fgetc(in); may be EOF
 * col_before - value col had before c was read
 *
 * EOF is never handed to ungetc(): there is nothing to push back, and the next
 * read reports end of input again on its own.
 */
static void scanner_unread_char(int c, int col_before)
{
	if (c == '\n')
		row--;
	col = col_before;

	if (c != EOF)
		ungetc(c, in);
}

/*
 * Reads characters from `in` and recognises the next token with a
 * finite-state machine, storing the result in the global `token`.
 *
 * On success token.type is set and, depending on the token, one of
 * token.value_int, token.value_double or token.p_string is filled in
 * (p_string is whatever strGetStr(buffer) returns, i.e. it refers to the
 * shared scanner buffer).
 *
 * Returns:
 *   OK  - a token was recognised
 *   EOF - end of input was reached with no token in progress
 *         (token.type is TT_EOF)
 *   0   - fgetc() returned 0 (a NUL byte in the input); token.type keeps the
 *         value assigned by the last statement that touched it
 *
 * End of input terminates a token that is still being accumulated
 * (identifier, keyword, number, operator, '/', or a \x, \0, \b integer
 * literal): that token is returned first and EOF is reported by the
 * following call. End of input inside a string literal, an escape
 * sequence, or a comment is reported as plain EOF without a diagnostic.
 *
 * NOTE(review): whether lex_error() returns to the caller is not visible
 * here; the code after each lex_error() call is kept exactly as it was.
 */
int get_token()
{
	typedef enum {
		INIT,
		OPERATOR,
		SLASH,
		STRING,
		STRING_BACKSLASH,
		STRING_HEXA,
		STRING_BINARY,
		STRING_OCTA,
		NUMBER,
		FLOAT,
		FLOAT_EXP,
		ID,
		ID_KEYWORD,
		LINE_COMMENT,
		BLOCK_COMMENT,
		BLOCK_COMMENT_END,
		BASE_EXT,
		BINARY,
		OCTA,
		HEXA
	} Tstate;

	Tstate state = INIT;

	int c;
	int j = 0;            // number of digits collected in literal[]
	int prev_col = 0;     // value of col before the current character was read

	int ret_val = 0;
	int escape_seq = 0;

	strClear(buffer);

	token.type = TT_ERR;

	while ((c = fgetc(in)))
	{
		// remember the column so that an unread can restore it exactly
		prev_col = col;

		if (c == '\n')
		{
			row++;
			col = 0;
		}
		else
			col++;

		if (c == EOF)
		{
			switch (state)
			{
				// a token is in progress: let the state machine below treat
				// EOF as the terminating character so the token is not lost
				case HEXA:
				case OCTA:
				case BINARY:
				case ID_KEYWORD:
				case ID:
				case SLASH:
				case OPERATOR:
				case NUMBER:
				case FLOAT:
				case FLOAT_EXP:
				break;

				// nothing pending (or inside a string/comment): report EOF
				default:
					token.type = TT_EOF;
					return EOF;
			}
		}

	#ifdef SCANNER_DEBUG
		fprintf(stderr, "%s (%s)", fsm_states[state], strGetStr(buffer));
		if (strFirst(buffer) == '\0')
			fprintf(stderr, "\n");
		else
			fprintf(stderr, " -> ");
	#endif // DEBUG

		switch(state)
		{
			case INIT:
				if (c == '/')  // comment or operator
				{
					state = SLASH;
					strAddChar(buffer, c);
				}
				else if (is_operator(c))
				{
					state = OPERATOR;
					strAddChar(buffer, c);
				}
				else if (c == '"') // string literal
				{
					state = STRING;
				}
				else if (c == '\\') // x, b, 0 literals supported - BASE
				{
					state = BASE_EXT;
				}
				else if (isdigit(c))  // number -> integer or double literal
				{
					state = NUMBER;
					strAddChar(buffer, c);
				}
				else if (c == '_')   // id
				{
					state = ID;
					strAddChar(buffer, c);
				}
				else if (isalpha(c)) // alphabetic char -> id or keyword
				{
					state = ID_KEYWORD;
					strAddChar(buffer, c);
				}
				else if ((ret_val = is_delimiter(c)))
				{
					// is_delimiter() yields a 1-based index into the delimiter token types
					token.type = TYPE_DELIMITER + ret_val - 1;
					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}
				else if (!isspace(c))    // non valid character
				{
					lex_error("Unknown character: '%c'.\n", c);
				}
			break;

			case BASE_EXT:  // character after '\' selects the base of an integer literal
				if (c == 'b')
				{
					state = BINARY;
				}
				else if (c == '0')
				{
					state = OCTA;
				}
				else if (c == 'x')
				{
					state = HEXA;
				}
				else
					lex_error("Unknown character in literal '\\%c'.\n", c);
			break;

			case HEXA:
				if (isxdigit(c))
				{
					if (j < 8)  // 8 hexadecimal digits are max int value
					{
						literal[j] = c;
						j++;
					}
					else
						lex_error("Hexadecimal literal too long -> int overflow!\n");
				}
				else
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_VALUE_INT;
					literal[j] = '\0';
					token.value_int = (int) strtol(literal, NULL, 16);  // cannot fail
					if (token.value_int < 0)
						lex_warning("Hexadecimal literal '\\x%s' overflow to negative number %d\n", literal, token.value_int);
					return OK;
				}

			break;

			case OCTA:
				if (c >= '0' && c <= '7')
				{
					if (j < 12)  // max int = \0 7777 7777 7777
					{
						literal[j] = c;
						j++;
					}
					else
						lex_error("Octal literal too long -> int overflow!\n");
				}
				else
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_VALUE_INT;
					literal[j] = '\0';
					token.value_int = (int) strtol(literal, NULL, 8);
					if (token.value_int < 0)
						lex_warning("Octal literal '\\0%s' overflow to negative number %d\n", literal, token.value_int);
					return OK;
				}

			break;

			case BINARY:
				if ((c == '0' || c == '1'))
				{
					if (j < 32)
					{
						literal[j] = c;
						j++;
					}
					else
						lex_error("Binary literal too long -> int overflow!\n");
				}
				else
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_VALUE_INT;
					literal[j] = '\0';
					token.value_int = (int) strtol(literal, NULL, 2);
					if (token.value_int < 0)
						lex_warning("Binary literal '\\b%s' overflow to negative number %d\n", literal, token.value_int);
					return OK;
				}

			break;

			case ID_KEYWORD:
				if (isalpha(c))  // add another char into buffer
				{
					strAddChar(buffer, c);
				}
				else if (c == '_' || isdigit(c))  // id - these chars are not in any keyword
				{
					state = ID;
					strAddChar(buffer, c);
				}
				else // end of id or keyword
				{
					scanner_unread_char(c, prev_col);  // return last read char to the input

					ret_val = is_keyword(strGetStr(buffer));
					if (ret_val)
					{
						// is_keyword() yields a 1-based index into the keyword token types
						token.type = TYPE_KEYWORD + ret_val - 1;
						#ifdef SCANNER_DEBUG
							fprintf(stderr, "%s\n", token_name[token.type]);
						#endif
						return OK;
					}
					else
					{
						token.type = TT_ID;
						token.p_string = strGetStr(buffer);
						#ifdef SCANNER_DEBUG
							fprintf(stderr, "%s\n", token_name[token.type]);
						#endif
						return OK;
					}
				}
			break;

			case ID:
				if (isalnum(c) || c == '_')
				{
					strAddChar(buffer, c);
				}
				else
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_ID;
					token.p_string = strGetStr(buffer);
					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}
			break;

			case SLASH:
				if (c == '/')
				{
					state = LINE_COMMENT;
				}
				else if (c == '*')
				{
					state = BLOCK_COMMENT;
				}
				else  // it was division
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_DIVIDE;
					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}
			break;

			case OPERATOR:  // not precisely "normal" fsm, but easily extensible (just add operator to operators[] and Ttoken_type)
				// EOF is kept away from is_operator(), whose handling of it is not visible here
				if (c != EOF && is_operator(c))  // c is one of valid chars, that can be in operator
				{
					strAddChar(buffer, c);
					ret_val = determine_operator(strGetStr(buffer));   // check if we still have valid operator in buffer

					if (!ret_val)         // if it's not valid operator
					{
						scanner_unread_char(c, prev_col);  // return last char, it was not part of operator
						strDelChar(buffer);                // delete wrong char from buffer

						ret_val = determine_operator(strGetStr(buffer));  // determine which operator we have
						token.type = TYPE_OPERATOR + ret_val - 1;         // return token
						#ifdef SCANNER_DEBUG
							fprintf(stderr, "%s\n", token_name[token.type]);
						#endif
						return OK;
					}
					// continue with loading chars if it's valid
				}
				else  // another char is not operator -> end
				{
					scanner_unread_char(c, prev_col);
					ret_val = determine_operator(strGetStr(buffer));
					if (ret_val)
					{
						token.type = TYPE_OPERATOR + ret_val - 1;
						#ifdef SCANNER_DEBUG
							fprintf(stderr, "%s\n", token_name[token.type]);
						#endif
						return OK;
					}
					else // shouldn't occur, just to be sure..
					{
						lex_error("Unknown operator: '%s'.\n", strGetStr(buffer));
					}
				}
			break;

			case LINE_COMMENT:
				if (c == '\n')   // end of line comment
				{
					state = INIT;
					strClear(buffer);  // drop the '/' stored when the comment started
				}
			break;

			case BLOCK_COMMENT:
				if (c == '*')    // possible end of comment
					state = BLOCK_COMMENT_END;
			break;

			case BLOCK_COMMENT_END:
				if (c == '/')    // comment ended
				{
					state = INIT;
					strClear(buffer);  // drop the '/' stored when the comment started
				}
				else            // false alarm - comment continues
					state = BLOCK_COMMENT;
			break;

			case NUMBER:
				if (isdigit(c))
				{
					strAddChar(buffer, c);
				}
				else if (c == '.')
				{
					strAddChar(buffer, c);
					state = FLOAT;
				}
				else if (tolower(c) == 'e')
				{
					strAddChar(buffer, c);
					state = FLOAT_EXP;
				}
				else
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_VALUE_INT;
					token.value_int = (int) strtol(strGetStr(buffer), NULL, 10);

					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}

			break;

			case FLOAT:  // at least one digit!
				// NOTE(review): the "at least one digit" rule above is not enforced
				// by this state; "1." is accepted as a double as written
				if (isdigit(c))
				{
					strAddChar(buffer, c);
				}
				else if (tolower(c) == 'e')
				{
					strAddChar(buffer, c);
					state = FLOAT_EXP;
				}
				else
				{
					scanner_unread_char(c, prev_col);

					token.type = TT_VALUE_DOUBLE;
					token.value_double = strtod(strGetStr(buffer), NULL);

					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}
			break;

			case FLOAT_EXP:
				if (isdigit(c))
				{
					strAddChar(buffer, c);
				}
				else if (tolower(strLast(buffer)) == 'e' && (c == '+' || c == '-')) // optional +/- after e/E
				{
					strAddChar(buffer, c);
				}
				else
				{
					scanner_unread_char(c, prev_col);
					token.type = TT_VALUE_DOUBLE;
					token.value_double = strtod(strGetStr(buffer), NULL);

					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}
			break;

			case STRING:
				if (c == '"')   // end of string literal
				{
					token.type = TT_VALUE_STRING;
					token.p_string = strGetStr(buffer);
					#ifdef SCANNER_DEBUG
						fprintf(stderr, "%s\n", token_name[token.type]);
					#endif
					return OK;
				}
				else if (c == '\\')   // string literal continues on another line or character constant
					state = STRING_BACKSLASH;
				else if (c != '\n')
				{
					strAddChar(buffer, c);
				}
				else
				{
					lex_error("String literal not closed.\n");
				}
			break;

			case STRING_BACKSLASH:
				// back to STRING by default; numeric escapes override this below
				state = STRING;

				if (c == '\\')
				{
					strAddChar(buffer, '\\');
				}
				else if (c == 'n')
				{
					strAddChar(buffer, '\n');
				}
				else if (c == 't')
				{
					strAddChar(buffer, '\t');
				}
				else if (c == '"')
				{
					strAddChar(buffer, '"');
				}
				else if (c == 'x')
				{
					state = STRING_HEXA;
				}
				else if (c == 'b')
				{
					state = STRING_BINARY;
				}
				else if (c == '0')
				{
					state = STRING_OCTA;
				}
				else if (c == '\n')
				{
					// do nothing, string continues on next line - TODO: document this change
				}
				else
				{
					lex_error("Escape sequence '\\%c' unknown.\n", c);
				}
			break;

			case STRING_HEXA:
				if (j < 2 && isxdigit(c)) // 2 is max hexadecimal escape length
				{
					literal[j] = c;
					j++;
				}
				else if (j == 0) // no valid hexadecimal digit after \x -> error
				{
					lex_error("'\\x%c' is not valid hexadecimal escape sequence.\n", c);
				}
				else  // end of hexadecimal escape
				{
					literal[j] = '\0';
					escape_seq = strtol(literal, NULL, 16);  // will always be successful

					if (escape_seq == 0)
					{
						lex_error("\\x00 is not allowed hexadecimal escape sequence.\n");
					}

					strAddChar(buffer, escape_seq);

					scanner_unread_char(c, prev_col);   // return currently read char
					j = 0;

					state = STRING;
				}
			break;

			case STRING_BINARY:
				if (j < 8 && (c == '0' || c == '1')) // 8 is max binary escape length
				{
					literal[j] = c;
					j++;
				}
				else if (j == 0) // no valid binary digit after \b -> error
				{
					lex_error("'\\b%c' is not valid binary escape sequence.\n", c);
				}
				else  // end of binary escape
				{
					literal[j] = '\0';
					escape_seq = strtol(literal, NULL, 2);  // will always be successful

					if (escape_seq == 0)
					{
						lex_error("\\b00000000 is not allowed binary escape sequence.\n");
					}

					strAddChar(buffer, escape_seq);

					scanner_unread_char(c, prev_col);   // return currently read char
					j = 0;

					state = STRING;
				}
			break;

			case STRING_OCTA:
				if (j < 3 && c >= '0' && c <= '7') // 3 is max octal escape length
				{
					literal[j] = c;
					j++;
				}
				else if (j == 0) // no valid octal digit after \0 -> error
				{
					lex_error("'\\0%c' is not valid octal escape sequence.\n", c);
				}
				else  // end of octal escape
				{
					literal[j] = '\0';
					escape_seq = strtol(literal, NULL, 8);  // will always be successful

					if (escape_seq == 0)
					{
						lex_error("\\000 is not allowed octal escape sequence.\n");
					}
					else if (escape_seq > 255)
					{
						lex_error("Octal escape '\\0%s' bigger than 255.\n", literal);
					}

					strAddChar(buffer, escape_seq);

					scanner_unread_char(c, prev_col);   // return currently read char
					j = 0;

					state = STRING;
				}
			break;

			default:
				lex_error("Scanner panic!!!\n");
			break;
		} // end_switch
	} // end_while

	// reached only when fgetc() returned 0, i.e. a NUL byte in the input
	return 0;
}
Beispiel #3
0
   // Constructs the wrapper from the C string `str`: `fst` and `cstr` are both
   // initialised with `str` itself, and `lst` with the result of strLast(str).
   // NOTE(review): strLast is defined elsewhere; presumably it locates the end
   // (or last character) of the string - confirm. `str` is handed to strLast
   // without any null check visible here.
   basic_str_wrap( Char_T const* str )
       :fst(str)
	   ,lst( strLast(str) )
	   ,cstr(str)
   {}