//a_Appends a randomly generated word of len letters to target.
//a_  target - output buffer that receives the letters
//a_  len    - total number of letters to emit; 0 emits nothing
//a_The word opens with one consonant converted with makeUpper(); after that
//a_the generator alternates between a run of 1 to 3 vowels and one consonant.
//a_A freshly drawn letter equal to the previously appended loop letter is
//a_only sometimes redrawn: the redraw happens when nextU1() exceeds a
//a_threshold (220 for consonants, 130 for vowels).
//a_NOTE(review): assumes generateRandomString(s, 1, charset) yields exactly
//a_one character into s - confirm against its definition.
void ATextGenerator::generateRandomWord(AOutputBuffer& target, size_t len /* = 0x1 */)
{
  if (0 == len)
    return;

  AString letter;
  AString previous("-", 1);  //a_Last letter appended inside the loop; starts as "-"

  //a_Leading capital consonant
  generateRandomString(letter, 1, AConstant::CHARSET_LOWERCASE_CONSONANTS);
  letter.makeUpper();
  target.append(letter);
  --len;

  //a_Remaining letters: vowel run first, then a consonant, and so on
  for (bool needConsonant = false; len > 0; needConsonant = !needConsonant)
  {
    if (needConsonant)
    {
      //a_Single consonant; a repeat of the previous letter is occasionally redrawn
      do
      {
        letter.clear();
        generateRandomString(letter, 1, AConstant::CHARSET_LOWERCASE_CONSONANTS);
      }
      while (letter.at(0) == previous.at(0) && ARandomNumberGenerator::get().nextU1() > 220);

      target.append(letter);
      previous = letter;
      --len;
      continue;
    }

    //a_Vowel run: always one vowel, occasionally followed by up to two more
    int extraVowels = 0;
    for (;;)
    {
      do
      {
        letter.clear();
        generateRandomString(letter, 1, AConstant::CHARSET_LOWERCASE_VOWELS);
      }
      while (letter.at(0) == previous.at(0) && ARandomNumberGenerator::get().nextU1() > 130);

      target.append(letter);
      previous = letter;
      --len;

      //a_Same short-circuit order as a combined condition: length, run limit, then chance
      if (0 == len)
        break;
      if (extraVowels++ >= 2)
        break;
      if (ARandomNumberGenerator::get().nextU1() <= 225)
        break;
    }
  }
}
int get_token() { typedef enum { INIT, OPERATOR, SLASH, STRING, STRING_BACKSLASH, STRING_HEXA, STRING_BINARY, STRING_OCTA, NUMBER, FLOAT, FLOAT_EXP, ID, ID_KEYWORD, LINE_COMMENT, BLOCK_COMMENT, BLOCK_COMMENT_END, BASE_EXT, BINARY, OCTA, HEXA } Tstate; Tstate state = INIT; int c; int j = 0; int ret_val = 0; int escape_seq = 0; //char *check; strClear(buffer); token.type = TT_ERR; while ((c = fgetc(in))) { if (c == '\n') { row++; col = 0; } else col++; if (c == EOF) { token.type = TT_EOF; return EOF; } #ifdef SCANNER_DEBUG fprintf(stderr, "%s (%s)", fsm_states[state], strGetStr(buffer)); if (strFirst(buffer) == '\0') fprintf(stderr, "\n"); else fprintf(stderr, " -> "); #endif // DEBUG switch(state) { case INIT: if (c == '/') // comment or operator { state = SLASH; strAddChar(buffer, c); } else if (is_operator(c)) { state = OPERATOR; strAddChar(buffer, c); } else if (c == '"') // string literal { state = STRING; } else if (c == '\\') // x, b, 0 literals supported - BASE { state = BASE_EXT; } else if (isdigit(c)) // number -> integer or double literal { state = NUMBER; strAddChar(buffer, c); } else if (c == '_') // id { state = ID; strAddChar(buffer, c); } else if (isalpha(c)) // alphabetic char -> id or keyword { state = ID_KEYWORD; strAddChar(buffer, c); } else if ((ret_val = is_delimiter(c))) { token.type = TYPE_DELIMITER + ret_val - 1; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else if (!isspace(c)) // non valid character { lex_error("Unknown character: '%c'.\n", c); } break; case BASE_EXT: if (c == 'b') { state = BINARY; } else if (c == '0') { state = OCTA; } else if (c == 'x') { state = HEXA; } else lex_error("Unknown character in literal '\\%c'.\n", c); break; case HEXA: if (isxdigit(c)) { if (j < 8) // 8 hexadecimal digits are max int value { literal[j] = c; j++; } else lex_error("Hexadecimal literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int 
= (int) strtol(literal, NULL, 16); // cannot fail if (token.value_int < 0) lex_warning("Hexadecimal literal '\\x%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case OCTA: if (c >= '0' && c <= '7') { if (j < 12) // max int = \0 7777 7777 7777 { literal[j] = c; j++; } else lex_error("Octal literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int = (int) strtol(literal, NULL, 8); if (token.value_int < 0) lex_warning("Octal literal '\\0%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case BINARY: if ((c == '0' || c == '1')) { if (j < 32) { literal[j] = c; j++; } else lex_error("Binary literal too long -> int overflow!\n"); } else { ungetc(c, in); token.type = TT_VALUE_INT; literal[j] = '\0'; token.value_int = (int) strtol(literal, NULL, 2); if (token.value_int < 0) lex_warning("Binary literal '\\b%s' overflow to negative number %d\n", literal, token.value_int); return OK; } break; case ID_KEYWORD: if (isalpha(c)) // add another char into buffer { strAddChar(buffer, c); } else if (c == '_' || isdigit(c)) // id - these chars are not in any keyword { state = ID; strAddChar(buffer, c); } else // end of id or keyword { ungetc(c, in); // return last read char to buffer ret_val = is_keyword(strGetStr(buffer)); if (ret_val) { token.type = TYPE_KEYWORD + ret_val - 1; // magic #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else { token.type = TT_ID; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } } break; case ID: if (isalnum(c) || c == '_') { strAddChar(buffer, c); } else { ungetc(c, in); token.type = TT_ID; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case SLASH: if (c == '/') { state = LINE_COMMENT; } else if (c == '*') { state = 
BLOCK_COMMENT; } else // it was division { ungetc(c, in); token.type = TT_DIVIDE; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case OPERATOR: // not precisely "normal" fsm, but easily extensible (just add operator to operators[] and Ttoken_type) if (is_operator(c)) // c is one of valid chars, that can be in operator { strAddChar(buffer, c); ret_val = determine_operator(strGetStr(buffer)); // check if we still have valid operator in buffer if (!ret_val) // if it's not valid operator { ungetc(c, in); // return last char, it was not part of operator strDelChar(buffer); // delete wrong char from buffer ret_val = determine_operator(strGetStr(buffer)); // determine which operator we have token.type = TYPE_OPERATOR + ret_val - 1; // return token #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } // continue with loading chars if it's valid } else // another char is not operator -> end { ungetc(c, in); ret_val = determine_operator(strGetStr(buffer)); if (ret_val) { token.type = TYPE_OPERATOR + ret_val - 1; #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else // shouldn't occur, just to be sure.. 
{ lex_error("Unknown operator: '%s'.\n", strGetStr(buffer)); } } break; case LINE_COMMENT: if (c == '\n') // end of line comment { state = INIT; strClear(buffer); } break; case BLOCK_COMMENT: if (c == '*') // possible end of comment state = BLOCK_COMMENT_END; break; case BLOCK_COMMENT_END: if (c == '/') // comment ended { state = INIT; strClear(buffer); } else // false alarm - comment continues state = BLOCK_COMMENT; break; case NUMBER: if (isdigit(c)) { strAddChar(buffer, c); } else if (c == '.') { strAddChar(buffer, c); state = FLOAT; } else if (tolower(c) == 'e') { strAddChar(buffer, c); state = FLOAT_EXP; } else { ungetc(c, in); token.type = TT_VALUE_INT; token.value_int = (int) strtol(strGetStr(buffer), NULL, 10); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case FLOAT: // aspoň jedna číslice! if (isdigit(c)) { strAddChar(buffer, c); } else if (tolower(c) == 'e') { strAddChar(buffer, c); state = FLOAT_EXP; } else { ungetc(c, in); token.type = TT_VALUE_DOUBLE; token.value_double = strtod(strGetStr(buffer), NULL); //&check); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case FLOAT_EXP: if (isdigit(c)) { strAddChar(buffer, c); } else if (tolower(strLast(buffer)) == 'e' && (c == '+' || c == '-')) // optional +/- after e/E { strAddChar(buffer, c); } else { ungetc(c, in); token.type = TT_VALUE_DOUBLE; token.value_double = strtod(strGetStr(buffer), NULL); //&check); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } break; case STRING: if (c == '"') // end of string literal { token.type = TT_VALUE_STRING; token.p_string = strGetStr(buffer); #ifdef SCANNER_DEBUG fprintf(stderr, "%s\n", token_name[token.type]); #endif return OK; } else if (c == '\\') // string literal continues on another line or character constant state = STRING_BACKSLASH; else if (c != '\n') { strAddChar(buffer, c); } else { lex_error("String literal not 
closed.\n"); } break; case STRING_BACKSLASH: state = STRING; if (c == '\\') { strAddChar(buffer, '\\'); } else if (c == 'n') { strAddChar(buffer, '\n'); } else if (c == 't') { strAddChar(buffer, '\t'); } else if (c == '"') { strAddChar(buffer, '"'); } else if (c == 'x') { state = STRING_HEXA; } else if (c == 'b') { state = STRING_BINARY; } else if (c == '0') { state = STRING_OCTA; } else if (c == '\n') { // do nothing, string continues on next line - TODO: zdokumentovat upravu } else { lex_error("Escape sequence '\\%c' unknown.\n", c); } break; case STRING_HEXA: if (j < 2 && isxdigit(c)) // 2 is max hexadecimal escape length { literal[j] = c; j++; } else if (j == 0) // no valid hexadecimal digit after \x -> error { lex_error("'\\x%c' is not valid hexadecimal escape sequence.\n", c); } else // end of hexadecimal escape { literal[j] = '\0'; escape_seq = strtol(literal, NULL, 16); // will always be successful if (escape_seq == 0) { lex_error("\\x00 is not allowed hexadecimal escape sequence.\n"); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; case STRING_BINARY: if (j < 8 && (c == '0' || c == '1')) // 8 is max binary escape length { literal[j] = c; j++; } else if (j == 0) // no valid binary digit after \b -> error { lex_error("'\\b%c' is not valid binary escape sequence.\n", c); } else // end of binary escape { literal[j] = '\0'; escape_seq = strtol(literal, NULL, 2); // will always be successful if (escape_seq == 0) { lex_error("\\b00000000 is not allowed binary escape sequence.\n"); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; case STRING_OCTA: if (j < 3 && c >= '0' && c <= '7') // 3 is max octal escape length { literal[j] = c; j++; } else if (j == 0) // no valid octal digit after \0 -> error { lex_error("'\\0%c' is not valid octal escape sequence.\n", c); } else // end of octal escape { literal[j] = '\0'; escape_seq = strtol(literal, 
NULL, 8); // will always be successful if (escape_seq == 0) { lex_error("\\000 is not allowed octal escape sequence.\n"); } else if (escape_seq > 255) { lex_error("Octal escape '\\0%s' bigger than 255.\n", literal); } strAddChar(buffer, escape_seq); ungetc(c, in); // return currently read char j = 0; state = STRING; } break; default: lex_error("Scanner panic!!!\n"); break; } // end_switch } // end_while return 0; }
// Constructs the wrapper from a C-style string: `fst` and `cstr` are both
// initialized from `str`, and `lst` from the result of strLast(str).
// NOTE(review): presumably `str` must be a valid, null-terminated string and
// strLast() yields a pointer to its end/last character - confirm against the
// strLast() definition. Members are initialized in class declaration order,
// which is not necessarily the order written here.
basic_str_wrap( Char_T const* str ) :fst(str) ,lst( strLast(str) ) ,cstr(str) {}