// Verbose one-line dump of this token: pretty-printed text in brackets,
// a '*' marker when the underlying word is a new word, the raw lex string,
// the token type name, and the token's offset.
void WordToken::PrintVerbose(std::ostream& out) const {
    out << '[';
    PrettyPrintTextString(out);

    const bool isNew = GetWord()->IsNewWord();
    out << "]" << (isNew ? "* " : " ");

    out << " [" << LexString() << "] ";
    out << Token2String(token) << ' ' << offset << std::endl;
}
void LX_Fetch (void) { int c; pr_tokenclass = TK_NONE; pr_token[0] = 0; if (!pr_file_p) { pr_token_type = tt_eof; return; } LexWhitespace(); c = *pr_file_p; switch (ASCIIToChrCode[c]) { case CHR_LETTER: LexName(); return; case CHR_NUMBER: pr_token_type = tt_immediate; pr_immediate_type = &type_float; pr_immediate._float = LexNumber(); return; case CHR_DQUOTE: LexString(); return; case CHR_SQUOTE: LexVector(); return; case CHR_DOLLARSIGN: LexGrab(); return; case CHR_EOF: pr_token_type = tt_eof; return; case CHR_SPECIAL: default: LexPunctuation(); return; } }
void WordToken::Print(std::ostream& out, bool printSpace) const { if (xOutputWTL) { if (GetWord()) out << LexString() << tab << SelectedTag() << tab << LemmaString() << std::endl; else Message(MSG_WARNING, "no word"); return; } bool printTag = xPrintSelectedTag; const Word *w = GetWord(); if (w) { if (xPrintHTML) out << Str2html(RealString()); else PrettyPrintTextString(out); if (xPrintWordInfo) { out << " ["; if (w->IsNewWord()) ((NewWord*)w)->PrintInfo(out); else w->PrintInfo(out); out << ' ' << GetToken() << ']'; if (IsFirstInSentence()) out << '~'; } } else out << "(NULL word-token)"; if (printTag) { if(xPrintAllWordTags) { w->PrintTags(); } else out << tab << SelectedTag() << tab; } if (xPrintLemma) { out << LemmaString() << tab; } if (xPrintOneWordPerLine || xPrintWordInfo) out << xEndl; else if (printSpace && HasTrailingSpace()) out << ' '; }
/* RGS 11/12/93: Made format string parser re-entrant: * Needed for lazy evaluation of named formatters */ CONST_FORMAT_PTR ParseFormatString(const char *FormString) { CONST_FORMAT_PTR Formatter; BOOLEAN error = FALSE; Format_Parse_Type parser; initFormatParser(&parser); parser.ParseString = FormString; LexString(&parser, FormString); parser.TokenListHead = parser.TokenList; Formatter = Parse(&parser, FALSE, &error); if (error) Formatter = new_f_formatter(BadFormatFMT, NULL); FreeTokenList(parser.TokenListHead); clearFormatParser(&parser); return Formatter; }
//////////////////////////////////////////////////////////////////////////////// // LexOther: Lex the token types known to subclass (must be defined by subclass) // Return value: // RegularExprToken::Type The type of the matched token. // // Note: May throw an integer to indicate error. //////////////////////////////////////////////////////////////////////////////// RegularExprToken::Type RegularExprLexerChar::LexOther() { switch (current) { case 'd': Consume(); return RegularExprToken::Digit; case 'D': Consume(); return RegularExprToken::NotDigit; case 'a': Consume(); return RegularExprToken::Alpha; case 'A': Consume(); return RegularExprToken::NotAlpha; case 's': Consume(); return RegularExprToken::Whitespace; case 'S': Consume(); return RegularExprToken::NotWhitespace; case 'w': Consume(); return RegularExprToken::AlphaNumeric; case 'W': Consume(); return RegularExprToken::NotAlphaNumeric; case '"': return LexString(); default: // fall through break; } if (current != '\'') { // Will be caught by NextToken() throw 1; } // Skip opening ' Skip(); //The next character can't be a '. They need to escape it with a '\' if that's //what they intend if (current == '\'') { throw 1; } //Check for the escaped single quote and backslash here if (current == '\\') { //Skip the backslash Skip(); if (current != '\'' && current != '\\') { //whoops, it wasn't an escaped character after all. Put the backslash back in! tokenText += '\\'; } else { //It is an escaped character. Consume it here so the following code doesn't end //the token with no text Consume(); } } else { //Consume our one character Consume(); } // Must end with a ' if (current != '\'') { throw 1; } // Skip closing ' Skip(); return RegularExprToken::Literal; }
/*
 * PreprocessString: lex `string` into tokens and run a single preprocessing
 * pass over the token vector in place: handles '#' directives, drops tokens
 * inside false #if regions, and expands simple and function-like defines
 * recorded in `context`. Returns the fully expanded token vector.
 *
 * The loop mutates lexedFile while iterating; every removal/insertion at
 * position i is followed by i-- so the (possibly new) token at i is
 * re-examined on the next iteration — this is what makes expansion of
 * nested macros work.
 */
TokenVector PreprocessString(char* string){
    PrepocContext context = {0};
    TokenVector lexedFile = LexString(string);
    for(int i = 0; i < lexedFile.length; i++){
        Token tok = lexedFile.vals[i];
        //printf("Tok: '%.*s'\n", tok.length, tok.start);
        if(TOKEN_IS(tok, "#")){
            /* Directive: pull its tokens out of the stream, then let the
             * directive parser update lexedFile/context; rewind to rescan. */
            TokenVector directiveToks = GetAndRemoveDirectiveTokens(&lexedFile, i);
            //printf("Directive toks (length %d):\n", directiveToks.length);
            ParseDirective(directiveToks, &lexedFile, i, &context);
            i--;
        }
        else if(context.ifLevels > 0){
            /* Inside a suppressed #if region: discard the token. */
            //printf("Removing token: '%.*s'\n", tok.length, tok.start);
            VectorRemoveToken(&lexedFile, i);
            i--;
        }
        else{
            int expandDef = 0;
            /* Object-like macros: replace the name token with its value tokens. */
            for(int j = 0; j < context.simpleDefs.length; j++){
                if(TokenEqual(tok, context.simpleDefs.vals[j].name)){
                    VectorRemoveToken(&lexedFile, i);
                    VectorInsertVectorToken(&lexedFile, i, context.simpleDefs.vals[j].val);
                    expandDef = 1;
                    break;
                }
            }
            /* Function-like macros: collect the call's argument tokens, remove
             * the whole call, then splice in the macro body with parameters
             * substituted by the collected arguments. */
            for(int j = 0; j < context.funcDefs.length; j++){
                if(TokenEqual(tok, context.funcDefs.vals[j].name)){
                    //printf("FOUND ARG FUNC.\n");
                    /* Find the closing ')' of the call.
                     * NOTE(review): no bounds check — a macro name at end of
                     * input with no ')' scans past lexedFile.length; also the
                     * token at i+1 is assumed to be '('. Confirm callers
                     * guarantee well-formed calls. */
                    int endIdx = i + 1;
                    while(!TOKEN_IS(lexedFile.vals[endIdx], ")")){
                        endIdx++;
                    }
                    /* Split tokens between '(' and ')' into per-argument
                     * vectors at top-level ',' separators (no nesting support). */
                    TokenVectorVector argList = {0};
                    TokenVector emptyVec = {0};
                    VectorAddTokenVector(&argList, emptyVec);
                    int argIdx = 0;
                    for(int k = i + 2; k < endIdx; k++){
                        if(TOKEN_IS(lexedFile.vals[k], ",")){
                            argIdx++;
                            VectorAddTokenVector(&argList, emptyVec);
                        }
                        else{
                            VectorAddToken(&argList.vals[argIdx], lexedFile.vals[k]);
                        }
                    }
                    /* Remove the entire call (name through ')') from the stream. */
                    for(int k = i; k <= endIdx; k++){
                        VectorRemoveToken(&lexedFile, i);
                    }
                    /* Insert the macro body back-to-front at i so the final
                     * order is correct; parameter tokens are replaced with the
                     * matching argument's token vector.
                     * NOTE(review): this inner argIdx shadows the outer one
                     * above — works, but rename to avoid confusion. */
                    for(int funcIdx = context.funcDefs.vals[j].result.length - 1; funcIdx >= 0; funcIdx--){
                        int isArg = 0;
                        for(int argIdx = 0; argIdx < context.funcDefs.vals[j].args.length; argIdx++){
                            if(TokenEqual(context.funcDefs.vals[j].args.vals[argIdx], context.funcDefs.vals[j].result.vals[funcIdx])){
                                VectorInsertVectorToken(&lexedFile, i, argList.vals[argIdx]);
                                isArg = 1;
                                break;
                            }
                        }
                        if (!isArg) {
                            //printf("lexedFile.length ~: %d\n", lexedFile.length);
                            VectorInsertToken(&lexedFile, i, context.funcDefs.vals[j].result.vals[funcIdx]);
                        }
                    }
                    expandDef = 1;
                    break;
                }
            }
            /* Re-examine position i so freshly inserted tokens get expanded too. */
            if(expandDef){
                i--;
            }
        }
    }
    return lexedFile;
}