Beispiel #1
0
// Writes a one-line debug dump of this token to `out`, terminated by std::endl:
// the pretty-printed text in brackets, a '*' marker when the word is a new
// word, the lexical string in brackets, the token name and the offset.
// GetWord() is dereferenced without a null check here.
void WordToken::PrintVerbose(std::ostream& out) const {
  // Bracketed, pretty-printed surface text.
  out << '[';
  PrettyPrintTextString(out);
  out << "]";

  // New words are flagged with an asterisk directly after the bracket.
  const char* newWordMark = GetWord()->IsNewWord() ? "* " : " ";
  out << newWordMark;

  // Remaining fields, emitted in the same left-to-right order as before.
  out << " [" << LexString() << "] ";
  out << Token2String(token) << ' ';
  out << offset << std::endl;
}
/*
 * LX_Fetch
 *
 * Reads the next token from the input at pr_file_p.
 *
 * Resets pr_tokenclass to TK_NONE and empties pr_token, then skips
 * whitespace and dispatches on the character class of the next input
 * character (looked up in ASCIIToChrCode) to the matching Lex* routine.
 * When there is no input buffer, or the character class is CHR_EOF,
 * pr_token_type is set to tt_eof.
 */
void LX_Fetch (void)
{
	int		c;

	pr_tokenclass = TK_NONE;

	pr_token[0] = 0;

	/* No input buffer at all: report end of file. */
	if (!pr_file_p)
	{
		pr_token_type = tt_eof;
		return;
	}

	LexWhitespace();

	/*
	 * Convert through unsigned char before widening to int.  If the
	 * buffer holds plain (possibly signed) chars, a byte >= 0x80 would
	 * otherwise become a negative index into ASCIIToChrCode.
	 */
	c = (unsigned char)*pr_file_p;

	switch (ASCIIToChrCode[c])
	{
	case CHR_LETTER:
		LexName();
		return;
	case CHR_NUMBER:
		/* Numeric literals are stored as float immediates. */
		pr_token_type = tt_immediate;
		pr_immediate_type = &type_float;
		pr_immediate._float = LexNumber();
		return;
	case CHR_DQUOTE:
		LexString();
		return;
	case CHR_SQUOTE:
		LexVector();
		return;
	case CHR_DOLLARSIGN:
		LexGrab();
		return;
	case CHR_EOF:
		pr_token_type = tt_eof;
		return;
	case CHR_SPECIAL:
	default:
		/* Anything not classified above is lexed as punctuation. */
		LexPunctuation();
		return;
	}
}
Beispiel #3
0
// Writes this token to `out` in the format selected by the global x* flags.
//
// - xOutputWTL: prints "word<tab>tag<tab>lemma" followed by std::endl and
//   returns; a token without a word only produces a warning message.
// - Otherwise prints the token text (HTML-escaped when xPrintHTML is set),
//   optionally followed by word info, tag(s) and lemma, and finally either
//   xEndl or a single space.
//
// out        - destination stream.
// printSpace - when true, a trailing space is written if the token has one
//              and no line break was emitted.
void WordToken::Print(std::ostream& out, bool printSpace) const {
  if (xOutputWTL) {
    if (GetWord())
      out << LexString() << tab << SelectedTag() << tab << LemmaString() << std::endl;
    else
      Message(MSG_WARNING, "no word");
    return;
  }

  const bool printTag = xPrintSelectedTag;

  const Word *w = GetWord();
  if (w) {
    if (xPrintHTML)
      out << Str2html(RealString());
    else
      PrettyPrintTextString(out);

    if (xPrintWordInfo) {
      out << " [";
      if (w->IsNewWord())
        ((NewWord*)w)->PrintInfo(out);
      else
        w->PrintInfo(out);
      out << ' ' << GetToken() << ']';
      // '~' marks the first token of a sentence.
      if (IsFirstInSentence())
        out << '~';
    }
  } else
    out << "(NULL word-token)";

  if (printTag) {
    if (xPrintAllWordTags) {
      // w may be null here (see the "(NULL word-token)" branch above);
      // there are no tags to print in that case.
      if (w)
        w->PrintTags();
    } else {
      // NOTE(review): SelectedTag() is still called for a null word, as
      // before; confirm it is safe when GetWord() returns null.
      out << tab << SelectedTag() << tab;
    }
  }
  if (xPrintLemma) {
    out << LemmaString() << tab;
  }
  if (xPrintOneWordPerLine || xPrintWordInfo)
    out << xEndl;
  else if (printSpace && HasTrailingSpace())
    out << ' ';
}
Beispiel #4
0
/* RGS 11/12/93: Made format string parser re-entrant:
 *               Needed for lazy evaluation of named formatters
 *
 * Parses FormString into a formatter.  All parser state lives in a local
 * Format_Parse_Type, which is initialised on entry and cleared before
 * returning.  If Parse() reports an error, a BadFormatFMT formatter is
 * returned instead of the parse result.
 */
CONST_FORMAT_PTR ParseFormatString(const char *FormString)
{
  Format_Parse_Type parseState;
  BOOLEAN parseFailed = FALSE;
  CONST_FORMAT_PTR result;

  initFormatParser(&parseState);
  parseState.ParseString = FormString;

  /* Tokenise the string and remember the list head so it can be freed. */
  LexString(&parseState, FormString);
  parseState.TokenListHead = parseState.TokenList;

  result = Parse(&parseState, FALSE, &parseFailed);
  if (parseFailed) {
    result = new_f_formatter(BadFormatFMT, NULL);
  }

  FreeTokenList(parseState.TokenListHead);
  clearFormatParser(&parseState);

  return result;
}
	////////////////////////////////////////////////////////////////////////////////
	// LexOther:  Lex the token types known to this subclass (must be defined by
	//            subclass): single-letter character classes, double-quoted
	//            strings (delegated to LexString) and single-quoted literals.
	// Return value:
	//	RegularExprToken::Type		The type of the matched token.
	//
	// Note: Throws the integer 1 when the input is not one of these forms.
	////////////////////////////////////////////////////////////////////////////////
	RegularExprToken::Type RegularExprLexerChar::LexOther()
	{
		// Double-quoted strings have their own lexer.
		if (current == '"') {
			return LexString();
		}

		// Single-letter character classes: consume the letter and report its type.
		bool isCharClass = true;
		RegularExprToken::Type classType = RegularExprToken::Literal;
		switch (current) {
		case 'd': classType = RegularExprToken::Digit;           break;
		case 'D': classType = RegularExprToken::NotDigit;        break;
		case 'a': classType = RegularExprToken::Alpha;           break;
		case 'A': classType = RegularExprToken::NotAlpha;        break;
		case 's': classType = RegularExprToken::Whitespace;      break;
		case 'S': classType = RegularExprToken::NotWhitespace;   break;
		case 'w': classType = RegularExprToken::AlphaNumeric;    break;
		case 'W': classType = RegularExprToken::NotAlphaNumeric; break;
		default:  isCharClass = false;                           break;
		}
		if (isCharClass) {
			Consume();
			return classType;
		}

		// Anything else has to be a single-quoted literal.
		if (current != '\'') {
			// Per the original note, this is caught by NextToken()
			throw 1;
		}
		// Drop the opening quote
		Skip();

		// An immediately following quote is rejected; a quote character has to be
		// written escaped with a backslash.
		if (current == '\'') {
			throw 1;
		}

		if (current == '\\') {
			// Drop the backslash and look at what it precedes
			Skip();
			const bool isEscape = (current == '\'' || current == '\\');
			if (isEscape) {
				// Escaped quote or backslash: it becomes the literal's character.
				Consume();
			} else {
				// Not an escape sequence: the backslash itself goes into the token text.
				// NOTE(review): current is neither ' nor \ at this point, so the
				// closing-quote check below appears to always throw - confirm intent.
				tokenText += '\\';
			}
		} else {
			// Ordinary literal character
			Consume();
		}

		// The literal has to be closed by a quote
		if (current != '\'') {
			throw 1;
		}

		// Drop the closing quote
		Skip();

		return RegularExprToken::Literal;
	}
Beispiel #6
0
// Lexes `string` and runs the preprocessor over the resulting token vector.
//
// Walks the tokens once, in place:
//   - a "#" token starts a directive: its tokens are removed and handed to
//     ParseDirective;
//   - while context.ifLevels > 0, tokens are dropped;
//   - a token equal to a simple define's name is replaced by that define's value;
//   - a token equal to a function-like define's name is replaced, together with
//     everything up to the next ")", by the define's result tokens with each
//     parameter substituted by the corresponding argument.
// After any removal or expansion the index is stepped back, so the tokens now
// at that position are examined again.
//
// Returns the processed token vector.
TokenVector PreprocessString(char* string){
	
	PrepocContext context = {0};
	
	TokenVector lexedFile = LexString(string);
	
	for(int i = 0; i < lexedFile.length; i++){
		Token tok = lexedFile.vals[i];
		if(TOKEN_IS(tok, "#")){
			TokenVector directiveToks = GetAndRemoveDirectiveTokens(&lexedFile, i);
			ParseDirective(directiveToks, &lexedFile, i, &context);
			i--;
		}
		else if(context.ifLevels > 0){
			VectorRemoveToken(&lexedFile, i);
			i--;
		}
		else{
			int expandDef = 0;
			for(int j = 0; j < context.simpleDefs.length; j++){
				if(TokenEqual(tok, context.simpleDefs.vals[j].name)){
					VectorRemoveToken(&lexedFile, i);
					VectorInsertVectorToken(&lexedFile, i, context.simpleDefs.vals[j].val);
					expandDef = 1;
					break;
				}
			}
			
			// Only look for a function-like define if the token was not already
			// replaced above: `tok` is a copy of the old token and no longer
			// describes what is stored at index i.
			for(int j = 0; !expandDef && j < context.funcDefs.length; j++){
				if(!TokenEqual(tok, context.funcDefs.vals[j].name)){
					continue;
				}
				
				// Find the ")" that ends the call, without running off the end
				// of the token vector.
				int endIdx = i + 1;
				while(endIdx < lexedFile.length && !TOKEN_IS(lexedFile.vals[endIdx], ")")){
					endIdx++;
				}
				if(endIdx >= lexedFile.length){
					// Unterminated call: leave the token unexpanded.
					break;
				}
				
				TokenVectorVector argList = {0};
				
				TokenVector emptyVec = {0};
				VectorAddTokenVector(&argList, emptyVec);
				
				// Split the tokens after the token at i + 1 (assumed to be the
				// opening parenthesis) into comma-separated arguments.
				int argIdx = 0;
				for(int k = i + 2; k < endIdx; k++){
					if(TOKEN_IS(lexedFile.vals[k], ",")){
						argIdx++;
						VectorAddTokenVector(&argList, emptyVec);
					}
					else{
						VectorAddToken(&argList.vals[argIdx], lexedFile.vals[k]);
					}
				}
				const int suppliedArgs = argIdx + 1;
				
				// Remove the whole call: name through closing ")".
				for(int k = i; k <= endIdx; k++){
					VectorRemoveToken(&lexedFile, i);
				}
				
				// Insert the result tokens back to front so that they end up in
				// their original order at index i.
				for(int funcIdx = context.funcDefs.vals[j].result.length - 1; funcIdx >= 0; funcIdx--){
					int isArg = 0;
					for(int paramIdx = 0; paramIdx < context.funcDefs.vals[j].args.length; paramIdx++){
						if(TokenEqual(context.funcDefs.vals[j].args.vals[paramIdx], context.funcDefs.vals[j].result.vals[funcIdx])){
							// A parameter with no supplied argument expands to
							// nothing instead of reading past argList.
							if(paramIdx < suppliedArgs){
								VectorInsertVectorToken(&lexedFile, i, argList.vals[paramIdx]);
							}
							isArg = 1;
							break;
						}
					}

					if (!isArg) {
						VectorInsertToken(&lexedFile, i, context.funcDefs.vals[j].result.vals[funcIdx]);
					}
				}
				
				expandDef = 1;
				break;
			}
			
			if(expandDef){
				i--;
			}
		}
	}
	
	return lexedFile;
}