// Consumes and returns the token at the current position, provided it has
// the requested type.
//
// Failure modes (both record a CompileError in `errors` before throwing):
//   - no tokens left:  error code 0 at CodePosition(0, 0, fileName), throws int 0
//   - wrong token type: error code 20001 at the token's position, throws int 20001
Token & ReadToken(TokenType type)
{
	// Guard: the stream is exhausted, so there is no token to compare against.
	if (pos >= tokens.Count())
	{
		errors.Add(CompileError(TokenTypeToString(type) + String(L" expected but end of file encountered."), 0, CodePosition(0, 0, fileName)));
		throw 0;
	}

	Token & current = tokens[pos];

	// Guard: a token is present but it is not the kind the caller asked for.
	if (current.Type != type)
	{
		errors.Add(CompileError(TokenTypeToString(type) + String(L" expected"), 20001, current.Position));
		throw 20001;
	}

	// Success: advance past the token only after it has been accepted.
	++pos;
	return current;
}
// Builds the textual representation of this line.
//
// The pieces are appended in this order: the word (prefixed with a space for
// homonyms), file position and token length, the token type name (ROMAN_NUM
// is printed as NUM), the register mark for RLE/LLE tokens, the graphematic
// descriptors, the boolean flag markers, and finally the morphological
// fields when a morph sign is present.
string CPlmLine::GetStr() const
{
	string Line;
	if (m_bHomonym)
		Line = " ";
	Line += m_Word;
	Line += " ";
	Line += Format(" %i %i ", m_FilePosition, m_TokenLengthInFile);

	if (m_TokenType != OTHER_TOKEN_TYPE)
	{
		// Roman numerals are written out under the generic number type.
		const MainTokenTypeEnum Shown = (m_TokenType == ROMAN_NUM) ? NUM : m_TokenType;
		Line += " ";
		Line += TokenTypeToString(Shown);
		Line += " ";
	}

	if ((m_TokenType == RLE) || (m_TokenType == LLE))
	{
		// An unrecognised register contributes nothing.
		const char* RegisterMark = "";
		if (m_Register == LowLow)
			RegisterMark = " aa ";
		else if (m_Register == UpLow)
			RegisterMark = " Aa ";
		else if (m_Register == UpUp)
			RegisterMark = " AA ";
		Line += RegisterMark;
	}

	Line += m_GraphDescr;
	Line += " ";

	if (m_bQuoteMarks) Line += " #QUOTED ";
	if (m_bFI1)        Line += " FAM1 ";
	if (m_bFI2)        Line += " FAM2 ";
	if (m_bName)       Line += " NAM? ";
	if (m_bSent2)      Line += " SENT_END ";

	if (m_MorphSign != 0)
	{
		Line += m_MorphSign + m_CommonGramCode + " " + m_Lemma + " " + m_GramCodes + " "
			+ m_ParadigmId + " " + m_HomoWeight;
	}

	return Line;
}
//============================================================================== void ExpressionParser::ThrowInvalidTokenError_(const ExpressionParser::Token &token, const std::string& descriptionText) { FlushOutput_(); std::stringstream ss; ss << "unexpected token " << TokenTypeToString(token.type) << " : \"" << token.value << "\" at " << token.line << "-" << token.column; if (!descriptionText.empty()) { ss << ", " << descriptionText; } throw std::logic_error(ss.str()); }
// Produces a printable form of this grammar item.
//
// A non-meta item that has a token and carries neither attributes nor a
// morphological pattern is printed as its quoted id: 'id'.
// Every other item is printed as "[<name> <attributes>]", where <name> is the
// item id for meta items and "TOKEN" otherwise.
string CGrammarItem::GetDumpString() const
{
	const bool bPlainToken =
		!m_bMeta
		&& !m_Token.empty()
		&& m_Attributes.empty()
		&& m_MorphPattern.m_GrmAttribute.empty();
	if (bPlainToken)
	{
		return Format("'%s'", m_ItemStrId.c_str());
	}

	string AttrText;

	// User attributes, in map order, each as name=value.
	for (map<string, string>::const_iterator attr = m_Attributes.begin(); attr != m_Attributes.end(); ++attr)
	{
		AttrText += Format("%s=%s ", attr->first.c_str(), attr->second.c_str());
	}

	if (!m_MorphPattern.m_GrmAttribute.empty())
	{
		AttrText += Format("grm=\"%s\" ", m_MorphPattern.m_GrmAttribute.c_str());
	}

	if (!m_bCanHaveManyHomonyms)
	{
		AttrText += "hom=\"no\" ";
	}

	// The register is printed only when it is constrained; anything that is
	// neither LowLow nor UpLow is reported as "AA".
	if (m_Register != AnyRegister)
	{
		if (m_Register == LowLow)
		{
			AttrText += "register=\"aa\" ";
		}
		else if (m_Register == UpLow)
		{
			AttrText += "register=\"Aa\" ";
		}
		else
		{
			AttrText += "register=\"AA\" ";
		}
	}

	if (m_TokenType != OTHER_TOKEN_TYPE)
	{
		AttrText += Format("type=\"%s\" ", TokenTypeToString(m_TokenType).c_str());
	}

	if (m_bSynMain)
	{
		AttrText += "root ";
	}

	Trim(AttrText);

	const string Name = m_bMeta ? m_ItemStrId : string("TOKEN");
	return Format("[%s %s]", Name.c_str(), AttrText.c_str());
}
bool CPlmLine :: LoadPlmLineFromString (string LineStr, bool bStartLine, const CAgramtab* pRusGramTab) { m_bToDelete = false; m_bQuoteMarks = false; if (!bStartLine) { m_bHomonym = (LineStr[0] == ' '); Trim(LineStr); SetWord(::GetWordForm(LineStr) ); } else { m_bHomonym = false; SetWord( "" ); }; long i = 1; // the first char can be a space (if this line contains a homonym) for (; i < LineStr.length(); i++) if (!isspace((BYTE) LineStr[i]) == 0) break; if (sscanf (LineStr.c_str()+i, "%i %i", &m_FilePosition, &m_TokenLengthInFile) != 2) return false; // pass all numbers for (; i < LineStr.length(); i++) if ( (isdigit((BYTE) LineStr[i]) == 0) && (isspace((BYTE) LineStr[i]) == 0) && (((BYTE)LineStr[i]) != '-') ) break; int MorphSignPos = GetMorphSignPosition(LineStr.c_str()+i); if (MorphSignPos == -1) MorphSignPos = LineStr.length(); else MorphSignPos += i; // make MorphSignPos an absolute offset in LineStr m_GraphDescr = LineStr.substr (i, MorphSignPos - i); /* вставим пробел в начало, потому что часто ищут графету с пробелом в начале, например, " ЛЕ"*/ m_GraphDescr = " " + m_GraphDescr; if (MorphSignPos != LineStr.length()) { StringTokenizer tok(LineStr.c_str()+MorphSignPos," "); if (!tok() ) return false; string MorphSign = tok.val(); if (MorphSign.length() != 3) return false; m_MorphSign = MorphSign[0]; m_CommonGramCode = MorphSign.substr(1); if (!tok() ) return false; m_Lemma = tok.val(); if (m_Lemma.empty()) return false; if (!tok() ) return false; SetGramCodes ( tok.val(), pRusGramTab); if (!tok() ) return false; m_ParadigmId = tok.val(); if (!tok() ) return false; m_HomoWeight = tok.val(); }; m_TokenType = OTHER_TOKEN_TYPE; for (int k=(int)RLE; k < OTHER_TOKEN_TYPE; k++) if (init_flag (m_GraphDescr, TokenTypeToString((MainTokenTypeEnum)k).c_str() )) { m_TokenType = (MainTokenTypeEnum)k; break; }; if (init_flag (m_GraphDescr, "Aa")) m_Register = UpLow; else if (init_flag (m_GraphDescr, "AA")) m_Register = UpUp; else { init_flag (m_GraphDescr, "aa"); m_Register = 
LowLow; }; m_bFirstUpperAlpha = (m_Register == UpUp) || (m_Register == UpLow); m_bFI1 = init_flag (m_GraphDescr, "FAM1"); m_bFI2 = init_flag (m_GraphDescr, "FAM2"); m_bName = init_flag (m_GraphDescr, "NAM?"); m_bSent2 = init_flag (m_GraphDescr, "SENT_END"); int hyphen_occur = m_Word.find("-"); m_bHyphenWord = (hyphen_occur != string::npos) && ( (m_TokenType == RLE) ||(m_TokenType == LLE)); m_bOborot1 = (m_GraphDescr.find("EXPR1") != string::npos); m_bOborot2 = (m_GraphDescr.find("EXPR2") != string::npos); bool bRomanNumber = is_roman_number(m_Word.c_str(), m_Word.length()); if ((hyphen_occur != string::npos) && (hyphen_occur!=0)) { // "Павла I-го" // "I-го" - одно слово bRomanNumber = is_roman_number(m_Word.c_str(), hyphen_occur); }; if (bRomanNumber) { m_TokenType = ROMAN_NUM; m_CommonGramCode = ""; m_MorphSign = 0; m_ParadigmId = ""; }; Trim(m_GraphDescr); return true; };