// returns the end point of the graphematical descriptors int CLemWord::ProcessGraphematicalDescriptors(const char* LineStr) { size_t MorphSignPos = GetMorphSignPosition(LineStr); if (MorphSignPos == -1) MorphSignPos = strlen(LineStr); m_GraDescrs = parse_gra_descriptors(string (LineStr, MorphSignPos).c_str(), m_UnparsedGraphemDescriptorsStr); m_bSpace = HasDes(OSpc) || HasDes(OEOLN) || ((BYTE)m_strWord[0] == StupidSymbol1) || ( ((BYTE)m_strWord[0] == '_') && (m_strWord.length() == 1)); if (HasDes (OUpLw)) m_Register = UpLow; else if (HasDes (OUp)) m_Register = UpUp; else if (HasDes (OLw)) m_Register = LowLow; else m_Register = AnyRegister; m_bComma = (m_strWord.length() == 1) && (m_strWord[0] == ','); m_bDash = (m_strWord.length() == 1) && (m_strWord[0] == '-'); bool bRomanNumber = is_roman_number(m_strWord.c_str(), m_strWord.length() ); size_t hyphen_occur = m_strWord.find("-"); if ((hyphen_occur != string::npos) && (hyphen_occur!=0)) { // "ѕавла I-го" // "I-го" - одно слово bRomanNumber = is_roman_number(m_strWord.c_str(), hyphen_occur); }; if (bRomanNumber) m_GraDescrs |= _QM(ORoman); m_bWord = !bRomanNumber && (HasDes(ORLE) || HasDes(OLLE)); return (int)MorphSignPos; }
bool CPlmLine :: LoadPlmLineFromString (string LineStr, bool bStartLine, const CAgramtab* pRusGramTab) { m_bToDelete = false; m_bQuoteMarks = false; if (!bStartLine) { m_bHomonym = (LineStr[0] == ' '); Trim(LineStr); SetWord(::GetWordForm(LineStr) ); } else { m_bHomonym = false; SetWord( "" ); }; long i = 1; // the first char can be a space (if this line contains a homonym) for (; i < LineStr.length(); i++) if (!isspace((BYTE) LineStr[i]) == 0) break; if (sscanf (LineStr.c_str()+i, "%i %i", &m_FilePosition, &m_TokenLengthInFile) != 2) return false; // pass all numbers for (; i < LineStr.length(); i++) if ( (isdigit((BYTE) LineStr[i]) == 0) && (isspace((BYTE) LineStr[i]) == 0) && (((BYTE)LineStr[i]) != '-') ) break; int MorphSignPos = GetMorphSignPosition(LineStr.c_str()+i); if (MorphSignPos == -1) MorphSignPos = LineStr.length(); else MorphSignPos += i; // make MorphSignPos an absolute offset in LineStr m_GraphDescr = LineStr.substr (i, MorphSignPos - i); /* вставим пробел в начало, потому что часто ищут графету с пробелом в начале, например, " ЛЕ"*/ m_GraphDescr = " " + m_GraphDescr; if (MorphSignPos != LineStr.length()) { StringTokenizer tok(LineStr.c_str()+MorphSignPos," "); if (!tok() ) return false; string MorphSign = tok.val(); if (MorphSign.length() != 3) return false; m_MorphSign = MorphSign[0]; m_CommonGramCode = MorphSign.substr(1); if (!tok() ) return false; m_Lemma = tok.val(); if (m_Lemma.empty()) return false; if (!tok() ) return false; SetGramCodes ( tok.val(), pRusGramTab); if (!tok() ) return false; m_ParadigmId = tok.val(); if (!tok() ) return false; m_HomoWeight = tok.val(); }; m_TokenType = OTHER_TOKEN_TYPE; for (int k=(int)RLE; k < OTHER_TOKEN_TYPE; k++) if (init_flag (m_GraphDescr, TokenTypeToString((MainTokenTypeEnum)k).c_str() )) { m_TokenType = (MainTokenTypeEnum)k; break; }; if (init_flag (m_GraphDescr, "Aa")) m_Register = UpLow; else if (init_flag (m_GraphDescr, "AA")) m_Register = UpUp; else { init_flag (m_GraphDescr, "aa"); m_Register = LowLow; }; m_bFirstUpperAlpha = (m_Register == UpUp) || (m_Register == UpLow); m_bFI1 = init_flag (m_GraphDescr, "FAM1"); m_bFI2 = init_flag (m_GraphDescr, "FAM2"); m_bName = init_flag (m_GraphDescr, "NAM?"); m_bSent2 = init_flag (m_GraphDescr, "SENT_END"); int hyphen_occur = m_Word.find("-"); m_bHyphenWord = (hyphen_occur != string::npos) && ( (m_TokenType == RLE) ||(m_TokenType == LLE)); m_bOborot1 = (m_GraphDescr.find("EXPR1") != string::npos); m_bOborot2 = (m_GraphDescr.find("EXPR2") != string::npos); bool bRomanNumber = is_roman_number(m_Word.c_str(), m_Word.length()); if ((hyphen_occur != string::npos) && (hyphen_occur!=0)) { // "Павла I-го" // "I-го" - одно слово bRomanNumber = is_roman_number(m_Word.c_str(), hyphen_occur); }; if (bRomanNumber) { m_TokenType = ROMAN_NUM; m_CommonGramCode = ""; m_MorphSign = 0; m_ParadigmId = ""; }; Trim(m_GraphDescr); return true; };