// Attaches `article` to the multiword built from the word sequence `ws`,
// registers the article as found for the resulting homonym and stores a
// pointer to `ws` in m_wordSequences.
//
// If the multiword consists of a single source word that has only
// unknown-POS homonyms, its first homonym is reused; otherwise a new
// lower-cased homonym is created and added to the word.
void CMultiWordCreator::AddQuoteMultiWord(CWordSequence& ws, const TArticleRef& article)
{
    SWordHomonymNum wh;
    Wtroka str;
    CWord* pNewWord = GetWordForMultiWord(ws, str, wh);

    const bool singleUnknownWord =
        pNewWord->m_SourceWords.Size() == 1 && pNewWord->HasOnlyUnknownPOS();

    if (!singleUnknownWord) {
        pNewWord->m_SourceWords.SetPair(ws.FirstWord(), ws.LastWord());
        // No text supplied by GetWordForMultiWord: fall back to the word text.
        if (str.size() == 0)
            str = pNewWord->m_txt;
        TMorph::ToLower(str);

        CHomonym* pNewHom = new CHomonym(TMorph::GetMainLanguage(), str);
        pNewHom->SetSourceWordSequence(&ws);
        pNewHom->PutArticle(article);
        wh.m_HomNum = pNewWord->AddRusHomonym(pNewHom);
    } else {
        size_t firstId = pNewWord->IterHomonyms().GetID();
        CHomonym& firstHom = pNewWord->GetRusHomonym(firstId);
        firstHom.SetSourceWordSequence(&ws);
        firstHom.PutArticle(article);
        wh.m_HomNum = firstId;
    }

    // The article comes either from the auxiliary dictionary or from the gazetteer.
    if (!article.AuxDic().IsValid()) {
        YASSERT(!article.Gzt().Empty());
        AddFoundArticle(article.Gzt().GetType(), article.Gzt().GetTitle(), wh);
    } else {
        const article_t* pArt = GlobalDictsHolder->GetAuxArticle(article.AuxDic());
        YASSERT(pArt != NULL);
        AddFoundArticle(pArt->get_kw_type(), pArt->get_title(), wh);
    }

    m_wordSequences.push_back(&ws);
}
bool CWord::RightPartIsSurname(int& iH, THomonymGrammems& grammems, Wtroka& strLemma) { iH = HasMorphNounWithGrammems_i(TGramBitSet(gSurname)); if (iH != -1) { CHomonym& h = GetRusHomonym(iH); grammems = h.Grammems; strLemma = h.GetLemma(); size_t ii = strLemma.find('-'); YASSERT(ii != Wtroka::npos); strLemma = strLemma.substr(ii + 1); return true; } //if this word is in morphology - do not try to predict if (IsDictionary()) return false; size_t ii = m_txt.find('-'); if (ii == Wtroka::npos) return false; Wtroka strRightPart = m_txt.substr(ii + 1); TMorph::ToLower(strRightPart); yvector<TSurnamePredictor::TPredictedSurname> out; if (!TMorph::PredictSurname(strRightPart, out)) return false; TGrammarBunch newForms; NSpike::ToGrammarBunch(out[0].StemGrammar, out[0].FlexGrammars, newForms); grammems.Reset(newForms); strLemma = out[0].Lemma; return true; }
// Builds a word from the primitive group `prim` and the text `strWord`.
// Text of MAXWORD_LEN characters or more is cut to MAXWORD_LEN - 2 characters.
// All quote/punctuation flags start out false; InitPunc() runs last.
CWordBase::CWordBase(docLanguage lang, const CPrimGroup& prim, const Wtroka& strWord)
    : m_lang(lang)
{
    m_bHasAltWordPart = false;
    m_typ = prim.m_gtyp;
    m_num = prim.m_prim.size();

    const bool fitsLimit = strWord.size() < MAXWORD_LEN;
    if (fitsLimit)
        SetText(strWord);
    else
        SetText(strWord.substr(0, MAXWORD_LEN - 2));

    m_pos = prim.m_pos;
    m_len = prim.m_len;

    m_bUp = false;
    m_bHasUnusefulPostfix = false;
    m_bHasOpenQuote = false;
    m_bHasCloseQuote = false;
    m_bSingleOpenQuote = false;
    m_bSingleCloseQuote = false;
    m_bIsPunct = false;

    InitPunc();
}
// Upper-cases the first UTF-16 code unit of `str` in place when it is a
// lower-case character. Empty strings are left untouched.
// TODO: this does not work properly for surrogate pairs (UTF-16)
void ToFirstUpper(Wtroka& str)
{
    if (str.empty())
        return;
    if (!::IsLower(str[0]))
        return;
    *(str.begin()) = static_cast<wchar16>(::ToUpper(str[0]));
}
bool EqualCiRus(const Wtroka& s1, const char* s2) { static const CodePage& cp = *CodePageByCharset(CODES_WIN); const ui16* w = s1.begin(); for (; w != s1.end() && *s2 != 0; ++w, ++s2) if (::ToLower(*w) != ::ToLower(cp.unicode[static_cast<ui8>(*s2)])) return false; return w == s1.end() && *s2 == 0; }
// Returns the lemma of the attached source word sequence when that sequence
// spans more than one word and its lemma is non-empty; otherwise falls back
// to CHomonymBase::GetLemma().
Wtroka CHomonym::GetLemma() const
{
    const bool hasMultiWordSequence =
        m_pSourceWordSequence && (m_pSourceWordSequence->Size() > 1);

    if (hasMultiWordSequence) {
        Wtroka sequenceLemma = m_pSourceWordSequence->GetLemma();
        if (!sequenceLemma.empty())
            return sequenceLemma;
    }
    return CHomonymBase::GetLemma();
}
// Returns GetLemma() shortened for display: lemmas longer than 30 characters
// are cut to their first 30 characters and get "..." appended.
Wtroka CHomonym::GetShortLemma() const
{
    const size_t maxShownLen = 30;

    Wtroka lemma = GetLemma();
    if (lemma.size() <= maxShownLen)
        return lemma;

    lemma = lemma.substr(0, maxShownLen);
    NStr::Append(lemma, "...");
    return lemma;
}
//Try to predict double-word surname if the last part is found in dictionary. //Then check if the first part is found in dictionary as surname. //If it is not then try predicting it. bool CWord::PredictHyphenSurname() { if (!((m_typ == Hyphen || m_typ == HypDiv || m_typ == DivWord) && m_variant.size() > 0)) return false; size_t ii = m_txt.find('-'); if (ii == Wtroka::npos) return false; //only one hyphen is allowed if (ii != m_txt.rfind('-')) return false; Wtroka strRightPart = m_txt.substr(ii + 1); if (strRightPart.size() < 1 || !::IsUpper(strRightPart[0])) return false; int iH = -1; THomonymGrammems rightPartGrammems; Wtroka strRightPartLemma; if (!RightPartIsSurname(iH, rightPartGrammems, strRightPartLemma)) return false; ii = m_txt.find('-'); //unnecessary call? if (ii == Wtroka::npos) return false; Wtroka strFirstPart = m_txt.substr(0, ii); TMorph::ToLower(strFirstPart); //look in morphology THomonymVector res; TMorph::GetDictHomonyms(strFirstPart, res); bool found = false; for (size_t i = 0; i < res.size(); ++i) { if (!found && res[i]->HasGrammem(gSurname) && NGleiche::Gleiche(res[i]->Grammems, rightPartGrammems, NGleiche::GenderNumberCaseCheck)) { found = true; Wtroka joined_lemma = res[i]->GetLemma() + '-' + strRightPartLemma; AddHyphenSurnameLemma(iH, rightPartGrammems, joined_lemma); } } if (found) return true; //if the word was in morphology then do not do any further predictions if (res.size() > 0) return false; yvector<TSurnamePredictor::TPredictedSurname> out; TMorph::PredictSurname(strFirstPart, out); if (out.size() > 0 && NGleiche::Gleiche(out[0].FlexGrammars, rightPartGrammems.Forms(), NGleiche::GenderNumberCaseCheck)) { Wtroka joined_lemma = out[0].Lemma + '-' + strRightPartLemma; AddHyphenSurnameLemma(iH, rightPartGrammems, joined_lemma); return true; } return false; }
// Removes leading and trailing whitespace (as defined by IsWhitespace) from
// `w` in place. A string consisting only of whitespace, or an empty string,
// becomes empty.
//
// Works on indices rather than on raw pointers: the previous implementation
// scanned backwards from the pointer `w.c_str() - 1`, i.e. one element before
// the start of the buffer, which is not a valid pointer to form.
void Strip(Wtroka& w)
{
    const size_t len = w.size();
    const wchar16* const data = w.c_str();

    // Index of the first non-whitespace character.
    size_t first = 0;
    while (first < len && IsWhitespace(data[first]))
        ++first;

    if (first == len) {
        // all characters are spaces (or the string is empty)
        w.clear();
        return;
    }

    // Index of the last non-whitespace character; data[first] is known to be
    // non-whitespace, so the scan stops at `first` at the latest.
    size_t last = len - 1;
    while (IsWhitespace(data[last]))
        --last;

    // Cut the tail first so that `first` still refers to the same character;
    // remove() does not change the string when the position equals its size.
    w.remove(last + 1);
    if (first != 0)
        w.erase(0, first);
}
// Replaces every occurrence of `from` in `str` with `to`, scanning left to
// right and continuing the search right after each inserted replacement
// (so text produced by a replacement is never matched again).
//
// Returns the number of replacements made. An empty `from` matches nothing
// and yields 0: without this guard an empty pattern can be "found" at every
// search position, so the loop would keep inserting `to` and never end.
size_t ReplaceSubstr(Wtroka& str, const TWtringBuf& from, const TWtringBuf& to)
{
    if (from.empty())
        return 0;

    size_t count = 0;
    size_t pos = str.off(TCharTraits<wchar16>::Find(~str, +str, ~from, +from));
    while (pos != Wtroka::npos) {
        str.replace(pos, +from, ~to, 0, Wtroka::npos, +to);
        ++count;

        // Resume after the text that has just been inserted.
        const size_t next = pos + to.size();
        pos = str.off(TCharTraits<wchar16>::Find(~str + next, +str - next, ~from, +from));
    }
    return count;
}
void CQuotesFinder::CreateTextField(const Stroka& TextFieldName, CSentence* pSent, const SWordHomonymNum& WH, CFactFields& factFields, const Wtroka& Postfix) { CNormalization InterpNorm(pSent->m_Words); const CHomonym& h = pSent->m_Words[WH]; const CFioWordSequence* pFioWS = dynamic_cast<const CFioWordSequence*>(h.GetSourceWordSequence()); if (pFioWS != NULL) { CFioWS newFioWS(*pFioWS); newFioWS.SetMainWord(WH); newFioWS.AddLemma(SWordSequenceLemma (InterpNorm.GetArtificialLemma(WH))); CTextWS newTextWS; newTextWS.SetPair(*pFioWS); newTextWS.SetMainWord(WH); newTextWS.ResetLemmas(newFioWS.GetLemmas(), true); if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } else { const CFactsWS* pFactWS = dynamic_cast<const CFactsWS*>(h.GetSourceWordSequence()); if (pFactWS) { CTextWS newTextWS; newTextWS.SetPair(*pFactWS); newTextWS.SetMainWord(WH); newTextWS.ResetLemmas(pFactWS->GetLemmas(), true); if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } else { const CTextWS* pTextWS = dynamic_cast<const CTextWS*>(h.GetSourceWordSequence()); if (pTextWS) { CTextWS newTextWS = *pTextWS; if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } else { if (h.GetSourceWordSequence() == 0 || h.GetSourceWordSequence()->Size() == 1) { CTextWS newTextWS; newTextWS.SetPair(pSent->m_Words.GetWord(WH).GetSourcePair()); newTextWS.SetMainWord(WH); newTextWS.AddLemma(SWordSequenceLemma(h.GetLemma())); if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } } } } }
// Stores `s` as the article title and resets the keyword type to NULL.
// An empty `s` leaves the object unchanged.
void SArtPointer::PutStrType(const Wtroka& s)
{
    if (!s.empty()) {
        m_strArt = s;
        m_KWType = NULL;
    }
}
// Writes a textual representation of the word sequence `ws` into `sLemmas`.
//
// The capitalized lemma of `ws` is taken first. With bLem == true that is the
// whole result and the function reports whether it is non-empty. Otherwise an
// empty lemma is replaced by the original texts of the words of `ws` joined
// with single spaces, leading/trailing spaces and quote characters are
// trimmed, and true is returned.
bool CSentence::GetWSLemmaString(Wtroka& sLemmas, const CWordSequence& ws, bool bLem) const
{
    sLemmas = ws.GetCapitalizedLemma();
    if (bLem)
        return !sLemmas.empty();

    if (sLemmas.empty()) {
        int wordNo = ws.FirstWord();
        while (wordNo <= ws.LastWord()) {
            if (!sLemmas.empty())
                sLemmas += ' ';
            sLemmas += getWord(wordNo)->GetOriginalText();
            wordNo++;
        }
    }

    static const Wtroka trim_chars = CharToWide(" \"\'");

    TWtringBuf trimmed = sLemmas;
    for (; !trimmed.empty() && trim_chars.find(trimmed[0]) != TWtringBuf::npos; )
        trimmed.Skip(1);
    for (; !trimmed.empty() && trim_chars.find(trimmed.back()) != TWtringBuf::npos; )
        trimmed.Chop(1);

    // Re-create the string only when something was actually trimmed.
    if (sLemmas.size() != trimmed.size())
        sLemmas = ::ToWtring(trimmed);
    return true;
}
// обычно функция возвращает clause->ToString(), // если в клаузе есть открывающая кавычка, а нет закрывающей, тогда функция добавляет к результату // все слова до закрывающей кавычки Wtroka CQuotesFinder::FindRightQuoteIfHas(const CWordsPair& PeriodToPrint, int SentNo, const CWordsPair& GroupToExclude, yvector<SFactAddress>& FioInQuotes) { FioInQuotes.clear(); CSentenceRusProcessor* pSentPrc = GetSentPrc(SentNo); yset<int> QuoteWords; Wtroka ClauseStr; static const Wtroka COMMA = Wtroka::FromAscii(","); for (int i = PeriodToPrint.FirstWord(); i <= PeriodToPrint.LastWord(); i++) { const CWord& w = pSentPrc->m_Words.GetOriginalWord(i); if (!GroupToExclude.Contains(i) && !(GroupToExclude.Contains(i - 1) && w.GetText() == COMMA) && !(GroupToExclude.Contains(i + 1) && w.GetText() == COMMA)) { ClauseStr += w.GetOriginalText() + ' '; QuoteWords.insert(i); } } size_t index = ClauseStr.find('"'); if (index != Wtroka::npos && ClauseStr.rfind('"') == index) { yset<int> AddQuoteWords; Wtroka Add; // если только одна кавычка, то пойдем искать вторую кавычку int i; for (i = PeriodToPrint.LastWord()+1; i < (int)pSentPrc->getWordsCount(); i++) { const CWord& w = pSentPrc->m_Words.GetOriginalWord(i); if (!GroupToExclude.Contains(i)) { Add += w.GetOriginalText() + ' '; AddQuoteWords.insert(i); } if (w.HasCloseQuote()) break; } if (i != (int)pSentPrc->getWordsCount()) { ClauseStr += ' '; ClauseStr += Add; QuoteWords.insert(AddQuoteWords.begin(), AddQuoteWords.end()); } } AddFios(SentNo, QuoteWords, FioInQuotes); return ClauseStr; };
// Collapses every run of whitespace characters in `w` into a single ' ',
// in place. A lone whitespace character that is not ' ' is replaced by ' '
// as well; a lone ' ' is left as is.
void Collapse(Wtroka& w)
{
    size_t len = w.size();
    size_t start = 0;
    while (start < len) {
        // Length of the whitespace run beginning at `start`.
        size_t runLen = 0;
        while (start + runLen < len && IsWhitespace(w[start + runLen]))
            ++runLen;

        const bool longRun = runLen > 1;
        const bool singleNonSpace = runLen == 1 && w[start] != ' ';
        if (longRun || singleNonSpace) {
            w.replace(start, runLen, 1, ' ');
            len = w.size();
        }
        ++start;
    }
}
// True when `str` is exactly one character that ::IsQuotation accepts and
// that is not SINGLE_QUOTE_CHAR.
bool CWordBase::IsDoubleQuote(const Wtroka& str)
{
    if (str.size() != 1)
        return false;
    if (!::IsQuotation(str[0]))
        return false;
    return str[0] != SINGLE_QUOTE_CHAR;
}
bool RequiresSpace(const Wtroka& w1, const Wtroka& w2) { #define SHIFT(i) (ULL(1)<<(i)) if(w1.length() == 1) { if (NUnicode::CharHasType(w1[0], SHIFT(Ps_START) | SHIFT(Ps_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Ps_QUOTE) | SHIFT(Pi_QUOTE))) return false; } if(w2.length() == 1) { if (NUnicode::CharHasType(w2[0], SHIFT(Pe_END) | SHIFT(Po_TERMINAL) | SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE) | SHIFT(Pe_QUOTE) | SHIFT(Pf_QUOTE))) return false; } #undef SHIFT return true; }
// Returns the word text with the quote characters recorded in the
// open/close quote flags put back around it (single or double quote,
// depending on the corresponding "single" flag).
Wtroka CWordBase::GetOriginalText() const
{
    // Stub for the junk characters that are generated for <BR> after the end
    // of a sentence: such a punctuation token yields an empty text.
    if (m_typ == Punct && IsChar(m_txt, 'W'))
        return Wtroka();

    size_t quoteCount = 0;
    if (m_bHasOpenQuote)
        ++quoteCount;
    if (m_bHasCloseQuote)
        ++quoteCount;

    // optimize most frequent case
    if (quoteCount == 0)
        return m_txt;

    Wtroka quoted;
    quoted.reserve(m_txt.size() + quoteCount);
    if (m_bHasOpenQuote)
        quoted += m_bSingleOpenQuote ? SINGLE_QUOTE_CHAR : DOUBLE_QUOTE_CHAR;
    quoted += m_txt;
    if (m_bHasCloseQuote)
        quoted += m_bSingleCloseQuote ? SINGLE_QUOTE_CHAR : DOUBLE_QUOTE_CHAR;
    return quoted;
}
bool CQuotesFinder::AddQuoteFact(const SValenciesValues& VerbCommunic, CSentenceRusProcessor* pSent, const Wtroka& QuoteStr, const yvector<SFactAddress>& FioInQuotes, SLeadInfo LeadInfo) { Wtroka qstr = StripString(QuoteStr); if (qstr.size() < 3) return false; if (!(LeadInfo.m_iLastSent == -1 && LeadInfo.m_iFirstSent == -1) && !(LeadInfo.m_iFirstSent >= 0 && LeadInfo.m_iLastSent >= 0)) ythrow yexception() << "CQuotesFinder::AddQuoteFact : bad lead info"; if (m_bCreateDBFact) { DECLARE_STATIC_RUS_WORD(kSUB, "СУБ"); if (!AddQuoteDBFact(VerbCommunic.GetValue(kSUB), pSent, qstr, LeadInfo, true)) return false; for (size_t i=0; i < FioInQuotes.size(); i++) if (!AddQuoteDBFact(FioInQuotes[i], GetSentPrc(FioInQuotes[i].m_iSentNum), qstr, LeadInfo, false)) return false; } else AddSimpleQuoteFact(VerbCommunic, pSent, qstr, LeadInfo); return true; }
bool CParserOptions::ResolveArticleByTitle(const Wtroka& title, yset<SArtPointer>& resolved) const { typedef NGzt::TArticlePool::TTitleMap TTitleMap; const wchar16 STAR = '*'; size_t maxSize = resolved.size() + 100; bool found = false; TWtringBuf suffix = title; TWtringBuf preffix = suffix.NextTok(STAR); TTitleMap subtrie = Singleton<CDictsHolder>()->GetGztArticleIndex()->FindTails(~preffix, +preffix); for (TTitleMap::TConstIterator it = subtrie.Begin(); it != subtrie.End(); ++it) { if (!it.IsEmpty()) { Wtroka key = it.GetKey(); key.prepend(preffix); if (CDictsHolder::IsTitleMatch(key, title)) { if (title != key) Cerr << "XML parameters: " << NStr::DebugEncode(title) << " -> " << NStr::DebugEncode(key) << Endl; resolved.insert(SArtPointer(key)); found = true; } } if (resolved.size() >= maxSize) { Cerr << "XML parameters: too many gzt-articles correspond to \"" << NStr::DebugEncode(title) << "\". Please use more specific name." << Endl; ythrow yexception() << "Too many gzt-articles resolved."; } } // it also could be a situation article name from aux_dic_kw.cxx if (Singleton<CDictsHolder>()->GetDict(KW_DICT).has_article(title)) { resolved.insert(SArtPointer(title)); found = true; } return found; }
bool CQuotesFinder::CreateQuoteValue(const Wtroka& quoteStr, CFactFields& factFields) const { DECLARE_STATIC_RUS_WORD(kChto, "что "); DECLARE_STATIC_RUS_WORD(kChtoby, "чтобы "); Wtroka qstr = quoteStr; CTextWS QuoteWS; if (qstr.has_prefix(kChto)) qstr.erase(0,4); if (qstr.has_prefix(kChtoby)) qstr.erase(0,6); if (qstr.empty()) return false; qstr = StripString(qstr); if (qstr.size() > 1 && qstr[0] =='"' && qstr.back() =='"') qstr = qstr.substr(1, qstr.size() - 2); qstr = StripString(qstr); QuoteWS.AddLemma(SWordSequenceLemma(qstr)); QuoteWS.SetArtificialPair(CWordsPair(0,0)); factFields.AddValue(CFactFields::QuoteValue, QuoteWS); return true; }
// Replaces the characters '<', '>', '&' and '"' in `str` with the entity
// strings built from LT, GT, AMP and QUOT; when `insertBr` is true, '\r' and
// '\n' are replaced with the string built from BR. The string is left
// untouched when nothing needs escaping.
//
// NOTE(review): `insertBr` is used as a compile-time constant
// (EscapedLen<insertBr>) but is not declared in this block - presumably a
// template parameter whose `template <bool insertBr>` header sits outside
// the visible text; confirm.
//
// The address-of expressions below are deliberately written as `&(name)`:
// the bare form (ampersand, entity name, semicolon) reads as an HTML
// character reference and had been decoded by HTML-aware tooling into
// `ent = < break;`, which does not compile.
void EscapeHtmlChars(Wtroka& str)
{
    static const Wtroka lt(LT, ARRAY_SIZE(LT));
    static const Wtroka gt(GT, ARRAY_SIZE(GT));
    static const Wtroka amp(AMP, ARRAY_SIZE(AMP));
    static const Wtroka br(BR, ARRAY_SIZE(BR));
    static const Wtroka quot(QUOT, ARRAY_SIZE(QUOT));

    // First pass: compute the escaped length to detect the "nothing to do"
    // case and to reserve the result buffer once.
    size_t escapedLen = 0;
    const Wtroka& cs = str;
    for (size_t i = 0; i < cs.size(); ++i)
        escapedLen += EscapedLen<insertBr>(cs[i]);

    if (escapedLen == cs.size())
        return;

    Wtroka res;
    res.reserve(escapedLen);

    // Second pass: copy unescaped stretches verbatim, entities in between.
    size_t start = 0;
    for (size_t i = 0; i < cs.size(); ++i) {
        const Wtroka* ent = NULL;
        switch (cs[i]) {
            case '<':
                ent = &(lt);
                break;
            case '>':
                ent = &(gt);
                break;
            case '&':
                ent = &(amp);
                break;
            case '\"':
                ent = &(quot);
                break;
            default:
                if (insertBr && (cs[i] == '\r' || cs[i] == '\n')) {
                    ent = &(br);
                    break;
                } else
                    continue;
        }

        res.append(cs.begin() + start, cs.begin() + i);
        res.append(ent->begin(), ent->end());
        start = i + 1;
    }

    res.append(cs.begin() + start, cs.end());
    res.swap(str);
}
// ищет закрывающую кавычку в текущем или последующем предложениях, // добавляет все слова до закрывающей кавычки к ResultToAdd // игнорируем внутренние кавычки bool CQuotesFinder::FindCloseQuoteInNextSentences(int StartSentNo, int StartWordForFirstSentence, Wtroka& ResultToAdd, yvector<SFactAddress>& FioInQuotes, SLeadInfo& LeadInfo) { Wtroka Add; yvector<SFactAddress> AddFioInQuotes; int Depth = 1; for (int SentNo=StartSentNo; (SentNo-StartSentNo<=2)&& (SentNo< (int)m_vecSentence.size()); SentNo++) { CSentenceRusProcessor* pSent = GetSentPrc(SentNo); yset<int> QuoteWords; int k=0; if (SentNo == StartSentNo) k = StartWordForFirstSentence; for (; k < (int)pSent->getWordsCount(); k++) { const CWord& w = *pSent->getWordRus(k); Add += w.GetOriginalText() + ' '; QuoteWords.insert(k); bool bHasCloseQuote = false; if (w.HasOpenQuote()) { if (ispunct(w.GetText()[0])) { Depth--; // одиночную кавычку знака препинания считаем закр. кавычкой (кто-то по ошибке поставил пробел) bHasCloseQuote = true;; } else Depth++; } if (w.HasCloseQuote()) { Depth--; if ((Depth == 1) // Закрывающая кавычка в конце предложения закрывает все открытые кавычки. && ((k+1 == (int)pSent->getWordsCount()) || ((k+2 == (int)pSent->getWordsCount()) && pSent->getWordRus(k+1)->IsPunct() ) ) ) { Depth = 0; } bHasCloseQuote = true; } if (w.GetText() == Wtroka::FromAscii("\"")) { if (k+1 == (int)pSent->getWordsCount() || pSent->getWordRus(k+1)->IsPunct()) { bHasCloseQuote = true; Depth--; } } if (bHasCloseQuote && Depth == 0) { if (!Add.empty() && Add[0] == '-') { const wchar16* beg = Add.begin() + 1; StripRangeBegin(beg, Add.end()); Add.assign(beg, Add.end() - beg); } if (ResultToAdd.back() == ',' && Add.size() > 0 && NStr::IsLangAlpha(Add[0], TMorph::GetMainLanguage()) && ::IsUpper(Add[0])) { ResultToAdd.erase(ResultToAdd.size() - 1); ResultToAdd += CharToWide(". 
"); } ResultToAdd += Add; AddFios(SentNo, QuoteWords, AddFioInQuotes); FioInQuotes.insert(FioInQuotes.end(), AddFioInQuotes.begin(), AddFioInQuotes.end()); LeadInfo.m_iLastSent = SentNo; return true; } } AddFios(SentNo, QuoteWords, AddFioInQuotes); } return false; }
// True when `str` consists of exactly one character equal to SINGLE_QUOTE_CHAR.
bool CWordBase::IsSingleQuote(const Wtroka& str)
{
    if (str.size() != 1)
        return false;
    return str[0] == SINGLE_QUOTE_CHAR;
}
// True when `str` consists of exactly one character accepted by ::IsDash.
bool CWordBase::IsDash(const Wtroka& str)
{
    if (str.size() != 1)
        return false;
    return ::IsDash(str[0]);
}
// True when `s` consists of exactly one character and that character is `ch`.
static inline bool IsChar(const Wtroka& s, wchar16 ch)
{
    if (s.size() != 1)
        return false;
    return s[0] == ch;
}
// проверяет, содержит ли ситуация VerbCommunic косвенную речь в качестве актанта-клаузы. Примеры: // "Петров сказал, что мы уходим в Яндекс." // По словам Сергея Крапина , я уехал // Она призналась , что "придумали проект наружной рекламы , но притормозили его , не захотев оказаться" . bool CQuotesFinder::TryIndirectSpeech(const SValenciesValues& VerbCommunic, int SentNo) { CWordsPair PairToSearch = GetWordIndexOfQuoteSituation(VerbCommunic, SentNo); if (PairToSearch.FirstWord() == -1) return false; const_clause_it min_it = GetSentPrc(SentNo)->GetClauseStructureOrBuilt().GetMinimalClauseThatContains(PairToSearch); CSentenceRusProcessor* pSentPrc = GetSentPrc(SentNo); const CClauseVariant& ClauseVariant = pSentPrc->GetClauseStructureOrBuilt(); if (min_it == ClauseVariant.GetEndClause()) return false; // пропускаем непервые клаузы, которые заканчиваются на тире, поскольку, это может быть только прямая речь. if ((**min_it).FirstWord() != 0 && pSentPrc->getWordRus((**min_it).LastWord())->GetText() == Wtroka::FromAscii("-")) return false; // вычисляем союз по названию статьи, например, из "_глагол_со_чтобы" получаем "чтобы" Wtroka strValencySubConj; { DECLARE_STATIC_RUS_WORD(kChto, "что"); DECLARE_STATIC_RUS_WORD(kChtoby, "чтобы"); Wtroka t = VerbCommunic.m_pArt->get_title(); if (t.length() > 4 && t[t.length() - 4] == '_' && t.has_suffix(kChto)) strValencySubConj = kChto; else if (t.length() > 6 && t[t.length() - 6] == '_' && t.has_suffix(kChtoby)) strValencySubConj = kChtoby; } // если справа стоит клауза со "что" if (!strValencySubConj.empty()) { yvector<const CClause*> SubClauses; ClauseVariant.GetIncludedClauses(min_it, SubClauses); std::sort(SubClauses.begin(), SubClauses.end(), LessBySize); { // берем первую справа const_clause_it next_it; ClauseVariant.GetNeighbourClauseR(min_it, next_it); if (next_it != ClauseVariant.GetEndClause()) SubClauses.push_back(*next_it); } // берем вложенную клаузу справа (это лучший вариант) { const CClause* pSubClause = 
ClauseVariant.GetIncludedClauseR(min_it); if (pSubClause) SubClauses.push_back(pSubClause); } for (int i=SubClauses.size()-1; i >=0; i--) { const CClause* pSubClause = SubClauses[i]; if (pSubClause->FromRight(PairToSearch)) for (int ConjNo=0; ConjNo < pSubClause->GetConjsCount(); ConjNo++) { const CHomonym& Conj = pSubClause->GetConj(ConjNo); const article_t* piConjArt = GlobalDictsHolder->GetAuxArticle(Conj, CONJ_DICT); if (piConjArt->get_title().substr(0, strValencySubConj.length()) == strValencySubConj) { yvector<SFactAddress> FioInQuotes; Wtroka QuoteStr = FindRightQuoteIfHas(*pSubClause, SentNo, CWordsPair(), FioInQuotes); if (AddQuoteFact(VerbCommunic, pSentPrc, QuoteStr, FioInQuotes, SLeadInfo())) return true; } }; } } // если клауза является вводной DECLARE_STATIC_RUS_WORD(kPo_slovam_Ivanova, "по_словам_иванова"); DECLARE_STATIC_RUS_WORD(kKak, "как"); if ((VerbCommunic.m_pArt->get_title() == kPo_slovam_Ivanova && (*min_it)->HasType(Parenth)) || // или использована с союзом "как", например, "как рассказал Петров, мы уже ушли" (pSentPrc->getWordRus((*min_it)->FirstWord())->FindLemma(kKak) && ClauseVariant.GetIncludedClauseL(min_it) == 0)) { // если вводная калуза стоит на первом месте и в предложении нет других клауз, тогда берем // все предложение начиная от конца вводной клаузы до if ((**min_it).FirstWord() == 0) { yvector<SFactAddress> FioInQuotes; Wtroka QuoteStr = FindRightQuoteIfHas(CWordsPair((**min_it).LastWord(), pSentPrc->getWordsCount() - 1), SentNo, **min_it, FioInQuotes); if (AddQuoteFact(VerbCommunic, pSentPrc, QuoteStr, FioInQuotes, SLeadInfo())) return true; }; // берем главную клаузу const CClause* pMainClause = ClauseVariant.GetMainClause(min_it); if (!pMainClause) { const_clause_it next_it; ClauseVariant.GetNeighbourClauseR(min_it, next_it); if (next_it != ClauseVariant.GetEndClause()) pMainClause = *next_it; } if (pMainClause) { // печатаем главную клаузу без вложенной yvector<SFactAddress> FioInQuotes; Wtroka QuoteStr = 
FindRightQuoteIfHas(*pMainClause, SentNo, **min_it, FioInQuotes); if (AddQuoteFact(VerbCommunic, pSentPrc, QuoteStr,FioInQuotes, SLeadInfo())) return true; } } return false; }