// Fills the url, source and title attributes for the file currently selected
// by m_iCurPath.
//
// The url is the found file path with the m_strSearchDir prefix cut off; when
// the found path equals the search dir itself, the second part produced by
// PathHelper::Split is used instead. Throws yexception when the resulting url
// is empty.
void CDocListRetrieverFromDisc::FillDocInfo(SDocumentAttribtes& attrs) {
    Stroka foundPath = m_SmartFileFind.GetFoundFilePath(m_iCurPath);

    Stroka docUrl;
    if (foundPath == m_strSearchDir) {
        TStringBuf head, tail;
        PathHelper::Split(foundPath, head, tail);
        docUrl = ToString(tail);
    } else {
        docUrl = foundPath.substr(m_strSearchDir.size());
    }

    if (docUrl.empty()) {
        ythrow yexception() << "Can't build url for file \"" << foundPath << "\" with searchdir \"" << m_strSearchDir << "\".";
    }

    TransformBackSlash(docUrl);
    attrs.m_strUrl = docUrl;

    // Timestamp: the file's last write time when m_strLTM compares equal to
    // "file", otherwise (or when formatting gives nothing) m_strStartTime.
    Stroka timeStamp;
    if (stroka(m_strLTM) == "file") {
        CTime lastWrite = m_SmartFileFind.GetFoundFileInfo(m_iCurPath).m_LastWriteTime;
        timeStamp = lastWrite.Format("%d.%m.%Y %H:%M:%S");
    }
    if (timeStamp.empty()) {
        timeStamp = m_strStartTime;
    }

    attrs.m_strTitle = CharToWide(timeStamp);
    attrs.m_strSource = docUrl;
    // NOTE(review): this overwrites the title assigned two statements above,
    // so the timestamp never reaches the caller - confirm which field the
    // timestamp was meant for.
    attrs.m_strTitle = CharToWide(attrs.m_strSource);
}
// Creates a buffered (8192 bytes) file output stream.
// A non-empty url is passed to TFileOutput as is; for an empty url the stream
// is built on Duplicate(1) (presumably a duplicate of the stdout descriptor -
// confirm against Duplicate()).
static inline TAutoPtr<TOutputStream> OpenOutput(const Stroka& url) {
    if (!url.empty()) {
        return new TBuffered<TFileOutput>(8192, url);
    }
    return new TBuffered<TFileOutput>(8192, Duplicate(1));
}
// Opens the doc-id -> agency-name file read-only when a name is given and the
// file exists; otherwise leaves m_DocId2AgNameFile untouched.
// Always returns true.
bool CAgencyInfoRetriver::Init(const Stroka& strDoc2AgFile) {
    const bool fileAvailable = !strDoc2AgFile.empty() && isexist(strDoc2AgFile.c_str());
    if (fileAvailable) {
        m_DocId2AgNameFile = TFile(strDoc2AgFile.c_str(), OpenExisting | RdOnly);
    }
    return true;
}
// Registers `name` as an alias of this file unless it is empty or Is(name)
// already holds. When no generated descriptor is attached yet, tries to find
// one for the new alias in the generated pool.
inline void TFile::AddAliasImpl(const Stroka& name) {
    if (name.empty() || Is(name)) {
        return;
    }

    Aliases_.push_back(name);

    if (!GeneratedDescriptor) {
        GeneratedDescriptor = NBuiltin::FindInGeneratedPool(name);
    }
}
// Feeds `token` to the import state machine and, when the token is the file
// name of an import statement, prepares its canonic replacement.
//
// Returns true when CanonicImportedFile has been filled with the quoted,
// C-escaped canonic name that should replace the original token; returns
// false when the token is not an import file name or when the original name
// should be kept as is.
bool TDependencyCollectorInputStream::TryConsumeImportFileName(const NProtoBuf::io::Tokenizer::Token& token) {
    ImportProcessor.NextState(token);
    if (!ImportProcessor.Found()) {
        return false;
    }

    YASSERT(token.type == NProtoBuf::io::Tokenizer::TYPE_STRING);

    // Imported file name as written in the source, with quotes removed.
    Stroka importedName;
    Tokenizer_.ParseString(token.text, &importedName);

    // Its canonic form according to the source tree.
    const Stroka canonicName = SourceTree->CanonicName(importedName);

    const bool keepOriginal = canonicName.empty() || canonicName == importedName;
    if (keepOriginal) {
        // Special case: give the user a hint about the forbidden shortcut.
        if (importedName == STRINGBUF("base.proto")) {
            ErrorCollector->AddErrorAtCurrentFile(token.line, token.column, "Importing base.proto by shortcut name is not allowed from arcadia. You should use full path instead: import \"kernel/gazetteer/base.proto\";");
        }
        // Nothing to substitute, the pipeline goes on with the original name.
        return false;
    }

    // Put the quotes back around the canonic name; it replaces the original
    // further down the pipeline.
    CanonicImportedFile.clear();
    CanonicImportedFile.append('"');
    EscapeC(canonicName, CanonicImportedFile);
    CanonicImportedFile.append('"');
    return true;
}
// Dumps the LR item sets and the GOTO function to the file GrammarFileName
// in a human-readable text form.
//
// For every item set "I<n> =" is printed followed by its rules; the dot
// position inside a rule is marked with "; ". After the item sets every GOTO
// entry is printed as "GOTO( I<state>, <symbol> ) = I<target>".
//
// Does nothing when GrammarFileName is empty. When the file cannot be opened
// a message is written to stderr and the dump is skipped (previously the NULL
// FILE* was passed straight to fprintf/fclose, which is undefined behaviour).
void CLRCollectionSet::SaveStateCollection(Stroka GrammarFileName, const CWorkGrammar* p_WorkGrammar) const {
    if (GrammarFileName.empty())
        return;

    FILE* fp = fopen(GrammarFileName.c_str(), "wb");
    if (fp == NULL) {
        fprintf(stderr, "Can't open file \"%s\" for writing.\n", GrammarFileName.c_str());
        return;
    }

    for (int i = 0; i < (int)m_ItemSets.size(); i++) {
        fprintf(fp, "I%i =\n", i);

        for (yset<CLRItem>::const_iterator it = m_ItemSets[i].begin(); it != m_ItemSets[i].end(); ++it) {
            fprintf(fp, "\t%s -> ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_LeftPart].m_ItemStrId.c_str());

            for (int j = 0; j < (int)it->m_pRule->m_RightPart.m_Items.size(); j++) {
                // the dot stands before the j-th symbol of the right part
                if (j == (int)it->m_DotSymbolNo)
                    fprintf(fp, "; ");
                fprintf(fp, "%s ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_RightPart.m_Items[j]].m_ItemStrId.c_str());
            }

            // the dot stands after the last symbol of the right part
            if (it->m_DotSymbolNo == it->m_pRule->m_RightPart.m_Items.size())
                fprintf(fp, "; ");

            fprintf(fp, "\n");
        }

        fprintf(fp, "\n");
    }

    fprintf(fp, "\n");

    for (ymap< CStateAndSymbol, size_t>::const_iterator it_goto = m_GotoFunction.begin(); it_goto != m_GotoFunction.end(); ++it_goto)
        fprintf(fp, "GOTO( I%" PRISZT ", %s ) = I%" PRISZT "\n", it_goto->first.m_StateNo, p_WorkGrammar->m_UniqueGrammarItems[it_goto->first.m_SymbolNo].m_ItemStrId.c_str(), it_goto->second);

    fclose(fp);
}
// Returns `f` without its last character when that character satisfies
// IsDelim(); otherwise returns `f` unchanged. At most one character is removed.
static inline Stroka Fix(Stroka f) {
    if (f.empty()) {
        return f;
    }
    if (IsDelim(f[f.size() - 1])) {
        f.pop_back();
    }
    return f;
}
// Returns the directory part of `fileName`: the name itself when IsDir()
// reports it as a directory, GetDirName(fileName) otherwise. A non-empty
// result always ends with the directory separator.
Stroka StripFileComponent(const Stroka& fileName) {
    Stroka dir;
    if (IsDir(fileName)) {
        dir = fileName;
    } else {
        dir = GetDirName(fileName);
    }

    if (dir.empty() || dir.back() == GetDirectorySeparator()) {
        return dir;
    }

    dir.append(GetDirectorySeparator());
    return dir;
}
// Joins the string forms of all bit sets in `bitset` into one string.
// `delim` is handed to TGramBitSet::ToString for every set; `groupdelim` is
// inserted before a set's text whenever the text accumulated so far is
// non-empty.
Stroka BitsetToString(const yvector<TGramBitSet>& bitset, const Stroka& delim /*= ", "*/, const Stroka& groupdelim /*= ", "*/) {
    Stroka joined;
    for (yvector<TGramBitSet>::const_iterator group = bitset.begin(); group != bitset.end(); ++group) {
        if (!joined.empty()) {
            joined += groupdelim;
        }
        joined += group->ToString(~delim);
    }
    return joined;
}
// Like Stroka::Quote(), but does not quote digits-only string static Stroka QuoteForHelp(const Stroka& str) { if (str.empty()) return str.Quote(); for (size_t i = 0; i < str.size(); ++i) { if (!isdigit(str[i])) return str.Quote(); } return str; }
// Reads the list of facts to show from the children of `piNode`.
//
// The "addEmpty" attribute on `piNode` itself switches on the output of facts
// with empty fields. Every child named "fact" with a non-empty "name"
// attribute is registered through AddFactToShow(); its second argument is
// true unless the child carries a "noequality" attribute.
void CParserOptions::ParseListOfFacts(TXmlNodePtrBase piNode) {
    TXmlNodePtrBase piFact = piNode->children;
    m_ParserOutputOptions.m_bShowFactsWithEmptyField = piNode.HasAttr("addEmpty");

    while (piFact.Get() != NULL) {
        if (piFact.HasName("fact")) {
            Stroka factName = piFact.GetAttr("name");
            if (!factName.empty()) {
                AddFactToShow(factName, !piFact.HasAttr("noequality"));
            }
        }
        piFact = piFact->next;
    }
}
// Builds a TGramBitSet from a list of grammeme names.
// `gram` is split on any character of `delim`; every token that
// TGrammarIndex::GetCode() recognises (code != gInvalid) is set in the
// result, other tokens are ignored. An empty `gram` gives an empty set.
TGramBitSet StringToBitset(const Stroka& gram, const Stroka& delim /*= ",; "*/) {
    if (gram.empty()) {
        return TGramBitSet();
    }

    TGramBitSet parsed;
    VectorStrok names;
    SplitStrokuBySet(&names, ~gram, ~delim);

    for (VectorStrok::const_iterator name = names.begin(); name != names.end(); ++name) {
        const TGrammar code = TGrammarIndex::GetCode(*name);
        if (code == gInvalid) {
            continue;
        }
        parsed.Set(code);
    }
    return parsed;
}
void WriteToLogFile(const Stroka& sGrammarFileLog, Stroka& str, bool bRW) { if (sGrammarFileLog.empty()) return; str += '\n'; THolder<TFile> f; if (bRW) f.Reset(new TFile(sGrammarFileLog, CreateAlways | WrOnly | Seq)); else f.Reset(new TFile(sGrammarFileLog, OpenAlways | WrOnly | Seq | ForAppend)); TFileOutput out(*f); out.Write(str.c_str()); };
bool TProtoParser::BuildMessageInheritanceGraph(const TDescriptor* msg_descr, const TSimpleHierarchy& graph) { for (int i = 0; i < msg_descr->nested_type_count(); ++i) if (!BuildMessageInheritanceGraph(msg_descr->nested_type(i), graph)) return false; // Find a base descriptor. There are two possible cases: // 1) @msg_descr was taken from generated_pool (builtin) and it's base type name is stored in its GztProtoDerivedFrom option. // 2) @msg_descr was read from disk .proto (using TDependencyCollector) and its base is remembered in the inheritance graph. const Stroka* explicit_base_name = graph.GetBase(msg_descr->full_name()); const Stroka implicit_base_name = msg_descr->options().GetExtension(GztProtoDerivedFrom); if (!explicit_base_name && !implicit_base_name) // no base class return true; const NProtoBuf::Descriptor* explicit_base_descr = NULL; if (explicit_base_name) { explicit_base_descr = ResolveBaseMessageByName(*explicit_base_name, msg_descr); if (!explicit_base_descr) return false; } const NProtoBuf::Descriptor* implicit_base_descr = NULL; if (!implicit_base_name.empty()) { implicit_base_descr = ResolveBaseMessageByName(implicit_base_name, msg_descr); if (!implicit_base_descr) return false; } if (explicit_base_descr && implicit_base_descr && explicit_base_descr != implicit_base_descr) { Errors->AddError(msg_descr->file()->name(), -1, 0, NProtoBuf::strings::Substitute( "Ambiguous base type for $0: either $1 (disk) or $2 (builtin).", msg_descr->full_name(), explicit_base_descr->full_name(), implicit_base_descr->full_name())); return false; } RawMessageGraph[msg_descr] = explicit_base_descr ? explicit_base_descr : implicit_base_descr; // check if cycle dependency exists. if (!CheckIfSelfDerived(msg_descr)) return false; return true; }
// Reads from the buffered input into `st` using MemInput_.ReadTo(.., to),
// refilling the buffer from Slave_ whenever it is exhausted.
//
// Reading stops when the slave returns no more data or when a chunk taken
// from the buffer is shorter than what was available in it (presumably the
// delimiter `to` was met - confirm against MemInput_.ReadTo()).
// Returns true when at least one chunk was read from the buffer; `st` always
// receives what has been collected, replacing its previous content.
inline bool ReadTo(Stroka& st, char to) {
    Stroka collected;
    Stroka chunk;
    bool gotAnything = false;

    for (;;) {
        if (MemInput_.Exhausted()) {
            const size_t filled = Slave_->Read(Buf(), BufLen());
            if (filled == 0) {
                break;
            }
            MemInput_.Reset(Buf(), filled);
        }

        const size_t availableBefore(MemInput_.Avail());
        MemInput_.ReadTo(chunk, to);
        const size_t chunkLen = chunk.length();

        // Avoid a copy for the first chunk: just take over its storage.
        if (!collected.empty()) {
            collected += chunk;
        } else {
            collected.swap(chunk);
        }

        gotAnything = true;

        if (chunkLen != availableBefore) {
            break;
        }
    }

    st.swap(collected);
    return gotAnything;
}
void DecodeUserInput(const TStringBuf& text, Wtroka& res, ECharset encoding, const Stroka& filename, size_t linenum) { const size_t MAX_MSG_TEXT_LEN = 250; try { CharToWide(text, res, encoding); } catch (...) { Cerr << "Cannot decode supplied text, invalid encoding (expected " << NameByCharset(encoding) << "):\n\n"; if (text.size() <= MAX_MSG_TEXT_LEN) Cerr << text; else Cerr << text.SubStr(0, MAX_MSG_TEXT_LEN) << "..."; Cerr << "\n"; if (!filename.empty()) { Cerr << "\n(File " << filename; if (linenum) Cerr << ", line " << linenum; Cerr << ")"; } Cerr << Endl; throw; } }
void CParserOptions::ParseListOfMW(TXmlNodePtrBase piNode, yset<TUnresolvedArtPointer>& artPointers) { TXmlNodePtrBase piMW = piNode->children; for (; piMW.Get() != NULL; piMW = piMW->next) { if (!piMW.HasName("mw")) continue; Stroka str = piMW.GetAttr("kw"); TUnresolvedArtPointer artPointer; if (!str.empty()) artPointer.KWTypeTitle = str; else artPointer.ArticleTitle = piMW.GetWAttr("art"); artPointer.ForLink = piMW.HasAttr("link"); artPointer.ForLinkFindOnly = piMW.HasAttr("link_find_only"); artPointer.ForMainPage = piMW.HasAttr("main_page"); artPointer.ForUrl = piMW.HasAttr("url"); artPointers.insert(artPointer); } }
// Loads the url -> name table (InterviewUrl2Fio) from a tab-separated file.
//
// Every non-blank line is expected to be "<url>\t<fio>"; both parts are
// stripped, the fio part is converted to a wide string using the input
// encoding of m_Parm. Lines that cannot be split on a tab are skipped.
// Does nothing for an empty file name; throws yexception when the file does
// not exist.
void CProcessor::InitInterviewFile(Stroka strNameFile) {
    if (strNameFile.empty()) {
        return;
    }
    if (!PathHelper::Exists(strNameFile)) {
        ythrow yexception() << "Can't open name dic.";
    }

    TBufferedFileInput input(strNameFile);
    Stroka rawLine;
    while (input.ReadLine(rawLine)) {
        TStringBuf record(rawLine);
        record = StripString(record);
        if (record.empty()) {
            continue;
        }

        TStringBuf url, fio;
        if (record.SplitImpl('\t', url, fio)) {
            url = StripString(url);
            fio = StripString(fio);
            InterviewUrl2Fio[ToString(url)] = CharToWide(ToString(fio), m_Parm.GetInputEncoding());
        }
    }
}
// Replaces the grammem forms of homonym H with the forms of an analytic
// construction built from H (the meaningful part) and VerbHomonym (the
// auxiliary verb).
//
// The old forms of H are swapped out, then every pair (old form of H, form of
// VerbHomonym) is examined. Depending on what the meaningful part is
// (infinitive, short participle, short adjective, praedic, comparative) a
// combined grammem set is computed and added back to H.Grammems. Pairs for
// which no part-of-speech tag (strPos) is chosen are skipped, so H may end up
// with fewer forms than before - or with none.
//
// strPos is only used as a "form accepted" marker: its text is never read,
// only whether it is empty.
void CAnalyticFormBuilder::ChangeGrammemsAsAnalyticForm(CHomonym& H, const CHomonym& VerbHomonym) {
    // take the current forms out of H; H.Grammems is refilled below
    THomonymGrammems old_grammems;
    H.Grammems.Swap(old_grammems);
    for (THomonymGrammems::TFormIter old = old_grammems.IterForms(); old.Ok(); ++old)
        for (THomonymGrammems::TFormIter verb = VerbHomonym.Grammems.IterForms(); verb.Ok(); ++verb) {
            Stroka strPos;
            // auxiliary verb grammems
            const TGramBitSet& VerbGrammems = *verb;
            // meaningful part grammems
            TGramBitSet MainGrammems = *old;
            // final grammems to set
            TGramBitSet ResultedGrammems;
            if (MainGrammems.Has(gInfinitive)) {
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                if (VerbGrammems.Has(gImperative)) {
                    // analytical form for imperatives in singular number does not exist
                    if (VerbGrammems.Has(gSingular))
                        continue;
                    ResultedGrammems.Set(gImperative); // e.g. "будем же жить!" ("let us live!")
                } else
                    ResultedGrammems |= VerbGrammems & NSpike::AllTimes; // e.g. "я стал пилить" ("I started sawing") or "стану писать" ("I will start writing")
                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllNumbers;
                ResultedGrammems |= VerbGrammems & NSpike::AllGenders;
                // copy all POS grammems from verb to main
                ResultedGrammems |= VerbGrammems & TMorph::AllPOS();
                H.PutAuxArticle(SDictIndex(TEMPLATE_DICT, VerbHomonym.GetAuxArticleIndex(TEMPLATE_DICT)));
                strPos = "Г";
            } else if (TMorph::IsShortParticiple(MainGrammems)) {
                // rejected form, e.g. "*будем же взяты!" ("*let us be taken!")
                if (VerbGrammems.Has(gImperative))
                    continue;
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                // remove any time grammems from participle
                ResultedGrammems &= ~NSpike::AllTimes;
                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllTimes;
                // NOTE(review): imperative verb forms were already skipped by the
                // `continue` at the top of this branch, so this condition looks
                // always false here - confirm before removing.
                if (VerbGrammems.Has(gImperative)) // ??? the same check second time, always false?
                    ResultedGrammems.Set(gImperative);
                strPos = "ПРИЧАСТИЕ";
                ResultedGrammems |= TGramBitSet(gParticiple, gShort);
            } else if (TMorph::IsShortAdjective(MainGrammems)) {
                if (VerbGrammems.Has(gImperative))
                    continue;
                // we assume that "будем же красивы!" ("let us be beautiful!") is a bad form;
                // in fact, we just do not want to introduce yet another bunch of codes for it.
                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= MainGrammems & (NSpike::AllNumbers | NSpike::AllGenders | TGramBitSet(gAnimated, gInanimated));
                ResultedGrammems &= ~TMorph::AllPOS();
                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems &= ~TGramBitSet(gActive);
                ResultedGrammems |= TGramBitSet(gAdjective, gShort);
                strPos = "П";
            } else if (MainGrammems.Has(gPraedic)) // e.g. "мне было больно" ("it hurt me")
            {
                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= NSpike::AllCases & MainGrammems; // copied from PronounPredk code (commented below) - preserve cases if any
                ResultedGrammems &= ~TMorph::AllPOS();
                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);
                strPos = "ПРЕДК";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (MainGrammems.Has(gComparative)) // e.g. "он был больше тебя" ("he was bigger than you")
            {
                ResultedGrammems = (VerbGrammems & ~TMorph::AllPOS()) | TGramBitSet(gComparative);
                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);
                strPos = "П";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (TMorph::IsFullAdjective(MainGrammems))
                // resolve disambiguity of homonyms, because analytical forms with full adjectives do not exist.
                continue;
            // "стал писать", "стану писать", "стать писать" - perfective aspect
            if (VerbHomonym.Lemma == kStat)
                ResultedGrammems.Reset(gImperfect).Set(gPerfect);
            // if the auxiliary verb was an infinitive then it is all an infinitive,
            // e.g. "быть лучше" ("to be better") or "должно быть принесено" ("must be brought")
            if (VerbHomonym.HasGrammem(gInfinitive)) {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gInfinitive);
                strPos = "ИНФИНИТИВ";
            } else if (VerbHomonym.HasGrammem(gGerund)) // e.g. "будучи лишней" ("being superfluous")
            {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gGerund);
                strPos = "ДЕЕПРИЧАСТИЕ";
            }
            // no branch above accepted this pair of forms - drop it
            if (strPos.empty())
                continue;
            /* do some corrections (code taken from RusGramTab.ProcessPOSAndGrammems) */
            if (ResultedGrammems.HasAll(NSpike::AllCases | TGramBitSet(gAdjPronoun)))
                ResultedGrammems |= NSpike::AllGenders | NSpike::AllNumbers;
            if (ResultedGrammems.Has(gMasFem))
                ResultedGrammems |= TGramBitSet(gMasculine, gFeminine);
            if (!ResultedGrammems.Has(gPraedic) && ResultedGrammems.HasAll(NSpike::AllCases) && !ResultedGrammems.Has(gSingular))
                ResultedGrammems |= TGramBitSet(gSingular, gPlural);
            H.Grammems.AddForm(ResultedGrammems);
        }
}
bool CCommonParm::ParseConfig(const Stroka& fn) { Config.Reset(NProtoConf::LoadFromFile<TTextMinerConfig>(fn)); if (!Config) ythrow yexception() << "Cannot read the config from \"" << fn << "\"."; if (m_args.has(OPT_BINARY_DIR)) { if (Config->has_binarydir() && Config->binarydir().length() > 0) ythrow yexception() << "Both \"--" << OPT_BINARY_DIR << "\" command line argument and \"BinaryDir\" config parameter specified"; Config->set_binarydir(m_args[OPT_BINARY_DIR]); } if (Config->has_input()) { TTextMinerConfig_TInputParams inputParams = Config->input(); if (inputParams.has_file() && !inputParams.file().empty() && inputParams.has_dir() && !inputParams.dir().empty()) ythrow yexception() << "Input\\File and Input\\Dir options are meaningless together"; Stroka fn = inputParams.file(); fn.to_lower(); if (fn.empty() || "stdin" == fn || "-" == fn) m_strInputFileName = ""; else m_strInputFileName = inputParams.file(); if (inputParams.has_dir()) { if (inputParams.has_type()) ythrow yexception() << "Input\\Type field is meaningless for directory processing"; m_strSourceType = "dir"; m_strInputFileName = inputParams.dir(); m_strDocDir = inputParams.dir(); if (!PathHelper::IsDir(m_strDocDir)) ythrow yexception() << "\"" << m_strDocDir << "\" isn't a directory"; } else { if (inputParams.has_type()) { switch (inputParams.type()) { case TTextMinerConfig::TInputParams::no: m_strSourceType = "no"; break; case TTextMinerConfig::TInputParams::dpl: m_strSourceType = "dpl"; break; case TTextMinerConfig::TInputParams::arcview: m_strSourceType = "arcview"; break; case TTextMinerConfig::TInputParams::mapreduce: m_strSourceType = "mapreduce"; break; case TTextMinerConfig::TInputParams::tar: if (m_strInputFileName.empty()) ythrow yexception() << "Please specify Input\\File field in configuration file in order to use .tar archive."; m_strSourceType = "tar"; break; case TTextMinerConfig::TInputParams::som: if (m_strInputFileName.empty()) ythrow yexception() << "Please specify Input\\File 
field in configuration file in order to read SOM data."; m_strSourceType = "som"; break; case TTextMinerConfig::TInputParams::yarchive: if (m_strInputFileName.empty()) ythrow yexception() << "Please specify Input\\File field in configuration file in order to read Yandex archive."; m_strSourceType = "yarchive"; break; default: ythrow yexception() << "This type of input isn't supported"; } } else m_strSourceType = "no"; } } if (NULL == ParserOptions.Get()) ParserOptions.Reset(new CParserOptions); ParserOptions->InitFromConfigObject(*Config.Get()); return true; }