コード例 #1
0
// Fills @attrs for the file currently selected by m_iCurPath.
// The URL is the found file path with the search directory prefix cut off
// (or the last component returned by PathHelper::Split when the path equals
// the search directory itself). Throws yexception if the URL ends up empty.
void CDocListRetrieverFromDisc::FillDocInfo(SDocumentAttribtes& attrs)
{
    Stroka foundPath = m_SmartFileFind.GetFoundFilePath(m_iCurPath);

    Stroka url;
    if (!(foundPath == m_strSearchDir)) {
        // regular case: drop the leading search-directory part
        url = foundPath.substr(m_strSearchDir.size());
    } else {
        // the search "directory" is the file itself: take the right part of the split
        TStringBuf head, tail;
        PathHelper::Split(foundPath, head, tail);
        url = ToString(tail);
    }

    if (url.empty()) {
        ythrow yexception() << "Can't build url for file \"" << foundPath
                              << "\" with searchdir \"" << m_strSearchDir << "\".";
    }

    TransformBackSlash(url);
    attrs.m_strUrl = url;

    // Time string: last-write time of the file when m_strLTM is "file",
    // otherwise (or if formatting gave nothing) the stored start time.
    Stroka timeText;
    if (stroka(m_strLTM) == "file") {
        CTime lastWrite = m_SmartFileFind.GetFoundFileInfo(m_iCurPath).m_LastWriteTime;
        timeText = lastWrite.Format("%d.%m.%Y %H:%M:%S");
    }
    if (timeText.empty()) {
        timeText = m_strStartTime;
    }

    attrs.m_strTitle = CharToWide(timeText);
    attrs.m_strSource = url;
    // NOTE(review): the title assigned above is immediately overwritten with the
    // source; kept as in the original since the intended title is unclear.
    attrs.m_strTitle = CharToWide(attrs.m_strSource);
}
コード例 #2
0
ファイル: main.cpp プロジェクト: Frankie-666/tomita-parser
// Creates a buffered (8192 bytes) file output stream.
// An empty @url selects a duplicate of file descriptor 1; otherwise the
// stream is opened on @url.
static inline TAutoPtr<TOutputStream> OpenOutput(const Stroka& url) {
    if (!url.empty()) {
        return new TBuffered<TFileOutput>(8192, url);
    }

    return new TBuffered<TFileOutput>(8192, Duplicate(1));
}
コード例 #3
0
// Opens @strDoc2AgFile read-only into m_DocId2AgNameFile when the name is
// non-empty and the file exists. Always returns true.
bool CAgencyInfoRetriver::Init(const Stroka& strDoc2AgFile)
{
    const bool canOpen = !strDoc2AgFile.empty() && isexist(strDoc2AgFile.c_str());
    if (canOpen) {
        m_DocId2AgNameFile = TFile(strDoc2AgFile.c_str(), OpenExisting | RdOnly);
    }

    return true;
}
コード例 #4
0
ファイル: builtin.cpp プロジェクト: Frankie-666/tomita-parser
// Registers @name as an alias unless it is empty or Is(name) already holds.
// If no generated descriptor is set yet, tries to find one by this name
// in the generated pool.
inline void TFile::AddAliasImpl(const Stroka& name) {
    if (name.empty() || Is(name)) {
        return;
    }

    Aliases_.push_back(name);

    if (GeneratedDescriptor) {
        return;
    }
    GeneratedDescriptor = NBuiltin::FindInGeneratedPool(name);
}
コード例 #5
0
// Feeds @token to the import processor. When an imported file name has been
// found and its canonic name is non-empty and differs from the original,
// stores the quoted, C-escaped canonic name in CanonicImportedFile and
// returns true. Returns false in every other case.
bool TDependencyCollectorInputStream::TryConsumeImportFileName(const NProtoBuf::io::Tokenizer::Token& token)
{
    ImportProcessor.NextState(token);
    if (!ImportProcessor.Found()) {
        return false;
    }

    YASSERT(token.type == NProtoBuf::io::Tokenizer::TYPE_STRING);

    // imported file name exactly as written, with quotes removed
    Stroka original;
    Tokenizer_.ParseString(token.text, &original);

    // canonic form of the same file
    const Stroka canonic = SourceTree->CanonicName(original);
    const bool needsReplacement = !canonic.empty() && canonic != original;

    if (needsReplacement) {
        // put the quotes back; the canonic name replaces the original further down the pipeline
        CanonicImportedFile.clear();
        CanonicImportedFile.append('"');
        EscapeC(canonic, CanonicImportedFile);
        CanonicImportedFile.append('"');
        return true;
    }

    // give a hint for this special case
    if (original == STRINGBUF("base.proto")) {
        ErrorCollector->AddErrorAtCurrentFile(token.line, token.column,
            "Importing base.proto by shortcut name is not allowed from arcadia. You should use full path instead: import \"kernel/gazetteer/base.proto\";");
    }

    // nothing to replace: continue with the original file name
    return false;
}
コード例 #6
0
// Dumps the LR item sets and the GOTO function in a human-readable form to
// the file @GrammarFileName (opened in "wb" mode).
//
// For every item set "I<n>" each item is printed as "<left> -> <right...>",
// where "; " marks the dot position. After the item sets, one
// "GOTO( I<from>, <symbol> ) = I<to>" line is printed per GOTO entry.
//
// Does nothing when @GrammarFileName is empty. If the file cannot be opened,
// a message is printed to stderr and nothing is written.
void CLRCollectionSet::SaveStateCollection(Stroka GrammarFileName, const CWorkGrammar* p_WorkGrammar) const
{
    if (GrammarFileName.empty()) return;

    FILE * fp = fopen(GrammarFileName.c_str(), "wb");
    if (fp == NULL) {
        // fopen() failed: writing through a NULL stream would be undefined behaviour
        fprintf(stderr, "Can't open \"%s\" for writing.\n", GrammarFileName.c_str());
        return;
    }

    for (int i = 0; i < (int)m_ItemSets.size(); i++) {
        fprintf(fp, "I%i =\n", i);

        for (yset<CLRItem>::const_iterator it = m_ItemSets[i].begin(); it != m_ItemSets[i].end(); ++it) {
            fprintf(fp, "\t%s -> ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_LeftPart].m_ItemStrId.c_str());

            for (int j = 0; j < (int)it->m_pRule->m_RightPart.m_Items.size(); j++) {
                // the dot stands before the j-th symbol of the right part
                if (j == (int)it->m_DotSymbolNo)
                    fprintf(fp, "; ");
                fprintf(fp, "%s ", p_WorkGrammar->m_UniqueGrammarItems[it->m_pRule->m_RightPart.m_Items[j]].m_ItemStrId.c_str());
            }

            // the dot stands after the last symbol of the right part
            if (it->m_DotSymbolNo == it->m_pRule->m_RightPart.m_Items.size())
                fprintf(fp, "; ");
            fprintf(fp, "\n");
        }
        fprintf(fp, "\n");
    }

    fprintf(fp, "\n");
    for (ymap< CStateAndSymbol, size_t>::const_iterator it_goto = m_GotoFunction.begin(); it_goto != m_GotoFunction.end(); ++it_goto)
        fprintf(fp, "GOTO( I%" PRISZT ", %s ) = I%" PRISZT "\n", it_goto->first.m_StateNo, p_WorkGrammar->m_UniqueGrammarItems[it_goto->first.m_SymbolNo].m_ItemStrId.c_str(), it_goto->second);

    fclose(fp);
}
コード例 #7
0
ファイル: main.cpp プロジェクト: Frankie-666/tomita-parser
// Returns @f with its last character removed when that character
// satisfies IsDelim(); an empty string is returned unchanged.
static inline Stroka Fix(Stroka f) {
    if (f.empty()) {
        return f;
    }

    const size_t lastIdx = +f - 1;
    if (IsDelim(f[lastIdx])) {
        f.pop_back();
    }

    return f;
}
コード例 #8
0
ファイル: dirut.cpp プロジェクト: Frankie-666/tomita-parser
// Returns @fileName itself if IsDir() reports it as a directory, otherwise
// GetDirName(fileName). A non-empty result is guaranteed to end with the
// directory separator.
Stroka StripFileComponent(const Stroka& fileName)
{
    Stroka dir;
    if (IsDir(fileName)) {
        dir = fileName;
    } else {
        dir = GetDirName(fileName);
    }

    if (dir.empty()) {
        return dir;
    }

    if (dir.back() != GetDirectorySeparator()) {
        dir.append(GetDirectorySeparator());
    }
    return dir;
}
コード例 #9
0
// Joins the string forms of all elements of @bitset.
// Each element is rendered with ToString(@delim); @groupdelim is inserted
// before an element whenever the accumulated result is non-empty.
Stroka BitsetToString(const yvector<TGramBitSet>& bitset, const Stroka& delim /*= ", "*/, const Stroka& groupdelim /*= ", "*/) {
    Stroka joined;
    for (yvector<TGramBitSet>::const_iterator grp = bitset.begin(); grp != bitset.end(); ++grp) {
        if (!joined.empty()) {
            joined += groupdelim;
        }
        joined += grp->ToString(~delim);
    }
    return joined;
}
コード例 #10
0
// Like Stroka::Quote(), but does not quote digits-only string.
// An empty string is quoted as well.
static Stroka QuoteForHelp(const Stroka& str) {
    if (str.empty())
        return str.Quote();
    for (size_t i = 0; i < str.size(); ++i) {
        // isdigit() requires a value representable as unsigned char (or EOF);
        // passing a plain char with a negative value is undefined behaviour.
        if (!isdigit(static_cast<unsigned char>(str[i])))
            return str.Quote();
    }
    return str;
}
コード例 #11
0
// Reads the fact list under @piNode.
// The "addEmpty" attribute of @piNode sets m_bShowFactsWithEmptyField.
// Every child element named "fact" with a non-empty "name" attribute is
// passed to AddFactToShow(); its second argument is true unless the child
// carries a "noequality" attribute.
void CParserOptions::ParseListOfFacts(TXmlNodePtrBase piNode) {
    m_ParserOutputOptions.m_bShowFactsWithEmptyField = piNode.HasAttr("addEmpty");

    for (TXmlNodePtrBase piFact = piNode->children; piFact.Get() != NULL; piFact = piFact->next) {
        if (piFact.HasName("fact")) {
            Stroka factName = piFact.GetAttr("name");
            if (factName.empty()) {
                continue;
            }
            AddFactToShow(factName, !piFact.HasAttr("noequality"));
        }
    }
}
コード例 #12
0
// Splits @gram by any character of @delim and sets in the result every
// grammeme whose code is recognised by TGrammarIndex::GetCode(); tokens
// resolving to gInvalid are skipped. An empty @gram gives an empty bitset.
TGramBitSet StringToBitset(const Stroka& gram, const Stroka& delim /*= ",; "*/) {
    TGramBitSet result;
    if (gram.empty()) {
        return result;
    }

    VectorStrok parts;
    SplitStrokuBySet(&parts, ~gram, ~delim);

    for (VectorStrok::const_iterator part = parts.begin(); part != parts.end(); ++part) {
        const TGrammar code = TGrammarIndex::GetCode(*part);
        if (code == gInvalid) {
            continue;
        }
        result.Set(code);
    }
    return result;
}
コード例 #13
0
ファイル: utilit.cpp プロジェクト: Frankie-666/tomita-parser
void WriteToLogFile(const Stroka& sGrammarFileLog, Stroka& str, bool bRW)
{
    if (sGrammarFileLog.empty())
        return;

    str += '\n';

    THolder<TFile> f;
    if (bRW)
        f.Reset(new TFile(sGrammarFileLog, CreateAlways | WrOnly | Seq));
    else
        f.Reset(new TFile(sGrammarFileLog, OpenAlways | WrOnly | Seq | ForAppend));

    TFileOutput out(*f);
    out.Write(str.c_str());
};
コード例 #14
0
bool TProtoParser::BuildMessageInheritanceGraph(const TDescriptor* msg_descr, const TSimpleHierarchy& graph)
{
    for (int i = 0; i < msg_descr->nested_type_count(); ++i)
        if (!BuildMessageInheritanceGraph(msg_descr->nested_type(i), graph))
            return false;

    // Find a base descriptor. There are two possible cases:
    // 1) @msg_descr was taken from generated_pool (builtin) and it's base type name is stored in its GztProtoDerivedFrom option.
    // 2) @msg_descr was read from disk .proto (using TDependencyCollector) and its base is remembered in the inheritance graph.

    const Stroka* explicit_base_name = graph.GetBase(msg_descr->full_name());
    const Stroka implicit_base_name = msg_descr->options().GetExtension(GztProtoDerivedFrom);
    if (!explicit_base_name && !implicit_base_name)      // no base class
        return true;

    const NProtoBuf::Descriptor* explicit_base_descr = NULL;
    if (explicit_base_name) {
         explicit_base_descr = ResolveBaseMessageByName(*explicit_base_name, msg_descr);
         if (!explicit_base_descr)
            return false;
    }

    const NProtoBuf::Descriptor* implicit_base_descr = NULL;
    if (!implicit_base_name.empty()) {
         implicit_base_descr = ResolveBaseMessageByName(implicit_base_name, msg_descr);
         if (!implicit_base_descr)
            return false;
    }

    if (explicit_base_descr && implicit_base_descr && explicit_base_descr != implicit_base_descr) {
        Errors->AddError(msg_descr->file()->name(), -1, 0, NProtoBuf::strings::Substitute(
            "Ambiguous base type for $0: either $1 (disk) or $2 (builtin).", msg_descr->full_name(),
             explicit_base_descr->full_name(), implicit_base_descr->full_name()));
        return false;
    }

    RawMessageGraph[msg_descr] = explicit_base_descr ? explicit_base_descr : implicit_base_descr;
    // check if cycle dependency exists.
    if (!CheckIfSelfDerived(msg_descr))
        return false;

    return true;
}
コード例 #15
0
ファイル: buffered.cpp プロジェクト: noscripter/tomita-parser
    // Accumulates data into @st by repeatedly calling MemInput_.ReadTo() with
    // the delimiter @to, refilling the buffer from Slave_ when MemInput_ is
    // exhausted. The loop ends when the slave returns no data, or when a chunk
    // is shorter than what was available in the buffer (presumably because the
    // delimiter was met - confirm against TMemoryInput::ReadTo).
    // Returns true if at least one chunk was read; @st is replaced in any case.
    inline bool ReadTo(Stroka& st, char to) {
        Stroka collected;
        Stroka chunk;
        bool gotSomething = false;

        for (;;) {
            if (MemInput_.Exhausted()) {
                const size_t fetched = Slave_->Read(Buf(), BufLen());
                if (fetched == 0) {
                    break;
                }
                MemInput_.Reset(Buf(), fetched);
            }

            const size_t availBefore(MemInput_.Avail());
            MemInput_.ReadTo(chunk, to);
            const size_t chunkLen = chunk.length();

            // first chunk: take over its storage instead of copying
            if (!collected.empty()) {
                collected += chunk;
            } else {
                collected.swap(chunk);
            }

            gotSomething = true;

            // the chunk did not consume everything that was available: stop
            if (chunkLen != availBefore) {
                break;
            }
        }

        st.swap(collected);

        return gotSomething;
    }
コード例 #16
0
ファイル: utilit.cpp プロジェクト: Frankie-666/tomita-parser
// Decodes @text from @encoding into @res via CharToWide().
// If decoding throws, a diagnostic is printed to Cerr (the expected encoding,
// at most 250 characters of the text, and the file name / line number when
// given) and the original exception is rethrown.
void DecodeUserInput(const TStringBuf& text, Wtroka& res, ECharset encoding, const Stroka& filename, size_t linenum)
{
    const size_t MAX_MSG_TEXT_LEN = 250;
    try {
        CharToWide(text, res, encoding);
    } catch (...) {
        Cerr << "Cannot decode supplied text, invalid encoding (expected " << NameByCharset(encoding) << "):\n\n";

        const bool tooLong = text.size() > MAX_MSG_TEXT_LEN;
        if (tooLong) {
            Cerr << text.SubStr(0, MAX_MSG_TEXT_LEN) << "...";
        } else {
            Cerr << text;
        }
        Cerr << "\n";

        if (!filename.empty()) {
            Cerr << "\n(File " << filename;
            if (linenum != 0) {
                Cerr << ", line " << linenum;
            }
            Cerr << ")";
        }

        Cerr << Endl;
        throw;
    }
}
コード例 #17
0
// Collects an unresolved article pointer for every child element of @piNode
// named "mw". A non-empty "kw" attribute goes to KWTypeTitle, otherwise the
// "art" attribute goes to ArticleTitle. The presence of the attributes
// "link", "link_find_only", "main_page" and "url" sets the matching flags.
// Each pointer is inserted into @artPointers.
void CParserOptions::ParseListOfMW(TXmlNodePtrBase piNode, yset<TUnresolvedArtPointer>& artPointers) {
    for (TXmlNodePtrBase piMW = piNode->children; piMW.Get() != NULL; piMW = piMW->next) {
        if (!piMW.HasName("mw")) {
            continue;
        }

        Stroka kwType = piMW.GetAttr("kw");

        TUnresolvedArtPointer pointer;
        if (kwType.empty()) {
            pointer.ArticleTitle = piMW.GetWAttr("art");
        } else {
            pointer.KWTypeTitle = kwType;
        }

        pointer.ForLink = piMW.HasAttr("link");
        pointer.ForLinkFindOnly = piMW.HasAttr("link_find_only");
        pointer.ForMainPage = piMW.HasAttr("main_page");
        pointer.ForUrl = piMW.HasAttr("url");

        artPointers.insert(pointer);
    }
}
コード例 #18
0
ファイル: processor.cpp プロジェクト: leotop/tomita-parser
// Loads InterviewUrl2Fio from the tab-separated file @strNameFile.
// Each non-blank line is split at '\t' into a url and a name; both are
// stripped, and the name is converted to wide chars using the input encoding.
// Lines that cannot be split are skipped.
// Does nothing for an empty file name; throws yexception if the file does
// not exist.
void CProcessor::InitInterviewFile(Stroka strNameFile)
{
    if (strNameFile.empty()) {
        return;
    }
    if (!PathHelper::Exists(strNameFile)) {
        ythrow yexception() << "Can't open name dic.";
    }

    TBufferedFileInput input(strNameFile);

    Stroka rawLine;
    while (input.ReadLine(rawLine)) {
        TStringBuf trimmed(rawLine);
        trimmed = StripString(trimmed);
        if (!trimmed.empty()) {
            TStringBuf urlPart, fioPart;
            if (trimmed.SplitImpl('\t', urlPart, fioPart)) {
                urlPart = StripString(urlPart);
                fioPart = StripString(fioPart);
                InterviewUrl2Fio[ToString(urlPart)] = CharToWide(ToString(fioPart), m_Parm.GetInputEncoding());
            }
        }
    }
}
コード例 #19
0
// Rebuilds the grammems of homonym @H as an analytical form made of @H (the
// meaningful part) and the auxiliary verb @VerbHomonym.
//
// The current forms of @H are moved out, and for every pair
// (old form of H, form of VerbHomonym) a resulting grammem set is computed
// depending on what the meaningful part is (infinitive, short participle,
// short adjective, praedic, comparative). Pairs for which no analytical form
// is built are skipped. Each resulting set is added back to H.Grammems.
void CAnalyticFormBuilder::ChangeGrammemsAsAnalyticForm(CHomonym& H, const CHomonym& VerbHomonym)
{
    // take the old forms out of H; H.Grammems holds whatever old_grammems held (a fresh object)
    THomonymGrammems old_grammems;
    H.Grammems.Swap(old_grammems);

    for (THomonymGrammems::TFormIter old = old_grammems.IterForms(); old.Ok(); ++old)
        for (THomonymGrammems::TFormIter verb = VerbHomonym.Grammems.IterForms(); verb.Ok(); ++verb) {
            // stays empty when none of the branches below applies; such pairs are skipped
            Stroka strPos;

            // auxiliary verb grammems
            const TGramBitSet& VerbGrammems = *verb;
            // meaningful part grammems
            TGramBitSet MainGrammems = *old;

            // final grammems to set
            TGramBitSet ResultedGrammems;

            if (MainGrammems.Has(gInfinitive)) {
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                if (VerbGrammems.Has(gImperative)) {
                    // analytical form for imperatives in singular number does not exist
                    if (VerbGrammems.Has(gSingular))
                        continue;
                    ResultedGrammems.Set(gImperative); // e.g. "будем же жить!" ("let us live!")
                } else
                    ResultedGrammems |= VerbGrammems & NSpike::AllTimes; // e.g. "я стал пилить" or "стану писать"

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllNumbers;
                ResultedGrammems |= VerbGrammems & NSpike::AllGenders;

                //copy all POS grammems from verb to main
                ResultedGrammems |= VerbGrammems & TMorph::AllPOS();

                H.PutAuxArticle(SDictIndex(TEMPLATE_DICT, VerbHomonym.GetAuxArticleIndex(TEMPLATE_DICT)));
                strPos = "Г";
            } else if (TMorph::IsShortParticiple(MainGrammems)) {
                // ungrammatical: "*будем же взяты!"
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                // remove any time grammems from participle
                ResultedGrammems &= ~NSpike::AllTimes;

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllTimes;

                if (VerbGrammems.Has(gImperative))  // ??? the same check second time, always false?
                    ResultedGrammems.Set(gImperative);

                strPos = "ПРИЧАСТИЕ";
                ResultedGrammems |= TGramBitSet(gParticiple, gShort);
            } else if (TMorph::IsShortAdjective(MainGrammems)) {
                if (VerbGrammems.Has(gImperative))
                    continue; // we treat "будем же красивы!" ("let us be beautiful!") as bad;
                              // actually, we just do not want to introduce a bunch of extra codes for it.

                ResultedGrammems =  VerbGrammems;
                ResultedGrammems |= MainGrammems & (NSpike::AllNumbers | NSpike::AllGenders | TGramBitSet(gAnimated, gInanimated));
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems &= ~TGramBitSet(gActive);

                ResultedGrammems |= TGramBitSet(gAdjective, gShort);
                strPos = "П";
            } else if (MainGrammems.Has(gPraedic))     // e.g. "мне было больно" ("it hurt me")
            {
                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= NSpike::AllCases & MainGrammems;    //copied from PronounPredk code (commented below) - preserve cases if any
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "ПРЕДК";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (MainGrammems.Has(gComparative))       // e.g. "он был больше тебя" ("he was bigger than you")
            {
                ResultedGrammems = (VerbGrammems & ~TMorph::AllPOS()) | TGramBitSet(gComparative);
                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "П";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (TMorph::IsFullAdjective(MainGrammems))
                // resolve disambiguity of homonyms, because analytical forms with full adjectives do not exist.
                continue;

            // "стал писать" "стану писать" "стать писать" - perfective aspect
            if (VerbHomonym.Lemma == kStat)
                ResultedGrammems.Reset(gImperfect).Set(gPerfect);

            // if the auxiliary verb was an infinitive then it is all an infinitive
            //  e.g. "быть  лучше" or "должно быть принесено"
            if (VerbHomonym.HasGrammem(gInfinitive)) {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gInfinitive);
                strPos = "ИНФИНИТИВ";
            } else if (VerbHomonym.HasGrammem(gGerund))     //  e.g. "будучи лишней"
            {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gGerund);

                strPos = "ДЕЕПРИЧАСТИЕ";
            }

            // no branch assigned a part of speech: nothing to add for this pair
            if (strPos.empty())
                continue;

            /* do some corrections (code taken from RusGramTab.ProcessPOSAndGrammems) */
            if (ResultedGrammems.HasAll(NSpike::AllCases | TGramBitSet(gAdjPronoun)))
                ResultedGrammems |= NSpike::AllGenders | NSpike::AllNumbers;

            if (ResultedGrammems.Has(gMasFem))
                ResultedGrammems |= TGramBitSet(gMasculine, gFeminine);

            if (!ResultedGrammems.Has(gPraedic) && ResultedGrammems.HasAll(NSpike::AllCases) && !ResultedGrammems.Has(gSingular))
                ResultedGrammems |= TGramBitSet(gSingular, gPlural);

            H.Grammems.AddForm(ResultedGrammems);
        }
}
コード例 #20
0
bool CCommonParm::ParseConfig(const Stroka& fn) {
    Config.Reset(NProtoConf::LoadFromFile<TTextMinerConfig>(fn));
    if (!Config)
        ythrow yexception() << "Cannot read the config from \"" << fn << "\".";

    if (m_args.has(OPT_BINARY_DIR)) {
        if (Config->has_binarydir() && Config->binarydir().length() > 0)
            ythrow yexception() << "Both \"--" << OPT_BINARY_DIR << "\" command line argument and \"BinaryDir\" config parameter specified";

        Config->set_binarydir(m_args[OPT_BINARY_DIR]);
    }

    if (Config->has_input()) {
        TTextMinerConfig_TInputParams inputParams = Config->input();

        if (inputParams.has_file() && !inputParams.file().empty()
            && inputParams.has_dir() && !inputParams.dir().empty())
            ythrow yexception() << "Input\\File and Input\\Dir options are meaningless together";

        Stroka fn = inputParams.file();
        fn.to_lower();
        if (fn.empty() || "stdin" == fn || "-" == fn)
            m_strInputFileName = "";
        else
            m_strInputFileName = inputParams.file();

        if (inputParams.has_dir()) {
            if (inputParams.has_type())
                ythrow yexception() << "Input\\Type field is meaningless for directory processing";

            m_strSourceType = "dir";
            m_strInputFileName = inputParams.dir();
            m_strDocDir = inputParams.dir();
            if (!PathHelper::IsDir(m_strDocDir))
                ythrow yexception() << "\"" << m_strDocDir << "\" isn't a directory";
        } else {
            if (inputParams.has_type()) {
                switch (inputParams.type()) {
                    case TTextMinerConfig::TInputParams::no:
                        m_strSourceType = "no";
                        break;

                    case TTextMinerConfig::TInputParams::dpl:
                        m_strSourceType = "dpl";
                        break;

                    case TTextMinerConfig::TInputParams::arcview:
                        m_strSourceType = "arcview";
                        break;

                    case TTextMinerConfig::TInputParams::mapreduce:
                        m_strSourceType = "mapreduce";
                        break;

                    case TTextMinerConfig::TInputParams::tar:
                        if (m_strInputFileName.empty())
                            ythrow yexception() << "Please specify Input\\File field in configuration file in order to use .tar archive.";
                        m_strSourceType = "tar";
                        break;

                    case TTextMinerConfig::TInputParams::som:
                        if (m_strInputFileName.empty())
                            ythrow yexception() << "Please specify Input\\File field in configuration file in order to read SOM data.";
                        m_strSourceType = "som";
                        break;

                    case TTextMinerConfig::TInputParams::yarchive:
                        if (m_strInputFileName.empty())
                            ythrow yexception() << "Please specify Input\\File field in configuration file in order to read Yandex archive.";
                        m_strSourceType = "yarchive";
                        break;

                    default:
                        ythrow yexception() << "This type of input isn't supported";
                }
            } else
                m_strSourceType = "no";
        }
    }

    if (NULL == ParserOptions.Get())
        ParserOptions.Reset(new CParserOptions);

    ParserOptions->InitFromConfigObject(*Config.Get());

    return true;
}