Exemplo n.º 1
0
// Prints a short description of this parser to stdout, prefixed with `intro`.
// What is printed depends on Type.Val:
//   0, 1, 2 - a label for that value followed by the alphabet size;
//   2       - additionally every key stored in WordToIdH, on one line;
//   other   - nothing at all.
// The label texts are reproduced exactly as the program has always emitted them.
void TStrParser::WhoAmI(const TStr& intro) const {
    if (Type.Val == 0) {
        printf("%s: Words [AlphN = %d]\n", intro.CStr(), GetAlphabetSize());
    } else if (Type.Val == 1) {
        printf("%s: Syllabels  [AlphN = %d]\n", intro.CStr(), GetAlphabetSize());
    } else if (Type.Val == 2) {
        printf("%s: Chararcters [AlphN = %d]\n", intro.CStr(), GetAlphabetSize());

        // dump the whole alphabet, one bracketed key after another
        int KeyId = 0;
        while (KeyId < WordToIdH.Len()) {
            printf("[%s]  ", WordToIdH.GetKey(KeyId).CStr());
            ++KeyId;
        }
        printf("\n");
    }
}
Exemplo n.º 2
0
// Fills WeightV with one weight per alphabet entry, scaled so that the
// largest weight becomes 1.
//
// The raw weight of entry AlphC is log(DocsParsed / WordToIdH[AlphC]).
// NOTE(review): WordToIdH[AlphC] is presumably the number of parsed documents
// containing that entry (the usual IDF definition) - confirm against the
// code that fills WordToIdH.
//
// WeightV is always regenerated with GetAlphabetSize() elements. Two
// degenerate cases skip the normalisation step instead of dividing:
//   - an empty alphabet: there is no maximum element to look up;
//   - a maximum raw weight of exactly 0: dividing would give 0/0 = NaN for
//     every entry, so the raw weights are left as they are.
void TStrParser::GetIDFWeightV(TFltV& WeightV) {
    const int AlphN = GetAlphabetSize();
    WeightV.Gen(AlphN);
    if (AlphN <= 0) {
        // Nothing to weight; do not index the vector with the position of a
        // maximum that does not exist.
        return;
    }

    for (int AlphC = 0; AlphC < AlphN; AlphC++) {
        WeightV[AlphC] = log(static_cast<double>(DocsParsed) / WordToIdH[AlphC]);
    }

    const double MaxVal = WeightV[WeightV.GetMxValN()];
    if (MaxVal == 0.0) {
        // Exact comparison is intended: only a true zero divisor is rejected.
        return;
    }

    for (int AlphC = 0; AlphC < AlphN; AlphC++) {
        WeightV[AlphC] /= MaxVal;
    }
}
Exemplo n.º 3
0
void CTrieHolder::UpdatePossibleOutputSymbols (const yset<size_t>& CurrentStates, yvector<bool>& PossibleOutputSymbols) const
{
    PossibleOutputSymbols.resize(GetAlphabetSize(), false);

    for (yset<size_t>::const_iterator it = CurrentStates.begin();
            it != CurrentStates.end();
            it++
        ) {
        UpdatePossibleOutputSymbolsbyOnState(*it, PossibleOutputSymbols);

        for (int r = m_Nodes[(*it)].m_FailureFunction; r != -1; r = m_Nodes[r].m_FailureFunction)
            UpdatePossibleOutputSymbolsbyOnState(r, PossibleOutputSymbols);
    };

};
Exemplo n.º 4
0
void CTrieHolder::ConvertAuxChildrenToNormal()
{
    m_Children.clear();
    m_Children.reserve(m_ChildrenAux.size());
    for (size_t NodeNo=0; NodeNo < m_Nodes.size(); NodeNo++) {
        m_Nodes[NodeNo].m_ChildrenIndex = m_Children.size();
        for (size_t i=0; i<GetAlphabetSize(); i++)
            if (GetChildrenAux(NodeNo)[i] != -1) {
                CTrieRelation R;
                R.m_ChildNo = GetChildrenAux(NodeNo)[i];
                R.m_RelationChar = i;
                m_Children.push_back(R);
            };
    };
    m_ChildrenAux.clear();
};
Exemplo n.º 5
0
//#pragma optimize( "", off )
// Appends a chain of new nodes for the symbols in [begin, end), hanging the
// first new node under ParentNo and each further node under the one before it.
// The last node of the chain receives WorkRuleNo as its m_GrammarRuleNo.
//
// Preconditions (checked with assert only):
//   - the range is not empty;
//   - every symbol is below the alphabet size;
//   - the node a new child is attached to has no child for that symbol yet.
void CTrieHolder::CreateChildrenSequence(CTSI begin, CTSI end, size_t ParentNo, size_t WorkRuleNo)
{
    assert (begin < end);

    size_t AttachTo = ParentNo;
    for (CTSI Symbol = begin; ; ++Symbol) {
        //  build the node for the current symbol
        CTrieNode Fresh;
        Fresh.m_Parent = AttachTo;
        Fresh.m_Depth = m_Nodes[AttachTo].m_Depth + 1;
        Fresh.m_IncomingSymbol = *Symbol;
        assert (Fresh.m_IncomingSymbol < (int)GetAlphabetSize());
        AddNode(Fresh);

        //  the node just added is the last one; record it in its parent's slot
        const size_t FreshNo = m_Nodes.size() - 1;
        assert (GetChildrenAux(AttachTo)[Fresh.m_IncomingSymbol] == -1);
        GetChildrenAux(AttachTo)[Fresh.m_IncomingSymbol] = FreshNo;

        //  no symbols left after this one: the chain ends here
        if (end - Symbol <= 1) {
            m_Nodes[FreshNo].m_GrammarRuleNo = WorkRuleNo;
            break;
        }

        //  otherwise the next node hangs under the one just created
        AttachTo = FreshNo;
    }
}
Exemplo n.º 6
0
void CTrieHolder::CreateTrie(const yset< CWorkRule >& Patterns)
{
    assert(!Patterns.empty());
    m_Nodes.clear();
    m_ChildrenAux.clear();
    m_Nodes.reserve(2*Patterns.size());
    m_ChildrenAux.reserve(2*Patterns.size()*GetAlphabetSize());

    //  inserting root
    AddNode(CTrieNode ());

    yset< CWorkRule >::const_iterator iter, prev_iter;
    iter = prev_iter = Patterns.begin();
    size_t RuleNo = 0;
    CreateChildrenSequence(iter->m_RightPart.m_Items.begin(), iter->m_RightPart.m_Items.end(),  0, RuleNo);
    RuleNo++;

    for (iter++; iter != Patterns.end(); iter++, RuleNo++) {
        const CWorkRule& P = *iter;
        assert (!P.m_RightPart.m_Items.empty());

        //  Starter should be the node of the previous pattern, from which we should start
        //  current sequence.

        //  Example1:
        //  Previous = abcd
        //  Current  = abd
        //  We have graph (1) -a-> (2) -b-> (3) -c-> (4) -d-> (5)
        //  Starter should be pointed to node 3.

        //  Example2:
        //  Previous = abc
        //  Current  = abcd
        //  We have graph (1) -a-> (2) -b-> (3) -c-> (4)
        //  Starter should be pointed to node 4.

        size_t Starter = 0;
        size_t CharNo =0;
        for (; CharNo < P.m_RightPart.m_Items.size(); CharNo++) {
            if  ((CharNo == prev_iter->m_RightPart.m_Items.size())
                    ||  (P.m_RightPart.m_Items[CharNo] !=  (*prev_iter).m_RightPart.m_Items[CharNo])
                )
            break;

            Starter = GetChildrenAux(Starter)[P.m_RightPart.m_Items[CharNo]];
            assert ((int)Starter != -1);
        };

        if  (CharNo < P.m_RightPart.m_Items.size()) {

            CreateChildrenSequence(P.m_RightPart.m_Items.begin()+CharNo, P.m_RightPart.m_Items.end(),   Starter, RuleNo);
        } else {
            assert (P.m_RightPart.m_Items.size() ==  prev_iter->m_RightPart.m_Items.size());
            // a grammar can has structural ambiguity, which causes dublicates  in patterns
            //ErrorMessage( "a dublicate is found");
        };

        prev_iter = iter;
    };

    ConvertAuxChildrenToNormal();
};
Exemplo n.º 7
0
// Appends a copy of T to m_Nodes and grows the auxiliary child table by
// GetAlphabetSize() slots, each set to -1.
void CTrieHolder::AddNode(const CTrieNode& T)
{
    m_Nodes.push_back(T);

    //  one block of slots per node, appended at the end of the table
    m_ChildrenAux.resize(m_ChildrenAux.size() + GetAlphabetSize(), -1);
}