示例#1
0
// ----------------------------------------------
// PLAIN DECODER
// ----------------------------------------------
/**
 * Bind the decoder to a model and pre-compute the label transition table.
 *
 * m_Legal is allocated with (m_NumLabels + 1) rows of m_NumLabels ints.
 * Row r < m_NumLabels describes transitions out of label r; the extra
 * last row (r == m_NumLabels) uses the pseudo previous tag 'X', i.e. the
 * case where there is no real previous label.
 *
 * Only the first character of each label name is inspected.  A transition
 * prev -> curr is marked legal (1) when either
 *   - prev is one of X / S / E and curr is one of S / B, or
 *   - prev is one of M / B     and curr is one of M / E;
 * every other cell is 0.
 * (Presumably B/M/E/S stand for begin / middle / end / single -- confirm.)
 *
 * @param model   model providing the "LABELS" alphabet; stored in m_Model.
 * @param agenda  stored verbatim in m_Agenda.
 */
SegmentDecoder::SegmentDecoder(
        Model *model,
        int agenda) {
    m_Model  = model;
    m_Agenda = agenda;

    Alphabet *tagDict = model->getAlphabet("LABELS");
    m_NumLabels = tagDict->size();

    // One row per real label plus one trailing row for the 'X' pseudo tag.
    m_Legal = new int *[m_NumLabels + 1];

    for (int from = 0; from <= m_NumLabels; ++ from) {
        char prevTag = 'X';
        if (from < m_NumLabels) {
            prevTag = tagDict->rlookup(from)[0];
        }

        // Classify the previous tag once per row.
        const bool prevClosesWord =
            ('X' == prevTag || 'S' == prevTag || 'E' == prevTag);
        const bool prevInsideWord =
            ('M' == prevTag || 'B' == prevTag);

        int *row = new int[m_NumLabels];
        m_Legal[from] = row;

        for (int to = 0; to < m_NumLabels; ++ to) {
            const char currTag = tagDict->rlookup(to)[0];

            const bool currOpensWord   = ('S' == currTag || 'B' == currTag);
            const bool currExtendsWord = ('M' == currTag || 'E' == currTag);

            const bool legal = (prevClosesWord && currOpensWord)
                || (prevInsideWord && currExtendsWord);
            row[to] = legal ? 1 : 0;
        }
    }
}
示例#2
0
/**
 * Segment a UTF-8 sentence into words using the engine behind `handle`.
 *
 * The sentence is split into characters, each character is wrapped in a
 * TagItem with the placeholder tag "X", features are extracted, and the
 * best label sequence from the decoder is turned into words:
 *   - a label whose name is "S" yields a one-character word;
 *   - a label whose name is "B" starts a word that runs up to and
 *     including the next "E" label (or to the end of the sentence if no
 *     "E" follows);
 *   - any other label at a word start is treated as an error.
 *
 * Diagnostic output (tokens, labels) is written to cerr on every call.
 *
 * @param handle  opaque engine handle; reinterpreted as OTWS_Engine*.
 * @param sent    input sentence, expected to be UTF-8.
 * @param words   output vector; cleared on entry and left empty on error.
 * @return number of words produced, or -1 on error (null handle, null
 *         extraction/decoding result, inconsistent sizes, or an
 *         unexpected label at a word start).
 */
int OTWS_Wordseg_x(otws_t handle, 
        const string& sent,
        vector<string>& words) {

    words.clear();

    OTWS_Engine *engine = reinterpret_cast<OTWS_Engine *>(handle);
    if (!engine) {
        // Nothing to decode with; fail instead of dereferencing null.
        return -1;
    }

    // NOTE(review): tag_sent, the TagItems appended to it, and inst are
    // heap-allocated and never released in this function.  Ownership is
    // not visible from here (the extractor / instance may take it), so
    // nothing is deleted -- confirm and free if the caller owns them.
    RawSentence *tag_sent = new TagSent();
    vector<string> chars;
    int numChars = UTF::getCharactersFromUTF8String(sent, &chars);

    // something for debug.
    cerr << "TOKEN: ";
    for (int i = 0; i < numChars; ++ i) {
        cerr << chars[i] << " | ";
        tag_sent->append(new TagItem(chars[i], "X"));
    }
    cerr << endl;

    Instance *inst = (engine)->extractor->extract(tag_sent, false);
    if (!inst) {
        return -1;
    }
    Items *items = inst->items();
    if (!items) {
        return -1;
    }

    Labels* labels = engine->decoder->decode(inst,
            engine->model->getParameter("PARAMETER"))->best();
    if (!labels) {
        return -1;
    }

    Alphabet *labelsDict = engine->model->getAlphabet("LABELS");

    // something for debug.
    const int numLabels = labels->size();
    for (int i = 0; i < numLabels; ++ i) {
        cerr << labelsDict->rlookup(labels->at(i)) << 
            "(" << labels->at(i) << ") ";
    }
    cerr << "| label size: " << numLabels << endl;

    // The loop below is driven by the item count but indexes both `chars`
    // and `labels`; refuse to run if either is shorter than the item list
    // rather than reading out of bounds.
    const int numItems = items->size();
    if (numItems > static_cast<int>(chars.size()) || numItems > numLabels) {
        cerr << "Size mismatch among items, characters and labels." << endl;
        return -1;
    }

    int i = 0;
    while (i < numItems) {
        string tag = labelsDict->rlookup(labels->at(i));

        if ("S" == tag) {
            // Single-character word.
            words.push_back(chars[i]);
            ++ i;
        } else if ("B" == tag) {
            // Consume characters up to and including the closing "E".
            // The tag is re-read at position i *after* appending chars[i],
            // so the character carrying "E" is part of the word.
            string word;
            while ("E" != tag && i < numItems) {
                word += chars[i];
                tag = labelsDict->rlookup(labels->at(i));
                ++ i;
            }
            words.push_back(word);
        } else {
            cerr << "Exception asserted." << endl;
            words.clear();
            return -1;
        }
    }

    return static_cast<int>(words.size());
}