// ---------------------------------------------- // PLAIN DECODER // ---------------------------------------------- SegmentDecoder::SegmentDecoder( Model *model, int agenda) { this->m_Model = model; this->m_Agenda = agenda; Alphabet *labelAlpha = model->getAlphabet("LABELS"); this->m_NumLabels = labelAlpha->size(); m_Legal = new int *[m_NumLabels + 1]; for (int i = 0; i <= m_NumLabels; ++ i) { char prev = 'X'; if (i < m_NumLabels) prev = labelAlpha->rlookup(i)[0]; m_Legal[i] = new int[m_NumLabels]; for (int j = 0; j < m_NumLabels; ++ j) { char curr = labelAlpha->rlookup(j)[0]; m_Legal[i][j] = 0; if ((prev == 'X' || prev == 'S' || prev == 'E') && (curr == 'S' || curr == 'B')) m_Legal[i][j] = 1; if ((prev == 'M' || prev == 'B') && (curr == 'M' || curr == 'E')) m_Legal[i][j] = 1; } } }
int OTWS_Wordseg_x(otws_t handle, const string& sent, vector<string>& words) { OTWS_Engine *engine = reinterpret_cast<OTWS_Engine *>(handle); words.clear(); RawSentence *tag_sent = new TagSent(); vector<string> chars; int numChars = UTF::getCharactersFromUTF8String(sent, &chars); // something for debug. cerr << "TOKEN: "; for (int i = 0; i < numChars; ++ i) { cerr << chars[i] << " | "; tag_sent->append(new TagItem(chars[i], "X")); } cerr << endl; Instance *inst = (engine)->extractor->extract(tag_sent, false); Items *items = inst->items(); Labels* labels = engine->decoder->decode(inst, engine->model->getParameter("PARAMETER"))->best(); Alphabet *labelsDict = engine->model->getAlphabet("LABELS"); // something for debug. for (int i = 0; i < labels->size(); ++ i) { cerr << labelsDict->rlookup(labels->at(i)) << "(" << labels->at(i) << ") "; } cerr << "| label size: " << labels->size() << endl; string tag; string word; for (int i = 0; i < items->size(); ) { tag = labelsDict->rlookup(labels->at(i)); if ("S" == tag) { word = chars[i]; words.push_back(word); ++ i; } else if ("B" == tag) { word = ""; while ("E" != tag && i < items->size()){ word = word + chars[i]; tag = labelsDict->rlookup(labels->at(i)); ++ i; } words.push_back(word); } else { cerr << "Exception asserted." << endl; words.clear(); return -1; } } return words.size(); }