int CHunpinSegmentor::_encode(const char* buf,int ret) { CMappedYin syls; syls.reserve(8); s_shpData.getMapString(buf, syls); if (syls.empty()) return -1; CMappedYin::const_iterator iter = syls.begin(); CMappedYin::const_iterator iter_end = syls.end(); m_segs.push_back (TSegment (0, 0, 1, IPySegmentor::SYLLABLE)); TSegment &s = m_segs.back(); s.m_len = 2; s.m_start = ret; s.m_syllables.clear(); s.m_type = IPySegmentor::SYLLABLE; for (; iter!=iter_end; iter++) { s.m_syllables.push_back(s_shpData.encodeSyllable(iter->c_str())); } return s.m_start; }
int CShuangpinSegmentor::_encode(const char* buf, char ch, bool isComplete) { CMappedYin syls; syls.reserve(8); s_shpData.getMapString(buf, syls); if (syls.empty()) return -1; const int len = m_pystr.size(); CMappedYin::const_iterator iter = syls.begin(); CMappedYin::const_iterator iter_end = syls.end(); if (isComplete) { TSegment &s = m_segs.back(); s.m_len = 2; s.m_start = len - s.m_len; s.m_syllables.clear(); s.m_type = IPySegmentor::SYLLABLE; for (; iter!=iter_end; iter++) { s.m_syllables.push_back(s_shpData.encodeSyllable(iter->c_str())); } m_nLastValidPos += 1; return s.m_start; } else { TSegment s; s.m_len = 1; s.m_start = len - s.m_len; m_nLastValidPos += 1; for (; iter != iter_end; ++iter) { TSyllable syl = s_shpData.encodeSyllable(iter->c_str()); if ((int)syl != 0) { s.m_syllables.push_back(syl); m_segs.push_back (s); } else { m_segs.push_back (TSegment (ch, s.m_start, 1, IPySegmentor::STRING)); } } return s.m_start; } }