/**
 * Builds a SegmentBase from a SegmentBase node and attaches it to `info`.
 *
 * When the node carries an "indexRange" attribute of the form "<start>-<end>",
 * a DashIndexSegment covering that byte range is created and stored as the
 * base's index segment. An attribute that does not parse into two numbers, or
 * a failed index allocation, is skipped silently and the base is still
 * attached.
 *
 * The first child element named "Initialization" (possibly none) is handed to
 * parseInitSegment() together with the new base.
 *
 * \param segmentBaseNode node to read; may be null
 * \param info            owner passed to the created segments; receives the
 *                        base through setSegmentBase()
 * \return 1 when a SegmentBase was created and attached, 0 when the node is
 *         null or the SegmentBase allocation failed
 */
size_t IsoffMainParser::parseSegmentBase(Node * segmentBaseNode, SegmentInformation *info)
{
    if(!segmentBaseNode)
        return 0;

    SegmentBase *base = new (std::nothrow) SegmentBase(info);
    if(!base)
        return 0;

    if(segmentBaseNode->hasAttribute("indexRange"))
    {
        size_t firstByte = 0;
        size_t lastByte = 0;
        /* The attribute string is only used inside this call expression. */
        const int fields = std::sscanf(segmentBaseNode->getAttributeValue("indexRange").c_str(),
                                       "%zu-%zu", &firstByte, &lastByte);
        if(fields == 2)
        {
            IndexSegment *indexSeg = new (std::nothrow) DashIndexSegment(info);
            if(indexSeg)
            {
                indexSeg->setByteRange(firstByte, lastByte);
                base->indexSegment.Set(indexSeg);
                /* index must be before data, so data starts at index end */
                base->setByteRange(lastByte + 1, 0);
            }
        }
    }

    Node *initNode = DOMHelper::getFirstChildElementByName(segmentBaseNode, "Initialization");
    parseInitSegment(initNode, base, info);

    info->setSegmentBase(base);
    return 1;
}
// Segments [begin, end) with the vector<Unicode> overload of cut() and appends
// one string per resulting piece to `res`.
//
// Returns false, without touching `res`, when the range is empty or when the
// underlying cut() reports failure; returns true otherwise. `res` always grows
// by exactly the number of pieces: a piece that TransCode::encode() rejects is
// reported on Rcout and its slot is kept as encode() left it.
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
    if (begin == end)
    {
        return false;
    }

    vector<Unicode> pieces;
    // Capacity sized to the input length (presumably at most one piece per
    // input element).
    pieces.reserve(end - begin);
    if (!cut(begin, end, pieces))
    {
        return false;
    }

    // Make room for every piece up front, then encode straight into the slots.
    const size_t firstNew = res.size();
    res.resize(firstNew + pieces.size());

    vector<string>::iterator slot = res.begin() + firstNew;
    for (vector<Unicode>::iterator piece = pieces.begin(); piece != pieces.end(); ++piece, ++slot)
    {
        if (!TransCode::encode(*piece, *slot))
        {
            Rcout << "encode failed." << std::endl;
        }
    }
    return true;
}
// Segments [begin, end) with the vector<Unicode> overload of cut() and appends
// the successfully encoded pieces to `res`.
//
// Asserts that _getInitFlag() holds. Returns false (after logging) when the
// range is empty or inverted, or when the underlying cut() fails. Otherwise
// returns true; pieces that TransCode::encode() rejects are logged and left
// out of `res`.
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
    assert(_getInitFlag());

    if (!(begin < end))
    {
        LogError("begin >= end");
        return false;
    }

    vector<Unicode> pieces;
    if (!cut(begin, end, pieces))
    {
        LogError("get unicode cut result error.");
        return false;
    }

    // Read the pieces through a const view so encode() sees them as const.
    const vector<Unicode>& view = pieces;
    // One scratch string shared by all iterations.
    string scratch;
    for (size_t idx = 0; idx < view.size(); ++idx)
    {
        if (!TransCode::encode(view[idx], scratch))
        {
            LogError("encode failed.");
            continue;
        }
        res.push_back(scratch);
    }
    return true;
}
// Segments [begin, end) with the vector<Unicode> overload of cut() and appends
// one string per resulting piece to `res`.
//
// Asserts that _getInitFlag() holds. Returns false, without touching `res`,
// when the range is empty or the underlying cut() fails; returns true
// otherwise. `res` grows by exactly the number of pieces: a piece that
// TransCode::encode() rejects is logged and its slot is cleared.
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
    assert(_getInitFlag());
    if (begin == end)
    {
        return false;
    }

    vector<Unicode> pieces;
    // Capacity sized to the input length (presumably at most one piece per
    // input element).
    pieces.reserve(end - begin);
    if (!cut(begin, end, pieces))
    {
        return false;
    }

    const size_t oldSize = res.size();
    res.resize(oldSize + pieces.size());

    size_t dst = oldSize;
    for (vector<Unicode>::iterator src = pieces.begin(); src != pieces.end(); ++src, ++dst)
    {
        string& target = res[dst];
        if (TransCode::encode(*src, target))
        {
            continue;
        }
        LogError("encode failed.");
        // Do not expose whatever encode() may have left behind.
        target.clear();
    }
    return true;
}
// Segments [begin, end) with the vector<Unicode> overload of cut() and appends
// the successfully encoded pieces to `res`.
//
// Asserts that _getInitFlag() holds. Returns false, without touching `res`,
// when the range is empty or the underlying cut() fails. Otherwise returns
// true; pieces that TransCode::encode() rejects are logged and left out.
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
    assert(_getInitFlag());
    if (begin == end)
    {
        return false;
    }

    vector<Unicode> pieces;
    if (!cut(begin, end, pieces))
    {
        return false;
    }

    // One scratch string shared by all iterations.
    string encoded;
    vector<Unicode>::iterator cur = pieces.begin();
    while (cur != pieces.end())
    {
        if (TransCode::encode(*cur, encoded))
        {
            res.push_back(encoded);
        }
        else
        {
            LogError("encode failed.");
        }
        ++cur;
    }
    return true;
}
bool cut(const string& sentence, vector<string>& words, size_t max_word_len) const { Unicode unicode; if (!TransCode::decode(sentence, unicode)) { return false; } vector<Unicode> unicodeWords; cut(unicode.begin(), unicode.end(), unicodeWords, max_word_len); words.resize(unicodeWords.size()); for (size_t i = 0; i < words.size(); i++) { TransCode::encode(unicodeWords[i], words[i]); } return true; }
// Segments [begin, end) with the vector<Unicode> overload of cut() and appends
// one string per resulting piece to `res`.
//
// Returns false, without touching `res`, when the range is empty or the
// underlying cut() fails; returns true otherwise. `res` grows by exactly the
// number of pieces. The result of TransCode::encode() is not checked.
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
    if (begin == end)
    {
        return false;
    }

    vector<Unicode> pieces;
    // Capacity sized to the input length (presumably at most one piece per
    // input element).
    pieces.reserve(end - begin);
    if (!cut(begin, end, pieces))
    {
        return false;
    }

    // Grow the output once, then encode directly into the new tail.
    const size_t tailStart = res.size();
    res.resize(tailStart + pieces.size());

    vector<Unicode>::iterator src = pieces.begin();
    vector<string>::iterator dst = res.begin() + tailStart;
    while (src != pieces.end())
    {
        TransCode::encode(*src, *dst);
        ++src;
        ++dst;
    }
    return true;
}
// Segments [begin, end) with the vector<TrieNodeInfo> overload of cut() and
// appends the encoded `word` of each resulting entry to `res`.
//
// Asserts that _getInitFlag() holds. Returns false, without touching `res`,
// when the underlying cut() fails. Otherwise returns true; entries whose word
// TransCode::encode() rejects are logged and left out of `res`.
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
    assert(_getInitFlag());

    vector<TrieNodeInfo> segWordInfos;
    if(!cut(begin, end, segWordInfos))
    {
        return false;
    }

    // One scratch string shared by all iterations.
    string tmp;
    // size_t matches the type of size(); the former `uint` counter was a
    // non-standard typedef and narrower than size_t on 64-bit targets.
    for(size_t i = 0; i < segWordInfos.size(); i++)
    {
        if(TransCode::encode(segWordInfos[i].word, tmp))
        {
            res.push_back(tmp);
        }
        else
        {
            LogError("encode failed.");
        }
    }
    return true;
}