/**
 * Replaces the span matched by this regexp in `text` with `subst`.
 *
 * @param text    String to search. Returned unchanged when matchInternal()
 *                reports no match.
 * @param subst   Replacement inserted in place of the matched span.
 * @param matched Set to true when a match was found, false otherwise.
 * @return The text before the match, followed by `subst`, followed by the
 *         text after the match.
 */
ucs4string Regexp::replace(ucs4string& text, ucs4string& subst, bool& matched)
{
    OnigRegion* const region = matchInternal(text);
    if (NULL == region) {
        matched = false;
        return text;
    }

    // Region offsets are divided by sizeof(ucs4char) to obtain character
    // indices (presumably the matcher reports byte offsets -- confirm).
    const size_t matchBegin = region->beg[0] / sizeof(ucs4char);
    const size_t matchEnd = region->end[0] / sizeof(ucs4char);

    // Build the result in place and return the string object itself.
    // Going through data() would rebuild it from a raw character pointer,
    // which stops at the first NUL and drops everything after it.
    ucs4string result = text.substr(0, matchBegin);
    result += subst;
    result += text.substr(matchEnd);

    matched = true;
    return result;
}
unsigned int ustrtou(const ucs4string& ustr) { unsigned int u = 0; for (size_t i=0; i<ustr.size(); ++i) { ucs4_t ch = ustr[i]; if (ch<'0' || ch>'9') break; u = u * 10 + (ch - '0'); } return u; }
/**
 * Recursively tries every way of splitting `to_seg` into pieces and records
 * in `data` the candidate with the smallest accumulated rate reciprocal.
 *
 * @param base    Segmentation built so far; each new piece is appended after
 *                a single space.
 * @param base_rr Accumulated rate reciprocal of `base`.
 * @param to_seg  Remaining text still to be segmented.
 * @param data    Best result so far; m_rr and m_res are overwritten whenever
 *                a candidate is not worse than the current m_rr (so on a tie
 *                the candidate evaluated later wins).
 */
void Segmentation::DoSegment(ucs4string base, double base_rr, ucs4string to_seg, SegmentData& data)
{
    // First explore every split into a non-empty head and a non-empty tail.
    for (size_t cut = 1; cut < to_seg.size(); ++cut) {
        ucs4string head = to_seg.substr(0, cut);
        const double headRr = base_rr * GetRateReciprocal(head);
        DoSegment(base + ucs4_t(' ') + head, headRr, to_seg.substr(cut), data);
    }

    // Then consider the remaining text as one unsplit piece.
    const double wholeRr = base_rr * GetRateReciprocal(to_seg);
    if (!(wholeRr > data.m_rr)) {
        data.m_rr = wholeRr;
        data.m_res = base + ucs4_t(' ') + to_seg;
    }
}
int main(int argc, char *argv[]) { if (argc != 3) { std::cerr << "Usage:" << argv[0] << " <ucs4-phrase-file> <ucs4-dictionary>" << std::endl; exit(1); } std::ifstream fin(argv[1], std::ios::binary); if (!fin.is_open()) { std::cerr << "Failed to open " << argv[1] << " for read." << std::endl; exit(2); } std::ofstream fout(argv[2], std::ios::binary); if (!fout.is_open()) { std::cerr << "Failed to open " << argv[2] << " for write." << std::endl; exit(3); } DictionaryGenerator dg; while (!fin.eof()) { ucs4string s; ucs4getline(fin, s); dg.SetmentPhrase(s); } DictMap::const_iterator it = dg.GetDictionary().begin(); DictMap::const_iterator itend = dg.GetDictionary().end(); for (; it!=itend; ++it) { const ucs4string ustr=it->first; unsigned int cnt=it->second; if (cnt==1 && ustr.size()!=1) continue; ucs4putstr(fout, ustr); ucs4putstr(fout, stdtoustr(" ")); ucs4putstr(fout, utoustr(cnt)); ucs4putch(fout, ucs4_t('\n')); } return 0; }
/**
 * Writes the characters of `ustr` to `os` as raw bytes.
 *
 * The string's storage is written exactly as it sits in memory; no byte-order
 * conversion is applied and no terminator is appended.
 *
 * @param os   Destination stream.
 * @param ustr String whose characters are written.
 */
void ucs4putstr(std::ostream& os, const ucs4string& ustr)
{
    // Take the element width from the string type rather than assuming 4, so
    // the byte count can never exceed the size of the underlying buffer.
    // sizeof does not evaluate its operand, so this is safe for empty strings.
    const size_t bytes = ustr.size() * sizeof(*ustr.data());
    os.write(reinterpret_cast<const char *>(ustr.data()), static_cast<std::streamsize>(bytes));
}
void TextualOutputPort::format(const VM* theVM, const ucs4string& fmt, Object args) { ucs4string buffer = UC(""); for (uint32_t i = 0; i < fmt.size(); i++) { if (fmt[i] == '~') { i++; if (!buffer.empty()) { putString(buffer); buffer.clear(); } switch (fmt[i]) { case '~': display(theVM, Object::makeChar('~')); break; case '%': display(theVM, Object::makeChar('\n')); break; case 'a': case 'A': case 'd': case 'D': { if (args.isPair()) { display(theVM, args.car()); args = args.cdr(); } else { isErrorOccured_ = true; errorMessage_ = "too few arguments for format string"; irritants_ = Pair::list1(Object::makeString(fmt)); return; } break; } case 's': case 'S': { if (args.isPair()) { putDatum(theVM, args.car()); args = args.cdr(); } else { isErrorOccured_ = true; errorMessage_ = "too few arguments for format string"; irritants_ = Pair::list1(Object::makeString(fmt)); return; } break; } case '\0': i--; break; } } else { buffer += fmt[i]; } } if (!buffer.empty()) { putString(buffer); } flush(); //fflush(stdout); // temp return; }
void TextualOutputPort::putString(const ucs4string& s) { for (ucs4string::size_type i = 0; i < s.size(); i++) { putCharHandleSpecial(s[i]); } }