// Checks the prefix-lookup API of a dictionary against a fixed set of
// expectations: single longest-prefix lookups via MatchPrefix, a lookup that
// must come back empty, and the full prefix list from MatchAllPrefixes.
static void TestDict(DictPtr dict) {
  // The query is exactly a key.
  Optional<DictEntry> exactHit = dict->MatchPrefix("BYVoid");
  AssertTrue(!exactHit.IsNull());
  AssertEquals("BYVoid", exactHit.Get().key);
  AssertEquals("byv", exactHit.Get().GetDefault());

  // The query starts with a key and has trailing characters; the same entry
  // is expected.
  Optional<DictEntry> prefixHit = dict->MatchPrefix("BYVoid123");
  AssertTrue(!prefixHit.IsNull());
  AssertEquals("BYVoid", prefixHit.Get().key);
  AssertEquals("byv", prefixHit.Get().GetDefault());

  // A multi-byte key.
  Optional<DictEntry> multiByteHit = dict->MatchPrefix(utf8("積羽沉舟"));
  AssertTrue(!multiByteHit.IsNull());
  AssertEquals(utf8("積羽沉舟"), multiByteHit.Get().key);
  AssertEquals(utf8("羣輕折軸"), multiByteHit.Get().GetDefault());

  // A query with no matching key must yield a null Optional.
  Optional<DictEntry> miss = dict->MatchPrefix("Unknown");
  AssertTrue(miss.IsNull());

  // All matching prefixes are expected in this order: longest key first.
  const vector<DictEntry> matches =
      dict->MatchAllPrefixes(utf8("清華大學計算機系"));
  AssertEquals(3, matches.size());

  AssertEquals(utf8("清華大學"), matches.at(0).key);
  AssertEquals("TsinghuaUniversity", matches.at(0).GetDefault());

  AssertEquals(utf8("清華"), matches.at(1).key);
  AssertEquals("Tsinghua", matches.at(1).GetDefault());

  AssertEquals(utf8("清"), matches.at(2).key);
  AssertEquals("Tsing", matches.at(2).GetDefault());
}
// Splits `text` into segments by repeatedly asking the dictionary for a
// prefix match at the current position (dict->MatchPrefix).
//
// - When a match is found, the matched key becomes its own segment.
// - Consecutive characters with no match are collected in `buffer` and
//   emitted as a single joined segment as soon as a match follows them or
//   the end of the text is reached.
//
// Returns the segments in input order.
vector<string> MaxMatchSegmentation::Segment(const string& text) {
  vector<string> segments;
  vector<string> buffer;

  // Emits the pending run of unmatched characters (if any) as one segment.
  auto clearBuffer = [&segments, &buffer]() {
    if (buffer.size() > 0) {
      segments.push_back(UTF8Util::Join(buffer));
      buffer.clear();
    }
  };

  // One past the last byte of `text`; used to bound the unmatched step.
  const char* const textEnd = text.c_str() + text.length();

  for (const char* pstr = text.c_str(); *pstr != '\0';) {
    Optional<DictEntry> matched = dict->MatchPrefix(pstr);
    size_t matchedLength;
    if (matched.IsNull()) {
      matchedLength = UTF8Util::NextCharLength(pstr);
      // NOTE(review): NextCharLength presumably derives the length from the
      // lead byte alone - confirm. If so, a truncated multi-byte sequence at
      // the end of `text` would report more bytes than remain. Without this
      // clamp FromSubstr would read beyond the buffer and `pstr` would jump
      // past the terminating NUL, making the loop condition read out of
      // bounds.
      const size_t remaining = static_cast<size_t>(textEnd - pstr);
      if (matchedLength > remaining) {
        matchedLength = remaining;
      }
      buffer.push_back(UTF8Util::FromSubstr(pstr, matchedLength));
    } else {
      // A match ends the current run of unmatched characters.
      clearBuffer();
      matchedLength = matched.Get().key.length();
      segments.push_back(matched.Get().key);
    }
    pstr += matchedLength;
  }

  // Emit any unmatched characters left at the end of the text.
  clearBuffer();
  return segments;
}
int main(int argc, const char* argv[]) { try { TCLAP::CmdLine cmd("Open Chinese Convert (OpenCC) Command Line Tool", ' ', VERSION); CmdLineOutput cmdLineOutput; cmd.setOutput(&cmdLineOutput); TCLAP::ValueArg<string> configArg("c", "config", "Configuration file", false /* required */, "s2t.json" /* default */, "file" /* type */, cmd); TCLAP::ValueArg<string> outputArg("o", "output", "Write converted text to", false /* required */, "" /* default */, "file" /* type */, cmd); TCLAP::ValueArg<string> inputArg("i", "input", "Read original text from", false /* required */, "" /* default */, "file" /* type */, cmd); TCLAP::ValueArg<bool> noFlushArg("", "noflush", "Disable flush for every line", false /* required */, false /* default */, "bool" /* type */, cmd); cmd.parse(argc, argv); configFileName = configArg.getValue(); noFlush = noFlushArg.getValue(); if (inputArg.isSet()) { inputFileName = Optional<string>(inputArg.getValue()); } if (outputArg.isSet()) { outputFileName = Optional<string>(outputArg.getValue()); noFlush = true; } converter = config.NewFromFile(configFileName); bool lineByLine = inputFileName.IsNull(); if (lineByLine) { ConvertLineByLine(); } else { Convert(); } } catch (TCLAP::ArgException& e) { std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; } catch (Exception& e) { std::cerr << e.what() << std::endl; } return 0; }
// Binds an optional double to the argument at `position`.
// A null `value` is forwarded to BindNull; otherwise the contained double is
// forwarded to the non-optional BindDouble overload.
void SqlConnection::DataCommand::BindDouble(
    SqlConnection::ArgumentIndex position,
    const Optional<double> &value)
{
    if (!value.IsNull()) {
        BindDouble(position, *value);
        return;
    }

    BindNull(position);
}
// Binds an optional 64-bit integer to the argument at `position`.
// A null `value` is forwarded to BindNull; otherwise the contained integer is
// forwarded to the non-optional BindInt64 overload.
void SqlConnection::DataCommand::BindInt64(
    SqlConnection::ArgumentIndex position,
    const Optional<int64_t> &value)
{
    if (!value.IsNull()) {
        BindInt64(position, *value);
        return;
    }

    BindNull(position);
}
// Returns the stream that output should be written to.
//
// When outputFileName is null this is stdout. Otherwise the named file is
// opened with mode "w" and the resulting stream is returned; if fopen fails,
// FileNotWritable is thrown with the file name.
// This function does not close the stream it opens.
FILE* GetOutputStream() {
  if (outputFileName.IsNull()) {
    return stdout;
  }

  const auto& targetPath = outputFileName.Get();
  FILE* stream = fopen(targetPath.c_str(), "w");
  if (stream == nullptr) {
    throw FileNotWritable(targetPath);
  }
  return stream;
}
// Converts `phrase` by walking it from left to right and asking the
// dictionary for a prefix match at each position (dict->MatchPrefix).
//
// - On a match, the entry's default value (GetDefault) is appended to the
//   output and the cursor advances by the entry's KeyLength().
// - Otherwise the next character is copied to the output unchanged.
//
// Returns the concatenated result.
string Conversion::Convert(const string& phrase) const {
  std::ostringstream buffer;

  // One past the last byte of `phrase`; used to bound the unmatched step.
  const char* const phraseEnd = phrase.c_str() + phrase.length();

  for (const char* pstr = phrase.c_str(); *pstr != '\0';) {
    Optional<const DictEntry*> matched = dict->MatchPrefix(pstr);
    size_t matchedLength;
    if (matched.IsNull()) {
      matchedLength = UTF8Util::NextCharLength(pstr);
      // NOTE(review): NextCharLength presumably derives the length from the
      // lead byte alone - confirm. If so, a truncated multi-byte sequence at
      // the end of `phrase` would report more bytes than remain. Without
      // this clamp FromSubstr would read beyond the buffer and `pstr` would
      // jump past the terminating NUL, making the loop condition read out of
      // bounds.
      const size_t remaining = static_cast<size_t>(phraseEnd - pstr);
      if (matchedLength > remaining) {
        matchedLength = remaining;
      }
      buffer << UTF8Util::FromSubstr(pstr, matchedLength);
    } else {
      matchedLength = matched.Get()->KeyLength();
      buffer << matched.Get()->GetDefault();
    }
    pstr += matchedLength;
  }
  return buffer.str();
}
// Splits `text` into a sequence of dictionary entries by asking the
// dictionary for a prefix match at each position (dict->MatchPrefix).
//
// - On a match, the matched entry itself is appended.
// - Otherwise a new DictEntry is created from the next single character and
//   appended.
//
// Returns the newly allocated vector of entries, in input order.
DictEntryPtrVectorPtr MaxMatchSegmentation::Segment(const string& text) {
  DictEntryPtrVectorPtr segments(new DictEntryPtrVector);

  const char* pstr = text.c_str();
  // One past the last byte of `text`; used to bound the unmatched step.
  const char* const textEnd = pstr + text.length();

  while (*pstr != '\0') {
    Optional<DictEntryPtr> matched = dict->MatchPrefix(pstr);
    size_t matchedLength;
    if (matched.IsNull()) {
      matchedLength = UTF8Util::NextCharLength(pstr);
      // NOTE(review): NextCharLength presumably derives the length from the
      // lead byte alone - confirm. If so, a truncated multi-byte sequence at
      // the end of `text` would report more bytes than remain. Without this
      // clamp FromSubstr would read beyond the buffer and `pstr` would jump
      // past the terminating NUL, making the loop condition read out of
      // bounds.
      const size_t remaining = static_cast<size_t>(textEnd - pstr);
      if (matchedLength > remaining) {
        matchedLength = remaining;
      }
      segments->push_back(DictEntryPtr(
          new DictEntry(UTF8Util::FromSubstr(pstr, matchedLength))));
    } else {
      matchedLength = matched.Get()->key.length();
      segments->push_back(DictEntryPtr(matched.Get()));
    }
    pstr += matchedLength;
  }
  return segments;
}