Exemple #1
0
 // Verifies the prefix-lookup API of `dict` against the entries the test
 // dictionary is expected to contain. Each scenario lives in its own scope so
 // the lookups cannot interfere with one another.
 static void TestDict(DictPtr dict) {
   {
     // The query is exactly a stored key.
     Optional<DictEntry> exact = dict->MatchPrefix("BYVoid");
     AssertTrue(!exact.IsNull());
     AssertEquals("BYVoid", exact.Get().key);
     AssertEquals("byv", exact.Get().GetDefault());
   }

   {
     // The query continues past a stored key; the stored key is still found.
     Optional<DictEntry> withSuffix = dict->MatchPrefix("BYVoid123");
     AssertTrue(!withSuffix.IsNull());
     AssertEquals("BYVoid", withSuffix.Get().key);
     AssertEquals("byv", withSuffix.Get().GetDefault());
   }

   {
     // A multi-byte (UTF-8) key.
     Optional<DictEntry> multiByte = dict->MatchPrefix(utf8("積羽沉舟"));
     AssertTrue(!multiByte.IsNull());
     AssertEquals(utf8("積羽沉舟"), multiByte.Get().key);
     AssertEquals(utf8("羣輕折軸"), multiByte.Get().GetDefault());
   }

   {
     // A query with no matching key produces a null result.
     Optional<DictEntry> missing = dict->MatchPrefix("Unknown");
     AssertTrue(missing.IsNull());
   }

   // MatchAllPrefixes is expected to list every matching key, longest first.
   const vector<DictEntry> allHits =
       dict->MatchAllPrefixes(utf8("清華大學計算機系"));
   AssertEquals(3, allHits.size());

   const DictEntry& longest = allHits.at(0);
   AssertEquals(utf8("清華大學"), longest.key);
   AssertEquals("TsinghuaUniversity", longest.GetDefault());

   const DictEntry& middle = allHits.at(1);
   AssertEquals(utf8("清華"), middle.key);
   AssertEquals("Tsinghua", middle.GetDefault());

   const DictEntry& shortest = allHits.at(2);
   AssertEquals(utf8("清"), shortest.key);
   AssertEquals("Tsing", shortest.GetDefault());
 }
// Splits `text` into segments by repeatedly asking the dictionary for a prefix
// match at the current position.
//
// - When the dictionary returns an entry, its key becomes one segment.
// - Characters with no match are collected and emitted as one joined segment
//   as soon as the next match is found (or the input ends).
//
// Returns the segments in input order.
vector<string> MaxMatchSegmentation::Segment(const string& text) {
  vector<string> result;
  vector<string> pending;  // unmatched characters not yet emitted

  // Emits the pending unmatched characters (if any) as one segment.
  const auto flushPending = [&result, &pending]() {
    if (pending.empty()) {
      return;
    }
    result.push_back(UTF8Util::Join(pending));
    pending.clear();
  };

  const char* cursor = text.c_str();
  while (*cursor != '\0') {
    Optional<DictEntry> hit = dict->MatchPrefix(cursor);
    if (!hit.IsNull()) {
      // Unmatched characters seen so far must precede this segment.
      flushPending();
      const auto& key = hit.Get().key;
      result.push_back(key);
      cursor += key.length();
    } else {
      // No entry here: set aside one UTF-8 character and move past it.
      const size_t charLength = UTF8Util::NextCharLength(cursor);
      pending.push_back(UTF8Util::FromSubstr(cursor, charLength));
      cursor += charLength;
    }
  }

  flushPending();
  return result;
}
Exemple #3
0
// Entry point of the OpenCC command line tool.
//
// Command line options (parsed with TCLAP):
//   -c, --config   configuration file, defaults to "s2t.json"
//   -o, --output   file to write converted text to; setting it also forces
//                  noFlush to true
//   -i, --input    file to read original text from
//       --noflush  disable flush for every line
//
// When no input file is given, ConvertLineByLine() is called; otherwise
// Convert() is called.
//
// Returns 0 on success and 1 when a TCLAP::ArgException or an Exception was
// caught, so that callers (shells, scripts) can detect the failure.
int main(int argc, const char* argv[]) {
  try {
    TCLAP::CmdLine cmd("Open Chinese Convert (OpenCC) Command Line Tool",
                       ' ',
                       VERSION);
    CmdLineOutput cmdLineOutput;
    cmd.setOutput(&cmdLineOutput);

    TCLAP::ValueArg<string> configArg("c", "config",
                                      "Configuration file",
                                      false /* required */,
                                      "s2t.json" /* default */,
                                      "file" /* type */,
                                      cmd);
    TCLAP::ValueArg<string> outputArg("o", "output",
                                      "Write converted text to",
                                      false /* required */,
                                      "" /* default */,
                                      "file" /* type */,
                                      cmd);
    TCLAP::ValueArg<string> inputArg("i", "input",
                                     "Read original text from",
                                     false /* required */,
                                     "" /* default */,
                                     "file" /* type */,
                                     cmd);
    TCLAP::ValueArg<bool> noFlushArg("", "noflush",
                                     "Disable flush for every line",
                                     false /* required */,
                                     false /* default */,
                                     "bool" /* type */,
                                     cmd);
    cmd.parse(argc, argv);
    configFileName = configArg.getValue();
    noFlush = noFlushArg.getValue();
    if (inputArg.isSet()) {
      inputFileName = Optional<string>(inputArg.getValue());
    }
    if (outputArg.isSet()) {
      outputFileName = Optional<string>(outputArg.getValue());
      // Explicit output file: per-line flushing is switched off.
      noFlush = true;
    }
    converter = config.NewFromFile(configFileName);
    bool lineByLine = inputFileName.IsNull();
    if (lineByLine) {
      ConvertLineByLine();
    } else {
      Convert();
    }
  } catch (TCLAP::ArgException& e) {
    std::cerr << "error: " << e.error()
        << " for arg " << e.argId() << std::endl;
    // Report the failure through the exit status instead of returning 0.
    return 1;
  } catch (Exception& e) {
    std::cerr << e.what() << std::endl;
    // Report the failure through the exit status instead of returning 0.
    return 1;
  }
  return 0;
}
// Binds an optional double at `position`: a present value is forwarded to the
// plain-value BindDouble overload, an absent one goes to BindNull.
void SqlConnection::DataCommand::BindDouble(
    SqlConnection::ArgumentIndex position,
    const Optional<double> &value)
{
    if (!value.IsNull()) {
        BindDouble(position, *value);
        return;
    }

    BindNull(position);
}
// Binds an optional 64-bit integer at `position`: a present value is forwarded
// to the plain-value BindInt64 overload, an absent one goes to BindNull.
void SqlConnection::DataCommand::BindInt64(
    SqlConnection::ArgumentIndex position,
    const Optional<int64_t> &value)
{
    if (!value.IsNull()) {
        BindInt64(position, *value);
        return;
    }

    BindNull(position);
}
Exemple #6
0
// Returns the stream that converted text should be written to.
//
// With no output file name set this is stdout. Otherwise the named file is
// opened with mode "w" and the resulting handle is returned.
//
// Throws FileNotWritable when the file cannot be opened.
FILE* GetOutputStream() {
  if (outputFileName.IsNull()) {
    return stdout;
  }

  const auto& path = outputFileName.Get();
  FILE* const stream = fopen(path.c_str(), "w");
  if (stream == nullptr) {
    throw FileNotWritable(path);
  }
  return stream;
}
Exemple #7
0
// Converts `phrase` by scanning it from left to right.
//
// At each position the dictionary is asked for a prefix match. A matched entry
// contributes its default value and the scan skips the entry's key length.
// Where nothing matches, one UTF-8 character is copied through unchanged.
//
// Returns the concatenated output.
string Conversion::Convert(const string& phrase) const {
  std::ostringstream converted;
  const char* cursor = phrase.c_str();
  while (*cursor != '\0') {
    Optional<const DictEntry*> hit = dict->MatchPrefix(cursor);
    if (hit.IsNull()) {
      // No entry starts here: pass the next character through as-is.
      const size_t charLength = UTF8Util::NextCharLength(cursor);
      converted << UTF8Util::FromSubstr(cursor, charLength);
      cursor += charLength;
    } else {
      const DictEntry* entry = hit.Get();
      const size_t keyLength = entry->KeyLength();
      converted << entry->GetDefault();
      cursor += keyLength;
    }
  }
  return converted.str();
}
// Splits `text` into a sequence of dictionary entries.
//
// At each position the dictionary is asked for a prefix match. A matched entry
// is appended as-is and the scan advances by the length of its key. Where
// nothing matches, a new DictEntry is built from the next single UTF-8
// character and appended instead.
//
// Returns the entries in input order.
DictEntryPtrVectorPtr MaxMatchSegmentation::Segment(const string& text) {
  DictEntryPtrVectorPtr result(new DictEntryPtrVector);
  for (const char* cursor = text.c_str(); *cursor != '\0';) {
    Optional<DictEntryPtr> hit = dict->MatchPrefix(cursor);
    if (!hit.IsNull()) {
      const size_t keyLength = hit.Get()->key.length();
      result->push_back(DictEntryPtr(hit.Get()));
      cursor += keyLength;
      continue;
    }

    // No entry starts here: wrap the next character in an entry of its own.
    const size_t charLength = UTF8Util::NextCharLength(cursor);
    result->push_back(
        DictEntryPtr(new DictEntry(UTF8Util::FromSubstr(cursor, charLength))));
    cursor += charLength;
  }
  return result;
}