Exemplo n.º 1
0
// Converts the input file chunk by chunk and writes the result to the output
// stream.
//
// The file named by inputFileName is read in chunks of at most BUFFER_SIZE
// bytes. Each chunk is passed to converter->Convert() and the converted text
// is written to the stream returned by GetOutputStream(). When a read fills
// the buffer completely, the bytes of a UTF-8 character that may have been cut
// in half at the end of the buffer are held back and placed at the front of
// the buffer for the next read.
//
// Throws FileNotFound when the input file cannot be opened. Exceptions raised
// while opening the output stream or while converting are propagated after
// the input file has been closed.
void Convert() {
  const int BUFFER_SIZE = 1024 * 1024;
  static bool bufferInitialized = false;
  static string buffer;
  static char* bufferBegin;
  static const char* bufferEnd;
  static char* bufferPtr;
  static size_t bufferSizeAvailable;
  if (!bufferInitialized) {
    bufferInitialized = true;
    // One extra byte so a completely filled buffer can still be terminated.
    buffer.resize(BUFFER_SIZE + 1);
    // Take a writable pointer to the storage instead of casting away the
    // constness of c_str().
    bufferBegin = &buffer[0];
    bufferEnd = bufferBegin + BUFFER_SIZE;
  }
  // Always start a call with an empty buffer so that no carried-over bytes
  // from an earlier call can leak into this one.
  bufferPtr = bufferBegin;
  bufferSizeAvailable = BUFFER_SIZE;

  FILE* fin = fopen(inputFileName.Get().c_str(), "r");
  if (!fin) {
    throw FileNotFound(inputFileName.Get());
  }
  try {
    FILE* fout = GetOutputStream();
    // Also stop on a read error: after an error feof() never becomes true,
    // so testing feof() alone would loop forever.
    while (!feof(fin) && !ferror(fin)) {
      size_t length = fread(bufferPtr, sizeof(char), bufferSizeAvailable, fin);
      bufferPtr[length] = '\0';
      size_t remainingLength = 0;
      string remainingTemp;
      if (length == bufferSizeAvailable) {
        // The buffer is full, so fread may have split a UTF-8 character.
        // Walk the buffer character by character to find where the last
        // complete character ends.
        char* lastChPtr = bufferBegin;
        while (lastChPtr < bufferEnd) {
          size_t nextCharLen = UTF8Util::NextCharLength(lastChPtr);
          if (lastChPtr + nextCharLen > bufferEnd) {
            break;
          }
          lastChPtr += nextCharLen;
        }
        remainingLength = bufferEnd - lastChPtr;
        if (remainingLength > 0) {
          // Save the incomplete tail and cut it off the text to convert.
          remainingTemp = UTF8Util::FromSubstr(lastChPtr, remainingLength);
          *lastChPtr = '\0';
        }
      }
      // Perform conversion
      const string& converted = converter->Convert(buffer);
      fputs(converted.c_str(), fout);
      if (!noFlush) {
        // Flush after every chunk unless flushing has been disabled.
        fflush(fout);
      }
      // Reset the write position, keeping room for the held-back bytes.
      bufferPtr = bufferBegin + remainingLength;
      bufferSizeAvailable = BUFFER_SIZE - remainingLength;
      if (remainingLength > 0) {
        strncpy(bufferBegin, remainingTemp.c_str(), remainingLength);
      }
    }
    fclose(fout);
  } catch (...) {
    // Do not leak the input handle when opening the output or converting fails.
    fclose(fin);
    throw;
  }
  fclose(fin);
}
Exemplo n.º 2
0
// Splits `text` into segments using the longest dictionary prefix at each
// position. A dictionary match becomes a segment of its own (the matched
// key); consecutive characters without a match are collected and joined into
// a single segment.
vector<string> MaxMatchSegmentation::Segment(const string& text) {
  vector<string> result;
  vector<string> pending;  // unmatched characters waiting to be joined

  // Emits the collected unmatched characters as one segment, if there are any.
  auto flushPending = [&result, &pending]() {
    if (pending.empty()) {
      return;
    }
    result.push_back(UTF8Util::Join(pending));
    pending.clear();
  };

  const char* cursor = text.c_str();
  while (*cursor != '\0') {
    Optional<DictEntry> matched = dict->MatchPrefix(cursor);
    if (!matched.IsNull()) {
      // Dictionary hit: close the run of unmatched characters first.
      flushPending();
      const size_t keyLength = matched.Get().key.length();
      result.push_back(matched.Get().key);
      cursor += keyLength;
    } else {
      // No entry starts here: take exactly one UTF-8 character.
      const size_t charLength = UTF8Util::NextCharLength(cursor);
      pending.push_back(UTF8Util::FromSubstr(cursor, charLength));
      cursor += charLength;
    }
  }
  flushPending();
  return result;
}
Exemplo n.º 3
0
// Returns the stream the converted text is written to: stdout when no output
// file name is set, otherwise the named file opened for writing.
// Throws FileNotWritable when the output file cannot be opened.
FILE* GetOutputStream() {
  if (outputFileName.IsNull()) {
    return stdout;
  }
  FILE* stream = fopen(outputFileName.Get().c_str(), "w");
  if (stream) {
    return stream;
  }
  throw FileNotWritable(outputFileName.Get());
}
Exemplo n.º 4
0
 // Copy constructor: copies the stored value only when `other` holds one.
 //
 // valid_ is value-initialized first so that it is never left indeterminate
 // when `other` is empty (or when Construct() looks at it before storing).
 // NOTE(review): assumes Construct() records that a value is now stored —
 // confirm against the class definition.
 Optional(Optional const& other)
   : valid_()
 {
   if (other.valid_)
   {
     Construct(other.Get());
   }
 }
Exemplo n.º 5
0
// Converts `phrase` from left to right: wherever the dictionary matches a
// prefix of the remaining text, the entry's default value is emitted and the
// matched key is skipped; otherwise a single UTF-8 character is copied
// through unchanged.
string Conversion::Convert(const string& phrase) const {
  std::ostringstream output;
  const char* cursor = phrase.c_str();
  while (*cursor != '\0') {
    Optional<const DictEntry*> matched = dict->MatchPrefix(cursor);
    if (!matched.IsNull()) {
      // Dictionary hit: replace the key with its default value.
      const size_t keyLength = matched.Get()->KeyLength();
      output << matched.Get()->GetDefault();
      cursor += keyLength;
      continue;
    }
    // No entry starts here: pass one character through as-is.
    const size_t charLength = UTF8Util::NextCharLength(cursor);
    output << UTF8Util::FromSubstr(cursor, charLength);
    cursor += charLength;
  }
  return output.str();
}
Exemplo n.º 6
0
// Splits `text` into a newly allocated vector of dictionary entries. At each
// position the entry returned by the dictionary's prefix match is appended;
// where nothing matches, a new entry built from the single UTF-8 character at
// that position is appended instead.
DictEntryPtrVectorPtr MaxMatchSegmentation::Segment(const string& text) {
  DictEntryPtrVectorPtr result(new DictEntryPtrVector);
  for (const char* cursor = text.c_str(); *cursor != '\0';) {
    Optional<DictEntryPtr> matched = dict->MatchPrefix(cursor);
    if (!matched.IsNull()) {
      // Dictionary hit: reuse the matched entry and skip its key.
      const size_t keyLength = matched.Get()->key.length();
      result->push_back(DictEntryPtr(matched.Get()));
      cursor += keyLength;
      continue;
    }
    // No entry starts here: wrap one character in a fresh entry.
    const size_t charLength = UTF8Util::NextCharLength(cursor);
    result->push_back(
        DictEntryPtr(new DictEntry(UTF8Util::FromSubstr(cursor, charLength))));
    cursor += charLength;
  }
  return result;
}
Exemplo n.º 7
0
 // Checks prefix matching on `dict`. The assertions expect the dictionary to
 // map "BYVoid" to "byv", "積羽沉舟" to "羣輕折軸", and the keys "清華大學",
 // "清華" and "清" to "TsinghuaUniversity", "Tsinghua" and "Tsing", and to
 // have no entry that is a prefix of "Unknown".
 static void TestDict(DictPtr dict) {
   Optional<DictEntry> entry;
   // Exact match: the whole input is a key.
   entry = dict->MatchPrefix("BYVoid");
   AssertTrue(!entry.IsNull());
   AssertEquals("BYVoid", entry.Get().key);
   AssertEquals("byv", entry.Get().GetDefault());
   
   // The key is followed by extra text; the key itself is still matched.
   entry = dict->MatchPrefix("BYVoid123");
   AssertTrue(!entry.IsNull());
   AssertEquals("BYVoid", entry.Get().key);
   AssertEquals("byv", entry.Get().GetDefault());
   
   // Multi-byte (UTF-8) key.
   entry = dict->MatchPrefix(utf8("積羽沉舟"));
   AssertTrue(!entry.IsNull());
   AssertEquals(utf8("積羽沉舟"), entry.Get().key);
   AssertEquals(utf8("羣輕折軸"), entry.Get().GetDefault());
   
   // No key is a prefix of the input: the result must be null.
   entry = dict->MatchPrefix("Unknown");
   AssertTrue(entry.IsNull());
   
   // All matching prefixes are expected longest first.
   const vector<DictEntry> matches = dict->MatchAllPrefixes(utf8("清華大學計算機系"));
   AssertEquals(3, matches.size());
   AssertEquals(utf8("清華大學"), matches.at(0).key);
   AssertEquals("TsinghuaUniversity", matches.at(0).GetDefault());
   AssertEquals(utf8("清華"), matches.at(1).key);
   AssertEquals("Tsinghua", matches.at(1).GetDefault());
   AssertEquals(utf8("清"), matches.at(2).key);
   AssertEquals("Tsing", matches.at(2).GetDefault());
 }
// Unit test for faint::Optional. Covers the unset and set states of an
// Optional holding a value type (Bitmap), Take/Clear, initializing
// construction, and Optionals holding references (int&, FailIfCopied&).
void test_optional(){
  using namespace faint;

  // Helper constants for testing the Optional
  const IntSize altSize = IntSize(5,5);
  const IntSize bmpSize = IntSize(10,10);
  const Bitmap alt(altSize);
  const Bitmap bmp(bmpSize);
  // The two bitmaps must differ in size so the checks below can tell which
  // one a call returned.
  VERIFY(alt.GetSize() != bmp.GetSize());

  // Uninitialized optional (not set).
  Optional<Bitmap> optional;
  VERIFY(optional.NotSet());
  VERIFY(!optional.IsSet());
  VERIFY(!optional);

  // Compile-time check that Or() is available for a value-type Optional.
  static_assert(is_true<decltype(has_or(optional))>(),
    "Optional of value type lacks Or-method");
  // While unset, Or() yields the alternative.
  EQUAL(optional.Or(alt).GetSize(), altSize);
  // NOTE(review): FAIL_IF_CALLED / FAIL_UNLESS_CALLED are defined elsewhere;
  // presumably they produce callbacks that fail the test when invoked /
  // when never invoked — confirm.
  optional.IfSet(FAIL_IF_CALLED());
  optional.Visit(FAIL_IF_CALLED(), FAIL_UNLESS_CALLED());

  // Set state: the same queries now report the stored bitmap.
  optional.Set(bmp);
  VERIFY(!optional.NotSet());
  VERIFY(optional.IsSet());
  VERIFY(optional);
  EQUAL(optional.Or(alt).GetSize(), bmpSize);
  optional.IfSet(FAIL_UNLESS_CALLED());
  optional.Visit(FAIL_UNLESS_CALLED(), FAIL_IF_CALLED());
  // VisitSimple takes a function for the stored value plus a second argument
  // (here alt's size); with a value set, the function's result is expected.
  IntSize sz = optional.VisitSimple(
    [](const Bitmap& bmp){
      return bmp.GetSize();
    },
    alt.GetSize());
  EQUAL(sz, bmpSize);
  EQUAL(optional.Get().GetSize(), bmpSize);

  // Take the object (clearing the optional)
  Bitmap bmp2 = optional.Take();
  EQUAL(bmp2.GetSize(), bmp.GetSize());
  VERIFY(optional.NotSet());

  // Initializing construction
  Optional<Bitmap> optional2(bmp2);
  VERIFY(optional2.IsSet());
  EQUAL(optional2.Get().GetSize(), bmp.GetSize());
  optional2.Clear();
  VERIFY(optional2.NotSet());

  // Reference
  // Writing through a reference-Optional must modify the referenced variable.
  int i = 7;
  Optional<int&> oi(i);
  oi.Get() = 8;

  EQUAL(i, 8);
  // Compile-time check that Or() is not available for a reference Optional.
  static_assert(is_false<decltype(has_or(oi))>(),
    "Optional of reference type has Or-method.");

  // Non-reference
  // A value-Optional holds its own copy, so the source variable is unchanged.
  int j = 7;
  Optional<int> oj(j);
  oj.Get() = 8;
  EQUAL(j, 7);

  // Ensure that copying a reference-Optional
  // does not copy the contained value.
  FailIfCopied f(10);
  Optional<FailIfCopied&> o(f);
  o.Get().value++;
  EQUAL(o.Get().value, 11);

  // The copy refers to the same object: an increment through o2 is visible
  // through o.
  Optional<FailIfCopied&> o2(o);
  VERIFY(o2.IsSet());
  o2.Get().value++;
  EQUAL(o.Get().value, 12);

  // NOTE(review): IntHolder is defined elsewhere; presumably Get(true)
  // returns a set Optional<int&> referring to h.value — confirm.
  IntHolder h(12);
  EQUAL(h.value, 12);
  h.Get(true).Get()++;
  EQUAL(h.value, 13);
  EQUAL(h.Get(true).Get(), 13);
}