Ejemplo n.º 1
0
void NormalizeUtf8::normalizeNfc(std::string& str) const {
  SDL_TRACE(NormalizeUtf8, "unnormalized: '" << str << "' #bytes=" << str.size()
                                             << " #unicode=" << utf8length(str));
  if (str.empty()) return;
  FixedUtf8 fixed(str);
  fixed.moveTo(str);
  SDL_TRACE(NormalizeUtf8, "fixed-utf8: '" << str << "' #bytes=" << str.size() << " #unicode=" << utf8length(str));
  NfcOptions::normalize(str);
  SDL_TRACE(NormalizeUtf8, "nfc-normalized: '" << str << "' #bytes=" << str.size()
                                               << " #unicode=" << utf8length(str));
}
Ejemplo n.º 2
0
/**
   Build a linear chain in \a pHgResult with one arc per input token.

   First inputTokens.size() + 1 states are added, and state 0 becomes the
   start state. For token i an arc is added whose head is state i + 1 and whose
   tails are state i and a newly added state labeled with the token's symbol.
   The last head state (or state 0 for empty input) becomes the final state.

   NOTE(review): \c Arc is not declared in this view - presumably a template
   parameter introduced on a line above this definition; confirm.

   \param inputTokens tokens, one arc each, in order.
   \param pHgResult hypergraph to fill; must have a vocabulary.
   \param opts supplies the token-to-symbol mapping (terminalMaybeUnk), the
   per-position input features (inputFeatures, asserted non-NULL whenever there
   is at least one token), and an optional \c tokens sink that records
   (symbol, feature id) pairs.
   \param inputWeights applied to each arc weight via reweight(position, weight).

   \return the number of input tokens.

   \throws InvalidInputException (via SDL_THROW_LOG) if \a pHgResult has no vocabulary.
*/
Position stringToHypergraph(Strings const& inputTokens, IMutableHypergraph<Arc>* pHgResult,
                            StringToHypergraphOptions const& opts = StringToHypergraphOptions(),
                            TokenWeights const& inputWeights = TokenWeights()) {
  typedef typename Arc::Weight Weight;
  typedef FeatureInsertFct<Weight> FI;

  IVocabularyPtr const& pVoc = pHgResult->getVocabulary();
  if (!pVoc) {
    SDL_THROW_LOG(Hypergraph, InvalidInputException, "pHgResult hypergraph must contain vocabulary");
  }

  // one state before each token plus one after the last token
  std::size_t const numNonlexicalStates = inputTokens.size() + 1;
  for (std::size_t added = 0; added < numNonlexicalStates; ++added) {
    pHgResult->addState();
  }
  pHgResult->setStart(0);

  Position const numTokens = inputTokens.size();
  StateId tailState = 0;
  for (Position pos = 0; pos != numTokens; ++pos) {
    std::string const& token = inputTokens[pos];
    SDL_TRACE(Hypergraph.StringToHypergraph, pos << ": " << token);
    Sym const sym = opts.terminalMaybeUnk(pVoc.get(), token);
    StateId const headState = tailState + 1;

    // the second tail is a fresh state carrying the token's symbol
    Arc* pArc = new Arc(headState, Tails(tailState, pHgResult->addState(sym)));
    Weight& arcWeight = pArc->weight();

    assert(opts.inputFeatures != NULL);
    for (FeatureId featureId : opts.inputFeatures->getFeaturesForInputPosition(pos)) {
      FI::insertNew(&arcWeight, featureId, 1);
      if (opts.tokens) {
        opts.tokens->insert(sym, featureId);
      }
    }

    inputWeights.reweight(pos, arcWeight);
    // NOTE(review): presumably addArc takes ownership of pArc - confirm against IMutableHypergraph
    pHgResult->addArc(pArc);
    tailState = headState;
  }

  pHgResult->setFinal(tailState);
  return numTokens;
}
Ejemplo n.º 3
0
/**
   Normalize \a str in place while tracking constraints \a c.

   Order of operations: normalizeNfc(str, c) runs first; then the
   configuration is checked; then FixUnicode::normalize(str, true) runs and the
   result is traced.

   \param str string to normalize; modified in place (already modified by
   normalizeNfc by the time the configuration check can throw).
   \param c constraints passed to normalizeNfc and to constraintsIndexUnicodes.

   \throws ConfigException (via SDL_THROW_LOG) if constraintsIndexUnicodes(c)
   holds while FixUnicode::removeControlChars is set.
*/
void NormalizeUtf8::normalize(std::string& str, Constraints& c) const {
  normalizeNfc(str, c);

  // control-character removal is not supported together with unicode-indexed constraints
  bool const unsupportedCombination = constraintsIndexUnicodes(c) && FixUnicode::removeControlChars;
  if (unsupportedCombination) {
    SDL_THROW_LOG(NormalizeUtf8, ConfigException,
                  "remove-control-characters can't be true when using constraints (yet)");
  }

  FixUnicode::normalize(str, true);
  SDL_TRACE(NormalizeUtf8, "windows-1252-replaced: '" << str << "' #bytes=" << str.size()
                                                      << " #unicode=" << utf8length(str));
}
Ejemplo n.º 4
0
/**
   Read every line of \a in via opt.getlineNormalized, chomp it, and pass it
   to \a consumer.

   \param in stream to read until getlineNormalized reports failure.
   \param consumer called once per line with the chomped line.
   \param opt provides getlineNormalized.

   \return number of lines passed to \a consumer.
*/
inline std::size_t visitChompedLines(std::istream& in, StringConsumer const& consumer,
                                     NormalizeUtf8 const& opt = NormalizeUtf8()) {
  std::size_t nlines = 0;
  std::string line;
  for (;;) {
    if (!opt.getlineNormalized(in, line)) break;
    chomp(line);
    ++nlines;
    // NOTE(review): the trace channel is named after visitChompedLinesUntil - possibly a
    // copy-paste; left unchanged so existing logging configuration keeps working.
    SDL_TRACE(Util.visitChompedLinesUntil, ":" << nlines << ": " << line);
    consumer(line);
  }
  return nlines;
}
Ejemplo n.º 5
0
/**
   Read lines of \a in via opt.getlineNormalized, chomp each, and pass it to
   \a consumer, stopping at the first chomped line equal to \a separatorLine
   or at the end of input.

   \param in stream to read.
   \param separatorLine terminating line, compared after chomping; it is
   read from the stream but neither counted nor passed to \a consumer.
   \param consumer called once per line preceding the separator.
   \param opt provides getlineNormalized.

   \return number of lines passed to \a consumer.
*/
inline std::size_t visitChompedLinesUntil(std::istream& in, std::string const& separatorLine,
                                          StringConsumer const& consumer,
                                          NormalizeUtf8 const& opt = NormalizeUtf8()) {
  std::size_t nlines = 0;
  for (std::string line; opt.getlineNormalized(in, line);) {
    chomp(line);
    if (line == separatorLine) {
      break;
    }
    ++nlines;
    SDL_TRACE(Util.visitChompedLinesUntil, "until " << separatorLine << ":" << nlines << ": " << line);
    consumer(line);
  }
  return nlines;
}
Ejemplo n.º 6
0
// Maps the file `filename` read-only into memory and returns the view pointer.
//
// filename : path handed to ReadFileHandler.
// offset   : must currently be 0 (asserted, and enforced by the guard below).
// size     : maximum size of the file mapping object, in bytes; must be non-zero.
//
// Returns the address returned by ::MapViewOfFile, or nullptr when the
// arguments are rejected, the file cannot be opened, or either Win32 call fails.
//
// The file-mapping handle is closed before returning. Per the Win32
// documentation a mapped view keeps its own reference to the mapping object,
// so the returned view stays valid after that.
// NOTE(review): presumably the caller releases the view with UnmapViewOfFile - confirm.
file_map_detail::view_of_file 
file_map_detail::map_view_of_file(const char* filename,
                                  uint64 const offset,  
                                  uint64 const size)
{
    A_STATIC_ASSERT_64_BIT;

    SDL_ASSERT(size);
    SDL_ASSERT(!offset); // current limitation
    SDL_ASSERT(offset < size);

    if (size && (offset < size) && !offset) {

        // split the 64-bit size into the two DWORD halves CreateFileMapping expects
        filesize_64 fsize = {};
        fsize.size = size;

        static_assert(sizeof(DWORD) == 4, "");
        static_assert(sizeof(fsize.size) == 8, "");
        static_assert(sizeof(fsize.d.lo) == 4, "");
        static_assert(sizeof(fsize.d.hi) == 4, "");
        // NOTE(review): this fires for any size that is an exact multiple of 4 GiB - confirm intent
        SDL_ASSERT(fsize.d.lo);

        ReadFileHandler file(filename);
        if (!file.is_open()) {
            SDL_TRACE("failed to open file: ", filename);
            SDL_ASSERT(false);
            return nullptr;
        }

        // If the function fails, the return value is NULL
        HANDLE hFileMapping = ::CreateFileMapping(
            file.get(),
            nullptr,
            PAGE_READONLY,
            fsize.d.hi,  // dwMaximumSizeHigh,
            fsize.d.lo,  // dwMaximumSizeLow,
            nullptr);

        if (!hFileMapping) {
            // read the error code before any other call (tracing included) can overwrite it
            const DWORD mappingError = ::GetLastError();
            SDL_TRACE("CreateFileMapping failed : ", filename);
            SDL_TRACE("GetLastError = ", mappingError);
            SDL_ASSERT(false);
            return nullptr;
        }

        auto pFileView = ::MapViewOfFile(
            hFileMapping,
            FILE_MAP_READ,
            0, 0,           // file offset where the view begins
            0);             // mapping extends from the specified offset to the end of the file mapping.

        // capture the failure code now: CloseHandle below may change the last-error value
        const DWORD viewError = pFileView ? 0 : ::GetLastError();

        ::CloseHandle(hFileMapping);

        if (!pFileView) {
            SDL_TRACE("MapViewOfFile failed : ", filename);
            SDL_TRACE("GetLastError = ", viewError);
            SDL_TRACE((viewError == ERROR_NOT_ENOUGH_MEMORY) ? "ERROR_NOT_ENOUGH_MEMORY" : "");
            SDL_ASSERT(false);
        }
        static_assert(std::is_same<view_of_file, decltype(pFileView)>::value, "");
        return pFileView;
    }
    return nullptr;
}
Ejemplo n.º 7
0
/**
   Stub for a platform where the memory size query is not implemented (the
   trace message names Apple).

   \return always 0, after emitting a trace saying the call is unsupported.
*/
std::size_t MemoryInfo::size() {
  std::size_t const unsupportedSize = 0;
  SDL_TRACE(MemoryInfo, "MemoryInfo::size() not yet supported on Apple.");
  return unsupportedSize;
}