void NormalizeUtf8::normalizeNfc(std::string& str) const { SDL_TRACE(NormalizeUtf8, "unnormalized: '" << str << "' #bytes=" << str.size() << " #unicode=" << utf8length(str)); if (str.empty()) return; FixedUtf8 fixed(str); fixed.moveTo(str); SDL_TRACE(NormalizeUtf8, "fixed-utf8: '" << str << "' #bytes=" << str.size() << " #unicode=" << utf8length(str)); NfcOptions::normalize(str); SDL_TRACE(NormalizeUtf8, "nfc-normalized: '" << str << "' #bytes=" << str.size() << " #unicode=" << utf8length(str)); }
/**
   Build a left-to-right chain hypergraph for \a inputTokens inside \a pHgResult.

   First inputTokens.size() + 1 unlabeled states are added; state 0 becomes the
   start state. Then, for token i, a state labeled with the token's symbol is
   added and an arc Arc(i + 1, Tails(i, labeledState)) is inserted. The state
   reached after the last token becomes the final state.

   Each arc weight receives (via FeatureInsertFct::insertNew, value 1) every
   feature id that opts.inputFeatures reports for the token's position, and is
   then passed to inputWeights.reweight. If opts.tokens is set, each
   (symbol, feature id) pair is also recorded there.

   NOTE(review): the code uses 0 and prevState + 1 as state ids, so it appears
   to assume \a pHgResult starts out without states - confirm with callers.

   NOTE(review): the arc is held by a raw pointer until addArc; if feature
   insertion or reweighting throws, it is not freed here - verify whether that
   can happen.

   \param inputTokens  tokens, one arc each.
   \param pHgResult    hypergraph to fill; must have a vocabulary.
   \param opts         supplies terminalMaybeUnk, inputFeatures (asserted
                       non-null for every token) and the optional tokens map.
   \param inputWeights per-position reweighting applied to each arc weight.
   \return number of tokens (inputTokens.size() converted to Position).
   \throws InvalidInputException (via SDL_THROW_LOG) if \a pHgResult has no
           vocabulary.
*/
Position stringToHypergraph(Strings const& inputTokens, IMutableHypergraph<Arc>* pHgResult,
                            StringToHypergraphOptions const& opts = StringToHypergraphOptions(),
                            TokenWeights const& inputWeights = TokenWeights()) {
  using Weight = typename Arc::Weight;
  using InsertFeature = FeatureInsertFct<Weight>;

  IVocabularyPtr const& pVoc = pHgResult->getVocabulary();
  if (!pVoc) {
    SDL_THROW_LOG(Hypergraph, InvalidInputException, "pHgResult hypergraph must contain vocabulary");
  }

  // One unlabeled state before, between and after the tokens: size() + 1 in total.
  for (std::size_t remaining = inputTokens.size() + 1; remaining != 0; --remaining) {
    pHgResult->addState();
  }
  pHgResult->setStart(0);

  Position const numTokens = inputTokens.size();
  StateId from = 0;
  for (Position pos = 0; pos != numTokens; ++pos) {
    std::string const& word = inputTokens[pos];
    SDL_TRACE(Hypergraph.StringToHypergraph, pos << ": " << word);

    Sym const sym = opts.terminalMaybeUnk(pVoc.get(), word);
    StateId const to = from + 1;
    // The labeled state for this token is created on the fly as the second tail.
    Arc* pArc = new Arc(to, Tails(from, pHgResult->addState(sym)));
    Weight& arcWeight = pArc->weight();

    assert(opts.inputFeatures != NULL);
    for (FeatureId feature : opts.inputFeatures->getFeaturesForInputPosition(pos)) {
      InsertFeature::insertNew(&arcWeight, feature, 1);
      if (opts.tokens) {
        opts.tokens->insert(sym, feature);
      }
    }

    inputWeights.reweight(pos, arcWeight);
    pHgResult->addArc(pArc);
    from = to;
  }

  pHgResult->setFinal(from);
  return numTokens;
}
void NormalizeUtf8::normalize(std::string& str, Constraints& c) const { normalizeNfc(str, c); if (constraintsIndexUnicodes(c) && FixUnicode::removeControlChars) SDL_THROW_LOG(NormalizeUtf8, ConfigException, "remove-control-characters can't be true when using constraints (yet)"); FixUnicode::normalize(str, true); SDL_TRACE(NormalizeUtf8, "windows-1252-replaced: '" << str << "' #bytes=" << str.size() << " #unicode=" << utf8length(str)); }
/**
   Feed every line of \a in to \a consumer.

   Lines are read with opt.getlineNormalized, passed through chomp, and then
   handed to \a consumer one at a time until reading fails.

   \param in       stream to read from.
   \param consumer called once per line with the chomped line.
   \param opt      normalizer used to read each line.
   \return number of lines passed to \a consumer.
*/
inline std::size_t visitChompedLines(std::istream& in, StringConsumer const& consumer,
                                     NormalizeUtf8 const& opt = NormalizeUtf8()) {
  std::size_t visited = 0;
  for (std::string text; opt.getlineNormalized(in, text);) {
    chomp(text);
    ++visited;
    // NOTE(review): the logger name is the one used by visitChompedLinesUntil;
    // kept as-is so existing log configuration keeps working - confirm intent.
    SDL_TRACE(Util.visitChompedLinesUntil, ":" << visited << ": " << text);
    consumer(text);
  }
  return visited;
}
/**
   Feed lines of \a in to \a consumer until a line equal to \a separatorLine.

   Lines are read with opt.getlineNormalized and passed through chomp. The
   comparison with \a separatorLine happens after chomping. The separator line
   itself is read from the stream but is neither counted nor passed to
   \a consumer. Reading also stops when getlineNormalized fails.

   \param in            stream to read from.
   \param separatorLine chomped line content that ends the visit.
   \param consumer      called once per line preceding the separator.
   \param opt           normalizer used to read each line.
   \return number of lines passed to \a consumer.
*/
inline std::size_t visitChompedLinesUntil(std::istream& in, std::string const& separatorLine,
                                          StringConsumer const& consumer,
                                          NormalizeUtf8 const& opt = NormalizeUtf8()) {
  std::size_t visited = 0;
  for (std::string text; opt.getlineNormalized(in, text);) {
    chomp(text);
    if (text == separatorLine) {
      break;
    }
    ++visited;
    SDL_TRACE(Util.visitChompedLinesUntil, "until " << separatorLine << ":" << visited << ": " << text);
    consumer(text);
  }
  return visited;
}
/// Map a file read-only into memory through the Win32 file-mapping API.
///
/// A file mapping object with maximum size `size` is created for `filename`
/// and a FILE_MAP_READ view starting at file offset 0 and extending to the
/// end of that mapping is returned. The mapping handle is closed before
/// returning.
///
/// \param filename path passed to ReadFileHandler.
/// \param offset   must be 0 (current limitation).
/// \param size     maximum size of the file mapping in bytes; must be non-zero.
/// \return the mapped view, or nullptr when the arguments are rejected, the
///         file cannot be opened, or CreateFileMapping / MapViewOfFile fails.
///
/// The Win32 last-error code is captured immediately after each failing call,
/// before any tracing or CloseHandle can overwrite it.
file_map_detail::view_of_file
file_map_detail::map_view_of_file(const char* filename, uint64 const offset, uint64 const size)
{
    A_STATIC_ASSERT_64_BIT;
    SDL_ASSERT(size);
    SDL_ASSERT(!offset); // current limitation
    SDL_ASSERT(offset < size);

    if (!(size && (offset < size) && !offset)) {
        return nullptr;
    }

    // Split the 64-bit size into the two DWORD halves CreateFileMapping expects.
    filesize_64 fsize = {};
    fsize.size = size;
    static_assert(sizeof(DWORD) == 4, "");
    static_assert(sizeof(fsize.size) == 8, "");
    static_assert(sizeof(fsize.d.lo) == 4, "");
    static_assert(sizeof(fsize.d.hi) == 4, "");
    // NOTE(review): this fires for sizes that are an exact multiple of 4 GiB
    // (low half zero) - confirm whether that is intended.
    SDL_ASSERT(fsize.d.lo);

    ReadFileHandler file(filename);
    if (!file.is_open()) {
        SDL_TRACE("failed to open file: ", filename);
        SDL_ASSERT(false);
        return nullptr;
    }

    // If the function fails, the return value is NULL
    HANDLE hFileMapping = ::CreateFileMapping(
        file.get(),
        nullptr,
        PAGE_READONLY,
        fsize.d.hi, // dwMaximumSizeHigh
        fsize.d.lo, // dwMaximumSizeLow
        nullptr);
    if (!hFileMapping) {
        const DWORD mappingError = ::GetLastError(); // read before tracing
        SDL_TRACE("CreateFileMapping failed : ", filename);
        SDL_TRACE("GetLastError = ", mappingError);
        SDL_ASSERT(false);
        return nullptr;
    }

    auto pFileView = ::MapViewOfFile(
        hFileMapping,
        FILE_MAP_READ,
        0,
        0,  // file offset where the view begins
        0); // mapping extends from the specified offset to the end of the file mapping.

    // Capture the failure code now: CloseHandle below may change it.
    DWORD viewError = 0;
    if (!pFileView) {
        viewError = ::GetLastError();
    }

    // The mapping handle is released here regardless of the outcome.
    ::CloseHandle(hFileMapping);

    if (!pFileView) {
        SDL_TRACE("MapViewOfFile failed : ", filename);
        SDL_TRACE("GetLastError = ", viewError);
        SDL_TRACE((viewError == ERROR_NOT_ENOUGH_MEMORY) ? "ERROR_NOT_ENOUGH_MEMORY" : "");
        SDL_ASSERT(false);
    }
    static_assert(std::is_same<view_of_file, decltype(pFileView)>::value, "");
    return pFileView;
}
/// Stub for Apple platforms: traces that the query is unsupported and
/// reports a size of 0.
std::size_t MemoryInfo::size() {
  std::size_t const unsupportedSize = 0;
  SDL_TRACE(MemoryInfo, "MemoryInfo::size() not yet supported on Apple.");
  return unsupportedSize;
}