/// Returns a new TextFragment holding the code points of `frag` in the
/// half-open code point range [start, end).
///
/// An empty TextFragment is returned when `frag` tests false, when
/// start >= end, or when any code point in the range fails
/// utf::internal::validate_codepoint().
///
/// This implementation copies through a temporary buffer, which keeps
/// TextFragment itself simple at the cost of one extra copy.
///
/// NOTE(review): neither loop below tests for the end of the source text, so
/// it looks like callers must guarantee that `end` does not exceed the number
/// of code points in `frag` -- confirm against call sites.
TextFragment subText(const TextFragment& frag, int start, int end)
{
  if (!frag || start >= end) return TextFragment();

  // The output size in bytes is unknown until the code points have been
  // walked, so the scratch buffer is sized to the whole source fragment.
  const int sourceBytes = frag.lengthInBytes();
  SmallStackBuffer<char> scratch(sourceBytes);
  char* const outBegin = scratch.data();
  char* outPos = outBegin;

  // Step over the code points that precede the requested range.
  auto it = codepoint_iterator<const char*>(frag.getText());
  for (int toSkip = start; toSkip > 0; --toSkip)
  {
    ++it;
  }

  // Re-encode each requested code point as UTF-8 into the scratch buffer.
  for (int remaining = end - start; remaining > 0; --remaining)
  {
    if (!utf::internal::validate_codepoint(*it))
    {
      return TextFragment();
    }
    outPos = utf::internal::utf_traits<utf::utf8>::encode(*it, outPos);
    ++it;
  }

  return TextFragment(outBegin, outPos - outBegin);
}
int SymbolTable::getSymbolID(const HashedCharArray& hsl) { int r = 0; // get the vector of symbol IDs matching this hash. It probably has one entry but may have more. const std::vector<int>& bin = mHashTable[hsl.hash].mIDVector; { bool found = false; std::unique_lock<std::mutex> lock(mHashTable[hsl.hash].mMutex); for(int ID : bin) { // there should be few collisions, so probably the first ID in the hash bin // will be the symbol we are looking for. Unfortunately to test for equality we may have to // compare the entire string. TextFragment* binFragment = &mSymbolTextsByID[ID]; if(compareSizedCharArrays(binFragment->getText(), binFragment->lengthInBytes(), hsl.pChars, hsl.len)) { r = ID; found = true; break; } } if(!found) { mSymbolTextsByID.emplace_back(TextFragment(hsl.pChars, static_cast<int>(hsl.len))); r = mSize++; mHashTable[hsl.hash].mIDVector.emplace_back(r); } } return r; }
/// Copy constructor: initializes this fragment by passing the text pointer and
/// byte length of `a` to construct().
///
/// NOTE(review): declared noexcept; if construct() can throw (for example on
/// allocation failure) the program would terminate -- confirm this is intended.
TextFragment::TextFragment(const TextFragment& a) noexcept
{
  const auto sourceText = a.getText();
  const auto sourceBytes = a.lengthInBytes();
  construct(sourceText, sourceBytes);
}
/// Four-fragment constructor: initializes this fragment by passing the text
/// pointer and byte length of t1, t2, t3 and t4, in that order, to construct().
///
/// NOTE(review): declared noexcept; if construct() can throw (for example on
/// allocation failure) the program would terminate -- confirm this is intended.
TextFragment::TextFragment(const TextFragment& t1,
                           const TextFragment& t2,
                           const TextFragment& t3,
                           const TextFragment& t4) noexcept
{
  // One (text pointer, byte length) pair per source fragment.
  construct(t1.getText(), t1.lengthInBytes(),
            t2.getText(), t2.lengthInBytes(),
            t3.getText(), t3.lengthInBytes(),
            t4.getText(), t4.lengthInBytes());
}