// Byte-wise equality of two strings.
//
// Returns false immediately when the byte lengths differ. Otherwise both cells are viewed through the same
// representation as this string (inline or heap) and their bytes are compared.
// NOTE(review): "other" is cast to this string's storage kind without its own check - presumably equal byte
// lengths imply the same storage kind; confirm.
bool StringCell::operator==(const StringCell &other) const
{
	if (byteLength() != other.byteLength())
	{
		return false;
	}

	if (!dataIsInline())
	{
		// Heap strings delegate the comparison to their byte arrays
		const auto *lhs = static_cast<const HeapStringCell*>(this);
		const auto *rhs = static_cast<const HeapStringCell*>(&other);

		auto lhsBytes = lhs->heapByteArray();
		auto rhsBytes = rhs->heapByteArray();

		return lhsBytes->isEqual(rhsBytes, lhs->heapByteLength());
	}

	// Inline strings are compared directly with memcmp
	const auto *lhs = static_cast<const InlineStringCell*>(this);
	const auto *rhs = static_cast<const InlineStringCell*>(&other);

	const int difference = memcmp(lhs->inlineData(), rhs->inlineData(), lhs->inlineByteLength());
	return difference == 0;
}
// Drops this string's reference on its heap byte array.
//
// Strings whose data is inline are left untouched.
void StringCell::finalizeString()
{
	if (dataIsInline())
	{
		return;
	}

	auto *heapString = static_cast<HeapStringCell*>(this);
	heapString->heapByteArray()->unref();
}
// Builds a bytevector holding the UTF-8 bytes of the characters in [start, end).
//
// Returns nullptr when charRange() reports the requested range as null. When the range spans every byte of a
// heap string the existing byte array is shared (an extra reference is taken); otherwise the bytes are copied
// into a freshly created byte array.
BytevectorCell* StringCell::toUtf8Bytevector(World &world, SliceIndexType start, SliceIndexType end)
{
	CharRange selection = charRange(start, end);

	if (selection.isNull())
	{
		return nullptr;
	}

	const ByteLengthType selectedBytes = selection.endPointer - selection.startPointer;
	const bool shareExisting = (selectedBytes == byteLength()) && !dataIsInline();

	SharedByteArray *resultBytes = nullptr;

	if (!shareExisting)
	{
		// Allocate a new byte array and fill it from the selected range
		resultBytes = SharedByteArray::createInstance(selectedBytes);
		memcpy(resultBytes->data(), selection.startPointer, selectedBytes);
	}
	else
	{
		// The whole heap string was selected; take another reference on its byte array
		resultBytes = static_cast<HeapStringCell*>(this)->heapByteArray()->ref();
	}

	return BytevectorCell::withByteArray(world, resultBytes, selectedBytes);
}
// Constructs a copy of this symbol in a cell allocated from the passed heap.
//
// Heap symbols share their byte array with the copy by taking another reference; inline symbols have their
// bytes copied into the new cell's inline storage.
SymbolCell* SymbolCell::copy(alloc::Heap &heap)
{
	void *placement = heap.allocate();

	if (!dataIsInline())
	{
		auto *heapSource = static_cast<HeapSymbolCell*>(this);

		return new (placement) HeapSymbolCell(
			heapSource->heapByteArray()->ref(),
			heapSource->heapByteLength(),
			heapSource->heapCharLength()
		);
	}

	auto *inlineSource = static_cast<InlineSymbolCell*>(this);
	const auto sourceByteLength = inlineSource->inlineByteLength();

	auto *duplicate = new (placement) InlineSymbolCell(sourceByteLength, inlineSource->inlineCharLength());
	memcpy(duplicate->m_inlineData, constUtf8Data(), sourceByteLength);

	return duplicate;
}
// Equality of two symbols.
//
// Two global constants are compared by address (the original author notes constant folding guarantees this
// works). Otherwise the byte lengths are compared first and then the bytes themselves, viewing both cells
// through this symbol's representation (inline or heap).
// NOTE(review): "other" is cast to this symbol's storage kind without its own check - presumably equal byte
// lengths imply the same storage kind; confirm.
bool SymbolCell::operator==(const SymbolCell &other) const
{
	const bool bothConstant = isGlobalConstant() && other.isGlobalConstant();

	if (bothConstant)
	{
		return this == &other;
	}

	if (byteLength() != other.byteLength())
	{
		return false;
	}

	if (!dataIsInline())
	{
		// Heap symbols delegate the comparison to their byte arrays
		const auto *lhs = static_cast<const HeapSymbolCell*>(this);
		const auto *rhs = static_cast<const HeapSymbolCell*>(&other);

		auto lhsBytes = lhs->heapByteArray();
		auto rhsBytes = rhs->heapByteArray();

		return lhsBytes->isEqual(rhsBytes, lhs->heapByteLength());
	}

	// Inline symbols are compared directly with memcmp
	const auto *lhs = static_cast<const InlineSymbolCell*>(this);
	const auto *rhs = static_cast<const InlineSymbolCell*>(&other);

	const int difference = memcmp(lhs->inlineData(), rhs->inlineData(), lhs->inlineByteLength());
	return difference == 0;
}
// Returns a pointer to the start of this string's UTF-8 bytes, taken from either the cell's inline data or
// its heap byte array.
std::uint8_t* StringCell::utf8Data()
{
	if (!dataIsInline())
	{
		auto *heapString = static_cast<HeapStringCell*>(this);
		return heapString->heapByteArray()->data();
	}

	auto *inlineString = static_cast<InlineStringCell*>(this);
	return inlineString->inlineData();
}
std::size_t StringCell::byteCapacity() const { if (dataIsInline()) { return inlineDataSize(); } else { return static_cast<const HeapStringCell*>(this)->heapByteArray()->capacity(byteLength()); } }
// Creates a new string containing the characters of this string in [start, end).
//
// Returns nullptr when charRange() reports the requested range as null. Copying every character of a heap
// string shares the existing byte array instead of duplicating the bytes.
//
// Allocating cells can relocate "this", so the string is tracked through an alloc::StringRef and the
// pre-allocation address is kept to detect a move afterwards.
StringCell* StringCell::copy(World &world, SliceIndexType start, SliceIndexType end)
{
	StringCell *originalAddress = this;
	alloc::StringRef trackedThis(world, originalAddress);

	CharRange selection = charRange(start, end);

	if (selection.isNull())
	{
		// The caller asked for an invalid range
		return nullptr;
	}

	const bool wholeHeapString = (selection.charCount == charLength()) && !dataIsInline();

	if (wholeHeapString)
	{
		// Every character is being copied; hand the new cell a reference to our byte array
		void *placement = alloc::allocateCells(world);
		HeapStringCell *heapSelf = static_cast<HeapStringCell*>(trackedThis.data());

		return new (placement) HeapStringCell(
			heapSelf->heapByteArray()->ref(),
			heapSelf->byteLength(),
			heapSelf->charLength()
		);
	}

	const ByteLengthType copiedByteLength = selection.byteCount();

	// Allocate the destination string; this is the point where we may be moved
	auto duplicate = StringCell::createUninitialized(world, copiedByteLength, selection.charCount);

	const bool relocatedInline = trackedThis->dataIsInline() && (originalAddress != trackedThis.data());

	if (relocatedInline)
	{
		// Our inline data moved along with the cell, so shift the range by the same distance
		auto *movedBase = reinterpret_cast<std::uint8_t*>(trackedThis.data());
		auto *originalBase = reinterpret_cast<std::uint8_t*>(originalAddress);

		const ptrdiff_t byteDelta = movedBase - originalBase;
		selection.relocate(byteDelta);
	}

	memcpy(duplicate->utf8Data(), selection.startPointer, copiedByteLength);

	return duplicate;
}
// Computes the hash of this string's bytes.
//
// Heap strings ask their byte array for its hash value; inline strings run a SharedByteHash over their
// inline data directly.
SharedByteHash::ResultType StringCell::sharedByteHash() const
{
	if (!dataIsInline())
	{
		const auto *heapString = static_cast<const HeapStringCell*>(this);
		return heapString->heapByteArray()->hashValue(heapString->heapByteLength());
	}

	const auto *inlineString = static_cast<const InlineStringCell*>(this);

	SharedByteHash hasher;
	return hasher(inlineString->inlineData(), inlineString->inlineByteLength());
}
// Constructs a copy of this string in a cell allocated from the passed heap.
//
// Heap strings share their byte array with the copy by taking another reference; inline strings have their
// bytes copied into the new cell.
StringCell* StringCell::copy(alloc::Heap &heap)
{
	void *placement = heap.allocate();

	if (!dataIsInline())
	{
		auto *heapSource = static_cast<HeapStringCell*>(this);
		auto *sharedBytes = heapSource->heapByteArray()->ref();

		return new (placement) HeapStringCell(sharedBytes, byteLength(), charLength());
	}

	auto *duplicate = new (placement) InlineStringCell(byteLength(), charLength());
	memcpy(duplicate->utf8Data(), utf8Data(), byteLength());

	return duplicate;
}
bool StringCell::replaceBytes(const CharRange &range, const std::uint8_t *pattern, unsigned int patternBytes, unsigned int count) { assert(!isGlobalConstant()); const unsigned int requiredBytes = patternBytes * count; const unsigned int replacedBytes = range.byteCount(); // If we have exclusive access to our data and we're not resizing the string we can use the fast path if ((dataIsInline() || static_cast<HeapStringCell*>(this)->heapByteArray()->isExclusive()) && (requiredBytes == replacedBytes)) { std::uint8_t *copyDest = const_cast<std::uint8_t*>(range.startPointer); while(count--) { memmove(copyDest, pattern, patternBytes); copyDest += patternBytes; } } else { // Create a new string from pieces of the old string const std::uint64_t newByteLength = byteLength() + requiredBytes - replacedBytes; const auto newCharLength = charLength(); if (newByteLength > maximumByteLength()) { return false; } const ByteLengthType initialBytes = range.startPointer - utf8Data(); const ByteLengthType finalBytes = newByteLength - initialBytes - requiredBytes; const bool wasInline = dataIsInline(); const bool nowInline = newByteLength <= inlineDataSize(); SharedByteArray *oldByteArray = nullptr; SharedByteArray *newByteArray = nullptr; // Does this require a COW due to sharing our byte array? 
const bool needsCow = (!wasInline && !nowInline) && !static_cast<HeapStringCell*>(this)->heapByteArray()->isExclusive(); // Determine if we exceeded our current capacity or if we're using less than half of our allocated space // This will trigger a reallocation of our heap space const auto currentCapacity = byteCapacity(); const bool needHeapRealloc = (newByteLength > currentCapacity) || ((newByteLength < (currentCapacity / 2)) && !nowInline) || needsCow; std::uint8_t* destString; const std::uint8_t* copySource; if (!wasInline && nowInline) { // We're converting to an inline string destString = static_cast<InlineStringCell*>(this)->inlineData(); copySource = pattern; // Store our old byte array so we can unref it later // The code below will overwrite it with our new inline string oldByteArray = static_cast<HeapStringCell*>(this)->heapByteArray(); // Fill the initial chunk of the string memcpy(destString, utf8Data(), initialBytes); } else if (needHeapRealloc) { size_t byteArraySize = newByteLength; newByteArray = SharedByteArray::createInstance(byteArraySize); destString = newByteArray->data(); copySource = pattern; // Fill the initial chunk of the string memcpy(destString, utf8Data(), initialBytes); if (!wasInline) { // Store our old byte array so we can unref it later oldByteArray = static_cast<HeapStringCell*>(this)->heapByteArray(); } } else { destString = utf8Data(); // The initial chunk is already correct // Are our pattern bytes in the range we're about to overwrite? 
// We only need to check the end of the pattern because the pattern should only be completely inside our // completely outside our string if (((pattern + patternBytes) > (utf8Data() + initialBytes)) && ((pattern + patternBytes) <= (utf8Data() + byteLength()))) { // Create a temporary copy to work with copySource = new std::uint8_t[patternBytes]; memcpy(const_cast<std::uint8_t*>(copySource), pattern, patternBytes); } else { copySource = pattern; } } // Move the unchanged chunk at the end // We need to do this now because if the pattern bytes are longer than the byte we're replacing then we might // overwrite the beginning of the unchanged chunk memmove(destString + initialBytes + requiredBytes, range.startPointer + replacedBytes, finalBytes); std::uint8_t* copyDest = destString + initialBytes; while(count--) { memcpy(copyDest, copySource, patternBytes); copyDest += patternBytes; } if (copySource != pattern) { delete[] copySource; } // Update ourselves with our new string setLengths(newByteLength, newCharLength); if (newByteArray) { static_cast<HeapStringCell*>(this)->setHeapByteArray(newByteArray); } if (oldByteArray != nullptr) { // We can unref this now oldByteArray->unref(); } } return true; }