// Builds a bytevector holding the UTF-8 bytes of the part of this string selected by start and end.
//
// Returns nullptr if charRange() reports a null range for the requested slice, or if a new byte array was needed but
// could not be created.
BytevectorCell* StringCell::toUtf8Bytevector(World &world, SliceIndexType start, SliceIndexType end)
{
    CharRange range = charRange(start, end);

    if (range.isNull())
    {
        return nullptr;
    }

    const ByteLengthType newLength = range.endPointer - range.startPointer;
    SharedByteArray *byteArray;

    if ((newLength == byteLength()) && !dataIsInline())
    {
        // The slice spans every byte of a heap-backed string, so take another reference to the existing byte array
        // rather than duplicating its contents
        byteArray = static_cast<HeapStringCell*>(this)->heapByteArray()->ref();
    }
    else
    {
        // Create a new byte array and fill it with the selected bytes
        byteArray = SharedByteArray::createInstance(newLength);

        if (byteArray == nullptr)
        {
            // Treat a failed allocation the same way BytevectorCell::fromFill() does instead of dereferencing null
            return nullptr;
        }

        memcpy(byteArray->data(), range.startPointer, newLength);
    }

    return BytevectorCell::withByteArray(world, byteArray, newLength);
}
// Allocates a string cell sized for byteLength bytes / charLength characters without filling in the string's bytes.
//
// Strings of at most inlineDataSize() bytes are constructed as an InlineStringCell; longer ones get a HeapStringCell
// backed by a newly created SharedByteArray.
StringCell* StringCell::createUninitialized(World &world, ByteLengthType byteLength, CharLengthType charLength)
{
    void *placement = alloc::allocateCells(world);

    if (byteLength > inlineDataSize())
    {
        // Too large to store inline; back the string with a new shared byte array
        SharedByteArray *heapBytes = SharedByteArray::createInstance(byteLength);

#ifndef NDEBUG
        // Debug builds only: if there is spare capacity, plant a non-zero byte just past the data so code that
        // wrongly assumes NULL termination is caught
        if (heapBytes->capacity(byteLength) > byteLength)
        {
            heapBytes->data()[byteLength] = 0xff;
        }
#endif

        return new (placement) HeapStringCell(heapBytes, byteLength, charLength);
    }

    // The string fits inside the cell itself
    auto inlineString = new (placement) InlineStringCell(byteLength, charLength);

#ifndef NDEBUG
    // Debug builds only: same non-zero sentinel as above when the inline buffer has room for it
    if (byteLength < inlineDataSize())
    {
        inlineString->inlineData()[byteLength] = 0xff;
    }
#endif

    return inlineString;
}
// Creates a symbol cell from byteLength bytes of UTF-8 data starting at data.
//
// The bytes are first passed through utf8::validateData(), whose result is used as the symbol's character length.
// NOTE(review): how validateData() reports malformed input is not visible from here - confirm before relying on it.
SymbolCell* SymbolCell::fromUtf8Data(World &world, const std::uint8_t *data, ByteLengthType byteLength)
{
    const std::uint8_t *cursor = data;
    const std::uint8_t *const dataEnd = data + byteLength;

    // Validate the UTF-8 data and obtain its character count
    const std::size_t charLength = utf8::validateData(cursor, dataEnd);

    void *placement = alloc::allocateCells(world);

    if (byteLength > inlineDataSize())
    {
        // Too large for inline storage; copy the bytes into a shared byte array
        // NOTE(review): other allocation sites in this file call SharedByteArray::createInstance() - confirm that
        // createUninitialised() is the intended entry point here
        SharedByteArray *heapBytes = SharedByteArray::createUninitialised(byteLength);
        memcpy(heapBytes->data(), data, byteLength);

        return new (placement) HeapSymbolCell(heapBytes, byteLength, charLength);
    }

    // Small enough to live inside the cell
    auto smallSymbol = new (placement) InlineSymbolCell(byteLength, charLength);
    memcpy(smallSymbol->inlineData(), data, byteLength);

    return smallSymbol;
}
// Creates a bytevector whose contents are the given bytevectors concatenated in list order.
//
// Returns nullptr if the combined length exceeds maximumLength() or if the destination byte array could not be
// created.
BytevectorCell* BytevectorCell::fromAppended(World &world, const std::list<const BytevectorCell*> &byteVectors)
{
    if (byteVectors.size() == 1)
    {
        // This allows implicit data sharing while the below always allocates
        return byteVectors.front()->copy(world);
    }

    // Accumulate in 64 bits so the sum can be compared against the maximum before it is narrowed
    std::uint64_t totalLength = 0;

    for(auto byteVector : byteVectors)
    {
        totalLength += byteVector->length();
    }

    if (totalLength > maximumLength())
    {
        return nullptr;
    }

    SharedByteArray *newByteArray = SharedByteArray::createInstance(totalLength);

    if (newByteArray == nullptr)
    {
        // Handle a failed allocation the same way fromFill() does instead of dereferencing null
        return nullptr;
    }

    std::uint8_t *copyPtr = newByteArray->data();

    for(auto byteVector : byteVectors)
    {
        const auto partLength = byteVector->length();

        memcpy(copyPtr, byteVector->byteArray()->data(), partLength);
        copyPtr += partLength;
    }

    return BytevectorCell::withByteArray(world, newByteArray, totalLength);
}
// Creates a bytevector containing a copy of length bytes read from data.
//
// Returns nullptr if the backing byte array could not be created, matching fromFill().
BytevectorCell* BytevectorCell::fromData(World &world, const std::uint8_t *data, LengthType length)
{
    SharedByteArray *newByteArray = SharedByteArray::createInstance(length);

    if (newByteArray == nullptr)
    {
        // Don't dereference a failed allocation
        return nullptr;
    }

    memcpy(newByteArray->data(), data, length);

    return BytevectorCell::withByteArray(world, newByteArray, length);
}
// Creates a bytevector of the given length with every byte set to fill.
//
// Returns nullptr if SharedByteArray::createInstance() returns nullptr.
BytevectorCell* BytevectorCell::fromFill(World &world, LengthType length, std::uint8_t fill)
{
    SharedByteArray *filledArray = SharedByteArray::createInstance(length);

    if (filledArray != nullptr)
    {
        memset(filledArray->data(), fill, length);
        return BytevectorCell::withByteArray(world, filledArray, length);
    }

    // No byte array to wrap
    return nullptr;
}
// Creates a bytevector containing the bytes of this one selected by start and end.
//
// start and end are first passed to adjustSlice() together with our length; presumably this normalises and
// validates them (its definition is not visible here).
//
// Returns nullptr if adjustSlice() rejects the slice or if a new byte array could not be created.
BytevectorCell* BytevectorCell::copy(World &world, SliceIndexType start, SliceIndexType end) const
{
    if (!adjustSlice(start, end, length()))
    {
        return nullptr;
    }

    if ((start == 0) && (end == length()))
    {
        // The slice is the whole bytevector; share the existing byte array by taking another reference instead of
        // duplicating the bytes
        return BytevectorCell::withByteArray(world, byteArray()->ref(), length());
    }

    const LengthType newLength = end - start;
    SharedByteArray *newByteArray = SharedByteArray::createInstance(newLength);

    if (newByteArray == nullptr)
    {
        // Handle a failed allocation the same way fromFill() does instead of dereferencing null
        return nullptr;
    }

    memcpy(newByteArray->data(), &byteArray()->data()[start], newLength);

    return BytevectorCell::withByteArray(world, newByteArray, newLength);
}
bool StringCell::replaceBytes(const CharRange &range, const std::uint8_t *pattern, unsigned int patternBytes, unsigned int count) { assert(!isGlobalConstant()); const unsigned int requiredBytes = patternBytes * count; const unsigned int replacedBytes = range.byteCount(); // If we have exclusive access to our data and we're not resizing the string we can use the fast path if ((dataIsInline() || static_cast<HeapStringCell*>(this)->heapByteArray()->isExclusive()) && (requiredBytes == replacedBytes)) { std::uint8_t *copyDest = const_cast<std::uint8_t*>(range.startPointer); while(count--) { memmove(copyDest, pattern, patternBytes); copyDest += patternBytes; } } else { // Create a new string from pieces of the old string const std::uint64_t newByteLength = byteLength() + requiredBytes - replacedBytes; const auto newCharLength = charLength(); if (newByteLength > maximumByteLength()) { return false; } const ByteLengthType initialBytes = range.startPointer - utf8Data(); const ByteLengthType finalBytes = newByteLength - initialBytes - requiredBytes; const bool wasInline = dataIsInline(); const bool nowInline = newByteLength <= inlineDataSize(); SharedByteArray *oldByteArray = nullptr; SharedByteArray *newByteArray = nullptr; // Does this require a COW due to sharing our byte array? 
const bool needsCow = (!wasInline && !nowInline) && !static_cast<HeapStringCell*>(this)->heapByteArray()->isExclusive(); // Determine if we exceeded our current capacity or if we're using less than half of our allocated space // This will trigger a reallocation of our heap space const auto currentCapacity = byteCapacity(); const bool needHeapRealloc = (newByteLength > currentCapacity) || ((newByteLength < (currentCapacity / 2)) && !nowInline) || needsCow; std::uint8_t* destString; const std::uint8_t* copySource; if (!wasInline && nowInline) { // We're converting to an inline string destString = static_cast<InlineStringCell*>(this)->inlineData(); copySource = pattern; // Store our old byte array so we can unref it later // The code below will overwrite it with our new inline string oldByteArray = static_cast<HeapStringCell*>(this)->heapByteArray(); // Fill the initial chunk of the string memcpy(destString, utf8Data(), initialBytes); } else if (needHeapRealloc) { size_t byteArraySize = newByteLength; newByteArray = SharedByteArray::createInstance(byteArraySize); destString = newByteArray->data(); copySource = pattern; // Fill the initial chunk of the string memcpy(destString, utf8Data(), initialBytes); if (!wasInline) { // Store our old byte array so we can unref it later oldByteArray = static_cast<HeapStringCell*>(this)->heapByteArray(); } } else { destString = utf8Data(); // The initial chunk is already correct // Are our pattern bytes in the range we're about to overwrite? 
// We only need to check the end of the pattern because the pattern should only be completely inside our // completely outside our string if (((pattern + patternBytes) > (utf8Data() + initialBytes)) && ((pattern + patternBytes) <= (utf8Data() + byteLength()))) { // Create a temporary copy to work with copySource = new std::uint8_t[patternBytes]; memcpy(const_cast<std::uint8_t*>(copySource), pattern, patternBytes); } else { copySource = pattern; } } // Move the unchanged chunk at the end // We need to do this now because if the pattern bytes are longer than the byte we're replacing then we might // overwrite the beginning of the unchanged chunk memmove(destString + initialBytes + requiredBytes, range.startPointer + replacedBytes, finalBytes); std::uint8_t* copyDest = destString + initialBytes; while(count--) { memcpy(copyDest, copySource, patternBytes); copyDest += patternBytes; } if (copySource != pattern) { delete[] copySource; } // Update ourselves with our new string setLengths(newByteLength, newCharLength); if (newByteArray) { static_cast<HeapStringCell*>(this)->setHeapByteArray(newByteArray); } if (oldByteArray != nullptr) { // We can unref this now oldByteArray->unref(); } } return true; }