/*
 * Append the bytes of r1, r2 and r3 (in that order) to this string.
 *
 * Returns `this' unchanged when the three slices are all empty.  Otherwise
 * returns the StringData that holds the concatenated result, which is
 * whatever escalate() (for shared strings) or reserve() hands back; callers
 * must use the returned pointer rather than assuming it equals `this'.
 *
 * Calls throw_string_too_large() if either the appended length or the
 * resulting total length would exceed MaxSize.
 *
 * Precondition (asserted): this string does not have multiple references.
 */
StringData* StringData::append(StringSlice r1,
                               StringSlice r2,
                               StringSlice r3) {
  assert(!hasMultipleRefs());

  // Sum in size_t: adding three slice lengths in a narrower type can wrap
  // around, which would let an oversized append slip past the MaxSize
  // checks below and overrun the reserved buffer in the memcpy calls.
  auto const len = size_t(r1.len) + size_t(r2.len) + size_t(r3.len);

  if (len == 0) return this;
  if (UNLIKELY(len > MaxSize)) {
    throw_string_too_large(len);
  }
  if (UNLIKELY(size_t(m_len) + len > MaxSize)) {
    throw_string_too_large(len + size_t(m_len));
  }

  // Cannot truncate: the total was just checked against MaxSize.
  auto const newLen = uint32_t(size_t(m_len) + len);

  /*
   * We may have an aliasing append.  We don't allow appending with an
   * interior pointer, although we may be asked to append less than
   * the whole string in an aliasing situation.
   */
  ALIASING_APPEND_ASSERT(r1.ptr, r1.len);
  ALIASING_APPEND_ASSERT(r2.ptr, r2.len);
  ALIASING_APPEND_ASSERT(r3.ptr, r3.len);

  auto const target = UNLIKELY(isShared()) ? escalate(newLen)
                                           : reserve(newLen);
  auto const mslice = target->bufferSlice();

  /*
   * memcpy is safe even if it's a self append---the regions will be
   * disjoint, since rN.ptr can't point past the start of our source
   * pointer, and rN.len is smaller than the old length.
   *
   * memcpy returns its destination, so `p' always points at the start of
   * the piece that was just written; adding that piece's length yields the
   * destination for the next one.
   */
  void* p = mslice.ptr;
  p = memcpy((char*)p + m_len,  r1.ptr, r1.len);
  p = memcpy((char*)p + r1.len, r2.ptr, r2.len);
      memcpy((char*)p + r2.len, r3.ptr, r3.len);

  target->setSize(newLen);
  assert(target->checkSane());

  return target;
}
/*
 * Read a length-prefixed string from `uns' into this String.
 *
 * The expected input, in order, is: an integer byte count, a ':' character,
 * the opening delimiter `delimiter0', exactly that many raw bytes, and the
 * closing delimiter `delimiter1'.
 *
 * Throws Exception when the byte count is negative or is greater than or
 * equal to RuntimeOption::MaxSerializedStringSize.
 */
void String::unserialize(VariableUnserializer *uns,
                         char delimiter0 /* = '"' */,
                         char delimiter1 /* = '"' */) {
  int64_t size = uns->readInt();
  // Report the full 64-bit value: narrowing to int would print a misleading
  // number (possibly zero or of the wrong sign) for out-of-range input.
  if (size >= RuntimeOption::MaxSerializedStringSize) {
    throw Exception("Size of serialized string (%lld) exceeds max",
                    (long long)size);
  }
  if (size < 0) {
    throw Exception("Size of serialized string (%lld) must not be negative",
                    (long long)size);
  }

  uns->expectChar(':');
  uns->expectChar(delimiter0);

  // Allocate the destination up front and read the payload straight into
  // its buffer; the size is only published after the read.
  auto px = req::ptr<StringData>::attach(StringData::Make(int(size)));
  auto const buf = px->bufferSlice();
  assert(size <= buf.size());
  uns->read(buf.data(), size);
  px->setSize(size);
  m_str = std::move(px);

  uns->expectChar(delimiter1);
}
// Dispatches a read/write style request of requestedReadCount bytes, starting
// at the current position reported by fileSeek.Tell(), onto the callback
// object depending on how the requested range intersects the range covered
// by the internal buffer (bufOffset.offsetOfBufferOnFileSpace, length
// cb.GetBufferSize()).
//
// Parameters:
//  buffer             - client memory passed through to the callbacks; it is
//                       advanced locally when a request is split into parts.
//  bufOffset          - describes where the internal buffer sits in file space.
//  fileSeek           - seek abstraction; Tell/Seek/Update/PredictNativeAdvance
//                       are called on it.
//  requestedReadCount - number of bytes to process; zero returns immediately.
//  cb                 - callback object receiving ContentInvokation,
//                       BufferedInvokation, NativeInvokation and
//                       FloatingInvokation calls.
//
// The function re-runs itself (via the repeatMethod label) whenever a callback
// asks for it or when a remainder of the request is still outstanding.
//
// NOTE(review): bufferAbstractType, seekGenericType and callbackType are not
// declared in the visible code - presumably template parameters; confirm.
AINLINE void SelectBufferedSlice( bufferAbstractType *buffer, CBufferedStreamWrap::bufferSeekPointer_t& bufOffset, seekGenericType& fileSeek, size_t requestedReadCount, callbackType& cb )
{
    typedef CBufferedStreamWrap::seekSlice_t seekSlice_t;
    typedef CBufferedStreamWrap::seekType_t seekType_t;

    // If we do not want to read anything, quit right away.
    if ( requestedReadCount == 0 )
        return;

    // NOTE(review): the counter is declared under #ifdef but used under #if
    // below. If the macro is defined as 0 the counter is declared but never
    // used; if it is defined without a value the #if does not compile.
    // Confirm which form the project intends.
#ifdef FILESYSTEM_PERFORM_SANITY_CHECKS
    // Add simple error checking.
    // It could be fatal to the application to introduce an infinite loop here.
    unsigned int methodRepeatCount = 0;
#endif //FILESYSTEM_PERFORM_SANITY_CHECKS

repeatMethod:
#if FILESYSTEM_PERFORM_SANITY_CHECKS
    methodRepeatCount++;

    // Bail out after a fixed number of repetitions instead of spinning forever.
    // NOTE(review): std::exception has no string constructor in standard C++;
    // this looks like it relies on a vendor extension - confirm.
    if ( methodRepeatCount == 6000000 )
        throw std::exception( "infinite buffered select repetition count" );
#endif //FILESYSTEM_PERFORM_SANITY_CHECKS

    // Do the actual logic.
    // Both values are re-read on every repetition, since callbacks and seeks
    // performed below may have changed them.
    seekType_t localFileSeek = fileSeek.Tell();

    size_t bufferSize = cb.GetBufferSize();

    // Create the slices for the seeking operation.
    // We will collide them against each other.
    seekSlice_t readSlice( localFileSeek, requestedReadCount );
    seekSlice_t bufferSlice( bufOffset.offsetOfBufferOnFileSpace, bufferSize );

    seekSlice_t::eIntersectionResult intResult = readSlice.intersectWith( bufferSlice );

    // Make sure the content is prepared for the action.
    // A true return value means we have to start over from the top.
    bool hasToRepeat = cb.ContentInvokation( buffer, localFileSeek, requestedReadCount, intResult );

    if ( hasToRepeat )
        goto repeatMethod;

    if ( intResult == seekSlice_t::INTERSECT_EQUAL )
    {
        // Serve the whole request from the start of the internal buffer.
        cb.BufferedInvokation( buffer, 0, requestedReadCount );

        fileSeek.Seek( localFileSeek + requestedReadCount );
    }
    else if ( intResult == seekSlice_t::INTERSECT_INSIDE )
    {
        // Serve the whole request from the internal buffer, starting at the
        // distance between the file position and the buffer's file offset.
        cb.BufferedInvokation( buffer, (size_t)( localFileSeek - bufOffset.offsetOfBufferOnFileSpace ), requestedReadCount );

        fileSeek.Seek( localFileSeek + requestedReadCount );
    }
    else if ( intResult == seekSlice_t::INTERSECT_BORDER_END )
    {
        // Everything read-able has to fit inside client memory.
        // A size_t is assumed to be as big as the client memory allows.
        // This is the number of bytes between the file position and the
        // start of the buffer slice.
        size_t sliceStartOffset = (size_t)( bufferSlice.GetSliceStartPoint() - localFileSeek );

        // First read from the file natively, to reach the buffer border.
        if ( sliceStartOffset > 0 )
        {
            // Make sure the seek pointer is up to date.
            fileSeek.Update();

            size_t actualReadCount = 0;

            cb.NativeInvokation( buffer, sliceStartOffset, actualReadCount );

            // Update the file seek.
            // NOTE(review): this advances by actualReadCount, whereas the
            // INTERSECT_ENCLOSING branch advances by sliceStartOffset, and the
            // remainder below is still computed from sliceStartOffset. Confirm
            // the intended behavior when fewer bytes are processed natively.
            fileSeek.Seek( localFileSeek += actualReadCount );

            // Predict that we advanced by some bytes.
            fileSeek.PredictNativeAdvance( (seekType_t)actualReadCount );
        }

        // Now lets read the remainder from the buffer.
        size_t sliceReadRemainderCount = (size_t)( requestedReadCount - sliceStartOffset );

        if ( sliceReadRemainderCount > 0 )
        {
            // The remainder starts at the very beginning of the internal buffer.
            cb.BufferedInvokation( buffer + sliceStartOffset, 0, sliceReadRemainderCount );

            fileSeek.Seek( localFileSeek += sliceReadRemainderCount );
        }
    }
    else if ( intResult == seekSlice_t::INTERSECT_BORDER_START )
    {
        // The_GTA: That +1 is very complicated. Just roll with it!
        // This is the number of bytes from the file position up to and
        // including the end point of the buffer slice.
        size_t sliceEndOffset = (size_t)( bufferSlice.GetSliceEndPoint() + 1 - localFileSeek );

        // Read what can be read from the native buffer.
        if ( sliceEndOffset > 0 )
        {
            // Offset inside the internal buffer at which the last
            // sliceEndOffset bytes begin.
            size_t sliceReadInCount = (size_t)( bufferSize - sliceEndOffset );

            cb.BufferedInvokation( buffer, sliceReadInCount, sliceEndOffset );

            // Update the local file seek.
            fileSeek.Seek( localFileSeek += sliceEndOffset );
        }

        // Increment the buffer location and read the requested content into it.
        size_t sliceReadRemainderCount = (size_t)( requestedReadCount - sliceEndOffset );

        if ( sliceReadRemainderCount > 0 )
        {
            // Update the perform details.
            // The remainder is handled by another pass through the dispatch.
            buffer += sliceEndOffset;
            requestedReadCount = sliceReadRemainderCount;

            goto repeatMethod;
        }
    }
    else if ( intResult == seekSlice_t::INTERSECT_ENCLOSING )
    {
        // Read the beginning segment, that is native file memory.
        size_t sliceStartOffset = (size_t)( bufferSlice.GetSliceStartPoint() - localFileSeek );

        if ( sliceStartOffset > 0 )
        {
            // Make sure the seek pointer is up-to-date.
            fileSeek.Update();

            size_t actualReadCount = 0;

            cb.NativeInvokation( buffer, sliceStartOffset, actualReadCount );

            // Update the seek ptr.
            // Note that this advances by the requested amount, not by
            // actualReadCount.
            fileSeek.Seek( localFileSeek += sliceStartOffset );

            // Predict that the real file offset advanced by some bytes.
            fileSeek.PredictNativeAdvance( (seekType_t)actualReadCount );
        }

        // Put the content of the entire internal buffer into the output buffer.
        {
            cb.BufferedInvokation( buffer + sliceStartOffset, 0, bufferSize );

            fileSeek.Seek( localFileSeek += bufferSize );
        }

        // Read the part after the internal buffer slice.
        // This part must be executed on the buffer context.
        size_t sliceEndOffset = (size_t)( readSlice.GetSliceEndPoint() - bufferSlice.GetSliceEndPoint() );

        if ( sliceEndOffset > 0 )
        {
            // Update execution parameters and continue to dispatch.
            buffer += sliceStartOffset + bufferSize;
            requestedReadCount = sliceEndOffset;

            goto repeatMethod;
        }
    }
    else if ( seekSlice_t::isFloatingIntersect( intResult ) || intResult == seekSlice_t::INTERSECT_UNKNOWN )
    {
        // Notify the callback about out-of-bounds content access.
        // The callback decides whether another attempt should be made.
        bool shouldContinue = cb.FloatingInvokation( buffer, localFileSeek, requestedReadCount, intResult );

        if ( shouldContinue )
        {
            // Update buffer contents depending on the stream position.
            UpdateStreamedBufferPosition( bufOffset, fileSeek, cb );

            // Attempt to repeat reading.
            goto repeatMethod;
        }
    }
    else
    {
        // We have no hit in any way that we can detect.
        // This is treated as a programming error: the assertion fires in
        // builds where assert is active; nothing is thrown here.
        assert( 0 );
    }
}
/*
 * wordwrap(): wrap `str' so that lines are at most `linewidth' characters
 * long, inserting `brk' at the chosen break points.
 *
 *   str       - the text to wrap.
 *   linewidth - target line width; negative values are treated as 0.
 *   brk       - the line-break string to insert; must not be empty.
 *   cut       - when true, words longer than the width are split.
 *
 * Returns:
 *   - the empty string when `str' is empty;
 *   - false (after raising a warning) when `brk' is empty, or when
 *     `linewidth' is 0 and `cut' is set;
 *   - `str' itself when it already fits within the width;
 *   - otherwise the wrapped string.
 */
Variant HHVM_FUNCTION(wordwrap, const String& str, int64_t linewidth /* = 75 */,
                      const String& brk /* = s_nl */, bool cut /* = false */) {
  const char* brkstr = brk.data();
  size_t textlen = str.size();
  size_t brklen = brk.size();

  if (textlen == 0) {
    return empty_string();
  }
  if (brklen == 0) {
    raise_warning("Break string cannot be empty");
    return false;
  }
  if (linewidth == 0 && cut) {
    raise_warning("Can't force cut when width is zero");
    return false;
  }
  size_t w = linewidth >= 0 ? linewidth : 0;

  // If the string's length is less than or equal to the specified
  // width, there's nothing to do and we can just return the string.
  if (textlen <= w) return str;

  // Special case for a single-character break as it needs no
  // additional storage space: spaces are overwritten in place in a copy.
  if (brklen == 1 && !cut) {
    auto new_sd = StringData::Make(str.get(), CopyString);
    new_sd->invalidateHash();
    Variant ret = new_sd;
    auto const bs = new_sd->bufferSlice();
    char* newtext = bs.begin();
    auto bc = brkstr[0];
    size_t current = 0, laststart = 0, lastspace = 0;
    for (; current < textlen; current++) {
      if (newtext[current] == bc) {
        // An existing break starts a fresh line.
        laststart = lastspace = current + 1;
      } else if (newtext[current] == ' ') {
        if (current - laststart >= w) {
          newtext[current] = bc;
          laststart = current + 1;
        }
        lastspace = current;
      } else if (current - laststart >= w && laststart != lastspace) {
        // The current word overflows the line: break at the last space seen.
        newtext[lastspace] = bc;
        laststart = lastspace + 1;
      }
    }
    return ret;
  }

  // Multiple character line break or forced cut

  // Estimate how big the output string will be. It's okay if this estimate
  // is wrong as we will grow or shrink as needed. The goals here are two-
  // fold: (1) avoid the need to grow or shrink in the common case, and
  // (2) for extreme cases where it's hard to make an accurate estimate
  // (ex. when w is very small or brk is very large) we should be careful
  // to avoid making huge over-estimations.
  StringBuffer strbuf(
    textlen +
    textlen / (std::max<size_t>(w, 16) - 8) * std::min<size_t>(brklen, 8));

  const char* text = str.data();
  size_t current = 0, laststart = 0, lastspace = 0;
  for (; current < textlen; current++) {
    // when we hit an existing break, copy to new buffer, and
    // fix up laststart and lastspace
    //
    // The comparison uses memcmp rather than strncmp so that break strings
    // containing NUL bytes are compared in full; strncmp stops at the first
    // NUL and could report a match for text that differs after it.  The
    // length guard keeps the compared range inside `text'.
    if (text[current] == brkstr[0] && current + brklen < textlen &&
        !memcmp(text + current, brkstr, brklen)) {
      strbuf.append(text + laststart, current - laststart + brklen);
      current += brklen - 1;
      laststart = lastspace = current + 1;
    }
    // if it is a space, check if it is at the line boundary,
    // copy and insert a break, or just keep track of it
    else if (text[current] == ' ') {
      if (current - laststart >= w) {
        strbuf.append(text + laststart, current - laststart);
        strbuf.append(brkstr, brklen);
        laststart = current + 1;
      }
      lastspace = current;
    }
    // if we are cutting, and we've accumulated enough
    // characters, and we haven't see a space for this line,
    // copy and insert a break.
    else if (current - laststart >= w && cut && laststart >= lastspace) {
      strbuf.append(text + laststart, current - laststart);
      strbuf.append(brkstr, brklen);
      laststart = lastspace = current;
    }
    // if the current word puts us over width w, copy back up
    // until the last space, insert a break, and move up the
    // laststart
    else if (current - laststart >= w && laststart < lastspace) {
      strbuf.append(text + laststart, lastspace - laststart);
      strbuf.append(brkstr, brklen);
      laststart = lastspace = lastspace + 1;
    }
  }

  // copy over any stragglers
  if (laststart != current) {
    strbuf.append(text + laststart, current - laststart);
  }

  auto s = strbuf.detach();

  // if it's not possible to reduce the output string's capacity by more
  // than 25%, then we can just return the string as is.
  size_t estShrinkCap =
    MemoryManager::estimateSmartCap(sizeof(StringData) + s.size() + 1);
  if (estShrinkCap * 4 >= (size_t)s.capacity() * 3) {
    return s;
  }
  // reallocate into a smaller buffer so that we don't waste memory
  return StringData::Make(s.get(), CopyString);
}