/// Copies from `src` to `dst` in steps of 8, stopping once `dst` has reached or passed `dst_end`.
///
/// The exit test runs only after each step, so copy8() is always invoked at least once,
/// even when `dst >= dst_end` on entry. Assuming copy8() transfers exactly 8 bytes, the
/// final step may therefore write up to 7 bytes beyond `dst_end` (and read the same amount
/// beyond the corresponding source position) - callers must leave that much slack.
inline void wildCopy8(UInt8 * dst, const UInt8 * src, UInt8 * dst_end)
{
    for (;;)
    {
        copy8(dst, src);
        src += 8;
        dst += 8;

        /// Post-step check: equivalent to the tail condition of a do-while loop.
        if (dst >= dst_end)
            break;
    }
}
/// Copies from `src` to `dst` in steps of 8 until `dst` reaches or passes `dst_end`.
///
/// At least one copy8() call is always made, because the bound is tested after each step.
/// Assuming copy8() transfers exactly 8 bytes, the last step may overrun `dst_end` by up
/// to 7 bytes, so both buffers need that much slack.
inline void wildCopy8(UInt8 * dst, const UInt8 * src, UInt8 * dst_end)
{
    /// Letting clang unroll this loop makes it more than 10% slower, so unrolling is disabled.
    /// The pragma has to stay directly in front of the loop statement it applies to.
#if defined(__clang__)
    #pragma nounroll
#endif
    do
    {
        copy8(dst, src);
        src += 8;
        dst += 8;
    }
    while (dst < dst_end);
}
/**
 * Copies `bytes` bytes from `src` to `dst`, word-aligning the destination first.
 *
 * Steps:
 *  1. If `dst` is not 4-byte aligned, work out the 1-3 prefix bytes needed to align it.
 *  2. If the request is too small for the word-based path (fewer than
 *     MIN_AMOUNT_FOR_COMPLEX_COPY bytes would remain after the prefix, or the request
 *     is shorter than the prefix itself), hand the whole request to copy8() and return.
 *  3. Otherwise copy the prefix byte by byte, then dispatch on the low two bits of
 *     `src`: copy32Misaligned() when the source is not word-aligned, copy32Aligned()
 *     when it is.
 *
 * When TEST_MEMORY_COPY is defined, testCopy() is called with the original arguments
 * after the copy has been performed.
 *
 * @param dst   destination buffer
 * @param src   source buffer
 * @param bytes number of bytes to copy
 */
void PspMemory::copy(byte *dst, const byte *src, uint32 bytes) {
	DEBUG_ENTER_FUNC();

#ifdef TEST_MEMORY_COPY
	// Keep the original arguments: dst, src and bytes are all modified below.
	uint32 debugBytes = bytes;
	const byte *debugDst = dst, *debugSrc = src;
#endif

	PSP_DEBUG_PRINT("copy(): dst[%p], src[%p], bytes[%d]\n", dst, src, bytes);

	// align the destination pointer first
	uint32 prefixDst = (((uint32)dst) & 0x3);

	if (prefixDst) {
		prefixDst = 4 - prefixDst;	// prefix only if we have address % 4 != 0
		PSP_DEBUG_PRINT("prefixDst[%d]\n", prefixDst);

		// Decide whether it's worthwhile to continue BEFORE subtracting the prefix.
		// `bytes` is unsigned, so subtracting a prefix larger than `bytes` would wrap
		// around to a huge count and send the copy far past both buffers. The first
		// test short-circuits that case; the second is the original size check.
		// NOTE(review): relies on copy8() coping with counts below 4 - confirm.
		if (bytes < prefixDst || bytes - prefixDst < MIN_AMOUNT_FOR_COMPLEX_COPY) {
			copy8(dst, src, bytes);
#ifdef TEST_MEMORY_COPY
			testCopy(debugDst, debugSrc, debugBytes);
#endif
			return;
		}

		bytes -= prefixDst;	// safe: bytes >= prefixDst was established above

		while (prefixDst--) {
			*dst++ = *src++;
		}
	}

	// check the source pointer alignment now
	uint32 alignSrc = (((uint32)src) & 0x3);

	if (alignSrc) {	// we'll need to realign our reads
		copy32Misaligned((uint32 *)dst, src, bytes, alignSrc);
	} else {
		copy32Aligned((uint32 *)dst, (uint32 *)src, bytes);
	}

#ifdef TEST_MEMORY_COPY
	testCopy(debugDst, debugSrc, debugBytes);
#endif
}
/// Explicit specialization of copy<N> for N == 8: forwards straight to copy8().
template <>
inline void copy<8>(UInt8 * dst, const UInt8 * src)
{
    copy8(dst, src);
}