// Compares two NUL-terminated byte strings.
//
// Bytes are compared as `unsigned char`. Returns 0 if the strings are equal,
// -1 if the first differing byte of `s1` is smaller than that of `s2`, and 1
// if it is larger.
int strcmp(const char *s1, const char *s2){
    for(;;){
        // One comparison step; the loop body is unrolled eight times by hand.
        // The macro is private to this function and removed right after use so
        // that it cannot collide with step macros of other functions.
#define STRCMP_STEP(index)  \
        {   \
            const unsigned char ch1 = (unsigned char)s1[index]; \
            const unsigned char ch2 = (unsigned char)s2[index]; \
            if(ch1 != ch2){ \
                /* Explicit comparison instead of shifting a negative int. */   \
                return (ch1 < ch2) ? -1 : 1;    \
            }   \
            if(ch1 == 0){   \
                /* Both strings ended at the same position. */  \
                return 0;   \
            }   \
        }

        STRCMP_STEP(0)
        STRCMP_STEP(1)
        STRCMP_STEP(2)
        STRCMP_STEP(3)
        STRCMP_STEP(4)
        STRCMP_STEP(5)
        STRCMP_STEP(6)
        STRCMP_STEP(7)

#undef STRCMP_STEP

        s1 += 8;
        s2 += 8;
    }
}
int strncmp(const char *s1, const char *s2, size_t n){ size_t cnt = n; for(;;){ #define UNROLLED(idx_) \ { \ if(cnt == 0){ \ return 0; \ } \ --cnt; \ const long ch1 = (long)(unsigned char)s1[idx_]; \ const long ch2 = (long)(unsigned char)s2[idx_]; \ const long delta = ch1 - ch2; \ if(delta != 0){ \ return (delta >> (sizeof(delta) * __CHAR_BIT__ - 1)) | 1; \ } \ if(ch1 == 0){ \ return 0; \ } \ } UNROLLED(0) UNROLLED(1) UNROLLED(2) UNROLLED(3) UNROLLED(4) UNROLLED(5) UNROLLED(6) UNROLLED(7) s1 += 8; s2 += 8; } }
// Returns the number of wide characters in `s` before the terminating null
// wide character.
//
// After reaching word alignment the string is scanned one `uintptr_t` at a
// time. All lane masks are derived from sizeof(uintptr_t) and sizeof(wchar_t)
// instead of being selected by a platform macro.
// NOTE(review): the lane order used when locating the terminator inside a
// word assumes a little-endian target - confirm for any new port.
size_t wcslen(const wchar_t *s){
    // Number of wide characters per machine word and bits per wide character.
    const size_t lanes = sizeof(uintptr_t) / sizeof(wchar_t);
    const size_t lane_bits = sizeof(wchar_t) * __CHAR_BIT__;
    // Highest bit of the lowest lane, e.g. 0x8000 for a 16-bit wchar_t.
    const uintptr_t top = (uintptr_t)1 << (lane_bits - 1);
    // A word with the value 1 in every lane, e.g. 0x0001000100010001.
    const uintptr_t ones = ~(uintptr_t)0 / ((top << 1) - 1);
    // A word with only the highest bit of every lane set.
    const uintptr_t tops = ones << (lane_bits - 1);

    const wchar_t *rp = s;

    // A pointer that is not even aligned to wchar_t can never become word
    // aligned by stepping whole characters, so scan one character at a time.
    if(((uintptr_t)rp & (sizeof(wchar_t) - 1)) != 0){
        while(*rp != 0){
            ++rp;
        }
        return (size_t)(rp - s);
    }

    // Once rp is aligned to a word, out-of-bounds reads are not a concern:
    // memory is allocated in pages, pages are naturally page-aligned and hence
    // word-aligned, so every byte within one word has the same access rights.
    while(((uintptr_t)rp & (sizeof(uintptr_t) - 1)) != 0){
        if(*rp == 0){
            return (size_t)(rp - s);
        }
        ++rp;
    }

    for(;;){
        // One word step; the loop body is unrolled eight times by hand.
        // `(w - ones) & ~w & tops` is non-zero iff some lane of w is zero, and
        // its lowest set lane is the first zero lane.
#define WCSLEN_STEP(index)  \
        {   \
            uintptr_t wrd = ((const uintptr_t *)rp)[(index)];   \
            wrd = (wrd - ones) & ~wrd & tops;   \
            if(wrd != 0){   \
                for(size_t i = 0; i < lanes - 1; ++i){  \
                    if((wrd & top) != 0){   \
                        return (size_t)(rp + (index) * lanes + i - s);  \
                    }   \
                    /* Two-part shift stays defined when a lane fills the word. */  \
                    wrd = (wrd >> (lane_bits - 1)) >> 1;    \
                }   \
                return (size_t)(rp + ((index) + 1) * lanes - 1 - s);    \
            }   \
        }

        WCSLEN_STEP(0)
        WCSLEN_STEP(1)
        WCSLEN_STEP(2)
        WCSLEN_STEP(3)
        WCSLEN_STEP(4)
        WCSLEN_STEP(5)
        WCSLEN_STEP(6)
        WCSLEN_STEP(7)

#undef WCSLEN_STEP

        rp += 8 * lanes;
    }
}
// Compares the first `cb` bytes of two memory regions.
//
// Bytes are compared as `unsigned char`. Returns 0 if the regions are equal,
// -1 if the first differing byte of `p1` is smaller than that of `p2`, and 1
// if it is larger.
//
// Whole machine words are compared for equality first; the word in which a
// difference shows up is then re-examined byte by byte, so the result does not
// depend on the byte order of the target or on any byte-swap helper.
// NOTE(review): the word loads go through casted pointers and therefore rely
// on the target tolerating unaligned word access - confirm for any new port.
int memcmp(const void *p1, const void *p2, size_t cb){
    const unsigned char *rp1 = (const unsigned char *)p1;
    const unsigned char *rp2 = (const unsigned char *)p2;

    // Bytes left for the bytewise loop after the word loop has finished.
    size_t rem = cb % sizeof(uintptr_t);
    // One more than the number of whole words, so that `--wcnt == 0` means
    // "no whole word left".
    size_t wcnt = cb / sizeof(uintptr_t) + 1;

    for(;;){
        // One word step; the loop body is unrolled eight times by hand.
        // `break` leaves the enclosing for(;;) with rp1/rp2 pointing at the
        // first byte that still has to be examined.
#define MEMCMP_STEP(index)  \
        {   \
            if(--wcnt == 0){    \
                rp1 += (index) * sizeof(uintptr_t); \
                rp2 += (index) * sizeof(uintptr_t); \
                break;  \
            }   \
            if(((const uintptr_t *)rp1)[index] != ((const uintptr_t *)rp2)[index]){ \
                /* The difference is inside this word; rescan it bytewise. */   \
                rp1 += (index) * sizeof(uintptr_t); \
                rp2 += (index) * sizeof(uintptr_t); \
                rem = sizeof(uintptr_t);    \
                break;  \
            }   \
        }

        MEMCMP_STEP(0)
        MEMCMP_STEP(1)
        MEMCMP_STEP(2)
        MEMCMP_STEP(3)
        MEMCMP_STEP(4)
        MEMCMP_STEP(5)
        MEMCMP_STEP(6)
        MEMCMP_STEP(7)

        rp1 += 8 * sizeof(uintptr_t);
        rp2 += 8 * sizeof(uintptr_t);
    }

#undef MEMCMP_STEP

    while(rem-- != 0){
        const unsigned char by1 = *(rp1++);
        const unsigned char by2 = *(rp2++);
        if(by1 != by2){
            return (by1 < by2) ? -1 : 1;
        }
    }
    return 0;
}
// Returns the number of bytes in `s` before the terminating NUL byte.
//
// After reaching word alignment the string is scanned one `uintptr_t` at a
// time. The byte-lane masks are defined locally and derived from the width of
// uintptr_t; they must not be shared with the wide-character routines, whose
// lanes are wider than one byte.
// NOTE(review): the lane order used when locating the terminator inside a
// word assumes a little-endian target - confirm for any new port.
size_t strlen(const char *s){
    // A word with the value 1 in every byte, e.g. 0x0101010101010101.
    const uintptr_t ones = ~(uintptr_t)0 / 0xFF;
    // A word with only the highest bit of every byte set.
    const uintptr_t tops = ones << 7;

    const char *rp = s;

    // Once rp is aligned to a word, out-of-bounds reads are not a concern:
    // memory is allocated in pages, pages are naturally page-aligned and hence
    // word-aligned, so every byte within one word has the same access rights.
    while(((uintptr_t)rp & (sizeof(uintptr_t) - 1)) != 0){
        if(*rp == 0){
            return (size_t)(rp - s);
        }
        ++rp;
    }

    for(;;){
        // One word step; the loop body is unrolled eight times by hand.
        // `(w - ones) & ~w & tops` is non-zero iff some byte of w is zero, and
        // its lowest set lane is the first zero byte.
#define STRLEN_STEP(index)  \
        {   \
            uintptr_t wrd = ((const uintptr_t *)rp)[(index)];   \
            wrd = (wrd - ones) & ~wrd;  \
            if((wrd & tops) != 0){  \
                for(size_t i = 0; i < sizeof(uintptr_t) - 1; ++i){  \
                    if((wrd & 0x80) != 0){  \
                        return (size_t)(rp + (index) * sizeof(uintptr_t) + i - s);  \
                    }   \
                    wrd >>= 8;  \
                }   \
                return (size_t)(rp + ((index) + 1) * sizeof(uintptr_t) - 1 - s);    \
            }   \
        }

        STRLEN_STEP(0)
        STRLEN_STEP(1)
        STRLEN_STEP(2)
        STRLEN_STEP(3)
        STRLEN_STEP(4)
        STRLEN_STEP(5)
        STRLEN_STEP(6)
        STRLEN_STEP(7)

#undef STRLEN_STEP

        rp += 8 * sizeof(uintptr_t);
    }
}
// Compares the first `cnt` wide characters of two arrays.
//
// Returns 0 if the arrays are equal, -1 if the first differing element of
// `p1` is smaller than that of `p2`, and 1 if it is larger. Elements are
// ordered as values of type wchar_t.
//
// Whole machine words are compared for equality first; the word in which a
// difference shows up is then re-examined element by element, so the result
// does not depend on the byte order of the target or on the width of wchar_t.
// NOTE(review): the word loads go through casted pointers and therefore rely
// on the target tolerating unaligned word access - confirm for any new port.
int wmemcmp(const wchar_t *p1, const wchar_t *p2, size_t cnt){
    // Number of wide characters that fit into one machine word.
    const size_t per_word = sizeof(uintptr_t) / sizeof(wchar_t);

    const wchar_t *rp1 = p1;
    const wchar_t *rp2 = p2;

    // Elements left for the elementwise loop after the word loop has finished.
    size_t rem = cnt % per_word;
    // One more than the number of whole words, so that `--wcnt == 0` means
    // "no whole word left".
    size_t wcnt = cnt / per_word + 1;

    for(;;){
        // One word step; the loop body is unrolled eight times by hand.
        // `break` leaves the enclosing for(;;) with rp1/rp2 pointing at the
        // first element that still has to be examined.
#define WMEMCMP_STEP(index) \
        {   \
            if(--wcnt == 0){    \
                rp1 += (index) * per_word;  \
                rp2 += (index) * per_word;  \
                break;  \
            }   \
            if(((const uintptr_t *)rp1)[index] != ((const uintptr_t *)rp2)[index]){ \
                /* The difference is inside this word; rescan it elementwise. */    \
                rp1 += (index) * per_word;  \
                rp2 += (index) * per_word;  \
                rem = per_word; \
                break;  \
            }   \
        }

        WMEMCMP_STEP(0)
        WMEMCMP_STEP(1)
        WMEMCMP_STEP(2)
        WMEMCMP_STEP(3)
        WMEMCMP_STEP(4)
        WMEMCMP_STEP(5)
        WMEMCMP_STEP(6)
        WMEMCMP_STEP(7)

        rp1 += 8 * per_word;
        rp2 += 8 * per_word;
    }

#undef WMEMCMP_STEP

    while(rem-- != 0){
        const wchar_t wc1 = *(rp1++);
        const wchar_t wc2 = *(rp2++);
        if(wc1 != wc2){
            return (wc1 < wc2) ? -1 : 1;
        }
    }
    return 0;
}
// Finds the first occurrence of the wide character `ch` in the string `s`.
//
// Returns a pointer to the matching character, or NULL if the terminating
// null wide character is reached first. The terminator itself counts as part
// of the string, so searching for 0 returns a pointer to the terminator.
//
// After reaching word alignment the string is scanned one `uintptr_t` at a
// time. All lane masks are derived from sizeof(uintptr_t) and sizeof(wchar_t)
// instead of being selected by a platform macro.
// NOTE(review): the lane order used when locating a hit inside a word assumes
// a little-endian target - confirm for any new port.
wchar_t *wcschr(const wchar_t *s, wchar_t ch){
    // Number of wide characters per machine word and bits per wide character.
    const size_t lanes = sizeof(uintptr_t) / sizeof(wchar_t);
    const size_t lane_bits = sizeof(wchar_t) * __CHAR_BIT__;
    // Highest bit of the lowest lane, e.g. 0x8000 for a 16-bit wchar_t.
    const uintptr_t top = (uintptr_t)1 << (lane_bits - 1);
    // All bits of the lowest lane, e.g. 0xFFFF for a 16-bit wchar_t.
    const uintptr_t lane_max = (top << 1) - 1;
    // A word with the value 1 in every lane, e.g. 0x0001000100010001.
    const uintptr_t ones = ~(uintptr_t)0 / lane_max;
    // A word with only the highest bit of every lane set.
    const uintptr_t tops = ones << (lane_bits - 1);
    // The searched character replicated into every lane of a word.
    const uintptr_t full = ((uintptr_t)ch & lane_max) * ones;

    const wchar_t *rp = s;

    // A pointer that is not even aligned to wchar_t can never become word
    // aligned by stepping whole characters, so scan one character at a time.
    if(((uintptr_t)rp & (sizeof(wchar_t) - 1)) != 0){
        for(;;){
            const wchar_t cur = *rp;
            if(cur == ch){
                return (wchar_t *)rp;
            }
            if(cur == 0){
                return NULL;
            }
            ++rp;
        }
    }

    // Once rp is aligned to a word, out-of-bounds reads are not a concern:
    // memory is allocated in pages, pages are naturally page-aligned and hence
    // word-aligned, so every byte within one word has the same access rights.
    while(((uintptr_t)rp & (sizeof(uintptr_t) - 1)) != 0){
        const wchar_t cur = *rp;
        if(cur == ch){
            return (wchar_t *)rp;
        }
        if(cur == 0){
            return NULL;
        }
        ++rp;
    }

    for(;;){
        // One word step; the loop body is unrolled eight times by hand.
        // `nul` flags lanes equal to zero and `hit` flags lanes equal to ch,
        // both via the `(w - ones) & ~w` zero-lane test. Lanes are inspected
        // from the lowest upwards so that whichever comes first wins.
#define WCSCHR_STEP(index)  \
        {   \
            uintptr_t wrd = ((const uintptr_t *)rp)[(index)];   \
            uintptr_t nul = (wrd - ones) & ~wrd;    \
            wrd ^= full;    \
            uintptr_t hit = (wrd - ones) & ~wrd;    \
            if(((hit | nul) & tops) != 0){  \
                for(size_t i = 0; i < lanes - 1; ++i){  \
                    if((hit & top) != 0){   \
                        return (wchar_t *)(rp + (index) * lanes + i);   \
                    }   \
                    if((nul & top) != 0){   \
                        return NULL;    \
                    }   \
                    /* Two-part shifts stay defined when a lane fills the word. */  \
                    hit = (hit >> (lane_bits - 1)) >> 1;    \
                    nul = (nul >> (lane_bits - 1)) >> 1;    \
                }   \
                if((hit & top) != 0){   \
                    return (wchar_t *)(rp + ((index) + 1) * lanes - 1); \
                }   \
                return NULL;    \
            }   \
        }

        WCSCHR_STEP(0)
        WCSCHR_STEP(1)
        WCSCHR_STEP(2)
        WCSCHR_STEP(3)
        WCSCHR_STEP(4)
        WCSCHR_STEP(5)
        WCSCHR_STEP(6)
        WCSCHR_STEP(7)

#undef WCSCHR_STEP

        rp += 8 * lanes;
    }
}