fix SSE2 implementation of find_change

This commit is contained in:
Jamiras 2021-01-21 22:26:45 -07:00
parent 2520337ddc
commit 78ab9853a7
2 changed files with 7 additions and 4 deletions

View file

@@ -63,7 +63,7 @@ static INLINE int compat_ctz(unsigned x)
return __builtin_ctz(x);
#elif _MSC_VER >= 1400 && !defined(_XBOX) && !defined(__WINRT__)
unsigned long r = 0;
-_BitScanReverse((unsigned long*)&r, x);
+_BitScanForward((unsigned long*)&r, x);
return (int)r;
#else
/* Only checks at nibble granularity,

View file

@@ -94,14 +94,17 @@ static size_t find_change(const uint16_t *a, const uint16_t *b)
{
__m128i v0 = _mm_loadu_si128(a128);
__m128i v1 = _mm_loadu_si128(b128);
-__m128i c = _mm_cmpeq_epi32(v0, v1);
+__m128i c = _mm_cmpeq_epi8(v0, v1);
uint32_t mask = _mm_movemask_epi8(c);
if (mask != 0xffff) /* Something has changed, figure out where. */
{
/* calculate the real offset to the differing byte */
size_t ret = (((uint8_t*)a128 - (uint8_t*)a) |
-(compat_ctz(~mask))) >> 1;
-return ret | (a[ret] == b[ret]);
+(compat_ctz(~mask)));
+/* and convert that to the uint16_t offset */
+return (ret >> 1);
}
a128++;