Skip to content

Commit 4e9a0aa

Browse files
committed
Merge branch 'master' of jsoftware.com:jsource
2 parents 7459ffa + b042482 commit 4e9a0aa

File tree

1 file changed

+59
-1
lines changed

1 file changed

+59
-1
lines changed

jsrc/str.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,65 @@ extern size_t Stringrchr(char *str,char ch, size_t stride,size_t len);
1818
extern size_t Stringrchr2(unsigned short *str, unsigned short ch, size_t stride,size_t len);
1919
extern size_t Stringrchr4(unsigned int *str, unsigned int ch, size_t stride,size_t len);
2020

21-
#if defined(__SSE2__) || EMU_AVX
21+
#if C_AVX2 || EMU_AVX2
22+
23+
/*
 * srchr (AVX2 build): length of str[0..len) once every trailing byte equal
 * to ch has been dropped.
 *
 *   str  buffer to scan; only bytes str[0] .. str[len-1] are read
 *   ch   the byte value to strip from the end
 *   len  number of bytes in str
 *
 * Returns k such that str[k-1] is the last byte different from ch, or 0 when
 * len is 0 or every byte equals ch.
 *
 * The scan runs backwards in four stages: scalar steps until str+i is
 * 32-byte aligned, aligned 32-byte blocks, at most one aligned 16-byte
 * block, and finally a scalar loop for the remaining head of the buffer.
 */
static size_t srchr(char* str, char ch, size_t len){
 size_t i=len;
 // scalar steps until str+i sits on a 32-byte boundary, so the loads below can be aligned loads
 while ((i>0) && ((((intptr_t)str+i) & 31) != 0)){if (ch!=str[i-1]) return i; else --i;}
 if(!i) return 0;
 /* don't test i>=0 which is always true because size_t is unsigned */
 const __m256i ch32 = _mm256_set1_epi8( ch );
 // i>=32 (not i>32): the block str[i-32..i) starts at or after str, so a final full block is still safe to load
 while (i >= 32) {
  const __m256i blk = _mm256_load_si256((__m256i *)(str+i-32));
  // bit k of mask is set when str[i-32+k] differs from ch
  const unsigned int mask = ~(unsigned int)_mm256_movemask_epi8(_mm256_cmpeq_epi8(blk, ch32));
  if (mask != 0) {  // some character is not ch
   // the highest set bit marks the last byte that is not ch; with top = its bit index
   // the result is (i-32)+top+1 = i-(31-top)
#if (MMSC_VER)   // make sure <intrin.h> is included; MMSC_VER is presumably the project's MSVC flag - confirm
   unsigned long pos;
   _BitScanReverse(&pos, mask);   // pos = index of the highest set bit
   i -= 31 - (size_t)pos;
#elif defined(__clang__) || ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))) // modern GCC has built-in __builtin_clz
   i -= (size_t)__builtin_clz(mask);   // clz = 31-top
#else // none of choices exist, use local BSR implementation
#error __builtin_clz
#endif
   return i;
  }
  i -= 32;
 }
 // fewer than 32 bytes remain and str+i is still 32-byte aligned, so str+i-16 is 16-byte aligned
 while (i >= 16) {
  const __m128i ch16 = _mm_set1_epi8( ch );
  const __m128i blk = _mm_load_si128((__m128i *)(str+i-16));
  // bit k (k<16) of mask is set when str[i-16+k] differs from ch; the upper 16 bits are forced to 0
  const unsigned int mask = 0xFFFFu & ~(unsigned int)_mm_movemask_epi8(_mm_cmpeq_epi8(blk, ch16));
  if (mask != 0) {  // some character is not ch
   // with top = index of the highest set bit the result is (i-16)+top+1 = i-(15-top)
#if (MMSC_VER)   // make sure <intrin.h> is included; MMSC_VER is presumably the project's MSVC flag - confirm
   unsigned long pos;
   _BitScanReverse(&pos, mask);   // pos = index of the highest set bit
   i -= 15 - (size_t)pos;
#elif defined(__clang__) || ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))) // modern GCC has built-in __builtin_clz
   i -= (size_t)__builtin_clz(mask) - 16;  // mask is 32-bits but only lower 16-bits are significant, so clz = 31-top >= 16
#else // none of choices exist, use local BSR implementation
#error __builtin_clz
#endif
   return i;
  }
  i -= 16;
 }

 // scalar scan of the remaining head of the buffer
 while (i>0){if (ch!=str[i-1]) return i; else --i;}
 return 0;
}
79+
#elif defined(__SSE2__) || EMU_AVX
2280

2381
static size_t srchr(char* str, char ch, size_t len){
2482
size_t i=len;

0 commit comments

Comments
 (0)