This table describes environment switches used to toggle various functions. The switches may be of several types: * Boolean - a value beginning with "0" means false/disabled, any other value means true/enabled * CPUID - may be used to override cpuid(1) registers with the given values. Values are always parsed as hexadecimal; a leading "0x" is optional. For a nice description of these registers, see e.g. http://www.flounder.com/cpuid_explorer2.htm#CPUID%281%29 Of particular interest are these bits: (DX) SSE2 (1 << 26) (CX) SSSE3 (1 << 9) (CX) SSE4_1 (1 << 19) (CX) SSE4_2 (1 << 20) Therefore, e.g. _IFUNC_CPUID_CX=0x80200 will enable SSSE3 and SSE4.1, but disable SSE4.2. Note that the given value replaces the whole register, so every other CX feature bit that is not set in the value is disabled as well. SSE2 bit value (in the DX register) will be left as-is (set based on current CPU). Note that the patch currently modifies only x86_64 functions to take heed of the function-specific switches. The generic environment variables are as follows: _IFUNC_CPUID_CX CPUID See above. Impacts all functions. _IFUNC_CPUID_DX CPUID See above. Impacts all functions. _IFUNC_FASTREP Boolean Toggles Fast_Rep_String. Assume rep-instructions are fast, tuned for i3/i5/i7. Impacts i686 version of bcopy(), bzero(), memcpy(), memmove(), mempcpy(), memset(). _IFUNC_FASTCPBACK Boolean Toggles Fast_Copy_Backward. Assume copy backwards is fast, tuned for i3/i5/i7. Impacts x86_64 version of memcpy() and mempcpy(). _IFUNC_SLOWBSF Boolean Toggles Slow_BSF. (Instruction Bit Scan Forward.) Tuned for Atom. Impacts x86_64 and i686 versions of strlen(). _IFUNC_SSEMEMOP Boolean Toggles Prefer_SSE_for_memop. Assume SSE is fast for pure-memory operations, decisive factor for memset() choice. Furthermore, function-specific environment variables are available. All the variables above can be used, just with the function-specific prefix listed below. E.g., you can use _IFUNC_STRCPY_CPUID_DX to change the cpuid setup of DX just for the strcpy family of functions. 
_IFUNC_STRCPY_* strcpy(), strncpy(), stpcpy(), stpncpy(), memcpy(), mempcpy(), memmove(), memset() _IFUNC_STRCMP_* strcmp(), strncmp(), memcmp(), strcasecmp_l(), strncasecmp_l() _IFUNC_STRSPN_* strspn(), strcspn(), strpbrk() _IFUNC_STRCHR_* strchr(), strrchr(), rawmemchr() _IFUNC_STRLEN_* strlen() _IFUNC_STRSTR_* strstr(), strcasestr(), strcmp(), strncmp(), memcmp() diff --git a/sysdeps/unix/sysv/linux/init-first.c b/sysdeps/unix/sysv/linux/init-first.c index a60212f..c6b4c96 100644 --- a/sysdeps/unix/sysv/linux/init-first.c +++ b/sysdeps/unix/sysv/linux/init-first.c @@ -28,6 +28,7 @@ #include #include +#include "multiarch/init-arch.h" /* Set nonzero if we have to be prepared for more then one libc being used in the process. Safe assumption if initializer never runs. */ @@ -74,6 +75,9 @@ _init (int argc, char **argv, char **envp) __libc_argv = argv; __environ = envp; + /* After setting up __environ, force __cpu_features reset. */ + __cpu_features.kind = arch_kind_unknown; + #ifndef SHARED __libc_init_secure (); diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 34ec2df..d7a48fc 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -19,11 +19,19 @@ 02111-1307 USA. 
*/ #include +#include #include +#include #include "init-arch.h" struct cpu_features __cpu_features attribute_hidden; +struct cpu_features __cpu_features_strcpy attribute_hidden; +struct cpu_features __cpu_features_strstr attribute_hidden; +struct cpu_features __cpu_features_strchr attribute_hidden; +struct cpu_features __cpu_features_strlen attribute_hidden; +struct cpu_features __cpu_features_strspn attribute_hidden; +struct cpu_features __cpu_features_strcmp attribute_hidden; static void @@ -39,6 +47,50 @@ get_common_indeces (unsigned int *family, unsigned int *model) *model = (eax >> 4) & 0x0f; } +static unsigned long +xatol (const char *str) +{ + if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) + str += 2; + unsigned long res = 0; + unsigned long b = 1; + const char *strend = str; + while ((*strend >= '0' && *strend <= '9') + || (*strend >= 'a' && *strend <= 'f') + || (*strend >= 'A' && *strend <= 'F')) + strend++; + while (--strend >= str) + { + if (*strend >= '0' && *strend <= '9') + res += b * (*strend - '0'); + else if (*strend >= 'a' && *strend <= 'f') + res += b * (10 + *strend - 'a'); + else // if (*strend >= 'A' && *strend <= 'F') + res += b * (10 + *strend - 'A'); + b *= 16; + } + return res; +} + +char * +bare_getenv(const char *name) +{ + char **ep; + if (!__environ) return NULL; + for (ep = __environ; *ep != NULL; ++ep) + { + const char *np = name; char *npe = *ep; + for (; *np && *npe && *npe != '='; np++, npe++) + if (*np != *npe) + goto next; + if (*np || *npe != '=') + goto next; + return npe + 1; +next:; + } + return NULL; +} + void __init_cpu_features (void) @@ -123,6 +175,42 @@ __init_cpu_features (void) __cpu_features.model = model; atomic_write_barrier (); __cpu_features.kind = kind; + + /* _Now_, we can safely call getenv(). 
*/ + const char *env; + if ((env = bare_getenv("_IFUNC_CPUID_CX"))) + __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx = xatol(env); + if ((env = bare_getenv("_IFUNC_CPUID_DX"))) + __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx = xatol(env); +#define feattog(FEAT, ID, NAME) \ + if ((env = bare_getenv("_IFUNC_" # NAME))) \ + { \ + if (*env == '0') \ + FEAT.feature[index_##ID] &= ~bit_##ID; \ + else \ + FEAT.feature[index_##ID] |= bit_##ID; \ + } + feattog(__cpu_features, Fast_Rep_String, FASTREP); + feattog(__cpu_features, Fast_Copy_Backward, FASTCPBACK); + feattog(__cpu_features, Slow_BSF, SLOWBSF); + feattog(__cpu_features, Prefer_SSE_for_memop, SSEMEMOP); + +#define featfor(FEAT, NAME) \ + memcpy(&FEAT, &__cpu_features, sizeof(FEAT)); \ + if ((env = bare_getenv("_IFUNC_"#NAME"_CPUID_CX"))) \ + FEAT.cpuid[COMMON_CPUID_INDEX_1].ecx = xatol(env); \ + if ((env = bare_getenv("_IFUNC_"#NAME"_CPUID_DX"))) \ + FEAT.cpuid[COMMON_CPUID_INDEX_1].edx = xatol(env); \ + feattog(FEAT, Fast_Rep_String, FASTREP); \ + feattog(FEAT, Fast_Copy_Backward, FASTCPBACK); \ + feattog(FEAT, Slow_BSF, SLOWBSF); + + featfor(__cpu_features_strcpy, STRCPY); + featfor(__cpu_features_strstr, STRSTR); + featfor(__cpu_features_strchr, STRCHR); + featfor(__cpu_features_strspn, STRSPN); + featfor(__cpu_features_strcmp, STRCMP); + featfor(__cpu_features_strlen, STRLEN); } #undef __get_cpu_features diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 6e409b8..29438c0 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -79,6 +79,14 @@ extern struct cpu_features unsigned int model; unsigned int feature[FEATURE_INDEX_MAX]; } __cpu_features attribute_hidden; +#define __cpu_features_common __cpu_features + +extern struct cpu_features __cpu_features_strcpy attribute_hidden; +extern struct cpu_features __cpu_features_strstr attribute_hidden; +extern struct cpu_features __cpu_features_strchr attribute_hidden; +extern struct 
cpu_features __cpu_features_strlen attribute_hidden; +extern struct cpu_features __cpu_features_strspn attribute_hidden; +extern struct cpu_features __cpu_features_strcmp attribute_hidden; extern void __init_cpu_features (void) attribute_hidden; @@ -93,39 +101,41 @@ extern const struct cpu_features *__get_cpu_features (void) __attribute__ ((const)); # ifndef NOT_IN_libc -# define __get_cpu_features() (&__cpu_features) +# define __get_cpu_features(family) (&__cpu_features_ ## family) +# else +# define __get_cpu_features(family) __get_cpu_features() # endif -# define HAS_CPU_FEATURE(idx, reg, bit) \ - ((__get_cpu_features ()->cpuid[idx].reg & (1 << (bit))) != 0) +# define HAS_CPU_FEATURE(family, idx, reg, bit) \ + ((__get_cpu_features(family)->cpuid[idx].reg & (1 << (bit))) != 0) /* Following are the feature tests used throughout libc. */ -# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, 26) -# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 23) -# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 9) -# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19) -# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20) -# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12) +# define HAS_SSE2(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, edx, 26) +# define HAS_POPCOUNT(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 23) +# define HAS_SSSE3(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 9) +# define HAS_SSE4_1(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 19) +# define HAS_SSE4_2(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 20) +# define HAS_FMA(family) HAS_CPU_FEATURE (family, COMMON_CPUID_INDEX_1, ecx, 12) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 # define index_Slow_BSF FEATURE_INDEX_1 # define index_Prefer_SSE_for_memop FEATURE_INDEX_1 -#define HAS_ARCH_FEATURE(idx, bit) \ - 
((__get_cpu_features ()->feature[idx] & (bit)) != 0) +#define HAS_ARCH_FEATURE(family, idx, bit) \ + ((__get_cpu_features (family)->feature[idx] & (bit)) != 0) -#define HAS_FAST_REP_STRING \ - HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String) +#define HAS_FAST_REP_STRING(family) \ + HAS_ARCH_FEATURE (family, index_Fast_Rep_String, bit_Fast_Rep_String) -#define HAS_FAST_COPY_BACKWARD \ - HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward) +#define HAS_FAST_COPY_BACKWARD(family) \ + HAS_ARCH_FEATURE (family, index_Fast_Copy_Backward, bit_Fast_Copy_Backward) -#define HAS_SLOW_BSF \ - HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF) +#define HAS_SLOW_BSF(family) \ + HAS_ARCH_FEATURE (family, index_Slow_BSF, bit_Slow_BSF) -#define HAS_PREFER_SSE_FOR_MEMOP \ - HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) +#define HAS_PREFER_SSE_FOR_MEMOP(family) \ + HAS_ARCH_FEATURE (family, index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop) #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S index 301ab28..679ae7a 100644 --- a/sysdeps/x86_64/multiarch/memcmp.S +++ b/sysdeps/x86_64/multiarch/memcmp.S @@ -26,11 +26,11 @@ .text ENTRY(memcmp) .type memcmp, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) + cmpl $0, KIND_OFFSET+__cpu_features_strcmp(%rip) jne 1f call __init_cpu_features 1: leaq __memcmp_sse2(%rip), %rax - testl $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip) + testl $bit_SSE4_1, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_1(%rip) jz 2f leaq __memcmp_sse4_1(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index 8e9fb19..9b15c53 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -28,14 +28,14 @@ .text ENTRY(memcpy) .type memcpy, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) + cmpl $0, 
KIND_OFFSET+__cpu_features_strcpy(%rip) jne 1f call __init_cpu_features 1: leaq __memcpy_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip) jz 2f leaq __memcpy_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __memcpy_ssse3_back(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S index 948f61c..797c44d 100644 --- a/sysdeps/x86_64/multiarch/memcpy_chk.S +++ b/sysdeps/x86_64/multiarch/memcpy_chk.S @@ -29,14 +29,14 @@ .text ENTRY(__memcpy_chk) .type __memcpy_chk, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) + cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip) jne 1f call __init_cpu_features 1: leaq __memcpy_chk_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip) jz 2f leaq __memcpy_chk_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __memcpy_chk_ssse3_back(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c index 3798627..0b383f0 100644 --- a/sysdeps/x86_64/multiarch/memmove.c +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -38,8 +38,8 @@ extern __typeof (memmove) __memmove_ssse3_back attribute_hidden; #ifndef NOT_IN_libc libc_ifunc (memmove, - HAS_SSSE3 - ? (HAS_FAST_COPY_BACKWARD + HAS_SSSE3(strcpy) + ? (HAS_FAST_COPY_BACKWARD(strcpy) ? 
__memmove_ssse3_back : __memmove_ssse3) : __memmove_sse2); #endif diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c index 962501d..05939c8 100644 --- a/sysdeps/x86_64/multiarch/memmove_chk.c +++ b/sysdeps/x86_64/multiarch/memmove_chk.c @@ -29,7 +29,7 @@ extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden; #include "debug/memmove_chk.c" libc_ifunc (__memmove_chk, - HAS_SSSE3 - ? (HAS_FAST_COPY_BACKWARD + HAS_SSSE3(strcpy) + ? (HAS_FAST_COPY_BACKWARD(strcpy) ? __memmove_chk_ssse3_back : __memmove_chk_ssse3) : __memmove_chk_sse2); diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S index e8152d6..ce70cfa 100644 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ b/sysdeps/x86_64/multiarch/mempcpy.S @@ -27,14 +27,14 @@ #if defined SHARED && !defined NOT_IN_libc ENTRY(__mempcpy) .type __mempcpy, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) + cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip) jne 1f call __init_cpu_features 1: leaq __mempcpy_sse2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip) jz 2f leaq __mempcpy_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __mempcpy_ssse3_back(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S index 024c775..cf7df6e 100644 --- a/sysdeps/x86_64/multiarch/mempcpy_chk.S +++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S @@ -29,14 +29,14 @@ .text ENTRY(__mempcpy_chk) .type __mempcpy_chk, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features(%rip) + cmpl $0, KIND_OFFSET+__cpu_features_strcpy(%rip) jne 1f call __init_cpu_features 1: leaq __mempcpy_chk_sse2(%rip), %rax - testl $bit_SSSE3, 
__cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip) jz 2f leaq __mempcpy_chk_ssse3(%rip), %rax - testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) + testl $bit_Fast_Copy_Backward, __cpu_features_strcpy+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip) jz 2f leaq __mempcpy_chk_ssse3_back(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S index a8d0e9e..dc44ec3 100644 --- a/sysdeps/x86_64/multiarch/memset.S +++ b/sysdeps/x86_64/multiarch/memset.S @@ -24,11 +24,11 @@ #ifndef NOT_IN_libc ENTRY(memset) .type memset, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strcpy+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __memset_x86_64(%rip), %rax - testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + testl $bit_Prefer_SSE_for_memop, __cpu_features_strcpy+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) jz 2f leaq __memset_sse2(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S index 16afe60..7bb6d18 100644 --- a/sysdeps/x86_64/multiarch/memset_chk.S +++ b/sysdeps/x86_64/multiarch/memset_chk.S @@ -25,11 +25,11 @@ # ifdef SHARED ENTRY(__memset_chk) .type __memset_chk, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strcpy+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __memset_chk_x86_64(%rip), %rax - testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) + testl $bit_Prefer_SSE_for_memop, __cpu_features_strcpy+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) jz 2f leaq __memset_chk_sse2(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S index 2a8a690..c866088 100644 --- a/sysdeps/x86_64/multiarch/rawmemchr.S +++ 
b/sysdeps/x86_64/multiarch/rawmemchr.S @@ -26,11 +26,11 @@ .text ENTRY(rawmemchr) .type rawmemchr, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strchr+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __rawmemchr_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __rawmemchr_sse42(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/s_fma.c b/sysdeps/x86_64/multiarch/s_fma.c index 9a680c6..02670af 100644 --- a/sysdeps/x86_64/multiarch/s_fma.c +++ b/sysdeps/x86_64/multiarch/s_fma.c @@ -34,7 +34,7 @@ __fma_fma (double x, double y, double z) return x; } -libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2); +libm_ifunc (__fma, HAS_FMA(common) ? __fma_fma : __fma_sse2); weak_alias (__fma, fma) # define __fma __fma_sse2 diff --git a/sysdeps/x86_64/multiarch/s_fmaf.c b/sysdeps/x86_64/multiarch/s_fmaf.c index 85ef65a..1a4d892 100644 --- a/sysdeps/x86_64/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/multiarch/s_fmaf.c @@ -33,7 +33,7 @@ __fmaf_fma (float x, float y, float z) return x; } -libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2); +libm_ifunc (__fmaf, HAS_FMA(common) ? __fmaf_fma : __fmaf_sse2); weak_alias (__fmaf, fmaf) # define __fmaf __fmaf_sse2 diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c index fde6dcc..bc5335f 100644 --- a/sysdeps/x86_64/multiarch/sched_cpucount.c +++ b/sysdeps/x86_64/multiarch/sched_cpucount.c @@ -34,4 +34,4 @@ #undef __sched_cpucount libc_ifunc (__sched_cpucount, - HAS_POPCOUNT ? popcount_cpucount : generic_cpucount); + HAS_POPCOUNT(common) ? 
popcount_cpucount : generic_cpucount); diff --git a/sysdeps/x86_64/multiarch/strcasestr-c.c b/sysdeps/x86_64/multiarch/strcasestr-c.c index 551492d..577f876 100644 --- a/sysdeps/x86_64/multiarch/strcasestr-c.c +++ b/sysdeps/x86_64/multiarch/strcasestr-c.c @@ -9,7 +9,7 @@ extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden; #if 1 libc_ifunc (__strcasestr, - HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2); + HAS_SSE4_2(strstr) ? __strcasestr_sse42 : __strcasestr_sse2); #else libc_ifunc (__strcasestr, 0 ? __strcasestr_sse42 : __strcasestr_sse2); diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S index 71845a3..dabbd0d 100644 --- a/sysdeps/x86_64/multiarch/strchr.S +++ b/sysdeps/x86_64/multiarch/strchr.S @@ -26,11 +26,11 @@ .text ENTRY(strchr) .type strchr, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strchr+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __strchr_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strchr_sse42(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S index 1859289..58eafa9 100644 --- a/sysdeps/x86_64/multiarch/strcmp.S +++ b/sysdeps/x86_64/multiarch/strcmp.S @@ -86,15 +86,15 @@ .text ENTRY(STRCMP) .type STRCMP, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strcmp+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq STRCMP_SSE42(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip) jnz 2f leaq STRCMP_SSSE3(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip) jnz 2f leaq STRCMP_SSE2(%rip), %rax 2: ret @@ -103,15 
+103,15 @@ END(STRCMP) # ifdef USE_AS_STRCASECMP_L ENTRY(__strcasecmp) .type __strcasecmp, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strcmp+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __strcasecmp_sse42(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip) jnz 2f leaq __strcasecmp_ssse3(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip) jnz 2f leaq __strcasecmp_sse2(%rip), %rax 2: ret @@ -121,15 +121,15 @@ weak_alias (__strcasecmp, strcasecmp) # ifdef USE_AS_STRNCASECMP_L ENTRY(__strncasecmp) .type __strncasecmp, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strcmp+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __strncasecmp_sse42(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strcmp+CPUID_OFFSET+index_SSE4_2(%rip) jnz 2f leaq __strncasecmp_ssse3(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcmp+CPUID_OFFSET+index_SSSE3(%rip) jnz 2f leaq __strncasecmp_sse2(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S index 02fa8d0..bd4408d 100644 --- a/sysdeps/x86_64/multiarch/strcpy.S +++ b/sysdeps/x86_64/multiarch/strcpy.S @@ -59,11 +59,11 @@ .text ENTRY(STRCPY) .type STRCPY, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strcpy+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq STRCPY_SSE2(%rip), %rax - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + testl $bit_SSSE3, __cpu_features_strcpy+CPUID_OFFSET+index_SSSE3(%rip) jz 2f leaq STRCPY_SSSE3(%rip), %rax 2: ret diff --git 
a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S index f00d52c..7ad325d 100644 --- a/sysdeps/x86_64/multiarch/strcspn.S +++ b/sysdeps/x86_64/multiarch/strcspn.S @@ -45,11 +45,11 @@ .text ENTRY(STRCSPN) .type STRCSPN, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strspn+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq STRCSPN_SSE2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strspn+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq STRCSPN_SSE42(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S index 83a88ec..d5d251c 100644 --- a/sysdeps/x86_64/multiarch/strlen.S +++ b/sysdeps/x86_64/multiarch/strlen.S @@ -29,15 +29,15 @@ .text ENTRY(strlen) .type strlen, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strlen+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __strlen_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strlen+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strlen_sse42(%rip), %rax ret -2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) +2: testl $bit_Slow_BSF, __cpu_features_strlen+FEATURE_OFFSET+index_Slow_BSF(%rip) jz 3f leaq __strlen_no_bsf(%rip), %rax 3: ret diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S index 0d17fdb..51728da 100644 --- a/sysdeps/x86_64/multiarch/strrchr.S +++ b/sysdeps/x86_64/multiarch/strrchr.S @@ -28,11 +28,11 @@ .text ENTRY(strrchr) .type strrchr, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strchr+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __strrchr_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, 
__cpu_features_strchr+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strrchr_sse42(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/strspn.S index 1c56626..9a72bfa 100644 --- a/sysdeps/x86_64/multiarch/strspn.S +++ b/sysdeps/x86_64/multiarch/strspn.S @@ -30,11 +30,11 @@ .text ENTRY(strspn) .type strspn, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) + cmpl $0, __cpu_features_strspn+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: leaq __strspn_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + testl $bit_SSE4_2, __cpu_features_strspn+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strspn_sse42(%rip), %rax 2: ret diff --git a/sysdeps/x86_64/multiarch/strstr-c.c b/sysdeps/x86_64/multiarch/strstr-c.c index b8ed316..677dc9a 100644 --- a/sysdeps/x86_64/multiarch/strstr-c.c +++ b/sysdeps/x86_64/multiarch/strstr-c.c @@ -12,4 +12,4 @@ extern char *__strstr_sse42 (const char *, const char *) attribute_hidden; extern __typeof (__strstr_sse2) __strstr_sse2 attribute_hidden; -libc_ifunc (strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2); +libc_ifunc (strstr, HAS_SSE4_2(strstr) ? __strstr_sse42 : __strstr_sse2);