2003-05-23 H.J. Lu * elf/dl-support.c (_dl_hwcap): New. (_dl_aux_init): Initialize GL(dl_hwcap). * sysdeps/i386/fpu/fclrexcpt.c: Include , and . (__feclearexcept): Clear MXCSR if needed. * sysdeps/i386/fpu/fsetexcptflg.c: Likewise. * sysdeps/i386/fpu/fedisblxcpt.c (fedisableexcept): Check GL(dl_hwcap) instead of GL(dl_hwcap_mask). Also set MXCSR for HWCAP_I386_XMM2. * sysdeps/i386/fpu/feholdexcpt.c (feholdexcept): Likewise. * sysdeps/i386/fpu/fesetround.c (fesetround): Likewise. * sysdeps/i386/fpu/ftestexcept.c (fetestexcept): Likewise. * sysdeps/i386/fpu/fedisblxcpt.c (fedisableexcept): Left shift MXCSR control word by 7. * sysdeps/i386/fpu/feenablxcpt.c (feenableexcept): Likewise. Fix typo. * sysdeps/i386/fpu_control.h (_FPU_GETCW): Commented out. (_FPU_SETCW): Likewise. * sysdeps/i386/setfpucw.c: New file. Support SSE and SSE2. --- libc/elf/dl-support.c.p4 2003-05-06 19:22:16.000000000 -0700 +++ libc/elf/dl-support.c 2003-05-23 16:48:03.000000000 -0700 @@ -123,6 +123,7 @@ int _dl_correct_cache_id = _DL_CACHE_DEF struct ElfW(Phdr) *_dl_phdr; size_t _dl_phnum; +unsigned long int _dl_hwcap; #ifdef NEED_DL_SYSINFO /* Needed for improved syscall handling on at least x86/Linux. */ @@ -167,6 +168,9 @@ _dl_aux_init (ElfW(auxv_t) *av) case AT_PHNUM: GL(dl_phnum) = av->a_un.a_val; break; + case AT_HWCAP: + GL(dl_hwcap) = av->a_un.a_val; + break; #ifdef NEED_DL_SYSINFO case AT_SYSINFO: GL(dl_sysinfo) = av->a_un.a_val; --- libc/sysdeps/i386/fpu/fclrexcpt.c.p4 2001-07-05 21:55:53.000000000 -0700 +++ libc/sysdeps/i386/fpu/fclrexcpt.c 2003-05-23 15:36:08.000000000 -0700 @@ -19,6 +19,9 @@ 02111-1307 USA. */ #include +#include +#include +#include int __feclearexcept (int excepts) @@ -38,6 +41,21 @@ __feclearexcept (int excepts) /* Put the new data in effect. */ __asm__ ("fldenv %0" : : "m" (*&temp)); + /* If the CPU supports SSE, we clear the MXCSR as well. 
*/ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) + { + unsigned int xnew_exc; + + /* Get the current MXCSR. */ + __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); + + /* Clear only the requested flags; keep masks and rounding mode. */ + xnew_exc &= ~(excepts & FE_ALL_EXCEPT); + + /* Put the new data in effect. */ + __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); + } + /* Success. */ return 0; } --- libc/sysdeps/i386/fpu/fedisblxcpt.c.p4 2003-04-29 08:38:11.000000000 -0700 +++ libc/sysdeps/i386/fpu/fedisblxcpt.c 2003-05-23 15:36:12.000000000 -0700 @@ -38,15 +38,15 @@ fedisableexcept (int excepts) new_exc |= excepts; __asm__ ("fldcw %0" : : "m" (*&new_exc)); - /* If the CPU supports SSE we set the MXCSR as well. */ - if ((GL(dl_hwcap_mask) & HWCAP_I386_XMM) != 0) + /* If the CPU supports SSE, we set the MXCSR as well. */ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) { unsigned int xnew_exc; - /* Get the current control word. */ + /* Get the current MXCSR. */ __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - xnew_exc |= excepts; + xnew_exc |= excepts << 7; __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); } --- libc/sysdeps/i386/fpu/feenablxcpt.c.p4 2003-04-29 08:38:11.000000000 -0700 +++ libc/sysdeps/i386/fpu/feenablxcpt.c 2003-05-23 15:36:15.000000000 -0700 @@ -38,17 +38,17 @@ feenableexcept (int excepts) new_exc &= ~excepts; __asm__ ("fldcw %0" : : "m" (*&new_exc)); - /* If the CPU supports SSE we set the MXCSR as well. */ - if ((GL(dl_hwcap_mask) & HWCAP_I386_XMM) != 0) + /* If the CPU supports SSE, we set the MXCSR as well. */ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) { unsigned int xnew_exc; - /* Get the current control word. */ - __asm__ ("ldmxcsr %0" : "=m" (*&xnew_exc)); + /* Get the current MXCSR.
*/ + __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); - xnew_exc &= ~excepts; + xnew_exc &= ~(excepts << 7); /* Clear only the chosen mask bits. */ - __asm__ ("stmxcsr %0" : : "m" (*&xnew_exc)); + __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); } return old_exc; --- libc/sysdeps/i386/fpu/feholdexcpt.c.p4 2003-04-29 08:38:11.000000000 -0700 +++ libc/sysdeps/i386/fpu/feholdexcpt.c 2003-05-23 15:36:19.000000000 -0700 @@ -35,12 +35,12 @@ feholdexcept (fenv_t *envp) work = envp->__control_word | 0x3f; __asm__ ("fldcw %0" : : "m" (*&work)); - /* If the CPU supports SSE we set the MXCSR as well. */ - if ((GL(dl_hwcap_mask) & HWCAP_I386_XMM) != 0) + /* If the CPU supports SSE, we set the MXCSR as well. */ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) { unsigned int xwork; - /* Get the current control word. */ + /* Get the current MXCSR. */ __asm__ ("stmxcsr %0" : "=m" (*&xwork)); /* Set all exceptions to non-stop. */ --- libc/sysdeps/i386/fpu/fesetround.c.p4 2003-05-01 20:06:49.000000000 -0700 +++ libc/sysdeps/i386/fpu/fesetround.c 2003-05-23 15:36:24.000000000 -0700 @@ -37,8 +37,8 @@ fesetround (int round) cw |= round; __asm__ ("fldcw %0" : : "m" (*&cw)); - /* If the CPU supports SSE we set the MXCSR as well. */ - if ((GL(dl_hwcap_mask) & HWCAP_I386_XMM) != 0) + /* If the CPU supports SSE, we set the MXCSR as well. */ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) { unsigned int xcw; --- libc/sysdeps/i386/fpu/fsetexcptflg.c.p4 2001-07-05 21:55:53.000000000 -0700 +++ libc/sysdeps/i386/fpu/fsetexcptflg.c 2003-05-23 15:36:27.000000000 -0700 @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include int __fesetexceptflag (const fexcept_t *flagp, int excepts) @@ -39,6 +42,22 @@ __fesetexceptflag (const fexcept_t *flag the next floating-point instruction. */ __asm__ ("fldenv %0" : : "m" (*&temp)); + /* If the CPU supports SSE, we set the MXCSR as well.
*/ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) + { + unsigned int xnew_exc; + + /* Get the current MXCSR. */ + __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); + + /* Set the relevant bits. */ + xnew_exc &= ~(excepts & FE_ALL_EXCEPT); + xnew_exc |= *flagp & excepts & FE_ALL_EXCEPT; + + /* Put the new data in effect. */ + __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); + } + /* Success. */ return 0; } --- libc/sysdeps/i386/fpu/ftestexcept.c.p4 2003-05-01 20:06:49.000000000 -0700 +++ libc/sysdeps/i386/fpu/ftestexcept.c 2003-05-23 15:36:33.000000000 -0700 @@ -32,8 +32,8 @@ fetestexcept (int excepts) /* Get current exceptions. */ __asm__ ("fnstsw %0" : "=a" (temp)); - /* If the CPU supports SSE we test the MXCSR as well. */ - if ((GL(dl_hwcap_mask) & HWCAP_I386_XMM) != 0) + /* If the CPU supports SSE, we test the MXCSR as well. */ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) __asm__ ("stmxcsr %0" : "=m" (*&xtemp)); return (temp | xtemp) & excepts & FE_ALL_EXCEPT; --- libc/sysdeps/i386/fpu_control.h.p4 2001-07-05 21:55:52.000000000 -0700 +++ libc/sysdeps/i386/fpu_control.h 2003-05-23 15:42:59.000000000 -0700 @@ -88,9 +88,12 @@ /* Type of the control word. */ typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__HI__))); +#if 0 +/* Need to set MXCSR for SSE/SSE2. */ /* Macros for accessing the hardware control word. */ #define _FPU_GETCW(cw) __asm__ ("fnstcw %0" : "=m" (*&cw)) #define _FPU_SETCW(cw) __asm__ ("fldcw %0" : : "m" (*&cw)) +#endif /* Default control word set at startup. */ extern fpu_control_t __fpu_control; --- libc/sysdeps/i386/setfpucw.c.p4 2003-05-23 15:51:14.000000000 -0700 +++ libc/sysdeps/i386/setfpucw.c 2003-05-23 17:15:33.000000000 -0700 @@ -0,0 +1,54 @@ +/* Set the FPU control word for x86. + Copyright (C) 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include +#include +#include +/* Load SET into the x87 control word (reserved bits kept) and, when the CPU has SSE/SSE2, copy its rounding-control and exception-mask fields into MXCSR. */ +void +__setfpucw (fpu_control_t set) +{ + fpu_control_t cw; + + /* Fetch the current control word. */ + __asm__ ("fnstcw %0" : "=m" (*&cw)); + + /* Preserve the reserved bits, and set the rest as the caller + specified; no default is substituted here. */ + cw &= _FPU_RESERVED; + cw |= set & ~_FPU_RESERVED; + + __asm__ ("fldcw %0" : : "m" (*&cw)); + + /* If the CPU supports SSE, we set the MXCSR as well. */ + if ((GL(dl_hwcap) & (HWCAP_I386_XMM | HWCAP_I386_XMM2)) != 0) + { + unsigned int xnew_exc; + + /* Get the current MXCSR. */ + __asm__ ("stmxcsr %0" : "=m" (*&xnew_exc)); + /* Replace, not merely OR into, the rounding and mask fields so bits cleared in SET are cleared in MXCSR too. */ + xnew_exc &= ~((0xc00 << 3) | (FE_ALL_EXCEPT << 7)); + xnew_exc |= ((set & 0xc00) << 3) | ((set & FE_ALL_EXCEPT) << 7); + __asm__ ("ldmxcsr %0" : : "m" (*&xnew_exc)); + } +}