|
@@ -0,0 +1,200 @@
|
|
|
+From f3ed8bfe8a82af1870ddc8696ed4cc1d5aa6b441 Mon Sep 17 00:00:00 2001
|
|
|
+From: Rich Felker <dalias@aerifal.cx>
|
|
|
+Date: Mon, 5 Aug 2019 18:41:47 -0400
|
|
|
+Subject: [PATCH] fix x87 stack imbalance in corner cases of i386 math asm
|
|
|
+
|
|
|
+commit 31c5fb80b9eae86f801be4f46025bc6532a554c5 introduced underflow
|
|
|
+code paths for the i386 math asm, along with checks on the fpu status
|
|
|
+word to skip the underflow-generation instructions if the underflow
|
|
|
+flag was already raised. unfortunately, at least one such path, in
|
|
|
+log1p, returned with 2 items on the x87 stack rather than just 1 item
|
|
|
+for the return value. this is a violation of the ABI's calling
|
|
|
+convention, and could cause subsequent floating point code to produce
|
|
|
+NaNs due to x87 stack overflow. if floating point results are used in
|
|
|
+flow control, this can lead to runaway wrong code execution.
|
|
|
+
|
|
|
+rather than reviewing each "underflow already raised" code path for
|
|
|
+correctness, remove them all. they're likely slower than just
|
|
|
+performing the underflow code unconditionally, and significantly more
|
|
|
+complex.
|
|
|
+
|
|
|
+all of this code should be ripped out and replaced by C source files
|
|
|
+with inline asm. doing so would preclude this kind of error by having
|
|
|
+the compiler perform all x87 stack register allocation and stack
|
|
|
+manipulation, and would produce comparable or better code. however
|
|
|
+such a change is a much larger project.
|
|
|
+
|
|
|
+Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
|
|
|
+---
|
|
|
+ src/math/i386/asin.s | 10 ++--------
|
|
|
+ src/math/i386/atan.s | 7 ++-----
|
|
|
+ src/math/i386/atan2.s | 5 +----
|
|
|
+ src/math/i386/atan2f.s | 5 +----
|
|
|
+ src/math/i386/atanf.s | 7 ++-----
|
|
|
+ src/math/i386/exp.s | 10 ++--------
|
|
|
+ src/math/i386/log1p.s | 7 ++-----
|
|
|
+ src/math/i386/log1pf.s | 7 ++-----
|
|
|
+ 8 files changed, 14 insertions(+), 44 deletions(-)
|
|
|
+
|
|
|
+diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
|
|
|
+index a9f691bf..920d967a 100644
|
|
|
+--- a/src/math/i386/asin.s
|
|
|
++++ b/src/math/i386/asin.s
|
|
|
+@@ -7,13 +7,10 @@ asinf:
|
|
|
+ cmp $0x01000000,%eax
|
|
|
+ jae 1f
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+- fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 2f
|
|
|
+ fld %st(0)
|
|
|
+ fmul %st(1)
|
|
|
+ fstps 4(%esp)
|
|
|
+-2: ret
|
|
|
++ ret
|
|
|
+
|
|
|
+ .global asinl
|
|
|
+ .type asinl,@function
|
|
|
+@@ -30,11 +27,8 @@ asin:
|
|
|
+ cmp $0x00200000,%eax
|
|
|
+ jae 1f
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+- fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 2f
|
|
|
+ fsts 4(%esp)
|
|
|
+-2: ret
|
|
|
++ ret
|
|
|
+ 1: fld %st(0)
|
|
|
+ fld1
|
|
|
+ fsub %st(0),%st(1)
|
|
|
+diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
|
|
|
+index d73137b2..a26feae1 100644
|
|
|
+--- a/src/math/i386/atan.s
|
|
|
++++ b/src/math/i386/atan.s
|
|
|
+@@ -10,8 +10,5 @@ atan:
|
|
|
+ fpatan
|
|
|
+ ret
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+-1: fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 2f
|
|
|
+- fsts 4(%esp)
|
|
|
+-2: ret
|
|
|
++1: fsts 4(%esp)
|
|
|
++ ret
|
|
|
+diff --git a/src/math/i386/atan2.s b/src/math/i386/atan2.s
|
|
|
+index a7d2979b..1fa0524d 100644
|
|
|
+--- a/src/math/i386/atan2.s
|
|
|
++++ b/src/math/i386/atan2.s
|
|
|
+@@ -10,8 +10,5 @@ atan2:
|
|
|
+ cmp $0x00200000,%eax
|
|
|
+ jae 1f
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+- fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 1f
|
|
|
+ fsts 4(%esp)
|
|
|
+-1: ret
|
|
|
++ ret
|
|
|
+diff --git a/src/math/i386/atan2f.s b/src/math/i386/atan2f.s
|
|
|
+index 14b88ce5..0b264726 100644
|
|
|
+--- a/src/math/i386/atan2f.s
|
|
|
++++ b/src/math/i386/atan2f.s
|
|
|
+@@ -10,10 +10,7 @@ atan2f:
|
|
|
+ cmp $0x01000000,%eax
|
|
|
+ jae 1f
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+- fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 1f
|
|
|
+ fld %st(0)
|
|
|
+ fmul %st(1)
|
|
|
+ fstps 4(%esp)
|
|
|
+-1: ret
|
|
|
++ ret
|
|
|
+diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
|
|
|
+index 8caddefa..893beac5 100644
|
|
|
+--- a/src/math/i386/atanf.s
|
|
|
++++ b/src/math/i386/atanf.s
|
|
|
+@@ -10,10 +10,7 @@ atanf:
|
|
|
+ fpatan
|
|
|
+ ret
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+-1: fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 2f
|
|
|
+- fld %st(0)
|
|
|
++1: fld %st(0)
|
|
|
+ fmul %st(1)
|
|
|
+ fstps 4(%esp)
|
|
|
+-2: ret
|
|
|
++ ret
|
|
|
+diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
|
|
|
+index c7aa5b6e..df87c497 100644
|
|
|
+--- a/src/math/i386/exp.s
|
|
|
++++ b/src/math/i386/exp.s
|
|
|
+@@ -7,13 +7,10 @@ expm1f:
|
|
|
+ cmp $0x01000000,%eax
|
|
|
+ jae 1f
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+- fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 2f
|
|
|
+ fld %st(0)
|
|
|
+ fmul %st(1)
|
|
|
+ fstps 4(%esp)
|
|
|
+-2: ret
|
|
|
++ ret
|
|
|
+
|
|
|
+ .global expm1l
|
|
|
+ .type expm1l,@function
|
|
|
+@@ -30,11 +27,8 @@ expm1:
|
|
|
+ cmp $0x00200000,%eax
|
|
|
+ jae 1f
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+- fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 2f
|
|
|
+ fsts 4(%esp)
|
|
|
+-2: ret
|
|
|
++ ret
|
|
|
+ 1: fldl2e
|
|
|
+ fmulp
|
|
|
+ mov $0xc2820000,%eax
|
|
|
+diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
|
|
|
+index 6b6929c7..354f391a 100644
|
|
|
+--- a/src/math/i386/log1p.s
|
|
|
++++ b/src/math/i386/log1p.s
|
|
|
+@@ -16,9 +16,6 @@ log1p:
|
|
|
+ fyl2x
|
|
|
+ ret
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+-2: fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 1f
|
|
|
+- fsts 4(%esp)
|
|
|
++2: fsts 4(%esp)
|
|
|
+ fstp %st(1)
|
|
|
+-1: ret
|
|
|
++ ret
|
|
|
+diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
|
|
|
+index c0bcd30f..4d3484cd 100644
|
|
|
+--- a/src/math/i386/log1pf.s
|
|
|
++++ b/src/math/i386/log1pf.s
|
|
|
+@@ -16,10 +16,7 @@ log1pf:
|
|
|
+ fyl2x
|
|
|
+ ret
|
|
|
+ # subnormal x, return x with underflow
|
|
|
+-2: fnstsw %ax
|
|
|
+- and $16,%ax
|
|
|
+- jnz 1f
|
|
|
+- fxch
|
|
|
++2: fxch
|
|
|
+ fmul %st(1)
|
|
|
+ fstps 4(%esp)
|
|
|
+-1: ret
|
|
|
++ ret
|
|
|
+--
|
|
|
+2.11.0
|
|
|
+
|