|
@@ -1,200 +0,0 @@
|
|
|
-From f3ed8bfe8a82af1870ddc8696ed4cc1d5aa6b441 Mon Sep 17 00:00:00 2001
|
|
|
-From: Rich Felker <dalias@aerifal.cx>
|
|
|
-Date: Mon, 5 Aug 2019 18:41:47 -0400
|
|
|
-Subject: [PATCH] fix x87 stack imbalance in corner cases of i386 math asm
|
|
|
-
|
|
|
-commit 31c5fb80b9eae86f801be4f46025bc6532a554c5 introduced underflow
|
|
|
-code paths for the i386 math asm, along with checks on the fpu status
|
|
|
-word to skip the underflow-generation instructions if the underflow
|
|
|
-flag was already raised. unfortunately, at least one such path, in
|
|
|
-log1p, returned with 2 items on the x87 stack rather than just 1 item
|
|
|
-for the return value. this is a violation of the ABI's calling
|
|
|
-convention, and could cause subsequent floating point code to produce
|
|
|
-NaNs due to x87 stack overflow. if floating point results are used in
|
|
|
-flow control, this can lead to runaway wrong code execution.
|
|
|
-
|
|
|
-rather than reviewing each "underflow already raised" code path for
|
|
|
-correctness, remove them all. they're likely slower than just
|
|
|
-performing the underflow code unconditionally, and significantly more
|
|
|
-complex.
|
|
|
-
|
|
|
-all of this code should be ripped out and replaced by C source files
|
|
|
-with inline asm. doing so would preclude this kind of error by having
|
|
|
-the compiler perform all x87 stack register allocation and stack
|
|
|
-manipulation, and would produce comparable or better code. however
|
|
|
-such a change is a much larger project.
|
|
|
-
|
|
|
-Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
|
|
|
----
|
|
|
- src/math/i386/asin.s | 10 ++--------
|
|
|
- src/math/i386/atan.s | 7 ++-----
|
|
|
- src/math/i386/atan2.s | 5 +----
|
|
|
- src/math/i386/atan2f.s | 5 +----
|
|
|
- src/math/i386/atanf.s | 7 ++-----
|
|
|
- src/math/i386/exp.s | 10 ++--------
|
|
|
- src/math/i386/log1p.s | 7 ++-----
|
|
|
- src/math/i386/log1pf.s | 7 ++-----
|
|
|
- 8 files changed, 14 insertions(+), 44 deletions(-)
|
|
|
-
|
|
|
-diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
|
|
|
-index a9f691bf..920d967a 100644
|
|
|
---- a/src/math/i386/asin.s
|
|
|
-+++ b/src/math/i386/asin.s
|
|
|
-@@ -7,13 +7,10 @@ asinf:
|
|
|
- cmp $0x01000000,%eax
|
|
|
- jae 1f
|
|
|
- # subnormal x, return x with underflow
|
|
|
-- fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 2f
|
|
|
- fld %st(0)
|
|
|
- fmul %st(1)
|
|
|
- fstps 4(%esp)
|
|
|
--2: ret
|
|
|
-+ ret
|
|
|
-
|
|
|
- .global asinl
|
|
|
- .type asinl,@function
|
|
|
-@@ -30,11 +27,8 @@ asin:
|
|
|
- cmp $0x00200000,%eax
|
|
|
- jae 1f
|
|
|
- # subnormal x, return x with underflow
|
|
|
-- fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 2f
|
|
|
- fsts 4(%esp)
|
|
|
--2: ret
|
|
|
-+ ret
|
|
|
- 1: fld %st(0)
|
|
|
- fld1
|
|
|
- fsub %st(0),%st(1)
|
|
|
-diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
|
|
|
-index d73137b2..a26feae1 100644
|
|
|
---- a/src/math/i386/atan.s
|
|
|
-+++ b/src/math/i386/atan.s
|
|
|
-@@ -10,8 +10,5 @@ atan:
|
|
|
- fpatan
|
|
|
- ret
|
|
|
- # subnormal x, return x with underflow
|
|
|
--1: fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 2f
|
|
|
-- fsts 4(%esp)
|
|
|
--2: ret
|
|
|
-+1: fsts 4(%esp)
|
|
|
-+ ret
|
|
|
-diff --git a/src/math/i386/atan2.s b/src/math/i386/atan2.s
|
|
|
-index a7d2979b..1fa0524d 100644
|
|
|
---- a/src/math/i386/atan2.s
|
|
|
-+++ b/src/math/i386/atan2.s
|
|
|
-@@ -10,8 +10,5 @@ atan2:
|
|
|
- cmp $0x00200000,%eax
|
|
|
- jae 1f
|
|
|
- # subnormal x, return x with underflow
|
|
|
-- fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 1f
|
|
|
- fsts 4(%esp)
|
|
|
--1: ret
|
|
|
-+ ret
|
|
|
-diff --git a/src/math/i386/atan2f.s b/src/math/i386/atan2f.s
|
|
|
-index 14b88ce5..0b264726 100644
|
|
|
---- a/src/math/i386/atan2f.s
|
|
|
-+++ b/src/math/i386/atan2f.s
|
|
|
-@@ -10,10 +10,7 @@ atan2f:
|
|
|
- cmp $0x01000000,%eax
|
|
|
- jae 1f
|
|
|
- # subnormal x, return x with underflow
|
|
|
-- fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 1f
|
|
|
- fld %st(0)
|
|
|
- fmul %st(1)
|
|
|
- fstps 4(%esp)
|
|
|
--1: ret
|
|
|
-+ ret
|
|
|
-diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
|
|
|
-index 8caddefa..893beac5 100644
|
|
|
---- a/src/math/i386/atanf.s
|
|
|
-+++ b/src/math/i386/atanf.s
|
|
|
-@@ -10,10 +10,7 @@ atanf:
|
|
|
- fpatan
|
|
|
- ret
|
|
|
- # subnormal x, return x with underflow
|
|
|
--1: fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 2f
|
|
|
-- fld %st(0)
|
|
|
-+1: fld %st(0)
|
|
|
- fmul %st(1)
|
|
|
- fstps 4(%esp)
|
|
|
--2: ret
|
|
|
-+ ret
|
|
|
-diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
|
|
|
-index c7aa5b6e..df87c497 100644
|
|
|
---- a/src/math/i386/exp.s
|
|
|
-+++ b/src/math/i386/exp.s
|
|
|
-@@ -7,13 +7,10 @@ expm1f:
|
|
|
- cmp $0x01000000,%eax
|
|
|
- jae 1f
|
|
|
- # subnormal x, return x with underflow
|
|
|
-- fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 2f
|
|
|
- fld %st(0)
|
|
|
- fmul %st(1)
|
|
|
- fstps 4(%esp)
|
|
|
--2: ret
|
|
|
-+ ret
|
|
|
-
|
|
|
- .global expm1l
|
|
|
- .type expm1l,@function
|
|
|
-@@ -30,11 +27,8 @@ expm1:
|
|
|
- cmp $0x00200000,%eax
|
|
|
- jae 1f
|
|
|
- # subnormal x, return x with underflow
|
|
|
-- fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 2f
|
|
|
- fsts 4(%esp)
|
|
|
--2: ret
|
|
|
-+ ret
|
|
|
- 1: fldl2e
|
|
|
- fmulp
|
|
|
- mov $0xc2820000,%eax
|
|
|
-diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
|
|
|
-index 6b6929c7..354f391a 100644
|
|
|
---- a/src/math/i386/log1p.s
|
|
|
-+++ b/src/math/i386/log1p.s
|
|
|
-@@ -16,9 +16,6 @@ log1p:
|
|
|
- fyl2x
|
|
|
- ret
|
|
|
- # subnormal x, return x with underflow
|
|
|
--2: fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 1f
|
|
|
-- fsts 4(%esp)
|
|
|
-+2: fsts 4(%esp)
|
|
|
- fstp %st(1)
|
|
|
--1: ret
|
|
|
-+ ret
|
|
|
-diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
|
|
|
-index c0bcd30f..4d3484cd 100644
|
|
|
---- a/src/math/i386/log1pf.s
|
|
|
-+++ b/src/math/i386/log1pf.s
|
|
|
-@@ -16,10 +16,7 @@ log1pf:
|
|
|
- fyl2x
|
|
|
- ret
|
|
|
- # subnormal x, return x with underflow
|
|
|
--2: fnstsw %ax
|
|
|
-- and $16,%ax
|
|
|
-- jnz 1f
|
|
|
-- fxch
|
|
|
-+2: fxch
|
|
|
- fmul %st(1)
|
|
|
- fstps 4(%esp)
|
|
|
--1: ret
|
|
|
-+ ret
|
|
|
---
|
|
|
-2.11.0
|
|
|
-
|