152 lines
5.5 KiB
Diff
152 lines
5.5 KiB
Diff
From jseward@acm.org Tue Mar 27 18:05:53 2007
|
|
Date: Sat, 17 Mar 2007 17:35:38 +0000
|
|
From: Julian Seward <jseward@acm.org>
|
|
Reply-To: qemu-devel@nongnu.org
|
|
To: qemu-devel@nongnu.org
|
|
Subject: [Qemu-devel] [PATCH] Fix guest x86/amd64 helper_fprem/helper_fprem1
|
|
|
|
|
|
The helpers for x86/amd64 fprem and fprem1 in target-i386/helper.c are
|
|
significantly borked and, for example, cause konqueror in RedHat8 (x86
|
|
guest) to go into an infinite loop when displaying http://news.bbc.co.uk.
|
|
|
|
helper_fprem has the following borkage:
|
|
- various Inf/NaN/zero inputs not handled correctly
|
|
- incorrect rounding when converting negative 'dblq' to 'q'
|
|
- incorrect order of assignment to C bits (0,3,1 not 0,1,3)
|
|
|
|
helper_fprem1 has those problems and is also incorrect about the points
|
|
at which its rounding needs to differ from that of helper_fprem.
|
|
|
|
The patch below fixes all of these. It brings the fprem and fprem1 behaviour
|
|
very much closer to the hardware -- not identical, but close. Some
|
|
+0.0 results should really be -0.0 and there may still be other differences.
|
|
|
|
Anyway, konqueror no longer loops with the patch applied.
|
|
|
|
--- qemu-0.9.0/target-i386/helper.c.fix-x86-fprem 2007-03-27 13:48:10.000000000 -0400
|
|
+++ qemu-0.9.0/target-i386/helper.c 2007-03-27 14:03:06.000000000 -0400
|
|
@@ -3124,30 +3124,51 @@ void helper_fprem1(void)
|
|
CPU86_LDouble dblq, fpsrcop, fptemp;
|
|
CPU86_LDoubleU fpsrcop1, fptemp1;
|
|
int expdif;
|
|
- int q;
|
|
+ signed long long int q;
|
|
+
|
|
+ if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
|
|
+ ST0 = 0.0 / 0.0; /* NaN */
|
|
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
|
|
+ return;
|
|
+ }
|
|
|
|
fpsrcop = ST0;
|
|
fptemp = ST1;
|
|
fpsrcop1.d = fpsrcop;
|
|
fptemp1.d = fptemp;
|
|
expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
|
|
+
|
|
+ if (expdif < 0) {
|
|
+ /* optimisation? taken from the AMD docs */
|
|
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
|
|
+ /* ST0 is unchanged */
|
|
+ return;
|
|
+ }
|
|
+
|
|
if (expdif < 53) {
|
|
dblq = fpsrcop / fptemp;
|
|
- dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
|
|
+ /* round dblq towards nearest integer */
|
|
+ dblq = rint(dblq);
|
|
ST0 = fpsrcop - fptemp*dblq;
|
|
- q = (int)dblq; /* cutting off top bits is assumed here */
|
|
+
|
|
+ /* convert dblq to q by truncating towards zero */
|
|
+ if (dblq < 0.0)
|
|
+ q = (signed long long int)(-dblq);
|
|
+ else
|
|
+ q = (signed long long int)dblq;
|
|
+
|
|
env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
|
|
- /* (C0,C1,C3) <-- (q2,q1,q0) */
|
|
- env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
|
|
- env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
|
|
- env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
|
|
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
|
|
+ env->fpus |= (q&0x4) << (8-2); /* (C0) <-- q2 */
|
|
+ env->fpus |= (q&0x2) << (14-1); /* (C3) <-- q1 */
|
|
+ env->fpus |= (q&0x1) << (9-0); /* (C1) <-- q0 */
|
|
} else {
|
|
env->fpus |= 0x400; /* C2 <-- 1 */
|
|
fptemp = pow(2.0, expdif-50);
|
|
fpsrcop = (ST0 / ST1) / fptemp;
|
|
- /* fpsrcop = integer obtained by rounding to the nearest */
|
|
- fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)?
|
|
- floor(fpsrcop): ceil(fpsrcop);
|
|
+ /* fpsrcop = integer obtained by chopping */
|
|
+ fpsrcop = (fpsrcop < 0.0)?
|
|
+ -(floor(fabs(fpsrcop))): floor(fpsrcop);
|
|
ST0 -= (ST1 * fpsrcop * fptemp);
|
|
}
|
|
}
|
|
@@ -3157,26 +3178,48 @@ void helper_fprem(void)
|
|
CPU86_LDouble dblq, fpsrcop, fptemp;
|
|
CPU86_LDoubleU fpsrcop1, fptemp1;
|
|
int expdif;
|
|
- int q;
|
|
-
|
|
- fpsrcop = ST0;
|
|
- fptemp = ST1;
|
|
+ signed long long int q;
|
|
+
|
|
+ if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
|
|
+ ST0 = 0.0 / 0.0; /* NaN */
|
|
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ fpsrcop = (CPU86_LDouble)ST0;
|
|
+ fptemp = (CPU86_LDouble)ST1;
|
|
fpsrcop1.d = fpsrcop;
|
|
fptemp1.d = fptemp;
|
|
expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
|
|
+
|
|
+ if (expdif < 0) {
|
|
+ /* optimisation? taken from the AMD docs */
|
|
+ env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
|
|
+ /* ST0 is unchanged */
|
|
+ return;
|
|
+ }
|
|
+
|
|
if ( expdif < 53 ) {
|
|
- dblq = fpsrcop / fptemp;
|
|
+ dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/;
|
|
+ /* round dblq towards zero */
|
|
dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
|
|
- ST0 = fpsrcop - fptemp*dblq;
|
|
- q = (int)dblq; /* cutting off top bits is assumed here */
|
|
+ ST0 = fpsrcop/*ST0*/ - fptemp*dblq;
|
|
+
|
|
+ /* convert dblq to q by truncating towards zero */
|
|
+ if (dblq < 0.0)
|
|
+ q = (signed long long int)(-dblq);
|
|
+ else
|
|
+ q = (signed long long int)dblq;
|
|
+
|
|
env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
|
|
- /* (C0,C1,C3) <-- (q2,q1,q0) */
|
|
- env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
|
|
- env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
|
|
- env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
|
|
+ /* (C0,C3,C1) <-- (q2,q1,q0) */
|
|
+ env->fpus |= (q&0x4) << (8-2); /* (C0) <-- q2 */
|
|
+ env->fpus |= (q&0x2) << (14-1); /* (C3) <-- q1 */
|
|
+ env->fpus |= (q&0x1) << (9-0); /* (C1) <-- q0 */
|
|
} else {
|
|
+ int N = 32 + (expdif % 32); /* as per AMD docs */
|
|
env->fpus |= 0x400; /* C2 <-- 1 */
|
|
- fptemp = pow(2.0, expdif-50);
|
|
+ fptemp = pow(2.0, (double)(expdif-N));
|
|
fpsrcop = (ST0 / ST1) / fptemp;
|
|
/* fpsrcop = integer obtained by chopping */
|
|
fpsrcop = (fpsrcop < 0.0)?
|