Disable FTZ in SSE when FCR31 FS bit is set

Remove -ffast-math, which makes us IEEE 754 compliant
This commit is contained in:
Logan McNaughton 2020-03-07 07:39:39 -07:00 committed by Dorian Fevrier
parent 1a495aefd9
commit b7b56fea51
5 changed files with 46 additions and 1 deletion

View file

@ -143,7 +143,7 @@ endif
# base CFLAGS, LDLIBS, and LDFLAGS
OPTFLAGS ?= -O3 -flto
WARNFLAGS ?= -Wall
CFLAGS += -ffast-math -fno-strict-aliasing -fvisibility=hidden -I../../src -I../../src/asm_defines -DM64P_PARALLEL
CFLAGS += -fno-strict-aliasing -fvisibility=hidden -I../../src -I../../src/asm_defines -DM64P_PARALLEL
CXXFLAGS += -fvisibility-inlines-hidden
LDLIBS += -lm

View file

@ -27,6 +27,8 @@
#include "new_dynarec/new_dynarec.h"
#define FCR31_FS_BIT UINT32_C(0x1000000)
void init_cp1(struct cp1* cp1, struct new_dynarec_hot_state* new_dynarec_hot_state)
{
#ifdef NEW_DYNAREC
@ -41,6 +43,9 @@ void poweron_cp1(struct cp1* cp1)
*r4300_cp1_fcr31(cp1) = 0;
set_fpr_pointers(cp1, UINT32_C(0x34000000)); /* c0_status value at poweron */
#ifdef OSAL_SSE
cp1->flush_mode = _MM_GET_FLUSH_ZERO_MODE();
#endif
update_x86_rounding_mode(cp1);
}
@ -120,6 +125,20 @@ void update_x86_rounding_mode(struct cp1* cp1)
{
uint32_t fcr31 = *r4300_cp1_fcr31(cp1);
#ifdef OSAL_SSE
uint32_t flush_mode;
if (fcr31 & 2)
flush_mode = (fcr31 & FCR31_FS_BIT) ? _MM_FLUSH_ZERO_OFF : _MM_FLUSH_ZERO_ON;
else
flush_mode = _MM_FLUSH_ZERO_ON;
if (flush_mode != cp1->flush_mode)
{
_MM_SET_FLUSH_ZERO_MODE(flush_mode);
cp1->flush_mode = flush_mode;
}
#endif
switch (fcr31 & 3)
{
case 0: /* Round to nearest, or to even if equidistant */

View file

@ -51,6 +51,10 @@ struct cp1
* using 32-bit stores. */
uint32_t rounding_mode;
#ifdef OSAL_SSE
uint32_t flush_mode;
#endif
#ifdef NEW_DYNAREC
/* New dynarec uses a different memory layout */
struct new_dynarec_hot_state* new_dynarec_hot_state;

View file

@ -121,6 +121,13 @@ void poweron_r4300(struct r4300_core* r4300)
void run_r4300(struct r4300_core* r4300)
{
#ifdef OSAL_SSE
//Save FTZ/DAZ mode
unsigned int daz = _MM_GET_DENORMALS_ZERO_MODE();
unsigned int ftz = _MM_GET_FLUSH_ZERO_MODE();
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
#endif
*r4300_stop(r4300) = 0;
g_rom_pause = 0;
@ -196,6 +203,11 @@ void run_r4300(struct r4300_core* r4300)
if (r4300->emumode == EMUMODE_DYNAREC)
instr_counters_print();
#endif
#ifdef OSAL_SSE
//Restore FTZ/DAZ mode
_MM_SET_DENORMALS_ZERO_MODE(daz);
_MM_SET_FLUSH_ZERO_MODE(ftz);
#endif
}
int64_t* r4300_regs(struct r4300_core* r4300)

View file

@ -42,6 +42,11 @@
/* for isnan() */
#include <float.h>
#if defined(_M_X64) || (_M_IX86_FP > 0)
#include <immintrin.h>
#define OSAL_SSE
#endif
#else /* Not WIN32 */
/* for strcasecmp */
#include <strings.h>
@ -58,6 +63,11 @@
/* string functions */
#define osal_insensitive_strcmp(x, y) strcasecmp(x, y)
#ifdef __SSE__
#include <immintrin.h>
#define OSAL_SSE
#endif
#endif
/* sign-extension macros */