Skip to content

Commit

Permalink
interpreter: add host-fp support
Browse files Browse the repository at this point in the history
* Host FP provides better performance than softfloat. However, exception and NaN
  handling are slow on the host, so both are disabled in host-fp.
* FIXME: povray in SPEC 2006 gets a wrong answer
  • Loading branch information
sashimi-yzh committed Mar 31, 2021
1 parent dc35364 commit 795dd3c
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 49 deletions.
12 changes: 12 additions & 0 deletions Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,18 @@ endmenu
source "src/memory/Kconfig"
source "src/device/Kconfig"

# Top-level switch for floating point support; enabled by default.
# The options inside the "if FPU" block are only offered when it is set.
menuconfig FPU
bool "Enable FPU"
default y
help
Enable floating point support.

if FPU
# Selects the softfloat-based FP backend. It is off by default, in which
# case the interpreter includes host-fp.h instead of softfloat-fp.h and
# the Makefile links against libm (-lm).
config FPU_SOFTFLOAT
bool "Use softfloat library"
default n
endif # FPU

menu "Miscellaneous"
choice
prompt "Host timer"
Expand Down
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ else
SHARE = 1
endif

ifeq ($(ENGINE),interpreter)
ifdef CONFIG_FPU_SOFTFLOAT
SOFTFLOAT = resource/softfloat/build/softfloat.a
ifeq ($(ISA),riscv64)
SPECIALIZE_TYPE = RISCV
Expand All @@ -66,6 +66,8 @@ $(SOFTFLOAT):
SPECIALIZE_TYPE=$(SPECIALIZE_TYPE) $(MAKE) -s -C resource/softfloat/

.PHONY: $(SOFTFLOAT)
else ifdef CONFIG_FPU
LDFLAGS += -lm
endif

include $(NEMU_HOME)/scripts/git.mk
Expand Down
16 changes: 16 additions & 0 deletions include/rtl/fp.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@ enum {
FPCALL_W80,
};

/*
 * Engine-independent rounding-mode identifiers.  Each FP backend
 * translates these into its own encoding in fp_set_rm().
 */
enum {
  FPCALL_RM_RNE = 0,  // round to nearest, ties to even
  FPCALL_RM_RTZ = 1,  // round towards zero
  FPCALL_RM_RDN = 2,  // round down (towards -INF)
  FPCALL_RM_RUP = 3,  // round up (towards +INF)
  FPCALL_RM_RMM = 4,  // round to nearest, ties to max magnitude
};

/*
 * Engine-independent exception flags, one bit each so they can be OR-ed
 * together.  Each FP backend converts its native flags into this set in
 * fp_get_exception().
 */
enum {
  FPCALL_EX_NX = 1 << 0,  // inexact
  FPCALL_EX_UF = 1 << 1,  // underflow
  FPCALL_EX_OF = 1 << 2,  // overflow
  FPCALL_EX_DZ = 1 << 3,  // divide by zero
  FPCALL_EX_NV = 1 << 4,  // invalid operation
};

enum {
FPCALL_ADD,
FPCALL_SUB,
Expand Down
61 changes: 17 additions & 44 deletions src/engine/interpreter/fp.c
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
#include <rtl/rtl.h>
#include <softfloat.h>
#include <specialize.h>
#include <internals.h>
#include MUXDEF(CONFIG_FPU_SOFTFLOAT, "softfloat-fp.h", "host-fp.h")

#define BOX_MASK 0xFFFFFFFF00000000
#define F32_SIGN ((uint64_t)1ul << 31)
#define F64_SIGN ((uint64_t)1ul << 63)

static inline rtlreg_t unbox(rtlreg_t r) {
if ((r & BOX_MASK) == BOX_MASK) return r & ~BOX_MASK;
else return defaultNaNF32UI;
return MUXDEF(CONFIG_FPU_SOFTFLOAT, (r & BOX_MASK) == BOX_MASK, true)
? (r & ~BOX_MASK) : defaultNaNF32UI;
}

static inline float32_t rtlToF32(rtlreg_t r) {
Expand All @@ -22,35 +18,11 @@ static inline float64_t rtlToF64(rtlreg_t r) {
return f;
}

static inline float32_t f32_min(float32_t a, float32_t b){
bool less = f32_lt_quiet(a, b) || (f32_eq(a, b) && (a.v & F32_SIGN));
if(isNaNF32UI(a.v) && isNaNF32UI(b.v)) return rtlToF32(defaultNaNF32UI);
else return(less || isNaNF32UI(b.v) ? a : b);
}

static inline float32_t f32_max(float32_t a, float32_t b){
bool greater = f32_lt_quiet(b, a) || (f32_eq(b, a) && (b.v & F32_SIGN));
if(isNaNF32UI(a.v) && isNaNF32UI(b.v)) return rtlToF32(defaultNaNF32UI);
else return(greater || isNaNF32UI(b.v) ? a : b);
}

static inline float64_t f64_min(float64_t a, float64_t b){
bool less = f64_lt_quiet(a, b) || (f64_eq(a, b) && (a.v & F64_SIGN));
if(isNaNF64UI(a.v) && isNaNF64UI(b.v)) return rtlToF64(defaultNaNF64UI);
else return(less || isNaNF64UI(b.v) ? a : b);
}

static inline float64_t f64_max(float64_t a, float64_t b){
bool greater = f64_lt_quiet(b, a) || (f64_eq(b, a) && (b.v & F64_SIGN));
if(isNaNF64UI(a.v) && isNaNF64UI(b.v)) return rtlToF64(defaultNaNF64UI);
else return(greater || isNaNF64UI(b.v) ? a : b);
}

uint32_t isa_fp_get_rm(Decode *s);
void isa_fp_update_ex_flags(Decode *s, uint32_t ex_flags);
void isa_fp_set_ex(uint32_t ex);

def_rtl(fpcall, rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, uint32_t cmd) {
softfloat_roundingMode = isa_fp_get_rm(s);
IFDEF(CONFIG_FPU_SOFTFLOAT, fp_set_rm(isa_fp_get_rm(s)));
int w = FPCALL_W(cmd);
int op = FPCALL_OP(cmd);

Expand Down Expand Up @@ -78,10 +50,10 @@ def_rtl(fpcall, rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, uint
case FPCALL_I64ToF: *dest = i64_to_f32 (*src1).v; break;
case FPCALL_U64ToF: *dest = ui64_to_f32(*src1).v; break;

case FPCALL_FToI32: *dest = f32_to_i32 (fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToU32: *dest = f32_to_ui32(fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToI64: *dest = f32_to_i64 (fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToU64: *dest = f32_to_ui64(fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToI32: *dest = my_f32_to_i32 (fsrc1); break;
case FPCALL_FToU32: *dest = my_f32_to_ui32(fsrc1); break;
case FPCALL_FToI64: *dest = my_f32_to_i64 (fsrc1); break;
case FPCALL_FToU64: *dest = my_f32_to_ui64(fsrc1); break;
default: panic("op = %d not supported", op);
}
} else if (w == FPCALL_W64) {
Expand All @@ -108,19 +80,20 @@ def_rtl(fpcall, rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, uint
case FPCALL_I64ToF: *dest = i64_to_f64 (*src1).v; break;
case FPCALL_U64ToF: *dest = ui64_to_f64(*src1).v; break;

case FPCALL_FToI32: *dest = f64_to_i32 (fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToU32: *dest = f64_to_ui32(fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToI64: *dest = f64_to_i64 (fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToU64: *dest = f64_to_ui64(fsrc1, softfloat_roundingMode, true); break;
case FPCALL_FToI32: *dest = my_f64_to_i32 (fsrc1); break;
case FPCALL_FToU32: *dest = my_f64_to_ui32(fsrc1); break;
case FPCALL_FToI64: *dest = my_f64_to_i64 (fsrc1); break;
case FPCALL_FToU64: *dest = my_f64_to_ui64(fsrc1); break;

case FPCALL_F32ToF64: *dest = f32_to_f64(rtlToF32(*src1)).v; break;
case FPCALL_F64ToF32: *dest = f64_to_f32(fsrc1).v; break;
default: panic("op = %d not supported", op);
}
}

if (softfloat_exceptionFlags) {
isa_fp_update_ex_flags(s, softfloat_exceptionFlags);
softfloat_exceptionFlags = 0;
uint32_t ex = MUXDEF(CONFIG_FPU_SOFTFLOAT, fp_get_exception(), 0);
if (ex) {
isa_fp_set_ex(ex);
fp_clear_exception();
}
}
88 changes: 88 additions & 0 deletions src/engine/interpreter/host-fp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Host-FPU backend: implements the FP helper interface with native
// float/double arithmetic instead of the softfloat library.
#ifndef __HOSTFP_H__
// The guard macro must be defined here; without it the #ifndef is always
// true and a second inclusion would redefine every type and function below.
#define __HOSTFP_H__

#include <rtl/fp.h>
#include <math.h>
#include <fenv.h>

// Bit pattern of a quiet binary32 NaN with a zero payload.
#define defaultNaNF32UI 0x7FC00000

/*
 * A binary32 value that can be viewed either as raw bits (v) or as a
 * host float (f).
 */
typedef union {
  uint32_t v;
  float f;
} float32_t;

/* Wrap a host float into a float32_t. */
static inline float32_t float32(float f) {
  float32_t wrapped;
  wrapped.f = f;
  return wrapped;
}

/*
 * binary32 arithmetic, min/max and comparisons, evaluated with the host's
 * native float operations and libm.  Each function takes and returns the
 * float32_t union so callers can keep working on raw bit patterns.
 */
static inline float32_t f32_add(float32_t a, float32_t b) {
  float32_t sum = { .f = a.f + b.f };
  return sum;
}

static inline float32_t f32_sub(float32_t a, float32_t b) {
  float32_t diff = { .f = a.f - b.f };
  return diff;
}

static inline float32_t f32_mul(float32_t a, float32_t b) {
  float32_t prod = { .f = a.f * b.f };
  return prod;
}

static inline float32_t f32_div(float32_t a, float32_t b) {
  float32_t quot = { .f = a.f / b.f };
  return quot;
}

static inline float32_t f32_sqrt(float32_t a) {
  float32_t root = { .f = sqrtf(a.f) };
  return root;
}

/* Fused multiply-add: a * b + c via fmaf(). */
static inline float32_t f32_mulAdd(float32_t a, float32_t b, float32_t c) {
  float32_t fused = { .f = fmaf(a.f, b.f, c.f) };
  return fused;
}

static inline float32_t f32_min(float32_t a, float32_t b) {
  float32_t lo = { .f = fminf(a.f, b.f) };
  return lo;
}

static inline float32_t f32_max(float32_t a, float32_t b) {
  float32_t hi = { .f = fmaxf(a.f, b.f) };
  return hi;
}

/* Comparisons use the host operators directly. */
static inline bool f32_le(float32_t a, float32_t b) {
  return a.f <= b.f;
}

static inline bool f32_lt(float32_t a, float32_t b) {
  return a.f < b.f;
}

static inline bool f32_eq(float32_t a, float32_t b) {
  return a.f == b.f;
}
/*
 * Integer -> binary32 conversions.  The register value is first
 * reinterpreted as the requested integer type, then converted by the host.
 */
static inline float32_t i32_to_f32(rtlreg_t a) {
  float32_t r = { .f = (float)(int32_t)a };
  return r;
}

static inline float32_t ui32_to_f32(rtlreg_t a) {
  float32_t r = { .f = (float)(uint32_t)a };
  return r;
}

static inline float32_t i64_to_f32(rtlreg_t a) {
  float32_t r = { .f = (float)(int64_t)a };
  return r;
}

static inline float32_t ui64_to_f32(rtlreg_t a) {
  float32_t r = { .f = (float)(uint64_t)a };
  return r;
}
/*
 * binary32 -> integer conversions with saturation.
 *
 * A plain C cast is undefined behaviour when the value is NaN or does not
 * fit in the destination type.  These helpers instead clamp to the
 * destination range and map NaN to the maximum value, which is what the
 * RISC-V FCVT instructions produce.
 *
 * The range test is written as !(x < limit) so that a single comparison
 * catches both "too large" and NaN (every ordered comparison with NaN is
 * false).
 *
 * NOTE(review): in-range values are still converted with a C cast, which
 * always truncates toward zero; the guest rounding mode is not honoured.
 */
static inline int32_t my_f32_to_i32(float32_t a) {
  if (!(a.f < 0x1p31f)) return INT32_MAX;   // >= 2^31, +inf or NaN
  if (a.f <= -0x1p31f) return INT32_MIN;    // <= -2^31 or -inf
  return (int32_t)a.f;
}

static inline uint32_t my_f32_to_ui32(float32_t a) {
  if (!(a.f < 0x1p32f)) return UINT32_MAX;  // >= 2^32, +inf or NaN
  if (a.f <= -1.0f) return 0;               // (-1, 0) still truncates to 0 below
  return (uint32_t)a.f;
}

static inline int64_t my_f32_to_i64(float32_t a) {
  if (!(a.f < 0x1p63f)) return INT64_MAX;   // >= 2^63, +inf or NaN
  if (a.f <= -0x1p63f) return INT64_MIN;    // <= -2^63 or -inf
  return (int64_t)a.f;
}

static inline uint64_t my_f32_to_ui64(float32_t a) {
  if (!(a.f < 0x1p64f)) return UINT64_MAX;  // >= 2^64, +inf or NaN
  if (a.f <= -1.0f) return 0;               // (-1, 0) still truncates to 0 below
  return (uint64_t)a.f;
}


/*
 * A binary64 value that can be viewed either as raw bits (v) or as a
 * host double (f).
 */
typedef union {
  uint64_t v;
  double f;
} float64_t;

/* Wrap a host double into a float64_t. */
static inline float64_t float64(double f) {
  float64_t wrapped;
  wrapped.f = f;
  return wrapped;
}

/*
 * binary64 arithmetic, min/max and comparisons, evaluated with the host's
 * native double operations and libm.  Each function takes and returns the
 * float64_t union so callers can keep working on raw bit patterns.
 */
static inline float64_t f64_add(float64_t a, float64_t b) {
  float64_t sum = { .f = a.f + b.f };
  return sum;
}

static inline float64_t f64_sub(float64_t a, float64_t b) {
  float64_t diff = { .f = a.f - b.f };
  return diff;
}

static inline float64_t f64_mul(float64_t a, float64_t b) {
  float64_t prod = { .f = a.f * b.f };
  return prod;
}

static inline float64_t f64_div(float64_t a, float64_t b) {
  float64_t quot = { .f = a.f / b.f };
  return quot;
}

static inline float64_t f64_sqrt(float64_t a) {
  float64_t root = { .f = sqrt(a.f) };
  return root;
}

/* Fused multiply-add: a * b + c via fma(). */
static inline float64_t f64_mulAdd(float64_t a, float64_t b, float64_t c) {
  float64_t fused = { .f = fma(a.f, b.f, c.f) };
  return fused;
}

static inline float64_t f64_min(float64_t a, float64_t b) {
  float64_t lo = { .f = fmin(a.f, b.f) };
  return lo;
}

static inline float64_t f64_max(float64_t a, float64_t b) {
  float64_t hi = { .f = fmax(a.f, b.f) };
  return hi;
}

/* Comparisons use the host operators directly. */
static inline bool f64_le(float64_t a, float64_t b) {
  return a.f <= b.f;
}

static inline bool f64_lt(float64_t a, float64_t b) {
  return a.f < b.f;
}

static inline bool f64_eq(float64_t a, float64_t b) {
  return a.f == b.f;
}
/*
 * Integer -> binary64 conversions.  The register value is first
 * reinterpreted as the requested integer type, then converted by the host.
 */
static inline float64_t i32_to_f64(rtlreg_t a) {
  float64_t r = { .f = (double)(int32_t)a };
  return r;
}

static inline float64_t ui32_to_f64(rtlreg_t a) {
  float64_t r = { .f = (double)(uint32_t)a };
  return r;
}

static inline float64_t i64_to_f64(rtlreg_t a) {
  float64_t r = { .f = (double)(int64_t)a };
  return r;
}

static inline float64_t ui64_to_f64(rtlreg_t a) {
  float64_t r = { .f = (double)(uint64_t)a };
  return r;
}
/*
 * binary64 -> integer conversions with saturation.
 *
 * A plain C cast is undefined behaviour when the value is NaN or does not
 * fit in the destination type.  These helpers instead clamp to the
 * destination range and map NaN to the maximum value, which is what the
 * RISC-V FCVT instructions produce.
 *
 * The range test is written as !(x < limit) so that a single comparison
 * catches both "too large" and NaN (every ordered comparison with NaN is
 * false).  Values strictly between the lower limit and the next integer
 * below it would truncate to the limit anyway, so returning the limit for
 * them is exact.
 *
 * NOTE(review): in-range values are still converted with a C cast, which
 * always truncates toward zero; the guest rounding mode is not honoured.
 */
static inline int32_t my_f64_to_i32(float64_t a) {
  if (!(a.f < 0x1p31)) return INT32_MAX;   // >= 2^31, +inf or NaN
  if (a.f <= -0x1p31) return INT32_MIN;    // <= -2^31 or -inf
  return (int32_t)a.f;
}

static inline uint32_t my_f64_to_ui32(float64_t a) {
  if (!(a.f < 0x1p32)) return UINT32_MAX;  // >= 2^32, +inf or NaN
  if (a.f <= -1.0) return 0;               // (-1, 0) still truncates to 0 below
  return (uint32_t)a.f;
}

static inline int64_t my_f64_to_i64(float64_t a) {
  if (!(a.f < 0x1p63)) return INT64_MAX;   // >= 2^63, +inf or NaN
  if (a.f <= -0x1p63) return INT64_MIN;    // <= -2^63 or -inf
  return (int64_t)a.f;
}

static inline uint64_t my_f64_to_ui64(float64_t a) {
  if (!(a.f < 0x1p64)) return UINT64_MAX;  // >= 2^64, +inf or NaN
  if (a.f <= -1.0) return 0;               // (-1, 0) still truncates to 0 below
  return (uint64_t)a.f;
}

/* Widen a binary32 value to binary64 using the host conversion. */
static inline float64_t f32_to_f64(float32_t a) {
  float64_t wide = { .f = (double)a.f };
  return wide;
}

/* Narrow a binary64 value to binary32 using the host conversion. */
static inline float32_t f64_to_f32(float64_t a) {
  float32_t narrow = { .f = (float)a.f };
  return narrow;
}


/*
 * Program the host rounding direction from an FPCALL_RM_* identifier.
 * Any other value trips the assertion.
 */
static inline void fp_set_rm(int rm) {
  // Starts as the raw argument so that, if the assertion is compiled out,
  // an unknown value is forwarded unchanged exactly as before.
  int host_mode = rm;

  if (rm == FPCALL_RM_RNE) {
    host_mode = FE_TONEAREST;
  } else if (rm == FPCALL_RM_RTZ) {
    host_mode = FE_TOWARDZERO;
  } else if (rm == FPCALL_RM_RDN) {
    host_mode = FE_DOWNWARD;
  } else if (rm == FPCALL_RM_RUP) {
    host_mode = FE_UPWARD;
  } else if (rm == FPCALL_RM_RMM) {
    host_mode = FE_TONEAREST;  // x86 does not support RMM
  } else {
    assert(0);
  }

  fesetround(host_mode);
}

/*
 * Read the host's currently raised FP exceptions and translate them into
 * the engine-independent FPCALL_EX_* bit set.
 *
 * Returns 0 when no exception is pending.  The host flags are left
 * untouched; use fp_clear_exception() to reset them.
 */
static inline uint32_t fp_get_exception(void) {
  uint32_t ex = 0;
  int host_ex = fetestexcept(FE_ALL_EXCEPT);
  if (host_ex & FE_INEXACT  ) ex |= FPCALL_EX_NX;
  if (host_ex & FE_UNDERFLOW) ex |= FPCALL_EX_UF;
  if (host_ex & FE_OVERFLOW ) ex |= FPCALL_EX_OF;
  if (host_ex & FE_DIVBYZERO) ex |= FPCALL_EX_DZ;
  if (host_ex & FE_INVALID  ) ex |= FPCALL_EX_NV;
  return ex;
}

static inline void fp_clear_exception() {
feclearexcept(FE_ALL_EXCEPT);
}

#endif
88 changes: 88 additions & 0 deletions src/engine/interpreter/softfloat-fp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#ifndef __SOFTFLOAT_FP_H__
#define __SOFTFLOAT_FP_H__

#include <softfloat.h>
#include <specialize.h>
#include <internals.h>

// Masks selecting the sign bit of a binary32 / binary64 bit pattern.
// The cast applies to the literal before the shift, so the shift is done
// in 64 bits.
#define F32_SIGN ((uint64_t)1ul << 31)
#define F64_SIGN ((uint64_t)1ul << 63)

// Forward declarations only: the min/max helpers below call these to build
// the default NaN, so whatever source file includes this header must
// provide the definitions.
static inline float32_t rtlToF32(rtlreg_t r);
static inline float64_t rtlToF64(rtlreg_t r);

/*
 * Minimum of two binary32 values.
 *   - both NaN          -> default NaN
 *   - exactly one NaN   -> the other operand
 *   - equal values      -> a if a has its sign bit set, otherwise b
 */
static inline float32_t f32_min(float32_t a, float32_t b) {
  // The ordering is evaluated before the NaN checks, so the softfloat
  // comparison helpers are always invoked, as in the original ordering.
  const bool pick_a = f32_lt_quiet(a, b) || (f32_eq(a, b) && (a.v & F32_SIGN));
  const bool a_is_nan = isNaNF32UI(a.v);
  const bool b_is_nan = isNaNF32UI(b.v);

  if (a_is_nan && b_is_nan) {
    return rtlToF32(defaultNaNF32UI);
  }
  if (pick_a || b_is_nan) {
    return a;
  }
  return b;
}

/*
 * Maximum of two binary32 values.
 *   - both NaN          -> default NaN
 *   - exactly one NaN   -> the other operand
 *   - equal values      -> a if b has its sign bit set, otherwise b
 */
static inline float32_t f32_max(float32_t a, float32_t b) {
  // The ordering is evaluated before the NaN checks, so the softfloat
  // comparison helpers are always invoked, as in the original ordering.
  const bool pick_a = f32_lt_quiet(b, a) || (f32_eq(b, a) && (b.v & F32_SIGN));
  const bool a_is_nan = isNaNF32UI(a.v);
  const bool b_is_nan = isNaNF32UI(b.v);

  if (a_is_nan && b_is_nan) {
    return rtlToF32(defaultNaNF32UI);
  }
  if (pick_a || b_is_nan) {
    return a;
  }
  return b;
}

/*
 * Minimum of two binary64 values.
 *   - both NaN          -> default NaN
 *   - exactly one NaN   -> the other operand
 *   - equal values      -> a if a has its sign bit set, otherwise b
 */
static inline float64_t f64_min(float64_t a, float64_t b) {
  // The ordering is evaluated before the NaN checks, so the softfloat
  // comparison helpers are always invoked, as in the original ordering.
  const bool pick_a = f64_lt_quiet(a, b) || (f64_eq(a, b) && (a.v & F64_SIGN));
  const bool a_is_nan = isNaNF64UI(a.v);
  const bool b_is_nan = isNaNF64UI(b.v);

  if (a_is_nan && b_is_nan) {
    return rtlToF64(defaultNaNF64UI);
  }
  if (pick_a || b_is_nan) {
    return a;
  }
  return b;
}

/*
 * Maximum of two binary64 values.
 *   - both NaN          -> default NaN
 *   - exactly one NaN   -> the other operand
 *   - equal values      -> a if b has its sign bit set, otherwise b
 */
static inline float64_t f64_max(float64_t a, float64_t b) {
  // The ordering is evaluated before the NaN checks, so the softfloat
  // comparison helpers are always invoked, as in the original ordering.
  const bool pick_a = f64_lt_quiet(b, a) || (f64_eq(b, a) && (b.v & F64_SIGN));
  const bool a_is_nan = isNaNF64UI(a.v);
  const bool b_is_nan = isNaNF64UI(b.v);

  if (a_is_nan && b_is_nan) {
    return rtlToF64(defaultNaNF64UI);
  }
  if (pick_a || b_is_nan) {
    return a;
  }
  return b;
}

/*
 * Float -> integer conversion wrappers.  Each forwards to the softfloat
 * routine of the same name, supplying the current softfloat_roundingMode
 * and `true` as the final argument, so callers only pass the operand.
 * NOTE(review): the final argument is presumably softfloat's "exact"
 * switch (raise inexact when the result is rounded) — confirm against the
 * softfloat API.
 */

/* binary32 sources */
static inline int32_t my_f32_to_i32(float32_t a) {
  const bool exact = true;
  return f32_to_i32(a, softfloat_roundingMode, exact);
}

static inline uint32_t my_f32_to_ui32(float32_t a) {
  const bool exact = true;
  return f32_to_ui32(a, softfloat_roundingMode, exact);
}

static inline int64_t my_f32_to_i64(float32_t a) {
  const bool exact = true;
  return f32_to_i64(a, softfloat_roundingMode, exact);
}

static inline uint64_t my_f32_to_ui64(float32_t a) {
  const bool exact = true;
  return f32_to_ui64(a, softfloat_roundingMode, exact);
}

/* binary64 sources */
static inline int32_t my_f64_to_i32(float64_t a) {
  const bool exact = true;
  return f64_to_i32(a, softfloat_roundingMode, exact);
}

static inline uint32_t my_f64_to_ui32(float64_t a) {
  const bool exact = true;
  return f64_to_ui32(a, softfloat_roundingMode, exact);
}

static inline int64_t my_f64_to_i64(float64_t a) {
  const bool exact = true;
  return f64_to_i64(a, softfloat_roundingMode, exact);
}

static inline uint64_t my_f64_to_ui64(float64_t a) {
  const bool exact = true;
  return f64_to_ui64(a, softfloat_roundingMode, exact);
}

/*
 * Set softfloat_roundingMode from an FPCALL_RM_* identifier.
 * Any other value trips the assertion and leaves the mode unchanged.
 */
static inline void fp_set_rm(int rm) {
  if (rm == FPCALL_RM_RNE) {
    softfloat_roundingMode = softfloat_round_near_even;
  } else if (rm == FPCALL_RM_RTZ) {
    softfloat_roundingMode = softfloat_round_minMag;
  } else if (rm == FPCALL_RM_RDN) {
    softfloat_roundingMode = softfloat_round_min;
  } else if (rm == FPCALL_RM_RUP) {
    softfloat_roundingMode = softfloat_round_max;
  } else if (rm == FPCALL_RM_RMM) {
    softfloat_roundingMode = softfloat_round_near_maxMag;
  } else {
    assert(0);
  }
}

/*
 * Translate the pending softfloat exception flags into the
 * engine-independent FPCALL_EX_* bit set.
 *
 * Returns 0 when no flag is set.  softfloat_exceptionFlags is only read
 * here; use fp_clear_exception() to reset it.
 */
static inline uint32_t fp_get_exception(void) {
  uint32_t ex = 0;
  uint32_t softfp_ex = softfloat_exceptionFlags;
  if (softfp_ex & softfloat_flag_inexact  ) ex |= FPCALL_EX_NX;
  if (softfp_ex & softfloat_flag_underflow) ex |= FPCALL_EX_UF;
  if (softfp_ex & softfloat_flag_overflow ) ex |= FPCALL_EX_OF;
  // softfloat's "infinite" flag is reported as divide-by-zero
  if (softfp_ex & softfloat_flag_infinite ) ex |= FPCALL_EX_DZ;
  if (softfp_ex & softfloat_flag_invalid  ) ex |= FPCALL_EX_NV;
  return ex;
}

/* Reset all pending softfloat exception flags. */
static inline void fp_clear_exception(void) {
  softfloat_exceptionFlags = 0;
}
#endif
24 changes: 20 additions & 4 deletions src/isa/riscv64/instr/fp.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "../local-include/csr.h"
#include "../local-include/intr.h"
#include <rtl/fp.h>
#include <cpu/cpu.h>

bool fp_enable() {
Expand All @@ -12,16 +13,31 @@ void fp_set_dirty() {
}

uint32_t isa_fp_get_rm(Decode *s) {
int rm = s->isa.instr.fp.rm;
uint32_t rm = s->isa.instr.fp.rm;
if (rm == 7) { rm = fcsr->frm; }
if (rm <= 4) { return rm; }
if (rm <= 4) {
switch (rm) {
case 0: return FPCALL_RM_RNE;
case 1: return FPCALL_RM_RTZ;
case 2: return FPCALL_RM_RDN;
case 3: return FPCALL_RM_RUP;
case 4: return FPCALL_RM_RMM;
default: assert(0);
}
}
else {
save_globals(s);
longjmp_exception(EX_II);
}
}

void isa_fp_update_ex_flags(Decode *s, uint32_t ex_flags) {
fcsr->fflags.val |= ex_flags;
/*
 * Record FP exceptions raised by the instruction just executed.
 *
 * ex: OR of engine-independent FPCALL_EX_* flags.
 *
 * The flags are translated to the fflags bit layout and accumulated into
 * fcsr->fflags, then the FP state is marked dirty.
 */
void isa_fp_set_ex(uint32_t ex) {
  uint32_t f = 0;
  if (ex & FPCALL_EX_NX) f |= 0x01;
  if (ex & FPCALL_EX_UF) f |= 0x02;
  if (ex & FPCALL_EX_OF) f |= 0x04;
  if (ex & FPCALL_EX_DZ) f |= 0x08;
  if (ex & FPCALL_EX_NV) f |= 0x10;
  // fflags holds accrued (sticky) exceptions: OR the new bits in.  A plain
  // assignment would erase flags raised by earlier instructions, because
  // the caller clears the backend's flags after every operation.
  fcsr->fflags.val |= f;
  fp_set_dirty();
}

0 comments on commit 795dd3c

Please sign in to comment.