riscv/lazyfpu: Add option to disable lazy FPU

Add an option to use the old implementation, where the FPU state is
stored on the process stack.
This commit is contained in:
Ville Juven 2023-06-07 14:02:48 +03:00 committed by Xiang Xiao
parent 83105cfa49
commit d0fbf9883d
5 changed files with 86 additions and 15 deletions

View file

@ -93,6 +93,7 @@ config ARCH_RISCV
select ARCH_HAVE_RDWR_MEM_CPU_RUN
select ARCH_HAVE_TCBINFO
select ARCH_HAVE_THREAD_LOCAL
select ARCH_HAVE_LAZYFPU if ARCH_HAVE_FPU
---help---
RISC-V 32 and 64-bit RV32 / RV64 architectures.
@ -416,6 +417,11 @@ config ARCH_HAVE_DPFPU
default n
select ARCH_HAVE_FPU
# Promptless capability symbol: ARCH_RISCV selects it when ARCH_HAVE_FPU is
# set, and the user-visible ARCH_LAZYFPU option depends on it.
config ARCH_HAVE_LAZYFPU
bool
default n
depends on ARCH_HAVE_FPU
config ARCH_HAVE_MMU
bool
default n
@ -516,6 +522,41 @@ config ARCH_DPFPU
Enable toolchain support for double precision (64-bit) floating
point if both the toolchain and the hardware support it.
config ARCH_LAZYFPU
bool "Enable lazy FPU state save / restore"
default n
depends on ARCH_FPU && ARCH_HAVE_LAZYFPU
---help---
Enable lazy FPU state save and restore. Normally FPU state is saved
and restored with the integer context registers, if the task is using
FPU. The state is typically saved into the task's user stack upon
exception entry or context switch out, and restored when the
exception returns or context switches back in.
As the kernel does not use FPU, this can be optimized with the help
of the FPU hardware status and a bit of code logic inside the kernel.
The logic keeps track of the FPU state, which can be "unused",
"dirty" or "clean". A clean state means the FPU has not been used
since the last state save, while the dirty state indicates that the
FPU has been used.
The optimization saves / restores FPU registers only if:
- A context change has happened, save and restore does not happen
during exception entry / return to the same task
- FPU is in use (state is not unused) and
- FPU status is dirty, i.e. FPU has been used after the last
state save
- FPU restore happens when the status is dirty or clean
This saves CPU time as the FPU registers do not have to be moved in
and out when handling an exception that does not result in a context
switch.
The tradeoff with the lazy FPU feature is that it requires a static
memory allocation from the task's TCB to store the FPU registers,
while the non-lazy style can use stack memory for storing the FPU
registers, saving memory as the stack frame for the FPU registers can
be skipped if the FPU is not in use.
config ARCH_USE_MMU
bool "Enable MMU"
default n

View file

@ -247,9 +247,15 @@
#define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS)
/* XCPTCONTEXT_SIZE selects how much is saved with the integer registers:
 * without lazy FPU the FPU registers are saved together with them, while
 * with lazy FPU only the integer registers are saved and the FPU is handled
 * separately.
 */

#ifndef CONFIG_ARCH_LAZYFPU
#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE + FPU_XCPT_SIZE)
#else
#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE)
#endif
/* In assembly language, values have to be referenced as byte address
* offsets. But in C, it is more convenient to reference registers as
@ -570,7 +576,7 @@ struct xcptcontext
/* FPU register save area */
#ifdef CONFIG_ARCH_FPU
#if defined(CONFIG_ARCH_FPU) && defined(CONFIG_ARCH_LAZYFPU)
uintptr_t fregs[FPU_XCPT_REGS];
#endif
};

View file

@ -109,6 +109,9 @@ pid_t riscv_fork(const struct fork_s *context)
uintptr_t newtop;
uintptr_t stacktop;
uintptr_t stackutil;
#ifdef CONFIG_ARCH_FPU
uintptr_t *fregs;
#endif
sinfo("s0:%" PRIxREG " s1:%" PRIxREG " s2:%" PRIxREG " s3:%" PRIxREG ""
" s4:%" PRIxREG "\n",
@ -228,18 +231,19 @@ pid_t riscv_fork(const struct fork_s *context)
child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */
#endif
#ifdef CONFIG_ARCH_FPU
child->cmn.xcp.fregs[REG_FS0] = context->fs0; /* Saved register fs1 */
child->cmn.xcp.fregs[REG_FS1] = context->fs1; /* Saved register fs1 */
child->cmn.xcp.fregs[REG_FS2] = context->fs2; /* Saved register fs2 */
child->cmn.xcp.fregs[REG_FS3] = context->fs3; /* Saved register fs3 */
child->cmn.xcp.fregs[REG_FS4] = context->fs4; /* Saved register fs4 */
child->cmn.xcp.fregs[REG_FS5] = context->fs5; /* Saved register fs5 */
child->cmn.xcp.fregs[REG_FS6] = context->fs6; /* Saved register fs6 */
child->cmn.xcp.fregs[REG_FS7] = context->fs7; /* Saved register fs7 */
child->cmn.xcp.fregs[REG_FS8] = context->fs8; /* Saved register fs8 */
child->cmn.xcp.fregs[REG_FS9] = context->fs9; /* Saved register fs9 */
child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
fregs = riscv_fpuregs(&child->cmn);
fregs[REG_FS0] = context->fs0; /* Saved register fs0 */
fregs[REG_FS1] = context->fs1; /* Saved register fs1 */
fregs[REG_FS2] = context->fs2; /* Saved register fs2 */
fregs[REG_FS3] = context->fs3; /* Saved register fs3 */
fregs[REG_FS4] = context->fs4; /* Saved register fs4 */
fregs[REG_FS5] = context->fs5; /* Saved register fs5 */
fregs[REG_FS6] = context->fs6; /* Saved register fs6 */
fregs[REG_FS7] = context->fs7; /* Saved register fs7 */
fregs[REG_FS8] = context->fs8; /* Saved register fs8 */
fregs[REG_FS9] = context->fs9; /* Saved register fs9 */
fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
#endif
#ifdef CONFIG_LIB_SYSCALL

View file

@ -102,7 +102,11 @@ riscv_savefpu:
li t1, MSTATUS_FS
and t2, t0, t1
li t1, MSTATUS_FS_DIRTY
#ifdef CONFIG_ARCH_LAZYFPU
bne t2, t1, 1f
#else
blt t2, t1, 1f
#endif
li t1, ~MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_CLEAN

View file

@ -206,10 +206,26 @@ void riscv_exception_attach(void);
/* FPU support routines; their definitions are not part of this section.
 *
 * riscv_fpuconfig  - NOTE(review): presumably performs FPU setup; confirm
 *                    against the implementation.
 * riscv_savefpu    - Called by riscv_savecontext() with tcb->xcp.regs and
 *                    the FPU register save area to save the FPU state.
 * riscv_restorefpu - Called by riscv_restorecontext() with the same two
 *                    pointers to restore the FPU state.
 */
void riscv_fpuconfig(void);
void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
/* Return a pointer to the FPU register save area of the given task.
 *
 * With CONFIG_ARCH_LAZYFPU the area is the xcp.fregs member of the TCB.
 * Otherwise it is located INT_XCPT_SIZE past the address given by xcp.regs,
 * i.e. after the integer registers.
 */
static inline uintptr_t *riscv_fpuregs(struct tcb_s *tcb)
{
#ifdef CONFIG_ARCH_LAZYFPU
  /* Lazy FPU: the FPU registers live in the TCB itself */

  uintptr_t *fpu_area = tcb->xcp.fregs;
#else
  /* Non-lazy FPU: the FPU registers follow the integer registers */

  uintptr_t int_base = (uintptr_t)tcb->xcp.regs;
  uintptr_t *fpu_area = (uintptr_t *)(int_base + INT_XCPT_SIZE);
#endif

  return fpu_area;
}
#else
/* Empty stand-ins for the FPU helpers, used by the #else branch of the
 * enclosing conditional (presumably when CONFIG_ARCH_FPU is not defined -
 * confirm against the opening #if, which is outside this view).  Every
 * macro expands to nothing, so riscv_fpuregs(tcb) yields no value here.
 */
# define riscv_fpuconfig()
# define riscv_savefpu(regs, fregs)
# define riscv_restorefpu(regs, fregs)
# define riscv_fpuregs(tcb)
#endif
/* Save / restore context of task */
@ -221,7 +237,7 @@ static inline void riscv_savecontext(struct tcb_s *tcb)
#ifdef CONFIG_ARCH_FPU
/* Save current process FPU state to TCB */
riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs);
riscv_savefpu(tcb->xcp.regs, riscv_fpuregs(tcb));
#endif
}
@ -232,7 +248,7 @@ static inline void riscv_restorecontext(struct tcb_s *tcb)
#ifdef CONFIG_ARCH_FPU
/* Restore FPU state for next process */
riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs);
riscv_restorefpu(tcb->xcp.regs, riscv_fpuregs(tcb));
#endif
}