riscv/lazyfpu: Add option to disable lazy FPU
Adds an option to use the old implementation, where the FPU state is stored on the process stack.
This commit is contained in:
parent
83105cfa49
commit
d0fbf9883d
5 changed files with 86 additions and 15 deletions
41
arch/Kconfig
41
arch/Kconfig
|
@ -93,6 +93,7 @@ config ARCH_RISCV
|
|||
select ARCH_HAVE_RDWR_MEM_CPU_RUN
|
||||
select ARCH_HAVE_TCBINFO
|
||||
select ARCH_HAVE_THREAD_LOCAL
|
||||
select ARCH_HAVE_LAZYFPU if ARCH_HAVE_FPU
|
||||
---help---
|
||||
RISC-V 32 and 64-bit RV32 / RV64 architectures.
|
||||
|
||||
|
@ -416,6 +417,11 @@ config ARCH_HAVE_DPFPU
|
|||
default n
|
||||
select ARCH_HAVE_FPU
|
||||
|
||||
config ARCH_HAVE_LAZYFPU
|
||||
bool
|
||||
default n
|
||||
depends on ARCH_HAVE_FPU
|
||||
|
||||
config ARCH_HAVE_MMU
|
||||
bool
|
||||
default n
|
||||
|
@ -516,6 +522,41 @@ config ARCH_DPFPU
|
|||
Enable toolchain support for double precision (64-bit) floating
|
||||
point if both the toolchain and the hardware support it.
|
||||
|
||||
config ARCH_LAZYFPU
|
||||
bool "Enable lazy FPU state save / restore"
|
||||
default n
|
||||
depends on ARCH_FPU && ARCH_HAVE_LAZYFPU
|
||||
---help---
|
||||
Enable lazy FPU state save and restore. Normally FPU state is saved
|
||||
and restored with the integer context registers, if the task is using
|
||||
FPU. The state is typically saved into the task's user stack upon
|
||||
exception entry or context switch out, and restored when the
|
||||
exception returns or context switches back in.
|
||||
|
||||
As the kernel does not use FPU, this can be optimized with the help
|
||||
of the FPU hardware status and a bit of code logic inside the kernel.
|
||||
The logic keeps track of the FPU state, which can be "unused",
|
||||
"dirty" or "clean". A clean state means the FPU has not been used
|
||||
since the last state save, while the dirty state indicates that the
|
||||
FPU has been used.
|
||||
|
||||
The optimization saves / restores FPU registers only if:
|
||||
- A context change has happened; save and restore do not happen
|
||||
during exception entry / return to the same task
|
||||
- FPU is in use (state is not unused) and
|
||||
- FPU status is dirty, i.e. FPU has been used after the last state save
|
||||
- FPU restore happens when the status is dirty or clean
|
||||
|
||||
This saves CPU time as the FPU registers do not have to be moved in
|
||||
and out when handling an exception that does not result in a context
|
||||
switch.
|
||||
|
||||
The tradeoff with the lazy FPU feature is that it requires a static
|
||||
memory allocation from the task's TCB to store the FPU registers,
|
||||
while the non-lazy style can use stack memory for storing the FPU
|
||||
registers, saving memory as the stack frame for the FPU registers can
|
||||
be skipped if the FPU is not in use.
|
||||
|
||||
config ARCH_USE_MMU
|
||||
bool "Enable MMU"
|
||||
default n
|
||||
|
|
|
@ -247,9 +247,15 @@
|
|||
|
||||
#define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS)
|
||||
|
||||
#ifdef CONFIG_ARCH_LAZYFPU
|
||||
/* Save only integer regs. FPU is handled separately */
|
||||
|
||||
#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE)
|
||||
#else
|
||||
/* Save FPU registers with the integer registers */
|
||||
|
||||
#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE + FPU_XCPT_SIZE)
|
||||
#endif
|
||||
|
||||
/* In assembly language, values have to be referenced as byte address
|
||||
* offsets. But in C, it is more convenient to reference registers as
|
||||
|
@ -570,7 +576,7 @@ struct xcptcontext
|
|||
|
||||
/* FPU register save area */
|
||||
|
||||
#ifdef CONFIG_ARCH_FPU
|
||||
#if defined(CONFIG_ARCH_FPU) && defined(CONFIG_ARCH_LAZYFPU)
|
||||
uintptr_t fregs[FPU_XCPT_REGS];
|
||||
#endif
|
||||
};
|
||||
|
|
|
@ -109,6 +109,9 @@ pid_t riscv_fork(const struct fork_s *context)
|
|||
uintptr_t newtop;
|
||||
uintptr_t stacktop;
|
||||
uintptr_t stackutil;
|
||||
#ifdef CONFIG_ARCH_FPU
|
||||
uintptr_t *fregs;
|
||||
#endif
|
||||
|
||||
sinfo("s0:%" PRIxREG " s1:%" PRIxREG " s2:%" PRIxREG " s3:%" PRIxREG ""
|
||||
" s4:%" PRIxREG "\n",
|
||||
|
@ -228,18 +231,19 @@ pid_t riscv_fork(const struct fork_s *context)
|
|||
child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */
|
||||
#endif
|
||||
#ifdef CONFIG_ARCH_FPU
|
||||
child->cmn.xcp.fregs[REG_FS0] = context->fs0; /* Saved register fs0 */
|
||||
child->cmn.xcp.fregs[REG_FS1] = context->fs1; /* Saved register fs1 */
|
||||
child->cmn.xcp.fregs[REG_FS2] = context->fs2; /* Saved register fs2 */
|
||||
child->cmn.xcp.fregs[REG_FS3] = context->fs3; /* Saved register fs3 */
|
||||
child->cmn.xcp.fregs[REG_FS4] = context->fs4; /* Saved register fs4 */
|
||||
child->cmn.xcp.fregs[REG_FS5] = context->fs5; /* Saved register fs5 */
|
||||
child->cmn.xcp.fregs[REG_FS6] = context->fs6; /* Saved register fs6 */
|
||||
child->cmn.xcp.fregs[REG_FS7] = context->fs7; /* Saved register fs7 */
|
||||
child->cmn.xcp.fregs[REG_FS8] = context->fs8; /* Saved register fs8 */
|
||||
child->cmn.xcp.fregs[REG_FS9] = context->fs9; /* Saved register fs9 */
|
||||
child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
|
||||
child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
|
||||
fregs = riscv_fpuregs(&child->cmn);
|
||||
fregs[REG_FS0] = context->fs0; /* Saved register fs0 */
|
||||
fregs[REG_FS1] = context->fs1; /* Saved register fs1 */
|
||||
fregs[REG_FS2] = context->fs2; /* Saved register fs2 */
|
||||
fregs[REG_FS3] = context->fs3; /* Saved register fs3 */
|
||||
fregs[REG_FS4] = context->fs4; /* Saved register fs4 */
|
||||
fregs[REG_FS5] = context->fs5; /* Saved register fs5 */
|
||||
fregs[REG_FS6] = context->fs6; /* Saved register fs6 */
|
||||
fregs[REG_FS7] = context->fs7; /* Saved register fs7 */
|
||||
fregs[REG_FS8] = context->fs8; /* Saved register fs8 */
|
||||
fregs[REG_FS9] = context->fs9; /* Saved register fs9 */
|
||||
fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
|
||||
fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_LIB_SYSCALL
|
||||
|
|
|
@ -102,7 +102,11 @@ riscv_savefpu:
|
|||
li t1, MSTATUS_FS
|
||||
and t2, t0, t1
|
||||
li t1, MSTATUS_FS_DIRTY
|
||||
#ifdef CONFIG_ARCH_LAZYFPU
|
||||
bne t2, t1, 1f
|
||||
#else
|
||||
blt t2, t1, 1f
|
||||
#endif
|
||||
li t1, ~MSTATUS_FS
|
||||
and t0, t0, t1
|
||||
li t1, MSTATUS_FS_CLEAN
|
||||
|
|
|
@ -206,10 +206,26 @@ void riscv_exception_attach(void);
|
|||
void riscv_fpuconfig(void);
|
||||
void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
|
||||
void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
|
||||
|
||||
/* Get FPU register save area */
|
||||
|
||||
static inline uintptr_t *riscv_fpuregs(struct tcb_s *tcb)
|
||||
{
|
||||
#ifdef CONFIG_ARCH_LAZYFPU
|
||||
/* Lazy FPU: the save area is the dedicated fregs[] array inside tcb->xcp */
|
||||
|
||||
return tcb->xcp.fregs;
|
||||
#else
|
||||
/* Non-lazy: the FPU registers start INT_XCPT_SIZE bytes past the address held in tcb->xcp.regs, i.e. right after the integer registers */
|
||||
|
||||
return (uintptr_t *)((uintptr_t)tcb->xcp.regs + INT_XCPT_SIZE);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
# define riscv_fpuconfig()
|
||||
# define riscv_savefpu(regs, fregs)
|
||||
# define riscv_restorefpu(regs, fregs)
|
||||
# define riscv_fpuregs(tcb)
|
||||
#endif
|
||||
|
||||
/* Save / restore context of task */
|
||||
|
@ -221,7 +237,7 @@ static inline void riscv_savecontext(struct tcb_s *tcb)
|
|||
#ifdef CONFIG_ARCH_FPU
|
||||
/* Save current process FPU state to TCB */
|
||||
|
||||
riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs);
|
||||
riscv_savefpu(tcb->xcp.regs, riscv_fpuregs(tcb));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -232,7 +248,7 @@ static inline void riscv_restorecontext(struct tcb_s *tcb)
|
|||
#ifdef CONFIG_ARCH_FPU
|
||||
/* Restore FPU state for next process */
|
||||
|
||||
riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs);
|
||||
riscv_restorefpu(tcb->xcp.regs, riscv_fpuregs(tcb));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue