mirror of
				https://github.com/espressif/esp-idf.git
				synced 2025-10-31 21:14:37 +00:00 
			
		
		
		
	 9300bef9b8
			
		
	
	9300bef9b8
	
	
	
		
			
			This commit refactors the FPU handling code on the Xtensa port of Amazon SMP
FreeRTOS in the following ways:
Auto-pinning via XT_RTOS_CP_EXC_HOOK
------------------------------------
The "_xt_coproc_exc" exception would previously automatically pin a task that
uses the FPU to the current core (to ensure that we can lazy save the task's FPU
context). However, this would mean that "xtensa_vectors.S" would need to be
OS-aware (to read the task's TCB structure).
This is now refactored so that "_xt_coproc_exc" calls a CP exception hook
function ("XT_RTOS_CP_EXC_HOOK") implemented in "portasm.S", thus allowing
"xtensa_vectors.S" to remain OS agnostic.
Using macros to acquire owner spinlock
--------------------------------------
The taking and releasing of the "_xt_coproc_owner_sa_lock" is now mostly
abstracted as the "spinlock_take" and "spinlock_release" macro. As a result,
"_xt_coproc_release" and "_xt_coproc_exc" are refactored so that:
- They are closer to their upstream (original) versions
- The spinlock is only taken when building for multicore
- The spinlock held region is shortened (now only protects the instructions
  that access the "_xt_coproc_owner_sa" array)
Other Changes
-------------
- Updated placing and comments of various "offset_..." constants used by
  portasm.S
- Update description of "get_cpsa_from_tcb" assembly macro
- Tidied up some typos in the ".S" files
		
	
		
			
				
	
	
		
			138 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			138 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * SPDX-FileCopyrightText: 2017, Intel Corporation
 | |
|  *
 | |
|  * SPDX-License-Identifier: Apache-2.0
 | |
|  *
 | |
|  * SPDX-FileContributor: 2016-2022 Espressif Systems (Shanghai) CO LTD
 | |
|  */
 | |
| 
 | |
| /* File adapted to use on IDF FreeRTOS component, extracted
 | |
|  * originally from zephyr RTOS code base:
 | |
|  * https://github.com/zephyrproject-rtos/zephyr/blob/dafd348/arch/xtensa/include/xtensa-asm2-s.h
 | |
|  */
 | |
| 
 | |
| #ifndef __XT_ASM_UTILS_H
 | |
| #define __XT_ASM_UTILS_H
 | |
| 
 | |
| /*
 | |
|  * SPILL_ALL_WINDOWS
 | |
|  *
 | |
|  * Spills all windowed registers (i.e. registers not visible as
 | |
|  * A0-A15) to their ABI-defined spill regions on the stack.
 | |
|  *
 | |
|  * Unlike the Xtensa HAL implementation, this code requires that the
 | |
|  * EXCM and WOE bit be enabled in PS, and relies on repeated hardware
 | |
|  * exception handling to do the register spills.  The trick is to do a
 | |
|  * noop write to the high registers, which the hardware will trap
 | |
|  * (into an overflow exception) in the case where those registers are
 | |
|  * already used by an existing call frame.  Then it rotates the window
 | |
|  * and repeats until all but the A0-A3 registers of the original frame
 | |
|  * are guaranteed to be spilled, eventually rotating back around into
 | |
|  * the original frame.  Advantages:
 | |
|  *
 | |
|  * - Vastly smaller code size
 | |
|  *
 | |
|  * - More easily maintained if changes are needed to window over/underflow
 | |
|  *   exception handling.
 | |
|  *
 | |
|  * - Requires no scratch registers to do its work, so can be used safely in any
 | |
|  *   context.
 | |
|  *
 | |
|  * - If the WOE bit is not enabled (for example, in code written for
 | |
|  *   the CALL0 ABI), this becomes a silent noop and operates compatibly.
 | |
|  *
 | |
|  * - Hilariously it's ACTUALLY FASTER than the HAL routine.  And not
 | |
|  *   just a little bit, it's MUCH faster.  With a mostly full register
 | |
|  *   file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
 | |
|  *   registers with this vs. 279 (!) to do it with
 | |
|  *   xthal_spill_windows().
 | |
|  */
 | |
| 
 | |
/*
 * Expands to a fixed run of "write a high register to itself, then rotate
 * the window" steps. The rotate amounts add up to XCHAL_NUM_AREGS / 4
 * (16 for a 64-entry register file, 8 for a 32-entry one), which is what
 * brings the window back around to the frame it started in.
 */
.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
    /* 4 x (touch a12, rotate by 3), then touch a12 and rotate by 4 */
    .rept 4
    and a12, a12, a12
    rotw 3
    .endr
    and a12, a12, a12
    rotw 4
#elif XCHAL_NUM_AREGS == 32
    /* 2 x (touch a12, rotate by 3), then touch a4 and rotate by 2 */
    .rept 2
    and a12, a12, a12
    rotw 3
    .endr
    and a4, a4, a4
    rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm
 | |
| 
 | |
| /*
 | |
| --------------------------------------------------------------------------------
 | |
|   Macro spinlock_take
 | |
| 
 | |
|   This macro will repeatedly attempt to atomically set a spinlock variable
 | |
|   using the s32c1i instruction. A spinlock is considered free if its value is 0.
 | |
| 
 | |
|   Entry:
 | |
|   - "reg_A/B" as scratch registers
 | |
|   - "lock_var" spinlock variable's symbol
 | |
|   - Interrupts must already be disabled by caller
 | |
|   Exit:
 | |
|   - Spinlock set to current core's ID (PRID)
 | |
|   - "reg_A/B" clobbered
 | |
| --------------------------------------------------------------------------------
 | |
| */
 | |
| 
 | |
| #if portNUM_PROCESSORS > 1
 | |
| 
 | |
|     .macro  spinlock_take       reg_A reg_B lock_var
 | |
| 
 | |
|     movi    \reg_A, \lock_var               /* reg_A = &lock_var */
 | |
| .L_spinlock_loop:
 | |
|     movi    \reg_B, 0                       /* Load spinlock free value (0) into SCOMPARE1 */
 | |
|     wsr     \reg_B, SCOMPARE1
 | |
|     rsync                                   /* Ensure that SCOMPARE1 is set before s32c1i executes */
 | |
|     rsr     \reg_B, PRID                    /* Load the current core's ID into reg_B */
 | |
|     s32c1i  \reg_B, \reg_A, 0               /* Attempt *lock_var = reg_B */
 | |
|     bnez    \reg_B, .L_spinlock_loop        /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */
 | |
| 
 | |
|     .endm
 | |
| 
 | |
| #endif /* portNUM_PROCESSORS > 1 */
 | |
| 
 | |
| /*
 | |
| --------------------------------------------------------------------------------
 | |
|   Macro spinlock_release
 | |
| 
 | |
|   This macro will release a spinlock variable previously taken by the
 | |
|   spinlock_take macro.
 | |
| 
 | |
|   Entry:
 | |
|   - "reg_A/B" as scratch registers
 | |
|   - "lock_var" spinlock variable's symbol
 | |
|   - Interrupts must already be disabled by caller
 | |
|   Exit:
 | |
|   - "reg_A/B" clobbered
 | |
| --------------------------------------------------------------------------------
 | |
| */
 | |
| 
 | |
| #if portNUM_PROCESSORS > 1
 | |
| 
 | |
|     .macro spinlock_release     reg_A reg_B lock_var
 | |
| 
 | |
|     movi    \reg_A, \lock_var               /* reg_A = &lock_var */
 | |
|     movi    \reg_B, 0
 | |
|     s32i    \reg_B, \reg_A, 0               /* Release the spinlock (*reg_A = 0) */
 | |
| 
 | |
|     .endm
 | |
| 
 | |
| #endif /* portNUM_PROCESSORS > 1 */
 | |
| 
 | |
| #endif /* __XT_ASM_UTILS_H */
 |