Merge branch 'feature/dport_access' into 'master'

soc: Dport access with pre-read register APB

See merge request idf/esp-idf!2257
This commit is contained in:
Angus Gratton
2018-05-15 15:32:28 +08:00
8 changed files with 467 additions and 158 deletions

View File

@@ -217,3 +217,22 @@ void IRAM_ATTR esp_dport_access_int_resume(void)
#endif
}
/**
* @brief Read a sequence of DPORT registers to the buffer, SMP-safe version.
*
* This implementation uses a method of the pre-reading of the APB register
* before reading the register of the DPORT, without stall other CPU.
* There is disable/enable interrupt.
*
* @param[out] buff_out Contains the read data.
* @param[in] address Initial address for reading registers.
* @param[in] num_words The number of words.
*/
void IRAM_ATTR esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words)
{
DPORT_INTERRUPT_DISABLE();
for (uint32_t i = 0; i < num_words; ++i) {
buff_out[i] = DPORT_SEQUENCE_REG_READ(address + i * 4);
}
DPORT_INTERRUPT_RESTORE();
}

View File

@@ -53,31 +53,23 @@ void esp_aes_acquire_hardware( void )
/* newlib locks lazy initialize on ESP-IDF */
portENTER_CRITICAL(&aes_spinlock);
DPORT_STALL_OTHER_CPU_START();
{
/* Enable AES hardware */
_DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES);
/* Clear reset on digital signature & secure boot units,
otherwise AES unit is held in reset also. */
_DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG,
DPORT_PERI_EN_AES
| DPORT_PERI_EN_DIGITAL_SIGNATURE
| DPORT_PERI_EN_SECUREBOOT);
}
DPORT_STALL_OTHER_CPU_END();
/* Enable AES hardware */
DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES);
/* Clear reset on digital signature & secure boot units,
otherwise AES unit is held in reset also. */
DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG,
DPORT_PERI_EN_AES
| DPORT_PERI_EN_DIGITAL_SIGNATURE
| DPORT_PERI_EN_SECUREBOOT);
}
void esp_aes_release_hardware( void )
{
DPORT_STALL_OTHER_CPU_START();
{
/* Disable AES hardware */
_DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_AES);
/* Don't return other units to reset, as this pulls
reset on RSA & SHA units, respectively. */
_DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES);
}
DPORT_STALL_OTHER_CPU_END();
/* Disable AES hardware */
DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_AES);
/* Don't return other units to reset, as this pulls
reset on RSA & SHA units, respectively. */
DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES);
portEXIT_CRITICAL(&aes_spinlock);
}
@@ -141,15 +133,8 @@ static inline void esp_aes_block(const void *input, void *output)
}
DPORT_REG_WRITE(AES_START_REG, 1);
DPORT_STALL_OTHER_CPU_START();
{
while (_DPORT_REG_READ(AES_IDLE_REG) != 1) { }
for (int i = 0; i < 4; i++) {
output_words[i] = mem_block[i];
}
}
DPORT_STALL_OTHER_CPU_END();
while (DPORT_REG_READ(AES_IDLE_REG) != 1) { }
esp_dport_access_read_buffer(output_words, (uint32_t)&mem_block[0], 4);
}
/*

View File

@@ -159,16 +159,14 @@ static void esp_sha_lock_engine_inner(sha_engine_state *engine)
_lock_acquire(&state_change_lock);
if (sha_engines_all_idle()) {
/* Enable SHA hardware */
DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA);
/* also clear reset on secure boot, otherwise SHA is held in reset */
DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG,
DPORT_PERI_EN_SHA
| DPORT_PERI_EN_SECUREBOOT);
DPORT_STALL_OTHER_CPU_START();
{
/* Enable SHA hardware */
_DPORT_REG_SET_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA);
/* also clear reset on secure boot, otherwise SHA is held in reset */
_DPORT_REG_CLR_BIT(DPORT_PERI_RST_EN_REG,
DPORT_PERI_EN_SHA
| DPORT_PERI_EN_SECUREBOOT);
ets_sha_enable();
}
ets_sha_enable();
DPORT_STALL_OTHER_CPU_END();
}
@@ -191,12 +189,8 @@ void esp_sha_unlock_engine(esp_sha_type sha_type)
if (sha_engines_all_idle()) {
/* Disable SHA hardware */
/* Don't assert reset on secure boot, otherwise AES is held in reset */
DPORT_STALL_OTHER_CPU_START();
{
_DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_SHA);
_DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA);
}
DPORT_STALL_OTHER_CPU_END();
DPORT_REG_SET_BIT(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_SHA);
DPORT_REG_CLR_BIT(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_SHA);
}
_lock_release(&state_change_lock);
@@ -206,16 +200,14 @@ void esp_sha_unlock_engine(esp_sha_type sha_type)
void esp_sha_wait_idle(void)
{
DPORT_STALL_OTHER_CPU_START();
while(1) {
if(_DPORT_REG_READ(SHA_1_BUSY_REG) == 0
&& _DPORT_REG_READ(SHA_256_BUSY_REG) == 0
&& _DPORT_REG_READ(SHA_384_BUSY_REG) == 0
&& _DPORT_REG_READ(SHA_512_BUSY_REG) == 0) {
if(DPORT_REG_READ(SHA_1_BUSY_REG) == 0
&& DPORT_REG_READ(SHA_256_BUSY_REG) == 0
&& DPORT_REG_READ(SHA_384_BUSY_REG) == 0
&& DPORT_REG_READ(SHA_512_BUSY_REG) == 0) {
break;
}
}
DPORT_STALL_OTHER_CPU_END();
}
void esp_sha_read_digest_state(esp_sha_type sha_type, void *digest_state)
@@ -225,27 +217,23 @@ void esp_sha_read_digest_state(esp_sha_type sha_type, void *digest_state)
esp_sha_lock_memory_block();
DPORT_STALL_OTHER_CPU_START(); // This block reads from DPORT memory (reg_addr_buf)
{
esp_sha_wait_idle();
esp_sha_wait_idle();
_DPORT_REG_WRITE(SHA_LOAD_REG(sha_type), 1);
while(_DPORT_REG_READ(SHA_BUSY_REG(sha_type)) == 1) { }
uint32_t *digest_state_words = (uint32_t *)digest_state;
uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE);
if(sha_type == SHA2_384 || sha_type == SHA2_512) {
/* for these ciphers using 64-bit states, swap each pair of words */
for(int i = 0; i < sha_length(sha_type)/4; i += 2) {
digest_state_words[i+1] = reg_addr_buf[i];
digest_state_words[i]= reg_addr_buf[i+1];
}
} else {
memcpy(digest_state_words, reg_addr_buf, sha_length(sha_type));
DPORT_REG_WRITE(SHA_LOAD_REG(sha_type), 1);
while(DPORT_REG_READ(SHA_BUSY_REG(sha_type)) == 1) { }
uint32_t *digest_state_words = (uint32_t *)digest_state;
uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE);
if(sha_type == SHA2_384 || sha_type == SHA2_512) {
/* for these ciphers using 64-bit states, swap each pair of words */
DPORT_INTERRUPT_DISABLE(); // Disable interrupt only on current CPU.
for(int i = 0; i < sha_length(sha_type)/4; i += 2) {
digest_state_words[i+1] = DPORT_SEQUENCE_REG_READ((uint32_t)&reg_addr_buf[i]);
digest_state_words[i] = DPORT_SEQUENCE_REG_READ((uint32_t)&reg_addr_buf[i+1]);
}
DPORT_INTERRUPT_RESTORE(); // restore the previous interrupt level
} else {
esp_dport_access_read_buffer(digest_state_words, (uint32_t)&reg_addr_buf[0], sha_length(sha_type)/4);
}
DPORT_STALL_OTHER_CPU_END();
esp_sha_unlock_memory_block();
}

View File

@@ -26,7 +26,7 @@ void esp_dport_access_stall_other_cpu_end(void);
void esp_dport_access_int_init(void);
void esp_dport_access_int_pause(void);
void esp_dport_access_int_resume(void);
void esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words);
//This routine does not stop the dport routines in any way that is recoverable. Please
//only call in case of panic().
void esp_dport_access_int_abort(void);
@@ -34,9 +34,14 @@ void esp_dport_access_int_abort(void);
#if defined(BOOTLOADER_BUILD) || defined(CONFIG_FREERTOS_UNICORE) || !defined(ESP_PLATFORM)
#define DPORT_STALL_OTHER_CPU_START()
#define DPORT_STALL_OTHER_CPU_END()
#define DPORT_STALL_OTHER_CPU_START()
#define DPORT_INTERRUPT_DISABLE()
#define DPORT_INTERRUPT_RESTORE()
#else
#define DPORT_STALL_OTHER_CPU_START() esp_dport_access_stall_other_cpu_start()
#define DPORT_STALL_OTHER_CPU_END() esp_dport_access_stall_other_cpu_end()
#define DPORT_INTERRUPT_DISABLE() unsigned int intLvl = XTOS_SET_INTLEVEL(XCHAL_EXCM_LEVEL)
#define DPORT_INTERRUPT_RESTORE() XTOS_RESTORE_JUST_INTLEVEL(intLvl)
#endif
#ifdef __cplusplus

View File

@@ -6,12 +6,13 @@
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/semphr.h"
#include "soc/cpu.h"
#include "unity.h"
#include "rom/uart.h"
#include "soc/uart_reg.h"
#include "soc/dport_reg.h"
#include "soc/rtc.h"
#define MHZ (1000000)
static volatile bool exit_flag;
static bool dport_test_result;
static bool apb_test_result;
@@ -54,40 +55,267 @@ static void accessAPB(void *pvParameters)
vTaskDelete(NULL);
}
TEST_CASE("access DPORT and APB at same time", "[esp32]")
void run_tasks(const char *task1_description, void (* task1_func)(void *), const char *task2_description, void (* task2_func)(void *), uint32_t delay_ms)
{
int i;
TaskHandle_t th[2];
xSemaphoreHandle exit_sema[2];
for (i=0; i<2; i++) {
exit_sema[i] = xSemaphoreCreateMutex();
xSemaphoreTake(exit_sema[i], portMAX_DELAY);
if((task1_func != NULL && i == 0) || (task2_func != NULL && i == 1)){
exit_sema[i] = xSemaphoreCreateMutex();
xSemaphoreTake(exit_sema[i], portMAX_DELAY);
}
}
exit_flag = false;
#ifndef CONFIG_FREERTOS_UNICORE
printf("assign task accessing DPORT to core 0 and task accessing APB to core 1\n");
xTaskCreatePinnedToCore(accessDPORT , "accessDPORT" , 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0], 0);
xTaskCreatePinnedToCore(accessAPB , "accessAPB" , 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1], 1);
if(task1_func != NULL) xTaskCreatePinnedToCore(task1_func, task1_description, 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0], 0);
if(task2_func != NULL) xTaskCreatePinnedToCore(task2_func, task2_description, 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1], 1);
#else
printf("assign task accessing DPORT and accessing APB\n");
xTaskCreate(accessDPORT , "accessDPORT" , 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0]);
xTaskCreate(accessAPB , "accessAPB" , 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1]);
if(task1_func != NULL) xTaskCreate(task1_func, task1_description, 2048, &exit_sema[0], UNITY_FREERTOS_PRIORITY - 1, &th[0]);
if(task2_func != NULL) xTaskCreate(task2_func, task2_description, 2048, &exit_sema[1], UNITY_FREERTOS_PRIORITY - 1, &th[1]);
#endif
printf("start wait for 10 seconds\n");
vTaskDelay(10000 / portTICK_PERIOD_MS);
printf("start wait for %d seconds [Test %s and %s]\n", delay_ms/1000, task1_description, task2_description);
vTaskDelay(delay_ms / portTICK_PERIOD_MS);
// set exit flag to let thread exit
exit_flag = true;
for (i=0; i<2; i++) {
xSemaphoreTake(exit_sema[i], portMAX_DELAY);
vSemaphoreDelete(exit_sema[i]);
if ((task1_func != NULL && i == 0) || (task2_func != NULL && i == 1)) {
xSemaphoreTake(exit_sema[i], portMAX_DELAY);
vSemaphoreDelete(exit_sema[i]);
}
}
TEST_ASSERT(dport_test_result == true && apb_test_result == true);
}
TEST_CASE("access DPORT and APB at same time", "[esp32]")
{
dport_test_result = false;
apb_test_result = false;
printf("CPU_FREQ = %d MHz\n", rtc_clk_cpu_freq_value(rtc_clk_cpu_freq_get()) / MHZ);
run_tasks("accessDPORT", accessDPORT, "accessAPB", accessAPB, 10000);
}
void run_tasks_with_change_freq_cpu (rtc_cpu_freq_t cpu_freq)
{
dport_test_result = false;
apb_test_result = false;
rtc_cpu_freq_t cur_freq = rtc_clk_cpu_freq_get();
uint32_t freq_before_changed = rtc_clk_cpu_freq_value(cur_freq) / MHZ;
uint32_t freq_changed = freq_before_changed;
printf("CPU_FREQ = %d MHz\n", freq_before_changed);
if (cur_freq != cpu_freq) {
uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM);
rtc_clk_cpu_freq_set(cpu_freq);
const int uart_num = CONFIG_CONSOLE_UART_NUM;
const int uart_baud = CONFIG_CONSOLE_UART_BAUDRATE;
uart_div_modify(uart_num, (rtc_clk_apb_freq_get() << 4) / uart_baud);
freq_changed = rtc_clk_cpu_freq_value(rtc_clk_cpu_freq_get()) / MHZ;
printf("CPU_FREQ switching to %d MHz\n", freq_changed);
}
run_tasks("accessDPORT", accessDPORT, "accessAPB", accessAPB, 10000 / ((freq_before_changed <= freq_changed) ? 1 : (freq_before_changed / freq_changed)));
// return old freq.
uart_tx_wait_idle(CONFIG_CONSOLE_UART_NUM);
rtc_clk_cpu_freq_set(cur_freq);
const int uart_num = CONFIG_CONSOLE_UART_NUM;
const int uart_baud = CONFIG_CONSOLE_UART_BAUDRATE;
uart_div_modify(uart_num, (rtc_clk_apb_freq_get() << 4) / uart_baud);
}
TEST_CASE("access DPORT and APB at same time (Freq CPU and APB = 80 MHz)", "[esp32] [ignore]")
{
run_tasks_with_change_freq_cpu(RTC_CPU_FREQ_80M);
}
TEST_CASE("access DPORT and APB at same time (Freq CPU and APB = 40 MHz (XTAL))", "[esp32]")
{
run_tasks_with_change_freq_cpu(RTC_CPU_FREQ_XTAL);
}
static uint32_t stall_other_cpu_counter;
static uint32_t pre_reading_apb_counter;
static uint32_t apb_counter;
static void accessDPORT_stall_other_cpu(void *pvParameters)
{
xSemaphoreHandle *sema = (xSemaphoreHandle *) pvParameters;
uint32_t dport_date = DPORT_REG_READ(DPORT_DATE_REG);
uint32_t dport_date_cur;
dport_test_result = true;
stall_other_cpu_counter = 0;
// although exit flag is set in another task, checking (exit_flag == false) is safe
while (exit_flag == false) {
++stall_other_cpu_counter;
DPORT_STALL_OTHER_CPU_START();
dport_date_cur = _DPORT_REG_READ(DPORT_DATE_REG);
DPORT_STALL_OTHER_CPU_END();
if (dport_date != dport_date_cur) {
apb_test_result = false;
break;
}
}
xSemaphoreGive(*sema);
vTaskDelete(NULL);
}
static void accessAPB_measure_performance(void *pvParameters)
{
xSemaphoreHandle *sema = (xSemaphoreHandle *) pvParameters;
uint32_t uart_date = REG_READ(UART_DATE_REG(0));
apb_test_result = true;
apb_counter = 0;
// although exit flag is set in another task, checking (exit_flag == false) is safe
while (exit_flag == false) {
++apb_counter;
if (uart_date != REG_READ(UART_DATE_REG(0))) {
apb_test_result = false;
break;
}
}
xSemaphoreGive(*sema);
vTaskDelete(NULL);
}
static void accessDPORT_pre_reading_apb(void *pvParameters)
{
xSemaphoreHandle *sema = (xSemaphoreHandle *) pvParameters;
uint32_t dport_date = DPORT_REG_READ(DPORT_DATE_REG);
uint32_t dport_date_cur;
dport_test_result = true;
pre_reading_apb_counter = 0;
// although exit flag is set in another task, checking (exit_flag == false) is safe
while (exit_flag == false) {
++pre_reading_apb_counter;
dport_date_cur = DPORT_REG_READ(DPORT_DATE_REG);
if (dport_date != dport_date_cur) {
apb_test_result = false;
break;
}
}
xSemaphoreGive(*sema);
vTaskDelete(NULL);
}
TEST_CASE("test for DPORT access performance", "[esp32]")
{
dport_test_result = true;
apb_test_result = true;
typedef struct {
uint32_t dport;
uint32_t apb;
uint32_t summ;
} test_performance_t;
test_performance_t t[5] = {0};
uint32_t delay_ms = 5000;
run_tasks("-", NULL, "accessAPB", accessAPB_measure_performance, delay_ms);
t[0].apb = apb_counter;
t[0].dport = 0;
t[0].summ = t[0].apb + t[0].dport;
run_tasks("accessDPORT_stall_other_cpu", accessDPORT_stall_other_cpu, "-", NULL, delay_ms);
t[1].apb = 0;
t[1].dport = stall_other_cpu_counter;
t[1].summ = t[1].apb + t[1].dport;
run_tasks("accessDPORT_pre_reading_apb", accessDPORT_pre_reading_apb, "-", NULL, delay_ms);
t[2].apb = 0;
t[2].dport = pre_reading_apb_counter;
t[2].summ = t[2].apb + t[2].dport;
run_tasks("accessDPORT_stall_other_cpu", accessDPORT_stall_other_cpu, "accessAPB", accessAPB_measure_performance, delay_ms);
t[3].apb = apb_counter;
t[3].dport = stall_other_cpu_counter;
t[3].summ = t[3].apb + t[3].dport;
run_tasks("accessDPORT_pre_reading_apb", accessDPORT_pre_reading_apb, "accessAPB", accessAPB_measure_performance, delay_ms);
t[4].apb = apb_counter;
t[4].dport = pre_reading_apb_counter;
t[4].summ = t[4].apb + t[4].dport;
printf("\nPerformance table: \n"
"The number of simultaneous read operations of the APB and DPORT registers\n"
"by different methods for %d seconds.\n", delay_ms/1000);
printf("+-----------------------+----------+----------+----------+\n");
printf("| Method read DPORT | DPORT | APB | SUMM |\n");
printf("+-----------------------+----------+----------+----------+\n");
printf("|1.Only accessAPB |%10d|%10d|%10d|\n", t[0].dport, t[0].apb, t[0].summ);
printf("|2.Only STALL_OTHER_CPU |%10d|%10d|%10d|\n", t[1].dport, t[1].apb, t[1].summ);
printf("|3.Only PRE_READ_APB_REG|%10d|%10d|%10d|\n", t[2].dport, t[2].apb, t[2].summ);
printf("+-----------------------+----------+----------+----------+\n");
printf("|4.STALL_OTHER_CPU |%10d|%10d|%10d|\n", t[3].dport, t[3].apb, t[3].summ);
printf("|5.PRE_READ_APB_REG |%10d|%10d|%10d|\n", t[4].dport, t[4].apb, t[4].summ);
printf("+-----------------------+----------+----------+----------+\n");
printf("| ratio=PRE_READ/STALL |%10f|%10f|%10f|\n", (float)t[4].dport/t[3].dport, (float)t[4].apb/t[3].apb, (float)t[4].summ/t[3].summ);
printf("+-----------------------+----------+----------+----------+\n");
}
#define REPEAT_OPS 10000
static uint32_t start, end;
#define BENCHMARK_START() do { \
RSR(CCOUNT, start); \
} while(0)
#define BENCHMARK_END(OPERATION) do { \
RSR(CCOUNT, end); \
printf("%s took %d cycles/op (%d cycles for %d ops)\n", \
OPERATION, (end - start)/REPEAT_OPS, \
(end - start), REPEAT_OPS); \
} while(0)
TEST_CASE("BENCHMARK for DPORT access performance", "[freertos]")
{
BENCHMARK_START();
for (int i = 0; i < REPEAT_OPS; i++) {
DPORT_STALL_OTHER_CPU_START();
_DPORT_REG_READ(DPORT_DATE_REG);
DPORT_STALL_OTHER_CPU_END();
}
BENCHMARK_END("[old]DPORT access STALL OTHER CPU");
BENCHMARK_START();
for (int i = 0; i < REPEAT_OPS; i++) {
DPORT_REG_READ(DPORT_DATE_REG);
}
BENCHMARK_END("[new]DPORT access PRE-READ APB REG");
BENCHMARK_START();
for (int i = 0; i < REPEAT_OPS; i++) {
DPORT_SEQUENCE_REG_READ(DPORT_DATE_REG);
}
BENCHMARK_END("[seq]DPORT access PRE-READ APB REG");
BENCHMARK_START();
for (int i = 0; i < REPEAT_OPS; i++) {
REG_READ(UART_DATE_REG(0));
}
BENCHMARK_END("REG_READ");
BENCHMARK_START();
for (int i = 0; i < REPEAT_OPS; i++) {
_DPORT_REG_READ(DPORT_DATE_REG);
}
BENCHMARK_END("_DPORT_REG_READ");
}