fix(driver_spi): support un-aligned dma transaction and psram transaction

This commit is contained in:
wanckl
2025-08-18 22:12:43 +08:00
parent 4adf8aecce
commit 8ff71742e8
14 changed files with 195 additions and 162 deletions

View File

@@ -56,7 +56,8 @@ typedef struct {
uint32_t flags; ///< Flags (attributes) of the bus
int max_transfer_sz; ///< Maximum length of bytes available to send
bool dma_enabled; ///< To enable DMA or not
size_t internal_mem_align_size; ///< Buffer align byte requirement for internal memory
size_t cache_align_int; ///< Internal memory align byte requirement
size_t cache_align_ext; ///< External memory align byte requirement
spi_bus_lock_handle_t lock;
#ifdef CONFIG_PM_ENABLE
esp_pm_lock_handle_t pm_lock; ///< Power management lock
@@ -71,9 +72,13 @@ typedef struct {
spi_dma_chan_handle_t tx_dma_chan; ///< TX DMA channel, on ESP32 and ESP32S2, tx_dma_chan and rx_dma_chan are same
spi_dma_chan_handle_t rx_dma_chan; ///< RX DMA channel, on ESP32 and ESP32S2, tx_dma_chan and rx_dma_chan are same
#endif
int dma_desc_num; ///< DMA descriptor number of dmadesc_tx or dmadesc_rx.
spi_dma_desc_t *dmadesc_tx; ///< DMA descriptor array for TX
spi_dma_desc_t *dmadesc_rx; ///< DMA descriptor array for RX
size_t dma_align_tx_int; ///< Internal memory align byte requirement for TX
size_t dma_align_tx_ext; ///< External memory align byte requirement for TX
size_t dma_align_rx_int; ///< Internal memory align byte requirement for RX
size_t dma_align_rx_ext; ///< External memory align byte requirement for RX
int dma_desc_num; ///< DMA descriptor number of dmadesc_tx or dmadesc_rx.
spi_dma_desc_t *dmadesc_tx; ///< DMA descriptor array for TX
spi_dma_desc_t *dmadesc_rx; ///< DMA descriptor array for RX
} spi_dma_ctx_t;
/// Destructor called when a bus is deinitialized.

View File

@@ -5,6 +5,7 @@
*/
#pragma once
#include <stddef.h>
#include "stdbool.h"
#include "hal/spi_types.h"
@@ -36,6 +37,15 @@ typedef struct {
*/
void spi_dma_enable_burst(spi_dma_chan_handle_t chan_handle, bool data_burst, bool desc_burst);
/**
* Get the alignment constraints for DMA
*
* @param chan_handle Context of the spi_dma channel.
* @param internal_size The alignment size for internal memory.
* @param external_size The alignment size for external memory.
*/
void spi_dma_get_alignment_constraints(spi_dma_chan_handle_t chan_handle, size_t *internal_size, size_t *external_size);
/**
* Re-trigger a HW pre-load to pick up appended linked descriptor
*

View File

@@ -6,6 +6,7 @@
#include <string.h>
#include <stdatomic.h>
#include <sys/param.h>
#include "sdkconfig.h"
#include "esp_types.h"
#include "esp_attr.h"
@@ -248,6 +249,10 @@ static esp_err_t alloc_dma_chan(spi_host_device_t host_id, spi_dma_chan_t dma_ch
spi_dma_enable_burst(dma_ctx->tx_dma_chan, true, true);
spi_dma_enable_burst(dma_ctx->rx_dma_chan, true, true);
// Get DMA alignment constraints
spi_dma_get_alignment_constraints(dma_ctx->tx_dma_chan, &dma_ctx->dma_align_tx_int, &dma_ctx->dma_align_tx_ext);
spi_dma_get_alignment_constraints(dma_ctx->rx_dma_chan, &dma_ctx->dma_align_rx_int, &dma_ctx->dma_align_rx_ext);
return ret;
}
@@ -288,13 +293,16 @@ static esp_err_t alloc_dma_chan(spi_host_device_t host_id, spi_dma_chan_t dma_ch
gdma_connect(dma_ctx->rx_dma_chan, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_SPI, 3));
}
#endif
// TODO: add support to allow SPI transfer PSRAM buffer
gdma_transfer_config_t trans_cfg = {
.max_data_burst_size = 16,
.access_ext_mem = false,
.access_ext_mem = true, // allow to transfer data from/to external memory directly by DMA
};
ESP_RETURN_ON_ERROR(gdma_config_transfer(dma_ctx->tx_dma_chan, &trans_cfg), SPI_TAG, "config gdma tx transfer failed");
ESP_RETURN_ON_ERROR(gdma_config_transfer(dma_ctx->rx_dma_chan, &trans_cfg), SPI_TAG, "config gdma rx transfer failed");
// Get DMA alignment constraints
gdma_get_alignment_constraints(dma_ctx->tx_dma_chan, &dma_ctx->dma_align_tx_int, &dma_ctx->dma_align_tx_ext);
gdma_get_alignment_constraints(dma_ctx->rx_dma_chan, &dma_ctx->dma_align_rx_int, &dma_ctx->dma_align_rx_ext);
}
return ret;
}
@@ -826,9 +834,9 @@ esp_err_t spi_bus_initialize(spi_host_device_t host_id, const spi_bus_config_t *
assert(bus_attr && ctx); //coverity check
bus_attr->bus_cfg = *bus_config;
if (dma_chan != SPI_DMA_DISABLED) {
bus_attr->dma_enabled = 1;
bus_attr->dma_enabled = (dma_chan != SPI_DMA_DISABLED);
bus_attr->max_transfer_sz = SOC_SPI_MAXIMUM_BUFFER_SIZE;
if (bus_attr->dma_enabled) {
err = spicommon_dma_chan_alloc(host_id, dma_chan, &ctx->dma_ctx);
if (err != ESP_OK) {
goto cleanup;
@@ -837,14 +845,10 @@ esp_err_t spi_bus_initialize(spi_host_device_t host_id, const spi_bus_config_t *
if (err != ESP_OK) {
goto cleanup;
}
#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
esp_cache_get_alignment(MALLOC_CAP_DMA, (size_t *)&bus_attr->internal_mem_align_size);
#else
bus_attr->internal_mem_align_size = 4;
#endif
} else {
bus_attr->dma_enabled = 0;
bus_attr->max_transfer_sz = SOC_SPI_MAXIMUM_BUFFER_SIZE;
// Get cache alignment constraints
esp_cache_get_alignment(MALLOC_CAP_DMA, &bus_attr->cache_align_int);
esp_cache_get_alignment(MALLOC_CAP_SPIRAM, &bus_attr->cache_align_ext);
}
spi_bus_lock_config_t lock_config = {
@@ -926,11 +930,11 @@ cleanup:
void *spi_bus_dma_memory_alloc(spi_host_device_t host_id, size_t size, uint32_t extra_heap_caps)
{
(void) host_id; //remain for extendability
ESP_RETURN_ON_FALSE((extra_heap_caps & MALLOC_CAP_SPIRAM) == 0, NULL, SPI_TAG, "external memory is not supported now");
size_t dma_requir = 16; //TODO: IDF-10111, using max alignment temp, refactor to "gdma_get_alignment_constraints" instead
return heap_caps_aligned_calloc(dma_requir, 1, size, extra_heap_caps | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
// As don't know the buffer will used for TX or RX, so use the max alignment requirement
size_t alignment = (extra_heap_caps & MALLOC_CAP_SPIRAM) ? \
MAX(bus_ctx[host_id]->dma_ctx->dma_align_tx_ext, bus_ctx[host_id]->dma_ctx->dma_align_rx_ext) : \
MAX(bus_ctx[host_id]->dma_ctx->dma_align_tx_int, bus_ctx[host_id]->dma_ctx->dma_align_rx_int);
return heap_caps_aligned_calloc(alignment, 1, size, extra_heap_caps | MALLOC_CAP_DMA);
}
const spi_bus_attr_t* spi_bus_get_attr(spi_host_device_t host_id)

View File

@@ -27,6 +27,18 @@ void spi_dma_enable_burst(spi_dma_chan_handle_t chan_handle, bool data_burst, bo
}
}
void spi_dma_get_alignment_constraints(spi_dma_chan_handle_t chan_handle, size_t *internal_size, size_t *external_size)
{
spi_dma_dev_t *spi_dma = SPI_LL_GET_HW(chan_handle.host_id);
if (chan_handle.dir == DMA_CHANNEL_DIRECTION_TX) {
*internal_size = 1; // TX don't need to follow dma alignment in driver design
*external_size = 1;
} else {
spi_dma_ll_get_rx_alignment_require(spi_dma, (uint32_t *)internal_size, (uint32_t *)external_size);
}
}
#if SOC_SPI_SUPPORT_SLAVE_HD_VER2
void spi_dma_append(spi_dma_chan_handle_t chan_handle)
{

View File

@@ -771,31 +771,13 @@ static void SPI_MASTER_ISR_ATTR s_spi_dma_prepare_data(spi_host_t *host, spi_hal
}
}
static void SPI_MASTER_ISR_ATTR s_spi_prepare_data(spi_device_t *dev, const spi_hal_trans_config_t *hal_trans)
{
spi_host_t *host = dev->host;
spi_hal_dev_config_t *hal_dev = &(dev->hal_dev);
spi_hal_context_t *hal = &(host->hal);
if (host->bus_attr->dma_enabled) {
s_spi_dma_prepare_data(host, hal, hal_dev, hal_trans);
} else {
//Need to copy data to registers manually
spi_hal_push_tx_buffer(hal, hal_trans);
}
//in ESP32 these registers should be configured after the DMA is set
spi_hal_enable_data_line(hal->hw, (!hal_dev->half_duplex && hal_trans->rcv_buffer) || hal_trans->send_buffer, !!hal_trans->rcv_buffer);
}
static void SPI_MASTER_ISR_ATTR spi_format_hal_trans_struct(spi_device_t *dev, spi_trans_priv_t *trans_buf, spi_hal_trans_config_t *hal_trans)
{
spi_host_t *host = dev->host;
spi_transaction_t *trans = trans_buf->trans;
hal_trans->tx_bitlen = trans->length;
hal_trans->rx_bitlen = trans->rxlength;
hal_trans->rcv_buffer = (uint8_t*)host->cur_trans_buf.buffer_to_rcv;
hal_trans->send_buffer = (uint8_t*)host->cur_trans_buf.buffer_to_send;
hal_trans->rcv_buffer = (uint8_t *)trans_buf->buffer_to_rcv;
hal_trans->send_buffer = (uint8_t *)trans_buf->buffer_to_send;
hal_trans->cmd = trans->cmd;
hal_trans->addr = trans->addr;
@@ -831,6 +813,7 @@ static void SPI_MASTER_ISR_ATTR spi_format_hal_trans_struct(spi_device_t *dev, s
// Setup the transaction-specified registers and linked-list used by the DMA (or FIFO if DMA is not used)
static void SPI_MASTER_ISR_ATTR spi_new_trans(spi_device_t *dev, spi_trans_priv_t *trans_buf)
{
spi_host_t *host = dev->host;
spi_transaction_t *trans = trans_buf->trans;
spi_hal_context_t *hal = &(dev->host->hal);
spi_hal_dev_config_t *hal_dev = &(dev->hal_dev);
@@ -844,7 +827,15 @@ static void SPI_MASTER_ISR_ATTR spi_new_trans(spi_device_t *dev, spi_trans_priv_
spi_hal_trans_config_t hal_trans = {};
spi_format_hal_trans_struct(dev, trans_buf, &hal_trans);
spi_hal_setup_trans(hal, hal_dev, &hal_trans);
s_spi_prepare_data(dev, &hal_trans);
if (host->bus_attr->dma_enabled) {
s_spi_dma_prepare_data(host, hal, hal_dev, &hal_trans);
} else {
//Need to copy data to registers manually
spi_hal_push_tx_buffer(hal, &hal_trans);
}
//these registers should be configured after the DMA is set
spi_hal_enable_data_line(hal->hw, (!hal_dev->half_duplex && hal_trans.rcv_buffer) || hal_trans.send_buffer, !!hal_trans.rcv_buffer);
//Call pre-transmission callback, if any
if (dev->cfg.pre_cb) {
@@ -1011,18 +1002,6 @@ static void SPI_MASTER_ISR_ATTR spi_intr(void *arg)
//This workaround is only for esp32, where tx_dma_chan and rx_dma_chan are always same
spicommon_dmaworkaround_idle(dma_ctx->tx_dma_chan.chan_id);
#endif //#if CONFIG_IDF_TARGET_ESP32
#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE //invalidate here to let user access rx data in post_cb if possible
if (host->cur_trans_buf.buffer_to_rcv) {
uint16_t alignment = bus_attr->internal_mem_align_size;
uint32_t buffer_byte_len = (host->cur_trans_buf.trans->rxlength + 7) / 8;
buffer_byte_len = (buffer_byte_len + alignment - 1) & (~(alignment - 1));
// invalidate priv_trans.buffer_to_rcv anyway, only user provide aligned buffer can rcv correct data in post_cb
esp_err_t ret = esp_cache_msync((void *)host->cur_trans_buf.buffer_to_rcv, buffer_byte_len, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
assert(ret == ESP_OK);
(void)ret;
}
#endif
spi_trans_dma_error_check(host);
}
@@ -1175,7 +1154,7 @@ static SPI_MASTER_ISR_ATTR esp_err_t check_trans_valid(spi_device_handle_t handl
SPI_CHECK(trans_desc->length <= SPI_LL_CPU_MAX_BIT_LEN, "txdata transfer > hardware max supported len", ESP_ERR_INVALID_ARG);
SPI_CHECK(trans_desc->rxlength <= SPI_LL_CPU_MAX_BIT_LEN, "rxdata transfer > hardware max supported len", ESP_ERR_INVALID_ARG);
}
if (esp_ptr_external_ram(trans_desc->tx_buffer) || esp_ptr_external_ram(trans_desc->rx_buffer)){
if (esp_ptr_external_ram(trans_desc->tx_buffer) || esp_ptr_external_ram(trans_desc->rx_buffer)) {
SPI_CHECK(spi_flash_cache_enabled(), "Using PSRAM must when cache is enabled", ESP_ERR_INVALID_STATE);
}
return ESP_OK;
@@ -1184,46 +1163,45 @@ static SPI_MASTER_ISR_ATTR esp_err_t check_trans_valid(spi_device_handle_t handl
static SPI_MASTER_ISR_ATTR void uninstall_priv_desc(spi_trans_priv_t* trans_buf)
{
spi_transaction_t *trans_desc = trans_buf->trans;
if ((void *)trans_buf->buffer_to_send != &trans_desc->tx_data[0] &&
trans_buf->buffer_to_send != trans_desc->tx_buffer) {
if ((void *)trans_buf->buffer_to_send != trans_desc->tx_data && trans_buf->buffer_to_send != trans_desc->tx_buffer) {
free((void *)trans_buf->buffer_to_send); //force free, ignore const
}
// copy data from temporary DMA-capable buffer back to IRAM buffer and free the temporary one.
if (trans_buf->buffer_to_rcv && (void *)trans_buf->buffer_to_rcv != &trans_desc->rx_data[0] && trans_buf->buffer_to_rcv != trans_desc->rx_buffer) { // NOLINT(clang-analyzer-unix.Malloc)
if (trans_desc->flags & SPI_TRANS_USE_RXDATA) {
memcpy((uint8_t *) & trans_desc->rx_data[0], trans_buf->buffer_to_rcv, (trans_desc->rxlength + 7) / 8);
} else {
memcpy(trans_desc->rx_buffer, trans_buf->buffer_to_rcv, (trans_desc->rxlength + 7) / 8);
}
// copy data from temporary DMA-capable buffer back to trans_desc buffer and free the temporary one.
void *orig_rx_buffer = (trans_desc->flags & SPI_TRANS_USE_RXDATA) ? trans_desc->rx_data : trans_desc->rx_buffer;
if (trans_buf->buffer_to_rcv != orig_rx_buffer) {
memcpy(orig_rx_buffer, trans_buf->buffer_to_rcv, (trans_desc->rxlength + 7) / 8);
free(trans_buf->buffer_to_rcv);
}
}
static SPI_MASTER_ISR_ATTR esp_err_t setup_dma_priv_buffer(uint32_t *buffer, uint32_t len, uint32_t alignment, bool is_tx, uint32_t flags, uint32_t **ret_buffer)
static SPI_MASTER_ISR_ATTR esp_err_t setup_dma_priv_buffer(spi_host_t *host, uint32_t *buffer, uint32_t len, bool is_tx, uint32_t flags, uint32_t **ret_buffer)
{
bool unaligned = ((((uint32_t)buffer) | len) & (alignment - 1));
#if !SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
if (!((flags & SPI_TRANS_DMA_USE_PSRAM) && esp_ptr_dma_ext_capable(buffer))) {
// tx don't need align on addr or length on those chips
unaligned = is_tx ? false : (((uint32_t)buffer) & (alignment - 1));
}
#if CONFIG_IDF_TARGET_ESP32S2
ESP_RETURN_ON_FALSE_ISR((host->id != SPI3_HOST) || !(flags & SPI_TRANS_DMA_USE_PSRAM), ESP_ERR_NOT_SUPPORTED, SPI_TAG, "SPI3 does not support external memory");
#endif
#if SOC_PSRAM_DMA_CAPABLE
if ((flags & SPI_TRANS_DMA_USE_PSRAM) && esp_ptr_dma_ext_capable(buffer)) {
// dma psram don't do additional copy, check only
ESP_RETURN_ON_FALSE_ISR(!unaligned, ESP_ERR_INVALID_ARG, SPI_TAG, "%s buffer addr and length should align to %ld Byte to use PSRAM, use heap_caps_aligned_calloc() may helpful", is_tx ? "TX" : "RX", alignment);
ESP_RETURN_ON_ERROR_ISR(esp_cache_msync((void *)buffer, len, is_tx ? ESP_CACHE_MSYNC_FLAG_DIR_C2M : ESP_CACHE_MSYNC_FLAG_DIR_M2C), SPI_TAG, "sync failed for %s buffer", is_tx ? "TX" : "RX");
*ret_buffer = buffer;
return ESP_OK;
bool is_ptr_ext = esp_ptr_external_ram(buffer);
bool use_psram = is_ptr_ext && (flags & SPI_TRANS_DMA_USE_PSRAM);
bool need_malloc = is_ptr_ext ? (!use_psram || !esp_ptr_dma_ext_capable(buffer)) : !esp_ptr_dma_capable(buffer);
uint16_t alignment = 0;
// If psram is wanted, re-malloc also from psram.
uint32_t mem_cap = MALLOC_CAP_DMA | (use_psram ? MALLOC_CAP_SPIRAM : MALLOC_CAP_INTERNAL);
if (is_tx) {
alignment = use_psram ? host->dma_ctx->dma_align_tx_ext : host->dma_ctx->dma_align_tx_int;
} else {
// RX cache sync still need consider the cache alignment requirement
if (use_psram) {
alignment = MAX(host->dma_ctx->dma_align_rx_ext, host->bus_attr->cache_align_ext);
} else {
alignment = MAX(host->dma_ctx->dma_align_rx_int, host->bus_attr->cache_align_int);
}
}
#endif
if ((!esp_ptr_dma_capable(buffer) || unaligned)) {
need_malloc |= (((uint32_t)buffer | len) & (alignment - 1));
ESP_EARLY_LOGD(SPI_TAG, "%s %p, len %d, is_ptr_ext %d, use_psram: %d, alignment: %d, need_malloc: %d from %s", is_tx ? "TX" : "RX", buffer, len, is_ptr_ext, use_psram, alignment, need_malloc, (mem_cap & MALLOC_CAP_SPIRAM) ? "psram" : "internal");
if (need_malloc) {
ESP_RETURN_ON_FALSE_ISR(!(flags & SPI_TRANS_DMA_BUFFER_ALIGN_MANUAL), ESP_ERR_INVALID_ARG, SPI_TAG, "Set flag SPI_TRANS_DMA_BUFFER_ALIGN_MANUAL but %s addr&len not align to %d, or not dma_capable", is_tx ? "TX" : "RX", alignment);
//if buf in the desc not DMA-capable, or not bytes aligned to alignment, malloc a new one
ESP_EARLY_LOGD(SPI_TAG, "Allocate %s buffer for DMA", is_tx ? "TX" : "RX");
len = (len + alignment - 1) & (~(alignment - 1)); // up align alignment
uint32_t *temp = heap_caps_aligned_alloc(alignment, len, MALLOC_CAP_DMA);
uint32_t *temp = heap_caps_aligned_alloc(alignment, len, mem_cap);
ESP_RETURN_ON_FALSE_ISR(temp != NULL, ESP_ERR_NO_MEM, SPI_TAG, "Failed to allocate priv %s buffer", is_tx ? "TX" : "RX");
if (is_tx) {
@@ -1231,9 +1209,9 @@ static SPI_MASTER_ISR_ATTR esp_err_t setup_dma_priv_buffer(uint32_t *buffer, uin
}
buffer = temp;
}
#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
ESP_RETURN_ON_ERROR_ISR(esp_cache_msync((void *)buffer, len, is_tx ? ESP_CACHE_MSYNC_FLAG_DIR_C2M : ESP_CACHE_MSYNC_FLAG_DIR_M2C), SPI_TAG, "sync failed for %s buffer", is_tx ? "TX" : "RX");
#endif
esp_err_t ret = esp_cache_msync((void *)buffer, len, is_tx ? (ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED) : ESP_CACHE_MSYNC_FLAG_DIR_M2C);
// ESP_ERR_NOT_SUPPORTED stands for not cache sync required, it's allowed here
ESP_RETURN_ON_FALSE_ISR((ret == ESP_OK) || (ret == ESP_ERR_NOT_SUPPORTED), ESP_ERR_INVALID_ARG, SPI_TAG, "sync failed for %s buffer", is_tx ? "TX" : "RX");
*ret_buffer = buffer;
return ESP_OK;
}
@@ -1242,36 +1220,22 @@ static SPI_MASTER_ISR_ATTR esp_err_t setup_priv_desc(spi_host_t *host, spi_trans
{
spi_transaction_t *trans_desc = priv_desc->trans;
const spi_bus_attr_t *bus_attr = host->bus_attr;
uint16_t alignment = bus_attr->internal_mem_align_size;
// rx memory assign
uint32_t* rcv_ptr;
if (trans_desc->flags & SPI_TRANS_USE_RXDATA) {
rcv_ptr = (uint32_t *)&trans_desc->rx_data[0];
} else {
//if not use RXDATA neither rx_buffer, buffer_to_rcv assigned to NULL
rcv_ptr = trans_desc->rx_buffer;
}
uint32_t* rcv_ptr = (trans_desc->flags & SPI_TRANS_USE_RXDATA) ? (uint32_t *)trans_desc->rx_data : (uint32_t *)trans_desc->rx_buffer;
// tx memory assign
uint32_t *send_ptr;
if (trans_desc->flags & SPI_TRANS_USE_TXDATA) {
send_ptr = (uint32_t *)&trans_desc->tx_data[0];
} else {
//if not use TXDATA neither tx_buffer, tx data assigned to NULL
send_ptr = (uint32_t *)trans_desc->tx_buffer ;
}
uint32_t *send_ptr = (trans_desc->flags & SPI_TRANS_USE_TXDATA) ? (uint32_t *)trans_desc->tx_data : (uint32_t *)trans_desc->tx_buffer;
esp_err_t ret = ESP_OK;
if (send_ptr && bus_attr->dma_enabled) {
ret = setup_dma_priv_buffer(send_ptr, (trans_desc->length + 7) / 8, alignment, true, trans_desc->flags, &send_ptr);
ret = setup_dma_priv_buffer(host, send_ptr, (trans_desc->length + 7) / 8, true, trans_desc->flags, &send_ptr);
if (ret != ESP_OK) {
goto clean_up;
}
}
if (rcv_ptr && bus_attr->dma_enabled) {
ret = setup_dma_priv_buffer(rcv_ptr, (trans_desc->rxlength + 7) / 8, alignment, false, trans_desc->flags, &rcv_ptr);
ret = setup_dma_priv_buffer(host, rcv_ptr, (trans_desc->rxlength + 7) / 8, false, trans_desc->flags, &rcv_ptr);
if (ret != ESP_OK) {
goto clean_up;
}
@@ -1343,7 +1307,6 @@ esp_err_t SPI_MASTER_ATTR spi_device_get_trans_result(spi_device_handle_t handle
BaseType_t r;
spi_trans_priv_t trans_buf;
SPI_CHECK(handle != NULL, "invalid dev handle", ESP_ERR_INVALID_ARG);
bool use_dma = handle->host->bus_attr->dma_enabled;
//if SPI_DEVICE_NO_RETURN_RESULT is set, ret_queue will always be empty
SPI_CHECK(!(handle->cfg.flags & SPI_DEVICE_NO_RETURN_RESULT), "API not Supported!", ESP_ERR_NOT_SUPPORTED);
@@ -1357,9 +1320,7 @@ esp_err_t SPI_MASTER_ATTR spi_device_get_trans_result(spi_device_handle_t handle
return ESP_ERR_TIMEOUT;
}
//release temporary buffers used by dma
if (use_dma) {
uninstall_priv_desc(&trans_buf);
}
uninstall_priv_desc(&trans_buf);
(*trans_desc) = trans_buf.trans;
return (trans_buf.trans->flags & (SPI_TRANS_DMA_RX_FAIL | SPI_TRANS_DMA_TX_FAIL)) ? ESP_ERR_INVALID_STATE : ESP_OK;
@@ -1514,18 +1475,6 @@ esp_err_t SPI_MASTER_ISR_ATTR spi_device_polling_end(spi_device_handle_t handle,
spi_trans_dma_error_check(host);
uint32_t trans_flags = host->cur_trans_buf.trans->flags; // save the flags before bus_lock release
#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE //invalidate here to let user access rx data in post_cb if possible
const spi_bus_attr_t *bus_attr = host->bus_attr;
if (bus_attr->dma_enabled && host->cur_trans_buf.buffer_to_rcv) {
uint16_t alignment = bus_attr->internal_mem_align_size;
uint32_t buffer_byte_len = (host->cur_trans_buf.trans->rxlength + 7) / 8;
buffer_byte_len = (buffer_byte_len + alignment - 1) & (~(alignment - 1));
esp_err_t ret = esp_cache_msync((void *)host->cur_trans_buf.buffer_to_rcv, buffer_byte_len, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
if (ret != ESP_OK) {
return ret;
}
}
#endif
ESP_LOGV(SPI_TAG, "polling trans done");
//deal with the in-flight transaction
spi_post_trans(host);
@@ -1880,20 +1829,16 @@ esp_err_t SPI_MASTER_ATTR spi_device_queue_multi_trans(spi_device_handle_t handl
SPI_CHECK(handle, "Invalid arguments.", ESP_ERR_INVALID_ARG);
SPI_CHECK(SOC_SPI_SCT_SUPPORTED_PERIPH(handle->host->id), "Invalid arguments", ESP_ERR_INVALID_ARG);
SPI_CHECK(handle->host->sct_mode_enabled == 1, "SCT mode isn't enabled", ESP_ERR_INVALID_STATE);
esp_err_t ret = ESP_OK;
uint16_t alignment = handle->host->bus_attr->internal_mem_align_size;
uint32_t *conf_buffer = heap_caps_aligned_alloc(alignment, (trans_num * SOC_SPI_SCT_BUFFER_NUM_MAX * sizeof(uint32_t)), MALLOC_CAP_DMA);
SPI_CHECK(conf_buffer, "No enough memory", ESP_ERR_NO_MEM);
for (int i = 0; i < trans_num; i++) {
ret = check_trans_valid(handle, (spi_transaction_t *)&seg_trans_desc[i]);
if (ret != ESP_OK) {
return ret;
}
ESP_RETURN_ON_ERROR(check_trans_valid(handle, (spi_transaction_t *)&seg_trans_desc[i]), SPI_TAG, "Invalid transaction");
}
SPI_CHECK(!spi_bus_device_is_polling(handle), "Cannot queue new transaction while previous polling transaction is not terminated.", ESP_ERR_INVALID_STATE);
uint32_t *conf_buffer = heap_caps_malloc(trans_num * SOC_SPI_SCT_BUFFER_NUM_MAX * sizeof(uint32_t), MALLOC_CAP_DMA);
SPI_CHECK(conf_buffer, "No enough memory", ESP_ERR_NO_MEM);
spi_hal_context_t *hal = &handle->host->hal;
s_sct_init_conf_buffer(hal, conf_buffer, trans_num);

View File

@@ -1727,6 +1727,7 @@ static IRAM_ATTR void test_master_iram(void)
spi_device_handle_t dev_handle = {0};
spi_device_interface_config_t devcfg = SPI_DEVICE_TEST_DEFAULT_CONFIG();
devcfg.cs_ena_pretrans = 1;
devcfg.post_cb = test_master_iram_post_trans_cbk;
TEST_ESP_OK(spi_bus_add_device(TEST_SPI_HOST, &devcfg, &dev_handle));
@@ -1974,27 +1975,32 @@ TEST_CASE("test_spi_master_auto_sleep_retention", "[spi]")
#if CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE
#define TEST_EDMA_PSRAM_TRANS_NUM 5
#define TEST_EDMA_TRANS_LEN 20480 //Use PSRAM need a 16/32/64 aligned buffer len
#define TEST_EDMA_TRANS_LEN 20000
#define TEST_EDMA_BUFFER_SZ (TEST_EDMA_PSRAM_TRANS_NUM * TEST_EDMA_TRANS_LEN)
void test_spi_psram_trans(spi_device_handle_t dev_handle, void *tx, void *rx)
{
spi_transaction_t trans_cfg = {
.length = TEST_EDMA_TRANS_LEN * 8,
.flags = SPI_TRANS_DMA_USE_PSRAM,
.tx_buffer = tx,
.rx_buffer = rx,
};
int trans_len = TEST_EDMA_TRANS_LEN - TEST_EDMA_PSRAM_TRANS_NUM / 2;
for (uint8_t cnt = 0; cnt < TEST_EDMA_PSRAM_TRANS_NUM; cnt ++) {
trans_cfg.tx_buffer = tx + TEST_EDMA_TRANS_LEN * cnt;
trans_cfg.rx_buffer = rx + TEST_EDMA_TRANS_LEN * cnt;
trans_cfg.length = trans_len * 8;
trans_cfg.rxlength = trans_len * 8;
trans_cfg.flags = (cnt % 2) ? 0 : SPI_TRANS_DMA_USE_PSRAM;
// To use psram, hardware will pass data through MSPI and GDMA to GPSPI, which need some time
// GPSPI bandwidth(speed * line_num) should always no more than PSRAM bandwidth
trans_cfg.override_freq_hz = (CONFIG_SPIRAM_SPEED / 4) * 1000 * 1000;
printf("%d TX %p RX %p len %d @%ld kHz\n", cnt, trans_cfg.tx_buffer, trans_cfg.rx_buffer, TEST_EDMA_TRANS_LEN, trans_cfg.override_freq_hz / 1000);
printf("%d TX %p RX %p len %d @%ld kHz\n", cnt, trans_cfg.tx_buffer, trans_cfg.rx_buffer, trans_len, trans_cfg.override_freq_hz / 1000);
TEST_ESP_OK(spi_device_transmit(dev_handle, &trans_cfg));
TEST_ASSERT(!(trans_cfg.flags & (SPI_TRANS_DMA_RX_FAIL | SPI_TRANS_DMA_TX_FAIL)));
spitest_cmp_or_dump(trans_cfg.tx_buffer, trans_cfg.rx_buffer, TEST_EDMA_TRANS_LEN);
spitest_cmp_or_dump(trans_cfg.tx_buffer, trans_cfg.rx_buffer, trans_len);
trans_cfg.tx_buffer += trans_len;
trans_cfg.rx_buffer += trans_len;
trans_len ++;
}
}
@@ -2005,17 +2011,16 @@ TEST_CASE("SPI_Master: PSRAM buffer transaction via EDMA", "[spi]")
buscfg.max_transfer_sz = TEST_EDMA_BUFFER_SZ;
TEST_ESP_OK(spi_bus_initialize(TEST_SPI_HOST, &buscfg, SPI_DMA_CH_AUTO));
spi_device_handle_t dev_handle;
spi_device_handle_t dev_handle = NULL;
spi_device_interface_config_t devcfg = SPI_DEVICE_TEST_DEFAULT_CONFIG();
devcfg.clock_speed_hz = 80 * 1000 * 1000; // Test error case on highest freq first
TEST_ESP_OK(spi_bus_add_device(TEST_SPI_HOST, &devcfg, &dev_handle));
int real_freq_khz;
spi_device_get_actual_freq(dev_handle, &real_freq_khz);
uint32_t cache_width = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA);
uint8_t *internal_1 = heap_caps_aligned_calloc(cache_width, 1, TEST_EDMA_BUFFER_SZ, MALLOC_CAP_INTERNAL);
uint8_t *external_1 = heap_caps_aligned_calloc(cache_width, 1, TEST_EDMA_BUFFER_SZ, MALLOC_CAP_SPIRAM);
uint8_t *external_2 = heap_caps_aligned_calloc(cache_width, 1, TEST_EDMA_BUFFER_SZ, MALLOC_CAP_SPIRAM);
uint8_t *internal_1 = heap_caps_calloc(1, TEST_EDMA_BUFFER_SZ, MALLOC_CAP_INTERNAL);
uint8_t *external_1 = heap_caps_calloc(1, TEST_EDMA_BUFFER_SZ, MALLOC_CAP_SPIRAM);
uint8_t *external_2 = heap_caps_calloc(1, TEST_EDMA_BUFFER_SZ, MALLOC_CAP_SPIRAM);
test_fill_random_to_buffers_dualboard(1001, internal_1, external_2, TEST_EDMA_BUFFER_SZ);
printf("Test error case: High freq @%d kHz\n", real_freq_khz);
@@ -2025,31 +2030,32 @@ TEST_CASE("SPI_Master: PSRAM buffer transaction via EDMA", "[spi]")
.rx_buffer = external_1,
};
// also test on polling API, and automalloc mechanism
for (uint8_t i = 0; i < 2; i++) {
printf("\n==== %s ====\n", i ? "EDMA" : "Auto Malloc");
trans_cfg.flags = i ? SPI_TRANS_DMA_USE_PSRAM : 0;
uint32_t before = esp_get_free_heap_size();
spi_device_polling_start(dev_handle, &trans_cfg, portMAX_DELAY);
uint32_t after = esp_get_free_heap_size();
printf("\n==== %s ====\n", i ? "EDMA" : "Auto Malloc");
printf("before: %ld, after: %ld, diff: %ld\n", before, after, after - before);
printf("mem_diff: %ld, trans_len: %d\n", after - before, TEST_EDMA_TRANS_LEN);
// rx buffer still potential re-malloc from psram even if SPI_TRANS_DMA_USE_PSRAM is set
TEST_ASSERT(i ? (before - after) < 2 * TEST_EDMA_TRANS_LEN : (before - after) > 2 * TEST_EDMA_TRANS_LEN);
spi_device_polling_end(dev_handle, portMAX_DELAY);
printf("RX fail: %d, TX fail: %d\n", !!(trans_cfg.flags & SPI_TRANS_DMA_RX_FAIL), !!(trans_cfg.flags & SPI_TRANS_DMA_TX_FAIL));
TEST_ASSERT(i ? (before - after) < TEST_EDMA_TRANS_LEN : (before - after) > 2 * TEST_EDMA_TRANS_LEN);
TEST_ASSERT((!!i) == !!(trans_cfg.flags & (SPI_TRANS_DMA_RX_FAIL | SPI_TRANS_DMA_TX_FAIL)));
printf("TX fail: %d, RX fail: %d\n", !!(trans_cfg.flags & SPI_TRANS_DMA_TX_FAIL), !!(trans_cfg.flags & SPI_TRANS_DMA_RX_FAIL));
TEST_ASSERT((!!i) == !!(trans_cfg.flags & (SPI_TRANS_DMA_TX_FAIL | SPI_TRANS_DMA_RX_FAIL)));
if (!i) { // data should be correct if using auto malloc
spitest_cmp_or_dump(trans_cfg.tx_buffer, trans_cfg.rx_buffer, TEST_EDMA_TRANS_LEN);
}
}
printf("\nTest unaligned tx psram buffer\n");
trans_cfg.tx_buffer ++;
TEST_ESP_ERR(ESP_ERR_INVALID_ARG, spi_device_transmit(dev_handle, &trans_cfg));
printf("\nTest trans: internal -> psram\n");
memset(external_1, 0, TEST_EDMA_BUFFER_SZ);
TEST_ESP_OK(esp_cache_msync((void *)external_1, TEST_EDMA_BUFFER_SZ, ESP_CACHE_MSYNC_FLAG_DIR_C2M));
TEST_ESP_OK(esp_cache_msync((void *)external_1, TEST_EDMA_BUFFER_SZ, (ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED)));
test_spi_psram_trans(dev_handle, internal_1, external_1);
printf("\nTest trans: psram -> psram\n");
memset(external_2, 0, TEST_EDMA_BUFFER_SZ);
TEST_ESP_OK(esp_cache_msync((void *)external_2, TEST_EDMA_BUFFER_SZ, ESP_CACHE_MSYNC_FLAG_DIR_C2M));
TEST_ESP_OK(esp_cache_msync((void *)external_2, TEST_EDMA_BUFFER_SZ, (ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED)));
test_spi_psram_trans(dev_handle, external_1, external_2);
printf("\nTest trans: psram -> internal\n");

View File

@@ -312,6 +312,7 @@ static void test_slave_iram_master_normal(void)
spi_device_handle_t dev_handle = {0};
spi_device_interface_config_t devcfg = SPI_DEVICE_TEST_DEFAULT_CONFIG();
devcfg.cs_ena_pretrans = 1;
TEST_ESP_OK(spi_bus_add_device(TEST_SPI_HOST, &devcfg, &dev_handle));
uint8_t *master_send = heap_caps_malloc(TEST_BUFFER_SZ, MALLOC_CAP_DMA);

View File

@@ -1184,6 +1184,19 @@ static inline void spi_dma_ll_rx_enable_burst_desc(spi_dma_dev_t *dma_in, uint32
dma_in->dma_conf.indscr_burst_en = enable;
}
/**
* Get the DMA RX alignment requirements
*
* @param dma_dev Beginning address of the DMA peripheral registers.
* @param internal_size The internal memory alignment requirements.
* @param external_size The external memory alignment requirements.
*/
static inline void spi_dma_ll_get_rx_alignment_require(spi_dma_dev_t *dma_dev, uint32_t *internal_size, uint32_t *external_size)
{
*internal_size = 4; // esp32 needs 4 bytes alignment on hardware design
*external_size = UINT32_MAX; // dma of esp32 spi don't support external memory
}
/**
* Reset TX DMA which transmits the data from RAM to a peripheral.
*

View File

@@ -1376,6 +1376,20 @@ static inline uint32_t spi_dma_ll_get_in_suc_eof_desc_addr(spi_dma_dev_t *dma_in
return dma_in->dma_in_suc_eof_des_addr;
}
/**
* Get the DMA RX alignment requirements
*
* @param dma_dev Beginning address of the DMA peripheral registers.
* @param internal_size The internal memory alignment requirements.
* @param external_size The external memory alignment requirements.
*/
static inline void spi_dma_ll_get_rx_alignment_require(spi_dma_dev_t *dma_dev, uint32_t *internal_size, uint32_t *external_size)
{
*internal_size = 4;
// SPI2 supports external memory, SPI3 does not
*external_size = (dma_dev == &GPSPI2) ? 16 << dma_dev->dma_conf.ext_mem_bk_size : UINT32_MAX;
}
//---------------------------------------------------TX-------------------------------------------------//
/**
* Reset TX DMA which transmits the data from RAM to a peripheral.

View File

@@ -358,10 +358,9 @@ The example code for the SPI Master driver can be found in the :example:`periphe
Transactions with Data on PSRAM
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
{IDF_TARGET_NAME} supports GPSPI Master with DMA transferring data from/to PSRAM directly without extra internal copy process, by adding :c:macro:`SPI_TRANS_DMA_USE_PSRAM` flag to the transaction. Some requirements for PSRAM transactions are:
{IDF_TARGET_NAME} supports GPSPI Master with DMA transferring data from/to PSRAM directly without extra internal copy process, which saves memory, by adding :c:macro:`SPI_TRANS_DMA_USE_PSRAM` flag to the transaction.
1. The data memory **address** and **transaction length** must both be aligned to cache length, usually the cache length is 16/32/64 bytes.
2. This feature shares bandwidth with MSPI bus, so GPSPI transfer bandwidth should be less than PSRAM bandwidth, **otherwise transmission data may be lost**. You can check the :c:macro:`SPI_TRANS_DMA_RX_FAIL` and :c:macro:`SPI_TRANS_DMA_TX_FAIL` flags after the transaction is finished to check if error occurs during the transmission.
Note that this feature shares bandwidth (bus frequency * bus bits width) with MSPI bus, so GPSPI transfer bandwidth should be less than PSRAM bandwidth, **otherwise transmission data may be lost**. You can check the return value or :c:macro:`SPI_TRANS_DMA_RX_FAIL` and :c:macro:`SPI_TRANS_DMA_TX_FAIL` flags after the transaction is finished to check if error occurs during the transmission. If the transaction returns :c:macro:`ESP_ERR_INVALID_STATE` error, the transaction fails.
Transactions with Data Not Exceeding 32 Bits
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -358,10 +358,9 @@ SPI 主机驱动程序的示例代码存放在 ESP-IDF 示例项目的 :example:
使用 PSRAM 的传输事务
^^^^^^^^^^^^^^^^^^^^^^
{IDF_TARGET_NAME} 支持 GPSPI Master 通过 DMA 直接传输 PSRAM 存储的数据而不用内部额外的拷贝过程,在传输配置中添加 :c:macro:`SPI_TRANS_DMA_USE_PSRAM` 标志信号即可使用。使用 PSRAM 传输事务时,请注意以下几点:
{IDF_TARGET_NAME} 支持 GPSPI Master 通过 DMA 直接传输 PSRAM 存储的数据而不用内部额外的零时拷贝,应此可以节省内存,在传输配置中添加 :c:macro:`SPI_TRANS_DMA_USE_PSRAM` 标志信号即可使用。
1. 数据内存 **地址****传输长度** 都需要与 Cache 长度对齐,通常 Cache 长度为 16/32/64 字节
2. 因为该功能与 MSPI 总线共享带宽,因此 GPSPI 传输带宽应小于 PSRAM 带宽,否则 **可能会丢失传输数据**。可通过在传输结束时检查 :c:macro:`SPI_TRANS_DMA_RX_FAIL`:c:macro:`SPI_TRANS_DMA_TX_FAIL` 标志信号来判断传输是否发生了错误。
请注意该功能共享 MSPI 总线带宽(总线频率 * 总线位宽),因此 GPSPI 传输带宽应小于 PSRAM 带宽,否则 **可能会丢失传输数据**。可通过在传输结束时检查返回值或 :c:macro:`SPI_TRANS_DMA_RX_FAIL`:c:macro:`SPI_TRANS_DMA_TX_FAIL` 标志信号来判断传输是否发生了错误。若传输事务返回 :c:macro:`ESP_ERR_INVALID_STATE` 错误,则传输事务失败
传输数据小于 32 位的传输事务
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -23,4 +23,13 @@ menu "Example Configuration"
in practice the driver chips work fine with a higher clock rate, and using that gives a better framerate.
Select this to try using the out-of-spec clock rate.
config LCD_BUFFER_IN_PSRAM
bool
prompt "Malloc LCD buffer from PSRAM, it can save internal RAM"
depends on SPIRAM && SOC_PSRAM_DMA_CAPABLE
default "y"
help
Driver is now support using PSRAM memory as LCD buffer directly
without additional internal copy, using it is able to save internal
memory space, and without CPU cost.
endmenu

View File

@@ -19,6 +19,10 @@ uint16_t *pixels;
//Grab a rgb16 pixel from the esp32_tiles image
static inline uint16_t get_bgnd_pixel(int x, int y)
{
// Clamp coordinates to valid image bounds
x = (x < 0) ? 0 : (x >= IMAGE_W) ? IMAGE_W - 1 : x;
y = (y < 0) ? 0 : (y >= IMAGE_H) ? IMAGE_H - 1 : y;
//Get color of the pixel on x,y coords
return (uint16_t) * (pixels + (y * IMAGE_W) + x);
}
@@ -26,7 +30,7 @@ static inline uint16_t get_bgnd_pixel(int x, int y)
//This variable is used to detect the next frame.
static int prev_frame = -1;
//Instead of calculating the offsets for each pixel we grab, we pre-calculate the valueswhenever a frame changes, then re-use
//Instead of calculating the offsets for each pixel we grab, we pre-calculate the valueswhenever a frame changes, then reuse
//these as we go through all the pixels in the frame. This is much, much faster.
static int8_t xofs[320], yofs[240];
static int8_t xcomp[320], ycomp[240];

View File

@@ -343,7 +343,11 @@ static void send_lines(spi_device_handle_t spi, int ypos, uint16_t *linedata)
trans[4].tx_data[0] = 0x2C; //memory write
trans[5].tx_buffer = linedata; //finally send the line data
trans[5].length = 320 * 2 * 8 * PARALLEL_LINES; //Data length, in bits
#if CONFIG_LCD_BUFFER_IN_PSRAM
trans[5].flags = SPI_TRANS_DMA_USE_PSRAM; //using PSRAM
#else
trans[5].flags = 0; //undo SPI_TRANS_USE_TXDATA flag
#endif
//Queue all transactions.
for (x = 0; x < 6; x++) {
@@ -375,9 +379,17 @@ static void send_line_finish(spi_device_handle_t spi)
static void display_pretty_colors(spi_device_handle_t spi)
{
uint16_t *lines[2];
#if CONFIG_LCD_BUFFER_IN_PSRAM
uint32_t mem_cap = MALLOC_CAP_SPIRAM | MALLOC_CAP_DMA;
printf("Get LCD buffer from PSRAM\n");
#else
uint32_t mem_cap = MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA;
printf("Get LCD buffer from internal\n");
#endif
//Allocate memory for the pixel buffers
for (int i = 0; i < 2; i++) {
lines[i] = spi_bus_dma_memory_alloc(LCD_HOST, 320 * PARALLEL_LINES * sizeof(uint16_t), 0);
lines[i] = spi_bus_dma_memory_alloc(LCD_HOST, 320 * PARALLEL_LINES * sizeof(uint16_t), mem_cap);
assert(lines[i] != NULL);
}
int frame = 0;