[Heap Trace Standalone] fix terrible Leaks perf on large records by using doubly linked list

This commit is contained in:
Chip Weinberger
2023-01-10 15:36:33 -08:00
parent 0025915dc4
commit 8b2b430d68
2 changed files with 219 additions and 66 deletions

View File

@@ -14,6 +14,7 @@
#include "freertos/FreeRTOS.h" #include "freertos/FreeRTOS.h"
#include "freertos/task.h" #include "freertos/task.h"
#include "esp_memory_utils.h" #include "esp_memory_utils.h"
#include "sys/queue.h"
#define STACK_DEPTH CONFIG_HEAP_TRACING_STACK_DEPTH #define STACK_DEPTH CONFIG_HEAP_TRACING_STACK_DEPTH
@@ -24,15 +25,26 @@ static portMUX_TYPE trace_mux = portMUX_INITIALIZER_UNLOCKED;
static bool tracing; static bool tracing;
static heap_trace_mode_t mode; static heap_trace_mode_t mode;
/* Define struct: linked list of records */
TAILQ_HEAD(heap_trace_record_list_struct_t, heap_trace_record_struct_t);
typedef struct heap_trace_record_list_struct_t heap_trace_record_list_t;
/* Linked List of Records */
typedef struct { typedef struct {
/* Buffer used for records, starting at offset 0 */ /* Buffer used for records. */
heap_trace_record_t *buffer; heap_trace_record_t *buffer;
/* Linked list of recorded allocations */
heap_trace_record_list_t list;
/* Linked list of available record objects */
heap_trace_record_list_t unused;
/* capacity of the buffer */ /* capacity of the buffer */
size_t capacity; size_t capacity;
/* Count of entries logged in the buffer.*/ /* Count of entries logged in the buffer. */
size_t count; size_t count;
/* During execution, we remember the maximum /* During execution, we remember the maximum
@@ -46,8 +58,13 @@ typedef struct {
// Forward Defines // Forward Defines
static void remove_record(records_t *r, int index);
static void heap_trace_dump_base(bool internal_ram, bool psram); static void heap_trace_dump_base(bool internal_ram, bool psram);
static void linked_list_setup();
static void linked_list_remove(heap_trace_record_t* rRemove);
static void linked_list_deep_copy(heap_trace_record_t *rDest, const heap_trace_record_t* rSrc);
static bool linked_list_add(const heap_trace_record_t* rAppend);
static heap_trace_record_t* linked_list_pop_unused();
static heap_trace_record_t* linked_list_find_address_reverse(void* p);
/* The actual records. */ /* The actual records. */
static records_t records; static records_t records;
@@ -64,9 +81,16 @@ esp_err_t heap_trace_init_standalone(heap_trace_record_t *record_buffer, size_t
return ESP_ERR_INVALID_STATE; return ESP_ERR_INVALID_STATE;
} }
if (num_records == 0) {
return ESP_ERR_INVALID_ARG;
}
if (record_buffer == NULL) {
return ESP_ERR_INVALID_ARG;
}
records.buffer = record_buffer; records.buffer = record_buffer;
records.capacity = num_records; records.capacity = num_records;
memset(records.buffer, 0, num_records * sizeof(heap_trace_record_t));
return ESP_OK; return ESP_OK;
} }
@@ -77,14 +101,17 @@ esp_err_t heap_trace_start(heap_trace_mode_t mode_param)
return ESP_ERR_INVALID_STATE; return ESP_ERR_INVALID_STATE;
} }
portENTER_CRITICAL(&trace_mux); portENTER_CRITICAL(&trace_mux);
tracing = false; tracing = false;
mode = mode_param; mode = mode_param;
// clear buffer
memset(records.buffer, 0, sizeof(heap_trace_record_t) * records.capacity);
records.count = 0; records.count = 0;
records.has_overflowed = false; records.has_overflowed = false;
linked_list_setup(&records);
total_allocations = 0; total_allocations = 0;
total_frees = 0; total_frees = 0;
@@ -118,9 +145,9 @@ size_t heap_trace_get_count(void)
return records.count; return records.count;
} }
esp_err_t heap_trace_get(size_t index, heap_trace_record_t *record) esp_err_t heap_trace_get(size_t index, heap_trace_record_t *rOut)
{ {
if (record == NULL) { if (rOut == NULL) {
return ESP_ERR_INVALID_STATE; return ESP_ERR_INVALID_STATE;
} }
@@ -129,9 +156,29 @@ esp_err_t heap_trace_get(size_t index, heap_trace_record_t *record)
portENTER_CRITICAL(&trace_mux); portENTER_CRITICAL(&trace_mux);
if (index >= records.count) { if (index >= records.count) {
result = ESP_ERR_INVALID_ARG; /* out of range for 'count' */ result = ESP_ERR_INVALID_ARG; /* out of range for 'count' */
} else { } else {
memcpy(record, &records.buffer[index], sizeof(heap_trace_record_t));
// Iterate through through the linked list
heap_trace_record_t* rCur = TAILQ_FIRST(&records.list);
for (int i = 0; i < index; i++) {
// this should not happen, since we already
// checked that index >= records.count == false
if (rCur == NULL) {
result = ESP_ERR_INVALID_STATE;
break;
}
rCur = TAILQ_NEXT(rCur, tailq);
}
// copy to destination
memcpy(rOut, rCur, sizeof(heap_trace_record_t));
} }
portEXIT_CRITICAL(&trace_mux); portEXIT_CRITICAL(&trace_mux);
@@ -167,6 +214,8 @@ void heap_trace_dump_caps(const uint32_t caps) {
static void heap_trace_dump_base(bool internal_ram, bool psram) static void heap_trace_dump_base(bool internal_ram, bool psram)
{ {
portENTER_CRITICAL(&trace_mux);
size_t delta_size = 0; size_t delta_size = 0;
size_t delta_allocs = 0; size_t delta_allocs = 0;
size_t start_count = records.count; size_t start_count = records.count;
@@ -174,45 +223,55 @@ static void heap_trace_dump_base(bool internal_ram, bool psram)
printf("====== Heap Trace: %u records (%u capacity) ======\n", printf("====== Heap Trace: %u records (%u capacity) ======\n",
records.count, records.capacity); records.count, records.capacity);
// Iterate through through the linked list
heap_trace_record_t *rCur = TAILQ_FIRST(&records.list);
for (int i = 0; i < records.count; i++) { for (int i = 0; i < records.count; i++) {
heap_trace_record_t *rec = &records.buffer[i]; // check corruption
if (rCur == NULL) {
printf("\nError: heap trace linked list is corrupt. expected more records.\n");
break;
}
bool should_print = rec->address != NULL && bool should_print = rCur->address != NULL &&
((psram && internal_ram) || ((psram && internal_ram) ||
(internal_ram && esp_ptr_internal(rec->address)) || (internal_ram && esp_ptr_internal(rCur->address)) ||
(psram && esp_ptr_external_ram(rec->address))); (psram && esp_ptr_external_ram(rCur->address)));
if (should_print) { if (should_print) {
const char* label = ""; const char* label = "";
if (esp_ptr_internal(rec->address)) { if (esp_ptr_internal(rCur->address)) {
label = ", Internal"; label = ", Internal";
} }
if (esp_ptr_external_ram(rec->address)) { if (esp_ptr_external_ram(rCur->address)) {
label = ", PSRAM"; label = ", PSRAM";
} }
printf("%6d bytes (@ %p%s) allocated CPU %d ccount 0x%08x caller ", printf("%6d bytes (@ %p%s) allocated CPU %d ccount 0x%08x caller ",
rec->size, rec->address, label, rec->ccount & 1, rec->ccount & ~3); rCur->size, rCur->address, label, rCur->ccount & 1, rCur->ccount & ~3);
for (int j = 0; j < STACK_DEPTH && rec->alloced_by[j] != 0; j++) { for (int j = 0; j < STACK_DEPTH && rCur->alloced_by[j] != 0; j++) {
printf("%p%s", rec->alloced_by[j], printf("%p%s", rCur->alloced_by[j],
(j < STACK_DEPTH - 1) ? ":" : ""); (j < STACK_DEPTH - 1) ? ":" : "");
} }
if (mode != HEAP_TRACE_ALL || STACK_DEPTH == 0 || rec->freed_by[0] == NULL) { if (mode != HEAP_TRACE_ALL || STACK_DEPTH == 0 || rCur->freed_by[0] == NULL) {
delta_size += rec->size; delta_size += rCur->size;
delta_allocs++; delta_allocs++;
printf("\n"); printf("\n");
} else { } else {
printf("\nfreed by "); printf("\nfreed by ");
for (int j = 0; j < STACK_DEPTH; j++) { for (int j = 0; j < STACK_DEPTH; j++) {
printf("%p%s", rec->freed_by[j], printf("%p%s", rCur->freed_by[j],
(j < STACK_DEPTH - 1) ? ":" : "\n"); (j < STACK_DEPTH - 1) ? ":" : "\n");
} }
} }
} }
rCur = TAILQ_NEXT(rCur, tailq);
} }
printf("====== Heap Trace Summary ======\n"); printf("====== Heap Trace Summary ======\n");
@@ -239,12 +298,14 @@ static void heap_trace_dump_base(bool internal_ram, bool psram)
printf("(NB: Internal Buffer has overflowed, so trace data is incomplete.)\n"); printf("(NB: Internal Buffer has overflowed, so trace data is incomplete.)\n");
} }
printf("================================\n"); printf("================================\n");
portEXIT_CRITICAL(&trace_mux);
} }
/* Add a new allocation to the heap trace records */ /* Add a new allocation to the heap trace records */
static IRAM_ATTR void record_allocation(const heap_trace_record_t *record) static IRAM_ATTR void record_allocation(const heap_trace_record_t *rAllocation)
{ {
if (!tracing || record->address == NULL) { if (!tracing || rAllocation->address == NULL) {
return; return;
} }
@@ -252,34 +313,19 @@ static IRAM_ATTR void record_allocation(const heap_trace_record_t *record)
if (tracing) { if (tracing) {
// If buffer is full, pop off the oldest
// record to make more space
if (records.count == records.capacity) { if (records.count == records.capacity) {
records.has_overflowed = true; records.has_overflowed = true;
/* Move the whole buffer back one slot. heap_trace_record_t *rFirst = TAILQ_FIRST(&records.list);
This is a bit slow, compared to treating this buffer as a linked_list_remove(rFirst);
ringbuffer and rotating a head pointer.
However, ringbuffer code gets tricky when we remove elements
in mid-buffer (for leak trace mode) while trying to keep
track of an item count that may overflow.
*/
memmove(&records.buffer[0], &records.buffer[1],
sizeof(heap_trace_record_t) * (records.capacity -1));
records.count--;
} }
// Copy new record into place // push onto end of list
memcpy(&records.buffer[records.count], record, sizeof(heap_trace_record_t)); linked_list_add(rAllocation);
records.count++;
// high water mark
if (records.count > records.high_water_mark) {
records.high_water_mark = records.count;
}
total_allocations++; total_allocations++;
} }
@@ -304,25 +350,20 @@ static IRAM_ATTR void record_free(void *p, void **callers)
total_frees++; total_frees++;
/* search backwards for the allocation record matching this free */ // search backwards for the allocation record matching this free
int i = -1; heap_trace_record_t* rFound = linked_list_find_address_reverse(p);
for (i = records.count - 1; i >= 0; i--) {
if (records.buffer[i].address == p) {
break;
}
}
if (i >= 0) { if (rFound) {
if (mode == HEAP_TRACE_ALL) { if (mode == HEAP_TRACE_ALL) {
// add 'freed_by' info to the record // add 'freed_by' info to the record
memcpy(records.buffer[i].freed_by, callers, sizeof(void *) * STACK_DEPTH); memcpy(rFound->freed_by, callers, sizeof(void *) * STACK_DEPTH);
} else { // HEAP_TRACE_LEAKS } else { // HEAP_TRACE_LEAKS
// Leak trace mode, once an allocation is freed // Leak trace mode, once an allocation is freed
// we remove it from the list // we remove it from the list
remove_record(&records, i); linked_list_remove(rFound);
} }
} }
} }
@@ -330,18 +371,126 @@ static IRAM_ATTR void record_free(void *p, void **callers)
portEXIT_CRITICAL(&trace_mux); portEXIT_CRITICAL(&trace_mux);
} }
/* remove the entry at 'index' from the ringbuffer of saved records */ // connect all records into a linked list of 'unused' records
static IRAM_ATTR void remove_record(records_t *r, int index) static void linked_list_setup()
{ {
if (index < r->count - 1) { TAILQ_INIT(&records.list);
// Remove the buffer entry from the list TAILQ_INIT(&records.unused);
memmove(&r->buffer[index], &r->buffer[index+1],
sizeof(heap_trace_record_t) * (r->capacity - index - 1)); for (int i = 0; i < records.capacity; i++) {
} else {
// For last element, just zero it out to avoid ambiguity heap_trace_record_t* rCur = &records.buffer[i];
memset(&r->buffer[index], 0, sizeof(heap_trace_record_t));
TAILQ_INSERT_HEAD(&records.unused, rCur, tailq);
} }
r->count--; }
/* 1. removes record from records.list,
2. places it into records.unused */
static IRAM_ATTR void linked_list_remove(heap_trace_record_t* rRemove)
{
assert(records.count > 0);
// remove from records.list
TAILQ_REMOVE(&records.list, rRemove, tailq);
// set as unused
rRemove->address = 0;
rRemove->size = 0;
// add to records.unused
TAILQ_INSERT_HEAD(&records.unused, rRemove, tailq);
// decrement
records.count--;
}
// pop record from unused list
static IRAM_ATTR heap_trace_record_t* linked_list_pop_unused()
{
// no records left?
if (records.count >= records.capacity) {
return NULL;
}
// get from records.unused
heap_trace_record_t* rUnused = TAILQ_FIRST(&records.unused);
assert(rUnused->address == NULL);
assert(rUnused->size == 0);
// remove from records.unused
TAILQ_REMOVE(&records.unused, rUnused, tailq);
return rUnused;
}
// deep copy a record.
// Note: only copies the *allocation data*, not the next & prev ptrs
static IRAM_ATTR void linked_list_deep_copy(heap_trace_record_t *rDest, const heap_trace_record_t *rSrc)
{
rDest->ccount = rSrc->ccount;
rDest->address = rSrc->address;
rDest->size = rSrc->size;
memcpy(rDest->freed_by, rSrc->freed_by, sizeof(void *) * STACK_DEPTH);
memcpy(rDest->alloced_by, rSrc->alloced_by, sizeof(void *) * STACK_DEPTH);
}
// Append a record to records.list
// Note: This deep copies rAppend
static IRAM_ATTR bool linked_list_add(const heap_trace_record_t *rAppend)
{
if (records.count < records.capacity) {
// get unused record
heap_trace_record_t* rDest = linked_list_pop_unused();
// we checked that there is capacity, so this
// should never be null.
assert(rDest != NULL);
// copy allocation data
linked_list_deep_copy(rDest, rAppend);
// append to records.list
TAILQ_INSERT_TAIL(&records.list, rDest, tailq);
// increment
records.count++;
// high water mark
if (records.count > records.high_water_mark) {
records.high_water_mark = records.count;
}
return true;
} else {
records.has_overflowed = true;
return false;
}
}
// search records.list backwards for the allocation record matching this address
static IRAM_ATTR heap_trace_record_t* linked_list_find_address_reverse(void* p)
{
if (records.count == 0) {
return NULL;
}
heap_trace_record_t* rFound = NULL;
// Perf: We search backwards because new allocations are appended
// to the end of the list and most allocations are short lived.
heap_trace_record_t *rCur = NULL;
TAILQ_FOREACH_REVERSE(rCur, &records.list, heap_trace_record_list_struct_t, tailq) {
if (rCur->address == p) {
rFound = rCur;
break;
}
}
return rFound;
} }
#include "heap_trace.inc" #include "heap_trace.inc"

View File

@@ -6,6 +6,7 @@
#pragma once #pragma once
#include "sdkconfig.h" #include "sdkconfig.h"
#include "sys/queue.h"
#include <stdint.h> #include <stdint.h>
#include <esp_err.h> #include <esp_err.h>
@@ -29,13 +30,16 @@ typedef enum {
/** /**
* @brief Trace record data type. Stores information about an allocated region of memory. * @brief Trace record data type. Stores information about an allocated region of memory.
*/ */
typedef struct { struct heap_trace_record_struct_t {
uint32_t ccount; ///< CCOUNT of the CPU when the allocation was made. LSB (bit value 1) is the CPU number (0 or 1). uint32_t ccount; ///< CCOUNT of the CPU when the allocation was made. LSB (bit value 1) is the CPU number (0 or 1).
void *address; ///< Address which was allocated void *address; ///< Address which was allocated. If NULL, then this record is empty.
size_t size; ///< Size of the allocation size_t size; ///< Size of the allocation
void *alloced_by[CONFIG_HEAP_TRACING_STACK_DEPTH]; ///< Call stack of the caller which allocated the memory. void *alloced_by[CONFIG_HEAP_TRACING_STACK_DEPTH]; ///< Call stack of the caller which allocated the memory.
void *freed_by[CONFIG_HEAP_TRACING_STACK_DEPTH]; ///< Call stack of the caller which freed the memory (all zero if not freed.) void *freed_by[CONFIG_HEAP_TRACING_STACK_DEPTH]; ///< Call stack of the caller which freed the memory (all zero if not freed.)
} heap_trace_record_t; TAILQ_ENTRY(heap_trace_record_struct_t) tailq; ///< Linked list: prev & next records
};
typedef struct heap_trace_record_struct_t heap_trace_record_t;
/** /**
* @brief Stores information about the result of a heap trace. * @brief Stores information about the result of a heap trace.