Merge branch 'task/ota-reject' into 'master'

Add option to report 'failed' status on OTA rollback due to MQTT timeout

See merge request app-frameworks/esp-rainmaker!620
This commit is contained in:
Piyush Shah
2026-01-12 13:15:07 +05:30
6 changed files with 132 additions and 4 deletions

View File

@@ -1,5 +1,25 @@
# Changelog
## 1.9.0
### New Features
- Implemented option to report 'failed' status on OTA rollback due to MQTT timeout.
- Enable via `CONFIG_ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED` in menuconfig.
- When enabled, if a rollback happens because MQTT did not connect within the configured timeout,
the rolled-back firmware will report 'failed' status instead of 'rejected'.
- Ensures backward compatibility: older firmware versions (without this feature) will not
report any status, preventing incorrect 'rejected' reports.
- Only applicable for `OTA_USING_TOPICS` type.
- Implementation details:
- New firmware stores failure reason and job ID in separate NVS keys before rollback.
- Rolled-back firmware reads these keys and reports 'failed' status with appropriate job ID.
- Main job ID key is erased before rollback to prevent old firmware from reporting 'rejected'.
- **WARNING**: Use this option with caution. If the new firmware has issues that cause persistent
MQTT connection failures, enabling this feature may cause the device to toggle between two
firmware versions indefinitely (new firmware boots → MQTT fails → rollback → OTA retry →
new firmware boots again).
## 1.8.9
### New Features

View File

@@ -419,6 +419,20 @@ menu "ESP RainMaker Config"
this wait period (in seconds), the firmware will be marked as invalid and the older
firmware will be booted into.
config ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED
bool "Report 'failed' status on MQTT timeout rollback"
default n
help
When enabled, if a rollback happens because MQTT did not connect within the rollback wait
period, the rolled-back firmware will report 'failed' status instead of 'rejected'. This is
only applicable for OTA using topics (OTA_USING_TOPICS). This also ensures backward
compatibility by erasing the job ID before rollback, so older firmware versions (without
this feature) will not report any status.
WARNING: Use this option with caution. If the new firmware has issues that cause persistent
MQTT connection failures, enabling this feature may cause the device to toggle between two
firmware versions indefinitely (new firmware boots → MQTT fails → rollback → OTA retry →
new firmware boots again).
config ESP_RMAKER_OTA_DISABLE_AUTO_REBOOT
bool "Disable auto reboot"
default n

View File

@@ -1,5 +1,5 @@
## IDF Component Manager Manifest File
version: "1.8.9"
version: "1.9.0"
description: ESP RainMaker firmware agent
url: https://github.com/espressif/esp-rainmaker/tree/master/components/esp_rainmaker
repository: https://github.com/espressif/esp-rainmaker.git

View File

@@ -550,6 +550,39 @@ static void esp_ota_rollback(TimerHandle_t handle)
{
    /* Invoked when the firmware could not be verified within
     * RMAKER_OTA_ROLLBACK_WAIT_PERIOD seconds of boot-up (presumably as a
     * FreeRTOS timer callback, given the TimerHandle_t argument - confirm at
     * the registration site). Optionally records why the rollback happened,
     * then marks the running firmware invalid.
     */
    ESP_LOGE(TAG, "Could not verify firmware even after %d seconds since boot-up. Rolling back.",
            RMAKER_OTA_ROLLBACK_WAIT_PERIOD);
#ifdef CONFIG_ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED
    /* Store the failure reason and the job ID in separate NVS keys so that, after
     * rollback, a firmware that supports this feature can report "failed" status
     * with the correct job ID. The main job ID key is erased so that a firmware
     * without this feature does not report "rejected" status.
     * All NVS operations here are best-effort: failures are logged and the
     * rollback proceeds regardless.
     */
    nvs_handle nvs_handle_fail;
    esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE,
            NVS_READWRITE, &nvs_handle_fail);
    if (err == ESP_OK) {
        /* Store failure reason */
        char fail_reason[64];
        snprintf(fail_reason, sizeof(fail_reason), "MQTT did not connect within %d seconds",
                RMAKER_OTA_ROLLBACK_WAIT_PERIOD);
        esp_err_t set_err = nvs_set_str(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME, fail_reason);
        if (set_err != ESP_OK) {
            ESP_LOGW(TAG, "Failed to store failure reason: %s", esp_err_to_name(set_err));
        }

        /* Copy the job ID to a separate key. The main job ID is read as a blob,
         * which carries no NUL terminator of its own, so one byte of the buffer
         * is reserved to guarantee that the string handed to nvs_set_str() is
         * always terminated.
         */
        char job_id[64] = {0};
        size_t job_id_len = sizeof(job_id) - 1;
        esp_err_t get_err = nvs_get_blob(nvs_handle_fail, RMAKER_OTA_JOB_ID_NVS_NAME, job_id, &job_id_len);
        if (get_err == ESP_OK) {
            /* On success job_id_len holds the actual blob length (<= sizeof(job_id) - 1) */
            job_id[job_id_len] = '\0';
            set_err = nvs_set_str(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME, job_id);
            if (set_err != ESP_OK) {
                ESP_LOGW(TAG, "Failed to store failure job ID: %s", esp_err_to_name(set_err));
            }
        } else {
            ESP_LOGW(TAG, "Could not read job ID for failure report: %s", esp_err_to_name(get_err));
        }

        /* Erase main job_id key to prevent old firmware (without this feature) from reporting "rejected" */
        esp_err_t erase_err = nvs_erase_key(nvs_handle_fail, RMAKER_OTA_JOB_ID_NVS_NAME);
        if (erase_err != ESP_OK && erase_err != ESP_ERR_NVS_NOT_FOUND) {
            ESP_LOGW(TAG, "Failed to erase job ID: %s", esp_err_to_name(erase_err));
        }
        esp_err_t commit_err = nvs_commit(nvs_handle_fail);
        if (commit_err != ESP_OK) {
            ESP_LOGW(TAG, "Failed to commit rollback failure info: %s", esp_err_to_name(commit_err));
        }
        nvs_close(nvs_handle_fail);
    } else {
        ESP_LOGW(TAG, "Failed to open NVS to store rollback failure info: %s", esp_err_to_name(err));
    }
#endif /* CONFIG_ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED */
    esp_rmaker_ota_mark_invalid();
}
@@ -617,9 +650,11 @@ static void esp_rmaker_ota_manage_rollback(esp_rmaker_ota_t *ota)
if (ota->validation_in_progress) {
ota->rolled_back = true;
esp_rmaker_erase_rollback_flag();
if (ota->type == OTA_USING_PARAMS) {
/* Calling this only for OTA_USING_PARAMS, because for OTA_USING_TOPICS,
* the work queue function will manage the status reporting later.
/* For OTA_USING_PARAMS, just report "rejected" as it doesn't use job IDs.
* For OTA_USING_TOPICS, the work queue function will handle reading
* failure info from NVS and reporting appropriate status.
*/
esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota,
OTA_STATUS_REJECTED, "Firmware rolled back");

View File

@@ -22,6 +22,8 @@ extern "C" {
#define RMAKER_OTA_NVS_NAMESPACE "rmaker_ota"
#define RMAKER_OTA_JOB_ID_NVS_NAME "rmaker_ota_id"
#define RMAKER_OTA_UPDATE_FLAG_NVS_NAME "ota_update"
#define RMAKER_OTA_FAIL_REASON_NVS_NAME "ota_fail_rsn"
#define RMAKER_OTA_FAIL_JOB_ID_NVS_NAME "ota_fail_jid"
#define RMAKER_OTA_FETCH_DELAY 5
#if defined(CONFIG_BT_ENABLED) && !defined(CONFIG_IDF_TARGET_ESP32P4)

View File

@@ -548,7 +548,64 @@ static void esp_rmaker_ota_work_fn(void *priv_data)
esp_rmaker_ota_t *ota = (esp_rmaker_ota_t *)priv_data;
/* If the firmware was rolled back, indicate that first */
if (ota->rolled_back) {
esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_REJECTED, "Firmware rolled back");
char *rollback_fail_job_id = NULL;
char *rollback_fail_reason = NULL;
/* Check if there's a failure job_id stored (from new firmware with ROLLBACK_REPORT_FAILED enabled).
* The presence of ota_fail_jid is the primary indicator that we should report "failed" instead of "rejected".
*/
nvs_handle nvs_handle_fail;
esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &nvs_handle_fail);
if (err == ESP_OK) {
size_t job_id_len = 0;
if (nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME, NULL, &job_id_len) == ESP_OK) {
rollback_fail_job_id = MEM_CALLOC_EXTRAM(1, job_id_len);
if (rollback_fail_job_id) {
nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME, rollback_fail_job_id, &job_id_len);
ESP_LOGI(TAG, "Rollback failure job_id found: %s", rollback_fail_job_id);
}
/* Also try to read the failure reason if available */
size_t fail_reason_len = 0;
if (nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME, NULL, &fail_reason_len) == ESP_OK) {
rollback_fail_reason = MEM_CALLOC_EXTRAM(1, fail_reason_len);
if (rollback_fail_reason) {
nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME, rollback_fail_reason, &fail_reason_len);
ESP_LOGI(TAG, "Rollback failure reason found: %s", rollback_fail_reason);
}
}
}
nvs_close(nvs_handle_fail);
}
if (rollback_fail_job_id) {
/* Report "failed" - presence of rollback_fail_job_id indicates new firmware had the feature enabled */
ota->transient_priv = rollback_fail_job_id;
/* Use stored reason if available, else use default */
const char *fail_reason = rollback_fail_reason ?
rollback_fail_reason : "Firmware verification failed";
esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_FAILED, (char *)fail_reason);
/* Erase NVS keys after successful reporting */
err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &nvs_handle_fail);
if (err == ESP_OK) {
nvs_erase_key(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME);
nvs_erase_key(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME);
/* Also erase the main job_id key for safety, in case it wasn't erased before rollback */
nvs_erase_key(nvs_handle_fail, RMAKER_OTA_JOB_ID_NVS_NAME);
nvs_commit(nvs_handle_fail);
nvs_close(nvs_handle_fail);
}
/* Clean up memory */
free(rollback_fail_job_id);
if (rollback_fail_reason) {
free(rollback_fail_reason);
}
ota->transient_priv = NULL;
} else {
esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_REJECTED, "Firmware rolled back");
}
ota->rolled_back = false;
}
esp_rmaker_ota_subscribe(priv_data);