diff --git a/components/esp_rainmaker/CHANGELOG.md b/components/esp_rainmaker/CHANGELOG.md index 911d90b..259dafd 100644 --- a/components/esp_rainmaker/CHANGELOG.md +++ b/components/esp_rainmaker/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## 1.9.0 + +### New Features + +- Implemented option to report 'failed' status on OTA rollback due to MQTT timeout. + - Enable via `CONFIG_ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED` in menuconfig. + - When enabled, if a rollback happens because MQTT did not connect within the configured timeout, + the rolled-back firmware will report 'failed' status instead of 'rejected'. + - Ensures backward compatibility: older firmware versions (without this feature) will not + report any status, preventing incorrect 'rejected' reports. + - Only applicable for `OTA_USING_TOPICS` type. + - Implementation details: + - New firmware stores failure reason and job ID in separate NVS keys before rollback. + - Rolled-back firmware reads these keys and reports 'failed' status with appropriate job ID. + - Main job ID key is erased before rollback to prevent old firmware from reporting 'rejected'. + - **WARNING**: Use this option with caution. If the new firmware has issues that cause persistent + MQTT connection failures, enabling this feature may cause the device to toggle between two + firmware versions indefinitely (new firmware boots → MQTT fails → rollback → OTA retry → + new firmware boots again). + ## 1.8.9 ### New Features diff --git a/components/esp_rainmaker/Kconfig.projbuild b/components/esp_rainmaker/Kconfig.projbuild index 4d77412..ed73cf4 100644 --- a/components/esp_rainmaker/Kconfig.projbuild +++ b/components/esp_rainmaker/Kconfig.projbuild @@ -419,6 +419,20 @@ menu "ESP RainMaker Config" this wait period (in seconds), the firmware will be marked as invalid and the older firmware will be booted into. 
+ config ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED + bool "Report 'failed' status on MQTT timeout rollback" + default n + help + When enabled, if a rollback happens due to MQTT connection timeout, the rolled back firmware + will report 'failed' status instead of 'rejected'. This also ensures backward compatibility + by erasing the job ID before rollback, so older firmware versions (without this feature) + will not report any status. + + WARNING: Use this option with caution. If the new firmware has issues that cause persistent + MQTT connection failures, enabling this feature may cause the device to toggle between two + firmware versions indefinitely (new firmware boots → MQTT fails → rollback → OTA retry → + new firmware boots again). + config ESP_RMAKER_OTA_DISABLE_AUTO_REBOOT bool "Disable auto reboot" default n diff --git a/components/esp_rainmaker/idf_component.yml b/components/esp_rainmaker/idf_component.yml index 8b80c81..0852404 100644 --- a/components/esp_rainmaker/idf_component.yml +++ b/components/esp_rainmaker/idf_component.yml @@ -1,5 +1,5 @@ ## IDF Component Manager Manifest File -version: "1.8.9" +version: "1.9.0" description: ESP RainMaker firmware agent url: https://github.com/espressif/esp-rainmaker/tree/master/components/esp_rainmaker repository: https://github.com/espressif/esp-rainmaker.git diff --git a/components/esp_rainmaker/src/ota/esp_rmaker_ota.c b/components/esp_rainmaker/src/ota/esp_rmaker_ota.c index 20b6e68..6fd08ca 100644 --- a/components/esp_rainmaker/src/ota/esp_rmaker_ota.c +++ b/components/esp_rainmaker/src/ota/esp_rmaker_ota.c @@ -550,6 +550,39 @@ static void esp_ota_rollback(TimerHandle_t handle) { ESP_LOGE(TAG, "Could not verify firmware even after %d seconds since boot-up. 
Rolling back.", RMAKER_OTA_ROLLBACK_WAIT_PERIOD); +#ifdef CONFIG_ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED + /* Store failure reason and job_id in separate NVS keys so that after rollback, + * the rolled-back firmware (if it has this feature) can report "failed" status with the correct job_id. + * Also erase the main job_id key so that rolled-back firmware without this feature won't + * report "rejected" status. + */ + nvs_handle nvs_handle_fail; + esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &nvs_handle_fail); + if (err == ESP_OK) { + /* Store failure reason */ + char fail_reason[64]; + snprintf(fail_reason, sizeof(fail_reason), "MQTT did not connect within %d seconds", RMAKER_OTA_ROLLBACK_WAIT_PERIOD); + esp_err_t set_err = nvs_set_str(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME, fail_reason); + if (set_err != ESP_OK) { + ESP_LOGW(TAG, "Failed to store failure reason: %s", esp_err_to_name(set_err)); + } + + /* Copy job_id (read as a blob) to a separate string key for the rolled-back firmware to use */ + char job_id[64] = {0}; + size_t job_id_len = sizeof(job_id) - 1; /* keep the last byte 0 so job_id is always NUL-terminated for nvs_set_str() */ + if (nvs_get_blob(nvs_handle_fail, RMAKER_OTA_JOB_ID_NVS_NAME, job_id, &job_id_len) == ESP_OK) { + set_err = nvs_set_str(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME, job_id); + if (set_err != ESP_OK) { + ESP_LOGW(TAG, "Failed to store failure job ID: %s", esp_err_to_name(set_err)); + } + } + + /* Erase main job_id key to prevent old firmware (without this feature) from reporting "rejected" */ + nvs_erase_key(nvs_handle_fail, RMAKER_OTA_JOB_ID_NVS_NAME); + nvs_commit(nvs_handle_fail); + nvs_close(nvs_handle_fail); + } +#endif /* CONFIG_ESP_RMAKER_OTA_ROLLBACK_REPORT_FAILED */ esp_rmaker_ota_mark_invalid(); } @@ -617,9 +650,11 @@ static void esp_rmaker_ota_manage_rollback(esp_rmaker_ota_t *ota) if (ota->validation_in_progress) { ota->rolled_back = true; esp_rmaker_erase_rollback_flag(); + if (ota->type == OTA_USING_PARAMS) { - /* Calling this only for 
OTA_USING_TOPICS, - * the work queue function will manage the status reporting later. + /* For OTA_USING_PARAMS, just report "rejected" as it doesn't use job IDs. + * For OTA_USING_TOPICS, the work queue function will handle reading + * failure info from NVS and reporting appropriate status. */ esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_REJECTED, "Firmware rolled back"); diff --git a/components/esp_rainmaker/src/ota/esp_rmaker_ota_internal.h b/components/esp_rainmaker/src/ota/esp_rmaker_ota_internal.h index 35d4f22..c4a8350 100644 --- a/components/esp_rainmaker/src/ota/esp_rmaker_ota_internal.h +++ b/components/esp_rainmaker/src/ota/esp_rmaker_ota_internal.h @@ -22,6 +22,8 @@ extern "C" { #define RMAKER_OTA_NVS_NAMESPACE "rmaker_ota" #define RMAKER_OTA_JOB_ID_NVS_NAME "rmaker_ota_id" #define RMAKER_OTA_UPDATE_FLAG_NVS_NAME "ota_update" +#define RMAKER_OTA_FAIL_REASON_NVS_NAME "ota_fail_rsn" +#define RMAKER_OTA_FAIL_JOB_ID_NVS_NAME "ota_fail_jid" #define RMAKER_OTA_FETCH_DELAY 5 #if defined(CONFIG_BT_ENABLED) && !defined(CONFIG_IDF_TARGET_ESP32P4) diff --git a/components/esp_rainmaker/src/ota/esp_rmaker_ota_using_topics.c b/components/esp_rainmaker/src/ota/esp_rmaker_ota_using_topics.c index 1fa8c6f..32a0ecb 100644 --- a/components/esp_rainmaker/src/ota/esp_rmaker_ota_using_topics.c +++ b/components/esp_rainmaker/src/ota/esp_rmaker_ota_using_topics.c @@ -548,7 +548,64 @@ static void esp_rmaker_ota_work_fn(void *priv_data) esp_rmaker_ota_t *ota = (esp_rmaker_ota_t *)priv_data; /* If the firmware was rolled back, indicate that first */ if (ota->rolled_back) { - esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_REJECTED, "Firmware rolled back"); + char *rollback_fail_job_id = NULL; + char *rollback_fail_reason = NULL; + + /* Check if there's a failure job_id stored (from new firmware with ROLLBACK_REPORT_FAILED enabled). 
+ * The presence of ota_fail_jid is the primary indicator that we should report "failed" instead of "rejected". + */ + nvs_handle nvs_handle_fail; + esp_err_t err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &nvs_handle_fail); + if (err == ESP_OK) { + size_t job_id_len = 0; + if (nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME, NULL, &job_id_len) == ESP_OK) { + rollback_fail_job_id = MEM_CALLOC_EXTRAM(1, job_id_len); + if (rollback_fail_job_id) { + nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME, rollback_fail_job_id, &job_id_len); + ESP_LOGI(TAG, "Rollback failure job_id found: %s", rollback_fail_job_id); + } + + /* Also try to read the failure reason, but only when the job_id buffer exists: the reason is used and freed only on the "failed" path below */ + size_t fail_reason_len = 0; + if (rollback_fail_job_id && nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME, NULL, &fail_reason_len) == ESP_OK) { + rollback_fail_reason = MEM_CALLOC_EXTRAM(1, fail_reason_len); + if (rollback_fail_reason) { + nvs_get_str(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME, rollback_fail_reason, &fail_reason_len); + ESP_LOGI(TAG, "Rollback failure reason found: %s", rollback_fail_reason); + } + } + } + nvs_close(nvs_handle_fail); + } + + if (rollback_fail_job_id) { + /* Report "failed" - presence of rollback_fail_job_id indicates new firmware had the feature enabled */ + ota->transient_priv = rollback_fail_job_id; + /* Use stored reason if available, else use default */ + const char *fail_reason = rollback_fail_reason ? 
+ rollback_fail_reason : "Firmware verification failed"; + esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_FAILED, (char *)fail_reason); + + /* Erase NVS keys after reporting; the report call's result is not checked, so this happens unconditionally */ + err = nvs_open_from_partition(ESP_RMAKER_NVS_PART_NAME, RMAKER_OTA_NVS_NAMESPACE, NVS_READWRITE, &nvs_handle_fail); + if (err == ESP_OK) { + nvs_erase_key(nvs_handle_fail, RMAKER_OTA_FAIL_JOB_ID_NVS_NAME); + nvs_erase_key(nvs_handle_fail, RMAKER_OTA_FAIL_REASON_NVS_NAME); + /* Also erase the main job_id key for safety, in case it wasn't erased before rollback */ + nvs_erase_key(nvs_handle_fail, RMAKER_OTA_JOB_ID_NVS_NAME); + nvs_commit(nvs_handle_fail); + nvs_close(nvs_handle_fail); + } + + /* Clean up memory */ + free(rollback_fail_job_id); + if (rollback_fail_reason) { + free(rollback_fail_reason); + } + ota->transient_priv = NULL; + } else { + esp_rmaker_ota_report_status((esp_rmaker_ota_handle_t )ota, OTA_STATUS_REJECTED, "Firmware rolled back"); + } ota->rolled_back = false; } esp_rmaker_ota_subscribe(priv_data);