From 1b58dfad316d386cdc8db27640bfb539a11256f3 Mon Sep 17 00:00:00 2001 From: ryan Date: Sat, 9 Aug 2025 19:21:57 -0700 Subject: [PATCH] Implement graceful shutdown for MQTT connection failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added connection verification with timeout-based validation on startup - Enhanced error handling to catch Tortoise.publish_sync exceptions - Graceful exit via System.stop(1) prevents erl_crash.dump files - Added comprehensive logging for connection failures and shutdown reasons - Updated CLAUDE.md to document graceful shutdown behavior 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CLAUDE.md | 2 ++ server/lib/systant/mqtt_client.ex | 37 ++++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f23aeb6..ccd0a31 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -74,6 +74,8 @@ This is an Elixir OTP application that serves as a systemd daemon for MQTT-based - Uses hostname-based randomized client ID to avoid conflicts across multiple hosts - Configurable startup delay (default 5 seconds) before first metrics publish - Real-time metrics collection with configurable intervals +- **Connection verification**: Tests MQTT connectivity on startup with timeout-based validation +- **Graceful shutdown**: Exits cleanly via `System.stop(1)` when the MQTT broker is unavailable (prevents crash dumps) ### Configuration System Systant uses a TOML-based configuration system with environment variable overrides: diff --git a/server/lib/systant/mqtt_client.ex b/server/lib/systant/mqtt_client.ex index a2c5b60..3960e46 100644 --- a/server/lib/systant/mqtt_client.ex +++ b/server/lib/systant/mqtt_client.ex @@ -57,10 +57,14 @@ defmodule Systant.MqttClient do :timeout -> Logger.error("MQTT connection timeout - broker at #{mqtt_config.host}:#{mqtt_config.port} is not responding") + Logger.error("Shutting down systant due to MQTT
connection failure") + System.stop(1) {:stop, :connection_timeout} {:error, reason} -> Logger.error("MQTT connection verification failed: #{inspect(reason)}") + Logger.error("Shutting down systant due to MQTT connection failure") + System.stop(1) {:stop, reason} end @@ -151,16 +155,29 @@ defmodule Systant.MqttClient do test_topic = "systant/connection_test" test_payload = "test" - case Tortoise.publish_sync(client_id, test_topic, test_payload, qos: 0, timeout: timeout_ms) do - :ok -> - Logger.debug("MQTT connection test successful") - :ok - {:error, :timeout} -> - Logger.error("MQTT connection test timed out") - :timeout - {:error, reason} -> - Logger.error("MQTT connection test failed: #{inspect(reason)}") - {:error, reason} + try do + case Tortoise.publish_sync(client_id, test_topic, test_payload, qos: 0, timeout: timeout_ms) do + :ok -> + Logger.debug("MQTT connection test successful") + :ok + {:error, :timeout} -> + Logger.error("MQTT connection test timed out") + :timeout + {:error, reason} -> + Logger.error("MQTT connection test failed: #{inspect(reason)}") + {:error, reason} + other -> + Logger.error("MQTT connection test unexpected result: #{inspect(other)}") + {:error, other} + end + rescue + error -> + Logger.error("MQTT connection test exception: #{inspect(error)}") + {:error, :connection_failed} + catch + :exit, reason -> + Logger.error("MQTT connection test exit: #{inspect(reason)}") + {:error, :connection_failed} end end end \ No newline at end of file