Add connection verification to prevent false success on MQTT failures

- Add wait_for_connection() to test actual MQTT connectivity after start_link
- Use publish_sync with timeout to verify broker is reachable
- Properly fail startup if MQTT broker is unavailable
- Add clear timeout and connection error messages

This fixes the issue where systant reported 'connected successfully'
even when the MQTT broker was unreachable.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
ryan 2025-08-09 19:00:42 -07:00
parent 505aede8d3
commit f965bf5802

View File

@ -36,7 +36,12 @@ defmodule Systant.MqttClient do
case Tortoise.Connection.start_link(connection_opts) do case Tortoise.Connection.start_link(connection_opts) do
{:ok, _pid} -> {:ok, _pid} ->
Logger.info("MQTT client connected successfully") Logger.info("MQTT client process started, verifying connection...")
# Wait a bit to verify the connection actually works
case wait_for_connection(mqtt_config.client_id, 5000) do
:ok ->
Logger.info("MQTT connection verified successfully")
# Send system metrics after a short delay to ensure dashboard is ready # Send system metrics after a short delay to ensure dashboard is ready
startup_delay = Systant.Config.get(app_config, ["general", "startup_delay"]) || 5000 startup_delay = Systant.Config.get(app_config, ["general", "startup_delay"]) || 5000
@ -50,8 +55,17 @@ defmodule Systant.MqttClient do
schedule_stats_publish(mqtt_config.publish_interval) schedule_stats_publish(mqtt_config.publish_interval)
{:ok, state_config} {:ok, state_config}
:timeout ->
Logger.error("MQTT connection timeout - broker at #{mqtt_config.host}:#{mqtt_config.port} is not responding")
{:stop, :connection_timeout}
{:error, reason} -> {:error, reason} ->
Logger.error("Failed to connect to MQTT broker: #{inspect(reason)}") Logger.error("MQTT connection verification failed: #{inspect(reason)}")
{:stop, reason}
end
{:error, reason} ->
Logger.error("Failed to start MQTT client: #{inspect(reason)}")
{:stop, reason} {:stop, reason}
end end
end end
@ -131,4 +145,22 @@ defmodule Systant.MqttClient do
defp schedule_stats_publish(interval) do defp schedule_stats_publish(interval) do
Process.send_after(self(), :publish_stats, interval) Process.send_after(self(), :publish_stats, interval)
end end
defp wait_for_connection(client_id, timeout_ms) do
# Try to publish a test message to verify the connection
test_topic = "systant/connection_test"
test_payload = "test"
case Tortoise.publish_sync(client_id, test_topic, test_payload, qos: 0, timeout: timeout_ms) do
:ok ->
Logger.debug("MQTT connection test successful")
:ok
{:error, :timeout} ->
Logger.error("MQTT connection test timed out")
:timeout
{:error, reason} ->
Logger.error("MQTT connection test failed: #{inspect(reason)}")
{:error, reason}
end
end
end end