Alerts


/etc/prometheus/alerts.rules > batteries
GenericBatteryLow (0 active)
# Warns when a Home Assistant battery reading stays below 20% for a full day.
# Series whose `entity` label contains "helpful" or "smoke" are skipped (the
# SmokeDetectorBatteryLow rule selects the "smoke" ones), as are series whose
# friendly_name is the literal string "None".
alert: GenericBatteryLow
expr: ha_battery_percent{entity!~".*helpful.*",entity!~".*smoke.*",friendly_name!="None"}
  < 20
for: 1d
labels:
  severity: warning
annotations:
  # Reference the `entity` label, the same one the selector matches on, so the
  # description names the affected device instead of rendering an empty value.
  description: '{{ $labels.entity }} is below 20% battery'
  summary: '{{ $labels.friendly_name }} battery low'
SmokeDetectorBatteryLow (0 active)
# Dedicated battery rule for series whose `entity` label contains "smoke":
# warns once the reading has been under 78% for a full day.
# NOTE(review): 78 is far above the generic 20% threshold; presumably these
# detectors start chirping at a high nominal charge - confirm the figure.
alert: SmokeDetectorBatteryLow
expr: ha_battery_percent{entity=~".*smoke.*",friendly_name!="None"} < 78
for: 1d
labels:
  severity: warning
annotations:
  summary: '{{ $labels.friendly_name }} battery low'
  description: 'It will start beeping soon'
/etc/prometheus/alerts.rules > cats
CatsNotFed (0 active)
# Critical alert raised when the ESPHome `cat_feeder` switch has reported 0
# continuously for 20 hours.
# NOTE(review): presumably the switch reads non-zero while the feeder is
# dispensing, so an unbroken run of zeros means no feeding - confirm.
alert: CatsNotFed
expr: esphome_switch_value{id="cat_feeder"} == 0
for: 20h
labels:
  severity: critical
annotations:
  summary: 'Cats have not been fed for nearly a day, according to esphome!'
/etc/prometheus/alerts.rules > envoy
EnvoyDown (0 active)
# Critical alert raised when the `blackbox-http` probe of the robots.txt URL
# below has reported probe_success == 0 for 10 minutes.
alert: EnvoyDown
expr: >-
  probe_success{instance="https://library.fivesevenfive.org/robots.txt",job="blackbox-http"}
  == 0
for: 10m
labels:
  severity: critical
annotations:
  summary: 'Envoy is down!'
/etc/prometheus/alerts.rules > freezer
FreezerHot (0 active)
# Critical alert raised when the ESPHome `freezer_temp` sensor has read above
# 0 for a full hour.
# NOTE(review): a threshold of 0 meaning "above freezing" assumes the sensor
# reports degrees Celsius - confirm.
alert: FreezerHot
expr: esphome_sensor_value{id="freezer_temp"} > 0
for: 1h
labels:
  severity: critical
annotations:
  summary: 'The chest freezer is above freezing!'
/etc/prometheus/alerts.rules > hvac
FurnaceBlowingColdAutomation (0 active)
# Detects a furnace that is being asked for heat while the house cools down.
# The average 10-minute temperature slope across the five room sensors is
# multiplied by the heat-call binary sensor; the product must stay negative
# for 40 minutes before the alert fires.
# NOTE(review): assumes heat_call reads 1 while calling for heat and 0
# otherwise, so the product is only negative during a heat call - confirm.
# NOTE(review): severity `automation` is presumably routed to an automation
# receiver rather than to a person - confirm in the Alertmanager routing.
alert: FurnaceBlowingColdAutomation
# The dot after "sensor" is escaped (`\\.` inside a PromQL double-quoted
# string) so it matches a literal "." rather than any character.
expr: >-
  (
  avg(deriv(ha_sensor_temperature{entity=~"sensor\\.(hall|living_room|office|bedroom|kids_room)_temperature"}[10m]))
  * on (job)
  max(ha_binary_sensor_state{entity="binary_sensor.heat_call"})
  ) < 0
for: 40m
labels:
  severity: automation
annotations:
  description: Calling for heat but it's getting colder!
  summary: Furnace is broken!
FurnaceBlowingColdEmail (0 active)
# Warning-severity variant of the furnace check: the average 15-minute
# temperature slope across the five room sensors, multiplied by the heat-call
# binary sensor, must stay negative for a full hour before the alert fires.
# NOTE(review): assumes heat_call reads 1 while calling for heat and 0
# otherwise, so the product is only negative during a heat call - confirm.
alert: FurnaceBlowingColdEmail
# The dot after "sensor" is escaped (`\\.` inside a PromQL double-quoted
# string) so it matches a literal "." rather than any character.
expr: >-
  (
  avg(deriv(ha_sensor_temperature{entity=~"sensor\\.(hall|living_room|office|bedroom|kids_room)_temperature"}[15m]))
  * on (job)
  max(ha_binary_sensor_state{entity="binary_sensor.heat_call"})
  ) < 0
for: 1h
labels:
  severity: warning
annotations:
  description: Calling for heat but it's getting colder!
  summary: Furnace is broken!
/etc/prometheus/alerts.rules > scrape
ScrapeDown (0 active)
# Warns when any scrape target has reported `up == 0` for 30 minutes; the
# annotations identify the target by its `instance` and `job` labels.
alert: ScrapeDown
expr: up == 0
for: 30m
labels:
  severity: warning
annotations:
  summary: '{{ $labels.instance }} is down'
  description: 'scrape failures for {{ $labels.instance }} via {{ $labels.job }}'
/etc/prometheus/alerts.rules > ssl
SslExpiresSoon (0 active)
# Warns when Envoy's days-until-first-certificate-expiry metric has been
# below 14 for six hours.
alert: SslExpiresSoon
expr: envoy_server_days_until_first_cert_expiring
  < 14
for: 6h
labels:
  severity: warning
annotations:
  description: Check certbot cronjob
  # The rule fires for any value under 14, so report the actual number of
  # days left instead of a fixed "2 weeks".
  summary: 'SSL certificate expiring in less than 2 weeks ({{ $value }} days left)'
/etc/prometheus/alerts.rules > sunpower
SunpowerPanelDown (0 active)
# Warns when a SunPower PVS device of type "Inverter" has reported state 0
# for a full day.
# NOTE(review): the long hold time presumably keeps normal overnight inverter
# shutdowns from alerting - confirm.
alert: SunpowerPanelDown
expr: sunpower_pvs_device_state{device_type="Inverter"} == 0
for: 1d
labels:
  severity: warning
annotations:
  summary: '{{ $labels.device_type }} offline'
  description: >-
    {{ $labels.device_type }} with id {{ $labels.device_id }} has been
    offline for 1 day.
SunpowerSupervisorDown (0 active)
# Critical alert raised when any SunPower PVS device that is not an
# "Inverter" has reported state 0 for 30 minutes.
alert: SunpowerSupervisorDown
expr: sunpower_pvs_device_state{device_type!="Inverter"} == 0
for: 30m
labels:
  severity: critical
annotations:
  summary: '{{ $labels.device_type }} offline'
  description: >-
    {{ $labels.device_type }} with id {{ $labels.device_id }} has been
    offline for 30 minutes
/etc/prometheus/alerts.rules > ups
UpsRuntimeLow (0 active)
# Critical alert raised when the NUT-reported battery runtime has been under
# 900 seconds (the 15 minutes quoted in the summary) for two minutes.
alert: UpsRuntimeLow
expr: nut_battery_runtime_seconds < 900
for: 2m
labels:
  severity: critical
annotations:
  summary: '{{ $labels.instance }} UPS has <15 minutes of battery remaining'
  description: 'UPS battery low!'