Rule |
State |
Error |
Last Evaluation |
Evaluation Time |
# Fires when `up` is 0 for any scrape job other than "bashcontainerstats",
# sustained for 2 minutes.
alert: Prometheus target not responding
expr: up{job!="bashcontainerstats"} == 0
for: 2m
labels:
  severity: critical
annotations:
  description: >-
    {{ $labels.job }} at {{ $labels.instance }} has been unreachable for
    more than 2 minutes.
  environment: production
  summary: 'Prometheus target at {{ $labels.instance }} is unreachable'
|
ok
|
|
14.08s ago
|
321.6us |
# Fires when `up` is 0 for the "bashcontainerstats" scrape job, sustained for
# 2 minutes. Same condition as the generic target rule, at warning severity.
alert: Bashcontainerstats not responding
expr: up{job="bashcontainerstats"} == 0
for: 2m
labels:
  severity: warning
annotations:
  description: >-
    {{ $labels.job }} at {{ $labels.instance }} has been unreachable for
    more than 2 minutes.
  environment: production
  summary: 'Prometheus target at {{ $labels.instance }} is unreachable'
|
ok
|
|
14.08s ago
|
64.04us |
# Fires when the 1-minute load average (node_load1) stays high: either its
# 5-minute mean exceeds 60 or its 1-minute mean exceeds 70, sustained for 1m.
#
# The short-window branch previously queried `node_load`, which is not a
# series node_exporter exports (it exposes node_load1 / node_load5 /
# node_load15), so that branch could never match. It now uses node_load1,
# consistent with the first branch.
alert: CPU Load
expr: >-
  avg_over_time(node_load1[5m]) > 60
  or avg_over_time(node_load1[1m]) > 70
for: 1m
labels:
  severity: critical
annotations:
  description: '{{ $labels.instance }} has a high CPU load.'
  environment: production
  summary: 'Instance {{ $labels.instance }} has a high CPU load.'
|
ok
|
|
14.08s ago
|
239.1us |
# Fires when the used percentage of the filesystem mounted at "/" is above 90
# for 5 minutes. Used % is computed as 100 - 100 * avail / size.
#
# NOTE(review): Prometheus regex matchers are fully anchored, so
# device!~"by-uuid" only excludes a device literally named "by-uuid".
# Confirm whether a substring match (e.g. ".*by-uuid.*") was intended.
alert: Disk Usage
expr: >-
  (100 - 100 * (node_filesystem_avail_bytes{device!~"by-uuid",device!~"tmpfs",mountpoint="/"}
  / node_filesystem_size_bytes{device!~"by-uuid",device!~"tmpfs",mountpoint="/"}))
  > 90
for: 5m
labels:
  severity: critical
annotations:
  description: '{{ $labels.instance }} has disk usage higher than 90%.'
  environment: production
  summary: 'Instance {{ $labels.instance }} has disk usage greater than 90%.'
|
ok
|
|
14.08s ago
|
608.6us |
# Fires when pg_stat_activity_count averages above 280 over the last minute
# AND above 275 over the last 15 minutes, sustained for 1 minute.
alert: Postgres DB Connections
expr: >-
  avg_over_time(pg_stat_activity_count[1m]) > 280
  and avg_over_time(pg_stat_activity_count[15m]) > 275
for: 1m
labels:
  severity: warning
annotations:
  description: '{{ $labels.instance }} has passed a DB connection threshold.'
  environment: production
  summary: 'Instance {{ $labels.instance }} has passed a DB connection threshold.'
|
ok
|
|
14.079s ago
|
1.511ms |
# Fires when 100 * MemAvailable / (MemTotal + SwapTotal) is below 5 for
# 5 minutes.
#
# NOTE(review): the denominator includes SwapTotal, so the value is a
# percentage of RAM + swap, not of RAM alone as the annotations state.
# Confirm this is intended.
alert: RAM Usage
expr: >-
  (100 * (node_memory_MemAvailable_bytes) / (node_memory_MemTotal_bytes + node_memory_SwapTotal_bytes))
  < 5
for: 5m
labels:
  severity: critical
annotations:
  description: '{{ $labels.instance }} has less than 5% available RAM.'
  environment: production
  summary: 'Instance {{ $labels.instance }} has less than 5% available RAM.'
|
ok
|
|
14.078s ago
|
278.1us |
|
21.974s ago |
1.589ms |
Rule |
State |
Error |
Last Evaluation |
Evaluation Time |
# Fires when the 5-minute average of the 0.99-quantile request duration for
# vendor_data_service's "requests" controller is above 5 (seconds, per the
# metric name), sustained for 1 minute.
alert: Slow vendor responses
expr: >-
  avg_over_time(ruby_http_request_duration_seconds{app_name="vendor_data_service",controller="requests",quantile="0.99"}[5m])
  > 5
for: 1m
labels:
  service: vendor_data_service
  severity: info
annotations:
  description: >-
    99th percentile of responses over the last 5 minutes
    from vendor_data_service are taking over 5 seconds
  environment: production
  summary: Vendor data responses are slow
|
ok
|
|
25.154s ago
|
208.7us |
# Fires when no ruby_rss series exists for app_name="account_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Account Service down
expr: absent(ruby_rss{app_name="account_service",type="unicorn_master"})
for: 2m
labels:
  service: account_service
  severity: critical
annotations:
  description: "Account service hasn't been detected for 2 minutes"
  environment: production
  summary: Account service is not running
|
ok
|
|
25.153s ago
|
110.6us |
# Fires when no ruby_rss series exists for app_name="admin_app" with
# type="unicorn_master", sustained for 2 minutes.
alert: Admin App down
expr: absent(ruby_rss{app_name="admin_app",type="unicorn_master"})
for: 2m
labels:
  service: admin_app
  severity: critical
annotations:
  description: "Admin App hasn't been detected for 2 minutes"
  environment: production
  summary: Admin App is not running
|
ok
|
|
25.153s ago
|
63.89us |
# Fires when no ruby_rss series exists for app_name="agent_app" with
# type="unicorn_master", sustained for 2 minutes.
alert: Agent App down
expr: absent(ruby_rss{app_name="agent_app",type="unicorn_master"})
for: 2m
labels:
  service: agent_app
  severity: critical
annotations:
  description: "Agent App hasn't been detected for 2 minutes"
  environment: production
  summary: Agent App is not running
|
ok
|
|
25.153s ago
|
115.7us |
# Fires when no ruby_rss series exists for app_name="application_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Application Service down
expr: absent(ruby_rss{app_name="application_service",type="unicorn_master"})
for: 2m
labels:
  service: application_service
  severity: critical
annotations:
  description: "Application service hasn't been detected for 2 minutes"
  environment: production
  summary: Application service is not running
|
ok
|
|
25.153s ago
|
122.8us |
# Fires when no ruby_rss series exists for app_name="credit_reporting_service"
# with type="unicorn_master", sustained for 2 minutes.
alert: Credit Reporting Service down
expr: absent(ruby_rss{app_name="credit_reporting_service",type="unicorn_master"})
for: 2m
labels:
  service: credit_reporting_service
  severity: critical
annotations:
  description: "Credit Reporting service hasn't been detected for 2 minutes"
  environment: production
  summary: Credit Reporting service is not running
|
ok
|
|
25.153s ago
|
400.6us |
# Fires when no ruby_rss series exists for app_name="customer_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Customer Service down
expr: absent(ruby_rss{app_name="customer_service",type="unicorn_master"})
for: 2m
labels:
  service: customer_service
  severity: critical
annotations:
  description: "Customer service hasn't been detected for 2 minutes"
  environment: production
  summary: Customer service is not running
|
ok
|
|
25.153s ago
|
152.8us |
# Fires when no ruby_rss series exists for app_name="customer_app" with
# type="unicorn_master", sustained for 2 minutes.
alert: Customer App down
expr: absent(ruby_rss{app_name="customer_app",type="unicorn_master"})
for: 2m
labels:
  service: customer_app
  severity: critical
annotations:
  description: "Customer App hasn't been detected for 2 minutes"
  environment: production
  summary: Customer App is not running
|
ok
|
|
25.153s ago
|
117.5us |
# Fires when no ruby_rss series exists for app_name="email_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Email Service down
expr: absent(ruby_rss{app_name="email_service",type="unicorn_master"})
for: 2m
labels:
  service: email_service
  severity: critical
annotations:
  description: "Email service hasn't been detected for 2 minutes"
  environment: production
  summary: Email service is not running
|
ok
|
|
25.153s ago
|
95.19us |
# Fires when no ruby_rss series exists for app_name="financial_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Financial Service down
expr: absent(ruby_rss{app_name="financial_service",type="unicorn_master"})
for: 2m
labels:
  service: financial_service
  severity: critical
annotations:
  description: "Financial service hasn't been detected for 2 minutes"
  environment: production
  summary: Financial service is not running
|
ok
|
|
25.153s ago
|
170.8us |
# Fires when no ruby_rss series exists for app_name="five9_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Five9 Service down
expr: absent(ruby_rss{app_name="five9_service",type="unicorn_master"})
for: 2m
labels:
  service: five9_service
  severity: critical
annotations:
  description: "Five9 service hasn't been detected for 2 minutes"
  environment: production
  summary: Five9 service is not running
|
ok
|
|
25.153s ago
|
78.37us |
# Fires when no ruby_rss series exists for app_name="leads_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Leads Service down
expr: absent(ruby_rss{app_name="leads_service",type="unicorn_master"})
for: 2m
labels:
  service: leads_service
  severity: critical
annotations:
  description: "Leads service hasn't been detected for 2 minutes"
  environment: production
  summary: Leads service is not running
|
ok
|
|
25.153s ago
|
101.5us |
# Fires when no ruby_rss series exists for app_name="payment_gateway_service"
# with type="unicorn_master", sustained for 2 minutes.
alert: Payment Gateway Service down
expr: absent(ruby_rss{app_name="payment_gateway_service",type="unicorn_master"})
for: 2m
labels:
  service: payment_gateway_service
  severity: critical
annotations:
  description: "Payment Gateway service hasn't been detected for 2 minutes"
  environment: production
  summary: Payment Gateway service is not running
|
ok
|
|
25.153s ago
|
83.35us |
# Fires when no ruby_rss series exists for app_name="scheduler_service" with
# type="sidekiq", sustained for 1 minute. Unlike the other service-down rules
# in this listing, this one matches type="sidekiq" and waits 1m, not 2m.
alert: Scheduler service down
expr: absent(ruby_rss{app_name="scheduler_service",type="sidekiq"})
for: 1m
labels:
  service: scheduler_service
  severity: critical
annotations:
  description: "Scheduler service sidekiq hasn't been detected for 1 minute"
  environment: production
  summary: Scheduler service sidekiq not running
|
ok
|
|
25.153s ago
|
57.93us |
# Fires when no ruby_rss series exists for app_name="underwriting_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Underwriting Service down
expr: absent(ruby_rss{app_name="underwriting_service",type="unicorn_master"})
for: 2m
labels:
  service: underwriting_service
  severity: critical
annotations:
  description: "Underwriting service hasn't been detected for 2 minutes"
  environment: production
  summary: Underwriting service is not running
|
ok
|
|
25.153s ago
|
115us |
# Fires when no ruby_rss series exists for app_name="vendor_data_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Vendor Data Service down
expr: absent(ruby_rss{app_name="vendor_data_service",type="unicorn_master"})
for: 2m
labels:
  service: vendor_data_service
  severity: critical
annotations:
  description: "Vendor Data service hasn't been detected for 2 minutes"
  environment: production
  summary: Vendor Data service is not running
|
ok
|
|
25.153s ago
|
173.6us |
# Fires when no ruby_rss series exists for app_name="vendor_proxy_service" with
# type="unicorn_master", sustained for 2 minutes.
alert: Vendor Proxy Service is down
expr: absent(ruby_rss{app_name="vendor_proxy_service",type="unicorn_master"})
for: 2m
labels:
  service: vendor_proxy_service
  severity: critical
annotations:
  description: "Vendor Proxy service hasn't been detected for 2 minutes"
  environment: production
  summary: Vendor Proxy service is not running
|
ok
|
|
25.153s ago
|
102.3us |
|
6.928s ago |
4.532ms |