-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathnotifications.yaml
More file actions
104 lines (101 loc) · 3.32 KB
/
notifications.yaml
File metadata and controls
104 lines (101 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Monitored endpoints. Each entry polls the Prometheus HTTP query API and
# evaluates its conditions against the JSON response body.
endpoints:
# CPU check: percentage of non-idle CPU time per instance over a 2m rate window.
# Decoded PromQL:
#   100 * sum by(instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))
#       / sum by(instance) (rate(node_cpu_seconds_total[2m]))
- name: "High CPU Usage Check"
  enabled: true
  group: "hardware"
  url: "http://prometheus.dms.dappnode:9090/api/v1/query?query=100%20*%20sum%20by%28instance%29%20%28rate%28node_cpu_seconds_total%7Bmode!%3D%22idle%22%7D%5B2m%5D%29%29%20%2F%20sum%20by%28instance%29%20%28rate%28node_cpu_seconds_total%5B2m%5D%29%29"
  method: "GET"
  interval: "30s"
  # The condition describes the healthy state: the first result's sample value
  # must stay at or below 80.
  conditions:
  - "[BODY].data.result[0].value[1] <= 80"
  # Presumably the allowed range and unit for the configurable threshold
  # — TODO confirm against the consumer's schema.
  metric:
    min: 0
    max: 100
    unit: "%"
  definition:
    title: "Configure your CPU Usage Alert"
    description: "Triggers if CPU usage exceeds the limit defined in the condition"
  priority: "medium"
  correlationId: "dms-cpu"
  # NOTE(review): quoted, so this is the string "false", not a boolean
  # — confirm the consumer expects a string here.
  isBanner: "false"
  alerts:
  - type: custom
    enabled: true
    # [CONDITION_VALUE] looks like a placeholder filled in by the consumer
    # — verify.
    description: "CPU % usage above [CONDITION_VALUE]"
    failure-threshold: 2
    success-threshold: 1
    send-on-resolved: true
# Memory check: percentage of host memory that is not available.
# PromQL: 100*(1-node_memory_MemAvailable_bytes/node_memory_MemTotal_bytes)
- name: "Host Memory Check"
  enabled: true
  group: "hardware"
  url: "http://prometheus.dms.dappnode:9090/api/v1/query?query=100*(1-node_memory_MemAvailable_bytes/node_memory_MemTotal_bytes)"
  method: "GET"
  interval: "30s"
  # The condition describes the healthy state: the first result's sample value
  # must stay at or below 90.
  conditions:
  - "[BODY].data.result[0].value[1] <= 90"
  # Presumably the allowed range and unit for the configurable threshold
  # — TODO confirm against the consumer's schema.
  metric:
    min: 0
    max: 100
    unit: "%"
  definition:
    title: "Configure your Memory Usage Alert"
    description: "Triggers if memory usage exceeds the limit defined in the condition"
  priority: "medium"
  correlationId: "dms-memory"
  # NOTE(review): quoted, so this is the string "false", not a boolean
  # — confirm the consumer expects a string here.
  isBanner: "false"
  alerts:
  - type: custom
    enabled: true
    # [CONDITION_VALUE] looks like a placeholder filled in by the consumer
    # — verify.
    description: "Memory % usage above [CONDITION_VALUE]"
    failure-threshold: 2
    success-threshold: 1
    send-on-resolved: true
# Disk check: used-space percentage averaged over every filesystem whose
# fstype does not match ^(fuse.*|tmpfs|cifs|nfs).
# Decoded PromQL:
#   avg((1-node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"}
#        /node_filesystem_size_bytes)*100)
# NOTE(review): avg() across filesystems can hide a single nearly-full one
# — confirm that averaging is the intended aggregation.
- name: "Host Disk Space Check"
  enabled: true
  group: "hardware"
  url: "http://prometheus.dms.dappnode:9090/api/v1/query?query=avg((1-node_filesystem_avail_bytes%7Bfstype!~%22^(fuse.*|tmpfs|cifs|nfs)%22%7D/node_filesystem_size_bytes)*100)"
  method: "GET"
  interval: "30s"
  # The condition describes the healthy state: the first result's sample value
  # must stay at or below 90.
  conditions:
  - "[BODY].data.result[0].value[1] <= 90"
  # Presumably the allowed range and unit for the configurable threshold
  # — TODO confirm against the consumer's schema.
  metric:
    min: 0
    max: 100
    unit: "%"
  definition:
    title: "Configure your Disk Space Alert"
    description: "Triggers if disk usage exceeds the limit defined in the condition"
  # The only check in this group with "high" priority; the others use "medium".
  priority: "high"
  correlationId: "dms-disk"
  # NOTE(review): quoted, so this is the string "false", not a boolean
  # — confirm the consumer expects a string here.
  isBanner: "false"
  alerts:
  - type: custom
    enabled: true
    # [CONDITION_VALUE] looks like a placeholder filled in by the consumer
    # — verify.
    description: "Disk % usage above [CONDITION_VALUE]"
    failure-threshold: 2
    success-threshold: 1
    send-on-resolved: true
# Temperature check: average of node_hwmon_temp_celsius over the sensors whose
# chip label matches the regex below.
# Decoded PromQL:
#   avg(node_hwmon_temp_celsius{chip=~".*coretemp.*|.*18_3$|.*k10temp.*"})
- name: "Host Temperature Check"
  enabled: true
  group: "hardware"
  url: "http://prometheus.dms.dappnode:9090/api/v1/query?query=avg%28node_hwmon_temp_celsius%7Bchip%3D~%22.*coretemp.*%7C.*18_3%24%7C.*k10temp.*%22%7D%29"
  method: "GET"
  interval: "30s"
  # The condition describes the healthy state: the first result's sample value
  # must stay at or below 85.
  conditions:
  - "[BODY].data.result[0].value[1] <= 85"
  # Presumably the allowed range and unit for the configurable threshold;
  # unlike the percentage checks the lower bound here is 25
  # — TODO confirm against the consumer's schema.
  metric:
    min: 25
    max: 100
    unit: "°C"
  definition:
    title: "Configure your Temperature Alert"
    description: "Triggers if the average node temperature exceeds the defined threshold"
  priority: "medium"
  correlationId: "dms-temperature"
  # NOTE(review): quoted, so this is the string "false", not a boolean
  # — confirm the consumer expects a string here.
  isBanner: "false"
  alerts:
  - type: custom
    enabled: true
    # [CONDITION_VALUE] looks like a placeholder filled in by the consumer
    # — verify.
    description: "Average node temperature above [CONDITION_VALUE]°C"
    failure-threshold: 2
    success-threshold: 1
    send-on-resolved: true