Feature middleware (#1476)

* fix:fix error values&logs

* modify: add logs

* feature:add redis io retry logic

* feature:add redis error alert rule

* test:for test alert

* fix:fix prometheus rules

* del:del test code

---------

Co-authored-by: lin.huang <lin.huang@apulis.com>
This commit is contained in:
xuexihuang
2023-11-29 10:41:11 +08:00
committed by GitHub
parent 02142c55b2
commit ceb669dfb8
6 changed files with 82 additions and 57 deletions
+12 -1
View File
@@ -8,4 +8,15 @@ groups:
severity: critical
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
# Rule group for failed message writes to the Redis and MongoDB back ends.
- name: database_insert_failure_alerts
  rules:
    - alert: DatabaseInsertFailed
      # `increase(...[5m]) > 0` matches when a failure counter grew during the
      # last five minutes; `or` unions the Redis and MongoDB results so a
      # failure in either store triggers the same alert.
      expr: >-
        (increase(msg_insert_redis_failed_total[5m]) > 0)
        or (increase(msg_insert_mongo_failed_total[5m]) > 0)
      # The expression must stay true for one minute before the alert fires.
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "Increase in MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter detected"
        description: >-
          Either MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter has
          increased in the last 5 minutes, indicating failures in message insert
          operations to Redis or MongoDB. Redis or MongoDB may have crashed.