Add Prometheus monitoring function (#1337)

* Code adaptation k8s: service discovery and registration adaptation, configuration adaptation

* Initial submission of the help charts script for openim API

* change the help charts script

* change the help charts script

* change helm chart codes

* change dockerfiles script

* change chart script:add configmap mounts

* change chart script:change repository

* change chart script:msggateway add one service

* change config.yaml

* roll back some config values

* change chart script:change Ingress rule with a rewrite annotation

* add mysql charts scrible

* change chart script:add mysql.config.yaml

* add nfs provisioner charts

* change chart script:add nfs.config.yaml

* add ingress-nginx charts

* change chart script:add ingress-nginx.config.yaml

* add redis &mongodb charts

* add kafka&minio charts

* change chart script:change redis.values.yaml

* change chart script:add redis.config.yaml

* change chart script:change redis.config.yaml

* change chart script:change mongodb.value.yaml

* change chart script:change mongodb.value.yaml

* change chart script:add mongodb.config.yaml

* change chart script:change minio.values.yaml

* change chart script:add minio.config.yaml

* change chart script:change kafka.values.yaml

* change chart script:add kafka.config.yaml

* change chart script:change services.config.yaml

* bug fix:Delete websocket's Port restrictions

* bug fix:change port value

* change chart script:Submit a stable version script

* fix bug:Implement option interface

* fix bug:change K8sDR.Register

* change config.yaml

* change chats script:minio service add ingress

* change chats script:minio service add ingress

* change chats script:kafka.replicaCount=3& change minio.api ingress

* delete change chats script

* change config.yaml

* change openim.yaml

* merge go.sum

* Add monitoring function and struct for Prometheus on gin and GRPC

* Add GRPC and gin server monitoring logic

* Add GRPC and gin server monitoring logic2

* Add GRPC and gin server monitoring logic3

* Add GRPC and gin server monitoring logic4

* Add GRPC and gin server monitoring logic5

* Add GRPC and gin server monitoring logic6

* Add GRPC and gin server monitoring logic7

* delete:old monitoring code

* add for test

* fix bug:change packname

* fix bug:delete getPromPort funciton

* fix bug:delete getPromPort funciton

* fix bug:change logs

* fix bug:change registerName logic in GetGrpcCusMetrics function

* add getPrometheus url api

* fix:config path logic

* fix:prometheus enable function

* fix:prometheus enable function

* fix:transfer Multi process monitoring logic

* del:del not using manifest

* fix:openim-msgtransfer.sh

* fix:openim-msgtransfer.sh

---------

Co-authored-by: lin.huang <lin.huang@apulis.com>
Co-authored-by: Xinwei Xiong <3293172751@qq.com>
This commit is contained in:
xuexihuang
2023-11-07 14:36:56 +08:00
committed by GitHub
parent 297a8db788
commit 82a8f3351f
41 changed files with 760 additions and 720 deletions
+1 -8
View File
@@ -39,7 +39,6 @@ import (
"github.com/OpenIMSDK/tools/mw"
"github.com/openimsdk/open-im-server/v3/pkg/common/config"
"github.com/openimsdk/open-im-server/v3/pkg/common/prome"
"github.com/openimsdk/open-im-server/v3/pkg/rpcclient"
)
@@ -63,13 +62,6 @@ func NewGinRouter(discov discoveryregistry.SvcDiscoveryRegistry, rdb redis.Unive
u := NewUserApi(*userRpc)
m := NewMessageApi(messageRpc, userRpc)
if config.Config.Prometheus.Enable {
prome.NewApiRequestCounter()
prome.NewApiRequestFailedCounter()
prome.NewApiRequestSuccessCounter()
r.Use(prome.PrometheusMiddleware)
r.GET("/metrics", prome.PrometheusHandler())
}
ParseToken := GinParseToken(rdb)
userRouterGroup := r.Group("/user")
{
@@ -151,6 +143,7 @@ func NewGinRouter(discov discoveryregistry.SvcDiscoveryRegistry, rdb redis.Unive
// Third service
thirdGroup := r.Group("/third", ParseToken)
{
thirdGroup.GET("/prometheus", GetPrometheus)
t := NewThirdApi(*thirdRpc)
thirdGroup.POST("/fcm_update_token", t.FcmUpdateToken)
thirdGroup.POST("/set_app_badge", t.SetAppBadge)
+5
View File
@@ -15,6 +15,7 @@
package api
import (
config2 "github.com/openimsdk/open-im-server/v3/pkg/common/config"
"math/rand"
"net/http"
"strconv"
@@ -118,3 +119,7 @@ func (o *ThirdApi) DeleteLogs(c *gin.Context) {
func (o *ThirdApi) SearchLogs(c *gin.Context) {
a2r.Call(third.ThirdClient.SearchLogs, o.Client, c)
}
func GetPrometheus(c *gin.Context) {
c.Redirect(http.StatusFound, config2.Config.Prometheus.PrometheusUrl)
}
+2 -3
View File
@@ -33,7 +33,6 @@ import (
"github.com/OpenIMSDK/tools/utils"
"github.com/openimsdk/open-im-server/v3/pkg/common/config"
"github.com/openimsdk/open-im-server/v3/pkg/common/prome"
"github.com/openimsdk/open-im-server/v3/pkg/common/startrpc"
)
@@ -69,9 +68,10 @@ func (s *Server) SetLongConnServer(LongConnServer LongConnServer) {
s.LongConnServer = LongConnServer
}
func NewServer(rpcPort int, longConnServer LongConnServer) *Server {
func NewServer(rpcPort int, proPort int, longConnServer LongConnServer) *Server {
return &Server{
rpcPort: rpcPort,
prometheusPort: proPort,
LongConnServer: longConnServer,
pushTerminal: []int{constant.IOSPlatformID, constant.AndroidPlatformID},
}
@@ -158,7 +158,6 @@ func (s *Server) SuperGroupOnlineBatchPushOneMsg(
} else {
if utils.IsContainInt(client.PlatformID, s.pushTerminal) {
tempT.OnlinePush = true
prome.Inc(prome.MsgOnlinePushSuccessCounter)
resp = append(resp, temp)
}
}
+1 -1
View File
@@ -41,7 +41,7 @@ func RunWsAndServer(rpcPort, wsPort, prometheusPort int) error {
if err != nil {
return err
}
hubServer := NewServer(rpcPort, longServer)
hubServer := NewServer(rpcPort, prometheusPort, longServer)
go func() {
err := hubServer.Start()
if err != nil {
+3
View File
@@ -17,6 +17,7 @@ package msggateway
import (
"context"
"errors"
"github.com/openimsdk/open-im-server/v3/pkg/common/prom_metrics"
"net/http"
"strconv"
"sync"
@@ -220,6 +221,7 @@ func (ws *WsServer) registerClient(client *Client) {
if !userOK {
ws.clients.Set(client.UserID, client)
log.ZDebug(client.ctx, "user not exist", "userID", client.UserID, "platformID", client.PlatformID)
prom_metrics.OnlineUserGauge.Add(1)
ws.onlineUserNum.Add(1)
ws.onlineUserConnNum.Add(1)
} else {
@@ -364,6 +366,7 @@ func (ws *WsServer) unregisterClient(client *Client) {
isDeleteUser := ws.clients.delete(client.UserID, client.ctx.GetRemoteAddr())
if isDeleteUser {
ws.onlineUserNum.Add(-1)
prom_metrics.OnlineUserGauge.Dec()
}
ws.onlineUserConnNum.Add(-1)
ws.SetUserOnlineStatus(client.ctx, client, constant.Offline)
+24 -18
View File
@@ -15,13 +15,18 @@
package msgtransfer
import (
"errors"
"fmt"
"sync"
"github.com/openimsdk/open-im-server/v3/pkg/common/discovery_register"
"github.com/openimsdk/open-im-server/v3/pkg/common/prom_metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"github.com/openimsdk/open-im-server/v3/pkg/common/discovery_register"
"log"
"net/http"
"sync"
"github.com/OpenIMSDK/tools/mw"
@@ -31,7 +36,6 @@ import (
"github.com/openimsdk/open-im-server/v3/pkg/common/db/relation"
relationtb "github.com/openimsdk/open-im-server/v3/pkg/common/db/table/relation"
"github.com/openimsdk/open-im-server/v3/pkg/common/db/unrelation"
"github.com/openimsdk/open-im-server/v3/pkg/common/prome"
"github.com/openimsdk/open-im-server/v3/pkg/rpcclient"
)
@@ -81,7 +85,6 @@ func StartTransfer(prometheusPort int) error {
conversationRpcClient := rpcclient.NewConversationRpcClient(client)
groupRpcClient := rpcclient.NewGroupRpcClient(client)
msgTransfer := NewMsgTransfer(chatLogDatabase, msgDatabase, &conversationRpcClient, &groupRpcClient)
msgTransfer.initPrometheus()
return msgTransfer.Start(prometheusPort)
}
@@ -95,21 +98,13 @@ func NewMsgTransfer(chatLogDatabase controller.ChatLogDatabase,
}
}
func (m *MsgTransfer) initPrometheus() {
prome.NewSeqGetSuccessCounter()
prome.NewSeqGetFailedCounter()
prome.NewSeqSetSuccessCounter()
prome.NewSeqSetFailedCounter()
prome.NewMsgInsertRedisSuccessCounter()
prome.NewMsgInsertRedisFailedCounter()
prome.NewMsgInsertMongoSuccessCounter()
prome.NewMsgInsertMongoFailedCounter()
}
func (m *MsgTransfer) Start(prometheusPort int) error {
var wg sync.WaitGroup
wg.Add(1)
fmt.Println("start msg transfer", "prometheusPort:", prometheusPort)
if prometheusPort <= 0 {
return errors.New("prometheusPort not correct")
}
if config.Config.ChatPersistenceMysql {
// go m.persistentCH.persistentConsumerGroup.RegisterHandleAndConsumer(m.persistentCH)
} else {
@@ -118,10 +113,21 @@ func (m *MsgTransfer) Start(prometheusPort int) error {
go m.historyCH.historyConsumerGroup.RegisterHandleAndConsumer(m.historyCH)
go m.historyMongoCH.historyConsumerGroup.RegisterHandleAndConsumer(m.historyMongoCH)
// go m.modifyCH.modifyMsgConsumerGroup.RegisterHandleAndConsumer(m.modifyCH)
err := prome.StartPrometheusSrv(prometheusPort)
/*err := prome.StartPrometheusSrv(prometheusPort)
if err != nil {
return err
}*/
////////////////////////////
if config.Config.Prometheus.Enable {
reg := prometheus.NewRegistry()
reg.MustRegister(
collectors.NewGoCollector(),
)
reg.MustRegister(prom_metrics.GetGrpcCusMetrics("Transfer")...)
http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}))
log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", prometheusPort), nil))
}
////////////////////////////////////////
wg.Wait()
return nil
}
@@ -16,6 +16,7 @@ package msgtransfer
import (
"context"
"github.com/openimsdk/open-im-server/v3/pkg/common/prom_metrics"
"github.com/IBM/sarama"
"google.golang.org/protobuf/proto"
@@ -74,6 +75,9 @@ func (mc *OnlineHistoryMongoConsumerHandler) handleChatWs2Mongo(
"conversationID",
msgFromMQ.ConversationID,
)
prom_metrics.MsgInsertMongoFailedCounter.Inc()
} else {
prom_metrics.MsgInsertMongoSuccessCounter.Inc()
}
var seqs []int64
for _, msg := range msgFromMQ.MsgData {
-9
View File
@@ -14,10 +14,6 @@
package push
import (
"github.com/openimsdk/open-im-server/v3/pkg/common/prome"
)
type Consumer struct {
pushCh ConsumerHandler
successCount uint64
@@ -29,11 +25,6 @@ func NewConsumer(pusher *Pusher) *Consumer {
}
}
func (c *Consumer) initPrometheus() {
prome.NewMsgOfflinePushSuccessCounter()
prome.NewMsgOfflinePushFailedCounter()
}
func (c *Consumer) Start() {
// statistics.NewStatistics(&c.successCount, config.Config.ModuleName.PushName, fmt.Sprintf("%d second push to
// msg_gateway count", constant.StatisticsTimeInterval), constant.StatisticsTimeInterval)
-1
View File
@@ -67,7 +67,6 @@ func Start(client discoveryregistry.SvcDiscoveryRegistry, server *grpc.Server) e
go func() {
defer wg.Done()
consumer := NewConsumer(pusher)
consumer.initPrometheus()
consumer.Start()
}()
wg.Wait()
+2 -3
View File
@@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"errors"
"github.com/openimsdk/open-im-server/v3/pkg/common/prom_metrics"
"github.com/openimsdk/open-im-server/v3/internal/push/offlinepush/dummy"
"github.com/OpenIMSDK/protocol/conversation"
@@ -40,7 +41,6 @@ import (
"github.com/openimsdk/open-im-server/v3/pkg/common/db/cache"
"github.com/openimsdk/open-im-server/v3/pkg/common/db/controller"
"github.com/openimsdk/open-im-server/v3/pkg/common/db/localcache"
"github.com/openimsdk/open-im-server/v3/pkg/common/prome"
"github.com/openimsdk/open-im-server/v3/pkg/rpcclient"
)
@@ -288,10 +288,9 @@ func (p *Pusher) offlinePushMsg(ctx context.Context, conversationID string, msg
}
err = p.offlinePusher.Push(ctx, offlinePushUserIDs, title, content, opts)
if err != nil {
prome.Inc(prome.MsgOfflinePushFailedCounter)
prom_metrics.MsgOfflinePushFailedCounter.Inc()
return err
}
prome.Inc(prome.MsgOfflinePushSuccessCounter)
return nil
}
+2
View File
@@ -16,6 +16,7 @@ package auth
import (
"context"
"github.com/openimsdk/open-im-server/v3/pkg/common/prom_metrics"
"github.com/openimsdk/open-im-server/v3/pkg/authverify"
@@ -73,6 +74,7 @@ func (s *authServer) UserToken(ctx context.Context, req *pbauth.UserTokenReq) (*
if err != nil {
return nil, err
}
prom_metrics.UserLoginCounter.Inc()
resp.Token = token
resp.ExpireTimeSeconds = config.Config.TokenPolicy.Expire * 24 * 60 * 60
return &resp, nil
+6 -11
View File
@@ -16,6 +16,7 @@ package msg
import (
"context"
"github.com/openimsdk/open-im-server/v3/pkg/common/prom_metrics"
"github.com/openimsdk/open-im-server/v3/pkg/msgprocessor"
@@ -28,8 +29,6 @@ import (
"github.com/OpenIMSDK/tools/log"
"github.com/OpenIMSDK/tools/mcontext"
"github.com/OpenIMSDK/tools/utils"
promepkg "github.com/openimsdk/open-im-server/v3/pkg/common/prome"
)
func (m *msgServer) SendMsg(ctx context.Context, req *pbmsg.SendMsgReq) (resp *pbmsg.SendMsgResp, error error) {
@@ -59,9 +58,8 @@ func (m *msgServer) sendMsgSuperGroupChat(
ctx context.Context,
req *pbmsg.SendMsgReq,
) (resp *pbmsg.SendMsgResp, err error) {
promepkg.Inc(promepkg.WorkSuperGroupChatMsgRecvSuccessCounter)
if err = m.messageVerification(ctx, req); err != nil {
promepkg.Inc(promepkg.WorkSuperGroupChatMsgProcessFailedCounter)
prom_metrics.GroupChatMsgProcessFailedCounter.Inc()
return nil, err
}
if err = callbackBeforeSendGroupMsg(ctx, req); err != nil {
@@ -80,7 +78,7 @@ func (m *msgServer) sendMsgSuperGroupChat(
if err = callbackAfterSendGroupMsg(ctx, req); err != nil {
log.ZWarn(ctx, "CallbackAfterSendGroupMsg", err)
}
promepkg.Inc(promepkg.WorkSuperGroupChatMsgProcessSuccessCounter)
prom_metrics.GroupChatMsgProcessSuccessCounter.Inc()
resp = &pbmsg.SendMsgResp{}
resp.SendTime = req.MsgData.SendTime
resp.ServerMsgID = req.MsgData.ServerMsgID
@@ -133,9 +131,7 @@ func (m *msgServer) sendMsgNotification(
ctx context.Context,
req *pbmsg.SendMsgReq,
) (resp *pbmsg.SendMsgResp, err error) {
promepkg.Inc(promepkg.SingleChatMsgRecvSuccessCounter)
if err := m.MsgDatabase.MsgToMQ(ctx, utils.GenConversationUniqueKeyForSingle(req.MsgData.SendID, req.MsgData.RecvID), req.MsgData); err != nil {
promepkg.Inc(promepkg.SingleChatMsgProcessFailedCounter)
return nil, err
}
resp = &pbmsg.SendMsgResp{
@@ -147,7 +143,6 @@ func (m *msgServer) sendMsgNotification(
}
func (m *msgServer) sendMsgSingleChat(ctx context.Context, req *pbmsg.SendMsgReq) (resp *pbmsg.SendMsgResp, err error) {
promepkg.Inc(promepkg.SingleChatMsgRecvSuccessCounter)
if err := m.messageVerification(ctx, req); err != nil {
return nil, err
}
@@ -166,7 +161,7 @@ func (m *msgServer) sendMsgSingleChat(ctx context.Context, req *pbmsg.SendMsgReq
}
}
if !isSend {
promepkg.Inc(promepkg.SingleChatMsgProcessFailedCounter)
prom_metrics.SingleChatMsgProcessFailedCounter.Inc()
return nil, nil
} else {
if err = callbackBeforeSendSingleMsg(ctx, req); err != nil {
@@ -176,7 +171,7 @@ func (m *msgServer) sendMsgSingleChat(ctx context.Context, req *pbmsg.SendMsgReq
return nil, err
}
if err := m.MsgDatabase.MsgToMQ(ctx, utils.GenConversationUniqueKeyForSingle(req.MsgData.SendID, req.MsgData.RecvID), req.MsgData); err != nil {
promepkg.Inc(promepkg.SingleChatMsgProcessFailedCounter)
prom_metrics.SingleChatMsgProcessFailedCounter.Inc()
return nil, err
}
err = callbackAfterSendSingleMsg(ctx, req)
@@ -188,7 +183,7 @@ func (m *msgServer) sendMsgSingleChat(ctx context.Context, req *pbmsg.SendMsgReq
ClientMsgID: req.MsgData.ClientMsgID,
SendTime: req.MsgData.SendTime,
}
promepkg.Inc(promepkg.SingleChatMsgProcessSuccessCounter)
prom_metrics.SingleChatMsgProcessSuccessCounter.Inc()
return resp, nil
}
}
-18
View File
@@ -28,7 +28,6 @@ import (
"github.com/openimsdk/open-im-server/v3/pkg/common/db/controller"
"github.com/openimsdk/open-im-server/v3/pkg/common/db/localcache"
"github.com/openimsdk/open-im-server/v3/pkg/common/db/unrelation"
"github.com/openimsdk/open-im-server/v3/pkg/common/prome"
"github.com/openimsdk/open-im-server/v3/pkg/rpcclient"
)
@@ -94,27 +93,10 @@ func Start(client discoveryregistry.SvcDiscoveryRegistry, server *grpc.Server) e
}
s.notificationSender = rpcclient.NewNotificationSender(rpcclient.WithLocalSendMsg(s.SendMsg))
s.addInterceptorHandler(MessageHasReadEnabled)
s.initPrometheus()
msg.RegisterMsgServer(server, s)
return nil
}
func (m *msgServer) initPrometheus() {
prome.NewMsgPullFromRedisSuccessCounter()
prome.NewMsgPullFromRedisFailedCounter()
prome.NewMsgPullFromMongoSuccessCounter()
prome.NewMsgPullFromMongoFailedCounter()
prome.NewSingleChatMsgRecvSuccessCounter()
prome.NewGroupChatMsgRecvSuccessCounter()
prome.NewWorkSuperGroupChatMsgRecvSuccessCounter()
prome.NewSingleChatMsgProcessSuccessCounter()
prome.NewSingleChatMsgProcessFailedCounter()
prome.NewGroupChatMsgProcessSuccessCounter()
prome.NewGroupChatMsgProcessFailedCounter()
prome.NewWorkSuperGroupChatMsgProcessSuccessCounter()
prome.NewWorkSuperGroupChatMsgProcessFailedCounter()
}
func (m *msgServer) conversationAndGetRecvID(conversation *conversation.Conversation, userID string) (recvID string) {
if conversation.ConversationType == constant.SingleChatType ||
conversation.ConversationType == constant.NotificationChatType {