Extract PrometheusRule reconciliation into ReconcilePrometheusRule methods

Change-Id: I3c045c8c6749d9cf4e3d316c1604453cf38c2d0c
This commit is contained in:
okozachenko 2020-04-14 13:25:36 -07:00
parent 9b5c71c6a5
commit bcfb771242
3 changed files with 118 additions and 95 deletions

View File

@ -134,37 +134,9 @@ func (r *McrouterReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
}
// Alertrule
alertRule := &monitoringv1.PrometheusRule{
ObjectMeta: metav1.ObjectMeta{
Namespace: req.Namespace,
Name: "mcrouter-alertrule",
},
if res, err := r.ReconcilePrometheusRule(ctx, req, &mcrouter, log, typeLabels); err != nil || res != (ctrl.Result{}) {
return res, err
}
op, err = k8sutils.CreateOrUpdate(ctx, r, alertRule, func() error {
return builders.PrometheusRule(alertRule, &mcrouter, r.Scheme).
Labels(typeLabels).
RuleGroups(builders.RuleGroup().
Name("mcrouter-rule").
Rules(
builders.Rule().
Alert("McrouterBackendDown").
Message("Backend Memcached servers are down.").
Priority(1).
Expr("mcrouter_servers{state='down'}!=0"),
builders.Rule().
Alert("McrouterBackendTimeout").
Message("Backend Memcached servers are timeout.").
Priority(1).
Expr("mcrouter_server_memcached_timeout_count>0"),
).
Interval("1m")).
Build()
})
if err != nil {
return ctrl.Result{}, err
}
log.WithValues("resource", "mcrouter-alertrule").WithValues("op", op).Info("Reconciled")
// Service
if res, err := r.ReconcileService(ctx, req, &mcrouter, log, labels); err != nil || res != (ctrl.Result{}) {
@ -239,3 +211,38 @@ func (r *McrouterReconciler) ReconcilePodMonitor(ctx context.Context, req ctrl.R
log.WithValues("resource", "mcrouter-podmonitor").WithValues("op", op).Info("Reconciled")
return ctrl.Result{}, nil
}
// ReconcilePrometheusRule reconciles the "mcrouter-alertrule" PrometheusRule
// in the namespace of the incoming request.
//
// The desired state is assembled by the builders package inside the mutate
// callback handed to k8sutils.CreateOrUpdate. It consists of a single rule
// group, "mcrouter-rule", evaluated at a "1m" interval with two alerts:
//
//   - McrouterBackendDown:    mcrouter_servers{state='down'}!=0
//   - McrouterBackendTimeout: mcrouter_server_memcached_timeout_count>0
//
// The supplied labels are applied to the PrometheusRule through the builder.
// On failure the error from k8sutils.CreateOrUpdate is returned unchanged
// together with an empty ctrl.Result; on success the resulting operation is
// logged and an empty ctrl.Result with a nil error is returned.
func (r *McrouterReconciler) ReconcilePrometheusRule(ctx context.Context, req ctrl.Request, mcrouter *infrastructurev1alpha1.Mcrouter, log logr.Logger, labels map[string]string) (ctrl.Result, error) {
	const resourceName = "mcrouter-alertrule"

	rule := &monitoringv1.PrometheusRule{
		ObjectMeta: metav1.ObjectMeta{
			Name:      resourceName,
			Namespace: req.Namespace,
		},
	}

	// mutate describes the desired state of the rule; everything is built
	// inside the callback so each invocation starts from fresh builders.
	mutate := func() error {
		ruleBuilder := builders.PrometheusRule(rule, mcrouter, r.Scheme).Labels(labels)

		backendDown := builders.Rule().
			Alert("McrouterBackendDown").
			Message("Backend Memcached servers are down.").
			Priority(1).
			Expr("mcrouter_servers{state='down'}!=0")
		backendTimeout := builders.Rule().
			Alert("McrouterBackendTimeout").
			Message("Backend Memcached servers are timeout.").
			Priority(1).
			Expr("mcrouter_server_memcached_timeout_count>0")

		group := builders.RuleGroup().
			Name("mcrouter-rule").
			Rules(backendDown, backendTimeout).
			Interval("1m")

		return ruleBuilder.RuleGroups(group).Build()
	}

	result, reconcileErr := k8sutils.CreateOrUpdate(ctx, r, rule, mutate)
	if reconcileErr != nil {
		return ctrl.Result{}, reconcileErr
	}

	log.WithValues("resource", resourceName).WithValues("op", result).Info("Reconciled")
	return ctrl.Result{}, nil
}

View File

@ -158,33 +158,9 @@ func (r *MemcachedReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
}
// Alertrule
alertRule := &monitoringv1.PrometheusRule{
ObjectMeta: metav1.ObjectMeta{
Namespace: req.Namespace,
Name: "memcached-alertrule",
},
if res, err := r.ReconcilePrometheusRule(ctx, req, &memcached, log, typeLabels); err != nil || res != (ctrl.Result{}) {
return res, err
}
op, err = k8sutils.CreateOrUpdate(ctx, r, alertRule, func() error {
return builders.PrometheusRule(alertRule, &memcached, r.Scheme).
Labels(typeLabels).
RuleGroups(builders.RuleGroup().
Name("memcached-rule").
Rules(
builders.Rule().
Alert("MemcachedConnectionLimit").
Message("This memcached connection is over max.").
Priority(1).
Expr("memcached_current_connections/memcached_max_connections*100 >90"),
).
Interval("1m")).
Build()
})
if err != nil {
return ctrl.Result{}, err
}
log.WithValues("resource", "memcached-alertrule").WithValues("op", op).Info("Reconciled")
// Make sure that they're sorted so we're idempotent
sort.Strings(servers)
@ -256,3 +232,35 @@ func (r *MemcachedReconciler) ReconcilePodMonitor(ctx context.Context, req ctrl.
log.WithValues("resource", "podmonitor").WithValues("op", op).Info("Reconciled")
return ctrl.Result{}, nil
}
// ReconcilePrometheusRule reconciles the "memcached-alertrule" PrometheusRule
// in the namespace of the incoming request.
//
// The desired state is assembled by the builders package inside the mutate
// callback handed to k8sutils.CreateOrUpdate. It consists of a single rule
// group, "memcached-rule", evaluated at a "1m" interval with one alert:
//
//   - MemcachedConnectionLimit:
//     memcached_current_connections/memcached_max_connections*100 >90
//
// The supplied labels are applied to the PrometheusRule through the builder.
// On failure the error from k8sutils.CreateOrUpdate is returned unchanged
// together with an empty ctrl.Result; on success the resulting operation is
// logged and an empty ctrl.Result with a nil error is returned.
func (r *MemcachedReconciler) ReconcilePrometheusRule(ctx context.Context, req ctrl.Request, memcached *infrastructurev1alpha1.Memcached, log logr.Logger, labels map[string]string) (ctrl.Result, error) {
	const resourceName = "memcached-alertrule"

	rule := &monitoringv1.PrometheusRule{
		ObjectMeta: metav1.ObjectMeta{
			Name:      resourceName,
			Namespace: req.Namespace,
		},
	}

	// mutate describes the desired state of the rule; everything is built
	// inside the callback so each invocation starts from fresh builders.
	mutate := func() error {
		ruleBuilder := builders.PrometheusRule(rule, memcached, r.Scheme).Labels(labels)

		connectionLimit := builders.Rule().
			Alert("MemcachedConnectionLimit").
			Message("This memcached connection is over max.").
			Priority(1).
			Expr("memcached_current_connections/memcached_max_connections*100 >90")

		group := builders.RuleGroup().
			Name("memcached-rule").
			Rules(connectionLimit).
			Interval("1m")

		return ruleBuilder.RuleGroups(group).Build()
	}

	result, reconcileErr := k8sutils.CreateOrUpdate(ctx, r, rule, mutate)
	if reconcileErr != nil {
		return ctrl.Result{}, reconcileErr
	}

	log.WithValues("resource", resourceName).WithValues("op", result).Info("Reconciled")
	return ctrl.Result{}, nil
}

View File

@ -110,46 +110,9 @@ func (r *RabbitmqReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
}
// Alertrule
alertRule := &monitoringv1.PrometheusRule{
ObjectMeta: metav1.ObjectMeta{
Namespace: req.Namespace,
Name: "rabbitmq-alertrule",
},
if res, err := r.ReconcilePrometheusRule(ctx, req, &Rabbitmq, log, typeLabels); err != nil || res != (ctrl.Result{}) {
return res, err
}
op, err = k8sutils.CreateOrUpdate(ctx, r, alertRule, func() error {
return builders.PrometheusRule(alertRule, &Rabbitmq, r.Scheme).
Labels(typeLabels).
RuleGroups(builders.RuleGroup().
Name("rabbitmq-rule").
Rules(
builders.Rule().
Alert("RabbitmqDown").
Message("Rabbitmq node down.").
Priority(1).
Expr("rabbitmq_up == 0"),
builders.Rule().
Alert("RabbitmqTooManyConnections").
Message("RabbitMQ instance has too many connections.").
Priority(1).
Expr("rabbitmq_connectionsTotal > 1000"),
builders.Rule().
Alert("RabbitmqTooManyMessagesInQueue").
Message("Queue is filling up.").
Priority(1).
Expr("rabbitmq_queue_messages_ready > 1000"),
builders.Rule().
Alert("RabbitmqSlowQueueConsuming").
Message("Queue messages are consumed slowly.").
Priority(1).
Expr("time() - rabbitmq_queue_head_message_timestamp > 60"),
).
Interval("1m")).
Build()
})
if err != nil {
return ctrl.Result{}, err
}
log.WithValues("resource", "rabbitmq-alertrule").WithValues("op", op).Info("Reconciled")
// Service
if res, err := r.ReconcileService(ctx, req, &Rabbitmq, log, labels); err != nil || res != (ctrl.Result{}) {
@ -221,3 +184,48 @@ func (r *RabbitmqReconciler) ReconcilePodMonitor(ctx context.Context, req ctrl.R
log.WithValues("resource", "rabbitmq-podmonitor").WithValues("op", op).Info("Reconciled")
return ctrl.Result{}, nil
}
// ReconcilePrometheusRule reconciles the "rabbitmq-alertrule" PrometheusRule
// in the namespace of the incoming request.
//
// The desired state is assembled by the builders package inside the mutate
// callback handed to k8sutils.CreateOrUpdate. It consists of a single rule
// group, "rabbitmq-rule", evaluated at a "1m" interval with four alerts:
//
//   - RabbitmqDown:                   rabbitmq_up == 0
//   - RabbitmqTooManyConnections:     rabbitmq_connectionsTotal > 1000
//   - RabbitmqTooManyMessagesInQueue: rabbitmq_queue_messages_ready > 1000
//   - RabbitmqSlowQueueConsuming:
//     time() - rabbitmq_queue_head_message_timestamp > 60
//
// The supplied labels are applied to the PrometheusRule through the builder.
// On failure the error from k8sutils.CreateOrUpdate is returned unchanged
// together with an empty ctrl.Result; on success the resulting operation is
// logged and an empty ctrl.Result with a nil error is returned.
func (r *RabbitmqReconciler) ReconcilePrometheusRule(ctx context.Context, req ctrl.Request, rabbitmq *infrastructurev1alpha1.Rabbitmq, log logr.Logger, labels map[string]string) (ctrl.Result, error) {
	const resourceName = "rabbitmq-alertrule"

	rule := &monitoringv1.PrometheusRule{
		ObjectMeta: metav1.ObjectMeta{
			Name:      resourceName,
			Namespace: req.Namespace,
		},
	}

	// mutate describes the desired state of the rule; everything is built
	// inside the callback so each invocation starts from fresh builders.
	mutate := func() error {
		ruleBuilder := builders.PrometheusRule(rule, rabbitmq, r.Scheme).Labels(labels)

		nodeDown := builders.Rule().
			Alert("RabbitmqDown").
			Message("Rabbitmq node down.").
			Priority(1).
			Expr("rabbitmq_up == 0")
		tooManyConnections := builders.Rule().
			Alert("RabbitmqTooManyConnections").
			Message("RabbitMQ instance has too many connections.").
			Priority(1).
			Expr("rabbitmq_connectionsTotal > 1000")
		queueFillingUp := builders.Rule().
			Alert("RabbitmqTooManyMessagesInQueue").
			Message("Queue is filling up.").
			Priority(1).
			Expr("rabbitmq_queue_messages_ready > 1000")
		slowConsuming := builders.Rule().
			Alert("RabbitmqSlowQueueConsuming").
			Message("Queue messages are consumed slowly.").
			Priority(1).
			Expr("time() - rabbitmq_queue_head_message_timestamp > 60")

		group := builders.RuleGroup().
			Name("rabbitmq-rule").
			Rules(nodeDown, tooManyConnections, queueFillingUp, slowConsuming).
			Interval("1m")

		return ruleBuilder.RuleGroups(group).Build()
	}

	result, reconcileErr := k8sutils.CreateOrUpdate(ctx, r, rule, mutate)
	if reconcileErr != nil {
		return ctrl.Result{}, reconcileErr
	}

	log.WithValues("resource", resourceName).WithValues("op", result).Info("Reconciled")
	return ctrl.Result{}, nil
}