Adds direct monitoring for sidekiq metrics
This adds direct monitoring for Sidekiq metrics. This is done via a Sidekiq middleware and a sampler that pulls from Sidekiq's API.
This commit is contained in:
		
							parent
							
								
									3bb3ac3d53
								
							
						
					
					
						commit
						cfea48dffd
					
				| 
						 | 
				
			
			@ -33,6 +33,7 @@ Sidekiq.configure_server do |config|
 | 
			
		|||
  config.redis = queues_config_hash
 | 
			
		||||
 | 
			
		||||
  config.server_middleware do |chain|
 | 
			
		||||
    chain.add Gitlab::SidekiqMiddleware::Metrics if Settings.monitoring.sidekiq_exporter
 | 
			
		||||
    chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs
 | 
			
		||||
    chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS']
 | 
			
		||||
    chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,48 @@
 | 
			
		|||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
module Gitlab
  module SidekiqMiddleware
    # Sidekiq middleware that records per-queue job metrics through
    # ::Gitlab::Metrics:
    #
    # * sidekiq_running_jobs            - gauge, +1 when a job starts, -1 when it ends
    # * sidekiq_jobs_retried_total      - counter, bumped when the job carries a retry count
    # * sidekiq_jobs_failed_total       - counter, bumped when the job raises
    # * sidekiq_jobs_completion_seconds - histogram of the wall time of successful jobs
    class Metrics
      # Registers the metrics once per middleware instance.
      def initialize
        @metrics = init_metrics
      end

      # Runs the job (the given block) and updates the metrics around it.
      #
      # @param _worker [Object] the worker instance; not used here
      # @param job [Hash] the job payload; only 'retry_count' is read
      # @param queue [Object] the queue identifier, used as the `queue` label
      # @yield executes the job itself
      # @raise re-raises whatever the job raised, after counting the failure
      def call(_worker, job, queue)
        labels = create_labels(queue)
        @metrics[:sidekiq_running_jobs].increment(labels, 1)

        # `present?` is not core Ruby; presumably ActiveSupport's Object#present?.
        if job['retry_count'].present?
          @metrics[:sidekiq_jobs_retried_total].increment(labels, 1)
        end

        realtime = Benchmark.realtime do
          yield
        end

        # Only reached when the job did not raise, so failed jobs are not
        # included in the completion histogram.
        @metrics[:sidekiq_jobs_completion_seconds].observe(labels, realtime)
      rescue Exception # rubocop: disable Lint/RescueException
        # Exception (not just StandardError) is rescued on purpose so that every
        # kind of failure is counted; it is always re-raised.
        @metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
        raise
      ensure
        # Balance the +1 above on both the success and the failure path.
        @metrics[:sidekiq_running_jobs].increment(labels, -1)
      end

      private

      # Builds the metric objects, keyed by metric name.
      #
      # @return [Hash{Symbol => Object}]
      def init_metrics
        {
          sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job'),
          sidekiq_jobs_failed_total:       ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
          sidekiq_jobs_retried_total:      ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
          sidekiq_running_jobs:            ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum)
        }
      end

      # Labels attached to every metric sample.
      #
      # @param queue [Object] the queue identifier
      # @return [Hash] `{ queue: queue }`
      def create_labels(queue)
        {
          queue: queue
        }
      end
    end
  end
end
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,57 @@
 | 
			
		|||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
require 'spec_helper'
 | 
			
		||||
 | 
			
		||||
# Unit spec for the Sidekiq metrics middleware. Every metric object returned by
# Gitlab::Metrics is replaced with a double so the examples can assert exactly
# which samples the middleware records.
describe Gitlab::SidekiqMiddleware::Metrics do
  describe '#call' do
    let(:middleware) { described_class.new }
    let(:worker) { double(:worker) }

    let(:completion_seconds_metric) { double('completion seconds metric') }
    let(:failed_total_metric) { double('failed total metric') }
    let(:retried_total_metric) { double('retried total metric') }
    let(:running_jobs_metric) { double('running jobs metric') }

    before do
      allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
      allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
      allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)

      # The running-jobs gauge is touched by every call, so allow it by default.
      allow(running_jobs_metric).to receive(:increment)
    end

    it 'yields block' do
      allow(completion_seconds_metric).to receive(:observe)

      expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once
    end

    it 'sets metrics' do
      labels = { queue: :test }

      expect(running_jobs_metric).to receive(:increment).with(labels, 1)
      expect(running_jobs_metric).to receive(:increment).with(labels, -1)
      expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))

      middleware.call(worker, {}, :test) { nil }
    end

    context 'when job is retried' do
      it 'sets sidekiq_jobs_retried_total metric' do
        allow(completion_seconds_metric).to receive(:observe)

        expect(retried_total_metric).to receive(:increment).with({ queue: :test }, 1)

        middleware.call(worker, { 'retry_count' => 1 }, :test) { nil }
      end
    end

    context 'when error is raised' do
      it 'sets sidekiq_jobs_failed_total and reraises' do
        expect(failed_total_metric).to receive(:increment).with({ queue: :test }, 1)

        # Pin the error class and message: a bare `raise_error` would also pass
        # on an unrelated exception such as a NoMethodError.
        expect { middleware.call(worker, {}, :test) { raise StandardError, 'job failed' } }
          .to raise_error(StandardError, 'job failed')
      end
    end
  end
end
 | 
			
		||||
		Loading…
	
		Reference in New Issue