72 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Ruby
		
	
	
	
			
		
		
	
	
			72 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Ruby
		
	
	
	
# frozen_string_literal: true
 | 
						|
 | 
						|
module Gitlab
 | 
						|
  module Database
 | 
						|
    module Count
 | 
						|
      # A tablesample count executes in two phases:
 | 
						|
      # * Estimate table sizes based on reltuples.
 | 
						|
      # * Based on the estimate:
 | 
						|
      #   * If the table is considered 'small', execute an exact relation count.
 | 
						|
      #   * Otherwise, count on a sample of the table using TABLESAMPLE.
 | 
						|
      #
 | 
						|
      # The size of the sample is chosen in a way that we always roughly scan
 | 
						|
      # the same amount of rows (see TABLESAMPLE_ROW_TARGET).
 | 
						|
      #
 | 
						|
      # There are no guarantees with respect to the accuracy of the result or runtime.
 | 
						|
      class TablesampleCountStrategy < ReltuplesCountStrategy
 | 
						|
        EXACT_COUNT_THRESHOLD = 10_000
 | 
						|
        TABLESAMPLE_ROW_TARGET = 10_000
 | 
						|
 | 
						|
        def count
 | 
						|
          estimates = size_estimates(check_statistics: false)
 | 
						|
 | 
						|
          models.each_with_object({}) do |model, count_by_model|
 | 
						|
            count = perform_count(model, estimates[model])
 | 
						|
            count_by_model[model] = count if count
 | 
						|
          end
 | 
						|
        rescue *CONNECTION_ERRORS
 | 
						|
          {}
 | 
						|
        end
 | 
						|
 | 
						|
        private
 | 
						|
 | 
						|
        def perform_count(model, estimate)
 | 
						|
          # If we estimate 0, we may not have statistics at all. Don't use them.
 | 
						|
          return unless estimate && estimate > 0
 | 
						|
 | 
						|
          if estimate < EXACT_COUNT_THRESHOLD
 | 
						|
            # The table is considered small, the assumption here is that
 | 
						|
            # the exact count will be fast anyways.
 | 
						|
            model.count
 | 
						|
          else
 | 
						|
            # The table is considered large, let's only count on a sample.
 | 
						|
            tablesample_count(model, estimate)
 | 
						|
          end
 | 
						|
        end
 | 
						|
 | 
						|
        def where_clause(model)
 | 
						|
          return unless sti_model?(model)
 | 
						|
 | 
						|
          "WHERE #{model.inheritance_column} = '#{model.name}'"
 | 
						|
        end
 | 
						|
 | 
						|
        def tablesample_count(model, estimate)
 | 
						|
          portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
 | 
						|
          inverse = 1 / portion
 | 
						|
          query = <<~SQL
 | 
						|
            SELECT (COUNT(*)*#{inverse})::integer AS count
 | 
						|
            FROM #{model.table_name}
 | 
						|
            TABLESAMPLE SYSTEM (#{portion * 100})
 | 
						|
            REPEATABLE (0)
 | 
						|
            #{where_clause(model)}
 | 
						|
          SQL
 | 
						|
 | 
						|
          rows = model.connection.select_all(query)
 | 
						|
 | 
						|
          Integer(rows.first['count'])
 | 
						|
        end
 | 
						|
      end
 | 
						|
    end
 | 
						|
  end
 | 
						|
end
 |