Merge branch 'active-record-each-batch' into 'master'
Added EachBatch for iterating tables in batches See merge request !12707
This commit is contained in:
commit
7c35ecf7e4
|
|
@ -0,0 +1,81 @@
|
|||
module EachBatch
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
module ClassMethods
|
||||
# Iterates over the rows in a relation in batches, similar to Rails'
|
||||
# `in_batches` but in a more efficient way.
|
||||
#
|
||||
# Unlike `in_batches` provided by Rails this method does not support a
|
||||
# custom start/end range, nor does it provide support for the `load:`
|
||||
# keyword argument.
|
||||
#
|
||||
# This method will yield an ActiveRecord::Relation to the supplied block, or
|
||||
# return an Enumerator if no block is given.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# User.each_batch do |relation|
|
||||
# relation.update_all(updated_at: Time.now)
|
||||
# end
|
||||
#
|
||||
# The supplied block is also passed an optional batch index:
|
||||
#
|
||||
# User.each_batch do |relation, index|
|
||||
# puts index # => 1, 2, 3, ...
|
||||
# end
|
||||
#
|
||||
# You can also specify an alternative column to use for ordering the rows:
|
||||
#
|
||||
# User.each_batch(column: :created_at) do |relation|
|
||||
# ...
|
||||
# end
|
||||
#
|
||||
# This will produce SQL queries along the lines of:
|
||||
#
|
||||
# User Load (0.7ms) SELECT "users"."id" FROM "users" WHERE ("users"."id" >= 41654) ORDER BY "users"."id" ASC LIMIT 1 OFFSET 1000
|
||||
# (0.7ms) SELECT COUNT(*) FROM "users" WHERE ("users"."id" >= 41654) AND ("users"."id" < 42687)
|
||||
#
|
||||
# of - The number of rows to retrieve per batch.
|
||||
# column - The column to use for ordering the batches.
|
||||
def each_batch(of: 1000, column: primary_key)
|
||||
unless column
|
||||
raise ArgumentError,
|
||||
'the column: argument must be set to a column name to use for ordering rows'
|
||||
end
|
||||
|
||||
start = except(:select)
|
||||
.select(column)
|
||||
.reorder(column => :asc)
|
||||
.take
|
||||
|
||||
return unless start
|
||||
|
||||
start_id = start[column]
|
||||
arel_table = self.arel_table
|
||||
|
||||
1.step do |index|
|
||||
stop = except(:select)
|
||||
.select(column)
|
||||
.where(arel_table[column].gteq(start_id))
|
||||
.reorder(column => :asc)
|
||||
.offset(of)
|
||||
.limit(1)
|
||||
.take
|
||||
|
||||
relation = where(arel_table[column].gteq(start_id))
|
||||
|
||||
if stop
|
||||
stop_id = stop[column]
|
||||
start_id = stop_id
|
||||
relation = relation.where(arel_table[column].lt(stop_id))
|
||||
end
|
||||
|
||||
# Any ORDER BYs are useless for this relation and can lead to less
|
||||
# efficient UPDATE queries, hence we get rid of it.
|
||||
yield relation.except(:order), index
|
||||
|
||||
break unless stop
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -55,6 +55,7 @@
|
|||
- [Single Table Inheritance](single_table_inheritance.md)
|
||||
- [Background Migrations](background_migrations.md)
|
||||
- [Storing SHA1 Hashes As Binary](sha1_as_binary.md)
|
||||
- [Iterating Tables In Batches](iterating_tables_in_batches.md)
|
||||
|
||||
## i18n
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
# Iterating Tables In Batches
|
||||
|
||||
Rails provides a method called `in_batches` that can be used to iterate over
|
||||
rows in batches. For example:
|
||||
|
||||
```ruby
|
||||
User.in_batches(of: 10) do |relation|
|
||||
relation.update_all(updated_at: Time.now)
|
||||
end
|
||||
```
|
||||
|
||||
Unfortunately this method is implemented in a way that is not very efficient,
|
||||
both query and memory usage wise.
|
||||
|
||||
To work around this you can include the `EachBatch` module into your models,
|
||||
then use the `each_batch` class method. For example:
|
||||
|
||||
```ruby
|
||||
class User < ActiveRecord::Base
|
||||
include EachBatch
|
||||
end
|
||||
|
||||
User.each_batch(of: 10) do |relation|
|
||||
relation.update_all(updated_at: Time.now)
|
||||
end
|
||||
```
|
||||
|
||||
This will end up producing queries such as:
|
||||
|
||||
```
|
||||
User Load (0.7ms) SELECT "users"."id" FROM "users" WHERE ("users"."id" >= 41654) ORDER BY "users"."id" ASC LIMIT 1 OFFSET 1000
|
||||
(0.7ms) SELECT COUNT(*) FROM "users" WHERE ("users"."id" >= 41654) AND ("users"."id" < 42687)
|
||||
```
|
||||
|
||||
The API of this method is similar to `in_batches`, though it doesn't support
|
||||
all of the arguments that `in_batches` supports. You should always use
|
||||
`each_batch` _unless_ you have a specific need for `in_batches`.
|
||||
|
|
@ -34,7 +34,7 @@ module Gitlab
|
|||
|
||||
write_csv do |csv|
|
||||
ActiveRecord::Base.transaction do
|
||||
User.with_two_factor.in_batches do |relation|
|
||||
User.with_two_factor.in_batches do |relation| # rubocop: disable Cop/InBatches
|
||||
rows = relation.pluck(:id, :encrypted_otp_secret, :encrypted_otp_secret_iv, :encrypted_otp_secret_salt)
|
||||
rows.each do |row|
|
||||
user = %i[id ciphertext iv salt].zip(row).to_h
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
require_relative '../model_helpers'
|
||||
|
||||
module RuboCop
|
||||
module Cop
|
||||
# Cop that prevents the use of `in_batches`
|
||||
class InBatches < RuboCop::Cop::Cop
|
||||
MSG = 'Do not use `in_batches`, use `each_batch` from the EachBatch module instead'.freeze
|
||||
|
||||
def on_send(node)
|
||||
return unless node.children[1] == :in_batches
|
||||
|
||||
add_offense(node, :selector)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -5,6 +5,7 @@ require_relative 'cop/redirect_with_status'
|
|||
require_relative 'cop/polymorphic_associations'
|
||||
require_relative 'cop/project_path_helper'
|
||||
require_relative 'cop/active_record_dependent'
|
||||
require_relative 'cop/in_batches'
|
||||
require_relative 'cop/migration/add_column'
|
||||
require_relative 'cop/migration/add_column_with_default_to_large_table'
|
||||
require_relative 'cop/migration/add_concurrent_foreign_key'
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe EachBatch do
|
||||
describe '.each_batch' do
|
||||
let(:model) do
|
||||
Class.new(ActiveRecord::Base) do
|
||||
include EachBatch
|
||||
|
||||
self.table_name = 'users'
|
||||
end
|
||||
end
|
||||
|
||||
before do
|
||||
5.times { create(:user, updated_at: 1.day.ago) }
|
||||
end
|
||||
|
||||
it 'yields an ActiveRecord::Relation when a block is given' do
|
||||
model.each_batch do |relation|
|
||||
expect(relation).to be_a_kind_of(ActiveRecord::Relation)
|
||||
end
|
||||
end
|
||||
|
||||
it 'yields a batch index as the second argument' do
|
||||
model.each_batch do |_, index|
|
||||
expect(index).to eq(1)
|
||||
end
|
||||
end
|
||||
|
||||
it 'accepts a custom batch size' do
|
||||
amount = 0
|
||||
|
||||
model.each_batch(of: 1) { amount += 1 }
|
||||
|
||||
expect(amount).to eq(5)
|
||||
end
|
||||
|
||||
it 'does not include ORDER BYs in the yielded relations' do
|
||||
model.each_batch do |relation|
|
||||
expect(relation.to_sql).not_to include('ORDER BY')
|
||||
end
|
||||
end
|
||||
|
||||
it 'allows updating of the yielded relations' do
|
||||
time = Time.now
|
||||
|
||||
model.each_batch do |relation|
|
||||
relation.update_all(updated_at: time)
|
||||
end
|
||||
|
||||
expect(model.where(updated_at: time).count).to eq(5)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
require 'spec_helper'
|
||||
require 'rubocop'
|
||||
require 'rubocop/rspec/support'
|
||||
require_relative '../../../rubocop/cop/in_batches'
|
||||
|
||||
describe RuboCop::Cop::InBatches do
|
||||
include CopHelper
|
||||
|
||||
subject(:cop) { described_class.new }
|
||||
|
||||
it 'registers an offense when in_batches is used' do
|
||||
inspect_source(cop, 'foo.in_batches do; end')
|
||||
|
||||
aggregate_failures do
|
||||
expect(cop.offenses.size).to eq(1)
|
||||
expect(cop.offenses.map(&:line)).to eq([1])
|
||||
end
|
||||
end
|
||||
end
|
||||
Loading…
Reference in New Issue