gitlab-ce/app/models/merge_request/diff_commit_user.rb

# frozen_string_literal: true

class MergeRequest::DiffCommitUser < ApplicationRecord
  include SafelyChangeColumnDefault

  columns_changing_default :organization_id

  validates :name, length: { maximum: 512 }
  validates :email, length: { maximum: 512 }
  validates :name, presence: true, unless: :email
  validates :email, presence: true, unless: :name

  # Prepares a value to be inserted into a column in the table
  # `merge_request_diff_commit_users`. Values in this table are limited to
  # 512 characters.
  #
  # We treat empty strings as NULL values, as there's no point in (for
  # example) storing a row where both the name and email are an empty
  # string. In addition, if we treated them differently we could end up with
  # two rows: one where field X is NULL, and one where field X is an empty
  # string. This is redundant, so we avoid storing such data.
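  #
  # A minimal usage sketch (the values below are made up for illustration):
  #
  #   MergeRequest::DiffCommitUser.prepare('Alice Smith') # => "Alice Smith"
  #   MergeRequest::DiffCommitUser.prepare('a' * 600)     # => the first 512 characters
  #   MergeRequest::DiffCommitUser.prepare('')            # => nil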
  def self.prepare(value)
    value.present? ? value[0..511] : nil
  end

  # Creates a new row, or returns an existing one if a row already exists.
  #
  # When `with_organization: true`, a matching row that has no
  # `organization_id` yet is backfilled with the given organization ID.
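  #
  # A usage sketch (the name, email, and organization ID are made-up examples):
  #
  #   MergeRequest::DiffCommitUser.find_or_create('Alice', 'alice@example.com', 42, with_organization: true)
  #   # => #<MergeRequest::DiffCommitUser name: "Alice", email: "alice@example.com", organization_id: 42>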
  def self.find_or_create(name, email, organization_id, with_organization: false)
    return find_or_create_by!(name: name, email: email) unless with_organization

    # Try to find an exact match first
    result = find_by(name: name, email: email, organization_id: organization_id)

    # If not found, look for one with nil organization_id
    if !result && organization_id.present?
      result = find_by(name: name, email: email, organization_id: nil)
      result.update!(organization_id: organization_id) if result
    end

    # If still not found, try to create using find_or_create_by!
    result || find_or_create_by!(name: name, email: email, organization_id: organization_id)
  rescue ActiveRecord::RecordNotUnique
    retry
  end

  # Finds many rows in bulk, given (name, email) pairs or, when
  # `with_organization: true`, (name, email, organization_id) triples.
  #
  # Returns an Array of the rows that were found.
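  #
  # A usage sketch (the names, emails, and organization ID are made up):
  #
  #   MergeRequest::DiffCommitUser.bulk_find([%w[Alice alice@example.com]])
  #   # => [#<MergeRequest::DiffCommitUser name: "Alice", ...>]
  #
  #   MergeRequest::DiffCommitUser.bulk_find(
  #     [['Alice', 'alice@example.com', 42]],
  #     with_organization: true
  #   )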
  def self.bulk_find(input, with_organization: false)
    queries = {}
    rows = []

    input.each do |item|
      name, email, organization_id = item
      conditions = { name: name, email: email }
      conditions[:organization_id] = organization_id if with_organization

      queries[conditions.values] = where(conditions).to_sql
    end

    # We may end up having to query many users. To ensure we don't hit any
    # query size limits, we get a fixed number of users at a time.
    queries.values.each_slice(1_000) do |slice|
      rows.concat(from("(#{slice.join("\nUNION ALL\n")}) #{table_name}").to_a)
    end

    rows
  end

  # Finds or creates rows for the given pairs of names and emails, or
  # triples of names, emails, and organization IDs.
  #
  # The input argument must be an Array/Set of pairs or triples like so:
  #
  #     [
  #       [name, email],                  # legacy format when with_organization: false
  #       [name, email, organization_id], # new format when with_organization: true
  #       ...
  #     ]
  #
  # This method expects that the names and emails have already been trimmed to
  # at most 512 characters.
  #
  # The return value is a Hash that maps these pairs/triples to instances of
  # this model.
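  #
  # A usage sketch (the names, emails, and organization ID are made up):
  #
  #   MergeRequest::DiffCommitUser.bulk_find_or_create(
  #     [['Alice', 'alice@example.com', 42], ['Bob', 'bob@example.com', 42]],
  #     with_organization: true
  #   )
  #   # => {
  #   #      ['Alice', 'alice@example.com', 42] => #<MergeRequest::DiffCommitUser ...>,
  #   #      ['Bob', 'bob@example.com', 42]     => #<MergeRequest::DiffCommitUser ...>
  #   #    }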
  def self.bulk_find_or_create(input, with_organization: false)
    return bulk_find_or_create_legacy(input) unless with_organization

    mapping = {}
    ids_to_update = []

    # Extract organization_id - it's the same for all triples in this batch
    organization_id = input.first&.last

    # Find all existing records by (name, email) only
    existing_records = bulk_find(input, with_organization: false)

    existing_records.each do |row|
      ids_to_update << row.id if row.organization_id.nil?

      # Map all found records with the organization_id from input
      mapping[[row.name, row.email, organization_id]] = row
    end

    # Bulk update organization_ids for records that had nil
    if ids_to_update.any?
      where(id: ids_to_update).update_all(organization_id: organization_id)

      # Update the organization_id on the objects we already have
      existing_records.each do |row|
        row.organization_id = organization_id if ids_to_update.include?(row.id)
      end
    end

    # Create missing records
    create_missing_records(input, mapping)

    # Handle concurrent inserts
    handle_concurrent_inserts(input, mapping)

    mapping
  end
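
  # Inserts rows for the (name, email, organization_id) triples in `input`
  # that are not yet present in `mapping`, and adds the newly inserted rows
  # to `mapping`.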
  def self.create_missing_records(input, mapping)
    create = []

    # Collect records that need to be created
    input.each do |(name, email, org_id)|
      next if mapping[[name, email, org_id]]

      create << { name: name, email: email, organization_id: org_id }
    end

    return if create.empty?

    # Bulk insert new records
    insert_all(create, returning: %w[id name email organization_id]).each do |row|
      mapping[[row['name'], row['email'], row['organization_id']]] =
        new(id: row['id'], name: row['name'], email: row['email'], organization_id: row['organization_id'])
    end
  end
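
  # Looks up any rows that were inserted concurrently (and therefore skipped
  # by the bulk insert above) and adds them to `mapping`.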
  def self.handle_concurrent_inserts(input, mapping)
    # Find any records that were created concurrently
    missing_triples = input.reject { |(name, email, org_id)| mapping.key?([name, email, org_id]) }

    return if missing_triples.empty?

    bulk_find(missing_triples, with_organization: true).each do |row|
      mapping[[row.name, row.email, row.organization_id]] = row
    end
  end
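
  # Finds or creates rows for the given (name, email) pairs, without taking
  # organizations into account. Returns a Hash mapping each pair to an
  # instance of this model.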
  def self.bulk_find_or_create_legacy(input)
    mapping = {}
    create = []

    # Over time, fewer new rows need to be created. We take advantage of that
    # here by first finding all rows that already exist, using a limited number
    # of queries (in most cases only one query will be needed).
    bulk_find(input).each do |row|
      mapping[[row.name, row.email]] = row
    end

    input.each do |(name, email)|
      create << { name: name, email: email } unless mapping[[name, email]]
    end

    return mapping if create.empty?

    # Sometimes we may need to insert new users into the table. We do this in
    # bulk, so we only need one INSERT for all missing users.
    insert_all(create, returning: %w[id name email]).each do |row|
      mapping[[row['name'], row['email']]] =
        new(id: row['id'], name: row['name'], email: row['email'])
    end

    # It's possible for (name, email) pairs to be inserted concurrently,
    # resulting in the above insert not returning anything. Here we get any
    # remaining users that were created concurrently.
    bulk_find(input.reject { |pair| mapping.key?(pair) }).each do |row|
      mapping[[row.name, row.email]] = row
    end

    mapping
  end

  private_class_method :bulk_find_or_create_legacy,
    :create_missing_records,
    :handle_concurrent_inserts
end