Batch load only data from same repository when lazy object is accessed
By specifying `key`, we get a different lazy batch loader for each repository, which means that accessing a lazy object from one repository will only result in that repository's objects being fetched, not those of other repositories, saving us some unnecessary Gitaly lookups.
This commit is contained in:
		
							parent
							
								
									ba9eeea4ee
								
							
						
					
					
						commit
						5f0e4040ce
					
				|  | @ -11,10 +11,11 @@ module Resolvers | |||
|     end | ||||
| 
 | ||||
|     def model_by_full_path(model, full_path) | ||||
|       BatchLoader.for(full_path).batch(key: "#{model.model_name.param_key}:full_path") do |full_paths, loader| | ||||
|       BatchLoader.for(full_path).batch(key: model) do |full_paths, loader, args| | ||||
|         # `with_route` avoids an N+1 calculating full_path | ||||
|         results = model.where_full_path_in(full_paths).with_route | ||||
|         results.each { |project| loader.call(project.full_path, project) } | ||||
|         args[:key].where_full_path_in(full_paths).with_route.each do |project| | ||||
|           loader.call(project.full_path, project) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|   end | ||||
|  |  | |||
|  | @ -14,9 +14,10 @@ module Resolvers | |||
|     def resolve(iid:) | ||||
|       return unless project.present? | ||||
| 
 | ||||
|       BatchLoader.for(iid.to_s).batch(key: project.id) do |iids, loader| | ||||
|         results = project.merge_requests.where(iid: iids) | ||||
|         results.each { |mr| loader.call(mr.iid.to_s, mr) } | ||||
|       BatchLoader.for(iid.to_s).batch(key: project) do |iids, loader, args| | ||||
|         args[:key].merge_requests.where(iid: iids).each do |mr| | ||||
|           loader.call(mr.iid.to_s, mr) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|     # rubocop: enable CodeReuse/ActiveRecord | ||||
|  |  | |||
|  | @ -80,15 +80,9 @@ class Blob < SimpleDelegator | |||
|   end | ||||
| 
 | ||||
|   def self.lazy(project, commit_id, path) | ||||
|     BatchLoader.for({ project: project, commit_id: commit_id, path: path }).batch do |items, loader| | ||||
|       items_by_project = items.group_by { |i| i[:project] } | ||||
| 
 | ||||
|       items_by_project.each do |project, items| | ||||
|         items = items.map { |i| i.values_at(:commit_id, :path) } | ||||
| 
 | ||||
|         project.repository.blobs_at(items).each do |blob| | ||||
|           loader.call({ project: blob.project, commit_id: blob.commit_id, path: blob.path }, blob) if blob | ||||
|         end | ||||
|     BatchLoader.for([commit_id, path]).batch(key: project.repository) do |items, loader, args| | ||||
|       args[:key].blobs_at(items).each do |blob| | ||||
|         loader.call([blob.commit_id, blob.path], blob) if blob | ||||
|       end | ||||
|     end | ||||
|   end | ||||
|  |  | |||
|  | @ -0,0 +1,5 @@ | |||
| --- | ||||
| title: Batch load only data from same repository when lazy object is accessed | ||||
| merge_request: 23309 | ||||
| author: | ||||
| type: performance | ||||
|  | @ -155,17 +155,9 @@ module Gitlab | |||
|         end | ||||
| 
 | ||||
|         def extract_signature_lazily(repository, commit_id) | ||||
|           BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader| | ||||
|             items_by_repo = items.group_by { |i| i[:repository] } | ||||
| 
 | ||||
|             items_by_repo.each do |repo, items| | ||||
|               commit_ids = items.map { |i| i[:commit_id] } | ||||
| 
 | ||||
|               signatures = batch_signature_extraction(repository, commit_ids) | ||||
| 
 | ||||
|               signatures.each do |commit_sha, signature_data| | ||||
|                 loader.call({ repository: repository, commit_id: commit_sha }, signature_data) | ||||
|               end | ||||
|           BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args| | ||||
|             batch_signature_extraction(args[:key], commit_ids).each do |commit_id, signature_data| | ||||
|               loader.call(commit_id, signature_data) | ||||
|             end | ||||
|           end | ||||
|         end | ||||
|  | @ -175,17 +167,9 @@ module Gitlab | |||
|         end | ||||
| 
 | ||||
|         def get_message(repository, commit_id) | ||||
|           BatchLoader.for({ repository: repository, commit_id: commit_id }).batch do |items, loader| | ||||
|             items_by_repo = items.group_by { |i| i[:repository] } | ||||
| 
 | ||||
|             items_by_repo.each do |repo, items| | ||||
|               commit_ids = items.map { |i| i[:commit_id] } | ||||
| 
 | ||||
|               messages = get_messages(repository, commit_ids) | ||||
| 
 | ||||
|               messages.each do |commit_sha, message| | ||||
|                 loader.call({ repository: repository, commit_id: commit_sha }, message) | ||||
|               end | ||||
|           BatchLoader.for(commit_id).batch(key: repository) do |commit_ids, loader, args| | ||||
|             get_messages(args[:key], commit_ids).each do |commit_id, message| | ||||
|               loader.call(commit_id, message) | ||||
|             end | ||||
|           end | ||||
|         end | ||||
|  |  | |||
|  | @ -14,17 +14,9 @@ module Gitlab | |||
| 
 | ||||
|       class << self | ||||
|         def get_message(repository, tag_id) | ||||
|           BatchLoader.for({ repository: repository, tag_id: tag_id }).batch do |items, loader| | ||||
|             items_by_repo = items.group_by { |i| i[:repository] } | ||||
| 
 | ||||
|             items_by_repo.each do |repo, items| | ||||
|               tag_ids = items.map { |i| i[:tag_id] } | ||||
| 
 | ||||
|               messages = get_messages(repository, tag_ids) | ||||
| 
 | ||||
|               messages.each do |id, message| | ||||
|                 loader.call({ repository: repository, tag_id: id }, message) | ||||
|               end | ||||
|           BatchLoader.for(tag_id).batch(key: repository) do |tag_ids, loader, args| | ||||
|             get_messages(args[:key], tag_ids).each do |tag_id, message| | ||||
|               loader.call(tag_id, message) | ||||
|             end | ||||
|           end | ||||
|         end | ||||
|  |  | |||
|  | @ -450,11 +450,17 @@ describe Gitlab::Git::Commit, :seed_helper do | |||
|             described_class.extract_signature_lazily(repository, commit_id) | ||||
|           end | ||||
| 
 | ||||
|           other_repository = double(:repository) | ||||
|           described_class.extract_signature_lazily(other_repository, commit_ids.first) | ||||
| 
 | ||||
|           expect(described_class).to receive(:batch_signature_extraction) | ||||
|             .with(repository, commit_ids) | ||||
|             .once | ||||
|             .and_return({}) | ||||
| 
 | ||||
|           expect(described_class).not_to receive(:batch_signature_extraction) | ||||
|             .with(other_repository, commit_ids.first) | ||||
| 
 | ||||
|           2.times { signatures.each(&:itself) } | ||||
|         end | ||||
|       end | ||||
|  |  | |||
|  | @ -38,6 +38,9 @@ describe Gitlab::Git::Tag, :seed_helper do | |||
|     end | ||||
| 
 | ||||
|     it 'gets messages in one batch', :request_store do | ||||
|       other_repository = double(:repository) | ||||
|       described_class.get_message(other_repository, tag_ids.first) | ||||
| 
 | ||||
|       expect { subject.map(&:itself) }.to change { Gitlab::GitalyClient.get_request_count }.by(1) | ||||
|     end | ||||
|   end | ||||
|  |  | |||
|  | @ -18,14 +18,23 @@ describe Blob do | |||
| 
 | ||||
|   describe '.lazy' do | ||||
|     let(:project) { create(:project, :repository) } | ||||
|     let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') } | ||||
|     let(:other_project) { create(:project, :repository) } | ||||
|     let(:commit_id) { 'e63f41fe459e62e1228fcef60d7189127aeba95a' } | ||||
| 
 | ||||
|     it 'fetches all blobs when the first is accessed' do | ||||
|       changelog = described_class.lazy(project, commit.id, 'CHANGELOG') | ||||
|       contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md') | ||||
|     it 'does not fetch blobs when none are accessed' do | ||||
|       expect(project.repository).not_to receive(:blobs_at) | ||||
| 
 | ||||
|       expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original | ||||
|       expect(Gitlab::Git::Blob).not_to receive(:find) | ||||
|       described_class.lazy(project, commit_id, 'CHANGELOG') | ||||
|     end | ||||
| 
 | ||||
|     it 'fetches all blobs for the same repository when one is accessed' do | ||||
|       expect(project.repository).to receive(:blobs_at).with([[commit_id, 'CHANGELOG'], [commit_id, 'CONTRIBUTING.md']]).once.and_call_original | ||||
|       expect(other_project.repository).not_to receive(:blobs_at) | ||||
| 
 | ||||
|       changelog = described_class.lazy(project, commit_id, 'CHANGELOG') | ||||
|       contributing = described_class.lazy(project, commit_id, 'CONTRIBUTING.md') | ||||
| 
 | ||||
|       described_class.lazy(other_project, commit_id, 'CHANGELOG') | ||||
| 
 | ||||
|       # Access property so the values are loaded | ||||
|       changelog.id | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue