Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks
This commit is contained in:
parent
cc1b03545c
commit
2f40fb456b
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks
|
||||
merge_request: 19501
|
||||
author:
|
||||
type: added
|
||||
|
|
@ -78,9 +78,10 @@ Example output:
|
|||
|
||||
## Uploaded Files Integrity
|
||||
|
||||
Various types of file can be uploaded to a GitLab installation by users.
|
||||
Checksums are generated and stored in the database upon upload, and integrity
|
||||
checks using those checksums can be run. These checks also detect missing files.
|
||||
Various types of files can be uploaded to a GitLab installation by users.
|
||||
These integrity checks can detect missing files. Additionally, for locally
|
||||
stored files, checksums are generated and stored in the database upon upload,
|
||||
and these checks will verify them against current files.
|
||||
|
||||
Currently, integrity checks are supported for the following types of file:
|
||||
|
||||
|
|
|
|||
|
|
@ -7,13 +7,15 @@ module Gitlab
|
|||
@batch_size = batch_size
|
||||
@start = start
|
||||
@finish = finish
|
||||
|
||||
fix_google_api_logger
|
||||
end
|
||||
|
||||
# Yields a Range of IDs and a Hash of failed verifications (object => error)
|
||||
def run_batches(&blk)
|
||||
relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches
|
||||
range = relation.first.id..relation.last.id
|
||||
failures = run_batch(relation)
|
||||
all_relation.in_batches(of: batch_size, start: start, finish: finish) do |batch| # rubocop: disable Cop/InBatches
|
||||
range = batch.first.id..batch.last.id
|
||||
failures = run_batch_for(batch)
|
||||
|
||||
yield(range, failures)
|
||||
end
|
||||
|
|
@ -29,24 +31,56 @@ module Gitlab
|
|||
|
||||
private
|
||||
|
||||
def run_batch(relation)
|
||||
relation.map { |upload| verify(upload) }.compact.to_h
|
||||
def run_batch_for(batch)
|
||||
batch.map { |upload| verify(upload) }.compact.to_h
|
||||
end
|
||||
|
||||
def verify(object)
|
||||
local?(object) ? verify_local(object) : verify_remote(object)
|
||||
rescue => err
|
||||
failure(object, err.inspect)
|
||||
end
|
||||
|
||||
def verify_local(object)
|
||||
expected = expected_checksum(object)
|
||||
actual = actual_checksum(object)
|
||||
|
||||
raise 'Checksum missing' unless expected.present?
|
||||
raise 'Checksum mismatch' unless expected == actual
|
||||
return failure(object, 'Checksum missing') unless expected.present?
|
||||
return failure(object, 'Checksum mismatch') unless expected == actual
|
||||
|
||||
success
|
||||
end
|
||||
|
||||
# We don't calculate checksum for remote objects, so just check existence
|
||||
def verify_remote(object)
|
||||
return failure(object, 'Remote object does not exist') unless remote_object_exists?(object)
|
||||
|
||||
success
|
||||
end
|
||||
|
||||
def success
|
||||
nil
|
||||
rescue => err
|
||||
[object, err]
|
||||
end
|
||||
|
||||
def failure(object, message)
|
||||
[object, message]
|
||||
end
|
||||
|
||||
# It's already set to Logger::INFO, but acts as if it is set to
|
||||
# Logger::DEBUG, and this fixes it...
|
||||
def fix_google_api_logger
|
||||
if Object.const_defined?('Google::Apis')
|
||||
Google::Apis.logger.level = Logger::INFO
|
||||
end
|
||||
end
|
||||
|
||||
# This should return an ActiveRecord::Relation suitable for calling #in_batches on
|
||||
def relation
|
||||
def all_relation
|
||||
raise NotImplementedError.new
|
||||
end
|
||||
|
||||
# Should return true if the object is stored locally
|
||||
def local?(_object)
|
||||
raise NotImplementedError.new
|
||||
end
|
||||
|
||||
|
|
@ -59,6 +93,11 @@ module Gitlab
|
|||
def actual_checksum(_object)
|
||||
raise NotImplementedError.new
|
||||
end
|
||||
|
||||
# Be sure to perform a hard check of the remote object (don't just check DB value)
|
||||
def remote_object_exists?(object)
|
||||
raise NotImplementedError.new
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -11,10 +11,14 @@ module Gitlab
|
|||
|
||||
private
|
||||
|
||||
def relation
|
||||
def all_relation
|
||||
::Ci::JobArtifact.all
|
||||
end
|
||||
|
||||
def local?(artifact)
|
||||
artifact.local_store?
|
||||
end
|
||||
|
||||
def expected_checksum(artifact)
|
||||
artifact.file_sha256
|
||||
end
|
||||
|
|
@ -22,6 +26,10 @@ module Gitlab
|
|||
def actual_checksum(artifact)
|
||||
Digest::SHA256.file(artifact.file.path).hexdigest
|
||||
end
|
||||
|
||||
def remote_object_exists?(artifact)
|
||||
artifact.file.file.exists?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -11,8 +11,12 @@ module Gitlab
|
|||
|
||||
private
|
||||
|
||||
def relation
|
||||
LfsObject.with_files_stored_locally
|
||||
def all_relation
|
||||
LfsObject.all
|
||||
end
|
||||
|
||||
def local?(lfs_object)
|
||||
lfs_object.local_store?
|
||||
end
|
||||
|
||||
def expected_checksum(lfs_object)
|
||||
|
|
@ -22,6 +26,10 @@ module Gitlab
|
|||
def actual_checksum(lfs_object)
|
||||
LfsObject.calculate_oid(lfs_object.file.path)
|
||||
end
|
||||
|
||||
def remote_object_exists?(lfs_object)
|
||||
lfs_object.file.file.exists?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ module Gitlab
|
|||
return unless verbose?
|
||||
|
||||
failures.each do |object, error|
|
||||
say " - #{verifier.describe(object)}: #{error.inspect}".color(:red)
|
||||
say " - #{verifier.describe(object)}: #{error}".color(:red)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -11,8 +11,12 @@ module Gitlab
|
|||
|
||||
private
|
||||
|
||||
def relation
|
||||
Upload.with_files_stored_locally
|
||||
def all_relation
|
||||
Upload.all
|
||||
end
|
||||
|
||||
def local?(upload)
|
||||
upload.local?
|
||||
end
|
||||
|
||||
def expected_checksum(upload)
|
||||
|
|
@ -22,6 +26,10 @@ module Gitlab
|
|||
def actual_checksum(upload)
|
||||
Upload.hexdigest(upload.absolute_path)
|
||||
end
|
||||
|
||||
def remote_object_exists?(upload)
|
||||
upload.build_uploader.file.exists?
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -21,15 +21,38 @@ describe Gitlab::Verify::JobArtifacts do
|
|||
FileUtils.rm_f(artifact.file.path)
|
||||
|
||||
expect(failures.keys).to contain_exactly(artifact)
|
||||
expect(failure).to be_a(Errno::ENOENT)
|
||||
expect(failure.to_s).to include(artifact.file.path)
|
||||
expect(failure).to include('No such file or directory')
|
||||
expect(failure).to include(artifact.file.path)
|
||||
end
|
||||
|
||||
it 'fails artifacts with a mismatched checksum' do
|
||||
File.truncate(artifact.file.path, 0)
|
||||
|
||||
expect(failures.keys).to contain_exactly(artifact)
|
||||
expect(failure.to_s).to include('Checksum mismatch')
|
||||
expect(failure).to include('Checksum mismatch')
|
||||
end
|
||||
|
||||
context 'with remote files' do
|
||||
let(:file) { double(:file) }
|
||||
|
||||
before do
|
||||
stub_artifacts_object_storage
|
||||
artifact.update!(file_store: ObjectStorage::Store::REMOTE)
|
||||
expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
|
||||
end
|
||||
|
||||
it 'passes artifacts in object storage that exist' do
|
||||
expect(file).to receive(:exists?).and_return(true)
|
||||
|
||||
expect(failures).to eq({})
|
||||
end
|
||||
|
||||
it 'fails artifacts in object storage that do not exist' do
|
||||
expect(file).to receive(:exists?).and_return(false)
|
||||
|
||||
expect(failures.keys).to contain_exactly(artifact)
|
||||
expect(failure).to include('Remote object does not exist')
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -21,30 +21,37 @@ describe Gitlab::Verify::LfsObjects do
|
|||
FileUtils.rm_f(lfs_object.file.path)
|
||||
|
||||
expect(failures.keys).to contain_exactly(lfs_object)
|
||||
expect(failure).to be_a(Errno::ENOENT)
|
||||
expect(failure.to_s).to include(lfs_object.file.path)
|
||||
expect(failure).to include('No such file or directory')
|
||||
expect(failure).to include(lfs_object.file.path)
|
||||
end
|
||||
|
||||
it 'fails LFS objects with a mismatched oid' do
|
||||
File.truncate(lfs_object.file.path, 0)
|
||||
|
||||
expect(failures.keys).to contain_exactly(lfs_object)
|
||||
expect(failure.to_s).to include('Checksum mismatch')
|
||||
expect(failure).to include('Checksum mismatch')
|
||||
end
|
||||
|
||||
context 'with remote files' do
|
||||
let(:file) { double(:file) }
|
||||
|
||||
before do
|
||||
stub_lfs_object_storage
|
||||
lfs_object.update!(file_store: ObjectStorage::Store::REMOTE)
|
||||
expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
|
||||
end
|
||||
|
||||
it 'skips LFS objects in object storage' do
|
||||
local_failure = create(:lfs_object)
|
||||
create(:lfs_object, :object_storage)
|
||||
it 'passes LFS objects in object storage that exist' do
|
||||
expect(file).to receive(:exists?).and_return(true)
|
||||
|
||||
failures = {}
|
||||
described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) }
|
||||
expect(failures).to eq({})
|
||||
end
|
||||
|
||||
expect(failures.keys).to contain_exactly(local_failure)
|
||||
it 'fails LFS objects in object storage that do not exist' do
|
||||
expect(file).to receive(:exists?).and_return(false)
|
||||
|
||||
expect(failures.keys).to contain_exactly(lfs_object)
|
||||
expect(failure).to include('Remote object does not exist')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -23,37 +23,44 @@ describe Gitlab::Verify::Uploads do
|
|||
FileUtils.rm_f(upload.absolute_path)
|
||||
|
||||
expect(failures.keys).to contain_exactly(upload)
|
||||
expect(failure).to be_a(Errno::ENOENT)
|
||||
expect(failure.to_s).to include(upload.absolute_path)
|
||||
expect(failure).to include('No such file or directory')
|
||||
expect(failure).to include(upload.absolute_path)
|
||||
end
|
||||
|
||||
it 'fails uploads with a mismatched checksum' do
|
||||
upload.update!(checksum: 'something incorrect')
|
||||
|
||||
expect(failures.keys).to contain_exactly(upload)
|
||||
expect(failure.to_s).to include('Checksum mismatch')
|
||||
expect(failure).to include('Checksum mismatch')
|
||||
end
|
||||
|
||||
it 'fails uploads with a missing precalculated checksum' do
|
||||
upload.update!(checksum: '')
|
||||
|
||||
expect(failures.keys).to contain_exactly(upload)
|
||||
expect(failure.to_s).to include('Checksum missing')
|
||||
expect(failure).to include('Checksum missing')
|
||||
end
|
||||
|
||||
context 'with remote files' do
|
||||
let(:file) { double(:file) }
|
||||
|
||||
before do
|
||||
stub_uploads_object_storage(AvatarUploader)
|
||||
upload.update!(store: ObjectStorage::Store::REMOTE)
|
||||
expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
|
||||
end
|
||||
|
||||
it 'skips uploads in object storage' do
|
||||
local_failure = create(:upload)
|
||||
create(:upload, :object_storage)
|
||||
it 'passes uploads in object storage that exist' do
|
||||
expect(file).to receive(:exists?).and_return(true)
|
||||
|
||||
failures = {}
|
||||
described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) }
|
||||
expect(failures).to eq({})
|
||||
end
|
||||
|
||||
expect(failures.keys).to contain_exactly(local_failure)
|
||||
it 'fails uploads in object storage that do not exist' do
|
||||
expect(file).to receive(:exists?).and_return(false)
|
||||
|
||||
expect(failures.keys).to contain_exactly(upload)
|
||||
expect(failure).to include('Remote object does not exist')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Reference in New Issue