132 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Ruby
		
	
	
	
			
		
		
	
	
			132 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Ruby
		
	
	
	
# frozen_string_literal: true
 | 
						|
 | 
						|
module Gitlab
 | 
						|
  module Cleanup
 | 
						|
    class ProjectUploads
 | 
						|
      LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')
 | 
						|
 | 
						|
      attr_reader :logger
 | 
						|
 | 
						|
      def initialize(logger: nil)
 | 
						|
        @logger = logger || Rails.logger # rubocop:disable Gitlab/RailsLogger
 | 
						|
      end
 | 
						|
 | 
						|
      def run!(dry_run: true)
 | 
						|
        logger.info "Looking for orphaned project uploads to clean up#{'. Dry run' if dry_run}..."
 | 
						|
 | 
						|
        each_orphan_file do |path, upload_path|
 | 
						|
          result = cleanup(path, upload_path, dry_run)
 | 
						|
 | 
						|
          logger.info result
 | 
						|
        end
 | 
						|
      end
 | 
						|
 | 
						|
      private
 | 
						|
 | 
						|
      def cleanup(path, upload_path, dry_run)
 | 
						|
        # This happened in staging:
 | 
						|
        # `find` returned a path on which `File.delete` raised `Errno::ENOENT`
 | 
						|
        return "Cannot find file: #{path}" unless File.exist?(path)
 | 
						|
 | 
						|
        correct_path = upload_path && find_correct_path(upload_path)
 | 
						|
 | 
						|
        if correct_path
 | 
						|
          move(path, correct_path, 'fix', dry_run)
 | 
						|
        else
 | 
						|
          move_to_lost_and_found(path, dry_run)
 | 
						|
        end
 | 
						|
      end
 | 
						|
 | 
						|
      # Accepts a path in the form of "#{hex_secret}/#{filename}"
 | 
						|
      # rubocop: disable CodeReuse/ActiveRecord
 | 
						|
      def find_correct_path(upload_path)
 | 
						|
        upload = Upload.find_by(uploader: 'FileUploader', path: upload_path)
 | 
						|
        return unless upload && upload.local? && upload.model
 | 
						|
 | 
						|
        upload.absolute_path
 | 
						|
      rescue => e
 | 
						|
        logger.error e.message
 | 
						|
 | 
						|
        # absolute_path depends on a lot of code. If it doesn't work, then it
 | 
						|
        # it doesn't matter if the upload file is in the right place. Treat it
 | 
						|
        # as uncorrectable.
 | 
						|
        # I.e. the project record might be missing, which raises an exception.
 | 
						|
        nil
 | 
						|
      end
 | 
						|
      # rubocop: enable CodeReuse/ActiveRecord
 | 
						|
 | 
						|
      def move_to_lost_and_found(path, dry_run)
 | 
						|
        new_path = path.sub(/\A#{ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR}/, LOST_AND_FOUND)
 | 
						|
 | 
						|
        move(path, new_path, 'move to lost and found', dry_run)
 | 
						|
      end
 | 
						|
 | 
						|
      def move(path, new_path, prefix, dry_run)
 | 
						|
        action = "#{prefix} #{path} -> #{new_path}"
 | 
						|
 | 
						|
        if dry_run
 | 
						|
          "Can #{action}"
 | 
						|
        else
 | 
						|
          begin
 | 
						|
            FileUtils.mkdir_p(File.dirname(new_path))
 | 
						|
            FileUtils.mv(path, new_path)
 | 
						|
 | 
						|
            "Did #{action}"
 | 
						|
          rescue => e
 | 
						|
            "Error during #{action}: #{e.inspect}"
 | 
						|
          end
 | 
						|
        end
 | 
						|
      end
 | 
						|
 | 
						|
      # Yields absolute paths of project upload files that are not in the
 | 
						|
      # uploads table
 | 
						|
      def each_orphan_file
 | 
						|
        ProjectUploadFileFinder.new.each_file_batch do |file_paths|
 | 
						|
          logger.debug "Processing batch of #{file_paths.size} project upload file paths, starting with #{file_paths.first}"
 | 
						|
 | 
						|
          file_paths.each do |path|
 | 
						|
            pup = ProjectUploadPath.from_path(path)
 | 
						|
 | 
						|
            yield(path, pup.upload_path) if pup.orphan?
 | 
						|
          end
 | 
						|
        end
 | 
						|
      end
 | 
						|
 | 
						|
      class ProjectUploadPath
 | 
						|
        PROJECT_FULL_PATH_REGEX = %r{\A#{FileUploader.root}/(.+)/(\h+/[^/]+)\z}.freeze
 | 
						|
 | 
						|
        attr_reader :full_path, :upload_path
 | 
						|
 | 
						|
        def initialize(full_path, upload_path)
 | 
						|
          @full_path = full_path
 | 
						|
          @upload_path = upload_path
 | 
						|
        end
 | 
						|
 | 
						|
        def self.from_path(path)
 | 
						|
          path_matched = path.match(PROJECT_FULL_PATH_REGEX)
 | 
						|
          return new(nil, nil) unless path_matched
 | 
						|
 | 
						|
          new(path_matched[1], path_matched[2])
 | 
						|
        end
 | 
						|
 | 
						|
        # rubocop: disable CodeReuse/ActiveRecord
 | 
						|
        def orphan?
 | 
						|
          return true if full_path.nil? || upload_path.nil?
 | 
						|
 | 
						|
          # It's possible to reduce to one query, but `where_full_path_in` is complex
 | 
						|
          !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
 | 
						|
        end
 | 
						|
        # rubocop: enable CodeReuse/ActiveRecord
 | 
						|
 | 
						|
        private
 | 
						|
 | 
						|
        # rubocop: disable CodeReuse/ActiveRecord
 | 
						|
        def project_id
 | 
						|
          @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
 | 
						|
        end
 | 
						|
        # rubocop: enable CodeReuse/ActiveRecord
 | 
						|
      end
 | 
						|
    end
 | 
						|
  end
 | 
						|
end
 |