Merge branch 'fix/remove-import-url-migration' into 'master'
Remove slow migration and add fix to sanitize errors Getting rid of the slow migration and preventing the Error to occur Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/19608 See merge request !5176
This commit is contained in:
		
						commit
						11ba19e3d2
					
				| 
						 | 
				
			
			@ -1,106 +0,0 @@
 | 
			
		|||
# Updates project records containing invalid URLs using the AddressableUrlValidator.
 | 
			
		||||
# This is optimized assuming the number of invalid records is low, but
 | 
			
		||||
# we still need to loop through all the projects with an +import_url+
 | 
			
		||||
# so we use batching for the latter.
 | 
			
		||||
#
 | 
			
		||||
# This migration is non-reversible as we would have to keep the old data.
 | 
			
		||||
 | 
			
		||||
class FixNoValidatableImportUrl < ActiveRecord::Migration
 | 
			
		||||
  include Gitlab::Database::MigrationHelpers
 | 
			
		||||
  class SqlBatches
 | 
			
		||||
 | 
			
		||||
    attr_reader :results, :query
 | 
			
		||||
 | 
			
		||||
    def initialize(batch_size: 1000, query:)
 | 
			
		||||
      @offset = 0
 | 
			
		||||
      @batch_size = batch_size
 | 
			
		||||
      @query = query
 | 
			
		||||
      @results = []
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def next?
 | 
			
		||||
      @results = ActiveRecord::Base.connection.exec_query(batched_sql)
 | 
			
		||||
      @offset += @batch_size
 | 
			
		||||
      @results.any?
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    private
 | 
			
		||||
 | 
			
		||||
    def batched_sql
 | 
			
		||||
      "#{@query} LIMIT #{@batch_size} OFFSET #{@offset}"
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  # AddressableValidator - Snapshot of AddressableUrlValidator
 | 
			
		||||
  module AddressableUrlValidatorSnap
 | 
			
		||||
    extend self
 | 
			
		||||
 | 
			
		||||
    def valid_url?(value)
 | 
			
		||||
      return false unless value
 | 
			
		||||
 | 
			
		||||
      valid_uri?(value) && valid_protocol?(value)
 | 
			
		||||
    rescue Addressable::URI::InvalidURIError
 | 
			
		||||
      false
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def valid_uri?(value)
 | 
			
		||||
      Addressable::URI.parse(value).is_a?(Addressable::URI)
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def valid_protocol?(value)
 | 
			
		||||
      value =~ /\A#{URI.regexp(%w(http https ssh git))}\z/
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def up
 | 
			
		||||
    unless defined?(Addressable::URI::InvalidURIError)
 | 
			
		||||
      say('Skipping cleaning up invalid import URLs as class from Addressable is missing')
 | 
			
		||||
      return
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    say('Nullifying empty import URLs')
 | 
			
		||||
 | 
			
		||||
    nullify_empty_urls
 | 
			
		||||
 | 
			
		||||
    say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
 | 
			
		||||
 | 
			
		||||
    process_invalid_import_urls
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def process_invalid_import_urls
 | 
			
		||||
    batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
 | 
			
		||||
 | 
			
		||||
    while batches.next?
 | 
			
		||||
      project_ids = []
 | 
			
		||||
 | 
			
		||||
      batches.results.each do |result|
 | 
			
		||||
        project_ids << result['id'] unless valid_url?(result['import_url'])
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      process_batch(project_ids)
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def process_batch(project_ids)
 | 
			
		||||
    Thread.new do
 | 
			
		||||
      begin
 | 
			
		||||
        project_ids.each { |project_id| cleanup_import_url(project_id) }
 | 
			
		||||
      ensure
 | 
			
		||||
        ActiveRecord::Base.connection.close
 | 
			
		||||
      end
 | 
			
		||||
    end.join
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def valid_url?(url)
 | 
			
		||||
    AddressableUrlValidatorSnap.valid_url?(url)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def cleanup_import_url(project_id)
 | 
			
		||||
    execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def nullify_empty_urls
 | 
			
		||||
    execute("UPDATE projects SET import_url = NULL WHERE import_url = ''")
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
| 
						 | 
				
			
			@ -4,6 +4,8 @@ module Gitlab
 | 
			
		|||
      regexp = URI::Parser.new.make_regexp(['http', 'https', 'ssh', 'git'])
 | 
			
		||||
 | 
			
		||||
      content.gsub(regexp) { |url| new(url).masked_url }
 | 
			
		||||
    rescue Addressable::URI::InvalidURIError
 | 
			
		||||
      content.gsub(regexp, '')
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def self.valid?(url)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,6 +45,12 @@ describe Gitlab::UrlSanitizer, lib: true do
 | 
			
		|||
 | 
			
		||||
      expect(filtered_content).to include("user@server:project.git")
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    it 'returns an empty string for invalid URLs' do
 | 
			
		||||
      filtered_content = sanitize_url('ssh://')
 | 
			
		||||
 | 
			
		||||
      expect(filtered_content).to include("repository '' not found")
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  describe '#sanitized_url' do
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue