343 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Ruby
		
	
	
	
			
		
		
	
	
			343 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Ruby
		
	
	
	
| # frozen_string_literal: true
 | |
| 
 | |
| module Gitlab
 | |
|   module LegacyGithubImport
 | |
|     class Importer
 | |
|       include Gitlab::ShellAdapter
 | |
| 
 | |
|       def self.refmap
 | |
|         Gitlab::GithubImport.refmap
 | |
|       end
 | |
| 
 | |
|       attr_reader :errors, :project, :repo, :repo_url
 | |
| 
 | |
|       def initialize(project)
 | |
|         @project  = project
 | |
|         @repo     = project.import_source
 | |
|         @repo_url = project.import_url
 | |
|         @errors   = []
 | |
|         @labels   = {}
 | |
|       end
 | |
| 
 | |
|       def client
 | |
|         return @client if defined?(@client)
 | |
| 
 | |
|         unless credentials
 | |
|           raise Projects::ImportService::Error,
 | |
|                 "Unable to find project import data credentials for project ID: #{@project.id}"
 | |
|         end
 | |
| 
 | |
|         opts = {}
 | |
|         # Gitea plan to be GitHub compliant
 | |
|         if project.gitea_import?
 | |
|           uri = URI.parse(project.import_url)
 | |
|           host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w-]+/[\w-]+\.git\z}, '')
 | |
|           opts = {
 | |
|             host: host,
 | |
|             api_version: 'v1'
 | |
|           }
 | |
|         end
 | |
| 
 | |
|         @client = Client.new(credentials[:user], opts)
 | |
|       end
 | |
| 
 | |
|       def execute
 | |
|         # The ordering of importing is important here due to the way GitHub structures their data
 | |
|         # 1. Labels are required by other items while not having a dependency on anything else
 | |
|         # so need to be first
 | |
|         # 2. Pull requests must come before issues. Every pull request is also an issue but not
 | |
|         # all issues are pull requests. Only the issue entity has labels defined in GitHub. GitLab
 | |
|         # doesn't structure data like this so we need to make sure that we've created the MRs
 | |
|         # before we attempt to add the labels defined in the GitHub issue for the related, already
 | |
|         # imported, pull request
 | |
|         import_labels
 | |
|         import_milestones
 | |
|         import_pull_requests
 | |
|         import_issues
 | |
|         import_comments(:issues)
 | |
|         import_comments(:pull_requests)
 | |
|         import_wiki
 | |
| 
 | |
|         # Gitea doesn't have a Release API yet
 | |
|         # See https://github.com/go-gitea/gitea/issues/330
 | |
|         unless project.gitea_import?
 | |
|           import_releases
 | |
|         end
 | |
| 
 | |
|         handle_errors
 | |
| 
 | |
|         true
 | |
|       end
 | |
| 
 | |
|       private
 | |
| 
 | |
|       def credentials
 | |
|         return @credentials if defined?(@credentials)
 | |
| 
 | |
|         @credentials = project.import_data ? project.import_data.credentials : nil
 | |
|       end
 | |
| 
 | |
|       def handle_errors
 | |
|         return unless errors.any?
 | |
| 
 | |
|         project.import_state.update_column(:last_error, {
 | |
|           message: 'The remote data could not be fully imported.',
 | |
|           errors: errors
 | |
|         }.to_json)
 | |
|       end
 | |
| 
 | |
|       def import_labels
 | |
|         fetch_resources(:labels, repo, per_page: 100) do |labels|
 | |
|           labels.each do |raw|
 | |
|             begin
 | |
|               gh_label = LabelFormatter.new(project, raw)
 | |
|               gh_label.create!
 | |
|             rescue => e
 | |
|               errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(gh_label.url), errors: e.message }
 | |
|             end
 | |
|           end
 | |
|         end
 | |
| 
 | |
|         cache_labels!
 | |
|       end
 | |
| 
 | |
|       def import_milestones
 | |
|         fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
 | |
|           milestones.each do |raw|
 | |
|             begin
 | |
|               gh_milestone = MilestoneFormatter.new(project, raw)
 | |
|               gh_milestone.create!
 | |
|             rescue => e
 | |
|               errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(gh_milestone.url), errors: e.message }
 | |
|             end
 | |
|           end
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       # rubocop: disable CodeReuse/ActiveRecord
 | |
|       def import_issues
 | |
|         fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
 | |
|           issues.each do |raw|
 | |
|             gh_issue = IssueFormatter.new(project, raw, client)
 | |
| 
 | |
|             begin
 | |
|               issuable =
 | |
|                 if gh_issue.pull_request?
 | |
|                   MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number)
 | |
|                 else
 | |
|                   gh_issue.create!
 | |
|                 end
 | |
| 
 | |
|               apply_labels(issuable, raw)
 | |
|             rescue => e
 | |
|               errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message }
 | |
|             end
 | |
|           end
 | |
|         end
 | |
|       end
 | |
|       # rubocop: enable CodeReuse/ActiveRecord
 | |
| 
 | |
|       def import_pull_requests
 | |
|         fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
 | |
|           pull_requests.each do |raw|
 | |
|             gh_pull_request = PullRequestFormatter.new(project, raw, client)
 | |
| 
 | |
|             next unless gh_pull_request.valid?
 | |
| 
 | |
|             begin
 | |
|               restore_source_branch(gh_pull_request) unless gh_pull_request.source_branch_exists?
 | |
|               restore_target_branch(gh_pull_request) unless gh_pull_request.target_branch_exists?
 | |
| 
 | |
|               merge_request = gh_pull_request.create!
 | |
| 
 | |
|               # Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage
 | |
|               if project.gitea_import?
 | |
|                 apply_labels(merge_request, raw)
 | |
|               end
 | |
|             rescue => e
 | |
|               errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(gh_pull_request.url), errors: e.message }
 | |
|             ensure
 | |
|               clean_up_restored_branches(gh_pull_request)
 | |
|             end
 | |
|           end
 | |
|         end
 | |
| 
 | |
|         project.repository.after_remove_branch
 | |
|       end
 | |
| 
 | |
|       def restore_source_branch(pull_request)
 | |
|         project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
 | |
|       end
 | |
| 
 | |
|       def restore_target_branch(pull_request)
 | |
|         project.repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
 | |
|       end
 | |
| 
 | |
|       def remove_branch(name)
 | |
|         project.repository.delete_branch(name)
 | |
|       rescue Gitlab::Git::Repository::DeleteBranchFailed
 | |
|         errors << { type: :remove_branch, name: name }
 | |
|       end
 | |
| 
 | |
|       def clean_up_restored_branches(pull_request)
 | |
|         return if pull_request.opened?
 | |
| 
 | |
|         remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
 | |
|         remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
 | |
|       end
 | |
| 
 | |
|       def apply_labels(issuable, raw)
 | |
|         return unless raw.labels.count > 0
 | |
| 
 | |
|         label_ids = raw.labels
 | |
|           .map { |attrs| @labels[attrs.name] }
 | |
|           .compact
 | |
| 
 | |
|         issuable.update_attribute(:label_ids, label_ids)
 | |
|       end
 | |
| 
 | |
|       # rubocop: disable CodeReuse/ActiveRecord
 | |
|       def import_comments(issuable_type)
 | |
|         resource_type = "#{issuable_type}_comments".to_sym
 | |
| 
 | |
|         # Two notes here:
 | |
|         # 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
 | |
|         # compare it against every comment in the current imported page until we find match, and that's where start importing
 | |
|         # 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
 | |
|         # only comments on diffs, so select last note not based on noteable_type but on line_code
 | |
|         line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
 | |
|         last_note    = project.notes.where("line_code IS #{line_code_is}").last
 | |
| 
 | |
|         fetch_resources(resource_type, repo, per_page: 100) do |comments|
 | |
|           if last_note
 | |
|             discard_inserted_comments(comments, last_note)
 | |
|             last_note = nil
 | |
|           end
 | |
| 
 | |
|           create_comments(comments)
 | |
|         end
 | |
|       end
 | |
|       # rubocop: enable CodeReuse/ActiveRecord
 | |
| 
 | |
|       # rubocop: disable CodeReuse/ActiveRecord
 | |
|       def create_comments(comments)
 | |
|         ActiveRecord::Base.no_touching do
 | |
|           comments.each do |raw|
 | |
|             begin
 | |
|               comment = CommentFormatter.new(project, raw, client)
 | |
| 
 | |
|               # GH does not return info about comment's parent, so we guess it by checking its URL!
 | |
|               *_, parent, iid = URI(raw.html_url).path.split('/')
 | |
| 
 | |
|               issuable = if parent == 'issues'
 | |
|                            Issue.find_by(project_id: project.id, iid: iid)
 | |
|                          else
 | |
|                            MergeRequest.find_by(target_project_id: project.id, iid: iid)
 | |
|                          end
 | |
| 
 | |
|               next unless issuable
 | |
| 
 | |
|               issuable.notes.create!(comment.attributes)
 | |
|             rescue => e
 | |
|               errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
 | |
|             end
 | |
|           end
 | |
|         end
 | |
|       end
 | |
|       # rubocop: enable CodeReuse/ActiveRecord
 | |
| 
 | |
|       def discard_inserted_comments(comments, last_note)
 | |
|         last_note_attrs = nil
 | |
| 
 | |
|         cut_off_index = comments.find_index do |raw|
 | |
|           comment           = CommentFormatter.new(project, raw)
 | |
|           comment_attrs     = comment.attributes
 | |
|           last_note_attrs ||= last_note.slice(*comment_attrs.keys)
 | |
| 
 | |
|           comment_attrs.with_indifferent_access == last_note_attrs
 | |
|         end
 | |
| 
 | |
|         # No matching resource in the collection, which means we got halted right on the end of the last page, so all good
 | |
|         return unless cut_off_index
 | |
| 
 | |
|         # Otherwise, remove the resources we've already inserted
 | |
|         comments.shift(cut_off_index + 1)
 | |
|       end
 | |
| 
 | |
|       def import_wiki
 | |
|         unless project.wiki.repository_exists?
 | |
|           wiki = WikiFormatter.new(project)
 | |
|           gitlab_shell.import_wiki_repository(project, wiki)
 | |
|         end
 | |
|       rescue Gitlab::Shell::Error => e
 | |
|         # GitHub error message when the wiki repo has not been created,
 | |
|         # this means that repo has wiki enabled, but have no pages. So,
 | |
|         # we can skip the import.
 | |
|         if e.message !~ /repository not exported/
 | |
|           errors << { type: :wiki, errors: e.message }
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       def import_releases
 | |
|         fetch_resources(:releases, repo, per_page: 100) do |releases|
 | |
|           releases.each do |raw|
 | |
|             begin
 | |
|               gh_release = ReleaseFormatter.new(project, raw)
 | |
|               gh_release.create! if gh_release.valid?
 | |
|             rescue => e
 | |
|               errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(gh_release.url), errors: e.message }
 | |
|             end
 | |
|           end
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       def cache_labels!
 | |
|         project.labels.select(:id, :title).find_each do |label|
 | |
|           @labels[label.title] = label.id
 | |
|         end
 | |
|       end
 | |
| 
 | |
|       def fetch_resources(resource_type, *opts)
 | |
|         return if imported?(resource_type)
 | |
| 
 | |
|         opts.last[:page] = current_page(resource_type)
 | |
| 
 | |
|         client.public_send(resource_type, *opts) do |resources| # rubocop:disable GitlabSecurity/PublicSend
 | |
|           yield resources
 | |
|           increment_page(resource_type)
 | |
|         end
 | |
| 
 | |
|         imported!(resource_type)
 | |
|       end
 | |
| 
 | |
|       def imported?(resource_type)
 | |
|         Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
 | |
|       end
 | |
| 
 | |
|       def imported!(resource_type)
 | |
|         Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
 | |
|       end
 | |
| 
 | |
|       def increment_page(resource_type)
 | |
|         key = "#{cache_key_prefix}:#{resource_type}:current-page"
 | |
| 
 | |
|         # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
 | |
|         # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
 | |
|         page = Rails.cache.read(key)
 | |
|         page += 1
 | |
|         Rails.cache.write(key, page)
 | |
| 
 | |
|         page
 | |
|       end
 | |
| 
 | |
|       def current_page(resource_type)
 | |
|         Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
 | |
|       end
 | |
| 
 | |
|       def cache_key_prefix
 | |
|         @cache_key_prefix ||= "github-import:#{project.id}"
 | |
|       end
 | |
|     end
 | |
|   end
 | |
| end
 |