Merge branch '35942_api_binary_encoding' into 'master'
API fix for non UTF-8 data Closes #35942 See merge request !14038
This commit is contained in:
commit
ba39b26cc2
|
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
title: "Fix API to serve binary diffs that are treated as text."
|
||||
merge_request: 14038
|
||||
|
|
@ -104,7 +104,7 @@ module API
|
|||
|
||||
not_found! 'Commit' unless commit
|
||||
|
||||
commit.raw_diffs.to_a
|
||||
present commit.raw_diffs.to_a, with: Entities::RepoDiff
|
||||
end
|
||||
|
||||
desc "Get a commit's comments" do
|
||||
|
|
|
|||
|
|
@ -291,10 +291,11 @@ module API
|
|||
end
|
||||
|
||||
class RepoDiff < Grape::Entity
|
||||
expose :old_path, :new_path, :a_mode, :b_mode, :diff
|
||||
expose :old_path, :new_path, :a_mode, :b_mode
|
||||
expose :new_file?, as: :new_file
|
||||
expose :renamed_file?, as: :renamed_file
|
||||
expose :deleted_file?, as: :deleted_file
|
||||
expose :json_safe_diff, as: :diff
|
||||
end
|
||||
|
||||
class ProtectedRefAccess < Grape::Entity
|
||||
|
|
|
|||
|
|
@ -22,10 +22,10 @@ module Gitlab
|
|||
|
||||
# return message if message type is binary
|
||||
detect = CharlockHolmes::EncodingDetector.detect(message)
|
||||
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
|
||||
return message.force_encoding("BINARY") if detect_binary?(message, detect)
|
||||
|
||||
# force detected encoding if we have sufficient confidence.
|
||||
if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
|
||||
# force detected encoding if we have sufficient confidence.
|
||||
message.force_encoding(detect[:encoding])
|
||||
end
|
||||
|
||||
|
|
@ -36,6 +36,19 @@ module Gitlab
|
|||
"--broken encoding: #{encoding}"
|
||||
end
|
||||
|
||||
def detect_binary?(data, detect = nil)
|
||||
detect ||= CharlockHolmes::EncodingDetector.detect(data)
|
||||
detect && detect[:type] == :binary && detect[:confidence] == 100
|
||||
end
|
||||
|
||||
def detect_libgit2_binary?(data)
|
||||
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
|
||||
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
|
||||
# which is what we use below to keep a consistent behavior.
|
||||
detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
|
||||
detect && detect[:type] == :binary
|
||||
end
|
||||
|
||||
def encode_utf8(message)
|
||||
detect = CharlockHolmes::EncodingDetector.detect(message)
|
||||
if detect && detect[:encoding]
|
||||
|
|
|
|||
|
|
@ -42,14 +42,6 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
def binary?(data)
|
||||
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
|
||||
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
|
||||
# which is what we use below to keep a consistent behavior.
|
||||
detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
|
||||
detect && detect[:type] == :binary
|
||||
end
|
||||
|
||||
# Returns an array of Blob instances, specified in blob_references as
|
||||
# [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
|
||||
# full blob contents are returned. If blob_size_limit >= 0 then each blob will
|
||||
|
|
@ -65,6 +57,10 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
def binary?(data)
|
||||
EncodingHelper.detect_libgit2_binary?(data)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Recursive search of blob id by path
|
||||
|
|
|
|||
|
|
@ -116,6 +116,15 @@ module Gitlab
|
|||
|
||||
filtered_opts
|
||||
end
|
||||
|
||||
# Return a binary diff message like:
|
||||
#
|
||||
# "Binary files a/file/path and b/file/path differ\n"
|
||||
# This is used when we detect that a diff is binary
|
||||
# using CharlockHolmes when Rugged treats it as text.
|
||||
def binary_message(old_path, new_path)
|
||||
"Binary files #{old_path} and #{new_path} differ\n"
|
||||
end
|
||||
end
|
||||
|
||||
def initialize(raw_diff, expanded: true)
|
||||
|
|
@ -190,6 +199,13 @@ module Gitlab
|
|||
@collapsed = true
|
||||
end
|
||||
|
||||
def json_safe_diff
|
||||
return @diff unless detect_binary?(@diff)
|
||||
|
||||
# the diff is binary, let's make a message for it
|
||||
Diff.binary_message(@old_path, @new_path)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def init_from_rugged(rugged)
|
||||
|
|
|
|||
|
|
@ -273,6 +273,25 @@ EOT
|
|||
end
|
||||
end
|
||||
|
||||
describe '#json_safe_diff' do
|
||||
let(:project) { create(:project, :repository) }
|
||||
|
||||
it 'fake binary message when it detects binary' do
|
||||
# Rugged will not detect this as binary, but we can fake it
|
||||
diff_message = "Binary files files/images/icn-time-tracking.pdf and files/images/icn-time-tracking.pdf differ\n"
|
||||
binary_diff = described_class.between(project.repository, 'add-pdf-text-binary', 'add-pdf-text-binary^').first
|
||||
|
||||
expect(binary_diff.diff).not_to be_empty
|
||||
expect(binary_diff.json_safe_diff).to eq(diff_message)
|
||||
end
|
||||
|
||||
it 'leave non-binary diffs as-is' do
|
||||
diff = described_class.new(@rugged_diff)
|
||||
|
||||
expect(diff.json_safe_diff).to eq(diff.diff)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#submodule?' do
|
||||
before do
|
||||
commit = repository.lookup('5937ac0a7beb003549fc5fd26fc247adbce4a52e')
|
||||
|
|
|
|||
|
|
@ -673,6 +673,12 @@ describe API::Commits do
|
|||
it_behaves_like 'ref diff'
|
||||
end
|
||||
end
|
||||
|
||||
context 'when binary diff are treated as text' do
|
||||
let(:commit_id) { TestEnv::BRANCH_SHA['add-pdf-text-binary'] }
|
||||
|
||||
it_behaves_like 'ref diff'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue