Extract the `Zip::File::split` code into its own module.

This code is rarely used and may not even be correct according to the
standard. Moving it out also de-clutters the `File` class.
This commit is contained in:
Robert Haines 2021-06-12 10:33:16 +01:00
parent 7df623fb0e
commit bd2f15e4bb
4 changed files with 94 additions and 82 deletions

View File

@ -75,6 +75,7 @@ Style/IfUnlessModifier:
Exclude:
- 'lib/zip/entry.rb'
- 'lib/zip/file.rb'
- 'lib/zip/file_split.rb'
- 'lib/zip/filesystem/dir.rb'
- 'lib/zip/filesystem/file.rb'
- 'lib/zip/pass_thru_decompressor.rb'
@ -100,6 +101,7 @@ Style/NumericPredicate:
- 'lib/zip/extra_field/universal_time.rb'
- 'lib/zip/extra_field/unix.rb'
- 'lib/zip/file.rb'
- 'lib/zip/file_split.rb'
- 'lib/zip/filesystem/file.rb'
- 'lib/zip/input_stream.rb'
- 'lib/zip/ioextras.rb'
@ -114,6 +116,7 @@ Style/OptionalBooleanParameter:
Exclude:
- 'lib/zip/entry.rb'
- 'lib/zip/file.rb'
- 'lib/zip/file_split.rb'
- 'lib/zip/output_stream.rb'
# Offense count: 29

View File

@ -1,5 +1,7 @@
# frozen_string_literal: true
require_relative 'file_split'
module Zip
# ZipFile is modeled after java.util.zip.ZipFile from the Java SDK.
# The most important methods are those inherited from
@ -45,12 +47,9 @@ module Zip
# interface for accessing the filesystem, ie. the File and Dir classes.
class File < CentralDirectory
extend FileSplit
CREATE = true
SPLIT_SIGNATURE = 0x08074b50
ZIP64_EOCD_SIGNATURE = 0x06064b50
MAX_SEGMENT_SIZE = 3_221_225_472
MIN_SEGMENT_SIZE = 65_536
DATA_BUFFER_SIZE = 8192
IO_METHODS = [:tell, :seek, :read, :eof, :close].freeze
attr_reader :name
@ -172,82 +171,6 @@ module Zip
zip_file.each(&block)
end
end
# Restricts the requested segment size to the supported range: values below
# MIN_SEGMENT_SIZE are raised to it, values above MAX_SEGMENT_SIZE are lowered
# to it, anything in between is returned unchanged.
def get_segment_size_for_split(segment_size)
  return MIN_SEGMENT_SIZE if MIN_SEGMENT_SIZE > segment_size
  return MAX_SEGMENT_SIZE if MAX_SEGMENT_SIZE < segment_size

  segment_size
end
# Builds the base name used for the segment files.
#
# When +partial_zip_file_name+ is nil the original +zip_file_name+ is returned.
# Otherwise the trailing basename of +zip_file_name+ is replaced with
# +partial_zip_file_name+ plus the original file extension, keeping any
# leading directory part intact.
def get_partial_zip_file_name(zip_file_name, partial_zip_file_name)
  return zip_file_name if partial_zip_file_name.nil?

  # Escape the basename so characters such as "+", "(" or "[" are matched
  # literally instead of being interpreted as regex syntax.
  basename_at_end = /#{Regexp.escape(::File.basename(zip_file_name))}\z/

  # Block form of #sub: the replacement is inserted verbatim, so backslash
  # sequences in the new name are not treated as back-references.
  zip_file_name.sub(basename_at_end) do
    partial_zip_file_name + ::File.extname(zip_file_name)
  end
end
# Returns how many segments of +segment_size+ bytes are needed to hold
# +zip_file_size+ bytes: the number of full segments, plus one more when a
# remainder is left over.
def get_segment_count_for_split(zip_file_size, segment_size)
  full_segments = (zip_file_size / segment_size).to_i
  leftover = zip_file_size % segment_size
  if leftover == 0
    full_segments
  else
    full_segments + 1
  end
end
# Appends SPLIT_SIGNATURE, packed as a 32-bit little-endian integer, to
# +szip_file+ and returns the number of bytes of +segment_size+ that remain
# available after the signature.
def put_split_signature(szip_file, segment_size)
  marker = [SPLIT_SIGNATURE].pack('V')
  szip_file << marker

  segment_size - marker.size
end
#
# TODO: Make the code more understandable
#
# Copies the next segment from +zip_file+ into a new file named
# "<partial_zip_file_name>.NNN", where NNN is +szip_file_index+ zero-padded to
# three digits.
#
# zip_file::              readable source positioned at the start of the
#                         segment; must respond to #pos, #read and #eof?.
# partial_zip_file_name:: base path of the segment files.
# zip_file_size::         total size of the source in bytes.
# szip_file_index::       1-based number of the segment being written.
# segment_size::          maximum size of one segment file in bytes.
# segment_count::         total number of segments; only passed to the block.
#
# If a block is given it is called after every chunk with
# (segment_count, szip_file_index, bytes copied so far, bytes to copy).
def save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count)
  # Never try to copy more than what is left in the source.
  ssegment_size = [zip_file_size - zip_file.pos, segment_size].min
  szip_file_name = "#{partial_zip_file_name}.#{format('%03d', szip_file_index)}"

  ::File.open(szip_file_name, 'wb') do |szip_file|
    # The first segment starts with the split signature, which eats into
    # the space available for archive data.
    ssegment_size = put_split_signature(szip_file, segment_size) if szip_file_index == 1

    chunk_bytes = 0
    until chunk_bytes == ssegment_size || zip_file.eof?
      buffer_size = [ssegment_size - chunk_bytes, DATA_BUFFER_SIZE].min
      chunk = zip_file.read(buffer_size)
      break if chunk.nil?

      # Count what was actually read, not what was requested, so a short
      # read neither truncates the segment nor misreports progress.
      chunk_bytes += chunk.bytesize
      szip_file << chunk

      # Info for tracking the split.
      yield segment_count, szip_file_index, chunk_bytes, ssegment_size if block_given?
    end
  end
end
# Splits an archive into parts with segment size
# Splits the archive at +zip_file_name+ into numbered segment files.
#
# zip_file_name::         path of the archive to split.
# segment_size::          requested segment size in bytes; limited to the
#                         range accepted by get_segment_size_for_split.
# delete_zip_file::       when true, the original archive is deleted after
#                         all segments have been written.
# partial_zip_file_name:: optional replacement basename for the segments.
#
# An optional block is called after every copied chunk with
# (segment count, segment index, bytes copied so far, bytes to copy).
#
# Returns the number of segments written, or nil when the archive already
# fits into a single segment and nothing is done.
#
# Raises Error when the file does not exist and Errno::ENOENT when it is not
# readable.
def split(zip_file_name, segment_size = MAX_SEGMENT_SIZE, delete_zip_file = true, partial_zip_file_name = nil, &block)
  raise Error, "File #{zip_file_name} not found" unless ::File.exist?(zip_file_name)
  raise Errno::ENOENT, zip_file_name unless ::File.readable?(zip_file_name)

  zip_file_size = ::File.size(zip_file_name)
  segment_size = get_segment_size_for_split(segment_size)
  return if zip_file_size <= segment_size

  # The first segment also carries the 4-byte split signature, so the total
  # amount of data spread over the segments is the archive plus those bytes.
  signature_size = [SPLIT_SIGNATURE].pack('V').bytesize
  segment_count = get_segment_count_for_split(zip_file_size + signature_size, segment_size)

  ::Zip::File.open(zip_file_name) {} # Check for correct zip structure.
  partial_zip_file_name = get_partial_zip_file_name(zip_file_name, partial_zip_file_name)

  szip_file_index = 0
  ::File.open(zip_file_name, 'rb') do |zip_file|
    until zip_file.eof?
      szip_file_index += 1
      # Forward the caller's block; without it the progress callback in
      # save_splited_part can never run.
      save_splited_part(zip_file, partial_zip_file_name, zip_file_size,
                        szip_file_index, segment_size, segment_count, &block)
    end
  end

  ::File.delete(zip_file_name) if delete_zip_file
  szip_file_index
end
end
# Returns an input stream to the specified entry. If a block is passed

86
lib/zip/file_split.rb Normal file
View File

@ -0,0 +1,86 @@
# frozen_string_literal: true
module Zip
  # Methods for splitting a zip archive on disk into numbered segment files
  # ("<name>.001", "<name>.002", ...). The first segment starts with
  # SPLIT_SIGNATURE.
  module FileSplit #:nodoc:
    # Signature written (32-bit little-endian) at the start of the first segment.
    SPLIT_SIGNATURE  = 0x08074b50
    # Largest accepted segment size in bytes (3 GiB).
    MAX_SEGMENT_SIZE = 3_221_225_472
    # Smallest accepted segment size in bytes (64 KiB).
    MIN_SEGMENT_SIZE = 65_536
    # Maximum number of bytes copied per read while writing a segment.
    DATA_BUFFER_SIZE = 8192

    # Limits +segment_size+ to MIN_SEGMENT_SIZE..MAX_SEGMENT_SIZE.
    def get_segment_size_for_split(segment_size)
      segment_size.clamp(MIN_SEGMENT_SIZE, MAX_SEGMENT_SIZE)
    end

    # Builds the base name used for the segment files.
    #
    # When +partial_zip_file_name+ is nil the original +zip_file_name+ is
    # returned. Otherwise the trailing basename of +zip_file_name+ is replaced
    # with +partial_zip_file_name+ plus the original file extension, keeping
    # any leading directory part intact.
    def get_partial_zip_file_name(zip_file_name, partial_zip_file_name)
      return zip_file_name if partial_zip_file_name.nil?

      # Escape the basename so characters such as "+", "(" or "[" are matched
      # literally instead of being interpreted as regex syntax.
      basename_at_end = /#{Regexp.escape(::File.basename(zip_file_name))}\z/

      # Block form of #sub: the replacement is inserted verbatim, so backslash
      # sequences in the new name are not treated as back-references.
      zip_file_name.sub(basename_at_end) do
        partial_zip_file_name + ::File.extname(zip_file_name)
      end
    end

    # Returns how many segments of +segment_size+ bytes are needed to hold
    # +zip_file_size+ bytes (rounding up).
    def get_segment_count_for_split(zip_file_size, segment_size)
      full_segments, leftover = zip_file_size.divmod(segment_size)
      leftover == 0 ? full_segments : full_segments + 1
    end

    # Appends the packed SPLIT_SIGNATURE to +szip_file+ and returns the number
    # of bytes of +segment_size+ that remain available after it.
    def put_split_signature(szip_file, segment_size)
      signature_packed = [SPLIT_SIGNATURE].pack('V')
      szip_file << signature_packed
      segment_size - signature_packed.size
    end

    # Copies the next segment from +zip_file+ into a new file named
    # "<partial_zip_file_name>.NNN", where NNN is +szip_file_index+
    # zero-padded to three digits.
    #
    # +zip_file+ must respond to #pos, #read and #eof? and be positioned at the
    # start of the segment. +segment_count+ is only passed on to the block.
    #
    # If a block is given it is called after every chunk with
    # (segment_count, szip_file_index, bytes copied so far, bytes to copy).
    def save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count)
      # Never try to copy more than what is left in the source.
      ssegment_size = [zip_file_size - zip_file.pos, segment_size].min
      szip_file_name = "#{partial_zip_file_name}.#{format('%03d', szip_file_index)}"

      ::File.open(szip_file_name, 'wb') do |szip_file|
        # The first segment starts with the split signature, which eats into
        # the space available for archive data.
        ssegment_size = put_split_signature(szip_file, segment_size) if szip_file_index == 1

        chunk_bytes = 0
        until chunk_bytes == ssegment_size || zip_file.eof?
          buffer_size = [ssegment_size - chunk_bytes, DATA_BUFFER_SIZE].min
          chunk = zip_file.read(buffer_size)
          break if chunk.nil?

          # Count what was actually read, not what was requested, so a short
          # read neither truncates the segment nor misreports progress.
          chunk_bytes += chunk.bytesize
          szip_file << chunk

          # Info for tracking the split.
          yield segment_count, szip_file_index, chunk_bytes, ssegment_size if block_given?
        end
      end
    end

    # Splits the archive at +zip_file_name+ into numbered segment files.
    #
    # zip_file_name::         path of the archive to split.
    # segment_size::          requested segment size in bytes; limited to
    #                         MIN_SEGMENT_SIZE..MAX_SEGMENT_SIZE.
    # delete_zip_file::       when true, the original archive is deleted after
    #                         all segments have been written.
    # partial_zip_file_name:: optional replacement basename for the segments.
    #
    # An optional block is called after every copied chunk with
    # (segment count, segment index, bytes copied so far, bytes to copy).
    #
    # Returns the number of segments written, or nil when the archive already
    # fits into a single segment and nothing is done.
    #
    # Raises Error when the file does not exist and Errno::ENOENT when it is
    # not readable.
    def split(zip_file_name, segment_size = MAX_SEGMENT_SIZE, delete_zip_file = true, partial_zip_file_name = nil, &block)
      raise Error, "File #{zip_file_name} not found" unless ::File.exist?(zip_file_name)
      raise Errno::ENOENT, zip_file_name unless ::File.readable?(zip_file_name)

      zip_file_size = ::File.size(zip_file_name)
      segment_size = get_segment_size_for_split(segment_size)
      return if zip_file_size <= segment_size

      # The first segment also carries the split signature, so the data spread
      # over the segments is the archive plus those signature bytes.
      signature_size = [SPLIT_SIGNATURE].pack('V').bytesize
      segment_count = get_segment_count_for_split(zip_file_size + signature_size, segment_size)

      ::Zip::File.open(zip_file_name) {} # Check for correct zip structure.
      partial_zip_file_name = get_partial_zip_file_name(zip_file_name, partial_zip_file_name)

      szip_file_index = 0
      ::File.open(zip_file_name, 'rb') do |zip_file|
        until zip_file.eof?
          szip_file_index += 1
          # Forward the caller's block; without it the progress callback in
          # save_splited_part can never run.
          save_splited_part(zip_file, partial_zip_file_name, zip_file_size,
                            szip_file_index, segment_size, segment_count, &block)
        end
      end

      ::File.delete(zip_file_name) if delete_zip_file
      szip_file_index
    end
  end
end

View File

@ -33,7 +33,7 @@ class ZipFileSplitTest < MiniTest::Test
Dir["#{TEST_ZIP.zip_name}.*"].sort.each_with_index do |zip_file_name, index|
File.open(zip_file_name, 'rb') do |zip_file|
zip_file.read([::Zip::File::SPLIT_SIGNATURE].pack('V').size) if index == 0
zip_file.read([::Zip::FileSplit::SPLIT_SIGNATURE].pack('V').size) if index == 0
File.open(UNSPLITTED_FILENAME, 'ab') do |file|
file << zip_file.read
end