From bd2f15e4bb1211c58bcf29f919fa81ef044e14a7 Mon Sep 17 00:00:00 2001 From: Robert Haines Date: Sat, 12 Jun 2021 10:33:16 +0100 Subject: [PATCH] Extract the `Zip::File::split` code into its own module. This code is rarely used and may not even be correct according to the standard. Also this de-clutters the `File` class. --- .rubocop_todo.yml | 3 ++ lib/zip/file.rb | 85 ++-------------------------------------- lib/zip/file_split.rb | 86 +++++++++++++++++++++++++++++++++++++++++ test/file_split_test.rb | 2 +- 4 files changed, 94 insertions(+), 82 deletions(-) create mode 100644 lib/zip/file_split.rb diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index aafa7fe..4e443b4 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -75,6 +75,7 @@ Style/IfUnlessModifier: Exclude: - 'lib/zip/entry.rb' - 'lib/zip/file.rb' + - 'lib/zip/file_split.rb' - 'lib/zip/filesystem/dir.rb' - 'lib/zip/filesystem/file.rb' - 'lib/zip/pass_thru_decompressor.rb' @@ -100,6 +101,7 @@ Style/NumericPredicate: - 'lib/zip/extra_field/universal_time.rb' - 'lib/zip/extra_field/unix.rb' - 'lib/zip/file.rb' + - 'lib/zip/file_split.rb' - 'lib/zip/filesystem/file.rb' - 'lib/zip/input_stream.rb' - 'lib/zip/ioextras.rb' @@ -114,6 +116,7 @@ Style/OptionalBooleanParameter: Exclude: - 'lib/zip/entry.rb' - 'lib/zip/file.rb' + - 'lib/zip/file_split.rb' - 'lib/zip/output_stream.rb' # Offense count: 29 diff --git a/lib/zip/file.rb b/lib/zip/file.rb index 6286778..6665698 100644 --- a/lib/zip/file.rb +++ b/lib/zip/file.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative 'file_split' + module Zip # ZipFile is modeled after java.util.zip.ZipFile from the Java SDK. # The most important methods are those inherited from @@ -45,12 +47,9 @@ module Zip # interface for accessing the filesystem, ie. the File and Dir classes. class File < CentralDirectory + extend FileSplit + CREATE = true - SPLIT_SIGNATURE = 0x08074b50 - ZIP64_EOCD_SIGNATURE = 0x06064b50 - MAX_SEGMENT_SIZE = 3_221_225_472 - MIN_SEGMENT_SIZE = 65_536 - DATA_BUFFER_SIZE = 8192 IO_METHODS = [:tell, :seek, :read, :eof, :close].freeze attr_reader :name @@ -172,82 +171,6 @@ module Zip zip_file.each(&block) end end - - def get_segment_size_for_split(segment_size) - if MIN_SEGMENT_SIZE > segment_size - MIN_SEGMENT_SIZE - elsif MAX_SEGMENT_SIZE < segment_size - MAX_SEGMENT_SIZE - else - segment_size - end - end - - def get_partial_zip_file_name(zip_file_name, partial_zip_file_name) - unless partial_zip_file_name.nil? - partial_zip_file_name = zip_file_name.sub(/#{::File.basename(zip_file_name)}\z/, - partial_zip_file_name + ::File.extname(zip_file_name)) - end - partial_zip_file_name ||= zip_file_name - partial_zip_file_name - end - - def get_segment_count_for_split(zip_file_size, segment_size) - (zip_file_size / segment_size).to_i + (zip_file_size % segment_size == 0 ? 0 : 1) - end - - def put_split_signature(szip_file, segment_size) - signature_packed = [SPLIT_SIGNATURE].pack('V') - szip_file << signature_packed - segment_size - signature_packed.size - end - - # - # TODO: Make the code more understandable - # - def save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count) - ssegment_size = zip_file_size - zip_file.pos - ssegment_size = segment_size if ssegment_size > segment_size - szip_file_name = "#{partial_zip_file_name}.#{format('%03d', szip_file_index)}" - ::File.open(szip_file_name, 'wb') do |szip_file| - if szip_file_index == 1 - ssegment_size = put_split_signature(szip_file, segment_size) - end - chunk_bytes = 0 - until ssegment_size == chunk_bytes || zip_file.eof? - segment_bytes_left = ssegment_size - chunk_bytes - buffer_size = segment_bytes_left < DATA_BUFFER_SIZE ? segment_bytes_left : DATA_BUFFER_SIZE - chunk = zip_file.read(buffer_size) - chunk_bytes += buffer_size - szip_file << chunk - # Info for track splitting - yield segment_count, szip_file_index, chunk_bytes, ssegment_size if block_given? - end - end - end - - # Splits an archive into parts with segment size - def split(zip_file_name, segment_size = MAX_SEGMENT_SIZE, delete_zip_file = true, partial_zip_file_name = nil) - raise Error, "File #{zip_file_name} not found" unless ::File.exist?(zip_file_name) - raise Errno::ENOENT, zip_file_name unless ::File.readable?(zip_file_name) - - zip_file_size = ::File.size(zip_file_name) - segment_size = get_segment_size_for_split(segment_size) - return if zip_file_size <= segment_size - - segment_count = get_segment_count_for_split(zip_file_size, segment_size) - ::Zip::File.open(zip_file_name) {} # Check for correct zip structure. - partial_zip_file_name = get_partial_zip_file_name(zip_file_name, partial_zip_file_name) - szip_file_index = 0 - ::File.open(zip_file_name, 'rb') do |zip_file| - until zip_file.eof? - szip_file_index += 1 - save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count) - end - end - ::File.delete(zip_file_name) if delete_zip_file - szip_file_index - end end # Returns an input stream to the specified entry. If a block is passed diff --git a/lib/zip/file_split.rb b/lib/zip/file_split.rb new file mode 100644 index 0000000..5af3514 --- /dev/null +++ b/lib/zip/file_split.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +module Zip + module FileSplit #:nodoc: + SPLIT_SIGNATURE = 0x08074b50 + MAX_SEGMENT_SIZE = 3_221_225_472 + MIN_SEGMENT_SIZE = 65_536 + DATA_BUFFER_SIZE = 8192 + + def get_segment_size_for_split(segment_size) + if MIN_SEGMENT_SIZE > segment_size + MIN_SEGMENT_SIZE + elsif MAX_SEGMENT_SIZE < segment_size + MAX_SEGMENT_SIZE + else + segment_size + end + end + + def get_partial_zip_file_name(zip_file_name, partial_zip_file_name) + unless partial_zip_file_name.nil? + partial_zip_file_name = zip_file_name.sub(/#{::File.basename(zip_file_name)}\z/, + partial_zip_file_name + ::File.extname(zip_file_name)) + end + partial_zip_file_name ||= zip_file_name + partial_zip_file_name + end + + def get_segment_count_for_split(zip_file_size, segment_size) + (zip_file_size / segment_size).to_i + (zip_file_size % segment_size == 0 ? 0 : 1) + end + + def put_split_signature(szip_file, segment_size) + signature_packed = [SPLIT_SIGNATURE].pack('V') + szip_file << signature_packed + segment_size - signature_packed.size + end + + # + # TODO: Make the code more understandable + # + def save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count) + ssegment_size = zip_file_size - zip_file.pos + ssegment_size = segment_size if ssegment_size > segment_size + szip_file_name = "#{partial_zip_file_name}.#{format('%03d', szip_file_index)}" + ::File.open(szip_file_name, 'wb') do |szip_file| + if szip_file_index == 1 + ssegment_size = put_split_signature(szip_file, segment_size) + end + chunk_bytes = 0 + until ssegment_size == chunk_bytes || zip_file.eof? + segment_bytes_left = ssegment_size - chunk_bytes + buffer_size = segment_bytes_left < DATA_BUFFER_SIZE ? segment_bytes_left : DATA_BUFFER_SIZE + chunk = zip_file.read(buffer_size) + chunk_bytes += buffer_size + szip_file << chunk + # Info for track splitting + yield segment_count, szip_file_index, chunk_bytes, ssegment_size if block_given? + end + end + end + + # Splits an archive into parts with segment size + def split(zip_file_name, segment_size = MAX_SEGMENT_SIZE, delete_zip_file = true, partial_zip_file_name = nil) + raise Error, "File #{zip_file_name} not found" unless ::File.exist?(zip_file_name) + raise Errno::ENOENT, zip_file_name unless ::File.readable?(zip_file_name) + + zip_file_size = ::File.size(zip_file_name) + segment_size = get_segment_size_for_split(segment_size) + return if zip_file_size <= segment_size + + segment_count = get_segment_count_for_split(zip_file_size, segment_size) + ::Zip::File.open(zip_file_name) {} # Check for correct zip structure. + partial_zip_file_name = get_partial_zip_file_name(zip_file_name, partial_zip_file_name) + szip_file_index = 0 + ::File.open(zip_file_name, 'rb') do |zip_file| + until zip_file.eof? + szip_file_index += 1 + save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count) + end + end + ::File.delete(zip_file_name) if delete_zip_file + szip_file_index + end + end +end diff --git a/test/file_split_test.rb b/test/file_split_test.rb index 50fc4a4..fbe7bff 100644 --- a/test/file_split_test.rb +++ b/test/file_split_test.rb @@ -33,7 +33,7 @@ class ZipFileSplitTest < MiniTest::Test Dir["#{TEST_ZIP.zip_name}.*"].sort.each_with_index do |zip_file_name, index| File.open(zip_file_name, 'rb') do |zip_file| - zip_file.read([::Zip::File::SPLIT_SIGNATURE].pack('V').size) if index == 0 + zip_file.read([::Zip::FileSplit::SPLIT_SIGNATURE].pack('V').size) if index == 0 File.open(UNSPLITTED_FILENAME, 'ab') do |file| file << zip_file.read end