# rubyzip/lib/zip/file.rb (Ruby, 421 lines, 14 KiB)

module Zip
# ZipFile is modeled after java.util.zip.ZipFile from the Java SDK.
# The most important methods are those inherited from
# ZipCentralDirectory for accessing information about the entries in
# the archive and methods such as get_input_stream and
# get_output_stream for reading from and writing entries to the
# archive. The class includes a few convenience methods such as
# #extract for extracting entries to the filesystem, and #remove,
# #replace, #rename and #mkdir for making simple modifications to
# the archive.
#
# Modifications to a zip archive are not committed until #commit or
# #close is called. The method #open accepts a block following
# the pattern from File.open offering a simple way to
# automatically close the archive when the block returns.
#
# The following example opens zip archive <code>my.zip</code>
# (creating it if it doesn't exist) and adds an entry
# <code>first.txt</code> and a directory entry <code>a_dir</code>
# to it.
#
#   require 'zip'
#
#   Zip::File.open("my.zip", Zip::File::CREATE) {
#     |zipfile|
#     zipfile.get_output_stream("first.txt") { |f| f.puts "Hello from ZipFile" }
#     zipfile.mkdir("a_dir")
#   }
#
# The next example reopens <code>my.zip</code>, writes the contents of
# <code>first.txt</code> to standard out and deletes the entry from
# the archive.
#
#   require 'zip'
#
#   Zip::File.open("my.zip", Zip::File::CREATE) {
#     |zipfile|
#     puts zipfile.read("first.txt")
#     zipfile.remove("first.txt")
#   }
#
# ZipFileSystem offers an alternative API that emulates ruby's
# interface for accessing the filesystem, ie. the File and Dir classes.
class File < CentralDirectory
# Value for the +create+ argument of ::new / ::open; #commit_required?
# treats an archive opened with it as needing a commit.
CREATE = 1
# Signature written at the start of the first part of a split archive
# (see put_split_signature).
SPLIT_SIGNATURE = 0x08074b50
# NOTE(review): presumably the zip64 end-of-central-directory record
# signature; it is not referenced in this file - confirm against the spec.
ZIP64_EOCD_SIGNATURE = 0x06064b50
# Largest and smallest segment sizes accepted by ::split; requested sizes
# outside this range are clamped (see get_segment_size_for_split).
MAX_SEGMENT_SIZE = 3221225472
MIN_SEGMENT_SIZE = 65536
# Maximum number of bytes read per chunk while writing a split part.
DATA_BUFFER_SIZE = 8192

# The archive name that was passed to the constructor.
attr_reader :name
# default -> false
attr_accessor :restore_ownership
# default -> false
attr_accessor :restore_permissions
# default -> true
attr_accessor :restore_times
# Returns the zip file's comment, if it has one
attr_accessor :comment
# Opens a zip archive. Pass true as the second parameter to create
# a new archive if it doesn't exist already.
#
# fileName:: path of the archive; also returned by #name and #to_s.
# create::   truthy to start with an empty entry set when no file is read.
# buffer::   when true an existing file called +fileName+ is NOT read, so
#            the archive can be filled from a stream instead.
#
# Raises ZipError when no file is read and +create+ is not set.
def initialize(fileName, create = nil, buffer = false)
  super()
  @name = fileName
  @comment = ''
  @create = create
  case
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  when ::File.exist?(fileName) && !buffer
    # An existing archive was read, so the create flag no longer applies.
    @create = nil
    ::File.open(fileName, 'rb') { |f| read_from_stream(f) }
  when create
    @entry_set = EntrySet.new
  else
    raise ZipError, "File #{fileName} not found"
  end
  # Snapshot of the initial state; #commit_required? compares against it.
  @storedEntries = @entry_set.dup
  @storedComment = @comment
  @restore_ownership = false
  @restore_permissions = false
  @restore_times = true
end
class << self
# Same as #new. If a block is passed the ZipFile object is passed
# to the block and is automatically closed afterwards just as with
# ruby's builtin File.open method.
#
# Returns the archive object when no block is given, otherwise the
# value of the block. The archive is closed even if the block raises.
def open(fileName, create = nil)
  zf = ::Zip::File.new(fileName, create)
  return zf unless block_given?

  begin
    yield zf
  ensure
    zf.close
  end
end
# Same as #open. But outputs data to a buffer instead of a file.
#
# Yields a new, empty archive (empty name, create and buffer flags set)
# and returns the result of calling write_buffer on it afterwards.
def add_buffer
  zf = ::Zip::File.new('', true, true)
  yield zf
  zf.write_buffer
end
# Like #open, but reads zip archive contents from a String or open IO
# stream, and outputs data to a buffer.
# (This can be used to extract data from a
# downloaded zip archive without first saving it to disk.)
#
# io:: a String holding the archive bytes, or an IO-like object, i.e.
#      anything responding to +read+ and +seek+ (IO, StringIO, Tempfile...).
#
# Yields the archive and returns the result of write_buffer.
# Raises RuntimeError for any other kind of argument.
def open_buffer(io)
  # Duck-typed check: a strict is_a?(IO) test rejects StringIO.
  io_like = io.respond_to?(:read) && io.respond_to?(:seek)
  unless io.is_a?(::String) || io_like
    raise "Zip::File.open_buffer expects a String or IO-like argument. Found: #{io.class}"
  end

  if io.is_a?(::String)
    # Loaded lazily: only String input needs the StringIO wrapper.
    require 'stringio'
    io = ::StringIO.new(io)
  end
  zf = ::Zip::File.new('', true, true)
  zf.read_from_stream(io)
  yield zf
  zf.write_buffer
end
# Iterates over the contents of the ZipFile. This is more efficient
# than using a ZipInputStream since this methods simply iterates
# through the entries in the central directory structure in the archive
# whereas ZipInputStream jumps through the entire archive accessing the
# local entry headers (which contain the same information as the
# central directory).
#
# The archive is opened with ::open and the given block is handed to
# its +each+ method.
def foreach(aZipFileName, &block)
  open(aZipFileName) { |zip_file| zip_file.each(&block) }
end
# Clamps the requested segment size to the supported range: values
# below MIN_SEGMENT_SIZE become MIN_SEGMENT_SIZE, values above
# MAX_SEGMENT_SIZE become MAX_SEGMENT_SIZE, anything else is returned
# unchanged.
def get_segment_size_for_split(segment_size)
  return MIN_SEGMENT_SIZE if MIN_SEGMENT_SIZE > segment_size
  return MAX_SEGMENT_SIZE if MAX_SEGMENT_SIZE < segment_size

  segment_size
end
# Builds the base path used for the parts of a split archive.
#
# Returns +zip_file_name+ itself when +partial_zip_file_name+ is nil.
# Otherwise the basename of +zip_file_name+ is replaced by
# +partial_zip_file_name+ plus the original extension, keeping the
# directory part, e.g. ("/tmp/my.zip", "part") -> "/tmp/part.zip".
def get_partial_zip_file_name(zip_file_name, partial_zip_file_name)
  return zip_file_name if partial_zip_file_name.nil?

  base_name = ::File.basename(zip_file_name)
  new_base_name = partial_zip_file_name + ::File.extname(zip_file_name)
  # The basename is escaped so characters such as "(", "+" or "[" are
  # matched literally; the block form keeps backslashes in the new name
  # from being treated as backreferences.
  zip_file_name.sub(/#{Regexp.escape(base_name)}\z/) { new_base_name }
end
# Number of segments of +segment_size+ needed to hold +zip_file_size+:
# the whole segments plus one more when there is a remainder.
def get_segment_count_for_split(zip_file_size, segment_size)
  whole_segments = (zip_file_size / segment_size).to_i
  has_remainder = (zip_file_size % segment_size) != 0
  has_remainder ? whole_segments + 1 : whole_segments
end
# Appends SPLIT_SIGNATURE, packed as a 32-bit little-endian integer, to
# +szip_file+ and returns how much of +segment_size+ is left for data
# once the signature bytes are accounted for.
def put_split_signature(szip_file, segment_size)
  signature_packed = [SPLIT_SIGNATURE].pack('V')
  szip_file << signature_packed
  segment_size - signature_packed.size
end
# Writes one part of a split archive.
#
# zip_file::              open source stream, read from its current position.
# partial_zip_file_name:: base path; the part is written to
#                         "<base>.<index as 3 digits>", e.g. "my.zip.001".
# zip_file_size::         total size of the source archive in bytes.
# szip_file_index::       1-based number of this part.
# segment_size::          maximum size of a part.
# segment_count::         total number of parts (only passed to the block).
#
# If a block is given it is called after every chunk with
# (segment_count, szip_file_index, bytes written so far, part size).
def save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count)
  # The part holds whatever is left of the source, capped at segment_size.
  ssegment_size = zip_file_size - zip_file.pos
  ssegment_size = segment_size if ssegment_size > segment_size
  szip_file_name = "#{partial_zip_file_name}.#{'%03d' % szip_file_index}"
  ::File.open(szip_file_name, 'wb') do |szip_file|
    # The first part starts with the split signature, which uses up some
    # of the segment.
    ssegment_size = put_split_signature(szip_file, segment_size) if szip_file_index == 1
    chunk_bytes = 0
    until ssegment_size == chunk_bytes || zip_file.eof?
      segment_bytes_left = ssegment_size - chunk_bytes
      buffer_size = [segment_bytes_left, DATA_BUFFER_SIZE].min
      chunk = zip_file.read(buffer_size)
      break if chunk.nil?

      # Count what was actually read; a short read must not be reported
      # as a full buffer.
      chunk_bytes += chunk.bytesize
      szip_file << chunk
      # Info for track splitting
      yield segment_count, szip_file_index, chunk_bytes, ssegment_size if block_given?
    end
  end
end
# Splits an archive into parts with segment size
#
# zip_file_name::         path of the archive to split.
# segment_size::          requested part size; clamped by
#                         get_segment_size_for_split.
# delete_zip_file::       when true the source archive is deleted afterwards.
# partial_zip_file_name:: optional replacement base name for the parts.
#
# Returns the number of parts written, or nil when the archive is not
# larger than the segment size (nothing is written in that case).
# Raises ZipError if the file does not exist and Errno::ENOENT if it is
# not readable.
def split(zip_file_name, segment_size = MAX_SEGMENT_SIZE, delete_zip_file = true, partial_zip_file_name = nil)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  raise ZipError, "File #{zip_file_name} not found" unless ::File.exist?(zip_file_name)
  raise Errno::ENOENT, zip_file_name unless ::File.readable?(zip_file_name)

  zip_file_size = ::File.size(zip_file_name)
  segment_size = get_segment_size_for_split(segment_size)
  return if zip_file_size <= segment_size

  segment_count = get_segment_count_for_split(zip_file_size, segment_size)
  # Checking for correct zip structure
  self.open(zip_file_name) {}
  partial_zip_file_name = get_partial_zip_file_name(zip_file_name, partial_zip_file_name)
  szip_file_index = 0
  ::File.open(zip_file_name, 'rb') do |zip_file|
    until zip_file.eof?
      szip_file_index += 1
      save_splited_part(zip_file, partial_zip_file_name, zip_file_size, szip_file_index, segment_size, segment_count)
    end
  end
  ::File.delete(zip_file_name) if delete_zip_file
  szip_file_index
end
end
# Opens the named entry for reading. The entry is looked up with
# #get_entry; the optional block is handed on to the entry's own
# get_input_stream, whose result is returned.
def get_input_stream(entry, &aProc)
  found_entry = get_entry(entry)
  found_entry.get_input_stream(&aProc)
end
# Returns an output stream to the specified entry. If a block is passed
# the stream object is passed to the block and the stream is automatically
# closed afterwards just as with ruby's builtin File.open method.
#
# entry::         an Entry, or anything whose +to_s+ gives the entry name.
# permissionInt:: value assigned to the entry's +unix_perms+.
#
# Raises ArgumentError when the entry is a directory entry.
def get_output_stream(entry, permissionInt = nil, &aProc)
  newEntry = entry.kind_of?(Entry) ? entry : Entry.new(@name, entry.to_s)
  if newEntry.directory?
    raise ArgumentError,
          "cannot open stream to directory entry - '#{newEntry}'"
  end
  newEntry.unix_perms = permissionInt
  zipStreamableEntry = StreamableStream.new(newEntry)
  @entry_set << zipStreamableEntry
  zipStreamableEntry.get_output_stream(&aProc)
end
# String form of the archive: the name it was opened with.
def to_s
  @name
end
# Reads the specified entry through #get_input_stream and returns
# whatever the stream's +read+ returns.
def read(entry)
  get_input_stream(entry, &:read)
end
# Convenience method for adding the contents of a file to the archive
#
# entry::   an Entry, or anything whose +to_s+ gives the entry name.
# srcPath:: path handed to the entry's gather_fileinfo_from_srcpath.
#
# The optional block decides what happens when the entry already exists
# (see check_entry_exists); without one Zip.continue_on_exists_proc is
# consulted.
def add(entry, srcPath, &continue_on_exists_proc)
  continue_on_exists_proc ||= proc { Zip.continue_on_exists_proc }
  check_entry_exists(entry, continue_on_exists_proc, "add")
  newEntry = entry.kind_of?(Entry) ? entry : Entry.new(@name, entry.to_s)
  newEntry.gather_fileinfo_from_srcpath(srcPath)
  @entry_set << newEntry
end
# Removes the specified entry. The entry is looked up with #get_entry
# and deleted from the entry set.
def remove(entry)
  @entry_set.delete(get_entry(entry))
end
# Renames the specified entry.
#
# The entry is looked up with #get_entry. The optional block decides
# what happens when +new_name+ is already taken (see
# check_entry_exists).
def rename(entry, new_name, &continue_on_exists_proc)
  foundEntry = get_entry(entry)
  check_entry_exists(new_name, continue_on_exists_proc, 'rename')
  # Take the entry out before changing its name and put it back after.
  @entry_set.delete(foundEntry)
  foundEntry.name = new_name
  @entry_set << foundEntry
end
# Replaces the specified entry with the contents of srcPath (from
# the file system).
#
# The source path is checked with check_file before the existing entry
# is removed.
def replace(entry, srcPath)
  check_file(srcPath)
  remove(entry)
  add(entry, srcPath)
end
# Extracts entry to file dest_path.
#
# The entry is looked up with #get_entry. The optional block is passed
# to the entry's +extract+; without one a block returning
# ::Zip.on_exists_proc is used.
def extract(entry, dest_path, &block)
  block ||= proc { ::Zip.on_exists_proc }
  found_entry = get_entry(entry)
  found_entry.extract(dest_path, &block)
end
# Commits changes that has been made since the previous commit to
# the zip archive.
#
# Does nothing (and returns nil) unless #commit_required? is true.
# Otherwise all entries and the comment are written to a temporary file
# which then replaces the archive (see on_success_replace), and the
# object is re-initialized from the archive name.
def commit
  return unless commit_required?

  on_success_replace(name) do |tmp_file|
    OutputStream.open(tmp_file) do |zos|
      @entry_set.each do |e|
        e.write_to_zip_output_stream(zos)
        e.dirty = false
      end
      zos.comment = comment
    end
    # A truthy block value tells on_success_replace to do the rename.
    true
  end
  initialize(name)
end
# Writes all entries and the archive comment through
# OutputStream.write_buffer and returns that call's result.
def write_buffer
  OutputStream.write_buffer do |zos|
    @entry_set.each { |e| e.write_to_zip_output_stream(zos) }
    zos.comment = comment
  end
end
# Closing the archive is the same as committing it: pending changes are
# written by #commit and its result is returned.
def close
  commit
end
# Returns true if any changes has been made to this archive since
# the previous commit
#
# That is the case when an entry is dirty, when the comment or the entry
# set differs from the state stored at initialization, or when the
# archive was opened with the CREATE flag.
def commit_required?
  @entry_set.each do |e|
    return true if e.dirty
  end
  @comment != @storedComment || @entry_set != @storedEntries || @create == File::CREATE
end
# Searches for entry with the specified name. Returns nil if
# no entry is found. See also get_entry
def find_entry(entry_name)
  @entry_set.find_entry(entry_name)
end
# Searches for entries given a glob. All arguments and the optional
# block are passed straight to the entry set's +glob+.
def glob(*args, &block)
  @entry_set.glob(*args, &block)
end
# Searches for an entry just as find_entry, but throws Errno::ENOENT
# if no entry is found.
#
# The archive's restore_ownership, restore_permissions and restore_times
# settings are copied onto the entry before it is returned.
def get_entry(entry)
  selectedEntry = find_entry(entry)
  # Errno::ENOENT only accepts a String message; +entry+ may be any
  # object, so convert it explicitly.
  raise Errno::ENOENT, entry.to_s unless selectedEntry

  selectedEntry.restore_ownership = @restore_ownership
  selectedEntry.restore_permissions = @restore_permissions
  selectedEntry.restore_times = @restore_times
  selectedEntry
end
# Creates a directory
#
# entryName::     name of the directory entry; a trailing "/" is added
#                 when missing.
# permissionInt:: permissions passed to the new StreamableDirectory.
#
# Raises Errno::EEXIST if an entry with that name already exists.
def mkdir(entryName, permissionInt = 0755)
  raise Errno::EEXIST, "File exists - #{entryName}" if find_entry(entryName)

  # Convert first, then copy: to_s may return a string owned by the
  # caller's object, and the "/" must not be appended to that one.
  entryName = entryName.to_s.dup
  entryName << '/' unless entryName.end_with?('/')
  @entry_set << StreamableDirectory.new(@name, entryName, nil, permissionInt)
end
private
# Checks that +newEntry+ and +srcPath+ agree on being a directory.
#
# Raises ArgumentError when the entry is a directory entry but
# +srcPath+ is not a directory. When +srcPath+ is a directory and the
# entry is not marked as one, "/" is appended to the entry's name.
# Returns the entry's is_directory value combined (&&) with whether
# +srcPath+ is a directory.
def is_directory(newEntry, srcPath)
  srcPathIsDirectory = ::File.directory?(srcPath)
  if newEntry.is_directory && !srcPathIsDirectory
    raise ArgumentError,
          "entry name '#{newEntry}' indicates directory entry, but " +
          "'#{srcPath}' is not a directory"
  elsif !newEntry.is_directory && srcPathIsDirectory
    newEntry.name += "/"
  end
  newEntry.is_directory && srcPathIsDirectory
end
# Handles the case where +entryName+ is already present in the entry set.
#
# continue_on_exists_proc:: called when the entry exists; a truthy result
#                           removes the existing entry, a falsy one
#                           raises. Defaults to a proc returning
#                           Zip.continue_on_exists_proc.
# procedureName::           name of the calling operation, used in the
#                           error message.
#
# Raises ZipEntryExistsError when the entry exists and must be kept.
def check_entry_exists(entryName, continue_on_exists_proc, procedureName)
  continue_on_exists_proc ||= proc { Zip.continue_on_exists_proc }
  return unless @entry_set.include?(entryName)

  unless continue_on_exists_proc.call
    raise ZipEntryExistsError,
          procedureName + " failed. Entry #{entryName} already exists"
  end
  remove get_entry(entryName)
end
# Raises Errno::ENOENT unless +path+ is readable.
def check_file(path)
  raise Errno::ENOENT, path unless ::File.readable?(path)
end
# Yields the path of a fresh temporary file (closed before the block
# runs). If the block returns a truthy value the temporary file is
# renamed to +aFilename+; otherwise nothing is renamed.
def on_success_replace(aFilename)
  tmpfile = get_tempfile
  tmpFilename = tmpfile.path
  tmpfile.close
  # Use the target the caller asked for rather than always using #name.
  ::File.rename(tmpFilename, aFilename) if yield tmpFilename
end
# Creates a Tempfile in binary mode, named after the archive's basename
# and located in the archive's directory.
def get_tempfile
  tempFile = Tempfile.new(::File.basename(name), ::File.dirname(name))
  tempFile.binmode
  tempFile
end
end
end
# Copyright (C) 2002, 2003 Thomas Sondergaard
# rubyzip is free software; you can redistribute it and/or
# modify it under the terms of the ruby license.