Starting on Version 7.0, building on the work done by others. This is completely untested. I will be testing things myself, but I wanted to get this base version up so that others can submit pull requests against it.

THIS IS ON THE MASTER BRANCH. The master branch will target Python 3 from now on. While Python 2.7 support will not be deliberately broken, all efforts should now focus on Python 3 compatibility.

I can see that a lot of work has been done, and there is more to do. I've bumped the version number of everything I came across to the next major number to indicate Python 3 compatibility.

Thanks, everyone. I hope to update here at least once a week until we have a stable 7.0 release for calibre 5.0.
This commit is contained in:
Apprentice Harper
2020-09-26 21:22:47 +01:00
parent 4868a7460e
commit afa4ac5716
40 changed files with 757 additions and 729 deletions

View File

@@ -2,10 +2,12 @@
Read and write ZIP files.
"""
import struct, os, time, sys, shutil
import binascii, cStringIO, stat
import binascii, stat
import io
import re
from io import BytesIO
try:
import zlib # We may need its compression method
crc32 = zlib.crc32
@@ -45,8 +47,8 @@ ZIP_DEFLATED = 8
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)
_ECD_SIGNATURE = 0
@@ -64,8 +66,8 @@ _ECD_LOCATION = 9
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
structCentralDir = b"<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)
# indexes of entries in the central directory structure
@@ -91,8 +93,8 @@ _CD_LOCAL_HEADER_OFFSET = 18
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
structFileHeader = b"<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)
_FH_SIGNATURE = 0
@@ -109,14 +111,14 @@ _FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
structEndArchive64Locator = b"<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
structEndArchive64 = b"<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)
_CD64_SIGNATURE = 0
@@ -275,7 +277,7 @@ class ZipInfo (object):
# Terminate the file name at the first null byte. Null bytes in file
# names are used as tricks by viruses in archives.
null_byte = filename.find(chr(0))
null_byte = filename.find(b"\0")
if null_byte >= 0:
filename = filename[0:null_byte]
# This is used to ensure paths in generated ZIP files always use
@@ -288,8 +290,8 @@ class ZipInfo (object):
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
self.compress_type = ZIP_STORED # Type of compression for the file
self.comment = "" # Comment for each file
self.extra = "" # ZIP extra data
self.comment = b"" # Comment for each file
self.extra = b"" # ZIP extra data
if sys.platform == 'win32':
self.create_system = 0 # System which created ZIP archive
else:
@@ -343,23 +345,13 @@ class ZipInfo (object):
return header + filename + extra
def _encodeFilenameFlags(self):
if isinstance(self.filename, unicode):
if isinstance(self.filename, bytes):
return self.filename, self.flag_bits
else:
try:
return self.filename.encode('ascii'), self.flag_bits
except UnicodeEncodeError:
return self.filename.encode('utf-8'), self.flag_bits | 0x800
else:
return self.filename, self.flag_bits
def _decodeFilename(self):
if self.flag_bits & 0x800:
try:
#print "decoding filename",self.filename
return self.filename.decode('utf-8')
except:
return self.filename
else:
return self.filename
def _decodeExtra(self):
# Try to decode the extra field.
@@ -377,20 +369,20 @@ class ZipInfo (object):
elif ln == 0:
counts = ()
else:
raise RuntimeError, "Corrupt extra field %s"%(ln,)
raise RuntimeError("Corrupt extra field %s"%(ln,))
idx = 0
# ZIP64 extension (large files and/or large archives)
if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
if self.file_size in (0xffffffffffffffff, 0xffffffff):
self.file_size = counts[idx]
idx += 1
if self.compress_size == 0xFFFFFFFFL:
if self.compress_size == 0xFFFFFFFF:
self.compress_size = counts[idx]
idx += 1
if self.header_offset == 0xffffffffL:
if self.header_offset == 0xffffffff:
old = self.header_offset
self.header_offset = counts[idx]
idx+=1
@@ -481,9 +473,9 @@ class ZipExtFile(io.BufferedIOBase):
if self._compress_type == ZIP_DEFLATED:
self._decompressor = zlib.decompressobj(-15)
self._unconsumed = ''
self._unconsumed = b''
self._readbuffer = ''
self._readbuffer = b''
self._offset = 0
self._universal = 'U' in mode
@@ -514,10 +506,10 @@ class ZipExtFile(io.BufferedIOBase):
if not self._universal:
return io.BufferedIOBase.readline(self, limit)
line = ''
line = b''
while limit < 0 or len(line) < limit:
readahead = self.peek(2)
if readahead == '':
if readahead == b'':
return line
#
@@ -564,7 +556,7 @@ class ZipExtFile(io.BufferedIOBase):
If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
"""
buf = ''
buf = b''
while n < 0 or n is None or n > len(buf):
data = self.read1(n)
if len(data) == 0:
@@ -594,7 +586,7 @@ class ZipExtFile(io.BufferedIOBase):
self._compress_left -= len(data)
if data and self._decrypter is not None:
data = ''.join(map(self._decrypter, data))
data = b''.join(map(self._decrypter, data))
if self._compress_type == ZIP_STORED:
self._readbuffer = self._readbuffer[self._offset:] + data
@@ -651,10 +643,10 @@ class ZipFile:
pass
elif compression == ZIP_DEFLATED:
if not zlib:
raise RuntimeError,\
"Compression requires the (missing) zlib module"
raise RuntimeError(
"Compression requires the (missing) zlib module")
else:
raise RuntimeError, "That compression method is not supported"
raise RuntimeError("That compression method is not supported")
self._allowZip64 = allowZip64
self._didModify = False
@@ -664,10 +656,10 @@ class ZipFile:
self.compression = compression # Method of compression
self.mode = key = mode.replace('b', '')[0]
self.pwd = None
self.comment = ''
self.comment = b''
# Check if we were passed a file-like object
if isinstance(file, basestring):
if isinstance(file, str):
self._filePassed = 0
self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
@@ -699,7 +691,7 @@ class ZipFile:
if not self._filePassed:
self.fp.close()
self.fp = None
raise RuntimeError, 'Mode must be "r", "w" or "a"'
raise RuntimeError('Mode must be "r", "w" or "a"')
def __enter__(self):
return self
@@ -723,9 +715,9 @@ class ZipFile:
fp = self.fp
endrec = _EndRecData(fp)
if not endrec:
raise BadZipfile, "File is not a zip file"
raise BadZipfile("File is not a zip file")
if self.debug > 1:
print endrec
print(endrec)
size_cd = endrec[_ECD_SIZE] # bytes in central directory
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
self.comment = endrec[_ECD_COMMENT] # archive comment
@@ -738,20 +730,20 @@ class ZipFile:
if self.debug > 2:
inferred = concat + offset_cd
print "given, inferred, offset", offset_cd, inferred, concat
print("given, inferred, offset", offset_cd, inferred, concat)
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
fp = cStringIO.StringIO(data)
fp = BytesIO(data)
total = 0
while total < size_cd:
centdir = fp.read(sizeCentralDir)
if centdir[0:4] != stringCentralDir:
raise BadZipfile, "Bad magic number for central directory"
raise BadZipfile("Bad magic number for central directory")
centdir = struct.unpack(structCentralDir, centdir)
if self.debug > 2:
print centdir
print(centdir)
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
# Create ZipInfo instance to store file information
x = ZipInfo(filename)
@@ -769,7 +761,6 @@ class ZipFile:
x._decodeExtra()
x.header_offset = x.header_offset + concat
x.filename = x._decodeFilename()
self.filelist.append(x)
self.NameToInfo[x.filename] = x
@@ -779,7 +770,7 @@ class ZipFile:
+ centdir[_CD_COMMENT_LENGTH])
if self.debug > 2:
print "total", total
print("total", total)
def namelist(self):
@@ -796,10 +787,10 @@ class ZipFile:
def printdir(self):
"""Print a table of contents for the zip file."""
print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
for zinfo in self.filelist:
date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))
def testzip(self):
"""Read all the files and check the CRC."""
@@ -834,10 +825,10 @@ class ZipFile:
def open(self, name, mode="r", pwd=None):
"""Return file-like object for 'name'."""
if mode not in ("r", "U", "rU"):
raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
raise RuntimeError('open() requires mode "r", "U", or "rU"')
if not self.fp:
raise RuntimeError, \
"Attempt to read ZIP archive that was already closed"
raise RuntimeError(
"Attempt to read ZIP archive that was already closed")
# Only open a new file for instances where we were not
# given a file object in the constructor
@@ -859,7 +850,7 @@ class ZipFile:
# Skip the file header:
fheader = zef_file.read(sizeFileHeader)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
raise BadZipfile("Bad magic number for file header")
fheader = struct.unpack(structFileHeader, fheader)
fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
@@ -867,9 +858,9 @@ class ZipFile:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
raise BadZipfile, \
raise BadZipfile(
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
zinfo.orig_filename, fname))
# check for encrypted flag & handle password
is_encrypted = zinfo.flag_bits & 0x1
@@ -878,8 +869,8 @@ class ZipFile:
if not pwd:
pwd = self.pwd
if not pwd:
raise RuntimeError, "File %s is encrypted, " \
"password required for extraction" % name
raise RuntimeError("File %s is encrypted, " \
"password required for extraction" % name)
zd = _ZipDecrypter(pwd)
# The first 12 bytes in the cypher stream is an encryption header
@@ -956,7 +947,7 @@ class ZipFile:
return targetpath
source = self.open(member, pwd=pwd)
target = file(targetpath, "wb")
target = open(targetpath, "wb")
shutil.copyfileobj(source, target)
source.close()
target.close()
@@ -967,18 +958,18 @@ class ZipFile:
"""Check for errors before writing a file to the archive."""
if zinfo.filename in self.NameToInfo:
if self.debug: # Warning for duplicate names
print "Duplicate name:", zinfo.filename
print("Duplicate name:", zinfo.filename)
if self.mode not in ("w", "a"):
raise RuntimeError, 'write() requires mode "w" or "a"'
raise RuntimeError('write() requires mode "w" or "a"')
if not self.fp:
raise RuntimeError, \
"Attempt to write ZIP archive that was already closed"
raise RuntimeError(
"Attempt to write ZIP archive that was already closed")
if zinfo.compress_type == ZIP_DEFLATED and not zlib:
raise RuntimeError, \
"Compression requires the (missing) zlib module"
raise RuntimeError(
"Compression requires the (missing) zlib module")
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
raise RuntimeError, \
"That compression method is not supported"
raise RuntimeError(
"That compression method is not supported")
if zinfo.file_size > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
@@ -1006,7 +997,7 @@ class ZipFile:
if isdir:
arcname += '/'
zinfo = ZipInfo(arcname, date_time)
zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
if compress_type is None:
zinfo.compress_type = self.compression
else:
@@ -1076,7 +1067,7 @@ class ZipFile:
date_time=time.localtime(time.time())[:6])
zinfo.compress_type = self.compression
zinfo.external_attr = 0600 << 16
zinfo.external_attr = 0x0600 << 16
else:
zinfo = zinfo_or_arcname
@@ -1141,7 +1132,7 @@ class ZipFile:
if zinfo.header_offset > ZIP64_LIMIT:
extra.append(zinfo.header_offset)
header_offset = 0xffffffffL
header_offset = 0xffffffff
else:
header_offset = zinfo.header_offset
@@ -1169,14 +1160,14 @@ class ZipFile:
0, zinfo.internal_attr, zinfo.external_attr,
header_offset)
except DeprecationWarning:
print >>sys.stderr, (structCentralDir,
print(structCentralDir,
stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
header_offset)
header_offset, sys.stderr)
raise
self.fp.write(centdir)
self.fp.write(filename)
@@ -1250,10 +1241,10 @@ class PyZipFile(ZipFile):
else:
basename = name
if self.debug:
print "Adding package in", pathname, "as", basename
print("Adding package in", pathname, "as", basename)
fname, arcname = self._get_codename(initname[0:-3], basename)
if self.debug:
print "Adding", arcname
print("Adding", arcname)
self.write(fname, arcname)
dirlist = os.listdir(pathname)
dirlist.remove("__init__.py")
@@ -1269,12 +1260,12 @@ class PyZipFile(ZipFile):
fname, arcname = self._get_codename(path[0:-3],
basename)
if self.debug:
print "Adding", arcname
print("Adding", arcname)
self.write(fname, arcname)
else:
# This is NOT a package directory, add its files at top level
if self.debug:
print "Adding files from directory", pathname
print("Adding files from directory", pathname)
for filename in os.listdir(pathname):
path = os.path.join(pathname, filename)
root, ext = os.path.splitext(filename)
@@ -1282,15 +1273,15 @@ class PyZipFile(ZipFile):
fname, arcname = self._get_codename(path[0:-3],
basename)
if self.debug:
print "Adding", arcname
print("Adding", arcname)
self.write(fname, arcname)
else:
if pathname[-3:] != ".py":
raise RuntimeError, \
'Files added with writepy() must end with ".py"'
raise RuntimeError(
'Files added with writepy() must end with ".py"')
fname, arcname = self._get_codename(pathname[0:-3], basename)
if self.debug:
print "Adding file", arcname
print("Adding file", arcname)
self.write(fname, arcname)
def _get_codename(self, pathname, basename):
@@ -1310,11 +1301,11 @@ class PyZipFile(ZipFile):
os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
import py_compile
if self.debug:
print "Compiling", file_py
print("Compiling", file_py)
try:
py_compile.compile(file_py, file_pyc, None, True)
except py_compile.PyCompileError,err:
print err.msg
except py_compile.PyCompileError as err:
print(err.msg)
fname = file_pyc
else:
fname = file_pyc
@@ -1337,12 +1328,12 @@ def main(args = None):
args = sys.argv[1:]
if not args or args[0] not in ('-l', '-c', '-e', '-t'):
print USAGE
print(USAGE)
sys.exit(1)
if args[0] == '-l':
if len(args) != 2:
print USAGE
print(USAGE)
sys.exit(1)
zf = ZipFile(args[1], 'r')
zf.printdir()
@@ -1350,15 +1341,15 @@ def main(args = None):
elif args[0] == '-t':
if len(args) != 2:
print USAGE
print(USAGE)
sys.exit(1)
zf = ZipFile(args[1], 'r')
zf.testzip()
print "Done testing"
print("Done testing")
elif args[0] == '-e':
if len(args) != 3:
print USAGE
print(USAGE)
sys.exit(1)
zf = ZipFile(args[1], 'r')
@@ -1378,7 +1369,7 @@ def main(args = None):
elif args[0] == '-c':
if len(args) < 3:
print USAGE
print(USAGE)
sys.exit(1)
def addToZip(zf, path, zippath):