Compare commits

...

22 Commits
v1.1 ... v1.8

Author SHA1 Message Date
Apprentice Alf
d427f758f6 tools v1.8 2015-03-03 07:19:44 +00:00
TetraChroma
92dafd94b2 ineptpdf 8.2 fileopen 2015-03-03 07:18:58 +00:00
TetraChroma
c5ed30d8c8 Duplicate for split to fileopen version 2015-03-03 07:17:33 +00:00
Apprentice Alf
b7de1dcea5 ineptpdf 7.4 2015-03-03 07:13:37 +00:00
Anonymous
ab9d585190 ineptpdf 7.3 2015-03-03 07:11:36 +00:00
Anonymous
b92458c8c2 ineptpdf 7.2 2015-03-03 07:09:18 +00:00
Anonymous
4f19f5ac11 ineptpdf 7 2015-03-03 07:07:03 +00:00
Anonymous
f027848bff ineptpdf 6.1 2015-03-03 07:04:39 +00:00
Anonymous
26a54dd3d6 ineptpdf 6 2015-03-03 07:02:54 +00:00
Apprentice Alf
6c70a073d9 mobidedrm 0.15 2015-03-03 06:53:16 +00:00
Anonymous
37696e1495 ineptkey 4.4 2015-03-02 18:20:43 +00:00
Anonymous
446d45da6b ineptkey 4.3 2015-03-02 18:19:11 +00:00
Anonymous
a73fbbb484 ineptkey 4.2 2015-03-02 18:16:16 +00:00
Anonymous
086d25a441 ineptkey 4.1 2015-03-02 18:14:33 +00:00
Anonymous
63219d6054 ineptepub 4.1 2015-03-02 18:10:00 +00:00
Anonymous
f0d920c158 ineptepub v4 2015-03-02 18:08:10 +00:00
i♥cabbages
e9a7312759 ineptepub 3 2015-03-02 18:06:42 +00:00
Apprentice Alf
9c73801685 tools v1.6 2015-03-02 18:02:20 +00:00
Apprentice Alf
86357531a5 mobidedrm 0.13 2015-03-02 17:54:38 +00:00
Apprentice Alf
8e7d2657a4 tools v1.5 2015-03-02 07:43:31 +00:00
Apprentice Alf
6fb13373cf tools v1.4 2015-03-02 07:41:20 +00:00
Apprentice Alf
dce51ae232 tools v1.3 2015-03-02 07:35:40 +00:00
60 changed files with 4754 additions and 828 deletions

View File

@@ -1,15 +1,29 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-
# ineptepub.pyw, version 2
# ineptepub.pyw, version 5.2
# Copyright © 2009-2010 i♥cabbages
# To run this program install Python 2.6 from http://www.python.org/download/
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
# (make sure to install the version for Python 2.6). Save this script file as
# Released under the terms of the GNU General Public Licence, version 3 or
# later. <http://www.gnu.org/licenses/>
# Windows users: Before running this program, you must first install Python 2.6
# from <http://www.python.org/download/> and PyCrypto from
# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
# install the version for Python 2.6). Save this script file as
# ineptepub.pyw and double-click on it to run it.
#
# Mac OS X users: Save this script file as ineptepub.pyw. You can run this
# program from the command line (pythonw ineptepub.pyw) or by double-clicking
# it when it has been associated with PythonLauncher.
# Revision history:
# 1 - Initial release
# 2 - Rename to INEPT, fix exit code
# 5 - Version bump to avoid (?) confusion;
# Improve OS X support by using OpenSSL when available
# 5.1 - Improve OpenSSL error checking
# 5.2 - Fix ctypes error causing segfaults on some systems
"""
Decrypt Adobe ADEPT-encrypted EPUB books.
@@ -31,33 +45,101 @@ import Tkconstants
import tkFileDialog
import tkMessageBox
try:
from Crypto.Cipher import AES
from Crypto.PublicKey import RSA
except ImportError:
AES = None
RSA = None
META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml')
NSMAP = {'adept': 'http://ns.adobe.com/adept',
'enc': 'http://www.w3.org/2001/04/xmlenc#'}
# ASN.1 parsing code from tlslite
def bytesToNumber(bytes):
total = 0L
multiplier = 1L
for count in range(len(bytes)-1, -1, -1):
byte = bytes[count]
total += multiplier * byte
multiplier *= 256
return total
class ASN1Error(Exception):
class ADEPTError(Exception):
pass
class ASN1Parser(object):
def _load_crypto_libcrypto():
from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
Structure, c_ulong, create_string_buffer, cast
from ctypes.util import find_library
libcrypto = find_library('crypto')
if libcrypto is None:
raise ADEPTError('libcrypto not found')
libcrypto = CDLL(libcrypto)
RSA_NO_PADDING = 3
AES_MAXNR = 14
c_char_pp = POINTER(c_char_p)
c_int_p = POINTER(c_int)
class RSA(Structure):
pass
RSA_p = POINTER(RSA)
class AES_KEY(Structure):
_fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))),
('rounds', c_int)]
AES_KEY_p = POINTER(AES_KEY)
def F(restype, name, argtypes):
func = getattr(libcrypto, name)
func.restype = restype
func.argtypes = argtypes
return func
d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
[RSA_p, c_char_pp, c_long])
RSA_size = F(c_int, 'RSA_size', [RSA_p])
RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
[c_int, c_char_p, c_char_p, RSA_p, c_int])
RSA_free = F(None, 'RSA_free', [RSA_p])
AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
[c_char_p, c_int, AES_KEY_p])
AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
c_int])
class RSA(object):
def __init__(self, der):
buf = create_string_buffer(der)
pp = c_char_pp(cast(buf, c_char_p))
rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
if rsa is None:
raise ADEPTError('Error parsing ADEPT user key DER')
def decrypt(self, from_):
rsa = self._rsa
to = create_string_buffer(RSA_size(rsa))
dlen = RSA_private_decrypt(len(from_), from_, to, rsa,
RSA_NO_PADDING)
if dlen < 0:
raise ADEPTError('RSA decryption failed')
return to[:dlen]
def __del__(self):
if self._rsa is not None:
RSA_free(self._rsa)
self._rsa = None
class AES(object):
def __init__(self, userkey):
self._blocksize = len(userkey)
key = self._key = AES_KEY()
rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
if rv < 0:
raise ADEPTError('Failed to initialize AES key')
def decrypt(self, data):
out = create_string_buffer(len(data))
iv = ("\x00" * self._blocksize)
rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
if rv == 0:
raise ADEPTError('AES decryption failed')
return out.raw
return (AES, RSA)
def _load_crypto_pycrypto():
from Crypto.Cipher import AES as _AES
from Crypto.PublicKey import RSA as _RSA
# ASN.1 parsing code from tlslite
class ASN1Error(Exception):
pass
class ASN1Parser(object):
class Parser(object):
def __init__(self, bytes):
self.bytes = bytes
@@ -141,6 +223,45 @@ class ASN1Parser(object):
lengthLength = firstLength & 0x7F
return p.get(lengthLength)
class AES(object):
def __init__(self, key):
self._aes = _AES.new(key, _AES.MODE_CBC)
def decrypt(self, data):
return self._aes.decrypt(data)
class RSA(object):
def __init__(self, der):
key = ASN1Parser([ord(x) for x in der])
key = [key.getChild(x).value for x in xrange(1, 4)]
key = [self.bytesToNumber(v) for v in key]
self._rsa = _RSA.construct(key)
def bytesToNumber(self, bytes):
total = 0L
for byte in bytes:
total = (total << 8) + byte
return total
def decrypt(self, data):
return self._rsa.decrypt(data)
return (AES, RSA)
def _load_crypto():
AES = RSA = None
for loader in (_load_crypto_libcrypto, _load_crypto_pycrypto):
try:
AES, RSA = loader()
break
except (ImportError, ADEPTError):
pass
return (AES, RSA)
AES, RSA = _load_crypto()
META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml')
NSMAP = {'adept': 'http://ns.adobe.com/adept',
'enc': 'http://www.w3.org/2001/04/xmlenc#'}
class ZipInfo(zipfile.ZipInfo):
def __init__(self, *args, **kwargs):
@@ -149,11 +270,10 @@ class ZipInfo(zipfile.ZipInfo):
super(ZipInfo, self).__init__(*args, **kwargs)
self.compress_type = compress_type
class Decryptor(object):
def __init__(self, bookkey, encryption):
enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag)
self._aes = AES.new(bookkey, AES.MODE_CBC)
self._aes = AES(bookkey)
encryption = etree.fromstring(encryption)
self._encrypted = encrypted = set()
expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
@@ -178,16 +298,12 @@ class Decryptor(object):
data = self.decompress(data)
return data
class ADEPTError(Exception):
pass
def cli_main(argv=sys.argv):
progname = os.path.basename(argv[0])
if AES is None:
print "%s: This script requires PyCrypto, which must be installed " \
"separately. Read the top-of-script comment for details." % \
(progname,)
print "%s: This script requires OpenSSL or PyCrypto, which must be" \
" installed separately. Read the top-of-script comment for" \
" details." % (progname,)
return 1
if len(argv) != 4:
print "usage: %s KEYFILE INBOOK OUTBOOK" % (progname,)
@@ -195,9 +311,7 @@ def cli_main(argv=sys.argv):
keypath, inpath, outpath = argv[1:]
with open(keypath, 'rb') as f:
keyder = f.read()
key = ASN1Parser([ord(x) for x in keyder])
key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)]
rsa = RSA.construct(key)
rsa = RSA(keyder)
with closing(ZipFile(open(inpath, 'rb'))) as inf:
namelist = set(inf.namelist())
if 'META-INF/rights.xml' not in namelist or \
@@ -224,7 +338,6 @@ def cli_main(argv=sys.argv):
outf.writestr(path, decryptor.decrypt(path, data))
return 0
class DecryptionDialog(Tkinter.Frame):
def __init__(self, root):
Tkinter.Frame.__init__(self, root, border=5)
@@ -325,8 +438,9 @@ def gui_main():
root.withdraw()
tkMessageBox.showerror(
"INEPT EPUB Decrypter",
"This script requires PyCrypto, which must be installed "
"separately. Read the top-of-script comment for details.")
"This script requires OpenSSL or PyCrypto, which must be"
" installed separately. Read the top-of-script comment for"
" details.")
return 1
root.title('INEPT EPUB Decrypter')
root.resizable(True, False)

View File

@@ -1,22 +1,38 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-
# ineptkey.pyw, version 4
# ineptkey.pyw, version 5
# Copyright © 2009-2010 i♥cabbages
# To run this program install Python 2.6 from http://www.python.org/download/
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
# (make sure to install the version for Python 2.6). Save this script file as
# Released under the terms of the GNU General Public Licence, version 3 or
# later. <http://www.gnu.org/licenses/>
# Windows users: Before running this program, you must first install Python 2.6
# from <http://www.python.org/download/> and PyCrypto from
# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make certain
# to install the version for Python 2.6). Then save this script file as
# ineptkey.pyw and double-click on it to run it. It will create a file named
# adeptkey.der in the same directory. This is your ADEPT user key.
#
# Mac OS X users: Save this script file as ineptkey.pyw. You can run this
# program from the command line (pythonw ineptkey.pyw) or by double-clicking
# it when it has been associated with PythonLauncher. It will create a file
# named adeptkey.der in the same directory. This is your ADEPT user key.
# Revision history:
# 1 - Initial release, for Adobe Digital Editions 1.7
# 2 - Better algorithm for finding pLK; improved error handling
# 3 - Rename to INEPT
# 4 - quick beta fix for ADE 1.7.3 - for older versions use ineptkey v3
# or upgrade to ADE 1.7.3 (anon)
# 4 - Series of changes by joblack (and others?) --
# 4.1 - quick beta fix for ADE 1.7.2 (anon)
# 4.2 - added old 1.7.1 processing
# 4.3 - better key search
# 4.4 - Make it working on 64-bit Python
# 5 - Clean up and improve 4.x changes;
# Clean up and merge OS X support by unknown
"""
Retrieve Adobe ADEPT user key under Windows.
Retrieve Adobe ADEPT user key.
"""
from __future__ import with_statement
@@ -25,37 +41,37 @@ __license__ = 'GPL v3'
import sys
import os
from struct import pack
from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
string_at, Structure, c_void_p, cast
import _winreg as winreg
import struct
import Tkinter
import Tkconstants
import tkMessageBox
import traceback
try:
from Crypto.Cipher import AES
except ImportError:
AES = None
DEVICE_KEY = 'Software\\Adobe\\Adept\\Device'
PRIVATE_LICENCE_KEY_KEY = 'Software\\Adobe\\Adept\\Activation\\%04d\\%04d'
MAX_PATH = 255
kernel32 = windll.kernel32
advapi32 = windll.advapi32
crypt32 = windll.crypt32
class ADEPTError(Exception):
pass
if sys.platform.startswith('win'):
from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
string_at, Structure, c_void_p, cast, c_size_t, memmove
from ctypes.wintypes import LPVOID, DWORD, BOOL
import _winreg as winreg
def GetSystemDirectory():
try:
from Crypto.Cipher import AES
except ImportError:
AES = None
DEVICE_KEY_PATH = r'Software\Adobe\Adept\Device'
PRIVATE_LICENCE_KEY_PATH = r'Software\Adobe\Adept\Activation'
MAX_PATH = 255
kernel32 = windll.kernel32
advapi32 = windll.advapi32
crypt32 = windll.crypt32
def GetSystemDirectory():
GetSystemDirectoryW = kernel32.GetSystemDirectoryW
GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
GetSystemDirectoryW.restype = c_uint
@@ -64,10 +80,9 @@ def GetSystemDirectory():
GetSystemDirectoryW(buffer, len(buffer))
return buffer.value
return GetSystemDirectory
GetSystemDirectory = GetSystemDirectory()
GetSystemDirectory = GetSystemDirectory()
def GetVolumeSerialNumber():
def GetVolumeSerialNumber():
GetVolumeInformationW = kernel32.GetVolumeInformationW
GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
POINTER(c_uint), POINTER(c_uint),
@@ -75,13 +90,13 @@ def GetVolumeSerialNumber():
GetVolumeInformationW.restype = c_uint
def GetVolumeSerialNumber(path):
vsn = c_uint(0)
GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
GetVolumeInformationW(
path, None, 0, byref(vsn), None, None, None, 0)
return vsn.value
return GetVolumeSerialNumber
GetVolumeSerialNumber = GetVolumeSerialNumber()
GetVolumeSerialNumber = GetVolumeSerialNumber()
def GetUserName():
def GetUserName():
GetUserNameW = advapi32.GetUserNameW
GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
GetUserNameW.restype = c_uint
@@ -93,31 +108,107 @@ def GetUserName():
size.value = len(buffer)
return buffer.value.encode('utf-16-le')[::2]
return GetUserName
GetUserName = GetUserName()
GetUserName = GetUserName()
PAGE_EXECUTE_READWRITE = 0x40
MEM_COMMIT = 0x1000
MEM_RESERVE = 0x2000
def VirtualAlloc():
_VirtualAlloc = kernel32.VirtualAlloc
_VirtualAlloc.argtypes = [LPVOID, c_size_t, DWORD, DWORD]
_VirtualAlloc.restype = LPVOID
def VirtualAlloc(addr, size, alloctype=(MEM_COMMIT | MEM_RESERVE),
protect=PAGE_EXECUTE_READWRITE):
return _VirtualAlloc(addr, size, alloctype, protect)
return VirtualAlloc
VirtualAlloc = VirtualAlloc()
MEM_RELEASE = 0x8000
def VirtualFree():
_VirtualFree = kernel32.VirtualFree
_VirtualFree.argtypes = [LPVOID, c_size_t, DWORD]
_VirtualFree.restype = BOOL
def VirtualFree(addr, size=0, freetype=MEM_RELEASE):
return _VirtualFree(addr, size, freetype)
return VirtualFree
VirtualFree = VirtualFree()
class NativeFunction(object):
def __init__(self, restype, argtypes, insns):
self._buf = buf = VirtualAlloc(None, len(insns))
memmove(buf, insns, len(insns))
ftype = CFUNCTYPE(restype, *argtypes)
self._native = ftype(buf)
def __call__(self, *args):
return self._native(*args)
def __del__(self):
if self._buf is not None:
VirtualFree(self._buf)
self._buf = None
if struct.calcsize("P") == 4:
CPUID0_INSNS = (
"\x53" # push %ebx
"\x31\xc0" # xor %eax,%eax
"\x0f\xa2" # cpuid
"\x8b\x44\x24\x08" # mov 0x8(%esp),%eax
"\x89\x18" # mov %ebx,0x0(%eax)
"\x89\x50\x04" # mov %edx,0x4(%eax)
"\x89\x48\x08" # mov %ecx,0x8(%eax)
"\x5b" # pop %ebx
"\xc3" # ret
)
CPUID1_INSNS = (
"\x53" # push %ebx
"\x31\xc0" # xor %eax,%eax
"\x40" # inc %eax
"\x0f\xa2" # cpuid
"\x5b" # pop %ebx
"\xc3" # ret
)
else:
CPUID0_INSNS = (
"\x49\x89\xd8" # mov %rbx,%r8
"\x49\x89\xc9" # mov %rcx,%r9
"\x48\x31\xc0" # xor %rax,%rax
"\x0f\xa2" # cpuid
"\x4c\x89\xc8" # mov %r9,%rax
"\x89\x18" # mov %ebx,0x0(%rax)
"\x89\x50\x04" # mov %edx,0x4(%rax)
"\x89\x48\x08" # mov %ecx,0x8(%rax)
"\x4c\x89\xc3" # mov %r8,%rbx
"\xc3" # retq
)
CPUID1_INSNS = (
"\x53" # push %rbx
"\x48\x31\xc0" # xor %rax,%rax
"\x48\xff\xc0" # inc %rax
"\x0f\xa2" # cpuid
"\x5b" # pop %rbx
"\xc3" # retq
)
CPUID0_INSNS = create_string_buffer("\x53\x31\xc0\x0f\xa2\x8b\x44\x24\x08\x89"
"\x18\x89\x50\x04\x89\x48\x08\x5b\xc3")
def cpuid0():
buffer = create_string_buffer(12)
cpuid0__ = CFUNCTYPE(c_char_p)(addressof(CPUID0_INSNS))
def cpuid0():
cpuid0__(buffer)
return buffer.raw
_cpuid0 = NativeFunction(None, [c_char_p], CPUID0_INSNS)
buf = create_string_buffer(12)
def cpuid0():
_cpuid0(buf)
return buf.raw
return cpuid0
cpuid0 = cpuid0()
cpuid0 = cpuid0()
cpuid1 = NativeFunction(c_uint, [], CPUID1_INSNS)
CPUID1_INSNS = create_string_buffer("\x53\x31\xc0\x40\x0f\xa2\x5b\xc3")
cpuid1 = CFUNCTYPE(c_uint)(addressof(CPUID1_INSNS))
class DataBlob(Structure):
class DataBlob(Structure):
_fields_ = [('cbData', c_uint),
('pbData', c_void_p)]
DataBlob_p = POINTER(DataBlob)
DataBlob_p = POINTER(DataBlob)
def CryptUnprotectData():
def CryptUnprotectData():
_CryptUnprotectData = crypt32.CryptUnprotectData
_CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
c_void_p, c_void_p, c_uint, DataBlob_p]
@@ -133,66 +224,122 @@ def CryptUnprotectData():
raise ADEPTError("Failed to decrypt user key key (sic)")
return string_at(outdata.pbData, outdata.cbData)
return CryptUnprotectData
CryptUnprotectData = CryptUnprotectData()
CryptUnprotectData = CryptUnprotectData()
def retrieve_key(keypath):
def retrieve_key(keypath):
if AES is None:
tkMessageBox.showerror(
"ADEPT Key",
"This script requires PyCrypto, which must be installed "
"separately. Read the top-of-script comment for details.")
return False
root = GetSystemDirectory().split('\\')[0] + '\\'
serial = GetVolumeSerialNumber(root)
vendor = cpuid0()
signature = pack('>I', cpuid1())[1:]
signature = struct.pack('>I', cpuid1())[1:]
user = GetUserName()
entropy = pack('>I12s3s13s', serial, vendor, signature, user)
entropy = struct.pack('>I12s3s13s', serial, vendor, signature, user)
cuser = winreg.HKEY_CURRENT_USER
try:
regkey = winreg.OpenKey(cuser, DEVICE_KEY)
regkey = winreg.OpenKey(cuser, DEVICE_KEY_PATH)
except WindowsError:
raise ADEPTError("Adobe Digital Editions not activated")
device = winreg.QueryValueEx(regkey, 'key')[0]
keykey = CryptUnprotectData(device, entropy)
userkey = None
pkcs = None
for i in xrange(4, 16):
for j in xrange(0, 16):
plkkey = PRIVATE_LICENCE_KEY_KEY % (i, j)
try:
pkcs = winreg.OpenKey(cuser, plkkey)
plkroot = winreg.OpenKey(cuser, PRIVATE_LICENCE_KEY_PATH)
except WindowsError:
raise ADEPTError("Could not locate ADE activation")
for i in xrange(0, 16):
try:
plkparent = winreg.OpenKey(plkroot, "%04d" % (i,))
except WindowsError:
break
type = winreg.QueryValueEx(pkcs, None)[0]
if type != 'pkcs12':
ktype = winreg.QueryValueEx(plkparent, None)[0]
if ktype != 'credentials':
continue
pkcs = winreg.QueryValueEx(pkcs, 'value')[0]
break
if pkcs is not None:
break
for i in xrange(4, 16):
for j in xrange(0, 16):
plkkey = PRIVATE_LICENCE_KEY_KEY % (i, j)
try:
regkey = winreg.OpenKey(cuser, plkkey)
plkkey = winreg.OpenKey(plkparent, "%04d" % (j,))
except WindowsError:
break
type = winreg.QueryValueEx(regkey, None)[0]
if type != 'privateLicenseKey':
ktype = winreg.QueryValueEx(plkkey, None)[0]
if ktype != 'privateLicenseKey':
continue
userkey = winreg.QueryValueEx(regkey, 'value')[0]
userkey = winreg.QueryValueEx(plkkey, 'value')[0]
break
if userkey is not None:
break
if pkcs is None:
raise ADEPTError('Could not locate PKCS specification')
if userkey is None:
raise ADEPTError('Could not locate privateLicenseKey')
pkcs = pkcs.decode('base64')
print pkcs
userkey = userkey.decode('base64')
userkey = AES.new(keykey, AES.MODE_CBC).decrypt(userkey)
userkey = userkey[26:-ord(userkey[-1])]
with open(keypath, 'wb') as f:
f.write(userkey)
return
return True
elif sys.platform.startswith('darwin'):
import xml.etree.ElementTree as etree
import Carbon.File
import Carbon.Folder
import Carbon.Folders
import MacOS
ACTIVATION_PATH = 'Adobe/Digital Editions/activation.dat'
NSMAP = {'adept': 'http://ns.adobe.com/adept',
'enc': 'http://www.w3.org/2001/04/xmlenc#'}
def find_folder(domain, dtype):
try:
fsref = Carbon.Folder.FSFindFolder(domain, dtype, False)
return Carbon.File.pathname(fsref)
except MacOS.Error:
return None
def find_app_support_file(subpath):
dtype = Carbon.Folders.kApplicationSupportFolderType
for domain in Carbon.Folders.kUserDomain, Carbon.Folders.kLocalDomain:
path = find_folder(domain, dtype)
if path is None:
continue
path = os.path.join(path, subpath)
if os.path.isfile(path):
return path
return None
def retrieve_key(keypath):
actpath = find_app_support_file(ACTIVATION_PATH)
if actpath is None:
raise ADEPTError("Could not locate ADE activation")
tree = etree.parse(actpath)
adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
expr = '//%s/%s' % (adept('credentials'), adept('privateLicenseKey'))
userkey = tree.findtext(expr)
userkey = userkey.decode('base64')
userkey = userkey[26:]
with open(keypath, 'wb') as f:
f.write(userkey)
return True
elif sys.platform.startswith('cygwin'):
def retrieve_key(keypath):
tkMessageBox.showerror(
"ADEPT Key",
"This script requires a Windows-native Python, and cannot be run "
"under Cygwin. Please install a Windows-native Python and/or "
"check your file associations.")
return False
else:
def retrieve_key(keypath):
tkMessageBox.showerror(
"ADEPT Key",
"This script only supports Windows and Mac OS X. For Linux "
"you should be able to run ADE and this script under Wine (with "
"an appropriate version of Windows Python installed).")
return False
class ExceptionDialog(Tkinter.Frame):
def __init__(self, root, text):
@@ -204,29 +351,23 @@ class ExceptionDialog(Tkinter.Frame):
self.text.pack(fill=Tkconstants.BOTH, expand=1)
self.text.insert(Tkconstants.END, text)
def main(argv=sys.argv):
root = Tkinter.Tk()
root.withdraw()
progname = os.path.basename(argv[0])
if AES is None:
tkMessageBox.showerror(
"ADEPT Key",
"This script requires PyCrypto, which must be installed "
"separately. Read the top-of-script comment for details.")
return 1
keypath = 'adeptkey.der'
success = False
try:
retrieve_key(keypath)
success = retrieve_key(keypath)
except ADEPTError, e:
tkMessageBox.showerror("ADEPT Key", "Error: " + str(e))
return 1
except Exception:
root.wm_state('normal')
root.title('ADEPT Key')
text = traceback.format_exc()
ExceptionDialog(root, text).pack(fill=Tkconstants.BOTH, expand=1)
root.mainloop()
if not success:
return 1
tkMessageBox.showinfo(
"ADEPT Key", "Key successfully retrieved to %s" % (keypath))

View File

@@ -73,6 +73,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -88,7 +89,7 @@ class MainDialog(Tkinter.Frame):
cmdline = 'python lib\kindlepid.py "' + serial + '"'
else :
cmdline = 'lib\kindlepid.py "' + serial + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -113,6 +114,7 @@ class MainDialog(Tkinter.Frame):
log += 'Serial = "' + serial + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.pidrdr(serial)

View File

@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
self.mobipath = Tkinter.Entry(body, width=50)
self.mobipath.grid(row=0, column=1, sticky=sticky)
self.mobipath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.mobipath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_mobipath)
button.grid(row=0, column=2)
@@ -80,6 +82,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -96,6 +99,7 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\kindlefix.py "' + infile + '" "' + pidnum + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -138,6 +142,7 @@ class MainDialog(Tkinter.Frame):
log += 'PID = "' + pidnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.krdr(mobipath, pidnum)

View File

@@ -0,0 +1,88 @@
#include <cstdlib>
#include <iostream>
#include <conio.h>
#include <fstream>
using namespace std;
int main(int argc, char *argv[])
{
// Variables
int TopazTrue = 0;
int strlength = 0;
char uinfile[80];
char outfile[80];
char command[80];
char buffer[80];
// String initialization
strcpy(uinfile,"");
strcpy(outfile,"");
strcpy(buffer,"");
strcpy(command,"skindle "); // string preloaded with "skindle "
cout << "\n\n\n Please enter the name of the book to be converted:\n\n ";
cout << " Don't forget the prc file extension!\n\n ";
cout << " Watch out for zeros and Os. Zeros are skinny and Os are fat.\n\n\n ";
cin >> uinfile; // get file name of the book to be converted from user
ifstream infile(uinfile);
infile.getline(buffer,4);
if (strncmp (buffer,"TPZ",3)==0) // open file and test first 3 char if TPZ then book is topaz
{
TopazTrue = 1; // This is a Topaz file
}
strlength = strlen(uinfile);
if(strlength > 13)
{
strncat(outfile,uinfile,10); // Create output file name using first 10 char of input file name
}
else
{
strncat(outfile,uinfile, (strlength - 4)); // If file name is less than 10 characters
}
if(TopazTrue == 1) // This is Topaz Book
{
strcat(command,"-d "); // Add the topaz switch to the command line
strcat(outfile,".tpz"); // give tpz file extension to topaz output file
} // end of TopazTrue
else
{
strcat(outfile,".azw");
} // if not Topaz make it azw
strcat(command,"-i "); // Add the input switch to the command line
strcat(command,uinfile); // add the input file name to the command line
strcat(command," -o "); // add the output switch to the command line
strcat(command,outfile); // Add the output file name to the command line
cout << "\n\n The skindle program is called here.\n";
cout << " Any errors reported between here and \"The command line used was:\"\n";
cout << " Are errors from the skindle program. Not EZskindle4PC.\n\n";
system(command); // call skindle program to convert the book
cout << "\n\n The command line used was:\n\n";
cout << " " << command << "\n";
cout << "\n\n\n Please note the output file is created from the input";
cout << "\n file name. The file extension is changed to tpz for Topaz";
cout << "\n files and to azw for non-Topaz files. Also, _EBOK is removed ";
cout << "\n from the file name. This is to make it eaiser to identify ";
cout << "\n the file with no DRM.";
system("PAUSE");
return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,44 @@
LZskindle4PCv1_1 The Lazy skindle program for those who are typing impared
To setup:
1. Create a new folder: example C:\skindle
2. Place LZskindle4PCv1_1.exe and skindle.exe in this folder.
3. Create 2 subfolders: C:\skindle\input
and C:\skindle\output
To run:
1. Copy the book(s) you wish to remove DRM from into the input directory
(leave the originals in my kindle folder).
2. Double click on LZskindle4PCv1_0.exe
3. A DOS window will open and will show skindle being called for
each book in the input directory.
4. The books with the DRM removed will now be in the output directory.
Rev1_1
fixed program to allow any file extension. My testing indicates that
skindle does not care what file extension a file has. If it is a file type
that it can convert it will. If the file is not compatible it will close
and give an unknown file type message.
Rev1_0
If the program is run with no files in the input directory you will get a
<EFBFBD>File not Found<6E> error and the program will terminate.
PLEASE USE ONLY FOR YOUR PERSONAL USE <20> ON BOOKS YOU PAID FOR!!
This program is provided to allow you to archive your library in a format that
will outlast the kindle. Technology moves on and you should not have to reinvest
in an entire new library when the Kindle is obsolete. Please do not use this program
for piracy.

View File

@@ -0,0 +1,150 @@
#include <cstdlib>
#include <iostream>
#include <fstream>
//#include <conio.h>
using namespace std;
int main(int argc, char *argv[])
{
// Variable Declarations ??
char buffer[80];
int error = 0;
// int YesNo = 0;
// int exit = 0;
// Variables EZskindle4PC
int TopazTrue = 0;
int strlength = 0;
char uinfile[80];
char outfile[80];
char command[80];
char buffer2[20];
char tempfile[80];
// Initialize strings
strcpy(uinfile,"");
strcpy(outfile,"");
strcpy(buffer,"");
strcpy(buffer2,"");
strcpy(command,"skindle "); // string preloaded with "skindle "
//// Beginning of program code ////////////////////////////////////////////////////////////
system("dir /b .\\input\\*.* > books.txt"); // Create txt file with list of books
// No testing of file type being done
// I am letting skindle determing if valid
// file type
// Read in the list of book file names
ifstream infile("books.txt");
do // while not end of file
{
infile.getline(buffer,50); // load the first 50 characters of the line to buffer
if(strcmp(buffer, "")!= 0) // If there is file name in the buffer do this on last loop buffer will be empty
{
strcpy(uinfile,buffer); // load file name from buffer
strcpy(tempfile,".\\input\\"); // load directory name for input files
strcat(tempfile,buffer); // load the file name
ifstream infile2(tempfile); // open the book file for reading
infile2.getline(buffer2,4); // load first 4 char from file
infile2.close(); // close the book file
if (strncmp (buffer2,"TPZ",3)==0) // open file and test first 3 char if TPZ then book is topaz
{
TopazTrue = 1; // This is a Topaz file
}
strlength = strlen(uinfile);
if(strlength > 13)
{
strncat(outfile,uinfile,10); // Create output file name using first 10 char of input file name
}
else
{
strncat(outfile,uinfile, (strlength - 4)); // If file name is less than 10 characters
}
if(TopazTrue == 1) // This is Topaz Book
{
strcat(command,"-d "); // Add the topaz switch to the command line
strcat(outfile,".tpz"); // give tpz file extension to topaz output file
} // end of TopazTrue
else
{
strcat(outfile,".azw");
} // if not Topaz make it azw
strcat(command,"-i "); // Add the input switch to the command line
strcat(command,".\\input\\"); // Add the input directory to the command line
strcat(command,uinfile); // add the input file name to the command line
strcat(command," -o "); // add the output switch to the command line
strcat(command,".\\output\\"); // Add directory for out files
strcat(command,outfile); // Add the output file name to the command line
cout << "\n\n The skindle program is called here.\n";
cout << " Any errors reported between here and \"The command line used was:\"\n";
cout << " Are errors from the skindle program. Not EZskindle4PC.\n\n";
system(command); // call skindle program to convert the book
cout << "\n\n The command line used was:\n\n";
cout << " " << command << "\n\n\n\n";
}// end of file name in the buffer required to prevent execution on EOF
strcpy(command,"skindle "); // reset strings and variables for next book
strcpy(outfile,"");
strcpy(uinfile,"");
strcpy(buffer,"");
strcpy(buffer2,"");
TopazTrue = 0;
strlength = 0;
}while (! infile.eof() ); // no more books in the file
infile.close(); // close books.txt
// cout << "\n\n\n Do you want to delete all of the books from the input directory?\n\n";
// cout << " DO NOT DELETE IF THESE ARE ONLY COPY OF YOUR BOOKS!!!!\n\n";
// cout << " Y or N: ";
// do { // while not yes or no
// YesNo = getch(); // This is a DOS/Windows console command not standard C may not be
// // Usable under Unix or Mac implementations
//
// if((YesNo == 121)||(YesNo == 89)) // y or Y is true
// {
// exit = 1; // valid input exit do while loop
// cout << "\n\n";
// system("del .\\input\\*.*"); // delete everything in the input directory
// cout << "\n\n";
// }
// if((YesNo == 110)||(YesNo == 78)) // n or N is true
// {
// exit = 1; // valid input exit do while loop
// }
//
// }while (exit != 1);
// cout << "\n\nYesNo = " << YesNo << "\n\n";
system("PAUSE");
system("del books.txt"); // Delete txt file with list of books
return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,27 @@
Ezskindle4PC.exe
This executable program makes using skindle easier for people using Windows PC<50>s.
I do not know if it will work under any other operating system, however, I have included
the source code should anyone want to port it into other operating systems.
To use this program:
1. Copy the ezskindle4PC.exe into the same directory with the skindle files.
2. Copy the kindle book into the same directory.
3. double click the EZskindle4PCv1_0.exe file.
a. A DOS window will open and you will be asked for the name of the file you want to work with.
4. Type in the book<6F>s file name. (it will look something like B000WCTBTA_EBOK.prc)
5. The program will then check if it is a Topaz file and then create the output file name using the
first part of the input file name. It will use <20>tpz<70> file extension for Topaz books and will use <20>azw<7A>
for non topaz books. The files with the <20>azw<7A> format can be converted to other ebook formats using
Calibre. If you want to convert Topaz books to other formats you need to use Topaz tools not skindle.
6. The program will then create a command line and call the skindle program to process the book and
remove the DRM.
7. The program will pause and allow you to see the result of the skindle process.
8. Press any key to close the program.
version 1.1
Ok
Found a new 32 bit compiler and I think I have worked out the kinks.

View File

@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
self.mobipath = Tkinter.Entry(body, width=50)
self.mobipath.grid(row=0, column=1, sticky=sticky)
self.mobipath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.mobipath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_mobipath)
button.grid(row=0, column=2)
@@ -87,6 +89,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -103,6 +106,7 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -110,7 +114,7 @@ class MainDialog(Tkinter.Frame):
def get_mobipath(self):
mobipath = tkFileDialog.askopenfilename(
parent=None, title='Select Mobi eBook File',
defaultextension='.prc', filetypes=[('Mobi eBook File', '.prc'), ('Mobi eBook File', '.mobi'),
defaultextension='.prc', filetypes=[('Mobi eBook File', '.prc'), ('Mobi eBook File', '.azw'),('Mobi eBook File', '.mobi'),
('All Files', '.*')])
if mobipath:
mobipath = os.path.normpath(mobipath)
@@ -167,6 +171,7 @@ class MainDialog(Tkinter.Frame):
log += 'PID = "' + pidnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.mobirdr(mobipath, outpath, pidnum)

View File

@@ -76,6 +76,8 @@ def main(argv=sys.argv):
print "Kindle 2 Global serial number detected"
elif serial.startswith("B004"):
print "Kindle DX serial number detected"
elif serial.startswith("B005"):
print "Kindle DX International serial number detected"
else:
print "Warning: unrecognized serial number. Please recheck input."
return 1

View File

@@ -25,6 +25,26 @@
# import filter it works when importing unencrypted files.
# Also now handles encrypted files that don't need a specific PID.
# 0.11 - use autoflushed stdout and proper return values
# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
# and extra blank lines, converted CR/LF pairs at ends of each line,
# and other cosmetic fixes.
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
# files reveals that a confusin has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
# 0.15 - Now outputs 'hearbeat', and is also quicker for long files.
# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
__version__ = '0.16'
import sys
import struct
import binascii
class Unbuffered:
def __init__(self, stream):
@@ -35,15 +55,10 @@ class Unbuffered:
def __getattr__(self, attr):
return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import struct,binascii
class DrmException(Exception):
pass
#implementation of Pukall Cipher 1
# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
sum1 = 0;
sum2 = 0;
@@ -108,8 +123,10 @@ def getSizeOfTrailingDataEntries(ptr, size, flags):
if testflags & 1:
num += getSizeOfTrailingDataEntry(ptr, size - num)
testflags >>= 1
if flags & 1:
num += (ord(ptr[size - num - 1]) & 0x3) + 1
# Multibyte data, if present, is included in the encryption, so
# we do not need to check the low bit.
# if flags & 1:
# num += (ord(ptr[size - num - 1]) & 0x3) + 1
return num
class DrmStripper:
@@ -159,9 +176,7 @@ class DrmStripper:
break
return found_key
def __init__(self, data_file, pid):
if checksumPid(pid[0:-2]) != pid:
raise DrmException("invalid PID checksum")
pid = pid[0:-2]
@@ -183,13 +198,11 @@ class DrmStripper:
mobi_length, = struct.unpack('>L',sect[0x14:0x18])
mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
extra_data_flags = 0
print "MOBI header length = %d" %mobi_length
print "MOBI header version = %d" %mobi_version
if (mobi_length >= 0xE4) and (mobi_version > 5):
print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
if (mobi_length >= 0xE4) and (mobi_version >= 5):
extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
print "Extra Data Flags = %d" %extra_data_flags
crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
if crypto_type == 0:
print "This book is not encrypted."
@@ -215,13 +228,23 @@ class DrmStripper:
self.patchSection(0, "\0" * 2, 0xC)
# decrypt sections
print "Decrypting. Please wait...",
print "Decrypting. Please wait . . .",
new_data = self.data_file[:self.sections[1][0]]
for i in xrange(1, records+1):
data = self.loadSection(i)
extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
if i%100 == 0:
print ".",
# print "record %d, extra_size %d" %(i,extra_size)
self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
new_data += PC1(found_key, data[0:len(data) - extra_size])
if extra_size > 0:
new_data += data[-extra_size:]
#self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
if self.num_sections > records+1:
new_data += self.data_file[self.sections[records+1][0]:]
self.data_file = new_data
print "done"
def getResult(self):
return self.data_file
@@ -229,43 +252,49 @@ if not __name__ == "__main__":
from calibre.customize import FileTypePlugin
class MobiDeDRM(FileTypePlugin):
name = 'MobiDeDRM' # Name of the plugin
description = 'Removes DRM from secure Mobi files'
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
author = 'The Dark Reverser' # The author of this plugin
version = (0, 1, 0) # The version number of this plugin
version = (0, 1, 6) # The version number of this plugin
file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
def run(self, path_to_ebook):
of = self.temporary_file('.mobi')
from calibre.gui2 import is_ok_to_use_qt
from PyQt4.Qt import QMessageBox
PID = self.site_customization
data_file = file(path_to_ebook, 'rb').read()
ar = PID.split(',')
for i in ar:
try:
file(of.name, 'wb').write(DrmStripper(data_file, i).getResult())
unlocked_file = DrmStripper(data_file, i).getResult()
except DrmException:
# Hm, we should display an error dialog here.
# Dunno how though.
# Ignore the dirty hack behind the curtain.
# strexcept = 'echo exception: %s > /dev/tty' % e
# subprocess.call(strexcept,shell=True)
print i + ": not PID for book"
# ignore the error
pass
else:
of = self.temporary_file('.mobi')
of.write(unlocked_file)
of.close()
return of.name
if is_ok_to_use_qt():
d = QMessageBox(QMessageBox.Warning, "MobiDeDRM Plugin", "Couldn't decode: %s\n\nImporting encrypted version." % path_to_ebook)
d.show()
d.raise_()
d.exec_()
return path_to_ebook
def customization_help(self, gui=False):
return 'Enter PID (separate multiple PIDs with comma)'
if __name__ == "__main__":
print "MobiDeDrm v0.11. Copyright (c) 2008 The Dark Reverser"
sys.stdout=Unbuffered(sys.stdout)
print ('MobiDeDrm v%(__version__)s. '
'Copyright 2008-2010 The Dark Reverser.' % globals())
if len(sys.argv)<4:
print "Removes protection from Mobipocket books"
print "Usage:"
print " mobidedrm infile.mobi outfile.mobi (PID)"
print " %s <infile> <outfile> <PID>" % sys.argv[0]
sys.exit(1)
else:
infile = sys.argv[1]

View File

@@ -0,0 +1,310 @@
#!/usr/bin/python
#
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
#
# It can run standalone to convert files, or it can be installed as a
# plugin for Calibre (http://calibre-ebook.com/about) so that
# importing files with DRM 'Just Works'.
#
# To create a Calibre plugin, rename this file so that the filename
# ends in '_plugin.py', put it into a ZIP file and import that Calibre
# using its plugin configuration GUI.
#
# Changelog
# 0.01 - Initial version
# 0.02 - Huffdic compressed books were not properly decrypted
# 0.03 - Wasn't checking MOBI header length
# 0.04 - Wasn't sanity checking size of data record
# 0.05 - It seems that the extra data flags take two bytes not four
# 0.06 - And that low bit does mean something after all :-)
# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
# 0.08 - ...and also not in Mobi header version < 6
# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
# import filter it works when importing unencrypted files.
# Also now handles encrypted files that don't need a specific PID.
# 0.11 - use autoflushed stdout and proper return values
# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
# and extra blank lines, converted CR/LF pairs at ends of each line,
# and other cosmetic fixes.
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
# files reveals that a confusin has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
# 0.15 - Now outputs 'hearbeat', and is also quicker for long files.
# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
__version__ = '0.16'
import sys
import struct
import binascii
class Unbuffered:
def __init__(self, stream):
self.stream = stream
def write(self, data):
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
class DrmException(Exception):
pass
# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
sum1 = 0;
sum2 = 0;
keyXorVal = 0;
if len(key)!=16:
print "Bad key length!"
return None
wkey = []
for i in xrange(8):
wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
dst = ""
for i in xrange(len(src)):
temp1 = 0;
byteXorVal = 0;
for j in xrange(8):
temp1 ^= wkey[j]
sum2 = (sum2+j)*20021 + sum1
sum1 = (temp1*346)&0xFFFF
sum2 = (sum2+sum1)&0xFFFF
temp1 = (temp1*20021+1)&0xFFFF
byteXorVal ^= temp1 ^ sum2
curByte = ord(src[i])
if not decryption:
keyXorVal = curByte * 257;
curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
if decryption:
keyXorVal = curByte * 257;
for j in xrange(8):
wkey[j] ^= keyXorVal;
dst+=chr(curByte)
return dst
def checksumPid(s):
letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
crc = crc ^ (crc >> 16)
res = s
l = len(letters)
for i in (0,1):
b = crc & 0xff
pos = (b // l) ^ (b % l)
res += letters[pos%l]
crc >>= 8
return res
def getSizeOfTrailingDataEntries(ptr, size, flags):
def getSizeOfTrailingDataEntry(ptr, size):
bitpos, result = 0, 0
if size <= 0:
return result
while True:
v = ord(ptr[size-1])
result |= (v & 0x7F) << bitpos
bitpos += 7
size -= 1
if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
return result
num = 0
testflags = flags >> 1
while testflags:
if testflags & 1:
num += getSizeOfTrailingDataEntry(ptr, size - num)
testflags >>= 1
# Multibyte data, if present, is included in the encryption, so
# we do not need to check the low bit.
# if flags & 1:
# num += (ord(ptr[size - num - 1]) & 0x3) + 1
return num
class DrmStripper:
def loadSection(self, section):
if (section + 1 == self.num_sections):
endoff = len(self.data_file)
else:
endoff = self.sections[section + 1][0]
off = self.sections[section][0]
return self.data_file[off:endoff]
def patch(self, off, new):
self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
def patchSection(self, section, new, in_off = 0):
if (section + 1 == self.num_sections):
endoff = len(self.data_file)
else:
endoff = self.sections[section + 1][0]
off = self.sections[section][0]
assert off + in_off + len(new) <= endoff
self.patch(off + in_off, new)
def parseDRM(self, data, count, pid):
pid = pid.ljust(16,'\0')
keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
temp_key = PC1(keyvec1, pid, False)
temp_key_sum = sum(map(ord,temp_key)) & 0xff
found_key = None
for i in xrange(count):
verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
cookie = PC1(temp_key, cookie)
ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
found_key = finalkey
break
if not found_key:
# Then try the default encoding that doesn't require a PID
temp_key = keyvec1
temp_key_sum = sum(map(ord,temp_key)) & 0xff
for i in xrange(count):
verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
cookie = PC1(temp_key, cookie)
ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
if verification == ver and cksum == temp_key_sum:
found_key = finalkey
break
return found_key
def __init__(self, data_file, pid):
if checksumPid(pid[0:-2]) != pid:
raise DrmException("invalid PID checksum")
pid = pid[0:-2]
self.data_file = data_file
header = data_file[0:72]
if header[0x3C:0x3C+8] != 'BOOKMOBI':
raise DrmException("invalid file format")
self.num_sections, = struct.unpack('>H', data_file[76:78])
self.sections = []
for i in xrange(self.num_sections):
offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
flags, val = a1, a2<<16|a3<<8|a4
self.sections.append( (offset, flags, val) )
sect = self.loadSection(0)
records, = struct.unpack('>H', sect[0x8:0x8+2])
mobi_length, = struct.unpack('>L',sect[0x14:0x18])
mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
extra_data_flags = 0
print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
if (mobi_length >= 0xE4) and (mobi_version >= 5):
extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
print "Extra Data Flags = %d" %extra_data_flags
crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
if crypto_type == 0:
print "This book is not encrypted."
else:
if crypto_type == 1:
raise DrmException("cannot decode Mobipocket encryption type 1")
if crypto_type != 2:
raise DrmException("unknown encryption type: %d" % crypto_type)
# calculate the keys
drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
if drm_count == 0:
raise DrmException("no PIDs found in this file")
found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
if not found_key:
raise DrmException("no key found. maybe the PID is incorrect")
# kill the drm keys
self.patchSection(0, "\0" * drm_size, drm_ptr)
# kill the drm pointers
self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
# clear the crypto type
self.patchSection(0, "\0" * 2, 0xC)
# decrypt sections
print "Decrypting. Please wait . . .",
new_data = self.data_file[:self.sections[1][0]]
for i in xrange(1, records+1):
data = self.loadSection(i)
extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
if i%100 == 0:
print ".",
# print "record %d, extra_size %d" %(i,extra_size)
new_data += PC1(found_key, data[0:len(data) - extra_size])
if extra_size > 0:
new_data += data[-extra_size:]
#self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
if self.num_sections > records+1:
new_data += self.data_file[self.sections[records+1][0]:]
self.data_file = new_data
print "done"
def getResult(self):
return self.data_file
if not __name__ == "__main__":
from calibre.customize import FileTypePlugin
class MobiDeDRM(FileTypePlugin):
name = 'MobiDeDRM' # Name of the plugin
description = 'Removes DRM from secure Mobi files'
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
author = 'The Dark Reverser' # The author of this plugin
version = (0, 1, 6) # The version number of this plugin
file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
def run(self, path_to_ebook):
from calibre.gui2 import is_ok_to_use_qt
from PyQt4.Qt import QMessageBox
PID = self.site_customization
data_file = file(path_to_ebook, 'rb').read()
ar = PID.split(',')
for i in ar:
try:
unlocked_file = DrmStripper(data_file, i).getResult()
except DrmException:
# ignore the error
pass
else:
of = self.temporary_file('.mobi')
of.write(unlocked_file)
of.close()
return of.name
if is_ok_to_use_qt():
d = QMessageBox(QMessageBox.Warning, "MobiDeDRM Plugin", "Couldn't decode: %s\n\nImporting encrypted version." % path_to_ebook)
d.show()
d.raise_()
d.exec_()
return path_to_ebook
def customization_help(self, gui=False):
return 'Enter PID (separate multiple PIDs with comma)'
if __name__ == "__main__":
sys.stdout=Unbuffered(sys.stdout)
print ('MobiDeDrm v%(__version__)s. '
'Copyright 2008-2010 The Dark Reverser.' % globals())
if len(sys.argv)<4:
print "Removes protection from Mobipocket books"
print "Usage:"
print " %s <infile> <outfile> <PID>" % sys.argv[0]
sys.exit(1)
else:
infile = sys.argv[1]
outfile = sys.argv[2]
pid = sys.argv[3]
data_file = file(infile, 'rb').read()
try:
strippedFile = DrmStripper(data_file, pid)
file(outfile, 'wb').write(strippedFile.getResult())
except DrmException, e:
print "Error: %s" % e
sys.exit(1)
sys.exit(0)

View File

@@ -1,13 +1,13 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-
# unswindle.pyw, version 6-rc1
# Copyright © 2009 i♥cabbages
# unswindle.pyw, version 7
# Copyright © 2009-2010 i♥cabbages
# Released under the terms of the GNU General Public Licence, version 3 or
# later. <http://www.gnu.org/licenses/>
# To run this program install a 32-bit version of Python 2.6 from
# Before running this program, you must first install Python 2.6 from
# <http://www.python.org/download/>. Save this script file as unswindle.pyw.
# Find and save in the same directory a copy of mobidedrm.py. Double-click on
# unswindle.pyw. It will run Kindle For PC. Open the book you want to
@@ -22,11 +22,14 @@
# detect unsupported versions of K4PC
# 5 - Work with new (20091222) version of K4PC
# 6 - Detect and just copy DRM-free books
# 7 - Work with new (20100629) version of K4PC
"""
Decrypt Kindle For PC encrypted Mobipocket books.
"""
from __future__ import with_statement
__license__ = 'GPL v3'
import sys
@@ -622,8 +625,17 @@ class PC1KeyGrabber(object):
0x0054c9e0: '_get_pc1_pid',
0x004f8ac9: '_get_book_path',
},
'd791f52dd2ecc68722212d801ad52cb79d1b6fc9': {
0x0041724d: '_i_like_wine',
0x004bfe3d: '_no_debugger_here',
0x005bd9db: '_no_debugger_here',
0x00565920: '_get_pc1_pid',
0x0050fde9: '_get_book_path',
},
}
MOBI_EXTENSIONS = set(['.prc', '.pdb', '.mobi', '.azw', '.az1', '.azw1'])
@classmethod
def supported_version(cls, hexdigest):
return (hexdigest in cls.HOOKS)
@@ -658,7 +670,8 @@ class PC1KeyGrabber(object):
path = path.decode('utf-16', 'ignore')
if u'\0' in path:
path = path[:path.index(u'\0')]
if path[-4:].lower() not in ('.prc', '.pdb', '.mobi'):
root, ext = os.path.splitext(path)
if ext.lower() not in self.MOBI_EXTENSIONS:
return
self.book_path = path
@@ -667,7 +680,6 @@ class PC1KeyGrabber(object):
addr = debugger.read_process_memory(addr, type=ctypes.c_char_p)
pid = debugger.read_process_memory(addr, 8)
pid = self._checksum_pid(pid)
print pid
self.book_pid = pid
def _checksum_pid(self, s):

View File

@@ -21,13 +21,44 @@
# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
# 0.08 - ...and also not in Mobi header version < 6
# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
# import filter it works when importing unencrypted files.
# Also now handles encrypted files that don't need a specific PID.
# 0.11 - use autoflushed stdout and proper return values
# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
# and extra blank lines, converted CR/LF pairs at ends of each line,
# and other cosmetic fixes.
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
# files reveals that a confusin has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
# 0.15 - Now outputs 'hearbeat', and is also quicker for long files.
# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
import sys,struct,binascii
__version__ = '0.16'
import sys
import struct
import binascii
class Unbuffered:
def __init__(self, stream):
self.stream = stream
def write(self, data):
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
class DrmException(Exception):
pass
#implementation of Pukall Cipher 1
# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
sum1 = 0;
sum2 = 0;
@@ -92,8 +123,10 @@ def getSizeOfTrailingDataEntries(ptr, size, flags):
if testflags & 1:
num += getSizeOfTrailingDataEntry(ptr, size - num)
testflags >>= 1
if flags & 1:
num += (ord(ptr[size - num - 1]) & 0x3) + 1
# Multibyte data, if present, is included in the encryption, so
# we do not need to check the low bit.
# if flags & 1:
# num += (ord(ptr[size - num - 1]) & 0x3) + 1
return num
class DrmStripper:
@@ -130,11 +163,20 @@ class DrmStripper:
if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
found_key = finalkey
break
if not found_key:
# Then try the default encoding that doesn't require a PID
temp_key = keyvec1
temp_key_sum = sum(map(ord,temp_key)) & 0xff
for i in xrange(count):
verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
cookie = PC1(temp_key, cookie)
ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
if verification == ver and cksum == temp_key_sum:
found_key = finalkey
break
return found_key
def __init__(self, data_file, pid):
if checksumPid(pid[0:-2]) != pid:
raise DrmException("invalid PID checksum")
pid = pid[0:-2]
@@ -156,16 +198,15 @@ class DrmStripper:
mobi_length, = struct.unpack('>L',sect[0x14:0x18])
mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
extra_data_flags = 0
print "MOBI header length = %d" %mobi_length
print "MOBI header version = %d" %mobi_version
if (mobi_length >= 0xE4) and (mobi_version > 5):
print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
if (mobi_length >= 0xE4) and (mobi_version >= 5):
extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
print "Extra Data Flags = %d" %extra_data_flags
crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
if crypto_type == 0:
raise DrmException("it seems that this book isn't encrypted")
print "This book is not encrypted."
else:
if crypto_type == 1:
raise DrmException("cannot decode Mobipocket encryption type 1")
if crypto_type != 2:
@@ -187,13 +228,23 @@ class DrmStripper:
self.patchSection(0, "\0" * 2, 0xC)
# decrypt sections
print "Decrypting. Please wait...",
print "Decrypting. Please wait . . .",
new_data = self.data_file[:self.sections[1][0]]
for i in xrange(1, records+1):
data = self.loadSection(i)
extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
if i%100 == 0:
print ".",
# print "record %d, extra_size %d" %(i,extra_size)
self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
new_data += PC1(found_key, data[0:len(data) - extra_size])
if extra_size > 0:
new_data += data[-extra_size:]
#self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
if self.num_sections > records+1:
new_data += self.data_file[self.sections[records+1][0]:]
self.data_file = new_data
print "done"
def getResult(self):
return self.data_file
@@ -201,49 +252,59 @@ if not __name__ == "__main__":
from calibre.customize import FileTypePlugin
class MobiDeDRM(FileTypePlugin):
name = 'MobiDeDRM' # Name of the plugin
description = 'Removes DRM from secure Mobi files'
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
author = 'The Dark Reverser' # The author of this plugin
version = (0, 0, 9) # The version number of this plugin
version = (0, 1, 6) # The version number of this plugin
file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import
def run(self, path_to_ebook):
of = self.temporary_file('.mobi')
from calibre.gui2 import is_ok_to_use_qt
from PyQt4.Qt import QMessageBox
PID = self.site_customization
data_file = file(path_to_ebook, 'rb').read()
ar = PID.split(',')
for i in ar:
try:
file(of.name, 'wb').write(DrmStripper(data_file, i).getResult())
unlocked_file = DrmStripper(data_file, i).getResult()
except DrmException:
# Hm, we should display an error dialog here.
# Dunno how though.
# Ignore the dirty hack behind the curtain.
# strexcept = 'echo exception: %s > /dev/tty' % e
# subprocess.call(strexcept,shell=True)
print i + ": not PID for book"
# ignore the error
pass
else:
of = self.temporary_file('.mobi')
of.write(unlocked_file)
of.close()
return of.name
if is_ok_to_use_qt():
d = QMessageBox(QMessageBox.Warning, "MobiDeDRM Plugin", "Couldn't decode: %s\n\nImporting encrypted version." % path_to_ebook)
d.show()
d.raise_()
d.exec_()
return path_to_ebook
def customization_help(self, gui=False):
return 'Enter PID (separate multiple PIDs with comma)'
if __name__ == "__main__":
print "MobiDeDrm v0.09. Copyright (c) 2008 The Dark Reverser"
sys.stdout=Unbuffered(sys.stdout)
print ('MobiDeDrm v%(__version__)s. '
'Copyright 2008-2010 The Dark Reverser.' % globals())
if len(sys.argv)<4:
print "Removes protection from Mobipocket books"
print "Usage:"
print " mobidedrm infile.mobi outfile.mobi PID"
print " %s <infile> <outfile> <PID>" % sys.argv[0]
sys.exit(1)
else:
infile = sys.argv[1]
outfile = sys.argv[2]
pid = sys.argv[3]
data_file = file(infile, 'rb').read()
try:
file(outfile, 'wb').write(DrmStripper(data_file, pid).getResult())
strippedFile = DrmStripper(data_file, pid)
file(outfile, 'wb').write(strippedFile.getResult())
except DrmException, e:
print "Error: %s" % e
sys.exit(1)
sys.exit(0)

View File

@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
self.tpzpath = Tkinter.Entry(body, width=50)
self.tpzpath.grid(row=0, column=1, sticky=sticky)
self.tpzpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.tpzpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky)
self.outpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2)
@@ -88,6 +92,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -108,6 +113,7 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\cmbtc_dump.py -v -d ' + pidoption + outoption + '"' + infile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -115,7 +121,7 @@ class MainDialog(Tkinter.Frame):
def get_tpzpath(self):
tpzpath = tkFileDialog.askopenfilename(
parent=None, title='Select Topaz File',
defaultextension='.prc', filetypes=[('Topaz azw1', '.azw1'), ('Topaz prc', '.prc'),
defaultextension='.prc', filetypes=[('Topaz azw', '.azw'),('Topaz azw1', '.azw1'), ('Topaz prc', '.prc'),
('All Files', '.*')])
if tpzpath:
tpzpath = os.path.normpath(tpzpath)
@@ -124,9 +130,11 @@ class MainDialog(Tkinter.Frame):
return
def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Extract Files into',
initialdir=os.getcwd(), initialfile=None)
initialdir=cwd, initialfile=None)
if outpath:
outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END)
@@ -168,6 +176,7 @@ class MainDialog(Tkinter.Frame):
log += 'First 8 chars of PID = "' + pidnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(tpzpath, outpath, pidnum)

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import sys
sys.path.append('lib')
import os, os.path, urllib
import subprocess
from subprocess import Popen, PIPE, STDOUT
import Tkinter
import Tkconstants
import tkFileDialog
import tkMessageBox
import subasyncio
from subasyncio import Process
from scrolltextwidget import ScrolledText
class MainDialog(Tkinter.Frame):
def __init__(self, root):
Tkinter.Frame.__init__(self, root, border=5)
self.root = root
self.interval = 2000
self.p2 = None
self.status = Tkinter.Label(self, text='Extract Contents of Topaz eBook to a Directory')
self.status.pack(fill=Tkconstants.X, expand=1)
body = Tkinter.Frame(self)
body.pack(fill=Tkconstants.X, expand=1)
sticky = Tkconstants.E + Tkconstants.W
body.grid_columnconfigure(1, weight=2)
Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
self.tpzpath = Tkinter.Entry(body, width=50)
self.tpzpath.grid(row=0, column=1, sticky=sticky)
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.tpzpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky)
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2)
Tkinter.Label(body, text='First 8 characters of PID').grid(row=3, sticky=Tkconstants.E)
self.pidnum = Tkinter.StringVar()
self.ccinfo = Tkinter.Entry(body, width=10, textvariable=self.pidnum)
self.ccinfo.grid(row=3, column=1, sticky=sticky)
msg1 = 'Conversion Log \n\n'
self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
self.stext.grid(row=4, column=0, columnspan=2,sticky=sticky)
self.stext.insert(Tkconstants.END,msg1)
buttons = Tkinter.Frame(self)
buttons.pack()
self.sbotton = Tkinter.Button(
buttons, text="Start", width=10, command=self.convertit)
self.sbotton.pack(side=Tkconstants.LEFT)
Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
self.qbutton = Tkinter.Button(
buttons, text="Quit", width=10, command=self.quitting)
self.qbutton.pack(side=Tkconstants.RIGHT)
# read from subprocess pipe without blocking
# invoked every interval via the widget "after"
# option being used, so need to reset it for the next time
def processPipe(self):
poll = self.p2.wait('nowait')
if poll != None:
text = self.p2.readerr()
text += self.p2.read()
msg = text + '\n\n' + 'Files successfully extracted\n'
if poll != 0:
msg = text + '\n\n' + 'Error: File Extraction Failed\n'
self.showCmdOutput(msg)
self.p2 = None
self.sbotton.configure(state='normal')
return
text = self.p2.readerr()
text += self.p2.read()
self.showCmdOutput(text)
# make sure we get invoked again by event loop after interval
self.stext.after(self.interval,self.processPipe)
return
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
# run as a subprocess via pipes and collect stdout
def topazrdr(self, infile, outdir, pidnum):
# os.putenv('PYTHONUNBUFFERED', '1')
pidoption = ' -p "' + pidnum + '" '
outoption = ' -o "' + outdir + '" '
cmdline = 'python ./lib/cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"'
if sys.platform[0:3] == 'win':
search_path = os.environ['PATH']
search_path = search_path.lower()
if search_path.find('python') >= 0:
cmdline = 'python lib\cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"'
else :
cmdline = 'lib\cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
def get_tpzpath(self):
tpzpath = tkFileDialog.askopenfilename(
parent=None, title='Select Topaz File',
defaultextension='.prc', filetypes=[('Topaz azw1', '.azw1'), ('Topaz prc', '.prc'),('Topaz azw', '.azw'),
('All Files', '.*')])
if tpzpath:
tpzpath = os.path.normpath(tpzpath)
self.tpzpath.delete(0, Tkconstants.END)
self.tpzpath.insert(0, tpzpath)
return
def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Extract Files into',
initialdir=cwd, initialfile=None)
if outpath:
outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END)
self.outpath.insert(0, outpath)
return
def quitting(self):
# kill any still running subprocess
if self.p2 != None:
if (self.p2.wait('nowait') == None):
self.p2.terminate()
self.root.destroy()
# actually ready to run the subprocess and get its output
def convertit(self):
# now disable the button to prevent multiple launches
self.sbotton.configure(state='disabled')
tpzpath = self.tpzpath.get()
outpath = self.outpath.get()
if not tpzpath or not os.path.exists(tpzpath):
self.status['text'] = 'Specified Topaz eBook file does not exist'
self.sbotton.configure(state='normal')
return
if not outpath:
self.status['text'] = 'No output directory specified'
self.sbotton.configure(state='normal')
return
if not os.path.exists(outpath):
os.makedirs(outpath)
pidnum = self.pidnum.get()
if not pidnum or pidnum == '':
self.status['text'] = 'You have not entered a PID '
self.sbotton.configure(state='normal')
return
log = 'Command = "python cmbtc_dump_nonK4PC.py"\n'
log += 'Topaz Path Path = "'+ tpzpath + '"\n'
log += 'Output Directory = "' + outpath + '"\n'
log += 'First 8 chars of PID = "' + pidnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(tpzpath, outpath, pidnum)
# python does not seem to allow you to create
# your own eventloop which every other gui does - strange
# so need to use the widget "after" command to force
# event loop to run non-gui events every interval
self.stext.after(self.interval,self.processPipe)
return
def main(argv=None):
root = Tkinter.Tk()
root.title('Topaz eBook File Extraction')
root.resizable(True, False)
root.minsize(300, 0)
MainDialog(root).pack(fill=Tkconstants.X, expand=1)
root.mainloop()
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
self.tpzpath = Tkinter.Entry(body, width=50)
self.tpzpath.grid(row=0, column=1, sticky=sticky)
self.tpzpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.tpzpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky)
self.outpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2)
@@ -88,6 +92,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -106,6 +111,7 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -122,9 +128,11 @@ class MainDialog(Tkinter.Frame):
return
def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Extract Files into',
initialdir=os.getcwd(), initialfile=None)
initialdir=cwd, initialfile=None)
if outpath:
outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END)
@@ -166,6 +174,7 @@ class MainDialog(Tkinter.Frame):
log += 'First 8 chars of PID = "' + pidnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(tpzpath, outpath, pidnum)

View File

@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
self.bookdir = Tkinter.Entry(body, width=50)
self.bookdir.grid(row=0, column=1, sticky=sticky)
self.bookdir.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.bookdir.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_bookdir)
button.grid(row=0, column=2)
@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\genhtml.py "' + bookdir + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
def get_bookdir(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
bookdir = tkFileDialog.askdirectory(
parent=None, title='Select the Directory you Extracted Topaz Files into',
initialdir=os.getcwd(), initialfile=None)
initialdir=cwd, initialfile=None)
if bookdir:
bookdir = os.path.normpath(bookdir)
self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
log += 'Book Directory = "' + bookdir + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(bookdir)

View File

@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
self.bookdir = Tkinter.Entry(body, width=50)
self.bookdir.grid(row=0, column=1, sticky=sticky)
self.bookdir.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.bookdir.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_bookdir)
button.grid(row=0, column=2)
@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\gensvg.py "' + bookdir + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
def get_bookdir(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
bookdir = tkFileDialog.askdirectory(
parent=None, title='Select the Directory you Extracted Topaz Files into',
initialdir=os.getcwd(), initialfile=None)
initialdir=cwd, initialfile=None)
if bookdir:
bookdir = os.path.normpath(bookdir)
self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
log += 'Book Directory = "' + bookdir + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(bookdir)

View File

@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
self.bookdir = Tkinter.Entry(body, width=50)
self.bookdir.grid(row=0, column=1, sticky=sticky)
self.bookdir.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.bookdir.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_bookdir)
button.grid(row=0, column=2)
@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\genxml.py "' + bookdir + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
def get_bookdir(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
bookdir = tkFileDialog.askdirectory(
parent=None, title='Select the Directory you Extracted Topaz Files into',
initialdir=os.getcwd(), initialfile=None)
initialdir=cwd, initialfile=None)
if bookdir:
bookdir = os.path.normpath(bookdir)
self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
log += 'Book Directory = "' + bookdir + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(bookdir)

View File

@@ -1,5 +1,5 @@
#! /usr/bin/python
# For use in Topaz Scripts version 2.2
# For use in Topaz Scripts version 2.6
"""

View File

@@ -1,5 +1,5 @@
#!/usr/bin/python
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
class Unbuffered:
def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
class Unbuffered:
def __init__(self, stream):
@@ -243,12 +243,15 @@ class PageParser(object):
'region.h' : (1, 'scalar_number', 0, 0),
'region.w' : (1, 'scalar_number', 0, 0),
'empty_text_region' : (1, 'snippets', 1, 0),
'img' : (1, 'snippets', 1, 0),
'img.x' : (1, 'scalar_number', 0, 0),
'img.y' : (1, 'scalar_number', 0, 0),
'img.h' : (1, 'scalar_number', 0, 0),
'img.w' : (1, 'scalar_number', 0, 0),
'img.src' : (1, 'scalar_number', 0, 0),
'img.color_src' : (1, 'scalar_number', 0, 0),
'paragraph' : (1, 'snippets', 1, 0),
'paragraph.class' : (1, 'scalar_text', 0, 0),
@@ -313,6 +316,12 @@ class PageParser(object):
'version.findlists' : (1, 'scalar_text', 0, 0),
'version.page_num' : (1, 'scalar_text', 0, 0),
'version.page_type' : (1, 'scalar_text', 0, 0),
'version.bad_text' : (1, 'scalar_text', 0, 0),
'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
'version.margins' : (1, 'scalar_text', 0, 0),
'version.staggered_lines' : (1, 'scalar_text', 0, 0),
'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
'version.toc' : (1, 'scalar_text', 0, 0),
'stylesheet' : (1, 'snippets', 1, 0),
'style' : (1, 'snippets', 1, 0),
@@ -660,16 +669,21 @@ class PageParser(object):
def process(self):
# peek at the first bytes to see what type of file it is
magic = self.fo.read(11)
if (magic[0:1] == 'p') and (magic[2:10] == '__PAGE__'):
magic = self.fo.read(9)
if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
first_token = 'info'
elif (magic[0:1] == 'g') and (magic[2:11] == '__GLYPH__'):
skip = self.fo.read(1)
elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
skip = self.fo.read(2)
first_token = 'info'
elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
first_token = 'info'
elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
skip = self.fo.read(3)
first_token = 'info'
else :
# other0.dat file
first_token = None
self.fo.seek(-11,1)
self.fo.seek(-9,1)
# main loop to read and build the document tree
@@ -695,7 +709,10 @@ class PageParser(object):
else:
if self.debug:
print "Main Loop: Unknown value: %x" % v
if (v == 0):
if (self.peek(1) == 0x5f):
skip = self.fo.read(1)
first_token = 'info'
# now do snippet injection
if len(self.snippetList) > 0 :

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
import csv
import sys

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
import sys
import csv
@@ -32,6 +32,8 @@ class DocParser(object):
self.link_id = []
self.link_title = []
self.link_page = []
self.link_href = []
self.link_type = []
self.dehyphen_rootid = []
self.paracont_stemid = []
self.parastems_stemid = []
@@ -197,6 +199,7 @@ class DocParser(object):
# get the class
def getClass(self, pclass):
nclass = pclass
# class names are an issue given topaz may start them with numerals (not allowed),
# use a mix of cases (which cause some browsers problems), and actually
# attach numbers after "_reclustered*" to the end to deal classeses that inherit
@@ -206,7 +209,10 @@ class DocParser(object):
# so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
# that exists in the stylesheet first, and then adding this specific class
# after
# also some class names have spaces in them so need to convert to dashes
if nclass != None :
nclass = nclass.replace(' ','-')
classres = ''
nclass = nclass.lower()
nclass = 'cl-' + nclass
@@ -334,7 +340,7 @@ class DocParser(object):
result.append(('svg', num))
return pclass, result
# this type of paragrph may be made up of multiple spans, inline
# this type of paragraph may be made up of multiple spans, inline
# word monograms (images), and words with semantic meaning,
# plus glyphs used to form starting letter of first word
@@ -391,6 +397,9 @@ class DocParser(object):
result.append(('img' + word_class, int(argres)))
word_class = ''
elif name.endswith('region.img.src'):
result.append(('img' + word_class, int(argres)))
if (sp_first != -1) and (sp_last != -1):
for wordnum in xrange(sp_first, sp_last):
result.append(('ocr', wordnum))
@@ -437,6 +446,8 @@ class DocParser(object):
if (type == 'end'):
parares += ' '
lstart = len(parares)
cnt = len(pdesc)
for j in xrange( 0, cnt) :
@@ -450,17 +461,27 @@ class DocParser(object):
if handle_links:
link = self.link_id[num]
if (link > 0):
linktype = self.link_type[link-1]
title = self.link_title[link-1]
if (title == "") or (parares.rfind(title) < 0):
title='_link_'
title=parares[lstart:]
if linktype == 'external' :
linkhref = self.link_href[link-1]
linkhtml = '<a href="%s">' % linkhref
else :
if len(self.link_page) >= link :
ptarget = self.link_page[link-1] - 1
linkhtml = '<a href="#page%04d">' % ptarget
else :
# just link to the current page
linkhtml = '<a href="#' + self.id + '">'
linkhtml += title + '</a>'
pos = parares.rfind(title)
if pos >= 0:
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
else :
parares += linkhtml
lstart = len(parares)
if word == '_link_' : word = ''
elif (link < 0) :
if word == '_link_' : word = ''
@@ -532,6 +553,14 @@ class DocParser(object):
# collect link destination page numbers
self.link_page = self.getData('info.links.page',0,-1)
# collect link types (container versus external)
(pos, argres) = self.findinDoc('info.links.type',0,-1)
if argres : self.link_type = argres.split('|')
# collect link destinations
(pos, argres) = self.findinDoc('info.links.href',0,-1)
if argres : self.link_href = argres.split('|')
# collect link titles
(pos, argres) = self.findinDoc('info.links.title',0,-1)
if argres :
@@ -641,16 +670,18 @@ class DocParser(object):
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
elif (regtype == 'synth_fcvr.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
else :
print 'Warning: region type', regtype
print ' Making region type', regtype,
(pos, temp) = self.findinDoc('paragraph',start,end)
if pos != -1:
print ' is a "text" region'
(pos2, temp) = self.findinDoc('span',start,end)
if pos != -1 or pos2 != -1:
print ' a "text" region'
orig_regtype = regtype
regtype = 'fixed'
ptype = 'full'
# check to see if this is a continution from the previous page
@@ -658,6 +689,11 @@ class DocParser(object):
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not pclass:
if orig_regtype.endswith('.right') : pclass = 'cl-right'
elif orig_regtype.endswith('.center') : pclass = 'cl-center'
elif orig_regtype.endswith('.left') : pclass = 'cl-left'
elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
if pclass and (ptype == 'full') and (len(pclass) >= 6):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
@@ -669,7 +705,7 @@ class DocParser(object):
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
else :
print ' is a "graphic" region'
print ' a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
class Unbuffered:
def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
class Unbuffered:
def __init__(self, stream):
@@ -28,15 +28,24 @@ class GParser(object):
self.gh = self.getData('info.glyph.h')
self.gw = self.getData('info.glyph.w')
self.guse = self.getData('info.glyph.use')
if self.guse :
self.count = len(self.guse)
else :
self.count = 0
self.gvtx = self.getData('info.glyph.vtx')
self.glen = self.getData('info.glyph.len')
self.gdpi = self.getData('info.glyph.dpi')
self.vx = self.getData('info.vtx.x')
self.vy = self.getData('info.vtx.y')
self.vlen = self.getData('info.len.n')
if self.vlen :
self.glen.append(len(self.vlen))
elif self.glen:
self.glen.append(0)
if self.vx :
self.gvtx.append(len(self.vx))
elif self.gvtx :
self.gvtx.append(0)
def getData(self, path):
result = None

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
class Unbuffered:
def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
import csv
import sys

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2
# For use with Topaz Scripts Version 2.6
import csv
import sys
@@ -85,7 +85,10 @@ class DocParser(object):
def process(self):
classlst = ''
csspage = ''
csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
csspage += '.cl-right { text-align: right; }\n'
csspage += '.cl-left { text-align: left; }\n'
csspage += '.cl-justify { text-align: justify; }\n'
# generate a list of each <style> starting point in the stylesheet
styleList= self.posinDoc('book.stylesheet.style')
@@ -108,6 +111,7 @@ class DocParser(object):
# get the style class
(pos, sclass) = self.findinDoc('style.class',start,end)
if sclass != None:
sclass = sclass.replace(' ','-')
sclass = '.cl-' + sclass.lower()
else :
sclass = ''
@@ -115,6 +119,7 @@ class DocParser(object):
# check for any "after class" specifiers
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
if aftclass != None:
aftclass = aftclass.replace(' ','-')
aftclass = '.cl-' + aftclass.lower()
else :
aftclass = ''
@@ -216,6 +221,7 @@ class DocParser(object):
if ctype == 'h3_' :
csspage += 'h6' + cssline + '\n'
if cssline != ' { }':
csspage += self.stags[tag] + cssline + '\n'

View File

@@ -1,3 +1,21 @@
Changes in this Version
- bug fix to prevent problems with sample books
modified version of patch submitted by that-guy
Changes in 2.6
- fix for many additional version tags
- fixes to generate better links
- fixes to handle external links
- now handles new "marker" page .dat files
- improved special region handling
- properly handle class names with spaces
- handle default alignment for synthetic regions
Changes in 2.3
- fix for use with non-latin1 based systems (thank you Tedd)
- fixes for out of order tokens in xml
Changes in 2.2
- fix for minor bug in encode_Number from clark nova
- more fixes to handle paths with spaces in them

View File

@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='eBook Pml input file').grid(row=0, sticky=Tkconstants.E)
self.pmlpath = Tkinter.Entry(body, width=50)
self.pmlpath.grid(row=0, column=1, sticky=sticky)
self.pmlpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.pmlpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_pmlpath)
button.grid(row=0, column=2)
@@ -82,6 +84,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -98,6 +101,7 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -156,6 +160,7 @@ class MainDialog(Tkinter.Frame):
log += 'HTML Output File = "' + outpath + '"\n'
log += '\n\n'
log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.pmlhtml(pmlpath, outpath)

View File

@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='eBook PDB input file').grid(row=0, sticky=Tkconstants.E)
self.pdbpath = Tkinter.Entry(body, width=50)
self.pdbpath.grid(row=0, column=1, sticky=sticky)
self.pdbpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.pdbpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_pdbpath)
button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky)
self.outpath.insert(0, os.getcwd())
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2)
@@ -93,6 +97,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
@@ -109,6 +114,7 @@ class MainDialog(Tkinter.Frame):
else :
cmdline = 'lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
@@ -125,9 +131,11 @@ class MainDialog(Tkinter.Frame):
return
def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Store Output into',
initialdir=os.getcwd(), initialfile=None)
initialdir=cwd, initialfile=None)
if outpath:
outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END)
@@ -175,6 +183,7 @@ class MainDialog(Tkinter.Frame):
log += 'Last 8 of CC = "' + ccnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.erdr(pdbpath, outpath, name, ccnum)

View File

@@ -0,0 +1,180 @@
#!/usr/bin/env python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import sys
sys.path.append('lib')
import os, os.path, urllib
import subprocess
from subprocess import Popen, PIPE, STDOUT
import Tkinter
import Tkconstants
import tkFileDialog
import tkMessageBox
import subasyncio
from subasyncio import Process
from scrolltextwidget import ScrolledText
class MainDialog(Tkinter.Frame):
def __init__(self, root):
Tkinter.Frame.__init__(self, root, border=5)
self.root = root
self.interval = 2000
self.p2 = None
self.status = Tkinter.Label(self, text='eReader eBook Conversion to PMLZ')
self.status.pack(fill=Tkconstants.X, expand=1)
body = Tkinter.Frame(self)
body.pack(fill=Tkconstants.X, expand=1)
sticky = Tkconstants.E + Tkconstants.W
body.grid_columnconfigure(1, weight=2)
Tkinter.Label(body, text='eBook PDB input file').grid(row=0, sticky=Tkconstants.E)
self.pdbpath = Tkinter.Entry(body, width=50)
self.pdbpath.grid(row=0, column=1, sticky=sticky)
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.pdbpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_pdbpath)
button.grid(row=0, column=2)
Tkinter.Label(body, text='Name on CC').grid(row=1, sticky=Tkconstants.E)
self.name = Tkinter.StringVar()
self.nameinfo = Tkinter.Entry(body, width=40, textvariable=self.name)
self.nameinfo.grid(row=1, column=1, sticky=sticky)
Tkinter.Label(body, text='Last 8 digits of CC Number').grid(row=2, sticky=Tkconstants.E)
self.ccnum = Tkinter.StringVar()
self.ccinfo = Tkinter.Entry(body, width=10, textvariable=self.ccnum)
self.ccinfo.grid(row=2, column=1, sticky=sticky)
msg1 = 'Conversion Log \n\n'
self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
self.stext.grid(row=3, column=0, columnspan=2,sticky=sticky)
self.stext.insert(Tkconstants.END,msg1)
buttons = Tkinter.Frame(self)
buttons.pack()
self.sbotton = Tkinter.Button(
buttons, text="Start", width=10, command=self.convertit)
self.sbotton.pack(side=Tkconstants.LEFT)
Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
self.qbutton = Tkinter.Button(
buttons, text="Quit", width=10, command=self.quitting)
self.qbutton.pack(side=Tkconstants.RIGHT)
# read from subprocess pipe without blocking
# invoked every interval via the widget "after"
# option being used, so need to reset it for the next time
def processPipe(self):
poll = self.p2.wait('nowait')
if poll != None:
text = self.p2.readerr()
text += self.p2.read()
msg = text + '\n\n' + 'File successfully converted\n'
if poll != 0:
msg = text + '\n\n' + 'Error: Conversion Failed\n'
self.showCmdOutput(msg)
self.p2 = None
self.sbotton.configure(state='normal')
return
text = self.p2.readerr()
text += self.p2.read()
self.showCmdOutput(text)
# make sure we get invoked again by event loop after interval
self.stext.after(self.interval,self.processPipe)
return
# post output from subprocess in scrolled text widget
def showCmdOutput(self, msg):
if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END)
return
# run erdr2pml.py as a subprocess via pipes and collect stdout
def erdr(self, infile, name, ccnum):
# os.putenv('PYTHONUNBUFFERED', '1')
cmdline = 'python ./lib/erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
if sys.platform[0:3] == 'win':
search_path = os.environ['PATH']
search_path = search_path.lower()
if search_path.find('python') >= 0:
cmdline = 'python lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
else :
cmdline = 'lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2
def get_pdbpath(self):
pdbpath = tkFileDialog.askopenfilename(
parent=None, title='Select eReader PDB File',
defaultextension='.pdb', filetypes=[('eReader eBooks', '.pdb'),
('All Files', '.*')])
if pdbpath:
pdbpath = os.path.normpath(pdbpath)
self.pdbpath.delete(0, Tkconstants.END)
self.pdbpath.insert(0, pdbpath)
return
def quitting(self):
# kill any still running subprocess
if self.p2 != None:
if (self.p2.wait('nowait') == None):
self.p2.terminate()
self.root.destroy()
# actually ready to run the subprocess and get its output
def convertit(self):
# now disable the button to prevent multiple launches
self.sbotton.configure(state='disabled')
pdbpath = self.pdbpath.get()
if not pdbpath or not os.path.exists(pdbpath):
self.status['text'] = 'Specified eBook file does not exist'
self.sbotton.configure(state='normal')
return
name = self.name.get()
if not name or name == '':
self.status['text'] = 'Your forgot to enter the Name on the CC'
self.sbotton.configure(state='normal')
return
ccnum = self.ccnum.get()
if not ccnum or ccnum == '':
self.status['text'] = 'Your forgot to enter the last 8 digits on the CC'
self.sbotton.configure(state='normal')
return
log = 'Command = "python erdr2pml.py --make-pmlz "\n'
log += 'PDB Path = "'+ pdbpath + '"\n'
log += 'Name = "' + name + '"\n'
log += 'Last 8 of CC = "' + ccnum + '"\n'
log += '\n\n'
log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log)
self.p2 = self.erdr(pdbpath, name, ccnum)
# python does not seem to allow you to create
# your own eventloop which every other gui does - strange
# so need to use the widget "after" command to force
# event loop to run non-gui events every interval
self.stext.after(self.interval,self.processPipe)
return
def main(argv=None):
root = Tkinter.Tk()
root.title('eReader PDB to PMLZ Conversion')
root.resizable(True, False)
root.minsize(300, 0)
MainDialog(root).pack(fill=Tkconstants.X, expand=1)
root.mainloop()
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -52,8 +52,10 @@
# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook
# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
# 0.13 - change to unbuffered stdout for use with gui front ends
# 0.14 - contributed enhancement to support --make-pmlz switch
# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
__version__='0.13'
__version__='0.15'
# Import Psyco if available
try:
@@ -85,7 +87,7 @@ class Unbuffered:
import sys
sys.stdout=Unbuffered(sys.stdout)
import struct, binascii, zlib, os, os.path, urllib
import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile
try:
from hashlib import sha1
@@ -464,17 +466,6 @@ class EreaderProcessor(object):
data = sect[62:]
return sanitizeFileName(name), data
def cleanPML(self,pml):
# Update old \b font tag with correct \B bold font tag
pml2 = pml.replace('\\b', '\\B')
# Convert special characters to proper PML code. High ASCII start at (\x82, \a130) and go up to (\xff, \a255)
for k in xrange(130,256):
# a2b_hex takes in a hexidecimal as a string and converts it
# to a binary ascii code that we search and replace for
badChar=binascii.a2b_hex('%02x' % k)
pml2 = pml2.replace(badChar, '\\a%03d' % k)
#end for k
return pml2
# def getChapterNamePMLOffsetData(self):
# cv = ''
@@ -563,6 +554,14 @@ class EreaderProcessor(object):
return r
def cleanPML(pml):
# Convert special characters to proper PML code. High ASCII start at (\x80, \a128) and go up to (\xff, \a255)
pml2 = pml
for k in xrange(128,256):
badChar = chr(k)
pml2 = pml2.replace(badChar, '\\a%03d' % k)
return pml2
def convertEreaderToPml(infile, name, cc, outdir):
if not os.path.exists(outdir):
os.makedirs(outdir)
@@ -584,7 +583,7 @@ def convertEreaderToPml(infile, name, cc, outdir):
print " Extracting pml"
pml_string = er.getText()
pmlfilename = bookname + ".pml"
file(os.path.join(outdir, pmlfilename),'wb').write(pml_string)
file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))
# bkinfo = er.getBookInfo()
# if bkinfo != '':
@@ -592,27 +591,54 @@ def convertEreaderToPml(infile, name, cc, outdir):
# file(os.path.join(outdir, 'bookinfo.txt'),'wb').write(bkinfo)
def main(argv=None):
global bookname
if argv is None:
argv = sys.argv
print "eRdr2Pml v%s. Copyright (c) 2009 The Dark Reverser" % __version__
if len(argv)!=4 and len(argv)!=5:
def usage():
print "Converts DRMed eReader books to PML Source"
print "Usage:"
print " erdr2pml infile.pdb [outdir] \"your name\" credit_card_number "
print " erdr2pml [options] infile.pdb [outdir] \"your name\" credit_card_number "
print " "
print "Options: "
print " -h prints this message"
print " --make-pmlz create PMLZ instead of using output directory"
print " "
print "Note:"
print " if ommitted, outdir defaults based on 'infile.pdb'"
print " It's enough to enter the last 8 digits of the credit card number"
return
def main(argv=None):
global bookname
try:
opts, args = getopt.getopt(sys.argv[1:], "h", ["make-pmlz"])
except getopt.GetoptError, err:
print str(err)
usage()
return 1
make_pmlz = False
zipname = None
for o, a in opts:
if o == "-h":
usage()
return 0
elif o == "--make-pmlz":
make_pmlz = True
zipname = ''
print "eRdr2Pml v%s. Copyright (c) 2009 The Dark Reverser" % __version__
if len(args)!=3 and len(args)!=4:
usage()
return 1
else:
if len(argv)==4:
infile, name, cc = argv[1], argv[2], argv[3]
if len(args)==3:
infile, name, cc = args[0], args[1], args[2]
outdir = infile[:-4] + '_Source'
elif len(argv)==5:
infile, outdir, name, cc = argv[1], argv[2], argv[3], argv[4]
elif len(args)==4:
infile, outdir, name, cc = args[0], args[1], args[2], args[3]
if make_pmlz :
# ignore specified outdir, use tempdir instead
outdir = tempfile.mkdtemp()
bookname = os.path.splitext(os.path.basename(infile))[0]
try:
@@ -620,9 +646,37 @@ def main(argv=None):
import time
start_time = time.time()
convertEreaderToPml(infile, name, cc, outdir)
if make_pmlz :
import zipfile
import shutil
print " Creating PMLZ file"
zipname = infile[:-4] + '.pmlz'
myZipFile = zipfile.ZipFile(zipname,'w',zipfile.ZIP_STORED, False)
list = os.listdir(outdir)
for file in list:
localname = file
filePath = os.path.join(outdir,file)
if os.path.isfile(filePath):
myZipFile.write(filePath, localname)
elif os.path.isdir(filePath):
imageList = os.listdir(filePath)
localimgdir = os.path.basename(filePath)
for image in imageList:
localname = os.path.join(localimgdir,image)
imagePath = os.path.join(filePath,image)
if os.path.isfile(imagePath):
myZipFile.write(imagePath, localname)
myZipFile.close()
# remove temporary directory
shutil.rmtree(outdir)
end_time = time.time()
search_time = end_time - start_time
print 'elapsed time: %.2f seconds' % (search_time, )
if make_pmlz :
print 'output is %s' % zipname
else :
print 'output in %s' % outdir
print "done"
except ValueError, e:

View File

@@ -30,8 +30,10 @@
# 0.17 - add support for tidy.exe under windows
# 0.18 - fix corner case of lines that start with \axxx or \Uxxxx tags
# 0.19 - change to use auto flushed stdout, and use proper return values
# 0.20 - properly handle T markup inside links
# 0.21 - properly handle new sigil Chapter Breaks for 0.2X series and up
__version__='0.19'
__version__='0.21'
class Unbuffered:
def __init__(self, stream):
@@ -624,7 +626,7 @@ class PmlConverter(object):
if sigil_breaks:
if (len(final) - lastbreaksize) > 3000:
final += '<div>\n <hr class="sigilChapterBreak" />\n</div>\n'
final += '<hr class="sigilChapterBreak" />\n'
lastbreaksize = len(final)
# now create new start tags for all tags that
@@ -699,7 +701,7 @@ class PmlConverter(object):
self.skipNewLine()
elif cmd == 'T':
if inBlock():
if inBlock() or inLink() or inComment():
final += '<span style="margin-left: %s;">&nbsp;</span>' % attr
else:
final += '<p style="text-indent: %s;">' % attr

View File

@@ -1,6 +1,7 @@
#! /usr/bin/python
# ineptpdf5.pyw, version 5
# ineptpdf74.pyw
# ineptpdf, version 7.4
# To run this program install Python 2.6 from http://www.python.org/download/
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
@@ -13,7 +14,19 @@
# 3 - Correctly handle PDF >=1.5 cross-reference streams
# 4 - Removal of ciando's personal ID (anon)
# 5 - removing small bug with V3 ebooks (anon)
# 6 - changed to adeptkey4.der format for 1.7.2 support (anon)
# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der
# 7 - Get cross reference streams and object streams working for input.
# Not yet supported on output but this only effects file size,
# not functionality. (by anon2)
# 7.1 - Correct a problem when an old trailer is not followed by startxref
# 7.2 - Correct malformed Mac OS resource forks for Stanza
# - Support for cross ref streams on output (decreases file size)
# 7.3 - Correct bug in trailer with cross ref stream that caused the error
# "The root object is missing or invalid" in Adobe Reader.
# 7.4 - Force all generation numbers in output file to be 0, like in v6.
# Fallback code for wrong xref improved (search till last trailer
# instead of first)
"""
Decrypt Adobe ADEPT-encrypted PDF files.
"""
@@ -41,11 +54,24 @@ try:
except ImportError:
ARC4 = None
RSA = None
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
class ADEPTError(Exception):
pass
# Do we generate cross reference streams on output?
# 0 = never
# 1 = only if present in input
# 2 = always
GEN_XREF_STM = 1
# This is the value for the current document
gen_xref_stm = False # will be set in PDFSerializer
###
### ASN.1 parsing code from tlslite
@@ -288,6 +314,7 @@ END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(r'[()\134]')
OCT_STRING = re.compile(r'[0-7]')
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
class PSBaseParser(object):
'''
@@ -568,16 +595,17 @@ class PSBaseParser(object):
pos = self.fp.tell()
buf = ''
while 0 < pos:
prevpos = pos
pos = max(0, pos-self.BUFSIZ)
self.fp.seek(pos)
s = self.fp.read(self.BUFSIZ)
s = self.fp.read(prevpos-pos)
if not s: break
while 1:
n = max(s.rfind('\r'), s.rfind('\n'))
if n == -1:
buf = s + buf
break
yield buf+s[n:]
yield s[n:]+buf
s = s[:n]
buf = ''
return
@@ -633,7 +661,7 @@ class PSStackParser(PSBaseParser):
def do_keyword(self, pos, token):
return
def nextobject(self):
def nextobject(self, direct=False):
'''
Yields a list of objects: keywords, literals, strings,
numbers, arrays and dictionaries. Arrays and dictionaries
@@ -678,6 +706,8 @@ class PSStackParser(PSBaseParser):
if self.context:
continue
else:
if direct:
return self.pop(1)[0]
self.flush()
obj = self.results.pop(0)
return obj
@@ -703,17 +733,17 @@ class PDFNotImplementedError(PSException): pass
##
class PDFObjRef(PDFObject):
def __init__(self, doc, objid, _):
def __init__(self, doc, objid, genno):
if objid == 0:
if STRICT:
raise PDFValueError('PDF object id cannot be 0.')
self.doc = doc
self.objid = objid
#self.genno = genno # Never used.
self.genno = genno
return
def __repr__(self):
return '<PDFObjRef:%d>' % (self.objid)
return '<PDFObjRef:%d %d>' % (self.objid, self.genno)
def resolve(self):
return self.doc.getobj(self.objid)
@@ -852,6 +882,7 @@ class PDFStream(PDFObject):
self.rawdata = rawdata
self.decipher = decipher
self.data = None
self.decdata = None
self.objid = None
self.genno = None
return
@@ -862,15 +893,21 @@ class PDFStream(PDFObject):
return
def __repr__(self):
if self.rawdata:
return '<PDFStream(%r): raw=%d, %r>' % \
(self.objid, len(self.rawdata), self.dic)
else:
return '<PDFStream(%r): data=%d, %r>' % \
(self.objid, len(self.data), self.dic)
def decode(self):
assert self.data == None and self.rawdata != None
assert self.data is None and self.rawdata is not None
data = self.rawdata
if self.decipher:
# Handle encryption
data = self.decipher(self.objid, self.genno, data)
if gen_xref_stm:
self.decdata = data # keep decrypted data
if 'Filter' not in self.dic:
self.data = data
self.rawdata = None
@@ -883,10 +920,6 @@ class PDFStream(PDFObject):
# will get errors if the document is encrypted.
data = zlib.decompress(data)
elif f in LITERALS_LZW_DECODE:
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
data = ''.join(LZWDecoder(StringIO(data)).run())
elif f in LITERALS_ASCII85_DECODE:
data = ascii85decode(data)
@@ -925,7 +958,7 @@ class PDFStream(PDFObject):
return
def get_data(self):
if self.data == None:
if self.data is None:
self.decode()
return self.data
@@ -933,6 +966,8 @@ class PDFStream(PDFObject):
return self.rawdata
def get_decdata(self):
if self.decdata is not None:
return self.decdata
data = self.rawdata
if self.decipher and data:
# Handle encryption
@@ -988,7 +1023,7 @@ class PDFXRef(object):
if len(f) != 2:
raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
try:
(start, nobjs) = map(long, f)
(start, nobjs) = map(int, f)
except ValueError:
raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
for objid in xrange(start, start+nobjs):
@@ -1001,7 +1036,7 @@ class PDFXRef(object):
raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
(pos, genno, use) = f
if use != 'n': continue
self.offsets[objid] = (int(genno), long(pos))
self.offsets[objid] = (int(genno), int(pos))
self.load_trailer(parser)
return
@@ -1010,7 +1045,7 @@ class PDFXRef(object):
try:
(_,kwd) = parser.nexttoken()
assert kwd is self.KEYWORD_TRAILER
(_,dic) = parser.nextobject()
(_,dic) = parser.nextobject(direct=True)
except PSEOF:
x = parser.pop(1)
if not x:
@@ -1039,7 +1074,7 @@ class PDFXRefStream(object):
return
def __repr__(self):
return '<PDFXRef: objid=%d-%d>' % (self.objid_first, self.objid_last)
return '<PDFXRef: objids=%s>' % self.index
def objids(self):
for first, size in self.index:
@@ -1124,6 +1159,7 @@ class PDFDocument(object):
for xref in self.xrefs:
trailer = xref.trailer
if not trailer: continue
# If there's an encryption info, remember it.
if 'Encrypt' in trailer:
#assert not self.encryption
@@ -1204,6 +1240,7 @@ class PDFDocument(object):
self.ready = True
return
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
def initialize_standard(self, password, docid, param):
@@ -1296,12 +1333,45 @@ class PDFDocument(object):
except KeyError:
pass
else:
return
#if STRICT:
# raise PDFSyntaxError('Cannot locate objid=%r' % objid)
return None
if stmid:
if gen_xref_stm:
return PDFObjStmRef(objid, stmid, index)
# Stuff from pdfminer: extract objects from object stream
stream = stream_value(self.getobj(stmid))
if stream.dic.get('Type') is not LITERAL_OBJSTM:
if STRICT:
raise PDFSyntaxError('Not a stream object: %r' % stream)
try:
n = stream.dic['N']
except KeyError:
if STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0
if stmid in self.parsed_objs:
objs = self.parsed_objs[stmid]
else:
parser = PDFObjStrmParser(stream.get_data(), self)
objs = []
try:
while 1:
(_,obj) = parser.nextobject()
objs.append(obj)
except PSEOF:
pass
self.parsed_objs[stmid] = objs
genno = 0
i = n*2+index
try:
obj = objs[i]
except IndexError:
raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
if isinstance(obj, PDFStream):
obj.set_objid(objid, 0)
###
else:
self.parser.seek(index)
(_,objid1) = self.parser.nexttoken() # objid
@@ -1314,16 +1384,19 @@ class PDFDocument(object):
(_,obj) = self.parser.nextobject()
if isinstance(obj, PDFStream):
obj.set_objid(objid, genno)
self.objs[objid] = obj
if self.decipher:
obj = decipher_all(self.decipher, objid, genno, obj)
self.objs[objid] = obj
return obj
class PDFObjStmRef(object):
maxindex = 0
def __init__(self, objid, stmid, index):
self.objid = objid
self.stmid = stmid
self.index = index
if index > PDFObjStmRef.maxindex:
PDFObjStmRef.maxindex = index
## PDFParser
@@ -1417,7 +1490,7 @@ class PDFParser(PSStackParser):
prev = line
else:
raise PDFNoValidXRef('Unexpected EOF')
return long(prev)
return int(prev)
# read xref table
def read_xref_from(self, start, xrefs):
@@ -1429,6 +1502,9 @@ class PDFParser(PSStackParser):
raise PDFNoValidXRef('Unexpected EOF')
if isinstance(token, int):
# XRefStream: PDF-1.5
if GEN_XREF_STM == 1:
global gen_xref_stm
gen_xref_stm = True
self.seek(pos)
self.reset()
xref = PDFXRefStream()
@@ -1454,6 +1530,7 @@ class PDFParser(PSStackParser):
# read xref tables and trailers
def read_xref(self):
xrefs = []
trailerpos = None
try:
pos = self.find_xref()
self.read_xref_from(pos, xrefs)
@@ -1468,24 +1545,56 @@ class PDFParser(PSStackParser):
(pos, line) = self.nextline()
except PSEOF:
break
if line.startswith('trailer'): break
if line.startswith('trailer'):
trailerpos = pos # remember last trailer
m = pat.match(line)
if not m: continue
(objid, genno) = m.groups()
offsets[int(objid)] = (0, pos)
if not offsets: raise
xref.offsets = offsets
self.seek(pos)
if trailerpos:
self.seek(trailerpos)
xref.load_trailer(self)
xrefs.append(xref)
return xrefs
## PDFObjStrmParser
##
class PDFObjStrmParser(PDFParser):
def __init__(self, data, doc):
PSStackParser.__init__(self, StringIO(data))
self.doc = doc
return
def flush(self):
self.add_results(*self.popall())
return
KEYWORD_R = KWD('R')
def do_keyword(self, pos, token):
if token is self.KEYWORD_R:
# reference to indirect object
try:
((_,objid), (_,genno)) = self.pop(2)
(objid, genno) = (int(objid), int(genno))
obj = PDFObjRef(self.doc, objid, genno)
self.push((pos, obj))
except PSSyntaxError:
pass
return
# others
self.push((pos, token))
return
###
### My own code, for which there is none else to blame
class PDFSerializer(object):
def __init__(self, inf, keypath):
global GEN_XREF_STM, gen_xref_stm
gen_xref_stm = GEN_XREF_STM > 1
self.version = inf.read(8)
inf.seek(0)
self.doc = doc = PDFDocument()
@@ -1510,60 +1619,94 @@ class PDFSerializer(object):
doc = self.doc
objids = self.objids
xrefs = {}
xrefstm = {}
maxobj = max(objids)
trailer = dict(self.trailer)
trailer['Size'] = maxobj + 1
for objid in objids:
obj = doc.getobj(objid)
if isinstance(obj, PDFObjStmRef):
xrefstm[objid] = obj
xrefs[objid] = obj
continue
xrefs[objid] = self.tell()
if obj is not None:
try:
genno = obj.genno
except AttributeError:
genno = 0
xrefs[objid] = (self.tell(), genno)
self.serialize_indirect(objid, obj)
startxref = self.tell()
if not gen_xref_stm:
self.write('xref\n')
self.write('0 %d\n' % (maxobj + 1,))
for objid in xrange(0, maxobj + 1):
if objid in xrefs:
self.write("%010d %05d n \n" % (xrefs[objid], 0))
# force the genno to be 0
self.write("%010d 00000 n \n" % xrefs[objid][0])
else:
self.write("%010d %05d f \n" % (0, 65535))
self.write('trailer\n')
self.serialize_object(trailer)
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
if not xrefstm:
return
else: # Generate crossref stream.
# Calculate size of entries
maxoffset = max(startxref, maxobj)
maxindex = PDFObjStmRef.maxindex
fl2 = 2
power = 65536
while maxoffset >= power:
fl2 += 1
power *= 256
fl3 = 1
power = 256
while maxindex >= power:
fl3 += 1
power *= 256
index = []
first = None
prev = None
data = []
for objid in sorted(xrefstm):
# Put the xrefstream's reference in itself
startxref = self.tell()
maxobj += 1
xrefs[maxobj] = (startxref, 0)
for objid in sorted(xrefs):
if first is None:
first = objid
elif objid != prev + 1:
index.extend((first, prev - first + 1))
first = objid
prev = objid
stmid = xrefstm[objid].stmid
data.append(struct.pack('>BHB', 2, stmid, 0))
objref = xrefs[objid]
if isinstance(objref, PDFObjStmRef):
f1 = 2
f2 = objref.stmid
f3 = objref.index
else:
f1 = 1
f2 = objref[0]
# we force all generation numbers to be 0
# f3 = objref[1]
f3 = 0
data.append(struct.pack('>B', f1))
data.append(struct.pack('>L', f2)[-fl2:])
data.append(struct.pack('>L', f3)[-fl3:])
index.extend((first, prev - first + 1))
data = zlib.compress(''.join(data))
dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
'W': [1, 2, 1], 'Length': len(data), 'Prev': startxref,
'Filter': LITERALS_FLATE_DECODE[0],}
obj = PDFStream(dic, data)
self.write('\n')
trailer['XRefStm'] = startxrefstm = self.tell()
self.serialize_indirect(maxobj + 1, obj)
trailer['Prev'] = startxref
startxref = self.tell()
self.write('xref\n')
self.write('%d 1\n' % (maxobj + 1,))
self.write("%010d %05d n \n" % (startxrefstm, 0))
self.write('trailer\n')
self.serialize_object(trailer)
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
'W': [1, fl2, fl3], 'Length': len(data),
'Filter': LITERALS_FLATE_DECODE[0],
'Root': trailer['Root'],}
if 'Info' in trailer:
dic['Info'] = trailer['Info']
xrefstm = PDFStream(dic, data)
self.serialize_indirect(maxobj, xrefstm)
self.write('startxref\n%d\n%%%%EOF' % startxref)
def write(self, data):
self.outf.write(data)
@@ -1584,6 +1727,12 @@ class PDFSerializer(object):
def serialize_object(self, obj):
if isinstance(obj, dict):
# Correct malformed Mac OS resource forks for Stanza
if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
and isinstance(obj['Type'], int):
obj['Subtype'] = obj['Type']
del obj['Type']
# end - hope this doesn't have bad effects
self.write('<<')
for key, val in obj.items():
self.write('/%s' % key)
@@ -1609,6 +1758,12 @@ class PDFSerializer(object):
self.write(' ')
self.write('%d %d R' % (obj.objid, 0))
elif isinstance(obj, PDFStream):
### If we don't generate cross ref streams the object streams
### are no longer useful, as we have extracted all objects from
### them. Therefore leave them out from the output.
if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
self.write('(deleted)')
else:
data = obj.get_decdata()
self.serialize_object(obj.dic)
self.write('stream\n')
@@ -1695,7 +1850,7 @@ class DecryptionDialog(Tkinter.Frame):
def get_inpath(self):
inpath = tkFileDialog.askopenfilename(
parent=None, title='Select ADEPT-encrypted PDF file to decrypt',
defaultextension='.epub', filetypes=[('PDF files', '.pdf'),
defaultextension='.pdf', filetypes=[('PDF files', '.pdf'),
('All files', '.*')])
if inpath:
inpath = os.path.normpath(inpath)
@@ -1706,7 +1861,7 @@ class DecryptionDialog(Tkinter.Frame):
def get_outpath(self):
outpath = tkFileDialog.asksaveasfilename(
parent=None, title='Select unencrypted PDF file to produce',
defaultextension='.epub', filetypes=[('PDF files', '.pdf'),
defaultextension='.pdf', filetypes=[('PDF files', '.pdf'),
('All files', '.*')])
if outpath:
outpath = os.path.normpath(outpath)

2206
ineptpdf8.pyw Normal file

File diff suppressed because it is too large Load Diff