tools v2.1

combined kindle/mobi plugin
This commit is contained in:
Apprentice Alf
2010-10-26 18:18:46 +01:00
parent bf03edd18c
commit 5f0671db7f
110 changed files with 5872 additions and 7089 deletions

View File

@@ -0,0 +1,172 @@
class Unbuffered:
    """Proxy around a stream that flushes it after every write."""

    def __init__(self, stream):
        # The wrapped file-like object that receives all output.
        self.stream = stream

    def write(self, data):
        """Write *data* to the wrapped stream, then flush it."""
        target = self.stream
        target.write(data)
        target.flush()

    def __getattr__(self, attr):
        # Only reached for names missing on the proxy itself, so everything
        # else is delegated to the wrapped stream.
        return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import prc, struct
from binascii import hexlify
def strByte(s,off=0):
    """Return the unsigned byte at index *off* of *s* as an integer.

    Works for sequences whose items are one-character strings as well as
    for bytes-like objects whose items are already integers.
    """
    item = s[off]
    if isinstance(item, int):
        # Indexing a bytes-like object already yields the byte value.
        return item
    return struct.unpack(">B", item)[0]
def strSWord(s,off=0):
    """Return the signed big-endian 16-bit integer stored at s[off:off+2]."""
    (value,) = struct.unpack(">h", s[off:off+2])
    return value
def strWord(s,off=0):
    """Return the unsigned big-endian 16-bit integer stored at s[off:off+2]."""
    (value,) = struct.unpack(">H", s[off:off+2])
    return value
def strDWord(s,off=0):
    """Return the unsigned big-endian 32-bit integer stored at s[off:off+4]."""
    (value,) = struct.unpack(">L", s[off:off+4])
    return value
def strPutDWord(s,off,i):
    """Return a copy of *s* with the 4 bytes at *off* replaced by *i*.

    *i* is packed as an unsigned big-endian 32-bit integer.
    """
    packed = struct.pack(">L", i)
    head, tail = s[:off], s[off+4:]
    return head + packed + tail
keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
#implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
    """Encrypt or decrypt *src* with the 16-byte *key*.

    key        -- byte string of exactly 16 bytes.
    src        -- byte string to transform.
    decryption -- True to decrypt (default), False to encrypt.

    Returns the transformed byte string (same length as *src*), or None
    after printing a message when the key length is wrong.
    """
    if len(key) != 16:
        print("Bad key length!")
        return None
    key_bytes = bytearray(key)
    # The key is used as eight big-endian 16-bit words.
    wkey = [(key_bytes[i] << 8) | key_bytes[i + 1] for i in range(0, 16, 2)]
    sum1 = 0
    sum2 = 0
    data = bytearray(src)
    # Pre-sized output buffer avoids repeated string concatenation.
    out = bytearray(len(data))
    for pos, cur in enumerate(data):
        temp1 = 0
        byte_xor = 0
        for j in range(8):
            temp1 ^= wkey[j]
            sum2 = (sum2 + j) * 20021 + sum1
            sum1 = (temp1 * 346) & 0xFFFF
            sum2 = (sum2 + sum1) & 0xFFFF
            temp1 = (temp1 * 20021 + 1) & 0xFFFF
            byte_xor ^= temp1 ^ sum2
        # The key schedule is always fed with the plaintext byte: taken
        # before the XOR when encrypting, after it when decrypting.
        if not decryption:
            key_xor = cur * 257
        cur = ((cur ^ (byte_xor >> 8)) ^ byte_xor) & 0xFF
        if decryption:
            key_xor = cur * 257
        wkey = [word ^ key_xor for word in wkey]
        out[pos] = cur
    return bytes(out)
def find_key(rec0, pid):
    """Return a copy of record 0 with the Kindle flag set in the PID's key record.

    rec0 -- raw contents of record 0 of the Mobipocket file.
    pid  -- the PID string used to derive the temporary key.

    The key-record table offset is read from 0xA8 and the record count
    from 0xAC.  For each key record whose checksum byte (offset +0xC)
    matches the PID-derived key, the 32-byte block at +0x10 is decrypted;
    if its first dword equals the record's check dword, bit 0x800 is set
    in the second dword, the block is re-encrypted and spliced back in.

    Returns the patched record, or None when the book has no DRM table or
    no key record matches the PID.
    """
    off1 = strDWord(rec0, 0xA8)
    if off1 == 0xFFFFFFFF or off1 == 0:
        print("No DRM")
        return None
    cnt = strDWord(rec0, 0xAC)
    temp_key = PC1(keyvec1, pid.ljust(16, '\0'), False)
    cksum = sum(map(ord, temp_key)) & 0xFF
    temp_key = temp_key.ljust(16, '\0')
    iOff = off1
    for _ in range(cnt):
        dwCheck = strDWord(rec0, iOff)
        dwSize = strDWord(rec0, iOff + 4)
        nCksum = strByte(rec0, iOff + 0xC)
        if nCksum == cksum:
            drmInfo = PC1(temp_key, rec0[iOff+0x10:iOff+0x30])
            # Unpacking the full layout also validates the block is 32 bytes.
            dw0, dw4, _dw18, _dw1c = struct.unpack(">II16xII", drmInfo)
            if dw0 == dwCheck:
                print("Found the matching record; setting the CustomDRM flag for Kindle")
                drmInfo = strPutDWord(drmInfo, 4, (dw4 | 0x800))
                # Head, re-encrypted 32-byte block, then the untouched
                # remainder of the record starting right after the block.
                return rec0[:iOff+0x10] + PC1(temp_key, drmInfo, False) + rec0[iOff+0x30:]
        iOff += dwSize
    return None
def replaceext(filename, newext):
    """Return *filename* with its extension replaced by *newext*.

    Only the extension of the final path component is considered, so dots
    in directory names are left alone.  A name without an extension simply
    gets *newext* appended.
    """
    import os.path
    root, _old_ext = os.path.splitext(filename)
    return root + newext
def main(argv=sys.argv):
    """Command-line entry point: patch a Mobipocket book for the Kindle.

    argv -- argument list in sys.argv layout: [program, file.mobi, PID].

    Returns the process exit code: 0 on success, 1 for usage or file
    format problems, 2 when the PID does not match the book and 3 when
    the PID is malformed.
    """
    print("The Kindleizer v0.2. Copyright (c) 2007 Igor Skochinsky")
    if len(argv) != 3:
        print("Fixes encrypted Mobipocket books to be readable by Kindle")
        print("Usage: kindlefix.py file.mobi PID")
        return 1
    fname, pid = argv[1], argv[2]
    # Accept the 10-character form by dropping its last two characters.
    if len(pid) == 10 and pid[-3] == '*':
        pid = pid[:-2]
    if len(pid) != 8 or pid[-1] != '*':
        print("PID is not valid! (should be in format AAAAAAA*DD)")
        return 3
    db = prc.File(fname)
    if db.getDBInfo()["creator"] != 'MOBI':
        print("Not a Mobi file!")
        return 1
    rec0 = db.getRecord(0)[0]
    enc = strSWord(rec0, 0xC)
    print("Encryption: %s" % enc)
    if enc != 2:
        print("Unknown encryption type")
        return 1
    if len(rec0) < 0x28 or rec0[0x10:0x14] != 'MOBI':
        print("bad file format")
        return 1
    print("Mobi publication type: %s" % strDWord(rec0, 0x18))
    formatVer = strDWord(rec0, 0x24)
    print("Mobi format version: %s" % formatVer)
    if formatVer < 4:
        print("Wrong Mobi format version")
        return 1
    new_rec0 = find_key(rec0, pid)
    if not new_rec0:
        print("PID doesn't match this file")
        return 2
    db.setRecordIdx(0, new_rec0)
    outfname = replaceext(fname, ".azw")
    if outfname == fname:
        # Never overwrite the input file.
        outfname = replaceext(fname, "_fixed.azw")
    db.save(outfname)
    print("Output written to " + outfname)
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,99 @@
#!/usr/bin/python
# Mobipocket PID calculator v0.3 for Amazon Kindle.
# Copyright (c) 2007, 2009 Igor Skochinsky <skochinsky@mail.ru>
# History:
# 0.1 Initial release
# 0.2 Added support for generating PID for iPhone (thanks to mbp)
# 0.3 changed to autoflush stdout, fixed return code usage
class Unbuffered:
    """Wrap a stream so that each write is flushed straight away."""

    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        """Pass *data* to the underlying stream and flush it."""
        wrapped = self.stream
        wrapped.write(data)
        wrapped.flush()

    def __getattr__(self, attr):
        # Anything not defined on the wrapper is looked up on the stream.
        return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import binascii
if sys.hexversion >= 0x3000000:
print "This script is incompatible with Python 3.x. Please install Python 2.6.x from python.org"
sys.exit(2)
letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
def crc32(s):
    """Return the bit-inverted binascii CRC-32 of *s*, seeded with -1.

    The result is masked to an unsigned 32-bit integer.
    """
    seeded = binascii.crc32(s, -1)
    return ~seeded & 0xFFFFFFFF
def checksumPid(s):
    """Return *s* with two checksum characters appended.

    The CRC of *s* is folded to 16 bits (upper half XORed into the lower
    half); each of its two low bytes is then mapped onto one character of
    the module-level `letters` alphabet.
    """
    folded = crc32(s)
    folded ^= folded >> 16
    alphabet_size = len(letters)
    suffix = []
    for shift in (0, 8):
        quotient, remainder = divmod((folded >> shift) & 0xff, alphabet_size)
        suffix.append(letters[(quotient ^ remainder) % alphabet_size])
    return s + "".join(suffix)
def pidFromSerial(s, l):
    """Derive an *l*-character PID from the serial number string *s*.

    The characters of *s* are XOR-folded into *l* buckets, each bucket is
    XORed with one byte of the CRC of *s* (most significant byte first,
    cycling every four buckets), and the result is mapped onto the
    module-level `letters` alphabet.
    """
    crc = crc32(s)
    buckets = [0] * l
    for index, char in enumerate(s):
        buckets[index % l] ^= ord(char)
    crc_bytes = [(crc >> shift) & 0xff for shift in (24, 16, 8, 0)]
    chars = []
    for index in range(l):
        b = (buckets[index] ^ crc_bytes[index & 3]) & 0xff
        chars.append(letters[(b >> 7) + (((b >> 5) & 3) ^ (b & 0x1f))])
    return "".join(chars)
def main(argv=sys.argv):
    """Command-line entry point: print the Mobipocket PID for a device.

    argv -- argument list in sys.argv layout: [program, serial-or-UDID].

    A 16-character argument is treated as a Kindle serial number and a
    40-character one as an iPhone/iPod Touch UDID.  Returns 0 when a PID
    was printed and 1 for usage errors or unrecognized input.
    """
    print("Mobipocket PID calculator for Amazon Kindle. Copyright (c) 2007, 2009 Igor Skochinsky")
    if len(argv) != 2:
        print("Usage: kindlepid.py <Kindle Serial Number>/<iPhone/iPod Touch UDID>")
        return 1
    serial = argv[1]
    if len(serial) == 16:
        known_models = (
            ("B001", "Kindle 1 serial number detected"),
            ("B002", "Kindle 2 serial number detected"),
            ("B003", "Kindle 2 Global serial number detected"),
            ("B004", "Kindle DX serial number detected"),
            ("B005", "Kindle DX International serial number detected"),
        )
        for prefix, message in known_models:
            if serial.startswith(prefix):
                print(message)
                break
        else:
            print("Warning: unrecognized serial number. Please recheck input.")
            return 1
        pid = pidFromSerial(serial, 7) + "*"
        print("Mobipocked PID for Kindle serial# " + serial + " is " + checksumPid(pid))
        return 0
    if len(serial) == 40:
        print("iPhone serial number (UDID) detected")
        pid = pidFromSerial(serial, 8)
        print("Mobipocked PID for iPhone serial# " + serial + " is " + checksumPid(pid))
        return 0
    print("Warning: unrecognized serial number. Please recheck input.")
    return 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,325 @@
#!/usr/bin/python
#
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
#
# It can run standalone to convert files, or it can be installed as a
# plugin for Calibre (http://calibre-ebook.com/about) so that
# importing files with DRM 'Just Works'.
#
# To create a Calibre plugin, rename this file so that the filename
# ends in '_plugin.py', put it into a ZIP file and import that into Calibre
# using its plugin configuration GUI.
#
# Changelog
# 0.01 - Initial version
# 0.02 - Huffdic compressed books were not properly decrypted
# 0.03 - Wasn't checking MOBI header length
# 0.04 - Wasn't sanity checking size of data record
# 0.05 - It seems that the extra data flags take two bytes not four
# 0.06 - And that low bit does mean something after all :-)
# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
# 0.08 - ...and also not in Mobi header version < 6
# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
# import filter it works when importing unencrypted files.
# Also now handles encrypted files that don't need a specific PID.
# 0.11 - use autoflushed stdout and proper return values
# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
# and extra blank lines, converted CR/LF pairs at ends of each line,
# and other cosmetic fixes.
# 0.14 - Working out when the extra data flags are present has been problematic
# Versions 7 through 9 have tried to tweak the conditions, but have been
# only partially successful. Closer examination of lots of sample
# files reveals that a confusion has arisen because trailing data entries
# are not encrypted, but it turns out that the multibyte entries
# in utf8 file are encrypted. (Although neither kind gets compressed.)
# This knowledge leads to a simplification of the test for the
# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
# 0.15 - Now outputs 'heartbeat', and is also quicker for long files.
# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
# 0.17 - added modifications to support its use as an imported python module
# both inside calibre and also in other places (ie K4DeDRM tools)
# 0.17a - disabled the standalone plugin feature since a plugin can not import
# a plugin
__version__ = '0.17'
import sys
import struct
import binascii
class Unbuffered:
    """File-like wrapper whose write() always flushes the wrapped stream."""

    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        """Send *data* to the wrapped stream and flush it at once."""
        sink = self.stream
        sink.write(data)
        sink.flush()

    def __getattr__(self, attr):
        # Fallback lookup: unknown attributes come from the wrapped stream.
        return getattr(self.stream, attr)
class DrmException(Exception):
    """Error raised when DRM removal cannot proceed.

    Used in this file for bad PID checksums, unrecognized file formats,
    unsupported encryption types and missing keys.
    """
# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
    """Run the PC1 cipher over *src* using the 16-byte *key*.

    key        -- byte string, must be exactly 16 bytes long.
    src        -- byte string to encrypt or decrypt.
    decryption -- True (default) decrypts, False encrypts.

    Returns a byte string of the same length as *src*.  If the key length
    is wrong a message is printed and None is returned.
    """
    if len(key) != 16:
        print("Bad key length!")
        return None
    raw_key = bytearray(key)
    # Eight big-endian 16-bit key words.
    wkey = [(raw_key[2 * i] << 8) | raw_key[2 * i + 1] for i in range(8)]
    sum1 = 0
    sum2 = 0
    source = bytearray(src)
    # Collect output in a pre-sized buffer instead of growing a string.
    result = bytearray(len(source))
    for index, curByte in enumerate(source):
        temp1 = 0
        byteXorVal = 0
        for j in range(8):
            temp1 ^= wkey[j]
            sum2 = (sum2 + j) * 20021 + sum1
            sum1 = (temp1 * 346) & 0xFFFF
            sum2 = (sum2 + sum1) & 0xFFFF
            temp1 = (temp1 * 20021 + 1) & 0xFFFF
            byteXorVal ^= temp1 ^ sum2
        # The key words are updated from the plaintext byte in both
        # directions: before the XOR when encrypting, after it otherwise.
        if not decryption:
            keyXorVal = curByte * 257
        curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
        if decryption:
            keyXorVal = curByte * 257
        for j in range(8):
            wkey[j] ^= keyXorVal
        result[index] = curByte
    return bytes(result)
def checksumPid(s):
    """Return *s* with its two checksum characters appended.

    *s* may be a byte string or an ASCII text string; the result has the
    same type as the input.  The CRC of *s* is folded to 16 bits and each
    of its two low bytes selects one character from a 34-letter alphabet.
    """
    letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
    is_bytes = isinstance(s, bytes)
    # binascii.crc32 needs bytes; text input is encoded for the CRC only.
    raw = s if is_bytes else s.encode("ascii")
    crc = (~binascii.crc32(raw, -1)) & 0xFFFFFFFF
    crc ^= crc >> 16
    l = len(letters)
    suffix = ""
    for _ in (0, 1):
        quotient, remainder = divmod(crc & 0xff, l)
        suffix += letters[(quotient ^ remainder) % l]
        crc >>= 8
    if is_bytes and not isinstance(s, str):
        # bytes and str are distinct types here, so match the input type.
        return s + suffix.encode("ascii")
    return s + suffix
def getSizeOfTrailingDataEntries(ptr, size, flags):
    """Return the combined size of the trailing data entries of a record.

    ptr   -- the record data.
    size  -- number of bytes of *ptr* to consider.
    flags -- extra data flags; every set bit above bit 0 announces one
             trailing entry whose size is stored backwards at the end of
             the remaining data.
    """
    def entry_size(buf, end):
        # Decode a backwards variable-length integer ending at buf[end-1]:
        # 7 bits per byte, stopping at a byte with the high bit set, after
        # 28 bits, or when the start of the buffer is reached.
        value = 0
        if end <= 0:
            return value
        shift = 0
        while True:
            end -= 1
            byte = ord(buf[end])
            value |= (byte & 0x7F) << shift
            shift += 7
            if byte & 0x80 or shift >= 28 or end == 0:
                return value

    total = 0
    pending = flags >> 1
    while pending:
        if pending & 1:
            total += entry_size(ptr, size - total)
        pending >>= 1
    # Multibyte data, if present, is included in the encryption, so
    # the low bit of the flags is deliberately not examined.
    return total
class DrmStripper:
    """Remove Mobipocket DRM from the contents of a BOOKMOBI file.

    Construct it with the raw file contents and a PID (including its two
    checksum characters), then call getResult() for the decrypted file.
    Unencrypted books are passed through unchanged.
    """

    def _sectionBounds(self, section):
        # (start, end) offsets of a section within data_file; the last
        # section runs to the end of the file.
        if (section + 1 == self.num_sections):
            endoff = len(self.data_file)
        else:
            endoff = self.sections[section + 1][0]
        return self.sections[section][0], endoff

    def loadSection(self, section):
        """Return the raw bytes of section number *section*."""
        off, endoff = self._sectionBounds(section)
        return self.data_file[off:endoff]

    def patch(self, off, new):
        """Overwrite len(new) bytes of the file data at offset *off*."""
        self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]

    def patchSection(self, section, new, in_off = 0):
        """Overwrite bytes of *section* with *new*, starting *in_off* bytes in.

        The patch must fit inside the section.
        """
        off, endoff = self._sectionBounds(section)
        assert off + in_off + len(new) <= endoff
        self.patch(off + in_off, new)

    def _scanDrmRecords(self, data, count, temp_key, require_flag):
        # Return the book key from the first 0x30-byte DRM record that
        # verifies against temp_key, or None.  With require_flag set the
        # low five bits of the record's flags must also equal 1.
        temp_key_sum = sum(map(ord, temp_key)) & 0xff
        for i in range(count):
            record = data[i*0x30:i*0x30+0x30]
            verification, _size, _rtype, cksum, cookie = struct.unpack('>LLLBxxx32s', record)
            cookie = PC1(temp_key, cookie)
            ver, flags, finalkey, _expiry, _expiry2 = struct.unpack('>LL16sLL', cookie)
            if verification != ver or cksum != temp_key_sum:
                continue
            if require_flag and (flags & 0x1F) != 1:
                continue
            return finalkey
        return None

    def parseDRM(self, data, count, pid):
        """Return the book key found in the DRM records, or None.

        data  -- the concatenated DRM records (0x30 bytes each).
        count -- number of records in *data*.
        pid   -- PID without its checksum characters.
        """
        pid = pid.ljust(16,'\0')
        keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
        temp_key = PC1(keyvec1, pid, False)
        found_key = self._scanDrmRecords(data, count, temp_key, True)
        if not found_key:
            # Then try the default encoding that doesn't require a PID
            found_key = self._scanDrmRecords(data, count, keyvec1, False)
        return found_key

    def _decryptRecords(self, found_key, records, extra_data_flags):
        # Rebuild data_file with text records 1..records decrypted; the
        # trailing data entries of each record are copied through as-is.
        sys.stdout.write("Decrypting. Please wait . . .")
        pieces = [self.data_file[:self.sections[1][0]]]
        for i in range(1, records+1):
            data = self.loadSection(i)
            extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
            if i%100 == 0:
                # progress heartbeat
                sys.stdout.write(" .")
            pieces.append(PC1(found_key, data[0:len(data) - extra_size]))
            if extra_size > 0:
                pieces.append(data[-extra_size:])
        if self.num_sections > records+1:
            # sections after the text records are not encrypted
            pieces.append(self.data_file[self.sections[records+1][0]:])
        # Joining once keeps the rebuild linear in the file size.
        self.data_file = "".join(pieces)
        sys.stdout.write(" done\n")

    def __init__(self, data_file, pid):
        """Parse *data_file* and decrypt it with *pid*.

        Raises DrmException for a bad PID checksum, a non-BOOKMOBI file,
        an unsupported encryption type, or when no key can be found.
        """
        if checksumPid(pid[0:-2]) != pid:
            raise DrmException("invalid PID checksum")
        pid = pid[0:-2]
        self.data_file = data_file
        header = data_file[0:72]
        if header[0x3C:0x3C+8] != 'BOOKMOBI':
            raise DrmException("invalid file format")
        self.num_sections, = struct.unpack('>H', data_file[76:78])
        self.sections = []
        for i in range(self.num_sections):
            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16|a3<<8|a4
            self.sections.append( (offset, flags, val) )
        sect = self.loadSection(0)
        records, = struct.unpack('>H', sect[0x8:0x8+2])
        mobi_length, = struct.unpack('>L',sect[0x14:0x18])
        mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
        extra_data_flags = 0
        print("MOBI header version = %d, length = %d" %(mobi_version, mobi_length))
        if (mobi_length >= 0xE4) and (mobi_version >= 5):
            extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
            print("Extra Data Flags = %d" %extra_data_flags)
        crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
        if crypto_type == 0:
            print("This book is not encrypted.")
            return
        if crypto_type == 1:
            raise DrmException("cannot decode Mobipocket encryption type 1")
        if crypto_type != 2:
            raise DrmException("unknown encryption type: %d" % crypto_type)
        # calculate the keys
        drm_ptr, drm_count, drm_size, _drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
        if drm_count == 0:
            raise DrmException("no PIDs found in this file")
        found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
        if not found_key:
            raise DrmException("no key found. maybe the PID is incorrect")
        # kill the drm keys
        self.patchSection(0, "\0" * drm_size, drm_ptr)
        # kill the drm pointers
        self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
        # clear the crypto type
        self.patchSection(0, "\0" * 2, 0xC)
        # decrypt sections
        self._decryptRecords(found_key, records, extra_data_flags)

    def getResult(self):
        """Return the (possibly decrypted) file contents."""
        return self.data_file
def getUnencryptedBook(infile,pid):
    """Read the book at path *infile* and return its DRM-free contents.

    pid -- the PID including its checksum characters.

    Raises DrmException (from DrmStripper) when the book cannot be decoded.
    """
    # Wrap stdout only once, however many books are processed.
    if not isinstance(sys.stdout, Unbuffered):
        sys.stdout=Unbuffered(sys.stdout)
    with open(infile, 'rb') as source:
        data_file = source.read()
    strippedFile = DrmStripper(data_file, pid)
    return strippedFile.getResult()
def main(argv=sys.argv):
    """Command-line entry point: remove DRM from a Mobipocket book.

    argv -- argument list in sys.argv layout:
            [program, infile, outfile, PID].

    Returns 0 on success and 1 on usage errors or when decoding fails.
    """
    # Wrap stdout only once so repeated calls do not stack wrappers.
    if not isinstance(sys.stdout, Unbuffered):
        sys.stdout=Unbuffered(sys.stdout)
    print('MobiDeDrm v%(__version__)s. '
          'Copyright 2008-2010 The Dark Reverser.' % globals())
    if len(argv)<4:
        print("Removes protection from Mobipocket books")
        print("Usage:")
        print(" %s <infile> <outfile> <PID>" % (argv or sys.argv)[0])
        return 1
    infile = argv[1]
    outfile = argv[2]
    pid = argv[3]
    try:
        stripped_file = getUnencryptedBook(infile, pid)
        with open(outfile, 'wb') as out:
            out.write(stripped_file)
    except DrmException as e:
        print("Error: %s" % e)
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())
#if not __name__ == "__main__":
if False:
    # note a calibre plugin can not import code with another calibre plugin
    # in it as it ends up registering two different plugins
    from calibre.customize import FileTypePlugin

    class MobiDeDRM(FileTypePlugin):
        """Calibre file-type plugin that strips Mobipocket DRM on import."""
        name = 'MobiDeDRM' # Name of the plugin
        description = 'Removes DRM from secure Mobi files'
        supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
        author = 'The Dark Reverser' # The author of this plugin
        version = (0, 1, 7) # The version number of this plugin
        file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
        on_import = True # Run this plugin during the import

        def run(self, path_to_ebook):
            """Return the path of a decrypted temporary copy of the book.

            Every comma-separated PID from the plugin customization is
            tried in turn; an error is reported (and an Exception raised)
            only when none of them decodes the book.
            """
            from calibre.gui2 import is_ok_to_use_qt
            from PyQt4.Qt import QMessageBox
            PID = self.site_customization
            with open(path_to_ebook, 'rb') as source:
                data_file = source.read()
            for candidate in PID.split(','):
                try:
                    unlocked_file = DrmStripper(data_file, candidate).getResult()
                except DrmException:
                    # This PID did not work; try the next one.
                    continue
                of = self.temporary_file('.mobi')
                of.write(unlocked_file)
                of.close()
                return of.name
            if is_ok_to_use_qt():
                d = QMessageBox(QMessageBox.Warning, "MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
                d.show()
                d.raise_()
                d.exec_()
            raise Exception("MobiDeDRM Plugin: Error decoding ebook")

        def customization_help(self, gui=False):
            return 'Enter PID (separate multiple PIDs with comma)'

View File

@@ -0,0 +1,189 @@
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
#
# Big Thanks to Igor SKOCHINSKY for providing me with all his information
# and source code relating to the inner workings of this compression scheme.
# Without it, I wouldn't be able to solve this as easily.
#
# Changelog
# 0.01 - Initial version
# 0.02 - Fix issue with size computing
# 0.03 - Fix issue with some files
# 0.04 - make stdout self flushing and fix return values
class Unbuffered:
    """Stream wrapper that flushes after each write call."""

    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        """Write *data* through to the wrapped stream and flush it."""
        out = self.stream
        out.write(data)
        out.flush()

    def __getattr__(self, attr):
        # Attributes the wrapper lacks are fetched from the wrapped stream.
        return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import struct
class BitReader:
    """Read bits, most significant first, from a string of byte characters."""

    def __init__(self, data):
        # Four padding bytes let peek() look past the end of the data.
        self.data = data + "\x00\x00\x00\x00"
        self.pos = 0
        self.nbits = len(data) * 8

    def peek(self, n):
        """Return the next *n* bits as an integer without consuming them."""
        acc = 0
        got = 0
        while got < n:
            cursor = self.pos + got
            acc = (acc << 8) | ord(self.data[cursor >> 3])
            # The first byte read may be partly consumed already.
            got += 8 - (cursor & 7)
        return (acc >> (got - n)) & ((1 << n) - 1)

    def eat(self, n):
        """Consume *n* bits; return False once past the end of the data."""
        self.pos += n
        return not self.pos > self.nbits

    def left(self):
        """Return the number of unread bits."""
        return self.nbits - self.pos
class HuffReader:
    """Decoder for huffdic-compressed Mobipocket text records.

    huffs -- list of raw sections: the 'HUFF' section first, followed by
             one or more 'CDIC' dictionary sections.
    """

    def __init__(self, huffs):
        self.huffs = huffs
        if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
            raise ValueError('invalid huff1 header')
        if huffs[1][0:4] != 'CDIC' or huffs[1][4:8] != '\x00\x00\x00\x10':
            raise ValueError('invalid huff2 header')
        self.entry_bits, = struct.unpack('>L', huffs[1][12:16])
        off1,off2 = struct.unpack('>LL', huffs[0][16:24])
        # dict1: 256 entries indexed by the top byte of the bit stream.
        self.dict1 = struct.unpack('<256L', huffs[0][off1:off1+256*4])
        # dict2: 32 pairs looked up by code length.
        self.dict2 = struct.unpack('<64L', huffs[0][off2:off2+64*4])
        self.dicts = huffs[1:]
        self.r = ''
        # Decoded pieces are collected here and joined once per unpack().
        self._chunks = []

    def _unpack(self, bits, depth = 0):
        """Decode every code in *bits*, appending the output pieces."""
        if depth > 32:
            raise ValueError('corrupt file')
        while bits.left():
            dw = bits.peek(32)
            v = self.dict1[dw >> 24]
            codelen = v & 0x1F
            if codelen == 0:
                # Explicit check (not assert) so it survives python -O.
                raise ValueError('corrupt file')
            code = dw >> (32 - codelen)
            r = (v >> 8)
            if not (v & 0x80):
                # Code is longer than dict1 indicated: extend it bit by bit.
                while code < self.dict2[(codelen-1)*2]:
                    codelen += 1
                    code = dw >> (32 - codelen)
                r = self.dict2[(codelen-1)*2+1]
            r -= code
            if not bits.eat(codelen):
                return
            dicno = r >> self.entry_bits
            off1 = 16 + (r - (dicno << self.entry_bits)) * 2
            dic = self.dicts[dicno]
            off2 = 16 + ord(dic[off1]) * 256 + ord(dic[off1+1])
            blen = ord(dic[off2]) * 256 + ord(dic[off2+1])
            entry = dic[off2+2:off2+2+(blen&0x7fff)]
            if blen & 0x8000:
                # High bit set: the entry is literal output.
                self._chunks.append(entry)
            else:
                # Otherwise the entry is itself compressed.
                self._unpack(BitReader(entry), depth + 1)

    def unpack(self, data):
        """Return the decompressed form of one record's *data*.

        Raises ValueError when the data is corrupt.
        """
        self._chunks = []
        self._unpack(BitReader(data))
        self.r = ''.join(self._chunks)
        return self.r
class Sectionizer:
    """Load a Palm database file and give access to its sections."""

    def __init__(self, filename, ident):
        """Read *filename* and parse its section table.

        ident -- the 8-character type/creator string expected at offset
                 0x3C of the file header.

        Raises ValueError when the identifier does not match or the file
        is too short to hold the section count.
        """
        with open(filename, 'rb') as source:
            self.contents = source.read()
        self.header = self.contents[0:72]
        if self.header[0x3C:0x3C+8] != ident:
            raise ValueError('Invalid file format')
        if len(self.contents) < 78:
            # Too short to contain the 2-byte section count at offset 76.
            raise ValueError('Invalid file format')
        self.num_sections, = struct.unpack('>H', self.contents[76:78])
        self.sections = []
        for i in range(self.num_sections):
            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16|a3<<8|a4
            self.sections.append( (offset, flags, val) )

    def loadSection(self, section):
        """Return the raw contents of section number *section*."""
        if section + 1 == self.num_sections:
            # The last section extends to the end of the file.
            end_off = len(self.contents)
        else:
            end_off = self.sections[section + 1][0]
        off = self.sections[section][0]
        return self.contents[off:end_off]
def getSizeOfTrailingDataEntry(ptr, size):
    """Return the size of the trailing data entry ending at ptr[size-1].

    The size is stored as a backwards variable-length integer: 7 bits per
    byte, read from the end towards the start, stopping at a byte with the
    high bit set, after 28 bits, or at the start of the data.  Returns 0
    when *size* is not positive.
    """
    bitpos, result = 0, 0
    if size <= 0:
        # Nothing left to read; avoids indexing from the wrong end.
        return result
    while True:
        v = ord(ptr[size-1])
        result |= (v & 0x7F) << bitpos
        bitpos += 7
        size -= 1
        if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
            return result
def getSizeOfTrailingDataEntries(ptr, size, flags):
    """Return the total size of all trailing data entries of a record.

    Bit 0 of *flags* is skipped; every other set bit stands for one
    trailing entry, measured from the end of the data not yet accounted
    for.
    """
    total = 0
    remaining = flags >> 1
    while remaining:
        if remaining & 1:
            total += getSizeOfTrailingDataEntry(ptr, size - total)
        remaining >>= 1
    return total
def unpackBook(input_file):
    """Return the decompressed text of the huffdic book at *input_file*.

    Raises ValueError when the file is not a BOOKMOBI file, is encrypted,
    does not use 'DH' compression, or contains corrupt data.
    """
    sect = Sectionizer(input_file, 'BOOKMOBI')
    header = sect.loadSection(0)
    crypto_type, = struct.unpack('>H', header[0xC:0xC+2])
    if crypto_type != 0:
        raise ValueError('The book is encrypted. Run mobidedrm first')
    if header[0:2] != 'DH':
        raise ValueError('invalid compression type')
    # The extra data flags are a 2-byte field at 0xF2 and are only present
    # for MOBI header version 5 and higher with header size >= 0xE4.
    mobi_length, = struct.unpack('>L', header[0x14:0x18])
    mobi_version, = struct.unpack('>L', header[0x68:0x6C])
    extra_flags = 0
    if mobi_length >= 0xE4 and mobi_version >= 5:
        extra_flags, = struct.unpack('>H', header[0xF2:0xF4])
    records, = struct.unpack('>H', header[0x8:0x8+2])
    huffoff,huffnum = struct.unpack('>LL', header[0x70:0x78])
    huffs = [sect.loadSection(i) for i in range(huffoff, huffoff+huffnum)]
    huff = HuffReader(huffs)
    pieces = []
    for nr in range(1, records+1):
        data = sect.loadSection(nr)
        # Trailing data entries are not compressed; leave them out.
        trail_size = getSizeOfTrailingDataEntries(data, len(data), extra_flags)
        pieces.append(huff.unpack(data[0:len(data)-trail_size]))
    return ''.join(pieces)
def main(argv=sys.argv):
    """Command-line entry point: decompress a huffdic Mobipocket book.

    argv -- argument list in sys.argv layout: [program, infile, outfile].

    Returns 0 on success and 1 on usage errors or when unpacking fails.
    """
    print("MobiHuff v0.03")
    print(" Copyright (c) 2008 The Dark Reverser <dark.reverser@googlemail.com>")
    if len(argv)!=3:
        print("")
        print("Description:")
        print(" Unpacks the new mobipocket huffdic compression.")
        print(" This program works with unencrypted files only.")
        print("Usage:")
        print(" mobihuff.py infile.mobi outfile.html")
        return 1
    infile = argv[1]
    outfile = argv[2]
    try:
        # No newline yet: "done" is appended to the same line on success.
        sys.stdout.write("Decompressing...")
        result = unpackBook(infile)
        with open(outfile, 'wb') as out:
            out.write(result)
        sys.stdout.write(" done\n")
    except ValueError as e:
        # Finish the pending progress line before reporting the error.
        sys.stdout.write("\n")
        print("Error: %s" % e)
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,529 @@
#
# $Id: prc.py,v 1.3 2001/12/27 08:48:02 rob Exp $
#
# Copyright 1998-2001 Rob Tillotson <rob@pyrite.org>
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software and
# its documentation for any purpose and without fee or royalty is
# hereby granted, provided that the above copyright notice appear in
# all copies and that both the copyright notice and this permission
# notice appear in supporting documentation or portions thereof,
# including modifications, that you you make.
#
# THE AUTHOR ROB TILLOTSON DISCLAIMS ALL WARRANTIES WITH REGARD TO
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE!
#
"""PRC/PDB file I/O in pure Python.
This module serves two purposes: one, it allows access to Palm OS(tm)
database files on the desktop in pure Python without requiring
pilot-link (hence, it may be useful for import/export utilities),
and two, it caches the contents of the file in memory so it can
be freely modified using an identical API to databases over a
DLP connection.
"""
__version__ = '$Id: prc.py,v 1.3 2001/12/27 08:48:02 rob Exp $'
__copyright__ = 'Copyright 1998-2001 Rob Tillotson <robt@debian.org>'
# temporary hack until we get gettext support again
def _(s):
    # Identity stand-in for the gettext translation function.
    return s
#
# DBInfo structure:
#
# int more
# unsigned int flags
# unsigned int miscflags
# unsigned long type
# unsigned long creator
# unsigned int version
# unsigned long modnum
# time_t createDate, modifydate, backupdate
# unsigned int index
# char name[34]
#
#
# DB Header:
# 32 name
# 2 flags
# 2 version
# 4 creation time
# 4 modification time
# 4 backup time
# 4 modification number
# 4 appinfo offset
# 4 sortinfo offset
# 4 type
# 4 creator
# 4 unique id seed (garbage?)
# 4 next record list id (normally 0)
# 2 num of records for this header
# (maybe 2 more bytes)
#
# Resource entry header: (if low bit of attr = 1)
# 4 type
# 2 id
# 4 offset
#
# record entry header: (if low bit of attr = 0)
# 4 offset
# 1 attributes
# 3 unique id
#
# then 2 bytes of 0
#
# then appinfo then sortinfo
#
import sys, os, stat, struct
PI_HDR_SIZE = 78
PI_RESOURCE_ENT_SIZE = 10
PI_RECORD_ENT_SIZE = 8
PILOT_TIME_DELTA = 2082844800L
flagResource = 0x0001
flagReadOnly = 0x0002
flagAppInfoDirty = 0x0004
flagBackup = 0x0008
flagOpen = 0x8000
# 2.x
flagNewer = 0x0010
flagReset = 0x0020
#
flagExcludeFromSync = 0x0080
attrDeleted = 0x80
attrDirty = 0x40
attrBusy = 0x20
attrSecret = 0x10
attrArchived = 0x08
default_info = {
'name': '',
'type': 'DATA',
'creator': ' ',
'createDate': 0,
'modifyDate': 0,
'backupDate': 0,
'modnum': 0,
'version': 0,
'flagReset': 0,
'flagResource': 0,
'flagNewer': 0,
'flagExcludeFromSync': 0,
'flagAppInfoDirty': 0,
'flagReadOnly': 0,
'flagBackup': 0,
'flagOpen': 0,
'more': 0,
'index': 0
}
def null_terminated(s):
    """Return the part of string *s* before its first NUL character.

    The whole string is returned when it contains no NUL.
    """
    cut = s.find('\000')
    if cut < 0:
        return s
    return s[:cut]
def trim_null(s):
    """Return the part of string *s* before its first NUL character."""
    # Uses the str method: the `string` module is not imported by this file.
    return s.split('\0')[0]
def pad_null(s, l):
    """Return *s* as a NUL-terminated field of exactly *l* characters.

    Strings too long to leave room for a terminator are cut to l-1
    characters; shorter ones are padded with NULs up to length *l*.
    """
    limit = l - 1
    if len(s) > limit:
        s = s[:limit] + '\0'
    missing = l - len(s)
    if missing > 0:
        s += '\0' * missing
    return s
#
# new stuff
# Record object to be put in tree...
class PRecord:
    """One database record: raw data plus id, attributes and category."""

    def __init__(self, attr=0, id=0, category=0, raw=''):
        self.attr = attr
        self.category = category
        self.id = id
        self.raw = raw

    # comparison and hashing are done by ID;
    # thus, the id value *may not be changed* once
    # the object is created.
    def __cmp__(self, obj):
        # A plain integer is compared against directly; anything else is
        # expected to carry an `id` attribute.
        other_id = obj if type(obj) is int else obj.id
        return cmp(self.id, other_id)

    def __hash__(self):
        return self.id
class PResource:
    """One database resource: raw data identified by a (type, id) pair."""

    def __init__(self, typ=' ', id=0, raw=''):
        self.type = typ
        self.id = id
        self.raw = raw

    def __cmp__(self, obj):
        # A tuple is compared against directly as (type, id); anything
        # else is expected to carry `type` and `id` attributes.
        mine = (self.type, self.id)
        theirs = obj if type(obj) is tuple else (obj.type, obj.id)
        return cmp(mine, theirs)

    def __hash__(self):
        return hash((self.type, self.id))
class PCache:
    """In-memory cache of a database: its records or resources, the
    appinfo and sortinfo blocks, and the header information dictionary.
    """

    def __init__(self):
        self.data = []        # PRecord or PResource objects
        self.appblock = ''
        self.sortblock = ''
        self.dirty = 0        # set to 1 by the mutating methods
        self.next = 0         # cursor for getNextRecord/getNextModRecord
        self.info = {}
        self.info.update(default_info)
        # if allow_zero_ids is 1, then this prc behaves appropriately
        # for a desktop database. That is, it never attempts to assign
        # an ID, and lets new records be inserted with an ID of zero.
        self.allow_zero_ids = 0

    # pi-file API
    def getRecords(self):
        """Return the number of cached records."""
        return len(self.data)

    def getAppBlock(self):
        """Return the appinfo block, or None when it is empty."""
        return self.appblock or None

    def setAppBlock(self, raw):
        self.dirty = 1
        self.appblock = raw

    def getSortBlock(self):
        """Return the sortinfo block, or None when it is empty."""
        return self.sortblock or None

    def setSortBlock(self, raw):
        self.dirty = 1
        # Store into the sort block (not the appinfo block).
        self.sortblock = raw

    def checkID(self, id):
        return id in self.data

    def getRecord(self, i):
        """Return (raw, index, id, attr, category) for index *i*, or None."""
        try:
            r = self.data[i]
        except (IndexError, TypeError):
            return None
        return r.raw, i, r.id, r.attr, r.category

    def getRecordByID(self, id):
        """Return (raw, index, id, attr, category) for record *id*, or None."""
        try:
            i = self.data.index(id)
        except (ValueError, AttributeError):
            return None
        r = self.data[i]
        return r.raw, i, r.id, r.attr, r.category

    def getResource(self, i):
        """Return (raw, type, id) for the resource at index *i*, or None."""
        try:
            r = self.data[i]
        except (IndexError, TypeError):
            return None
        return r.raw, r.type, r.id

    def getDBInfo(self):
        return self.info

    def setDBInfo(self, info):
        self.dirty = 1
        self.info = {}
        self.info.update(info)

    def updateDBInfo(self, info):
        self.dirty = 1
        self.info.update(info)

    def setRecord(self, attr, id, cat, data):
        """Store a record, replacing any existing one with the same id.

        Unless allow_zero_ids is set, a zero *id* is replaced by a free
        one.  Returns the id actually used.
        """
        if not self.allow_zero_ids and not id:
            if not len(self.data):
                id = 1
            else:
                xid = self.data[0].id + 1
                while xid in self.data:
                    xid = xid + 1
                id = xid
        r = PRecord(attr, id, cat, data)
        if id and id in self.data:
            self.data.remove(id)
        self.data.append(r)
        self.dirty = 1
        return id

    def setRecordIdx(self, i, data):
        """Replace the raw data of the record at index *i*."""
        self.data[i].raw = data
        self.dirty = 1

    def setResource(self, typ, id, data):
        """Store a resource, replacing any existing (typ, id); return id."""
        if (typ, id) in self.data:
            self.data.remove((typ,id))
        r = PResource(typ, id, data)
        self.data.append(r)
        self.dirty = 1
        return id

    def getNextRecord(self, cat):
        """Return the next record in category *cat*, or '' when exhausted."""
        while self.next < len(self.data):
            r = self.data[self.next]
            i = self.next
            self.next = self.next + 1
            if r.category == cat:
                return r.raw, i, r.id, r.attr, r.category
        return ''

    def getNextModRecord(self, cat=-1):
        """Return the next record with the dirty attribute set, or None.

        A non-negative *cat* restricts the search to that category.
        """
        while self.next < len(self.data):
            r = self.data[self.next]
            i = self.next
            self.next = self.next + 1
            # attrDirty is the modified-record attribute defined in this module.
            if (r.attr & attrDirty) and (cat < 0 or r.category == cat):
                return r.raw, i, r.id, r.attr, r.category

    def getResourceByID(self, type, id):
        """Return (raw, type, id) for the given resource, or None."""
        try:
            r = self.data[self.data.index((type,id))]
        except (ValueError, AttributeError):
            return None
        return r.raw, r.type, r.id

    def deleteRecord(self, id):
        if not id in self.data:
            return None
        self.data.remove(id)
        self.dirty = 1

    def deleteRecords(self):
        self.data = []
        self.dirty = 1

    def deleteResource(self, type, id):
        if not (type,id) in self.data:
            return None
        self.data.remove((type,id))
        self.dirty = 1

    def deleteResources(self):
        self.data = []
        self.dirty = 1

    def getRecordIDs(self, sort=0):
        """Return the list of record ids, sorted when *sort* is true."""
        ids = [r.id for r in self.data]
        if sort:
            ids.sort()
        return ids

    def moveCategory(self, frm, to):
        """Move every record in category *frm* to category *to*."""
        for r in self.data:
            if r.category == frm:
                r.category = to
        self.dirty = 1

    def deleteCategory(self, cat):
        raise RuntimeError(_("unimplemented"))

    def purge(self):
        """Drop every record that has the deleted attribute set."""
        self.data = [r for r in self.data if not (r.attr & attrDeleted)]
        self.dirty = 1

    def resetNext(self):
        self.next = 0

    def resetFlags(self):
        """Clear the dirty attribute on every record."""
        # special behavior for resources
        if not self.info.get('flagResource',0):
            for r in self.data:
                r.attr = r.attr & ~attrDirty
            self.dirty = 1
import pprint
class File(PCache):
def __init__(self, name=None, read=1, write=0, info={}):
PCache.__init__(self)
self.filename = name
self.info.update(info)
self.writeback = write
self.isopen = 0
if read:
self.load(name)
self.isopen = 1
def close(self):
if self.writeback and self.dirty:
self.save(self.filename)
self.isopen = 0
def __del__(self):
if self.isopen: self.close()
def load(self, f):
if type(f) == type(''): f = open(f, 'rb')
data = f.read()
self.unpack(data)
def unpack(self, data):
if len(data) < PI_HDR_SIZE: raise IOError, _("file too short")
(name, flags, ver, ctime, mtime, btime, mnum, appinfo, sortinfo,
typ, creator, uid, nextrec, numrec) \
= struct.unpack('>32shhLLLlll4s4sllh', data[:PI_HDR_SIZE])
if nextrec or appinfo < 0 or sortinfo < 0 or numrec < 0:
raise IOError, _("invalid database header")
self.info = {
'name': null_terminated(name),
'type': typ,
'creator': creator,
'createDate': ctime - PILOT_TIME_DELTA,
'modifyDate': mtime - PILOT_TIME_DELTA,
'backupDate': btime - PILOT_TIME_DELTA,
'modnum': mnum,
'version': ver,
'flagReset': flags & flagReset,
'flagResource': flags & flagResource,
'flagNewer': flags & flagNewer,
'flagExcludeFromSync': flags & flagExcludeFromSync,
'flagAppInfoDirty': flags & flagAppInfoDirty,
'flagReadOnly': flags & flagReadOnly,
'flagBackup': flags & flagBackup,
'flagOpen': flags & flagOpen,
'more': 0,
'index': 0
}
rsrc = flags & flagResource
if rsrc: s = PI_RESOURCE_ENT_SIZE
else: s = PI_RECORD_ENT_SIZE
entries = []
pos = PI_HDR_SIZE
for x in range(0,numrec):
hstr = data[pos:pos+s]
pos = pos + s
if not hstr or len(hstr) < s:
raise IOError, _("bad database header")
if rsrc:
(typ, id, offset) = struct.unpack('>4shl', hstr)
entries.append((offset, typ, id))
else:
(offset, auid) = struct.unpack('>ll', hstr)
attr = (auid & 0xff000000) >> 24
uid = auid & 0x00ffffff
entries.append((offset, attr, uid))
offset = len(data)
entries.reverse()
for of, q, id in entries:
size = offset - of
if size < 0: raise IOError, _("bad pdb/prc record entry (size < 0)")
d = data[of:offset]
offset = of
if len(d) != size: raise IOError, _("failed to read record")
if rsrc:
r = PResource(q, id, d)
self.data.append(r)
else:
r = PRecord(q & 0xf0, id, q & 0x0f, d)
self.data.append(r)
self.data.reverse()
if sortinfo:
sortinfo_size = offset - sortinfo
offset = sortinfo
else:
sortinfo_size = 0
if appinfo:
appinfo_size = offset - appinfo
offset = appinfo
else:
appinfo_size = 0
if appinfo_size < 0 or sortinfo_size < 0:
raise IOError, _("bad database header (appinfo or sortinfo size < 0)")
if appinfo_size:
self.appblock = data[appinfo:appinfo+appinfo_size]
if len(self.appblock) != appinfo_size:
raise IOError, _("failed to read appinfo block")
if sortinfo_size:
self.sortblock = data[sortinfo:sortinfo+sortinfo_size]
if len(self.sortblock) != sortinfo_size:
raise IOError, _("failed to read sortinfo block")
def save(self, f):
    """Dump the cache to a file.

    f is either a filename, which is opened for binary writing and closed
    again before returning, or an object with a write() method, which is
    written to and left open for the caller.

    Written in order: the database header, one index entry per record,
    two padding bytes, the appinfo and sortinfo blocks (when present),
    then the raw data of every record.
    """
    opened_here = isinstance(f, str)
    if opened_here:
        f = open(f, 'wb')
    try:
        info = self.info
        rsrc = info.get('flagResource')
        record_data = [rec.raw for rec in self.data]

        # first, we need to precalculate the offsets.
        # An index entry is 10 bytes for a resource ('>4shl') and 8 bytes
        # for a record ('>LL'); the extra 2 covers the padding bytes that
        # follow the index.
        if rsrc:
            entries_len = 10 * len(self.data)
        else:
            entries_len = 8 * len(self.data)
        off = PI_HDR_SIZE + entries_len + 2

        appinfo_offset = 0
        if self.appblock:
            appinfo_offset = off
            off = off + len(self.appblock)

        sortinfo_offset = 0
        if self.sortblock:
            sortinfo_offset = off
            off = off + len(self.sortblock)

        rec_offsets = []
        for raw in record_data:
            rec_offsets.append(off)
            off = off + len(raw)

        flg = 0
        # excludefromsync doesn't actually get stored?
        for key, bit in (('flagResource', flagResource),
                         ('flagReadOnly', flagReadOnly),
                         ('flagAppInfoDirty', flagAppInfoDirty),
                         ('flagBackup', flagBackup),
                         ('flagOpen', flagOpen),
                         ('flagNewer', flagNewer),
                         ('flagReset', flagReset)):
            if info.get(key, 0):
                flg = flg | bit

        hdr = struct.pack('>32shhLLLlll4s4sllh',
                          pad_null(info.get('name', ''), 32),
                          flg,
                          info.get('version', 0),
                          info.get('createDate', 0) + PILOT_TIME_DELTA,
                          info.get('modifyDate', 0) + PILOT_TIME_DELTA,
                          info.get('backupDate', 0) + PILOT_TIME_DELTA,
                          info.get('modnum', 0),
                          appinfo_offset,   # appinfo
                          sortinfo_offset,  # sortinfo
                          info.get('type', '    '),
                          info.get('creator', '    '),
                          0,  # uid???
                          0,  # nextrec???
                          len(self.data))
        f.write(hdr)

        entries = []
        for rec, rec_off in zip(self.data, rec_offsets):
            if rsrc:
                entries.append(struct.pack('>4shl', rec.type, rec.id, rec_off))
            else:
                # The attribute byte sits in the top 8 bits, so the word
                # reaches 2**31 and above as soon as attribute bit 0x80 is
                # set; pack it unsigned so it cannot overflow a signed 'l'.
                attr_uid = ((rec.attr | rec.category) << 24) | rec.id
                entries.append(struct.pack('>LL', rec_off, attr_uid))

        for entry in entries:
            f.write(entry)
        f.write('\0\0')  # padding? dunno, it's always there.
        if self.appblock:
            f.write(self.appblock)
        if self.sortblock:
            f.write(self.sortblock)
        for raw in record_data:
            f.write(raw)
    finally:
        # Only close what this method opened; a caller-supplied file
        # object stays under the caller's control.
        if opened_here:
            f.close()

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import Tkinter
import Tkconstants
# basic scrolled text widget
class ScrolledText(Tkinter.Text):
    """Text widget placed in a frame next to a vertical scrollbar.

    The instance is the Text widget itself.  The frame holding it is
    available as ``self.frame`` and the scrollbar as ``self.vbar``.
    Geometry-manager methods are redirected to the frame so that laying
    out the widget moves the frame, text and scrollbar together.
    """

    def __init__(self, master=None, **kw):
        container = Tkinter.Frame(master)
        scrollbar = Tkinter.Scrollbar(container)
        self.frame = container
        self.vbar = scrollbar
        scrollbar.pack(side=Tkconstants.RIGHT, fill=Tkconstants.Y)

        # Scrolling the text drives the scrollbar, and vice versa below.
        kw['yscrollcommand'] = scrollbar.set
        Tkinter.Text.__init__(self, container, **kw)
        self.pack(side=Tkconstants.LEFT, fill=Tkconstants.BOTH, expand=True)
        scrollbar['command'] = self.yview

        # Copy geometry methods of self.frame without overriding Text
        # methods = hack!
        geometry_names = set()
        for manager in (Tkinter.Pack, Tkinter.Grid, Tkinter.Place):
            geometry_names.update(vars(manager))
        geometry_names.difference_update(vars(Tkinter.Text))
        for name in geometry_names:
            if name.startswith('_') or name in ('config', 'configure'):
                continue
            setattr(self, name, getattr(container, name))

    def __str__(self):
        # The widget is addressed through its enclosing frame.
        return str(self.frame)

View File

@@ -0,0 +1,149 @@
#!/usr/bin/env python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import os, sys
import signal
import threading
import subprocess
from subprocess import Popen, PIPE, STDOUT
# **heavily** chopped up and modified version of asyncproc.py
# to make it actually work on Windows as well as Mac/Linux
# For the original see:
# "http://www.lysator.liu.se/~bellman/download/"
# author is "Thomas Bellman <bellman@lysator.liu.se>"
# available under GPL version 3 or Later
# create an asynchronous subprocess whose output can be collected in
# a non-blocking manner
# What a mess! Have to use threads just to get non-blocking io
# in a cross-platform manner
# luckily all thread use is hidden within this class
class Process(object):
    """Subprocess whose pipes are serviced by background threads.

    The constructor arguments are handed to subprocess.Popen unchanged,
    except that stdin, stdout and stderr default to subprocess.PIPE when
    the caller does not supply them.  For each pipe that exists a daemon
    thread moves data between the pipe and an in-memory list, so read(),
    readerr() and write() only touch those lists and return immediately.
    """

    def __init__(self, *params, **kwparams):
        # stdin, stdout and stderr are Popen's positional arguments 3, 4
        # and 5; only default those not already given positionally.
        if len(params) <= 3:
            kwparams.setdefault('stdin', subprocess.PIPE)
        if len(params) <= 4:
            kwparams.setdefault('stdout', subprocess.PIPE)
        if len(params) <= 5:
            kwparams.setdefault('stderr', subprocess.PIPE)
        self.__pending_input = []
        self.__collected_outdata = []
        self.__collected_errdata = []
        self.__exitstatus = None
        # One lock guards all three lists and the quit flag.
        self.__lock = threading.Lock()
        # Counts queued input chunks plus the close request.
        self.__inputsem = threading.Semaphore(0)
        self.__quit = False
        self.__process = subprocess.Popen(*params, **kwparams)
        if self.__process.stdin:
            self.__stdin_thread = self.__start_thread(
                "stdin-thread", self.__feeder,
                self.__pending_input, self.__process.stdin)
        if self.__process.stdout:
            self.__stdout_thread = self.__start_thread(
                "stdout-thread", self.__reader,
                self.__collected_outdata, self.__process.stdout)
        if self.__process.stderr:
            self.__stderr_thread = self.__start_thread(
                "stderr-thread", self.__reader,
                self.__collected_errdata, self.__process.stderr)

    def __start_thread(self, name, target, buf, pipe):
        """Start and return a daemon thread running target(buf, pipe)."""
        worker = threading.Thread(name=name, target=target, args=(buf, pipe))
        worker.daemon = True
        worker.start()
        return worker

    def pid(self):
        """Return the process id of the subprocess."""
        return self.__process.pid

    def kill(self, signal):
        """Send the given signal number to the subprocess."""
        self.__process.send_signal(signal)

    # check on subprocess (pass in 'nowait') to act like poll
    def wait(self, flag):
        """Return the subprocess exit code, or None while it still runs.

        With flag 'nowait' (any letter case) the subprocess is only
        polled; any other string blocks until it exits.  Once it has
        exited, stdin is closed and the reader threads are joined, so all
        output is available from read() and readerr() afterwards.
        """
        if flag.lower() == 'nowait':
            rc = self.__process.poll()
        else:
            rc = self.__process.wait()
        if rc is not None:
            if self.__process.stdin:
                self.closeinput()
            if self.__process.stdout:
                self.__stdout_thread.join()
            if self.__process.stderr:
                self.__stderr_thread.join()
        return self.__process.returncode

    def terminate(self):
        """Request stdin to be closed, then terminate the subprocess."""
        if self.__process.stdin:
            self.closeinput()
        self.__process.terminate()

    # thread gets data from subprocess stdout
    def __reader(self, collector, source):
        """Append everything read from source to collector until EOF."""
        while True:
            data = os.read(source.fileno(), 65536)
            with self.__lock:
                collector.append(data)
            if not data:
                # An empty read means the other end closed the pipe.
                source.close()
                break

    # thread feeds data to subprocess stdin
    def __feeder(self, pending, drain):
        """Write queued chunks to drain; close it once asked and empty."""
        while True:
            # Wakes once per queued chunk and once per close request.
            self.__inputsem.acquire()
            # 'with' releases the lock even when close() raises, so a
            # broken pipe here cannot deadlock read() and write().
            with self.__lock:
                if not pending and self.__quit:
                    drain.close()
                    break
                data = pending.pop(0)
            drain.write(data)
            # Push the chunk through now in case the pipe is buffered.
            drain.flush()

    def __take(self, collector):
        """Return everything gathered in collector and empty it."""
        with self.__lock:
            data = b"".join(collector)
            del collector[:]
        return data

    # non-blocking read of data from subprocess stdout
    def read(self):
        """Return the stdout data collected since the previous call."""
        return self.__take(self.__collected_outdata)

    # non-blocking read of data from subprocess stderr
    def readerr(self):
        """Return the stderr data collected since the previous call."""
        return self.__take(self.__collected_errdata)

    # non-blocking write to stdin of subprocess
    def write(self, data):
        """Queue data for delivery to the subprocess's stdin.

        Raises ValueError when the subprocess has no stdin pipe.
        """
        if self.__process.stdin is None:
            raise ValueError("Writing to process with stdin not a pipe")
        with self.__lock:
            self.__pending_input.append(data)
            self.__inputsem.release()

    # close stdinput of subprocess
    def closeinput(self):
        """Ask the feeder thread to close stdin after the queued data."""
        with self.__lock:
            self.__quit = True
            self.__inputsem.release()