tools v6.0.2

2013-04-05 17:44:48 +01:00
parent a2f044e672
commit d586f74faa
72 changed files with 12418 additions and 13112 deletions
--- a/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py
@@ -2,323 +2,266 @@
 # -*- coding: utf-8 -*-

 from __future__ import with_statement
-
-# ignobleepub.pyw, version 3.6
-# Copyright © 2009-2012 by DiapDealer et al.
-
-# engine to remove drm from Kindle for Mac and Kindle for PC books
-# for personal use for archiving and converting your ebooks
-
-# PLEASE DO NOT PIRATE EBOOKS!
-
-# We want all authors and publishers, and eBook stores to live
-# long and prosperous lives but at the same time  we just want to
-# be able to read OUR books on whatever device we want and to keep
-# readable for a long, long time
-
-# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
-#    unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
-#    and many many others
-# Special thanks to The Dark Reverser for MobiDeDrm and CMBDTC for cmbdtc_dump
-# from which this script borrows most unashamedly.
-
-
-# Changelog
-#  1.0 - Name change to k4mobidedrm. Adds Mac support, Adds plugin code
-#  1.1 - Adds support for additional kindle.info files
-#  1.2 - Better error handling for older Mobipocket
-#  1.3 - Don't try to decrypt Topaz books
-#  1.7 - Add support for Topaz books and Kindle serial numbers. Split code.
-#  1.9 - Tidy up after Topaz, minor exception changes
-#  2.1 - Topaz fix and filename sanitizing
-#  2.2 - Topaz Fix and minor Mac code fix
-#  2.3 - More Topaz fixes
-#  2.4 - K4PC/Mac key generation fix
-#  2.6 - Better handling of non-K4PC/Mac ebooks
-#  2.7 - Better trailing bytes handling in mobidedrm
-#  2.8 - Moved parsing of kindle.info files to mac & pc util files.
-#  3.1 - Updated for new calibre interface. Now __init__ in plugin.
-#  3.5 - Now support Kindle for PC/Mac 1.6
-#  3.6 - Even better trailing bytes handling in mobidedrm
-#  3.7 - Add support for Amazon Print Replica ebooks.
-#  3.8 - Improved Topaz support
-#  4.1 - Improved Topaz support and faster decryption with alfcrypto
-#  4.2 - Added support for Amazon's KF8 format ebooks
-#  4.4 - Linux calls to Wine added, and improved configuration dialog
-#  4.5 - Linux works again without Wine. Some Mac key file search changes
-#  4.6 - First attempt to handle unicode properly
-#  4.7 - Added timing reports, and changed search for Mac key files
-#  4.8 - Much better unicode handling, matching the updated inept and ignoble scripts
-#      - Moved back into plugin, __init__ in plugin now only contains plugin code.
-#  4.9 - Missed some invalid characters in cleanup_name
-#  5.0 - Extraction of info from Kindle for PC/Mac moved into kindlekey.py
-#      - tweaked GetDecryptedBook interface to leave passed parameters unchanged
-#  5.1 - moved unicode_argv call inside main for Windows DeDRM compatibility
-
-__version__ = '5.1'
-
-
-import sys, os, re
-import csv
-import getopt
+import sys
+import os, csv
+import binascii
+import zlib
 import re
+from struct import pack, unpack, unpack_from
 import traceback
-import time
-import htmlentitydefs
-import json

 class DrmException(Exception):
    pass

-if 'calibre' in sys.modules:
-    inCalibre = True
-else:
-    inCalibre = False
-
-if inCalibre:
-    from calibre_plugins.dedrm import mobidedrm
-    from calibre_plugins.dedrm import topazextract
-    from calibre_plugins.dedrm import kgenpids
-else:
-    import mobidedrm
-    import topazextract
-    import kgenpids
-
-# Wrap a stream so that output gets flushed immediately
-# and also make sure that any unicode strings get
-# encoded using "replace" before writing them.
-class SafeUnbuffered:
-    def __init__(self, stream):
-        self.stream = stream
-        self.encoding = stream.encoding
-        if self.encoding == None:
-            self.encoding = "utf-8"
-    def write(self, data):
-        if isinstance(data,unicode):
-            data = data.encode(self.encoding,"replace")
-        self.stream.write(data)
-        self.stream.flush()
-    def __getattr__(self, attr):
-        return getattr(self.stream, attr)
-
-iswindows = sys.platform.startswith('win')
-isosx = sys.platform.startswith('darwin')
-
-def unicode_argv():
-    if iswindows:
-        # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
-        # strings.
-
-        # Versions 2.x of Python don't support Unicode in sys.argv on
-        # Windows, with the underlying Windows API instead replacing multi-byte
-        # characters with '?'.
+global charMap1
+global charMap3
+global charMap4


-        from ctypes import POINTER, byref, cdll, c_int, windll
-        from ctypes.wintypes import LPCWSTR, LPWSTR
+charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M'  # 32-character alphabet passed to encode()/encodeHash() by getK4Pids
+charMap3 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'  # 64-character alphabet indexed by encodePID
+charMap4 = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789'  # 34-character alphabet (no 'O', no '0') used by generateDevicePID, checksumPid and pidFromSerial

-        GetCommandLineW = cdll.kernel32.GetCommandLineW
-        GetCommandLineW.argtypes = []
-        GetCommandLineW.restype = LPCWSTR
+# crypto digest routines
+import hashlib

-        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
-        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
-        CommandLineToArgvW.restype = POINTER(LPWSTR)
+def MD5(message):  # returns the raw 16-byte MD5 digest of message (not a hex string)
+    ctx = hashlib.md5()
+    ctx.update(message)
+    return ctx.digest()

-        cmd = GetCommandLineW()
-        argc = c_int(0)
-        argv = CommandLineToArgvW(cmd, byref(argc))
-        if argc.value > 0:
-            # Remove Python executable and commands if present
-            start = argc.value - len(sys.argv)
-            return [argv[i] for i in
-                    xrange(start, argc.value)]
-        # if we don't have any arguments at all, just pass back script name
-        # this should never happen
-        return [u"mobidedrm.py"]
-    else:
-        argvencoding = sys.stdin.encoding
-        if argvencoding == None:
-            argvencoding = "utf-8"
-        return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-
-# cleanup unicode filenames
-# borrowed from calibre from calibre/src/calibre/__init__.py
-# added in removal of control (<32) chars
-# and removal of . at start and end
-# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
-def cleanup_name(name):
-    # substitute filename unfriendly characters
-    name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'").replace(u"*",u"_").replace(u"?",u"")
-    # delete control characters
-    name = u"".join(char for char in name if ord(char)>=32)
-    # white space to single space, delete leading and trailing while space
-    name = re.sub(ur"\s", u" ", name).strip()
-    # remove leading dots
-    while len(name)>0 and name[0] == u".":
-        name = name[1:]
-    # remove trailing dots (Windows doesn't like them)
-    if name.endswith(u'.'):
-        name = name[:-1]
-    return name
-
-# must be passed unicode
-def unescape(text):
-    def fixup(m):
-        text = m.group(0)
-        if text[:2] == u"&#":
-            # character reference
-            try:
-                if text[:3] == u"&#x":
-                    return unichr(int(text[3:-1], 16))
-                else:
-                    return unichr(int(text[2:-1]))
-            except ValueError:
-                pass
-        else:
-            # named entity
-            try:
-                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
-            except KeyError:
-                pass
-        return text # leave as is
-    return re.sub(u"&#?\w+;", fixup, text)
-
-def GetDecryptedBook(infile, kDatabases, serials, pids, starttime = time.time()):
-    # handle the obvious cases at the beginning
-    if not os.path.isfile(infile):
-        raise DRMException (u"Input file does not exist.")
-
-    mobi = True
-    magic3 = open(infile,'rb').read(3)
-    if magic3 == 'TPZ':
-        mobi = False
-
-    if mobi:
-        mb = mobidedrm.MobiBook(infile)
-    else:
-        mb = topazextract.TopazBook(infile)
-
-    bookname = unescape(mb.getBookTitle())
-    print u"Decrypting {1} ebook: {0}".format(bookname, mb.getBookType())
-
-    # copy list of pids
-    totalpids = list(pids)
-    # extend PID list with book-specific PIDs
-    md1, md2 = mb.getPIDMetaInfo()
-    totalpids.extend(kgenpids.getPidList(md1, md2, serials, kDatabases))
-    print u"Found {1:d} keys to try after {0:.1f} seconds".format(time.time()-starttime, len(totalpids))
-
-    try:
-        mb.processBook(totalpids)
-    except:
-        mb.cleanup
-        raise
-
-    print u"Decryption succeeded after {0:.1f} seconds".format(time.time()-starttime)
-    return mb
+def SHA1(message):  # returns the raw 20-byte SHA-1 digest of message (not a hex string)
+    ctx = hashlib.sha1()
+    ctx.update(message)
+    return ctx.digest()


-# kDatabaseFiles is a list of files created by kindlekey
-def decryptBook(infile, outdir, kDatabaseFiles, serials, pids):
-    starttime = time.time()
-    kDatabases = []
-    for dbfile in kDatabaseFiles:
-        kindleDatabase = {}
-        try:
-            with open(dbfile, 'r') as keyfilein:
-                kindleDatabase = json.loads(keyfilein.read())
-            kDatabases.append([dbfile,kindleDatabase])
-        except Exception, e:
-            print u"Error getting database from file {0:s}: {1:s}".format(dbfile,e)
-            traceback.print_exc()
+# Encode the bytes in data with the characters in map (two output characters per input byte)
+def encode(data, map):
+    result = ''
+    for char in data:
+        value = ord(char)
+        Q = (value ^ 0x80) // len(map)  # first character: quotient, taken after flipping the top bit
+        R = value % len(map)  # second character: remainder of the unflipped value
+        result += map[Q]
+        result += map[R]
+    return result

+# Hash the bytes in data with MD5 and then encode the digest with the characters in map
+def encodeHash(data,map):
+    return encode(MD5(data),map)

-
-    try:
-        book = GetDecryptedBook(infile, kDatabases, serials, pids, starttime)
-    except Exception, e:
-        print u"Error decrypting book after {1:.1f} seconds: {0}".format(e.args[0],time.time()-starttime)
-        traceback.print_exc()
-        return 1
-
-    # if we're saving to the same folder as the original, use file name_
-    # if to a different folder, use book name
-    if os.path.normcase(os.path.normpath(outdir)) == os.path.normcase(os.path.normpath(os.path.dirname(infile))):
-        outfilename = os.path.splitext(os.path.basename(infile))[0]
-    else:
-        outfilename = cleanup_name(book.getBookTitle())
-
-    # avoid excessively long file names
-    if len(outfilename)>150:
-        outfilename = outfilename[:150]
-
-    outfilename = outfilename+u"_nodrm"
-    outfile = os.path.join(outdir, outfilename + book.getBookExtension())
-
-    book.getFile(outfile)
-    print u"Saved decrypted book {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename)
-
-    if book.getBookType()==u"Topaz":
-        zipname = os.path.join(outdir, outfilename + u"_SVG.zip")
-        book.getSVGZip(zipname)
-        print u"Saved SVG ZIP Archive for {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename)
-
-    # remove internal temporary directory of Topaz pieces
-    book.cleanup()
-    return 0
-
-
-def usage(progname):
-    print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks"
-    print u"Usage:"
-    print u"    {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] <infile> <outdir>".format(progname)
+# Decode the string in data with the characters in map. Returns the bytes decoded before the first character not found in map
+def decode(data,map):
+    result = ''
+    for i in range (0,len(data)-1,2):  # consume the input two characters at a time
+        high = map.find(data[i])
+        low = map.find(data[i+1])
+        if (high == -1) or (low == -1) :  # stop (without raising) at the first character that is not in map
+            break
+        value = (((high * len(map)) ^ 0x80) & 0xFF) + low  # undo the top-bit flip applied to the first character by encode
+        result += pack('B',value)
+    return result

 #
-# Main
+# PID generation routines
 #
-def cli_main():
-    argv=unicode_argv()
-    progname = os.path.basename(argv[0])
-    print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__)

-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
-    except getopt.GetoptError, err:
-        print u"Error in options or arguments: {0}".format(err.args[0])
-        usage(progname)
-        sys.exit(2)
-    if len(args)<2:
-        usage(progname)
-        sys.exit(2)
+# Returns the two bits at offset from a bit field (offset counts 2-bit groups, most significant group of each byte first)
+def getTwoBitsFromBitField(bitField,offset):
+    byteNumber = offset // 4  # four 2-bit groups per byte
+    bitPosition = 6 - 2*(offset % 4)  # shift needed to bring the wanted group down to the low two bits
+    return ord(bitField[byteNumber]) >> bitPosition & 3

-    infile = args[0]
-    outdir = args[1]
-    kDatabaseFiles = []
-    serials = []
+# Returns the six bits at offset from a bit field (offset counts 6-bit groups)
+def getSixBitsFromBitField(bitField,offset):
+    offset *= 3  # convert to an offset in 2-bit groups, three of which make up the result
+    value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
+    return value
+
+# Encode the first 48 bits of hash as eight 6-bit indexes into charMap3 to generate the 8 character PID string
+def encodePID(hash):
+    global charMap3
+    PID = ''
+    for position in range (0,8):
+        PID += charMap3[getSixBitsFromBitField(hash,position)]
+    return PID
+
+# Encryption table used to generate the device PID (256 entries; same construction as the reflected CRC-32 lookup table)
+def generatePidEncryptionTable() :
+    table = []
+    for counter1 in range (0,0x100):
+        value = counter1
+        for counter2 in range (0,8):  # process the byte one bit at a time
+            if (value & 1 == 0) :
+                value = value >> 1
+            else :
+                value = value >> 1
+                value = value ^ 0xEDB88320  # xor in the constant whenever a set bit was shifted out
+        table.append(value)
+    return table
+
+# Seed value used to generate the device PID, computed from the first four characters of dsn
+def generatePidSeed(table,dsn) :
+    value = 0
+    for counter in range (0,4) :
+        index = (ord(dsn[counter]) ^ value) &0xFF  # low byte of the running value mixed with the next character
+        value = (value >> 8) ^ table[index]
+    return value
+
+# Generate the device PID: an 8 character string drawn from charMap4 (no checksum characters)
+def generateDevicePID(table,dsn,nbRoll):
+    global charMap4
+    seed = generatePidSeed(table,dsn)
+    pidAscii = ''
+    pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]  # the four seed bytes, most significant first, repeated twice
+    index = 0
+    for counter in range (0,nbRoll):  # xor the first nbRoll characters of dsn into pid, wrapping around every 8
+        pid[index] = pid[index] ^ ord(dsn[counter])
+        index = (index+1) %8
+    for counter in range (0,8):
+        index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)  # at most 0x1f + 1, so always a valid charMap4 index
+        pidAscii += charMap4[index]
+    return pidAscii
+
+def crc32(s):  # binascii.crc32 of s with a starting value of -1, bit-inverted and masked to an unsigned 32-bit value
+    return (~binascii.crc32(s,-1))&0xFFFFFFFF
+
+# convert from 8 digit PID to 10 digit PID with checksum (appends two charMap4 characters derived from the crc32 of s)
+def checksumPid(s):
+    global charMap4
+    crc = crc32(s)
+    crc = crc ^ (crc >> 16)  # fold the high 16 bits into the low 16 bits
+    res = s
+    l = len(charMap4)
+    for i in (0,1):  # one checksum character for each of the two low-order bytes
+        b = crc & 0xff
+        pos = (b // l) ^ (b % l)
+        res += charMap4[pos%l]
+        crc >>= 8
+    return res
+
+
+# old kindle serial number to fixed pid: returns l characters drawn from charMap4
+def pidFromSerial(s, l):
+    global charMap4
+    crc = crc32(s)
+    arr1 = [0]*l
+    for i in xrange(len(s)):  # xor-fold the characters of the serial number into l values
+        arr1[i%l] ^= ord(s[i])
+    crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
+    for i in xrange(l):  # mix in the crc bytes, cycling through the four of them
+        arr1[i] ^= crc_bytes[i&3]
+    pid = ""
+    for i in xrange(l):
+        b = arr1[i] & 0xff
+        pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]  # same byte-to-character mapping as generateDevicePID
+    return pid
+
+
+# Use rec209 and token (values from the book's EXTH header records) and the Kindle serial number to calculate the candidate pids.
+def getKindlePids(rec209, token, serialnum):
+    pids=[]
+
+    if isinstance(serialnum,unicode):  # the hashing below works on byte strings
+        serialnum = serialnum.encode('ascii')
+
+    # Compute book PID
+    pidHash = SHA1(serialnum+rec209+token)
+    bookPID = encodePID(pidHash)
+    bookPID = checksumPid(bookPID)
+    pids.append(bookPID)
+
+    # also compute the fixed pid that applied before the 2.5 firmware update
+    kindlePID = pidFromSerial(serialnum, 7) + "*"
+    kindlePID = checksumPid(kindlePID)
+    pids.append(kindlePID)
+
+    return pids
+
+
+# use the values from a decoded Kindle info database to calculate the candidate book pids.
+
+keynames = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber']
+
+def getK4Pids(rec209, token, kindleDatabase):  # kindleDatabase is a (name, dict) pair; returns a list of candidate PIDs
+    global charMap1
    pids = []

-    for o, a in opts:
-        if o == "-k":
-            if a == None :
-                raise DrmException("Invalid parameter for -k")
-            kDatabaseFiles.append(a)
-        if o == "-p":
-            if a == None :
-                raise DrmException("Invalid parameter for -p")
-            pids = a.split(',')
-        if o == "-s":
-            if a == None :
-                raise DrmException("Invalid parameter for -s")
-            serials = a.split(',')
+    try:
+        # Get the Mazama Random number (the values in kindleDatabase[1] are stored hex-encoded)
+        MazamaRandomNumber = (kindleDatabase[1])['MazamaRandomNumber'].decode('hex').encode('ascii')

-    # try with built in Kindle Info files if not on Linux
-    k4 = not sys.platform.startswith('linux')
+        # Get the kindle account token
+        kindleAccountToken = (kindleDatabase[1])['kindle.account.tokens'].decode('hex').encode('ascii')

-    return decryptBook(infile, outdir, kDatabaseFiles, serials, pids)
+        # Get the IDString used to decode the Kindle Info file
+        IDString = (kindleDatabase[1])['IDString'].decode('hex').encode('ascii')

+        # Get the UserName stored when the Kindle Info file was decoded
+        UserName = (kindleDatabase[1])['UserName'].decode('hex').encode('ascii')


-if __name__ == '__main__':
-    sys.stdout=SafeUnbuffered(sys.stdout)
-    sys.stderr=SafeUnbuffered(sys.stderr)
-    sys.exit(cli_main())
+    except KeyError:
+        print u"Keys not found in the database {0}.".format(kindleDatabase[0])
+        return pids
+
+    # Hash and encode the ID string
+    encodedIDString = encodeHash(IDString,charMap1)
+
+    # Hash and encode the user name
+    encodedUsername = encodeHash(UserName,charMap1)
+
+    # concat, hash and encode to calculate the DSN
+    DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1)
+
+    # Compute the device PID (which, as far as I can tell, is used for nothing).
+    table =  generatePidEncryptionTable()
+    devicePID = generateDevicePID(table,DSN,4)
+    devicePID = checksumPid(devicePID)
+    pids.append(devicePID)
+
+    # Compute book PIDs
+
+    # book pid
+    pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
+    bookPID = encodePID(pidHash)
+    bookPID = checksumPid(bookPID)
+    pids.append(bookPID)
+
+    # variant 1 (without the DSN)
+    pidHash = SHA1(kindleAccountToken+rec209+token)
+    bookPID = encodePID(pidHash)
+    bookPID = checksumPid(bookPID)
+    pids.append(bookPID)
+
+    # variant 2 (without the account token)
+    pidHash = SHA1(DSN+rec209+token)
+    bookPID = encodePID(pidHash)
+    bookPID = checksumPid(bookPID)
+    pids.append(bookPID)
+
+    return pids
+
+def getPidList(md1, md2, serials=[], kDatabases=[]):  # md1, md2 are passed on as (rec209, token); the list defaults are never mutated here
+    pidlst = []
+
+    if kDatabases is None:  # accept None as "no databases"
+        kDatabases = []
+    if serials is None:  # accept None as "no serial numbers"
+        serials = []
+
+    for kDatabase in kDatabases:
+        try:
+            pidlst.extend(getK4Pids(md1, md2, kDatabase))
+        except Exception, e:  # best effort: report the failure and carry on with the next database
+            print u"Error getting PIDs from database {0}: {1}".format(kDatabase[0],e.args[0])
+            traceback.print_exc()
+
+    for serialnum in serials:
+        try:
+            pidlst.extend(getKindlePids(md1, md2, serialnum))
+        except Exception, e:  # best effort: report the failure and carry on with the next serial number
+            print u"Error getting PIDs from serial number {0}: {1}".format(serialnum ,e.args[0])
+            traceback.print_exc()
+
+    return pidlst