tools v3.0
First combined mobi/topaz kindle tool
@@ -1,6 +1,6 @@
property prefsFileName : "com.apprenticealf.dedrm.plist"
property prefsFolderName : "com.apprenticealf.dedrm"
property handledExtensions : {"epub", "pdf", "prc", "azw", "mobi", "pdb", "der", "b64"}
property handledExtensions : {"epub", "pdf", "prc", "azw", "azw1", "mobi", "pdb", "der", "b64"}

global eReaderTool
global MobipocketTool
@@ -15,6 +15,7 @@ global ProgressApp
global PIDs
global bnKeys
global KindleInfoList
global KindleSerialList
global AdeptKeyList

global ErrorList
@@ -73,16 +74,12 @@ on unlockmobifile(encryptedFile)
set TOPAZ to read file encryptedFile from 1 for 4
end try
set ErrorCount to ErrorCount + 1
if TOPAZ is "TPZ0" then
set ErrorList to ErrorList & encryptedFile & " is a TOPAZ file.

"
else
set ErrorList to ErrorList & encryptedFile & " is not a Mobipocket file.
if TOPAZ is not "TPZ0" then
set ErrorList to ErrorList & encryptedFile & " is neither a Mobipocket nor a TOPAZ file.

"
return
end if
return
end if
set encryptedFilePath to POSIX path of file encryptedFile
tell application "Finder"
@@ -95,14 +92,16 @@ on unlockmobifile(encryptedFile)
set fileExtension to "." & the last text item of fileName
set fileName to (text items 1 through -2 of fileName) as string
end if
set unlockedFilePath to POSIX path of file (parent_folder & fileName & "_dedrmed" & fileExtension)
set unlockedFileParentFolderPath to POSIX path of file parent_folder
set shellcommand to "python " & (quoted form of MobipocketTool)
repeat with KindleInfoPath in KindleInfoList
set shellcommand to shellcommand & " -k " & quoted form of KindleInfoPath
end repeat
set Serialstring to GetSerialstring()
if Serialstring is not "" then set shellcommand to shellcommand & " -s " & Serialstring
set PIDstring to GetPIDstring()
if PIDstring is not "" then set shellcommand to shellcommand & " -p " & PIDstring
set shellcommand to shellcommand & " " & (quoted form of encryptedFilePath) & " " & (quoted form of unlockedFilePath)
set shellcommand to shellcommand & " " & (quoted form of encryptedFilePath) & " " & (quoted form of unlockedFileParentFolderPath)
--display dialog "shellcommand: " default answer shellcommand buttons {"OK"} default button 1 giving up after 10
try
set shellresult to do shell script shellcommand
@@ -450,7 +449,7 @@ on handlefile(droppedFile)
set fileExtension to the last text item of fileName
set fileName to (text items 1 through -2 of fileName) as string
end if
if fileExtension is "prc" or fileExtension is "mobi" or fileExtension is "azw" then
if fileExtension is "prc" or fileExtension is "mobi" or fileExtension is "azw" or fileExtension is "azw1" then
set completedebooks to completedebooks + 1
IncProgress(fileName, completedebooks)
unlockmobifile(droppedFile as text)
@@ -505,6 +504,18 @@ on countfile(droppedFile)
end if
end countfile

on GetSerialstring()
set Serialstring to ""
repeat with Serial in KindleSerialList
if Serialstring is "" then
set Serialstring to Serial
else
set Serialstring to Serialstring & "," & Serial
end if
end repeat
return Serialstring
end GetSerialstring

on GetPIDstring()
set PIDstring to ""
repeat with PID in PIDs
@@ -580,7 +591,7 @@ on GetPIDs()
Enter any additional Mobipocket PIDs for your Mobipocket books one at a time:"
set FinishedButton to "No More"
end if
set dialogresult to (display dialog DialogPrompt default answer "" buttons {"Delete All", "Add", FinishedButton} with title "DeDRM Applescript 2/5" default button 2)
set dialogresult to (display dialog DialogPrompt default answer "" buttons {"Delete All", "Add", FinishedButton} with title "DeDRM Applescript 2/6" default button 2)
if button returned of dialogresult is "Add" then
set PID to text returned of dialogresult
set PIDlength to length of PID
@@ -604,6 +615,42 @@ Enter any additional Mobipocket PIDs for your Mobipocket books one at a time:"
end repeat
end GetPIDs

on GetSerials()
repeat
set Serialstring to GetSerialstring()
if Serialstring is "" then
set DialogPrompt to "Enter any Kindle Serial Numbers one at a time:"
set FinishedButton to "None"
else
set DialogPrompt to "Current Kindle Serial Numbers: " & Serialstring & ".

Enter any additional Kindle Serial Numbers one at a time:"
set FinishedButton to "No More"
end if
set dialogresult to (display dialog DialogPrompt default answer "" buttons {"Delete All", "Add", FinishedButton} with title "DeDRM Applescript 3/6" default button 2)
if button returned of dialogresult is "Add" then
set Serial to text returned of dialogresult
set Seriallength to length of Serial
if Seriallength is 16 and (first character of Serial) is "B" then
set KindleSerialList to KindleSerialList & Serial
else
display dialog "Kindle Serial Numbers are 16 characters long and start with B." buttons {"OK"} default button 1 with title "DeDRM Applescript" with icon caution
end if
else if button returned of dialogresult is "Delete All" then
if Serialstring is not "" then
try
set dialogresult to (display dialog "Are you sure you want to delete all stored Kindle Serial Numbers?" buttons {"Cancel", "Delete"} default button 1 with title "DeDRM Applescript")
end try
if button returned of dialogresult is "Delete" then
set KindleSerialList to {}
end if
end if
else
exit repeat
end if
end repeat
end GetSerials

on GetKindleInfoFiles()
repeat
set KInfostring to GetKindleInfostring()
@@ -647,7 +694,7 @@ on GetIneptPDF(always)
try
tell me to activate
if (always) then
set promptstring to "DeDRM Applescript 5/5
set promptstring to "DeDRM Applescript 6/6
"
else
set promptstring to "DeDRM Applescript
@@ -688,7 +735,7 @@ Please enter any additional "
set DialogPrompt to DialogPrompt & "eReader/Barnes & Noble Name,Number key pairs one at a time. If you're only decoding eReader files, the last 8 digits of the Number will do. The full 15 or 16 are only needed for Barnes & Noble ePubs. Only the last eight will be stored or displayed. Please separate the name and number with a comma and click \"Add\". Or to add an already generated .b64 file, just click \"Add\" with nothing in the text field."
set dialogtitle to "DeDRM Applescript"
if (running) then
set dialogtitle to dialogtitle & " 3/5"
set dialogtitle to dialogtitle & " 4/6"
end if
set dialogresult to (display dialog DialogPrompt default answer bnKeyText buttons {"Delete All", "Add", FinishedButton} with title dialogtitle default button 2)
if button returned of dialogresult is "Add" then
@@ -776,7 +823,7 @@ on GetAdeptKeyFiles()
To add extra key files (.der), click the Add… button."
set FinishedButton to "No More"
end if
set dialogresult to (display dialog DialogPrompt buttons {"Forget All", "Add…", FinishedButton} with title "DeDRM Applescript 4/5" default button 2)
set dialogresult to (display dialog DialogPrompt buttons {"Forget All", "Add…", FinishedButton} with title "DeDRM Applescript 5/6" default button 2)
if button returned of dialogresult is "Add…" then
try
set newFile to (choose file with prompt "Please select an Adept key file") as text
@@ -957,12 +1004,19 @@ on ReadPrefs()
set PIDs to {}
set bnKeys to {}
set KindleInfoList to {}
set KindleSerialList to {}
set AdeptKeyList to {}
if fileexists(POSIX path of file preferencesFilePath) then
tell application "System Events"
try
set PIDs to value of property list item "PIDs" of property list file preferencesFilePath
end try
try
set KindleSerialList to value of property list item "KindleSerials" of property list file preferencesFilePath
end try
try
set KindleInfoList to value of property list item "KindleInfoFiles" of property list file preferencesFilePath
end try
try
set bnKeys to value of property list item "bnKeys" of property list file preferencesFilePath
end try
@@ -995,6 +1049,8 @@ on WritePrefs()
set the base_dict to make new property list item with properties {kind:record}
set myPrefs to make new property list file with properties {contents:base_dict, name:preferencesFilePath}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"PIDs", value:PIDs}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"KindleSerials", value:KindleSerialList}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"KindleInfoFiles", value:KindleInfoList}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"bnKeys", value:bnKeys}
make new property list item at end of property list items of contents of myPrefs with properties {kind:list, name:"AdeptKeys", value:AdeptKeyList}
make new property list item at end of property list items of contents of myPrefs with properties {kind:string, name:"IneptPDF", value:AdobePDFTool}
@@ -1047,13 +1103,13 @@ on run
if GetTools() then
display dialog "Drag&Drop encrypted ebooks onto this AppleScript's icon in the Finder to decode them after you have finished configuring it and it has quit.

Click the Continue button to enter any PIDs for Mobipocket/Kindle ebooks, to enter name/number key pairs for Barnes & Noble/eReader ebooks, and to select Barnes & Noble .b64 key files and Adobe Adept .der key files.
Click the Continue button to enter any PIDs for Mobipocket ebooks; serial numbers for Kindle ebooks; name,number key pairs for Barnes & Noble/eReader ebooks; to select extra Barnes & Noble .b64 key files; to select extra Adobe Adept .der key files; and to find the optional ineptpdf.pyw script.

***You do not need to enter any extra info if decoding ebooks downloaded to your installation of Kindle for Mac, or Adobe Digital Editions. If you do not have any PIDS, name/number keys or .b64 or .der files to add, just click the Cancel button.***
***You do not need to enter any extra info if decoding ebooks downloaded to your installation of Kindle for Mac, or Adobe Digital Editions. If you do not have any PIDs, serial numbers, name,number keys, or .b64 or .der files to add, and do not want to decode PDF files, just click the Cancel button.***

Please only use this to get access to your own books. Authors, publishers and ebook stores need to make money to produce more ebooks. Don't cheat them.

This AppleScript is by Apprentice Alf and uses python scripts produced by CMBDTC, IHeartCabbages, DarkReverser, DiapDealer, some_updates, ApprenticeAlf and others.
This AppleScript is by Apprentice Alf and uses python scripts produced by CMBDTC, IHeartCabbages, DarkReverser, DiapDealer, some_updates, Apprentice Alf and others.

This AppleScript (but not necessarily the enclosed python scripts) is free and unencumbered software released into the public domain.

@@ -1061,11 +1117,13 @@ Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

For more information, please refer to
<http://unlicense.org/>
" with title "DeDRM Applescript 1/5" buttons {"Cancel", "Continue"} default button 2
" with title "DeDRM Applescript 1/6" buttons {"Cancel", "Continue"} default button 2
ReadPrefs()
GetPIDs()
GetKeys(true)
GetAdeptKey(true)

GetPIDs()
GetSerials()
GetKeys(true)
GetAdeptKeyFiles()
GetIneptPDF(true)
--GetKindleInfoFiles()

@@ -24,7 +24,7 @@
<key>CFBundleExecutable</key>
<string>droplet</string>
<key>CFBundleGetInfoString</key>
<string>DeDRM 1.3, Copyright © 2010 by Apprentice Alf.</string>
<string>DeDRM 1.4, Copyright © 2010 by Apprentice Alf.</string>
<key>CFBundleIconFile</key>
<string>droplet</string>
<key>CFBundleInfoDictionaryVersion</key>
@@ -34,11 +34,11 @@
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.3</string>
<key>CFBundleSignature</key>
<string>dplt</string>
<string>1.4</string>
<key>LSMinimumSystemVersion</key>
<string>10.5.0</string>
<key>CFBundleSignature</key>
<string>dplt</string>
<key>LSRequiresCarbon</key>
<true/>
<key>WindowState</key>

Binary file not shown.
@@ -0,0 +1,900 @@
#! /usr/bin/python

"""

Comprehensive Mazama Book DRM with Topaz Cryptography V2.2

-----BEGIN PUBLIC KEY-----
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdBHJ4CNc6DNFCw4MRCw4SWAK6
M8hYfnNEI0yQmn5Ti+W8biT7EatpauE/5jgQMPBmdNrDr1hbHyHBSP7xeC2qlRWC
B62UCxeu/fpfnvNHDN/wPWWH4jynZ2M6cdcnE5LQ+FfeKqZn7gnG2No1U9h7oOHx
y2/pHuYme7U1TsgSjwIDAQAB
-----END PUBLIC KEY-----

"""

from __future__ import with_statement

import csv
import sys
import os
import getopt
import zlib
from struct import pack
from struct import unpack
from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
    create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
    string_at, Structure, c_void_p, cast
import _winreg as winreg
import Tkinter
import Tkconstants
import tkMessageBox
import traceback
import hashlib

MAX_PATH = 255

kernel32 = windll.kernel32
advapi32 = windll.advapi32
crypt32 = windll.crypt32

global kindleDatabase
global bookFile
global bookPayloadOffset
global bookHeaderRecords
global bookMetadata
global bookKey
global command

#
# Various character maps used to decrypt books. Probably supposed to act as obfuscation
#

charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
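# (note, inferred from the code below: charMap1 is used when encoding the DSN,
# charMap2 obfuscates kindle.info names and values, charMap3 is the usual
# base64 alphabet used by encodePID, and charMap4 supplies the characters of
# the generated device PID)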

#
# Exceptions for all the problems that might happen during the script
#

class CMBDTCError(Exception):
    pass

class CMBDTCFatal(Exception):
    pass

#
# Stolen stuff
#

class DataBlob(Structure):
    _fields_ = [('cbData', c_uint),
                ('pbData', c_void_p)]
DataBlob_p = POINTER(DataBlob)

def GetSystemDirectory():
    GetSystemDirectoryW = kernel32.GetSystemDirectoryW
    GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
    GetSystemDirectoryW.restype = c_uint
    def GetSystemDirectory():
        buffer = create_unicode_buffer(MAX_PATH + 1)
        GetSystemDirectoryW(buffer, len(buffer))
        return buffer.value
    return GetSystemDirectory
GetSystemDirectory = GetSystemDirectory()


def GetVolumeSerialNumber():
    GetVolumeInformationW = kernel32.GetVolumeInformationW
    GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
                                      POINTER(c_uint), POINTER(c_uint),
                                      POINTER(c_uint), c_wchar_p, c_uint]
    GetVolumeInformationW.restype = c_uint
    def GetVolumeSerialNumber(path):
        vsn = c_uint(0)
        GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
        return vsn.value
    return GetVolumeSerialNumber
GetVolumeSerialNumber = GetVolumeSerialNumber()


def GetUserName():
    GetUserNameW = advapi32.GetUserNameW
    GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
    GetUserNameW.restype = c_uint
    def GetUserName():
        buffer = create_unicode_buffer(32)
        size = c_uint(len(buffer))
        while not GetUserNameW(buffer, byref(size)):
            buffer = create_unicode_buffer(len(buffer) * 2)
            size.value = len(buffer)
        return buffer.value.encode('utf-16-le')[::2]
    return GetUserName
GetUserName = GetUserName()


def CryptUnprotectData():
    _CryptUnprotectData = crypt32.CryptUnprotectData
    _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
                                    c_void_p, c_void_p, c_uint, DataBlob_p]
    _CryptUnprotectData.restype = c_uint
    def CryptUnprotectData(indata, entropy):
        indatab = create_string_buffer(indata)
        indata = DataBlob(len(indata), cast(indatab, c_void_p))
        entropyb = create_string_buffer(entropy)
        entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
        outdata = DataBlob()
        if not _CryptUnprotectData(byref(indata), None, byref(entropy),
                                   None, None, 0, byref(outdata)):
            raise CMBDTCFatal("Failed to Unprotect Data")
        return string_at(outdata.pbData, outdata.cbData)
    return CryptUnprotectData
CryptUnprotectData = CryptUnprotectData()

#
# Returns the MD5 digest of "message"
#

def MD5(message):
    ctx = hashlib.md5()
    ctx.update(message)
    return ctx.digest()

#
# Returns the SHA1 digest of "message"
#

def SHA1(message):
    ctx = hashlib.sha1()
    ctx.update(message)
    return ctx.digest()

#
# Open the book file at path
#

def openBook(path):
    try:
        return open(path,'rb')
    except:
        raise CMBDTCFatal("Could not open book file: " + path)
#
# Encode the bytes in data with the characters in map
#
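# (each input byte b becomes two map characters: map[(b ^ 0x80) // len(map)]
# followed by map[b % len(map)])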

def encode(data, map):
    result = ""
    for char in data:
        value = ord(char)
        Q = (value ^ 0x80) // len(map)
        R = value % len(map)
        result += map[Q]
        result += map[R]
    return result

#
# Hash the bytes in data and then encode the digest with the characters in map
#

def encodeHash(data,map):
    return encode(MD5(data),map)

#
# Decode the string in data with the characters in map. Returns the decoded bytes
#
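# (inverse of encode(): each pair of map characters is folded back into one byte)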

def decode(data,map):
    result = ""
    for i in range (0,len(data),2):
        high = map.find(data[i])
        low = map.find(data[i+1])
        value = (((high * 0x40) ^ 0x80) & 0xFF) + low
        result += pack("B",value)
    return result

#
# Locate and open the Kindle.info file (Hopefully in the way it is done in the Kindle application)
#

def openKindleInfo():
    regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
    path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
    return open(path+'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info','r')

#
# Parse the Kindle.info file and return the records as a list of key-values
#
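# (the file appears to be a '{'-separated sequence of "hashedName:obfuscatedValue"
# pairs, so the returned dict is keyed by the encoded hash of each record name)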

def parseKindleInfo():
    DB = {}
    infoReader = openKindleInfo()
    infoReader.read(1)
    data = infoReader.read()
    items = data.split('{')

    for item in items:
        splito = item.split(':')
        DB[splito[0]] = splito[1]
    return DB

#
# Find if the original string for a hashed/encoded string is known. If so return the original string, otherwise return an empty string. (Totally not optimal)
#

def findNameForHash(hash):
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    result = ""
    for name in names:
        if hash == encodeHash(name, charMap2):
            result = name
            break
    return result

#
# Print all the records from the kindle.info file (option -i)
#

def printKindleInfo():
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------\n")
        else :
            print ("Unknown Record")
        print getKindleInfoValueForHash(record)
        print "\n"
#
# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
#
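# (the stored value is de-obfuscated with charMap2 and then decrypted with the
# Windows DPAPI wrapper CryptUnprotectData defined above)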

def getKindleInfoValueForHash(hashedKey):
    global kindleDatabase
    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
    return CryptUnprotectData(encryptedValue,"")

#
# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
#

def getKindleInfoValueForKey(key):
    return getKindleInfoValueForHash(encodeHash(key,charMap2))

#
# Get a 7 bit encoded number from the book file
#
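# (variable-length big-endian base-128 integer: bytes with the high bit set
# continue the number, and a leading 0xFF byte marks a negative value)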

def bookReadEncodedNumber():
    flag = False
    data = ord(bookFile.read(1))

    if data == 0xFF:
        flag = True
        data = ord(bookFile.read(1))

    if data >= 0x80:
        datax = (data & 0x7F)
        while data >= 0x80 :
            data = ord(bookFile.read(1))
            datax = (datax <<7) + (data & 0x7F)
        data = datax

    if flag:
        data = -data
    return data

#
# Encode a number in 7 bit format
#
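# (mirror of bookReadEncodedNumber: most significant byte first, high bit set
# on every byte except the last, with a 0xFF prefix for negative values)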

def encodeNumber(number):
    result = ""
    negative = False
    flag = 0

    if number < 0 :
        number = -number + 1
        negative = True

    while True:
        byte = number & 0x7F
        number = number >> 7
        byte += flag
        result += chr(byte)
        flag = 0x80
        if number == 0 :
            if (byte == 0xFF and negative == False) :
                result += chr(0x80)
            break

    if negative:
        result += chr(0xFF)

    return result[::-1]

#
# Get a length prefixed string from the file
#

def bookReadString():
    stringLength = bookReadEncodedNumber()
    return unpack(str(stringLength)+"s",bookFile.read(stringLength))[0]

#
# Returns a length prefixed string
#

def lengthPrefixString(data):
    return encodeNumber(len(data))+data


#
# Read and return the data of one header record at the current book file position [[offset,compressedLength,decompressedLength],...]
#

def bookReadHeaderRecordData():
    nbValues = bookReadEncodedNumber()
    values = []
    for i in range (0,nbValues):
        values.append([bookReadEncodedNumber(),bookReadEncodedNumber(),bookReadEncodedNumber()])
    return values

#
# Read and parse one header record at the current book file position and return the associated data [[offset,compressedLength,decompressedLength],...]
#

def parseTopazHeaderRecord():
    if ord(bookFile.read(1)) != 0x63:
        raise CMBDTCFatal("Parse Error : Invalid Header")

    tag = bookReadString()
    record = bookReadHeaderRecordData()
    return [tag,record]

#
# Parse the header of a Topaz file, get all the header records and the offset for the payload
#

def parseTopazHeader():
    global bookHeaderRecords
    global bookPayloadOffset
    magic = unpack("4s",bookFile.read(4))[0]

    if magic != 'TPZ0':
        raise CMBDTCFatal("Parse Error : Invalid Header, not a Topaz file")

    nbRecords = bookReadEncodedNumber()
    bookHeaderRecords = {}

    for i in range (0,nbRecords):
        result = parseTopazHeaderRecord()
        bookHeaderRecords[result[0]] = result[1]

    if ord(bookFile.read(1)) != 0x64 :
        raise CMBDTCFatal("Parse Error : Invalid Header")

    bookPayloadOffset = bookFile.tell()

#
# Get a record in the book payload, given its name and index. If necessary the record is decrypted. The record is not decompressed
#

def getBookPayloadRecord(name, index):
    encrypted = False

    try:
        recordOffset = bookHeaderRecords[name][index][0]
    except:
        raise CMBDTCFatal("Parse Error : Invalid Record, record not found")

    bookFile.seek(bookPayloadOffset + recordOffset)

    tag = bookReadString()
    if tag != name :
        raise CMBDTCFatal("Parse Error : Invalid Record, record name doesn't match")

    recordIndex = bookReadEncodedNumber()

    if recordIndex < 0 :
        encrypted = True
        recordIndex = -recordIndex -1

    if recordIndex != index :
        raise CMBDTCFatal("Parse Error : Invalid Record, index doesn't match")

    if bookHeaderRecords[name][index][2] != 0 :
        record = bookFile.read(bookHeaderRecords[name][index][2])
    else:
        record = bookFile.read(bookHeaderRecords[name][index][1])

    if encrypted:
        ctx = topazCryptoInit(bookKey)
        record = topazCryptoDecrypt(record,ctx)

    return record

#
# Extract, decrypt and decompress a book record indicated by name and index and print it or save it in "filename"
#

def extractBookPayloadRecord(name, index, filename):
    compressed = False

    try:
        compressed = bookHeaderRecords[name][index][2] != 0
        record = getBookPayloadRecord(name,index)
    except:
        print("Could not find record")

    if compressed:
        try:
            record = zlib.decompress(record)
        except:
            raise CMBDTCFatal("Could not decompress record")

    if filename != "":
        try:
            file = open(filename,"wb")
            file.write(record)
            file.close()
        except:
            raise CMBDTCFatal("Could not write to destination file")
    else:
        print(record)

#
# return next record [key,value] from the book metadata from the current book position
#

def readMetadataRecord():
    return [bookReadString(),bookReadString()]

#
# Parse the metadata record from the book payload and return a list of [key,values]
#

def parseMetadata():
    global bookHeaderRecords
    global bookPayloadAddress
    global bookMetadata
    bookMetadata = {}
    bookFile.seek(bookPayloadOffset + bookHeaderRecords["metadata"][0][0])
    tag = bookReadString()
    if tag != "metadata" :
        raise CMBDTCFatal("Parse Error : Record Names Don't Match")

    flags = ord(bookFile.read(1))
    nbRecords = ord(bookFile.read(1))

    for i in range (0,nbRecords) :
        record = readMetadataRecord()
        bookMetadata[record[0]] = record[1]

#
# Returns two bits at offset from a bit field
#
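# (offset counts 2-bit groups starting from the most significant bits of byte 0)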

def getTwoBitsFromBitField(bitField,offset):
    byteNumber = offset // 4
    bitPosition = 6 - 2*(offset % 4)

    return ord(bitField[byteNumber]) >> bitPosition & 3

#
# Returns the six bits at offset from a bit field
#

def getSixBitsFromBitField(bitField,offset):
    offset *= 3
    value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) + getTwoBitsFromBitField(bitField,offset+2)
    return value

#
# 8 bits to six bits encoding from hash to generate PID string
#
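# (maps the first eight 6-bit groups of the hash through charMap3, giving an
# 8-character PID)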

def encodePID(hash):
    global charMap3
    PID = ""
    for position in range (0,8):
        PID += charMap3[getSixBitsFromBitField(hash,position)]
    return PID

#
# Context initialisation for the Topaz Crypto
#
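# (derives the two 32-bit state words of the Topaz stream cipher from the key bytes)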

def topazCryptoInit(key):
    ctx1 = 0x0CAFFE19E

    for keyChar in key:
        keyByte = ord(keyChar)
        ctx2 = ctx1
        ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
    return [ctx1,ctx2]

#
# decrypt data with the context prepared by topazCryptoInit()
#

def topazCryptoDecrypt(data, ctx):
    ctx1 = ctx[0]
    ctx2 = ctx[1]

    plainText = ""

    for dataChar in data:
        dataByte = ord(dataChar)
        m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
        ctx2 = ctx1
        ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
        plainText += chr(m)

    return plainText

#
# Decrypt a payload record with the PID
#

def decryptRecord(data,PID):
    ctx = topazCryptoInit(PID)
    return topazCryptoDecrypt(data, ctx)

#
# Try to decrypt a dkey record (contains the book PID)
#

def decryptDkeyRecord(data,PID):
    record = decryptRecord(data,PID)
    fields = unpack("3sB8sB8s3s",record)

    if fields[0] != "PID" or fields[5] != "pid" :
        raise CMBDTCError("Didn't find PID magic numbers in record")
    elif fields[1] != 8 or fields[3] != 8 :
        raise CMBDTCError("Record didn't contain correct length fields")
    elif fields[2] != PID :
        raise CMBDTCError("Record didn't contain PID")

    return fields[4]

#
# Decrypt all the book's dkey records (contain the book PID)
#

def decryptDkeyRecords(data,PID):
    nbKeyRecords = ord(data[0])
    records = []
    data = data[1:]
    for i in range (0,nbKeyRecords):
        length = ord(data[0])
        try:
            key = decryptDkeyRecord(data[1:length+1],PID)
            records.append(key)
        except CMBDTCError:
            pass
        data = data[1+length:]

    return records

#
# Encryption table used to generate the device PID
#
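# (this is the standard reflected CRC-32 table, polynomial 0xEDB88320;
# generatePidSeed() below uses it to run a 4-byte CRC over the DSN)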

def generatePidEncryptionTable() :
    table = []
    for counter1 in range (0,0x100):
        value = counter1
        for counter2 in range (0,8):
            if (value & 1 == 0) :
                value = value >> 1
            else :
                value = value >> 1
                value = value ^ 0xEDB88320
        table.append(value)
    return table

#
# Seed value used to generate the device PID
#

def generatePidSeed(table,dsn) :
    value = 0
    for counter in range (0,4) :
        index = (ord(dsn[counter]) ^ value) &0xFF
        value = (value >> 8) ^ table[index]
    return value

#
# Generate the device PID
#
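# (repeats the 4-byte seed into 8 bytes, XORs in the first nbRoll DSN bytes,
# then maps each byte into charMap4 to produce a printable PID)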

def generateDevicePID(table,dsn,nbRoll):
    seed = generatePidSeed(table,dsn)
    pidAscii = ""
    pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
    index = 0

    for counter in range (0,nbRoll):
        pid[index] = pid[index] ^ ord(dsn[counter])
        index = (index+1) %8

    for counter in range (0,8):
        index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
        pidAscii += charMap4[index]
    return pidAscii

#
# Create decrypted book payload
#

def createDecryptedPayload(payload):

    # store data to be able to create the header later
    headerData = []
    currentOffset = 0

    # Add social DRM to decrypted files

    try:
        data = getKindleInfoValueForKey("kindle.name.info")+":"+ getKindleInfoValueForKey("login")
        if payload != None:
            payload.write(lengthPrefixString("sdrm"))
            payload.write(encodeNumber(0))
            payload.write(data)
        else:
            currentOffset += len(lengthPrefixString("sdrm"))
            currentOffset += len(encodeNumber(0))
            currentOffset += len(data)
    except:
        pass

    for headerRecord in bookHeaderRecords:
        name = headerRecord
        newRecord = []

        if name != "dkey" :

            for index in range (0,len(bookHeaderRecords[name])) :
                offset = currentOffset

                if payload != None:
                    # write tag
                    payload.write(lengthPrefixString(name))
                    # write data
                    payload.write(encodeNumber(index))
                    payload.write(getBookPayloadRecord(name, index))

                else :
                    currentOffset += len(lengthPrefixString(name))
                    currentOffset += len(encodeNumber(index))
                    currentOffset += len(getBookPayloadRecord(name, index))
                newRecord.append([offset,bookHeaderRecords[name][index][1],bookHeaderRecords[name][index][2]])

            headerData.append([name,newRecord])

    return headerData

#
# Create decrypted book
#

def createDecryptedBook(outputFile):
    outputFile = open(outputFile,"wb")
    # Write the payload in a temporary file
    headerData = createDecryptedPayload(None)
    outputFile.write("TPZ0")
    outputFile.write(encodeNumber(len(headerData)))

    for header in headerData :
        outputFile.write(chr(0x63))
        outputFile.write(lengthPrefixString(header[0]))
        outputFile.write(encodeNumber(len(header[1])))
        for numbers in header[1] :
            outputFile.write(encodeNumber(numbers[0]))
            outputFile.write(encodeNumber(numbers[1]))
            outputFile.write(encodeNumber(numbers[2]))

    outputFile.write(chr(0x64))
    createDecryptedPayload(outputFile)
    outputFile.close()

#
# Set the command to execute by the program according to cmdLine parameters
#

def setCommand(name) :
    global command
    if command != "" :
        raise CMBDTCFatal("Invalid command line parameters")
    else :
        command = name

#
# Program usage
#

def usage():
    print("\nUsage:")
    print("\nCMBDTC.py [options] bookFileName\n")
    print("-p Adds a PID to the list of PIDs that are tried to decrypt the book key (can be used several times)")
    print("-d Saves a decrypted copy of the book")
    print("-r Prints or writes to disk a record indicated in the form name:index (e.g \"img:0\")")
    print("-o Output file name to write records and decrypted books")
    print("-v Verbose (can be used several times)")
    print("-i Prints kindle.info database")

#
# Main
#

def main(argv=sys.argv):
    global kindleDatabase
    global bookMetadata
    global bookKey
    global bookFile
    global command

    progname = os.path.basename(argv[0])

    verbose = 0
    recordName = ""
    recordIndex = 0
    outputFile = ""
    PIDs = []
    kindleDatabase = None
    command = ""


    try:
        opts, args = getopt.getopt(sys.argv[1:], "vdir:o:p:")
    except getopt.GetoptError, err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    if len(opts) == 0 and len(args) == 0 :
        usage()
        sys.exit(2)

    for o, a in opts:
        if o == "-v":
            verbose+=1
        if o == "-i":
            setCommand("printInfo")
        if o =="-o":
            if a == None :
                raise CMBDTCFatal("Invalid parameter for -o")
            outputFile = a
        if o =="-r":
            setCommand("printRecord")
            try:
                recordName,recordIndex = a.split(':')
            except:
                raise CMBDTCFatal("Invalid parameter for -r")
        if o =="-p":
            PIDs.append(a)
        if o =="-d":
            setCommand("doit")

    if command == "" :
        raise CMBDTCFatal("No action supplied on command line")

    #
    # Read the encrypted database
    #

    try:
        kindleDatabase = parseKindleInfo()
    except Exception, message:
        if verbose>0:
            print(message)

    if kindleDatabase != None :
        if command == "printInfo" :
            printKindleInfo()

        #
        # Compute the DSN
        #

        # Get the Mazama Random number
        MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")

        # Get the HDD serial
        encodedSystemVolumeSerialNumber = encodeHash(str(GetVolumeSerialNumber(GetSystemDirectory().split('\\')[0] + '\\')),charMap1)

        # Get the current user name
        encodedUsername = encodeHash(GetUserName(),charMap1)

        # concat, hash and encode
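        # (the DSN is the charMap1-encoded SHA-1 of the MazamaRandomNumber
        # concatenated with the encoded volume serial and encoded user name)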
        DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)

        if verbose >1:
            print("DSN: " + DSN)

        #
        # Compute the device PID
        #

        table = generatePidEncryptionTable()
        devicePID = generateDevicePID(table,DSN,4)
        PIDs.append(devicePID)

        if verbose > 0:
            print("Device PID: " + devicePID)

    #
    # Open book and parse metadata
    #

    if len(args) == 1:

        bookFile = openBook(args[0])
        parseTopazHeader()
        parseMetadata()

        #
        # Compute book PID
        #

        # Get the account token

        if kindleDatabase != None:
            kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")

            if verbose >1:
                print("Account Token: " + kindleAccountToken)

            keysRecord = bookMetadata["keys"]
            keysRecordRecord = bookMetadata[keysRecord]

            pidHash = SHA1(DSN+kindleAccountToken+keysRecord+keysRecordRecord)

            bookPID = encodePID(pidHash)
            PIDs.append(bookPID)

            if verbose > 0:
                print ("Book PID: " + bookPID )

        #
        # Decrypt book key
        #

        dkey = getBookPayloadRecord('dkey', 0)

        bookKeys = []
        for PID in PIDs :
            bookKeys+=decryptDkeyRecords(dkey,PID)

        if len(bookKeys) == 0 :
            if verbose > 0 :
                print ("Book key could not be found. Maybe this book is not registered with this device.")
        else :
            bookKey = bookKeys[0]
            if verbose > 0:
                print("Book key: " + bookKey.encode('hex'))

        if command == "printRecord" :
            extractBookPayloadRecord(recordName,int(recordIndex),outputFile)
            if outputFile != "" and verbose>0 :
                print("Wrote record to file: "+outputFile)
        elif command == "doit" :
            if outputFile!="" :
                createDecryptedBook(outputFile)
                if verbose >0 :
                    print ("Decrypted book saved. Don't pirate!")
            elif verbose > 0:
                print("Output file name was not supplied.")

    return 0

if __name__ == '__main__':
    sys.exit(main())

@@ -0,0 +1,817 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)

import sys
sys.stdout=Unbuffered(sys.stdout)

import csv
import os
import getopt
from struct import pack
from struct import unpack


# Get a 7 bit encoded number from string. The most
# significant byte comes first and has the high bit (8th) set

def readEncodedNumber(file):
    flag = False
    c = file.read(1)
    if (len(c) == 0):
        return None
    data = ord(c)

    if data == 0xFF:
        flag = True
        c = file.read(1)
        if (len(c) == 0):
            return None
        data = ord(c)

    if data >= 0x80:
        datax = (data & 0x7F)
        while data >= 0x80 :
            c = file.read(1)
            if (len(c) == 0):
                return None
            data = ord(c)
            datax = (datax <<7) + (data & 0x7F)
        data = datax

    if flag:
        data = -data
    return data


# returns a binary string that encodes a number into 7 bits
# most significant byte first which has the high bit set

def encodeNumber(number):
    result = ""
    negative = False
    flag = 0

    if number < 0 :
        number = -number + 1
        negative = True

    while True:
        byte = number & 0x7F
        number = number >> 7
        byte += flag
        result += chr(byte)
        flag = 0x80
        if number == 0 :
            if (byte == 0xFF and negative == False) :
                result += chr(0x80)
            break

    if negative:
        result += chr(0xFF)

    return result[::-1]



# create / read a length prefixed string from the file

def lengthPrefixString(data):
    return encodeNumber(len(data))+data

def readString(file):
    stringLength = readEncodedNumber(file)
    if (stringLength == None):
        return ""
    sv = file.read(stringLength)
    if (len(sv) != stringLength):
        return ""
    return unpack(str(stringLength)+"s",sv)[0]


# convert a binary string generated by encodeNumber (7 bit encoded number)
# to the value you would find inside the page*.dat files to be processed

def convert(i):
    result = ''
    val = encodeNumber(i)
    for j in xrange(len(val)):
        c = ord(val[j:j+1])
        result += '%02x' % c
    return result



# the complete string table used to store all book text content
# as well as the xml tokens and values that make sense out of it

class Dictionary(object):
    def __init__(self, dictFile):
        self.filename = dictFile
        self.size = 0
        self.fo = file(dictFile,'rb')
        self.stable = []
        self.size = readEncodedNumber(self.fo)
        for i in xrange(self.size):
            self.stable.append(self.escapestr(readString(self.fo)))
        self.pos = 0

    def escapestr(self, str):
        str = str.replace('&','&amp;')
        str = str.replace('<','&lt;')
        str = str.replace('>','&gt;')
        str = str.replace('=','&#61;')
        return str

    def lookup(self,val):
        if ((val >= 0) and (val < self.size)) :
            self.pos = val
            return self.stable[self.pos]
        else:
            print "Error - %d outside of string table limits" % val
            sys.exit(-1)

    def getSize(self):
        return self.size

    def getPos(self):
        return self.pos

    def dumpDict(self):
        for i in xrange(self.size):
            print "%d %s %s" % (i, convert(i), self.stable[i])
        return

# parses the xml snippets that are represented by each page*.dat file.
# also parses the other0.dat file - the main stylesheet
# and information used to inject the xml snippets into page*.dat files

class PageParser(object):
    def __init__(self, filename, dict, debug, flat_xml):
        self.fo = file(filename,'rb')
        self.id = os.path.basename(filename).replace('.dat','')
        self.dict = dict
        self.debug = debug
        self.flat_xml = flat_xml
        self.tagpath = []
        self.doc = []
        self.snippetList = []


    # hash table used to enable the decoding process
    # This has all been developed by trial and error so it may still have omissions or
    # contain errors
    # Format:
    # tag : (number of arguments, argument type, subtags present, special case of subtags present when escaped)

    token_tags = {
        'x' : (1, 'scalar_number', 0, 0),
        'y' : (1, 'scalar_number', 0, 0),
        'h' : (1, 'scalar_number', 0, 0),
        'w' : (1, 'scalar_number', 0, 0),
        'firstWord' : (1, 'scalar_number', 0, 0),
        'lastWord' : (1, 'scalar_number', 0, 0),
        'rootID' : (1, 'scalar_number', 0, 0),
        'stemID' : (1, 'scalar_number', 0, 0),
        'type' : (1, 'scalar_text', 0, 0),

        'info' : (0, 'number', 1, 0),

        'info.word' : (0, 'number', 1, 1),
        'info.word.ocrText' : (1, 'text', 0, 0),
        'info.word.firstGlyph' : (1, 'raw', 0, 0),
        'info.word.lastGlyph' : (1, 'raw', 0, 0),
        'info.word.bl' : (1, 'raw', 0, 0),
        'info.word.link_id' : (1, 'number', 0, 0),

        'glyph' : (0, 'number', 1, 1),
        'glyph.x' : (1, 'number', 0, 0),
        'glyph.y' : (1, 'number', 0, 0),
        'glyph.glyphID' : (1, 'number', 0, 0),

        'dehyphen' : (0, 'number', 1, 1),
        'dehyphen.rootID' : (1, 'number', 0, 0),
        'dehyphen.stemID' : (1, 'number', 0, 0),
        'dehyphen.stemPage' : (1, 'number', 0, 0),
        'dehyphen.sh' : (1, 'number', 0, 0),

        'links' : (0, 'number', 1, 1),
        'links.page' : (1, 'number', 0, 0),
        'links.rel' : (1, 'number', 0, 0),
        'links.row' : (1, 'number', 0, 0),
        'links.title' : (1, 'text', 0, 0),
        'links.href' : (1, 'text', 0, 0),
        'links.type' : (1, 'text', 0, 0),

        'paraCont' : (0, 'number', 1, 1),
        'paraCont.rootID' : (1, 'number', 0, 0),
        'paraCont.stemID' : (1, 'number', 0, 0),
        'paraCont.stemPage' : (1, 'number', 0, 0),

        'paraStems' : (0, 'number', 1, 1),
        'paraStems.stemID' : (1, 'number', 0, 0),

        'wordStems' : (0, 'number', 1, 1),
        'wordStems.stemID' : (1, 'number', 0, 0),

        'empty' : (1, 'snippets', 1, 0),

        'page' : (1, 'snippets', 1, 0),
        'page.pageid' : (1, 'scalar_text', 0, 0),
        'page.pagelabel' : (1, 'scalar_text', 0, 0),
        'page.type' : (1, 'scalar_text', 0, 0),
        'page.h' : (1, 'scalar_number', 0, 0),
        'page.w' : (1, 'scalar_number', 0, 0),
        'page.startID' : (1, 'scalar_number', 0, 0),

        'group' : (1, 'snippets', 1, 0),
        'group.type' : (1, 'scalar_text', 0, 0),

        'region' : (1, 'snippets', 1, 0),
        'region.type' : (1, 'scalar_text', 0, 0),
        'region.x' : (1, 'scalar_number', 0, 0),
        'region.y' : (1, 'scalar_number', 0, 0),
        'region.h' : (1, 'scalar_number', 0, 0),
        'region.w' : (1, 'scalar_number', 0, 0),

        'empty_text_region' : (1, 'snippets', 1, 0),

        'img' : (1, 'snippets', 1, 0),
        'img.x' : (1, 'scalar_number', 0, 0),
        'img.y' : (1, 'scalar_number', 0, 0),
        'img.h' : (1, 'scalar_number', 0, 0),
        'img.w' : (1, 'scalar_number', 0, 0),
        'img.src' : (1, 'scalar_number', 0, 0),
        'img.color_src' : (1, 'scalar_number', 0, 0),

        'paragraph' : (1, 'snippets', 1, 0),
        'paragraph.class' : (1, 'scalar_text', 0, 0),
        'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
        'paragraph.lastWord' : (1, 'scalar_number', 0, 0),

        'word_semantic' : (1, 'snippets', 1, 1),
        'word_semantic.type' : (1, 'scalar_text', 0, 0),
        'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
        'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),

        'word' : (1, 'snippets', 1, 0),
        'word.type' : (1, 'scalar_text', 0, 0),
        'word.class' : (1, 'scalar_text', 0, 0),
        'word.firstGlyph' : (1, 'scalar_number', 0, 0),
        'word.lastGlyph' : (1, 'scalar_number', 0, 0),

        '_span' : (1, 'snippets', 1, 0),
        '_span.firstWord' : (1, 'scalar_number', 0, 0),
        '_span.lastWord' : (1, 'scalar_number', 0, 0),

        'span' : (1, 'snippets', 1, 0),
        'span.firstWord' : (1, 'scalar_number', 0, 0),
        'span.lastWord' : (1, 'scalar_number', 0, 0),

        'extratokens' : (1, 'snippets', 1, 0),
        'extratokens.type' : (1, 'scalar_text', 0, 0),
        'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
        'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),

        'glyph.h' : (1, 'number', 0, 0),
        'glyph.w' : (1, 'number', 0, 0),
        'glyph.use' : (1, 'number', 0, 0),
        'glyph.vtx' : (1, 'number', 0, 1),
        'glyph.len' : (1, 'number', 0, 1),
        'glyph.dpi' : (1, 'number', 0, 0),
        'vtx' : (0, 'number', 1, 1),
        'vtx.x' : (1, 'number', 0, 0),
        'vtx.y' : (1, 'number', 0, 0),
        'len' : (0, 'number', 1, 1),
        'len.n' : (1, 'number', 0, 0),

        'book' : (1, 'snippets', 1, 0),
        'version' : (1, 'snippets', 1, 0),
        'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
        'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
        'version.Schema_id' : (1, 'scalar_text', 0, 0),
        'version.Schema_version' : (1, 'scalar_text', 0, 0),
        'version.Topaz_version' : (1, 'scalar_text', 0, 0),
        'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
        'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
        'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
        'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
        'version.chapterheaders' : (1, 'scalar_text', 0, 0),
        'version.creation_date' : (1, 'scalar_text', 0, 0),
        'version.header_footer' : (1, 'scalar_text', 0, 0),
        'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
        'version.letter_insertion' : (1, 'scalar_text', 0, 0),
        'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
        'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
        'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
        'version.findlists' : (1, 'scalar_text', 0, 0),
        'version.page_num' : (1, 'scalar_text', 0, 0),
        'version.page_type' : (1, 'scalar_text', 0, 0),
        'version.bad_text' : (1, 'scalar_text', 0, 0),
        'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
        'version.margins' : (1, 'scalar_text', 0, 0),
        'version.staggered_lines' : (1, 'scalar_text', 0, 0),
        'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
        'version.toc' : (1, 'scalar_text', 0, 0),

        'stylesheet' : (1, 'snippets', 1, 0),
        'style' : (1, 'snippets', 1, 0),
        'style._tag' : (1, 'scalar_text', 0, 0),
        'style.type' : (1, 'scalar_text', 0, 0),
        'style._parent_type' : (1, 'scalar_text', 0, 0),
        'style.class' : (1, 'scalar_text', 0, 0),
        'style._after_class' : (1, 'scalar_text', 0, 0),
        'rule' : (1, 'snippets', 1, 0),
        'rule.attr' : (1, 'scalar_text', 0, 0),
        'rule.value' : (1, 'scalar_text', 0, 0),

        'original' : (0, 'number', 1, 1),
        'original.pnum' : (1, 'number', 0, 0),
        'original.pid' : (1, 'text', 0, 0),
        'pages' : (0, 'number', 1, 1),
        'pages.ref' : (1, 'number', 0, 0),
        'pages.id' : (1, 'number', 0, 0),
        'startID' : (0, 'number', 1, 1),
        'startID.page' : (1, 'number', 0, 0),
        'startID.id' : (1, 'number', 0, 0),

    }


    # full tag path record keeping routines
    def tag_push(self, token):
        self.tagpath.append(token)
    def tag_pop(self):
        if len(self.tagpath) > 0 :
            self.tagpath.pop()
    def tagpath_len(self):
        return len(self.tagpath)
    def get_tagpath(self, i):
        cnt = len(self.tagpath)
        if i < cnt : result = self.tagpath[i]
        for j in xrange(i+1, cnt) :
            result += '.' + self.tagpath[j]
        return result

    # list of absolute command byte values that indicate
    # various types of loop mechanisms typically used to generate vectors

    cmd_list = (0x76, 0x76)

    # peek at and return 1 byte that is ahead by i bytes
    def peek(self, aheadi):
        c = self.fo.read(aheadi)
        if (len(c) == 0):
            return None
        self.fo.seek(-aheadi,1)
        c = c[-1:]
        return ord(c)


    # get the next value from the file being processed
    def getNext(self):
        nbyte = self.peek(1)
        if (nbyte == None):
            return None
        val = readEncodedNumber(self.fo)
        return val


    # format an arg by argtype
    def formatArg(self, arg, argtype):
        if (argtype == 'text') or (argtype == 'scalar_text') :
            result = self.dict.lookup(arg)
        elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') :
            result = arg
        elif (argtype == 'snippets') :
            result = arg
        else :
            print "Error Unknown argtype %s" % argtype
            sys.exit(-2)
        return result


    # process the next tag token, recursively handling subtags,
    # arguments, and commands
    def procToken(self, token):

        known_token = False
        self.tag_push(token)

        if self.debug : print 'Processing: ', self.get_tagpath(0)
        cnt = self.tagpath_len()
        for j in xrange(cnt):
            tkn = self.get_tagpath(j)
            if tkn in self.token_tags :
                num_args = self.token_tags[tkn][0]
                argtype = self.token_tags[tkn][1]
                subtags = self.token_tags[tkn][2]
                splcase = self.token_tags[tkn][3]
                ntags = -1
                known_token = True
                break

        if known_token :

            # handle subtags if present
            subtagres = []
            if (splcase == 1):
                # this type of tag uses an escape marker 0x74 to indicate the subtag count
                if self.peek(1) == 0x74:
                    skip = readEncodedNumber(self.fo)
                    subtags = 1
                    num_args = 0

            if (subtags == 1):
                ntags = readEncodedNumber(self.fo)
                if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
                for j in xrange(ntags):
                    val = readEncodedNumber(self.fo)
                    subtagres.append(self.procToken(self.dict.lookup(val)))

            # arguments can be scalars or vectors of text or numbers
            argres = []
            if num_args > 0 :
                firstarg = self.peek(1)
                if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'):
                    # single argument is a variable length vector of data
                    arg = readEncodedNumber(self.fo)
                    argres = self.decodeCMD(arg,argtype)
                else :
                    # num_arg scalar arguments
                    for i in xrange(num_args):
                        argres.append(self.formatArg(readEncodedNumber(self.fo), argtype))

            # build the return tag
            result = []
            tkn = self.get_tagpath(0)
            result.append(tkn)
            result.append(subtagres)
            result.append(argtype)
            result.append(argres)
            self.tag_pop()
            return result

        # all tokens that need to be processed should be in the hash
        # table; if one is not, it may indicate a problem, either a new
        # token or an out of sync condition
        else:
            result = []
            if (self.debug):
                print 'Unknown Token:', token
            self.tag_pop()
            return result


    # special loop used to process code snippets
    # it is NEVER used to format arguments.
    # builds the snippetList
    def doLoop72(self, argtype):
        cnt = readEncodedNumber(self.fo)
        if self.debug :
            result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n'
            result += 'of the document is indicated by snippet number sets at the\n'
            result += 'end of each snippet. \n'
            print result
        for i in xrange(cnt):
            if self.debug: print 'Snippet:',str(i)
            snippet = []
            snippet.append(i)
            val = readEncodedNumber(self.fo)
            snippet.append(self.procToken(self.dict.lookup(val)))
            self.snippetList.append(snippet)
        return



    # general loop code graciously submitted by "skindle" - thank you!
|
||||
def doLoop76Mode(self, argtype, cnt, mode):
|
||||
result = []
|
||||
adj = 0
|
||||
if mode & 1:
|
||||
adj = readEncodedNumber(self.fo)
|
||||
mode = mode >> 1
|
||||
x = []
|
||||
for i in xrange(cnt):
|
||||
x.append(readEncodedNumber(self.fo) - adj)
|
||||
for i in xrange(mode):
|
||||
for j in xrange(1, cnt):
|
||||
x[j] = x[j] + x[j - 1]
|
||||
for i in xrange(cnt):
|
||||
result.append(self.formatArg(x[i],argtype))
|
||||
return result
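
    # Note (worked example, not from the original source): the low bit of
    # mode selects an offset adjustment and the remaining bits give the
    # number of cumulative-sum passes over the vector.  With cnt=4 and
    # mode=3, one adjustment (say adj=2) is read, mode becomes 1, raw
    # values [5, 1, 2, 2] are adjusted to [3, -1, 0, 0], and one pass of
    # partial sums yields the final vector [3, 2, 2, 2].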

    # dispatches loop command bytes with various modes
    # The 0x76 style loops are used to build vectors

    # This was all derived by trial and error and
    # new loop types may exist that are not handled here
    # since they did not appear in the test cases

    def decodeCMD(self, cmd, argtype):
        if (cmd == 0x76):
            # loop with cnt, and mode to control loop styles
            cnt = readEncodedNumber(self.fo)
            mode = readEncodedNumber(self.fo)
            if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
            return self.doLoop76Mode(argtype, cnt, mode)
        if self.debug: print "Unknown command", cmd
        result = []
        return result


    # add full tag path to injected snippets
    def updateName(self, tag, prefix):
        name = tag[0]
        subtagList = tag[1]
        argtype = tag[2]
        argList = tag[3]
        nname = prefix + '.' + name
        nsubtaglist = []
        for j in subtagList:
            nsubtaglist.append(self.updateName(j,prefix))
        ntag = []
        ntag.append(nname)
        ntag.append(nsubtaglist)
        ntag.append(argtype)
        ntag.append(argList)
        return ntag


    # perform depth first injection of specified snippets into this one
    def injectSnippets(self, snippet):
        snipno, tag = snippet
        name = tag[0]
        subtagList = tag[1]
        argtype = tag[2]
        argList = tag[3]
        nsubtagList = []
        if len(argList) > 0 :
            for j in argList:
                asnip = self.snippetList[j]
                aso, atag = self.injectSnippets(asnip)
                atag = self.updateName(atag, name)
                nsubtagList.append(atag)
            argtype = 'number'
            argList = []
        if len(nsubtagList) > 0 :
            subtagList.extend(nsubtagList)
        tag = []
        tag.append(name)
        tag.append(subtagList)
        tag.append(argtype)
        tag.append(argList)
        snippet = []
        snippet.append(snipno)
        snippet.append(tag)
        return snippet


    # format the tag for output
    def formatTag(self, node):
        name = node[0]
        subtagList = node[1]
        argtype = node[2]
        argList = node[3]
        fullpathname = name.split('.')
        nodename = fullpathname.pop()
        ilvl = len(fullpathname)
        indent = ' ' * (3 * ilvl)
        result = indent + '<' + nodename + '>'
        if len(argList) > 0:
            argres = ''
            for j in argList:
                if (argtype == 'text') or (argtype == 'scalar_text') :
                    argres += j + '|'
                else :
                    argres += str(j) + ','
            argres = argres[0:-1]
            if argtype == 'snippets' :
                result += 'snippets:' + argres
            else :
                result += argres
        if len(subtagList) > 0 :
            result += '\n'
            for j in subtagList:
                if len(j) > 0 :
                    result += self.formatTag(j)
            result += indent + '</' + nodename + '>\n'
        else:
            result += '</' + nodename + '>\n'
        return result


    # flatten tag
    def flattenTag(self, node):
        name = node[0]
        subtagList = node[1]
        argtype = node[2]
        argList = node[3]
        result = name
        if (len(argList) > 0):
            argres = ''
            for j in argList:
                if (argtype == 'text') or (argtype == 'scalar_text') :
                    argres += j + '|'
                else :
                    argres += str(j) + '|'
            argres = argres[0:-1]
            if argtype == 'snippets' :
                result += '.snippets=' + argres
            else :
                result += '=' + argres
        result += '\n'
        for j in subtagList:
            if len(j) > 0 :
                result += self.flattenTag(j)
        return result
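
    # Note (sketch, illustrative tag names): flattenTag emits one
    # "dotted.path=args" line per tag, with '|' separating vector values:
    #     page.h=11040
    #     page.region.paragraph.firstWord=101
    #     info.glyph.x=312|480|644
    # The DocParser and PParser classes later in this commit consume
    # exactly this flattened form via split('=') and split('|').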


    # walk the document tree to create the xml output
    def formatDoc(self, flat_xml):
        result = ''
        for j in self.doc :
            if len(j) > 0:
                if flat_xml:
                    result += self.flattenTag(j)
                else:
                    result += self.formatTag(j)
        if self.debug : print result
        return result


    # main loop - parse the page.dat files
    # to create structured document and snippets

    # FIXME: value at end of magic appears to be a subtags count
    # but for what?  For now, inject an 'info' tag as it is in
    # every dictionary and seems close to what is meant
    # The alternative is to special case the last _ "0x5f" to mean something

    def process(self):

        # peek at the first bytes to see what type of file it is
        magic = self.fo.read(9)
        if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
            first_token = 'info'
        elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
            skip = self.fo.read(2)
            first_token = 'info'
        elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
            first_token = 'info'
        elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
            skip = self.fo.read(3)
            first_token = 'info'
        else :
            # other0.dat file
            first_token = None
            self.fo.seek(-9,1)

        # main loop to read and build the document tree
        while True:

            if first_token != None :
                # use "inserted" first token 'info' for page and glyph files
                tag = self.procToken(first_token)
                if len(tag) > 0 :
                    self.doc.append(tag)
                first_token = None

            v = self.getNext()
            if (v == None):
                break

            if (v == 0x72):
                self.doLoop72('number')
            elif (v > 0) and (v < self.dict.getSize()) :
                tag = self.procToken(self.dict.lookup(v))
                if len(tag) > 0 :
                    self.doc.append(tag)
            else:
                if self.debug:
                    print "Main Loop: Unknown value: %x" % v
                if (v == 0):
                    if (self.peek(1) == 0x5f):
                        skip = self.fo.read(1)
                        first_token = 'info'

        # now do snippet injection
        if len(self.snippetList) > 0 :
            if self.debug : print 'Injecting Snippets:'
            snippet = self.injectSnippets(self.snippetList[0])
            snipno = snippet[0]
            tag_add = snippet[1]
            if self.debug : print self.formatTag(tag_add)
            if len(tag_add) > 0:
                self.doc.append(tag_add)

        # handle generation of xml output
        xmlpage = self.formatDoc(self.flat_xml)

        return xmlpage
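
    # Note: as the checks above imply, page*.dat files begin with 'p', one
    # unidentified byte, then a 'marker_' / '__PAGE_' / '_PAGE_' style
    # signature, and glyphs*.dat files with 'g', one byte, then '__GLYPH';
    # everything after the signature is the token stream decoded by the
    # main loop.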


def fromData(dict, fname):
    flat_xml = True
    debug = False
    pp = PageParser(fname, dict, debug, flat_xml)
    xmlpage = pp.process()
    return xmlpage

def getXML(dict, fname):
    flat_xml = False
    debug = False
    pp = PageParser(fname, dict, debug, flat_xml)
    xmlpage = pp.process()
    return xmlpage

def usage():
    print 'Usage: '
    print '    convert2xml.py dict0000.dat infile.dat '
    print ' '
    print ' Options:'
    print '   -h            print this usage help message '
    print '   -d            turn on debug output to check for potential errors '
    print '   --flat-xml    output the flattened xml page description only '
    print ' '
    print ' This program will attempt to convert a page*.dat file or '
    print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. '
    print ' '
    print ' Use "cmbtc_dump.py" first to decrypt, uncompress, and dump '
    print ' the *.dat files from a Topaz format e-book.'


#
# Main
#

def main(argv):
    dictFile = ""
    pageFile = ""
    debug = False
    flat_xml = False
    printOutput = False
    if len(argv) == 0:
        printOutput = True
        argv = sys.argv

    try:
        opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"])

    except getopt.GetoptError, err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    if len(opts) == 0 and len(args) == 0 :
        usage()
        sys.exit(2)

    for o, a in opts:
        if o =="-d":
            debug=True
        if o =="-h":
            usage()
            sys.exit(0)
        if o =="--flat-xml":
            flat_xml = True

    dictFile, pageFile = args[0], args[1]

    # read in the string table dictionary
    dict = Dictionary(dictFile)
    # dict.dumpDict()

    # create a page parser
    pp = PageParser(pageFile, dict, debug, flat_xml)

    xmlpage = pp.process()

    if printOutput:
        print xmlpage
        return 0

    return xmlpage

if __name__ == '__main__':
    sys.exit(main(''))

@@ -0,0 +1,706 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6

import sys
import csv
import os
import math
import getopt
from struct import pack
from struct import unpack


class DocParser(object):
    def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
        self.id = os.path.basename(fileid).replace('.dat','')
        self.svgcount = 0
        self.docList = flatxml.split('\n')
        self.docSize = len(self.docList)
        self.classList = {}
        self.bookDir = bookDir
        self.gdict = gdict
        tmpList = classlst.split('\n')
        for pclass in tmpList:
            if pclass != '':
                # remove the leading period from the css name
                cname = pclass[1:]
                self.classList[cname] = True
        self.fixedimage = fixedimage
        self.ocrtext = []
        self.link_id = []
        self.link_title = []
        self.link_page = []
        self.link_href = []
        self.link_type = []
        self.dehyphen_rootid = []
        self.paracont_stemid = []
        self.parastems_stemid = []


    def getGlyph(self, gid):
        result = ''
        id='id="gl%d"' % gid
        return self.gdict.lookup(id)

    def glyphs_to_image(self, glyphList):

        def extract(path, key):
            b = path.find(key) + len(key)
            e = path.find(' ',b)
            return int(path[b:e])

        svgDir = os.path.join(self.bookDir,'svg')

        imgDir = os.path.join(self.bookDir,'img')
        imgname = self.id + '_%04d.svg' % self.svgcount
        imgfile = os.path.join(imgDir,imgname)

        # get glyph information
        gxList = self.getData('info.glyph.x',0,-1)
        gyList = self.getData('info.glyph.y',0,-1)
        gidList = self.getData('info.glyph.glyphID',0,-1)

        gids = []
        maxws = []
        maxhs = []
        xs = []
        ys = []
        gdefs = []

        # get path definitions, positions, and dimensions for each glyph
        # that makes up the image, and find min x and min y to reposition origin
        minx = -1
        miny = -1
        for j in glyphList:
            gid = gidList[j]
            gids.append(gid)

            xs.append(gxList[j])
            if minx == -1: minx = gxList[j]
            else : minx = min(minx, gxList[j])

            ys.append(gyList[j])
            if miny == -1: miny = gyList[j]
            else : miny = min(miny, gyList[j])

            path = self.getGlyph(gid)
            gdefs.append(path)

            maxws.append(extract(path,'width='))
            maxhs.append(extract(path,'height='))

        # change the origin to minx, miny and calc max height and width
        maxw = maxws[0] + xs[0] - minx
        maxh = maxhs[0] + ys[0] - miny
        for j in xrange(0, len(xs)):
            xs[j] = xs[j] - minx
            ys[j] = ys[j] - miny
            maxw = max( maxw, (maxws[j] + xs[j]) )
            maxh = max( maxh, (maxhs[j] + ys[j]) )

        # open the image file for output
        ifile = open(imgfile,'w')
        ifile.write('<?xml version="1.0" standalone="no"?>\n')
        ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
        ifile.write('<defs>\n')
        for j in xrange(0,len(gdefs)):
            ifile.write(gdefs[j])
        ifile.write('</defs>\n')
        for j in xrange(0,len(gids)):
            ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
        ifile.write('</svg>')
        ifile.close()

        return 0


    # return tag at line pos in document
    def lineinDoc(self, pos) :
        if (pos >= 0) and (pos < self.docSize) :
            item = self.docList[pos]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
            else :
                name = item
                argres = ''
        return name, argres


    # find tag in doc if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        result = None
        if end == -1 :
            end = self.docSize
        else:
            end = min(self.docSize, end)
        foundat = -1
        for j in xrange(pos, end):
            item = self.docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
        return foundat, result


    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        startpos = []
        pos = 0
        res = ""
        while res != None :
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res != None :
                startpos.append(foundpos)
            pos = foundpos + 1
        return startpos


    # returns a vector of integers for the tagpath
    def getData(self, tagpath, pos, end):
        argres=[]
        (foundat, argt) = self.findinDoc(tagpath, pos, end)
        if (argt != None) and (len(argt) > 0) :
            argList = argt.split('|')
            argres = [ int(strval) for strval in argList]
        return argres
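
    # Note (sketch): on the flattened form produced by convert2xml, a
    # lookup such as
    #     (pos, pclass) = self.findinDoc('paragraph.class', start, end)
    # returns the first line in [start, end) whose dotted name ends with
    # 'paragraph.class', while getData additionally splits a '12|15|19'
    # style value into a list of ints.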


    # get the class
    def getClass(self, pclass):
        nclass = pclass

        # class names are an issue given topaz may start them with numerals (not allowed),
        # use a mix of cases (which cause some browsers problems), and actually
        # attach numbers after "_reclustered*" to deal with classes that inherit
        # from a base class (but then not actually provide all of these
        # "_reclustered" classes in the stylesheet!)

        # so we clean this up by lowercasing, prepending 'cl-', and getting any
        # baseclass that exists in the stylesheet first, and then adding this
        # specific class after

        # also some class names have spaces in them so need to convert to dashes
        if nclass != None :
            nclass = nclass.replace(' ','-')
            classres = ''
            nclass = nclass.lower()
            nclass = 'cl-' + nclass
            baseclass = ''
            # graphic is the base class for captions
            if nclass.find('cl-cap-') >= 0 :
                classres = 'graphic' + ' '
            else :
                # strip to find baseclass
                p = nclass.find('_')
                if p > 0 :
                    baseclass = nclass[0:p]
                    if baseclass in self.classList:
                        classres += baseclass + ' '
            classres += nclass
            nclass = classres
        return nclass
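
    # Note (sketch, hypothetical class names): 'Cap-Figure 1' becomes
    # 'graphic cl-cap-figure-1', and 'Text_reclustered2' becomes
    # 'cl-text cl-text_reclustered2' when the stylesheet defines '.cl-text',
    # or just 'cl-text_reclustered2' when it does not.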


    # develop a sorted description of the starting positions of
    # groups and regions on the page, as well as the page type
    def PageDescription(self):

        def compare(x, y):
            (xtype, xval) = x
            (ytype, yval) = y
            if xval > yval:
                return 1
            if xval == yval:
                return 0
            return -1

        result = []
        (pos, pagetype) = self.findinDoc('page.type',0,-1)

        groupList = self.posinDoc('page.group')
        groupregionList = self.posinDoc('page.group.region')
        pageregionList = self.posinDoc('page.region')
        # integrate into one list
        for j in groupList:
            result.append(('grpbeg',j))
        for j in groupregionList:
            result.append(('gregion',j))
        for j in pageregionList:
            result.append(('pregion',j))
        result.sort(compare)

        # insert group end and page end indicators
        inGroup = False
        j = 0
        while True:
            if j == len(result): break
            rtype = result[j][0]
            rval = result[j][1]
            if not inGroup and (rtype == 'grpbeg') :
                inGroup = True
                j = j + 1
            elif inGroup and (rtype in ('grpbeg', 'pregion')):
                result.insert(j,('grpend',rval))
                inGroup = False
            else:
                j = j + 1
        if inGroup:
            result.append(('grpend',-1))
        result.append(('pageend', -1))
        return pagetype, result
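
    # Note (sketch, illustrative positions): for a page with one graphic
    # group followed by a plain text region this returns something like
    #     [('grpbeg', 4), ('gregion', 9), ('grpend', 31), ('pregion', 31), ('pageend', -1)]
    # and process() below walks consecutive pairs of these events to get
    # each region's [start, end) line range.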


    # build a description of the paragraph
    def getParaDescription(self, start, end, regtype):

        result = []

        # paragraph
        (pos, pclass) = self.findinDoc('paragraph.class',start,end)

        pclass = self.getClass(pclass)

        # build up a description of the paragraph in result and return it
        # first check for the basic - all words paragraph
        (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
        (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
        if (sfirst != None) and (slast != None) :
            first = int(sfirst)
            last = int(slast)

            makeImage = (regtype == 'vertical') or (regtype == 'table')
            if self.fixedimage:
                makeImage = makeImage or (regtype == 'fixed')

            if (pclass != None):
                makeImage = makeImage or (pclass.find('.inverted') >= 0)
                if self.fixedimage :
                    makeImage = makeImage or (pclass.find('cl-f-') >= 0)

            if not makeImage :
                # standard all-word paragraph
                for wordnum in xrange(first, last):
                    result.append(('ocr', wordnum))
                return pclass, result

            # convert paragraph to svg image
            # translate first and last word into first and last glyphs
            # and generate inline image and include it
            glyphList = []
            firstglyphList = self.getData('word.firstGlyph',0,-1)
            gidList = self.getData('info.glyph.glyphID',0,-1)
            firstGlyph = firstglyphList[first]
            if last < len(firstglyphList):
                lastGlyph = firstglyphList[last]
            else :
                lastGlyph = len(gidList)
            for glyphnum in xrange(firstGlyph, lastGlyph):
                glyphList.append(glyphnum)
            # include any extratokens if they exist
            (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
            (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
            if (sfg != None) and (slg != None):
                for glyphnum in xrange(int(sfg), int(slg)):
                    glyphList.append(glyphnum)
            num = self.svgcount
            self.glyphs_to_image(glyphList)
            self.svgcount += 1
            result.append(('svg', num))
            return pclass, result

        # this type of paragraph may be made up of multiple spans, inline
        # word monograms (images), and words with semantic meaning,
        # plus glyphs used to form the starting letter of the first word

        # need to parse this type line by line
        line = start + 1
        word_class = ''

        # if end is -1 then we must search to the end of the document
        if end == -1 :
            end = self.docSize

        # seems some xml has last* coming before first* so we have to
        # handle any order
        sp_first = -1
        sp_last = -1

        gl_first = -1
        gl_last = -1

        ws_first = -1
        ws_last = -1

        word_class = ''

        while (line < end) :

            (name, argres) = self.lineinDoc(line)

            if name.endswith('span.firstWord') :
                sp_first = int(argres)

            elif name.endswith('span.lastWord') :
                sp_last = int(argres)

            elif name.endswith('word.firstGlyph') :
                gl_first = int(argres)

            elif name.endswith('word.lastGlyph') :
                gl_last = int(argres)

            elif name.endswith('word_semantic.firstWord'):
                ws_first = int(argres)

            elif name.endswith('word_semantic.lastWord'):
                ws_last = int(argres)

            elif name.endswith('word.class'):
                (cname, space) = argres.split('-',1)
                if space == '' : space = '0'
                if (cname == 'spaceafter') and (int(space) > 0) :
                    word_class = 'sa'

            elif name.endswith('word.img.src'):
                result.append(('img' + word_class, int(argres)))
                word_class = ''

            elif name.endswith('region.img.src'):
                result.append(('img' + word_class, int(argres)))

            if (sp_first != -1) and (sp_last != -1):
                for wordnum in xrange(sp_first, sp_last):
                    result.append(('ocr', wordnum))
                sp_first = -1
                sp_last = -1

            if (gl_first != -1) and (gl_last != -1):
                glyphList = []
                for glyphnum in xrange(gl_first, gl_last):
                    glyphList.append(glyphnum)
                num = self.svgcount
                self.glyphs_to_image(glyphList)
                self.svgcount += 1
                result.append(('svg', num))
                gl_first = -1
                gl_last = -1

            if (ws_first != -1) and (ws_last != -1):
                for wordnum in xrange(ws_first, ws_last):
                    result.append(('ocr', wordnum))
                ws_first = -1
                ws_last = -1

            line += 1

        return pclass, result


    def buildParagraph(self, pclass, pdesc, type, regtype) :
        parares = ''
        sep =''

        classres = ''
        if pclass :
            classres = ' class="' + pclass + '"'

        br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')

        handle_links = len(self.link_id) > 0

        if (type == 'full') or (type == 'begin') :
            parares += '<p' + classres + '>'

        if (type == 'end'):
            parares += ' '

        lstart = len(parares)

        cnt = len(pdesc)

        for j in xrange( 0, cnt) :

            (wtype, num) = pdesc[j]

            if wtype == 'ocr' :
                word = self.ocrtext[num]
                sep = ' '

                if handle_links:
                    link = self.link_id[num]
                    if (link > 0):
                        linktype = self.link_type[link-1]
                        title = self.link_title[link-1]
                        if (title == "") or (parares.rfind(title) < 0):
                            title=parares[lstart:]
                        if linktype == 'external' :
                            linkhref = self.link_href[link-1]
                            linkhtml = '<a href="%s">' % linkhref
                        else :
                            if len(self.link_page) >= link :
                                ptarget = self.link_page[link-1] - 1
                                linkhtml = '<a href="#page%04d">' % ptarget
                            else :
                                # just link to the current page
                                linkhtml = '<a href="#' + self.id + '">'
                        linkhtml += title + '</a>'
                        pos = parares.rfind(title)
                        if pos >= 0:
                            parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
                        else :
                            parares += linkhtml
                        lstart = len(parares)
                        if word == '_link_' : word = ''
                    elif (link < 0) :
                        if word == '_link_' : word = ''

                if word == '_lb_':
                    if ((num-1) in self.dehyphen_rootid ) or handle_links:
                        word = ''
                        sep = ''
                    elif br_lb :
                        word = '<br />\n'
                        sep = ''
                    else :
                        word = '\n'
                        sep = ''

                if num in self.dehyphen_rootid :
                    word = word[0:-1]
                    sep = ''

                parares += word + sep

            elif wtype == 'img' :
                sep = ''
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

            elif wtype == 'imgsa' :
                sep = ' '
                parares += '<img src="img/img%04d.jpg" alt="" />' % num
                parares += sep

            elif wtype == 'svg' :
                sep = ''
                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
                parares += sep

        if len(sep) > 0 : parares = parares[0:-1]
        if (type == 'full') or (type == 'end') :
            parares += '</p>'
        return parares
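
    # Note (sketch, illustrative values): pdesc is the list built by
    # getParaDescription, e.g.
    #     [('ocr', 101), ('ocr', 102), ('imgsa', 7), ('svg', 0)]
    # so buildParagraph(pclass, pdesc, 'full', 'text') emits a single
    # <p class="..."> ... </p> mixing OCR words with inline jpeg and svg
    # images.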


    # walk the document tree collecting the information needed
    # to build an html page using the ocrText

    def process(self):

        htmlpage = ''

        # get the ocr text
        (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
        if argres : self.ocrtext = argres.split('|')

        # get information to dehyphenate the text
        self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)

        # determine if first paragraph is continued from previous page
        (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
        first_para_continued = (self.parastems_stemid != None)

        # determine if last paragraph is continued onto the next page
        (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
        last_para_continued = (self.paracont_stemid != None)

        # collect link ids
        self.link_id = self.getData('info.word.link_id',0,-1)

        # collect link destination page numbers
        self.link_page = self.getData('info.links.page',0,-1)

        # collect link types (container versus external)
        (pos, argres) = self.findinDoc('info.links.type',0,-1)
        if argres : self.link_type = argres.split('|')

        # collect link destinations
        (pos, argres) = self.findinDoc('info.links.href',0,-1)
        if argres : self.link_href = argres.split('|')

        # collect link titles
        (pos, argres) = self.findinDoc('info.links.title',0,-1)
        if argres :
            self.link_title = argres.split('|')
        else:
            self.link_title.append('')

        # get a description of the starting points of the regions
        # and groups on the page
        (pagetype, pageDesc) = self.PageDescription()
        regcnt = len(pageDesc) - 1

        anchorSet = False
        breakSet = False
        inGroup = False

        # process each region on the page and convert what you can to html

        for j in xrange(regcnt):

            (etype, start) = pageDesc[j]
            (ntype, end) = pageDesc[j+1]

            # set anchor for link target on this page
            if not anchorSet and not first_para_continued:
                htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
                htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
                anchorSet = True

            # handle groups of graphics with text captions
            if (etype == 'grpbeg'):
                (pos, grptype) = self.findinDoc('group.type', start, end)
                if grptype != None:
                    if grptype == 'graphic':
                        gcstr = ' class="' + grptype + '"'
                        htmlpage += '<div' + gcstr + '>'
                        inGroup = True

            elif (etype == 'grpend'):
                if inGroup:
                    htmlpage += '</div>\n'
                    inGroup = False

            else:
                (pos, regtype) = self.findinDoc('region.type',start,end)

                if regtype == 'graphic' :
                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
                    if simgsrc:
                        if inGroup:
                            htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
                        else:
                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

                elif regtype == 'chapterheading' :
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    if not breakSet:
                        htmlpage += '<div style="page-break-after: always;">&nbsp;</div>\n'
                        breakSet = True
                    tag = 'h1'
                    if pclass and (len(pclass) >= 7):
                        if pclass[3:7] == 'ch1-' : tag = 'h1'
                        if pclass[3:7] == 'ch2-' : tag = 'h2'
                        if pclass[3:7] == 'ch3-' : tag = 'h3'
                        htmlpage += '<' + tag + ' class="' + pclass + '">'
                    else:
                        htmlpage += '<' + tag + '>'
                    htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                    htmlpage += '</' + tag + '>'

                elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
                    ptype = 'full'
                    # check to see if this is a continuation from the previous page
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    if pclass and (len(pclass) >= 6) and (ptype == 'full'):
                        tag = 'p'
                        if pclass[3:6] == 'h1-' : tag = 'h4'
                        if pclass[3:6] == 'h2-' : tag = 'h5'
                        if pclass[3:6] == 'h3-' : tag = 'h6'
                        htmlpage += '<' + tag + ' class="' + pclass + '">'
                        htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                        htmlpage += '</' + tag + '>'
                    else :
                        htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)

                elif (regtype == 'tocentry') :
                    ptype = 'full'
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)

                elif (regtype == 'vertical') or (regtype == 'table') :
                    ptype = 'full'
                    if inGroup:
                        ptype = 'middle'
                    if first_para_continued :
                        ptype = 'end'
                        first_para_continued = False
                    (pclass, pdesc) = self.getParaDescription(start, end, regtype)
                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)

                elif (regtype == 'synth_fcvr.center'):
                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
                    if simgsrc:
                        htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

                else :
                    print ' Making region type', regtype,
                    (pos, temp) = self.findinDoc('paragraph',start,end)
                    (pos2, temp) = self.findinDoc('span',start,end)
                    if pos != -1 or pos2 != -1:
                        print ' a "text" region'
                        orig_regtype = regtype
                        regtype = 'fixed'
                        ptype = 'full'
                        # check to see if this is a continuation from the previous page
                        if first_para_continued :
                            ptype = 'end'
                            first_para_continued = False
                        (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                        if not pclass:
                            if orig_regtype.endswith('.right')     : pclass = 'cl-right'
                            elif orig_regtype.endswith('.center')  : pclass = 'cl-center'
                            elif orig_regtype.endswith('.left')    : pclass = 'cl-left'
                            elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
                        if pclass and (ptype == 'full') and (len(pclass) >= 6):
                            tag = 'p'
                            if pclass[3:6] == 'h1-' : tag = 'h4'
                            if pclass[3:6] == 'h2-' : tag = 'h5'
                            if pclass[3:6] == 'h3-' : tag = 'h6'
                            htmlpage += '<' + tag + ' class="' + pclass + '">'
                            htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
                            htmlpage += '</' + tag + '>'
                        else :
                            htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
                    else :
                        print ' a "graphic" region'
                        (pos, simgsrc) = self.findinDoc('img.src',start,end)
                        if simgsrc:
                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

        if last_para_continued :
            if htmlpage[-4:] == '</p>':
                htmlpage = htmlpage[0:-4]
            last_para_continued = False

        return htmlpage


def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
    # create a document parser
    dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
    htmlpage = dp.process()
    return htmlpage

@@ -0,0 +1,151 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

import sys
import csv
import os
import getopt
from struct import pack
from struct import unpack


class PParser(object):
    def __init__(self, gd, flatxml):
        self.gd = gd
        self.flatdoc = flatxml.split('\n')
        self.temp = []
        foo = self.getData('page.h') or self.getData('book.h')
        self.ph = foo[0]
        foo = self.getData('page.w') or self.getData('book.w')
        self.pw = foo[0]
        self.gx = self.getData('info.glyph.x')
        self.gy = self.getData('info.glyph.y')
        self.gid = self.getData('info.glyph.glyphID')

    def getData(self, path):
        result = None
        cnt = len(self.flatdoc)
        for j in xrange(cnt):
            item = self.flatdoc[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name.endswith(path)):
                result = argres
                break
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result

    def getDataTemp(self, path):
        result = None
        cnt = len(self.temp)
        for j in xrange(cnt):
            item = self.temp[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name.endswith(path)):
                result = argres
                self.temp.pop(j)
                break
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result

    def getImages(self):
        result = []
        self.temp = self.flatdoc
        while (self.getDataTemp('img') != None):
            h = self.getDataTemp('img.h')[0]
            w = self.getDataTemp('img.w')[0]
            x = self.getDataTemp('img.x')[0]
            y = self.getDataTemp('img.y')[0]
            src = self.getDataTemp('img.src')[0]
            result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
        return result

    def getGlyphs(self):
        result = []
        if (self.gid != None) and (len(self.gid) > 0):
            glyphs = []
            for j in set(self.gid):
                glyphs.append(j)
            glyphs.sort()
            for gid in glyphs:
                id='id="gl%d"' % gid
                path = self.gd.lookup(id)
                if path:
                    result.append(id + ' ' + path)
        return result
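
# Note (sketch): PParser consumes the same flattened page description as
# DocParser, so pp.getData('info.glyph.glyphID') returns the page's glyph
# id vector, and getImages() pops img.* entries off a temp list to yield
# one <image .../> element per img tag.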


def convert2SVG(gdict, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi):
    ml = ''
    pp = PParser(gdict, flat_xml)
    ml += '<?xml version="1.0" standalone="no"?>\n'
    if (raw):
        ml += '<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
        ml += '<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)
        ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
    else:
        ml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
        ml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n'
        ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
        ml += '<script><![CDATA[\n'
        ml += 'function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n'
        ml += 'var dpi=%d;\n' % scaledpi
        if (counter) :
            ml += 'var prevpage="page%04d.xhtml";\n' % (counter - 1)
        if (counter < numfiles-1) :
            ml += 'var nextpage="page%04d.xhtml";\n' % (counter + 1)
        ml += 'var pw=%d;var ph=%d;' % (pp.pw, pp.ph)
        ml += 'function zoomin(){dpi=dpi*(0.8);setsize();}\n'
        ml += 'function zoomout(){dpi=dpi*1.25;setsize();}\n'
        ml += 'function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n'
        ml += 'function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n'
        ml += 'function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n'
        ml += 'var gt=gd();if(gt>0){dpi=gt;}\n'
        ml += 'window.onload=setsize;\n'
        ml += ']]></script>\n'
        ml += '</head>\n'
        ml += '<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n'
        ml += '<div style="white-space:nowrap;">\n'
        if (counter == 0) :
            ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
        else:
            ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n'
        ml += '<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph)
    if (pp.gid != None):
        ml += '<defs>\n'
        gdefs = pp.getGlyphs()
        for j in xrange(0,len(gdefs)):
            ml += gdefs[j]
        ml += '</defs>\n'
    img = pp.getImages()
    if (img != None):
        for j in xrange(0,len(img)):
            ml += img[j]
    if (pp.gid != None):
        for j in xrange(0,len(pp.gid)):
            ml += '<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j])
    if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
        ml += '<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n'
    if (raw) :
        ml += '</svg>'
    else :
        ml += '</svg></a>\n'
        if (counter == numfiles - 1) :
            ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
        else :
            ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n'
        ml += '</div>\n'
        ml += '<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n'
        ml += '</body>\n'
        ml += '</html>\n'
    return ml

@@ -0,0 +1,561 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)

import sys
sys.stdout=Unbuffered(sys.stdout)

import csv
import os
import getopt
from struct import pack
from struct import unpack


# local support routines
import convert2xml
import flatxml2html
import flatxml2svg
import stylexml2css


# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
    flag = False
    c = file.read(1)
    if (len(c) == 0):
        return None
    data = ord(c)
    if data == 0xFF:
        flag = True
        c = file.read(1)
        if (len(c) == 0):
            return None
        data = ord(c)
    if data >= 0x80:
        datax = (data & 0x7F)
        while data >= 0x80 :
            c = file.read(1)
            if (len(c) == 0):
                return None
            data = ord(c)
            datax = (datax << 7) + (data & 0x7F)
        data = datax
    if flag:
        data = -data
    return data
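
# Note (worked example, not part of the original source): numbers are
# stored base-128 with the continuation bit 0x80 set on all but the last
# byte, and a leading 0xFF byte flags a negative value.  So the bytes
# 0x81 0x23 decode to (0x01 << 7) + 0x23 = 163, and 0xFF 0x05 decodes
# to -5.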

# Get a length prefixed string from the file
def lengthPrefixString(data):
    return encodeNumber(len(data))+data

def readString(file):
    stringLength = readEncodedNumber(file)
    if (stringLength == None):
        return None
    sv = file.read(stringLength)
    if (len(sv) != stringLength):
        return ""
    return unpack(str(stringLength)+"s",sv)[0]

def getMetaArray(metaFile):
    # parse the meta file
    result = {}
    fo = file(metaFile,'rb')
    size = readEncodedNumber(fo)
    for i in xrange(size):
        tag = readString(fo)
        value = readString(fo)
        result[tag] = value
        # print tag, value
    fo.close()
    return result


# dictionary of all text strings by index value
class Dictionary(object):
    def __init__(self, dictFile):
        self.filename = dictFile
        self.size = 0
        self.fo = file(dictFile,'rb')
        self.stable = []
        self.size = readEncodedNumber(self.fo)
        for i in xrange(self.size):
            self.stable.append(self.escapestr(readString(self.fo)))
        self.pos = 0

    def escapestr(self, str):
        str = str.replace('&','&amp;')
        str = str.replace('<','&lt;')
        str = str.replace('>','&gt;')
        str = str.replace('=','&#61;')
        return str

    def lookup(self,val):
        if ((val >= 0) and (val < self.size)) :
            self.pos = val
            return self.stable[self.pos]
        else:
            print "Error - %d outside of string table limits" % val
            sys.exit(-1)

    def getSize(self):
        return self.size

    def getPos(self):
        return self.pos


class PageDimParser(object):
    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')

    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        result = None
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1 :
            end = cnt
        else:
            end = min(cnt,end)
        foundat = -1
        for j in xrange(pos, end):
            item = docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=')
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
        return foundat, result

    def process(self):
        (pos, sph) = self.findinDoc('page.h',0,-1)
        (pos, spw) = self.findinDoc('page.w',0,-1)
        if (sph == None): sph = '-1'
        if (spw == None): spw = '-1'
        return sph, spw


def getPageDim(flatxml):
    # create a document parser
    dp = PageDimParser(flatxml)
    (ph, pw) = dp.process()
    return ph, pw

class GParser(object):
    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')
        self.dpi = 1440
        self.gh = self.getData('info.glyph.h')
        self.gw = self.getData('info.glyph.w')
        self.guse = self.getData('info.glyph.use')
        if self.guse :
            self.count = len(self.guse)
        else :
            self.count = 0
        self.gvtx = self.getData('info.glyph.vtx')
        self.glen = self.getData('info.glyph.len')
        self.gdpi = self.getData('info.glyph.dpi')
        self.vx = self.getData('info.vtx.x')
        self.vy = self.getData('info.vtx.y')
        self.vlen = self.getData('info.len.n')
        if self.vlen :
            self.glen.append(len(self.vlen))
        elif self.glen:
            self.glen.append(0)
        if self.vx :
            self.gvtx.append(len(self.vx))
        elif self.gvtx :
            self.gvtx.append(0)

    def getData(self, path):
        result = None
        cnt = len(self.flatdoc)
        for j in xrange(cnt):
            item = self.flatdoc[j]
            if item.find('=') >= 0:
                (name, argt) = item.split('=')
                argres = argt.split('|')
            else:
                name = item
                argres = []
            if (name == path):
                result = argres
                break
        if (len(argres) > 0) :
            for j in xrange(0,len(argres)):
                argres[j] = int(argres[j])
        return result

    def getGlyphDim(self, gly):
        maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
        maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
        return maxh, maxw

    def getPath(self, gly):
        path = ''
        if (gly < 0) or (gly >= self.count):
            return path
        tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
        ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
        p = 0
        for k in xrange(self.glen[gly], self.glen[gly+1]):
            if (p == 0):
                zx = tx[0:self.vlen[k]+1]
                zy = ty[0:self.vlen[k]+1]
            else:
                zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
                zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
            p += 1
            j = 0
            while ( j < len(zx) ):
                if (j == 0):
                    # Start Position.
                    path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
                elif (j <= len(zx)-3):
                    # Cubic Bezier Curve
                    path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
                    j += 2
                elif (j == len(zx)-2):
                    # Cubic Bezier Curve to Start Position
                    path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
                    j += 1
                elif (j == len(zx)-1):
                    # Quadratic Bezier Curve to Start Position
                    path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])

                j += 1
            path += 'z'
        return path
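
# Note (sketch, illustrative coordinates): getPath yields one closed SVG
# outline per glyph, scaled from the glyph's own dpi up to 1440 dpi,
# along the lines of
#     'M 227 461 C 227 470 234 477 243 477 ... z'
# which is the form GlyphDict stores and the page svg files reference
# with <use xlink:href="#gl...">.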


# dictionary of all glyph paths, indexed by glyph id
class GlyphDict(object):
    def __init__(self):
        self.gdict = {}

    def lookup(self, id):
        # id='id="gl%d"' % val
        if id in self.gdict:
            return self.gdict[id]
        return None

    def addGlyph(self, val, path):
        id='id="gl%d"' % val
        self.gdict[id] = path


def generateBook(bookDir, raw, fixedimage):
    # sanity check Topaz file extraction
    if not os.path.exists(bookDir) :
        print "Can not find directory with unencrypted book"
        return 1

    dictFile = os.path.join(bookDir,'dict0000.dat')
    if not os.path.exists(dictFile) :
        print "Can not find dict0000.dat file"
        return 1

    pageDir = os.path.join(bookDir,'page')
    if not os.path.exists(pageDir) :
        print "Can not find page directory in unencrypted book"
        return 1

    imgDir = os.path.join(bookDir,'img')
    if not os.path.exists(imgDir) :
        print "Can not find image directory in unencrypted book"
        return 1

    glyphsDir = os.path.join(bookDir,'glyphs')
    if not os.path.exists(glyphsDir) :
        print "Can not find glyphs directory in unencrypted book"
        return 1

    metaFile = os.path.join(bookDir,'metadata0000.dat')
    if not os.path.exists(metaFile) :
        print "Can not find metadata0000.dat in unencrypted book"
        return 1

    svgDir = os.path.join(bookDir,'svg')
    if not os.path.exists(svgDir) :
        os.makedirs(svgDir)

    xmlDir = os.path.join(bookDir,'xml')
    if not os.path.exists(xmlDir) :
        os.makedirs(xmlDir)

    otherFile = os.path.join(bookDir,'other0000.dat')
    if not os.path.exists(otherFile) :
        print "Can not find other0000.dat in unencrypted book"
        return 1

    print "Updating to color images if available"
    spath = os.path.join(bookDir,'color_img')
    dpath = os.path.join(bookDir,'img')
    filenames = os.listdir(spath)
    filenames = sorted(filenames)
    for filename in filenames:
        imgname = filename.replace('color','img')
        sfile = os.path.join(spath,filename)
        dfile = os.path.join(dpath,imgname)
        imgdata = file(sfile,'rb').read()
        file(dfile,'wb').write(imgdata)

    print "Creating cover.jpg"
    isCover = False
    cpath = os.path.join(bookDir,'img')
    cpath = os.path.join(cpath,'img0000.jpg')
    if os.path.isfile(cpath):
        cover = file(cpath, 'rb').read()
        cpath = os.path.join(bookDir,'cover.jpg')
        file(cpath, 'wb').write(cover)
        isCover = True


    print 'Processing Dictionary'
    dict = Dictionary(dictFile)

    print 'Processing Meta Data and creating OPF'
    meta_array = getMetaArray(metaFile)

    xname = os.path.join(xmlDir, 'metadata.xml')
    metastr = ''
    for key in meta_array:
        metastr += '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
    file(xname, 'wb').write(metastr)

    print 'Processing StyleSheet'
    # get some scaling info from metadata to use while processing styles
    fontsize = '135'
    if 'fontSize' in meta_array:
        fontsize = meta_array['fontSize']

    # also get the size of a normal text page
    spage = '1'
    if 'firstTextPage' in meta_array:
        spage = meta_array['firstTextPage']
    pnum = int(spage)

    # get page height and width from first text page for use in stylesheet scaling
    pname = 'page%04d.dat' % (pnum + 1)
    fname = os.path.join(pageDir,pname)
    flat_xml = convert2xml.fromData(dict, fname)

    (ph, pw) = getPageDim(flat_xml)
    if (ph == '-1') or (ph == '0') : ph = '11000'
    if (pw == '-1') or (pw == '0') : pw = '8500'

    # print '   ', 'other0000.dat'
    xname = os.path.join(bookDir, 'style.css')
    flat_xml = convert2xml.fromData(dict, otherFile)
    cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
    file(xname, 'wb').write(cssstr)
    xname = os.path.join(xmlDir, 'other0000.xml')
    file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))

    print 'Processing Glyphs'
    gd = GlyphDict()
    filenames = os.listdir(glyphsDir)
    filenames = sorted(filenames)
    glyfname = os.path.join(svgDir,'glyphs.svg')
    glyfile = open(glyfname, 'w')
    glyfile.write('<?xml version="1.0" standalone="no"?>\n')
    glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
    glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
    glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
    glyfile.write('<defs>\n')
    counter = 0
    for filename in filenames:
        # print '   ', filename
        print '.',
        fname = os.path.join(glyphsDir,filename)
        flat_xml = convert2xml.fromData(dict, fname)

        xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
        file(xname, 'wb').write(convert2xml.getXML(dict, fname))

        gp = GParser(flat_xml)
        for i in xrange(0, gp.count):
            path = gp.getPath(i)
            maxh, maxw = gp.getGlyphDim(i)
            fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
            glyfile.write(fullpath)
            gd.addGlyph(counter * 256 + i, fullpath)
        counter += 1
    glyfile.write('</defs>\n')
    glyfile.write('</svg>\n')
    glyfile.close()
    print " "

    # start up the html
    htmlFileName = "book.html"
    htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n'
    htmlstr += '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n'
    htmlstr += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
    htmlstr += '<head>\n'
    htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
    htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
    htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
    htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
    htmlstr += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
    htmlstr += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
    htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
    htmlstr += '</head>\n<body>\n'

    print 'Processing Pages'
    # Books are at 1440 DPI.  This is rendering at twice that size for
    # readability when rendering to the screen.
    scaledpi = 1440.0

    svgindex = '<?xml version="1.0" encoding="utf-8"?>\n'
    svgindex += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
    svgindex += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
    svgindex += '<head>\n'
    svgindex += '<title>' + meta_array['Title'] + '</title>\n'
    svgindex += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
    svgindex += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
    svgindex += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
    svgindex += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
    svgindex += '</head>\n'
    svgindex += '<body>\n'

    filenames = os.listdir(pageDir)
    filenames = sorted(filenames)
    numfiles = len(filenames)
    counter = 0

    for filename in filenames:
        # print '   ', filename
        print ".",

        fname = os.path.join(pageDir,filename)
        flat_xml = convert2xml.fromData(dict, fname)

        xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
        file(xname, 'wb').write(convert2xml.getXML(dict, fname))

        # first get the html
        htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)

        # now get the svg image of the page
        svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)

        if (raw) :
            pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
            svgindex += '<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter)
        else :
            pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
            svgindex += '<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter)

        pfile.write(svgxml)
        pfile.close()

        counter += 1

    print " "

    # finish up the html string and output it
    htmlstr += '</body>\n</html>\n'
    file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)

    # finish up the svg index string and output it
    svgindex += '</body>\n</html>\n'
    file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)

    # build the opf file
    opfname = os.path.join(bookDir, 'book.opf')
    opfstr = '<?xml version="1.0" encoding="utf-8"?>\n'
    opfstr += '<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n'
    # adding metadata
    opfstr += '   <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
    opfstr += '      <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n'
    opfstr += '      <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n'
    opfstr += '      <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n'
    opfstr += '      <dc:title>' + meta_array['Title'] + '</dc:title>\n'
    opfstr += '      <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n'
    opfstr += '      <dc:language>en</dc:language>\n'
    opfstr += '      <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n'
    if isCover:
        opfstr += '      <meta name="cover" content="bookcover"/>\n'
    opfstr += '   </metadata>\n'
    opfstr += '<manifest>\n'
    opfstr += '   <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n'
    opfstr += '   <item id="stylesheet" href="style.css" media-type="text.css"/>\n'
    # adding image files to manifest
    filenames = os.listdir(imgDir)
    filenames = sorted(filenames)
    for filename in filenames:
        imgname, imgext = os.path.splitext(filename)
        if imgext == '.jpg':
            imgext = 'jpeg'
        if imgext == '.svg':
            imgext = 'svg+xml'
        opfstr += '   <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n'
    if isCover:
        opfstr += '   <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n'
    opfstr += '</manifest>\n'
    # adding spine
    opfstr += '<spine>\n   <itemref idref="book" />\n</spine>\n'
    if isCover:
        opfstr += '   <guide>\n'
        opfstr += '      <reference href="cover.jpg" type="cover" title="Cover"/>\n'
        opfstr += '   </guide>\n'
    opfstr += '</package>\n'
    file(opfname, 'wb').write(opfstr)

    print 'Processing Complete'

    return 0
|
||||
|
||||
def usage():
|
||||
print "genbook.py generates a book from the extract Topaz Files"
|
||||
print "Usage:"
|
||||
print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
|
||||
print " "
|
||||
print "Options:"
|
||||
print " -h : help - print this usage message"
|
||||
print " -r : generate raw svg files (not wrapped in xhtml)"
|
||||
print " --fixed-image : genearate any Fixed Area as an svg image in the html"
|
||||
print " "
|
||||
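# Example invocation (annotation, not original code; path hypothetical):
# build the HTML, SVG and OPF outputs from a directory of extracted Topaz
# files, keeping raw svg output:
#
#   python genbook.py -r --fixed-image /path/to/extracted_book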

def main(argv):
    bookDir = ''

    if len(argv) == 0:
        argv = sys.argv

    try:
        # -r and -h are plain flags, and --fixed-image takes no argument
        opts, args = getopt.getopt(argv[1:], "rh", ["fixed-image"])

    except getopt.GetoptError, err:
        print str(err)
        usage()
        return 1

    if len(opts) == 0 and len(args) == 0 :
        usage()
        return 1

    raw = 0
    fixedimage = False
    for o, a in opts:
        if o == "-h":
            usage()
            return 0
        if o == "-r":
            raw = 1
        if o == "--fixed-image":
            fixedimage = True

    bookDir = args[0]

    rv = generateBook(bookDir, raw, fixedimage)
    return rv


if __name__ == '__main__':
    sys.exit(main(''))
@@ -0,0 +1,145 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)
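# Annotation (not original code): wrapping sys.stdout in Unbuffered flushes
# after every write, so the per-file progress lines printed below appear
# immediately even when output is piped or block-buffered, e.g.
#
#   sys.stdout = Unbuffered(sys.stdout)
#   print ' ', 'page0001.dat'    # shows up right away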

import sys
sys.stdout=Unbuffered(sys.stdout)


import os, getopt

# local routines
import convert2xml
import flatxml2html
import decode_meta


def usage():
    print 'Usage: '
    print ' '
    print '   genxml.py unencryptedBookDir'
    print ' '



def main(argv):
    bookDir = ''

    if len(argv) == 0:
        argv = sys.argv

    try:
        # -h is a plain flag and takes no argument
        opts, args = getopt.getopt(argv[1:], "h")

    except getopt.GetoptError, err:
        print str(err)
        usage()
        sys.exit(1)

    if len(opts) == 0 and len(args) == 0 :
        usage()
        sys.exit(1)

    for o, a in opts:
        if o == "-h":
            usage()
            sys.exit(0)

    bookDir = args[0]

    if not os.path.exists(bookDir) :
        print "Cannot find directory with unencrypted book"
        sys.exit(1)

    dictFile = os.path.join(bookDir,'dict0000.dat')
    if not os.path.exists(dictFile) :
        print "Cannot find dict0000.dat file"
        sys.exit(1)

    pageDir = os.path.join(bookDir,'page')
    if not os.path.exists(pageDir) :
        print "Cannot find page directory in unencrypted book"
        sys.exit(1)

    glyphsDir = os.path.join(bookDir,'glyphs')
    if not os.path.exists(glyphsDir) :
        print "Cannot find glyphs directory in unencrypted book"
        sys.exit(1)

    otherFile = os.path.join(bookDir,'other0000.dat')
    if not os.path.exists(otherFile) :
        print "Cannot find other0000.dat in unencrypted book"
        sys.exit(1)

    metaFile = os.path.join(bookDir,'metadata0000.dat')
    if not os.path.exists(metaFile) :
        print "Cannot find metadata0000.dat in unencrypted book"
        sys.exit(1)

    xmlDir = os.path.join(bookDir,'xml')
    if not os.path.exists(xmlDir):
        os.makedirs(xmlDir)


    print 'Processing ... '

    print ' ', 'metadata0000.dat'
    fname = os.path.join(bookDir,'metadata0000.dat')
    xname = os.path.join(xmlDir, 'metadata.txt')
    metastr = decode_meta.getMetaData(fname)
    file(xname, 'wb').write(metastr)

    print ' ', 'other0000.dat'
    fname = os.path.join(bookDir,'other0000.dat')
    xname = os.path.join(xmlDir, 'stylesheet.xml')
    pargv=[]
    pargv.append('convert2xml.py')
    pargv.append(dictFile)
    pargv.append(fname)
    xmlstr = convert2xml.main(pargv)
    file(xname, 'wb').write(xmlstr)

    filenames = os.listdir(pageDir)
    filenames = sorted(filenames)

    for filename in filenames:
        print ' ', filename
        fname = os.path.join(pageDir,filename)
        xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
        pargv=[]
        pargv.append('convert2xml.py')
        pargv.append(dictFile)
        pargv.append(fname)
        xmlstr = convert2xml.main(pargv)
        file(xname, 'wb').write(xmlstr)

    filenames = os.listdir(glyphsDir)
    filenames = sorted(filenames)

    for filename in filenames:
        print ' ', filename
        fname = os.path.join(glyphsDir,filename)
        xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
        pargv=[]
        pargv.append('convert2xml.py')
        pargv.append(dictFile)
        pargv.append(fname)
        xmlstr = convert2xml.main(pargv)
        file(xname, 'wb').write(xmlstr)


    print 'Processing Complete'

    return 0

if __name__ == '__main__':
    sys.exit(main(''))
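# Illustrative refactor sketch (not part of the commit): the three conversion
# passes above share one driver pattern, which could be factored as a single
# hypothetical helper:
#
#   def convertDir(srcDir, dictFile, xmlDir):
#       for filename in sorted(os.listdir(srcDir)):
#           fname = os.path.join(srcDir, filename)
#           xname = os.path.join(xmlDir, filename.replace('.dat', '.xml'))
#           file(xname, 'wb').write(convert2xml.main(['convert2xml.py', dictFile, fname]))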
@@ -144,6 +144,7 @@ class Decryptor(object):
                     enc('CipherReference'))
        for elem in encryption.findall(expr):
            path = elem.get('URI', None)
            if path is not None:
                path = path.encode('utf-8')
                encrypted.add(path)


@@ -1,7 +1,7 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-

# ineptepub.pyw, version 5.2
# ineptepub.pyw, version 5.4
# Copyright © 2009-2010 i♥cabbages

# Released under the terms of the GNU General Public Licence, version 3 or

@@ -25,6 +25,8 @@
# 5.1 - Improve OpenSSL error checking
# 5.2 - Fix ctypes error causing segfaults on some systems
# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to patch level o
# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml

"""
Decrypt Adobe ADEPT-encrypted EPUB books.
"""

@@ -288,6 +290,7 @@ class Decryptor(object):
        for elem in encryption.findall(expr):
            path = elem.get('URI', None)
            if path is not None:
                path = path.encode('utf-8')
                encrypted.add(path)

    def decompress(self, bytes):
@@ -0,0 +1,333 @@
# engine to remove drm from Kindle for Mac books
# for personal use for archiving and converting your ebooks
# PLEASE DO NOT PIRATE!
# We want all authors and publishers, and eBook stores to live long and prosperous lives
#
# it borrows heavily from works by CMBDTC, IHeartCabbages, skindle,
# unswindle, DiapDealer, some_updates and many many others

from __future__ import with_statement

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)

import sys
sys.stdout=Unbuffered(sys.stdout)
import os, csv, getopt
from struct import pack
from struct import unpack
import zlib

# for handling sub processes
import subprocess
from subprocess import Popen, PIPE, STDOUT
import subasyncio
from subasyncio import Process


# Exception Handling
class K4MDEDRMError(Exception):
    pass
class K4MDEDRMFatal(Exception):
    pass

#
# crypto routines
#
import hashlib

def MD5(message):
    ctx = hashlib.md5()
    ctx.update(message)
    return ctx.digest()

def SHA1(message):
    ctx = hashlib.sha1()
    ctx.update(message)
    return ctx.digest()

def SHA256(message):
    ctx = hashlib.sha256()
    ctx.update(message)
    return ctx.digest()

# interface to needed routines in openssl's libcrypto
def _load_crypto_libcrypto():
    from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
        Structure, c_ulong, create_string_buffer, addressof, string_at, cast
    from ctypes.util import find_library

    libcrypto = find_library('crypto')
    if libcrypto is None:
        raise K4MDEDRMError('libcrypto not found')
    libcrypto = CDLL(libcrypto)

    AES_MAXNR = 14
    c_char_pp = POINTER(c_char_p)
    c_int_p = POINTER(c_int)

    class AES_KEY(Structure):
        _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
    AES_KEY_p = POINTER(AES_KEY)

    def F(restype, name, argtypes):
        func = getattr(libcrypto, name)
        func.restype = restype
        func.argtypes = argtypes
        return func

    AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])

    AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])

    PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
                               [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])

    class LibCrypto(object):
        def __init__(self):
            self._blocksize = 0
            self._keyctx = None
            self.iv = 0
        def set_decrypt_key(self, userkey, iv):
            self._blocksize = len(userkey)
            if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
                raise K4MDEDRMError('AES improper key used')
                return
            keyctx = self._keyctx = AES_KEY()
            self.iv = iv
            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
            if rv < 0:
                raise K4MDEDRMError('Failed to initialize AES key')
        def decrypt(self, data):
            out = create_string_buffer(len(data))
            rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self.iv, 0)
            if rv == 0:
                raise K4MDEDRMError('AES decryption failed')
            return out.raw
        def keyivgen(self, passwd):
            salt = '16743'
            saltlen = 5
            passlen = len(passwd)
            iter = 0x3e8
            keylen = 80
            out = create_string_buffer(keylen)
            rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out)
            return out.raw
    return LibCrypto

def _load_crypto():
    LibCrypto = None
    try:
        LibCrypto = _load_crypto_libcrypto()
    except (ImportError, K4MDEDRMError):
        pass
    return LibCrypto

LibCrypto = _load_crypto()

#
# Utility Routines
#

# uses a sub process to get the Hard Drive Serial Number using ioreg
# returns with the first found serial number in that class
def GetVolumeSerialNumber():
    sernum = os.getenv('MYSERIALNUMBER')
    if sernum != None:
        return sernum
    cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
    cmdline = cmdline.encode(sys.getfilesystemencoding())
    p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
    poll = p.wait('wait')
    results = p.read()
    reslst = results.split('\n')
    cnt = len(reslst)
    bsdname = None
    sernum = None
    foundIt = False
    for j in xrange(cnt):
        resline = reslst[j]
        pp = resline.find('"Serial Number" = "')
        if pp >= 0:
            sernum = resline[pp+19:-1]
            sernum = sernum.strip()
        bb = resline.find('"BSD Name" = "')
        if bb >= 0:
            bsdname = resline[bb+14:-1]
            bsdname = bsdname.strip()
            if (bsdname == 'disk0') and (sernum != None):
                foundIt = True
                break
    if not foundIt:
        sernum = '9999999999'
    return sernum

# uses unix env to get username instead of using sysctlbyname
def GetUserName():
    username = os.getenv('USER')
    return username

MAX_PATH = 255

#
# start of Kindle specific routines
#

global kindleDatabase

# Various character maps used to decrypt books. Probably supposed to act as obfuscation
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"

# Encode the bytes in data with the characters in map
def encode(data, map):
    result = ""
    for char in data:
        value = ord(char)
        Q = (value ^ 0x80) // len(map)
        R = value % len(map)
        result += map[Q]
        result += map[R]
    return result

# Hash the bytes in data and then encode the digest with the characters in map
def encodeHash(data,map):
    return encode(MD5(data),map)

# Decode the string in data with the characters in map. Returns the decoded bytes
def decode(data,map):
    result = ""
    for i in range (0,len(data)-1,2):
        high = map.find(data[i])
        low = map.find(data[i+1])
        if (high == -1) or (low == -1) :
            break
        value = (((high * len(map)) ^ 0x80) & 0xFF) + low
        result += pack("B",value)
    return result
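# Hypothetical round-trip check for encode()/decode() above (not part of the
# original tool): the quotient/remainder mapping inverts cleanly whenever
# len(map) divides 128, which holds for charMap1 (32) and charMap3 (64).
def _selftest_encode_decode():
    sample = pack("B", 0x41) + pack("B", 0xC8)
    assert decode(encode(sample, charMap1), charMap1) == sample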

# implements a pseudo Mac version of the Windows built-in CryptUnprotectData routine
def CryptUnprotectData(encryptedData):
    sp = GetVolumeSerialNumber() + '!@#' + GetUserName()
    passwdData = encode(SHA256(sp),charMap1)
    crp = LibCrypto()
    key_iv = crp.keyivgen(passwdData)
    key = key_iv[0:32]
    iv = key_iv[32:48]
    crp.set_decrypt_key(key,iv)
    cleartext = crp.decrypt(encryptedData)
    return cleartext

# Locate and open the .kindle-info file
def openKindleInfo():
    home = os.getenv('HOME')
    kinfopath = home + '/Library/Application Support/Amazon/Kindle/storage/.kindle-info'
    if not os.path.exists(kinfopath):
        kinfopath = home + '/Library/Application Support/Amazon/Kindle for Mac/storage/.kindle-info'
        if not os.path.exists(kinfopath):
            raise K4MDEDRMError('Error: .kindle-info file can not be found')
    return open(kinfopath,'r')

# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo():
    DB = {}
    infoReader = openKindleInfo()
    infoReader.read(1)
    data = infoReader.read()
    items = data.split('[')
    for item in items:
        splito = item.split(':')
        DB[splito[0]] = splito[1]
    return DB
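# Record layout assumed by parseKindleInfo() (values illustrative only): the
# .kindle-info store reads as one string of '[hashedKey:encryptedValue' items,
#
#   '[AbCdEf:ZB0bYy[Gh1JkL:c1xDdW'
#
# so splitting on '[' and then on ':' recovers the key/value pairs.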

# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
    global kindleDatabase
    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
    cleartext = CryptUnprotectData(encryptedValue)
    return decode(cleartext, charMap1)

# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
    return getKindleInfoValueForHash(encodeHash(key,charMap2))

# Find if the original string for a hashed/encoded string is known. If so return the original string, otherwise return an empty string.
def findNameForHash(hash):
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    result = ""
    for name in names:
        if hash == encodeHash(name, charMap2):
            result = name
            break
    return result

# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------")
        else :
            print ("Unknown Record")
        print getKindleInfoValueForHash(record)
        print "\n"

#
# PID generation routines
#

# Returns the two bits at offset from a bit field
def getTwoBitsFromBitField(bitField,offset):
    byteNumber = offset // 4
    bitPosition = 6 - 2*(offset % 4)
    return ord(bitField[byteNumber]) >> bitPosition & 3

# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField,offset):
    offset *= 3
    value = (getTwoBitsFromBitField(bitField,offset) << 4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) + getTwoBitsFromBitField(bitField,offset+2)
    return value

# 8 bits to six bits encoding from hash to generate PID string
def encodePID(hash):
    global charMap3
    PID = ""
    for position in range (0,8):
        PID += charMap3[getSixBitsFromBitField(hash,position)]
    return PID
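# Worked example for the bit-field readers above (digest value hypothetical):
# eight 6-bit groups consume the first six bytes of a SHA-1 digest, and each
# group indexes charMap3, yielding an 8-character PID:
#
#   digest = SHA1('example data')    # 20 bytes
#   pid8 = encodePID(digest)         # 8 chars drawn from charMap3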

#
# Main
#

def main(argv=sys.argv):
    global kindleDatabase

    kindleDatabase = None

    #
    # Read the encrypted database
    #

    try:
        kindleDatabase = parseKindleInfo()
    except Exception, message:
        print(message)

    if kindleDatabase != None :
        printKindleInfo()

    return 0

if __name__ == '__main__':
    sys.exit(main())
@@ -28,7 +28,7 @@

from __future__ import with_statement

__version__ = '1.2'
__version__ = '1.4'

class Unbuffered:
    def __init__(self, stream):
@@ -44,344 +44,60 @@ import os, csv, getopt
import binascii
import zlib
import re
import zlib, zipfile, tempfile, shutil
from struct import pack, unpack, unpack_from


# Exception Handling
class DrmException(Exception):
    pass

#
# crypto digest routines
#

import hashlib

def MD5(message):
    ctx = hashlib.md5()
    ctx.update(message)
    return ctx.digest()

def SHA1(message):
    ctx = hashlib.sha1()
    ctx.update(message)
    return ctx.digest()

# determine if we are running as a calibre plugin
if 'calibre' in sys.modules:
    inCalibre = True
    global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
else:
    inCalibre = False

#
# start of Kindle specific routines
#

if not inCalibre:
    import mobidedrm
    if sys.platform.startswith('win'):
        from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
    if sys.platform.startswith('darwin'):
        from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4

global kindleDatabase

# Encode the bytes in data with the characters in map
def encode(data, map):
    result = ""
    for char in data:
        value = ord(char)
        Q = (value ^ 0x80) // len(map)
        R = value % len(map)
        result += map[Q]
        result += map[R]
    return result

# Hash the bytes in data and then encode the digest with the characters in map
def encodeHash(data,map):
    return encode(MD5(data),map)

# Decode the string in data with the characters in map. Returns the decoded bytes
def decode(data,map):
    result = ""
    for i in range (0,len(data)-1,2):
        high = map.find(data[i])
        low = map.find(data[i+1])
        if (high == -1) or (low == -1) :
            break
        value = (((high * len(map)) ^ 0x80) & 0xFF) + low
        result += pack("B",value)
    return result


# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo(kInfoFile):
    DB = {}
    infoReader = openKindleInfo(kInfoFile)
    infoReader.read(1)
    data = infoReader.read()
    if sys.platform.startswith('win'):
        items = data.split('{')
    else :
        items = data.split('[')
    for item in items:
        splito = item.split(':')
        DB[splito[0]] = splito[1]
    return DB

# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
    global kindleDatabase
    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
    if sys.platform.startswith('win'):
        return CryptUnprotectData(encryptedValue,"")
    else:
        cleartext = CryptUnprotectData(encryptedValue)
        return decode(cleartext, charMap1)

# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
    return getKindleInfoValueForHash(encodeHash(key,charMap2))

# Find if the original string for a hashed/encoded string is known. If so return the original string, otherwise return an empty string.
def findNameForHash(hash):
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    result = ""
    for name in names:
        if hash == encodeHash(name, charMap2):
            result = name
            break
    return result

# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------")
        else :
            print ("Unknown Record")
        print getKindleInfoValueForHash(record)
        print "\n"

#
# PID generation routines
#

# Returns the two bits at offset from a bit field
def getTwoBitsFromBitField(bitField,offset):
    byteNumber = offset // 4
    bitPosition = 6 - 2*(offset % 4)
    return ord(bitField[byteNumber]) >> bitPosition & 3

# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField,offset):
    offset *= 3
    value = (getTwoBitsFromBitField(bitField,offset) << 4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) + getTwoBitsFromBitField(bitField,offset+2)
    return value

# 8 bits to six bits encoding from hash to generate PID string
def encodePID(hash):
    global charMap3
    PID = ""
    for position in range (0,8):
        PID += charMap3[getSixBitsFromBitField(hash,position)]
    return PID

# Encryption table used to generate the device PID
def generatePidEncryptionTable() :
    table = []
    for counter1 in range (0,0x100):
        value = counter1
        for counter2 in range (0,8):
            if (value & 1 == 0) :
                value = value >> 1
            else :
                value = value >> 1
                value = value ^ 0xEDB88320
        table.append(value)
    return table

# Seed value used to generate the device PID
def generatePidSeed(table,dsn) :
    value = 0
    for counter in range (0,4) :
        index = (ord(dsn[counter]) ^ value) & 0xFF
        value = (value >> 8) ^ table[index]
    return value

# Generate the device PID
def generateDevicePID(table,dsn,nbRoll):
    seed = generatePidSeed(table,dsn)
    pidAscii = ""
    pid = [(seed >> 24) & 0xFF, (seed >> 16) & 0xff, (seed >> 8) & 0xFF, (seed) & 0xFF, (seed >> 24) & 0xFF, (seed >> 16) & 0xff, (seed >> 8) & 0xFF, (seed) & 0xFF]
    index = 0
    for counter in range (0,nbRoll):
        pid[index] = pid[index] ^ ord(dsn[counter])
        index = (index+1) % 8
    for counter in range (0,8):
        index = ((((pid[counter] >> 5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
        pidAscii += charMap4[index]
    return pidAscii

# convert from 8 digit PID to 10 digit PID with checksum
def checksumPid(s):
    letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
    crc = (~binascii.crc32(s,-1)) & 0xFFFFFFFF
    crc = crc ^ (crc >> 16)
    res = s
    l = len(letters)
    for i in (0,1):
        b = crc & 0xff
        pos = (b // l) ^ (b % l)
        res += letters[pos % l]
        crc >>= 8
    return res
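# Sketch of the checksum step (input value hypothetical): checksumPid appends
# two CRC32-derived check letters to an 8-character PID, e.g.
#
#   pid10 = checksumPid('ABCD1234')  # 'ABCD1234' plus two check characters
#
# Kindle/Mobipocket tools always exchange the 10-character form.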


class MobiPeek:
    def loadSection(self, section):
        before, after = self.sections[section:section+2]
        self.f.seek(before)
        return self.f.read(after - before)
    def __init__(self, filename):
        self.f = file(filename, 'rb')
        self.header = self.f.read(78)
        self.ident = self.header[0x3C:0x3C+8]
        if self.ident != 'BOOKMOBI' and self.ident != 'TEXtREAd':
            raise DrmException('invalid file format')
        self.num_sections, = unpack_from('>H', self.header, 76)
        sections = self.f.read(self.num_sections*8)
        self.sections = unpack_from('>%dL' % (self.num_sections*2), sections, 0)[::2] + (0xfffffff, )
        self.sect0 = self.loadSection(0)
        self.f.close()
    def getBookTitle(self):
        # get book title
        toff, tlen = unpack('>II', self.sect0[0x54:0x5c])
        tend = toff + tlen
        title = self.sect0[toff:tend]
        return title
    def getexthData(self):
        # if an exth region exists then grab it
        # get length of this header
        length, type, codepage, unique_id, version = unpack('>LLLLL', self.sect0[20:40])
        exth_flag, = unpack('>L', self.sect0[0x80:0x84])
        exth = ''
        if exth_flag & 0x40:
            exth = self.sect0[16 + length:]
        return exth
    def isNotEncrypted(self):
        lock_type, = unpack('>H', self.sect0[0xC:0xC+2])
        if lock_type == 0:
            return True
        return False
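# Minimal usage sketch for MobiPeek (path hypothetical): peek at a Mobi file
# without loading the whole book, then decide whether DRM removal is needed:
#
#   ex = MobiPeek('/path/to/book.azw')
#   if not ex.isNotEncrypted():
#       exth = ex.getexthData()      # raw EXTH block used to derive the PID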

# DiapDealer's stuff: Parse the EXTH header records and parse the Kindleinfo
# file to calculate the book pid.
def getK4Pids(exth, title, kInfoFile=None):
    global kindleDatabase
    try:
        kindleDatabase = parseKindleInfo(kInfoFile)
    except Exception, message:
        print(message)

    if kindleDatabase != None :
        # Get the Mazama Random number
        MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")

        # Get the HDD serial
        encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)

        # Get the current user name
        encodedUsername = encodeHash(GetUserName(),charMap1)

        # concat, hash and encode to calculate the DSN
        DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)

        print("\nDSN: " + DSN)

        # Compute the device PID (for all I can tell, it is used for nothing).
        # But hey, stuff being printed out is apparently cool.
        table = generatePidEncryptionTable()
        devicePID = generateDevicePID(table,DSN,4)

        print("Device PID: " + checksumPid(devicePID))

        # Compute book PID
        exth_records = {}
        nitems, = unpack('>I', exth[8:12])
        pos = 12

        exth_records[209] = None
        # Parse the exth records, storing data indexed by type
        for i in xrange(nitems):
            type, size = unpack('>II', exth[pos: pos + 8])
            content = exth[pos + 8: pos + size]

            exth_records[type] = content
            pos += size

        # Grab the contents of the type 209 exth record
        if exth_records[209] != None:
            data = exth_records[209]
        else:
            raise DrmException("\nNo EXTH record type 209 - Perhaps not a K4 file?")

        # Parse the 209 data to find the exth record with the token data.
        # The last character of the 209 data points to the record with the token.
        # Always 208 from my experience, but I'll leave the logic in case that changes.
        for i in xrange(len(data)):
            if ord(data[i]) != 0:
                if exth_records[ord(data[i])] != None:
                    token = exth_records[ord(data[i])]

        # Get the kindle account token
        kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")

        print("Account Token: " + kindleAccountToken)

        pidHash = SHA1(DSN+kindleAccountToken+exth_records[209]+token)

        bookPID = encodePID(pidHash)
        bookPID = checksumPid(bookPID)

        if exth_records.get(503) != None:
            print "Pid for " + exth_records[503] + ": " + bookPID
        else:
            print "Pid for " + title + ": " + bookPID
        return bookPID

    raise DrmException("\nCould not access K4 data - Perhaps K4 is not installed/configured?")
    return None

def zipUpDir(myzip, tempdir, localname):
    currentdir = tempdir
    if localname != "":
        currentdir = os.path.join(currentdir,localname)
    list = os.listdir(currentdir)
    for file in list:
        afilename = file
        localfilePath = os.path.join(localname, afilename)
        realfilePath = os.path.join(currentdir,file)
        if os.path.isfile(realfilePath):
            myzip.write(realfilePath, localfilePath)
        elif os.path.isdir(realfilePath):
            zipUpDir(myzip, tempdir, localfilePath)
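# Usage sketch for zipUpDir (paths hypothetical): recursively pack a work
# directory's 'img' subtree into an open archive, preserving its layout:
#
#   myzip = zipfile.ZipFile('book_nodrm.zip', 'w', zipfile.ZIP_DEFLATED, False)
#   zipUpDir(myzip, '/tmp/book_workdir', 'img')
#   myzip.close()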

def usage(progname):
    print "Removes DRM protection from K4PC, K4M, and Mobi ebooks"
    print "Removes DRM protection from K4PC/M, Kindle, Mobi and Topaz ebooks"
    print "Usage:"
    print "    %s [-k <kindle.info>] [-p <pidnums>] <infile> <outfile> " % progname
    print "    %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname

#
# Main
#
def main(argv=sys.argv):
    global kindleDatabase
    import mobidedrm

    import topazextract
    import kgenpids
    progname = os.path.basename(argv[0])

    k4 = False
    kInfoFiles = []
    pidnums = ""
    serials = []
    pids = []

    print ('K4MobiDeDrm v%(__version__)s '
           'provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.' % globals())

    print ' '
    try:
        opts, args = getopt.getopt(sys.argv[1:], "k:p:")
        opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
    except getopt.GetoptError, err:
        print str(err)
        usage(progname)
        sys.exit(2)

    if len(args) < 2:
        usage(progname)
        sys.exit(2)
@@ -394,108 +110,145 @@ def main(argv=sys.argv):
        if o == "-p":
            if a == None :
                raise DrmException("Invalid parameter for -p")
            pidnums = a
            pids = a.split(',')
        if o == "-s":
            if a == None :
                raise DrmException("Invalid parameter for -s")
            serials = a.split(',')

    # try with built in Kindle Info files
    k4 = True

    kindleDatabase = None
    infile = args[0]
    outfile = args[1]
    DecodeErrorString = ""
    try:
        # first try with K4PC/K4M
        ex = MobiPeek(infile)
        if ex.isNotEncrypted():
            print "File was Not Encrypted"
            return 2
        title = ex.getBookTitle()
        exth = ex.getexthData()
        if exth == '':
            raise DrmException("Not a Kindle Mobipocket file")
        pid = getK4Pids(exth, title)
        unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
    except DrmException, e:
        DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
        pass
    except mobidedrm.DrmException, e:
        DecodeErrorString += "Error trying default K4 info: " + str(e) + "\n"
        pass
    outdir = args[1]

    # handle the obvious cases at the beginning
    if not os.path.isfile(infile):
        print "Error: Input file does not exist"
        return 1

    mobi = True
    magic3 = file(infile,'rb').read(3)
    if magic3 == 'TPZ':
        mobi = False

    bookname = os.path.splitext(os.path.basename(infile))[0]

    if mobi:
        mb = mobidedrm.MobiBook(infile)
    else:
        file(outfile, 'wb').write(unlocked_file)
        return 0

    # now try alternate kindle.info files
    if kInfoFiles:
        for infoFile in kInfoFiles:
            kindleDatabase = None
            try:
                title = ex.getBookTitle()
                exth = ex.getexthData()
                if exth == '':
                    raise DrmException("Not a Kindle Mobipocket file")
                pid = getK4Pids(exth, title, infoFile)
                unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
            except DrmException, e:
                DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
                pass
            except mobidedrm.DrmException, e:
                DecodeErrorString += "Error trying " + infoFile + " K4 info: " + str(e) + "\n"
                pass
            else:
                file(outfile, 'wb').write(unlocked_file)
                return 0

    # Lastly, try from the pid list
    pids = pidnums.split(',')
    for pid in pids:
        try:
            print 'Trying: "' + pid + '"'
            unlocked_file = mobidedrm.getUnencryptedBook(infile, pid)
        except mobidedrm.DrmException:
            pass
        tempdir = tempfile.mkdtemp()
        mb = topazextract.TopazBook(infile, tempdir)

    title = mb.getBookTitle()
    print "Processing Book: ", title

    # build pid list
    md1, md2 = mb.getPIDMetaInfo()
    pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)

    try:
        if mobi:
            unlocked_file = mb.processBook(pidlst)
        else:
            file(outfile, 'wb').write(unlocked_file)
            return 0
            mb.processBook(pidlst)

    # we could not unencrypt book
    print DecodeErrorString
    print "Error: Could Not Unencrypt Book"
    return 1
    except mobidedrm.DrmException, e:
        print "  ... not successful " + str(e) + "\n"
        return 1
    except topazextract.TpzDRMError, e:
        print str(e)
        print "   Creating Debug Full Zip Archive of Book"
        zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
        myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
        zipUpDir(myzip, tempdir, '')
        myzip.close()
        return 1

    if mobi:
        outfile = os.path.join(outdir, bookname + '_nodrm' + '.azw')
        file(outfile, 'wb').write(unlocked_file)
        return 0

    # topaz: build up zip archives of results
    print "   Creating HTML ZIP Archive"
    zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
    myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
    myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
    myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
    if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
        myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
    myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
    zipUpDir(myzip1, tempdir, 'img')
    myzip1.close()

    print "   Creating SVG ZIP Archive"
    zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
    myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
    myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
    zipUpDir(myzip2, tempdir, 'svg')
    zipUpDir(myzip2, tempdir, 'img')
    myzip2.close()

    print "   Creating XML ZIP Archive"
    zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
    myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
    targetdir = os.path.join(tempdir,'xml')
    zipUpDir(myzip3, targetdir, '')
    zipUpDir(myzip3, tempdir, 'img')
    myzip3.close()

    shutil.rmtree(tempdir)
    return 0

if __name__ == '__main__':
    sys.stdout = Unbuffered(sys.stdout)
    sys.exit(main())


if not __name__ == "__main__" and inCalibre:
    from calibre.customize import FileTypePlugin

    class K4DeDRM(FileTypePlugin):
        name = 'K4PC, K4Mac, Mobi DeDRM' # Name of the plugin
        description = 'Removes DRM from K4PC, K4Mac, and Mobi files. \
        name = 'K4PC, K4Mac, Kindle Mobi and Topaz DeDRM' # Name of the plugin
        description = 'Removes DRM from K4PC and Mac, Kindle Mobi and Topaz files. \
Provided by the work of many including DiapDealer, SomeUpdates, IHeartCabbages, CMBDTC, Skindle, DarkReverser, ApprenticeAlf, etc.'
        supported_platforms = ['osx', 'windows', 'linux'] # Platforms this plugin will run on
        author = 'DiapDealer, SomeUpdates' # The author of this plugin
        version = (0, 1, 2) # The version number of this plugin
        file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
        version = (0, 1, 7) # The version number of this plugin
        file_types = set(['prc','mobi','azw','azw1','tpz']) # The file types that this plugin will be applied to
        on_import = True # Run this plugin during the import
        priority = 200 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm
        priority = 210 # run this plugin before mobidedrm, k4pcdedrm, k4dedrm

        def run(self, path_to_ebook):
            from calibre.gui2 import is_ok_to_use_qt
            from PyQt4.Qt import QMessageBox
            global kindleDatabase
            global openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
            if sys.platform.startswith('win'):
                from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
            if sys.platform.startswith('darwin'):
                from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap1, charMap2, charMap3, charMap4
            from calibre.ptempfile import PersistentTemporaryDirectory

            import kgenpids
            import zlib
            import zipfile
            import topazextract
            import mobidedrm

            # Get supplied list of PIDs to try from plugin customization.
            pidnums = self.site_customization

            # Load any kindle info files (*.info) included in Calibre's config directory.
            k4 = True
            pids = []
            serials = []
            kInfoFiles = []

            # Get supplied list of PIDs to try from plugin customization.
            customvalues = self.site_customization.split(',')
            for customvalue in customvalues:
                customvalue = str(customvalue)
                customvalue = customvalue.strip()
                if len(customvalue) == 10 or len(customvalue) == 8:
                    pids.append(customvalue)
                else :
                    if len(customvalue) == 16 and customvalue[0] == 'B':
                        serials.append(customvalue)
                    else:
                        print "%s is not a valid Kindle serial number or PID." % str(customvalue)
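# Illustrative customization string (values hypothetical): a mix of 8- or
# 10-character PIDs and 16-character serials beginning with 'B', e.g.
#
#   'A1B2C3D4E5,B0A1B2C3D4E5F6A7,XYZ12345'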

            # Load any kindle info files (*.info) included in Calibre's config directory.
            try:
                # Find Calibre's configuration directory.
                confpath = os.path.split(os.path.split(self.plugin_path)[0])[0]
@@ -513,70 +266,68 @@ if not __name__ == "__main__" and inCalibre:
                print 'K4MobiDeDRM: Error reading kindle info files from config directory.'
                pass

            # first try with book specific pid from K4PC or K4M
            try:
                kindleDatabase = None
                ex = MobiPeek(path_to_ebook)
                if ex.isNotEncrypted():
                    return path_to_ebook
                title = ex.getBookTitle()
                exth = ex.getexthData()
                if exth == '':
                    raise DrmException("Not a Kindle Mobipocket file")
                pid = getK4Pids(exth, title)
                unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook, pid)
            except DrmException:
                pass
            except mobidedrm.DrmException:
                pass

            mobi = True
            magic3 = file(path_to_ebook,'rb').read(3)
            if magic3 == 'TPZ':
                mobi = False

            bookname = os.path.splitext(os.path.basename(path_to_ebook))[0]

            if mobi:
                mb = mobidedrm.MobiBook(path_to_ebook)
            else:
                of = self.temporary_file('.mobi')
                tempdir = PersistentTemporaryDirectory()
                mb = topazextract.TopazBook(path_to_ebook, tempdir)

            title = mb.getBookTitle()
            md1, md2 = mb.getPIDMetaInfo()
            pidlst = kgenpids.getPidList(md1, md2, k4, pids, serials, kInfoFiles)

            try:
                if mobi:
                    unlocked_file = mb.processBook(pidlst)
                else:
                    mb.processBook(pidlst)

            except mobidedrm.DrmException:
                # if you reach here then no luck, raise an exception
                if is_ok_to_use_qt():
                    d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
                    d.show()
                    d.raise_()
                    d.exec_()
                raise Exception("K4MobiDeDRM plugin could not decode the file")
                return ""
            except topazextract.TpzDRMError:
                # if you reach here then no luck, raise an exception
                if is_ok_to_use_qt():
                    d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
                    d.show()
                    d.raise_()
                    d.exec_()
                raise Exception("K4MobiDeDRM plugin could not decode the file")
                return ""

            print "Success!"
            if mobi:
                of = self.temporary_file(bookname+'.mobi')
                of.write(unlocked_file)
                of.close()
                return of.name

            # Now try alternate kindle info files
            if kInfoFiles:
                for infoFile in kInfoFiles:
                    kindleDatabase = None
                    try:
                        title = ex.getBookTitle()
                        exth = ex.getexthData()
                        if exth == '':
                            raise DrmException("Not a Kindle Mobipocket file")
                        pid = getK4Pids(exth, title, infoFile)
                        unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook, pid)
                    except DrmException:
                        pass
                    except mobidedrm.DrmException:
                        pass
                    else:
                        of = self.temporary_file('.mobi')
                        of.write(unlocked_file)
                        of.close()
                        return of.name

            # now try from the pid list
            pids = pidnums.split(',')
            for pid in pids:
                try:
                    unlocked_file = mobidedrm.getUnencryptedBook(path_to_ebook, pid)
                except mobidedrm.DrmException:
                    pass
                else:
                    of = self.temporary_file('.mobi')
                    of.write(unlocked_file)
                    of.close()
                    return of.name

            # if you reach here then no luck, raise an exception
            if is_ok_to_use_qt():
                d = QMessageBox(QMessageBox.Warning, "K4MobiDeDRM Plugin", "Error decoding: %s\n" % path_to_ebook)
                d.show()
                d.raise_()
                d.exec_()
            raise Exception("K4MobiDeDRM plugin could not decode the file")
            return ""
            # topaz: build up zip archives of results
            print "   Creating HTML ZIP Archive"
            of = self.temporary_file(bookname + '.zip')
            myzip = zipfile.ZipFile(of.name,'w',zipfile.ZIP_DEFLATED, False)
            myzip.write(os.path.join(tempdir,'book.html'),'book.html')
            myzip.write(os.path.join(tempdir,'book.opf'),'book.opf')
            if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
                myzip.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
            myzip.write(os.path.join(tempdir,'style.css'),'style.css')
            zipUpDir(myzip, tempdir, 'img')
            myzip.close()
            return of.name

        def customization_help(self, gui=False):
            return 'Enter each 10 character PID separated by a comma (no spaces).'
            return 'Enter 10 character PIDs and/or Kindle serial numbers, separated by commas.'
@@ -1,159 +1,14 @@
# standalone set of Mac OSX specific routines needed for K4DeDRM

from __future__ import with_statement

import sys
import os
import subprocess


# Exception Handling
class K4MDrmException(Exception):
    pass

import signal
import threading
import subprocess
from subprocess import Popen, PIPE, STDOUT

# **heavily** chopped up and modified version of asyncproc.py
# to make it actually work on Windows as well as Mac/Linux
# For the original see:
# "http://www.lysator.liu.se/~bellman/download/"
# author is "Thomas Bellman <bellman@lysator.liu.se>"
# available under GPL version 3 or Later

# create an asynchronous subprocess whose output can be collected in
# a non-blocking manner

# What a mess! Have to use threads just to get non-blocking io
# in a cross-platform manner

# luckily all thread use is hidden within this class

class Process(object):
    def __init__(self, *params, **kwparams):
        if len(params) <= 3:
            kwparams.setdefault('stdin', subprocess.PIPE)
        if len(params) <= 4:
            kwparams.setdefault('stdout', subprocess.PIPE)
        if len(params) <= 5:
            kwparams.setdefault('stderr', subprocess.PIPE)
        self.__pending_input = []
        self.__collected_outdata = []
        self.__collected_errdata = []
        self.__exitstatus = None
        self.__lock = threading.Lock()
        self.__inputsem = threading.Semaphore(0)
        self.__quit = False

        self.__process = subprocess.Popen(*params, **kwparams)

        if self.__process.stdin:
            self.__stdin_thread = threading.Thread(
                name="stdin-thread",
                target=self.__feeder, args=(self.__pending_input,
                                            self.__process.stdin))
            self.__stdin_thread.setDaemon(True)
            self.__stdin_thread.start()

        if self.__process.stdout:
            self.__stdout_thread = threading.Thread(
                name="stdout-thread",
                target=self.__reader, args=(self.__collected_outdata,
                                            self.__process.stdout))
            self.__stdout_thread.setDaemon(True)
            self.__stdout_thread.start()

        if self.__process.stderr:
            self.__stderr_thread = threading.Thread(
                name="stderr-thread",
                target=self.__reader, args=(self.__collected_errdata,
                                            self.__process.stderr))
            self.__stderr_thread.setDaemon(True)
            self.__stderr_thread.start()

    def pid(self):
        return self.__process.pid

    def kill(self, signal):
        self.__process.send_signal(signal)

    # check on subprocess (pass in 'nowait') to act like poll
    def wait(self, flag):
        if flag.lower() == 'nowait':
            rc = self.__process.poll()
        else:
            rc = self.__process.wait()
        if rc != None:
            if self.__process.stdin:
                self.closeinput()
            if self.__process.stdout:
                self.__stdout_thread.join()
            if self.__process.stderr:
                self.__stderr_thread.join()
        return self.__process.returncode

    def terminate(self):
        if self.__process.stdin:
            self.closeinput()
        self.__process.terminate()

    # thread gets data from subprocess stdout
    def __reader(self, collector, source):
        while True:
            data = os.read(source.fileno(), 65536)
            self.__lock.acquire()
            collector.append(data)
            self.__lock.release()
            if data == "":
                source.close()
                break
        return

    # thread feeds data to subprocess stdin
    def __feeder(self, pending, drain):
        while True:
            self.__inputsem.acquire()
            self.__lock.acquire()
            if not pending and self.__quit:
                drain.close()
                self.__lock.release()
                break
            data = pending.pop(0)
            self.__lock.release()
            drain.write(data)

    # non-blocking read of data from subprocess stdout
    def read(self):
        self.__lock.acquire()
        outdata = "".join(self.__collected_outdata)
        del self.__collected_outdata[:]
        self.__lock.release()
        return outdata

    # non-blocking read of data from subprocess stderr
    def readerr(self):
        self.__lock.acquire()
        errdata = "".join(self.__collected_errdata)
        del self.__collected_errdata[:]
        self.__lock.release()
        return errdata

    # non-blocking write to stdin of subprocess
    def write(self, data):
        if self.__process.stdin is None:
            raise ValueError("Writing to process with stdin not a pipe")
        self.__lock.acquire()
        self.__pending_input.append(data)
        self.__inputsem.release()
        self.__lock.release()

    # close stdin of subprocess
    def closeinput(self):
        self.__lock.acquire()
        self.__quit = True
        self.__inputsem.release()
        self.__lock.release()
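# Sketch of typical Process use (command illustrative): run a tool, wait for
# it, then drain whatever the reader thread collected from stdout:
#
#   p = Process('/usr/sbin/ioreg -l', shell=True, stdin=None, stdout=PIPE)
#   rc = p.wait('wait')              # 'nowait' polls instead of blocking
#   output = p.read()                # non-blocking; returns buffered data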


# interface to needed routines in openssl's libcrypto
def _load_crypto_libcrypto():
@@ -236,6 +91,15 @@ LibCrypto = _load_crypto()
# Utility Routines
#


# Various character maps used to decrypt books. Probably supposed to act as obfuscation
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"



# uses a sub process to get the Hard Drive Serial Number using ioreg
# returns with the serial number of drive whose BSD Name is "disk0"
def GetVolumeSerialNumber():
@@ -244,10 +108,9 @@ def GetVolumeSerialNumber():
        return sernum
    cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
    cmdline = cmdline.encode(sys.getfilesystemencoding())
    p = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
    poll = p.wait('wait')
    results = p.read()
    reslst = results.split('\n')
    p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
    out1, out2 = p.communicate()
    reslst = out1.split('\n')
    cnt = len(reslst)
    bsdname = None
    sernum = None
@@ -274,11 +137,6 @@ def GetUserName():
    username = os.getenv('USER')
    return username

# Various character maps used to decrypt books. Probably supposed to act as obfuscation
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap2 = "ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"

def encode(data, map):
    result = ""
@@ -309,16 +167,16 @@ def CryptUnprotectData(encryptedData):
    cleartext = crp.decrypt(encryptedData)
    return cleartext


# Locate and open the .kindle-info file
def openKindleInfo(kInfoFile=None):
    if kInfoFile == None:
        home = os.getenv('HOME')
        cmdline = 'find "' + home + '/Library/Application Support" -name ".kindle-info"'
        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p1 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        poll = p1.wait('wait')
        results = p1.read()
        reslst = results.split('\n')
        p1 = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
        out1, out2 = p1.communicate()
        reslst = out1.split('\n')
        kinfopath = 'NONE'
        cnt = len(reslst)
        for j in xrange(cnt):

@@ -0,0 +1,312 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os, csv
|
||||
import binascii
|
||||
import zlib
|
||||
import re
|
||||
from struct import pack, unpack, unpack_from
|
||||
|
||||
class DrmException(Exception):
|
||||
pass
|
||||
|
||||
global kindleDatabase
|
||||
global charMap1
|
||||
global charMap2
|
||||
global charMap3
|
||||
global charMap4
|
||||
|
||||
if sys.platform.startswith('win'):
|
||||
from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
|
||||
if sys.platform.startswith('darwin'):
|
||||
from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
|
||||
|
||||
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
|
||||
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
|
||||
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
|
||||
|
||||
# crypto digestroutines
|
||||
import hashlib
|
||||
|
||||
def MD5(message):
|
||||
ctx = hashlib.md5()
|
||||
ctx.update(message)
|
||||
return ctx.digest()
|
||||
|
||||
def SHA1(message):
|
||||
ctx = hashlib.sha1()
|
||||
ctx.update(message)
|
||||
return ctx.digest()
|
||||
|
||||
|
||||
# Encode the bytes in data with the characters in map
|
||||
def encode(data, map):
|
||||
result = ""
|
||||
for char in data:
|
||||
value = ord(char)
|
||||
Q = (value ^ 0x80) // len(map)
|
||||
R = value % len(map)
|
||||
result += map[Q]
|
||||
result += map[R]
|
||||
return result
|
||||
|
||||
# Hash the bytes in data and then encode the digest with the characters in map
|
||||
def encodeHash(data,map):
|
||||
return encode(MD5(data),map)
|
||||
|
||||
# Decode the string in data with the characters in map. Returns the decoded bytes
|
||||
def decode(data,map):
|
||||
result = ""
|
||||
for i in range (0,len(data)-1,2):
|
||||
high = map.find(data[i])
|
||||
low = map.find(data[i+1])
|
||||
if (high == -1) or (low == -1) :
|
||||
break
|
||||
value = (((high * len(map)) ^ 0x80) & 0xFF) + low
|
||||
result += pack("B",value)
|
||||
return result

# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo(kInfoFile):
    DB = {}
    infoReader = openKindleInfo(kInfoFile)
    infoReader.read(1)
    data = infoReader.read()
    if sys.platform.startswith('win'):
        items = data.split('{')
    else :
        items = data.split('[')
    for item in items:
        splito = item.split(':')
        DB[splito[0]] = splito[1]
    return DB

# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
    global kindleDatabase
    global charMap1
    global charMap2
    encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
    if sys.platform.startswith('win'):
        return CryptUnprotectData(encryptedValue,"")
    else:
        cleartext = CryptUnprotectData(encryptedValue)
        return decode(cleartext, charMap1)

# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
    global charMap2
    return getKindleInfoValueForHash(encodeHash(key,charMap2))

# Find out if the original string for a hashed/encoded string is known. If so, return the original string, otherwise return an empty string.
def findNameForHash(hash):
    global charMap2
    names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
    result = ""
    for name in names:
        if hash == encodeHash(name, charMap2):
            result = name
            break
    return result

# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
    for record in kindleDatabase:
        name = findNameForHash(record)
        if name != "" :
            print (name)
            print ("--------------------------")
        else :
            print ("Unknown Record")
        print getKindleInfoValueForHash(record)
        print "\n"

#
# PID generation routines
#

# Returns the two bits at offset from a bit field
def getTwoBitsFromBitField(bitField,offset):
    byteNumber = offset // 4
    bitPosition = 6 - 2*(offset % 4)
    return ord(bitField[byteNumber]) >> bitPosition & 3

# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField,offset):
    offset *= 3
    value = (getTwoBitsFromBitField(bitField,offset) << 4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) + getTwoBitsFromBitField(bitField,offset+2)
    return value

# 8 bits to six bits encoding from hash to generate PID string
def encodePID(hash):
    global charMap3
    PID = ""
    for position in range (0,8):
        PID += charMap3[getSixBitsFromBitField(hash,position)]
    return PID
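
Each of the eight output characters packs three consecutive two-bit groups, so only the first six bytes of the digest are consumed; a quick illustration (not part of the commit):

    digest = SHA1("any input at all")      # 20 bytes; only the first 6 matter here
    print encodePID(digest)                # 8 characters drawn from charMap3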

# Encryption table used to generate the device PID
def generatePidEncryptionTable() :
    table = []
    for counter1 in range (0,0x100):
        value = counter1
        for counter2 in range (0,8):
            if (value & 1 == 0) :
                value = value >> 1
            else :
                value = value >> 1
                value = value ^ 0xEDB88320
        table.append(value)
    return table

# Seed value used to generate the device PID
def generatePidSeed(table,dsn) :
    value = 0
    for counter in range (0,4) :
        index = (ord(dsn[counter]) ^ value) & 0xFF
        value = (value >> 8) ^ table[index]
    return value
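
The table above is the familiar reflected CRC-32 lookup table (polynomial 0xEDB88320), and generatePidSeed() runs a plain table-driven CRC with a zero initial value over the first four DSN bytes. Usage is straightforward (illustrative only):

    table = generatePidEncryptionTable()
    assert len(table) == 256
    seed = generatePidSeed(table, "DSN0")   # any string of four or more bytes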

# Generate the device PID
def generateDevicePID(table,dsn,nbRoll):
    global charMap4
    seed = generatePidSeed(table,dsn)
    pidAscii = ""
    pid = [(seed >> 24) & 0xFF, (seed >> 16) & 0xFF, (seed >> 8) & 0xFF, seed & 0xFF, (seed >> 24) & 0xFF, (seed >> 16) & 0xFF, (seed >> 8) & 0xFF, seed & 0xFF]
    index = 0
    for counter in range (0,nbRoll):
        pid[index] = pid[index] ^ ord(dsn[counter])
        index = (index+1) % 8
    for counter in range (0,8):
        index = ((((pid[counter] >> 5) & 3) ^ pid[counter]) & 0x1F) + (pid[counter] >> 7)
        pidAscii += charMap4[index]
    return pidAscii

def crc32(s):
    return (~binascii.crc32(s,-1)) & 0xFFFFFFFF

# convert from 8 digit PID to 10 digit PID with checksum
def checksumPid(s):
    global charMap4
    crc = crc32(s)
    crc = crc ^ (crc >> 16)
    res = s
    l = len(charMap4)
    for i in (0,1):
        b = crc & 0xff
        pos = (b // l) ^ (b % l)
        res += charMap4[pos % l]
        crc >>= 8
    return res
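
The two extra characters are derived from a folded CRC-32 of the eight-digit PID, so the result always begins with the input; a minimal illustration (not part of the commit, PID made up):

    pid10 = checksumPid("A1B2C3D4")
    assert len(pid10) == 10 and pid10.startswith("A1B2C3D4")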

# old kindle serial number to fixed pid
def pidFromSerial(s, l):
    global charMap4
    crc = crc32(s)
    arr1 = [0]*l
    for i in xrange(len(s)):
        arr1[i%l] ^= ord(s[i])
    crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
    for i in xrange(l):
        arr1[i] ^= crc_bytes[i&3]
    pid = ""
    for i in xrange(l):
        b = arr1[i] & 0xff
        pid += charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
    return pid

# Parse the EXTH header records and use the Kindle serial number to calculate the book PID.
def getKindlePid(pidlst, rec209, token, serialnum):
    if rec209 != None:
        # Compute book PID
        pidHash = SHA1(serialnum+rec209+token)
        bookPID = encodePID(pidHash)
        bookPID = checksumPid(bookPID)
        pidlst.append(bookPID)

    # also compute the fixed PID used by old pre-2.5 firmware
    bookPID = pidFromSerial(serialnum, 7) + "*"
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    return pidlst
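
A sketch of a call, with a made-up serial number and placeholder EXTH data (illustrative only):

    pids = getKindlePid([], "\x00" * 16, "sampletoken", "B001A2B3C4D5E6F7")
    print pids                              # book PID plus the fixed pre-2.5 PID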

# Parse the EXTH header records and parse the kindle.info
# file to calculate the book PID.

def getK4Pids(pidlst, rec209, token, kInfoFile=None):
    global kindleDatabase
    global charMap1
    kindleDatabase = None
    try:
        kindleDatabase = parseKindleInfo(kInfoFile)
    except Exception, message:
        print(message)
        pass

    if kindleDatabase == None :
        return pidlst

    # Get the Mazama Random number
    MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")

    # Get the HDD serial
    encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)

    # Get the current user name
    encodedUsername = encodeHash(GetUserName(),charMap1)

    # concat, hash and encode to calculate the DSN
    DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)

    # Compute the device PID (which, for all I can tell, is used for nothing).
    table = generatePidEncryptionTable()
    devicePID = generateDevicePID(table,DSN,4)
    devicePID = checksumPid(devicePID)
    pidlst.append(devicePID)

    # Compute book PID
    if rec209 == None:
        print "\nNo EXTH record type 209 - Perhaps not a K4 file?"
        return pidlst

    # Get the kindle account token
    kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")

    # book PID
    pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    # variant 1
    pidHash = SHA1(kindleAccountToken+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    # variant 2
    pidHash = SHA1(DSN+rec209+token)
    bookPID = encodePID(pidHash)
    bookPID = checksumPid(bookPID)
    pidlst.append(bookPID)

    return pidlst

def getPidList(md1, md2, k4, pids, serials, kInfoFiles):
    pidlst = []
    if k4:
        pidlst = getK4Pids(pidlst, md1, md2)
    for infoFile in kInfoFiles:
        pidlst = getK4Pids(pidlst, md1, md2, infoFile)
    for serialnum in serials:
        pidlst = getKindlePid(pidlst, md1, md2, serialnum)
    for pid in pids:
        pidlst.append(pid)
    return pidlst
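
getPidList() is the single entry point the front ends call: md1/md2 are the EXTH record-209 data and token, and the result folds together kindle.info PIDs, serial-number PIDs, and any PIDs given on the command line. A hypothetical call (rec209 and token are placeholders coming from the book's getPIDMetaInfo()):

    pidlst = getPidList(rec209, token, False, ["A1B2C3D4E5"], ["B001A2B3C4D5E6F7"], [])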

@@ -24,7 +24,7 @@
# 0.14 - Working out when the extra data flags are present has been problematic
#        Versions 7 through 9 have tried to tweak the conditions, but have been
#        only partially successful. Closer examination of lots of sample
#        files reveals that a confusin has arisen because trailing data entries
#        files reveals that a confusion has arisen because trailing data entries
#        are not encrypted, but it turns out that the multibyte entries
#        in utf8 file are encrypted. (Although neither kind gets compressed.)
#        This knowledge leads to a simplification of the test for the
@@ -39,13 +39,13 @@
#        Removed the disabled Calibre plug-in code
#        Permit use of 8-digit PIDs
# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
# 0.20 - Corretion: It seems that multibyte entries are encrypted in a v6 file.
# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
# 0.21 - Added support for multiple pids
# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface

__version__ = '0.20'
__version__ = '0.22'

import sys
import struct
import binascii

class Unbuffered:
    def __init__(self, stream):
@@ -55,10 +55,19 @@ class Unbuffered:
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)
sys.stdout=Unbuffered(sys.stdout)

import struct
import binascii

class DrmException(Exception):
    pass
#
# MobiBook Utility Routines
#

# Implementation of Pukall Cipher 1
def PC1(key, src, decryption=True):
    sum1 = 0;
@@ -70,7 +79,6 @@ def PC1(key, src, decryption=True):
    wkey = []
    for i in xrange(8):
        wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))

    dst = ""
    for i in xrange(len(src)):
        temp1 = 0;
@@ -131,7 +139,9 @@ def getSizeOfTrailingDataEntries(ptr, size, flags):
            num += (ord(ptr[size - num - 1]) & 0x3) + 1
    return num

class DrmStripper:

class MobiBook:
    def loadSection(self, section):
        if (section + 1 == self.num_sections):
            endoff = len(self.data_file)
@@ -140,6 +150,78 @@ class DrmStripper:
            off = self.sections[section][0]
        return self.data_file[off:endoff]
    def __init__(self, infile):
        # initial sanity check on file
        self.data_file = file(infile, 'rb').read()
        self.header = self.data_file[0:78]
        if self.header[0x3C:0x3C+8] != 'BOOKMOBI':
            raise DrmException("invalid file format")

        # build up section offset and flag info
        self.num_sections, = struct.unpack('>H', self.header[76:78])
        self.sections = []
        for i in xrange(self.num_sections):
            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16|a3<<8|a4
            self.sections.append( (offset, flags, val) )

        # parse information from section 0
        self.sect = self.loadSection(0)
        self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
        self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
        self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
        print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length)
        self.extra_data_flags = 0
        if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
            self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
            print "Extra Data Flags = %d" % self.extra_data_flags
        if self.mobi_version < 7:
            # multibyte utf8 data is included in the encryption for mobi_version 6 and below
            # so clear that byte so that we leave it to be decrypted.
            self.extra_data_flags &= 0xFFFE

        # if exth region exists parse it for metadata array
        self.meta_array = {}
        exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
        exth = ''
        if exth_flag & 0x40:
            exth = self.sect[16 + self.mobi_length:]
            nitems, = struct.unpack('>I', exth[8:12])
            pos = 12
            for i in xrange(nitems):
                type, size = struct.unpack('>II', exth[pos: pos + 8])
                content = exth[pos + 8: pos + size]
                self.meta_array[type] = content
                pos += size
    def getBookTitle(self):
        title = ''
        if 503 in self.meta_array:
            title = self.meta_array[503]
        else :
            toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
            tend = toff + tlen
            title = self.sect[toff:tend]
        if title == '':
            title = self.header[:32]
            title = title.split("\0")[0]
        return title

    def getPIDMetaInfo(self):
        rec209 = None
        token = None
        if 209 in self.meta_array:
            rec209 = self.meta_array[209]
            data = rec209
            # Parse the 209 data to find the exth record with the token data.
            # The last character of the 209 data points to the record with the token.
            # Always 208 from my experience, but I'll leave the logic in case that changes.
            for i in xrange(len(data)):
                if ord(data[i]) != 0:
                    if self.meta_array[ord(data[i])] != None:
                        token = self.meta_array[ord(data[i])]
        return rec209, token

    def patch(self, off, new):
        self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
@@ -152,134 +234,122 @@ class DrmStripper:
            assert off + in_off + len(new) <= endoff
        self.patch(off + in_off, new)

    def parseDRM(self, data, count, pid):
        pid = pid.ljust(16,'\0')
        keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
        temp_key = PC1(keyvec1, pid, False)
        temp_key_sum = sum(map(ord,temp_key)) & 0xff
    def parseDRM(self, data, count, pidlist):
        found_key = None
        for i in xrange(count):
            verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
            cookie = PC1(temp_key, cookie)
            ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
            if verification == ver and cksum == temp_key_sum and (flags & 0x1F) == 1:
                found_key = finalkey
        keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
        for pid in pidlist:
            bigpid = pid.ljust(16,'\0')
            temp_key = PC1(keyvec1, bigpid, False)
            temp_key_sum = sum(map(ord,temp_key)) & 0xff
            found_key = None
            for i in xrange(count):
                verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
                if cksum == temp_key_sum:
                    cookie = PC1(temp_key, cookie)
                    ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
                    if verification == ver and (flags & 0x1F) == 1:
                        found_key = finalkey
                        break
            if found_key != None:
                break
        if not found_key:
            # Then try the default encoding that doesn't require a PID
            pid = "00000000"
            temp_key = keyvec1
            temp_key_sum = sum(map(ord,temp_key)) & 0xff
            for i in xrange(count):
                verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
                cookie = PC1(temp_key, cookie)
                ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
                if verification == ver and cksum == temp_key_sum:
                    found_key = finalkey
                    break
        return found_key
                if cksum == temp_key_sum:
                    cookie = PC1(temp_key, cookie)
                    ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
                    if verification == ver:
                        found_key = finalkey
                        break
        return [found_key,pid]
    def __init__(self, data_file, pid):
        if len(pid)==10:
            if checksumPid(pid[0:-2]) != pid:
                raise DrmException("invalid PID checksum")
            pid = pid[0:-2]
        elif len(pid)==8:
            print "PID without checksum given. With checksum PID is "+checksumPid(pid)
        else:
            raise DrmException("Invalid PID length")

        self.data_file = data_file
        header = data_file[0:72]
        if header[0x3C:0x3C+8] != 'BOOKMOBI':
            raise DrmException("invalid file format")
        self.num_sections, = struct.unpack('>H', data_file[76:78])

        self.sections = []
        for i in xrange(self.num_sections):
            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16|a3<<8|a4
            self.sections.append( (offset, flags, val) )

        sect = self.loadSection(0)
        records, = struct.unpack('>H', sect[0x8:0x8+2])
        mobi_length, = struct.unpack('>L',sect[0x14:0x18])
        mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
        extra_data_flags = 0
        print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
        if (mobi_length >= 0xE4) and (mobi_version >= 5):
            extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
            print "Extra Data Flags = %d" %extra_data_flags
        if mobi_version < 7:
            # multibyte utf8 data is included in the encryption for mobi_version 6 and below
            # so clear that byte so that we leave it to be decrypted.
            extra_data_flags &= 0xFFFE

        crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
    def processBook(self, pidlist):
        crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
        if crypto_type == 0:
            print "This book is not encrypted."
            return self.data_file
        if crypto_type == 1:
            raise DrmException("Cannot decode Mobipocket encryption type 1")
        if crypto_type != 2:
            raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)

        goodpids = []
        for pid in pidlist:
            if len(pid)==10:
                if checksumPid(pid[0:-2]) != pid:
                    print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
                goodpids.append(pid[0:-2])
            elif len(pid)==8:
                goodpids.append(pid)

        # calculate the keys
        drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
        if drm_count == 0:
            raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
        found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
        if not found_key:
            raise DrmException("No key found. Most likely the correct PID has not been given.")

        if pid=="00000000":
            print "File has default encryption, no specific PID."
        else:
            if crypto_type == 1:
                raise DrmException("cannot decode Mobipocket encryption type 1")
            if crypto_type != 2:
                raise DrmException("unknown encryption type: %d" % crypto_type)
            print "File is encoded with PID "+checksumPid(pid)+"."

        # calculate the keys
        drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
        if drm_count == 0:
            raise DrmException("no PIDs found in this file")
        found_key = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, pid)
        if not found_key:
            raise DrmException("no key found. maybe the PID is incorrect")
        # kill the drm keys
        self.patchSection(0, "\0" * drm_size, drm_ptr)
        # kill the drm pointers
        self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
        # clear the crypto type
        self.patchSection(0, "\0" * 2, 0xC)

        # kill the drm keys
        self.patchSection(0, "\0" * drm_size, drm_ptr)
        # kill the drm pointers
        self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
        # clear the crypto type
        self.patchSection(0, "\0" * 2, 0xC)

        # decrypt sections
        print "Decrypting. Please wait . . .",
        new_data = self.data_file[:self.sections[1][0]]
        for i in xrange(1, records+1):
            data = self.loadSection(i)
            extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
            if i%100 == 0:
                print ".",
            # print "record %d, extra_size %d" %(i,extra_size)
            new_data += PC1(found_key, data[0:len(data) - extra_size])
            if extra_size > 0:
                new_data += data[-extra_size:]
            #self.patchSection(i, PC1(found_key, data[0:len(data) - extra_size]))
        if self.num_sections > records+1:
            new_data += self.data_file[self.sections[records+1][0]:]
        self.data_file = new_data
        print "done"

    def getResult(self):
        # decrypt sections
        print "Decrypting. Please wait . . .",
        new_data = self.data_file[:self.sections[1][0]]
        for i in xrange(1, self.records+1):
            data = self.loadSection(i)
            extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
            if i%100 == 0:
                print ".",
            # print "record %d, extra_size %d" %(i,extra_size)
            new_data += PC1(found_key, data[0:len(data) - extra_size])
            if extra_size > 0:
                new_data += data[-extra_size:]
        if self.num_sections > self.records+1:
            new_data += self.data_file[self.sections[self.records+1][0]:]
        self.data_file = new_data
        print "done"
        return self.data_file
def getUnencryptedBook(infile,pid):
    sys.stdout=Unbuffered(sys.stdout)
    data_file = file(infile, 'rb').read()
    strippedFile = DrmStripper(data_file, pid)
    return strippedFile.getResult()
    if not os.path.isfile(infile):
        raise DrmException('Input File Not Found')
    book = MobiBook(infile)
    return book.processBook([pid])

def getUnencryptedBookWithList(infile,pidlist):
    if not os.path.isfile(infile):
        raise DrmException('Input File Not Found')
    book = MobiBook(infile)
    return book.processBook(pidlist)

def main(argv=sys.argv):
    sys.stdout=Unbuffered(sys.stdout)
    print ('MobiDeDrm v%(__version__)s. '
        'Copyright 2008-2010 The Dark Reverser.' % globals())
    if len(argv)<4:
        print "Removes protection from Mobipocket books"
        print "Usage:"
        print "    %s <infile> <outfile> <PID>" % sys.argv[0]
        print "    %s <infile> <outfile> <Comma separated list of PIDs to try>" % sys.argv[0]
        return 1
    else:
        infile = argv[1]
        outfile = argv[2]
        pid = argv[3]
        pidlist = argv[3].split(',')
        try:
            stripped_file = getUnencryptedBook(infile, pid)
            stripped_file = getUnencryptedBookWithList(infile, pidlist)
            file(outfile, 'wb').write(stripped_file)
        except DrmException, e:
            print "Error: %s" % e
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

import Tkinter
import Tkconstants

# basic scrolled text widget
class ScrolledText(Tkinter.Text):
    def __init__(self, master=None, **kw):
        self.frame = Tkinter.Frame(master)
        self.vbar = Tkinter.Scrollbar(self.frame)
        self.vbar.pack(side=Tkconstants.RIGHT, fill=Tkconstants.Y)
        kw.update({'yscrollcommand': self.vbar.set})
        Tkinter.Text.__init__(self, self.frame, **kw)
        self.pack(side=Tkconstants.LEFT, fill=Tkconstants.BOTH, expand=True)
        self.vbar['command'] = self.yview
        # Copy geometry methods of self.frame without overriding Text
        # methods -- hack!
        text_meths = vars(Tkinter.Text).keys()
        methods = vars(Tkinter.Pack).keys() + vars(Tkinter.Grid).keys() + vars(Tkinter.Place).keys()
        methods = set(methods).difference(text_meths)
        for m in methods:
            if m[0] != '_' and m != 'config' and m != 'configure':
                setattr(self, m, getattr(self.frame, m))

    def __str__(self):
        return str(self.frame)
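
Because the geometry methods are forwarded to the enclosing frame, the widget drops into a layout like any other; a minimal usage sketch (not part of the commit):

    root = Tkinter.Tk()
    st = ScrolledText(root, wrap=Tkconstants.WORD)
    st.pack(fill=Tkconstants.BOTH, expand=True)    # actually packs self.frame
    st.insert(Tkconstants.END, "log output goes here\n")
    root.mainloop()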

@@ -0,0 +1,243 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6

import csv
import sys
import os
import getopt
from struct import pack
from struct import unpack


class DocParser(object):
    def __init__(self, flatxml, fontsize, ph, pw):
        self.flatdoc = flatxml.split('\n')
        self.fontsize = int(fontsize)
        self.ph = int(ph) * 1.0
        self.pw = int(pw) * 1.0

    stags = {
        'paragraph' : 'p',
        'graphic'   : '.graphic'
    }

    attr_val_map = {
        'hang'          : 'text-indent: ',
        'indent'        : 'text-indent: ',
        'line-space'    : 'line-height: ',
        'margin-bottom' : 'margin-bottom: ',
        'margin-left'   : 'margin-left: ',
        'margin-right'  : 'margin-right: ',
        'margin-top'    : 'margin-top: ',
        'space-after'   : 'padding-bottom: ',
    }

    attr_str_map = {
        'align-center'   : 'text-align: center; margin-left: auto; margin-right: auto;',
        'align-left'     : 'text-align: left;',
        'align-right'    : 'text-align: right;',
        'align-justify'  : 'text-align: justify;',
        'display-inline' : 'display: inline;',
        'pos-left'       : 'text-align: left;',
        'pos-right'      : 'text-align: right;',
        'pos-center'     : 'text-align: center; margin-left: auto; margin-right: auto;',
    }

    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        result = None
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1 :
            end = cnt
        else:
            end = min(cnt,end)
        foundat = -1
        for j in xrange(pos, end):
            item = docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
            else :
                name = item
                argres = ''
            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
        return foundat, result

    # return list of start positions for the tagpath
    def posinDoc(self, tagpath):
        startpos = []
        pos = 0
        res = ""
        while res != None :
            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
            if res != None :
                startpos.append(foundpos)
            pos = foundpos + 1
        return startpos
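
The "flat xml" input is one dotted tag path per line, with an optional =value; a tiny illustration of the two lookup helpers over a made-up document (not part of the commit):

    dp = DocParser("style._tag=paragraph\nstyle.class=calibre1", 12, 1280, 800)
    print dp.findinDoc('style.class', 0, -1)    # -> (1, 'calibre1')
    print dp.posinDoc('style._tag')             # -> [0]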

    def process(self):

        classlst = ''
        csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
        csspage += '.cl-right { text-align: right; }\n'
        csspage += '.cl-left { text-align: left; }\n'
        csspage += '.cl-justify { text-align: justify; }\n'

        # generate a list of each <style> starting point in the stylesheet
        styleList = self.posinDoc('book.stylesheet.style')
        stylecnt = len(styleList)
        styleList.append(-1)

        # process each style converting what you can
        for j in xrange(stylecnt):
            start = styleList[j]
            end = styleList[j+1]

            (pos, tag) = self.findinDoc('style._tag',start,end)
            if tag == None :
                (pos, tag) = self.findinDoc('style.type',start,end)

            # Is this something we know how to convert to css?
            if tag in self.stags :

                # get the style class
                (pos, sclass) = self.findinDoc('style.class',start,end)
                if sclass != None:
                    sclass = sclass.replace(' ','-')
                    sclass = '.cl-' + sclass.lower()
                else :
                    sclass = ''

                # check for any "after class" specifiers
                (pos, aftclass) = self.findinDoc('style._after_class',start,end)
                if aftclass != None:
                    aftclass = aftclass.replace(' ','-')
                    aftclass = '.cl-' + aftclass.lower()
                else :
                    aftclass = ''

                cssargs = {}

                while True :

                    (pos1, attr) = self.findinDoc('style.rule.attr', start, end)
                    (pos2, val) = self.findinDoc('style.rule.value', start, end)

                    if attr == None : break

                    if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
                        # handle text based attributes
                        attr = attr + '-' + val
                        if attr in self.attr_str_map :
                            cssargs[attr] = (self.attr_str_map[attr], '')
                    else :
                        # handle value based attributes
                        if attr in self.attr_val_map :
                            name = self.attr_val_map[attr]
                            if attr in ('margin-bottom', 'margin-top', 'space-after') :
                                scale = self.ph
                            elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
                                scale = self.pw
                            elif attr == 'line-space':
                                scale = self.fontsize * 2.0

                            if not ((attr == 'hang') and (int(val) == 0)) :
                                pv = float(val)/scale
                                cssargs[attr] = (self.attr_val_map[attr], pv)
                                keep = True

                    start = max(pos1, pos2) + 1

                # disable all of the after class tags until I figure out how to handle them
                if aftclass != "" : keep = False

                if keep :
                    # make sure line-space does not go below 100% or above 300% since
                    # it can be wacky in some styles
                    if 'line-space' in cssargs:
                        seg = cssargs['line-space'][0]
                        val = cssargs['line-space'][1]
                        if val < 1.0: val = 1.0
                        if val > 3.0: val = 3.0
                        del cssargs['line-space']
                        cssargs['line-space'] = (self.attr_val_map['line-space'], val)

                    # handle modifications for css style hanging indents
                    if 'hang' in cssargs:
                        hseg = cssargs['hang'][0]
                        hval = cssargs['hang'][1]
                        del cssargs['hang']
                        cssargs['hang'] = (self.attr_val_map['hang'], -hval)
                        mval = 0
                        mseg = 'margin-left: '
                        mval = hval
                        if 'margin-left' in cssargs:
                            mseg = cssargs['margin-left'][0]
                            mval = cssargs['margin-left'][1]
                            if mval < 0: mval = 0
                            mval = hval + mval
                        cssargs['margin-left'] = (mseg, mval)
                        if 'indent' in cssargs:
                            del cssargs['indent']

                    cssline = sclass + ' { '
                    for key in iter(cssargs):
                        mseg = cssargs[key][0]
                        mval = cssargs[key][1]
                        if mval == '':
                            cssline += mseg + ' '
                        else :
                            aseg = mseg + '%.1f%%;' % (mval * 100.0)
                            cssline += aseg + ' '

                    cssline += '}'

                    if sclass != '' :
                        classlst += sclass + '\n'

                    # handle special case of paragraph class used inside chapter heading
                    # and non-chapter headings
                    if sclass != '' :
                        ctype = sclass[4:7]
                        if ctype == 'ch1' :
                            csspage += 'h1' + cssline + '\n'
                        if ctype == 'ch2' :
                            csspage += 'h2' + cssline + '\n'
                        if ctype == 'ch3' :
                            csspage += 'h3' + cssline + '\n'
                        if ctype == 'h1-' :
                            csspage += 'h4' + cssline + '\n'
                        if ctype == 'h2-' :
                            csspage += 'h5' + cssline + '\n'
                        if ctype == 'h3_' :
                            csspage += 'h6' + cssline + '\n'

                    if cssline != ' { }':
                        csspage += self.stags[tag] + cssline + '\n'

        return csspage, classlst

def convert2CSS(flatxml, fontsize, ph, pw):

    print ' ', 'Using font size:',fontsize
    print ' ', 'Using page height:', ph
    print ' ', 'Using page width:', pw

    # create a document parser
    dp = DocParser(flatxml, fontsize, ph, pw)

    csspage = dp.process()

    return csspage

@@ -0,0 +1,434 @@
#!/usr/bin/env python

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)

import sys
sys.stdout=Unbuffered(sys.stdout)
import os, csv, getopt
import zlib, zipfile, tempfile, shutil
from struct import pack
from struct import unpack

class TpzDRMError(Exception):
    pass

# local support routines
import kgenpids
import genbook

#
# Utility routines
#

# Get a 7 bit encoded number from file
def bookReadEncodedNumber(fo):
    flag = False
    data = ord(fo.read(1))
    if data == 0xFF:
        flag = True
        data = ord(fo.read(1))
    if data >= 0x80:
        datax = (data & 0x7F)
        while data >= 0x80 :
            data = ord(fo.read(1))
            datax = (datax << 7) + (data & 0x7F)
        data = datax
    if flag:
        data = -data
    return data
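
The format is a big-endian base-128 varint: continuation bytes keep the high bit set, and a leading 0xFF flags a negative value. A round trip through an in-memory file (illustrative, Python 2):

    from StringIO import StringIO
    assert bookReadEncodedNumber(StringIO("\x81\x23")) == 0xA3       # (1<<7) + 0x23
    assert bookReadEncodedNumber(StringIO("\xff\x81\x23")) == -0xA3  # 0xFF marks negative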

# Get a length prefixed string from file
def bookReadString(fo):
    stringLength = bookReadEncodedNumber(fo)
    return unpack(str(stringLength)+"s",fo.read(stringLength))[0]

#
# crypto routines
#

# Context initialisation for the Topaz Crypto
def topazCryptoInit(key):
    ctx1 = 0x0CAFFE19E
    for keyChar in key:
        keyByte = ord(keyChar)
        ctx2 = ctx1
        ctx1 = ((((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF) ^ ((keyByte * keyByte * 0x0F902007) & 0xFFFFFFFF))
    return [ctx1,ctx2]

# decrypt data with the context prepared by topazCryptoInit()
def topazCryptoDecrypt(data, ctx):
    ctx1 = ctx[0]
    ctx2 = ctx[1]
    plainText = ""
    for dataChar in data:
        dataByte = ord(dataChar)
        m = (dataByte ^ ((ctx1 >> 3) & 0xFF) ^ ((ctx2 << 3) & 0xFF)) & 0xFF
        ctx2 = ctx1
        ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF) ^ ((m * m * 0x0F902007) & 0xFFFFFFFF)
        plainText += chr(m)
    return plainText
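
The cipher state is fed by the plaintext byte m, so this routine is not its own inverse; an encryptor differs only in feeding the state from its input instead of its output. The helper below is not part of the commit, it exists only to demonstrate the round trip:

    def topazCryptoEncrypt(data, ctx):
        # mirror of topazCryptoDecrypt(): the state update uses the plaintext side
        ctx1, ctx2 = ctx
        cipherText = ""
        for dataChar in data:
            p = ord(dataChar)
            c = (p ^ ((ctx1 >> 3) & 0xFF) ^ ((ctx2 << 3) & 0xFF)) & 0xFF
            ctx2 = ctx1
            ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF) ^ ((p * p * 0x0F902007) & 0xFFFFFFFF)
            cipherText += chr(c)
        return cipherText

    blob = topazCryptoEncrypt("hello", topazCryptoInit("PIDPIDPI"))
    assert topazCryptoDecrypt(blob, topazCryptoInit("PIDPIDPI")) == "hello"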

# Decrypt data with the PID
def decryptRecord(data,PID):
    ctx = topazCryptoInit(PID)
    return topazCryptoDecrypt(data, ctx)

# Try to decrypt a dkey record (contains the bookPID)
def decryptDkeyRecord(data,PID):
    record = decryptRecord(data,PID)
    fields = unpack("3sB8sB8s3s",record)
    if fields[0] != "PID" or fields[5] != "pid" :
        raise TpzDRMError("Didn't find PID magic numbers in record")
    elif fields[1] != 8 or fields[3] != 8 :
        raise TpzDRMError("Record didn't contain correct length fields")
    elif fields[2] != PID :
        raise TpzDRMError("Record didn't contain PID")
    return fields[4]
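
So a valid dkey record is exactly 24 bytes of plaintext: "PID", a length byte of 8, the 8-digit PID, a length byte of 8, the 8-byte book key, then "pid". Building one with the hypothetical topazCryptoEncrypt() sketched above and decrypting it back (illustrative only, PID and key made up):

    pid = "ABCDEFGH"
    plain = "PID" + chr(8) + pid + chr(8) + "KEY#0001" + "pid"
    blob = topazCryptoEncrypt(plain, topazCryptoInit(pid))
    assert decryptDkeyRecord(blob, pid) == "KEY#0001"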

# Decrypt all dkey records (contain the book PID)
def decryptDkeyRecords(data,PID):
    nbKeyRecords = ord(data[0])
    records = []
    data = data[1:]
    for i in range (0,nbKeyRecords):
        length = ord(data[0])
        try:
            key = decryptDkeyRecord(data[1:length+1],PID)
            records.append(key)
        except TpzDRMError:
            pass
        data = data[1+length:]
    if len(records) == 0:
        raise TpzDRMError("BookKey Not Found")
    return records


class TopazBook:
    def __init__(self, filename, outdir):
        self.fo = file(filename, 'rb')
        self.outdir = outdir
        self.bookPayloadOffset = 0
        self.bookHeaderRecords = {}
        self.bookMetadata = {}
        self.bookKey = None
        magic = unpack("4s",self.fo.read(4))[0]
        if magic != 'TPZ0':
            raise TpzDRMError("Parse Error : Invalid Header, not a Topaz file")
        self.parseTopazHeaders()
        self.parseMetadata()

    def parseTopazHeaders(self):
        def bookReadHeaderRecordData():
            # Read and return the data of one header record at the current book file position
            # [[offset,decompressedLength,compressedLength],...]
            nbValues = bookReadEncodedNumber(self.fo)
            values = []
            for i in range (0,nbValues):
                values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
            return values
        def parseTopazHeaderRecord():
            # Read and parse one header record at the current book file position and return the associated data
            # [[offset,decompressedLength,compressedLength],...]
            if ord(self.fo.read(1)) != 0x63:
                raise TpzDRMError("Parse Error : Invalid Header")
            tag = bookReadString(self.fo)
            record = bookReadHeaderRecordData()
            return [tag,record]
        nbRecords = bookReadEncodedNumber(self.fo)
        for i in range (0,nbRecords):
            result = parseTopazHeaderRecord()
            # print result[0], result[1]
            self.bookHeaderRecords[result[0]] = result[1]
        if ord(self.fo.read(1)) != 0x64 :
            raise TpzDRMError("Parse Error : Invalid Header")
        self.bookPayloadOffset = self.fo.tell()

    def parseMetadata(self):
        # Parse the metadata record from the book payload and return a list of [key,values]
        self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords["metadata"][0][0])
        tag = bookReadString(self.fo)
        if tag != "metadata" :
            raise TpzDRMError("Parse Error : Record Names Don't Match")
        flags = ord(self.fo.read(1))
        nbRecords = ord(self.fo.read(1))
        for i in range (0,nbRecords) :
            record = [bookReadString(self.fo), bookReadString(self.fo)]
            self.bookMetadata[record[0]] = record[1]
        return self.bookMetadata

    def getPIDMetaInfo(self):
        keysRecord = None
        keysRecordRecord = None
        if 'keys' in self.bookMetadata:
            keysRecord = self.bookMetadata['keys']
            keysRecordRecord = self.bookMetadata[keysRecord]
        return keysRecord, keysRecordRecord

    def getBookTitle(self):
        title = ''
        if 'Title' in self.bookMetadata:
            title = self.bookMetadata['Title']
        return title

    def setBookKey(self, key):
        self.bookKey = key

    def getBookPayloadRecord(self, name, index):
        # Get a record in the book payload, given its name and index.
        # Decrypted and decompressed if necessary.
        encrypted = False
        compressed = False
        try:
            recordOffset = self.bookHeaderRecords[name][index][0]
        except:
            raise TpzDRMError("Parse Error : Invalid Record, record not found")

        self.fo.seek(self.bookPayloadOffset + recordOffset)

        tag = bookReadString(self.fo)
        if tag != name :
            raise TpzDRMError("Parse Error : Invalid Record, record name doesn't match")

        recordIndex = bookReadEncodedNumber(self.fo)
        if recordIndex < 0 :
            encrypted = True
            recordIndex = -recordIndex - 1

        if recordIndex != index :
            raise TpzDRMError("Parse Error : Invalid Record, index doesn't match")

        if (self.bookHeaderRecords[name][index][2] > 0):
            compressed = True
            record = self.fo.read(self.bookHeaderRecords[name][index][2])
        else:
            record = self.fo.read(self.bookHeaderRecords[name][index][1])

        if encrypted:
            if self.bookKey:
                ctx = topazCryptoInit(self.bookKey)
                record = topazCryptoDecrypt(record,ctx)
            else :
                raise TpzDRMError("Error: Attempt to decrypt without bookKey")

        if compressed:
            record = zlib.decompress(record)

        return record

    def processBook(self, pidlst):
        raw = 0
        fixedimage = True
        try:
            keydata = self.getBookPayloadRecord('dkey', 0)
        except TpzDRMError, e:
            print "no dkey record found, book may not be encrypted"
            print "attempting to extract files without a book key"
            self.createBookDirectory()
            self.extractFiles()
            print "Successfully Extracted Topaz contents"
            rv = genbook.generateBook(self.outdir, raw, fixedimage)
            if rv == 0:
                print "\nBook Successfully generated"
            return rv

        # try each pid to decode the file
        bookKey = None
        for pid in pidlst:
            # use 8 digit pids here
            pid = pid[0:8]
            print "\nTrying: ", pid
            bookKeys = []
            data = keydata
            try:
                bookKeys += decryptDkeyRecords(data,pid)
            except TpzDRMError, e:
                pass
            else:
                bookKey = bookKeys[0]
                print "Book Key Found!"
                break

        if not bookKey:
            raise TpzDRMError('Decryption Unsuccessful; No valid pid found')

        self.setBookKey(bookKey)
        self.createBookDirectory()
        self.extractFiles()
        print "Successfully Extracted Topaz contents"
        rv = genbook.generateBook(self.outdir, raw, fixedimage)
        if rv == 0:
            print "\nBook Successfully generated"
        return rv

    def createBookDirectory(self):
        outdir = self.outdir
        # create output directory structure
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        destdir = os.path.join(outdir,'img')
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        destdir = os.path.join(outdir,'color_img')
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        destdir = os.path.join(outdir,'page')
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        destdir = os.path.join(outdir,'glyphs')
        if not os.path.exists(destdir):
            os.makedirs(destdir)

    def extractFiles(self):
        outdir = self.outdir
        for headerRecord in self.bookHeaderRecords:
            name = headerRecord
            if name != "dkey" :
                ext = '.dat'
                if name == 'img' : ext = '.jpg'
                if name == 'color' : ext = '.jpg'
                print "\nProcessing Section: %s " % name
                for index in range (0,len(self.bookHeaderRecords[name])) :
                    fnum = "%04d" % index
                    fname = name + fnum + ext
                    destdir = outdir
                    if name == 'img':
                        destdir = os.path.join(outdir,'img')
                    if name == 'color':
                        destdir = os.path.join(outdir,'color_img')
                    if name == 'page':
                        destdir = os.path.join(outdir,'page')
                    if name == 'glyphs':
                        destdir = os.path.join(outdir,'glyphs')
                    outputFile = os.path.join(destdir,fname)
                    print ".",
                    record = self.getBookPayloadRecord(name,index)
                    if record != '':
                        file(outputFile, 'wb').write(record)
                print " "


def zipUpDir(myzip, tempdir, localname):
    currentdir = tempdir
    if localname != "":
        currentdir = os.path.join(currentdir,localname)
    list = os.listdir(currentdir)
    for file in list:
        afilename = file
        localfilePath = os.path.join(localname, afilename)
        realfilePath = os.path.join(currentdir,file)
        if os.path.isfile(realfilePath):
            myzip.write(realfilePath, localfilePath)
        elif os.path.isdir(realfilePath):
            zipUpDir(myzip, tempdir, localfilePath)
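
zipUpDir() recurses with paths kept relative to tempdir, so archive entries mirror the extracted directory tree; a usage sketch with made-up paths (not part of the commit):

    z = zipfile.ZipFile('/tmp/book_debug.zip', 'w', zipfile.ZIP_DEFLATED, False)
    zipUpDir(z, '/tmp/topaz_workdir', '')       # zip everything under the work dir
    z.close()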


def usage(progname):
    print "Removes DRM protection from Topaz ebooks and extracts the contents"
    print "Usage:"
    print "  %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname


# Main
def main(argv=sys.argv):
    progname = os.path.basename(argv[0])
    k4 = False
    pids = []
    serials = []
    kInfoFiles = []

    try:
        opts, args = getopt.getopt(sys.argv[1:], "k:p:s:")
    except getopt.GetoptError, err:
        print str(err)
        usage(progname)
        return 1
    if len(args)<2:
        usage(progname)
        return 1

    for o, a in opts:
        if o == "-k":
            if a == None :
                print "Invalid parameter for -k"
                return 1
            kInfoFiles.append(a)
        if o == "-p":
            if a == None :
                print "Invalid parameter for -p"
                return 1
            pids = a.split(',')
        if o == "-s":
            if a == None :
                print "Invalid parameter for -s"
                return 1
            serials = a.split(',')
            k4 = True

    infile = args[0]
    outdir = args[1]

    if not os.path.isfile(infile):
        print "Input File Does Not Exist"
        return 1

    bookname = os.path.splitext(os.path.basename(infile))[0]
    tempdir = tempfile.mkdtemp()

    tb = TopazBook(infile, tempdir)
    title = tb.getBookTitle()
    print "Processing Book: ", title
    keysRecord, keysRecordRecord = tb.getPIDMetaInfo()
    pidlst = kgenpids.getPidList(keysRecord, keysRecordRecord, k4, pids, serials, kInfoFiles)

    try:
        tb.processBook(pidlst)
    except TpzDRMError, e:
        print str(e)
        print "  Creating DeBug Full Zip Archive of Book"
        zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
        myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
        zipUpDir(myzip, tempdir, '')
        myzip.close()
        return 1

    print "  Creating HTML ZIP Archive"
    zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
    myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
    myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
    myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
    if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
        myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
    myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
    zipUpDir(myzip1, tempdir, 'img')
    myzip1.close()

    print "  Creating SVG ZIP Archive"
    zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
    myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
    myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
    zipUpDir(myzip2, tempdir, 'svg')
    zipUpDir(myzip2, tempdir, 'img')
    myzip2.close()

    print "  Creating XML ZIP Archive"
    zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
    myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
    targetdir = os.path.join(tempdir,'xml')
    zipUpDir(myzip3, targetdir, '')
    zipUpDir(myzip3, tempdir, 'img')
    myzip3.close()

    shutil.rmtree(tempdir)

    return 0


if __name__ == '__main__':
    sys.exit(main())