tools v1.5

tools v1.4
tools v1.3
2015-03-02 07:43:31 +00:00 · 2015-03-02 07:41:20 +00:00 · 2015-03-02 07:35:40 +00:00 · 2015-03-02 07:32:21 +00:00 · 2015-02-28 14:38:24 +00:00 · 2015-02-28 14:35:29 +00:00
27 changed files with 947 additions and 235 deletions
--- a/Adobe_EPUB_Tools/ineptepub.pyw
+++ b/Adobe_EPUB_Tools/ineptepub.pyw
@@ -336,5 +336,6 @@ def gui_main():
    return 0

 if __name__ == '__main__':
-    # sys.exit(cli_main())
+    if len(sys.argv) > 1:
+        sys.exit(cli_main())
    sys.exit(gui_main())
--- a/Adobe_EPUB_Tools/ineptkeymac.pyw
+++ b/Adobe_EPUB_Tools/ineptkeymac.pyw
@@ -0,0 +1,123 @@
+#! /usr/bin/env python
+
+# ineptkeymac.py, version 1
+
+# This program runs on Mac OS X, version 10.6.2 and probably several other
+# versions. It uses Python 2.6, but it probably also runs on all versions
+# 2.x with x >= 5.
+
+# This program extracts the private RSA key for your ADE account in a
+# standard binary form (DER format) in a file of your choosing. Its purpose
+# is to make a backup of that key so that your legally bought ADE encoded
+# ebooks can be salvaged in case they would no longer be supported by ADE
+# software. No other usages are intended. 
+
+# It has been tested with the key storage structure of ADE 1.7.1 and 1.7.2
+# and Sony Reader Library.
+
+# This software does not contain any encryption code. The only external
+# cryptographic software it uses is openssl, which converts the private key
+# from PEM to DER format. That conversion involves no encryption or
+# decryption.
+
+# You can run this program from the command line (python ineptkeymac.py
+# filename), or by double-clicking when it has been associated with
+# Python Launcher. When no filename is given it will show a dialog to obtain one.
+
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+
+import sys
+import os
+import xml.etree.ElementTree as etree
+from contextlib import closing
+import Tkinter
+import Tkconstants
+import tkFileDialog
+from tkMessageBox import showerror
+from subprocess import Popen, PIPE
+import textwrap
+
+NS = 'http://ns.adobe.com/adept' # XML namespace used in the get_key() xpath
+ACTFILE = '~/Library/Application Support/Adobe/Digital Editions/activation.dat'
+HEADER = '-----BEGIN PRIVATE KEY-----\n' # prepended to the base64 key text
+FOOTER = '\n-----END PRIVATE KEY-----\n' # appended to the base64 key text
+# Set to True by gui_main(); error() then also shows a message box.
+Gui = False
+
+def get_key():
+    '''Return the private key in DER form, or None if openssl fails; exits if no key is found.'''
+    try:
+        filename = os.path.expanduser(ACTFILE)
+        tree = etree.parse(filename)
+        xpath = '//{%s}credentials/{%s}privateLicenseKey' % (NS, NS)
+        b64key = tree.findtext(xpath)
+        if not b64key: # findtext gives None when the element is missing
+            raise IOError('no privateLicenseKey in ' + filename)
+        pemkey = HEADER + textwrap.fill(b64key, 64) + FOOTER
+        # openssl reads the PEM key on stdin and writes the DER key on stdout
+        cmd = ['openssl', 'rsa', '-outform', 'der']
+        proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+        stdout, stderr = proc.communicate(pemkey)
+        if proc.returncode != 0:
+            error("openssl error: " + stderr)
+            return None
+        return stdout
+    except IOError:
+        error("Can't find keyfile. Maybe you should activate your Adobe ID.")
+        sys.exit(1)
+        
+def store_key(key, keypath):
+    '''Write the DER key to keypath; when keypath is None, ask for one with a
+    file dialog. Does nothing if key is None or the dialog is cancelled.'''
+    if key is None: # get_key() already reported the openssl failure
+        return
+    try:
+        if keypath is None:
+            keypath = get_keypath()
+            if not keypath: # Cancelled
+                return
+
+        with open(keypath, 'wb') as outf:
+            outf.write(key)
+    except IOError, e:
+        error("Can't write keyfile: " + str(e))
+
+def get_keypath():
+    '''Ask where to save the key. Returns the normalized path chosen, or the
+    dialog's own (false) result unchanged when nothing was chosen.'''
+    options = dict(parent = None, title = 'Select file to store ADEPT key',
+        initialdir = os.path.expanduser('~/Desktop'),
+        initialfile = 'adeptkey.der', defaultextension = '.der',
+        filetypes = [('DER-encoded files', '.der'), ('All Files', '.*')])
+    chosen = tkFileDialog.asksaveasfilename(**options)
+    if not chosen: return chosen
+    return os.path.normpath(chosen)
+
+def error(text):
+    print text # always echo the message to stdout
+    if Gui: showerror('Error!', text) # plus a dialog when the Gui flag is set
+    
+def gui_main():
+    '''Run with Tk dialogs: fetch the key and ask where to save it. Returns 0.'''
+    global Gui
+    Gui = True # error() now shows a message box as well as printing
+    root = Tkinter.Tk()
+    root.iconify()
+    store_key(get_key(), None)
+    return 0
+    
+def main(argv=sys.argv):
+    '''No argument: run the GUI; one argument: write the key there; else usage.'''
+    progname = os.path.basename(argv[0])
+    nargs = len(argv) - 1
+    if nargs == 0: # assume GUI if no argument given
+        return gui_main()
+    if nargs != 1:
+        print "usage: %s KEYFILE" % (progname,)
+        return 1
+    store_key(get_key(), argv[1])
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/Kindle_Mobi_Tools/KindlePID.pyw
+++ b/Kindle_Mobi_Tools/KindlePID.pyw
@@ -73,6 +73,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -88,7 +89,7 @@ class MainDialog(Tkinter.Frame):
                cmdline = 'python lib\kindlepid.py "' + serial + '"'
            else :
                cmdline = 'lib\kindlepid.py "' + serial + '"'
-
+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -113,6 +114,7 @@ class MainDialog(Tkinter.Frame):
        log += 'Serial = "' + serial + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.pidrdr(serial)

--- a/Kindle_Mobi_Tools/Kindleizer.pyw
+++ b/Kindle_Mobi_Tools/Kindleizer.pyw
@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
        self.mobipath = Tkinter.Entry(body, width=50)
        self.mobipath.grid(row=0, column=1, sticky=sticky)
-        self.mobipath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.mobipath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_mobipath)
        button.grid(row=0, column=2)

@@ -80,6 +82,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -96,6 +99,7 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\kindlefix.py "' + infile + '" "' + pidnum + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -138,6 +142,7 @@ class MainDialog(Tkinter.Frame):
        log += 'PID = "' + pidnum + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.krdr(mobipath, pidnum)

--- a/Kindle_Mobi_Tools/MobiDeDRM.pyw
+++ b/Kindle_Mobi_Tools/MobiDeDRM.pyw
@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
        self.mobipath = Tkinter.Entry(body, width=50)
        self.mobipath.grid(row=0, column=1, sticky=sticky)
-        self.mobipath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.mobipath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_mobipath)
        button.grid(row=0, column=2)

@@ -87,6 +89,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -103,6 +106,7 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -167,6 +171,7 @@ class MainDialog(Tkinter.Frame):
        log += 'PID = "' + pidnum + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.mobirdr(mobipath, outpath, pidnum)

--- a/Kindle_Mobi_Tools/lib/mobidedrm.py
+++ b/Kindle_Mobi_Tools/lib/mobidedrm.py
@@ -25,6 +25,7 @@
 #         import filter it works when importing unencrypted files.
 #         Also now handles encrypted files that don't need a specific PID.
 #  0.11 - use autoflushed stdout and proper return values
+#  0.12 - Fix for problems with metadata import as Calibre plugin, report errors

 class Unbuffered:
 	def __init__(self, stream):
@@ -234,34 +235,40 @@ if not __name__ == "__main__":
 		description         = 'Removes DRM from secure Mobi files'
 		supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
 		author              = 'The Dark Reverser' # The author of this plugin
-		version             = (0, 1, 0)   # The version number of this plugin
+		version             = (0, 1, 2)   # The version number of this plugin
 		file_types          = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
 		on_import           = True # Run this plugin during the import

 	
 		def run(self, path_to_ebook):
-			of = self.temporary_file('.mobi')
+			from calibre.gui2 import is_ok_to_use_qt
+			from PyQt4.Qt import QMessageBox
 			PID = self.site_customization
 			data_file = file(path_to_ebook, 'rb').read()
 			ar = PID.split(',')
 			for i in ar:
 				try:
-					file(of.name, 'wb').write(DrmStripper(data_file, i).getResult())
+					unlocked_file = DrmStripper(data_file, i).getResult()
 				except DrmException:
-					# Hm, we should display an error dialog here.
-					# Dunno how though.
-					# Ignore the dirty hack behind the curtain.
-#					strexcept = 'echo exception: %s > /dev/tty' % e
-#					subprocess.call(strexcept,shell=True)
-					print i + ": not PID for book"
+					# ignore the error
+					pass
 				else:
+					of = self.temporary_file('.mobi')
+					of.write(unlocked_file)
+					of.close()
 					return of.name
+			if is_ok_to_use_qt():
+				d = QMessageBox(QMessageBox.Warning, "MobiDeDRM Plugin", "Couldn't decode: %s\n\nImporting encrypted version." % path_to_ebook)
+				d.show()
+				d.raise_()
+				d.exec_()
+			return path_to_ebook

 		def customization_help(self, gui=False):
 			return 'Enter PID (separate multiple PIDs with comma)'

 if __name__ == "__main__":
-	print "MobiDeDrm v0.11. Copyright (c) 2008 The Dark Reverser"
+	print "MobiDeDrm v0.12. Copyright (c) 2008 The Dark Reverser"
 	if len(sys.argv)<4:
 		print "Removes protection from Mobipocket books"
 		print "Usage:"
--- a/Topaz_Tools/TopazExtract_Kindle4PC.pyw
+++ b/Topaz_Tools/TopazExtract_Kindle4PC.pyw
@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
        self.tpzpath = Tkinter.Entry(body, width=50)
        self.tpzpath.grid(row=0, column=1, sticky=sticky)
-        self.tpzpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.tpzpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
        button.grid(row=0, column=2)

        Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
        self.outpath = Tkinter.Entry(body, width=50)
        self.outpath.grid(row=1, column=1, sticky=sticky)
-        self.outpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.outpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_outpath)
        button.grid(row=1, column=2)

@@ -88,6 +92,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -108,6 +113,7 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\cmbtc_dump.py -v -d ' + pidoption + outoption + '"' + infile + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -124,9 +130,11 @@ class MainDialog(Tkinter.Frame):
        return

    def get_outpath(self):
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
        outpath = tkFileDialog.askdirectory(
            parent=None, title='Directory to Extract Files into',
-            initialdir=os.getcwd(), initialfile=None)
+            initialdir=cwd, initialfile=None)
        if outpath:
            outpath = os.path.normpath(outpath)
            self.outpath.delete(0, Tkconstants.END)
@@ -168,6 +176,7 @@ class MainDialog(Tkinter.Frame):
        log += 'First 8 chars of PID = "' + pidnum + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.topazrdr(tpzpath, outpath, pidnum)

--- a/Topaz_Tools/TopazExtract_Kindle_iPhone.pyw
+++ b/Topaz_Tools/TopazExtract_Kindle_iPhone.pyw
@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
        self.tpzpath = Tkinter.Entry(body, width=50)
        self.tpzpath.grid(row=0, column=1, sticky=sticky)
-        self.tpzpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.tpzpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
        button.grid(row=0, column=2)

        Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
        self.outpath = Tkinter.Entry(body, width=50)
        self.outpath.grid(row=1, column=1, sticky=sticky)
-        self.outpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.outpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_outpath)
        button.grid(row=1, column=2)

@@ -88,6 +92,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -106,6 +111,7 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -122,9 +128,11 @@ class MainDialog(Tkinter.Frame):
        return

    def get_outpath(self):
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
        outpath = tkFileDialog.askdirectory(
            parent=None, title='Directory to Extract Files into',
-            initialdir=os.getcwd(), initialfile=None)
+            initialdir=cwd, initialfile=None)
        if outpath:
            outpath = os.path.normpath(outpath)
            self.outpath.delete(0, Tkconstants.END)
@@ -166,6 +174,7 @@ class MainDialog(Tkinter.Frame):
        log += 'First 8 chars of PID = "' + pidnum + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.topazrdr(tpzpath, outpath, pidnum)

--- a/Topaz_Tools/TopazFiles2HTML.pyw
+++ b/Topaz_Tools/TopazFiles2HTML.pyw
@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
        self.bookdir = Tkinter.Entry(body, width=50)
        self.bookdir.grid(row=0, column=1, sticky=sticky)
-        self.bookdir.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.bookdir.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_bookdir)
        button.grid(row=0, column=2)

@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\genhtml.py "' + bookdir + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2


    def get_bookdir(self):
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
        bookdir = tkFileDialog.askdirectory(
            parent=None, title='Select the Directory you Extracted Topaz Files into',
-            initialdir=os.getcwd(), initialfile=None)
+            initialdir=cwd, initialfile=None)
        if bookdir:
            bookdir = os.path.normpath(bookdir)
            self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
        log += 'Book Directory = "' + bookdir + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.topazrdr(bookdir)

--- a/Topaz_Tools/TopazFiles2SVG.pyw
+++ b/Topaz_Tools/TopazFiles2SVG.pyw
@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
        self.bookdir = Tkinter.Entry(body, width=50)
        self.bookdir.grid(row=0, column=1, sticky=sticky)
-        self.bookdir.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.bookdir.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_bookdir)
        button.grid(row=0, column=2)

@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\gensvg.py "' + bookdir + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2


    def get_bookdir(self):
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
        bookdir = tkFileDialog.askdirectory(
            parent=None, title='Select the Directory you Extracted Topaz Files into',
-            initialdir=os.getcwd(), initialfile=None)
+            initialdir=cwd, initialfile=None)
        if bookdir:
            bookdir = os.path.normpath(bookdir)
            self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
        log += 'Book Directory = "' + bookdir + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.topazrdr(bookdir)

--- a/Topaz_Tools/TopazFiles2XML.pyw
+++ b/Topaz_Tools/TopazFiles2XML.pyw
@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
        self.bookdir = Tkinter.Entry(body, width=50)
        self.bookdir.grid(row=0, column=1, sticky=sticky)
-        self.bookdir.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.bookdir.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_bookdir)
        button.grid(row=0, column=2)

@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\genxml.py "' + bookdir + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2


    def get_bookdir(self):
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
        bookdir = tkFileDialog.askdirectory(
            parent=None, title='Select the Directory you Extracted Topaz Files into',
-            initialdir=os.getcwd(), initialfile=None)
+            initialdir=cwd, initialfile=None)
        if bookdir:
            bookdir = os.path.normpath(bookdir)
            self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
        log += 'Book Directory = "' + bookdir + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.topazrdr(bookdir)

--- a/Topaz_Tools/lib/cmbtc_dump.py
+++ b/Topaz_Tools/lib/cmbtc_dump.py
@@ -1,5 +1,5 @@
 #! /usr/bin/python
-# For use in Topaz Scripts version 2.2
+# For use in Topaz Scripts version 2.6

 """

--- a/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py
+++ b/Topaz_Tools/lib/cmbtc_dump_nonK4PC.py
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 class Unbuffered:
    def __init__(self, stream):
--- a/Topaz_Tools/lib/convert2xml.py
+++ b/Topaz_Tools/lib/convert2xml.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 class Unbuffered:
    def __init__(self, stream):
@@ -243,6 +243,8 @@ class PageParser(object):
        'region.h'         : (1, 'scalar_number', 0, 0),
        'region.w'         : (1, 'scalar_number', 0, 0),

+        'empty_text_region' : (1, 'snippets', 1, 0),
+
        'img'          : (1, 'snippets', 1, 0),
        'img.x'        : (1, 'scalar_number', 0, 0),
        'img.y'        : (1, 'scalar_number', 0, 0),
@@ -313,6 +315,12 @@ class PageParser(object):
        'version.findlists'                : (1, 'scalar_text', 0, 0),
        'version.page_num'                 : (1, 'scalar_text', 0, 0),
        'version.page_type'                : (1, 'scalar_text', 0, 0),
+        'version.bad_text'                 : (1, 'scalar_text', 0, 0),
+        'version.glyph_mismatch'           : (1, 'scalar_text', 0, 0),
+        'version.margins'                  : (1, 'scalar_text', 0, 0),
+        'version.staggered_lines'          : (1, 'scalar_text', 0, 0),
+        'version.paragraph_continuation'   : (1, 'scalar_text', 0, 0),
+        'version.toc'                      : (1, 'scalar_text', 0, 0),

        'stylesheet'   : (1, 'snippets', 1, 0),
        'style'              : (1, 'snippets', 1, 0),
@@ -660,16 +668,19 @@ class PageParser(object):
    def process(self):

        # peek at the first bytes to see what type of file it is
-        magic = self.fo.read(11)
-        if (magic[0:1] == 'p') and (magic[2:10] == '__PAGE__'):
+        magic = self.fo.read(9)
+        if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
            first_token = 'info'
-        elif (magic[0:1] == 'g') and (magic[2:11] == '__GLYPH__'):
-            skip = self.fo.read(1)
+        elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
+            skip = self.fo.read(2)
+            first_token = 'info'
+        elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
+            skip = self.fo.read(3)
            first_token = 'info'
        else :
            # other0.dat file
            first_token = None
-            self.fo.seek(-11,1)
+            self.fo.seek(-9,1)


        # main loop to read and build the document tree
--- a/Topaz_Tools/lib/decode_meta.py
+++ b/Topaz_Tools/lib/decode_meta.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 import csv
 import sys
--- a/Topaz_Tools/lib/flatxml2html.py
+++ b/Topaz_Tools/lib/flatxml2html.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 import sys
 import csv
@@ -32,6 +32,8 @@ class DocParser(object):
        self.link_id = []
        self.link_title = []
        self.link_page = []
+        self.link_href = []
+        self.link_type = []
        self.dehyphen_rootid = []
        self.paracont_stemid = []
        self.parastems_stemid = []
@@ -197,6 +199,7 @@ class DocParser(object):
    # get the class
    def getClass(self, pclass):
        nclass = pclass
+
        # class names are an issue given topaz may start them with numerals (not allowed),
        # use a mix of cases (which cause some browsers problems), and actually
        # attach numbers after "_reclustered*" to the end to deal classeses that inherit
@@ -206,7 +209,10 @@ class DocParser(object):
        # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
        # that exists in the stylesheet first, and then adding this specific class
        # after
+        
+        # also some class names have spaces in them so need to convert to dashes
        if nclass != None :
+            nclass = nclass.replace(' ','-')
            classres = ''
            nclass = nclass.lower()
            nclass = 'cl-' + nclass
@@ -334,7 +340,7 @@ class DocParser(object):
            result.append(('svg', num))
            return pclass, result

-        # this type of paragrph may be made up of multiple spans, inline 
+        # this type of paragraph may be made up of multiple spans, inline 
        # word monograms (images), and words with semantic meaning, 
        # plus glyphs used to form starting letter of first word
        
@@ -346,35 +352,40 @@ class DocParser(object):
        if end == -1 :
            end = self.docSize

+        # seems some xml has last* coming before first* so we have to 
+        # handle any order
+        sp_first = -1
+        sp_last = -1
+
+        gl_first = -1
+        gl_last = -1
+
+        ws_first = -1
+        ws_last = -1
+
+        word_class = ''
+
        while (line < end) :

            (name, argres) = self.lineinDoc(line)

-            # handle both span and _span
            if name.endswith('span.firstWord') :
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('span.lastWord'):
-                    print 'Error: - incorrect _span ordering inside paragraph'
-                last = int(argres)
-                for wordnum in xrange(first, last):
-                    result.append(('ocr', wordnum))
-                line += 1
+                sp_first = int(argres)
+
+            elif name.endswith('span.lastWord') :
+                sp_last = int(argres)

            elif name.endswith('word.firstGlyph') :
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('word.lastGlyph'):
-                    print 'Error: - incorrect glyph ordering inside word in paragraph'
-                last = int(argres)
-                glyphList = []
-                for glyphnum in xrange(first, last):
-                    glyphList.append(glyphnum)
-                num = self.svgcount
-                self.glyphs_to_image(glyphList)
-                self.svgcount += 1
-                result.append(('svg', num))
-                line += 1
+                gl_first = int(argres)
+
+            elif name.endswith('word.lastGlyph') :
+                gl_last = int(argres)
+
+            elif name.endswith('word_semantic.firstWord'):
+                ws_first = int(argres)
+
+            elif name.endswith('word_semantic.lastWord'):
+                ws_last = int(argres)

            elif name.endswith('word.class'):
               (cname, space) = argres.split('-',1)
@@ -386,15 +397,31 @@ class DocParser(object):
                result.append(('img' + word_class, int(argres)))
                word_class = ''

-            elif name.endswith('word_semantic.firstWord'):
-                first = int(argres)
-                (name, argres) = self.lineinDoc(line+1)
-                if not name.endswith('word_semantic.lastWord'):
-                    print 'Error: - incorrect word_semantic ordering inside paragraph'
-                last = int(argres)
-                for wordnum in xrange(first, last):
+            elif name.endswith('region.img.src'):
+                result.append(('img' + word_class, int(argres)))
+
+            if (sp_first != -1) and (sp_last != -1):
+                for wordnum in xrange(sp_first, sp_last):
                    result.append(('ocr', wordnum))
-                line += 1
+                sp_first = -1
+                sp_last = -1
+
+            if (gl_first != -1) and (gl_last != -1):
+                glyphList = []
+                for glyphnum in xrange(gl_first, gl_last):
+                    glyphList.append(glyphnum)
+                num = self.svgcount
+                self.glyphs_to_image(glyphList)
+                self.svgcount += 1
+                result.append(('svg', num))
+                gl_first = -1
+                gl_last = -1
+
+            if (ws_first != -1) and (ws_last != -1):
+                for wordnum in xrange(ws_first, ws_last):
+                    result.append(('ocr', wordnum))
+                ws_first = -1
+                ws_last = -1
                              
            line += 1

@@ -419,6 +446,8 @@ class DocParser(object):
        if (type == 'end'):
            parares += ' '

+        lstart = len(parares)
+
        cnt = len(pdesc)

        for j in xrange( 0, cnt) :
@@ -432,9 +461,14 @@ class DocParser(object):
                if handle_links:
                    link = self.link_id[num]
                    if (link > 0):
+                        linktype = self.link_type[link-1]
                        title = self.link_title[link-1]
                        if (title == "") or (parares.rfind(title) < 0):
-                            title='_link_'
+                            title=parares[lstart:]
+                        if linktype == 'external' :
+                            linkhref = self.link_href[link-1]
+                            linkhtml = '<a href="%s">' % linkhref
+                        else :
                            ptarget = self.link_page[link-1] - 1
                            linkhtml = '<a href="#page%04d">' % ptarget
                        linkhtml += title + '</a>'
@@ -443,6 +477,7 @@ class DocParser(object):
                            parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
                        else :
                            parares += linkhtml
+                        lstart = len(parares)
                        if word == '_link_' : word = ''
                    elif (link < 0) :
                        if word == '_link_' : word = ''
@@ -514,6 +549,14 @@ class DocParser(object):
        # collect link destination page numbers
        self.link_page = self.getData('info.links.page',0,-1)

+        # collect link types (container versus external)
+        (pos, argres) = self.findinDoc('info.links.type',0,-1)
+        if argres :  self.link_type = argres.split('|')
+
+        # collect link destinations
+        (pos, argres) = self.findinDoc('info.links.href',0,-1)
+        if argres :  self.link_href = argres.split('|')
+
        # collect link titles
        (pos, argres) = self.findinDoc('info.links.title',0,-1)
        if argres :
@@ -623,16 +666,18 @@ class DocParser(object):
                    htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)


-                elif (regtype == 'synth_fcvr.center') or (regtype == 'synth_text.center'):
+                elif (regtype == 'synth_fcvr.center'):
                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
                    if simgsrc:
                        htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)

                else :
-                    print 'Warning: region type', regtype
+                    print '          Making region type', regtype,
                    (pos, temp) = self.findinDoc('paragraph',start,end)
-                    if pos != -1:
-                        print '   is a "text" region'
+                    (pos2, temp) = self.findinDoc('span',start,end)
+                    if pos != -1 or pos2 != -1:
+                        print ' a "text" region'
+                        orig_regtype = regtype
                        regtype = 'fixed'
                        ptype = 'full'
                        # check to see if this is a continution from the previous page
@@ -640,6 +685,11 @@ class DocParser(object):
                            ptype = 'end'
                            first_para_continued = False
                        (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                        if not pclass:
+                            if orig_regtype.endswith('.right')     : pclass = 'cl-right'
+                            elif orig_regtype.endswith('.center')  : pclass = 'cl-center'
+                            elif orig_regtype.endswith('.left')    : pclass = 'cl-left'
+                            elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
                        if pclass and (ptype == 'full') and (len(pclass) >= 6):
                            tag = 'p'
                            if pclass[3:6] == 'h1-' : tag = 'h4'
@@ -651,7 +701,7 @@ class DocParser(object):
                        else :
                            htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
                    else :
-                        print '    is a "graphic" region'
+                        print ' a "graphic" region'
                        (pos, simgsrc) = self.findinDoc('img.src',start,end)
                        if simgsrc:
                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
--- a/Topaz_Tools/lib/genhtml.py
+++ b/Topaz_Tools/lib/genhtml.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 class Unbuffered:
    def __init__(self, stream):
--- a/Topaz_Tools/lib/gensvg.py
+++ b/Topaz_Tools/lib/gensvg.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 class Unbuffered:
    def __init__(self, stream):
@@ -28,15 +28,24 @@ class GParser(object):
     self.gh = self.getData('info.glyph.h')
     self.gw = self.getData('info.glyph.w')
     self.guse = self.getData('info.glyph.use')
+     if self.guse :
         self.count = len(self.guse)
+     else :
+         self.count = 0
     self.gvtx = self.getData('info.glyph.vtx')
     self.glen = self.getData('info.glyph.len')
     self.gdpi = self.getData('info.glyph.dpi')
     self.vx = self.getData('info.vtx.x')
     self.vy = self.getData('info.vtx.y')
     self.vlen = self.getData('info.len.n')
+     if self.vlen :
         self.glen.append(len(self.vlen))
+     elif self.glen:
+         self.glen.append(0)
+     if self.vx :
         self.gvtx.append(len(self.vx))
+     elif self.gvtx :
+         self.gvtx.append(0)

 def getData(self, path):
     result = None
--- a/Topaz_Tools/lib/genxml.py
+++ b/Topaz_Tools/lib/genxml.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 class Unbuffered:
    def __init__(self, stream):
--- a/Topaz_Tools/lib/getpagedim.py
+++ b/Topaz_Tools/lib/getpagedim.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 import csv
 import sys
--- a/Topaz_Tools/lib/stylexml2css.py
+++ b/Topaz_Tools/lib/stylexml2css.py
@@ -1,6 +1,6 @@
 #! /usr/bin/python
 # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.2
+# For use with Topaz Scripts Version 2.6

 import csv
 import sys
@@ -85,7 +85,10 @@ class DocParser(object):
    def process(self):

        classlst = ''
-        csspage = ''
+        csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
+        csspage += '.cl-right { text-align: right; }\n'
+        csspage += '.cl-left { text-align: left; }\n'
+        csspage += '.cl-justify { text-align: justify; }\n'

        # generate a list of each <style> starting point in the stylesheet
        styleList= self.posinDoc('book.stylesheet.style')
@@ -108,6 +111,7 @@ class DocParser(object):
                # get the style class
                (pos, sclass) = self.findinDoc('style.class',start,end)
                if sclass != None:
+                    sclass = sclass.replace(' ','-')
                    sclass = '.cl-' + sclass.lower()
                else : 
                    sclass = ''
@@ -115,6 +119,7 @@ class DocParser(object):
                # check for any "after class" specifiers
                (pos, aftclass) = self.findinDoc('style._after_class',start,end)
                if aftclass != None:
+                    aftclass = aftclass.replace(' ','-')
                    aftclass = '.cl-' + aftclass.lower()
                else : 
                    aftclass = ''
@@ -216,6 +221,7 @@ class DocParser(object):
                        if ctype == 'h3_' :
                            csspage += 'h6' + cssline + '\n'

+                    if cssline != ' { }':
                        csspage += self.stags[tag] + cssline + '\n'

                
--- a/Topaz_Tools/lib/topaz-changes.txt
+++ b/Topaz_Tools/lib/topaz-changes.txt
@@ -1,3 +1,17 @@
+Changes in 2.6
+	- fix for many additional version tags
+	- fixes to generate better links
+	- fixes to handle external links
+	- now handles new "marker" page .dat files
+	- improved special region handling
+	- properly handle class names with spaces
+	- handle default alignment for synthetic regions
+
+
+Changes in 2.3
+       - fix for use with non-latin1 based systems (thank you Tedd)
+       - fixes for out of order tokens in xml
+
 Changes in 2.2
 	- fix for minor bug in encode_Number from clark nova
 	- more fixes to handle paths with spaces in them
--- a/eReader_Tools/Pml2HTML.pyw
+++ b/eReader_Tools/Pml2HTML.pyw
@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='eBook Pml input file').grid(row=0, sticky=Tkconstants.E)
        self.pmlpath = Tkinter.Entry(body, width=50)
        self.pmlpath.grid(row=0, column=1, sticky=sticky)
-        self.pmlpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.pmlpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_pmlpath)
        button.grid(row=0, column=2)

@@ -82,6 +84,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -98,6 +101,7 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -156,6 +160,7 @@ class MainDialog(Tkinter.Frame):
        log += 'HTML Output File = "' + outpath + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.pmlhtml(pmlpath, outpath)

--- a/eReader_Tools/eReaderPDB2PML.pyw
+++ b/eReader_Tools/eReaderPDB2PML.pyw
@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
        Tkinter.Label(body, text='eBook PDB input file').grid(row=0, sticky=Tkconstants.E)
        self.pdbpath = Tkinter.Entry(body, width=50)
        self.pdbpath.grid(row=0, column=1, sticky=sticky)
-        self.pdbpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.pdbpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_pdbpath)
        button.grid(row=0, column=2)

        Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
        self.outpath = Tkinter.Entry(body, width=50)
        self.outpath.grid(row=1, column=1, sticky=sticky)
-        self.outpath.insert(0, os.getcwd())
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
+        self.outpath.insert(0, cwd)
        button = Tkinter.Button(body, text="...", command=self.get_outpath)
        button.grid(row=1, column=2)

@@ -93,6 +97,7 @@ class MainDialog(Tkinter.Frame):
    # post output from subprocess in scrolled text widget
    def showCmdOutput(self, msg):
        if msg and msg !='':
+            msg = msg.encode('utf-8')
            self.stext.insert(Tkconstants.END,msg)
            self.stext.yview_pickplace(Tkconstants.END)
        return
@@ -109,6 +114,7 @@ class MainDialog(Tkinter.Frame):
            else :
                cmdline = 'lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum

+        cmdline = cmdline.encode(sys.getfilesystemencoding())
        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
        return p2

@@ -125,9 +131,11 @@ class MainDialog(Tkinter.Frame):
        return

    def get_outpath(self):
+        cwd = os.getcwdu()
+        cwd = cwd.encode('utf-8')
        outpath = tkFileDialog.askdirectory(
            parent=None, title='Directory to Store Output into',
-            initialdir=os.getcwd(), initialfile=None)
+            initialdir=cwd, initialfile=None)
        if outpath:
            outpath = os.path.normpath(outpath)
            self.outpath.delete(0, Tkconstants.END)
@@ -175,6 +183,7 @@ class MainDialog(Tkinter.Frame):
        log += 'Last 8 of CC = "' + ccnum + '"\n'
        log += '\n\n'
        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
        self.stext.insert(Tkconstants.END,log)
        self.p2 = self.erdr(pdbpath, outpath, name, ccnum)

--- a/eReader_Tools/eReaderPDB2PMLZ.pyw
+++ b/eReader_Tools/eReaderPDB2PMLZ.pyw
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import sys
+sys.path.append('lib')
+
+import os, os.path, urllib
+import subprocess
+from subprocess import Popen, PIPE, STDOUT
+import Tkinter
+import Tkconstants
+import tkFileDialog
+import tkMessageBox
+import subasyncio
+from subasyncio import Process
+from scrolltextwidget import ScrolledText
+
+class MainDialog(Tkinter.Frame):  # Tk dialog that runs lib/erdr2pml.py --make-pmlz and logs its output
+    def __init__(self, root):  # build all widgets inside the given Tk root window
+        Tkinter.Frame.__init__(self, root, border=5)
+        self.root = root
+        self.interval = 2000  # delay handed to after() between polls of the subprocess (Tk uses milliseconds)
+        self.p2 = None  # the conversion subprocess, or None when nothing is running
+        self.status = Tkinter.Label(self, text='eReader eBook Conversion to PMLZ')
+        self.status.pack(fill=Tkconstants.X, expand=1)
+        body = Tkinter.Frame(self)
+        body.pack(fill=Tkconstants.X, expand=1)
+        sticky = Tkconstants.E + Tkconstants.W
+        body.grid_columnconfigure(1, weight=2)
+
+        Tkinter.Label(body, text='eBook PDB input file').grid(row=0, sticky=Tkconstants.E)
+        self.pdbpath = Tkinter.Entry(body, width=50)
+        self.pdbpath.grid(row=0, column=1, sticky=sticky)
+        cwd = os.getcwdu()  # unicode working directory, pre-filled into the entry as UTF-8
+        cwd = cwd.encode('utf-8')
+        self.pdbpath.insert(0, cwd)
+        button = Tkinter.Button(body, text="...", command=self.get_pdbpath)
+        button.grid(row=0, column=2)
+
+        Tkinter.Label(body, text='Name on CC').grid(row=1, sticky=Tkconstants.E)
+        self.name = Tkinter.StringVar()
+        self.nameinfo = Tkinter.Entry(body, width=40, textvariable=self.name)
+        self.nameinfo.grid(row=1, column=1, sticky=sticky)
+
+        Tkinter.Label(body, text='Last 8 digits of CC Number').grid(row=2, sticky=Tkconstants.E)
+        self.ccnum = Tkinter.StringVar()
+        self.ccinfo = Tkinter.Entry(body, width=10, textvariable=self.ccnum)
+        self.ccinfo.grid(row=2, column=1, sticky=sticky)
+
+        msg1 = 'Conversion Log \n\n'
+        self.stext = ScrolledText(body, bd=5, relief=Tkconstants.RIDGE, height=15, width=60, wrap=Tkconstants.WORD)
+        self.stext.grid(row=3, column=0, columnspan=2,sticky=sticky)
+        self.stext.insert(Tkconstants.END,msg1)
+
+        buttons = Tkinter.Frame(self)
+        buttons.pack()
+        self.sbotton = Tkinter.Button(
+            buttons, text="Start", width=10, command=self.convertit)
+        self.sbotton.pack(side=Tkconstants.LEFT)
+
+        Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)  # empty frame used as a spacer between the buttons
+        self.qbutton = Tkinter.Button(
+            buttons, text="Quit", width=10, command=self.quitting)
+        self.qbutton.pack(side=Tkconstants.RIGHT)
+
+    # Poll the conversion subprocess and append any new output to the log.
+    # It is scheduled through the widget's after() call, so it re-schedules
+    # itself at the end for as long as the subprocess has not finished.
+    def processPipe(self):
+        poll = self.p2.wait('nowait')  # None is treated as "still running", anything else as the exit status
+        if poll != None:  # finished: flush remaining output and report the result
+            text = self.p2.readerr()
+            text += self.p2.read()
+            msg = text + '\n\n' + 'File successfully converted\n'
+            if poll != 0:  # non-zero status replaces the success message
+                msg = text + '\n\n' + 'Error: Conversion Failed\n'
+            self.showCmdOutput(msg)
+            self.p2 = None
+            self.sbotton.configure(state='normal')  # allow another conversion to be started
+            return
+        text = self.p2.readerr()
+        text += self.p2.read()
+        self.showCmdOutput(text)
+        # still running: ask the event loop to call us again after self.interval
+        self.stext.after(self.interval,self.processPipe)
+        return
+
+    # Append msg (if non-empty) to the scrolled log widget and scroll to the end.
+    def showCmdOutput(self, msg):
+        if msg and msg !='':
+            msg = msg.encode('utf-8')
+            self.stext.insert(Tkconstants.END,msg)
+            self.stext.yview_pickplace(Tkconstants.END)
+        return
+
+    # Start erdr2pml.py --make-pmlz in a shell with piped stdout/stderr and return the Process.
+    def erdr(self, infile, name, ccnum):
+        # os.putenv('PYTHONUNBUFFERED', '1')
+        cmdline = 'python ./lib/erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum  # default (non-Windows) command
+        if sys.platform[0:3] == 'win':  # Windows: backslash path; "python" prefix only when PATH mentions python
+            search_path = os.environ['PATH']
+            search_path = search_path.lower()
+            if search_path.find('python') >= 0:  # NOTE(review): arguments are only wrapped in double quotes, not escaped
+                cmdline = 'python lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
+            else :
+                cmdline = 'lib\erdr2pml.py --make-pmlz "' + infile + '" "' + name + '" ' + ccnum
+
+        cmdline = cmdline.encode(sys.getfilesystemencoding())
+        p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
+        return p2
+
+
+    def get_pdbpath(self):  # "..." button handler: pick a .pdb file and put its normalized path in the entry
+        pdbpath = tkFileDialog.askopenfilename(
+            parent=None, title='Select eReader PDB File',
+            defaultextension='.pdb', filetypes=[('eReader eBooks', '.pdb'),
+                                                ('All Files', '.*')])
+        if pdbpath:  # a falsy result leaves the entry untouched
+            pdbpath = os.path.normpath(pdbpath)
+            self.pdbpath.delete(0, Tkconstants.END)
+            self.pdbpath.insert(0, pdbpath)
+        return
+
+    def quitting(self):  # "Quit" button handler
+        # terminate the subprocess if it has not finished yet, then close the window
+        if self.p2 != None:
+            if (self.p2.wait('nowait') == None):
+                self.p2.terminate()
+        self.root.destroy()
+
+    # "Start" handler: validate the form, log the request, launch the conversion and start polling.
+    def convertit(self):
+        # disable the button first to prevent multiple launches; every early return re-enables it
+        self.sbotton.configure(state='disabled')
+        pdbpath = self.pdbpath.get()
+        if not pdbpath or not os.path.exists(pdbpath):
+            self.status['text'] = 'Specified eBook file does not exist'
+            self.sbotton.configure(state='normal')
+            return
+        name = self.name.get()
+        if not name or name == '':
+            self.status['text'] = 'Your forgot to enter the Name on the CC'
+            self.sbotton.configure(state='normal')
+            return
+        ccnum = self.ccnum.get()
+        if not ccnum or ccnum == '':
+            self.status['text'] = 'Your forgot to enter the last 8 digits on the CC'
+            self.sbotton.configure(state='normal')
+            return
+
+        log = 'Command = "python erdr2pml.py --make-pmlz "\n'
+        log += 'PDB Path = "'+ pdbpath + '"\n'
+        log += 'Name = "' + name + '"\n'
+        log += 'Last 8 of CC = "' + ccnum + '"\n'
+        log += '\n\n'
+        log += 'Please Wait ...\n'
+        log = log.encode('utf-8')
+        self.stext.insert(Tkconstants.END,log)
+        self.p2 = self.erdr(pdbpath, name, ccnum)
+
+        # The subprocess pipes are not hooked into the Tk event loop,
+        # so the widget's after() call is used instead: it schedules
+        # processPipe, which reads the pipes and re-arms itself every
+        # self.interval until the subprocess has finished.
+        self.stext.after(self.interval,self.processPipe)
+        return
+
+
+def main(argv=None):  # argv is accepted but not used; builds the window and runs the Tk loop
+    window = Tkinter.Tk()
+    window.title('eReader PDB to PMLZ Conversion')
+    window.resizable(True, False)
+    window.minsize(300, 0)
+    MainDialog(window).pack(expand=1, fill=Tkconstants.X)
+    window.mainloop()
+    return 0
+    
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/eReader_Tools/lib/erdr2pml.py
+++ b/eReader_Tools/lib/erdr2pml.py
@@ -52,8 +52,10 @@
 #  0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook
 #  0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
 #  0.13 - change to unbuffered stdout for use with gui front ends
+#  0.14 - contributed enhancement to support --make-pmlz switch
+#  0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.

-__version__='0.13'
+__version__='0.15'

 # Import Psyco if available
 try:
@@ -85,7 +87,7 @@ class Unbuffered:
 import sys
 sys.stdout=Unbuffered(sys.stdout)

-import struct, binascii, zlib, os, os.path, urllib
+import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile

 try:
    from hashlib import sha1
@@ -464,17 +466,6 @@ class EreaderProcessor(object):
        data = sect[62:]
        return sanitizeFileName(name), data

-    def cleanPML(self,pml):
-        # Update old \b font tag with correct \B bold font tag
-        pml2 = pml.replace('\\b', '\\B')
-        # Convert special characters to proper PML code.  High ASCII start at (\x82, \a130) and go up to (\xff, \a255)
-        for k in xrange(130,256):
-            # a2b_hex takes in a hexidecimal as a string and converts it 
-            # to a binary ascii code that we search and replace for
-            badChar=binascii.a2b_hex('%02x' % k)
-            pml2 = pml2.replace(badChar, '\\a%03d' % k)
-            #end for k
-        return pml2

    # def getChapterNamePMLOffsetData(self):
    #     cv = ''
@@ -563,6 +554,14 @@ class EreaderProcessor(object):

        return r

+def cleanPML(pml):
+    # Escape each high-ASCII byte (\x80-\xff) as its PML code (\a128-\a255).
+    cleaned, code = pml, 128
+    while code < 256:
+        cleaned = cleaned.replace(chr(code), '\\a%03d' % code)
+        code += 1
+    return cleaned
+
 def convertEreaderToPml(infile, name, cc, outdir):
    if not os.path.exists(outdir):
        os.makedirs(outdir)
@@ -584,7 +583,7 @@ def convertEreaderToPml(infile, name, cc, outdir):
    print "   Extracting pml"
    pml_string = er.getText()
    pmlfilename = bookname + ".pml"
-    file(os.path.join(outdir, pmlfilename),'wb').write(pml_string)
+    file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))

    # bkinfo = er.getBookInfo()
    # if bkinfo != '':
@@ -592,27 +591,54 @@ def convertEreaderToPml(infile, name, cc, outdir):
    #     file(os.path.join(outdir, 'bookinfo.txt'),'wb').write(bkinfo)


-def main(argv=None):
-    global bookname
-    if argv is None:
-        argv = sys.argv
-    
-    print "eRdr2Pml v%s. Copyright (c) 2009 The Dark Reverser" % __version__
-
-    if len(argv)!=4 and len(argv)!=5:
+def usage():
    print "Converts DRMed eReader books to PML Source"
    print "Usage:"
-        print "  erdr2pml infile.pdb [outdir] \"your name\" credit_card_number "
+    print "  erdr2pml [options] infile.pdb [outdir] \"your name\" credit_card_number "
+    print " "
+    print "Options: "
+    print "  -h                prints this message"
+    print "  --make-pmlz       create PMLZ instead of using output directory"
+    print " "
    print "Note:"
    print "  if ommitted, outdir defaults based on 'infile.pdb'"
    print "  It's enough to enter the last 8 digits of the credit card number"
+    return
+
+def main(argv=None):
+    global bookname
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "h", ["make-pmlz"])
+    except getopt.GetoptError, err:
+        print str(err)
+        usage()
+        return 1
+    make_pmlz = False
+    zipname = None
+    for o, a in opts:
+        if o == "-h":
+            usage()
+            return 0
+        elif o == "--make-pmlz":
+            make_pmlz = True
+            zipname = ''
+    
+    print "eRdr2Pml v%s. Copyright (c) 2009 The Dark Reverser" % __version__
+
+    if len(args)!=3 and len(args)!=4:
+        usage()
        return 1
    else:
-        if len(argv)==4:
-            infile, name, cc = argv[1], argv[2], argv[3]
+        if len(args)==3:
+            infile, name, cc = args[0], args[1], args[2]
            outdir = infile[:-4] + '_Source'
-        elif len(argv)==5:
-            infile, outdir, name, cc = argv[1], argv[2], argv[3], argv[4]
+        elif len(args)==4:
+            infile, outdir, name, cc = args[0], args[1], args[2], args[3]
+
+        if make_pmlz :
+            # ignore specified outdir, use tempdir instead
+            outdir = tempfile.mkdtemp()
+                
        bookname = os.path.splitext(os.path.basename(infile))[0]

        try:
@@ -620,9 +646,37 @@ def main(argv=None):
            import time
            start_time = time.time()
            convertEreaderToPml(infile, name, cc, outdir)
+
+            if make_pmlz :
+                import zipfile
+                import shutil
+                print "   Creating PMLZ file"
+                zipname = infile[:-4] + '.pmlz'
+                myZipFile = zipfile.ZipFile(zipname,'w',zipfile.ZIP_STORED, False)
+                list = os.listdir(outdir)
+                for file in list:
+                    localname = file
+                    filePath = os.path.join(outdir,file)
+                    if os.path.isfile(filePath):
+                        myZipFile.write(filePath, localname)
+                    elif os.path.isdir(filePath):
+                        imageList = os.listdir(filePath)
+                        localimgdir = os.path.basename(filePath)
+                        for image in imageList:
+                            localname = os.path.join(localimgdir,image)
+                            imagePath = os.path.join(filePath,image)
+                            if os.path.isfile(imagePath):
+                                myZipFile.write(imagePath, localname)
+                myZipFile.close()
+                # remove temporary directory
+                shutil.rmtree(outdir)
+
            end_time = time.time()
            search_time = end_time - start_time
            print 'elapsed time: %.2f seconds' % (search_time, ) 
+            if make_pmlz :
+                print 'output is %s' % zipname
+            else :
                print 'output in %s' % outdir 
            print "done"
        except ValueError, e:
--- a/ineptpdf.pyw
+++ b/ineptpdf.pyw
@@ -1,6 +1,6 @@
 #! /usr/bin/python

-# ineptpdf.pyw, version 2
+# ineptpdf5.pyw, version 5

 # To run this program install Python 2.6 from http://www.python.org/download/
 # and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
@@ -10,6 +10,9 @@
 # Revision history:
 #   1 - Initial release
 #   2 - Improved determination of key-generation algorithm
+#   3 - Correctly handle PDF >=1.5 cross-reference streams
+#   4 - Removal of ciando's personal ID (anon)
+#   5 - removing small bug with V3 ebooks (anon)

 """
 Decrypt Adobe ADEPT-encrypted PDF files.
@@ -25,7 +28,7 @@ import re
 import zlib
 import struct
 import hashlib
-from itertools import chain
+from itertools import chain, islice
 import xml.etree.ElementTree as etree
 import Tkinter
 import Tkconstants
@@ -163,16 +166,16 @@ def nunpack(s, default=0):
    elif l == 1:
        return ord(s)
    elif l == 2:
-        return unpack('>H', s)[0]
+        return struct.unpack('>H', s)[0]
    elif l == 3:
-        return unpack('>L', '\x00'+s)[0]
+        return struct.unpack('>L', '\x00'+s)[0]
    elif l == 4:
-        return unpack('>L', s)[0]
+        return struct.unpack('>L', s)[0]
    else:
        return TypeError('invalid length: %d' % l)


-STRICT = 0
+STRICT = 1


 ##  PS Exceptions
@@ -680,6 +683,12 @@ class PSStackParser(PSBaseParser):
        return obj


+LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
+LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
+LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
+LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
+
+
 ##  PDF Objects
 ##
 class PDFObject(PSObject): pass
@@ -741,11 +750,11 @@ def decipher_all(decipher, objid, genno, x):
    '''
    if isinstance(x, str):
        return decipher(objid, genno, x)
+    decf = lambda v: decipher_all(decipher, objid, genno, v)
    if isinstance(x, list):
-        x = [ decipher_all(decipher, objid, genno, v) for v in x ]
+        x = [decf(v) for v in x]
    elif isinstance(x, dict):
-        for (k,v) in x.iteritems():
-            x[k] = decipher_all(decipher, objid, genno, v)
+        x = dict((k, decf(v)) for (k, v) in x.iteritems())
    return x

 # Type cheking
@@ -805,6 +814,28 @@ def stream_value(x):
        return PDFStream({}, '')
    return x

+# ascii85decode(data)
+def ascii85decode(data):
+    """Decode ASCII85 text up to the first '~'; other characters are skipped."""
+    n = b = 0
+    chunks = []
+    for c in data:
+        if '!' <= c <= 'u':
+            n += 1
+            b = b*85 + (ord(c)-33)
+            if n == 5:
+                chunks.append(struct.pack('>L', b))
+                n = b = 0
+        elif c == 'z':
+            assert n == 0
+            chunks.append('\0\0\0\0')
+        elif c == '~':
+            if n:
+                pad = 85 ** (5-n)
+                b = b*pad + (pad-1)   # same as appending (5-n) digits of value 84
+                chunks.append(struct.pack('>L', b)[:n-1])
+            break
+    return ''.join(chunks)
+

 ##  PDFStream type
 ##
@@ -834,12 +865,76 @@ class PDFStream(PDFObject):
        return '<PDFStream(%r): raw=%d, %r>' % \
            (self.objid, len(self.rawdata), self.dic)

+    def decode(self):  # decipher rawdata (if a decipher is set), undo each /Filter, store the result in self.data
+        assert self.data == None and self.rawdata != None  # must not have been decoded already
+        data = self.rawdata
+        if self.decipher:
+            # Decrypt the raw bytes first; the filters below work on the result
+            data = self.decipher(self.objid, self.genno, data)
+        if 'Filter' not in self.dic:  # unfiltered stream: the (deciphered) bytes are the data
+            self.data = data
+            self.rawdata = None
+            return
+        filters = self.dic['Filter']
+        if not isinstance(filters, list):
+            filters = [ filters ]  # a single filter is handled as a one-item list
+        for f in filters:
+            if f in LITERALS_FLATE_DECODE:
+                # will get errors here if the data is still encrypted
+                data = zlib.decompress(data)
+            elif f in LITERALS_LZW_DECODE:
+                try:
+                    from cStringIO import StringIO
+                except ImportError:
+                    from StringIO import StringIO
+                data = ''.join(LZWDecoder(StringIO(data)).run())
+            elif f in LITERALS_ASCII85_DECODE:
+                data = ascii85decode(data)
+            elif f == LITERAL_CRYPT:
+                raise PDFNotImplementedError('/Crypt filter is unsupported')
+            else:
+                raise PDFNotImplementedError('Unsupported filter: %r' % f)
+            # Apply the predictor; this sits inside the loop, so it runs after every filter
+            if 'DP' in self.dic:  # 'DP' takes precedence over 'DecodeParms'
+                params = self.dic['DP']
+            else:
+                params = self.dic.get('DecodeParms', {})
+            if 'Predictor' in params:
+                pred = int_value(params['Predictor'])
+                if pred:
+                    if pred != 12:  # only predictor 12 is implemented
+                        raise PDFNotImplementedError(
+                            'Unsupported predictor: %r' % pred)
+                    if 'Columns' not in params:
+                        raise PDFValueError(
+                            'Columns undefined for predictor=12')
+                    columns = int_value(params['Columns'])
+                    buf = ''
+                    ent0 = '\x00' * columns  # the row before the first one is all zero bytes
+                    for i in xrange(0, len(data), columns+1):  # each row: 1 tag byte + columns data bytes
+                        pred = data[i]  # NOTE: rebinds pred to the row's tag byte
+                        ent1 = data[i+1:i+1+columns]
+                        if pred == '\x02':  # tag 2: add the previous row byte-wise mod 256; other tags copy the row as is
+                            ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
+                                               for (a,b) in zip(ent0,ent1))
+                        buf += ent1
+                        ent0 = ent1
+                    data = buf
+        self.data = data
+        self.rawdata = None  # the raw bytes are dropped once decoded
+        return
+
+    def get_data(self):
+        # Return the decoded stream data, running decode() once on first access.
+        if self.data is None: self.decode()
+        return self.data
+
    def get_rawdata(self):
        return self.rawdata

    def get_decdata(self):
        data = self.rawdata
-        if self.decipher:
+        if self.decipher and data:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
        return data
@@ -932,6 +1027,66 @@ class PDFXRef(object):
        return (None, pos)


+##  PDFXRefStream
+##  Cross-reference table stored in a stream object (PDF 1.5).
+class PDFXRefStream(object):
+
+    def __init__(self):
+        self.index = None   # list of (first objid, count) pairs; set by load()
+        self.data = None    # decoded stream data holding the packed entries
+        self.entlen = None  # bytes per entry: fl1 + fl2 + fl3
+        self.fl1 = self.fl2 = self.fl3 = None  # the three field widths read from 'W'
+        return
+
+    def __repr__(self):
+        return '<PDFXRefStream: index=%r>' % (self.index,)  # this class has no objid_first/objid_last
+
+    def objids(self):
+        for first, size in self.index:  # walk every subsection, yielding each objid it covers
+            for objid in xrange(first, first + size):
+                yield objid
+    
+    def load(self, parser, debug=0):
+        (_,objid) = parser.nexttoken() # ignored
+        (_,genno) = parser.nexttoken() # ignored
+        (_,kwd) = parser.nexttoken()
+        (_,stream) = parser.nextobject()
+        if not isinstance(stream, PDFStream) or \
+           stream.dic.get('Type') is not LITERAL_XREF:  # .get: a missing Type is rejected the same way
+            raise PDFNoValidXRef('Invalid PDF stream spec.')
+        size = stream.dic['Size']
+        index = stream.dic.get('Index', (0,size))  # no Index: one subsection covering 0..size-1
+        self.index = zip(islice(index, 0, None, 2),  # pair the flat list up as (first, count)
+                         islice(index, 1, None, 2))
+        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
+        self.data = stream.get_data()
+        self.entlen = self.fl1+self.fl2+self.fl3
+        self.trailer = stream.dic  # the stream dictionary doubles as the trailer
+        return
+    
+    def getpos(self, objid):
+        offset = 0  # number of entries in the subsections preceding the matching one
+        for first, size in self.index:
+            if first <= objid < first + size:
+                break
+            offset += size
+        else:
+            raise KeyError(objid)  # objid is not covered by any subsection
+        i = self.entlen * ((objid - first) + offset)
+        ent = self.data[i:i+self.entlen]
+        f1 = nunpack(ent[:self.fl1], 1)  # entry type; the 1 is presumably the default for a zero-width field
+        if f1 == 1:
+            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
+            genno = nunpack(ent[self.fl1+self.fl2:])  # parsed but not returned
+            return (None, pos)  # (no containing stream, second field)
+        elif f1 == 2:
+            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])  # second field, returned as the stream id
+            index = nunpack(ent[self.fl1+self.fl2:])
+            return (objid, index)
+        # this is a free object
+        raise KeyError(objid)
+
+
 ##  PDFDocument
 ##
 ##  A PDFDocument object represents a PDF document.
@@ -1020,7 +1175,7 @@ class PDFDocument(object):
        key = ASN1Parser([ord(x) for x in keyder])
        key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)]
        rsa = RSA.construct(key)
-        length = int_value(param.get('Length')) / 8
+        length = int_value(param.get('Length', 0)) / 8
        rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
        rights = zlib.decompress(rights, -15)
        rights = etree.fromstring(rights)
@@ -1031,11 +1186,16 @@ class PDFDocument(object):
            raise ADEPTError('error decrypting book session key')
        index = bookkey.index('\0') + 1
        bookkey = bookkey[index:]
-        V = 2
-        if (length and len(bookkey) == (length + 1)) or \
-           (not length and len(bookkey) & 1 == 1):
+        ebx_V = int_value(param.get('V', 4))
+        ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
+        # added because of the booktype / decryption book session key error
+        if ebx_V == 3:
+            V = 3        
+        elif ebx_V < 4 or ebx_type < 6:
            V = ord(bookkey[0])
            bookkey = bookkey[1:]
+        else:
+            V = 2
        if length and len(bookkey) != length:
            raise ADEPTError('error decrypting book session key')
        self.decrypt_key = bookkey
@@ -1131,46 +1291,17 @@ class PDFDocument(object):
        else:
            for xref in self.xrefs:
                try:
-                    (strmid, index) = xref.getpos(objid)
+                    (stmid, index) = xref.getpos(objid)
                    break
                except KeyError:
                    pass
            else:
-                if STRICT:
-                    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
+                return
+                #if STRICT:
+                #    raise PDFSyntaxError('Cannot locate objid=%r' % objid)
                return None
-            if strmid:
-                stream = stream_value(self.getobj(strmid))
-                if stream.dic.get('Type') is not LITERAL_OBJSTM:
-                    if STRICT:
-                        raise PDFSyntaxError('Not a stream object: %r' % stream)
-                try:
-                    n = stream.dic['N']
-                except KeyError:
-                    if STRICT:
-                        raise PDFSyntaxError('N is not defined: %r' % stream)
-                    n = 0
-                if strmid in self.parsed_objs:
-                    objs = self.parsed_objs[strmid]
-                else:
-                    parser = PDFObjStrmParser(self, stream.get_data())
-                    objs = []
-                    try:
-                        while 1:
-                            (_,obj) = parser.nextobject()
-                            objs.append(obj)
-                    except PSEOF:
-                        pass
-                    self.parsed_objs[strmid] = objs
-                genno = 0
-                i = n*2+index
-                try:
-                    obj = objs[i]
-                except IndexError:
-                    raise PDFSyntaxError(
-                        'Invalid object number: objid=%r' % (objid))
-                if isinstance(obj, PDFStream):
-                    obj.set_objid(objid, 0)
+            if stmid:
+                return PDFObjStmRef(objid, stmid, index)
            else:
                self.parser.seek(index)
                (_,objid1) = self.parser.nexttoken() # objid
@@ -1188,6 +1319,12 @@ class PDFDocument(object):
            obj = decipher_all(self.decipher, objid, genno, obj)
        return obj

+class PDFObjStmRef(object):  # returned by getobj() in place of an object that lives inside a stream
+    def __init__(self, objid, stmid, index):
+        self.objid = objid  # id of the object that was looked up
+        self.stmid = stmid  # first value from xref.getpos(objid): id of the containing stream
+        self.index = index  # second value from xref.getpos(objid)
+
    
 ##  PDFParser
 ##
@@ -1290,6 +1427,13 @@ class PDFParser(PSStackParser):
            (pos, token) = self.nexttoken()
        except PSEOF:
            raise PDFNoValidXRef('Unexpected EOF')
+        if isinstance(token, int):
+            # XRefStream: PDF-1.5
+            self.seek(pos)
+            self.reset()
+            xref = PDFXRefStream()
+            xref.load(self)
+        else:
            if token is not self.KEYWORD_XREF:
                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % 
                                     (pos, token))
@@ -1298,6 +1442,9 @@ class PDFParser(PSStackParser):
            xref.load(self)
        xrefs.append(xref)
        trailer = xref.trailer
+        if 'XRefStm' in trailer:
+            pos = int_value(trailer['XRefStm'])
+            self.read_xref_from(pos, xrefs)
        if 'Prev' in trailer:
            # find previous xref
            pos = int_value(trailer['Prev'])
@@ -1345,10 +1492,13 @@ class PDFSerializer(object):
        parser = PDFParser(doc, inf)
        doc.initialize(keypath)
        self.objids = objids = set()
-        for xref in doc.xrefs:
+        for xref in reversed(doc.xrefs):
            trailer = xref.trailer
            for objid in xref.objids():
                objids.add(objid)
+        trailer = dict(trailer)
+        trailer.pop('Prev', None)
+        trailer.pop('XRefStm', None)
        if 'Encrypt' in trailer:
            objids.remove(trailer.pop('Encrypt').objid)
        self.trailer = trailer
@@ -1360,26 +1510,64 @@ class PDFSerializer(object):
        doc = self.doc
        objids = self.objids
        xrefs = {}
+        xrefstm = {}
        maxobj = max(objids)
+        trailer = dict(self.trailer)
+        trailer['Size'] = maxobj + 1
        for objid in objids:
+            obj = doc.getobj(objid)
+            if isinstance(obj, PDFObjStmRef):
+                xrefstm[objid] = obj
+                continue
            xrefs[objid] = self.tell()
-            self.serialize_indirect(objid, doc.getobj(objid))
+            self.serialize_indirect(objid, obj)
        startxref = self.tell()
        self.write('xref\n')
        self.write('0 %d\n' % (maxobj + 1,))
        for objid in xrange(0, maxobj + 1):
-            if objid in objids:
+            if objid in xrefs:
                self.write("%010d %05d n \n" % (xrefs[objid], 0))
            else:
                self.write("%010d %05d f \n" % (0, 65535))
        self.write('trailer\n')
-        self.serialize_object(self.trailer)
+        self.serialize_object(trailer)
+        self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
+        if not xrefstm:
+            return
+        index = []
+        first = None
+        prev = None
+        data = []
+        for objid in sorted(xrefstm):
+            if first is None:
+                first = objid
+            elif objid != prev + 1:
+                index.extend((first, prev - first + 1))
+                first = objid
+            prev = objid
+            stmid = xrefstm[objid].stmid
+            data.append(struct.pack('>BHB', 2, stmid, 0))
+        index.extend((first, prev - first + 1))
+        data = zlib.compress(''.join(data))
+        dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
+               'W': [1, 2, 1], 'Length': len(data), 'Prev': startxref,
+               'Filter': LITERALS_FLATE_DECODE[0],}
+        obj = PDFStream(dic, data)
+        self.write('\n')
+        trailer['XRefStm'] = startxrefstm = self.tell()
+        self.serialize_indirect(maxobj + 1, obj)
+        trailer['Prev'] = startxref
+        startxref = self.tell()
+        self.write('xref\n')
+        self.write('%d 1\n' % (maxobj + 1,))
+        self.write("%010d %05d n \n" % (startxrefstm, 0))
+        self.write('trailer\n')
+        self.serialize_object(trailer)
        self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
    
-    def write(self, *data):
-        for datum in data:
-            self.outf.write(datum)
-        self.last = data[-1][-1:]
+    def write(self, data):  # write one string to the output file
+        self.outf.write(data)
+        self.last = data[-1:]  # remember the final character written ('' when data is empty)

    def tell(self):
        return self.outf.tell()
@@ -1389,6 +1577,9 @@ class PDFSerializer(object):
        string = string.replace('\n', r'\n')
        string = string.replace('(', r'\(')
        string = string.replace(')', r'\)')
+         # get rid of ciando id
+        regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
+        if regularexp.match(string): return ('http://www.ciando.com') 
        return string
    
    def serialize_object(self, obj):
@@ -1566,5 +1757,6 @@ def gui_main():


 if __name__ == '__main__':
-    # sys.exit(cli_main())
+    if len(sys.argv) > 1:
+        sys.exit(cli_main())
    sys.exit(gui_main())
Author	SHA1	Message	Date
Apprentice Alf	8e7d2657a4	tools v1.5	2015-03-02 07:43:31 +00:00
Apprentice Alf	6fb13373cf	tools v1.4	2015-03-02 07:41:20 +00:00
Apprentice Alf	dce51ae232	tools v1.3	2015-03-02 07:35:40 +00:00
Apprentice Alf	2819550411	tools v1.1	2015-03-02 07:32:21 +00:00
Anonymous	f8154c4615	ineptpdf 5 by anon	2015-02-28 14:38:24 +00:00
i♥cabbages	58833e7dc5	Unknown date, late 2009/early 2010	2015-02-28 14:35:29 +00:00