tools v5.5.3

2012-12-26 23:17:56 +00:00
parent c010e3f77a
commit 602ff30b3a
17 changed files with 142 additions and 63 deletions
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/convert2xml.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/convert2xml.py
@@ -277,6 +277,7 @@ class PageParser(object):

        'word_semantic'           : (1, 'snippets', 1, 1),
        'word_semantic.type'      : (1, 'scalar_text', 0, 0),
+        'word_semantic.class'     : (1, 'scalar_text', 0, 0),
        'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
        'word_semantic.lastWord'  : (1, 'scalar_number', 0, 0),

@@ -287,6 +288,7 @@ class PageParser(object):
        'word.lastGlyph'  : (1, 'scalar_number', 0, 0),

        '_span'           : (1, 'snippets', 1, 0),
+        '_span.class'     : (1, 'scalar_text', 0, 0),
        '_span.firstWord' : (1, 'scalar_number', 0, 0),
        '_span.lastWord'  : (1, 'scalar_number', 0, 0),
        '_span.gridSize'  : (1, 'scalar_number', 0, 0),
@@ -350,16 +352,18 @@ class PageParser(object):
        'version.paragraph_continuation'   : (1, 'scalar_text', 0, 0),
        'version.toc'                      : (1, 'scalar_text', 0, 0),

-        'stylesheet'   : (1, 'snippets', 1, 0),
-        'style'              : (1, 'snippets', 1, 0),
-        'style._tag'         : (1, 'scalar_text', 0, 0),
-        'style.type'         : (1, 'scalar_text', 0, 0),
-        'style._parent_type' : (1, 'scalar_text', 0, 0),
-        'style.class'        : (1, 'scalar_text', 0, 0),
-        'style._after_class' : (1, 'scalar_text', 0, 0),
-        'rule'               : (1, 'snippets', 1, 0),
-        'rule.attr'          : (1, 'scalar_text', 0, 0),
-        'rule.value'         : (1, 'scalar_text', 0, 0),
+        'stylesheet'                : (1, 'snippets', 1, 0),
+        'style'                     : (1, 'snippets', 1, 0),
+        'style._tag'                : (1, 'scalar_text', 0, 0),
+        'style.type'                : (1, 'scalar_text', 0, 0),
+        'style._after_type'         : (1, 'scalar_text', 0, 0),
+        'style._parent_type'        : (1, 'scalar_text', 0, 0),
+        'style._after_parent_type'  : (1, 'scalar_text', 0, 0),
+        'style.class'               : (1, 'scalar_text', 0, 0),
+        'style._after_class'        : (1, 'scalar_text', 0, 0),
+        'rule'                      : (1, 'snippets', 1, 0),
+        'rule.attr'                 : (1, 'scalar_text', 0, 0),
+        'rule.value'                : (1, 'scalar_text', 0, 0),

        'original'      : (0, 'number', 1, 1),
        'original.pnum' : (1, 'number', 0, 0),
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/getk4pcpids.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/getk4pcpids.py
@@ -11,15 +11,21 @@ __version__ = '1.01'

 import sys

-class Unbuffered:
+class SafeUnbuffered:
    def __init__(self, stream):
        self.stream = stream
+        self.encoding = stream.encoding
+        if self.encoding == None:
+            self.encoding = "utf-8"
    def write(self, data):
+        if isinstance(data,unicode):
+            data = data.encode(self.encoding,"replace")
        self.stream.write(data)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)
-sys.stdout=Unbuffered(sys.stdout)
+sys.stdout=SafeUnbuffered(sys.stdout)
+sys.stderr=SafeUnbuffered(sys.stderr)

 import os
 import struct
@@ -41,7 +47,7 @@ def getK4PCpids(path_to_ebook):
        mobi = False

    if mobi:
-        mb = mobidedrm.MobiBook(path_to_ebook,False)
+        mb = mobidedrm.MobiBook(path_to_ebook)
    else:
        mb = topazextract.TopazBook(path_to_ebook)

--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py
@@ -10,6 +10,7 @@ import re
 from struct import pack
 from struct import unpack

+debug = False

 class DocParser(object):
    def __init__(self, flatxml, fontsize, ph, pw):
@@ -113,7 +114,9 @@ class DocParser(object):

        # process each style converting what you can

+        if debug: print '          ', 'Processing styles.'
        for j in xrange(stylecnt):
+            if debug: print '          ', 'Processing style %d' %(j)
            start = styleList[j]
            end = styleList[j+1]

@@ -132,6 +135,8 @@ class DocParser(object):
                else :
                    sclass = ''

+                if debug: print 'sclass', sclass
+
                # check for any "after class" specifiers
                (pos, aftclass) = self.findinDoc('style._after_class',start,end)
                if aftclass != None:
@@ -140,6 +145,8 @@ class DocParser(object):
                else :
                    aftclass = ''

+                if debug: print 'aftclass', aftclass
+
                cssargs = {}

                while True :
@@ -147,6 +154,9 @@ class DocParser(object):
                    (pos1, attr) = self.findinDoc('style.rule.attr', start, end)
                    (pos2, val) = self.findinDoc('style.rule.value', start, end)

+                    if debug: print 'attr', attr
+                    if debug: print 'val', val
+
                    if attr == None : break

                    if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
@@ -164,7 +174,7 @@ class DocParser(object):
                                scale = self.pw
                            elif attr == 'line-space':
                                scale = self.fontsize * 2.0
-                            
+
                            if val == "":
                                val = 0

@@ -179,6 +189,7 @@ class DocParser(object):
                if aftclass != "" : keep = False

                if keep :
+                    if debug: print 'keeping style'
                    # make sure line-space does not go below 100% or above 300% since
                    # it can be wacky in some styles
                    if 'line-space' in cssargs:
@@ -256,7 +267,9 @@ def convert2CSS(flatxml, fontsize, ph, pw):

    # create a document parser
    dp = DocParser(flatxml, fontsize, ph, pw)
+    if debug: print '          ', 'Created DocParser.'
    csspage = dp.process()
+    if debug: print '          ', 'Processed DocParser.'
    return csspage


--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py
@@ -69,6 +69,9 @@ def unicode_argv():
            argvencoding = 'utf-8'
        return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]

+# global switch: set to True to print header and metadata parsing details
+debug = False
+
 if 'calibre' in sys.modules:
    inCalibre = True
    from calibre_plugins.k4mobidedrm import kgenpids
@@ -206,6 +209,7 @@ class TopazBook:
            # Read and return the data of one header record at the current book file position
            # [[offset,decompressedLength,compressedLength],...]
            nbValues = bookReadEncodedNumber(self.fo)
+            if debug: print "%d records in header " % nbValues,
            values = []
            for i in range (0,nbValues):
                values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
@@ -219,9 +223,10 @@ class TopazBook:
            record = bookReadHeaderRecordData()
            return [tag,record]
        nbRecords = bookReadEncodedNumber(self.fo)
+        if debug: print "Headers: %d" % nbRecords
        for i in range (0,nbRecords):
            result = parseTopazHeaderRecord()
-            # print result[0], result[1]
+            if debug: print result[0], ": ", result[1]
            self.bookHeaderRecords[result[0]] = result[1]
        if ord(self.fo.read(1))  != 0x64 :
            raise DrmException(u"Parse Error : Invalid Header")
@@ -235,12 +240,12 @@ class TopazBook:
            raise DrmException(u"Parse Error : Record Names Don't Match")
        flags = ord(self.fo.read(1))
        nbRecords = ord(self.fo.read(1))
-        # print nbRecords
+        if debug: print "Metadata Records: %d" % nbRecords
        for i in range (0,nbRecords) :
            keyval = bookReadString(self.fo)
            content = bookReadString(self.fo)
-            # print keyval
-            # print content
+            if debug: print keyval
+            if debug: print content
            self.bookMetadata[keyval] = content
        return self.bookMetadata