tools v5.0

Introduction of alfcrypto library for speed. Reorganisation of archive: plugins, apps, other.
2012-03-06 18:24:28 +00:00
parent 882edb6c69
commit 07e532f59c
112 changed files with 11472 additions and 5177 deletions
--- a/Other_Tools/KindleBooks/lib/convert2xml.py
+++ b/Other_Tools/KindleBooks/lib/convert2xml.py
@@ -23,7 +23,7 @@ from struct import unpack
 class TpzDRMError(Exception):
    pass

-# Get a 7 bit encoded number from string. The most 
+# Get a 7 bit encoded number from string. The most
 # significant byte comes first and has the high bit (8th) set

 def readEncodedNumber(file):
@@ -32,57 +32,57 @@ def readEncodedNumber(file):
    if (len(c) == 0):
        return None
    data = ord(c)
-    
+
    if data == 0xFF:
-       flag = True
-       c = file.read(1)
-       if (len(c) == 0):
-           return None
-       data = ord(c)
-       
+        flag = True
+        c = file.read(1)
+        if (len(c) == 0):
+            return None
+        data = ord(c)
+
    if data >= 0x80:
        datax = (data & 0x7F)
        while data >= 0x80 :
            c = file.read(1)
-            if (len(c) == 0): 
+            if (len(c) == 0):
                return None
            data = ord(c)
            datax = (datax <<7) + (data & 0x7F)
-        data = datax 
-    
+        data = datax
+
    if flag:
-       data = -data
+        data = -data
    return data
-    
+

 # returns a binary string that encodes a number into 7 bits
 # most significant byte first which has the high bit set

 def encodeNumber(number):
-   result = ""
-   negative = False
-   flag = 0
-   
-   if number < 0 :
-       number = -number + 1
-       negative = True
-   
-   while True:
-       byte = number & 0x7F
-       number = number >> 7
-       byte += flag
-       result += chr(byte)
-       flag = 0x80
-       if number == 0 :
-           if (byte == 0xFF and negative == False) :
-               result += chr(0x80)
-           break
-   
-   if negative:
-       result += chr(0xFF)
-   
-   return result[::-1]
-  
+    result = ""
+    negative = False
+    flag = 0
+
+    if number < 0 :
+        number = -number + 1
+        negative = True
+
+    while True:
+        byte = number & 0x7F
+        number = number >> 7
+        byte += flag
+        result += chr(byte)
+        flag = 0x80
+        if number == 0 :
+            if (byte == 0xFF and negative == False) :
+                result += chr(0x80)
+            break
+
+    if negative:
+        result += chr(0xFF)
+
+    return result[::-1]
+


 # create / read  a length prefixed string from the file
@@ -97,9 +97,9 @@ def readString(file):
    sv = file.read(stringLength)
    if (len(sv)  != stringLength):
        return ""
-    return unpack(str(stringLength)+"s",sv)[0]  
+    return unpack(str(stringLength)+"s",sv)[0]
+

- 
 # convert a binary string generated by encodeNumber (7 bit encoded number)
 # to the value you would find inside the page*.dat files to be processed

@@ -265,6 +265,8 @@ class PageParser(object):
        'paragraph.gridSize'  : (1, 'scalar_number', 0, 0),
        'paragraph.gridBottomCenter'  : (1, 'scalar_number', 0, 0),
        'paragraph.gridTopCenter' : (1, 'scalar_number', 0, 0),
+        'paragraph.gridBeginCenter' : (1, 'scalar_number', 0, 0),
+        'paragraph.gridEndCenter' : (1, 'scalar_number', 0, 0),


        'word_semantic'           : (1, 'snippets', 1, 1),
@@ -284,6 +286,8 @@ class PageParser(object):
        '_span.gridSize'  : (1, 'scalar_number', 0, 0),
        '_span.gridBottomCenter'  : (1, 'scalar_number', 0, 0),
        '_span.gridTopCenter' : (1, 'scalar_number', 0, 0),
+        '_span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
+        '_span.gridEndCenter' : (1, 'scalar_number', 0, 0),

        'span'           : (1, 'snippets', 1, 0),
        'span.firstWord' : (1, 'scalar_number', 0, 0),
@@ -291,6 +295,8 @@ class PageParser(object):
        'span.gridSize'  : (1, 'scalar_number', 0, 0),
        'span.gridBottomCenter'  : (1, 'scalar_number', 0, 0),
        'span.gridTopCenter' : (1, 'scalar_number', 0, 0),
+        'span.gridBeginCenter' : (1, 'scalar_number', 0, 0),
+        'span.gridEndCenter' : (1, 'scalar_number', 0, 0),

        'extratokens'            : (1, 'snippets', 1, 0),
        'extratokens.type'       : (1, 'scalar_text', 0, 0),
@@ -376,14 +382,14 @@ class PageParser(object):
        for j in xrange(i+1, cnt) :
            result += '.' + self.tagpath[j]
        return result
-            
+

    # list of absolute command byte values values that indicate
    # various types of loop meachanisms typically used to generate vectors

    cmd_list = (0x76, 0x76)

-    # peek at and return 1 byte that is ahead by i bytes 
+    # peek at and return 1 byte that is ahead by i bytes
    def peek(self, aheadi):
        c = self.fo.read(aheadi)
        if (len(c) == 0):
@@ -416,7 +422,7 @@ class PageParser(object):
        return result


-    # process the next tag token, recursively handling subtags, 
+    # process the next tag token, recursively handling subtags,
    # arguments, and commands
    def procToken(self, token):

@@ -438,7 +444,7 @@ class PageParser(object):

        if known_token :

-            # handle subtags if present 
+            # handle subtags if present
            subtagres = []
            if (splcase == 1):
                # this type of tag uses of escape marker 0x74 indicate subtag count
@@ -447,7 +453,7 @@ class PageParser(object):
                    subtags = 1
                    num_args = 0

-            if (subtags == 1): 
+            if (subtags == 1):
                ntags = readEncodedNumber(self.fo)
                if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
                for j in xrange(ntags):
@@ -478,7 +484,7 @@ class PageParser(object):
            return result

        # all tokens that need to be processed should be in the hash
-        # table if it may indicate a problem, either new token 
+        # table if it may indicate a problem, either new token
        # or an out of sync condition
        else:
            result = []
@@ -530,7 +536,7 @@ class PageParser(object):
    # dispatches loop commands bytes with various modes
    # The 0x76 style loops are used to build vectors

-    # This was all derived by trial and error and 
+    # This was all derived by trial and error and
    # new loop types may exist that are not handled here
    # since they did not appear in the test cases

@@ -549,7 +555,7 @@ class PageParser(object):
        return result


-            
+
    # add full tag path to injected snippets
    def updateName(self, tag, prefix):
        name = tag[0]
@@ -577,7 +583,7 @@ class PageParser(object):
        argtype = tag[2]
        argList = tag[3]
        nsubtagList = []
-        if len(argList) > 0 : 
+        if len(argList) > 0 :
            for j in argList:
                asnip = self.snippetList[j]
                aso, atag = self.injectSnippets(asnip)
@@ -609,65 +615,70 @@ class PageParser(object):
        nodename = fullpathname.pop()
        ilvl = len(fullpathname)
        indent = ' ' * (3 * ilvl)
-        result = indent + '<' + nodename + '>'
+        rlst = []
+        rlst.append(indent + '<' + nodename + '>')
        if len(argList) > 0:
-            argres = ''
+            alst = []
            for j in argList:
                if (argtype == 'text') or (argtype == 'scalar_text') :
-                    argres += j + '|'
+                    alst.append(j + '|')
                else :
-                    argres += str(j) + ','
+                    alst.append(str(j) + ',')
+            argres = "".join(alst)
            argres = argres[0:-1]
            if argtype == 'snippets' :
-                result += 'snippets:' + argres
+                rlst.append('snippets:' + argres)
            else :
-                result += argres
+                rlst.append(argres)
        if len(subtagList) > 0 :
-            result += '\n'
+            rlst.append('\n')
            for j in subtagList:
                if len(j) > 0 :
-                    result += self.formatTag(j)
-            result += indent + '</' + nodename + '>\n'
+                    rlst.append(self.formatTag(j))
+            rlst.append(indent + '</' + nodename + '>\n')
        else:
-            result += '</' + nodename + '>\n'
-        return result
+            rlst.append('</' + nodename + '>\n')
+        return "".join(rlst)


-   # flatten tag
+    # flatten tag
    def flattenTag(self, node):
        name = node[0]
        subtagList = node[1]
        argtype = node[2]
        argList = node[3]
-        result = name
+        rlst = []
+        rlst.append(name)
        if (len(argList) > 0):
-            argres = ''
+            alst = []
            for j in argList:
                if (argtype == 'text') or (argtype == 'scalar_text') :
-                    argres += j + '|'
+                    alst.append(j + '|')
                else :
-                    argres += str(j) + '|'
+                    alst.append(str(j) + '|')
+            argres = "".join(alst)
            argres = argres[0:-1]
            if argtype == 'snippets' :
-                result += '.snippets=' + argres
+                rlst.append('.snippets=' + argres)
            else :
-                result += '=' + argres
-        result += '\n'
+                rlst.append('=' + argres)
+        rlst.append('\n')
        for j in subtagList:
            if len(j) > 0 :
-                result += self.flattenTag(j)
-        return result
+                rlst.append(self.flattenTag(j))
+        return "".join(rlst)


    # reduce create xml output
    def formatDoc(self, flat_xml):
-        result = ''
+        rlst = []
        for j in self.doc :
            if len(j) > 0:
                if flat_xml:
-                    result += self.flattenTag(j)
+                    rlst.append(self.flattenTag(j))
                else:
-                    result += self.formatTag(j)
+                    rlst.append(self.formatTag(j))
+        result = "".join(rlst)
        if self.debug : print result
        return result

@@ -712,7 +723,7 @@ class PageParser(object):
                first_token = None

            v = self.getNext()
-            if (v == None): 
+            if (v == None):
                break

            if (v == 0x72):
@@ -723,7 +734,7 @@ class PageParser(object):
                    self.doc.append(tag)
            else:
                if self.debug:
-                    print "Main Loop:  Unknown value: %x" % v 
+                    print "Main Loop:  Unknown value: %x" % v
                if (v == 0):
                    if (self.peek(1) == 0x5f):
                        skip = self.fo.read(1)
@@ -776,7 +787,7 @@ def usage():

 #
 # Main
-#   
+#

 def main(argv):
    dictFile = ""
@@ -797,11 +808,11 @@ def main(argv):
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
-    
+
    if len(opts) == 0 and len(args) == 0 :
        usage()
-        sys.exit(2) 
-       
+        sys.exit(2)
+
    for o, a in opts:
        if o =="-d":
            debug=True