tools v3.0

First combined mobi/topaz kindle tool
Apprentice Alf
2010-12-30 22:41:07 +00:00
parent 38eabe7612
commit a7856f5c32
148 changed files with 13779 additions and 8871 deletions


@@ -0,0 +1,817 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6
class Unbuffered:
def __init__(self, stream):
self.stream = stream
def write(self, data):
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import csv
import os
import getopt
from struct import pack
from struct import unpack
# Get a 7 bit encoded number from the file. The most
# significant byte comes first and has the high bit (8th) set
def readEncodedNumber(file):
flag = False
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
if data == 0xFF:
flag = True
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
if data >= 0x80:
datax = (data & 0x7F)
while data >= 0x80 :
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
datax = (datax <<7) + (data & 0x7F)
data = datax
if flag:
data = -data
return data
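# Worked example (illustrative, not part of the decoder): the byte
# sequence 0x81 0x23 decodes as (0x01 << 7) + 0x23 = 163, since the
# high bit of 0x81 marks it as a continuation byte contributing its
# low 7 bits; a leading 0xFF byte marks the decoded value as negative.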
# returns a binary string that encodes a number as a sequence of 7 bit groups,
# most significant byte first; every byte except the last has the high bit set
def encodeNumber(number):
result = ""
negative = False
flag = 0
if number < 0 :
number = -number + 1
negative = True
while True:
byte = number & 0x7F
number = number >> 7
byte += flag
result += chr(byte)
flag = 0x80
if number == 0 :
if (byte == 0xFF and negative == False) :
result += chr(0x80)
break
if negative:
result += chr(0xFF)
return result[::-1]
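# The inverse of the example above (illustrative): encodeNumber(163)
# emits the low 7 bits first ('\x23'), then the remaining bits with
# the 0x80 continuation flag ('\x81'), and reverses the result to
# give '\x81\x23' - most significant byte first.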
# create / read a length prefixed string from the file
def lengthPrefixString(data):
return encodeNumber(len(data))+data
def readString(file):
stringLength = readEncodedNumber(file)
if (stringLength == None):
return ""
sv = file.read(stringLength)
if (len(sv) != stringLength):
return ""
return unpack(str(stringLength)+"s",sv)[0]
# convert a binary string generated by encodeNumber (7 bit encoded number)
# to the value you would find inside the page*.dat files to be processed
def convert(i):
result = ''
val = encodeNumber(i)
for j in xrange(len(val)):
c = ord(val[j:j+1])
result += '%02x' % c
return result
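# e.g. convert(163) returns '8123' (illustrative), the hex dump of
# encodeNumber(163), which is the form the value takes inside the
# page*.dat files.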
# the complete string table used to store all book text content
# as well as the xml tokens and values that give it structure
class Dictionary(object):
def __init__(self, dictFile):
self.filename = dictFile
self.size = 0
self.fo = file(dictFile,'rb')
self.stable = []
self.size = readEncodedNumber(self.fo)
for i in xrange(self.size):
self.stable.append(self.escapestr(readString(self.fo)))
self.pos = 0
def escapestr(self, str):
str = str.replace('&','&amp;')
str = str.replace('<','&lt;')
str = str.replace('>','&gt;')
str = str.replace('=','&#61;')
return str
def lookup(self,val):
if ((val >= 0) and (val < self.size)) :
self.pos = val
return self.stable[self.pos]
else:
print "Error - %d outside of string table limits" % val
sys.exit(-1)
def getSize(self):
return self.size
def getPos(self):
return self.pos
def dumpDict(self):
for i in xrange(self.size):
print "%d %s %s" % (i, convert(i), self.stable[i])
return
# parses the xml snippets that are represented by each page*.dat file.
# also parses the other0.dat file - the main stylesheet
# and information used to inject the xml snippets into page*.dat files
class PageParser(object):
def __init__(self, filename, dict, debug, flat_xml):
self.fo = file(filename,'rb')
self.id = os.path.basename(filename).replace('.dat','')
self.dict = dict
self.debug = debug
self.flat_xml = flat_xml
self.tagpath = []
self.doc = []
self.snippetList = []
# hash table used to enable the decoding process
# This has all been developed by trial and error so it may still have omissions or
# contain errors
# Format:
# tag : (number of arguments, argument type, subtags present, special case of subtags present when escaped)
token_tags = {
'x' : (1, 'scalar_number', 0, 0),
'y' : (1, 'scalar_number', 0, 0),
'h' : (1, 'scalar_number', 0, 0),
'w' : (1, 'scalar_number', 0, 0),
'firstWord' : (1, 'scalar_number', 0, 0),
'lastWord' : (1, 'scalar_number', 0, 0),
'rootID' : (1, 'scalar_number', 0, 0),
'stemID' : (1, 'scalar_number', 0, 0),
'type' : (1, 'scalar_text', 0, 0),
'info' : (0, 'number', 1, 0),
'info.word' : (0, 'number', 1, 1),
'info.word.ocrText' : (1, 'text', 0, 0),
'info.word.firstGlyph' : (1, 'raw', 0, 0),
'info.word.lastGlyph' : (1, 'raw', 0, 0),
'info.word.bl' : (1, 'raw', 0, 0),
'info.word.link_id' : (1, 'number', 0, 0),
'glyph' : (0, 'number', 1, 1),
'glyph.x' : (1, 'number', 0, 0),
'glyph.y' : (1, 'number', 0, 0),
'glyph.glyphID' : (1, 'number', 0, 0),
'dehyphen' : (0, 'number', 1, 1),
'dehyphen.rootID' : (1, 'number', 0, 0),
'dehyphen.stemID' : (1, 'number', 0, 0),
'dehyphen.stemPage' : (1, 'number', 0, 0),
'dehyphen.sh' : (1, 'number', 0, 0),
'links' : (0, 'number', 1, 1),
'links.page' : (1, 'number', 0, 0),
'links.rel' : (1, 'number', 0, 0),
'links.row' : (1, 'number', 0, 0),
'links.title' : (1, 'text', 0, 0),
'links.href' : (1, 'text', 0, 0),
'links.type' : (1, 'text', 0, 0),
'paraCont' : (0, 'number', 1, 1),
'paraCont.rootID' : (1, 'number', 0, 0),
'paraCont.stemID' : (1, 'number', 0, 0),
'paraCont.stemPage' : (1, 'number', 0, 0),
'paraStems' : (0, 'number', 1, 1),
'paraStems.stemID' : (1, 'number', 0, 0),
'wordStems' : (0, 'number', 1, 1),
'wordStems.stemID' : (1, 'number', 0, 0),
'empty' : (1, 'snippets', 1, 0),
'page' : (1, 'snippets', 1, 0),
'page.pageid' : (1, 'scalar_text', 0, 0),
'page.pagelabel' : (1, 'scalar_text', 0, 0),
'page.type' : (1, 'scalar_text', 0, 0),
'page.h' : (1, 'scalar_number', 0, 0),
'page.w' : (1, 'scalar_number', 0, 0),
'page.startID' : (1, 'scalar_number', 0, 0),
'group' : (1, 'snippets', 1, 0),
'group.type' : (1, 'scalar_text', 0, 0),
'region' : (1, 'snippets', 1, 0),
'region.type' : (1, 'scalar_text', 0, 0),
'region.x' : (1, 'scalar_number', 0, 0),
'region.y' : (1, 'scalar_number', 0, 0),
'region.h' : (1, 'scalar_number', 0, 0),
'region.w' : (1, 'scalar_number', 0, 0),
'empty_text_region' : (1, 'snippets', 1, 0),
'img' : (1, 'snippets', 1, 0),
'img.x' : (1, 'scalar_number', 0, 0),
'img.y' : (1, 'scalar_number', 0, 0),
'img.h' : (1, 'scalar_number', 0, 0),
'img.w' : (1, 'scalar_number', 0, 0),
'img.src' : (1, 'scalar_number', 0, 0),
'img.color_src' : (1, 'scalar_number', 0, 0),
'paragraph' : (1, 'snippets', 1, 0),
'paragraph.class' : (1, 'scalar_text', 0, 0),
'paragraph.firstWord' : (1, 'scalar_number', 0, 0),
'paragraph.lastWord' : (1, 'scalar_number', 0, 0),
'word_semantic' : (1, 'snippets', 1, 1),
'word_semantic.type' : (1, 'scalar_text', 0, 0),
'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
'word' : (1, 'snippets', 1, 0),
'word.type' : (1, 'scalar_text', 0, 0),
'word.class' : (1, 'scalar_text', 0, 0),
'word.firstGlyph' : (1, 'scalar_number', 0, 0),
'word.lastGlyph' : (1, 'scalar_number', 0, 0),
'_span' : (1, 'snippets', 1, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
'_span.lastWord' : (1, 'scalar_number', 0, 0),
'span' : (1, 'snippets', 1, 0),
'span.firstWord' : (1, 'scalar_number', 0, 0),
'span.lastWord' : (1, 'scalar_number', 0, 0),
'extratokens' : (1, 'snippets', 1, 0),
'extratokens.type' : (1, 'scalar_text', 0, 0),
'extratokens.firstGlyph' : (1, 'scalar_number', 0, 0),
'extratokens.lastGlyph' : (1, 'scalar_number', 0, 0),
'glyph.h' : (1, 'number', 0, 0),
'glyph.w' : (1, 'number', 0, 0),
'glyph.use' : (1, 'number', 0, 0),
'glyph.vtx' : (1, 'number', 0, 1),
'glyph.len' : (1, 'number', 0, 1),
'glyph.dpi' : (1, 'number', 0, 0),
'vtx' : (0, 'number', 1, 1),
'vtx.x' : (1, 'number', 0, 0),
'vtx.y' : (1, 'number', 0, 0),
'len' : (0, 'number', 1, 1),
'len.n' : (1, 'number', 0, 0),
'book' : (1, 'snippets', 1, 0),
'version' : (1, 'snippets', 1, 0),
'version.FlowEdit_1_id' : (1, 'scalar_text', 0, 0),
'version.FlowEdit_1_version' : (1, 'scalar_text', 0, 0),
'version.Schema_id' : (1, 'scalar_text', 0, 0),
'version.Schema_version' : (1, 'scalar_text', 0, 0),
'version.Topaz_version' : (1, 'scalar_text', 0, 0),
'version.WordDetailEdit_1_id' : (1, 'scalar_text', 0, 0),
'version.WordDetailEdit_1_version' : (1, 'scalar_text', 0, 0),
'version.ZoneEdit_1_id' : (1, 'scalar_text', 0, 0),
'version.ZoneEdit_1_version' : (1, 'scalar_text', 0, 0),
'version.chapterheaders' : (1, 'scalar_text', 0, 0),
'version.creation_date' : (1, 'scalar_text', 0, 0),
'version.header_footer' : (1, 'scalar_text', 0, 0),
'version.init_from_ocr' : (1, 'scalar_text', 0, 0),
'version.letter_insertion' : (1, 'scalar_text', 0, 0),
'version.xmlinj_convert' : (1, 'scalar_text', 0, 0),
'version.xmlinj_reflow' : (1, 'scalar_text', 0, 0),
'version.xmlinj_transform' : (1, 'scalar_text', 0, 0),
'version.findlists' : (1, 'scalar_text', 0, 0),
'version.page_num' : (1, 'scalar_text', 0, 0),
'version.page_type' : (1, 'scalar_text', 0, 0),
'version.bad_text' : (1, 'scalar_text', 0, 0),
'version.glyph_mismatch' : (1, 'scalar_text', 0, 0),
'version.margins' : (1, 'scalar_text', 0, 0),
'version.staggered_lines' : (1, 'scalar_text', 0, 0),
'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
'version.toc' : (1, 'scalar_text', 0, 0),
'stylesheet' : (1, 'snippets', 1, 0),
'style' : (1, 'snippets', 1, 0),
'style._tag' : (1, 'scalar_text', 0, 0),
'style.type' : (1, 'scalar_text', 0, 0),
'style._parent_type' : (1, 'scalar_text', 0, 0),
'style.class' : (1, 'scalar_text', 0, 0),
'style._after_class' : (1, 'scalar_text', 0, 0),
'rule' : (1, 'snippets', 1, 0),
'rule.attr' : (1, 'scalar_text', 0, 0),
'rule.value' : (1, 'scalar_text', 0, 0),
'original' : (0, 'number', 1, 1),
'original.pnum' : (1, 'number', 0, 0),
'original.pid' : (1, 'text', 0, 0),
'pages' : (0, 'number', 1, 1),
'pages.ref' : (1, 'number', 0, 0),
'pages.id' : (1, 'number', 0, 0),
'startID' : (0, 'number', 1, 1),
'startID.page' : (1, 'number', 0, 0),
'startID.id' : (1, 'number', 0, 0),
}
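# Reading an entry (illustrative): 'glyph.x' : (1, 'number', 0, 0)
# says the tag takes one argument of type 'number' and has no subtags,
# while 'glyph' : (0, 'number', 1, 1) takes no arguments, carries
# subtags, and uses the escaped-subtag special case (the 0x74 escape
# marker handled in procToken below).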
# full tag path record keeping routines
def tag_push(self, token):
self.tagpath.append(token)
def tag_pop(self):
if len(self.tagpath) > 0 :
self.tagpath.pop()
def tagpath_len(self):
return len(self.tagpath)
def get_tagpath(self, i):
cnt = len(self.tagpath)
if i < cnt : result = self.tagpath[i]
for j in xrange(i+1, cnt) :
result += '.' + self.tagpath[j]
return result
# list of absolute command byte values that indicate
# various types of loop mechanisms typically used to generate vectors
cmd_list = (0x76, 0x76)
# peek at and return 1 byte that is ahead by i bytes
def peek(self, aheadi):
c = self.fo.read(aheadi)
if (len(c) == 0):
return None
self.fo.seek(-aheadi,1)
c = c[-1:]
return ord(c)
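# e.g. peek(2) reads two bytes, seeks back two, and returns the second
# byte, so the file position is left unchanged (illustrative note).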
# get the next value from the file being processed
def getNext(self):
nbyte = self.peek(1)
if (nbyte == None):
return None
val = readEncodedNumber(self.fo)
return val
# format an arg by argtype
def formatArg(self, arg, argtype):
if (argtype == 'text') or (argtype == 'scalar_text') :
result = self.dict.lookup(arg)
elif (argtype == 'raw') or (argtype == 'number') or (argtype == 'scalar_number') :
result = arg
elif (argtype == 'snippets') :
result = arg
else :
print "Error Unknown argtype %s" % argtype
sys.exit(-2)
return result
# process the next tag token, recursively handling subtags,
# arguments, and commands
def procToken(self, token):
known_token = False
self.tag_push(token)
if self.debug : print 'Processing: ', self.get_tagpath(0)
cnt = self.tagpath_len()
for j in xrange(cnt):
tkn = self.get_tagpath(j)
if tkn in self.token_tags :
num_args = self.token_tags[tkn][0]
argtype = self.token_tags[tkn][1]
subtags = self.token_tags[tkn][2]
splcase = self.token_tags[tkn][3]
ntags = -1
known_token = True
break
if known_token :
# handle subtags if present
subtagres = []
if (splcase == 1):
# this type of tag uses an escape marker 0x74 to indicate subtag count
if self.peek(1) == 0x74:
skip = readEncodedNumber(self.fo)
subtags = 1
num_args = 0
if (subtags == 1):
ntags = readEncodedNumber(self.fo)
if self.debug : print 'subtags: ' + token + ' has ' + str(ntags)
for j in xrange(ntags):
val = readEncodedNumber(self.fo)
subtagres.append(self.procToken(self.dict.lookup(val)))
# arguments can be scalars or vectors of text or numbers
argres = []
if num_args > 0 :
firstarg = self.peek(1)
if (firstarg in self.cmd_list) and (argtype != 'scalar_number') and (argtype != 'scalar_text'):
# single argument is a variable length vector of data
arg = readEncodedNumber(self.fo)
argres = self.decodeCMD(arg,argtype)
else :
# num_arg scalar arguments
for i in xrange(num_args):
argres.append(self.formatArg(readEncodedNumber(self.fo), argtype))
# build the return tag
result = []
tkn = self.get_tagpath(0)
result.append(tkn)
result.append(subtagres)
result.append(argtype)
result.append(argres)
self.tag_pop()
return result
# all tokens that need to be processed should be in the hash
# table; if one is not, it may indicate a problem, either a new token
# or an out of sync condition
else:
result = []
if (self.debug):
print 'Unknown Token:', token
self.tag_pop()
return result
# special loop used to process xml snippets
# it is NEVER used to format arguments.
# builds the snippetList
def doLoop72(self, argtype):
cnt = readEncodedNumber(self.fo)
if self.debug :
result = 'Set of '+ str(cnt) + ' xml snippets. The overall structure \n'
result += 'of the document is indicated by snippet number sets at the\n'
result += 'end of each snippet. \n'
print result
for i in xrange(cnt):
if self.debug: print 'Snippet:',str(i)
snippet = []
snippet.append(i)
val = readEncodedNumber(self.fo)
snippet.append(self.procToken(self.dict.lookup(val)))
self.snippetList.append(snippet)
return
# general loop code graciously submitted by "skindle" - thank you!
def doLoop76Mode(self, argtype, cnt, mode):
result = []
adj = 0
if mode & 1:
adj = readEncodedNumber(self.fo)
mode = mode >> 1
x = []
for i in xrange(cnt):
x.append(readEncodedNumber(self.fo) - adj)
for i in xrange(mode):
for j in xrange(1, cnt):
x[j] = x[j] + x[j - 1]
for i in xrange(cnt):
result.append(self.formatArg(x[i],argtype))
return result
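# Worked example of the mode handling above (illustrative): with cnt=4
# and mode=3, the low bit means an adjustment is read first (say
# adj=2) and mode becomes 1; raw values [10,3,4,5] become [8,1,2,3]
# after subtracting adj, and the one remaining mode pass accumulates
# them into the running sums [8,9,11,14].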
# dispatches loop command bytes with various modes
# The 0x76 style loops are used to build vectors
# This was all derived by trial and error and
# new loop types may exist that are not handled here
# since they did not appear in the test cases
def decodeCMD(self, cmd, argtype):
if (cmd == 0x76):
# loop with cnt, and mode to control loop styles
cnt = readEncodedNumber(self.fo)
mode = readEncodedNumber(self.fo)
if self.debug : print 'Loop for', cnt, 'with mode', mode, ': '
return self.doLoop76Mode(argtype, cnt, mode)
if self.debug: print "Unknown command", cmd
result = []
return result
# add full tag path to injected snippets
def updateName(self, tag, prefix):
name = tag[0]
subtagList = tag[1]
argtype = tag[2]
argList = tag[3]
nname = prefix + '.' + name
nsubtaglist = []
for j in subtagList:
nsubtaglist.append(self.updateName(j,prefix))
ntag = []
ntag.append(nname)
ntag.append(nsubtaglist)
ntag.append(argtype)
ntag.append(argList)
return ntag
# perform depth first injection of specified snippets into this one
def injectSnippets(self, snippet):
snipno, tag = snippet
name = tag[0]
subtagList = tag[1]
argtype = tag[2]
argList = tag[3]
nsubtagList = []
if len(argList) > 0 :
for j in argList:
asnip = self.snippetList[j]
aso, atag = self.injectSnippets(asnip)
atag = self.updateName(atag, name)
nsubtagList.append(atag)
argtype='number'
argList=[]
if len(nsubtagList) > 0 :
subtagList.extend(nsubtagList)
tag = []
tag.append(name)
tag.append(subtagList)
tag.append(argtype)
tag.append(argList)
snippet = []
snippet.append(snipno)
snippet.append(tag)
return snippet
# format the tag for output
def formatTag(self, node):
name = node[0]
subtagList = node[1]
argtype = node[2]
argList = node[3]
fullpathname = name.split('.')
nodename = fullpathname.pop()
ilvl = len(fullpathname)
indent = ' ' * (3 * ilvl)
result = indent + '<' + nodename + '>'
if len(argList) > 0:
argres = ''
for j in argList:
if (argtype == 'text') or (argtype == 'scalar_text') :
argres += j + '|'
else :
argres += str(j) + ','
argres = argres[0:-1]
if argtype == 'snippets' :
result += 'snippets:' + argres
else :
result += argres
if len(subtagList) > 0 :
result += '\n'
for j in subtagList:
if len(j) > 0 :
result += self.formatTag(j)
result += indent + '</' + nodename + '>\n'
else:
result += '</' + nodename + '>\n'
return result
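# e.g. a node ['page.region.x', [], 'scalar_number', [100]] formats as
# '      <x>100</x>' (illustrative): the dotted prefix sets the indent
# level and only the final path component becomes the element name.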
# flatten tag
def flattenTag(self, node):
name = node[0]
subtagList = node[1]
argtype = node[2]
argList = node[3]
result = name
if (len(argList) > 0):
argres = ''
for j in argList:
if (argtype == 'text') or (argtype == 'scalar_text') :
argres += j + '|'
else :
argres += str(j) + '|'
argres = argres[0:-1]
if argtype == 'snippets' :
result += '.snippets=' + argres
else :
result += '=' + argres
result += '\n'
for j in subtagList:
if len(j) > 0 :
result += self.flattenTag(j)
return result
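# the same node flattened (illustrative): flattenTag on
# ['page.region.x', [], 'scalar_number', [100]] yields the single
# line 'page.region.x=100' used by the flat xml output mode.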
# create the xml (or flattened xml) output for the document
def formatDoc(self, flat_xml):
result = ''
for j in self.doc :
if len(j) > 0:
if flat_xml:
result += self.flattenTag(j)
else:
result += self.formatTag(j)
if self.debug : print result
return result
# main loop - parse the page.dat files
# to create structured document and snippets
# FIXME: value at end of magic appears to be a subtags count
# but for what? For now, inject an 'info' tag as it is in
# every dictionary and seems close to what is meant
# The alternative is to special case the last _ "0x5f" to mean something
def process(self):
# peek at the first bytes to see what type of file it is
magic = self.fo.read(9)
if (magic[0:1] == 'p') and (magic[2:9] == 'marker_'):
first_token = 'info'
elif (magic[0:1] == 'p') and (magic[2:9] == '__PAGE_'):
skip = self.fo.read(2)
first_token = 'info'
elif (magic[0:1] == 'p') and (magic[2:8] == '_PAGE_'):
first_token = 'info'
elif (magic[0:1] == 'g') and (magic[2:9] == '__GLYPH'):
skip = self.fo.read(3)
first_token = 'info'
else :
# other0.dat file
first_token = None
self.fo.seek(-9,1)
# main loop to read and build the document tree
while True:
if first_token != None :
# use "inserted" first token 'info' for page and glyph files
tag = self.procToken(first_token)
if len(tag) > 0 :
self.doc.append(tag)
first_token = None
v = self.getNext()
if (v == None):
break
if (v == 0x72):
self.doLoop72('number')
elif (v > 0) and (v < self.dict.getSize()) :
tag = self.procToken(self.dict.lookup(v))
if len(tag) > 0 :
self.doc.append(tag)
else:
if self.debug:
print "Main Loop: Unknown value: %x" % v
if (v == 0):
if (self.peek(1) == 0x5f):
skip = self.fo.read(1)
first_token = 'info'
# now do snippet injection
if len(self.snippetList) > 0 :
if self.debug : print 'Injecting Snippets:'
snippet = self.injectSnippets(self.snippetList[0])
snipno = snippet[0]
tag_add = snippet[1]
if self.debug : print self.formatTag(tag_add)
if len(tag_add) > 0:
self.doc.append(tag_add)
# handle generation of xml output
xmlpage = self.formatDoc(self.flat_xml)
return xmlpage
def fromData(dict, fname):
flat_xml = True
debug = False
pp = PageParser(fname, dict, debug, flat_xml)
xmlpage = pp.process()
return xmlpage
def getXML(dict, fname):
flat_xml = False
debug = False
pp = PageParser(fname, dict, debug, flat_xml)
xmlpage = pp.process()
return xmlpage
def usage():
print 'Usage: '
print ' convert2xml.py dict0000.dat infile.dat '
print ' '
print ' Options:'
print ' -h print this usage help message '
print ' -d turn on debug output to check for potential errors '
print ' --flat-xml output the flattened xml page description only '
print ' '
print ' This program will attempt to convert a page*.dat file or '
print ' glyphs*.dat file, using the dict0000.dat file, to its xml description. '
print ' '
print ' Use "cmbtc_dump.py" first to unencrypt, uncompress, and dump '
print ' the *.dat files from a Topaz format e-book.'
#
# Main
#
def main(argv):
dictFile = ""
pageFile = ""
debug = False
flat_xml = False
printOutput = False
if len(argv) == 0:
printOutput = True
argv = sys.argv
try:
opts, args = getopt.getopt(argv[1:], "hd", ["flat-xml"])
except getopt.GetoptError, err:
# print help information and exit:
print str(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
if len(opts) == 0 and len(args) == 0 :
usage()
sys.exit(2)
for o, a in opts:
if o =="-d":
debug=True
if o =="-h":
usage()
sys.exit(0)
if o =="--flat-xml":
flat_xml = True
dictFile, pageFile = args[0], args[1]
# read in the string table dictionary
dict = Dictionary(dictFile)
# dict.dumpDict()
# create a page parser
pp = PageParser(pageFile, dict, debug, flat_xml)
xmlpage = pp.process()
if printOutput:
print xmlpage
return 0
return xmlpage
if __name__ == '__main__':
sys.exit(main(''))


@@ -0,0 +1,706 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6
import sys
import csv
import os
import math
import getopt
from struct import pack
from struct import unpack
class DocParser(object):
def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
self.id = os.path.basename(fileid).replace('.dat','')
self.svgcount = 0
self.docList = flatxml.split('\n')
self.docSize = len(self.docList)
self.classList = {}
self.bookDir = bookDir
self.gdict = gdict
tmpList = classlst.split('\n')
for pclass in tmpList:
if pclass != '':
# remove the leading period from the css name
cname = pclass[1:]
self.classList[cname] = True
self.fixedimage = fixedimage
self.ocrtext = []
self.link_id = []
self.link_title = []
self.link_page = []
self.link_href = []
self.link_type = []
self.dehyphen_rootid = []
self.paracont_stemid = []
self.parastems_stemid = []
def getGlyph(self, gid):
result = ''
id='id="gl%d"' % gid
return self.gdict.lookup(id)
def glyphs_to_image(self, glyphList):
def extract(path, key):
b = path.find(key) + len(key)
e = path.find(' ',b)
return int(path[b:e])
svgDir = os.path.join(self.bookDir,'svg')
imgDir = os.path.join(self.bookDir,'img')
imgname = self.id + '_%04d.svg' % self.svgcount
imgfile = os.path.join(imgDir,imgname)
# get glyph information
gxList = self.getData('info.glyph.x',0,-1)
gyList = self.getData('info.glyph.y',0,-1)
gidList = self.getData('info.glyph.glyphID',0,-1)
gids = []
maxws = []
maxhs = []
xs = []
ys = []
gdefs = []
# get path definitions, positions, dimensions for each glyph
# that makes up the image, and find min x and min y to reposition origin
minx = -1
miny = -1
for j in glyphList:
gid = gidList[j]
gids.append(gid)
xs.append(gxList[j])
if minx == -1: minx = gxList[j]
else : minx = min(minx, gxList[j])
ys.append(gyList[j])
if miny == -1: miny = gyList[j]
else : miny = min(miny, gyList[j])
path = self.getGlyph(gid)
gdefs.append(path)
maxws.append(extract(path,'width='))
maxhs.append(extract(path,'height='))
# change the origin to minx, miny and calc max height and width
maxw = maxws[0] + xs[0] - minx
maxh = maxhs[0] + ys[0] - miny
for j in xrange(0, len(xs)):
xs[j] = xs[j] - minx
ys[j] = ys[j] - miny
maxw = max( maxw, (maxws[j] + xs[j]) )
maxh = max( maxh, (maxhs[j] + ys[j]) )
# open the image file for output
ifile = open(imgfile,'w')
ifile.write('<?xml version="1.0" standalone="no"?>\n')
ifile.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
ifile.write('<defs>\n')
for j in xrange(0,len(gdefs)):
ifile.write(gdefs[j])
ifile.write('</defs>\n')
for j in xrange(0,len(gids)):
ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
ifile.write('</svg>')
ifile.close()
return 0
# return tag at line pos in document
def lineinDoc(self, pos) :
if (pos >= 0) and (pos < self.docSize) :
item = self.docList[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
name = item
argres = ''
return name, argres
# find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
if end == -1 :
end = self.docSize
else:
end = min(self.docSize, end)
foundat = -1
for j in xrange(pos, end):
item = self.docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
name = item
argres = ''
if name.endswith(tagpath) :
result = argres
foundat = j
break
return foundat, result
# return list of start positions for the tagpath
def posinDoc(self, tagpath):
startpos = []
pos = 0
res = ""
while res != None :
(foundpos, res) = self.findinDoc(tagpath, pos, -1)
if res != None :
startpos.append(foundpos)
pos = foundpos + 1
return startpos
# returns a vector of integers for the tagpath
def getData(self, tagpath, pos, end):
argres=[]
(foundat, argt) = self.findinDoc(tagpath, pos, end)
if (argt != None) and (len(argt) > 0) :
argList = argt.split('|')
argres = [ int(strval) for strval in argList]
return argres
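# e.g. if the flattened document contains the line
# 'info.glyph.x=10|20|30', then getData('info.glyph.x',0,-1) returns
# the integer list [10, 20, 30] (illustrative example).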
# get the class
def getClass(self, pclass):
nclass = pclass
# class names are an issue given topaz may start them with numerals (not allowed),
# use a mix of cases (which cause some browsers problems), and actually
# attach numbers after "_reclustered*" to the end to deal with classes that inherit
# from a base class (but then do not actually provide all of these _reclustered
# classes in the stylesheet!)
# so we clean this up by lowercasing, prepending 'cl-', and getting any baseclass
# that exists in the stylesheet first, and then adding this specific class
# after
# also some class names have spaces in them so need to convert to dashes
if nclass != None :
nclass = nclass.replace(' ','-')
classres = ''
nclass = nclass.lower()
nclass = 'cl-' + nclass
baseclass = ''
# graphic is the base class for captions
if nclass.find('cl-cap-') >=0 :
classres = 'graphic' + ' '
else :
# strip to find baseclass
p = nclass.find('_')
if p > 0 :
baseclass = nclass[0:p]
if baseclass in self.classList:
classres += baseclass + ' '
classres += nclass
nclass = classres
return nclass
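# e.g. a topaz class of 'graph_reclustered2' (illustrative name)
# becomes 'cl-graph_reclustered2', and if the stripped base 'cl-graph'
# exists in the stylesheet the result is the pair
# 'cl-graph cl-graph_reclustered2' so the base styles still apply.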
# develop a sorted description of the starting positions of
# groups and regions on the page, as well as the page type
def PageDescription(self):
def compare(x, y):
(xtype, xval) = x
(ytype, yval) = y
if xval > yval:
return 1
if xval == yval:
return 0
return -1
result = []
(pos, pagetype) = self.findinDoc('page.type',0,-1)
groupList = self.posinDoc('page.group')
groupregionList = self.posinDoc('page.group.region')
pageregionList = self.posinDoc('page.region')
# integrate into one list
for j in groupList:
result.append(('grpbeg',j))
for j in groupregionList:
result.append(('gregion',j))
for j in pageregionList:
result.append(('pregion',j))
result.sort(compare)
# insert group end and page end indicators
inGroup = False
j = 0
while True:
if j == len(result): break
rtype = result[j][0]
rval = result[j][1]
if not inGroup and (rtype == 'grpbeg') :
inGroup = True
j = j + 1
elif inGroup and (rtype in ('grpbeg', 'pregion')):
result.insert(j,('grpend',rval))
inGroup = False
else:
j = j + 1
if inGroup:
result.append(('grpend',-1))
result.append(('pageend', -1))
return pagetype, result
# build a description of the paragraph
def getParaDescription(self, start, end, regtype):
result = []
# paragraph
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
pclass = self.getClass(pclass)
# build up a description of the paragraph in result and return it
# first check for the basic - all words paragraph
(pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
(pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
if (sfirst != None) and (slast != None) :
first = int(sfirst)
last = int(slast)
makeImage = (regtype == 'vertical') or (regtype == 'table')
if self.fixedimage:
makeImage = makeImage or (regtype == 'fixed')
if (pclass != None):
makeImage = makeImage or (pclass.find('.inverted') >= 0)
if self.fixedimage :
makeImage = makeImage or (pclass.find('cl-f-') >= 0)
if not makeImage :
# standard all word paragraph
for wordnum in xrange(first, last):
result.append(('ocr', wordnum))
return pclass, result
# convert paragraph to svg image
# translate first and last word into first and last glyphs
# and generate inline image and include it
glyphList = []
firstglyphList = self.getData('word.firstGlyph',0,-1)
gidList = self.getData('info.glyph.glyphID',0,-1)
firstGlyph = firstglyphList[first]
if last < len(firstglyphList):
lastGlyph = firstglyphList[last]
else :
lastGlyph = len(gidList)
for glyphnum in xrange(firstGlyph, lastGlyph):
glyphList.append(glyphnum)
# include any extratokens if they exist
(pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
(pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
if (sfg != None) and (slg != None):
for glyphnum in xrange(int(sfg), int(slg)):
glyphList.append(glyphnum)
num = self.svgcount
self.glyphs_to_image(glyphList)
self.svgcount += 1
result.append(('svg', num))
return pclass, result
# this type of paragraph may be made up of multiple spans, inline
# word monograms (images), and words with semantic meaning,
# plus glyphs used to form starting letter of first word
# need to parse this type line by line
line = start + 1
word_class = ''
# if end is -1 then we must search to end of document
if end == -1 :
end = self.docSize
# seems some xml has last* coming before first* so we have to
# handle any order
sp_first = -1
sp_last = -1
gl_first = -1
gl_last = -1
ws_first = -1
ws_last = -1
word_class = ''
while (line < end) :
(name, argres) = self.lineinDoc(line)
if name.endswith('span.firstWord') :
sp_first = int(argres)
elif name.endswith('span.lastWord') :
sp_last = int(argres)
elif name.endswith('word.firstGlyph') :
gl_first = int(argres)
elif name.endswith('word.lastGlyph') :
gl_last = int(argres)
elif name.endswith('word_semantic.firstWord'):
ws_first = int(argres)
elif name.endswith('word_semantic.lastWord'):
ws_last = int(argres)
elif name.endswith('word.class'):
(cname, space) = argres.split('-',1)
if space == '' : space = '0'
if (cname == 'spaceafter') and (int(space) > 0) :
word_class = 'sa'
elif name.endswith('word.img.src'):
result.append(('img' + word_class, int(argres)))
word_class = ''
elif name.endswith('region.img.src'):
result.append(('img' + word_class, int(argres)))
if (sp_first != -1) and (sp_last != -1):
for wordnum in xrange(sp_first, sp_last):
result.append(('ocr', wordnum))
sp_first = -1
sp_last = -1
if (gl_first != -1) and (gl_last != -1):
glyphList = []
for glyphnum in xrange(gl_first, gl_last):
glyphList.append(glyphnum)
num = self.svgcount
self.glyphs_to_image(glyphList)
self.svgcount += 1
result.append(('svg', num))
gl_first = -1
gl_last = -1
if (ws_first != -1) and (ws_last != -1):
for wordnum in xrange(ws_first, ws_last):
result.append(('ocr', wordnum))
ws_first = -1
ws_last = -1
line += 1
return pclass, result
def buildParagraph(self, pclass, pdesc, type, regtype) :
parares = ''
sep =''
classres = ''
if pclass :
classres = ' class="' + pclass + '"'
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
handle_links = len(self.link_id) > 0
if (type == 'full') or (type == 'begin') :
parares += '<p' + classres + '>'
if (type == 'end'):
parares += ' '
lstart = len(parares)
cnt = len(pdesc)
for j in xrange( 0, cnt) :
(wtype, num) = pdesc[j]
if wtype == 'ocr' :
word = self.ocrtext[num]
sep = ' '
if handle_links:
link = self.link_id[num]
if (link > 0):
linktype = self.link_type[link-1]
title = self.link_title[link-1]
if (title == "") or (parares.rfind(title) < 0):
title=parares[lstart:]
if linktype == 'external' :
linkhref = self.link_href[link-1]
linkhtml = '<a href="%s">' % linkhref
else :
if len(self.link_page) >= link :
ptarget = self.link_page[link-1] - 1
linkhtml = '<a href="#page%04d">' % ptarget
else :
# just link to the current page
linkhtml = '<a href="#' + self.id + '">'
linkhtml += title + '</a>'
pos = parares.rfind(title)
if pos >= 0:
parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
else :
parares += linkhtml
lstart = len(parares)
if word == '_link_' : word = ''
elif (link < 0) :
if word == '_link_' : word = ''
if word == '_lb_':
if ((num-1) in self.dehyphen_rootid ) or handle_links:
word = ''
sep = ''
elif br_lb :
word = '<br />\n'
sep = ''
else :
word = '\n'
sep = ''
if num in self.dehyphen_rootid :
word = word[0:-1]
sep = ''
parares += word + sep
elif wtype == 'img' :
sep = ''
parares += '<img src="img/img%04d.jpg" alt="" />' % num
parares += sep
elif wtype == 'imgsa' :
sep = ' '
parares += '<img src="img/img%04d.jpg" alt="" />' % num
parares += sep
elif wtype == 'svg' :
sep = ''
parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
parares += sep
if len(sep) > 0 : parares = parares[0:-1]
if (type == 'full') or (type == 'end') :
parares += '</p>'
return parares
# walk the document tree collecting the information needed
# to build an html page using the ocrText
def process(self):
htmlpage = ''
# get the ocr text
(pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
if argres : self.ocrtext = argres.split('|')
# get information to dehyphenate the text
self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
# determine if first paragraph is continued from previous page
(pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
first_para_continued = (self.parastems_stemid != None)
# determine if last paragraph is continued onto the next page
(pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
last_para_continued = (self.paracont_stemid != None)
# collect link ids
self.link_id = self.getData('info.word.link_id',0,-1)
# collect link destination page numbers
self.link_page = self.getData('info.links.page',0,-1)
# collect link types (container versus external)
(pos, argres) = self.findinDoc('info.links.type',0,-1)
if argres : self.link_type = argres.split('|')
# collect link destinations
(pos, argres) = self.findinDoc('info.links.href',0,-1)
if argres : self.link_href = argres.split('|')
# collect link titles
(pos, argres) = self.findinDoc('info.links.title',0,-1)
if argres :
self.link_title = argres.split('|')
else:
self.link_title.append('')
# get a description of the starting points of the regions
# and groups on the page
(pagetype, pageDesc) = self.PageDescription()
regcnt = len(pageDesc) - 1
anchorSet = False
breakSet = False
inGroup = False
# process each region on the page and convert what you can to html
for j in xrange(regcnt):
(etype, start) = pageDesc[j]
(ntype, end) = pageDesc[j+1]
# set anchor for link target on this page
if not anchorSet and not first_para_continued:
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
anchorSet = True
# handle groups of graphics with text captions
if (etype == 'grpbeg'):
(pos, grptype) = self.findinDoc('group.type', start, end)
if grptype != None:
if grptype == 'graphic':
gcstr = ' class="' + grptype + '"'
htmlpage += '<div' + gcstr + '>'
inGroup = True
elif (etype == 'grpend'):
if inGroup:
htmlpage += '</div>\n'
inGroup = False
else:
(pos, regtype) = self.findinDoc('region.type',start,end)
if regtype == 'graphic' :
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
if inGroup:
htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
else:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
elif regtype == 'chapterheading' :
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not breakSet:
htmlpage += '<div style="page-break-after: always;">&nbsp;</div>\n'
breakSet = True
tag = 'h1'
if pclass and (len(pclass) >= 7):
if pclass[3:7] == 'ch1-' : tag = 'h1'
if pclass[3:7] == 'ch2-' : tag = 'h2'
if pclass[3:7] == 'ch3-' : tag = 'h3'
htmlpage += '<' + tag + ' class="' + pclass + '">'
else:
htmlpage += '<' + tag + '>'
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
htmlpage += '</' + tag + '>'
elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
ptype = 'full'
# check to see if this is a continuation from the previous page
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if pclass and (len(pclass) >= 6) and (ptype == 'full'):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
if pclass[3:6] == 'h3-' : tag = 'h6'
htmlpage += '<' + tag + ' class="' + pclass + '">'
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
htmlpage += '</' + tag + '>'
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'tocentry') :
ptype = 'full'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'vertical') or (regtype == 'table') :
ptype = 'full'
if inGroup:
ptype = 'middle'
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start, end, regtype)
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
elif (regtype == 'synth_fcvr.center'):
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
else :
print ' Making region type', regtype,
(pos, temp) = self.findinDoc('paragraph',start,end)
(pos2, temp) = self.findinDoc('span',start,end)
if pos != -1 or pos2 != -1:
print ' a "text" region'
orig_regtype = regtype
regtype = 'fixed'
ptype = 'full'
# check to see if this is a continuation from the previous page
if first_para_continued :
ptype = 'end'
first_para_continued = False
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not pclass:
if orig_regtype.endswith('.right') : pclass = 'cl-right'
elif orig_regtype.endswith('.center') : pclass = 'cl-center'
elif orig_regtype.endswith('.left') : pclass = 'cl-left'
elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
if pclass and (ptype == 'full') and (len(pclass) >= 6):
tag = 'p'
if pclass[3:6] == 'h1-' : tag = 'h4'
if pclass[3:6] == 'h2-' : tag = 'h5'
if pclass[3:6] == 'h3-' : tag = 'h6'
htmlpage += '<' + tag + ' class="' + pclass + '">'
htmlpage += self.buildParagraph(pclass, pdesc, 'middle', regtype)
htmlpage += '</' + tag + '>'
else :
htmlpage += self.buildParagraph(pclass, pdesc, ptype, regtype)
else :
print ' a "graphic" region'
(pos, simgsrc) = self.findinDoc('img.src',start,end)
if simgsrc:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
if last_para_continued :
if htmlpage[-4:] == '</p>':
htmlpage = htmlpage[0:-4]
last_para_continued = False
return htmlpage
def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
# create a document parser
dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
htmlpage = dp.process()
return htmlpage


@@ -0,0 +1,151 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
import sys
import csv
import os
import getopt
from struct import pack
from struct import unpack
class PParser(object):
def __init__(self, gd, flatxml):
self.gd = gd
self.flatdoc = flatxml.split('\n')
self.temp = []
foo = self.getData('page.h') or self.getData('book.h')
self.ph = foo[0]
foo = self.getData('page.w') or self.getData('book.w')
self.pw = foo[0]
self.gx = self.getData('info.glyph.x')
self.gy = self.getData('info.glyph.y')
self.gid = self.getData('info.glyph.glyphID')
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getDataTemp(self, path):
result = None
cnt = len(self.temp)
for j in xrange(cnt):
item = self.temp[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name.endswith(path)):
result = argres
self.temp.pop(j)
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getImages(self):
result = []
self.temp = self.flatdoc
while (self.getDataTemp('img') != None):
h = self.getDataTemp('img.h')[0]
w = self.getDataTemp('img.w')[0]
x = self.getDataTemp('img.x')[0]
y = self.getDataTemp('img.y')[0]
src = self.getDataTemp('img.src')[0]
result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
return result
def getGlyphs(self):
result = []
if (self.gid != None) and (len(self.gid) > 0):
glyphs = []
for j in set(self.gid):
glyphs.append(j)
glyphs.sort()
for gid in glyphs:
id='id="gl%d"' % gid
path = self.gd.lookup(id)
if path:
result.append(id + ' ' + path)
return result
def convert2SVG(gdict, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi):
ml = ''
pp = PParser(gdict, flat_xml)
ml += '<?xml version="1.0" standalone="no"?>\n'
if (raw):
ml += '<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n'
ml += '<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)
ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
else:
ml += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
ml += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n'
ml += '<title>Page %d - %s by %s</title>\n' % (counter, meta_array['Title'],meta_array['Authors'])
ml += '<script><![CDATA[\n'
ml += 'function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n'
ml += 'var dpi=%d;\n' % scaledpi
if (counter) :
ml += 'var prevpage="page%04d.xhtml";\n' % (counter - 1)
if (counter < numfiles-1) :
ml += 'var nextpage="page%04d.xhtml";\n' % (counter + 1)
ml += 'var pw=%d;var ph=%d;' % (pp.pw, pp.ph)
ml += 'function zoomin(){dpi=dpi*(0.8);setsize();}\n'
ml += 'function zoomout(){dpi=dpi*1.25;setsize();}\n'
ml += 'function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n'
ml += 'function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n'
ml += 'function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n'
ml += 'var gt=gd();if(gt>0){dpi=gt;}\n'
ml += 'window.onload=setsize;\n'
ml += ']]></script>\n'
ml += '</head>\n'
ml += '<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n'
ml += '<div style="white-space:nowrap;">\n'
if (counter == 0) :
ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
else:
ml += '<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n'
ml += '<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph)
if (pp.gid != None):
ml += '<defs>\n'
gdefs = pp.getGlyphs()
for j in xrange(0,len(gdefs)):
ml += gdefs[j]
ml += '</defs>\n'
img = pp.getImages()
if (img != None):
for j in xrange(0,len(img)):
ml += img[j]
if (pp.gid != None):
for j in xrange(0,len(pp.gid)):
ml += '<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j])
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
ml += '<text x="10" y="10" font-family="Helvetica" font-size="100" stroke="black">This page intentionally left blank.</text>\n<text x="10" y="110" font-family="Helvetica" font-size="50" stroke="black">Until this notice unintentionally gave it content. (gensvg.py)</text>\n'
if (raw) :
ml += '</svg>'
else :
ml += '</svg></a>\n'
if (counter == numfiles - 1) :
ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n'
else :
ml += '<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n'
ml += '</div>\n'
ml += '<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n'
ml += '</body>\n'
ml += '</html>\n'
return ml


@@ -0,0 +1,561 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
class Unbuffered:
def __init__(self, stream):
self.stream = stream
def write(self, data):
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import csv
import os
import getopt
from struct import pack
from struct import unpack
# local support routines
import convert2xml
import flatxml2html
import flatxml2svg
import stylexml2css
# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
flag = False
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
if data == 0xFF:
flag = True
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
if data >= 0x80:
datax = (data & 0x7F)
while data >= 0x80 :
c = file.read(1)
if (len(c) == 0):
return None
data = ord(c)
datax = (datax <<7) + (data & 0x7F)
data = datax
if flag:
data = -data
return data
# create / read a length prefixed string from the file
def lengthPrefixString(data):
return encodeNumber(len(data))+data
def readString(file):
stringLength = readEncodedNumber(file)
if (stringLength == None):
return None
sv = file.read(stringLength)
if (len(sv) != stringLength):
return ""
return unpack(str(stringLength)+"s",sv)[0]
def getMetaArray(metaFile):
# parse the meta file
result = {}
fo = file(metaFile,'rb')
size = readEncodedNumber(fo)
for i in xrange(size):
tag = readString(fo)
value = readString(fo)
result[tag] = value
# print tag, value
fo.close()
return result
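# The layout assumed by the reader above (illustrative note): an
# encoded count followed by that many pairs of length prefixed
# strings, each pair holding a tag such as 'Title' and its value.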
# dictionary of all text strings by index value
class Dictionary(object):
def __init__(self, dictFile):
self.filename = dictFile
self.size = 0
self.fo = file(dictFile,'rb')
self.stable = []
self.size = readEncodedNumber(self.fo)
for i in xrange(self.size):
self.stable.append(self.escapestr(readString(self.fo)))
self.pos = 0
def escapestr(self, str):
str = str.replace('&','&amp;')
str = str.replace('<','&lt;')
str = str.replace('>','&gt;')
str = str.replace('=','&#61;')
return str
def lookup(self,val):
if ((val >= 0) and (val < self.size)) :
self.pos = val
return self.stable[self.pos]
else:
print "Error - %d outside of string table limits" % val
sys.exit(-1)
def getSize(self):
return self.size
def getPos(self):
return self.pos
class PageDimParser(object):
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
# find tag if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
docList = self.flatdoc
cnt = len(docList)
if end == -1 :
end = cnt
else:
end = min(cnt,end)
foundat = -1
for j in xrange(pos, end):
item = docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=')
else :
name = item
argres = ''
if name.endswith(tagpath) :
result = argres
foundat = j
break
return foundat, result
def process(self):
(pos, sph) = self.findinDoc('page.h',0,-1)
(pos, spw) = self.findinDoc('page.w',0,-1)
if (sph == None): sph = '-1'
if (spw == None): spw = '-1'
return sph, spw
def getPageDim(flatxml):
# create a document parser
dp = PageDimParser(flatxml)
(ph, pw) = dp.process()
return ph, pw
class GParser(object):
def __init__(self, flatxml):
self.flatdoc = flatxml.split('\n')
self.dpi = 1440
self.gh = self.getData('info.glyph.h')
self.gw = self.getData('info.glyph.w')
self.guse = self.getData('info.glyph.use')
if self.guse :
self.count = len(self.guse)
else :
self.count = 0
self.gvtx = self.getData('info.glyph.vtx')
self.glen = self.getData('info.glyph.len')
self.gdpi = self.getData('info.glyph.dpi')
self.vx = self.getData('info.vtx.x')
self.vy = self.getData('info.vtx.y')
self.vlen = self.getData('info.len.n')
if self.vlen :
self.glen.append(len(self.vlen))
elif self.glen:
self.glen.append(0)
if self.vx :
self.gvtx.append(len(self.vx))
elif self.gvtx :
self.gvtx.append(0)
def getData(self, path):
result = None
cnt = len(self.flatdoc)
for j in xrange(cnt):
item = self.flatdoc[j]
if item.find('=') >= 0:
(name, argt) = item.split('=')
argres = argt.split('|')
else:
name = item
argres = []
if (name == path):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
def getGlyphDim(self, gly):
maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
return maxh, maxw
def getPath(self, gly):
path = ''
if (gly < 0) or (gly >= self.count):
return path
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
p = 0
for k in xrange(self.glen[gly], self.glen[gly+1]):
if (p == 0):
zx = tx[0:self.vlen[k]+1]
zy = ty[0:self.vlen[k]+1]
else:
zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
p += 1
j = 0
while ( j < len(zx) ):
if (j == 0):
# Start Position.
path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
elif (j <= len(zx)-3):
# Cubic Bezier Curve
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
j += 2
elif (j == len(zx)-2):
# Cubic Bezier Curve to Start Position
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
j += 1
elif (j == len(zx)-1):
# Quadratic Bezier Curve to Start Position
path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
j += 1
path += 'z'
return path
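# The returned path (illustrative) has the form 'M x y C ... z' per
# subpath: an absolute moveto, cubic Bezier segments through the
# interior points, a quadratic or cubic curve back to the start, and
# a closepath, all scaled from glyph units up to the 1440 dpi target.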
# dictionary of all glyph paths by glyph index value
class GlyphDict(object):
def __init__(self):
self.gdict = {}
def lookup(self, id):
# id='id="gl%d"' % val
if id in self.gdict:
return self.gdict[id]
return None
def addGlyph(self, val, path):
id='id="gl%d"' % val
self.gdict[id] = path
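# Illustrative usage: addGlyph(5, path) stores the path under the key
# 'id="gl5"', lookup('id="gl5"') returns it, and lookups for unknown
# ids return None.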
def generateBook(bookDir, raw, fixedimage):
# sanity check Topaz file extraction
if not os.path.exists(bookDir) :
print "Can not find directory with unencrypted book"
return 1
dictFile = os.path.join(bookDir,'dict0000.dat')
if not os.path.exists(dictFile) :
print "Can not find dict0000.dat file"
return 1
pageDir = os.path.join(bookDir,'page')
if not os.path.exists(pageDir) :
print "Can not find page directory in unencrypted book"
return 1
imgDir = os.path.join(bookDir,'img')
if not os.path.exists(imgDir) :
print "Can not find image directory in unencrypted book"
return 1
glyphsDir = os.path.join(bookDir,'glyphs')
if not os.path.exists(glyphsDir) :
print "Can not find glyphs directory in unencrypted book"
return 1
metaFile = os.path.join(bookDir,'metadata0000.dat')
if not os.path.exists(metaFile) :
print "Can not find metadata0000.dat in unencrypted book"
return 1
svgDir = os.path.join(bookDir,'svg')
if not os.path.exists(svgDir) :
os.makedirs(svgDir)
xmlDir = os.path.join(bookDir,'xml')
if not os.path.exists(xmlDir) :
os.makedirs(xmlDir)
otherFile = os.path.join(bookDir,'other0000.dat')
if not os.path.exists(otherFile) :
print "Can not find other0000.dat in unencrypted book"
return 1
print "Updating to color images if available"
spath = os.path.join(bookDir,'color_img')
dpath = os.path.join(bookDir,'img')
filenames = os.listdir(spath)
filenames = sorted(filenames)
for filename in filenames:
imgname = filename.replace('color','img')
sfile = os.path.join(spath,filename)
dfile = os.path.join(dpath,imgname)
imgdata = file(sfile,'rb').read()
file(dfile,'wb').write(imgdata)
print "Creating cover.jpg"
isCover = False
cpath = os.path.join(bookDir,'img')
cpath = os.path.join(cpath,'img0000.jpg')
if os.path.isfile(cpath):
cover = file(cpath, 'rb').read()
cpath = os.path.join(bookDir,'cover.jpg')
file(cpath, 'wb').write(cover)
isCover = True
print 'Processing Dictionary'
dict = Dictionary(dictFile)
print 'Processing Meta Data and creating OPF'
meta_array = getMetaArray(metaFile)
xname = os.path.join(xmlDir, 'metadata.xml')
metastr = ''
for key in meta_array:
metastr += '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
file(xname, 'wb').write(metastr)
print 'Processing StyleSheet'
# get some scaling info from metadata to use while processing styles
fontsize = '135'
if 'fontSize' in meta_array:
fontsize = meta_array['fontSize']
# also get the size of a normal text page
spage = '1'
if 'firstTextPage' in meta_array:
spage = meta_array['firstTextPage']
pnum = int(spage)
# get page height and width from first text page for use in stylesheet scaling
pname = 'page%04d.dat' % (pnum + 1)
fname = os.path.join(pageDir,pname)
flat_xml = convert2xml.fromData(dict, fname)
(ph, pw) = getPageDim(flat_xml)
if (ph == '-1') or (ph == '0') : ph = '11000'
if (pw == '-1') or (pw == '0') : pw = '8500'
# print ' ', 'other0000.dat'
xname = os.path.join(bookDir, 'style.css')
flat_xml = convert2xml.fromData(dict, otherFile)
cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
file(xname, 'wb').write(cssstr)
xname = os.path.join(xmlDir, 'other0000.xml')
file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
print 'Processing Glyphs'
gd = GlyphDict()
filenames = os.listdir(glyphsDir)
filenames = sorted(filenames)
glyfname = os.path.join(svgDir,'glyphs.svg')
glyfile = open(glyfname, 'w')
glyfile.write('<?xml version="1.0" standalone="no"?>\n')
glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
glyfile.write('<defs>\n')
counter = 0
for filename in filenames:
# print ' ', filename
print '.',
fname = os.path.join(glyphsDir,filename)
flat_xml = convert2xml.fromData(dict, fname)
xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
file(xname, 'wb').write(convert2xml.getXML(dict, fname))
gp = GParser(flat_xml)
for i in xrange(0, gp.count):
path = gp.getPath(i)
maxh, maxw = gp.getGlyphDim(i)
fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
glyfile.write(fullpath)
gd.addGlyph(counter * 256 + i, fullpath)
counter += 1
glyfile.write('</defs>\n')
glyfile.write('</svg>\n')
glyfile.close()
print " "
# start up the html
htmlFileName = "book.html"
htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n'
htmlstr += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
htmlstr += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'
htmlstr += '<head>\n'
htmlstr += '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n'
htmlstr += '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n'
htmlstr += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
htmlstr += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
htmlstr += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
htmlstr += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
htmlstr += '<link href="style.css" rel="stylesheet" type="text/css" />\n'
htmlstr += '</head>\n<body>\n'
print 'Processing Pages'
# Books are at 1440 DPI. This is rendering at twice that size for
# readability when rendering to the screen.
scaledpi = 1440.0
svgindex = '<?xml version="1.0" encoding="utf-8"?>\n'
svgindex += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
svgindex += '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >'
svgindex += '<head>\n'
svgindex += '<title>' + meta_array['Title'] + '</title>\n'
svgindex += '<meta name="Author" content="' + meta_array['Authors'] + '" />\n'
svgindex += '<meta name="Title" content="' + meta_array['Title'] + '" />\n'
svgindex += '<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n'
svgindex += '<meta name="GUID" content="' + meta_array['GUID'] + '" />\n'
svgindex += '</head>\n'
svgindex += '<body>\n'
filenames = os.listdir(pageDir)
filenames = sorted(filenames)
numfiles = len(filenames)
counter = 0
for filename in filenames:
# print ' ', filename
print ".",
fname = os.path.join(pageDir,filename)
flat_xml = convert2xml.fromData(dict, fname)
xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
file(xname, 'wb').write(convert2xml.getXML(dict, fname))
# first get the html
htmlstr += flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
# now get the svg image of the page
svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)
if (raw) :
pfile = open(os.path.join(svgDir,filename.replace('.dat','.svg')), 'w')
svgindex += '<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter)
else :
pfile = open(os.path.join(svgDir,'page%04d.xhtml' % counter), 'w')
svgindex += '<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter)
pfile.write(svgxml)
pfile.close()
counter += 1
print " "
# finish up the html string and output it
htmlstr += '</body>\n</html>\n'
file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
# finish up the svg index string and output it
svgindex += '</body>\n</html>\n'
file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
# build the opf file
opfname = os.path.join(bookDir, 'book.opf')
opfstr = '<?xml version="1.0" encoding="utf-8"?>\n'
opfstr += '<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n'
# adding metadata
opfstr += ' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
opfstr += ' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n'
opfstr += ' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n'
opfstr += ' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n'
opfstr += ' <dc:title>' + meta_array['Title'] + '</dc:title>\n'
opfstr += ' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n'
opfstr += ' <dc:language>en</dc:language>\n'
opfstr += ' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n'
if isCover:
opfstr += ' <meta name="cover" content="bookcover"/>\n'
opfstr += ' </metadata>\n'
opfstr += '<manifest>\n'
opfstr += ' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n'
opfstr += ' <item id="stylesheet" href="style.css" media-type="text.css"/>\n'
# adding image files to manifest
filenames = os.listdir(imgDir)
filenames = sorted(filenames)
for filename in filenames:
imgname, imgext = os.path.splitext(filename)
imgext = imgext[1:] # drop the leading dot so every extension forms a valid media-type subtype
if imgext == 'jpg':
imgext = 'jpeg'
if imgext == 'svg':
imgext = 'svg+xml'
opfstr += ' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n'
if isCover:
opfstr += ' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n'
opfstr += '</manifest>\n'
# adding spine
opfstr += '<spine>\n <itemref idref="book" />\n</spine>\n'
if isCover:
opfstr += ' <guide>\n'
opfstr += ' <reference href="cover.jpg" type="cover" title="Cover"/>\n'
opfstr += ' </guide>\n'
opfstr += '</package>\n'
file(opfname, 'wb').write(opfstr)
print 'Processing Complete'
return 0
def usage():
print "genbook.py generates a book from the extract Topaz Files"
print "Usage:"
print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
print " "
print "Options:"
print " -h : help - print this usage message"
print " -r : generate raw svg files (not wrapped in xhtml)"
print " --fixed-image : genearate any Fixed Area as an svg image in the html"
print " "
def main(argv):
bookDir = ''
if len(argv) == 0:
argv = sys.argv
try:
opts, args = getopt.getopt(argv[1:], "rh", ["fixed-image"])
except getopt.GetoptError, err:
print str(err)
usage()
return 1
if len(opts) == 0 and len(args) == 0 :
usage()
return 1
raw = 0
fixedimage = False
for o, a in opts:
if o =="-h":
usage()
return 0
if o =="-r":
raw = 1
if o =="--fixed-image":
fixedimage = True
if len(args) != 1:
usage()
return 1
bookDir = args[0]
rv = generateBook(bookDir, raw, fixedimage)
return rv
if __name__ == '__main__':
sys.exit(main(''))


@@ -0,0 +1,312 @@
#!/usr/bin/env python
from __future__ import with_statement
import sys
import os, csv
import binascii
import zlib
import re
from struct import pack, unpack, unpack_from
class DrmException(Exception):
pass
global kindleDatabase
global charMap1
global charMap2
global charMap3
global charMap4
if sys.platform.startswith('win'):
from k4pcutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
if sys.platform.startswith('darwin'):
from k4mutils import openKindleInfo, CryptUnprotectData, GetUserName, GetVolumeSerialNumber, charMap2
charMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
charMap3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
charMap4 = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
# crypto digest routines
import hashlib
def MD5(message):
ctx = hashlib.md5()
ctx.update(message)
return ctx.digest()
def SHA1(message):
ctx = hashlib.sha1()
ctx.update(message)
return ctx.digest()
# Encode the bytes in data with the characters in map
def encode(data, map):
result = ""
for char in data:
value = ord(char)
Q = (value ^ 0x80) // len(map)
R = value % len(map)
result += map[Q]
result += map[R]
return result
# Hash the bytes in data and then encode the digest with the characters in map
def encodeHash(data,map):
return encode(MD5(data),map)
# Decode the string in data with the characters in map. Returns the decoded bytes
def decode(data,map):
result = ""
for i in range (0,len(data)-1,2):
high = map.find(data[i])
low = map.find(data[i+1])
if (high == -1) or (low == -1) :
break
value = (((high * len(map)) ^ 0x80) & 0xFF) + low
result += pack("B",value)
return result
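# Worked example (illustrative only, using charMap1 defined above, length 32):
#   >>> encode('\x00', charMap1)   # Q = (0x00 ^ 0x80) // 32 = 4 -> '6', R = 0 -> 'n'
#   '6n'
#   >>> decode('6n', charMap1)     # (((4 * 32) ^ 0x80) & 0xFF) + 0 = 0
#   '\x00'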
# Parse the Kindle.info file and return the records as a list of key-values
def parseKindleInfo(kInfoFile):
DB = {}
infoReader = openKindleInfo(kInfoFile)
infoReader.read(1)
data = infoReader.read()
if sys.platform.startswith('win'):
items = data.split('{')
else :
items = data.split('[')
for item in items:
splito = item.split(':')
DB[splito[0]] = splito[1]
return DB
# Get a record from the Kindle.info file for the key "hashedKey" (already hashed and encoded). Return the decoded and decrypted record
def getKindleInfoValueForHash(hashedKey):
global kindleDatabase
global charMap1
global charMap2
encryptedValue = decode(kindleDatabase[hashedKey],charMap2)
if sys.platform.startswith('win'):
return CryptUnprotectData(encryptedValue,"")
else:
cleartext = CryptUnprotectData(encryptedValue)
return decode(cleartext, charMap1)
# Get a record from the Kindle.info file for the string in "key" (plaintext). Return the decoded and decrypted record
def getKindleInfoValueForKey(key):
global charMap2
return getKindleInfoValueForHash(encodeHash(key,charMap2))
# Find if the original string for a hashed/encoded string is known. If so, return the original string; otherwise return an empty string.
def findNameForHash(hash):
global charMap2
names = ["kindle.account.tokens","kindle.cookie.item","eulaVersionAccepted","login_date","kindle.token.item","login","kindle.key.item","kindle.name.info","kindle.device.info", "MazamaRandomNumber"]
result = ""
for name in names:
if hash == encodeHash(name, charMap2):
result = name
break
return result
# Print all the records from the kindle.info file (option -i)
def printKindleInfo():
for record in kindleDatabase:
name = findNameForHash(record)
if name != "" :
print (name)
print ("--------------------------")
else :
print ("Unknown Record")
print getKindleInfoValueForHash(record)
print "\n"
#
# PID generation routines
#
# Returns the two bits at offset from a bit field
def getTwoBitsFromBitField(bitField,offset):
byteNumber = offset // 4
bitPosition = 6 - 2*(offset % 4)
return ord(bitField[byteNumber]) >> bitPosition & 3
# Returns the six bits at offset from a bit field
def getSixBitsFromBitField(bitField,offset):
offset *= 3
value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
return value
# Encode the hash six bits at a time through charMap3 to generate the PID string
def encodePID(hash):
global charMap3
PID = ""
for position in range (0,8):
PID += charMap3[getSixBitsFromBitField(hash,position)]
return PID
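# Worked example (illustrative only): 0xE4 is 11100100 in binary, so its
# two-bit groups from the high end are 3, 2, 1, 0:
#   >>> [getTwoBitsFromBitField('\xE4', i) for i in range(4)]
#   [3, 2, 1, 0]
#   >>> getSixBitsFromBitField('\xE4', 0)   # (3 << 4) + (2 << 2) + 1
#   57
# encodePID() maps eight such 6-bit values through charMap3 to build the PID.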
# Encryption table used to generate the device PID
def generatePidEncryptionTable() :
table = []
for counter1 in range (0,0x100):
value = counter1
for counter2 in range (0,8):
if (value & 1) == 0 :
value = value >> 1
else :
value = (value >> 1) ^ 0xEDB88320
table.append(value)
return table
# Seed value used to generate the device PID
def generatePidSeed(table,dsn) :
value = 0
for counter in range (0,4) :
index = (ord(dsn[counter]) ^ value) &0xFF
value = (value >> 8) ^ table[index]
return value
# Generate the device PID
def generateDevicePID(table,dsn,nbRoll):
global charMap4
seed = generatePidSeed(table,dsn)
pidAscii = ""
pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
index = 0
for counter in range (0,nbRoll):
pid[index] = pid[index] ^ ord(dsn[counter])
index = (index+1) %8
for counter in range (0,8):
index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
pidAscii += charMap4[index]
return pidAscii
def crc32(s):
return (~binascii.crc32(s,-1))&0xFFFFFFFF
# convert from 8 digit PID to 10 digit PID with checksum
def checksumPid(s):
global charMap4
crc = crc32(s)
crc = crc ^ (crc >> 16)
res = s
l = len(charMap4)
for i in (0,1):
b = crc & 0xff
pos = (b // l) ^ (b % l)
res += charMap4[pos%l]
crc >>= 8
return res
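# Usage note (illustrative only): checksumPid() appends two characters drawn
# from charMap4, so an 8 character PID becomes the familiar 10 character form:
#   >>> len(checksumPid('12345678'))
#   10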
# old kindle serial number to fixed pid
def pidFromSerial(s, l):
global charMap4
crc = crc32(s)
arr1 = [0]*l
for i in xrange(len(s)):
arr1[i%l] ^= ord(s[i])
crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
for i in xrange(l):
arr1[i] ^= crc_bytes[i&3]
pid = ""
for i in xrange(l):
b = arr1[i] & 0xff
pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
return pid
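# As used in getKindlePid() below: pidFromSerial(serialnum, 7) yields a
# 7 character fixed PID which is suffixed with '*' and checksummed to the
# usual 10 characters for Kindles running pre-2.5 firmware.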
# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
def getKindlePid(pidlst, rec209, token, serialnum):
if rec209 != None:
# Compute book PID
pidHash = SHA1(serialnum+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pidlst.append(bookPID)
# compute fixed pid for old pre 2.5 firmware update pid as well
bookPID = pidFromSerial(serialnum, 7) + "*"
bookPID = checksumPid(bookPID)
pidlst.append(bookPID)
return pidlst
# Parse the EXTH header records and parse the Kindleinfo
# file to calculate the book pid.
def getK4Pids(pidlst, rec209, token, kInfoFile=None):
global kindleDatabase
global charMap1
kindleDatabase = None
try:
kindleDatabase = parseKindleInfo(kInfoFile)
except Exception, message:
print(message)
pass
if kindleDatabase == None :
return pidlst
# Get the Mazama Random number
MazamaRandomNumber = getKindleInfoValueForKey("MazamaRandomNumber")
# Get the HDD serial
encodedSystemVolumeSerialNumber = encodeHash(GetVolumeSerialNumber(),charMap1)
# Get the current user name
encodedUsername = encodeHash(GetUserName(),charMap1)
# concat, hash and encode to calculate the DSN
DSN = encode(SHA1(MazamaRandomNumber+encodedSystemVolumeSerialNumber+encodedUsername),charMap1)
# Compute the device PID (which, for all I can tell, is used for nothing).
table = generatePidEncryptionTable()
devicePID = generateDevicePID(table,DSN,4)
devicePID = checksumPid(devicePID)
pidlst.append(devicePID)
# Compute book PID
if rec209 == None:
print "\nNo EXTH record type 209 - Perhaps not a K4 file?"
return pidlst
# Get the kindle account token
kindleAccountToken = getKindleInfoValueForKey("kindle.account.tokens")
# book pid
pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pidlst.append(bookPID)
# variant 1
pidHash = SHA1(kindleAccountToken+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pidlst.append(bookPID)
# variant 2
pidHash = SHA1(DSN+rec209+token)
bookPID = encodePID(pidHash)
bookPID = checksumPid(bookPID)
pidlst.append(bookPID)
return pidlst
def getPidList(md1, md2, k4, pids, serials, kInfoFiles):
pidlst = []
if k4:
pidlst = getK4Pids(pidlst, md1, md2)
for infoFile in kInfoFiles:
pidlst = getK4Pids(pidlst, md1, md2, infoFile)
for serialnum in serials:
pidlst = getKindlePid(pidlst, md1, md2, serialnum)
for pid in pids:
pidlst.append(pid)
return pidlst


@@ -0,0 +1,243 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.6
import csv
import sys
import os
import getopt
from struct import pack
from struct import unpack
class DocParser(object):
def __init__(self, flatxml, fontsize, ph, pw):
self.flatdoc = flatxml.split('\n')
self.fontsize = int(fontsize)
self.ph = int(ph) * 1.0
self.pw = int(pw) * 1.0
stags = {
'paragraph' : 'p',
'graphic' : '.graphic'
}
attr_val_map = {
'hang' : 'text-indent: ',
'indent' : 'text-indent: ',
'line-space' : 'line-height: ',
'margin-bottom' : 'margin-bottom: ',
'margin-left' : 'margin-left: ',
'margin-right' : 'margin-right: ',
'margin-top' : 'margin-top: ',
'space-after' : 'padding-bottom: ',
}
attr_str_map = {
'align-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
'align-left' : 'text-align: left;',
'align-right' : 'text-align: right;',
'align-justify' : 'text-align: justify;',
'display-inline' : 'display: inline;',
'pos-left' : 'text-align: left;',
'pos-right' : 'text-align: right;',
'pos-center' : 'text-align: center; margin-left: auto; margin-right: auto;',
}
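# Illustrative scaling example (see process() below): a value-based rule such
# as indent=850 on a page 8500 units wide becomes pv = 850/8500 = 0.1 and is
# emitted as 'text-indent: 10.0%;' in the generated stylesheet.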
# find a tag in the flattened document between pos and end (inclusive)
def findinDoc(self, tagpath, pos, end) :
result = None
docList = self.flatdoc
cnt = len(docList)
if end == -1 :
end = cnt
else:
end = min(cnt,end)
foundat = -1
for j in xrange(pos, end):
item = docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
name = item
argres = ''
if name.endswith(tagpath) :
result = argres
foundat = j
break
return foundat, result
# return list of start positions for the tagpath
def posinDoc(self, tagpath):
startpos = []
pos = 0
res = ""
while res != None :
(foundpos, res) = self.findinDoc(tagpath, pos, -1)
if res != None :
startpos.append(foundpos)
pos = foundpos + 1
return startpos
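# Illustrative only: flattened lines look like 'book.stylesheet.style._tag=paragraph',
# so findinDoc('style._tag', 0, -1) would return that line's position together
# with the value 'paragraph', and posinDoc() collects all such start positions.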
def process(self):
classlst = ''
csspage = '.cl-center { text-align: center; margin-left: auto; margin-right: auto; }\n'
csspage += '.cl-right { text-align: right; }\n'
csspage += '.cl-left { text-align: left; }\n'
csspage += '.cl-justify { text-align: justify; }\n'
# generate a list of each <style> starting point in the stylesheet
styleList= self.posinDoc('book.stylesheet.style')
stylecnt = len(styleList)
styleList.append(-1)
# process each style converting what you can
for j in xrange(stylecnt):
start = styleList[j]
end = styleList[j+1]
(pos, tag) = self.findinDoc('style._tag',start,end)
if tag == None :
(pos, tag) = self.findinDoc('style.type',start,end)
# Is this something we know how to convert to css
if tag in self.stags :
# get the style class
(pos, sclass) = self.findinDoc('style.class',start,end)
if sclass != None:
sclass = sclass.replace(' ','-')
sclass = '.cl-' + sclass.lower()
else :
sclass = ''
# check for any "after class" specifiers
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
if aftclass != None:
aftclass = aftclass.replace(' ','-')
aftclass = '.cl-' + aftclass.lower()
else :
aftclass = ''
cssargs = {}
while True :
(pos1, attr) = self.findinDoc('style.rule.attr', start, end)
(pos2, val) = self.findinDoc('style.rule.value', start, end)
if attr == None : break
if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
# handle text based attributes
attr = attr + '-' + val
if attr in self.attr_str_map :
cssargs[attr] = (self.attr_str_map[attr], '')
else :
# handle value based attributes
if attr in self.attr_val_map :
name = self.attr_val_map[attr]
if attr in ('margin-bottom', 'margin-top', 'space-after') :
scale = self.ph
elif attr in ('margin-right', 'indent', 'margin-left', 'hang') :
scale = self.pw
elif attr == 'line-space':
scale = self.fontsize * 2.0
if not ((attr == 'hang') and (int(val) == 0)) :
pv = float(val)/scale
cssargs[attr] = (self.attr_val_map[attr], pv)
keep = True
start = max(pos1, pos2) + 1
# disable all of the after class tags until I figure out how to handle them
if aftclass != "" : keep = False
if keep :
# make sure line-space does not go below 100% or above 300% since
# it can be wacky in some styles
if 'line-space' in cssargs:
seg = cssargs['line-space'][0]
val = cssargs['line-space'][1]
if val < 1.0: val = 1.0
if val > 3.0: val = 3.0
del cssargs['line-space']
cssargs['line-space'] = (self.attr_val_map['line-space'], val)
# handle modifications for css style hanging indents
if 'hang' in cssargs:
hseg = cssargs['hang'][0]
hval = cssargs['hang'][1]
del cssargs['hang']
cssargs['hang'] = (self.attr_val_map['hang'], -hval)
mval = 0
mseg = 'margin-left: '
mval = hval
if 'margin-left' in cssargs:
mseg = cssargs['margin-left'][0]
mval = cssargs['margin-left'][1]
if mval < 0: mval = 0
mval = hval + mval
cssargs['margin-left'] = (mseg, mval)
if 'indent' in cssargs:
del cssargs['indent']
cssline = sclass + ' { '
for key in iter(cssargs):
mseg = cssargs[key][0]
mval = cssargs[key][1]
if mval == '':
cssline += mseg + ' '
else :
aseg = mseg + '%.1f%%;' % (mval * 100.0)
cssline += aseg + ' '
cssline += '}'
if sclass != '' :
classlst += sclass + '\n'
# handle special case of paragraph class used inside chapter heading
# and non-chapter headings
if sclass != '' :
ctype = sclass[4:7]
if ctype == 'ch1' :
csspage += 'h1' + cssline + '\n'
if ctype == 'ch2' :
csspage += 'h2' + cssline + '\n'
if ctype == 'ch3' :
csspage += 'h3' + cssline + '\n'
if ctype == 'h1-' :
csspage += 'h4' + cssline + '\n'
if ctype == 'h2-' :
csspage += 'h5' + cssline + '\n'
if ctype == 'h3_' :
csspage += 'h6' + cssline + '\n'
if cssline != ' { }':
csspage += self.stags[tag] + cssline + '\n'
return csspage, classlst
def convert2CSS(flatxml, fontsize, ph, pw):
print ' ', 'Using font size:',fontsize
print ' ', 'Using page height:', ph
print ' ', 'Using page width:', pw
# create a document parser
dp = DocParser(flatxml, fontsize, ph, pw)
csspage = dp.process()
return csspage


@@ -0,0 +1,434 @@
#!/usr/bin/env python
class Unbuffered:
def __init__(self, stream):
self.stream = stream
def write(self, data):
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
import sys
sys.stdout=Unbuffered(sys.stdout)
import os, csv, getopt
import zlib, zipfile, tempfile, shutil
from struct import pack
from struct import unpack
class TpzDRMError(Exception):
pass
# local support routines
import kgenpids
import genbook
#
# Utility routines
#
# Get a 7 bit encoded number from file
def bookReadEncodedNumber(fo):
flag = False
data = ord(fo.read(1))
if data == 0xFF:
flag = True
data = ord(fo.read(1))
if data >= 0x80:
datax = (data & 0x7F)
while data >= 0x80 :
data = ord(fo.read(1))
datax = (datax <<7) + (data & 0x7F)
data = datax
if flag:
data = -data
return data
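# Worked example (illustrative only): the two bytes 0x82 0x2C decode to
# ((0x82 & 0x7F) << 7) + (0x2C & 0x7F) = 300; a leading 0xFF byte marks the
# value as negative.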
# Get a length prefixed string from file
def bookReadString(fo):
stringLength = bookReadEncodedNumber(fo)
return unpack(str(stringLength)+"s",fo.read(stringLength))[0]
#
# crypto routines
#
# Context initialisation for the Topaz Crypto
def topazCryptoInit(key):
ctx1 = 0x0CAFFE19E
for keyChar in key:
keyByte = ord(keyChar)
ctx2 = ctx1
ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) & 0xFFFFFFFF) ^ ((keyByte * keyByte * 0x0F902007) & 0xFFFFFFFF)
return [ctx1,ctx2]
# decrypt data with the context prepared by topazCryptoInit()
def topazCryptoDecrypt(data, ctx):
ctx1 = ctx[0]
ctx2 = ctx[1]
plainText = ""
for dataChar in data:
dataByte = ord(dataChar)
m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
ctx2 = ctx1
ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
plainText += chr(m)
return plainText
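# Note: the context is rolled forward with the decrypted byte m, so the
# keystream depends on the plaintext recovered so far; decryptRecord() below
# packages init + decrypt for one-shot use.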
# Decrypt data with the PID
def decryptRecord(data,PID):
ctx = topazCryptoInit(PID)
return topazCryptoDecrypt(data, ctx)
# Try to decrypt a dkey record (contains the bookPID)
def decryptDkeyRecord(data,PID):
record = decryptRecord(data,PID)
fields = unpack("3sB8sB8s3s",record)
if fields[0] != "PID" or fields[5] != "pid" :
raise TpzDRMError("Didn't find PID magic numbers in record")
elif fields[1] != 8 or fields[3] != 8 :
raise TpzDRMError("Record didn't contain correct length fields")
elif fields[2] != PID :
raise TpzDRMError("Record didn't contain PID")
return fields[4]
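# Layout of a decrypted dkey record, as implied by the unpack format above
# (24 bytes): 'PID' | 0x08 | PID(8) | 0x08 | bookKey(8) | 'pid'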
# Decrypt all dkey records (contain the book PID)
def decryptDkeyRecords(data,PID):
nbKeyRecords = ord(data[0])
records = []
data = data[1:]
for i in range (0,nbKeyRecords):
length = ord(data[0])
try:
key = decryptDkeyRecord(data[1:length+1],PID)
records.append(key)
except TpzDRMError:
pass
data = data[1+length:]
if len(records) == 0:
raise TpzDRMError("BookKey Not Found")
return records
class TopazBook:
def __init__(self, filename, outdir):
self.fo = file(filename, 'rb')
self.outdir = outdir
self.bookPayloadOffset = 0
self.bookHeaderRecords = {}
self.bookMetadata = {}
self.bookKey = None
magic = unpack("4s",self.fo.read(4))[0]
if magic != 'TPZ0':
raise TpzDRMError("Parse Error : Invalid Header, not a Topaz file")
self.parseTopazHeaders()
self.parseMetadata()
def parseTopazHeaders(self):
def bookReadHeaderRecordData():
# Read and return the data of one header record at the current book file position
# [[offset,decompressedLength,compressedLength],...]
nbValues = bookReadEncodedNumber(self.fo)
values = []
for i in range (0,nbValues):
values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
return values
def parseTopazHeaderRecord():
# Read and parse one header record at the current book file position and return the associated data
# [[offset,decompressedLength,compressedLength],...]
if ord(self.fo.read(1)) != 0x63:
raise TpzDRMError("Parse Error : Invalid Header")
tag = bookReadString(self.fo)
record = bookReadHeaderRecordData()
return [tag,record]
nbRecords = bookReadEncodedNumber(self.fo)
for i in range (0,nbRecords):
result = parseTopazHeaderRecord()
# print result[0], result[1]
self.bookHeaderRecords[result[0]] = result[1]
if ord(self.fo.read(1)) != 0x64 :
raise TpzDRMError("Parse Error : Invalid Header")
self.bookPayloadOffset = self.fo.tell()
def parseMetadata(self):
# Parse the metadata record from the book payload and return a list of [key,values]
self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords["metadata"][0][0])
tag = bookReadString(self.fo)
if tag != "metadata" :
raise TpzDRMError("Parse Error : Record Names Don't Match")
flags = ord(self.fo.read(1))
nbRecords = ord(self.fo.read(1))
for i in range (0,nbRecords) :
record = [bookReadString(self.fo), bookReadString(self.fo)]
self.bookMetadata[record[0]] = record[1]
return self.bookMetadata
def getPIDMetaInfo(self):
keysRecord = None
keysRecordRecord = None
if 'keys' in self.bookMetadata:
keysRecord = self.bookMetadata['keys']
keysRecordRecord = self.bookMetadata[keysRecord]
return keysRecord, keysRecordRecord
def getBookTitle(self):
title = ''
if 'Title' in self.bookMetadata:
title = self.bookMetadata['Title']
return title
def setBookKey(self, key):
self.bookKey = key
def getBookPayloadRecord(self, name, index):
# Get a record in the book payload, given its name and index.
# decrypted and decompressed if necessary
encrypted = False
compressed = False
try:
recordOffset = self.bookHeaderRecords[name][index][0]
except:
raise TpzDRMError("Parse Error : Invalid Record, record not found")
self.fo.seek(self.bookPayloadOffset + recordOffset)
tag = bookReadString(self.fo)
if tag != name :
raise TpzDRMError("Parse Error : Invalid Record, record name doesn't match")
recordIndex = bookReadEncodedNumber(self.fo)
if recordIndex < 0 :
encrypted = True
recordIndex = -recordIndex -1
if recordIndex != index :
raise TpzDRMError("Parse Error : Invalid Record, index doesn't match")
if (self.bookHeaderRecords[name][index][2] > 0):
compressed = True
record = self.fo.read(self.bookHeaderRecords[name][index][2])
else:
record = self.fo.read(self.bookHeaderRecords[name][index][1])
if encrypted:
if self.bookKey:
ctx = topazCryptoInit(self.bookKey)
record = topazCryptoDecrypt(record,ctx)
else :
raise TpzDRMError("Error: Attempt to decrypt without bookKey")
if compressed:
record = zlib.decompress(record)
return record
def processBook(self, pidlst):
raw = 0
fixedimage=True
try:
keydata = self.getBookPayloadRecord('dkey', 0)
except TpzDRMError, e:
print "no dkey record found, book may not be encrypted"
print "attempting to extrct files without a book key"
self.createBookDirectory()
self.extractFiles()
print "Successfully Extracted Topaz contents"
rv = genbook.generateBook(self.outdir, raw, fixedimage)
if rv == 0:
print "\nBook Successfully generated"
return rv
# try each pid to decode the file
bookKey = None
for pid in pidlst:
# use 8 digit pids here
pid = pid[0:8]
print "\nTrying: ", pid
bookKeys = []
data = keydata
try:
bookKeys+=decryptDkeyRecords(data,pid)
except TpzDRMError, e:
pass
else:
bookKey = bookKeys[0]
print "Book Key Found!"
break
if not bookKey:
raise TpzDRMError('Decryption Unsuccessful; No valid pid found')
self.setBookKey(bookKey)
self.createBookDirectory()
self.extractFiles()
print "Successfully Extracted Topaz contents"
rv = genbook.generateBook(self.outdir, raw, fixedimage)
if rv == 0:
print "\nBook Successfully generated"
return rv
def createBookDirectory(self):
outdir = self.outdir
# create output directory structure
if not os.path.exists(outdir):
os.makedirs(outdir)
destdir = os.path.join(outdir,'img')
if not os.path.exists(destdir):
os.makedirs(destdir)
destdir = os.path.join(outdir,'color_img')
if not os.path.exists(destdir):
os.makedirs(destdir)
destdir = os.path.join(outdir,'page')
if not os.path.exists(destdir):
os.makedirs(destdir)
destdir = os.path.join(outdir,'glyphs')
if not os.path.exists(destdir):
os.makedirs(destdir)
def extractFiles(self):
outdir = self.outdir
for headerRecord in self.bookHeaderRecords:
name = headerRecord
if name != "dkey" :
ext = '.dat'
if name == 'img' : ext = '.jpg'
if name == 'color' : ext = '.jpg'
print "\nProcessing Section: %s " % name
for index in range (0,len(self.bookHeaderRecords[name])) :
fnum = "%04d" % index
fname = name + fnum + ext
destdir = outdir
if name == 'img':
destdir = os.path.join(outdir,'img')
if name == 'color':
destdir = os.path.join(outdir,'color_img')
if name == 'page':
destdir = os.path.join(outdir,'page')
if name == 'glyphs':
destdir = os.path.join(outdir,'glyphs')
outputFile = os.path.join(destdir,fname)
print ".",
record = self.getBookPayloadRecord(name,index)
if record != '':
file(outputFile, 'wb').write(record)
print " "
def zipUpDir(myzip, tempdir,localname):
currentdir = tempdir
if localname != "":
currentdir = os.path.join(currentdir,localname)
filenames = os.listdir(currentdir)
for afilename in filenames:
localfilePath = os.path.join(localname, afilename)
realfilePath = os.path.join(currentdir, afilename)
if os.path.isfile(realfilePath):
myzip.write(realfilePath, localfilePath)
elif os.path.isdir(realfilePath):
zipUpDir(myzip, tempdir, localfilePath)
def usage(progname):
print "Removes DRM protection from Topaz ebooks and extract the contents"
print "Usage:"
print " %s [-k <kindle.info>] [-p <pidnums>] [-s <kindleSerialNumbers>] <infile> <outdir> " % progname
# Main
def main(argv=sys.argv):
progname = os.path.basename(argv[0])
k4 = False
pids = []
serials = []
kInfoFiles = []
try:
opts, args = getopt.getopt(argv[1:], "k:p:s:")
except getopt.GetoptError, err:
print str(err)
usage(progname)
return 1
if len(args)<2:
usage(progname)
return 1
for o, a in opts:
if o == "-k":
if a == None :
print "Invalid parameter for -k"
return 1
kInfoFiles.append(a)
if o == "-p":
if a == None :
print "Invalid parameter for -p"
return 1
pids = a.split(',')
if o == "-s":
if a == None :
print "Invalid parameter for -s"
return 1
serials = a.split(',')
k4 = True
infile = args[0]
outdir = args[1]
if not os.path.isfile(infile):
print "Input File Does Not Exist"
return 1
bookname = os.path.splitext(os.path.basename(infile))[0]
tempdir = tempfile.mkdtemp()
tb = TopazBook(infile, tempdir)
title = tb.getBookTitle()
print "Processing Book: ", title
keysRecord, keysRecordRecord = tb.getPIDMetaInfo()
pidlst = kgenpids.getPidList(keysRecord, keysRecordRecord, k4, pids, serials, kInfoFiles)
try:
tb.processBook(pidlst)
except TpzDRMError, e:
print str(e)
print " Creating DeBug Full Zip Archive of Book"
zipname = os.path.join(outdir, bookname + '_debug' + '.zip')
myzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
zipUpDir(myzip, tempdir, '')
myzip.close()
return 1
print " Creating HTML ZIP Archive"
zipname = os.path.join(outdir, bookname + '_nodrm' + '.zip')
myzip1 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
myzip1.write(os.path.join(tempdir,'book.html'),'book.html')
myzip1.write(os.path.join(tempdir,'book.opf'),'book.opf')
if os.path.isfile(os.path.join(tempdir,'cover.jpg')):
myzip1.write(os.path.join(tempdir,'cover.jpg'),'cover.jpg')
myzip1.write(os.path.join(tempdir,'style.css'),'style.css')
zipUpDir(myzip1, tempdir, 'img')
myzip1.close()
print " Creating SVG ZIP Archive"
zipname = os.path.join(outdir, bookname + '_SVG' + '.zip')
myzip2 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
myzip2.write(os.path.join(tempdir,'index_svg.xhtml'),'index_svg.xhtml')
zipUpDir(myzip2, tempdir, 'svg')
zipUpDir(myzip2, tempdir, 'img')
myzip2.close()
print " Creating XML ZIP Archive"
zipname = os.path.join(outdir, bookname + '_XML' + '.zip')
myzip3 = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
targetdir = os.path.join(tempdir,'xml')
zipUpDir(myzip3, targetdir, '')
zipUpDir(myzip3, tempdir, 'img')
myzip3.close()
shutil.rmtree(tempdir)
return 0
if __name__ == '__main__':
sys.exit(main())