Remove stand-alone apps. Only support the two plugins.

2020-02-16 10:12:25 +00:00
parent ef3c7f261c
commit 92bf51bc8f
135 changed files with 596 additions and 1582 deletions
--- a/DeDRM_plugin/flatxml2html.py
+++ b/DeDRM_plugin/flatxml2html.py
@@ -0,0 +1,802 @@
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
+
+from __future__ import print_function
+import sys
+import csv
+import os
+import math
+import getopt
+from struct import pack
+from struct import unpack
+
+
+class DocParser(object):
+    def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
+        self.id = os.path.basename(fileid).replace('.dat','')
+        self.svgcount = 0
+        self.docList = flatxml.split('\n')
+        self.docSize = len(self.docList)
+        self.classList = {}
+        self.bookDir = bookDir
+        self.gdict = gdict
+        tmpList = classlst.split('\n')
+        for pclass in tmpList:
+            if pclass != '':
+                # remove the leading period from the css name
+                cname = pclass[1:]
+            self.classList[cname] = True
+        self.fixedimage = fixedimage
+        self.ocrtext = []
+        self.link_id = []
+        self.link_title = []
+        self.link_page = []
+        self.link_href = []
+        self.link_type = []
+        self.dehyphen_rootid = []
+        self.paracont_stemid = []
+        self.parastems_stemid = []
+
+
+    def getGlyph(self, gid):
+        result = ''
+        id='id="gl%d"' % gid
+        return self.gdict.lookup(id)
+
+    def glyphs_to_image(self, glyphList):
+
+        def extract(path, key):
+            b = path.find(key) + len(key)
+            e = path.find(' ',b)
+            return int(path[b:e])
+
+        svgDir = os.path.join(self.bookDir,'svg')
+
+        imgDir = os.path.join(self.bookDir,'img')
+        imgname = self.id + '_%04d.svg' % self.svgcount
+        imgfile = os.path.join(imgDir,imgname)
+
+        # get glyph information
+        gxList = self.getData('info.glyph.x',0,-1)
+        gyList = self.getData('info.glyph.y',0,-1)
+        gidList = self.getData('info.glyph.glyphID',0,-1)
+
+        gids = []
+        maxws = []
+        maxhs = []
+        xs = []
+        ys = []
+        gdefs = []
+
+        # get path defintions, positions, dimensions for each glyph
+        # that makes up the image, and find min x and min y to reposition origin
+        minx = -1
+        miny = -1
+        for j in glyphList:
+            gid = gidList[j]
+            gids.append(gid)
+
+            xs.append(gxList[j])
+            if minx == -1: minx = gxList[j]
+            else : minx = min(minx, gxList[j])
+
+            ys.append(gyList[j])
+            if miny == -1: miny = gyList[j]
+            else : miny = min(miny, gyList[j])
+
+            path = self.getGlyph(gid)
+            gdefs.append(path)
+
+            maxws.append(extract(path,'width='))
+            maxhs.append(extract(path,'height='))
+
+
+        # change the origin to minx, miny and calc max height and width
+        maxw = maxws[0] + xs[0] - minx
+        maxh = maxhs[0] + ys[0] - miny
+        for j in xrange(0, len(xs)):
+            xs[j] = xs[j] - minx
+            ys[j] = ys[j] - miny
+            maxw = max( maxw, (maxws[j] + xs[j]) )
+            maxh = max( maxh, (maxhs[j] + ys[j]) )
+
+        # open the image file for output
+        ifile = open(imgfile,'w')
+        ifile.write('<?xml version="1.0" standalone="no"?>\n')
+        ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+        ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
+        ifile.write('<defs>\n')
+        for j in xrange(0,len(gdefs)):
+            ifile.write(gdefs[j])
+        ifile.write('</defs>\n')
+        for j in xrange(0,len(gids)):
+            ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
+        ifile.write('</svg>')
+        ifile.close()
+
+        return 0
+
+
+
+    # return tag at line pos in document
+    def lineinDoc(self, pos) :
+        if (pos >= 0) and (pos < self.docSize) :
+            item = self.docList[pos]
+            if item.find('=') >= 0:
+                (name, argres) = item.split('=',1)
+            else :
+                name = item
+                argres = ''
+        return name, argres
+
+
+    # find tag in doc if within pos to end inclusive
+    def findinDoc(self, tagpath, pos, end) :
+        result = None
+        if end == -1 :
+            end = self.docSize
+        else:
+            end = min(self.docSize, end)
+        foundat = -1
+        for j in xrange(pos, end):
+            item = self.docList[j]
+            if item.find('=') >= 0:
+                (name, argres) = item.split('=',1)
+            else :
+                name = item
+                argres = ''
+            if name.endswith(tagpath) :
+                result = argres
+                foundat = j
+                break
+        return foundat, result
+
+
+    # return list of start positions for the tagpath
+    def posinDoc(self, tagpath):
+        startpos = []
+        pos = 0
+        res = ""
+        while res != None :
+            (foundpos, res) = self.findinDoc(tagpath, pos, -1)
+            if res != None :
+                startpos.append(foundpos)
+            pos = foundpos + 1
+        return startpos
+
+
+    # returns a vector of integers for the tagpath
+    def getData(self, tagpath, pos, end):
+        argres=[]
+        (foundat, argt) = self.findinDoc(tagpath, pos, end)
+        if (argt != None) and (len(argt) > 0) :
+            argList = argt.split('|')
+            argres = [ int(strval) for strval in argList]
+        return argres
+
+
+    # get the class
+    def getClass(self, pclass):
+        nclass = pclass
+
+        # class names are an issue given topaz may start them with numerals (not allowed),
+        # use a mix of cases (which cause some browsers problems), and actually
+        # attach numbers after "_reclustered*" to the end to deal classeses that inherit
+        # from a base class (but then not actually provide all of these _reclustereed
+        # classes in the stylesheet!
+
+        # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
+        # that exists in the stylesheet first, and then adding this specific class
+        # after
+
+        # also some class names have spaces in them so need to convert to dashes
+        if nclass != None :
+            nclass = nclass.replace(' ','-')
+            classres = ''
+            nclass = nclass.lower()
+            nclass = 'cl-' + nclass
+            baseclass = ''
+            # graphic is the base class for captions
+            if nclass.find('cl-cap-') >=0 :
+                classres = 'graphic' + ' '
+            else :
+                # strip to find baseclass
+                p = nclass.find('_')
+                if p > 0 :
+                    baseclass = nclass[0:p]
+                    if baseclass in self.classList:
+                        classres += baseclass + ' '
+            classres += nclass
+            nclass = classres
+        return nclass
+
+
+    # develop a sorted description of the starting positions of
+    # groups and regions on the page, as well as the page type
+    def PageDescription(self):
+
+        def compare(x, y):
+            (xtype, xval) = x
+            (ytype, yval) = y
+            if xval > yval:
+                return 1
+            if xval == yval:
+                return 0
+            return -1
+
+        result = []
+        (pos, pagetype) = self.findinDoc('page.type',0,-1)
+
+        groupList = self.posinDoc('page.group')
+        groupregionList = self.posinDoc('page.group.region')
+        pageregionList = self.posinDoc('page.region')
+        # integrate into one list
+        for j in groupList:
+            result.append(('grpbeg',j))
+        for j in groupregionList:
+            result.append(('gregion',j))
+        for j in pageregionList:
+            result.append(('pregion',j))
+        result.sort(compare)
+
+        # insert group end and page end indicators
+        inGroup = False
+        j = 0
+        while True:
+            if j == len(result): break
+            rtype = result[j][0]
+            rval = result[j][1]
+            if not inGroup and (rtype == 'grpbeg') :
+                inGroup = True
+                j = j + 1
+            elif inGroup and (rtype in ('grpbeg', 'pregion')):
+                result.insert(j,('grpend',rval))
+                inGroup = False
+            else:
+                j = j + 1
+        if inGroup:
+            result.append(('grpend',-1))
+        result.append(('pageend', -1))
+        return pagetype, result
+
+
+
+    # build a description of the paragraph
+    def getParaDescription(self, start, end, regtype):
+
+        result = []
+
+        # paragraph
+        (pos, pclass) = self.findinDoc('paragraph.class',start,end)
+
+        pclass = self.getClass(pclass)
+
+        # if paragraph uses extratokens (extra glyphs) then make it fixed
+        (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
+
+        # build up a description of the paragraph in result and return it
+        # first check for the  basic - all words paragraph
+        (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
+        (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
+        if (sfirst != None) and (slast != None) :
+            first = int(sfirst)
+            last = int(slast)
+
+            makeImage = (regtype == 'vertical') or (regtype == 'table')
+            makeImage = makeImage or (extraglyphs != None)
+            if self.fixedimage:
+                makeImage = makeImage or (regtype == 'fixed')
+
+            if (pclass != None):
+                makeImage = makeImage or (pclass.find('.inverted') >= 0)
+                if self.fixedimage :
+                    makeImage = makeImage or (pclass.find('cl-f-') >= 0)
+
+            # before creating an image make sure glyph info exists
+            gidList = self.getData('info.glyph.glyphID',0,-1)
+
+            makeImage = makeImage & (len(gidList) > 0)
+
+            if not makeImage :
+                # standard all word paragraph
+                for wordnum in xrange(first, last):
+                    result.append(('ocr', wordnum))
+                return pclass, result
+
+            # convert paragraph to svg image
+            # translate first and last word into first and last glyphs
+            # and generate inline image and include it
+            glyphList = []
+            firstglyphList = self.getData('word.firstGlyph',0,-1)
+            gidList = self.getData('info.glyph.glyphID',0,-1)
+            firstGlyph = firstglyphList[first]
+            if last < len(firstglyphList):
+                lastGlyph = firstglyphList[last]
+            else :
+                lastGlyph = len(gidList)
+
+            # handle case of white sapce paragraphs with no actual glyphs in them
+            # by reverting to text based paragraph
+            if firstGlyph >= lastGlyph:
+                # revert to standard text based paragraph
+                for wordnum in xrange(first, last):
+                    result.append(('ocr', wordnum))
+                return pclass, result
+
+            for glyphnum in xrange(firstGlyph, lastGlyph):
+                glyphList.append(glyphnum)
+            # include any extratokens if they exist
+            (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
+            (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
+            if (sfg != None) and (slg != None):
+                for glyphnum in xrange(int(sfg), int(slg)):
+                    glyphList.append(glyphnum)
+            num = self.svgcount
+            self.glyphs_to_image(glyphList)
+            self.svgcount += 1
+            result.append(('svg', num))
+            return pclass, result
+
+        # this type of paragraph may be made up of multiple spans, inline
+        # word monograms (images), and words with semantic meaning,
+        # plus glyphs used to form starting letter of first word
+
+        # need to parse this type line by line
+        line = start + 1
+        word_class = ''
+
+        # if end is -1 then we must search to end of document
+        if end == -1 :
+            end = self.docSize
+
+        # seems some xml has last* coming before first* so we have to
+        # handle any order
+        sp_first = -1
+        sp_last = -1
+
+        gl_first = -1
+        gl_last = -1
+
+        ws_first = -1
+        ws_last = -1
+
+        word_class = ''
+
+        word_semantic_type = ''
+
+        while (line < end) :
+
+            (name, argres) = self.lineinDoc(line)
+
+            if name.endswith('span.firstWord') :
+                sp_first = int(argres)
+
+            elif name.endswith('span.lastWord') :
+                sp_last = int(argres)
+
+            elif name.endswith('word.firstGlyph') :
+                gl_first = int(argres)
+
+            elif name.endswith('word.lastGlyph') :
+                gl_last = int(argres)
+
+            elif name.endswith('word_semantic.firstWord'):
+                ws_first = int(argres)
+
+            elif name.endswith('word_semantic.lastWord'):
+                ws_last = int(argres)
+
+            elif name.endswith('word.class'):
+                # we only handle spaceafter word class
+                try:
+                    (cname, space) = argres.split('-',1)
+                    if space == '' : space = '0'
+                    if (cname == 'spaceafter') and (int(space) > 0) :
+                        word_class = 'sa'
+                except:
+                    pass
+
+            elif name.endswith('word.img.src'):
+                result.append(('img' + word_class, int(argres)))
+                word_class = ''
+
+            elif name.endswith('region.img.src'):
+                result.append(('img' + word_class, int(argres)))
+
+            if (sp_first != -1) and (sp_last != -1):
+                for wordnum in xrange(sp_first, sp_last):
+                    result.append(('ocr', wordnum))
+                sp_first = -1
+                sp_last = -1
+
+            if (gl_first != -1) and (gl_last != -1):
+                glyphList = []
+                for glyphnum in xrange(gl_first, gl_last):
+                    glyphList.append(glyphnum)
+                num = self.svgcount
+                self.glyphs_to_image(glyphList)
+                self.svgcount += 1
+                result.append(('svg', num))
+                gl_first = -1
+                gl_last = -1
+
+            if (ws_first != -1) and (ws_last != -1):
+                for wordnum in xrange(ws_first, ws_last):
+                    result.append(('ocr', wordnum))
+                ws_first = -1
+                ws_last = -1
+
+            line += 1
+
+        return pclass, result
+
+
+    def buildParagraph(self, pclass, pdesc, type, regtype) :
+        parares = ''
+        sep =''
+
+        classres = ''
+        if pclass :
+            classres = ' class="' + pclass + '"'
+
+        br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
+
+        handle_links = len(self.link_id) > 0
+
+        if (type == 'full') or (type == 'begin') :
+            parares += '<p' + classres + '>'
+
+        if (type == 'end'):
+            parares += ' '
+
+        lstart = len(parares)
+
+        cnt = len(pdesc)
+
+        for j in xrange( 0, cnt) :
+
+            (wtype, num) = pdesc[j]
+
+            if wtype == 'ocr' :
+                try:
+                    word = self.ocrtext[num]
+                except:
+                    word = ""
+
+                sep = ' '
+
+                if handle_links:
+                    link = self.link_id[num]
+                    if (link > 0):
+                        linktype = self.link_type[link-1]
+                        title = self.link_title[link-1]
+                        if (title == "") or (parares.rfind(title) < 0):
+                            title=parares[lstart:]
+                        if linktype == 'external' :
+                            linkhref = self.link_href[link-1]
+                            linkhtml = '<a href="%s">' % linkhref
+                        else :
+                            if len(self.link_page) >= link :
+                                ptarget = self.link_page[link-1] - 1
+                                linkhtml = '<a href="#page%04d">' % ptarget
+                            else :
+                                # just link to the current page
+                                linkhtml = '<a href="#' + self.id + '">'
+                        linkhtml += title + '</a>'
+                        pos = parares.rfind(title)
+                        if pos >= 0:
+                            parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
+                        else :
+                            parares += linkhtml
+                        lstart = len(parares)
+                        if word == '_link_' : word = ''
+                    elif (link < 0) :
+                        if word == '_link_' : word = ''
+
+                if word == '_lb_':
+                    if ((num-1) in self.dehyphen_rootid ) or handle_links:
+                        word = ''
+                        sep = ''
+                    elif br_lb :
+                        word = '<br />\n'
+                        sep = ''
+                    else :
+                        word = '\n'
+                        sep = ''
+
+                if num in self.dehyphen_rootid :
+                    word = word[0:-1]
+                    sep = ''
+
+                parares += word + sep
+
+            elif wtype == 'img' :
+                sep = ''
+                parares += '<img src="img/img%04d.jpg" alt="" />' % num
+                parares += sep
+
+            elif wtype == 'imgsa' :
+                sep = ' '
+                parares += '<img src="img/img%04d.jpg" alt="" />' % num
+                parares += sep
+
+            elif wtype == 'svg' :
+                sep = ''
+                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
+                parares += sep
+
+        if len(sep) > 0 : parares = parares[0:-1]
+        if (type == 'full') or (type == 'end') :
+            parares += '</p>'
+        return parares
+
+
+    def buildTOCEntry(self, pdesc) :
+        parares = ''
+        sep =''
+        tocentry = ''
+        handle_links = len(self.link_id) > 0
+
+        lstart = 0
+
+        cnt = len(pdesc)
+        for j in xrange( 0, cnt) :
+
+            (wtype, num) = pdesc[j]
+
+            if wtype == 'ocr' :
+                word = self.ocrtext[num]
+                sep = ' '
+
+                if handle_links:
+                    link = self.link_id[num]
+                    if (link > 0):
+                        linktype = self.link_type[link-1]
+                        title = self.link_title[link-1]
+                        title = title.rstrip('. ')
+                        alt_title = parares[lstart:]
+                        alt_title = alt_title.strip()
+                        # now strip off the actual printed page number
+                        alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.')
+                        alt_title = alt_title.rstrip('. ')
+                        # skip over any external links - can't have them in a books toc
+                        if linktype == 'external' :
+                            title = ''
+                            alt_title = ''
+                            linkpage = ''
+                        else :
+                            if len(self.link_page) >= link :
+                                ptarget = self.link_page[link-1] - 1
+                                linkpage = '%04d' % ptarget
+                            else :
+                                # just link to the current page
+                                linkpage = self.id[4:]
+                        if len(alt_title) >= len(title):
+                            title = alt_title
+                        if title != '' and linkpage != '':
+                            tocentry += title + '|' + linkpage + '\n'
+                        lstart = len(parares)
+                        if word == '_link_' : word = ''
+                    elif (link < 0) :
+                        if word == '_link_' : word = ''
+
+                if word == '_lb_':
+                    word = ''
+                    sep = ''
+
+                if num in self.dehyphen_rootid :
+                    word = word[0:-1]
+                    sep = ''
+
+                parares += word + sep
+
+            else :
+                continue
+
+        return tocentry
+
+
+
+
+    # walk the document tree collecting the information needed
+    # to build an html page using the ocrText
+
+    def process(self):
+
+        tocinfo = ''
+        hlst = []
+
+        # get the ocr text
+        (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
+        if argres :  self.ocrtext = argres.split('|')
+
+        # get information to dehyphenate the text
+        self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
+
+        # determine if first paragraph is continued from previous page
+        (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
+        first_para_continued = (self.parastems_stemid  != None)
+
+        # determine if last paragraph is continued onto the next page
+        (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
+        last_para_continued = (self.paracont_stemid != None)
+
+        # collect link ids
+        self.link_id = self.getData('info.word.link_id',0,-1)
+
+        # collect link destination page numbers
+        self.link_page = self.getData('info.links.page',0,-1)
+
+        # collect link types (container versus external)
+        (pos, argres) = self.findinDoc('info.links.type',0,-1)
+        if argres :  self.link_type = argres.split('|')
+
+        # collect link destinations
+        (pos, argres) = self.findinDoc('info.links.href',0,-1)
+        if argres :  self.link_href = argres.split('|')
+
+        # collect link titles
+        (pos, argres) = self.findinDoc('info.links.title',0,-1)
+        if argres :
+            self.link_title = argres.split('|')
+        else:
+            self.link_title.append('')
+
+        # get a descriptions of the starting points of the regions
+        # and groups on the page
+        (pagetype, pageDesc) = self.PageDescription()
+        regcnt = len(pageDesc) - 1
+
+        anchorSet = False
+        breakSet = False
+        inGroup = False
+
+        # process each region on the page and convert what you can to html
+
+        for j in xrange(regcnt):
+
+            (etype, start) = pageDesc[j]
+            (ntype, end) = pageDesc[j+1]
+
+
+            # set anchor for link target on this page
+            if not anchorSet and not first_para_continued:
+                hlst.append('<div style="visibility: hidden; height: 0; width: 0;" id="')
+                hlst.append(self.id + '" title="pagetype_' + pagetype + '"></div>\n')
+                anchorSet = True
+
+            # handle groups of graphics with text captions
+            if (etype == 'grpbeg'):
+                (pos, grptype) = self.findinDoc('group.type', start, end)
+                if grptype != None:
+                    if grptype == 'graphic':
+                        gcstr = ' class="' + grptype + '"'
+                        hlst.append('<div' + gcstr + '>')
+                        inGroup = True
+
+            elif (etype == 'grpend'):
+                if inGroup:
+                    hlst.append('</div>\n')
+                    inGroup = False
+
+            else:
+                (pos, regtype) = self.findinDoc('region.type',start,end)
+
+                if regtype == 'graphic' :
+                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
+                    if simgsrc:
+                        if inGroup:
+                            hlst.append('<img src="img/img%04d.jpg" alt="" />' % int(simgsrc))
+                        else:
+                            hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+
+                elif regtype == 'chapterheading' :
+                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                    if not breakSet:
+                        hlst.append('<div style="page-break-after: always;">&nbsp;</div>\n')
+                        breakSet = True
+                    tag = 'h1'
+                    if pclass and (len(pclass) >= 7):
+                        if pclass[3:7] == 'ch1-' : tag = 'h1'
+                        if pclass[3:7] == 'ch2-' : tag = 'h2'
+                        if pclass[3:7] == 'ch3-' : tag = 'h3'
+                        hlst.append('<' + tag + ' class="' + pclass + '">')
+                    else:
+                        hlst.append('<' + tag + '>')
+                    hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+                    hlst.append('</' + tag + '>')
+
+                elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
+                    ptype = 'full'
+                    # check to see if this is a continution from the previous page
+                    if first_para_continued :
+                        ptype = 'end'
+                        first_para_continued = False
+                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                    if pclass and (len(pclass) >= 6) and (ptype == 'full'):
+                        tag = 'p'
+                        if pclass[3:6] == 'h1-' : tag = 'h4'
+                        if pclass[3:6] == 'h2-' : tag = 'h5'
+                        if pclass[3:6] == 'h3-' : tag = 'h6'
+                        hlst.append('<' + tag + ' class="' + pclass + '">')
+                        hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+                        hlst.append('</' + tag + '>')
+                    else :
+                        hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+
+                elif (regtype == 'tocentry') :
+                    ptype = 'full'
+                    if first_para_continued :
+                        ptype = 'end'
+                        first_para_continued = False
+                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                    tocinfo += self.buildTOCEntry(pdesc)
+                    hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+
+                elif (regtype == 'vertical') or (regtype == 'table') :
+                    ptype = 'full'
+                    if inGroup:
+                        ptype = 'middle'
+                    if first_para_continued :
+                        ptype = 'end'
+                        first_para_continued = False
+                    (pclass, pdesc) = self.getParaDescription(start, end, regtype)
+                    hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+
+
+                elif (regtype == 'synth_fcvr.center'):
+                    (pos, simgsrc) = self.findinDoc('img.src',start,end)
+                    if simgsrc:
+                        hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+
+                else :
+                    print('          Making region type', regtype, end=' ')
+                    (pos, temp) = self.findinDoc('paragraph',start,end)
+                    (pos2, temp) = self.findinDoc('span',start,end)
+                    if pos != -1 or pos2 != -1:
+                        print(' a "text" region')
+                        orig_regtype = regtype
+                        regtype = 'fixed'
+                        ptype = 'full'
+                        # check to see if this is a continution from the previous page
+                        if first_para_continued :
+                            ptype = 'end'
+                            first_para_continued = False
+                        (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+                        if not pclass:
+                            if orig_regtype.endswith('.right')     : pclass = 'cl-right'
+                            elif orig_regtype.endswith('.center')  : pclass = 'cl-center'
+                            elif orig_regtype.endswith('.left')    : pclass = 'cl-left'
+                            elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
+                        if pclass and (ptype == 'full') and (len(pclass) >= 6):
+                            tag = 'p'
+                            if pclass[3:6] == 'h1-' : tag = 'h4'
+                            if pclass[3:6] == 'h2-' : tag = 'h5'
+                            if pclass[3:6] == 'h3-' : tag = 'h6'
+                            hlst.append('<' + tag + ' class="' + pclass + '">')
+                            hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+                            hlst.append('</' + tag + '>')
+                        else :
+                            hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+                    else :
+                        print(' a "graphic" region')
+                        (pos, simgsrc) = self.findinDoc('img.src',start,end)
+                        if simgsrc:
+                            hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+
+
+        htmlpage = "".join(hlst)
+        if last_para_continued :
+            if htmlpage[-4:] == '</p>':
+                htmlpage = htmlpage[0:-4]
+            last_para_continued = False
+
+        return htmlpage, tocinfo
+
+
+def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
+    # create a document parser
+    dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
+    htmlpage, tocinfo = dp.process()
+    return htmlpage, tocinfo