tools v5.0

Introduction of the alfcrypto library for speed. Reorganisation of the archive into plugins, apps, and other.
2012-03-06 18:24:28 +00:00
parent 882edb6c69
commit 07e532f59c
112 changed files with 11472 additions and 5177 deletions
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py
@@ -68,7 +68,7 @@ class DocParser(object):
        ys = []
        gdefs = []

-        # get path definitions, positions, dimensions for each glyph 
+        # get path definitions, positions, dimensions for each glyph
        # that makes up the image, and find min x and min y to reposition origin
        minx = -1
        miny = -1
@@ -79,7 +79,7 @@ class DocParser(object):
            xs.append(gxList[j])
            if minx == -1: minx = gxList[j]
            else : minx = min(minx, gxList[j])
- 
+
            ys.append(gyList[j])
            if miny == -1: miny = gyList[j]
            else : miny = min(miny, gyList[j])
@@ -124,12 +124,12 @@ class DocParser(object):
            item = self.docList[pos]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
-            else : 
+            else :
                name = item
                argres = ''
        return name, argres

-        
+
    # find tag in doc if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end) :
        result = None
@@ -142,10 +142,10 @@ class DocParser(object):
            item = self.docList[j]
            if item.find('=') >= 0:
                (name, argres) = item.split('=',1)
-            else : 
+            else :
                name = item
                argres = ''
-            if name.endswith(tagpath) : 
+            if name.endswith(tagpath) :
                result = argres
                foundat = j
                break
@@ -182,13 +182,13 @@ class DocParser(object):
        # class names are an issue given topaz may start them with numerals (not allowed),
        # use a mix of cases (which cause some browsers problems), and actually
        # attach numbers after "_reclustered*" to the end to deal with classes that inherit
-        # from a base class (but then not actually provide all of these _reclustered 
+        # from a base class (but then not actually provide all of these _reclustered
        # classes in the stylesheet!)

        # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
        # that exists in the stylesheet first, and then adding this specific class
        # after
-        
+
        # also some class names have spaces in them so need to convert to dashes
        if nclass != None :
            nclass = nclass.replace(' ','-')
@@ -211,7 +211,7 @@ class DocParser(object):
        return nclass


-    # develop a sorted description of the starting positions of 
+    # develop a sorted description of the starting positions of
    # groups and regions on the page, as well as the page type
    def PageDescription(self):

@@ -267,7 +267,7 @@ class DocParser(object):
        result = []

        # paragraph
-        (pos, pclass) = self.findinDoc('paragraph.class',start,end) 
+        (pos, pclass) = self.findinDoc('paragraph.class',start,end)

        pclass = self.getClass(pclass)

@@ -281,17 +281,22 @@ class DocParser(object):
        if (sfirst != None) and (slast != None) :
            first = int(sfirst)
            last = int(slast)
-            
+
            makeImage = (regtype == 'vertical') or (regtype == 'table')
-            makeImage = makeImage or (extraglyphs != None) 
+            makeImage = makeImage or (extraglyphs != None)
            if self.fixedimage:
                makeImage = makeImage or (regtype == 'fixed')

-            if (pclass != None): 
+            if (pclass != None):
                makeImage = makeImage or (pclass.find('.inverted') >= 0)
                if self.fixedimage :
                    makeImage = makeImage or (pclass.find('cl-f-') >= 0)

+            # before creating an image make sure glyph info exists
+            gidList = self.getData('info.glyph.glyphID',0,-1)
+
+            makeImage = makeImage & (len(gidList) > 0)
+
            if not makeImage :
                # standard all word paragraph
                for wordnum in xrange(first, last):
@@ -332,10 +337,10 @@ class DocParser(object):
            result.append(('svg', num))
            return pclass, result

-        # this type of paragraph may be made up of multiple spans, inline 
-        # word monograms (images), and words with semantic meaning, 
+        # this type of paragraph may be made up of multiple spans, inline
+        # word monograms (images), and words with semantic meaning,
        # plus glyphs used to form starting letter of first word
-        
+
        # need to parse this type line by line
        line = start + 1
        word_class = ''
@@ -344,7 +349,7 @@ class DocParser(object):
        if end == -1 :
            end = self.docSize

-        # seems some xml has last* coming before first* so we have to 
+        # seems some xml has last* coming before first* so we have to
        # handle any order
        sp_first = -1
        sp_last = -1
@@ -382,10 +387,10 @@ class DocParser(object):
                ws_last = int(argres)

            elif name.endswith('word.class'):
-               (cname, space) = argres.split('-',1)
-               if space == '' : space = '0'
-               if (cname == 'spaceafter') and (int(space) > 0) :
-                   word_class = 'sa'
+                (cname, space) = argres.split('-',1)
+                if space == '' : space = '0'
+                if (cname == 'spaceafter') and (int(space) > 0) :
+                    word_class = 'sa'

            elif name.endswith('word.img.src'):
                result.append(('img' + word_class, int(argres)))
@@ -416,11 +421,11 @@ class DocParser(object):
                    result.append(('ocr', wordnum))
                ws_first = -1
                ws_last = -1
-                              
+
            line += 1

        return pclass, result
-                            
+

    def buildParagraph(self, pclass, pdesc, type, regtype) :
        parares = ''
@@ -433,7 +438,7 @@ class DocParser(object):
        br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')

        handle_links = len(self.link_id) > 0
-        
+
        if (type == 'full') or (type == 'begin') :
            parares += '<p' + classres + '>'

@@ -462,7 +467,7 @@ class DocParser(object):
                        if linktype == 'external' :
                            linkhref = self.link_href[link-1]
                            linkhtml = '<a href="%s">' % linkhref
-                        else : 
+                        else :
                            if len(self.link_page) >= link :
                                ptarget = self.link_page[link-1] - 1
                                linkhtml = '<a href="#page%04d">' % ptarget
@@ -509,7 +514,7 @@ class DocParser(object):

            elif wtype == 'svg' :
                sep = ''
-                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num 
+                parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
                parares += sep

        if len(sep) > 0 : parares = parares[0:-1]
@@ -551,7 +556,7 @@ class DocParser(object):
                            title = ''
                            alt_title = ''
                            linkpage = ''
-                        else : 
+                        else :
                            if len(self.link_page) >= link :
                                ptarget = self.link_page[link-1] - 1
                                linkpage = '%04d' % ptarget
@@ -584,7 +589,7 @@ class DocParser(object):



-    
+
    # walk the document tree collecting the information needed
    # to build an html page using the ocrText

@@ -602,8 +607,8 @@ class DocParser(object):

        # determine if first paragraph is continued from previous page
        (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
-        first_para_continued = (self.parastems_stemid  != None) 
-        
+        first_para_continued = (self.parastems_stemid  != None)
+
        # determine if last paragraph is continued onto the next page
        (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
        last_para_continued = (self.paracont_stemid != None)
@@ -631,24 +636,24 @@ class DocParser(object):

        # get descriptions of the starting points of the regions
        # and groups on the page
-        (pagetype, pageDesc) = self.PageDescription() 
+        (pagetype, pageDesc) = self.PageDescription()
        regcnt = len(pageDesc) - 1

        anchorSet = False
        breakSet = False
        inGroup = False
-        
+
        # process each region on the page and convert what you can to html

        for j in xrange(regcnt):

            (etype, start) = pageDesc[j]
            (ntype, end) = pageDesc[j+1]
-            
+

            # set anchor for link target on this page
            if not anchorSet and not first_para_continued:
-                htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="' 
+                htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
                htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
                anchorSet = True

@@ -660,7 +665,7 @@ class DocParser(object):
                        gcstr = ' class="' + grptype + '"'
                        htmlpage += '<div' + gcstr + '>'
                        inGroup = True
-                
+
            elif (etype == 'grpend'):
                if inGroup:
                    htmlpage += '</div>\n'
@@ -676,7 +681,7 @@ class DocParser(object):
                            htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
                        else:
                            htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
-            
+
                elif regtype == 'chapterheading' :
                    (pclass, pdesc) = self.getParaDescription(start,end, regtype)
                    if not breakSet: