tools v5.0

Introduction of alfcrypto library for speed
Reorganisation of archive plugins, apps, other
This commit is contained in:
Apprentice Alf
2012-03-06 18:24:28 +00:00
parent 882edb6c69
commit 07e532f59c
112 changed files with 11472 additions and 5177 deletions

View File

@@ -68,7 +68,7 @@ class DocParser(object):
ys = []
gdefs = []
# get path definitions, positions, dimensions for each glyph
# get path definitions, positions, dimensions for each glyph
# that makes up the image, and find min x and min y to reposition origin
minx = -1
miny = -1
@@ -79,7 +79,7 @@ class DocParser(object):
xs.append(gxList[j])
if minx == -1: minx = gxList[j]
else : minx = min(minx, gxList[j])
ys.append(gyList[j])
if miny == -1: miny = gyList[j]
else : miny = min(miny, gyList[j])
@@ -124,12 +124,12 @@ class DocParser(object):
item = self.docList[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
else :
name = item
argres = ''
return name, argres
# find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
@@ -142,10 +142,10 @@ class DocParser(object):
item = self.docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
else :
name = item
argres = ''
if name.endswith(tagpath) :
if name.endswith(tagpath) :
result = argres
foundat = j
break
@@ -182,13 +182,13 @@ class DocParser(object):
# class names are an issue given topaz may start them with numerals (not allowed),
# use a mix of cases (which cause some browsers problems), and actually
# attach numbers after "_reclustered*" to the end to deal with classes that inherit
# from a base class (but then not actually provide all of these _reclustered
# from a base class (but then not actually provide all of these _reclustered
# classes in the stylesheet!
# so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
# that exists in the stylesheet first, and then adding this specific class
# after
# also some class names have spaces in them so need to convert to dashes
if nclass != None :
nclass = nclass.replace(' ','-')
@@ -211,7 +211,7 @@ class DocParser(object):
return nclass
# develop a sorted description of the starting positions of
# develop a sorted description of the starting positions of
# groups and regions on the page, as well as the page type
def PageDescription(self):
@@ -267,7 +267,7 @@ class DocParser(object):
result = []
# paragraph
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
(pos, pclass) = self.findinDoc('paragraph.class',start,end)
pclass = self.getClass(pclass)
@@ -281,17 +281,22 @@ class DocParser(object):
if (sfirst != None) and (slast != None) :
first = int(sfirst)
last = int(slast)
makeImage = (regtype == 'vertical') or (regtype == 'table')
makeImage = makeImage or (extraglyphs != None)
makeImage = makeImage or (extraglyphs != None)
if self.fixedimage:
makeImage = makeImage or (regtype == 'fixed')
if (pclass != None):
if (pclass != None):
makeImage = makeImage or (pclass.find('.inverted') >= 0)
if self.fixedimage :
makeImage = makeImage or (pclass.find('cl-f-') >= 0)
# before creating an image make sure glyph info exists
gidList = self.getData('info.glyph.glyphID',0,-1)
makeImage = makeImage & (len(gidList) > 0)
if not makeImage :
# standard all word paragraph
for wordnum in xrange(first, last):
@@ -332,10 +337,10 @@ class DocParser(object):
result.append(('svg', num))
return pclass, result
# this type of paragraph may be made up of multiple spans, inline
# word monograms (images), and words with semantic meaning,
# this type of paragraph may be made up of multiple spans, inline
# word monograms (images), and words with semantic meaning,
# plus glyphs used to form starting letter of first word
# need to parse this type line by line
line = start + 1
word_class = ''
@@ -344,7 +349,7 @@ class DocParser(object):
if end == -1 :
end = self.docSize
# seems some xml has last* coming before first* so we have to
# seems some xml has last* coming before first* so we have to
# handle any order
sp_first = -1
sp_last = -1
@@ -382,10 +387,10 @@ class DocParser(object):
ws_last = int(argres)
elif name.endswith('word.class'):
(cname, space) = argres.split('-',1)
if space == '' : space = '0'
if (cname == 'spaceafter') and (int(space) > 0) :
word_class = 'sa'
(cname, space) = argres.split('-',1)
if space == '' : space = '0'
if (cname == 'spaceafter') and (int(space) > 0) :
word_class = 'sa'
elif name.endswith('word.img.src'):
result.append(('img' + word_class, int(argres)))
@@ -416,11 +421,11 @@ class DocParser(object):
result.append(('ocr', wordnum))
ws_first = -1
ws_last = -1
line += 1
return pclass, result
def buildParagraph(self, pclass, pdesc, type, regtype) :
parares = ''
@@ -433,7 +438,7 @@ class DocParser(object):
br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
handle_links = len(self.link_id) > 0
if (type == 'full') or (type == 'begin') :
parares += '<p' + classres + '>'
@@ -462,7 +467,7 @@ class DocParser(object):
if linktype == 'external' :
linkhref = self.link_href[link-1]
linkhtml = '<a href="%s">' % linkhref
else :
else :
if len(self.link_page) >= link :
ptarget = self.link_page[link-1] - 1
linkhtml = '<a href="#page%04d">' % ptarget
@@ -509,7 +514,7 @@ class DocParser(object):
elif wtype == 'svg' :
sep = ''
parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
parares += sep
if len(sep) > 0 : parares = parares[0:-1]
@@ -551,7 +556,7 @@ class DocParser(object):
title = ''
alt_title = ''
linkpage = ''
else :
else :
if len(self.link_page) >= link :
ptarget = self.link_page[link-1] - 1
linkpage = '%04d' % ptarget
@@ -584,7 +589,7 @@ class DocParser(object):
# walk the document tree collecting the information needed
# to build an html page using the ocrText
@@ -602,8 +607,8 @@ class DocParser(object):
# determine if first paragraph is continued from previous page
(pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
first_para_continued = (self.parastems_stemid != None)
first_para_continued = (self.parastems_stemid != None)
# determine if last paragraph is continued onto the next page
(pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
last_para_continued = (self.paracont_stemid != None)
@@ -631,24 +636,24 @@ class DocParser(object):
# get descriptions of the starting points of the regions
# and groups on the page
(pagetype, pageDesc) = self.PageDescription()
(pagetype, pageDesc) = self.PageDescription()
regcnt = len(pageDesc) - 1
anchorSet = False
breakSet = False
inGroup = False
# process each region on the page and convert what you can to html
for j in xrange(regcnt):
(etype, start) = pageDesc[j]
(ntype, end) = pageDesc[j+1]
# set anchor for link target on this page
if not anchorSet and not first_para_continued:
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
htmlpage += '<div style="visibility: hidden; height: 0; width: 0;" id="'
htmlpage += self.id + '" title="pagetype_' + pagetype + '"></div>\n'
anchorSet = True
@@ -660,7 +665,7 @@ class DocParser(object):
gcstr = ' class="' + grptype + '"'
htmlpage += '<div' + gcstr + '>'
inGroup = True
elif (etype == 'grpend'):
if inGroup:
htmlpage += '</div>\n'
@@ -676,7 +681,7 @@ class DocParser(object):
htmlpage += '<img src="img/img%04d.jpg" alt="" />' % int(simgsrc)
else:
htmlpage += '<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc)
elif regtype == 'chapterheading' :
(pclass, pdesc) = self.getParaDescription(start,end, regtype)
if not breakSet: