tools v6.0.8
This commit is contained in:
@@ -1,148 +1,82 @@
|
||||
#! /usr/bin/python
|
||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||
|
||||
class Unbuffered:
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
def write(self, data):
|
||||
self.stream.write(data)
|
||||
self.stream.flush()
|
||||
def __getattr__(self, attr):
|
||||
return getattr(self.stream, attr)
|
||||
|
||||
import sys
|
||||
sys.stdout=Unbuffered(sys.stdout)
|
||||
|
||||
import csv
|
||||
import os
|
||||
import getopt
|
||||
from struct import pack
|
||||
from struct import unpack
|
||||
|
||||
class TpzDRMError(Exception):
|
||||
pass
|
||||
|
||||
# local support routines
|
||||
if 'calibre' in sys.modules:
|
||||
inCalibre = True
|
||||
else:
|
||||
inCalibre = False
|
||||
|
||||
if inCalibre :
|
||||
from calibre_plugins.dedrm import convert2xml
|
||||
from calibre_plugins.dedrm import flatxml2html
|
||||
from calibre_plugins.dedrm import flatxml2svg
|
||||
from calibre_plugins.dedrm import stylexml2css
|
||||
else :
|
||||
import convert2xml
|
||||
import flatxml2html
|
||||
import flatxml2svg
|
||||
import stylexml2css
|
||||
|
||||
# global switch
|
||||
buildXML = False
|
||||
|
||||
# Get a 7 bit encoded number from a file
|
||||
def readEncodedNumber(file):
|
||||
flag = False
|
||||
c = file.read(1)
|
||||
if (len(c) == 0):
|
||||
return None
|
||||
data = ord(c)
|
||||
if data == 0xFF:
|
||||
flag = True
|
||||
c = file.read(1)
|
||||
if (len(c) == 0):
|
||||
return None
|
||||
data = ord(c)
|
||||
if data >= 0x80:
|
||||
datax = (data & 0x7F)
|
||||
while data >= 0x80 :
|
||||
c = file.read(1)
|
||||
if (len(c) == 0):
|
||||
return None
|
||||
data = ord(c)
|
||||
datax = (datax <<7) + (data & 0x7F)
|
||||
data = datax
|
||||
if flag:
|
||||
data = -data
|
||||
return data
|
||||
|
||||
# Get a length prefixed string from the file
|
||||
def lengthPrefixString(data):
|
||||
return encodeNumber(len(data))+data
|
||||
|
||||
def readString(file):
|
||||
stringLength = readEncodedNumber(file)
|
||||
if (stringLength == None):
|
||||
return None
|
||||
sv = file.read(stringLength)
|
||||
if (len(sv) != stringLength):
|
||||
return ""
|
||||
return unpack(str(stringLength)+"s",sv)[0]
|
||||
|
||||
def getMetaArray(metaFile):
|
||||
# parse the meta file
|
||||
result = {}
|
||||
fo = file(metaFile,'rb')
|
||||
size = readEncodedNumber(fo)
|
||||
for i in xrange(size):
|
||||
tag = readString(fo)
|
||||
value = readString(fo)
|
||||
result[tag] = value
|
||||
# print tag, value
|
||||
fo.close()
|
||||
return result
|
||||
|
||||
|
||||
# dictionary of all text strings by index value
|
||||
class Dictionary(object):
|
||||
def __init__(self, dictFile):
|
||||
self.filename = dictFile
|
||||
self.size = 0
|
||||
self.fo = file(dictFile,'rb')
|
||||
self.stable = []
|
||||
self.size = readEncodedNumber(self.fo)
|
||||
for i in xrange(self.size):
|
||||
self.stable.append(self.escapestr(readString(self.fo)))
|
||||
self.pos = 0
|
||||
def escapestr(self, str):
|
||||
str = str.replace('&','&')
|
||||
str = str.replace('<','<')
|
||||
str = str.replace('>','>')
|
||||
str = str.replace('=','=')
|
||||
return str
|
||||
def lookup(self,val):
|
||||
if ((val >= 0) and (val < self.size)) :
|
||||
self.pos = val
|
||||
return self.stable[self.pos]
|
||||
else:
|
||||
print "Error: %d outside of string table limits" % val
|
||||
raise TpzDRMError('outside or string table limits')
|
||||
# sys.exit(-1)
|
||||
def getSize(self):
|
||||
return self.size
|
||||
def getPos(self):
|
||||
return self.pos
|
||||
|
||||
|
||||
class PageDimParser(object):
|
||||
def __init__(self, flatxml):
|
||||
class PParser(object):
|
||||
def __init__(self, gd, flatxml, meta_array):
|
||||
self.gd = gd
|
||||
self.flatdoc = flatxml.split('\n')
|
||||
# find tag if within pos to end inclusive
|
||||
self.docSize = len(self.flatdoc)
|
||||
self.temp = []
|
||||
|
||||
self.ph = -1
|
||||
self.pw = -1
|
||||
startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
|
||||
for p in startpos:
|
||||
(name, argres) = self.lineinDoc(p)
|
||||
self.ph = max(self.ph, int(argres))
|
||||
startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
|
||||
for p in startpos:
|
||||
(name, argres) = self.lineinDoc(p)
|
||||
self.pw = max(self.pw, int(argres))
|
||||
|
||||
if self.ph <= 0:
|
||||
self.ph = int(meta_array.get('pageHeight', '11000'))
|
||||
if self.pw <= 0:
|
||||
self.pw = int(meta_array.get('pageWidth', '8500'))
|
||||
|
||||
res = []
|
||||
startpos = self.posinDoc('info.glyph.x')
|
||||
for p in startpos:
|
||||
argres = self.getDataatPos('info.glyph.x', p)
|
||||
res.extend(argres)
|
||||
self.gx = res
|
||||
|
||||
res = []
|
||||
startpos = self.posinDoc('info.glyph.y')
|
||||
for p in startpos:
|
||||
argres = self.getDataatPos('info.glyph.y', p)
|
||||
res.extend(argres)
|
||||
self.gy = res
|
||||
|
||||
res = []
|
||||
startpos = self.posinDoc('info.glyph.glyphID')
|
||||
for p in startpos:
|
||||
argres = self.getDataatPos('info.glyph.glyphID', p)
|
||||
res.extend(argres)
|
||||
self.gid = res
|
||||
|
||||
|
||||
# return tag at line pos in document
|
||||
def lineinDoc(self, pos) :
|
||||
if (pos >= 0) and (pos < self.docSize) :
|
||||
item = self.flatdoc[pos]
|
||||
if item.find('=') >= 0:
|
||||
(name, argres) = item.split('=',1)
|
||||
else :
|
||||
name = item
|
||||
argres = ''
|
||||
return name, argres
|
||||
|
||||
# find tag in doc if within pos to end inclusive
|
||||
def findinDoc(self, tagpath, pos, end) :
|
||||
result = None
|
||||
docList = self.flatdoc
|
||||
cnt = len(docList)
|
||||
if end == -1 :
|
||||
end = cnt
|
||||
end = self.docSize
|
||||
else:
|
||||
end = min(cnt,end)
|
||||
end = min(self.docSize, end)
|
||||
foundat = -1
|
||||
for j in xrange(pos, end):
|
||||
item = docList[j]
|
||||
item = self.flatdoc[j]
|
||||
if item.find('=') >= 0:
|
||||
(name, argres) = item.split('=')
|
||||
(name, argres) = item.split('=',1)
|
||||
else :
|
||||
name = item
|
||||
argres = ''
|
||||
@@ -151,44 +85,19 @@ class PageDimParser(object):
|
||||
foundat = j
|
||||
break
|
||||
return foundat, result
|
||||
def process(self):
|
||||
(pos, sph) = self.findinDoc('page.h',0,-1)
|
||||
(pos, spw) = self.findinDoc('page.w',0,-1)
|
||||
if (sph == None): sph = '-1'
|
||||
if (spw == None): spw = '-1'
|
||||
return sph, spw
|
||||
|
||||
def getPageDim(flatxml):
|
||||
# create a document parser
|
||||
dp = PageDimParser(flatxml)
|
||||
(ph, pw) = dp.process()
|
||||
return ph, pw
|
||||
# return list of start positions for the tagpath
|
||||
def posinDoc(self, tagpath):
|
||||
startpos = []
|
||||
pos = 0
|
||||
res = ""
|
||||
while res != None :
|
||||
(foundpos, res) = self.findinDoc(tagpath, pos, -1)
|
||||
if res != None :
|
||||
startpos.append(foundpos)
|
||||
pos = foundpos + 1
|
||||
return startpos
|
||||
|
||||
class GParser(object):
|
||||
def __init__(self, flatxml):
|
||||
self.flatdoc = flatxml.split('\n')
|
||||
self.dpi = 1440
|
||||
self.gh = self.getData('info.glyph.h')
|
||||
self.gw = self.getData('info.glyph.w')
|
||||
self.guse = self.getData('info.glyph.use')
|
||||
if self.guse :
|
||||
self.count = len(self.guse)
|
||||
else :
|
||||
self.count = 0
|
||||
self.gvtx = self.getData('info.glyph.vtx')
|
||||
self.glen = self.getData('info.glyph.len')
|
||||
self.gdpi = self.getData('info.glyph.dpi')
|
||||
self.vx = self.getData('info.vtx.x')
|
||||
self.vy = self.getData('info.vtx.y')
|
||||
self.vlen = self.getData('info.len.n')
|
||||
if self.vlen :
|
||||
self.glen.append(len(self.vlen))
|
||||
elif self.glen:
|
||||
self.glen.append(0)
|
||||
if self.vx :
|
||||
self.gvtx.append(len(self.vx))
|
||||
elif self.gvtx :
|
||||
self.gvtx.append(0)
|
||||
def getData(self, path):
|
||||
result = None
|
||||
cnt = len(self.flatdoc)
|
||||
@@ -200,522 +109,141 @@ class GParser(object):
|
||||
else:
|
||||
name = item
|
||||
argres = []
|
||||
if (name == path):
|
||||
if (name.endswith(path)):
|
||||
result = argres
|
||||
break
|
||||
if (len(argres) > 0) :
|
||||
for j in xrange(0,len(argres)):
|
||||
argres[j] = int(argres[j])
|
||||
return result
|
||||
def getGlyphDim(self, gly):
|
||||
if self.gdpi[gly] == 0:
|
||||
return 0, 0
|
||||
maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
|
||||
maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
|
||||
return maxh, maxw
|
||||
def getPath(self, gly):
|
||||
path = ''
|
||||
if (gly < 0) or (gly >= self.count):
|
||||
return path
|
||||
tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
|
||||
ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
|
||||
p = 0
|
||||
for k in xrange(self.glen[gly], self.glen[gly+1]):
|
||||
if (p == 0):
|
||||
zx = tx[0:self.vlen[k]+1]
|
||||
zy = ty[0:self.vlen[k]+1]
|
||||
|
||||
def getDataatPos(self, path, pos):
|
||||
result = None
|
||||
item = self.flatdoc[pos]
|
||||
if item.find('=') >= 0:
|
||||
(name, argt) = item.split('=')
|
||||
argres = argt.split('|')
|
||||
else:
|
||||
name = item
|
||||
argres = []
|
||||
if (len(argres) > 0) :
|
||||
for j in xrange(0,len(argres)):
|
||||
argres[j] = int(argres[j])
|
||||
if (name.endswith(path)):
|
||||
result = argres
|
||||
return result
|
||||
|
||||
def getDataTemp(self, path):
|
||||
result = None
|
||||
cnt = len(self.temp)
|
||||
for j in xrange(cnt):
|
||||
item = self.temp[j]
|
||||
if item.find('=') >= 0:
|
||||
(name, argt) = item.split('=')
|
||||
argres = argt.split('|')
|
||||
else:
|
||||
zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
|
||||
zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
|
||||
p += 1
|
||||
j = 0
|
||||
while ( j < len(zx) ):
|
||||
if (j == 0):
|
||||
# Start Position.
|
||||
path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
|
||||
elif (j <= len(zx)-3):
|
||||
# Cubic Bezier Curve
|
||||
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
|
||||
j += 2
|
||||
elif (j == len(zx)-2):
|
||||
# Cubic Bezier Curve to Start Position
|
||||
path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
|
||||
j += 1
|
||||
elif (j == len(zx)-1):
|
||||
# Quadratic Bezier Curve to Start Position
|
||||
path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
|
||||
name = item
|
||||
argres = []
|
||||
if (name.endswith(path)):
|
||||
result = argres
|
||||
self.temp.pop(j)
|
||||
break
|
||||
if (len(argres) > 0) :
|
||||
for j in xrange(0,len(argres)):
|
||||
argres[j] = int(argres[j])
|
||||
return result
|
||||
|
||||
j += 1
|
||||
path += 'z'
|
||||
return path
|
||||
def getImages(self):
|
||||
result = []
|
||||
self.temp = self.flatdoc
|
||||
while (self.getDataTemp('img') != None):
|
||||
h = self.getDataTemp('img.h')[0]
|
||||
w = self.getDataTemp('img.w')[0]
|
||||
x = self.getDataTemp('img.x')[0]
|
||||
y = self.getDataTemp('img.y')[0]
|
||||
src = self.getDataTemp('img.src')[0]
|
||||
result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
|
||||
return result
|
||||
|
||||
def getGlyphs(self):
|
||||
result = []
|
||||
if (self.gid != None) and (len(self.gid) > 0):
|
||||
glyphs = []
|
||||
for j in set(self.gid):
|
||||
glyphs.append(j)
|
||||
glyphs.sort()
|
||||
for gid in glyphs:
|
||||
id='id="gl%d"' % gid
|
||||
path = self.gd.lookup(id)
|
||||
if path:
|
||||
result.append(id + ' ' + path)
|
||||
return result
|
||||
|
||||
|
||||
|
||||
# dictionary of all text strings by index value
|
||||
class GlyphDict(object):
|
||||
def __init__(self):
|
||||
self.gdict = {}
|
||||
def lookup(self, id):
|
||||
# id='id="gl%d"' % val
|
||||
if id in self.gdict:
|
||||
return self.gdict[id]
|
||||
return None
|
||||
def addGlyph(self, val, path):
|
||||
id='id="gl%d"' % val
|
||||
self.gdict[id] = path
|
||||
|
||||
|
||||
def generateBook(bookDir, raw, fixedimage):
|
||||
# sanity check Topaz file extraction
|
||||
if not os.path.exists(bookDir) :
|
||||
print "Can not find directory with unencrypted book"
|
||||
return 1
|
||||
|
||||
dictFile = os.path.join(bookDir,'dict0000.dat')
|
||||
if not os.path.exists(dictFile) :
|
||||
print "Can not find dict0000.dat file"
|
||||
return 1
|
||||
|
||||
pageDir = os.path.join(bookDir,'page')
|
||||
if not os.path.exists(pageDir) :
|
||||
print "Can not find page directory in unencrypted book"
|
||||
return 1
|
||||
|
||||
imgDir = os.path.join(bookDir,'img')
|
||||
if not os.path.exists(imgDir) :
|
||||
print "Can not find image directory in unencrypted book"
|
||||
return 1
|
||||
|
||||
glyphsDir = os.path.join(bookDir,'glyphs')
|
||||
if not os.path.exists(glyphsDir) :
|
||||
print "Can not find glyphs directory in unencrypted book"
|
||||
return 1
|
||||
|
||||
metaFile = os.path.join(bookDir,'metadata0000.dat')
|
||||
if not os.path.exists(metaFile) :
|
||||
print "Can not find metadata0000.dat in unencrypted book"
|
||||
return 1
|
||||
|
||||
svgDir = os.path.join(bookDir,'svg')
|
||||
if not os.path.exists(svgDir) :
|
||||
os.makedirs(svgDir)
|
||||
|
||||
if buildXML:
|
||||
xmlDir = os.path.join(bookDir,'xml')
|
||||
if not os.path.exists(xmlDir) :
|
||||
os.makedirs(xmlDir)
|
||||
|
||||
otherFile = os.path.join(bookDir,'other0000.dat')
|
||||
if not os.path.exists(otherFile) :
|
||||
print "Can not find other0000.dat in unencrypted book"
|
||||
return 1
|
||||
|
||||
print "Updating to color images if available"
|
||||
spath = os.path.join(bookDir,'color_img')
|
||||
dpath = os.path.join(bookDir,'img')
|
||||
filenames = os.listdir(spath)
|
||||
filenames = sorted(filenames)
|
||||
for filename in filenames:
|
||||
imgname = filename.replace('color','img')
|
||||
sfile = os.path.join(spath,filename)
|
||||
dfile = os.path.join(dpath,imgname)
|
||||
imgdata = file(sfile,'rb').read()
|
||||
file(dfile,'wb').write(imgdata)
|
||||
|
||||
print "Creating cover.jpg"
|
||||
isCover = False
|
||||
cpath = os.path.join(bookDir,'img')
|
||||
cpath = os.path.join(cpath,'img0000.jpg')
|
||||
if os.path.isfile(cpath):
|
||||
cover = file(cpath, 'rb').read()
|
||||
cpath = os.path.join(bookDir,'cover.jpg')
|
||||
file(cpath, 'wb').write(cover)
|
||||
isCover = True
|
||||
|
||||
|
||||
print 'Processing Dictionary'
|
||||
dict = Dictionary(dictFile)
|
||||
|
||||
print 'Processing Meta Data and creating OPF'
|
||||
meta_array = getMetaArray(metaFile)
|
||||
|
||||
# replace special chars in title and authors like & < >
|
||||
title = meta_array.get('Title','No Title Provided')
|
||||
title = title.replace('&','&')
|
||||
title = title.replace('<','<')
|
||||
title = title.replace('>','>')
|
||||
meta_array['Title'] = title
|
||||
authors = meta_array.get('Authors','No Authors Provided')
|
||||
authors = authors.replace('&','&')
|
||||
authors = authors.replace('<','<')
|
||||
authors = authors.replace('>','>')
|
||||
meta_array['Authors'] = authors
|
||||
|
||||
if buildXML:
|
||||
xname = os.path.join(xmlDir, 'metadata.xml')
|
||||
mlst = []
|
||||
for key in meta_array:
|
||||
mlst.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
|
||||
metastr = "".join(mlst)
|
||||
mlst = None
|
||||
file(xname, 'wb').write(metastr)
|
||||
|
||||
print 'Processing StyleSheet'
|
||||
|
||||
# get some scaling info from metadata to use while processing styles
|
||||
# and first page info
|
||||
|
||||
fontsize = '135'
|
||||
if 'fontSize' in meta_array:
|
||||
fontsize = meta_array['fontSize']
|
||||
|
||||
# also get the size of a normal text page
|
||||
# get the total number of pages unpacked as a safety check
|
||||
filenames = os.listdir(pageDir)
|
||||
numfiles = len(filenames)
|
||||
|
||||
spage = '1'
|
||||
if 'firstTextPage' in meta_array:
|
||||
spage = meta_array['firstTextPage']
|
||||
pnum = int(spage)
|
||||
if pnum >= numfiles or pnum < 0:
|
||||
# metadata is wrong so just select a page near the front
|
||||
# 10% of the book to get a normal text page
|
||||
pnum = int(0.10 * numfiles)
|
||||
# print "first normal text page is", spage
|
||||
|
||||
# get page height and width from first text page for use in stylesheet scaling
|
||||
pname = 'page%04d.dat' % (pnum + 1)
|
||||
fname = os.path.join(pageDir,pname)
|
||||
flat_xml = convert2xml.fromData(dict, fname)
|
||||
|
||||
(ph, pw) = getPageDim(flat_xml)
|
||||
if (ph == '-1') or (ph == '0') : ph = '11000'
|
||||
if (pw == '-1') or (pw == '0') : pw = '8500'
|
||||
meta_array['pageHeight'] = ph
|
||||
meta_array['pageWidth'] = pw
|
||||
if 'fontSize' not in meta_array.keys():
|
||||
meta_array['fontSize'] = fontsize
|
||||
|
||||
# process other.dat for css info and for map of page files to svg images
|
||||
# this map is needed because some pages actually are made up of multiple
|
||||
# pageXXXX.xml files
|
||||
xname = os.path.join(bookDir, 'style.css')
|
||||
flat_xml = convert2xml.fromData(dict, otherFile)
|
||||
|
||||
# extract info.original.pid to get original page information
|
||||
pageIDMap = {}
|
||||
pageidnums = stylexml2css.getpageIDMap(flat_xml)
|
||||
if len(pageidnums) == 0:
|
||||
filenames = os.listdir(pageDir)
|
||||
numfiles = len(filenames)
|
||||
for k in range(numfiles):
|
||||
pageidnums.append(k)
|
||||
# create a map from page ids to list of page file nums to process for that page
|
||||
for i in range(len(pageidnums)):
|
||||
id = pageidnums[i]
|
||||
if id in pageIDMap.keys():
|
||||
pageIDMap[id].append(i)
|
||||
else:
|
||||
pageIDMap[id] = [i]
|
||||
|
||||
# now get the css info
|
||||
cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
|
||||
file(xname, 'wb').write(cssstr)
|
||||
if buildXML:
|
||||
xname = os.path.join(xmlDir, 'other0000.xml')
|
||||
file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
|
||||
|
||||
print 'Processing Glyphs'
|
||||
gd = GlyphDict()
|
||||
filenames = os.listdir(glyphsDir)
|
||||
filenames = sorted(filenames)
|
||||
glyfname = os.path.join(svgDir,'glyphs.svg')
|
||||
glyfile = open(glyfname, 'w')
|
||||
glyfile.write('<?xml version="1.0" standalone="no"?>\n')
|
||||
glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
|
||||
glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
|
||||
glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
|
||||
glyfile.write('<defs>\n')
|
||||
counter = 0
|
||||
for filename in filenames:
|
||||
# print ' ', filename
|
||||
print '.',
|
||||
fname = os.path.join(glyphsDir,filename)
|
||||
flat_xml = convert2xml.fromData(dict, fname)
|
||||
|
||||
if buildXML:
|
||||
xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
|
||||
file(xname, 'wb').write(convert2xml.getXML(dict, fname))
|
||||
|
||||
gp = GParser(flat_xml)
|
||||
for i in xrange(0, gp.count):
|
||||
path = gp.getPath(i)
|
||||
maxh, maxw = gp.getGlyphDim(i)
|
||||
fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
|
||||
glyfile.write(fullpath)
|
||||
gd.addGlyph(counter * 256 + i, fullpath)
|
||||
counter += 1
|
||||
glyfile.write('</defs>\n')
|
||||
glyfile.write('</svg>\n')
|
||||
glyfile.close()
|
||||
print " "
|
||||
|
||||
|
||||
# start up the html
|
||||
# also build up tocentries while processing html
|
||||
htmlFileName = "book.html"
|
||||
hlst = []
|
||||
hlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
|
||||
hlst.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n')
|
||||
hlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n')
|
||||
hlst.append('<head>\n')
|
||||
hlst.append('<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n')
|
||||
hlst.append('<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n')
|
||||
hlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
|
||||
hlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
|
||||
if 'ASIN' in meta_array:
|
||||
hlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
|
||||
if 'GUID' in meta_array:
|
||||
hlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
|
||||
hlst.append('<link href="style.css" rel="stylesheet" type="text/css" />\n')
|
||||
hlst.append('</head>\n<body>\n')
|
||||
|
||||
print 'Processing Pages'
|
||||
# Books are at 1440 DPI. This is rendering at twice that size for
|
||||
# readability when rendering to the screen.
|
||||
scaledpi = 1440.0
|
||||
|
||||
filenames = os.listdir(pageDir)
|
||||
filenames = sorted(filenames)
|
||||
numfiles = len(filenames)
|
||||
|
||||
xmllst = []
|
||||
elst = []
|
||||
|
||||
for filename in filenames:
|
||||
# print ' ', filename
|
||||
print ".",
|
||||
fname = os.path.join(pageDir,filename)
|
||||
flat_xml = convert2xml.fromData(dict, fname)
|
||||
|
||||
# keep flat_xml for later svg processing
|
||||
xmllst.append(flat_xml)
|
||||
|
||||
if buildXML:
|
||||
xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
|
||||
file(xname, 'wb').write(convert2xml.getXML(dict, fname))
|
||||
|
||||
# first get the html
|
||||
pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
|
||||
elst.append(tocinfo)
|
||||
hlst.append(pagehtml)
|
||||
|
||||
# finish up the html string and output it
|
||||
hlst.append('</body>\n</html>\n')
|
||||
htmlstr = "".join(hlst)
|
||||
hlst = None
|
||||
file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
|
||||
|
||||
print " "
|
||||
print 'Extracting Table of Contents from Amazon OCR'
|
||||
|
||||
# first create a table of contents file for the svg images
|
||||
tlst = []
|
||||
tlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
|
||||
tlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
|
||||
tlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
|
||||
tlst.append('<head>\n')
|
||||
tlst.append('<title>' + meta_array['Title'] + '</title>\n')
|
||||
tlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
|
||||
tlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
|
||||
if 'ASIN' in meta_array:
|
||||
tlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
|
||||
if 'GUID' in meta_array:
|
||||
tlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
|
||||
tlst.append('</head>\n')
|
||||
tlst.append('<body>\n')
|
||||
|
||||
tlst.append('<h2>Table of Contents</h2>\n')
|
||||
start = pageidnums[0]
|
||||
def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
|
||||
mlst = []
|
||||
pp = PParser(gdict, flat_xml, meta_array)
|
||||
mlst.append('<?xml version="1.0" standalone="no"?>\n')
|
||||
if (raw):
|
||||
startname = 'page%04d.svg' % start
|
||||
mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
|
||||
mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
|
||||
mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
|
||||
else:
|
||||
startname = 'page%04d.xhtml' % start
|
||||
|
||||
tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')
|
||||
# build up a table of contents for the svg xhtml output
|
||||
tocentries = "".join(elst)
|
||||
elst = None
|
||||
toclst = tocentries.split('\n')
|
||||
toclst.pop()
|
||||
for entry in toclst:
|
||||
print entry
|
||||
title, pagenum = entry.split('|')
|
||||
id = pageidnums[int(pagenum)]
|
||||
if (raw):
|
||||
fname = 'page%04d.svg' % id
|
||||
mlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
|
||||
mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n')
|
||||
mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
|
||||
mlst.append('<script><![CDATA[\n')
|
||||
mlst.append('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n')
|
||||
mlst.append('var dpi=%d;\n' % scaledpi)
|
||||
if (previd) :
|
||||
mlst.append('var prevpage="page%04d.xhtml";\n' % (previd))
|
||||
if (nextid) :
|
||||
mlst.append('var nextpage="page%04d.xhtml";\n' % (nextid))
|
||||
mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
|
||||
mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
|
||||
mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
|
||||
mlst.append('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
|
||||
mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
|
||||
mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
|
||||
mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
|
||||
mlst.append('window.onload=setsize;\n')
|
||||
mlst.append(']]></script>\n')
|
||||
mlst.append('</head>\n')
|
||||
mlst.append('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
|
||||
mlst.append('<div style="white-space:nowrap;">\n')
|
||||
if previd == None:
|
||||
mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
|
||||
else:
|
||||
fname = 'page%04d.xhtml' % id
|
||||
tlst.append('<h3><a href="'+ fname + '">' + title + '</a></h3>\n')
|
||||
tlst.append('</body>\n')
|
||||
tlst.append('</html>\n')
|
||||
tochtml = "".join(tlst)
|
||||
file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml)
|
||||
mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
|
||||
|
||||
|
||||
# now create index_svg.xhtml that points to all required files
|
||||
slst = []
|
||||
slst.append('<?xml version="1.0" encoding="utf-8"?>\n')
|
||||
slst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
|
||||
slst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
|
||||
slst.append('<head>\n')
|
||||
slst.append('<title>' + meta_array['Title'] + '</title>\n')
|
||||
slst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
|
||||
slst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
|
||||
if 'ASIN' in meta_array:
|
||||
slst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
|
||||
if 'GUID' in meta_array:
|
||||
slst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
|
||||
slst.append('</head>\n')
|
||||
slst.append('<body>\n')
|
||||
|
||||
print "Building svg images of each book page"
|
||||
slst.append('<h2>List of Pages</h2>\n')
|
||||
slst.append('<div>\n')
|
||||
idlst = sorted(pageIDMap.keys())
|
||||
numids = len(idlst)
|
||||
cnt = len(idlst)
|
||||
previd = None
|
||||
for j in range(cnt):
|
||||
pageid = idlst[j]
|
||||
if j < cnt - 1:
|
||||
nextid = idlst[j+1]
|
||||
else:
|
||||
nextid = None
|
||||
print '.',
|
||||
pagelst = pageIDMap[pageid]
|
||||
flst = []
|
||||
for page in pagelst:
|
||||
flst.append(xmllst[page])
|
||||
flat_svg = "".join(flst)
|
||||
flst=None
|
||||
svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
|
||||
if (raw) :
|
||||
pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w')
|
||||
slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid))
|
||||
mlst.append('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
|
||||
if (pp.gid != None):
|
||||
mlst.append('<defs>\n')
|
||||
gdefs = pp.getGlyphs()
|
||||
for j in xrange(0,len(gdefs)):
|
||||
mlst.append(gdefs[j])
|
||||
mlst.append('</defs>\n')
|
||||
img = pp.getImages()
|
||||
if (img != None):
|
||||
for j in xrange(0,len(img)):
|
||||
mlst.append(img[j])
|
||||
if (pp.gid != None):
|
||||
for j in xrange(0,len(pp.gid)):
|
||||
mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
|
||||
if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
|
||||
xpos = "%d" % (pp.pw // 3)
|
||||
ypos = "%d" % (pp.ph // 3)
|
||||
mlst.append('<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n')
|
||||
if (raw) :
|
||||
mlst.append('</svg>')
|
||||
else :
|
||||
mlst.append('</svg></a>\n')
|
||||
if nextid == None:
|
||||
mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
|
||||
else :
|
||||
pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w')
|
||||
slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid))
|
||||
previd = pageid
|
||||
pfile.write(svgxml)
|
||||
pfile.close()
|
||||
counter += 1
|
||||
slst.append('</div>\n')
|
||||
slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
|
||||
slst.append('</body>\n</html>\n')
|
||||
svgindex = "".join(slst)
|
||||
slst = None
|
||||
file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
|
||||
|
||||
print " "
|
||||
|
||||
# build the opf file
|
||||
opfname = os.path.join(bookDir, 'book.opf')
|
||||
olst = []
|
||||
olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
|
||||
olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
|
||||
# adding metadata
|
||||
olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
|
||||
if 'GUID' in meta_array:
|
||||
olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
|
||||
if 'ASIN' in meta_array:
|
||||
olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
|
||||
if 'oASIN' in meta_array:
|
||||
olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
|
||||
olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
|
||||
olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
|
||||
olst.append(' <dc:language>en</dc:language>\n')
|
||||
olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
|
||||
if isCover:
|
||||
olst.append(' <meta name="cover" content="bookcover"/>\n')
|
||||
olst.append(' </metadata>\n')
|
||||
olst.append('<manifest>\n')
|
||||
olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
|
||||
olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
|
||||
# adding image files to manifest
|
||||
filenames = os.listdir(imgDir)
|
||||
filenames = sorted(filenames)
|
||||
for filename in filenames:
|
||||
imgname, imgext = os.path.splitext(filename)
|
||||
if imgext == '.jpg':
|
||||
imgext = 'jpeg'
|
||||
if imgext == '.svg':
|
||||
imgext = 'svg+xml'
|
||||
olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n')
|
||||
if isCover:
|
||||
olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
|
||||
olst.append('</manifest>\n')
|
||||
# adding spine
|
||||
olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
|
||||
if isCover:
|
||||
olst.append(' <guide>\n')
|
||||
olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
|
||||
olst.append(' </guide>\n')
|
||||
olst.append('</package>\n')
|
||||
opfstr = "".join(olst)
|
||||
olst = None
|
||||
file(opfname, 'wb').write(opfstr)
|
||||
|
||||
print 'Processing Complete'
|
||||
|
||||
return 0
|
||||
|
||||
def usage():
|
||||
print "genbook.py generates a book from the extract Topaz Files"
|
||||
print "Usage:"
|
||||
print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
|
||||
print " "
|
||||
print "Options:"
|
||||
print " -h : help - print this usage message"
|
||||
print " -r : generate raw svg files (not wrapped in xhtml)"
|
||||
print " --fixed-image : genearate any Fixed Area as an svg image in the html"
|
||||
print " "
|
||||
|
||||
|
||||
def main(argv):
|
||||
bookDir = ''
|
||||
if len(argv) == 0:
|
||||
argv = sys.argv
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"])
|
||||
|
||||
except getopt.GetoptError, err:
|
||||
print str(err)
|
||||
usage()
|
||||
return 1
|
||||
|
||||
if len(opts) == 0 and len(args) == 0 :
|
||||
usage()
|
||||
return 1
|
||||
|
||||
raw = 0
|
||||
fixedimage = True
|
||||
for o, a in opts:
|
||||
if o =="-h":
|
||||
usage()
|
||||
return 0
|
||||
if o =="-r":
|
||||
raw = 1
|
||||
if o =="--fixed-image":
|
||||
fixedimage = True
|
||||
|
||||
bookDir = args[0]
|
||||
|
||||
rv = generateBook(bookDir, raw, fixedimage)
|
||||
return rv
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main(''))
|
||||
mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
|
||||
mlst.append('</div>\n')
|
||||
mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
|
||||
mlst.append('</body>\n')
|
||||
mlst.append('</html>\n')
|
||||
return "".join(mlst)
|
||||
|
||||
Reference in New Issue
Block a user