tools v5.5.3

This commit is contained in:
Apprentice Alf
2012-12-26 23:17:56 +00:00
parent c010e3f77a
commit 602ff30b3a
17 changed files with 142 additions and 63 deletions

View File

@@ -277,6 +277,7 @@ class PageParser(object):
'word_semantic' : (1, 'snippets', 1, 1),
'word_semantic.type' : (1, 'scalar_text', 0, 0),
'word_semantic.class' : (1, 'scalar_text', 0, 0),
'word_semantic.firstWord' : (1, 'scalar_number', 0, 0),
'word_semantic.lastWord' : (1, 'scalar_number', 0, 0),
@@ -287,6 +288,7 @@ class PageParser(object):
'word.lastGlyph' : (1, 'scalar_number', 0, 0),
'_span' : (1, 'snippets', 1, 0),
'_span.class' : (1, 'scalar_text', 0, 0),
'_span.firstWord' : (1, 'scalar_number', 0, 0),
'_span.lastWord' : (1, 'scalar_number', 0, 0),
'_span.gridSize' : (1, 'scalar_number', 0, 0),
@@ -350,16 +352,18 @@ class PageParser(object):
'version.paragraph_continuation' : (1, 'scalar_text', 0, 0),
'version.toc' : (1, 'scalar_text', 0, 0),
'stylesheet' : (1, 'snippets', 1, 0),
'style' : (1, 'snippets', 1, 0),
'style._tag' : (1, 'scalar_text', 0, 0),
'style.type' : (1, 'scalar_text', 0, 0),
'style._parent_type' : (1, 'scalar_text', 0, 0),
'style.class' : (1, 'scalar_text', 0, 0),
'style._after_class' : (1, 'scalar_text', 0, 0),
'rule' : (1, 'snippets', 1, 0),
'rule.attr' : (1, 'scalar_text', 0, 0),
'rule.value' : (1, 'scalar_text', 0, 0),
'stylesheet' : (1, 'snippets', 1, 0),
'style' : (1, 'snippets', 1, 0),
'style._tag' : (1, 'scalar_text', 0, 0),
'style.type' : (1, 'scalar_text', 0, 0),
'style._after_type' : (1, 'scalar_text', 0, 0),
'style._parent_type' : (1, 'scalar_text', 0, 0),
'style._after_parent_type' : (1, 'scalar_text', 0, 0),
'style.class' : (1, 'scalar_text', 0, 0),
'style._after_class' : (1, 'scalar_text', 0, 0),
'rule' : (1, 'snippets', 1, 0),
'rule.attr' : (1, 'scalar_text', 0, 0),
'rule.value' : (1, 'scalar_text', 0, 0),
'original' : (0, 'number', 1, 1),
'original.pnum' : (1, 'number', 0, 0),

View File

@@ -11,15 +11,21 @@ __version__ = '1.01'
import sys
class Unbuffered:
class SafeUnbuffered:
    """Stream wrapper that encodes unicode text and flushes after each write.

    Intended to replace ``sys.stdout`` / ``sys.stderr`` (Python 2): unicode
    data is encoded with the wrapped stream's encoding using the ``replace``
    error handler, so characters the encoding cannot represent become ``?``
    instead of raising ``UnicodeEncodeError``.  Byte strings are passed
    through unchanged.  Any attribute not defined here is looked up on the
    wrapped stream.
    """

    def __init__(self, stream):
        """Wrap *stream*, which must provide ``write`` and ``flush``."""
        self.stream = stream
        # Not every file-like object has an ``encoding`` attribute, and on
        # real files it may be None (e.g. when output is piped).  Fall back
        # to UTF-8 in both cases rather than failing at construction time.
        self.encoding = getattr(stream, "encoding", None)
        if self.encoding is None:
            self.encoding = "utf-8"

    def write(self, data):
        """Write *data* to the wrapped stream and flush immediately."""
        if isinstance(data, unicode):
            # "replace" keeps output going when a character has no
            # representation in the target encoding.
            data = data.encode(self.encoding, "replace")
        self.stream.write(data)
        self.stream.flush()

    def __getattr__(self, attr):
        """Delegate every other attribute lookup to the wrapped stream."""
        return getattr(self.stream, attr)
sys.stdout=Unbuffered(sys.stdout)
sys.stdout=SafeUnbuffered(sys.stdout)
sys.stderr=SafeUnbuffered(sys.stderr)
import os
import struct
@@ -41,7 +47,7 @@ def getK4PCpids(path_to_ebook):
mobi = False
if mobi:
mb = mobidedrm.MobiBook(path_to_ebook,False)
mb = mobidedrm.MobiBook(path_to_ebook)
else:
mb = topazextract.TopazBook(path_to_ebook)

View File

@@ -10,6 +10,7 @@ import re
from struct import pack
from struct import unpack
debug = False
class DocParser(object):
def __init__(self, flatxml, fontsize, ph, pw):
@@ -113,7 +114,9 @@ class DocParser(object):
# process each style converting what you can
if debug: print ' ', 'Processing styles.'
for j in xrange(stylecnt):
if debug: print ' ', 'Processing style %d' %(j)
start = styleList[j]
end = styleList[j+1]
@@ -132,6 +135,8 @@ class DocParser(object):
else :
sclass = ''
if debug: print 'sclass', sclass
# check for any "after class" specifiers
(pos, aftclass) = self.findinDoc('style._after_class',start,end)
if aftclass != None:
@@ -140,6 +145,8 @@ class DocParser(object):
else :
aftclass = ''
if debug: print 'aftclass', aftclass
cssargs = {}
while True :
@@ -147,6 +154,9 @@ class DocParser(object):
(pos1, attr) = self.findinDoc('style.rule.attr', start, end)
(pos2, val) = self.findinDoc('style.rule.value', start, end)
if debug: print 'attr', attr
if debug: print 'val', val
if attr == None : break
if (attr == 'display') or (attr == 'pos') or (attr == 'align'):
@@ -164,7 +174,7 @@ class DocParser(object):
scale = self.pw
elif attr == 'line-space':
scale = self.fontsize * 2.0
if val == "":
val = 0
@@ -179,6 +189,7 @@ class DocParser(object):
if aftclass != "" : keep = False
if keep :
if debug: print 'keeping style'
# make sure line-space does not go below 100% or above 300% since
# it can be wacky in some styles
if 'line-space' in cssargs:
@@ -256,7 +267,9 @@ def convert2CSS(flatxml, fontsize, ph, pw):
# create a document parser
dp = DocParser(flatxml, fontsize, ph, pw)
if debug: print ' ', 'Created DocParser.'
csspage = dp.process()
if debug: print ' ', 'Processed DocParser.'
return csspage

View File

@@ -69,6 +69,9 @@ def unicode_argv():
argvencoding = 'utf-8'
return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
# Global switch: set to True to enable the "if debug: print ..." diagnostic output.
debug = False
if 'calibre' in sys.modules:
inCalibre = True
from calibre_plugins.k4mobidedrm import kgenpids
@@ -206,6 +209,7 @@ class TopazBook:
# Read and return the data of one header record at the current book file position
# [[offset,decompressedLength,compressedLength],...]
nbValues = bookReadEncodedNumber(self.fo)
if debug: print "%d records in header " % nbValues,
values = []
for i in range (0,nbValues):
values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
@@ -219,9 +223,10 @@ class TopazBook:
record = bookReadHeaderRecordData()
return [tag,record]
nbRecords = bookReadEncodedNumber(self.fo)
if debug: print "Headers: %d" % nbRecords
for i in range (0,nbRecords):
result = parseTopazHeaderRecord()
# print result[0], result[1]
if debug: print result[0], ": ", result[1]
self.bookHeaderRecords[result[0]] = result[1]
if ord(self.fo.read(1)) != 0x64 :
raise DrmException(u"Parse Error : Invalid Header")
@@ -235,12 +240,12 @@ class TopazBook:
raise DrmException(u"Parse Error : Record Names Don't Match")
flags = ord(self.fo.read(1))
nbRecords = ord(self.fo.read(1))
# print nbRecords
if debug: print "Metadata Records: %d" % nbRecords
for i in range (0,nbRecords) :
keyval = bookReadString(self.fo)
content = bookReadString(self.fo)
# print keyval
# print content
if debug: print keyval
if debug: print content
self.bookMetadata[keyval] = content
return self.bookMetadata