tools v5.2

This commit is contained in:
Apprentice Alf
2012-09-09 01:45:24 +01:00
parent 0812438b9d
commit f3f02adc98
75 changed files with 2556 additions and 1092 deletions

View File

@@ -1,6 +1,8 @@
#! /usr/bin/env python
# ineptpdf plugin __init__.py, version 0.1.5
from __future__ import with_statement
# ineptpdf plugin __init__.py
# Released under the terms of the GNU General Public Licence, version 3 or
# later. <http://www.gnu.org/licenses/>
@@ -52,13 +54,11 @@
# 0.1.2 - back port ineptpdf 8.4.X bug fixes
# 0.1.3 - add in fix for improper rejection of session bookkeys with len(bookkey) = length + 1
# 0.1.4 - update to the new calibre plugin interface
# 0.1.5 - synced to ineptpdf 7.11
"""
Decrypts Adobe ADEPT-encrypted PDF files.
"""
from __future__ import with_statement
__license__ = 'GPL v3'
import sys
@@ -116,13 +116,13 @@ def _load_crypto_libcrypto():
class RSA(Structure):
pass
RSA_p = POINTER(RSA)
def F(restype, name, argtypes):
func = getattr(libcrypto, name)
func.restype = restype
func.argtypes = argtypes
return func
AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
@@ -143,7 +143,7 @@ def _load_crypto_libcrypto():
rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
if rsa is None:
raise ADEPTError('Error parsing ADEPT user key DER')
def decrypt(self, from_):
rsa = self._rsa
to = create_string_buffer(RSA_size(rsa))
@@ -152,7 +152,7 @@ def _load_crypto_libcrypto():
if dlen < 0:
raise ADEPTError('RSA decryption failed')
return to[1:dlen]
def __del__(self):
if self._rsa is not None:
RSA_free(self._rsa)
@@ -214,13 +214,13 @@ def _load_crypto_pycrypto():
# ASN.1 parsing code from tlslite
class ASN1Error(Exception):
pass
class ASN1Parser(object):
class Parser(object):
def __init__(self, bytes):
self.bytes = bytes
self.index = 0
def get(self, length):
if self.index + length > len(self.bytes):
raise ASN1Error("Error decoding ASN.1")
@@ -230,22 +230,22 @@ def _load_crypto_pycrypto():
x |= self.bytes[self.index]
self.index += 1
return x
def getFixBytes(self, lengthBytes):
bytes = self.bytes[self.index : self.index+lengthBytes]
self.index += lengthBytes
return bytes
def getVarBytes(self, lengthLength):
lengthBytes = self.get(lengthLength)
return self.getFixBytes(lengthBytes)
def getFixList(self, length, lengthList):
l = [0] * lengthList
for x in range(lengthList):
l[x] = self.get(length)
return l
def getVarList(self, length, lengthLength):
lengthList = self.get(lengthLength)
if lengthList % length != 0:
@@ -255,19 +255,19 @@ def _load_crypto_pycrypto():
for x in range(lengthList):
l[x] = self.get(length)
return l
def startLengthCheck(self, lengthLength):
self.lengthCheck = self.get(lengthLength)
self.indexCheck = self.index
def setLengthCheck(self, length):
self.lengthCheck = length
self.indexCheck = self.index
def stopLengthCheck(self):
if (self.index - self.indexCheck) != self.lengthCheck:
raise ASN1Error("Error decoding ASN.1")
def atLengthCheck(self):
if (self.index - self.indexCheck) < self.lengthCheck:
return False
@@ -275,13 +275,13 @@ def _load_crypto_pycrypto():
return True
else:
raise ASN1Error("Error decoding ASN.1")
def __init__(self, bytes):
p = self.Parser(bytes)
p.get(1)
self.length = self._getASN1Length(p)
self.value = p.getFixBytes(self.length)
def getChild(self, which):
p = self.Parser(self.value)
for x in range(which+1):
@@ -290,7 +290,7 @@ def _load_crypto_pycrypto():
length = self._getASN1Length(p)
p.getFixBytes(length)
return ASN1Parser(p.bytes[markIndex:p.index])
def _getASN1Length(self, p):
firstLength = p.get(1)
if firstLength<=127:
@@ -311,6 +311,7 @@ def _load_crypto_pycrypto():
return self._arc4.decrypt(data)
class AES(object):
MODE_CBC = _AES.MODE_CBC
@classmethod
def new(cls, userkey, mode, iv):
self = AES()
@@ -333,7 +334,7 @@ def _load_crypto_pycrypto():
for byte in bytes:
total = (total << 8) + byte
return total
def decrypt(self, data):
return self._rsa.decrypt(data)
@@ -426,7 +427,7 @@ class PSLiteral(PSObject):
def __init__(self, name):
self.name = name
return
def __repr__(self):
name = []
for char in self.name:
@@ -445,22 +446,22 @@ class PSKeyword(PSObject):
def __init__(self, name):
self.name = name
return
def __repr__(self):
return self.name
# PSSymbolTable
class PSSymbolTable(object):
'''
Symbol table that stores PSLiteral or PSKeyword.
'''
def __init__(self, classe):
self.dic = {}
self.classe = classe
return
def intern(self, name):
if name in self.dic:
lit = self.dic[name]
@@ -530,11 +531,11 @@ class PSBaseParser(object):
def flush(self):
return
def close(self):
self.flush()
return
def tell(self):
return self.bufpos+self.charpos
@@ -570,7 +571,7 @@ class PSBaseParser(object):
raise PSEOF('Unexpected EOF')
self.charpos = 0
return
def parse_main(self, s, i):
m = NONSPC.search(s, i)
if not m:
@@ -605,11 +606,11 @@ class PSBaseParser(object):
return (self.parse_wclose, j+1)
self.add_token(KWD(c))
return (self.parse_main, j+1)
def add_token(self, obj):
self.tokens.append((self.tokenstart, obj))
return
def parse_comment(self, s, i):
m = EOL.search(s, i)
if not m:
@@ -620,7 +621,7 @@ class PSBaseParser(object):
# We ignore comments.
#self.tokens.append(self.token)
return (self.parse_main, j)
def parse_literal(self, s, i):
m = END_LITERAL.search(s, i)
if not m:
@@ -634,7 +635,7 @@ class PSBaseParser(object):
return (self.parse_literal_hex, j+1)
self.add_token(LIT(self.token))
return (self.parse_main, j)
def parse_literal_hex(self, s, i):
c = s[i]
if HEX.match(c) and len(self.hex) < 2:
@@ -669,7 +670,7 @@ class PSBaseParser(object):
self.token += s[i:j]
self.add_token(float(self.token))
return (self.parse_main, j)
def parse_keyword(self, s, i):
m = END_KEYWORD.search(s, i)
if not m:
@@ -817,7 +818,7 @@ class PSStackParser(PSBaseParser):
PSBaseParser.__init__(self, fp)
self.reset()
return
def reset(self):
self.context = []
self.curtype = None
@@ -858,10 +859,10 @@ class PSStackParser(PSBaseParser):
def do_keyword(self, pos, token):
return
def nextobject(self, direct=False):
'''
Yields a list of objects: keywords, literals, strings,
Yields a list of objects: keywords, literals, strings,
numbers, arrays and dictionaries. Arrays and dictionaries
are represented as Python sequence and dictionaries.
'''
@@ -930,7 +931,7 @@ class PDFNotImplementedError(PSException): pass
## PDFObjRef
##
class PDFObjRef(PDFObject):
def __init__(self, doc, objid, genno):
if objid == 0:
if STRICT:
@@ -1045,25 +1046,25 @@ def stream_value(x):
# ascii85decode(data)
def ascii85decode(data):
n = b = 0
out = ''
for c in data:
if '!' <= c and c <= 'u':
n += 1
b = b*85+(ord(c)-33)
if n == 5:
out += struct.pack('>L',b)
n = b = 0
elif c == 'z':
assert n == 0
out += '\0\0\0\0'
elif c == '~':
if n:
for _ in range(5-n):
b = b*85+84
out += struct.pack('>L',b)[:n-1]
break
return out
n = b = 0
out = ''
for c in data:
if '!' <= c and c <= 'u':
n += 1
b = b*85+(ord(c)-33)
if n == 5:
out += struct.pack('>L',b)
n = b = 0
elif c == 'z':
assert n == 0
out += '\0\0\0\0'
elif c == '~':
if n:
for _ in range(5-n):
b = b*85+84
out += struct.pack('>L',b)[:n-1]
break
return out
## PDFStream type
@@ -1080,7 +1081,7 @@ class PDFStream(PDFObject):
else:
if eol in ('\r', '\n', '\r\n'):
rawdata = rawdata[:length]
self.dic = dic
self.rawdata = rawdata
self.decipher = decipher
@@ -1094,7 +1095,7 @@ class PDFStream(PDFObject):
self.objid = objid
self.genno = genno
return
def __repr__(self):
if self.rawdata:
return '<PDFStream(%r): raw=%d, %r>' % \
@@ -1178,7 +1179,7 @@ class PDFStream(PDFObject):
data = self.decipher(self.objid, self.genno, data)
return data
## PDF Exceptions
##
class PDFSyntaxError(PDFException): pass
@@ -1243,7 +1244,7 @@ class PDFXRef(object):
self.offsets[objid] = (int(genno), int(pos))
self.load_trailer(parser)
return
KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
def load_trailer(self, parser):
try:
@@ -1284,7 +1285,7 @@ class PDFXRefStream(object):
for first, size in self.index:
for objid in xrange(first, first + size):
yield objid
def load(self, parser, debug=0):
(_,objid) = parser.nexttoken() # ignored
(_,genno) = parser.nexttoken() # ignored
@@ -1302,7 +1303,7 @@ class PDFXRefStream(object):
self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.dic
return
def getpos(self, objid):
offset = 0
for first, size in self.index:
@@ -1353,7 +1354,7 @@ class PDFDocument(object):
self.parser = parser
# The document is set to be temporarily ready during collecting
# all the basic information about the document, e.g.
# the header, the encryption information, and the access rights
# the header, the encryption information, and the access rights
# for the document.
self.ready = True
# Retrieve the information of each header that was appended
@@ -1429,7 +1430,7 @@ class PDFDocument(object):
length = int_value(param.get('Length', 0)) / 8
edcdata = str_value(param.get('EDCData')).decode('base64')
pdrllic = str_value(param.get('PDRLLic')).decode('base64')
pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
edclist = []
for pair in edcdata.split('\n'):
edclist.append(pair)
@@ -1449,9 +1450,9 @@ class PDFDocument(object):
raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
else:
cutter = -1 * ord(pdrlpol[-1])
pdrlpol = pdrlpol[:cutter]
pdrlpol = pdrlpol[:cutter]
return plaintext[:16]
PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
# experimental aes pw support
@@ -1471,14 +1472,14 @@ class PDFDocument(object):
EncMetadata = str_value(param['EncryptMetadata'])
except:
EncMetadata = 'True'
self.is_printable = bool(P & 4)
self.is_printable = bool(P & 4)
self.is_modifiable = bool(P & 8)
self.is_extractable = bool(P & 16)
self.is_annotationable = bool(P & 32)
self.is_formsenabled = bool(P & 256)
self.is_textextractable = bool(P & 512)
self.is_assemblable = bool(P & 1024)
self.is_formprintable = bool(P & 2048)
self.is_formprintable = bool(P & 2048)
# Algorithm 3.2
password = (password+self.PASSWORD_PADDING)[:32] # 1
hash = hashlib.md5(password) # 2
@@ -1587,7 +1588,7 @@ class PDFDocument(object):
hash = hashlib.md5(key)
key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
return key
def genkey_v3(self, objid, genno):
objid = struct.pack('<L', objid ^ 0x3569ac)
genno = struct.pack('<L', genno ^ 0xca96)
@@ -1627,14 +1628,14 @@ class PDFDocument(object):
#print cutter
plaintext = plaintext[:cutter]
return plaintext
def decrypt_rc4(self, objid, genno, data):
key = self.genkey(objid, genno)
return ARC4.new(key).decrypt(data)
KEYWORD_OBJ = PSKeywordTable.intern('obj')
def getobj(self, objid):
if not self.ready:
raise PDFException('PDFDocument not initialized')
@@ -1704,7 +1705,7 @@ class PDFDocument(object):
## if x:
## objid1 = x[-2]
## genno = x[-1]
##
##
if kwd is not self.KEYWORD_OBJ:
raise PDFSyntaxError(
'Invalid object spec: offset=%r' % index)
@@ -1716,7 +1717,7 @@ class PDFDocument(object):
self.objs[objid] = obj
return obj
class PDFObjStmRef(object):
maxindex = 0
def __init__(self, objid, stmid, index):
@@ -1726,7 +1727,7 @@ class PDFObjStmRef(object):
if index > PDFObjStmRef.maxindex:
PDFObjStmRef.maxindex = index
## PDFParser
##
class PDFParser(PSStackParser):
@@ -1752,7 +1753,7 @@ class PDFParser(PSStackParser):
if token is self.KEYWORD_ENDOBJ:
self.add_results(*self.pop(4))
return
if token is self.KEYWORD_R:
# reference to indirect object
try:
@@ -1763,7 +1764,7 @@ class PDFParser(PSStackParser):
except PSSyntaxError:
pass
return
if token is self.KEYWORD_STREAM:
# stream object
((_,dic),) = self.pop(1)
@@ -1803,7 +1804,7 @@ class PDFParser(PSStackParser):
obj = PDFStream(dic, data, self.doc.decipher)
self.push((pos, obj))
return
# others
self.push((pos, token))
return
@@ -1839,7 +1840,7 @@ class PDFParser(PSStackParser):
xref.load(self)
else:
if token is not self.KEYWORD_XREF:
raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
(pos, token))
self.nextline()
xref = PDFXRef()
@@ -1854,7 +1855,7 @@ class PDFParser(PSStackParser):
pos = int_value(trailer['Prev'])
self.read_xref_from(pos, xrefs)
return
# read xref tables and trailers
def read_xref(self):
xrefs = []
@@ -1973,7 +1974,7 @@ class PDFSerializer(object):
self.write("%010d 00000 n \n" % xrefs[objid][0])
else:
self.write("%010d %05d f \n" % (0, 65535))
self.write('trailer\n')
self.serialize_object(trailer)
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
@@ -1993,7 +1994,7 @@ class PDFSerializer(object):
while maxindex >= power:
fl3 += 1
power *= 256
index = []
first = None
prev = None
@@ -2020,14 +2021,14 @@ class PDFSerializer(object):
# we force all generation numbers to be 0
# f3 = objref[1]
f3 = 0
data.append(struct.pack('>B', f1))
data.append(struct.pack('>L', f2)[-fl2:])
data.append(struct.pack('>L', f3)[-fl3:])
index.extend((first, prev - first + 1))
data = zlib.compress(''.join(data))
dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
'W': [1, fl2, fl3], 'Length': len(data),
'W': [1, fl2, fl3], 'Length': len(data),
'Filter': LITERALS_FLATE_DECODE[0],
'Root': trailer['Root'],}
if 'Info' in trailer:
@@ -2049,9 +2050,9 @@ class PDFSerializer(object):
string = string.replace(')', r'\)')
# get rid of ciando id
regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
if regularexp.match(string): return ('http://www.ciando.com')
if regularexp.match(string): return ('http://www.ciando.com')
return string
def serialize_object(self, obj):
if isinstance(obj, dict):
# Correct malformed Mac OS resource forks for Stanza
@@ -2075,21 +2076,21 @@ class PDFSerializer(object):
elif isinstance(obj, bool):
if self.last.isalnum():
self.write(' ')
self.write(str(obj).lower())
self.write(str(obj).lower())
elif isinstance(obj, (int, long, float)):
if self.last.isalnum():
self.write(' ')
self.write(str(obj))
elif isinstance(obj, PDFObjRef):
if self.last.isalnum():
self.write(' ')
self.write(' ')
self.write('%d %d R' % (obj.objid, 0))
elif isinstance(obj, PDFStream):
### If we don't generate cross ref streams the object streams
### are no longer useful, as we have extracted all objects from
### them. Therefore leave them out from the output.
if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
self.write('(deleted)')
self.write('(deleted)')
else:
data = obj.get_decdata()
self.serialize_object(obj.dic)
@@ -2101,7 +2102,7 @@ class PDFSerializer(object):
if data[0].isalnum() and self.last.isalnum():
self.write(' ')
self.write(data)
def serialize_indirect(self, objid, obj):
self.write('%d 0 obj' % (objid,))
self.serialize_object(obj)
@@ -2136,7 +2137,7 @@ class IneptPDFDeDRM(FileTypePlugin):
Credit given to I <3 Cabbages for the original stand-alone scripts.'
supported_platforms = ['linux', 'osx', 'windows']
author = 'DiapDealer'
version = (0, 1, 4)
version = (0, 1, 5)
minimum_calibre_version = (0, 7, 55) # for the new plugin interface
file_types = set(['pdf'])
on_import = True