tools v5.2

2012-09-09 01:45:24 +01:00
parent 0812438b9d
commit f3f02adc98
75 changed files with 2556 additions and 1092 deletions
--- a/Calibre_Plugins/ineptpdf_plugin/init.py
+++ b/Calibre_Plugins/ineptpdf_plugin/init.py
@@ -1,6 +1,8 @@
 #! /usr/bin/env python
+# ineptpdf plugin  __init__.py, version 0.1.5
+
+from __future__ import with_statement

-# ineptpdf plugin  __init__.py
 # Released under the terms of the GNU General Public Licence, version 3 or
 # later.  <http://www.gnu.org/licenses/>

@@ -52,13 +54,11 @@
 #   0.1.2 - back port ineptpdf 8.4.X bug fixes
 #   0.1.3 - add in fix for improper rejection of session bookkeys with len(bookkey) = length + 1 
 #   0.1.4 - update to the new calibre plugin interface
-
+#   0.1.5 - synced to ineptpdf 7.11
 """
 Decrypts Adobe ADEPT-encrypted PDF files.
 """

-from __future__ import with_statement
-
 __license__ = 'GPL v3'

 import sys
@@ -116,13 +116,13 @@ def _load_crypto_libcrypto():
    class RSA(Structure):
        pass
    RSA_p = POINTER(RSA)
-    
+
    def F(restype, name, argtypes):
        func = getattr(libcrypto, name)
        func.restype = restype
        func.argtypes = argtypes
        return func
-    
+
    AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
    AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])

@@ -143,7 +143,7 @@ def _load_crypto_libcrypto():
            rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
            if rsa is None:
                raise ADEPTError('Error parsing ADEPT user key DER')
-        
+
        def decrypt(self, from_):
            rsa = self._rsa
            to = create_string_buffer(RSA_size(rsa))
@@ -152,7 +152,7 @@ def _load_crypto_libcrypto():
            if dlen < 0:
                raise ADEPTError('RSA decryption failed')
            return to[1:dlen]
-    
+
        def __del__(self):
            if self._rsa is not None:
                RSA_free(self._rsa)
@@ -214,13 +214,13 @@ def _load_crypto_pycrypto():
    # ASN.1 parsing code from tlslite
    class ASN1Error(Exception):
        pass
-    
+
    class ASN1Parser(object):
        class Parser(object):
            def __init__(self, bytes):
                self.bytes = bytes
                self.index = 0
-    
+
            def get(self, length):
                if self.index + length > len(self.bytes):
                    raise ASN1Error("Error decoding ASN.1")
@@ -230,22 +230,22 @@ def _load_crypto_pycrypto():
                    x |= self.bytes[self.index]
                    self.index += 1
                return x
-    
+
            def getFixBytes(self, lengthBytes):
                bytes = self.bytes[self.index : self.index+lengthBytes]
                self.index += lengthBytes
                return bytes
-    
+
            def getVarBytes(self, lengthLength):
                lengthBytes = self.get(lengthLength)
                return self.getFixBytes(lengthBytes)
-    
+
            def getFixList(self, length, lengthList):
                l = [0] * lengthList
                for x in range(lengthList):
                    l[x] = self.get(length)
                return l
-    
+
            def getVarList(self, length, lengthLength):
                lengthList = self.get(lengthLength)
                if lengthList % length != 0:
@@ -255,19 +255,19 @@ def _load_crypto_pycrypto():
                for x in range(lengthList):
                    l[x] = self.get(length)
                return l
-    
+
            def startLengthCheck(self, lengthLength):
                self.lengthCheck = self.get(lengthLength)
                self.indexCheck = self.index
-    
+
            def setLengthCheck(self, length):
                self.lengthCheck = length
                self.indexCheck = self.index
-    
+
            def stopLengthCheck(self):
                if (self.index - self.indexCheck) != self.lengthCheck:
                    raise ASN1Error("Error decoding ASN.1")
-    
+
            def atLengthCheck(self):
                if (self.index - self.indexCheck) < self.lengthCheck:
                    return False
@@ -275,13 +275,13 @@ def _load_crypto_pycrypto():
                    return True
                else:
                    raise ASN1Error("Error decoding ASN.1")
-    
+
        def __init__(self, bytes):
            p = self.Parser(bytes)
            p.get(1)
            self.length = self._getASN1Length(p)
            self.value = p.getFixBytes(self.length)
-    
+
        def getChild(self, which):
            p = self.Parser(self.value)
            for x in range(which+1):
@@ -290,7 +290,7 @@ def _load_crypto_pycrypto():
                length = self._getASN1Length(p)
                p.getFixBytes(length)
            return ASN1Parser(p.bytes[markIndex:p.index])
-    
+
        def _getASN1Length(self, p):
            firstLength = p.get(1)
            if firstLength<=127:
@@ -311,6 +311,7 @@ def _load_crypto_pycrypto():
            return self._arc4.decrypt(data)

    class AES(object):
+        MODE_CBC = _AES.MODE_CBC
        @classmethod
        def new(cls, userkey, mode, iv):
            self = AES()
@@ -333,7 +334,7 @@ def _load_crypto_pycrypto():
            for byte in bytes:
                total = (total << 8) + byte
            return total
-    
+
        def decrypt(self, data):
            return self._rsa.decrypt(data)

@@ -426,7 +427,7 @@ class PSLiteral(PSObject):
    def __init__(self, name):
        self.name = name
        return
-    
+
    def __repr__(self):
        name = []
        for char in self.name:
@@ -445,22 +446,22 @@ class PSKeyword(PSObject):
    def __init__(self, name):
        self.name = name
        return
-    
+
    def __repr__(self):
        return self.name

 # PSSymbolTable
 class PSSymbolTable(object):
-    
+
    '''
    Symbol table that stores PSLiteral or PSKeyword.
    '''
-    
+
    def __init__(self, classe):
        self.dic = {}
        self.classe = classe
        return
-    
+
    def intern(self, name):
        if name in self.dic:
            lit = self.dic[name]
@@ -530,11 +531,11 @@ class PSBaseParser(object):

    def flush(self):
        return
-    
+
    def close(self):
        self.flush()
        return
-    
+
    def tell(self):
        return self.bufpos+self.charpos

@@ -570,7 +571,7 @@ class PSBaseParser(object):
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return
-    
+
    def parse_main(self, s, i):
        m = NONSPC.search(s, i)
        if not m:
@@ -605,11 +606,11 @@ class PSBaseParser(object):
            return (self.parse_wclose, j+1)
        self.add_token(KWD(c))
        return (self.parse_main, j+1)
-                            
+
    def add_token(self, obj):
        self.tokens.append((self.tokenstart, obj))
        return
-    
+
    def parse_comment(self, s, i):
        m = EOL.search(s, i)
        if not m:
@@ -620,7 +621,7 @@ class PSBaseParser(object):
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)
-    
+
    def parse_literal(self, s, i):
        m = END_LITERAL.search(s, i)
        if not m:
@@ -634,7 +635,7 @@ class PSBaseParser(object):
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)
-    
+
    def parse_literal_hex(self, s, i):
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
@@ -669,7 +670,7 @@ class PSBaseParser(object):
        self.token += s[i:j]
        self.add_token(float(self.token))
        return (self.parse_main, j)
-    
+
    def parse_keyword(self, s, i):
        m = END_KEYWORD.search(s, i)
        if not m:
@@ -817,7 +818,7 @@ class PSStackParser(PSBaseParser):
        PSBaseParser.__init__(self, fp)
        self.reset()
        return
-    
+
    def reset(self):
        self.context = []
        self.curtype = None
@@ -858,10 +859,10 @@ class PSStackParser(PSBaseParser):

    def do_keyword(self, pos, token):
        return
-    
+
    def nextobject(self, direct=False):
        '''
-        Yields a list of objects: keywords, literals, strings, 
+        Yields a list of objects: keywords, literals, strings,
        numbers, arrays and dictionaries. Arrays and dictionaries
        are represented as Python sequence and dictionaries.
        '''
@@ -930,7 +931,7 @@ class PDFNotImplementedError(PSException): pass
 ##  PDFObjRef
 ##
 class PDFObjRef(PDFObject):
-    
+
    def __init__(self, doc, objid, genno):
        if objid == 0:
            if STRICT:
@@ -1045,25 +1046,25 @@ def stream_value(x):

 # ascii85decode(data)
 def ascii85decode(data):
-  n = b = 0
-  out = ''
-  for c in data:
-    if '!' <= c and c <= 'u':
-      n += 1
-      b = b*85+(ord(c)-33)
-      if n == 5:
-        out += struct.pack('>L',b)
-        n = b = 0
-    elif c == 'z':
-      assert n == 0
-      out += '\0\0\0\0'
-    elif c == '~':
-      if n:
-        for _ in range(5-n):
-          b = b*85+84
-        out += struct.pack('>L',b)[:n-1]
-      break
-  return out
+    n = b = 0
+    out = ''
+    for c in data:
+        if '!' <= c and c <= 'u':
+            n += 1
+            b = b*85+(ord(c)-33)
+            if n == 5:
+                out += struct.pack('>L',b)
+                n = b = 0
+        elif c == 'z':
+            assert n == 0
+            out += '\0\0\0\0'
+        elif c == '~':
+            if n:
+                for _ in range(5-n):
+                    b = b*85+84
+                out += struct.pack('>L',b)[:n-1]
+            break
+    return out


 ##  PDFStream type
@@ -1080,7 +1081,7 @@ class PDFStream(PDFObject):
        else:
            if eol in ('\r', '\n', '\r\n'):
                rawdata = rawdata[:length]
-                
+
        self.dic = dic
        self.rawdata = rawdata
        self.decipher = decipher
@@ -1094,7 +1095,7 @@ class PDFStream(PDFObject):
        self.objid = objid
        self.genno = genno
        return
-    
+
    def __repr__(self):
        if self.rawdata:
            return '<PDFStream(%r): raw=%d, %r>' % \
@@ -1178,7 +1179,7 @@ class PDFStream(PDFObject):
            data = self.decipher(self.objid, self.genno, data)
        return data

-        
+
 ##  PDF Exceptions
 ##
 class PDFSyntaxError(PDFException): pass
@@ -1243,7 +1244,7 @@ class PDFXRef(object):
                self.offsets[objid] = (int(genno), int(pos))
        self.load_trailer(parser)
        return
-    
+
    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
    def load_trailer(self, parser):
        try:
@@ -1284,7 +1285,7 @@ class PDFXRefStream(object):
        for first, size in self.index:
            for objid in xrange(first, first + size):
                yield objid
-    
+
    def load(self, parser, debug=0):
        (_,objid) = parser.nexttoken() # ignored
        (_,genno) = parser.nexttoken() # ignored
@@ -1302,7 +1303,7 @@ class PDFXRefStream(object):
        self.entlen = self.fl1+self.fl2+self.fl3
        self.trailer = stream.dic
        return
-    
+
    def getpos(self, objid):
        offset = 0
        for first, size in self.index:
@@ -1353,7 +1354,7 @@ class PDFDocument(object):
        self.parser = parser
        # The document is set to be temporarily ready during collecting
        # all the basic information about the document, e.g.
-        # the header, the encryption information, and the access rights 
+        # the header, the encryption information, and the access rights
        # for the document.
        self.ready = True
        # Retrieve the information of each header that was appended
@@ -1429,7 +1430,7 @@ class PDFDocument(object):
        length = int_value(param.get('Length', 0)) / 8
        edcdata = str_value(param.get('EDCData')).decode('base64')
        pdrllic = str_value(param.get('PDRLLic')).decode('base64')
-        pdrlpol = str_value(param.get('PDRLPol')).decode('base64')          
+        pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
        edclist = []
        for pair in edcdata.split('\n'):
            edclist.append(pair)
@@ -1449,9 +1450,9 @@ class PDFDocument(object):
            raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
        else:
            cutter = -1 * ord(pdrlpol[-1])
-            pdrlpol = pdrlpol[:cutter]            
+            pdrlpol = pdrlpol[:cutter]
        return plaintext[:16]
-    
+
    PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
                       '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
    # experimental aes pw support
@@ -1471,14 +1472,14 @@ class PDFDocument(object):
            EncMetadata = str_value(param['EncryptMetadata'])
        except:
            EncMetadata = 'True'
-        self.is_printable = bool(P & 4)        
+        self.is_printable = bool(P & 4)
        self.is_modifiable = bool(P & 8)
        self.is_extractable = bool(P & 16)
        self.is_annotationable = bool(P & 32)
        self.is_formsenabled = bool(P & 256)
        self.is_textextractable = bool(P & 512)
        self.is_assemblable = bool(P & 1024)
-        self.is_formprintable = bool(P & 2048) 
+        self.is_formprintable = bool(P & 2048)
        # Algorithm 3.2
        password = (password+self.PASSWORD_PADDING)[:32] # 1
        hash = hashlib.md5(password) # 2
@@ -1587,7 +1588,7 @@ class PDFDocument(object):
        hash = hashlib.md5(key)
        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
        return key
-    
+
    def genkey_v3(self, objid, genno):
        objid = struct.pack('<L', objid ^ 0x3569ac)
        genno = struct.pack('<L', genno ^ 0xca96)
@@ -1627,14 +1628,14 @@ class PDFDocument(object):
        #print cutter
        plaintext = plaintext[:cutter]
        return plaintext
-    
+
    def decrypt_rc4(self, objid, genno, data):
        key = self.genkey(objid, genno)
        return ARC4.new(key).decrypt(data)


    KEYWORD_OBJ = PSKeywordTable.intern('obj')
-    
+
    def getobj(self, objid):
        if not self.ready:
            raise PDFException('PDFDocument not initialized')
@@ -1704,7 +1705,7 @@ class PDFDocument(object):
 ##                    if x:
 ##                        objid1 = x[-2]
 ##                        genno = x[-1]
-##                
+##
                if kwd is not self.KEYWORD_OBJ:
                    raise PDFSyntaxError(
                        'Invalid object spec: offset=%r' % index)
@@ -1716,7 +1717,7 @@ class PDFDocument(object):
            self.objs[objid] = obj
        return obj

-                
+
 class PDFObjStmRef(object):
    maxindex = 0
    def __init__(self, objid, stmid, index):
@@ -1726,7 +1727,7 @@ class PDFObjStmRef(object):
        if index > PDFObjStmRef.maxindex:
            PDFObjStmRef.maxindex = index

-    
+
 ##  PDFParser
 ##
 class PDFParser(PSStackParser):
@@ -1752,7 +1753,7 @@ class PDFParser(PSStackParser):
        if token is self.KEYWORD_ENDOBJ:
            self.add_results(*self.pop(4))
            return
-        
+
        if token is self.KEYWORD_R:
            # reference to indirect object
            try:
@@ -1763,7 +1764,7 @@ class PDFParser(PSStackParser):
            except PSSyntaxError:
                pass
            return
-            
+
        if token is self.KEYWORD_STREAM:
            # stream object
            ((_,dic),) = self.pop(1)
@@ -1803,7 +1804,7 @@ class PDFParser(PSStackParser):
            obj = PDFStream(dic, data, self.doc.decipher)
            self.push((pos, obj))
            return
-        
+
        # others
        self.push((pos, token))
        return
@@ -1839,7 +1840,7 @@ class PDFParser(PSStackParser):
            xref.load(self)
        else:
            if token is not self.KEYWORD_XREF:
-                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % 
+                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
                                     (pos, token))
            self.nextline()
            xref = PDFXRef()
@@ -1854,7 +1855,7 @@ class PDFParser(PSStackParser):
            pos = int_value(trailer['Prev'])
            self.read_xref_from(pos, xrefs)
        return
-        
+
    # read xref tables and trailers
    def read_xref(self):
        xrefs = []
@@ -1973,7 +1974,7 @@ class PDFSerializer(object):
                    self.write("%010d 00000 n \n" % xrefs[objid][0])
                else:
                    self.write("%010d %05d f \n" % (0, 65535))
-            
+
            self.write('trailer\n')
            self.serialize_object(trailer)
            self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
@@ -1993,7 +1994,7 @@ class PDFSerializer(object):
            while maxindex >= power:
                fl3 += 1
                power *= 256
-                    
+
            index = []
            first = None
            prev = None
@@ -2020,14 +2021,14 @@ class PDFSerializer(object):
                    # we force all generation numbers to be 0
                    # f3 = objref[1]
                    f3 = 0
-                
+
                data.append(struct.pack('>B', f1))
                data.append(struct.pack('>L', f2)[-fl2:])
                data.append(struct.pack('>L', f3)[-fl3:])
            index.extend((first, prev - first + 1))
            data = zlib.compress(''.join(data))
            dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
-                   'W': [1, fl2, fl3], 'Length': len(data), 
+                   'W': [1, fl2, fl3], 'Length': len(data),
                   'Filter': LITERALS_FLATE_DECODE[0],
                   'Root': trailer['Root'],}
            if 'Info' in trailer:
@@ -2049,9 +2050,9 @@ class PDFSerializer(object):
        string = string.replace(')', r'\)')
         # get rid of ciando id
        regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
-        if regularexp.match(string): return ('http://www.ciando.com') 
+        if regularexp.match(string): return ('http://www.ciando.com')
        return string
-    
+
    def serialize_object(self, obj):
        if isinstance(obj, dict):
            # Correct malformed Mac OS resource forks for Stanza
@@ -2075,21 +2076,21 @@ class PDFSerializer(object):
        elif isinstance(obj, bool):
            if self.last.isalnum():
                self.write(' ')
-            self.write(str(obj).lower())            
+            self.write(str(obj).lower())
        elif isinstance(obj, (int, long, float)):
            if self.last.isalnum():
                self.write(' ')
            self.write(str(obj))
        elif isinstance(obj, PDFObjRef):
            if self.last.isalnum():
-                self.write(' ')            
+                self.write(' ')
            self.write('%d %d R' % (obj.objid, 0))
        elif isinstance(obj, PDFStream):
            ### If we don't generate cross ref streams the object streams
            ### are no longer useful, as we have extracted all objects from
            ### them. Therefore leave them out from the output.
            if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
-                    self.write('(deleted)')
+                self.write('(deleted)')
            else:
                data = obj.get_decdata()
                self.serialize_object(obj.dic)
@@ -2101,7 +2102,7 @@ class PDFSerializer(object):
            if data[0].isalnum() and self.last.isalnum():
                self.write(' ')
            self.write(data)
-    
+
    def serialize_indirect(self, objid, obj):
        self.write('%d 0 obj' % (objid,))
        self.serialize_object(obj)
@@ -2136,7 +2137,7 @@ class IneptPDFDeDRM(FileTypePlugin):
                                Credit given to I <3 Cabbages for the original stand-alone scripts.'
    supported_platforms     = ['linux', 'osx', 'windows']
    author                  = 'DiapDealer'
-    version                 = (0, 1, 4)
+    version                 = (0, 1, 5)
    minimum_calibre_version = (0, 7, 55)  # for the new plugin interface
    file_types              = set(['pdf'])
    on_import               = True