mirror of
				https://github.com/noDRM/DeDRM_tools.git
				synced 2025-10-23 23:07:47 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			330 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			330 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| # epubfontdecrypt.py
 | |
| # Copyright © 2021-2023 by noDRM
 | |
| 
 | |
| # Released under the terms of the GNU General Public Licence, version 3
 | |
| # <http://www.gnu.org/licenses/>
 | |
| 
 | |
| 
 | |
| # Revision history:
 | |
| #   1 - Initial release
 | |
| #   2 - Bugfix for multiple book IDs, reported at #347
 | |
| 
 | |
| """
 | |
| Decrypts / deobfuscates font files in EPUB files
 | |
| """
 | |
| 
 | |
| from __future__ import print_function
 | |
| 
 | |
| __license__ = 'GPL v3'
 | |
| __version__ = "2"
 | |
| 
 | |
| import os
 | |
| import traceback
 | |
| import zlib
 | |
| import zipfile
 | |
| from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
 | |
| from zeroedzipinfo import ZeroedZipInfo
 | |
| from contextlib import closing
 | |
| from lxml import etree
 | |
| import itertools
 | |
| import hashlib
 | |
| import binascii
 | |
| 
 | |
| 
 | |
class Decryptor(object):
    """De-obfuscate font files listed in an EPUB's ``META-INF/encryption.xml``.

    On construction the encryption manifest is parsed and every referenced
    path is sorted into one of three sets: IETF-obfuscated, Adobe-obfuscated,
    or "other" (any algorithm this class does not handle).  Entries that can
    be de-obfuscated with the supplied keys are removed from the parsed XML
    tree so that :meth:`get_xml` only serialises what is still encrypted.
    """

    _XMLENC_NS = 'http://www.w3.org/2001/04/xmlenc#'
    _ALGORITHM_IETF = "http://www.idpf.org/2008/embedding"
    _ALGORITHM_ADOBE = "http://ns.adobe.com/pdf/enc#RC"
    # Number of leading bytes that each algorithm XORs with its key.
    _PREFIX_LEN_IETF = 1040
    _PREFIX_LEN_ADOBE = 1024

    def __init__(self, obfuscationkeyIETF, obfuscationkeyAdobe, encryption):
        """Parse ``encryption`` and classify every referenced file.

        :param obfuscationkeyIETF: key bytes for the IETF algorithm, or None
            if no such key is available.
        :param obfuscationkeyAdobe: key bytes for the Adobe algorithm, or
            None if no such key is available.
        :param encryption: raw content of ``META-INF/encryption.xml``.
        """
        enc = lambda tag: '{%s}%s' % (self._XMLENC_NS, tag)
        self.obfuscation_key_Adobe = obfuscationkeyAdobe
        self.obfuscation_key_IETF = obfuscationkeyIETF

        self._encryption = etree.fromstring(encryption)
        self._obfuscatedIETF = set()
        self._obfuscatedAdobe = set()
        self._other = set()

        # EncryptedData elements that we are able to handle and that must
        # therefore be dropped from the rewritten encryption.xml.
        self._json_elements_to_remove = set()
        self._has_remaining_xml = False

        # Loop through all entries in "encryption.xml" to figure out which
        # files need to be de-obfuscated.
        expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
                               enc('CipherReference'))
        for elem in self._encryption.findall(expr):
            path = elem.get('URI', None)
            if path is None:
                continue

            # CipherReference -> CipherData -> EncryptedData
            encrypted_data = elem.getparent().getparent()
            method = encrypted_data.find(enc('EncryptionMethod'))
            # An entry without an EncryptionMethod is treated like an
            # unsupported algorithm instead of crashing the constructor.
            algorithm = None
            if method is not None:
                algorithm = method.get('Algorithm', None)

            path = path.encode('utf-8')

            if algorithm == self._ALGORITHM_IETF:
                # Font files obfuscated with the IETF algorithm
                targets = self._obfuscatedIETF
                key = self.obfuscation_key_IETF
            elif algorithm == self._ALGORITHM_ADOBE:
                # Font files obfuscated with the Adobe algorithm.
                targets = self._obfuscatedAdobe
                key = self.obfuscation_key_Adobe
            else:
                # Other unsupported type.
                self._other.add(path)
                self._has_remaining_xml = True
                continue

            targets.add(path)
            if key is None:
                # Cannot de-obfuscate without a key: keep the XML entry.
                self._has_remaining_xml = True
            else:
                self._json_elements_to_remove.add(encrypted_data)

        for elem in self._json_elements_to_remove:
            elem.getparent().remove(elem)

    def check_if_remaining(self):
        """Return True if encryption.xml still has entries we did not handle."""
        return self._has_remaining_xml

    def get_xml(self):
        """Return the (possibly pruned) encryption.xml as a text string."""
        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + etree.tostring(self._encryption, encoding="utf-8", pretty_print=True, xml_declaration=False).decode("utf-8")

    def decompress(self, bytes):
        """Try to inflate ``bytes`` as a raw (headerless) deflate stream.

        :returns: ``(data, True)`` with the inflated data on success, or
            ``(bytes, False)`` with the untouched input if zlib rejects it.
        """
        dc = zlib.decompressobj(-15)
        try:
            decompressed_bytes = dc.decompress(bytes)
            # One dummy byte is fed and the stream flushed; any output this
            # yields is appended.  NOTE(review): presumably this coaxes the
            # tail out of streams that lack a proper end marker - confirm.
            ex = dc.decompress(b'Z') + dc.flush()
        except zlib.error:
            # possibly not compressed by zip - just return bytes
            return bytes, False
        return decompressed_bytes + ex, True

    def decrypt(self, path, data):
        """Return ``data`` de-obfuscated if ``path`` is a known obfuscated file.

        Files that are not listed for a supported algorithm, or for which no
        key is available, are returned unchanged.
        """
        encoded_path = path.encode('utf-8')

        if encoded_path in self._obfuscatedIETF and self.obfuscation_key_IETF is not None:
            # de-obfuscate according to the IETF standard
            return self._deobfuscate(self.obfuscation_key_IETF, data,
                                     self._PREFIX_LEN_IETF)

        if encoded_path in self._obfuscatedAdobe and self.obfuscation_key_Adobe is not None:
            # de-obfuscate according to the Adobe standard
            return self._deobfuscate(self.obfuscation_key_Adobe, data,
                                     self._PREFIX_LEN_ADOBE)

        # Not encrypted or obfuscated
        return data

    def _deobfuscate(self, key, data, prefix_len):
        """XOR the first ``prefix_len`` bytes of ``data`` with ``key``.

        The data is inflated before the XOR if it already is a deflate
        stream; otherwise inflation is attempted once more afterwards.
        """
        data, was_decomp = self.decompress(data)
        # Slicing handles files shorter than the prefix as well: the whole
        # file is XORed and the remainder is empty.
        out = self.deobfuscate_single_data(key, data[:prefix_len]) + data[prefix_len:]
        if not was_decomp:
            out, was_decomp = self.decompress(out)
        return out

    def deobfuscate_single_data(self, key, data):
        """XOR every byte of ``data`` with the cyclically repeated ``key``.

        Going through ``bytearray`` yields integers on both Python 2 and
        Python 3, so a single code path serves both.
        """
        mixed = bytearray(
            c ^ k for c, k in zip(bytearray(data), itertools.cycle(bytearray(key)))
        )
        return bytes(mixed)
 | |
| 
 | |
| 
 | |
| 
 | |
def _find_opf_path(inf):
    """Return the ``full-path`` of the first rootfile in container.xml, or None.

    Any failure (missing or malformed container.xml) is treated as "no OPF".
    """
    ns = '{urn:oasis:names:tc:opendocument:xmlns:container}'
    try:
        container = etree.fromstring(inf.read("META-INF/container.xml"))
        for rootfile in container.find(ns + "rootfiles").findall(ns + "rootfile"):
            path = rootfile.get("full-path", None)
            if path is not None:
                return path
    except Exception:
        # Best effort only: the caller reports the missing OPF.
        pass
    return None


def _ietf_font_key(package):
    """Derive the IETF obfuscation key from the parsed OPF, printing the result.

    The key is the SHA-1 digest of the book's unique identifier with all
    spaces, tabs, carriage returns and line feeds removed.  Returns None
    (after printing "not found") if no identifier can be located.
    """
    opf = '{http://www.idpf.org/2007/opf}'
    dc = '{http://purl.org/dc/elements/1.1/}'

    key_id = None
    key_text = None
    if package is not None:
        key_id = package.get("unique-identifier")
        metadata = package.find(opf + "metadata")
        if metadata is not None:
            for element in metadata.findall(dc + "identifier"):
                # Without a declared unique-identifier every dc:identifier
                # qualifies; the last matching one wins.
                if key_id is None or key_id == element.get("id"):
                    key_text = element.text

    if key_text is None:
        print("not found")
        return None

    if key_id is None:
        print("found '%s'" % (key_text))
    else:
        print("found '%s' (%s)" % (key_text, key_id))

    # Trim / remove forbidden characters from the key, then hash it:
    for forbidden in (' ', '\t', '\r', '\n'):
        key_text = key_text.replace(forbidden, '')
    return hashlib.sha1(key_text.encode('utf-8')).digest()


def _adobe_font_key(package):
    """Derive the Adobe obfuscation key from the parsed OPF, printing the result.

    The first dc:identifier that either has the OPF ``scheme`` attribute set
    to "UUID" or starts with "urn:uuid:" is used.  Identifiers without text
    are skipped so they cannot abort the search.  Returns the 16 key bytes,
    or None if no well-formed UUID is found.
    """
    opf = '{http://www.idpf.org/2007/opf}'
    dc = '{http://purl.org/dc/elements/1.1/}'

    try:
        metadata = package.find(opf + "metadata")

        uid = None
        for identifier in metadata.findall(dc + "identifier"):
            text = identifier.text
            if not text:
                continue
            is_urn = text[:9] == "urn:uuid:"
            if identifier.get(opf + "scheme") == "UUID":
                uid = text[9:] if is_urn else text
                break
            if is_urn:
                uid = text[9:]
                break

        key = None
        if uid is not None:
            for forbidden in (chr(0x20), chr(0x09), chr(0x0D), chr(0x0A), '-'):
                uid = uid.replace(forbidden, '')

            malformed = (len(uid) < 16
                         or not all(c in "0123456789abcdefABCDEF" for c in uid))
            if not malformed:
                print("found '{0}'".format(uid))
                # Short UUIDs are repeated so 32 hex digits (16 bytes) exist.
                key = binascii.unhexlify((uid + uid)[:32])

        if key is None:
            print("not found")
        return key

    except Exception:
        # Also reached when the OPF could not be parsed at all.
        print("exception")
        return None


def _has_non_ascii(value):
    """Return True if ``value`` (text or bytes) contains anything >= 128."""
    if isinstance(value, bytes):
        return any(b >= 128 for b in bytearray(value))
    return any(ord(c) >= 128 for c in value)


def decryptFontsBook(inpath, outpath):
    """Write a copy of the EPUB at ``inpath`` to ``outpath`` with fonts de-obfuscated.

    :returns: 0 on success; 1 if the book has no ``META-INF/encryption.xml``
        or no OPF could be located (nothing is written in that case); 2 if
        an exception occurred while writing the new archive.
    """
    # The file objects are opened separately from the archives so that both
    # are closed explicitly; only the archive was being closed before, which
    # leaked the underlying file handle.
    with open(inpath, 'rb') as in_file, closing(ZipFile(in_file)) as inf:
        namelist = inf.namelist()
        if 'META-INF/encryption.xml' not in namelist:
            return 1

        # Font key handling:

        # If path is None, we didn't find an OPF, so we probably don't have a font key.
        # If path is set, it's the path to the main content OPF file.
        path = _find_opf_path(inf)
        if path is None:
            print("FontDecrypt: No OPF for font obfuscation found")
            return 1

        try:
            package = etree.fromstring(inf.read(path))
        except Exception:
            package = None

        ## IETF font key algorithm:
        print("FontDecrypt: Checking {0} for IETF font obfuscation keys ... ".format(path), end='')
        font_master_key = _ietf_font_key(package)

        ## Adobe font key algorithm
        print("FontDecrypt: Checking {0} for Adobe font obfuscation keys ... ".format(path), end='')
        adobe_master_encryption_key = _adobe_font_key(package)

        # Begin decrypting.

        try:
            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(font_master_key, adobe_master_encryption_key, encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with open(outpath, 'wb') as out_file, closing(ZipFile(out_file, 'w', **kwds)) as outf:

                # Mimetype needs to be the first entry, so remove it from the list
                # wherever it is, then add it at the beginning.
                namelist.remove("mimetype")

                for path in (["mimetype"] + namelist):
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED

                    if path == "mimetype":
                        # mimetype must not be compressed
                        zi.compress_type = ZIP_STORED

                    elif path == "META-INF/encryption.xml":
                        # Check if there's still other entries not related to fonts
                        if not decryptor.check_if_remaining():
                            # No remaining entries, no need for that file.
                            continue
                        data = decryptor.get_xml()
                        print("FontDecrypt: There's remaining entries in encryption.xml, adding file ...")

                    else:
                        data = decryptor.decrypt(path, data)

                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.volume = oldzi.volume
                        zi.create_system = oldzi.create_system
                        zi.create_version = oldzi.create_version

                        if _has_non_ascii(path) or _has_non_ascii(zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except Exception:
                        # Copying metadata is best effort; the entry is
                        # still written with default attributes.
                        pass

                    # Python 3 has a bug where the external_attr is reset to `0o600 << 16`
                    # if it's NULL, so we need a workaround:
                    if zi.external_attr == 0:
                        zi = ZeroedZipInfo(zi)

                    outf.writestr(zi, data)
        except Exception:
            print("FontDecrypt: Could not decrypt fonts in {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
            traceback.print_exc()
            return 2
    return 0
 | |
| 
 | 
