DeDRM_tools/DeDRM_plugin/lcpdedrm.py
2021-11-17 21:53:24 +01:00

499 lines
19 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# lcpdedrm.py
# Copyright © 2021 NoDRM
# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>
# Revision history:
# 1 - Initial release
"""
Decrypt Readium LCP encrypted ePub and PDF books.
"""
__license__ = 'GPL v3'
__version__ = "1"
import json
import hashlib
import base64
import zlib
import binascii
from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
from contextlib import closing
from Crypto.Cipher import AES
from lxml import etree
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
# encoded using "replace" before writing them.
class SafeUnbuffered:
    """Write-through wrapper around an output stream.

    Every ``write`` is followed by a ``flush`` so output shows up immediately.
    Text is encoded with the stream's encoding (UTF-8 when the stream does not
    report one) using the "replace" error handler, so characters that cannot
    be encoded never raise. All other attributes are delegated to the wrapped
    stream.
    """

    def __init__(self, stream):
        self.stream = stream
        # Streams without an (or with an empty) encoding fall back to UTF-8.
        self.encoding = getattr(stream, "encoding", None)
        if self.encoding is None:
            self.encoding = "utf-8"

    def write(self, data):
        """Encode text if necessary, then write and flush it.

        Byte strings are passed through unchanged. Errors raised by the
        underlying stream propagate to the caller.
        """
        try:
            text_types = (str, unicode)  # Python 2  # noqa: F821
        except NameError:
            # Python 3 has no "unicode"; looking it up must not break
            # writes of non-text data such as bytes.
            text_types = (str,)
        if isinstance(data, text_types):
            data = data.encode(self.encoding, "replace")
        # self.stream.buffer for Python3, self.stream for Python2
        buffer = getattr(self.stream, 'buffer', self.stream)
        buffer.write(data)
        buffer.flush()

    def __getattr__(self, attr):
        # Only called for attributes not found on the wrapper itself.
        return getattr(self.stream, attr)
class Decryptor(object):
    """Decrypt the LCP-encrypted resources listed in an ``encryption.xml``.

    The constructor parses the XML and sorts every referenced resource into
    one of two groups: resources protected with the LCP content key (which
    this class can decrypt) and resources using any other scheme (which are
    left untouched and keep their ``encryption.xml`` entry).
    """

    def __init__(self, bookkey, encryption):
        """Parse ``encryption`` and record which paths are encrypted.

        Args:
            bookkey: The decrypted content key, or None if it is unknown.
            encryption: Contents of ``META-INF/encryption.xml``.
        """
        def enc(tag):
            return '{%s}%s' % ('http://www.w3.org/2001/04/xmlenc#', tag)

        def dsig(tag):
            return '{%s}%s' % ('http://www.w3.org/2000/09/xmldsig#', tag)

        self.book_key = bookkey
        self._encryption = etree.fromstring(encryption)
        # This loops through all entries in the "encryption.xml" file
        # to figure out which files need to be decrypted.
        # All encrypted file paths will be added to the "encrypted" list
        self._encrypted = encrypted = set()
        self._other = other = set()
        self._json_elements_to_remove = json_elements_to_remove = set()
        self._has_remaining_xml = False
        expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
                               enc('CipherReference'))
        for elem in self._encryption.findall(expr):
            path = elem.get('URI', None)
            encrypted_data = elem.getparent().getparent()

            # EncryptionMethod may be absent; treat that as "unknown scheme"
            # instead of failing on None.get().
            method = encrypted_data.find("./%s" % (enc('EncryptionMethod')))
            encryption_type_url = None
            if method is not None:
                encryption_type_url = method.get('Algorithm', None)

            retrieval_method_url = None
            if encryption_type_url == "http://www.w3.org/2001/04/xmlenc#aes256-cbc":
                retrieval = encrypted_data.find(
                    "./%s/%s" % (dsig('KeyInfo'), dsig('RetrievalMethod')))
                if retrieval is not None:
                    retrieval_method_url = retrieval.get('Type', None)

            if path is None:
                continue
            path = path.encode('utf-8')
            if retrieval_method_url == "http://readium.org/2014/01/lcp#EncryptedContentKey":
                encrypted.add(path)
                if self.book_key is None:
                    # Cannot decrypt without a key, so the entry must stay.
                    self._has_remaining_xml = True
                else:
                    json_elements_to_remove.add(encrypted_data)
            else:
                # Other unsupported type.
                other.add(path)
                self._has_remaining_xml = True

        # Drop the entries of everything that is going to be decrypted.
        for elem in json_elements_to_remove:
            elem.getparent().remove(elem)

    def check_if_remaining(self):
        """Return True if ``encryption.xml`` still has entries worth keeping."""
        return self._has_remaining_xml

    def get_xml(self):
        """Return the (possibly reduced) ``encryption.xml`` as a text string."""
        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + etree.tostring(self._encryption, encoding="utf-8", pretty_print=True, xml_declaration=False).decode("utf-8")

    def decompress(self, bytes):
        """Try to inflate ``bytes`` as a raw deflate stream.

        Returns:
            A ``(data, was_compressed)`` tuple. When inflating fails the
            input is returned unchanged together with False.
        """
        dc = zlib.decompressobj(-15)
        try:
            decompressed_bytes = dc.decompress(bytes)
            ex = dc.decompress(b'Z') + dc.flush()
            if ex:
                decompressed_bytes = decompressed_bytes + ex
        except zlib.error:
            # possibly not compressed by zip - just return bytes
            return bytes, False
        return decompressed_bytes, True

    def decrypt(self, path, data):
        """Return the plaintext of the resource ``path``.

        Resources that are not LCP-encrypted, or any resource when no book
        key is known, are returned unchanged. Otherwise the first 16 bytes of
        ``data`` are used as the AES-CBC IV, the padding is stripped and the
        result is inflated if it turns out to be deflate-compressed.
        """
        if path.encode('utf-8') in self._encrypted and self.book_key is not None:
            aes = AES.new(self.book_key, AES.MODE_CBC, data[:16])
            data = aes.decrypt(data[16:])
            if not data:
                # Nothing after the IV: there is no padding byte to read.
                return data
            # Fix padding. The last byte holds the padding length; going
            # through bytearray yields an int on Python 2 and 3 alike.
            place = bytearray(data[-1:])[0]
            data = data[:-place]
            data, _was_decomp = self.decompress(data)
            return data
        # Not encrypted or obfuscated
        return data
class LCPError(Exception):
    """Error raised by this module when a book cannot be processed as LCP."""
class LCPTransform:
    """User-key derivation helpers for the supported LCP profiles."""

    @staticmethod
    def secret_transform_basic(input_hash):
        """Return ``input_hash`` unchanged.

        The basic profile doesn't have any transformation. Takes the key as
        hexdigest and outputs it as hexdigest.
        """
        return input_hash

    @staticmethod
    def secret_transform_profile10(input_hash):
        """Apply the profile-1.0 "secret userkey transform" to a hexdigest.

        This is what the documentation describes as "uk = userkey(h)".
        The 64-byte master key is basically all that distinguishes the open
        source "open for everyone" version from the closed-source version
        that's actually being used by book distributors.

        Algorithm (64 byte master key = 64 iterations):
          1. Start with the input hash.
          2. Append one byte of the master key.
          3. SHA-256 the result.
          4. Repeat from 2. until the master key is used up.

        Args:
            input_hash: SHA-256 hash as a hex string.

        Returns:
            The transformed key, hex-encoded by ``binascii.hexlify``.
        """
        masterkey = "b3a07c4d42880e69398e05392405050efeea0664c0b638b7c986556fa9b58d77b31a40eb6a4fdba1e4537229d9f779daad1cc41ee968153cb71f27dc9696d40f"
        masterkey = bytearray.fromhex(masterkey)

        current_hash = bytearray.fromhex(input_hash)
        for byte in masterkey:
            current_hash.append(byte)
            current_hash = bytearray(hashlib.sha256(current_hash).digest())
        return binascii.hexlify(current_hash)

    @staticmethod
    def userpass_to_hash(passphrase, algorithm):
        """Hash a passphrase with the algorithm named in the license.

        The Readium LCP standard only defines SHA256. The hashing standard
        documents it links to define a couple of other algorithms, but those
        are not implemented here.

        Args:
            passphrase: The passphrase as bytes; text is accepted as well
                and encoded as UTF-8 first.
            algorithm: Algorithm URL taken from the license.

        Returns:
            ``(algo_name, hexdigest)``, or ``(None, None)`` when the
            algorithm is not supported (a message is printed in that case).
        """
        if algorithm != "http://www.w3.org/2001/04/xmlenc#sha256":
            print("LCP: Book is using unsupported user key algorithm: {0}".format(algorithm))
            return None, None

        # hashlib only takes bytes; a Python 3 text string would otherwise
        # raise TypeError. (On Python 2 "str" is "bytes", so nothing changes.)
        if isinstance(passphrase, str) and not isinstance(passphrase, bytes):
            passphrase = passphrase.encode("utf-8")
        return "SHA256", hashlib.sha256(passphrase).hexdigest()
# Check file to see if this is an LCP-protected file
def isLCPbook(inpath):
    """Return True if the file at ``inpath`` looks like an LCP-protected book.

    The file must be a ZIP archive containing ``META-INF/license.lcpl`` and a
    ``META-INF/encryption.xml`` that mentions ``EncryptedContentKey``, and the
    license must carry an "id" and an "encryption" object with a "profile".
    Any error while opening or parsing the file yields False.
    """
    try:
        # Let ZipFile open the path itself: it only closes files it opened,
        # so handing it an already opened file object would leak the handle.
        with ZipFile(inpath) as lcpbook:
            names = lcpbook.namelist()
            if ("META-INF/license.lcpl" not in names or
                    "META-INF/encryption.xml" not in names or
                    b"EncryptedContentKey" not in lcpbook.read("META-INF/encryption.xml")):
                return False

            license_data = json.loads(lcpbook.read('META-INF/license.lcpl'))

        if ("id" in license_data and "encryption" in license_data
                and "profile" in license_data["encryption"]):
            return True
    except Exception:
        # Unreadable file, not a ZIP archive, malformed license, ...
        return False

    return False
# This function decrypts data with the given key
def dataDecryptLCP(b64data, hex_key):
    """Decrypt a base64-encoded AES-CBC value from an LCP license.

    Args:
        b64data: Base64 text; once decoded, the first 16 bytes are the IV and
            the remainder is the ciphertext.
        hex_key: The AES key as a hex string.

    Returns:
        The decrypted bytes with the padding removed. The last plaintext
        byte is taken as the number of padding bytes; it is not validated.
    """
    # Decode once. b64decode exists on Python 2 and 3, so no version
    # detection via AttributeError is needed.
    raw = base64.b64decode(b64data.encode('ascii'))
    iv, cipher = raw[:16], raw[16:]

    aes = AES.new(binascii.unhexlify(hex_key), AES.MODE_CBC, iv)
    temp = aes.decrypt(cipher)

    # Going through bytearray yields an int on Python 2 and 3 alike
    # (and still raises IndexError for an empty plaintext).
    padding = bytearray(temp[-1:])[0]
    return temp[:-padding]
# This function just returns an info string about the license
# Optional.
def returnUserInfoStringForLicense(license, user_pass=None):
    """Build a one-line description of the user a license was issued to.

    Args:
        license: The parsed license (``license.lcpl``) as a dict.
        user_pass: Transformed user key as accepted by ``dataDecryptLCP``,
            or None. Needed to read fields listed under ``user.encrypted``.

    Returns:
        A string such as ``"ID=..., Email=..., Name=..."`` containing the
        available parts, or None when the license has no "user" entry or
        neither email nor name is available.
    """
    if "user" not in license:
        return None
    user = license["user"]

    def read_field(key):
        # Value of user[key]; decrypted when the license marks it encrypted.
        value = user[key] if key in user else None
        # Sometimes these are encrypted
        if "encrypted" in user and key in user["encrypted"]:
            if user_pass is None:
                return None
            try:
                return str(dataDecryptLCP(value, user_pass).decode("utf-8"))
            except Exception:
                # Best effort: fall back to the value as stored in the license.
                return value
        return value

    user_email = read_field("email")
    user_name = read_field("name")

    if user_name is None and user_email is None:
        return None

    parts = []
    if "id" in user:
        parts.append("ID=" + user["id"])
    if user_email is not None:
        parts.append("Email=" + user_email)
    if user_name is not None:
        parts.append("Name=" + user_name)
    return ", ".join(parts)
# Collect every candidate user key (hex digest) for a license: keys embedded
# in the license itself first, then the hashes of the given passphrases.
def _lcp_candidate_user_keys(license, passphrases):
    user_key = license["encryption"]["user_key"]
    password_hashes = []

    # Some providers hard-code the passphrase in the LCPL file. That doesn't happen often,
    # but when it does, these files can be decrypted without knowing any passphrase.
    if "value" in user_key:
        password_hashes.append(binascii.hexlify(base64.b64decode(user_key["value"].encode())).decode("ascii"))
    if "hex_value" in user_key:
        password_hashes.append(binascii.hexlify(bytearray.fromhex(user_key["hex_value"])).decode("ascii"))

    # Hash all the passwords provided by the user:
    algo = "http://www.w3.org/2001/04/xmlenc#sha256"
    if "algorithm" in user_key:
        algo = user_key["algorithm"]
    for possible_passphrase in passphrases:
        _algo_name, tmp_pw = LCPTransform.userpass_to_hash(possible_passphrase.encode('utf-8'), algo)
        if tmp_pw is not None:
            password_hashes.append(tmp_pw)
    return password_hashes


# Return the first transformed hash whose decryption of key_check equals the
# license id, or None if no candidate matches.
def _lcp_find_user_key(license_id, key_check, password_hashes, transform_algo):
    for possible_hash in password_hashes:
        transformed_hash = transform_algo(possible_hash)
        try:
            decrypted = dataDecryptLCP(key_check, transformed_hash)
        except Exception:
            # A wrong key may yield undecodable or badly padded data.
            continue
        if decrypted is not None and decrypted.decode("ascii", errors="ignore") == license_id:
            # Found correct password hash, hooray!
            return transformed_hash
    return None


# Tell the user that no passphrase worked, including the distributor's hint
# and passphrase-reset link when the license provides them.
def _lcp_print_passphrase_help(license):
    print("LCP: None of the passphrases could decrypt the book ...")
    print("LCP: Enter the correct passphrase in the DeDRM plugin settings, then try again.")

    # Print password hint, if available
    user_key = license["encryption"]["user_key"]
    if "text_hint" in user_key and user_key["text_hint"] != "":
        print("LCP: The book distributor has given you the following passphrase hint: \"{0}\"".format(user_key["text_hint"]))

    # Print password reset instructions, if available. A license without
    # "links" must not hide the real error with a KeyError.
    for link in license.get("links", []):
        if "rel" in link and link["rel"] == "hint":
            print("LCP: You can visit the following webpage to reset your LCP passphrase: {0}".format(link["href"]))
            break


# Build the ZipInfo for an output entry, carrying over the metadata of the
# entry with the same name in the source archive.
def _lcp_zipinfo_for(book, path):
    zi = ZipInfo(path)
    oldzi = book.getinfo(path)
    zi.compress_type = ZIP_STORED if path == "mimetype" else ZIP_DEFLATED
    zi.date_time = oldzi.date_time
    zi.comment = oldzi.comment
    zi.extra = oldzi.extra
    zi.internal_attr = oldzi.internal_attr
    zi.external_attr = oldzi.external_attr
    zi.create_system = oldzi.create_system
    # The comment is a byte string; bytearray yields ints on Python 2 and 3.
    if any(ord(c) >= 128 for c in path) or any(b >= 128 for b in bytearray(zi.comment)):
        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
        zi.flag_bits |= 0x800
    return zi


# Write the decrypted PDF (or a ZIP of PDFs when the container holds more
# than one) to a temporary file and return its name.
def _lcp_export_pdf(book, decryptor, parent_object, kwds):
    # Usually, an LCP-protected PDF/ZIP is only supposed to contain one
    # PDF file, but if there are multiple, return a ZIP that contains them all.
    pdf_files = [name for name in book.namelist() if name.endswith(".pdf")]

    if not pdf_files:
        raise LCPError("Error: Book is an LCP-protected PDF, but doesn't contain any PDF files ...")

    if len(pdf_files) == 1:
        # One PDF file found - extract and return that.
        pdfdata = book.read(pdf_files[0])
        outputname = parent_object.temporary_file(".pdf").name
        print("LCP: Successfully decrypted, exporting to {0}".format(outputname))
        with open(outputname, 'wb') as f:
            f.write(decryptor.decrypt(pdf_files[0], pdfdata))
        return outputname

    # Multiple PDFs found
    outputname = parent_object.temporary_file(".zip").name
    with ZipFile(outputname, 'w', **kwds) as outfile:
        for path in pdf_files:
            outfile.writestr(path, decryptor.decrypt(path, book.read(path)))
    print("LCP: Successfully decrypted a multi-PDF ZIP file, exporting to {0}".format(outputname))
    return outputname


# Write a decrypted copy of the (EPUB or other ZIP based) publication to a
# temporary file and return its name.
def _lcp_export_epub(book, decryptor, mimetype, parent_object, kwds):
    if mimetype == "application/epub+zip":
        outputname = parent_object.temporary_file(".epub").name
    else:
        outputname = parent_object.temporary_file(".zip").name

    with ZipFile(outputname, 'w', **kwds) as outfile:
        # mimetype must be 1st file. Remove from list and manually add at the beginning
        namelist = book.namelist()
        namelist.remove("mimetype")
        # The license is not copied into the output.
        namelist.remove("META-INF/license.lcpl")

        for path in ["mimetype"] + namelist:
            if path == "META-INF/encryption.xml":
                # Check if that's still needed
                if not decryptor.check_if_remaining():
                    continue
                data = decryptor.get_xml()
                print("LCP: Adding encryption.xml for the remaining files.")
            else:
                data = decryptor.decrypt(path, book.read(path))
            outfile.writestr(_lcp_zipinfo_for(book, path), data)

    print("LCP: Successfully decrypted, exporting to {0}".format(outputname))
    return outputname


# Takes a file and a list of passphrases
def decryptLCPbook(inpath, passphrases, parent_object):
    """Decrypt an LCP-protected book and return the path of the result.

    Args:
        inpath: Path of the LCP-protected file.
        passphrases: Iterable of text passphrases to try. Keys embedded in
            the license itself are tried first.
        parent_object: Object whose ``temporary_file(suffix)`` method returns
            an object with a ``name`` attribute; the output is written to
            that file name.

    Returns:
        The name of the temporary file holding the decrypted book: a ``.pdf``
        for single-PDF containers, a ``.zip`` for multi-PDF or unknown
        containers, otherwise an ``.epub``.

    Raises:
        LCPError: If the file is not an LCP book, uses an unknown profile or
            content-key algorithm, contains no PDF although declared as one,
            or if none of the passphrases is correct.
    """
    if not isLCPbook(inpath):
        raise LCPError("This is not an LCP-encrypted book")

    # The context manager closes the archive (and the file ZipFile opened
    # for it) on every exit path, including errors.
    with ZipFile(inpath) as book:
        license = json.loads(book.read('META-INF/license.lcpl'))
        print("LCP: Found LCP-encrypted book {0}".format(license["id"]))

        user_info_string1 = returnUserInfoStringForLicense(license, None)
        if user_info_string1 is not None:
            print("LCP: Account information: " + user_info_string1)

        # Check algorithm:
        profile = license["encryption"]["profile"]
        if profile == "http://readium.org/lcp/basic-profile":
            print("LCP: Book is using lcp/basic-profile encryption.")
            transform_algo = LCPTransform.secret_transform_basic
        elif profile == "http://readium.org/lcp/profile-1.0":
            print("LCP: Book is using lcp/profile-1.0 encryption")
            transform_algo = LCPTransform.secret_transform_profile10
        else:
            raise LCPError("Book is using an unknown LCP encryption standard: {0}".format(profile))

        content_key = license["encryption"]["content_key"]
        if ("algorithm" in content_key and
                content_key["algorithm"] != "http://www.w3.org/2001/04/xmlenc#aes256-cbc"):
            raise LCPError("Book is using an unknown LCP encryption algorithm: {0}".format(content_key["algorithm"]))

        key_check = license["encryption"]["user_key"]["key_check"]
        encrypted_content_key = content_key["encrypted_value"]

        # Prepare a list of encryption keys to test, then check whether one
        # of them decrypts the book:
        password_hashes = _lcp_candidate_user_keys(license, passphrases)
        correct_password_hash = _lcp_find_user_key(
            license["id"], key_check, password_hashes, transform_algo)

        # Print an error message if none of the passwords worked
        if correct_password_hash is None:
            _lcp_print_passphrase_help(license)
            raise LCPError("No correct passphrase found")

        print("LCP: Found correct passphrase, decrypting book ...")
        user_info_string2 = returnUserInfoStringForLicense(license, correct_password_hash)
        # Only print again if the key revealed something new.
        if user_info_string2 is not None and user_info_string1 != user_info_string2:
            print("LCP: Account information: " + user_info_string2)

        # Take the key we found and decrypt the content key:
        decrypted_content_key = dataDecryptLCP(encrypted_content_key, correct_password_hash)
        if decrypted_content_key is None:
            raise LCPError("Decrypted content key is None")

        # Begin decrypting
        decryptor = Decryptor(decrypted_content_key, book.read('META-INF/encryption.xml'))
        kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
        mimetype = book.read("mimetype").decode("latin-1")

        if mimetype == "application/pdf":
            return _lcp_export_pdf(book, decryptor, parent_object, kwds)
        # Not a PDF -> EPUB
        return _lcp_export_epub(book, decryptor, mimetype, parent_object, kwds)