feat: impl ezpdf decrypter
This commit is contained in:
4
src/__init__.py
Normal file
4
src/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from src.device import Android, MacOS
|
||||
from src.pdf import EZPdfReader
|
||||
|
||||
# Names re-exported from src.pdf and src.device as the package's public API.
__all__ = ["EZPdfReader", "Android", "MacOS"]
|
||||
64
src/crypto.py
Normal file
64
src/crypto.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import hashlib
|
||||
from functools import reduce
|
||||
|
||||
from cryptography.hazmat.decrepit.ciphers import algorithms as decrepit
|
||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||||
|
||||
# Fixed 32-byte filler used by makekey_16: the caller's key is extended with
# these bytes and the result is cut to 32 bytes before the first MD5 round.
MAKEKEY16_PADDING = bytes.fromhex(
    "bf5e75410056fa082eb6682f0c647a532e2e00b6d0683e802f0ca9fe6453697a"
)
|
||||
|
||||
|
||||
def _iter_hash(hasher, data: bytes, n: int) -> bytes:
    """Feed ``data`` through ``hasher`` ``n`` times in a row.

    Each round replaces the running value with ``hasher(value).digest()``.
    When ``n`` is zero (or negative) the input is returned untouched.
    """
    digest = data
    for _round in range(n):
        digest = hasher(digest).digest()
    return digest
|
||||
|
||||
|
||||
def _cipher(key, algo, data: bytes, *, encrypt: bool) -> bytes:
    """Run ``data`` through ``algo(key)`` in ECB mode and return the output.

    ``encrypt`` selects the direction. No padding is added or removed here.
    """
    ecb = Cipher(algo(key), modes.ECB())
    if encrypt:
        ctx = ecb.encryptor()
    else:
        ctx = ecb.decryptor()
    head = ctx.update(data)
    return head + ctx.finalize()
|
||||
|
||||
|
||||
def makekey_16(key: bytes) -> bytes:
    """Derive a 16-byte MD5-based key from ``key``.

    ``key`` is extended with ``MAKEKEY16_PADDING`` and cut to 32 bytes, hashed
    once with MD5, and that digest is then re-hashed 50 more times.
    """
    seed = (key + MAKEKEY16_PADDING)[:32]
    first_digest = hashlib.md5(seed).digest()
    return _iter_hash(hashlib.md5, first_digest, 50)
|
||||
|
||||
|
||||
def makekey_32(data: bytes) -> bytes:
    """Derive a 32-byte key: SHA-256 of ``data``, re-hashed 10 more times."""
    first_digest = hashlib.sha256(data).digest()
    return _iter_hash(hashlib.sha256, first_digest, 10)
|
||||
|
||||
|
||||
def aes_encrypt(key: bytes, data: bytes) -> bytes:
    """AES-ECB encrypt ``data`` after padding it to a 16-byte boundary.

    The pad is ``k`` bytes of value ``k`` with ``k`` in 1..16, so a whole block
    of padding is appended when ``data`` is already block-aligned.
    """
    pad_len = 16 - len(data) % 16
    padded = data + bytes([pad_len]) * pad_len
    return _cipher(key, algorithms.AES, padded, encrypt=True)
|
||||
|
||||
|
||||
def aes_decrypt(key: bytes, data: bytes) -> bytes:
    """AES-ECB decrypt ``data`` and strip the block padding when it is well formed.

    Padding is removed only if the last byte ``k`` is in 1..16 and the final
    ``k`` bytes all equal ``k``; otherwise the raw plaintext is returned as is.

    Args:
        key: AES key.
        data: Ciphertext; empty input yields empty output.

    Returns:
        The plaintext, without padding when valid padding was found.
    """
    if not data:
        # Nothing to decrypt. Without this guard the padding probe below would
        # index into an empty plaintext and raise IndexError.
        return b""

    pt = _cipher(key, algorithms.AES, data, encrypt=False)
    pad = pt[-1]
    if 1 <= pad <= 16 and pt.endswith(bytes([pad]) * pad):
        return pt[:-pad]
    return pt
|
||||
|
||||
|
||||
def des3_encrypt(key: bytes, data: bytes) -> bytes:
    """3DES-ECB encrypt ``data``, zero-padded up to the next multiple of 8 bytes.

    Only the first 24 bytes of ``key`` are used. Input that is already aligned
    gets no padding.
    """
    shortfall = -len(data) % 8
    padded = data + bytes(shortfall)
    return _cipher(key[:24], decrepit.TripleDES, padded, encrypt=True)
|
||||
|
||||
|
||||
def des3_decrypt(key: bytes, data: bytes) -> bytes:
    """3DES-ECB decrypt ``data`` and drop every trailing NUL byte.

    Only the first 24 bytes of ``key`` are used. Plaintext that genuinely ends
    in NUL bytes loses them as well.
    """
    des_key = key[:24]
    plaintext = _cipher(des_key, decrepit.TripleDES, data, encrypt=False)
    return plaintext.rstrip(b"\x00")
|
||||
|
||||
|
||||
def rc4_decrypt(key: bytes, data: bytes):
    """Decrypt ``data`` with RC4 under ``key`` and return the resulting bytes."""
    return Cipher(decrepit.ARC4(key), mode=None).decryptor().update(data)
|
||||
47
src/device.py
Normal file
47
src/device.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import random
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Device:
    """Base interface for a client device profile.

    Every member raises ``NotImplementedError``; subclasses provide the
    User-Agent string and the query-string fragments.
    """

    @property
    def user_agent(self) -> str:
        """Return the User-Agent string for this device."""
        raise NotImplementedError

    def request_params(self) -> str:
        """Return the device's generic request query string."""
        raise NotImplementedError

    def open_params(self, doc_id: str, pages: int, siteid: Optional[str]) -> str:
        """Return the query string for opening document ``doc_id``.

        ``pages`` is the page count and ``siteid`` an optional site identifier.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class Android(Device):
    """Device profile that presents itself as an Android client.

    The device key is derived deterministically from ``seed``, so the same seed
    always produces the same key.
    """

    def __init__(self, seed: int) -> None:
        self.seed = seed

    @property
    def user_agent(self) -> str:
        """Return the Dalvik User-Agent string."""
        return "Dalvik/2.1.0 (Linux; U; Android 11; Pixel 4a Build/RQ2A.210305.006)"

    @property
    def _android_key(self):
        """Return 32 upper-case hex digits of a version-4 UUID seeded by ``self.seed``."""
        generator = random.Random()
        generator.seed(self.seed)
        bits = generator.getrandbits(128)
        return uuid.UUID(int=bits, version=4).hex.upper()

    def request_params(self) -> str:
        """Return ``keys1``/``keys2`` (both the device key) plus the ``keys9`` tag."""
        key = self._android_key
        return f"keys1={key}&keys2={key}&keys9=mobile.android"

    def open_params(self, doc_id: str, pages: int, siteid: Optional[str]) -> str:
        """Return the query string for opening ``doc_id`` over pages 1..``pages``.

        The ``siteID`` field is emitted only when ``siteid`` is not ``None``.
        """
        key = self._android_key
        if siteid is None:
            site = ""
        else:
            site = f"siteID={siteid}&"
        return (
            f"docid={doc_id}&keys1={key}&keys2={key}&keys9=mobile.android&"
            f"{site}devicename=&sPage=1&ePage={pages}&copies=0"
        )
|
||||
|
||||
|
||||
class MacOS(Android):
    """Device profile that reports a macOS User-Agent.

    Everything else, including the ``keys9=mobile.android`` parameter, is
    inherited unchanged from ``Android``.
    """

    @property
    def user_agent(self) -> str:
        """Return the ``eduPDFOSX`` User-Agent string."""
        return "eduPDFOSX 1.0 rv:1 (Macintosh; Mac OS X 10.16.0; ko_KR)"
|
||||
212
src/drm.py
Normal file
212
src/drm.py
Normal file
@@ -0,0 +1,212 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import random
|
||||
import string
|
||||
import struct
|
||||
import typing
|
||||
from enum import Enum
|
||||
from functools import cache
|
||||
|
||||
import requests
|
||||
from cryptography.hazmat.backends import default_backend
|
||||
from cryptography.hazmat.primitives.asymmetric import padding
|
||||
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
|
||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||||
from cryptography.hazmat.primitives.serialization import load_der_public_key
|
||||
from pypdf import PdfReader, _encryption
|
||||
from pypdf.generic import (
|
||||
ArrayObject,
|
||||
ByteStringObject,
|
||||
DictionaryObject,
|
||||
IndirectObject,
|
||||
StreamObject,
|
||||
TextStringObject,
|
||||
create_string_object,
|
||||
)
|
||||
|
||||
from src.crypto import (
|
||||
aes_decrypt,
|
||||
aes_encrypt,
|
||||
des3_decrypt,
|
||||
des3_encrypt,
|
||||
makekey_16,
|
||||
makekey_32,
|
||||
rc4_decrypt,
|
||||
)
|
||||
from src.device import Device
|
||||
from src.drminfo import DRMInfo
|
||||
from src.utils import unpad_aes
|
||||
|
||||
|
||||
class Algorithm(Enum):
    """Cipher selected for decrypting a document's strings and streams."""

    # The numeric values are what EZDRMEncryption.__init__ derives from the
    # fractional part of the /VER entry when selecting a member.
    RC4 = 0
    AES = 1
    AES256 = 2
|
||||
|
||||
|
||||
class EZDRMEncryption(_encryption.Encryption):
    """Decryption handler for PDFs whose ``/Encrypt`` filter is ``/UDOC_EZDRM``.

    The constructor decrypts the ``/INFO`` blob of the encryption dictionary
    into a ``DRMInfo``. The server public key and the document key are fetched
    over HTTP lazily, the first time they are needed, and are then kept on the
    instance.

    The base-class initialiser is not invoked; this class supplies its own
    ``is_decrypted`` and ``decrypt_object``.
    """

    def __init__(self, device: Device, docs: PdfReader, encrypt_ref: IndirectObject):
        """Parse the DRM dictionary behind ``encrypt_ref`` and prepare a session.

        Args:
            device: Device profile supplying the User-Agent and request params.
            docs: The reader being decrypted (its page count is sent to the server).
            encrypt_ref: Indirect reference to the ``/Encrypt`` dictionary.

        Raises:
            ValueError: If the dictionary is not a ``/UDOC_EZDRM`` one, or if a
                ``/VER`` of 2.x names a cipher index outside ``Algorithm``.
        """
        # load drm info
        drm = typing.cast(dict, encrypt_ref.get_object())
        if drm is None or drm.get("/Filter") != "/UDOC_EZDRM":
            # Explicit raise instead of assert so the check survives ``python -O``.
            raise ValueError("/Encrypt is not a /UDOC_EZDRM dictionary")

        version = drm["/VER"]
        info = drm["/INFO"]
        did: str = drm["/DID"]

        algorithm = Algorithm.RC4
        if int(version) == 2:
            # For /VER 2.x the first fractional digit is the Algorithm value.
            # round() absorbs float representation error such as 2.1 - 2.
            algorithm = Algorithm(round((version - 2) * 10))

        if version < 4:
            payload = aes_decrypt(makekey_16(did.encode()), base64.b64decode(info))
        else:
            key = did.encode() if len(did) == 16 else makekey_16(did.encode())
            for _ in range(11):
                key = hashlib.sha256(key).digest()
            payload = aes_decrypt(key, info.encode())

        # setup session
        session = requests.Session()
        session.headers.update({"User-Agent": device.user_agent})

        self.docs = docs
        self.version = version
        # The first four payload bytes are skipped before field parsing.
        self.info = DRMInfo(payload[4:])
        self.algorithm = algorithm
        self.session = session
        self.encrypt_ref = encrypt_ref
        self.device = device

        # Per-instance memo slots for the lazily fetched keys.
        self._public_key: typing.Optional[RSAPublicKey] = None
        self._open_key: typing.Optional[bytes] = None

    def _server_url(self, path: str) -> str:
        """Return ``path`` prefixed with the primary server's scheme, host and port."""
        return f"{self.info.protocol1}{self.info.server1}:{self.info.port1}{path}"

    def request(self, url: str, params: str) -> str:
        """Send ``params`` to ``url`` under a fresh session ticket and decode the reply.

        A random 32-digit ticket is RSA-encrypted with the server public key
        (``skx``). Its SHA-256 digest is the symmetric key that encrypts
        ``params`` (``dx``) and decrypts the hex-encoded response. 3DES is used
        when ``version < 4``, AES otherwise.

        Returns:
            The decrypted response decoded as EUC-KR.
        """
        ticket = "".join(random.choices(string.digits, k=0x20))
        skx = self.public_key.encrypt(ticket.encode(), padding.PKCS1v15()).hex().upper()

        sha_ticket = hashlib.sha256(ticket.encode()).digest()
        if self.version < 4:
            encrypt, decrypt = des3_encrypt, des3_decrypt
        else:
            encrypt, decrypt = aes_encrypt, aes_decrypt

        dx = encrypt(sha_ticket, params.encode()).hex().upper()

        ciphertext = (
            self.session.get(f"{url}&skx={skx}&dx={dx}").content.strip().decode()
        )

        return decrypt(sha_ticket, bytes.fromhex(ciphertext)).decode("euc-kr")

    @property
    def public_key(self) -> RSAPublicKey:
        """Server RSA public key, downloaded as hex-encoded DER on first access."""
        if self._public_key is None:
            raw = self.session.get(self._server_url(self.info.reserved)).content.strip()
            self._public_key = typing.cast(
                RSAPublicKey,
                load_der_public_key(bytes.fromhex(raw.decode()), default_backend()),
            )
        return self._public_key

    @property
    def open_key(self) -> bytes:
        """Document key derived from the server's "open" response (fetched once).

        Raises:
            Exception: If the response does not start with ``"ACK,1,"``.
        """
        if self._open_key is None:
            resp = self.request(
                self._server_url(self.info.open),
                self.device.open_params(self.info.doc_id, len(self.docs.pages), None),
            )

            if not resp.startswith("ACK,1,"):
                raise Exception("docs expired")

            secret = resp.split(",")[2].encode()
            self._open_key = (
                makekey_16(secret) if self.version < 4 else makekey_32(secret)
            )
        return self._open_key

    def _object_key(self, objid: int, genno: int, salt: bytes = b"") -> bytes:
        """Return the per-object key: MD5 over document key, object id, generation and salt."""
        material = (
            self.open_key
            + struct.pack("<L", objid)[:3]
            + struct.pack("<L", genno)[:2]
            + salt
        )
        return hashlib.md5(material).digest()[: min(len(material), 16)]

    @staticmethod
    def _aes_cbc_decrypt(key: bytes, data: bytes) -> bytes:
        """AES-CBC decrypt ``data`` (16-byte IV followed by ciphertext) and unpad it."""
        initialization_vector, ciphertext = data[:16], data[16:]
        if not ciphertext:
            # IV only (or less): there is nothing to decrypt, and a short IV
            # would be rejected by the cipher.
            return b""
        cipher = Cipher(
            algorithms.AES(key),
            modes.CBC(initialization_vector),
            backend=default_backend(),
        )  # type: ignore
        # Only update() is used and finalize() is not called, as before.
        plaintext = cipher.decryptor().update(ciphertext)  # type: ignore
        return unpad_aes(plaintext)

    def _perform_decrypt(self, objid: int, genno: int, data: bytes, attrs) -> bytes:
        """Decrypt one string or stream payload of object ``objid``/``genno``.

        ``attrs`` is accepted for interface compatibility and is not used.
        """
        if not data:
            # Empty strings/streams need no decryption (and no server round trip).
            return data

        if self.algorithm == Algorithm.RC4:
            return rc4_decrypt(self._object_key(objid, genno), data)
        elif self.algorithm == Algorithm.AES:
            return self._aes_cbc_decrypt(self._object_key(objid, genno, b"sAlT"), data)
        else:
            # AES256: the document key is used directly, without per-object mixing.
            return self._aes_cbc_decrypt(self.open_key, data)

    def is_decrypted(self) -> bool:
        """Always report the document as decrypted."""
        return True

    def decrypt_object(
        self, obj: typing.Any, idnum: int, generation: int
    ) -> typing.Any:
        """Decrypt ``obj`` in place, leaving the encryption dictionary itself alone."""
        if (
            self.encrypt_ref is not None
            and idnum == self.encrypt_ref.idnum
            and generation == self.encrypt_ref.generation
        ):
            return obj
        return self._dec(obj, idnum, generation)

    def _dec(self, obj: typing.Any, idnum: int, generation: int) -> typing.Any:
        """Recursively decrypt strings and stream data reachable from ``obj``.

        Indirect references are returned untouched; containers are updated in
        place and returned.
        """
        if isinstance(obj, IndirectObject):
            return obj

        if isinstance(obj, (ByteStringObject, TextStringObject)):
            plain = self._perform_decrypt(idnum, generation, obj.original_bytes, None)
            return create_string_object(plain)

        if isinstance(obj, StreamObject):
            attrs = {str(k): v for k, v in obj.items()}
            obj._data = self._perform_decrypt(idnum, generation, obj._data, attrs)

        if isinstance(obj, (StreamObject, DictionaryObject)):
            # Snapshot the items because values are reassigned while looping.
            for k, v in list(obj.items()):
                obj[k] = self._dec(v, idnum, generation)
            return obj

        if isinstance(obj, ArrayObject):
            for i, item in enumerate(obj):
                obj[i] = self._dec(item, idnum, generation)
            return obj

        return obj
|
||||
60
src/drminfo.py
Normal file
60
src/drminfo.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import struct
|
||||
|
||||
|
||||
class DRMInfo:
    """Read-only view over a DRM info payload with fields at fixed byte offsets.

    Integers are 4-byte big-endian unsigned values; strings run up to the first
    NUL byte (or the end of the payload) and are decoded as EUC-KR.
    """

    # Byte offset of each field inside the payload.
    _OFF_DOC_ID = 0x0
    _OFF_SERVER1 = 0x104
    _OFF_SERVER2 = 0x184
    _OFF_PORT1 = 0x204
    _OFF_PORT2 = 0x208
    _OFF_PROTOCOL1 = 0x20C
    _OFF_PROTOCOL2 = 0x210
    _OFF_HANDSHAKE = 0x214
    _OFF_RESERVED_NUM = 0x398
    _OFF_RESERVED = 0x39C
    _OFF_OPEN = 0x420
    _OFF_PRINT = 0x4A4

    def __init__(self, payload: bytes) -> None:
        self.payload = payload

    def _read_int(self, offset: int) -> int:
        """Return the big-endian unsigned 32-bit integer stored at ``offset``."""
        window = self.payload[offset : offset + 4]
        (value,) = struct.unpack(">I", window)
        return value

    def _read_str(self, offset: int) -> str:
        """Return the NUL-terminated EUC-KR string that starts at ``offset``."""
        raw, _sep, _rest = self.payload[offset:].partition(b"\00")
        return raw.decode("euc-kr")

    @property
    def doc_id(self) -> str:
        """Document identifier string."""
        return self._read_str(self._OFF_DOC_ID)

    @property
    def server1(self) -> str:
        """First server string."""
        return self._read_str(self._OFF_SERVER1)

    @property
    def server2(self) -> str:
        """Second server string."""
        return self._read_str(self._OFF_SERVER2)

    @property
    def port1(self) -> int:
        """First port number."""
        return self._read_int(self._OFF_PORT1)

    @property
    def port2(self) -> int:
        """Second port number."""
        return self._read_int(self._OFF_PORT2)

    @property
    def protocol1(self) -> str:
        """URL scheme prefix for the first server: HTTPS when the stored flag is 2."""
        if self._read_int(self._OFF_PROTOCOL1) == 2:
            return "https://"
        return "http://"

    @property
    def protocol2(self) -> str:
        """URL scheme prefix for the second server: HTTPS when the stored flag is 2."""
        if self._read_int(self._OFF_PROTOCOL2) == 2:
            return "https://"
        return "http://"

    @property
    def handshake(self) -> int:
        """Integer stored in the handshake field."""
        return self._read_int(self._OFF_HANDSHAKE)

    @property
    def reserved_num(self) -> int:
        """Integer stored in the reserved-number field."""
        return self._read_int(self._OFF_RESERVED_NUM)

    @property
    def reserved(self) -> str:
        """String stored in the reserved field."""
        return self._read_str(self._OFF_RESERVED)

    @property
    def open(self) -> str:
        """String stored in the open field."""
        return self._read_str(self._OFF_OPEN)

    @property
    def print(self) -> str:
        """String stored in the print field."""
        return self._read_str(self._OFF_PRINT)
|
||||
40
src/pdf.py
Normal file
40
src/pdf.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from pathlib import Path
|
||||
from typing import IO, Any, Optional, Union, cast
|
||||
|
||||
from pypdf import PdfReader
|
||||
from pypdf.generic import IndirectObject
|
||||
|
||||
from src.device import Device, MacOS
|
||||
from src.drm import EZDRMEncryption
|
||||
|
||||
|
||||
class EZPdfReader(PdfReader):
    """``PdfReader`` that additionally understands non-standard (ezPDF DRM) encryption.

    Documents using the ``/Standard`` security handler are handled by the base
    class; any other ``/Encrypt`` filter is handed to ``EZDRMEncryption``.
    """

    def __init__(
        self,
        stream: Union[Union[str, IO[Any]], Path],
        strict: bool = False,
        password: Union[None, str, bytes] = None,
        device: Optional[Device] = None,
        *,
        root_object_recovery_limit: Optional[int] = 10000,
    ) -> None:
        """Open ``stream`` as a PDF.

        Args:
            stream: Path or file object, forwarded to ``PdfReader``.
            strict: Forwarded to ``PdfReader``.
            password: Forwarded to ``PdfReader``.
            device: Device profile used for DRM requests. ``None`` (the
                default) creates a fresh ``MacOS(1234)`` for this reader, so
                no mutable default instance is shared between readers.
            root_object_recovery_limit: Forwarded to ``PdfReader``.
        """
        # Assigned before the base initialiser runs because
        # _handle_encryption reads self.device (presumably it is invoked while
        # the base class is still constructing the reader).
        self.device = MacOS(1234) if device is None else device
        super().__init__(
            stream,
            strict,
            password,
            root_object_recovery_limit=root_object_recovery_limit,
        )

    def _handle_encryption(self, password: Optional[Union[str, bytes]]):
        """Install the matching encryption handler for this document.

        ``/Standard`` encryption is delegated to the base class. Any other
        filter referenced indirectly from the trailer gets an
        ``EZDRMEncryption`` handler. ``_override_encryption`` is switched on
        while the encryption dictionary is inspected and off again afterwards.
        """
        self._override_encryption = True

        encrypt_ref = self.trailer.get("/Encrypt")
        if isinstance(encrypt_ref, IndirectObject):
            encryption = cast(dict, encrypt_ref.get_object())
            if encryption["/Filter"] == "/Standard":
                super()._handle_encryption(password)
            else:
                self._encryption = EZDRMEncryption(self.device, self, encrypt_ref)

        self._override_encryption = False
|
||||
22
src/utils.py
Normal file
22
src/utils.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# https://github.com/pdfminer/pdfminer.six/blob/master/pdfminer/utils.py#L858
|
||||
def unpad_aes(padded: bytes) -> bytes:
    """Remove block padding as described in PDF 1.7 section 7.6.2:

    > For an original message length of M, the pad shall consist of 16 -
    (M mod 16) bytes whose value shall also be 16 - (M mod 16).
    > Note that the pad is present when M is evenly divisible by 16;
    it contains 16 bytes of 0x10.

    Input whose trailing bytes do not form a valid pad is returned unchanged.

    Args:
        padded: Decrypted bytes, possibly ending in padding.

    Returns:
        ``padded`` without its padding when the padding is valid, otherwise
        ``padded`` itself.
    """
    if not padded:
        return padded
    # Check for a potential padding byte (bytes are unsigned)
    pad_len = padded[-1]
    # A valid pad length is 1..16 and cannot exceed the data length. Zero must
    # be rejected explicitly: ``padded[-0:]`` is the whole buffer and
    # ``padded[:-0]`` is empty, so an all-zero message would be erased.
    if not 1 <= pad_len <= 16 or pad_len > len(padded):
        return padded
    # Every byte of padding is equal to the length of padding
    if padded.endswith(bytes([pad_len]) * pad_len):
        return padded[:-pad_len]
    return padded
|
||||
Reference in New Issue
Block a user