feat: impl ezpdf decrypter

This commit is contained in:
senstella
2026-02-25 15:33:47 +09:00
commit 149d81d244
13 changed files with 747 additions and 0 deletions

4
src/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from src.device import Android, MacOS
from src.pdf import EZPdfReader
# Names exported by ``from src import *``: the reader class and the two
# device profiles imported above.
__all__ = ["EZPdfReader", "Android", "MacOS"]

64
src/crypto.py Normal file
View File

@@ -0,0 +1,64 @@
import hashlib
from functools import reduce
from cryptography.hazmat.decrepit.ciphers import algorithms as decrepit
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
# Fixed 32-byte filler (64 hex digits). makekey_16 appends it to the caller's
# key and keeps the first 32 bytes of the result before hashing.
MAKEKEY16_PADDING = bytes.fromhex(
"bf5e75410056fa082eb6682f0c647a532e2e00b6d0683e802f0ca9fe6453697a"
)
def _iter_hash(hasher, data: bytes, n: int) -> bytes:
return reduce(lambda d, _: hasher(d).digest(), range(n), data)
def _cipher(key, algo, data: bytes, *, encrypt: bool) -> bytes:
    """Run ``data`` through the block cipher ``algo`` in ECB mode.

    Args:
        key: Key material handed to ``algo``.
        algo: Algorithm class from ``cryptography`` (called as ``algo(key)``).
        data: Bytes to process; no padding is added or removed here.
        encrypt: ``True`` to encrypt, ``False`` to decrypt.

    Returns:
        The processed bytes (``update`` output followed by ``finalize``).
    """
    ecb = Cipher(algo(key), modes.ECB())
    context = ecb.encryptor() if encrypt else ecb.decryptor()
    return context.update(data) + context.finalize()
def makekey_16(key: bytes) -> bytes:
    """Derive a 16-byte key from ``key`` using iterated MD5.

    The key is extended with ``MAKEKEY16_PADDING`` and cut to 32 bytes, hashed
    once with MD5, and that digest is then re-hashed 50 more times.
    """
    padded = (key + MAKEKEY16_PADDING)[:32]
    first_digest = hashlib.md5(padded).digest()
    return _iter_hash(hashlib.md5, first_digest, 50)
def makekey_32(data: bytes) -> bytes:
    """Derive a 32-byte key from ``data`` using iterated SHA-256.

    ``data`` is hashed once, and the resulting digest is re-hashed 10 more
    times (11 SHA-256 applications in total).
    """
    first_digest = hashlib.sha256(data).digest()
    return _iter_hash(hashlib.sha256, first_digest, 10)
def aes_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with AES-ECB after appending block padding.

    The pad is ``16 - len(data) % 16`` bytes, each holding that same count, so
    a full 16-byte pad block is added when ``data`` is already block-aligned.
    """
    pad_len = 16 - len(data) % 16
    padded = data + bytes([pad_len]) * pad_len
    return _cipher(key, algorithms.AES, padded, encrypt=True)
def aes_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt AES-ECB ``data`` and strip the trailing block padding if valid.

    Padding is removed only when the last byte ``p`` satisfies
    ``1 <= p <= 16`` and the plaintext ends with ``p`` copies of ``p``;
    otherwise the plaintext is returned untouched.

    Args:
        key: AES key.
        data: Ciphertext (a whole number of 16-byte blocks).

    Returns:
        The plaintext, without padding when the padding is well-formed.
        Empty input yields empty output instead of raising ``IndexError``.
    """
    pt = _cipher(key, algorithms.AES, data, encrypt=False)
    if not pt:
        # Nothing to inspect; indexing pt[-1] would raise on empty plaintext.
        return pt
    pad = pt[-1]
    if 1 <= pad <= 16 and pt.endswith(bytes([pad]) * pad):
        return pt[:-pad]
    return pt
def des3_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with Triple-DES in ECB mode.

    Only the first 24 bytes of ``key`` are used. ``data`` is right-padded with
    NUL bytes up to the next multiple of 8 (no padding when already aligned).
    """
    missing = -len(data) % 8
    padded = data + bytes(missing)
    return _cipher(key[:24], decrepit.TripleDES, padded, encrypt=True)
def des3_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt Triple-DES/ECB ``data`` and drop trailing NUL bytes.

    Only the first 24 bytes of ``key`` are used. Every trailing ``0x00`` byte
    is stripped, which undoes the zero padding applied by ``des3_encrypt``
    (and also removes genuine trailing NULs, if any).
    """
    plaintext = _cipher(key[:24], decrepit.TripleDES, data, encrypt=False)
    return plaintext.rstrip(b"\x00")
def rc4_decrypt(key: bytes, data: bytes):
    """Decrypt ``data`` with the RC4 stream cipher keyed by ``key``.

    Returns the bytes produced by a single ``update`` call on the decryptor.
    """
    decryptor = Cipher(decrepit.ARC4(key), mode=None).decryptor()
    return decryptor.update(data)

47
src/device.py Normal file
View File

@@ -0,0 +1,47 @@
import random
import uuid
from typing import Optional
class Device:
    """Base interface for a client device profile.

    Subclasses supply the HTTP ``User-Agent`` and the query-string fragments
    sent to the DRM server. Every member here raises ``NotImplementedError``.
    """

    @property
    def user_agent(self) -> str:
        """User-Agent header value identifying the device."""
        raise NotImplementedError

    def request_params(self) -> str:
        """Return the query-string fragment for a generic request."""
        raise NotImplementedError

    def open_params(self, doc_id: str, pages: int, siteid: Optional[str]) -> str:
        """Return the query-string fragment for opening document ``doc_id``."""
        raise NotImplementedError
class Android(Device):
    """Device profile that presents itself as an Android client.

    The device key is derived deterministically from ``seed``, so two
    instances built with the same seed send identical parameters.
    """

    def __init__(self, seed: int) -> None:
        self.seed = seed

    @property
    def user_agent(self) -> str:
        """User-Agent string of a Dalvik (Android 11, Pixel 4a) client."""
        return "Dalvik/2.1.0 (Linux; U; Android 11; Pixel 4a Build/RQ2A.210305.006)"

    @property
    def _android_key(self) -> str:
        """32 upper-case hex digits of a version-4 UUID seeded by ``seed``."""
        generator = random.Random(self.seed)
        device_uuid = uuid.UUID(int=generator.getrandbits(128), version=4)
        # UUID.hex is the canonical string form without the dashes.
        return device_uuid.hex.upper()

    def request_params(self) -> str:
        """Return the ``keys1``/``keys2``/``keys9`` query-string fragment."""
        key = self._android_key
        return f"keys1={key}&keys2={key}&keys9=mobile.android"

    def open_params(self, doc_id: str, pages: int, siteid: Optional[str]) -> str:
        """Return the query-string fragment used to open a document.

        Args:
            doc_id: Document identifier sent as ``docid``.
            pages: Page count sent as ``ePage`` (``sPage`` is always 1).
            siteid: Optional site identifier; ``siteID`` is omitted when None.
        """
        key = self._android_key
        site_part = f"siteID={siteid}&" if siteid is not None else ""
        return (
            f"docid={doc_id}&keys1={key}&keys2={key}&keys9=mobile.android&"
            f"{site_part}devicename=&sPage=1&ePage={pages}&copies=0"
        )
class MacOS(Android):
    """Device profile that reuses the Android parameters with a macOS User-Agent."""

    @property
    def user_agent(self) -> str:
        """User-Agent string of the eduPDFOSX client."""
        return "eduPDFOSX 1.0 rv:1 (Macintosh; Mac OS X 10.16.0; ko_KR)"

212
src/drm.py Normal file
View File

@@ -0,0 +1,212 @@
import base64
import hashlib
import random
import string
import struct
import typing
from enum import Enum
from functools import cache
import requests
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.serialization import load_der_public_key
from pypdf import PdfReader, _encryption
from pypdf.generic import (
ArrayObject,
ByteStringObject,
DictionaryObject,
IndirectObject,
StreamObject,
TextStringObject,
create_string_object,
)
from src.crypto import (
aes_decrypt,
aes_encrypt,
des3_decrypt,
des3_encrypt,
makekey_16,
makekey_32,
rc4_decrypt,
)
from src.device import Device
from src.drminfo import DRMInfo
from src.utils import unpad_aes
class Algorithm(Enum):
    """Cipher used to decrypt the strings and streams of a protected document."""

    # RC4 keyed per object.
    RC4 = 0
    # AES-CBC with a per-object key.
    AES = 1
    # AES-CBC keyed directly with the document open key.
    AES256 = 2
class EZDRMEncryption(_encryption.Encryption):
    """pypdf encryption handler for documents using the ``/UDOC_EZDRM`` filter.

    On construction the DRM descriptor stored in the ``/Encrypt`` dictionary
    is decrypted into a :class:`DRMInfo`. The document open key is fetched
    lazily from the DRM server the first time an object has to be decrypted
    and is then kept on the instance.
    """

    def __init__(self, device: Device, docs: PdfReader, encrypt_ref: IndirectObject):
        """Parse the ``/Encrypt`` dictionary and prepare the HTTP session.

        Args:
            device: Device profile supplying the User-Agent and request params.
            docs: Reader that owns the document (used for the page count).
            encrypt_ref: Indirect reference to the ``/Encrypt`` dictionary.

        Raises:
            AssertionError: If the dictionary's filter is not ``/UDOC_EZDRM``.
            ValueError: If a 2.x version maps to no known :class:`Algorithm`.
        """
        # NOTE(review): the base-class __init__ is deliberately not called,
        # matching the original behaviour; confirm pypdf needs none of the
        # attributes it would set.
        drm = typing.cast(dict, encrypt_ref.get_object())
        assert drm is not None and drm.get("/Filter") == "/UDOC_EZDRM"
        version = drm["/VER"]
        info = drm["/INFO"]
        did: str = drm["/DID"]

        algorithm = Algorithm.RC4
        if int(version) == 2:
            # The first decimal digit selects the cipher:
            # 2.0 -> RC4, 2.1 -> AES, 2.2 -> AES256. Rounding guards against
            # float artefacts such as 2.1 - 2 == 0.10000000000000009.
            algorithm = Algorithm(round(float(version) * 10) - 20)
            # NOTE(review): versions outside 2.x keep RC4 — confirm this is
            # intended for version >= 4 documents.

        if version < 4:
            payload = aes_decrypt(makekey_16(did.encode()), base64.b64decode(info))
        else:
            key = did.encode() if len(did) == 16 else makekey_16(did.encode())
            for _ in range(11):
                key = hashlib.sha256(key).digest()
            payload = aes_decrypt(key, info.encode())

        session = requests.Session()
        session.headers.update({"User-Agent": device.user_agent})

        self.docs = docs
        self.version = version
        # The first four payload bytes are skipped; DRMInfo offsets are
        # relative to what follows them.
        self.info = DRMInfo(payload[4:])
        self.algorithm = algorithm
        self.session = session
        self.encrypt_ref = encrypt_ref
        self.device = device
        # Per-instance memoization of the server-provided keys. A global
        # functools.cache on the methods would keep every instance alive.
        self._public_key: typing.Optional[RSAPublicKey] = None
        self._open_key: typing.Optional[bytes] = None

    def _server_url(self, path: str) -> str:
        """Build a URL on the primary DRM server for ``path``."""
        return f"{self.info.protocol1}{self.info.server1}:{self.info.port1}{path}"

    def request(self, url: str, params: str) -> str:
        """Send an encrypted request to the DRM server and decrypt the reply.

        A random 32-digit ticket is RSA-encrypted for the server (``skx``);
        its SHA-256 digest is the symmetric key protecting ``params`` (``dx``)
        and the response. Triple-DES is used for versions below 4, AES
        otherwise.

        Args:
            url: Endpoint URL; ``skx`` and ``dx`` are appended with ``&``, so
                it presumably already carries a query string — verify.
            params: Plain-text query parameters to encrypt.

        Returns:
            The decrypted response body decoded as EUC-KR.
        """
        # The ticket seeds the session key, so draw it from the OS CSPRNG
        # instead of the predictable default generator.
        digits = random.SystemRandom().choices(string.digits, k=0x20)
        ticket = "".join(digits).encode()
        skx = self.public_key.encrypt(ticket, padding.PKCS1v15()).hex().upper()
        session_key = hashlib.sha256(ticket).digest()

        if self.version < 4:
            encrypt, decrypt = des3_encrypt, des3_decrypt
        else:
            encrypt, decrypt = aes_encrypt, aes_decrypt

        dx = encrypt(session_key, params.encode()).hex().upper()
        response = self.session.get(f"{url}&skx={skx}&dx={dx}")
        ciphertext = response.content.strip().decode()
        return decrypt(session_key, bytes.fromhex(ciphertext)).decode("euc-kr")

    @property
    def public_key(self) -> RSAPublicKey:
        """RSA public key of the DRM server (hex-encoded DER), fetched once."""
        if self._public_key is None:
            pkey = self.session.get(self._server_url(self.info.reserved)).content.strip()
            self._public_key = typing.cast(
                RSAPublicKey,
                load_der_public_key(bytes.fromhex(pkey.decode()), default_backend()),
            )
        return self._public_key

    @property
    def open_key(self) -> bytes:
        """Document key derived from the server's "open" response, fetched once.

        Raises:
            Exception: If the server does not answer with ``ACK,1,...``.
        """
        if self._open_key is None:
            resp = self.request(
                self._server_url(self.info.open),
                self.device.open_params(self.info.doc_id, len(self.docs.pages), None),
            )
            if not resp.startswith("ACK,1,"):
                raise Exception("docs expired")
            secret = resp.split(",")[2].encode()
            self._open_key = (
                makekey_16(secret) if self.version < 4 else makekey_32(secret)
            )
        return self._open_key

    def _object_key(self, objid: int, genno: int, salt: bytes = b"") -> bytes:
        """Derive the per-object key from the open key, object id and generation."""
        material = (
            self.open_key
            + struct.pack("<L", objid)[:3]
            + struct.pack("<L", genno)[:2]
            + salt
        )
        return hashlib.md5(material).digest()[: min(len(material), 16)]

    @staticmethod
    def _aes_cbc_decrypt(key: bytes, data: bytes) -> bytes:
        """Decrypt ``data`` (16-byte IV followed by ciphertext) and unpad it."""
        if not data:
            # Empty strings/streams carry no IV; pass them through unchanged
            # rather than failing on an invalid IV length.
            return data
        initialization_vector, ciphertext = data[:16], data[16:]
        cipher = Cipher(
            algorithms.AES(key),
            modes.CBC(initialization_vector),
            backend=default_backend(),
        )  # type: ignore
        plaintext = cipher.decryptor().update(ciphertext)  # type: ignore
        return unpad_aes(plaintext)

    def _perform_decrypt(self, objid: int, genno: int, data: bytes, attrs) -> bytes:
        """Decrypt ``data`` belonging to object ``objid``/``genno``.

        ``attrs`` (the stream dictionary, or None for strings) is accepted for
        interface symmetry but is not consulted.
        """
        if self.algorithm == Algorithm.RC4:
            return rc4_decrypt(self._object_key(objid, genno), data)
        if self.algorithm == Algorithm.AES:
            return self._aes_cbc_decrypt(self._object_key(objid, genno, b"sAlT"), data)
        # AES256: the open key is used directly, without per-object derivation.
        return self._aes_cbc_decrypt(self.open_key, data)

    def is_decrypted(self) -> bool:
        """Always report the document as decrypted; no password is involved."""
        return True

    def decrypt_object(
        self, obj: typing.Any, idnum: int, generation: int
    ) -> typing.Any:
        """Decrypt ``obj`` in place, leaving the ``/Encrypt`` dictionary as is."""
        is_encrypt_dict = (
            self.encrypt_ref is not None
            and idnum == self.encrypt_ref.idnum
            and generation == self.encrypt_ref.generation
        )
        if is_encrypt_dict:
            return obj
        return self._dec(obj, idnum, generation)

    def _dec(self, obj: typing.Any, idnum: int, generation: int) -> typing.Any:
        """Recursively decrypt strings and stream data reachable from ``obj``.

        Indirect references and other object types are returned unchanged;
        containers are updated in place.
        """
        if isinstance(obj, IndirectObject):
            return obj
        if isinstance(obj, (ByteStringObject, TextStringObject)):
            plain = self._perform_decrypt(idnum, generation, obj.original_bytes, None)
            return create_string_object(plain)
        if isinstance(obj, DictionaryObject):
            # StreamObject is a DictionaryObject: decrypt its payload first,
            # then walk its entries like any other dictionary.
            if isinstance(obj, StreamObject):
                attrs = {str(k): v for k, v in obj.items()}
                obj._data = self._perform_decrypt(idnum, generation, obj._data, attrs)
            for k, v in list(obj.items()):
                obj[k] = self._dec(v, idnum, generation)
            return obj
        if isinstance(obj, ArrayObject):
            for index, item in enumerate(obj):
                obj[index] = self._dec(item, idnum, generation)
            return obj
        return obj

60
src/drminfo.py Normal file
View File

@@ -0,0 +1,60 @@
import struct
class DRMInfo:
    """Field accessors over a binary DRM descriptor.

    Integers are read as big-endian unsigned 32-bit values and strings as
    NUL-terminated EUC-KR text, each at a fixed byte offset in ``payload``.
    """

    def __init__(self, payload: bytes) -> None:
        self.payload = payload

    def _read_int(self, offset: int) -> int:
        """Return the big-endian uint32 stored at ``offset``."""
        (value,) = struct.unpack_from(">I", self.payload, offset)
        return value

    def _read_str(self, offset: int) -> str:
        """Return the EUC-KR string starting at ``offset``, up to the first NUL."""
        raw, _, _ = self.payload[offset:].partition(b"\x00")
        return raw.decode("euc-kr")

    @property
    def doc_id(self) -> str:
        """String at offset 0x0."""
        return self._read_str(0x0)

    @property
    def server1(self) -> str:
        """String at offset 0x104."""
        return self._read_str(0x104)

    @property
    def server2(self) -> str:
        """String at offset 0x184."""
        return self._read_str(0x184)

    @property
    def port1(self) -> int:
        """Integer at offset 0x204."""
        return self._read_int(0x204)

    @property
    def port2(self) -> int:
        """Integer at offset 0x208."""
        return self._read_int(0x208)

    @property
    def protocol1(self) -> str:
        """URL scheme prefix: ``https://`` when the integer at 0x20C is 2."""
        scheme_code = self._read_int(0x20C)
        return "https://" if scheme_code == 2 else "http://"

    @property
    def protocol2(self) -> str:
        """URL scheme prefix: ``https://`` when the integer at 0x210 is 2."""
        scheme_code = self._read_int(0x210)
        return "https://" if scheme_code == 2 else "http://"

    @property
    def handshake(self) -> int:
        """Integer at offset 0x214."""
        return self._read_int(0x214)

    @property
    def reserved_num(self) -> int:
        """Integer at offset 0x398."""
        return self._read_int(0x398)

    @property
    def reserved(self) -> str:
        """String at offset 0x39C."""
        return self._read_str(0x39C)

    @property
    def open(self) -> str:
        """String at offset 0x420."""
        return self._read_str(0x420)

    @property
    def print(self) -> str:
        """String at offset 0x4A4."""
        return self._read_str(0x4A4)

40
src/pdf.py Normal file
View File

@@ -0,0 +1,40 @@
from pathlib import Path
from typing import IO, Any, Optional, Union, cast
from pypdf import PdfReader
from pypdf.generic import IndirectObject
from src.device import Device, MacOS
from src.drm import EZDRMEncryption
class EZPdfReader(PdfReader):
    """``PdfReader`` that can also open documents protected by EZDRM.

    Documents using the ``/Standard`` security handler are handled by pypdf as
    usual; any other filter referenced indirectly from the trailer is handed
    to :class:`EZDRMEncryption`.
    """

    def __init__(
        self,
        stream: Union[Union[str, IO[Any]], Path],
        strict: bool = False,
        password: Union[None, str, bytes] = None,
        device: Optional[Device] = None,
        *,
        root_object_recovery_limit: Optional[int] = 10000,
    ) -> None:
        """Open ``stream`` as a PDF.

        Args:
            stream: Path or file object, forwarded to ``PdfReader``.
            strict: Forwarded to ``PdfReader``.
            password: Password for ``/Standard``-encrypted documents.
            device: Device profile used for DRM server requests. Defaults to
                ``MacOS(1234)``, created per call rather than once at
                definition time.
            root_object_recovery_limit: Forwarded to ``PdfReader``.
        """
        # Assigned before super().__init__() because _handle_encryption reads
        # self.device — presumably it is invoked during PdfReader construction.
        self.device = device if device is not None else MacOS(1234)
        super().__init__(
            stream,
            strict,
            password,
            root_object_recovery_limit=root_object_recovery_limit,
        )

    def _handle_encryption(self, password: Optional[Union[str, bytes]]):
        """Install the encryption handler matching the trailer's ``/Encrypt``.

        ``_override_encryption`` is raised while the ``/Encrypt`` dictionary
        is being read and is always lowered again, even if setting up the
        handler fails.
        """
        self._override_encryption = True
        try:
            encrypt_ref = self.trailer.get("/Encrypt")
            if isinstance(encrypt_ref, IndirectObject):
                encryption = cast(dict, encrypt_ref.get_object())
                if encryption["/Filter"] == "/Standard":
                    super()._handle_encryption(password)
                else:
                    self._encryption = EZDRMEncryption(self.device, self, encrypt_ref)
            elif encrypt_ref is not None:
                # A direct /Encrypt dictionary cannot be handled by
                # EZDRMEncryption (it needs the object's id), so let pypdf
                # process it instead of silently leaving the file undecrypted.
                super()._handle_encryption(password)
        finally:
            self._override_encryption = False

22
src/utils.py Normal file
View File

@@ -0,0 +1,22 @@
# https://github.com/pdfminer/pdfminer.six/blob/master/pdfminer/utils.py#L858
def unpad_aes(padded: bytes) -> bytes:
    """Remove block padding as described in PDF 1.7 section 7.6.2.

    > For an original message length of M, the pad shall consist of
    > 16 - (M mod 16) bytes whose value shall also be 16 - (M mod 16).
    > Note that the pad is present when M is evenly divisible by 16;
    > it contains 16 bytes of 0x10.

    Args:
        padded: Decrypted data, possibly ending in a pad.

    Returns:
        ``padded`` without its pad when the pad is well-formed; otherwise
        ``padded`` unchanged (including for empty input).
    """
    if not padded:
        return padded
    # The last byte announces the pad length (bytes are unsigned).
    padding = padded[-1]
    # A valid pad is 1..16 bytes long. Zero must be rejected explicitly:
    # padded[-0:] is the whole buffer and padded[:-0] is empty, so an
    # all-zero input would otherwise be reduced to b"".
    if not 1 <= padding <= 16:
        return padded
    if padding > len(padded):  # Obviously invalid
        return padded
    # Every byte of the pad must equal the pad length.
    if padded.endswith(bytes([padding]) * padding):
        return padded[:-padding]
    return padded