From 5f2d99c037c0940676acc2322c22193057b4a9f4 Mon Sep 17 00:00:00 2001
From: Roman
Date: Tue, 7 Oct 2025 19:43:20 +0200
Subject: [PATCH] Add robust mail attachment caching and file attachment support with TTL and MIME detection

---
 input/mail_attachments.py | 145 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 input/mail_attachments.py

diff --git a/input/mail_attachments.py b/input/mail_attachments.py
new file mode 100644
index 0000000..46e7f26
--- /dev/null
+++ b/input/mail_attachments.py
@@ -0,0 +1,145 @@
+"""
+Download, cache and attach files to outgoing mails.
+
+Attachment URLs are read from ``settings.MAIL_ATTACHMENT_URLS``; downloaded files are
+kept in ``settings.MAIL_ATTACHMENT_CACHE_DIR`` and re-fetched once they are older than
+``settings.MAIL_ATTACHMENT_TTL_SECONDS`` (default: 86400 seconds).
+"""
+import contextlib
+import hashlib
+import mimetypes
+import os
+import time
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Iterable, List, Tuple
+
+from django.conf import settings
+from django.core.mail import EmailMultiAlternatives
+
+
+def _ensure_cache_dir() -> Path:
+    """
+    Return the attachment cache directory (settings.MAIL_ATTACHMENT_CACHE_DIR),
+    creating it and any missing parents first.
+    """
+    cache_dir = Path(settings.MAIL_ATTACHMENT_CACHE_DIR)
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    return cache_dir
+
+
+def _cached_filename_for(url: str) -> str:
+    """
+    Generate the cache filename for the given URL: the first 16 hex digits of the
+    URL's SHA-1, plus the suffix of the URL path (e.g. '.pdf') if it has one.
+    """
+    # SHA-1 only serves as a stable cache key here, not as a security measure.
+    digest = hashlib.sha1(url.encode('utf-8')).hexdigest()[:16]
+    # Only the path part is inspected, so query string and fragment never leak
+    # into the suffix.
+    suffix = Path(urllib.parse.urlparse(url).path).suffix
+    return f'{digest}{suffix}'
+
+
+def _is_fresh(path: Path, ttl_seconds: int) -> bool:
+    """
+    Return True if the cached file exists and was modified less than
+    ttl_seconds ago; a missing file is never fresh.
+    """
+    try:
+        age = time.time() - path.stat().st_mtime
+    except FileNotFoundError:
+        return False
+    return age < ttl_seconds
+
+
+def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
+    """
+    Download the file from the given URL into the cache directory, or return the
+    cached file if it's still fresh (see settings.MAIL_ATTACHMENT_TTL_SECONDS).
+
+    The body is streamed into a temporary '.part' file that is moved into place with
+    an atomic replace once complete, so the cache entry itself is never half-written.
+
+    Raises OSError if the body is larger than size_cap_bytes: a truncated file must
+    not be cached (and later mailed) as if it were complete. Errors raised by urllib
+    or the filesystem are propagated unchanged after the '.part' file is removed.
+    """
+    cache_dir = _ensure_cache_dir()
+    ttl = int(getattr(settings, 'MAIL_ATTACHMENT_TTL_SECONDS', 86400))
+    path = cache_dir / _cached_filename_for(url)
+
+    if _is_fresh(path, ttl):
+        return path
+
+    tmp_path = path.with_suffix(path.suffix + '.part')
+    chunk_size = 64 * 1024
+    try:
+        with urllib.request.urlopen(url, timeout=timeout) as resp, open(tmp_path, 'wb') as f:
+            received = 0
+            while True:
+                chunk = resp.read(chunk_size)
+                if not chunk:
+                    break
+                received += len(chunk)
+                if received > size_cap_bytes:
+                    raise OSError(f'Attachment {url} exceeds the size cap of {size_cap_bytes} bytes')
+                f.write(chunk)
+        # The temp file is closed at this point, so the complete content gets published.
+        os.replace(tmp_path, path)
+        return path
+    except Exception:
+        # Best-effort cleanup of the partial file; the original error matters more.
+        with contextlib.suppress(OSError):
+            tmp_path.unlink(missing_ok=True)
+        # Re-raise to let the caller decide
+        raise
+
+
+def _filename_from_url(url: str) -> str:
+    """
+    Derive a display filename from the last segment of the URL path; used as a
+    fallback when settings provide none. Returns 'attachment' for an empty path.
+    """
+    return Path(urllib.parse.urlparse(url).path).name or 'attachment'
+
+
+def collect_attachment_paths(kind: str, choice: str) -> List[Tuple[Path, str]]:
+    """
+    Return a list of (path, filename) for attachments based on settings.MAIL_ATTACHMENT_URLS.
+
+    The setting maps kind -> {'ALL': [...], <choice>: [...]}; the 'ALL' entries come
+    first, followed by those for the given choice. Each entry is either a plain URL
+    string or a (url, filename) pair (tuple or list).
+
+    Every URL is fetched through download_with_cache, whose errors propagate to the caller.
+    """
+    cfg = getattr(settings, 'MAIL_ATTACHMENT_URLS', {})
+    channel = cfg.get(kind, {})
+    entries = [*channel.get('ALL', []), *channel.get(choice, [])]
+
+    result: List[Tuple[Path, str]] = []
+    for entry in entries:
+        # Lists are accepted as well: settings loaded from JSON/YAML have no tuples.
+        if isinstance(entry, (tuple, list)):
+            url, filename = entry
+        else:
+            url, filename = entry, _filename_from_url(entry)
+        result.append((download_with_cache(url), filename))
+    return result
+
+
+def attach_files(message: EmailMultiAlternatives, files: Iterable[Tuple[Path, str]]) -> None:
+    """
+    Attach files to the EmailMultiAlternatives message.
+    MIME type is guessed from filename; falls back to application/octet-stream.
+    """
+    for path, filename in files:
+        # Guess MIME type from the final filename first; fall back to the path suffix
+        ctype, _ = mimetypes.guess_type(filename)
+        if not ctype:
+            ctype, _ = mimetypes.guess_type(str(path))
+        ctype = ctype or 'application/octet-stream'
+
+        message.attach(filename, Path(path).read_bytes(), ctype)