foerderbarometer/input/utils/mail/attachments.py

146 lines
4.0 KiB
Python
Raw Normal View History

import hashlib
import os
import time
import urllib.request
import urllib.parse
import mimetypes
2025-10-17 10:06:23 +00:00
2025-10-17 14:04:14 +00:00
from contextlib import suppress
from pathlib import Path
from typing import Iterable, List, Tuple
2025-10-17 10:06:23 +00:00
from django.conf import settings
from django.core.mail import EmailMultiAlternatives
2025-10-17 14:04:14 +00:00
from foerderbarometer.constants import *
2025-10-17 10:06:23 +00:00
def _ensure_cache_dir() -> Path:
    """
    Return the attachment cache directory, creating it first if needed.

    The location is read from settings.MAIL_ATTACHMENT_CACHE_DIR; missing
    parent directories are created as well, and an already existing
    directory is not an error.
    """
    directory = Path(settings.MAIL_ATTACHMENT_CACHE_DIR)
    directory.mkdir(parents=True, exist_ok=True)

    return directory
def _cached_filename_for(url: str) -> str:
    """
    Build the cache filename for a URL.

    The name is the first 16 hex characters of the SHA-1 of the full URL,
    followed by the suffix of the URL path (e.g. '.pdf') when it has one.
    Query string and fragment take part in the hash but not in the suffix.
    """
    digest = hashlib.sha1(url.encode('utf-8')).hexdigest()[:16]

    # Look at the path component only, so '?query' and '#fragment' can never
    # be mistaken for a file extension.
    url_path = urllib.parse.urlparse(url).path
    extension = Path(url_path).suffix

    if not extension:
        return digest

    return f'{digest}{extension}'
def _is_fresh(path: Path, ttl_seconds: int) -> bool:
    """
    Tell whether a cached file may still be used.

    Returns True when the file exists and its modification time is less
    than ttl_seconds in the past; a missing file is never fresh.
    """
    try:
        modified_at = path.stat().st_mtime
    except FileNotFoundError:
        return False

    age_seconds = time.time() - modified_at

    return age_seconds < ttl_seconds
2025-10-17 14:04:14 +00:00
def download_with_cache(
    url: str,
    *,
    timeout: float = 10.0,
    chunk_size: int = 64 * 1024,
    size_cap_bytes: int = 8 * 1024 * 1024,
) -> Path:
    """
    Download the file from the given URL into the cache directory, or return the cached
    file if it's still fresh. Uses a temporary '.part' file and atomic replace.

    A size cap protects against unexpectedly large downloads: a response that is
    larger than size_cap_bytes is rejected instead of being cached in truncated form.

    :param url: URL to fetch; it also determines the cache filename.
    :param timeout: timeout in seconds passed to urllib.request.urlopen.
    :param chunk_size: maximum number of bytes requested per read.
    :param size_cap_bytes: largest response body (in bytes) that is accepted.
    :return: path of the cached file.
    :raises OSError: if the response body exceeds size_cap_bytes.
    :raises Exception: any error raised while requesting or writing the file is
        re-raised unchanged after the partial file has been removed.
    """
    cache_dir = _ensure_cache_dir()
    ttl = settings.MAIL_ATTACHMENT_TTL_SECONDS
    path = cache_dir / _cached_filename_for(url)

    if _is_fresh(path, ttl):
        return path

    tmp_path = path.with_suffix(path.suffix + '.part')

    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp, open(tmp_path, 'wb') as f:
            # Allow one byte beyond the cap: that is the only way to tell a body
            # of exactly size_cap_bytes apart from one that is too large.
            remaining = size_cap_bytes + 1

            while remaining > 0:
                chunk = resp.read(min(chunk_size, remaining))
                if not chunk:
                    break
                f.write(chunk)
                remaining -= len(chunk)

            if remaining <= 0:
                # Never publish a truncated file under the final cache name.
                raise OSError(f'Attachment at {url} exceeds the size cap of {size_cap_bytes} bytes')

        # Only a completely written file is moved to its final name.
        os.replace(tmp_path, path)
    except Exception:
        # Best-effort cleanup of partial file
        with suppress(Exception):
            tmp_path.unlink(missing_ok=True)

        # Re-raise (with the original traceback) to let caller decide
        raise

    return path
2025-10-17 14:04:14 +00:00
def get_filename_from_url(url: str) -> str:
    """
    Derive a display filename from URL path.

    The last segment of the URL path is used (query string and fragment are
    ignored) and percent-escapes are decoded, so '.../My%20File.pdf' yields
    'My File.pdf'. Falls back to 'attachment' when no name is left.
    """
    parsed = urllib.parse.urlparse(url)
    raw_name = Path(parsed.path).name

    # Decode after isolating the last segment, then take the name once more so
    # an encoded separator ('%2F') cannot introduce directory parts.
    name = Path(urllib.parse.unquote(raw_name)).name

    return name or 'attachment'
2025-10-17 14:04:14 +00:00
def collect_attachment_paths(recipient: str, type_code: str) -> List[Tuple[Path, str]]:
    """
    Resolve the attachments configured in settings.MAIL_ATTACHMENT_URLS.

    The URLs listed under TYPE_ALL for the recipient come first, followed by
    those listed under type_code (if any). Every URL is fetched through
    download_with_cache and returned as a (path, filename) pair.
    """
    assert recipient in RECIPIENTS
    assert type_code in TYPES

    url_map = settings.MAIL_ATTACHMENT_URLS[recipient]
    selected_urls = (*url_map[TYPE_ALL], *url_map.get(type_code, []))

    attachments = []
    for url in selected_urls:
        attachments.append((download_with_cache(url), get_filename_from_url(url)))

    return attachments
2025-10-17 14:04:14 +00:00
def get_mime_type(filename: str, path: Path):
    """
    Guess the MIME type of an attachment.

    The display filename is tried first and the file path second; the first
    successful guess wins. Falls back to 'application/octet-stream'.
    """
    # Generator keeps the lookup lazy: the path is only consulted when the
    # filename gives no result.
    guesses = (mimetypes.guess_type(candidate)[0] for candidate in (filename, path))

    return next((guess for guess in guesses if guess), 'application/octet-stream')
def attach_files(message: EmailMultiAlternatives, files: Iterable[Tuple[Path, str]]) -> None:
    """
    Add every (path, filename) pair in files to the message as an attachment.

    The content is read from path in binary mode and attached under filename.
    The MIME type comes from get_mime_type, i.e. it is guessed from filename
    or path and defaults to application/octet-stream.
    """
    for file_path, display_name in files:
        content_type = get_mime_type(display_name, file_path)
        payload = Path(file_path).read_bytes()
        message.attach(display_name, payload, content_type)