clean up mail attachment code

This commit is contained in:
Oliver Zander 2025-10-17 16:04:14 +02:00
parent b3484965b3
commit 7fcde34897
4 changed files with 105 additions and 73 deletions

View File

@ -0,0 +1,33 @@
# Pseudo type code used as a configuration key for entries that apply to
# every service type. It is deliberately not part of TYPES.
TYPE_ALL = 'ALL'

# Service type codes (German service name in parentheses).
TYPE_BIB = 'BIB'    # library grant (Bibliotheksstipendium)
TYPE_ELIT = 'ELIT'  # e-literature grant (eLiteraturstipendium)
TYPE_SOFT = 'SOFT'  # software grant (Softwarestipendium)
TYPE_MAIL = 'MAIL'  # e-mail address (E-Mail-Adresse)
TYPE_IFG = 'IFG'    # cost coverage for an IFG request (Kostenübernahme IFG-Anfrage)
TYPE_LIT = 'LIT'    # literature grant (Literaturstipendium)
TYPE_LIST = 'LIST'  # mailing list (Mailingliste)
TYPE_TRAV = 'TRAV'  # travel costs (Reisekosten)
TYPE_VIS = 'VIS'    # business cards (Visitenkarten)
TYPE_PROJ = 'PROJ'  # project funding (Projektförderung)

# All concrete service type codes, in declaration order.
TYPES = [
    TYPE_BIB,
    TYPE_ELIT,
    TYPE_SOFT,
    TYPE_MAIL,
    TYPE_IFG,
    TYPE_LIT,
    TYPE_LIST,
    TYPE_TRAV,
    TYPE_VIS,
    TYPE_PROJ,
]

# Recipient groups of outgoing mails.
RECIPIENT_APPLICANT = 'applicant'
RECIPIENT_STAFF = 'staff'

# All known recipient groups.
RECIPIENTS = [
    RECIPIENT_APPLICANT,
    RECIPIENT_STAFF,
]

View File

@ -6,6 +6,8 @@ from dotenv import load_dotenv
from input.utils.settings import env, password_validators
from .constants import *
BASE_DIR = Path(__file__).parents[1]
load_dotenv(BASE_DIR / '.env')
@ -166,32 +168,25 @@ NUTZUNGSBEDINGUNGEN_LITERATURSTIPENDIUM = 'static/input/nutzungsbedingungen-lite
NUTZUNGSBEDINGUNGEN_OTRS = 'static/input/2025_Nutzungsvereinbarung_OTRS.docx.pdf'
NUTZUNGSBEDINGUNGEN_VISITENKARTEN = 'static/input/nutzungsbedingungen-visitenkarten.pdf'
# Directory where downloaded attachments will be cached
MAIL_ATTACHMENT_CACHE_DIR = BASE_DIR / 'var' / 'mail_attachments'
# Cache TTL (default: 1 day)
MAIL_ATTACHMENT_TTL_SECONDS = 24 * 60 * 60
# File attachments via URL:
# - "applicant": attachments for emails sent to applicants
# - "staff": attachments for emails sent to the team (community@wikimedia.de)
#
# Top-level keys: "applicant" / "staff"
# Second-level keys: service code ("choice") or "ALL" for global attachments
# that should be included in all emails of this type.
MAIL_ATTACHMENT_CACHE_DIR = env('MAIL_ATTACHMENT_CACHE_DIR', BASE_DIR / 'var' / 'mail-attachments')
MAIL_ATTACHMENT_TTL_SECONDS = env('MAIL_ATTACHMENT_TTL_SECONDS', 24 * 60 * 60)
MAIL_ATTACHMENT_URLS = {
'applicant': {
# Global attachments for all applicant emails
'ALL': [],
# Special attachments for specific services:
'VIS': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-visitenkarten.pdf', 'Nutzungsbedingungen-Visitenkarten.pdf')], # Business cards
'MAIL': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mail.pdf', 'Nutzungsbedingungen-Mail.pdf')], # Emails
'LIST': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mailinglisten.pdf', 'Nutzungsbedingungen-Mailinglisten.pdf')], # Mailing lists
'LIT': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-literaturstipendium.pdf', 'Nutzungsbedingungen-Literaturstipendium.pdf')], # Literature grants
RECIPIENT_APPLICANT: {
TYPE_ALL: [],
TYPE_VIS: [
'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-visitenkarten.pdf',
],
TYPE_MAIL: [
'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mail.pdf',
],
TYPE_LIST: [
'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mailinglisten.pdf',
],
TYPE_LIT: [
'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-literaturstipendium.pdf',
],
},
'staff': {
# Global attachments for all staff emails
'ALL': [],
# Example: 'IFG': ['https://example.com/internal-guideline.pdf']
RECIPIENT_STAFF: {
TYPE_ALL: [],
},
}

View File

@ -11,6 +11,9 @@ from django.utils.functional import cached_property, classproperty
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from foerderbarometer.constants import *
EMAIL_STATES = {
'NONE': 'noch keine Mail versendet',
'INF': 'die Benachrichtigung zur Projektabschlussmail wurde versendet',
@ -403,17 +406,6 @@ def type_link(path, label):
)
TYPE_BIB = 'BIB'
TYPE_ELIT = 'ELIT'
TYPE_MAIL = 'MAIL'
TYPE_IFG = 'IFG'
TYPE_LIT = 'LIT'
TYPE_LIST = 'LIST'
TYPE_TRAV = 'TRAV'
TYPE_SOFT = 'SOFT'
TYPE_VIS = 'VIS'
TYPE_PROJ = 'PROJ'
TYPE_CHOICES = {
TYPE_BIB: type_link('Zugang_zu_Fachliteratur#Bibliotheksstipendium', 'Bibliotheksstipendium'),
TYPE_ELIT: type_link('Zugang_zu_Fachliteratur#eLiteraturstipendium', 'eLiteraturstipendium'),

View File

@ -5,20 +5,25 @@ import urllib.request
import urllib.parse
import mimetypes
from contextlib import suppress
from pathlib import Path
from typing import Iterable, List, Tuple
from django.conf import settings
from django.core.mail import EmailMultiAlternatives
from foerderbarometer.constants import *
def _ensure_cache_dir() -> Path:
    """
    Return the attachment cache directory, creating it first if necessary.

    The location is read from settings.MAIL_ATTACHMENT_CACHE_DIR. Missing
    parent directories are created as well; an already existing directory
    is not an error.
    """

    directory = Path(settings.MAIL_ATTACHMENT_CACHE_DIR)
    directory.mkdir(parents=True, exist_ok=True)

    return directory
@ -31,6 +36,7 @@ def _cached_filename_for(url: str) -> str:
# path part only (without query/fragment)
name = Path(parsed.path).name # e.g. 'foo.pdf'
suffix = Path(name).suffix # e.g. '.pdf'
return f'{h}{suffix}' if suffix else h
@ -38,20 +44,24 @@ def _is_fresh(path: Path, ttl_seconds: int) -> bool:
"""
Check if the cached file exists and is still fresh within TTL.
"""
try:
age = time.time() - path.stat().st_mtime
return age < ttl_seconds
mtime = path.stat().st_mtime
except FileNotFoundError:
return False
else:
return time.time() - mtime < ttl_seconds
def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
def download_with_cache(url: str, *, timeout: float = 10.0, chunk_size: int = 64 * 1024, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
"""
Download the file from the given URL into the cache directory, or return the cached
file if it's still fresh. Uses a temporary '.part' file and atomic replace.
A simple size cap protects against unexpectedly large downloads.
"""
cache_dir = _ensure_cache_dir()
ttl = int(getattr(settings, 'MAIL_ATTACHMENT_TTL_SECONDS', 86400))
ttl = settings.MAIL_ATTACHMENT_TTL_SECONDS
filename = _cached_filename_for(url)
path = cache_dir / filename
@ -59,11 +69,11 @@ def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int
return path
tmp_path = path.with_suffix(path.suffix + '.part')
try:
with urllib.request.urlopen(url, timeout=timeout) as resp, open(tmp_path, 'wb') as f:
# Read in chunks up to size_cap_bytes
remaining = size_cap_bytes
chunk_size = 64 * 1024
while True:
chunk = resp.read(min(chunk_size, remaining))
if not chunk:
@ -74,60 +84,62 @@ def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int
break
os.replace(tmp_path, path)
return path
except Exception:
except Exception as exc:
# Best-effort cleanup of partial file
try:
with suppress(Exception):
if tmp_path.exists():
tmp_path.unlink(missing_ok=True)
except Exception:
pass
# Re-raise to let caller decide
raise
def _filename_from_url(url: str) -> str:
# Re-raise to let caller decide
raise exc
def get_filename_from_url(url: str) -> str:
    """
    Derive a display filename from the path component of a URL.

    Query string and fragment are ignored. Percent-encoded characters are
    decoded so that e.g. 'My%20File.pdf' is shown as 'My File.pdf'.

    Returns 'attachment' when the URL path has no final component
    (e.g. 'https://example.com/').
    """

    parsed = urllib.parse.urlparse(url)

    # Decode before taking the last component, so an encoded separator
    # ('%2F') can never end up inside the returned filename.
    decoded_path = urllib.parse.unquote(parsed.path)

    return Path(decoded_path).name or 'attachment'
def collect_attachment_paths(recipient: str, type_code: str) -> List[Tuple[Path, str]]:
    """
    Return a list of (path, filename) pairs for the attachments configured in
    settings.MAIL_ATTACHMENT_URLS.

    The URLs listed under TYPE_ALL for the given recipient come first,
    followed by the URLs listed under the given type code. Each URL is
    fetched via download_with_cache, and the display filename is derived
    from the URL via get_filename_from_url.

    Raises ValueError if recipient is not one of RECIPIENTS or type_code is
    not one of TYPES. Errors raised by download_with_cache are propagated.
    """

    # Explicit checks instead of `assert`: assertions are removed when Python
    # runs with -O, which would silently disable the validation.
    if recipient not in RECIPIENTS:
        raise ValueError(f'unknown recipient: {recipient!r}')

    if type_code not in TYPES:
        raise ValueError(f'unknown type code: {type_code!r}')

    config = settings.MAIL_ATTACHMENT_URLS[recipient]

    # Both keys are optional: a recipient without global or type-specific
    # attachments simply contributes no URLs.
    urls = [*config.get(TYPE_ALL, []), *config.get(type_code, [])]

    return [
        (download_with_cache(url), get_filename_from_url(url))
        for url in urls
    ]
def get_mime_type(filename: str, path: Path):
    """
    Guess the MIME type of an attachment.

    The display filename is tried first, then the path of the file itself.
    If neither yields a guess, 'application/octet-stream' is returned.
    """

    # Lazy generator: the path is only inspected if the filename gave no result.
    guesses = (
        mimetypes.guess_type(candidate)[0]
        for candidate in (filename, path)
    )

    return next((guess for guess in guesses if guess), 'application/octet-stream')
def attach_files(message: EmailMultiAlternatives, files: Iterable[Tuple[Path, str]]) -> None:
    """
    Attach files to the EmailMultiAlternatives message.

    `files` yields (path, filename) pairs: `path` is the file to read and
    `filename` is the name the attachment gets in the message.

    MIME type is guessed from filename or path; falls back to application/octet-stream.
    """

    for path, filename in files:
        mime_type = get_mime_type(filename, path)

        # Each file is read completely and attached exactly once.
        with open(path, 'rb') as f:
            message.attach(filename, f.read(), mime_type)