clean up mail attachment code

2025-10-17 16:04:14 +02:00 · 2025-10-17 16:04:14 +02:00 · 7fcde34897
parent b3484965b3
commit 7fcde34897
4 changed files with 105 additions and 73 deletions
--- a/foerderbarometer/constants.py
+++ b/foerderbarometer/constants.py
@ -0,0 +1,33 @@
 # Wildcard key; note that it is deliberately NOT a member of TYPES below.
 TYPE_ALL = 'ALL'
 # Codes for the individual support types (German programme name, English gloss).
 TYPE_BIB = 'BIB'  # Bibliotheksstipendium (library grant)
 TYPE_ELIT = 'ELIT'  # eLiteraturstipendium (e-literature grant)
 TYPE_SOFT = 'SOFT'  # Softwarestipendium (software grant)
 TYPE_MAIL = 'MAIL'  # E-Mail-Adresse (e-mail address)
 TYPE_IFG = 'IFG'  # Kostenübernahme IFG-Anfrage (cost coverage for an IFG request)
 TYPE_LIT = 'LIT'  # Literaturstipendium (literature grant)
 TYPE_LIST = 'LIST'  # Mailingliste (mailing list)
 TYPE_TRAV = 'TRAV'  # Reisekosten (travel expenses)
 TYPE_VIS = 'VIS'  # Visitenkarten (business cards)
 TYPE_PROJ = 'PROJ'  # Projektförderung (project funding)
 # Every concrete type code, in declaration order.
 TYPES = [
    TYPE_BIB, TYPE_ELIT, TYPE_SOFT, TYPE_MAIL, TYPE_IFG,
    TYPE_LIT, TYPE_LIST, TYPE_TRAV, TYPE_VIS, TYPE_PROJ,
 ]
 # Recipient groups an e-mail can be addressed to.
 RECIPIENT_APPLICANT = 'applicant'
 RECIPIENT_STAFF = 'staff'
 RECIPIENTS = [RECIPIENT_APPLICANT, RECIPIENT_STAFF]
--- a/foerderbarometer/settings.py
+++ b/foerderbarometer/settings.py
@ -6,6 +6,8 @@ from dotenv import load_dotenv
 from input.utils.settings import env, password_validators
 from .constants import *
 BASE_DIR = Path(__file__).parents[1]
 load_dotenv(BASE_DIR / '.env')
@ -166,32 +168,25 @@ NUTZUNGSBEDINGUNGEN_LITERATURSTIPENDIUM = 'static/input/nutzungsbedingungen-lite
 NUTZUNGSBEDINGUNGEN_OTRS = 'static/input/2025_Nutzungsvereinbarung_OTRS.docx.pdf'
 NUTZUNGSBEDINGUNGEN_VISITENKARTEN = 'static/input/nutzungsbedingungen-visitenkarten.pdf'
-# Directory where downloaded attachments will be cached
+MAIL_ATTACHMENT_CACHE_DIR = env('MAIL_ATTACHMENT_CACHE_DIR', BASE_DIR / 'var' / 'mail-attachments')
-MAIL_ATTACHMENT_CACHE_DIR = BASE_DIR / 'var' / 'mail_attachments'
+MAIL_ATTACHMENT_TTL_SECONDS = env('MAIL_ATTACHMENT_TTL_SECONDS', 24 * 60 * 60)
 # Cache TTL (default: 1 day)
 MAIL_ATTACHMENT_TTL_SECONDS = 24 * 60 * 60
 # File attachments via URL:
 # - "applicant": attachments for emails sent to applicants
 # - "staff": attachments for emails sent to the team (community@wikimedia.de)
 #
 # Top-level keys: "applicant" / "staff"
 # Second-level keys: service code ("choice") or "ALL" for global attachments
 # that should be included in all emails of this type.
 MAIL_ATTACHMENT_URLS = {
-    'applicant': {
+    RECIPIENT_APPLICANT: {
-        # Global attachments for all applicant emails
+        TYPE_ALL: [],
-        'ALL': [],
+        TYPE_VIS: [
-        # Special attachments for specific services:
+            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-visitenkarten.pdf',
-        'VIS': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-visitenkarten.pdf', 'Nutzungsbedingungen-Visitenkarten.pdf')],   # Business cards
+        ],
-        'MAIL': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mail.pdf', 'Nutzungsbedingungen-Mail.pdf')],           # Emails
+        TYPE_MAIL: [
-        'LIST': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mailinglisten.pdf', 'Nutzungsbedingungen-Mailinglisten.pdf')],  # Mailing lists
+            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mail.pdf',
-        'LIT': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-literaturstipendium.pdf', 'Nutzungsbedingungen-Literaturstipendium.pdf')],  # Literature grants
+        ],
        TYPE_LIST: [
            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mailinglisten.pdf',
        ],
        TYPE_LIT: [
            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-literaturstipendium.pdf',
        ],
    },
-    'staff': {
+    RECIPIENT_STAFF: {
-        # Global attachments for all staff emails
+        TYPE_ALL: [],
        'ALL': [],
        # Example: 'IFG': ['https://example.com/internal-guideline.pdf']
    },
 }
--- a/input/models.py
+++ b/input/models.py
@ -11,6 +11,9 @@ from django.utils.functional import cached_property, classproperty
 from django.utils.html import format_html
 from django.utils.safestring import mark_safe
 from foerderbarometer.constants import *
 EMAIL_STATES = {
    'NONE': 'noch keine Mail versendet',
    'INF': 'die Benachrichtigung zur Projektabschlussmail wurde versendet',
@ -403,17 +406,6 @@ def type_link(path, label):
    )
 TYPE_BIB = 'BIB'
 TYPE_ELIT = 'ELIT'
 TYPE_MAIL = 'MAIL'
 TYPE_IFG = 'IFG'
 TYPE_LIT = 'LIT'
 TYPE_LIST = 'LIST'
 TYPE_TRAV = 'TRAV'
 TYPE_SOFT = 'SOFT'
 TYPE_VIS = 'VIS'
 TYPE_PROJ = 'PROJ'
 TYPE_CHOICES = {
    TYPE_BIB: type_link('Zugang_zu_Fachliteratur#Bibliotheksstipendium', 'Bibliotheksstipendium'),
    TYPE_ELIT: type_link('Zugang_zu_Fachliteratur#eLiteraturstipendium', 'eLiteraturstipendium'),
--- a/input/utils/mail/attachments.py
+++ b/input/utils/mail/attachments.py
@ -5,20 +5,25 @@ import urllib.request
 import urllib.parse
 import mimetypes
 from contextlib import suppress
 from pathlib import Path
 from typing import Iterable, List, Tuple
 from django.conf import settings
 from django.core.mail import EmailMultiAlternatives
 from foerderbarometer.constants import *
 def _ensure_cache_dir() -> Path:
    """
    Return the attachment cache directory, creating it on demand.

    The location is read from settings.MAIL_ATTACHMENT_CACHE_DIR. Missing
    parent directories are created as well, and an already existing
    directory is not treated as an error.
    """
    directory = Path(settings.MAIL_ATTACHMENT_CACHE_DIR)
    # parents=True builds the whole chain; exist_ok=True makes repeated calls harmless.
    directory.mkdir(exist_ok=True, parents=True)
    return directory
@ -31,6 +36,7 @@ def _cached_filename_for(url: str) -> str:
    # path part only (without query/fragment)
    name = Path(parsed.path).name  # e.g. 'foo.pdf'
    suffix = Path(name).suffix  # e.g. '.pdf'
    return f'{h}{suffix}' if suffix else h
@ -38,20 +44,24 @@ def _is_fresh(path: Path, ttl_seconds: int) -> bool:
    """
    Check if the cached file exists and is still fresh within TTL.
    """
    try:
-        age = time.time() - path.stat().st_mtime
+        mtime = path.stat().st_mtime
        return age < ttl_seconds
    except FileNotFoundError:
        return False
    else:
        return time.time() - mtime < ttl_seconds
-def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
+
 def download_with_cache(url: str, *, timeout: float = 10.0, chunk_size: int = 64 * 1024, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
    """
    Download the file from the given URL into the cache directory, or return the cached
    file if it's still fresh. Uses a temporary '.part' file and atomic replace.
    A simple size cap protects against unexpectedly large downloads.
    """
    cache_dir = _ensure_cache_dir()
-    ttl = int(getattr(settings, 'MAIL_ATTACHMENT_TTL_SECONDS', 86400))
+    ttl = settings.MAIL_ATTACHMENT_TTL_SECONDS
    filename = _cached_filename_for(url)
    path = cache_dir / filename
@ -59,11 +69,11 @@ def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int
        return path
    tmp_path = path.with_suffix(path.suffix + '.part')
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp, open(tmp_path, 'wb') as f:
            # Read in chunks up to size_cap_bytes
            remaining = size_cap_bytes
            chunk_size = 64 * 1024
            while True:
                chunk = resp.read(min(chunk_size, remaining))
                if not chunk:
@ -74,60 +84,62 @@ def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int
                    break
        os.replace(tmp_path, path)
        return path
-    except Exception:
+    except Exception as exc:
        # Best-effort cleanup of partial file
-        try:
+        with suppress(Exception):
            if tmp_path.exists():
                tmp_path.unlink(missing_ok=True)
        except Exception:
            pass
        # Re-raise to let caller decide
        raise
-def _filename_from_url(url: str) -> str:
+        # Re-raise to let caller decide
        raise exc
 def get_filename_from_url(url: str) -> str:
    """
-    Derive a display filename from URL path as a fallback when none provided in settings.
+    Derive a display filename from URL path.
    """
    parsed = urllib.parse.urlparse(url)
    name = Path(parsed.path).name or 'attachment'
    return name
-def collect_attachment_paths(kind: str, choice: str) -> List[Tuple[Path, str]]:
+def collect_attachment_paths(recipient: str, type_code: str) -> List[Tuple[Path, str]]:
    """
    Return a list of (path, filename) for attachments based on settings.MAIL_ATTACHMENT_URLS.
    Supports both 'url' strings and (url, filename) tuples.
    """
    cfg = getattr(settings, 'MAIL_ATTACHMENT_URLS', {})
    channel = cfg.get(kind, {})
    urls: list = []
    urls.extend(channel.get('ALL', []))
    urls.extend(channel.get(choice, []))
-    result: List[Tuple[Path, str]] = []
+    assert recipient in RECIPIENTS
-    for item in urls:
+    assert type_code in TYPES
        if isinstance(item, tuple):
            url, filename = item
        else:
            url, filename = item, _filename_from_url(item)
-        path = download_with_cache(url)
+    config = settings.MAIL_ATTACHMENT_URLS[recipient]
-        # Only append if the file exists (download_with_cache raises on error by default)
+    urls = [*config[TYPE_ALL], *config.get(type_code, [])]
-        result.append((path, filename))
+
-    return result
+    return [
        (download_with_cache(url), get_filename_from_url(url))
        for url in urls
    ]
 def get_mime_type(filename: str, path: Path):
    """
    Guess the MIME type for an attachment.

    The display filename is tried first and the on-disk path second; the
    first successful guess wins. If neither yields a type, the generic
    'application/octet-stream' is returned.
    """
    # Lazy generator: the path is only inspected when the filename gives no result.
    guesses = (mimetypes.guess_type(candidate)[0] for candidate in (filename, path))
    return next((guess for guess in guesses if guess), 'application/octet-stream')
 def attach_files(message: EmailMultiAlternatives, files: Iterable[Tuple[Path, str]]) -> None:
    """
    Attach files to the EmailMultiAlternatives message.
-    MIME type is guessed from filename; falls back to application/octet-stream.
+    MIME type is guessed from filename or path; falls back to application/octet-stream.
    """
    for path, filename in files:
-        # Guess MIME type from final filename first; fallback to path suffix
+        mime_type = get_mime_type(filename, path)
        ctype, _ = mimetypes.guess_type(filename)
        if not ctype:
            ctype, _ = mimetypes.guess_type(str(path))
        ctype = ctype or 'application/octet-stream'
        with open(path, 'rb') as f:
-            message.attach(filename, f.read(), ctype)
+            message.attach(filename, f.read(), mime_type)