From 7fcde34897ddc0c5816e1f3691bd1d80fcbbd2ee Mon Sep 17 00:00:00 2001
From: Oliver Zander <zander@cosmocode.de>
Date: Fri, 17 Oct 2025 16:04:14 +0200
Subject: [PATCH] clean up mail attachment code

---
 foerderbarometer/constants.py   | 33 +++++++++++++
 foerderbarometer/settings.py    | 45 ++++++++---------
 input/models.py                 | 14 ++----
 input/utils/mail/attachments.py | 86 +++++++++++++++++++--------------
 4 files changed, 105 insertions(+), 73 deletions(-)
 create mode 100644 foerderbarometer/constants.py

diff --git a/foerderbarometer/constants.py b/foerderbarometer/constants.py
new file mode 100644
index 0000000..0eef96f
--- /dev/null
+++ b/foerderbarometer/constants.py
@@ -0,0 +1,33 @@
+TYPE_ALL = 'ALL'  # wildcard key: attachments listed under it are added for every type
+
+TYPE_BIB = 'BIB'  # Bibliotheksstipendium (library grant)
+TYPE_ELIT = 'ELIT'  # eLiteraturstipendium (e-literature grant)
+TYPE_SOFT = 'SOFT'  # Softwarestipendium (software grant)
+TYPE_MAIL = 'MAIL'  # E-Mail-Adresse (e-mail address)
+TYPE_IFG = 'IFG'  # Kostenübernahme IFG-Anfrage (cost coverage for an IFG request)
+TYPE_LIT = 'LIT'  # Literaturstipendium (literature grant)
+TYPE_LIST = 'LIST'  # Mailingliste (mailing list)
+TYPE_TRAV = 'TRAV'  # Reisekosten (travel expenses)
+TYPE_VIS = 'VIS'  # Visitenkarten (business cards)
+TYPE_PROJ = 'PROJ'  # Projektförderung (project funding)
+
+TYPES = [  # concrete type codes only; TYPE_ALL is not part of this list
+    TYPE_BIB,
+    TYPE_ELIT,
+    TYPE_SOFT,
+    TYPE_MAIL,
+    TYPE_IFG,
+    TYPE_LIT,
+    TYPE_LIST,
+    TYPE_TRAV,
+    TYPE_VIS,
+    TYPE_PROJ,
+]
+
+RECIPIENT_APPLICANT = 'applicant'  # mails sent to applicants
+RECIPIENT_STAFF = 'staff'  # mails sent to the team
+
+RECIPIENTS = [
+    RECIPIENT_APPLICANT,
+    RECIPIENT_STAFF,
+]
diff --git a/foerderbarometer/settings.py b/foerderbarometer/settings.py
index fcbf851..c51fc4a 100644
--- a/foerderbarometer/settings.py
+++ b/foerderbarometer/settings.py
@@ -6,6 +6,8 @@ from dotenv import load_dotenv
 
 from input.utils.settings import env, password_validators
 
+from .constants import *
+
 BASE_DIR = Path(__file__).parents[1]
 
 load_dotenv(BASE_DIR / '.env')
@@ -166,32 +168,25 @@ NUTZUNGSBEDINGUNGEN_LITERATURSTIPENDIUM = 'static/input/nutzungsbedingungen-lite
 NUTZUNGSBEDINGUNGEN_OTRS = 'static/input/2025_Nutzungsvereinbarung_OTRS.docx.pdf'
 NUTZUNGSBEDINGUNGEN_VISITENKARTEN = 'static/input/nutzungsbedingungen-visitenkarten.pdf'
 
-# Directory where downloaded attachments will be cached
-MAIL_ATTACHMENT_CACHE_DIR = BASE_DIR / 'var' / 'mail_attachments'
-
-# Cache TTL (default: 1 day)
-MAIL_ATTACHMENT_TTL_SECONDS = 24 * 60 * 60
-
-# File attachments via URL:
-# - "applicant": attachments for emails sent to applicants
-# - "staff": attachments for emails sent to the team (community@wikimedia.de)
-#
-# Top-level keys: "applicant" / "staff"
-# Second-level keys: service code ("choice") or "ALL" for global attachments
-# that should be included in all emails of this type.
+MAIL_ATTACHMENT_CACHE_DIR = env('MAIL_ATTACHMENT_CACHE_DIR', BASE_DIR / 'var' / 'mail-attachments')  # directory where downloaded attachments are cached
+MAIL_ATTACHMENT_TTL_SECONDS = env('MAIL_ATTACHMENT_TTL_SECONDS', 24 * 60 * 60)  # cache TTL in seconds (default: 1 day); NOTE(review): this patch drops the int() cast at the use site - confirm env() returns a number
 MAIL_ATTACHMENT_URLS = {
-    'applicant': {
-        # Global attachments for all applicant emails
-        'ALL': [],
-        # Special attachments for specific services:
-        'VIS': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-visitenkarten.pdf', 'Nutzungsbedingungen-Visitenkarten.pdf')],   # Business cards
-        'MAIL': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mail.pdf', 'Nutzungsbedingungen-Mail.pdf')],           # Emails
-        'LIST': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mailinglisten.pdf', 'Nutzungsbedingungen-Mailinglisten.pdf')],  # Mailing lists
-        'LIT': [('https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-literaturstipendium.pdf', 'Nutzungsbedingungen-Literaturstipendium.pdf')],  # Literature grants
+    RECIPIENT_APPLICANT: {  # attachments for emails sent to applicants
+        TYPE_ALL: [],  # global attachments for all applicant emails
+        TYPE_VIS: [
+            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-visitenkarten.pdf',
+        ],
+        TYPE_MAIL: [
+            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mail.pdf',
+        ],
+        TYPE_LIST: [
+            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-mailinglisten.pdf',
+        ],
+        TYPE_LIT: [
+            'https://foerderung.wikimedia.de/static/input/nutzungsbedingungen-literaturstipendium.pdf',
+        ],
     },
-    'staff': {
-        # Global attachments for all staff emails
-        'ALL': [],
-        # Example: 'IFG': ['https://example.com/internal-guideline.pdf']
+    RECIPIENT_STAFF: {  # attachments for emails sent to the team (community@wikimedia.de)
+        TYPE_ALL: [],  # global attachments for all staff emails
     },
 }
diff --git a/input/models.py b/input/models.py
index 567e675..45f38e9 100755
--- a/input/models.py
+++ b/input/models.py
@@ -11,6 +11,9 @@ from django.utils.functional import cached_property, classproperty
 from django.utils.html import format_html
 from django.utils.safestring import mark_safe
 
+from foerderbarometer.constants import *
+
+
 EMAIL_STATES = {
     'NONE': 'noch keine Mail versendet',
     'INF': 'die Benachrichtigung zur Projektabschlussmail wurde versendet',
@@ -403,17 +406,6 @@ def type_link(path, label):
     )
 
 
-TYPE_BIB = 'BIB'
-TYPE_ELIT = 'ELIT'
-TYPE_MAIL = 'MAIL'
-TYPE_IFG = 'IFG'
-TYPE_LIT = 'LIT'
-TYPE_LIST = 'LIST'
-TYPE_TRAV = 'TRAV'
-TYPE_SOFT = 'SOFT'
-TYPE_VIS = 'VIS'
-TYPE_PROJ = 'PROJ'
-
 TYPE_CHOICES = {
     TYPE_BIB: type_link('Zugang_zu_Fachliteratur#Bibliotheksstipendium', 'Bibliotheksstipendium'),
     TYPE_ELIT: type_link('Zugang_zu_Fachliteratur#eLiteraturstipendium', 'eLiteraturstipendium'),
diff --git a/input/utils/mail/attachments.py b/input/utils/mail/attachments.py
index 0916f36..4873365 100644
--- a/input/utils/mail/attachments.py
+++ b/input/utils/mail/attachments.py
@@ -5,20 +5,25 @@ import urllib.request
 import urllib.parse
 import mimetypes
 
+from contextlib import suppress
 from pathlib import Path
 from typing import Iterable, List, Tuple
 
 from django.conf import settings
 from django.core.mail import EmailMultiAlternatives
 
+from foerderbarometer.constants import *
+
 
 def _ensure_cache_dir() -> Path:
     """
     Ensure that the cache directory for attachments exists.
     Creates it recursively if it doesn't.
     """
+
     cache_dir = Path(settings.MAIL_ATTACHMENT_CACHE_DIR)
     cache_dir.mkdir(parents=True, exist_ok=True)
+
     return cache_dir
 
 
@@ -31,6 +36,7 @@ def _cached_filename_for(url: str) -> str:
     # path part only (without query/fragment)
     name = Path(parsed.path).name  # e.g. 'foo.pdf'
     suffix = Path(name).suffix  # e.g. '.pdf'
+
     return f'{h}{suffix}' if suffix else h
 
 
@@ -38,20 +44,24 @@ def _is_fresh(path: Path, ttl_seconds: int) -> bool:
     """
     Check if the cached file exists and is still fresh within TTL.
     """
+
     try:
-        age = time.time() - path.stat().st_mtime
-        return age < ttl_seconds
+        mtime = path.stat().st_mtime
     except FileNotFoundError:
         return False
+    else:
+        return time.time() - mtime < ttl_seconds
 
-def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
+
+def download_with_cache(url: str, *, timeout: float = 10.0, chunk_size: int = 64 * 1024, size_cap_bytes: int = 8 * 1024 * 1024) -> Path:
     """
     Download the file from the given URL into the cache directory, or return the cached
     file if it's still fresh. Uses a temporary '.part' file and atomic replace.
     A simple size cap protects against unexpectedly large downloads.
     """
+
     cache_dir = _ensure_cache_dir()
-    ttl = int(getattr(settings, 'MAIL_ATTACHMENT_TTL_SECONDS', 86400))
+    ttl = settings.MAIL_ATTACHMENT_TTL_SECONDS
     filename = _cached_filename_for(url)
     path = cache_dir / filename
 
@@ -59,11 +69,11 @@ def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int
         return path
 
     tmp_path = path.with_suffix(path.suffix + '.part')
+
     try:
         with urllib.request.urlopen(url, timeout=timeout) as resp, open(tmp_path, 'wb') as f:
             # Read in chunks up to size_cap_bytes
             remaining = size_cap_bytes
-            chunk_size = 64 * 1024
             while True:
                 chunk = resp.read(min(chunk_size, remaining))
                 if not chunk:
@@ -74,60 +84,62 @@ def download_with_cache(url: str, *, timeout: float = 10.0, size_cap_bytes: int
                     break
         os.replace(tmp_path, path)
         return path
-    except Exception:
+    except Exception as exc:
         # Best-effort cleanup of partial file
-        try:
+        with suppress(Exception):
             if tmp_path.exists():
                 tmp_path.unlink(missing_ok=True)
-        except Exception:
-            pass
-        # Re-raise to let caller decide
-        raise
 
-def _filename_from_url(url: str) -> str:
+        # Re-raise to let caller decide
+        raise exc
+
+
+def get_filename_from_url(url: str) -> str:
     """
-    Derive a display filename from URL path as a fallback when none provided in settings.
+    Derive a display filename from the percent-decoded URL path; 'attachment' if the path has no name.
     """
+
     parsed = urllib.parse.urlparse(url)
-    name = Path(parsed.path).name or 'attachment'
+    name = Path(urllib.parse.unquote(parsed.path)).name or 'attachment'
+
     return name
 
 
-def collect_attachment_paths(kind: str, choice: str) -> List[Tuple[Path, str]]:
+def collect_attachment_paths(recipient: str, type_code: str) -> List[Tuple[Path, str]]:
     """
     Return a list of (path, filename) for attachments based on settings.MAIL_ATTACHMENT_URLS.
-    Supports both 'url' strings and (url, filename) tuples.
+    Raises ValueError (instead of assert, which `python -O` strips) for an unknown recipient or type code.
     """
-    cfg = getattr(settings, 'MAIL_ATTACHMENT_URLS', {})
-    channel = cfg.get(kind, {})
-    urls: list = []
-    urls.extend(channel.get('ALL', []))
-    urls.extend(channel.get(choice, []))
 
-    result: List[Tuple[Path, str]] = []
-    for item in urls:
-        if isinstance(item, tuple):
-            url, filename = item
-        else:
-            url, filename = item, _filename_from_url(item)
+    if recipient not in RECIPIENTS:
+        raise ValueError(f'unknown recipient: {recipient!r}')
+    if type_code not in TYPES:
+        raise ValueError(f'unknown type code: {type_code!r}')
 
-        path = download_with_cache(url)
-        # Only append if the file exists (download_with_cache raises on error by default)
-        result.append((path, filename))
-    return result
+    config = settings.MAIL_ATTACHMENT_URLS[recipient]
+    urls = [*config[TYPE_ALL], *config.get(type_code, [])]
+
+    return [(download_with_cache(url), get_filename_from_url(url)) for url in urls]
+
+
+def get_mime_type(filename: str, path: Path) -> str:
+    """Guess the MIME type from filename, then path; fall back to application/octet-stream."""
+    for candidate in (filename, str(path)):
+        mime_type, _ = mimetypes.guess_type(candidate)
+        if mime_type:
+            return mime_type
+
+    return 'application/octet-stream'
 
 
 def attach_files(message: EmailMultiAlternatives, files: Iterable[Tuple[Path, str]]) -> None:
     """
     Attach files to the EmailMultiAlternatives message.
-    MIME type is guessed from filename; falls back to application/octet-stream.
+    MIME type is guessed from filename or path (default: application/octet-stream); each file is read fully into memory.
     """
+
     for path, filename in files:
-        # Guess MIME type from final filename first; fallback to path suffix
-        ctype, _ = mimetypes.guess_type(filename)
-        if not ctype:
-            ctype, _ = mimetypes.guess_type(str(path))
-        ctype = ctype or 'application/octet-stream'
+        mime_type = get_mime_type(filename, path)
 
         with open(path, 'rb') as f:
-            message.attach(filename, f.read(), ctype)
+            message.attach(filename, f.read(), mime_type)