M-Bus Gateway
← Tilbage til blog
· Python· pathlib· Path· filsystem· IoT· konfiguration· OTA· atomisk

Python pathlib til IoT-platform — fil- og stioperationer

Python pathlib til IoT platform: Path-objekt, rekursiv filsøgning, atomisk filskrivning, temporære filer, konfigurationsfiler og sammenligning med os.path.

Af M-Bus Gateway

pathlib.Path er det moderne alternativ til os.path og strenge. Her er de produktionsklare mønstre fra IoT gateway og server-platformen.


Path-objekt — grundlæggende

# gateway/src/config/paths.py

from pathlib import Path

# Define every path in one place — never scatter hard-coded strings through the code:
BASE_DIR = Path("/etc/mbus-gateway")
CONFIG_FILE = BASE_DIR / "config.env"
AES_KEYS_FILE = BASE_DIR / "aes-keys.json"
CERT_DIR = BASE_DIR / "certs"
LOG_DIR = Path("/var/log/mbus-gateway")
DATA_DIR = Path("/var/lib/mbus-gateway")
SQLITE_DB = DATA_DIR / "readings.db"
OTA_DIR = DATA_DIR / "ota"


# Path operations:
print(CONFIG_FILE.parent)       # /etc/mbus-gateway
print(CONFIG_FILE.name)         # config.env
print(CONFIG_FILE.stem)         # config
print(CONFIG_FILE.suffix)       # .env
print(CONFIG_FILE.exists())     # True/False
print(CONFIG_FILE.is_file())    # True when it exists and is a regular file
print(CONFIG_FILE.stat().st_size)   # File size in bytes (stat() raises if the file is missing)

# Create directories (no error if they already exist):
DATA_DIR.mkdir(parents=True, exist_ok=True)
OTA_DIR.mkdir(parents=True, exist_ok=True)

# Comparison with os.path (avoid this):
import os
old_style = os.path.join("/etc/mbus-gateway", "config.env")  # Clunky
new_style = BASE_DIR / "config.env"                          # Clear

Konfigurationsfiler

# gateway/src/config/loader.py

from pathlib import Path
import json
import os


# .env-style settings file parsed by load_config().
CONFIG_FILE = Path("/etc/mbus-gateway/config.env")
# JSON key store read by load_aes_keys(), which rejects it when any
# group/other permission bit is set.
AES_KEYS_FILE = Path("/etc/mbus-gateway/aes-keys.json")


def load_config() -> dict[str, str]:
    """Parse the ``KEY=VALUE`` settings file at ``CONFIG_FILE``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Each remaining line is split at its first ``=``; key and value
    are whitespace-stripped but otherwise taken verbatim (quotes are kept).
    When a key occurs more than once, the last value wins.

    Returns:
        Mapping of setting name to its raw string value.

    Raises:
        FileNotFoundError: If ``CONFIG_FILE`` does not exist.
    """
    if not CONFIG_FILE.exists():
        raise FileNotFoundError(f"Konfigurationsfil mangler: {CONFIG_FILE}")

    text = CONFIG_FILE.read_text(encoding="utf-8")
    entries = (raw.strip() for raw in text.splitlines())
    assignments = (
        entry.partition("=")
        for entry in entries
        if entry and not entry.startswith("#") and "=" in entry
    )
    return {name.strip(): value.strip() for name, _, value in assignments}


def load_aes_keys() -> dict[str, str]:
    """Load the AES key mapping from the JSON file at ``AES_KEYS_FILE``.

    The file is opened once and its permissions are checked on the open
    descriptor, so the file that passes the check is the file that is read.

    Returns:
        The decoded JSON content, or an empty dict when the file is missing.

    Raises:
        PermissionError: If any group/other permission bit is set on the
            file (it must be ``600``), or if the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    try:
        handle = AES_KEYS_FILE.open(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {}

    with handle:
        # Keep only the permission bits so the message shows e.g. 0o644
        # instead of the raw st_mode with file-type bits (0o100644).
        permissions = os.fstat(handle.fileno()).st_mode & 0o7777
        if permissions & 0o077:    # group/other have some access
            raise PermissionError(
                f"AES-nøgelfil har for brede permissions: {oct(permissions)}"
                f" — skal være 600"
            )
        return json.load(handle)


def save_aes_key(meter_id: str, key_hex: str) -> None:
    """Store or replace the AES key for one meter.

    Reads the current mapping with ``load_aes_keys()``, sets ``meter_id`` to
    ``key_hex`` and writes the whole mapping back through
    ``_atomic_write_json`` with mode ``0o600``.

    Args:
        meter_id: Key under which the AES key is stored.
        key_hex: The AES key value to store.
    """
    # NOTE(review): the read-modify-write is not locked, so two concurrent
    # callers could overwrite each other's update — confirm single-writer use.
    updated_keys = {**load_aes_keys(), meter_id: key_hex}
    _atomic_write_json(AES_KEYS_FILE, updated_keys, mode=0o600)


def _atomic_write_json(path: Path, data: dict, mode: int = 0o644) -> None:
    """Write ``data`` as JSON to ``path`` atomically (temp file, then replace).

    The temp file gets a unique name in the same directory as ``path`` and is
    created owner-only, so secrets are never exposed with wider permissions
    than requested and concurrent writers cannot clobber each other's temp
    file. Its content is flushed and fsync'ed before it replaces the target,
    so a crash leaves either the old or the complete new file.

    Args:
        path: Destination file; replaced if it already exists.
        data: JSON-serialisable mapping to store.
        mode: Permission bits applied to the file before it is put in place.

    Raises:
        OSError: If the temp file cannot be created, written or moved. The
            temp file is removed before the error propagates.
        TypeError: If ``data`` is not JSON-serialisable.
    """
    import os
    import tempfile

    # mkstemp creates the file with mode 0o600 and a unique name; same
    # directory as the target so the final replace stays on one filesystem.
    fd, tmp_name = tempfile.mkstemp(
        prefix=f".{path.name}.", suffix=".tmp", dir=path.parent
    )
    tmp = Path(tmp_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(json.dumps(data, indent=2))
            handle.flush()
            os.fsync(handle.fileno())   # data must be on disk before the rename
        tmp.chmod(mode)
        tmp.replace(path)   # atomic on POSIX; also overwrites on Windows
    except BaseException:
        tmp.unlink(missing_ok=True)
        raise

OTA firmware-filer

# gateway/src/ota/downloader.py

from pathlib import Path
import hashlib
import shutil
import tempfile


# Directory where prepare_ota_package() places verified packages.
OTA_DIR = Path("/var/lib/mbus-gateway/ota")
# Not referenced by the functions shown in this module; presumably the
# install location of the running firmware — confirm against the installer.
CURRENT_FIRMWARE = Path("/usr/local/lib/mbus-gateway")


def verify_sha256(path: Path, expected_hash: str) -> bool:
    """Check a file's SHA-256 digest against an expected hex digest.

    The file is hashed in 64 KiB chunks so large firmware packages are never
    loaded into memory at once. The comparison ignores letter case and
    surrounding whitespace in ``expected_hash``, because hex digests are
    commonly published upper-case or with a trailing newline.

    Args:
        path: File to hash.
        expected_hash: Expected SHA-256 digest as a hex string.

    Returns:
        True when the digests match, otherwise False.

    Raises:
        OSError: If ``path`` cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while chunk := stream.read(65536):
            digest.update(chunk)
    return digest.hexdigest() == expected_hash.strip().lower()


def prepare_ota_package(
    downloaded_path: Path,
    expected_hash: str,
    version: str,
) -> Path:
    """Verify a downloaded OTA package and move it into ``OTA_DIR``.

    Args:
        downloaded_path: Location of the freshly downloaded package.
        expected_hash: Expected SHA-256 hex digest of the package.
        version: Version label embedded in the stored file name; it must not
            contain path separators.

    Returns:
        Path of the verified package, ``OTA_DIR/mbus-gateway-<version>.tar.gz``.

    Raises:
        ValueError: If ``version`` would place the file outside ``OTA_DIR``
            (nothing is touched in that case), or if the SHA-256 check fails
            (the downloaded file is deleted first).
    """
    target = OTA_DIR / f"mbus-gateway-{version}.tar.gz"
    # A separator inside `version` would turn the file name into several path
    # components; reject it before any filesystem side effect.
    if target.parent != OTA_DIR:
        raise ValueError(f"Ugyldig OTA-version: {version!r}")

    OTA_DIR.mkdir(parents=True, exist_ok=True)

    if not verify_sha256(downloaded_path, expected_hash):
        downloaded_path.unlink()    # remove the corrupt file
        raise ValueError(f"SHA256-verifikation fejlede for {downloaded_path.name}")

    # Move into the OTA directory with the version in the file name.
    shutil.move(str(downloaded_path), target)

    return target


def list_ota_packages() -> list[Path]:
    """Return the OTA packages found in ``OTA_DIR``, newest first.

    Only files matching ``mbus-gateway-*.tar.gz`` directly inside ``OTA_DIR``
    are considered; "newest" is decided by modification time.
    """
    packages = list(OTA_DIR.glob("mbus-gateway-*.tar.gz"))
    packages.sort(key=lambda package: package.stat().st_mtime, reverse=True)
    return packages


def cleanup_old_ota_packages(keep: int = 3) -> None:
    """Delete all but the ``keep`` newest OTA packages to free disk space.

    Args:
        keep: Number of packages at the front of the newest-first list from
            ``list_ota_packages()`` that are left untouched.
    """
    newest_first = list_ota_packages()
    surplus = newest_first[keep:]
    for stale_package in surplus:
        stale_package.unlink()

Rekursiv filsøgning

# server/src/files/scanner.py
# Skan uploadede PDF'er og billeder til OCR

from pathlib import Path
from datetime import datetime, timedelta


# Root of the upload tree; searched recursively by find_pending_ocr_files().
UPLOAD_DIR = Path("/var/lib/mbus-platform/uploads")


def find_pending_ocr_files(
    max_age_hours: int = 24,
) -> list[Path]:
    """Find recently uploaded PDF and image files below ``UPLOAD_DIR``.

    A file qualifies when its suffix is ``.pdf``, ``.jpg``, ``.jpeg`` or
    ``.png`` (case-insensitive) and it was modified less than
    ``max_age_hours`` ago. Older files are skipped.

    NOTE(review): nothing here checks whether a file was already
    OCR-processed; selection is by extension and age only — confirm that
    processed files are moved or removed elsewhere.

    Args:
        max_age_hours: Maximum age, in hours, of the files to return.

    Returns:
        Matching paths, ordered oldest modification time first.
    """
    ocr_suffixes = {".pdf", ".jpg", ".jpeg", ".png"}
    # Compare raw POSIX timestamps: avoids the deprecated naive-UTC helpers
    # (datetime.utcnow / utcfromtimestamp) and any timezone conversion.
    cutoff_ts = (
        datetime.now().timestamp()
        - timedelta(hours=max_age_hours).total_seconds()
    )

    recent: list[tuple[float, Path]] = []
    for path in UPLOAD_DIR.rglob("*"):    # recursive glob
        if not path.is_file():
            continue
        if path.suffix.lower() not in ocr_suffixes:
            continue
        modified = path.stat().st_mtime   # stat once; reused for sorting
        if modified > cutoff_ts:
            recent.append((modified, path))

    # Sort on the timestamp only so ties keep their traversal order.
    recent.sort(key=lambda item: item[0])
    return [path for _, path in recent]


def find_invoices_by_property(property_id: str) -> list[Path]:
    """Return the ``*.pdf`` files stored for one property, sorted by path.

    Looks in ``UPLOAD_DIR/invoices/<property_id>`` (not recursively).

    Args:
        property_id: Name of the property's sub-directory.

    Returns:
        Sorted list of PDF paths; empty when the directory does not exist.
    """
    invoice_dir = UPLOAD_DIR.joinpath("invoices", property_id)
    if invoice_dir.exists():
        invoices = list(invoice_dir.glob("*.pdf"))
        invoices.sort()
        return invoices
    return []


def get_upload_path(
    category: str,
    property_id: str,
    filename: str,
) -> Path:
    """Compute the storage path for an upload and create its directory.

    The result is ``UPLOAD_DIR/<category>/<property_id>/<file name>``.
    Only the final component of ``filename`` is used, so directories in a
    client-supplied name are dropped. All inputs are validated before any
    directory is created, so the path cannot leave ``UPLOAD_DIR`` through
    ``..`` or an absolute component.

    Args:
        category: Upload category directory.
        property_id: Property sub-directory.
        filename: Client-supplied file name, possibly with directories.

    Returns:
        The full path where the upload should be written.

    Raises:
        ValueError: If ``category`` or ``property_id`` is absolute or
            contains ``..``, or if ``filename`` has no usable final component
            (empty, ``.`` or ``..``).
    """
    for component in (category, property_id):
        candidate = Path(component)
        # An absolute component would replace UPLOAD_DIR entirely when joined.
        if candidate.is_absolute() or ".." in candidate.parts:
            raise ValueError(f"Ugyldig stikomponent: {component!r}")

    # Keep only the file name. Path("..").name is ".." and Path("").name is
    # "", so both must be rejected explicitly.
    safe_name = Path(filename).name
    if safe_name in ("", ".."):
        raise ValueError(f"Ugyldigt filnavn: {filename!r}")

    upload_path = UPLOAD_DIR / category / property_id / safe_name
    upload_path.parent.mkdir(parents=True, exist_ok=True)
    return upload_path

Temporære filer ved PDF-generering

# server/src/pdf/generator.py
# Brug tempfile + Path til WeasyPrint PDF-generering

import tempfile
from pathlib import Path
from contextlib import contextmanager


@contextmanager
def temporary_pdf_workspace():
    """Yield a fresh scratch directory and delete it when the block ends.

    The directory name starts with ``mbus-pdf-``. It is yielded as a
    ``Path`` and removed together with its content on exit, whether the
    block finished normally or raised.
    """
    scratch = tempfile.TemporaryDirectory(prefix="mbus-pdf-")
    try:
        yield Path(scratch.name)
    finally:
        # Same cleanup the TemporaryDirectory context manager does on exit.
        scratch.cleanup()


async def generate_settlement_pdf(
    settlement_data: dict,
    output_path: Path,
) -> Path:
    """Render the settlement PDF with WeasyPrint and write it to ``output_path``.

    The HTML is written to a file in a temporary workspace and converted from
    there. WeasyPrint rendering is blocking, so the file writing and
    conversion run in a worker thread and the event loop stays responsive.

    Args:
        settlement_data: Data passed to ``render_settlement_template``.
        output_path: Destination of the PDF; parent directories are created.

    Returns:
        ``output_path``, once the PDF has been written.
    """
    import asyncio

    from weasyprint import HTML

    html_content = render_settlement_template(settlement_data)

    def _write_pdf() -> None:
        # The workspace lives entirely inside the worker thread, so it is
        # only cleaned up after WeasyPrint has finished reading from it.
        with temporary_pdf_workspace() as workspace:
            html_file = workspace / "settlement.html"
            html_file.write_text(html_content, encoding="utf-8")

            output_path.parent.mkdir(parents=True, exist_ok=True)
            HTML(filename=str(html_file)).write_pdf(str(output_path))

    await asyncio.to_thread(_write_pdf)
    return output_path


# Alternative: return the bytes directly (no temp file):
async def generate_pdf_bytes(settlement_data: dict) -> bytes:
    """Render the settlement PDF with WeasyPrint and return it as bytes.

    WeasyPrint rendering is blocking, so parsing and conversion run in a
    worker thread and the event loop stays responsive.

    Args:
        settlement_data: Data passed to ``render_settlement_template``.

    Returns:
        The PDF document content.
    """
    import asyncio

    from weasyprint import HTML

    html_content = render_settlement_template(settlement_data)
    return await asyncio.to_thread(
        lambda: HTML(string=html_content).write_pdf()
    )

Sammenligning: pathlib vs os.path

from pathlib import Path
import os

# Build a path:
old = os.path.join("/var/lib", "mbus", "data.db")         # Clunky
new = Path("/var/lib") / "mbus" / "data.db"               # Clear

# Check whether a file exists:
old = os.path.exists("/etc/config.env")
new = Path("/etc/config.env").exists()

# Get the file name without its extension:
old = os.path.splitext(os.path.basename("/data/file.csv"))[0]
new = Path("/data/file.csv").stem

# Recursive glob:
old = [os.path.join(r, f) for r, ds, fs in os.walk(".") for f in fs if f.endswith(".pdf")]
new = list(Path(".").rglob("*.pdf"))

# Read a file (the old form never closes the handle explicitly):
old = open("/etc/config.env").read()
new = Path("/etc/config.env").read_text(encoding="utf-8")

# Write a file (`content` is not defined in this snippet; presumably a str
# prepared by the caller):
old = open("/tmp/out.json", "w").write(content)
new = Path("/tmp/out.json").write_text(content, encoding="utf-8")

# Conclusion: pathlib is more readable, type-safe and idiomatic Python 3.

Konklusion

pathlib.Path er den anbefalede måde at håndtere filstier på i moderne Python. Definer alle stier som konstanter ét centralt sted — aldrig som hardkodede strenge spredt i kodebasen. Atomisk filskrivning (skriv til en temp-fil i samme mappe, og omdøb den derefter oven på målfilen) sikrer, at læsere aldrig ser en delvist skrevet konfigurationsfil; for også at overleve strømsvigt skal temp-filens indhold fsync'es, før den omdøbes. Brug rglob() til rekursiv søgning og Path.stat() til permissions-validering af AES-nøglefiler. contextlib.contextmanager + tempfile.TemporaryDirectory sikrer oprydning af midlertidige PDF-arbejdsmapper.

Se Python async context managers guide eller IoT security hardening guide.