M-Bus Gateway
← Tilbage til blog
· FastAPI· fil-upload· S3· Hetzner Object Storage· Python· backend· sikkerhed

FastAPI fil-upload til S3 — billeder, PDF og dokumenter

FastAPI fil-upload med UploadFile: validering af MIME-type og størrelse, direkte upload til Hetzner Object Storage (S3), presigned URLs til download, Celery-asynkron behandling og virusscanning.

Af M-Bus Gateway

FastAPI's UploadFile håndterer fil-upload effektivt med streaming. Her er den komplette implementation til billeder, PDF og dokumenter med S3-lagring.


Grundlæggende fil-upload

# server/src/documents/router.py

from fastapi import APIRouter, UploadFile, File, HTTPException, Depends
import mimetypes

router = APIRouter()

# MIME types accepted for upload; compared against the type detected from
# the file content in upload_document.
ALLOWED_TYPES = {"application/pdf", "image/jpeg", "image/png", "image/webp"}

# Upper bound for a single uploaded file, expressed in megabytes.
MAX_SIZE_MB = 20


@router.post("/properties/{property_id}/documents")
async def upload_document(
    property_id: uuid.UUID,
    file: UploadFile = File(...),
    category: str = "general",
    session: AsyncSession = Depends(get_session),
    current_user: User = Depends(require_role("landlord")),
) -> DocumentOut:
    """Validate an uploaded file, store it in S3 and record it in the database.

    Args:
        property_id: Property the document is attached to.
        file: The multipart upload.
        category: Accepted from the request but not used in this function.
        session: Database session passed to ``create_document_record``.
        current_user: Resolved through ``require_role("landlord")``; not
            otherwise read here.

    Returns:
        The created document record, validated into ``DocumentOut``.

    Raises:
        HTTPException: 415 when the detected MIME type is not in
            ``ALLOWED_TYPES``; 413 when the file exceeds ``MAX_SIZE_MB``.
    """
    # Type check works on the first 2048 bytes of the content rather than on
    # the client-supplied filename; rewind so later reads start at byte 0.
    head = await file.read(2048)
    await file.seek(0)
    mime_type = _detect_mime(head)
    if mime_type not in ALLOWED_TYPES:
        raise HTTPException(
            status_code=415,
            detail=f"Filtype ikke tilladt: {mime_type}",
        )

    # Size check: count the bytes chunk by chunk, then rewind again because
    # counting leaves the read position at the end of the file.
    byte_count = await _get_file_size(file)
    max_bytes = MAX_SIZE_MB * 1024 * 1024
    if byte_count > max_bytes:
        raise HTTPException(
            status_code=413,
            detail=f"Fil for stor (max {MAX_SIZE_MB} MB)",
        )
    await file.seek(0)

    # Persist the object first, then the database row that points at it.
    stored_key = await upload_to_s3(file, property_id, mime_type)
    record = await create_document_record(
        session,
        property_id,
        stored_key,
        file.filename,
        mime_type,
        byte_count,
    )
    return DocumentOut.model_validate(record)

MIME-type detection med python-magic

# server/src/documents/utils.py

import magic    # python-magic (libmagic binding)

def _detect_mime(content: bytes) -> str:
    """Return the MIME type that libmagic infers from the given bytes.

    Detection is based on the content itself, not on a filename or a
    Content-Type header, so a renamed file (e.g. malware.exe renamed to
    rapport.pdf) is still reported as what its bytes look like.
    """
    return magic.from_buffer(content, mime=True)


async def _get_file_size(file: UploadFile) -> int:
    """Count the bytes remaining in *file* by reading it in 64 KiB chunks.

    Chunks are discarded as soon as they are counted. The read position is
    left at the end of the file; callers must seek back before reusing it.
    """
    block_bytes = 64 * 1024
    total = 0
    while piece := await file.read(block_bytes):
        total += len(piece)
    return total

Upload til Hetzner Object Storage (S3)

# server/src/storage/s3.py

import aioboto3
from botocore.config import Config
from server.src.config import get_settings

settings = get_settings()


def get_s3_client():
    """Build an aioboto3 S3 client for the endpoint configured in settings.

    The returned object is used with ``async with`` by the callers in this
    file. Credentials are unwrapped from the settings' secret values at call
    time, and requests are signed with Signature Version 4.
    """
    boto_session = aioboto3.Session()
    client_kwargs = {
        "endpoint_url": settings.s3_endpoint,    # https://fsn1.your-objectstorage.com
        "aws_access_key_id": settings.s3_access_key.get_secret_value(),
        "aws_secret_access_key": settings.s3_secret_key.get_secret_value(),
        "region_name": "eu-central-1",
        "config": Config(signature_version="s3v4"),
    }
    return boto_session.client("s3", **client_kwargs)


async def upload_to_s3(
    file: UploadFile,
    property_id: uuid.UUID,
    content_type: str,
) -> str:
    """Upload the file to the configured bucket and return its S3 key.

    Args:
        file: Upload whose underlying file object is streamed to S3. The
            read position must already be where the upload should start.
        property_id: Used as a key prefix and stored as object metadata.
        content_type: MIME type stored on the object; also selects the
            extension of the generated key.

    Returns:
        The generated key, ``documents/<property_id>/<uuid4><ext>``.
    """
    from urllib.parse import quote

    file_ext = _ext_from_mime(content_type)
    s3_key = f"documents/{property_id}/{uuid.uuid4()}{file_ext}"

    # S3 user metadata travels in HTTP headers and botocore rejects values
    # containing non-ASCII characters, so a name such as "lejekontrakt_æøå.pdf"
    # would abort the upload. Percent-encoding keeps the value ASCII-only and
    # reversible (urllib.parse.unquote restores the original name).
    original_filename = quote(file.filename or "unknown")

    async with get_s3_client() as s3:
        await s3.upload_fileobj(
            file.file,
            settings.s3_bucket,
            s3_key,
            ExtraArgs={
                "ContentType": content_type,
                "ServerSideEncryption": "AES256",
                "Metadata": {
                    "property_id": str(property_id),
                    "original_filename": original_filename,
                },
            },
        )

    return s3_key


def _ext_from_mime(mime: str) -> str:
    """Map a MIME type to a file extension, falling back to ``.bin``."""
    extensions = {
        "application/pdf": ".pdf",
        "image/jpeg": ".jpg",
        "image/png": ".png",
        "image/webp": ".webp",
    }
    try:
        return extensions[mime]
    except KeyError:
        # Unknown types still get a generic extension.
        return ".bin"

Presigned URLs til sikker download

# Presigned URL: Tidsbegrænset adgang uden at eksponere S3-credentials

@router.get("/documents/{doc_id}/download")
async def download_document(
    doc_id: uuid.UUID,
    session: AsyncSession = Depends(get_session),
    current_user: User = Depends(require_role("landlord")),
) -> dict:
    """Return a time-limited presigned S3 URL for downloading a document.

    Args:
        doc_id: Identifier of the document to download.
        session: Database session used for the lookup and the audit entry.
        current_user: Caller; its ``tenant_id`` is passed to the lookup and
            its ``id`` is written to the audit log.

    Returns:
        ``{"download_url": <presigned URL>, "expires_in": 300}``.
    """
    from urllib.parse import quote

    expires_in = 300    # 5 minutes

    doc = await get_document_or_404(session, doc_id, current_user.tenant_id)

    # The filename must not be pasted into the header verbatim: a quote,
    # backslash, control character or non-ASCII letter would produce a
    # malformed Content-Disposition value. Send a sanitised ASCII fallback
    # plus the exact name as an RFC 5987 ``filename*`` parameter, which
    # browsers that support it prefer.
    display_name = doc.filename or "download"
    ascii_fallback = "".join(
        ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
        for ch in display_name
    )
    content_disposition = (
        f'attachment; filename="{ascii_fallback}"; '
        f"filename*=UTF-8''{quote(display_name, safe='')}"
    )

    async with get_s3_client() as s3:
        url = await s3.generate_presigned_url(
            "get_object",
            Params={
                "Bucket": settings.s3_bucket,
                "Key": doc.s3_key,
                "ResponseContentDisposition": content_disposition,
            },
            ExpiresIn=expires_in,
        )

    # Record the download in the audit log.
    await create_audit_log(session, current_user.id, "document_downloaded",
                           {"document_id": str(doc_id)})

    return {"download_url": url, "expires_in": expires_in}

Multipart upload til store filer (>100 MB)

# Server-side multipart upload — PDF-afregninger kan være store

async def upload_large_pdf(
    local_path: Path,
    s3_key: str,
    chunk_size: int = 10 * 1024 * 1024,    # 10 MB chunks
) -> None:
    """Upload a local PDF to S3 as a multipart upload, one chunk per part.

    Args:
        local_path: File to upload; opened in binary mode.
        s3_key: Destination key in the configured bucket.
        chunk_size: Bytes per part. NOTE(review): S3 documents a 5 MiB
            minimum for every part except the last; confirm the same limit
            applies to the storage backend in use.

    Raises:
        OSError: If the file cannot be opened. This happens before any
            multipart upload has been created.
        Exception: Any error from the S3 calls is re-raised after the
            multipart upload has been aborted.
    """
    import asyncio

    loop = asyncio.get_running_loop()

    async with get_s3_client() as s3:
        # Open the file first so a missing or unreadable path fails before an
        # upload is initiated on the server.
        with open(local_path, "rb") as f:
            response = await s3.create_multipart_upload(
                Bucket=settings.s3_bucket,
                Key=s3_key,
                ContentType="application/pdf",
                ServerSideEncryption="AES256",
            )
            upload_id = response["UploadId"]

            parts = []
            try:
                part_num = 1
                # Disk reads are blocking; run them in the default executor
                # so the event loop stays responsive during large reads.
                while chunk := await loop.run_in_executor(
                    None, f.read, chunk_size
                ):
                    resp = await s3.upload_part(
                        Bucket=settings.s3_bucket,
                        Key=s3_key,
                        UploadId=upload_id,
                        PartNumber=part_num,
                        Body=chunk,
                    )
                    parts.append({"PartNumber": part_num, "ETag": resp["ETag"]})
                    part_num += 1

                await s3.complete_multipart_upload(
                    Bucket=settings.s3_bucket,
                    Key=s3_key,
                    UploadId=upload_id,
                    MultipartUpload={"Parts": parts},
                )
            except BaseException:
                # BaseException rather than Exception: asyncio.CancelledError
                # derives from BaseException, and a cancelled task would
                # otherwise leave the unfinished upload (and its stored
                # parts) behind on the server.
                await s3.abort_multipart_upload(
                    Bucket=settings.s3_bucket, Key=s3_key, UploadId=upload_id
                )
                raise

Asynkron virusscanning med Celery

# server/src/workers/tasks/documents.py

from celery import shared_task
import clamav    # pyclamd binding

@shared_task(name="documents.scan_uploaded_file", acks_late=True)
def scan_uploaded_file(document_id: str, s3_key: str):
    """Run a ClamAV scan on an uploaded file and record the outcome.

    Sets ``document.scan_status`` to ``'clean'``, ``'infected'`` or
    ``'error'`` and commits. Returns without doing anything when the document
    row does not exist. An infected file is deleted from S3; if that deletion
    fails the status remains ``'infected'`` and the failure is logged.

    Args:
        document_id: String form of the document's UUID.
        s3_key: Key of the object to download and scan.
    """
    from server.src.db import get_sync_session
    from server.src.storage.s3 import download_to_temp

    with get_sync_session() as session:
        doc = session.get(Document, uuid.UUID(document_id))
        if not doc:
            return

        # Only the download and the scan are guarded here, so that a later
        # failure while deleting the object cannot overwrite an "infected"
        # verdict with "error".
        try:
            with download_to_temp(s3_key) as tmp_path:
                cd = clamav.ClamdUnixSocket()
                result = cd.scan(str(tmp_path))
        except Exception as e:
            doc.scan_status = "error"
            log.error("Virusscan fejlet", error=str(e))
        else:
            # NOTE(review): assumes the client returns None for a clean
            # file; confirm against the ClamAV binding actually installed.
            if result is None:
                doc.scan_status = "clean"
            else:
                # Infected file: record the verdict, then remove the object.
                doc.scan_status = "infected"
                try:
                    delete_from_s3(s3_key)
                except Exception as e:
                    log.error("Sletning af inficeret fil fejlet",
                              s3_key=s3_key, result=str(result), error=str(e))
                else:
                    log.error("Inficeret fil slettet", s3_key=s3_key,
                              result=str(result))

        session.commit()


# Dispatcher efter upload:
# scan_uploaded_file.delay(str(doc.id), doc.s3_key)

Konklusion

FastAPI fil-upload med UploadFile + python-magic MIME-detektion (ikke filnavn) giver sikker type-validering. S3 via aioboto3 uploader direkte som stream uden RAM-overhead. Presigned URLs til download eksponerer aldrig S3-credentials til frontend. Asynkron ClamAV-scanning via Celery sikrer malware-detektion uden at blokere upload-responsen. Multipart upload håndterer PDF-afregninger over 100 MB stabilt.

Se også guiden til Hetzner Object Storage eller guiden til opgavekøer med Celery og Redis.