FastAPI fil-upload til S3 — billeder, PDF og dokumenter
FastAPI fil-upload med UploadFile: validering af MIME-type og størrelse, direkte upload til Hetzner Object Storage (S3), presigned URLs til download samt asynkron behandling og virusscanning med Celery.
Af M-Bus Gateway
FastAPI's UploadFile håndterer fil-upload effektivt med streaming. Her er den komplette implementation til billeder, PDF og dokumenter med S3-lagring.
Grundlæggende fil-upload
# server/src/documents/router.py
from fastapi import APIRouter, UploadFile, File, HTTPException, Depends
import mimetypes
# Router that the document endpoints in this module are registered on.
router = APIRouter()
# MIME types the upload endpoint accepts; the type detected from the file
# content is checked for membership in this set.
ALLOWED_TYPES = {
    "application/pdf",
    "image/jpeg",
    "image/png",
    "image/webp",
}
# Upload size limit in megabytes; the endpoint multiplies it by 1024 * 1024
# to obtain the limit in bytes.
MAX_SIZE_MB = 20
@router.post("/properties/{property_id}/documents")
async def upload_document(
    property_id: uuid.UUID,
    file: UploadFile = File(...),
    category: str = "general",
    session: AsyncSession = Depends(get_session),
    current_user: User = Depends(require_role("landlord")),
) -> DocumentOut:
    """Validate an uploaded file, store it in S3 and persist a document record.

    Steps, in order:
      1. Detect the MIME type from the first 2048 bytes and reject anything
         outside ``ALLOWED_TYPES`` with HTTP 415.
      2. Count the file's bytes and reject files above ``MAX_SIZE_MB`` with
         HTTP 413.
      3. Upload the file to S3 and create the database record.

    Returns:
        The created record, validated into ``DocumentOut``.

    Raises:
        HTTPException: 415 for a disallowed type, 413 for an oversized file.
    """
    # NOTE(review): `category` is accepted but never forwarded to
    # create_document_record — confirm whether it should be stored.
    # NOTE(review): nothing here checks that `property_id` belongs to the
    # caller's tenant (the download endpoint does pass a tenant id) — confirm.

    # Sniff the type from the file's leading bytes rather than trusting the
    # file name, then rewind so later reads start from the beginning.
    head = await file.read(2048)
    await file.seek(0)
    mime_type = _detect_mime(head)
    if mime_type not in ALLOWED_TYPES:
        raise HTTPException(
            status_code=415,
            detail=f"Filtype ikke tilladt: {mime_type}",
        )

    # Measure the size by reading chunk by chunk instead of loading the
    # whole file into memory at once.
    byte_limit = MAX_SIZE_MB * 1024 * 1024
    byte_count = await _get_file_size(file)
    if byte_count > byte_limit:
        raise HTTPException(
            status_code=413,
            detail=f"Fil for stor (max {MAX_SIZE_MB} MB)",
        )
    # Measuring consumed the stream; rewind before handing it to the uploader.
    await file.seek(0)

    # Store the object first, then record it in the database.
    storage_key = await upload_to_s3(file, property_id, mime_type)
    record = await create_document_record(
        session,
        property_id,
        storage_key,
        file.filename,
        mime_type,
        byte_count,
    )
    return DocumentOut.model_validate(record)
MIME-type detection med python-magic
# server/src/documents/utils.py
import magic # python-magic (libmagic binding)
def _detect_mime(content: bytes) -> str:
    """Return the MIME type inferred from the magic bytes in *content*.

    The type comes from the data itself, not from the file name or the
    Content-Type header, so a renamed file (e.g. ``malware.exe`` renamed to
    ``rapport.pdf``) is still reported as what it really is.
    """
    return magic.from_buffer(content, mime=True)
async def _get_file_size(file: UploadFile) -> int:
    """Count the bytes readable from *file* by streaming it in 64 KB chunks.

    Only one chunk is held at a time. Reading starts at the file's current
    position and runs until ``read`` returns nothing, so the caller has to
    seek back before using the file again.
    """
    block_bytes = 64 * 1024
    total = 0
    while piece := await file.read(block_bytes):
        total += len(piece)
    return total
Upload til Hetzner Object Storage (S3)
# server/src/storage/s3.py
import aioboto3
from botocore.config import Config
from server.src.config import get_settings
settings = get_settings()
def get_s3_client():
    """Create an aioboto3 S3 client for the configured object-storage endpoint.

    A new ``aioboto3.Session`` is created on every call. The returned object
    is used as an async context manager (``async with get_s3_client() as s3``)
    elsewhere in this module. Endpoint and credentials come from ``settings``;
    requests are signed with Signature Version 4.
    """
    boto_session = aioboto3.Session()
    client_options = {
        "endpoint_url": settings.s3_endpoint,  # https://fsn1.your-objectstorage.com
        "aws_access_key_id": settings.s3_access_key.get_secret_value(),
        "aws_secret_access_key": settings.s3_secret_key.get_secret_value(),
        "region_name": "eu-central-1",
        "config": Config(signature_version="s3v4"),
    }
    return boto_session.client("s3", **client_options)
async def upload_to_s3(
    file: UploadFile,
    property_id: uuid.UUID,
    content_type: str,
) -> str:
    """Upload *file* to the configured bucket and return the generated S3 key.

    The key has the form ``documents/<property_id>/<uuid4><ext>``, where the
    extension is derived from *content_type*. The object is stored with that
    content type, AES256 server-side encryption, and the property id plus the
    original file name as user metadata.

    S3 user metadata is sent as HTTP headers and has to be ASCII; botocore
    refuses the request otherwise. A file name containing anything else
    (e.g. Danish letters) is therefore percent-encoded as UTF-8 before it is
    stored, and readers of the metadata must URL-decode it.

    Args:
        file: The upload whose underlying ``file.file`` object is streamed.
        property_id: Property the document belongs to; part of the key.
        content_type: MIME type to store and to derive the extension from.

    Returns:
        The S3 key the object was stored under.
    """
    file_ext = _ext_from_mime(content_type)
    s3_key = f"documents/{property_id}/{uuid.uuid4()}{file_ext}"
    async with get_s3_client() as s3:
        await s3.upload_fileobj(
            file.file,
            settings.s3_bucket,
            s3_key,
            ExtraArgs={
                "ContentType": content_type,
                "ServerSideEncryption": "AES256",
                "Metadata": {
                    "property_id": str(property_id),
                    "original_filename": _metadata_safe(
                        file.filename or "unknown"
                    ),
                },
            },
        )
    return s3_key


def _metadata_safe(value: str) -> str:
    """Return *value* unchanged if printable ASCII, else percent-encoded UTF-8."""
    from urllib.parse import quote

    if value.isascii() and value.isprintable():
        return value
    return quote(value)
# File extension for each supported MIME type; built once at import time.
_EXTENSION_BY_MIME = {
    "application/pdf": ".pdf",
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
}


def _ext_from_mime(mime: str) -> str:
    """Return the file extension (with leading dot) for a MIME type.

    MIME types are case-insensitive and may carry parameters
    (``image/png; charset=binary``), so only the lower-cased part before the
    first ``;`` is looked up. Unknown types map to ``".bin"``.
    """
    essence = mime.partition(";")[0].strip().lower()
    return _EXTENSION_BY_MIME.get(essence, ".bin")
Presigned URLs til sikker download
# Presigned URL: time-limited access without exposing the S3 credentials
@router.get("/documents/{doc_id}/download")
async def download_document(
    doc_id: uuid.UUID,
    session: AsyncSession = Depends(get_session),
    current_user: User = Depends(require_role("landlord")),
) -> dict:
    """Return a short-lived presigned download URL for a document.

    The document is looked up with the caller's tenant id, a ``get_object``
    URL valid for five minutes is generated, and the download is recorded in
    the audit log.

    The stored file name originates from the uploader, so it is sanitised
    before it is placed in the Content-Disposition value (see
    ``_attachment_disposition``).

    Returns:
        ``{"download_url": <url>, "expires_in": <seconds>}``.
    """
    expires_in = 300  # 5 minutes; used for both the URL and the response
    doc = await get_document_or_404(session, doc_id, current_user.tenant_id)
    async with get_s3_client() as s3:
        url = await s3.generate_presigned_url(
            "get_object",
            Params={
                "Bucket": settings.s3_bucket,
                "Key": doc.s3_key,
                "ResponseContentDisposition": _attachment_disposition(
                    doc.filename
                ),
            },
            ExpiresIn=expires_in,
        )
    # Record the download in the audit log:
    await create_audit_log(session, current_user.id, "document_downloaded",
                           {"document_id": str(doc_id)})
    return {"download_url": url, "expires_in": expires_in}


def _attachment_disposition(filename) -> str:
    """Build a Content-Disposition value that is safe for any file name.

    ``filename=`` carries an ASCII fallback in which quotes, backslashes,
    control characters and non-ASCII characters are replaced by ``_``, so the
    quoted string cannot be broken out of. ``filename*=`` carries the exact
    name, percent-encoded as UTF-8 in the RFC 6266 / RFC 5987 form.
    A missing name falls back to ``document``.
    """
    from urllib.parse import quote

    name = filename or "document"
    ascii_fallback = "".join(
        ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
        for ch in name
    )
    encoded = quote(name, safe="")
    return f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{encoded}"
Multipart upload til store filer (>100 MB)
# Server-side multipart upload — PDF statements can be large
async def upload_large_pdf(
    local_path: Path,
    s3_key: str,
    chunk_size: int = 10 * 1024 * 1024,  # 10 MB chunks
) -> None:
    """Upload a local PDF to S3 as a multipart upload.

    The file is read in *chunk_size* pieces and each piece is sent as one
    part; the upload is completed once every part has been sent. If anything
    fails — including cancellation of the surrounding task — the multipart
    upload is aborted before the error propagates, so no partial upload is
    left behind in the bucket.

    Args:
        local_path: File to upload.
        s3_key: Destination key in the configured bucket.
        chunk_size: Bytes per part.

    Raises:
        Whatever the file read or an S3 call raised, after the abort.
    """
    import asyncio

    loop = asyncio.get_running_loop()
    async with get_s3_client() as s3:
        # Start the multipart upload:
        response = await s3.create_multipart_upload(
            Bucket=settings.s3_bucket,
            Key=s3_key,
            ContentType="application/pdf",
            ServerSideEncryption="AES256",
        )
        upload_id = response["UploadId"]
        parts = []
        try:
            with open(local_path, "rb") as f:
                part_num = 1
                # Reading a multi-megabyte chunk is blocking disk I/O; run it
                # in the default executor so the event loop keeps serving.
                while chunk := await loop.run_in_executor(
                    None, f.read, chunk_size
                ):
                    resp = await s3.upload_part(
                        Bucket=settings.s3_bucket,
                        Key=s3_key,
                        UploadId=upload_id,
                        PartNumber=part_num,
                        Body=chunk,
                    )
                    parts.append({"PartNumber": part_num, "ETag": resp["ETag"]})
                    part_num += 1
            await s3.complete_multipart_upload(
                Bucket=settings.s3_bucket,
                Key=s3_key,
                UploadId=upload_id,
                MultipartUpload={"Parts": parts},
            )
        except (Exception, asyncio.CancelledError):
            # CancelledError does not derive from Exception on Python 3.8+,
            # so it is listed explicitly: a cancelled task must also abort.
            await s3.abort_multipart_upload(
                Bucket=settings.s3_bucket, Key=s3_key, UploadId=upload_id
            )
            raise
Asynkron virusscanning med Celery
# server/src/workers/tasks/documents.py
from celery import shared_task
import clamav # pyclamd binding
@shared_task(name="documents.scan_uploaded_file", acks_late=True)
def scan_uploaded_file(document_id: str, s3_key: str):
    """Run a ClamAV scan on an uploaded file and record the outcome.

    Sets ``document.scan_status`` to ``'clean'``, ``'infected'`` or
    ``'error'`` and commits. An infected object is deleted from S3. Does
    nothing if the document no longer exists.

    A failure to delete an infected object is logged but does not change the
    status: the record stays ``'infected'`` rather than being downgraded to
    ``'error'``, which would hide a positive detection.

    Args:
        document_id: String form of the document's UUID.
        s3_key: Key of the object to download and scan.
    """
    from server.src.db import get_sync_session
    from server.src.storage.s3 import download_to_temp

    with get_sync_session() as session:
        doc = session.get(Document, uuid.UUID(document_id))
        if not doc:
            return

        try:
            with download_to_temp(s3_key) as tmp_path:
                cd = clamav.ClamdUnixSocket()
                result = cd.scan(str(tmp_path))
        except Exception as e:
            # Download or scan failed: the file's state is unknown.
            doc.scan_status = "error"
            log.error("Virusscan fejlet", error=str(e))
        else:
            # NOTE(review): any non-None result is treated as an infection;
            # confirm the scanner never reports its own errors this way.
            if result is None:
                doc.scan_status = "clean"
            else:
                # Infected file — remove it from S3:
                doc.scan_status = "infected"
                try:
                    delete_from_s3(s3_key)
                except Exception as e:
                    log.error("Inficeret fil kunne ikke slettes",
                              s3_key=s3_key, error=str(e))
                else:
                    log.error("Inficeret fil slettet", s3_key=s3_key,
                              result=str(result))
        session.commit()
# Sæt scanningen i kø efter upload:
# scan_uploaded_file.delay(str(doc.id), doc.s3_key)
Konklusion
FastAPI fil-upload med UploadFile + python-magic MIME-detektion (ikke filnavn) giver sikker type-validering. S3 via aioboto3 uploader direkte som stream uden RAM-overhead. Presigned URLs til download eksponerer aldrig S3-credentials til frontend. Asynkron ClamAV-scanning via Celery sikrer malware-detektion uden at blokere upload-responsen. Multipart upload håndterer PDF-afregninger over 100 MB stabilt.
Se Hetzner Object Storage guide eller Celery Redis opgavekø guide.