FastAPI paginering — cursor vs. offset til IoT API
FastAPI paginering: cursor-baseret keyset paginering vs. offset/limit, PaginatedResponse schema, URL-parametre, Link-headers, performance og test.
Af M-Bus Gateway
Offset-paginering er simpel men forkert til IoT-data. Her er cursor-baseret keyset paginering der skalerer til millioner af aflæsninger.
Problemet med offset/limit
# PROBLEMATISK — offset-baseret paginering:
@router.get("/readings")
async def list_readings_offset(
skip: int = 0,
limit: int = 100,
session: AsyncSession = Depends(get_session),
):
"""
Offset paginering har 3 fatale problemer til IoT-data:
1. PostgreSQL scanner alle rækker INDEN offset — langsomt ved side 500+
2. Race condition: Ny aflæsning indsættes mens bruger bladrer
→ Data springer over / dubleres mellem sider
3. Skalerbarhed: 1M rækker × skip=999900 → fuld table scan
"""
stmt = select(Reading).offset(skip).limit(limit).order_by(Reading.timestamp.desc())
result = await session.execute(stmt)
return result.scalars().all()
Cursor-baseret keyset paginering
# server/src/readings/router.py
# Korrekt cursor-baseret paginering
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel
from datetime import datetime
from typing import Generic, TypeVar
import base64
import json
T = TypeVar("T")
class PaginatedResponse(BaseModel, Generic[T]):
"""Standard pagineret svar for alle lister."""
items: list[T]
next_cursor: str | None = None # None = ingen flere sider
has_more: bool
total_count: int | None = None # Kun ved eksplicit forespørgsel
def encode_cursor(timestamp: datetime, id: str) -> str:
"""Encod cursor som base64 JSON (sikker i URL)."""
payload = {"ts": timestamp.isoformat(), "id": id}
return base64.urlsafe_b64encode(
json.dumps(payload).encode()
).decode()
def decode_cursor(cursor: str) -> tuple[datetime, str]:
"""Dekod cursor til timestamp og ID."""
payload = json.loads(base64.urlsafe_b64decode(cursor))
return datetime.fromisoformat(payload["ts"]), payload["id"]
@router.get("/properties/{property_id}/readings")
async def list_readings(
property_id: str,
limit: int = Query(default=100, ge=1, le=1000),
cursor: str | None = Query(default=None),
session: AsyncSession = Depends(get_session),
) -> PaginatedResponse[ReadingOut]:
"""
Keyset paginering: Hent næste side baseret på cursor.
Skalerer til millioner af rækker uden ydelsesfald.
"""
stmt = (
select(Reading)
.where(
Reading.property_id == property_id,
Reading.deleted_at.is_(None),
)
.order_by(Reading.timestamp.desc(), Reading.id.desc())
.limit(limit + 1) # Hent én ekstra for at tjekke has_more
)
if cursor:
ts, rid = decode_cursor(cursor)
stmt = stmt.where(
or_(
Reading.timestamp < ts,
and_(Reading.timestamp == ts, Reading.id < rid),
)
)
result = await session.execute(stmt)
rows = result.scalars().all()
has_more = len(rows) > limit
items = rows[:limit]
next_cursor = None
if has_more and items:
last = items[-1]
next_cursor = encode_cursor(last.timestamp, str(last.id))
return PaginatedResponse(
items=[ReadingOut.model_validate(r) for r in items],
next_cursor=next_cursor,
has_more=has_more,
)
Offset paginering — hvornår er det OK?
# Offset paginering er OK til:
# 1. Admin-sider med lille datasæt (< 10.000 rækker)
# 2. Brugerdefinerede søgninger med sortering
# 3. Eksport-endpoints der starter forfra
@router.get("/admin/tenants")
async def list_tenants(
page: int = Query(default=1, ge=1),
size: int = Query(default=20, ge=1, le=100),
session: AsyncSession = Depends(get_session),
current_user: User = Depends(require_role("super_admin")),
) -> PaginatedResponse[TenantOut]:
"""
Offset-paginering til admin-lister.
Tenants < 10.000 → offset er OK.
"""
skip = (page - 1) * size
total_stmt = select(func.count(Tenant.id)).where(Tenant.deleted_at.is_(None))
total = await session.scalar(total_stmt)
stmt = (
select(Tenant)
.where(Tenant.deleted_at.is_(None))
.order_by(Tenant.created_at.desc())
.offset(skip)
.limit(size)
)
result = await session.execute(stmt)
items = result.scalars().all()
return PaginatedResponse(
items=[TenantOut.model_validate(t) for t in items],
next_cursor=None,
has_more=(skip + size) < total,
total_count=total,
)
Pagination dependency
# server/src/api/deps.py
# Genanvendelig pagination dependency
from dataclasses import dataclass
from fastapi import Query
@dataclass
class CursorPagination:
limit: int
cursor: str | None
@classmethod
def as_dependency(
cls,
limit: int = Query(default=50, ge=1, le=500),
cursor: str | None = Query(default=None),
) -> "CursorPagination":
return cls(limit=limit, cursor=cursor)
@dataclass
class OffsetPagination:
page: int
size: int
@property
def offset(self) -> int:
return (self.page - 1) * self.size
@classmethod
def as_dependency(
cls,
page: int = Query(default=1, ge=1),
size: int = Query(default=20, ge=1, le=100),
) -> "OffsetPagination":
return cls(page=page, size=size)
# Brug i endpoint:
@router.get("/meters")
async def list_meters(
pagination: CursorPagination = Depends(CursorPagination.as_dependency),
session: AsyncSession = Depends(get_session),
) -> PaginatedResponse[MeterOut]:
...
Link-header for API-standard kompatibilitet
# server/src/api/pagination.py
# RFC 8288 Link-header (bruges af GitHub API)
from fastapi import Response
def add_pagination_links(
response: Response,
base_url: str,
next_cursor: str | None,
limit: int,
) -> None:
"""Tilføj Link-header til response (RFC 8288)."""
links = []
if next_cursor:
next_url = f"{base_url}?cursor={next_cursor}&limit={limit}"
links.append(f'<{next_url}>; rel="next"')
if links:
response.headers["Link"] = ", ".join(links)
response.headers["X-Has-More"] = "true" if next_cursor else "false"
# Endpoint med Link-header:
@router.get("/readings")
async def list_readings_with_links(
response: Response,
pagination: CursorPagination = Depends(CursorPagination.as_dependency),
request: Request = ...,
) -> PaginatedResponse[ReadingOut]:
result = await paginate_readings(pagination)
add_pagination_links(
response=response,
base_url=str(request.url.remove_query_params(["cursor"])),
next_cursor=result.next_cursor,
limit=pagination.limit,
)
return result
Test af paginering
# server/tests/test_pagination.py
import pytest
from httpx import AsyncClient
@pytest.mark.asyncio
async def test_cursor_pagination_returns_correct_page(
client: AsyncClient,
db_session,
auth_headers: dict,
):
"""Verificér at cursor-paginering giver korrekte sider."""
# Opret 15 readings:
for i in range(15):
await create_test_reading(db_session, value=float(i))
# Side 1 (10 elementer):
resp1 = await client.get(
"/api/v1/readings",
params={"limit": 10},
headers=auth_headers,
)
assert resp1.status_code == 200
data1 = resp1.json()
assert len(data1["items"]) == 10
assert data1["has_more"] is True
assert data1["next_cursor"] is not None
# Side 2 (5 elementer):
resp2 = await client.get(
"/api/v1/readings",
params={"limit": 10, "cursor": data1["next_cursor"]},
headers=auth_headers,
)
data2 = resp2.json()
assert len(data2["items"]) == 5
assert data2["has_more"] is False
assert data2["next_cursor"] is None
# Ingen overlap:
ids1 = {item["id"] for item in data1["items"]}
ids2 = {item["id"] for item in data2["items"]}
assert ids1.isdisjoint(ids2)
Konklusion
Cursor-baseret keyset paginering er den eneste korrekte løsning til IoT-data med millioner af aflæsninger. Offset-paginering forårsager fuld table scans ved dybe sider og race conditions ved indsættelse. Cursor encodes timestamp + ID som base64 JSON — opak for klienter men deterministisk og sikker. Offset paginering accepteres stadig til admin-lister med begrænsede datasæt (tenants, users, properties). PaginatedResponse[T] som generisk Pydantic-model giver konsistent svar-format på tværs af alle list-endpoints.
Se SQLAlchemy async patterns guide eller FastAPI dependency injection guide.