M-Bus Gateway
← Tilbage til blog
· FastAPI· paginering· cursor· offset· keyset· API· Python· SQLAlchemy· IoT

FastAPI paginering — cursor vs. offset til IoT API

FastAPI paginering: cursor-baseret keyset paginering vs. offset/limit, PaginatedResponse schema, URL-parametre, Link-headers, performance og test.

Af M-Bus Gateway

Offset-paginering er simpel men forkert til IoT-data. Her er cursor-baseret keyset paginering der skalerer til millioner af aflæsninger.


Problemet med offset/limit

# PROBLEMATISK — offset-baseret paginering:

@router.get("/readings")
async def list_readings_offset(
    skip: int = 0,
    limit: int = 100,
    session: AsyncSession = Depends(get_session),
):
    """
    Offset paginering har 3 fatale problemer til IoT-data:
    1. PostgreSQL scanner alle rækker INDEN offset — langsomt ved side 500+
    2. Race condition: Ny aflæsning indsættes mens bruger bladrer
       → Data springer over / dubleres mellem sider
    3. Skalerbarhed: 1M rækker × skip=999900 → fuld table scan
    """
    stmt = select(Reading).offset(skip).limit(limit).order_by(Reading.timestamp.desc())
    result = await session.execute(stmt)
    return result.scalars().all()

Cursor-baseret keyset paginering

# server/src/readings/router.py
# Korrekt cursor-baseret paginering

from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel
from datetime import datetime
from typing import Generic, TypeVar
import base64
import json

T = TypeVar("T")


class PaginatedResponse(BaseModel, Generic[T]):
    """Standard pagineret svar for alle lister."""
    items: list[T]
    next_cursor: str | None = None    # None = ingen flere sider
    has_more: bool
    total_count: int | None = None    # Kun ved eksplicit forespørgsel


def encode_cursor(timestamp: datetime, id: str) -> str:
    """Encod cursor som base64 JSON (sikker i URL)."""
    payload = {"ts": timestamp.isoformat(), "id": id}
    return base64.urlsafe_b64encode(
        json.dumps(payload).encode()
    ).decode()


def decode_cursor(cursor: str) -> tuple[datetime, str]:
    """Dekod cursor til timestamp og ID."""
    payload = json.loads(base64.urlsafe_b64decode(cursor))
    return datetime.fromisoformat(payload["ts"]), payload["id"]


@router.get("/properties/{property_id}/readings")
async def list_readings(
    property_id: str,
    limit: int = Query(default=100, ge=1, le=1000),
    cursor: str | None = Query(default=None),
    session: AsyncSession = Depends(get_session),
) -> PaginatedResponse[ReadingOut]:
    """
    Keyset paginering: Hent næste side baseret på cursor.
    Skalerer til millioner af rækker uden ydelsesfald.
    """
    stmt = (
        select(Reading)
        .where(
            Reading.property_id == property_id,
            Reading.deleted_at.is_(None),
        )
        .order_by(Reading.timestamp.desc(), Reading.id.desc())
        .limit(limit + 1)    # Hent én ekstra for at tjekke has_more
    )

    if cursor:
        ts, rid = decode_cursor(cursor)
        stmt = stmt.where(
            or_(
                Reading.timestamp < ts,
                and_(Reading.timestamp == ts, Reading.id < rid),
            )
        )

    result = await session.execute(stmt)
    rows = result.scalars().all()

    has_more = len(rows) > limit
    items = rows[:limit]

    next_cursor = None
    if has_more and items:
        last = items[-1]
        next_cursor = encode_cursor(last.timestamp, str(last.id))

    return PaginatedResponse(
        items=[ReadingOut.model_validate(r) for r in items],
        next_cursor=next_cursor,
        has_more=has_more,
    )

Offset paginering — hvornår er det OK?

# Offset paginering er OK til:
# 1. Admin-sider med lille datasæt (< 10.000 rækker)
# 2. Brugerdefinerede søgninger med sortering
# 3. Eksport-endpoints der starter forfra

@router.get("/admin/tenants")
async def list_tenants(
    page: int = Query(default=1, ge=1),
    size: int = Query(default=20, ge=1, le=100),
    session: AsyncSession = Depends(get_session),
    current_user: User = Depends(require_role("super_admin")),
) -> PaginatedResponse[TenantOut]:
    """
    Offset-paginering til admin-lister.
    Tenants < 10.000 → offset er OK.
    """
    skip = (page - 1) * size

    total_stmt = select(func.count(Tenant.id)).where(Tenant.deleted_at.is_(None))
    total = await session.scalar(total_stmt)

    stmt = (
        select(Tenant)
        .where(Tenant.deleted_at.is_(None))
        .order_by(Tenant.created_at.desc())
        .offset(skip)
        .limit(size)
    )
    result = await session.execute(stmt)
    items = result.scalars().all()

    return PaginatedResponse(
        items=[TenantOut.model_validate(t) for t in items],
        next_cursor=None,
        has_more=(skip + size) < total,
        total_count=total,
    )

Pagination dependency

# server/src/api/deps.py
# Genanvendelig pagination dependency

from dataclasses import dataclass
from fastapi import Query


@dataclass
class CursorPagination:
    limit: int
    cursor: str | None

    @classmethod
    def as_dependency(
        cls,
        limit: int = Query(default=50, ge=1, le=500),
        cursor: str | None = Query(default=None),
    ) -> "CursorPagination":
        return cls(limit=limit, cursor=cursor)


@dataclass
class OffsetPagination:
    page: int
    size: int

    @property
    def offset(self) -> int:
        return (self.page - 1) * self.size

    @classmethod
    def as_dependency(
        cls,
        page: int = Query(default=1, ge=1),
        size: int = Query(default=20, ge=1, le=100),
    ) -> "OffsetPagination":
        return cls(page=page, size=size)


# Brug i endpoint:
@router.get("/meters")
async def list_meters(
    pagination: CursorPagination = Depends(CursorPagination.as_dependency),
    session: AsyncSession = Depends(get_session),
) -> PaginatedResponse[MeterOut]:
    ...

Link-header for API-standard kompatibilitet

# server/src/api/pagination.py
# RFC 8288 Link-header (bruges af GitHub API)

from fastapi import Response


def add_pagination_links(
    response: Response,
    base_url: str,
    next_cursor: str | None,
    limit: int,
) -> None:
    """Tilføj Link-header til response (RFC 8288)."""
    links = []

    if next_cursor:
        next_url = f"{base_url}?cursor={next_cursor}&limit={limit}"
        links.append(f'<{next_url}>; rel="next"')

    if links:
        response.headers["Link"] = ", ".join(links)
    response.headers["X-Has-More"] = "true" if next_cursor else "false"


# Endpoint med Link-header:
@router.get("/readings")
async def list_readings_with_links(
    response: Response,
    pagination: CursorPagination = Depends(CursorPagination.as_dependency),
    request: Request = ...,
) -> PaginatedResponse[ReadingOut]:
    result = await paginate_readings(pagination)

    add_pagination_links(
        response=response,
        base_url=str(request.url.remove_query_params(["cursor"])),
        next_cursor=result.next_cursor,
        limit=pagination.limit,
    )
    return result

Test af paginering

# server/tests/test_pagination.py

import pytest
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_cursor_pagination_returns_correct_page(
    client: AsyncClient,
    db_session,
    auth_headers: dict,
):
    """Verificér at cursor-paginering giver korrekte sider."""
    # Opret 15 readings:
    for i in range(15):
        await create_test_reading(db_session, value=float(i))

    # Side 1 (10 elementer):
    resp1 = await client.get(
        "/api/v1/readings",
        params={"limit": 10},
        headers=auth_headers,
    )
    assert resp1.status_code == 200
    data1 = resp1.json()
    assert len(data1["items"]) == 10
    assert data1["has_more"] is True
    assert data1["next_cursor"] is not None

    # Side 2 (5 elementer):
    resp2 = await client.get(
        "/api/v1/readings",
        params={"limit": 10, "cursor": data1["next_cursor"]},
        headers=auth_headers,
    )
    data2 = resp2.json()
    assert len(data2["items"]) == 5
    assert data2["has_more"] is False
    assert data2["next_cursor"] is None

    # Ingen overlap:
    ids1 = {item["id"] for item in data1["items"]}
    ids2 = {item["id"] for item in data2["items"]}
    assert ids1.isdisjoint(ids2)

Konklusion

Cursor-baseret keyset paginering er den eneste korrekte løsning til IoT-data med millioner af aflæsninger. Offset-paginering forårsager fuld table scans ved dybe sider og race conditions ved indsættelse. Cursor encodes timestamp + ID som base64 JSON — opak for klienter men deterministisk og sikker. Offset paginering accepteres stadig til admin-lister med begrænsede datasæt (tenants, users, properties). PaginatedResponse[T] som generisk Pydantic-model giver konsistent svar-format på tværs af alle list-endpoints.

Se SQLAlchemy async patterns guide eller FastAPI dependency injection guide.