M-Bus Gateway
← Tilbage til blog
· TimescaleDB· PostgreSQL· IoT· optimering· komprimering· chunking· tidsseriedata· backend

TimescaleDB compression og chunking — optimering til IoT data

TimescaleDB native komprimering, chunk-intervaller, komprimeringspolitikker, columnar storage og 93% komprimering til IoT tidsseriedata fra wM-Bus gateways.

Af M-Bus Gateway

TimescaleDB native komprimering komprimerer IoT tidsseriedata med 93-97%. Her er den komplette konfiguration til et wM-Bus gateway-system.


Chunk-interval valg

-- Chunk-intervallet er den vigtigste performance-beslutning.
-- Tommelfingerregel: Chunks bør fylde 25% af tilgængeligt RAM.

-- wM-Bus readings: 1 dag/gateway, 100 gateways = 100 aflæsninger/dag
-- Med 10.000 installations: 10.000 readings/dag
-- Chunk-interval: 1 uge (7 × 10.000 = 70.000 rows/chunk = passende størrelse)

SELECT create_hypertable(
    'reading',
    'timestamp',
    chunk_time_interval => INTERVAL '1 week',
    if_not_exists => TRUE,
    migrate_data => TRUE
);

-- Verificer chunk-størrelse:
SELECT chunk_name, range_start, range_end, 
       pg_size_pretty(total_bytes) AS chunk_size,
       num_chunks
FROM timescaledb_information.chunks
WHERE hypertable_name = 'reading'
ORDER BY range_start DESC
LIMIT 10;

-- Ændre chunk-interval (kun for nye chunks):
SELECT set_chunk_time_interval('reading', INTERVAL '1 month');
-- Bemærk: Eksisterende chunks beholder gammelt interval

Native komprimering

-- Aktiver komprimering med colonnar storage:

ALTER TABLE reading SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'meter_installation_id',  -- Grupperingskolonne
    timescaledb.compress_orderby = 'timestamp DESC'             -- Sorteringsrækkefølge
);

-- Segment-valg er kritisk for komprimeringsratio:
-- Høj kardinalitet (mange unikke værdier) = god komprimering
-- meter_installation_id: Alle readings pr. installation samles → 95%+ komprimering

-- Manuelt komprimér én chunk:
SELECT compress_chunk('_timescaledb_internal._hyper_1_1_chunk');

-- Komprimér alle chunks ældre end 30 dage:
SELECT compress_chunk(c.chunk_schema || '.' || c.chunk_name)
FROM timescaledb_information.chunks c
WHERE c.hypertable_name = 'reading'
  AND c.range_end < NOW() - INTERVAL '30 days'
  AND NOT c.is_compressed;

Komprimeringspolitik (automatisk)

-- Automatisk komprimering via Celery ELLER TimescaleDB policy:

-- Option A: TimescaleDB native policy (anbefalet)
SELECT add_compression_policy(
    'reading',
    compress_after => INTERVAL '30 days',  -- Komprimér chunks ældre end 30 dage
    if_not_exists => TRUE
);

-- Option B: Celery task (mere kontrol)
-- server/src/workers/tasks/maintenance.py

-- Verificer policy:
SELECT * FROM timescaledb_information.jobs
WHERE application_name LIKE 'Compression%';

-- Overvåg komprimeringsstatus:
SELECT
    hypertable_name,
    COUNT(*) FILTER (WHERE is_compressed) AS compressed_chunks,
    COUNT(*) FILTER (WHERE NOT is_compressed) AS uncompressed_chunks,
    pg_size_pretty(SUM(compressed_total_bytes)) AS compressed_size,
    pg_size_pretty(SUM(uncompressed_total_bytes)) AS uncompressed_size,
    ROUND(
        100.0 - (SUM(compressed_total_bytes)::NUMERIC / 
                 NULLIF(SUM(uncompressed_total_bytes), 0) * 100),
        1
    ) AS compression_ratio_pct
FROM timescaledb_information.chunks
WHERE hypertable_name = 'reading'
GROUP BY hypertable_name;
-- Typisk output: compressed_chunks=48, compression_ratio_pct=93.2

Query-optimering på komprimerede chunks

-- Komprimerede chunks er read-only — DML kræver dekomprimering af chunk.
-- Undgå UPDATE/DELETE på komprimerede chunks.

-- Query på komprimerede chunks: Fuldt transparent — ingen kodeændring:
SELECT
    meter_installation_id,
    DATE_TRUNC('month', timestamp) AS month,
    MAX(value) - MIN(value) AS monthly_consumption
FROM reading
WHERE timestamp BETWEEN '2026-01-01' AND '2026-05-31'
  AND meter_type = 'heat'
GROUP BY 1, 2
ORDER BY 2;

-- Chunk-exclusion: TimescaleDB ekskluderer automatisk ikke-relevante chunks
-- ved timestamp-filter → scanner kun 5 chunks i stedet for 200+

-- EXPLAIN ANALYZE for at verificere chunk-exclusion:
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*) FROM reading
WHERE timestamp >= NOW() - INTERVAL '7 days';
-- Se "Chunks Excluded by Constraint" i plan

-- Decompression påkrævet ved INSERT i komprimeret chunk:
-- Løsning: Altid INSERT i den nyeste (ukomprimerede) chunk
-- Gateway sender data samme dag → falder altid i ukomprimeret chunk ✓

Continuous aggregates til dashboard

-- Forud-beregnede aggregeringer der opdateres automatisk:

CREATE MATERIALIZED VIEW reading_monthly
WITH (timescaledb.continuous, timescaledb.materialized_only = false) AS
SELECT
    meter_installation_id,
    meter_type,
    TIME_BUCKET('1 month', timestamp) AS bucket,
    MAX(value) - MIN(value) AS consumption,
    AVG(battery_level_pct)  AS avg_battery,
    COUNT(*)                AS reading_count
FROM reading
WHERE deleted_at IS NULL
GROUP BY 1, 2, 3
WITH NO DATA;

-- Realtids-fallback: materialized_only=false bruger raw data for perioder
-- der ikke er i det materialiserede view endnu

-- Refresh policy:
SELECT add_continuous_aggregate_policy(
    'reading_monthly',
    start_offset => INTERVAL '3 months',  -- Genberegn 3 mdr. bagud
    end_offset   => INTERVAL '1 day',     -- Stop 1 dag fra nu (undgå partielle data)
    schedule_interval => INTERVAL '1 day'
);

-- Manuel refresh:
CALL refresh_continuous_aggregate('reading_monthly', '2026-01-01', '2026-06-01');

Retention policy

-- Automatisk sletning af gamle data:

SELECT add_retention_policy(
    'reading',
    drop_after => INTERVAL '10 years',  -- 10 år (bogføringsloven + GDPR)
    if_not_exists => TRUE
);

-- Til aggregate view: kortere retention OK
SELECT add_retention_policy(
    'reading_monthly',
    drop_after => INTERVAL '25 years',  -- Månedlige aggregater: behold meget længere
    if_not_exists => TRUE
);

-- Verificer policies:
SELECT application_name, schedule_interval, config
FROM timescaledb_information.jobs
WHERE application_name IN ('Retention Policy', 'Compression Policy', 'Refresh Continuous Aggregate');

-- BEMÆRK: Soft-delete via deleted_at tilføjer kompleksitet:
-- TimescaleDB drop_chunks sletter hele chunks — ignorerer deleted_at
-- Løsning: Anonymisér persondata ved deleted_at, behold reading-rækker til regnskab

Celery maintenance task

# server/src/workers/tasks/maintenance.py

from celery import shared_task
from sqlalchemy import text
import structlog

log = structlog.get_logger()

@shared_task(name="maintenance.compress_old_chunks")
def compress_old_chunks():
    """Komprimér chunks ældre end 30 dage — kører dagligt kl. 02:00 UTC."""
    from server.src.db import get_sync_engine
    engine = get_sync_engine()

    with engine.connect() as conn:
        result = conn.execute(text("""
            SELECT compress_chunk(format('%I.%I', chunk_schema, chunk_name))
            FROM timescaledb_information.chunks
            WHERE hypertable_name = 'reading'
              AND range_end < NOW() - INTERVAL '30 days'
              AND NOT is_compressed
        """))
        compressed = result.rowcount
        log.info("Chunks compressed", count=compressed)

@shared_task(name="maintenance.log_storage_stats")
def log_storage_stats():
    """Log daglig lagerstatistik til monitoring."""
    from server.src.db import get_sync_engine
    engine = get_sync_engine()

    with engine.connect() as conn:
        stats = conn.execute(text("""
            SELECT
                SUM(compressed_total_bytes) AS compressed_bytes,
                SUM(uncompressed_total_bytes) AS raw_bytes
            FROM timescaledb_information.chunks
            WHERE hypertable_name = 'reading'
        """)).fetchone()

        if stats and stats.raw_bytes:
            ratio = 1 - stats.compressed_bytes / stats.raw_bytes
            log.info("Storage stats",
                     compressed_gb=round(stats.compressed_bytes / 1e9, 2),
                     raw_gb=round(stats.raw_bytes / 1e9, 2),
                     compression_ratio=round(ratio, 3))

Konklusion

TimescaleDB native komprimering med compress_segmentby = 'meter_installation_id' giver 93-97% komprimering på wM-Bus readings. Chunk-intervallet på 1 uge passer til IoT-data med daglig indsendelse. Continuous aggregates på månedsbasis eliminerer dyre real-time aggregeringer i dashboard-queries. Retention policies sikrer GDPR-compliance med automatisk chunk-sletning efter 10 år.

Se TimescaleDB hypertable opsætning guide eller TimescaleDB continuous aggregates guide.