# [TASK] Nouvelle section "Hôtes Docker" + monitoring + actions + notifications
## 🎯 Objectif principal
Ajouter une fonctionnalité complète de gestion Docker au Homelab Dashboard existant, permettant :
- Surveillance multi-hosts Docker en temps réel
- Actions sur containers (start/stop/restart/redeploy/logs)
- Détection proactive et alerting sur containers down
- Intégration harmonieuse avec l'architecture existante
---
## 🔒 Contraintes d'architecture OBLIGATOIRES
### Stack technique existante (à respecter strictement)
```yaml
Backend:
- FastAPI (routes/ + services/ + models/ + schemas/)
- SQLAlchemy 2.x async (data/homelab.db)
- Alembic pour migrations
- APScheduler (jobs périodiques déjà configurés)
- WebSocket temps réel (websocket_manager.py)
- Auth JWT (app.auth_utils + OAuth2PasswordBearer)
- Notifications ntfy (services/notifications.py)
Frontend:
- index.html + main.js (vanilla JS)
- Tailwind CSS
- Anime.js pour animations
- Pattern navigation par sections (dashboard, hosts, tasks, schedules, etc.)
Infrastructure:
- Ansible pour automation (inventaire hosts.yml existant)
- SSH déjà configuré (automation user + clés)
- Bootstrap SSH existant (services/bootstrap.py)
```
### Modèles DB existants à étendre (NE PAS recréer)
```python
# models/host.py - TABLE EXISTANTE
class Host(Base):
__tablename__ = "hosts"
id: int
name: str
host: str # IP/hostname
os_type: str
status: str # online/offline
bootstrap_status: dict
last_seen_at: datetime
# À ÉTENDRE avec : docker_enabled, docker_version, docker_status
# models/task.py - TABLE EXISTANTE
class Task(Base):
__tablename__ = "tasks"
id: int
action: str
status: str # pending/running/success/failed
# Réutiliser pour actions Docker
# À CRÉER (nouvelles tables uniquement)
# - docker_containers
# - docker_images
# - docker_volumes
# - docker_alerts
```
---
## 🔧 Décisions techniques IMPOSÉES (pas de choix)
### 1. Collecte Docker : **SSH + docker CLI** (réutiliser pattern Ansible)
**Justification** :
- ✅ SSH déjà configuré pour tous les hosts (user automation + clés)
- ✅ Ajouter la collecte Docker au processus de collecte des métriques déjà en place
- ✅ Pas de config supplémentaire sur les hosts (pas de TLS Docker API)
- ✅ Même pattern que Ansible (cohérence)
- ✅ Parse JSON : `docker ps --format json`, `docker inspect`, etc.
**Implémentation** :
```python
# services/docker_service.py
async def collect_docker_host(host_id: int):
host = await get_host(host_id)
ssh = await ssh_connect(host.host, user="automation")
# Version Docker
version = await ssh_exec(ssh, "docker version --format '{{json .}}'")
# Containers
containers = await ssh_exec(ssh,
"docker ps -a --format '{{json .}}' --no-trunc")
# Images
images = await ssh_exec(ssh,
"docker images --format '{{json .}}'")
# Volumes
volumes = await ssh_exec(ssh,
"docker volume ls --format '{{json .}}'")
# System df
df = await ssh_exec(ssh, "docker system df -v --format '{{json .}}'")
```
### 2. Stockage : **Étendre tables existantes + créer tables Docker**
```sql
-- Migration Alembic à créer
ALTER TABLE hosts ADD COLUMN docker_enabled BOOLEAN DEFAULT FALSE;
ALTER TABLE hosts ADD COLUMN docker_version TEXT;
ALTER TABLE hosts ADD COLUMN docker_last_collect_at TIMESTAMP;
CREATE TABLE docker_containers (
id INTEGER PRIMARY KEY,
host_id INTEGER REFERENCES hosts(id),
container_id TEXT NOT NULL,
name TEXT NOT NULL,
image TEXT,
state TEXT, -- running/exited/paused
status TEXT, -- Up 2 hours, Exited (0) 5 minutes ago
health TEXT, -- healthy/unhealthy/starting/none
created_at TIMESTAMP,
ports JSON,
labels JSON,
compose_project TEXT, -- com.docker.compose.project
last_update_at TIMESTAMP,
UNIQUE(host_id, container_id)
);
CREATE TABLE docker_images (
id INTEGER PRIMARY KEY,
host_id INTEGER REFERENCES hosts(id),
image_id TEXT NOT NULL,
repo_tags JSON, -- ["nginx:latest", "nginx:1.25"]
size BIGINT,
created TIMESTAMP,
last_update_at TIMESTAMP,
UNIQUE(host_id, image_id)
);
CREATE TABLE docker_volumes (
id INTEGER PRIMARY KEY,
host_id INTEGER REFERENCES hosts(id),
name TEXT NOT NULL,
driver TEXT,
mountpoint TEXT,
scope TEXT,
last_update_at TIMESTAMP,
UNIQUE(host_id, name)
);
CREATE TABLE docker_alerts (
id INTEGER PRIMARY KEY,
host_id INTEGER REFERENCES hosts(id),
container_name TEXT NOT NULL,
severity TEXT, -- warning/error/critical
state TEXT, -- open/closed
message TEXT,
opened_at TIMESTAMP NOT NULL,
closed_at TIMESTAMP,
last_notified_at TIMESTAMP,
INDEX idx_alerts_open (state, host_id)
);
```
### 3. Scheduler : **Étendre APScheduler existant**
```python
# app_optimized.py - AJOUTER au startup
from services.docker_collector import DockerCollector
@app.on_event("startup")
async def start_docker_collector():
collector = DockerCollector(db_session, ws_manager, ntfy_service)
# Job périodique : collecter tous les hosts Docker enabled
scheduler.add_job(
collector.collect_all_hosts,
trigger="interval",
seconds=60, # Toutes les minutes
id="docker_collect",
name="Docker Metrics Collection"
)
# Job périodique : vérifier alertes containers down
scheduler.add_job(
collector.check_alerts,
trigger="interval",
seconds=30,
id="docker_alerts",
name="Docker Alerts Check"
)
```
---
## 🔌 API Routes à créer (prefix /api/docker)
```python
# routes/docker.py
router = APIRouter(prefix="/api/docker", tags=["docker"])
@router.get("/hosts")
async def list_docker_hosts(
current_user: User = Depends(get_current_user)
):
"""Liste tous les hosts avec Docker enabled"""
@router.post("/hosts/{host_id}/enable")
async def enable_docker_monitoring(
host_id: int,
current_user: User = Depends(require_role("admin"))
):
"""Active la surveillance Docker sur un host"""
@router.post("/hosts/{host_id}/collect")
async def collect_docker_now(
host_id: int,
current_user: User = Depends(require_role("operator"))
):
"""Force une collecte immédiate"""
@router.get("/hosts/{host_id}/containers")
async def get_containers(host_id: int):
"""Liste containers d'un host"""
@router.post("/containers/{host_id}/{container_id}/start")
async def start_container(
host_id: int,
container_id: str,
current_user: User = Depends(require_role("operator"))
):
"""Démarre un container"""
@router.post("/containers/{host_id}/{container_id}/stop")
@router.post("/containers/{host_id}/{container_id}/restart")
@router.post("/containers/{host_id}/{container_id}/remove")
@router.post("/containers/{host_id}/{container_id}/redeploy")
@router.get("/containers/{host_id}/{container_id}/logs")
async def get_container_logs(
host_id: int,
container_id: str,
tail: int = 200
):
"""RécupÚre logs d'un container"""
@router.get("/containers/{host_id}/{container_id}/inspect")
async def inspect_container(host_id: int, container_id: str):
"""Détails complets JSON d'un container"""
@router.get("/alerts")
async def list_alerts(
host_id: Optional[int] = None,
state: Optional[str] = "open"
):
"""Liste des alertes Docker"""
@router.post("/alerts/{alert_id}/ack")
async def acknowledge_alert(
alert_id: int,
current_user: User = Depends(require_role("operator"))
):
"""Accuser réception d'une alerte"""
```
---
## 🔔 Logique d'alerting (détection containers down)
### Règles de détection
```python
# services/docker_alerts.py
async def check_container_alerts(session: AsyncSession):
"""
Vérifie tous les containers critiques et génère des alertes
"""
# Récupérer containers avec label homelab.monitor=true
critical_containers = await session.execute(
select(DockerContainer)
.where(DockerContainer.labels.contains({"homelab.monitor": "true"}))
)
for container in critical_containers:
expected_state = container.labels.get("homelab.desired", "running")
# Cas 1 : Container arrêté alors qu'il devrait tourner
if expected_state == "running" and container.state != "running":
await open_alert(
host_id=container.host_id,
container_name=container.name,
severity="error",
message=f"Container {container.name} is {container.state}, expected running"
)
# Cas 2 : Container unhealthy
if container.health == "unhealthy":
await open_alert(
host_id=container.host_id,
container_name=container.name,
severity="warning",
message=f"Container {container.name} health check failing"
)
# Cas 3 : Container OK -> fermer alerte si ouverte
if container.state == "running" and container.health in ["healthy", "none"]:
await close_alert(container.host_id, container.name)
async def open_alert(host_id: int, container_name: str, severity: str, message: str):
"""
Ouvre une alerte et envoie notification ntfy
"""
# Vérifier si alerte déjà ouverte
existing = await get_open_alert(host_id, container_name)
if existing:
# Mettre à jour timestamp
existing.last_notified_at = datetime.utcnow()
return
# Créer nouvelle alerte
alert = DockerAlert(
host_id=host_id,
container_name=container_name,
severity=severity,
state="open",
message=message,
opened_at=datetime.utcnow()
)
session.add(alert)
await session.commit()
# Notification ntfy
host = await get_host(host_id)
await ntfy_service.send_notification(
topic="homelab-docker",
title=f"🚨 Docker Alert - {host.name}",
message=f"{container_name}: {message}",
priority=4,
tags=["warning", "docker"]
)
# WebSocket temps réel
await ws_manager.broadcast({
"type": "docker_alert_opened",
"alert": alert.to_dict()
})
```
---
## 🎨 UI/UX Frontend (intégration dans index.html + main.js)
### Navigation (ajouter dans index.html)
```html
```
### Section Docker (nouvelle section HTML)
```html
Docker Hosts