homelab_automation/app/services/builtin_playbooks.py
Bruno Charest 68a9b0f390
Some checks failed
Tests / Backend Tests (Python) (3.10) (push) Has been cancelled
Tests / Backend Tests (Python) (3.11) (push) Has been cancelled
Tests / Backend Tests (Python) (3.12) (push) Has been cancelled
Tests / Frontend Tests (JS) (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / All Tests Passed (push) Has been cancelled
Remove Node.js cache files containing npm vulnerability data for vitest and vite packages
2025-12-15 20:36:06 -05:00

542 lines
22 KiB
Python

"""
Service de gestion des Builtin Playbooks.
Ce service gère les playbooks intégrés à l'application pour la collecte
automatique d'informations sur les hôtes (métriques système, disque, mémoire, etc.).
Les résultats sont stockés dans la table host_metrics et visibles dans les Logs,
mais pas dans la section Tasks (pour éviter de polluer l'interface).
"""
from __future__ import annotations
import asyncio
import ast
import json
import re
import time
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Dict, Any, List, Optional
from pydantic import BaseModel
from app.schemas.host_metrics import (
BuiltinPlaybookDefinition,
HostMetricsCreate,
HostMetricsSummary,
)
# Builtin playbook definitions, keyed by builtin id.
# Each entry describes one bundled playbook: the YAML file it runs
# (under playbooks/builtin/), its UI presentation (name/icon/color),
# and whether its output feeds the host_metrics table and the scheduler.
BUILTIN_PLAYBOOKS: Dict[str, BuiltinPlaybookDefinition] = {
    # One-shot maintenance playbook: installs the CLI tools the metric
    # collectors rely on (df, lsblk, python3, ...). Produces no metrics
    # and is never scheduled.
    "install_base_tools": BuiltinPlaybookDefinition(
        id="install_base_tools",
        name="Installer les outils de base",
        description="Installe les commandes requises pour la collecte et l'affichage des métriques (df, lsblk, python3, etc.)",
        playbook_file="_builtin_install_base_tools.yml",
        category="maintenance",
        icon="fas fa-tools",
        color="blue",
        collect_metrics=False,
        schedule_enabled=False,
        visible_in_ui=True,
    ),
    # Full system snapshot (CPU, memory, disk, OS).
    "collect_system_info": BuiltinPlaybookDefinition(
        id="collect_system_info",
        name="Collecte Info Système",
        description="Collecte les informations système complètes (CPU, mémoire, disque, OS)",
        playbook_file="_builtin_collect_system_info.yml",
        category="metrics",
        icon="fas fa-microchip",
        color="cyan",
        collect_metrics=True,
        schedule_enabled=True,
        visible_in_ui=True,
    ),
    # Disk usage across all mount points.
    "collect_disk_usage": BuiltinPlaybookDefinition(
        id="collect_disk_usage",
        name="Espace Disque",
        description="Collecte l'utilisation de l'espace disque sur tous les points de montage",
        playbook_file="_builtin_collect_disk_usage.yml",
        category="metrics",
        icon="fas fa-hdd",
        color="amber",
        collect_metrics=True,
        schedule_enabled=True,
        visible_in_ui=True,
    ),
    # RAM and swap usage.
    "collect_memory_info": BuiltinPlaybookDefinition(
        id="collect_memory_info",
        name="Utilisation Mémoire",
        description="Collecte les informations de mémoire RAM et swap",
        playbook_file="_builtin_collect_memory_info.yml",
        category="metrics",
        icon="fas fa-memory",
        color="purple",
        collect_metrics=True,
        schedule_enabled=True,
        visible_in_ui=True,
    ),
    # CPU details: load, temperature, model.
    "collect_cpu_info": BuiltinPlaybookDefinition(
        id="collect_cpu_info",
        name="Informations CPU",
        description="Collecte les informations CPU (charge, température, modèle)",
        playbook_file="_builtin_collect_cpu_info.yml",
        category="metrics",
        icon="fas fa-tachometer-alt",
        color="red",
        collect_metrics=True,
        schedule_enabled=True,
        visible_in_ui=True,
    ),
    # Network interface information.
    "collect_network_info": BuiltinPlaybookDefinition(
        id="collect_network_info",
        name="Informations Réseau",
        description="Collecte les informations des interfaces réseau",
        playbook_file="_builtin_collect_network_info.yml",
        category="metrics",
        icon="fas fa-network-wired",
        color="green",
        collect_metrics=True,
        schedule_enabled=True,
        visible_in_ui=True,
    ),
}
class BuiltinPlaybookService:
    """Service for managing and executing builtin playbooks.

    Builtin playbooks ship with the application and collect host
    information (system metrics, disk, memory, network, ...). Their
    results are stored in the host_metrics table and shown in the Logs
    view, but kept out of the Tasks section to avoid cluttering the UI.
    """

    def __init__(self, ansible_dir: Path, ansible_service=None):
        """
        Args:
            ansible_dir: Ansible root directory (containing playbooks/)
            ansible_service: Ansible service instance used to run playbooks
        """
        self.ansible_dir = ansible_dir
        self.playbooks_dir = ansible_dir / "playbooks"
        self.builtin_dir = ansible_dir / "playbooks" / "builtin"
        self.ansible_service = ansible_service
        # Create the builtin directory if it does not exist yet
        self.builtin_dir.mkdir(parents=True, exist_ok=True)

    def _loads_jsonish(self, payload: str) -> dict:
        """Parse a JSON-ish metrics payload emitted by a playbook.

        Ansible output may escape double quotes and newlines, or wrap a
        long line with a trailing backslash; normalize those artifacts,
        then try strict JSON and finally fall back to ast.literal_eval
        for Python-dict-style payloads (single quotes, etc.).

        Raises:
            ValueError / SyntaxError: when the payload cannot be parsed.
        """
        payload = payload.strip()
        payload = payload.replace('\\"', '"').replace('\\n', '\n')
        # Re-join lines that were wrapped with a trailing backslash
        payload = re.sub(r"\\\s*\n", "", payload)
        try:
            return json.loads(payload)
        except json.JSONDecodeError:
            # literal_eval evaluates literals only (no code execution),
            # so it is a safe fallback for dict-repr payloads.
            return ast.literal_eval(payload)

    def get_all_definitions(self) -> List[BuiltinPlaybookDefinition]:
        """Return every builtin playbook definition."""
        return list(BUILTIN_PLAYBOOKS.values())

    def get_definition(self, builtin_id: str) -> Optional[BuiltinPlaybookDefinition]:
        """Return the definition of a builtin playbook by its ID, or None."""
        return BUILTIN_PLAYBOOKS.get(builtin_id)

    def get_playbook_path(self, builtin_id: str) -> Optional[Path]:
        """Return the full path of the playbook file, or None if unknown."""
        definition = self.get_definition(builtin_id)
        if not definition:
            return None
        return self.builtin_dir / definition.playbook_file

    def is_builtin_playbook(self, filename: str) -> bool:
        """Return True if a file is a builtin playbook (name starts with _builtin_)."""
        return filename.startswith("_builtin_")

    @staticmethod
    def _failure_result(message: str) -> Dict[str, Any]:
        """Build the standard early-failure payload returned by execute_builtin."""
        return {
            "success": False,
            "error": message,
            "parsed_metrics": {},
            "stdout": "",
            "stderr": message,
        }

    async def execute_builtin(
        self,
        builtin_id: str,
        target: str,
        extra_vars: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Execute a builtin playbook and return its results.

        Args:
            builtin_id: Builtin playbook ID
            target: Target (hostname or group)
            extra_vars: Extra variables passed to Ansible

        Returns:
            Dict with success, stdout, stderr, parsed_metrics, etc.
        """
        definition = self.get_definition(builtin_id)
        if not definition:
            return self._failure_result(f"Builtin playbook '{builtin_id}' non trouvé")
        playbook_path = self.builtin_dir / definition.playbook_file
        if not playbook_path.exists():
            return self._failure_result(
                f"Fichier playbook '{definition.playbook_file}' non trouvé à {playbook_path}"
            )
        if not self.ansible_service:
            return self._failure_result("Service Ansible non initialisé")
        start_time = time.time()
        try:
            # Run the playbook through the Ansible service; the file
            # lives in the builtin/ sub-directory of playbooks/.
            playbook_relative = f"builtin/{definition.playbook_file}"
            result = await self.ansible_service.execute_playbook(
                playbook=playbook_relative,
                target=target,
                extra_vars=extra_vars or {},
                check_mode=False,
                verbose=False,
            )
            execution_time = time.time() - start_time
            # Parse metrics from the JSON output (metric playbooks only)
            parsed_metrics = {}
            if result.get("success") and definition.collect_metrics:
                parsed_metrics = self._parse_metrics_from_output(
                    result.get("stdout", ""),
                    builtin_id
                )
            return {
                "success": result.get("success", False),
                "stdout": result.get("stdout", ""),
                "stderr": result.get("stderr", ""),
                "execution_time": execution_time,
                "execution_time_ms": int(execution_time * 1000),
                "parsed_metrics": parsed_metrics,
                "builtin_id": builtin_id,
                "target": target,
                "return_code": result.get("return_code", -1),
            }
        except Exception as e:
            execution_time = time.time() - start_time
            error_msg = f"Exception lors de l'exécution du builtin playbook: {str(e)}"
            print(f"[BUILTIN] {error_msg}")
            import traceback
            traceback.print_exc()
            return {
                "success": False,
                "stdout": "",
                "stderr": error_msg,
                "error": error_msg,
                "execution_time": execution_time,
                "execution_time_ms": int(execution_time * 1000),
                "parsed_metrics": {},
                "builtin_id": builtin_id,
                "target": target,
                "return_code": -1,
            }

    def _parse_metrics_from_output(
        self,
        stdout: str,
        builtin_id: str
    ) -> Dict[str, Dict[str, Any]]:
        """
        Parse metric JSON blocks from the playbook output.

        Builtin playbooks emit metrics in the form:
        METRICS_JSON_START:{"host": "hostname", "data": {...}}:METRICS_JSON_END

        Returns:
            Dict mapping hostname to metrics data
        """
        metrics_by_host: Dict[str, Dict[str, Any]] = {}
        print(f"[BUILTIN] Parsing metrics from stdout ({len(stdout)} chars)")
        # Primary pattern: raw METRICS_JSON_START:{...}:METRICS_JSON_END markers
        pattern = r'METRICS_JSON_START:(.*?):METRICS_JSON_END'
        matches = re.findall(pattern, stdout, re.DOTALL)
        print(f"[BUILTIN] Found {len(matches)} METRICS_JSON matches")
        parse_errors: list[str] = []
        for match in matches:
            try:
                data = self._loads_jsonish(match)
                host = data.get("host", "unknown")
                metrics = data.get("data", {})
                metrics_by_host[host] = metrics
                print(f"[BUILTIN] Parsed metrics for host: {host}")
            except (ValueError, SyntaxError) as e:
                parse_errors.append(str(e))
                continue
        # Fallback 1: try to parse standard Ansible debug output
        if not metrics_by_host:
            print("[BUILTIN] No metrics found with primary pattern, trying fallback...")
            metrics_by_host = self._parse_ansible_debug_output(stdout, builtin_id)
        # Fallback 2: Ansible debug format "msg": "METRICS_JSON_START:...:METRICS_JSON_END"
        if not metrics_by_host:
            print("[BUILTIN] Trying msg pattern fallback...")
            msg_pattern = r'"msg":\s*"METRICS_JSON_START:(.*?):METRICS_JSON_END"'
            msg_matches = re.findall(msg_pattern, stdout, re.DOTALL)
            print(f"[BUILTIN] Found {len(msg_matches)} msg pattern matches")
            for match in msg_matches:
                try:
                    # The JSON is escaped inside the msg string;
                    # _loads_jsonish already un-escapes \" and \n.
                    data = self._loads_jsonish(match)
                    host = data.get("host", "unknown")
                    metrics = data.get("data", {})
                    metrics_by_host[host] = metrics
                    print(f"[BUILTIN] Parsed metrics from msg for host: {host}")
                except (ValueError, SyntaxError) as e:
                    parse_errors.append(str(e))
                    continue
        print(f"[BUILTIN] Total hosts with metrics: {len(metrics_by_host)}")
        if not metrics_by_host and stdout:
            # Log the last parse error plus a stdout sample for debugging
            if parse_errors:
                print(f"[BUILTIN] JSON decode error: {parse_errors[-1]}")
            print(f"[BUILTIN] Stdout sample (first 500 chars): {stdout[:500]}")
        return metrics_by_host

    def _parse_ansible_debug_output(
        self,
        stdout: str,
        builtin_id: str
    ) -> Dict[str, Dict[str, Any]]:
        """
        Parse metrics from standard Ansible debug lines.

        Scans each line for metric-looking JSON and, when a leading
        "hostname :" prefix is present, associates the payload with
        that host. Lines without a host prefix are skipped.
        """
        metrics_by_host: Dict[str, Dict[str, Any]] = {}
        for line in stdout.split('\n'):
            # Only inspect lines that plausibly carry metric JSON
            if '"metrics":' in line or '"cpu_' in line or '"memory_' in line or '"disk_' in line:
                try:
                    # Find the JSON object embedded in the line
                    json_match = re.search(r'\{.*\}', line)
                    if json_match:
                        data = json.loads(json_match.group())
                        # Try to extract the hostname from the line prefix
                        host_match = re.search(r'^(\S+)\s*:', line)
                        if host_match:
                            host = host_match.group(1)
                            metrics_by_host[host] = data
                except json.JSONDecodeError:
                    continue
        return metrics_by_host

    def _clean_numeric_value(self, value: Any) -> Optional[float]:
        """Convert a value to float; return None when empty or invalid."""
        if value is None or value == '' or value == 'null':
            return None
        try:
            return float(value)
        except (ValueError, TypeError):
            return None

    def _clean_int_value(self, value: Any) -> Optional[int]:
        """Convert a value to int; return None when empty or invalid."""
        if value is None or value == '' or value == 'null':
            return None
        try:
            return int(float(value))  # go through float first to accept "3.0"
        except (ValueError, TypeError):
            return None

    def _clean_string_value(self, value: Any) -> Optional[str]:
        """Return the value as str; None when empty or a placeholder."""
        if value is None or value == '' or value == 'null' or value == 'Unknown':
            return None
        return str(value)

    def create_metrics_from_parsed(
        self,
        host_id: str,
        parsed_data: Dict[str, Any],
        builtin_id: str,
        execution_time_ms: int
    ) -> HostMetricsCreate:
        """
        Build a HostMetricsCreate object from parsed playbook data.

        Missing or placeholder values are normalized to None via the
        _clean_* helpers; the raw payload is kept in raw_data.
        """
        # Map the builtin_id to its metric_type
        metric_type_map = {
            "collect_system_info": "system_info",
            "collect_disk_usage": "disk_usage",
            "collect_memory_info": "memory",
            "collect_cpu_info": "cpu",
            "collect_network_info": "network",
        }
        metric_type = metric_type_map.get(builtin_id, "unknown")
        return HostMetricsCreate(
            host_id=host_id,
            metric_type=metric_type,
            # CPU
            cpu_count=self._clean_int_value(parsed_data.get("cpu_count")),
            cpu_model=self._clean_string_value(parsed_data.get("cpu_model")),
            cpu_cores=self._clean_int_value(parsed_data.get("cpu_cores")),
            cpu_threads=self._clean_int_value(parsed_data.get("cpu_threads")),
            cpu_threads_per_core=self._clean_int_value(parsed_data.get("cpu_threads_per_core")),
            cpu_sockets=self._clean_int_value(parsed_data.get("cpu_sockets")),
            cpu_mhz=self._clean_numeric_value(parsed_data.get("cpu_mhz")),
            cpu_max_mhz=self._clean_numeric_value(parsed_data.get("cpu_max_mhz")),
            cpu_min_mhz=self._clean_numeric_value(parsed_data.get("cpu_min_mhz")),
            cpu_load_1m=self._clean_numeric_value(parsed_data.get("cpu_load_1m")),
            cpu_load_5m=self._clean_numeric_value(parsed_data.get("cpu_load_5m")),
            cpu_load_15m=self._clean_numeric_value(parsed_data.get("cpu_load_15m")),
            cpu_usage_percent=self._clean_numeric_value(parsed_data.get("cpu_usage_percent")),
            cpu_temperature=self._clean_numeric_value(parsed_data.get("cpu_temperature")),
            # Memory
            memory_total_mb=self._clean_int_value(parsed_data.get("memory_total_mb")),
            memory_used_mb=self._clean_int_value(parsed_data.get("memory_used_mb")),
            memory_free_mb=self._clean_int_value(parsed_data.get("memory_free_mb")),
            memory_usage_percent=self._clean_numeric_value(parsed_data.get("memory_usage_percent")),
            swap_total_mb=self._clean_int_value(parsed_data.get("swap_total_mb")),
            swap_used_mb=self._clean_int_value(parsed_data.get("swap_used_mb")),
            swap_usage_percent=self._clean_numeric_value(parsed_data.get("swap_usage_percent")),
            # Disk
            disk_info=parsed_data.get("disk_info"),
            disk_devices=parsed_data.get("disk_devices"),
            disk_root_total_gb=self._clean_numeric_value(parsed_data.get("disk_root_total_gb")),
            disk_root_used_gb=self._clean_numeric_value(parsed_data.get("disk_root_used_gb")),
            disk_root_usage_percent=self._clean_numeric_value(parsed_data.get("disk_root_usage_percent")),
            # Storage stacks
            lvm_info=parsed_data.get("lvm_info"),
            zfs_info=parsed_data.get("zfs_info"),
            # System
            os_name=self._clean_string_value(parsed_data.get("os_name")),
            os_version=self._clean_string_value(parsed_data.get("os_version")),
            kernel_version=self._clean_string_value(parsed_data.get("kernel_version")),
            hostname=self._clean_string_value(parsed_data.get("hostname")),
            uptime_seconds=self._clean_int_value(parsed_data.get("uptime_seconds")),
            uptime_human=self._clean_string_value(parsed_data.get("uptime_human")),
            # Network
            network_info=parsed_data.get("network_info"),
            # Metadata
            raw_data=parsed_data,
            collection_source=builtin_id,
            collection_duration_ms=execution_time_ms,
        )

    def metrics_to_summary(
        self,
        metrics: Any,  # HostMetrics model
        host_name: Optional[str] = None
    ) -> HostMetricsSummary:
        """Convert a HostMetrics object into a HostMetricsSummary for the UI."""
        if not metrics:
            return HostMetricsSummary(
                host_id="unknown",
                host_name=host_name,
                collection_status="unknown"
            )
        # Normalize the timestamp for display. Naive datetimes are assumed
        # to be UTC. NOTE(review): the display timezone is hardcoded to
        # UTC-5 — consider making this configurable.
        collected_at = metrics.collected_at
        if collected_at is not None:
            if getattr(collected_at, "tzinfo", None) is None:
                collected_at = collected_at.replace(tzinfo=timezone.utc)
            app_tz = timezone(timedelta(hours=-5))
            collected_at = collected_at.astimezone(app_tz)
        return HostMetricsSummary(
            host_id=metrics.host_id,
            host_name=host_name,
            last_collected=collected_at,
            # CPU
            cpu_usage_percent=metrics.cpu_usage_percent,
            cpu_load_1m=metrics.cpu_load_1m,
            cpu_temperature=metrics.cpu_temperature,
            cpu_model=metrics.cpu_model,
            cpu_count=metrics.cpu_count,
            cpu_cores=getattr(metrics, "cpu_cores", None),
            cpu_threads=getattr(metrics, "cpu_threads", None),
            cpu_max_mhz=getattr(metrics, "cpu_max_mhz", None),
            # Memory
            memory_usage_percent=metrics.memory_usage_percent,
            memory_total_mb=metrics.memory_total_mb,
            memory_used_mb=metrics.memory_used_mb,
            # Disk
            disk_root_usage_percent=metrics.disk_root_usage_percent,
            disk_root_total_gb=metrics.disk_root_total_gb,
            disk_root_used_gb=metrics.disk_root_used_gb,
            disk_info=metrics.disk_info if getattr(metrics, "disk_info", None) else None,
            disk_devices=getattr(metrics, "disk_devices", None),
            # Storage stacks
            lvm_info=getattr(metrics, "lvm_info", None),
            zfs_info=getattr(metrics, "zfs_info", None),
            # System
            os_name=metrics.os_name,
            uptime_human=metrics.uptime_human,
            # Status
            collection_status="success" if not metrics.error_message else "failed",
            error_message=metrics.error_message,
        )
# Global instance (initialized at application startup)
builtin_playbook_service: Optional[BuiltinPlaybookService] = None


def get_builtin_playbook_service() -> BuiltinPlaybookService:
    """Return the global BuiltinPlaybookService instance.

    Raises:
        RuntimeError: if init_builtin_playbook_service() has not run yet.
    """
    if builtin_playbook_service is None:
        raise RuntimeError("BuiltinPlaybookService not initialized")
    return builtin_playbook_service
def init_builtin_playbook_service(ansible_dir: Path, ansible_service=None) -> BuiltinPlaybookService:
    """Create the global BuiltinPlaybookService, register it, and return it."""
    global builtin_playbook_service
    service = BuiltinPlaybookService(ansible_dir, ansible_service)
    builtin_playbook_service = service
    return service