Bruno Charest 46823eb42d
Some checks failed
Tests / Backend Tests (Python) (3.10) (push) Has been cancelled
Tests / Backend Tests (Python) (3.11) (push) Has been cancelled
Tests / Backend Tests (Python) (3.12) (push) Has been cancelled
Tests / Frontend Tests (JS) (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / All Tests Passed (push) Has been cancelled
Enhance host status tracking by parsing Ansible PLAY RECAP to update host reachability and last_seen timestamps after health-check playbook executions, add inventory group resolution to host API responses, and trigger automatic data refresh in dashboard after task completion to reflect updated host health indicators
2025-12-22 10:43:17 -05:00

725 lines
25 KiB
Python

"""
API routes for Ansible execution.
"""
import uuid
import subprocess
from datetime import datetime, timezone
from time import perf_counter
from typing import Optional, Dict, Any
import re
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.dependencies import get_db, verify_api_key
from app.crud.task import TaskRepository
from app.crud.host import HostRepository
from app.schemas.ansible import AnsibleExecutionRequest, AdHocCommandRequest, AdHocCommandResult, BootstrapRequest
from app.schemas.task_api import Task
from app.schemas.common import CommandResult
from app.schemas.common import LogEntry
from app.services import (
ansible_service,
ws_manager,
notification_service,
adhoc_history_service,
db,
bootstrap_status_service,
)
from app.services.task_log_service import TaskLogService
from app.utils.ssh_utils import find_ssh_private_key, bootstrap_host
# Router on which the endpoints defined below are registered.
router = APIRouter()
# Task log service instance, rooted at the configured task-logs directory.
task_log_service = TaskLogService(settings.tasks_logs_dir)
def _parse_ansible_play_recap(stdout: str) -> dict[str, dict[str, int]]:
if not stdout:
return {}
recap: dict[str, dict[str, int]] = {}
pattern = re.compile(
r"^\s*(?P<host>[^\s:]+)\s*:\s*.*?unreachable=(?P<unreachable>\d+)\s+failed=(?P<failed>\d+)\b",
re.IGNORECASE,
)
for line in stdout.splitlines():
m = pattern.match(line)
if not m:
continue
host = m.group("host")
recap[host] = {
"unreachable": int(m.group("unreachable")),
"failed": int(m.group("failed")),
}
return recap
async def _resolve_host_from_recap_name(
    host_repo: HostRepository,
    recap_host_name: str,
    inventory_alias_to_host: dict[str, str],
):
    """Find the stored host that corresponds to a name taken from a play recap.

    Lookup order:
      1. the recap name as a host name, then as an IP;
      2. if neither matches and the alias map has an entry for the recap
         name, the mapped value as an IP, then as a host name.

    Returns the first truthy repository result, or ``None`` when nothing
    matches.
    """
    direct_match = (
        await host_repo.get_by_name(recap_host_name)
        or await host_repo.get_by_ip(recap_host_name)
    )
    if direct_match:
        return direct_match

    alias_target = inventory_alias_to_host.get(recap_host_name)
    if not alias_target:
        return None

    # The mapped value is tried as an IP first, then as a name.
    alias_match = (
        await host_repo.get_by_ip(alias_target)
        or await host_repo.get_by_name(alias_target)
    )
    return alias_match or None
@router.get("/playbooks")
async def get_ansible_playbooks(
    target: Optional[str] = None,
    api_key_valid: bool = Depends(verify_api_key)
):
    """List the available Ansible playbooks.

    When ``target`` is given, only the playbooks the Ansible service reports
    as compatible with it are returned; otherwise all playbooks are listed.
    The ``target`` value is echoed back under ``filter``.
    """
    playbooks = (
        ansible_service.get_compatible_playbooks(target)
        if target
        else ansible_service.get_playbooks()
    )
    categories = ansible_service.get_playbook_categories()
    ansible_dir = str(settings.ansible_dir)
    return {
        "playbooks": playbooks,
        "categories": categories,
        "ansible_dir": ansible_dir,
        "filter": target,
    }
@router.get("/inventory")
async def get_ansible_inventory(
    group: Optional[str] = None,
    api_key_valid: bool = Depends(verify_api_key)
):
    """Return the Ansible inventory.

    ``group`` is forwarded to the Ansible service as ``group_filter`` and
    echoed back under ``filter``.
    """
    inventory_hosts = ansible_service.get_hosts_from_inventory(group_filter=group)
    serialized_hosts = [inventory_host.dict() for inventory_host in inventory_hosts]
    groups = ansible_service.get_groups()
    inventory_path = str(ansible_service.inventory_path)
    return {
        "hosts": serialized_hosts,
        "groups": groups,
        "inventory_path": inventory_path,
        "filter": group,
    }
@router.get("/groups")
async def get_ansible_groups(api_key_valid: bool = Depends(verify_api_key)):
    """Return the list of Ansible groups reported by the Ansible service."""
    available_groups = ansible_service.get_groups()
    return {"groups": available_groups}
@router.get("/ssh-config")
async def get_ssh_config(api_key_valid: bool = Depends(verify_api_key)):
    """Report diagnostic information about the SSH configuration.

    The response describes the configured key path, the directory that
    contains it and its file names, which well-known public keys exist in
    that directory, the private key selected by ``find_ssh_private_key``,
    the configured SSH user, and whether ``sshpass`` is on the PATH.
    """
    import shutil
    from pathlib import Path

    configured_key = Path(settings.ssh_key_path)
    key_dir = configured_key.parent

    # File names inside the key directory, or nothing when it is missing.
    dir_listing = (
        [entry.name for entry in key_dir.iterdir()] if key_dir.exists() else []
    )

    has_private_key = configured_key.exists()
    has_public_key = Path(settings.ssh_key_path + ".pub").exists()

    # Well-known public key file names looked up next to the configured key.
    candidate_pub_keys = (
        key_dir / f"{stem}.pub"
        for stem in ("id_rsa", "id_ed25519", "id_ecdsa", "id_dsa")
    )
    existing_pub_keys = [str(path) for path in candidate_pub_keys if path.exists()]

    selected_private_key = find_ssh_private_key()

    return {
        "ssh_key_path": settings.ssh_key_path,
        "ssh_dir": str(key_dir),
        "ssh_dir_exists": key_dir.exists(),
        "private_key_exists": has_private_key,
        "public_key_exists": has_public_key,
        "available_files": dir_listing,
        "public_keys_found": existing_pub_keys,
        "active_private_key": selected_private_key,
        "ssh_user": settings.ssh_user,
        "sshpass_available": shutil.which("sshpass") is not None,
    }
# Strong references to fire-and-forget notification tasks. The event loop
# only keeps weak references to tasks, so a task nobody references may be
# garbage-collected before it finishes.
_bootstrap_background_tasks: set = set()


def _spawn_bootstrap_notification(coro) -> None:
    """Schedule *coro* in the background and keep it referenced until done."""
    import asyncio

    background_task = asyncio.create_task(coro)
    _bootstrap_background_tasks.add(background_task)
    background_task.add_done_callback(_bootstrap_background_tasks.discard)


@router.post("/bootstrap", response_model=CommandResult)
async def bootstrap_ansible_host_compat(
    request: BootstrapRequest,
    api_key_valid: bool = Depends(verify_api_key),
):
    """Compat: bootstrap a host through /api/ansible/bootstrap (legacy UI).

    The canonical route is /api/bootstrap, but the legacy UI calls
    /api/ansible/bootstrap.

    On success the bootstrap status is recorded, the hosts cache is
    invalidated, an INFO log entry is added, a ``bootstrap_success`` event is
    broadcast over the websocket manager, and a success notification is
    scheduled in the background.

    Returns:
        The result object produced by ``bootstrap_host``.

    Raises:
        HTTPException: status 500 when ``bootstrap_host`` reports a non-zero
            return code (detail carries status, return code, stdout and
            stderr) or when any unexpected exception occurs (detail is the
            exception text). A failure notification is scheduled in both
            cases.
    """
    import logging

    logger = logging.getLogger("bootstrap_endpoint")

    try:
        logger.info(
            "[BOOTSTRAP] Bootstrap request for host=%s, user=%s",
            request.host,
            request.automation_user,
        )

        # NOTE(review): bootstrap_host is a synchronous call made from an async
        # handler; if it blocks for long, the event loop stalls meanwhile.
        # Consider asyncio.to_thread once it is confirmed to be thread-safe.
        result = bootstrap_host(
            host=request.host,
            root_password=request.root_password,
            automation_user=request.automation_user,
        )

        logger.info(
            "[BOOTSTRAP] Bootstrap result: status=%s, return_code=%s",
            result.status,
            result.return_code,
        )

        if result.return_code != 0:
            raise HTTPException(
                status_code=500,
                detail={
                    "status": result.status,
                    "return_code": result.return_code,
                    "stdout": result.stdout,
                    "stderr": result.stderr,
                },
            )

        # Prefer the name of a known host whose IP or name equals the
        # requested value; fall back to the raw request value otherwise.
        host_name = next(
            (
                h.name
                for h in db.hosts
                if h.ip == request.host or h.name == request.host
            ),
            request.host,
        )

        bootstrap_status_service.set_bootstrap_status(
            host_name=host_name,
            success=True,
            details=f"Bootstrap réussi via API (user: {request.automation_user})",
        )

        db.invalidate_hosts_cache()

        log_entry = LogEntry(
            id=db.get_next_id("logs"),
            timestamp=datetime.now(timezone.utc),
            level="INFO",
            message=f"Bootstrap réussi pour {host_name} (user: {request.automation_user})",
            source="bootstrap",
            host=host_name,
        )
        db.logs.insert(0, log_entry)

        await ws_manager.broadcast(
            {
                "type": "bootstrap_success",
                "data": {
                    "host": host_name,
                    "user": request.automation_user,
                    "status": "ok",
                    "bootstrap_ok": True,
                },
            }
        )

        _spawn_bootstrap_notification(
            notification_service.notify_bootstrap_success(host_name)
        )
        return result

    except HTTPException as http_exc:
        # Raised above for a non-zero return code: notify, then let the
        # original HTTP error propagate unchanged.
        error_detail = str(http_exc.detail) if http_exc.detail else "Erreur inconnue"
        _spawn_bootstrap_notification(
            notification_service.notify_bootstrap_failed(
                hostname=request.host,
                error=error_detail[:200],
            )
        )
        raise
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception("[BOOTSTRAP] Bootstrap exception: %s", e)

        log_entry = LogEntry(
            id=db.get_next_id("logs"),
            timestamp=datetime.now(timezone.utc),
            level="ERROR",
            message=f"Échec bootstrap pour {request.host}: {str(e)}",
            source="bootstrap",
            host=request.host,
        )
        db.logs.insert(0, log_entry)

        _spawn_bootstrap_notification(
            notification_service.notify_bootstrap_failed(
                hostname=request.host,
                error=str(e)[:200],
            )
        )
        raise HTTPException(status_code=500, detail=str(e)) from e
async def _update_hosts_from_health_check(db_session, target, result) -> None:
    """Update host status, reachability and last_seen from a health-check run.

    Hosts found in the play recap of ``result["stdout"]`` are updated
    individually: a host is "online" when its ``unreachable`` counter is 0.
    When no recap line is found and ``target`` is not "all", the target
    itself is updated from the overall ``result["success"]`` flag.
    """
    import logging

    host_repo = HostRepository(db_session)
    now = datetime.now(timezone.utc)

    # Inventory alias -> ansible_host, used to match recap names that are
    # inventory aliases rather than stored host names or IPs.
    inventory_alias_to_host: dict[str, str] = {}
    try:
        for inv_host in ansible_service.get_hosts_from_inventory():
            if inv_host.name and inv_host.ansible_host:
                inventory_alias_to_host[inv_host.name] = inv_host.ansible_host
    except Exception:
        logging.getLogger(__name__).debug(
            "Could not read the inventory for alias resolution", exc_info=True
        )
        inventory_alias_to_host = {}

    recap = _parse_ansible_play_recap(result.get("stdout", "") or "")
    if recap:
        for host_name, counters in recap.items():
            reachable = int(counters.get("unreachable", 0) or 0) == 0
            host = await _resolve_host_from_recap_name(
                host_repo,
                host_name,
                inventory_alias_to_host,
            )
            if host:
                await host_repo.update(
                    host,
                    status="online" if reachable else "offline",
                    reachable=reachable,
                    last_seen=now,
                )
    elif target != "all":
        host = await _resolve_host_from_recap_name(
            host_repo,
            target,
            inventory_alias_to_host,
        )
        if host:
            succeeded = bool(result.get("success"))
            await host_repo.update(
                host,
                status="online" if succeeded else "offline",
                reachable=succeeded,
                last_seen=now,
            )


async def _fail_playbook_task(task, db_task, task_repo, db_session, target, error_text) -> None:
    """Record a playbook task failure in memory, in the log files, in the DB and via notification."""
    import logging

    task.status = "failed"
    task.end_time = datetime.now(timezone.utc)
    task.error = error_text

    # Saving the markdown log is best effort: it must not hide the real error.
    try:
        log_path = task_log_service.save_task_log(task=task, error=error_text)
        created_log = task_log_service.index_log_file(log_path)
        if created_log:
            await ws_manager.broadcast({
                "type": "task_log_created",
                "data": created_log.dict()
            })
    except Exception:
        logging.getLogger(__name__).warning(
            "Could not save the task log for failed task %s", task.id, exc_info=True
        )

    await task_repo.update(
        db_task,
        status="failed",
        completed_at=task.end_time,
        error_message=error_text,
    )
    await db_session.commit()
    await notification_service.notify_task_failed(
        task_name=task.name,
        target=target,
        error=error_text[:200],
    )


@router.post("/execute")
async def execute_ansible_playbook(
    request: AnsibleExecutionRequest,
    api_key_valid: bool = Depends(verify_api_key),
    db_session: AsyncSession = Depends(get_db)
):
    """Execute an Ansible playbook.

    The task is recorded both in the database and in the in-memory store,
    the playbook is run through the Ansible service, and the outcome is
    logged, broadcast over the websocket manager and notified. After a
    playbook whose name contains "health-check", the involved hosts'
    status/last_seen are refreshed (best effort).

    Returns:
        The result dict from the Ansible service, with ``task_id`` added.

    Raises:
        HTTPException: 400 when the playbook's ``hosts`` is not compatible
            with the requested target, 404 when execution raises
            ``FileNotFoundError``, 500 on any other exception.
    """
    import logging

    logger = logging.getLogger(__name__)
    start_time_dt = datetime.now(timezone.utc)

    # Validate playbook/target compatibility.
    playbooks = ansible_service.get_playbooks()
    requested_stem = request.playbook.replace('.yml', '').replace('.yaml', '')
    playbook_info = next(
        (
            pb for pb in playbooks
            if pb['filename'] == request.playbook or pb['name'] == requested_stem
        ),
        None
    )

    if playbook_info:
        playbook_hosts = playbook_info.get('hosts', 'all')
        if not ansible_service.is_target_compatible_with_playbook(request.target, playbook_hosts):
            raise HTTPException(
                status_code=400,
                detail=f"Le playbook '{request.playbook}' (hosts: {playbook_hosts}) n'est pas compatible avec la cible '{request.target}'."
            )

    # Create the task in the database.
    task_repo = TaskRepository(db_session)
    task_id = f"pb_{uuid.uuid4().hex[:12]}"
    # Strip both YAML extensions, as the lookup above does, so a ".yaml"
    # playbook does not keep its extension in the display name.
    playbook_name = requested_stem.replace('-', ' ').title()

    db_task = await task_repo.create(
        id=task_id,
        action=f"playbook:{request.playbook}",
        target=request.target,
        playbook=request.playbook,
        status="running",
    )
    await task_repo.update(db_task, started_at=start_time_dt)
    await db_session.commit()

    # Mirror the task in memory.
    task = Task(
        id=task_id,
        name=f"Playbook: {playbook_name}",
        host=request.target,
        status="running",
        progress=0,
        start_time=start_time_dt
    )
    db.tasks.insert(0, task)

    try:
        result = await ansible_service.execute_playbook(
            playbook=request.playbook,
            target=request.target,
            extra_vars=request.extra_vars,
            check_mode=request.check_mode,
            verbose=request.verbose
        )

        # Update the in-memory task.
        task.status = "completed" if result["success"] else "failed"
        task.progress = 100
        task.end_time = datetime.now(timezone.utc)
        task.duration = f"{result.get('execution_time', 0):.1f}s"
        task.output = result.get("stdout", "")
        task.error = result.get("stderr", "") if not result["success"] else None

        # Add a log entry.
        log_entry = LogEntry(
            id=db.get_next_id("logs"),
            timestamp=datetime.now(timezone.utc),
            level="INFO" if result["success"] else "ERROR",
            message=f"Playbook {request.playbook} exécuté sur {request.target}: {'succès' if result['success'] else 'échec'}",
            source="ansible",
            host=request.target
        )
        db.logs.insert(0, log_entry)

        # Save the markdown log (best effort).
        try:
            log_path = task_log_service.save_task_log(
                task=task,
                output=result.get("stdout", ""),
                error=result.get("stderr", "")
            )
            created_log = task_log_service.index_log_file(log_path)
            if created_log:
                await ws_manager.broadcast({
                    "type": "task_log_created",
                    "data": created_log.dict()
                })
        except Exception as log_error:
            logger.warning("Erreur sauvegarde log markdown: %s", log_error, exc_info=True)

        await ws_manager.broadcast({
            "type": "ansible_execution",
            "data": result
        })

        # Update the database. "or" guards against a stdout that is None.
        await task_repo.update(
            db_task,
            status=task.status,
            completed_at=task.end_time,
            error_message=task.error,
            result_data={"output": (result.get("stdout") or "")[:5000]}
        )

        # For a health-check, refresh status/last_seen of the involved hosts.
        # Best effort: a failure here is logged, not raised.
        if request.playbook and "health-check" in request.playbook and request.target:
            try:
                await _update_hosts_from_health_check(db_session, request.target, result)
            except Exception:
                logger.warning(
                    "Could not update host status after health-check on %s",
                    request.target,
                    exc_info=True,
                )

        await db_session.commit()

        # Notification.
        if result["success"]:
            await notification_service.notify_task_completed(
                task_name=task.name,
                target=request.target,
                duration=task.duration
            )
        else:
            await notification_service.notify_task_failed(
                task_name=task.name,
                target=request.target,
                # "or" also covers a stderr that is present but empty or None.
                error=(result.get("stderr") or "Erreur inconnue")[:200]
            )

        result["task_id"] = task_id
        return result

    except FileNotFoundError as e:
        await _fail_playbook_task(task, db_task, task_repo, db_session, request.target, str(e))
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        await _fail_playbook_task(task, db_task, task_repo, db_session, request.target, str(e))
        raise HTTPException(status_code=500, detail=str(e)) from e
async def _fail_adhoc_task(request, task, db_task, task_repo, db_session, error_msg, elapsed):
    """Record an ad-hoc task failure and build the failure response.

    Updates the in-memory task, saves the task log (best effort), updates
    the database, sends the failure notification, and returns an
    ``AdHocCommandResult`` with ``success=False`` and ``return_code=-1``.
    """
    import logging

    task.status = "failed"
    task.progress = 100
    task.end_time = datetime.now(timezone.utc)
    task.duration = f"{round(elapsed, 2)}s"
    task.error = error_msg

    # Saving the log is best effort: it must not hide the real error.
    try:
        log_path = task_log_service.save_task_log(task, error=error_msg, source_type='adhoc')
        created_log = task_log_service.index_log_file(log_path)
        if created_log:
            await ws_manager.broadcast({
                "type": "task_log_created",
                "data": created_log.dict()
            })
    except Exception:
        logging.getLogger(__name__).warning(
            "Could not save the task log for failed ad-hoc task %s", task.id, exc_info=True
        )

    await task_repo.update(
        db_task,
        status="failed",
        completed_at=task.end_time,
        error_message=error_msg,
    )
    await db_session.commit()
    await notification_service.notify_task_failed(
        task_name=task.name,
        target=request.target,
        error=error_msg[:200],
    )

    return AdHocCommandResult(
        target=request.target,
        command=request.command,
        success=False,
        return_code=-1,
        stdout="",
        stderr=error_msg,
        duration=round(elapsed, 2)
    )


@router.post("/adhoc", response_model=AdHocCommandResult)
async def execute_adhoc_command(
    request: AdHocCommandRequest,
    api_key_valid: bool = Depends(verify_api_key),
    db_session: AsyncSession = Depends(get_db)
):
    """Execute an Ansible ad-hoc command.

    The task is recorded in the database and in memory, the ``ansible``
    executable is run as a subprocess, and the outcome is logged, broadcast,
    added to the ad-hoc history and notified.

    Returns:
        An ``AdHocCommandResult``. Timeouts, a missing ``ansible`` executable
        and any other exception are reported as a result with
        ``success=False`` and ``return_code=-1`` instead of being raised.
    """
    import asyncio
    import logging

    logger = logging.getLogger(__name__)
    start_time_perf = perf_counter()
    start_time_dt = datetime.now(timezone.utc)

    # Create the task in the database.
    task_repo = TaskRepository(db_session)
    task_id = f"adhoc_{uuid.uuid4().hex[:12]}"
    task_name = f"Ad-hoc: {request.command[:40]}{'...' if len(request.command) > 40 else ''}"

    db_task = await task_repo.create(
        id=task_id,
        action=f"adhoc:{request.module}",
        target=request.target,
        playbook=None,
        status="running",
    )
    await task_repo.update(db_task, started_at=start_time_dt)
    await db_session.commit()

    # Mirror the task in memory.
    task = Task(
        id=task_id,
        name=task_name,
        host=request.target,
        status="running",
        progress=0,
        start_time=start_time_dt
    )
    db.tasks.insert(0, task)

    # Build the ansible command as an argument list (no shell involved).
    ansible_cmd = [
        "ansible",
        request.target,
        "-i", str(settings.ansible_dir / "inventory" / "hosts.yml"),
        "-m", request.module,
        "-a", request.command,
        "--timeout", str(request.timeout),
    ]

    if request.become:
        ansible_cmd.append("--become")

    private_key = find_ssh_private_key()
    if private_key:
        ansible_cmd.extend(["--private-key", private_key])

    if settings.ssh_user:
        ansible_cmd.extend(["-u", settings.ssh_user])

    try:
        # Run the blocking subprocess in a worker thread so the event loop
        # keeps serving other requests while ansible runs. The subprocess
        # gets ten seconds more than ansible's own --timeout.
        result = await asyncio.to_thread(
            subprocess.run,
            ansible_cmd,
            capture_output=True,
            text=True,
            timeout=request.timeout + 10,
            cwd=str(settings.ansible_dir)
        )

        duration = perf_counter() - start_time_perf
        success = result.returncode == 0

        task.status = "completed" if success else "failed"
        task.progress = 100
        task.end_time = datetime.now(timezone.utc)
        task.duration = f"{round(duration, 2)}s"
        task.output = result.stdout
        task.error = result.stderr if result.stderr else None

        # Saving the log is best effort, as on the failure paths: a log
        # problem must not turn a finished command into an internal error.
        try:
            log_path = task_log_service.save_task_log(
                task,
                output=result.stdout,
                error=result.stderr or "",
                source_type='adhoc',
            )
            created_log = task_log_service.index_log_file(log_path)
            if created_log:
                await ws_manager.broadcast({
                    "type": "task_log_created",
                    "data": created_log.dict()
                })
        except Exception:
            logger.warning(
                "Could not save the task log for ad-hoc task %s", task_id, exc_info=True
            )

        log_entry = LogEntry(
            id=db.get_next_id("logs"),
            timestamp=datetime.now(timezone.utc),
            level="INFO" if success else "WARN",
            message=f"Ad-hoc [{request.module}] sur {request.target}: {request.command[:50]}{'...' if len(request.command) > 50 else ''}",
            source="ansible-adhoc",
            host=request.target
        )
        db.logs.insert(0, log_entry)

        await ws_manager.broadcast({
            "type": "adhoc_executed",
            "data": {
                "target": request.target,
                "command": request.command,
                "success": success,
                "task_id": task_id
            }
        })

        await adhoc_history_service.add_command(
            command=request.command,
            target=request.target,
            module=request.module,
            become=request.become,
            category=request.category or "default"
        )

        await task_repo.update(
            db_task,
            status=task.status,
            completed_at=task.end_time,
            error_message=task.error,
            result_data={"output": result.stdout[:5000] if result.stdout else None}
        )
        await db_session.commit()

        if success:
            await notification_service.notify_task_completed(
                task_name=task.name,
                target=request.target,
                duration=task.duration
            )
        else:
            await notification_service.notify_task_failed(
                task_name=task.name,
                target=request.target,
                error=(result.stderr or "Erreur inconnue")[:200]
            )

        return AdHocCommandResult(
            target=request.target,
            command=request.command,
            success=success,
            return_code=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr if result.stderr else None,
            duration=round(duration, 2)
        )

    except subprocess.TimeoutExpired:
        return await _fail_adhoc_task(
            request, task, db_task, task_repo, db_session,
            f"Timeout après {request.timeout} secondes",
            perf_counter() - start_time_perf,
        )
    except FileNotFoundError:
        return await _fail_adhoc_task(
            request, task, db_task, task_repo, db_session,
            "ansible non trouvé. Vérifiez que Ansible est installé.",
            perf_counter() - start_time_perf,
        )
    except Exception as e:
        return await _fail_adhoc_task(
            request, task, db_task, task_repo, db_session,
            f"Erreur interne: {str(e)}",
            perf_counter() - start_time_perf,
        )