homelab_automation/app/services/docker_actions.py
Bruno Charest 68a9b0f390
Some checks failed
Tests / Backend Tests (Python) (3.10) (push) Has been cancelled
Tests / Backend Tests (Python) (3.11) (push) Has been cancelled
Tests / Backend Tests (Python) (3.12) (push) Has been cancelled
Tests / Frontend Tests (JS) (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / All Tests Passed (push) Has been cancelled
Remove Node.js cache files containing npm vulnerability data for vitest and vite packages
2025-12-15 20:36:06 -05:00

618 lines
23 KiB
Python

"""Docker container actions service.
Provides methods to execute actions on Docker containers via SSH.
"""
from __future__ import annotations

import asyncio
import json
import logging
import shlex
from typing import Dict, Any, Optional

try:
    import asyncssh
except ModuleNotFoundError:  # pragma: no cover
    asyncssh = None

from app.core.config import settings
from app.models.database import async_session_maker
from app.crud.host import HostRepository
from app.crud.docker_container import DockerContainerRepository
# Module-level logger registered under the "homelab.docker.actions" name.
logger = logging.getLogger("homelab.docker.actions")
# Standard log prefix.
# NOTE(review): presumably meant to be prepended to log messages; it is not
# referenced by the code visible in this module - confirm intended usage.
LOG_PREFIX = "[DOCKER]"
class DockerActionError(Exception):
    """Raised when a Docker action cannot be carried out.

    Used by the service for SSH connection failures, command timeouts,
    execution errors and unknown hosts.
    """
class DockerActionsService:
    """Service for executing Docker container actions via SSH.

    Every public method resolves the host, opens a fresh SSH connection,
    runs one or more ``docker`` commands on it and returns a plain dict.
    State-changing container actions (start/stop/restart/remove/redeploy)
    are serialized per ``host_id:container_id`` with an ``asyncio.Lock``.

    SSH and execution failures (``DockerActionError``) raised while talking
    to the host are reported inside the returned dict. The host lookup is
    performed before that handling, so an unknown host still raises
    ``DockerActionError`` to the caller.

    All caller-supplied values are passed through ``shlex.quote`` before
    being interpolated into a remote shell command.
    """

    # Lower-cased stderr fragments meaning that sudo itself could not be
    # used (as opposed to the docker command failing under sudo).
    _SUDO_UNAVAILABLE_MARKERS = (
        "sudo: a password is required",
        "sudo: no tty present",
        "not in the sudoers",
        "sudo: permission denied",
        "sudo: command not found",
        "sudo: not found",  # wording used by dash/sh when sudo is missing
    )

    def __init__(self):
        # SSH credentials come from the application settings.
        self.ssh_key_path = settings.ssh_key_path
        self.ssh_user = settings.ssh_user
        self.connect_timeout = 5  # seconds allowed for the SSH connection
        self.exec_timeout = 30  # Longer timeout for actions
        # One lock per "host_id:container_id" key, created lazily.
        self._locks: Dict[str, asyncio.Lock] = {}

    # ------------------------------------------------------------------
    # Low-level helpers
    # ------------------------------------------------------------------

    def _get_lock(self, container_key: str) -> asyncio.Lock:
        """Get or create a lock for a container to prevent concurrent actions."""
        if container_key not in self._locks:
            self._locks[container_key] = asyncio.Lock()
        return self._locks[container_key]

    async def _ssh_connect(self, host_ip: str) -> asyncssh.SSHClientConnection:
        """Establish an SSH connection to a host.

        Raises:
            DockerActionError: if asyncssh is not installed, the connection
                times out after ``connect_timeout`` seconds, or it fails
                for any other reason.
        """
        if asyncssh is None:
            raise DockerActionError(
                "Missing dependency: asyncssh. Install it to enable Docker container actions over SSH."
            )
        try:
            return await asyncio.wait_for(
                asyncssh.connect(
                    host_ip,
                    username=self.ssh_user,
                    client_keys=[self.ssh_key_path],
                    # NOTE(security): known_hosts=None disables host key
                    # verification, so the remote host is not authenticated.
                    known_hosts=None,
                ),
                timeout=self.connect_timeout,
            )
        except asyncio.TimeoutError as e:
            raise DockerActionError(f"SSH connection timeout to {host_ip}") from e
        except asyncssh.Error as e:
            raise DockerActionError(f"SSH error: {e}") from e
        except Exception as e:
            raise DockerActionError(f"Connection failed: {e}") from e

    async def _ssh_exec(
        self,
        conn: asyncssh.SSHClientConnection,
        command: str,
        timeout: Optional[int] = None
    ) -> Dict[str, Any]:
        """Execute a command and return a result dict.

        Args:
            conn: Open SSH connection.
            command: Shell command line to run on the remote host.
            timeout: Seconds to wait; a falsy value selects ``exec_timeout``.

        Returns:
            Dict with ``stdout``, ``stderr``, ``exit_code`` and ``success``
            (``True`` only when the exit status is 0).

        Raises:
            DockerActionError: on timeout or any execution error.
        """
        timeout = timeout or self.exec_timeout
        try:
            result = await asyncio.wait_for(
                conn.run(command, check=False),
                timeout=timeout,
            )
        except asyncio.TimeoutError as e:
            raise DockerActionError("Command timeout") from e
        except Exception as e:
            raise DockerActionError(f"Execution error: {e}") from e
        return {
            "stdout": result.stdout or "",
            "stderr": result.stderr or "",
            "exit_code": result.exit_status,
            "success": result.exit_status == 0,
        }

    @classmethod
    def _is_sudo_unavailable(cls, stderr: str) -> bool:
        """Return True when *stderr* shows that sudo itself could not run."""
        lowered = (stderr or "").lower()
        return any(marker in lowered for marker in cls._SUDO_UNAVAILABLE_MARKERS)

    async def _run_docker(
        self,
        conn: asyncssh.SSHClientConnection,
        cmd: str,
        use_sudo_state: Dict[str, Optional[bool]],
        timeout: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Run a docker command, preferring sudo -n to avoid docker.sock permission issues.

        use_sudo_state is a mutable dict holding a cached decision for the
        current connection under the ``"value"`` key: ``True`` (use
        ``sudo -n``), ``False`` (run plain) or ``None`` (not decided yet).
        When undecided, the command is tried with ``sudo -n`` first and
        plain second, and the decision is stored for later calls.

        Returns:
            The result dict of the attempt that is reported to the caller.
            When both attempts fail, the plain result is returned with the
            sudo stderr appended as ``sudo_err: ...``.
        """
        use_sudo = use_sudo_state.get("value")
        if use_sudo is True:
            return await self._ssh_exec(conn, f"sudo -n {cmd}", timeout=timeout)
        if use_sudo is False:
            return await self._ssh_exec(conn, cmd, timeout=timeout)

        # First try with sudo -n
        sudo_res = await self._ssh_exec(conn, f"sudo -n {cmd}", timeout=timeout)
        if sudo_res["success"]:
            use_sudo_state["value"] = True
            return sudo_res

        # If sudo is unavailable, fall back to plain docker
        if self._is_sudo_unavailable(sudo_res.get("stderr", "")):
            use_sudo_state["value"] = False
            return await self._ssh_exec(conn, cmd, timeout=timeout)

        # Sudo ran but failed; try plain docker (rootless docker)
        plain_res = await self._ssh_exec(conn, cmd, timeout=timeout)
        if plain_res["success"]:
            use_sudo_state["value"] = False
            return plain_res

        # Keep sudo as preferred (best chance for next commands)
        use_sudo_state["value"] = True
        merged = (plain_res.get("stderr") or "").strip()
        sudo_err = (sudo_res.get("stderr") or "").strip()
        if sudo_err:
            merged = (merged + "\n" if merged else "") + f"sudo_err: {sudo_err}"
        plain_res["stderr"] = merged
        return plain_res

    async def _get_host_info(self, host_id: str) -> Dict[str, Any]:
        """Get host information (``id``, ``name``, ``ip``) from the database.

        Raises:
            DockerActionError: if no host with *host_id* exists.
        """
        async with async_session_maker() as session:
            host_repo = HostRepository(session)
            host = await host_repo.get(host_id)
            if not host:
                raise DockerActionError(f"Host not found: {host_id}")
            return {
                "id": host.id,
                "name": host.name,
                "ip": host.ip_address,
            }

    # ------------------------------------------------------------------
    # Shared building blocks for the public actions
    # ------------------------------------------------------------------

    @staticmethod
    def _quote(value: Any) -> str:
        """Return *value* as one shell-safe token for the remote command line."""
        return shlex.quote(str(value))

    @staticmethod
    def _count_lines(text: str) -> int:
        """Count newline-separated lines, ignoring one terminating newline."""
        if not text:
            return 0
        body = text[:-1] if text.endswith("\n") else text
        return body.count("\n") + 1

    @staticmethod
    def _action_result(
        result: Dict[str, Any],
        *,
        id_key: str,
        target_id: str,
        action: str,
        success_message: str,
        failure_message: str,
    ) -> Dict[str, Any]:
        """Convert a command result dict into the action response dict."""
        ok = result["success"]
        return {
            "success": ok,
            "message": success_message if ok else failure_message,
            id_key: target_id,
            "action": action,
            "output": result["stdout"].strip(),
            "error": None if ok else result["stderr"].strip(),
        }

    @staticmethod
    def _error_result(
        error: Exception,
        *,
        id_key: str,
        target_id: str,
        action: str,
    ) -> Dict[str, Any]:
        """Build the action response dict for a ``DockerActionError``."""
        text = str(error)
        return {
            "success": False,
            "message": text,
            id_key: target_id,
            "action": action,
            "output": None,
            "error": text,
        }

    async def _run_action(
        self,
        host_ip: str,
        command: str,
        *,
        id_key: str,
        target_id: str,
        action: str,
        success_message: str,
        failure_message: str,
        timeout: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Connect, run one docker command and return the action response.

        The connection is always closed; ``DockerActionError`` is converted
        into a failure response instead of propagating.
        """
        try:
            conn = await self._ssh_connect(host_ip)
            try:
                result = await self._run_docker(
                    conn, command, {"value": None}, timeout=timeout
                )
            finally:
                conn.close()
        except DockerActionError as e:
            return self._error_result(
                e, id_key=id_key, target_id=target_id, action=action
            )
        return self._action_result(
            result,
            id_key=id_key,
            target_id=target_id,
            action=action,
            success_message=success_message,
            failure_message=failure_message,
        )

    async def _inspect_field(
        self,
        conn: asyncssh.SSHClientConnection,
        container_id: str,
        template: str,
        use_sudo_state: Dict[str, Optional[bool]],
    ) -> Dict[str, Any]:
        """Run ``docker inspect --format '<template>'`` for one container.

        *template* is a trusted Go-template literal supplied by this class;
        only *container_id* is quoted.
        """
        command = f"docker inspect --format '{template}' {self._quote(container_id)}"
        return await self._run_docker(conn, command, use_sudo_state)

    # ------------------------------------------------------------------
    # Container actions
    # ------------------------------------------------------------------

    async def start_container(
        self,
        host_id: str,
        container_id: str
    ) -> Dict[str, Any]:
        """Start a stopped container.

        Returns:
            Dict with ``success``, ``message``, ``container_id``,
            ``action`` ("start"), ``output`` and ``error``.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        async with self._get_lock(f"{host_id}:{container_id}"):
            return await self._run_action(
                host["ip"],
                f"docker start {self._quote(container_id)}",
                id_key="container_id",
                target_id=container_id,
                action="start",
                success_message="Container started successfully",
                failure_message="Failed to start container",
            )

    async def stop_container(
        self,
        host_id: str,
        container_id: str,
        timeout: int = 10
    ) -> Dict[str, Any]:
        """Stop a running container.

        Args:
            timeout: Value passed to ``docker stop -t``; the SSH command
                itself is allowed ``timeout + 10`` seconds.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        async with self._get_lock(f"{host_id}:{container_id}"):
            # _run_docker already tries sudo first and falls back to plain
            # docker, so the command is not attempted under sudo twice.
            return await self._run_action(
                host["ip"],
                f"docker stop -t {self._quote(timeout)} {self._quote(container_id)}",
                id_key="container_id",
                target_id=container_id,
                action="stop",
                success_message="Container stopped successfully",
                failure_message="Failed to stop container",
                timeout=timeout + 10,
            )

    async def restart_container(
        self,
        host_id: str,
        container_id: str,
        timeout: int = 10
    ) -> Dict[str, Any]:
        """Restart a container.

        Args:
            timeout: Value passed to ``docker restart -t``; the SSH command
                itself is allowed ``timeout + 15`` seconds.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        async with self._get_lock(f"{host_id}:{container_id}"):
            return await self._run_action(
                host["ip"],
                f"docker restart -t {self._quote(timeout)} {self._quote(container_id)}",
                id_key="container_id",
                target_id=container_id,
                action="restart",
                success_message="Container restarted successfully",
                failure_message="Failed to restart container",
                timeout=timeout + 15,
            )

    async def remove_container(
        self,
        host_id: str,
        container_id: str,
        force: bool = False,
        remove_volumes: bool = False
    ) -> Dict[str, Any]:
        """Remove a container.

        Args:
            force: Add ``-f`` to ``docker rm``.
            remove_volumes: Add ``-v`` to ``docker rm``.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        parts = ["docker", "rm"]
        if force:
            parts.append("-f")
        if remove_volumes:
            parts.append("-v")
        parts.append(self._quote(container_id))
        async with self._get_lock(f"{host_id}:{container_id}"):
            return await self._run_action(
                host["ip"],
                " ".join(parts),
                id_key="container_id",
                target_id=container_id,
                action="remove",
                success_message="Container removed successfully",
                failure_message="Failed to remove container",
            )

    async def get_container_logs(
        self,
        host_id: str,
        container_id: str,
        tail: int = 200,
        timestamps: bool = False,
        since: Optional[str] = None
    ) -> Dict[str, Any]:
        """Get container logs.

        stderr of ``docker logs`` is merged into stdout (``2>&1``), so the
        returned ``logs`` text contains both streams.

        Returns:
            Dict with ``container_id``, ``container_name``, ``logs`` and
            ``lines`` (number of lines in ``logs``). On an SSH/execution
            error ``logs`` carries the error text and ``lines`` is 0.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        try:
            conn = await self._ssh_connect(host["ip"])
            try:
                use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
                flags = [f"--tail {self._quote(tail)}"]
                if timestamps:
                    flags.append("--timestamps")
                if since:
                    flags.append(f"--since {self._quote(since)}")
                flags_str = " ".join(flags)

                result = await self._run_docker(
                    conn,
                    f"docker logs {flags_str} {self._quote(container_id)} 2>&1",
                    use_sudo_state,
                    timeout=30,
                )

                # Get container name
                name_result = await self._inspect_field(
                    conn, container_id, "{{.Name}}", use_sudo_state
                )
                container_name = name_result["stdout"].strip().lstrip("/")

                logs = result["stdout"]
                return {
                    "container_id": container_id,
                    "container_name": container_name,
                    "logs": logs,
                    "lines": self._count_lines(logs),
                }
            finally:
                conn.close()
        except DockerActionError as e:
            return {
                "container_id": container_id,
                "container_name": "",
                "logs": f"Error retrieving logs: {e}",
                "lines": 0,
            }

    async def inspect_container(
        self,
        host_id: str,
        container_id: str
    ) -> Dict[str, Any]:
        """Get detailed container information.

        Returns:
            Dict with ``container_id``, ``container_name`` and
            ``inspect_data``. ``inspect_data`` is the parsed
            ``docker inspect`` object, or ``{"error": ...}`` when the
            command fails or its output is not a usable JSON object.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        try:
            conn = await self._ssh_connect(host["ip"])
            try:
                result = await self._run_docker(
                    conn,
                    f"docker inspect {self._quote(container_id)}",
                    {"value": None},
                )
            finally:
                conn.close()
        except DockerActionError as e:
            return {
                "container_id": container_id,
                "container_name": "",
                "inspect_data": {"error": str(e)},
            }

        inspect_data: Any = None
        if result["success"]:
            try:
                inspect_data = json.loads(result["stdout"])
            except json.JSONDecodeError:
                inspect_data = None
            # docker inspect prints a JSON array; take its first element and
            # treat an empty array as "nothing usable".
            if isinstance(inspect_data, list):
                inspect_data = inspect_data[0] if inspect_data else None

        if isinstance(inspect_data, dict):
            return {
                "container_id": container_id,
                "container_name": (inspect_data.get("Name") or "").lstrip("/"),
                "inspect_data": inspect_data,
            }
        return {
            "container_id": container_id,
            "container_name": "",
            "inspect_data": {"error": result["stderr"] or "Failed to parse inspect data"},
        }

    async def redeploy_container(
        self,
        host_id: str,
        container_id: str
    ) -> Dict[str, Any]:
        """Redeploy a container by pulling latest image and recreating.

        This only works for containers started with docker-compose or with
        enough metadata to recreate them.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        async with self._get_lock(f"{host_id}:{container_id}"):
            try:
                conn = await self._ssh_connect(host["ip"])
                try:
                    return await self._redeploy_on(conn, container_id)
                finally:
                    conn.close()
            except DockerActionError as e:
                return self._error_result(
                    e,
                    id_key="container_id",
                    target_id=container_id,
                    action="redeploy",
                )

    async def _redeploy_on(
        self,
        conn: asyncssh.SSHClientConnection,
        container_id: str,
    ) -> Dict[str, Any]:
        """Run the redeploy steps on an already open connection."""
        use_sudo_state: Dict[str, Optional[bool]] = {"value": None}

        # Get container info first
        inspect_result = await self._inspect_field(
            conn, container_id, "{{.Config.Image}}", use_sudo_state
        )
        if not inspect_result["success"]:
            return {
                "success": False,
                "message": "Failed to get container image",
                "container_id": container_id,
                "action": "redeploy",
                "output": None,
                "error": inspect_result["stderr"],
            }
        image = inspect_result["stdout"].strip()

        # Check if it's a compose container
        labels_result = await self._inspect_field(
            conn,
            container_id,
            '{{index .Config.Labels "com.docker.compose.project.working_dir"}}',
            use_sudo_state,
        )
        compose_dir = labels_result["stdout"].strip()

        if compose_dir:
            # Use docker compose to redeploy
            service_result = await self._inspect_field(
                conn,
                container_id,
                '{{index .Config.Labels "com.docker.compose.service"}}',
                use_sudo_state,
            )
            service_name = service_result["stdout"].strip()
            # An empty service name is left out (not passed as '') so that
            # compose keeps acting on the whole project, as before.
            service_arg = self._quote(service_name) if service_name else ""
            workdir = self._quote(compose_dir)

            def compose_command(prefix: str) -> str:
                # Keep working directory change in shell, run docker via prefix
                return (
                    f"cd {workdir} && {prefix}docker compose pull {service_arg}"
                    f" && {prefix}docker compose up -d {service_arg}"
                )

            if use_sudo_state.get("value") is False:
                # The inspect calls already established that plain docker is
                # the working mode; do not touch the daemon reachable via sudo.
                result = await self._ssh_exec(conn, compose_command(""), timeout=120)
            else:
                result = await self._ssh_exec(
                    conn, compose_command("sudo -n "), timeout=120
                )
                if not result["success"]:
                    # Fallback: try without sudo (rootless docker / sudo unavailable)
                    result = await self._ssh_exec(
                        conn, compose_command(""), timeout=120
                    )
        else:
            # Simple pull and restart
            pull_result = await self._run_docker(
                conn,
                f"docker pull {self._quote(image)}",
                use_sudo_state,
                timeout=120,
            )
            if not pull_result["success"]:
                return {
                    "success": False,
                    "message": "Failed to pull image",
                    "container_id": container_id,
                    "action": "redeploy",
                    "output": pull_result["stdout"],
                    "error": pull_result["stderr"],
                }
            # NOTE(review): `docker restart` restarts the existing container,
            # which stays bound to the image it was created from; the freshly
            # pulled image is only used once the container is recreated.
            result = await self._run_docker(
                conn,
                f"docker restart {self._quote(container_id)}",
                use_sudo_state,
            )

        return self._action_result(
            result,
            id_key="container_id",
            target_id=container_id,
            action="redeploy",
            success_message="Container redeployed successfully",
            failure_message="Failed to redeploy container",
        )

    async def remove_image(
        self,
        host_id: str,
        image_id: str,
        force: bool = False
    ) -> Dict[str, Any]:
        """Remove a Docker image.

        Args:
            force: Add ``-f`` to ``docker rmi``.

        Returns:
            Dict with ``success``, ``message``, ``image_id``, ``action``
            ("remove"), ``output`` and ``error``.

        Raises:
            DockerActionError: if the host does not exist.
        """
        host = await self._get_host_info(host_id)
        parts = ["docker", "rmi"]
        if force:
            parts.append("-f")
        parts.append(self._quote(image_id))
        return await self._run_action(
            host["ip"],
            " ".join(parts),
            id_key="image_id",
            target_id=image_id,
            action="remove",
            success_message="Image removed successfully",
            failure_message="Failed to remove image",
        )
# Singleton instance: module-level service object created at import time.
# Construction reads settings.ssh_key_path and settings.ssh_user, and the
# per-container locks are stored on this instance.
docker_actions = DockerActionsService()