Some checks failed
Tests / Backend Tests (Python) (3.10) (push) Has been cancelled
Tests / Backend Tests (Python) (3.11) (push) Has been cancelled
Tests / Backend Tests (Python) (3.12) (push) Has been cancelled
Tests / Frontend Tests (JS) (push) Has been cancelled
Tests / Integration Tests (push) Has been cancelled
Tests / All Tests Passed (push) Has been cancelled
618 lines
23 KiB
Python
618 lines
23 KiB
Python
"""Docker container actions service.
|
|
|
|
Provides methods to execute actions on Docker containers via SSH.
|
|
"""
|
|
from __future__ import annotations

import asyncio
import json
import logging
import shlex
from typing import Dict, Any, Optional

try:
    import asyncssh
except ModuleNotFoundError:  # pragma: no cover
    asyncssh = None

from app.core.config import settings
from app.models.database import async_session_maker
from app.crud.host import HostRepository
from app.crud.docker_container import DockerContainerRepository
|
|
|
|
# Module-level logger, registered under the fixed name "homelab.docker.actions"
# (not derived from __name__).
logger = logging.getLogger("homelab.docker.actions")
|
|
|
|
# Standard log prefix
|
|
# Tag presumably prepended to this module's log messages; no use of it is
# visible in this file -- verify against callers before removing.
LOG_PREFIX = "[DOCKER]"
|
|
|
|
|
|
class DockerActionError(Exception):
    """Raised when a Docker action cannot be carried out.

    Used in this module for host lookup failures, SSH connection failures,
    and remote command timeouts or execution errors.
    """
|
|
|
|
|
|
class DockerActionsService:
|
|
"""Service for executing Docker container actions via SSH."""
|
|
|
|
def __init__(self):
|
|
self.ssh_key_path = settings.ssh_key_path
|
|
self.ssh_user = settings.ssh_user
|
|
self.connect_timeout = 5
|
|
self.exec_timeout = 30 # Longer timeout for actions
|
|
self._locks: Dict[str, asyncio.Lock] = {}
|
|
|
|
def _get_lock(self, container_key: str) -> asyncio.Lock:
|
|
"""Get or create a lock for a container to prevent concurrent actions."""
|
|
if container_key not in self._locks:
|
|
self._locks[container_key] = asyncio.Lock()
|
|
return self._locks[container_key]
|
|
|
|
async def _ssh_connect(self, host_ip: str) -> asyncssh.SSHClientConnection:
|
|
"""Establish SSH connection to a host."""
|
|
if asyncssh is None:
|
|
raise DockerActionError(
|
|
"Missing dependency: asyncssh. Install it to enable Docker container actions over SSH."
|
|
)
|
|
try:
|
|
conn = await asyncio.wait_for(
|
|
asyncssh.connect(
|
|
host_ip,
|
|
username=self.ssh_user,
|
|
client_keys=[self.ssh_key_path],
|
|
known_hosts=None,
|
|
),
|
|
timeout=self.connect_timeout
|
|
)
|
|
return conn
|
|
except asyncio.TimeoutError:
|
|
raise DockerActionError(f"SSH connection timeout to {host_ip}")
|
|
except asyncssh.Error as e:
|
|
raise DockerActionError(f"SSH error: {e}")
|
|
except Exception as e:
|
|
raise DockerActionError(f"Connection failed: {e}")
|
|
|
|
async def _ssh_exec(
|
|
self,
|
|
conn: asyncssh.SSHClientConnection,
|
|
command: str,
|
|
timeout: Optional[int] = None
|
|
) -> Dict[str, Any]:
|
|
"""Execute a command and return result dict."""
|
|
timeout = timeout or self.exec_timeout
|
|
try:
|
|
result = await asyncio.wait_for(
|
|
conn.run(command, check=False),
|
|
timeout=timeout
|
|
)
|
|
return {
|
|
"stdout": result.stdout or "",
|
|
"stderr": result.stderr or "",
|
|
"exit_code": result.exit_status,
|
|
"success": result.exit_status == 0
|
|
}
|
|
except asyncio.TimeoutError:
|
|
raise DockerActionError(f"Command timeout")
|
|
except Exception as e:
|
|
raise DockerActionError(f"Execution error: {e}")
|
|
|
|
async def _run_docker(
|
|
self,
|
|
conn: asyncssh.SSHClientConnection,
|
|
cmd: str,
|
|
use_sudo_state: Dict[str, Optional[bool]],
|
|
timeout: Optional[int] = None,
|
|
) -> Dict[str, Any]:
|
|
"""Run a docker command, preferring sudo -n to avoid docker.sock permission issues.
|
|
|
|
use_sudo_state is a mutable dict holding a cached decision for the current connection.
|
|
"""
|
|
|
|
def _is_sudo_unavailable(stderr: str) -> bool:
|
|
s = (stderr or "").lower()
|
|
return any(
|
|
token in s
|
|
for token in (
|
|
"sudo: a password is required",
|
|
"sudo: no tty present",
|
|
"not in the sudoers",
|
|
"sudo: permission denied",
|
|
"sudo: command not found",
|
|
)
|
|
)
|
|
|
|
use_sudo = use_sudo_state.get("value")
|
|
|
|
if use_sudo is True:
|
|
return await self._ssh_exec(conn, f"sudo -n {cmd}", timeout=timeout)
|
|
if use_sudo is False:
|
|
return await self._ssh_exec(conn, cmd, timeout=timeout)
|
|
|
|
# First try with sudo -n
|
|
sudo_res = await self._ssh_exec(conn, f"sudo -n {cmd}", timeout=timeout)
|
|
if sudo_res["success"]:
|
|
use_sudo_state["value"] = True
|
|
return sudo_res
|
|
|
|
# If sudo is unavailable, fall back to plain docker
|
|
if _is_sudo_unavailable(sudo_res.get("stderr", "")):
|
|
use_sudo_state["value"] = False
|
|
return await self._ssh_exec(conn, cmd, timeout=timeout)
|
|
|
|
# Sudo ran but failed; try plain docker (rootless docker)
|
|
plain_res = await self._ssh_exec(conn, cmd, timeout=timeout)
|
|
if plain_res["success"]:
|
|
use_sudo_state["value"] = False
|
|
return plain_res
|
|
|
|
# Keep sudo as preferred (best chance for next commands)
|
|
use_sudo_state["value"] = True
|
|
merged = (plain_res.get("stderr") or "").strip()
|
|
sudo_err = (sudo_res.get("stderr") or "").strip()
|
|
if sudo_err:
|
|
merged = (merged + "\n" if merged else "") + f"sudo_err: {sudo_err}"
|
|
plain_res["stderr"] = merged
|
|
return plain_res
|
|
|
|
async def _get_host_info(self, host_id: str) -> Dict[str, Any]:
|
|
"""Get host information from database."""
|
|
async with async_session_maker() as session:
|
|
host_repo = HostRepository(session)
|
|
host = await host_repo.get(host_id)
|
|
|
|
if not host:
|
|
raise DockerActionError(f"Host not found: {host_id}")
|
|
|
|
return {
|
|
"id": host.id,
|
|
"name": host.name,
|
|
"ip": host.ip_address
|
|
}
|
|
|
|
async def start_container(
|
|
self,
|
|
host_id: str,
|
|
container_id: str
|
|
) -> Dict[str, Any]:
|
|
"""Start a stopped container."""
|
|
host = await self._get_host_info(host_id)
|
|
lock_key = f"{host_id}:{container_id}"
|
|
|
|
async with self._get_lock(lock_key):
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
result = await self._run_docker(conn, f"docker start {container_id}", use_sudo_state)
|
|
|
|
return {
|
|
"success": result["success"],
|
|
"message": "Container started successfully" if result["success"] else "Failed to start container",
|
|
"container_id": container_id,
|
|
"action": "start",
|
|
"output": result["stdout"].strip(),
|
|
"error": result["stderr"].strip() if not result["success"] else None
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"success": False,
|
|
"message": str(e),
|
|
"container_id": container_id,
|
|
"action": "start",
|
|
"output": None,
|
|
"error": str(e)
|
|
}
|
|
|
|
async def stop_container(
|
|
self,
|
|
host_id: str,
|
|
container_id: str,
|
|
timeout: int = 10
|
|
) -> Dict[str, Any]:
|
|
"""Stop a running container."""
|
|
host = await self._get_host_info(host_id)
|
|
lock_key = f"{host_id}:{container_id}"
|
|
|
|
async with self._get_lock(lock_key):
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
result = await self._ssh_exec(
|
|
conn,
|
|
f"sudo -n docker stop -t {timeout} {container_id}",
|
|
timeout=timeout + 10
|
|
)
|
|
|
|
if not result["success"]:
|
|
# Use the common docker runner to handle sudo fallback and rootless docker
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker stop -t {timeout} {container_id}",
|
|
use_sudo_state,
|
|
timeout=timeout + 10,
|
|
)
|
|
|
|
return {
|
|
"success": result["success"],
|
|
"message": "Container stopped successfully" if result["success"] else "Failed to stop container",
|
|
"container_id": container_id,
|
|
"action": "stop",
|
|
"output": result["stdout"].strip(),
|
|
"error": result["stderr"].strip() if not result["success"] else None
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"success": False,
|
|
"message": str(e),
|
|
"container_id": container_id,
|
|
"action": "stop",
|
|
"output": None,
|
|
"error": str(e)
|
|
}
|
|
|
|
async def restart_container(
|
|
self,
|
|
host_id: str,
|
|
container_id: str,
|
|
timeout: int = 10
|
|
) -> Dict[str, Any]:
|
|
"""Restart a container."""
|
|
host = await self._get_host_info(host_id)
|
|
lock_key = f"{host_id}:{container_id}"
|
|
|
|
async with self._get_lock(lock_key):
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
result = await self._ssh_exec(
|
|
conn,
|
|
f"sudo -n docker restart -t {timeout} {container_id}",
|
|
timeout=timeout + 15
|
|
)
|
|
|
|
if not result["success"]:
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker restart -t {timeout} {container_id}",
|
|
use_sudo_state,
|
|
timeout=timeout + 15,
|
|
)
|
|
|
|
return {
|
|
"success": result["success"],
|
|
"message": "Container restarted successfully" if result["success"] else "Failed to restart container",
|
|
"container_id": container_id,
|
|
"action": "restart",
|
|
"output": result["stdout"].strip(),
|
|
"error": result["stderr"].strip() if not result["success"] else None
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"success": False,
|
|
"message": str(e),
|
|
"container_id": container_id,
|
|
"action": "restart",
|
|
"output": None,
|
|
"error": str(e)
|
|
}
|
|
|
|
async def remove_container(
|
|
self,
|
|
host_id: str,
|
|
container_id: str,
|
|
force: bool = False,
|
|
remove_volumes: bool = False
|
|
) -> Dict[str, Any]:
|
|
"""Remove a container."""
|
|
host = await self._get_host_info(host_id)
|
|
lock_key = f"{host_id}:{container_id}"
|
|
|
|
async with self._get_lock(lock_key):
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
flags = []
|
|
if force:
|
|
flags.append("-f")
|
|
if remove_volumes:
|
|
flags.append("-v")
|
|
|
|
flags_str = " ".join(flags)
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker rm {flags_str} {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
|
|
return {
|
|
"success": result["success"],
|
|
"message": "Container removed successfully" if result["success"] else "Failed to remove container",
|
|
"container_id": container_id,
|
|
"action": "remove",
|
|
"output": result["stdout"].strip(),
|
|
"error": result["stderr"].strip() if not result["success"] else None
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"success": False,
|
|
"message": str(e),
|
|
"container_id": container_id,
|
|
"action": "remove",
|
|
"output": None,
|
|
"error": str(e)
|
|
}
|
|
|
|
async def get_container_logs(
|
|
self,
|
|
host_id: str,
|
|
container_id: str,
|
|
tail: int = 200,
|
|
timestamps: bool = False,
|
|
since: Optional[str] = None
|
|
) -> Dict[str, Any]:
|
|
"""Get container logs."""
|
|
host = await self._get_host_info(host_id)
|
|
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
flags = [f"--tail {tail}"]
|
|
if timestamps:
|
|
flags.append("--timestamps")
|
|
if since:
|
|
flags.append(f"--since {since}")
|
|
|
|
flags_str = " ".join(flags)
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker logs {flags_str} {container_id} 2>&1",
|
|
use_sudo_state,
|
|
timeout=30,
|
|
)
|
|
|
|
# Get container name
|
|
name_result = await self._run_docker(
|
|
conn,
|
|
f"docker inspect --format '{{{{.Name}}}}' {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
container_name = name_result["stdout"].strip().lstrip("/")
|
|
|
|
logs = result["stdout"]
|
|
lines = len(logs.split('\n')) if logs else 0
|
|
|
|
return {
|
|
"container_id": container_id,
|
|
"container_name": container_name,
|
|
"logs": logs,
|
|
"lines": lines
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"container_id": container_id,
|
|
"container_name": "",
|
|
"logs": f"Error retrieving logs: {e}",
|
|
"lines": 0
|
|
}
|
|
|
|
async def inspect_container(
|
|
self,
|
|
host_id: str,
|
|
container_id: str
|
|
) -> Dict[str, Any]:
|
|
"""Get detailed container information."""
|
|
host = await self._get_host_info(host_id)
|
|
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker inspect {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
|
|
if result["success"]:
|
|
import json
|
|
try:
|
|
inspect_data = json.loads(result["stdout"])
|
|
if isinstance(inspect_data, list) and len(inspect_data) > 0:
|
|
inspect_data = inspect_data[0]
|
|
|
|
container_name = inspect_data.get("Name", "").lstrip("/")
|
|
|
|
return {
|
|
"container_id": container_id,
|
|
"container_name": container_name,
|
|
"inspect_data": inspect_data
|
|
}
|
|
except json.JSONDecodeError:
|
|
pass
|
|
|
|
return {
|
|
"container_id": container_id,
|
|
"container_name": "",
|
|
"inspect_data": {"error": result["stderr"] or "Failed to parse inspect data"}
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"container_id": container_id,
|
|
"container_name": "",
|
|
"inspect_data": {"error": str(e)}
|
|
}
|
|
|
|
async def redeploy_container(
|
|
self,
|
|
host_id: str,
|
|
container_id: str
|
|
) -> Dict[str, Any]:
|
|
"""Redeploy a container by pulling latest image and recreating.
|
|
|
|
This only works for containers started with docker-compose or with
|
|
enough metadata to recreate them.
|
|
"""
|
|
host = await self._get_host_info(host_id)
|
|
lock_key = f"{host_id}:{container_id}"
|
|
|
|
async with self._get_lock(lock_key):
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
# Get container info first
|
|
inspect_result = await self._run_docker(
|
|
conn,
|
|
f"docker inspect --format '{{{{.Config.Image}}}}' {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
|
|
if not inspect_result["success"]:
|
|
return {
|
|
"success": False,
|
|
"message": "Failed to get container image",
|
|
"container_id": container_id,
|
|
"action": "redeploy",
|
|
"output": None,
|
|
"error": inspect_result["stderr"]
|
|
}
|
|
|
|
image = inspect_result["stdout"].strip()
|
|
|
|
# Check if it's a compose container
|
|
labels_result = await self._run_docker(
|
|
conn,
|
|
f"docker inspect --format '{{{{index .Config.Labels \"com.docker.compose.project.working_dir\"}}}}' {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
|
|
compose_dir = labels_result["stdout"].strip()
|
|
|
|
if compose_dir:
|
|
# Use docker-compose to redeploy
|
|
service_result = await self._run_docker(
|
|
conn,
|
|
f"docker inspect --format '{{{{index .Config.Labels \"com.docker.compose.service\"}}}}' {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
service_name = service_result["stdout"].strip()
|
|
|
|
# Pull and recreate with docker-compose
|
|
# Keep working directory change in shell, run docker via sudo -n
|
|
result = await self._ssh_exec(
|
|
conn,
|
|
f"cd {compose_dir} && sudo -n docker compose pull {service_name} && sudo -n docker compose up -d {service_name}",
|
|
timeout=120,
|
|
)
|
|
|
|
if not result["success"]:
|
|
# Fallback: try without sudo (rootless docker / sudo unavailable)
|
|
result = await self._ssh_exec(
|
|
conn,
|
|
f"cd {compose_dir} && docker compose pull {service_name} && docker compose up -d {service_name}",
|
|
timeout=120,
|
|
)
|
|
else:
|
|
# Simple pull and restart
|
|
pull_result = await self._run_docker(
|
|
conn,
|
|
f"docker pull {image}",
|
|
use_sudo_state,
|
|
timeout=120,
|
|
)
|
|
|
|
if not pull_result["success"]:
|
|
return {
|
|
"success": False,
|
|
"message": "Failed to pull image",
|
|
"container_id": container_id,
|
|
"action": "redeploy",
|
|
"output": pull_result["stdout"],
|
|
"error": pull_result["stderr"]
|
|
}
|
|
|
|
# Restart container with new image
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker restart {container_id}",
|
|
use_sudo_state,
|
|
)
|
|
|
|
return {
|
|
"success": result["success"],
|
|
"message": "Container redeployed successfully" if result["success"] else "Failed to redeploy container",
|
|
"container_id": container_id,
|
|
"action": "redeploy",
|
|
"output": result["stdout"].strip(),
|
|
"error": result["stderr"].strip() if not result["success"] else None
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"success": False,
|
|
"message": str(e),
|
|
"container_id": container_id,
|
|
"action": "redeploy",
|
|
"output": None,
|
|
"error": str(e)
|
|
}
|
|
|
|
|
|
async def remove_image(
|
|
self,
|
|
host_id: str,
|
|
image_id: str,
|
|
force: bool = False
|
|
) -> Dict[str, Any]:
|
|
"""Remove a Docker image."""
|
|
host = await self._get_host_info(host_id)
|
|
|
|
try:
|
|
conn = await self._ssh_connect(host["ip"])
|
|
try:
|
|
use_sudo_state: Dict[str, Optional[bool]] = {"value": None}
|
|
flags = "-f" if force else ""
|
|
result = await self._run_docker(
|
|
conn,
|
|
f"docker rmi {flags} {image_id}".strip(),
|
|
use_sudo_state,
|
|
)
|
|
|
|
return {
|
|
"success": result["success"],
|
|
"message": "Image removed successfully" if result["success"] else "Failed to remove image",
|
|
"image_id": image_id,
|
|
"action": "remove",
|
|
"output": result["stdout"].strip(),
|
|
"error": result["stderr"].strip() if not result["success"] else None
|
|
}
|
|
finally:
|
|
conn.close()
|
|
except DockerActionError as e:
|
|
return {
|
|
"success": False,
|
|
"message": str(e),
|
|
"image_id": image_id,
|
|
"action": "remove",
|
|
"output": None,
|
|
"error": str(e)
|
|
}
|
|
|
|
|
|
# Singleton instance
|
|
# Shared module-level instance, created at import time. The constructor reads
# settings.ssh_key_path and settings.ssh_user, so settings must be loadable
# when this module is imported.
docker_actions = DockerActionsService()
|