"""Docker container actions service. Provides methods to execute actions on Docker containers via SSH. """ from __future__ import annotations import asyncio import logging from typing import Dict, Any, Optional try: import asyncssh except ModuleNotFoundError: # pragma: no cover asyncssh = None from app.core.config import settings from app.models.database import async_session_maker from app.crud.host import HostRepository from app.crud.docker_container import DockerContainerRepository logger = logging.getLogger("homelab.docker.actions") # Standard log prefix LOG_PREFIX = "[DOCKER]" class DockerActionError(Exception): """Error executing Docker action.""" pass class DockerActionsService: """Service for executing Docker container actions via SSH.""" def __init__(self): self.ssh_key_path = settings.ssh_key_path self.ssh_user = settings.ssh_user self.connect_timeout = 5 self.exec_timeout = 30 # Longer timeout for actions self._locks: Dict[str, asyncio.Lock] = {} def _get_lock(self, container_key: str) -> asyncio.Lock: """Get or create a lock for a container to prevent concurrent actions.""" if container_key not in self._locks: self._locks[container_key] = asyncio.Lock() return self._locks[container_key] async def _ssh_connect(self, host_ip: str) -> asyncssh.SSHClientConnection: """Establish SSH connection to a host.""" if asyncssh is None: raise DockerActionError( "Missing dependency: asyncssh. Install it to enable Docker container actions over SSH." ) try: conn = await asyncio.wait_for( asyncssh.connect( host_ip, username=self.ssh_user, client_keys=[self.ssh_key_path], known_hosts=None, ), timeout=self.connect_timeout ) return conn except asyncio.TimeoutError: raise DockerActionError(f"SSH connection timeout to {host_ip}") except asyncssh.Error as e: raise DockerActionError(f"SSH error: {e}") except Exception as e: raise DockerActionError(f"Connection failed: {e}") async def _ssh_exec( self, conn: asyncssh.SSHClientConnection, command: str, timeout: Optional[int] = None ) -> Dict[str, Any]: """Execute a command and return result dict.""" timeout = timeout or self.exec_timeout try: result = await asyncio.wait_for( conn.run(command, check=False), timeout=timeout ) return { "stdout": result.stdout or "", "stderr": result.stderr or "", "exit_code": result.exit_status, "success": result.exit_status == 0 } except asyncio.TimeoutError: raise DockerActionError(f"Command timeout") except Exception as e: raise DockerActionError(f"Execution error: {e}") async def _run_docker( self, conn: asyncssh.SSHClientConnection, cmd: str, use_sudo_state: Dict[str, Optional[bool]], timeout: Optional[int] = None, ) -> Dict[str, Any]: """Run a docker command, preferring sudo -n to avoid docker.sock permission issues. use_sudo_state is a mutable dict holding a cached decision for the current connection. """ def _is_sudo_unavailable(stderr: str) -> bool: s = (stderr or "").lower() return any( token in s for token in ( "sudo: a password is required", "sudo: no tty present", "not in the sudoers", "sudo: permission denied", "sudo: command not found", ) ) use_sudo = use_sudo_state.get("value") if use_sudo is True: return await self._ssh_exec(conn, f"sudo -n {cmd}", timeout=timeout) if use_sudo is False: return await self._ssh_exec(conn, cmd, timeout=timeout) # First try with sudo -n sudo_res = await self._ssh_exec(conn, f"sudo -n {cmd}", timeout=timeout) if sudo_res["success"]: use_sudo_state["value"] = True return sudo_res # If sudo is unavailable, fall back to plain docker if _is_sudo_unavailable(sudo_res.get("stderr", "")): use_sudo_state["value"] = False return await self._ssh_exec(conn, cmd, timeout=timeout) # Sudo ran but failed; try plain docker (rootless docker) plain_res = await self._ssh_exec(conn, cmd, timeout=timeout) if plain_res["success"]: use_sudo_state["value"] = False return plain_res # Keep sudo as preferred (best chance for next commands) use_sudo_state["value"] = True merged = (plain_res.get("stderr") or "").strip() sudo_err = (sudo_res.get("stderr") or "").strip() if sudo_err: merged = (merged + "\n" if merged else "") + f"sudo_err: {sudo_err}" plain_res["stderr"] = merged return plain_res async def _get_host_info(self, host_id: str) -> Dict[str, Any]: """Get host information from database.""" async with async_session_maker() as session: host_repo = HostRepository(session) host = await host_repo.get(host_id) if not host: raise DockerActionError(f"Host not found: {host_id}") return { "id": host.id, "name": host.name, "ip": host.ip_address } async def start_container( self, host_id: str, container_id: str ) -> Dict[str, Any]: """Start a stopped container.""" host = await self._get_host_info(host_id) lock_key = f"{host_id}:{container_id}" async with self._get_lock(lock_key): try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} result = await self._run_docker(conn, f"docker start {container_id}", use_sudo_state) return { "success": result["success"], "message": "Container started successfully" if result["success"] else "Failed to start container", "container_id": container_id, "action": "start", "output": result["stdout"].strip(), "error": result["stderr"].strip() if not result["success"] else None } finally: conn.close() except DockerActionError as e: return { "success": False, "message": str(e), "container_id": container_id, "action": "start", "output": None, "error": str(e) } async def stop_container( self, host_id: str, container_id: str, timeout: int = 10 ) -> Dict[str, Any]: """Stop a running container.""" host = await self._get_host_info(host_id) lock_key = f"{host_id}:{container_id}" async with self._get_lock(lock_key): try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} result = await self._ssh_exec( conn, f"sudo -n docker stop -t {timeout} {container_id}", timeout=timeout + 10 ) if not result["success"]: # Use the common docker runner to handle sudo fallback and rootless docker result = await self._run_docker( conn, f"docker stop -t {timeout} {container_id}", use_sudo_state, timeout=timeout + 10, ) return { "success": result["success"], "message": "Container stopped successfully" if result["success"] else "Failed to stop container", "container_id": container_id, "action": "stop", "output": result["stdout"].strip(), "error": result["stderr"].strip() if not result["success"] else None } finally: conn.close() except DockerActionError as e: return { "success": False, "message": str(e), "container_id": container_id, "action": "stop", "output": None, "error": str(e) } async def restart_container( self, host_id: str, container_id: str, timeout: int = 10 ) -> Dict[str, Any]: """Restart a container.""" host = await self._get_host_info(host_id) lock_key = f"{host_id}:{container_id}" async with self._get_lock(lock_key): try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} result = await self._ssh_exec( conn, f"sudo -n docker restart -t {timeout} {container_id}", timeout=timeout + 15 ) if not result["success"]: result = await self._run_docker( conn, f"docker restart -t {timeout} {container_id}", use_sudo_state, timeout=timeout + 15, ) return { "success": result["success"], "message": "Container restarted successfully" if result["success"] else "Failed to restart container", "container_id": container_id, "action": "restart", "output": result["stdout"].strip(), "error": result["stderr"].strip() if not result["success"] else None } finally: conn.close() except DockerActionError as e: return { "success": False, "message": str(e), "container_id": container_id, "action": "restart", "output": None, "error": str(e) } async def remove_container( self, host_id: str, container_id: str, force: bool = False, remove_volumes: bool = False ) -> Dict[str, Any]: """Remove a container.""" host = await self._get_host_info(host_id) lock_key = f"{host_id}:{container_id}" async with self._get_lock(lock_key): try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} flags = [] if force: flags.append("-f") if remove_volumes: flags.append("-v") flags_str = " ".join(flags) result = await self._run_docker( conn, f"docker rm {flags_str} {container_id}", use_sudo_state, ) return { "success": result["success"], "message": "Container removed successfully" if result["success"] else "Failed to remove container", "container_id": container_id, "action": "remove", "output": result["stdout"].strip(), "error": result["stderr"].strip() if not result["success"] else None } finally: conn.close() except DockerActionError as e: return { "success": False, "message": str(e), "container_id": container_id, "action": "remove", "output": None, "error": str(e) } async def get_container_logs( self, host_id: str, container_id: str, tail: int = 200, timestamps: bool = False, since: Optional[str] = None ) -> Dict[str, Any]: """Get container logs.""" host = await self._get_host_info(host_id) try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} flags = [f"--tail {tail}"] if timestamps: flags.append("--timestamps") if since: flags.append(f"--since {since}") flags_str = " ".join(flags) result = await self._run_docker( conn, f"docker logs {flags_str} {container_id} 2>&1", use_sudo_state, timeout=30, ) # Get container name name_result = await self._run_docker( conn, f"docker inspect --format '{{{{.Name}}}}' {container_id}", use_sudo_state, ) container_name = name_result["stdout"].strip().lstrip("/") logs = result["stdout"] lines = len(logs.split('\n')) if logs else 0 return { "container_id": container_id, "container_name": container_name, "logs": logs, "lines": lines } finally: conn.close() except DockerActionError as e: return { "container_id": container_id, "container_name": "", "logs": f"Error retrieving logs: {e}", "lines": 0 } async def inspect_container( self, host_id: str, container_id: str ) -> Dict[str, Any]: """Get detailed container information.""" host = await self._get_host_info(host_id) try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} result = await self._run_docker( conn, f"docker inspect {container_id}", use_sudo_state, ) if result["success"]: import json try: inspect_data = json.loads(result["stdout"]) if isinstance(inspect_data, list) and len(inspect_data) > 0: inspect_data = inspect_data[0] container_name = inspect_data.get("Name", "").lstrip("/") return { "container_id": container_id, "container_name": container_name, "inspect_data": inspect_data } except json.JSONDecodeError: pass return { "container_id": container_id, "container_name": "", "inspect_data": {"error": result["stderr"] or "Failed to parse inspect data"} } finally: conn.close() except DockerActionError as e: return { "container_id": container_id, "container_name": "", "inspect_data": {"error": str(e)} } async def redeploy_container( self, host_id: str, container_id: str ) -> Dict[str, Any]: """Redeploy a container by pulling latest image and recreating. This only works for containers started with docker-compose or with enough metadata to recreate them. """ host = await self._get_host_info(host_id) lock_key = f"{host_id}:{container_id}" async with self._get_lock(lock_key): try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} # Get container info first inspect_result = await self._run_docker( conn, f"docker inspect --format '{{{{.Config.Image}}}}' {container_id}", use_sudo_state, ) if not inspect_result["success"]: return { "success": False, "message": "Failed to get container image", "container_id": container_id, "action": "redeploy", "output": None, "error": inspect_result["stderr"] } image = inspect_result["stdout"].strip() # Check if it's a compose container labels_result = await self._run_docker( conn, f"docker inspect --format '{{{{index .Config.Labels \"com.docker.compose.project.working_dir\"}}}}' {container_id}", use_sudo_state, ) compose_dir = labels_result["stdout"].strip() if compose_dir: # Use docker-compose to redeploy service_result = await self._run_docker( conn, f"docker inspect --format '{{{{index .Config.Labels \"com.docker.compose.service\"}}}}' {container_id}", use_sudo_state, ) service_name = service_result["stdout"].strip() # Pull and recreate with docker-compose # Keep working directory change in shell, run docker via sudo -n result = await self._ssh_exec( conn, f"cd {compose_dir} && sudo -n docker compose pull {service_name} && sudo -n docker compose up -d {service_name}", timeout=120, ) if not result["success"]: # Fallback: try without sudo (rootless docker / sudo unavailable) result = await self._ssh_exec( conn, f"cd {compose_dir} && docker compose pull {service_name} && docker compose up -d {service_name}", timeout=120, ) else: # Simple pull and restart pull_result = await self._run_docker( conn, f"docker pull {image}", use_sudo_state, timeout=120, ) if not pull_result["success"]: return { "success": False, "message": "Failed to pull image", "container_id": container_id, "action": "redeploy", "output": pull_result["stdout"], "error": pull_result["stderr"] } # Restart container with new image result = await self._run_docker( conn, f"docker restart {container_id}", use_sudo_state, ) return { "success": result["success"], "message": "Container redeployed successfully" if result["success"] else "Failed to redeploy container", "container_id": container_id, "action": "redeploy", "output": result["stdout"].strip(), "error": result["stderr"].strip() if not result["success"] else None } finally: conn.close() except DockerActionError as e: return { "success": False, "message": str(e), "container_id": container_id, "action": "redeploy", "output": None, "error": str(e) } async def remove_image( self, host_id: str, image_id: str, force: bool = False ) -> Dict[str, Any]: """Remove a Docker image.""" host = await self._get_host_info(host_id) try: conn = await self._ssh_connect(host["ip"]) try: use_sudo_state: Dict[str, Optional[bool]] = {"value": None} flags = "-f" if force else "" result = await self._run_docker( conn, f"docker rmi {flags} {image_id}".strip(), use_sudo_state, ) return { "success": result["success"], "message": "Image removed successfully" if result["success"] else "Failed to remove image", "image_id": image_id, "action": "remove", "output": result["stdout"].strip(), "error": result["stderr"].strip() if not result["success"] else None } finally: conn.close() except DockerActionError as e: return { "success": False, "message": str(e), "image_id": image_id, "action": "remove", "output": None, "error": str(e) } # Singleton instance docker_actions = DockerActionsService()