1703 lines
60 KiB
Python
1703 lines
60 KiB
Python
import asyncio
|
||
import json as _json
|
||
import os
|
||
import re
|
||
import html as html_mod
|
||
import logging
|
||
import mimetypes
|
||
import secrets
|
||
import string
|
||
import time
|
||
from concurrent.futures import ThreadPoolExecutor
|
||
from contextlib import asynccontextmanager
|
||
from datetime import datetime
|
||
from functools import partial
|
||
from pathlib import Path
|
||
from typing import Optional, List, Dict, Any
|
||
|
||
import frontmatter
|
||
import mistune
|
||
from fastapi import FastAPI, HTTPException, Query, Body, Depends
|
||
from fastapi.staticfiles import StaticFiles
|
||
from fastapi.responses import HTMLResponse, FileResponse, Response, StreamingResponse
|
||
from pydantic import BaseModel, Field
|
||
from starlette.middleware.base import BaseHTTPMiddleware
|
||
|
||
from backend.indexer import (
|
||
build_index,
|
||
reload_index,
|
||
index,
|
||
path_index,
|
||
vault_config,
|
||
get_vault_data,
|
||
get_vault_names,
|
||
find_file_in_index,
|
||
parse_markdown_file,
|
||
_extract_tags,
|
||
SUPPORTED_EXTENSIONS,
|
||
update_single_file,
|
||
remove_single_file,
|
||
handle_file_move,
|
||
remove_vault_from_index,
|
||
add_vault_to_index,
|
||
)
|
||
from backend.search import search, get_all_tags, advanced_search, suggest_titles, suggest_tags
|
||
from backend.image_processor import preprocess_images
|
||
from backend.attachment_indexer import rescan_vault_attachments, get_attachment_stats
|
||
from backend.vault_settings import (
|
||
get_vault_setting,
|
||
update_vault_setting,
|
||
get_all_vault_settings,
|
||
delete_vault_setting,
|
||
)
|
||
|
||
# Root logging configuration: INFO level, timestamped records tagged with
# the emitting logger's name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
)
# Module-wide application logger used throughout this file.
logger = logging.getLogger("obsigate")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Pydantic response models
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class VaultInfo(BaseModel):
    """Summary information about a configured vault."""
    # Counts reflect the in-memory index, not a live file-system scan.
    name: str = Field(description="Display name of the vault")
    file_count: int = Field(description="Number of indexed files")
    tag_count: int = Field(description="Number of unique tags")
    type: str = Field(default="VAULT", description="Type of the vault mapping (VAULT or DIR)")
|
||
|
||
|
||
class BrowseItem(BaseModel):
    """A single entry (file or directory) returned by the browse endpoint."""
    name: str
    # Vault-relative path, forward-slash separated (backslashes normalized).
    path: str
    type: str = Field(description="'file' or 'directory'")
    # Directories only: number of direct (non-recursive) children.
    children_count: Optional[int] = None
    # Files only: size in bytes.
    size: Optional[int] = None
    # Files only: lowercase suffix, e.g. ".md".
    extension: Optional[str] = None
|
||
|
||
|
||
class BrowseResponse(BaseModel):
    """Directory listing for a vault (one level, not recursive)."""
    vault: str
    # Directory that was listed; "" means the vault root.
    path: str
    items: List[BrowseItem]
|
||
|
||
|
||
class FileContentResponse(BaseModel):
    """Rendered file content with metadata."""
    vault: str
    path: str
    title: str
    tags: List[str]
    # Parsed YAML frontmatter (markdown files only; empty otherwise).
    frontmatter: Dict[str, Any]
    # Rendered HTML body.
    html: str
    # Length of the raw source text, in characters.
    raw_length: int
    extension: str
    is_markdown: bool
|
||
|
||
|
||
class FileRawResponse(BaseModel):
    """Raw text content of a file (UTF-8 decoded, undecodable bytes replaced)."""
    vault: str
    path: str
    raw: str
|
||
|
||
|
||
class FileSaveResponse(BaseModel):
    """Confirmation after saving a file."""
    status: str
    vault: str
    path: str
    # Length of the saved content string, in characters.
    size: int
|
||
|
||
|
||
class FileDeleteResponse(BaseModel):
    """Confirmation after deleting a file."""
    status: str
    vault: str
    path: str
|
||
|
||
|
||
class SearchResultItem(BaseModel):
    """A single search result."""
    vault: str
    path: str
    title: str
    tags: List[str]
    # Relevance score from backend.search — presumably higher is better;
    # see backend/search.py for the scoring scheme.
    score: int
    snippet: str
    # Modification timestamp as a string (ISO format elsewhere in this file).
    modified: str
|
||
|
||
|
||
class SearchResponse(BaseModel):
    """Full-text search response with optional pagination."""
    query: str
    vault_filter: str
    tag_filter: Optional[str]
    # Number of results in this page.
    count: int
    total: int = Field(0, description="Total results before pagination")
    offset: int = Field(0, description="Current pagination offset")
    limit: int = Field(200, description="Page size")
    results: List[SearchResultItem]
|
||
|
||
|
||
class TagsResponse(BaseModel):
    """Tag aggregation response."""
    vault_filter: Optional[str]
    # Mapping of tag name to its count (as produced by get_all_tags).
    tags: Dict[str, int]
|
||
|
||
|
||
class TreeSearchResult(BaseModel):
    """A single tree search result item."""
    vault: str
    path: str
    name: str
    type: str = Field(description="'file' or 'directory'")
    # The path fragment that matched the query.
    matched_path: str
|
||
|
||
|
||
class TreeSearchResponse(BaseModel):
    """Tree search response with matching paths."""
    query: str
    vault_filter: str
    results: List[TreeSearchResult]
|
||
|
||
|
||
class AdvancedSearchResultItem(BaseModel):
    """A single advanced search result with highlighted snippet."""
    vault: str
    path: str
    title: str
    tags: List[str]
    # Float score (TF-IDF per AdvancedSearchResponse docstring), unlike the
    # integer score of the basic SearchResultItem.
    score: float
    snippet: str
    modified: str
|
||
|
||
|
||
class SearchFacets(BaseModel):
    """Faceted counts for search results."""
    # tag name -> number of matching results carrying that tag
    tags: Dict[str, int] = Field(default_factory=dict)
    # vault name -> number of matching results in that vault
    vaults: Dict[str, int] = Field(default_factory=dict)
|
||
|
||
|
||
class AdvancedSearchResponse(BaseModel):
    """Advanced search response with TF-IDF scoring, facets, and pagination."""
    results: List[AdvancedSearchResultItem]
    total: int
    offset: int
    limit: int
    facets: SearchFacets
    query_time_ms: float = Field(0, description="Server-side query time in milliseconds")
|
||
|
||
|
||
class TitleSuggestion(BaseModel):
    """A file title suggestion for autocomplete."""
    vault: str
    path: str
    title: str
|
||
|
||
|
||
class SuggestResponse(BaseModel):
    """Autocomplete suggestions for file titles."""
    query: str
    suggestions: List[TitleSuggestion]
|
||
|
||
|
||
class TagSuggestion(BaseModel):
    """A tag suggestion for autocomplete."""
    tag: str
    # Occurrence count used for ranking suggestions.
    count: int
|
||
|
||
|
||
class TagSuggestResponse(BaseModel):
    """Autocomplete suggestions for tags."""
    query: str
    suggestions: List[TagSuggestion]
|
||
|
||
|
||
class ReloadResponse(BaseModel):
    """Index reload confirmation with per-vault stats."""
    status: str
    # vault name -> reload statistics (shape defined by the reload endpoint)
    vaults: Dict[str, Any]
|
||
|
||
|
||
class HealthResponse(BaseModel):
    """Application health status (see /api/health)."""
    status: str
    version: str
    # Number of vaults currently present in the index.
    vaults: int
    # Sum of indexed files across all vaults.
    total_files: int
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# SSE Manager — Server-Sent Events for real-time notifications
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class SSEManager:
    """Tracks connected Server-Sent-Events clients and fans events out to them."""

    def __init__(self):
        # One unbounded queue per connected client.
        self._clients: List[asyncio.Queue] = []

    async def connect(self) -> asyncio.Queue:
        """Register a new SSE client and return its message queue."""
        client_queue: asyncio.Queue = asyncio.Queue()
        self._clients.append(client_queue)
        logger.debug(f"SSE client connected (total: {len(self._clients)})")
        return client_queue

    def disconnect(self, queue: asyncio.Queue):
        """Remove a disconnected SSE client."""
        if queue in self._clients:
            self._clients.remove(queue)
            logger.debug(f"SSE client disconnected (total: {len(self._clients)})")

    async def broadcast(self, event_type: str, data: dict):
        """Serialize *data* once and push it to every connected client.

        Clients whose queue rejects the message are dropped afterwards.
        """
        payload = _json.dumps(data, ensure_ascii=False)
        stale: List[asyncio.Queue] = []
        for client_queue in self._clients:
            try:
                client_queue.put_nowait({"event": event_type, "data": payload})
            except asyncio.QueueFull:
                stale.append(client_queue)
        for client_queue in stale:
            self.disconnect(client_queue)

    @property
    def client_count(self) -> int:
        """Number of currently connected clients."""
        return len(self._clients)


sse_manager = SSEManager()
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Application lifespan (replaces deprecated on_event)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
from backend.watcher import VaultWatcher
|
||
|
||
# Thread pool for offloading CPU-bound search from the event loop.
# Sized to 2 workers so concurrent searches don't starve other requests.
_search_executor: Optional[ThreadPoolExecutor] = None
# File-system watcher driving incremental index updates; created (and torn
# down) by the lifespan handler below.
_vault_watcher: Optional[VaultWatcher] = None
|
||
|
||
|
||
async def _on_vault_change(events: list):
    """Callback invoked by VaultWatcher when files change in watched vaults.

    Applies each create/modify/delete/move event to the index incrementally,
    then broadcasts a single SSE notification summarizing the batch.
    """
    touched_vaults = set()
    notifications = []

    for ev in events:
        vault_name = ev["vault"]
        event_type = ev["type"]
        src = ev["src"]
        dest = ev.get("dest")

        try:
            # Dispatch to the matching incremental index operation.
            if event_type in ("created", "modified"):
                outcome = await update_single_file(vault_name, src)
                action = "updated"
            elif event_type == "deleted":
                outcome = await remove_single_file(vault_name, src)
                action = "deleted"
            elif event_type == "moved":
                outcome = await handle_file_move(vault_name, src, dest)
                action = "moved"
            else:
                continue  # unknown event type: ignore

            if outcome:
                notifications.append({"action": action, "vault": vault_name, "path": outcome["path"]})
                touched_vaults.add(vault_name)
        except Exception as e:
            # One bad event must not abort the rest of the batch.
            logger.error(f"Error processing {event_type} event for {src}: {e}")

    if notifications:
        await sse_manager.broadcast("index_updated", {
            "vaults": list(touched_vaults),
            "changes": notifications,
            "total_changes": len(notifications),
        })
        logger.info(f"Hot-reload: {len(notifications)} change(s) in {list(touched_vaults)}")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Authentication bootstrap
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def bootstrap_admin():
    """Create the initial admin account if no users exist.

    Reads OBSIGATE_ADMIN_USER and OBSIGATE_ADMIN_PASSWORD from the
    environment. When no password is configured, a random 16-character
    one is generated and logged exactly once. Runs only when auth is
    enabled and no user store exists yet.
    """
    from backend.auth.middleware import is_auth_enabled
    from backend.auth.user_store import has_users, create_user

    # Nothing to do when auth is disabled or accounts already exist.
    if not is_auth_enabled() or has_users():
        return

    admin_user = os.environ.get("OBSIGATE_ADMIN_USER", "admin")
    admin_pass = os.environ.get("OBSIGATE_ADMIN_PASSWORD", "")

    if not admin_pass:
        # Generate a random password and display it ONCE in logs.
        alphabet = string.ascii_letters + string.digits
        admin_pass = "".join(secrets.choice(alphabet) for _ in range(16))
        banner = "=" * 60
        logger.warning(banner)
        logger.warning("PREMIER DÉMARRAGE — Compte admin créé automatiquement")
        logger.warning(f" Utilisateur : {admin_user}")
        logger.warning(f" Mot de passe : {admin_pass}")
        logger.warning("CHANGEZ CE MOT DE PASSE dès la première connexion !")
        logger.warning(banner)

    create_user(admin_user, admin_pass, role="admin", vaults=["*"])
    logger.info(f"Admin '{admin_user}' créé avec succès")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Security headers middleware
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Attach a fixed set of security headers to every HTTP response."""

    # Static header values applied to all responses.
    _HEADERS = {
        "X-Content-Type-Options": "nosniff",
        "X-Frame-Options": "SAMEORIGIN",
        "X-XSS-Protection": "1; mode=block",
        "Referrer-Policy": "strict-origin-when-cross-origin",
        "Content-Security-Policy": (
            "default-src 'self'; "
            "script-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://unpkg.com https://esm.sh; "
            "style-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://fonts.googleapis.com; "
            "img-src 'self' data: blob:; "
            "connect-src 'self' https://esm.sh https://unpkg.com; "
            "font-src 'self' https://fonts.gstatic.com;"
        ),
    }

    async def dispatch(self, request, call_next):
        """Run the downstream handler, then stamp the security headers."""
        response = await call_next(request)
        for header_name, header_value in self._HEADERS.items():
            response.headers[header_name] = header_value
        return response
|
||
|
||
|
||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: build index on startup, cleanup on shutdown.

    Indexing and watcher startup run as a background task so the server
    accepts requests immediately; progress is streamed to SSE clients.
    """
    global _search_executor, _vault_watcher
    # Dedicated pool so CPU-bound search work stays off the event loop.
    _search_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="search")

    # Bootstrap admin account if needed (no-op when auth is off or users exist).
    bootstrap_admin()

    logger.info("ObsiGate starting — building index in background...")

    async def _progress_cb(event_type: str, data: dict):
        # Relay indexing progress to connected SSE clients as "index_*" events.
        await sse_manager.broadcast("index_" + event_type, data)

    async def _background_startup():
        # Full index build first; the watcher only handles deltas afterwards.
        logger.info("Background indexing started")
        await build_index(_progress_cb)

        # Start file watcher
        # NOTE(review): _load_config is defined elsewhere in this module.
        config = _load_config()
        watcher_enabled = config.get("watcher_enabled", True)
        if watcher_enabled:
            use_polling = config.get("watcher_use_polling", False)
            polling_interval = config.get("watcher_polling_interval", 5.0)
            debounce = config.get("watcher_debounce", 2.0)
            # Assign the module-level watcher so shutdown can stop it.
            global _vault_watcher
            _vault_watcher = VaultWatcher(
                on_file_change=_on_vault_change,
                debounce_seconds=debounce,
                use_polling=use_polling,
                polling_interval=polling_interval,
            )
            # Re-import to pick up the vault_config populated by build_index.
            from backend.indexer import vault_config
            vaults_to_watch = {name: cfg["path"] for name, cfg in vault_config.items()}
            await _vault_watcher.start(vaults_to_watch)
            logger.info("File watcher started in background.")
        else:
            logger.info("File watcher disabled by configuration.")

        logger.info("Background startup complete.")

    asyncio.create_task(_background_startup())

    logger.info("ObsiGate ready (listening for requests while indexing).")
    yield

    # Shutdown: stop the watcher before tearing down the executor.
    if _vault_watcher:
        await _vault_watcher.stop()
        _vault_watcher = None
    _search_executor.shutdown(wait=False)
    _search_executor = None
|
||
|
||
|
||
# Application instance; lifespan handles index build and watcher setup.
app = FastAPI(title="ObsiGate", version="1.4.0", lifespan=lifespan)

# Security headers on all responses
app.add_middleware(SecurityHeadersMiddleware)

# Auth router and the dependencies used by the endpoints below.
from backend.auth.router import router as auth_router
from backend.auth.middleware import require_auth, require_admin, check_vault_access

app.include_router(auth_router)

# Resolve frontend path relative to this file
FRONTEND_DIR = Path(__file__).resolve().parent.parent / "frontend"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Path safety helper
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _resolve_safe_path(vault_root: Path, relative_path: str) -> Path:
|
||
"""Resolve a relative path safely within the vault root.
|
||
|
||
Prevents directory traversal attacks by ensuring the resolved
|
||
absolute path is a descendant of *vault_root*.
|
||
|
||
Args:
|
||
vault_root: The vault's root directory (absolute).
|
||
relative_path: The user-supplied relative path.
|
||
|
||
Returns:
|
||
Resolved absolute ``Path``.
|
||
|
||
Raises:
|
||
HTTPException(403): When the resolved path escapes the vault root.
|
||
"""
|
||
# Construct the full path without resolving symlinks first
|
||
full_path = vault_root / relative_path
|
||
|
||
# Resolve both paths to handle symlinks
|
||
try:
|
||
resolved = full_path.resolve(strict=False)
|
||
vault_resolved = vault_root.resolve(strict=False)
|
||
except Exception as e:
|
||
logger.error(f"Path resolution error - vault_root: {vault_root}, relative_path: {relative_path}, error: {e}")
|
||
raise HTTPException(status_code=500, detail=f"Path resolution error: {str(e)}")
|
||
|
||
# Check if resolved path is within vault using string comparison (case-insensitive on Windows)
|
||
try:
|
||
# This will raise ValueError if resolved is not relative to vault_resolved
|
||
resolved.relative_to(vault_resolved)
|
||
except ValueError:
|
||
# Try case-insensitive comparison for Windows/Docker compatibility
|
||
resolved_str = str(resolved).lower()
|
||
vault_str = str(vault_resolved).lower()
|
||
if not resolved_str.startswith(vault_str):
|
||
logger.warning(f"Path outside vault - vault: {vault_resolved}, requested: {relative_path}, resolved: {resolved}")
|
||
raise HTTPException(status_code=403, detail="Access denied: path outside vault")
|
||
|
||
return resolved
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Markdown rendering helpers (singleton renderer)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Cached mistune renderer — avoids re-creating on every request.
# escape=False keeps embedded HTML (e.g. the wikilink anchors injected by
# _convert_wikilinks) intact; plugins add table, strikethrough, footnote
# and task-list support.
_markdown_renderer = mistune.create_markdown(
    escape=False,
    plugins=["table", "strikethrough", "footnotes", "task_lists"],
)
|
||
|
||
|
||
def _convert_wikilinks(content: str, current_vault: str) -> str:
    """Convert ``[[wikilinks]]`` and ``[[target|display]]`` to clickable HTML.

    Resolved links get a ``data-vault`` / ``data-path`` attribute pair.
    Unresolved links are rendered as ``<span class="wikilink-missing">``.

    FIX: vault name, path, and display text are HTML-escaped before being
    interpolated into attributes/content — a quote, ``<`` or ``&`` in a
    note title or path previously produced broken markup (and a potential
    injection vector, since the renderer passes raw HTML through).

    Args:
        content: Markdown string potentially containing wikilinks.
        current_vault: Active vault name for resolution priority.

    Returns:
        Markdown string with wikilinks replaced by HTML anchors.
    """
    def _replace(match):
        target = match.group(1).strip()
        display = match.group(2).strip() if match.group(2) else target
        safe_display = html_mod.escape(display)
        found = find_file_in_index(target, current_vault)
        if found:
            return (
                f'<a class="wikilink" href="#" '
                f'data-vault="{html_mod.escape(found["vault"], quote=True)}" '
                f'data-path="{html_mod.escape(found["path"], quote=True)}">{safe_display}</a>'
            )
        return f'<span class="wikilink-missing">{safe_display}</span>'

    pattern = r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]'
    return re.sub(pattern, _replace, content)
|
||
|
||
|
||
def _render_markdown(raw_md: str, vault_name: str, current_file_path: Optional[Path] = None) -> str:
    """Render a markdown string to HTML with wikilink and image support.

    Delegates to the cached module-level mistune renderer.

    Args:
        raw_md: Raw markdown text (frontmatter already stripped).
        vault_name: Current vault for wikilink resolution context.
        current_file_path: Absolute path to the current markdown file.

    Returns:
        HTML string.
    """
    # Look up the vault so image references can be resolved on disk.
    vault_data = get_vault_data(vault_name)
    if vault_data:
        root = Path(vault_data["path"])
        attachments_path = vault_data.get("config", {}).get("attachmentsPath")
        # Rewrite image references before any other transformation.
        raw_md = preprocess_images(raw_md, vault_name, root, current_file_path, attachments_path)

    # Resolve [[wikilinks]], then hand the result to mistune.
    return _markdown_renderer(_convert_wikilinks(raw_md, vault_name))
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# API Endpoints
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@app.get("/api/health", response_model=HealthResponse)
async def api_health():
    """Health check endpoint for Docker and monitoring.

    Returns:
        Application status, version, vault count and total file count.
    """
    # Aggregate the per-vault file lists into one global count.
    file_total = 0
    for vault_data in index.values():
        file_total += len(vault_data["files"])
    return {
        "status": "ok",
        "version": app.version,
        "vaults": len(index),
        "total_files": file_total,
    }
|
||
|
||
|
||
@app.get("/api/vaults", response_model=List[VaultInfo])
async def api_vaults(current_user=Depends(require_auth)):
    """List configured vaults the user has access to.

    Returns:
        List of vault summary objects filtered by user permissions.
    """
    # Token-scoped vault list wins over the user's stored list.
    allowed = current_user.get("_token_vaults") or current_user.get("vaults", [])
    wildcard = "*" in allowed

    summaries = []
    for vault_name, data in index.items():
        if not (wildcard or vault_name in allowed):
            continue
        summaries.append({
            "name": vault_name,
            "file_count": len(data["files"]),
            "tag_count": len(data["tags"]),
            "type": data.get("config", {}).get("type", "VAULT"),
        })
    return summaries
|
||
|
||
|
||
def humanize_mtime(mtime: float) -> str:
    """Return a short French human-readable age for a unix timestamp."""
    age = time.time() - mtime
    if age < 60:
        return "à l'instant"
    # (upper bound, divisor, unit label) — minutes, hours, days.
    for upper_bound, divisor, unit in ((3600, 60, "min"), (86400, 3600, "h"), (604800, 86400, "j")):
        if age < upper_bound:
            return f"il y a {int(age / divisor)} {unit}"
    # Older than a week: absolute date.
    return datetime.fromtimestamp(mtime).strftime("%d %b %Y")
|
||
|
||
|
||
@app.get("/api/recent")
async def api_recent(limit: Optional[int] = Query(None), vault: Optional[str] = Query(None), current_user=Depends(require_auth)):
    """Return the most recently modified files across accessible vaults."""
    config = _load_config()
    actual_limit = config.get("recent_files_limit", 20) if limit is None else limit

    user_vaults = current_user.get("_token_vaults") or current_user.get("vaults", [])

    # Collect (vault, file-meta) pairs the caller may see.
    candidates = []
    for v_name, v_data in index.items():
        if vault and v_name != vault:
            continue
        if "*" not in user_vaults and v_name not in user_vaults:
            continue
        candidates.extend((v_name, meta) for meta in v_data.get("files", []))

    # Sort descending by ISO string "modified"
    candidates.sort(key=lambda pair: pair[1].get("modified", ""), reverse=True)

    files_resp = []
    for v_name, meta in candidates[:actual_limit]:
        iso_modified = meta.get("modified", "")
        try:
            mtime_val = datetime.fromisoformat(iso_modified.replace("Z", "+00:00")).timestamp()
        except Exception:
            # Unparseable timestamp: fall back to "now".
            mtime_val = time.time()

        files_resp.append({
            "path": meta["path"],
            "title": meta["title"],
            "vault": v_name,
            "mtime": mtime_val,
            "mtime_human": humanize_mtime(mtime_val),
            "mtime_iso": iso_modified,
            "size_bytes": meta.get("size", 0),
            "tags": [f"#{t}" for t in meta.get("tags", [])][:5],
            "preview": meta.get("content_preview", "")[:120]
        })

    return {
        "files": files_resp,
        "total": len(candidates),
        "limit": actual_limit,
        "generated_at": time.time()
    }
|
||
|
||
|
||
@app.get("/api/browse/{vault_name}", response_model=BrowseResponse)
async def api_browse(vault_name: str, path: str = "", current_user=Depends(require_auth)):
    """Browse directories and files in a vault at a given path level.

    Returns sorted entries (directories first, then files) with metadata.
    Hidden files/directories (starting with ``"."``) are excluded when the
    vault's ``hideHiddenFiles`` setting is enabled.

    Args:
        vault_name: Name of the vault to browse.
        path: Relative directory path within the vault (empty = root).

    Returns:
        ``BrowseResponse`` with vault name, path, and item list.

    Raises:
        HTTPException: 403 on access/permission failure, 404 when the
            vault or path does not exist.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    vault_root = Path(vault_data["path"])
    # Path traversal protection
    target = _resolve_safe_path(vault_root, path) if path else vault_root.resolve()

    if not target.exists():
        raise HTTPException(status_code=404, detail=f"Path not found: {path}")

    # Get vault settings for hideHiddenFiles
    settings = get_vault_setting(vault_name) or {}
    hide_hidden = settings.get("hideHiddenFiles", False)

    items = []
    try:
        # Sort key puts directories first, then case-insensitive by name.
        for entry in sorted(target.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower())):
            # Skip hidden files/dirs if the setting is enabled
            if hide_hidden and entry.name.startswith("."):
                continue
            # Normalize to forward slashes for the frontend.
            rel = str(entry.relative_to(vault_root)).replace("\\", "/")
            if entry.is_dir():
                # Count only direct children (files and subdirs) for performance
                try:
                    # NOTE(review): the condition below relies on Python's
                    # `A and B or C` precedence — it counts supported files
                    # OR any subdirectory, each subject to the hidden-file
                    # filter. Preserved as-is.
                    file_count = sum(
                        1 for child in entry.iterdir()
                        if (not hide_hidden or not child.name.startswith("."))
                        and (child.is_file() and (child.suffix.lower() in SUPPORTED_EXTENSIONS or child.name.lower() in ("dockerfile", "makefile"))
                        or child.is_dir())
                    )
                except PermissionError:
                    # Unreadable subdirectory: report zero children.
                    file_count = 0
                items.append({
                    "name": entry.name,
                    "path": rel,
                    "type": "directory",
                    "children_count": file_count,
                })
            elif entry.suffix.lower() in SUPPORTED_EXTENSIONS or entry.name.lower() in ("dockerfile", "makefile"):
                items.append({
                    "name": entry.name,
                    "path": rel,
                    "type": "file",
                    "size": entry.stat().st_size,
                    "extension": entry.suffix.lower(),
                })
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied")

    return {"vault": vault_name, "path": path, "items": items}
|
||
|
||
|
||
# Map file extensions to highlight.js language hints.
# ".csv", ".txt" and ".log" deliberately map to "plaintext" (no highlighting).
EXT_TO_LANG = {
    ".py": "python", ".js": "javascript", ".ts": "typescript",
    ".jsx": "jsx", ".tsx": "tsx", ".sh": "bash", ".bash": "bash",
    ".zsh": "bash", ".fish": "fish", ".bat": "batch", ".cmd": "batch",
    ".ps1": "powershell", ".json": "json", ".yaml": "yaml", ".yml": "yaml",
    ".toml": "toml", ".xml": "xml", ".csv": "plaintext",
    ".cfg": "ini", ".ini": "ini", ".conf": "ini", ".env": "bash",
    ".html": "html", ".css": "css", ".scss": "scss", ".less": "less",
    ".java": "java", ".c": "c", ".cpp": "cpp", ".h": "c", ".hpp": "cpp",
    ".cs": "csharp", ".go": "go", ".rs": "rust", ".rb": "ruby",
    ".php": "php", ".sql": "sql", ".r": "r", ".swift": "swift",
    ".kt": "kotlin", ".txt": "plaintext", ".log": "plaintext",
    ".dockerfile": "dockerfile", ".makefile": "makefile", ".cmake": "cmake",
}
|
||
|
||
|
||
@app.get("/api/file/{vault_name}/raw", response_model=FileRawResponse)
async def api_file_raw(vault_name: str, path: str = Query(..., description="Relative path to file"), current_user=Depends(require_auth)):
    """Return raw file content as plain text.

    The file is decoded as UTF-8 with ``errors="replace"`` so undecodable
    bytes become U+FFFD instead of raising.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileRawResponse`` with vault, path, and raw text content.

    Raises:
        HTTPException: 403 on access/permission failure, 404 when the
            vault or file does not exist, 500 on unexpected read errors.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    vault_root = Path(vault_data["path"])
    file_path = _resolve_safe_path(vault_root, path)

    if not file_path.exists() or not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    try:
        # errors="replace" guarantees decoding never raises
        # UnicodeDecodeError, so no separate binary-file fallback is needed
        # (the previous fallback branch was unreachable).
        raw = file_path.read_text(encoding="utf-8", errors="replace")
    except PermissionError as e:
        logger.error(f"Permission denied reading raw file {path}: {e}")
        raise HTTPException(status_code=403, detail=f"Permission denied: cannot read file {path}")
    except Exception as e:
        logger.error(f"Unexpected error reading raw file {path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")

    return {"vault": vault_name, "path": path, "raw": raw}
|
||
|
||
|
||
@app.get("/api/file/{vault_name}/download")
async def api_file_download(vault_name: str, path: str = Query(..., description="Relative path to file"), current_user=Depends(require_auth)):
    """Download a file as an attachment.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileResponse`` with ``application/octet-stream`` content-type.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    # Validate the path against traversal before touching the file system.
    resolved = _resolve_safe_path(Path(vault_data["path"]), path)
    if not (resolved.exists() and resolved.is_file()):
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    return FileResponse(
        path=str(resolved),
        filename=resolved.name,
        media_type="application/octet-stream",
    )
|
||
|
||
|
||
@app.put("/api/file/{vault_name}/save", response_model=FileSaveResponse)
async def api_file_save(
    vault_name: str,
    path: str = Query(..., description="Relative path to file"),
    body: dict = Body(...),
    current_user=Depends(require_auth),
):
    """Save (overwrite) an existing file's content.

    Expects a JSON body with a ``content`` key holding the new text; the
    target path is validated against traversal before writing.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.
        body: JSON body with ``content`` string.

    Returns:
        ``FileSaveResponse`` confirming the write.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    target = _resolve_safe_path(Path(vault_data["path"]), path)
    if not target.exists():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    new_text = body.get('content', '')

    try:
        target.write_text(new_text, encoding="utf-8")
        logger.info(f"File saved: {vault_name}/{path}")
        return {"status": "ok", "vault": vault_name, "path": path, "size": len(new_text)}
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied: vault may be read-only")
    except Exception as e:
        logger.error(f"Error saving file {vault_name}/{path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error saving file: {str(e)}")
|
||
|
||
|
||
@app.delete("/api/file/{vault_name}", response_model=FileDeleteResponse)
async def api_file_delete(vault_name: str, path: str = Query(..., description="Relative path to file"), current_user=Depends(require_auth)):
    """Delete a single file from a vault.

    The path is validated against traversal attacks before deletion.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileDeleteResponse`` confirming the deletion.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    target = _resolve_safe_path(Path(vault_data["path"]), path)
    if not (target.exists() and target.is_file()):
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    try:
        target.unlink()
        logger.info(f"File deleted: {vault_name}/{path}")
        return {"status": "ok", "vault": vault_name, "path": path}
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied: vault may be read-only")
    except Exception as e:
        logger.error(f"Error deleting file {vault_name}/{path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
|
||
|
||
|
||
@app.get("/api/file/{vault_name}", response_model=FileContentResponse)
async def api_file(vault_name: str, path: str = Query(..., description="Relative path to file"), current_user=Depends(require_auth)):
    """Return rendered HTML and metadata for a file.

    Markdown files are parsed for frontmatter, rendered with wikilink
    support, and returned with extracted tags. Other supported file
    types are syntax-highlighted as code blocks.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileContentResponse`` with HTML, metadata, and tags.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    vault_root = Path(vault_data["path"])
    file_path = _resolve_safe_path(vault_root, path)

    if not file_path.exists() or not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    try:
        # errors="replace" guarantees decoding never raises
        # UnicodeDecodeError (binary bytes become U+FFFD), so the former
        # dedicated UnicodeDecodeError fallback was unreachable and has
        # been removed.
        raw = file_path.read_text(encoding="utf-8", errors="replace")
    except PermissionError as e:
        logger.error(f"Permission denied reading file {path}: {e}")
        raise HTTPException(status_code=403, detail=f"Permission denied: cannot read file {path}")
    except Exception as e:
        logger.error(f"Unexpected error reading file {path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")

    ext = file_path.suffix.lower()

    if ext == ".md":
        post = parse_markdown_file(raw)

        # Extract metadata using shared indexer logic.
        tags = _extract_tags(post)

        # Fall back to a humanized filename when no frontmatter title exists.
        title = post.metadata.get("title", file_path.stem.replace("-", " ").replace("_", " "))
        html_content = _render_markdown(post.content, vault_name, file_path)

        return {
            "vault": vault_name,
            "path": path,
            "title": str(title),
            "tags": tags,
            "frontmatter": dict(post.metadata) if post.metadata else {},
            "html": html_content,
            "raw_length": len(raw),
            "extension": ext,
            "is_markdown": True,
        }
    else:
        # Non-markdown: wrap in a syntax-highlighted code block.
        lang = EXT_TO_LANG.get(ext, "plaintext")
        escaped = html_mod.escape(raw)
        html_content = f'<pre><code class="language-{lang}">{escaped}</code></pre>'

        return {
            "vault": vault_name,
            "path": path,
            "title": file_path.name,
            "tags": [],
            "frontmatter": {},
            "html": html_content,
            "raw_length": len(raw),
            "extension": ext,
            "is_markdown": False,
        }
|
||
|
||
|
||
@app.get("/api/search", response_model=SearchResponse)
async def api_search(
    q: str = Query("", description="Search query"),
    vault: str = Query("all", description="Vault filter"),
    tag: Optional[str] = Query(None, description="Tag filter"),
    limit: int = Query(50, ge=1, le=200, description="Results per page"),
    offset: int = Query(0, ge=0, description="Pagination offset"),
    current_user=Depends(require_auth),
):
    """Full-text search across vaults with relevance scoring.

    Supports combining free-text queries with tag filters.
    Results are ranked by a multi-factor scoring algorithm.
    Pagination via ``limit`` and ``offset`` (defaults preserve backward compat).

    Args:
        q: Free-text search string.
        vault: Vault name or ``"all"`` to search everywhere.
        tag: Comma-separated tag names to require.
        limit: Max results per page (1–200).
        offset: Pagination offset.

    Returns:
        ``SearchResponse`` with ranked results and snippets.
    """
    # get_running_loop() is the supported way to get the loop inside a
    # coroutine; get_event_loop() here has been deprecated since 3.10.
    loop = asyncio.get_running_loop()
    # Fetch full result set (capped at DEFAULT_SEARCH_LIMIT internally),
    # in a worker thread so scoring does not block the event loop.
    all_results = await loop.run_in_executor(
        _search_executor,
        partial(search, q, vault_filter=vault, tag_filter=tag),
    )
    total = len(all_results)
    page = all_results[offset: offset + limit]
    return {
        "query": q, "vault_filter": vault, "tag_filter": tag,
        "count": len(page), "total": total, "offset": offset, "limit": limit,
        "results": page,
    }
|
||
|
||
|
||
@app.get("/api/tags", response_model=TagsResponse)
async def api_tags(vault: Optional[str] = Query(None, description="Vault filter"), current_user=Depends(require_auth)):
    """List every distinct tag together with its occurrence count.

    Args:
        vault: Optional vault name restricting the aggregation.

    Returns:
        ``TagsResponse`` with tags ordered by descending count.
    """
    return {"vault_filter": vault, "tags": get_all_tags(vault_filter=vault)}
|
||
|
||
|
||
@app.get("/api/tree-search", response_model=TreeSearchResponse)
async def api_tree_search(
    q: str = Query("", description="Search query"),
    vault: str = Query("all", description="Vault filter"),
    current_user=Depends(require_auth),
):
    """Filter the file/directory tree via the pre-built path index.

    Works entirely from the in-memory path index, so no filesystem
    access happens per request.

    Args:
        q: Search string matched against file/directory names and paths.
        vault: Vault name or "all" to search everywhere.

    Returns:
        ``TreeSearchResponse`` with matching paths.
    """
    if not q:
        return {"query": q, "vault_filter": vault, "results": []}

    needle = q.lower()
    selected_vaults = list(path_index.keys()) if vault == "all" else [vault]

    matches = []
    for vname in selected_vaults:
        for entry in path_index.get(vname, []):
            # Case-insensitive substring match on name or full path.
            if needle in entry["name"].lower() or needle in entry["path"].lower():
                matches.append({
                    "vault": vname,
                    "path": entry["path"],
                    "name": entry["name"],
                    "type": entry["type"],
                    "matched_path": entry["path"],
                })

    return {"query": q, "vault_filter": vault, "results": matches}
|
||
|
||
|
||
@app.get("/api/search/advanced", response_model=AdvancedSearchResponse)
async def api_advanced_search(
    q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path: operators)"),
    vault: str = Query("all", description="Vault filter"),
    tag: Optional[str] = Query(None, description="Comma-separated tag filter"),
    limit: int = Query(50, ge=1, le=200, description="Results per page"),
    offset: int = Query(0, ge=0, description="Pagination offset"),
    sort: str = Query("relevance", description="Sort by 'relevance' or 'modified'"),
    current_user=Depends(require_auth),
):
    """Advanced full-text search with TF-IDF scoring, facets, and pagination.

    Supports advanced query operators:
    - ``tag:<name>`` or ``#<name>`` — filter by tag
    - ``vault:<name>`` — filter by vault
    - ``title:<text>`` — filter by title substring
    - ``path:<text>`` — filter by path substring
    - Remaining text is scored using TF-IDF with accent normalization.

    Results include ``<mark>``-highlighted snippets and faceted tag/vault counts.

    Args:
        q: Query string with optional operators.
        vault: Vault name or ``"all"``.
        tag: Extra comma-separated tag names to require.
        limit: Max results per page (1–200).
        offset: Pagination offset.
        sort: ``"relevance"`` (TF-IDF) or ``"modified"`` (date).

    Returns:
        ``AdvancedSearchResponse`` with scored results, facets, and pagination info.
    """
    # get_running_loop() replaces the deprecated get_event_loop() call
    # (deprecated inside coroutines since Python 3.10).
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        _search_executor,
        partial(advanced_search, q, vault_filter=vault, tag_filter=tag,
                limit=limit, offset=offset, sort_by=sort),
    )
|
||
|
||
|
||
@app.get("/api/suggest", response_model=SuggestResponse)
async def api_suggest(
    q: str = Query("", description="Prefix to search for in file titles"),
    vault: str = Query("all", description="Vault filter"),
    limit: int = Query(10, ge=1, le=50, description="Max suggestions"),
    current_user=Depends(require_auth),
):
    """Autocomplete helper: propose file titles matching a prefix.

    Matching is accent-insensitive; this backs the search input's
    suggestion dropdown.

    Args:
        q: User-typed prefix (minimum 2 characters).
        vault: Vault name or ``"all"``.
        limit: Maximum number of suggestions returned.

    Returns:
        ``SuggestResponse`` with matching file title suggestions.
    """
    return {"query": q, "suggestions": suggest_titles(q, vault_filter=vault, limit=limit)}
|
||
|
||
|
||
@app.get("/api/tags/suggest", response_model=TagSuggestResponse)
async def api_tags_suggest(
    q: str = Query("", description="Prefix to search for in tags"),
    vault: str = Query("all", description="Vault filter"),
    limit: int = Query(10, ge=1, le=50, description="Max suggestions"),
    current_user=Depends(require_auth),
):
    """Autocomplete helper: propose tags matching a prefix.

    Accent-insensitive; backs the suggestions shown when typing
    ``tag:`` or ``#`` in the search input.

    Args:
        q: User-typed prefix (with or without ``#``, minimum 2 characters).
        vault: Vault name or ``"all"``.
        limit: Maximum number of suggestions returned.

    Returns:
        ``TagSuggestResponse`` with matching tag suggestions and counts.
    """
    return {"query": q, "suggestions": suggest_tags(q, vault_filter=vault, limit=limit)}
|
||
|
||
|
||
@app.get("/api/index/reload", response_model=ReloadResponse)
async def api_reload(current_user=Depends(require_admin)):
    """Trigger a full re-index of every configured vault.

    Broadcasts an ``index_reloaded`` SSE event once the rebuild finishes.

    Returns:
        ``ReloadResponse`` with per-vault file and tag counts.
    """
    stats = await reload_index()
    payload = {"vaults": list(stats.keys()), "stats": stats}
    await sse_manager.broadcast("index_reloaded", payload)
    return {"status": "ok", "vaults": stats}
|
||
|
||
|
||
@app.get("/api/index/reload/{vault_name}")
async def api_reload_vault(vault_name: str, current_user=Depends(require_admin)):
    """Re-index a single vault and broadcast the result over SSE.

    Args:
        vault_name: Name of the vault to reindex.

    Returns:
        Dict with the refreshed vault statistics.
    """
    try:
        # Lazy import, as in the rest of this section.
        from backend.indexer import reload_single_vault
        stats = await reload_single_vault(vault_name)
        await sse_manager.broadcast("vault_reloaded", {
            "vault": vault_name,
            "stats": stats,
        })
    except ValueError as e:
        # reload_single_vault signals an unknown vault with ValueError.
        raise HTTPException(status_code=404, detail=str(e))
    return {"status": "ok", "vault": vault_name, "stats": stats}
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# SSE endpoint — Server-Sent Events stream
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@app.get("/api/events")
async def api_events(current_user=Depends(require_auth)):
    """SSE stream for real-time index update notifications.

    Sends keepalive comments every 30s. Events:
    - ``index_updated``: partial index change (file create/modify/delete/move)
    - ``index_reloaded``: full re-index completed
    - ``vault_added``: new vault added dynamically
    - ``vault_removed``: vault removed dynamically
    """
    subscription = await sse_manager.connect()

    async def stream():
        try:
            # First frame announces the connection and current client count.
            hello = _json.dumps({'sse_clients': sse_manager.client_count})
            yield f"event: connected\ndata: {hello}\n\n"
            while True:
                try:
                    message = await asyncio.wait_for(subscription.get(), timeout=30.0)
                    yield f"event: {message['event']}\ndata: {message['data']}\n\n"
                except asyncio.TimeoutError:
                    # No event within 30s: send an SSE comment as keepalive.
                    yield ": keepalive\n\n"
                except asyncio.CancelledError:
                    break
        finally:
            # Always drop the queue so the manager's client count stays accurate.
            sse_manager.disconnect(subscription)

    return StreamingResponse(
        stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable proxy buffering (nginx) so events flush immediately.
            "X-Accel-Buffering": "no",
        },
    )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Dynamic vault management endpoints
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@app.post("/api/vaults/add")
async def api_add_vault(body: dict = Body(...), current_user=Depends(require_admin)):
    """Add a new vault dynamically without restarting.

    Body:
        name: Display name for the vault.
        path: Absolute filesystem path to the vault directory.

    Raises:
        HTTPException: 400 for missing fields or an invalid path,
            409 when the vault name is already indexed.
    """
    name = body.get("name", "").strip()
    vault_path = body.get("path", "").strip()

    if not name or not vault_path:
        raise HTTPException(status_code=400, detail="Both 'name' and 'path' are required")

    if name in index:
        raise HTTPException(status_code=409, detail=f"Vault '{name}' already exists")

    root = Path(vault_path)
    if not root.exists():
        raise HTTPException(status_code=400, detail=f"Path does not exist: {vault_path}")
    # A vault must be a directory; previously a plain-file path passed
    # validation and only failed later during indexing.
    if not root.is_dir():
        raise HTTPException(status_code=400, detail=f"Path is not a directory: {vault_path}")

    stats = await add_vault_to_index(name, vault_path)

    # Start watching the new vault for live updates.
    if _vault_watcher:
        await _vault_watcher.add_vault(name, vault_path)

    await sse_manager.broadcast("vault_added", {"vault": name, "stats": stats})
    return {"status": "ok", "vault": name, "stats": stats}
|
||
|
||
|
||
@app.delete("/api/vaults/{vault_name}")
async def api_remove_vault(vault_name: str, current_user=Depends(require_admin)):
    """Drop a vault from the live index and stop its filesystem watcher.

    Args:
        vault_name: Name of the vault to remove.
    """
    if vault_name not in index:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    # Stop the watcher first so no events arrive for a half-removed vault.
    if _vault_watcher:
        await _vault_watcher.remove_vault(vault_name)

    await remove_vault_from_index(vault_name)
    await sse_manager.broadcast("vault_removed", {"vault": vault_name})
    return {"status": "ok", "vault": vault_name}
|
||
|
||
|
||
@app.get("/api/vaults/status")
async def api_vaults_status(current_user=Depends(require_auth)):
    """Report per-vault status plus watcher and SSE health.

    Returns per-vault: file count, tag count, watching status, vault path.
    """
    watcher_up = _vault_watcher is not None
    statuses = {
        vname: {
            "file_count": len(vdata.get("files", [])),
            "tag_count": len(vdata.get("tags", {})),
            "path": vdata.get("path", ""),
            "watching": watcher_up and vname in _vault_watcher.observers,
        }
        for vname, vdata in index.items()
    }
    return {
        "vaults": statuses,
        "watcher_active": watcher_up,
        "sse_clients": sse_manager.client_count,
    }
|
||
|
||
|
||
@app.get("/api/image/{vault_name}")
async def api_image(vault_name: str, path: str = Query(..., description="Relative path to image"), current_user=Depends(require_auth)):
    """Serve an image file with a guessed MIME type.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        Raw file bytes with the appropriate content-type header.
    """
    if not check_vault_access(vault_name, current_user):
        raise HTTPException(status_code=403, detail=f"Accès refusé à la vault '{vault_name}'")
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    image_path = _resolve_safe_path(Path(vault_data["path"]), path)
    if not (image_path.exists() and image_path.is_file()):
        raise HTTPException(status_code=404, detail=f"Image not found: {path}")

    # Fall back to a generic binary type when the extension is unknown.
    guessed, _ = mimetypes.guess_type(str(image_path))
    media_type = guessed or "application/octet-stream"

    try:
        return Response(content=image_path.read_bytes(), media_type=media_type)
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied")
    except Exception as e:
        logger.error(f"Error serving image {vault_name}/{path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error serving image: {str(e)}")
|
||
|
||
|
||
@app.post("/api/attachments/rescan/{vault_name}")
async def api_rescan_attachments(vault_name: str, current_user=Depends(require_admin)):
    """Re-scan one vault's attachments and refresh the attachment index.

    Args:
        vault_name: Name of the vault to rescan.

    Returns:
        Dict with status and the number of attachments found.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    count = await rescan_vault_attachments(vault_name, vault_data["path"])
    logger.info(f"Rescanned attachments for vault '{vault_name}': {count} attachments")
    return {"status": "ok", "vault": vault_name, "attachment_count": count}
|
||
|
||
|
||
@app.get("/api/attachments/stats")
async def api_attachment_stats(vault: Optional[str] = Query(None, description="Vault filter"), current_user=Depends(require_auth)):
    """Expose attachment counts, optionally restricted to one vault.

    Args:
        vault: Optional vault name to filter stats.

    Returns:
        Dict with vault names as keys and attachment counts as values.
    """
    return {"vaults": get_attachment_stats(vault)}
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Vault Settings API — Display preferences
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@app.get("/api/vaults/{vault_name}/settings")
async def api_get_vault_settings(vault_name: str, current_user=Depends(require_auth)):
    """Fetch UI display settings for one vault, defaults applied.

    Args:
        vault_name: Name of the vault.

    Returns:
        Dict with vault settings including ``hideHiddenFiles``.
    """
    if vault_name not in index:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    # Defaults first, then any persisted overrides layered on top.
    return {"hideHiddenFiles": False, **(get_vault_setting(vault_name) or {})}
|
||
|
||
|
||
@app.post("/api/vaults/{vault_name}/settings")
async def api_update_vault_settings(vault_name: str, body: dict = Body(...), current_user=Depends(require_admin)):
    """Update UI display settings for a specific vault.

    Args:
        vault_name: Name of the vault.
        body: Dict with settings to update (``hideHiddenFiles``).

    Returns:
        Updated settings dict.

    Raises:
        HTTPException: 404 for an unknown vault, 400 for invalid values,
            500 when persisting fails.
    """
    if vault_name not in index:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    # Whitelist-validate the recognized settings; unknown keys are ignored.
    settings_to_update = {}

    if "hideHiddenFiles" in body:
        if not isinstance(body["hideHiddenFiles"], bool):
            raise HTTPException(status_code=400, detail="hideHiddenFiles must be a boolean")
        settings_to_update["hideHiddenFiles"] = body["hideHiddenFiles"]

    # Persist. Exceptions are chained (``from e``) so the root cause is
    # preserved in tracebacks; the permission detail was previously a
    # placeholder-free f-string.
    try:
        updated = update_vault_setting(vault_name, settings_to_update)
    except PermissionError as e:
        logger.error(f"Permission error saving settings for vault '{vault_name}': {e}")
        raise HTTPException(
            status_code=500,
            detail="Permission denied: Cannot write to settings file. Check /app/data permissions.",
        ) from e
    except Exception as e:
        logger.error(f"Error saving settings for vault '{vault_name}': {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to save settings: {str(e)}",
        ) from e

    logger.info(f"Updated settings for vault '{vault_name}': {settings_to_update}")

    return updated
|
||
|
||
|
||
@app.get("/api/vaults/settings/all")
async def api_get_all_vault_settings(current_user=Depends(require_auth)):
    """Fetch UI display settings for every indexed vault.

    Returns:
        Dict mapping vault names to their settings (defaults merged
        with persisted overrides).
    """
    return {
        vname: {"hideHiddenFiles": False, **(get_vault_setting(vname) or {})}
        for vname in index.keys()
    }
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Configuration API
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Repository root (two levels above this module); config.json lives in data/.
_BASE_DIR = Path(__file__).resolve().parent.parent
_CONFIG_PATH = _BASE_DIR / "data" / "config.json"

# Baseline configuration. _load_config() overlays values stored in
# config.json on top of these defaults, so every key is always present.
_DEFAULT_CONFIG = {
    # Search execution / UI behaviour
    "search_workers": 2,
    "debounce_ms": 300,
    "results_per_page": 50,
    "min_query_length": 2,
    "search_timeout_ms": 30000,
    "max_content_size": 100000,
    # Snippet rendering
    "snippet_context_chars": 120,
    "max_snippet_highlights": 5,
    # Relevance boosts applied during scoring
    "title_boost": 3.0,
    "path_boost": 1.5,
    # Filesystem watcher
    "watcher_enabled": True,
    "watcher_use_polling": False,
    "watcher_polling_interval": 5.0,
    "watcher_debounce": 2.0,
    "tag_boost": 2.0,
    "prefix_max_expansions": 50,
    "recent_files_limit": 20,
}
|
||
|
||
|
||
def _load_config() -> dict:
    """Return the effective configuration: defaults overlaid with stored values."""
    merged = dict(_DEFAULT_CONFIG)
    if _CONFIG_PATH.exists():
        try:
            merged.update(_json.loads(_CONFIG_PATH.read_text(encoding="utf-8")))
        except Exception as e:
            # Best-effort: a corrupt config file degrades to the defaults.
            logger.warning(f"Failed to read config.json: {e}")
    return merged
|
||
|
||
|
||
def _save_config(config: dict) -> None:
    """Persist *config* to ``_CONFIG_PATH`` as pretty-printed JSON.

    Raises:
        HTTPException: 500 when the file cannot be written.
    """
    try:
        # The data/ directory may not exist on a fresh deployment; creating
        # it here avoids a spurious 500 on the very first config save.
        _CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
        _CONFIG_PATH.write_text(
            _json.dumps(config, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
    except Exception as e:
        logger.error(f"Failed to write config.json: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to save config: {e}") from e
|
||
|
||
|
||
@app.get("/api/config")
async def api_get_config(current_user=Depends(require_auth)):
    """Return the effective configuration (defaults merged with stored overrides)."""
    config = _load_config()
    return config
|
||
|
||
|
||
@app.post("/api/config")
async def api_set_config(body: dict = Body(...), current_user=Depends(require_admin)):
    """Update configuration. Only known keys are accepted.

    Keys matching ``_DEFAULT_CONFIG`` are validated and persisted.
    Unknown keys are silently ignored.
    Returns the full merged config after update.

    Raises:
        HTTPException: 400 when a known key has the wrong value type.
    """
    current = _load_config()
    updated_keys = []
    for key, value in body.items():
        if key not in _DEFAULT_CONFIG:
            continue  # unknown keys are ignored by design
        expected_type = type(_DEFAULT_CONFIG[key])
        # bool is a subclass of int in Python, so isinstance(True, int) is
        # True — previously a boolean was silently accepted for numeric
        # keys like "search_workers". Reject it explicitly.
        type_ok = (
            isinstance(value, expected_type)
            or (expected_type is float and isinstance(value, (int, float)))
        )
        if expected_type is not bool and isinstance(value, bool):
            type_ok = False
        if not type_ok:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid type for '{key}': expected {expected_type.__name__}, got {type(value).__name__}",
            )
        current[key] = value
        updated_keys.append(key)
    _save_config(current)
    logger.info(f"Config updated: {updated_keys}")
    return current
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Diagnostics API
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@app.get("/api/diagnostics")
async def api_diagnostics(current_user=Depends(require_admin)):
    """Return index statistics and system diagnostics.

    Includes document counts, token counts, memory estimates,
    and inverted index status.
    """
    import sys

    from backend.search import get_inverted_index

    inv = get_inverted_index()

    # Aggregate per-vault counts, then derive totals from them.
    vault_stats = {
        vname: {
            "file_count": len(vdata.get("files", [])),
            "tag_count": len(vdata.get("tags", {})),
        }
        for vname, vdata in index.items()
    }
    total_files = sum(s["file_count"] for s in vault_stats.values())
    total_tags = sum(s["tag_count"] for s in vault_stats.values())

    # Rough memory estimate for the inverted index (per-entry byte
    # weights are heuristic constants).
    word_index_entries = sum(len(docs) for docs in inv.word_index.values())
    raw_bytes = (
        sys.getsizeof(inv.word_index)
        + word_index_entries * 80
        + len(inv.doc_info) * 200
        + len(inv._sorted_tokens) * 60
    )
    mem_estimate_mb = round(raw_bytes / (1024 * 1024), 2)

    return {
        "index": {
            "total_files": total_files,
            "total_tags": total_tags,
            "vaults": vault_stats,
        },
        "inverted_index": {
            "unique_tokens": len(inv.word_index),
            "total_postings": word_index_entries,
            "documents": inv.doc_count,
            "sorted_tokens": len(inv._sorted_tokens),
            "is_stale": inv.is_stale(),
            "memory_estimate_mb": mem_estimate_mb,
        },
        "config": _load_config(),
        "search_executor": {
            "active": _search_executor is not None,
            "max_workers": _search_executor._max_workers if _search_executor else 0,
        },
    }
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Static files & SPA fallback
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Mount the static asset directory only when the frontend bundle is present.
if FRONTEND_DIR.exists():
    app.mount("/static", StaticFiles(directory=str(FRONTEND_DIR)), name="static")


@app.get("/sw.js")
async def serve_service_worker():
    """Serve the PWA service worker with caching disabled."""
    sw_file = FRONTEND_DIR / "sw.js"
    if not sw_file.exists():
        raise HTTPException(status_code=404, detail="Service worker not found")
    return FileResponse(
        sw_file,
        media_type="application/javascript",
        headers={
            "Cache-Control": "no-cache, no-store, must-revalidate",
            # Allow the worker to control the whole origin.
            "Service-Worker-Allowed": "/",
        },
    )


@app.get("/manifest.json")
async def serve_manifest():
    """Serve the PWA manifest (cacheable for one hour)."""
    manifest_file = FRONTEND_DIR / "manifest.json"
    if not manifest_file.exists():
        raise HTTPException(status_code=404, detail="Manifest not found")
    return FileResponse(
        manifest_file,
        media_type="application/manifest+json",
        headers={"Cache-Control": "public, max-age=3600"},
    )


@app.get("/popout/{vault_name}/{path:path}")
async def serve_popout(vault_name: str, path: str):
    """Serve the minimalist popout page for a specific file.

    The route parameters are not used server-side here; the same
    template is returned for every vault/path combination.
    """
    popout_file = FRONTEND_DIR / "popout.html"
    if not popout_file.exists():
        raise HTTPException(status_code=404, detail="Popout template not found")
    return HTMLResponse(content=popout_file.read_text(encoding="utf-8"))


@app.get("/{full_path:path}")
async def serve_spa(full_path: str):
    """Catch-all: serve the SPA shell for every non-API route."""
    index_file = FRONTEND_DIR / "index.html"
    if not index_file.exists():
        raise HTTPException(status_code=404, detail="Frontend not found")
    return HTMLResponse(content=index_file.read_text(encoding="utf-8"))
|