import re
import html as html_mod
import logging
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Optional, List, Dict, Any
import frontmatter
import mistune
from fastapi import FastAPI, HTTPException, Query, Body
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, FileResponse
from pydantic import BaseModel, Field
from backend.indexer import (
build_index,
reload_index,
index,
get_vault_data,
get_vault_names,
find_file_in_index,
parse_markdown_file,
_extract_tags,
SUPPORTED_EXTENSIONS,
)
from backend.search import search, get_all_tags
# Configure root logging at import time: timestamp, logger name, level, message.
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=logging.INFO,
)
# Module-wide logger used by every endpoint below.
logger = logging.getLogger("obsigate")
# ---------------------------------------------------------------------------
# Pydantic response models
# ---------------------------------------------------------------------------
class VaultInfo(BaseModel):
    """One entry of the vault listing: a vault's name plus its index counts."""

    name: str = Field(..., description="Display name of the vault")
    file_count: int = Field(..., description="Number of indexed files")
    tag_count: int = Field(..., description="Number of unique tags")
class BrowseItem(BaseModel):
    """One directory-listing entry produced by the browse endpoint.

    Directories carry ``children_count``; files carry ``size`` and
    ``extension``. The fields that do not apply stay ``None``.
    """

    name: str
    path: str
    type: str = Field(..., description="'file' or 'directory'")
    # Set for directories only.
    children_count: Optional[int] = None
    # Set for files only.
    size: Optional[int] = None
    extension: Optional[str] = None
class BrowseResponse(BaseModel):
    """Directory listing for one level of a vault.

    The browse endpoint returns every visible entry of the requested
    directory in a single response (no pagination is performed).
    """
    # Vault name as given in the request.
    vault: str
    # Requested directory path, echoed back (empty string = vault root).
    path: str
    # Entries of that directory, directories first.
    items: List[BrowseItem]
class FileContentResponse(BaseModel):
    """Rendered file content with metadata, as returned by ``api_file``."""
    # Vault name and relative path, echoed from the request.
    vault: str
    path: str
    # Frontmatter title (or a name derived from the file name).
    title: str
    # Tags extracted from markdown files; empty for other file types.
    tags: List[str]
    # Parsed frontmatter metadata; empty for non-markdown files.
    frontmatter: Dict[str, Any]
    # Rendered HTML body.
    html: str
    # Length of the decoded raw text (characters, not bytes).
    raw_length: int
    # Lower-cased file suffix, including the leading dot.
    extension: str
    # True when the file was handled as markdown (".md").
    is_markdown: bool
class FileRawResponse(BaseModel):
    """Raw text content of a file, as returned by ``api_file_raw``."""
    # Vault name and relative path, echoed from the request.
    vault: str
    path: str
    # File text decoded as UTF-8 (undecodable bytes are replaced).
    raw: str
class FileSaveResponse(BaseModel):
    """Confirmation returned after a file has been overwritten."""
    # "ok" on success.
    status: str
    # Vault name and relative path, echoed from the request.
    vault: str
    path: str
    # Length of the written content as computed by ``len(content)``.
    size: int
class FileDeleteResponse(BaseModel):
    """Confirmation returned after a file has been deleted."""
    # "ok" on success.
    status: str
    # Vault name and relative path, echoed from the request.
    vault: str
    path: str
class SearchResultItem(BaseModel):
    """A single full-text search hit.

    Entries are produced by ``backend.search.search``; the exact semantics of
    ``score``, ``snippet`` and ``modified`` are defined there.
    """
    vault: str
    path: str
    title: str
    tags: List[str]
    score: int
    snippet: str
    # NOTE(review): presumably a formatted timestamp string; confirm in backend.search.
    modified: str
class SearchResponse(BaseModel):
    """Envelope for full-text search results."""
    # Query string, vault filter and tag filter echoed from the request.
    query: str
    vault_filter: str
    tag_filter: Optional[str]
    # Number of entries in ``results``.
    count: int
    results: List[SearchResultItem]
class TagsResponse(BaseModel):
    """Tag aggregation response."""
    # Vault filter echoed from the request (``None`` when no filter was given).
    vault_filter: Optional[str]
    # Mapping of tag name to occurrence count.
    tags: Dict[str, int]
class TreeSearchResult(BaseModel):
    """One file or directory whose path matched a tree search query."""

    vault: str
    # Path relative to the vault root, using forward slashes.
    path: str
    name: str
    type: str = Field(..., description="'file' or 'directory'")
    # The tree search endpoint fills this with the same value as ``path``.
    matched_path: str
class TreeSearchResponse(BaseModel):
    """Envelope for tree (path) search results."""
    # Query string and vault filter echoed from the request.
    query: str
    vault_filter: str
    # Matching files and directories; empty when the query is empty.
    results: List[TreeSearchResult]
class HealthResponse(BaseModel):
    """Application health status returned by ``/api/health``."""
    # "ok" whenever the endpoint answers.
    status: str
    # Application version string (taken from the FastAPI app).
    version: str
    # Number of vaults present in the index.
    vaults: int
    # Sum of indexed files over all vaults.
    total_files: int
# ---------------------------------------------------------------------------
# Application lifespan (replaces deprecated on_event)
# ---------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: build the vault index before serving requests.

    Everything before the ``yield`` runs at startup. Nothing follows the
    ``yield``, so no explicit cleanup is performed on shutdown.

    Args:
        app: The FastAPI application (unused here).
    """
    logger.info("ObsiGate starting \u2014 building index...")
    # The index must be ready before the first request is handled.
    await build_index()
    logger.info("ObsiGate ready.")
    yield
# The ASGI application; startup work is delegated to ``lifespan``.
app = FastAPI(
    lifespan=lifespan,
    title="ObsiGate",
    version="1.1.0",
)
# The frontend lives in a "frontend" directory two levels above this file.
FRONTEND_DIR = Path(__file__).resolve().parent.parent / "frontend"
# ---------------------------------------------------------------------------
# Path safety helper
# ---------------------------------------------------------------------------
def _resolve_safe_path(vault_root: Path, relative_path: str) -> Path:
"""Resolve a relative path safely within the vault root.
Prevents directory traversal attacks by ensuring the resolved
absolute path is a descendant of *vault_root*.
Args:
vault_root: The vault's root directory (absolute).
relative_path: The user-supplied relative path.
Returns:
Resolved absolute ``Path``.
Raises:
HTTPException(403): When the resolved path escapes the vault root.
"""
resolved = (vault_root / relative_path).resolve()
vault_resolved = vault_root.resolve()
try:
resolved.relative_to(vault_resolved)
except ValueError:
raise HTTPException(status_code=403, detail="Access denied: path outside vault")
return resolved
# ---------------------------------------------------------------------------
# Markdown rendering helpers (singleton renderer)
# ---------------------------------------------------------------------------
# Module-level mistune renderer, created once at import time and reused by
# _render_markdown so that it is not rebuilt on every request.
# escape=False is passed to mistune; NOTE(review): presumably so that HTML
# already present in the markdown (e.g. produced by wikilink conversion) is
# passed through unescaped. This implies note content is trusted; confirm.
_markdown_renderer = mistune.create_markdown(
    escape=False,
    plugins=["table", "strikethrough", "footnotes", "task_lists"],
)
def _convert_wikilinks(content: str, current_vault: str) -> str:
    """Replace ``[[target]]`` and ``[[target|display]]`` wikilinks in markdown.

    Each wikilink target is looked up with ``find_file_in_index`` and the
    whole ``[[...]]`` span is substituted by the replacement text.

    NOTE(review): the intended contract is that resolved links become HTML
    anchors carrying a ``data-vault`` / ``data-path`` attribute pair and that
    unresolved links get distinct markup. As written here, both f-strings
    contain only ``{display}``, so resolved and unresolved links both collapse
    to the bare display text. The markup seems to be missing from these
    literals; restore it from the frontend's expectations before relying on
    this function.

    Args:
        content: Markdown string potentially containing wikilinks.
        current_vault: Active vault name, passed to the index lookup.

    Returns:
        Markdown string with every wikilink replaced.
    """
    def _replace(match) -> str:
        # group(1) is the link target, group(2) the optional "|display" part.
        target = match.group(1).strip()
        display = match.group(2).strip() if match.group(2) else target
        found = find_file_in_index(target, current_vault)
        if found:
            # Resolved link (see NOTE(review) in the docstring).
            return (
                f'{display}'
            )
        # Unresolved link (see NOTE(review) in the docstring).
        return f'{display}'
    # [[target]] or [[target|display]]; neither part may contain "]" and the
    # target may not contain "|".
    pattern = r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]'
    return re.sub(pattern, _replace, content)
def _render_markdown(raw_md: str, vault_name: str) -> str:
    """Convert markdown text to HTML, handling wikilinks first.

    Wikilinks are rewritten before the text is handed to the shared
    module-level mistune renderer.

    Args:
        raw_md: Raw markdown text (frontmatter already stripped).
        vault_name: Current vault, used as wikilink resolution context.

    Returns:
        HTML string.
    """
    return _markdown_renderer(_convert_wikilinks(raw_md, vault_name))
# ---------------------------------------------------------------------------
# API Endpoints
# ---------------------------------------------------------------------------
@app.get("/api/health", response_model=HealthResponse)
async def api_health():
    """Report liveness for Docker health checks and monitoring.

    Returns:
        Status string, application version, number of indexed vaults and the
        total number of indexed files across all vaults.
    """
    return {
        "status": "ok",
        "version": app.version,
        "vaults": len(index),
        "total_files": sum(len(vault["files"]) for vault in index.values()),
    }
@app.get("/api/vaults", response_model=List[VaultInfo])
async def api_vaults():
    """Summarise every vault currently held in the index.

    Returns:
        One summary object per vault with its name, file count and tag count.
    """
    return [
        {
            "name": vault_name,
            "file_count": len(vault["files"]),
            "tag_count": len(vault["tags"]),
        }
        for vault_name, vault in index.items()
    ]
def _is_listable_file(entry: Path) -> bool:
    """Return True when *entry*'s name marks it as a file type the browser lists."""
    return (
        entry.suffix.lower() in SUPPORTED_EXTENSIONS
        or entry.name.lower() in ("dockerfile", "makefile")
    )


def _count_visible_children(directory: Path) -> int:
    """Count direct, non-hidden children of *directory* (subdirs and listable files)."""
    try:
        return sum(
            1
            for child in directory.iterdir()
            if not child.name.startswith(".")
            and (child.is_dir() or (child.is_file() and _is_listable_file(child)))
        )
    except OSError:
        # An unreadable directory is still listed, just with a zero count.
        return 0


@app.get("/api/browse/{vault_name}", response_model=BrowseResponse)
async def api_browse(vault_name: str, path: str = ""):
    """Browse directories and files in a vault at a given path level.

    Returns sorted entries (directories first, then files, each group ordered
    case-insensitively by name) with metadata. Hidden files/directories
    (starting with ``"."``) and unsupported file types are excluded.

    Args:
        vault_name: Name of the vault to browse.
        path: Relative directory path within the vault (empty = root).

    Returns:
        ``BrowseResponse`` with vault name, path, and item list.

    Raises:
        HTTPException(404): Unknown vault or non-existent path.
        HTTPException(400): The path exists but is not a directory.
        HTTPException(403): Path escapes the vault or cannot be read.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    # Resolve the root once. Listing entries descend from the *resolved* target,
    # so relative paths must be computed against the resolved root as well;
    # otherwise a symlinked or relative vault path makes relative_to() fail.
    vault_root = Path(vault_data["path"]).resolve()

    # Path traversal protection
    target = _resolve_safe_path(vault_root, path) if path else vault_root
    if not target.exists():
        raise HTTPException(status_code=404, detail=f"Path not found: {path}")
    if not target.is_dir():
        # Without this check iterdir() would raise NotADirectoryError (HTTP 500).
        raise HTTPException(status_code=400, detail=f"Not a directory: {path}")

    items = []
    try:
        for entry in sorted(target.iterdir(), key=lambda e: (not e.is_dir(), e.name.lower())):
            # Skip hidden files/dirs
            if entry.name.startswith("."):
                continue
            rel = entry.relative_to(vault_root).as_posix()
            if entry.is_dir():
                items.append({
                    "name": entry.name,
                    "path": rel,
                    "type": "directory",
                    # Direct children only, for performance.
                    "children_count": _count_visible_children(entry),
                })
            elif _is_listable_file(entry):
                try:
                    size = entry.stat().st_size
                except FileNotFoundError:
                    # Dangling symlink or file removed while listing: skip it.
                    continue
                items.append({
                    "name": entry.name,
                    "path": rel,
                    "type": "file",
                    "size": size,
                    "extension": entry.suffix.lower(),
                })
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied")
    return {"vault": vault_name, "path": path, "items": items}
# Lookup table: lower-cased file suffix -> highlight.js language hint.
EXT_TO_LANG = {
    # Scripting / web languages
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".jsx": "jsx",
    ".tsx": "tsx",
    # Shells and command scripts
    ".sh": "bash",
    ".bash": "bash",
    ".zsh": "bash",
    ".fish": "fish",
    ".bat": "batch",
    ".cmd": "batch",
    ".ps1": "powershell",
    # Data and configuration formats
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".toml": "toml",
    ".xml": "xml",
    ".csv": "plaintext",
    ".cfg": "ini",
    ".ini": "ini",
    ".conf": "ini",
    ".env": "bash",
    # Markup and stylesheets
    ".html": "html",
    ".css": "css",
    ".scss": "scss",
    ".less": "less",
    # Compiled / systems languages
    ".java": "java",
    ".c": "c",
    ".cpp": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".cs": "csharp",
    ".go": "go",
    ".rs": "rust",
    ".rb": "ruby",
    ".php": "php",
    ".sql": "sql",
    ".r": "r",
    ".swift": "swift",
    ".kt": "kotlin",
    # Plain text
    ".txt": "plaintext",
    ".log": "plaintext",
    # Build files
    ".dockerfile": "dockerfile",
    ".makefile": "makefile",
    ".cmake": "cmake",
}
@app.get("/api/file/{vault_name}/raw", response_model=FileRawResponse)
async def api_file_raw(vault_name: str, path: str = Query(..., description="Relative path to file")):
    """Serve the unrendered text of a vault file.

    The file is decoded as UTF-8; bytes that cannot be decoded are replaced
    rather than raising.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileRawResponse`` with vault, path, and raw text content.

    Raises:
        HTTPException(404): Unknown vault, or the path is not an existing file.
        HTTPException(403): The path escapes the vault root.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    file_path = _resolve_safe_path(Path(vault_data["path"]), path)
    # is_file() is False for missing paths as well as for directories.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    return {
        "vault": vault_name,
        "path": path,
        "raw": file_path.read_text(encoding="utf-8", errors="replace"),
    }
@app.get("/api/file/{vault_name}/download")
async def api_file_download(vault_name: str, path: str = Query(..., description="Relative path to file")):
    """Send a vault file to the client as a download.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileResponse`` with ``application/octet-stream`` content-type and
        the file's own name as the download filename.

    Raises:
        HTTPException(404): Unknown vault, or the path is not an existing file.
        HTTPException(403): The path escapes the vault root.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    file_path = _resolve_safe_path(Path(vault_data["path"]), path)
    # is_file() is False for missing paths as well as for directories.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    return FileResponse(
        media_type="application/octet-stream",
        filename=file_path.name,
        path=str(file_path),
    )
@app.put("/api/file/{vault_name}/save", response_model=FileSaveResponse)
async def api_file_save(
    vault_name: str,
    path: str = Query(..., description="Relative path to file"),
    body: dict = Body(...),
):
    """Save (overwrite) an existing file's content.

    Expects a JSON body with a ``content`` key containing the new text.
    The path is validated against traversal attacks before writing, and the
    body is validated *before* the file is opened so that a malformed request
    can never truncate the file.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.
        body: JSON body with ``content`` string.

    Returns:
        ``FileSaveResponse`` confirming the write; ``size`` is the length of
        the written text in characters.

    Raises:
        HTTPException(404): Unknown vault, or the path is not an existing file.
        HTTPException(400): ``content`` is missing, not a string, or cannot
            be encoded as UTF-8.
        HTTPException(403): Path escapes the vault or the file is not writable.
        HTTPException(500): Any other I/O failure while writing.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")
    vault_root = Path(vault_data["path"])
    file_path = _resolve_safe_path(vault_root, path)
    # is_file() also rejects directories, which would otherwise fail on write.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # A missing key must not default to "": that would silently wipe the file.
    content = body.get("content")
    if not isinstance(content, str):
        raise HTTPException(
            status_code=400,
            detail="Request body must contain a string 'content' field",
        )
    try:
        # write_text() truncates the file before encoding, so check that the
        # text is encodable first (JSON can carry lone surrogates).
        content.encode("utf-8")
    except UnicodeEncodeError:
        raise HTTPException(status_code=400, detail="Content is not valid UTF-8 text")

    try:
        file_path.write_text(content, encoding="utf-8")
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied: vault may be read-only")
    except OSError as e:
        logger.error("Error saving file %s/%s: %s", vault_name, path, e)
        raise HTTPException(status_code=500, detail=f"Error saving file: {str(e)}") from e

    logger.info("File saved: %s/%s", vault_name, path)
    return {"status": "ok", "vault": vault_name, "path": path, "size": len(content)}
@app.delete("/api/file/{vault_name}", response_model=FileDeleteResponse)
async def api_file_delete(vault_name: str, path: str = Query(..., description="Relative path to file")):
    """Remove a single file from a vault.

    The path is checked against directory traversal before anything is
    deleted. Only regular files can be removed through this endpoint.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileDeleteResponse`` confirming the deletion.

    Raises:
        HTTPException(404): Unknown vault, or the path is not an existing file.
        HTTPException(403): Path escapes the vault or deletion is not permitted.
        HTTPException(500): Any other failure while deleting.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")

    file_path = _resolve_safe_path(Path(vault_data["path"]), path)
    # is_file() is False for missing paths as well as for directories.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    try:
        file_path.unlink()
        logger.info(f"File deleted: {vault_name}/{path}")
        confirmation = {"status": "ok", "vault": vault_name, "path": path}
        return confirmation
    except PermissionError:
        raise HTTPException(status_code=403, detail="Permission denied: vault may be read-only")
    except Exception as e:
        logger.error(f"Error deleting file {vault_name}/{path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error deleting file: {str(e)}")
@app.get("/api/file/{vault_name}", response_model=FileContentResponse)
async def api_file(vault_name: str, path: str = Query(..., description="Relative path to file")):
    """Return rendered HTML and metadata for a file.

    Markdown files are parsed for frontmatter, rendered with wikilink
    support, and returned with extracted tags. Other file types are
    HTML-escaped and wrapped in a code block tagged with a highlight.js
    language class.

    Args:
        vault_name: Name of the vault.
        path: Relative file path within the vault.

    Returns:
        ``FileContentResponse`` with HTML, metadata, and tags.

    Raises:
        HTTPException(404): Unknown vault, or the path is not an existing file.
        HTTPException(403): The path escapes the vault root.
    """
    vault_data = get_vault_data(vault_name)
    if not vault_data:
        raise HTTPException(status_code=404, detail=f"Vault '{vault_name}' not found")
    vault_root = Path(vault_data["path"])
    file_path = _resolve_safe_path(vault_root, path)
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    raw = file_path.read_text(encoding="utf-8", errors="replace")
    ext = file_path.suffix.lower()

    if ext == ".md":
        post = parse_markdown_file(raw)
        # Extract metadata using shared indexer logic
        tags = _extract_tags(post)
        # Fall back to a name derived from the file stem when the frontmatter
        # has no title or an empty one ("title:" parses to None).
        title = post.metadata.get("title") or file_path.stem.replace("-", " ").replace("_", " ")
        html_content = _render_markdown(post.content, vault_name)
        return {
            "vault": vault_name,
            "path": path,
            "title": str(title),
            "tags": tags,
            "frontmatter": dict(post.metadata) if post.metadata else {},
            "html": html_content,
            "raw_length": len(raw),
            "extension": ext,
            "is_markdown": True,
        }

    # Non-markdown: wrap in syntax-highlighted code block
    lang = EXT_TO_LANG.get(ext)
    if lang is None:
        # Extension-less build files ("Dockerfile", "Makefile") have an empty
        # suffix; look them up by name against the ".dockerfile"/".makefile" keys.
        lang = EXT_TO_LANG.get("." + file_path.name.lower().lstrip("."), "plaintext")
    escaped = html_mod.escape(raw)
    # NOTE(review): the markup of this literal was unreadable in the reviewed
    # source (the string was split across two lines and `lang` was unused).
    # It is rebuilt here with highlight.js's "language-<name>" class
    # convention; confirm the exact markup against the frontend.
    html_content = f'<pre><code class="language-{lang}">{escaped}</code></pre>'
    return {
        "vault": vault_name,
        "path": path,
        "title": file_path.name,
        "tags": [],
        "frontmatter": {},
        "html": html_content,
        "raw_length": len(raw),
        "extension": ext,
        "is_markdown": False,
    }
@app.get("/api/search", response_model=SearchResponse)
async def api_search(
    q: str = Query("", description="Search query"),
    vault: str = Query("all", description="Vault filter"),
    tag: Optional[str] = Query(None, description="Tag filter"),
):
    """Run a full-text search and echo the filters that were applied.

    The actual matching and ranking is delegated to ``search``; this endpoint
    only wraps its results in the response envelope.

    Args:
        q: Free-text search string.
        vault: Vault name or ``"all"`` to search everywhere.
        tag: Tag filter forwarded to the search backend.

    Returns:
        ``SearchResponse`` with the results and their count.
    """
    hits = search(q, vault_filter=vault, tag_filter=tag)
    return {
        "query": q,
        "vault_filter": vault,
        "tag_filter": tag,
        "count": len(hits),
        "results": hits,
    }
@app.get("/api/tags", response_model=TagsResponse)
async def api_tags(vault: Optional[str] = Query(None, description="Vault filter")):
    """List tags with their occurrence counts.

    Aggregation is delegated to ``get_all_tags``.

    Args:
        vault: Optional vault name to restrict tag aggregation.

    Returns:
        ``TagsResponse`` with the applied vault filter and the tag counts.
    """
    return {
        "vault_filter": vault,
        "tags": get_all_tags(vault_filter=vault),
    }
@app.get("/api/tree-search", response_model=TreeSearchResponse)
async def api_tree_search(
    q: str = Query("", description="Search query"),
    vault: str = Query("all", description="Vault filter"),
):
    """Search for files and directories in the tree structure.

    Walks each selected vault's directory tree on disk and returns every
    file or directory whose vault-relative path contains the query
    (case-insensitive). Anything located at or below a hidden path component
    (a name starting with ``"."``, e.g. ``.git`` or ``.obsidian``) is
    excluded, matching what the browse endpoint exposes.

    NOTE(review): the walk is synchronous filesystem I/O inside an async
    handler; on large vaults it blocks the event loop for its duration.

    Args:
        q: Search string to match against file/directory paths.
        vault: Vault name or "all" to search everywhere.

    Returns:
        ``TreeSearchResponse`` with the matching paths (empty for an empty query).
    """
    if not q:
        return {"query": q, "vault_filter": vault, "results": []}

    needle = q.lower()
    results = []
    vaults_to_search = [vault] if vault != "all" else list(index.keys())

    for vault_name in vaults_to_search:
        vault_data = get_vault_data(vault_name)
        if not vault_data:
            continue
        vault_root = Path(vault_data["path"])
        if not vault_root.exists():
            continue

        for fpath in vault_root.rglob("*"):
            rel = fpath.relative_to(vault_root)
            # Reject hidden entries *and* everything nested inside a hidden
            # directory; checking only the entry's own name would leak the
            # contents of directories such as ".git".
            if any(part.startswith(".") for part in rel.parts):
                continue

            rel_path = rel.as_posix()
            # The file name is the last path component, so one substring test
            # on the full relative path covers name matches too.
            if needle not in rel_path.lower():
                continue

            try:
                entry_type = "directory" if fpath.is_dir() else "file"
            except OSError:
                # Unreadable entry: leave it out rather than fail the search.
                continue

            results.append({
                "vault": vault_name,
                "path": rel_path,
                "name": fpath.name,
                "type": entry_type,
                "matched_path": rel_path,
            })

    return {"query": q, "vault_filter": vault, "results": results}
class ReloadResponse(BaseModel):
    """Confirmation returned after a forced re-index.

    Defined here because the route below names it as ``response_model`` and
    no such model is defined or imported elsewhere in this module.
    """

    # "ok" on success.
    status: str
    # Statistics returned by ``reload_index()``.
    # NOTE(review): presumably a mapping of vault name to its file/tag counts;
    # confirm the shape against backend.indexer.reload_index.
    vaults: Dict[str, Any]


@app.get("/api/index/reload", response_model=ReloadResponse)
async def api_reload():
    """Force a full re-index of all configured vaults.

    Returns:
        ``ReloadResponse`` with the statistics reported by ``reload_index``.
    """
    stats = await reload_index()
    return {"status": "ok", "vaults": stats}
# ---------------------------------------------------------------------------
# Static files & SPA fallback
# ---------------------------------------------------------------------------
# Serve the frontend's assets under /static, but only when the frontend
# directory is actually present.
if FRONTEND_DIR.exists():
    app.mount("/static", StaticFiles(directory=str(FRONTEND_DIR)), name="static")
@app.get("/{full_path:path}")
async def serve_spa(full_path: str):
    """Serve the SPA index.html for all non-API routes.

    This catch-all is registered after the API routes, so it only sees paths
    that no other route handled. Unknown paths under ``/api`` are answered
    with a 404 instead of the HTML shell, so API clients never receive a
    200 HTML page for a mistyped endpoint.

    Args:
        full_path: The request path without its leading slash.

    Returns:
        ``HTMLResponse`` containing the frontend's ``index.html``.

    Raises:
        HTTPException(404): Unknown API path, or ``index.html`` is missing.
    """
    if full_path == "api" or full_path.startswith("api/"):
        raise HTTPException(status_code=404, detail="Not found")
    index_file = FRONTEND_DIR / "index.html"
    if index_file.exists():
        return HTMLResponse(content=index_file.read_text(encoding="utf-8"))
    raise HTTPException(status_code=404, detail="Frontend not found")