import logging
import re
from html import escape as html_escape
from pathlib import Path
from typing import Optional, Tuple
from urllib.parse import quote

from backend.attachment_indexer import resolve_image_path
# Module-level logger registered under the "obsigate.image_processor" name.
logger = logging.getLogger("obsigate.image_processor")
def preprocess_images(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path] = None,
    attachments_path: Optional[str] = None,
) -> str:
    """Preprocess markdown content to transform all Obsidian image syntaxes.

    Handles 4 image syntax formats:

    1. HTML image wrapped in a Markdown link:
       ``[<img src="path" width="..." height="...">](url)``
    2. Obsidian wiki-link embed with full path: ``![[path/to/image.ext]]``
    3. Obsidian wiki-link embed with filename only: ``![[image.ext]]``
    4. Standard Markdown image: ``![alt](path/to/image.ext)``

    Image paths are resolved by the three ``_process_*`` passes (each one
    delegates to ``resolve_image_path``) and rewritten to point at the
    ``/api/image`` endpoint.

    Args:
        content: Raw markdown content.
        vault_name: Name of the vault.
        vault_root: Absolute path to vault root.
        current_file_path: Absolute path to the current markdown file.
        attachments_path: Optional configured attachments folder.

    Returns:
        Preprocessed markdown with resolved image paths. Empty (or otherwise
        falsy) content is returned as-is without running any pass.
    """
    # Nothing to scan: skip the three regex passes entirely.
    if not content:
        return content

    # Process in order of specificity to avoid conflicts.

    # 1. [<img ...>](url) - HTML img wrapped in a markdown link
    content = _process_html_img_in_link(
        content, vault_name, vault_root, current_file_path, attachments_path
    )

    # 2. ![[image]] - Obsidian wiki-link embeds
    content = _process_wikilink_embeds(
        content, vault_name, vault_root, current_file_path, attachments_path
    )

    # 3. ![alt](path) - standard markdown images
    content = _process_standard_images(
        content, vault_name, vault_root, current_file_path, attachments_path
    )

    return content
def _process_html_img_in_link(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path],
    attachments_path: Optional[str],
) -> str:
    """Process ``[<img ... src="path" ...>](url)`` syntax.

    Each match is transformed to::

        <a href="url"><img src="/api/image/<vault>?path=<resolved>" width=".." height=".."/></a>

    ``width`` and ``height`` are copied from the original tag when present.
    All attribute values are HTML-escaped and the image path is
    percent-encoded, because both come straight from the note's text.

    A match is returned unchanged when the tag has no ``src`` attribute or
    when ``src`` is an external URL (``http://``, ``https://`` or ``//``),
    mirroring how standard Markdown images with external URLs are kept.
    When ``resolve_image_path`` returns nothing, the link is kept and wraps
    an ``[image not found: <file name>]`` placeholder instead.

    Args:
        content: Markdown text to scan.
        vault_name: Name of the vault, used in the API URL.
        vault_root: Vault root; resolved paths are made relative to it.
        current_file_path: Path of the note being rendered, passed to the resolver.
        attachments_path: Configured attachments folder, passed to the resolver.

    Returns:
        The content with every matching construct rewritten.
    """
    # group 1: raw attribute text of the <img> tag, group 2: link target.
    pattern = r'\[<img\s+([^>]*?)\s*/?>\]\(([^)]+)\)'

    def _attr(attrs: str, name: str) -> Optional[str]:
        """Return the quoted value of attribute *name*, or None if absent."""
        found = re.search(rf'{name}\s*=\s*["\']([^"\']+)["\']', attrs)
        return found.group(1) if found else None

    def replace_html_img(match):
        img_attrs = match.group(1)
        link_url = match.group(2)

        src_path = _attr(img_attrs, "src")
        if src_path is None:
            return match.group(0)  # No src, return unchanged
        if re.match(r'^(https?://|//)', src_path):
            return match.group(0)  # External image, nothing to resolve

        href = html_escape(link_url)

        resolved_path = resolve_image_path(
            src_path, vault_name, vault_root, current_file_path, attachments_path
        )
        if not resolved_path:
            # Image not found - keep the link, show a placeholder inside it
            placeholder = f'[image not found: {html_escape(Path(src_path).name)}]'
            return f'<a href="{href}">{placeholder}</a>'

        try:
            target = resolved_path.relative_to(vault_root)
        except ValueError:
            # Path is outside the vault - fall back to the full resolved path
            target = resolved_path
        # Normalise Windows separators before encoding the query value.
        query_path = quote(str(target).replace("\\", "/"))
        api_url = f'/api/image/{quote(vault_name, safe="")}?path={query_path}'

        attrs = [f'src="{html_escape(api_url)}"']
        for name in ("width", "height"):
            value = _attr(img_attrs, name)
            if value is not None:
                attrs.append(f'{name}="{html_escape(value)}"')
        img_tag = "<img " + " ".join(attrs) + "/>"

        # Wrap in link
        return f'<a href="{href}">{img_tag}</a>'

    return re.sub(pattern, replace_html_img, content)
def _process_wikilink_embeds(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path],
    attachments_path: Optional[str],
) -> str:
    """Process ``![[image.ext]]`` and ``![[path/to/image.ext]]`` wiki-link embeds.

    Each embed whose target ends in a known image extension (matched
    case-insensitively) is transformed to a standard Markdown image,
    ``![](/api/image/<vault>?path=<resolved>)``. When ``resolve_image_path``
    returns nothing, the embed is replaced by an
    ``[image not found: <file name>]`` placeholder.

    Args:
        content: Markdown text to scan.
        vault_name: Name of the vault, used in the API URL.
        vault_root: Vault root; resolved paths are made relative to it.
        current_file_path: Path of the note being rendered, passed to the resolver.
        attachments_path: Configured attachments folder, passed to the resolver.

    Returns:
        The content with every image embed rewritten.
    """
    # Pattern: ![[path/to/image.ext]] or ![[image.ext]]
    pattern = r'!\[\[([^\]]+?\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\]\]'

    def replace_wikilink(match):
        image_target = match.group(1).strip()

        resolved_path = resolve_image_path(
            image_target, vault_name, vault_root, current_file_path, attachments_path
        )
        if not resolved_path:
            # Image not found - show placeholder
            return f'[image not found: {html_escape(Path(image_target).name)}]'

        try:
            target = resolved_path.relative_to(vault_root)
        except ValueError:
            # Path is outside the vault - fall back to the full resolved path
            target = resolved_path
        # Normalise Windows separators.
        # NOTE(review): the path is intentionally not percent-encoded here.
        # preprocess_images runs _process_standard_images afterwards and that
        # pass's pattern also matches the image emitted below, so encoding at
        # this point would change what it sees - confirm how that pass and
        # resolve_image_path treat an /api/image/ URL before altering this.
        api_url = f'/api/image/{vault_name}?path=' + str(target).replace("\\", "/")

        # Transform to standard markdown image
        return f'![]({api_url})'

    return re.sub(pattern, replace_wikilink, content, flags=re.IGNORECASE)
def _process_standard_images(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path],
    attachments_path: Optional[str],
) -> str:
    """Process ``![alt](path/to/image.ext)`` standard markdown images.

    Each image whose destination ends in a known image extension (matched
    case-insensitively) is rewritten to
    ``![alt](/api/image/<vault>?path=<resolved>)``, keeping the alt text.
    The vault name and path are percent-encoded so that spaces, ``#``,
    ``&`` or parentheses in file names cannot break the Markdown destination
    or the query string.

    Left unchanged:
      * external URLs (``http://``, ``https://``, ``//``);
      * destinations that already start with ``/api/image/`` - they are
        API URLs, not file paths, so there is nothing to resolve.

    When ``resolve_image_path`` returns nothing, the image is replaced by an
    ``[image not found: <file name>]`` placeholder.

    Args:
        content: Markdown text to scan.
        vault_name: Name of the vault, used in the API URL.
        vault_root: Vault root; resolved paths are made relative to it.
        current_file_path: Path of the note being rendered, passed to the resolver.
        attachments_path: Configured attachments folder, passed to the resolver.

    Returns:
        The content with every local image destination rewritten.
    """
    # group 1: alt text, group 2: image destination.
    pattern = r'!\[([^\]]*)\]\(([^)]+?\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\)'

    def replace_standard_img(match):
        alt_text = match.group(1)
        image_path = match.group(2)

        # Skip if it's already an absolute URL (http://, https://, //)
        if re.match(r'^(https?://|//)', image_path):
            return match.group(0)  # Keep external URLs unchanged
        # Already rewritten to the image endpoint: do not resolve it again.
        if image_path.startswith('/api/image/'):
            return match.group(0)

        resolved_path = resolve_image_path(
            image_path, vault_name, vault_root, current_file_path, attachments_path
        )
        if not resolved_path:
            # Image not found - show placeholder
            return f'[image not found: {html_escape(Path(image_path).name)}]'

        try:
            target = resolved_path.relative_to(vault_root)
        except ValueError:
            # Path is outside the vault - fall back to the full resolved path
            target = resolved_path
        # Normalise Windows separators before encoding the query value.
        query_path = quote(str(target).replace("\\", "/"))
        api_url = f'/api/image/{quote(vault_name, safe="")}?path={query_path}'

        return f'![{alt_text}]({api_url})'

    return re.sub(pattern, replace_standard_img, content, flags=re.IGNORECASE)