199 lines
7.6 KiB
Python
199 lines
7.6 KiB
Python
import logging
import re
from html import escape as html_escape
from pathlib import Path
from typing import Optional, Tuple
from urllib.parse import quote

from backend.attachment_indexer import resolve_image_path
|
|
|
|
logger = logging.getLogger("obsigate.image_processor")
|
|
|
|
|
|
def preprocess_images(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path] = None,
    attachments_path: Optional[str] = None
) -> str:
    """Rewrite every supported Obsidian/markdown image syntax in *content*.

    Four image notations are recognised:
      1. An HTML ``<img>`` wrapped in a markdown link: [<img ... src="path"/>](url)
      2. A wiki-link embed with a full path: ![[path/to/image.ext]]
      3. A wiki-link embed with only a file name: ![[image.ext]]
      4. A standard markdown image: ![alt](path)

    Each stage resolves the image path and points the result at the
    /api/image endpoint.

    Args:
        content: Raw markdown content.
        vault_name: Name of the vault.
        vault_root: Absolute path to vault root.
        current_file_path: Absolute path to the current markdown file.
        attachments_path: Optional configured attachments folder.

    Returns:
        Preprocessed markdown with resolved image paths.
    """
    # The stages run from most to least specific syntax so that an earlier,
    # narrower pattern is consumed before a broader one can see it.
    stages = (
        _process_html_img_in_link,  # [<img ... src="path"/>](url)
        _process_wikilink_embeds,   # ![[image]]
        _process_standard_images,   # ![alt](path)
    )

    for stage in stages:
        content = stage(
            content, vault_name, vault_root, current_file_path, attachments_path
        )

    return content
|
|
|
|
|
|
def _process_html_img_in_link(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path],
    attachments_path: Optional[str]
) -> str:
    """Process [<img ... src="path"/>](url) syntax.

    Transforms to:
    <a href="url"><img src="/api/image/vault?path=resolved" width="..." height="..."/></a>

    Matches are left untouched when the tag has no ``src`` attribute or when
    ``src`` is an external URL (http://, https://, //). When the image cannot
    be resolved, the link is kept and wraps an "image not found" placeholder.

    Args:
        content: Markdown text to scan.
        vault_name: Name of the vault, used in the API URL.
        vault_root: Vault root; resolved paths are made relative to it.
        current_file_path: Path of the markdown file being rendered, passed
            through to the resolver.
        attachments_path: Optional attachments folder, passed through to the
            resolver.

    Returns:
        The content with every matching construct replaced.
    """
    # Pattern: [<img ... src="path" ... />](url)
    pattern = r'\[<img\s+([^>]*?)\s*/?>\]\(([^)]+)\)'

    def get_attr(attrs: str, name: str) -> Optional[str]:
        """Return the quoted value of attribute *name*, or None if absent."""
        # The lookbehind keeps "src" from matching inside "data-src",
        # "width" inside "data-width", and so on.
        found = re.search(
            r'(?<![\w-])' + name + r'\s*=\s*["\']([^"\']+)["\']', attrs
        )
        return found.group(1) if found else None

    def replace_html_img(match):
        img_attrs = match.group(1)
        link_url = match.group(2)

        src_path = get_attr(img_attrs, "src")
        if not src_path:
            return match.group(0)  # No src, return unchanged

        # External images (badges, CDN assets) cannot be resolved inside the
        # vault; keep them as written, as is done for standard images.
        if re.match(r'^(https?://|//)', src_path, flags=re.IGNORECASE):
            return match.group(0)

        width = get_attr(img_attrs, "width")
        height = get_attr(img_attrs, "height")

        # Resolve the image path
        resolved_path = resolve_image_path(
            src_path, vault_name, vault_root, current_file_path, attachments_path
        )

        if not resolved_path:
            # Image not found - show placeholder inside the original link
            placeholder = f'<span class="image-not-found" title="Image not found: {html_escape(src_path)}">[image not found: {html_escape(Path(src_path).name)}]</span>'
            return f'<a href="{html_escape(link_url)}">{placeholder}</a>'

        try:
            target = resolved_path.relative_to(vault_root)
        except ValueError:
            # Path is outside vault - fall back to the full resolved path
            target = resolved_path

        # Percent-encode so spaces, '&', '#', quotes etc. cannot truncate the
        # query string or break out of the src attribute.
        url_path = quote(str(target).replace(chr(92), "/"), safe="/")
        api_url = f'/api/image/{quote(vault_name, safe="")}?path={url_path}'

        # Build img tag with attributes
        img_tag = f'<img src="{html_escape(api_url)}"'
        if width:
            img_tag += f' width="{html_escape(width)}"'
        if height:
            img_tag += f' height="{html_escape(height)}"'
        img_tag += ' />'

        # Wrap in link
        return f'<a href="{html_escape(link_url)}">{img_tag}</a>'

    return re.sub(pattern, replace_html_img, content)
|
|
|
|
|
|
def _process_wikilink_embeds(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path],
    attachments_path: Optional[str]
) -> str:
    """Process ![[image]] and ![[path/to/image]] wiki-link embeds.

    Transforms to: ![target](/api/image/vault?path=resolved)

    Only embeds whose target ends in a known image extension (png, jpg,
    jpeg, gif, svg, webp, bmp, ico; case-insensitive) are rewritten. Other
    embeds are left untouched. An embed whose image cannot be resolved is
    replaced by an "image not found" placeholder span.

    Args:
        content: Markdown text to scan.
        vault_name: Name of the vault, used in the API URL.
        vault_root: Vault root; resolved paths are made relative to it.
        current_file_path: Path of the markdown file being rendered, passed
            through to the resolver.
        attachments_path: Optional attachments folder, passed through to the
            resolver.

    Returns:
        The content with every image embed replaced.
    """
    # Pattern: ![[path/to/image.ext]] or ![[image.ext]]
    pattern = r'!\[\[([^\]]+?\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\]\]'

    def replace_wikilink(match):
        image_target = match.group(1).strip()

        # Resolve the image path
        resolved_path = resolve_image_path(
            image_target, vault_name, vault_root, current_file_path, attachments_path
        )

        if not resolved_path:
            # Image not found - show placeholder
            return f'<span class="image-not-found" title="Image not found: {html_escape(image_target)}">[image not found: {html_escape(Path(image_target).name)}]</span>'

        try:
            target = resolved_path.relative_to(vault_root)
        except ValueError:
            # Path is outside vault - fall back to the full resolved path
            target = resolved_path

        # Percent-encode: a raw space or parenthesis would terminate the
        # markdown link destination, and '&'/'#' would cut the query short.
        url_path = quote(str(target).replace(chr(92), "/"), safe="/")
        api_url = f'/api/image/{quote(vault_name, safe="")}?path={url_path}'

        # Transform to standard markdown image. The target is safe as alt
        # text because the pattern guarantees it contains no ']'.
        return f'![{image_target}]({api_url})'

    return re.sub(pattern, replace_wikilink, content, flags=re.IGNORECASE)
|
|
|
|
|
|
def _process_standard_images(
    content: str,
    vault_name: str,
    vault_root: Path,
    current_file_path: Optional[Path],
    attachments_path: Optional[str]
) -> str:
    """Process ![alt](path) standard markdown images.

    Resolves the path and updates it to use the /api/image endpoint, keeping
    the alt text. Left unchanged are:
      * external URLs (http://, https://, //),
      * URLs that already point at /api/image/ (e.g. produced by an earlier
        rewrite pass), so the function does not re-resolve its own output.

    An image that cannot be resolved is replaced by an "image not found"
    placeholder span.

    Args:
        content: Markdown text to scan.
        vault_name: Name of the vault, used in the API URL.
        vault_root: Vault root; resolved paths are made relative to it.
        current_file_path: Path of the markdown file being rendered, passed
            through to the resolver.
        attachments_path: Optional attachments folder, passed through to the
            resolver.

    Returns:
        The content with every local image reference rewritten.
    """
    # Pattern: ![alt](path) - the path may contain spaces, parentheses and
    # emojis, and ends at the first image extension directly followed by ')'.
    # The quantifier is non-greedy so that two images on one line are matched
    # separately instead of as a single path spanning both.
    pattern = r'!\[([^\]]*)\]\((.+?\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\)'

    def replace_standard_img(match):
        alt_text = match.group(1)
        image_path = match.group(2).strip()

        # Skip if it's already an absolute URL (http://, https://, //)
        if re.match(r'^(https?://|//)', image_path, flags=re.IGNORECASE):
            return match.group(0)  # Keep external URLs unchanged

        # Skip references that were already rewritten to the API endpoint
        if image_path.startswith('/api/image/'):
            return match.group(0)

        # Resolve the image path
        resolved_path = resolve_image_path(
            image_path, vault_name, vault_root, current_file_path, attachments_path
        )

        if not resolved_path:
            # Image not found - show placeholder
            return f'<span class="image-not-found" title="Image not found: {html_escape(image_path)}">[image not found: {html_escape(Path(image_path).name)}]</span>'

        try:
            target = resolved_path.relative_to(vault_root)
        except ValueError:
            # Path is outside vault - fall back to the full resolved path
            target = resolved_path

        # Percent-encode: a raw space or parenthesis would terminate the
        # markdown link destination, and '&'/'#' would cut the query short.
        url_path = quote(str(target).replace(chr(92), "/"), safe="/")
        api_url = f'/api/image/{quote(vault_name, safe="")}?path={url_path}'

        return f'![{alt_text}]({api_url})'

    return re.sub(pattern, replace_standard_img, content, flags=re.IGNORECASE)
|