Improve image path regex to handle spaces, parentheses, and emojis in standard markdown image links with greedy matching and path trimming

This commit is contained in:
Bruno Charest 2026-03-23 11:08:26 -04:00
parent 175ac3dea8
commit ba6271b89b

View File

@@ -165,12 +165,13 @@ def _process_standard_images(
Resolves the path and updates to use /api/image endpoint.
"""
# Pattern: ![alt](path)
pattern = r'!\[([^\]]*)\]\(([^)]+?\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\)'
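# Pattern: ![alt](path) - the path may contain spaces, parentheses, and emojis
# Greedy match: captures from "](" up to the last ")" that directly follows an image extension
# NOTE(review): because ".+" is greedy, two image links on the same line would be captured as one path - confirm this is acceptable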
pattern = r'!\[([^\]]*)\]\((.+\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\)'
def replace_standard_img(match):
alt_text = match.group(1)
image_path = match.group(2)
image_path = match.group(2).strip()
# Skip if it's already an absolute URL (http://, https://, //)
if re.match(r'^(https?://|//)', image_path):