From ba6271b89b5edd1c78b7c7428fdbc0b7933e6fc7 Mon Sep 17 00:00:00 2001 From: Bruno Charest Date: Mon, 23 Mar 2026 11:08:26 -0400 Subject: [PATCH] Improve image path regex to handle spaces, parentheses, and emojis in standard markdown image links with greedy matching and path trimming --- backend/image_processor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/image_processor.py b/backend/image_processor.py index 9dbce44..8d0c2f6 100644 --- a/backend/image_processor.py +++ b/backend/image_processor.py @@ -165,12 +165,13 @@ def _process_standard_images( Resolves the path and updates to use /api/image endpoint. """ - # Pattern: ![alt](path) - pattern = r'!\[([^\]]*)\]\(([^)]+?\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\)' + # Pattern: ![alt](path) - match everything including spaces, parentheses, and emojis + # Captures from ]( to ) where the content ends with an image extension + pattern = r'!\[([^\]]*)\]\((.+\.(?:png|jpg|jpeg|gif|svg|webp|bmp|ico))\)' def replace_standard_img(match): alt_text = match.group(1) - image_path = match.group(2) + image_path = match.group(2).strip() # Skip if it's already an absolute URL (http://, https://, //) if re.match(r'^(https?://|//)', image_path):