From aa2c05b05fba98cddf809089a48611576cb2d470 Mon Sep 17 00:00:00 2001
From: Bruno Charest <bruno.charest@gmail.com>
Date: Wed, 27 May 2026 08:15:39 -0400
Subject: [PATCH] Add regex search with highlighted snippet support

---
 backend/search.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++-
 frontend/app.js   |  4 ++-
 2 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/backend/search.py b/backend/search.py
index d7fa3ee..5a84b70 100644
--- a/backend/search.py
+++ b/backend/search.py
@@ -167,6 +167,69 @@ def _extract_highlighted_snippet(
     return prefix + highlighted + suffix
 
 
+def _extract_regex_snippet(
+    content: str,
+    pattern_text: str,
+    context_chars: int = SNIPPET_CONTEXT_CHARS,
+    max_highlights: int = MAX_SNIPPET_HIGHLIGHTS,
+) -> str:
+    """Extract a snippet around the first regex match and highlight matches.
+
+    Unlike ``_extract_highlighted_snippet`` which works with tokenized terms,
+    this function compiles the raw regex pattern (case-insensitively) and
+    wraps each match in ``<mark>`` tags.  All returned text from *content*
+    is passed through ``_escape_html``, including every fallback path.
+
+    Args:
+        content: Full text to search within.
+        pattern_text: Raw regex pattern string.
+        context_chars: Context characters kept before the first match; the
+            window extends ``context_chars + 40`` characters after its start.
+        max_highlights: Maximum highlighted regions.
+
+    Returns:
+        HTML snippet string with ``<mark>`` highlights.  Falls back to the
+        escaped first 200 characters of *content* when the pattern is empty,
+        invalid, or has no match; empty *content* yields ``""``.
+    """
+    if not content:
+        return ""
+    if not pattern_text:
+        # Escape here as well: the frontend may render the snippet as HTML.
+        return _escape_html(content[:200].strip())
+
+    try:
+        pattern = re.compile(pattern_text, re.IGNORECASE)
+    except re.error:
+        return _escape_html(content[:200].strip())
+
+    # search() suffices: only the first match positions the snippet window.
+    first = pattern.search(content)
+    if first is None:
+        return _escape_html(content[:200].strip())
+
+    start = max(0, first.start() - context_chars)
+    end = min(len(content), first.start() + context_chars + 40)
+    snippet = content[start:end].strip()
+    prefix = "..." if start > 0 else ""
+    suffix = "..." if end < len(content) else ""
+
+    # Re-run the pattern on the snippet so match offsets are snippet-relative.
+    parts = []
+    prev = 0
+    for count, m in enumerate(pattern.finditer(snippet)):
+        if count >= max_highlights:
+            break
+        if m.start() > prev:
+            parts.append(_escape_html(snippet[prev:m.start()]))
+        parts.append(f"<mark>{_escape_html(m.group(0))}</mark>")
+        prev = m.end()
+    if prev < len(snippet):
+        parts.append(_escape_html(snippet[prev:]))
+
+    return prefix + "".join(parts) + suffix
+
+
 def _highlight_terms(text: str, terms: List[str], max_highlights: int) -> str:
     """Wrap occurrences of *terms* in *text* with ``<mark>`` tags.
 
@@ -1005,7 +1068,11 @@ def advanced_search(
             # Build highlighted snippet
             content = file_info.get("content", "")
             if has_terms:
-                snippet = _extract_highlighted_snippet(content, query_terms)
+                if regex:
+                    raw_regex = " ".join(query_terms_raw) if query_terms_raw else ""
+                    snippet = _extract_regex_snippet(content, raw_regex)
+                else:
+                    snippet = _extract_highlighted_snippet(content, query_terms)
             else:
                 snippet = _escape_html(content[:200].strip()) if content else ""
 
diff --git a/frontend/app.js b/frontend/app.js
index 90545f5..6c14cc2 100644
--- a/frontend/app.js
+++ b/frontend/app.js
@@ -5173,7 +5173,9 @@
         titleDiv.textContent = r.title;
       }
       const snippetDiv = el("div", { class: "search-result-snippet" });
-      if (query && query.trim() && r.snippet) {
+      if (r.snippet && r.snippet.includes("<mark>")) {
+        snippetDiv.innerHTML = r.snippet;
+      } else if (query && query.trim() && r.snippet) {
         highlightSearchText(snippetDiv, r.snippet, query, searchCaseSensitive);
       } else {
         snippetDiv.textContent = r.snippet || "";