Add regex search with highlighted snippet support
This commit is contained in:
parent
0630aeba77
commit
aa2c05b05f
@ -167,6 +167,69 @@ def _extract_highlighted_snippet(
|
|||||||
return prefix + highlighted + suffix
|
return prefix + highlighted + suffix
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_regex_snippet(
    content: str,
    pattern_text: str,
    context_chars: int = SNIPPET_CONTEXT_CHARS,
    max_highlights: int = MAX_SNIPPET_HIGHLIGHTS,
) -> str:
    """Extract a snippet and highlight actual regex matches.

    Unlike ``_extract_highlighted_snippet`` which works with tokenized terms,
    this function compiles the raw regex pattern (case-insensitively) and
    wraps each match in ``<mark>`` tags. The snippet is a window of
    *content* around the first non-empty match.

    Every piece of text taken from *content* is passed through
    ``_escape_html`` before being returned, including on all fallback
    paths, because callers may insert the result into the page as HTML.

    Args:
        content: Full text to search within.
        pattern_text: Raw regex pattern string.
        context_chars: Number of context characters kept before the first
            match; the window extends ``context_chars + 40`` characters
            after the start of that match.
        max_highlights: Maximum highlighted regions.

    Returns:
        HTML snippet string with ``<mark>`` highlights. If *content* is
        empty the result is ``""``. If the pattern is empty, invalid, or
        has no non-empty match, the escaped first 200 characters of
        *content* are returned without highlights.
    """
    if not content:
        return ""

    def _fallback() -> str:
        # Always escaped: the result may end up rendered as HTML.
        return _escape_html(content[:200].strip())

    if not pattern_text:
        return _fallback()

    # NOTE(review): pattern_text appears to be user-supplied; the stdlib
    # ``re`` module has no timeout, so a pathological pattern can still
    # backtrack for a long time here.
    try:
        pattern = re.compile(pattern_text, re.IGNORECASE)
    except re.error:
        return _fallback()

    # Lazily iterate matches over the *full* content so anchors, ``\b`` and
    # lookarounds see their real context. Zero-width matches (``a*``, ``^``)
    # are skipped: there is nothing to highlight and they would otherwise
    # produce empty ``<mark></mark>`` pairs.
    matches = (m for m in pattern.finditer(content) if m.end() > m.start())
    first = next(matches, None)
    if first is None:
        return _fallback()

    # Window centred on the first match.
    start = max(0, first.start() - context_chars)
    end = min(len(content), first.start() + context_chars + 40)
    prefix = "..." if start > 0 else ""
    suffix = "..." if end < len(content) else ""

    # Trim surrounding whitespace by moving the bounds rather than copying
    # the text, so match offsets from the full content stay valid.
    window = content[start:end]
    snip_start = start + (len(window) - len(window.lstrip()))
    snip_end = snip_start + len(window.strip())
    if snip_end <= snip_start:
        return prefix + suffix

    parts = []
    cursor = snip_start
    shown = 0
    match = first
    while (
        match is not None
        and shown < max_highlights
        and match.start() < snip_end
    ):
        # Clip the match to the visible window (a long match may run past
        # the end, or begin inside trimmed leading whitespace).
        lo = max(match.start(), cursor)
        hi = min(match.end(), snip_end)
        if hi > lo:
            if lo > cursor:
                parts.append(_escape_html(content[cursor:lo]))
            parts.append(f"<mark>{_escape_html(content[lo:hi])}</mark>")
            cursor = hi
            shown += 1
        match = next(matches, None)

    if cursor < snip_end:
        parts.append(_escape_html(content[cursor:snip_end]))

    return prefix + "".join(parts) + suffix
|
||||||
|
|
||||||
|
|
||||||
def _highlight_terms(text: str, terms: List[str], max_highlights: int) -> str:
|
def _highlight_terms(text: str, terms: List[str], max_highlights: int) -> str:
|
||||||
"""Wrap occurrences of *terms* in *text* with ``<mark>`` tags.
|
"""Wrap occurrences of *terms* in *text* with ``<mark>`` tags.
|
||||||
|
|
||||||
@ -1005,7 +1068,11 @@ def advanced_search(
|
|||||||
# Build highlighted snippet
|
# Build highlighted snippet
|
||||||
content = file_info.get("content", "")
|
content = file_info.get("content", "")
|
||||||
if has_terms:
|
if has_terms:
|
||||||
snippet = _extract_highlighted_snippet(content, query_terms)
|
if regex:
|
||||||
|
raw_regex = " ".join(query_terms_raw) if query_terms_raw else ""
|
||||||
|
snippet = _extract_regex_snippet(content, raw_regex)
|
||||||
|
else:
|
||||||
|
snippet = _extract_highlighted_snippet(content, query_terms)
|
||||||
else:
|
else:
|
||||||
snippet = _escape_html(content[:200].strip()) if content else ""
|
snippet = _escape_html(content[:200].strip()) if content else ""
|
||||||
|
|
||||||
|
|||||||
@ -5173,7 +5173,9 @@
|
|||||||
titleDiv.textContent = r.title;
|
titleDiv.textContent = r.title;
|
||||||
}
|
}
|
||||||
const snippetDiv = el("div", { class: "search-result-snippet" });
|
const snippetDiv = el("div", { class: "search-result-snippet" });
|
||||||
if (query && query.trim() && r.snippet) {
|
if (r.snippet && r.snippet.includes("<mark>")) {
|
||||||
|
snippetDiv.innerHTML = r.snippet;
|
||||||
|
} else if (query && query.trim() && r.snippet) {
|
||||||
highlightSearchText(snippetDiv, r.snippet, query, searchCaseSensitive);
|
highlightSearchText(snippetDiv, r.snippet, query, searchCaseSensitive);
|
||||||
} else {
|
} else {
|
||||||
snippetDiv.textContent = r.snippet || "";
|
snippetDiv.textContent = r.snippet || "";
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user