From e171a0dc35e41caa519a9db97be24833f27126d1 Mon Sep 17 00:00:00 2001 From: Bruno Charest Date: Mon, 23 Mar 2026 12:09:46 -0400 Subject: [PATCH] Add advanced TF-IDF search with autocomplete, query operators, facets, pagination, and accent normalization --- README.md | 60 +++- backend/main.py | 131 ++++++- backend/search.py | 658 ++++++++++++++++++++++++++++++++++- frontend/app.js | 809 +++++++++++++++++++++++++++++++++++++++++--- frontend/index.html | 25 ++ frontend/style.css | 328 ++++++++++++++++++ 6 files changed, 1965 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 8c4affa..adcf54d 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,10 @@ - **đŸ—‚ïž Multi-vault** : Visualisez plusieurs vaults Obsidian simultanĂ©ment - **🌳 Navigation arborescente** : Parcourez vos dossiers et fichiers dans la sidebar -- **🔍 Recherche fulltext** : Recherche instantanĂ©e dans le contenu et les titres +- **🔍 Recherche avancĂ©e** : Moteur TF-IDF avec normalisation des accents, snippets surlignĂ©s, facettes, pagination et tri +- **💡 AutocomplĂ©tion intelligente** : Suggestions de fichiers, tags et historique avec navigation clavier +- **đŸ§© Syntaxe de requĂȘte** : OpĂ©rateurs `tag:`, `#`, `vault:`, `title:`, `path:` avec chips visuels +- **📜 Historique de recherche** : PersistĂ© en localStorage (max 50 entrĂ©es, LIFO, dĂ©dupliquĂ©) - **đŸ·ïž Tag cloud** : Filtrage par tags extraits des frontmatters YAML - **🔗 Wikilinks** : Les `[[liens internes]]` Obsidian sont cliquables - **đŸ–Œïž Images Obsidian** : Support complet des syntaxes d'images Obsidian avec rĂ©solution intelligente @@ -347,7 +350,10 @@ ObsiGate expose une API REST complĂšte : | `/api/file/{vault}/download?path=` | TĂ©lĂ©chargement d'un fichier | GET | | `/api/file/{vault}/save?path=` | Sauvegarder un fichier | PUT | | `/api/file/{vault}?path=` | Supprimer un fichier | DELETE | -| `/api/search?q=&vault=&tag=` | Recherche fulltext | GET | +| `/api/search?q=&vault=&tag=` | Recherche fulltext 
(legacy) | GET | +| `/api/search/advanced?q=&vault=&tag=&limit=&offset=&sort=` | Recherche avancĂ©e TF-IDF avec facettes et pagination | GET | +| `/api/suggest?q=&vault=&limit=` | Suggestions de titres de fichiers (autocomplĂ©tion) | GET | +| `/api/tags/suggest?q=&vault=&limit=` | Suggestions de tags (autocomplĂ©tion) | GET | | `/api/tags?vault=` | Tags uniques avec compteurs | GET | | `/api/index/reload` | Force un re-scan des vaults | GET | | `/api/image/{vault}?path=` | Servir une image avec MIME type appropriĂ© | GET | @@ -364,15 +370,63 @@ curl http://localhost:2020/api/health # Lister les vaults curl http://localhost:2020/api/vaults -# Rechercher +# Recherche simple (legacy) curl "http://localhost:2020/api/search?q=recette&vault=all" +# Recherche avancĂ©e avec TF-IDF, facettes et pagination +curl "http://localhost:2020/api/search/advanced?q=recette%20tag:cuisine&vault=all&limit=20&offset=0&sort=relevance" + +# AutocomplĂ©tion de titres +curl "http://localhost:2020/api/suggest?q=piz&vault=all" + +# AutocomplĂ©tion de tags +curl "http://localhost:2020/api/tags/suggest?q=rec&vault=all" + # Obtenir un fichier curl "http://localhost:2020/api/file/Recettes?path=pizza.md" ``` --- +## 🔍 Recherche avancĂ©e + +### Syntaxe de requĂȘte + +| OpĂ©rateur | Description | Exemple | +|-----------|-------------|---------| +| `tag:` | Filtrer par tag | `tag:recette docker` | +| `#` | Raccourci tag | `#linux serveur` | +| `vault:` | Filtrer par vault | `vault:IT kubernetes` | +| `title:` | Filtrer par titre | `title:pizza` | +| `path:` | Filtrer par chemin | `path:recettes/soupes` | +| `"phrase exacte"` | Recherche de phrase | `tag:"multi mots"` | + +Les opĂ©rateurs sont combinables : `tag:linux vault:IT serveur web` recherche "serveur web" dans le vault IT avec le tag linux. 
+ +### Raccourcis clavier + +| Raccourci | Action | +|-----------|--------| +| `Ctrl+K` / `Cmd+K` | Focaliser la barre de recherche | +| `/` | Focaliser la recherche (hors champ texte) | +| `↑` / `↓` | Naviguer dans les suggestions | +| `Enter` | SĂ©lectionner la suggestion active ou lancer la recherche | +| `Escape` | Fermer les suggestions / quitter la recherche | + +### FonctionnalitĂ©s + +- **TF-IDF** : Scoring basĂ© sur la frĂ©quence des termes pondĂ©rĂ©e par l'inverse de la frĂ©quence documentaire +- **Boost titre** : Les correspondances dans le titre reçoivent un score 3× supĂ©rieur +- **Normalisation des accents** : `resume` trouve `rĂ©sumĂ©`, `elephant` trouve `Ă©lĂ©phant` +- **Snippets surlignĂ©s** : Les termes trouvĂ©s sont encadrĂ©s par `` dans les extraits +- **Facettes** : Compteurs par vault et par tag dans les rĂ©sultats +- **Pagination** : Navigation par pages de 50 rĂ©sultats +- **Tri** : Par pertinence (TF-IDF) ou par date de modification +- **Chips visuels** : Les filtres actifs sont affichĂ©s comme des chips colorĂ©s supprimables +- **Historique** : Les 50 derniĂšres recherches sont mĂ©morisĂ©es en localStorage + +--- + ## 🔧 DĂ©pannage ### ProblĂšmes courants diff --git a/backend/main.py b/backend/main.py index b717852..301e01a 100644 --- a/backend/main.py +++ b/backend/main.py @@ -25,7 +25,7 @@ from backend.indexer import ( _extract_tags, SUPPORTED_EXTENSIONS, ) -from backend.search import search, get_all_tags +from backend.search import search, get_all_tags, advanced_search, suggest_titles, suggest_tags from backend.image_processor import preprocess_images from backend.attachment_indexer import rescan_vault_attachments, get_attachment_stats @@ -141,6 +141,57 @@ class TreeSearchResponse(BaseModel): results: List[TreeSearchResult] +class AdvancedSearchResultItem(BaseModel): + """A single advanced search result with highlighted snippet.""" + vault: str + path: str + title: str + tags: List[str] + score: float + snippet: str + modified: str + + 
+class SearchFacets(BaseModel): + """Faceted counts for search results.""" + tags: Dict[str, int] = Field(default_factory=dict) + vaults: Dict[str, int] = Field(default_factory=dict) + + +class AdvancedSearchResponse(BaseModel): + """Advanced search response with TF-IDF scoring, facets, and pagination.""" + results: List[AdvancedSearchResultItem] + total: int + offset: int + limit: int + facets: SearchFacets + + +class TitleSuggestion(BaseModel): + """A file title suggestion for autocomplete.""" + vault: str + path: str + title: str + + +class SuggestResponse(BaseModel): + """Autocomplete suggestions for file titles.""" + query: str + suggestions: List[TitleSuggestion] + + +class TagSuggestion(BaseModel): + """A tag suggestion for autocomplete.""" + tag: str + count: int + + +class TagSuggestResponse(BaseModel): + """Autocomplete suggestions for tags.""" + query: str + suggestions: List[TagSuggestion] + + class ReloadResponse(BaseModel): """Index reload confirmation with per-vault stats.""" status: str @@ -711,6 +762,84 @@ async def api_tree_search( return {"query": q, "vault_filter": vault, "results": results} +@app.get("/api/search/advanced", response_model=AdvancedSearchResponse) +async def api_advanced_search( + q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path: operators)"), + vault: str = Query("all", description="Vault filter"), + tag: Optional[str] = Query(None, description="Comma-separated tag filter"), + limit: int = Query(50, ge=1, le=200, description="Results per page"), + offset: int = Query(0, ge=0, description="Pagination offset"), + sort: str = Query("relevance", description="Sort by 'relevance' or 'modified'"), +): + """Advanced full-text search with TF-IDF scoring, facets, and pagination. 
+ + Supports advanced query operators: + - ``tag:`` or ``#`` — filter by tag + - ``vault:`` — filter by vault + - ``title:`` — filter by title substring + - ``path:`` — filter by path substring + - Remaining text is scored using TF-IDF with accent normalization. + + Results include ````-highlighted snippets and faceted tag/vault counts. + + Args: + q: Query string with optional operators. + vault: Vault name or ``"all"``. + tag: Extra comma-separated tag names to require. + limit: Max results per page (1–200). + offset: Pagination offset. + sort: ``"relevance"`` (TF-IDF) or ``"modified"`` (date). + + Returns: + ``AdvancedSearchResponse`` with scored results, facets, and pagination info. + """ + return advanced_search(q, vault_filter=vault, tag_filter=tag, limit=limit, offset=offset, sort_by=sort) + + +@app.get("/api/suggest", response_model=SuggestResponse) +async def api_suggest( + q: str = Query("", description="Prefix to search for in file titles"), + vault: str = Query("all", description="Vault filter"), + limit: int = Query(10, ge=1, le=50, description="Max suggestions"), +): + """Suggest file titles matching a prefix (accent-insensitive). + + Used for autocomplete in the search input. + + Args: + q: User-typed prefix (minimum 2 characters). + vault: Vault name or ``"all"``. + limit: Max number of suggestions. + + Returns: + ``SuggestResponse`` with matching file title suggestions. + """ + suggestions = suggest_titles(q, vault_filter=vault, limit=limit) + return {"query": q, "suggestions": suggestions} + + +@app.get("/api/tags/suggest", response_model=TagSuggestResponse) +async def api_tags_suggest( + q: str = Query("", description="Prefix to search for in tags"), + vault: str = Query("all", description="Vault filter"), + limit: int = Query(10, ge=1, le=50, description="Max suggestions"), +): + """Suggest tags matching a prefix (accent-insensitive). + + Used for autocomplete when typing ``tag:`` or ``#`` in the search input. 
+ + Args: + q: User-typed prefix (with or without ``#``, minimum 2 characters). + vault: Vault name or ``"all"``. + limit: Max number of suggestions. + + Returns: + ``TagSuggestResponse`` with matching tag suggestions and counts. + """ + suggestions = suggest_tags(q, vault_filter=vault, limit=limit) + return {"query": q, "suggestions": suggestions} + + @app.get("/api/index/reload", response_model=ReloadResponse) async def api_reload(): """Force a full re-index of all configured vaults. diff --git a/backend/search.py b/backend/search.py index 6760f40..29a7956 100644 --- a/backend/search.py +++ b/backend/search.py @@ -1,14 +1,70 @@ import logging -from typing import List, Dict, Any, Optional +import math +import re +import unicodedata +from collections import defaultdict +from typing import List, Dict, Any, Optional, Tuple from backend.indexer import index logger = logging.getLogger("obsigate.search") -# Default maximum number of search results returned +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- DEFAULT_SEARCH_LIMIT = 200 +ADVANCED_SEARCH_DEFAULT_LIMIT = 50 +SNIPPET_CONTEXT_CHARS = 120 +MAX_SNIPPET_HIGHLIGHTS = 5 +TITLE_BOOST = 3.0 # TF-IDF multiplier for title matches +PATH_BOOST = 1.5 # TF-IDF multiplier for path matches +TAG_BOOST = 2.0 # TF-IDF multiplier for tag matches +MIN_PREFIX_LENGTH = 2 # Minimum chars for prefix matching +SUGGEST_LIMIT = 10 # Default max suggestions returned + +# Regex to tokenize text into alphanumeric words (Unicode-aware) +_WORD_RE = re.compile(r"[\w]+", re.UNICODE) +# --------------------------------------------------------------------------- +# Accent / Unicode normalization helpers +# --------------------------------------------------------------------------- +def normalize_text(text: str) -> str: + """Normalize text for accent-insensitive comparison. 
+ + Decomposes Unicode characters (NFD), strips combining diacritical marks, + then lowercases the result. For example ``"ÉlĂ©phant"`` → ``"elephant"``. + + Args: + text: Raw input string. + + Returns: + Lowercased, accent-stripped string. + """ + if not text: + return "" + # NFD decomposition splits base char + combining mark + nfkd = unicodedata.normalize("NFKD", text) + # Strip combining marks (category "Mn" = Mark, Nonspacing) + stripped = "".join(ch for ch in nfkd if unicodedata.category(ch) != "Mn") + return stripped.lower() + + +def tokenize(text: str) -> List[str]: + """Split text into normalized tokens (accent-stripped, lowercased words). + + Args: + text: Raw text to tokenize. + + Returns: + List of normalized word tokens. + """ + return _WORD_RE.findall(normalize_text(text)) + + +# --------------------------------------------------------------------------- +# Tag filter helper (unchanged for backward compat) +# --------------------------------------------------------------------------- def _normalize_tag_filter(tag_filter: Optional[str]) -> List[str]: """Parse a comma-separated tag filter string into a clean list. @@ -25,7 +81,10 @@ def _normalize_tag_filter(tag_filter: Optional[str]) -> List[str]: return [tag.strip().lstrip("#") for tag in tag_filter.split(",") if tag.strip()] -def _extract_snippet(content: str, query: str, context_chars: int = 120) -> str: +# --------------------------------------------------------------------------- +# Snippet extraction helpers +# --------------------------------------------------------------------------- +def _extract_snippet(content: str, query: str, context_chars: int = SNIPPET_CONTEXT_CHARS) -> str: """Extract a text snippet around the first occurrence of *query*. Returns up to ``context_chars`` characters before and after the match. 
@@ -57,6 +116,263 @@ def _extract_snippet(content: str, query: str, context_chars: int = 120) -> str: return snippet +def _extract_highlighted_snippet( + content: str, + query_terms: List[str], + context_chars: int = SNIPPET_CONTEXT_CHARS, + max_highlights: int = MAX_SNIPPET_HIGHLIGHTS, +) -> str: + """Extract a snippet and wrap matching terms in ```` tags. + + Performs accent-normalized matching so ``"resume"`` highlights ``"rĂ©sumĂ©"``. + Returns at most *max_highlights* highlighted regions to keep snippets concise. + + Args: + content: Full text to search within. + query_terms: Normalized search terms. + context_chars: Number of context characters on each side. + max_highlights: Maximum highlighted regions. + + Returns: + HTML snippet string with ```` highlights. + """ + if not content or not query_terms: + return content[:200].strip() if content else "" + + norm_content = normalize_text(content) + + # Find best position — first occurrence of any query term + best_pos = len(content) + for term in query_terms: + pos = norm_content.find(term) + if pos != -1 and pos < best_pos: + best_pos = pos + + if best_pos == len(content): + # No match found — return beginning of content + return _escape_html(content[:200].strip()) + + start = max(0, best_pos - context_chars) + end = min(len(content), best_pos + context_chars + 40) + raw_snippet = content[start:end].strip() + + prefix = "..." if start > 0 else "" + suffix = "..." if end < len(content) else "" + + # Highlight all term occurrences in the snippet + highlighted = _highlight_terms(raw_snippet, query_terms, max_highlights) + return prefix + highlighted + suffix + + +def _highlight_terms(text: str, terms: List[str], max_highlights: int) -> str: + """Wrap occurrences of *terms* in *text* with ```` tags. + + Uses accent-normalized comparison so diacritical variants are matched. + Escapes HTML in non-highlighted portions to prevent XSS. + + Args: + text: Raw text snippet. + terms: Normalized search terms. 
+ max_highlights: Cap on highlighted regions. + + Returns: + HTML-safe string with ```` wrapped matches. + """ + if not terms or not text: + return _escape_html(text) + + norm = normalize_text(text) + # Collect (start, end) spans for all term matches + spans: List[Tuple[int, int]] = [] + for term in terms: + idx = 0 + while idx < len(norm): + pos = norm.find(term, idx) + if pos == -1: + break + spans.append((pos, pos + len(term))) + idx = pos + 1 + + if not spans: + return _escape_html(text) + + # Merge overlapping spans and limit count + spans.sort() + merged: List[Tuple[int, int]] = [spans[0]] + for s, e in spans[1:]: + if s <= merged[-1][1]: + merged[-1] = (merged[-1][0], max(merged[-1][1], e)) + else: + merged.append((s, e)) + merged = merged[:max_highlights] + + # Build result with highlights + parts: List[str] = [] + prev = 0 + for s, e in merged: + if s > prev: + parts.append(_escape_html(text[prev:s])) + parts.append(f"{_escape_html(text[s:e])}") + prev = e + if prev < len(text): + parts.append(_escape_html(text[prev:])) + + return "".join(parts) + + +def _escape_html(text: str) -> str: + """Escape HTML special characters.""" + return ( + text.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + ) + + +# --------------------------------------------------------------------------- +# Inverted Index for TF-IDF +# --------------------------------------------------------------------------- +class InvertedIndex: + """In-memory inverted index supporting TF-IDF scoring. + + Built lazily from the global ``index`` dict whenever a search or + suggestion request detects that the underlying vault index has changed. + The class is designed to be a singleton — use ``get_inverted_index()``. 
+ + Attributes: + word_index: ``{token: {doc_key: term_frequency}}`` + title_index: ``{token: [doc_key, ...]}`` + tag_norm_map: ``{normalized_tag: original_tag}`` + tag_prefix_index: ``{prefix: [original_tag, ...]}`` + doc_count: Total number of indexed documents. + _source_id: Fingerprint of the source index to detect staleness. + """ + + def __init__(self) -> None: + self.word_index: Dict[str, Dict[str, int]] = defaultdict(dict) + self.title_index: Dict[str, List[str]] = defaultdict(list) + self.tag_norm_map: Dict[str, str] = {} + self.tag_prefix_index: Dict[str, List[str]] = defaultdict(list) + self.title_norm_map: Dict[str, List[Dict[str, str]]] = defaultdict(list) + self.doc_count: int = 0 + self._source_id: Optional[int] = None + + def is_stale(self) -> bool: + """Check if the inverted index needs rebuilding.""" + current_id = id(index) + return current_id != self._source_id + + def rebuild(self) -> None: + """Rebuild inverted index from the global ``index`` dict. + + Tokenizes titles and content of every file, computes term frequencies, + and builds auxiliary indexes for tag and title prefix suggestions. 
+ """ + logger.info("Rebuilding inverted index...") + self.word_index = defaultdict(dict) + self.title_index = defaultdict(list) + self.tag_norm_map = {} + self.tag_prefix_index = defaultdict(list) + self.title_norm_map = defaultdict(list) + self.doc_count = 0 + + for vault_name, vault_data in index.items(): + for file_info in vault_data.get("files", []): + doc_key = f"{vault_name}::{file_info['path']}" + self.doc_count += 1 + + # --- Title tokens --- + title_tokens = tokenize(file_info.get("title", "")) + for token in set(title_tokens): + self.title_index[token].append(doc_key) + + # --- Normalized title for prefix suggestions --- + norm_title = normalize_text(file_info.get("title", "")) + if norm_title: + self.title_norm_map[norm_title].append({ + "vault": vault_name, + "path": file_info["path"], + "title": file_info["title"], + }) + + # --- Content tokens (including title for combined scoring) --- + content = file_info.get("content", "") + full_text = (file_info.get("title", "") + " " + content) + tokens = tokenize(full_text) + tf: Dict[str, int] = defaultdict(int) + for token in tokens: + tf[token] += 1 + for token, freq in tf.items(): + self.word_index[token][doc_key] = freq + + # --- Tag indexes --- + for tag in vault_data.get("tags", {}): + norm_tag = normalize_text(tag) + self.tag_norm_map[norm_tag] = tag + # Build prefix entries for each prefix length ≄ MIN_PREFIX_LENGTH + for plen in range(MIN_PREFIX_LENGTH, len(norm_tag) + 1): + prefix = norm_tag[:plen] + if tag not in self.tag_prefix_index[prefix]: + self.tag_prefix_index[prefix].append(tag) + + self._source_id = id(index) + logger.info( + "Inverted index built: %d documents, %d unique tokens, %d tags", + self.doc_count, + len(self.word_index), + len(self.tag_norm_map), + ) + + def idf(self, term: str) -> float: + """Inverse Document Frequency for a term. + + ``idf(t) = log(N / (1 + df(t)))`` where *df(t)* is the number + of documents containing term *t*. + + Args: + term: Normalized term. 
+ + Returns: + IDF score (≄ 0). + """ + df = len(self.word_index.get(term, {})) + if df == 0: + return 0.0 + return math.log((self.doc_count + 1) / (1 + df)) + + def tf_idf(self, term: str, doc_key: str) -> float: + """TF-IDF score for a term in a document. + + Uses raw term frequency (no log normalization) × IDF. + + Args: + term: Normalized term. + doc_key: ``"vault::path"`` document key. + + Returns: + TF-IDF score. + """ + tf = self.word_index.get(term, {}).get(doc_key, 0) + if tf == 0: + return 0.0 + return tf * self.idf(term) + + +# Singleton inverted index +_inverted_index = InvertedIndex() + + +def get_inverted_index() -> InvertedIndex: + """Return the singleton inverted index, rebuilding if stale.""" + if _inverted_index.is_stale(): + _inverted_index.rebuild() + return _inverted_index + + +# --------------------------------------------------------------------------- +# Backward-compatible search (unchanged API) +# --------------------------------------------------------------------------- def search( query: str, vault_filter: str = "all", @@ -155,6 +471,342 @@ def search( return results[:limit] +# --------------------------------------------------------------------------- +# Advanced search with TF-IDF scoring +# --------------------------------------------------------------------------- +def _parse_advanced_query(raw_query: str) -> Dict[str, Any]: + """Parse an advanced query string into structured filters and free text. + + Supported operators: + - ``tag:`` or ``#`` — tag filter + - ``vault:`` — vault filter + - ``title:`` — title filter + - ``path:`` — path filter + - Remaining tokens are treated as free-text search terms. + + Args: + raw_query: Raw query string from the user. + + Returns: + Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``terms``. 
+ """ + parsed: Dict[str, Any] = { + "tags": [], + "vault": None, + "title": None, + "path": None, + "terms": [], + } + if not raw_query: + return parsed + + # Use shlex-like tokenizing but handle quotes manually + tokens = _split_query_tokens(raw_query) + for token in tokens: + lower = token.lower() + if lower.startswith("tag:"): + tag_val = token[4:].strip().lstrip("#") + if tag_val: + parsed["tags"].append(tag_val) + elif lower.startswith("#") and len(token) > 1: + parsed["tags"].append(token[1:]) + elif lower.startswith("vault:"): + parsed["vault"] = token[6:].strip() + elif lower.startswith("title:"): + parsed["title"] = token[6:].strip() + elif lower.startswith("path:"): + parsed["path"] = token[5:].strip() + else: + parsed["terms"].append(token) + + return parsed + + +def _split_query_tokens(raw: str) -> List[str]: + """Split a query string respecting quoted phrases. + + ``tag:"my tag" hello world`` → ``['tag:my tag', 'hello', 'world']`` + + Args: + raw: Raw query string. + + Returns: + List of token strings. 
+ """ + tokens: List[str] = [] + i = 0 + n = len(raw) + while i < n: + # Skip whitespace + while i < n and raw[i] == " ": + i += 1 + if i >= n: + break + + # Check for operator with quoted value, e.g., tag:"foo bar" + if i < n and raw[i] != '"': + # Read until space or quote + j = i + while j < n and raw[j] != " ": + if raw[j] == '"': + # Read quoted portion + j += 1 + while j < n and raw[j] != '"': + j += 1 + if j < n: + j += 1 # skip closing quote + else: + j += 1 + token = raw[i:j].replace('"', "") + tokens.append(token) + i = j + else: + # Quoted token + i += 1 # skip opening quote + j = i + while j < n and raw[j] != '"': + j += 1 + tokens.append(raw[i:j]) + i = j + 1 # skip closing quote + + return tokens + + +def advanced_search( + query: str, + vault_filter: str = "all", + tag_filter: Optional[str] = None, + limit: int = ADVANCED_SEARCH_DEFAULT_LIMIT, + offset: int = 0, + sort_by: str = "relevance", +) -> Dict[str, Any]: + """Advanced full-text search with TF-IDF scoring, facets, and pagination. + + Parses the query for operators (``tag:``, ``vault:``, ``title:``, + ``path:``), falls back remaining tokens to TF-IDF scored free-text + search using the inverted index. Results include highlighted snippets + with ```` tags and faceted counts for tags and vaults. + + Args: + query: Raw query string (may include operators). + vault_filter: Vault name or ``"all"`` (overridden by ``vault:`` op). + tag_filter: Comma-separated tag names (merged with ``tag:`` ops). + limit: Max results per page. + offset: Pagination offset. + sort_by: ``"relevance"`` or ``"modified"``. + + Returns: + Dict with ``results``, ``total``, ``offset``, ``limit``, ``facets``. 
+ """ + query = query.strip() if query else "" + parsed = _parse_advanced_query(query) + + # Merge explicit tag_filter with parsed tag: operators + all_tags = list(parsed["tags"]) + extra_tags = _normalize_tag_filter(tag_filter) + for t in extra_tags: + if t not in all_tags: + all_tags.append(t) + + # Vault filter — parsed vault: overrides parameter + effective_vault = parsed["vault"] or vault_filter + + # Normalize free-text terms + query_terms = [normalize_text(t) for t in parsed["terms"] if t.strip()] + has_terms = len(query_terms) > 0 + + if not has_terms and not all_tags and not parsed["title"] and not parsed["path"]: + return {"results": [], "total": 0, "offset": offset, "limit": limit, "facets": {"tags": {}, "vaults": {}}} + + inv = get_inverted_index() + scored_results: List[Tuple[float, Dict[str, Any]]] = [] + facet_tags: Dict[str, int] = defaultdict(int) + facet_vaults: Dict[str, int] = defaultdict(int) + + for vault_name, vault_data in index.items(): + if effective_vault != "all" and vault_name != effective_vault: + continue + + for file_info in vault_data.get("files", []): + doc_key = f"{vault_name}::{file_info['path']}" + + # --- Tag filter --- + if all_tags: + file_tags_lower = [t.lower() for t in file_info.get("tags", [])] + if not all(t.lower() in file_tags_lower for t in all_tags): + continue + + # --- Title filter --- + if parsed["title"]: + norm_title_filter = normalize_text(parsed["title"]) + norm_file_title = normalize_text(file_info.get("title", "")) + if norm_title_filter not in norm_file_title: + continue + + # --- Path filter --- + if parsed["path"]: + norm_path_filter = normalize_text(parsed["path"]) + norm_file_path = normalize_text(file_info.get("path", "")) + if norm_path_filter not in norm_file_path: + continue + + # --- Scoring --- + score = 0.0 + if has_terms: + # TF-IDF scoring for each term + for term in query_terms: + tfidf = inv.tf_idf(term, doc_key) + score += tfidf + + # Title boost — check if term appears in title tokens + 
norm_title = normalize_text(file_info.get("title", "")) + if term in norm_title: + score += tfidf * TITLE_BOOST + + # Path boost + norm_path = normalize_text(file_info.get("path", "")) + if term in norm_path: + score += tfidf * PATH_BOOST + + # Tag boost + for tag in file_info.get("tags", []): + if term in normalize_text(tag): + score += tfidf * TAG_BOOST + break + + # Also add prefix matching bonus for partial words + for term in query_terms: + if len(term) >= MIN_PREFIX_LENGTH: + for indexed_term, docs in inv.word_index.items(): + if indexed_term.startswith(term) and indexed_term != term: + if doc_key in docs: + score += inv.tf_idf(indexed_term, doc_key) * 0.5 + else: + # Filter-only search (tag/title/path): score = 1 + score = 1.0 + + if score > 0: + # Build highlighted snippet + content = file_info.get("content", "") + if has_terms: + snippet = _extract_highlighted_snippet(content, query_terms) + else: + snippet = _escape_html(content[:200].strip()) if content else "" + + result = { + "vault": vault_name, + "path": file_info["path"], + "title": file_info["title"], + "tags": file_info.get("tags", []), + "score": round(score, 4), + "snippet": snippet, + "modified": file_info.get("modified", ""), + } + scored_results.append((score, result)) + + # Facets + facet_vaults[vault_name] = facet_vaults.get(vault_name, 0) + 1 + for tag in file_info.get("tags", []): + facet_tags[tag] = facet_tags.get(tag, 0) + 1 + + # Sort + if sort_by == "modified": + scored_results.sort(key=lambda x: x[1].get("modified", ""), reverse=True) + else: + scored_results.sort(key=lambda x: -x[0]) + + total = len(scored_results) + page = scored_results[offset: offset + limit] + + return { + "results": [r for _, r in page], + "total": total, + "offset": offset, + "limit": limit, + "facets": { + "tags": dict(sorted(facet_tags.items(), key=lambda x: -x[1])[:20]), + "vaults": dict(sorted(facet_vaults.items(), key=lambda x: -x[1])), + }, + } + + +# 
--------------------------------------------------------------------------- +# Suggestion helpers +# --------------------------------------------------------------------------- +def suggest_titles( + prefix: str, + vault_filter: str = "all", + limit: int = SUGGEST_LIMIT, +) -> List[Dict[str, str]]: + """Suggest file titles matching a prefix (accent-insensitive). + + Args: + prefix: User-typed prefix string. + vault_filter: Vault name or ``"all"``. + limit: Maximum suggestions. + + Returns: + List of ``{"vault", "path", "title"}`` dicts. + """ + if not prefix or len(prefix) < MIN_PREFIX_LENGTH: + return [] + + inv = get_inverted_index() + norm_prefix = normalize_text(prefix) + results: List[Dict[str, str]] = [] + seen: set = set() + + for norm_title, entries in inv.title_norm_map.items(): + if norm_prefix in norm_title: + for entry in entries: + if vault_filter != "all" and entry["vault"] != vault_filter: + continue + key = f"{entry['vault']}::{entry['path']}" + if key not in seen: + seen.add(key) + results.append(entry) + if len(results) >= limit: + return results + + return results + + +def suggest_tags( + prefix: str, + vault_filter: str = "all", + limit: int = SUGGEST_LIMIT, +) -> List[Dict[str, Any]]: + """Suggest tags matching a prefix (accent-insensitive). + + Args: + prefix: User-typed prefix (with or without leading ``#``). + vault_filter: Vault name or ``"all"``. + limit: Maximum suggestions. + + Returns: + List of ``{"tag", "count"}`` dicts sorted by descending count. 
+ """ + prefix = prefix.lstrip("#").strip() + if not prefix or len(prefix) < MIN_PREFIX_LENGTH: + return [] + + norm_prefix = normalize_text(prefix) + all_tag_counts = get_all_tags(vault_filter) + + matches: List[Dict[str, Any]] = [] + for tag, count in all_tag_counts.items(): + norm_tag = normalize_text(tag) + if norm_prefix in norm_tag: + matches.append({"tag": tag, "count": count}) + if len(matches) >= limit: + break + + return matches + + +# --------------------------------------------------------------------------- +# Backward-compatible tag aggregation (unchanged API) +# --------------------------------------------------------------------------- def get_all_tags(vault_filter: Optional[str] = None) -> Dict[str, int]: """Aggregate tag counts across vaults, sorted by descending count. diff --git a/frontend/app.js b/frontend/app.js index 50efa53..dfb5f98 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -25,6 +25,21 @@ let activeSidebarTab = "vaults"; let filterDebounce = null; + // Advanced search state + let advancedSearchOffset = 0; + let advancedSearchTotal = 0; + let advancedSearchSort = "relevance"; + let advancedSearchLastQuery = ""; + let suggestAbortController = null; + let dropdownActiveIndex = -1; + let dropdownItems = []; + + // Advanced search constants + const SEARCH_HISTORY_KEY = "obsigate_search_history"; + const MAX_HISTORY_ENTRIES = 50; + const SUGGEST_DEBOUNCE_MS = 150; + const ADVANCED_SEARCH_LIMIT = 50; + // --------------------------------------------------------------------------- // File extension → Lucide icon mapping // --------------------------------------------------------------------------- @@ -75,6 +90,414 @@ return EXT_ICONS[ext] || "file"; } + // --------------------------------------------------------------------------- + // Search History Service (localStorage, LIFO, max 50, dedup) + // --------------------------------------------------------------------------- + const SearchHistory = { + _load() { + try { + const raw = 
// ---------------------------------------------------------------------------
// Search History Service (localStorage, LIFO, max 50, dedup)
// ---------------------------------------------------------------------------
const SearchHistory = {
  /**
   * Read the stored history.
   * Always returns an array of strings: a missing key, unparsable JSON, or a
   * stored value that is not an array (e.g. "null" or "{}") yields [] so that
   * callers can safely use array methods on the result.
   */
  _load() {
    try {
      const raw = localStorage.getItem(SEARCH_HISTORY_KEY);
      if (!raw) return [];
      const parsed = JSON.parse(raw);
      if (!Array.isArray(parsed)) return [];
      return parsed.filter((entry) => typeof entry === "string");
    } catch {
      return [];
    }
  },

  /** Persist the history; storage failures are deliberately ignored (best effort). */
  _save(entries) {
    try {
      localStorage.setItem(SEARCH_HISTORY_KEY, JSON.stringify(entries));
    } catch {}
  },

  /** Return every stored query, most recent first. */
  getAll() {
    return this._load();
  },

  /** Put a query at the front, dropping any previous identical entry and capping the list. */
  add(query) {
    if (!query || !query.trim()) return;
    const q = query.trim();
    let entries = this._load().filter((e) => e !== q);
    entries.unshift(q);
    if (entries.length > MAX_HISTORY_ENTRIES) entries = entries.slice(0, MAX_HISTORY_ENTRIES);
    this._save(entries);
  },

  /** Delete one exact query from the history. */
  remove(query) {
    const entries = this._load().filter((e) => e !== query);
    this._save(entries);
  },

  /** Erase the whole history. */
  clear() {
    this._save([]);
  },

  /**
   * Return up to 8 entries containing `prefix` (case-insensitive substring);
   * with an empty prefix, the 8 most recent entries.
   */
  filter(prefix) {
    if (!prefix) return this.getAll().slice(0, 8);
    const lp = prefix.toLowerCase();
    return this._load().filter((e) => e.toLowerCase().includes(lp)).slice(0, 8);
  },
};
// ---------------------------------------------------------------------------
// Query Parser — extracts operators (tag:, #, vault:, title:, path:)
// ---------------------------------------------------------------------------
const QueryParser = {
  /**
   * Split a raw query into operators and free text.
   * Returns { tags: string[], vault, title, path, freeText }; vault/title/path
   * are null when absent and hold the last occurrence when repeated.
   */
  parse(raw) {
    const result = { tags: [], vault: null, title: null, path: null, freeText: "" };
    if (!raw) return result;
    const freeTokens = [];
    for (const tok of this._tokenize(raw)) {
      const lower = tok.toLowerCase();
      if (lower.startsWith("tag:")) {
        // Accept tag:"x", tag:#x and tag:x alike
        const v = tok.slice(4).replace(/"/g, "").trim().replace(/^#/, "");
        if (v) result.tags.push(v);
      } else if (lower.startsWith("#") && tok.length > 1) {
        result.tags.push(tok.slice(1));
      } else if (lower.startsWith("vault:")) {
        result.vault = tok.slice(6).replace(/"/g, "").trim();
      } else if (lower.startsWith("title:")) {
        result.title = tok.slice(6).replace(/"/g, "").trim();
      } else if (lower.startsWith("path:")) {
        result.path = tok.slice(5).replace(/"/g, "").trim();
      } else {
        freeTokens.push(tok);
      }
    }
    result.freeText = freeTokens.join(" ");
    return result;
  },

  /**
   * Split on spaces while keeping double-quoted sections together.
   * Quote characters are removed from the returned tokens.
   */
  _tokenize(raw) {
    const tokens = [];
    const n = raw.length;
    let i = 0;
    while (i < n) {
      while (i < n && raw[i] === " ") i++;
      if (i >= n) break;
      if (raw[i] !== '"') {
        // Bare token; it may embed a quoted part, e.g. tag:"multi mots"
        let j = i;
        while (j < n && raw[j] !== " ") {
          if (raw[j] === '"') {
            j++;
            while (j < n && raw[j] !== '"') j++;
            if (j < n) j++;
          } else {
            j++;
          }
        }
        tokens.push(raw.slice(i, j).replace(/"/g, ""));
        i = j;
      } else {
        // Token that starts with a quote: take everything up to the closing quote
        i++;
        let j = i;
        while (j < n && raw[j] !== '"') j++;
        tokens.push(raw.slice(i, j));
        i = j + 1;
      }
    }
    return tokens;
  },

  /**
   * Detect the current operator context at cursor for autocomplete.
   * Returns { type: "tag" | "title" | "text", prefix }.
   * The value patterns use Unicode letter/mark/number classes so that accented
   * prefixes (e.g. "tag:é") are recognised; the previous \w only covered ASCII.
   */
  getContext(raw, cursorPos) {
    const before = raw.slice(0, cursorPos);
    // Check if we're typing a tag: or # value
    const tagMatch = before.match(/(?:tag:|#)([\p{L}\p{M}\p{N}_-]*)$/iu);
    if (tagMatch) return { type: "tag", prefix: tagMatch[1] };
    // Check if typing title:
    const titleMatch = before.match(/title:([\p{L}\p{M}\p{N}_-]*)$/iu);
    if (titleMatch) return { type: "title", prefix: titleMatch[1] };
    // Default: free text — the last whitespace-separated word before the cursor
    const words = before.trim().split(/\s+/);
    const lastWord = words[words.length - 1] || "";
    return { type: "text", prefix: lastWord };
  },
};
// ---------------------------------------------------------------------------
// Autocomplete Dropdown Controller
// ---------------------------------------------------------------------------
const AutocompleteDropdown = {
  _dropdown: null,
  _historySection: null,
  _titlesSection: null,
  _tagsSection: null,
  _historyList: null,
  _titlesList: null,
  _tagsList: null,
  _emptyEl: null,
  _suggestTimer: null,

  /** Look up the dropdown DOM nodes and wire the clear-history and outside-click handlers. */
  init() {
    this._dropdown = document.getElementById("search-dropdown");
    this._historySection = document.getElementById("search-dropdown-history");
    this._titlesSection = document.getElementById("search-dropdown-titles");
    this._tagsSection = document.getElementById("search-dropdown-tags");
    this._historyList = document.getElementById("search-dropdown-history-list");
    this._titlesList = document.getElementById("search-dropdown-titles-list");
    this._tagsList = document.getElementById("search-dropdown-tags-list");
    this._emptyEl = document.getElementById("search-dropdown-empty");

    // Clear history button
    const clearBtn = document.getElementById("search-dropdown-clear-history");
    if (clearBtn) {
      clearBtn.addEventListener("click", (e) => {
        e.stopPropagation();
        SearchHistory.clear();
        this.hide();
      });
    }

    // Close dropdown on outside click (clicks on the search input keep it open)
    document.addEventListener("click", (e) => {
      if (this._dropdown && !this._dropdown.contains(e.target) &&
          e.target.id !== "search-input") {
        this.hide();
      }
    });
  },

  show() {
    if (this._dropdown) this._dropdown.hidden = false;
  },

  /** Hide the dropdown and reset keyboard-navigation state. */
  hide() {
    if (this._dropdown) this._dropdown.hidden = true;
    dropdownActiveIndex = -1;
    dropdownItems = [];
  },

  isVisible() {
    return this._dropdown && !this._dropdown.hidden;
  },

  /** Populate and show the dropdown with history, title suggestions, and tag suggestions */
  async populate(inputValue, cursorPos) {
    // Cancel previous suggestion request
    if (suggestAbortController) { suggestAbortController.abort(); suggestAbortController = null; }

    const ctx = QueryParser.getContext(inputValue, cursorPos);
    const vault = document.getElementById("vault-filter").value;
    const wantsRemote = Boolean(ctx.prefix && ctx.prefix.length >= 2);

    // History — always show filtered history
    const historyItems = SearchHistory.filter(inputValue);
    this._renderHistory(historyItems, inputValue);

    // Title and tag suggestions from API (debounced)
    clearTimeout(this._suggestTimer);
    if (wantsRemote) {
      this._suggestTimer = setTimeout(() => this._fetchSuggestions(ctx, vault, inputValue), SUGGEST_DEBOUNCE_MS);
    } else {
      this._renderTitles([], "");
      this._renderTags([], "");
      // The lists are now empty: hide their sections too, otherwise the
      // headers of a previous result set stay on screen.
      this._titlesSection.hidden = true;
      this._tagsSection.hidden = true;
    }

    // Show/hide sections
    const hasContent = historyItems.length > 0;
    this._historySection.hidden = !hasContent;
    this._emptyEl.hidden = hasContent;

    if (hasContent || wantsRemote) {
      this.show();
    } else {
      this.hide();
    }

    this._collectItems();
  },

  /**
   * Fetch title and tag suggestions for the current context and render them.
   * Titles are skipped in a tag context; tags are fetched in tag and free-text
   * contexts. An aborted request is ignored silently.
   */
  async _fetchSuggestions(ctx, vault, inputValue) {
    suggestAbortController = new AbortController();
    try {
      const [titlesRes, tagsRes] = await Promise.all([
        ctx.type !== "tag"
          ? api(`/api/suggest?q=${encodeURIComponent(ctx.prefix)}&vault=${encodeURIComponent(vault)}&limit=8`, { signal: suggestAbortController.signal })
          : Promise.resolve({ suggestions: [] }),
        (ctx.type === "tag" || ctx.type === "text")
          ? api(`/api/tags/suggest?q=${encodeURIComponent(ctx.prefix)}&vault=${encodeURIComponent(vault)}&limit=6`, { signal: suggestAbortController.signal })
          : Promise.resolve({ suggestions: [] }),
      ]);

      this._renderTitles(titlesRes.suggestions || [], ctx.prefix);
      this._renderTags(tagsRes.suggestions || [], ctx.prefix);

      // Update visibility
      const hasTitles = (titlesRes.suggestions || []).length > 0;
      const hasTags = (tagsRes.suggestions || []).length > 0;
      this._titlesSection.hidden = !hasTitles;
      this._tagsSection.hidden = !hasTags;

      const historyVisible = !this._historySection.hidden;
      const hasAny = historyVisible || hasTitles || hasTags;
      this._emptyEl.hidden = hasAny;
      if (hasAny) this.show(); else if (!historyVisible) this.hide();

      this._collectItems();
    } catch (err) {
      if (err.name !== "AbortError") console.error("Suggestion fetch error:", err);
    }
  },

  /** Render history entries; clicking one fills the input and runs the search. */
  _renderHistory(items, query) {
    this._historyList.innerHTML = "";
    items.forEach((entry) => {
      const li = el("li", { class: "search-dropdown__item search-dropdown__item--history", role: "option" });
      const iconEl = el("span", { class: "search-dropdown__icon" });
      // NOTE(review): the icon markup is an empty string here — confirm it was not lost
      iconEl.innerHTML = '';
      const textEl = el("span", { class: "search-dropdown__text" });
      textEl.textContent = entry;
      li.appendChild(iconEl);
      li.appendChild(textEl);
      li.addEventListener("click", () => {
        const input = document.getElementById("search-input");
        input.value = entry;
        this.hide();
        _triggerAdvancedSearch(entry);
      });
      this._historyList.appendChild(li);
    });
  },

  /** Render title suggestions; clicking one opens the file. */
  _renderTitles(items, prefix) {
    this._titlesList.innerHTML = "";
    items.forEach((item) => {
      const li = el("li", { class: "search-dropdown__item search-dropdown__item--title", role: "option" });
      const iconEl = el("span", { class: "search-dropdown__icon" });
      iconEl.innerHTML = '';
      const textEl = el("span", { class: "search-dropdown__text" });
      if (prefix) {
        this._highlightText(textEl, item.title, prefix);
      } else {
        textEl.textContent = item.title;
      }
      const metaEl = el("span", { class: "search-dropdown__meta" });
      metaEl.textContent = item.vault;
      li.appendChild(iconEl);
      li.appendChild(textEl);
      li.appendChild(metaEl);
      li.addEventListener("click", () => {
        this.hide();
        openFile(item.vault, item.path);
      });
      this._titlesList.appendChild(li);
    });
  },

  /** Render tag suggestions; clicking one completes or appends a tag operator and searches. */
  _renderTags(items, prefix) {
    this._tagsList.innerHTML = "";
    items.forEach((item) => {
      const li = el("li", { class: "search-dropdown__item search-dropdown__item--tag", role: "option" });
      const iconEl = el("span", { class: "search-dropdown__icon" });
      iconEl.innerHTML = '';
      const textEl = el("span", { class: "search-dropdown__text" });
      if (prefix) {
        this._highlightText(textEl, item.tag, prefix);
      } else {
        textEl.textContent = item.tag;
      }
      const badge = el("span", { class: "search-dropdown__badge" });
      badge.textContent = item.count;
      li.appendChild(iconEl);
      li.appendChild(textEl);
      li.appendChild(badge);
      li.addEventListener("click", () => {
        const input = document.getElementById("search-input");
        const current = input.value;
        const caret = input.selectionStart;
        const ctx = QueryParser.getContext(current, caret);
        let caretAfter = null;
        if (ctx.type === "tag") {
          // Replace the partial tag under the caret and keep whatever follows
          // it; previously everything after the caret was thrown away.
          const before = current.slice(0, caret - ctx.prefix.length);
          const rest = current.slice(caret).replace(/^\S*\s*/, "");
          const completed = before + item.tag + " ";
          input.value = completed + rest;
          caretAfter = completed.length;
        } else {
          // Append tag: operator if not already typing one
          input.value = (current ? current + " " : "") + "tag:" + item.tag + " ";
        }
        this.hide();
        input.focus();
        if (caretAfter !== null) input.setSelectionRange(caretAfter, caretAfter);
        _triggerAdvancedSearch(input.value);
      });
      this._tagsList.appendChild(li);
    });
  },

  /** Append `text` to `container`, wrapping the first case-insensitive match of `query` in <mark>. */
  _highlightText(container, text, query) {
    const lower = text.toLowerCase();
    const needle = query.toLowerCase();
    const pos = lower.indexOf(needle);
    if (pos === -1) { container.textContent = text; return; }
    container.appendChild(document.createTextNode(text.slice(0, pos)));
    const markEl = el("mark", {}, [document.createTextNode(text.slice(pos, pos + query.length))]);
    container.appendChild(markEl);
    container.appendChild(document.createTextNode(text.slice(pos + query.length)));
  },

  /** Refresh the flat list of selectable items and clear the keyboard selection. */
  _collectItems() {
    dropdownItems = Array.from(this._dropdown.querySelectorAll(".search-dropdown__item"));
    dropdownActiveIndex = -1;
    dropdownItems.forEach(item => item.classList.remove("active"));
  },

  /** Move the keyboard selection down, wrapping to the first item. */
  navigateDown() {
    if (!this.isVisible() || dropdownItems.length === 0) return;
    if (dropdownActiveIndex >= 0) dropdownItems[dropdownActiveIndex].classList.remove("active");
    dropdownActiveIndex = (dropdownActiveIndex + 1) % dropdownItems.length;
    dropdownItems[dropdownActiveIndex].classList.add("active");
    dropdownItems[dropdownActiveIndex].scrollIntoView({ block: "nearest" });
  },

  /** Move the keyboard selection up, wrapping to the last item. */
  navigateUp() {
    if (!this.isVisible() || dropdownItems.length === 0) return;
    if (dropdownActiveIndex >= 0) dropdownItems[dropdownActiveIndex].classList.remove("active");
    dropdownActiveIndex = dropdownActiveIndex <= 0 ? dropdownItems.length - 1 : dropdownActiveIndex - 1;
    dropdownItems[dropdownActiveIndex].classList.add("active");
    dropdownItems[dropdownActiveIndex].scrollIntoView({ block: "nearest" });
  },

  /** Activate the highlighted item; returns false when nothing is highlighted. */
  selectActive() {
    if (dropdownActiveIndex >= 0 && dropdownActiveIndex < dropdownItems.length) {
      dropdownItems[dropdownActiveIndex].click();
      return true;
    }
    return false;
  },
};
// ---------------------------------------------------------------------------
// Search Chips Controller — renders active filter chips from parsed query
// ---------------------------------------------------------------------------
const SearchChips = {
  _container: null,

  init() { this._container = document.getElementById("search-chips"); },

  /** Rebuild the chips from a QueryParser.parse() result; the container is hidden when there are none. */
  update(parsed) {
    if (!this._container) return;
    this._container.innerHTML = "";
    let hasChips = false;
    parsed.tags.forEach(tag => { this._addChip("tag", `tag:${tag}`, tag); hasChips = true; });
    if (parsed.vault) { this._addChip("vault", `vault:${parsed.vault}`, parsed.vault); hasChips = true; }
    if (parsed.title) { this._addChip("title", `title:${parsed.title}`, parsed.title); hasChips = true; }
    if (parsed.path) { this._addChip("path", `path:${parsed.path}`, parsed.path); hasChips = true; }
    this._container.hidden = !hasChips;
  },

  clear() {
    if (!this._container) return;
    this._container.innerHTML = "";
    this._container.hidden = true;
  },

  /**
   * Tell whether a query token (as produced by QueryParser._tokenize) is the
   * operator shown by a chip of the given type and value. Mirrors the value
   * extraction done in QueryParser.parse, including the "#tag" shorthand.
   */
  _tokenMatchesChip(token, type, value) {
    const lower = token.toLowerCase();
    if (type === "tag") {
      if (lower.startsWith("tag:")) {
        return token.slice(4).replace(/"/g, "").trim().replace(/^#/, "") === value;
      }
      return lower.startsWith("#") && token.length > 1 && token.slice(1) === value;
    }
    const op = type + ":";
    return lower.startsWith(op) && token.slice(op.length).replace(/"/g, "").trim() === value;
  },

  /** Turn a token back into query text, re-quoting it when it contains whitespace. */
  _serializeToken(token) {
    if (!/\s/.test(token)) return token;
    const m = token.match(/^(tag:|vault:|title:|path:)([\s\S]*)$/i);
    return m ? `${m[1]}"${m[2]}"` : `"${token}"`;
  },

  /** Append one chip labelled `fullOperator`; its remove button drops the operator from the query. */
  _addChip(type, fullOperator, displayText) {
    const chip = el("span", { class: `search-chip search-chip--${type}` });
    const label = el("span", { class: "search-chip__label" });
    label.textContent = fullOperator;
    const removeBtn = el("button", { class: "search-chip__remove", title: "Retirer ce filtre", type: "button" });
    // NOTE(review): the icon markup is an empty string here — confirm it was not lost
    removeBtn.innerHTML = '';
    removeBtn.addEventListener("click", () => {
      // Remove this operator from the input by re-tokenizing the query.
      // A literal search for `fullOperator` misses "#tag" and quoted forms,
      // and could cut into a longer token such as "tag:linux" for "tag:lin".
      const input = document.getElementById("search-input");
      const kept = QueryParser._tokenize(input.value)
        .filter((tok) => !this._tokenMatchesChip(tok, type, displayText));
      input.value = kept.map((tok) => this._serializeToken(tok)).join(" ");
      _triggerAdvancedSearch(input.value);
    });
    chip.appendChild(label);
    chip.appendChild(removeBtn);
    this._container.appendChild(chip);
    safeCreateIcons();
  },
};
// ---------------------------------------------------------------------------
// Helper: trigger advanced search from input value
// ---------------------------------------------------------------------------
/**
 * Start a fresh advanced search (first page) for the given raw query, using
 * the selected vault and the sidebar tag selection. With no query and no
 * selected tags, the chips are cleared and the welcome screen is shown.
 */
function _triggerAdvancedSearch(rawQuery) {
  const trimmed = (rawQuery || "").trim();
  const vaultName = document.getElementById("vault-filter").value;
  const sidebarTags = selectedTags.length > 0 ? selectedTags.join(",") : null;
  advancedSearchOffset = 0;

  // Nothing to search for: go back to the idle state
  if (trimmed.length === 0 && !sidebarTags) {
    SearchChips.clear();
    showWelcome();
    return;
  }

  SearchHistory.add(trimmed);
  performAdvancedSearch(trimmed, vaultName, sidebarTags);
}
// ---------------------------------------------------------------------------
// Search (enhanced with autocomplete, keyboard nav, global shortcuts)
// ---------------------------------------------------------------------------
/**
 * Wire the search box: debounced search while typing, autocomplete dropdown,
 * keyboard navigation, case/clear buttons and global focus shortcuts.
 */
function initSearch() {
  const searchInput = document.getElementById("search-input");
  const caseToggle = document.getElementById("search-case-btn");
  const clearButton = document.getElementById("search-clear-btn");

  // Initialize sub-controllers
  AutocompleteDropdown.init();
  SearchChips.init();

  // Initially hide clear button
  clearButton.style.display = "none";

  // Enter-key path: record the query and search immediately, bypassing the debounce
  const submitCurrentQuery = () => {
    const q = searchInput.value.trim();
    if (!q) return;
    SearchHistory.add(q);
    clearTimeout(searchTimeout);
    advancedSearchOffset = 0;
    const vault = document.getElementById("vault-filter").value;
    const tagFilter = selectedTags.length > 0 ? selectedTags.join(",") : null;
    performAdvancedSearch(q, vault, tagFilter);
  };

  // --- Input handler: debounced search + autocomplete dropdown ---
  searchInput.addEventListener("input", () => {
    clearButton.style.display = searchInput.value.length > 0 ? "flex" : "none";

    // Show autocomplete dropdown while typing
    AutocompleteDropdown.populate(searchInput.value, searchInput.selectionStart);

    // Debounced search execution
    clearTimeout(searchTimeout);
    searchTimeout = setTimeout(() => {
      const q = searchInput.value.trim();
      const vault = document.getElementById("vault-filter").value;
      const tagFilter = selectedTags.length > 0 ? selectedTags.join(",") : null;
      advancedSearchOffset = 0;
      if (q.length === 0 && !tagFilter) {
        SearchChips.clear();
        showWelcome();
        return;
      }
      performAdvancedSearch(q, vault, tagFilter);
    }, 300);
  });

  // --- Focus handler: show history dropdown ---
  searchInput.addEventListener("focus", () => {
    if (searchInput.value.length !== 0) return;
    if (SearchHistory.filter("").length > 0) {
      AutocompleteDropdown.populate("", 0);
    }
  });

  // --- Keyboard navigation in dropdown ---
  searchInput.addEventListener("keydown", (e) => {
    const dropdownOpen = AutocompleteDropdown.isVisible();
    switch (e.key) {
      case "ArrowDown":
        if (dropdownOpen) {
          e.preventDefault();
          AutocompleteDropdown.navigateDown();
        }
        break;
      case "ArrowUp":
        if (dropdownOpen) {
          e.preventDefault();
          AutocompleteDropdown.navigateUp();
        }
        break;
      case "Enter":
        if (dropdownOpen) {
          // A highlighted suggestion takes precedence over a plain search
          if (AutocompleteDropdown.selectActive()) {
            e.preventDefault();
            return;
          }
          // No active item — execute search normally
          AutocompleteDropdown.hide();
        }
        submitCurrentQuery();
        e.preventDefault();
        break;
      case "Escape":
        if (dropdownOpen) {
          AutocompleteDropdown.hide();
          e.stopPropagation();
        }
        break;
      default:
        break;
    }
  });

  caseToggle.addEventListener("click", () => {
    searchCaseSensitive = !searchCaseSensitive;
    caseToggle.classList.toggle("active");
  });

  clearButton.addEventListener("click", () => {
    searchInput.value = "";
    clearButton.style.display = "none";
    searchCaseSensitive = false;
    caseToggle.classList.remove("active");
    SearchChips.clear();
    AutocompleteDropdown.hide();
    showWelcome();
  });

  // --- Global keyboard shortcuts ---
  document.addEventListener("keydown", (e) => {
    // Ctrl+K or Cmd+K: focus search
    if ((e.ctrlKey || e.metaKey) && e.key === "k") {
      e.preventDefault();
      searchInput.focus();
      searchInput.select();
    }
    // "/" key: focus search (when not in an input/textarea)
    if (e.key === "/" && !_isInputFocused()) {
      e.preventDefault();
      searchInput.focus();
    }
    // Escape: blur search input and close dropdown
    if (e.key === "Escape" && document.activeElement === searchInput) {
      AutocompleteDropdown.hide();
      searchInput.blur();
    }
  });
}
/**
 * Tell whether keyboard focus is in a form control (input, textarea, select)
 * or in a contenteditable element.
 */
function _isInputFocused() {
  const focused = document.activeElement;
  const formTags = ["INPUT", "TEXTAREA", "SELECT"];
  if (formTags.includes(focused?.tagName)) return true;
  return focused?.isContentEditable === true;
}
// --- Backward-compatible search (existing /api/search endpoint) ---
/**
 * Run a legacy full-text search and render it with renderSearchResults.
 * Any in-flight search is aborted first; an aborted request is ignored.
 */
async function performSearch(query, vaultFilter, tagFilter) {
  if (searchAbortController) searchAbortController.abort();
  // Keep a local handle: the shared variable may be replaced by a newer
  // search while this one is still awaiting.
  const controller = new AbortController();
  searchAbortController = controller;
  showLoading();
  let url = `/api/search?q=${encodeURIComponent(query)}&vault=${encodeURIComponent(vaultFilter)}`;
  if (tagFilter) url += `&tag=${encodeURIComponent(tagFilter)}`;
  try {
    const data = await api(url, { signal: controller.signal });
    renderSearchResults(data, query, tagFilter);
  } catch (err) {
    if (err.name === "AbortError") return; // superseded by newer request
    console.error("Search error:", err);
    showWelcome();
  } finally {
    // Only release the slot if it is still ours; otherwise the finally of an
    // aborted request would null the newer request's controller and make
    // that request impossible to cancel.
    if (searchAbortController === controller) searchAbortController = null;
  }
}

// --- Advanced search with TF-IDF, facets, pagination ---
/**
 * Query /api/search/advanced and render the page with renderAdvancedSearchResults.
 *
 * @param {string} query        Raw query, operators included.
 * @param {string} vaultFilter  Vault name or "all".
 * @param {?string} tagFilter   Comma-separated sidebar tags, or null.
 * @param {number} [offset]     Result offset; defaults to advancedSearchOffset.
 * @param {string} [sort]       Sort key; defaults to advancedSearchSort.
 */
async function performAdvancedSearch(query, vaultFilter, tagFilter, offset, sort) {
  if (searchAbortController) searchAbortController.abort();
  const controller = new AbortController();
  searchAbortController = controller;
  showLoading();

  const ofs = offset !== undefined ? offset : advancedSearchOffset;
  const sortBy = sort || advancedSearchSort;
  advancedSearchLastQuery = query;

  // Update chips from parsed query
  const parsed = QueryParser.parse(query);
  SearchChips.update(parsed);

  let url = `/api/search/advanced?q=${encodeURIComponent(query)}&vault=${encodeURIComponent(vaultFilter)}&limit=${ADVANCED_SEARCH_LIMIT}&offset=${encodeURIComponent(ofs)}&sort=${encodeURIComponent(sortBy)}`;
  if (tagFilter) url += `&tag=${encodeURIComponent(tagFilter)}`;

  try {
    const data = await api(url, { signal: controller.signal });
    advancedSearchTotal = data.total;
    advancedSearchOffset = ofs;
    renderAdvancedSearchResults(data, query, tagFilter);
  } catch (err) {
    if (err.name === "AbortError") return; // superseded by newer request
    console.error("Advanced search error:", err);
    showWelcome();
  } finally {
    // See performSearch: never clear a controller owned by a newer request.
    if (searchAbortController === controller) searchAbortController = null;
  }
}
// --- Legacy search results renderer (kept for backward compat) ---
/**
 * Render the response of the legacy /api/search endpoint into #content-area:
 * header, then either an empty-state message or one clickable item per result
 * with highlighted title/snippet and clickable tags.
 */
function renderSearchResults(data, query, tagFilter) {
  const area = document.getElementById("content-area");
  area.innerHTML = "";
  const header = buildSearchResultsHeader(data, query, tagFilter);
  area.appendChild(header);

  if (data.results.length === 0) {
    // The literal was mis-encoded ("rĂ©sultat trouvĂ©"); restored to proper UTF-8
    area.appendChild(el("p", { style: "color:var(--text-muted);margin-top:20px" }, [
      document.createTextNode("Aucun résultat trouvé."),
    ]));
    return;
  }

  const hasQuery = Boolean(query && query.trim());
  const container = el("div", { class: "search-results" });
  data.results.forEach((r) => {
    const titleDiv = el("div", { class: "search-result-title" });
    if (hasQuery) {
      highlightSearchText(titleDiv, r.title, query, searchCaseSensitive);
    } else {
      titleDiv.textContent = r.title;
    }

    const snippetDiv = el("div", { class: "search-result-snippet" });
    if (hasQuery && r.snippet) {
      highlightSearchText(snippetDiv, r.snippet, query, searchCaseSensitive);
    } else {
      snippetDiv.textContent = r.snippet || "";
    }

    const item = el("div", { class: "search-result-item" }, [
      titleDiv,
      el("div", { class: "search-result-vault" }, [document.createTextNode(r.vault + " / " + r.path)]),
      snippetDiv,
    ]);

    if (r.tags && r.tags.length > 0) {
      const tagsDiv = el("div", { class: "search-result-tags" });
      r.tags.forEach((tag) => {
        // Tags already used as a filter are not offered again
        if (!TagFilterService.isTagFiltered(tag)) {
          const tagEl = el("span", { class: "file-tag" }, [document.createTextNode(`#${tag}`)]);
          // Stop propagation so the click filters by tag instead of opening the file
          tagEl.addEventListener("click", (e) => { e.stopPropagation(); addTagFilter(tag); });
          tagsDiv.appendChild(tagEl);
        }
      });
      if (tagsDiv.children.length > 0) item.appendChild(tagsDiv);
    }

    item.addEventListener("click", () => openFile(r.vault, r.path));
    container.appendChild(item);
  });
  area.appendChild(container);
}
// --- Advanced search results renderer (facets, highlighted snippets, pagination, sort) ---
/**
 * Render an /api/search/advanced response into #content-area: summary and sort
 * buttons, active sidebar tags, vault/tag facets, the result list and, when
 * the total exceeds one page, pagination controls.
 *
 * @param {object} data       Response; reads total, results and facets.
 * @param {string} query      Raw query that produced the response.
 * @param {?string} tagFilter Comma-separated sidebar tags, or null.
 */
function renderAdvancedSearchResults(data, query, tagFilter) {
  const area = document.getElementById("content-area");
  area.innerHTML = "";

  // Header with result count and sort controls
  const header = el("div", { class: "search-results-header" });
  const summaryText = el("span", { class: "search-results-summary-text" });
  const parsed = QueryParser.parse(query);
  const freeText = parsed.freeText;

  // User-facing literals below were mis-encoded in the previous version
  // ("rĂ©sultat", "â†’", "Ă—" ...); they are restored to proper UTF-8.
  if (freeText && tagFilter) {
    summaryText.textContent = `${data.total} résultat(s) pour "${freeText}" avec filtres`;
  } else if (freeText) {
    summaryText.textContent = `${data.total} résultat(s) pour "${freeText}"`;
  } else if (parsed.tags.length > 0 || tagFilter) {
    summaryText.textContent = `${data.total} fichier(s) avec filtres`;
  } else {
    summaryText.textContent = `${data.total} résultat(s)`;
  }
  header.appendChild(summaryText);

  // Sort controls — each button restarts the search from the first page
  const sortDiv = el("div", { class: "search-sort" });
  const btnRelevance = el("button", { class: "search-sort__btn" + (advancedSearchSort === "relevance" ? " active" : ""), type: "button" });
  btnRelevance.textContent = "Pertinence";
  btnRelevance.addEventListener("click", () => {
    advancedSearchSort = "relevance";
    advancedSearchOffset = 0;
    const vault = document.getElementById("vault-filter").value;
    performAdvancedSearch(query, vault, tagFilter, 0, "relevance");
  });
  const btnDate = el("button", { class: "search-sort__btn" + (advancedSearchSort === "modified" ? " active" : ""), type: "button" });
  btnDate.textContent = "Date";
  btnDate.addEventListener("click", () => {
    advancedSearchSort = "modified";
    advancedSearchOffset = 0;
    const vault = document.getElementById("vault-filter").value;
    performAdvancedSearch(query, vault, tagFilter, 0, "modified");
  });
  sortDiv.appendChild(btnRelevance);
  sortDiv.appendChild(btnDate);
  header.appendChild(sortDiv);
  area.appendChild(header);

  // Active sidebar tag chips
  if (selectedTags.length > 0) {
    const activeTags = el("div", { class: "search-results-active-tags" });
    selectedTags.forEach((tag) => {
      const removeBtn = el("button", {
        class: "search-results-active-tag-remove",
        title: `Retirer ${tag} du filtre`,
      }, [document.createTextNode("×")]);
      removeBtn.addEventListener("click", (e) => { e.stopPropagation(); removeTagFilter(tag); });
      const chip = el("span", { class: "search-results-active-tag" }, [
        document.createTextNode(`#${tag}`), removeBtn,
      ]);
      activeTags.appendChild(chip);
    });
    area.appendChild(activeTags);
  }

  // Facets panel
  if (data.facets && (Object.keys(data.facets.tags || {}).length > 0 || Object.keys(data.facets.vaults || {}).length > 0)) {
    const facetsDiv = el("div", { class: "search-facets" });

    // Vault facets (only useful when results span more than one vault)
    const vaultFacets = data.facets.vaults || {};
    if (Object.keys(vaultFacets).length > 1) {
      const group = el("div", { class: "search-facets__group" });
      const label = el("span", { class: "search-facets__label" });
      label.textContent = "Vaults";
      group.appendChild(label);
      for (const [vaultName, count] of Object.entries(vaultFacets)) {
        const item = el("span", { class: "search-facets__item" });
        // Names come from indexed content: insert as text, never as HTML
        item.textContent = `${vaultName} ${count}`;
        item.addEventListener("click", () => {
          const input = document.getElementById("search-input");
          // Add vault: operator (replacing any existing one)
          const current = input.value.replace(/vault:\S+\s*/gi, "").trim();
          input.value = current + " vault:" + vaultName;
          _triggerAdvancedSearch(input.value);
        });
        group.appendChild(item);
      }
      facetsDiv.appendChild(group);
    }

    // Tag facets (first 12 entries)
    const tagFacets = data.facets.tags || {};
    if (Object.keys(tagFacets).length > 0) {
      const group = el("div", { class: "search-facets__group" });
      const label = el("span", { class: "search-facets__label" });
      label.textContent = "Tags";
      group.appendChild(label);
      const entries = Object.entries(tagFacets).slice(0, 12);
      for (const [tagName, count] of entries) {
        const item = el("span", { class: "search-facets__item" });
        // Tag names come from note frontmatter: insert as text, never as HTML
        item.textContent = `#${tagName} ${count}`;
        item.addEventListener("click", () => {
          addTagFilter(tagName);
        });
        group.appendChild(item);
      }
      facetsDiv.appendChild(group);
    }

    area.appendChild(facetsDiv);
  }

  // Empty state
  if (data.results.length === 0) {
    area.appendChild(el("p", { style: "color:var(--text-muted);margin-top:20px" }, [
      document.createTextNode("Aucun résultat trouvé."),
    ]));
    return;
  }

  // Results list
  const container = el("div", { class: "search-results" });
  data.results.forEach((r) => {
    const titleDiv = el("div", { class: "search-result-title" });
    if (freeText) {
      highlightSearchText(titleDiv, r.title, freeText, searchCaseSensitive);
    } else {
      titleDiv.textContent = r.title;
    }

    // Snippet — use HTML from backend only when it carries <mark> highlights.
    // The previous test was includes(""), which is always true and made the
    // two fallback branches unreachable.
    // NOTE(review): assumes the backend HTML-escapes snippet text around <mark> — confirm
    const snippetDiv = el("div", { class: "search-result-snippet search-result__snippet" });
    if (r.snippet && r.snippet.includes("<mark>")) {
      snippetDiv.innerHTML = r.snippet;
    } else if (freeText && r.snippet) {
      highlightSearchText(snippetDiv, r.snippet, freeText, searchCaseSensitive);
    } else {
      snippetDiv.textContent = r.snippet || "";
    }

    // Score badge
    const scoreEl = el("span", { class: "search-result-score", style: "font-size:0.7rem;color:var(--text-muted);margin-left:8px" });
    scoreEl.textContent = `score: ${r.score}`;

    const vaultPath = el("div", { class: "search-result-vault" }, [
      document.createTextNode(r.vault + " / " + r.path),
      scoreEl,
    ]);

    const item = el("div", { class: "search-result-item" }, [titleDiv, vaultPath, snippetDiv]);

    if (r.tags && r.tags.length > 0) {
      const tagsDiv = el("div", { class: "search-result-tags" });
      r.tags.forEach((tag) => {
        if (!TagFilterService.isTagFiltered(tag)) {
          const tagEl = el("span", { class: "file-tag" }, [document.createTextNode(`#${tag}`)]);
          // Stop propagation so the click filters by tag instead of opening the file
          tagEl.addEventListener("click", (e) => { e.stopPropagation(); addTagFilter(tag); });
          tagsDiv.appendChild(tagEl);
        }
      });
      if (tagsDiv.children.length > 0) item.appendChild(tagsDiv);
    }

    item.addEventListener("click", () => openFile(r.vault, r.path));
    container.appendChild(item);
  });
  area.appendChild(container);

  // Pagination
  if (data.total > ADVANCED_SEARCH_LIMIT) {
    const paginationDiv = el("div", { class: "search-pagination" });
    const prevBtn = el("button", { class: "search-pagination__btn", type: "button" });
    prevBtn.textContent = "← Précédent";
    prevBtn.disabled = advancedSearchOffset === 0;
    prevBtn.addEventListener("click", () => {
      advancedSearchOffset = Math.max(0, advancedSearchOffset - ADVANCED_SEARCH_LIMIT);
      const vault = document.getElementById("vault-filter").value;
      performAdvancedSearch(query, vault, tagFilter, advancedSearchOffset);
      document.getElementById("content-area").scrollTop = 0;
    });

    const info = el("span", { class: "search-pagination__info" });
    const from = advancedSearchOffset + 1;
    const to = Math.min(advancedSearchOffset + ADVANCED_SEARCH_LIMIT, data.total);
    info.textContent = `${from}–${to} sur ${data.total}`;

    const nextBtn = el("button", { class: "search-pagination__btn", type: "button" });
    nextBtn.textContent = "Suivant →";
    nextBtn.disabled = advancedSearchOffset + ADVANCED_SEARCH_LIMIT >= data.total;
    nextBtn.addEventListener("click", () => {
      advancedSearchOffset += ADVANCED_SEARCH_LIMIT;
      const vault = document.getElementById("vault-filter").value;
      performAdvancedSearch(query, vault, tagFilter, advancedSearchOffset);
      document.getElementById("content-area").scrollTop = 0;
    });

    paginationDiv.appendChild(prevBtn);
    paginationDiv.appendChild(info);
    paginationDiv.appendChild(nextBtn);
    area.appendChild(paginationDiv);
  }

  safeCreateIcons();
}
b/frontend/style.css @@ -2162,3 +2162,331 @@ body.resizing-v { justify-content: center; padding: 8px 16px; } + +/* --------------------------------------------------------------------------- + Advanced Search — Autocomplete Dropdown (absolutely positioned at top: 100% of its containing block, scrolls vertically beyond 380px, section headers are sticky at the top of the scroll area) + --------------------------------------------------------------------------- */ +.search-dropdown { + position: absolute; + top: 100%; + left: 0; + right: 0; + margin-top: 4px; + background: var(--bg-secondary); + border: 1px solid var(--border); + border-radius: 8px; + box-shadow: 0 8px 24px rgba(0,0,0,0.25); + z-index: 200; + max-height: 380px; + overflow-y: auto; + overflow-x: hidden; +} +.search-dropdown[hidden] { + display: none; +} +.search-dropdown__section { + border-bottom: 1px solid var(--border); +} +.search-dropdown__section:last-child { + border-bottom: none; +} +.search-dropdown__section[hidden] { + display: none; +} +.search-dropdown__section-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 6px 12px; + font-size: 0.7rem; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--text-muted); + background: var(--bg-primary); + position: sticky; + top: 0; + z-index: 1; +} +.search-dropdown__clear-btn { + background: none; + border: none; + color: var(--text-muted); + cursor: pointer; + padding: 2px 4px; + border-radius: 4px; + display: flex; + align-items: center; +} +.search-dropdown__clear-btn:hover { + color: var(--danger); + background: var(--danger-bg, rgba(255,0,0,0.08)); +} +.search-dropdown__list { + list-style: none; + margin: 0; + padding: 0; +} +.search-dropdown__item { + display: flex; + align-items: center; + gap: 8px; + padding: 7px 12px; + cursor: pointer; + font-size: 0.82rem; + color: var(--text-primary); + transition: background 120ms ease; +} +.search-dropdown__item:hover, +.search-dropdown__item.active { + background: var(--bg-hover); +} +.search-dropdown__item--history { + color: var(--text-secondary); +}
+.search-dropdown__item--history .search-dropdown__icon { + color: var(--text-muted); +} +.search-dropdown__item--title .search-dropdown__meta { + font-size: 0.72rem; + color: var(--text-muted); + margin-left: auto; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 180px; +} +.search-dropdown__item--tag .search-dropdown__badge { + margin-left: auto; + font-size: 0.7rem; + color: var(--text-muted); + background: var(--bg-primary); + padding: 1px 6px; + border-radius: 10px; +} +.search-dropdown__icon { + flex-shrink: 0; + width: 14px; + height: 14px; + color: var(--text-muted); +} +.search-dropdown__text { + flex: 1; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.search-dropdown__text mark { + background: var(--accent); + color: var(--bg-primary); + border-radius: 2px; + padding: 0 1px; +} +.search-dropdown__empty { + padding: 16px 12px; + text-align: center; + font-size: 0.82rem; + color: var(--text-muted); +} +.search-dropdown__empty[hidden] { + display: none; +} + +/* --------------------------------------------------------------------------- + Advanced Search — Filter Chips (wrapping flex row of inline-flex pills; each chip is capped at 200px wide by default and its label is ellipsized) + --------------------------------------------------------------------------- */ +.search-chips { + display: flex; + flex-wrap: wrap; + gap: 6px; + padding: 6px 0 0; +} +.search-chips[hidden] { + display: none; +} +.search-chip { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 3px 8px; + font-size: 0.75rem; + font-family: 'JetBrains Mono', monospace; + border-radius: 12px; + background: var(--accent); + color: var(--bg-primary); + cursor: default; + max-width: 200px; + white-space: nowrap; +} +.search-chip__label { + overflow: hidden; + text-overflow: ellipsis; +} +.search-chip__remove { + display: flex; + align-items: center; + justify-content: center; + background: none; + border: none; + color: inherit; + cursor: pointer; + padding: 0; + opacity: 0.7; + transition: opacity 120ms; +}
+.search-chip__remove:hover { + opacity: 1; +} +.search-chip--tag { + background: var(--accent); + color: var(--bg-primary); +} +.search-chip--vault { + background: var(--success, #22c55e); + color: #fff; +} +.search-chip--title { + background: var(--warning, #f59e0b); + color: #1a1a1a; +} +.search-chip--path { + background: var(--text-muted); + color: var(--bg-primary); +} + +/* --------------------------------------------------------------------------- + Advanced Search — Snippet Highlights (styles mark elements nested inside .search-result__snippet) + --------------------------------------------------------------------------- */ +.search-result__snippet mark { + background: var(--accent); + color: var(--bg-primary); + border-radius: 2px; + padding: 0 2px; + font-weight: 600; +} + +/* --------------------------------------------------------------------------- + Advanced Search — Facets Panel (bordered, wrapping flex container of facet groups; items are pill-shaped and change border/background on hover) + --------------------------------------------------------------------------- */ +.search-facets { + display: flex; + flex-wrap: wrap; + gap: 12px; + margin-bottom: 12px; + padding: 10px 12px; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; +} +.search-facets[hidden] { + display: none; +} +.search-facets__group { + display: flex; + flex-wrap: wrap; + gap: 6px; + align-items: center; +} +.search-facets__label { + font-size: 0.72rem; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--text-muted); + margin-right: 4px; +} +.search-facets__item { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 8px; + font-size: 0.75rem; + border: 1px solid var(--border); + border-radius: 12px; + color: var(--text-secondary); + background: var(--bg-secondary); + cursor: pointer; + transition: border-color 120ms, background 120ms; +} +.search-facets__item:hover { + border-color: var(--accent); + background: var(--bg-hover); +} +.search-facets__item .facet-count { + font-size: 0.68rem; + color: var(--text-muted); +} + +/*
--------------------------------------------------------------------------- + Advanced Search — Pagination (centered flex row; disabled buttons are dimmed and show a not-allowed cursor) + --------------------------------------------------------------------------- */ +.search-pagination { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + padding: 12px 0; +} +.search-pagination__btn { + padding: 6px 14px; + font-size: 0.82rem; + border: 1px solid var(--border); + border-radius: 6px; + background: var(--bg-secondary); + color: var(--text-primary); + cursor: pointer; + transition: border-color 120ms, background 120ms; +} +.search-pagination__btn:hover:not(:disabled) { + border-color: var(--accent); + background: var(--bg-hover); +} +.search-pagination__btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} +.search-pagination__info { + font-size: 0.78rem; + color: var(--text-muted); +} + +/* --------------------------------------------------------------------------- + Advanced Search — Sort Controls (margin-left: auto presumably right-aligns the group inside a flex parent, confirm against the markup; .active uses the accent border and text color) + --------------------------------------------------------------------------- */ +.search-sort { + display: flex; + align-items: center; + gap: 6px; + margin-left: auto; +} +.search-sort__btn { + padding: 3px 10px; + font-size: 0.75rem; + border: 1px solid var(--border); + border-radius: 6px; + background: var(--bg-secondary); + color: var(--text-secondary); + cursor: pointer; + transition: all 120ms; +} +.search-sort__btn.active { + border-color: var(--accent); + color: var(--accent); + background: var(--bg-hover); +} + +/* --------------------------------------------------------------------------- + Advanced Search — Responsive (overrides for viewports up to 768px wide: shorter dropdown, vertically stacked facets, narrower chips) + --------------------------------------------------------------------------- */ +@media (max-width: 768px) { + .search-dropdown { + max-height: 280px; + } + .search-facets { + flex-direction: column; + gap: 8px; + } + .search-chip { + max-width: 150px; + } +}