From 84d3ad0e90f7ffb1aedb67c4c4a91f38e49f2390 Mon Sep 17 00:00:00 2001 From: Bruno Charest Date: Tue, 31 Mar 2026 13:41:07 -0400 Subject: [PATCH] feat: add ext: operator for file extension filtering in advanced search - Add ext: operator support to query parser in backend and frontend - Update search documentation in README and help modal with ext: examples - Parse ext: operator to extract file extension filter (strips leading dot, converts to lowercase) - Filter search candidates by file extension in advanced_search function - Add ext chip display in search UI alongside existing tag/vault/title/path chips - Update API documentation and function --- README.md | 11 +++++------ backend/main.py | 6 +++--- backend/search.py | 22 ++++++++++++++++++---- frontend/app.js | 10 ++++++++-- frontend/index.html | 4 +++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index fde6f2a..c8508f6 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ - **🌳 Navigation arborescente** : Parcourez vos dossiers et fichiers dans la sidebar - **🔍 Recherche avancée** : Moteur TF-IDF avec normalisation des accents, snippets surlignés, facettes, pagination et tri - **💡 Autocomplétion intelligente** : Suggestions de fichiers, tags et historique avec navigation clavier -- **🧩 Syntaxe de requête** : Opérateurs `tag:`, `#`, `vault:`, `title:`, `path:` avec chips visuels +- **🧩 Syntaxe de requête** : Opérateurs `tag:`, `#`, `vault:`, `title:`, `path:`, `ext:` avec chips visuels - **📜 Historique de recherche** : Persisté en localStorage (max 50 entrées, LIFO, dédupliqué) - **🏷️ Tag cloud** : Filtrage par tags extraits des frontmatters YAML - **🔗 Wikilinks** : Les `[[liens internes]]` Obsidian sont cliquables @@ -503,10 +503,6 @@ curl "http://localhost:2020/api/tags/suggest?q=rec&vault=all" # Obtenir un fichier curl "http://localhost:2020/api/file/Recettes?path=pizza.md" -``` - ---- - ## 🔍 Recherche avancée ### Syntaxe de requête @@ -518,9 +514,12 @@ curl 
"http://localhost:2020/api/file/Recettes?path=pizza.md" | `vault:` | Filtrer par vault | `vault:IT kubernetes` | | `title:` | Filtrer par titre | `title:pizza` | | `path:` | Filtrer par chemin | `path:recettes/soupes` | +| `ext:` | Filtrer par type de fichier | `ext:md kubernetes` | | `"phrase exacte"` | Recherche de phrase | `tag:"multi mots"` | -Les opérateurs sont combinables : `tag:linux vault:IT serveur web` recherche "serveur web" dans le vault IT avec le tag linux. +Exemples de filtre par extension : `ext:sh` pour les scripts bash, `ext:py` pour les scripts Python, `ext:md` pour les fichiers Markdown. + +Les opérateurs sont combinables : `tag:linux vault:IT ext:md serveur web` recherche "serveur web" dans les fichiers Markdown du vault IT avec le tag linux. ### Raccourcis clavier diff --git a/backend/main.py b/backend/main.py index c4a37fa..760e0b2 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1636,15 +1636,14 @@ async def api_tree_search( "path": entry["path"], "name": entry["name"], "type": entry["type"], - "matched_path": entry["path"], }) - + return {"query": q, "vault_filter": vault, "results": results} @app.get("/api/search/advanced", response_model=AdvancedSearchResponse) async def api_advanced_search( - q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path: operators)"), + q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path:, ext: operators)"), vault: str = Query("all", description="Vault filter"), tag: Optional[str] = Query(None, description="Comma-separated tag filter"), limit: int = Query(50, ge=1, le=200, description="Results per page"), @@ -1659,6 +1658,7 @@ async def api_advanced_search( - ``vault:`` — filter by vault - ``title:`` — filter by title substring - ``path:`` — filter by path substring + - ``ext:`` — filter by file extension - Remaining text is scored using TF-IDF with accent normalization. 
Results include ````-highlighted snippets and faceted tag/vault counts. diff --git a/backend/search.py b/backend/search.py index b23a9a4..298a7de 100644 --- a/backend/search.py +++ b/backend/search.py @@ -534,19 +534,21 @@ def _parse_advanced_query(raw_query: str) -> Dict[str, Any]: - ``vault:`` — vault filter - ``title:`` — title filter - ``path:`` — path filter + - ``ext:`` — file extension filter - Remaining tokens are treated as free-text search terms. Args: raw_query: Raw query string from the user. Returns: - Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``terms``. + Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``ext``, ``terms``. """ parsed: Dict[str, Any] = { "tags": [], "vault": None, "title": None, "path": None, + "ext": None, "terms": [], } if not raw_query: @@ -568,6 +570,8 @@ def _parse_advanced_query(raw_query: str) -> Dict[str, Any]: parsed["title"] = token[6:].strip() elif lower.startswith("path:"): parsed["path"] = token[5:].strip() + elif lower.startswith("ext:"): + parsed["ext"] = token[4:].strip().lstrip(".").lower() else: parsed["terms"].append(token) @@ -639,7 +643,7 @@ def advanced_search( sorted token list for O(log V + k) instead of O(V) linear scan. Parses the query for operators (``tag:``, ``vault:``, ``title:``, - ``path:``), falls back remaining tokens to TF-IDF scored free-text + ``path:``, ``ext:``), falls back remaining tokens to TF-IDF scored free-text search using the inverted index. Results include highlighted snippets with ```` tags and faceted counts for tags and vaults. 
@@ -673,7 +677,7 @@ def advanced_search( query_terms = [normalize_text(t) for t in parsed["terms"] if t.strip()] has_terms = len(query_terms) > 0 - if not has_terms and not all_tags and not parsed["title"] and not parsed["path"]: + if not has_terms and not all_tags and not parsed["title"] and not parsed["path"] and not parsed["ext"]: return {"results": [], "total": 0, "offset": offset, "limit": limit, "facets": {"tags": {}, "vaults": {}}, "query_time_ms": 0} @@ -725,6 +729,16 @@ def advanced_search( if norm_path_filter in normalize_text(inv.doc_info[dk].get("path", "")) } + if parsed["ext"]: + ext_filter = parsed["ext"] + candidates = { + dk for dk in candidates + if ( + inv.doc_info[dk].get("path", "").rsplit("/", 1)[-1].lower() == ext_filter + or inv.doc_info[dk].get("path", "").rsplit("/", 1)[-1].lower().endswith(f".{ext_filter}") + ) + } + # ------------------------------------------------------------------ # Step 3: Score only the candidates (not all N documents) # ------------------------------------------------------------------ @@ -774,7 +788,7 @@ def advanced_search( for expanded_term in expansions: score += inv.tf_idf(expanded_term, doc_key) * 0.5 else: - # Filter-only search (tag/title/path): score = 1 + # Filter-only search (tag/title/path/ext): score = 1 score = 1.0 if score > 0: diff --git a/frontend/app.js b/frontend/app.js index 40f5d8d..cb61bce 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -305,11 +305,11 @@ }; // --------------------------------------------------------------------------- - // Query Parser — extracts operators (tag:, #, vault:, title:, path:) + // Query Parser — extracts operators (tag:, #, vault:, title:, path:, ext:) // --------------------------------------------------------------------------- const QueryParser = { parse(raw) { - const result = { tags: [], vault: null, title: null, path: null, freeText: "" }; + const result = { tags: [], vault: null, title: null, path: null, ext: null, freeText: "" }; if (!raw) return 
result; const tokens = this._tokenize(raw); const freeTokens = []; @@ -326,6 +326,8 @@ result.title = tok.slice(6).replace(/"/g, "").trim(); } else if (lower.startsWith("path:")) { result.path = tok.slice(5).replace(/"/g, "").trim(); + } else if (lower.startsWith("ext:")) { + result.ext = tok.slice(4).replace(/"/g, "").trim().replace(/^\./, "").toLowerCase(); } else { freeTokens.push(tok); } @@ -657,6 +659,10 @@ this._addChip("path", `path:${parsed.path}`, parsed.path); hasChips = true; } + if (parsed.ext) { + this._addChip("ext", `ext:${parsed.ext}`, parsed.ext); + hasChips = true; + } this._container.hidden = !hasChips; }, clear() { diff --git a/frontend/index.html b/frontend/index.html index a0d935f..0cf55f0 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -776,9 +776,11 @@
  • vault:IT : Rechercher dans un vault spécifique
  • title:kubernetes : Chercher dans les titres uniquement
  • path:recettes/soupes : Filtrer par chemin
  • +
  • ext:md : Filtrer par type de fichier
  • "phrase exacte" : Recherche de phrase entre guillemets
  • -

    Exemple : tag:linux vault:IT serveur web recherche "serveur web" dans le vault IT avec le tag linux.

    +

    Exemples : ext:sh recherche dans les scripts bash, ext:py dans les scripts Python, ext:md dans les fichiers Markdown.

    +

    Exemple combiné : tag:linux vault:IT ext:md serveur web recherche "serveur web" dans les fichiers Markdown du vault IT avec le tag linux.

    Autocomplétion

    L'autocomplétion vous aide à trouver rapidement :