feat: add ext: operator for file extension filtering in advanced search

- Add ext: operator support to the query parser in backend and frontend
- Update search documentation in README and help modal with ext: examples
- Parse the ext: operator to extract a file extension filter (strips the leading dot, converts to lowercase)
- Filter search candidates by file extension in the advanced_search function
- Add an ext chip in the search UI alongside the existing tag/vault/title/path chips
- Update API documentation and function docstrings
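The ext: parsing described above (strip the operator prefix, drop any leading dot, lowercase) can be sketched as a standalone function; the function name is illustrative, not from the codebase:

```python
def parse_ext_token(token: str):
    """Illustrative sketch of the ext: token handling this commit adds:
    strip the "ext:" prefix, any leading dot, and lowercase the value."""
    if token.lower().startswith("ext:"):
        return token[4:].strip().lstrip(".").lower()
    return None  # not an ext: token
```

With this behavior, `ext:.MD` and `ext:md` filter identically, which matches the normalization applied on both the backend and frontend sides of the diff.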
This commit is contained in:
parent 4c4b8be215
commit 84d3ad0e90

README.md (11 changes)
@@ -49,7 +49,7 @@
 - **🌳 Tree navigation**: browse your folders and files in the sidebar
 - **🔍 Advanced search**: TF-IDF engine with accent normalization, highlighted snippets, facets, pagination and sorting
 - **💡 Smart autocompletion**: file, tag and history suggestions with keyboard navigation
-- **🧩 Query syntax**: `tag:`, `#`, `vault:`, `title:`, `path:` operators with visual chips
+- **🧩 Query syntax**: `tag:`, `#`, `vault:`, `title:`, `path:`, `ext:` operators with visual chips
 - **📜 Search history**: persisted in localStorage (max 50 entries, LIFO, deduplicated)
 - **🏷️ Tag cloud**: filter by tags extracted from YAML frontmatter
 - **🔗 Wikilinks**: Obsidian `[[internal links]]` are clickable
@@ -503,10 +503,6 @@ curl "http://localhost:2020/api/tags/suggest?q=rec&vault=all"
 
 # Get a file
 curl "http://localhost:2020/api/file/Recettes?path=pizza.md"
-```
-
----
-
 ## 🔍 Advanced search
 
 ### Query syntax
@@ -518,9 +514,12 @@ curl "http://localhost:2020/api/file/Recettes?path=pizza.md"
 | `vault:<name>` | Filter by vault | `vault:IT kubernetes` |
 | `title:<text>` | Filter by title | `title:pizza` |
 | `path:<text>` | Filter by path | `path:recettes/soupes` |
+| `ext:<type>` | Filter by file type | `ext:md kubernetes` |
 | `"exact phrase"` | Phrase search | `tag:"multi mots"` |
 
-Operators can be combined: `tag:linux vault:IT serveur web` searches for "serveur web" in the IT vault with the linux tag.
+Extension filter examples: `ext:sh` for bash scripts, `ext:py` for Python scripts, `ext:md` for Markdown files.
+
+Operators can be combined: `tag:linux vault:IT ext:md serveur web` searches for "serveur web" in the Markdown files of the IT vault with the linux tag.
 
 ### Keyboard shortcuts
 
@@ -1636,7 +1636,6 @@ async def api_tree_search(
             "path": entry["path"],
             "name": entry["name"],
             "type": entry["type"],
-            "matched_path": entry["path"],
         })
 
     return {"query": q, "vault_filter": vault, "results": results}
@@ -1644,7 +1643,7 @@ async def api_tree_search(
 
 @app.get("/api/search/advanced", response_model=AdvancedSearchResponse)
 async def api_advanced_search(
-    q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path: operators)"),
+    q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path:, ext: operators)"),
     vault: str = Query("all", description="Vault filter"),
     tag: Optional[str] = Query(None, description="Comma-separated tag filter"),
     limit: int = Query(50, ge=1, le=200, description="Results per page"),
@@ -1659,6 +1658,7 @@ async def api_advanced_search(
    - ``vault:<name>`` — filter by vault
    - ``title:<text>`` — filter by title substring
    - ``path:<text>`` — filter by path substring
+    - ``ext:<type>`` — filter by file extension
    - Remaining text is scored using TF-IDF with accent normalization.

    Results include ``<mark>``-highlighted snippets and faceted tag/vault counts.
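A quick way to exercise the endpoint described in the docstring above is to build the request URL by hand; the host and port are taken from the README's curl examples, and the parameter values here are just a sample:

```python
from urllib.parse import urlencode

# Build a request URL for the advanced search endpoint shown above;
# host/port follow the README's curl examples (localhost:2020).
params = {"q": "ext:md kubernetes", "vault": "all", "limit": 10}
url = "http://localhost:2020/api/search/advanced?" + urlencode(params)
print(url)
```

Note that `urlencode` percent-encodes the colon in `ext:md` and turns the space into `+`; the server-side parser sees the decoded query string.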
@@ -534,19 +534,21 @@ def _parse_advanced_query(raw_query: str) -> Dict[str, Any]:
    - ``vault:<name>`` — vault filter
    - ``title:<text>`` — title filter
    - ``path:<text>`` — path filter
+    - ``ext:<type>`` — file extension filter
    - Remaining tokens are treated as free-text search terms.

    Args:
        raw_query: Raw query string from the user.

    Returns:
-        Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``terms``.
+        Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``ext``, ``terms``.
    """
    parsed: Dict[str, Any] = {
        "tags": [],
        "vault": None,
        "title": None,
        "path": None,
+        "ext": None,
        "terms": [],
    }
    if not raw_query:
@@ -568,6 +570,8 @@ def _parse_advanced_query(raw_query: str) -> Dict[str, Any]:
            parsed["title"] = token[6:].strip()
        elif lower.startswith("path:"):
            parsed["path"] = token[5:].strip()
+        elif lower.startswith("ext:"):
+            parsed["ext"] = token[4:].strip().lstrip(".").lower()
        else:
            parsed["terms"].append(token)

@@ -639,7 +643,7 @@ def advanced_search(
    sorted token list for O(log V + k) instead of O(V) linear scan.

    Parses the query for operators (``tag:``, ``vault:``, ``title:``,
-    ``path:``), falls back remaining tokens to TF-IDF scored free-text
+    ``path:``, ``ext:``), falls back remaining tokens to TF-IDF scored free-text
    search using the inverted index. Results include highlighted snippets
    with ``<mark>`` tags and faceted counts for tags and vaults.

@@ -673,7 +677,7 @@ def advanced_search(
    query_terms = [normalize_text(t) for t in parsed["terms"] if t.strip()]
    has_terms = len(query_terms) > 0

-    if not has_terms and not all_tags and not parsed["title"] and not parsed["path"]:
+    if not has_terms and not all_tags and not parsed["title"] and not parsed["path"] and not parsed["ext"]:
        return {"results": [], "total": 0, "offset": offset, "limit": limit,
                "facets": {"tags": {}, "vaults": {}}, "query_time_ms": 0}

@@ -725,6 +729,16 @@ def advanced_search(
            if norm_path_filter in normalize_text(inv.doc_info[dk].get("path", ""))
        }

+    if parsed["ext"]:
+        ext_filter = parsed["ext"]
+        candidates = {
+            dk for dk in candidates
+            if (
+                inv.doc_info[dk].get("path", "").rsplit("/", 1)[-1].lower() == ext_filter
+                or inv.doc_info[dk].get("path", "").rsplit("/", 1)[-1].lower().endswith(f".{ext_filter}")
+            )
+        }
+
    # ------------------------------------------------------------------
    # Step 3: Score only the candidates (not all N documents)
    # ------------------------------------------------------------------
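The matching predicate in the set comprehension above can be isolated for clarity; this sketch assumes the same `path` layout as `doc_info`, and the helper name is illustrative:

```python
def matches_ext(path: str, ext_filter: str) -> bool:
    """Illustrative extraction of the extension test used in the diff above."""
    name = path.rsplit("/", 1)[-1].lower()
    # Either the whole filename equals the filter (extensionless files such
    # as "Makefile" matched via ext:makefile) or it ends in ".<ext>".
    return name == ext_filter or name.endswith(f".{ext_filter}")
```

The equality branch is what lets `ext:makefile` match extensionless files, while the `endswith` branch covers the common `ext:md` / `ext:py` cases.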
@@ -774,7 +788,7 @@ def advanced_search(
                for expanded_term in expansions:
                    score += inv.tf_idf(expanded_term, doc_key) * 0.5
        else:
-            # Filter-only search (tag/title/path): score = 1
+            # Filter-only search (tag/title/path/ext): score = 1
            score = 1.0

        if score > 0:
@@ -305,11 +305,11 @@
    };

    // ---------------------------------------------------------------------------
-    // Query Parser — extracts operators (tag:, #, vault:, title:, path:)
+    // Query Parser — extracts operators (tag:, #, vault:, title:, path:, ext:)
    // ---------------------------------------------------------------------------
    const QueryParser = {
        parse(raw) {
-            const result = { tags: [], vault: null, title: null, path: null, freeText: "" };
+            const result = { tags: [], vault: null, title: null, path: null, ext: null, freeText: "" };
            if (!raw) return result;
            const tokens = this._tokenize(raw);
            const freeTokens = [];
@@ -326,6 +326,8 @@
                result.title = tok.slice(6).replace(/"/g, "").trim();
            } else if (lower.startsWith("path:")) {
                result.path = tok.slice(5).replace(/"/g, "").trim();
+            } else if (lower.startsWith("ext:")) {
+                result.ext = tok.slice(4).replace(/"/g, "").trim().replace(/^\./, "").toLowerCase();
            } else {
                freeTokens.push(tok);
            }
@@ -657,6 +659,10 @@
            this._addChip("path", `path:${parsed.path}`, parsed.path);
            hasChips = true;
        }
+        if (parsed.ext) {
+            this._addChip("ext", `ext:${parsed.ext}`, parsed.ext);
+            hasChips = true;
+        }
        this._container.hidden = !hasChips;
    },

    clear() {
@@ -776,9 +776,11 @@
                <li><code>vault:IT</code>: search within a specific vault</li>
                <li><code>title:kubernetes</code>: search in titles only</li>
                <li><code>path:recettes/soupes</code>: filter by path</li>
+                <li><code>ext:md</code>: filter by file type</li>
                <li><code>"phrase exacte"</code>: exact phrase search in quotes</li>
            </ul>
-            <p><strong>Example</strong>: <code>tag:linux vault:IT serveur web</code> searches for "serveur web" in the IT vault with the linux tag.</p>
+            <p><strong>Examples</strong>: <code>ext:sh</code> searches bash scripts, <code>ext:py</code> Python scripts, <code>ext:md</code> Markdown files.</p>
+            <p><strong>Combined example</strong>: <code>tag:linux vault:IT ext:md serveur web</code> searches for "serveur web" in the Markdown files of the IT vault with the linux tag.</p>

            <h3>Autocompletion</h3>
            <p>Autocompletion helps you quickly find:</p>