feat: add ext: operator for file extension filtering in advanced search

- Add ext: operator support to query parser in backend and frontend
- Update search documentation in README and help modal with ext: examples
- Parse ext: operator to extract file extension filter (strips leading dot, converts to lowercase)
- Filter search candidates by file extension in advanced_search function
- Add ext chip display in search UI alongside existing tag/vault/title/path chips
- Update API documentation and function docstrings
This commit is contained in:
Bruno Charest 2026-03-31 13:41:07 -04:00
parent 4c4b8be215
commit 84d3ad0e90
5 changed files with 37 additions and 16 deletions

View File

@ -49,7 +49,7 @@
- **🌳 Navigation arborescente** : Parcourez vos dossiers et fichiers dans la sidebar
- **🔍 Recherche avancée** : Moteur TF-IDF avec normalisation des accents, snippets surlignés, facettes, pagination et tri
- **💡 Autocomplétion intelligente** : Suggestions de fichiers, tags et historique avec navigation clavier
- **🧩 Syntaxe de requête** : Opérateurs `tag:`, `#`, `vault:`, `title:`, `path:` avec chips visuels
- **🧩 Syntaxe de requête** : Opérateurs `tag:`, `#`, `vault:`, `title:`, `path:`, `ext:` avec chips visuels
- **📜 Historique de recherche** : Persisté en localStorage (max 50 entrées, LIFO, dédupliqué)
- **🏷️ Tag cloud** : Filtrage par tags extraits des frontmatters YAML
- **🔗 Wikilinks** : Les `[[liens internes]]` Obsidian sont cliquables
@ -503,10 +503,6 @@ curl "http://localhost:2020/api/tags/suggest?q=rec&vault=all"
# Obtenir un fichier
curl "http://localhost:2020/api/file/Recettes?path=pizza.md"
```
---
## 🔍 Recherche avancée
### Syntaxe de requête
@ -518,9 +514,12 @@ curl "http://localhost:2020/api/file/Recettes?path=pizza.md"
| `vault:<nom>` | Filtrer par vault | `vault:IT kubernetes` |
| `title:<texte>` | Filtrer par titre | `title:pizza` |
| `path:<texte>` | Filtrer par chemin | `path:recettes/soupes` |
| `ext:<type>` | Filtrer par type de fichier | `ext:md kubernetes` |
| `"phrase exacte"` | Recherche de phrase | `tag:"multi mots"` |
Les opérateurs sont combinables : `tag:linux vault:IT serveur web` recherche "serveur web" dans le vault IT avec le tag linux.
Exemples de filtre par extension : `ext:sh` pour les scripts bash, `ext:py` pour les scripts Python, `ext:md` pour les fichiers Markdown.
Les opérateurs sont combinables : `tag:linux vault:IT ext:md serveur web` recherche "serveur web" dans les fichiers Markdown du vault IT avec le tag linux.
### Raccourcis clavier

View File

@ -1636,7 +1636,6 @@ async def api_tree_search(
"path": entry["path"],
"name": entry["name"],
"type": entry["type"],
"matched_path": entry["path"],
})
return {"query": q, "vault_filter": vault, "results": results}
@ -1644,7 +1643,7 @@ async def api_tree_search(
@app.get("/api/search/advanced", response_model=AdvancedSearchResponse)
async def api_advanced_search(
q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path: operators)"),
q: str = Query("", description="Advanced search query (supports tag:, vault:, title:, path:, ext: operators)"),
vault: str = Query("all", description="Vault filter"),
tag: Optional[str] = Query(None, description="Comma-separated tag filter"),
limit: int = Query(50, ge=1, le=200, description="Results per page"),
@ -1659,6 +1658,7 @@ async def api_advanced_search(
- ``vault:<name>`` filter by vault
- ``title:<text>`` filter by title substring
- ``path:<text>`` filter by path substring
- ``ext:<type>`` filter by file extension
- Remaining text is scored using TF-IDF with accent normalization.
Results include ``<mark>``-highlighted snippets and faceted tag/vault counts.

View File

@ -534,19 +534,21 @@ def _parse_advanced_query(raw_query: str) -> Dict[str, Any]:
- ``vault:<name>`` vault filter
- ``title:<text>`` title filter
- ``path:<text>`` path filter
- ``ext:<type>`` file extension filter
- Remaining tokens are treated as free-text search terms.
Args:
raw_query: Raw query string from the user.
Returns:
Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``terms``.
Dict with keys ``tags``, ``vault``, ``title``, ``path``, ``ext``, ``terms``.
"""
parsed: Dict[str, Any] = {
"tags": [],
"vault": None,
"title": None,
"path": None,
"ext": None,
"terms": [],
}
if not raw_query:
@ -568,6 +570,8 @@ def _parse_advanced_query(raw_query: str) -> Dict[str, Any]:
parsed["title"] = token[6:].strip()
elif lower.startswith("path:"):
parsed["path"] = token[5:].strip()
elif lower.startswith("ext:"):
parsed["ext"] = token[4:].strip().lstrip(".").lower()
else:
parsed["terms"].append(token)
@ -639,7 +643,7 @@ def advanced_search(
sorted token list for O(log V + k) instead of O(V) linear scan.
Parses the query for operators (``tag:``, ``vault:``, ``title:``,
``path:``), falls back remaining tokens to TF-IDF scored free-text
``path:``, ``ext:``), falls back remaining tokens to TF-IDF scored free-text
search using the inverted index. Results include highlighted snippets
with ``<mark>`` tags and faceted counts for tags and vaults.
@ -673,7 +677,7 @@ def advanced_search(
query_terms = [normalize_text(t) for t in parsed["terms"] if t.strip()]
has_terms = len(query_terms) > 0
if not has_terms and not all_tags and not parsed["title"] and not parsed["path"]:
if not has_terms and not all_tags and not parsed["title"] and not parsed["path"] and not parsed["ext"]:
return {"results": [], "total": 0, "offset": offset, "limit": limit,
"facets": {"tags": {}, "vaults": {}}, "query_time_ms": 0}
@ -725,6 +729,16 @@ def advanced_search(
if norm_path_filter in normalize_text(inv.doc_info[dk].get("path", ""))
}
if parsed["ext"]:
ext_filter = parsed["ext"]
candidates = {
dk for dk in candidates
if (
inv.doc_info[dk].get("path", "").rsplit("/", 1)[-1].lower() == ext_filter
or inv.doc_info[dk].get("path", "").rsplit("/", 1)[-1].lower().endswith(f".{ext_filter}")
)
}
# ------------------------------------------------------------------
# Step 3: Score only the candidates (not all N documents)
# ------------------------------------------------------------------
@ -774,7 +788,7 @@ def advanced_search(
for expanded_term in expansions:
score += inv.tf_idf(expanded_term, doc_key) * 0.5
else:
# Filter-only search (tag/title/path): score = 1
# Filter-only search (tag/title/path/ext): score = 1
score = 1.0
if score > 0:

View File

@ -305,11 +305,11 @@
};
// ---------------------------------------------------------------------------
// Query Parser — extracts operators (tag:, #, vault:, title:, path:)
// Query Parser — extracts operators (tag:, #, vault:, title:, path:, ext:)
// ---------------------------------------------------------------------------
const QueryParser = {
parse(raw) {
const result = { tags: [], vault: null, title: null, path: null, freeText: "" };
const result = { tags: [], vault: null, title: null, path: null, ext: null, freeText: "" };
if (!raw) return result;
const tokens = this._tokenize(raw);
const freeTokens = [];
@ -326,6 +326,8 @@
result.title = tok.slice(6).replace(/"/g, "").trim();
} else if (lower.startsWith("path:")) {
result.path = tok.slice(5).replace(/"/g, "").trim();
} else if (lower.startsWith("ext:")) {
result.ext = tok.slice(4).replace(/"/g, "").trim().replace(/^\./, "").toLowerCase();
} else {
freeTokens.push(tok);
}
@ -657,6 +659,10 @@
this._addChip("path", `path:${parsed.path}`, parsed.path);
hasChips = true;
}
if (parsed.ext) {
this._addChip("ext", `ext:${parsed.ext}`, parsed.ext);
hasChips = true;
}
this._container.hidden = !hasChips;
},
clear() {

View File

@ -776,9 +776,11 @@
<li><code>vault:IT</code> : Rechercher dans un vault spécifique</li>
<li><code>title:kubernetes</code> : Chercher dans les titres uniquement</li>
<li><code>path:recettes/soupes</code> : Filtrer par chemin</li>
<li><code>ext:md</code> : Filtrer par type de fichier</li>
<li><code>"phrase exacte"</code> : Recherche de phrase entre guillemets</li>
</ul>
<p><strong>Exemple</strong> : <code>tag:linux vault:IT serveur web</code> recherche "serveur web" dans le vault IT avec le tag linux.</p>
<p><strong>Exemples</strong> : <code>ext:sh</code> recherche dans les scripts bash, <code>ext:py</code> dans les scripts Python, <code>ext:md</code> dans les fichiers Markdown.</p>
<p><strong>Exemple combiné</strong> : <code>tag:linux vault:IT ext:md serveur web</code> recherche "serveur web" dans les fichiers Markdown du vault IT avec le tag linux.</p>
<h3>Autocomplétion</h3>
<p>L'autocomplétion vous aide à trouver rapidement :</p>