# tests/test_search_advanced.py — Tests for InvertedIndex, search/suggest functions
import pytest

from backend.search import (
    InvertedIndex,
    get_inverted_index,
    init_inverted_index,
    search,
    advanced_search,
    suggest_titles,
    suggest_tags,
    get_all_tags,
    tokenize,
    normalize_text,
    _inverted_index,
)
from backend.indexer import index, _add_file_to_structures, _remove_file_from_structures


# ═══════════════════════════════════════════════════════════════════
# InvertedIndex — unit tests
# ═══════════════════════════════════════════════════════════════════


class TestInvertedIndex:
    """Unit tests exercising a fresh ``InvertedIndex`` instance per test.

    Tests that take the ``client`` fixture rely on it (defined outside this
    file) to populate the global index before ``rebuild()`` is called.
    """

    @pytest.fixture(autouse=True)
    def _setup(self):
        """Give every test its own, not-yet-built ``InvertedIndex``."""
        self.inv = InvertedIndex()
        yield

    def test_initial_state(self):
        """A new index is empty and not flagged as ready."""
        assert self.inv.doc_count == 0
        assert len(self.inv.word_index) == 0
        assert len(self.inv.title_index) == 0
        assert len(self.inv._sorted_tokens) == 0
        assert self.inv._ready is False

    def test_rebuild_from_index(self, client):
        """rebuild() populates from the global index."""
        self.inv.rebuild()
        assert self.inv.doc_count >= 3
        assert len(self.inv.word_index) > 0
        assert self.inv._ready is True

    def test_add_document(self, client):
        """Adding a new document bumps the count and registers its key."""
        self.inv.rebuild()
        old_count = self.inv.doc_count
        file_info = {
            "path": "new/file.md",
            "title": "Nouveau Fichier",
            "tags": ["test", "nouveau"],
            "content": "Ceci est un nouveau document de test.",
        }
        self.inv.add_document("TestVault", "new/file.md", file_info)
        assert self.inv.doc_count == old_count + 1
        doc_key = "TestVault::new/file.md"
        assert doc_key in self.inv.doc_info
        assert doc_key in self.inv.vault_docs["TestVault"]

    def test_add_document_updates_existing(self, client):
        """Re-adding an already indexed document must not change doc_count."""
        self.inv.rebuild()
        if not self.inv.doc_info:
            pytest.skip("No documents in inverted index")

        # Take vault and path from the real key ("<vault>::<path>") instead of
        # guessing the vault, so the assertion below always runs.
        doc_key, existing_doc = next(iter(self.inv.doc_info.items()))
        vault, path = doc_key.split("::", 1)
        old_count = self.inv.doc_count

        existing_doc_copy = dict(existing_doc)
        existing_doc_copy["tags"] = ["updated"]

        self.inv.add_document(vault, path, existing_doc_copy)
        assert self.inv.doc_count == old_count  # No change

    def test_remove_document(self, client):
        """Removing an indexed document drops the count and its key."""
        self.inv.rebuild()
        # Get the first document
        doc_keys = list(self.inv.doc_info.keys())
        if not doc_keys:
            pytest.skip("No documents in inverted index")
        doc_key = doc_keys[0]
        vault, path = doc_key.split("::", 1)
        old_count = self.inv.doc_count
        self.inv.remove_document(vault, path)
        assert self.inv.doc_count == old_count - 1
        assert doc_key not in self.inv.doc_info

    def test_remove_nonexistent_document(self, client):
        """Removing an unknown document leaves the count untouched."""
        self.inv.rebuild()
        old_count = self.inv.doc_count
        self.inv.remove_document("FakeVault", "nonexistent.md")
        assert self.inv.doc_count == old_count  # No change

    def test_tag_indexing(self, client):
        """tag_docs is populated after a rebuild."""
        self.inv.rebuild()
        # "python" tag should exist in tag_docs
        # NOTE(review): the `or len(...) > 0` clause makes the "python" check
        # vacuous — any non-empty tag_docs passes. Tighten once the fixture's
        # tag set is confirmed.
        assert any("python" in tag.lower() for tag in self.inv.tag_docs) or len(self.inv.tag_docs) > 0

    def test_title_indexing(self, client):
        """title_index is populated after a rebuild."""
        self.inv.rebuild()
        assert len(self.inv.title_index) > 0

    def test_sorted_tokens(self, client):
        """_sorted_tokens is non-empty and in non-decreasing order."""
        self.inv.rebuild()
        tokens = self.inv._sorted_tokens
        assert len(tokens) > 0
        # Check sorted order by comparing each token with its successor.
        for current, following in zip(tokens, tokens[1:]):
            assert current <= following

    def test_get_inverted_index_singleton(self, client):
        """get_inverted_index() returns the same object on every call."""
        inv1 = get_inverted_index()
        inv2 = get_inverted_index()
        assert inv1 is inv2  # Same singleton

    def test_skip_when_not_ready(self, client):
        """add_document/remove_document are no-ops when _ready is False."""
        inv = InvertedIndex()
        assert inv._ready is False
        inv.add_document("V", "p.md", {"path": "p.md", "title": "T", "tags": [], "content": ""})
        assert inv.doc_count == 0  # Skipped
        inv.remove_document("V", "p.md")
        assert inv.doc_count == 0  # Skipped


# ═══════════════════════════════════════════════════════════════════
# Search / Advanced Search integration tests
# ═══════════════════════════════════════════════════════════════════


class TestSearchFunctions:
    """Integration tests for the module-level search and suggest helpers.

    Every test takes the ``client`` fixture (defined outside this file).
    """

    def test_search_basic(self, client):
        """A plain query across all vaults returns at least one hit."""
        results = search("python", vault_filter="all")
        assert len(results) >= 1

    def test_search_vault_filter(self, client):
        """Every hit of a vault-filtered search belongs to that vault."""
        results = search("python", vault_filter="TestVault")
        assert len(results) >= 1
        for r in results:
            assert r["vault"] == "TestVault"

    def test_search_tag_filter(self, client):
        """An empty query with a tag filter still returns hits."""
        results = search("", vault_filter="all", tag_filter="python")
        assert len(results) >= 1

    def test_search_no_results(self, client):
        """A nonsense query returns nothing."""
        results = search("xyznonexistent12345", vault_filter="all")
        assert len(results) == 0

    def test_get_all_tags(self, client):
        """get_all_tags returns a non-empty dict for all vaults."""
        tags = get_all_tags(vault_filter="all")
        assert isinstance(tags, dict)
        assert len(tags) > 0

    def test_get_all_tags_vault_filter(self, client):
        """The TestVault tag set contains "python" or "docker"."""
        tags = get_all_tags(vault_filter="TestVault")
        assert "python" in tags or "docker" in tags

    def test_advanced_search_basic(self, client):
        """advanced_search returns a dict whose hits carry title/score/snippet."""
        result = advanced_search("python", vault_filter="all")
        assert isinstance(result, dict)
        results = result.get("results", [])
        if len(results) == 0:
            pytest.skip("No results from advanced search")
        r = results[0]
        assert "title" in r
        assert "score" in r
        assert "snippet" in r

    def test_advanced_search_with_tag(self, client):
        """A tag-only advanced search returns a dict with a "results" key."""
        result = advanced_search("", vault_filter="all", tag_filter="python")
        assert isinstance(result, dict)
        assert "results" in result

    def test_advanced_search_relevance_order(self, client):
        """Results should be sorted by score descending."""
        result = advanced_search("python", vault_filter="all")
        results = result.get("results", [])
        if len(results) < 2:
            pytest.skip("Not enough results to test ordering")
        for higher, lower in zip(results, results[1:]):
            assert higher["score"] >= lower["score"]

    def test_suggest_titles(self, client):
        """Title suggestions for a prefix expose title, vault and path."""
        suggestions = suggest_titles("intr", vault_filter="all")
        assert len(suggestions) >= 1
        s = suggestions[0]
        assert "title" in s
        assert "vault" in s
        assert "path" in s

    def test_suggest_titles_no_match(self, client):
        """A nonsense prefix yields no title suggestions."""
        suggestions = suggest_titles("xyznonexistent", vault_filter="all")
        assert len(suggestions) == 0

    def test_suggest_tags(self, client):
        """Tag suggestions for "py" return at least one entry."""
        suggestions = suggest_tags("py", vault_filter="all")
        assert len(suggestions) >= 1  # Should find "python"

    def test_suggest_tags_no_match(self, client):
        """A nonsense prefix yields no tag suggestions."""
        suggestions = suggest_tags("xyznonexistent", vault_filter="all")
        assert len(suggestions) == 0