# tests/test_indexer.py — Tests for the indexer module

import os
import tempfile
from pathlib import Path

import pytest

from backend.indexer import (
    _extract_tags,
    _extract_inline_tags,
    _extract_title,
    parse_markdown_file,
    find_file_in_index,
    get_vault_names,
    get_vault_data,
    get_backlinks,
    get_conflicts,
    SUPPORTED_EXTENSIONS,
    _scan_vault,
    load_vault_config,
)


# ═══════════════════════════════════════════════════════════════════
# _extract_tags
# ═══════════════════════════════════════════════════════════════════


class TestExtractTags:
    """Frontmatter tag extraction via ``_extract_tags``."""

    def test_list_of_tags(self):
        import frontmatter

        post = frontmatter.loads("---\ntags:\n - python\n - docker\n---\n# Hello")
        tags = _extract_tags(post)
        assert tags == ["python", "docker"]

    def test_comma_separated_string(self):
        import frontmatter

        post = frontmatter.loads("---\ntags: python, docker, tutorial\n---\n# Hello")
        tags = _extract_tags(post)
        # Order is not asserted for the comma-separated form.
        assert set(tags) == {"python", "docker", "tutorial"}

    def test_with_hash_prefix(self):
        import frontmatter

        post = frontmatter.loads("---\ntags:\n - '#python'\n - '#docker'\n---\n# Hello")
        tags = _extract_tags(post)
        assert tags == ["python", "docker"]

    def test_empty_tags(self):
        import frontmatter

        post = frontmatter.loads("---\ntitle: No Tags\n---\n# Hello")
        tags = _extract_tags(post)
        assert tags == []

    def test_none_tags(self):
        import frontmatter

        post = frontmatter.Post("# Hello", **{})
        tags = _extract_tags(post)
        assert tags == []


# ═══════════════════════════════════════════════════════════════════
# _extract_inline_tags
# ═══════════════════════════════════════════════════════════════════


class TestExtractInlineTags:
    """Inline ``#tag`` extraction via ``_extract_inline_tags``."""

    def test_simple_tag(self):
        tags = _extract_inline_tags("Un texte avec un #tag dedans.")
        assert "tag" in tags

    def test_multiple_tags(self):
        tags = _extract_inline_tags("#python est cool, #docker aussi.")
        assert "python" in tags
        assert "docker" in tags

    def test_no_tags(self):
        tags = _extract_inline_tags("Juste du texte sans tags.")
        assert tags == []

    def test_code_block_excluded(self):
        content = "```python\n# This is a code comment, not a tag\nprint('hello')\n```"
        tags = _extract_inline_tags(content)
        assert tags == []

    def test_inline_code_excluded(self):
        content = "Use `#notatag` in your code."
        tags = _extract_inline_tags(content)
        assert tags == []

    def test_mixed(self):
        content = "#real-tag outside code, `#fake-tag` inside."
        tags = _extract_inline_tags(content)
        assert "real-tag" in tags
        assert "fake-tag" not in tags

    def test_tag_at_line_start(self):
        tags = _extract_inline_tags("#tag at the start\nof the line.")
        assert "tag" in tags


# ═══════════════════════════════════════════════════════════════════
# _extract_title
# ═══════════════════════════════════════════════════════════════════


class TestExtractTitle:
    """Title resolution via ``_extract_title``: frontmatter first, then filename."""

    def test_from_frontmatter(self):
        import frontmatter

        post = frontmatter.loads("---\ntitle: Mon Super Titre\n---\n# Content")
        title = _extract_title(post, Path("/fake/file.md"))
        assert title == "Mon Super Titre"

    def test_fallback_to_filename(self):
        import frontmatter

        post = frontmatter.Post("# Content", **{})
        title = _extract_title(post, Path("/fake/my-great-note.md"))
        assert title == "my great note"

    def test_underscore_fallback(self):
        import frontmatter

        post = frontmatter.Post("# Content", **{})
        title = _extract_title(post, Path("/fake/my_great_note.md"))
        assert title == "my great note"


# ═══════════════════════════════════════════════════════════════════
# parse_markdown_file
# ═══════════════════════════════════════════════════════════════════


class TestParseMarkdownFile:
    """Parsing of raw markdown text via ``parse_markdown_file``."""

    def test_valid_frontmatter(self):
        post = parse_markdown_file("---\ntags:\n - test\ntitle: Hello\n---\n# Hello\nWorld")
        assert post.metadata["title"] == "Hello"
        assert post.metadata["tags"] == ["test"]
        assert "World" in post.content

    def test_no_frontmatter(self):
        post = parse_markdown_file("# Just a heading\nNo frontmatter.")
        assert post.metadata == {}
        assert "Just a heading" in post.content

    def test_invalid_frontmatter_fallback(self):
        """Malformed YAML should fall back gracefully."""
        post = parse_markdown_file("---\ninvalid: [unclosed\n---\n# Content")
        # Should not raise, should return content
        assert "Content" in post.content

    def test_empty_file(self):
        post = parse_markdown_file("")
        assert post.content == ""


# ═══════════════════════════════════════════════════════════════════
# SUPPORTED_EXTENSIONS
# ═══════════════════════════════════════════════════════════════════


class TestSupportedExtensions:
    """Membership checks on the ``SUPPORTED_EXTENSIONS`` collection."""

    def test_markdown_is_supported(self):
        assert ".md" in SUPPORTED_EXTENSIONS

    def test_common_code_extensions(self):
        for ext in [".py", ".js", ".ts", ".go", ".rs", ".java", ".rb"]:
            assert ext in SUPPORTED_EXTENSIONS, f"{ext} should be supported"

    def test_config_extensions(self):
        for ext in [".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf"]:
            assert ext in SUPPORTED_EXTENSIONS, f"{ext} should be supported"

    def test_binary_not_supported(self):
        assert ".png" not in SUPPORTED_EXTENSIONS
        assert ".exe" not in SUPPORTED_EXTENSIONS


# ═══════════════════════════════════════════════════════════════════
# _scan_vault
# ═══════════════════════════════════════════════════════════════════


class TestScanVault:
    """Direct scans of the ``test_vault_dir`` fixture via ``_scan_vault``."""

    def test_scan_creates_file_entries(self, test_vault_dir):
        result = _scan_vault("TestVault", test_vault_dir)
        assert len(result["files"]) >= 3  # note1, note2, projet, café_crème, config.json
        assert len(result["tags"]) > 0
        assert result["path"] == test_vault_dir

    def test_scan_includes_paths(self, test_vault_dir):
        result = _scan_vault("TestVault", test_vault_dir)
        # Should have at least dir + file entries
        paths = result.get("paths", [])
        file_paths = [p["path"] for p in paths if p["type"] == "file"]
        dir_paths = [p["path"] for p in paths if p["type"] == "directory"]
        assert len(file_paths) >= 3
        assert any("Projets" in d for d in dir_paths)

    def test_file_has_required_fields(self, test_vault_dir):
        result = _scan_vault("TestVault", test_vault_dir)
        f = result["files"][0]
        assert "path" in f
        assert "title" in f
        assert "tags" in f
        assert "content" in f
        assert "content_preview" in f
        assert "size" in f
        assert "modified" in f
        assert "extension" in f

    def test_content_is_truncated(self, test_vault_dir):
        """Content should be capped at SEARCH_CONTENT_LIMIT."""
        result = _scan_vault("TestVault", test_vault_dir)
        for f in result["files"]:
            assert len(f["content"]) <= 100_000  # SEARCH_CONTENT_LIMIT


# ═══════════════════════════════════════════════════════════════════
# Index integration (requires built index)
# ═══════════════════════════════════════════════════════════════════


class TestIndexIntegration:
    """Lookups against the built index; each test requests the ``client`` fixture."""

    def test_get_vault_names(self, client):
        names = get_vault_names()
        assert "TestVault" in names

    def test_get_vault_data(self, client):
        data = get_vault_data("TestVault")
        assert data is not None
        assert len(data["files"]) >= 3

    def test_find_file_in_index(self, client):
        # Try finding by filename first
        result = find_file_in_index("note1.md", "TestVault")
        if result is None:
            # Fallback: try by title
            result = find_file_in_index("Introduction à Python", "TestVault")
        assert result is not None, f"Could not find note1.md in index. Vaults: {get_vault_names()}"
        assert result["vault"] == "TestVault"

    def test_find_file_case_insensitive(self, client):
        result = find_file_in_index("NOTE1.MD", "TestVault")  # Case insensitive via filename
        if result is None:
            result = find_file_in_index("introduction à python", "TestVault")
        assert result is not None

    def test_find_file_not_found(self, client):
        result = find_file_in_index("DoesNotExistXYZ123", "TestVault")
        assert result is None

    def test_get_backlinks(self, client):
        # note2.md links to "Introduction à Python" which should resolve to note1.md
        backlinks = get_backlinks("TestVault", "note1.md")
        if not backlinks:
            # Try with .md suffix
            backlinks = get_backlinks("TestVault", "Introduction à Python.md")
        assert len(backlinks) >= 1, f"Expected backlinks for note1.md, got {backlinks}"


# ═══════════════════════════════════════════════════════════════════
# load_vault_config
# ═══════════════════════════════════════════════════════════════════


class TestLoadVaultConfig:
    """Environment-driven configuration via ``load_vault_config``.

    Every environment change goes through ``monkeypatch`` so it is undone at
    teardown. Writing to ``os.environ`` directly would leak variables into
    later tests (for example a leftover ``VAULT_2_*`` pair would defeat the
    gap that ``test_stops_at_missing_pair`` relies on).
    """

    def test_loads_sequential_vaults(self, test_vault_dir, monkeypatch):
        monkeypatch.setenv("VAULT_1_NAME", "V1")
        monkeypatch.setenv("VAULT_1_PATH", test_vault_dir)
        monkeypatch.setenv("VAULT_2_NAME", "V2")
        monkeypatch.setenv("VAULT_2_PATH", test_vault_dir)
        # Make sure an ambient third vault cannot inflate the exact count below.
        monkeypatch.delenv("VAULT_3_NAME", raising=False)
        monkeypatch.delenv("VAULT_3_PATH", raising=False)

        config = load_vault_config()

        assert len(config) == 2
        assert config["V1"]["path"] == test_vault_dir
        assert config["V2"]["path"] == test_vault_dir

    def test_stops_at_missing_pair(self, test_vault_dir, monkeypatch):
        monkeypatch.setenv("VAULT_1_NAME", "V1")
        monkeypatch.setenv("VAULT_1_PATH", test_vault_dir)
        # VAULT_2_NAME missing — should stop
        # Remove the pair explicitly so the gap exists regardless of what the
        # surrounding environment (or an earlier test) defined.
        monkeypatch.delenv("VAULT_2_NAME", raising=False)
        monkeypatch.delenv("VAULT_2_PATH", raising=False)
        monkeypatch.setenv("VAULT_3_NAME", "V3")
        monkeypatch.setenv("VAULT_3_PATH", test_vault_dir)

        config = load_vault_config()

        assert len(config) == 1
        assert "V1" in config
        assert "V3" not in config

    def test_dir_entries(self, test_vault_dir, monkeypatch):
        monkeypatch.setenv("DIR_1_NAME", "MyDir")
        monkeypatch.setenv("DIR_1_PATH", test_vault_dir)

        config = load_vault_config()

        assert "MyDir" in config
        assert config["MyDir"]["type"] == "DIR"