ObsiGate/tests/test_indexer.py
Bruno Charest edb9e98f81 test: add pytest suite - 97 tests, search + indexer + auth
Create comprehensive test suite with 97 passing tests:
- tests/conftest.py: fixtures (TestClient, temp vault dirs, index setup)
- tests/test_search.py (27 tests): tokenizer, snippets, highlight,
  tag filter, search API, advanced search, suggest, tags API
- tests/test_indexer.py (32 tests): frontmatter parsing, inline tags,
  title extraction, scan_vault, find_file_in_index, backlinks
- tests/test_auth.py (38 tests): password hashing, JWT create/decode,
  token revocation, user CRUD, login lockout, rate limiting, middleware

Also fix: lazy WeasyPrint import (graceful fallback when GTK missing),
add data/ to .gitignore (runtime files from test runs).
2026-05-27 22:06:27 -04:00

285 lines
12 KiB
Python

# tests/test_indexer.py — Tests for the indexer module
import os
import tempfile
from pathlib import Path
import pytest
from backend.indexer import (
_extract_tags,
_extract_inline_tags,
_extract_title,
parse_markdown_file,
find_file_in_index,
get_vault_names,
get_vault_data,
get_backlinks,
get_conflicts,
SUPPORTED_EXTENSIONS,
_scan_vault,
load_vault_config,
)
# ═══════════════════════════════════════════════════════════════════
# _extract_tags
# ═══════════════════════════════════════════════════════════════════
class TestExtractTags:
    """Checks on ``_extract_tags`` for the frontmatter ``tags`` field."""

    def test_list_of_tags(self):
        """A YAML list of tags comes back as the same ordered list."""
        import frontmatter

        document = frontmatter.loads("---\ntags:\n - python\n - docker\n---\n# Hello")
        assert _extract_tags(document) == ["python", "docker"]

    def test_comma_separated_string(self):
        """A single comma-separated string yields one tag per item."""
        import frontmatter

        document = frontmatter.loads("---\ntags: python, docker, tutorial\n---\n# Hello")
        found = _extract_tags(document)
        # Order is not part of the expectation here, only membership.
        assert set(found) == {"python", "docker", "tutorial"}

    def test_with_hash_prefix(self):
        """Tags written with a leading '#' are expected without it."""
        import frontmatter

        document = frontmatter.loads("---\ntags:\n - '#python'\n - '#docker'\n---\n# Hello")
        assert _extract_tags(document) == ["python", "docker"]

    def test_empty_tags(self):
        """Frontmatter that has no ``tags`` key gives an empty list."""
        import frontmatter

        document = frontmatter.loads("---\ntitle: No Tags\n---\n# Hello")
        assert _extract_tags(document) == []

    def test_none_tags(self):
        """A post built with no metadata at all gives an empty list."""
        import frontmatter

        bare_post = frontmatter.Post("# Hello")
        assert _extract_tags(bare_post) == []
# ═══════════════════════════════════════════════════════════════════
# _extract_inline_tags
# ═══════════════════════════════════════════════════════════════════
class TestExtractInlineTags:
    """Checks on ``_extract_inline_tags`` for ``#tag`` markers in body text."""

    def test_simple_tag(self):
        """A lone ``#tag`` inside a sentence is picked up."""
        assert "tag" in _extract_inline_tags("Un texte avec un #tag dedans.")

    def test_multiple_tags(self):
        """Every tag present in the text is reported."""
        found = _extract_inline_tags("#python est cool, #docker aussi.")
        for expected in ("python", "docker"):
            assert expected in found

    def test_no_tags(self):
        """Text without any '#' marker produces an empty list."""
        assert _extract_inline_tags("Juste du texte sans tags.") == []

    def test_code_block_excluded(self):
        """A '#' comment inside a fenced code block must not count as a tag."""
        fenced = "```python\n# This is a code comment, not a tag\nprint('hello')\n```"
        assert _extract_inline_tags(fenced) == []

    def test_inline_code_excluded(self):
        """A '#word' inside backticks must not count as a tag."""
        text = "Use `#notatag` in your code."
        assert _extract_inline_tags(text) == []

    def test_mixed(self):
        """Only the tag outside the inline code span is reported."""
        text = "#real-tag outside code, `#fake-tag` inside."
        found = _extract_inline_tags(text)
        assert "real-tag" in found
        assert "fake-tag" not in found

    def test_tag_at_line_start(self):
        """A tag in the very first column is still recognised."""
        assert "tag" in _extract_inline_tags("#tag at the start\nof the line.")
# ═══════════════════════════════════════════════════════════════════
# _extract_title
# ═══════════════════════════════════════════════════════════════════
class TestExtractTitle:
    """Checks on ``_extract_title``: frontmatter title first, filename second."""

    def test_from_frontmatter(self):
        """An explicit ``title`` in the frontmatter wins."""
        import frontmatter

        document = frontmatter.loads("---\ntitle: Mon Super Titre\n---\n# Content")
        note_path = Path("/fake/file.md")
        assert _extract_title(document, note_path) == "Mon Super Titre"

    def test_fallback_to_filename(self):
        """Without a title, the hyphenated stem is expected with spaces."""
        import frontmatter

        untitled = frontmatter.Post("# Content")
        note_path = Path("/fake/my-great-note.md")
        assert _extract_title(untitled, note_path) == "my great note"

    def test_underscore_fallback(self):
        """Without a title, the underscored stem is expected with spaces."""
        import frontmatter

        untitled = frontmatter.Post("# Content")
        note_path = Path("/fake/my_great_note.md")
        assert _extract_title(untitled, note_path) == "my great note"
# ═══════════════════════════════════════════════════════════════════
# parse_markdown_file
# ═══════════════════════════════════════════════════════════════════
class TestParseMarkdownFile:
    """Checks on ``parse_markdown_file`` for raw markdown text input."""

    def test_valid_frontmatter(self):
        """Metadata and body are both exposed on the parsed post."""
        raw = "---\ntags:\n - test\ntitle: Hello\n---\n# Hello\nWorld"
        parsed = parse_markdown_file(raw)
        assert parsed.metadata["title"] == "Hello"
        assert parsed.metadata["tags"] == ["test"]
        assert "World" in parsed.content

    def test_no_frontmatter(self):
        """Plain markdown gives empty metadata and keeps the body."""
        parsed = parse_markdown_file("# Just a heading\nNo frontmatter.")
        assert parsed.metadata == {}
        assert "Just a heading" in parsed.content

    def test_invalid_frontmatter_fallback(self):
        """Malformed YAML must not raise; the body stays reachable."""
        raw = "---\ninvalid: [unclosed\n---\n# Content"
        parsed = parse_markdown_file(raw)
        # Reaching this line already proves no exception escaped.
        assert "Content" in parsed.content

    def test_empty_file(self):
        """An empty string parses to a post with empty content."""
        assert parse_markdown_file("").content == ""
# ═══════════════════════════════════════════════════════════════════
# SUPPORTED_EXTENSIONS
# ═══════════════════════════════════════════════════════════════════
class TestSupportedExtensions:
    """Membership checks on the ``SUPPORTED_EXTENSIONS`` collection."""

    def test_markdown_is_supported(self):
        """Markdown is listed."""
        assert ".md" in SUPPORTED_EXTENSIONS

    def test_common_code_extensions(self):
        """A sample of source-code extensions is listed."""
        code_exts = (".py", ".js", ".ts", ".go", ".rs", ".java", ".rb")
        for ext in code_exts:
            assert ext in SUPPORTED_EXTENSIONS, f"{ext} should be supported"

    def test_config_extensions(self):
        """A sample of configuration-file extensions is listed."""
        config_exts = (".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf")
        for ext in config_exts:
            assert ext in SUPPORTED_EXTENSIONS, f"{ext} should be supported"

    def test_binary_not_supported(self):
        """Binary formats are absent."""
        assert ".png" not in SUPPORTED_EXTENSIONS
        assert ".exe" not in SUPPORTED_EXTENSIONS
# ═══════════════════════════════════════════════════════════════════
# _scan_vault
# ═══════════════════════════════════════════════════════════════════
class TestScanVault:
    """Checks on ``_scan_vault`` run against the ``test_vault_dir`` fixture."""

    def test_scan_creates_file_entries(self, test_vault_dir):
        """The scan reports files, tags, and echoes the vault path."""
        scanned = _scan_vault("TestVault", test_vault_dir)
        # note1, note2, projet, café_crème, config.json
        assert len(scanned["files"]) >= 3
        assert len(scanned["tags"]) > 0
        assert scanned["path"] == test_vault_dir

    def test_scan_includes_paths(self, test_vault_dir):
        """The ``paths`` listing holds both file and directory entries."""
        scanned = _scan_vault("TestVault", test_vault_dir)
        entries = scanned.get("paths", [])
        files_seen = [entry["path"] for entry in entries if entry["type"] == "file"]
        dirs_seen = [entry["path"] for entry in entries if entry["type"] == "directory"]
        assert len(files_seen) >= 3
        assert any("Projets" in directory for directory in dirs_seen)

    def test_file_has_required_fields(self, test_vault_dir):
        """The first file record carries every expected key."""
        scanned = _scan_vault("TestVault", test_vault_dir)
        first_file = scanned["files"][0]
        required_keys = (
            "path",
            "title",
            "tags",
            "content",
            "content_preview",
            "size",
            "modified",
            "extension",
        )
        for key in required_keys:
            assert key in first_file

    def test_content_is_truncated(self, test_vault_dir):
        """Content should be capped at SEARCH_CONTENT_LIMIT."""
        scanned = _scan_vault("TestVault", test_vault_dir)
        for record in scanned["files"]:
            assert len(record["content"]) <= 100_000  # SEARCH_CONTENT_LIMIT
# ═══════════════════════════════════════════════════════════════════
# Index integration (requires built index)
# ═══════════════════════════════════════════════════════════════════
class TestIndexIntegration:
    """Lookups against the built index.

    Each test requests the ``client`` fixture without using its value;
    presumably the fixture builds the index as a side effect — confirm
    against conftest.py.
    """

    def test_get_vault_names(self, client):
        """The test vault is listed among the known vault names."""
        assert "TestVault" in get_vault_names()

    def test_get_vault_data(self, client):
        """Vault data exists and holds at least three files."""
        vault = get_vault_data("TestVault")
        assert vault is not None
        assert len(vault["files"]) >= 3

    def test_find_file_in_index(self, client):
        """note1 can be located by filename or, failing that, by title."""
        hit = find_file_in_index("note1.md", "TestVault")
        if hit is None:
            # Filename lookup missed: retry with the note's title.
            hit = find_file_in_index("Introduction à Python", "TestVault")
        assert hit is not None, f"Could not find note1.md in index. Vaults: {get_vault_names()}"
        assert hit["vault"] == "TestVault"

    def test_find_file_case_insensitive(self, client):
        """Lookup succeeds regardless of the letter case of the query."""
        hit = find_file_in_index("NOTE1.MD", "TestVault")
        if hit is None:
            # Upper-case filename missed: retry with the lower-cased title.
            hit = find_file_in_index("introduction à python", "TestVault")
        assert hit is not None

    def test_find_file_not_found(self, client):
        """An unknown name resolves to ``None``."""
        assert find_file_in_index("DoesNotExistXYZ123", "TestVault") is None

    def test_get_backlinks(self, client):
        """note1 has at least one incoming link."""
        # note2.md links to "Introduction à Python", which should resolve to note1.md.
        backlinks = get_backlinks("TestVault", "note1.md")
        if len(backlinks) == 0:
            # Nothing under the filename: retry with the title plus .md suffix.
            backlinks = get_backlinks("TestVault", "Introduction à Python.md")
        assert len(backlinks) >= 1, f"Expected backlinks for note1.md, got {backlinks}"
# ═══════════════════════════════════════════════════════════════════
# load_vault_config
# ═══════════════════════════════════════════════════════════════════
class TestLoadVaultConfig:
    """Checks on ``load_vault_config`` reading numbered environment variables.

    Every test goes through pytest's ``monkeypatch`` fixture, so the
    environment is restored afterwards. Writing to ``os.environ`` directly
    leaked ``VAULT_2_*`` from one test into the next and broke the
    "missing pair" scenario depending on execution order.
    """

    @staticmethod
    def _clear_vault_env(monkeypatch):
        """Remove every existing ``VAULT_<n>_*`` / ``DIR_<n>_*`` variable.

        The removals are undone by ``monkeypatch`` at test teardown.
        """
        # Snapshot the keys: the mapping is modified while we walk it.
        for key in list(os.environ):
            prefix, _, rest = key.partition("_")
            if prefix in ("VAULT", "DIR") and rest[:1].isdigit():
                monkeypatch.delenv(key, raising=False)

    def test_loads_sequential_vaults(self, test_vault_dir, monkeypatch):
        """Two consecutive VAULT_n pairs produce two config entries."""
        self._clear_vault_env(monkeypatch)
        monkeypatch.setenv("VAULT_1_NAME", "V1")
        monkeypatch.setenv("VAULT_1_PATH", test_vault_dir)
        monkeypatch.setenv("VAULT_2_NAME", "V2")
        monkeypatch.setenv("VAULT_2_PATH", test_vault_dir)

        config = load_vault_config()

        assert len(config) == 2
        assert config["V1"]["path"] == test_vault_dir
        assert config["V2"]["path"] == test_vault_dir

    def test_stops_at_missing_pair(self, test_vault_dir, monkeypatch):
        """A gap at VAULT_2 means VAULT_3 is never picked up."""
        self._clear_vault_env(monkeypatch)
        monkeypatch.setenv("VAULT_1_NAME", "V1")
        monkeypatch.setenv("VAULT_1_PATH", test_vault_dir)
        # VAULT_2_* is deliberately absent — loading should stop there.
        monkeypatch.setenv("VAULT_3_NAME", "V3")
        monkeypatch.setenv("VAULT_3_PATH", test_vault_dir)

        config = load_vault_config()

        assert len(config) == 1
        assert "V1" in config
        assert "V3" not in config

    def test_dir_entries(self, test_vault_dir, monkeypatch):
        """A DIR_n pair is loaded and labelled with type ``DIR``."""
        self._clear_vault_env(monkeypatch)
        monkeypatch.setenv("DIR_1_NAME", "MyDir")
        monkeypatch.setenv("DIR_1_PATH", test_vault_dir)

        config = load_vault_config()

        assert "MyDir" in config
        assert config["MyDir"]["type"] == "DIR"