Create comprehensive test suite with 97 passing tests:
- tests/conftest.py: fixtures (TestClient, temp vault dirs, index setup)
- tests/test_search.py (27 tests): tokenizer, snippets, highlight, tag filter, search API, advanced search, suggest, tags API
- tests/test_indexer.py (32 tests): frontmatter parsing, inline tags, title extraction, scan_vault, find_file_in_index, backlinks
- tests/test_auth.py (38 tests): password hashing, JWT create/decode, token revocation, user CRUD, login lockout, rate limiting, middleware

Also fix: lazy WeasyPrint import (graceful fallback when GTK missing), add data/ to .gitignore (runtime files from test runs).
285 lines
12 KiB
Python
285 lines
12 KiB
Python
# tests/test_indexer.py — Tests for the indexer module
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from backend.indexer import (
|
|
_extract_tags,
|
|
_extract_inline_tags,
|
|
_extract_title,
|
|
parse_markdown_file,
|
|
find_file_in_index,
|
|
get_vault_names,
|
|
get_vault_data,
|
|
get_backlinks,
|
|
get_conflicts,
|
|
SUPPORTED_EXTENSIONS,
|
|
_scan_vault,
|
|
load_vault_config,
|
|
)
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# _extract_tags
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestExtractTags:
    """Checks for ``_extract_tags`` on frontmatter ``tags`` values."""

    @staticmethod
    def _tags_of(document):
        """Parse *document* as frontmatter text and return its extracted tags."""
        import frontmatter

        return _extract_tags(frontmatter.loads(document))

    def test_list_of_tags(self):
        """A YAML list of tags is returned in declaration order."""
        extracted = self._tags_of("---\ntags:\n - python\n - docker\n---\n# Hello")
        assert extracted == ["python", "docker"]

    def test_comma_separated_string(self):
        """A comma-separated string yields one tag per item (order not checked)."""
        extracted = self._tags_of("---\ntags: python, docker, tutorial\n---\n# Hello")
        assert set(extracted) == {"python", "docker", "tutorial"}

    def test_with_hash_prefix(self):
        """A leading ``#`` on a tag is dropped."""
        extracted = self._tags_of(
            "---\ntags:\n - '#python'\n - '#docker'\n---\n# Hello"
        )
        assert extracted == ["python", "docker"]

    def test_empty_tags(self):
        """Frontmatter without a ``tags`` key gives an empty list."""
        extracted = self._tags_of("---\ntitle: No Tags\n---\n# Hello")
        assert extracted == []

    def test_none_tags(self):
        """A post built with no metadata at all gives an empty list."""
        import frontmatter

        bare_post = frontmatter.Post("# Hello")
        assert _extract_tags(bare_post) == []
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# _extract_inline_tags
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestExtractInlineTags:
    """Checks for ``_extract_inline_tags`` on ``#tag`` markers in body text."""

    def test_simple_tag(self):
        """A single ``#tag`` inside a sentence is picked up."""
        found = _extract_inline_tags("Un texte avec un #tag dedans.")
        assert "tag" in found

    def test_multiple_tags(self):
        """Every tag in the text is reported."""
        found = _extract_inline_tags("#python est cool, #docker aussi.")
        for expected in ("python", "docker"):
            assert expected in found

    def test_no_tags(self):
        """Text without any ``#`` marker gives an empty list."""
        found = _extract_inline_tags("Juste du texte sans tags.")
        assert found == []

    def test_code_block_excluded(self):
        """A ``#`` comment inside a fenced code block is not a tag."""
        fenced = "```python\n# This is a code comment, not a tag\nprint('hello')\n```"
        assert _extract_inline_tags(fenced) == []

    def test_inline_code_excluded(self):
        """A ``#word`` inside backticks is not a tag."""
        text = "Use `#notatag` in your code."
        assert _extract_inline_tags(text) == []

    def test_mixed(self):
        """Only the tag outside the inline code span is reported."""
        text = "#real-tag outside code, `#fake-tag` inside."
        found = _extract_inline_tags(text)
        assert "real-tag" in found
        assert "fake-tag" not in found

    def test_tag_at_line_start(self):
        """A tag that opens the text is still detected."""
        found = _extract_inline_tags("#tag at the start\nof the line.")
        assert "tag" in found
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# _extract_title
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestExtractTitle:
    """Checks for ``_extract_title``: frontmatter title first, file name otherwise."""

    @staticmethod
    def _untitled_post():
        """Return a post whose metadata is empty, so no title is declared."""
        import frontmatter

        return frontmatter.Post("# Content")

    def test_from_frontmatter(self):
        """An explicit ``title`` key wins over the file name."""
        import frontmatter

        titled = frontmatter.loads("---\ntitle: Mon Super Titre\n---\n# Content")
        assert _extract_title(titled, Path("/fake/file.md")) == "Mon Super Titre"

    def test_fallback_to_filename(self):
        """Without a title, the file stem is used with hyphens turned into spaces."""
        derived = _extract_title(self._untitled_post(), Path("/fake/my-great-note.md"))
        assert derived == "my great note"

    def test_underscore_fallback(self):
        """Without a title, underscores in the file stem also become spaces."""
        derived = _extract_title(self._untitled_post(), Path("/fake/my_great_note.md"))
        assert derived == "my great note"
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# parse_markdown_file
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestParseMarkdownFile:
    """Checks for ``parse_markdown_file`` on raw Markdown text."""

    def test_valid_frontmatter(self):
        """Metadata and body are both exposed on the parsed post."""
        parsed = parse_markdown_file(
            "---\ntags:\n - test\ntitle: Hello\n---\n# Hello\nWorld"
        )
        meta = parsed.metadata
        assert meta["title"] == "Hello"
        assert meta["tags"] == ["test"]
        assert "World" in parsed.content

    def test_no_frontmatter(self):
        """Text without a frontmatter header gives empty metadata and keeps the body."""
        parsed = parse_markdown_file("# Just a heading\nNo frontmatter.")
        assert parsed.metadata == {}
        assert "Just a heading" in parsed.content

    def test_invalid_frontmatter_fallback(self):
        """Malformed YAML must not raise; the body text is still returned."""
        broken = "---\ninvalid: [unclosed\n---\n# Content"
        parsed = parse_markdown_file(broken)
        assert "Content" in parsed.content

    def test_empty_file(self):
        """An empty input string produces an empty body."""
        assert parse_markdown_file("").content == ""
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# SUPPORTED_EXTENSIONS
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestSupportedExtensions:
    """Membership checks against the ``SUPPORTED_EXTENSIONS`` collection."""

    def test_markdown_is_supported(self):
        """Markdown files are indexed."""
        assert ".md" in SUPPORTED_EXTENSIONS

    def test_common_code_extensions(self):
        """Source files of common programming languages are indexed."""
        code_suffixes = (".py", ".js", ".ts", ".go", ".rs", ".java", ".rb")
        for ext in code_suffixes:
            assert ext in SUPPORTED_EXTENSIONS, f"{ext} should be supported"

    def test_config_extensions(self):
        """Common configuration file formats are indexed."""
        config_suffixes = (".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf")
        for ext in config_suffixes:
            assert ext in SUPPORTED_EXTENSIONS, f"{ext} should be supported"

    def test_binary_not_supported(self):
        """Image and executable suffixes are not indexed."""
        for binary_suffix in (".png", ".exe"):
            assert binary_suffix not in SUPPORTED_EXTENSIONS
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# _scan_vault
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestScanVault:
    """Checks for ``_scan_vault`` run against the ``test_vault_dir`` fixture.

    The fixture is not defined in this module (presumably it comes from
    ``conftest.py``); the assertions below only rely on it containing at
    least three indexable files and a ``Projets`` directory.
    """

    def test_scan_creates_file_entries(self, test_vault_dir):
        """The scan reports files, tags and the vault path it was given."""
        scanned = _scan_vault("TestVault", test_vault_dir)
        # The fixture holds note1, note2, projet, café_crème, config.json;
        # only a lower bound is asserted.
        assert len(scanned["files"]) >= 3
        assert len(scanned["tags"]) > 0
        assert scanned["path"] == test_vault_dir

    def test_scan_includes_paths(self, test_vault_dir):
        """The ``paths`` listing contains both file and directory entries."""
        entries = _scan_vault("TestVault", test_vault_dir).get("paths", [])

        def paths_of(kind):
            return [entry["path"] for entry in entries if entry["type"] == kind]

        files, directories = paths_of("file"), paths_of("directory")
        assert len(files) >= 3
        assert any("Projets" in directory for directory in directories)

    def test_file_has_required_fields(self, test_vault_dir):
        """The first file record carries every key the tests rely on."""
        first_record = _scan_vault("TestVault", test_vault_dir)["files"][0]
        required_keys = (
            "path",
            "title",
            "tags",
            "content",
            "content_preview",
            "size",
            "modified",
            "extension",
        )
        for key in required_keys:
            assert key in first_record

    def test_content_is_truncated(self, test_vault_dir):
        """Content should be capped at SEARCH_CONTENT_LIMIT."""
        limit = 100_000  # SEARCH_CONTENT_LIMIT
        scanned = _scan_vault("TestVault", test_vault_dir)
        for record in scanned["files"]:
            assert len(record["content"]) <= limit
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# Index integration (requires built index)
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestIndexIntegration:
    """Lookups against the built index.

    Each test requests the ``client`` fixture without using its value; the
    fixture is not defined in this module and is presumably what builds the
    index containing ``TestVault``.
    """

    def test_get_vault_names(self, client):
        """The test vault is listed among the known vault names."""
        assert "TestVault" in get_vault_names()

    def test_get_vault_data(self, client):
        """Vault data exists and holds at least three files."""
        vault = get_vault_data("TestVault")
        assert vault is not None
        assert len(vault["files"]) >= 3

    def test_find_file_in_index(self, client):
        """The note is found by file name or, failing that, by its title."""
        match = None
        for query in ("note1.md", "Introduction à Python"):
            match = find_file_in_index(query, "TestVault")
            if match is not None:
                break
        assert match is not None, f"Could not find note1.md in index. Vaults: {get_vault_names()}"
        assert match["vault"] == "TestVault"

    def test_find_file_case_insensitive(self, client):
        """Lookup succeeds with an upper-cased file name or a lower-cased title."""
        match = None
        for query in ("NOTE1.MD", "introduction à python"):
            match = find_file_in_index(query, "TestVault")
            if match is not None:
                break
        assert match is not None

    def test_find_file_not_found(self, client):
        """An unknown name yields ``None``."""
        assert find_file_in_index("DoesNotExistXYZ123", "TestVault") is None

    def test_get_backlinks(self, client):
        """note2.md links to "Introduction à Python", expected to resolve to note1.md."""
        # The first candidate is the file name; the second is the title-based
        # name, tried only when the first returns no backlinks.
        backlinks = []
        for target in ("note1.md", "Introduction à Python.md"):
            backlinks = get_backlinks("TestVault", target)
            if len(backlinks) != 0:
                break
        assert len(backlinks) >= 1, f"Expected backlinks for note1.md, got {backlinks}"
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
# load_vault_config
|
|
# ═══════════════════════════════════════════════════════════════════
|
|
|
|
class TestLoadVaultConfig:
    """Checks for ``load_vault_config`` reading numbered environment variables.

    All variables are set or removed through pytest's ``monkeypatch`` fixture,
    which restores the previous environment when each test finishes. Writing
    to ``os.environ`` directly would leak ``VAULT_n_*`` / ``DIR_n_*`` values
    into later tests and make the results depend on execution order.
    """

    def test_loads_sequential_vaults(self, test_vault_dir, monkeypatch):
        """Two consecutive ``VAULT_n`` pairs produce exactly two entries."""
        monkeypatch.setenv("VAULT_1_NAME", "V1")
        monkeypatch.setenv("VAULT_1_PATH", test_vault_dir)
        monkeypatch.setenv("VAULT_2_NAME", "V2")
        monkeypatch.setenv("VAULT_2_PATH", test_vault_dir)
        # Make sure no third pair is inherited from the surrounding environment,
        # otherwise the exact-count assertion below could not hold.
        monkeypatch.delenv("VAULT_3_NAME", raising=False)
        monkeypatch.delenv("VAULT_3_PATH", raising=False)

        config = load_vault_config()

        assert len(config) == 2
        assert config["V1"]["path"] == test_vault_dir
        assert config["V2"]["path"] == test_vault_dir

    def test_stops_at_missing_pair(self, test_vault_dir, monkeypatch):
        """A gap in the numbering ends discovery: pair 3 is ignored without pair 2."""
        monkeypatch.setenv("VAULT_1_NAME", "V1")
        monkeypatch.setenv("VAULT_1_PATH", test_vault_dir)
        # Pair 2 must really be absent, even if something else defined it.
        monkeypatch.delenv("VAULT_2_NAME", raising=False)
        monkeypatch.delenv("VAULT_2_PATH", raising=False)
        monkeypatch.setenv("VAULT_3_NAME", "V3")
        monkeypatch.setenv("VAULT_3_PATH", test_vault_dir)

        config = load_vault_config()

        assert len(config) == 1
        assert "V1" in config
        assert "V3" not in config

    def test_dir_entries(self, test_vault_dir, monkeypatch):
        """A ``DIR_n`` pair is loaded and marked with type ``"DIR"``."""
        monkeypatch.setenv("DIR_1_NAME", "MyDir")
        monkeypatch.setenv("DIR_1_PATH", test_vault_dir)

        config = load_vault_config()

        assert "MyDir" in config
        assert config["MyDir"]["type"] == "DIR"
|