# Matches a leading "---"-fenced block at the very start of the text.
# Both fences must sit alone on their line (trailing spaces/tabs allowed), so
# a body line that merely *starts* with "---" (e.g. "---draft" or "----") is
# not mistaken for the closing fence. Blank lines directly after the closing
# fence are consumed too, but indentation of the first content line is kept.
_FRONTMATTER_BLOCK_RE = re.compile(
    r"\A---[ \t]*\r?\n"        # opening fence on the first line
    r".*?\r?\n"                # fenced body, as short as possible
    r"---[ \t]*(?:\r?\n|\Z)"   # closing fence on its own line (or at EOF)
    r"(?:[ \t]*\r?\n)*",       # blank lines following the closing fence
    flags=re.DOTALL,
)


def parse_markdown_file(raw: str) -> frontmatter.Post:
    """Parse markdown frontmatter, falling back to plain content if it is invalid.

    Args:
        raw: Full text of a markdown file, optionally starting with a
            ``---``-fenced frontmatter block.

    Returns:
        The post produced by ``frontmatter.loads`` when parsing succeeds.
        When parsing raises, a post with no metadata whose content is ``raw``
        with the leading fenced block removed (or ``raw`` unchanged if no
        complete fenced block is found at the start).
    """
    try:
        return frontmatter.loads(raw)
    except Exception as exc:
        # Deliberately broad: this is a best-effort boundary so that one
        # malformed file cannot abort indexing or a file request.
        # NOTE(review): narrow this if the set of parser errors is known.
        logger.warning(
            "Invalid frontmatter detected, falling back to plain markdown parsing: %s",
            exc,
        )

    # Drop the unparseable block so it does not leak into the rendered
    # content or the preview; keep everything if no complete block is present.
    match = _FRONTMATTER_BLOCK_RE.match(raw)
    content = raw[match.end():] if match else raw
    return frontmatter.Post(content)