Add fallback parser for markdown files with invalid YAML frontmatter to prevent indexing failures
This commit is contained in:
parent
2ed5f65a7a
commit
1213eb4781
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Optional, Any
|
||||
@ -62,6 +63,20 @@ def _extract_title(post: frontmatter.Post, filepath: Path) -> str:
|
||||
return str(title)
|
||||
|
||||
|
||||
def parse_markdown_file(raw: str) -> frontmatter.Post:
    """Parse markdown frontmatter, falling back to plain content if YAML is invalid.

    Args:
        raw: Full text of a markdown file, optionally starting with a
            ``---``-fenced frontmatter block.

    Returns:
        The post returned by ``frontmatter.loads`` when parsing succeeds.
        Otherwise a post with empty metadata whose content is ``raw`` with
        the leading fenced block removed; if no complete fenced block can be
        located, the content is ``raw`` unchanged.
    """
    try:
        return frontmatter.loads(raw)
    except Exception as exc:
        # Deliberately broad: any parser failure must degrade to plain
        # markdown instead of propagating to the caller.
        logger.warning(
            "Invalid frontmatter detected, falling back to plain markdown parsing: %s",
            exc,
        )

    # Skip leading whitespace so a fence preceded by blank lines is still found.
    text = raw.lstrip()
    fenced_block = re.match(
        r"---[ \t]*\r?\n"      # opening fence, alone on the first line
        r".*?"                 # block body (may be empty), shortest match
        r"^---[ \t]*\r?$"      # closing fence must occupy a whole line
        r"\s*",                # drop blank space between fence and content
        text,
        flags=re.DOTALL | re.MULTILINE,
    )
    content = text[fenced_block.end():] if fenced_block else raw
    return frontmatter.Post(content)
|
||||
|
||||
|
||||
def _scan_vault(vault_name: str, vault_path: str) -> Dict[str, Any]:
|
||||
"""Synchronously scan a single vault directory."""
|
||||
vault_root = Path(vault_path)
|
||||
@ -96,7 +111,7 @@ def _scan_vault(vault_name: str, vault_path: str) -> Dict[str, Any]:
|
||||
content_preview = raw[:200].strip()
|
||||
|
||||
if ext == ".md":
|
||||
post = frontmatter.loads(raw)
|
||||
post = parse_markdown_file(raw)
|
||||
tags = _extract_tags(post)
|
||||
title = _extract_title(post, fpath)
|
||||
content_preview = post.content[:200].strip()
|
||||
|
||||
@ -16,6 +16,7 @@ from backend.indexer import (
|
||||
index,
|
||||
get_vault_data,
|
||||
find_file_in_index,
|
||||
parse_markdown_file,
|
||||
SUPPORTED_EXTENSIONS,
|
||||
)
|
||||
from backend.search import search, get_all_tags
|
||||
@ -211,7 +212,7 @@ async def api_file(vault_name: str, path: str = Query(..., description="Relative
|
||||
ext = file_path.suffix.lower()
|
||||
|
||||
if ext == ".md":
|
||||
post = frontmatter.loads(raw)
|
||||
post = parse_markdown_file(raw)
|
||||
|
||||
# Extract metadata
|
||||
tags = post.metadata.get("tags", [])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user