"""In-memory indexer for Obsigate vaults.

Scans configured vaults/directories, extracts per-file metadata (title,
tags, content preview, capped content snapshot) and maintains several
global lookup structures for search, wikilink resolution and tree
filtering. Supports full rebuilds and incremental single-file updates.
"""

import os
import asyncio
import logging
import re
import threading
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, List, Optional, Any

import frontmatter

from backend.attachment_indexer import build_attachment_index

logger = logging.getLogger("obsigate.indexer")

# Global in-memory index:
# {vault_name: {"files": [...], "tags": {...}, "path": str, "paths": [...], "config": {...}}}
index: Dict[str, Dict[str, Any]] = {}

# Vault config: {name: {path, attachmentsPath, scanAttachmentsOnStartup, ...}}
vault_config: Dict[str, Dict[str, Any]] = {}

# Thread-safe lock guarding mutation of the index structures from
# executor threads / synchronous code.
_index_lock = threading.Lock()

# Async lock for partial index updates (coexists with the threading lock).
# Created lazily because an asyncio.Lock must be bound to a running loop.
_async_index_lock: Optional[asyncio.Lock] = None

# Generation counter — incremented on each index mutation so consumers
# (e.g. the inverted index in search.py) can detect staleness.
_index_generation: int = 0

# O(1) lookup table for wikilink resolution: {filename_lower: [{vault, path}, ...]}
_file_lookup: Dict[str, List[Dict[str, str]]] = {}

# O(1) path index for tree filtering: {vault_name: [{path, name, type}, ...]}
path_index: Dict[str, List[Dict[str, str]]] = {}

# Maximum content size stored per file for in-memory search (characters —
# the raw text is sliced after decoding).
SEARCH_CONTENT_LIMIT = 100_000

# Extensionless / special filenames indexed regardless of SUPPORTED_EXTENSIONS.
_SPECIAL_BASENAMES = ("dockerfile", "makefile", "cmakelists.txt")

# Supported text-based file extensions
SUPPORTED_EXTENSIONS = {
    ".md", ".txt", ".log",
    ".py", ".js", ".ts", ".jsx", ".tsx",
    ".sh", ".bash", ".zsh", ".fish", ".bat", ".cmd", ".ps1",
    ".json", ".yaml", ".yml", ".toml", ".xml", ".csv",
    ".cfg", ".ini", ".conf", ".env",
    ".html", ".css", ".scss", ".less",
    ".java", ".c", ".cpp", ".h", ".hpp", ".cs", ".go", ".rs", ".rb", ".php",
    ".sql", ".r", ".m", ".swift", ".kt",
    ".dockerfile", ".makefile", ".cmake",
}


def _read_hidden_config(prefix: str) -> Dict[str, Any]:
    """Read the hidden-files env vars (``*_INCLUDE_HIDDEN`` / ``*_HIDDEN_WHITELIST``)
    for one vault/dir entry identified by *prefix* (e.g. ``"VAULT_1"``).

    Returns:
        Dict with ``includeHidden`` (bool) and ``hiddenWhitelist`` (list of str).
    """
    include_hidden = os.environ.get(f"{prefix}_INCLUDE_HIDDEN", "false").lower() == "true"
    whitelist_raw = os.environ.get(f"{prefix}_HIDDEN_WHITELIST", "")
    whitelist = [item.strip() for item in whitelist_raw.split(",") if item.strip()]
    return {"includeHidden": include_hidden, "hiddenWhitelist": whitelist}


def load_vault_config() -> Dict[str, Dict[str, Any]]:
    """Read VAULT_N_* and DIR_N_* env vars and return vault configuration.

    Scans environment variables ``VAULT_1_NAME``/``VAULT_1_PATH``,
    ``VAULT_2_NAME``/``VAULT_2_PATH``, etc. in sequential order and stops at
    the first missing pair; then does the same for ``DIR_N_*``.

    Also reads optional configuration:
    - VAULT_N_ATTACHMENTS_PATH: relative path to attachments folder
    - VAULT_N_SCAN_ATTACHMENTS: "true"/"false" to enable/disable scanning
    - VAULT_N_INCLUDE_HIDDEN: "true"/"false" to include all hidden files/folders
    - VAULT_N_HIDDEN_WHITELIST: comma-separated list of hidden paths to include
      (e.g., ".obsidian,.github")

    Returns:
        Dict mapping vault names to configuration dicts with keys:
        - path: filesystem path (required)
        - attachmentsPath: relative attachments folder (optional)
        - scanAttachmentsOnStartup: boolean (default True for vaults, False for dirs)
        - includeHidden: boolean (default False)
        - hiddenWhitelist: list of hidden paths included even when includeHidden is False
        - type: "VAULT" or "DIR"
    """
    vaults: Dict[str, Dict[str, Any]] = {}

    n = 1
    while True:
        name = os.environ.get(f"VAULT_{n}_NAME")
        path = os.environ.get(f"VAULT_{n}_PATH")
        if not name or not path:
            break
        scan_attachments = os.environ.get(f"VAULT_{n}_SCAN_ATTACHMENTS", "true").lower() == "true"
        vaults[name] = {
            "path": path,
            "attachmentsPath": os.environ.get(f"VAULT_{n}_ATTACHMENTS_PATH"),
            "scanAttachmentsOnStartup": scan_attachments,
            **_read_hidden_config(f"VAULT_{n}"),
            "type": "VAULT",
        }
        n += 1

    n = 1
    while True:
        name = os.environ.get(f"DIR_{n}_NAME")
        path = os.environ.get(f"DIR_{n}_PATH")
        if not name or not path:
            break
        vaults[name] = {
            "path": path,
            "attachmentsPath": None,
            # Plain directories never have an Obsidian attachments folder.
            "scanAttachmentsOnStartup": False,
            **_read_hidden_config(f"DIR_{n}"),
            "type": "DIR",
        }
        n += 1

    return vaults


def _should_include_path(rel_parts: tuple, cfg: Dict[str, Any]) -> bool:
    """Check if a path should be included based on hidden-files configuration.

    Args:
        rel_parts: Tuple of path parts relative to the vault root.
        cfg: Vault configuration dict with ``includeHidden`` and ``hiddenWhitelist``.

    Returns:
        True if the path should be included, False otherwise.
    """
    hidden_parts = [part for part in rel_parts if part.startswith(".")]
    if not hidden_parts:
        # Nothing hidden anywhere in the path: always include.
        return True
    if cfg.get("includeHidden", False):
        return True
    # Include only when at least one hidden component is whitelisted.
    whitelist = cfg.get("hiddenWhitelist", [])
    return any(part in whitelist for part in hidden_parts)


# Regex for extracting inline #tags from markdown body (excludes code blocks)
_INLINE_TAG_RE = re.compile(r'(?:^|\s)#([a-zA-Z][a-zA-Z0-9_/-]{1,50})', re.MULTILINE)
# Regex patterns for stripping code blocks before inline tag extraction
_CODE_BLOCK_RE = re.compile(r'```[\s\S]*?```', re.MULTILINE)
_INLINE_CODE_RE = re.compile(r'`[^`]+`')


def _extract_tags(post: frontmatter.Post) -> List[str]:
    """Extract tags from frontmatter metadata.

    Handles tags given as a comma-separated string, a list, or any other type
    (ignored). Strips leading ``#`` from each tag.

    Args:
        post: Parsed frontmatter Post object.

    Returns:
        List of cleaned tag strings.
    """
    raw_tags = post.metadata.get("tags", [])
    if isinstance(raw_tags, str):
        return [t.strip().lstrip("#") for t in raw_tags.split(",") if t.strip()]
    if isinstance(raw_tags, list):
        return [str(t).strip().lstrip("#") for t in raw_tags]
    return []


def _extract_inline_tags(content: str) -> List[str]:
    """Extract inline ``#tag`` patterns from markdown content.

    Strips fenced and inline code blocks before scanning to avoid false
    positives from code comments or shell commands.

    Args:
        content: Raw markdown content (without frontmatter).

    Returns:
        Sorted, deduplicated list of inline tag strings (sorted for a
        deterministic index).
    """
    stripped = _CODE_BLOCK_RE.sub('', content)
    stripped = _INLINE_CODE_RE.sub('', stripped)
    return sorted(set(_INLINE_TAG_RE.findall(stripped)))


def _extract_title(post: frontmatter.Post, filepath: Path) -> str:
    """Extract title from frontmatter or derive it from the filename.

    Falls back to the file stem with hyphens/underscores replaced by spaces
    when no ``title`` key is present in frontmatter.

    Args:
        post: Parsed frontmatter Post object.
        filepath: Path to the source file.

    Returns:
        Human-readable title string.
    """
    title = post.metadata.get("title", "")
    if not title:
        title = filepath.stem.replace("-", " ").replace("_", " ")
    return str(title)


def parse_markdown_file(raw: str) -> frontmatter.Post:
    """Parse markdown frontmatter, falling back to plain content if YAML is invalid.

    When the YAML block is malformed, strips it and returns a Post with empty
    metadata so that rendering can still proceed.

    Args:
        raw: Full raw markdown string including optional frontmatter.

    Returns:
        ``frontmatter.Post`` with ``.content`` and ``.metadata`` attributes.
    """
    try:
        return frontmatter.loads(raw)
    except Exception as exc:
        logger.debug(f"Invalid frontmatter detected, falling back to plain markdown parsing: {exc}")
        content = raw
        if raw.startswith("---"):
            # Strip the (broken) frontmatter block so only the body remains.
            match = re.match(r"^---\s*\r?\n.*?\r?\n---\s*\r?\n?", raw, flags=re.DOTALL)
            if match:
                content = raw[match.end():]
        return frontmatter.Post(content)


def _build_file_entry(fpath: Path, vault_root: Path) -> Dict[str, Any]:
    """Read and parse one file into its index entry dict.

    Shared by the full vault scan and single-file re-indexing so both produce
    identical entries. May raise OSError/UnicodeError etc. — callers handle.

    Args:
        fpath: Absolute path to the file.
        vault_root: Absolute path to the vault root (``fpath`` must be under it).

    Returns:
        Dict with keys ``path``, ``title``, ``tags``, ``content_preview``,
        ``content`` (capped at SEARCH_CONTENT_LIMIT), ``size``, ``modified``
        (UTC ISO timestamp), ``extension``.
    """
    relative = fpath.relative_to(vault_root)
    stat = fpath.stat()
    modified = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat()
    raw = fpath.read_text(encoding="utf-8", errors="replace")
    ext = fpath.suffix.lower()

    tags: List[str] = []
    title = fpath.stem.replace("-", " ").replace("_", " ")
    content_preview = raw[:200].strip()
    if ext == ".md":
        post = parse_markdown_file(raw)
        # Merge frontmatter tags with inline #tags found in the body.
        inline_tags = _extract_inline_tags(post.content)
        tags = sorted(set(_extract_tags(post)) | set(inline_tags))
        title = _extract_title(post, fpath)
        content_preview = post.content[:200].strip()

    return {
        "path": str(relative).replace("\\", "/"),
        "title": title,
        "tags": tags,
        "content_preview": content_preview,
        "content": raw[:SEARCH_CONTENT_LIMIT],
        "size": stat.st_size,
        "modified": modified,
        "extension": ext,
    }


def _scan_vault(vault_name: str, vault_path: str, vault_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Synchronously scan a single vault directory and build its file index.

    Walks the vault tree, reads supported files, extracts metadata (tags,
    title, content preview) and stores a capped content snapshot for
    in-memory full-text search.

    Args:
        vault_name: Display name of the vault.
        vault_path: Absolute filesystem path to the vault root.
        vault_cfg: Optional vault configuration dict with hidden-files settings.

    Returns:
        Dict with keys ``files`` (list), ``tags`` (counter dict),
        ``path`` (str), ``paths`` (list), ``config`` (empty placeholder).
    """
    vault_root = Path(vault_path)
    files: List[Dict[str, Any]] = []
    tag_counts: Dict[str, int] = {}
    paths: List[Dict[str, str]] = []

    if vault_cfg is None:
        # Default: hidden files excluded, no whitelist.
        vault_cfg = {"includeHidden": False, "hiddenWhitelist": []}

    if not vault_root.exists():
        logger.warning(f"Vault path does not exist: {vault_path}")
        return {"files": [], "tags": {}, "path": vault_path, "paths": []}

    for fpath in vault_root.rglob("*"):
        relative = fpath.relative_to(vault_root)
        if not _should_include_path(relative.parts, vault_cfg):
            continue

        rel_path_str = str(relative).replace("\\", "/")

        # Directories go into the path index only.
        if fpath.is_dir():
            paths.append({"path": rel_path_str, "name": fpath.name, "type": "directory"})
            continue
        if not fpath.is_file():
            continue

        ext = fpath.suffix.lower()
        # Also match extensionless files named like Dockerfile, Makefile.
        if ext not in SUPPORTED_EXTENSIONS and fpath.name.lower() not in _SPECIAL_BASENAMES:
            continue

        # File is listed in the tree even if reading it fails below.
        paths.append({"path": rel_path_str, "name": fpath.name, "type": "file"})

        try:
            entry = _build_file_entry(fpath, vault_root)
        except PermissionError:
            logger.debug(f"Permission denied, skipping {fpath}")
            continue
        except Exception as e:
            logger.error(f"Error indexing {fpath}: {e}")
            continue

        files.append(entry)
        for tag in entry["tags"]:
            tag_counts[tag] = tag_counts.get(tag, 0) + 1

    logger.info(f"Vault '{vault_name}': indexed {len(files)} files, {len(paths)} paths, {len(tag_counts)} unique tags")
    return {"files": files, "tags": tag_counts, "path": vault_path, "paths": paths, "config": {}}


def _build_lookup_entries(vault_name: str, files: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, str]]]:
    """Build wikilink lookup entries for a vault's files.

    Each file is registered under its lowercase basename and its lowercase
    full relative path (deduplicated when they coincide, i.e. root files).
    """
    lookup: Dict[str, List[Dict[str, str]]] = {}
    for f in files:
        entry = {"vault": vault_name, "path": f["path"]}
        fname = f["path"].rsplit("/", 1)[-1].lower()
        for key in {fname, f["path"].lower()}:
            lookup.setdefault(key, []).append(entry)
    return lookup


def _insert_vault_unlocked(vault_name: str, vault_data: Dict[str, Any]) -> None:
    """Insert a scanned vault into all index structures.

    Caller must hold both the async lock and ``_index_lock``.
    """
    global _index_generation
    index[vault_name] = vault_data
    for key, entries in _build_lookup_entries(vault_name, vault_data["files"]).items():
        _file_lookup.setdefault(key, []).extend(entries)
    path_index[vault_name] = vault_data.get("paths", [])
    _index_generation += 1


async def build_index(progress_callback=None) -> None:
    """Build the full in-memory index for all configured vaults.

    Runs vault scans concurrently in executor threads, inserting results
    incrementally into the global index. Notifies progress via the provided
    async callback with events "start", "progress" and "complete".
    """
    global index, vault_config, _index_generation

    vault_config.clear()
    vault_config.update(load_vault_config())

    with _index_lock:
        index.clear()
        _file_lookup.clear()
        path_index.clear()
        _index_generation += 1

    if not vault_config:
        logger.warning("No vaults configured. Set VAULT_N_NAME / VAULT_N_PATH env vars.")
        if progress_callback:
            await progress_callback("complete", {"total": 0})
        return

    if progress_callback:
        await progress_callback("start", {"total_vaults": len(vault_config)})

    # get_running_loop(): we are inside a coroutine, so a loop is guaranteed.
    loop = asyncio.get_running_loop()

    async def _process_vault(name: str, config: Dict[str, Any]):
        # Scan in a worker thread so the event loop stays responsive.
        vault_data = await loop.run_in_executor(None, _scan_vault, name, config["path"], config)
        vault_data["config"] = config

        async with _get_async_lock():
            with _index_lock:
                _insert_vault_unlocked(name, vault_data)

        if progress_callback:
            await progress_callback("progress", {
                "vault": name,
                "files": len(vault_data["files"]),
                "tags": len(vault_data["tags"]),
            })

    # Run all vault scans concurrently.
    await asyncio.gather(*(_process_vault(name, config) for name, config in vault_config.items()))

    # Build attachment index
    await build_attachment_index(vault_config)

    total_files = sum(len(v["files"]) for v in index.values())
    logger.info(f"Index built: {len(index)} vaults, {total_files} total files")
    if progress_callback:
        await progress_callback("complete", {"total_vaults": len(vault_config), "total_files": total_files})


async def reload_index() -> Dict[str, Any]:
    """Force a full re-index of all vaults and return per-vault statistics.

    Returns:
        Dict mapping vault names to their file/tag counts.
    """
    await build_index()
    return {
        name: {"file_count": len(data["files"]), "tag_count": len(data["tags"])}
        for name, data in index.items()
    }


def get_vault_names() -> List[str]:
    """Return the list of all indexed vault names."""
    return list(index.keys())


def get_vault_data(vault_name: str) -> Optional[Dict[str, Any]]:
    """Return the full index data for a vault, or ``None`` if not found."""
    return index.get(vault_name)


def _get_async_lock() -> asyncio.Lock:
    """Get or create the async lock (must be called from an event loop)."""
    global _async_index_lock
    if _async_index_lock is None:
        _async_index_lock = asyncio.Lock()
    return _async_index_lock


def _index_single_file_sync(vault_name: str, vault_path: str, file_path: str) -> Optional[Dict[str, Any]]:
    """Synchronously read and parse a single file for indexing.

    Honors the vault's hidden-files configuration (``includeHidden`` /
    ``hiddenWhitelist``) the same way the full scan does.

    Args:
        vault_name: Name of the vault.
        vault_path: Absolute path to vault root.
        file_path: Absolute path to the file.

    Returns:
        File info dict, or None if the file cannot be read or is excluded.
    """
    try:
        fpath = Path(file_path)
        vault_root = Path(vault_path)
        if not fpath.is_file():
            return None

        relative = fpath.relative_to(vault_root)
        # Apply the same hidden-path rules as _scan_vault (fixes whitelisted
        # hidden folders being dropped on incremental update).
        cfg = vault_config.get(vault_name) or {"includeHidden": False, "hiddenWhitelist": []}
        if not _should_include_path(relative.parts, cfg):
            return None

        ext = fpath.suffix.lower()
        if ext not in SUPPORTED_EXTENSIONS and fpath.name.lower() not in _SPECIAL_BASENAMES:
            return None

        return _build_file_entry(fpath, vault_root)
    except PermissionError:
        logger.debug(f"Permission denied: {file_path}")
        return None
    except Exception as e:
        logger.error(f"Error parsing file {file_path}: {e}")
        return None


def _remove_file_from_structures(vault_name: str, rel_path: str) -> Optional[Dict[str, Any]]:
    """Remove a file from all index structures.

    Must be called under _index_lock or the async lock.

    Returns:
        The removed file info dict, or None if not found.
    """
    global _index_generation
    vault_data = index.get(vault_name)
    if not vault_data:
        return None

    # Remove from the files list.
    files = vault_data["files"]
    removed = None
    for i, f in enumerate(files):
        if f["path"] == rel_path:
            removed = files.pop(i)
            break
    if not removed:
        return None

    # Decrement tag counts, dropping tags that reach zero.
    tag_counts = vault_data["tags"]
    for tag in removed.get("tags", []):
        if tag in tag_counts:
            tag_counts[tag] -= 1
            if tag_counts[tag] <= 0:
                del tag_counts[tag]

    # Remove from the wikilink lookup table.
    fname_lower = rel_path.rsplit("/", 1)[-1].lower()
    for key in {fname_lower, rel_path.lower()}:
        remaining = [
            e for e in _file_lookup.get(key, [])
            if not (e["vault"] == vault_name and e["path"] == rel_path)
        ]
        if remaining:
            _file_lookup[key] = remaining
        else:
            _file_lookup.pop(key, None)

    # Remove from the path index.
    if vault_name in path_index:
        path_index[vault_name] = [p for p in path_index[vault_name] if p["path"] != rel_path]

    _index_generation += 1
    return removed


def _add_file_to_structures(vault_name: str, file_info: Dict[str, Any]):
    """Add a file entry to all index structures.

    Must be called under _index_lock or the async lock.
    """
    global _index_generation
    vault_data = index.get(vault_name)
    if not vault_data:
        return

    vault_data["files"].append(file_info)

    # Increment tag counts.
    for tag in file_info.get("tags", []):
        vault_data["tags"][tag] = vault_data["tags"].get(tag, 0) + 1

    # Register in the wikilink lookup (basename and full path, deduplicated).
    rel_path = file_info["path"]
    entry = {"vault": vault_name, "path": rel_path}
    fname_lower = rel_path.rsplit("/", 1)[-1].lower()
    for key in {fname_lower, rel_path.lower()}:
        _file_lookup.setdefault(key, []).append(entry)

    # Add to the path index, avoiding duplicates.
    if vault_name in path_index:
        existing = {p["path"] for p in path_index[vault_name]}
        if rel_path not in existing:
            path_index[vault_name].append({
                "path": rel_path,
                "name": rel_path.rsplit("/", 1)[-1],
                "type": "file",
            })

    _index_generation += 1


async def update_single_file(vault_name: str, abs_file_path: str) -> Optional[Dict[str, Any]]:
    """Re-index a single file without a full rebuild.

    Reads the file, removes the old entry if present, inserts the new one.
    Thread-safe via the async lock.

    Args:
        vault_name: Name of the vault containing the file.
        abs_file_path: Absolute filesystem path to the file.

    Returns:
        The new file info dict, or None if the file could not be indexed.
    """
    vault_data = index.get(vault_name)
    if not vault_data:
        logger.warning(f"update_single_file: vault '{vault_name}' not in index")
        return None

    vault_path = vault_data.get("path") or vault_config.get(vault_name, {}).get("path", "")
    if not vault_path:
        return None

    loop = asyncio.get_running_loop()
    file_info = await loop.run_in_executor(None, _index_single_file_sync, vault_name, vault_path, abs_file_path)

    async with _get_async_lock():
        try:
            rel_path = str(Path(abs_file_path).relative_to(vault_path)).replace("\\", "/")
        except ValueError:
            logger.warning(f"File {abs_file_path} not under vault {vault_path}")
            return None
        # Remove any stale entry first, then insert the fresh one.
        _remove_file_from_structures(vault_name, rel_path)
        if file_info:
            _add_file_to_structures(vault_name, file_info)

    if file_info:
        logger.debug(f"Updated: {vault_name}/{file_info['path']}")
    return file_info


async def remove_single_file(vault_name: str, abs_file_path: str) -> Optional[Dict[str, Any]]:
    """Remove a single file from the index.

    Args:
        vault_name: Name of the vault.
        abs_file_path: Absolute path to the deleted file.

    Returns:
        The removed file info dict, or None if not found.
    """
    vault_data = index.get(vault_name)
    if not vault_data:
        return None

    vault_path = vault_data.get("path") or vault_config.get(vault_name, {}).get("path", "")
    if not vault_path:
        return None

    try:
        rel_path = str(Path(abs_file_path).relative_to(vault_path)).replace("\\", "/")
    except ValueError:
        return None

    async with _get_async_lock():
        removed = _remove_file_from_structures(vault_name, rel_path)

    if removed:
        logger.debug(f"Removed: {vault_name}/{rel_path}")
    return removed


async def handle_file_move(vault_name: str, src_abs: str, dest_abs: str) -> Optional[Dict[str, Any]]:
    """Handle a file move/rename by removing the old entry and indexing the new location.

    Args:
        vault_name: Name of the vault.
        src_abs: Absolute path of the source (old location).
        dest_abs: Absolute path of the destination (new location).

    Returns:
        The new file info dict, or None.
    """
    await remove_single_file(vault_name, src_abs)
    return await update_single_file(vault_name, dest_abs)


async def remove_vault_from_index(vault_name: str):
    """Remove an entire vault from the index.

    Args:
        vault_name: Name of the vault to remove.
    """
    global _index_generation
    async with _get_async_lock():
        vault_data = index.pop(vault_name, None)
        if not vault_data:
            return

        # Drop this vault's entries from the wikilink lookup table.
        for f in vault_data.get("files", []):
            rel_path = f["path"]
            fname_lower = rel_path.rsplit("/", 1)[-1].lower()
            for key in {fname_lower, rel_path.lower()}:
                remaining = [e for e in _file_lookup.get(key, []) if e["vault"] != vault_name]
                if remaining:
                    _file_lookup[key] = remaining
                else:
                    _file_lookup.pop(key, None)

        path_index.pop(vault_name, None)
        vault_config.pop(vault_name, None)
        _index_generation += 1

    logger.info(f"Removed vault '{vault_name}' from index")


async def add_vault_to_index(vault_name: str, vault_path: str) -> Dict[str, Any]:
    """Add a new vault to the index dynamically.

    Args:
        vault_name: Display name for the vault.
        vault_path: Absolute filesystem path to the vault.

    Returns:
        Dict with vault stats (file_count, tag_count).
    """
    vault_config[vault_name] = {
        "path": vault_path,
        "attachmentsPath": None,
        "scanAttachmentsOnStartup": True,
        "includeHidden": False,
        "hiddenWhitelist": [],
    }

    loop = asyncio.get_running_loop()
    vault_data = await loop.run_in_executor(None, _scan_vault, vault_name, vault_path, vault_config[vault_name])
    vault_data["config"] = vault_config[vault_name]

    async with _get_async_lock():
        with _index_lock:
            _insert_vault_unlocked(vault_name, vault_data)

    stats = {"file_count": len(vault_data["files"]), "tag_count": len(vault_data["tags"])}
    logger.info(f"Added vault '{vault_name}': {stats['file_count']} files, {stats['tag_count']} tags")
    return stats


def find_file_in_index(link_target: str, current_vault: str) -> Optional[Dict[str, str]]:
    """Find a file matching a wikilink target using the O(1) lookup table.

    Searches by filename first, then by full relative path. Prefers results
    from *current_vault* when multiple matches exist.

    Args:
        link_target: The wikilink target (e.g. ``"My Note"`` or ``"folder/My Note"``).
        current_vault: Name of the vault the link originates from.

    Returns:
        Dict with ``vault`` and ``path`` keys, or ``None`` if not found.
    """
    target_lower = link_target.lower().strip()
    if not target_lower.endswith(".md"):
        target_lower += ".md"

    candidates = _file_lookup.get(target_lower, [])
    if not candidates:
        return None
    # Prefer the current vault when multiple vaults contain a match.
    for candidate in candidates:
        if candidate["vault"] == current_vault:
            return candidate
    return candidates[0]