ObsiViewer/src/core/search/search-parser.ts

/**
 * Obsidian-compatible search query parser
 * Full operator support: path:, file:, content:, tag:, line:, block:, section:,
 * task:, task-todo:, task-done:, match-case:, ignore-case:, [property], OR, AND, -, *, "", /regex/
 */

import {
  SearchTerm,
  SearchGroup,
  SearchNode,
  ParsedQuery,
  SearchPredicate,
  SearchContext,
  SearchOptions,
  SectionContent,
  TaskInfo,
  ParseDiagnostics,
  SearchFilterDiagnostics
} from './search-parser.types';

// Re-export types for convenience
export type { SearchContext, SearchOptions, SectionContent, TaskInfo };

/**
 * Turn a raw Obsidian-style search string into a parsed query.
 *
 * A missing or whitespace-only query produces an empty AND group flagged with
 * `isEmpty: true`. Any other query is tokenized and the tokens are handed to
 * the token parser. Either way the result carries a diagnostics record holding
 * the token list, a filter summary seeded from `options`, and parser warnings.
 *
 * @param query - Raw text typed by the user.
 * @param options - Optional search settings; used to seed the diagnostics
 *   filters and forwarded to the token parser.
 * @returns The AST, the emptiness flag and the diagnostics gathered while parsing.
 */
export function parseSearchQuery(query: string, options?: SearchOptions): ParsedQuery {
  const newDiagnostics = (tokenSnapshot: string[]): ParseDiagnostics => ({
    tokens: tokenSnapshot,
    filters: createEmptyDiagnosticsFilters(options),
    warnings: []
  });

  if (!query || query.trim() === '') {
    return {
      ast: { type: 'group', operator: 'AND', terms: [] },
      isEmpty: true,
      diagnostics: newDiagnostics([])
    };
  }

  const tokenList = tokenize(query);
  // The diagnostics keep their own copy so later consumers cannot alias the parser input.
  const diagnostics = newDiagnostics(tokenList.slice());

  return {
    ast: parseTokens(tokenList, options, diagnostics),
    isEmpty: false,
    diagnostics
  };
}

/**
 * Build a predicate that tells whether a document context satisfies a parsed query.
 *
 * The emptiness check happens once, when the predicate is created: an empty
 * query yields a predicate that accepts every context, otherwise the predicate
 * evaluates the query AST against the context it is given.
 *
 * @param parsed - Result of `parseSearchQuery`.
 * @param options - Optional search settings forwarded to the evaluator.
 * @returns A function from search context to match / no match.
 */
export function queryToPredicate(parsed: ParsedQuery, options?: SearchOptions): SearchPredicate {
  const acceptAll: SearchPredicate = () => true;
  const matchAst: SearchPredicate = (context: SearchContext) =>
    evaluateNode(parsed.ast, context, options);

  return parsed.isEmpty ? acceptAll : matchAst;
}

/**
 * Split a query string into tokens.
 *
 * Tokens are separated by whitespace, except that the following constructs are
 * kept together as ONE token even when they contain spaces:
 * - quoted phrases: `"foo bar"`, `'foo bar'`, and the negated `-"foo bar"`
 * - regex literals: `/foo bar/` and the negated `-/foo bar/`
 * - property searches: `[key]`, `[key:value]`, `[key]:value`, `[key]:"some value"`
 * - operators with a quoted value: `path:"my folder"`, `-tag:"a b"`
 *
 * Parentheses are always emitted as their own tokens. Anything else is a plain
 * word (optionally prefixed with `-`) running up to the next whitespace or
 * parenthesis.
 *
 * @param query - Raw query text.
 * @returns The tokens in source order, quotes and prefixes still attached.
 */
function tokenize(query: string): string[] {
  const tokens: string[] = [];
  // Alternatives are tried left to right, so the "keep together" forms must
  // come before the generic word alternative at the end.
  const tokenPattern = new RegExp(
    '\\s*(' +
      [
        '-?"[^"]*"',                                  // "phrase" / -"phrase"
        "'[^']*'",                                    // 'phrase'
        '-?\\/[^\\/]*\\/',                            // /regex/ / -/regex/
        '\\(',                                        // group open
        '\\)',                                        // group close
        '-?\\[[^\\]]*\\](?::?"[^"]*"|:[^\\s()]*)?',   // [key], [key]:value, [key]:"value"
        '-?[^\\s()":]+:"[^"]*"',                      // operator:"quoted value"
        '-?[^\\s()]+'                                 // plain word / operator:value
      ].join('|') +
      ')',
    'g'
  );

  let match: RegExpExecArray | null;
  while ((match = tokenPattern.exec(query)) !== null) {
    if (match[1]) {
      tokens.push(match[1]);
    }
  }
  return tokens;
}

/**
 * Parse a flat token list into an AST.
 *
 * Grammar implemented here (and shared with {@link parseGroup}):
 * - adjacent terms are AND-ed (an explicit `AND` token is accepted and skipped);
 * - `OR` separates alternatives and binds more loosely than the implicit AND,
 *   so `a b OR c` means `(a AND b) OR c`;
 * - `( ... )` opens a nested group that is parsed on its own, so an `OR`
 *   inside parentheses never changes the operator of the enclosing level.
 *
 * The returned root is always a group node: an AND group when the level has no
 * effective `OR`, otherwise an OR group whose children are either single
 * terms or AND groups.
 *
 * @param tokens - Tokens produced by the tokenizer.
 * @param options - Search options, forwarded to the term parser.
 * @param diagnostics - Diagnostics record the term parser writes into.
 * @returns The root node of the query.
 */
function parseTokens(tokens: string[], options: SearchOptions | undefined, diagnostics: ParseDiagnostics): SearchNode {
  return parseTokenSequence(tokens, 0, false, options, diagnostics).node;
}

/**
 * Parse the contents of a parenthesised group.
 *
 * @param tokens - The complete token list.
 * @param startIndex - Index of the first token AFTER the opening `(`.
 * @param options - Search options, forwarded to the term parser.
 * @param diagnostics - Diagnostics record the term parser writes into.
 * @returns The group node and `endIndex`, the index of the matching `)`
 *   (or `tokens.length` when the group is never closed).
 */
function parseGroup(
  tokens: string[],
  startIndex: number,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): { node: SearchNode; endIndex: number } {
  return parseTokenSequence(tokens, startIndex, true, options, diagnostics);
}

/**
 * Shared worker: parse one nesting level starting at `startIndex`.
 * When `stopAtCloseParen` is true the level ends at its matching `)`;
 * otherwise it runs to the end of the token list.
 */
function parseTokenSequence(
  tokens: string[],
  startIndex: number,
  stopAtCloseParen: boolean,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): { node: SearchNode; endIndex: number } {
  // Each entry is one OR-alternative; the nodes inside an entry are AND-ed.
  let current: SearchNode[] = [];
  const alternatives: SearchNode[][] = [current];
  let i = startIndex;

  while (i < tokens.length) {
    const token = tokens[i];

    if (stopAtCloseParen && token === ')') {
      break;
    }

    const keyword = token.toUpperCase();
    if (keyword === 'OR') {
      current = [];
      alternatives.push(current);
      i++;
      continue;
    }
    if (keyword === 'AND') {
      // AND is implicit between adjacent terms; the explicit keyword is a no-op.
      i++;
      continue;
    }

    if (token === '(') {
      // Recurse so the nested group keeps its own operator and structure.
      const nested = parseGroup(tokens, i + 1, options, diagnostics);
      current.push(nested.node);
      i = nested.endIndex + 1;
      continue;
    }

    const term = parseTerm(token, options, diagnostics);
    if (term) {
      current.push(term);
    }
    i++;
  }

  // A leading, trailing or doubled OR leaves empty alternatives behind; drop them
  // so a half-typed query such as `a b OR` still behaves like `a b`.
  const branches = alternatives.filter(branch => branch.length > 0);

  let node: SearchNode;
  if (branches.length <= 1) {
    node = { type: 'group', operator: 'AND', terms: branches[0] ?? [] };
  } else {
    node = {
      type: 'group',
      operator: 'OR',
      terms: branches.map((branch): SearchNode =>
        branch.length === 1 ? branch[0] : { type: 'group', operator: 'AND', terms: branch }
      )
    };
  }

  return { node, endIndex: i };
}

/**
 * Remove one pair of surrounding `quote` characters from `raw`, if present.
 * A lone quote character is left untouched.
 *
 * @returns The inner text and whether a pair of quotes was removed.
 */
function unwrapQuotes(raw: string, quote: string = '"'): { text: string; quoted: boolean } {
  if (raw.length >= 2 && raw.startsWith(quote) && raw.endsWith(quote)) {
    return { text: raw.substring(1, raw.length - 1), quoted: true };
  }
  return { text: raw, quoted: false };
}

/** Build a property term; an empty `value` means "the property exists". */
function buildPropertyTerm(propertyKey: string, value: string, quoted: boolean, negated: boolean): SearchTerm {
  return { type: 'property', value, propertyKey, negated, quoted, wildcard: value.includes('*') };
}

/**
 * Parse a single token into a search term.
 *
 * Recognised shapes, in order of precedence:
 * 1. `[key]:value` / `[key]:"value"`  -> property term
 * 2. `/pattern/`                      -> regex term
 * 3. `"phrase"` / `'phrase'`          -> quotes removed, then handled as below
 * 4. `prefix:value`                   -> operator term (`path`, `file`, `content`,
 *    `tag`, `line`, `block`, `section`, `task`, `task-todo`, `task-done`,
 *    `match-case`, `ignore-case`); an unknown prefix adds an
 *    `UnknownOperator:<prefix>` warning and the whole token becomes text
 * 5. `[key]` / `[key:value]`          -> property existence / property value term
 * 6. anything else                    -> text term
 *
 * A leading `-` negates the term. A token consisting of `-` alone has nothing
 * to negate and is ignored.
 *
 * Side effects on `diagnostics`: non-negated `path`/`file`/`tag` values are
 * appended to the matching filter list, `match-case`/`ignore-case` set
 * `filters.caseSensitive`, regex terms set `filters.regex`, and every negated
 * term with a non-empty value is recorded in `filters.negative` and
 * `filters.negativeDetails`.
 *
 * @param token - One token from the tokenizer.
 * @param options - Search options (not consulted by this function).
 * @param diagnostics - Diagnostics record updated as described above.
 * @returns The parsed term, or `null` when the token carries no search text.
 */
function parseTerm(token: string, options: SearchOptions | undefined, diagnostics: ParseDiagnostics): SearchTerm | null {
  if (!token) {
    return null;
  }

  const negated = token.startsWith('-');
  let value = negated ? token.substring(1) : token;

  // A bare "-" would otherwise become a negated empty text term, and since every
  // string contains "", that term rejects every document.
  if (value === '') {
    return null;
  }

  let term: SearchTerm | null = null;
  let negativeValue: string | undefined;

  // 1. Property form: [key]:value
  if (value.startsWith('[') && value.includes(']:')) {
    const closeBracket = value.indexOf(']');
    const propertyKey = value.substring(1, closeBracket);
    const parsedValue = unwrapQuotes(value.substring(closeBracket + 2));
    term = buildPropertyTerm(propertyKey, parsedValue.text, parsedValue.quoted, negated);
    negativeValue = parsedValue.text || propertyKey;
  }

  // 2. Regex literal: /pattern/
  if (!term && value.length > 2 && value.startsWith('/') && value.endsWith('/')) {
    const regexPattern = value.substring(1, value.length - 1);
    term = { type: 'regex', value: regexPattern, negated, quoted: false, wildcard: false };
    diagnostics.filters.regex = true;
    negativeValue = regexPattern;
  }

  // 3. Quoted phrase. The tokenizer keeps both "..." and '...' together,
  //    so both kinds of quotes are removed here.
  let quoted = false;
  if (!term) {
    let phrase = unwrapQuotes(value, '"');
    if (!phrase.quoted) {
      phrase = unwrapQuotes(value, "'");
    }
    quoted = phrase.quoted;
    value = phrase.text;
  }

  const wildcard = value.includes('*');
  // `[...]`-wrapped values belong to the property handler in step 5, even when
  // they contain a colon (`[key:value]`).
  const bracketWrapped = value.startsWith('[') && value.endsWith(']');

  // 4. Operator form: prefix:value
  if (!term) {
    const colonIndex = value.indexOf(':');
    if (colonIndex > 0) {
      const prefix = value.substring(0, colonIndex).toLowerCase();
      const parsedValue = unwrapQuotes(value.substring(colonIndex + 1));
      const cleanValue = parsedValue.text;
      const common = {
        value: cleanValue,
        negated,
        quoted: parsedValue.quoted,
        wildcard: cleanValue.includes('*')
      };

      if (prefix.startsWith('[') && prefix.endsWith(']')) {
        const propertyKey = prefix.substring(1, prefix.length - 1);
        term = buildPropertyTerm(propertyKey, cleanValue, parsedValue.quoted, negated);
        negativeValue = cleanValue || propertyKey;
      } else if (!bracketWrapped) {
        switch (prefix) {
          case 'path':
          case 'file':
          case 'tag':
            // Only positive filters are listed; negated ones go to `negative` below.
            if (!negated) {
              diagnostics.filters[prefix].push(cleanValue);
            }
            term = { type: prefix, ...common };
            negativeValue = cleanValue;
            break;
          case 'content':
          case 'line':
          case 'block':
          case 'section':
          case 'task':
          case 'task-todo':
          case 'task-done':
            term = { type: prefix, ...common };
            negativeValue = cleanValue;
            break;
          case 'match-case':
          case 'ignore-case': {
            const caseSensitive = prefix === 'match-case';
            diagnostics.filters.caseSensitive = caseSensitive;
            term = { type: prefix, ...common, caseSensitive };
            negativeValue = cleanValue;
            break;
          }
          default:
            diagnostics.warnings.push(`UnknownOperator:${prefix}`);
            // Treat as text, keeping the original value including the prefix.
            term = { type: 'text', value, negated, quoted, wildcard };
            negativeValue = value;
            break;
        }
      }
    }
  }

  // 5. Bracket form: [key] (existence) or [key:value]
  if (!term && bracketWrapped) {
    const inner = value.substring(1, value.length - 1);
    const propColonIndex = inner.indexOf(':');
    if (propColonIndex > 0) {
      const propertyKey = inner.substring(0, propColonIndex);
      const parsedValue = unwrapQuotes(inner.substring(propColonIndex + 1));
      term = buildPropertyTerm(propertyKey, parsedValue.text, parsedValue.quoted, negated);
      negativeValue = parsedValue.text || propertyKey;
    } else {
      term = buildPropertyTerm(inner, '', false, negated);
      negativeValue = inner;
    }
  }

  // 6. Plain text
  if (!term) {
    term = { type: 'text', value, negated, quoted, wildcard };
    negativeValue = value;
  }

  if (negated && negativeValue) {
    diagnostics.filters.negative.push(negativeValue);
    diagnostics.filters.negativeDetails.push({
      type: term.type,
      value: negativeValue,
      wildcard: Boolean(term.wildcard)
    });
  }

  return term;
}

/**
 * Create a fresh filter-diagnostics record.
 *
 * All list fields start empty. The three boolean flags are taken from
 * `options` (`regexMode`, `caseSensitive`, `wholeWord`) and default to `false`
 * when `options` or the individual setting is absent.
 *
 * @param options - Optional search settings used to seed the flags.
 * @returns A new record; no arrays are shared between calls.
 */
function createEmptyDiagnosticsFilters(options?: SearchOptions): SearchFilterDiagnostics {
  const regex = options?.regexMode ?? false;
  const caseSensitive = options?.caseSensitive ?? false;
  // NOTE(review): `wholeWord` is read through a cast, presumably because it is
  // not declared on SearchOptions — confirm against the type definition.
  const wholeWord = (options as any)?.wholeWord ?? false;

  return {
    tag: [],
    path: [],
    file: [],
    negative: [],
    negativeDetails: [],
    regex,
    caseSensitive,
    wholeWord
  };
}

/**
 * Evaluate one AST node against a document context.
 *
 * Group nodes are delegated to the group evaluator, every other node is
 * treated as a single term.
 *
 * @param node - Group or term node.
 * @param context - The document data being searched.
 * @param options - Optional search settings passed through unchanged.
 * @returns Whether the node matches the context.
 */
function evaluateNode(node: SearchNode, context: SearchContext, options?: SearchOptions): boolean {
  return node.type === 'group'
    ? evaluateGroup(node, context, options)
    : evaluateTerm(node, context, options);
}

/**
 * Evaluate a group node.
 *
 * An empty group matches everything. An `OR` group matches when at least one
 * child matches; any other operator is treated as `AND` and requires every
 * child to match. Children are evaluated lazily, left to right, and evaluation
 * stops as soon as the outcome is known, so later (possibly expensive) terms
 * are skipped.
 *
 * @param group - The group node to evaluate.
 * @param context - The document data being searched.
 * @param options - Optional search settings passed through to child evaluation.
 * @returns Whether the group matches the context.
 */
function evaluateGroup(group: SearchGroup, context: SearchContext, options?: SearchOptions): boolean {
  // Needed explicitly: `[].some(...)` would report false for an empty OR group.
  if (group.terms.length === 0) {
    return true;
  }

  const matches = (child: SearchNode): boolean => evaluateNode(child, context, options);

  return group.operator === 'OR'
    ? group.terms.some(matches)
    : group.terms.every(matches);
}

/**
 * Evaluate a single term against a document context.
 *
 * Case sensitivity comes from the term itself when it carries a
 * `caseSensitive` flag, otherwise from `options.caseSensitive`, otherwise it is
 * off. `match-case` and `ignore-case` terms always force their own mode.
 *
 * What each term type is matched against:
 * - `path` -> `context.filePath`; `file` -> `fileName` or `fileNameWithExt`
 * - `content`, `text`, `match-case`, `ignore-case`, `regex` -> `context.content`
 * - `tag` -> any entry of `context.tags` (a leading `#` is ignored on both sides)
 * - `line` / `block` -> any entry of `context.lines` / `context.blocks`
 * - `section` -> content or heading of any entry of `context.sections`
 * - `task`, `task-todo`, `task-done` -> text of any / open / completed task
 * - `property` -> `context.properties[propertyKey]`; an empty term value only
 *   checks that the property is defined
 *
 * An invalid regex never matches. The result is inverted when `term.negated`
 * is set.
 *
 * @param term - The term to evaluate.
 * @param context - The document data being searched.
 * @param options - Optional search settings (only `caseSensitive` is read here).
 * @returns Whether the term matches, after negation has been applied.
 */
function evaluateTerm(term: SearchTerm, context: SearchContext, options?: SearchOptions): boolean {
  const caseSensitive = term.caseSensitive !== undefined
    ? term.caseSensitive
    : (options?.caseSensitive || false);

  const matches = (text: string): boolean => matchString(text, term.value, term.wildcard, caseSensitive);

  let result = false;

  switch (term.type) {
    case 'path':
      result = matches(context.filePath);
      break;

    case 'file':
      result = matches(context.fileName) || matches(context.fileNameWithExt);
      break;

    case 'content':
      result = matches(context.content);
      break;

    case 'tag': {
      const searchTag = term.value.startsWith('#') ? term.value.substring(1) : term.value;
      result = context.tags.some(tag => {
        const cleanTag = tag.startsWith('#') ? tag.substring(1) : tag;
        return matchString(cleanTag, searchTag, term.wildcard, caseSensitive);
      });
      break;
    }

    case 'line':
      result = context.lines.some(matches);
      break;

    case 'block':
      result = context.blocks.some(matches);
      break;

    case 'section':
      result = context.sections.some(section => matches(section.content) || matches(section.heading));
      break;

    case 'task':
      result = context.tasks.some(task => matches(task.text));
      break;

    case 'task-todo':
      result = context.tasks.some(task => !task.completed && matches(task.text));
      break;

    case 'task-done':
      result = context.tasks.some(task => task.completed && matches(task.text));
      break;

    case 'match-case':
      // Text search with forced case sensitivity.
      result = matchString(context.content, term.value, term.wildcard, true);
      break;

    case 'ignore-case':
      // Text search with forced case insensitivity.
      result = matchString(context.content, term.value, term.wildcard, false);
      break;

    case 'regex':
      try {
        result = new RegExp(term.value, caseSensitive ? '' : 'i').test(context.content);
      } catch {
        // Invalid user-supplied pattern: treat as "no match" instead of failing the search.
        result = false;
      }
      break;

    case 'property':
      if (term.propertyKey) {
        const propValue = context.properties[term.propertyKey];
        if (term.value === '') {
          // Existence check only.
          result = propValue !== undefined;
        } else if (Array.isArray(propValue)) {
          result = propValue.some(entry => matches(String(entry)));
        } else {
          // `??` rather than `||`: values such as 0 and false must be
          // stringified ("0", "false"), not replaced by an empty string.
          result = matches(String(propValue ?? ''));
        }
      }
      break;

    case 'text':
    default:
      result = matches(context.content);
      break;
  }

  return term.negated ? !result : result;
}

/**
 * Test whether `text` contains `pattern`.
 *
 * Without `wildcard` this is a plain substring test. With `wildcard`, every
 * `*` in the pattern matches any run of characters (not crossing a line
 * break) and every other character, including regex metacharacters such as
 * `?`, `.` or `(`, is matched literally. The match may start anywhere in
 * `text`.
 *
 * @param text - The text to search in.
 * @param pattern - The search pattern.
 * @param wildcard - Interpret `*` in `pattern` as a wildcard. Defaults to `false`.
 * @param caseSensitive - Compare with exact case. Defaults to `false`.
 * @returns `true` when the pattern occurs in the text.
 */
function matchString(text: string, pattern: string, wildcard: boolean = false, caseSensitive: boolean = false): boolean {
  const needle = caseSensitive ? pattern : pattern.toLowerCase();

  if (!wildcard) {
    const haystack = caseSensitive ? text : text.toLowerCase();
    return haystack.includes(needle);
  }

  // Escape each literal piece on its own and join with ".*", so every regex
  // metacharacter ("?" included) is neutralised and only "*" acts as a wildcard.
  const source = needle
    .split('*')
    .map(literal => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('.*');

  return new RegExp(source, caseSensitive ? '' : 'i').test(text);
}

/**
 * Classify a partial query for autocomplete.
 *
 * - A query that starts with `[` and has no closing `]` yet is reported as a
 *   `property` being typed: text before the first `:` (if the colon is not the
 *   first character after `[`) is the prefix, the rest is the value.
 * - A query of the form `<operator>:<value>` with a known operator name
 *   (case-insensitive) is reported with that operator as type and prefix.
 * - Everything else is `general`, with the trimmed query as value.
 *
 * @param query - The text typed so far.
 * @returns The detected type together with its prefix and value parts.
 */
export function detectQueryType(query: string): {
  type: 'path' | 'file' | 'content' | 'tag' | 'line' | 'block' | 'section' | 'task' | 'task-todo' | 'task-done' | 'match-case' | 'ignore-case' | 'property' | 'general' | null;
  prefix: string;
  value: string;
} {
  const knownOperators = [
    'path', 'file', 'content', 'tag', 'line', 'block', 'section',
    'task', 'task-todo', 'task-done', 'match-case', 'ignore-case'
  ] as const;

  const text = query.trim();

  // Property still being typed: opening bracket without its closing partner.
  if (text.startsWith('[') && !text.includes(']')) {
    const partial = text.slice(1);
    const separator = partial.indexOf(':');
    return separator > 0
      ? { type: 'property', prefix: partial.slice(0, separator), value: partial.slice(separator + 1) }
      : { type: 'property', prefix: '', value: partial };
  }

  // Standard `operator:value` form.
  const separator = text.indexOf(':');
  if (separator > 0) {
    const candidate = text.slice(0, separator).toLowerCase();
    const operator = knownOperators.find(name => name === candidate);
    if (operator !== undefined) {
      return { type: operator, prefix: candidate, value: text.slice(separator + 1) };
    }
  }

  return { type: 'general', prefix: '', value: text };
}