/**
 * Obsidian-compatible search query parser
 * Full operator support: path:, file:, content:, tag:, line:, block:, section:,
 * task:, task-todo:, task-done:, match-case:, ignore-case:, [property], OR, AND, -, *, "", /regex/
 *
 * Precedence: terms separated by whitespace (or an explicit AND) are combined
 * with AND, which binds tighter than OR. Parentheses create nested groups.
 */

import type {
  SearchTerm,
  SearchGroup,
  SearchNode,
  ParsedQuery,
  SearchPredicate,
  SearchContext,
  SearchOptions,
  SectionContent,
  TaskInfo,
  ParseDiagnostics,
  SearchFilterDiagnostics
} from './search-parser.types';

// Re-export types for convenience
export type { SearchContext, SearchOptions, SectionContent, TaskInfo };

/**
 * Operators of the form `prefix:value` understood by the parser.
 * Shared by the parser and by the autocomplete helper so both stay in sync.
 */
const FILTER_OPERATORS = [
  'path',
  'file',
  'content',
  'tag',
  'line',
  'block',
  'section',
  'task',
  'task-todo',
  'task-done',
  'match-case',
  'ignore-case'
] as const;

type FilterOperator = (typeof FILTER_OPERATORS)[number];

/** Type guard: is `prefix` one of the known `prefix:value` operators? */
function isFilterOperator(prefix: string): prefix is FilterOperator {
  return (FILTER_OPERATORS as readonly string[]).includes(prefix);
}

/**
 * Parse an Obsidian search query into an AST.
 *
 * @param query   Raw query string as typed by the user.
 * @param options Search options; only used to seed the diagnostics defaults.
 * @returns The AST, an `isEmpty` flag (true for blank queries) and parse diagnostics.
 */
export function parseSearchQuery(query: string, options?: SearchOptions): ParsedQuery {
  if (!query || !query.trim()) {
    return {
      ast: { type: 'group', operator: 'AND', terms: [] },
      isEmpty: true,
      diagnostics: {
        tokens: [],
        filters: createEmptyDiagnosticsFilters(options),
        warnings: []
      }
    };
  }

  const tokens = tokenize(query);
  const diagnostics: ParseDiagnostics = {
    tokens: [...tokens],
    filters: createEmptyDiagnosticsFilters(options),
    warnings: []
  };
  const ast = parseTokens(tokens, options, diagnostics);

  return { ast, isEmpty: false, diagnostics };
}

/**
 * Convert a parsed query into a predicate function.
 *
 * @returns A predicate that is always true for an empty query, otherwise one
 *          that evaluates the AST against the supplied context.
 */
export function queryToPredicate(parsed: ParsedQuery, options?: SearchOptions): SearchPredicate {
  if (parsed.isEmpty) {
    return () => true;
  }
  return (context: SearchContext) => evaluateNode(parsed.ast, context, options);
}

/**
 * Split the query string into tokens.
 *
 * Alternatives are tried in order, so the more specific forms must come first:
 *  1. "double quoted"            2. 'single quoted'        3. /regex/
 *  4. ( and )                    5. [prop]:"quoted value"  6. [prop]:value
 *  7. [prop] / [prop:value]      8. prefix:"quoted value"  9. -"negated phrase"
 * 10. any other run of non-space, non-parenthesis characters
 */
function tokenize(query: string): string[] {
  const tokens: string[] = [];
  const tokenPattern =
    /\s*("[^"]*"|'[^']*'|\/[^\/]*\/|\(|\)|-?\[[^\]]*\]:?"[^"]*"|-?\[[^\]]*\]:[^\s()]+|-?\[[^\]]*\]|-?[^\s()":]+:"[^"]*"|-?"[^"]*"|-?[^\s()]+)/g;

  let match: RegExpExecArray | null;
  while ((match = tokenPattern.exec(query)) !== null) {
    if (match[1]) {
      tokens.push(match[1]);
    }
  }
  return tokens;
}

/** Result of parsing one nesting level: the node plus where parsing stopped. */
interface ParsedSequence {
  node: SearchNode;
  endIndex: number;
}

/**
 * Parse one nesting level of the token stream.
 *
 * Consecutive terms are AND-ed; `OR` starts a new alternative, so
 * `a b OR c` becomes `(a AND b) OR c`. Levels without OR yield a flat AND
 * group and levels with only single-term alternatives yield a flat OR group.
 *
 * @param startIndex  Index of the first token belonging to this level.
 * @param insideGroup True when parsing the inside of `( ... )`; a `)` then ends the level.
 *                    At the top level a stray `)` is kept as a literal text term.
 * @returns The group node and the index of the closing `)` (or `tokens.length`).
 */
function parseSequence(
  tokens: string[],
  startIndex: number,
  insideGroup: boolean,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): ParsedSequence {
  let current: SearchNode[] = [];
  const alternatives: SearchNode[][] = [current];
  let sawOr = false;
  let i = startIndex;

  while (i < tokens.length) {
    const token = tokens[i];

    if (insideGroup && token === ')') {
      break;
    }

    const keyword = token.toUpperCase();
    if (keyword === 'OR') {
      sawOr = true;
      current = [];
      alternatives.push(current);
      i++;
      continue;
    }
    if (keyword === 'AND') {
      // AND is implicit between adjacent terms
      i++;
      continue;
    }

    if (token === '(') {
      const nested = parseSequence(tokens, i + 1, true, options, diagnostics);
      current.push(nested.node);
      i = nested.endIndex + 1;
      continue;
    }

    const term = parseTerm(token, options, diagnostics);
    if (term) {
      current.push(term);
    }
    i++;
  }

  if (!sawOr) {
    return { node: { type: 'group', operator: 'AND', terms: current }, endIndex: i };
  }

  // Collapse each non-empty alternative: a single node stays as-is, several become an AND group
  const terms = alternatives
    .filter(branch => branch.length > 0)
    .map((branch): SearchNode =>
      branch.length === 1 ? branch[0] : { type: 'group', operator: 'AND', terms: branch }
    );

  return { node: { type: 'group', operator: 'OR', terms }, endIndex: i };
}

/**
 * Parse the full token list into the root AST node.
 */
function parseTokens(
  tokens: string[],
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): SearchNode {
  return parseSequence(tokens, 0, false, options, diagnostics).node;
}

/**
 * Parse a group enclosed in parentheses.
 *
 * @param startIndex Index of the first token after the opening `(`.
 * @returns The group node and the index of the matching `)`
 *          (or `tokens.length` when the group is never closed).
 */
function parseGroup(
  tokens: string[],
  startIndex: number,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): { node: SearchNode; endIndex: number } {
  return parseSequence(tokens, startIndex, true, options, diagnostics);
}

/** Strip one pair of surrounding double quotes, reporting whether they were present. */
function unquote(raw: string): { text: string; quoted: boolean } {
  if (raw.startsWith('"') && raw.endsWith('"')) {
    return { text: raw.substring(1, raw.length - 1), quoted: true };
  }
  return { text: raw, quoted: false };
}

/** Build a property term; an empty value means "property exists". */
function buildPropertyTerm(propertyKey: string, rawValue: string, negated: boolean): SearchTerm {
  const { text, quoted } = unquote(rawValue);
  return {
    type: 'property',
    value: text,
    propertyKey,
    negated,
    quoted,
    wildcard: text.includes('*')
  };
}

/**
 * Turn a token (already stripped of its leading `-`) into a term and record
 * positive filter diagnostics / unknown-operator warnings along the way.
 */
function classifyTerm(rawValue: string, negated: boolean, diagnostics: ParseDiagnostics): SearchTerm {
  // Property form: [key]:value
  if (rawValue.startsWith('[') && rawValue.includes(']:')) {
    const closeBracket = rawValue.indexOf(']');
    return buildPropertyTerm(
      rawValue.substring(1, closeBracket),
      rawValue.substring(closeBracket + 2),
      negated
    );
  }

  // Regex form: /pattern/
  if (rawValue.startsWith('/') && rawValue.endsWith('/') && rawValue.length > 2) {
    return {
      type: 'regex',
      value: rawValue.substring(1, rawValue.length - 1),
      negated,
      quoted: false,
      wildcard: false
    };
  }

  const { text: value, quoted } = unquote(rawValue);
  const wildcard = value.includes('*');

  // Property form: [key:value] or [key]. This must be checked before the generic
  // `prefix:` handling, otherwise "[key" would be reported as an unknown operator.
  if (value.startsWith('[') && value.endsWith(']')) {
    const inner = value.substring(1, value.length - 1);
    const propColonIndex = inner.indexOf(':');
    if (propColonIndex > 0) {
      return buildPropertyTerm(
        inner.substring(0, propColonIndex),
        inner.substring(propColonIndex + 1),
        negated
      );
    }
    return buildPropertyTerm(inner, '', negated);
  }

  const colonIndex = value.indexOf(':');
  if (colonIndex > 0) {
    const prefix = value.substring(0, colonIndex).toLowerCase();
    const rawOperand = value.substring(colonIndex + 1);

    if (prefix.startsWith('[') && prefix.endsWith(']')) {
      return buildPropertyTerm(prefix.substring(1, prefix.length - 1), rawOperand, negated);
    }

    if (!isFilterOperator(prefix)) {
      diagnostics.warnings.push(`UnknownOperator:${prefix}`);
      // Treat as plain text, keeping the original value including the prefix
      return { type: 'text', value, negated, quoted, wildcard };
    }

    const { text: operand, quoted: operandQuoted } = unquote(rawOperand);
    const operatorTerm: SearchTerm = {
      type: prefix,
      value: operand,
      negated,
      quoted: operandQuoted,
      wildcard: operand.includes('*')
    };

    switch (prefix) {
      case 'path':
      case 'file':
      case 'tag':
        // Only positive filters are listed here; negated ones go to `negative`
        if (!negated) {
          diagnostics.filters[prefix].push(operand);
        }
        return operatorTerm;
      case 'match-case':
        diagnostics.filters.caseSensitive = true;
        return { ...operatorTerm, caseSensitive: true };
      case 'ignore-case':
        diagnostics.filters.caseSensitive = false;
        return { ...operatorTerm, caseSensitive: false };
      default:
        return operatorTerm;
    }
  }

  return { type: 'text', value, negated, quoted, wildcard };
}

/**
 * Parse a single search term.
 *
 * @param token One token as produced by `tokenize`; a leading `-` negates it.
 * @returns The parsed term, or `null` for an empty token.
 */
function parseTerm(
  token: string,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): SearchTerm | null {
  if (!token) {
    return null;
  }

  const negated = token.startsWith('-');
  const term = classifyTerm(negated ? token.substring(1) : token, negated, diagnostics);

  // For property terms without a value the key itself is what gets excluded
  const negativeValue = term.type === 'property' ? term.value || term.propertyKey : term.value;
  if (negated && negativeValue) {
    diagnostics.filters.negative.push(negativeValue);
    diagnostics.filters.negativeDetails.push({
      type: term.type,
      value: negativeValue,
      wildcard: Boolean(term.wildcard)
    });
  }

  if (term.type === 'regex') {
    diagnostics.filters.regex = true;
  }

  return term;
}

/**
 * Create the initial diagnostics filter record, seeded from the search options.
 */
function createEmptyDiagnosticsFilters(options?: SearchOptions): SearchFilterDiagnostics {
  // `wholeWord` is read defensively: it is not referenced anywhere else in this file
  const wholeWord =
    (options as (SearchOptions & { wholeWord?: boolean }) | undefined)?.wholeWord ?? false;

  return {
    tag: [],
    path: [],
    file: [],
    negative: [],
    negativeDetails: [],
    regex: options?.regexMode ?? false,
    caseSensitive: options?.caseSensitive ?? false,
    wholeWord
  };
}

/**
 * Evaluate a search node (group or term) against a context.
 */
function evaluateNode(node: SearchNode, context: SearchContext, options?: SearchOptions): boolean {
  if (node.type === 'group') {
    return evaluateGroup(node, context, options);
  }
  return evaluateTerm(node, context, options);
}

/**
 * Evaluate a group node. An empty group matches everything.
 */
function evaluateGroup(group: SearchGroup, context: SearchContext, options?: SearchOptions): boolean {
  if (group.terms.length === 0) {
    return true;
  }

  const matches = (child: SearchNode): boolean => evaluateNode(child, context, options);
  // some/every short-circuit, so remaining terms are skipped once the outcome is known
  return group.operator === 'OR' ? group.terms.some(matches) : group.terms.every(matches);
}

/**
 * Evaluate a single term against a context, applying negation last.
 */
function evaluateTerm(term: SearchTerm, context: SearchContext, options?: SearchOptions): boolean {
  // A term-level setting (match-case:/ignore-case:) overrides the global option
  const caseSensitive =
    term.caseSensitive !== undefined ? term.caseSensitive : options?.caseSensitive || false;
  const matches = (text: string): boolean =>
    matchString(text, term.value, term.wildcard, caseSensitive);

  let result = false;

  switch (term.type) {
    case 'path':
      result = matches(context.filePath);
      break;

    case 'file':
      result = matches(context.fileName) || matches(context.fileNameWithExt);
      break;

    case 'content':
      result = matches(context.content);
      break;

    case 'tag': {
      // Compare tags without their leading '#'
      const stripHash = (tag: string): string => (tag.startsWith('#') ? tag.substring(1) : tag);
      const searchTag = stripHash(term.value);
      result = context.tags.some(tag =>
        matchString(stripHash(tag), searchTag, term.wildcard, caseSensitive)
      );
      break;
    }

    case 'line':
      result = context.lines.some(matches);
      break;

    case 'block':
      result = context.blocks.some(matches);
      break;

    case 'section':
      result = context.sections.some(
        section => matches(section.content) || matches(section.heading)
      );
      break;

    case 'task':
      result = context.tasks.some(task => matches(task.text));
      break;

    case 'task-todo':
      result = context.tasks.some(task => !task.completed && matches(task.text));
      break;

    case 'task-done':
      result = context.tasks.some(task => task.completed && matches(task.text));
      break;

    case 'match-case':
      // Content search with forced case sensitivity
      result = matchString(context.content, term.value, term.wildcard, true);
      break;

    case 'ignore-case':
      // Content search with forced case insensitivity
      result = matchString(context.content, term.value, term.wildcard, false);
      break;

    case 'regex':
      try {
        result = new RegExp(term.value, caseSensitive ? '' : 'i').test(context.content);
      } catch {
        // Invalid user-supplied regex: treat as "no match" rather than failing the search
        result = false;
      }
      break;

    case 'property': {
      if (!term.propertyKey) {
        break;
      }
      const propValue = context.properties[term.propertyKey];
      if (term.value === '') {
        // Property existence check
        result = propValue !== undefined;
      } else if (Array.isArray(propValue)) {
        result = propValue.some(item => matches(String(item)));
      } else {
        // `??` (not `||`) so that legitimate falsy values such as 0 or false still match
        result = matches(String(propValue ?? ''));
      }
      break;
    }

    case 'text':
    default:
      result = matches(context.content);
      break;
  }

  return term.negated ? !result : result;
}

/**
 * Substring match with optional `*` wildcard support and case sensitivity.
 *
 * @param text          Text to search in.
 * @param pattern       Literal pattern; `*` matches any run of characters when `wildcard` is true.
 * @param wildcard      Interpret `*` in the pattern as a wildcard.
 * @param caseSensitive Compare case-sensitively.
 */
function matchString(
  text: string,
  pattern: string,
  wildcard: boolean = false,
  caseSensitive: boolean = false
): boolean {
  if (!wildcard) {
    return caseSensitive
      ? text.includes(pattern)
      : text.toLowerCase().includes(pattern.toLowerCase());
  }

  // Escape every regex metacharacter except '*', then expand '*' to '.*'
  const source = (caseSensitive ? pattern : pattern.toLowerCase())
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*');

  return new RegExp(source, caseSensitive ? '' : 'i').test(text);
}

/**
 * Extract the search operator/prefix from a partial query.
 * Used for autocomplete suggestions.
 *
 * @returns `property` while an opening `[` has not been closed yet, the operator
 *          name for a known `prefix:value`, otherwise `general`.
 */
export function detectQueryType(query: string): {
  type:
    | 'path'
    | 'file'
    | 'content'
    | 'tag'
    | 'line'
    | 'block'
    | 'section'
    | 'task'
    | 'task-todo'
    | 'task-done'
    | 'match-case'
    | 'ignore-case'
    | 'property'
    | 'general'
    | null;
  prefix: string;
  value: string;
} {
  const trimmed = query.trim();

  // Property that is still being typed (no closing bracket yet)
  if (trimmed.startsWith('[') && !trimmed.includes(']')) {
    const inner = trimmed.substring(1);
    const innerColon = inner.indexOf(':');
    if (innerColon > 0) {
      return {
        type: 'property',
        prefix: inner.substring(0, innerColon),
        value: inner.substring(innerColon + 1)
      };
    }
    return { type: 'property', prefix: '', value: inner };
  }

  // Standard `prefix:value` operators
  const colonIndex = trimmed.indexOf(':');
  if (colonIndex > 0) {
    const prefix = trimmed.substring(0, colonIndex).toLowerCase();
    if (isFilterOperator(prefix)) {
      return { type: prefix, prefix, value: trimmed.substring(colonIndex + 1) };
    }
  }

  return { type: 'general', prefix: '', value: trimmed };
}