622 lines
18 KiB
TypeScript
622 lines
18 KiB
TypeScript
/**
 * Obsidian-compatible search query parser
 * Full operator support: path:, file:, content:, tag:, line:, block:, section:,
 * task:, task-todo:, task-done:, match-case:, ignore-case:, [property], OR, AND, -, *, "", /regex/
 */
|
|
|
|
import {
|
|
SearchTerm,
|
|
SearchGroup,
|
|
SearchNode,
|
|
ParsedQuery,
|
|
SearchPredicate,
|
|
SearchContext,
|
|
SearchOptions,
|
|
SectionContent,
|
|
TaskInfo,
|
|
ParseDiagnostics,
|
|
SearchFilterDiagnostics
|
|
} from './search-parser.types';
|
|
|
|
// Re-export types for convenience
|
|
export type { SearchContext, SearchOptions, SectionContent, TaskInfo };
|
|
|
|
/**
 * Turn a raw query string into an AST plus the diagnostics gathered on the way.
 *
 * A missing or whitespace-only query yields an empty AND group flagged with
 * `isEmpty: true`. Any other query is tokenized and handed to the token parser.
 *
 * @param query - Raw query text.
 * @param options - Optional search options; forwarded to the parser and used
 *   to seed the diagnostics filters.
 * @returns The AST, the emptiness flag and the parse diagnostics.
 */
export function parseSearchQuery(query: string, options?: SearchOptions): ParsedQuery {
  // Both outcomes start from the same diagnostics skeleton.
  const newDiagnostics = (tokens: string[]): ParseDiagnostics => ({
    tokens,
    filters: createEmptyDiagnosticsFilters(options),
    warnings: []
  });

  const isBlank = !query || query.trim().length === 0;
  if (isBlank) {
    return {
      ast: { type: 'group', operator: 'AND', terms: [] },
      isEmpty: true,
      diagnostics: newDiagnostics([])
    };
  }

  const tokens = tokenize(query);
  // Diagnostics receive their own copy of the token list.
  const diagnostics = newDiagnostics(tokens.slice());

  return {
    ast: parseTokens(tokens, options, diagnostics),
    isEmpty: false,
    diagnostics
  };
}
|
|
|
|
/**
 * Build a predicate that tests a search context against a parsed query.
 *
 * @param parsed - Result of `parseSearchQuery`.
 * @param options - Optional search options forwarded to node evaluation.
 * @returns A predicate that accepts every context when the query is empty,
 *   and otherwise evaluates the query AST against the given context.
 */
export function queryToPredicate(parsed: ParsedQuery, options?: SearchOptions): SearchPredicate {
  const acceptAll: SearchPredicate = () => true;
  const matchAst: SearchPredicate = (context: SearchContext) =>
    evaluateNode(parsed.ast, context, options);

  return parsed.isEmpty ? acceptAll : matchAst;
}
|
|
|
|
/**
 * Split a query string into raw tokens.
 *
 * Token kinds, tried in this order at each position:
 *  1. `[prop]:"quoted value"` (optionally negated with a leading `-`)
 *  2. `[prop]`, `[prop:value]` or `[prop]:value` (optionally negated)
 *  3. `/regex/` (optionally negated; may contain whitespace)
 *  4. `'single quoted'`
 *  5. `(` or `)`
 *  6. `"quoted phrase"`, `-"quoted phrase"` or `operator:"quoted value"`
 *  7. any other run of characters up to whitespace or a parenthesis
 *
 * Quotes, brackets, slashes and the leading `-` are kept in the token text;
 * interpreting them is left to the term parser.
 *
 * @param query - Raw query text.
 * @returns Tokens in source order; an empty array when nothing matches.
 */
function tokenize(query: string): string[] {
  // Alternatives 2, 3 and 6 keep `[status]:done`, `-/a b/`, `-"a b"` and
  // `path:"a b"` together as single tokens instead of splitting them at the
  // colon or at inner whitespace.
  const tokenPattern =
    /-?\[[^\]]*\]:?"[^"]*"|-?\[[^\]]*\](?::[^\s()]*)?|-?\/[^\/]*\/|'[^']*'|[()]|-?(?:[^\s():"]+:)?"[^"]*"|-?[^\s()]+/g;

  // With the `g` flag, match() returns every matched substring (or null).
  return [...(query.match(tokenPattern) ?? [])];
}
|
|
|
|
/**
 * Parse a flat token list into the root AST group.
 *
 * Adjacent terms (or an explicit `AND`) are combined with AND; `OR` separates
 * alternatives and binds more loosely than AND, so `a b OR c` parses as
 * `(a AND b) OR c`. Parenthesised sub-queries become nested groups. A `)`
 * without a matching `(` is skipped and recorded as a warning.
 *
 * @param tokens - Tokens produced by the tokenizer.
 * @param options - Search options, forwarded to the term parser.
 * @param diagnostics - Diagnostics object; warnings are appended to it.
 * @returns The root group node.
 */
function parseTokens(tokens: string[], options: SearchOptions | undefined, diagnostics: ParseDiagnostics): SearchNode {
  return parseTokenSequence(tokens, 0, false, options, diagnostics).node;
}

/**
 * Parse the contents of a parenthesised group.
 *
 * @param tokens - Full token list.
 * @param startIndex - Index of the first token after the opening `(`.
 * @param options - Search options, forwarded to the term parser.
 * @param diagnostics - Diagnostics object; warnings are appended to it.
 * @returns The group node and `endIndex`, the index of the matching `)`
 *   (or `tokens.length` when the group is never closed).
 */
function parseGroup(
  tokens: string[],
  startIndex: number,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): { node: SearchNode; endIndex: number } {
  const parsed = parseTokenSequence(tokens, startIndex, true, options, diagnostics);

  // Running off the end of the token list means the group was never closed.
  if (parsed.endIndex >= tokens.length) {
    diagnostics.warnings.push('UnmatchedParenthesis:(');
  }

  return parsed;
}

/**
 * Shared worker for the root query and for parenthesised groups: reads tokens
 * from `startIndex` until the end of input or, when `nested` is true, until
 * the `)` that closes the current group.
 */
function parseTokenSequence(
  tokens: string[],
  startIndex: number,
  nested: boolean,
  options: SearchOptions | undefined,
  diagnostics: ParseDiagnostics
): { node: SearchGroup; endIndex: number } {
  // Each clause is a run of AND-ed nodes; clauses are separated by OR.
  let currentClause: SearchNode[] = [];
  const clauses: SearchNode[][] = [currentClause];
  let i = startIndex;

  while (i < tokens.length) {
    const token = tokens[i];
    const keyword = token.toUpperCase();

    if (token === ')') {
      if (nested) {
        break; // leave i on the closing parenthesis for the caller
      }
      diagnostics.warnings.push('UnmatchedParenthesis:)');
      i++;
      continue;
    }

    if (keyword === 'OR') {
      currentClause = [];
      clauses.push(currentClause);
      i++;
      continue;
    }

    if (keyword === 'AND') {
      // AND is implicit between adjacent terms, so the keyword adds nothing.
      i++;
      continue;
    }

    if (token === '(') {
      const inner = parseGroup(tokens, i + 1, options, diagnostics);
      currentClause.push(inner.node);
      i = inner.endIndex + 1;
      continue;
    }

    const term = parseTerm(token, options, diagnostics);
    if (term) {
      currentClause.push(term);
    }
    i++;
  }

  return { node: combineOrClauses(clauses), endIndex: i };
}

/**
 * Fold OR-separated clauses into one group. Empty clauses (from a leading,
 * trailing or doubled `OR`) are dropped; a single remaining clause yields a
 * plain AND group; otherwise the result is an OR group whose members are the
 * clause's only node, or an AND group when the clause has several nodes.
 */
function combineOrClauses(clauses: SearchNode[][]): SearchGroup {
  const populated = clauses.filter(clause => clause.length > 0);

  if (populated.length <= 1) {
    return { type: 'group', operator: 'AND', terms: populated[0] ?? [] };
  }

  return {
    type: 'group',
    operator: 'OR',
    terms: populated.map((clause): SearchNode =>
      clause.length === 1 ? clause[0] : { type: 'group', operator: 'AND', terms: clause }
    )
  };
}
|
|
|
|
/** Operators whose term type is the operator name itself. */
const OPERATOR_TERM_TYPES = [
  'path', 'file', 'content', 'tag', 'line', 'block', 'section',
  'task', 'task-todo', 'task-done', 'match-case', 'ignore-case'
] as const;

type OperatorTermType = (typeof OPERATOR_TERM_TYPES)[number];

/** Type guard: is `prefix` one of the known `operator:` names? */
function isOperatorTermType(prefix: string): prefix is OperatorTermType {
  return (OPERATOR_TERM_TYPES as readonly string[]).includes(prefix);
}

/** Remove one pair of surrounding double quotes, reporting whether it was there. */
function stripDoubleQuotes(raw: string): { text: string; quoted: boolean } {
  const quoted = raw.length >= 2 && raw.startsWith('"') && raw.endsWith('"');
  return { text: quoted ? raw.substring(1, raw.length - 1) : raw, quoted };
}

/** Build a plain text term. */
function makeTextTerm(value: string, negated: boolean, quoted: boolean): SearchTerm {
  return { type: 'text', value, negated, quoted, wildcard: value.includes('*') };
}

/** Build a property term; an empty `rawValue` means "property exists". */
function makePropertyTerm(propertyKey: string, rawValue: string, negated: boolean): SearchTerm {
  const { text, quoted } = stripDoubleQuotes(rawValue);
  return { type: 'property', value: text, propertyKey, negated, quoted, wildcard: text.includes('*') };
}

/** Recognise `[key]:value`, `[key:value]` and `[key]`; returns null for anything else. */
function parsePropertyTerm(body: string, negated: boolean): SearchTerm | null {
  if (!body.startsWith('[')) {
    return null;
  }

  // Form 1: [key]:value
  const separator = body.indexOf(']:');
  if (separator > 0) {
    return makePropertyTerm(body.substring(1, separator), body.substring(separator + 2), negated);
  }

  // Forms 2 and 3: [key:value] and [key]
  if (body.endsWith(']')) {
    const inner = body.substring(1, body.length - 1);
    const colon = inner.indexOf(':');
    return colon > 0
      ? makePropertyTerm(inner.substring(0, colon), inner.substring(colon + 1), negated)
      : makePropertyTerm(inner, '', negated);
  }

  return null;
}

/** Recognise a non-empty `/pattern/`; returns null for anything else. */
function parseRegexTerm(body: string, negated: boolean): SearchTerm | null {
  if (body.length > 2 && body.startsWith('/') && body.endsWith('/')) {
    return { type: 'regex', value: body.substring(1, body.length - 1), negated, quoted: false, wildcard: false };
  }
  return null;
}

/**
 * Recognise `operator:value`. Known operators produce a term of that type and
 * update the diagnostics filters; an unknown prefix is recorded as a warning
 * and the whole text is kept as a plain text term. Returns null when there is
 * no colon after the first character.
 */
function parseOperatorTerm(body: string, negated: boolean, diagnostics: ParseDiagnostics): SearchTerm | null {
  const colon = body.indexOf(':');
  if (colon <= 0) {
    return null;
  }

  const prefix = body.substring(0, colon).toLowerCase();
  if (!isOperatorTermType(prefix)) {
    diagnostics.warnings.push(`UnknownOperator:${prefix}`);
    return makeTextTerm(body, negated, false);
  }

  const { text: value, quoted } = stripDoubleQuotes(body.substring(colon + 1));
  const base = { type: prefix, value, negated, quoted, wildcard: value.includes('*') };

  switch (prefix) {
    case 'path':
    case 'file':
    case 'tag':
      // Only positive filters are listed here; negated ones are reported
      // through filters.negative by the caller.
      if (!negated) {
        diagnostics.filters[prefix].push(value);
      }
      return base;
    case 'match-case':
      diagnostics.filters.caseSensitive = true;
      return { ...base, caseSensitive: true };
    case 'ignore-case':
      diagnostics.filters.caseSensitive = false;
      return { ...base, caseSensitive: false };
    default:
      return base;
  }
}

/**
 * Parse a single token into a search term.
 *
 * A leading `-` negates the term. The remainder is classified in this order:
 *  1. `"quoted phrase"` is literal text and is never re-read as an operator
 *     or property
 *  2. a property: `[key]:value`, `[key:value]` or `[key]`
 *  3. a `/regex/`
 *  4. `operator:value` (an unknown operator falls back to text with a warning)
 *  5. plain text
 *
 * @param token - One token from the tokenizer.
 * @param options - Search options; not consulted by this function.
 * @param diagnostics - Updated in place with filters, warnings and negations.
 * @returns The parsed term, or null for an empty token or a bare `-`.
 */
function parseTerm(token: string, options: SearchOptions | undefined, diagnostics: ParseDiagnostics): SearchTerm | null {
  if (!token) {
    return null;
  }

  const negated = token.startsWith('-');
  const body = negated ? token.substring(1) : token;
  if (body === '') {
    // A bare "-" would otherwise become a negated empty text term, and
    // matching the negation of empty text fails on every document.
    return null;
  }

  const phrase = stripDoubleQuotes(body);
  const term: SearchTerm = phrase.quoted
    ? makeTextTerm(phrase.text, negated, true)
    : parsePropertyTerm(body, negated) ??
      parseRegexTerm(body, negated) ??
      parseOperatorTerm(body, negated, diagnostics) ??
      makeTextTerm(body, negated, false);

  if (term.type === 'regex') {
    diagnostics.filters.regex = true;
  }

  // A property existence check has no value, so it is reported by its key.
  const negativeValue = term.type === 'property' ? term.value || term.propertyKey : term.value;
  if (negated && negativeValue) {
    diagnostics.filters.negative.push(negativeValue);
    diagnostics.filters.negativeDetails.push({
      type: term.type,
      value: negativeValue,
      wildcard: Boolean(term.wildcard)
    });
  }

  return term;
}
|
|
|
|
/**
 * Create a fresh diagnostics filter record.
 *
 * The list fields start empty. The three flags are seeded from `options`
 * (`regexMode`, `caseSensitive`, `wholeWord`) and default to `false` when the
 * option is missing.
 *
 * @param options - Optional search options to read the flag defaults from.
 * @returns A new filter record; every array in it is a new instance.
 */
function createEmptyDiagnosticsFilters(options?: SearchOptions): SearchFilterDiagnostics {
  const regex = options?.regexMode ?? false;
  const caseSensitive = options?.caseSensitive ?? false;
  // wholeWord is read through an untyped view of the options object.
  const wholeWord = (options as any)?.wholeWord ?? false;

  return {
    tag: [],
    path: [],
    file: [],
    negative: [],
    negativeDetails: [],
    regex,
    caseSensitive,
    wholeWord
  };
}
|
|
|
|
/**
 * Evaluate one AST node against a search context, dispatching on the node
 * kind: groups go to the group evaluator, everything else to the term
 * evaluator.
 *
 * @param node - Group or term node.
 * @param context - The search context to test.
 * @param options - Optional search options forwarded unchanged.
 * @returns Whether the node matches the context.
 */
function evaluateNode(node: SearchNode, context: SearchContext, options?: SearchOptions): boolean {
  return node.type === 'group'
    ? evaluateGroup(node, context, options)
    : evaluateTerm(node, context, options);
}
|
|
|
|
/**
 * Evaluate a group node against a search context.
 *
 * An empty group matches everything. An `OR` group matches when any child
 * matches; any other operator is treated as `AND` and requires every child to
 * match. Children are evaluated lazily, so evaluation stops at the first
 * child that decides the outcome.
 *
 * @param group - The group node to evaluate.
 * @param context - The search context to test.
 * @param options - Optional search options forwarded to child evaluation.
 * @returns Whether the group matches the context.
 */
function evaluateGroup(group: SearchGroup, context: SearchContext, options?: SearchOptions): boolean {
  const { terms, operator } = group;

  // Guard first: `some` on an empty list would return false, but an empty
  // group is defined to match.
  if (terms.length === 0) {
    return true;
  }

  const matches = (child: SearchNode): boolean => evaluateNode(child, context, options);

  return operator === 'OR' ? terms.some(matches) : terms.every(matches);
}
|
|
|
|
/**
 * Evaluate a single term against a search context.
 *
 * Case sensitivity comes from the term's own `caseSensitive` flag when it is
 * set, otherwise from `options.caseSensitive`, otherwise `false`. The raw
 * result is inverted when the term is negated.
 *
 * @param term - The term to evaluate.
 * @param context - The search context supplying the fields each term type reads.
 * @param options - Optional search options (only `caseSensitive` is read here).
 * @returns Whether the term, after negation, matches the context.
 */
function evaluateTerm(term: SearchTerm, context: SearchContext, options?: SearchOptions): boolean {
  const caseSensitive = term.caseSensitive !== undefined
    ? term.caseSensitive
    : (options?.caseSensitive ?? false);

  // Match the term's value against one piece of text with the term's settings.
  const matches = (text: string): boolean =>
    matchString(text, term.value, term.wildcard, caseSensitive);

  let result = false;

  switch (term.type) {
    case 'path':
      result = matches(context.filePath);
      break;

    case 'file':
      // Accept the name both without and with its extension.
      result = matches(context.fileName) || matches(context.fileNameWithExt);
      break;

    case 'content':
      result = matches(context.content);
      break;

    case 'tag': {
      // Compare tags without their leading '#', on both sides.
      const stripHash = (tag: string): string => (tag.startsWith('#') ? tag.substring(1) : tag);
      const searchTag = stripHash(term.value);
      result = context.tags.some(tag =>
        matchString(stripHash(tag), searchTag, term.wildcard, caseSensitive)
      );
      break;
    }

    case 'line':
      result = context.lines.some(line => matches(line));
      break;

    case 'block':
      result = context.blocks.some(block => matches(block));
      break;

    case 'section':
      result = context.sections.some(section => matches(section.content) || matches(section.heading));
      break;

    case 'task':
      result = context.tasks.some(task => matches(task.text));
      break;

    case 'task-todo':
      result = context.tasks.some(task => !task.completed && matches(task.text));
      break;

    case 'task-done':
      result = context.tasks.some(task => task.completed && matches(task.text));
      break;

    case 'match-case':
      // Content search with case sensitivity forced on.
      result = matchString(context.content, term.value, term.wildcard, true);
      break;

    case 'ignore-case':
      // Content search with case sensitivity forced off.
      result = matchString(context.content, term.value, term.wildcard, false);
      break;

    case 'regex':
      try {
        result = new RegExp(term.value, caseSensitive ? '' : 'i').test(context.content);
      } catch {
        // An invalid pattern is treated as "no match" rather than an error.
        result = false;
      }
      break;

    case 'property': {
      if (!term.propertyKey) {
        break; // no key: result stays false
      }
      const propValue = context.properties[term.propertyKey];
      if (term.value === '') {
        // Existence check: the key only has to be present.
        result = propValue !== undefined;
      } else if (Array.isArray(propValue)) {
        result = propValue.some(item => matches(String(item)));
      } else {
        // `??` rather than `||`, so that the values 0 and false are compared
        // as "0" and "false" instead of being replaced by an empty string.
        result = matches(String(propValue ?? ''));
      }
      break;
    }

    case 'text':
    default:
      // Plain text searches the content.
      result = matches(context.content);
      break;
  }

  return term.negated ? !result : result;
}
|
|
|
|
/**
 * Test whether `text` contains `pattern`.
 *
 * Without `wildcard` this is a plain substring test. With `wildcard`, every
 * `*` in the pattern matches any run of characters except line terminators
 * (it is translated to the regular expression `.*`), and all other
 * characters, including `?`, are matched literally. Matching is unanchored in
 * both modes.
 *
 * @param text - The text to search in. A nullish value is treated as an empty
 *   string (defensive; the declared type is `string`).
 * @param pattern - The text or wildcard pattern to look for.
 * @param wildcard - Treat `*` in `pattern` as a wildcard.
 * @param caseSensitive - Compare with exact case instead of ignoring case.
 * @returns Whether the pattern occurs in the text.
 */
function matchString(text: string, pattern: string, wildcard: boolean = false, caseSensitive: boolean = false): boolean {
  const haystack = text ?? '';
  const needle = pattern ?? '';

  if (!wildcard) {
    return caseSensitive
      ? haystack.includes(needle)
      : haystack.toLowerCase().includes(needle.toLowerCase());
  }

  // Escape every regex metacharacter except '*', then expand '*' to '.*'.
  const source = (caseSensitive ? needle : needle.toLowerCase())
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*');

  return new RegExp(source, caseSensitive ? '' : 'i').test(haystack);
}
|
|
|
|
/**
 * Classify a partial query for autocomplete.
 *
 * The trimmed query is checked in two steps:
 *  - It starts with `[` and has no `]` yet: reported as `property`. Text
 *    before a colon (when the colon is not the first character) is the
 *    prefix and text after it is the value; otherwise the prefix is empty and
 *    the value is everything after the bracket.
 *  - The text before the first colon, lower-cased, is a known operator:
 *    reported as that operator, with the text after the colon as the value.
 * Anything else is `general`, with the whole trimmed query as the value.
 *
 * @param query - The query text typed so far.
 * @returns The detected type, the prefix and the value.
 */
export function detectQueryType(query: string): {
  type: 'path' | 'file' | 'content' | 'tag' | 'line' | 'block' | 'section' | 'task' | 'task-todo' | 'task-done' | 'match-case' | 'ignore-case' | 'property' | 'general' | null;
  prefix: string;
  value: string;
} {
  const text = query.trim();

  // An open bracket that has not been closed yet: a property is being typed.
  if (text.startsWith('[') && !text.includes(']')) {
    const partial = text.slice(1);
    const split = partial.indexOf(':');
    return split > 0
      ? { type: 'property', prefix: partial.slice(0, split), value: partial.slice(split + 1) }
      : { type: 'property', prefix: '', value: partial };
  }

  const separator = text.indexOf(':');
  if (separator > 0) {
    const candidate = text.slice(0, separator).toLowerCase();
    switch (candidate) {
      case 'path':
      case 'file':
      case 'content':
      case 'tag':
      case 'line':
      case 'block':
      case 'section':
      case 'task':
      case 'task-todo':
      case 'task-done':
      case 'match-case':
      case 'ignore-case':
        return { type: candidate, prefix: candidate, value: text.slice(separator + 1) };
      default:
        break;
    }
  }

  return { type: 'general', prefix: '', value: text };
}
|