222 lines
6.2 KiB
JavaScript
222 lines
6.2 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import fs from 'node:fs/promises';
|
|
import fssync from 'node:fs';
|
|
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import fg from 'fast-glob';
|
|
import matter from 'gray-matter';
|
|
import removeMd from 'remove-markdown';
|
|
import { meiliClient, vaultIndexName, ensureIndexSettings } from './meilisearch.client.mjs';
|
|
import { VAULT_PATH as CFG_VAULT_PATH, MEILI_HOST as CFG_MEILI_HOST, MEILI_API_KEY as CFG_MEILI_KEY } from './config.mjs';
|
|
|
|
// ES modules have no built-in __filename / __dirname, so derive both from
// this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Vault location: an absolute configured path is used verbatim, a relative
// one is resolved against the parent of this file's directory.
const VAULT_PATH = (() => {
  if (path.isAbsolute(CFG_VAULT_PATH)) {
    return CFG_VAULT_PATH;
  }
  return path.resolve(__dirname, '..', CFG_VAULT_PATH);
})();

// Startup diagnostics, emitted once when the module is loaded.
// NOTE(review): both key entries are derived from the same configured key,
// and the first 8 characters of that key are written to the log — confirm
// this is acceptable for the environments where this runs.
{
  const keyText = CFG_MEILI_KEY ? String(CFG_MEILI_KEY) : null;
  const keyPrefix = keyText === null ? null : keyText.substring(0, 8);

  console.log('[Meili Indexer] Environment check:', {
    MEILI_MASTER_KEY:
      keyText === null ? 'NOT SET' : `${keyPrefix}... (${keyText.length} chars)`,
    MEILI_API_KEY: keyText === null ? 'NOT SET' : `${keyPrefix}...`,
    MEILI_HOST: CFG_MEILI_HOST,
    VAULT_PATH,
  });
}
|
|
|
|
/**
 * Split a millisecond timestamp into the calendar year and month used for
 * faceting. Both values are read in the process's local time zone.
 *
 * @param {number} timestampMs - Milliseconds since the Unix epoch.
 * @returns {{year: number, month: number}} Four-digit year and 1-based month (1-12).
 */
function toYMD(timestampMs) {
  const when = new Date(timestampMs);
  const year = when.getFullYear();
  // getMonth() is 0-based; shift it to the conventional 1-12 range.
  const month = 1 + when.getMonth();
  return { year, month };
}
|
|
|
|
/**
 * List every ancestor directory of a relative file path, shallowest first.
 * Both "/" and "\" are accepted as separators; results always use "/".
 *
 * Example: "Projects/Angular/App.md" -> ["Projects", "Projects/Angular"]
 *
 * @param {string} relativePath - File path relative to the vault root.
 * @returns {string[]} Cumulative directory paths; empty for a top-level file.
 */
function parentDirs(relativePath) {
  const segments = relativePath.split(/[\\/]/);
  // Everything except the final segment (the file name) is a directory.
  const dirSegments = segments.slice(0, -1);
  return dirSegments.map((_, depth) => dirSegments.slice(0, depth + 1).join('/'));
}
|
|
|
|
/**
 * Extract ATX-style heading texts ("# Title", "## Section", ...) from
 * markdown content, in document order.
 *
 * The content is scanned line by line so that:
 *  - the whitespace after the "#" run can never spill onto the next line
 *    (a bare "#" line must not turn the following line into a heading);
 *  - lines inside fenced code blocks (``` or ~~~) are ignored, so shell or
 *    Python comments in code samples are not indexed as headings;
 *  - headings that are empty after trimming are skipped.
 *
 * @param {string} content - Markdown body (frontmatter already removed).
 * @returns {string[]} Trimmed heading texts, at most 200 entries.
 */
function extractHeadings(content) {
  const MAX_HEADINGS = 200;
  const headings = [];
  // Marker (e.g. "```" or "~~~~") of the fence currently open, or null.
  let openFence = null;

  for (const line of String(content).split(/\r\n|[\n\r\u2028\u2029]/)) {
    const fence = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);

    if (openFence !== null) {
      // A fence closes only on the same marker character, at least as long
      // as the opener, with nothing but whitespace after it.
      const closes =
        fence !== null &&
        fence[1][0] === openFence[0] &&
        fence[1].length >= openFence.length &&
        fence[2].trim() === '';
      if (closes) {
        openFence = null;
      }
      continue;
    }

    // A backtick run followed by more backticks on the same line is inline
    // code, not the start of a fenced block.
    if (fence !== null && !(fence[1][0] === '`' && fence[2].includes('`'))) {
      openFence = fence[1];
      continue;
    }

    const heading = /^#+\s+(.+)$/.exec(line);
    if (heading === null) {
      continue;
    }

    const text = heading[1].trim();
    if (text === '') {
      continue;
    }

    headings.push(text);
    // Stop scanning as soon as the cap is reached.
    if (headings.length >= MAX_HEADINGS) {
      break;
    }
  }

  return headings;
}
|
|
|
|
/**
 * Read one markdown file and turn it into the document object that gets
 * sent to the search index.
 *
 * @param {string} absPath - Absolute path of the markdown file.
 * @returns {Promise<object>} Document with id, path, file, title, tags,
 *   properties (raw frontmatter), content, headings, createdAt, updatedAt,
 *   year, month, parentDirs, excerpt and the favoris/template/task flags.
 *   Rejects if the file cannot be read or stat'ed.
 */
export async function buildDocumentFromFile(absPath) {
  // Vault-relative path, normalised to forward slashes.
  const relPath = path.relative(VAULT_PATH, absPath).replaceAll('\\', '/');
  const fileName = path.basename(relPath);

  // Split the raw file into frontmatter data and markdown body (gray-matter).
  const source = await fs.readFile(absPath, 'utf8');
  const parsed = matter(source);
  const frontmatter = parsed.data;
  const body = parsed.content;

  // Plain text for indexing, capped at 200,000 characters as a safety limit.
  const plainText = removeMd(body).slice(0, 200_000);

  // Title falls back to the file name without its extension.
  const title = frontmatter.title ?? path.parse(fileName).name;

  // Tags may be a list, a single value, or absent; always index a string array.
  let tags;
  if (Array.isArray(frontmatter.tags)) {
    tags = frontmatter.tags.map(String);
  } else if (frontmatter.tags) {
    tags = [String(frontmatter.tags)];
  } else {
    tags = [];
  }

  const headings = extractHeadings(body);

  // File timestamps drive createdAt/updatedAt and the year/month facets.
  const stats = await fs.stat(absPath);
  const { year, month } = toYMD(stats.mtimeMs);

  // Meilisearch requires alphanumeric IDs (a-z A-Z 0-9 - _), so every other
  // character of the relative path (dots, slashes, ...) becomes "_".
  const documentId = relPath.replace(/[^a-zA-Z0-9_-]/g, '_');

  return {
    id: documentId,
    path: relPath,
    file: fileName,
    title,
    tags,
    properties: frontmatter ?? {},
    content: plainText,
    headings,
    // Fall back to ctime when the birth time is reported as 0.
    createdAt: stats.birthtimeMs || stats.ctimeMs,
    updatedAt: stats.mtimeMs,
    year,
    month,
    parentDirs: parentDirs(relPath),
    excerpt: plainText.slice(0, 500),
    // Boolean flags for quick filtering; only a literal `true` counts.
    favoris: frontmatter.favoris === true,
    template: frontmatter.template === true,
    task: frontmatter.task === true,
  };
}
|
|
|
|
/**
 * Walk the whole vault, build a document for every markdown file and send
 * the documents to the index in batches.
 *
 * Files that fail to convert are logged and skipped; they do not abort the
 * run. Documents already in the index are not removed by this function.
 *
 * @returns {Promise<{indexed: boolean, count: number, elapsedMs: number}>}
 *   Summary with the number of documents submitted and the elapsed time.
 */
export async function fullReindex() {
  console.log('[Meili] Starting full reindex...');
  const startedAt = Date.now();

  const index = await ensureIndexSettings(meiliClient(), vaultIndexName(VAULT_PATH));

  // Collect every markdown file below the vault root (dot-entries excluded).
  const markdownFiles = await fg(['**/*.md'], {
    cwd: VAULT_PATH,
    dot: false,
    onlyFiles: true,
    absolute: true,
  });

  console.log(`[Meili] Found ${markdownFiles.length} markdown files`);

  // Convert one file; a failure is logged and reported as null.
  const loadOrNull = async (file) => {
    try {
      return await buildDocumentFromFile(file);
    } catch (err) {
      console.error(`[Meili] Failed to process ${file}:`, err.message);
      return null;
    }
  };

  // Work in fixed-size batches to keep memory use bounded.
  const batchSize = 750;
  let totalIndexed = 0;
  let batchNumber = 0;

  for (let offset = 0; offset < markdownFiles.length; offset += batchSize) {
    batchNumber += 1;

    const batch = markdownFiles.slice(offset, offset + batchSize);
    const results = await Promise.all(batch.map((file) => loadOrNull(file)));
    const validDocs = results.filter((doc) => Boolean(doc));

    if (validDocs.length === 0) {
      continue;
    }

    const task = await index.addDocuments(validDocs);
    console.log(`[Meili] Batch ${batchNumber}: Queued ${validDocs.length} documents (task ${task.taskUid})`);
    totalIndexed += validDocs.length;
  }

  const elapsed = Date.now() - startedAt;
  console.log(`[Meili] Reindex complete: ${totalIndexed} documents indexed in ${elapsed}ms`);

  return {
    indexed: true,
    count: totalIndexed,
    elapsedMs: elapsed,
  };
}
|
|
|
|
/**
 * Add or update the index entry for a single file.
 *
 * A missing file produces a warning and nothing else. Errors while building
 * or sending the document are logged and not rethrown.
 *
 * @param {string} relOrAbs - Absolute path, or a path relative to the vault root.
 * @returns {Promise<void>}
 */
export async function upsertFile(relOrAbs) {
  let abs = relOrAbs;
  if (!path.isAbsolute(relOrAbs)) {
    abs = path.join(VAULT_PATH, relOrAbs);
  }

  const fileExists = fssync.existsSync(abs);
  if (!fileExists) {
    console.warn(`[Meili] File not found for upsert: ${abs}`);
    return;
  }

  try {
    const index = await ensureIndexSettings(meiliClient(), vaultIndexName(VAULT_PATH));
    const doc = await buildDocumentFromFile(abs);
    await index.addDocuments([doc]);
    console.log(`[Meili] Upserted: ${doc.id}`);
  } catch (err) {
    console.error(`[Meili] Failed to upsert ${abs}:`, err.message);
  }
}
|
|
|
|
/**
 * Delete a file's document from the index.
 *
 * Documents are stored under a sanitized id (the vault-relative path with
 * every character outside a-z A-Z 0-9 - _ replaced by "_"), not under the
 * raw path. The same id is derived here so the delete request targets the
 * document that was actually indexed. An already-sanitized id passes through
 * unchanged, so callers that pass one keep working.
 *
 * Errors are logged and not rethrown.
 *
 * @param {string} relPath - Vault-relative path of the file (an absolute
 *   path inside the vault is also accepted).
 * @returns {Promise<void>}
 */
export async function deleteFile(relPath) {
  try {
    // Canonical vault-relative path, whether the input was relative or absolute.
    const rel = path.relative(VAULT_PATH, path.resolve(VAULT_PATH, relPath));
    // Same sanitization as the indexed document id; "/" and "\" both map to "_".
    const docId = rel.replace(/[^a-zA-Z0-9_-]/g, '_');

    const client = meiliClient();
    const indexUid = vaultIndexName(VAULT_PATH);
    const index = await ensureIndexSettings(client, indexUid);
    await index.deleteDocuments([docId]);
    console.log(`[Meili] Deleted: ${relPath}`);
  } catch (err) {
    console.error(`[Meili] Failed to delete ${relPath}:`, err.message);
  }
}
|
|
|
|
// CLI execution: node server/meilisearch-indexer.mjs
// Runs a full reindex only when this file is the script Node was started
// with; importing the module does not trigger it. The process exits with
// code 0 on success and 1 on failure.
if (process.argv[1] === __filename) {
  void (async () => {
    try {
      const summary = await fullReindex();
      console.log('[Meili] Reindex done:', summary);
      process.exit(0);
    } catch (failure) {
      console.error('[Meili] Reindex failed:', failure);
      process.exit(1);
    }
  })();
}
|