ObsiViewer/server/meilisearch-indexer.mjs

218 lines
6.1 KiB
JavaScript

#!/usr/bin/env node
import fs from 'node:fs/promises';
import fssync from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import fg from 'fast-glob';
import matter from 'gray-matter';
import removeMd from 'remove-markdown';
import { meiliClient, vaultIndexName, ensureIndexSettings } from './meilisearch.client.mjs';
import { VAULT_PATH as CFG_VAULT_PATH, MEILI_HOST as CFG_MEILI_HOST, MEILI_API_KEY as CFG_MEILI_KEY } from './config.mjs';
// ES modules have no built-in __filename/__dirname, so derive both from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// An absolute vault path from config is used verbatim; a relative one is
// resolved against the parent of the directory containing this file.
const VAULT_PATH = path.isAbsolute(CFG_VAULT_PATH)
  ? CFG_VAULT_PATH
  : path.resolve(__dirname, '..', CFG_VAULT_PATH);

// Startup diagnostics. The braces keep the masking helpers out of module scope.
{
  // Both key labels below report the same configured key; only its first
  // eight characters (plus its length) are printed, never the full value.
  const isKeySet = Boolean(CFG_MEILI_KEY);
  const keyText = isKeySet ? String(CFG_MEILI_KEY) : '';
  const keyPreview = `${keyText.slice(0, 8)}...`;

  console.log('[Meili Indexer] Environment check:', {
    MEILI_MASTER_KEY: isKeySet ? `${keyPreview} (${keyText.length} chars)` : 'NOT SET',
    MEILI_API_KEY: isKeySet ? keyPreview : 'NOT SET',
    MEILI_HOST: CFG_MEILI_HOST,
    VAULT_PATH,
  });
}
/**
 * Derive the calendar year and month of a timestamp for faceting.
 * Both values are read in the local time zone of the running process.
 *
 * @param {number} timestampMs - Milliseconds since the Unix epoch.
 * @returns {{ year: number, month: number }} Four-digit year and 1-based month (1 = January).
 */
function toYMD(timestampMs) {
  const when = new Date(timestampMs);
  const year = when.getFullYear();
  const month = when.getMonth() + 1; // getMonth() is zero-based
  return { year, month };
}
/**
 * List every ancestor directory of a relative file path, shallowest first.
 * Both `/` and `\` are accepted as separators; results always use `/`.
 *
 * Example: "Projects/Angular/App.md" -> ["Projects", "Projects/Angular"]
 *
 * @param {string} relativePath - File path relative to the vault root.
 * @returns {string[]} Cumulative directory paths; empty for a file with no parent directory.
 */
function parentDirs(relativePath) {
  // Drop the final segment (the file name) so only directory names remain.
  const directories = relativePath.split(/[\\/]/).slice(0, -1);
  return directories.map((_, depth) => directories.slice(0, depth + 1).join('/'));
}
/**
 * Extract the text of ATX-style headings (`# Title` through `###### Title`)
 * from markdown content, in document order.
 *
 * @param {string} content - Markdown source (frontmatter already removed by the caller).
 * @returns {string[]} Trimmed heading texts, at most 200 entries.
 */
function extractHeadings(content) {
  const MAX_HEADINGS = 200;
  // The gap after the hashes must be spaces/tabs on the SAME line. Using `\s+`
  // here would also match newlines, so a bare "#" line would capture the next
  // line of body text as a heading (and could swallow a real heading there).
  // `#{1,6}` limits matches to the six ATX heading levels.
  const headingRegex = /^#{1,6}[ \t]+(.+)$/gm;
  const headings = [];
  for (const [, rawText] of content.matchAll(headingRegex)) {
    const text = rawText.trim();
    if (text === '') {
      continue; // hashes followed only by whitespace carry no searchable text
    }
    headings.push(text);
    if (headings.length >= MAX_HEADINGS) {
      break; // stop scanning once the cap is reached
    }
  }
  return headings;
}
/**
 * Read one markdown file and turn it into the document object sent to the index.
 *
 * @param {string} absPath - Absolute path of the markdown file.
 * @returns {Promise<object>} Document with id, path, file, title, tags, properties,
 *   content, headings, createdAt, updatedAt, year, month, parentDirs and excerpt.
 *   Rejects if the file cannot be read or stat'ed.
 */
export async function buildDocumentFromFile(absPath) {
  // Vault-relative path, always written with forward slashes.
  const relPath = path.relative(VAULT_PATH, absPath).replaceAll('\\', '/');
  const fileName = path.basename(relPath);

  const source = await fs.readFile(absPath, 'utf8');

  // Split YAML frontmatter from the markdown body.
  const parsed = matter(source);
  const frontmatter = parsed.data;
  const body = parsed.content;

  // Plain-text body with markdown syntax stripped, capped at 200,000 characters.
  const plainText = removeMd(body).slice(0, 200_000);

  // Frontmatter title wins; otherwise use the file name without its extension.
  const title = frontmatter.title ?? path.parse(fileName).name;

  // Tags may be a list, a single value, or absent; always end up with string[].
  let tags;
  if (Array.isArray(frontmatter.tags)) {
    tags = frontmatter.tags.map(String);
  } else if (frontmatter.tags) {
    tags = [String(frontmatter.tags)];
  } else {
    tags = [];
  }

  const headings = extractHeadings(body);

  const stats = await fs.stat(absPath);
  const modified = toYMD(stats.mtimeMs);

  return {
    // Every character outside a-z A-Z 0-9 - _ becomes "_" to form the document id.
    // NOTE(review): distinct paths can collapse to the same id
    // (e.g. "a/b.md" and "a_b.md" both become "a_b_md").
    id: relPath.replace(/[^a-zA-Z0-9_-]/g, '_'),
    path: relPath,
    file: fileName,
    title,
    tags,
    properties: frontmatter ?? {},
    content: plainText,
    headings,
    // Fall back to ctime when the birth time is reported as 0.
    createdAt: stats.birthtimeMs || stats.ctimeMs,
    updatedAt: stats.mtimeMs,
    year: modified.year,
    month: modified.month,
    parentDirs: parentDirs(relPath),
    excerpt: plainText.slice(0, 500),
  };
}
/**
 * Walk the whole vault and submit every markdown file to the index in batches.
 *
 * Files that fail to build are logged and skipped. Each batch is handed to
 * `addDocuments` and its task id is logged; this function does not wait on
 * those tasks afterwards, and it does not remove documents for files that no
 * longer exist.
 *
 * @returns {Promise<{ indexed: boolean, count: number, elapsedMs: number }>}
 *   `count` is the number of documents submitted.
 */
export async function fullReindex() {
  console.log('[Meili] Starting full reindex...');
  const startedAt = Date.now();

  const index = await ensureIndexSettings(meiliClient(), vaultIndexName(VAULT_PATH));

  // Every non-hidden *.md file under the vault, as absolute paths.
  const markdownFiles = await fg(['**/*.md'], {
    cwd: VAULT_PATH,
    dot: false,
    onlyFiles: true,
    absolute: true,
  });
  console.log(`[Meili] Found ${markdownFiles.length} markdown files`);

  // Build one document; a failure is reported and turned into null so a
  // single unreadable file does not abort the whole batch.
  const buildOrNull = async (file) => {
    try {
      return await buildDocumentFromFile(file);
    } catch (err) {
      console.error(`[Meili] Failed to process ${file}:`, err.message);
      return null;
    }
  };

  // Fixed-size batches bound how many files are read and held in memory at once.
  const batchSize = 750;
  let totalIndexed = 0;
  let batchNumber = 0;

  for (let offset = 0; offset < markdownFiles.length; offset += batchSize) {
    batchNumber += 1;
    const batch = markdownFiles.slice(offset, offset + batchSize);
    const built = await Promise.all(batch.map((file) => buildOrNull(file)));
    const documents = built.filter(Boolean);

    if (documents.length === 0) {
      continue;
    }

    const task = await index.addDocuments(documents);
    console.log(`[Meili] Batch ${batchNumber}: Queued ${documents.length} documents (task ${task.taskUid})`);
    totalIndexed += documents.length;
  }

  const elapsedMs = Date.now() - startedAt;
  console.log(`[Meili] Reindex complete: ${totalIndexed} documents indexed in ${elapsedMs}ms`);

  return { indexed: true, count: totalIndexed, elapsedMs };
}
/**
 * Add or refresh the index entry for a single file.
 *
 * A missing file produces a warning and nothing else. Errors while building
 * or submitting the document are logged, not rethrown.
 *
 * @param {string} relOrAbs - Absolute path, or a path relative to the vault root.
 * @returns {Promise<void>}
 */
export async function upsertFile(relOrAbs) {
  let target = relOrAbs;
  if (!path.isAbsolute(target)) {
    target = path.join(VAULT_PATH, target);
  }

  const isPresent = fssync.existsSync(target);
  if (!isPresent) {
    console.warn(`[Meili] File not found for upsert: ${target}`);
    return;
  }

  try {
    const index = await ensureIndexSettings(meiliClient(), vaultIndexName(VAULT_PATH));
    const record = await buildDocumentFromFile(target);
    await index.addDocuments([record]);
    console.log(`[Meili] Upserted: ${record.id}`);
  } catch (err) {
    console.error(`[Meili] Failed to upsert ${target}:`, err.message);
  }
}
/**
 * Remove a file's document from the index.
 *
 * Documents are stored under a sanitized id (the vault-relative path with
 * every character outside a-z A-Z 0-9 - _ replaced by "_"), not under the
 * raw path. The same id is derived here so the deletion targets the document
 * that indexing created; passing the raw path would never match one.
 *
 * Errors are logged, not rethrown.
 *
 * @param {string} relPath - Path relative to the vault root (an absolute path
 *   inside the vault is also accepted).
 * @returns {Promise<void>}
 */
export async function deleteFile(relPath) {
  try {
    // Mirror the id derivation used when documents are built: resolve against
    // the vault, take the vault-relative path with forward slashes, sanitize.
    const abs = path.isAbsolute(relPath) ? relPath : path.join(VAULT_PATH, relPath);
    const rel = path.relative(VAULT_PATH, abs).replaceAll('\\', '/');
    const documentId = rel.replace(/[^a-zA-Z0-9_-]/g, '_');

    const client = meiliClient();
    const indexUid = vaultIndexName(VAULT_PATH);
    const index = await ensureIndexSettings(client, indexUid);
    await index.deleteDocuments([documentId]);
    console.log(`[Meili] Deleted: ${relPath}`);
  } catch (err) {
    console.error(`[Meili] Failed to delete ${relPath}:`, err.message);
  }
}
// Direct invocation (node server/meilisearch-indexer.mjs): run a full reindex
// and exit 0 on success or 1 on failure. Importing this module skips this branch.
if (process.argv[1] === __filename) {
  const runFromCli = async () => {
    let exitCode = 0;
    try {
      const result = await fullReindex();
      console.log('[Meili] Reindex done:', result);
    } catch (err) {
      console.error('[Meili] Reindex failed:', err);
      exitCode = 1;
    }
    process.exit(exitCode);
  };

  runFromCli();
}