222 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			222 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| #!/usr/bin/env node
 | |
| 
 | |
| import fs from 'node:fs/promises';
 | |
| import fssync from 'node:fs';
 | |
| import path from 'node:path';
 | |
| import { fileURLToPath } from 'node:url';
 | |
| import fg from 'fast-glob';
 | |
| import matter from 'gray-matter';
 | |
| import removeMd from 'remove-markdown';
 | |
| import { meiliClient, vaultIndexName, ensureIndexSettings } from './meilisearch.client.mjs';
 | |
| import { VAULT_PATH as CFG_VAULT_PATH, MEILI_HOST as CFG_MEILI_HOST, MEILI_API_KEY as CFG_MEILI_KEY } from './config.mjs';
 | |
| 
 | |
// ES modules have no built-in __filename/__dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Vault root: an absolute configured path is used as-is; a relative one is
// resolved against the parent of the directory that contains this module.
const VAULT_PATH = path.isAbsolute(CFG_VAULT_PATH)
  ? CFG_VAULT_PATH
  : path.resolve(__dirname, '..', CFG_VAULT_PATH);

// Startup diagnostics. Only the presence and length of the key are reported:
// printing any part of the key itself would leak secret material into logs.
const meiliKeyLength = CFG_MEILI_KEY ? String(CFG_MEILI_KEY).length : 0;
console.log('[Meili Indexer] Environment check:', {
  MEILI_MASTER_KEY: CFG_MEILI_KEY ? `SET (${meiliKeyLength} chars)` : 'NOT SET',
  MEILI_API_KEY: CFG_MEILI_KEY ? 'SET' : 'NOT SET',
  MEILI_HOST: CFG_MEILI_HOST,
  VAULT_PATH,
});
 | |
| 
 | |
/**
 * Derive the facet fields (calendar year and 1-based month) from a timestamp.
 * Values are computed in the local time zone of the running process.
 *
 * @param {number} timestampMs - Milliseconds since the Unix epoch.
 * @returns {{ year: number, month: number }} Year and month (1-12).
 */
function toYMD(timestampMs) {
  const when = new Date(timestampMs);
  const year = when.getFullYear();
  const month = when.getMonth() + 1; // getMonth() is zero-based
  return { year, month };
}
 | |
| 
 | |
/**
 * List every ancestor directory of a relative file path, shallowest first.
 * Both "/" and "\" are accepted as separators; results always use "/".
 * Example: "Projects/Angular/App.md" -> ["Projects", "Projects/Angular"]
 *
 * @param {string} relativePath - Path of a file relative to the vault root.
 * @returns {string[]} Cumulative directory paths (empty for a root-level file).
 */
function parentDirs(relativePath) {
  const segments = relativePath.split(/[\\/]/);
  // Drop the final segment (the file name), then rebuild each cumulative prefix.
  return segments
    .slice(0, -1)
    .map((_, depth) => segments.slice(0, depth + 1).join('/'));
}
 | |
| 
 | |
/**
 * Collect the text of ATX-style markdown headings ("# Title" to "###### Title").
 *
 * A heading is a line that starts with one to six "#" characters, followed by
 * at least one space or tab and some text on the same line. Lines such as
 * "#tag" (no separator) or a lone "#" are not headings.
 *
 * @param {string} content - Markdown text to scan.
 * @returns {string[]} Trimmed heading texts in document order, at most 200.
 */
function extractHeadings(content) {
  const maxHeadings = 200;
  // [ \t]+ rather than \s+: \s also matches line breaks, which let a lone "#"
  // swallow the following blank lines and report the next paragraph as a heading.
  const headingRegex = /^#{1,6}[ \t]+(.+)$/gm;
  const headings = [];

  for (const match of String(content ?? '').matchAll(headingRegex)) {
    const text = match[1].trim();
    if (text === '') {
      continue; // "#" followed only by whitespace carries no searchable text
    }
    headings.push(text);
    if (headings.length >= maxHeadings) {
      break; // stop scanning once the cap is reached
    }
  }

  return headings;
}
 | |
| 
 | |
/**
 * Read one markdown file and turn it into the document shape sent to the index.
 *
 * @param {string} absPath - Absolute path of the markdown file.
 * @returns {Promise<object>} Document with id, path, title, tags, frontmatter
 *   properties, plain-text content, headings, timestamps and facet fields.
 */
export async function buildDocumentFromFile(absPath) {
  // Vault-relative path, always with forward slashes.
  const relPath = path.relative(VAULT_PATH, absPath).replaceAll('\\', '/');
  const fileName = path.basename(relPath);

  // Split YAML frontmatter from the markdown body.
  const source = await fs.readFile(absPath, 'utf8');
  const parsed = matter(source);
  const frontmatter = parsed.data;
  const body = parsed.content;

  // Plain text for full-text search, capped at 200,000 characters.
  const plainText = removeMd(body).slice(0, 200_000);

  // Title falls back to the file name without its extension.
  const title = frontmatter.title ?? path.parse(fileName).name;

  // Tags may be a list, a single scalar, or absent.
  let tags = [];
  if (Array.isArray(frontmatter.tags)) {
    tags = frontmatter.tags.map(String);
  } else if (frontmatter.tags) {
    tags = [String(frontmatter.tags)];
  }

  const headings = extractHeadings(body);

  const stats = await fs.stat(absPath);
  const modified = toYMD(stats.mtimeMs);

  // Document ids are restricted to a-z A-Z 0-9 - _ ; every other character
  // (dots, slashes, spaces, ...) becomes an underscore.
  const documentId = relPath.replace(/[^a-zA-Z0-9_-]/g, '_');

  return {
    id: documentId,
    path: relPath,
    file: fileName,
    title,
    tags,
    properties: frontmatter ?? {},
    content: plainText,
    headings,
    createdAt: stats.birthtimeMs || stats.ctimeMs,
    updatedAt: stats.mtimeMs,
    year: modified.year,
    month: modified.month,
    parentDirs: parentDirs(relPath),
    excerpt: plainText.slice(0, 500),
    // Boolean flags for quick filtering; only a literal `true` counts.
    favoris: frontmatter.favoris === true,
    template: frontmatter.template === true,
    task: frontmatter.task === true
  };
}
 | |
| 
 | |
/**
 * Rebuild the index from every markdown file found under the vault.
 *
 * Files are converted and submitted in batches of 750. A file that fails to
 * convert is logged and skipped; it does not abort the run.
 *
 * @returns {Promise<{ indexed: boolean, count: number, elapsedMs: number }>}
 *   Summary with the number of documents submitted and the elapsed time.
 */
export async function fullReindex() {
  console.log('[Meili] Starting full reindex...');
  const startedAt = Date.now();

  const index = await ensureIndexSettings(meiliClient(), vaultIndexName(VAULT_PATH));

  // Every non-hidden *.md file below the vault root, as absolute paths.
  const markdownFiles = await fg(['**/*.md'], {
    cwd: VAULT_PATH,
    dot: false,
    onlyFiles: true,
    absolute: true
  });

  console.log(`[Meili] Found ${markdownFiles.length} markdown files`);

  // Work in fixed-size batches to keep memory usage bounded.
  const batchSize = 750;
  let totalIndexed = 0;
  let batchNumber = 0;

  for (let offset = 0; offset < markdownFiles.length; offset += batchSize) {
    batchNumber += 1;

    const pending = markdownFiles
      .slice(offset, offset + batchSize)
      .map((file) =>
        buildDocumentFromFile(file).catch((err) => {
          console.error(`[Meili] Failed to process ${file}:`, err.message);
          return null;
        })
      );

    // Failed files resolve to null and are dropped here.
    const documents = (await Promise.all(pending)).filter(Boolean);
    if (documents.length === 0) {
      continue;
    }

    const task = await index.addDocuments(documents);
    console.log(`[Meili] Batch ${batchNumber}: Queued ${documents.length} documents (task ${task.taskUid})`);
    totalIndexed += documents.length;
  }

  const elapsedMs = Date.now() - startedAt;
  console.log(`[Meili] Reindex complete: ${totalIndexed} documents indexed in ${elapsedMs}ms`);

  return {
    indexed: true,
    count: totalIndexed,
    elapsedMs
  };
}
 | |
| 
 | |
/**
 * Add a single file to the index, or replace its existing document.
 *
 * A missing file is reported with a warning and ignored. Indexing errors are
 * logged rather than thrown.
 *
 * @param {string} relOrAbs - Absolute path, or a path relative to the vault root.
 * @returns {Promise<void>}
 */
export async function upsertFile(relOrAbs) {
  let abs = relOrAbs;
  if (!path.isAbsolute(relOrAbs)) {
    abs = path.join(VAULT_PATH, relOrAbs);
  }

  const isPresent = fssync.existsSync(abs);
  if (!isPresent) {
    console.warn(`[Meili] File not found for upsert: ${abs}`);
    return;
  }

  try {
    const index = await ensureIndexSettings(meiliClient(), vaultIndexName(VAULT_PATH));
    const document = await buildDocumentFromFile(abs);
    await index.addDocuments([document]);
    console.log(`[Meili] Upserted: ${document.id}`);
  } catch (err) {
    console.error(`[Meili] Failed to upsert ${abs}:`, err.message);
  }
}
 | |
| 
 | |
/**
 * Remove a file's document from the index.
 *
 * Documents are stored under a sanitised id derived from the vault-relative
 * path (see buildDocumentFromFile), not under the path itself. The same
 * derivation is applied here so the delete request targets the stored id.
 * Errors are logged rather than thrown.
 *
 * @param {string} relPath - Path relative to the vault root; an absolute path
 *   is also accepted.
 * @returns {Promise<void>}
 */
export async function deleteFile(relPath) {
  try {
    // Normalise exactly as the indexing path does: resolve against the vault,
    // make it vault-relative again, and use forward slashes.
    const abs = path.isAbsolute(relPath) ? relPath : path.join(VAULT_PATH, relPath);
    const rel = path.relative(VAULT_PATH, abs).replaceAll('\\', '/');
    // Must match the id produced at indexing time, otherwise nothing is deleted.
    const documentId = rel.replace(/[^a-zA-Z0-9_-]/g, '_');

    const client = meiliClient();
    const indexUid = vaultIndexName(VAULT_PATH);
    const index = await ensureIndexSettings(client, indexUid);
    await index.deleteDocuments([documentId]);
    console.log(`[Meili] Deleted: ${relPath} (id ${documentId})`);
  } catch (err) {
    console.error(`[Meili] Failed to delete ${relPath}:`, err.message);
  }
}
 | |
| 
 | |
// CLI execution: node server/meilisearch-indexer.mjs
// Runs a full reindex only when this module is the script passed to node,
// then exits with 0 on success or 1 on failure.
if (process.argv[1] === __filename) {
  const reportSuccess = (result) => {
    console.log('[Meili] Reindex done:', result);
    process.exit(0);
  };

  const reportFailure = (err) => {
    console.error('[Meili] Reindex failed:', err);
    process.exit(1);
  };

  fullReindex().then(reportSuccess).catch(reportFailure);
}
 |