import express from 'express';
import * as cheerio from 'cheerio';
import axios from 'axios';
import rateLimit from 'express-rate-limit';

const router = express.Router();

// Rate limiter for Rumble scraping to prevent being blocked
const rumbleLimiter = rateLimit({
  windowMs: 60 * 1000, // 1 min
  max: 20,
  standardHeaders: true,
  legacyHeaders: false,
  message: { error: 'Too many requests to Rumble API. Please try again later.' },
});
router.use(rumbleLimiter);

// Simple in-memory cache with TTL
const cache = new Map();
const TTL_MS = 60 * 1000; // 60s
// Upper bound on cached entries. Expired entries are otherwise only removed
// when the same key is read again, so arbitrary search queries would grow
// the map without limit.
const MAX_CACHE_ENTRIES = 500;

const DEFAULT_LIMIT = 24;
const MAX_LIMIT = 50;

// Multipliers for abbreviated counts such as "1.2K" or "3M".
const COUNT_SUFFIX = { K: 1e3, M: 1e6, B: 1e9 };

/**
 * Builds a cache key from a route path and its parameters.
 *
 * @param {string} path - Route identifier, e.g. '/browse'.
 * @param {Object} params - Parameters serialised via URLSearchParams.
 * @returns {string} Key of the form `path?a=1&b=2`.
 */
function cacheKey(path, params) {
  return `${path}?${new URLSearchParams(params).toString()}`;
}

/**
 * Stores a value in the cache with an expiry of now + TTL_MS.
 * When the cache is full and the key is new, expired entries are swept
 * first; if that is not enough, the oldest entries (Map insertion order)
 * are evicted.
 *
 * @param {string} key - Cache key.
 * @param {*} data - Value to cache.
 */
function setCache(key, data) {
  if (!cache.has(key) && cache.size >= MAX_CACHE_ENTRIES) {
    const now = Date.now();
    for (const [storedKey, entry] of cache) {
      if (now > entry.expires) cache.delete(storedKey);
    }
    while (cache.size >= MAX_CACHE_ENTRIES) {
      cache.delete(cache.keys().next().value);
    }
  }
  cache.set(key, { data, expires: Date.now() + TTL_MS });
}

/**
 * Reads a value from the cache.
 *
 * @param {string} key - Cache key.
 * @returns {*} The cached data, or null when missing or expired
 *   (an expired entry is deleted on the way out).
 */
function getCache(key) {
  const hit = cache.get(key);
  if (!hit) return null;
  if (Date.now() > hit.expires) {
    cache.delete(key);
    return null;
  }
  return hit.data;
}

/**
 * Fetches a URL with browser-like request headers and a 15 s timeout.
 * Rejections from axios are not caught here and propagate to the caller.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<*>} The response body (`resp.data`).
 */
async function httpGet(url) {
  const resp = await axios.get(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.8',
    },
    timeout: 15000,
  });
  return resp.data;
}

/**
 * Parses a human-readable count such as "1,234 views" or "1.2K".
 * Previously all non-digits were stripped, which turned "1.2K" into 12.
 *
 * @param {string} text - Raw text containing the count.
 * @returns {number} The count as an integer, or 0 when no number is found.
 */
function parseCount(text) {
  // The suffix only counts when it is not the start of a word, so
  // "12 Minutes" is read as 12 rather than 12 million.
  const m = String(text || '').match(/(\d[\d,]*(?:\.\d+)?)\s*([KMB])?(?![A-Za-z])/i);
  if (!m) return 0;
  const base = Number.parseFloat(m[1].replace(/,/g, ''));
  if (!Number.isFinite(base)) return 0;
  const multiplier = m[2] ? COUNT_SUFFIX[m[2].toUpperCase()] : 1;
  return Math.round(base * multiplier);
}

/**
 * Parses a clock-style duration ("M:SS" or "H:MM:SS") into seconds.
 * The previous code treated the third group as hours and the first as
 * minutes, so "1:02:03" was read as 3h 1m 2s.
 *
 * @param {string} text - Raw text containing the duration.
 * @returns {number} Duration in seconds, or 0 when no duration is found.
 */
function parseClockDuration(text) {
  const m = String(text || '').match(/(\d+):(\d+)(?::(\d+))?/);
  if (!m) return 0;
  return m
    .slice(1)
    .filter((part) => part !== undefined)
    .reduce((total, part) => total * 60 + Number.parseInt(part, 10), 0);
}

/**
 * Extracts a video id from a relative href.
 * `/video/<id>` is checked first: the generic `/v<id>` pattern also matches
 * "/video" (capturing "ideo"), which used to give every `/video/...` link
 * the id "video".
 *
 * @param {string} href - Relative link such as "/vabcdef-title.html" or "/video/abcdef".
 * @returns {string|null} Id prefixed with "v", or null when the href does not match.
 */
function extractVideoId(href) {
  // NOTE(review): non-video links such as "/videos" still match the second
  // pattern (as they did before); confirm against current Rumble markup.
  const m = href.match(/^\/video\/([A-Za-z0-9]+)/) || href.match(/^\/v([A-Za-z0-9]+)/);
  return m ? `v${m[1]}` : null;
}

/**
 * Parses a query-string value as an integer >= 1.
 *
 * @param {*} raw - Raw query value (may be undefined).
 * @param {number} fallback - Value returned for missing, non-numeric or < 1 input.
 * @returns {number} The parsed integer or the fallback.
 */
function parsePositiveInt(raw, fallback) {
  const n = Number.parseInt(String(raw == null ? '' : raw), 10);
  return Number.isFinite(n) && n >= 1 ? n : fallback;
}

/**
 * Scrapes metadata for a single video page.
 *
 * @param {string} videoId - Path segment of the video page on rumble.com.
 * @returns {Promise<Object>} Video metadata, or `{ videoId, error }` when the
 *   fetch or parse fails (the error is logged, not thrown).
 */
async function scrapeRumbleVideo(videoId) {
  // The id comes from a route parameter; encode it so it cannot inject
  // extra path segments or a query string into the upstream URL.
  const pageUrl = `https://rumble.com/${encodeURIComponent(videoId)}`;
  try {
    const html = await httpGet(pageUrl);
    const $ = cheerio.load(html);

    const title = $('h1.video-title, .video-title h1').first().text().trim()
      || $('meta[property="og:title"]').attr('content')
      || '';
    const thumbnail = $('meta[property="og:image"]').attr('content') || '';
    const uploaderName = $('.media-by--a, .channel-name').first().text().trim() || '';
    const views = parseCount($('.rumbles-views, .video-views').first().text().trim());
    const durationText = $('meta[property="video:duration"]').attr('content');
    // `|| 0` keeps a non-numeric meta value from producing NaN (serialised as null).
    const duration = durationText ? Number.parseInt(durationText, 10) || 0 : 0;
    const uploadedDate = $('meta[property="article:published_time"]').attr('content') || '';
    const description = $('meta[property="og:description"]').attr('content') || '';

    return {
      videoId,
      title: title || 'Untitled Video',
      thumbnail,
      uploaderName: uploaderName || 'Unknown Uploader',
      views,
      duration,
      uploadedDate,
      description,
      url: pageUrl,
      type: 'video',
    };
  } catch (e) {
    console.error('scrapeRumbleVideo error:', e.message);
    return { videoId, error: 'Scraping failed' };
  }
}

/**
 * Scrapes a page of videos, either search results (when `q` is given) or
 * the sorted browse listing.
 *
 * @param {Object} options
 * @param {string} [options.q] - Search query; when falsy the browse listing is used.
 * @param {number} [options.page=1] - 1-based page number (values < 1 become 1).
 * @param {number} [options.limit=24] - Maximum items returned (values < 1 become 24).
 * @param {string} [options.sort='viral'] - Sort order for the browse listing.
 * @returns {Promise<Object>} `{ items, total, page, limit, nextCursor }`. On any
 *   failure the error is logged and an empty result is returned.
 */
async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
  // Normalise once so the URL, slice() and the nextCursor comparison all see
  // positive integers regardless of what the caller passed.
  const pageNum = Math.max(1, Number.parseInt(page, 10) || 1);
  const pageSize = parsePositiveInt(limit, DEFAULT_LIMIT);
  try {
    const url = q
      ? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${pageNum}`
      : `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${pageNum}`;
    const html = await httpGet(url);
    const $ = cheerio.load(html);
    const items = [];

    // Try to select video cards; Rumble uses different layouts, so search broadly
    $('a[href^="/v"], a[href^="/video/"]').each((_, el) => {
      const a = $(el);
      const vid = extractVideoId(a.attr('href') || '');
      if (!vid) return;
      const title = a.attr('title') || a.text().trim();

      // Look around for thumbnail and meta
      const parent = a.closest('li, article, div');
      const img = parent.find('img').first();
      let thumb = img.attr('data-src') || img.attr('src') || '';
      if (thumb.startsWith('//')) thumb = `https:${thumb}`;

      const durationText = parent.find('.video-item--duration, .video-duration, .duration').first().text().trim();
      const viewsText = parent.find('.video-item--views, .rumbles-views, .views').first().text().trim();

      items.push({
        videoId: vid,
        title,
        thumbnail: thumb,
        uploaderName: '',
        views: parseCount(viewsText),
        duration: parseClockDuration(durationText),
        uploadedDate: '',
        url: `https://rumble.com/${vid}`,
        type: 'video',
      });
    });

    // De-duplicate by videoId (first occurrence wins) and slice to limit
    const seen = new Set();
    const unique = items.filter((it) => {
      if (seen.has(it.videoId)) return false;
      seen.add(it.videoId);
      return true;
    });
    const list = unique.slice(0, pageSize);
    const nextCursor = list.length === pageSize ? String(pageNum + 1) : null;
    return { items: list, total: unique.length, page: pageNum, limit: pageSize, nextCursor };
  } catch (e) {
    console.error('scrapeRumbleList error:', e.message);
    return { items: [], total: 0, page: pageNum, limit: pageSize, nextCursor: null };
  }
}

/**
 * GET /browse — sorted listing. Query: `page`, `limit` (capped at 50), `sort`.
 * Responses are cached per (page, limit, sort); because scrapeRumbleList
 * reports failures as an empty list, such results are cached as well.
 */
router.get('/browse', async (req, res) => {
  try {
    const page = parsePositiveInt(req.query.page, 1);
    const limit = Math.min(MAX_LIMIT, parsePositiveInt(req.query.limit, DEFAULT_LIMIT));
    const sort = String(req.query.sort || 'viral');

    const key = cacheKey('/browse', { page, limit, sort });
    const cached = getCache(key);
    if (cached) return res.json(cached);

    const data = await scrapeRumbleList({ page, limit, sort });
    setCache(key, data);
    return res.json(data);
  } catch (error) {
    console.error('Rumble browse error:', error);
    return res.status(500).json({ error: 'Failed to scrape video list' });
  }
});

/**
 * GET /search — search results. Query: `q` (required), `limit` (capped at 50),
 * and either `page` or `offset`. Responds 400 when `q` is empty.
 */
router.get('/search', async (req, res) => {
  try {
    const q = String(req.query.q || '').trim();
    if (!q) return res.status(400).json({ error: 'Query parameter required' });

    const limit = Math.min(MAX_LIMIT, parsePositiveInt(req.query.limit, DEFAULT_LIMIT));
    let page;
    if (req.query.offset != null) {
      // Support offset-based cursor from frontend by translating offset->page
      const offset = Math.max(0, Number.parseInt(String(req.query.offset), 10) || 0);
      page = Math.floor(offset / limit) + 1;
    } else {
      page = parsePositiveInt(req.query.page, 1);
    }

    const key = cacheKey('/search', { q, page, limit });
    const cached = getCache(key);
    if (cached) return res.json(cached);

    const data = await scrapeRumbleList({ q, page, limit });
    setCache(key, data);
    return res.json(data);
  } catch (error) {
    console.error('Rumble search error:', error);
    return res.status(500).json({ error: 'Failed to scrape search results' });
  }
});

/**
 * GET /video/:videoId — metadata for a single video.
 * Serves from the cache when possible; otherwise scrapes the page and caches
 * a successful result. Responds 404 when the scraper reports an error and
 * 500 when anything in the handler throws.
 */
router.get('/video/:videoId', async (req, res) => {
  try {
    const videoId = req.params.videoId;
    const lookupKey = cacheKey('/video', { videoId });

    const fromCache = getCache(lookupKey);
    if (fromCache) {
      return res.json(fromCache);
    }

    const scraped = await scrapeRumbleVideo(videoId);
    if (!scraped.error) {
      // Only successful scrapes are stored, so a failure is retried on the next request.
      setCache(lookupKey, scraped);
      return res.json(scraped);
    }
    return res.status(404).json({ error: 'Video not found or scraping failed' });
  } catch (error) {
    console.error('Rumble video error:', error);
    return res.status(500).json({ error: 'Failed to scrape video' });
  }
});

export default router;