185 lines
7.9 KiB
JavaScript
185 lines
7.9 KiB
JavaScript
import express from 'express';
|
|
import * as cheerio from 'cheerio';
|
|
import axios from 'axios';
|
|
import rateLimit from 'express-rate-limit';
|
|
|
|
// Express router that carries every Rumble scraping endpoint of this module.
const router = express.Router();

// Throttle callers so the scraping traffic sent to Rumble stays low enough to
// avoid being blocked: 20 requests are allowed per one-minute window.
const rumbleLimiter = rateLimit({
  max: 20,
  windowMs: 1000 * 60,
  legacyHeaders: false,
  standardHeaders: true,
  message: {
    error: 'Too many requests to Rumble API. Please try again later.',
  },
});

// Registered ahead of the route definitions so it runs for all of them.
router.use(rumbleLimiter);
|
|
|
|
// Simple in-memory cache with TTL.
// Keys are built from user-controlled query values, so the map is capped at
// MAX_CACHE_ENTRIES; otherwise entries that are never read again would pile up.
const cache = new Map();
const TTL_MS = 60 * 1000; // 60s
const MAX_CACHE_ENTRIES = 500;

/**
 * Builds the cache key for a route path and its parameters.
 *
 * @param {string} path - Route path used as the key prefix.
 * @param {Object} params - Parameters serialized as a query string.
 * @returns {string} `path?serialized-params`.
 */
function cacheKey(path, params) {
  return `${path}?${new URLSearchParams(params).toString()}`;
}

/**
 * Stores `data` under `key` for TTL_MS milliseconds.
 *
 * Before inserting, expired entries are dropped and, if the cache is still at
 * capacity, the oldest entries are evicted so the map never holds more than
 * MAX_CACHE_ENTRIES items.
 *
 * @param {string} key - Key produced by cacheKey().
 * @param {*} data - Value to cache.
 * @returns {void}
 */
function setCache(key, data) {
  const now = Date.now();
  // Remove first so the fresh entry is appended: a Map iterates in insertion
  // order, which (with a single TTL) is also oldest-expiry-first.
  cache.delete(key);
  for (const [oldKey, entry] of cache) {
    const isExpired = now > entry.expires;
    if (!isExpired && cache.size < MAX_CACHE_ENTRIES) break;
    cache.delete(oldKey);
  }
  cache.set(key, { data, expires: now + TTL_MS });
}

/**
 * Looks up a cached value.
 *
 * @param {string} key - Key produced by cacheKey().
 * @returns {*} The cached data, or null when the key is absent or expired
 *   (an expired entry is removed as a side effect).
 */
function getCache(key) {
  const entry = cache.get(key);
  if (entry === undefined) return null;
  if (Date.now() > entry.expires) {
    cache.delete(key);
    return null;
  }
  return entry.data;
}
|
|
|
|
/**
 * Fetches a URL with axios using browser-like request headers.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} The `data` field of the axios response.
 */
async function httpGet(url) {
  const browserHeaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
  };
  const requestOptions = { headers: browserHeaders, timeout: 15000 };
  const { data } = await axios.get(url, requestOptions);
  return data;
}
|
|
|
|
/**
 * Converts a view-count label into a number.
 *
 * Labels with a K/M/B suffix ("1.2K views") are scaled accordingly; any other
 * label falls back to its digits only ("1,234 views" -> 1234).
 *
 * @param {string} text - Raw label text; may be empty or undefined.
 * @returns {number} Parsed count, or 0 when no number is present.
 */
function parseVideoViews(text) {
  const label = text || '';
  const compact = /(\d[\d,]*(?:\.\d+)?)\s*([KMB])\b/i.exec(label);
  if (compact) {
    const multipliers = { K: 1e3, M: 1e6, B: 1e9 };
    const base = Number.parseFloat(compact[1].replace(/,/g, ''));
    return Math.round(base * multipliers[compact[2].toUpperCase()]);
  }
  return Number.parseInt(label.replace(/[^0-9]/g, ''), 10) || 0;
}

/**
 * Scrapes a single Rumble video page and extracts its metadata.
 *
 * @param {string} videoId - Video id or page slug, appended to https://rumble.com/.
 * @returns {Promise<Object>} On success an object with videoId, title,
 *   thumbnail, uploaderName, views, duration, uploadedDate, description, url,
 *   embedUrl and type. On any failure `{ videoId, error: 'Scraping failed' }`
 *   (errors are logged, never thrown).
 */
async function scrapeRumbleVideo(videoId) {
  try {
    // The id comes from the request path; encoding keeps it a single path
    // segment so it cannot add "/", "?" or "#" to the upstream URL.
    const html = await httpGet(`https://rumble.com/${encodeURIComponent(videoId)}`);
    const $ = cheerio.load(html);

    const title = $('h1.video-title, .video-title h1').first().text().trim()
      || $('meta[property="og:title"]').attr('content')
      || '';
    const thumbnail = $('meta[property="og:image"]').attr('content') || '';
    const uploaderName = $('.media-by--a, .channel-name').first().text().trim() || '';
    const views = parseVideoViews($('.rumbles-views, .video-views').first().text().trim());

    // A missing or non-numeric duration becomes 0 instead of NaN.
    const durationText = $('meta[property="video:duration"]').attr('content');
    const duration = Number.parseInt(durationText || '', 10) || 0;

    const uploadedDate = $('meta[property="article:published_time"]').attr('content') || '';
    const description = $('meta[property="og:description"]').attr('content') || '';

    // Try to extract the official embed URL, falling back to an embed iframe.
    let embedUrl = $('meta[property="og:video"], meta[name="twitter:player"]').attr('content')
      || $('iframe[src*="/embed/"]').attr('src')
      || '';
    // Normalize protocol-less URLs
    if (embedUrl.startsWith('//')) embedUrl = `https:${embedUrl}`;

    // Detect canonical URL to extract stable ID
    const canonicalUrl = $('link[rel="canonical"]').attr('href')
      || $('meta[property="og:url"]').attr('content')
      || '';

    // Normalize/derive the stable Rumble ID (e.g., v464efu): the embed URL
    // wins, then the canonical URL, then the id that was requested.
    const mEmbed = /\/embed\/(v[0-9A-Za-z]+)/.exec(embedUrl);
    const mCanon = /\/(v[0-9A-Za-z]+)(?:[\-./]|$)/.exec(canonicalUrl);
    let stableId = videoId;
    if (mEmbed) stableId = mEmbed[1];
    else if (mCanon) stableId = mCanon[1];

    // stableId may still be the raw requested id, so encode it before it is
    // placed in the URLs handed back to the client.
    const safeId = encodeURIComponent(stableId);

    // If embedUrl is a page URL, convert to embed path as a fallback
    if (!/\/embed\//.test(embedUrl)) {
      embedUrl = `https://rumble.com/embed/${safeId}/?autoplay=2&muted=1`;
    }

    return {
      videoId: stableId,
      title: title || 'Untitled Video',
      thumbnail,
      uploaderName: uploaderName || 'Unknown Uploader',
      views,
      duration,
      uploadedDate,
      description,
      url: `https://rumble.com/${safeId}`,
      embedUrl,
      type: 'video',
    };
  } catch (e) {
    console.error('scrapeRumbleVideo error:', e.message);
    return { videoId, error: 'Scraping failed' };
  }
}
|
|
|
|
// Paths that start with "/v" but are site sections, not video pages.
const NON_VIDEO_SLUGS = new Set(['video', 'videos']);

/**
 * Derives the Rumble video id from a card link.
 *
 * @param {string} href - Link target such as "/v464efu-title.html" or "/video/abcdef".
 * @returns {string|null} Id with its leading "v", or null when the link does
 *   not point at a video (e.g. "/videos?sort=viral").
 */
function extractCardVideoId(href) {
  // The "/video/<id>" form must be tested first: the generic "/v..." pattern
  // would otherwise match it and report the id as "video".
  const prefixed = /^\/video\/([A-Za-z0-9]+)/.exec(href);
  if (prefixed) return `v${prefixed[1]}`;
  const direct = /^\/(v[A-Za-z0-9]+)/.exec(href);
  if (!direct || NON_VIDEO_SLUGS.has(direct[1])) return null;
  return direct[1];
}

/**
 * Converts "M:SS" or "H:MM:SS" into seconds.
 *
 * @param {string} text - Duration label; may be empty.
 * @returns {number} Total seconds, or 0 when no clock value is found.
 */
function parseCardDuration(text) {
  const clock = /(\d+):(\d+)(?::(\d+))?/.exec(text || '');
  if (!clock) return 0;
  // Fields are most-significant first, so fold left in base 60.
  return clock
    .slice(1)
    .filter((part) => part !== undefined)
    .reduce((total, part) => total * 60 + Number.parseInt(part, 10), 0);
}

/**
 * Converts a view-count label into a number.
 *
 * Labels with a K/M/B suffix ("1.2K views") are scaled accordingly; any other
 * label falls back to its digits only ("1,234" -> 1234).
 *
 * @param {string} text - Raw label text; may be empty.
 * @returns {number} Parsed count, or 0 when no number is present.
 */
function parseCardViews(text) {
  const label = text || '';
  const compact = /(\d[\d,]*(?:\.\d+)?)\s*([KMB])\b/i.exec(label);
  if (compact) {
    const multipliers = { K: 1e3, M: 1e6, B: 1e9 };
    const base = Number.parseFloat(compact[1].replace(/,/g, ''));
    return Math.round(base * multipliers[compact[2].toUpperCase()]);
  }
  return Number.parseInt(label.replace(/[^0-9]/g, ''), 10) || 0;
}

/**
 * Scrapes a Rumble listing page: search results when `q` is given, otherwise
 * the sorted video listing.
 *
 * @param {Object} options
 * @param {string} [options.q] - Search query; when empty the browse listing is used.
 * @param {number|string} [options.page=1] - Page requested from Rumble (minimum 1).
 * @param {number|string} [options.limit=24] - Maximum number of items returned (minimum 1).
 * @param {string} [options.sort='viral'] - Sort order for the browse listing.
 * @returns {Promise<Object>} `{ items, total, page, limit, nextCursor }`. On
 *   any failure the error is logged and an empty list is returned.
 */
async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
  // Coerce once so the slice and the nextCursor comparison work on numbers
  // even if a caller passes strings.
  const pageNumber = Math.max(1, Math.trunc(Number(page)) || 1);
  const pageSize = Math.max(1, Math.trunc(Number(limit)) || 24);

  try {
    const url = q
      ? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${pageNumber}`
      : `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${pageNumber}`;
    const html = await httpGet(url);
    const $ = cheerio.load(html);

    // Keyed by videoId: a Map keeps first-seen order and de-duplicates.
    const byId = new Map();

    // Try to select video cards; Rumble uses different layouts, so search broadly
    $('a[href^="/v"], a[href^="/video/"]').each((_, el) => {
      const anchor = $(el);
      const videoId = extractCardVideoId(anchor.attr('href') || '');
      if (!videoId) return;

      const title = anchor.attr('title') || anchor.text().trim();
      const known = byId.get(videoId);
      if (known) {
        // A later link to the same video may carry the title the first lacked.
        if (!known.title && title) known.title = title;
        return;
      }

      // Look around for thumbnail and meta
      const card = anchor.closest('li, article, div');
      const img = card.find('img').first();
      let thumbnail = img.attr('data-src') || img.attr('src') || '';
      if (thumbnail.startsWith('//')) thumbnail = `https:${thumbnail}`;

      const durationText = card.find('.video-item--duration, .video-duration, .duration').first().text().trim();
      const viewsText = card.find('.video-item--views, .rumbles-views, .views').first().text().trim();

      byId.set(videoId, {
        videoId,
        title,
        thumbnail,
        uploaderName: '',
        views: parseCardViews(viewsText),
        duration: parseCardDuration(durationText),
        uploadedDate: '',
        url: `https://rumble.com/${videoId}`,
        type: 'video',
      });
    });

    const unique = [...byId.values()];
    const items = unique.slice(0, pageSize);
    const nextCursor = items.length === pageSize ? String(pageNumber + 1) : null;
    return { items, total: unique.length, page: pageNumber, limit: pageSize, nextCursor };
  } catch (e) {
    console.error('scrapeRumbleList error:', e.message);
    return { items: [], total: 0, page: pageNumber, limit: pageSize, nextCursor: null };
  }
}
|
|
|
|
// GET /browse — sorted Rumble listing.
// Query: page (>= 1, default 1), limit (1-50, default 24), sort (default 'viral').
router.get('/browse', async (req, res) => {
  try {
    // Clamp to positive values: a negative limit would otherwise reach
    // Array#slice as a negative end index and silently drop results.
    const page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
    const requestedLimit = parseInt(String(req.query.limit || '24'), 10) || 24;
    const limit = Math.min(50, Math.max(1, requestedLimit));
    const sort = String(req.query.sort || 'viral');

    const key = cacheKey('/browse', { page, limit, sort });
    const cached = getCache(key);
    if (cached) return res.json(cached);

    const data = await scrapeRumbleList({ page, limit, sort });
    // A failed scrape is reported as an empty list; do not cache it, so the
    // failure is not replayed to every caller for the whole TTL.
    if (Array.isArray(data.items) && data.items.length > 0) setCache(key, data);
    return res.json(data);
  } catch (error) {
    // An async handler's rejection would otherwise leave the request hanging.
    console.error('Rumble browse error:', error);
    return res.status(500).json({ error: 'Failed to load Rumble videos' });
  }
});
|
|
|
|
// GET /search — Rumble video search.
// Query: q (required), limit (1-50, default 24), and either offset or page.
router.get('/search', async (req, res) => {
  try {
    const q = String(req.query.q || '').trim();
    if (!q) return res.status(400).json({ error: 'Query parameter required' });

    // Clamp to positive values: a negative limit would otherwise reach
    // Array#slice as a negative end index and also break the offset division.
    const requestedLimit = parseInt(String(req.query.limit || '24'), 10) || 24;
    const limit = Math.min(50, Math.max(1, requestedLimit));

    let page;
    if (req.query.offset != null) {
      // Support offset-based cursor from frontend by translating offset->page
      const offset = Math.max(0, parseInt(String(req.query.offset), 10) || 0);
      page = Math.floor(offset / limit) + 1;
    } else {
      page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
    }

    const key = cacheKey('/search', { q, page, limit });
    const cached = getCache(key);
    if (cached) return res.json(cached);

    const data = await scrapeRumbleList({ q, page, limit });
    // A failed scrape is reported as an empty list; do not cache it, so the
    // failure is not replayed to every caller for the whole TTL.
    if (Array.isArray(data.items) && data.items.length > 0) setCache(key, data);
    return res.json(data);
  } catch (error) {
    // An async handler's rejection would otherwise leave the request hanging.
    console.error('Rumble search error:', error);
    return res.status(500).json({ error: 'Failed to search Rumble videos' });
  }
});
|
|
|
|
// GET /video/:videoId — metadata for one Rumble video.
// Responds from the cache when possible; only successful scrapes are cached.
router.get('/video/:videoId', async (req, res) => {
  try {
    const videoId = req.params.videoId;
    const key = cacheKey('/video', { videoId });

    const hit = getCache(key);
    if (hit) {
      return res.json(hit);
    }

    const scraped = await scrapeRumbleVideo(videoId);
    if (scraped.error) {
      // The scraper signals failure through an `error` field instead of throwing.
      return res.status(404).json({ error: 'Video not found or scraping failed' });
    }

    setCache(key, scraped);
    return res.json(scraped);
  } catch (error) {
    console.error('Rumble video error:', error);
    return res.status(500).json({ error: 'Failed to scrape video' });
  }
});
|
|
|
|
export default router;
|