// NewTube/server/rumble.mjs
// (file viewer metadata: 185 lines, 7.9 KiB, JavaScript)

import express from 'express';
import * as cheerio from 'cheerio';
import axios from 'axios';
import rateLimit from 'express-rate-limit';
// Router that carries every Rumble scraping endpoint defined in this module.
const router = express.Router();

// Throttle inbound requests so our scraping traffic does not get blocked by Rumble.
const rumbleLimiter = rateLimit({
  max: 20, // request budget per window
  windowMs: 1000 * 60, // one-minute window
  message: { error: 'Too many requests to Rumble API. Please try again later.' },
  legacyHeaders: false,
  standardHeaders: true
});

// Register the limiter ahead of the routes so it applies to all of them.
router.use(rumbleLimiter);
// Process-local response cache; each entry stays valid for TTL_MS milliseconds.
const TTL_MS = 1000 * 60; // one minute
const cache = new Map();
/**
 * Builds the cache key for a request: the route path followed by its
 * parameters serialized as a URL query string.
 *
 * @param {string} path - Route identifier, e.g. '/browse'.
 * @param {Object} params - Parameters that distinguish one response from another.
 * @returns {string} Key of the form `path?name=value&...`.
 */
function cacheKey(path, params) {
  const query = new URLSearchParams(params);
  return String(path).concat('?', query.toString());
}
/**
 * Stores `data` under `key` for TTL_MS milliseconds and evicts entries that
 * have already expired.
 *
 * Without the eviction pass an entry is only removed when the same key is read
 * again, so keys that are never requested twice (for example one-off search
 * queries) would stay in memory for the lifetime of the process.
 *
 * @param {string} key - Cache key, normally produced by cacheKey().
 * @param {*} data - Value to cache.
 */
function setCache(key, data) {
  const now = Date.now();
  // A Map iterates in insertion order and every entry gets the same TTL, so the
  // oldest entries come first; stop at the first entry that is still valid.
  for (const [storedKey, entry] of cache) {
    if (!(now > entry.expires)) break;
    cache.delete(storedKey);
  }
  // Delete before re-inserting so a refreshed key moves to the end of the
  // iteration order and the oldest-first assumption above keeps holding.
  cache.delete(key);
  cache.set(key, { data, expires: now + TTL_MS });
}
/**
 * Looks up a cached value.
 *
 * @param {string} key - Cache key, normally produced by cacheKey().
 * @returns {*} The cached data, or null when the key is absent or its entry
 *   has expired (an expired entry is removed as a side effect).
 */
function getCache(key) {
  const entry = cache.get(key);
  if (entry) {
    const isExpired = Date.now() > entry.expires;
    if (!isExpired) {
      return entry.data;
    }
    cache.delete(key);
  }
  return null;
}
/**
 * Fetches `url` with browser-like request headers and a 15 second timeout.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {Promise<*>} The response body as returned by axios (`response.data`).
 *   The promise rejects when axios.get rejects.
 */
async function httpGet(url) {
  // Present ourselves like a desktop Chrome browser.
  const browserHeaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8'
  };
  const { data } = await axios.get(url, { timeout: 15000, headers: browserHeaders });
  return data;
}
/**
 * Scrapes a single Rumble video page and extracts its metadata.
 *
 * @param {string} videoId - Rumble page identifier (the path segment after rumble.com/).
 * @returns {Promise<Object>} On success an object with `videoId` (the stable id
 *   derived from the embed or canonical URL when available), `title`,
 *   `thumbnail`, `uploaderName`, `views`, `duration` (seconds), `uploadedDate`,
 *   `description`, `url`, `embedUrl` and `type: 'video'`. On any failure the
 *   function does not throw; it resolves to `{ videoId, error: 'Scraping failed' }`.
 */
async function scrapeRumbleVideo(videoId) {
  // Converts a displayed count to an integer. Abbreviated values such as "1.2K"
  // or "3.4M" are expanded; stripping their non-digits would turn "1.2K" into 12.
  const parseCount = (text) => {
    const raw = String(text || '');
    const abbreviated = /(\d[\d,]*(?:\.\d+)?)\s*([KMB])\b/i.exec(raw);
    if (abbreviated) {
      const multipliers = { k: 1e3, m: 1e6, b: 1e9 };
      const value = Number.parseFloat(abbreviated[1].replace(/,/g, ''));
      return Math.round(value * multipliers[abbreviated[2].toLowerCase()]);
    }
    return Number.parseInt(raw.replace(/[^0-9]/g, ''), 10) || 0;
  };

  try {
    // Encode the id so characters such as "/", "?" or "#" in caller-supplied
    // input cannot change which path or query is requested from rumble.com.
    const html = await httpGet(`https://rumble.com/${encodeURIComponent(videoId)}`);
    const $ = cheerio.load(html);

    const title = $('h1.video-title, .video-title h1').first().text().trim() || $('meta[property="og:title"]').attr('content') || '';
    const thumbnail = $('meta[property="og:image"]').attr('content') || '';
    const uploaderName = $('.media-by--a, .channel-name').first().text().trim() || '';
    const views = parseCount($('.rumbles-views, .video-views').first().text().trim());
    // `|| 0` also covers a present-but-non-numeric value, which would otherwise
    // produce NaN (serialized as null in the JSON response).
    const duration = Number.parseInt($('meta[property="video:duration"]').attr('content') || '', 10) || 0;
    const uploadedDate = $('meta[property="article:published_time"]').attr('content') || '';
    const description = $('meta[property="og:description"]').attr('content') || '';

    // Try to extract the official embed URL, falling back to an embedded iframe.
    let embedUrl = $('meta[property="og:video"], meta[name="twitter:player"]').attr('content') || '';
    if (!embedUrl) {
      embedUrl = $('iframe[src*="/embed/"]').attr('src') || '';
    }
    // Normalize protocol-relative URLs.
    if (embedUrl.startsWith('//')) embedUrl = `https:${embedUrl}`;

    // The canonical URL is the second-best source for the stable id.
    const canonicalUrl = $('link[rel="canonical"]').attr('href') || $('meta[property="og:url"]').attr('content') || '';

    // Derive the stable Rumble id (e.g. v464efu): embed URL first, then canonical
    // URL, otherwise keep whatever the caller passed in.
    let stableId = videoId;
    const mEmbed = /\/embed\/(v[0-9A-Za-z]+)/.exec(embedUrl);
    const mCanon = /\/(v[0-9A-Za-z]+)(?:[\-./]|$)/.exec(canonicalUrl);
    if (mEmbed && mEmbed[1]) stableId = mEmbed[1];
    else if (mCanon && mCanon[1]) stableId = mCanon[1];

    // If no real embed URL was found, build one from the stable id.
    if (!/\/embed\//.test(embedUrl)) {
      embedUrl = `https://rumble.com/embed/${stableId}/?autoplay=2&muted=1`;
    }

    return {
      videoId: stableId,
      title: title || 'Untitled Video',
      thumbnail,
      uploaderName: uploaderName || 'Unknown Uploader',
      views,
      duration,
      uploadedDate,
      description,
      url: `https://rumble.com/${stableId}`,
      embedUrl,
      type: 'video'
    };
  } catch (e) {
    console.error('scrapeRumbleVideo error:', e.message);
    return { videoId, error: 'Scraping failed' };
  }
}
/**
 * Converts a clock-style duration ("M:SS" or "H:MM:SS") to seconds.
 * Returns 0 when the text contains no such pattern.
 */
function parseClockDuration(text) {
  const match = /(\d+):(\d+)(?::(\d+))?/.exec(String(text || ''));
  if (!match) return 0;
  // Fold left to right so the leading field is hours only when three fields exist.
  return match
    .slice(1)
    .filter((part) => part !== undefined)
    .reduce((total, part) => total * 60 + Number.parseInt(part, 10), 0);
}

/**
 * Converts a displayed view count to an integer. Abbreviated values such as
 * "1.2K" or "3.4M" are expanded; plain values such as "12,345 views" keep their digits.
 */
function parseViewCount(text) {
  const raw = String(text || '');
  const abbreviated = /(\d[\d,]*(?:\.\d+)?)\s*([KMB])\b/i.exec(raw);
  if (abbreviated) {
    const multipliers = { k: 1e3, m: 1e6, b: 1e9 };
    const value = Number.parseFloat(abbreviated[1].replace(/,/g, ''));
    return Math.round(value * multipliers[abbreviated[2].toLowerCase()]);
  }
  return Number.parseInt(raw.replace(/[^0-9]/g, ''), 10) || 0;
}

/**
 * Extracts a video id (always prefixed with "v") from a link href of the form
 * "/v<id>-slug.html" or "/video/<id>". Returns null for anything else,
 * including the "/videos" listing links.
 */
function videoIdFromHref(href) {
  const path = String(href || '');
  // "/video/<id>" must be tested first: the generic "/v<id>" pattern would
  // otherwise capture "ideo" from it.
  const legacy = /^\/video\/([A-Za-z0-9]+)/.exec(path);
  if (legacy) return `v${legacy[1]}`;
  const direct = /^\/v([A-Za-z0-9]+)(?=[-./]|$)/.exec(path);
  if (!direct) return null;
  const id = `v${direct[1]}`;
  // Bare navigation paths are not videos.
  return id === 'videos' || id === 'video' ? null : id;
}

/**
 * Scrapes one page of Rumble search results (when `q` is given) or of the
 * sorted video listing, and returns de-duplicated video summaries.
 *
 * @param {Object} options
 * @param {string} [options.q] - Search query; when empty the browse listing is used.
 * @param {number|string} [options.page=1] - 1-based page number.
 * @param {number|string} [options.limit=24] - Maximum number of items to return.
 * @param {string} [options.sort='viral'] - Sort order for the browse listing.
 * @returns {Promise<Object>} `{ items, total, page, limit, nextCursor }`, where
 *   `total` is the number of unique videos found on the page and `nextCursor`
 *   is the next page number as a string, or null when fewer than `limit` items
 *   were returned. On failure the function does not throw; it resolves to an
 *   empty result with `nextCursor: null`.
 */
async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
  // Normalize once so slicing and the nextCursor comparison behave the same
  // whether callers pass numbers or numeric strings.
  const pageNumber = Number(page);
  const pageSize = Number(limit);
  try {
    const url = q
      ? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${page}`
      : `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${page}`;
    const html = await httpGet(url);
    const $ = cheerio.load(html);

    const seen = new Set();
    const unique = [];
    // Rumble uses different layouts, so select candidate links broadly.
    $('a[href^="/v"], a[href^="/video/"]').each((_, el) => {
      const anchor = $(el);
      const vid = videoIdFromHref(anchor.attr('href'));
      // Keep only the first link for each video.
      if (!vid || seen.has(vid)) return;
      seen.add(vid);

      const title = anchor.attr('title') || anchor.text().trim();
      // Thumbnail and metadata live in the nearest card-like ancestor.
      const card = anchor.closest('li, article, div');
      const img = card.find('img').first();
      let thumb = img.attr('data-src') || img.attr('src') || '';
      if (thumb.startsWith('//')) thumb = `https:${thumb}`;
      const durationText = card.find('.video-item--duration, .video-duration, .duration').first().text().trim();
      const viewsText = card.find('.video-item--views, .rumbles-views, .views').first().text().trim();

      unique.push({
        videoId: vid,
        title,
        thumbnail: thumb,
        uploaderName: '',
        views: parseViewCount(viewsText),
        duration: parseClockDuration(durationText),
        uploadedDate: '',
        url: `https://rumble.com/${vid}`,
        type: 'video'
      });
    });

    const list = unique.slice(0, pageSize);
    const nextCursor = list.length === pageSize ? String(pageNumber + 1) : null;
    return { items: list, total: unique.length, page: pageNumber, limit: pageSize, nextCursor };
  } catch (e) {
    console.error('scrapeRumbleList error:', e.message);
    return { items: [], total: 0, page: pageNumber, limit: pageSize, nextCursor: null };
  }
}
// GET /browse?page=&limit=&sort=
// Returns one page of the sorted Rumble video listing, served from the cache
// when a matching response is still valid.
router.get('/browse', async (req, res) => {
  try {
    // Clamp to >= 1: parseInt accepts negative numbers, and a negative limit
    // would make the scraper's slice(0, limit) drop items from the end of the
    // list instead of capping it.
    const page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
    const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24));
    const sort = String(req.query.sort || 'viral');

    const key = cacheKey('/browse', { page, limit, sort });
    const cached = getCache(key);
    if (cached) return res.json(cached);

    const data = await scrapeRumbleList({ page, limit, sort });
    setCache(key, data);
    return res.json(data);
  } catch (error) {
    // Without this, a rejection inside the async handler would go unhandled.
    console.error('Rumble browse error:', error);
    return res.status(500).json({ error: 'Failed to scrape videos' });
  }
});
// GET /search?q=&limit=&page=|offset=
// Returns one page of Rumble search results for `q`, served from the cache when
// a matching response is still valid. Responds 400 when `q` is empty.
router.get('/search', async (req, res) => {
  try {
    const q = String(req.query.q || '').trim();
    if (!q) return res.status(400).json({ error: 'Query parameter required' });

    // Clamp to >= 1: parseInt accepts negative numbers, and a negative limit
    // would both corrupt the offset->page division below and make the scraper's
    // slice(0, limit) drop items from the end of the list.
    const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24));

    let page;
    if (req.query.offset != null) {
      // Support the frontend's offset-based cursor by translating offset -> page.
      const offset = Math.max(0, parseInt(String(req.query.offset), 10) || 0);
      page = Math.floor(offset / limit) + 1;
    } else {
      page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
    }

    const key = cacheKey('/search', { q, page, limit });
    const cached = getCache(key);
    if (cached) return res.json(cached);

    const data = await scrapeRumbleList({ q, page, limit });
    setCache(key, data);
    return res.json(data);
  } catch (error) {
    // Without this, a rejection inside the async handler would go unhandled.
    console.error('Rumble search error:', error);
    return res.status(500).json({ error: 'Failed to scrape videos' });
  }
});
// GET /video/:videoId
// Responds with the scraped metadata for one video, using the cache when
// possible. A scrape that reports an error yields 404; an unexpected exception
// yields 500.
router.get('/video/:videoId', async (req, res) => {
  try {
    const id = req.params.videoId;
    const cacheId = cacheKey('/video', { videoId: id });

    let payload = getCache(cacheId);
    if (!payload) {
      payload = await scrapeRumbleVideo(id);
      if (payload.error) {
        return res.status(404).json({ error: 'Video not found or scraping failed' });
      }
      // Only successful scrapes are cached.
      setCache(cacheId, payload);
    }
    return res.json(payload);
  } catch (error) {
    console.error('Rumble video error:', error);
    return res.status(500).json({ error: 'Failed to scrape video' });
  }
});
export default router;