refactor: improve Rumble video scraping with robust ID normalization and metadata parsing
This commit is contained in:
parent
709b2e55c2
commit
d6da699c54
BIN
db/newtube.db
BIN
db/newtube.db
Binary file not shown.
@ -5,29 +5,26 @@ import rateLimit from 'express-rate-limit';
|
|||||||
|
|
||||||
const router = express.Router();
|
const router = express.Router();
|
||||||
|
|
||||||
// Rate limiter for Rumble scraping to prevent being blocked
|
/* ----------------------------- Rate limiting ----------------------------- */
|
||||||
const rumbleLimiter = rateLimit({
|
const rumbleLimiter = rateLimit({
|
||||||
windowMs: 60 * 1000, // 1 min
|
windowMs: 60 * 1000,
|
||||||
max: 20,
|
max: 20,
|
||||||
standardHeaders: true,
|
standardHeaders: true,
|
||||||
legacyHeaders: false,
|
legacyHeaders: false,
|
||||||
message: { error: 'Too many requests to Rumble API. Please try again later.' }
|
message: { error: 'Too many requests to Rumble. Please try again later.' }
|
||||||
});
|
});
|
||||||
|
|
||||||
router.use(rumbleLimiter);
|
router.use(rumbleLimiter);
|
||||||
|
|
||||||
// Simple in-memory cache with TTL
|
/* --------------------------------- Cache -------------------------------- */
|
||||||
const cache = new Map();
|
const cache = new Map();
|
||||||
const TTL_MS = 60 * 1000; // 60s
|
const TTL_MS = 60 * 1000; // 60s
|
||||||
|
|
||||||
function cacheKey(path, params) {
|
function cacheKey(path, params) {
|
||||||
return `${path}?${new URLSearchParams(params).toString()}`;
|
return `${path}?${new URLSearchParams(params).toString()}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function setCache(key, data) {
|
function setCache(key, data) {
|
||||||
cache.set(key, { data, expires: Date.now() + TTL_MS });
|
cache.set(key, { data, expires: Date.now() + TTL_MS });
|
||||||
}
|
}
|
||||||
|
|
||||||
function getCache(key) {
|
function getCache(key) {
|
||||||
const hit = cache.get(key);
|
const hit = cache.get(key);
|
||||||
if (!hit) return null;
|
if (!hit) return null;
|
||||||
@ -35,111 +32,316 @@ function getCache(key) {
|
|||||||
return hit.data;
|
return hit.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ------------------------------- HTTP GET -------------------------------- */
|
||||||
async function httpGet(url) {
|
async function httpGet(url) {
|
||||||
const resp = await axios.get(url, {
|
const resp = await axios.get(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
// UA “desktop” moderne pour minimiser les anti-bot simples
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
'User-Agent':
|
||||||
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
|
||||||
|
'Accept':
|
||||||
|
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||||
'Accept-Language': 'en-US,en;q=0.8'
|
'Accept-Language': 'en-US,en;q=0.8'
|
||||||
},
|
},
|
||||||
timeout: 15000
|
timeout: 15000,
|
||||||
|
// Important: pas de redirects inter-domain hasardeux
|
||||||
|
maxRedirects: 3,
|
||||||
|
validateStatus: s => s >= 200 && s < 400
|
||||||
});
|
});
|
||||||
return resp.data;
|
return resp.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function scrapeRumbleVideo(videoId) {
|
/* ------------------------- Utils: normalisation ID ------------------------ */
|
||||||
|
/**
 * Normalizes a Rumble video reference (bare ID or URL) into a stable identity.
 *
 * Rumble exposes several forms:
 *   - canonical page: https://rumble.com/v6siqxf-some-title.html
 *   - legacy page:    https://rumble.com/video/12345
 *   - official embed: https://rumble.com/embed/v6siqxf/
 *   - bare ID:        v6siqxf ('v' followed by alphanumerics)
 *
 * Every URL returned is rebuilt on https://rumble.com, so a caller that
 * fetches `urlCanonique` can never be steered to another host by the input.
 *
 * @param {string} input - Bare ID, absolute URL, or path relative to rumble.com.
 * @param {object} [options]
 * @param {boolean} [options.preferEmbed=true] - Kept for backward
 *   compatibility; it does not influence the result.
 * @returns {{id: (string|null), urlCanonique: string, embedUrl: (string|null)}|null}
 *   The identity, or null when the input is empty or not recognised. For a
 *   legacy `/video/<n>` URL the ID cannot be derived offline, so `id` and
 *   `embedUrl` are null and `urlCanonique` points at the legacy page.
 */
function normalizeRumbleId(input, { preferEmbed = true } = {}) {
  if (!input) return null;

  const clean = String(input).trim();
  const identityFor = (id) => ({
    id,
    urlCanonique: `https://rumble.com/${id}`,
    embedUrl: `https://rumble.com/embed/${id}/`
  });

  // 1) Already a bare "vXXXX" ID.
  if (/^v[0-9A-Za-z]+$/.test(clean)) return identityFor(clean);

  // 2) Otherwise treat the input as a URL (relative inputs resolve on rumble.com).
  let u;
  try {
    u = new URL(clean, 'https://rumble.com');
  } catch {
    return null;
  }

  // /embed/vXXXX/
  const embedMatch = /\/embed\/(v[0-9A-Za-z]+)/.exec(u.pathname);
  if (embedMatch) return identityFor(embedMatch[1]);

  // Legacy /video/123 must be tested BEFORE the generic /vXXXX pattern:
  // "video" itself matches v[0-9A-Za-z]+ and would be taken for an ID.
  if (/^\/video\/[0-9A-Za-z]+/.test(u.pathname)) {
    // Only the path is reused; the page parser extracts the real ID later.
    return { id: null, urlCanonique: `https://rumble.com${u.pathname}`, embedUrl: null };
  }

  // /vXXXX, /vXXXX-some-title.html, /vXXXX/...
  const pageMatch = /\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(u.pathname);
  return pageMatch ? identityFor(pageMatch[1]) : null;
}
|
||||||
|
|
||||||
|
/* ---------------------- Parsing robuste d’une PAGE vidéo ------------------ */
|
||||||
|
/**
 * Extracts the video identity (ID, embed URL, canonical URL) from a parsed
 * Rumble video page.
 *
 * Sources are tried in this order:
 *   1. the inline player bootstrap: Rumble("play", {..., "video":"vXXXX", ...})
 *   2. <meta property="og:video"> or <meta name="twitter:player">
 *      (expected to contain /embed/vXXXX/)
 *   3. <link rel="canonical"> or <meta property="og:url">
 *      (expected to contain /vXXXX-some-title.html)
 *
 * Nothing from the page is executed; only regular expressions are applied.
 *
 * @param {Function} $ - Cheerio root for the page; this function uses
 *   `$.html()` and `$(selector).attr(name)`.
 * @returns {{id: string, embedUrl: string, urlCanonique: string}|null}
 *   The identity, or null when no source yields an ID.
 */
function extractVideoIdentity($) {
  const toIdentity = (id) => ({
    id,
    embedUrl: `https://rumble.com/embed/${id}/`,
    urlCanonique: `https://rumble.com/${id}`
  });

  // 1) Inline script: Rumble('play', {... "video":"vXXXX" ...})
  const html = $.html() || '';
  const played = /Rumble\(\s*["']play["']\s*,\s*{[^}]*["']video["']\s*:\s*["'](v[0-9A-Za-z]+)["']/s.exec(html);
  if (played && played[1]) return toIdentity(played[1]);

  // 2) og:video / twitter:player -> .../embed/vXXXX/...
  const embed = $('meta[property="og:video"]').attr('content')
    || $('meta[name="twitter:player"]').attr('content');
  if (embed) {
    const embedMatch = /\/embed\/(v[0-9A-Za-z]+)/.exec(embed);
    if (embedMatch) return toIdentity(embedMatch[1]);
  }

  // 3) canonical / og:url -> .../vXXXX-...
  const canon = $('link[rel="canonical"]').attr('href')
    || $('meta[property="og:url"]').attr('content');
  if (canon) {
    // Match against the path only, so the scheme and host (for example a
    // "video." subdomain) can never be mistaken for an ID.
    let path = canon;
    try {
      path = new URL(canon, 'https://rumble.com').pathname;
    } catch {
      // Unparseable URL: fall back to matching the raw string.
    }
    // "/video/123" and "/videos" are site sections, not IDs, even though
    // "video" matches v[0-9A-Za-z]+.
    if (!/^\/videos?(?:\/|$)/.test(path)) {
      const pageMatch = /\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(path);
      if (pageMatch) return toIdentity(pageMatch[1]);
    }
  }

  return null;
}
|
||||||
|
|
||||||
|
/* -------------------------- Scraper d’une vidéo -------------------------- */
|
||||||
|
async function scrapeRumbleVideo(videoIdOrUrl) {
|
||||||
|
try {
|
||||||
|
// Accepte /:videoId ou une URL complète.
|
||||||
|
let norm = normalizeRumbleId(videoIdOrUrl);
|
||||||
|
const fetchUrl = norm?.urlCanonique || `https://rumble.com/${videoIdOrUrl}`;
|
||||||
|
const html = await httpGet(fetchUrl);
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
const title = $('h1.video-title, .video-title h1').first().text().trim() || $('meta[property="og:title"]').attr('content') || '';
|
|
||||||
const thumbnail = $('meta[property="og:image"]').attr('content') || '';
|
// Identité fiable (id + embed + canonique)
|
||||||
const uploaderName = $('.media-by--a, .channel-name').first().text().trim() || '';
|
let ident = extractVideoIdentity($);
|
||||||
const viewsText = $('.rumbles-views, .video-views').first().text().trim();
|
if (!ident) {
|
||||||
const views = parseInt((viewsText || '').replace(/[^0-9]/g, '')) || 0;
|
// dernier recours: ré-essayer avec la page telle quelle si on est venu via /video/123
|
||||||
const durationText = $('meta[property="video:duration"]').attr('content');
|
if (!norm?.id && norm?.urlCanonique) {
|
||||||
const duration = durationText ? parseInt(durationText) : 0;
|
ident = extractVideoIdentity($);
|
||||||
const uploadedDate = $('meta[property="article:published_time"]').attr('content') || '';
|
|
||||||
const description = $('meta[property="og:description"]').attr('content') || '';
|
|
||||||
// Try to extract the official embed URL
|
|
||||||
let embedUrl = $('meta[property="og:video"], meta[name="twitter:player"]').attr('content') || '';
|
|
||||||
if (!embedUrl) {
|
|
||||||
const iframeSrc = $('iframe[src*="/embed/"]').attr('src') || '';
|
|
||||||
embedUrl = iframeSrc || '';
|
|
||||||
}
|
}
|
||||||
// Normalize protocol-less URLs
|
|
||||||
if (embedUrl && embedUrl.startsWith('//')) embedUrl = 'https:' + embedUrl;
|
|
||||||
// Detect canonical URL to extract stable ID
|
|
||||||
const canonicalUrl = $('link[rel="canonical"]').attr('href') || $('meta[property="og:url"]').attr('content') || '';
|
|
||||||
// Normalize/derive the stable Rumble ID (e.g., v464efu)
|
|
||||||
let stableId = videoId;
|
|
||||||
const mEmbed = /\/embed\/(v[0-9A-Za-z]+)/.exec(embedUrl || '');
|
|
||||||
const mCanon = /\/(v[0-9A-Za-z]+)(?:[\-./]|$)/.exec(canonicalUrl || '');
|
|
||||||
if (mEmbed && mEmbed[1]) stableId = mEmbed[1];
|
|
||||||
else if (mCanon && mCanon[1]) stableId = mCanon[1];
|
|
||||||
// If embedUrl is a page URL, convert to embed path as a fallback
|
|
||||||
if (!/\/embed\//.test(embedUrl)) {
|
|
||||||
embedUrl = `https://rumble.com/embed/${stableId}/?autoplay=2&muted=1`;
|
|
||||||
}
|
}
|
||||||
return { videoId: stableId, title: title || 'Untitled Video', thumbnail, uploaderName: uploaderName || 'Unknown Uploader', views, duration, uploadedDate, description, url: `https://rumble.com/${stableId}`, embedUrl, type: 'video' };
|
if (!ident?.id) {
|
||||||
|
return { error: 'Unable to determine Rumble video ID', input: videoIdOrUrl };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Métadonnées robustes
|
||||||
|
const title =
|
||||||
|
$('h1.video-title, .video-title h1').first().text().trim()
|
||||||
|
|| $('meta[property="og:title"]').attr('content') || 'Untitled Video';
|
||||||
|
|
||||||
|
let thumbnail = $('meta[property="og:image"]').attr('content') || '';
|
||||||
|
if (thumbnail && thumbnail.startsWith('//')) thumbnail = 'https:' + thumbnail;
|
||||||
|
|
||||||
|
const uploaderName =
|
||||||
|
$('.media-by--a, .channel-name, a[href*="/c/"]').first().text().trim() || '';
|
||||||
|
|
||||||
|
const viewsText =
|
||||||
|
$('.rumbles-views, .video-views, .media-view-count, [data-view-count]').first().text().trim() || '';
|
||||||
|
const views = parseInt(viewsText.replace(/[^\d]/g, ''), 10) || 0;
|
||||||
|
|
||||||
|
const duration = parseInt($('meta[property="video:duration"]').attr('content') || '', 10) || 0;
|
||||||
|
|
||||||
|
const uploadedDate =
|
||||||
|
$('meta[property="article:published_time"]').attr('content')
|
||||||
|
|| $('time[datetime]').attr('datetime') || '';
|
||||||
|
|
||||||
|
const description =
|
||||||
|
$('meta[property="og:description"]').attr('content')
|
||||||
|
|| $('meta[name="description"]').attr('content') || '';
|
||||||
|
|
||||||
|
// embedUrl final — toujours la forme officielle
|
||||||
|
const embedUrl = ident.embedUrl;
|
||||||
|
|
||||||
|
return {
|
||||||
|
videoId: ident.id,
|
||||||
|
title,
|
||||||
|
thumbnail,
|
||||||
|
uploaderName,
|
||||||
|
views,
|
||||||
|
duration,
|
||||||
|
uploadedDate,
|
||||||
|
description,
|
||||||
|
url: ident.urlCanonique,
|
||||||
|
embedUrl,
|
||||||
|
type: 'video'
|
||||||
|
};
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('scrapeRumbleVideo error:', e.message);
|
const msg = (e && e.message) ? e.message : String(e);
|
||||||
return { videoId, error: 'Scraping failed' };
|
return { error: `Scraping failed: ${msg}` };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ------------------ Scraper de liste (search / browse) ------------------ */
|
||||||
|
/**
 * Converts a "mm:ss" or "hh:mm:ss" duration label into a number of seconds.
 *
 * @param {string} text - Duration label; surrounding whitespace is ignored.
 * @returns {number} Total seconds, or 0 when the text is empty or does not
 *   match either format.
 */
function parseDurationToSeconds(text) {
  if (!text) return 0;

  const m = /^(\d{1,2}):(\d{2})(?::(\d{2}))?$/.exec(String(text).trim());
  if (!m) return 0;

  const [, first, second, third] = m;
  // Two fields mean mm:ss. The optional third group shifts the meaning of the
  // first two: with three fields they are hh:mm and the third is ss.
  if (third === undefined) {
    return Number(first) * 60 + Number(second);
  }
  return Number(first) * 3600 + Number(second) * 60 + Number(third);
}
|
||||||
|
|
||||||
async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
|
async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
|
||||||
try {
|
try {
|
||||||
const url = q
|
const url = q
|
||||||
? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${page}`
|
? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${page}`
|
||||||
: `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${page}`;
|
: `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${page}`;
|
||||||
|
|
||||||
const html = await httpGet(url);
|
const html = await httpGet(url);
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
const items = [];
|
|
||||||
// Try to select video cards; Rumble uses different layouts, so search broadly
|
const found = [];
|
||||||
|
// 1) Cartes "vidéos" standards (li/article/div)
|
||||||
$('a[href^="/v"], a[href^="/video/"]').each((_, el) => {
|
$('a[href^="/v"], a[href^="/video/"]').each((_, el) => {
|
||||||
const a = $(el);
|
const href = $(el).attr('href') || '';
|
||||||
const href = a.attr('href') || '';
|
// On préfère STRICTEMENT l’ID /vXXXX
|
||||||
// Expect href like /vabcdef or /video/abcdef
|
let m = /^\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(href);
|
||||||
const m = href.match(/\/v([A-Za-z0-9]+)/) || href.match(/\/video\/([A-Za-z0-9]+)/);
|
let id = m?.[1] || null;
|
||||||
if (!m) return;
|
|
||||||
const vid = `v${m[1]}`;
|
// Fallback minimaliste pour /video/123 → on ne convertit pas ici; on laissera /video/:id passer au détails qui normalise par parse de la page.
|
||||||
const title = a.attr('title') || a.text().trim();
|
const isLegacy = !id && /^\/video\//.test(href);
|
||||||
// Look around for thumbnail and meta
|
|
||||||
const parent = a.closest('li, article, div');
|
if (!id && !isLegacy) return;
|
||||||
const img = parent.find('img').first();
|
|
||||||
let thumb = img.attr('data-src') || img.attr('src') || '';
|
const card = $(el).closest('li, article, .video-listing-entry, .video-item, .video-card, div');
|
||||||
|
|
||||||
|
const title = (($(el).attr('title') || '') + ' ' + $(el).text()).trim() || card.find('h3, h2, .video-item--title').first().text().trim();
|
||||||
|
|
||||||
|
// Thumb robuste: data-src > src
|
||||||
|
let thumb =
|
||||||
|
card.find('img').first().attr('data-src')
|
||||||
|
|| card.find('img').first().attr('src')
|
||||||
|
|| '';
|
||||||
if (thumb && thumb.startsWith('//')) thumb = 'https:' + thumb;
|
if (thumb && thumb.startsWith('//')) thumb = 'https:' + thumb;
|
||||||
const durationText = parent.find('.video-item--duration, .video-duration, .duration').first().text().trim();
|
|
||||||
const viewsText = parent.find('.video-item--views, .rumbles-views, .views').first().text().trim();
|
const durationText =
|
||||||
const duration = (() => {
|
card.find('.video-item--duration, .video-duration, .duration, .video-item__duration').first().text().trim();
|
||||||
const m = durationText.match(/(\d+):(\d+)(?::(\d+))?/);
|
const viewsText =
|
||||||
if (!m) return 0;
|
card.find('.video-item--views, .rumbles-views, .views, .video-item__views').first().text().trim();
|
||||||
const h = parseInt(m[3] || '0', 10), mn = parseInt(m[1] || '0', 10), s = parseInt(m[2] || '0', 10);
|
|
||||||
return h * 3600 + mn * 60 + s;
|
const duration = parseDurationToSeconds(durationText);
|
||||||
})();
|
const views = parseInt((viewsText || '').replace(/[^\d]/g, ''), 10) || 0;
|
||||||
const views = parseInt((viewsText || '').replace(/[^0-9]/g, '')) || 0;
|
|
||||||
items.push({ videoId: vid, title, thumbnail: thumb, uploaderName: '', views, duration, uploadedDate: '', url: `https://rumble.com/${vid}`, type: 'video' });
|
// Important: on renvoie TOUJOURS une URL canonique cohérente
|
||||||
|
let url = null;
|
||||||
|
let videoId = null;
|
||||||
|
|
||||||
|
if (id) {
|
||||||
|
videoId = id;
|
||||||
|
url = `https://rumble.com/${id}`;
|
||||||
|
} else if (isLegacy) {
|
||||||
|
// Laisse l’endpoint /video/:slug gérer la normalisation
|
||||||
|
videoId = href.replace(/^\//, ''); // "video/123..."
|
||||||
|
url = `https://rumble.com/${videoId}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filtrage doublons par videoId (id ou "video/123...")
|
||||||
|
const key = videoId;
|
||||||
|
found.push({
|
||||||
|
videoId: key,
|
||||||
|
title,
|
||||||
|
thumbnail: thumb,
|
||||||
|
uploaderName: '',
|
||||||
|
views,
|
||||||
|
duration,
|
||||||
|
uploadedDate: '',
|
||||||
|
url,
|
||||||
|
type: 'video'
|
||||||
});
|
});
|
||||||
// De-duplicate by videoId and slice to limit
|
});
|
||||||
|
|
||||||
|
// De-dupe
|
||||||
const seen = new Set();
|
const seen = new Set();
|
||||||
const unique = [];
|
const unique = [];
|
||||||
for (const it of items) { if (!seen.has(it.videoId)) { seen.add(it.videoId); unique.push(it); } }
|
for (const it of found) {
|
||||||
|
if (!it.videoId) continue;
|
||||||
|
if (seen.has(it.videoId)) continue;
|
||||||
|
seen.add(it.videoId);
|
||||||
|
unique.push(it);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Limite + nextCursor (page-based)
|
||||||
const list = unique.slice(0, limit);
|
const list = unique.slice(0, limit);
|
||||||
const nextCursor = list.length === limit ? String(Number(page) + 1) : null;
|
const nextCursor = list.length === limit ? String(Number(page) + 1) : null;
|
||||||
return { items: list, total: unique.length, page: Number(page), limit: Number(limit), nextCursor };
|
|
||||||
|
return {
|
||||||
|
items: list,
|
||||||
|
total: unique.length,
|
||||||
|
page: Number(page),
|
||||||
|
limit: Number(limit),
|
||||||
|
nextCursor
|
||||||
|
};
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('scrapeRumbleList error:', e.message);
|
return {
|
||||||
return { items: [], total: 0, page: Number(page), limit: Number(limit), nextCursor: null };
|
items: [],
|
||||||
|
total: 0,
|
||||||
|
page: Number(page),
|
||||||
|
limit: Number(limit),
|
||||||
|
nextCursor: null,
|
||||||
|
error: (e && e.message) ? e.message : String(e)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* --------------------------------- Routes -------------------------------- */
|
||||||
router.get('/browse', async (req, res) => {
|
router.get('/browse', async (req, res) => {
|
||||||
const page = parseInt(String(req.query.page || '1'), 10) || 1;
|
const page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
|
||||||
const limit = Math.min(50, parseInt(String(req.query.limit || '24'), 10) || 24);
|
const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24));
|
||||||
const sort = String(req.query.sort || 'viral');
|
const sort = String(req.query.sort || 'viral');
|
||||||
|
|
||||||
const key = cacheKey('/browse', { page, limit, sort });
|
const key = cacheKey('/browse', { page, limit, sort });
|
||||||
const cached = getCache(key);
|
const cached = getCache(key);
|
||||||
if (cached) return res.json(cached);
|
if (cached) return res.json(cached);
|
||||||
|
|
||||||
const data = await scrapeRumbleList({ page, limit, sort });
|
const data = await scrapeRumbleList({ page, limit, sort });
|
||||||
setCache(key, data);
|
setCache(key, data);
|
||||||
return res.json(data);
|
return res.json(data);
|
||||||
@ -148,37 +350,67 @@ router.get('/browse', async (req, res) => {
|
|||||||
router.get('/search', async (req, res) => {
|
router.get('/search', async (req, res) => {
|
||||||
const q = String(req.query.q || '').trim();
|
const q = String(req.query.q || '').trim();
|
||||||
if (!q) return res.status(400).json({ error: 'Query parameter required' });
|
if (!q) return res.status(400).json({ error: 'Query parameter required' });
|
||||||
const limit = Math.min(50, parseInt(String(req.query.limit || '24'), 10) || 24);
|
|
||||||
|
const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24));
|
||||||
const page = (() => {
|
const page = (() => {
|
||||||
// Support offset-based cursor from frontend by translating offset->page
|
|
||||||
if (req.query.offset != null) {
|
if (req.query.offset != null) {
|
||||||
const offset = parseInt(String(req.query.offset), 10) || 0;
|
const offset = parseInt(String(req.query.offset), 10) || 0;
|
||||||
return Math.floor(offset / limit) + 1;
|
return Math.floor(offset / limit) + 1;
|
||||||
}
|
}
|
||||||
return parseInt(String(req.query.page || '1'), 10) || 1;
|
return Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
|
||||||
})();
|
})();
|
||||||
|
|
||||||
const key = cacheKey('/search', { q, page, limit });
|
const key = cacheKey('/search', { q, page, limit });
|
||||||
const cached = getCache(key);
|
const cached = getCache(key);
|
||||||
if (cached) return res.json(cached);
|
if (cached) return res.json(cached);
|
||||||
|
|
||||||
const data = await scrapeRumbleList({ q, page, limit });
|
const data = await scrapeRumbleList({ q, page, limit });
|
||||||
setCache(key, data);
|
setCache(key, data);
|
||||||
return res.json(data);
|
return res.json(data);
|
||||||
});
|
});
|
||||||
|
|
||||||
/**
 * Loads the details of one video, serving them from the in-memory cache when
 * possible. Failed scrapes (objects carrying `error`) are never cached.
 *
 * @param {string} raw - Route parameter: "vXXXX", "video/123...", or a URL.
 * @returns {Promise<object>} The scraped video data, or an object with an
 *   `error` property when scraping failed.
 */
async function loadRumbleVideo(raw) {
  const key = cacheKey('/video', { videoId: raw });
  const cached = getCache(key);
  if (cached) return cached;

  // Normalize as far as possible before scraping.
  const norm = normalizeRumbleId(raw) || { urlCanonique: `https://rumble.com/${raw}` };
  const data = await scrapeRumbleVideo(norm.id || norm.urlCanonique);
  if (!data.error) setCache(key, data);
  return data;
}

/* -------------- Option: ad-free "preplay" card (non-embed) --------------- */
/**
 * Rumble's ads are NOT disabled here (there is no reliable official
 * parameter for that). Instead this route serves the metadata for a
 * "preplay" component:
 *   - it shows the thumbnail and title;
 *   - on click it either (A) opens the video on Rumble, or (B) injects the
 *     official iframe, which triggers Rumble's own ad logic.
 *
 * This route MUST be registered before '/video/:videoId(*)': Express matches
 * routes in registration order, and the wildcard would otherwise capture
 * "/video/<id>/preplay" as a video ID.
 */
router.get('/video/:videoId/preplay', async (req, res) => {
  try {
    const data = await loadRumbleVideo(String(req.params.videoId));
    if (data.error) return res.status(404).json(data);

    return res.json({
      videoId: data.videoId,
      title: data.title,
      thumbnail: data.thumbnail,
      rumbleUrl: data.url, // "Open on the provider's site" button
      embedUrl: data.embedUrl // deferred injection if the user insists on playing here
    });
  } catch (error) {
    console.error('Rumble video error:', error);
    return res.status(500).json({ error: 'Failed to scrape video' });
  }
});

// Details endpoint. :videoId may be "vXXXX" OR "video/123..." (hence the wildcard).
router.get('/video/:videoId(*)', async (req, res) => {
  try {
    const data = await loadRumbleVideo(String(req.params.videoId));
    if (data.error) return res.status(404).json(data);
    return res.json(data);
  } catch (error) {
    console.error('Rumble video error:', error);
    return res.status(500).json({ error: 'Failed to scrape video' });
  }
});
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user