From d6da699c54d6a0f9e99dc5c3a53929efa8806584 Mon Sep 17 00:00:00 2001 From: Bruno Charest Date: Fri, 19 Sep 2025 09:33:54 -0400 Subject: [PATCH] refactor: improve Rumble video scraping with robust ID normalization and metadata parsing --- db/newtube.db | Bin 208896 -> 212992 bytes server/rumble.mjs | 408 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 320 insertions(+), 88 deletions(-) diff --git a/db/newtube.db b/db/newtube.db index f8413352edbeac8b1b6ccb58ad9a9b8249a242da..0254ffdcc4ab9d3436ef3cd2ff9ef37d89a5b1c2 100644 GIT binary patch delta 3122 zcmd5;ZA=^Y8NV05;g>sTDkvcx#~Q*~dgpub8J`oHoWZ!%!N%Aa;=Lrs7~28vY@cmR zx-Gz@HPxglt*hKJZB47RQk$u)6_Y%LP1?J(7j>N~MccZnU8{Y{s+LvaL$j&-pKTxn z_M!cdvyQjd4A9Dd7l5$JJzKo+rw?Bc6#Alcp~T@ zDozY0qWM$tw3muSMLFg4Mkk66Cl}_%%C2~T6XzpyDPCC!%~x_i8m)ZrsI~IvPHW{% z4!@6cGY&V+Q++Nz#?mZLGBn8u5#~00?WvsCz$*!PKJ1#D4vlfilUZuQnVt%jsjMrN zFD5c0i@XpGMAG8{%CC6(B7sGvJdha(R$8uqt@3#J@R8b$EaPT5igr2g@U)Guy-_~V zgNN~U>|fa1*w3*SunV|zt%aDuDi>CpEw{x~xw`r+w)@hx&=1zmU?{!;Xo4L0EqoK- zsNU5Kzm2y7fTO^N>I5IM*$lpn`G#sF;4Z6q!_tGlf}gg0VzH_e0k?IQEx*Ar+ixwa zmI@X!->@xOJox>VFJm8B|7jbrKDXA=JZC8Yj5gnRoDnNU)&IOVrr%TCrESj2^>0yzp{go5h5Z@=2 z28Yi2>13E5^n}78w&tsJQY=Ll%jFy=3>71%{EEa4PKOxHR-7p$CQm=Y&b!!rT9yJn zWu!b3qBWB|aBgrk=PNJDelF~p91rA}*g(jxnR2N}I^~@W&MfABIVO8Lk@F^|y5I=@fm3?)!#W;{l`JcEcJp zylLt+enZz^rD*sQRL|?j+uJJQGu`S>55NZX;x$AM^J?%I%ozDZqO_24G!99bo+)zM zmO_eWw=7cmT$-sL*tvz&xj5Z8$mxaTqP=l&$g+^4YKQ8BPWVqvcQfc|2tAEHhrXxl z)}?eW>2B)Ds_6vmLsi)e^ES@S@<8{LOK`+^_0 z+ZSVZLkz>mm~F`6cCeJg#l+~_A%+rYA!gr(SU1g4g22Qm4LbVRH7{Jf=0zrB=C44_ zCLe@9ZOy6Jv&cc6$!GEb;oWOOl84maUqcx6gZoi~n)?aTdTAAD!t1TTYxlW8Y}M5l zkyl`Y^(|%VXrYHs=%vL3_!fFE!A*z&&hBK|V=sKsl2$M%NJ zf%TY&&EGR`m^XmK*PGP$*AaHd8+=IJxDJ40Bx+Ir@gmZye)l@^Z@ujT6lqh%A2%Fn zF z@s9hv%Wyw}AgcHmWc_o{CBs4JQf)NRLSrCA#HDE>oR&4iyG!35l%;o$c!OuQ9sSwfOG4Z6@(uH=OciMyYynV>-4CaRlRBp_X zq!?0}kI&7KwIE0>2vU~FIH?6ef*{DaM4ip1?nrpM+5a29;^B4^q0Q}0|3m#Fy`_O9HH^mOXo_^0%8cO`Hds07Lv_W=bJO#lY|Y zAu5g1hGi*5h*LlhL=O-_2Apa-5J9Ci9rO^MF|X(k6T?y=nU|7<2Z<3`DbxkfP()DL zQpkr_Di=Py^8D3wp^#PFb~_U%nLdG0i7vGHykzI>IUp2?%Qb}3ASW{HCnG^W84d<~ zdv$T!c-J07+Y@I8UlEusHoEF0X^@@gBhU-!4p?935A%E^F+81&lieWrQY(Q zod+!+vWt8^bhg&=TQYClLT=qb*6$!^W?7K2)Je{!JZ>OD1YHyHF WlSz7aCR!OjYbN!F#NJHm_lzw delta 946 zcmZ9KOKcle6o%*CvB%ELoqO$=h9pLcL#sqi@GuY0_|YT{IL1vJYZeltsM@D9!$`_{f z@ch2yV|xY@bA|kjFx#_Rj3j%9q6d1TqS9v+-|aVkT|&k;4agWRlI?O_4#%a4AVtKi zEXry~5<^lCoz@?h_sffVW+65*^suIN%`A>7f~-tUWQ0tAA=j5r^rTZ` zJ>!BpmYGkF&!ln(6NY#FIpgJp9ZgYPQsc51SHps)#s0nYU%hf=p|hE2WQf$^-|=<) zAwG={SG<9J*f?DFaQApJPL)sK|1A$(sElGrHtjc23fUr?+$F2p1Fn;rh%PH}TNN7Y z7V@xm4{~MG6#$JgStEP7+vGJ=kKe-u(uF4Rn`i}J#TxfL_Zh+b=Uf>N@QWmfZ=v7# z1UgyqdUVbzw}OnLCFtCOd?(Jcz3%V0O^jW)7==0I+Hh_$lP;gDm;V~|G1uI!t{1Hi z5!|LL0nZn}>}vy(^^gXxRyl&s8Oo~d1gSft05ta|fNCCpkVY@9C4se;go9c15#@20 zDe@{RvQL`m1gy6mthlZ68Mp?R9M25b-ERiY!Y1>5A7HH2b8ybt^dseAyD8L$za+c3 zU$~=am7K>*ToT>jukc~q%yzTK*_$?~ku`6wL)mN#(VY478CYY!z7GFz?zu~SoxkKO?KBSs=!<5dl~&CcT4=^xYNUPtfYeCL`lOZK>G%^6o&x~@ diff --git a/server/rumble.mjs b/server/rumble.mjs index 5a419fd..d283b9b 100644 --- a/server/rumble.mjs +++ b/server/rumble.mjs @@ -5,29 +5,26 @@ import rateLimit from 'express-rate-limit'; const router = express.Router(); -// Rate limiter for Rumble scraping to prevent being blocked +/* ----------------------------- Rate limiting ----------------------------- */ const rumbleLimiter = rateLimit({ - windowMs: 60 * 1000, // 1 min + windowMs: 60 * 1000, max: 20, standardHeaders: true, legacyHeaders: false, - message: { error: 'Too many requests to Rumble API. Please try again later.' } + message: { error: 'Too many requests to Rumble. Please try again later.' } }); - router.use(rumbleLimiter); -// Simple in-memory cache with TTL +/* --------------------------------- Cache -------------------------------- */ const cache = new Map(); const TTL_MS = 60 * 1000; // 60s function cacheKey(path, params) { return `${path}?${new URLSearchParams(params).toString()}`; } - function setCache(key, data) { cache.set(key, { data, expires: Date.now() + TTL_MS }); } - function getCache(key) { const hit = cache.get(key); if (!hit) return null; @@ -35,56 +32,202 @@ function getCache(key) { return hit.data; } +/* ------------------------------- HTTP GET -------------------------------- */ async function httpGet(url) { const resp = await axios.get(url, { headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + // UA “desktop” moderne pour minimiser les anti-bot simples + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36', + 'Accept': + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.8' }, - timeout: 15000 + timeout: 15000, + // Important: pas de redirects inter-domain hasardeux + maxRedirects: 3, + validateStatus: s => s >= 200 && s < 400 }); return resp.data; } -async function scrapeRumbleVideo(videoId) { - try { - const html = await httpGet(`https://rumble.com/${videoId}`); - const $ = cheerio.load(html); - const title = $('h1.video-title, .video-title h1').first().text().trim() || $('meta[property="og:title"]').attr('content') || ''; - const thumbnail = $('meta[property="og:image"]').attr('content') || ''; - const uploaderName = $('.media-by--a, .channel-name').first().text().trim() || ''; - const viewsText = $('.rumbles-views, .video-views').first().text().trim(); - const views = parseInt((viewsText || '').replace(/[^0-9]/g, '')) || 0; - const durationText = $('meta[property="video:duration"]').attr('content'); - const duration = durationText ? parseInt(durationText) : 0; - const uploadedDate = $('meta[property="article:published_time"]').attr('content') || ''; - const description = $('meta[property="og:description"]').attr('content') || ''; - // Try to extract the official embed URL - let embedUrl = $('meta[property="og:video"], meta[name="twitter:player"]').attr('content') || ''; - if (!embedUrl) { - const iframeSrc = $('iframe[src*="/embed/"]').attr('src') || ''; - embedUrl = iframeSrc || ''; - } - // Normalize protocol-less URLs - if (embedUrl && embedUrl.startsWith('//')) embedUrl = 'https:' + embedUrl; - // Detect canonical URL to extract stable ID - const canonicalUrl = $('link[rel="canonical"]').attr('href') || $('meta[property="og:url"]').attr('content') || ''; - // Normalize/derive the stable Rumble ID (e.g., v464efu) - let stableId = videoId; - const mEmbed = /\/embed\/(v[0-9A-Za-z]+)/.exec(embedUrl || ''); - const mCanon = /\/(v[0-9A-Za-z]+)(?:[\-./]|$)/.exec(canonicalUrl || ''); - if (mEmbed && mEmbed[1]) stableId = mEmbed[1]; - else if (mCanon && mCanon[1]) stableId = mCanon[1]; - // If embedUrl is a page URL, convert to embed path as a fallback - if (!/\/embed\//.test(embedUrl)) { - embedUrl = `https://rumble.com/embed/${stableId}/?autoplay=2&muted=1`; - } - return { videoId: stableId, title: title || 'Untitled Video', thumbnail, uploaderName: uploaderName || 'Unknown Uploader', views, duration, uploadedDate, description, url: `https://rumble.com/${stableId}`, embedUrl, type: 'video' }; - } catch (e) { - console.error('scrapeRumbleVideo error:', e.message); - return { videoId, error: 'Scraping failed' }; +/* ------------------------- Utils: normalisation ID ------------------------ */ +/** + * Rumble expose plusieurs formes: + * - Page canoniques: https://rumble.com/v6siqxf-some-title.html + * - Ancienne forme: https://rumble.com/video/12345 + * - URL d’embed officielle: https://rumble.com/embed/v6siqxf/ + * - ID brut attendu: v6siqxf (toujours commence par 'v' + base62) + * + * Cette fonction accepte: ID ou URL et renvoie { id: 'vXXXX', urlCanonique, embedUrl } + */ +function normalizeRumbleId(input, { preferEmbed = true } = {}) { + if (!input) return null; + + let id = null; + let urlCanonique = null; + let embedUrl = null; + + // 1) Si on nous donne déjà un ID "vXXXX" + const clean = String(input).trim(); + const mIdOnly = /^v[0-9A-Za-z]+$/.exec(clean); + if (mIdOnly) { + id = clean; + urlCanonique = `https://rumble.com/${id}`; + embedUrl = `https://rumble.com/embed/${id}/`; + return { id, urlCanonique, embedUrl }; } + + // 2) Si on nous donne une URL + try { + const u = new URL(clean, 'https://rumble.com'); + // /embed/vXXXX/ + let m = /\/embed\/(v[0-9A-Za-z]+)/.exec(u.pathname); + if (!m) m = /\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(u.pathname); + if (!m) { + // ancienne forme /video/123 → on ne sait pas convertir de manière fiable + const mOld = /\/video\/([0-9A-Za-z]+)/.exec(u.pathname); + if (mOld) { + // On garde l’URL telle quelle et laissera le parseur de la page extraire le vrai vID. + return { id: null, urlCanonique: u.href, embedUrl: null }; + } + return null; + } + id = m[1]; + urlCanonique = `https://rumble.com/${id}`; + embedUrl = `https://rumble.com/embed/${id}/`; + return { id, urlCanonique, embedUrl }; + } catch { + return null; + } +} + +/* ---------------------- Parsing robuste d’une PAGE vidéo ------------------ */ +/** + * Source d’autorité pour le vrai ID: le JS inline: + * Rumble("play", {..., "video":"vXXXX", ...}) + * On prend ensuite en fallback: (souvent /embed/vXXXX/) + * puis ou (contenant /vXXXX-...). + * + * NB: ce choix est basé sur l’observation publique: la valeur "video":"vXXXX" + * est exactement l’ID attendu par l’embed officiel. + */ +function extractVideoIdentity($) { + // 1) Script "Rumble('play', {... "video":"vXXXX" ...})" + // On évite d’exécuter quoi que ce soit; simple regex sur tout le HTML. + const html = $.html() || ''; + let m = /Rumble\(\s*["']play["']\s*,\s*{[^}]*["']video["']\s*:\s*["'](v[0-9A-Za-z]+)["']/s.exec(html); + if (m && m[1]) { + const id = m[1]; + return { + id, + embedUrl: `https://rumble.com/embed/${id}/`, + urlCanonique: `https://rumble.com/${id}` + }; + } + + // 2) og:video → .../embed/vXXXX/... + let embed = $('meta[property="og:video"]').attr('content') + || $('meta[name="twitter:player"]').attr('content'); + if (embed) { + if (embed.startsWith('//')) embed = 'https:' + embed; + const mm = /\/embed\/(v[0-9A-Za-z]+)/.exec(embed); + if (mm) { + const id = mm[1]; + return { id, embedUrl: `https://rumble.com/embed/${id}/`, urlCanonique: `https://rumble.com/${id}` }; + } + } + + // 3) Canonical / og:url → .../vXXXX-... + let canon = $('link[rel="canonical"]').attr('href') + || $('meta[property="og:url"]').attr('content'); + if (canon) { + const mm = /\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(canon); + if (mm) { + const id = mm[1]; + return { id, embedUrl: `https://rumble.com/embed/${id}/`, urlCanonique: `https://rumble.com/${id}` }; + } + } + + return null; +} + +/* -------------------------- Scraper d’une vidéo -------------------------- */ +async function scrapeRumbleVideo(videoIdOrUrl) { + try { + // Accepte /:videoId ou une URL complète. + let norm = normalizeRumbleId(videoIdOrUrl); + const fetchUrl = norm?.urlCanonique || `https://rumble.com/${videoIdOrUrl}`; + const html = await httpGet(fetchUrl); + const $ = cheerio.load(html); + + // Identité fiable (id + embed + canonique) + let ident = extractVideoIdentity($); + if (!ident) { + // dernier recours: ré-essayer avec la page telle quelle si on est venu via /video/123 + if (!norm?.id && norm?.urlCanonique) { + ident = extractVideoIdentity($); + } + } + if (!ident?.id) { + return { error: 'Unable to determine Rumble video ID', input: videoIdOrUrl }; + } + + // Métadonnées robustes + const title = + $('h1.video-title, .video-title h1').first().text().trim() + || $('meta[property="og:title"]').attr('content') || 'Untitled Video'; + + let thumbnail = $('meta[property="og:image"]').attr('content') || ''; + if (thumbnail && thumbnail.startsWith('//')) thumbnail = 'https:' + thumbnail; + + const uploaderName = + $('.media-by--a, .channel-name, a[href*="/c/"]').first().text().trim() || ''; + + const viewsText = + $('.rumbles-views, .video-views, .media-view-count, [data-view-count]').first().text().trim() || ''; + const views = parseInt(viewsText.replace(/[^\d]/g, ''), 10) || 0; + + const duration = parseInt($('meta[property="video:duration"]').attr('content') || '', 10) || 0; + + const uploadedDate = + $('meta[property="article:published_time"]').attr('content') + || $('time[datetime]').attr('datetime') || ''; + + const description = + $('meta[property="og:description"]').attr('content') + || $('meta[name="description"]').attr('content') || ''; + + // embedUrl final — toujours la forme officielle + const embedUrl = ident.embedUrl; + + return { + videoId: ident.id, + title, + thumbnail, + uploaderName, + views, + duration, + uploadedDate, + description, + url: ident.urlCanonique, + embedUrl, + type: 'video' + }; + } catch (e) { + const msg = (e && e.message) ? e.message : String(e); + return { error: `Scraping failed: ${msg}` }; + } +} + +/* ------------------ Scraper de liste (search / browse) ------------------ */ +function parseDurationToSeconds(text) { + if (!text) return 0; + // supporte mm:ss ou hh:mm:ss + const m = text.trim().match(/^(\d{1,2}):(\d{2})(?::(\d{2}))?$/); + if (!m) return 0; + const h = parseInt(m[3] || '0', 10), mn = parseInt(m[1] || '0', 10), s = parseInt(m[2] || '0', 10); + return h * 3600 + mn * 60 + s; } async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) { @@ -92,54 +235,113 @@ async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) { const url = q ? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${page}` : `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${page}`; + const html = await httpGet(url); const $ = cheerio.load(html); - const items = []; - // Try to select video cards; Rumble uses different layouts, so search broadly + + const found = []; + // 1) Cartes "vidéos" standards (li/article/div) $('a[href^="/v"], a[href^="/video/"]').each((_, el) => { - const a = $(el); - const href = a.attr('href') || ''; - // Expect href like /vabcdef or /video/abcdef - const m = href.match(/\/v([A-Za-z0-9]+)/) || href.match(/\/video\/([A-Za-z0-9]+)/); - if (!m) return; - const vid = `v${m[1]}`; - const title = a.attr('title') || a.text().trim(); - // Look around for thumbnail and meta - const parent = a.closest('li, article, div'); - const img = parent.find('img').first(); - let thumb = img.attr('data-src') || img.attr('src') || ''; + const href = $(el).attr('href') || ''; + // On préfère STRICTEMENT l’ID /vXXXX + let m = /^\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(href); + let id = m?.[1] || null; + + // Fallback minimaliste pour /video/123 → on ne convertit pas ici; on laissera /video/:id passer au détails qui normalise par parse de la page. + const isLegacy = !id && /^\/video\//.test(href); + + if (!id && !isLegacy) return; + + const card = $(el).closest('li, article, .video-listing-entry, .video-item, .video-card, div'); + + const title = (($(el).attr('title') || '') + ' ' + $(el).text()).trim() || card.find('h3, h2, .video-item--title').first().text().trim(); + + // Thumb robuste: data-src > src + let thumb = + card.find('img').first().attr('data-src') + || card.find('img').first().attr('src') + || ''; if (thumb && thumb.startsWith('//')) thumb = 'https:' + thumb; - const durationText = parent.find('.video-item--duration, .video-duration, .duration').first().text().trim(); - const viewsText = parent.find('.video-item--views, .rumbles-views, .views').first().text().trim(); - const duration = (() => { - const m = durationText.match(/(\d+):(\d+)(?::(\d+))?/); - if (!m) return 0; - const h = parseInt(m[3] || '0', 10), mn = parseInt(m[1] || '0', 10), s = parseInt(m[2] || '0', 10); - return h * 3600 + mn * 60 + s; - })(); - const views = parseInt((viewsText || '').replace(/[^0-9]/g, '')) || 0; - items.push({ videoId: vid, title, thumbnail: thumb, uploaderName: '', views, duration, uploadedDate: '', url: `https://rumble.com/${vid}`, type: 'video' }); + + const durationText = + card.find('.video-item--duration, .video-duration, .duration, .video-item__duration').first().text().trim(); + const viewsText = + card.find('.video-item--views, .rumbles-views, .views, .video-item__views').first().text().trim(); + + const duration = parseDurationToSeconds(durationText); + const views = parseInt((viewsText || '').replace(/[^\d]/g, ''), 10) || 0; + + // Important: on renvoie TOUJOURS une URL canonique cohérente + let url = null; + let videoId = null; + + if (id) { + videoId = id; + url = `https://rumble.com/${id}`; + } else if (isLegacy) { + // Laisse l’endpoint /video/:slug gérer la normalisation + videoId = href.replace(/^\//, ''); // "video/123..." + url = `https://rumble.com/${videoId}`; + } + + // Filtrage doublons par videoId (id ou "video/123...") + const key = videoId; + found.push({ + videoId: key, + title, + thumbnail: thumb, + uploaderName: '', + views, + duration, + uploadedDate: '', + url, + type: 'video' + }); }); - // De-duplicate by videoId and slice to limit + + // De-dupe const seen = new Set(); const unique = []; - for (const it of items) { if (!seen.has(it.videoId)) { seen.add(it.videoId); unique.push(it); } } + for (const it of found) { + if (!it.videoId) continue; + if (seen.has(it.videoId)) continue; + seen.add(it.videoId); + unique.push(it); + } + + // Limite + nextCursor (page-based) const list = unique.slice(0, limit); const nextCursor = list.length === limit ? String(Number(page) + 1) : null; - return { items: list, total: unique.length, page: Number(page), limit: Number(limit), nextCursor }; + + return { + items: list, + total: unique.length, + page: Number(page), + limit: Number(limit), + nextCursor + }; } catch (e) { - console.error('scrapeRumbleList error:', e.message); - return { items: [], total: 0, page: Number(page), limit: Number(limit), nextCursor: null }; + return { + items: [], + total: 0, + page: Number(page), + limit: Number(limit), + nextCursor: null, + error: (e && e.message) ? e.message : String(e) + }; } } +/* --------------------------------- Routes -------------------------------- */ router.get('/browse', async (req, res) => { - const page = parseInt(String(req.query.page || '1'), 10) || 1; - const limit = Math.min(50, parseInt(String(req.query.limit || '24'), 10) || 24); + const page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1); + const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24)); const sort = String(req.query.sort || 'viral'); + const key = cacheKey('/browse', { page, limit, sort }); const cached = getCache(key); if (cached) return res.json(cached); + const data = await scrapeRumbleList({ page, limit, sort }); setCache(key, data); return res.json(data); @@ -148,37 +350,67 @@ router.get('/browse', async (req, res) => { router.get('/search', async (req, res) => { const q = String(req.query.q || '').trim(); if (!q) return res.status(400).json({ error: 'Query parameter required' }); - const limit = Math.min(50, parseInt(String(req.query.limit || '24'), 10) || 24); + + const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24)); const page = (() => { - // Support offset-based cursor from frontend by translating offset->page if (req.query.offset != null) { const offset = parseInt(String(req.query.offset), 10) || 0; return Math.floor(offset / limit) + 1; } - return parseInt(String(req.query.page || '1'), 10) || 1; + return Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1); })(); + const key = cacheKey('/search', { q, page, limit }); const cached = getCache(key); if (cached) return res.json(cached); + const data = await scrapeRumbleList({ q, page, limit }); setCache(key, data); return res.json(data); }); -router.get('/video/:videoId', async (req, res) => { +// Endpoint details. Accepte :videoId pouvant être "vXXXX" OU "video/123..." +router.get('/video/:videoId(*)', async (req, res) => { try { - const { videoId } = req.params; - const key = cacheKey('/video', { videoId }); + const raw = String(req.params.videoId); + const key = cacheKey('/video', { videoId: raw }); const cached = getCache(key); if (cached) return res.json(cached); - const videoData = await scrapeRumbleVideo(videoId); - if (videoData.error) return res.status(404).json({ error: 'Video not found or scraping failed' }); - setCache(key, videoData); - return res.json(videoData); + + // Normalise au maximum avant scrape + const norm = normalizeRumbleId(raw) || { urlCanonique: `https://rumble.com/${raw}` }; + const data = await scrapeRumbleVideo(norm.id || norm.urlCanonique); + if (data.error) return res.status(404).json(data); + + setCache(key, data); + return res.json(data); } catch (error) { - console.error('Rumble video error:', error); return res.status(500).json({ error: 'Failed to scrape video' }); } }); +/* ----------------- Option: “prélecteur” sans pub (non-embed) -------------- */ +/** + * On NE désactive PAS les pubs côté Rumble (pas de param officiel fiable). + * Mais on peut servir un “preplay”: + * - On affiche miniature/titre. + * - Au clic: (A) ouvrir dans Rumble (UX la plus propre), ou (B) injecter l’iframe + * officiellement (ce qui déclenchera leur logique pub). + * Cette route renvoie juste les meta nécessaires pour ce composant prélecteur. + */ +router.get('/video/:videoId/preplay', async (req, res) => { + const raw = String(req.params.videoId); + const norm = normalizeRumbleId(raw) || { urlCanonique: `https://rumble.com/${raw}` }; + const data = await scrapeRumbleVideo(norm.id || norm.urlCanonique); + if (data.error) return res.status(404).json(data); + const preplay = { + videoId: data.videoId, + title: data.title, + thumbnail: data.thumbnail, + rumbleUrl: data.url, // bouton "Ouvrir sur le site du fournisseur" + embedUrl: data.embedUrl // injection différée si l’utilisateur insiste pour lire ici + }; + res.json(preplay); +}); + export default router;