From d6da699c54d6a0f9e99dc5c3a53929efa8806584 Mon Sep 17 00:00:00 2001
From: Bruno Charest <bruno.charest@gmail.com>
Date: Fri, 19 Sep 2025 09:33:54 -0400
Subject: [PATCH] refactor: improve Rumble video scraping with robust ID
 normalization and metadata parsing

---
 db/newtube.db     | Bin 208896 -> 212992 bytes
 server/rumble.mjs | 408 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 320 insertions(+), 88 deletions(-)
diff --git a/db/newtube.db b/db/newtube.db
index f8413352edbeac8b1b6ccb58ad9a9b8249a242da..0254ffdcc4ab9d3436ef3cd2ff9ef37d89a5b1c2 100644
GIT binary patch
delta 3122
zcmd5;ZA=^Y8NV05;g>sTDkvcx#~Q*~dgpub8J`oHoWZ!%!N%Aa;=Lrs7~28vY@cmR
zx-Gz@HPxglt*hKJZB47RQk$u)6_Y%LP1?J(7j>N~MccZnU8{Y{s+LvaL$j&-pKTxn
z_M!c<eL3m>dvyQjd4A9Dd7l5$JJzKo+r<w36A%PBo+OULPrQ02k>w?Bc6#Alcp~T@
zDozY0qWM$tw3muSMLFg4Mkk66Cl}_%%C2~T6XzpyDPCC!%~x_i8m)ZrsI~IvPHW{%
z4!@6cGY&V+Q++Nz#?mZLGBn8u5#~00?WvsCz$*!PKJ1#D4vlfilUZuQnVt%jsjMrN
zFD5c0i@XpGMAG8{%CC6(B7sGvJdha(R$8uqt@3#J@R8b$EaPT5igr2g@U)Guy-_~V
zgNN~U>|fa1*w3*SunV|zt%aDuDi>CpEw{x~xw`r+w)@hx&=1zmU?{!;Xo4L0EqoK-
zsNU5Kzm2y7fTO^N>I5IM*$lpn`G#sF;4Z6q!_tGlf}gg0VzH_e0k?IQEx*Ar+ixwa
zmI@X!->@xOJox>VFJm8B|7jbrKDXA=JZC<C5EbD3L?m%)Br_i`2{GSdEGOqPiOB_C
zGet7#^7yO}9!h%RnZZDKB>8Yj5gnRoDnNU)&IOVrr%TCrESj2^>0yzp{go5h5Z@=2
z28Yi2>13E5^n}78w&tsJQY=Ll%jFy=3>71%{EEa4PKOxHR-7p$CQm=Y&b!!rT9yJn
zWu!b3qBWB|aBgrk=PNJDelF~p91rA}*g(jxnR2N}I^~@W&MfA<iG-ZwCT5B=3siNr
z7mn)OjIJNCoxqZ2zv(T@CWh&MVf@Iz>BIVO8Lk@F^|y5I=@fm3?)!#W;{l`JcEcJp
zylLt+enZz^rD*sQRL|?j+uJJQGu`S>55NZX;x$AM^J?%I%ozDZqO_24G!99bo+)zM
zmO_eWw=7cmT$-sL*tvz&xj5Z8$mxaTqP=l&$g+^4YKQ8BPWVqvcQfc|2tAEHhrXxl
z)}?eW>2B)Ds_6vmLsi)e^ES@S@<8{LOK`+^_0<N%xH+2Y1CV1k#JIQ^w*@i0n|Dz>
z+ZSVZLkz>mm~F`6cCeJg#l+~_A%+rYA!gr(SU1g4g22Qm4LbVRH7{Jf=0zrB=C44_
zCLe@9ZOy6Jv&cc6$!GEb;oWOOl84maUqcx6gZoi~n)?aTdTAAD!t1TTYxlW8Y}M5l
zkyl`Y^(|<wy-g)vLXOm`Pz6=U>%VXrYHs=%vL3_!fFE!A*z&&hBK|V=sKsl2$M%NJ
zf%TY&&EGR`m^XmK*PGP$*AaHd8+=IJxDJ40Bx+Ir@gmZye)l@^Z@ujT6lqh%A2%Fn
z<?g8deH}S;cQ<G@9Eu`1g5701VkldVLs9c!)0in@T+$QhYsl}cqwqtOmS!4XPPW>F
z@s9hv%Wyw}AgcHmWc_o{CBs4JQf)NRLSr<sfL@N`1U^<}UPoYa^`1A87Pvb4SLBBS
zpU0sRgg0X?)|-}JL#3u?Oy4wS4PV#4totRpiTn)yj`?}uY_wV;(0gDoQh*Y8j_T`U
ztCzdbKbmxMj+s)W2haz&noT2=<6|qG6ef(fcPt-RKH6_GLH+x8j|w~CUISZGr1~0x
zX4Sb~ls~LZ)9qT{kqU6{pjd&f{;C%}s%y0Yx{Rhggx#`jTCZD{%tuX|raY*`mrUKu
zFmDAxv_ViUg#E`)C4S}bPRm92cx%Xh56(I%1}JgG<ESS(zI?pqW#36wd2+q`(nGNE
zY`v8|ndziNj1~(DAxlJ@7!iFU@o>CA#HDE>oR&4iyG!35l%;o<E*A(dunfv6rDh(8
z7nCjl6JI4F`Qm(*h-79X@+>$c!OuQ9sSwfOG4Z6@(uH=OciMyYynV>-4CaRlRBp_X
zq!?0}kI&7KwIE0>2vU~FIH?6ef*{DaM4ip1?nrpM+5a29;^B4^q0Q}<J%f9PiP{|d
zUooMnmf}A<6WWa1j|mN5>0|3m#Fy`_O9HH^mOXo_^0%8cO`Hds07Lv_W=bJO#lY|Y
zAu5g1hGi*5h*LlhL=O-_2Apa-5J9Ci9rO^MF|X(k6T?y=nU|7<2Z<3`DbxkfP()DL
zQpkr_Di=Py^8D3wp^#PFb~_U%nLdG0i7vGHykzI>IUp2?%Qb}3ASW{HCnG^W84d<~
zdv$T!c-J07+Y@I8UlEusHoEF<xO+3#7xNAoNo|S!Uy)JM3i~PO&{o@San$M1@D+px
zGrPUxi;cOs-&+J8*`kB2uW?UN$w)~>0X^@@gBhU-!4p?935A%E^F+81&lieWrQY(Q
zod+!+vWt8^bhg&=TQYClLT=qb*6$!^<b4Xey^-(8WE)>W?7K2)Je{!JZ>OD1YHyHF
WlSz7aCR!OjYbN!F#NJHm_<sQI_>lzw

delta 946
zcmZ9KOKcle6o%*CvB%ELoqO$=h9pLcL#sqi@GuY0_|YT{IL1v<oY%xbUeE-`t<pFT
zClw?_KwP9ycA*0rwW6q{RB9{L4KmW8q*S~mSO8TeR8&yA05*V7sgzAXWm@qloW)uE
zN9Q}gj{fCke)$Gk^*hrPMJcZYc7gQUqq$ji?wN^^1J6bWGO>JYZeltsM@D9!$`_{f
z@ch2yV|xY@bA|kjFx#_Rj3j%9q6d1TqS9v+-|aVkT|&k;4agWRlI?O_4#%a4AVtKi
zEXry~5<^l<m+sLkM~X>Coz@?h_sffVW+65*^suIN%`A>7f~-tUWQ0tAA=j5r^rTZ`
zJ>!BpmYGkF&!ln(6NY#FIpgJp9ZgYPQsc51SHps)#s0nYU%hf=p|hE2WQf$^-|=<)
zAwG={SG<9J*f?DFaQApJPL)sK|1A$(sElGrHtjc23fUr?+$F2p1Fn;rh%PH}TNN7Y
z7V@xm4{~MG6#$JgStEP7+vGJ=kKe-u(uF4Rn`i}J#TxfL_Zh+b=Uf>N@QWmfZ=v7#
z1UgyqdUVbzw}OnLCFtCOd?(Jcz3%V0O^jW)7==0I+Hh_$lP;gDm;V~|G1uI!t{1Hi
z5!|LL0nZn}>}vy(^^gXxRyl&s8Oo~d1gSft05ta|fNCCpkVY@9C4se;go9c15#@20
zDe@{RvQL`m1gy6mthlZ68Mp?R9M25b-ERiY!Y1>5A7HH2b8ybt^dseAyD8L$za+c3
zU$~=am7K>*ToT>jukc~q%yzTK*_$?~ku`6wL)mN#(VY478CYY!z7GFz?zu~S<l<jc
z)28~ZA2#3v_lYP-5lVst;%dIhImy+sa#h5gbfuVihYx-LE}?!(vS0jnSsj-lU`Z)g
z!^ejG+mj{G4j?e|U%?IkHl}{t-Pnn-kQCKLIj$)2uqLRgYIR(L)u7p?G~Zb(xu)9P
zkZSK%rF?cVC5F4w!;enQAIv<VtC9WVR{yu~okReGV-)dVFZzl5lzpq}4fl#`mU-TJ
z-tjTLX<IvHcs+yG(*e2`*lAKEJN$wui`LOb`m)<m91Q2p=@z=BR*s8mTv7y4(Xxjc
qcHr1>oxkKO?KBSs=!<5dl~&CcT4=^xYNUPtfYeCL`lOZK>G%^6o&x~@

diff --git a/server/rumble.mjs b/server/rumble.mjs
index 5a419fd..d283b9b 100644
--- a/server/rumble.mjs
+++ b/server/rumble.mjs
@@ -5,29 +5,26 @@ import rateLimit from 'express-rate-limit';
 
 const router = express.Router();
 
-// Rate limiter for Rumble scraping to prevent being blocked
+/* ----------------------------- Rate limiting ----------------------------- */
 const rumbleLimiter = rateLimit({
-  windowMs: 60 * 1000, // 1 min
+  windowMs: 60 * 1000,
   max: 20,
   standardHeaders: true,
   legacyHeaders: false,
-  message: { error: 'Too many requests to Rumble API. Please try again later.' }
+  message: { error: 'Too many requests to Rumble. Please try again later.' }
 });
-
 router.use(rumbleLimiter);
 
-// Simple in-memory cache with TTL
+/* --------------------------------- Cache -------------------------------- */
 const cache = new Map();
 const TTL_MS = 60 * 1000; // 60s
 
 function cacheKey(path, params) {
   return `${path}?${new URLSearchParams(params).toString()}`;
 }
-
 function setCache(key, data) {
   cache.set(key, { data, expires: Date.now() + TTL_MS });
 }
-
 function getCache(key) {
   const hit = cache.get(key);
   if (!hit) return null;
@@ -35,56 +32,202 @@ function getCache(key) {
   return hit.data;
 }
 
+/* ------------------------------- HTTP GET -------------------------------- */
 async function httpGet(url) {
   const resp = await axios.get(url, {
     headers: {
-      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+      // Modern desktop-browser User-Agent, to get past simple anti-bot filters
+      'User-Agent':
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36',
+      'Accept':
+        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
       'Accept-Language': 'en-US,en;q=0.8'
     },
-    timeout: 15000
+    timeout: 15000,
+    // Follow at most 3 redirects (this caps the hop count only; it does not pin the target host)
+    maxRedirects: 3,
+    validateStatus: s => s >= 200 && s < 400
   });
   return resp.data;
 }
 
-async function scrapeRumbleVideo(videoId) {
-  try {
-    const html = await httpGet(`https://rumble.com/${videoId}`);
-    const $ = cheerio.load(html);
-    const title = $('h1.video-title, .video-title h1').first().text().trim() || $('meta[property="og:title"]').attr('content') || '';
-    const thumbnail = $('meta[property="og:image"]').attr('content') || '';
-    const uploaderName = $('.media-by--a, .channel-name').first().text().trim() || '';
-    const viewsText = $('.rumbles-views, .video-views').first().text().trim();
-    const views = parseInt((viewsText || '').replace(/[^0-9]/g, '')) || 0;
-    const durationText = $('meta[property="video:duration"]').attr('content');
-    const duration = durationText ? parseInt(durationText) : 0;
-    const uploadedDate = $('meta[property="article:published_time"]').attr('content') || '';
-    const description = $('meta[property="og:description"]').attr('content') || '';
-    // Try to extract the official embed URL
-    let embedUrl = $('meta[property="og:video"], meta[name="twitter:player"]').attr('content') || '';
-    if (!embedUrl) {
-      const iframeSrc = $('iframe[src*="/embed/"]').attr('src') || '';
-      embedUrl = iframeSrc || '';
-    }
-    // Normalize protocol-less URLs
-    if (embedUrl && embedUrl.startsWith('//')) embedUrl = 'https:' + embedUrl;
-    // Detect canonical URL to extract stable ID
-    const canonicalUrl = $('link[rel="canonical"]').attr('href') || $('meta[property="og:url"]').attr('content') || '';
-    // Normalize/derive the stable Rumble ID (e.g., v464efu)
-    let stableId = videoId;
-    const mEmbed = /\/embed\/(v[0-9A-Za-z]+)/.exec(embedUrl || '');
-    const mCanon = /\/(v[0-9A-Za-z]+)(?:[\-./]|$)/.exec(canonicalUrl || '');
-    if (mEmbed && mEmbed[1]) stableId = mEmbed[1];
-    else if (mCanon && mCanon[1]) stableId = mCanon[1];
-    // If embedUrl is a page URL, convert to embed path as a fallback
-    if (!/\/embed\//.test(embedUrl)) {
-      embedUrl = `https://rumble.com/embed/${stableId}/?autoplay=2&muted=1`;
-    }
-    return { videoId: stableId, title: title || 'Untitled Video', thumbnail, uploaderName: uploaderName || 'Unknown Uploader', views, duration, uploadedDate, description, url: `https://rumble.com/${stableId}`, embedUrl, type: 'video' };
-  } catch (e) {
-    console.error('scrapeRumbleVideo error:', e.message);
-    return { videoId, error: 'Scraping failed' };
+/* ------------------------- Utils: ID normalization ----------------------- */
+/**
+ * Rumble expose plusieurs formes:
+ * - Page canoniques:         https://rumble.com/v6siqxf-some-title.html
+ * - Ancienne forme:          https://rumble.com/video/12345
+ * - URL d’embed officielle:  https://rumble.com/embed/v6siqxf/
+ * - ID brut attendu:         v6siqxf (toujours commence par 'v' + base62)
+ *
+ * Cette fonction accepte: ID ou URL et renvoie { id: 'vXXXX', urlCanonique, embedUrl }
+ */
+function normalizeRumbleId(input, { preferEmbed = true } = {}) {
+  if (!input) return null;
+
+  let id = null;
+  let urlCanonique = null;
+  let embedUrl = null;
+
+  // 1) Si on nous donne déjà un ID "vXXXX"
+  const clean = String(input).trim();
+  const mIdOnly = /^v[0-9A-Za-z]+$/.exec(clean);
+  if (mIdOnly) {
+    id = clean;
+    urlCanonique = `https://rumble.com/${id}`;
+    embedUrl = `https://rumble.com/embed/${id}/`;
+    return { id, urlCanonique, embedUrl };
   }
+
+  // 2) Si on nous donne une URL
+  try {
+    const u = new URL(clean, 'https://rumble.com');
+    // /embed/vXXXX/
+    let m = /\/embed\/(v[0-9A-Za-z]+)/.exec(u.pathname);
+    if (!m) m = /\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(u.pathname);
+    if (!m) {
+      // ancienne forme /video/123 → on ne sait pas convertir de manière fiable
+      const mOld = /\/video\/([0-9A-Za-z]+)/.exec(u.pathname);
+      if (mOld) {
+        // On garde l’URL telle quelle et laissera le parseur de la page extraire le vrai vID.
+        return { id: null, urlCanonique: u.href, embedUrl: null };
+      }
+      return null;
+    }
+    id = m[1];
+    urlCanonique = `https://rumble.com/${id}`;
+    embedUrl = `https://rumble.com/embed/${id}/`;
+    return { id, urlCanonique, embedUrl };
+  } catch {
+    return null;
+  }
+}
+
+/* ---------------------- Robust parsing of a video PAGE -------------------- */
+/**
+ * Extracts a video's identity from its parsed page. Authoritative source: the inline JS
+ *   Rumble("play", {..., "video":"vXXXX", ...})
+ * Fallbacks, in order: <meta property="og:video"> / twitter:player (an /embed/vXXXX/ URL),
+ * then <link rel="canonical"> or <meta property="og:url"> (containing /vXXXX-...).
+ * Returns { id, embedUrl, urlCanonique }, or null when no source yields an ID.
+ * NB (per the original author's observation of public pages): the "video":"vXXXX"
+ * value is exactly the ID the official embed expects.
+ */
+function extractVideoIdentity($) {
+  // 1) Script "Rumble('play', {... "video":"vXXXX" ...})"
+  //   Nothing is executed; this is a plain regex over the whole serialized HTML.
+  const html = $.html() || '';
+  let m = /Rumble\(\s*["']play["']\s*,\s*{[^}]*["']video["']\s*:\s*["'](v[0-9A-Za-z]+)["']/s.exec(html);
+  if (m && m[1]) {
+    const id = m[1];
+    return {
+      id,
+      embedUrl: `https://rumble.com/embed/${id}/`,
+      urlCanonique: `https://rumble.com/${id}`
+    };
+  }
+
+  // 2) og:video (or twitter:player) → .../embed/vXXXX/...
+  let embed = $('meta[property="og:video"]').attr('content')
+          || $('meta[name="twitter:player"]').attr('content');
+  if (embed) {
+    if (embed.startsWith('//')) embed = 'https:' + embed;
+    const mm = /\/embed\/(v[0-9A-Za-z]+)/.exec(embed);
+    if (mm) {
+      const id = mm[1];
+      return { id, embedUrl: `https://rumble.com/embed/${id}/`, urlCanonique: `https://rumble.com/${id}` };
+    }
+  }
+
+  // 3) Canonical link / og:url → .../vXXXX-...
+  let canon = $('link[rel="canonical"]').attr('href')
+          || $('meta[property="og:url"]').attr('content');
+  if (canon) {
+    const mm = /\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(canon);
+    if (mm) {
+      const id = mm[1];
+      return { id, embedUrl: `https://rumble.com/embed/${id}/`, urlCanonique: `https://rumble.com/${id}` };
+    }
+  }
+
+  return null;
+}
+
+/* --------------------------- Single-video scraper ------------------------- */
+async function scrapeRumbleVideo(videoIdOrUrl) {
+  try {
+    // Accepte /:videoId ou une URL complète.
+    let norm = normalizeRumbleId(videoIdOrUrl);
+    const fetchUrl = norm?.urlCanonique || `https://rumble.com/${videoIdOrUrl}`;
+    const html = await httpGet(fetchUrl);
+    const $ = cheerio.load(html);
+
+    // Identité fiable (id + embed + canonique)
+    let ident = extractVideoIdentity($);
+    if (!ident) {
+      // dernier recours: ré-essayer avec la page telle quelle si on est venu via /video/123
+      if (!norm?.id && norm?.urlCanonique) {
+        ident = extractVideoIdentity($);
+      }
+    }
+    if (!ident?.id) {
+      return { error: 'Unable to determine Rumble video ID', input: videoIdOrUrl };
+    }
+
+    // Métadonnées robustes
+    const title =
+      $('h1.video-title, .video-title h1').first().text().trim()
+      || $('meta[property="og:title"]').attr('content') || 'Untitled Video';
+
+    let thumbnail = $('meta[property="og:image"]').attr('content') || '';
+    if (thumbnail && thumbnail.startsWith('//')) thumbnail = 'https:' + thumbnail;
+
+    const uploaderName =
+      $('.media-by--a, .channel-name, a[href*="/c/"]').first().text().trim() || '';
+
+    const viewsText =
+      $('.rumbles-views, .video-views, .media-view-count, [data-view-count]').first().text().trim() || '';
+    const views = parseInt(viewsText.replace(/[^\d]/g, ''), 10) || 0;
+
+    const duration = parseInt($('meta[property="video:duration"]').attr('content') || '', 10) || 0;
+
+    const uploadedDate =
+      $('meta[property="article:published_time"]').attr('content')
+      || $('time[datetime]').attr('datetime') || '';
+
+    const description =
+      $('meta[property="og:description"]').attr('content')
+      || $('meta[name="description"]').attr('content') || '';
+
+    // embedUrl final — toujours la forme officielle
+    const embedUrl = ident.embedUrl;
+
+    return {
+      videoId: ident.id,
+      title,
+      thumbnail,
+      uploaderName,
+      views,
+      duration,
+      uploadedDate,
+      description,
+      url: ident.urlCanonique,
+      embedUrl,
+      type: 'video'
+    };
+  } catch (e) {
+    const msg = (e && e.message) ? e.message : String(e);
+    return { error: `Scraping failed: ${msg}` };
+  }
+}
+
+/* ------------------- List scraper (search / browse) --------------------- */
+function parseDurationToSeconds(text) {
+  if (!text) return 0;
+  // supporte mm:ss ou hh:mm:ss
+  const m = text.trim().match(/^(\d{1,2}):(\d{2})(?::(\d{2}))?$/);
+  if (!m) return 0;
+  const h = parseInt(m[3] || '0', 10), mn = parseInt(m[1] || '0', 10), s = parseInt(m[2] || '0', 10);
+  return h * 3600 + mn * 60 + s;
 }
 
 async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
@@ -92,54 +235,113 @@ async function scrapeRumbleList({ q, page = 1, limit = 24, sort = 'viral' }) {
     const url = q
       ? `https://rumble.com/search/video?q=${encodeURIComponent(q)}&page=${page}`
       : `https://rumble.com/videos?sort=${encodeURIComponent(sort)}&page=${page}`;
+
     const html = await httpGet(url);
     const $ = cheerio.load(html);
-    const items = [];
-    // Try to select video cards; Rumble uses different layouts, so search broadly
+
+    const found = [];
+    // 1) Cartes "vidéos" standards (li/article/div)
     $('a[href^="/v"], a[href^="/video/"]').each((_, el) => {
-      const a = $(el);
-      const href = a.attr('href') || '';
-      // Expect href like /vabcdef or /video/abcdef
-      const m = href.match(/\/v([A-Za-z0-9]+)/) || href.match(/\/video\/([A-Za-z0-9]+)/);
-      if (!m) return;
-      const vid = `v${m[1]}`;
-      const title = a.attr('title') || a.text().trim();
-      // Look around for thumbnail and meta
-      const parent = a.closest('li, article, div');
-      const img = parent.find('img').first();
-      let thumb = img.attr('data-src') || img.attr('src') || '';
+      const href = $(el).attr('href') || '';
+      // On préfère STRICTEMENT l’ID /vXXXX
+      let m = /^\/(v[0-9A-Za-z]+)(?:[-/.]|$)/.exec(href);
+      let id = m?.[1] || null;
+
+      // Fallback minimaliste pour /video/123 → on ne convertit pas ici; on laissera /video/:id passer au détails qui normalise par parse de la page.
+      const isLegacy = !id && /^\/video\//.test(href);
+
+      if (!id && !isLegacy) return;
+
+      const card = $(el).closest('li, article, .video-listing-entry, .video-item, .video-card, div');
+
+      const title = (($(el).attr('title') || '') + ' ' + $(el).text()).trim() || card.find('h3, h2, .video-item--title').first().text().trim();
+
+      // Thumb robuste: data-src > src
+      let thumb =
+        card.find('img').first().attr('data-src')
+        || card.find('img').first().attr('src')
+        || '';
       if (thumb && thumb.startsWith('//')) thumb = 'https:' + thumb;
-      const durationText = parent.find('.video-item--duration, .video-duration, .duration').first().text().trim();
-      const viewsText = parent.find('.video-item--views, .rumbles-views, .views').first().text().trim();
-      const duration = (() => {
-        const m = durationText.match(/(\d+):(\d+)(?::(\d+))?/);
-        if (!m) return 0;
-        const h = parseInt(m[3] || '0', 10), mn = parseInt(m[1] || '0', 10), s = parseInt(m[2] || '0', 10);
-        return h * 3600 + mn * 60 + s;
-      })();
-      const views = parseInt((viewsText || '').replace(/[^0-9]/g, '')) || 0;
-      items.push({ videoId: vid, title, thumbnail: thumb, uploaderName: '', views, duration, uploadedDate: '', url: `https://rumble.com/${vid}`, type: 'video' });
+
+      const durationText =
+        card.find('.video-item--duration, .video-duration, .duration, .video-item__duration').first().text().trim();
+      const viewsText =
+        card.find('.video-item--views, .rumbles-views, .views, .video-item__views').first().text().trim();
+
+      const duration = parseDurationToSeconds(durationText);
+      const views = parseInt((viewsText || '').replace(/[^\d]/g, ''), 10) || 0;
+
+      // Important: on renvoie TOUJOURS une URL canonique cohérente
+      let url = null;
+      let videoId = null;
+
+      if (id) {
+        videoId = id;
+        url = `https://rumble.com/${id}`;
+      } else if (isLegacy) {
+        // Laisse l’endpoint /video/:slug gérer la normalisation
+        videoId = href.replace(/^\//, ''); // "video/123..."
+        url = `https://rumble.com/${videoId}`;
+      }
+
+      // Filtrage doublons par videoId (id ou "video/123...")
+      const key = videoId;
+      found.push({
+        videoId: key,
+        title,
+        thumbnail: thumb,
+        uploaderName: '',
+        views,
+        duration,
+        uploadedDate: '',
+        url,
+        type: 'video'
+      });
     });
-    // De-duplicate by videoId and slice to limit
+
+    // De-dupe
     const seen = new Set();
     const unique = [];
-    for (const it of items) { if (!seen.has(it.videoId)) { seen.add(it.videoId); unique.push(it); } }
+    for (const it of found) {
+      if (!it.videoId) continue;
+      if (seen.has(it.videoId)) continue;
+      seen.add(it.videoId);
+      unique.push(it);
+    }
+
+    // Limite + nextCursor (page-based)
     const list = unique.slice(0, limit);
     const nextCursor = list.length === limit ? String(Number(page) + 1) : null;
-    return { items: list, total: unique.length, page: Number(page), limit: Number(limit), nextCursor };
+
+    return {
+      items: list,
+      total: unique.length,
+      page: Number(page),
+      limit: Number(limit),
+      nextCursor
+    };
   } catch (e) {
-    console.error('scrapeRumbleList error:', e.message);
-    return { items: [], total: 0, page: Number(page), limit: Number(limit), nextCursor: null };
+    return {
+      items: [],
+      total: 0,
+      page: Number(page),
+      limit: Number(limit),
+      nextCursor: null,
+      error: (e && e.message) ? e.message : String(e)
+    };
   }
 }
 
+/* --------------------------------- Routes -------------------------------- */
 router.get('/browse', async (req, res) => {
-  const page = parseInt(String(req.query.page || '1'), 10) || 1;
-  const limit = Math.min(50, parseInt(String(req.query.limit || '24'), 10) || 24);
+  const page = Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
+  const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24));
   const sort = String(req.query.sort || 'viral');
+
   const key = cacheKey('/browse', { page, limit, sort });
   const cached = getCache(key);
   if (cached) return res.json(cached);
+
   const data = await scrapeRumbleList({ page, limit, sort });
   setCache(key, data);
   return res.json(data);
@@ -148,37 +350,67 @@ router.get('/browse', async (req, res) => {
 router.get('/search', async (req, res) => {
   const q = String(req.query.q || '').trim();
   if (!q) return res.status(400).json({ error: 'Query parameter required' });
-  const limit = Math.min(50, parseInt(String(req.query.limit || '24'), 10) || 24);
+
+  const limit = Math.min(50, Math.max(1, parseInt(String(req.query.limit || '24'), 10) || 24));
   const page = (() => {
-    // Support offset-based cursor from frontend by translating offset->page
     if (req.query.offset != null) {
       const offset = parseInt(String(req.query.offset), 10) || 0;
       return Math.floor(offset / limit) + 1;
     }
-    return parseInt(String(req.query.page || '1'), 10) || 1;
+    return Math.max(1, parseInt(String(req.query.page || '1'), 10) || 1);
   })();
+
   const key = cacheKey('/search', { q, page, limit });
   const cached = getCache(key);
   if (cached) return res.json(cached);
+
   const data = await scrapeRumbleList({ q, page, limit });
   setCache(key, data);
   return res.json(data);
 });
 
-router.get('/video/:videoId', async (req, res) => {
+// Endpoint details. Accepte :videoId pouvant être "vXXXX" OU "video/123..."
+router.get('/video/:videoId(*)', async (req, res) => {
   try {
-    const { videoId } = req.params;
-    const key = cacheKey('/video', { videoId });
+    const raw = String(req.params.videoId);
+    const key = cacheKey('/video', { videoId: raw });
     const cached = getCache(key);
     if (cached) return res.json(cached);
-    const videoData = await scrapeRumbleVideo(videoId);
-    if (videoData.error) return res.status(404).json({ error: 'Video not found or scraping failed' });
-    setCache(key, videoData);
-    return res.json(videoData);
+
+    // Normalise au maximum avant scrape
+    const norm = normalizeRumbleId(raw) || { urlCanonique: `https://rumble.com/${raw}` };
+    const data = await scrapeRumbleVideo(norm.id || norm.urlCanonique);
+    if (data.error) return res.status(404).json(data);
+
+    setCache(key, data);
+    return res.json(data);
   } catch (error) {
-    console.error('Rumble video error:', error);
     return res.status(500).json({ error: 'Failed to scrape video' });
   }
 });
 
+/* ----------------- Option: “prélecteur” sans pub (non-embed) -------------- */
+/**
+ * On NE désactive PAS les pubs côté Rumble (pas de param officiel fiable).
+ * Mais on peut servir un “preplay”:
+ *  - On affiche miniature/titre.
+ *  - Au clic: (A) ouvrir dans Rumble (UX la plus propre), ou (B) injecter l’iframe
+ *    officiellement (ce qui déclenchera leur logique pub).
+ * Cette route renvoie juste les meta nécessaires pour ce composant prélecteur.
+ */
+router.get('/video/:videoId/preplay', async (req, res) => {
+  const raw = String(req.params.videoId);
+  const norm = normalizeRumbleId(raw) || { urlCanonique: `https://rumble.com/${raw}` };
+  const data = await scrapeRumbleVideo(norm.id || norm.urlCanonique);
+  if (data.error) return res.status(404).json(data);
+  const preplay = {
+    videoId: data.videoId,
+    title: data.title,
+    thumbnail: data.thumbnail,
+    rumbleUrl: data.url,              // bouton "Ouvrir sur le site du fournisseur"
+    embedUrl: data.embedUrl           // injection différée si l’utilisateur insiste pour lire ici
+  };
+  res.json(preplay);
+});
+
 export default router;