From fe1a2be364c423f63e9a8e3f491a8fe1d996e9bf Mon Sep 17 00:00:00 2001
From: Bruno Charest
Date: Fri, 29 May 2026 13:33:04 -0400
Subject: [PATCH] =?UTF-8?q?fix:=20stem=5Ftoken/stem=5Ftokens=20=E2=80=94?=
 =?UTF-8?q?=20try/except=20sur=20crash=20snowballstemmer=20(IndexError=20s?=
 =?UTF-8?q?ur=20tokens=20exotiques)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/search.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/backend/search.py b/backend/search.py
index 096b2c9..6654b0b 100644
--- a/backend/search.py
+++ b/backend/search.py
@@ -44,13 +44,22 @@ def _get_stemmer():
 
 def stem_token(token: str) -> str:
     """Reduce a word to its French stem (e.g. 'mangeons' → 'mang')."""
-    return _get_stemmer().stemWord(token)
+    try:
+        return _get_stemmer().stemWord(token)
+    except Exception:
+        return token  # fallback: keep original token if stemmer crashes
 
 
 def stem_tokens(tokens: list[str] | set[str]) -> set[str]:
     """Return the set of unique stems for a list of tokens."""
-    s = _get_stemmer()
-    return {s.stemWord(t) for t in tokens}
+    result = set()
+    for t in tokens:
+        try:
+            stemmed = stem_token(t)
+        except Exception:
+            stemmed = t
+        result.add(stemmed)
+    return result
 
 
 # ---------------------------------------------------------------------------