"""Unit tests for ``extract_text`` and ``_detect_language`` from the OCR service."""

from unittest.mock import patch, MagicMock

import pytest

from app.services.ocr_service import extract_text, _detect_language


def test_detect_language():
    """Check the language code returned for French, English, empty and digit-only input."""
    assert _detect_language("C'est une phrase en français.") == "fr"
    assert _detect_language("This is a sentence in English.") == "en"
    # Inputs without any usable words are expected to map to "unknown".
    assert _detect_language("") == "unknown"
    assert _detect_language("123456789") == "unknown"


# NOTE: stacked ``patch`` decorators are applied bottom-up, so the mock
# arguments arrive in the reverse of the decorator order (settings first,
# pytesseract last).
@patch("app.services.ocr_service.pytesseract")
@patch("app.services.ocr_service.PILImage")
@patch("app.services.ocr_service.Path.exists", return_value=True)
@patch("app.services.ocr_service.settings")
def test_extract_text_success(mock_settings, mock_path_exists, mock_pil, mock_tesseract):
    """Check the result of ``extract_text`` when OCR is enabled and Tesseract is mocked.

    The settings object, ``Path.exists``, ``PILImage`` and ``pytesseract`` are
    all patched inside ``app.services.ocr_service``, so no real file or
    Tesseract binary is touched.
    """
    mock_settings.OCR_ENABLED = True
    mock_settings.OCR_LANGUAGES = "fra+eng"
    mock_settings.TESSERACT_CMD = None

    # Mock image_to_data: per-word confidences as strings, including a "-1" entry.
    mock_tesseract.image_to_data.return_value = {
        "conf": ["90", "80", "-1", "70"],
    }
    mock_tesseract.Output.DICT = "dict"

    # Mock image_to_string
    mock_tesseract.image_to_string.return_value = "This is a test OCR output."

    result = extract_text("fake/path.jpg")

    assert result["has_text"] is True
    assert result["text"] == "This is a test OCR output."
    # (90+80+70)/3 / 100 = 0.8 -- the expected value implies the "-1" entry is
    # left out of the average. Compared with a tolerance because the value is
    # the result of floating-point arithmetic.
    assert result["confidence"] == pytest.approx(0.8)
    assert result["language"] == "en"


@patch("app.services.ocr_service.settings")
def test_extract_text_disabled(mock_settings):
    """Check that ``extract_text`` reports no text when ``OCR_ENABLED`` is False."""
    mock_settings.OCR_ENABLED = False

    result = extract_text("any/path.jpg")

    assert result["has_text"] is False
    assert result["text"] is None