Imago/tests/test_ocr_extended.py
Bruno Charest cc99fea20a
Some checks failed
CI / Lint & Format (push) Has been cancelled
CI / Tests (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
Add comprehensive test suite for image processing and related services
- Implement tests for database generator to ensure proper session handling.
- Create tests for EXIF extraction and conversion functions.
- Add tests for image-related endpoints, ensuring proper data retrieval and isolation between clients.
- Develop tests for OCR functionality, including language detection and text extraction.
- Introduce tests for the image processing pipeline, covering success and failure scenarios.
- Validate rate limiting functionality and ensure independent counters for different clients.
- Implement scraper tests to verify HTML content fetching and error handling.
- Add unit tests for various services, including storage and filename generation.
- Establish worker entry point for ARQ to handle background image processing tasks.
2026-02-24 11:22:10 -05:00

42 lines
1.5 KiB
Python

import pytest
from unittest.mock import patch, MagicMock
from app.services.ocr_service import extract_text, _detect_language
def test_detect_language():
    """Check ``_detect_language`` on French, English, empty and digit-only text."""
    # (input text, expected language code) pairs, evaluated in this order.
    expectations = (
        ("C'est une phrase en français.", "fr"),
        ("This is a sentence in English.", "en"),
        ("", "unknown"),
        ("123456789", "unknown"),
    )
    for sample, expected_code in expectations:
        assert _detect_language(sample) == expected_code
@patch("app.services.ocr_service.pytesseract")
@patch("app.services.ocr_service.PILImage")
@patch("app.services.ocr_service.Path.exists", return_value=True)
@patch("app.services.ocr_service.settings")
def test_extract_text_success(mock_settings, mock_path_exists, mock_pil, mock_tesseract):
    """Verify ``extract_text`` output when OCR is enabled and Tesseract is mocked.

    ``patch`` decorators are applied bottom-up, so the mock arguments arrive
    in the reverse order of the decorator stack: settings, ``Path.exists``,
    ``PILImage``, then ``pytesseract``.
    """
    mock_settings.OCR_ENABLED = True
    mock_settings.OCR_LANGUAGES = "fra+eng"
    mock_settings.TESSERACT_CMD = None

    # Mock image_to_data: per-word confidences as strings, including a -1 entry.
    mock_tesseract.image_to_data.return_value = {
        "conf": ["90", "80", "-1", "70"],
    }
    mock_tesseract.Output.DICT = "dict"

    # Mock image_to_string: the raw text returned by the OCR engine.
    mock_tesseract.image_to_string.return_value = "This is a test OCR output."

    result = extract_text("fake/path.jpg")

    assert result["has_text"] is True
    assert result["text"] == "This is a test OCR output."
    # Expected value is (90 + 80 + 70) / 3 / 100 = 0.8, i.e. the -1 entry is
    # presumably excluded by the service. Compared with a tolerance because
    # the confidence is a computed float and exact equality is fragile.
    assert result["confidence"] == pytest.approx(0.8)
    assert result["language"] == "en"
@patch("app.services.ocr_service.settings")
def test_extract_text_disabled(mock_settings):
    """With ``OCR_ENABLED`` set to False, ``extract_text`` reports no text."""
    # Arrange: switch OCR off through the patched settings object.
    mock_settings.OCR_ENABLED = False

    # Act
    outcome = extract_text("any/path.jpg")

    # Assert: no text detected and no text payload returned.
    assert outcome["has_text"] is False
    assert outcome["text"] is None