- Implement tests for database generator to ensure proper session handling.
- Create tests for EXIF extraction and conversion functions.
- Add tests for image-related endpoints, ensuring proper data retrieval and isolation between clients.
- Develop tests for OCR functionality, including language detection and text extraction.
- Introduce tests for the image processing pipeline, covering success and failure scenarios.
- Validate rate limiting functionality and ensure independent counters for different clients.
- Implement scraper tests to verify HTML content fetching and error handling.
- Add unit tests for various services, including storage and filename generation.
- Establish worker entry point for ARQ to handle background image processing tasks.
42 lines
1.5 KiB
Python
42 lines
1.5 KiB
Python
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
from app.services.ocr_service import extract_text, _detect_language
|
|
|
|
def test_detect_language():
    """Check `_detect_language` on French, English, empty and digit-only input."""
    # (input text, expected language code) pairs, evaluated in order.
    expected_by_sample = [
        ("C'est une phrase en français.", "fr"),
        ("This is a sentence in English.", "en"),
        ("", "unknown"),
        ("123456789", "unknown"),
    ]
    for sample, expected in expected_by_sample:
        assert _detect_language(sample) == expected
|
|
|
|
@patch("app.services.ocr_service.pytesseract")
@patch("app.services.ocr_service.PILImage")
@patch("app.services.ocr_service.Path.exists", return_value=True)
@patch("app.services.ocr_service.settings")
def test_extract_text_success(mock_settings, mock_path_exists, mock_pil, mock_tesseract):
    """Exercise `extract_text` on the happy path with all external pieces mocked.

    The settings object, `Path.exists`, `PILImage` and `pytesseract` are
    patched inside `app.services.ocr_service`. `patch` decorators are applied
    bottom-up, so the mock arguments arrive in the reverse of the decorator
    order shown above.
    """
    # Local import keeps this test self-contained without touching the
    # module-level import block.
    import math

    mock_settings.OCR_ENABLED = True
    mock_settings.OCR_LANGUAGES = "fra+eng"
    mock_settings.TESSERACT_CMD = None

    # Mock image_to_data: per-word confidences as strings, including a -1 entry.
    mock_tesseract.image_to_data.return_value = {
        "conf": ["90", "80", "-1", "70"],
    }
    mock_tesseract.Output.DICT = "dict"

    # Mock image_to_string
    mock_tesseract.image_to_string.return_value = "This is a test OCR output."

    result = extract_text("fake/path.jpg")

    assert result["has_text"] is True
    assert result["text"] == "This is a test OCR output."
    # (90+80+70)/3 / 100 = 0.8 -- the expected value leaves out the -1 entry.
    # Compare with a tolerance: exact float equality would fail if the service
    # scaled each value before averaging (0.9 + 0.8 + 0.7 is not exactly 2.4).
    assert math.isclose(result["confidence"], 0.8, abs_tol=1e-6)
    assert result["language"] == "en"
|
|
|
|
@patch("app.services.ocr_service.settings")
def test_extract_text_disabled(mock_settings):
    """With `OCR_ENABLED` set to False, `extract_text` must report no text."""
    mock_settings.OCR_ENABLED = False

    outcome = extract_text("any/path.jpg")

    # Both the flag and the payload should reflect that nothing was extracted.
    assert outcome["has_text"] is False
    assert outcome["text"] is None
|