Imago/tests/test_scraper_extended.py
Bruno Charest cc99fea20a
Some checks failed
CI / Lint & Format (push) Has been cancelled
CI / Tests (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
Add comprehensive test suite for image processing and related services
- Implement tests for database generator to ensure proper session handling.
- Create tests for EXIF extraction and conversion functions.
- Add tests for image-related endpoints, ensuring proper data retrieval and isolation between clients.
- Develop tests for OCR functionality, including language detection and text extraction.
- Introduce tests for the image processing pipeline, covering success and failure scenarios.
- Validate rate limiting functionality and ensure independent counters for different clients.
- Implement scraper tests to verify HTML content fetching and error handling.
- Add unit tests for various services, including storage and filename generation.
- Establish worker entry point for ARQ to handle background image processing tasks.
2026-02-24 11:22:10 -05:00

46 lines
1.5 KiB
Python

import pytest
import respx
from httpx import Response
from app.services.scraper import fetch_page_content
@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_success():
    """Check the happy path of ``fetch_page_content``.

    A mocked 200 response carrying a small HTML document must come back
    with the page title, the meta description, the article paragraph text
    and no error.
    """
    # Canned page; the literal is kept flush-left so its bytes are unchanged.
    page_html = """
<html>
<head><title>Test Title</title><meta name="description" content="Test Description"></head>
<body>
<article>
<p>This is a long enough paragraph to be captured by the scraper logic which requires > 30 chars.</p>
<p>Another long paragraph that should be joined with the previous one for the final text.</p>
</article>
</body>
</html>
"""
    target_url = "https://example.com"

    # Intercept the GET so no real network traffic happens.
    canned_response = Response(200, text=page_html)
    respx.get(target_url).mock(return_value=canned_response)

    scraped = await fetch_page_content(target_url)

    assert scraped["title"] == "Test Title"
    assert scraped["description"] == "Test Description"
    assert "This is a long enough paragraph" in scraped["text"]
    assert scraped["error"] is None
@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_http_error():
    """Check that a mocked 404 response is reported as ``"HTTP 404"``."""
    missing_url = "https://example.com/404"

    # The mocked route answers with a bare 404 and no body.
    not_found = Response(404)
    respx.get(missing_url).mock(return_value=not_found)

    outcome = await fetch_page_content(missing_url)

    assert outcome["error"] == "HTTP 404"
@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_request_error():
    """Check that an exception raised during the request shows up in ``error``."""
    unreachable_url = "https://broken.url"

    # The mocked route raises instead of returning a response.
    transport_failure = Exception("Connection reset")
    respx.get(unreachable_url).mock(side_effect=transport_failure)

    outcome = await fetch_page_content(unreachable_url)

    # The exception message must be surfaced to the caller, not swallowed.
    assert "Connection reset" in outcome["error"]