"""Tests for ``fetch_page_content`` using respx-mocked HTTP responses."""

import pytest
import respx
from httpx import Response

from app.services.scraper import fetch_page_content


@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_success() -> None:
    """A 200 response with HTML yields title, description, text and no error."""
    url = "https://example.com"
    # NOTE(review): the markup below is reconstructed so that the fixture
    # actually carries the title, description and paragraphs asserted on.
    # It assumes the scraper reads <title>, <meta name="description"> and
    # <p> elements -- confirm against fetch_page_content.
    html_content = """
    <html>
        <head>
            <title>Test Title</title>
            <meta name="description" content="Test Description">
        </head>
        <body>
            <p>This is a long enough paragraph to be captured by the scraper logic which requires > 30 chars.</p>
            <p>Another long paragraph that should be joined with the previous one for the final text.</p>
        </body>
    </html>
    """
    respx.get(url).mock(return_value=Response(200, text=html_content))

    result = await fetch_page_content(url)

    assert result["title"] == "Test Title"
    assert result["description"] == "Test Description"
    assert "This is a long enough paragraph" in result["text"]
    assert result["error"] is None


@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_http_error() -> None:
    """A 404 response is reported through the ``error`` field as "HTTP 404"."""
    url = "https://example.com/404"
    respx.get(url).mock(return_value=Response(404))

    result = await fetch_page_content(url)

    assert result["error"] == "HTTP 404"


@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_request_error() -> None:
    """An exception raised while requesting surfaces its message in ``error``."""
    url = "https://broken.url"
    # The mocked route raises instead of returning a response.
    respx.get(url).mock(side_effect=Exception("Connection reset"))

    result = await fetch_page_content(url)

    assert "Connection reset" in result["error"]