import pytest
import respx
from httpx import Response
from app.services.scraper import fetch_page_content
@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_success():
    """A 200 HTML response yields the title, description, text and no error."""
    url = "https://example.com"
    # The fixture must carry real markup: the assertions below look for a
    # title, a description and paragraph text, none of which can be found in
    # a tag-less string (the original fixture did not even contain the
    # "Test Description" value it asserted on).
    # NOTE(review): assumes the scraper reads <title>, the
    # <meta name="description"> content attribute and <p> elements —
    # confirm against app.services.scraper.
    html_content = """
    <html>
        <head>
            <title>Test Title</title>
            <meta name="description" content="Test Description">
        </head>
        <body>
            <p>This is a long enough paragraph to be captured by the scraper logic which requires > 30 chars.</p>
            <p>Another long paragraph that should be joined with the previous one for the final text.</p>
        </body>
    </html>
    """
    respx.get(url).mock(return_value=Response(200, text=html_content))

    result = await fetch_page_content(url)

    assert result["title"] == "Test Title"
    assert result["description"] == "Test Description"
    assert "This is a long enough paragraph" in result["text"]
    assert result["error"] is None
@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_http_error():
    """A mocked 404 response is expected to be reported as ``"HTTP 404"``."""
    missing_page = "https://example.com/404"

    # Register the mocked route first, then attach the canned 404 reply.
    route = respx.get(missing_page)
    route.mock(return_value=Response(404))

    outcome = await fetch_page_content(missing_page)

    assert outcome["error"] == "HTTP 404"
@pytest.mark.asyncio
@respx.mock
async def test_fetch_page_content_request_error():
    """An exception raised by the mocked route should show up in ``error``."""
    unreachable = "https://broken.url"

    # NOTE(review): a bare Exception stands in for a transport failure here;
    # whether the scraper treats httpx-specific errors differently cannot be
    # seen from this test — confirm against the scraper's except clauses.
    failure = Exception("Connection reset")
    respx.get(unreachable).mock(side_effect=failure)

    outcome = await fetch_page_content(unreachable)

    assert "Connection reset" in outcome["error"]