Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import logging
import sys
from dataclasses import dataclass
from datetime import timedelta
from itertools import cycle
Expand Down Expand Up @@ -911,3 +912,29 @@ async def request_handler(_context: AdaptivePlaywrightCrawlingContext) -> None:
await crawler.run(test_urls[:1])

mocked_handler.assert_called()


@pytest.mark.parametrize(
    ('optional_module_name'),
    [
        pytest.param('playwright', id='playwright'),
        pytest.param('jaro', id='jaro'),
        pytest.param('sklearn', id='sklearn'),
    ],
)
def test_import_error_handled(optional_module_name: str) -> None:
    """Importing `AdaptivePlaywrightCrawler` raises `ImportError` when an optional dependency is blocked."""
    # A `None` entry in `sys.modules` makes the import system raise for that name.
    # The top-level package is always blocked, even if nothing has imported it yet in this
    # process. Its cached submodules are blocked as well, so that submodule cache entries
    # cannot bypass the blocked top-level package.
    blocked = {
        optional_module_name: None,
        **{mod_name: None for mod_name in sys.modules if mod_name.startswith(f'{optional_module_name}.')},
    }

    with patch.dict('sys.modules', blocked):
        # Drop the cached crawlee modules so that the import below is executed again.
        for mod_name in list(sys.modules):
            if mod_name == 'crawlee.crawlers' or mod_name.startswith('crawlee.crawlers._adaptive_playwright'):
                sys.modules.pop(mod_name, None)

        with pytest.raises(ImportError):
            from crawlee.crawlers import AdaptivePlaywrightCrawler  # noqa: F401 PLC0415
12 changes: 12 additions & 0 deletions tests/unit/crawlers/_beautifulsoup/test_beautifulsoup_crawler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import asyncio
import sys
from datetime import timedelta
from typing import TYPE_CHECKING
from unittest import mock
Expand Down Expand Up @@ -489,3 +490,14 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
mock.call(str(server_url / 'page_3')),
]
visit.assert_has_calls(expected_visit_calls, any_order=True)


def test_import_error_handled() -> None:
    """Check that `BeautifulSoupCrawler` cannot be imported while `bs4` is unavailable."""
    crawlee_prefix = 'crawlee.crawlers._beautifulsoup'

    # A `None` entry in `sys.modules` makes every import of `bs4` fail.
    with mock.patch.dict('sys.modules', {'bs4': None}):
        # Forget the cached crawlee modules so that the import below runs again.
        stale_modules = [
            name for name in sys.modules if name == 'crawlee.crawlers' or name.startswith(crawlee_prefix)
        ]
        for name in stale_modules:
            sys.modules.pop(name, None)

        with pytest.raises(ImportError):
            from crawlee.crawlers import BeautifulSoupCrawler  # noqa: F401 PLC0415
16 changes: 4 additions & 12 deletions tests/unit/crawlers/_parsel/test_parsel_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,24 +218,16 @@ async def test_handle_blocked_status_code(server_url: URL, http_client: HttpClie
assert crawler._statistics.error_tracker.total == 1


# TODO: Remove the skip mark when the test is fixed:
# https://github.com/apify/crawlee-python/issues/838
@pytest.mark.skip(reason='The test does not work with `crawlee._utils.try_import.ImportWrapper`.')
def test_import_error_handled() -> None:
# Simulate a missing parsel package: a `None` entry in `sys.modules` makes `import parsel` raise ImportError.
with mock.patch.dict('sys.modules', {'parsel': None}):
# Invalidate the cached crawlee modules so that the ParselCrawler import below is executed again.
sys.modules.pop('crawlee.crawlers', None)
sys.modules.pop('crawlee.crawlers._parsel', None)
with pytest.raises(ImportError) as import_error:
for mod_name in list(sys.modules):
if mod_name == 'crawlee.crawlers' or mod_name.startswith('crawlee.crawlers._parsel'):
sys.modules.pop(mod_name, None)
with pytest.raises(ImportError):
from crawlee.crawlers import ParselCrawler # noqa: F401 PLC0415

# Check that the raised ImportError carries the expected installation hint.
assert str(import_error.value) == (
"To import this, you need to install the 'parsel' extra."
"For example, if you use pip, run `pip install 'crawlee[parsel]'`."
)


async def test_json(server_url: URL, http_client: HttpClient) -> None:
crawler = ParselCrawler(http_client=http_client)
Expand Down
13 changes: 13 additions & 0 deletions tests/unit/crawlers/_playwright/test_playwright_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
import json
import logging
import sys
from datetime import timedelta
from typing import TYPE_CHECKING, Any, Literal
from unittest import mock
Expand Down Expand Up @@ -1239,3 +1240,15 @@ async def failed_handler(context: BasicCrawlingContext | PlaywrightCrawlingConte

assert error_handler_calls == [HELLO_WORLD.decode(), HELLO_WORLD.decode()]
assert failed_handler_calls == [HELLO_WORLD.decode()]


def test_import_error_handled() -> None:
    """Importing `PlaywrightCrawler` raises `ImportError` when `playwright` is unavailable."""
    # Always block the top-level package, even if it has not been imported yet in this process.
    # Also block its cached submodules, which would otherwise be served straight from
    # `sys.modules`. A `None` entry makes the import system raise for that name.
    blocked = dict.fromkeys(
        ['playwright', *(mod_name for mod_name in sys.modules if mod_name.startswith('playwright.'))]
    )
    with mock.patch.dict('sys.modules', blocked):
        # Drop the cached crawlee modules so that the import below is executed again.
        for mod_name in list(sys.modules):
            if mod_name.startswith('crawlee.crawlers._playwright'):
                sys.modules.pop(mod_name, None)
        with pytest.raises(ImportError):
            from crawlee.crawlers._playwright import PlaywrightCrawler  # noqa: F401 PLC0415
13 changes: 13 additions & 0 deletions tests/unit/crawlers/_stagehand/test_stagehand_crawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING
from unittest.mock import AsyncMock, MagicMock, patch

Expand Down Expand Up @@ -165,3 +166,15 @@ async def handler(context: StagehandCrawlingContext) -> None:
assert isinstance(method_mock.call_args.kwargs['page'], Page)

assert argument in method_mock.call_args.kwargs


def test_import_error_handled() -> None:
    """Importing `StagehandCrawler` raises `ImportError` when `stagehand` is unavailable."""
    # Always block the top-level package, even if it has not been imported yet in this process.
    # Also block its cached submodules, which would otherwise be served straight from
    # `sys.modules`. A `None` entry makes the import system raise for that name.
    blocked = {
        'stagehand': None,
        **{mod_name: None for mod_name in sys.modules if mod_name.startswith('stagehand.')},
    }
    with patch.dict('sys.modules', blocked):
        # Drop the cached crawlee modules that depend on stagehand so they are imported again.
        for mod_name in list(sys.modules):
            if mod_name.startswith(('crawlee.crawlers._stagehand', 'crawlee.browsers._stagehand_browser_plugin')):
                sys.modules.pop(mod_name, None)
        with pytest.raises(ImportError):
            from crawlee.crawlers._stagehand import StagehandCrawler  # noqa: F401 PLC0415
24 changes: 24 additions & 0 deletions tests/unit/http_clients/test_http_clients.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

import importlib
import os
import sys
from typing import TYPE_CHECKING
from unittest.mock import patch

import pytest
from curl_cffi import CurlHttpVersion
Expand Down Expand Up @@ -263,3 +266,24 @@ async def test_stream_rejects_non_http_scheme(http_client: HttpClient) -> None:
with pytest.raises(ValidationError):
async with http_client.stream('gopher://127.0.0.1:6379/_PING'):
pass


@pytest.mark.parametrize(
    ('optional_module_name', 'import_path'),
    [
        pytest.param('curl_cffi', 'crawlee.http_clients._curl_impersonate', id='curl_impersonate'),
        pytest.param('httpx', 'crawlee.http_clients._httpx', id='httpx'),
    ],
)
def test_import_error_handled(optional_module_name: str, import_path: str) -> None:
    """Importing an HTTP client module raises `ImportError` when its optional dependency is blocked."""
    # Always block the top-level package, even if it has not been imported yet in this process.
    # Also block its cached submodules, which would otherwise be served straight from
    # `sys.modules`. A `None` entry makes the import system raise for that name.
    blocked = {
        optional_module_name: None,
        **{mod_name: None for mod_name in sys.modules if mod_name.startswith(f'{optional_module_name}.')},
    }
    with patch.dict('sys.modules', blocked):
        # Drop the cached client modules so that `import_module` executes them again.
        for mod_name in list(sys.modules):
            if mod_name.startswith(import_path):
                sys.modules.pop(mod_name, None)
        with pytest.raises(ImportError):
            importlib.import_module(import_path)
15 changes: 15 additions & 0 deletions tests/unit/otel/test_crawler_instrumentor.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import io
import json
import re
import sys
from unittest import mock
from unittest.mock import patch

import pytest
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
Expand Down Expand Up @@ -103,3 +106,15 @@ async def test_crawler_instrumentor_capability(server_url: URL) -> None:

# Check that trace_ids of unrelated traces are not the same.
assert telemetry_data[0]['context']['trace_id'] != telemetry_data[-1]['context']['trace_id']


def test_import_error_handled() -> None:
    """Check that `CrawlerInstrumentor` cannot be imported while `opentelemetry` is blocked."""
    # Map `opentelemetry` and each of its cached submodules to `None`, which makes
    # the import system raise for those names.
    blocked = dict.fromkeys(name for name in sys.modules if name.partition('.')[0] == 'opentelemetry')

    with patch.dict('sys.modules', blocked):
        # Forget the cached instrumentor module so that the import below runs again.
        sys.modules.pop('crawlee.otel.crawler_instrumentor', None)

        with pytest.raises(ImportError):
            from crawlee.otel.crawler_instrumentor import CrawlerInstrumentor  # noqa: F401 PLC0415
16 changes: 16 additions & 0 deletions tests/unit/storage_clients/_redis/test_redis_storage_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from __future__ import annotations

import sys
from unittest.mock import patch

import pytest


def test_import_error_handled() -> None:
    """Importing `RedisStorageClient` raises `ImportError` when `redis` is unavailable."""
    # This test module never imports `redis` itself, so the top-level package must be blocked
    # explicitly rather than only when it already happens to be cached in `sys.modules`.
    # Cached submodules are blocked too, so they cannot bypass the blocked top-level package.
    blocked = {
        'redis': None,
        **{mod_name: None for mod_name in sys.modules if mod_name.startswith('redis.')},
    }
    with patch.dict('sys.modules', blocked):
        # Drop the cached crawlee modules so that the import below is executed again.
        for mod_name in list(sys.modules):
            if mod_name.startswith('crawlee.storage_clients._redis'):
                sys.modules.pop(mod_name, None)
        with pytest.raises(ImportError):
            from crawlee.storage_clients._redis import RedisStorageClient  # noqa: F401 PLC0415
15 changes: 15 additions & 0 deletions tests/unit/storage_clients/_sql/test_sql_storage_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING
from unittest.mock import patch

import pytest
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine

Expand Down Expand Up @@ -49,3 +52,15 @@ async def test_sqlite_wal_mode_not_applied_with_custom_engine(tmp_path: Path) ->
async with engine.begin() as conn:
result = await conn.execute(text('PRAGMA journal_mode'))
assert result.scalar() != 'wal'


def test_import_error_handled() -> None:
    """Check that `SqlStorageClient` cannot be imported while `sqlalchemy` is blocked."""
    crawlee_prefix = 'crawlee.storage_clients._sql'

    # Map `sqlalchemy` and each of its cached submodules to `None`, which makes
    # the import system raise for those names.
    blocked = dict.fromkeys(name for name in sys.modules if name.partition('.')[0] == 'sqlalchemy')

    with patch.dict('sys.modules', blocked):
        # Forget the cached crawlee modules so that the import below runs again.
        stale_modules = [name for name in sys.modules if name.startswith(crawlee_prefix)]
        for name in stale_modules:
            sys.modules.pop(name, None)

        with pytest.raises(ImportError):
            from crawlee.storage_clients._sql import SqlStorageClient  # noqa: F401 PLC0415
28 changes: 27 additions & 1 deletion tests/unit/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import os
from unittest.mock import ANY, Mock
import sys
from unittest.mock import ANY, Mock, patch

import pytest
import readchar
Expand Down Expand Up @@ -251,3 +252,28 @@ def test_create_existing_folder_interactive_multiple_attempts(
'install_project': True,
},
)


@pytest.mark.parametrize(
    ('optional_module_name'),
    [
        pytest.param('cookiecutter', id='cookiecutter'),
        pytest.param('inquirer', id='inquirer'),
        pytest.param('rich', id='rich'),
        pytest.param('typer', id='typer'),
    ],
)
def test_import_error_handled(optional_module_name: str) -> None:
    """Importing `crawlee._cli` raises `ImportError` when an optional CLI dependency is blocked."""
    # A `None` entry in `sys.modules` makes the import system raise for that name.
    # The top-level package is always blocked, even if nothing has imported it yet in this
    # process. Its cached submodules are blocked as well, so that cached submodule entries
    # cannot bypass the blocked top-level package.
    blocked = {
        optional_module_name: None,
        **{mod_name: None for mod_name in sys.modules if mod_name.startswith(f'{optional_module_name}.')},
    }

    with patch.dict('sys.modules', blocked):
        # Drop the cached CLI module so that the import below is executed again.
        sys.modules.pop('crawlee._cli', None)

        with pytest.raises(ImportError):
            from crawlee._cli import cli  # noqa: F401 PLC0415
Loading