Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/brightdata/scrapers/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ async def trigger(
dataset_id: str,
include_errors: bool = True,
sdk_function: Optional[str] = None,
extra_params: Optional[Dict[str, str]] = None,
) -> Optional[str]:
"""
Trigger dataset collection and get snapshot_id.
Expand All @@ -54,6 +55,7 @@ async def trigger(
dataset_id: Bright Data dataset identifier
include_errors: Include error records in results
sdk_function: SDK function name for monitoring
extra_params: Additional query parameters (e.g., for discovery endpoints)

Returns:
snapshot_id if successful, None otherwise
Expand All @@ -69,6 +71,9 @@ async def trigger(
if sdk_function:
params["sdk_function"] = sdk_function

if extra_params:
params.update(extra_params)

async with self.engine.post_to_url(
self.TRIGGER_URL, json_data=payload, params=params
) as response:
Expand Down
1 change: 1 addition & 0 deletions src/brightdata/scrapers/instagram/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ async def _discover_with_params(
include_errors=True,
normalize_func=None,
sdk_function=sdk_function,
extra_params={"type": "discover_new", "discover_by": "url"},
)

if is_single and isinstance(result.data, list) and len(result.data) == 1:
Expand Down
3 changes: 3 additions & 0 deletions src/brightdata/scrapers/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ async def execute(
include_errors: bool = True,
normalize_func: Optional[Callable[[Any], Any]] = None,
sdk_function: Optional[str] = None,
extra_params: Optional[Dict[str, str]] = None,
) -> ScrapeResult:
"""
Execute complete trigger/poll/fetch workflow.
Expand All @@ -64,6 +65,7 @@ async def execute(
include_errors: Include error records
normalize_func: Optional function to normalize result data
sdk_function: SDK function name for monitoring
extra_params: Additional query parameters (e.g., for discovery endpoints)

Returns:
ScrapeResult with data or error
Expand All @@ -76,6 +78,7 @@ async def execute(
dataset_id=dataset_id,
include_errors=include_errors,
sdk_function=sdk_function,
extra_params=extra_params,
)
except APIError as e:
return ScrapeResult(
Expand Down
76 changes: 76 additions & 0 deletions tests/unit/test_instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,3 +329,79 @@ def test_can_import_from_instagram_submodule(self):
assert IG.__name__ == "InstagramScraper"
assert IGSearch is not None
assert IGSearch.__name__ == "InstagramSearchScraper"


class TestInstagramDiscoverExtraParams:
    """Test Instagram discover endpoints include required extra_params.

    Covers three layers: the ``WorkflowExecutor.execute`` signature, the
    ``DatasetAPIClient.trigger`` signature, and the Instagram search
    scraper's ``posts``/``reels`` methods forwarding the discovery query
    parameters to the workflow executor.
    """

    # Query parameters the Bright Data discovery endpoints require
    # (matches the values passed in instagram/search.py).
    EXPECTED_DISCOVER_PARAMS = {"type": "discover_new", "discover_by": "url"}

    def test_workflow_executor_execute_accepts_extra_params(self):
        """WorkflowExecutor.execute must expose an ``extra_params`` parameter."""
        import inspect
        from brightdata.scrapers.workflow import WorkflowExecutor

        sig = inspect.signature(WorkflowExecutor.execute)
        assert "extra_params" in sig.parameters

    def test_api_client_trigger_accepts_extra_params(self):
        """DatasetAPIClient.trigger must expose an ``extra_params`` parameter."""
        import inspect
        from brightdata.scrapers.api_client import DatasetAPIClient

        sig = inspect.signature(DatasetAPIClient.trigger)
        assert "extra_params" in sig.parameters

    def _assert_method_passes_discover_params(self, method_name):
        """Shared driver: call *method_name* on a scraper with a mocked
        workflow executor and assert the discovery extra_params were
        forwarded.

        Mocks ``workflow_executor.execute`` with an AsyncMock so no network
        I/O occurs; the scraper method is run to completion via
        ``asyncio.run``. Underscore prefix keeps pytest from collecting it.
        """
        import asyncio
        from unittest.mock import AsyncMock, patch

        from brightdata.models import ScrapeResult

        scraper = InstagramSearchScraper(bearer_token="test_token_123456789")

        with patch.object(
            scraper.workflow_executor, "execute", new_callable=AsyncMock
        ) as mock_execute:
            mock_execute.return_value = ScrapeResult(
                success=True,
                data=[{"test": "data"}],
                platform="instagram",
            )

            method = getattr(scraper, method_name)
            asyncio.run(method(url="https://instagram.com/test"))

            # The workflow executor must receive the discovery parameters
            # as an explicit keyword argument.
            mock_execute.assert_called_once()
            call_kwargs = mock_execute.call_args.kwargs
            assert "extra_params" in call_kwargs
            assert call_kwargs["extra_params"] == self.EXPECTED_DISCOVER_PARAMS

    def test_discover_posts_passes_extra_params(self):
        """Instagram search ``posts`` forwards discovery extra_params."""
        self._assert_method_passes_discover_params("posts")

    def test_discover_reels_passes_extra_params(self):
        """Instagram search ``reels`` forwards discovery extra_params."""
        self._assert_method_passes_discover_params("reels")