skytracker.services.browser.WebBrowser#

class skytracker.services.browser.WebBrowser(headless: bool = True, args: list[str] = None)#

Bases: object

Playwright browser for retrieving webpages

Methods

`__init__`	Create a new Playwright browser instance using Chromium
`get_images_from_page`	Get image (and corresponding detail) URLs from a web page
`get_page`	Get a website page
`start`	Open the browser
`stop`	Close the browser

__init__(headless: bool = True, args: list[str] = None) → None#

Create a new Playwright browser instance using Chromium

Parameters:

headless (bool, optional) – whether to launch Chromium headless. Defaults to True.
args (list[str], optional) – Chromium launch arguments. Defaults to ['--no-sandbox'].

async get_images_from_page(url: str, timeout: int = 10000, limit: int = 0, trusted_domains: list[str] | None = None) → list[dict[Literal['image', 'detail'], str]]#

Get image (and corresponding detail) URLs from a web page

Parameters:

url (str) – URL to fetch images from
timeout (int, optional) – timeout in milliseconds. Defaults to 10000 ms.
limit (int, optional) – maximum number of results to return (0=all). Defaults to 0 (all).
trusted_domains (list[str], optional) – only take from these domains. Defaults to None.

Returns:

image and detail URLs

Return type:

list[dict[Literal['image', 'detail'], str]]

async get_page(url: str, timeout: int = 10000, wait_for: str | None = None) → Page#

Get a website page

Parameters:

url (str) – URL to retrieve
timeout (int, optional) – timeout in milliseconds. Defaults to 10000 ms.
wait_for (str, optional) – element selector to wait for. Defaults to None.

Returns:

loaded web page

Return type:

Page

async start() → None#: Open the browser

async stop() → None#: Close the browser

Table of Contents

skytracker.services.browser.WebBrowser#