skytracker.services.browser.WebBrowser#

class skytracker.services.browser.WebBrowser(headless: bool = True, args: list[str] = None)#

Bases: object

Playwright browser for retrieving webpages

Methods

__init__

Create a new Playwright browser instance using Chromium

get_images_from_page

Get image (and corresponding detail) URLs from a web page

get_page

Get a website page

start

Open the browser

stop

Close the browser

__init__(headless: bool = True, args: list[str] = None) -> None#

Create a new Playwright browser instance using Chromium

Parameters:
  • headless (bool, optional) – whether to launch Chromium headless. Defaults to True.

  • args (list[str], optional) – Chromium launch arguments. Defaults to ['--no-sandbox'].

async get_images_from_page(url: str, timeout: int = 10000, limit: int = 0, trusted_domains: list[str] | None = None) -> list[dict[Literal['image', 'detail'], str]]#

Get image (and corresponding detail) URLs from a web page

Parameters:
  • url (str) – URL to fetch images from

  • timeout (int, optional) – timeout in milliseconds. Defaults to 10000 ms.

  • limit (int, optional) – maximum number of results to return (0=all). Defaults to 0 (all).

  • trusted_domains (list[str], optional) – only take from these domains. Defaults to None.

Returns:

image and detail URLs

Return type:

list[dict[Literal['image', 'detail'], str]]

async get_page(url: str, timeout: int = 10000, wait_for: str | None = None) -> Page#

Get a website page

Parameters:
  • url (str) – URL to retrieve

  • timeout (int, optional) – timeout in milliseconds. Defaults to 10000 ms.

  • wait_for (str, optional) – element selector to wait for. Defaults to None.

Returns:

loaded web page

Return type:

Page

async start() -> None#

Open the browser

async stop() -> None#

Close the browser