from __future__ import annotations

from urllib.parse import urlparse
from typing import Iterator
from http.cookies import Morsel
try:
    from curl_cffi.requests import Session, Response
    from .curl_cffi import StreamResponse, StreamSession, FormData
    has_curl_cffi = True
except ImportError:
    from typing import Type as Session, Type as Response
    from .aiohttp import StreamResponse, StreamSession, FormData
    has_curl_cffi = False
try:
    import webview
    import asyncio
    has_webview = True
except ImportError:
    has_webview = False
try:
    import nodriver
    from nodriver.cdp.network import CookieParam
    has_nodriver = True
except ImportError:
    has_nodriver = False

from .. import debug
from .raise_for_status import raise_for_status
from ..webdriver import WebDriver, WebDriverSession
from ..webdriver import bypass_cloudflare, get_driver_cookies
from ..errors import MissingRequirementsError
from ..typing import Cookies
from .defaults import DEFAULT_HEADERS, WEBVIEW_HAEDERS

# Request headers copied from a captured browser request in get_args_from_browser().
_BROWSER_HEADER_KEYS = (
    "accept-encoding",
    "accept-language",
    "user-agent",
    "sec-ch-ua",
    "sec-ch-ua-platform",
    "sec-ch-ua-arch",
    "sec-ch-ua-full-version",
    "sec-ch-ua-platform-version",
    "sec-ch-ua-bitness",
)


async def get_args_from_webview(url: str) -> dict:
    """
    Open ``url`` in a hidden webview window and collect request arguments from it.

    The function polls once per second until the selector ``body:not(.no-js)``
    resolves to an element. There is no upper bound on that wait; cancel the
    task to abort it.

    Args:
        url (str): The URL to load in the webview window.

    Returns:
        dict: ``{"headers": ..., "cookies": ...}`` where the headers are
        WEBVIEW_HAEDERS plus the window's user agent, language and real URL
        (as referer), and the cookies map cookie names to their values.

    Raises:
        MissingRequirementsError: If the "webview" package is not installed.
    """
    if not has_webview:
        raise MissingRequirementsError('Install "webview" package')
    window = webview.create_window("", url, hidden=True)
    try:
        await asyncio.sleep(2)
        body = None
        while body is None:
            try:
                await asyncio.sleep(1)
                body = window.dom.get_element("body:not(.no-js)")
            except Exception:
                # The DOM lookup failed (page presumably not ready yet): retry.
                # Only Exception is caught so that task cancellation and
                # KeyboardInterrupt can still break out of this loop.
                continue
        headers = {
            **WEBVIEW_HAEDERS,
            "User-Agent": window.evaluate_js("this.navigator.userAgent"),
            "Accept-Language": window.evaluate_js("this.navigator.language"),
            "Referer": window.real_url
        }
        # Each entry of get_cookies() is a mapping of name -> morsel-like
        # object exposing ``.value``; flatten all of them into one dict.
        cookies = {
            name: morsel.value
            for cookie in window.get_cookies()
            for name, morsel in cookie.items()
        }
    finally:
        # Always release the window, including on errors and cancellation.
        window.destroy()
    return {"headers": headers, "cookies": cookies}


def get_args_from_browser(
    url: str,
    webdriver: WebDriver = None,
    proxy: str = None,
    timeout: int = 120,
    do_bypass_cloudflare: bool = True,
    virtual_display: bool = False
) -> dict:
    """
    Collect cookies and request headers from a WebDriver session.

    Args:
        url (str): The URL passed to the Cloudflare bypass and used as referer.
        webdriver (WebDriver, optional): The WebDriver instance to use.
        proxy (str, optional): Proxy server handed to the WebDriver session.
        timeout (int, optional): Timeout in seconds passed to the Cloudflare bypass.
        do_bypass_cloudflare (bool, optional): Whether to call
            ``bypass_cloudflare(driver, url, timeout)`` before reading headers
            and cookies. When False this function itself performs no navigation.
        virtual_display (bool, optional): Forwarded to ``WebDriverSession``.

    Returns:
        dict: ``{"cookies": ..., "headers": ...}`` taken from the WebDriver.
    """
    with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=virtual_display) as driver:
        if do_bypass_cloudflare:
            bypass_cloudflare(driver, url, timeout)
        headers = {
            **DEFAULT_HEADERS,
            'referer': url,
        }
        if not hasattr(driver, "requests"):
            # The driver does not record requests: only the user agent is available.
            headers["user-agent"] = driver.execute_script("return navigator.userAgent")
        else:
            # Copy the selected headers from the first recorded request to ``url``.
            for request in driver.requests:
                if request.url.startswith(url):
                    for key, value in request.headers.items():
                        if key in _BROWSER_HEADER_KEYS:
                            headers[key] = value
                    break
        cookies = get_driver_cookies(driver)
    return {
        'cookies': cookies,
        'headers': headers,
    }


def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
    """
    Create a curl_cffi Session preloaded with cookies and headers from a WebDriver.

    Args:
        url (str): The URL to collect cookies and headers for.
        webdriver (WebDriver, optional): The WebDriver instance to use.
        proxy (str, optional): Proxy used for the WebDriver and for the Session.
        timeout (int, optional): Timeout in seconds for the WebDriver and the Session.

    Returns:
        Session: A Session impersonating "chrome", configured with the collected
        cookies and headers.

    Raises:
        MissingRequirementsError: If the "curl_cffi" package is not installed.
    """
    if not has_curl_cffi:
        raise MissingRequirementsError('Install "curl_cffi" package | pip install -U curl_cffi')
    args = get_args_from_browser(url, webdriver, proxy, timeout)
    return Session(
        **args,
        proxies={"https": proxy, "http": proxy},
        timeout=timeout,
        impersonate="chrome"
    )


def get_cookie_params_from_dict(cookies: Cookies, url: str = None, domain: str = None) -> list[CookieParam]:
    """
    Convert a name -> value cookie mapping into a list of ``CookieParam`` objects.

    Args:
        cookies (Cookies): Mapping of cookie names to cookie values.
        url (str, optional): URL set on every cookie parameter.
        domain (str, optional): Domain set on every cookie parameter.

    Returns:
        list[CookieParam]: One parameter object per entry in ``cookies``.
    """
    # The list was previously built and discarded, so callers received None.
    return [CookieParam.from_json({
        "name": key,
        "value": value,
        "url": url,
        "domain": domain
    }) for key, value in cookies.items()]


async def get_args_from_nodriver(
    url: str,
    proxy: str = None,
    timeout: int = 120,
    cookies: Cookies = None
) -> dict:
    """
    Open ``url`` with nodriver and collect cookies, headers and proxy settings.

    Args:
        url (str): The URL to open.
        proxy (str, optional): Proxy server passed to the browser via
            ``--proxy-server``.
        timeout (int, optional): Timeout passed to the wait for
            ``body:not(.no-js)``.
        cookies (Cookies, optional): Cookies to set in the browser before
            loading ``url``. When given, this dict is updated in place with
            the cookies read back from the browser.

    Returns:
        dict: ``{"cookies": ..., "headers": ..., "proxy": ...}``.

    Raises:
        MissingRequirementsError: If the "nodriver" package is not installed.
    """
    if not has_nodriver:
        raise MissingRequirementsError('Install "nodriver" package | pip install -U nodriver')
    if debug.logging:
        print(f"Open nodriver with url: {url}")
    browser = await nodriver.start(
        browser_args=None if proxy is None else [f"--proxy-server={proxy}"],
    )
    try:
        domain = urlparse(url).netloc
        if cookies is None:
            cookies = {}
        else:
            await browser.cookies.set_all(get_cookie_params_from_dict(cookies, url=url, domain=domain))
        page = await browser.get(url)
        # Keep only cookies whose domain ends with the requested host.
        for c in await browser.cookies.get_all():
            if c.domain.endswith(domain):
                cookies[c.name] = c.value
        user_agent = await page.evaluate("window.navigator.userAgent")
        await page.wait_for("body:not(.no-js)", timeout=timeout)
        await page.close()
    finally:
        # Stop the browser even when loading or waiting fails, so that the
        # browser is not left running.
        browser.stop()
    return {
        "cookies": cookies,
        "headers": {
            **DEFAULT_HEADERS,
            "user-agent": user_agent,
            "referer": url,
        },
        "proxy": proxy
    }


def merge_cookies(cookies: Iterator[Morsel], response: Response) -> Cookies:
    """
    Merge the cookies of ``response`` into ``cookies``.

    NOTE(review): despite the annotation, ``cookies`` is used as a mutable
    name -> value mapping (items are assigned by key) — confirm and adjust
    the annotation.

    Args:
        cookies: Existing cookie mapping, updated in place, or None to start
            from an empty dict.
        response (Response): Response whose ``cookies.jar`` is read.

    Returns:
        Cookies: The merged mapping (the same object as ``cookies`` unless it
        was None).
    """
    if cookies is None:
        cookies = {}
    for cookie in response.cookies.jar:
        cookies[cookie.name] = cookie.value
    # Return the mapping so the result is not lost when ``cookies`` was None.
    return cookies