From 3f220f1811093fec9573d9f6669f754acd4ee0ac Mon Sep 17 00:00:00 2001 From: jay817 Date: Wed, 5 Feb 2025 03:26:55 +0000 Subject: [PATCH] Update chrome/README.md --- chrome/README.md | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/chrome/README.md b/chrome/README.md index ba6bb2d..a8aa500 100644 --- a/chrome/README.md +++ b/chrome/README.md @@ -1,24 +1,21 @@ ```python -def Chrome(agent=None, proxy=None, cookies=None, headless=True): +def Chrome(agent=None, proxy=None, cookies=[], headless=True): import requests, subprocess, base64 try: requests.get(f'http://localhost:9222', timeout=.1) except: subprocess.Popen(['chrome', f'--remote-debugging-port={9222}', - *([f'--user-agent={agent}'] if agent else []), - *([f'--proxy-server={proxy}'] if proxy else []), - *(['--headless=new'] if headless else []), - '--disable-web-security', '--disable-translate', - '--ignore-certificate-errors', '--remote-allow-origins=*', + *([f'--user-agent={agent}'] if agent else []), *(['--headless=new'] if headless else []), + *([f'--proxy-server={proxy}'] if proxy else []), '--disable-web-security', + '--disable-translate', '--ignore-certificate-errors', '--remote-allow-origins=*', '--disable-backgrounding-occluded-windows',]) browser = requests.get(f'http://localhost:9222/json/version').json()['webSocketDebuggerUrl'] def Page(): def send(url, request): - import websocket, json; ws = websocket.create_connection(url) - try: ws.send(json.dumps(request)); return json.loads(ws.recv()) + try: import json, websocket; (ws := websocket.create_connection(url) + ).send(json.dumps(request)); return json.loads(ws.recv()) finally: ws.close() page = 'ws://localhost:9222/devtools/page/' + (targetId := send(browser, dict(id=1, method='Target.createTarget', - params=dict(url='about:blank', newWindow=True)) - )['result']['targetId']) + params=dict(url='about:blank', newWindow=True)))['result']['targetId']) def evaluate(self, javascript): try: 
return (res := self('Runtime.evaluate', expression=javascript, returnByValue=True)['result'])['value'] @@ -27,38 +24,31 @@ def Chrome(agent=None, proxy=None, cookies=None, headless=True): import time; start_time = time.time() while time.time() - start_time < timeout: if self.evaluate(f"!!(document.querySelector('{selector}'))"): return - time.sleep(check_interval) + self.sleep(check_interval) raise TimeoutError(f"Element '{selector}' not found within {timeout} seconds") def cookies(self, cookies=None): if cookies is None: return self('Network.getAllCookies')['cookies'] self('Network.clearBrowserCookies') for cookie in cookies: self('Network.setCookie', **cookie) def screenshot(self): - from IPython.display import HTML - display(HTML(f'')) + from IPython.display import HTML; display(HTML( + f'')) def goto(self, url): self('Page.navigate', url=url if '://' in url else f'https://{url}') - while self.evaluate('document.readyState') != 'complete': - self.sleep(0.1) + while self.evaluate('document.readyState') != 'complete': self.sleep(0.1) Soup = lambda html: __import__('bs4').BeautifulSoup(html, 'lxml') return Soup(self.source()) return type('', (), dict(__call__=lambda _, method, **params: send(page, {"id": 1, "method": method, "params": params})['result'], - goto=goto, close=lambda self: self('Target.closeTarget', targetId=targetId)['success'], - sleep=lambda _, seconds: __import__('time').sleep(seconds), - evaluate=evaluate, - wait_element=wait_element, - cookies=cookies, - screenshot=screenshot, - source=lambda self: self.evaluate('document.documentElement.outerHTML'), - ))() - (page := Page()).cookies(cookies) - return page + sleep=lambda _, seconds: __import__('time').sleep(seconds), goto=goto, + evaluate=evaluate, wait_element=wait_element, cookies=cookies, screenshot=screenshot, + source=lambda self: self.evaluate('document.documentElement.outerHTML')))() + (page := Page()).cookies(cookies); return page page = Chrome() print(page.goto('naver.com').title) # 
NAVER page.wait_element('img') page.screenshot() -page.evaluate('window.location.href') # 'https://www.naver.com/' +page.evaluate('2 + 2'), page.evaluate('window.location.href') # (4, 'https://www.naver.com/') ``` \ No newline at end of file