Update chrome/README.md

This commit is contained in:
2025-02-05 03:26:55 +00:00
parent df64161e57
commit 3f220f1811

View File

@@ -1,24 +1,21 @@
```python ```python
def Chrome(agent=None, proxy=None, cookies=None, headless=True): def Chrome(agent=None, proxy=None, cookies=[], headless=True):
import requests, subprocess, base64 import requests, subprocess, base64
try: requests.get(f'http://localhost:9222', timeout=.1) try: requests.get(f'http://localhost:9222', timeout=.1)
except: subprocess.Popen(['chrome', f'--remote-debugging-port={9222}', except: subprocess.Popen(['chrome', f'--remote-debugging-port={9222}',
*([f'--user-agent={agent}'] if agent else []), *([f'--user-agent={agent}'] if agent else []), *(['--headless=new'] if headless else []),
*([f'--proxy-server={proxy}'] if proxy else []), *([f'--proxy-server={proxy}'] if proxy else []), '--disable-web-security',
*(['--headless=new'] if headless else []), '--disable-translate', '--ignore-certificate-errors', '--remote-allow-origins=*',
'--disable-web-security', '--disable-translate',
'--ignore-certificate-errors', '--remote-allow-origins=*',
'--disable-backgrounding-occluded-windows',]) '--disable-backgrounding-occluded-windows',])
browser = requests.get(f'http://localhost:9222/json/version').json()['webSocketDebuggerUrl'] browser = requests.get(f'http://localhost:9222/json/version').json()['webSocketDebuggerUrl']
def Page(): def Page():
def send(url, request): def send(url, request):
import websocket, json; ws = websocket.create_connection(url) try: import json, websocket; (ws := websocket.create_connection(url)
try: ws.send(json.dumps(request)); return json.loads(ws.recv()) ).send(json.dumps(request)); return json.loads(ws.recv())
finally: ws.close() finally: ws.close()
page = 'ws://localhost:9222/devtools/page/' + (targetId := send(browser, page = 'ws://localhost:9222/devtools/page/' + (targetId := send(browser,
dict(id=1, method='Target.createTarget', dict(id=1, method='Target.createTarget',
params=dict(url='about:blank', newWindow=True)) params=dict(url='about:blank', newWindow=True)))['result']['targetId'])
)['result']['targetId'])
def evaluate(self, javascript): def evaluate(self, javascript):
try: return (res := self('Runtime.evaluate', try: return (res := self('Runtime.evaluate',
expression=javascript, returnByValue=True)['result'])['value'] expression=javascript, returnByValue=True)['result'])['value']
@@ -27,38 +24,31 @@ def Chrome(agent=None, proxy=None, cookies=None, headless=True):
import time; start_time = time.time() import time; start_time = time.time()
while time.time() - start_time < timeout: while time.time() - start_time < timeout:
if self.evaluate(f"!!(document.querySelector('{selector}'))"): return if self.evaluate(f"!!(document.querySelector('{selector}'))"): return
time.sleep(check_interval) self.sleep(check_interval)
raise TimeoutError(f"Element '{selector}' not found within {timeout} seconds") raise TimeoutError(f"Element '{selector}' not found within {timeout} seconds")
def cookies(self, cookies=None): def cookies(self, cookies=None):
if cookies is None: return self('Network.getAllCookies')['cookies'] if cookies is None: return self('Network.getAllCookies')['cookies']
self('Network.clearBrowserCookies') self('Network.clearBrowserCookies')
for cookie in cookies: self('Network.setCookie', **cookie) for cookie in cookies: self('Network.setCookie', **cookie)
def screenshot(self): def screenshot(self):
from IPython.display import HTML from IPython.display import HTML; display(HTML(
display(HTML(f'<img src="data:image/png;base64,{self('Page.captureScreenshot')['data']}">')) f'<img src="data:image/png;base64,{self('Page.captureScreenshot')['data']}">'))
def goto(self, url): def goto(self, url):
self('Page.navigate', url=url if '://' in url else f'https://{url}') self('Page.navigate', url=url if '://' in url else f'https://{url}')
while self.evaluate('document.readyState') != 'complete': while self.evaluate('document.readyState') != 'complete': self.sleep(0.1)
self.sleep(0.1)
Soup = lambda html: __import__('bs4').BeautifulSoup(html, 'lxml') Soup = lambda html: __import__('bs4').BeautifulSoup(html, 'lxml')
return Soup(self.source()) return Soup(self.source())
return type('', (), dict(__call__=lambda _, method, **params: return type('', (), dict(__call__=lambda _, method, **params:
send(page, {"id": 1, "method": method, "params": params})['result'], send(page, {"id": 1, "method": method, "params": params})['result'],
goto=goto,
close=lambda self: self('Target.closeTarget', targetId=targetId)['success'], close=lambda self: self('Target.closeTarget', targetId=targetId)['success'],
sleep=lambda _, seconds: __import__('time').sleep(seconds), sleep=lambda _, seconds: __import__('time').sleep(seconds), goto=goto,
evaluate=evaluate, evaluate=evaluate, wait_element=wait_element, cookies=cookies, screenshot=screenshot,
wait_element=wait_element, source=lambda self: self.evaluate('document.documentElement.outerHTML')))()
cookies=cookies, (page := Page()).cookies(cookies); return page
screenshot=screenshot,
source=lambda self: self.evaluate('document.documentElement.outerHTML'),
))()
(page := Page()).cookies(cookies)
return page
page = Chrome() page = Chrome()
print(page.goto('naver.com').title) # <title>NAVER</title> print(page.goto('naver.com').title) # <title>NAVER</title>
page.wait_element('img') page.wait_element('img')
page.screenshot() page.screenshot()
page.evaluate('window.location.href') # 'https://www.naver.com/' page.evaluate('2 + 2'), page.evaluate('window.location.href') # (4, 'https://www.naver.com/')
``` ```