def SQL():
    """Build the content-addressed key/value store used by the scraper.

    Opens (or creates) the SQLite database ``.db`` in autocommit mode with
    WAL journaling, makes sure the append-only table ``kv(k, v, t)`` and the
    blob directory ``.db-blob`` exist, and returns an instance of an
    anonymous class that supports:

    * ``sql(query, *params)`` -- run a statement, return rows as dicts;
    * ``sql[k] = v``          -- append a new ``(k, v)`` row;
    * ``sql[k]``              -- most recently inserted ``v`` for ``k``
                                 (raises ``IndexError`` if ``k`` is unknown);
    * ``iter(sql)``           -- one ``(k, v)`` values view per distinct key;
    * ``sql.put(name, blob)`` -- store bytes on disk under their SHA-1 and
                                 record ``name -> sha1``;
    * ``sql.get(name)``       -- read back the bytes last stored for ``name``.
    """
    import sqlite3, os, hashlib

    (con := sqlite3.connect('.db', isolation_level=None)).row_factory = sqlite3.Row
    con.execute('PRAGMA journal_mode=wal')
    con.execute('PRAGMA busy_timeout='f'{1e9}')
    con.execute('CREATE TABLE IF NOT EXISTS kv(k, v, t DEFAULT CURRENT_TIMESTAMP)')
    os.makedirs('.db-blob', exist_ok=True)

    def put(sql, filename, blob):
        """Store *blob* under its SHA-1 digest and map *filename* to it.

        The blob file is written only when no row already references the
        digest; the ``filename -> sha1`` row is appended on every call.
        """
        sha1 = hashlib.sha1(blob).hexdigest()
        # Existence probe only, so a single row is enough.
        if not sql('SELECT 1 FROM kv WHERE v=? LIMIT 1', sha1):
            try:
                # 'x' mode: never overwrite a blob that is already on disk.
                with open(f'.db-blob/{sha1}', 'xb') as f:
                    f.write(blob)
                # G is the green colour helper installed in globals() below.
                print(f'{G(len(blob)):>16} {filename}')
            except FileExistsError:
                pass
        sql[filename] = sha1

    def get(sql, filename):
        """Return the bytes most recently stored for *filename*."""
        # Context manager so the file handle is closed deterministically.
        with open(f'.db-blob/{sql[filename]}', 'rb') as f:
            return f.read()

    return type('', (), dict(
        put=put,
        get=get,
        # sql(query, *params) -> list of row dicts
        __call__=lambda _, q, *p: list(map(dict, con.execute(q, p))),
        # sql[k] = v appends; earlier rows for k are kept as history.
        __setitem__=lambda sql, k, v: sql('INSERT INTO kv(k,v) VALUES(?,?)', k, v),
        # CURRENT_TIMESTAMP only has one-second resolution, so rowid breaks
        # ties between writes to the same key within the same second.
        __getitem__=lambda sql, k: sql(
            'SELECT v FROM kv WHERE k=? ORDER BY t DESC, rowid DESC LIMIT 1', k)[0]['v'],
        # NOTE(review): relies on SQLite's bare-column handling with MAX();
        # which row wins among same-second writes is not pinned down here.
        __iter__=lambda sql: (kv.values() for kv in sql(
            'SELECT k, v FROM kv GROUP BY k HAVING t = MAX(t)'))))()


sql = SQL()

# Colour helpers R, G, Y, B, M, C: wrap text in ANSI escape codes 91..96.
for i, c in enumerate('RGYBMC'):
    globals()[c] = lambda s, i=i: f'\x1b[{91+i}m{s}\x1b[0m'

# Best-effort guard: ``with unsafe: ...`` swallows any Exception.
unsafe = __import__('contextlib').suppress(Exception)

# Parse HTML with BeautifulSoup using the lxml parser (imported lazily).
Soup = lambda html: __import__('bs4').BeautifulSoup(html, 'lxml')


def sync(coro):
    """Wrap a coroutine function so it can be called like a plain function.

    Anything that is not a coroutine function is returned unchanged.  The
    wrapper schedules the coroutine on the current event loop and then pumps
    that loop by hand until the resulting future is done, returning its
    result (or raising its exception).

    HACK: this reaches into private asyncio internals (``_process_events``,
    ``_selector``, ``_ready``, ``asyncio.tasks._current_tasks``).
    NOTE(review): only the ready queue and selector events are serviced
    here; timer callbacks do not appear to be processed -- confirm.
    """
    import asyncio, functools

    if not asyncio.iscoroutinefunction(coro):
        return coro

    @functools.wraps(coro)
    def wrapper(*args, **kwargs):
        loop, future = asyncio.get_event_loop(), asyncio.ensure_future(coro(*args, **kwargs))
        while not future.done():
            # Non-blocking poll of the selector for I/O that became ready.
            loop._process_events(loop._selector.select(0))
            if (ready := loop._ready) and (handle := ready.popleft())._cancelled is False:
                # Detach the loop's current-task entry while the handle runs,
                # then restore it afterwards.
                task = (tasks := asyncio.tasks._current_tasks).pop(loop, None)
                handle._run(); tasks[loop] = task
        return future.result()

    return wrapper


@sync
async def Page(headless=True):
    """Launch Firefox via Playwright and return a synchronous-looking page.

    Every callable attribute of the page is passed through ``sync``.  All
    requests are routed through a handler that stores the body of each
    successful GET response in ``sql``.  ``page.goto`` is replaced by a
    version that waits for network idle, stores the rendered HTML under the
    URL without its scheme, and returns an IPython ``Image`` of a screenshot.
    """
    from playwright.async_api import async_playwright

    browser = await (await async_playwright().start()).firefox.launch(headless=headless)
    (page := await browser.new_page()).set_default_timeout(0)

    # Make the async page API callable without ``await``.
    for attr in dir(page):
        if callable(method := getattr(page, attr)):
            setattr(page, attr, sync(method))

    async def handle(route):
        # Caching is best-effort: any failure while fetching or storing the
        # response is swallowed.
        with unsafe:
            if route.request.method == 'GET' and (response := await route.fetch()).ok:
                sql.put(route.request.url, await response.body())
        # Kept outside the suppress block so a failed caching attempt never
        # leaves the request pending.  Note the request has already been
        # issued once by route.fetch() above when it was a GET.
        await route.continue_()

    page.route('**/*', handle)

    def goto(url, goto=page.goto):
        """Navigate to *url*, archive its HTML, and return a screenshot."""
        goto(url, wait_until='networkidle')
        sql.put(url.split('://')[1], page.content().encode())
        from IPython.display import Image
        return Image(page.screenshot())

    page.goto = goto
    return page


page = Page()
page.goto('https://naver.com/')