From 9b64ef0f53d8be5a2b5bd047682f824d49069c62 Mon Sep 17 00:00:00 2001 From: Jaewook Lee Date: Wed, 27 Nov 2024 00:04:29 +0900 Subject: [PATCH] revise SQL --- jimm.py | 49 +++++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/jimm.py b/jimm.py index 2f3b8c3..63fb5b2 100644 --- a/jimm.py +++ b/jimm.py @@ -1,30 +1,22 @@ +for i, c in enumerate('RGYBMC'): globals()[c] = lambda s, i=i: f'\x1b[{91+i}m{s}\x1b[0m' Soup = lambda html: __import__('bs4').BeautifulSoup(html, 'lxml') unsafe = __import__('contextlib').suppress(Exception) -for i, c in enumerate('RGYBMC'): globals()[c] = lambda s, i=i: f'\x1b[{91+i}m{s}\x1b[0m' def SQL(): - import sqlite3, os, hashlib + import sqlite3, hashlib, os; os.makedirs('.db-blob', exist_ok=True) (con := sqlite3.connect('.db', isolation_level=None)).row_factory = sqlite3.Row - con.execute('PRAGMA journal_mode=wal') - con.execute('PRAGMA busy_timeout='f'{1e9}') + con.execute('PRAGMA journal_mode=wal'); con.execute('PRAGMA busy_timeout='f'{1e9}') con.execute('CREATE TABLE IF NOT EXISTS kv(k, v, t DEFAULT CURRENT_TIMESTAMP)') - os.makedirs('.db-blob', exist_ok=True) - def put(sql, filename, blob): - sha1 = hashlib.sha1(blob).hexdigest() - if not sql('SELECT 1 FROM kv WHERE v=?', sha1): + def setitem(sql, filename, blob): + if not sql('SELECT 1 FROM kv WHERE v=?', sha1 := hashlib.sha1(blob).hexdigest()): with open(f'.db-blob/{sha1}', 'xb') as f: f.write(blob) print(f'{G(len(blob)):>16} {filename}') - sql[filename] = sha1 - def get(sql, filename): - return open(f'.db-blob/{sql[filename]}', 'rb').read() - - return type('', (), dict(put=put, get=get, - __call__=lambda _, q, *p: list(map(dict, con.execute(q, p))), - __setitem__=lambda sql, k, v: sql('INSERT INTO kv(k,v) VALUES(?,?)', k, v), - __getitem__ = lambda sql, k: sql( - 'SELECT v FROM kv WHERE k=? ORDER BY t DESC LIMIT 1', k)[0]['v'], - __iter__=lambda sql: (kv.values() for kv in sql( - 'SELECT k, v FROM kv GROUP BY k HAVING t = MAX(t)'))))() + sql('INSERT INTO kv(k,v) VALUES(?,?)', filename, sha1) + def getitem(sql, filename): + if sha1 := sql('SELECT v FROM kv WHERE k=? ORDER BY t DESC LIMIT 1', filename): + return open(f'.db-blob/{sha1[0]["v"]}', 'rb').read() + return type('', (), dict(__setitem__=setitem, __getitem__=getitem, + __call__=lambda _, q, *p: list(map(dict, con.execute(q, p)))))() sql = SQL() def sync(coro): @@ -42,26 +34,23 @@ def sync(coro): return wrapper @sync -async def Page(headless=True): +async def Page(headless=True, wait_until='networkidle', timeout=0): from playwright.async_api import async_playwright browser = await (await async_playwright().start()).firefox.launch(headless=headless) - (page := await browser.new_page()).set_default_timeout(0) + (page := await browser.new_page()).set_default_timeout(timeout) for attr in dir(page): if callable(method := getattr(page, attr)): setattr(page, attr, sync(method)) async def handle(route): - with unsafe: - if route.request.method == 'GET' and (response := await route.fetch()).ok: - sql.put(route.request.url, await response.body()) + if route.request.method == 'GET' and (response := await route.fetch()).ok: + sql[route.request.url] = await response.body() await route.continue_() - page.route('**/*', handle) - + page.route('**', handle) def goto(url, goto=page.goto): - goto(url, wait_until='networkidle') - sql.put(url.split('://')[1], page.content().encode()) + goto(url, wait_until=wait_until) + sql[url.split('://')[1]] = page.content().encode() from IPython.display import Image return Image(page.screenshot()) page.goto = goto return page page = Page() - -page.goto('https://naver.com/') +page.goto('https://naver.com/') \ No newline at end of file