revise SQL

This commit is contained in:
2024-11-27 00:04:29 +09:00
parent f5a81f5231
commit 9b64ef0f53

49
jimm.py
View File

@@ -1,30 +1,22 @@
def Soup(html):
    """Parse *html* with BeautifulSoup using the 'lxml' parser (bs4 is imported lazily)."""
    return __import__('bs4').BeautifulSoup(html, 'lxml')


# Reusable context manager that swallows any Exception raised inside its body.
unsafe = __import__('contextlib').suppress(Exception)

# Define R, G, Y, B, M, C as module globals: each wraps its argument in the
# ANSI escape codes 91..96 followed by a reset (\x1b[0m).
# The index is bound as a default argument so every lambda keeps its own code.
for _idx, _name in enumerate('RGYBMC'):
    globals()[_name] = lambda s, i=_idx: f'\x1b[{91+i}m{s}\x1b[0m'
def SQL():
    """Open the SQLite-backed blob store and return a handle to it.

    Blobs are written to ``.db-blob/<sha1 hex digest>``; the ``kv`` table in
    ``.db`` records ``(k, v, t)`` rows where ``k`` is the caller's key, ``v``
    the digest and ``t`` the insertion timestamp.

    The returned object supports:

    * ``store(query, *params)`` -- run *query* and return the rows as a list
      of dicts.
    * ``store[key] = blob`` (alias ``store.put(key, blob)``) -- save *blob*
      (bytes) under *key*.
    * ``store[key]`` (alias ``store.get(key)``) -- the bytes most recently
      saved under *key*, or ``None`` when the key has no row.
    * ``iter(store)`` -- ``(k, v)`` value views, one per key.
    """
    import sqlite3, hashlib, os

    os.makedirs('.db-blob', exist_ok=True)
    # isolation_level=None: the sqlite3 module opens no implicit transactions.
    con = sqlite3.connect('.db', isolation_level=None)
    con.row_factory = sqlite3.Row
    con.execute('PRAGMA journal_mode=wal')
    # f'{1e9}' renders as '1000000000.0'; SQLite documents the unit as ms.
    con.execute('PRAGMA busy_timeout='f'{1e9}')
    con.execute('CREATE TABLE IF NOT EXISTS kv(k, v, t DEFAULT CURRENT_TIMESTAMP)')

    def call(_, q, *p):
        """Execute *q* with positional parameters *p*; return rows as dicts."""
        return list(map(dict, con.execute(q, p)))

    def setitem(sql, filename, blob):
        """Store *blob* under *filename*, writing the bytes only once per digest."""
        digest = hashlib.sha1(blob).hexdigest()
        if not sql('SELECT 1 FROM kv WHERE v=?', digest):
            # 'wb' rather than 'xb': a blob file left behind without a kv row
            # (e.g. by an interrupted earlier run) must not raise
            # FileExistsError.  The name is the content hash, so rewriting it
            # yields the same bytes.
            with open(f'.db-blob/{digest}', 'wb') as f:
                f.write(blob)
            # G is the module-level green colour helper.
            print(f'{G(len(blob)):>16} {filename}')
        sql('INSERT INTO kv(k,v) VALUES(?,?)', filename, digest)

    def getitem(sql, filename):
        """Return the newest blob stored under *filename*, or None if absent."""
        # CURRENT_TIMESTAMP has one-second resolution, so rowid breaks ties
        # between rows inserted for the same key within the same second.
        rows = sql(
            'SELECT v FROM kv WHERE k=? ORDER BY t DESC, rowid DESC LIMIT 1',
            filename)
        if not rows:
            return None
        # Context manager so the file handle is closed deterministically.
        with open(f'.db-blob/{rows[0]["v"]}', 'rb') as f:
            return f.read()

    def iterate(sql):
        """Yield the (k, v) value view of the row selected for each key."""
        return (row.values() for row in sql(
            'SELECT k, v FROM kv GROUP BY k HAVING t = MAX(t)'))

    # Anonymous class; put/get are aliases so both call styles keep working.
    return type('', (), dict(
        __call__=call,
        __setitem__=setitem, put=setitem,
        __getitem__=getitem, get=getitem,
        __iter__=iterate,
    ))()
# Module-level store instance; Page's route handler and goto wrapper write to it.
sql = SQL()
def sync(coro):
@@ -42,26 +34,23 @@ def sync(coro):
return wrapper
@sync
async def Page(headless=True, wait_until='networkidle', timeout=0):
    """Launch Firefox through Playwright and return a page that archives what it loads.

    Args:
        headless: forwarded to ``firefox.launch``.
        wait_until: forwarded to the underlying ``page.goto`` on every
            navigation made through the returned page's ``goto``.
        timeout: forwarded to ``page.set_default_timeout``
            (NOTE(review): Playwright documents 0 as "no timeout" -- confirm).

    Returns:
        The Playwright page.  Every callable attribute has been replaced by
        ``sync(attribute)``, and ``goto`` is replaced by a wrapper that also
        stores the rendered HTML in ``sql`` and returns a screenshot as an
        ``IPython.display.Image``.
    """
    from playwright.async_api import async_playwright

    playwright = await async_playwright().start()
    browser = await playwright.firefox.launch(headless=headless)
    page = await browser.new_page()
    page.set_default_timeout(timeout)

    # Replace each callable attribute with its `sync`-wrapped form.
    # NOTE(review): presumably this lets callers use the page without `await`;
    # that depends on `sync`, whose body is not shown here.
    for attr in dir(page):
        if callable(method := getattr(page, attr)):
            setattr(page, attr, sync(method))

    async def handle(route):
        """Store the body of every successful GET under its URL, then let the request proceed."""
        if route.request.method == 'GET' and (response := await route.fetch()).ok:
            sql[route.request.url] = await response.body()
        await route.continue_()

    page.route('**', handle)

    def goto(url, goto=page.goto):
        """Navigate to *url*, store the page HTML, and return a screenshot image."""
        # The original (already sync-wrapped) page.goto is captured as a
        # default argument before page.goto is rebound below.
        goto(url, wait_until=wait_until)
        # Key is the URL with everything up to and including '://' removed.
        sql[url.split('://')[1]] = page.content().encode()
        from IPython.display import Image
        return Image(page.screenshot())

    page.goto = goto
    return page
# Create the module-level page and load a URL through the goto wrapper.
# NOTE(review): the goto line appears twice in this diff view -- presumably the
# removed and added copies of the same line; confirm a single call is intended.
page = Page()
page.goto('https://naver.com/')
page.goto('https://naver.com/')