# Dedupe: content-addressed key/value store example
# (the "Dedupe" section added to sqlite3-python/README.md).
def SQL(db='.db', blob_dir='.db-blob'):
    """Open a dict-like, deduplicating blob store backed by SQLite.

    Blob contents are written once to ``blob_dir`` under their SHA-1 hex
    digest; the ``kv`` table in ``db`` maps each key to a digest, one row
    per assignment, timestamped by SQLite.

    Args:
        db: Path of the SQLite database file.
        blob_dir: Directory holding the content-addressed blob files.

    Returns:
        An object supporting ``store[key] = blob`` (``blob`` is bytes-like)
        and ``store[key]``, which yields the most recently assigned bytes
        for ``key`` or ``None`` when the key was never assigned.
    """
    import hashlib
    import os
    import sqlite3

    con = sqlite3.connect(db)

    def sql(query, *params):
        """Run one statement, commit it, and return all result rows."""
        # sqlite3 opens an implicit transaction before INSERT; leaving the
        # context manager commits it so the row survives process exit.
        with con:
            return con.execute(query, params).fetchall()

    # Directory and schema are created independently and idempotently, so a
    # missing directory next to an existing database (or vice versa) works.
    os.makedirs(blob_dir, exist_ok=True)
    for statement in (
        'PRAGMA journal_mode=WAL',
        'CREATE TABLE IF NOT EXISTS kv('
        'k TEXT, v BLOB, t TIMESTAMP DEFAULT CURRENT_TIMESTAMP)',
        'CREATE INDEX IF NOT EXISTS idx_kv_v ON kv(v)',
        'CREATE INDEX IF NOT EXISTS idx_kv_k_t ON kv(k, t DESC)',
    ):
        sql(statement)

    def setitem(_, filename, blob):
        """Store ``blob`` under ``filename``, reusing an identical blob file."""
        digest = hashlib.sha1(blob).hexdigest()
        path = os.path.join(blob_dir, digest)
        # The filesystem is the source of truth for "blob already stored";
        # the table may lack a row for a file written by an interrupted run.
        if not os.path.exists(path):
            # Write to a temporary name first so a partially written file
            # never appears under the final digest name.
            tmp_path = f'{path}.tmp'
            with open(tmp_path, 'wb') as f:
                f.write(blob)
            os.replace(tmp_path, path)
        sql('INSERT INTO kv(k,v) VALUES(?,?)', filename, digest)

    def getitem(_, filename):
        """Return the latest bytes stored under ``filename``, else ``None``."""
        # CURRENT_TIMESTAMP has one-second resolution; rowid breaks ties so
        # the most recent assignment wins within the same second.
        rows = sql(
            'SELECT v FROM kv WHERE k=? ORDER BY t DESC, rowid DESC LIMIT 1',
            filename,
        )
        if not rows:
            return None
        with open(os.path.join(blob_dir, rows[0][0]), 'rb') as f:
            return f.read()

    return type('', (), dict(__setitem__=setitem, __getitem__=getitem))()


if __name__ == '__main__':
    sql = SQL()

    print(sql['hello'])  # None on the first run (key not stored yet)
    sql['hello'] = b'world'
    print(sql['hello'])  # b'world'