Add snurad/README.md
This commit is contained in:
60
snurad/README.md
Normal file
60
snurad/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
```python
|
||||
def SQL(db='db.sqlite'):
    """Open (or create) a SQLite database and return a query helper.

    The connection is opened with ``isolation_level=None``, switched to WAL
    journaling, checkpointed once, and given a busy timeout of 10**9 ms.
    A ``cache(url PRIMARY KEY, blob)`` table is created if it is missing.

    Args:
        db: Path of the database file passed to ``sqlite3.connect``.

    Returns:
        A callable ``query(q, *p)`` that executes statement ``q`` with the
        positional parameters ``p`` and returns all result rows as a list.
    """
    import sqlite3

    # Only the bound ``execute`` method is kept; it holds the connection alive.
    execute = sqlite3.connect(db, isolation_level=None).execute
    execute('PRAGMA journal_mode=WAL')
    execute('PRAGMA wal_checkpoint(FULL)')
    # busy_timeout takes an integer number of milliseconds, so format an int
    # instead of relying on SQLite tolerating the float text '1000000000.0'.
    execute(f'PRAGMA busy_timeout={10 ** 9}')
    execute('CREATE TABLE IF NOT EXISTS cache(url PRIMARY KEY, blob)')

    def query(q, *p):
        """Run ``q`` bound to the parameters ``p`` and return every row."""
        return list(execute(q, p))

    return query
|
||||
import contextlib

# Module-wide query helper bound to the 'snurad.sqlite' database file.
sql = SQL(db='snurad.sqlite')

# Reusable context manager: `with unsafe:` drops any Exception raised inside
# the block instead of propagating it.
unsafe = contextlib.suppress(Exception)
|
||||
|
||||
def _ansi_painter(code):
    """Build a function that wraps text in the ANSI escape for ``code``."""
    def paint(text, ansi=code):
        return f'\x1b[{ansi}m{text}\x1b[0m'
    return paint


# Publish red, green, yellow, blue, magenta and cyan as module-level helpers,
# using ANSI codes 91 through 96 in that order.
globals().update(
    (name, _ansi_painter(91 + offset))
    for offset, name in enumerate('red green yellow blue magenta cyan'.split())
)
|
||||
|
||||
def sync(func):
    """Wrap ``func`` so that calling it blocks until its awaitable finishes.

    ``nest_asyncio.apply()`` is invoked on every decoration, before the
    wrapper is built (presumably so ``asyncio.run`` can be used while an
    event loop is already running -- confirm against nest_asyncio's docs).

    Args:
        func: Callable returning an awaitable, e.g. a coroutine function.

    Returns:
        A plain function with ``func``'s metadata (via ``functools.wraps``)
        that forwards its arguments to ``func`` and returns the result of
        ``asyncio.run`` on the awaitable.
    """
    import asyncio
    import functools

    import nest_asyncio

    nest_asyncio.apply()

    @functools.wraps(func)
    def blocking(*args, **kwargs):
        return asyncio.run(func(*args, **kwargs))

    return blocking
|
||||
@sync
async def Context():
    """Start Playwright Firefox and return a browser context patched for sync use.

    The returned context differs from a stock Playwright context in three ways:

    * its default timeout is set to 0;
    * ``context.new_page()`` is replaced by a blocking version whose pages
      have their coroutine methods wrapped with ``sync`` and whose ``goto``
      returns a ``BeautifulSoup`` of the loaded page;
    * every request (``'**/*'``) is routed through a handler that stores
      successful response bodies in the ``cache`` table via ``sql``.

    Returns:
        The patched Playwright browser context.
    """
    import inspect

    from playwright.async_api import async_playwright

    playwright = await async_playwright().start()
    browser = await playwright.firefox.launch()
    context = await browser.new_context()
    context.set_default_timeout(0)  # Playwright: 0 disables the timeout

    @sync
    async def new_page(new_page=context.new_page):
        """Open a page whose coroutine methods can be called synchronously.

        The original ``context.new_page`` is captured as a default argument
        because the attribute is overwritten with this wrapper below.
        """
        page = await new_page()
        for attr in dir(page):
            if attr[0] == '_':
                continue
            method = getattr(page, attr)
            # Wrap coroutine functions only: ``sync`` feeds the call result to
            # asyncio.run, which rejects the return value of a plain
            # synchronous method, so those must be left untouched.
            if inspect.iscoroutinefunction(method):
                setattr(page, attr, sync(method))

        def goto(url, goto=page.goto):
            """Navigate to ``url`` and return the page parsed with lxml.

            ``goto`` defaults to the already sync-wrapped ``page.goto``.
            """
            goto(url)
            # Best-effort inline screenshot. ``display`` is not defined in this
            # file (presumably the IPython/Jupyter builtin); any failure is
            # swallowed by ``unsafe``.
            with unsafe:
                display(__import__('IPython').display.Image(page.screenshot()))
            return __import__('bs4').BeautifulSoup(page.content(), 'lxml')

        page.goto = goto
        return page

    context.new_page = new_page

    async def route(route):
        """Fetch the request, cache a successful body by URL, then fulfil it."""
        response = await route.fetch()
        if response.ok:
            url, blob = route.request.url, await response.body()
            sql('INSERT OR REPLACE INTO cache VALUES(?,?)', url, blob)
            print(red(url))
        # NOTE(review): indentation was lost in the rendered source; fulfilling
        # is assumed to happen for every response, not only successful ones.
        await route.fulfill(response=response)

    await context.route('**/*', route)
    return context
|
||||
# Build the patched browser context and open one page on it.
context = Context()
page = context.new_page()

url = 'http://self-learning.snurad.snu.ac.kr/main/sub_page.php?p_id=2&c_id=45'
soup = page.goto(url)

# Visit every link on the start page whose href begins with '/', resolving it
# against the site root.
for a in soup.find_all(href=True):
    href = a['href']
    # startswith() copes with an empty href, where indexing [0] would raise
    # IndexError and abort the crawl.
    if href.startswith('/'):
        page.goto('http://self-learning.snurad.snu.ac.kr' + href)
        # NOTE(review): indentation was lost in the rendered source; the print
        # is assumed to report only the links that were actually visited.
        print(green(href))
|
||||
```
|
||||
Reference in New Issue
Block a user