diff --git a/playwright/README.md b/playwright/README.md
index 00ede7c..e58b3b8 100644
--- a/playwright/README.md
+++ b/playwright/README.md
@@ -1,157 +1,101 @@
```python
-# google.py
+# %%
+from playwright.async_api import async_playwright
+from bs4 import BeautifulSoup
+from textwrap import wrap
+import re
-#%%
-import xvfbwrapper, playwright.async_api
-from PIL import Image; from io import *
-#%%
-xvfbwrapper.Xvfb().start()
-playwright = await playwright.async_api.async_playwright().start()
-browser = await playwright.chromium.launch(headless=False,
- args=['--enable-features=WebContentsForceDark'])
-page = await browser.new_page()
-#%%
-await page.goto('https://google.com')
-image = Image.open(BytesIO(await page.screenshot()))
-image.save('google.png')
-```
+async def summarize_page(page):
+    """Print the title, a text excerpt and the visible form controls of a page.
+
+    Args:
+        page: Playwright ``Page`` whose current document is summarized.
+
+    Everything is written to stdout; nothing is returned.
+    """
-위의 `google.py`는 top-level await를 사용하므로 아래와 같이 실행해야 한다.
-```sh
-python -m asyncio < google.py
-```
+    soup = BeautifulSoup(await page.content(), 'html.parser')
+    # soup.title is None when the document has no <title>; guard the lookup.
+    title = soup.title.string if soup.title else None
+    print(f"\n{'=' * 50}\n{title or 'No title'}\n{'=' * 50}\n")
+
-```python
-# play.py
-from playwright.async_api import async_playwright as aP
-import os, io, asyncio, xvfbwrapper
-from db import DB
+    def clean_text(text):
+        """Collapse whitespace runs and repeated dots, then trim."""
+        text = re.sub(r'\s+', ' ', text)
+        return re.sub(r'\.{2,}', '.', text).strip()
+
-async def Page(browser='chromium', headless=True):
- if headless: xvfbwrapper.Xvfb().start()
- else: os.environ['DISPLAY'] = ':0'
+    hidden = {'style', 'script', 'head', 'title', 'meta', '[document]'}
+    if soup.body:
+        texts = (clean_text(t) for t in soup.body.find_all(string=True)
+                 if t.parent.name not in hidden)
+        visible_texts = [t for t in texts if t]
+        if visible_texts:
+            print(f"{visible_texts.pop(0)}\n")  # first text node acts as headline
+        summary = re.sub(r'\s*\.\s*', '. ', ' '.join(visible_texts))
+        # Cap the excerpt at 100 words and wrap it to 80 columns.
+        print('\n'.join(wrap(' '.join(summary.split()[:100]), width=80)))
- db = DB()
- playwright = await aP().start()
- browser = await getattr(playwright, browser).launch(headless=False)
- context = await browser.new_context(accept_downloads=True)
- context.set_default_timeout(0)
+
+    print("\n" + "-" * 50 + "\n")
+
- async def save(response):
- try:
- if response.ok and not db.exists(url := response.url):
- db[url] = await response.body()
- except: pass
-
- async def load(route):
- if body := db[route.request.url]: return await route.fulfill(body=body)
- await route.continue_()
+    async def describe(element):
+        """Render one control as ``<tag attr="value" ...>``, skipping empty attributes."""
+        tag = await element.evaluate('el => el.tagName.toLowerCase()')
+        attrs = []
+        for prop in ('id', 'name', 'type', 'value', 'placeholder', 'aria-label', 'role'):
+            value = await element.get_attribute(prop)
+            if value:
+                attrs.append(f'{prop}="{value}"')
+        return f"<{tag} {' '.join(attrs)}>"
- context.on('response', save)
- await context.route('**/*', load)
- for block in ['**/*.gif', '**/css*.js']:
- await context.route(block, lambda route: route.abort())
-
- return await context.new_page()
+
+    seen = set()  # identical renderings are printed only once
+    for selector in ['input', 'button', 'textarea', 'select']:
+        for element in await page.query_selector_all(selector):
+            if not await element.is_visible():
+                continue
+            element_str = await describe(element)
+            if element_str not in seen:
+                seen.add(element_str)
+                print(element_str)
+
+    print("\n" + "-" * 50 + "\n")
async def main():
- page = await Page()
- await page.goto('https://reddit.com')
- await page.screenshot(path='screenshot.png')
+    """Load google.com in Firefox and print its summary via summarize_page."""
+    async with async_playwright() as pw:
+        firefox = await pw.firefox.launch()
+        tab = await firefox.new_page()
+        await tab.goto("https://google.com")
-if __name__ == "__main__":
- asyncio.run(main())
+        await summarize_page(tab)
+        await firefox.close()
+
+# %%
+await main()
```
-```python
-# db.py
-import sqlite3, json, os
+## Output
+```html
+==================================================
+Google
+==================================================
-class DB(sqlite3.Connection):
- def __init__(self, db_name=".db.sqlite"):
- super().__init__(os.path.expanduser(db_name))
- with self:
- self.execute('''
- CREATE TABLE IF NOT EXISTS kv_store
- (key TEXT PRIMARY KEY, value BLOB)
- ''')
+Google 정보
- def __setitem__(self, key, value):
- value = value if isinstance(value, bytes) else json.dumps(value)
- with self:
- result = self.execute('''
- INSERT OR REPLACE INTO kv_store
- (key, value) VALUES (?, ?)
- ''', (key, value)).rowcount
- return {"modified_count": result}
+스토어 Gmail 이미지 로그인 무엇에 관한 의견인지 선택하세요. 더보기 삭제 삭제 부적절한 예상 검색어 신고 Google 지원 언어:
+English 대한민국 광고 비즈니스 검색의 원리 개인정보처리방침 약관 설정 검색 설정 고급검색 Google 검색에 표시되는 데이터 검색 기록
+검색 도움말 의견 보내기 어두운 테마: 사용 안함 Google 앱
- def __getitem__(self, key):
- with self:
- result = self.execute('''
- SELECT value FROM kv_store
- WHERE key = ?
- ''', (key,)).fetchone()
- if result:
- if isinstance(value := result[0], str):
- try: return json.loads(value)
- except json.JSONDecodeError: pass
- return value
+--------------------------------------------------
- def delete(self, key):
- with self:
- result = self.execute('''
- DELETE FROM kv_store
- WHERE key = ?
- ''', (key,)).rowcount
- return {"deleted_count": result}
+
+
+