Files
wiki/auto-csv/README.md
2024-09-22 04:15:20 -04:00

35 lines
994 B
Markdown

```python
# %%
import os; os.system('pip install -q requests bs4 lxml pandas')
import requests, bs4, json, pandas as pd
def ollama(prompt, *, model="llama3.1",
           url="https://ollama.yauk.tv/api/generate", timeout=600):
    """Send *prompt* to an Ollama ``/api/generate`` endpoint and return its reply.

    The request asks for JSON-formatted, non-streamed output with
    temperature 0 and ``num_ctx`` 32768.

    Args:
        prompt: Text passed as the ``prompt`` field of the request.
        model: Model name passed as the ``model`` field.
        url: Full URL of the generate endpoint.
        timeout: Seconds to wait for the server before giving up; ``None``
            waits indefinitely (the previous behaviour).

    Returns:
        The ``response`` field of the server's JSON reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no reply arrives within *timeout* seconds.
        KeyError: If the reply JSON has no ``response`` field.
    """
    payload = {
        "model": model,
        "format": "json",
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0,
            "num_ctx": 32768,
        },
    }
    reply = requests.post(url, json=payload, timeout=timeout)
    # Surface HTTP failures directly instead of as a KeyError on 'response'.
    reply.raise_for_status()
    return reply.json()['response']
# %%
# Download the page and keep the pretty-printed HTML of the first 20
# <li class="flex"> elements as the text to be parsed.
url = 'https://ollama.com/library'
response = requests.get(url, timeout=30)  # do not hang forever on a stalled connection
response.raise_for_status()  # stop here rather than parse an error page
soup = bs4.BeautifulSoup(response.text, 'lxml')
# prettify() already returns a str when no encoding is given, so no str() wrapper is needed.
tags = ''.join(tag.prettify() for tag in soup.find_all('li', class_='flex')[:20])
print(tags)
# %%
# Ask the model to turn the scraped HTML into JSON, then load it into a DataFrame.
result = ollama(f''' Parse the following HTML snippet and extract the information into a JSON format. Output only the JSON data, without any additional text, explanation, or formatting.
HTML to analyze: {tags} ''')
parsed = json.loads(result)
if isinstance(parsed, dict):
    if not parsed:
        raise ValueError("model returned an empty JSON object; nothing to tabulate")
    # The records are taken from the first value of the top-level object,
    # whatever key the model chose for it.
    records = next(iter(parsed.values()))
else:
    # A top-level JSON array is used as the records directly.
    records = parsed
df = pd.DataFrame(records)
df
```