Files
wiki/auto-csv
2024-09-22 04:15:20 -04:00
..
2024-09-22 04:15:20 -04:00

# %%
import os; os.system('pip install -q requests bs4 lxml pandas')
import requests, bs4, json, pandas as pd

def ollama(prompt, *, timeout=600):
    """Send *prompt* to the Ollama ``/api/generate`` endpoint and return its reply.

    The request selects model ``llama3.1`` with ``format="json"``, disables
    streaming, and sets the options ``temperature=0`` and ``num_ctx=32768``.

    Args:
        prompt: Text sent as the ``prompt`` field of the request body.
        timeout: Timeout in seconds forwarded to ``requests.post``. Without
            one, a stalled server would block the call indefinitely.

    Returns:
        The value stored under ``'response'`` in the JSON reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within *timeout*.
        KeyError: If a successful reply carries no ``'response'`` field.
    """
    reply = requests.post(
        "https://ollama.yauk.tv/api/generate",
        json={
            "model": "llama3.1",
            "format": "json",
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": 0,
                "num_ctx": 32768,
            },
        },
        timeout=timeout,
    )
    # Surface HTTP failures as such, instead of a confusing KeyError on an
    # error body that has no 'response' field.
    reply.raise_for_status()
    return reply.json()['response']

# %%
url = 'https://ollama.com/library'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Stop on an HTTP error rather than silently parsing an error page.
response.raise_for_status()
soup = bs4.BeautifulSoup(response.text, 'lxml')
# Keep only the first 20 <li class="flex"> elements and concatenate their
# pretty-printed markup; prettify() already returns a str.
tags = ''.join(tag.prettify() for tag in soup.find_all('li', class_='flex')[:20])
print(tags)

# %%
# Ask the model to turn the scraped HTML into JSON; the prompt is sent verbatim.
result = ollama(f''' Parse the following HTML snippet and extract the information into a JSON format. Output only the JSON data, without any additional text, explanation, or formatting.

HTML to analyze: {tags} ''')

parsed = json.loads(result)
if isinstance(parsed, dict):
    # Assumes the records sit under the first key of the object (the schema is
    # chosen by the model, so this is not guaranteed). An empty object yields
    # no records instead of raising IndexError.
    records = next(iter(parsed.values()), [])
else:
    # A top-level JSON array is used as the record list directly.
    records = parsed
df = pd.DataFrame(records)
df