"""Scrape the Ollama model library page and let an LLM turn the HTML into a table.

Extracted from the fenced Python example in ``auto-csv/README.md``. The
``# %%`` cell markers of the original example are kept.

Flow:
    1. Fetch ``https://ollama.com/library`` and keep the first 20
       ``<li class="flex">`` elements as pretty-printed HTML.
    2. Send that HTML to an Ollama ``/api/generate`` endpoint with
       ``"format": "json"`` and ask for a JSON rendering of the content.
    3. Load the first value of the returned JSON object into a DataFrame.
"""

# %%
import json
import os
import subprocess
import sys

# Install through the interpreter that is running this script, so the packages
# land in the same environment that imports them below.  ``check=False`` keeps
# the install best-effort: if it fails (e.g. offline) but the packages are
# already present, the imports below still succeed.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q",
     "requests", "bs4", "lxml", "pandas"],
    check=False,
)

import bs4
import pandas as pd
import requests

OLLAMA_URL = "https://ollama.yauk.tv/api/generate"


def ollama(prompt, *, model="llama3.1", url=OLLAMA_URL, timeout=(10, 600)):
    """Send *prompt* to an Ollama ``/api/generate`` endpoint and return its reply.

    Args:
        prompt: Text sent as the ``"prompt"`` field of the request.
        model: Model name sent as the ``"model"`` field.
        url: Endpoint that receives the POST request.
        timeout: Passed straight to ``requests.post``; a ``(connect, read)``
            tuple or a single number of seconds.  The read timeout is
            generous because generation is not streamed, so the whole
            completion arrives in one response.

    Returns:
        The ``"response"`` field of the JSON body returned by the server.
        The request asks for ``"format": "json"``, so this is expected to be
        a JSON document encoded as a string.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not answer within *timeout*.
        KeyError: The response body has no ``"response"`` field.
    """
    payload = {
        "model": model,
        "format": "json",
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0,
            "num_ctx": 32768,
        },
    }
    reply = requests.post(url, json=payload, timeout=timeout)
    # Fail here with the HTTP status instead of a KeyError on 'response' below.
    reply.raise_for_status()
    return reply.json()['response']


def _table_payload(reply_text):
    """Decode the model's JSON reply and return the data to tabulate.

    For a JSON object the first value is returned (the model is expected to
    wrap the rows under a single key of its own choosing); a top-level JSON
    array is returned as-is.

    Raises:
        json.JSONDecodeError: *reply_text* is not valid JSON.
        ValueError: The reply is an empty JSON object.
    """
    parsed = json.loads(reply_text)
    if not isinstance(parsed, dict):
        return parsed
    if not parsed:
        raise ValueError("model returned an empty JSON object; nothing to tabulate")
    return next(iter(parsed.values()))


# %%
url = 'https://ollama.com/library'
response = requests.get(url, timeout=30)
# Do not feed an error page to the parser.
response.raise_for_status()
soup = bs4.BeautifulSoup(response.text, 'lxml')
# Only the first 20 matching items are sent to the model.
tags = ''.join(tag.prettify() for tag in soup.find_all('li', class_='flex')[:20])
print(tags)

# %%
result = ollama(f''' Parse the following HTML snippet and extract the information into a JSON format. Output only the JSON data, without any additional text, explanation, or formatting.

HTML to analyze: {tags} ''')

df = pd.DataFrame(_table_payload(result))
df