From f6637ab9bdd903de22f98189bd12d5a945c68ad2 Mon Sep 17 00:00:00 2001 From: Jaewook Lee <11328376+jaewooklee93@users.noreply.github.com> Date: Thu, 11 Jul 2024 10:01:47 +0900 Subject: [PATCH] add llama.cpp server README.md --- llama-cpp-logits/README.md | 144 +++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 llama-cpp-logits/README.md diff --git a/llama-cpp-logits/README.md b/llama-cpp-logits/README.md new file mode 100644 index 0000000..82ceb84 --- /dev/null +++ b/llama-cpp-logits/README.md @@ -0,0 +1,144 @@ +## `docker-compose.yml`: llama.cpp CUDA server +```yml +services: + gemma: + image: ghcr.io/ggerganov/llama.cpp:server-cuda + ports: + - "8080:8080" + volumes: + - llama-cache:/root/.cache/llama.cpp + deploy: + resources: + reservations: + devices: + - capabilities: [gpu] + command: > + --host 0.0.0.0 + --hf-repo bartowski/gemma-2-9b-it-GGUF + --hf-file gemma-2-9b-it-IQ4_XS.gguf + --gpu-layers 99 + --main-gpu 0 +volumes: + llama-cache: +``` + +## cURL test + +```bash +$ curl --request POST \ + --url http://localhost:8080/completion \ + --header "Content-Type: application/json" \ + --data '{"prompt": "Building a website can be done in 10 simple steps:","n_predict": 128}' +``` + +```json +{"content":"\n\n**1. Define Your Purpose:**\n\n* What do you want to achieve with your website? (e.g., sell products, share information, build a community)\n* Who is your target audience?\n\n**2. Choose a Domain Name:**\n\n* Select a memorable and relevant name that reflects your website's purpose.\n* Check availability and register your domain name.\n\n**3. Select a Web Hosting Provider:**\n\n* Choose a reliable hosting provider that offers the necessary resources (storage, bandwidth, etc.) for your website.\n* Consider factors like price, uptime, and customer support.\n\n**4.","id_slot":0, ... 
} +``` + +## Python Client + +```python +import requests +response = requests.post( + 'http://localhost:8080/completion', + json={"prompt": "Building a website can be done in 10 simple steps:", + "n_predict": 128} +).json() + +print(response['content']) +``` + +``` +**1. Define Your Purpose:** + +* What do you want to achieve with your website? (e.g., sell products, share information, build a community) +* Who is your target audience? + +**2. Choose a Domain Name:** + +* Select a memorable and relevant name that reflects your website's purpose. +* Check availability and register your domain name. + +**3. Select a Web Hosting Provider:** + +* Choose a reliable hosting provider that offers the necessary resources (storage, bandwidth, etc.) for your website. +* Consider factors like price, uptime, and customer support. + +**4. +``` + +## Single token prediction with probs + +```sh +pip install requests polars +``` + +```python +import requests +import polars as pl +pl.Config.set_tbl_rows(40) +response = requests.post( + 'http://localhost:8080/completion', + json={"prompt": prompt,  # NOTE: define prompt (your question string) before running this snippet + "temperature": -1, + "n_predict": 1, + "n_probs":40 + } +).json() + +# print(response['content']) + + +df = pl.DataFrame(response['completion_probabilities'][0]['probs']) +print(df) +``` + +``` +shape: (40, 2) +┌─────────┬──────────┐ +│ tok_str ┆ prob │ +│ --- ┆ --- │ +│ str ┆ f64 │ +╞═════════╪══════════╡ +│ D ┆ 0.996538 │ +│ ** ┆ 0.002495 │ +│ ** ┆ 0.000539 │ +│ C ┆ 0.000053 │ +│ B ┆ 0.000047 │ +│ A ┆ 0.000032 │ +│ D ┆ 0.000029 │ +│ **( ┆ 0.000024 │ +│ d ┆ 0.000024 │ +│ **) ┆ 0.00002 │ +│ E ┆ 0.000017 │ +│ Seoul ┆ 0.000015 │ +│ ㄷ ┆ 0.000008 │ +│ ㄹ ┆ 0.000007 │ +│ 주 ┆ 0.000006 │ +│ Д ┆ 0.000004 │ +│ **, ┆ 0.000004 │ +│ 답 ┆ 0.000004 │ +│ 디 ┆ 0.000004 │ +│ 도 ┆ 0.000004 │ +│ ㅁ ┆ 0.000003 │ +│ D ┆ 0.000003 │ +│ Answer ┆ 0.000003 │ +│ 가 ┆ 0.000003 │ +│ ) ┆ 0.000003 │ +│ ④ ┆ 0.000003 │ +│ )** ┆ 0.000003 │ +│ ד ┆ 0.000003 │ +│ **** ┆ 0.000003 │ +│ ㄱ ┆ 0.000002 │ +│ 다 ┆ 0.000002 │ +│ 을 ┆ 
0.000002 │ +│ ㅇ ┆ 0.000002 │ +│ 유 ┆ 0.000002 │ +│ Korean ┆ 0.000002 │ +│ 4 ┆ 0.000002 │ +│ G ┆ 0.000002 │ +│ 이 ┆ 0.000001 │ +│ *** ┆ 0.000001 │ +│ 하 ┆ 0.000001 │ +└─────────┴──────────┘ +``` \ No newline at end of file