From 0516da2b33d8a739d3e997bddf29a8784f79eaae Mon Sep 17 00:00:00 2001
From: Jaewook Lee <11328376+jaewooklee93@users.noreply.github.com>
Date: Tue, 16 Jul 2024 21:45:26 +0900
Subject: [PATCH] add translator example

---
 gemma-2-translator/README.md | 103 +++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 gemma-2-translator/README.md

diff --git a/gemma-2-translator/README.md b/gemma-2-translator/README.md
new file mode 100644
index 0000000..a84fdfd
--- /dev/null
+++ b/gemma-2-translator/README.md
@@ -0,0 +1,103 @@
## `app.py`
```python
import json, re, requests

example = '''# LLaMA.cpp HTTP Server

Fast, lightweight, pure C/C++ HTTP server based on [httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) and **llama.cpp**.

Set of LLM REST APIs and a simple web front end to interact with llama.cpp.

**Features:**
 * LLM inference of F16 and quantum models on GPU and CPU
 * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes
 * Parallel decoding with multi-user support
 * Continuous batching
 * Multimodal (wip)
 * Monitoring endpoints
 * Schema-constrained JSON response format

The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216).'''

# Build the prompt as a JSON document whose Korean translation is still a placeholder.
prompt = {
    'filename': '/ggerganov/llama.cpp/master/examples/server/README.md',
    'en-US': {'src': example},
    'ko-KR': '%PLACEHOLDER%'
}
prompt = json.dumps(prompt)
# Cut the JSON off right before the placeholder so the model has to continue
# it with the ko-KR translation itself.
prompt = re.sub(' "%PLACEHOLDER%(.*)', '', prompt)
print('\n\tPrompt:\n')
print(prompt)
print('\n---\n')

response = requests.post(
    'http://localhost:8080/completion',
    stream=True,
    json=dict(
        prompt=prompt,
        stream=True,
        # Constrain the completion to a {"src": "..."} object.
        json_schema=dict(
            type='object',
            properties=dict(
                src={'type': 'string'}
            )
        )
    )
)

print('\tResponse:\n')
content = ''
# The server streams server-sent events of the form "data: {...}".
for line in response.iter_lines():
    if match := re.match('data: (.*)', line.decode()):
        chunk = json.loads(match.group(1))
        print(chunk['content'], end='', flush=True)
        content += chunk['content']

result = json.loads(content)['src']
print('\n---\n')

print('\tMarkdown:\n')
try:
    # get_ipython() only exists inside IPython/Jupyter; render the result there.
    get_ipython()
    from IPython.display import Markdown
    display(Markdown(result))
except NameError:
    print(result)
```
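The trick in `app.py` is that the prompt itself is a JSON document cut off right before the translation value: the only sensible way for the model to continue is to emit the Korean object, and the `json_schema` field of the llama.cpp server's `/completion` request constrains that continuation to a `{"src": ...}` object. A minimal sketch of just the truncation step, on a toy payload made up for illustration:

```python
import json, re

toy = {'en-US': {'src': 'Hello'}, 'ko-KR': '%PLACEHOLDER%'}
# Same regex as app.py: drop the placeholder and everything after it.
prompt = re.sub(' "%PLACEHOLDER%(.*)', '', json.dumps(toy))
print(prompt)  # {"en-US": {"src": "Hello"}, "ko-KR":
```

Everything the model generates after that point is the translation object itself, which is why the script can simply concatenate the streamed chunks and `json.loads` the result.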
```sh
$ python app.py

    Prompt:

{"filename": "/ggerganov/llama.cpp/master/examples/server/README.md", "en-US": {"src": "# LLaMA.cpp HTTP Server\n\nFast, lightweight, pure C/C++ HTTP server based on [httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) and **llama.cpp**.\n\nSet of LLM REST APIs and a simple web front end to interact with llama.cpp.\n\n**Features:**\n * LLM inference of F16 and quantum models on GPU and CPU\n * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes\n * Parallel decoding with multi-user support\n * Continuous batching\n * Multimodal (wip)\n * Monitoring endpoints\n * Schema-constrained JSON response format\n\nThe project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216)."}, "ko-KR":

---

    Response:

{"src": "# LLaMA.cpp HTTP 서버\n\n[httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) 및 **llama.cpp** 기반의 빠르고 가벼운 순수 C/C++ HTTP 서버입니다.\n\nLLM REST API 세트 및 llama.cpp와 상호 작용하기 위한 간단한 웹 프런트 엔드입니다.\n\n**특징:**\n * GPU 및 CPU에서 F16 및 양자 모델의 LLM 유추\n * [OpenAI API](https://github.com/openai/openai-openapi) 호환되는 채팅 완성 및 잠재형 엔드포인트\n * 다중 사용자 지원을 가진 병렬 디코딩\n * 지속적인 배치 처리\n * 다중 모달 (준비 중)\n * 모니터링 엔드포인트\n * 스키마 제약 JSON 응답 형식\n\n이 프로젝트는 적극적으로 개발 중이며, [피드백 및 기여자를 찾고 있습니다](https://github.com/ggerganov/llama.cpp/issues/4216)."}

---

    Markdown:
```
# LLaMA.cpp HTTP 서버

[httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) 및 **llama.cpp** 기반의 빠르고 가벼운 순수 C/C++ HTTP 서버입니다.

LLM REST API 세트 및 llama.cpp와 상호 작용하기 위한 간단한 웹 프런트 엔드입니다.

**특징:**
 * GPU 및 CPU에서 F16 및 양자 모델의 LLM 유추
 * [OpenAI API](https://github.com/openai/openai-openapi) 호환되는 채팅 완성 및 잠재형 엔드포인트
 * 다중 사용자 지원을 가진 병렬 디코딩
 * 지속적인 배치 처리
 * 다중 모달 (준비 중)
 * 모니터링 엔드포인트
 * 스키마 제약 JSON 응답 형식

이 프로젝트는 적극적으로 개발 중이며, [피드백 및 기여자를 찾고 있습니다](https://github.com/ggerganov/llama.cpp/issues/4216).
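The pattern is not tied to this particular README or to Korean. Below is a minimal, non-streaming sketch that wraps it in a reusable helper; the `translate` function, its defaults, and the assumption that a llama.cpp server is already listening on `localhost:8080` are illustrative additions, not part of `app.py`:

```python
import json, re, requests

def translate(src_markdown, target_lang='ko-KR',
              server='http://localhost:8080', filename='README.md'):
    """Hypothetical helper: same placeholder trick as app.py, without streaming."""
    prompt = json.dumps({
        'filename': filename,
        'en-US': {'src': src_markdown},
        target_lang: '%PLACEHOLDER%',
    })
    # Truncate right before the placeholder so the model completes the
    # target-language object on its own.
    prompt = re.sub(' "%PLACEHOLDER%(.*)', '', prompt)

    response = requests.post(
        f'{server}/completion',
        json={
            'prompt': prompt,
            # Constrain the completion to a {"src": "..."} object.
            'json_schema': {
                'type': 'object',
                'properties': {'src': {'type': 'string'}},
            },
        },
    )
    # Without streaming, the generated text comes back under 'content'.
    return json.loads(response.json()['content'])['src']

if __name__ == '__main__':
    print(translate('# Hello\n\nA short test document.', 'ja-JP'))
```

Swapping the language tag (here `ja-JP`) is enough to request a different target language, since the model only ever sees the truncated JSON and the schema.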