diff --git a/elasticsearch-streamlit/README.md b/elasticsearch-streamlit/README.md index a64dfab..9ad1792 100644 --- a/elasticsearch-streamlit/README.md +++ b/elasticsearch-streamlit/README.md @@ -3,210 +3,59 @@ 2. Obsidian Vault를 elasticsearch에 업로드 3. Streamlit app으로 search-as-you-type 구현 -# 1. elasticsearch server 가동 +## 1. elasticsearch server 가동 -Ref: [Getting started with the Elastic Stack and Docker Compose](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose) - -## 1-1. `docker-compose.yml` +### `docker-compose.yml` ```yaml -version: "3.8" - +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.14.1 + environment: + - discovery.type=single-node + - xpack.security.enabled=false + ports: + - "9200:9200" + volumes: + - es-data:/usr/share/elasticsearch/data volumes: - certs: - driver: local - esdata01: - driver: local - kibanadata: - driver: local - metricbeatdata01: - driver: local - filebeatdata01: - driver: local - logstashdata01: - driver: local - - -networks: - default: - name: elastic - external: false - - -services: - setup: - image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} - volumes: - - certs:/usr/share/elasticsearch/config/certs - user: "0" - command: > - bash -c ' - if [ x${ELASTIC_PASSWORD} == x ]; then - echo "Set the ELASTIC_PASSWORD environment variable in the .env file"; - exit 1; - elif [ x${KIBANA_PASSWORD} == x ]; then - echo "Set the KIBANA_PASSWORD environment variable in the .env file"; - exit 1; - fi; - if [ ! -f config/certs/ca.zip ]; then - echo "Creating CA"; - bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip; - unzip config/certs/ca.zip -d config/certs; - fi; - if [ ! -f config/certs/certs.zip ]; then - echo "Creating certs"; - echo -ne \ - "instances:\n"\ - " - name: es01\n"\ - " dns:\n"\ - " - es01\n"\ - " - localhost\n"\ - " ip:\n"\ - " - 127.0.0.1\n"\ - " - name: kibana\n"\ - " dns:\n"\ - " - kibana\n"\ - " - localhost\n"\ - " ip:\n"\ - " - 127.0.0.1\n"\ - > config/certs/instances.yml; - bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key; - unzip config/certs/certs.zip -d config/certs; - fi; - echo "Setting file permissions" - chown -R root:root config/certs; - find . -type d -exec chmod 750 \{\} \;; - find . -type f -exec chmod 640 \{\} \;; - echo "Waiting for Elasticsearch availability"; - until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done; - echo "Setting kibana_system password"; - until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done; - echo "All done!"; - ' - healthcheck: - test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"] - interval: 1s - timeout: 5s - retries: 120 - - es01: - depends_on: - setup: - condition: service_healthy - image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} - labels: - co.elastic.logs/module: elasticsearch - volumes: - - certs:/usr/share/elasticsearch/config/certs - - esdata01:/usr/share/elasticsearch/data - ports: - - ${ES_PORT}:9200 - environment: - - node.name=es01 - - cluster.name=${CLUSTER_NAME} - - discovery.type=single-node - - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} - - bootstrap.memory_lock=true - - xpack.security.enabled=true - - xpack.security.http.ssl.enabled=true - - xpack.security.http.ssl.key=certs/es01/es01.key - - xpack.security.http.ssl.certificate=certs/es01/es01.crt - - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt - - xpack.security.transport.ssl.enabled=true - - xpack.security.transport.ssl.key=certs/es01/es01.key - - xpack.security.transport.ssl.certificate=certs/es01/es01.crt - - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt - - xpack.security.transport.ssl.verification_mode=certificate - - xpack.license.self_generated.type=${LICENSE} - mem_limit: ${ES_MEM_LIMIT} - ulimits: - memlock: - soft: -1 - hard: -1 - healthcheck: - test: - [ - "CMD-SHELL", - "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'", - ] - interval: 10s - timeout: 10s - retries: 120 + es-data: ``` -## 1-2. `.env` - -stack을 생성할때 다음의 .env도 첨부해주어야 한다. - -``` -# Project namespace (defaults to the current folder name if not set) -#COMPOSE_PROJECT_NAME=myproject - - -# Password for the 'elastic' user (at least 6 characters) -ELASTIC_PASSWORD=changeme - - -# Password for the 'kibana_system' user (at least 6 characters) -KIBANA_PASSWORD=changeme - - -# Version of Elastic products -STACK_VERSION=8.7.1 - - -# Set the cluster name -CLUSTER_NAME=docker-cluster - - -# Set to 'basic' or 'trial' to automatically start the 30-day trial -LICENSE=basic -#LICENSE=trial - - -# Port to expose Elasticsearch HTTP API to the host -ES_PORT=9200 - - -# Port to expose Kibana to the host -KIBANA_PORT=5601 - - -# Increase or decrease based on the available host memory (in bytes) -ES_MEM_LIMIT=1073741824 -KB_MEM_LIMIT=1073741824 -LS_MEM_LIMIT=1073741824 - - -# SAMPLE Predefined Key only to be used in POC environments -ENCRYPTION_KEY=c34d38b3a14956121ff2170e5030b471551370178f43e5626eec58b04a30fae2 -``` - -## 1-3. self-signed SSL 인증서 추출 -- 첫 실행때 elasticsearch가 SSL 인증서를 생성하는데, 이를 host의 `/tmp/ca.crt` 로 추출한다. -```sh -docker cp elasticstack_docker-es01-1:/usr/share/elasticsearch/config/certs/ca/ca.crt /tmp/. -``` -- `elasticstack_docker` 부분은 자신의 stack 이름으로 변경한다. - - portainer에서는 stack 이름 - - docker compose CLI에서는 docker-compose.yml이 있는 폴더 이름 - -## 1-4. PING test +### PING test ```sh -curl -k --cacert /tmp/ca.crt -u elastic:changeme https://localhost:9200 +$ curl http://localhost:9200 + +{ + "name" : "bb0c7b36b2ec", + "cluster_name" : "docker-cluster", + "cluster_uuid" : "f-t9FuV-T026RGC0nAgQXg", + "version" : { + "number" : "8.14.1", + "build_flavor" : "default", + "build_type" : "docker", + "build_hash" : "93a57a1a76f556d8aee6a90d1a95b06187501310", + "build_date" : "2024-06-10T23:35:17.114581191Z", + "build_snapshot" : false, + "lucene_version" : "9.10.0", + "minimum_wire_compatibility_version" : "7.17.0", + "minimum_index_compatibility_version" : "7.0.0" + }, + "tagline" : "You Know, for Search" +} ``` -![alt text](image4.png) -# 2. Obsidian Vault를 elasticsearch에 업로드 +## 2. Obsidian Vault를 elasticsearch에 업로드 ```python from glob import glob -from elasticsearch import Elasticsearch, ConnectionError, ConnectionTimeout, helpers +from elasticsearch import Elasticsearch, helpers +import os # 자신의 obsidian vault 주소로 변경 -md_files = glob('/mnt/c/Users/j/Documents/obsidian-sync/**/*.md') +md_files = glob('/mnt/c/Users/j/Documents/obsidian-sync/**/*.md', recursive=True) md_contents = [] for file_path in md_files: with open(file_path, 'r', encoding='utf-8') as file: @@ -219,12 +68,7 @@ md_contents # Elasticsearch 클라이언트 생성 -es = Elasticsearch( - "https://localhost:9200", - ca_certs="/tmp/ca.crt", - basic_auth=("elastic", "changeme"), - verify_certs=True -) +es = Elasticsearch('http://localhost:9200') # 인덱스 생성 index_name = 'md_files' @@ -250,7 +94,7 @@ actions = [ helpers.bulk(es, actions) ``` -# 3. Streamlit app +## 3. Streamlit app ```python # pip install streamlit streamlit-keyup elasticsearch import streamlit as st @@ -276,38 +120,29 @@ st.html("""""") # Elasticsearch 클라이언트 생성 -es = Elasticsearch( - "https://localhost:9200", - ca_certs="/tmp/ca.crt", - basic_auth=("elastic", "changeme"), - verify_certs=True -) +es = Elasticsearch('http://localhost:9200') # Elasticsearch 클라이언트 설정 index_name = 'md_files' st.title("🔍 *Jimm* 전용 검색 엔진") -query = st_keyup("Search-as-you-type", key="0") -@st.experimental_fragment -def result(): - if query: - response = es.search( - index=index_name, - body={ - 'query': { - 'match': { - 'content': query - } + +if query := st_keyup("Search-as-you-type", key="0"): + response = es.search( + index=index_name, + body={ + 'query': { + 'match': { + 'content': query } } - ) - - hits = response['hits']['hits'] - for hit in hits: - st.write(f"**File Name:** {hit['_source']['file_name']}") - st.code(f"**Content:** {hit['_source']['content'][:200]}...") # 첫 200자만 표시 - st.write('---') -# with st.container(height=1200): -result() + } + ) + + hits = response['hits']['hits'] + for hit in hits: + st.write(f"**File Name:** {hit['_source']['file_name']}") + st.code(f"**Content:** {hit['_source']['content'][:200]}...") # 첫 200자만 표시 + st.write('---') ``` \ No newline at end of file