# Code/pgpt/settings-docker.yaml
# Last modified: 2025-05-27 17:21:49 +02:00
# (43 lines, 1.3 KiB, YAML)
# Docker profile settings. Values use ${ENV_VAR:default} substitution:
# the environment variable overrides the default at load time.

server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8080}

# LLM / embedding backends default to "mock"; select a real backend
# (ollama, llamacpp, sagemaker, ...) via PGPT_MODE / PGPT_EMBED_MODE.
llm:
  mode: ${PGPT_MODE:mock}

embedding:
  mode: ${PGPT_EMBED_MODE:mock}

llamacpp:
  llm_hf_repo_id: ${PGPT_HF_REPO_ID:lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF}
  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf}

huggingface:
  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:nomic-ai/nomic-embed-text-v1.5}

sagemaker:
  # Empty defaults: endpoints must be supplied via environment when used.
  llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
  embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}

ollama:
  # Previously used / alternative models, kept for quick switching:
  # llm_model: ${PGPT_OLLAMA_LLM_MODEL:llama3:8b-instruct-q4_K_M}
  # llm_model: llama3:8b-instruct-q4_K_M
  # llm_model: gemma3:12b
  # llm_model: deepseek-r1:14b
  llm_model: qwen3:14b
  context_window: 5000
  embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:mxbai-embed-large}
  api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
  embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
  # Sampling / generation parameters passed through to Ollama.
  tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
  top_k: ${PGPT_OLLAMA_TOP_K:40}
  top_p: ${PGPT_OLLAMA_TOP_P:0.9}
  repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
  repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
  request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:6000.0}
  autopull_models: ${PGPT_OLLAMA_AUTOPULL_MODELS:true}

ui:
  enabled: true
  path: /