# The default configuration file.
# More information about configuration can be found in the documentation: https://docs.privategpt.dev/
# Syntax in `private_gpt/settings/settings.py`
server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8001}
  cors:
    enabled: true
    allow_origins: ["*"]
    allow_methods: ["*"]
    allow_headers: ["*"]
  auth:
    enabled: false
    # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())'
    # 'secret' is the username and 'key' is the password for basic auth by default
    # If auth is enabled, this value must be sent in the "Authorization" header of the request.
    secret: "Basic c2VjcmV0OmtleQ=="

#data:
#  local_ingestion:
#    enabled: ${LOCAL_INGESTION_ENABLED:false}
#    allow_ingest_from: ["*"]
#  local_data_folder: local_data/Corpus/private_gpt
data:
  local_ingestion:
    enabled: true
    allow_ingest_from: ["*"]
  local_data_folder: local_data/private_gpt

ui:
  enabled: true
  path: /
  # "RAG", "Search", "Basic", or "Summarize"
  default_mode: "RAG"
  default_chat_system_prompt: >
    You must answer questions only from the data in the context.
    If you know the answer but it is not based on the context, present it
    as a suggestion rather than an answer, and say so clearly.
  default_query_system_prompt: >
    You must answer questions only from the data in the context.
    If you know the answer but it is not based on the context, present it
    as a suggestion rather than an answer, and say so clearly.
  default_summarization_system_prompt: >
    You must answer questions only from the data in the context.
    If you know the answer but it is not based on the context, present it
    as a suggestion rather than an answer, and say so clearly.
  delete_file_button_enabled: true
  delete_all_files_button_enabled: true

llm:
  mode: llamacpp
  prompt_style: "llama3" # Should match the selected model
  max_new_tokens: 512
  context_window: 10000
  # Select your tokenizer. The llama-index tokenizer is the default.
  # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct
  temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)

rag:
  similarity_top_k: 15 # Controls how many "top" documents the RAG returns to use in the context.
  # similarity_value: 0.9 # Disabled by default. If enabled, the RAG will only use documents whose similarity score meets this threshold.
  rerank:
    enabled: false
    model: cross-encoder/ms-marco-MiniLM-L-2-v2
    top_n: 1

summarize:
  use_async: false

clickhouse:
  host: localhost
  port: 8443
  username: admin
  password: clickhouse
  database: embeddings

llamacpp:
  llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
  llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
  tfs_z: 2.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) reduces the impact more, while a value of 1.0 disables this setting.
  top_k: 10 # Reduces the probability of generating nonsense. A higher value (e.g., 100) will give more diverse answers, while a lower value (e.g., 10) will be more conservative. (Default: 40)
  top_p: 0.3 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
  repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
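
# Note: embed_dim in the embedding section below must match the output dimension of the
# selected embedding model, or the stored vectors will not line up with the index.
# Typical values (verify against the model card of the model you actually use):
# nomic-ai/nomic-embed-text-v1.5 -> 768, mxbai-embed-large -> 1024, text-embedding-ada-002 -> 1536.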
#embedding:
#  # Should match the llm mode above in most cases
#  mode: huggingface
#  ingest_mode: simple
#  embed_dim: 768 # 768 is for nomic-ai/nomic-embed-text-v1.5
embedding:
  mode: ollama
  model: mxbai-embed-large
  ingest_mode: simple
  embed_dim: 1024 # 1024 is for mxbai-embed-large

huggingface:
  embedding_hf_model_name: nomic-ai/nomic-embed-text-v1.5
  access_token: ${HF_TOKEN:}
  # Warning: Enabling this option allows the model to download and execute code from the internet.
  # Nomic AI requires this option to be enabled to use the model; be aware if you are using a different model.
  trust_remote_code: true

nodestore:
  database: simple

milvus:
  uri: local_data/private_gpt/milvus/milvus_local.db
  collection_name: milvus_db
  overwrite: false

vectorstore:
  database: qdrant

qdrant:
  path: local_data/private_gpt/qdrant

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  password: postgres
  schema_name: private_gpt

sagemaker:
  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479

openai:
  api_key: ${OPENAI_API_KEY:}
  model: gpt-3.5-turbo
  embedding_api_key: ${OPENAI_API_KEY:}

ollama:
  llm_model: qwen3:14b
  embedding_model: mxbai-embed-large
  api_base: http://localhost:11434
  embedding_api_base: http://localhost:11434 # Change if your embedding model runs on another Ollama instance
  keep_alive: 5m
  request_timeout: 1200.0
  autopull_models: true

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
  api_version: "2023-05-15"
  embedding_model: text-embedding-ada-002
  llm_model: gpt-35-turbo

gemini:
  api_key: ${GOOGLE_API_KEY:}
  model: models/gemini-pro
  embedding_model: models/embedding-001
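
# Quick smoke test once the server is running on the configured port. The /health endpoint
# path is an assumption; adjust it to your deployment. The Authorization header is only
# needed if server.auth.enabled is set to true above:
#   curl http://localhost:8001/health
#   curl -H "Authorization: Basic c2VjcmV0OmtleQ==" http://localhost:8001/health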