llm: add FastAPI shim, gateway LLM endpoints, tests, and docs
This commit is contained in:
@@ -14,6 +14,13 @@ services:
|
||||
- QDRANT_SVC_URL=http://qdrant-svc:8000
|
||||
- CARD_RENDERER_URL=http://card-renderer:8000
|
||||
- MATURITY_URL=http://maturity:8000
|
||||
- LLM_URL=${LLM_URL:-http://llm:8080}
|
||||
- LLM_ENABLED=${LLM_ENABLED:-false}
|
||||
- LLM_TIMEOUT=${LLM_TIMEOUT:-120}
|
||||
- LLM_DEFAULT_MODEL=${LLM_DEFAULT_MODEL:-qwen3-1.7b-instruct-q4_k_m}
|
||||
- LLM_MAX_TOKENS_DEFAULT=${LLM_MAX_TOKENS_DEFAULT:-256}
|
||||
- LLM_MAX_TOKENS_HARD_LIMIT=${LLM_MAX_TOKENS_HARD_LIMIT:-1024}
|
||||
- LLM_MAX_REQUEST_BYTES=${LLM_MAX_REQUEST_BYTES:-65536}
|
||||
- MATURITY_ENABLED=true
|
||||
- API_KEY=${API_KEY}
|
||||
- VISION_TIMEOUT=300
|
||||
@@ -151,3 +158,26 @@ services:
|
||||
retries: 5
|
||||
start_period: 90s
|
||||
|
||||
# Local LLM inference service. It is assigned to the "llm" Compose profile,
# so it is only started when that profile is enabled.
llm:
  profiles:
    - llm
  build:
    context: .
    dockerfile: llm/Dockerfile
  # Host model directory, mounted read-only at /models inside the container.
  volumes:
    - './models/qwen3:/models:ro'
  # Each ${VAR:-default} entry uses the host value when set and non-empty,
  # otherwise the default written after ":-".
  environment:
    - 'MODEL_PATH=${MODEL_PATH:-/models/Qwen3-1.7B-Instruct-Q4_K_M.gguf}'
    # The model name is sourced from LLM_DEFAULT_MODEL, not a separate variable.
    - 'LLM_MODEL_NAME=${LLM_DEFAULT_MODEL:-qwen3-1.7b-instruct-q4_k_m}'
    - 'LLM_CONTEXT_SIZE=${LLM_CONTEXT_SIZE:-4096}'
    - 'LLM_THREADS=${LLM_THREADS:-4}'
    - 'LLM_GPU_LAYERS=${LLM_GPU_LAYERS:-0}'
    # Fixed port; the healthcheck URL below probes the same port.
    - 'LLM_PORT=8080'
    - 'LLM_EXTRA_ARGS=${LLM_EXTRA_ARGS:-}'
  # Probes /health over loopback with curl.
  # NOTE(review): requires curl to be present in the image - confirm in llm/Dockerfile.
  healthcheck:
    test:
      - 'CMD'
      - 'curl'
      - '-fsS'
      - 'http://127.0.0.1:8080/health'
    start_period: 120s
    interval: 30s
    timeout: 10s
    retries: 5
|
||||
|
||||
|
||||
Reference in New Issue
Block a user