llm: add FastAPI shim, gateway LLM endpoints, tests, and docs

This commit is contained in:
2026-04-12 09:41:21 +02:00
parent baf497b015
commit 59c9584250
15 changed files with 1779 additions and 11 deletions

View File

@@ -7,6 +7,13 @@ CLIP_URL=http://clip:8000
BLIP_URL=http://blip:8000
YOLO_URL=http://yolo:8000
QDRANT_SVC_URL=http://qdrant-svc:8000
LLM_URL=http://llm:8080
LLM_ENABLED=false
LLM_TIMEOUT=120
LLM_DEFAULT_MODEL=qwen3-1.7b-instruct-q4_k_m
LLM_MAX_TOKENS_DEFAULT=256
LLM_MAX_TOKENS_HARD_LIMIT=1024
LLM_MAX_REQUEST_BYTES=65536
# Hugging Face token for private/gated models (optional). Leave empty if not needed.
# Never commit a real token to this file.
@@ -21,3 +28,10 @@ VECTOR_DIM=512
# Gateway runtime
VISION_TIMEOUT=300
MAX_IMAGE_BYTES=52428800
# Local llama.cpp LLM service — the settings below are only needed when you run the llm profile locally
MODEL_PATH=/models/Qwen3-1.7B-Instruct-Q4_K_M.gguf
LLM_CONTEXT_SIZE=4096
LLM_THREADS=4
LLM_GPU_LAYERS=0
LLM_EXTRA_ARGS=