llm: add FastAPI shim, gateway LLM endpoints, tests, and docs
This commit is contained in:
25
llm/entrypoint.sh
Normal file
25
llm/entrypoint.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
# Entrypoint for the llm shim container: validate that the GGUF model file is
# present and readable, then hand off to the FastAPI app (main:app) via uvicorn.
#
# Required mount: a GGUF model file at $MODEL_PATH (default below).
# Tunables (env vars, all optional):
#   MODEL_PATH        - path to the GGUF model inside the container
#   LLM_MODEL_NAME    - model name advertised by the shim
#   LLM_CONTEXT_SIZE  - context window size passed to the backend
#   LLM_THREADS       - CPU threads for inference
#   LLM_GPU_LAYERS    - layers offloaded to GPU (0 = CPU only)
#   LLM_PORT          - public port the shim listens on
#   LLAMA_SERVER_PORT - upstream llama-server port the shim talks to

# Bash shebang, so pipefail is available; fail fast on errors, unset vars,
# and mid-pipeline failures (the original `set -eu` missed the latter).
set -euo pipefail

MODEL_PATH="${MODEL_PATH:-/models/Qwen3-1.7B-Instruct-Q4_K_M.gguf}"
LLM_MODEL_NAME="${LLM_MODEL_NAME:-qwen3-1.7b-instruct-q4_k_m}"
LLM_CONTEXT_SIZE="${LLM_CONTEXT_SIZE:-4096}"
LLM_THREADS="${LLM_THREADS:-4}"
LLM_GPU_LAYERS="${LLM_GPU_LAYERS:-0}"
LLM_PORT="${LLM_PORT:-8080}"
LLAMA_SERVER_PORT="${LLAMA_SERVER_PORT:-8081}"

# Fail fast with actionable messages (to stderr) before starting the server.
if [ ! -f "$MODEL_PATH" ]; then
  echo "llm startup failed: model file not found at $MODEL_PATH" >&2
  echo "Mount a GGUF model into ./models/qwen3 and set MODEL_PATH if the filename differs." >&2
  exit 1
fi

if [ ! -r "$MODEL_PATH" ]; then
  echo "llm startup failed: model file is not readable at $MODEL_PATH" >&2
  exit 1
fi

echo "Starting llm shim model=$LLM_MODEL_NAME model_path=$MODEL_PATH public_port=$LLM_PORT upstream_port=$LLAMA_SERVER_PORT ctx=$LLM_CONTEXT_SIZE threads=$LLM_THREADS gpu_layers=$LLM_GPU_LAYERS"

# exec so uvicorn replaces this shell as PID 1 and receives container
# stop signals (SIGTERM) directly instead of them being swallowed by bash.
exec python -m uvicorn main:app --host 0.0.0.0 --port "$LLM_PORT"
|
||||
Reference in New Issue
Block a user