llm: add FastAPI shim, gateway LLM endpoints, tests, and docs

This commit is contained in:
2026-04-12 09:41:21 +02:00
parent baf497b015
commit 59c9584250
15 changed files with 1779 additions and 11 deletions

79
tests/test_llm_service.py Normal file
View File

@@ -0,0 +1,79 @@
from __future__ import annotations
import importlib
import os
import unittest
from types import SimpleNamespace
from unittest.mock import patch
import httpx
# Environment variables layered over os.environ by load_llm_module() while the
# service module is (re)imported. Every value is a string because os.environ
# only stores strings.
BASE_ENV = {
    # Model file and the alias asserted in the /health payload.
    "MODEL_PATH": "D:/Sites/vision/models/qwen3/Qwen3-1.7B-Instruct-Q4_K_M.gguf",
    "LLM_MODEL_NAME": "qwen3-1.7b-instruct-q4_k_m",
    # Runtime sizing knobs.
    "LLM_CONTEXT_SIZE": "4096",
    "LLM_THREADS": "4",
    "LLM_GPU_LAYERS": "0",
    # Ports: presumably the shim itself and the llama server behind it
    # (the stubbed upstream URL in the tests uses 8081) -- TODO confirm.
    "LLM_PORT": "8080",
    "LLAMA_SERVER_PORT": "8081",
}
def load_llm_module():
    """Return a freshly reloaded ``llm.main`` evaluated under ``BASE_ENV``.

    ``os.environ`` is patched with ``BASE_ENV`` (other variables are left in
    place because ``clear=False``) for the duration of the import and the
    reload; the patch is undone when the ``with`` block exits.

    The reload re-executes the module's top-level code, so each caller gets
    module state rebuilt while the patched environment is active.
    NOTE(review): presumably ``llm.main`` reads its settings from the
    environment at import time -- confirm against that module.

    Returns:
        The module object returned by ``importlib.reload``.
    """
    with patch.dict(os.environ, BASE_ENV, clear=False):
        import llm.main as service_module

        reloaded = importlib.reload(service_module)
    return reloaded
class StubHTTPClient:
    """Minimal async HTTP client double that always answers with one canned response."""

    def __init__(self, response: httpx.Response):
        """Remember the response that every ``get`` call will hand back."""
        self.response = response

    async def get(self, *_ignored_args, **_ignored_kwargs):
        """Return the canned response, ignoring whatever URL or options were passed."""
        canned = self.response
        return canned
class LLMServiceTests(unittest.IsolatedAsyncioTestCase):
    """Checks the ``/health`` route of the LLM service app via an in-process ASGI transport."""

    async def _call_health(self, poll_result):
        """Reload the service, stub its collaborators, and GET ``/health``.

        Args:
            poll_result: Value the fake process's ``poll()`` returns.
                NOTE(review): presumably mirrors ``subprocess.Popen.poll()``
                (``None`` while running, an exit code once stopped) -- confirm
                against ``llm.main``.

        Returns:
            The ``httpx.Response`` produced by the app for ``GET /health``.
        """
        service = load_llm_module()
        # Stand-in for the managed process: only poll() is provided.
        service._llama_process = SimpleNamespace(poll=lambda: poll_result)
        # Canned upstream reply: an empty model listing with HTTP 200.
        upstream_request = httpx.Request("GET", "http://127.0.0.1:8081/v1/models")
        upstream_reply = httpx.Response(
            200,
            json={"object": "list", "data": []},
            request=upstream_request,
        )
        service._http_client = StubHTTPClient(upstream_reply)
        # Drive the ASGI app directly; no network socket is involved.
        asgi = httpx.ASGITransport(app=service.app)
        async with httpx.AsyncClient(transport=asgi, base_url="http://testserver") as caller:
            return await caller.get("/health")

    async def test_health_returns_repo_owned_contract(self):
        """With ``poll()`` returning ``None``, ``/health`` answers 200 with the full payload."""
        reply = await self._call_health(None)
        expected_payload = {
            "status": "ok",
            "model": "Qwen3-1.7B-Instruct-Q4_K_M.gguf",
            "model_alias": "qwen3-1.7b-instruct-q4_k_m",
            "context_size": 4096,
            "threads": 4,
            "gpu_layers": 0,
        }
        self.assertEqual(reply.status_code, 200)
        self.assertEqual(reply.json(), expected_payload)

    async def test_health_reports_unavailable_when_process_is_down(self):
        """With ``poll()`` returning ``1``, ``/health`` answers 503 and status ``unavailable``."""
        reply = await self._call_health(1)
        self.assertEqual(reply.status_code, 503)
        self.assertEqual(reply.json()["status"], "unavailable")
# Allow running this file directly (python tests/test_llm_service.py) in
# addition to discovery by a test runner.
if __name__ == "__main__":
    unittest.main()