llm: add FastAPI shim, gateway LLM endpoints, tests, and docs
This commit is contained in:
New file: tests/test_llm_service.py (79 lines added)
@@ -0,0 +1,79 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import os
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
# Environment shared by every test: points the service at a local GGUF model
# and pins ports/tuning knobs so the /health assertions are deterministic.
BASE_ENV = dict(
    MODEL_PATH="D:/Sites/vision/models/qwen3/Qwen3-1.7B-Instruct-Q4_K_M.gguf",
    LLM_MODEL_NAME="qwen3-1.7b-instruct-q4_k_m",
    LLM_CONTEXT_SIZE="4096",
    LLM_THREADS="4",
    LLM_GPU_LAYERS="0",
    LLM_PORT="8080",
    LLAMA_SERVER_PORT="8081",
)
|
||||
|
||||
|
||||
def load_llm_module():
    """Import (or re-import) ``llm.main`` with the test environment applied.

    ``llm.main`` reads its configuration from ``os.environ`` at import time,
    so both the initial import *and* the ``importlib.reload`` must execute
    while ``BASE_ENV`` is patched in.  The reload is therefore kept inside
    the ``with`` block — ``patch.dict`` restores the environment on exit,
    and a reload performed after that would re-read the unpatched values.

    Returns:
        The freshly reloaded ``llm.main`` module object.
    """
    with patch.dict(os.environ, BASE_ENV, clear=False):
        import llm.main as llm_main

        return importlib.reload(llm_main)
|
||||
|
||||
|
||||
class StubHTTPClient:
    """Minimal async stand-in for an HTTP client.

    Every call to :meth:`get` resolves to the single canned response supplied
    at construction time, regardless of URL or keyword arguments.
    """

    def __init__(self, response: httpx.Response):
        # The one response that every request will receive.
        self.response = response

    async def get(self, *_ignored_args, **_ignored_kwargs):
        """Mirror ``httpx.AsyncClient.get`` but ignore all arguments."""
        return self.response
|
||||
|
||||
|
||||
class LLMServiceTests(unittest.IsolatedAsyncioTestCase):
    """Exercise the repo-owned /health contract of the LLM gateway app."""

    @staticmethod
    def _models_stub() -> StubHTTPClient:
        # Fake upstream llama server answering /v1/models with an empty list.
        request = httpx.Request("GET", "http://127.0.0.1:8081/v1/models")
        payload = {"object": "list", "data": []}
        return StubHTTPClient(httpx.Response(200, json=payload, request=request))

    @staticmethod
    async def _get_health(module) -> httpx.Response:
        # Drive the FastAPI app in-process over ASGI — no real sockets.
        transport = httpx.ASGITransport(app=module.app)
        async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client:
            return await client.get("/health")

    async def test_health_returns_repo_owned_contract(self):
        module = load_llm_module()
        # poll() -> None signals the llama subprocess is still running.
        module._llama_process = SimpleNamespace(poll=lambda: None)
        module._http_client = self._models_stub()

        response = await self._get_health(module)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(
            response.json(),
            {
                "status": "ok",
                "model": "Qwen3-1.7B-Instruct-Q4_K_M.gguf",
                "model_alias": "qwen3-1.7b-instruct-q4_k_m",
                "context_size": 4096,
                "threads": 4,
                "gpu_layers": 0,
            },
        )

    async def test_health_reports_unavailable_when_process_is_down(self):
        module = load_llm_module()
        # poll() -> 1 signals the llama subprocess has exited.
        module._llama_process = SimpleNamespace(poll=lambda: 1)
        module._http_client = self._models_stub()

        response = await self._get_health(module)

        self.assertEqual(response.status_code, 503)
        self.assertEqual(response.json()["status"], "unavailable")
|
||||
|
||||
|
||||
# Allow running this test module directly: python tests/test_llm_service.py
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user