fix(qdrant): complete optimization gaps from v1

- qdrant/main.py: search/file now accepts hnsw_ef, exact, indexed_only form fields
  (was silently ignoring them, using server defaults only)
- qdrant/main.py: add GET /inspect endpoint — full diagnostic summary for all
  collections: HNSW, optimizer, quantization, segment count, payload index coverage,
  raw RAM estimate (vectors * dim * 4B * 1.5)
- gateway/main.py: vectors/search/file now forwards hnsw_ef, exact, indexed_only
- gateway/main.py: add GET /vectors/inspect proxy
This commit is contained in:
2026-03-31 20:01:52 +02:00
parent c7ea347e2b
commit 609485a0f0
2 changed files with 90 additions and 2 deletions

View File

@@ -243,13 +243,18 @@ async def vectors_search_file(
limit: int = Form(5),
score_threshold: Optional[float] = Form(None),
collection: Optional[str] = Form(None),
hnsw_ef: Optional[int] = Form(None),
exact: bool = Form(False),
indexed_only: bool = Form(False),
):
data = await file.read()
fields: Dict[str, Any] = {"limit": int(limit)}
fields: Dict[str, Any] = {"limit": int(limit), "exact": exact, "indexed_only": indexed_only}
if score_threshold is not None:
fields["score_threshold"] = float(score_threshold)
if collection is not None:
fields["collection"] = collection
if hnsw_ef is not None:
fields["hnsw_ef"] = int(hnsw_ef)
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
return await _post_file(client, f"{QDRANT_SVC_URL}/search/file", data, fields)
@@ -284,6 +289,13 @@ async def vectors_collection_info(name: str):
return await _get_json(client, f"{QDRANT_SVC_URL}/collections/{name}")
@app.get("/vectors/inspect")
async def vectors_inspect():
    """Proxy the Qdrant service's diagnostic summary for all collections.

    Issues a GET to ``{QDRANT_SVC_URL}/inspect`` through ``_get_json`` and
    returns whatever that helper yields. Per the upstream endpoint, the
    summary covers HNSW, optimizer, payload indexes and a RAM estimate.
    """
    inspect_url = f"{QDRANT_SVC_URL}/inspect"
    async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as http:
        summary = await _get_json(http, inspect_url)
    # The awaited result is fully resolved before the client is closed.
    return summary
@app.delete("/vectors/collections/{name}")
async def vectors_delete_collection(name: str):
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client: