fix(qdrant): complete optimization gaps from v1
- qdrant/main.py: search/file now accepts the hnsw_ef, exact, and indexed_only form fields (previously they were silently ignored and only the server defaults were used)
- qdrant/main.py: add GET /inspect endpoint — full diagnostic summary for all collections: HNSW, optimizer, quantization, segment count, payload index coverage, raw RAM estimate (vectors * dim * 4 B * 1.5)
- gateway/main.py: vectors/search/file now forwards hnsw_ef, exact, indexed_only
- gateway/main.py: add GET /vectors/inspect proxy
This commit is contained in:
@@ -243,13 +243,18 @@ async def vectors_search_file(
|
|||||||
limit: int = Form(5),
|
limit: int = Form(5),
|
||||||
score_threshold: Optional[float] = Form(None),
|
score_threshold: Optional[float] = Form(None),
|
||||||
collection: Optional[str] = Form(None),
|
collection: Optional[str] = Form(None),
|
||||||
|
hnsw_ef: Optional[int] = Form(None),
|
||||||
|
exact: bool = Form(False),
|
||||||
|
indexed_only: bool = Form(False),
|
||||||
):
|
):
|
||||||
data = await file.read()
|
data = await file.read()
|
||||||
fields: Dict[str, Any] = {"limit": int(limit)}
|
fields: Dict[str, Any] = {"limit": int(limit), "exact": exact, "indexed_only": indexed_only}
|
||||||
if score_threshold is not None:
|
if score_threshold is not None:
|
||||||
fields["score_threshold"] = float(score_threshold)
|
fields["score_threshold"] = float(score_threshold)
|
||||||
if collection is not None:
|
if collection is not None:
|
||||||
fields["collection"] = collection
|
fields["collection"] = collection
|
||||||
|
if hnsw_ef is not None:
|
||||||
|
fields["hnsw_ef"] = int(hnsw_ef)
|
||||||
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
|
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
|
||||||
return await _post_file(client, f"{QDRANT_SVC_URL}/search/file", data, fields)
|
return await _post_file(client, f"{QDRANT_SVC_URL}/search/file", data, fields)
|
||||||
|
|
||||||
@@ -284,6 +289,13 @@ async def vectors_collection_info(name: str):
|
|||||||
return await _get_json(client, f"{QDRANT_SVC_URL}/collections/{name}")
|
return await _get_json(client, f"{QDRANT_SVC_URL}/collections/{name}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/vectors/inspect")
async def vectors_inspect():
    """Proxy the Qdrant service's diagnostic report through the gateway.

    Issues a GET to ``{QDRANT_SVC_URL}/inspect`` and returns whatever
    ``_get_json`` produces for it (per-collection HNSW, optimizer,
    payload-index and RAM-estimate details).
    """
    inspect_url = f"{QDRANT_SVC_URL}/inspect"
    async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as http:
        # Await inside the context manager so the request completes before
        # the HTTP client is closed.
        return await _get_json(http, inspect_url)
|
||||||
|
|
||||||
|
|
||||||
@app.delete("/vectors/collections/{name}")
|
@app.delete("/vectors/collections/{name}")
|
||||||
async def vectors_delete_collection(name: str):
|
async def vectors_delete_collection(name: str):
|
||||||
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
|
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
|
||||||
|
|||||||
@@ -210,6 +210,79 @@ def health():
|
|||||||
return {"status": "error", "detail": str(e)}
|
return {"status": "error", "detail": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def _inspect_collection(info):
    """Build the diagnostic summary dict for a single collection.

    Args:
        info: The object returned by ``client.get_collection(name)``. It is
            read for ``config``, ``status``, ``optimizer_status``, the
            vector/point/segment counters and ``payload_schema``.

    Returns:
        A dict with the collection status, counters, a RAM estimate in MiB,
        the HNSW and optimizer settings, the quantization config (as a
        string) and per-payload-index coverage.
    """
    cfg = info.config
    hnsw = cfg.hnsw_config
    opt = cfg.optimizer_config
    quant = cfg.quantization_config
    params = cfg.params

    # NOTE(review): ``vectors_count`` is believed to be deprecated in recent
    # Qdrant releases and may be None (or absent from the model) -- confirm
    # against the deployed client/server versions. Fall back to the point
    # count so the RAM estimate does not silently collapse to 0.
    reported_vectors = getattr(info, "vectors_count", None)
    points_total = info.points_count or 0
    estimated_vectors = (
        reported_vectors if reported_vectors is not None else points_total
    )

    # NOTE(review): a named-vectors config has no ``size`` attribute and
    # therefore uses VECTOR_DIM; the estimate is only approximate then.
    vec_dim = (
        params.vectors.size
        if hasattr(params.vectors, "size")
        else VECTOR_DIM
    )
    # Estimate raw RAM footprint: vectors * dim * 4 bytes * 1.5 safety factor
    ram_estimate_mb = round(estimated_vectors * vec_dim * 4 * 1.5 / 1_048_576, 1)

    # Payload indexes count *points*, so coverage is measured against the
    # point total. max(..., 1) guards the division for empty collections.
    coverage_base = max(points_total or estimated_vectors, 1)
    payload_schema = info.payload_schema or {}

    return {
        "status": info.status.value if info.status else None,
        "optimizer_status": str(info.optimizer_status) if info.optimizer_status else None,
        "vectors_count": reported_vectors or 0,
        "indexed_vectors_count": info.indexed_vectors_count,
        "points_count": info.points_count,
        "segments_count": info.segments_count,
        "ram_estimate_mb": ram_estimate_mb,
        "hnsw": {
            "m": hnsw.m,
            "ef_construct": hnsw.ef_construct,
            "on_disk": hnsw.on_disk,
            "full_scan_threshold": hnsw.full_scan_threshold,
            "max_indexing_threads": hnsw.max_indexing_threads,
        } if hnsw else None,
        "optimizer": {
            "indexing_threshold": opt.indexing_threshold,
            "default_segment_number": opt.default_segment_number,
            "max_segment_size": opt.max_segment_size,
            "memmap_threshold": opt.memmap_threshold,
            "flush_interval_sec": opt.flush_interval_sec,
        } if opt else None,
        "quantization": str(quant) if quant else None,
        "payload_indexes": {
            field: {
                "type": (
                    index.data_type.value
                    if hasattr(index.data_type, "value")
                    else str(index.data_type)
                ),
                "points": index.points,
                "coverage_pct": round(index.points / coverage_base * 100, 1),
            }
            for field, index in payload_schema.items()
        },
        "payload_index_count": len(payload_schema),
        "search_hnsw_ef": SEARCH_HNSW_EF,
    }


@app.get("/inspect")
def inspect():
    """Return a full diagnostic summary for every collection.

    Covers: vector counts, segment counts, HNSW config, optimizer config,
    quantization, payload indexes and their coverage. Designed for production
    health checks and the Qdrant optimization workflow.

    Returns:
        ``{"collections": {name: summary_or_error}, "total": n}`` on success,
        or ``{"status": "error", "detail": ...}`` when the collection list
        itself cannot be fetched.
    """
    try:
        all_collections = client.get_collections().collections
    except Exception as exc:
        return {"status": "error", "detail": str(exc)}

    result = {}
    for col_desc in all_collections:
        name = col_desc.name
        try:
            result[name] = _inspect_collection(client.get_collection(name))
        except Exception as exc:
            # Deliberately best-effort: one unreadable collection is reported
            # inline and must not hide the diagnostics of the others.
            result[name] = {"error": str(exc)}

    return {"collections": result, "total": len(result)}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Collection management
|
# Collection management
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -384,11 +457,14 @@ async def search_file(
|
|||||||
limit: int = Form(5),
|
limit: int = Form(5),
|
||||||
score_threshold: Optional[float] = Form(None),
|
score_threshold: Optional[float] = Form(None),
|
||||||
collection: Optional[str] = Form(None),
|
collection: Optional[str] = Form(None),
|
||||||
|
hnsw_ef: Optional[int] = Form(None),
|
||||||
|
exact: bool = Form(False),
|
||||||
|
indexed_only: bool = Form(False),
|
||||||
):
|
):
|
||||||
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
|
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
|
||||||
data = await file.read()
|
data = await file.read()
|
||||||
vector = await _embed_bytes(data)
|
vector = await _embed_bytes(data)
|
||||||
return _do_search(vector, int(limit), score_threshold, collection, {})
|
return _do_search(vector, int(limit), score_threshold, collection, {}, hnsw_ef, exact, indexed_only)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/search/vector")
|
@app.post("/search/vector")
|
||||||
|
|||||||
Reference in New Issue
Block a user