fix: quantization in /configure, HNSW defaults in POST /collections, filter_metadata in search/file
This commit is contained in:
@@ -246,6 +246,7 @@ async def vectors_search_file(
|
|||||||
hnsw_ef: Optional[int] = Form(None),
|
hnsw_ef: Optional[int] = Form(None),
|
||||||
exact: bool = Form(False),
|
exact: bool = Form(False),
|
||||||
indexed_only: bool = Form(False),
|
indexed_only: bool = Form(False),
|
||||||
|
filter_metadata_json: Optional[str] = Form(None),
|
||||||
):
|
):
|
||||||
data = await file.read()
|
data = await file.read()
|
||||||
fields: Dict[str, Any] = {"limit": int(limit), "exact": exact, "indexed_only": indexed_only}
|
fields: Dict[str, Any] = {"limit": int(limit), "exact": exact, "indexed_only": indexed_only}
|
||||||
@@ -255,6 +256,8 @@ async def vectors_search_file(
|
|||||||
fields["collection"] = collection
|
fields["collection"] = collection
|
||||||
if hnsw_ef is not None:
|
if hnsw_ef is not None:
|
||||||
fields["hnsw_ef"] = int(hnsw_ef)
|
fields["hnsw_ef"] = int(hnsw_ef)
|
||||||
|
if filter_metadata_json is not None:
|
||||||
|
fields["filter_metadata_json"] = filter_metadata_json
|
||||||
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
|
async with httpx.AsyncClient(timeout=VISION_TIMEOUT) as client:
|
||||||
return await _post_file(client, f"{QDRANT_SVC_URL}/search/file", data, fields)
|
return await _post_file(client, f"{QDRANT_SVC_URL}/search/file", data, fields)
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ from qdrant_client.models import (
|
|||||||
OptimizersConfigDiff,
|
OptimizersConfigDiff,
|
||||||
SearchParams,
|
SearchParams,
|
||||||
PayloadSchemaType,
|
PayloadSchemaType,
|
||||||
|
ScalarQuantizationConfig,
|
||||||
|
ScalarType,
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -298,9 +300,13 @@ def create_collection(req: CollectionRequest):
|
|||||||
if req.name in collections:
|
if req.name in collections:
|
||||||
raise HTTPException(409, f"Collection '{req.name}' already exists")
|
raise HTTPException(409, f"Collection '{req.name}' already exists")
|
||||||
|
|
||||||
|
# Apply the same production defaults as _ensure_collection so all
|
||||||
|
# collections start with tuned HNSW and optimizer settings.
|
||||||
client.create_collection(
|
client.create_collection(
|
||||||
collection_name=req.name,
|
collection_name=req.name,
|
||||||
vectors_config=VectorParams(size=req.vector_dim, distance=dist),
|
vectors_config=VectorParams(size=req.vector_dim, distance=dist),
|
||||||
|
hnsw_config=HnswConfigDiff(m=16, ef_construct=200, on_disk=False),
|
||||||
|
optimizers_config=OptimizersConfigDiff(indexing_threshold=20000, default_segment_number=4),
|
||||||
)
|
)
|
||||||
return {"created": req.name, "vector_dim": req.vector_dim, "distance": req.distance}
|
return {"created": req.name, "vector_dim": req.vector_dim, "distance": req.distance}
|
||||||
|
|
||||||
@@ -460,11 +466,19 @@ async def search_file(
|
|||||||
hnsw_ef: Optional[int] = Form(None),
|
hnsw_ef: Optional[int] = Form(None),
|
||||||
exact: bool = Form(False),
|
exact: bool = Form(False),
|
||||||
indexed_only: bool = Form(False),
|
indexed_only: bool = Form(False),
|
||||||
|
filter_metadata_json: Optional[str] = Form(None),
|
||||||
):
|
):
|
||||||
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
|
"""Embed an uploaded image via CLIP, then search Qdrant for similar vectors."""
|
||||||
|
import json
|
||||||
|
filter_metadata: Dict[str, Any] = {}
|
||||||
|
if filter_metadata_json:
|
||||||
|
try:
|
||||||
|
filter_metadata = json.loads(filter_metadata_json)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
raise HTTPException(400, "filter_metadata_json must be valid JSON")
|
||||||
data = await file.read()
|
data = await file.read()
|
||||||
vector = await _embed_bytes(data)
|
vector = await _embed_bytes(data)
|
||||||
return _do_search(vector, int(limit), score_threshold, collection, {}, hnsw_ef, exact, indexed_only)
|
return _do_search(vector, int(limit), score_threshold, collection, filter_metadata, hnsw_ef, exact, indexed_only)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/search/vector")
|
@app.post("/search/vector")
|
||||||
@@ -685,6 +699,12 @@ class CollectionConfigRequest(BaseModel):
|
|||||||
hnsw_on_disk: Optional[bool] = Field(default=None, description="Store HNSW graph on disk (saves RAM, slightly slower queries).")
|
hnsw_on_disk: Optional[bool] = Field(default=None, description="Store HNSW graph on disk (saves RAM, slightly slower queries).")
|
||||||
indexing_threshold: Optional[int] = Field(default=None, ge=0, description="Min payload changes before a segment is indexed.")
|
indexing_threshold: Optional[int] = Field(default=None, ge=0, description="Min payload changes before a segment is indexed.")
|
||||||
default_segment_number: Optional[int] = Field(default=None, ge=1, le=32, description="Target number of segments for parallelism.")
|
default_segment_number: Optional[int] = Field(default=None, ge=1, le=32, description="Target number of segments for parallelism.")
|
||||||
|
# Scalar quantization — reduces RAM ~4x, often speeds up search on large collections.
|
||||||
|
# Set quantization_type='int8' to enable. Use quantization_always_ram=True to keep quantized
|
||||||
|
# vectors in RAM (recommended on VPS with limited memory but fast disk).
|
||||||
|
quantization_type: Optional[str] = Field(default=None, description="Enable scalar quantization: 'int8'. Set to null to keep current setting.")
|
||||||
|
quantization_quantile: float = Field(default=0.99, ge=0.5, le=1.0, description="Fraction of vectors used to calibrate quantization range (0.99 recommended).")
|
||||||
|
quantization_always_ram: bool = Field(default=True, description="Keep quantized vectors in RAM even when raw vectors are on disk.")
|
||||||
|
|
||||||
|
|
||||||
@app.post("/collections/{name}/configure")
|
@app.post("/collections/{name}/configure")
|
||||||
@@ -705,7 +725,18 @@ def configure_collection(name: str, req: CollectionConfigRequest):
|
|||||||
"default_segment_number": req.default_segment_number,
|
"default_segment_number": req.default_segment_number,
|
||||||
}.items() if v is not None}
|
}.items() if v is not None}
|
||||||
|
|
||||||
if not hnsw_kwargs and not opt_kwargs:
|
# Build optional scalar quantization config
|
||||||
|
quant_config = None
|
||||||
|
if req.quantization_type is not None:
|
||||||
|
if req.quantization_type.lower() != "int8":
|
||||||
|
raise HTTPException(400, f"Unsupported quantization_type '{req.quantization_type}'. Only 'int8' is supported.")
|
||||||
|
quant_config = ScalarQuantizationConfig(
|
||||||
|
type=ScalarType.INT8,
|
||||||
|
quantile=req.quantization_quantile,
|
||||||
|
always_ram=req.quantization_always_ram,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not hnsw_kwargs and not opt_kwargs and quant_config is None:
|
||||||
raise HTTPException(400, "No configuration fields provided")
|
raise HTTPException(400, "No configuration fields provided")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -713,12 +744,14 @@ def configure_collection(name: str, req: CollectionConfigRequest):
|
|||||||
collection_name=name,
|
collection_name=name,
|
||||||
hnsw_config=HnswConfigDiff(**hnsw_kwargs) if hnsw_kwargs else None,
|
hnsw_config=HnswConfigDiff(**hnsw_kwargs) if hnsw_kwargs else None,
|
||||||
optimizers_config=OptimizersConfigDiff(**opt_kwargs) if opt_kwargs else None,
|
optimizers_config=OptimizersConfigDiff(**opt_kwargs) if opt_kwargs else None,
|
||||||
|
quantization_config=quant_config,
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"collection": name,
|
"collection": name,
|
||||||
"status": "updated",
|
"status": "updated",
|
||||||
"hnsw_changes": hnsw_kwargs,
|
"hnsw_changes": hnsw_kwargs,
|
||||||
"optimizer_changes": opt_kwargs,
|
"optimizer_changes": opt_kwargs,
|
||||||
|
"quantization": {"type": req.quantization_type, "quantile": req.quantization_quantile, "always_ram": req.quantization_always_ram} if quant_config else None,
|
||||||
}
|
}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise HTTPException(500, str(exc))
|
raise HTTPException(500, str(exc))
|
||||||
|
|||||||
Reference in New Issue
Block a user