Inspection findings:
- _ensure_collection() created collections with bare VectorParams (no HNSW/optimizer config)
- _do_search() had no SearchParams — used Qdrant defaults (ef often ~100, no indexed_only)
- No payload index management at all — filtered searches scanned unindexed fields every time
- collection_info() returned minimal data — impossible to inspect production state
- No way to create/ensure payload indexes via the API
Changes — qdrant/main.py:
- Add SEARCH_HNSW_EF env var (default 128, above Qdrant default for better recall)
- _ensure_collection(): configure HnswConfigDiff(m=16, ef_construct=200, on_disk=False)
and OptimizersConfigDiff(indexing_threshold=20000, default_segment_number=4) on creation
- _do_search(): use SearchParams(hnsw_ef, exact, indexed_only) on every query
- SearchUrlRequest + SearchVectorRequest: expose hnsw_ef, exact, indexed_only per request
- collection_info(): expand to full HNSW/optimizer/quantization/segment/payload_schema detail
- GET /collections/{name}/indexes — list all payload indexes
- POST /collections/{name}/indexes — create a single payload index
- POST /collections/{name}/ensure-indexes — idempotent bulk index creation (skip existing)
- POST /collections/{name}/configure — apply HNSW/optimizer changes to existing collections
Changes — gateway/main.py:
- Expose the 4 new qdrant-svc endpoints under /vectors/collections/{name}/...
Changes — docker-compose.yml:
- Add SEARCH_HNSW_EF=128 to qdrant-svc environment
Critical usage note for existing collections:
After deploying, call POST /vectors/collections/images/ensure-indexes with the
payload fields actually used in filter_metadata (is_public, category_id, etc.)
to add missing indexes. This is the highest-impact single action for filtered search.
134 lines · 3.6 KiB · YAML
# Compose stack: an HTTP gateway plus the backend services it is configured
# to call (clip, blip, yolo, qdrant-svc, card-renderer) and the Qdrant
# database that qdrant-svc connects to.
services:
  # Only application service with a published port. It starts after every
  # backend listed under depends_on reports healthy.
  gateway:
    build:
      context: .
      dockerfile: gateway/Dockerfile
    ports:
      # host:container — quoted so YAML never reads it as a number.
      - "8003:8000"
    env_file:
      - .env
    environment:
      - CLIP_URL=http://clip:8000
      - BLIP_URL=http://blip:8000
      - YOLO_URL=http://yolo:8000
      - QDRANT_SVC_URL=http://qdrant-svc:8000
      - CARD_RENDERER_URL=http://card-renderer:8000
      # Substituted by Compose from the shell environment / .env file.
      - API_KEY=${API_KEY}
      # NOTE(review): unit is not visible here — presumably seconds; confirm.
      - VISION_TIMEOUT=300
      # 52428800 = 50 * 1024 * 1024 (50 MiB).
      - MAX_IMAGE_BYTES=52428800
    depends_on:
      clip:
        condition: service_healthy
      blip:
        condition: service_healthy
      yolo:
        condition: service_healthy
      qdrant-svc:
        condition: service_healthy
      card-renderer:
        condition: service_healthy
    healthcheck:
      # Exec form: fetches /health inside the container; a failed request
      # raises and makes the probe exit non-zero.
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read()"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s

  # No published ports: reachable only on the Compose network.
  card-renderer:
    build:
      context: .
      dockerfile: card-renderer/Dockerfile
    environment:
      - CARD_DEFAULT_FONT=/app/assets/fonts/Inter-Regular.ttf
      - CARD_BOLD_FONT=/app/assets/fonts/Inter-Bold.ttf
      - CARD_LOGO_PATH=/app/assets/logo.png
      # Same 50 MiB value as the gateway's MAX_IMAGE_BYTES.
      - CARD_MAX_IMAGE_BYTES=52428800
      - CARD_DEFAULT_OUTPUT=webp
      - CARD_DEFAULT_QUALITY=90
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read()"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s

  # Qdrant database. Port 6333 is published to the host; storage is
  # bind-mounted so data outlives the container.
  qdrant:
    # NOTE(review): `latest` is a floating tag — consider pinning a specific
    # version so rebuilds cannot silently change the storage engine.
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"
    volumes:
      - ./data/qdrant:/qdrant/storage
    environment:
      # Configures the gRPC port; 6334 is not published to the host above.
      - QDRANT__SERVICE__GRPC_PORT=6334
    healthcheck:
      # Opens a TCP connection to 6333 through bash's /dev/tcp; this checks
      # that the port accepts connections, not that the API answers.
      test: ["CMD-SHELL", "bash -c ':> /dev/tcp/127.0.0.1/6333' || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 15s

  # Service built from qdrant/Dockerfile; configured with the qdrant host and
  # the clip URL, and started only after both are healthy.
  qdrant-svc:
    build:
      context: .
      dockerfile: qdrant/Dockerfile
    environment:
      - QDRANT_HOST=qdrant
      - QDRANT_PORT=6333
      - CLIP_URL=http://clip:8000
      - COLLECTION_NAME=images
      # NOTE(review): presumably must match the embedding size of the clip
      # model configured below — confirm before changing either.
      - VECTOR_DIM=512
      # Search-time HNSW ef passed to the service.
      - SEARCH_HNSW_EF=128
    depends_on:
      qdrant:
        condition: service_healthy
      clip:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read()"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s

  # The three model services below allow more retries and a longer
  # start_period than the services above.
  clip:
    build:
      context: .
      dockerfile: clip/Dockerfile
    environment:
      - MODEL_NAME=ViT-B-32
      - MODEL_PRETRAINED=openai
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read()"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s

  blip:
    build:
      context: .
      dockerfile: blip/Dockerfile
    environment:
      - BLIP_MODEL=Salesforce/blip-image-captioning-base
      # Alternative model id, left disabled:
      # - BLIP_MODEL=Salesforce/blip-image-captioning-small
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read()"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 90s

  yolo:
    build:
      context: .
      dockerfile: yolo/Dockerfile
    environment:
      - YOLO_MODEL=yolov8n.pt
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=5).read()"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s