Multiconstraint search

Multiconstraint search combines vector similarity with multiple metadata conditions. All conditions must be satisfied for a document to appear in results. Use multiconstraint search when a single filter is not specific enough. Chain multiple conditions (.must() in Python, .and() in JavaScript) to narrow results by both category and range, or by any other combination of metadata fields. Before running this example, make sure you have a VectorAI DB instance running at localhost:6574 and the relevant SDK installed. For setup instructions, see Docker installation.

from __future__ import annotations

import random

from actian_vectorai import (
    Distance,
    Field,
    FieldType,
    FilterBuilder,
    PointStruct,
    VectorAIClient,
    VectorParams,
)

# Address of the VectorAI DB instance the example connects to.
SERVER = "localhost:6574"
# Scratch collection; main() creates it and deletes it again when done.
COLLECTION = "semantic_demo"
# Length of every vector produced by fake_embed() and stored in the collection.
DIM = 64
# Section-header template: the title is padded to 50 characters.
fmt = "\n=== {:50} ==="

# Simulated document corpus, written as one (id, text, topic, year) row per
# document and expanded into the dict shape the rest of the example reads.
DOCUMENTS = [
    {"id": doc_id, "text": text, "topic": topic, "year": year}
    for doc_id, text, topic, year in (
        (1, "Python is a popular programming language", "programming", 2024),
        (2, "Machine learning transforms data into insights", "ml", 2024),
        (3, "Vector databases enable semantic search", "databases", 2024),
        (4, "Neural networks learn hierarchical features", "ml", 2023),
        (5, "SQL is the language of relational databases", "databases", 2020),
        (6, "Deep learning requires large datasets", "ml", 2023),
        (7, "Graph databases model relationships", "databases", 2022),
        (8, "Transformers revolutionized NLP", "ml", 2023),
        (9, "Rust is a memory-safe systems language", "programming", 2024),
        (10, "Embeddings represent meaning as vectors", "ml", 2024),
    )
]


def fake_embed(text: str, dim: int = DIM) -> list[float]:
    """Return a reproducible pseudo-embedding for *text*.

    The vector is random noise seeded from the text, not a real embedding:
    equal texts always map to equal vectors, but similar texts are not
    placed near each other.

    Args:
        text: String to derive the vector from.
        dim: Number of components to generate.

    Returns:
        A list of ``dim`` floats drawn from a standard normal distribution.
    """
    import zlib

    # The built-in hash() of a str is salted per interpreter process, so it
    # would give different vectors on every run. CRC32 of the UTF-8 bytes is
    # a stable 32-bit seed.
    seed = zlib.crc32(text.encode("utf-8"))
    # A private generator keeps the module-level random state untouched.
    rng = random.Random(seed)
    return [rng.gauss(0, 1) for _ in range(dim)]


def main() -> None:
    """Run the multiconstraint search demo against the server at SERVER.

    Recreates COLLECTION with cosine distance, indexes the ``topic`` and
    ``year`` payload fields, upserts one point per entry in DOCUMENTS, runs a
    vector search restricted to ``topic == "ml"`` and ``year >= 2024``, prints
    each hit, and finally deletes the collection.
    """
    with VectorAIClient(SERVER) as client:
        # Start from an empty collection even if an earlier run left one behind.
        if client.collections.exists(COLLECTION):
            client.collections.delete(COLLECTION)
        vector_params = VectorParams(size=DIM, distance=Distance.Cosine)
        client.collections.create(COLLECTION, vectors_config=vector_params)

        # Index both payload fields that the filter below refers to.
        client.points.create_field_index(COLLECTION, "topic", FieldType.FieldTypeKeyword)
        client.points.create_field_index(COLLECTION, "year", FieldType.FieldTypeInteger)

        # One point per document: id, pseudo-embedding of the text, and the
        # remaining fields stored as payload.
        batch = [
            PointStruct(
                id=doc["id"],
                vector=fake_embed(doc["text"]),
                payload={key: doc[key] for key in ("text", "topic", "year")},
            )
            for doc in DOCUMENTS
        ]
        client.points.upsert(COLLECTION, batch)
        print(f"✓ Indexed {len(DOCUMENTS)} documents")

        # Multiconstraint search: both conditions are added with must().
        print(fmt.format("Multiconstraint: ml + year>=2024"))
        query_vec = fake_embed("how do vector databases work?")
        topic_is_ml = Field("topic").eq("ml")
        year_from_2024 = Field("year").gte(2024)
        search_filter = FilterBuilder().must(topic_is_ml).must(year_from_2024).build()
        hits = client.points.search(
            COLLECTION,
            vector=query_vec,
            filter=search_filter,
            limit=5,
            with_payload=True,
        )
        for hit in hits:
            print(f"  score={hit.score:.4f} | {hit.payload['text']}")

        # Remove the scratch collection so the demo can be re-run.
        client.collections.delete(COLLECTION)
        print("\n✓ Cleaned up")


if __name__ == "__main__":
    main()

import { VectorAIClient, Field } from '@actian/vectorai-client';

// Address of the VectorAI DB instance the example connects to.
const SERVER = 'localhost:6574';
// Scratch collection; main() creates it and deletes it again when done.
const COLLECTION = 'semantic_demo';
// Length of every vector produced by fakeEmbed() and stored in the collection.
const DIM = 64;

// Simulated document corpus, written as one [id, text, topic, year] row per
// document and expanded into the object shape the rest of the example reads.
const DOCUMENTS = [
  [1, 'Python is a popular programming language', 'programming', 2024],
  [2, 'Machine learning transforms data into insights', 'ml', 2024],
  [3, 'Vector databases enable semantic search', 'databases', 2024],
  [4, 'Neural networks learn hierarchical features', 'ml', 2023],
  [5, 'SQL is the language of relational databases', 'databases', 2020],
  [6, 'Deep learning requires large datasets', 'ml', 2023],
  [7, 'Graph databases model relationships', 'databases', 2022],
  [8, 'Transformers revolutionized NLP', 'ml', 2023],
  [9, 'Rust is a memory-safe systems language', 'programming', 2024],
  [10, 'Embeddings represent meaning as vectors', 'ml', 2024],
].map(([id, text, topic, year]) => ({ id, text, topic, year }));

/**
 * Deterministic pseudo-embedding derived from a hash of the text.
 *
 * The same text always yields the same vector; every component lies in [0, 1).
 *
 * @param {string} text - String to derive the vector from.
 * @param {number} [dim=DIM] - Number of components to generate.
 * @returns {number[]} Array of `dim` numbers.
 */
function fakeEmbed(text, dim = DIM) {
  // Multiply-by-31 rolling hash over UTF-16 code units; `| 0` wraps the
  // running value to a signed 32-bit integer at every step.
  const rolling = text
    .split('')
    .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) | 0, 0);
  const seed = Math.abs(rolling);

  // Component i is the fractional part of sin(seed * (i + 1)) * 10000.
  const components = [];
  for (let i = 0; i < dim; i += 1) {
    const scaled = Math.sin(seed * (i + 1)) * 10000;
    components.push(scaled - Math.floor(scaled));
  }
  return components;
}

/**
 * Run the multiconstraint search demo against the server at SERVER.
 *
 * Recreates COLLECTION, indexes the `topic` and `year` payload fields,
 * upserts one point per entry in DOCUMENTS, runs a vector search restricted
 * to topic == 'ml' and year >= 2024, prints each hit, and deletes the
 * collection. The client is closed whether or not any step fails.
 */
async function main() {
  const client = new VectorAIClient(SERVER);
  try {
    // Best-effort removal of a collection left by an earlier run; any
    // rejection from this delete is deliberately ignored.
    await client.collections.delete(COLLECTION).catch(() => {});
    const collectionConfig = { dimension: DIM, distanceMetric: 'COSINE' };
    await client.collections.create(COLLECTION, collectionConfig);

    // Index both payload fields that the filter below refers to.
    await client.points.createFieldIndex(COLLECTION, 'topic', { fieldType: 'KEYWORD' });
    await client.points.createFieldIndex(COLLECTION, 'year', { fieldType: 'INTEGER' });

    // One point per document: id, pseudo-embedding of the text, and the
    // remaining fields stored as payload.
    const batch = DOCUMENTS.map(({ id, text, topic, year }) => ({
      id,
      vector: fakeEmbed(text),
      payload: { text, topic, year },
    }));
    await client.points.upsert(COLLECTION, batch, { wait: true });
    console.log(`Indexed ${DOCUMENTS.length} documents`);

    // Multiconstraint search: the two conditions are joined with and().
    console.log('\n=== Multiconstraint: ml + year>=2024 ===');
    const queryVec = fakeEmbed('how do vector databases work?');
    const topicIsMl = new Field('topic').eq('ml');
    const yearFrom2024 = new Field('year').gte(2024);
    const searchOptions = {
      filter: topicIsMl.and(yearFrom2024),
      limit: 5,
      withPayload: true,
    };
    const hits = await client.points.search(COLLECTION, queryVec, searchOptions);
    for (const hit of hits) {
      const scoreText = hit.score.toFixed(4);
      console.log(`  score=${scoreText} | ${hit.payload.text}`);
    }

    // Remove the scratch collection so the demo can be re-run.
    await client.collections.delete(COLLECTION);
    console.log('\nCleaned up');
  } finally {
    client.close();
  }
}

main().catch(console.error);

This example combines two filter conditions that both must be true:

topic == "ml" — Restricts results to the machine learning topic.
year >= 2024 — Restricts results to documents from 2024 onward.

In Python, chain .must() calls on FilterBuilder. In JavaScript, chain .and() calls on Field objects. Both conditions must be true for a document to be included. VectorAI DB evaluates all conditions during search, so only qualifying documents are compared by vector similarity. Each result includes these fields:

id: The unique identifier of the matching document
score: Similarity score for documents that passed all filter conditions
payload: Metadata object containing the document's stored fields (in this example: text, topic, and year)

You can combine any number of must, should, and must-not conditions in a single filter.

In Python, use .must(), .should(), and .must_not() on FilterBuilder.
In JavaScript, use .and(), .or(), and negation on Field objects.

For the full filter syntax and operator reference, see Filtering.

Collections

Points

Vectors

Payload

Search

Filtering

Semantic search

Hybrid search

Distance metrics

Indexing

Multiconstraint search