探索数据

在掌握了搜索中的概念后,您可以开始以其他方式探索您的数据。Qdrant 提供了一系列 API,允许您以不同的方式找到相似的向量,以及找到最不相似的向量。这些是推荐系统、数据探索和数据清理的有用工具。

推荐API

除了常规搜索外,Qdrant 还允许您基于多个正面和负面示例进行搜索。该 API 称为 recommend,示例可以是点 ID,以便您可以利用已经编码的对象;并且,从 v1.6 开始,您还可以使用原始向量作为输入,这样您可以在不将其作为点上传的情况下动态创建向量。

REST API - API 模式定义可在此处获取

POST /collections/{collection_name}/points/query
{
  "query": {
    "recommend": {
      "positive": [100, 231],
      "negative": [718, [0.2, 0.3, 0.4, 0.5]],
      "strategy": "average_vector"
    }
  },
  "filter": {
    "must": [
      {
        "key": "city",
        "match": {
          "value": "London"
        }
      }
    ]
  }
}
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

client.query_points(
    collection_name="{collection_name}",
    query=models.RecommendQuery(
        recommend=models.RecommendInput(
            positive=[100, 231],
            negative=[718, [0.2, 0.3, 0.4, 0.5]],
            strategy=models.RecommendStrategy.AVERAGE_VECTOR,
        )
    ),
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key="city",
                match=models.MatchValue(
                    value="London",
                ),
            )
        ]
    ),
    limit=3,
)
import { QdrantClient } from "@qdrant/js-client-rest";

const client = new QdrantClient({ host: "localhost", port: 6333 });

client.query("{collection_name}", {
    query: {
        recommend: {
            positive: [100, 231],
            negative: [718, [0.2, 0.3, 0.4, 0.5]],
            strategy: "average_vector"
        }
    },
    filter: {
        must: [
            {
                key: "city",
                match: {
                    value: "London",
                },
            },
        ],
    },
    limit: 3
});
use qdrant_client::qdrant::{
    Condition, Filter, QueryPointsBuilder, RecommendInputBuilder, RecommendStrategy,
};
use qdrant_client::Qdrant;

let client = Qdrant::from_url("http://localhost:6334").build()?;
    
// Recommend query mirroring the REST example above:
// positives are point IDs 100 and 231; negatives are point ID 718 plus a raw vector.
client
    .query(
        QueryPointsBuilder::new("{collection_name}")
            .query(
                RecommendInputBuilder::default()
                    .add_positive(100)
                    .add_positive(231)
                    .add_negative(718)
                    // The raw vector is a negative example, as in the REST request.
                    .add_negative(vec![0.2, 0.3, 0.4, 0.5])
                    .strategy(RecommendStrategy::AverageVector)
                    .build(),
            )
            .limit(3)
            .filter(Filter::must([Condition::matches(
                "city",
                "London".to_string(),
            )])),
    )
    .await?;
import java.util.List;

import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;
import io.qdrant.client.grpc.Points.RecommendStrategy;
import io.qdrant.client.grpc.Points.Filter;

import static io.qdrant.client.ConditionFactory.matchKeyword;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;

QdrantClient client =
    new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());

// Recommend query mirroring the REST example above:
// positives are point IDs 100 and 231; negatives are point ID 718 plus a raw vector.
client.queryAsync(QueryPoints.newBuilder()
        .setCollectionName("{collection_name}")
        .setQuery(recommend(RecommendInput.newBuilder()
                .addAllPositive(List.of(vectorInput(100), vectorInput(231)))
                .addAllNegative(List.of(vectorInput(718), vectorInput(0.2f, 0.3f, 0.4f, 0.5f)))
                .setStrategy(RecommendStrategy.AverageVector)
                .build()))
        .setFilter(Filter.newBuilder().addMust(matchKeyword("city", "London")))
        .setLimit(3)
        .build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;

var client = new QdrantClient("localhost", 6334);

await client.QueryAsync(
    collectionName: "{collection_name}",
    query: new RecommendInput {
        Positive = { 100, 231 },
        Negative = { 718 }
    },
    filter: MatchKeyword("city", "London"),
    limit: 3
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

client.Query(context.Background(), &qdrant.QueryPoints{
    CollectionName: "{collection_name}",
    Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
        Positive: []*qdrant.VectorInput{
            qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
            qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
        },
        Negative: []*qdrant.VectorInput{
            qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
        },
    }),
    Filter: &qdrant.Filter{
        Must: []*qdrant.Condition{
            qdrant.NewMatch("city", "London"),
        },
    },
})

此API的示例结果将是

{
  "result": [
    { "id": 10, "score": 0.81 },
    { "id": 14, "score": 0.75 },
    { "id": 11, "score": 0.73 }
  ],
  "status": "ok",
  "time": 0.001
}

用于获取推荐的算法是从可用的strategy选项中选择的。每种算法都有其自身的优缺点,因此请进行实验并选择最适合您情况的算法。

平均向量策略

默认且第一个添加到Qdrant的策略称为average_vector。它预处理输入示例以创建一个用于搜索的单一向量。由于预处理步骤非常快,该策略的性能与常规搜索相当。这种推荐背后的直觉是,每个向量分量代表数据的一个独立特征,因此,通过对示例进行平均,我们应该能得到一个很好的推荐。

生成搜索向量的方法是首先分别对所有正例和负例进行平均,然后使用以下公式将它们组合成一个单一向量:

avg_positive + avg_positive - avg_negative

在没有负例的情况下,搜索向量将简单地等于avg_positive

这是将隐式设置的默认策略,但您可以通过在推荐请求中设置"strategy": "average_vector"来显式定义它。

最佳得分策略

自 v1.6.0 起可用

在 v1.6 中引入的新策略称为 best_score。它基于这样一种理念:找到相似向量的最佳方法是找到那些更接近正例的向量,同时避开那些更接近负例的向量。它的工作方式是:将每个候选向量与每个示例逐一比较,然后取其中最佳的正例得分和最佳的负例得分。最终得分由以下分段(阶跃)公式确定:

let score = if best_positive_score > best_negative_score {
    best_positive_score
} else {
    -(best_negative_score * best_negative_score)
};

由于我们在搜索的每一步都计算与每个示例的相似性,因此此策略的性能将受到示例数量的线性影响。这意味着您提供的示例越多,搜索速度就越慢。然而,这种策略可能非常强大,并且应该更加与嵌入无关。

要使用此算法,您需要在推荐请求中设置 "strategy": "best_score"

仅使用负例

best_score 策略的一个有益副作用是,您可以只提供负例来使用它。这样就可以找到与您提供的向量最不相似的向量。这对于在数据中查找异常值,或者查找与给定向量最不相似的向量非常有用。

将仅包含负面示例与过滤结合使用,可以成为数据探索和清理的强大工具。

多个向量

自 v0.10.0 起可用

如果集合是使用多个向量创建的,则应在推荐请求中指定向量的名称:

POST /collections/{collection_name}/points/query
{
  "query": {
    "recommend": {
      "positive": [100, 231],
      "negative": [718]
    }
  },
  "using": "image",
  "limit": 10
}
client.query_points(
    collection_name="{collection_name}",
    query=models.RecommendQuery(
        recommend=models.RecommendInput(
            positive=[100, 231],
            negative=[718],
        )
    ),
    using="image",
    limit=10,
)
client.query("{collection_name}", {
    query: {
        recommend: {
            positive: [100, 231],
            negative: [718],
        }
    },
    using: "image",
    limit: 10
});
use qdrant_client::qdrant::{QueryPointsBuilder, RecommendInputBuilder};

client
    .query(
        QueryPointsBuilder::new("{collection_name}")
            .query(
                RecommendInputBuilder::default()
                    .add_positive(100)
                    .add_positive(231)
                    .add_negative(718)
                    .build(),
            )
            .limit(10)
            .using("image"),
    )
    .await?;
import java.util.List;

import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;

import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;

client.queryAsync(QueryPoints.newBuilder()
        .setCollectionName("{collection_name}")
        .setQuery(recommend(RecommendInput.newBuilder()
                .addAllPositive(List.of(vectorInput(100), vectorInput(231)))
                .addAllNegative(List.of(vectorInput(718)))
                .build()))
        .setUsing("image")
        .setLimit(10)
        .build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;

var client = new QdrantClient("localhost", 6334);

await client.QueryAsync(
    collectionName: "{collection_name}",
    query: new RecommendInput {
        Positive = { 100, 231 },
        Negative = { 718 }
    },
    usingVector: "image",
    limit: 10
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

client.Query(context.Background(), &qdrant.QueryPoints{
    CollectionName: "{collection_name}",
    Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
        Positive: []*qdrant.VectorInput{
            qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
            qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
        },
        Negative: []*qdrant.VectorInput{
            qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
        },
    }),
    Using: qdrant.PtrOf("image"),
})

参数 using 指定用于推荐的存储向量。

从另一个集合中查找向量

自 v0.11.6 版本起可用

如果您有具有相同维度的向量集合,并且您希望基于另一个集合的向量在一个集合中查找推荐,您可以使用lookup_from参数。

这可能很有用,例如在项目到用户的推荐场景中。 尽管用户和项目的嵌入具有相同的向量参数(距离类型和维度),但它们通常存储在不同的集合中。

POST /collections/{collection_name}/points/query
{
  "query": {
    "recommend": {
      "positive": [100, 231],
      "negative": [718]
    }
  },
  "limit": 10,
  "lookup_from": {
    "collection": "{external_collection_name}",
    "vector": "{external_vector_name}"
  }
}
client.query_points(
    collection_name="{collection_name}",
    query=models.RecommendQuery(
        recommend=models.RecommendInput(
            positive=[100, 231],
            negative=[718],
        )
    ),
    using="image",
    limit=10,
    lookup_from=models.LookupLocation(
        collection="{external_collection_name}", vector="{external_vector_name}"
    ),
)
client.query("{collection_name}", {
    query: {
        recommend: {
            positive: [100, 231],
            negative: [718],
        }
    },
    using: "image",
    limit: 10,
    lookup_from: {
        collection: "{external_collection_name}",
        vector: "{external_vector_name}"
    }
});
use qdrant_client::qdrant::{LookupLocationBuilder, QueryPointsBuilder, RecommendInputBuilder};

client
    .query(
        QueryPointsBuilder::new("{collection_name}")
            .query(
                RecommendInputBuilder::default()
                    .add_positive(100)
                    .add_positive(231)
                    .add_negative(718)
                    .build(),
            )
            .limit(10)
            .using("image")
            .lookup_from(
                LookupLocationBuilder::new("{external_collection_name}")
                    .vector_name("{external_vector_name}"),
            ),
    )
    .await?;
import java.util.List;

import io.qdrant.client.grpc.Points.LookupLocation;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;

import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;

client.queryAsync(QueryPoints.newBuilder()
        .setCollectionName("{collection_name}")
        .setQuery(recommend(RecommendInput.newBuilder()
                .addAllPositive(List.of(vectorInput(100), vectorInput(231)))
                .addAllNegative(List.of(vectorInput(718)))
                .build()))
        .setUsing("image")
        .setLimit(10)
        .setLookupFrom(
                LookupLocation.newBuilder()
                        .setCollectionName("{external_collection_name}")
                        .setVectorName("{external_vector_name}")
                        .build())
        .build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;

var client = new QdrantClient("localhost", 6334);

await client.QueryAsync(
    collectionName: "{collection_name}",
    query: new RecommendInput {
        Positive = { 100, 231 },
        Negative = { 718 }
    },
    usingVector: "image",
    limit: 10,
    lookupFrom: new LookupLocation
    {
        CollectionName = "{external_collection_name}",
        VectorName = "{external_vector_name}",
    }
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

client.Query(context.Background(), &qdrant.QueryPoints{
    CollectionName: "{collection_name}",
    Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
        Positive: []*qdrant.VectorInput{
            qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
            qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
        },
        Negative: []*qdrant.VectorInput{
            qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
        },
    }),
    Using: qdrant.PtrOf("image"),
    LookupFrom: &qdrant.LookupLocation{
        CollectionName: "{external_collection_name}",
        VectorName:     qdrant.PtrOf("{external_vector_name}"),
    },
})

向量通过 positive 和 negative 列表中提供的 ID 从外部集合中检索。然后,这些向量用于在当前集合中执行推荐,并与 “using” 指定的向量或默认向量进行比较。

批量推荐API

自 v0.10.0 起可用

与批量搜索API在用途和优势上相似,它能够实现推荐请求的批量处理。

POST /collections/{collection_name}/points/query/batch
{
  "searches": [
    {
      "query": {
        "recommend": {
          "positive": [100, 231],
          "negative": [718]
        }
      },
      "filter": {
        "must": [
          {
            "key": "city",
            "match": {
              "value": "London"
            }
          }
        ]
      },
      "limit": 10
    },
    {
      "query": {
        "recommend": {
          "positive": [200, 67],
          "negative": [300]
        }
      },
      "filter": {
        "must": [
          {
            "key": "city",
            "match": {
              "value": "London"
            }
          }
        ]
      },
      "limit": 10
    }
  ]
}
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

filter_ = models.Filter(
    must=[
        models.FieldCondition(
            key="city",
            match=models.MatchValue(
                value="London",
            ),
        )
    ]
)

recommend_queries = [
    models.QueryRequest(
        query=models.RecommendQuery(
            recommend=models.RecommendInput(positive=[100, 231], negative=[718])
        ),
        filter=filter_,
        limit=3,
    ),
    models.QueryRequest(
        query=models.RecommendQuery(
            recommend=models.RecommendInput(positive=[200, 67], negative=[300])
        ),
        filter=filter_,
        limit=3,
    ),
]

client.query_batch_points(
    collection_name="{collection_name}", requests=recommend_queries
)
import { QdrantClient } from "@qdrant/js-client-rest";

const client = new QdrantClient({ host: "localhost", port: 6333 });

const filter = {
    must: [
        {
            key: "city",
            match: {
                value: "London",
            },
        },
    ],
};

const searches = [
    {
        query: {
            recommend: {
                positive: [100, 231],
                negative: [718]
            }
        },
        filter,
        limit: 3,
    },
    {
        query: {
            recommend: {
                positive: [200, 67],
                negative: [300]
            }
        },
        filter,
        limit: 3,
    },
];

client.queryBatch("{collection_name}", {
    searches,
});
use qdrant_client::qdrant::{
    Condition, Filter, QueryBatchPointsBuilder, QueryPointsBuilder,
    RecommendInputBuilder,
};
use qdrant_client::Qdrant;

let client = Qdrant::from_url("http://localhost:6334").build()?;

let filter = Filter::must([Condition::matches("city", "London".to_string())]);

let recommend_queries = vec![
    QueryPointsBuilder::new("{collection_name}")
        .query(
            RecommendInputBuilder::default()
                .add_positive(100)
                .add_positive(231)
                .add_negative(718)
                .build(),
        )
        .filter(filter.clone())
        .build(),
    QueryPointsBuilder::new("{collection_name}")
        .query(
            RecommendInputBuilder::default()
                .add_positive(200)
                .add_positive(67)
                .add_negative(300)
                .build(),
        )
        .filter(filter)
        .build(),
];

client
    .query_batch(QueryBatchPointsBuilder::new(
        "{collection_name}",
        recommend_queries,
    ))
    .await?;
import java.util.List;

import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Filter;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;

import static io.qdrant.client.ConditionFactory.matchKeyword;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;

QdrantClient client =
    new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());

Filter filter = Filter.newBuilder().addMust(matchKeyword("city", "London")).build();

// Two recommend requests sharing the same payload filter; IDs mirror the REST batch example
// (positives 100/231 with negative 718, and positives 200/67 with negative 300).
List<QueryPoints> recommendQueries = List.of(
        QueryPoints.newBuilder()
                .setCollectionName("{collection_name}")
                .setQuery(recommend(
                        RecommendInput.newBuilder()
                                .addAllPositive(List.of(vectorInput(100), vectorInput(231)))
                                .addAllNegative(List.of(vectorInput(718)))
                                .build()))
                .setFilter(filter)
                .setLimit(3)
                .build(),
        QueryPoints.newBuilder()
                .setCollectionName("{collection_name}")
                .setQuery(recommend(
                        RecommendInput.newBuilder()
                                .addAllPositive(List.of(vectorInput(200), vectorInput(67)))
                                .addAllNegative(List.of(vectorInput(300)))
                                .build()))
                .setFilter(filter)
                .setLimit(3)
                .build());
                
client.queryBatchAsync("{collection_name}", recommendQueries).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;

var client = new QdrantClient("localhost", 6334);

// Keyword matching is case-sensitive; use the same "London" value as the other examples.
var filter = MatchKeyword("city", "London");

await client.QueryBatchAsync(
    collectionName: "{collection_name}",
    queries:
    [
        new QueryPoints()
        {
            CollectionName = "{collection_name}",
            Query = new RecommendInput {
                Positive = { 100, 231 },
                Negative = { 718 },
            },
            Limit = 3,
            Filter = filter,
        },
                new QueryPoints()
        {
            CollectionName = "{collection_name}",
            Query = new RecommendInput {
                Positive = { 200, 67 },
                Negative = { 300 },
            },
            Limit = 3,
            Filter = filter,
        }
    ]
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

filter := qdrant.Filter{
    Must: []*qdrant.Condition{
        qdrant.NewMatch("city", "London"),
    },
}
client.QueryBatch(context.Background(), &qdrant.QueryBatchPoints{
    CollectionName: "{collection_name}",
    QueryPoints: []*qdrant.QueryPoints{
        {
            CollectionName: "{collection_name}",
            Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
                Positive: []*qdrant.VectorInput{
                    qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
                    qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
                },
                Negative: []*qdrant.VectorInput{
                    qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
                },
            },
            ),
            Filter: &filter,
        },
        {
            CollectionName: "{collection_name}",
            Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
                Positive: []*qdrant.VectorInput{
                    qdrant.NewVectorInputID(qdrant.NewIDNum(200)),
                    qdrant.NewVectorInputID(qdrant.NewIDNum(67)),
                },
                Negative: []*qdrant.VectorInput{
                    qdrant.NewVectorInputID(qdrant.NewIDNum(300)),
                },
            },
            ),
            Filter: &filter,
        },
    },
},
)

此API的结果包含每个推荐请求的一个数组。

{
  "result": [
    [
        { "id": 10, "score": 0.81 },
        { "id": 14, "score": 0.75 },
        { "id": 11, "score": 0.73 }
    ],
    [
        { "id": 1, "score": 0.92 },
        { "id": 3, "score": 0.89 },
        { "id": 9, "score": 0.75 }
    ]
  ],
  "status": "ok",
  "time": 0.001
}

发现API

自v1.7版本起可用

REST API 模式定义可在此处获取

在此API中,Qdrant引入了context的概念,用于分割空间。上下文是一组正负对,每对将空间划分为正区和负区。在这种模式下,搜索操作根据点属于多少个正区(或它们避免了多少个负区)来优先选择点。

提供上下文的接口类似于推荐API(ids或原始向量)。然而,在这种情况下,它们需要以正负对的形式提供。

发现API允许您进行两种新类型的搜索:

  • 发现搜索: 使用上下文(正负向量对)和目标来返回与目标更相似的点,但受上下文约束。
  • 上下文搜索: 仅使用上下文对,获取位于最佳区域(即损失最小的区域)的点。

正例和负例在上下文对中的排列方式完全由您决定。因此,您可以根据模型和数据灵活尝试不同的排列技术。

这种类型的搜索特别适合结合多模态、向量约束的搜索。Qdrant已经对过滤器有广泛的支持,这些过滤器基于其有效载荷来约束搜索,但使用发现搜索,您还可以约束执行搜索的向量空间。

Discovery search

发现分数的公式可以表示为:

$$ \text{rank}(v^+, v^-) = \begin{cases} 1, &\quad s(v^+) \geq s(v^-) \\ -1, &\quad s(v^+) < s(v^-) \end{cases} $$

其中 $v^+$ 代表正例,$v^-$ 代表负例,$s(v)$ 是向量 $v$ 与目标向量的相似度分数。然后发现分数计算如下:

$$ \text{发现分数} = \text{sigmoid}(s(v_t)) + \sum \text{rank}(v_i^+, v_i^-), $$

其中 $s(v)$ 是相似度函数,$v_t$ 是目标向量,$v_i^+$ 和 $v_i^-$ 分别是正例和负例。sigmoid 函数用于将分数归一化到 0 和 1 之间,而 rank 之和用于惩罚那些更接近负例而不是正例的向量。换句话说,各个 rank 的总和决定了一个点位于多少个正区域中,而与目标的接近程度排序则是次要的。

示例:

POST /collections/{collection_name}/points/query
{
  "query": {
    "discover": {
      "target": [0.2, 0.1, 0.9, 0.7],
      "context": [
        {
          "positive": 100,
          "negative": 718
        },
        {
          "positive": 200,
          "negative": 300
        }
      ]
    }
  },
  "limit": 10
}
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

discover_queries = [
    models.QueryRequest(
        query=models.DiscoverQuery(
            discover=models.DiscoverInput(
                target=[0.2, 0.1, 0.9, 0.7],
                context=[
                    models.ContextPair(
                        positive=100,
                        negative=718,
                    ),
                    models.ContextPair(
                        positive=200,
                        negative=300,
                    ),
                ],
            )
        ),
        limit=10,
    ),
]

client.query_batch_points(
    collection_name="{collection_name}", requests=discover_queries
)
import { QdrantClient } from "@qdrant/js-client-rest";

const client = new QdrantClient({ host: "localhost", port: 6333 });

client.query("{collection_name}", {
    query: {
        discover: {
            target: [0.2, 0.1, 0.9, 0.7],
            context: [
                {
                    positive: 100,
                    negative: 718,
                },
                {
                    positive: 200,
                    negative: 300,
                },
            ],
        }
    },
    limit: 10,
});
use qdrant_client::qdrant::{ContextInputBuilder, DiscoverInputBuilder, QueryPointsBuilder};
use qdrant_client::Qdrant;

client
    .query(
        QueryPointsBuilder::new("{collection_name}").query(
            DiscoverInputBuilder::new(
                vec![0.2, 0.1, 0.9, 0.7],
                ContextInputBuilder::default()
                    .add_pair(100, 718)
                    .add_pair(200, 300),
            )
            .build(),
        ),
    )
    .await?;
import java.util.List;

import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.ContextInput;
import io.qdrant.client.grpc.Points.ContextInputPair;
import io.qdrant.client.grpc.Points.DiscoverInput;
import io.qdrant.client.grpc.Points.QueryPoints;

import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.discover;

QdrantClient client =
    new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());

client.queryAsync(QueryPoints.newBuilder()
        .setCollectionName("{collection_name}")
        .setQuery(discover(DiscoverInput.newBuilder()
                .setTarget(vectorInput(0.2f, 0.1f, 0.9f, 0.7f))
                .setContext(ContextInput.newBuilder()
                        .addAllPairs(List.of(
                                ContextInputPair.newBuilder()
                                        .setPositive(vectorInput(100))
                                        .setNegative(vectorInput(718))
                                        .build(),
                                ContextInputPair.newBuilder()
                                        .setPositive(vectorInput(200))
                                        .setNegative(vectorInput(300))
                                        .build()))
                        .build())
                .build()))
        .setLimit(10)
        .build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;

var client = new QdrantClient("localhost", 6334);

await client.QueryAsync(
    collectionName: "{collection_name}",
    query: new DiscoverInput {
        Target = new float[] { 0.2f, 0.1f, 0.9f, 0.7f },
        Context = new ContextInput {
            Pairs = {
                new ContextInputPair {
                    Positive = 100,
                    Negative = 718
                },
                new ContextInputPair {
                    Positive = 200,
                    Negative = 300
                },
            }   
        },
    },
    limit: 10
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

client.Query(context.Background(), &qdrant.QueryPoints{
    CollectionName: "{collection_name}",
    Query: qdrant.NewQueryDiscover(&qdrant.DiscoverInput{
        Target: qdrant.NewVectorInput(0.2, 0.1, 0.9, 0.7),
        Context: &qdrant.ContextInput{
            Pairs: []*qdrant.ContextInputPair{
                {
                    Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
                    Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
                },
                {
                    Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(200)),
                    Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(300)),
                },
            },
        },
    }),
})

相比之下,在没有目标的情况下,当使用像 HNSW 这样的邻近图时,一个严格的、逐整数变化的函数无法为搜索提供太多指导。因此,上下文搜索采用了一种源自三元组损失(triplet loss)概念的函数,这种损失通常在模型训练期间使用。对于上下文搜索,该函数经过调整,用来引导搜索朝向负例较少的区域。

Context search

我们可以直接将评分函数与损失函数关联起来,其中0.0是一个点可以拥有的最高分数,这意味着它只存在于正区域。一旦一个点更接近负样本,它的损失将简单地是正相似度和负相似度的差值。

$$ \text{context score} = \sum \min(s(v^+_i) - s(v^-_i), 0.0) $$

其中 $v^+_i$ 和 $v^-_i$ 是每对的正例和负例,$s(v)$ 是相似度函数。

使用这种搜索方式,您可以预期输出不一定围绕某一个点,而是任何不靠近负例的点,从而产生受约束但多样化的结果。因此,即使该 API 的名称不叫 recommend,推荐系统也可以使用这种方法,并根据其具体用例进行调整。

示例:

POST /collections/{collection_name}/points/query
{
  "query": {
    "context": [
      {
        "positive": 100,
        "negative": 718
      },
      {
        "positive": 200,
        "negative": 300
      }
    ]
  },
  "limit": 10
}
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

discover_queries = [
    models.QueryRequest(
        query=models.ContextQuery(
            context=[
                models.ContextPair(
                    positive=100,
                    negative=718,
                ),
                models.ContextPair(
                    positive=200,
                    negative=300,
                ),
            ],
        ),
        limit=10,
    ),
]

client.query_batch_points(
    collection_name="{collection_name}", requests=discover_queries
)
import { QdrantClient } from "@qdrant/js-client-rest";

const client = new QdrantClient({ host: "localhost", port: 6333 });

client.query("{collection_name}", {
    query: {
        context: [
            {
                positive: 100,
                negative: 718,
            },
            {
                positive: 200,
                negative: 300,
            },
        ]
    },
    limit: 10,
});
use qdrant_client::qdrant::{ContextInputBuilder, QueryPointsBuilder};
use qdrant_client::Qdrant;

let client = Qdrant::from_url("http://localhost:6334").build()?;

client
    .query(
        QueryPointsBuilder::new("{collection_name}").query(
            ContextInputBuilder::default()
                .add_pair(100, 718)
                .add_pair(200, 300)
                .build(),
        ),
    )
    .await?;
import java.util.List;

import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.ContextInput;
import io.qdrant.client.grpc.Points.ContextInputPair;
import io.qdrant.client.grpc.Points.QueryPoints;

import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.context;

QdrantClient client =
    new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());

client.queryAsync(QueryPoints.newBuilder()
        .setCollectionName("{collection_name}")
        .setQuery(context(ContextInput.newBuilder()
                .addAllPairs(List.of(
                        ContextInputPair.newBuilder()
                                .setPositive(vectorInput(100))
                                .setNegative(vectorInput(718))
                                .build(),
                        ContextInputPair.newBuilder()
                                .setPositive(vectorInput(200))
                                .setNegative(vectorInput(300))
                                .build()))
                .build()))
        .setLimit(10)
        .build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;

var client = new QdrantClient("localhost", 6334);

await client.QueryAsync(
  collectionName: "{collection_name}",
  query: new ContextInput {
    Pairs = {
      new ContextInputPair {
        Positive = 100,
          Negative = 718
      },
      new ContextInputPair {
        Positive = 200,
          Negative = 300
      },
    }
  },
  limit: 10
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

client.Query(context.Background(), &qdrant.QueryPoints{
    CollectionName: "{collection_name}",
    Query: qdrant.NewQueryContext(&qdrant.ContextInput{
        Pairs: []*qdrant.ContextInputPair{
            {
                Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
                Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
            },
            {
                Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(200)),
                Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(300)),
            },
        },
    }),
})

距离矩阵

自 v1.12.0 版本起可用

距离矩阵API允许计算采样向量对之间的距离,并将结果作为稀疏矩阵返回。

这样的API支持新的数据探索用例,例如聚类相似向量、连接的可视化或降维。

API输入请求由以下参数组成:

  • sample: 要采样的向量数量
  • limit: 每个样本返回的分数数量
  • filter: 应用于约束样本的过滤器

让我们来看一个基本的例子,使用 sample=100, limit=10:

引擎首先从集合中随机选择 100 个点,然后对于每个选中的点,在样本内部计算与它最近的 10 个点。

这将导致总共1000个分数表示为稀疏矩阵,以便进行高效处理。

距离矩阵API提供了两种输出格式,以便于与不同工具的集成。

成对格式

返回距离矩阵作为点ids及其相应得分的对列表。

POST /collections/{collection_name}/points/search/matrix/pairs
{
    "sample": 10,
    "limit": 2,
    "filter": {
        "must": {
            "key": "color",
            "match": { "value": "red" }
        }
    }  
}
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

client.search_matrix_pairs(
    collection_name="{collection_name}",
    sample=10,
    limit=2,
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key="color", match=models.MatchValue(value="red")
            ),
        ]
    ),
)
import static io.qdrant.client.ConditionFactory.matchKeyword;

import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Filter;
import io.qdrant.client.grpc.Points.SearchMatrixPoints;

// Create a client for the Qdrant gRPC endpoint at localhost:6334.
QdrantClient client =
    new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());

// Sample 10 points that pass the "color" == "red" filter and request 2 scores per
// sampled point; the matrix is returned in the pairs format.
// SearchMatrixPoints is referenced by its simple name because only the nested
// class is imported, not the outer Points class.
client
    .searchMatrixPairsAsync(
        SearchMatrixPoints.newBuilder()
            .setCollectionName("{collection_name}")
            .setFilter(Filter.newBuilder().addMust(matchKeyword("color", "red")).build())
            .setSample(10)
            .setLimit(2)
            .build())
    .get(); // wait for the asynchronous call to complete
use qdrant_client::qdrant::{Condition, Filter, SearchMatrixPointsBuilder};
use qdrant_client::Qdrant;

client
    .search_matrix_pairs(
        SearchMatrixPointsBuilder::new("collection_name")
           .filter(Filter::must(vec![Condition::matches(
               "color",
               "red".to_string(),
           )]))
           .sample(10)
           .limit(2),
    )
    .await?;
import { QdrantClient } from "@qdrant/js-client-rest";

const client = new QdrantClient({ host: "localhost", port: 6333 });

// Only points whose "color" payload field equals "red" are eligible for sampling.
const redOnly = {
    must: [
        {
            key: "color",
            match: { value: "red" },
        },
    ],
};

// Sample 10 points and request 2 scores per sampled point, returned as pairs.
client.searchMatrixPairs("{collection_name}", {
    filter: redOnly,
    sample: 10,
    limit: 2,
});
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;

// Create a client for the Qdrant instance at localhost:6334.
var client = new QdrantClient("localhost", 6334);

// Sample 10 points that pass the "color" == "red" keyword filter and request
// 2 scores per sampled point; the matrix is returned in the pairs format.
await client.SearchMatrixPairsAsync(
    collectionName: "{collection_name}",
    filter: MatchKeyword("color", "red"),
    sample: 10,
    limit: 2
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

// Create a client for the Qdrant instance at localhost:6334.
client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

// Sample and Limit are passed by pointer, so they need addressable variables.
sample := uint64(10)
limit := uint64(2)

// Sample 10 points that pass the "color" == "red" filter and request 2 scores
// per sampled point; the matrix is returned in the pairs format.
// context.Background() is used because no context variable is defined in this example.
res, err := client.SearchMatrixPairs(context.Background(), &qdrant.SearchMatrixPoints{
    CollectionName: "{collection_name}",
    Sample:         &sample,
    Limit:          &limit,
    Filter: &qdrant.Filter{
        Must: []*qdrant.Condition{
            qdrant.NewMatch("color", "red"),
        },
    },
})

返回

{
    "result": {
        "pairs": [
            {"a": 1, "b": 3, "score": 1.4063001},
            {"a": 1, "b": 4, "score": 1.2531},
            {"a": 2, "b": 1, "score": 1.1550001},
            {"a": 2, "b": 8, "score": 1.1359},
            {"a": 3, "b": 1, "score": 1.4063001},
            {"a": 3, "b": 4, "score": 1.2218001},
            {"a": 4, "b": 1, "score": 1.2531},
            {"a": 4, "b": 3, "score": 1.2218001},
            {"a": 5, "b": 3, "score": 0.70239997},
            {"a": 5, "b": 1, "score": 0.6146},
            {"a": 6, "b": 3, "score": 0.6353},
            {"a": 6, "b": 4, "score": 0.5093},
            {"a": 7, "b": 3, "score": 1.0990001},
            {"a": 7, "b": 1, "score": 1.0349001},
            {"a": 8, "b": 2, "score": 1.1359},
            {"a": 8, "b": 3, "score": 1.0553}
        ]
    }
}

偏移格式

返回距离矩阵为四个数组:

  • offsets_row 和 offsets_col:表示矩阵中非零距离值所在的行和列位置。
  • scores 包含距离值。
  • ids 包含与距离值对应的点ID。
POST /collections/{collection_name}/points/search/matrix/offsets
{
    "sample": 10,
    "limit": 2,
    "filter": {
        "must": {
            "key": "color",
            "match": { "value": "red" }
        }
    }
}
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")

# Only points whose "color" payload field equals "red" are eligible for sampling.
red_only = models.Filter(
    must=[
        models.FieldCondition(key="color", match=models.MatchValue(value="red")),
    ]
)

# Sample 10 points and request 2 scores per sampled point, returned in the offsets format.
client.search_matrix_offsets(
    collection_name="{collection_name}",
    sample=10,
    limit=2,
    query_filter=red_only,
)
import static io.qdrant.client.ConditionFactory.matchKeyword;

import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Filter;
import io.qdrant.client.grpc.Points.SearchMatrixPoints;

// Create a client for the Qdrant gRPC endpoint at localhost:6334.
QdrantClient client =
    new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());

// Only points whose "color" payload matches the keyword "red" are eligible for sampling.
Filter redOnly = Filter.newBuilder().addMust(matchKeyword("color", "red")).build();

// Sample 10 points and request 2 scores per sampled point.
SearchMatrixPoints request =
    SearchMatrixPoints.newBuilder()
        .setCollectionName("{collection_name}")
        .setFilter(redOnly)
        .setSample(10)
        .setLimit(2)
        .build();

// Send the request and wait for the offsets-format result.
client.searchMatrixOffsetsAsync(request).get();
use qdrant_client::qdrant::{Condition, Filter, SearchMatrixPointsBuilder};
use qdrant_client::Qdrant;

client
    .search_matrix_offsets(
        SearchMatrixPointsBuilder::new("collection_name")
           .filter(Filter::must(vec![Condition::matches(
               "color",
               "red".to_string(),
           )]))
           .sample(10)
           .limit(2),
    )
    .await?;
import { QdrantClient } from "@qdrant/js-client-rest";

const client = new QdrantClient({ host: "localhost", port: 6333 });

// Only points whose "color" payload field equals "red" are eligible for sampling.
const redOnly = {
    must: [
        {
            key: "color",
            match: { value: "red" },
        },
    ],
};

// Sample 10 points and request 2 scores per sampled point, returned in the offsets format.
client.searchMatrixOffsets("{collection_name}", {
    filter: redOnly,
    sample: 10,
    limit: 2,
});
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;

// Create a client for the Qdrant instance at localhost:6334.
var client = new QdrantClient("localhost", 6334);

// Sample 10 points that pass the "color" == "red" keyword filter and request
// 2 scores per sampled point; the matrix is returned in the offsets format.
await client.SearchMatrixOffsetsAsync(
    collectionName: "{collection_name}",
    filter: MatchKeyword("color", "red"),
    sample: 10,
    limit: 2
);
import (
    "context"

    "github.com/qdrant/go-client/qdrant"
)

// Create a client for the Qdrant instance at localhost:6334.
client, err := qdrant.NewClient(&qdrant.Config{
    Host: "localhost",
    Port: 6334,
})

// Sample and Limit are passed by pointer, so they need addressable variables.
sample := uint64(10)
limit := uint64(2)

// Sample 10 points that pass the "color" == "red" filter and request 2 scores
// per sampled point; the matrix is returned in the offsets format.
// context.Background() is used because no context variable is defined in this example.
res, err := client.SearchMatrixOffsets(context.Background(), &qdrant.SearchMatrixPoints{
    CollectionName: "{collection_name}",
    Sample:         &sample,
    Limit:          &limit,
    Filter: &qdrant.Filter{
        Must: []*qdrant.Condition{
            qdrant.NewMatch("color", "red"),
        },
    },
})

返回

{
    "result": {
        "offsets_row": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7],
        "offsets_col": [2, 3, 0, 7, 0, 3, 0, 2, 2, 0, 2, 3, 2, 0, 1, 2],
        "scores": [
            1.4063001, 1.2531, 1.1550001, 1.1359, 1.4063001,
            1.2218001, 1.2531, 1.2218001, 0.70239997, 0.6146, 0.6353,
            0.5093, 1.0990001, 1.0349001, 1.1359, 1.0553
            ],
        "ids": [1, 2, 3, 4, 5, 6, 7, 8]
    }
}
这个页面有用吗?

感谢您的反馈!🙏

我们很抱歉听到这个消息。😔 你可以在GitHub上编辑这个页面,或者创建一个GitHub问题。