探索数据
在掌握了搜索中的概念后,您可以开始以其他方式探索您的数据。Qdrant 提供了一系列 API,允许您以不同的方式找到相似的向量,以及找到最不相似的向量。这些是推荐系统、数据探索和数据清理的有用工具。
推荐API
除了常规搜索外,Qdrant 还允许您基于多个正面和负面示例进行搜索。该 API 称为 recommend,示例可以是点 ID,以便您可以利用已经编码的对象;并且,从 v1.6 开始,您还可以使用原始向量作为输入,这样您可以在不将其作为点上传的情况下动态创建向量。
REST API - API 模式定义可在此处获取
POST /collections/{collection_name}/points/query
{
"query": {
"recommend": {
"positive": [100, 231],
"negative": [718, [0.2, 0.3, 0.4, 0.5]],
"strategy": "average_vector"
}
},
"filter": {
"must": [
{
"key": "city",
"match": {
"value": "London"
}
}
]
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
query=models.RecommendQuery(
recommend=models.RecommendInput(
positive=[100, 231],
negative=[718, [0.2, 0.3, 0.4, 0.5]],
strategy=models.RecommendStrategy.AVERAGE_VECTOR,
)
),
query_filter=models.Filter(
must=[
models.FieldCondition(
key="city",
match=models.MatchValue(
value="London",
),
)
]
),
limit=3,
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
query: {
recommend: {
positive: [100, 231],
negative: [718, [0.2, 0.3, 0.4, 0.5]],
strategy: "average_vector"
}
},
filter: {
must: [
{
key: "city",
match: {
value: "London",
},
},
],
},
limit: 3
});
use qdrant_client::qdrant::{
Condition, Filter, QueryPointsBuilder, RecommendInputBuilder, RecommendStrategy,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.query(
QueryPointsBuilder::new("{collection_name}")
.query(
RecommendInputBuilder::default()
.add_positive(100)
.add_positive(231)
.add_negative(718)
.add_negative(vec![0.2, 0.3, 0.4, 0.5])
.strategy(RecommendStrategy::AverageVector)
.build(),
)
.limit(3)
.filter(Filter::must([Condition::matches(
"city",
"London".to_string(),
)])),
)
.await?;
import java.util.List;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;
import io.qdrant.client.grpc.Points.RecommendStrategy;
import io.qdrant.client.grpc.Points.Filter;
import static io.qdrant.client.ConditionFactory.matchKeyword;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client.queryAsync(QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(recommend(RecommendInput.newBuilder()
.addAllPositive(List.of(vectorInput(100), vectorInput(231)))
.addAllNegative(List.of(vectorInput(718), vectorInput(0.2f, 0.3f, 0.4f, 0.5f)))
.setStrategy(RecommendStrategy.AverageVector)
.build()))
.setFilter(Filter.newBuilder().addMust(matchKeyword("city", "London")))
.setLimit(3)
.build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new RecommendInput {
Positive = { 100, 231 },
Negative = { 718 }
},
filter: MatchKeyword("city", "London"),
limit: 3
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
Positive: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
},
Negative: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
},
}),
Filter: &qdrant.Filter{
Must: []*qdrant.Condition{
qdrant.NewMatch("city", "London"),
},
},
})
此API的示例结果将是
{
"result": [
{ "id": 10, "score": 0.81 },
{ "id": 14, "score": 0.75 },
{ "id": 11, "score": 0.73 }
],
"status": "ok",
"time": 0.001
}
用于获取推荐的算法是从可用的strategy选项中选择的。每种算法都有其自身的优缺点,因此请进行实验并选择最适合您情况的算法。
平均向量策略
默认且第一个添加到Qdrant的策略称为average_vector。它预处理输入示例以创建一个用于搜索的单一向量。由于预处理步骤非常快,该策略的性能与常规搜索相当。这种推荐背后的直觉是,每个向量分量代表数据的一个独立特征,因此,通过对示例进行平均,我们应该能得到一个很好的推荐。
生成搜索向量的方法是首先分别对所有正例和负例进行平均,然后使用以下公式将它们组合成一个单一向量:
avg_positive + avg_positive - avg_negative
在没有负例的情况下,搜索向量将简单地等于avg_positive。
这是将隐式设置的默认策略,但您可以通过在推荐请求中设置"strategy": "average_vector"来显式定义它。
最佳得分策略
自 v1.6.0 起可用
在v1.6中引入的新策略称为best_score。它基于这样一种理念:找到相似向量的最佳方法是找到那些更接近正例的向量,同时避免那些更接近负例的向量。
它的工作方式是:每个候选向量都会与每个示例进行比较,然后我们选出最佳的正例得分和最佳的负例得分。最终得分通过以下分段公式确定:
let score = if best_positive_score > best_negative_score {
best_positive_score
} else {
-(best_negative_score * best_negative_score)
};
由于我们在搜索的每一步都计算与每个示例的相似性,因此此策略的性能将受到示例数量的线性影响。这意味着您提供的示例越多,搜索速度就越慢。然而,这种策略可能非常强大,并且应该更加与嵌入无关。
要使用此算法,您需要在推荐请求中设置 "strategy": "best_score"。
仅使用负例
best_score 策略的一个有益副作用是,你可以仅使用负样本来使用它。这将允许你找到与你提供的向量最不相似的向量。这对于在数据中查找异常值,或者找到与给定向量最不相似的向量非常有用。
将仅包含负面示例与过滤结合使用,可以成为数据探索和清理的强大工具。
多个向量
自 v0.10.0 起可用
如果集合是使用多个向量创建的,则应在推荐请求中指定向量的名称:
POST /collections/{collection_name}/points/query
{
"query": {
"recommend": {
"positive": [100, 231],
"negative": [718]
}
},
"using": "image",
"limit": 10
}
client.query_points(
collection_name="{collection_name}",
query=models.RecommendQuery(
recommend=models.RecommendInput(
positive=[100, 231],
negative=[718],
)
),
using="image",
limit=10,
)
client.query("{collection_name}", {
query: {
recommend: {
positive: [100, 231],
negative: [718],
}
},
using: "image",
limit: 10
});
use qdrant_client::qdrant::{QueryPointsBuilder, RecommendInputBuilder};
client
.query(
QueryPointsBuilder::new("{collection_name}")
.query(
RecommendInputBuilder::default()
.add_positive(100)
.add_positive(231)
.add_negative(718)
.build(),
)
.limit(10)
.using("image"),
)
.await?;
import java.util.List;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;
client.queryAsync(QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(recommend(RecommendInput.newBuilder()
.addAllPositive(List.of(vectorInput(100), vectorInput(231)))
.addAllNegative(List.of(vectorInput(718)))
.build()))
.setUsing("image")
.setLimit(10)
.build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new RecommendInput {
Positive = { 100, 231 },
Negative = { 718 }
},
usingVector: "image",
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
Positive: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
},
Negative: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
},
}),
Using: qdrant.PtrOf("image"),
})
参数 using 指定用于推荐的存储向量。
从另一个集合中查找向量
自 v0.11.6 版本起可用
如果您有具有相同维度的向量集合,并且您希望基于另一个集合的向量在一个集合中查找推荐,您可以使用lookup_from参数。
这可能很有用,例如在项目到用户的推荐场景中。 尽管用户和项目的嵌入具有相同的向量参数(距离类型和维度),但它们通常存储在不同的集合中。
POST /collections/{collection_name}/points/query
{
"query": {
"recommend": {
"positive": [100, 231],
"negative": [718]
}
},
"limit": 10,
"lookup_from": {
"collection": "{external_collection_name}",
"vector": "{external_vector_name}"
}
}
client.query_points(
collection_name="{collection_name}",
query=models.RecommendQuery(
recommend=models.RecommendInput(
positive=[100, 231],
negative=[718],
)
),
using="image",
limit=10,
lookup_from=models.LookupLocation(
collection="{external_collection_name}", vector="{external_vector_name}"
),
)
client.query("{collection_name}", {
query: {
recommend: {
positive: [100, 231],
negative: [718],
}
},
using: "image",
limit: 10,
lookup_from: {
collection: "{external_collection_name}",
vector: "{external_vector_name}"
}
});
use qdrant_client::qdrant::{LookupLocationBuilder, QueryPointsBuilder, RecommendInputBuilder};
client
.query(
QueryPointsBuilder::new("{collection_name}")
.query(
RecommendInputBuilder::default()
.add_positive(100)
.add_positive(231)
.add_negative(718)
.build(),
)
.limit(10)
.using("image")
.lookup_from(
LookupLocationBuilder::new("{external_collection_name}")
.vector_name("{external_vector_name}"),
),
)
.await?;
import java.util.List;
import io.qdrant.client.grpc.Points.LookupLocation;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;
client.queryAsync(QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(recommend(RecommendInput.newBuilder()
.addAllPositive(List.of(vectorInput(100), vectorInput(231)))
.addAllNegative(List.of(vectorInput(718)))
.build()))
.setUsing("image")
.setLimit(10)
.setLookupFrom(
LookupLocation.newBuilder()
.setCollectionName("{external_collection_name}")
.setVectorName("{external_vector_name}")
.build())
.build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new RecommendInput {
Positive = { 100, 231 },
Negative = { 718 }
},
usingVector: "image",
limit: 10,
lookupFrom: new LookupLocation
{
CollectionName = "{external_collection_name}",
VectorName = "{external_vector_name}",
}
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
Positive: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
},
Negative: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
},
}),
Using: qdrant.PtrOf("image"),
LookupFrom: &qdrant.LookupLocation{
CollectionName: "{external_collection_name}",
VectorName: qdrant.PtrOf("{external_vector_name}"),
},
})
向量通过positive和negative列表中提供的ID从外部集合中检索。
然后,这些向量用于在当前集合中执行推荐,与“using”或默认向量进行比较。
批量推荐API
自 v0.10.0 起可用
与批量搜索API在用途和优势上相似,它能够实现推荐请求的批量处理。
POST /collections/{collection_name}/points/query/batch
{
"searches": [
{
"query": {
"recommend": {
"positive": [100, 231],
"negative": [718]
}
},
"filter": {
"must": [
{
"key": "city",
"match": {
"value": "London"
}
}
]
},
"limit": 10
},
{
"query": {
"recommend": {
"positive": [200, 67],
"negative": [300]
}
},
"filter": {
"must": [
{
"key": "city",
"match": {
"value": "London"
}
}
]
},
"limit": 10
}
]
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
filter_ = models.Filter(
must=[
models.FieldCondition(
key="city",
match=models.MatchValue(
value="London",
),
)
]
)
recommend_queries = [
models.QueryRequest(
query=models.RecommendQuery(
recommend=models.RecommendInput(positive=[100, 231], negative=[718])
),
filter=filter_,
limit=3,
),
models.QueryRequest(
query=models.RecommendQuery(
recommend=models.RecommendInput(positive=[200, 67], negative=[300])
),
filter=filter_,
limit=3,
),
]
client.query_batch_points(
collection_name="{collection_name}", requests=recommend_queries
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
const filter = {
must: [
{
key: "city",
match: {
value: "London",
},
},
],
};
const searches = [
{
query: {
recommend: {
positive: [100, 231],
negative: [718]
}
},
filter,
limit: 3,
},
{
query: {
recommend: {
positive: [200, 67],
negative: [300]
}
},
filter,
limit: 3,
},
];
client.queryBatch("{collection_name}", {
searches,
});
use qdrant_client::qdrant::{
Condition, Filter, QueryBatchPointsBuilder, QueryPointsBuilder,
RecommendInputBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
let filter = Filter::must([Condition::matches("city", "London".to_string())]);
let recommend_queries = vec![
QueryPointsBuilder::new("{collection_name}")
.query(
RecommendInputBuilder::default()
.add_positive(100)
.add_positive(231)
.add_negative(718)
.build(),
)
.filter(filter.clone())
.build(),
QueryPointsBuilder::new("{collection_name}")
.query(
RecommendInputBuilder::default()
.add_positive(200)
.add_positive(67)
.add_negative(300)
.build(),
)
.filter(filter)
.build(),
];
client
.query_batch(QueryBatchPointsBuilder::new(
"{collection_name}",
recommend_queries,
))
.await?;
import java.util.List;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Filter;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.RecommendInput;
import static io.qdrant.client.ConditionFactory.matchKeyword;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.recommend;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
Filter filter = Filter.newBuilder().addMust(matchKeyword("city", "London")).build();
List<QueryPoints> recommendQueries = List.of(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(recommend(
RecommendInput.newBuilder()
.addAllPositive(List.of(vectorInput(100), vectorInput(231)))
.addAllNegative(List.of(vectorInput(718)))
.build()))
.setFilter(filter)
.setLimit(3)
.build(),
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(recommend(
RecommendInput.newBuilder()
.addAllPositive(List.of(vectorInput(200), vectorInput(67)))
.addAllNegative(List.of(vectorInput(300)))
.build()))
.setFilter(filter)
.setLimit(3)
.build());
client.queryBatchAsync("{collection_name}", recommendQueries).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;
var client = new QdrantClient("localhost", 6334);
var filter = MatchKeyword("city", "London");
await client.QueryBatchAsync(
collectionName: "{collection_name}",
queries:
[
new QueryPoints()
{
CollectionName = "{collection_name}",
Query = new RecommendInput {
Positive = { 100, 231 },
Negative = { 718 },
},
Limit = 3,
Filter = filter,
},
new QueryPoints()
{
CollectionName = "{collection_name}",
Query = new RecommendInput {
Positive = { 200, 67 },
Negative = { 300 },
},
Limit = 3,
Filter = filter,
}
]
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
filter := qdrant.Filter{
Must: []*qdrant.Condition{
qdrant.NewMatch("city", "London"),
},
}
client.QueryBatch(context.Background(), &qdrant.QueryBatchPoints{
CollectionName: "{collection_name}",
QueryPoints: []*qdrant.QueryPoints{
{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
Positive: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
qdrant.NewVectorInputID(qdrant.NewIDNum(231)),
},
Negative: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
},
},
),
Filter: &filter,
},
{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryRecommend(&qdrant.RecommendInput{
Positive: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(200)),
qdrant.NewVectorInputID(qdrant.NewIDNum(67)),
},
Negative: []*qdrant.VectorInput{
qdrant.NewVectorInputID(qdrant.NewIDNum(300)),
},
},
),
Filter: &filter,
},
},
},
)
此API的结果包含每个推荐请求的一个数组。
{
"result": [
[
{ "id": 10, "score": 0.81 },
{ "id": 14, "score": 0.75 },
{ "id": 11, "score": 0.73 }
],
[
{ "id": 1, "score": 0.92 },
{ "id": 3, "score": 0.89 },
{ "id": 9, "score": 0.75 }
]
],
"status": "ok",
"time": 0.001
}
发现API
自v1.7版本起可用
REST API 模式定义可在此处获取
在此API中,Qdrant引入了context的概念,用于分割空间。上下文是一组正负对,每对将空间划分为正区和负区。在这种模式下,搜索操作根据点属于多少个正区(或它们避免了多少个负区)来优先选择点。
提供上下文的接口类似于推荐API(ids或原始向量)。然而,在这种情况下,它们需要以正负对的形式提供。
发现API允许您进行两种新类型的搜索:
- 发现搜索: 使用上下文(正负向量对)和目标来返回与目标更相似的点,但受上下文约束。
- 上下文搜索:仅使用上下文对,获取位于最佳区域的点,其中损失最小化
正例和负例在上下文对中的排列方式完全由您决定。因此,您可以根据模型和数据灵活尝试不同的排列技术。
发现搜索
这种类型的搜索特别适合结合多模态、向量约束的搜索。Qdrant已经对过滤器有广泛的支持,这些过滤器基于其有效载荷来约束搜索,但使用发现搜索,您还可以约束执行搜索的向量空间。

发现分数的公式可以表示为:
$$ \text{rank}(v^+, v^-) = \begin{cases} 1, &\quad s(v^+) \geq s(v^-) \\ -1, &\quad s(v^+) < s(v^-) \end{cases} $$ 其中 $v^+$ 代表正例,$v^-$ 代表负例,$s(v)$ 是向量 $v$ 与目标向量的相似度分数。然后发现分数计算如下: $$ \text{发现分数} = \text{sigmoid}(s(v_t))+ \sum \text{rank}(v_i^+, v_i^-), $$ 其中 $s(v)$ 是相似度函数,$v_t$ 是目标向量,$v_i^+$ 和 $v_i^-$ 分别是正例和负例。sigmoid 函数用于将分数归一化到 0 和 1 之间,而秩的和用于惩罚那些更接近负例而不是正例的向量。换句话说,个体秩的总和决定了一个点位于多少个正区域中,而接近层次则是次要的。
示例:
POST /collections/{collection_name}/points/query
{
"query": {
"discover": {
"target": [0.2, 0.1, 0.9, 0.7],
"context": [
{
"positive": 100,
"negative": 718
},
{
"positive": 200,
"negative": 300
}
]
}
},
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
discover_queries = [
models.QueryRequest(
query=models.DiscoverQuery(
discover=models.DiscoverInput(
target=[0.2, 0.1, 0.9, 0.7],
context=[
models.ContextPair(
positive=100,
negative=718,
),
models.ContextPair(
positive=200,
negative=300,
),
],
)
),
limit=10,
),
]
client.query_batch_points(
collection_name="{collection_name}", requests=discover_queries
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
query: {
discover: {
target: [0.2, 0.1, 0.9, 0.7],
context: [
{
positive: 100,
negative: 718,
},
{
positive: 200,
negative: 300,
},
],
}
},
limit: 10,
});
use qdrant_client::qdrant::{ContextInputBuilder, DiscoverInputBuilder, QueryPointsBuilder};
use qdrant_client::Qdrant;
client
.query(
QueryPointsBuilder::new("{collection_name}").query(
DiscoverInputBuilder::new(
vec![0.2, 0.1, 0.9, 0.7],
ContextInputBuilder::default()
.add_pair(100, 718)
.add_pair(200, 300),
)
.build(),
),
)
.await?;
import java.util.List;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.ContextInput;
import io.qdrant.client.grpc.Points.ContextInputPair;
import io.qdrant.client.grpc.Points.DiscoverInput;
import io.qdrant.client.grpc.Points.QueryPoints;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.discover;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client.queryAsync(QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(discover(DiscoverInput.newBuilder()
.setTarget(vectorInput(0.2f, 0.1f, 0.9f, 0.7f))
.setContext(ContextInput.newBuilder()
.addAllPairs(List.of(
ContextInputPair.newBuilder()
.setPositive(vectorInput(100))
.setNegative(vectorInput(718))
.build(),
ContextInputPair.newBuilder()
.setPositive(vectorInput(200))
.setNegative(vectorInput(300))
.build()))
.build())
.build()))
.setLimit(10)
.build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new DiscoverInput {
Target = new float[] { 0.2f, 0.1f, 0.9f, 0.7f },
Context = new ContextInput {
Pairs = {
new ContextInputPair {
Positive = 100,
Negative = 718
},
new ContextInputPair {
Positive = 200,
Negative = 300
},
}
},
},
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryDiscover(&qdrant.DiscoverInput{
Target: qdrant.NewVectorInput(0.2, 0.1, 0.9, 0.7),
Context: &qdrant.ContextInput{
Pairs: []*qdrant.ContextInputPair{
{
Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
},
{
Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(200)),
Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(300)),
},
},
},
}),
})
上下文搜索
相反,在没有目标的情况下,当使用像HNSW这样的邻近图时,一个严格的、只取整数值的评分函数并不能为搜索提供太多指导。因此,上下文搜索采用了一种源自三元组损失(triplet loss)概念的函数,该函数通常在模型训练期间使用。对于上下文搜索,此函数被调整为引导搜索朝向负例较少的区域。

我们可以直接将评分函数与损失函数关联起来,其中0.0是一个点可以拥有的最高分数,这意味着它只存在于正区域。一旦一个点更接近负样本,它的损失将简单地是正相似度和负相似度的差值。
$$ \text{context score} = \sum \min(s(v^+_i) - s(v^-_i), 0.0) $$
其中 $v^+_i$ 和 $v^-_i$ 是每对的正例和负例,$s(v)$ 是相似度函数。
使用这种搜索方式,你可以预期输出不一定围绕某一个单独的点,而是任何不接近负例的点,从而得到一个受约束的多样化结果。因此,即使该API并不叫做recommend,推荐系统也可以使用这种方法,并根据其特定用例进行调整。
示例:
POST /collections/{collection_name}/points/query
{
"query": {
"context": [
{
"positive": 100,
"negative": 718
},
{
"positive": 200,
"negative": 300
}
]
},
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
discover_queries = [
models.QueryRequest(
query=models.ContextQuery(
context=[
models.ContextPair(
positive=100,
negative=718,
),
models.ContextPair(
positive=200,
negative=300,
),
],
),
limit=10,
),
]
client.query_batch_points(
collection_name="{collection_name}", requests=discover_queries
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
query: {
context: [
{
positive: 100,
negative: 718,
},
{
positive: 200,
negative: 300,
},
]
},
limit: 10,
});
use qdrant_client::qdrant::{ContextInputBuilder, QueryPointsBuilder};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.query(
QueryPointsBuilder::new("{collection_name}").query(
ContextInputBuilder::default()
.add_pair(100, 718)
.add_pair(200, 300)
.build(),
),
)
.await?;
import java.util.List;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.ContextInput;
import io.qdrant.client.grpc.Points.ContextInputPair;
import io.qdrant.client.grpc.Points.QueryPoints;
import static io.qdrant.client.VectorInputFactory.vectorInput;
import static io.qdrant.client.QueryFactory.context;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client.queryAsync(QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(context(ContextInput.newBuilder()
.addAllPairs(List.of(
ContextInputPair.newBuilder()
.setPositive(vectorInput(100))
.setNegative(vectorInput(718))
.build(),
ContextInputPair.newBuilder()
.setPositive(vectorInput(200))
.setNegative(vectorInput(300))
.build()))
.build()))
.setLimit(10)
.build()).get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new ContextInput {
Pairs = {
new ContextInputPair {
Positive = 100,
Negative = 718
},
new ContextInputPair {
Positive = 200,
Negative = 300
},
}
},
limit: 10
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQueryContext(&qdrant.ContextInput{
Pairs: []*qdrant.ContextInputPair{
{
Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(100)),
Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(718)),
},
{
Positive: qdrant.NewVectorInputID(qdrant.NewIDNum(200)),
Negative: qdrant.NewVectorInputID(qdrant.NewIDNum(300)),
},
},
}),
})
距离矩阵
自 v1.12.0 版本起可用
距离矩阵API允许计算采样向量对之间的距离,并将结果作为稀疏矩阵返回。
这样的API支持新的数据探索用例,例如聚类相似向量、连接的可视化或降维。
API输入请求由以下参数组成:
- sample: 要采样的向量数量
- limit: 每个样本返回的分数数量
- filter: 应用于约束样本的过滤器
让我们来看一个基本的例子,使用 sample=100, limit=10:
引擎首先从集合中随机选择100个点,然后对于每个选中的点,在这些样本内部计算与其最接近的10个点。
这将导致总共1000个分数表示为稀疏矩阵,以便进行高效处理。
距离矩阵API提供了两种输出格式,以便于与不同工具的集成。
成对格式
以点ID对及其相应得分的列表形式返回距离矩阵。
POST /collections/{collection_name}/points/search/matrix/pairs
{
"sample": 10,
"limit": 2,
"filter": {
"must": {
"key": "color",
"match": { "value": "red" }
}
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.search_matrix_pairs(
collection_name="{collection_name}",
sample=10,
limit=2,
query_filter=models.Filter(
must=[
models.FieldCondition(
key="color", match=models.MatchValue(value="red")
),
]
),
)
import static io.qdrant.client.ConditionFactory.matchKeyword;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Filter;
import io.qdrant.client.grpc.Points.SearchMatrixPoints;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.searchMatrixPairsAsync(
SearchMatrixPoints.newBuilder()
.setCollectionName("{collection_name}")
.setFilter(Filter.newBuilder().addMust(matchKeyword("color", "red")).build())
.setSample(10)
.setLimit(2)
.build())
.get();
use qdrant_client::qdrant::{Condition, Filter, SearchMatrixPointsBuilder};
use qdrant_client::Qdrant;
client
.search_matrix_pairs(
SearchMatrixPointsBuilder::new("{collection_name}")
.filter(Filter::must(vec![Condition::matches(
"color",
"red".to_string(),
)]))
.sample(10)
.limit(2),
)
.await?;
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.searchMatrixPairs("{collection_name}", {
filter: {
must: [
{
key: "color",
match: {
value: "red",
},
},
],
},
sample: 10,
limit: 2,
});
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;
var client = new QdrantClient("localhost", 6334);
await client.SearchMatrixPairsAsync(
collectionName: "{collection_name}",
filter: MatchKeyword("color", "red"),
sample: 10,
limit: 2
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
sample := uint64(10)
limit := uint64(2)
res, err := client.SearchMatrixPairs(context.Background(), &qdrant.SearchMatrixPoints{
CollectionName: "{collection_name}",
Sample: &sample,
Limit: &limit,
Filter: &qdrant.Filter{
Must: []*qdrant.Condition{
qdrant.NewMatch("color", "red"),
},
},
})
返回
{
"result": {
"pairs": [
{"a": 1, "b": 3, "score": 1.4063001},
{"a": 1, "b": 4, "score": 1.2531},
{"a": 2, "b": 1, "score": 1.1550001},
{"a": 2, "b": 8, "score": 1.1359},
{"a": 3, "b": 1, "score": 1.4063001},
{"a": 3, "b": 4, "score": 1.2218001},
{"a": 4, "b": 1, "score": 1.2531},
{"a": 4, "b": 3, "score": 1.2218001},
{"a": 5, "b": 3, "score": 0.70239997},
{"a": 5, "b": 1, "score": 0.6146},
{"a": 6, "b": 3, "score": 0.6353},
{"a": 6, "b": 4, "score": 0.5093},
{"a": 7, "b": 3, "score": 1.0990001},
{"a": 7, "b": 1, "score": 1.0349001},
{"a": 8, "b": 2, "score": 1.1359},
{"a": 8, "b": 3, "score": 1.0553}
]
}
}
偏移格式
返回距离矩阵为四个数组:
- offsets_row 和 offsets_col:表示矩阵中非零距离值的位置。
- scores:包含距离值。
- ids:包含与距离值对应的点ID。
POST /collections/{collection_name}/points/search/matrix/offsets
{
"sample": 10,
"limit": 2,
"filter": {
"must": {
"key": "color",
"match": { "value": "red" }
}
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.search_matrix_offsets(
collection_name="{collection_name}",
sample=10,
limit=2,
query_filter=models.Filter(
must=[
models.FieldCondition(
key="color", match=models.MatchValue(value="red")
),
]
),
)
import static io.qdrant.client.ConditionFactory.matchKeyword;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.Filter;
import io.qdrant.client.grpc.Points.SearchMatrixPoints;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.searchMatrixOffsetsAsync(
SearchMatrixPoints.newBuilder()
.setCollectionName("{collection_name}")
.setFilter(Filter.newBuilder().addMust(matchKeyword("color", "red")).build())
.setSample(10)
.setLimit(2)
.build())
.get();
use qdrant_client::qdrant::{Condition, Filter, SearchMatrixPointsBuilder};
use qdrant_client::Qdrant;
client
.search_matrix_offsets(
SearchMatrixPointsBuilder::new("{collection_name}")
.filter(Filter::must(vec![Condition::matches(
"color",
"red".to_string(),
)]))
.sample(10)
.limit(2),
)
.await?;
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.searchMatrixOffsets("{collection_name}", {
filter: {
must: [
{
key: "color",
match: {
value: "red",
},
},
],
},
sample: 10,
limit: 2,
});
using Qdrant.Client;
using Qdrant.Client.Grpc;
using static Qdrant.Client.Grpc.Conditions;
var client = new QdrantClient("localhost", 6334);
await client.SearchMatrixOffsetsAsync(
collectionName: "{collection_name}",
filter: MatchKeyword("color", "red"),
sample: 10,
limit: 2
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
sample := uint64(10)
limit := uint64(2)
res, err := client.SearchMatrixOffsets(context.Background(), &qdrant.SearchMatrixPoints{
CollectionName: "{collection_name}",
Sample: &sample,
Limit: &limit,
Filter: &qdrant.Filter{
Must: []*qdrant.Condition{
qdrant.NewMatch("color", "red"),
},
},
})
返回
{
"result": {
"offsets_row": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7],
"offsets_col": [2, 3, 0, 7, 0, 3, 0, 2, 2, 0, 2, 3, 2, 0, 1, 2],
"scores": [
1.4063001, 1.2531, 1.1550001, 1.1359, 1.4063001,
1.2218001, 1.2531, 1.2218001, 0.70239997, 0.6146, 0.6353,
0.5093, 1.0990001, 1.0349001, 1.1359, 1.0553
],
"ids": [1, 2, 3, 4, 5, 6, 7, 8]
}
}
