优化Qdrant性能:三种场景
不同的使用场景需要在内存使用、搜索速度和精度之间找到不同的平衡。Qdrant 被设计为灵活且可定制,因此您可以根据自己的具体需求进行调整。
本指南将引导您了解三种主要的优化策略:
- 高速搜索与低内存使用
- 高精度与低内存使用
- 高精度与高速搜索

1. 低内存使用的高速搜索
为了实现高搜索速度并最小化内存使用,您可以将向量存储在磁盘上,同时尽量减少磁盘读取次数。向量量化是一种压缩向量的技术,允许更多的向量存储在内存中,从而减少从磁盘读取的需求。
要配置内存中的量化,同时保留磁盘上的原始向量,您需要使用以下参数创建一个集合:
on_disk: 将原始向量存储在磁盘上。quantization_config: 使用scalar方法将量化向量压缩为int8。always_ram: 将量化向量保留在RAM中。
PUT /collections/{collection_name}
{
"vectors": {
"size": 768,
"distance": "Cosine",
"on_disk": true
},
"quantization_config": {
"scalar": {
"type": "int8",
"always_ram": true
}
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.create_collection(
collection_name="{collection_name}",
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE, on_disk=True),
quantization_config=models.ScalarQuantization(
scalar=models.ScalarQuantizationConfig(
type=models.ScalarType.INT8,
always_ram=True,
),
),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.createCollection("{collection_name}", {
vectors: {
size: 768,
distance: "Cosine",
on_disk: true,
},
quantization_config: {
scalar: {
type: "int8",
always_ram: true,
},
},
});
use qdrant_client::qdrant::{
CreateCollectionBuilder, Distance, QuantizationType, ScalarQuantizationBuilder,
VectorParamsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.create_collection(
CreateCollectionBuilder::new("{collection_name}")
.vectors_config(VectorParamsBuilder::new(768, Distance::Cosine))
.quantization_config(
ScalarQuantizationBuilder::default()
.r#type(QuantizationType::Int8.into())
.always_ram(true),
),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Collections.CreateCollection;
import io.qdrant.client.grpc.Collections.Distance;
import io.qdrant.client.grpc.Collections.OptimizersConfigDiff;
import io.qdrant.client.grpc.Collections.QuantizationConfig;
import io.qdrant.client.grpc.Collections.QuantizationType;
import io.qdrant.client.grpc.Collections.ScalarQuantization;
import io.qdrant.client.grpc.Collections.VectorParams;
import io.qdrant.client.grpc.Collections.VectorsConfig;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.createCollectionAsync(
CreateCollection.newBuilder()
.setCollectionName("{collection_name}")
.setVectorsConfig(
VectorsConfig.newBuilder()
.setParams(
VectorParams.newBuilder()
.setSize(768)
.setDistance(Distance.Cosine)
.setOnDisk(true)
.build())
.build())
.setQuantizationConfig(
QuantizationConfig.newBuilder()
.setScalar(
ScalarQuantization.newBuilder()
.setType(QuantizationType.Int8)
.setAlwaysRam(true)
.build())
.build())
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.CreateCollectionAsync(
collectionName: "{collection_name}",
vectorsConfig: new VectorParams { Size = 768, Distance = Distance.Cosine, OnDisk = true },
quantizationConfig: new QuantizationConfig
{
Scalar = new ScalarQuantization { Type = QuantizationType.Int8, AlwaysRam = true }
}
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.CreateCollection(context.Background(), &qdrant.CreateCollection{
CollectionName: "{collection_name}",
VectorsConfig: qdrant.NewVectorsConfig(&qdrant.VectorParams{
Size: 768,
Distance: qdrant.Distance_Cosine,
OnDisk: qdrant.PtrOf(true),
}),
QuantizationConfig: qdrant.NewQuantizationScalar(&qdrant.ScalarQuantization{
Type: qdrant.QuantizationType_Int8,
AlwaysRam: qdrant.PtrOf(true),
}),
})
禁用重新评分以加快搜索速度(可选)
这是完全可选的。通过搜索params禁用重新评分可以进一步减少磁盘读取次数。请注意,这可能会略微降低精度。
POST /collections/{collection_name}/points/query
{
"query": [0.2, 0.1, 0.9, 0.7],
"params": {
"quantization": {
"rescore": false
}
},
"limit": 10
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
query=[0.2, 0.1, 0.9, 0.7],
search_params=models.SearchParams(
quantization=models.QuantizationSearchParams(rescore=False)
),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
query: [0.2, 0.1, 0.9, 0.7],
params: {
quantization: {
rescore: false,
},
},
});
use qdrant_client::qdrant::{
QuantizationSearchParamsBuilder, QueryPointsBuilder, SearchParamsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.query(
QueryPointsBuilder::new("{collection_name}")
.query(vec![0.2, 0.1, 0.9, 0.7])
.limit(3)
.params(
SearchParamsBuilder::default()
.quantization(QuantizationSearchParamsBuilder::default().rescore(false)),
),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.QuantizationSearchParams;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.SearchParams;
import static io.qdrant.client.QueryFactory.nearest;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(nearest(0.2f, 0.1f, 0.9f, 0.7f))
.setParams(
SearchParams.newBuilder()
.setQuantization(
QuantizationSearchParams.newBuilder().setRescore(false).build())
.build())
.setLimit(3)
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new float[] { 0.2f, 0.1f, 0.9f, 0.7f },
searchParams: new SearchParams
{
Quantization = new QuantizationSearchParams { Rescore = false }
},
limit: 3
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQuery(0.2, 0.1, 0.9, 0.7),
Params: &qdrant.SearchParams{
Quantization: &qdrant.QuantizationSearchParams{
Rescore: qdrant.PtrOf(true),
},
},
})
2. 高精度与低内存使用
如果您需要高精度但RAM有限,可以将向量和HNSW索引都存储在磁盘上。这种设置减少了内存使用,同时保持了搜索精度。
要存储向量on_disk,你需要配置向量和HNSW索引:
PUT /collections/{collection_name}
{
"vectors": {
"size": 768,
"distance": "Cosine",
"on_disk": true
},
"hnsw_config": {
"on_disk": true
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.create_collection(
collection_name="{collection_name}",
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE, on_disk=True),
hnsw_config=models.HnswConfigDiff(on_disk=True),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.createCollection("{collection_name}", {
vectors: {
size: 768,
distance: "Cosine",
on_disk: true,
},
hnsw_config: {
on_disk: true,
},
});
use qdrant_client::qdrant::{
CreateCollectionBuilder, Distance, HnswConfigDiffBuilder, VectorParamsBuilder,
};
use qdrant_client::{Qdrant, QdrantError};
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.create_collection(
CreateCollectionBuilder::new("{collection_name}")
.vectors_config(VectorParamsBuilder::new(768, Distance::Cosine).on_disk(true))
.hnsw_config(HnswConfigDiffBuilder::default().on_disk(true)),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Collections.CreateCollection;
import io.qdrant.client.grpc.Collections.Distance;
import io.qdrant.client.grpc.Collections.HnswConfigDiff;
import io.qdrant.client.grpc.Collections.OptimizersConfigDiff;
import io.qdrant.client.grpc.Collections.VectorParams;
import io.qdrant.client.grpc.Collections.VectorsConfig;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.createCollectionAsync(
CreateCollection.newBuilder()
.setCollectionName("{collection_name}")
.setVectorsConfig(
VectorsConfig.newBuilder()
.setParams(
VectorParams.newBuilder()
.setSize(768)
.setDistance(Distance.Cosine)
.setOnDisk(true)
.build())
.build())
.setHnswConfig(HnswConfigDiff.newBuilder().setOnDisk(true).build())
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.CreateCollectionAsync(
collectionName: "{collection_name}",
vectorsConfig: new VectorParams { Size = 768, Distance = Distance.Cosine, OnDisk = true},
hnswConfig: new HnswConfigDiff { OnDisk = true }
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.CreateCollection(context.Background(), &qdrant.CreateCollection{
CollectionName: "{collection_name}",
VectorsConfig: qdrant.NewVectorsConfig(&qdrant.VectorParams{
Size: 768,
Distance: qdrant.Distance_Cosine,
OnDisk: qdrant.PtrOf(true),
}),
HnswConfig: &qdrant.HnswConfigDiff{
OnDisk: qdrant.PtrOf(true),
},
})
提高精度
增加HNSW索引的ef和m参数以提高精度,即使在内存有限的情况下:
...
"hnsw_config": {
"m": 64,
"ef_construct": 512,
"on_disk": true
}
...
注意: 此设置的性能取决于磁盘的IOPS(每秒输入/输出操作数)。您可以使用 fio 来测量磁盘的IOPS。
3. 高精度与高速搜索
对于需要高速和高精度的场景,尽可能将数据保留在RAM中。应用量化并重新评分以调整精度。
以下是您如何为集合配置标量量化的方法:
PUT /collections/{collection_name}
{
"vectors": {
"size": 768,
"distance": "Cosine"
},
"quantization_config": {
"scalar": {
"type": "int8",
"always_ram": true
}
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.create_collection(
collection_name="{collection_name}",
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
quantization_config=models.ScalarQuantization(
scalar=models.ScalarQuantizationConfig(
type=models.ScalarType.INT8,
always_ram=True,
),
),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.createCollection("{collection_name}", {
vectors: {
size: 768,
distance: "Cosine",
},
quantization_config: {
scalar: {
type: "int8",
always_ram: true,
},
},
});
use qdrant_client::qdrant::{
CreateCollectionBuilder, Distance, QuantizationType, ScalarQuantizationBuilder,
VectorParamsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.create_collection(
CreateCollectionBuilder::new("{collection_name}")
.vectors_config(VectorParamsBuilder::new(768, Distance::Cosine))
.quantization_config(
ScalarQuantizationBuilder::default()
.r#type(QuantizationType::Int8.into())
.always_ram(true),
),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Collections.CreateCollection;
import io.qdrant.client.grpc.Collections.Distance;
import io.qdrant.client.grpc.Collections.OptimizersConfigDiff;
import io.qdrant.client.grpc.Collections.QuantizationConfig;
import io.qdrant.client.grpc.Collections.QuantizationType;
import io.qdrant.client.grpc.Collections.ScalarQuantization;
import io.qdrant.client.grpc.Collections.VectorParams;
import io.qdrant.client.grpc.Collections.VectorsConfig;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.createCollectionAsync(
CreateCollection.newBuilder()
.setCollectionName("{collection_name}")
.setVectorsConfig(
VectorsConfig.newBuilder()
.setParams(
VectorParams.newBuilder()
.setSize(768)
.setDistance(Distance.Cosine)
.build())
.build())
.setQuantizationConfig(
QuantizationConfig.newBuilder()
.setScalar(
ScalarQuantization.newBuilder()
.setType(QuantizationType.Int8)
.setAlwaysRam(true)
.build())
.build())
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.CreateCollectionAsync(
collectionName: "{collection_name}",
vectorsConfig: new VectorParams { Size = 768, Distance = Distance.Cosine},
quantizationConfig: new QuantizationConfig
{
Scalar = new ScalarQuantization { Type = QuantizationType.Int8, AlwaysRam = true }
}
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.CreateCollection(context.Background(), &qdrant.CreateCollection{
CollectionName: "{collection_name}",
VectorsConfig: qdrant.NewVectorsConfig(&qdrant.VectorParams{
Size: 768,
Distance: qdrant.Distance_Cosine,
}),
QuantizationConfig: qdrant.NewQuantizationScalar(&qdrant.ScalarQuantization{
Type: qdrant.QuantizationType_Int8,
AlwaysRam: qdrant.PtrOf(true),
}),
})
微调搜索参数
您可以调整搜索参数如 hnsw_ef 和 exact 来平衡速度和精度:
关键参数:
hnsw_ef: 搜索期间访问的邻居数量(值越大 = 精度越高,速度越慢)。exact: 设置为true进行精确搜索,速度较慢但更准确。您可以使用它来比较不同hnsw_ef值的搜索结果与真实值的差异。
POST /collections/{collection_name}/points/query
{
"query": [0.2, 0.1, 0.9, 0.7],
"params": {
"hnsw_ef": 128,
"exact": false
},
"limit": 3
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.query_points(
collection_name="{collection_name}",
query=[0.2, 0.1, 0.9, 0.7],
search_params=models.SearchParams(hnsw_ef=128, exact=False),
limit=3,
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.query("{collection_name}", {
query: [0.2, 0.1, 0.9, 0.7],
params: {
hnsw_ef: 128,
exact: false,
},
limit: 3,
});
use qdrant_client::qdrant::{QueryPointsBuilder, SearchParamsBuilder};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.query(
QueryPointsBuilder::new("{collection_name}")
.query(vec![0.2, 0.1, 0.9, 0.7])
.limit(3)
.params(SearchParamsBuilder::default().hnsw_ef(128).exact(false)),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Points.QueryPoints;
import io.qdrant.client.grpc.Points.SearchParams;
import static io.qdrant.client.QueryFactory.nearest;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client.queryAsync(
QueryPoints.newBuilder()
.setCollectionName("{collection_name}")
.setQuery(nearest(0.2f, 0.1f, 0.9f, 0.7f))
.setParams(SearchParams.newBuilder().setHnswEf(128).setExact(false).build())
.setLimit(3)
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.QueryAsync(
collectionName: "{collection_name}",
query: new float[] { 0.2f, 0.1f, 0.9f, 0.7f },
searchParams: new SearchParams { HnswEf = 128, Exact = false },
limit: 3
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.Query(context.Background(), &qdrant.QueryPoints{
CollectionName: "{collection_name}",
Query: qdrant.NewQuery(0.2, 0.1, 0.9, 0.7),
Params: &qdrant.SearchParams{
HnswEf: qdrant.PtrOf(uint64(128)),
Exact: qdrant.PtrOf(false),
},
})
平衡延迟和吞吐量
在优化搜索性能时,延迟和吞吐量是两个主要需要考虑的指标:
- 延迟: 单个请求所需的时间。
- 吞吐量: 每秒处理的请求数量。
以下优化方法并非相互排斥,但在某些情况下,可能更倾向于优化其中一种或另一种。
最小化延迟
为了最小化延迟,您可以设置Qdrant以尽可能多地使用核心来处理单个请求。 您可以通过将集合中的段数设置为系统中的核心数来实现这一点。
在这种情况下,每个段将并行处理,最终结果将更快获得。
PUT /collections/{collection_name}
{
"vectors": {
"size": 768,
"distance": "Cosine"
},
"optimizers_config": {
"default_segment_number": 16
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.create_collection(
collection_name="{collection_name}",
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
optimizers_config=models.OptimizersConfigDiff(default_segment_number=16),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.createCollection("{collection_name}", {
vectors: {
size: 768,
distance: "Cosine",
},
optimizers_config: {
default_segment_number: 16,
},
});
use qdrant_client::qdrant::{
CreateCollectionBuilder, Distance, OptimizersConfigDiffBuilder, VectorParamsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.create_collection(
CreateCollectionBuilder::new("{collection_name}")
.vectors_config(VectorParamsBuilder::new(768, Distance::Cosine))
.optimizers_config(
OptimizersConfigDiffBuilder::default().default_segment_number(16),
),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Collections.CreateCollection;
import io.qdrant.client.grpc.Collections.Distance;
import io.qdrant.client.grpc.Collections.OptimizersConfigDiff;
import io.qdrant.client.grpc.Collections.VectorParams;
import io.qdrant.client.grpc.Collections.VectorsConfig;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.createCollectionAsync(
CreateCollection.newBuilder()
.setCollectionName("{collection_name}")
.setVectorsConfig(
VectorsConfig.newBuilder()
.setParams(
VectorParams.newBuilder()
.setSize(768)
.setDistance(Distance.Cosine)
.build())
.build())
.setOptimizersConfig(
OptimizersConfigDiff.newBuilder().setDefaultSegmentNumber(16).build())
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.CreateCollectionAsync(
collectionName: "{collection_name}",
vectorsConfig: new VectorParams { Size = 768, Distance = Distance.Cosine },
optimizersConfig: new OptimizersConfigDiff { DefaultSegmentNumber = 16 }
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.CreateCollection(context.Background(), &qdrant.CreateCollection{
CollectionName: "{collection_name}",
VectorsConfig: qdrant.NewVectorsConfig(&qdrant.VectorParams{
Size: 768,
Distance: qdrant.Distance_Cosine,
}),
OptimizersConfig: &qdrant.OptimizersConfigDiff{
DefaultSegmentNumber: qdrant.PtrOf(uint64(16)),
},
})
最大化吞吐量
为了最大化吞吐量,配置Qdrant以使用尽可能多的核心来并行处理多个请求。
要做到这一点,使用更少的分段(通常为2)以并行处理更多的请求。
大段受益于索引的大小和找到最近邻居所需的总体较少的向量比较。然而,它们将需要更多时间来构建HNSW索引。
PUT /collections/{collection_name}
{
"vectors": {
"size": 768,
"distance": "Cosine"
},
"optimizers_config": {
"default_segment_number": 2
}
}
from qdrant_client import QdrantClient, models
client = QdrantClient(url="http://localhost:6333")
client.create_collection(
collection_name="{collection_name}",
vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE),
optimizers_config=models.OptimizersConfigDiff(default_segment_number=2),
)
import { QdrantClient } from "@qdrant/js-client-rest";
const client = new QdrantClient({ host: "localhost", port: 6333 });
client.createCollection("{collection_name}", {
vectors: {
size: 768,
distance: "Cosine",
},
optimizers_config: {
default_segment_number: 2,
},
});
use qdrant_client::qdrant::{
CreateCollectionBuilder, Distance, OptimizersConfigDiffBuilder, VectorParamsBuilder,
};
use qdrant_client::Qdrant;
let client = Qdrant::from_url("http://localhost:6334").build()?;
client
.create_collection(
CreateCollectionBuilder::new("{collection_name}")
.vectors_config(VectorParamsBuilder::new(768, Distance::Cosine))
.optimizers_config(
OptimizersConfigDiffBuilder::default().default_segment_number(2),
),
)
.await?;
import io.qdrant.client.QdrantClient;
import io.qdrant.client.QdrantGrpcClient;
import io.qdrant.client.grpc.Collections.CreateCollection;
import io.qdrant.client.grpc.Collections.Distance;
import io.qdrant.client.grpc.Collections.OptimizersConfigDiff;
import io.qdrant.client.grpc.Collections.VectorParams;
import io.qdrant.client.grpc.Collections.VectorsConfig;
QdrantClient client =
new QdrantClient(QdrantGrpcClient.newBuilder("localhost", 6334, false).build());
client
.createCollectionAsync(
CreateCollection.newBuilder()
.setCollectionName("{collection_name}")
.setVectorsConfig(
VectorsConfig.newBuilder()
.setParams(
VectorParams.newBuilder()
.setSize(768)
.setDistance(Distance.Cosine)
.build())
.build())
.setOptimizersConfig(
OptimizersConfigDiff.newBuilder().setDefaultSegmentNumber(2).build())
.build())
.get();
using Qdrant.Client;
using Qdrant.Client.Grpc;
var client = new QdrantClient("localhost", 6334);
await client.CreateCollectionAsync(
collectionName: "{collection_name}",
vectorsConfig: new VectorParams { Size = 768, Distance = Distance.Cosine },
optimizersConfig: new OptimizersConfigDiff { DefaultSegmentNumber = 2 }
);
import (
"context"
"github.com/qdrant/go-client/qdrant"
)
client, err := qdrant.NewClient(&qdrant.Config{
Host: "localhost",
Port: 6334,
})
client.CreateCollection(context.Background(), &qdrant.CreateCollection{
CollectionName: "{collection_name}",
VectorsConfig: qdrant.NewVectorsConfig(&qdrant.VectorParams{
Size: 768,
Distance: qdrant.Distance_Cosine,
}),
OptimizersConfig: &qdrant.OptimizersConfigDiff{
DefaultSegmentNumber: qdrant.PtrOf(uint64(2)),
},
})
摘要
通过调整向量存储、量化和搜索参数等配置,您可以为不同的使用场景优化Qdrant:
- 低内存 + 高速: 使用向量量化。
- 高精度 + 低内存: 在磁盘上存储向量和HNSW索引。
- 高精度 + 高速: 将数据保存在RAM中,使用量化并重新评分。
- 延迟与吞吐量: 根据优先级调整段数。
选择最适合您使用场景的策略,以充分利用Qdrant的性能能力。
