TablestoreVectorStore
表格存储(Tablestore)是一种全托管的 NoSQL 云数据库服务,能够存储海量的结构化和半结构化数据。
本笔记本展示了如何使用与 Tablestore 向量数据库相关的功能。
要使用表格存储,您必须先创建一个实例。具体步骤请参阅表格存储官方文档中的创建实例说明。
%pip install llama-index-vector-stores-tablestoreimport getpassimport os
os.environ["end_point"] = getpass.getpass("Tablestore end_point:")os.environ["instance_name"] = getpass.getpass("Tablestore instance_name:")os.environ["access_key_id"] = getpass.getpass("Tablestore access_key_id:")os.environ["access_key_secret"] = getpass.getpass( "Tablestore access_key_secret:")创建向量存储。
import os
from llama_index.core import MockEmbeddingfrom llama_index.core.schema import TextNodefrom llama_index.core.vector_stores import ( VectorStoreQuery, MetadataFilters, MetadataFilter, FilterCondition, FilterOperator,)from llama_index.core.vector_stores.types import ( VectorStoreQueryMode,)from tablestore import FieldSchema, FieldType, VectorMetricType
from llama_index.vector_stores.tablestore import TablestoreVectorStore
vector_dimension = 4
store = TablestoreVectorStore( endpoint=os.getenv("end_point"), instance_name=os.getenv("instance_name"), access_key_id=os.getenv("access_key_id"), access_key_secret=os.getenv("access_key_secret"), vector_dimension=vector_dimension, vector_metric_type=VectorMetricType.VM_COSINE, # optional: custom metadata mapping is used to filter non-vector fields. metadata_mappings=[ FieldSchema( "type", FieldType.KEYWORD, index=True, enable_sort_and_agg=True ), FieldSchema( "time", FieldType.LONG, index=True, enable_sort_and_agg=True ), ],)创建表和索引。
store.create_table_if_not_exist()store.create_search_index_if_not_exist()新建一个用于测试的模拟嵌入。
embedder = MockEmbedding(vector_dimension)准备一些文档。
texts = [ TextNode( id_="1", text="The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.", metadata={"type": "a", "time": 1995}, ), TextNode( id_="2", text="When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.", metadata={"type": "a", "time": 1990}, ), TextNode( id_="3", text="An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.", metadata={"type": "a", "time": 2009}, ), TextNode( id_="4", text="A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into thed of a C.E.O.", metadata={"type": "a", "time": 2023}, ), TextNode( id_="5", text="A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.", metadata={"type": "b", "time": 2018}, ), TextNode( id_="6", text="Two detectives, a rookie and a veteran, hunt a serial killer who uses the seven deadly sins as his motives.", metadata={"type": "c", "time": 2010}, ), TextNode( id_="7", text="An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son.", metadata={"type": "a", "time": 2023}, ),]for t in texts: t.embedding = embedder.get_text_embedding(t.text)编写一些文档。
store.add(texts)['1', '2', '3', '4', '5', '6', '7']删除文档。
store.delete("1")使用筛选器进行查询。
store.query( query=VectorStoreQuery( query_embedding=embedder.get_text_embedding("nature fight physical"), similarity_top_k=5, filters=MetadataFilters( filters=[ MetadataFilter( key="type", value="a", operator=FilterOperator.EQ ), MetadataFilter( key="time", value=2020, operator=FilterOperator.LTE ), ], condition=FilterCondition.AND, ), ),)VectorStoreQueryResult(nodes=[TextNode(id_='1', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 1995, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), TextNode(id_='2', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 1990, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), TextNode(id_='3', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 2009, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}')], similarities=[1.0, 1.0, 1.0], ids=['1', 
'2', '3'])全文搜索:查询模式 = 文本。
query_result = store.query( query=VectorStoreQuery( mode=VectorStoreQueryMode.TEXT_SEARCH, query_str="computer", similarity_top_k=5, ),)print(query_result)VectorStoreQueryResult(nodes=[TextNode(id_='5', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 2018, 'type': 'b'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}')], similarities=[2.673976421356201], ids=['5'])混合查询。
# HYBRID mode: both a query embedding and a query string are supplied.
query_result = store.query(
    query=VectorStoreQuery(
        mode=VectorStoreQueryMode.HYBRID,
        query_embedding=embedder.get_text_embedding("nature fight physical"),
        query_str="python",
        similarity_top_k=5,
    ),
)
print(query_result)

VectorStoreQueryResult(nodes=[TextNode(id_='1', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 1995, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), TextNode(id_='2', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 1990, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), TextNode(id_='3', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 2009, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), TextNode(id_='4', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 2023, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into thed of a C.E.O.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), TextNode(id_='5', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 2018, 'type': 'b'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}')], similarities=[1.0, 1.0, 1.0, 1.0, 1.0], ids=['1', '2', '3', '4', '5'])