JSON查询引擎
JSON查询引擎用于查询符合某个JSON Schema(JSON模式)的JSON文档。
该JSON Schema会作为上下文放入提示词中,用于将自然语言查询转换为结构化的JSON Path查询;随后执行这个JSON Path查询来检索数据,以回答给定的问题。
如果你在Colab上打开这个Notebook,你可能需要安装LlamaIndex 🦙。
In [ ]:
Copied!
%pip install llama-index-llms-openai
%pip install llama-index-llms-openai
In [ ]:
Copied!
!pip install llama-index
!pip install llama-index
In [ ]:
Copied!
# First, install the jsonpath-ng package which is used by default to parse & execute the JSONPath queries.
!pip install jsonpath-ng
# 首先安装jsonpath-ng包,这是默认用于解析和执行JSONPath查询的工具。
!pip install jsonpath-ng
Requirement already satisfied: jsonpath-ng in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (1.5.3)
Requirement already satisfied: ply in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (from jsonpath-ng) (3.11)
Requirement already satisfied: six in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (from jsonpath-ng) (1.16.0)
Requirement already satisfied: decorator in /Users/loganmarkewich/llama_index/llama-index/lib/python3.9/site-packages (from jsonpath-ng) (5.1.1)
WARNING: You are using pip version 21.2.4; however, version 23.2.1 is available.
You should consider upgrading via the '/Users/loganmarkewich/llama_index/llama-index/bin/python3 -m pip install --upgrade pip' command.
In [ ]:
Copied!
import logging
import sys
# Route log output to stdout at INFO level.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
# Attach an explicit stdout handler to the root logger as well.
# NOTE(review): if basicConfig already installed its own stdout handler,
# each record may be printed twice -- confirm this is intended.
root_logger = logging.getLogger()
root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
import logging
import sys
# Route log output to stdout at INFO level.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
# Attach an explicit stdout handler to the root logger as well.
# NOTE(review): if basicConfig already installed its own stdout handler,
# each record may be printed twice -- confirm this is intended.
root_logger = logging.getLogger()
root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))
In [ ]:
Copied!
import os
import openai
# Placeholder value: substitute a real OpenAI API key before running.
os.environ.update(OPENAI_API_KEY="YOUR_KEY_HERE")
import os
import openai
# Placeholder value: substitute a real OpenAI API key before running.
os.environ.update(OPENAI_API_KEY="YOUR_KEY_HERE")
In [ ]:
Copied!
from IPython.display import Markdown, display
from IPython.display import Markdown, display
In [ ]:
Copied!
# Sample JSON document used for the queries: two blog posts and three
# comments, each comment pointing at a post through "blogPostId".
json_value = {
    "blogPosts": [
        {
            "id": 1,
            "title": "First blog post",
            "content": "This is my first blog post",
        },
        {
            "id": 2,
            "title": "Second blog post",
            "content": "This is my second blog post",
        },
    ],
    "comments": [
        {"id": 1, "content": "Nice post!", "username": "jerry", "blogPostId": 1},
        {
            "id": 2,
            "content": "Interesting thoughts",
            "username": "simon",
            "blogPostId": 2,
        },
        {
            "id": 3,
            "content": "Loved reading this!",
            "username": "simon",
            "blogPostId": 2,
        },
    ],
}
# JSON Schema (draft-07) describing the sample document: a top-level object
# holding a "blogPosts" array and a "comments" array, both required.
json_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "description": "Schema for a very simple blog post app",
    "type": "object",
    "properties": {
        # Each blog post is an object with id / title / content.
        "blogPosts": {
            "description": "List of blog posts",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "Unique identifier for the blog post",
                        "type": "integer",
                    },
                    "title": {
                        "description": "Title of the blog post",
                        "type": "string",
                    },
                    "content": {
                        "description": "Content of the blog post",
                        "type": "string",
                    },
                },
                "required": ["id", "title", "content"],
            },
        },
        # Each comment carries its author and the id of the post it belongs to.
        "comments": {
            "description": "List of comments on blog posts",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "Unique identifier for the comment",
                        "type": "integer",
                    },
                    "content": {
                        "description": "Content of the comment",
                        "type": "string",
                    },
                    "username": {
                        "description": "Username of the commenter (lowercased)",
                        "type": "string",
                    },
                    "blogPostId": {
                        "description": "Identifier for the blog post to which the comment belongs",
                        "type": "integer",
                    },
                },
                "required": ["id", "content", "username", "blogPostId"],
            },
        },
    },
    "required": ["blogPosts", "comments"],
}
# Sample JSON document used for the queries: two blog posts and three
# comments, each comment pointing at a post through "blogPostId".
json_value = {
    "blogPosts": [
        {
            "id": 1,
            "title": "First blog post",
            "content": "This is my first blog post",
        },
        {
            "id": 2,
            "title": "Second blog post",
            "content": "This is my second blog post",
        },
    ],
    "comments": [
        {"id": 1, "content": "Nice post!", "username": "jerry", "blogPostId": 1},
        {
            "id": 2,
            "content": "Interesting thoughts",
            "username": "simon",
            "blogPostId": 2,
        },
        {
            "id": 3,
            "content": "Loved reading this!",
            "username": "simon",
            "blogPostId": 2,
        },
    ],
}
# JSON Schema (draft-07) that the sample document above conforms to: a
# top-level object holding a "blogPosts" array and a "comments" array.
json_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "description": "一个非常简单的博客应用的模式",
    "type": "object",
    "properties": {
        # Each blog post is an object with id / title / content.
        "blogPosts": {
            "description": "博客文章列表",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "博客文章的唯一标识符",
                        "type": "integer",
                    },
                    "title": {
                        "description": "博客文章的标题",
                        "type": "string",
                    },
                    "content": {
                        "description": "博客文章的内容",
                        "type": "string",
                    },
                },
                "required": ["id", "title", "content"],
            },
        },
        # Each comment carries its author and the id of the post it belongs to.
        "comments": {
            "description": "博客文章的评论列表",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "评论的唯一标识符",
                        "type": "integer",
                    },
                    "content": {
                        "description": "评论的内容",
                        "type": "string",
                    },
                    "username": {
                        "description": "评论者的用户名(小写)",
                        "type": "string",
                    },
                    "blogPostId": {
                        "description": "评论所属博客文章的标识符",
                        "type": "integer",
                    },
                },
                "required": ["id", "content", "username", "blogPostId"],
            },
        },
    },
    "required": ["blogPosts", "comments"],
}
In [ ]:
Copied!
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.struct_store import JSONQueryEngine
# Both engines are built over the same document, schema and LLM.
llm = OpenAI(model="gpt-4")
shared_engine_args = {
    "json_value": json_value,
    "json_schema": json_schema,
    "llm": llm,
}
# Engine left at its default response handling.
nl_query_engine = JSONQueryEngine(**shared_engine_args)
# Same configuration, but with synthesize_response switched off.
# NOTE(review): judging by the sample output, this one returns the raw
# JSON Path result instead of a natural-language answer -- confirm.
raw_query_engine = JSONQueryEngine(
    **shared_engine_args,
    synthesize_response=False,
)
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.struct_store import JSONQueryEngine
# Both engines are built over the same document, schema and LLM.
llm = OpenAI(model="gpt-4")
shared_engine_args = {
    "json_value": json_value,
    "json_schema": json_schema,
    "llm": llm,
}
# Engine left at its default response handling.
nl_query_engine = JSONQueryEngine(**shared_engine_args)
# Same configuration, but with synthesize_response switched off.
# NOTE(review): judging by the sample output, this one returns the raw
# JSON Path result instead of a natural-language answer -- confirm.
raw_query_engine = JSONQueryEngine(
    **shared_engine_args,
    synthesize_response=False,
)
In [ ]:
Copied!
# Put the same question to both engines so their answers can be compared.
question = "What comments has Jerry been writing?"
nl_response = nl_query_engine.query(question)
raw_response = raw_query_engine.query(question)
# Put the same question to both engines so their answers can be compared.
question = "Jerry写了哪些评论?"
nl_response = nl_query_engine.query(question)
raw_response = raw_query_engine.query(question)
In [ ]:
Copied!
# Render each response as Markdown under its own HTML heading.
nl_html = f"<h1>Natural language Response</h1><br><b>{nl_response}</b>"
display(Markdown(nl_html))
raw_html = f"<h1>Raw JSON Response</h1><br><b>{raw_response}</b>"
display(Markdown(raw_html))
# Render each response as Markdown under its own HTML heading.
# The <h1>/<br>/<b> markup inside the f-strings had been swallowed by HTML
# rendering, leaving unterminated string literals; it is restored here.
display(
    Markdown(f"<h1>自然语言响应</h1><br><b>{nl_response}</b>")
)
display(Markdown(f"<h1>原始JSON响应</h1><br><b>{raw_response}</b>"))
自然语言响应
Jerry has written the comment "Nice post!".
原始JSON响应
["Nice post!"]
In [ ]:
Copied!
# Show the JSON Path query string stored in the response metadata.
# The same lookup applies to raw_response.
json_path_query = nl_response.metadata["json_path_response_str"]
print(json_path_query)
# Show the JSON Path query string stored in the response metadata.
# The same lookup applies to raw_response.
json_path_query = nl_response.metadata["json_path_response_str"]
print(json_path_query)
$.comments[?(@.username=='jerry')].content