使用 LangChain 的评估器进行评估

作者：

学习目标 - 完成本教程后，您应该能够：

将 LangChain 的准则（criteria）评估器应用程序转换为 flex flow。
使用 CustomConnection 来存储 API 密钥等机密信息（secrets）。

0. 安装依赖包

%%capture --no-stderr
%pip install -r ./requirements.txt

1. 使用 Prompt flow（提示流）跟踪您的 LangChain 评估器

初始化一个 pf 客户端

from promptflow.client import PFClient

# Client object used by the following cells to look up/create connections
# and to submit, inspect and visualize runs.
pf = PFClient()

创建一个自定义连接以保护您的 API 密钥

您可以将 API 密钥存放在自定义连接的 secrets（机密字段）中，以避免密钥以明文形式暴露。

import os
from dotenv import load_dotenv

from promptflow.entities import CustomConnection

# Name under which the custom connection is looked up and, if needed, created.
conn_name = "my_llm_connection"

try:
    # Reuse the connection if one with this name can be fetched.
    conn = pf.connections.get(name=conn_name)
    print("using existing connection")
except Exception:
    # The lookup failed (presumably because the connection does not exist yet),
    # so fall back to creating it from environment variables.
    # `except Exception` is used instead of a bare `except` so that
    # KeyboardInterrupt / SystemExit still propagate instead of silently
    # triggering connection creation.
    if "AZURE_OPENAI_API_KEY" not in os.environ:
        # load environment variables from .env file
        load_dotenv()

    # Non-sensitive settings go in `configs`; the API key goes in `secrets`.
    # A missing environment variable raises KeyError here.
    connection = CustomConnection(
        name=conn_name,
        configs={
            "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
        },
        secrets={
            # store API key
            # (example of an additional secret entry)
            # "anthropic_api_key": "<your-api-key>",
            "openai_api_key": os.environ["AZURE_OPENAI_API_KEY"],
        },
    )
    # Create the connection, note that all secret values will be scrubbed in the returned result
    conn = pf.connections.create_or_update(connection)
    print("successfully created connection")
print(conn)

在启用跟踪的情况下测试评估器

from eval_conciseness import LangChainEvaluator


# Sample input and prediction used for a single direct call of the evaluator.
sample_question = "What's 2+2?"
sample_answer = "What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four."

# Build the evaluator from the `conn` connection object and invoke it once.
evaluator = LangChainEvaluator(custom_connection=conn)
result = evaluator(prediction=sample_answer, input=sample_question)
print(result)

2. 使用 flow yaml 批量运行评估器

创建一个flow.flex.yaml文件来定义一个流程，该流程的入口指向我们定义的python函数。

data = "./data.jsonl"  # path to the data file

# Values handed to the flow via `init`; the custom connection is referenced by name.
init_kwargs = {"custom_connection": "my_llm_connection"}

# Map the flow inputs `prediction` and `input` to the same-named fields of the data file.
input_mapping = {
    "prediction": "${data.prediction}",
    "input": "${data.input}",
}

# Create a run with the flow definition and the data file.
base_run = pf.run(
    flow="./flow.flex.yaml",
    data=data,
    init=init_kwargs,
    column_mapping=input_mapping,
    stream=True,
)

# Fetch the details of the batch run and display the first 10 entries.
# NOTE(review): `.head(10)` suggests a pandas DataFrame is returned — confirm.
details = pf.get_details(base_run)
details.head(10)

# Visualize the batch run; the API takes a list of runs.
pf.visualize([base_run])