
Agent Chat with custom model loading


In this notebook, we demonstrate how to define and load a custom model, and what protocol it needs to adhere to.

Note: depending on the model you use, you may need to adjust the Agent's default prompts.
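
For example, if your model does not follow the long default system prompt well, one option is to pass a shorter system_message when constructing the assistant. A minimal sketch, assuming a config list like the one built later in this notebook (the prompt text here is purely illustrative):

from autogen import AssistantAgent

# Hypothetical shorter prompt for models that struggle with the default one
assistant = AssistantAgent(
    "assistant",
    system_message="You are a helpful assistant. Solve tasks with concise Python code.",
    llm_config={"config_list": config_list_custom},  # config_list_custom is defined later in this notebook
)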

Requirements

Some extra dependencies are needed for this notebook, which can be installed via pip:

pip install autogen-agentchat~=0.2 torch transformers sentencepiece

For more information, please refer to the installation guide.

from types import SimpleNamespace

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

import autogen
from autogen import AssistantAgent, UserProxyAgent

Create and configure the custom model

A custom model class can be created in many ways, but it needs to adhere to the ModelClient protocol and response structure that is defined in client.py and shown below.

The response protocol has some minimum requirements, but can be extended to include any additional information that is needed. Message retrieval can therefore be customized, but needs to return a list of strings or a list of ModelClientResponseProtocol.Choice.Message objects.

class ModelClient(Protocol):
    """
    A client class must implement the following methods:
    - create must return a response object that implements the ModelClientResponseProtocol
    - cost must return the cost of the response
    - get_usage must return a dict with the following keys:
        - prompt_tokens
        - completion_tokens
        - total_tokens
        - cost
        - model

    This class is used to create a client that can be used by OpenAIWrapper.
    The response returned from create must adhere to the ModelClientResponseProtocol but can be extended however needed.
    The message_retrieval method must be implemented to return a list of str or a list of messages from the response.
    """

    RESPONSE_USAGE_KEYS = ["prompt_tokens", "completion_tokens", "total_tokens", "cost", "model"]

    class ModelClientResponseProtocol(Protocol):
        class Choice(Protocol):
            class Message(Protocol):
                content: Optional[str]

            message: Message

        choices: List[Choice]
        model: str

    def create(self, params) -> ModelClientResponseProtocol:
        ...

    def message_retrieval(
        self, response: ModelClientResponseProtocol
    ) -> Union[List[str], List[ModelClient.ModelClientResponseProtocol.Choice.Message]]:
        """
        Retrieve and return a list of strings or a list of Choice.Message from the response.

        NOTE: if a list of Choice.Message is returned, it currently needs to contain the fields of OpenAI's ChatCompletion Message object,
        since that is expected for function or tool calling in the rest of the codebase at the moment, unless a custom agent is being used.
        """
        ...

    def cost(self, response: ModelClientResponseProtocol) -> float:
        ...

    @staticmethod
    def get_usage(response: ModelClientResponseProtocol) -> Dict:
        """Return usage summary of the response using RESPONSE_USAGE_KEYS."""
        ...

Example of a simple custom client

Following the huggingface usage example, this example uses Mistral's Open-Orca.

For the response object, python's SimpleNamespace is used to create a simple object that can store the response data; however, any object that adheres to the ClientResponseProtocol can be used.

# custom client with custom model loader


class CustomModelClient:
    def __init__(self, config, **kwargs):
        print(f"CustomModelClient config: {config}")
        self.device = config.get("device", "cpu")
        self.model = AutoModelForCausalLM.from_pretrained(config["model"]).to(self.device)
        self.model_name = config["model"]
        self.tokenizer = AutoTokenizer.from_pretrained(config["model"], use_fast=False)
        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

        # params are set by the user and consumed by the user since they are providing a custom model
        # so anything can be done here
        gen_config_params = config.get("params", {})
        self.max_length = gen_config_params.get("max_length", 256)

        print(f"Loaded model {config['model']} to {self.device}")

    def create(self, params):
        if params.get("stream", False) and "messages" in params:
            raise NotImplementedError("Local models do not support streaming.")
        else:
            num_of_responses = params.get("n", 1)

            # can create my own data response class
            # here using SimpleNamespace for simplicity
            # as long as it adheres to the ClientResponseProtocol

            response = SimpleNamespace()

            inputs = self.tokenizer.apply_chat_template(
                params["messages"], return_tensors="pt", add_generation_prompt=True
            ).to(self.device)
            inputs_length = inputs.shape[-1]

            # add inputs_length to max_length
            max_length = self.max_length + inputs_length
            generation_config = GenerationConfig(
                max_length=max_length,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
            )

            response.choices = []
            response.model = self.model_name

            for _ in range(num_of_responses):
                outputs = self.model.generate(inputs, generation_config=generation_config)
                # Decode only the newly generated text, excluding the prompt
                text = self.tokenizer.decode(outputs[0, inputs_length:])
                choice = SimpleNamespace()
                choice.message = SimpleNamespace()
                choice.message.content = text
                choice.message.function_call = None
                response.choices.append(choice)

            return response

    def message_retrieval(self, response):
        """Retrieve the messages from the response."""
        choices = response.choices
        return [choice.message.content for choice in choices]

    def cost(self, response) -> float:
        """Calculate the cost of the response."""
        response.cost = 0
        return 0

    @staticmethod
    def get_usage(response):
        # returns a dict of prompt_tokens, completion_tokens, total_tokens, cost, model
        # if usage needs to be tracked, else None
        return {}
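
The minimal client above returns an empty dict from get_usage, which means usage is not tracked. If you do want usage reported, the dict should contain the RESPONSE_USAGE_KEYS listed in the protocol. A hedged sketch, assuming create() had stored prompt_tokens and completion_tokens on the response object (the client above does not):

# Hypothetical usage-tracking variant of get_usage, assuming create()
# recorded token counts on the response object
@staticmethod
def get_usage(response):
    return {
        "prompt_tokens": response.prompt_tokens,
        "completion_tokens": response.completion_tokens,
        "total_tokens": response.prompt_tokens + response.completion_tokens,
        "cost": response.cost,
        "model": response.model,
    }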

Set your API Endpoint

The config_list_from_json function loads a list of configurations from an environment variable or a json file.

It first looks for an environment variable of the specified name ("OAI_CONFIG_LIST" in this example), which needs to be a valid json string. If that variable is not found, it looks for a json file with the same name. It filters the configs by models (you can also filter by other keys).

The json looks like the following:

[
    {
        "model": "gpt-4",
        "api_key": "<your OpenAI API key here>"
    },
    {
        "model": "gpt-4",
        "api_key": "<your Azure OpenAI API key here>",
        "base_url": "<your Azure OpenAI API base here>",
        "api_type": "azure",
        "api_version": "2024-02-01"
    },
    {
        "model": "gpt-4-32k",
        "api_key": "<your Azure OpenAI API key here>",
        "base_url": "<your Azure OpenAI API base here>",
        "api_type": "azure",
        "api_version": "2024-02-01"
    }
]

You can set the value of config_list in any way you prefer. Please refer to this notebook for full code examples of the different methods.
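
For instance, instead of a json file you could also build the list directly in Python; a minimal sketch, assuming your OpenAI key is exported as an environment variable (the key names mirror the json entries above):

import os

# Hypothetical in-code alternative to the OAI_CONFIG_LIST file
config_list = [
    {
        "model": "gpt-4",
        "api_key": os.environ["OPENAI_API_KEY"],
    },
]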

Set the config for the custom model

You can add any parameters that are needed for the custom model loading in the same configuration list.

It is important to add the model_client_cls field and set it to a string that corresponds to the class name: "CustomModelClient".

{
    "model": "Open-Orca/Mistral-7B-OpenOrca",
    "model_client_cls": "CustomModelClient",
    "device": "cuda",
    "n": 1,
    "params": {
        "max_length": 1000,
    }
},

config_list_custom = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model_client_cls": ["CustomModelClient"]},
)

Construct Agents

Construct a simple conversation between a User proxy and an Assistant agent.

assistant = AssistantAgent("assistant", llm_config={"config_list": config_list_custom})
user_proxy = UserProxyAgent(
    "user_proxy",
    code_execution_config={
        "work_dir": "coding",
        "use_docker": False,  # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.
    },
)

Register the custom client class to the assistant agent

assistant.register_model_client(model_client_cls=CustomModelClient)
user_proxy.initiate_chat(assistant, message="Write python code to print Hello World!")

Register a custom client class with a pre-loaded model

If you want more control over when the model gets loaded, you can load the model yourself and pass it as an argument to the CustomClient during registration.

# custom client with custom model loader


class CustomModelClientWithArguments(CustomModelClient):
    def __init__(self, config, loaded_model, tokenizer, **kwargs):
        print(f"CustomModelClientWithArguments config: {config}")

        self.model_name = config["model"]
        self.model = loaded_model
        self.tokenizer = tokenizer

        self.device = config.get("device", "cpu")

        gen_config_params = config.get("params", {})
        self.max_length = gen_config_params.get("max_length", 256)
        print(f"Loaded model {config['model']} to {self.device}")


# load model here

config = config_list_custom[0]
device = config.get("device", "cpu")
loaded_model = AutoModelForCausalLM.from_pretrained(config["model"]).to(device)
tokenizer = AutoTokenizer.from_pretrained(config["model"], use_fast=False)
tokenizer.pad_token_id = tokenizer.eos_token_id

Add the config of the new custom model

{
    "model": "Open-Orca/Mistral-7B-OpenOrca",
    "model_client_cls": "CustomModelClientWithArguments",
    "device": "cuda",
    "n": 1,
    "params": {
        "max_length": 1000,
    }
},

config_list_custom = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model_client_cls": ["CustomModelClientWithArguments"]},
)

assistant = AssistantAgent("assistant", llm_config={"config_list": config_list_custom})

assistant.register_model_client(
    model_client_cls=CustomModelClientWithArguments,
    loaded_model=loaded_model,
    tokenizer=tokenizer,
)

user_proxy.initiate_chat(assistant, message="Write python code to print Hello World!")