使用可对话代理生成Dalle图像
本笔记本展示了如何将图像生成功能添加到对话代理中。
Requirements
首先,让我们导入运行此示例所需的所有模块。
import os
import re
from typing import Dict, List, Optional

from IPython.display import display
from PIL.Image import Image
import autogen
from autogen.agentchat.contrib import img_utils
from autogen.agentchat.contrib.capabilities import generate_images
from autogen.cache import Cache
from autogen.oai import openai_utils
让我们定义我们的LLM配置。
# The three LLM configs share the same API key, timeout and temperature and
# differ only in the model name, so build them from a single template:
#   gpt_config        -> text model
#   gpt_vision_config -> vision model
#   dalle_config      -> image-generation model
gpt_config, gpt_vision_config, dalle_config = (
    {
        "config_list": [{"model": model_name, "api_key": os.environ["OPENAI_API_KEY"]}],
        "timeout": 120,
        "temperature": 0.7,
    }
    for model_name in ("gpt-4-turbo-preview", "gpt-4-vision-preview", "dall-e-3")
)
提示:关于如何为代理配置 LLM 的更多信息,请参阅 AutoGen 官方文档中的 LLM 配置说明。
我们的系统由两个主要代理组成:1. 图像生成代理(dalle)。2. 评论代理(critic)。
图像生成代理将与评论代理进行对话,并根据评论代理给出的改进提示重新生成图像。
# System prompt for the critic agent: it asks for a critique plus an improved
# prompt in a fixed "CRITICS: ... / PROMPT: ..." layout, or the single word
# TERMINATE when there is nothing left to improve.
CRITIC_SYSTEM_MESSAGE = """You need to improve the prompt of the figures you saw.
How to create an image that is better in terms of color, shape, text (clarity), and other things.
Reply with the following format:
CRITICS: the image needs to improve...
PROMPT: here is the updated prompt!
If you have no critique or a prompt, just say TERMINATE
"""
def _is_termination_message(msg) -> bool:
# Detects if we should terminate the conversation
if isinstance(msg.get("content"), str):
return msg["content"].rstrip().endswith("TERMINATE")
elif isinstance(msg.get("content"), list):
for content in msg["content"]:
if isinstance(content, dict) and "text" in content:
return content["text"].rstrip().endswith("TERMINATE")
return False
def critic_agent() -> autogen.ConversableAgent:
    """Build the agent named "critic".

    The agent is created with the vision LLM config and
    ``CRITIC_SYSTEM_MESSAGE`` as its system message, never asks for human
    input, is capped at 3 consecutive auto-replies, and uses
    ``_is_termination_message`` as its termination check.
    """
    critic = autogen.ConversableAgent(
        name="critic",
        system_message=CRITIC_SYSTEM_MESSAGE,
        llm_config=gpt_vision_config,
        human_input_mode="NEVER",
        max_consecutive_auto_reply=3,
        is_termination_msg=lambda msg: _is_termination_message(msg),
    )
    return critic
def image_generator_agent() -> autogen.ConversableAgent:
    """Build the agent named "dalle" and attach the image-generation capability.

    The agent itself runs on the vision LLM config. The capability wraps a
    ``DalleImageGenerator`` built from ``dalle_config`` and receives
    ``gpt_config`` as its text-analyzer LLM config.

    Returns:
        The agent, after the capability has been added to it.
    """
    generator_agent = autogen.ConversableAgent(
        name="dalle",
        llm_config=gpt_vision_config,
        human_input_mode="NEVER",
        max_consecutive_auto_reply=3,
        is_termination_msg=lambda msg: _is_termination_message(msg),
    )

    # Wire the DALL-E backend into the agent through the ImageGeneration capability.
    generate_images.ImageGeneration(
        image_generator=generate_images.DalleImageGenerator(llm_config=dalle_config),
        text_analyzer_llm_config=gpt_config,
    ).add_to_agent(generator_agent)

    return generator_agent
我们将定义 extract_images 函数,
用来提取图像生成代理在对话中生成的所有图像。
def extract_images(
    sender: autogen.ConversableAgent, recipient: autogen.ConversableAgent
) -> List[Image]:
    """Collect every image found in the chat between ``sender`` and ``recipient``.

    Walks ``sender.chat_messages[recipient]`` from the newest message to the
    oldest and decodes each ``image_url`` content part with
    ``img_utils.get_pil_image``.

    Args:
        sender: Agent whose ``chat_messages`` history is read.
        recipient: Agent used as the key into ``sender.chat_messages``.

    Returns:
        The decoded images, newest message first.

    Raises:
        ValueError: If no message contains an ``image_url`` part.
    """
    images = []
    for message in reversed(sender.chat_messages[recipient]):
        parts = message.get("content")
        # Only the GPT-4V format stores content as a list of parts. Plain-text
        # and empty (None) content cannot carry images, and iterating None
        # would raise TypeError.
        if not isinstance(parts, list):
            continue
        for part in parts:
            if isinstance(part, dict) and part.get("type", "") == "image_url":
                images.append(img_utils.get_pil_image(part["image_url"]["url"]))

    if not images:
        raise ValueError("No image data found in messages.")
    return images
开始对话
# Create both agents, then let the image generator open the chat with the
# critic using the initial image prompt.
dalle = image_generator_agent()
critic = critic_agent()
img_prompt = "A happy dog wearing a shirt saying 'I Love AutoGen'. Make sure the text is clear."
# Alternative prompt (disabled):
# img_prompt = "Ask me how I'm doing"
result = dalle.initiate_chat(critic, message=img_prompt)
A happy dog wearing a shirt saying 'I Love AutoGen'. Make sure the text is clear.
--------------------------------------------------------------------------------
CRITICS: the image needs to improve the contrast and size of the text to enhance its clarity, and the shirt's color should not clash with the dog's fur color to maintain a harmonious color scheme.
PROMPT: here is the updated prompt!
Create an image of a joyful dog with a coat of a contrasting color to its fur, wearing a shirt with bold, large text saying 'I Love AutoGen' for clear readability.
--------------------------------------------------------------------------------
I generated an image with the prompt: Joyful dog, contrasting coat color to its fur, shirt with bold, large text "I Love AutoGen" for clear readability.<image>
--------------------------------------------------------------------------------
CRITICS: the image effectively showcases a joyful dog with a contrasting shirt color, and the text 'I Love AutoGen' is large and bold, ensuring clear readability.
PROMPT: TERMINATE
--------------------------------------------------------------------------------
让我们展示所有由Dalle生成的图片
# extract_images walks the history newest-first, so step through the list
# backwards to show the pictures in the order they appeared in the chat.
images = extract_images(dalle, critic)
for image in images[::-1]:
    display(image.resize((300, 300)))