使用可对话代理生成Dalle图像

本笔记本展示了如何将图像生成功能添加到对话代理中。

环境要求

本笔记本需要一些额外的依赖项，可以通过pip安装：

pip install "autogen-agentchat[lmm]~=0.2"

如需更多信息，请参考安装指南。

首先，让我们导入运行此示例所需的所有模块。

import os
import re
from typing import Dict, List, Optional

from IPython.display import display
from PIL.Image import Image

import autogen
from autogen.agentchat.contrib import img_utils
from autogen.agentchat.contrib.capabilities import generate_images
from autogen.cache import Cache
from autogen.oai import openai_utils

让我们定义我们的LLM配置。

def _openai_llm_config(model: str) -> dict:
    """Return a fresh LLM config dict for ``model``.

    The API key is read from the ``OPENAI_API_KEY`` environment variable, so a
    ``KeyError`` is raised if that variable is not set.
    """
    return {
        "config_list": [{"model": model, "api_key": os.environ["OPENAI_API_KEY"]}],
        "timeout": 120,
        "temperature": 0.7,
    }


# Passed to ImageGeneration as the text-analyzer LLM config.
gpt_config = _openai_llm_config("gpt-4-turbo-preview")
# Used as llm_config for both the critic agent and the image generator agent.
gpt_vision_config = _openai_llm_config("gpt-4-vision-preview")
# Passed to DalleImageGenerator.
dalle_config = _openai_llm_config("dall-e-3")

提示

如需了解更多关于为代理配置 LLM 的信息，请参阅 LLM 配置文档。

我们的系统由两个主要代理组成：1. 图像生成代理（dalle）；2. 评论代理（critic）。

图像生成代理将与评论者进行对话，并根据评论者的请求生成图像。

# System prompt given to the critic agent. It asks for feedback on the image the
# critic saw plus a revised prompt, in a fixed "CRITICS:" / "PROMPT:" layout, or
# the single word TERMINATE when there is nothing left to improve. TERMINATE is
# the keyword that _is_termination_message looks for.
CRITIC_SYSTEM_MESSAGE = """You need to improve the prompt of the figures you saw.
How to create an image that is better in terms of color, shape, text (clarity), and other things.
Reply with the following format:

CRITICS: the image needs to improve...
PROMPT: here is the updated prompt!

If you have no critique or a prompt, just say TERMINATE
"""

def _is_termination_message(msg) -> bool:
    # Detects if we should terminate the conversation
    if isinstance(msg.get("content"), str):
        return msg["content"].rstrip().endswith("TERMINATE")
    elif isinstance(msg.get("content"), list):
        for content in msg["content"]:
            if isinstance(content, dict) and "text" in content:
                return content["text"].rstrip().endswith("TERMINATE")
    return False


def critic_agent() -> autogen.ConversableAgent:
    """Create the "critic" agent.

    The agent is configured with ``gpt_vision_config`` and
    ``CRITIC_SYSTEM_MESSAGE``, with ``human_input_mode="NEVER"``, at most three
    consecutive auto-replies, and ``_is_termination_message`` as its
    termination check.
    """
    critic_settings = {
        "name": "critic",
        "llm_config": gpt_vision_config,
        "system_message": CRITIC_SYSTEM_MESSAGE,
        "max_consecutive_auto_reply": 3,
        "human_input_mode": "NEVER",
        "is_termination_msg": lambda msg: _is_termination_message(msg),
    }
    return autogen.ConversableAgent(**critic_settings)


def image_generator_agent() -> autogen.ConversableAgent:
    """Create the "dalle" agent and attach the image generation capability.

    The agent itself is configured with ``gpt_vision_config``. The capability
    wraps a ``DalleImageGenerator`` built from ``dalle_config`` and uses
    ``gpt_config`` as its text-analyzer LLM config.
    """
    dalle_agent = autogen.ConversableAgent(
        name="dalle",
        llm_config=gpt_vision_config,
        max_consecutive_auto_reply=3,
        human_input_mode="NEVER",
        is_termination_msg=lambda msg: _is_termination_message(msg),
    )

    # Build the capability in one expression, then register it on the agent.
    capability = generate_images.ImageGeneration(
        image_generator=generate_images.DalleImageGenerator(llm_config=dalle_config),
        text_analyzer_llm_config=gpt_config,
    )
    capability.add_to_agent(dalle_agent)

    return dalle_agent

我们将定义 extract_images 来帮助我们提取由图像生成代理生成的图像。

def extract_images(sender: autogen.ConversableAgent, recipient: autogen.ConversableAgent) -> List[Image]:
    """Collect every image found in the messages ``sender`` holds for ``recipient``.

    Messages are scanned from newest to oldest, so the returned list starts
    with the images of the most recent message.

    Args:
        sender: Agent whose ``chat_messages`` history is read.
        recipient: Key into ``sender.chat_messages`` selecting the conversation.

    Returns:
        The images, each converted with ``img_utils.get_pil_image``.

    Raises:
        ValueError: If no message contains an ``image_url`` content part.
    """
    images = []

    for message in reversed(sender.chat_messages[recipient]):
        contents = message.get("content")
        # Only the multimodal format (a sequence of content parts) can carry
        # images; plain-text or missing content has nothing to extract.
        if contents is None or isinstance(contents, str):
            continue

        for content in contents:
            # Parts that are not dicts (e.g. bare strings) cannot be images.
            if not isinstance(content, dict):
                continue
            if content.get("type", "") == "image_url":
                img_data = content["image_url"]["url"]
                images.append(img_utils.get_pil_image(img_data))

    if not images:
        raise ValueError("No image data found in messages.")

    return images

开始对话

# Opening request sent from the image generator to the critic.
img_prompt = "A happy dog wearing a shirt saying 'I Love AutoGen'. Make sure the text is clear."
# Alternative prompt to try:
# img_prompt = "Ask me how I'm doing"

# Build the two participants of the conversation.
dalle = image_generator_agent()
critic = critic_agent()

# The image generator starts the chat; the returned value is kept in `result`.
result = dalle.initiate_chat(critic, message=img_prompt)

dalle (to critic):

A happy dog wearing a shirt saying 'I Love AutoGen'. Make sure the text is clear.

--------------------------------------------------------------------------------
critic (to dalle):

CRITICS: the image needs to improve the contrast and size of the text to enhance its clarity, and the shirt's color should not clash with the dog's fur color to maintain a harmonious color scheme.

PROMPT: here is the updated prompt!
Create an image of a joyful dog with a coat of a contrasting color to its fur, wearing a shirt with bold, large text saying 'I Love AutoGen' for clear readability.

--------------------------------------------------------------------------------
dalle (to critic):

I generated an image with the prompt: Joyful dog, contrasting coat color to its fur, shirt with bold, large text "I Love AutoGen" for clear readability.<image>

--------------------------------------------------------------------------------
critic (to dalle):

CRITICS: the image effectively showcases a joyful dog with a contrasting shirt color, and the text 'I Love AutoGen' is large and bold, ensuring clear readability.

PROMPT: TERMINATE

--------------------------------------------------------------------------------

让我们展示所有由Dalle生成的图片

# Gather the images from dalle's message history with the critic.
images = extract_images(dalle, critic)

# extract_images scans messages newest-first, so walk the list backwards to
# show the images in the order they were produced.
thumbnail_size = (300, 300)
for image in images[::-1]:
    display(image.resize(thumbnail_size))