from pydantic import BaseModel, Field
# Define a simplified Pydantic model for the agent's output. In this example we ask for an image of Big Ben at a distance, then a crop of its clock face.
# Structured result of the image-generation request: the generated image,
# the clock-face crop taken from it, and the crop rectangle.
#
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies a model's docstring into its generated JSON schema, so
# adding one would change the schema this model produces.
class AgentResponse(BaseModel):
    # Location of the full generated image.
    image_url: str = Field(
        description="The pre-signed URL of the image generated.",
    )

    # Location of the close-up cut out of the generated image.
    clock_image_url: str = Field(
        description="The pre-signed URL of the image of the clock face cropped from the generated image.",
    )

    # Crop rectangle as (x, y, width, height).
    # NOTE(review): units (pixels vs. normalized) are not specified here -- confirm.
    crop_xywh: tuple[float, float, float, float] = Field(
        description="The (x, y, width, height) of the clock face cropped from the generated image.",
    )
# Use the Instructor client to create a chat completion, requesting the
# result in the shape of AgentResponse.
clock_face_prompt = "Generate an image of big ben at a distance. Crop into the clock face and provide a close up of the clock face."

# A single user turn whose content is one text part carrying the prompt.
user_message = {
    "role": "user",
    "content": [{"type": "text", "text": clock_face_prompt}],
}

response = inst_client.chat.completions.create(
    model="vlm-agent-1",
    response_model=AgentResponse,
    messages=[user_message],
    # Presumably disables Instructor's automatic re-asking on validation
    # failure -- confirm against the Instructor version in use.
    max_retries=0,
)

# Record both the concrete type and the value of what came back.
logger.debug(f"type={type(response)}, response={response}")