Image Predictions

The image component provides methods for processing and analyzing images using VLM Run’s models.

Process an Image

import { VlmRun } from "vlmrun";

// Create the client with your API key
const client = new VlmRun({ apiKey: "your-api-key" });

// JSON Schema passed to the request as `config.jsonSchema`
const invoiceJsonSchema = {
  type: "object",
  properties: {
    invoice_number: { type: "string" },
    total_amount: { type: "number" },
  },
};

// Process an image referenced by a URL
const imageUrl =
  "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg";
const response = await client.image.generate({
  images: [imageUrl],
  domain: "document.invoice",
  config: { jsonSchema: invoiceJsonSchema },
});

Process an image using a Zod schema

import { z } from "zod";

const imageUrl =
  "https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg";

// Zod schema describing the fields expected in the response
const schema = z.object({
  invoice_number: z.string(),
  total_amount: z.number(),
});

const apiResponse = await client.image.generate({
  images: [imageUrl],
  domain: "document.invoice",
  config: {
    responseModel: schema,
  },
});

// `apiResponse.response` is typed `any` and is optional, so validate it at
// runtime instead of casting: `parse` throws a ZodError when the payload does
// not match the schema and otherwise returns a value typed as
// z.infer<typeof schema> (unknown object keys are stripped by default).
const response = schema.parse(apiResponse.response);
console.log(response);

Process an image using a local file

// A local file path goes in the same `images` array that accepts URLs
const localImagePath = "path/to/local/image.jpg";

const localResponse = await client.image.generate({
  images: [localImagePath],
  model: "vlm-1",
  domain: "document.invoice",
});

TypeScript Interfaces

/**
 * Image-specific request parameters: every field of `PredictionGenerateParams`
 * plus the fields below. Presumably the argument type of
 * `client.image.generate` — confirm against the SDK.
 */
interface ImagePredictionParams extends PredictionGenerateParams {
  /** NOTE(review): presumably toggles batch processing of the request — confirm against the API reference. */
  batch?: boolean;
  /** Images to process; the examples on this page pass a remote URL or a local file path. */
  images: string[];
}

/** Base request parameters; `ImagePredictionParams` extends this interface. */
interface PredictionGenerateParams {
  /** Model identifier; the examples on this page use "vlm-1". Optional. */
  model?: string;
  /** Domain identifier, e.g. "document.invoice". The only required field here. */
  domain: string;
  /** Generation options; see `GenerationConfigParams`. */
  config?: GenerationConfigParams;
  /** Request metadata; see `RequestMetadataParams`. */
  metadata?: RequestMetadataParams;
  /** NOTE(review): presumably a webhook URL notified when the prediction finishes — confirm. */
  callbackUrl?: string;
}

/** Options passed as `config` in a prediction request. All fields are optional. */
type GenerationConfigParams = {
  /** NOTE(review): presumably the level of image detail used during processing — confirm. */
  detail?: "auto" | "hi" | "lo";
  /** Zod schema for the expected response, as used in the Zod example on this page. */
  responseModel?: ZodType;
  /** JSON Schema for the expected response, as used in the URL example on this page. */
  jsonSchema?: Record<string, any> | null;
  /** NOTE(review): presumably requests confidence scores in the output — confirm. */
  confidence?: boolean;
  /** NOTE(review): presumably requests visual grounding information in the output — confirm. */
  grounding?: boolean;
};

/** Metadata passed as `metadata` in a prediction request. All fields are optional. */
type RequestMetadataParams = {
  /** NOTE(review): presumably the deployment environment the request is tagged with — confirm. */
  environment?: "dev" | "staging" | "prod";
  /** NOTE(review): presumably groups related requests under one session — confirm. */
  sessionId?: string | null;
  /** NOTE(review): presumably controls whether request data may be used for training — confirm. */
  allowTraining?: boolean;
};

/**
 * Prediction result object. Presumably what `client.image.generate` resolves
 * to — confirm against the SDK; the Zod example on this page reads its
 * `response` field.
 */
interface PredictionResponse {
  /** Identifier of the prediction. */
  id: string;
  /** NOTE(review): presumably a creation timestamp serialized as a string — confirm format. */
  created_at: string;
  /** NOTE(review): presumably the completion timestamp; optional, so it may be absent — confirm when. */
  completed_at?: string;
  /** Prediction payload. Typed `any` and optional, so narrow or validate it before use. */
  response?: any;
  /** Job status; see `JobStatus`. */
  status: JobStatus;
  /** NOTE(review): presumably a human-readable status or error message — confirm. */
  message?: string;
  /** Credit usage details; see `CreditUsage`. */
  usage?: CreditUsage;
}

/** Usage details attached to a `PredictionResponse` via its `usage` field. All fields are optional. */
interface CreditUsage {
  /** NOTE(review): presumably the number of elements of `element_type` that were processed — confirm. */
  elements_processed?: number;
  /** Kind of element counted: "image", "page", "video" or "audio". */
  element_type?: "image" | "page" | "video" | "audio";
  /** NOTE(review): presumably the credits charged for the request — confirm. */
  credits_used?: number;
}

/** Job status is typed as a plain string; the possible values are not enumerated on this page. */
type JobStatus = string;

Error Handling

// Wrap the request so API failures and local failures can be told apart.
// NOTE(review): `ApiError` must be in scope for the `instanceof` check below;
// it is not imported anywhere on this page — confirm the export name in the SDK.
try {
  const response = await client.image.generate({
    images: ["invalid-url"],
    model: "vlm-1",
    // `domain` is a required field of PredictionGenerateParams
    domain: "document.invoice",
  });
} catch (error) {
  if (error instanceof ApiError) {
    console.error("API Error:", error.message);
    // Handle API-specific errors
  } else {
    // Anything that is not an ApiError lands here
    console.error("File system error:", error);
    // Handle local file system errors
  }
}

Best Practices

  1. Image Formats

    • Supported formats: JPG, JPEG, PNG
    • Ensure proper image quality
    • Consider image size limits
  2. Performance

    • Use URLs for remote images
    • Process multiple images in one request when possible
    • Handle timeouts appropriately
  3. Error Handling

    • Validate image files before processing
    • Handle both API and file system errors
    • Implement proper error recovery