Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.vlm.run/llms.txt

Use this file to discover all available pages before exploring further.

Files API

The client.files API lets you upload, retrieve, and manage files used by the VLM Run platform. Files are essential for predictions, fine-tuning, and dataset creation.

Quick Examples

Upload a File

# Upload a file
file = client.files.upload("invoice.jpg")
print(f"File ID: {file.id}, URL: {file.url}")

Retrieve a File

# Get file details
file = client.files.get("file_abc123")
print(f"Name: {file.filename}, Size: {file.bytes} bytes")

List Files

# List your files
files = client.files.list(limit=10)
for file in files:
    print(f"{file.filename} ({file.id})")

Delete a File

# Delete a file
client.files.delete("file_abc123")

File Lifecycle

Files in VLM Run follow a simple lifecycle:
  1. Upload - Send files to the platform
  2. Process - Use files for predictions or other operations
  3. Manage - List, retrieve, or delete files as needed

Uploading Files

Basic Upload

# Upload a file with default settings
file = client.files.upload("document.pdf")

With Purpose

Files can be categorized by purpose, which affects how they can be used:
# Upload for fine-tuning
file = client.files.upload(
    file="training_data.json",
    purpose="fine-tune"
)

From File Object

# Upload from an open file handle
with open("document.pdf", "rb") as f:
    file = client.files.upload(file=f, filename="document.pdf")

Available Purposes

| Purpose | Description | Common File Types |
| --- | --- | --- |
| fine-tune | For fine-tuning models | Training data, JSON files |
| assistants | General usage (default) | Images, PDFs, text files |
| assistants_output | Output from assistants | Generated content |
| batch | Input files for batch processing | Large collections |
| batch_output | Output from batch processing | Results and reports |
| vision | For vision-based models | Images, screenshots |
| datasets | For dataset creation | Labeled data collections |

Retrieving Files

Get File Details

# Get metadata for a specific file
file = client.files.get("file_abc123")

# Access file properties
print(f"Name: {file.filename}")
print(f"Size: {file.bytes} bytes")
print(f"Purpose: {file.purpose}")
print(f"Created: {file.created_at}")

List Files

# List all files (paginated)
files = client.files.list(limit=20)

# Filter by purpose
fine_tune_files = client.files.list(
    purpose="fine-tune",
    limit=10
)

# Pagination
next_page = client.files.list(skip=20, limit=20)

Delete Files

# Delete a file by ID
client.files.delete("file_abc123")

Common Patterns

Upload and Process

The most common pattern is uploading a file and using it immediately:
# 1. Upload the file
file = client.files.upload("invoice.jpg")

# 2. Process with image API
prediction = client.image.generate(
    urls=[file.url],
    domain="document.invoice"
)

# 3. Work with the results
if prediction.status == "completed":
    print(f"Invoice #: {prediction.response.invoice_number}")
    print(f"Amount: ${prediction.response.total_amount}")

Batch Processing Multiple Files

# Upload multiple files
import glob

# Get all PDFs in a directory
pdf_files = glob.glob("./invoices/*.pdf")
results = []

# Process each file
for path in pdf_files:
    # 1. Upload file
    file = client.files.upload(path, purpose="batch")

    # 2. Process document
    prediction = client.document.generate(
        urls=[file.url],
        domain="document.invoice",
        batch=True  # Process asynchronously
    )

    results.append((file.id, prediction.id))

# Later: check results
for file_id, prediction_id in results:
    prediction = client.predictions.get(prediction_id)
    if prediction.status == "completed":
        print(f"File {file_id}: ${prediction.response.total_amount}")

Temporary File Management

Clean up files after use:
# Upload a temporary file
temp_file = client.files.upload("temp_image.jpg")

try:
    # Use the file
    result = client.image.generate(
        urls=[temp_file.url],
        domain="image.classification"
    )

    # Process the result
    print(f"Classification: {result.response}")

finally:
    # Clean up when done
    client.files.delete(temp_file.id)

Optimization Features

File Caching

VLM Run automatically detects duplicate files using content hashing:
# Upload same file twice
file1 = client.files.upload("report.pdf")
file2 = client.files.upload("report.pdf")

# Both operations return the same file ID
print(f"Same file ID: {file1.id == file2.id}")  # True

Pre-upload Caching Check

Check whether a file is already in the cache before uploading it:
# Check if file exists in cache
cached_file = client.files.get_cached_file("large-dataset.zip")

if cached_file:
    file_id = cached_file.id
    print(f"Using existing file: {file_id}")
else:
    file = client.files.upload("large-dataset.zip")
    file_id = file.id
    print(f"Uploaded new file: {file_id}")

Response Structure

The FileResponse object has the following structure:
class FileResponse(BaseModel):
    id: Optional[str]          # Unique file identifier
    filename: Optional[str]    # Original filename
    bytes: int                 # File size in bytes
    purpose: Literal[          # File purpose/category
        "fine-tune",
        "assistants",
        "assistants_output",
        "batch",
        "batch_output",
        "vision",
        "datasets",
    ]
    created_at: datetime       # Creation timestamp
    object: str = "file"       # Object type
Example usage:
file = client.files.get("file_abc123")

print(f"ID: {file.id}")
print(f"Filename: {file.filename}")
print(f"Size: {file.bytes} bytes")
print(f"Purpose: {file.purpose}")
print(f"Created: {file.created_at}")
print(f"Object type: {file.object}")  # Always "file"

Error Handling

from vlmrun.client.exceptions import ApiError, NotFoundError

try:
    file = client.files.upload("document.pdf")
except FileNotFoundError:
    print("Local file not found")
except ApiError as e:
    if e.status_code == 413:
        print("File too large")
    elif e.status_code == 415:
        print("Unsupported file type")
    else:
        print(f"API error: {e.message}")

Best Practices

Use Descriptive Filenames

# ✅ Good: Descriptive name
file = client.files.upload("invoice-march2023-acme-corp.jpg")

# ❌ Bad: Generic name
file = client.files.upload("file.jpg")

Check Cache for Efficiency

# Efficiently upload multiple files
for path in files_to_upload:
    cached = client.files.get_cached_file(path)
    if cached:
        file_id = cached.id  # Use existing file
    else:
        file = client.files.upload(path)
        file_id = file.id

Clean Up Unused Files

# List files older than 30 days
import datetime
from datetime import timedelta

cutoff_date = datetime.datetime.now() - timedelta(days=30)
old_files = []

for file in client.files.list():
    if file.created_at < cutoff_date and file.purpose == "assistants":
        old_files.append(file.id)

# Delete old files
for file_id in old_files:
    client.files.delete(file_id)
    print(f"Deleted old file: {file_id}")

Set Appropriate Timeouts for Large Files

# Extend timeout for large files
large_file = client.files.upload(
    file="large-video.mp4",
    timeout=600  # 10 minutes
)