Files API
The `client.files` resource lets you upload, retrieve, and manage files used by the VLM Run platform. Files are essential for predictions, fine-tuning, and dataset creation.
Quick Examples
Upload a File
# Upload a file
file = client.files.upload("invoice.jpg")
print(f"File ID: {file.id}, URL: {file.url}")
Retrieve a File
# Get file details
file = client.files.get("file_abc123")
print(f"Name: {file.filename}, Size: {file.bytes} bytes")
List Files
# List your files
files = client.files.list(limit=10)
for file in files:
    print(f"{file.filename} ({file.id})")
Delete a File
# Delete a file
client.files.delete("file_abc123")
File Lifecycle
Files in VLM Run follow a simple lifecycle:
- Upload - Send files to the platform
- Process - Use files for predictions or other operations
- Manage - List, retrieve, or delete files as needed
Uploading Files
Basic Upload
# Upload a file with default settings
file = client.files.upload("document.pdf")
With Purpose
Files can be categorized by purpose, which affects how they can be used:
# Upload for fine-tuning
file = client.files.upload(
    file="training_data.json",
    purpose="fine-tune"
)
From File Object
# Upload from an open file handle
with open("document.pdf", "rb") as f:
    file = client.files.upload(file=f, filename="document.pdf")
Available Purposes
Purpose | Description | Common File Types
---|---|---
fine-tune | For fine-tuning models | Training data, JSON files |
assistants | General usage (default) | Images, PDFs, text files |
assistants_output | Output from assistants | Generated content |
batch | Input files for batch processing | Large collections |
batch_output | Output from batch processing | Results and reports |
vision | For vision-based models | Images, screenshots |
datasets | For dataset creation | Labeled data collections |
Retrieving Files
Get File Details
# Get metadata for a specific file
file = client.files.get("file_abc123")
# Access file properties
print(f"Name: {file.filename}")
print(f"Size: {file.bytes} bytes")
print(f"Purpose: {file.purpose}")
print(f"Created: {file.created_at}")
List Files
# List all files (paginated)
files = client.files.list(limit=20)
# Filter by purpose
fine_tune_files = client.files.list(
    purpose="fine-tune",
    limit=10
)
# Pagination
next_page = client.files.list(skip=20, limit=20)
Delete Files
# Delete a file by ID
client.files.delete("file_abc123")
Common Patterns
Upload and Process
The most common pattern is uploading a file and using it immediately:
# 1. Upload the file
file = client.files.upload("invoice.jpg")
# 2. Process with image API
prediction = client.image.generate(
    urls=[file.url],
    domain="document.invoice"
)
# 3. Work with the results
if prediction.status == "completed":
    print(f"Invoice #: {prediction.response.invoice_number}")
    print(f"Amount: ${prediction.response.total_amount}")
Batch Processing Multiple Files
# Upload multiple files
import glob
# Get all PDFs in a directory
pdf_files = glob.glob("./invoices/*.pdf")
results = []
# Process each file
for path in pdf_files:
    # 1. Upload file
    file = client.files.upload(path, purpose="batch")
    # 2. Process document
    prediction = client.document.generate(
        urls=[file.url],
        domain="document.invoice",
        batch=True  # Process asynchronously
    )
    results.append((file.id, prediction.id))
# Later: check results
for file_id, prediction_id in results:
    prediction = client.predictions.get(prediction_id)
    if prediction.status == "completed":
        print(f"File {file_id}: ${prediction.response.total_amount}")
Temporary File Management
Clean up files after use:
# Upload a temporary file
temp_file = client.files.upload("temp_image.jpg")
try:
    # Use the file
    result = client.image.generate(
        urls=[temp_file.url],
        domain="image.classification"
    )
    # Process the result
    print(f"Classification: {result.response}")
finally:
    # Clean up when done
    client.files.delete(temp_file.id)
Optimization Features
File Caching
VLM Run automatically detects duplicate files using content hashing:
# Upload same file twice
file1 = client.files.upload("report.pdf")
file2 = client.files.upload("report.pdf")
# Both operations return the same file ID
print(f"Same file ID: {file1.id == file2.id}") # True
Pre-upload Caching Check
Check if a file exists before uploading:
# Check if file exists in cache
cached_file = client.files.get_cached_file("large-dataset.zip")
if cached_file:
    file_id = cached_file.id
    print(f"Using existing file: {file_id}")
else:
    file = client.files.upload("large-dataset.zip")
    file_id = file.id
    print(f"Uploaded new file: {file_id}")
Response Structure
The `FileResponse` object has the following structure:
class FileResponse(BaseModel):
    id: Optional[str]        # Unique file identifier
    filename: Optional[str]  # Original filename
    bytes: int               # File size in bytes
    purpose: Literal[        # File purpose/category
        "fine-tune",
        "assistants",
        "assistants_output",
        "batch",
        "batch_output",
        "vision",
        "datasets",
    ]
    created_at: datetime     # Creation timestamp
    object: str = "file"     # Object type
Example usage:
file = client.files.get("file_abc123")
print(f"ID: {file.id}")
print(f"Filename: {file.filename}")
print(f"Size: {file.bytes} bytes")
print(f"Purpose: {file.purpose}")
print(f"Created: {file.created_at}")
print(f"Object type: {file.object}") # Always "file"
Error Handling
from vlmrun.client.exceptions import ApiError, NotFoundError
try:
    file = client.files.upload("document.pdf")
except FileNotFoundError:
    print("Local file not found")
except ApiError as e:
    if e.status_code == 413:
        print("File too large")
    elif e.status_code == 415:
        print("Unsupported file type")
    else:
        print(f"API error: {e.message}")
Best Practices
Use Descriptive Filenames
# ✅ Good: Descriptive name
file = client.files.upload("invoice-march2023-acme-corp.jpg")
# ❌ Bad: Generic name
file = client.files.upload("file.jpg")
Check Cache for Efficiency
# Efficiently upload multiple files
for path in files_to_upload:
    cached = client.files.get_cached_file(path)
    if cached:
        file_id = cached.id  # Use existing file
    else:
        file = client.files.upload(path)
        file_id = file.id
Clean Up Unused Files
# List files older than 30 days
import datetime
from datetime import timedelta
cutoff_date = datetime.datetime.now() - timedelta(days=30)
old_files = []
for file in client.files.list():
    if file.created_at < cutoff_date and file.purpose == "assistants":
        old_files.append(file.id)
# Delete old files
for file_id in old_files:
    client.files.delete(file_id)
    print(f"Deleted old file: {file_id}")
Set Appropriate Timeouts for Large Files
# Extend timeout for large files
large_file = client.files.upload(
    file="large-video.mp4",
    timeout=600  # 10 minutes
)