document.pdf
document.pdf
type Chart {
type: String, // Type of plot, e.g. 'line', 'bar', 'scatter'
title: String, // Title of the plot
description: String, // Description of the plot
}
type Table {
description: String, // Description of the table
title: String, // Title of the table
caption: String, // Caption of the table
markdown: String, // Markdown representation of the table
}
type Markdown {
caption: String, // Caption of the markdown content
title: String, // Title of the markdown content
markdown: String, // Additional markdown content
}
class Paragraph(BaseModel):
content: str // Text content of the paragraph
bbox: BoxCoords | None // Bounding box coordinates of the paragraph
type: str // Type of the element, e.g. 'line', 'paragraph'
class DocumentPage(BaseModel):
description: str | None // Description of the page
title: str | None // Title of the page
page_number: int | None // Page number
lines: list[Chart] // Plots on the page
paragraphs: list[Paragraph] // Paragraphs of text on the page
tables: list[Table] // Tables on the page
others: list[Markdown] // Other markdown content on the page
type DocumentPDF {
title: String, // Title of the document
description: String, // Description of the document
pages: [DocumentPage], // Pages in the document
}
{
"title": "Sample PDF Document", # Title of the document
"description": "...", # Description of the document
"pages": [ # List of pages in the document
{
"description": "Page 1", # Description of the first page
"title": "Introduction", # Title of the first page
"page_number": 1, # Page number extracted from the first page
"lines": [ # Lines of text extracted from the first page
{
"content": "Sales over time",
"bbox": {
"xywh": [
0., 0., 1., 1.0 # Normalized bounding box coordinates of the line {x, y, width, height}
]
},
"type": "line"
}
... # Other lines of text
],
"paragraphs": [
{
"content": "This is a paragraph of text.",
"bbox": {
"xywh": [
0., 0., 1., 1. # Normalized bounding box coordinates of the paragraph {x, y, width, height}
]
},
"type": "paragraph"
}
... # Other paragraphs of text
],
"tables": [ # Tables extracted from the first page
{
"data": [
]
}
...
],
"others": [
{
"caption": "Note",
"title": "Important Note",
"markdown": "This is an important note about the document."
}
...
]
},
... # Second page, third page, etc.
]
}