Skip to content

Basic Usage Examples

Practical examples for common Doctra use cases.

Example 1: Parse a Simple PDF

from doctra import StructuredPDFParser

# PDF to process
pdf_path = "document.pdf"

# Build a parser with no constructor arguments and run it on the PDF
StructuredPDFParser().parse(pdf_path)

# Output saved to: outputs/document/full_parse/

Example 2: Parse with Custom Settings

from doctra import StructuredPDFParser
from doctra.engines.ocr import PytesseractOCREngine

# Input PDF and the base directory passed to parse() for its output
pdf_path = "document.pdf"
results_dir = "my_results"

custom_parser = StructuredPDFParser(
    dpi=250,  # Higher quality
    min_score=0.7,  # More confident detections
    # OCR engine, constructed inline
    ocr_engine=PytesseractOCREngine(lang="eng", psm=4, oem=3),
)

custom_parser.parse(pdf_path, output_base_dir=results_dir)

Example 3: Enhanced Parsing for Scanned Documents

from doctra import EnhancedPDFParser

# Image-restoration options handed to the enhanced parser
restoration_options = {
    "use_image_restoration": True,
    "restoration_task": "appearance",
    "restoration_device": "cuda",  # Use GPU
}

scan_parser = EnhancedPDFParser(**restoration_options)
scan_parser.parse("scanned_document.pdf")

Example 4: Extract Structured Data with VLM

from doctra import StructuredPDFParser
from doctra.engines.vlm.service import VLMStructuredExtractor

# VLM engine configuration
provider = "openai"
api_key = "your-api-key-here"  # placeholder; replace with your own key

extractor = VLMStructuredExtractor(vlm_provider=provider, api_key=api_key)

# Hand the VLM engine to the parser and process the report
StructuredPDFParser(vlm=extractor).parse("data_report.pdf")

# Output includes:
# - tables.xlsx with extracted data
# - tables.html with formatted tables
# - vlm_items.json with structured data

Example 5: Extract Only Charts

from doctra import ChartTablePDFParser

slides_path = "presentation.pdf"

# Chart extraction on, table extraction off
charts_only_parser = ChartTablePDFParser(
    extract_tables=False,
    extract_charts=True,
)

charts_only_parser.parse(slides_path)

Example 5b: PaddleOCRVL End-to-End Parsing

from doctra import PaddleOCRVLPDFParser

report_path = "financial_report.pdf"

# Initialize parser with all features
vl_parser = PaddleOCRVLPDFParser(
    device="gpu",                     # Use GPU for processing
    use_image_restoration=True,       # Enable DocRes restoration
    restoration_task="appearance",    # Use appearance enhancement
    use_chart_recognition=True,       # Enable chart recognition
    merge_split_tables=True,          # Enable split table merging
)

# Parse document - automatically handles all content types
vl_parser.parse(report_path)

# Output in: outputs/financial_report/paddleocr_vl_parse/
# - result.md: All content in Markdown
# - result.html: Formatted HTML output
# - tables.xlsx: All tables and charts in Excel format
# - tables.html: Structured tables and charts

Example 6: Visualize Layout Detection

from doctra import StructuredPDFParser

# Arguments for the layout-detection display call
visualization_args = {
    "pdf_path": "document.pdf",
    "num_pages": 3,
    "save_path": "layout_visualization.png",
}

# Display layout detection
StructuredPDFParser().display_pages_with_boxes(**visualization_args)

Example 7: Standalone Image Restoration

from doctra import DocResEngine

# Create the restoration engine on the "cuda" device
restorer = DocResEngine(device="cuda")

# Restore a single image; the call returns the image and a metadata mapping
restored, info = restorer.restore_image(image="blurry_document.jpg", task="deblurring")

# Save the restored image, then report the recorded processing time
restored.save("restored.jpg")
elapsed = info['processing_time']
print(f"Processed in {elapsed:.2f}s")

Example 8: Batch Processing

import os
from doctra import StructuredPDFParser

parser = StructuredPDFParser()

# Process all PDFs in directory
pdf_directory = "documents"
failed = []  # names of the files whose parse raised an exception

# os.listdir() returns entries in arbitrary order; sort for a reproducible run.
for filename in sorted(os.listdir(pdf_directory)):
    pdf_path = os.path.join(pdf_directory, filename)

    # Match the extension case-insensitively (e.g. "REPORT.PDF") and skip
    # directories whose name merely ends in ".pdf".
    if not filename.lower().endswith(".pdf") or not os.path.isfile(pdf_path):
        continue

    print(f"Processing {filename}...")
    try:
        parser.parse(pdf_path)
    except Exception as e:
        # One bad document should not abort the rest of the batch.
        print(f"Failed {filename}: {e}")
        failed.append(filename)
    else:
        print(f"Completed {filename}")

# Summarize failures so they are not lost in the per-file output
if failed:
    print(f"{len(failed)} file(s) failed: {', '.join(failed)}")

Example 9: Error Handling

from doctra import StructuredPDFParser

parser = StructuredPDFParser()

try:
    # Keep the guarded region down to the one call that is expected to fail
    parser.parse("document.pdf")
except FileNotFoundError:
    print("Error: PDF file not found")
except Exception as e:
    # Catch-all for any other failure raised while parsing
    print(f"Error during processing: {e}")
else:
    # Runs only when parse() raised nothing
    print("Processing successful!")

Example 10: Using the Web UI

from doctra import launch_ui

# Launch the Doctra web interface
launch_ui()

# Opens browser at http://127.0.0.1:7860

Next Steps