Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.parzo.dev/llms.txt

Use this file to discover all available pages before exploring further.

Installation

No SDK required — Parzo works with any standard HTTP library. The examples below use the requests package:
pip install requests

Basic example

import requests
import time

# Placeholder credential — replace with your own API key before running.
API_KEY = "inv_your_key_here"
# Base URL that each endpoint path in this example is appended to.
BASE_URL = "https://api.parzo.dev"

def extract_invoice(pdf_path: str) -> dict:
    # 1. Submit the invoice
    with open(pdf_path, "rb") as f:
        response = requests.post(
            f"{BASE_URL}/v1/extract/invoice",
            headers={"X-API-Key": API_KEY},
            files={"file": f}
        )
    response.raise_for_status()
    job_id = response.json()["job_id"]

    # 2. Poll for the result
    while True:
        result = requests.get(
            f"{BASE_URL}/v1/jobs/{job_id}",
            headers={"X-API-Key": API_KEY}
        ).json()

        if result["status"] == "completed":
            return result["result"]
        if result["status"] == "failed":
            raise Exception(f"Extraction failed: {result['error']}")

        time.sleep(5)

# Usage
# Runs the submit-and-poll cycle for one file, then prints three fields.
# NOTE(review): the nested key paths below are taken from this example only —
# confirm them against the API's response schema.
invoice = extract_invoice("invoice.pdf")
print(f"Vendor: {invoice['vendor']['name']}")
print(f"Total: {invoice['financials']['total']} {invoice['financials']['currency']}")
print(f"Invoice number: {invoice['invoice']['number']}")

With environment variables

import os
import requests
import time

# Read the key from the PARZO_API_KEY environment variable; indexing
# os.environ raises KeyError when the variable is not set.
API_KEY = os.environ["PARZO_API_KEY"]
BASE_URL = "https://api.parzo.dev"

Process multiple invoices

import requests
import time
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

# Placeholder credential — replace with your own API key before running.
API_KEY = "inv_your_key_here"
# Base URL that each endpoint path in this example is appended to.
BASE_URL = "https://api.parzo.dev"

def submit_invoice(pdf_path: str) -> str:
    with open(pdf_path, "rb") as f:
        response = requests.post(
            f"{BASE_URL}/v1/extract/invoice",
            headers={"X-API-Key": API_KEY},
            files={"file": f}
        )
    response.raise_for_status()
    return response.json()["job_id"]

def get_result(
    job_id: str,
    max_attempts: int = 24,
    poll_interval: float = 5.0,
    request_timeout: float = 30.0,
) -> dict:
    """Poll an extraction job until it completes, fails, or runs out of attempts.

    Args:
        job_id: Identifier of the job to poll.
        max_attempts: Maximum number of status polls. With the defaults the
            function waits roughly two minutes.
        poll_interval: Seconds to sleep between two polls.
        request_timeout: Timeout in seconds for each status request.

    Returns:
        The ``"result"`` payload of the completed job.

    Raises:
        requests.HTTPError: If a status poll returns an HTTP error status.
        TimeoutError: If the job is still unfinished after ``max_attempts``.
        Exception: If the job reports the status ``"failed"``.
    """
    for attempt in range(max_attempts):
        response = requests.get(
            f"{BASE_URL}/v1/jobs/{job_id}",
            headers={"X-API-Key": API_KEY},
            timeout=request_timeout,
        )
        # Surface HTTP errors directly instead of failing later with a
        # KeyError on a body that has no "status" field.
        response.raise_for_status()
        result = response.json()

        if result["status"] == "completed":
            return result["result"]
        if result["status"] == "failed":
            raise Exception(result["error"])

        # Sleeping after the final poll would only delay the TimeoutError.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)
    raise TimeoutError(f"Job {job_id} timed out")

def process_folder(folder: str) -> list[dict]:
    pdfs = list(Path(folder).glob("*.pdf"))

    # Submit all jobs in parallel
    with ThreadPoolExecutor(max_workers=5) as executor:
        job_ids = list(executor.map(submit_invoice, pdfs))

    # Collect results
    results = []
    for job_id in job_ids:
        results.append(get_result(job_id))

    return results

# Usage
# Processes the PDFs found in ./invoices, then prints one line per result.
invoices = process_folder("./invoices")
for invoice in invoices:
    # NOTE(review): key paths are taken from this example only — confirm
    # them against the API's response schema.
    print(invoice["vendor"]["name"], invoice["financials"]["total"])

Save results to CSV

import csv
import requests
import time

def extract_to_csv(pdf_paths: list[str], output_file: str):
    """Extract each invoice in turn and write a summary row per invoice to a CSV.

    Args:
        pdf_paths: Paths of the PDF invoices to extract. May be empty, in
            which case only the header row is written.
        output_file: Path of the CSV file to create or overwrite.

    Raises:
        KeyError: If an extraction result lacks one of the fields read below.
    """
    # Fixed up front so an empty input still yields a valid, header-only file
    # instead of failing on results[0].
    fieldnames = [
        "vendor",
        "invoice_number",
        "date",
        "total",
        "currency",
        "tax_amount",
    ]

    results = []
    for path in pdf_paths:
        invoice = extract_invoice(path)
        results.append({
            "vendor": invoice["vendor"]["name"],
            "invoice_number": invoice["invoice"]["number"],
            "date": invoice["invoice"]["date"],
            "total": invoice["financials"]["total"],
            "currency": invoice["financials"]["currency"],
            "tax_amount": invoice["financials"]["tax_amount"],
        })

    # newline="" is what the csv module requires; explicit UTF-8 keeps
    # non-ASCII vendor names from failing on platforms with a narrower
    # default encoding.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

    print(f"Saved {len(results)} invoices to {output_file}")

# Usage
# Extracts the three listed PDFs one after another and writes one row per
# invoice to invoices.csv.
extract_to_csv(["inv1.pdf", "inv2.pdf", "inv3.pdf"], "invoices.csv")

Error handling

import requests

def safe_extract(pdf_path: str) -> dict | None:
    try:
        with open(pdf_path, "rb") as f:
            response = requests.post(
                f"{BASE_URL}/v1/extract/invoice",
                headers={"X-API-Key": API_KEY},
                files={"file": f}
            )

        if response.status_code == 401:
            raise Exception("Invalid API key")
        if response.status_code == 413:
            raise Exception("File too large (max 50MB)")
        if response.status_code == 422:
            raise Exception("Not a valid PDF")
        if response.status_code == 429:
            raise Exception("Quota exceeded — upgrade your plan")

        response.raise_for_status()
        job_id = response.json()["job_id"]
        return get_result(job_id)

    except Exception as e:
        print(f"Error processing {pdf_path}: {e}")
        return None

Validation flags

Check for extraction warnings in the result:
result = extract_invoice("invoice.pdf")

# NOTE(review): confidence is read from result["validation"], while the flags
# below are read from a top-level "validation_flags" key — confirm both paths
# against the API's response schema.
if result["validation"]["confidence"] < 0.8:
    print("Low confidence — consider manual review")

# .get() with an empty-list default makes the loop a no-op when the key is absent.
flags = result.get("validation_flags", [])
for flag in flags:
    # Each flag is indexed by "severity", "code" and "message".
    print(f"[{flag['severity'].upper()}] {flag['code']}: {flag['message']}")