Documentation Index
Fetch the complete documentation index at: https://docs.parzo.dev/llms.txt
Use this file to discover all available pages before exploring further.
Installation
No SDK required — Parzo works with standard HTTP libraries.
pip install requests
Basic example
import requests
import time
# Placeholder value — substitute your own Parzo API key.
API_KEY = "inv_your_key_here"
# Base URL that the request paths in these examples are appended to.
BASE_URL = "https://api.parzo.dev"
def extract_invoice(
    pdf_path: str,
    *,
    poll_interval: float = 5.0,
    max_wait: float | None = None,
    request_timeout: float = 30.0,
) -> dict:
    """Upload a PDF invoice and block until its extraction job finishes.

    Args:
        pdf_path: Path of the PDF file to upload.
        poll_interval: Seconds to sleep between job-status checks.
        max_wait: Upper bound, in seconds, on the time spent polling.
            ``None`` (the default) polls until the job completes or fails.
        request_timeout: Timeout, in seconds, passed to every HTTP request
            so a stalled connection cannot hang the call.

    Returns:
        The ``result`` field of the completed job.

    Raises:
        requests.HTTPError: If the upload or a status check returns an
            HTTP error status.
        TimeoutError: If ``max_wait`` elapses before the job finishes.
        Exception: If the job reports the ``failed`` status.
    """
    headers = {"X-API-Key": API_KEY}

    # 1. Submit the invoice
    with open(pdf_path, "rb") as f:
        response = requests.post(
            f"{BASE_URL}/v1/extract/invoice",
            headers=headers,
            files={"file": f},
            timeout=request_timeout,
        )
    response.raise_for_status()
    job_id = response.json()["job_id"]

    # 2. Poll for the result
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        poll = requests.get(
            f"{BASE_URL}/v1/jobs/{job_id}",
            headers=headers,
            timeout=request_timeout,
        )
        # Surface HTTP errors directly instead of failing later with a
        # KeyError on a body that has no "status" field.
        poll.raise_for_status()
        result = poll.json()
        if result["status"] == "completed":
            return result["result"]
        if result["status"] == "failed":
            raise Exception(f"Extraction failed: {result['error']}")
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Job {job_id} did not finish within {max_wait} seconds"
            )
        time.sleep(poll_interval)
# Usage
# The call returns only once the extraction job has completed (or raises if it failed).
invoice = extract_invoice("invoice.pdf")
# The result is read as nested mappings: vendor, financials and invoice sections.
print(f"Vendor: {invoice['vendor']['name']}")
print(f"Total: {invoice['financials']['total']} {invoice['financials']['currency']}")
print(f"Invoice number: {invoice['invoice']['number']}")
With environment variables
import os
import requests
import time
# Read the key from the PARZO_API_KEY environment variable instead of hard-coding it;
# os.environ[...] raises KeyError when the variable is not set.
API_KEY = os.environ["PARZO_API_KEY"]
# Base URL of the Parzo API.
BASE_URL = "https://api.parzo.dev"
Process multiple invoices
import requests
import time
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
# Placeholder value — substitute your own Parzo API key.
API_KEY = "inv_your_key_here"
# Base URL that the request paths below are appended to.
BASE_URL = "https://api.parzo.dev"
def submit_invoice(pdf_path: str) -> str:
    """Upload one PDF for extraction and return the ID of the created job.

    Raises ``requests.HTTPError`` when the API answers with an error status.
    """
    with open(pdf_path, "rb") as pdf_file:
        upload = requests.post(
            f"{BASE_URL}/v1/extract/invoice",
            headers={"X-API-Key": API_KEY},
            files={"file": pdf_file},
        )
    upload.raise_for_status()
    payload = upload.json()
    return payload["job_id"]
def get_result(
    job_id: str,
    *,
    max_attempts: int = 24,
    poll_interval: float = 5.0,
    request_timeout: float = 30.0,
) -> dict:
    """Poll a job until it finishes and return its extraction result.

    Args:
        job_id: Identifier returned when the invoice was submitted.
        max_attempts: Maximum number of status checks. With the defaults
            (24 checks, 5 seconds apart) polling lasts roughly 2 minutes.
        poll_interval: Seconds to sleep between two status checks.
        request_timeout: Timeout, in seconds, passed to every HTTP request
            so a stalled connection cannot hang the call.

    Returns:
        The ``result`` field of the completed job.

    Raises:
        requests.HTTPError: If a status check returns an HTTP error status.
        Exception: If the job reports the ``failed`` status; the message is
            the job's ``error`` field.
        TimeoutError: If the job is still unfinished after ``max_attempts``
            checks.
    """
    for attempt in range(max_attempts):
        # Sleep between checks only: no delay before the first check and no
        # pointless wait after the last one.
        if attempt:
            time.sleep(poll_interval)
        response = requests.get(
            f"{BASE_URL}/v1/jobs/{job_id}",
            headers={"X-API-Key": API_KEY},
            timeout=request_timeout,
        )
        # Surface HTTP errors directly instead of failing later with a
        # KeyError on a body that has no "status" field.
        response.raise_for_status()
        result = response.json()
        if result["status"] == "completed":
            return result["result"]
        if result["status"] == "failed":
            raise Exception(result["error"])
    raise TimeoutError(f"Job {job_id} timed out")
def process_folder(folder: str) -> list[dict]:
    """Extract every ``*.pdf`` file found directly inside ``folder``.

    Uploads run concurrently on up to five worker threads; the results are
    then fetched one job at a time, in submission order.
    """
    pdf_files = [*Path(folder).glob("*.pdf")]

    # Fan the uploads out across the thread pool.
    with ThreadPoolExecutor(max_workers=5) as pool:
        submitted_jobs = [*pool.map(submit_invoice, pdf_files)]

    # Gather each job's result sequentially.
    return [get_result(job) for job in submitted_jobs]
# Usage
# Extract every PDF matched by "*.pdf" in ./invoices.
invoices = process_folder("./invoices")
# Print the vendor name and total of each extracted invoice.
for invoice in invoices:
print(invoice["vendor"]["name"], invoice["financials"]["total"])
Save results to CSV
import csv
import requests
import time
def extract_to_csv(pdf_paths: list[str], output_file: str) -> None:
    """Extract several invoices and write one CSV row per invoice.

    All invoices are extracted before the output file is opened, so a
    failed extraction does not leave a partially written CSV behind.

    Args:
        pdf_paths: Paths of the PDF invoices to extract. May be empty, in
            which case the CSV contains only the header row.
        output_file: Path of the CSV file to create or overwrite.
    """
    # Fixed column order; also lets the header be written when there are no rows.
    fieldnames = [
        "vendor",
        "invoice_number",
        "date",
        "total",
        "currency",
        "tax_amount",
    ]

    results = []
    for path in pdf_paths:
        invoice = extract_invoice(path)
        results.append({
            "vendor": invoice["vendor"]["name"],
            "invoice_number": invoice["invoice"]["number"],
            "date": invoice["invoice"]["date"],
            "total": invoice["financials"]["total"],
            "currency": invoice["financials"]["currency"],
            "tax_amount": invoice["financials"]["tax_amount"],
        })

    # Explicit UTF-8 so non-ASCII vendor names do not depend on the
    # platform's default encoding; newline="" is required by the csv module.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)
    print(f"Saved {len(results)} invoices to {output_file}")
# Usage
# Extract three invoices and write them to invoices.csv.
extract_to_csv(["inv1.pdf", "inv2.pdf", "inv3.pdf"], "invoices.csv")
Error handling
import requests
def safe_extract(pdf_path: str) -> dict | None:
    """Extract one invoice, reporting failures instead of raising.

    Any exception (unreadable file, HTTP error, failed or timed-out job) is
    printed together with the file path, and ``None`` is returned. On
    success the job's extraction result is returned.
    """
    # Friendly messages for the status codes handled explicitly.
    known_failures = {
        401: "Invalid API key",
        413: "File too large (max 50MB)",
        422: "Not a valid PDF",
        429: "Quota exceeded — upgrade your plan",
    }
    try:
        with open(pdf_path, "rb") as pdf_file:
            reply = requests.post(
                f"{BASE_URL}/v1/extract/invoice",
                headers={"X-API-Key": API_KEY},
                files={"file": pdf_file},
            )
        reason = known_failures.get(reply.status_code)
        if reason is not None:
            raise Exception(reason)
        # Any other error status falls through to the generic HTTP error.
        reply.raise_for_status()
        return get_result(reply.json()["job_id"])
    except Exception as exc:
        print(f"Error processing {pdf_path}: {exc}")
        return None
Validation flags
Check for extraction warnings in the result:
result = extract_invoice("invoice.pdf")
# A confidence below 0.8 is used here as the cut-off for suggesting manual review.
if result["validation"]["confidence"] < 0.8:
print("Low confidence — consider manual review")
# A missing "validation_flags" key is treated as "no flags".
flags = result.get("validation_flags", [])
# Each flag is read as a mapping with "severity", "code" and "message" entries.
for flag in flags:
print(f"[{flag['severity'].upper()}] {flag['code']}: {flag['message']}")