Skip to main content

Installation

pip install parsefy flask
  • parsefy: Parsefy SDK for document extraction
  • flask: Lightweight WSGI web framework for Python

Environment setup

export PARSEFY_API_KEY=pk_your_api_key

Basic setup

from flask import Flask, request, jsonify
from parsefy import Parsefy, APIError
from pydantic import BaseModel, Field

# Flask application object that the /extract route below is registered on.
app = Flask(__name__)
# Parsefy client shared by all requests. No API key is passed here; presumably it
# is read from the PARSEFY_API_KEY environment variable exported above — confirm.
client = Parsefy()

# Extraction schema handed to client.extract(); each Field description labels
# the value expected for that attribute.
class Invoice(BaseModel):
    invoice_number: str = Field(description="The invoice number")
    # Typed as a plain string: this schema does not enforce any date format.
    date: str = Field(description="Invoice date")
    total: float = Field(description="Total amount")
    vendor: str = Field(description="Vendor name")

@app.route("/extract", methods=["POST"])
def extract_invoice():
    """Extract invoice fields from an uploaded document.

    Expects a multipart POST with the document in the ``file`` form field.

    Returns:
        * 400 with an ``error`` message when no ``file`` part was sent.
        * 422 with the extraction error message when the result reports
          an error.
        * 200 with the extracted ``data`` and the ``credits`` figure taken
          from the result metadata on success.
        * The status code carried by ``APIError`` when the client raises.
    """
    uploads = request.files
    if "file" not in uploads:
        return jsonify({"error": "No file uploaded"}), 400

    # The raw bytes of the upload are passed straight to the client.
    document_bytes = uploads["file"].read()

    try:
        outcome = client.extract(file=document_bytes, schema=Invoice)

        if outcome.error:
            return jsonify({"error": outcome.error.message}), 422

        payload = {
            "data": outcome.data.model_dump(),
            "credits": outcome.metadata.credits,
        }
        return jsonify(payload)
    except APIError as exc:
        # Relay the API's own message and status code to the caller.
        return jsonify({"error": exc.message}), exc.status_code

if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): debug=True is meant for local development — confirm it is
    # turned off before this is deployed anywhere reachable.
    app.run(debug=True)

With file validation

from flask import Flask, request, jsonify
from parsefy import Parsefy
from pydantic import BaseModel, Field
from werkzeug.utils import secure_filename

# Flask application object that the /extract route below is registered on.
app = Flask(__name__)
# Parsefy client shared by all requests. No API key is passed here; presumably it
# is read from the PARSEFY_API_KEY environment variable — confirm.
client = Parsefy()

# Upload constraints applied before a document is sent for extraction.
ALLOWED_EXTENSIONS = {"pdf", "docx"}
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB


def allowed_file(filename):
    """Return True when *filename* has an allowed extension.

    The extension is whatever follows the last ``.`` in the name and is
    compared case-insensitively against ``ALLOWED_EXTENSIONS``. Names with
    no ``.`` at all are rejected.
    """
    _, dot, extension = filename.rpartition(".")
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

# Extraction schema handed to client.extract(); each Field description labels
# the value expected for that attribute.
class Invoice(BaseModel):
    invoice_number: str = Field(description="The invoice number")
    # Typed as a plain string: this schema does not enforce any date format.
    date: str = Field(description="Invoice date")
    total: float = Field(description="Total amount")
    vendor: str = Field(description="Vendor name")
    # Optional field: defaults to an empty list. Items are untyped dicts, so
    # their keys are not constrained by this schema.
    line_items: list[dict] = Field(default=[], description="Line items")

@app.route("/extract", methods=["POST"])
def extract_invoice():
    """Validate an uploaded document and extract invoice fields from it.

    Expects a multipart POST with the document in the ``file`` form field.

    Returns:
        * 400 when no ``file`` part was sent, the filename is empty, the
          extension is not allowed, or the file exceeds ``MAX_FILE_SIZE``.
        * 422 with the error ``code`` and ``message`` when the extraction
          result reports an error.
        * 200 with the extracted ``data`` and a ``meta`` object on success.
        * The status code carried by ``APIError`` when the client raises.
    """
    # Imported here so the handler is self-contained: this example's
    # module-level import only brings in ``Parsefy``.
    from parsefy import APIError

    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]

    # Covers both an empty string and a missing (None) filename.
    if not file.filename:
        return jsonify({"error": "No file selected"}), 400

    if not allowed_file(file.filename):
        return jsonify({"error": "Only PDF and DOCX files are allowed"}), 400

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without copying an arbitrarily large file into memory first.
    contents = file.read(MAX_FILE_SIZE + 1)

    if len(contents) > MAX_FILE_SIZE:
        return jsonify({"error": "File too large. Max 10MB"}), 400

    try:
        result = client.extract(file=contents, schema=Invoice)
    except APIError as e:
        # Relay the API's own message and status code instead of letting the
        # exception surface as an unhandled 500.
        return jsonify({"error": e.message}), e.status_code

    if result.error:
        return jsonify({
            "error": result.error.code,
            "message": result.error.message,
        }), 422

    return jsonify({
        "data": result.data.model_dump(),
        "meta": {
            "processing_time_ms": result.metadata.processing_time_ms,
            "credits": result.metadata.credits,
            "fallback_triggered": result.metadata.fallback_triggered,
        },
    })

if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): debug=True is meant for local development — confirm it is
    # turned off before this is deployed anywhere reachable.
    app.run(debug=True)

Multiple schemas

from flask import Flask, request, jsonify
from parsefy import Parsefy
from pydantic import BaseModel, Field

# Flask application object that the /extract/<doc_type> route below is registered on.
app = Flask(__name__)
# Parsefy client shared by all requests. No API key is passed here; presumably it
# is read from the PARSEFY_API_KEY environment variable — confirm.
client = Parsefy()

# Extraction schema for invoices: the invoice number and the total amount.
class Invoice(BaseModel):
    invoice_number: str = Field(description="The invoice number")
    total: float = Field(description="Total amount")

# Extraction schema for receipts: the merchant name and the total paid.
class Receipt(BaseModel):
    merchant: str = Field(description="Merchant name")
    total: float = Field(description="Total paid")

# Extraction schema for contracts: the parties involved and the effective date.
class Contract(BaseModel):
    parties: list[str] = Field(description="Parties involved")
    # Typed as a plain string: this schema does not enforce any date format.
    effective_date: str = Field(description="Effective date")

# Maps the <doc_type> URL segment to the schema class used for extraction.
# The keys are also the list of allowed types reported in the 400 error message.
SCHEMAS = {
    "invoice": Invoice,
    "receipt": Receipt,
    "contract": Contract,
}

@app.route("/extract/<doc_type>", methods=["POST"])
def extract_document(doc_type):
    """Extract fields from an uploaded document using the schema for *doc_type*.

    Args:
        doc_type: URL segment selecting the schema; must be a key of
            ``SCHEMAS``.

    Returns:
        * 400 when *doc_type* is not a known schema key or no ``file`` part
          was sent.
        * 422 with the extraction error message when the result reports
          an error.
        * 200 with the document ``type`` and the extracted ``data`` on
          success.
        * The status code carried by ``APIError`` when the client raises.
    """
    # Imported here so the handler is self-contained: this example's
    # module-level import only brings in ``Parsefy``.
    from parsefy import APIError

    if doc_type not in SCHEMAS:
        return jsonify({
            "error": f"Invalid type. Allowed: {', '.join(SCHEMAS)}"
        }), 400

    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]
    contents = file.read()
    schema = SCHEMAS[doc_type]

    try:
        result = client.extract(file=contents, schema=schema)
    except APIError as e:
        # Relay the API's own message and status code instead of letting the
        # exception surface as an unhandled 500.
        return jsonify({"error": e.message}), e.status_code

    if result.error:
        return jsonify({"error": result.error.message}), 422

    return jsonify({
        "type": doc_type,
        "data": result.data.model_dump(),
    })

if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): debug=True is meant for local development — confirm it is
    # turned off before this is deployed anywhere reachable.
    app.run(debug=True)

Testing

# Extract invoice
curl -X POST http://localhost:5000/extract \
  -F "file=@invoice.pdf"

# Extract with type
curl -X POST http://localhost:5000/extract/receipt \
  -F "file=@receipt.pdf"