Installation
Copy
pip install parsefy flask
- parsefy: Parsefy SDK for document extraction
- flask: Lightweight WSGI web framework for Python
Environment setup
Copy
export PARSEFY_API_KEY=pk_your_api_key
Basic setup
Copy
from flask import Flask, request, jsonify
from parsefy import Parsefy, APIError
from pydantic import BaseModel, Field
# Flask application object; the route decorator below registers against it.
app = Flask(__name__)
# Parsefy client built with no arguments.
# NOTE(review): presumably picks up PARSEFY_API_KEY from the environment
# (see the export above) — confirm against the SDK documentation.
client = Parsefy()
class Invoice(BaseModel):
    """Fields to extract from an invoice document.

    Passed as ``schema`` to ``client.extract``. Every field is required and
    carries a human-readable description.
    """

    invoice_number: str = Field(description="The invoice number")
    date: str = Field(description="Invoice date")
    total: float = Field(description="Total amount")
    vendor: str = Field(description="Vendor name")
@app.route("/extract", methods=["POST"])
def extract_invoice():
    """Extract invoice fields from an uploaded document.

    Expects a multipart form upload under the ``file`` field.

    Returns:
        A ``(body, 400)`` pair when no ``file`` part is present, a
        ``(body, 422)`` pair carrying the extraction error message when
        ``result.error`` is set, a ``(body, e.status_code)`` pair when the
        SDK raises ``APIError``, and otherwise a JSON response holding the
        extracted ``data`` and the ``credits`` metadata.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    contents = request.files["file"].read()

    # Keep the try narrow: only the SDK call is expected to raise APIError.
    try:
        result = client.extract(file=contents, schema=Invoice)
    except APIError as e:
        return jsonify({"error": e.message}), e.status_code

    if result.error:
        return jsonify({"error": result.error.message}), 422

    return jsonify({
        "data": result.data.model_dump(),
        "credits": result.metadata.credits,
    })
if __name__ == "__main__":
    # Development server only. debug=True enables the interactive debugger,
    # which must never be exposed in production.
    app.run(debug=True)
With file validation
Copy
from flask import Flask, request, jsonify
from parsefy import Parsefy
from pydantic import BaseModel, Field
from werkzeug.utils import secure_filename
# Flask application object; the route decorator below registers against it.
app = Flask(__name__)
# Parsefy client built with no arguments.
# NOTE(review): presumably picks up PARSEFY_API_KEY from the environment
# (see the export above) — confirm against the SDK documentation.
client = Parsefy()
# Upload constraints enforced by the /extract handler.
ALLOWED_EXTENSIONS = {"pdf", "docx"}
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB


def allowed_file(filename: str) -> bool:
    """Return True when *filename* ends in an allowed extension.

    The extension is the text after the last ``.`` and is compared
    case-insensitively against ``ALLOWED_EXTENSIONS``. Names without a dot,
    or with nothing after the final dot, are rejected.
    """
    _, dot, extension = filename.rpartition(".")
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
class Invoice(BaseModel):
    """Fields to extract from an invoice document.

    Passed as ``schema`` to ``client.extract``. ``line_items`` is optional
    and defaults to an empty list; the other fields are required.
    """

    invoice_number: str = Field(description="The invoice number")
    date: str = Field(description="Invoice date")
    total: float = Field(description="Total amount")
    vendor: str = Field(description="Vendor name")
    # default_factory builds a fresh list per instance instead of declaring
    # a mutable literal as the default.
    line_items: list[dict] = Field(default_factory=list, description="Line items")
@app.route("/extract", methods=["POST"])
def extract_invoice():
    """Validate an uploaded document and extract invoice fields from it.

    Expects a multipart form upload under the ``file`` field.

    Returns:
        A ``(body, 400)`` pair when the ``file`` part is missing, its
        filename is empty, its extension is not allowed, or it exceeds
        ``MAX_FILE_SIZE``. A ``(body, 422)`` pair with the error code and
        message when ``result.error`` is set. A ``(body, e.status_code)``
        pair when the SDK raises ``APIError``. Otherwise a JSON response
        holding the extracted ``data`` and a ``meta`` object.
    """
    # Function-scope import: this script's top-level import only brings in
    # ``Parsefy``.
    from parsefy import APIError

    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]
    if file.filename == "":
        return jsonify({"error": "No file selected"}), 400
    if not allowed_file(file.filename):
        return jsonify({"error": "Only PDF and DOCX files are allowed"}), 400

    # Read one byte past the limit: enough to detect an oversized upload
    # without loading all of it into ``contents``.
    contents = file.read(MAX_FILE_SIZE + 1)
    if len(contents) > MAX_FILE_SIZE:
        return jsonify({"error": "File too large. Max 10MB"}), 400

    # Same APIError handling as the basic example, so SDK failures become
    # JSON error responses instead of unhandled exceptions.
    try:
        result = client.extract(file=contents, schema=Invoice)
    except APIError as e:
        return jsonify({"error": e.message}), e.status_code

    if result.error:
        return jsonify({
            "error": result.error.code,
            "message": result.error.message,
        }), 422

    return jsonify({
        "data": result.data.model_dump(),
        "meta": {
            "processing_time_ms": result.metadata.processing_time_ms,
            "credits": result.metadata.credits,
            "fallback_triggered": result.metadata.fallback_triggered,
        },
    })
if __name__ == "__main__":
    # Development server only. debug=True enables the interactive debugger,
    # which must never be exposed in production.
    app.run(debug=True)
Multiple schemas
Copy
from flask import Flask, request, jsonify
from parsefy import Parsefy
from pydantic import BaseModel, Field
# Flask application object; the route decorator below registers against it.
app = Flask(__name__)
# Parsefy client built with no arguments.
# NOTE(review): presumably picks up PARSEFY_API_KEY from the environment
# (see the export above) — confirm against the SDK documentation.
client = Parsefy()
class Invoice(BaseModel):
    """Extraction schema for invoices: invoice number and total amount."""

    invoice_number: str = Field(description="The invoice number")
    total: float = Field(description="Total amount")
class Receipt(BaseModel):
    """Extraction schema for receipts: merchant name and total paid."""

    merchant: str = Field(description="Merchant name")
    total: float = Field(description="Total paid")
class Contract(BaseModel):
    """Extraction schema for contracts: parties involved and effective date."""

    parties: list[str] = Field(description="Parties involved")
    effective_date: str = Field(description="Effective date")
# Maps the <doc_type> URL segment to the schema class used for extraction.
SCHEMAS = {
    "invoice": Invoice,
    "receipt": Receipt,
    "contract": Contract,
}
@app.route("/extract/<doc_type>", methods=["POST"])
def extract_document(doc_type):
    """Extract fields from an uploaded document using the schema for *doc_type*.

    Args:
        doc_type: URL path segment naming the schema; must be a key of
            ``SCHEMAS``.

    Returns:
        A ``(body, 400)`` pair when *doc_type* is unknown or no ``file``
        part is present. A ``(body, 422)`` pair with the extraction error
        message when ``result.error`` is set. A ``(body, e.status_code)``
        pair when the SDK raises ``APIError``. Otherwise a JSON response
        holding the ``type`` and the extracted ``data``.
    """
    # Function-scope import: this script's top-level import only brings in
    # ``Parsefy``.
    from parsefy import APIError

    schema = SCHEMAS.get(doc_type)
    if schema is None:
        return jsonify({
            "error": f"Invalid type. Allowed: {', '.join(SCHEMAS)}"
        }), 400

    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    contents = request.files["file"].read()

    # Same APIError handling as the basic example, so SDK failures become
    # JSON error responses instead of unhandled exceptions.
    try:
        result = client.extract(file=contents, schema=schema)
    except APIError as e:
        return jsonify({"error": e.message}), e.status_code

    if result.error:
        return jsonify({"error": result.error.message}), 422

    return jsonify({
        "type": doc_type,
        "data": result.data.model_dump(),
    })
if __name__ == "__main__":
    # Development server only. debug=True enables the interactive debugger,
    # which must never be exposed in production.
    app.run(debug=True)
Testing
Copy
# Extract invoice
curl -X POST http://localhost:5000/extract \
  -F "file=@invoice.pdf"
# Extract with type
curl -X POST http://localhost:5000/extract/receipt \
  -F "file=@receipt.pdf"
