Installation
Copy
npm install parsefy zod express multer
npm install -D @types/express @types/multer typescript
- parsefy: Parsefy SDK for document extraction
- zod: TypeScript-first schema validation library
- express: Fast, minimalist web framework for Node.js
- multer: Middleware for handling multipart/form-data (file uploads)
- @types/*: TypeScript type definitions (dev dependency)
Environment setup
Copy
export PARSEFY_API_KEY=pk_your_api_key
Basic setup
Copy
import express from 'express';
import multer from 'multer';
import { Parsefy } from 'parsefy';
import * as z from 'zod';
// Parsefy client, constructed without arguments.
// NOTE(review): presumably reads PARSEFY_API_KEY from the environment — confirm against the SDK docs.
const client = new Parsefy();

// Multer with default options: store files in memory
const upload = multer();

// Express application that hosts the extraction route.
const app = express();
// Invoice schema, assembled from a required and an optional group of fields.

// REQUIRED - triggers fallback if below confidence threshold
const requiredInvoiceFields = {
  invoice_number: z.string().describe('The invoice number'),
  total: z.number().describe('Total amount including tax'),
};

// OPTIONAL - won't trigger fallback if missing
const optionalInvoiceFields = {
  date: z.string().optional().describe('Invoice date'),
  vendor: z.string().optional().describe('Vendor name'),
};

const invoiceSchema = z.object({
  ...requiredInvoiceFields,
  ...optionalInvoiceFields,
});
/**
 * Extract endpoint: POST /extract
 *
 * Accepts one multipart upload in the `file` field and runs it through
 * `client.extract` with `invoiceSchema`.
 *
 * Responses:
 * - 400 when the request carries no file
 * - 422 when the extraction result contains an `error`
 * - 500 when the extraction call throws
 * - 200 with the extracted object, confidence scores, credits, fallback flag
 *   and verification result otherwise
 */
app.post('/extract', upload.single('file'), async (req, res): Promise<void> => {
  try {
    if (!req.file) {
      res.status(400).json({ error: 'No file uploaded' });
      return;
    }

    const { object, error, metadata, verification } = await client.extract({
      file: req.file.buffer,
      schema: invoiceSchema,
      confidenceThreshold: 0.85, // default
      enableVerification: true, // Enable math verification
    });

    if (error) {
      res.status(422).json({ error: error.message });
      return;
    }

    res.json({
      data: object,
      confidence: object?._meta.confidence_score,
      fieldConfidence: object?._meta.field_confidence,
      credits: metadata.credits,
      fallbackTriggered: metadata.fallbackTriggered,
      verification,
    });
  } catch (err) {
    // Keep the cause in the server log; the client only gets a generic message.
    console.error('Extraction failed:', err);
    res.status(500).json({ error: 'Extraction failed' });
  }
});
// Callback handed to app.listen; prints the local URL of the server.
function logServerReady(): void {
  console.log('Server running on http://localhost:3000');
}

app.listen(3000, logServerReady);
With validation middleware
Copy
import express from 'express';
import multer from 'multer';
import { Parsefy, APIError } from 'parsefy';
import * as z from 'zod';
// MIME types the upload filter lets through: PDF and DOCX.
const allowedMimeTypes = [
  'application/pdf',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
];

const app = express();

// Multer configured with a size cap and a MIME-type filter.
const upload = multer({
  limits: { fileSize: 10 * 1024 * 1024 }, // 10MB limit
  fileFilter: (_req, file, cb) => {
    // Reject anything that is not on the allow-list; the error reaches the
    // Express error-handling middleware.
    if (!allowedMimeTypes.includes(file.mimetype)) {
      cb(new Error('Only PDF and DOCX files are allowed'));
      return;
    }
    cb(null, true);
  },
});

const client = new Parsefy();
// Shape of one purchased line item on a receipt.
const receiptItemSchema = z.object({
  name: z.string(),
  price: z.number(),
});

// Receipt schema: merchant and total are required, the rest is optional.
const receiptSchema = z.object({
  // REQUIRED
  merchant: z.string().describe('Store or merchant name'),
  total: z.number().describe('Total amount paid'),
  // OPTIONAL - may not be on all receipts
  date: z.string().optional().describe('Transaction date'),
  items: z.array(receiptItemSchema).optional().describe('Purchased items'),
});
/**
 * POST /receipts/extract
 *
 * Accepts one multipart upload in the `file` field (already filtered by the
 * multer `fileFilter` and size limit) and runs it through `client.extract`
 * with `receiptSchema`.
 *
 * Responses:
 * - 400 when the request carries no file
 * - 422 with `{ error: code, message }` when the extraction result contains an `error`
 * - the `APIError`'s own status code when the SDK throws an `APIError`
 * - 500 for any other thrown error
 * - 200 with the extracted object, a `meta` summary (including the fields
 *   scoring below the threshold) and the verification result otherwise
 */
app.post('/receipts/extract', upload.single('file'), async (req, res): Promise<void> => {
  // Single source of truth: used as the extraction threshold and as the
  // cut-off when listing low-confidence fields.
  const confidenceThreshold = 0.80; // Lower threshold for receipts

  try {
    // Without this guard a request with no file would throw on `.buffer`
    // and surface as a 500 instead of a client error.
    if (!req.file) {
      res.status(400).json({ error: 'No file uploaded' });
      return;
    }

    const { object, error, metadata, verification } = await client.extract({
      file: req.file.buffer,
      schema: receiptSchema,
      confidenceThreshold,
      enableVerification: true,
    });

    if (error) {
      res.status(422).json({
        error: error.code,
        message: error.message,
      });
      return;
    }

    // Check for low confidence fields
    const lowConfidence =
      object?._meta.field_confidence.filter((fc) => fc.score < confidenceThreshold) ?? [];

    res.json({
      data: object,
      meta: {
        confidence: object?._meta.confidence_score,
        fieldConfidence: object?._meta.field_confidence,
        lowConfidenceFields: lowConfidence,
        // Result metadata is read with camelCase keys, like `metadata.fallbackTriggered`
        // in the basic example.
        processingTime: metadata.processingTimeMs,
        credits: metadata.credits,
        fallbackTriggered: metadata.fallbackTriggered,
      },
      verification,
    });
  } catch (err) {
    if (err instanceof APIError) {
      res.status(err.statusCode).json({ error: err.message });
      return;
    }
    // Keep the cause in the server log; the client only gets a generic message.
    console.error('Receipt extraction failed:', err);
    res.status(500).json({ error: 'Internal server error' });
  }
});
/**
 * Error handler for multer.
 *
 * Express error-handling middleware (four parameters). Any `Error` that
 * reaches it is answered with HTTP 400 and the error's message; this covers
 * both `multer.MulterError` (an `Error` subclass) and the plain `Error`
 * produced by the upload `fileFilter`. Anything else is passed on with
 * `next(err)`.
 */
app.use((err: unknown, _req: express.Request, res: express.Response, next: express.NextFunction): void => {
  // A response that has already started cannot be replaced; hand the error on.
  if (res.headersSent) {
    next(err);
    return;
  }

  if (err instanceof Error) {
    res.status(400).json({ error: err.message });
    return;
  }

  next(err);
});
// Port the example server listens on.
const PORT = 3000;

app.listen(PORT);
Testing the endpoint
Copy
curl -X POST http://localhost:3000/extract \
-F "file=@invoice.pdf"
