Installation
Copy
npm install parsefy zod express multer
npm install -D @types/express @types/multer typescript
- parsefy: Parsefy SDK for document extraction
- zod: TypeScript-first schema validation library
- express: Fast, minimalist web framework for Node.js
- multer: Middleware for handling multipart/form-data (file uploads)
- @types/*: TypeScript type definitions (dev dependency)
Environment setup
Copy
export PARSEFY_API_KEY=pk_your_api_key
Basic setup
Copy
import express from 'express';
import multer from 'multer';
import { Parsefy } from 'parsefy';
import * as z from 'zod';
// Parsefy SDK client used by the route handler.
// NOTE(review): constructed without arguments — presumably it picks up
// PARSEFY_API_KEY from the environment; confirm against the SDK docs.
const client = new Parsefy();

// Called without options, so uploaded files are stored in memory.
const upload = multer();

// Express application that hosts the extraction endpoint.
const app = express();
/**
 * Shape of the data to extract from an invoice.
 * Each field carries a short description via `.describe()`.
 */
const invoiceSchema = z.object({
  // Identifier of the invoice.
  invoice_number: z.string().describe('The invoice number'),
  // Kept as a string; no date parsing is applied here.
  date: z.string().describe('Invoice date'),
  // Numeric total.
  total: z.number().describe('Total amount'),
  // Name of the vendor.
  vendor: z.string().describe('Vendor name'),
});
/**
 * POST /extract — accepts a single multipart upload in the `file` field and
 * responds with the data extracted according to `invoiceSchema`.
 *
 * Responses:
 * - 400 `{ error }` when the request carries no `file` part
 * - 422 `{ error }` when `client.extract` reports an extraction error
 * - 500 `{ error }` when anything in the handler throws
 * - 200 `{ data, credits }` on success
 */
app.post('/extract', upload.single('file'), async (req, res): Promise<void> => {
  try {
    if (!req.file) {
      // Send the response, then return separately so every path yields void.
      res.status(400).json({ error: 'No file uploaded' });
      return;
    }

    const { object, error, metadata } = await client.extract({
      file: req.file.buffer,
      schema: invoiceSchema,
    });

    if (error) {
      res.status(422).json({ error: error.message });
      return;
    }

    res.json({
      data: object,
      credits: metadata.credits,
    });
  } catch (err: unknown) {
    // Keep the cause in the server log; the client only sees a generic message.
    console.error('Extraction failed:', err);
    res.status(500).json({ error: 'Extraction failed' });
  }
});
// Logged once the server is accepting connections.
const onListening = (): void => {
  console.log('Server running on http://localhost:3000');
};

// Start the HTTP server on port 3000.
app.listen(3000, onListening);
With validation middleware
Copy
import express from 'express';
import multer from 'multer';
import { Parsefy, APIError } from 'parsefy';
import * as z from 'zod';
const app = express();

// Upload size cap: 10MB, expressed in bytes.
const MAX_UPLOAD_BYTES = 10 * 1024 * 1024;

// MIME types the upload filter lets through (PDF and DOCX).
const ACCEPTED_MIME_TYPES = [
  'application/pdf',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
];

// Multer instance that enforces the size cap and rejects any other MIME type.
const upload = multer({
  limits: { fileSize: MAX_UPLOAD_BYTES },
  fileFilter: (_req, file, cb) => {
    if (!ACCEPTED_MIME_TYPES.includes(file.mimetype)) {
      // Rejected uploads are reported through the callback's error argument.
      cb(new Error('Only PDF and DOCX files are allowed'));
      return;
    }
    cb(null, true);
  },
});

// Parsefy SDK client used by the route handler.
const client = new Parsefy();
// One purchased line item: a name and a numeric price.
const receiptItemSchema = z.object({
  name: z.string(),
  price: z.number(),
});

/**
 * Shape of the data to extract from a receipt: merchant, date, total and
 * the list of purchased items.
 */
const receiptSchema = z.object({
  merchant: z.string().describe('Store or merchant name'),
  date: z.string().describe('Transaction date'),
  total: z.number().describe('Total amount paid'),
  items: z.array(receiptItemSchema).describe('Purchased items'),
});
/**
 * POST /receipts/extract — accepts a single multipart upload in the `file`
 * field and responds with the data extracted according to `receiptSchema`.
 *
 * Responses:
 * - 400 `{ error }` when the request carries no `file` part
 * - 422 `{ error, message }` when `client.extract` reports an extraction error
 * - `err.statusCode` with `{ error }` when the SDK throws an `APIError`
 * - 500 `{ error }` for any other thrown value
 * - 200 `{ data, meta }` on success
 */
app.post('/receipts/extract', upload.single('file'), async (req, res): Promise<void> => {
  try {
    // Guard instead of asserting with `!`: a request without a file part
    // would otherwise throw on `.buffer` and be reported as a 500.
    if (!req.file) {
      res.status(400).json({ error: 'No file uploaded' });
      return;
    }

    const { object, error, metadata } = await client.extract({
      file: req.file.buffer,
      schema: receiptSchema,
    });

    if (error) {
      res.status(422).json({
        error: error.code,
        message: error.message,
      });
      return;
    }

    res.json({
      data: object,
      meta: {
        processingTime: metadata.processingTimeMs,
        credits: metadata.credits,
      },
    });
  } catch (err: unknown) {
    if (err instanceof APIError) {
      // Relay the status code carried by the SDK error.
      res.status(err.statusCode).json({ error: err.message });
      return;
    }

    // Unexpected failure: keep the cause in the server log, answer generically.
    console.error('Receipt extraction failed:', err);
    res.status(500).json({ error: 'Internal server error' });
  }
});
/**
 * Error handler for multer: turns upload errors (size limit, rejected file
 * type) into a 400 JSON response of the form `{ error: message }`.
 *
 * Express recognises error-handling middleware by its four-parameter
 * signature, so the unused request parameter must stay in place.
 */
app.use((err: unknown, _req: express.Request, res: express.Response, next: express.NextFunction): void => {
  if (!err) {
    next();
    return;
  }

  // Once a response has started it cannot be replaced; hand the error to
  // Express's default handler instead.
  if (res.headersSent) {
    next(err);
    return;
  }

  // Narrow before reading `.message`: a thrown value need not be an Error.
  let message: string;
  if (err instanceof Error) {
    // Covers both multer.MulterError and the Error raised by the file filter.
    message = err.message;
  } else if (typeof err === 'string') {
    message = err;
  } else {
    message = 'Upload failed';
  }

  res.status(400).json({ error: message });
});

// Start the HTTP server on port 3000.
app.listen(3000);
Testing the endpoint
Copy
curl -X POST http://localhost:3000/extract \
-F "file=@invoice.pdf"
