Skip to main content

Installation

npm install parsefy zod express multer
npm install -D @types/express @types/multer typescript
  • parsefy: Parsefy SDK for document extraction
  • zod: TypeScript-first schema validation library
  • express: Fast, minimalist web framework for Node.js
  • multer: Middleware for handling multipart/form-data (file uploads)
  • @types/*: TypeScript type definitions (dev dependency)

Environment setup

export PARSEFY_API_KEY=pk_your_api_key

Basic setup

import express from 'express';
import multer from 'multer';
import { Parsefy } from 'parsefy';
import * as z from 'zod';

// Express application that hosts the extraction endpoint.
const app = express();
const upload = multer(); // Store files in memory
// Parsefy SDK client, constructed without explicit options.
// NOTE(review): presumably picks up PARSEFY_API_KEY from the environment — confirm against the SDK docs.
const client = new Parsefy();

// Define your schema
// Shape of the data to extract from an invoice. Each field carries a
// human-readable description attached with .describe().
// NOTE(review): presumably Parsefy uses these descriptions to guide extraction — confirm in the SDK docs.
const invoiceSchema = z.object({
  invoice_number: z.string().describe('The invoice number'),
  date: z.string().describe('Invoice date'), // plain string; no date format is enforced here
  total: z.number().describe('Total amount'),
  vendor: z.string().describe('Vendor name'),
});

// Extract endpoint: accepts one multipart upload in the 'file' field and
// responds with the data extracted according to invoiceSchema.
//   400 - the request carried no file
//   422 - client.extract() reported an error in its result
//   500 - an exception was thrown while handling the request
app.post('/extract', upload.single('file'), async (req, res) => {
  try {
    // req.file is unset when the request has no 'file' part.
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    const { object, error, metadata } = await client.extract({
      file: req.file.buffer,
      schema: invoiceSchema,
    });

    // Extraction-level failures come back in the result rather than being thrown.
    if (error) {
      return res.status(422).json({ error: error.message });
    }

    res.json({
      data: object,
      credits: metadata.credits,
    });
  } catch (err) {
    // Log the underlying failure so it can be diagnosed; the caller only
    // receives a generic message.
    console.error('Extraction failed:', err);
    res.status(500).json({ error: 'Extraction failed' });
  }
});

// Bind the server to port 3000; the callback logs the local URL once the
// server is listening.
app.listen(3000, function onListening() {
  console.log('Server running on http://localhost:3000');
});

With validation middleware

import express from 'express';
import multer from 'multer';
import { Parsefy, APIError } from 'parsefy';
import * as z from 'zod';

const app = express();

// Upload middleware: caps the file size and only lets PDF and DOCX uploads
// through, judged by the MIME type reported for the uploaded file.
const upload = multer({
  limits: { fileSize: 10 * 1024 * 1024 }, // 10MB limit
  fileFilter: (_req, file, done) => {
    const acceptedTypes = [
      'application/pdf',
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    ];
    // Guard clause: refuse anything whose MIME type is not on the list.
    if (!acceptedTypes.includes(file.mimetype)) {
      done(new Error('Only PDF and DOCX files are allowed'));
      return;
    }
    done(null, true);
  },
});

// Parsefy SDK client, constructed without explicit options.
// NOTE(review): presumably picks up PARSEFY_API_KEY from the environment — confirm against the SDK docs.
const client = new Parsefy();

// Shape of the data to extract from a receipt: three top-level fields plus a
// list of line items. Descriptions are attached with .describe().
// NOTE(review): presumably Parsefy uses these descriptions to guide extraction — confirm in the SDK docs.
const receiptSchema = z.object({
  merchant: z.string().describe('Store or merchant name'),
  date: z.string().describe('Transaction date'), // plain string; no date format is enforced here
  total: z.number().describe('Total amount paid'),
  // Each purchased item is an object with a name and a numeric price.
  items: z.array(z.object({
    name: z.string(),
    price: z.number(),
  })).describe('Purchased items'),
});

// Receipt extraction endpoint: accepts one multipart upload in the 'file'
// field and responds with the data extracted according to receiptSchema.
//   400            - the request carried no file
//   422            - client.extract() reported an error in its result
//   err.statusCode - an APIError was thrown by the SDK
//   500            - any other exception
app.post('/receipts/extract', upload.single('file'), async (req, res) => {
  try {
    // req.file is unset when the request has no 'file' part. Without this
    // guard, reading .buffer would throw and surface as a misleading 500.
    if (!req.file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    const { object, error, metadata } = await client.extract({
      file: req.file.buffer,
      schema: receiptSchema,
    });

    // Extraction-level failures come back in the result rather than being thrown.
    if (error) {
      return res.status(422).json({
        error: error.code,
        message: error.message,
      });
    }

    res.json({
      data: object,
      meta: {
        processingTime: metadata.processingTimeMs,
        credits: metadata.credits,
      },
    });
  } catch (err) {
    // Errors thrown by the SDK carry their own HTTP status; pass it through.
    if (err instanceof APIError) {
      return res.status(err.statusCode).json({ error: err.message });
    }
    // Log unexpected failures so they can be diagnosed; the caller only
    // receives a generic message.
    console.error('Receipt extraction failed:', err);
    res.status(500).json({ error: 'Internal server error' });
  }
});

// Error handler for multer
// Any error that reaches this middleware — a multer.MulterError or any other
// truthy error value — is answered with 400 and the error's message.
// All four parameters are kept: Express treats a middleware as an error
// handler only when it declares four arguments.
app.use((err: any, req: express.Request, res: express.Response, next: express.NextFunction) => {
  const uploadFailed = err instanceof multer.MulterError || Boolean(err);
  if (!uploadFailed) {
    // Nothing to report; hand control to the next middleware.
    next();
    return;
  }
  return res.status(400).json({ error: err.message });
});

// Start the HTTP server on port 3000 (this example passes no startup callback).
app.listen(3000);

Testing the endpoint

curl -X POST http://localhost:3000/extract \
  -F "file=@invoice.pdf"