Skip to main content

Installation

npm install parsefy zod fastify @fastify/multipart
  • parsefy: Parsefy SDK for document extraction
  • zod: TypeScript-first schema validation library
  • fastify: Fast and low overhead web framework for Node.js
  • @fastify/multipart: Plugin for handling file uploads

Environment setup

export PARSEFY_API_KEY=pk_your_api_key

Basic setup

import Fastify from 'fastify';
import multipart from '@fastify/multipart';
import { Parsefy } from 'parsefy';
import * as z from 'zod';

// Create the Fastify server instance with logging enabled.
const fastify = Fastify({ logger: true });
// Register the multipart plugin before the route below calls request.file().
await fastify.register(multipart);

// Constructed without arguments; presumably the SDK picks up the
// PARSEFY_API_KEY environment variable exported above — confirm in the SDK docs.
const client = new Parsefy();

// Fields to extract from an invoice. Each field carries a human-readable
// description attached through `.describe()`.
const invoiceFields = {
  invoice_number: z.string().describe('The invoice number'),
  date: z.string().describe('Invoice date'),
  total: z.number().describe('Total amount'),
  vendor: z.string().describe('Vendor name'),
};

// Object schema handed to `client.extract` by the /extract route.
const invoiceSchema = z.object(invoiceFields);

// POST /extract — accepts a single multipart file upload and responds with
// the fields extracted according to `invoiceSchema`.
fastify.post('/extract', async (request, reply) => {
  const upload = await request.file();

  // Nothing came back from request.file(): reject the request.
  if (!upload) {
    return reply.status(400).send({ error: 'No file uploaded' });
  }

  // Read the uploaded file fully into memory before handing it to the SDK.
  const fileBytes = await upload.toBuffer();

  const result = await client.extract({
    file: fileBytes,
    schema: invoiceSchema,
  });

  // A populated `error` on the result is surfaced to the caller as a 422.
  if (result.error) {
    return reply.status(422).send({ error: result.error.message });
  }

  return {
    data: result.object,
    credits: result.metadata.credits,
  };
});

// Start accepting connections on port 3000.
await fastify.listen({ port: 3000 });

With schema validation

import Fastify from 'fastify';
import multipart from '@fastify/multipart';
import { Parsefy, APIError } from 'parsefy';
import * as z from 'zod';

// Upload size cap passed to the multipart plugin: 10MB, expressed in bytes.
const MAX_UPLOAD_BYTES = 10 * 1024 * 1024;

// Server instance with logging enabled.
const fastify = Fastify({ logger: true });

// Register multipart handling with a file-size limit.
await fastify.register(multipart, {
  limits: { fileSize: MAX_UPLOAD_BYTES },
});

const client = new Parsefy();

// Fields extracted from an invoice.
const invoiceDocument = z.object({
  invoice_number: z.string().describe('The invoice number'),
  date: z.string().describe('Invoice date'),
  total: z.number().describe('Total amount'),
  vendor: z.string().describe('Vendor name'),
});

// Fields extracted from a receipt.
const receiptDocument = z.object({
  merchant: z.string().describe('Merchant name'),
  date: z.string().describe('Transaction date'),
  total: z.number().describe('Total paid'),
});

// Fields extracted from a contract.
const contractDocument = z.object({
  parties: z.array(z.string()).describe('Parties involved'),
  effective_date: z.string().describe('Effective date'),
  terms: z.string().describe('Key terms summary'),
});

// Different schemas for different document types, keyed by the value the
// /extract route reads from the `type` query parameter.
const schemas = {
  invoice: invoiceDocument,
  receipt: receiptDocument,
  contract: contractDocument,
};

/**
 * Reports whether `value` names one of the document types defined in `schemas`.
 *
 * Only own keys count. A plain `schemas[value]` lookup also finds members
 * inherited from `Object.prototype` (such as `constructor` or `toString`),
 * which are truthy and would slip past a simple `if (!schema)` check.
 *
 * @param value - Raw, caller-controlled query value.
 * @returns `true` when `value` is a string that is an own key of `schemas`.
 */
function isDocumentType(value: unknown): value is keyof typeof schemas {
  return (
    typeof value === 'string' &&
    Object.prototype.hasOwnProperty.call(schemas, value)
  );
}

// POST /extract?type=<document type>
//
// Accepts one multipart file upload and extracts it with the schema selected
// by the `type` query parameter (defaults to 'invoice').
//
// Responses:
//   200 - { type, data, meta } on success
//   400 - unknown `type`, or no file in the request
//   422 - the extraction result carried an error
//   APIError status - when the SDK throws an APIError
fastify.post<{
  // The query string is caller-controlled: `type` may be absent, and it is
  // not guaranteed to be a single valid key (depending on the querystring
  // parser, a repeated parameter may arrive as an array).
  Querystring: { type?: string | string[] };
}>('/extract', async (request, reply) => {
  const { type = 'invoice' } = request.query;

  // Validate before indexing so that only keys defined on `schemas` itself
  // are ever used to pick a schema.
  if (!isDocumentType(type)) {
    return reply.status(400).send({
      error: `Invalid type. Allowed: ${Object.keys(schemas).join(', ')}`,
    });
  }
  const schema = schemas[type];

  const data = await request.file();
  if (!data) {
    return reply.status(400).send({ error: 'No file uploaded' });
  }

  // Read the uploaded file fully into memory before handing it to the SDK.
  const buffer = await data.toBuffer();

  try {
    const { object, error, metadata } = await client.extract({
      file: buffer,
      schema,
    });

    // Errors reported on the result (as opposed to thrown) map to a 422.
    if (error) {
      return reply.status(422).send({ error: error.message });
    }

    return {
      type,
      data: object,
      meta: {
        processingTimeMs: metadata.processingTimeMs,
        credits: metadata.credits,
      },
    };
  } catch (err) {
    // Relay API failures with the status code carried by the error; anything
    // else is rethrown for Fastify's error handling.
    if (err instanceof APIError) {
      return reply.status(err.statusCode).send({ error: err.message });
    }
    throw err;
  }
});

await fastify.listen({ port: 3000 });
console.log('Server running on http://localhost:3000');

Testing

# `-F "file=@<path>"` sends the file at <path> as a multipart form field
# named "file".

# Extract as invoice (default)
curl -X POST http://localhost:3000/extract \
  -F "file=@invoice.pdf"

# Extract as receipt
curl -X POST "http://localhost:3000/extract?type=receipt" \
  -F "file=@receipt.pdf"