Skip to main content

Installation

npm install parsefy zod fastify @fastify/multipart
  • parsefy: Parsefy SDK for document extraction
  • zod: TypeScript-first schema validation library
  • fastify: Fast, low-overhead web framework for Node.js
  • @fastify/multipart: Plugin for handling file uploads

Environment setup

export PARSEFY_API_KEY=pk_your_api_key

Basic setup

import Fastify from 'fastify';
import multipart from '@fastify/multipart';
import { Parsefy } from 'parsefy';
import * as z from 'zod';

// Create the server instance with Fastify's logger enabled.
const fastify = Fastify({ logger: true });
// Register the multipart plugin before any route is declared; the upload
// handler relies on it for `request.file()`.
await fastify.register(multipart);

// Parsefy SDK client used for document extraction.
// NOTE(review): no API key is passed here, so the SDK presumably reads
// PARSEFY_API_KEY from the environment — confirm against the SDK docs.
const client = new Parsefy();

// REQUIRED fields - these trigger the fallback when they score below the
// confidence threshold.
const requiredInvoiceFields = {
  invoice_number: z.string().describe('The invoice number'),
  total: z.number().describe('Total amount including tax'),
};

// OPTIONAL fields - a missing value here does not trigger the fallback.
const optionalInvoiceFields = {
  date: z.string().optional().describe('Invoice date'),
  vendor: z.string().optional().describe('Vendor name'),
};

/** Zod schema describing the invoice fields to extract from a document. */
const invoiceSchema = z.object({
  ...requiredInvoiceFields,
  ...optionalInvoiceFields,
});

/**
 * POST /extract
 *
 * Reads the uploaded file from the multipart body, sends it to Parsefy with
 * the invoice schema, and returns the extracted object together with its
 * confidence scores, the credits reported in the metadata, and the
 * verification result.
 *
 * Responds 400 when the request carries no file and 422 when the extraction
 * result contains an error.
 */
fastify.post('/extract', async (request, reply) => {
  const upload = await request.file();
  if (!upload) {
    return reply.status(400).send({ error: 'No file uploaded' });
  }

  // Buffer the whole upload in memory before handing it to the SDK.
  const fileBytes = await upload.toBuffer();

  const {
    object: extracted,
    error: failure,
    metadata: usage,
    verification,
  } = await client.extract({
    file: fileBytes,
    schema: invoiceSchema,
    confidenceThreshold: 0.85,
    enableVerification: true,
  });

  if (failure) {
    return reply.status(422).send({ error: failure.message });
  }

  return {
    data: extracted,
    confidence: extracted?._meta.confidence_score,
    fieldConfidence: extracted?._meta.field_confidence,
    credits: usage.credits,
    verification,
  };
});

// Start the HTTP server on port 3000 once the plugin and route are in place.
await fastify.listen({ port: 3000 });

With schema validation

import Fastify from 'fastify';
import multipart from '@fastify/multipart';
import { Parsefy, APIError } from 'parsefy';
import * as z from 'zod';

// Create the server instance with Fastify's logger enabled.
const fastify = Fastify({ logger: true });

// Register the multipart plugin (needed for `request.file()` in the handler)
// with an upload size limit of 10 * 1024 * 1024 bytes.
await fastify.register(multipart, {
  limits: {
    fileSize: 10 * 1024 * 1024, // 10MB
  },
});

// Parsefy SDK client used for document extraction.
// NOTE(review): no API key is passed here, so the SDK presumably reads
// PARSEFY_API_KEY from the environment — confirm against the SDK docs.
const client = new Parsefy();

// Different schemas for different document types
const schemas = {
  invoice: z.object({
    // REQUIRED
    invoice_number: z.string().describe('The invoice number'),
    total: z.number().describe('Total amount'),
    // OPTIONAL
    date: z.string().optional().describe('Invoice date'),
    vendor: z.string().optional().describe('Vendor name'),
  }),
  receipt: z.object({
    // REQUIRED
    merchant: z.string().describe('Merchant name'),
    total: z.number().describe('Total paid'),
    // OPTIONAL
    date: z.string().optional().describe('Transaction date'),
  }),
  bill: z.object({
    // REQUIRED
    provider: z.string().describe('Service provider'),
    amount_due: z.number().describe('Amount due'),
    // OPTIONAL
    due_date: z.string().optional().describe('Payment due date'),
    account_number: z.string().optional().describe('Account number'),
  }),
};

/**
 * Type guard: true only when `value` is one of the keys declared directly on
 * `schemas`.
 *
 * An own-property check is required because the value comes from the query
 * string. A plain `schemas[value]` lookup would also resolve inherited keys
 * such as `constructor` or `toString` to truthy `Object.prototype` members,
 * and those would then be passed to the SDK as if they were schemas.
 */
const isDocumentType = (value: unknown): value is keyof typeof schemas =>
  typeof value === 'string' &&
  Object.prototype.hasOwnProperty.call(schemas, value);

/**
 * POST /extract?type=<document type>
 *
 * Picks the schema named by the `type` query parameter (defaults to
 * `invoice`), reads the uploaded file from the multipart body, and runs the
 * Parsefy extraction with that schema.
 *
 * Responses:
 *   - 400 when `type` is not a key of `schemas`, or when no file was uploaded.
 *   - 422 when the extraction result contains an error.
 *   - the `APIError` status code when the SDK throws an `APIError`.
 *   - otherwise the extracted object plus confidence, timing, credit and
 *     fallback metadata and the verification result.
 *
 * Any error that is not an `APIError` is rethrown to Fastify.
 */
fastify.post<{
  // The query string is untrusted input, so it is typed as an optional string
  // and narrowed at runtime instead of being declared as already valid.
  Querystring: { type?: string };
}>('/extract', async (request, reply) => {
  const { type = 'invoice' } = request.query;

  if (!isDocumentType(type)) {
    return reply.status(400).send({ 
      error: `Invalid type. Allowed: ${Object.keys(schemas).join(', ')}` 
    });
  }
  const schema = schemas[type];

  const data = await request.file();
  if (!data) {
    return reply.status(400).send({ error: 'No file uploaded' });
  }

  // Buffer the whole upload in memory before handing it to the SDK.
  const buffer = await data.toBuffer();

  try {
    const { object, error, metadata, verification } = await client.extract({
      file: buffer,
      schema,
      confidenceThreshold: 0.85,
      enableVerification: true,
    });

    // Extraction failures reported in the result are surfaced as 422.
    if (error) {
      return reply.status(422).send({ error: error.message });
    }

    return {
      type,
      data: object,
      meta: {
        confidence: object?._meta.confidence_score,
        fieldConfidence: object?._meta.field_confidence,
        processingTimeMs: metadata.processing_time_ms,
        credits: metadata.credits,
        fallbackTriggered: metadata.fallback_triggered,
      },
      verification,
    };
  } catch (err) {
    // Errors thrown by the SDK carry their own HTTP status; pass it through.
    if (err instanceof APIError) {
      return reply.status(err.statusCode).send({ error: err.message });
    }
    throw err;
  }
});

// Start the HTTP server on port 3000, then print the local URL once
// `listen` has resolved.
await fastify.listen({ port: 3000 });
console.log('Server running on http://localhost:3000');

Testing

# Extract as invoice (default)
curl -X POST http://localhost:3000/extract \
  -F "file=@document.pdf"

# Extract as receipt
curl -X POST "http://localhost:3000/extract?type=receipt" \
  -F "file=@receipt.pdf"