Handling PDF in Java With Apache PDFBox

Posted on 2023-07-27 · Edited on 2025-06-30 · In Java, Java Libraries

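Apache PDFBox is an open-source Java library for working with PDF documents. In this post, I will show how to use Apache PDFBox to handle PDF files: extracting text, splitting and merging, inserting and removing pages, encrypting, converting to and from images, and creating new documents. The code examples in this post are based on pdfbox v2.0.29.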

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.29</version>
</dependency>

Extract Text

Extract all page text

String inputFilePath = "your/pdf/filepath";
// Load the PDF document; try-with-resources closes it even if extraction throws
try (PDDocument document = PDDocument.load(new File(inputFilePath))) {
    // No start/end page is configured, so the stripper's default page range is used
    PDFTextStripper pdfStripper = new PDFTextStripper();
    // Extract the text from the PDF
    String text = pdfStripper.getText(document);
    // Print the extracted text
    System.out.println(text);
}

Extract page by page

String inputFilePath = "your/pdf/filepath";
// Load the PDF document; try-with-resources closes it even if extraction throws
try (PDDocument document = PDDocument.load(new File(inputFilePath))) {
    // One stripper instance is reused; only its page window changes per iteration
    PDFTextStripper stripper = new PDFTextStripper();
    // The page count does not change while reading, so query it once
    int pageCount = document.getNumberOfPages();
    // The loop numbers pages from 1 and sets start == end to select a single page
    for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
        stripper.setStartPage(pageNumber);
        stripper.setEndPage(pageNumber);

        String text = stripper.getText(document);
        System.out.println("Page " + pageNumber + ":");
        System.out.println(text);
    }
}

Split and Merge

Split

/**
 * Splits a PDF into separate files and writes them to {@code outputDir}.
 *
 * <p>Each part is saved as {@code <source name without .pdf>_split_<n>.pdf}, where
 * {@code n} starts at 1 and follows the order returned by the splitter.
 *
 * @param inputFilePath path of the PDF to split
 * @param outputDir     existing directory that receives the split files
 * @throws IOException if the source cannot be loaded or a part cannot be saved
 */
private static void splitPdf(String inputFilePath, String outputDir) throws IOException {
    File file = new File(inputFilePath);
    // Strip only a trailing ".pdf" (any letter case); an unanchored pattern would also
    // eat ".pdf" in the middle of names such as "report.pdf.final.pdf".
    String baseName = file.getName().replaceFirst("(?i)[.]pdf$", "");

    // The source document is closed last, after all parts have been saved and closed
    try (PDDocument document = PDDocument.load(file)) {
        List<PDDocument> splitDocuments = new Splitter().split(document);
        try {
            int partNumber = 1;
            for (PDDocument splitDocument : splitDocuments) {
                String outputFilePath = outputDir + File.separator + baseName
                    + "_split_" + partNumber + ".pdf";
                splitDocument.save(outputFilePath);
                splitDocument.close();
                partNumber++;
            }
        } finally {
            // If a save failed, the remaining parts are still open. Close everything;
            // close() on an already-closed PDDocument is expected to be a no-op.
            for (PDDocument splitDocument : splitDocuments) {
                try {
                    splitDocument.close();
                } catch (IOException ignored) {
                    // Cleanup only: do not mask the exception that aborted the loop
                }
            }
        }
    }
    System.out.println("PDF split successfully!");
}

Merge PDF files

/**
 * Merges the given PDF files, in list order, into one output file.
 *
 * @param inputFilePaths paths of the PDFs to append, first to last
 * @param outputFilePath path of the merged PDF to write
 * @throws IOException if a source cannot be read or the merged file cannot be written
 */
private static void mergePdfFiles(List<String> inputFilePaths, String outputFilePath) throws IOException {
    PDFMergerUtility pdfMerger = new PDFMergerUtility();
    pdfMerger.setDestinationFileName(outputFilePath);
    // Register every source file in the order given by the caller
    for (int index = 0; index < inputFilePaths.size(); index++) {
        File source = new File(inputFilePaths.get(index));
        pdfMerger.addSource(source);
    }
    // NOTE(review): PDFBox 2.0.x appears to deprecate the no-argument mergeDocuments()
    // in favor of mergeDocuments(MemoryUsageSetting) - confirm for the version in use.
    pdfMerger.mergeDocuments();
    System.out.println("PDF files merged successfully!");
}

Insert and remove pages

Insert pages

/**
 * Inserts a blank page into a PDF and saves the result to {@code targetFile}.
 *
 * @param sourceFile path of the PDF to read
 * @param targetFile path of the modified PDF to write
 * @param pageIndex  zero-based position of the new page; a value equal to the current
 *                   page count appends the page at the end
 * @throws IllegalArgumentException if {@code pageIndex} is negative or greater than the page count
 * @throws IOException              if the PDF cannot be loaded or saved
 */
public static void insertPage(String sourceFile, String targetFile, int pageIndex) throws IOException {
    // try-with-resources closes the document even when validation or saving fails
    try (PDDocument sourceDoc = PDDocument.load(new File(sourceFile))) {
        int sourcePageCount = sourceDoc.getNumberOfPages();
        // Validate the requested page index
        if (pageIndex < 0 || pageIndex > sourcePageCount) {
            throw new IllegalArgumentException("Invalid page index");
        }
        // Create a new blank page
        PDPage newPage = new PDPage();
        if (pageIndex == sourcePageCount) {
            // There is no existing page to insert before, so append
            sourceDoc.getPages().add(newPage);
        } else {
            sourceDoc.getPages().insertBefore(newPage, sourceDoc.getPages().get(pageIndex));
        }
        // Save the modified PDF document to the target file
        sourceDoc.save(targetFile);
    }
}

Remove pages

/**
 * Removes one page from a PDF and saves the result to {@code outputFilePath}.
 *
 * @param inputFilePath  path of the PDF to read
 * @param outputFilePath path of the modified PDF to write
 * @param pageIndex      zero-based index of the page to remove
 * @throws IllegalArgumentException if {@code pageIndex} is outside {@code [0, pageCount)}
 * @throws IOException              if the PDF cannot be loaded or saved
 */
private static void removePage(String inputFilePath, String outputFilePath, int pageIndex) throws IOException {
    // try-with-resources closes the document even when validation or saving fails
    try (PDDocument sourceDoc = PDDocument.load(new File(inputFilePath))) {
        int sourcePageCount = sourceDoc.getNumberOfPages();
        // Validate the requested page index
        if (pageIndex < 0 || pageIndex >= sourcePageCount) {
            throw new IllegalArgumentException("Invalid page index");
        }
        sourceDoc.getPages().remove(pageIndex);
        sourceDoc.save(outputFilePath);
    }
}

/**
 * Removes one page from a PDF by splitting it into parts and reassembling every part
 * except the one at {@code pageIndex}.
 *
 * @param inputFilePath  path of the PDF to read
 * @param outputFilePath path of the modified PDF to write
 * @param pageIndex      zero-based index of the page to remove
 * @throws IllegalArgumentException if {@code pageIndex} is outside {@code [0, pageCount)}
 * @throws IOException              if the PDF cannot be loaded or saved
 */
private static void removePage2(String inputFilePath, String outputFilePath, int pageIndex) throws IOException {
    // The source stays open until the output has been saved
    try (PDDocument sourceDoc = PDDocument.load(new File(inputFilePath))) {
        int sourcePageCount = sourceDoc.getNumberOfPages();
        // Validate the requested page index
        if (pageIndex < 0 || pageIndex >= sourcePageCount) {
            throw new IllegalArgumentException("Invalid page index");
        }
        // Assumes the default Splitter configuration yields one document per page,
        // which is what the getPage(0) call below relies on.
        List<PDDocument> pages = new Splitter().split(sourceDoc);
        try (PDDocument outputDocument = new PDDocument()) {
            for (int i = 0; i < pages.size(); i++) {
                if (i == pageIndex) {
                    continue; // this is the page being removed
                }
                outputDocument.addPage(pages.get(i).getPage(0));
            }
            outputDocument.save(outputFilePath);
        } finally {
            // Every split document, including the discarded one, must be closed
            for (PDDocument page : pages) {
                try {
                    page.close();
                } catch (IOException ignored) {
                    // Cleanup only: do not mask an exception raised while saving
                }
            }
        }
    }
}

Encryption

Encrypt

/**
 * Encrypts a PDF with a password and saves the protected copy to {@code outputFilePath}.
 *
 * <p>Printing and content extraction are disabled in the access permissions.
 *
 * @param inputFilePath  path of the unprotected PDF to read
 * @param outputFilePath path of the encrypted PDF to write
 * @param password       value used as both the owner and the user password
 * @throws IOException if the PDF cannot be loaded, protected, or saved
 */
public static void encryptPdf(String inputFilePath, String outputFilePath, String password) throws IOException {
    // try-with-resources closes the document even if protecting or saving fails
    try (PDDocument doc = PDDocument.load(new File(inputFilePath))) {
        AccessPermission ap = new AccessPermission();
        // Disable printing
        ap.setCanPrint(false);
        // Disable copying
        ap.setCanExtractContent(false);
        // Disable other things if needed...

        // Arguments are (owner password, user password, permissions).
        // NOTE(review): the same value is used for both roles, so anyone who can open
        // the file presumably also holds owner rights and the restrictions above are
        // advisory at best - consider a distinct owner password.
        StandardProtectionPolicy spp = new StandardProtectionPolicy(password, password, ap);
        // Define the length of the encryption key.
        // Possible values are 40, 128 or 256.
        int keyLength = 256;
        spp.setEncryptionKeyLength(keyLength);

        // Apply protection
        doc.protect(spp);

        doc.save(outputFilePath);
    }
}

Update password

/**
 * Opens a password-protected PDF and saves a copy protected with a new password.
 *
 * <p>Printing and content extraction are disabled in the new access permissions.
 *
 * @param inputFilePath  path of the protected PDF to read
 * @param outputFilePath path of the re-encrypted PDF to write
 * @param oldPassword    password used to open the input file
 * @param newPassword    value used as both the new owner and the new user password
 * @throws IOException if the PDF cannot be loaded, protected, or saved
 */
public static void updatePdfPassword(String inputFilePath, String outputFilePath,
                                      String oldPassword, String newPassword) throws IOException {
    // try-with-resources closes the document even if protecting or saving fails
    try (PDDocument doc = PDDocument.load(new File(inputFilePath), oldPassword)) {
        AccessPermission ap = new AccessPermission();
        // Disable printing
        ap.setCanPrint(false);
        // Disable copying
        ap.setCanExtractContent(false);
        // Disable other things if needed...

        // Arguments are (owner password, user password, permissions).
        // NOTE(review): the same value is used for both roles, so anyone who can open
        // the file presumably also holds owner rights - consider distinct passwords.
        StandardProtectionPolicy spp = new StandardProtectionPolicy(newPassword, newPassword, ap);
        // Define the length of the encryption key.
        // Possible values are 40, 128 or 256.
        int keyLength = 256;
        spp.setEncryptionKeyLength(keyLength);

        // Apply protection
        doc.protect(spp);

        doc.save(outputFilePath);
    }
}

Remove password

/**
 * Opens a password-protected PDF and saves an unprotected copy.
 *
 * @param inputFilePath  path of the protected PDF to read
 * @param outputFilePath path of the unprotected PDF to write
 * @param password       password used to open the input file
 * @throws IOException if the PDF cannot be loaded or saved
 */
public static void removePdfPassword(String inputFilePath, String outputFilePath,
                                      String password) throws IOException {
    // try-with-resources closes the document even if saving fails
    try (PDDocument doc = PDDocument.load(new File(inputFilePath), password)) {
        // Ask PDFBox to drop all security settings when the document is saved
        doc.setAllSecurityToBeRemoved(true);
        // Save the unprotected PDF document
        doc.save(outputFilePath);
    }
}

Convert to Image

PDF to Image

/**
 * Renders every page of a PDF at 300 DPI and writes each one as a PNG file.
 *
 * <p>Images are named {@code <source name without .pdf>_<page number>.png}, with page
 * numbers starting at 1.
 *
 * @param pdfFilePath  path of the PDF to render
 * @param imageFileDir existing directory that receives the PNG files
 * @throws IOException if the PDF cannot be loaded or an image cannot be written
 */
public static void pdfToImage(String pdfFilePath, String imageFileDir) throws IOException {
    File file = new File(pdfFilePath);
    // Strip only a trailing ".pdf" (any letter case); an unanchored pattern would also
    // eat ".pdf" in the middle of the file name.
    String baseName = file.getName().replaceFirst("(?i)[.]pdf$", "");

    // try-with-resources closes the document even if rendering or writing fails
    try (PDDocument document = PDDocument.load(file)) {
        // Create PDFRenderer object to render each page as an image
        PDFRenderer pdfRenderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
            // 100 DPI: general-quality
            // 300 DPI: high-quality
            // 600 DPI: pristine-quality
            BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 300);
            // File names use 1-based page numbers; renderer indexes are 0-based
            String imageFilePath = imageFileDir + File.separator + baseName
                + "_" + (pageIndex + 1) + ".png";
            // ImageIO.write returns false instead of throwing when no writer matches
            if (!ImageIO.write(image, "PNG", new File(imageFilePath))) {
                throw new IOException("No PNG writer available for " + imageFilePath);
            }
        }
    }
}

Image to PDF

/**
 * Creates a single-page PDF containing the given image and saves it to {@code pdfPath}.
 *
 * <p>The image is drawn at the page origin (0, 0). Its width is capped at 600 units and
 * the height is scaled by the same ratio using integer arithmetic.
 *
 * @param imagePath path of the image file to embed
 * @param pdfPath   path of the PDF to write
 * @throws IOException if the image cannot be read or the PDF cannot be written
 */
private static void imageToPdf(String imagePath, String pdfPath) throws IOException {
    try (PDDocument pdf = new PDDocument()) {
        PDPage onlyPage = new PDPage();
        pdf.addPage(onlyPage);
        // createFromFile is the simplest route for an image on disk; for an image
        // already held as a BufferedImage, LosslessFactory.createFromImage() applies
        PDImageXObject picture = PDImageXObject.createFromFile(imagePath, pdf);
        try (PDPageContentStream canvas = new PDPageContentStream(pdf, onlyPage)) {
            // Never upscale: use the image's own width when it is narrower than 600
            int targetWidth = Math.min(600, picture.getWidth());
            // Keep the aspect ratio; integer division truncates like the width cap
            int targetHeight = picture.getHeight() * targetWidth / picture.getWidth();
            canvas.drawImage(picture, 0, 0, targetWidth, targetHeight);
        }
        pdf.save(pdfPath);
    }
}

Create PDFs

String outputFilePath = "output/pdf/filepath";

// try-with-resources closes the document even if drawing or saving fails
try (PDDocument document = new PDDocument()) {
    PDPage page = new PDPage(PDRectangle.A4);
    document.addPage(page);
    // The content stream is closed at the end of this inner block, before the save below
    try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
        contentStream.setFont(PDType1Font.HELVETICA, 12);
        // Insert text
        contentStream.beginText();
        contentStream.newLineAtOffset(100, 700);
        contentStream.showText("Hello, World!");
        contentStream.endText();
        // Load the image
        String imageFilePath = "C:\\Users\\Taogen\\Pictures\\icon.jpg";
        PDImageXObject image = PDImageXObject.createFromFile(imageFilePath, document);
        // Set the scale and position of the image on the page
        float scale = 0.5f; // adjust the scale as needed
        float x = 100; // x-coordinate of the image
        float y = 500; // y-coordinate of the image
        // Draw the image on the page
        contentStream.drawImage(image, x, y, image.getWidth() * scale, image.getHeight() * scale);
    }
    document.save(outputFilePath);
}