Apache PDFBox is an open-source Java library for working with PDF documents. In this post, I will show how to use Apache PDFBox to handle PDF files. The code examples in this post are based on PDFBox 2.0.29.
StringinputFilePath="your/pdf/filepath"; // Load PDF document PDDocumentdocument= PDDocument.load(newFile(inputFilePath)); // Create PDFTextStripper instance PDFTextStripperpdfStripper=newPDFTextStripper(); // Extract text from PDF Stringtext= pdfStripper.getText(document); // Print extracted text System.out.println(text); // Close the document document.close();
Extract page by page
StringinputFilePath="your/pdf/filepath"; // Load the PDF document PDDocumentdocument= PDDocument.load(newFile(inputFilePath)); // Create an instance of PDFTextStripper PDFTextStripperstripper=newPDFTextStripper(); // Iterate through each page and extract the text for (intpageNumber=1; pageNumber <= document.getNumberOfPages(); pageNumber++) { stripper.setStartPage(pageNumber); stripper.setEndPage(pageNumber);
Stringtext= stripper.getText(document); System.out.println("Page " + pageNumber + ":"); System.out.println(text); } // Close the PDF document document.close();
Split and Merge
Split
privatestaticvoidsplitPdf(String inputFilePath, String outputDir)throws IOException { Filefile=newFile(inputFilePath); // Load the PDF document PDDocumentdocument= PDDocument.load(file); // Create a PDF splitter object Splittersplitter=newSplitter(); // Split the document List<PDDocument> splitDocuments = splitter.split(document); // Get an iterator for the split documents Iterator<PDDocument> iterator = splitDocuments.iterator(); // Iterate through the split documents and save them inti=1; while (iterator.hasNext()) { PDDocumentsplitDocument= iterator.next(); StringoutputFilePath=newStringBuilder().append(outputDir) .append(File.separator) .append(file.getName().replaceAll("[.](pdf|PDF)", "")) .append("_split_") .append(i) .append(".pdf") .toString(); splitDocument.save(outputFilePath); splitDocument.close(); i++; } // Close the source document document.close(); System.out.println("PDF split successfully!"); }
Merge PDF files
privatestaticvoidmergePdfFiles(List<String> inputFilePaths, String outputFilePath)throws IOException { PDFMergerUtilitymerger=newPDFMergerUtility(); // Add as many files as you need for (String inputFilePath : inputFilePaths) { merger.addSource(newFile(inputFilePath)); } merger.setDestinationFileName(outputFilePath); merger.mergeDocuments(); System.out.println("PDF files merged successfully!"); }
Insert and remove pages
Insert pages
publicstaticvoidinsertPage(String sourceFile, String targetFile, int pageIndex)throws IOException { // Load the existing PDF document PDDocumentsourceDoc= PDDocument.load(newFile(sourceFile)); IntegersourcePageCount= sourceDoc.getNumberOfPages(); // Validate the requested page index if (pageIndex < 0 || pageIndex > sourcePageCount) { thrownewIllegalArgumentException("Invalid page index"); } // Create a new blank page PDPagenewPage=newPDPage(); // Insert the new page at the requested index if (sourcePageCount.equals(pageIndex)) { sourceDoc.getPages().add(newPage); } else { sourceDoc.getPages().insertBefore(newPage, sourceDoc.getPages().get(pageIndex)); } // Save the modified PDF document to a target file sourceDoc.save(targetFile); // Close the source and target documents sourceDoc.close(); }
Remove pages
privatestaticvoidremovePage(String inputFilePath, String outputFilePath, int pageIndex)throws IOException {
PDDocumentsourceDoc= PDDocument.load(newFile(inputFilePath)); IntegersourcePageCount= sourceDoc.getNumberOfPages(); // Validate the requested page index if (pageIndex < 0 || pageIndex >= sourcePageCount) { thrownewIllegalArgumentException("Invalid page index"); } sourceDoc.getPages().remove(pageIndex); sourceDoc.save(outputFilePath); sourceDoc.close(); }
privatestaticvoidremovePage2(String inputFilePath, String outputFilePath, int pageIndex)throws IOException { PDDocumentsourceDoc= PDDocument.load(newFile(inputFilePath)); IntegersourcePageCount= sourceDoc.getNumberOfPages(); // Validate the requested page index if (pageIndex < 0 || pageIndex >= sourcePageCount) { thrownewIllegalArgumentException("Invalid page index"); } Splittersplitter=newSplitter(); List<PDDocument> pages = splitter.split(sourceDoc); pages.remove(pageIndex); PDDocumentoutputDocument=newPDDocument(); for (PDDocument page : pages) { outputDocument.addPage(page.getPage(0)); } outputDocument.save(outputFilePath); sourceDoc.close(); outputDocument.close(); }
AccessPermissionap=newAccessPermission(); // disable printing, ap.setCanPrint(false); //disable copying ap.setCanExtractContent(false); //Disable other things if needed...
// Owner password (to open the file with all permissions) // User password (to open the file but with restricted permissions) StandardProtectionPolicyspp=newStandardProtectionPolicy(password, password, ap); // Define the length of the encryption key. // Possible values are 40, 128 or 256. intkeyLength=256; spp.setEncryptionKeyLength(keyLength);
AccessPermissionap=newAccessPermission(); // disable printing, ap.setCanPrint(false); //disable copying ap.setCanExtractContent(false); //Disable other things if needed...
// Owner password (to open the file with all permissions) // User password (to open the file but with restricted permissions) StandardProtectionPolicyspp=newStandardProtectionPolicy(newPassword, newPassword, ap); // Define the length of the encryption key. // Possible values are 40, 128 or 256. intkeyLength=256; spp.setEncryptionKeyLength(keyLength);
//Apply protection doc.protect(spp);
doc.save(outputFilePath); doc.close(); }
Remove password
publicstaticvoidremovePdfPassword(String inputFilePath, String outputFilePath, String password)throws IOException { PDDocumentdoc= PDDocument.load(newFile(inputFilePath), password); // Set the document access permissions doc.setAllSecurityToBeRemoved(true); // Save the unprotected PDF document doc.save(outputFilePath); // Close the document doc.close(); }
Convert to Image
PDF to Image
publicstaticvoidpdfToImage(String pdfFilePath, String imageFileDir)throws IOException { Filefile=newFile(pdfFilePath); PDDocumentdocument= PDDocument.load(file); // Create PDFRenderer object to render each page as an image PDFRendererpdfRenderer=newPDFRenderer(document); // Iterate over all the pages and convert each page to an image for (intpageIndex=0; pageIndex < document.getNumberOfPages(); pageIndex++) { // Render the page as an image // 100 DPI: general-quality // 300 DPI: high-quality // 600 DPI: pristine-quality BufferedImageimage= pdfRenderer.renderImageWithDPI(pageIndex, 300); // Save the image to a file StringimageFilePath=newStringBuilder() .append(imageFileDir) .append(File.separator) .append(file.getName().replaceAll("[.](pdf|PDF)", "")) .append("_") .append(pageIndex + 1) .append(".png") .toString(); ImageIO.write(image, "PNG", newFile(imageFilePath)); } // Close the document document.close(); }
Image to PDF
privatestaticvoidimageToPdf(String imagePath, String pdfPath)throws IOException { try (PDDocumentdoc=newPDDocument()) { PDPagepage=newPDPage(); doc.addPage(page); // createFromFile is the easiest way with an image file // if you already have the image in a BufferedImage, // call LosslessFactory.createFromImage() instead PDImageXObjectpdImage= PDImageXObject.createFromFile(imagePath, doc); // draw the image at full size at (x=0, y=0) try (PDPageContentStreamcontents=newPDPageContentStream(doc, page)) { // to draw the image at PDF width intscaledWidth=600; if (pdImage.getWidth() < 600) { scaledWidth = pdImage.getWidth(); } contents.drawImage(pdImage, 0, 0, scaledWidth, pdImage.getHeight() * scaledWidth / pdImage.getWidth()); } doc.save(pdfPath); } }
Create PDFs
StringoutputFilePath="output/pdf/filepath";
PDDocumentdocument=newPDDocument(); PDPagepage=newPDPage(PDRectangle.A4); document.addPage(page); // Create content stream to draw on the page PDPageContentStreamcontentStream=newPDPageContentStream(document, page); contentStream.setFont(PDType1Font.HELVETICA, 12); // Insert text contentStream.beginText(); contentStream.newLineAtOffset(100, 700); contentStream.showText("Hello, World!"); contentStream.endText(); // Load the image StringimageFilePath="C:\\Users\\Taogen\\Pictures\\icon.jpg"; PDImageXObjectimage= PDImageXObject.createFromFile(imageFilePath, document); // Set the scale and position of the image on the page floatscale=0.5f; // adjust the scale as needed floatx=100; // x-coordinate of the image floaty=500; // y-coordinate of the image // Draw the image on the page contentStream.drawImage(image, x, y, image.getWidth() * scale, image.getHeight() * scale); contentStream.close(); document.save(outputFilePath); document.close();