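Apache PDFBox is an open-source Java library for working with PDF documents. In this post, we'll show how to use Apache PDFBox for common PDF tasks: extracting text, splitting and merging files, inserting and removing pages, adding and removing password protection, converting between PDFs and images, and creating new PDFs. The code examples in this post are based on PDFBox v2.0.29.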
StringinputFilePath="your/pdf/filepath"; // Load PDF document PDDocumentdocument= PDDocument.load(newFile(inputFilePath)); // Create PDFTextStripper instance PDFTextStripperpdfStripper=newPDFTextStripper(); // Extract text from PDF Stringtext= pdfStripper.getText(document); // Print extracted text System.out.println(text); // Close the document document.close();
Extract page by page
StringinputFilePath="your/pdf/filepath"; // Load the PDF document PDDocumentdocument= PDDocument.load(newFile(inputFilePath)); // Create an instance of PDFTextStripper PDFTextStripperstripper=newPDFTextStripper(); // Iterate through each page and extract the text for (intpageNumber=1; pageNumber <= document.getNumberOfPages(); pageNumber++) { stripper.setStartPage(pageNumber); stripper.setEndPage(pageNumber);
Stringtext= stripper.getText(document); System.out.println("Page " + pageNumber + ":"); System.out.println(text); } // Close the PDF document document.close();
Split and Merge
Split
privatestaticvoidsplitPdf(String inputFilePath, String outputDir)throws IOException { Filefile=newFile(inputFilePath); // Load the PDF document PDDocumentdocument= PDDocument.load(file); // Create a PDF splitter object Splittersplitter=newSplitter(); // Split the document List<PDDocument> splitDocuments = splitter.split(document); // Get an iterator for the split documents Iterator<PDDocument> iterator = splitDocuments.iterator(); // Iterate through the split documents and save them inti=1; while (iterator.hasNext()) { PDDocumentsplitDocument= iterator.next(); StringoutputFilePath=newStringBuilder().append(outputDir) .append(File.separator) .append(file.getName().replaceAll("[.](pdf|PDF)", "")) .append("_split_") .append(i) .append(".pdf") .toString(); splitDocument.save(outputFilePath); splitDocument.close(); i++; } // Close the source document document.close(); System.out.println("PDF split successfully!"); }
Merge PDF files
privatestaticvoidmergePdfFiles(List<String> inputFilePaths, String outputFilePath)throws IOException { PDFMergerUtilitymerger=newPDFMergerUtility(); // Add as many files as you need for (String inputFilePath : inputFilePaths) { merger.addSource(newFile(inputFilePath)); } merger.setDestinationFileName(outputFilePath); merger.mergeDocuments(); System.out.println("PDF files merged successfully!"); }
Insert and remove pages
Insert pages
publicstaticvoidinsertPage(String sourceFile, String targetFile, int pageIndex)throws IOException { // Load the existing PDF document PDDocumentsourceDoc= PDDocument.load(newFile(sourceFile)); IntegersourcePageCount= sourceDoc.getNumberOfPages(); // Validate the requested page index if (pageIndex < 0 || pageIndex > sourcePageCount) { thrownewIllegalArgumentException("Invalid page index"); } // Create a new blank page PDPagenewPage=newPDPage(); // Insert the new page at the requested index if (sourcePageCount.equals(pageIndex)) { sourceDoc.getPages().add(newPage); } else { sourceDoc.getPages().insertBefore(newPage, sourceDoc.getPages().get(pageIndex)); } // Save the modified PDF document to a target file sourceDoc.save(targetFile); // Close the source and target documents sourceDoc.close(); }
Remove pages
privatestaticvoidremovePage(String inputFilePath, String outputFilePath, int pageIndex)throws IOException {
PDDocumentsourceDoc= PDDocument.load(newFile(inputFilePath)); IntegersourcePageCount= sourceDoc.getNumberOfPages(); // Validate the requested page index if (pageIndex < 0 || pageIndex >= sourcePageCount) { thrownewIllegalArgumentException("Invalid page index"); } sourceDoc.getPages().remove(pageIndex); sourceDoc.save(outputFilePath); sourceDoc.close(); }
privatestaticvoidremovePage2(String inputFilePath, String outputFilePath, int pageIndex)throws IOException { PDDocumentsourceDoc= PDDocument.load(newFile(inputFilePath)); IntegersourcePageCount= sourceDoc.getNumberOfPages(); // Validate the requested page index if (pageIndex < 0 || pageIndex >= sourcePageCount) { thrownewIllegalArgumentException("Invalid page index"); } Splittersplitter=newSplitter(); List<PDDocument> pages = splitter.split(sourceDoc); pages.remove(pageIndex); PDDocumentoutputDocument=newPDDocument(); for (PDDocument page : pages) { outputDocument.addPage(page.getPage(0)); } outputDocument.save(outputFilePath); sourceDoc.close(); outputDocument.close(); }
AccessPermissionap=newAccessPermission(); // disable printing, ap.setCanPrint(false); //disable copying ap.setCanExtractContent(false); //Disable other things if needed...
// Owner password (to open the file with all permissions) // User password (to open the file but with restricted permissions) StandardProtectionPolicyspp=newStandardProtectionPolicy(password, password, ap); // Define the length of the encryption key. // Possible values are 40, 128 or 256. intkeyLength=256; spp.setEncryptionKeyLength(keyLength);
AccessPermissionap=newAccessPermission(); // disable printing, ap.setCanPrint(false); //disable copying ap.setCanExtractContent(false); //Disable other things if needed...
// Owner password (to open the file with all permissions) // User password (to open the file but with restricted permissions) StandardProtectionPolicyspp=newStandardProtectionPolicy(newPassword, newPassword, ap); // Define the length of the encryption key. // Possible values are 40, 128 or 256. intkeyLength=256; spp.setEncryptionKeyLength(keyLength);
//Apply protection doc.protect(spp);
doc.save(outputFilePath); doc.close(); }
Remove password
publicstaticvoidremovePdfPassword(String inputFilePath, String outputFilePath, String password)throws IOException { PDDocumentdoc= PDDocument.load(newFile(inputFilePath), password); // Set the document access permissions doc.setAllSecurityToBeRemoved(true); // Save the unprotected PDF document doc.save(outputFilePath); // Close the document doc.close(); }
Convert to Image
PDF to Image
publicstaticvoidpdfToImage(String pdfFilePath, String imageFileDir)throws IOException { Filefile=newFile(pdfFilePath); PDDocumentdocument= PDDocument.load(file); // Create PDFRenderer object to render each page as an image PDFRendererpdfRenderer=newPDFRenderer(document); // Iterate over all the pages and convert each page to an image for (intpageIndex=0; pageIndex < document.getNumberOfPages(); pageIndex++) { // Render the page as an image // 100 DPI: general-quality // 300 DPI: high-quality // 600 DPI: pristine-quality BufferedImageimage= pdfRenderer.renderImageWithDPI(pageIndex, 300); // Save the image to a file StringimageFilePath=newStringBuilder() .append(imageFileDir) .append(File.separator) .append(file.getName().replaceAll("[.](pdf|PDF)", "")) .append("_") .append(pageIndex + 1) .append(".png") .toString(); ImageIO.write(image, "PNG", newFile(imageFilePath)); } // Close the document document.close(); }
Image to PDF
privatestaticvoidimageToPdf(String imagePath, String pdfPath)throws IOException { try (PDDocumentdoc=newPDDocument()) { PDPagepage=newPDPage(); doc.addPage(page); // createFromFile is the easiest way with an image file // if you already have the image in a BufferedImage, // call LosslessFactory.createFromImage() instead PDImageXObjectpdImage= PDImageXObject.createFromFile(imagePath, doc); // draw the image at full size at (x=0, y=0) try (PDPageContentStreamcontents=newPDPageContentStream(doc, page)) { // to draw the image at PDF width intscaledWidth=600; if (pdImage.getWidth() < 600) { scaledWidth = pdImage.getWidth(); } contents.drawImage(pdImage, 0, 0, scaledWidth, pdImage.getHeight() * scaledWidth / pdImage.getWidth()); } doc.save(pdfPath); } }
Create PDFs
StringoutputFilePath="output/pdf/filepath";
PDDocumentdocument=newPDDocument(); PDPagepage=newPDPage(PDRectangle.A4); document.addPage(page); // Create content stream to draw on the page PDPageContentStreamcontentStream=newPDPageContentStream(document, page); contentStream.setFont(PDType1Font.HELVETICA, 12); // Insert text contentStream.beginText(); contentStream.newLineAtOffset(100, 700); contentStream.showText("Hello, World!"); contentStream.endText(); // Load the image StringimageFilePath="C:\\Users\\Taogen\\Pictures\\icon.jpg"; PDImageXObjectimage= PDImageXObject.createFromFile(imageFilePath, document); // Set the scale and position of the image on the page floatscale=0.5f; // adjust the scale as needed floatx=100; // x-coordinate of the image floaty=500; // y-coordinate of the image // Draw the image on the page contentStream.drawImage(image, x, y, image.getWidth() * scale, image.getHeight() * scale); contentStream.close(); document.save(outputFilePath); document.close();