Hot questions for Using PDFBox in exception

Question:

When I try to use one of the PDFBox examples for extracting images, it gives me the following exception at run time:

   Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/commons/logging/LogFactory
    at org.apache.pdfbox.pdfparser.BaseParser.<clinit>(BaseParser.java:68)
    at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1218)
    at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1186)
    at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1111)
    at pdfboxtest.PdfBoxTest.extractImage(PdfBoxTest.java:69)
    at pdfboxtest.PdfBoxTest.main(PdfBoxTest.java:53)
Caused by: java.lang.ClassNotFoundException: org.apache.commons.logging.LogFactory
    at java.net.URLClassLoader$1.run(URLClassLoader.java:372)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:360)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    ... 6 more

Here is the part of the code where the exception occurs, at the last two lines:

         // Path of the PDF to open (placeholder value) and the matching File handle.
         String pdfFile = "pdf file path";
         File pdf=new  File(pdfFile);
         PDDocument document = null;
         // The two load calls below are the lines at which the exception is reported.
         document = PDDocument.loadNonSeq(pdf, null, password);
         document = PDDocument.load( pdfFile );

Answer:

Add commons-logging-1.1.1.jar or jcl-over-slf4j-1.7.6.jar in your lib directory.

Question:

I am writing a simple selenium test in which I need to take a screenshot of webpage and save it as PDF. I am using TestNG and PDFbox library with Selenium

Below is my test method:

package com.helper;

import java.io.File;
import java.io.IOException;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebDriverException;
import org.testng.annotations.Test;



/**
 * TestNG test that opens a web page in Chrome, takes a screenshot of it and
 * embeds the screenshot in a one-page PDF document.
 */
public class ScreenshotPDF {

    /**
     * Takes the screenshot and writes it to a PDF file; WebDriver and I/O
     * failures are only printed, not rethrown.
     */
    @Test
    public void screenshotPDF(){

    WebDriver driver= DriverManager.getWebdriver("chrome");

    driver.get("https://www.google.co.in");

    try {
        // Screenshot is delivered as a file by the driver.
        File screenshot = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE);

        // New document with a single page that receives the image.
        PDDocument document = new PDDocument();
        PDPage page = new PDPage();
        document.addPage(page);
        PDImageXObject pdi = PDImageXObject.createFromFileByContent(screenshot ,document);
        PDPageContentStream contentStream = new PDPageContentStream(document, page);
        contentStream.drawImage(pdi,100,100);

        // NOTE: save() runs while contentStream is still open; the stream is only closed afterwards.
        document.save("C:/Users/123456/Documents/sample.pdf");
        contentStream.close();
        document.close();
    } catch (WebDriverException e) {

        e.printStackTrace();
    } catch (IOException e) {

        e.printStackTrace();
    }
}
}

Here when I run this test with TestNG, I get the following errors in Console:

FAILED: screenshotPDF
java.lang.IllegalStateException: Cannot read while there is an open stream writer
    at org.apache.pdfbox.cos.COSStream.createRawInputStream(COSStream.java:129)
    at org.apache.pdfbox.pdfwriter.COSWriter.visitFromStream(COSWriter.java:1177)
    at org.apache.pdfbox.cos.COSStream.accept(COSStream.java:372)
    at org.apache.pdfbox.pdfwriter.COSWriter.doWriteObject(COSWriter.java:561)
    at org.apache.pdfbox.pdfwriter.COSWriter.doWriteObjects(COSWriter.java:490)
    at org.apache.pdfbox.pdfwriter.COSWriter.doWriteBody(COSWriter.java:474)
    at org.apache.pdfbox.pdfwriter.COSWriter.visitFromDocument(COSWriter.java:1073)
    at org.apache.pdfbox.cos.COSDocument.accept(COSDocument.java:419)
    at org.apache.pdfbox.pdfwriter.COSWriter.write(COSWriter.java:1331)
    at org.apache.pdfbox.pdfwriter.COSWriter.write(COSWriter.java:1229)
    at org.apache.pdfbox.pdmodel.PDDocument.save(PDDocument.java:1095)
    at org.apache.pdfbox.pdmodel.PDDocument.save(PDDocument.java:1067)
    at org.apache.pdfbox.pdmodel.PDDocument.save(PDDocument.java:1055)
    at com.helper.ScreenshotPDF.screenshotPDF(ScreenshotPDF.java:50)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:85)
    at org.testng.internal.Invoker.invokeMethod(Invoker.java:639)
    at org.testng.internal.Invoker.invokeTestMethod(Invoker.java:816)
    at org.testng.internal.Invoker.invokeTestMethods(Invoker.java:1124)
    at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:125)
    at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:108)
    at org.testng.TestRunner.privateRun(TestRunner.java:774)
    at org.testng.TestRunner.run(TestRunner.java:624)
    at org.testng.SuiteRunner.runTest(SuiteRunner.java:359)
    at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:354)
    at org.testng.SuiteRunner.privateRun(SuiteRunner.java:312)
    at org.testng.SuiteRunner.run(SuiteRunner.java:261)
    at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:52)
    at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:86)
    at org.testng.TestNG.runSuitesSequentially(TestNG.java:1215)
    at org.testng.TestNG.runSuitesLocally(TestNG.java:1140)
    at org.testng.TestNG.run(TestNG.java:1048)
    at org.testng.remote.AbstractRemoteTestNG.run(AbstractRemoteTestNG.java:126)
    at org.testng.remote.RemoteTestNG.initAndRun(RemoteTestNG.java:152)
    at org.testng.remote.RemoteTestNG.main(RemoteTestNG.java:57)


===============================================
    Default test
    Tests run: 1, Failures: 1, Skips: 0

I did a lot of googling but did not find a solution. Please help me with this error, or at least tell me what the reason for it could be. Thank you in advance.


Answer:

You need to close the content stream before saving:

    // try-with-resources closes the document even when embedding the image or saving fails
    try (PDDocument document = new PDDocument()) {
        PDPage page = new PDPage();
        document.addPage(page);
        PDImageXObject pdi = PDImageXObject.createFromFileByContent(screenshot, document);

        // the content stream is closed at the end of this inner block -- do this before saving!
        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
            contentStream.drawImage(pdi, 100, 100);
        }

        document.save("C:/Users/123456/Documents/sample.pdf");
    }

Question:

As a new user of PDFBox 2.0.2 (https://github.com/apache/pdfbox/tree/2.0.2), I would like to get all the stroked lines (for instance, the column and row borders of a table) of a page (PDPage), and thus I created the following class:

package org.apache.pdfbox.rendering;

import java.awt.geom.GeneralPath;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;

import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.rendering.PageDrawer;
import org.apache.pdfbox.rendering.PageDrawerParameters;

/**
 * Loads a PDF from a URI and tries to obtain the line path of a page by
 * running a {@code PageDrawer} over that page.
 */
public class LineCatcher {
    private PageDrawer pageDrawer;
    private PDDocument document;
    private PDFRenderer pdfRenderer;
    private PDPage page;

    /**
     * Reads the bytes behind {@code pdfSrcURI}, loads them as a document and
     * creates a renderer for that document.
     */
    public LineCatcher(URI pdfSrcURI) throws IllegalArgumentException, 
        MalformedURLException, IOException {
        this.document = PDDocument.load(IOUtils.toByteArray(pdfSrcURI));
        this.pdfRenderer = new PDFRenderer(this.document);
    }
    /**
     * Creates a PageDrawer for the page at {@code pageIndex}, processes the
     * page with it and returns the drawer's line path.
     */
    public GeneralPath getLinePath(int pageIndex) throws IOException {
        this.page = this.document.getPage(pageIndex);
        PageDrawerParameters parameters = new PageDrawerParameters (this.pdfRenderer, this.page);
        this.pageDrawer = new PageDrawer(parameters);
        this.pageDrawer.processPage(this.page); //catches exception here
        return this.pageDrawer.getLinePath();
    }
}

According to my understanding, in order to get the line path of a page, the page has to be processed first, so I called the method processPage in the line that I marked with "catches exception here". Unexpectedly, a NullPointerException was thrown in that line. The exception info is the following:

java.lang.NullPointerException
  at org.apache.pdfbox.rendering.PageDrawer.fillPath(PageDrawer.java:599)
  at org.apache.pdfbox.contentstream.operator.graphics.FillNonZeroRule.process(FillNonZeroRule.java:36)
  at org.apache.pdfbox.contentstream.PDFStreamEngine.processOperator(PDFStreamEngine.java:815)
  at org.apache.pdfbox.contentstream.PDFStreamEngine.processStreamOperators(PDFStreamEngine.java:472)
  at org.apache.pdfbox.contentstream.PDFStreamEngine.processStream(PDFStreamEngine.java:446)
  at org.apache.pdfbox.contentstream.PDFStreamEngine.processPage(PDFStreamEngine.java:149)
  at org.apache.pdfbox.rendering.LineCatcher.getLinePath(LineCatcher.java:33)
  at org.apache.pdfbox.rendering.TestLineCatcher.testGetLinePath(TestLineCatcher.java:21)

Is there anyone, who can give some advice about my logic or help to debug the code? Thanks in advance


Answer:

Extending PageDrawer didn't really work, so I extended PDFGraphicsStreamEngine and here's the result. I do some of the stuff that is done in PageDrawer. To collect lines, either evaluate the shape in strokePath(), or collect points and lines in the other methods where I have included a println.

/**
 * Graphics stream engine that gathers the path operators of a page in a
 * {@link GeneralPath} and prints the bounding box of every stroked path.
 * Images, shadings and filled paths produce no output.
 */
public class LineCatcher extends PDFGraphicsStreamEngine {

    /** Value of {@link #pendingClipRule} while no clip operator is waiting to be applied. */
    private static final int NO_CLIP = -1;

    /** Path under construction; reset by endPath and by the stroke/fill callbacks. */
    private final GeneralPath currentPath = new GeneralPath();

    /** Winding rule of the last clip operator that has not been applied yet, or {@link #NO_CLIP}. */
    private int pendingClipRule = NO_CLIP;

    public LineCatcher(PDPage page) {
        super(page);
    }

    /** Processes the first page of "Test.pdf" with a LineCatcher. */
    public static void main(String[] args) throws IOException {
        File input = new File("Test.pdf");
        try (PDDocument document = PDDocument.load(input)) {
            PDPage firstPage = document.getPage(0);
            new LineCatcher(firstPage).processPage(firstPage);
        }
    }

    @Override
    public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
        System.out.println("appendRectangle");
        // build the outline edge by edge (not as a ready-made rectangle) so that the
        // path runs in the direction given by the corner order
        currentPath.moveTo((float) p0.getX(), (float) p0.getY());
        for (Point2D corner : new Point2D[] {p1, p2, p3}) {
            currentPath.lineTo((float) corner.getX(), (float) corner.getY());
        }
        // closing the subpath, rather than drawing the last edge, keeps a possible
        // line cap style from being applied at the "beginning" of the rectangle
        currentPath.closePath();
    }

    @Override
    public void moveTo(float x, float y) throws IOException {
        currentPath.moveTo(x, y);
        System.out.println("moveTo");
    }

    @Override
    public void lineTo(float x, float y) throws IOException {
        currentPath.lineTo(x, y);
        System.out.println("lineTo");
    }

    @Override
    public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) throws IOException {
        currentPath.curveTo(x1, y1, x2, y2, x3, y3);
        System.out.println("curveTo");
    }

    @Override
    public Point2D getCurrentPoint() throws IOException {
        return currentPath.getCurrentPoint();
    }

    @Override
    public void closePath() throws IOException {
        currentPath.closePath();
    }

    @Override
    public void clip(int windingRule) throws IOException {
        // only remembered here; the clipping path is updated by the next endPath call
        pendingClipRule = windingRule;
    }

    @Override
    public void endPath() throws IOException {
        if (pendingClipRule != NO_CLIP) {
            currentPath.setWindingRule(pendingClipRule);
            getGraphicsState().intersectClippingPath(currentPath);
            pendingClipRule = NO_CLIP;
        }
        discardPath();
    }

    @Override
    public void strokePath() throws IOException {
        // do stuff
        System.out.println(currentPath.getBounds2D());
        discardPath();
    }

    @Override
    public void fillPath(int windingRule) throws IOException {
        discardPath();
    }

    @Override
    public void fillAndStrokePath(int windingRule) throws IOException {
        discardPath();
    }

    @Override
    public void drawImage(PDImage pdi) throws IOException {
    }

    @Override
    public void shadingFill(COSName cosn) throws IOException {
    }

    /** Empties the path under construction. */
    private void discardPath() {
        currentPath.reset();
    }
}

Update 19.3.2019: See also follow-up answer by mkl here.

Question:

I am currently trying to grab text from a PDF that is already uploaded and accessed through a link by using PDFBox and Selenium. I used this as a source: http://www.seleniumeasy.com/selenium-tutorials/how-to-extract-pdf-text-and-verify-using-selenium-webdriver-java

/**
 * Downloads the PDF behind {@code pdf_url} and returns the text of its first page.
 *
 * @param pdf_url URL of the PDF document
 * @return the extracted text, or an empty string if an I/O error occurred
 *         (the error is printed, not rethrown)
 */
public String function(String pdf_url) {
    String parsedText = "";
    // the network stream is closed automatically, also when parsing fails
    try (BufferedInputStream file = new BufferedInputStream(new URL(pdf_url).openStream())) {
        PDFParser parser = new PDFParser(file);
        parser.parse();

        // close the document once the text has been extracted
        try (PDDocument pDoc = new PDDocument(parser.getDocument())) {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(1);
            parsedText = pdfStripper.getText(pDoc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    return parsedText;
}

Error: End-of-File expected line
at org.apache.pdfbox.pdfparser.BaseParser.readLine(BaseParser.java:1519)
at org.apache.pdfbox.pdfparser.PDFParser.parseHeader(PDFParser.java:372)
at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:186)
at scripts.Script.grabPDF_Text(Script.java:94)
at scripts.Script.main(Script.java:817)

Why am I getting this error?


Answer:

Here is the example that you asked to share using PDFURL

// URL of a sample PDF that is passed to the extraction method
String PDFURL = "https://www.adobe.com/support/products/enterprise/knowledgecenter/media/c4611_sample_explain.pdf";
function(PDFURL);

public String function(String pdf_url)
{
 //Exact same code as yours
}

To use a PDF from a local file, the URL and BufferedInputStream need to be replaced by

 // pdf_url is used as a file path here; the parser reads from a FileInputStream on that file
 File file = new File(pdf_url);
 PDFParser parser = new PDFParser(new FileInputStream(file));

Hope this helps

Question:

I have downloaded the PDFBox jar file and have placed it under the {application.home}\lib folder. PDFBox has a few other assistant jar files. I have placed all of them in {application.home}\lib folder, too. I restarted the ColdFusion 2016 service. In my code file, I have some very simple test lines.

  <!--- Try to create the Java object; on any error, dump the exception details to a text file. --->
  <cftry>

    <cfset local.pdfUnitObj = CreateObject("java", "org.apache.pdfbox.pdmodel")>
    <cfcatch type="any">
      <cfdump var="#cfcatch#" output="C:\inetpub\wwwroot\cfcatcherr.txt">       
    </cfcatch>
  </cftry>

In the cfcatcherr.txt, I keep on getting an error saying, java.lang.ClassNotFoundException: org.apache.pdfbox.pdmodel

In org.apache.pdfbox.pdmodel, there is a PDDocument class. I have tried referencing org.apache.pdfbox.pdmodel.PDDocuemtn but it is still giving me the error. I have placed all those files under {application.home}\jre\lib and {application.home}\wwwroot\WEB-INF\lib. The error is still the same thing. Is it because I didn't place the jar file in the correct location? Anybody knows how I can get this problem resolved please?


Answer:

Preferred method for CF2016+

Instead of mucking about with the class path and having to restart the server each time, there's a simpler option for CF2016+. Load the jars dynamically in your Application.cfc using this.javaSettings. The LoadPaths parameter accepts an array of one or more directories (containing jar files to be loaded):

component {
    // application name (placeholder value)
    this.name = "YourAppName";
    // loads all jars in the subdirectories named "lib" and "ext"
    this.javaSettings = { LoadPaths = [ ".\lib\", ".\ext\" ] };
}

.. or supply an array of individual jar file paths:

component {
    this.name = "YourAppName";
    // loads individual jars
    this.javaSettings = { LoadPaths = [ "C:\path\to\pdfbox-2.0.16.jar"
                                        , "C:\path\to\xmpbox-2.0.16.jar"
                                        // , .. more paths
                                      ]
                        };
}

Adrian J. Moreno also pointed out another advantage of this approach:

Much easier to check the JARs in with your code base and deploy to servers instead of having to document for DevOps to remember to put JAR X in location Y should you need to upgrade or build a new server.


Old method

However, to answer your earlier questions, the most common causes of ClassNotFoundException are:

  1. Wrong class name

    CreateObject expects a class name (cAsE sEnSiTiVe).

  2. Jar files aren't in the CF class path

    Aside from the core JVM paths, CF only scans locations listed in Server Settings > ColdFusion Class Path. Jar files placed elsewhere won't be detected. (The default CF class path includes the WEB-INF\lib directory)

    Placing multiple copies of a library in the class path can sometimes cause errors. Though in my experience, that issue usually causes a different error.

  3. Forgetting to restart the CF server

    Aside from the core JVM paths, the CF server only scans the class path locations on start up. So the CF server must be restarted in order to detect any new jars.

Your error is caused by #1 - Wrong class name. org.apache.pdfbox.pdmodel is the name of a package - not a class. The class name should be org.apache.pdfbox.pdmodel.PDDocument (note the spelling of PDDocument).

Question:

I'm working in a servlet file for a web project and this is my code :

I have the v.2.0.0 of pdfbox library and my code works in a simple java application

pdfmanager.java :

/**
 * Extracts text from the PDF file configured via {@link #setFilePath(String)}.
 */
public class pdfManager {

    /** Path of the PDF file to read; must be set before {@link #ToText()} is called. */
    private String filePath;

    public pdfManager() {

    }

    /**
     * Reads the configured PDF and returns the text of pages 1 to 10.
     *
     * @return the extracted text
     * @throws IOException if the file cannot be read or parsed
     */
    public String ToText() throws IOException
    {
        // try-with-resources closes the document (and the file behind it) even when
        // text extraction fails
        try (PDDocument pdDoc = PDDocument.load(new File(filePath)))
        {
            PDFTextStripper pdfStripper = new PDFTextStripper();

            // reading text from page 1 to 10
            // if you want to get text from full pdf file use this code
            // pdfStripper.setEndPage(pdDoc.getNumberOfPages());
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(10);

            return pdfStripper.getText(pdDoc);
        }
    }

    /**
     * Sets the path of the PDF file to read.
     *
     * @param filePath path of the PDF file
     */
    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

}

The servlet file:

    // Write a header line to the HTTP response.
    PrintWriter out = response.getWriter();
    out.println("\ndata we gottoo : ");

    // Extract the text of the PDF and print it on standard output.
    pdfManager pdfManager = new pdfManager();
    pdfManager.setFilePath("/Users/rami/Desktop/pdf2.pdf");
    String extractedText = pdfManager.ToText();
    System.out.println(extractedText);

called in doGet method


Answer:

The library you need is not on the classpath, or another problem occurs when the classloader tries to load the library's classes. If you are running on a server, be sure to add the library to the classpath folder. This can be done by hand, or your application has to provide/deliver it itself. Since it's not clear how your app is deployed or delivered, this can have many causes.

Question:

In my project I split a PDF file by bookmarks. A new PDF file should be created containing the split pages and some new bookmarks.

This causes following exception: Exception in thread "main" java.io.IOException: COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?

If I remove the line outline.addLast(pagesOutline); the new PDF contains the split pages (no exception) but obviously not the bookmarks. I think there is something wrong with the way I try to add bookmarks.

My code:

/**
 * Loads the existing PDF, prepares an empty destination document with a root
 * bookmark, processes every requested bookmark and finally writes the new PDF.
 *
 * @param listOfBookmarks bookmark names to split by
 * @throws IOException if loading, splitting or saving fails
 */
private void tryCreatePDF(List<String> listOfBookmarks) throws IOException
{
    File sourceFile = new File("C:/Users/me/Desktop/existingpdf.pdf");
    document = PDDocument.load(sourceFile);
    PDDocumentOutline bookmarksInExistingFile = document.getDocumentCatalog().getDocumentOutline();

    // destination document with its own, initially empty, outline
    destinationPDF = new PDDocument();
    outline = new PDDocumentOutline();
    destinationPDF.getDocumentCatalog().setDocumentOutline(outline);

    // root bookmark that receives the child bookmarks
    pagesOutline = new PDOutlineItem();
    pagesOutline.setTitle("First Bookmark");
    outline.addLast(pagesOutline);

    for (String requestedBookmark : listOfBookmarks)
    {
        addPagesFromExistingFileToNewPDF(bookmarksInExistingFile, requestedBookmark);
    }

    createNewPDF();
}

/**
 * Walks the children of the given outline node depth-first and, for every
 * bookmark whose trimmed title consists of {@code bookmarkToSplit} followed by
 * whitespace and more text, splits that bookmark's page out of the source
 * document and hands it to {@code addPagesToNewDocument}.
 *
 * @param outlineNodeFromExistingPDF outline node whose children are examined
 * @param bookmarkToSplit            title prefix (used as a regex) to look for
 * @throws IOException if resolving a destination page or splitting fails
 */
private void addPagesFromExistingFileToNewPDF(PDOutlineNode outlineNodeFromExistingPDF, String bookmarkToSplit) throws IOException
{
    final String titlePattern = bookmarkToSplit + "\\s.+";

    for (PDOutlineItem item = outlineNodeFromExistingPDF.getFirstChild();
         item != null;
         item = item.getNextSibling())
    {
        PDPage destinationPage = item.findDestinationPage(document);
        int pageNumber = document.getDocumentCatalog().getPages().indexOf(destinationPage) + 1;

        // descend into the children before looking at this item itself
        addPagesFromExistingFileToNewPDF(item, bookmarkToSplit);

        String title = item.getTitle().trim();
        if (!title.matches(titlePattern))
        {
            continue;
        }

        // split out exactly the page the bookmark points to
        Splitter splitter = new Splitter();
        splitter.setStartPage(pageNumber);
        splitter.setEndPage(pageNumber);

        addPagesToNewDocument(splitter.split(document));
    }
}

/**
 * Appends every document of the given list to the destination PDF and adds a
 * bookmark for the first one.
 *
 * @param pages documents to append
 * @throws IOException if appending or closing a document fails
 */
private void addPagesToNewDocument(List<PDDocument> pages) throws IOException
{
    PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();

    for(int index = 0; index < pages.size(); index++)
    {
        PDDocument doc = pages.get(index);

        pdfMergerUtility.appendDocument(destinationPDF, doc);

        // only the first appended document gets a bookmark
        if(index == 0)
        {
            addNewBookmarkToNewPDF(doc);
        }

        // NOTE: doc is closed here, i.e. before destinationPDF has been saved
        doc.close();
    }
}

/**
 * Adds a "Child Bookmark" entry below the root bookmark whose fit-width
 * destination is the first page of {@code doc}.
 *
 * @param doc document whose first page becomes the bookmark target
 */
private void addNewBookmarkToNewPDF(PDDocument doc)
{
    PDPage firstPage = doc.getPage(0);
    PDPageFitWidthDestination target = new PDPageFitWidthDestination();
    target.setPage(firstPage);

    PDOutlineItem childBookmark = new PDOutlineItem();
    childBookmark.setTitle("Child Bookmark");
    childBookmark.setDestination(target);

    pagesOutline.addLast(childBookmark);
}

/**
 * Opens the bookmark nodes, writes the destination document to disk and then
 * closes the destination and the source document, in that order.
 *
 * @throws IOException if saving or closing fails
 */
private void createNewPDF() throws IOException
{
    // show the bookmark tree expanded
    pagesOutline.openNode();
    outline.openNode();

    destinationPDF.save(new File("C:/Users/me/Desktop/newpdf.pdf"));
    destinationPDF.close();

    // the source document is closed last, after the destination has been saved
    document.close();
}

The stacktrace:

Exception in thread "main" java.io.IOException: COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?
    at org.apache.pdfbox.cos.COSStream.checkClosed(COSStream.java:77)
    at org.apache.pdfbox.cos.COSStream.createRawInputStream(COSStream.java:125)
    at org.apache.pdfbox.pdfwriter.COSWriter.visitFromStream(COSWriter.java:1203)
    at org.apache.pdfbox.cos.COSStream.accept(COSStream.java:383)
    at org.apache.pdfbox.pdfwriter.COSWriter.doWriteObject(COSWriter.java:522)
    at org.apache.pdfbox.pdfwriter.COSWriter.doWriteObjects(COSWriter.java:460)
    at org.apache.pdfbox.pdfwriter.COSWriter.doWriteBody(COSWriter.java:444)
    at org.apache.pdfbox.pdfwriter.COSWriter.visitFromDocument(COSWriter.java:1099)
    at org.apache.pdfbox.cos.COSDocument.accept(COSDocument.java:419)
    at org.apache.pdfbox.pdfwriter.COSWriter.write(COSWriter.java:1370)
    at org.apache.pdfbox.pdfwriter.COSWriter.write(COSWriter.java:1257)
    at org.apache.pdfbox.pdmodel.PDDocument.save(PDDocument.java:1267)
    at org.apache.pdfbox.pdmodel.PDDocument.save(PDDocument.java:1238)
    at askstackoverflow.SplitPDFByBookmarks.createNewPDF(SplitPDFByBookmarks.java:126)
    at askstackoverflow.SplitPDFByBookmarks.tryCreatePDF(SplitPDFByBookmarks.java:55)
    at askstackoverflow.SplitPDFByBookmarks.<init>(SplitPDFByBookmarks.java:33)
    at askstackoverflow.Main.main(Main.java:9)

Edit: @All the following readers of this question: After solving the problem, my program doesn't work as I expected. (Every bookmark is pointing to the first page.) But this problem wasn't part of this question.


Answer:

Closing in addPagesToNewDocument() is too early. This also closes resources from the source document because these resources are used by both. Better start closing only after all documents have been saved.

Question:

I have the following code, which is taken from the ShowSignature example from PDFBox 2. I'm running five tests in which it is called; 4 of them pass without problem (no signature, single signed, double signed, expired signature), but the fifth one is with Elliptic Curve and it fails.

The fun part is that it passes when I start the JunitTest only on the test-class, but fails as soon as I'm starting it on package or project level.

I would assume something before the test class is interfering, but I can't find a hint as to what it could be. I checked Bouncy Castle (1.54 is always used), and the Java JDK is used correctly at every point (jdk1.8.0_181).

I'm checking for an exception to be thrown (ERROR_VERIFYING_PDF_SIGNATURE); it is thrown when running the test class alone, but a different one is thrown when starting on a higher level.

Error occurs in if (signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder().build(certFromSignedData)) and has this stack trace:

java.lang.AssertionError: 
Expected: (an instance of de.bdr.rt.core.common.api.BusinessLogicException and exception with message a string containing "Die Signatur des PDF-Dokuments konnte nicht verifiziert werden.")
 but: an instance of de.bdr.rt.core.common.api.BusinessLogicException <org.bouncycastle.operator.RuntimeOperatorException: exception obtaining signature: Could not verify signature> is a org.bouncycastle.operator.RuntimeOperatorException
Stacktrace was: org.bouncycastle.operator.RuntimeOperatorException: exception obtaining signature: Could not verify signature
at org.bouncycastle.operator.jcajce.JcaContentVerifierProviderBuilder$SigVerifier.verify(Unknown Source)
at org.bouncycastle.operator.jcajce.JcaContentVerifierProviderBuilder$RawSigVerifier.verify(Unknown Source)
at org.bouncycastle.cms.SignerInformation.doVerify(Unknown Source)
at org.bouncycastle.cms.SignerInformation.verify(Unknown Source)
at de.bdr.gematik.tsp.sc.antragsverwaltung.impl.itsp.ds.PDFDigitalSignatureCheckTest.verifyPKCS7(PDFDigitalSignatureCheckTest.java:280)
at de.bdr.gematik.tsp.sc.antragsverwaltung.impl.itsp.ds.PDFDigitalSignatureCheckTest.testPdfSignature(PDFDigitalSignatureCheckTest.java:170)
at de.bdr.gematik.tsp.sc.antragsverwaltung.impl.itsp.ds.PDFDigitalSignatureCheckTest.testPDFECKeySignaturFails(PDFDigitalSignatureCheckTest.java:112)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.rules.ExpectedException$ExpectedExceptionStatement.evaluate(ExpectedException.java:239)
at org.junit.rules.RunRules.evaluate(RunRules.java:20)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:86)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206)
Caused by: java.security.SignatureException: Could not verify signature
at sun.security.ec.ECDSASignature.engineVerify(ECDSASignature.java:325)
at java.security.Signature$Delegate.engineVerify(Signature.java:1222)
at java.security.Signature.verify(Signature.java:655)
at org.bouncycastle.operator.jcajce.JcaContentVerifierProviderBuilder$SignatureOutputStream.verify(Unknown Source)
... 32 more
Caused by: java.security.InvalidAlgorithmParameterException
at sun.security.ec.ECDSASignature.verifySignedDigest(Native Method)
at sun.security.ec.ECDSASignature.engineVerify(ECDSASignature.java:321)
... 35 more

at org.hamcrest.MatcherAssert.assertThat(MatcherAssert.java:20)
at org.junit.Assert.assertThat(Assert.java:956)
at org.junit.Assert.assertThat(Assert.java:923)
at org.junit.rules.ExpectedException.handleException(ExpectedException.java:252)
at org.junit.rules.ExpectedException.access$000(ExpectedException.java:106)
at org.junit.rules.ExpectedException$ExpectedExceptionStatement.evaluate(ExpectedException.java:241)
at org.junit.rules.RunRules.evaluate(RunRules.java:20)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:86)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:538)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:760)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:460)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:206)

Code:

   /**
    * Checks the first signer of a CMS signature: prints the signer certificate,
    * reports whether it was valid at signing time and whether it is self-signed,
    * then verifies the signature itself.
    *
    * @param byteArray the signed content
    * @param contents  signature contents whose bytes are parsed as CMS signed data
    * @param sig       signature object that supplies the signing date
    * @throws BusinessLogicException if the signature verification returns false
    */
   private void verifyPKCS7(byte[] byteArray, COSString contents, PDSignature sig)
        throws CMSException, CertificateException, StoreException, OperatorCreationException,
        NoSuchAlgorithmException, NoSuchProviderException {
    CMSProcessable signedContent = new CMSProcessableByteArray(byteArray);
    CMSSignedData signedData = new CMSSignedData(signedContent, contents.getBytes());
    @SuppressWarnings("unchecked")
    Store<X509CertificateHolder> certificatesStore = signedData.getCertificates();
    // only the first signer is examined
    Collection<SignerInformation> signers = signedData.getSignerInfos().getSigners();
    SignerInformation signerInformation = signers.iterator().next();
    // look up the certificate that matches the signer id; the first match is used
    @SuppressWarnings("unchecked")
    Collection<X509CertificateHolder> matches = certificatesStore
            .getMatches((Selector<X509CertificateHolder>) signerInformation.getSID());
    X509CertificateHolder certificateHolder = matches.iterator().next();
    X509Certificate certFromSignedData = new JcaX509CertificateConverter().getCertificate(certificateHolder);
    System.out.println("certFromSignedData: " + certFromSignedData);
    // validity problems are only reported, they do not abort the check
    try {
        certFromSignedData.checkValidity(sig.getSignDate().getTime());
        System.out.println("Certificate valid at signing time");
    } catch (CertificateExpiredException ex) {
        System.err.println("Certificate expired at signing time");
    } catch (CertificateNotYetValidException ex) {
        System.err.println("Certificate not yet valid at signing time");
    }

    if (isSelfSigned(certFromSignedData)) {
        System.err.println("Certificate is self-signed, LOL!");
    } else {
        System.out.println("Certificate is not self-signed");
        // todo rest of chain
    }

    // the verifier is built without naming a security provider
    if (signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder().build(certFromSignedData))) {
        System.out.println("Signature verified");
    } else {
        System.out.println("Signature verification failed");
        throw new BusinessLogicException(Messages.ERROR_VERIFYING_PDF_SIGNATURE);
    }
}

Answer:

This is a shortcoming of the PDFBox example. Please change

signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder().build(certFromSignedData));

to

signerInformation.verify(new JcaSimpleSignerInfoVerifierBuilder().setProvider(BouncyCastleProvider.PROVIDER_NAME).build(certFromSignedData));

you may have to register BC as provider:

Security.addProvider(new BouncyCastleProvider());

Question:

I am trying to merge two existing PDF documents that are InputStreams together using the PDFMergerUtility.mergeDocuments() method in PDFBox. Here's my code; the entry method is pullDocumentsIntoSystem():

/**
 * Entry point of the snippet: fetches one PDF stream per parsed letter, merges the
 * streams into an in-memory buffer and hands that buffer to writeMergedPdfDocument.
 * The snippet is truncated by its author ("more code below"), so the boolean return
 * statement is not shown here.
 *
 * @param id              identifier forwarded to getSpecificDocument for every letter
 * @param filePathAndName target forwarded to writeMergedPdfDocument
 * @param parsedLetters   letters whose documents are fetched and merged
 * @throws IOException if fetching, merging or writing fails
 */
private boolean pullDocumentsIntoSystem(final String id, final String filePathAndName, final List<Letter> parsedLetters)
        throws IOException {

    final List<InputStream> pdfStreams = new ArrayList<InputStream>();
    final ByteArrayOutputStream mergedPdfOutputStream = new ByteArrayOutputStream();

            // make a call to retrieve each document
            for (final Letter letter : parsedLetters) {
                // NOTE(review): getSpecificDocument declares (id, key) but is called with
                // (letter.getKey(), id) — the argument order looks swapped; confirm.
                pdfStreams.add(this.getSpecificDocument(letter.getKey(), id));
            }

            // merge all the documents together
            this.mergePdfDocuments(pdfStreams, mergedPdfOutputStream);

            // write file to directory
            this.writeMergedPdfDocument(mergedPdfOutputStream, filePathAndName); //...more code below...

}

/**
 * Issues a GET request for one letter document and returns the response body stream.
 *
 * @param id  letter identifier used in the request URL
 * @param key document key used in the request URL
 * @return the stream obtained from the HTTP connection
 * @throws IOException if the connection cannot be opened or read
 */
private InputStream getSpecificDocument(final String id, final String key) throws IOException {

    // NOTE(review): the variable is declared as `conn`, but `connection` is used further
    // down; `connection` is not declared in this snippet — presumably the same object.
    HttpURLConnection conn = null;
    InputStream pdfStream = null;

    try {
        final String url = this.getBaseURL() + "/letter/" + id + "/documents/" + key;

        conn = (HttpURLConnection) new URL(url).openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("X-Letter-Authentication", this.getAuthenticationHeader());
        conn.setRequestProperty("Accept", "application/pdf");
        conn.setRequestProperty("Content-Type", "application/pdf");
        conn.setDoOutput(true);          

        pdfStream = connection.getInputStream();

    }
    finally {
        // The finally block runs before the return below, so the connection is already
        // disconnected when the caller receives the still-unread stream.
        this.disconnect(connection);
    }

    return pdfStream;
}

    /**
     * Feeds every stream in {@code pdfStreams} to a PDFMergerUtility and writes the merged
     * result to {@code mergedPdfOutputStream}, using MemoryUsageSetting.setupTempFileOnly().
     *
     * @param pdfStreams            source PDF streams to merge
     * @param mergedPdfOutputStream destination that receives the merged PDF
     * @throws IOException if the merge fails
     */
    private void mergePdfDocuments(final List<InputStream> pdfStreams, final ByteArrayOutputStream mergedPdfOutputStream)
        throws IOException {

    final PDFMergerUtility pdfMerger = new PDFMergerUtility();

    // Configure the destination first, then register the sources.
    pdfMerger.setDestinationStream(mergedPdfOutputStream);
    pdfMerger.addSources(pdfStreams);

    pdfMerger.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());  // ERROR THROWN HERE
}

Here's the error I'm receiving on the line with the comment above:

Caused by: java.io.IOException: Missing root object specification in trailer.   
at org.apache.pdfbox.pdfparser.COSParser.parseTrailerValuesDynamically(COSParser.java:2832) ~[pdfbox-2.0.11.jar:2.0.11]     
at org.apache.pdfbox.pdfparser.PDFParser.initialParse(PDFParser.java:173) ~[pdfbox-2.0.11.jar:2.0.11]   
at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:220) ~[pdfbox-2.0.11.jar:2.0.11]  
at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1144) ~[pdfbox-2.0.11.jar:2.0.11]  
at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:1060) ~[pdfbox-2.0.11.jar:2.0.11]  
at org.apache.pdfbox.multipdf.PDFMergerUtility.legacyMergeDocuments(PDFMergerUtility.java:379) ~[pdfbox-2.0.11.jar:2.0.11]  
at org.apache.pdfbox.multipdf.PDFMergerUtility.mergeDocuments(PDFMergerUtility.java:280) ~[pdfbox-2.0.11.jar:2.0.11]

I am using PDFBox 2.0.11.

My list of InputStreams are each coming from a separate HttpURLConnection.getInputStream() call in case that matters. I have confirmed that there are indeed documents coming back from the calls being made in the HttpURLConnection.

UPDATE
On the advice of @Tilman Hausherr below, I tested the same functionality without using the InputStreams. If I use the PDFMergerUtility.addSource(File source) method instead of PDFMergerUtility.addSources(List<InputStream>), the merge works successfully. So it seems that something about my InputStreams isn't working correctly.

I appreciate any help and am happy to provide more information if needed.

Thanks for your time!


Answer:

In the end this was really a silly mistake. I was closing the HttpURLConnection too early. If I remove the this.disconnect(connection) call at the end of the getSpecificDocument() method then everything works fine.

Well, hopefully this will help someone else.

Thanks for the leads @Фарид Азаев and @Tilman Hausherr!

Question:

I'm using Apache PdfBox library and I'm noticing almost everything throws an IOException, and it's quite annoying to deal with considering most of the IOExceptions should actually be illegal state exceptions but it seems apache wanted clients to deal with it so they forced them as checked exceptions. Anyway...

My question is, how to wrap the IOException into a custom exception extending RuntimeException so the API is easier to deal with?

Example:

private final PDDocument document;
private final PDPage page;
private final PDFont font;

/**
 * Stores the document, page and font (falling back to Helvetica when {@code font} is
 * null), adds the page to the document and opens a content stream on that page.
 *
 * @param document document the page is added to
 * @param page     page to add and draw on
 * @param font     font to use; {@code null} selects PDType1Font.HELVETICA
 */
public PdfBoxWrapper(PDDocument document, PDPage page, PDFont font)
{
    this.document = document;
    this.page = page;
    this.font = Objects.isNull(font) ? PDType1Font.HELVETICA : font;

    this.document.addPage(this.page);

    try 
    {
        // `canvas` and `logger` are not declared in this snippet; presumably fields of the class.
        this.canvas = new PDPageContentStream(this.document, this.page);
    } 

    catch (IOException exception) {
        // The IOException is only logged here: construction continues without `canvas` being assigned.
        logger.error(exception.getMessage());
    }
}

Notice how I have to wrap PDPageContentStream in a try catch. How to Wrap the IOException from PdfBox as a PdfBoxIllegalStateException ?

as shown below:

/**
 * Unchecked wrapper for the checked {@link java.io.IOException}s thrown by PDFBox, for
 * callers that treat such failures as illegal-state conditions.
 */
public class PdfBoxIllegalStateException extends RuntimeException
{
    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failure
     */
    public PdfBoxIllegalStateException(String message)
    {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying exception, kept so its stack trace is not lost
     */
    public PdfBoxIllegalStateException(String message, Throwable cause)
    {
        super(message, cause);
    }

    /**
     * Wraps {@code cause} directly; the message defaults to {@code cause.toString()}.
     *
     * @param cause the underlying exception
     */
    public PdfBoxIllegalStateException(Throwable cause)
    {
        super(cause);
    }
}

Answer:

You can catch the IOException, then wrap it in a PdfBoxIllegalStateException & then throw it again.

private final PDDocument document;
private final PDPage page;
private final PDFont font;

/**
 * Stores the document, page and font (falling back to Helvetica when {@code font} is
 * null), adds the page to the document and opens a content stream on that page.
 *
 * @param document document the page is added to
 * @param page     page to add and draw on
 * @param font     font to use; {@code null} selects PDType1Font.HELVETICA
 * @throws PdfBoxIllegalStateException if PDFBox cannot open the content stream
 */
public PdfBoxWrapper(PDDocument document, PDPage page, PDFont font)
{
    this.document = document;
    this.page = page;
    this.font = Objects.isNull(font) ? PDType1Font.HELVETICA : font;

    this.document.addPage(this.page);

    try
    {
        this.canvas = new PDPageContentStream(this.document, this.page);
    }
    catch (IOException exception)
    {
        logger.error(exception.getMessage());
        // Use the (message, cause) constructor declared by PdfBoxIllegalStateException so the
        // original IOException and its stack trace travel with the unchecked exception.
        throw new PdfBoxIllegalStateException("Could not open a content stream for the page", exception);
    }
}

Question:


Answer:

Is it acceptable to just ignore when a stream close throws exception?

In most cases it is, but it really depends on the context. If you decide not to handle the close exception and are looking to avoid extra try-catch block, then you can use IOUtils from commons-io library.

finally {
    IOUtils.closeQuietly(pdf);
}

This is equivalent to the following

finally {
    try {
        if (closeable != null) {
            closeable.close();
        }
    } catch (IOException ioe) {
        // ignore
    }
}

Question:

I'm trying to write to a PDF in Webdings font using PDFBox. What I'd like to appear is the symbol that is represented by character A in Webdings. However, I get an IllegalArgumentException saying that there's no glyph in my Font for the given character. Am I missing something around encoding or is Webdings simply not supported?

I use PDFBox 2.0.17, many solutions to similar problems seem outdated. I can verify Webdings in the Character Map tool and it works in programs such as word. The actual problem I have is not with Webdings but another non-public font and I don't have any alternatives to that one. Using Webdings as the problem pattern seems to be the same.

// Load a TrueType font from disk and append one character to an existing page.
// NOTE(review): "PDFontType0Font" is presumably a typo for PDFBox's PDType0Font — confirm.
PDFont font = PDFontType0Font.load(doc, new File("pathToWebdings.ttf"));
PDPageContentStream cos = new PDPageContentStream(doc, page, AppendMode.APPEND, true);
// PDStreamUtils is not shown in this snippet; presumably a project helper that draws the text.
PDStreamUtils.write(cos,"a", font, 100, 100, 100, Color.BLACK);
cos.close();

java.lang.IllegalArgumentException: No glyph for U+0061 (a) in font Webdings


Answer:

I looked at it with DTL OTMaster 3.7 light (this is free and very useful), the checkmark will work with \uf061 with the webdings font (tested in Windows 10).

(screenshot looks a bit weird because I have a 4K monitor)

Alternatively, use PDType1Font.ZAPF_DINGBATS and then \u2714 will also work.

Question:

I'm currently facing an issue on linux using the Apache PdfBox 2.0.3.

You can see the code snippet and the exception below. All I am trying to do is set the font and then write on a page. Unfortunately, this fails on our Linux test and live environments, which use Tomcat 7 and Java 7 on Ubuntu. On my Windows development machine everything works smoothly.

I already tried the built-in Helvetica and Courier fonts; the TTF you see in the snippet was downloaded from Google Fonts.

Do you have any idea what I'm doing wrong?

The code snippet i'm using:

// Append one line of text to the first page of detailsDocument using a TrueType font
// loaded from the classpath. The content stream is not closed within this snippet.
PDPage page = detailsDocument.getPage(0);
        PDPageContentStream contentStream = new PDPageContentStream(detailsDocument, page, PDPageContentStream.AppendMode.APPEND, true, true);
        File fontFile = new File(getClass().getClassLoader().getResource("fonts/OpenSans-Regular.ttf").getFile());
        // NOTE(review): the font is loaded into `document`, while the page and the content stream
        // belong to `detailsDocument` — confirm both names refer to the same PDDocument.
        PDFont font = PDTrueTypeFont.load(document, fontFile, Encoding.getInstance(COSName.STANDARD_ENCODING));

        contentStream.beginText();
        contentStream.setFont( font, 22 );
        contentStream.newLineAtOffset(57, 495);
        contentStream.showText( collection.getDocument().getIndication().getName() + " - " + collection.getDocument().getTitle() );
        contentStream.endText();

The exception:

org.springframework.web.util.NestedServletException: Request processing failed; nested exception is java.lang.NullPointerException
    org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:981)
    org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:860)
    javax.servlet.http.HttpServlet.service(HttpServlet.java:621)
    org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:845)
    javax.servlet.http.HttpServlet.service(HttpServlet.java:722)
    sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    java.lang.reflect.Method.invoke(Method.java:606)
    org.apache.catalina.security.SecurityUtil$1.run(SecurityUtil.java:274)
    org.apache.catalina.security.SecurityUtil$1.run(SecurityUtil.java:271)
    java.security.AccessController.doPrivileged(Native Method)
    javax.security.auth.Subject.doAsPrivileged(Subject.java:536)
    org.apache.catalina.security.SecurityUtil.execute(SecurityUtil.java:306)
    org.apache.catalina.security.SecurityUtil.doAsPrivilege(SecurityUtil.java:166)

root cause

java.lang.NullPointerException
    org.apache.pdfbox.pdmodel.font.PDFont.encode(PDFont.java:311)
    org.apache.pdfbox.pdmodel.PDPageContentStream.showText(PDPageContentStream.java:414)
    hu.jacsomedia.tms.webapp.pdfbuilder.generators.PdfGenerator.addOrderDetailsPage(PdfGenerator.java:361)
    hu.jacsomedia.tms.webapp.pdfbuilder.generators.PdfGenerator.generatePdf(PdfGenerator.java:68)
    hu.jacsomedia.tms.webapp.pdfbuilder.generators.PdfGenerator.generatePdfToOMR(PdfGenerator.java:268)
    hu.jacsomedia.tms.webapp.pdfbuilder.generators.PdfGenerator$$FastClassBySpringCGLIB$$fcaef06.invoke(<generated>)
    org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:204)
    org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:720)
    org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:157)
    org.springframework.transaction.interceptor.TransactionInterceptor$1.proceedWithInvocation(TransactionInterceptor.java:99)
    org.springframework.transaction.interceptor.TransactionAspectSupport.invokeWithinTransaction(TransactionAspectSupport.java:281)
    org.springframework.transaction.interceptor.TransactionInterceptor.invoke(TransactionInterceptor.java:96)
    org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
    org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:655)
    hu.jacsomedia.tms.webapp.pdfbuilder.generators.PdfGenerator$$EnhancerBySpringCGLIB$$55740b22.generatePdfToOMR(<generated>)
    hu.jacsomedia.tms.webapp.pdfbuilder.controller.PdfGeneratorController.generate(PdfGeneratorController.java:139)
    sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    java.lang.reflect.Method.invoke(Method.java:606)
    org.springframework.web.method.support.InvocableHandlerMethod.doInvoke(InvocableHandlerMethod.java:222)
    org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:137)
    org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:110)
    org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandlerMethod(RequestMappingHandlerAdapter.java:814)
    org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:737)
    org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:85)
    org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:959)
    org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:893)
    org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:969)
    org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:860)
    javax.servlet.http.HttpServlet.service(HttpServlet.java:621)
    org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:845)
    javax.servlet.http.HttpServlet.service(HttpServlet.java:722)
    sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    java.lang.reflect.Method.invoke(Method.java:606)
    org.apache.catalina.security.SecurityUtil$1.run(SecurityUtil.java:274)
    org.apache.catalina.security.SecurityUtil$1.run(SecurityUtil.java:271)
    java.security.AccessController.doPrivileged(Native Method)
    javax.security.auth.Subject.doAsPrivileged(Subject.java:536)
    org.apache.catalina.security.SecurityUtil.execute(SecurityUtil.java:306)
    org.apache.catalina.security.SecurityUtil.doAsPrivilege(SecurityUtil.java:166)

Answer:

Actually, both of the commenters were right. I did the following two things to fix the issue:

  • Indeed, both the 2.0.0 and 2.0.3 versions were on my classpath. I had to re-download everything with Maven.
  • Following mlk's comment, I changed the encoding to COSName.WIN_ANSI_ENCODING.

Additionally, I now double-check every variable I write to the file, to avoid passing null arguments to showText calls.

Question:

I'm trying to run the sample code from the PDFBox examples. The code does create a PDF with a signature, but it finishes with the exception below, and the signature cannot be verified when I open the file in Adobe's PDF viewer. Any help?

Exception in thread "main" java.lang.VerifyError: (class: org/bouncycastle/cms/CMSSignedGenerator, method: getAttributeSet signature: (Lorg/bouncycastle/asn1/cms/AttributeTable;)Lorg/bouncycastle/asn1/ASN1Set;) Incompatible argument to function at org.apache.pdfbox.examples.signature.CreateVisibleSignature.sign(CreateVisibleSignature.java:218) at org.apache.pdfbox.pdfwriter.COSWriter.doWriteSignature(COSWriter.java:784) at org.apache.pdfbox.pdfwriter.COSWriter.visitFromDocument(COSWriter.java:1171) at org.apache.pdfbox.cos.COSDocument.accept(COSDocument.java:568) at org.apache.pdfbox.pdfwriter.COSWriter.write(COSWriter.java:1517) at org.apache.pdfbox.pdmodel.PDDocument.saveIncremental(PDDocument.java:1391) at org.apache.pdfbox.examples.signature.CreateVisibleSignature.signPDF(CreateVisibleSignature.java:193) at org.apache.pdfbox.examples.signature.CreateVisibleSignature.main(CreateVisibleSignature.java:318)

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.examples.signature;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.KeyStore;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.Security;
import java.security.UnrecoverableKeyException;
import java.security.cert.CertStore;
import java.security.cert.Certificate;
import java.security.cert.CertificateException;
import java.security.cert.CollectionCertStoreParameters;
import java.security.cert.X509Certificate;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.List;

import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.SignatureException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSigProperties;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSignDesigner;
import org.bouncycastle.cms.CMSSignedData;
import org.bouncycastle.cms.CMSSignedDataGenerator;
import org.bouncycastle.cms.CMSSignedGenerator;
import org.bouncycastle.jce.provider.BouncyCastleProvider;

/**
 * <p>
 * This is an example for signing a pdf with bouncy castle.
 * </p>
 * <p>
 * And also you can create visible signature too
 * </p>
 * <p>
 * A keystore can be created with the java keytool (e.g. keytool -genkeypair
 * -storepass 123456 -storetype pkcs12 -alias test -validity 365 -v -keyalg RSA
 * -keystore keystore.p12 )
 * </p>
 * 
 * @author Vakhtang koroghlishvili (Gogebashvili)
 */
public class CreateVisibleSignature implements SignatureInterface {

    /** Bouncy Castle provider used for the keystore, the cert store and the CMS generator. */
    private static final BouncyCastleProvider provider = new BouncyCastleProvider();

    /** Private key of the first keystore alias; stays {@code null} if it could not be extracted. */
    private PrivateKey privKey;

    /** Certificate chain of the first keystore alias; stays {@code null} if it could not be read. */
    private Certificate[] cert;

    /** Signature options; only assigned when a visible signature is requested. */
    private SignatureOptions options;


    // statically add provider, if it is not already there
    private static final boolean firstProvider = true;
    static {
        if (Security.getProvider("BC") == null) {
            // Register the same instance that the rest of this class passes explicitly.
            if (firstProvider) {
                Security.insertProviderAt(provider, 1);
            } else {
                Security.addProvider(provider);
            }
        }
    }

    /**
     * Initialize the signature creator with a keystore (pkcs12) and pin that
     * should be used for the signature.
     * <p>
     * Keystore errors are reported on stderr and leave the key and certificate
     * chain unset; {@link #sign(InputStream)} then fails.
     * </p>
     *
     * @param keystore
     *            is a pkcs12 keystore.
     * @param pin
     *            is the pin for the keystore / private key
     * @throws RuntimeException
     *             if the keystore contains no alias
     */
    public CreateVisibleSignature(KeyStore keystore, char[] pin) {
        try {
            /*
             * grabs the first alias from the keystore and get the private key.
             * An alternative method or constructor could be used for setting a
             * specific alias that should be used.
             */
            Enumeration<String> aliases = keystore.aliases();
            String alias = null;
            if (aliases.hasMoreElements()) {
                alias = aliases.nextElement();
            } else {
                throw new RuntimeException("Could not find alias");
            }
            privKey = (PrivateKey) keystore.getKey(alias, pin);
            cert = keystore.getCertificateChain(alias);
        } catch (KeyStoreException e) {
            e.printStackTrace();
        } catch (UnrecoverableKeyException e) {
            System.err.println("Could not extract private key.");
            e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            System.err.println("Unknown algorithm.");
            e.printStackTrace();
        }
    }

    /**
     * Signs the given pdf file. The input is copied to
     * {@code <name>_signed.pdf} in the same directory and the signature is
     * appended to that copy as an incremental update.
     *
     * @param document
     *            is the pdf document
     * @param signatureProperties
     *            visible signature settings; when {@code null} or not enabled,
     *            the signature is added without signature options
     * @return the signed pdf document
     * @throws IllegalArgumentException
     *             if {@code document} is {@code null} or does not exist
     * @throws IOException
     * @throws COSVisitorException
     * @throws SignatureException
     */
    public File signPDF(File document,
            PDVisibleSigProperties signatureProperties) throws IOException,
            COSVisitorException, SignatureException {
        if (document == null || !document.exists()) {
            // The original built this exception but never threw it.
            throw new IllegalArgumentException("Document for signing does not exist");
        }

        // creating output document and prepare the IO streams.
        String name = document.getName();
        // A file name without an extension must not break substring().
        int dot = name.lastIndexOf('.');
        String baseName = dot >= 0 ? name.substring(0, dot) : name;

        File outputDocument = new File(document.getParent(), baseName
                + "_signed.pdf");
        FileOutputStream fos = new FileOutputStream(outputDocument);
        FileInputStream fis = null;
        PDDocument doc = null;
        try {
            // copy the unsigned document into the output file
            FileInputStream source = new FileInputStream(document);
            try {
                byte[] buffer = new byte[8 * 1024];
                int c;
                while ((c = source.read(buffer)) != -1) {
                    fos.write(buffer, 0, c);
                }
            } finally {
                source.close();
            }
            fis = new FileInputStream(outputDocument);

            // load document
            doc = PDDocument.load(document);

            // create signature dictionary
            PDSignature signature = new PDSignature();
            signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // default filter
            // subfilter for basic and PAdES Part 2 signatures
            signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);
            signature.setName("signer name");
            signature.setLocation("signer location");
            signature.setReason("reason for signature");

            // the signing date, needed for valid signature
            signature.setSignDate(Calendar.getInstance());

            // register signature dictionary and sign interface
            if (signatureProperties != null
                    && signatureProperties.isVisualSignEnabled()) {
                options = new SignatureOptions();
                options.setVisualSignature(signatureProperties);
                // options.setPage(signatureProperties.getPage());
                // options.setPreferedSignatureSize(signatureProperties.getPreferredSize());
                doc.addSignature(signature, this, options);
            } else {
                doc.addSignature(signature, this);
            }

            // write incremental (only for signing purpose)
            doc.saveIncremental(fis, fos);
        } finally {
            // Release every resource even if an earlier close() fails.
            try {
                if (doc != null) {
                    doc.close();
                }
            } finally {
                try {
                    if (fis != null) {
                        fis.close();
                    }
                } finally {
                    fos.close();
                }
            }
        }

        return outputDocument;
    }

    /**
     * <p>
     * SignatureInterface implementation.
     * </p>
     * <p>
     * This method will be called from inside of the pdfbox and create the pkcs7
     * signature. The given InputStream contains the bytes that are provided by
     * the byte range.
     * </p>
     * <p>
     * This method is for internal use only.
     * </p>
     * <p>
     * Here the user should use his favorite cryptographic library and implement
     * a pkcs7 signature creation.
     * </p>
     *
     * @param content
     *            the bytes to be signed
     * @return the encoded CMS signed data
     * @throws RuntimeException
     *             if the signature cannot be created; the underlying
     *             exception is attached as the cause
     */
    public byte[] sign(InputStream content) throws SignatureException,
            IOException {
        if (privKey == null || cert == null) {
            // The constructor only logs keystore failures, so report them clearly here.
            throw new IllegalStateException(
                    "No private key / certificate chain available for signing");
        }
        CMSProcessableInputStream input = new CMSProcessableInputStream(content);
        CMSSignedDataGenerator gen = new CMSSignedDataGenerator();
        // CertificateChain
        List<Certificate> certList = Arrays.asList(cert);

        try {
            CertStore certStore = CertStore.getInstance("Collection",
                    new CollectionCertStoreParameters(certList), provider);
            gen.addSigner(privKey, (X509Certificate) certList.get(0),
                    CMSSignedGenerator.DIGEST_SHA256);
            gen.addCertificatesAndCRLs(certStore);
            CMSSignedData signedData = gen.generate(input, false, provider);
            return signedData.getEncoded();
        } catch (Exception e) {
            // should be handled
            System.err.println("Error while creating pkcs7 signature.");
            e.printStackTrace();
            // Keep the cause so callers can see what actually went wrong.
            throw new RuntimeException("Problem while preparing signature", e);
        }
    }

    /**
     * Signs {@code doc.pdf} with the first key of {@code keystore.p12} (pin
     * {@code 123456}) and the image {@code sign.jpg} as visible signature.
     * All of these are hard-coded; the command line arguments are currently
     * not evaluated.
     *
     * @param args
     *            ignored
     */
    public static void main(String[] args) throws KeyStoreException,
            NoSuchAlgorithmException, CertificateException,
            FileNotFoundException, IOException, COSVisitorException,
            SignatureException {

        if (Security.getProvider("BC") != null) {
            System.out.printf("Bouncy Castle Added!!!");
        } else {
            System.out.printf("Bouncy Castle Not Found!!!!!!!!");
        }

        File ksFile = new File("keystore.p12");
        KeyStore keystore = KeyStore.getInstance("PKCS12", provider);
        char[] pin = "123456".toCharArray();
        FileInputStream keystoreStream = new FileInputStream(ksFile);
        try {
            keystore.load(keystoreStream, pin);
        } finally {
            keystoreStream.close();
        }

        File document = new File("doc.pdf");

        CreateVisibleSignature signing = new CreateVisibleSignature(
                keystore, pin.clone());

        FileInputStream image = new FileInputStream("sign.jpg");
        try {
            PDVisibleSignDesigner visibleSig = new PDVisibleSignDesigner(
                    "doc.pdf", image, 1);
            visibleSig.xAxis(0).yAxis(0).zoom(-50)
                    .signatureFieldName("signature");

            PDVisibleSigProperties signatureProperties = new PDVisibleSigProperties();

            signatureProperties.signerName("name").signerLocation("location")
                    .signatureReason("Security").preferredSize(0).page(1)
                    .visualSignEnabled(true).setPdVisibleSignature(visibleSig)
                    .buildSignature();

            signing.signPDF(document, signatureProperties);
        } finally {
            image.close();
        }

    }

    /**
     * This will print the usage for this program.
     */
    private static void usage() {
        System.err.println("Usage: java " + CreateVisibleSignature.class.getName()
                + " <pkcs12-keystore-file> <pin> <input-pdf> <sign-image>");
    }
}

Answer:

Use version 1.44 of the Bouncy Castle libs, as mentioned here: https://pdfbox.apache.org/dependencies.html

The Bouncy Castle libs are often not backwards compatible, that is why.

Question:

I am stuck in a strange situation. Briefly: I have a web form that, when submitted, fills a PDF template using PDFBox and saves it to a given location. The app works fine in my Eclipse environment, but when I run it on a test Tomcat server it fails with the following:

ERROR c.j.h.controller.HomeController - I=0   fields=FullName{type: PDTextField value: null}
ERROR c.j.h.controller.HomeController - I=0   fields=FullName{type: PDTextField value: COSString{fsdf}}
ERROR c.j.h.controller.HomeController - I=1   fields=Address{type: PDTextField value: null}
ERROR o.s.b.web.support.ErrorPageFilter - Forwarding to error page from request [/save] due to exception [java.io.IOException: Stream closed]
        java.lang.InternalError: java.io.IOException: Stream closed
                at sun.util.locale.provider.BreakIteratorProviderImpl.getBreakInstance(BreakIteratorProviderImpl.java:178)
                at sun.util.locale.provider.BreakIteratorProviderImpl.getLineInstance(BreakIteratorProviderImpl.java:106)
                at java.text.BreakIterator.createBreakInstance(BreakIterator.java:571)
                at java.text.BreakIterator.createBreakInstance(BreakIterator.java:553)
                at java.text.BreakIterator.getBreakInstance(BreakIterator.java:544)
                at java.text.BreakIterator.getLineInstance(BreakIterator.java:483)
                at java.text.BreakIterator.getLineInstance(BreakIterator.java:470)
                at org.apache.pdfbox.pdmodel.interactive.form.PlainText$Paragraph.getLines(PlainText.java:159)
                at org.apache.pdfbox.pdmodel.interactive.form.PlainTextFormatter.format(PlainTextFormatter.java:182)
                at org.apache.pdfbox.pdmodel.interactive.form.AppearanceGeneratorHelper.insertGeneratedAppearance(AppearanceGeneratorHelper.java:422)
                at org.apache.pdfbox.pdmodel.interactive.form.AppearanceGeneratorHelper.setAppearanceContent(AppearanceGeneratorHelper.java:288)
                at org.apache.pdfbox.pdmodel.interactive.form.AppearanceGeneratorHelper.setAppearanceValue(AppearanceGeneratorHelper.java:170)
                at org.apache.pdfbox.pdmodel.interactive.form.PDTextField.constructAppearances(PDTextField.java:263)
                at org.apache.pdfbox.pdmodel.interactive.form.PDTerminalField.applyChange(PDTerminalField.java:228)
                at org.apache.pdfbox.pdmodel.interactive.form.PDTextField.setValue(PDTextField.java:218)
                at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
                at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
                at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
                at java.lang.reflect.Method.invoke(Method.java:498)
                at org.springframework.web.method.support.InvocableHandlerMethod.doInvoke(InvocableHandlerMethod.java:205)
                at org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:133)
                at org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:97)
                at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandlerMethod(RequestMappingHandlerAdapter.java:827)
                at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:738)
                at org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:85)
                at org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:967)
                at org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:901)
                at org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:970)
                at org.springframework.web.servlet.FrameworkServlet.doPost(FrameworkServlet.java:872)
                at javax.servlet.http.HttpServlet.service(HttpServlet.java:650)
                at org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:846)
                at javax.servlet.http.HttpServlet.service(HttpServlet.java:731)
                at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:303)
                at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
                at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
                at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
                at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
                at org.springframework.web.filter.RequestContextFilter.doFilterInternal(RequestContextFilter.java:99)
                at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
                at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
                at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
                at org.springframework.web.filter.HttpPutFormContentFilter.doFilterInternal(HttpPutFormContentFilter.java:108)
                at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
                at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
                at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
    at org.springframework.web.filter.HiddenHttpMethodFilter.doFilterInternal(HiddenHttpMethodFilter.java:81)
            at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
            at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
            at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
            at org.springframework.web.filter.CharacterEncodingFilter.doFilterInternal(CharacterEncodingFilter.java:197)
            at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
            at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
            at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
            at org.springframework.boot.web.support.ErrorPageFilter.doFilter(ErrorPageFilter.java:115)
            at org.springframework.boot.web.support.ErrorPageFilter.access$000(ErrorPageFilter.java:59)
            at org.springframework.boot.web.support.ErrorPageFilter$1.doFilterInternal(ErrorPageFilter.java:90)
            at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
            at org.springframework.boot.web.support.ErrorPageFilter.doFilter(ErrorPageFilter.java:108)
            at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:241)
            at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:208)
            at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:218)
            at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:110)
            at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:506)
            at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:169)
            at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:103)
            at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:962)
            at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:962)
            at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:116)
            at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:445)
            at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1087)
            at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:637)
            at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:318)
            at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
            at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
            at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
            at java.lang.Thread.run(Thread.java:748)
    Caused by: java.io.IOException: Stream closed
            at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:159)
            at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
            at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
            at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
            at java.io.FilterInputStream.read(FilterInputStream.java:107)
            at sun.util.locale.provider.RuleBasedBreakIterator.readFile(RuleBasedBreakIterator.java:462)
            at sun.util.locale.provider.RuleBasedBreakIterator.readTables(RuleBasedBreakIterator.java:375)
            at sun.util.locale.provider.RuleBasedBreakIterator.<init>(RuleBasedBreakIterator.java:321)
            at sun.util.locale.provider.BreakIteratorProviderImpl.getBreakInstance(BreakIteratorProviderImpl.java:169)
            ... 76 common frames omitted

If you look at the top of the log output, you can see a few ERROR messages that I added to help identify where it goes wrong. My code looks like this:

// Load the PDF template as a classpath resource relative to HomeController and open it with PDFBox.
InputStream template = HomeController.class.getResourceAsStream("template");
PDDocument pdfDocument = PDDocument.load(template);
// Navigate from the document catalog to the AcroForm and the page tree.
PDDocumentCatalog pdfCatalog = pdfDocument.getDocumentCatalog();
PDAcroForm acroForm = pdfCatalog.getAcroForm();
PDPageTree pages = pdfCatalog.getPages();    
// NOTE(review): myObjectList is created empty here; the question says it holds the web-form data,
// so presumably it is populated in code not shown — confirm.
ArrayList<String> myObjectList = new ArrayList<String>();
List<PDField> fields = acroForm.getFields();
// NOTE(review): the loop bound uses firstAidList, which is not declared in this snippet, while the
// values are read from myObjectList — presumably both refer to the same data; confirm that its size
// does not exceed fields.size().
for(int i = 0; i< firstAidList.size(); i++) {
               // Log the field before and after assigning the i-th value to the i-th form field.
               log.error("I="+ i + "   " + "fields=" + fields.get(i));
               fields.get(i).setValue(myObjectList.get(i));
               log.error("I="+ i + "   " + "fields=" + fields.get(i));
}
// Few more stuff done then I close
// The document is closed before the input stream it was loaded from.
pdfDocument.close();
template.close();

Basically, myObjectList is the data I get from the web form and fields holds the fields I get from my PDF template; in the for loop I just set each field's value. If you look at the code and the first error messages, however, the problem occurs when trying to set the second value. Since it works in my Eclipse but not on the Tomcat server (which is separate from the Eclipse one), one of my thoughts is that it is related to some permissions on the Tomcat server, but I have no idea how to "prove" it.


Answer:

It seems the solution was to restart the Tomcat server. The failure is related to problems with BreakIterator after Java updates on the server.

Question:

While trying to decrypt an encrypted PDF using PDFBox, I am facing an exception -

java.lang.ArrayIndexOutOfBoundsException
 at java.lang.System.arraycopy(Native Method)
 at org.apache.pdfbox.pdfmodel.encryption.StandardSecurityHandler.computeEncryptedKey
 at org.apache.pdfbox.pdfmodel.encryption.StandardSecurityHandler.computeUserPassword

The code I am using is

// Load the PDF and, only when it reports itself as encrypted, open it with the supplied password.
PDDocument doc = PDDocument.load(file);
if (doc.isEncrypted()) {
   // The password is wrapped in decryption material and handed to openProtection();
   // the question reports that the ArrayIndexOutOfBoundsException is thrown by this call.
   StandardDecryptionMaterial dm = new StandardDecryptionMaterial(password);
   doc.openProtection(dm);
}

The exception happens at openProtection method. I also tried doc.decrypt method, but with the same result.

My maven contains

<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId>
  <version>1.8.13</version>
</dependency>
<dependency>
  <groupId>org.bouncycastle</groupId>
  <artifactId>bcprov-jdk15</artifactId>
  <version>1.44</version>
</dependency>
<dependency>
  <groupId>org.bouncycastle</groupId>
  <artifactId>bcmail-jdk15</artifactId>
  <version>1.44</version>
</dependency>

Java version is 1.7

I tried multiple versions of PDFBox/BouncyCastle (1.8.7/1.46), but the exception still persists.

If I use qpdf --decrypt to decrypt the file beforehand, it works perfectly. But when I try to decrypt it in code, it fails with the exception shown above.

I am using this inside a Spring Boot application - Can that be a reason?

If anyone can point me to a possible solution, I would be much obliged.


Answer:

Thank you, Tilman Hausherr.

I tried PDFBox 2.0.7 and it worked perfectly.

Question:

I'd like to get all filenames of attachments/embedded files of a PDF document. I've been searching for a long time now, but my code still doesn't work.

What I tried:

// Open the PDF and read the names of its embedded files from the document's name dictionary.
File input = new File(inputfile); // Input File Path, Given as param from args[]
pd = PDDocument.load(input);
PDDocumentNameDictionary names = new PDDocumentNameDictionary(pd.getDocumentCatalog());
// NOTE(review): efTree is dereferenced below without a null check — presumably
// getEmbeddedFiles() can return null when the PDF has no embedded files; confirm.
PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
Map<String, COSObjectable> existedNames = efTree.getNames();

System.out.println(existedNames);//Print Embedded-Filenames to console
pd.close();

I don't know if it is even possible to print the content of a Map to the console. I'm coding in Eclipse, which doesn't report any errors. But when I run the jar file, I always get: NullPointerException at org.apache.pdfbox.pdmodel.PDDocument.getDocumentCatalog(PDDocument.java:778)

Any ideas or help? Many thanks...


Answer:

Here's the ExtractEmbeddedFiles example from the source code download:

/**
 * Command-line example that writes every file embedded in a PDF into the directory
 * that contains the PDF.
 *
 * <p>Two sources are searched: the document-level embedded-files name tree and the
 * file-attachment annotations found on each page.
 */
public final class ExtractEmbeddedFiles
{
    private ExtractEmbeddedFiles()
    {
        // utility class, not meant to be instantiated
    }

    /**
     * This is the main method.
     *
     * @param args The command line arguments; exactly one is expected, the path of the input PDF.
     *
     * @throws IOException If there is an error parsing the document or writing an extracted file.
     */
    public static void main( String[] args ) throws IOException
    {
        if( args.length != 1 )
        {
            usage();
            System.exit(1);
        }
        else
        {
            PDDocument document = null;
            try
            {
                File pdfFile = new File(args[0]);
                document = PDDocument.load(pdfFile );
                // File.getParent() is null for a bare file name such as "doc.pdf", which used
                // to produce the bogus output prefix "null/"; resolving the absolute file first
                // always yields the real containing directory.
                File directory = pdfFile.getAbsoluteFile().getParentFile();

                PDDocumentNameDictionary namesDictionary = 
                        new PDDocumentNameDictionary( document.getDocumentCatalog() );
                PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
                if (efTree != null)
                {
                    extractFilesFromTree(efTree, directory);
                }

                // extract files from annotations
                for (PDPage page : document.getPages())
                {
                    for (PDAnnotation annotation : page.getAnnotations())
                    {
                        if (annotation instanceof PDAnnotationFileAttachment)
                        {
                            PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                            // Only complex file specifications can carry an embedded file; check
                            // the type instead of casting blindly (held as Object so that no
                            // additional import is required).
                            Object candidate = annotationFileAttachment.getFile();
                            if (candidate instanceof PDComplexFileSpecification)
                            {
                                PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) candidate;
                                PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
                                extractFile(directory, fileSpec.getFilename(), embeddedFile);
                            }
                        }
                    }
                }

            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }

    /**
     * Extracts the files referenced by a name-tree node, descending into its kids.
     *
     * <p>A node holds either names (leaf) or kids (intermediate node), and intermediate
     * nodes may be nested to any depth, so the tree is walked recursively.
     *
     * @param node The name-tree node to process.
     * @param directory The directory the files are written to.
     *
     * @throws IOException If a file cannot be read from the PDF or written to disk.
     */
    private static void extractFilesFromTree(PDNameTreeNode<PDComplexFileSpecification> node,
            File directory) throws IOException
    {
        Map<String, PDComplexFileSpecification> names = node.getNames();
        if (names != null)
        {
            extractFiles(names, directory);
            return;
        }
        List<PDNameTreeNode<PDComplexFileSpecification>> kids = node.getKids();
        if (kids == null)
        {
            // neither names nor kids: nothing to extract from this node
            return;
        }
        for (PDNameTreeNode<PDComplexFileSpecification> kid : kids)
        {
            extractFilesFromTree(kid, directory);
        }
    }

    /**
     * Extracts every entry of a names map.
     *
     * @param names Map from embedded file name to its file specification.
     * @param directory The directory the files are written to.
     *
     * @throws IOException If a file cannot be read from the PDF or written to disk.
     */
    private static void extractFiles(Map<String, PDComplexFileSpecification> names, File directory) 
            throws IOException
    {
        for (Entry<String, PDComplexFileSpecification> entry : names.entrySet())
        {
            String filename = entry.getKey();
            PDComplexFileSpecification fileSpec = entry.getValue();
            PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
            extractFile(directory, filename, embeddedFile);
        }
    }

    /**
     * Writes one embedded file to disk. Entries without a name or without data, and
     * entries whose name would resolve outside of {@code directory}, are skipped
     * with a message on standard error.
     *
     * @param directory The directory the file is written to.
     * @param filename The file name as stored in the PDF; may be null.
     * @param embeddedFile The embedded file data; may be null.
     *
     * @throws IOException If the data cannot be read or the file cannot be written.
     */
    private static void extractFile(File directory, String filename, PDEmbeddedFile embeddedFile)
            throws IOException
    {
        if (filename == null || embeddedFile == null)
        {
            System.err.println("Skipping " + filename + " (no file name or no embedded data)");
            return;
        }

        File file = new File(directory, filename);

        // The name comes from the PDF and is therefore untrusted: refuse names such as
        // "../../x" that would be written outside of the target directory.
        String directoryPath = directory.getCanonicalPath();
        String allowedPrefix = directoryPath.endsWith(File.separator)
                ? directoryPath : directoryPath + File.separator;
        if (!file.getCanonicalPath().startsWith(allowedPrefix))
        {
            System.err.println("Skipping " + filename + " (outside of " + directoryPath + ")");
            return;
        }

        System.out.println("Writing " + file.getPath());
        FileOutputStream fos = null;
        try
        {
            fos = new FileOutputStream(file);
            fos.write(embeddedFile.toByteArray());
        }
        finally
        {
            IOUtils.closeQuietly(fos);
        }
    }

    /**
     * Returns the first available variant of the embedded file of a file specification,
     * trying Unicode, DOS, Mac, Unix and finally the generic entry, in that order.
     *
     * @param fileSpec The file specification; may be null.
     *
     * @return The first non-null embedded file, or null if there is none.
     */
    private static PDEmbeddedFile getEmbeddedFile(PDComplexFileSpecification fileSpec )
    {
        // search for the first available alternative of the embedded file
        PDEmbeddedFile embeddedFile = null;
        if (fileSpec != null)
        {
            embeddedFile = fileSpec.getEmbeddedFileUnicode(); 
            if (embeddedFile == null)
            {
                embeddedFile = fileSpec.getEmbeddedFileDos();
            }
            if (embeddedFile == null)
            {
                embeddedFile = fileSpec.getEmbeddedFileMac();
            }
            if (embeddedFile == null)
            {
                embeddedFile = fileSpec.getEmbeddedFileUnix();
            }
            if (embeddedFile == null)
            {
                embeddedFile = fileSpec.getEmbeddedFile();
            }
        }
        return embeddedFile;
    }

    /**
     * This will print the usage for this program.
     */
    private static void usage()
    {
        System.err.println( "Usage: java " + ExtractEmbeddedFiles.class.getName() + " <input-pdf>" );
    }
}

Question:

I am facing a problem when trying to print Arabic letters using the DirectPrint bean. This is a PJC. English fonts are printed fine, but when I want to print Arabic, the exception below is thrown:

Exception in thread "main" java.lang.RuntimeException: Not yet implemented
at org.pdfbox.pdmodel.font.PDType0Font.drawString(PDType0Font.java:75)
at org.pdfbox.pdfviewer.PageDrawer.showCharacter(PageDrawer.java:160)
at org.pdfbox.util.PDFStreamEngine.showString(PDFStreamEngine.java:409)
at org.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:80)
at org.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:452)
at org.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:215)
at org.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:174)
at org.pdfbox.pdfviewer.PageDrawer.drawPage(PageDrawer.java:104)
at org.pdfbox.pdmodel.PDPage.print(PDPage.java:741)
at sun.print.RasterPrinterJob.printPage(RasterPrinterJob.java:1936)
at sun.print.RasterPrinterJob.print(RasterPrinterJob.java:1431)
at sun.print.RasterPrinterJob.print(RasterPrinterJob.java:1247)
at dsd.printing.DirectPrint.main(DirectPrint.java:842)

Please help to solve this issue.


Answer:

It seems that drawing the Arabic characters is not implemented in the version of the PDFBox library you're using.