package lia.handlingtypes.pdf;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
import org.pdfbox.cos.COSDocument;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
import org.pdfbox.encryption.DecryptDocument;
import org.pdfbox.exceptions.InvalidPasswordException;
import org.pdfbox.exceptions.CryptographyException;
import org.pdfbox.util.PDFTextStripper;
public class PDFBoxPDFHandler{
/**
 * Password passed to {@code DecryptDocument.decryptDocument} when the parsed
 * PDF reports itself as encrypted. Defaults to the empty string.
 *
 * NOTE(review): the field is public, static and non-final, so it can be
 * reassigned from anywhere and one value is shared by all handler
 * instances -- confirm this is intended before relying on it concurrently.
 */
public static String password = "";
public Document getDocument(InputStream is)
throws Exception {
COSDocument cosDoc = null;
try {
cosDoc = parseDocument(is);
}
catch (IOException e) {
closeCOSDocument(cosDoc);
throw new Exception(
"Cannot parse PDF document", e);
}
// decrypt the PDF document, if it is encrypted
try {
if (cosDoc.isEncrypted()) {
DecryptDocument decryptor = new DecryptDocument(cosDoc);
decryptor.decryptDocument(password);
}
}