Total Pageviews

Showing posts with label Apache Project. Show all posts
Showing posts with label Apache Project. Show all posts

2016/07/09

[PDFBox] No ImageWriter found for 'tif' format in WebSphere

Problem
When I tried to convert a PDF to TIF with PDFBox on WebSphere 8, I got the following error messages during the conversion:
1
2
[Line:198][org.apache.pdfbox.tools.imageio.ImageIOUtil.writeImage]No ImageWriter found for 'tif' format
[Line:206][org.apache.pdfbox.tools.imageio.ImageIOUtil.writeImage]Supported formats: BMP bmp jpg JPG wbmp jpeg png PNG JPEG WBMP GIF gif 


This error normally means that the jai-imageio-core jar file is missing from the classpath.
However, I had already added the jai-imageio-core dependency to pom.xml.
The relevant snippet of pom.xml looks like this:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
<!-- for generating tif file -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.0</version>
</dependency>

<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-core</artifactId>
    <version>1.3.1</version>
</dependency>

Therefore, I added some logging to check whether the ImageWriterSpi registration shipped in the jar is visible to the class loader:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
 // List every ImageWriterSpi service registration that the context class loader can see,
 // to verify that the one shipped in jai-imageio-core is reachable at runtime.
 try {
     Enumeration<URL> urls = Thread.currentThread().getContextClassLoader()
             .getResources("META-INF/services/javax.imageio.spi.ImageWriterSpi");
     while (urls.hasMoreElements()) {
         log.info("[convertToTiff] url = " + urls.nextElement().toString());
     }
 } catch (IOException e1) {
     // The exception is rethrown with its cause attached, so the stack trace is not lost
     // and does not need to be printed separately.
     throw new RuntimeException(e1);
 }

 The jar file does exist in my EAR file! So what went wrong?
1
 [convertToTiff] url = wsjar:file:/usr/IBM/WebSphere/AppServer/profiles/AppSrv03/installedApps/FDCSRA205Node03Cell/yuantalife-ecp-manage.ear/yuantalife-ecp-manage-war-0.1.0-SNAPSHOT.war/WEB-INF/lib/jai-imageio-core-1.3.1.jar!/META-INF/services/javax.imageio.spi.ImageWriterSpi

Here is my source code:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package albert.practice.file;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Enumeration;

import javax.imageio.ImageIO;

import lombok.extern.slf4j.Slf4j;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;

/**
 * Converts every page of a PDF file into its own black-and-white TIF image by rendering the
 * page with PDFBox and writing it through {@link ImageIOUtil}.
 */
@Slf4j
public class PdfFileToTif {

    /** Resolution, in dots per inch, used for rendering and passed to the image writer. */
    private final float dpi = 300f;

    /**
     * Sample entry point: logs the visible ImageWriterSpi registrations, then converts a
     * hard-coded PDF.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File pdfFile = new File("D:\\dropbox\\Getting Started.pdf");
        String destination = "D:\\dropbox\\";

        PdfFileToTif test = new PdfFileToTif();
        test.checkImageIoJarFile();
        test.convertPdfToTif(pdfFile, destination);
    }

    /**
     * Renders each page of {@code pdfFile} and writes it to
     * {@code destination + <pdf file name> + "_NN.tif"}, where NN is the two-digit, 1-based
     * page number.
     *
     * @param pdfFile     the PDF to convert; must exist and must not be a directory
     * @param destination prefix prepended as-is to every output file name, so it has to end
     *                    with a path separator when it denotes a directory
     * @throws RuntimeException if {@code pdfFile} does not exist or is a directory, or wrapping
     *                          the {@link IOException} raised while loading, rendering, writing
     *                          or closing the document
     */
    public void convertPdfToTif(File pdfFile, String destination) {
        if (!isFileExisted(pdfFile)) {
            throw new RuntimeException("File not found ! (" + pdfFile.getAbsolutePath() + ")");
        }

        String pdfFileName = pdfFile.getName();

        try {
            // load PDF document
            PDDocument document = PDDocument.load(pdfFile);
            try {
                // create PDF renderer
                PDFRenderer renderer = new PDFRenderer(document);

                // go through each page of PDF, and generate TIF for each PDF page.
                for (int i = 0; i < document.getNumberOfPages(); i++) {
                    // Render the page as a black-and-white (ImageType.BINARY) image.
                    BufferedImage image = renderer.renderImageWithDPI(i, dpi, ImageType.BINARY);

                    // Assign the file name of TIF
                    String fileName = pdfFileName + "_" + String.format("%02d", i + 1);
                    String tifPath = destination + fileName + ".tif";
                    log.debug("Generating  {}.tif to {}", fileName, destination);

                    // writeImage reports failure (e.g. no ImageWriter registered for the
                    // format) through its return value; surface it instead of carrying on
                    // and logging success.
                    boolean written = ImageIOUtil.writeImage(image, tifPath, Math.round(dpi));
                    image.flush();
                    if (!written) {
                        throw new IOException("Failed to write image file: " + tifPath);
                    }
                }
                log.debug("PDF to TIF conversion well done!");
            } finally {
                // Always release the document, otherwise PDFBox warns
                // "You did not close a PDF Document" when it is finalized.
                document.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Checks whether the given file exists.
     *
     * @param file the file to check
     * @return {@code true} if the file exists and is not a directory; {@code false} otherwise
     */
    private boolean isFileExisted(File file) {
        return file.exists() && !file.isDirectory();
    }

    /**
     * Logs the URL of every {@code META-INF/services/javax.imageio.spi.ImageWriterSpi}
     * resource visible to the current thread's context class loader.
     *
     * @throws RuntimeException wrapping the {@link IOException} raised while looking up the
     *                          resources
     */
    private void checkImageIoJarFile() {
        try {
            Enumeration<URL> urls = Thread.currentThread().getContextClassLoader()
                    .getResources("META-INF/services/javax.imageio.spi.ImageWriterSpi");
            while (urls.hasMoreElements()) {
                log.info("[convertToTiff] urls = {}", urls.nextElement());
            }
        } catch (IOException e1) {
            // Rethrown with the cause attached, so no separate printStackTrace() is needed.
            throw new RuntimeException(e1);
        }
    }
}


How-to
The root cause is not entirely clear; it may be caused by the way WebSphere loads classes.
I then found an API that scans for plug-ins on the application class path, loads their service provider classes, and registers a service provider instance for each one found with the IIORegistry. The problem was resolved once I added a call to ImageIO.scanForPlugins();

Here is my updated source code (note the added ImageIO.scanForPlugins() call in convertPdfToTif):
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package albert.practice.file;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Enumeration;

import javax.imageio.ImageIO;

import lombok.extern.slf4j.Slf4j;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;

/**
 * Converts every page of a PDF file into its own black-and-white TIF image by rendering the
 * page with PDFBox and writing it through {@link ImageIOUtil}. Image I/O plug-ins are
 * re-scanned before writing so that writers found on the application class path are registered.
 */
@Slf4j
public class PdfFileToTif {

    /** Resolution, in dots per inch, used for rendering and passed to the image writer. */
    private final float dpi = 300f;

    /**
     * Sample entry point: logs the visible ImageWriterSpi registrations, then converts a
     * hard-coded PDF.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File pdfFile = new File("D:\\dropbox\\Getting Started.pdf");
        String destination = "D:\\dropbox\\";

        PdfFileToTif test = new PdfFileToTif();
        test.checkImageIoJarFile();
        test.convertPdfToTif(pdfFile, destination);
    }

    /**
     * Renders each page of {@code pdfFile} and writes it to
     * {@code destination + <pdf file name> + "_NN.tif"}, where NN is the two-digit, 1-based
     * page number.
     *
     * @param pdfFile     the PDF to convert; must exist and must not be a directory
     * @param destination prefix prepended as-is to every output file name, so it has to end
     *                    with a path separator when it denotes a directory
     * @throws RuntimeException if {@code pdfFile} does not exist or is a directory, or wrapping
     *                          the {@link IOException} raised while loading, rendering, writing
     *                          or closing the document
     */
    public void convertPdfToTif(File pdfFile, String destination) {
        if (!isFileExisted(pdfFile)) {
            throw new RuntimeException("File not found ! (" + pdfFile.getAbsolutePath() + ")");
        }

        String pdfFileName = pdfFile.getName();

        try {
            // load PDF document
            PDDocument document = PDDocument.load(pdfFile);
            try {
                // Scans for plug-ins on the application class path, loads their service
                // provider classes, and registers a service provider instance for each one
                // found with the IIORegistry.
                ImageIO.scanForPlugins();

                // create PDF renderer
                PDFRenderer renderer = new PDFRenderer(document);

                // go through each page of PDF, and generate TIF for each PDF page.
                for (int i = 0; i < document.getNumberOfPages(); i++) {
                    // Render the page as a black-and-white (ImageType.BINARY) image.
                    BufferedImage image = renderer.renderImageWithDPI(i, dpi, ImageType.BINARY);

                    // Assign the file name of TIF
                    String fileName = pdfFileName + "_" + String.format("%02d", i + 1);
                    String tifPath = destination + fileName + ".tif";
                    log.debug("Generating  {}.tif to {}", fileName, destination);

                    // writeImage reports failure (e.g. no ImageWriter registered for the
                    // format) through its return value; surface it instead of carrying on
                    // and logging success.
                    boolean written = ImageIOUtil.writeImage(image, tifPath, Math.round(dpi));
                    image.flush();
                    if (!written) {
                        throw new IOException("Failed to write image file: " + tifPath);
                    }
                }
                log.debug("PDF to TIF conversion well done!");
            } finally {
                // Always release the document, otherwise PDFBox warns
                // "You did not close a PDF Document" when it is finalized.
                document.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Checks whether the given file exists.
     *
     * @param file the file to check
     * @return {@code true} if the file exists and is not a directory; {@code false} otherwise
     */
    private boolean isFileExisted(File file) {
        return file.exists() && !file.isDirectory();
    }

    /**
     * Logs the URL of every {@code META-INF/services/javax.imageio.spi.ImageWriterSpi}
     * resource visible to the current thread's context class loader.
     *
     * @throws RuntimeException wrapping the {@link IOException} raised while looking up the
     *                          resources
     */
    private void checkImageIoJarFile() {
        try {
            Enumeration<URL> urls = Thread.currentThread().getContextClassLoader()
                    .getResources("META-INF/services/javax.imageio.spi.ImageWriterSpi");
            while (urls.hasMoreElements()) {
                log.info("[convertToTiff] urls = {}", urls.nextElement());
            }
        } catch (IOException e1) {
            // Rethrown with the cause attached, so no separate printStackTrace() is needed.
            throw new RuntimeException(e1);
        }
    }
}




Reference 
[1] http://stackoverflow.com/questions/17178591/how-to-add-tiff-imagereader-to-those-registered-in-grails

2016/07/05

[PDFBox] No ImageWriter found for 'tif' format

Problem
I tried to convert a PDF to TIF files with PDFBox, but I got the following error messages during the conversion:
1
2
3
4
四月 28, 2016 3:06:38 下午 org.apache.pdfbox.tools.imageio.ImageIOUtil writeImage
SEVERE: No ImageWriter found for 'tif' format
四月 28, 2016 3:06:38 下午 org.apache.pdfbox.tools.imageio.ImageIOUtil writeImage
SEVERE: Supported formats: BMP bmp jpg JPG wbmp jpeg png JPEG PNG WBMP GIF gif 

My source code looks like:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package albert.practice.file;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

import lombok.extern.slf4j.Slf4j;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;

/**
 * Converts every page of a PDF file into its own black-and-white TIF image by rendering the
 * page with PDFBox and writing it through {@link ImageIOUtil}.
 */
@Slf4j
public class PdfFileToTif {

    /** Resolution, in dots per inch, used for rendering and passed to the image writer. */
    private final float dpi = 300f;

    /**
     * Sample entry point that converts a hard-coded PDF.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File pdfFile = new File("D:\\dropbox\\Getting Started.pdf");
        String destination = "D:\\dropbox\\";

        PdfFileToTif test = new PdfFileToTif();
        test.convertPdfToTif(pdfFile, destination);
    }

    /**
     * Renders each page of {@code pdfFile} and writes it to
     * {@code destination + <pdf file name> + "_NN.tif"}, where NN is the two-digit, 1-based
     * page number.
     *
     * @param pdfFile     the PDF to convert; must exist and must not be a directory
     * @param destination prefix prepended as-is to every output file name, so it has to end
     *                    with a path separator when it denotes a directory
     * @throws RuntimeException if {@code pdfFile} does not exist or is a directory, or wrapping
     *                          the {@link IOException} raised while loading, rendering, writing
     *                          or closing the document
     */
    public void convertPdfToTif(File pdfFile, String destination) {
        if (!isFileExisted(pdfFile)) {
            throw new RuntimeException("File not found ! (" + pdfFile.getAbsolutePath() + ")");
        }

        String pdfFileName = pdfFile.getName();

        try {
            // load PDF document
            PDDocument document = PDDocument.load(pdfFile);
            try {
                // create PDF renderer
                PDFRenderer renderer = new PDFRenderer(document);

                // go through each page of PDF, and generate TIF for each PDF page.
                for (int i = 0; i < document.getNumberOfPages(); i++) {
                    // Render the page as a black-and-white (ImageType.BINARY) image.
                    BufferedImage image = renderer.renderImageWithDPI(i, dpi, ImageType.BINARY);

                    // Assign the file name of TIF
                    String fileName = pdfFileName + "_" + String.format("%02d", i + 1);
                    String tifPath = destination + fileName + ".tif";
                    log.debug("Generating  {}.tif to {}", fileName, destination);

                    // writeImage reports failure (e.g. no ImageWriter registered for the
                    // format) through its return value; surface it instead of carrying on
                    // and logging success.
                    boolean written = ImageIOUtil.writeImage(image, tifPath, Math.round(dpi));
                    image.flush();
                    if (!written) {
                        throw new IOException("Failed to write image file: " + tifPath);
                    }
                }
                log.debug("PDF to TIF conversion well done!");
            } finally {
                // Always release the document, otherwise PDFBox warns
                // "You did not close a PDF Document" when it is finalized.
                document.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Checks whether the given file exists.
     *
     * @param file the file to check
     * @return {@code true} if the file exists and is not a directory; {@code false} otherwise
     */
    private boolean isFileExisted(File file) {
        return file.exists() && !file.isDirectory();
    }

}

pom.xml snippet looks like:
1
2
3
4
5
6
<!-- for generating tif file -->
  <dependency>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>pdfbox-tools</artifactId>
      <version>2.0.0</version>
  </dependency>


This error message is printed by ImageIOUtil:
https://github.com/apache/pdfbox/blob/04292e421e9531616aa2856b908fbdb05b381af7/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java#L198

According to the JavaDoc in writeImage method...
Writes a buffered image to a file using the given image format. Compression is fixed for PNG, GIF, BMP and WBMP, dependent of the quality parameter for JPG, and dependent of bit count for TIFF (a bitonal image will be compressed with CCITT G4, a color image with LZW). Creating a TIFF image is only supported if the jai_imageio library is in the class path.
https://github.com/apache/pdfbox/blob/04292e421e9531616aa2856b908fbdb05b381af7/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java#L150-L155


How-to
This error message appears because the jai_imageio library is not on my class path.
Therefore, adding jai-imageio-core as a dependency resolves the problem:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
<!-- for generating tif file -->
  <dependency>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>pdfbox-tools</artifactId>
      <version>2.0.0</version>
  </dependency>
  
  <dependency>
      <groupId>com.github.jai-imageio</groupId>
      <artifactId>jai-imageio-core</artifactId>
      <version>1.3.1</version>
  </dependency>



2016/07/04

[PDFBox] Building on-disk font cache, this may take a while

Problem
I am using PDFBox to convert a PDF file to TIF files. 
It works on the Windows platform, but fails to execute on the AIX platform. 

We can see the warning message in System.out.log:
1
2
[WARN][WebContainer : 11][Line:444][org.apache.pdfbox.pdmodel.font.FileSystemFontProvider.loadDiskCache]New fonts found, font cache will be re-built
[WARN][WebContainer : 11][Line:224][org.apache.pdfbox.pdmodel.font.FileSystemFontProvider.<init>]Building on-disk font cache, this may take a while

This warning message is printed from here:
https://github.com/apache/pdfbox/blob/04292e421e9531616aa2856b908fbdb05b381af7/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java#L224-L226

But it is difficult to determine the root cause of why it spends so much time loading font files:
https://github.com/apache/pdfbox/blob/04292e421e9531616aa2856b908fbdb05b381af7/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java#L237-L277


Solution
In FontMapperImpl, we can see that it adds lots of font substitutes. This may be the reason:
https://github.com/apache/pdfbox/blob/04292e421e9531616aa2856b908fbdb05b381af7/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FontMapperImpl.java#L55-L127


Hence, we wrote a PdfBoxInitiazlier class that gives FontMapperImpl a font provider returning an empty font list:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
package org.apache.pdfbox.pdmodel.font;

import java.util.Collections;
import java.util.List;

import javax.annotation.PostConstruct;

import org.springframework.stereotype.Component;

/**
 * Spring component that, right after construction, installs a {@link FontMapperImpl} whose
 * font provider reports no fonts at all.
 *
 * <p>NOTE(review): this class lives in PDFBox's own {@code org.apache.pdfbox.pdmodel.font}
 * package, presumably to reach types that are not public there - confirm.
 */
@Component
public class PdfBoxInitiazlier {

    /** Replaces the font mapper registered in {@code FontMappers} with the empty-provider one. */
    @PostConstruct
    public void setUpPdfBox() {
        FontMapperImpl emptyMapper = new FontMapperImpl();
        emptyMapper.setProvider(new EmptyFontProvider());
        org.apache.pdfbox.pdmodel.font.FontMappers.set(emptyMapper);
    }

    /** Font provider that exposes an empty font list. */
    private static class EmptyFontProvider extends FontProvider {

        /** @return an empty list, i.e. no fonts are offered */
        public List<? extends FontInfo> getFontInfo() {
            return Collections.emptyList();
        }

        /** @return a fixed label identifying this provider */
        public String toDebugString() {
            return "EmptyFontProvider"; 
        }
    }
}


Because our project uses Spring Boot, PdfBoxInitiazlier is registered as a bean (see the @Bean method below), so its @PostConstruct method runs at startup and overrides the default font mapper.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import lombok.extern.slf4j.Slf4j;

import org.apache.pdfbox.pdmodel.font.PdfBoxInitiazlier;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Import;

import com.cht.compost.context.web.CompostServletInitializer;
import com.yuantalife.ecp.manage.common.EcpManageConfiguration;
import com.yuantalife.ecp.member.EcpManageMemberConfiguration;

/**
 * Spring Boot entry point. Scans {@code com.xxx.ecp} for components, imports the two ECP
 * configuration classes and registers the PDFBox initializer bean.
 */
@Slf4j
@SpringBootApplication
@ComponentScan("com.xxx.ecp")
@Import({ EcpManageConfiguration.class, EcpManageMemberConfiguration.class })
public class Application extends CompostServletInitializer {

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments handed over to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }

    /**
     * Declares {@link PdfBoxInitiazlier} explicitly as a bean; its package
     * ({@code org.apache.pdfbox.pdmodel.font}) is not among the packages listed in the
     * component scan on this class.
     *
     * @return a new initializer instance
     */
    @Bean
    public PdfBoxInitiazlier pdfBoxInitializer() {
        log.info("enter pdfBoxInitializer");
        PdfBoxInitiazlier initializer = new PdfBoxInitiazlier();
        return initializer;
    }
}

This is a workaround for the "Building on-disk font cache, this may take a while" warning. 
Although it is just a warning message, it prevented my function from executing successfully.




2016/07/03

[PDFBox] Warning: You did not close a PDF Document

Problem
I am using PDFBox to convert a PDF file to TIF files. 


But I get a warning message when I do the PDF to TIF conversion:
1
[WARN][Finalizer][Line:481][org.apache.pdfbox.cos.COSDocument.finalize]Warning: You did not close a PDF Document


Code snippet looks like:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
 /**
  * Generates the TIF files: loads the PDF named by {@code params.getFile()} and writes one TIF
  * per page through {@code writeImage}, using {@code filePath + policyNumber} as the output
  * prefix.
  *
  * @param params   supplies the PDF path ({@code getFile()}) and the policy number
  * @param filePath prefix that the policy number is appended to
  * @return the list returned by {@code writeImage}
  * @throws RuntimeException wrapping any {@link IOException} raised while loading or writing
  */
 private List<String> generateTifFile(TifConversionParam params, String filePath)
          throws IOException {

      List<String> rs = null;
      PDDocument document = null;
      try {
          document = PDDocument.load(new File(params.getFile()));
          String policyNumber = params.getPolicyNumber();
          rs = writeImage(document, filePath + policyNumber, ImageType.BINARY, DPI, policyNumber);
      } catch (IOException e) {
          throw new RuntimeException(e);
      } 
      // NOTE: the loaded document is never closed in this version; this is the listing that
      // produces the "You did not close a PDF Document" warning.

      return rs;
  }

  /**
   * Renders every page of {@code document} and writes it to
   * {@code outputPrefix + "_NN" + ".tif"}, where NN is the two-digit, 1-based page number.
   *
   * @param document     the loaded PDF; it is not closed here
   * @param outputPrefix prefix of each output file path
   * @param imageType    image type to render each page with
   * @param dpi          rendering resolution; rounded before being passed to the image writer
   * @param fileName     base name used for the returned names
   * @return {@code fileName + "_NN"} for each page, in page order
   * @throws IOException if rendering or writing a page fails
   */
  private List<String> writeImage(PDDocument document, String outputPrefix, ImageType imageType,
          float dpi, String fileName) throws IOException {
      List<String> tifNames = new ArrayList<String>();
      PDFRenderer renderer = new PDFRenderer(document);

      for (int i = 0; i < document.getNumberOfPages(); i++) {
          BufferedImage image = renderer.renderImageWithDPI(i, dpi, imageType);

          String outputPostfix = "_" + String.format("%02d", i + 1); // numbering starts at 01
          String outputFileName = outputPrefix + outputPostfix;// file name: <policy number>_0X

          tifNames.add(fileName + outputPostfix); // collect the generated names for the caller

          ImageIOUtil.writeImage(image, outputFileName + "." + "tif", Math.round(dpi));
          image.flush();
      }
      return tifNames;
  }


Solution
You need to call close() on the PDDocument inside a finally block; if you don't, the document will not be closed properly. Also, you must close every PDDocument object that gets created. 


Updated code snippet looks like:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Generate the TIF files
 /**
  * Loads the PDF named by {@code params.getFile()}, writes one TIF per page through
  * {@code writeImage} with {@code filePath + policyNumber} as the output prefix, and closes
  * the document afterwards.
  *
  * @param params   supplies the PDF path ({@code getFile()}) and the policy number
  * @param filePath prefix that the policy number is appended to
  * @return the list returned by {@code writeImage}
  * @throws RuntimeException wrapping any {@link IOException} raised while loading or writing
  * @throws IOException      if closing the document fails
  */
 private List<String> generateTifFile(TifConversionParam params, String filePath)
         throws IOException {
     PDDocument document = null;
     try {
         document = PDDocument.load(new File(params.getFile()));
         String policyNumber = params.getPolicyNumber();
         String outputPrefix = filePath + policyNumber;
         return writeImage(document, outputPrefix, ImageType.BINARY, DPI, policyNumber);
     } catch (IOException e) {
         throw new RuntimeException(e);
     } finally {
         // Release the document on every path; it is still null when loading itself failed.
         if (document != null) {
             document.close();
         }
     }
 }

  /**
   * Renders every page of {@code document} and writes it to
   * {@code outputPrefix + "_NN.tif"}, where NN is the two-digit, 1-based page number.
   *
   * @param document     the loaded PDF; it is not closed here
   * @param outputPrefix prefix of each output file path
   * @param imageType    image type to render each page with
   * @param dpi          rendering resolution; rounded before being passed to the image writer
   * @param fileName     base name used for the returned names
   * @return {@code fileName + "_NN"} for each page, in page order
   * @throws IOException if rendering or writing a page fails
   */
  private List<String> writeImage(PDDocument document, String outputPrefix, ImageType imageType,
          float dpi, String fileName) throws IOException {
      PDFRenderer pageRenderer = new PDFRenderer(document);
      List<String> generatedNames = new ArrayList<String>();
      int resolution = Math.round(dpi);

      for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
          // Two-digit page suffix, numbered from 01: _01, _02, ...
          String pageSuffix = String.format("_%02d", pageIndex + 1);

          BufferedImage pageImage = pageRenderer.renderImageWithDPI(pageIndex, dpi, imageType);

          // Remember the generated name so it can be handed back to the caller.
          generatedNames.add(fileName + pageSuffix);

          ImageIOUtil.writeImage(pageImage, outputPrefix + pageSuffix + ".tif", resolution);
          pageImage.flush();
      }
      return generatedNames;
  }  


Reference
[1] https://pdfbox.apache.org/1.8/faq.html#notclosed