2017/10/05

How to check a file's media type?

Problem
My web application has a file upload function that needs to check whether the file uploaded by the user is an image file. How can I do this?

How-To
You can make good use of Apache Tika to fulfill this requirement.

Add dependency in your pom.xml
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-core</artifactId>
            <version>1.14</version>
            <scope>compile</scope>
        </dependency>




Sample code is as follows:
package albert.practice.file;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.tika.Tika;

import lombok.extern.slf4j.Slf4j;

/**
 * Utility for checking whether a file's detected media type is one of a fixed
 * set of image media types.
 *
 * <p>Media type detection is delegated to Apache Tika; this class only compares
 * the detected type against {@link #IMAGE_MEDIA_TYPES}.
 */
@Slf4j
public class FileContentTypeUtils {

    /**
     * Media types that are treated as images.
     *
     * <p>Taken from http://www.fileformat.info/info/mimetype/image/index.htm.
     * The list is built once at class initialization and never modified, so it
     * can be read from any number of callers without rebuilding it per call.
     *
     * <p>NOTE(review): any type that is not listed here (for example
     * {@code image/bmp} or {@code image/webp}) is reported as "not an image" by
     * {@link #isImage(String)} - extend the list if those should be accepted.
     */
    private static final List<String> IMAGE_MEDIA_TYPES = Arrays.asList(
            "image/cgm", "image/fits",
            "image/g3fax", "image/gif", "image/ief", "image/jp2",
            "image/jpeg", "image/jpm", "image/jpx", "image/naplps",
            "image/png", "image/prs.btif", "image/prs.pti", "image/t38",
            "image/tiff", "image/tiff-fx", "image/vnd.adobe.photoshop",
            "image/vnd.cns.inf2", "image/vnd.djvu", "image/vnd.dwg",
            "image/vnd.dxf", "image/vnd.fastbidsheet", "image/vnd.fpx",
            "image/vnd.fst", "image/vnd.fujixerox.edmics-mmr",
            "image/vnd.fujixerox.edmics-rlc",
            "image/vnd.globalgraphics.pgb", "image/vnd.microsoft.icon",
            "image/vnd.mix", "image/vnd.ms-modi", "image/vnd.net-fpx",
            "image/vnd.sealed.png", "image/vnd.sealedmedia.softseal.gif",
            "image/vnd.sealedmedia.softseal.jpg", "image/vnd.svf",
            "image/vnd.wap.wbmp", "image/vnd.xiff");

    /**
     * Tells whether the given file is detected as one of the known image media types.
     *
     * @param sourceFile path of the file to inspect; must not be {@code null}
     * @return {@code true} if the media type detected by Tika is contained in
     *         {@link #IMAGE_MEDIA_TYPES}, {@code false} otherwise
     * @throws IOException          if Tika fails to read the file while detecting its type
     * @throws NullPointerException if {@code sourceFile} is {@code null}
     */
    public static Boolean isImage(String sourceFile) throws IOException {
        // Fail fast with a clear message instead of an anonymous NPE from the File constructor.
        if (sourceFile == null) {
            throw new NullPointerException("sourceFile must not be null");
        }

        File file = FileUtils.getFile(sourceFile);
        String mediaType = new Tika().detect(file);
        return IMAGE_MEDIA_TYPES.contains(mediaType);
    }

}



Test code is the following:
package albert.practice.file;

import java.io.IOException;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for {@link FileContentTypeUtils#isImage(String)}.
 *
 * <p>The class under test is a static utility, so it is called through its class
 * name rather than being inherited from.
 *
 * <p>NOTE(review): the fixture paths below are absolute paths on the author's
 * machine; adjust them (or replace them with test resources) before running
 * the tests elsewhere.
 */
public class FileContentTypeUtilsTest {

    /** Path of a sample file that is expected to be detected as an image. */
    private String imageFile;

    /** Path of a sample file that is expected NOT to be detected as an image. */
    private String pdfFile;

    /** Points the fixtures at the sample files before each test. */
    @Before
    public void setup() {
        imageFile = "/Users/albert/Dropbox/picture/panda.png";
        pdfFile = "/Users/albert/Dropbox/test_測試.pdf";
    }

    /** A PNG file must be reported as an image. */
    @Test
    public void testImageFile() throws IOException {
        Assert.assertTrue(FileContentTypeUtils.isImage(imageFile));
    }

    /** A PDF file must not be reported as an image. */
    @Test
    public void testPdfFile() throws IOException {
        Assert.assertFalse(FileContentTypeUtils.isImage(pdfFile));
    }

}



Test results are shown below:






No comments:

Post a Comment