Total Pageviews

Showing posts with label POI. Show all posts
Showing posts with label POI. Show all posts

2019/11/12

[Apache POI] How to read large excel file

Problem
I am trying to read a large excel file , around 700000 records, via Apache POI.
It takes  lots of time and resources to read this kind of large excel file.
Is it any effective approach to handle this kind of situation?

The original code will take around 60 seconds to read:
package tw.com.abc.dcb.service;

import com.monitorjbl.xlsx.StreamingReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.springframework.stereotype.Component;
import tw.com.abc.dcb.vo.DcbRecord;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

@Component
@Slf4j
public class ExcelReaderService {

    /**
     * Read Excel file
     *
     * @param xlsFile excel file path
     * @return excel data
     * @throws IOException fail to read excel file
     */
    public List<DcbRecord> readExcel(String xlsFile) throws IOException {
        List<DcbRecord> result = new ArrayList<>();
        int rowCount = 1;

        try (Workbook workbook = WorkbookFactory.create(new File(xlsFile))) {
            Sheet sheet = workbook.getSheetAt(0);

            Iterator<Row> rowIterator = sheet.iterator();
            while (rowIterator.hasNext()) {
                if (rowCount == 1) {
                    // skip header
                    rowIterator.next();
                    rowCount++;
                } else {
                    Row row = rowIterator.next();

                    DcbRecord record = DcbRecord.builder()
                            .company(row.getCell(0).getStringCellValue().trim())
                            .paymentDescription(row.getCell(1).getStringCellValue().trim())
                            .amount((int) row.getCell(2).getNumericCellValue())
                            .build();

                    result.add(record);
                    rowCount++;
                }
            }
        } catch (IOException e) {
            throw new IOException("fail to read excel file : " + e.getMessage(), e);
        }
        return result;
    }

}



How-To
The original approach to read excel file is not very memory efficient.
We can make good use of Excel Streaming Reader to import this disadvantage.
The new approach's read time will improve to 20 seconds.

Sample code:
package tw.com.abc.dcb.service;

import com.monitorjbl.xlsx.StreamingReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.springframework.stereotype.Component;
import tw.com.abc.dcb.vo.DcbRecord;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

@Component
@Slf4j
public class ExcelReaderService {

    /**
     * Read large excel file
     *
     * @param xlsxFile large excel file path
     * @return excel data
     * @throws IOException fail to read excel file
     */
    public List<DcbRecord> readLargeExcel(String xlsxFile) throws IOException {
        List<DcbRecord> result = new ArrayList<>();
        int rowCount = 1;
        try (InputStream is = new FileInputStream(new File(xlsxFile));
             Workbook workbook = StreamingReader.builder().rowCacheSize(100).bufferSize(4096).open(is)) {
            for (Sheet sheet : workbook) {
                for (Row row : sheet) {
                    if (rowCount == 1) {
                        // skip header
                        rowCount++;
                    } else {
                        DcbRecord record = DcbRecord.builder()
                                .company(row.getCell(0).getStringCellValue().trim())
                                .paymentDescription(row.getCell(1).getStringCellValue().trim())
                                .amount((int) row.getCell(2).getNumericCellValue())
                                .build();

                        result.add(record);
                    }
                }
            }
        } catch (IOException e) {
            throw new IOException("fail to read excel file : " + e.getMessage(), e);
        }
        return result;
    }

}




Reference

[1] https://github.com/monitorjbl/excel-streaming-reader

2019/10/14

[Apache POI] How to set formula from another sheet?

Problem
I had two sheets in my excel file.
If some of cells in first sheet's formula is from second sheet, how to fulfill this requirement?

How-To
Remember to set formula after your code completed second sheet.
If you set formula before second sheet, it will not have any effect.


2019/10/12

[Apache POI] 常用 CellStyle 設定

Sample code:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
     // 為 cell 上色
     CellStyle headerCellStyle = workbook.createCellStyle();
     headerCellStyle.setFillForegroundColor(IndexedColors.LIME.getIndex());
     headerCellStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
     headerCellStyle.setDataFormat(workbook.createDataFormat().getFormat("#,##0"));
     headerCellStyle.setFont(font);
    
     CellStyle highlightCellStyle = workbook.createCellStyle();
     highlightCellStyle.setFillForegroundColor(IndexedColors.LEMON_CHIFFON.getIndex());
     highlightCellStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
     highlightCellStyle.setDataFormat(workbook.createDataFormat().getFormat("#,##0"));
    
     CellStyle footerCellStyle = workbook.createCellStyle();
     footerCellStyle.setFillForegroundColor(IndexedColors.LIME.getIndex());
     footerCellStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
     footerCellStyle.setDataFormat(workbook.createDataFormat().getFormat("#,##0"));
     footerCellStyle.setFont(font);
    
     // 為 cell value 加上 1000 seperator
     CellStyle nonHighlightCellStyle = workbook.createCellStyle();
     nonHighlightCellStyle.setDataFormat(workbook.createDataFormat().getFormat("#,##0"));
    
     // 為 cell value 加上百分比
     CellStyle percentageCellStyle = workbook.createCellStyle();
     percentageCellStyle.setDataFormat(workbook.createDataFormat().getFormat("0.000%"));

2019/10/11

[Apache POI] How to draw border in Apache POI?

Problem
I would like to draw border as bellows:


How-To
You can make good use of  PropertyTemplate to draw border with ease.

If you would like to draw line for multiple tables, you can do this way:

2019/09/14

[Apache POI] How to set formula in Excel via Apache POI?

Problem
How to set formula in Excel via Apache POI?

How-To
Sample code:
    // 1. create sheet from workbook
    Sheet sheet= workbook.createSheet("Goal");

    // 2. create row
    Row row = sheet.createRow(rowCount++);

    // 3. create cell and set formula
    row.createCell(13).setCellFormula("SUM(B8:M8)");


2019/09/13

[Apache POI] How to set merge cell in Apache POI ?

Problem
I would like to merge A4 and B4 


How-To
According to Apache POI quick guide, we can use addMergedRegion to fulfill this requirement:

Code snippet:



Reference
[1] http://poi.apache.org/components/spreadsheet/quick-guide.html

2019/01/12

[Java] [POI] org.apache.poi.poifs.filesystem.OfficeXmlFileException: The supplied data appears to be in the Office 2007+ XML.

Problem
Here has a code snippet to read excel file (.xls file)
    try (InputStream inputStream = new FileInputStream(new File(xlsFile));
        Workbook workbook = new HSSFWorkbook(inputStream);) {

        Sheet sheet = workbook.getSheetAt(0);
        // ignore implementation details

    } catch (IOException e) {
        throw new IOException("讀取 Excel file 發生錯誤", e);
    }

But when I provide xlsx file, it will throw exception:
org.apache.poi.poifs.filesystem.OfficeXmlFileException: The supplied data appears to be in the Office 2007+ XML. 
You are calling the part of POI that deals with OLE2 Office Documents. 
You need to call a different part of POI to process this data (eg XSSF instead of HSSF)

How-To
If I would like to read xlsx file, the code snippet should be modified as bellow:
    try (InputStream inputStream = new FileInputStream(new File(xlsxFile));
        Workbook workbook = new XSSFWorkbook(inputStream);) {

        Sheet sheet = workbook.getSheetAt(0);
        // ignore implementation details

    } catch (IOException e) {
        throw new IOException("讀取 Excel file 發生錯誤", e);
    }
  


If you don't care about xls or xlsx file, your can modify your code as follows:
    try (Workbook workbook = WorkbookFactory.create(new File(xlsOrXlsxFile));) {
        Sheet sheet = workbook.getSheetAt(0);
        // ignore implementation details
    } catch (IOException e) {
        throw new IOException("讀取 Excel file 發生錯誤", e);
    }


Reference
[1] https://stackoverflow.com/questions/31844308/java-poi-the-supplied-data-appears-to-be-in-the-office-2007-xml

2016/12/07

[Apache POI] java.lang.IllegalArgumentException: The maximum length of cell contents (text) is 32,767 characters

Problem
I am using Apache POI to write data into Microsoft Excel, but it throw this exception during the process:
java.lang.IllegalArgumentException: The maximum length of cell contents (text) is 32,767 characters


Here is the code snippet, this exception result from line 12:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
    private void writeIssueDataForEachRow(Issue issue, Row row, CellStyle style,
                                          List<ColumnIndex> customFieldDefinitions) {
        Cell cell = row.createCell(0);
        cell.setCellValue(issue.getId()); // 編號
        cell.setCellStyle(style);

        cell = row.createCell(1);
        cell.setCellValue(issue.getSubject()); // 主旨
        cell.setCellStyle(style);

        cell = row.createCell(2);
        cell.setCellValue(issue.getDescription()); // 敘述
        cell.setCellStyle(style);

    }



Solution
According to Apache POI's source code, it will check the maximum length of each cell cannot be larger than 32,767 characters.
https://github.com/apache/poi/blob/31f2b5fdd10c7442576cbed5d37507cb2cdf11cc/src/java/org/apache/poi/hssf/usermodel/HSSFCell.java#L574-L576    


And it is also XLS's imitation, so we need to do substring or use XLSX format instead. We choose the former option. Therefore, my source code should be modified as following:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
    private void writeIssueDataForEachRow(Issue issue, Row row, CellStyle style,
                                          List<ColumnIndex> customFieldDefinitions) {
        Cell cell = row.createCell(0);
        cell.setCellValue(issue.getId()); // 編號
        cell.setCellStyle(style);

        cell = row.createCell(1);
        cell.setCellValue(issue.getSubject()); // 主旨
        cell.setCellStyle(style);

        // substring 的原因是要避開 The maximum length of cell contents (text) is 32,767 characters
        cell = row.createCell(2);
        cell.setCellValue(StringUtils.substring(issue.getDescription(), 0, 32767)); // 敘述
        cell.setCellStyle(style);

    }


Reference
[1] https://support.office.com/en-us/article/Excel-specifications-and-limits-16c69c74-3d6a-4aaf-ba35-e6eb276e8eaa?CorrelationId=c8dcbafe-51ff-447e-bd62-3c1ce0e1d05e&ui=en-US&rs=en-US&ad=US&ocmsassetID=HP010073849

2016/09/08

[Apache POI] How to freeze the first row

Problem
I am using Apache POI to write data into Microsoft Excel. 
If I would like to keep the first row on the top of this document when I scroll down, how to do it?


How-to
You can utilize createFreezePane API which provide by Apache POI to fulfill this requirement. Here has code snippet:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 @Override
 public File exportIssuesToXls(List < Issue > issues) throws IOException, RedmineException {
  Workbook workbook = new HSSFWorkbook();
  Sheet sheet = workbook.createSheet("issue list");
  CellStyle style = createCellStyle(workbook);

  //....

  //freeze the first row
  sheet.createFreezePane(0, 1);

  FileOutputStream outputStream = null;
  File tmpFile = null;
  try {
   tmpFile = File.createTempFile("issue", ".xls");
   log.info("tmpFile = " + tmpFile.getCanonicalPath());
   outputStream = new FileOutputStream(tmpFile);
   workbook.write(outputStream);

  } catch (FileNotFoundException e) {
   e.printStackTrace();
  } finally {
   if (outputStream != null) {
    outputStream.close();
   }
   if (workbook != null) {
    workbook.close();
   }
  }
  return tmpFile;
 }



Reference
[1] https://stackoverflow.com/questions/17932575/apache-poi-locking-header-rows

2016/09/07

[Apache POI] How to add new line in cell data

Problem
I am using Apache POI to write data into Microsoft Excel.
I would like to break data into new line in some cell, how to do it?


How-to
You can add \n for the new line. For example,
1
Issue issue1 = new Issue(1, "查不到資料", "新建立", "正常", "蜘蛛人(2016-05-26 17:05:00):\n這個提議不錯,來做吧!\n\n浩克(2016-05-26 17:05:00):\n測試無誤\n\n");




Reference
[1] https://stackoverflow.com/questions/14646349/how-to-add-new-line-character-in-the-cell-comment-using-apache-poi

2016/09/04

[POI] Write /Read Excel Files in Java using Apache POI

Here has a simple example to demonstrate how to write / read excel file via Apache POI.

Add Apache POI dependency:
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>

Create a POJO class:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
package albert.practice.poi;

import java.io.Serializable;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;

@Data
@ToString
@AllArgsConstructor
@NoArgsConstructor
public class Issue implements Serializable {
    /**
    * 
    */
    private static final long serialVersionUID = 1L;

    private Integer id;
    private String subject;
    private String status;
    private String priority;
}

Sample code is as bellows:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
package albert.practice.poi;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

import lombok.extern.slf4j.Slf4j;

//http://www.codejava.net/coding/how-to-write-excel-files-in-java-using-apache-poi
//http://viralpatel.net/blogs/java-read-write-excel-file-apache-poi/
@Slf4j
public class PoiExample {

    private static String excelFile = "D:" + File.separator + "issue.xls";

    public static void main(String[] args) throws IOException {
        PoiExample test = new PoiExample();
        test.writeExcel();

        test.readExcel(excelFile);
    }

    public List<Issue> readExcel(String excelFile) throws IOException {

        List<Issue> issues = new ArrayList<Issue>();

        InputStream inputStream = null;
        Workbook workbook = null;
        try {
            // 1. Create a Workbook.
            inputStream = new FileInputStream(new File(excelFile));
            workbook = new HSSFWorkbook(inputStream);

            // 2. Get first sheet
            Sheet sheet = workbook.getSheetAt(0);

            // 3. Iterate through each rows from first sheet
            Iterator<Row> rowIterator = sheet.iterator();
            int rowCount = 1;
            while (rowIterator.hasNext()) {
                // (1) ignore header row
                if (rowCount == 1) {
                rowIterator.next();
                rowCount++;
                }
                // (2) start to read each row from second row
                else {
                    Row row = rowIterator.next();
                    Integer id = Double.valueOf(row.getCell(0).getNumericCellValue()).intValue();
                    String subject = row.getCell(1).getStringCellValue();
                    String status = row.getCell(2).getStringCellValue();
                    String priority = row.getCell(3).getStringCellValue();

                    Issue issue = new Issue();
                    issue.setId(id);
                    issue.setSubject(subject);
                    issue.setStatus(status);
                    issue.setPriority(priority);

                    issues.add(issue);
                 }
             }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
            if (workbook != null) {
                workbook.close();
            }
        }

        for (Issue issue : issues) {
            log.debug("issue = " + issue.toString());
        }

        return issues;
    }

    public void writeExcel() throws IOException {
        // 0. prepare dummy data
        List<Issue> issues = createDummyIssues();

        // 1. Create a Workbook.
        Workbook workbook = new HSSFWorkbook();

        // 2. Create a Sheet.
        Sheet sheet = workbook.createSheet("issue list");

        // 3. create cell style
        CellStyle style = createCellStyle(workbook);

        // 4. Repeat the following steps until all data is processed:
        // (1) Create a Row.
        // (2) Create Cells in a Row. Apply formatting using CellStyle.
        int rowCount = 0;
        Row headerRow = sheet.createRow(rowCount);
        writeHeader(headerRow, style);

        for (Issue issue : issues) {
            Row row = sheet.createRow(++rowCount);
            writeDataForEachRow(issue, row, style);
        }

        // 5. auto resize column width
        for (int i = 0; i < 4; i++) {
            sheet.autoSizeColumn(i);
        }

        // 6. Write to an OutputStream.
        // 7. Close the output stream.
        FileOutputStream outputStream = null;
        try {
            outputStream = new FileOutputStream(excelFile);
            workbook.write(outputStream);

            log.debug("write issue data to excel file successfully");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            if (outputStream != null) {
                outputStream.close();
            }
            if (workbook != null) {
                workbook.close();
            }
        }
    }

    private void writeHeader(Row headerRow, CellStyle style) {
        Cell cell = headerRow.createCell(0);
        cell.setCellValue("編號");
        cell.setCellStyle(style);

        cell = headerRow.createCell(1);
        cell.setCellValue("主旨");
        cell.setCellStyle(style);

        cell = headerRow.createCell(2);
        cell.setCellValue("狀態");
        cell.setCellStyle(style);

        cell = headerRow.createCell(3);
        cell.setCellValue("優先");
        cell.setCellStyle(style);
    }

    private void writeDataForEachRow(Issue issue, Row row, CellStyle style) {
        Cell cell = row.createCell(0);
        cell.setCellValue(issue.getId());
        cell.setCellStyle(style);

        cell = row.createCell(1);
        cell.setCellValue(issue.getSubject());
        cell.setCellStyle(style);

        cell = row.createCell(2);
        cell.setCellValue(issue.getStatus());
        cell.setCellStyle(style);

        cell = row.createCell(3);
        cell.setCellValue(issue.getPriority());
        cell.setCellStyle(style);
    }

    private CellStyle createCellStyle(Workbook workbook) {
        CellStyle cellStyle = workbook.createCellStyle();
        cellStyle.setBorderBottom((short) 1);
        cellStyle.setBorderTop((short) 1);
        cellStyle.setBorderLeft((short) 1);
        cellStyle.setBorderRight((short) 1);
        cellStyle.setWrapText(true);

        return cellStyle;
    }

    private List<Issue> createDummyIssues() {
        Issue issue1 = new Issue(1, "查不到資料", "新建立", "正常");
        Issue issue2 = new Issue(2, "新增時發生錯誤", "新建立", "高");
        Issue issue3 = new Issue(3, "刪除失敗", "處理中", "高");

        return Arrays.asList(issue1, issue2, issue3);
    }

}


Excel file looks like: https://github.com/junyuo/AlbertGitProject/blob/master/src/albert/practice/poi/issue.xls


Reference
[1] http://www.codejava.net/coding/how-to-write-excel-files-in-java-using-apache-poi
[2] http://viralpatel.net/blogs/java-read-write-excel-file-apache-poi/