1. 解析类 BigExcelParse

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
package com.master.industry.common;

import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

/**
 * SAX content handler that reads the XML of one worksheet of an {@code .xlsx}
 * file and collects every row as a {@code String[]}.
 *
 * <p>The handler is meant for large workbooks: it is fed by a SAX parser and
 * never builds the POI user model. Cell values are converted to text according
 * to the cell's {@code t} (type) attribute and, for numeric cells, the number
 * format index of the cell style.
 *
 * <p>Rows accumulate in the list returned by {@link #getRows()} for as long as
 * the same handler instance is used; the handler itself never clears that list.
 * An instance keeps mutable parsing state and is not safe for concurrent use.
 *
 * @author M_YF (2019/10/31)
 */
public class BigExcelParse extends DefaultHandler {

    /** Kind of value held by the cell currently being parsed. */
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    /** Style table used to look up the number format index of a styled cell. */
    private final StylesTable stylesTable;

    /** Shared strings table used to resolve the index stored in {@code t="s"} cells. */
    private final SharedStringsTable sharedStringsTable;

    /** Length of every row array; cells in columns at or beyond this index are dropped. */
    private final int minColumnCount;

    /** True while character data should be appended to {@link #value}. */
    private boolean vIsOpen;

    /** True while inside an {@code <is>} (inline string) element. */
    private boolean inlineStringOpen;

    /** True while inside an {@code <rPh>} (phonetic run) element, whose text is skipped. */
    private boolean phoneticRunOpen;

    private xssfDataType nextDataType;

    /** Number format index of the current cell's style, or -1 when the cell has none. */
    private short formatIndex;

    /** Zero-based column of the current cell, or -1 before the first cell of a row. */
    private int thisColumn = -1;

    /** Gathers characters as they are seen; {@code characters()} may fire more than once. */
    private final StringBuilder value;

    /** Cells of the row currently being read. */
    private String[] record;

    /** All rows read so far. */
    private List<String[]> rows = new LinkedList<>();

    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    private final SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm");

    /**
     * Creates a handler.
     *
     * @param styles  style table of the workbook
     * @param strings shared strings table of the workbook
     * @param cols    number of columns kept per row; every row array has this length
     */
    public BigExcelParse(StylesTable styles,
                         SharedStringsTable strings, int cols) {
        this.stylesTable = styles;
        this.sharedStringsTable = strings;
        this.minColumnCount = cols;
        this.value = new StringBuilder();
        this.nextDataType = xssfDataType.NUMBER;
        this.record = new String[this.minColumnCount];
    }

    /**
     * Opens text capture for {@code <v>} and inline-string {@code <t>} elements and
     * reads position, type and style from every {@code <c>} (cell) element.
     */
    @Override
    public void startElement(String uri, String localName, String name,
                             Attributes attributes) throws SAXException {
        if ("inlineStr".equals(name) || "v".equals(name)) {
            vIsOpen = true;
            // Clear contents cache
            value.setLength(0);
        } else if ("is".equals(name)) {
            // Inline strings carry their text in <is><t>..</t></is>, not in <v>.
            inlineStringOpen = true;
            value.setLength(0);
        } else if ("rPh".equals(name)) {
            phoneticRunOpen = true;
        } else if ("t".equals(name)) {
            // Rich inline strings have several <t> runs; they are concatenated.
            vIsOpen = inlineStringOpen && !phoneticRunOpen;
        } else if ("c".equals(name)) {
            startCell(attributes);
        }
    }

    /** Resets per-cell state from the attributes of a {@code <c>} element. */
    private void startCell(Attributes attributes) {
        thisColumn = columnOf(attributes.getValue("r"));

        // Set up defaults.
        nextDataType = xssfDataType.NUMBER;
        formatIndex = -1;

        String cellType = attributes.getValue("t");
        String cellStyleStr = attributes.getValue("s");
        if ("b".equals(cellType)) {
            nextDataType = xssfDataType.BOOL;
        } else if ("e".equals(cellType)) {
            nextDataType = xssfDataType.ERROR;
        } else if ("inlineStr".equals(cellType)) {
            nextDataType = xssfDataType.INLINESTR;
        } else if ("s".equals(cellType)) {
            nextDataType = xssfDataType.SSTINDEX;
        } else if ("str".equals(cellType)) {
            nextDataType = xssfDataType.FORMULA;
        } else if (cellStyleStr != null && stylesTable != null) {
            // It's a number, but almost certainly one with a special style or format.
            XSSFCellStyle style = stylesTable.getStyleAt(Integer.parseInt(cellStyleStr));
            if (style != null) {
                formatIndex = style.getDataFormat();
            }
        }
    }

    /**
     * Returns the zero-based column of a cell reference such as {@code "AB12"}.
     * When the reference is absent the cell is taken to follow the previous one.
     */
    private int columnOf(String ref) {
        if (ref == null) {
            return thisColumn + 1;
        }
        int firstDigit = 0;
        while (firstDigit < ref.length() && !Character.isDigit(ref.charAt(firstDigit))) {
            firstDigit++;
        }
        return nameToColumn(ref.substring(0, firstDigit));
    }

    /**
     * Converts and stores the cell text when a value element ends, and appends
     * the finished row to the result list when a {@code <row>} element ends.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        if ("v".equals(name)) {
            // v => contents of a cell. Convert now, as characters() may have been
            // called more than once for this element.
            vIsOpen = false;
            storeCell(convertValue());
        } else if ("t".equals(name)) {
            vIsOpen = false;
        } else if ("rPh".equals(name)) {
            phoneticRunOpen = false;
        } else if ("is".equals(name)) {
            inlineStringOpen = false;
            storeCell(new XSSFRichTextString(value.toString()).toString());
        } else if ("row".equals(name)) {
            // Every row is stored, including rows whose cells are all null.
            if (minColumnCount > 0) {
                rows.add(record);
                record = new String[minColumnCount];
            }
            thisColumn = -1;
        }
    }

    /** Writes {@code text} into the current row if the current column fits in it. */
    private void storeCell(String text) {
        if (thisColumn >= 0 && thisColumn < record.length) {
            record[thisColumn] = text;
        }
    }

    /** Converts the buffered {@code <v>} text according to the current cell type. */
    private String convertValue() {
        switch (nextDataType) {
            case BOOL:
                if (value.length() == 0) {
                    return null;
                }
                return value.charAt(0) == '0' ? "FALSE" : "TRUE";
            case ERROR:
                return "\"ERROR:" + value.toString() + '"';
            case FORMULA:
                // Cached result of a formula that evaluates to a string.
                return value.toString();
            case INLINESTR:
                return new XSSFRichTextString(value.toString()).toString();
            case SSTINDEX:
                return sharedString(value.toString());
            case NUMBER:
                return formatNumber(value.toString());
            default:
                return "(TODO: Unexpected type: " + nextDataType + ")";
        }
    }

    /** Resolves a shared-strings index; returns null when the index is not a number. */
    private String sharedString(String sstIndex) {
        try {
            int idx = Integer.parseInt(sstIndex);
            XSSFRichTextString text = new XSSFRichTextString(
                    sharedStringsTable.getEntryAt(idx));
            return text.toString();
        } catch (NumberFormatException ex) {
            System.out.println("Failed to parse SST index '" + sstIndex
                    + "': " + ex.toString());
            return null;
        }
    }

    /**
     * Renders a numeric cell: as {@code yyyy-MM-dd} or {@code HH:mm} when the style's
     * format index is one of the recognised date/time indices, otherwise as plain
     * digits (scientific notation is expanded).
     */
    private String formatNumber(String raw) {
        // Date indices are tested first, so an index listed in both groups is a date.
        if (isDateFormat(formatIndex)) {
            return formatTemporal(raw, dateFormat);
        }
        if (isTimeFormat(formatIndex)) {
            return formatTemporal(raw, timeFormat);
        }
        if (raw.indexOf('E') >= 0 || raw.indexOf('e') >= 0) {
            try {
                // e.g. "1.23E+5" -> "123000"
                return new java.math.BigDecimal(raw).stripTrailingZeros().toPlainString();
            } catch (NumberFormatException ex) {
                return raw;
            }
        }
        return raw;
    }

    /** Formats an Excel serial date; falls back to the raw text if it is not a valid date. */
    private String formatTemporal(String raw, SimpleDateFormat format) {
        try {
            Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(raw));
            return date == null ? raw : format.format(date);
        } catch (NumberFormatException ex) {
            return raw;
        }
    }

    private static boolean isDateFormat(short idx) {
        return idx == 14 || idx == 31 || idx == 57 || idx == 58
                || (176 <= idx && idx <= 178) || (182 <= idx && idx <= 196)
                || (210 <= idx && idx <= 213) || idx == 208;
    }

    private static boolean isTimeFormat(short idx) {
        return idx == 20 || idx == 32 || idx == 183 || (200 <= idx && idx <= 209);
    }

    /** Returns the live list of rows collected so far. */
    public List<String[]> getRows() {
        return rows;
    }

    /** Replaces the list that subsequently parsed rows are appended to. */
    public void setRows(List<String[]> rows) {
        this.rows = rows;
    }

    /** Buffers character data while a value element is open. */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (vIsOpen) {
            value.append(ch, start, length);
        }
    }

    /** Converts column letters ({@code "A"}, {@code "AB"}, ...) to a zero-based index. */
    private int nameToColumn(String name) {
        int column = -1;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            column = (column + 1) * 26 + c - 'A';
        }
        return column;
    }
}

2. 调用类 BigExcelParseUtils

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package com.master.industry.common;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.InputStream;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;

/**
 * Reads the sheets of an {@code .xlsx} file with the POI event API and a
 * {@link BigExcelParse} handler, returning every row as a {@code String[]}.
 *
 * @author M_YF (2019/10/31)
 */
public class BigExcelParseUtils {
    private OPCPackage xlsxPackage;
    private int minColumns;
    private PrintStream output;
    private String sheetName;

    /**
     * Stores the given values; {@link #parseSheet(String, Integer, int)} is static
     * and does not read them.
     */
    public BigExcelParseUtils(OPCPackage pkg, PrintStream output,
                              String sheetName, int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
        this.sheetName = sheetName;
    }

    /**
     * Parses the sheets of the workbook at {@code path} in iteration order and
     * returns their rows concatenated into one list.
     *
     * @param path           path of the {@code .xlsx} file
     * @param lastIndexSheet zero-based index of the first sheet that is NOT read
     *                       (exclusive upper bound); {@code null} reads every sheet
     * @param minColumns     number of columns kept per row
     * @return all rows of the sheets that were read, each of length {@code minColumns}
     * @throws RuntimeException wrapping any failure while opening or parsing the file
     */
    public static List<String[]> parseSheet(String path, Integer lastIndexSheet, int minColumns) throws Exception {
        try {
            OPCPackage pkg = OPCPackage.open(path, PackageAccess.READ);
            try {
                List<String[]> result = new LinkedList<>();
                XSSFReader reader = new XSSFReader(pkg);
                XSSFReader.SheetIterator sheetsData = (XSSFReader.SheetIterator) reader.getSheetsData();
                SharedStringsTable sst = reader.getSharedStringsTable();
                StylesTable styles = reader.getStylesTable();
                // NOTE(review): XMLReaderFactory is deprecated since Java 9 and this
                // reader is not hardened against DTDs/external entities; consider a
                // secured SAXParserFactory if workbooks come from untrusted sources.
                XMLReader parser = XMLReaderFactory.createXMLReader();
                BigExcelParse handler = new BigExcelParse(styles, sst, minColumns);
                parser.setContentHandler(handler);

                int sheetIndex = 0;
                while (sheetsData.hasNext()) {
                    // Stop before opening the sheet at the cut-off index.
                    if (lastIndexSheet != null && lastIndexSheet.intValue() == sheetIndex) {
                        break;
                    }
                    InputStream in = sheetsData.next();
                    try {
                        parser.parse(new InputSource(in));
                    } finally {
                        in.close();
                    }
                    // The handler keeps appending to the same list, so drain it after
                    // every sheet; otherwise earlier sheets would be added again.
                    List<String[]> rows = handler.getRows();
                    result.addAll(rows);
                    rows.clear();
                    sheetIndex++;
                }
                return result;
            } finally {
                // revert() releases the file without trying to save a read-only package.
                pkg.revert();
            }
        } catch (Exception e) {
            throw new RuntimeException("BigExcel读取异常: " + path, e);
        }
    }
}





参考链接: https://my.oschina.net/shea1992/blog/2244646