/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
* 使用CVS模式解决XLSX文件,可以有效解决用户模式内存溢出的问题
* 该模式是POI官方推荐的读取大数据的模式,在用户模式下,数据量较大、Sheet较多、或者是有很多无用的空行的情况
* ,容易出现内存溢出,用户模式读取Excel的典型代码如下: FileInputStream file=new
* FileInputStream("c:\\test.xlsx"); Workbook wb=new XSSFWorkbook(file);
*
*
* @author 山人
*/
public class XLSXCovertCSVReader {
/**
* The type of the data value is indicated by an attribute on the cell. The
* value is usually in a "v" element within the cell.
*/
enum xssfDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
}
/**
* 使用xssf_sax_API处理Excel,请参考: http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
* <p/>
* Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
* <p/>
* A web-friendly version is http://openiso.org/Ecma/376/Part4
*/
class MyXSSFSheetHandler extends DefaultHandler {
/**
* Table with styles
*/
private StylesTable stylesTable;
/**
* Table with unique strings
*/
private ReadOnlySharedStringsTable sharedStringsTable;
/**
* Destination for data
*/
private final PrintStream output;
/**
* Number of columns to read starting with leftmost
*/
private final int minColumnCount;
// Set when V start element is seen
private boolean vIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
private xssfDataType nextDataType;
// Used to format numeric cell values.
private short formatIndex;
private String formatString;
private final DataFormatter formatter;
private int thisColumn = -1;
// The last column printed to the output stream
private int lastColumnNumber = -1;
// Gathers characters as they are seen.
private StringBuffer value;
private String[] record;
private List<String[]> rows = new ArrayList<String[]>();
private boolean isCellNull = false;
/**
* Accepts objects needed while parsing.
*
* @param styles
* Table of styles
* @param strings
* Table of shared strings
* @param cols
* Minimum number of columns to show
* @param target
* Sink for output
*/
public MyXSSFSheetHandler(StylesTable styles,
ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
this.stylesTable = styles;
this.sharedStringsTable = strings;
this.minColumnCount = cols;
this.output = target;
this.value = new StringBuffer();
this.nextDataType = xssfDataType.NUMBER;
this.formatter = new DataFormatter();
record = new String[this.minColumnCount];
rows.clear();// 每次读取都清空行集合
}
/*
* (non-Javadoc)
*
* @see
* org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
* java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if ("inlineStr".equals(name) || "v".equals(name)) {
vIsOpen = true;
// Clear contents cache
value.setLength(0);
}
// c => cell
else if ("c".equals(name)) {
// Get the cell reference
String r = attributes.getValue("r");
int firstDigit = -1;
for (int c = 0; c < r.length(); ++c) {
if (Character.isDigit(r.charAt(c))) {
firstDigit = c;
break;
}
}
thisColumn = nameToColumn(r.substring(0, firstDigit));
// Set up defaults.
this.nextDataType = xssfDataType.NUMBER;
this.formatIndex = -1;
this.formatString = null;
String cellType = attributes.getValue("t");
String cellStyleStr = attributes.getValue("s");
if ("b".equals(cellType))
nextDataType = xssfDataType.BOOL;
else if ("e".equals(cellType))
nextDataType = xssfDataType.ERROR;
else if ("inlineStr".equals(cellType))
nextDataType = xssfDataType.INLINESTR;
else if ("s".equals(cellType))
nextDataType = xssfDataType.SSTINDEX;
else if ("str".equals(cellType))
nextDataType = xssfDataType.FORMULA;
else if (cellStyleStr != null) {
// It's a number, but almost certainly one
// with a special style or format
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
this.formatIndex = style.getDataFormat();
this.formatString = style.getDataFormatString();
if (this.formatString == null)
this.formatString = BuiltinFormats
.getBuiltinFormat(this.formatIndex);
}
}
}
/*
* (non-Javadoc)
*
* @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
* java.lang.String, java.lang.String)
*/
public void endElement(String uri, String localName, String name)
throws SAXException {
String thisStr = null;
// v => contents of a cell
if ("v".equals(name)) {
// Process the value contents as required.
// Do now, as characters() may be called more than once
switch (nextDataType) {
case BOOL:
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR:
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA:
// A formula could result in a string value,
// so always add double-quote characters.
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
// TODO: have seen an example of this, so it's untested.
XSSFRichTextString rtsi = new XSSFRichTextString(
value.toSt
excel导入大数据失败解决方法
需积分: 50 144 浏览量
2018-08-17
11:18:22
上传
评论 2
收藏 5KB RAR 举报
计算机码农
- 粉丝: 0
- 资源: 11
最新资源
- 基于Python的图像阴影检测与去除源码(高分期末大作业项目).zip
- 基于C++/Qt实现的井字棋游戏
- 基于 Python 编程语言的 Web 框架Django
- Python和Flask实现的基于体检数据的城市公共健康可视分析系统源码+使用说明.zip
- 基于python实现的华为智慧工地-安全帽检测
- buck-boost_2023-12-16_12-12-13.eprj
- 后端开发关于数据库和API开发的介绍
- 机器学习和数据挖掘课程设计-米其林餐厅数据挖掘管理系统源码+使用文档说明.zip
- html html html展示我与ai的对化
- 数据结构课程设计-全国交通出行咨询模拟系统C语言实现源码.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈