Java解析excel⽂件⼤数据量时,报OOM异常问题。
在解析EXCEL,JAVA 通常使⽤poi包下的 XSSFWorkbook 对象,但是,遇到海量数据(⽐如⼗⼏万条或者 JVM堆设置了内存⽐较⼩时),就会抛出OOM异常,下⾯就放出解决⽅法。
直接上代码:
x.xxxmon.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.ptions.InvalidFormatException;
import org.apache.ptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.del.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
l.sax.InputSource;
l.sax.SAXException;
l.sax.XMLReader;
l.sax.helpers.XMLReaderFactory;
/**
* @author Jimmy Shan
* @date 2021-04-09
* @desc 解析⼤数据量excel⽂件,避免OOM发⽣
*/
@Component
public class ExcelParserBigData {
private static final Logger logger = Logger(ExcelParserBigData.class);
private ISheetContentHandler contentHandler = new DefaultSheetHandler(); //表格默认处理器
private List<String[]> datas = new ArrayList<String[]>(); //读取数据
/**
* @desc 转换表格,默认为转换第⼀个表格
*/
public ExcelParserBigData parse(InputStream stream)
throws InvalidFormatException, IOException, ParseException {
return parse(stream, 1);
}
/**
* @desc 解析⽅法
*/
public synchronized ExcelParserBigData parse(InputStream stream, int sheetId)
throws InvalidFormatException, IOException, ParseException {
// 每次转换前都清空数据
datas.clear();
// 打开表格⽂件输⼊流
OPCPackage pkg = OPCPackage.open(stream);
try {
// 创建表阅读器
XSSFReader reader;
try {
reader = new XSSFReader(pkg);
} catch (OpenXML4JException e) {
<("读取表格出错");
throw new ParseException(e.fillInStackTrace());
}
// 转换指定单元表
InputStream shellStream = Sheet("rId" + sheetId);
try {
InputSource sheetSource = new InputSource(shellStream);
StylesTable styles = StylesTable();
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
getContentHandler().init(datas);// 设置读取出的数据
/
/ 获取转换器
XMLReader parser = getSheetParser(styles, strings);
parser.parse(sheetSource);
} catch (SAXException e) {
<("读取表格出错");
throw new ParseException(e.fillInStackTrace());
} finally {
shellStream.close();
}
} finally {
pkg.close();
}
return this;
}
/**
* @desc 获取表格读取数据,获取数据前,需要先转换数据,此⽅法不会获取第⼀⾏数据,表格读取数据
*/
public List<String[]> getDatas() {
return getDatas(true);
}
/**
* @desc 获取表格读取数据,获取数据前,需要先转换数据
*/
public List<String[]> getDatas(boolean dropFirstRow) {
if (dropFirstRow && datas.size() > 0) {
}
return datas;
}
/**
* @desc 获取读取表格的转换器
*/
protected XMLReader getSheetParser(StylesTable styles, ReadOnlySharedStringsTable strings) throws SAXException { XMLReader parser = ateXMLReader();
parser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, getContentHandler(), false));
return parser;
}
public ISheetContentHandler getContentHandler() {
return contentHandler;
}
public void setContentHandler(ISheetContentHandler contentHandler) {
}
/**
* @desc 表格转换错误
*/
public class ParseException extends Exception {
private static final long serialVersionUID = -2451526411018517607L;
public ParseException(Throwable t) {
super("表格转换错误", t);
}
}
public interface ISheetContentHandler extends SheetContentsHandler {
/**
* @desc 设置转换后的数据集,⽤于存放转换结果
*/
error parse newvoid init(List<String[]> datas);
}
/**
* @desc 默认表格解析handder
*/
class DefaultSheetHandler implements ISheetContentHandler {
/**
* @desc 读取数据
*/
private List<String[]> datas;
private int columsLength;
private String[] readRow;
private ArrayList<String> fristRow = new ArrayList<String>();
@Override
public void init(List<String[]> datas) {
this.datas = datas;
//lumsLength = columsLength;
}
@Override
public void startRow(int rowNum) {
if (rowNum != 0) {
readRow = new String[columsLength];
}
}
@Override
public void endRow(int rowNum) {
//将Excel第⼀⾏表头的列数当做数组的长度,要保证后续的⾏的列数不能超过这个长度,这是个约定。 if (rowNum == 0) {
columsLength = fristRow.size();
readRow = Array(new String[fristRow.size()]);
}else {
readRow = Array(new String[columsLength]);
}
datas.add(readRow.clone());
readRow = null;
fristRow.clear();
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
int index = getCellIndex(cellReference);//转换A1,B1,C1等表格位置为真实索引位置
try {
fristRow.set(index, formattedValue);
} catch (IndexOutOfBoundsException e) {
int size = fristRow.size();
for (int i = index - size+1;i>0;i--){
fristRow.add(null);
}
fristRow.set(index,formattedValue);
}
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
}
/**
* @desc 转换表格引⽤为列编号
*/
public int getCellIndex(String cellReference) {
String ref = placeAll("\\d+", "");
int num = 0;
int result = 0;
for (int i = 0; i < ref.length(); i++) {
char ch = cellReference.charAt(ref.length() - i - 1);
num = (int) (ch - 'A' + 1);
num *= Math.pow(26, i);
result += num;
}
return result - 1;
}
}
}
调⽤⽅:
x.xxx.service.impl;
x.xxxmon.util.ExcelParserBigData;
x.xxx.service.DemoBaseService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.FileInputStream;
import java.util.List;
/**
* @author Jimmy Shan
* @date 2021-04-07
* @desc ⽰例服务实现
*/
@Service("demoBaseService")
public class DemoBaseServiceImpl implements DemoBaseService {
private static final Logger LOGGER = Logger(DemoBaseServiceImpl.class);
@Autowired
private ExcelParserBigData excelParserBigData;
/**
* @desc 解析⼤数据量excel⽂件
*/
@Override
public void parseBigXlsx(String upPathName) {
long start = System.currentTimeMillis();
FileInputStream inf = null;
try {
inf = new FileInputStream(upPathName);
ExcelParserBigData parseBigData = excelParserBigData.parse(inf);
List<String[]> dataList = Datas(); //这⾥的List泛型是String数组,解析后的数据是以数组形式存放的。 for (int i = 0; i < dataList.size(); i++) {
String[] str = (i);
LOGGER.info("第 {} ⾏", (i +1));
LOGGER.info("oppoId = {}, oppoName = {}", str[0], str[1]);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if(inf != null) {
inf.close();
}
LOGGER.info("解析总耗时:{} 毫秒", (System.currentTimeMillis() - start));
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
接⼝定义:
x.xxx.service;
/**
* @author Jimmy Shan
* @date 2021-04-07
* @desc ⽰例服务
*/
public interface DemoBaseService {
/**
* @desc 解析⼤数据量excel⽂件
*/
void parseBigXlsx(String upPathName);
}
OK,记录到此。
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论