Java读取各种文件格式内容--688IT编程网

Java读取各种文件格式内容所需的jar包我也不太记得了，大家可以搜搜，直接上代码：

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Locale;

import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

/**

*⽂件内容读取转换器

public class ReadFileConverter

{

public String getContents(String path) throws Exception

{

String contents = "";

int index = path.lastIndexOf(".");

String file_suffix = path.substring(index+1).toLowerCase();

if(file_suffix.equalsIgnoreCase("txt")||file_suffix.equalsIgnoreCase("log")){

contents = adTXT(path);

}

else if(file_suffix.equalsIgnoreCase("xls")){

contents = adXLS(path);

}

else if(file_suffix.equalsIgnoreCase("xlsx")){

contents = adXLSX(path);

}

else if(file_suffix.equalsIgnoreCase("doc")){

contents = adDOC(path);

}

else if(file_suffix.equalsIgnoreCase("docx")){

contents = adDOCX(path);

}

else if(file_suffix.equalsIgnoreCase("pdf")){

contents = adPDF(path);

}

return contents;

}

public String readXLS(String file) throws Exception

{

StringBuilder content = new StringBuilder();

HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(file));

try{

for(int numSheets = 0; numSheets < NumberOfSheets(); numSheets++){

if (null != SheetAt(numSheets)){

HSSFSheet aSheet = SheetAt(numSheets);// 获得⼀个sheet

for(int rowNumOfSheet = 0; rowNumOfSheet <= LastRowNum(); rowNumOfSheet++){

if (null != Row(rowNumOfSheet)){

HSSFRow aRow = Row(rowNumOfSheet); // 获得⼀个⾏

for(short cellNumOfRow = 0; cellNumOfRow <= LastCellNum(); cellNumOfRow++){ if (null != Cell(cellNumOfRow)){

HSSFCell aCell = Cell(cellNumOfRow);// 获得列值

if (vertCell(aCell).length() > 0){

content.vertCell(aCell));

　}

content.append("\n");

}

catch(Exception e){

content.append("xls⽂件格式不对或损坏");

}

finally{

if(workbook!=null){

workbook.close();

}

String();

}

public String readXLSX(String file) throws Exception

{

StringBuilder content = new StringBuilder();

XSSFWorkbook workbook = new XSSFWorkbook(file);

try{

for(int numSheets = 0; numSheets < NumberOfSheets(); numSheets++){

if (null != SheetAt(numSheets)){

XSSFSheet aSheet = SheetAt(numSheets);// 获得⼀个sheet

for(int rowNumOfSheet = 0; rowNumOfSheet <= LastRowNum(); rowNumOfSheet++){

if (null != Row(rowNumOfSheet)){

XSSFRow aRow = Row(rowNumOfSheet); // 获得⼀个⾏

for(short cellNumOfRow = 0; cellNumOfRow <= LastCellNum(); cellNumOfRow++){ if (null != Cell(cellNumOfRow)){

XSSFCell aCell = Cell(cellNumOfRow);// 获得列值

if (vertCell(aCell).length() > 0){

content.vertCell(aCell));

}

content.append("\n");

}

}catch(Exception e){

content.append("xlsx⽂件格式不对或损坏");

}

finally{

if(workbook!=null){

workbook.close();

}

String();

}

public String readTXT(String file) throws Exception

{

String contents = "";

try{

String encoding = _charset(new File(file));

if (encoding.equalsIgnoreCase("GBK")) {

contents = adFileToString(new File(file), "gbk");

} else {

contents = adFileToString(new File(file), "utf8");

}

}catch(Exception e){

contents = "txt⽂件格式不对或损坏";

}

return contents;

}

public String readDOC(String file) throws Exception

{

String returnStr;

WordExtractor wordExtractor = new WordExtractor(new FileInputStream(new File(file)));

try{

returnStr = Text();

}catch(Exception e){

returnStr="doc⽂件格式不对或损坏";

}

finally{

if(wordExtractor != null){

wordExtractor.close();

}

return returnStr;

}

public String readDOCX(String file) throws Exception

{

String docx;

XWPFWordExtractor xwp= new XWPFWordExtractor(POIXMLDocument.openPackage(file));

try{

docx= Text();

}catch(Exception e){

docx="docx⽂件格式不对或损坏";

}

finally{

if(xwp !=null){

xwp.close();

}

return docx;

}

public String readPDF(String file) throws Exception

{

String result = null;

FileInputStream is = null;

PDDocument document = null;

try{

is = new FileInputStream(file);

document = PDDocument.load(is);

PDFTextStripper stripper = new PDFTextStripper();

result = Text(document);

}catch(Exception e){

result="pdf⽂件格式不对或损坏";

}

finally{

if (is != null){

is.close();

}

java valueof if (document != null){

document.close();

　}

return result;

}

private String get_charset(File file) throws IOException

{

String charset = "GBK";

byte[] first3Bytes = new byte[3];

BufferedInputStream bis = null;

try {

boolean checked = false;

bis = new BufferedInputStream(new FileInputStream(file));

bis.mark(0);

int read = ad(first3Bytes, 0, 3);

if (read == -1)

return charset;

if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {

charset = "UTF-16LE";

checked = true;

} else if (first3Bytes[0] == (byte) 0xFE&& first3Bytes[1] == (byte) 0xFF) {

charset = "UTF-16BE";

checked = true;

} else if (first3Bytes[0] == (byte) 0xEF&& first3Bytes[1] == (byte) 0xBB&& first3Bytes[2] == (byte) 0xBF) { charset = "UTF-8";

checked = true;

}

set();

if (!checked) {

// int len = 0;

int loc = 0;

while ((read = ad()) != -1) {

loc=loc+1;

if (read >= 0xF0)

break;

if (0x80 <= read && read <= 0xBF) // 单独出现BF以下的，也算是GBK

break;

if (0xC0 <= read && read <= 0xDF) {

read = ad();

if (0x80 <= read && read <= 0xBF) // 双字节 (0xC0 - 0xDF)

// (0x80

// - 0xBF)，也可能在GB编码内

continue;

else

break;

} else if (0xE0 <= read && read <= 0xEF) {// 也有可能出错，但是⼏率较⼩

read = ad();

if (0x80 <= read && read <= 0xBF) {

read = ad();

if (0x80 <= read && read <= 0xBF) {

charset = "UTF-8";

break;

} else

break;

} else

break;

}

} catch (Exception e) {

e.printStackTrace();

} finally {

if (bis != null) {

bis.close();

}

return charset;

}

@SuppressWarnings("deprecation")

private String convertCell(Cell cell)

{

NumberFormat formater = Instance();

formater.setGroupingUsed(false);

String cellValue = "";

if (cell == null) {

return cellValue;

}

switch (CellTypeEnum()) {

case NUMERIC:

cellValue = formater.NumericCellValue());

break;

case STRING:

cellValue = StringCellValue();

break;

case BLANK:

cellValue = StringCellValue();

break;

case BOOLEAN:

cellValue = Boolean.BooleanCellValue()).toString(); break;

case ERROR:

cellValue = String.ErrorCellValue());

break;

default:

cellValue = "";

}

im();

}

688IT编程网

Java读取各种文件格式内容

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

一种任意人头与任意人体的3D结合方法

正则匹配c语言中8进制

fortran数据格式

python中文本转数字用的公式

gh 文本变数值

js判断输入是否为正整数、浮点数等数字的函数代码

qt浮点数正则表达式

QT正则表达式限制输入值

手机号码和电话号码的正则表达式

str转浮点-概述说明以及解释

英豪结尾的诗句

Java正则表达式:符合以特定字符串开头,以特定字符串结尾的所有结果

machinebuilder使用手册

ASP.NET网站建设基本常用代码

LCD显示实时时钟

经纬度正则表达式解析

前端科学计数法转数字

python正则表达式re之compile函数解析

pythonunittest之断言及示例

[lua]lua中匹配字符串小数

最新文章

nginx map用法正则

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

python中re.findall函数实例用法

nginx url表达式

nginx 正则匹配参数

标签列表

688IT编程网

Java读取各种文件格式内容

发表评论

推荐文章

应用程序的安全检测方法、装置、电子设备和存储介质

nginx map用法 正则

VBA之正则表达式(1)--基础篇

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

热门文章

一种任意人头与任意人体的3D结合方法

正则匹配c语言中8进制

fortran数据格式

python中文本转数字用的公式

gh 文本变数值

js判断输入是否为正整数、浮点数等数字的函数代码

qt浮点数正则表达式

QT正则表达式限制输入值

手机号码和电话号码的正则表达式

str转浮点-概述说明以及解释

英豪结尾的诗句

Java正则表达式:符合以特定字符串开头,以特定字符串结尾的所有结果

machinebuilder使用手册

ASP.NET网站建设基本常用代码

LCD显示实时时钟

经纬度正则表达式解析

前端科学计数法转数字

python正则表达式re之compile函数解析

pythonunittest之断言及示例

[lua]lua中匹配字符串小数

最新文章

nginx map用法 正则

Prometheus监控学习笔记之初识PromQL

关于PHP中的webshell

python中re.findall函数实例用法

nginx url表达式

nginx 正则匹配参数

标签列表

nginx map用法正则

nginx map用法正则