Java读取各种文件格式内容所需的jar包我也不太记得了,大家可以搜搜,直接上代码:
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.NumberFormat;

import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
// NOTE(review): PDFBox 2.x location; PDFBox 1.8.x uses org.apache.pdfbox.util.PDFTextStripper.
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
/**
*⽂件内容读取转换器
*/
public class ReadFileConverter
{
public String getContents(String path) throws Exception
{
  String contents = "";
  int index = path.lastIndexOf(".");
  String file_suffix = path.substring(index+1).toLowerCase();
  if(file_suffix.equalsIgnoreCase("txt")||file_suffix.equalsIgnoreCase("log")){
    contents = adTXT(path);
  }
  else if(file_suffix.equalsIgnoreCase("xls")){
    contents = adXLS(path);
  }
  else if(file_suffix.equalsIgnoreCase("xlsx")){
    contents = adXLSX(path);
  }
  else if(file_suffix.equalsIgnoreCase("doc")){
    contents = adDOC(path);
  }
  else if(file_suffix.equalsIgnoreCase("docx")){
    contents = adDOCX(path);
  }
  else if(file_suffix.equalsIgnoreCase("pdf")){
    contents = adPDF(path);
  }
  return contents;
}
public String readXLS(String file) throws Exception
{
  StringBuilder content = new StringBuilder();
  HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(file));
  try{
    for(int numSheets = 0; numSheets < NumberOfSheets(); numSheets++){
      if (null != SheetAt(numSheets)){
        HSSFSheet aSheet = SheetAt(numSheets);// 获得⼀个sheet
        for(int rowNumOfSheet = 0; rowNumOfSheet <= LastRowNum(); rowNumOfSheet++){
          if (null != Row(rowNumOfSheet)){
            HSSFRow aRow = Row(rowNumOfSheet); // 获得⼀个⾏
            for(short cellNumOfRow = 0; cellNumOfRow <= LastCellNum(); cellNumOfRow++){              if (null != Cell(cellNumOfRow)){
                HSSFCell aCell = Cell(cellNumOfRow);// 获得列值
                if (vertCell(aCell).length() > 0){
                  content.vertCell(aCell));
                 }
              }
              content.append("\n");
            }
          }
        }
      }
    }
  }
  catch(Exception e){
    content.append("xls⽂件格式不对或损坏");
  }
  finally{
    if(workbook!=null){
      workbook.close();
    }
  }
  String();
}
public String readXLSX(String file) throws Exception
{
  StringBuilder content = new StringBuilder();
  XSSFWorkbook workbook = new XSSFWorkbook(file);
  try{
    for(int numSheets = 0; numSheets < NumberOfSheets(); numSheets++){
      if (null != SheetAt(numSheets)){
        XSSFSheet aSheet = SheetAt(numSheets);// 获得⼀个sheet
        for(int rowNumOfSheet = 0; rowNumOfSheet <= LastRowNum(); rowNumOfSheet++){
          if (null != Row(rowNumOfSheet)){
            XSSFRow aRow = Row(rowNumOfSheet); // 获得⼀个⾏
            for(short cellNumOfRow = 0; cellNumOfRow <= LastCellNum(); cellNumOfRow++){              if (null != Cell(cellNumOfRow)){
                XSSFCell aCell = Cell(cellNumOfRow);// 获得列值
                if (vertCell(aCell).length() > 0){
                  content.vertCell(aCell));
                }
              }
              content.append("\n");
            }
          }
        }
      }
    }
  }catch(Exception e){
    content.append("xlsx⽂件格式不对或损坏");
  }
  finally{
    if(workbook!=null){
      workbook.close();
    }
  }
  String();
}
public String readTXT(String file) throws Exception
{
  String contents = "";
  try{
    String encoding = _charset(new File(file));
    if (encoding.equalsIgnoreCase("GBK")) {
      contents = adFileToString(new File(file), "gbk");
    } else {
      contents = adFileToString(new File(file), "utf8");
    }
  }catch(Exception e){
    contents = "txt⽂件格式不对或损坏";
  }
  return contents;
}
public String readDOC(String file) throws Exception
{
  String returnStr;
  WordExtractor wordExtractor = new WordExtractor(new FileInputStream(new File(file)));
  try{
    returnStr = Text();
  }catch(Exception e){
    returnStr="doc⽂件格式不对或损坏";
  }
  finally{
    if(wordExtractor != null){
      wordExtractor.close();
    }
  }
  return returnStr;
}
public String readDOCX(String file) throws Exception
{
  String docx;
  XWPFWordExtractor xwp= new XWPFWordExtractor(POIXMLDocument.openPackage(file));
  try{
    docx= Text();
  }catch(Exception e){
    docx="docx⽂件格式不对或损坏";
  }
  finally{
    if(xwp !=null){
      xwp.close();
    }
  }
  return docx;
}
public String readPDF(String file) throws Exception
{
  String result = null;
  FileInputStream is = null;
  PDDocument document = null;
  try{
    is = new FileInputStream(file);
    document = PDDocument.load(is);
    PDFTextStripper stripper = new PDFTextStripper();
    result = Text(document);
  }catch(Exception e){
    result="pdf⽂件格式不对或损坏";
  }
  finally{
    if (is != null){
      is.close();
    }
java valueof    if (document != null){
      document.close();
   }
  }
  return result;
}
private String get_charset(File file) throws IOException
{
  String charset = "GBK";
  byte[] first3Bytes = new byte[3];
  BufferedInputStream bis = null;
  try {
    boolean checked = false;
    bis = new BufferedInputStream(new FileInputStream(file));
    bis.mark(0);
    int read = ad(first3Bytes, 0, 3);
    if (read == -1)
      return charset;
    if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
      charset = "UTF-16LE";
      checked = true;
    } else if (first3Bytes[0] == (byte) 0xFE&& first3Bytes[1] == (byte) 0xFF) {
      charset = "UTF-16BE";
      checked = true;
    } else if (first3Bytes[0] == (byte) 0xEF&& first3Bytes[1] == (byte) 0xBB&& first3Bytes[2] == (byte) 0xBF) {      charset = "UTF-8";
      checked = true;
    }
    set();
    if (!checked) {
      // int len = 0;
      int loc = 0;
      while ((read = ad()) != -1) {
        loc=loc+1;
        if (read >= 0xF0)
          break;
        if (0x80 <= read && read <= 0xBF) // 单独出现BF以下的,也算是GBK
          break;
        if (0xC0 <= read && read <= 0xDF) {
          read = ad();
          if (0x80 <= read && read <= 0xBF) // 双字节 (0xC0 - 0xDF)
          // (0x80
          // - 0xBF),也可能在GB编码内
            continue;
          else
            break;
        } else if (0xE0 <= read && read <= 0xEF) {// 也有可能出错,但是⼏率较⼩
          read = ad();
          if (0x80 <= read && read <= 0xBF) {
            read = ad();
            if (0x80 <= read && read <= 0xBF) {
              charset = "UTF-8";
              break;
            } else
              break;
          } else
            break;
        }
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    if (bis != null) {
      bis.close();
    }
  }
  return charset;
}
@SuppressWarnings("deprecation")
private String convertCell(Cell cell)
{
  NumberFormat formater = Instance();
  formater.setGroupingUsed(false);
  String cellValue = "";
  if (cell == null) {
    return cellValue;
  }
  switch (CellTypeEnum()) {
    case NUMERIC:
      cellValue = formater.NumericCellValue());
      break;
    case STRING:
      cellValue = StringCellValue();
      break;
    case BLANK:
      cellValue = StringCellValue();
      break;
    case BOOLEAN:
      cellValue = Boolean.BooleanCellValue()).toString();      break;
    case ERROR:
      cellValue = String.ErrorCellValue());
      break;
    default:
      cellValue = "";
    }
    im();
  }
}

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。