Java读写⼤⽂本⽂件(2GB以上)
如下的程序,将一个行数为fileLines的文本文件平均分为splitNum个小文本文件,其中换行符'\r'是作者在linux上使用的(注:Linux的标准换行符实际是'\n','\r'是旧版Mac OS的换行符),windows的java换行符是'\r\n':
package kddcup2012.task2.FileSystem;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Splits one large text file into several smaller files by streaming it line by line,
 * so memory use is bounded by the read buffer rather than by the file size.
 *
 * <p>With {@code splitNum = 111} the loop writes 112 output files: each receives up to
 * {@code fileLines / splitNum} lines, and the last one receives whatever remains.
 */
public class FileSplit
{
    /**
     * Reads the hard-coded input file and writes the parts to
     * {@code /home/haoqiong/part<i>.txt}, then prints the elapsed time in milliseconds.
     *
     * @param args unused
     * @throws IOException if the input cannot be opened or read, or a part cannot be written
     */
    public static void main(String[] args) throws IOException
    {
        long timer = System.currentTimeMillis();
        int bufferSize = 20 * 1024 * 1024; // 20 MB read buffer

        // NOTE(review): this path ends in '/' and looks like a directory; FileInputStream
        // needs a regular file, so the file name is probably missing here - confirm.
        File file = new File("/media/Data/毕业设计/kdd cup/数据/");

        int splitNum = 112 - 1;                    // number of parts minus one
        int fileLines = 23669283;                  // number of lines in the input file
        long perSplitLines = fileLines / splitNum; // lines per part

        // try-with-resources closes the reader (and every writer) even when an I/O error
        // interrupts the split.
        // NOTE(review): no charset is given, so reader and writer both use the platform default.
        try (BufferedReader input = new BufferedReader(
                new InputStreamReader(new BufferedInputStream(new FileInputStream(file))),
                bufferSize))
        {
            for (int i = 0; i <= splitNum; ++i)
            {
                // One output file per part.
                try (FileWriter output = new FileWriter("/home/haoqiong/part" + i + ".txt"))
                {
                    String line;
                    // The counter is tested first so that no line is consumed (and lost)
                    // once the current part is full.
                    for (long lineCounter = 0;
                         lineCounter < perSplitLines && (line = input.readLine()) != null;
                         ++lineCounter)
                    {
                        // NOTE(review): "\r" is kept from the original; most Linux tools
                        // expect "\n" as the line terminator - confirm which is wanted.
                        output.append(line).append("\r");
                    }
                }
            }
        }

        timer = System.currentTimeMillis() - timer;
        System.out.println("处理时间:" + timer);
    }
}
以上程序处理大文本文件只需要30MB左右的内存空间(这和所设的读取缓冲大小有关),但是速度不是很快:在磁盘没有其他程序占用的情况下,将200MB文件分割为112份需要20秒(机器配置:Centrino2 P7450 CPU,2GB DDR3内存,Ubuntu 11.10系统,硬盘最大读写速度大约60MB/s)。
另外,对于几百兆到2GB大小的文件,使用内存映射文件的话,速度会快一些,但是由于单次映射的文件长度不能超过java中int类型的最大值,所以内存映射只能处理2GB以下的文件。
java 读取⼀个巨⼤的⽂本⽂件既能保证内存不溢出⼜能保证性能
package helloword.helloword;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
/**
 * Demonstrates three ways of reading a large file without loading it into memory at once:
 * a memory-mapped region ({@link #main1}), a {@link FileChannel} drained through a small
 * {@link ByteBuffer} ({@link #main2}, {@link #main}) and line-by-line reading ({@link #main3}).
 */
public class ReadBig {
    /** Path of the file read by {@link #main1}, {@link #main2} and {@link #main3}. */
    public static String fff = "C:\\mq\\read\\l";

    /**
     * Memory-maps the second half of {@link #fff} and prints it in chunks of at most 3 MB,
     * followed by the elapsed time.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or mapped
     */
    public static void main1(String[] args) throws Exception {
        final int BUFFER_SIZE = 0x300000; // chunk size: 3 MB
        File f = new File(fff);
        byte[] dst = new byte[BUFFER_SIZE]; // holds one chunk at a time

        try (RandomAccessFile raf = new RandomAccessFile(f, "r");
             FileChannel channel = raf.getChannel()) {
            long half = f.length() / 2;
            MappedByteBuffer inputBuffer = channel.map(FileChannel.MapMode.READ_ONLY, half, half);

            long start = System.currentTimeMillis();
            while (inputBuffer.hasRemaining()) {
                // Every chunk is BUFFER_SIZE bytes except possibly the last one, so the
                // length must be derived from what is actually left in the mapping.
                int length = Math.min(inputBuffer.remaining(), BUFFER_SIZE);
                inputBuffer.get(dst, 0, length);
                // new String(dst, 0, length) yields the chunk as text for further processing.
                // It decodes with the platform charset; a multi-byte character that
                // straddles a chunk boundary will not decode correctly.
                System.out.println(new String(dst, 0, length));
            }
            long end = System.currentTimeMillis();
            System.out.println("读取⽂件⽂件⼀半内容花费:" + (end - start) + "毫秒");
        }
    }

    /**
     * Prints the whole of {@link #fff} by reading it through a 1 KB {@link ByteBuffer}.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main2(String[] args) throws Exception {
        printFileInChunks(fff);
    }

    /**
     * Prints {@link #fff} line by line.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main3(String[] args) throws Exception {
        try (BufferedReader br = new BufferedReader(new FileReader(fff))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Prints the file {@code d:\filename} by reading it through a 1 KB {@link ByteBuffer}.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        printFileInChunks("d:\\filename");
    }

    /** Streams the file at {@code path} to standard output through a 1 KB buffer. */
    private static void printFileInChunks(String path) throws Exception {
        int bufSize = 1024;
        byte[] bs = new byte[bufSize];
        ByteBuffer byteBuf = ByteBuffer.allocate(bufSize);

        try (RandomAccessFile raf = new RandomAccessFile(path, "r");
             FileChannel channel = raf.getChannel()) {
            while (channel.read(byteBuf) != -1) {
                byteBuf.flip(); // switch from filling to draining: limit = bytes just read
                int size = byteBuf.remaining();
                byteBuf.get(bs, 0, size);
                // Treat the bytes as text and print them, purely as an example.
                System.out.print(new String(bs, 0, size));
                byteBuf.clear();
            }
        }
    }
}
java 读取⼤容量⽂件,内存溢出?怎么按⼏⾏读取,读取多次。最佳答案
package helloword.helloword;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.util.Scanner;
/**
 * Examples of reading a large text file in bounded memory: in batches of a fixed number of
 * lines ({@link #main}) and as a buffered line-by-line copy ({@link #largeFileIO}).
 */
public class TestPrint {
    /** Number of lines gathered before a batch is handed off for processing. */
    private static final int BATCH_LINES = 100;

    /**
     * Reads the file at the hard-coded path in batches of {@code BATCH_LINES} lines.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = "你要读的⽂件的路径";
        // The file is only read, so it is opened in "r" mode; "rw" would require write
        // permission and would create the file if it did not exist.
        // RandomAccessFile.readLine maps each byte to one char, so non-ASCII text is not
        // decoded correctly by this approach.
        try (RandomAccessFile br = new RandomAccessFile(path, "r")) {
            // A StringBuilder avoids both the quadratic cost of repeated concatenation and
            // the literal "null" prefix that results from concatenating onto a null String.
            StringBuilder app = new StringBuilder();
            int i = 0;
            String str;
            while ((str = br.readLine()) != null) {
                i++;
                app.append(str);
                if (i >= BATCH_LINES) {
                    // Process the batch held in app here, then carry on reading.
                    i = 0;
                    app.setLength(0);
                }
            }
            // Lines left in app at this point (fewer than BATCH_LINES) form the final,
            // partial batch and should be processed here as well.
        }
    }

    /**
     * Copies a text file line by line through a 10 MB read buffer; recommended by the
     * original author for text files larger than 2 GB. Each line is written followed by a
     * single space. I/O errors are printed to standard error and not rethrown.
     *
     * @param inputFile  path of the file to read, decoded as UTF-8
     * @param outputFile path of the file to write, encoded with the platform default charset
     */
    void largeFileIO(String inputFile, String outputFile) {
        // try-with-resources closes both files even when the copy fails part-way.
        try (BufferedReader in = new BufferedReader(
                     new InputStreamReader(
                             new BufferedInputStream(new FileInputStream(new File(inputFile))),
                             "utf-8"),
                     10 * 1024 * 1024); // 10 MB buffer
             FileWriter fw = new FileWriter(outputFile)) {
            String line;
            // readLine() returning null is the end-of-stream signal; ready() only reports
            // whether a read would block, so it is not a reliable loop condition.
            while ((line = in.readLine()) != null) {
                // NOTE(review): the separator is a space, kept from the original; a line
                // terminator may have been intended - confirm.
                fw.append(line + " ");
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
jdk本⾝就⽀持超⼤⽂件的读写。
⽹上的⽂章基本分为两⼤类:
⼀类是使⽤BufferedReader类读写超⼤⽂件;
另一类是使用RandomAccessFile类读取。经过比较,最后使用了前一种方式进行超大文件的读取,下面是相关代码,其实很简单:
-------------------------------------------------------------------
File file = new File(filepath);
BufferedInputStream fis = new BufferedInputStream(new FileInputStream(file));
BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);// ⽤5M的缓冲读取⽂本⽂件
String line = "";
while((line = adLine()) != null){
//TODO: write your business
}
---------------------------------------------------------------------
注意代码,在实例化BufferedReader时,增加⼀个分配缓存的参数即可

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。