如何在不将整个文件读入RAM的情况下逐行读取文件

问题描述 投票:-1回答:4

我正在尝试找到最高效的方法,在Java / Groovy中读取大文件、处理其中的数据,并对输出调用一些外部函数。据我了解,使用BufferedReader、Scanner、FileIterator或Stream都可以做到这一点,但在我的每个测试用例中都发生了堆内存溢出。我不确定是我的步骤顺序有误,还是这本来就是正常行为。如果有人能帮我确认是我的代码写错了,还是我忽略了其他方法,我将不胜感激。源文件只是一个名为test.csv的1GB CSV文件。

package test;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import sftp.SftpConnector;
import sftp.SftpHandler;
import sftp.fileInfo;
import java.io.*;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Properties;
import java.util.Scanner;
import java.util.logging.Logger;
import java.nio.file.Files;
import java.util.stream.Stream;

/**
 * REST endpoints used to exercise an SFTP connection and to compare several
 * ways of streaming a large CSV file line by line to standard output.
 *
 * <p>Every file-reading endpoint closes its reader when it finishes, so that
 * repeated requests do not leak file handles. Note that all of the line-based
 * readers used here hold one complete line in memory at a time: a file that
 * contains no line terminators is read as a single, very large line.
 */
@RestController
public class TestController {

    /** CSV file streamed by the line-reading test endpoints. */
    private static final String CSV_PATH = "C:\\tmp\\test.csv";

    /** Buffer size, in characters, used by {@link #trueBufferedReader()}. */
    private static final int READER_BUFFER_SIZE = 10000000;

    /** Response returned when the connector could not be created at start-up. */
    private static final String CONNECTOR_UNAVAILABLE = "SFTP connector is not available.";

    /** May be {@code null} when construction failed; see the constructor. */
    private SftpConnector connector;

    /**
     * Creates the controller and its SFTP connector. A failure to build the
     * connector is printed and leaves {@link #connector} as {@code null};
     * the SFTP endpoints check for that before using it.
     */
    @Autowired
    public TestController() {
        try {
            this.connector = new SftpConnector(this.getProperties());
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        }
    }

    /**
     * Loads the connection settings from the properties file on disk.
     *
     * @return the loaded properties; empty when the file is missing or unreadable
     */
    public Properties getProperties() {
        Properties prop = new Properties();
        File propfile = new File("C:\\tmp\\connection2.properties");
        // try-with-resources: the stream was previously never closed.
        try (InputStream in = new FileInputStream(propfile)) {
            prop.load(in);
        } catch (FileNotFoundException fnf) {
            System.out.println("Could not find the connection.properties prop file.");
        } catch (IOException ioe) {
            System.out.println("Could not open the connection.properties file.");
        }
        return prop;
    }

    /**
     * Opens and then closes the SFTP connection.
     *
     * @return a status message, or the message of the exception that occurred
     */
    @GetMapping(value = "/test")
    @ResponseBody
    public String testConnection() {
        if (this.connector == null) {
            return CONNECTOR_UNAVAILABLE;
        }
        String response = "";
        try {
            this.connector.openSFTPConnection();
            response = "Connection has been opened";
        } catch (Exception ex) {
            response = ex.getMessage();
        } finally {
            if (this.connector.getSession().isConnected()) {
                try {
                    this.connector.closeSFTPConnection();
                } catch (Exception ex) {
                    response = response + ex.getMessage();
                }
            }
        }
        return response;
    }

    /**
     * Downloads every {@code *.xml} file found under {@code /IB_Test} while
     * logging progress through an {@link SftpHandler} attached to the global logger.
     *
     * @return a status message, or the message of the exception that occurred
     */
    @GetMapping(value = "/testLogger")
    @ResponseBody
    public String testLogger() {
        if (this.connector == null) {
            return CONNECTOR_UNAVAILABLE;
        }
        String response = "";
        Logger log = Logger.getLogger(Logger.getGlobal().getName());
        SftpHandler handler = null;
        try {
            handler = new SftpHandler(this.getProperties());
            log.addHandler(handler);
        } catch (Exception ex) {
            response = ex.getMessage();
        }
        try {
            log.info("Opening the SFTP Channel");
            this.connector.openSFTPConnection();
            log.info("Channel is opened.  Getting the list of XML's");
            Collection<fileInfo> files = this.connector.ls("/IB_Test", "*.xml");
            log.info("List retrieved.  Downloading each xml file");
            for (fileInfo file : files) {
                log.info("Downloading " + file.getName());
                this.connector.get("/IB_Test/" + file.getName(), "C:\\tmp\\");
                log.info("Downloaded");
            }
            log.info("All XML's have been downloaded.");
            response = "All files have been downloaded";
        } catch (Exception ex) {
            log.severe("Exception thrown:" + ex.getMessage());
            response = ex.getMessage();
        } finally {
            try {
                if (this.connector.getSession().isConnected()) {
                    try {
                        log.info("Attempting to close the SFTP connection");
                        this.connector.closeSFTPConnection();
                    } catch (Exception ex) {
                        log.severe("Exception thrown: " + ex.getMessage());
                        response = response + ex.getMessage();
                    }
                }
            } finally {
                // Detach the per-request handler; otherwise one more handler is
                // added to the global logger on every call and never released.
                if (handler != null) {
                    log.removeHandler(handler);
                }
            }
        }
        return response;
    }

    /**
     * Prints the CSV file line by line using a {@link Scanner}.
     *
     * @return a fixed completion message
     */
    @GetMapping(value = "/testScanner")
    @ResponseBody
    public String testScanner() {
        try (InputStream input = new FileInputStream(new File(CSV_PATH));
             Scanner sc = new Scanner(input)) {
            while (sc.hasNextLine()) {
                System.out.println(sc.nextLine());
            }
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        }
        return "File has completed upload line by line...";
    }

    /**
     * Prints the CSV file line by line using {@link Files#lines}.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/testFilesNIO")
    @ResponseBody
    public String testFilesNIO() {
        String output = "";
        // The stream holds an open file and must be closed explicitly.
        try (Stream<String> lines = Files.lines(Paths.get(CSV_PATH))) {
            lines.forEach(System.out::println);
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Prints the CSV file, skipping its first line, using an NIO
     * {@link BufferedReader} and its {@code lines()} stream.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/testFilesNIOwBufferedReader")
    @ResponseBody
    public String testFilesNIOwBufferedReader() {
        String output = "";
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(CSV_PATH))) {
            reader.lines().skip(1).forEach(System.out::println);
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Prints the CSV file line by line using a classic {@link BufferedReader}
     * read loop with a large explicit buffer.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/trueBufferedReader")
    @ResponseBody
    public String trueBufferedReader() {
        String output = "";
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(CSV_PATH))),
                READER_BUFFER_SIZE)) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Prints the CSV file line by line using the Commons IO {@link LineIterator}.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/lineIterator")
    @ResponseBody
    public String lineIterator() {
        String output = "";
        try {
            LineIterator it = FileUtils.lineIterator(new File(CSV_PATH));
            try {
                while (it.hasNext()) {
                    System.out.println(it.nextLine());
                }
            } finally {
                LineIterator.closeQuietly(it);
            }
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }
}

预期的结果是看到CSV文件的每一行都打印到控制台。

java groovy file-io inputstream
4个回答
1
投票
// Stream the file one line at a time; try-with-resources closes the reader
// when the loop ends, whether normally or through an exception.
Charset yourCharset = Charset.forName("UTF-8");
try (BufferedReader br = Files.newBufferedReader(your_file, yourCharset)) {
    for (String singleLine = br.readLine(); singleLine != null; singleLine = br.readLine()) {
        System.out.println(singleLine);
    }
} catch (IOException ex) {
    // handle exception
}

1
投票

或者在Groovy中:

// Iterate over the file's lines, decoded as UTF-8; the underlying reader is
// opened and closed by eachLine itself.
yourFile.eachLine('UTF-8') { println it }

1
投票

在所有测试中,您逐行读取文件以将其传输到System.out

如果您需要将输入流传输到输出流,您可以使用此方法,它将是最快的:

// Open the file as a raw input stream; withInputStream closes it once the closure returns.
yourFile.withInputStream{stream->
    // Groovy's << on an OutputStream pipes the whole InputStream into it,
    // copying bytes without splitting the content into lines.
    System.out << stream
}

请注意,System.out很慢......


0
投票

这是我的CSV文件中的一处错误,导致行尾没有换行符。因此整个文档被当作1行读入。

© www.soinside.com 2019 - 2024. All rights reserved.