Why is async performance worse than sync when blocking on file I/O in Node.js?


From reading some posts on Stack Overflow comparing sync and async, it seems that async should have only a small amount of overhead, or even be faster than synchronous calls, for blocking I/O operations:

Some places I've researched:

Is non-blocking I/O really faster than multi-threaded blocking I/O? How?
What is the overhead of Javascript async functions

I wrote a small benchmark that creates four files of 256MiB to 1GiB to look at the performance of fs.readFile().

const {performance} = require('perf_hooks');
const fs = require('fs');
const {execSync} = require("child_process");

const sizes = [512, 1024, 256, 512]; //file sizes in MiB
function makeFiles() {
    for (let i = 0; i < sizes.length; i++) {
        // execSync is synchronous and takes no callback (that's exec's signature);
        // dd reports its stats on stderr, which execSync pipes through by default
        execSync(`dd if=/dev/urandom of=file-${i}.txt bs=1M count=${sizes[i]}`);
    }
}

function syncTest() {
    const startTime = performance.now();
    const results = [];

    for (let i = 0; i < sizes.length; i++) {
        results.push(fs.readFileSync(`file-${i}.txt`));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

async function asyncTest() {
    const startTime = performance.now();
    const results = [];

    for (let i = 0; i < sizes.length; i++) {
        results.push(fs.promises.readFile(`file-${i}.txt`));
    }
    await Promise.all(results);

    console.log(`Async version took ${performance.now() - startTime}ms`);
}

makeFiles();
syncTest();
asyncTest();

Output:

> makeFiles();

512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 4.28077 s, 125 MB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 8.45918 s, 127 MB/s
256+0 records in
256+0 records out
268435456 bytes (268 MB, 256 MiB) copied, 1.96678 s, 136 MB/s
512+0 records in
512+0 records out
536870912 bytes (537 MB, 512 MiB) copied, 4.32488 s, 124 MB/s
undefined
> syncTest();
Sync version took 1055.9131410121918ms
undefined
> asyncTest();
Promise { <pending> }
> Async version took 6991.523499011993ms

So it appears that the async version is roughly 7x slower than the sync version. How can this slowdown be explained? And when should the sync version be used?

Repl.it link: https://repl.it/repls/VioletredFatherlyDaemons

System: Node 13.9.0 on Arch Linux 5.5.4-arch1-1

Tags: node.js asynchronous fs
1 Answer
FYI, in addition to all my comments above, here's the fastest I've been able to get an async version:

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];
    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 10});
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);
    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

Note, I modified the scheme a bit so that an array of filenames is passed to each test rather than creating the filenames each time, so I could centralize the creation of the files.

The things that helped speed it up were:

  1. Using a larger highWaterMark, which is presumably the stream buffer size (see the sketch after this list).
  2. Collecting the data in an array and concatenating it at the end (this greatly reduces peak memory consumption and the GC work).
  3. Allowing the different files in the loop to run in parallel with each other.

With those changes, it's about the same speed as the sync version, sometimes a bit slower, sometimes about the same.
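
To make the first point concrete, here is a minimal sketch (my illustration, not part of the original answer) that counts how many 'data' events fire for a given highWaterMark, assuming file-0.txt from the question's makeFiles() exists in the working directory:

// Minimal sketch (not from the original answer): count the 'data' events
// a read stream emits for a given highWaterMark. Assumes file-0.txt from
// the question's makeFiles() exists in the current working directory.
const fs = require('fs');

function countChunks(filename, highWaterMark) {
    return new Promise((resolve, reject) => {
        let chunks = 0;
        fs.createReadStream(filename, {highWaterMark})
            .on('data', () => { chunks++; })
            .on('end', () => resolve(chunks))
            .on('error', reject);
    });
}

// Larger chunks mean fewer 'data' events, so fewer trips through the event loop.
countChunks('file-0.txt', 64 * 1024).then(n => console.log(`64 KiB chunks: ${n}`));
countChunks('file-0.txt', 64 * 1024 * 10).then(n => console.log(`640 KiB chunks: ${n}`));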

I also put a 2 second delay between the run of each test and force the garbage collector to run, to make sure GC runs don't interfere with my results.
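
For reference, this is the helper the full script below uses for that, shown in isolation; the typeof guard is my addition, since global.gc is undefined when the --expose_gc flag is omitted:

// delay() as used in the script below, plus an added guard: without
// --expose_gc, global.gc is undefined and calling it would throw.
function delay(t) {
    return new Promise(resolve => {
        if (typeof global.gc === 'function') {
            global.gc();    // force a full GC so leftover garbage doesn't skew the next test
        }
        setTimeout(resolve, t);
    });
}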

Here's my whole script, which can be run on any platform. Note that you must use the --expose_gc command line argument, as in node --expose_gc temp.js:

// Run this with the --expose_gc command line option
const {performance} = require('perf_hooks');
const fs = require('fs');
const path = require('path');

const sizes = [512, 1024, 256, 512];   // file sizes in MB
const data = "0123456789\n";
const testDir = path.join(__dirname, "bigfile");

function makeFiles() {
    // make a bigger string to make fewer disk writes
    const bData = [];
    for (let i = 0; i < 1000; i++) {
        bData.push(data);
    }
    const biggerData = bData.join("");
    try {
        fs.mkdirSync(testDir);    // ignore errors if it already exists
    } catch(e) {
        // do nothing if it already exists
    }
    const files = [];

    for (let i = 0; i < sizes.length; i++) {
        let targetLen = sizes[i] * 1024 * 1024;
        let f;
        try {
            let fname = `${path.join(testDir, "test")}-${i}.txt`;
            f = fs.openSync(fname, 'w');
            files.push(fname);
            let len = 0;
            while (len < targetLen) {
                fs.writeSync(f, biggerData);
                len += biggerData.length;
            }
        } catch(e) {
            console.log(e);
            process.exit(1);
        } finally {
            if (f) fs.closeSync(f);
        }
    }
    return files;
}

function clearFiles(files) {
    for (let filename of files) {
        fs.unlinkSync(filename);
    }
    fs.rmdirSync(testDir);
}

function syncTest(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        results.push(fs.readFileSync(filename));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

async function asyncTest(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        results.push(fs.promises.readFile(filename));
    }
    await Promise.all(results);
    console.log(`Async version took ${performance.now() - startTime}ms`);
}

async function asyncTestStream(files) {
    const startTime = performance.now();
    for (let filename of files) {
        await new Promise((resolve, reject) => {
            let stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 10});
            let data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('close', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        });
    }
    console.log(`Async stream version took ${performance.now() - startTime}ms`);
}

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];
    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 100});
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);
    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSerial(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        results.push(await fs.promises.readFile(filename));
    }
    console.log(`Async serial version took ${performance.now() - startTime}ms`);
}

function delay(t) {
    return new Promise(resolve => {
        global.gc();
        setTimeout(resolve, t);
    });
}

// delay between each test to let any system stuff calm down
async function run() {
    const files = makeFiles();
    try {
        await delay(2000);
        syncTest(files);
        await delay(2000);
        await asyncTest(files);
        await delay(2000);
        await asyncTestStream(files);
        await delay(2000);
        await asyncTestStreamParallel(files);
        await delay(2000);
        await asyncTestSerial(files);
    } catch(e) {
        console.log(e);
    } finally {
        clearFiles(files);
    }
}

run();

And, here are my results on Windows 10 with node v12.13.1:

node --expose_gc temp
Sync version took 1175.2680000066757ms
Async version took 2315.0439999699593ms
Async stream version took 1600.0085990428925ms
Async stream parallel version took 1111.310200035572ms
Async serial version took 4387.053400993347ms

Version 2

Then, I discovered that for files below 2GB we can pre-allocate a buffer for the whole file and read it with a single read, which can be even faster. This version adds several new options for that: syncTestSingleRead(), asyncTestSingleReadSerial() and asyncTestSingleReadParallel().
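
Condensed to its core, the single-read approach looks like the sketch below; the full readFile() helper in the complete script further down does the same thing:

// Condensed sketch of the single-read idea: stat the file, pre-allocate one
// buffer of exactly that size, and fill it with a single read() on the handle.
const fsp = require('fs').promises;

async function readWholeFile(filename) {
    const handle = await fsp.open(filename, 'r');
    try {
        const stats = await handle.stat();
        // allocUnsafe skips zero-filling; the read below overwrites every byte
        const buffer = Buffer.allocUnsafe(stats.size);
        const {bytesRead} = await handle.read(buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size");
        }
        return buffer;
    } finally {
        await handle.close();
    }
}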

These new options are all faster, and for once the async options are consistently faster than the sync ones:

node --expose_gc temp
Sync version took 1602.546700000763ms
Sync single read version took 680.5937000513077ms
Async version took 2337.3639990091324ms
Async serial version took 4320.517499983311ms
Async stream version took 1625.9839000105858ms
Async stream parallel version took 1119.7469999790192ms
Async single read serial version took 580.7244000434875ms
Async single read parallel version took 360.47460001707077ms

And, the code that matches these results:

// Run this with the --expose_gc command line option
const {performance} = require('perf_hooks');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');

const sizes = [512, 1024, 256, 512];   // file sizes in MB
const data = "0123456789\n";
const testDir = path.join(__dirname, "bigfile");

function makeFiles() {
    // make a bigger string to make fewer disk writes
    const bData = [];
    for (let i = 0; i < 1000; i++) {
        bData.push(data);
    }
    const biggerData = bData.join("");
    try {
        fs.mkdirSync(testDir);    // ignore errors if it already exists
    } catch(e) {
        // do nothing if it already exists
    }
    const files = [];

    for (let i = 0; i < sizes.length; i++) {
        let targetLen = sizes[i] * 1024 * 1024;
        let f;
        try {
            let fname = `${path.join(testDir, "test")}-${i}.txt`;
            f = fs.openSync(fname, 'w');
            files.push(fname);
            let len = 0;
            while (len < targetLen) {
                fs.writeSync(f, biggerData);
                len += biggerData.length;
            }
        } catch(e) {
            console.log(e);
            process.exit(1);
        } finally {
            if (f) fs.closeSync(f);
        }
    }
    return files;
}

function clearFiles(files) {
    for (let filename of files) {
        fs.unlinkSync(filename);
    }
    fs.rmdirSync(testDir);
}

function readFileSync(filename) {
    let handle = fs.openSync(filename, "r");
    try {
        let stats = fs.fstatSync(handle);
        let buffer = Buffer.allocUnsafe(stats.size);
        let bytesRead = fs.readSync(handle, buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size");
        }
    } finally {
        fs.closeSync(handle);
    }
}

// read a file in one single read
async function readFile(filename) {
    let handle = await fsp.open(filename, "r");
    try {
        let stats = await handle.stat();
        let buffer = Buffer.allocUnsafe(stats.size);
        let {bytesRead} = await handle.read(buffer, 0, stats.size, 0);
        if (bytesRead !== stats.size) {
            throw new Error("bytesRead not full file size");
        }
    } finally {
        handle.close();
    }
}

function syncTest(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        results.push(fs.readFileSync(filename));
    }
    console.log(`Sync version took ${performance.now() - startTime}ms`);
}

function syncTestSingleRead(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        readFileSync(filename);
    }
    console.log(`Sync single read version took ${performance.now() - startTime}ms`);
}

async function asyncTest(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        results.push(fs.promises.readFile(filename));
    }
    await Promise.all(results);
    console.log(`Async version took ${performance.now() - startTime}ms`);
}

async function asyncTestStream(files) {
    const startTime = performance.now();
    for (let filename of files) {
        await new Promise((resolve, reject) => {
            let stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 10});
            let data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('close', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        });
    }
    console.log(`Async stream version took ${performance.now() - startTime}ms`);
}

async function asyncTestStreamParallel(files) {
    const startTime = performance.now();
    let results = [];
    for (let filename of files) {
        results.push(new Promise((resolve, reject) => {
            const stream = fs.createReadStream(filename, {highWaterMark: 64 * 1024 * 100});
            const data = [];
            stream.on('data', chunk => {
                data.push(chunk);
            }).on('end', () => {
                resolve(Buffer.concat(data));
            }).on('error', reject);
        }));
    }
    await Promise.all(results);
    console.log(`Async stream parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSingleReadSerial(files) {
    const startTime = performance.now();
    let buffer;
    for (let filename of files) {
        let handle = await fsp.open(filename, "r");
        try {
            let stats = await handle.stat();
            if (!buffer || buffer.length < stats.size) {
                buffer = Buffer.allocUnsafe(stats.size);
            }
            let {bytesRead} = await handle.read(buffer, 0, stats.size, 0);
            if (bytesRead !== stats.size) {
                throw new Error("bytesRead not full file size");
            }
        } finally {
            handle.close();
        }
    }
    console.log(`Async single read serial version took ${performance.now() - startTime}ms`);
}

async function asyncTestSingleReadParallel(files) {
    const startTime = performance.now();
    await Promise.all(files.map(readFile));
    console.log(`Async single read parallel version took ${performance.now() - startTime}ms`);
}

async function asyncTestSerial(files) {
    const startTime = performance.now();
    const results = [];
    for (let filename of files) {
        results.push(await fs.promises.readFile(filename));
    }
    console.log(`Async serial version took ${performance.now() - startTime}ms`);
}

function delay(t) {
    return new Promise(resolve => {
        global.gc();
        setTimeout(resolve, t);
    });
}

// delay between each test to let any system stuff calm down
async function run() {
    const files = makeFiles();
    try {
        await delay(2000);
        syncTest(files);
        await delay(2000);
        syncTestSingleRead(files);
        await delay(2000);
        await asyncTest(files);
        await delay(2000);
        await asyncTestSerial(files);
        await delay(2000);
        await asyncTestStream(files);
        await delay(2000);
        await asyncTestStreamParallel(files);
        await delay(2000);
        await asyncTestSingleReadSerial(files);
        await delay(2000);
        await asyncTestSingleReadParallel(files);
    } catch(e) {
        console.log(e);
    } finally {
        clearFiles(files);
    }
}

run();
