From 0145d44287af984dfc365df5747872d89f001381 Mon Sep 17 00:00:00 2001 From: Jakub Freisler Date: Mon, 12 Nov 2018 02:42:02 +0100 Subject: [PATCH] feat(async): Better parallelization Async uses fast-glob streams to speed up replacing Replace async available in benchmark test suite README benchmark section update fixes #10 --- README.md | 19 +- .../multiple-file-replace.benchmark.test.js | 28 +- benchmark/replace.spec.js | 318 ++++++++++++++++++ src/replace.js | 89 ++++- 4 files changed, 419 insertions(+), 35 deletions(-) create mode 100644 benchmark/replace.spec.js diff --git a/README.md b/README.md index 8dd78af..02dd531 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ # FRS-replace -CLI & Node wrapper around [javascript replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) which allows on-the-fly replacing (with or without changing input files), [globbing](https://en.wikipedia.org/wiki/Glob_(programming)), [piping](https://en.wikipedia.org/wiki/Pipeline_(Unix)) and many more! +The fastest ([see benchmarks](#benchmarks)) CLI & Node wrapper around [javascript replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) which allows on-the-fly replacing (with or without changing input files), [globbing](https://en.wikipedia.org/wiki/Glob_(programming)), [piping](https://en.wikipedia.org/wiki/Pipeline_(Unix)) and many more! * [Installation](#installation) * [Node API usage](#node-api-usage) @@ -234,18 +234,19 @@ FRS-replace a b -i foo.js | #### input as glob pattern [1000 iterations x 100 repetitions] | Library (best bolded) | Execution time [s] | Difference percentage (comparing to best time) | | --- | --- | --- | -| **FRS-replace async** | 0.36640944 | 0.0000% | -| FRS-replace sync | 0.39553770 | 7.9496% | -| replace-in-file | 1.78587186 | 387.3979% | -| replace async | *N/A* | *N/A* | -| replace sync | 0.44655926 | 21.8744% | +| **FRS-replace async** | 0.07656150 | 0.0000% | +| FRS-replace sync | 0.31196953 | 307.4757% | +| replace-in-file | 0.76240075 | 895.8017% | +| replace async | 0.11774627 | 53.7931% | +| replace sync | 0.91518713 | 1095.3620% | | replace-string | *N/A* | *N/A* | #### input & replacement as strings [1000 iterations x 100 repetitions] | Library (best bolded) | Execution time [s] | Difference percentage (comparing to best time) | | --- | --- | --- | -| FRS-replace async | 0.01015828 | 59.0095% | -| FRS-replace sync | 0.00657347 | 2.8957% | +| FRS-replace async | 0.00511845 | 77.4972% | +| **FRS-replace sync** | 0.00288368 | 0.0000% | | replace-in-file | *N/A* | *N/A* | | replace async | *N/A* | *N/A* | | replace sync | *N/A* | *N/A* | -| **replace-string** | 0.00638847 | 0.0000% | +| replace-string | 0.00292622 | 1.4752% | + diff --git a/benchmark/multiple-file-replace.benchmark.test.js b/benchmark/multiple-file-replace.benchmark.test.js index c7e743c..4c110af 100644 --- a/benchmark/multiple-file-replace.benchmark.test.js +++ b/benchmark/multiple-file-replace.benchmark.test.js @@ -63,13 +63,17 @@ tap.beforeEach(async () => { testInput.replace = { regex, - replacement + replacement, + recursive: true, + silent: true } testInput.replaceAsync = { regex, replacement, - async: true + async: true, + recursive: true, + silent: true } testInput.replaceInFile = { @@ -104,7 +108,7 @@ tap.afterEach((done) => { tap.test(`input as glob pattern [${iterationsNo} iterations x ${repetitionsNo / iterationsNo} repetitions]`, async ct => { const results = await multipleTests(ct, [ { - fn: () => FRSreplace.async(testInput.FRSReplace), + fn: () => { FRSreplace.async(testInput.FRSReplace) }, // IMPORTANT: test doesn't wait for function to finish, because replace (async) doesn't support that kind of behaviour (https://github.com/harthur/replace/issues/25) before: () => (testInput.FRSReplace.input = `${dir}/${tmpPrefixes.input}*`) }, { @@ -115,26 +119,24 @@ tap.test(`input as glob pattern [${iterationsNo} iterations x ${repetitionsNo / fn: () => replaceInFile(testInput.replaceInFile), before: () => (testInput.replaceInFile.files = `${dir}/${tmpPrefixes.input}*`) }, - // { - // fn: () => replace(testInput.replaceAsync), before: () => { - // testInput.replaceAsync.paths = [dir.replace(/\\/g, '/')] - // testInput.replaceAsync.include = `${tmpPrefixes.input}*` - // } - // }, // COMMENTED OUT - waits for better FRS-replace async methods - undefined, + { + fn: () => replace(testInput.replaceAsync), + before: () => { + testInput.replaceAsync.paths = [dir.replace(/\\/g, '/')] + } + }, { fn: () => replace(testInput.replace), before: () => { testInput.replace.paths = [dir.replace(/\\/g, '/')] - testInput.replace.include = `${tmpPrefixes.input}*` } }, undefined ]) const sortedResults = results.slice().sort(sortByNanoseconds) - ct.not(sortedResults[0].name.indexOf('FRS-replace'), -1, 'FRS-replace should be the fastest') - // results.map((result) => result.testCfg && ct.is(result.result, results[0].result, `${result.name} are results the same`)) + ct.is((sortedResults[0].name.indexOf('FRS-replace') !== -1 || (sortedResults[1].name.indexOf('FRS-replace') !== -1 && sortedResults[1].avgPercentageDifference < 5)), true, 'FRS-replace should be the fastest or second, but at most with 5% difference to best') + ct.not(sortedResults[2].name.indexOf('FRS-replace sync'), -1, 'FRS-replace sync should be third (right after async replace)') outputPerfy(ct, results, sortedResults[0]) diff --git a/benchmark/replace.spec.js b/benchmark/replace.spec.js new file mode 100644 index 0000000..4c110af --- /dev/null +++ b/benchmark/replace.spec.js @@ -0,0 +1,318 @@ +const tap = require('tap') +const tmp = require('tmp-promise') +const path = require('path') +const fs = require('fs') +const perfy = require('perfy') +const glob = require('fast-glob') + +const FRSreplace = require('../src/replace') +const replace = require('replace') +const replaceInFile = require('replace-in-file') +const replaceString = require('replace-string') + +const regex = new RegExp('^[adjox]', 'gm') +const replacement = 'ą|' +const content = `aąbcćdeęfg%hi +jklmn +oópqr,stuvwxyZ` +const tmpPrefixes = { + input: 'FRS-replace-replace-in', + output: 'FRS-replace-replace-out' +} +const defaults = { + inputReadOptions: 'utf8', + outputWriteOptions: 'utf8', + inputJoinString: '\n' +} +const repetitionsNo = 100000 +const iterationsNo = 1000 +const testInput = {} +const testedLibraries = [ + 'FRS-replace async', + 'FRS-replace sync', + 'replace-in-file', + 'replace async', + 'replace sync', + 'replace-string' +] + +let dir, output, input + +const readmeContent = fs.readFileSync('./README.md').toString() + +let perfyResults = '' + +{ + const dirObj = tmp.dirSync() // removing all files similar our tmp + dir = dirObj.name + + glob.sync( + [ + path.join(dir, tmpPrefixes.input), + path.join(dir, tmpPrefixes.output) + ].map(v => v + '*') + ) + .forEach(fs.unlinkSync) +} + +tap.beforeEach(async () => { + testInput.FRSReplace = { + regex, + replacement + } + + testInput.replace = { + regex, + replacement, + recursive: true, + silent: true + } + + testInput.replaceAsync = { + regex, + replacement, + async: true, + recursive: true, + silent: true + } + + testInput.replaceInFile = { + from: regex, + to: replacement + } + + cleanInputs() + + await tmp.file({ prefix: tmpPrefixes.input, keep: true, dir }) + .then( + async f => { + input = f + return new Promise( + resolve => fs.appendFile(f.path, content, { encoding: defaults.inputReadOptions }, resolve) + ) + }) +}) + +const cleanInputs = (done) => { + input && input.cleanup() + input = undefined + done && done() // to be runned either by node-tap or manually +} + +tap.afterEach((done) => { + fs.existsSync(output) && fs.unlinkSync(output) + cleanInputs() + done() +}) + +tap.test(`input as glob pattern [${iterationsNo} iterations x ${repetitionsNo / iterationsNo} repetitions]`, async ct => { + const results = await multipleTests(ct, [ + { + fn: () => { FRSreplace.async(testInput.FRSReplace) }, // IMPORTANT: test doesn't wait for function to finish, because replace (async) doesn't support that kind of behaviour (https://github.com/harthur/replace/issues/25) + before: () => (testInput.FRSReplace.input = `${dir}/${tmpPrefixes.input}*`) + }, + { + fn: () => FRSreplace.sync(testInput.FRSReplace), + before: () => (testInput.FRSReplace.input = `${dir}/${tmpPrefixes.input}*`) + }, + { + fn: () => replaceInFile(testInput.replaceInFile), + before: () => (testInput.replaceInFile.files = `${dir}/${tmpPrefixes.input}*`) + }, + { + fn: () => replace(testInput.replaceAsync), + before: () => { + testInput.replaceAsync.paths = [dir.replace(/\\/g, '/')] + } + }, + { + fn: () => replace(testInput.replace), + before: () => { + testInput.replace.paths = [dir.replace(/\\/g, '/')] + } + }, + undefined + ]) + const sortedResults = results.slice().sort(sortByNanoseconds) + + ct.is((sortedResults[0].name.indexOf('FRS-replace') !== -1 || (sortedResults[1].name.indexOf('FRS-replace') !== -1 && sortedResults[1].avgPercentageDifference < 5)), true, 'FRS-replace should be the fastest or second, but at most with 5% difference to best') + ct.not(sortedResults[2].name.indexOf('FRS-replace sync'), -1, 'FRS-replace sync should be third (right after async replace)') + + outputPerfy(ct, results, sortedResults[0]) + + ct.end() +}) + +tap.test(`input & replacement as strings [${iterationsNo} iterations x ${repetitionsNo / iterationsNo} repetitions]`, async ct => { + const results = await multipleTests(ct, [ + { + fn: () => FRSreplace.async(testInput.FRSReplace), + before: () => { + testInput.FRSReplace.regex = regex.source + testInput.FRSReplace.content = content + } + }, + { + fn: () => FRSreplace.sync(testInput.FRSReplace), + before: () => { + testInput.FRSReplace.regex = regex.source + testInput.FRSReplace.content = content + } + }, + undefined, + undefined, + undefined, + { fn: () => replaceString(content, regex.source, replacement) } + ]) + + const result = outputPerfy(ct, results, results.slice().sort(sortByNanoseconds)[0]) + + const sortedResults = result.results.slice().sort(sortByNanoseconds) + + ct.is((sortedResults[0].name.indexOf('FRS-replace') !== -1 || (sortedResults[1].name.indexOf('FRS-replace') !== -1 && sortedResults[1].avgPercentageDifference < 10)), true, 'FRS-replace should be the fastest or second, but at most with 10% difference to best') + + ct.end() +}) + +tap.teardown(() => { + fs.writeFileSync('./README.md', readmeContent.replace(/(##\sBenchmarks\s\s)[\s\S]*?(?:$|(?:\s##\s))/, '$1' + perfyResults)) +}) + +function outputPerfy (t, testResults, best) { + best = best.fullNanoseconds + + const result = { + name: t.name, + results: testResults.reduce( + (p, v) => p.push({ + name: v.name, + avgTime: + ( + v.fullNanoseconds === undefined + ? null + : (v.fullNanoseconds / 1000000000) + ), + avgPercentageDifference: + ( + v.fullNanoseconds === undefined + ? null + : ((v.fullNanoseconds / best - 1) * 100) + ) + }) && p, + [] + ) + } + + t.parser.write( + ' ---\n' + + ' name: \'' + result.name + '\'\n' + + ' results: \n' + result.results.reduce( + (p, v) => p + + ' - name: \'' + v.name + '\'\n' + + ' avgTime: ' + v.avgTime + '\n' + + ' avgPercentageDifference: ' + v.avgPercentageDifference + '\n' + , + '' + ) + + ' ...\n\n' + ) + + perfyResults += + '#### ' + result.name + '\n' + + '| Library (best bolded) | Execution time [s] | Difference percentage (comparing to best time) |\n' + + '| --- | --- | --- |\n' + + result.results.reduce( + (p, v) => p + + '| ' + (v.avgTime * 1000000000 === best ? ('**' + v.name + '**') : v.name) + + ' | ' + (v.avgTime === null ? '*N/A*' : (v.avgTime.toFixed(8))) + + ' | ' + (v.avgPercentageDifference == null ? '*N/A*' : (v.avgPercentageDifference.toFixed(4) + '%')) + ' |\n' + , + '' + ) + + return result +} + +async function multipleTests (t, testCfgs, n, iterations) { + const results = [] + + n = (n || repetitionsNo) / iterationsNo + iterations = iterations || iterationsNo + + testCfgs = testCfgs.reduce((p, v, i) => { + if (v === undefined) { + results[i] = { name: testedLibraries[i] } + return p + } + + return p.concat({ i, v }) + }, []) + + const testCfgLen = testCfgs.length + + for (let i = 0; i < n; ++i) { + for (let k = testCfgLen - 1; k >= 0; --k) { + const { v: testCfg, i: index } = testCfgs[k] + const prevResult = results[index] + const libName = testedLibraries[index] + + await t.test(`${t.name} - ${libName} #${i}`, async ct => { + testCfg.before && testCfg.before() + const result = await singleTest(libName, testCfg.fn, iterations) + + if (!prevResult) { + results[index] = result + result.testCfg = testCfg + } else { + for (const prop in result) { + if (Object.prototype.hasOwnProperty.call(result, prop) && typeof result[prop] === 'number') { + prevResult[prop] += result[prop] + } + } + } + + ct.end() + }) + } + } + + testCfgs.forEach(({ i: index }) => { + const result = results[index] + + for (const prop in result) { + if (Object.prototype.hasOwnProperty.call(result, prop) && typeof result[prop] === 'number') { + result[prop] /= n + } + } + }) + + return results +} + +async function singleTest (name, test, n) { + n = n || repetitionsNo + + perfy.start(name) + + while (--n) { + await test() + } + + const testResult = await test() + const result = perfy.end(name) + + result.result = testResult + return result +} + +function sortByNanoseconds (a, b) { + if (a.fullNanoseconds === undefined) { + return b.fullNanoseconds === undefined ? 0 : 1 + } + + if (b.fullNanoseconds === undefined) { + return -1 + } + + return a.fullNanoseconds - b.fullNanoseconds +} diff --git a/src/replace.js b/src/replace.js index 87242dc..a22f5d0 100644 --- a/src/replace.js +++ b/src/replace.js @@ -1,19 +1,9 @@ module.exports = { - sync: replace, - async: (...args) => new Promise((resolve, reject) => { - let result - - try { - result = replace.apply(this, args) - } catch (e) { - return reject(e) - } - - resolve(result) - }) + sync: replaceSync, + async: replaceAsync } -function replace ({ +function replaceSync ({ input, inputReadOptions = 'utf8', inputGlobOptions, @@ -58,10 +48,83 @@ function replace ({ return result } +async function replaceAsync ({ + input, + inputReadOptions = 'utf8', + inputGlobOptions, + inputJoinString = '\n', + content, + output, + outputWriteOptions = 'utf8', + regex, + replacement +}) { + let result + const replaceFn = typeof regex === 'string' ? replaceString : replaceRegex + + if (content !== void 0) { + result = replaceFn(content, regex, replacement) + } else if (input !== void 0) { + const fileStream = await require('fast-glob').stream(input, inputGlobOptions) + let filesFound = false + + result = '' + + const fileReaderPromise = multiFileReaderBuilder(require('fs'), inputReadOptions, fileReader => { + fileStream.on('data', entry => { + filesFound = true + return fileReader( + entry, + content => (result += replaceFn(content, regex, replacement)) + ) + }) + fileStream.once('error', writeError) + }) + + await new Promise((resolve) => fileStream.once('end', () => { + return resolve(filesFound ? fileReaderPromise : void 0) + })) + } else { + writeError('at least one input source must be defined!') + } + + if (output !== void 0) { + if (typeof outputWriteOptions === 'string') { + outputWriteOptions = { encoding: outputWriteOptions } + } + + await require('write')(require('path').normalize(output), result, outputWriteOptions) + } + + return result +} + function writeError (msg) { throw new Error(`FRS-replace :: ${msg}`) } +function multiFileReaderBuilder (fs, inputReadOptions, setup) { + let i = 0 + return new Promise((resolve, reject) => { + setup((path, callback) => { + if (++i < 1) return + + fs.readFile(path, inputReadOptions, (error, data) => { + if (error) { + i = -1 + return reject(error) + } + + callback(data) + + if (--i === 0) { + resolve() + } + }) + }) + }) +} + function replaceRegex (content, needle, replacement) { return content.replace(needle, replacement) }